From 23238dbed506037679201f161888900ed25dc15d Mon Sep 17 00:00:00 2001 From: "John M. Schanck" Date: Thu, 10 Sep 2020 10:00:09 -0400 Subject: [PATCH] Initialize arrays in fft.c and fix a few compiler warnings --- crypto_kem/hqc-128/clean/fft.c | 278 ++++++++++-------- crypto_kem/hqc-192/clean/fft.c | 278 ++++++++++-------- crypto_kem/hqc-256/clean/fft.c | 278 ++++++++++-------- crypto_kem/hqc-rmrs-128/clean/fft.c | 138 +++++---- crypto_kem/hqc-rmrs-192/clean/fft.c | 138 +++++---- crypto_kem/hqc-rmrs-256/clean/fft.c | 138 +++++---- .../hqc-rmrs-128_avx2.yml | 3 - .../hqc-rmrs-128_clean.yml | 3 - .../hqc-rmrs-192_avx2.yml | 2 - .../hqc-rmrs-192_clean.yml | 2 - .../hqc-rmrs-256_avx2.yml | 1 - .../hqc-rmrs-256_clean.yml | 1 - 12 files changed, 678 insertions(+), 582 deletions(-) diff --git a/crypto_kem/hqc-128/clean/fft.c b/crypto_kem/hqc-128/clean/fft.c index 1a08fc8a..1a2763b6 100644 --- a/crypto_kem/hqc-128/clean/fft.c +++ b/crypto_kem/hqc-128/clean/fft.c @@ -19,8 +19,10 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f); +static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f); static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -30,7 +32,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -48,10 +51,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -90,7 +94,7 @@ static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_ f[13] = f[7] ^ f[9] ^ f[11] ^ f1[6]; f[14] = f[6] ^ f0[6] ^ f0[7] ^ f1[6]; f[15] = f[7] ^ f0[7] ^ f1[7]; - return; + break; case 3: f[0] = f0[0]; @@ -101,49 +105,53 @@ static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_ f[5] = f[3] ^ f1[2]; f[6] = f[4] ^ f0[3] ^ f1[2]; f[7] = f[3] ^ f0[3] ^ f1[3]; - return; + break; case 2: f[0] = f0[0]; f[1] = f1[0]; f[2] = f0[1] ^ f1[0]; f[3] = f[2] ^ f1[1]; - return; + break; case 1: f[0] = f0[0]; f[1] = f1[0]; - return; + break; default: - ; + radix_t_big(f, f0, f1, m_f); + break; + } +} - size_t n = 1 << (m_f - 2); +static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f) { + uint16_t Q0[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT_T - 2)] = {0}; - uint16_t Q0[1 << (PARAM_FFT_T - 2)]; - uint16_t Q1[1 << (PARAM_FFT_T - 2)]; - uint16_t R0[1 << (PARAM_FFT_T - 2)]; - uint16_t R1[1 << (PARAM_FFT_T - 2)]; + uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)] = {0}; + uint16_t R[1 << 2 * (PARAM_FFT_T - 2)] = {0}; - uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)]; - uint16_t R[1 << 2 * (PARAM_FFT_T - 2)]; + size_t i, n; - memcpy(Q0, f0 + n, 2 * n); - memcpy(Q1, f1 + n, 2 * n); - memcpy(R0, f0, 2 * n); - memcpy(R1, f1, 2 * n); + n = 1 << (m_f - 2); + memcpy(Q0, f0 + n, 2 * n); + memcpy(Q1, f1 + n, 2 * n); + memcpy(R0, f0, 2 * n); + memcpy(R1, f1, 2 * n); - radix_t (Q, Q0, Q1, m_f - 1); - radix_t (R, R0, R1, m_f - 1); + radix_t (Q, Q0, Q1, m_f - 1); + radix_t (R, R0, R1, m_f - 1); - memcpy(f, R, 4 * n); - memcpy(f + 2 * n, R + n, 2 * n); - memcpy(f + 3 * n, Q + n, 2 * n); + memcpy(f, R, 4 * n); + memcpy(f + 2 * n, R + n, 2 * n); + memcpy(f + 3 * n, Q + n, 2 * n); - for (size_t i = 0 ; i < n ; ++i) { - f[2 * n + i] ^= Q[i]; - f[3 * n + i] ^= f[2 * n + i]; - } + for (i = 0 ; i < n ; ++i) { + f[2 * n + i] ^= Q[i]; + f[3 * n + i] ^= f[2 * n + i]; } } @@ -162,29 +170,31 @@ static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_ * @param[in] betas FFT constants */ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - size_t k = 1 << (m - 1); - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t f0[1 << (PARAM_FFT_T - 2)] = {0}; uint16_t f1[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t beta_m_pow; + + size_t i, j, k; // Step 1 if (m_f == 1) { f[0] = 0; - for (size_t i = 0 ; i < (1U << m) ; ++i) { + for (i = 0 ; i < (1U << m) ; ++i) { f[0] ^= w[i]; } f[1] = 0; - uint16_t betas_sums[1 << (PARAM_M - 1)]; betas_sums[0] = 0; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { - size_t index = (1 << j) + k; - betas_sums[index] = betas_sums[k] ^ betas[j]; - f[1] ^= PQCLEAN_HQC128_CLEAN_gf_mul(betas_sums[index], w[index]); + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { + betas_sums[(1 << j) + k] = betas_sums[k] ^ betas[j]; + f[1] ^= PQCLEAN_HQC128_CLEAN_gf_mul(betas_sums[(1 << j) + k], w[(1 << j) + k]); } } @@ -192,7 +202,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m } // Compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas[i], PQCLEAN_HQC128_CLEAN_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(gammas[i]) ^ gammas[i]; } @@ -206,23 +216,22 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m * Transpose: * u[i] = w[i] + w[k+i] * v[i] = G[i].w[i] + (G[i]+1).w[k+i] = G[i].u[i] + w[k+i] */ + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case // Step 5: Compute f0 from u and f1 from v f1[1] = 0; u[0] = w[0] ^ w[k]; f1[0] = w[k]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { u[i] = w[i] ^ w[k + i]; f1[0] ^= PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; } fft_t_rec(f0, u, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); } else { - uint16_t v[1 << (PARAM_M - 2)] = {0}; - u[0] = w[0] ^ w[k]; v[0] = w[k]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { u[i] = w[i] ^ w[k + i]; v[i] = PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; } @@ -237,8 +246,8 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m // Step 2: compute f from g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, f[i]); } @@ -261,14 +270,15 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m */ void PQCLEAN_HQC128_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) { // Transposed from Gao and Mateer algorithm - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; uint16_t u[1 << (PARAM_M - 1)] = {0}; uint16_t v[1 << (PARAM_M - 1)] = {0}; - uint16_t deltas[PARAM_M - 1]; - uint16_t f0[1 << (PARAM_FFT_T - 1)]; - uint16_t f1[1 << (PARAM_FFT_T - 1)]; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t f0[1 << (PARAM_FFT_T - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT_T - 1)] = {0}; + + size_t i, k; compute_fft_betas(betas); compute_subset_sums(betas_sums, betas, PARAM_M - 1); @@ -281,15 +291,16 @@ void PQCLEAN_HQC128_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) * Transpose: * u[i] = w[i] + w[k+i] * v[i] = G[i].w[i] + (G[i]+1).w[k+i] = G[i].u[i] + w[k+i] */ + k = 1 << (PARAM_M - 1); u[0] = w[0] ^ w[k]; v[0] = w[k]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { u[i] = w[i] ^ w[k + i]; v[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas_sums[i], u[i]) ^ w[k + i]; } // Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(betas[i]) ^ betas[i]; } @@ -337,7 +348,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -348,51 +359,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -408,25 +424,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -436,8 +454,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQC128_CLEAN_gf_mul(beta_m_pow, f[i]); } @@ -447,7 +465,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQC128_CLEAN_gf_mul(betas[i], PQCLEAN_HQC128_CLEAN_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(gammas[i]) ^ gammas[i]; } @@ -458,10 +476,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -472,7 +491,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC128_CLEAN_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -501,14 +520,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQC128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -524,7 +544,7 @@ void PQCLEAN_HQC128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQC128_CLEAN_gf_square(betas[i]) ^ betas[i]; } @@ -532,6 +552,7 @@ void PQCLEAN_HQC128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -542,7 +563,7 @@ void PQCLEAN_HQC128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC128_CLEAN_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -561,21 +582,20 @@ void PQCLEAN_HQC128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { * @param[in] vector Array of size VEC_N1_SIZE_BYTES */ void PQCLEAN_HQC128_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint64_t *vector) { - uint16_t r[1 << PARAM_M]; - uint16_t gammas[PARAM_M - 1]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); + uint16_t r[1 << PARAM_M] = {0}; + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + size_t i, j, k; // Unpack the received word vector into array r - size_t i; for (i = 0 ; i < VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0) ; ++i) { - for (size_t j = 0 ; j < 64 ; ++j) { + for (j = 0 ; j < 64 ; ++j) { r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); } } // Last byte - for (size_t j = 0 ; j < PARAM_N1 % 64 ; ++j) { + for (j = 0 ; j < PARAM_N1 % 64 ; ++j) { r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); } @@ -586,9 +606,10 @@ void PQCLEAN_HQC128_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); // Twist and permute r adequately to obtain w + k = 1 << (PARAM_M - 1); w[0] = 0; w[k] = -r[0] & 1; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = -r[PQCLEAN_HQC128_CLEAN_gf_log(gammas_sums[i])] & gammas_sums[i]; w[k + i] = -r[PQCLEAN_HQC128_CLEAN_gf_log(gammas_sums[i] ^ 1)] & (gammas_sums[i] ^ 1); } @@ -603,25 +624,28 @@ void PQCLEAN_HQC128_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 * @param[in] w Array of size 2^PARAM_M */ void PQCLEAN_HQC128_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) { - uint16_t gammas[PARAM_M - 1]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); - size_t index = PARAM_GF_MUL_ORDER; + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint64_t bit; + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); - error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15); - uint64_t bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + + k = 1 << (PARAM_M - 1); + index = PARAM_GF_MUL_ORDER; + bit = 1 ^ ((uint16_t) - w[k] >> 15); error[index / 8] ^= bit << (index % 64); - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_CLEAN_gf_log(gammas_sums[i]); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15); + bit = 1 ^ ((uint16_t) - w[i] >> 15); error[index / 64] ^= bit << (index % 64); index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_CLEAN_gf_log(gammas_sums[i] ^ 1); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15); + bit = 1 ^ ((uint16_t) - w[k + i] >> 15); error[index / 64] ^= bit << (index % 64); } } diff --git a/crypto_kem/hqc-192/clean/fft.c b/crypto_kem/hqc-192/clean/fft.c index 44a7ab2e..8fbb04f1 100644 --- a/crypto_kem/hqc-192/clean/fft.c +++ b/crypto_kem/hqc-192/clean/fft.c @@ -19,8 +19,10 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f); +static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f); static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -30,7 +32,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -48,10 +51,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -90,7 +94,7 @@ static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_ f[13] = f[7] ^ f[9] ^ f[11] ^ f1[6]; f[14] = f[6] ^ f0[6] ^ f0[7] ^ f1[6]; f[15] = f[7] ^ f0[7] ^ f1[7]; - return; + break; case 3: f[0] = f0[0]; @@ -101,49 +105,53 @@ static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_ f[5] = f[3] ^ f1[2]; f[6] = f[4] ^ f0[3] ^ f1[2]; f[7] = f[3] ^ f0[3] ^ f1[3]; - return; + break; case 2: f[0] = f0[0]; f[1] = f1[0]; f[2] = f0[1] ^ f1[0]; f[3] = f[2] ^ f1[1]; - return; + break; case 1: f[0] = f0[0]; f[1] = f1[0]; - return; + break; default: - ; + radix_t_big(f, f0, f1, m_f); + break; + } +} - size_t n = 1 << (m_f - 2); +static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f) { + uint16_t Q0[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT_T - 2)] = {0}; - uint16_t Q0[1 << (PARAM_FFT_T - 2)]; - uint16_t Q1[1 << (PARAM_FFT_T - 2)]; - uint16_t R0[1 << (PARAM_FFT_T - 2)]; - uint16_t R1[1 << (PARAM_FFT_T - 2)]; + uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)] = {0}; + uint16_t R[1 << 2 * (PARAM_FFT_T - 2)] = {0}; - uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)]; - uint16_t R[1 << 2 * (PARAM_FFT_T - 2)]; + size_t i, n; - memcpy(Q0, f0 + n, 2 * n); - memcpy(Q1, f1 + n, 2 * n); - memcpy(R0, f0, 2 * n); - memcpy(R1, f1, 2 * n); + n = 1 << (m_f - 2); + memcpy(Q0, f0 + n, 2 * n); + memcpy(Q1, f1 + n, 2 * n); + memcpy(R0, f0, 2 * n); + memcpy(R1, f1, 2 * n); - radix_t (Q, Q0, Q1, m_f - 1); - radix_t (R, R0, R1, m_f - 1); + radix_t (Q, Q0, Q1, m_f - 1); + radix_t (R, R0, R1, m_f - 1); - memcpy(f, R, 4 * n); - memcpy(f + 2 * n, R + n, 2 * n); - memcpy(f + 3 * n, Q + n, 2 * n); + memcpy(f, R, 4 * n); + memcpy(f + 2 * n, R + n, 2 * n); + memcpy(f + 3 * n, Q + n, 2 * n); - for (size_t i = 0 ; i < n ; ++i) { - f[2 * n + i] ^= Q[i]; - f[3 * n + i] ^= f[2 * n + i]; - } + for (i = 0 ; i < n ; ++i) { + f[2 * n + i] ^= Q[i]; + f[3 * n + i] ^= f[2 * n + i]; } } @@ -162,29 +170,31 @@ static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_ * @param[in] betas FFT constants */ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - size_t k = 1 << (m - 1); - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t f0[1 << (PARAM_FFT_T - 2)] = {0}; uint16_t f1[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t beta_m_pow; + + size_t i, j, k; // Step 1 if (m_f == 1) { f[0] = 0; - for (size_t i = 0 ; i < (1U << m) ; ++i) { + for (i = 0 ; i < (1U << m) ; ++i) { f[0] ^= w[i]; } f[1] = 0; - uint16_t betas_sums[1 << (PARAM_M - 1)]; betas_sums[0] = 0; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { - size_t index = (1 << j) + k; - betas_sums[index] = betas_sums[k] ^ betas[j]; - f[1] ^= PQCLEAN_HQC192_CLEAN_gf_mul(betas_sums[index], w[index]); + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { + betas_sums[(1 << j) + k] = betas_sums[k] ^ betas[j]; + f[1] ^= PQCLEAN_HQC192_CLEAN_gf_mul(betas_sums[(1 << j) + k], w[(1 << j) + k]); } } @@ -192,7 +202,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m } // Compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas[i], PQCLEAN_HQC192_CLEAN_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(gammas[i]) ^ gammas[i]; } @@ -206,23 +216,22 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m * Transpose: * u[i] = w[i] + w[k+i] * v[i] = G[i].w[i] + (G[i]+1).w[k+i] = G[i].u[i] + w[k+i] */ + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case // Step 5: Compute f0 from u and f1 from v f1[1] = 0; u[0] = w[0] ^ w[k]; f1[0] = w[k]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { u[i] = w[i] ^ w[k + i]; f1[0] ^= PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; } fft_t_rec(f0, u, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); } else { - uint16_t v[1 << (PARAM_M - 2)] = {0}; - u[0] = w[0] ^ w[k]; v[0] = w[k]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { u[i] = w[i] ^ w[k + i]; v[i] = PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; } @@ -237,8 +246,8 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m // Step 2: compute f from g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, f[i]); } @@ -261,14 +270,15 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m */ void PQCLEAN_HQC192_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) { // Transposed from Gao and Mateer algorithm - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; uint16_t u[1 << (PARAM_M - 1)] = {0}; uint16_t v[1 << (PARAM_M - 1)] = {0}; - uint16_t deltas[PARAM_M - 1]; - uint16_t f0[1 << (PARAM_FFT_T - 1)]; - uint16_t f1[1 << (PARAM_FFT_T - 1)]; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t f0[1 << (PARAM_FFT_T - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT_T - 1)] = {0}; + + size_t i, k; compute_fft_betas(betas); compute_subset_sums(betas_sums, betas, PARAM_M - 1); @@ -281,15 +291,16 @@ void PQCLEAN_HQC192_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) * Transpose: * u[i] = w[i] + w[k+i] * v[i] = G[i].w[i] + (G[i]+1).w[k+i] = G[i].u[i] + w[k+i] */ + k = 1 << (PARAM_M - 1); u[0] = w[0] ^ w[k]; v[0] = w[k]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { u[i] = w[i] ^ w[k + i]; v[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas_sums[i], u[i]) ^ w[k + i]; } // Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(betas[i]) ^ betas[i]; } @@ -337,7 +348,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -348,51 +359,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -408,25 +424,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -436,8 +454,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQC192_CLEAN_gf_mul(beta_m_pow, f[i]); } @@ -447,7 +465,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQC192_CLEAN_gf_mul(betas[i], PQCLEAN_HQC192_CLEAN_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(gammas[i]) ^ gammas[i]; } @@ -458,10 +476,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -472,7 +491,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC192_CLEAN_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -501,14 +520,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQC192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -524,7 +544,7 @@ void PQCLEAN_HQC192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQC192_CLEAN_gf_square(betas[i]) ^ betas[i]; } @@ -532,6 +552,7 @@ void PQCLEAN_HQC192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -542,7 +563,7 @@ void PQCLEAN_HQC192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC192_CLEAN_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -561,21 +582,20 @@ void PQCLEAN_HQC192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { * @param[in] vector Array of size VEC_N1_SIZE_BYTES */ void PQCLEAN_HQC192_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint64_t *vector) { - uint16_t r[1 << PARAM_M]; - uint16_t gammas[PARAM_M - 1]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); + uint16_t r[1 << PARAM_M] = {0}; + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + size_t i, j, k; // Unpack the received word vector into array r - size_t i; for (i = 0 ; i < VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0) ; ++i) { - for (size_t j = 0 ; j < 64 ; ++j) { + for (j = 0 ; j < 64 ; ++j) { r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); } } // Last byte - for (size_t j = 0 ; j < PARAM_N1 % 64 ; ++j) { + for (j = 0 ; j < PARAM_N1 % 64 ; ++j) { r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); } @@ -586,9 +606,10 @@ void PQCLEAN_HQC192_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); // Twist and permute r adequately to obtain w + k = 1 << (PARAM_M - 1); w[0] = 0; w[k] = -r[0] & 1; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = -r[PQCLEAN_HQC192_CLEAN_gf_log(gammas_sums[i])] & gammas_sums[i]; w[k + i] = -r[PQCLEAN_HQC192_CLEAN_gf_log(gammas_sums[i] ^ 1)] & (gammas_sums[i] ^ 1); } @@ -603,25 +624,28 @@ void PQCLEAN_HQC192_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 * @param[in] w Array of size 2^PARAM_M */ void PQCLEAN_HQC192_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) { - uint16_t gammas[PARAM_M - 1]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); - size_t index = PARAM_GF_MUL_ORDER; + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint64_t bit; + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); - error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15); - uint64_t bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + + k = 1 << (PARAM_M - 1); + index = PARAM_GF_MUL_ORDER; + bit = 1 ^ ((uint16_t) - w[k] >> 15); error[index / 8] ^= bit << (index % 64); - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_CLEAN_gf_log(gammas_sums[i]); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15); + bit = 1 ^ ((uint16_t) - w[i] >> 15); error[index / 64] ^= bit << (index % 64); index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_CLEAN_gf_log(gammas_sums[i] ^ 1); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15); + bit = 1 ^ ((uint16_t) - w[k + i] >> 15); error[index / 64] ^= bit << (index % 64); } } diff --git a/crypto_kem/hqc-256/clean/fft.c b/crypto_kem/hqc-256/clean/fft.c index 60583582..e4dca7de 100644 --- a/crypto_kem/hqc-256/clean/fft.c +++ b/crypto_kem/hqc-256/clean/fft.c @@ -19,8 +19,10 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f); +static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f); static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -30,7 +32,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -48,10 +51,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -90,7 +94,7 @@ static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_ f[13] = f[7] ^ f[9] ^ f[11] ^ f1[6]; f[14] = f[6] ^ f0[6] ^ f0[7] ^ f1[6]; f[15] = f[7] ^ f0[7] ^ f1[7]; - return; + break; case 3: f[0] = f0[0]; @@ -101,49 +105,53 @@ static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_ f[5] = f[3] ^ f1[2]; f[6] = f[4] ^ f0[3] ^ f1[2]; f[7] = f[3] ^ f0[3] ^ f1[3]; - return; + break; case 2: f[0] = f0[0]; f[1] = f1[0]; f[2] = f0[1] ^ f1[0]; f[3] = f[2] ^ f1[1]; - return; + break; case 1: f[0] = f0[0]; f[1] = f1[0]; - return; + break; default: - ; + radix_t_big(f, f0, f1, m_f); + break; + } +} - size_t n = 1 << (m_f - 2); +static void radix_t_big(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_t m_f) { + uint16_t Q0[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT_T - 2)] = {0}; - uint16_t Q0[1 << (PARAM_FFT_T - 2)]; - uint16_t Q1[1 << (PARAM_FFT_T - 2)]; - uint16_t R0[1 << (PARAM_FFT_T - 2)]; - uint16_t R1[1 << (PARAM_FFT_T - 2)]; + uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)] = {0}; + uint16_t R[1 << 2 * (PARAM_FFT_T - 2)] = {0}; - uint16_t Q[1 << 2 * (PARAM_FFT_T - 2)]; - uint16_t R[1 << 2 * (PARAM_FFT_T - 2)]; + size_t i, n; - memcpy(Q0, f0 + n, 2 * n); - memcpy(Q1, f1 + n, 2 * n); - memcpy(R0, f0, 2 * n); - memcpy(R1, f1, 2 * n); + n = 1 << (m_f - 2); + memcpy(Q0, f0 + n, 2 * n); + memcpy(Q1, f1 + n, 2 * n); + memcpy(R0, f0, 2 * n); + memcpy(R1, f1, 2 * n); - radix_t (Q, Q0, Q1, m_f - 1); - radix_t (R, R0, R1, m_f - 1); + radix_t (Q, Q0, Q1, m_f - 1); + radix_t (R, R0, R1, m_f - 1); - memcpy(f, R, 4 * n); - memcpy(f + 2 * n, R + n, 2 * n); - memcpy(f + 3 * n, Q + n, 2 * n); + memcpy(f, R, 4 * n); + memcpy(f + 2 * n, R + n, 2 * n); + memcpy(f + 3 * n, Q + n, 2 * n); - for (size_t i = 0 ; i < n ; ++i) { - f[2 * n + i] ^= Q[i]; - f[3 * n + i] ^= f[2 * n + i]; - } + for (i = 0 ; i < n ; ++i) { + f[2 * n + i] ^= Q[i]; + f[3 * n + i] ^= f[2 * n + i]; } } @@ -162,29 +170,31 @@ static void radix_t(uint16_t *f, const uint16_t *f0, const uint16_t *f1, uint32_ * @param[in] betas FFT constants */ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - size_t k = 1 << (m - 1); - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t f0[1 << (PARAM_FFT_T - 2)] = {0}; uint16_t f1[1 << (PARAM_FFT_T - 2)] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t beta_m_pow; + + size_t i, j, k; // Step 1 if (m_f == 1) { f[0] = 0; - for (size_t i = 0 ; i < (1U << m) ; ++i) { + for (i = 0 ; i < (1U << m) ; ++i) { f[0] ^= w[i]; } f[1] = 0; - uint16_t betas_sums[1 << (PARAM_M - 1)]; betas_sums[0] = 0; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { - size_t index = (1 << j) + k; - betas_sums[index] = betas_sums[k] ^ betas[j]; - f[1] ^= PQCLEAN_HQC256_CLEAN_gf_mul(betas_sums[index], w[index]); + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { + betas_sums[(1 << j) + k] = betas_sums[k] ^ betas[j]; + f[1] ^= PQCLEAN_HQC256_CLEAN_gf_mul(betas_sums[(1 << j) + k], w[(1 << j) + k]); } } @@ -192,7 +202,7 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m } // Compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQC256_CLEAN_gf_mul(betas[i], PQCLEAN_HQC256_CLEAN_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQC256_CLEAN_gf_square(gammas[i]) ^ gammas[i]; } @@ -206,23 +216,22 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m * Transpose: * u[i] = w[i] + w[k+i] * v[i] = G[i].w[i] + (G[i]+1).w[k+i] = G[i].u[i] + w[k+i] */ + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case // Step 5: Compute f0 from u and f1 from v f1[1] = 0; u[0] = w[0] ^ w[k]; f1[0] = w[k]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { u[i] = w[i] ^ w[k + i]; f1[0] ^= PQCLEAN_HQC256_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; } fft_t_rec(f0, u, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); } else { - uint16_t v[1 << (PARAM_M - 2)] = {0}; - u[0] = w[0] ^ w[k]; v[0] = w[k]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { u[i] = w[i] ^ w[k + i]; v[i] = PQCLEAN_HQC256_CLEAN_gf_mul(gammas_sums[i], u[i]) ^ w[k + i]; } @@ -237,8 +246,8 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m // Step 2: compute f from g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQC256_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQC256_CLEAN_gf_mul(beta_m_pow, f[i]); } @@ -261,14 +270,15 @@ static void fft_t_rec(uint16_t *f, const uint16_t *w, size_t f_coeffs, uint8_t m */ void PQCLEAN_HQC256_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) { // Transposed from Gao and Mateer algorithm - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; uint16_t u[1 << (PARAM_M - 1)] = {0}; uint16_t v[1 << (PARAM_M - 1)] = {0}; - uint16_t deltas[PARAM_M - 1]; - uint16_t f0[1 << (PARAM_FFT_T - 1)]; - uint16_t f1[1 << (PARAM_FFT_T - 1)]; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t f0[1 << (PARAM_FFT_T - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT_T - 1)] = {0}; + + size_t i, k; compute_fft_betas(betas); compute_subset_sums(betas_sums, betas, PARAM_M - 1); @@ -281,15 +291,16 @@ void PQCLEAN_HQC256_CLEAN_fft_t(uint16_t *f, const uint16_t *w, size_t f_coeffs) * Transpose: * u[i] = w[i] + w[k+i] * v[i] = G[i].w[i] + (G[i]+1).w[k+i] = G[i].u[i] + w[k+i] */ + k = 1 << (PARAM_M - 1); u[0] = w[0] ^ w[k]; v[0] = w[k]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { u[i] = w[i] ^ w[k + i]; v[i] = PQCLEAN_HQC256_CLEAN_gf_mul(betas_sums[i], u[i]) ^ w[k + i]; } // Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQC256_CLEAN_gf_square(betas[i]) ^ betas[i]; } @@ -337,7 +348,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -348,51 +359,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -408,25 +424,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQC256_CLEAN_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -436,8 +454,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQC256_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQC256_CLEAN_gf_mul(beta_m_pow, f[i]); } @@ -447,7 +465,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQC256_CLEAN_gf_mul(betas[i], PQCLEAN_HQC256_CLEAN_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQC256_CLEAN_gf_square(gammas[i]) ^ gammas[i]; } @@ -458,10 +476,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC256_CLEAN_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -472,7 +491,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC256_CLEAN_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -501,14 +520,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQC256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -524,7 +544,7 @@ void PQCLEAN_HQC256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQC256_CLEAN_gf_square(betas[i]) ^ betas[i]; } @@ -532,6 +552,7 @@ void PQCLEAN_HQC256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -542,7 +563,7 @@ void PQCLEAN_HQC256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC256_CLEAN_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -561,21 +582,20 @@ void PQCLEAN_HQC256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { * @param[in] vector Array of size VEC_N1_SIZE_BYTES */ void PQCLEAN_HQC256_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint64_t *vector) { - uint16_t r[1 << PARAM_M]; - uint16_t gammas[PARAM_M - 1]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); + uint16_t r[1 << PARAM_M] = {0}; + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + size_t i, j, k; // Unpack the received word vector into array r - size_t i; for (i = 0 ; i < VEC_N1_SIZE_64 - (PARAM_N1 % 64 != 0) ; ++i) { - for (size_t j = 0 ; j < 64 ; ++j) { + for (j = 0 ; j < 64 ; ++j) { r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); } } // Last byte - for (size_t j = 0 ; j < PARAM_N1 % 64 ; ++j) { + for (j = 0 ; j < PARAM_N1 % 64 ; ++j) { r[64 * i + j] = (uint8_t) ((vector[i] >> j) & 1); } @@ -586,9 +606,10 @@ void PQCLEAN_HQC256_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); // Twist and permute r adequately to obtain w + k = 1 << (PARAM_M - 1); w[0] = 0; w[k] = -r[0] & 1; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = -r[PQCLEAN_HQC256_CLEAN_gf_log(gammas_sums[i])] & gammas_sums[i]; w[k + i] = -r[PQCLEAN_HQC256_CLEAN_gf_log(gammas_sums[i] ^ 1)] & (gammas_sums[i] ^ 1); } @@ -603,25 +624,28 @@ void PQCLEAN_HQC256_CLEAN_fft_t_preprocess_bch_codeword(uint16_t *w, const uint6 * @param[in] w Array of size 2^PARAM_M */ void PQCLEAN_HQC256_CLEAN_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) { - uint16_t gammas[PARAM_M - 1]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); - size_t index = PARAM_GF_MUL_ORDER; + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint64_t bit; + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); - error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15); - uint64_t bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + + k = 1 << (PARAM_M - 1); + index = PARAM_GF_MUL_ORDER; + bit = 1 ^ ((uint16_t) - w[k] >> 15); error[index / 8] ^= bit << (index % 64); - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC256_CLEAN_gf_log(gammas_sums[i]); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15); + bit = 1 ^ ((uint16_t) - w[i] >> 15); error[index / 64] ^= bit << (index % 64); index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC256_CLEAN_gf_log(gammas_sums[i] ^ 1); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15); + bit = 1 ^ ((uint16_t) - w[k + i] >> 15); error[index / 64] ^= bit << (index % 64); } } diff --git a/crypto_kem/hqc-rmrs-128/clean/fft.c b/crypto_kem/hqc-rmrs-128/clean/fft.c index c00f1ac0..bdc5f248 100644 --- a/crypto_kem/hqc-rmrs-128/clean/fft.c +++ b/crypto_kem/hqc-rmrs-128/clean/fft.c @@ -18,6 +18,7 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -27,7 +28,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -45,10 +47,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -88,7 +91,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -99,51 +102,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -159,25 +167,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -187,8 +197,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(beta_m_pow, f[i]); } @@ -198,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS128_CLEAN_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(gammas[i]) ^ gammas[i]; } @@ -209,10 +219,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -223,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -252,14 +263,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -275,7 +287,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQCRMRS128_CLEAN_gf_square(betas[i]) ^ betas[i]; } @@ -283,6 +295,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -293,7 +306,7 @@ void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS128_CLEAN_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -311,17 +324,16 @@ void PQCLEAN_HQCRMRS128_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff void PQCLEAN_HQCRMRS128_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { uint16_t gammas[PARAM_M - 1] = {0}; uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; - size_t k = 1 << (PARAM_M - 1); + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + k = 1 << (PARAM_M - 1); error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); - size_t index = PARAM_GF_MUL_ORDER; - - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS128_CLEAN_gf_log(gammas_sums[i]); error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); diff --git a/crypto_kem/hqc-rmrs-192/clean/fft.c b/crypto_kem/hqc-rmrs-192/clean/fft.c index 55bf9a56..3485d924 100644 --- a/crypto_kem/hqc-rmrs-192/clean/fft.c +++ b/crypto_kem/hqc-rmrs-192/clean/fft.c @@ -18,6 +18,7 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -27,7 +28,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -45,10 +47,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -88,7 +91,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -99,51 +102,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -159,25 +167,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -187,8 +197,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(beta_m_pow, f[i]); } @@ -198,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS192_CLEAN_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(gammas[i]) ^ gammas[i]; } @@ -209,10 +219,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -223,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -252,14 +263,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -275,7 +287,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQCRMRS192_CLEAN_gf_square(betas[i]) ^ betas[i]; } @@ -283,6 +295,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -293,7 +306,7 @@ void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS192_CLEAN_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -311,17 +324,16 @@ void PQCLEAN_HQCRMRS192_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff void PQCLEAN_HQCRMRS192_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { uint16_t gammas[PARAM_M - 1] = {0}; uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; - size_t k = 1 << (PARAM_M - 1); + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + k = 1 << (PARAM_M - 1); error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); - size_t index = PARAM_GF_MUL_ORDER; - - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS192_CLEAN_gf_log(gammas_sums[i]); error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); diff --git a/crypto_kem/hqc-rmrs-256/clean/fft.c b/crypto_kem/hqc-rmrs-256/clean/fft.c index 9631d529..4aef9823 100644 --- a/crypto_kem/hqc-rmrs-256/clean/fft.c +++ b/crypto_kem/hqc-rmrs-256/clean/fft.c @@ -18,6 +18,7 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -27,7 +28,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -45,10 +47,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -88,7 +91,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -99,51 +102,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -159,25 +167,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -187,8 +197,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(beta_m_pow, f[i]); } @@ -198,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_mul(betas[i], PQCLEAN_HQCRMRS256_CLEAN_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_square(gammas[i]) ^ gammas[i]; } @@ -209,10 +219,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -223,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -252,14 +263,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -275,7 +287,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQCRMRS256_CLEAN_gf_square(betas[i]) ^ betas[i]; } @@ -283,6 +295,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -293,7 +306,7 @@ void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS256_CLEAN_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -311,17 +324,16 @@ void PQCLEAN_HQCRMRS256_CLEAN_fft(uint16_t *w, const uint16_t *f, size_t f_coeff void PQCLEAN_HQCRMRS256_CLEAN_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { uint16_t gammas[PARAM_M - 1] = {0}; uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; - size_t k = 1 << (PARAM_M - 1); + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + k = 1 << (PARAM_M - 1); error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); - size_t index = PARAM_GF_MUL_ORDER; - - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS256_CLEAN_gf_log(gammas_sums[i]); error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); diff --git a/test/duplicate_consistency/hqc-rmrs-128_avx2.yml b/test/duplicate_consistency/hqc-rmrs-128_avx2.yml index a16db890..32258a03 100644 --- a/test/duplicate_consistency/hqc-rmrs-128_avx2.yml +++ b/test/duplicate_consistency/hqc-rmrs-128_avx2.yml @@ -11,7 +11,6 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c - source: scheme: hqc-rmrs-192 implementation: clean @@ -23,7 +22,6 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c - source: scheme: hqc-rmrs-192 implementation: avx2 @@ -56,7 +54,6 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c - source: scheme: hqc-rmrs-256 implementation: avx2 diff --git a/test/duplicate_consistency/hqc-rmrs-128_clean.yml b/test/duplicate_consistency/hqc-rmrs-128_clean.yml index 4bed9a73..68beced3 100644 --- a/test/duplicate_consistency/hqc-rmrs-128_clean.yml +++ b/test/duplicate_consistency/hqc-rmrs-128_clean.yml @@ -11,7 +11,6 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c - source: scheme: hqc-rmrs-192 implementation: clean @@ -45,7 +44,6 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c - source: scheme: hqc-rmrs-256 implementation: clean @@ -79,4 +77,3 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c diff --git a/test/duplicate_consistency/hqc-rmrs-192_avx2.yml b/test/duplicate_consistency/hqc-rmrs-192_avx2.yml index ab92b812..468c4448 100644 --- a/test/duplicate_consistency/hqc-rmrs-192_avx2.yml +++ b/test/duplicate_consistency/hqc-rmrs-192_avx2.yml @@ -11,7 +11,6 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c - source: scheme: hqc-rmrs-256 implementation: clean @@ -23,7 +22,6 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c - source: scheme: hqc-rmrs-256 implementation: avx2 diff --git a/test/duplicate_consistency/hqc-rmrs-192_clean.yml b/test/duplicate_consistency/hqc-rmrs-192_clean.yml index db7a8c32..58649dff 100644 --- a/test/duplicate_consistency/hqc-rmrs-192_clean.yml +++ b/test/duplicate_consistency/hqc-rmrs-192_clean.yml @@ -11,7 +11,6 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c - source: scheme: hqc-rmrs-256 implementation: clean @@ -45,4 +44,3 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c diff --git a/test/duplicate_consistency/hqc-rmrs-256_avx2.yml b/test/duplicate_consistency/hqc-rmrs-256_avx2.yml index 755728eb..c70728ab 100644 --- a/test/duplicate_consistency/hqc-rmrs-256_avx2.yml +++ b/test/duplicate_consistency/hqc-rmrs-256_avx2.yml @@ -11,4 +11,3 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c diff --git a/test/duplicate_consistency/hqc-rmrs-256_clean.yml b/test/duplicate_consistency/hqc-rmrs-256_clean.yml index 895ecd4b..98ffa2ad 100644 --- a/test/duplicate_consistency/hqc-rmrs-256_clean.yml +++ b/test/duplicate_consistency/hqc-rmrs-256_clean.yml @@ -11,4 +11,3 @@ consistency_checks: - reed_muller.h - reed_solomon.h - code.c - - fft.c