#include "fft.h" #include "gf.h" #include "parameters.h" #include "reed_solomon.h" #include #include #include /** * @file reed_solomon.c * Constant time implementation of Reed-Solomon codes */ static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw); static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes); static void compute_roots(uint8_t *error, uint16_t *sigma); static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes); static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error); static void correct_errors(uint8_t *cdw, const uint16_t *error_values); /** * @brief Encodes a message message of PARAM_K bits to a Reed-Solomon codeword codeword of PARAM_N1 bytes * * Following @cite lin1983error (Chapter 4 - Cyclic Codes), * We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code. * * @param[out] cdw Array of size VEC_N1_SIZE_64 receiving the encoded message * @param[in] msg Array of size VEC_K_SIZE_64 storing the message */ void PQCLEAN_HQCRMRS256_AVX2_reed_solomon_encode(uint64_t *cdw, const uint64_t *msg) { uint8_t gate_value = 0; uint16_t tmp[PARAM_G] = {0}; uint16_t PARAM_RS_POLY [] = {RS_POLY_COEFS}; uint8_t msg_bytes[PARAM_K] = {0}; uint8_t cdw_bytes[PARAM_N1] = {0}; for (size_t i = 0; i < VEC_K_SIZE_64; ++i) { for (size_t j = 0; j < 8; ++j) { msg_bytes[i * 8 + j] = (uint8_t) (msg[i] >> (j * 8)); } } for (int i = PARAM_K - 1; i >= 0; --i) { gate_value = msg_bytes[i] ^ cdw_bytes[PARAM_N1 - PARAM_K - 1]; for (size_t j = 0; j < PARAM_G; ++j) { tmp[j] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(gate_value, PARAM_RS_POLY[j]); } for (size_t k = PARAM_N1 - PARAM_K - 1; k; --k) { cdw_bytes[k] = cdw_bytes[k - 1] ^ tmp[k]; } cdw_bytes[0] = tmp[0]; } memcpy(cdw_bytes + PARAM_N1 - PARAM_K, msg_bytes, PARAM_K); memcpy(cdw, cdw_bytes, PARAM_N1); } /** * @brief Computes 2 * PARAM_DELTA syndromes * * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes * @param[in] cdw Array of size PARAM_N1 storing the received vector */ void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) { for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) { for (size_t j = 1; j < PARAM_N1; ++j) { syndromes[i] ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]); } syndromes[i] ^= cdw[0]; } } /** * @brief Computes the error locator polynomial (ELP) sigma * * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes).
* We use the letter p for rho which is initialized at -1.
* The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X).
* Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p.
* sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated.
* We can properly correct only if the degree of sigma does not exceed PARAM_DELTA. * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value * and we only need to save its first PARAM_DELTA - 1 coefficients. * * @returns the degree of the ELP sigma * @param[out] sigma Array of size (at least) PARAM_DELTA receiving the ELP * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes */ static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) { uint16_t deg_sigma = 0; uint16_t deg_sigma_p = 0; uint16_t deg_sigma_copy = 0; uint16_t sigma_copy[PARAM_DELTA + 1] = {0}; uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1}; uint16_t pp = -1; // 2*rho uint16_t d_p = 1; uint16_t d = syndromes[0]; uint16_t mask1, mask2, mask12; uint16_t deg_X, deg_X_sigma_p; uint16_t dd; uint16_t mu; uint16_t i; sigma[0] = 1; for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) { // Save sigma in case we need it to update X_sigma_p memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA)); deg_sigma_copy = deg_sigma; dd = PQCLEAN_HQCRMRS256_AVX2_gf_mul(d, PQCLEAN_HQCRMRS256_AVX2_gf_inverse(d_p)); for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { sigma[i] ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(dd, X_sigma_p[i]); } deg_X = mu - pp; deg_X_sigma_p = deg_X + deg_sigma_p; // mask1 = 0xffff if(d != 0) and 0 otherwise mask1 = -((uint16_t) - d >> 15); // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15); // mask12 = 0xffff if the deg_sigma increased and 0 otherwise mask12 = mask1 & mask2; deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma); if (mu == (2 * PARAM_DELTA - 1)) { break; } pp ^= mask12 & (mu ^ pp); d_p ^= mask12 & (d ^ d_p); for (i = PARAM_DELTA; i; --i) { X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]); } deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p); d = syndromes[mu + 1]; for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) { d ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]); } } return deg_sigma; } /** * @brief Computes the error polynomial error from the error locator polynomial sigma * * See function PQCLEAN_HQCRMRS256_AVX2_fft for more details. * * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial */ static void compute_roots(uint8_t *error, uint16_t *sigma) { uint16_t w[1 << PARAM_M] = {0}; PQCLEAN_HQCRMRS256_AVX2_fft(w, sigma, PARAM_DELTA + 1); PQCLEAN_HQCRMRS256_AVX2_fft_retrieve_error_poly(error, w); } /** * @brief Computes the polynomial z(x) * * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. * * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x) * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial * @param[in] degree Integer that is the degree of polynomial sigma * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes */ static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) { size_t i, j; uint16_t mask; z[0] = 1; for (i = 1; i < PARAM_DELTA + 1; ++i) { mask = -((uint16_t) (i - degree - 1) >> 15); z[i] = mask & sigma[i]; } z[1] ^= syndromes[0]; for (i = 2; i <= PARAM_DELTA; ++i) { mask = -((uint16_t) (i - degree - 1) >> 15); z[i] ^= mask & syndromes[i - 1]; for (j = 1; j < i; ++j) { z[i] ^= mask & PQCLEAN_HQCRMRS256_AVX2_gf_mul(sigma[j], syndromes[i - j - 1]); } } } /** * @brief Computes the error values * * See @cite lin1983error (Chapter 6 - BCH Codes) for more details. * * @param[out] error_values Array of PARAM_DELTA elements receiving the error values * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x) * @param[in] z_degree Integer that is the degree of polynomial z(x) * @param[in] error_compact Array of PARAM_DELTA + PARAM_N1 storing compact representation of the error */ static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) { uint16_t beta_j[PARAM_DELTA] = {0}; uint16_t e_j[PARAM_DELTA] = {0}; uint16_t delta_counter = 0; uint16_t delta_real_value; // Compute the beta_{j_i} page 31 of the documentation for (size_t i = 0; i < PARAM_N1; i++) { uint16_t found = 0; uint16_t valuemask = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 for (uint16_t j = 0; j < PARAM_DELTA; j++) { uint16_t indexmask = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter beta_j[j] += indexmask & valuemask & gf_exp[i]; found += indexmask & valuemask & 1; } delta_counter += found; } delta_real_value = delta_counter; // Compute the e_{j_i} page 31 of the documentation for (size_t i = 0; i < PARAM_DELTA; ++i) { uint16_t tmp1 = 1; uint16_t tmp2 = 1; uint16_t inverse = PQCLEAN_HQCRMRS256_AVX2_gf_inverse(beta_j[i]); uint16_t inverse_power_j = 1; for (size_t j = 1; j <= PARAM_DELTA; ++j) { inverse_power_j = PQCLEAN_HQCRMRS256_AVX2_gf_mul(inverse_power_j, inverse); tmp1 ^= PQCLEAN_HQCRMRS256_AVX2_gf_mul(inverse_power_j, z[j]); } for (size_t k = 1; k < PARAM_DELTA; ++k) { tmp2 = PQCLEAN_HQCRMRS256_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA]))); } uint16_t mask = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value e_j[i] = mask & PQCLEAN_HQCRMRS256_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS256_AVX2_gf_inverse(tmp2)); } // Place the delta e_{j_i} values at the right coordinates of the output vector delta_counter = 0; for (size_t i = 0; i < PARAM_N1; ++i) { uint16_t found = 0; uint16_t valuemask = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0 for (size_t j = 0; j < PARAM_DELTA; j++) { uint16_t indexmask = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter error_values[i] += indexmask & valuemask & e_j[j]; found += indexmask & valuemask & 1; } delta_counter += found; } } /** * @brief Correct the errors * * @param[out] cdw Array of PARAM_N1 elements receiving the corrected vector * @param[in] error Array of the error vector * @param[in] error_values Array of PARAM_DELTA elements storing the error values */ static void correct_errors(uint8_t *cdw, const uint16_t *error_values) { for (size_t i = 0; i < PARAM_N1; ++i) { cdw[i] ^= error_values[i]; } } /** * @brief Decodes the received word * * This function relies on six steps: *

The first step, is the computation of the 2*PARAM_DELTA syndromes. *
The second step is the computation of the error-locator polynomial sigma. *
The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and takings their inverses. *
The fourth step, is the polynomial z(x). *
The fifth step, is the computation of the error values. *
The sixth step is the correction of the errors in the received polynomial. *

* For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error * * @param[out] msg Array of size VEC_K_SIZE_64 receiving the decoded message * @param[in] cdw Array of size VEC_N1_SIZE_64 storing the received word */ void PQCLEAN_HQCRMRS256_AVX2_reed_solomon_decode(uint64_t *msg, uint64_t *cdw) { uint8_t cdw_bytes[PARAM_N1] = {0}; uint16_t syndromes[2 * PARAM_DELTA] = {0}; uint16_t sigma[1 << PARAM_FFT] = {0}; uint8_t error[1 << PARAM_M] = {0}; uint16_t z[PARAM_N1] = {0}; uint16_t error_values[PARAM_N1] = {0}; uint16_t deg; // Copy the vector in an array of bytes memcpy(cdw_bytes, cdw, PARAM_N1); // Calculate the 2*PARAM_DELTA syndromes compute_syndromes(syndromes, cdw_bytes); // Compute the error locator polynomial sigma // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room deg = compute_elp(sigma, syndromes); // Compute the error polynomial error compute_roots(error, sigma); // Compute the polynomial z(x) compute_z_poly(z, sigma, deg, syndromes); // Compute the error values compute_error_values(error_values, z, error); // Correct the errors correct_errors(cdw_bytes, error_values); // Retrieve the message from the decoded codeword memcpy(msg, cdw_bytes + (PARAM_G - 1), PARAM_K); }