pqc/crypto_kem/hqc-rmrs-128/avx2/reed_solomon.c

341 lines
29 KiB
C
Raw Normal View History

2020-09-07 19:23:34 +01:00
#include "fft.h"
#include "gf.h"
#include "parameters.h"
#include "reed_solomon.h"
#include <stdint.h>
#include <stdio.h>
#include <string.h>
/**
* @file reed_solomon.c
* Constant time implementation of Reed-Solomon codes
*/
static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
static void compute_roots(uint8_t *error, uint16_t *sigma);
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint8_t degree, const uint16_t *syndromes);
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
static void correct_errors(uint8_t *cdw, const uint16_t *error_values);
static const uint16_t alpha_ij_pow [48][79] = {{2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, 76, 152, 45, 90, 180, 117, 234, 201, 143, 3, 6, 12, 24, 48, 96, 192, 157, 39, 78, 156, 37, 74, 148, 53, 106, 212, 181, 119, 238, 193, 159, 35, 70, 140, 5, 10, 20, 40, 80, 160, 93, 186, 105, 210, 185, 111, 222, 161, 95, 190, 97, 194, 153, 47, 94, 188, 101, 202, 137, 15, 30, 60, 120, 240}, {4, 16, 64, 29, 116, 205, 19, 76, 45, 180, 234, 143, 6, 24, 96, 157, 78, 37, 148, 106, 181, 238, 159, 70, 5, 20, 80, 93, 105, 185, 222, 95, 97, 153, 94, 101, 137, 30, 120, 253, 211, 107, 177, 254, 223, 91, 113, 217, 67, 17, 68, 13, 52, 208, 103, 129, 62, 248, 199, 59, 236, 151, 102, 133, 46, 184, 218, 79, 33, 132, 42, 168, 154, 82, 85, 73, 57, 228, 183}, {8, 64, 58, 205, 38, 45, 117, 143, 12, 96, 39, 37, 53, 181, 193, 70, 10, 80, 186, 185, 161, 97, 47, 101, 15, 120, 231, 107, 127, 223, 182, 217, 134, 68, 26, 208, 206, 62, 237, 59, 197, 102, 23, 184, 169, 33, 21, 168, 41, 85, 146, 228, 115, 191, 145, 252, 179, 241, 219, 150, 196, 110, 87, 130, 100, 7, 56, 221, 166, 89, 242, 195, 86, 138, 36, 61, 245, 251, 139}, {16, 29, 205, 76, 180, 143, 24, 157, 37, 106, 238, 70, 20, 93, 185, 95, 153, 101, 30, 253, 107, 254, 91, 217, 17, 13, 208, 129, 248, 59, 151, 133, 184, 79, 132, 168, 82, 73, 228, 230, 198, 252, 123, 227, 150, 149, 165, 130, 200, 28, 221, 81, 121, 195, 172, 18, 61, 247, 203, 44, 250, 27, 173, 2, 32, 58, 135, 152, 117, 3, 48, 39, 74, 212, 193, 140, 40, 186, 111}, {32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132, 77, 85, 114, 230, 145, 215, 255, 150, 55, 174, 100, 28, 167, 89, 239, 172, 36, 244, 235, 44, 233, 108, 1, 32, 116, 38, 180, 3, 96, 156, 106, 193, 5, 160, 185, 190, 94, 15, 253, 214, 223, 226, 17, 26, 103, 124, 59, 51, 46, 169, 132}, {64, 205, 45, 143, 96, 37, 181, 70, 80, 185, 97, 101, 120, 107, 223, 217, 68, 208, 62, 59, 102, 184, 33, 168, 85, 228, 191, 252, 241, 150, 110, 130, 7, 221, 89, 195, 138, 61, 251, 44, 207, 173, 8, 58, 38, 117, 12, 39, 53, 193, 10, 186, 161, 47, 15, 231, 127, 182, 134, 26, 206, 237, 197, 23, 169, 21, 41, 146, 115, 145, 179, 219, 196, 87, 100, 56, 166, 242, 86}, {128, 19, 117, 24, 156, 181, 140, 93, 161, 94, 60, 107, 163, 67, 26, 129, 147, 102, 109, 132, 41, 57, 209, 252, 255, 98, 87, 200, 224, 89, 155, 18, 245, 11, 233, 173, 16, 232, 45, 3, 157, 53, 159, 40, 185, 194, 137, 231, 254, 226, 68, 189, 248, 197, 46, 158, 168, 170, 183, 145, 123, 75, 110, 25, 28, 166, 249, 69, 61, 235, 176, 54, 2, 29, 38, 234, 48, 37, 119}, {29, 76, 143, 157, 106, 70, 93, 95, 101, 253, 254, 217, 13, 129, 59, 133, 79, 168, 73, 230, 252, 227, 149, 130, 28, 81, 195, 18, 247, 44, 27, 2, 58, 152, 3, 39, 212, 140, 186, 190, 202, 231, 225, 175, 26, 31, 118, 23, 158, 77, 146, 209, 229, 219, 55, 25, 56, 162, 155, 36, 243, 88, 54, 4, 116, 45, 6, 78, 181, 5, 105, 97, 137, 211, 223, 67, 52, 62, 236}, {58, 45, 12, 37, 193, 80, 161, 101, 231, 223, 134, 208, 237, 102, 169, 168, 146, 191, 179, 150, 87, 7, 166, 195, 36, 251, 125, 173, 64, 38, 143, 39, 181, 10, 185, 47, 120, 127, 217, 26, 62, 197, 184, 21, 85, 115, 252, 219, 110, 100, 221, 242, 138, 245, 44, 54, 8, 205, 117, 96, 53, 70, 186, 97, 15, 107, 182, 68, 206, 59, 23, 33, 41, 228, 145, 241, 196, 130, 56}, {116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3, 156, 193, 160, 190, 15, 214, 226, 26, 124, 51, 169, 77, 114, 145, 255, 55, 100, 167, 239, 36, 235, 233, 1, 116, 180, 96, 106, 5, 185, 94, 253, 223, 17, 103, 59, 46, 132, 85, 230, 215, 150, 174, 28, 89, 172, 244, 44, 108, 32, 38, 3}, {232, 234, 39, 238, 160, 97, 60, 254, 134, 103, 118, 184, 84, 57, 145, 227, 220, 7, 162, 172, 245, 176, 71, 58, 180, 192, 181, 40, 95, 15, 177, 175, 208, 147, 46, 21, 73, 99, 241, 55, 200, 166, 43, 122, 44, 216, 128, 45, 48, 106, 10, 222, 202, 107, 226, 52, 237, 133, 66, 85, 209, 123, 196, 50, 167, 195, 144, 11, 54, 32, 76, 12, 148, 140, 185, 188, 211, 182, 13}, {205, 143, 37, 70, 185, 101, 107, 217, 208, 59, 184, 168, 228, 252, 150, 130, 221,
/**
* @brief Encodes a message message of PARAM_K bits to a Reed-Solomon codeword codeword of PARAM_N1 bytes
*
* Following @cite lin1983error (Chapter 4 - Cyclic Codes),
* We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register
* with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code.
*
* @param[out] cdw Array of size VEC_N1_SIZE_64 receiving the encoded message
* @param[in] msg Array of size VEC_K_SIZE_64 storing the message
*/
void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_encode(uint64_t *cdw, const uint64_t *msg) {
uint8_t gate_value = 0;
uint16_t tmp[PARAM_G] = {0};
uint16_t PARAM_RS_POLY [] = {RS_POLY_COEFS};
uint8_t msg_bytes[PARAM_K] = {0};
uint8_t cdw_bytes[PARAM_N1] = {0};
2020-09-10 21:36:42 +01:00
for (size_t i = 0; i < VEC_K_SIZE_64; ++i) {
for (size_t j = 0; j < 8; ++j) {
2020-09-07 19:23:34 +01:00
msg_bytes[i * 8 + j] = (uint8_t) (msg[i] >> (j * 8));
}
}
2020-09-10 21:36:42 +01:00
for (int i = PARAM_K - 1; i >= 0; --i) {
2020-09-07 19:23:34 +01:00
gate_value = msg_bytes[i] ^ cdw_bytes[PARAM_N1 - PARAM_K - 1];
2020-09-10 21:36:42 +01:00
for (size_t j = 0; j < PARAM_G; ++j) {
2020-09-07 19:23:34 +01:00
tmp[j] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(gate_value, PARAM_RS_POLY[j]);
}
2020-09-10 21:36:42 +01:00
for (size_t k = PARAM_N1 - PARAM_K - 1; k; --k) {
2020-09-07 19:23:34 +01:00
cdw_bytes[k] = cdw_bytes[k - 1] ^ tmp[k];
}
cdw_bytes[0] = tmp[0];
}
memcpy(cdw_bytes + PARAM_N1 - PARAM_K, msg_bytes, PARAM_K);
memcpy(cdw, cdw_bytes, PARAM_N1);
}
/**
* @brief Computes 2 * PARAM_DELTA syndromes
*
* @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes
* @param[in] cdw Array of size PARAM_N1 storing the received vector
*/
void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
2020-09-10 21:36:42 +01:00
for (size_t i = 0; i < 2 * PARAM_DELTA; ++i) {
for (size_t j = 1; j < PARAM_N1; ++j) {
2020-09-07 19:23:34 +01:00
syndromes[i] ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(cdw[j], alpha_ij_pow[i][j - 1]);
}
syndromes[i] ^= cdw[0];
}
}
/**
* @brief Computes the error locator polynomial (ELP) sigma
*
* This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes). <br>
* We use the letter p for rho which is initialized at -1. <br>
* The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
* Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
* sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
* We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
* This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
* and we only need to save its first PARAM_DELTA - 1 coefficients.
*
* @returns the degree of the ELP sigma
* @param[out] sigma Array of size (at least) PARAM_DELTA receiving the ELP
* @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
*/
static size_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
sigma[0] = 1;
size_t deg_sigma = 0;
size_t deg_sigma_p = 0;
uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
size_t deg_sigma_copy = 0;
uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
int32_t pp = -1; // 2*rho
uint16_t d_p = 1;
uint16_t d = syndromes[0];
2020-09-10 21:36:42 +01:00
for (size_t mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
2020-09-07 19:23:34 +01:00
// Save sigma in case we need it to update X_sigma_p
memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
deg_sigma_copy = deg_sigma;
uint16_t dd = PQCLEAN_HQCRMRS128_AVX2_gf_mul(d, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(d_p));
2020-09-10 21:36:42 +01:00
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
2020-09-07 19:23:34 +01:00
sigma[i] ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(dd, X_sigma_p[i]);
}
size_t deg_X = mu - pp;
size_t deg_X_sigma_p = deg_X + deg_sigma_p;
// mask1 = 0xffff if(d != 0) and 0 otherwise
int16_t mask1 = -((uint16_t) - d >> 15);
// mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
int16_t mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);
// mask12 = 0xffff if the deg_sigma increased and 0 otherwise
int16_t mask12 = mask1 & mask2;
deg_sigma = (mask12 & deg_X_sigma_p) ^ (~mask12 & deg_sigma);
if (mu == (2 * PARAM_DELTA - 1)) {
break;
}
pp = (mask12 & mu) ^ (~mask12 & pp);
d_p = (mask12 & d) ^ (~mask12 & d_p);
2020-09-10 21:36:42 +01:00
for (size_t i = PARAM_DELTA; i; --i) {
2020-09-07 19:23:34 +01:00
X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
}
deg_sigma_p = (mask12 & deg_sigma_copy) ^ (~mask12 & deg_sigma_p);
d = syndromes[mu + 1];
2020-09-10 21:36:42 +01:00
for (size_t i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
2020-09-07 19:23:34 +01:00
d ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]);
}
}
return deg_sigma;
}
/**
* @brief Computes the error polynomial error from the error locator polynomial sigma
*
* See function PQCLEAN_HQCRMRS128_AVX2_fft for more details.
*
* @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
* @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
* @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
*/
static void compute_roots(uint8_t *error, uint16_t *sigma) {
uint16_t w[1 << PARAM_M] = {0};
PQCLEAN_HQCRMRS128_AVX2_fft(w, sigma, PARAM_DELTA + 1);
PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(error, w);
}
/**
* @brief Computes the polynomial z(x)
*
* See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
*
* @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
* @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
* @param[in] degree Integer that is the degree of polynomial sigma
* @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
*/
static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint8_t degree, const uint16_t *syndromes) {
z[0] = 1;
2020-09-10 21:36:42 +01:00
for (size_t i = 1; i < PARAM_DELTA + 1; ++i) {
2020-09-07 19:23:34 +01:00
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15);
z[i] = ((uint16_t)mask2) & sigma[i];
}
z[1] ^= syndromes[0];
2020-09-10 21:36:42 +01:00
for (size_t i = 2; i <= PARAM_DELTA; ++i) {
2020-09-07 19:23:34 +01:00
int16_t mask2 = -((uint16_t) (i - degree - 1) >> 15);
z[i] ^= ((uint16_t)mask2 & syndromes[i - 1]);
2020-09-10 21:36:42 +01:00
for (size_t j = 1; j < i; ++j) {
2020-09-07 19:23:34 +01:00
z[i] ^= ((uint16_t)mask2) & PQCLEAN_HQCRMRS128_AVX2_gf_mul(sigma[j], syndromes[i - j - 1]);
}
}
}
/**
* @brief Computes the error values
*
* See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
*
* @param[out] error_values Array of PARAM_DELTA elements receiving the error values
* @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
* @param[in] z_degree Integer that is the degree of polynomial z(x)
* @param[in] error_compact Array of PARAM_DELTA + PARAM_N1 storing compact representation of the error
*/
static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
uint16_t beta_j[PARAM_DELTA] = {0};
uint16_t e_j[PARAM_DELTA] = {0};
uint16_t delta_counter = 0;
uint16_t delta_real_value;
// Compute the beta_{j_i} page 31 of the documentation
2020-09-10 21:36:42 +01:00
for (size_t i = 0; i < PARAM_N1; i++) {
2020-09-07 19:23:34 +01:00
uint16_t found = 0;
int16_t valuemask = ((int16_t) - (error[i] != 0)) >> 15;
2020-09-10 21:36:42 +01:00
for (size_t j = 0; j < PARAM_DELTA; j++) {
2020-09-07 19:23:34 +01:00
int16_t indexmask = ((int16_t) - (j == delta_counter)) >> 15;
beta_j[j] += indexmask & valuemask & gf_exp[i];
2020-09-07 19:23:34 +01:00
found += indexmask & valuemask & 1;
}
delta_counter += found;
}
delta_real_value = delta_counter;
// Compute the e_{j_i} page 31 of the documentation
2020-09-10 21:36:42 +01:00
for (size_t i = 0; i < PARAM_DELTA; ++i) {
2020-09-07 19:23:34 +01:00
uint16_t tmp1 = 1;
uint16_t tmp2 = 1;
uint16_t inverse = PQCLEAN_HQCRMRS128_AVX2_gf_inverse(beta_j[i]);
uint16_t inverse_power_j = 1;
2020-09-10 21:36:42 +01:00
for (size_t j = 1; j <= PARAM_DELTA; ++j) {
2020-09-07 19:23:34 +01:00
inverse_power_j = PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, inverse);
tmp1 ^= PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse_power_j, z[j]);
}
2020-09-10 21:36:42 +01:00
for (size_t k = 1; k < PARAM_DELTA; ++k) {
2020-09-07 19:23:34 +01:00
tmp2 = PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
}
int16_t mask = ((int16_t) - (i < delta_real_value)) >> 15;
e_j[i] = mask & PQCLEAN_HQCRMRS128_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS128_AVX2_gf_inverse(tmp2));
}
// Place the delta e_{j_i} values at the right coordinates of the output vector
delta_counter = 0;
2020-09-10 21:36:42 +01:00
for (size_t i = 0; i < PARAM_N1; ++i) {
2020-09-07 19:23:34 +01:00
uint16_t found = 0;
int16_t valuemask = ((int16_t) - (error[i] != 0)) >> 15;
2020-09-10 21:36:42 +01:00
for (size_t j = 0; j < PARAM_DELTA; j++) {
2020-09-07 19:23:34 +01:00
int16_t indexmask = ((int16_t) - (j == delta_counter)) >> 15;
error_values[i] += indexmask & valuemask & e_j[j];
found += indexmask & valuemask & 1;
}
delta_counter += found;
}
}
/**
* @brief Correct the errors
*
* @param[out] cdw Array of PARAM_N1 elements receiving the corrected vector
* @param[in] error Array of the error vector
* @param[in] error_values Array of PARAM_DELTA elements storing the error values
*/
static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
2020-09-10 21:36:42 +01:00
for (size_t i = 0; i < PARAM_N1; ++i) {
2020-09-07 19:23:34 +01:00
cdw[i] ^= error_values[i];
}
}
/**
* @brief Decodes the received word
*
* This function relies on six steps:
* <ol>
* <li> The first step, is the computation of the 2*PARAM_DELTA syndromes.
* <li> The second step is the computation of the error-locator polynomial sigma.
* <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and takings their inverses.
* <li> The fourth step, is the polynomial z(x).
* <li> The fifth step, is the computation of the error values.
* <li> The sixth step is the correction of the errors in the received polynomial.
* </ol>
* For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
*
* @param[out] msg Array of size VEC_K_SIZE_64 receiving the decoded message
* @param[in] cdw Array of size VEC_N1_SIZE_64 storing the received word
*/
void PQCLEAN_HQCRMRS128_AVX2_reed_solomon_decode(uint64_t *msg, uint64_t *cdw) {
uint8_t cdw_bytes[PARAM_N1] = {0};
uint16_t syndromes[2 * PARAM_DELTA] = {0};
uint16_t sigma[1 << PARAM_FFT] = {0};
uint8_t error[1 << PARAM_M] = {0};
uint16_t z[PARAM_N1] = {0};
uint16_t error_values[PARAM_N1] = {0};
// Copy the vector in an array of bytes
memcpy(cdw_bytes, cdw, PARAM_N1);
// Calculate the 2*PARAM_DELTA syndromes
compute_syndromes(syndromes, cdw_bytes);
// Compute the error locator polynomial sigma
// Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
size_t deg = compute_elp(sigma, syndromes);
// Compute the error polynomial error
compute_roots(error, sigma);
// Compute the polynomial z(x)
compute_z_poly(z, sigma, deg, syndromes);
// Compute the error values
compute_error_values(error_values, z, error);
// Correct the errors
correct_errors(cdw_bytes, error_values);
// Retrieve the message from the decoded codeword
memcpy(msg, cdw_bytes + (PARAM_G - 1), PARAM_K);
}