From 1f4fa5ec3efd13cf9e5437e17f653bc8f6230ab1 Mon Sep 17 00:00:00 2001 From: "John M. Schanck" Date: Thu, 10 Sep 2020 16:26:03 -0400 Subject: [PATCH] compiler warnings --- crypto_kem/hqc-128/avx2/fft.c | 152 +++++++++++++++------------- crypto_kem/hqc-128/avx2/gf.c | 47 ++++----- crypto_kem/hqc-128/avx2/gf.h | 9 +- crypto_kem/hqc-128/avx2/gf2x.c | 32 ++---- crypto_kem/hqc-128/clean/gf.c | 55 ---------- crypto_kem/hqc-128/clean/gf.h | 15 ++- crypto_kem/hqc-192/avx2/fft.c | 152 +++++++++++++++------------- crypto_kem/hqc-192/avx2/gf.c | 47 ++++----- crypto_kem/hqc-192/avx2/gf.h | 9 +- crypto_kem/hqc-192/avx2/gf2x.c | 32 ++---- crypto_kem/hqc-192/clean/gf.c | 55 ---------- crypto_kem/hqc-192/clean/gf.h | 15 ++- crypto_kem/hqc-256/avx2/fft.c | 152 +++++++++++++++------------- crypto_kem/hqc-256/avx2/gf.c | 47 ++++----- crypto_kem/hqc-256/avx2/gf.h | 9 +- crypto_kem/hqc-256/avx2/gf2x.c | 36 ++----- crypto_kem/hqc-256/clean/gf.c | 55 ---------- crypto_kem/hqc-256/clean/gf.h | 15 ++- crypto_kem/hqc-rmrs-128/avx2/fft.c | 138 +++++++++++++------------ crypto_kem/hqc-rmrs-128/avx2/gf.c | 29 +++--- crypto_kem/hqc-rmrs-128/avx2/gf2x.c | 32 ++---- crypto_kem/hqc-rmrs-192/avx2/fft.c | 138 +++++++++++++------------ crypto_kem/hqc-rmrs-192/avx2/gf.c | 29 +++--- crypto_kem/hqc-rmrs-192/avx2/gf2x.c | 32 ++---- crypto_kem/hqc-rmrs-256/avx2/fft.c | 138 +++++++++++++------------ crypto_kem/hqc-rmrs-256/avx2/gf.c | 29 +++--- crypto_kem/hqc-rmrs-256/avx2/gf2x.c | 36 ++----- 27 files changed, 686 insertions(+), 849 deletions(-) diff --git a/crypto_kem/hqc-128/avx2/fft.c b/crypto_kem/hqc-128/avx2/fft.c index f402d991..9045c936 100644 --- a/crypto_kem/hqc-128/avx2/fft.c +++ b/crypto_kem/hqc-128/avx2/fft.c @@ -19,6 +19,7 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -28,7 +29,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -46,10 +48,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -89,7 +92,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -100,51 +103,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -160,25 +168,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQC128_AVX2_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -188,8 +198,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQC128_AVX2_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQC128_AVX2_gf_mul(beta_m_pow, f[i]); } @@ -199,7 +209,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQC128_AVX2_gf_mul(betas[i], PQCLEAN_HQC128_AVX2_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQC128_AVX2_gf_square(gammas[i]) ^ gammas[i]; } @@ -210,10 +220,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC128_AVX2_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -224,7 +235,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC128_AVX2_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -253,14 +264,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQC128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -276,7 +288,7 @@ void PQCLEAN_HQC128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQC128_AVX2_gf_square(betas[i]) ^ betas[i]; } @@ -284,6 +296,7 @@ void PQCLEAN_HQC128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -294,7 +307,7 @@ void PQCLEAN_HQC128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC128_AVX2_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -309,25 +322,28 @@ void PQCLEAN_HQC128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { * @param[in] w Array of size 2^PARAM_M */ void PQCLEAN_HQC128_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) { - uint16_t gammas[PARAM_M - 1]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); - size_t index = PARAM_GF_MUL_ORDER; + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint64_t bit; + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); - error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15); - uint64_t bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + + k = 1 << (PARAM_M - 1); + index = PARAM_GF_MUL_ORDER; + bit = 1 ^ ((uint16_t) - w[k] >> 15); error[index / 8] ^= bit << (index % 64); - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_AVX2_gf_log(gammas_sums[i]); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15); + bit = 1 ^ ((uint16_t) - w[i] >> 15); error[index / 64] ^= bit << (index % 64); index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC128_AVX2_gf_log(gammas_sums[i] ^ 1); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15); + bit = 1 ^ ((uint16_t) - w[k + i] >> 15); error[index / 64] ^= bit << (index % 64); } } diff --git a/crypto_kem/hqc-128/avx2/gf.c b/crypto_kem/hqc-128/avx2/gf.c index 3022c732..ab6125e5 100644 --- a/crypto_kem/hqc-128/avx2/gf.c +++ b/crypto_kem/hqc-128/avx2/gf.c @@ -14,16 +14,6 @@ static uint16_t gf_quad(uint64_t a); -/** - * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). - * The logarithm of 0 is set to 1024 by convention. - */ -static const uint16_t log[1024] = { - 1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949 -}; - - - /** * Returns the integer i such that elt = a^i * where a is the primitive element of GF(2^GF_M). @@ -41,30 +31,29 @@ uint16_t PQCLEAN_HQC128_AVX2_gf_log(uint16_t elt) { * @param[in] x Polynomial of degree less than 64 * @param[in] deg_x The degree of polynomial x */ -uint16_t gf_reduce(uint64_t x, size_t deg_x) { - // Compute the distance between the primitive polynomial first two set bits - size_t lz1 = __builtin_clz(PARAM_GF_POLY); - size_t lz2 = __builtin_clz(PARAM_GF_POLY ^ 1 << PARAM_M); - size_t dist = lz2 - lz1; +static uint16_t gf_reduce(uint64_t x, size_t deg_x) { + uint16_t z1, z2, rmdr, dist; + uint64_t mod; + size_t steps, i, j; // Deduce the number of steps of reduction - size_t steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), dist); + steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); // Reduce - for (size_t i = 0; i < steps; ++i) { - uint64_t mod = x >> PARAM_M; + for (i = 0; i < steps; ++i) { + mod = x >> PARAM_M; x &= (1 << PARAM_M) - 1; x ^= mod; - size_t tz1 = 0; - uint16_t rmdr = PARAM_GF_POLY ^ 1; - for (size_t j = __builtin_popcount(PARAM_GF_POLY) - 2; j; --j) { - size_t tz2 = __builtin_ctz(rmdr); - size_t shift = tz2 - tz1; - mod <<= shift; + z1 = 0; + rmdr = PARAM_GF_POLY ^ 1; + for (j = PARAM_GF_POLY_WT - 2; j; --j) { + z2 = __tzcnt_u16(rmdr); + dist = (uint16_t) (z2 - z1); + mod <<= dist; x ^= mod; - rmdr ^= 1 << tz2; - tz1 = tz2; + rmdr ^= 1 << z2; + z1 = z2; } } @@ -113,7 +102,7 @@ uint16_t PQCLEAN_HQC128_AVX2_gf_square(uint16_t a) { * @returns a^4 * @param[in] a Element of GF(2^GF_M) */ -uint16_t gf_quad(uint64_t a) { +static uint16_t gf_quad(uint64_t a) { uint64_t q = a & 1; for (size_t i = 1; i < PARAM_M; ++i) { a <<= 3; @@ -158,10 +147,10 @@ uint16_t PQCLEAN_HQC128_AVX2_gf_inverse(uint16_t a) { * @param[in] i The integer whose modulo is taken */ uint16_t PQCLEAN_HQC128_AVX2_gf_mod(uint16_t i) { - uint16_t tmp = i - PARAM_GF_MUL_ORDER; + uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); // mask = 0xffff if (i < GF_MUL_ORDER) - int16_t mask = -(tmp >> 15); + uint16_t mask = -(tmp >> 15); return tmp + (mask & PARAM_GF_MUL_ORDER); } diff --git a/crypto_kem/hqc-128/avx2/gf.h b/crypto_kem/hqc-128/avx2/gf.h index e08d8403..1f1c76a4 100644 --- a/crypto_kem/hqc-128/avx2/gf.h +++ b/crypto_kem/hqc-128/avx2/gf.h @@ -12,8 +12,13 @@ #include #include -void PQCLEAN_HQC128_AVX2_gf_generate(uint16_t *exp, uint16_t *log, int16_t m); - +/** + * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). + * The logarithm of 0 is set to 1024 by convention. + */ +static const uint16_t log[1024] = { + 1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949 +}; uint16_t PQCLEAN_HQC128_AVX2_gf_log(uint16_t elt); diff --git a/crypto_kem/hqc-128/avx2/gf2x.c b/crypto_kem/hqc-128/avx2/gf2x.c index 1eb9ca2d..86b2fe76 100644 --- a/crypto_kem/hqc-128/avx2/gf2x.c +++ b/crypto_kem/hqc-128/avx2/gf2x.c @@ -328,9 +328,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { static __m256i W0[2 * (T_TM3_3W_256)], W1[2 * (T_TM3_3W_256)], W2[2 * (T_TM3_3W_256)], W3[2 * (T_TM3_3W_256)], W4[2 * (T_TM3_3W_256)]; static __m256i tmp[2 * (T_TM3_3W_256)]; static __m256i ro256[6 * (T_TM3_3W_256)]; - const __m256i zero = (__m256i) { - 0ul, 0ul, 0ul, 0ul - }; + const __m256i zero = _mm256_setzero_si256(); int32_t T2 = T_TM3_3W_64 << 1; for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { @@ -347,24 +345,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { int32_t i4 = i << 2; int32_t i41 = i4 + 1; - U0[i] = (__m256i) { - A[i4], A[i41], 0x0ul, 0x0ul - }; - V0[i] = (__m256i) { - B[i4], B[i41], 0x0ul, 0x0ul - }; - U1[i] = (__m256i) { - A[i4 + T_TM3_3W_64 - 2], A[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - V1[i] = (__m256i) { - B[i4 + T_TM3_3W_64 - 2], B[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - U2[i] = (__m256i) { - A[i4 - 4 + T2], A[i4 - 3 + T2], 0x0ul, 0x0ul - }; - V2[i] = (__m256i) { - B[i4 - 4 + T2], B[i4 - 3 + T2], 0x0ul, 0x0ul - }; + U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); + V0[i] = _mm256_set_epi64x(0, 0, B[i41], B[i4]); + U1[i] = _mm256_set_epi64x(0, 0, A[i41 + T_TM3_3W_64 - 2], A[i4 + T_TM3_3W_64 - 2]); + V1[i] = _mm256_set_epi64x(0, 0, B[i41 + T_TM3_3W_64 - 2], B[i4 + T_TM3_3W_64 - 2]); + U2[i] = _mm256_set_epi64x(0, 0, A[i4 - 3 + T2], A[i4 - 4 + T2]); + V2[i] = _mm256_set_epi64x(0, 0, B[i4 - 3 + T2], B[i4 - 4 + T2]); } // Evaluation phase : x= X^64 @@ -452,9 +438,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { //W2 =(W2 + W3 + W4*(x^3+1))/(x+1) U1_64 = ((int64_t *) W4); __m256i *U1_256 = (__m256i *) (U1_64 + 1); - tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ (__m256i) { - 0x0ul, 0x0ul, 0x0ul, U1_64[0] - }; + tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); diff --git a/crypto_kem/hqc-128/clean/gf.c b/crypto_kem/hqc-128/clean/gf.c index 9068f7c6..21ba4ede 100644 --- a/crypto_kem/hqc-128/clean/gf.c +++ b/crypto_kem/hqc-128/clean/gf.c @@ -7,61 +7,6 @@ */ - -/** - * Powers of the root alpha of x^10 + x^3 + 1. - * The last two elements are needed by the PQCLEAN_HQC128_CLEAN_gf_mul function from gf_mul.c - * (for example if both elements to multiply are zero). - */ -static const uint16_t exp[1026] = { - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 9, 18, 36, 72, 144, 288, 576, 137, 274, 548, 65, 130, 260, 520, 25, 50, 100, 200, 400, 800, 585, 155, 310, 620, 209, 418, 836, 641, 267, 534, 37, 74, 148, 296, 592, 169, 338, 676, 321, 642, 269, 538, 61, 122, 244, 488, 976, 937, 859, 703, 375, 750, 469, 938, 861, 691, 367, 734, 437, 874, 733, 435, 870, 709, 387, 774, 517, 3, 6, 12, 24, 48, 96, 192, 384, 768, 521, 27, 54, 108, 216, 432, 864, 713, 411, 822, 613, 195, 390, 780, 529, 43, 86, 172, 344, 688, 361, 722, 429, 858, 701, 371, 742, 453, 906, 797, 563, 111, 222, 444, 888, 761, 507, 1014, 997, 963, 911, 791, 551, 71, 142, 284, 568, 121, 242, 484, 968, 921, 827, 639, 247, 494, 988, 945, 875, 735, 439, 878, 725, 419, 838, 645, 259, 518, 5, 10, 20, 40, 80, 160, 320, 640, 265, 530, 45, 90, 180, 360, 720, 425, 850, 685, 339, 678, 325, 650, 285, 570, 125, 250, 500, 1000, 985, 955, 895, 759, 487, 974, 917, 803, 591, 151, 302, 604, 177, 354, 708, 385, 770, 525, 19, 38, 76, 152, 304, 608, 201, 402, 804, 577, 139, 278, 556, 81, 162, 324, 648, 281, 562, 109, 218, 436, 872, 729, 443, 886, 741, 451, 902, 773, 515, 15, 30, 60, 120, 240, 480, 960, 905, 795, 575, 119, 238, 476, 952, 889, 763, 511, 1022, 1013, 995, 975, 919, 807, 583, 135, 270, 540, 49, 98, 196, 392, 784, 553, 91, 182, 364, 728, 441, 882, 749, 467, 934, 837, 643, 271, 542, 53, 106, 212, 424, 848, 681, 347, 694, 357, 714, 413, 826, 637, 243, 486, 972, 913, 811, 607, 183, 366, 732, 433, 866, 717, 403, 806, 581, 131, 262, 524, 17, 34, 68, 136, 272, 544, 73, 146, 292, 584, 153, 306, 612, 193, 386, 772, 513, 11, 22, 44, 88, 176, 352, 704, 393, 786, 557, 83, 166, 332, 664, 313, 626, 237, 474, 948, 865, 715, 415, 830, 629, 227, 454, 908, 785, 555, 95, 190, 380, 760, 505, 1010, 1005, 979, 943, 855, 679, 327, 654, 277, 554, 93, 186, 372, 744, 473, 946, 877, 723, 431, 862, 693, 355, 710, 389, 778, 541, 51, 102, 204, 408, 816, 617, 219, 438, 876, 721, 427, 854, 677, 323, 646, 261, 522, 29, 58, 116, 232, 464, 928, 841, 667, 319, 638, 245, 490, 980, 929, 843, 671, 311, 622, 213, 426, 852, 673, 331, 662, 293, 586, 157, 314, 628, 225, 450, 900, 769, 523, 31, 62, 124, 248, 496, 992, 969, 923, 831, 631, 231, 462, 924, 817, 619, 223, 446, 892, 753, 491, 982, 933, 835, 655, 279, 558, 85, 170, 340, 680, 345, 690, 365, 730, 445, 890, 765, 499, 998, 965, 899, 783, 535, 39, 78, 156, 312, 624, 233, 466, 932, 833, 651, 287, 574, 117, 234, 468, 936, 857, 699, 383, 766, 501, 1002, 989, 947, 879, 727, 423, 846, 661, 291, 582, 133, 266, 532, 33, 66, 132, 264, 528, 41, 82, 164, 328, 656, 297, 594, 173, 346, 692, 353, 706, 397, 794, 573, 115, 230, 460, 920, 825, 635, 255, 510, 1020, 1009, 1003, 991, 951, 871, 711, 391, 782, 533, 35, 70, 140, 280, 560, 105, 210, 420, 840, 665, 315, 630, 229, 458, 916, 801, 587, 159, 318, 636, 241, 482, 964, 897, 779, 543, 55, 110, 220, 440, 880, 745, 475, 950, 869, 707, 399, 798, 565, 99, 198, 396, 792, 569, 123, 246, 492, 984, 953, 891, 767, 503, 1006, 981, 931, 847, 663, 295, 590, 149, 298, 596, 161, 322, 644, 257, 514, 13, 26, 52, 104, 208, 416, 832, 649, 283, 566, 101, 202, 404, 808, 601, 187, 374, 748, 465, 930, 845, 659, 303, 606, 181, 362, 724, 417, 834, 653, 275, 550, 69, 138, 276, 552, 89, 178, 356, 712, 409, 818, 621, 211, 422, 844, 657, 299, 598, 165, 330, 660, 289, 578, 141, 282, 564, 97, 194, 388, 776, 537, 59, 118, 236, 472, 944, 873, 731, 447, 894, 757, 483, 966, 901, 771, 527, 23, 46, 92, 184, 368, 736, 457, 914, 813, 595, 175, 350, 700, 369, 738, 461, 922, 829, 627, 239, 478, 956, 881, 747, 479, 958, 885, 739, 463, 926, 821, 611, 207, 414, 828, 625, 235, 470, 940, 849, 683, 351, 702, 373, 746, 477, 954, 893, 755, 495, 990, 949, 867, 719, 407, 814, 597, 163, 326, 652, 273, 546, 77, 154, 308, 616, 217, 434, 868, 705, 395, 790, 549, 67, 134, 268, 536, 57, 114, 228, 456, 912, 809, 603, 191, 382, 764, 497, 994, 973, 915, 815, 599, 167, 334, 668, 305, 610, 205, 410, 820, 609, 203, 406, 812, 593, 171, 342, 684, 337, 674, 333, 666, 317, 634, 253, 506, 1012, 993, 971, 927, 823, 615, 199, 398, 796, 561, 107, 214, 428, 856, 697, 379, 758, 485, 970, 925, 819, 623, 215, 430, 860, 689, 363, 726, 421, 842, 669, 307, 614, 197, 394, 788, 545, 75, 150, 300, 600, 185, 370, 740, 449, 898, 781, 531, 47, 94, 188, 376, 752, 489, 978, 941, 851, 687, 343, 686, 341, 682, 349, 698, 381, 762, 509, 1018, 1021, 1011, 1007, 983, 935, 839, 647, 263, 526, 21, 42, 84, 168, 336, 672, 329, 658, 301, 602, 189, 378, 756, 481, 962, 909, 787, 559, 87, 174, 348, 696, 377, 754, 493, 986, 957, 883, 751, 471, 942, 853, 675, 335, 670, 309, 618, 221, 442, 884, 737, 459, 918, 805, 579, 143, 286, 572, 113, 226, 452, 904, 793, 571, 127, 254, 508, 1016, 1017, 1019, 1023, 1015, 999, 967, 903, 775, 519, 7, 14, 28, 56, 112, 224, 448, 896, 777, 539, 63, 126, 252, 504, 1008, 1001, 987, 959, 887, 743, 455, 910, 789, 547, 79, 158, 316, 632, 249, 498, 996, 961, 907, 799, 567, 103, 206, 412, 824, 633, 251, 502, 1004, 977, 939, 863, 695, 359, 718, 405, 810, 605, 179, 358, 716, 401, 802, 589, 147, 294, 588, 145, 290, 580, 129, 258, 516, 1, 2, 4 -}; - - - -/** - * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). - * The logarithm of 0 is set to 1024 by convention. - */ -static const uint16_t log[1024] = { - 1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949 -}; - - - -/** - * @brief Generates exp and log lookup tables of GF(2^m). - * - * The logarithm of 0 is defined as 2^PARAM_M by convention.
- * The last two elements of the exp table are needed by the PQCLEAN_HQC128_CLEAN_gf_mul function from gf_lutmul.c - * (for example if both elements to multiply are zero). - * @param[out] exp Array of size 2^PARAM_M + 2 receiving the powers of the primitive element - * @param[out] log Array of size 2^PARAM_M receiving the logarithms of the elements of GF(2^m) - * @param[in] m Parameter of Galois field GF(2^m) - */ -void PQCLEAN_HQC128_CLEAN_gf_generate(uint16_t *exp, uint16_t *log, int16_t m) { - uint16_t elt = 1; - uint16_t alpha = 2; // primitive element of GF(2^PARAM_M) - uint16_t gf_poly = PARAM_GF_POLY; - - for (size_t i = 0 ; i < (1U << m) - 1 ; ++i) { - exp[i] = elt; - log[elt] = i; - - elt *= alpha; - if (elt >= 1 << m) { - elt ^= gf_poly; - } - } - - exp[(1 << m) - 1] = 1; - exp[1 << m] = 2; - exp[(1 << m) + 1] = 4; - log[0] = 1 << m; // by convention -} - - - /** * @brief Returns the integer i such that elt = a^i where a is the primitive element of GF(2^PARAM_M). * diff --git a/crypto_kem/hqc-128/clean/gf.h b/crypto_kem/hqc-128/clean/gf.h index 387cf927..c4b0c536 100644 --- a/crypto_kem/hqc-128/clean/gf.h +++ b/crypto_kem/hqc-128/clean/gf.h @@ -12,7 +12,20 @@ #include #include -void PQCLEAN_HQC128_CLEAN_gf_generate(uint16_t *exp, uint16_t *log, int16_t m); + +/** + * Powers of the root alpha of x^10 + x^3 + 1. + * The last two elements are needed by the PQCLEAN_HQC128_CLEAN_gf_mul function from gf_mul.c + * (for example if both elements to multiply are zero). + */ +static const uint16_t exp[1026] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 9, 18, 36, 72, 144, 288, 576, 137, 274, 548, 65, 130, 260, 520, 25, 50, 100, 200, 400, 800, 585, 155, 310, 620, 209, 418, 836, 641, 267, 534, 37, 74, 148, 296, 592, 169, 338, 676, 321, 642, 269, 538, 61, 122, 244, 488, 976, 937, 859, 703, 375, 750, 469, 938, 861, 691, 367, 734, 437, 874, 733, 435, 870, 709, 387, 774, 517, 3, 6, 12, 24, 48, 96, 192, 384, 768, 521, 27, 54, 108, 216, 432, 864, 713, 411, 822, 613, 195, 390, 780, 529, 43, 86, 172, 344, 688, 361, 722, 429, 858, 701, 371, 742, 453, 906, 797, 563, 111, 222, 444, 888, 761, 507, 1014, 997, 963, 911, 791, 551, 71, 142, 284, 568, 121, 242, 484, 968, 921, 827, 639, 247, 494, 988, 945, 875, 735, 439, 878, 725, 419, 838, 645, 259, 518, 5, 10, 20, 40, 80, 160, 320, 640, 265, 530, 45, 90, 180, 360, 720, 425, 850, 685, 339, 678, 325, 650, 285, 570, 125, 250, 500, 1000, 985, 955, 895, 759, 487, 974, 917, 803, 591, 151, 302, 604, 177, 354, 708, 385, 770, 525, 19, 38, 76, 152, 304, 608, 201, 402, 804, 577, 139, 278, 556, 81, 162, 324, 648, 281, 562, 109, 218, 436, 872, 729, 443, 886, 741, 451, 902, 773, 515, 15, 30, 60, 120, 240, 480, 960, 905, 795, 575, 119, 238, 476, 952, 889, 763, 511, 1022, 1013, 995, 975, 919, 807, 583, 135, 270, 540, 49, 98, 196, 392, 784, 553, 91, 182, 364, 728, 441, 882, 749, 467, 934, 837, 643, 271, 542, 53, 106, 212, 424, 848, 681, 347, 694, 357, 714, 413, 826, 637, 243, 486, 972, 913, 811, 607, 183, 366, 732, 433, 866, 717, 403, 806, 581, 131, 262, 524, 17, 34, 68, 136, 272, 544, 73, 146, 292, 584, 153, 306, 612, 193, 386, 772, 513, 11, 22, 44, 88, 176, 352, 704, 393, 786, 557, 83, 166, 332, 664, 313, 626, 237, 474, 948, 865, 715, 415, 830, 629, 227, 454, 908, 785, 555, 95, 190, 380, 760, 505, 1010, 1005, 979, 943, 855, 679, 327, 654, 277, 554, 93, 186, 372, 744, 473, 946, 877, 723, 431, 862, 693, 355, 710, 389, 778, 541, 51, 102, 204, 408, 816, 617, 219, 438, 876, 721, 427, 854, 677, 323, 646, 261, 522, 29, 58, 116, 232, 464, 928, 841, 667, 319, 638, 245, 490, 980, 929, 843, 671, 311, 622, 213, 426, 852, 673, 331, 662, 293, 586, 157, 314, 628, 225, 450, 900, 769, 523, 31, 62, 124, 248, 496, 992, 969, 923, 831, 631, 231, 462, 924, 817, 619, 223, 446, 892, 753, 491, 982, 933, 835, 655, 279, 558, 85, 170, 340, 680, 345, 690, 365, 730, 445, 890, 765, 499, 998, 965, 899, 783, 535, 39, 78, 156, 312, 624, 233, 466, 932, 833, 651, 287, 574, 117, 234, 468, 936, 857, 699, 383, 766, 501, 1002, 989, 947, 879, 727, 423, 846, 661, 291, 582, 133, 266, 532, 33, 66, 132, 264, 528, 41, 82, 164, 328, 656, 297, 594, 173, 346, 692, 353, 706, 397, 794, 573, 115, 230, 460, 920, 825, 635, 255, 510, 1020, 1009, 1003, 991, 951, 871, 711, 391, 782, 533, 35, 70, 140, 280, 560, 105, 210, 420, 840, 665, 315, 630, 229, 458, 916, 801, 587, 159, 318, 636, 241, 482, 964, 897, 779, 543, 55, 110, 220, 440, 880, 745, 475, 950, 869, 707, 399, 798, 565, 99, 198, 396, 792, 569, 123, 246, 492, 984, 953, 891, 767, 503, 1006, 981, 931, 847, 663, 295, 590, 149, 298, 596, 161, 322, 644, 257, 514, 13, 26, 52, 104, 208, 416, 832, 649, 283, 566, 101, 202, 404, 808, 601, 187, 374, 748, 465, 930, 845, 659, 303, 606, 181, 362, 724, 417, 834, 653, 275, 550, 69, 138, 276, 552, 89, 178, 356, 712, 409, 818, 621, 211, 422, 844, 657, 299, 598, 165, 330, 660, 289, 578, 141, 282, 564, 97, 194, 388, 776, 537, 59, 118, 236, 472, 944, 873, 731, 447, 894, 757, 483, 966, 901, 771, 527, 23, 46, 92, 184, 368, 736, 457, 914, 813, 595, 175, 350, 700, 369, 738, 461, 922, 829, 627, 239, 478, 956, 881, 747, 479, 958, 885, 739, 463, 926, 821, 611, 207, 414, 828, 625, 235, 470, 940, 849, 683, 351, 702, 373, 746, 477, 954, 893, 755, 495, 990, 949, 867, 719, 407, 814, 597, 163, 326, 652, 273, 546, 77, 154, 308, 616, 217, 434, 868, 705, 395, 790, 549, 67, 134, 268, 536, 57, 114, 228, 456, 912, 809, 603, 191, 382, 764, 497, 994, 973, 915, 815, 599, 167, 334, 668, 305, 610, 205, 410, 820, 609, 203, 406, 812, 593, 171, 342, 684, 337, 674, 333, 666, 317, 634, 253, 506, 1012, 993, 971, 927, 823, 615, 199, 398, 796, 561, 107, 214, 428, 856, 697, 379, 758, 485, 970, 925, 819, 623, 215, 430, 860, 689, 363, 726, 421, 842, 669, 307, 614, 197, 394, 788, 545, 75, 150, 300, 600, 185, 370, 740, 449, 898, 781, 531, 47, 94, 188, 376, 752, 489, 978, 941, 851, 687, 343, 686, 341, 682, 349, 698, 381, 762, 509, 1018, 1021, 1011, 1007, 983, 935, 839, 647, 263, 526, 21, 42, 84, 168, 336, 672, 329, 658, 301, 602, 189, 378, 756, 481, 962, 909, 787, 559, 87, 174, 348, 696, 377, 754, 493, 986, 957, 883, 751, 471, 942, 853, 675, 335, 670, 309, 618, 221, 442, 884, 737, 459, 918, 805, 579, 143, 286, 572, 113, 226, 452, 904, 793, 571, 127, 254, 508, 1016, 1017, 1019, 1023, 1015, 999, 967, 903, 775, 519, 7, 14, 28, 56, 112, 224, 448, 896, 777, 539, 63, 126, 252, 504, 1008, 1001, 987, 959, 887, 743, 455, 910, 789, 547, 79, 158, 316, 632, 249, 498, 996, 961, 907, 799, 567, 103, 206, 412, 824, 633, 251, 502, 1004, 977, 939, 863, 695, 359, 718, 405, 810, 605, 179, 358, 716, 401, 802, 589, 147, 294, 588, 145, 290, 580, 129, 258, 516, 1, 2, 4}; + + +/** + * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). + * The logarithm of 0 is set to 1024 by convention. + */ +static const uint16_t log[1024] = {1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949}; uint16_t PQCLEAN_HQC128_CLEAN_gf_log(uint16_t elt); diff --git a/crypto_kem/hqc-192/avx2/fft.c b/crypto_kem/hqc-192/avx2/fft.c index 18769698..75bae4c3 100644 --- a/crypto_kem/hqc-192/avx2/fft.c +++ b/crypto_kem/hqc-192/avx2/fft.c @@ -19,6 +19,7 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -28,7 +29,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -46,10 +48,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -89,7 +92,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -100,51 +103,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -160,25 +168,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQC192_AVX2_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -188,8 +198,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQC192_AVX2_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQC192_AVX2_gf_mul(beta_m_pow, f[i]); } @@ -199,7 +209,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQC192_AVX2_gf_mul(betas[i], PQCLEAN_HQC192_AVX2_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQC192_AVX2_gf_square(gammas[i]) ^ gammas[i]; } @@ -210,10 +220,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC192_AVX2_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -224,7 +235,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC192_AVX2_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -253,14 +264,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQC192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -276,7 +288,7 @@ void PQCLEAN_HQC192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQC192_AVX2_gf_square(betas[i]) ^ betas[i]; } @@ -284,6 +296,7 @@ void PQCLEAN_HQC192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -294,7 +307,7 @@ void PQCLEAN_HQC192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC192_AVX2_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -309,25 +322,28 @@ void PQCLEAN_HQC192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { * @param[in] w Array of size 2^PARAM_M */ void PQCLEAN_HQC192_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) { - uint16_t gammas[PARAM_M - 1]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); - size_t index = PARAM_GF_MUL_ORDER; + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint64_t bit; + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); - error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15); - uint64_t bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + + k = 1 << (PARAM_M - 1); + index = PARAM_GF_MUL_ORDER; + bit = 1 ^ ((uint16_t) - w[k] >> 15); error[index / 8] ^= bit << (index % 64); - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_AVX2_gf_log(gammas_sums[i]); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15); + bit = 1 ^ ((uint16_t) - w[i] >> 15); error[index / 64] ^= bit << (index % 64); index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC192_AVX2_gf_log(gammas_sums[i] ^ 1); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15); + bit = 1 ^ ((uint16_t) - w[k + i] >> 15); error[index / 64] ^= bit << (index % 64); } } diff --git a/crypto_kem/hqc-192/avx2/gf.c b/crypto_kem/hqc-192/avx2/gf.c index 262bd271..b087638c 100644 --- a/crypto_kem/hqc-192/avx2/gf.c +++ b/crypto_kem/hqc-192/avx2/gf.c @@ -14,16 +14,6 @@ static uint16_t gf_quad(uint64_t a); -/** - * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). - * The logarithm of 0 is set to 1024 by convention. - */ -static const uint16_t log[1024] = { - 1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949 -}; - - - /** * Returns the integer i such that elt = a^i * where a is the primitive element of GF(2^GF_M). @@ -41,30 +31,29 @@ uint16_t PQCLEAN_HQC192_AVX2_gf_log(uint16_t elt) { * @param[in] x Polynomial of degree less than 64 * @param[in] deg_x The degree of polynomial x */ -uint16_t gf_reduce(uint64_t x, size_t deg_x) { - // Compute the distance between the primitive polynomial first two set bits - size_t lz1 = __builtin_clz(PARAM_GF_POLY); - size_t lz2 = __builtin_clz(PARAM_GF_POLY ^ 1 << PARAM_M); - size_t dist = lz2 - lz1; +static uint16_t gf_reduce(uint64_t x, size_t deg_x) { + uint16_t z1, z2, rmdr, dist; + uint64_t mod; + size_t steps, i, j; // Deduce the number of steps of reduction - size_t steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), dist); + steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); // Reduce - for (size_t i = 0; i < steps; ++i) { - uint64_t mod = x >> PARAM_M; + for (i = 0; i < steps; ++i) { + mod = x >> PARAM_M; x &= (1 << PARAM_M) - 1; x ^= mod; - size_t tz1 = 0; - uint16_t rmdr = PARAM_GF_POLY ^ 1; - for (size_t j = __builtin_popcount(PARAM_GF_POLY) - 2; j; --j) { - size_t tz2 = __builtin_ctz(rmdr); - size_t shift = tz2 - tz1; - mod <<= shift; + z1 = 0; + rmdr = PARAM_GF_POLY ^ 1; + for (j = PARAM_GF_POLY_WT - 2; j; --j) { + z2 = __tzcnt_u16(rmdr); + dist = (uint16_t) (z2 - z1); + mod <<= dist; x ^= mod; - rmdr ^= 1 << tz2; - tz1 = tz2; + rmdr ^= 1 << z2; + z1 = z2; } } @@ -113,7 +102,7 @@ uint16_t PQCLEAN_HQC192_AVX2_gf_square(uint16_t a) { * @returns a^4 * @param[in] a Element of GF(2^GF_M) */ -uint16_t gf_quad(uint64_t a) { +static uint16_t gf_quad(uint64_t a) { uint64_t q = a & 1; for (size_t i = 1; i < PARAM_M; ++i) { a <<= 3; @@ -158,10 +147,10 @@ uint16_t PQCLEAN_HQC192_AVX2_gf_inverse(uint16_t a) { * @param[in] i The integer whose modulo is taken */ uint16_t PQCLEAN_HQC192_AVX2_gf_mod(uint16_t i) { - uint16_t tmp = i - PARAM_GF_MUL_ORDER; + uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); // mask = 0xffff if (i < GF_MUL_ORDER) - int16_t mask = -(tmp >> 15); + uint16_t mask = -(tmp >> 15); return tmp + (mask & PARAM_GF_MUL_ORDER); } diff --git a/crypto_kem/hqc-192/avx2/gf.h b/crypto_kem/hqc-192/avx2/gf.h index 82b0f5e7..b66fe68a 100644 --- a/crypto_kem/hqc-192/avx2/gf.h +++ b/crypto_kem/hqc-192/avx2/gf.h @@ -12,8 +12,13 @@ #include #include -void PQCLEAN_HQC192_AVX2_gf_generate(uint16_t *exp, uint16_t *log, int16_t m); - +/** + * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). + * The logarithm of 0 is set to 1024 by convention. + */ +static const uint16_t log[1024] = { + 1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949 +}; uint16_t PQCLEAN_HQC192_AVX2_gf_log(uint16_t elt); diff --git a/crypto_kem/hqc-192/avx2/gf2x.c b/crypto_kem/hqc-192/avx2/gf2x.c index 18eaa5b0..32aceb8f 100644 --- a/crypto_kem/hqc-192/avx2/gf2x.c +++ b/crypto_kem/hqc-192/avx2/gf2x.c @@ -368,9 +368,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { static __m256i W0[2 * (T_TM3_3W_256)], W1[2 * (T_TM3_3W_256)], W2[2 * (T_TM3_3W_256)], W3[2 * (T_TM3_3W_256)], W4[2 * (T_TM3_3W_256)]; static __m256i tmp[2 * (T_TM3_3W_256)]; static __m256i ro256[6 * (T_TM3_3W_256)]; - const __m256i zero = (__m256i) { - 0ul, 0ul, 0ul, 0ul - }; + const __m256i zero = _mm256_setzero_si256(); int32_t T2 = T_TM3_3W_64 << 1; for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { @@ -387,24 +385,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { int32_t i4 = i << 2; int32_t i41 = i4 + 1; - U0[i] = (__m256i) { - A[i4], A[i41], 0x0ul, 0x0ul - }; - V0[i] = (__m256i) { - B[i4], B[i41], 0x0ul, 0x0ul - }; - U1[i] = (__m256i) { - A[i4 + T_TM3_3W_64 - 2], A[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - V1[i] = (__m256i) { - B[i4 + T_TM3_3W_64 - 2], B[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - U2[i] = (__m256i) { - A[i4 - 4 + T2], A[i4 - 3 + T2], 0x0ul, 0x0ul - }; - V2[i] = (__m256i) { - B[i4 - 4 + T2], B[i4 - 3 + T2], 0x0ul, 0x0ul - }; + U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); + V0[i] = _mm256_set_epi64x(0, 0, B[i41], B[i4]); + U1[i] = _mm256_set_epi64x(0, 0, A[i41 + T_TM3_3W_64 - 2], A[i4 + T_TM3_3W_64 - 2]); + V1[i] = _mm256_set_epi64x(0, 0, B[i41 + T_TM3_3W_64 - 2], B[i4 + T_TM3_3W_64 - 2]); + U2[i] = _mm256_set_epi64x(0, 0, A[i4 - 3 + T2], A[i4 - 4 + T2]); + V2[i] = _mm256_set_epi64x(0, 0, B[i4 - 3 + T2], B[i4 - 4 + T2]); } // Evaluation phase : x= X^64 @@ -492,9 +478,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { //W2 =(W2 + W3 + W4*(x^3+1))/(x+1) U1_64 = ((int64_t *) W4); __m256i *U1_256 = (__m256i *) (U1_64 + 1); - tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ (__m256i) { - 0x0ul, 0x0ul, 0x0ul, U1_64[0] - }; + tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); diff --git a/crypto_kem/hqc-192/clean/gf.c b/crypto_kem/hqc-192/clean/gf.c index 7be78f3f..ce76f0b3 100644 --- a/crypto_kem/hqc-192/clean/gf.c +++ b/crypto_kem/hqc-192/clean/gf.c @@ -7,61 +7,6 @@ */ - -/** - * Powers of the root alpha of x^10 + x^3 + 1. - * The last two elements are needed by the PQCLEAN_HQC192_CLEAN_gf_mul function from gf_mul.c - * (for example if both elements to multiply are zero). - */ -static const uint16_t exp[1026] = { - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 9, 18, 36, 72, 144, 288, 576, 137, 274, 548, 65, 130, 260, 520, 25, 50, 100, 200, 400, 800, 585, 155, 310, 620, 209, 418, 836, 641, 267, 534, 37, 74, 148, 296, 592, 169, 338, 676, 321, 642, 269, 538, 61, 122, 244, 488, 976, 937, 859, 703, 375, 750, 469, 938, 861, 691, 367, 734, 437, 874, 733, 435, 870, 709, 387, 774, 517, 3, 6, 12, 24, 48, 96, 192, 384, 768, 521, 27, 54, 108, 216, 432, 864, 713, 411, 822, 613, 195, 390, 780, 529, 43, 86, 172, 344, 688, 361, 722, 429, 858, 701, 371, 742, 453, 906, 797, 563, 111, 222, 444, 888, 761, 507, 1014, 997, 963, 911, 791, 551, 71, 142, 284, 568, 121, 242, 484, 968, 921, 827, 639, 247, 494, 988, 945, 875, 735, 439, 878, 725, 419, 838, 645, 259, 518, 5, 10, 20, 40, 80, 160, 320, 640, 265, 530, 45, 90, 180, 360, 720, 425, 850, 685, 339, 678, 325, 650, 285, 570, 125, 250, 500, 1000, 985, 955, 895, 759, 487, 974, 917, 803, 591, 151, 302, 604, 177, 354, 708, 385, 770, 525, 19, 38, 76, 152, 304, 608, 201, 402, 804, 577, 139, 278, 556, 81, 162, 324, 648, 281, 562, 109, 218, 436, 872, 729, 443, 886, 741, 451, 902, 773, 515, 15, 30, 60, 120, 240, 480, 960, 905, 795, 575, 119, 238, 476, 952, 889, 763, 511, 1022, 1013, 995, 975, 919, 807, 583, 135, 270, 540, 49, 98, 196, 392, 784, 553, 91, 182, 364, 728, 441, 882, 749, 467, 934, 837, 643, 271, 542, 53, 106, 212, 424, 848, 681, 347, 694, 357, 714, 413, 826, 637, 243, 486, 972, 913, 811, 607, 183, 366, 732, 433, 866, 717, 403, 806, 581, 131, 262, 524, 17, 34, 68, 136, 272, 544, 73, 146, 292, 584, 153, 306, 612, 193, 386, 772, 513, 11, 22, 44, 88, 176, 352, 704, 393, 786, 557, 83, 166, 332, 664, 313, 626, 237, 474, 948, 865, 715, 415, 830, 629, 227, 454, 908, 785, 555, 95, 190, 380, 760, 505, 1010, 1005, 979, 943, 855, 679, 327, 654, 277, 554, 93, 186, 372, 744, 473, 946, 877, 723, 431, 862, 693, 355, 710, 389, 778, 541, 51, 102, 204, 408, 816, 617, 219, 438, 876, 721, 427, 854, 677, 323, 646, 261, 522, 29, 58, 116, 232, 464, 928, 841, 667, 319, 638, 245, 490, 980, 929, 843, 671, 311, 622, 213, 426, 852, 673, 331, 662, 293, 586, 157, 314, 628, 225, 450, 900, 769, 523, 31, 62, 124, 248, 496, 992, 969, 923, 831, 631, 231, 462, 924, 817, 619, 223, 446, 892, 753, 491, 982, 933, 835, 655, 279, 558, 85, 170, 340, 680, 345, 690, 365, 730, 445, 890, 765, 499, 998, 965, 899, 783, 535, 39, 78, 156, 312, 624, 233, 466, 932, 833, 651, 287, 574, 117, 234, 468, 936, 857, 699, 383, 766, 501, 1002, 989, 947, 879, 727, 423, 846, 661, 291, 582, 133, 266, 532, 33, 66, 132, 264, 528, 41, 82, 164, 328, 656, 297, 594, 173, 346, 692, 353, 706, 397, 794, 573, 115, 230, 460, 920, 825, 635, 255, 510, 1020, 1009, 1003, 991, 951, 871, 711, 391, 782, 533, 35, 70, 140, 280, 560, 105, 210, 420, 840, 665, 315, 630, 229, 458, 916, 801, 587, 159, 318, 636, 241, 482, 964, 897, 779, 543, 55, 110, 220, 440, 880, 745, 475, 950, 869, 707, 399, 798, 565, 99, 198, 396, 792, 569, 123, 246, 492, 984, 953, 891, 767, 503, 1006, 981, 931, 847, 663, 295, 590, 149, 298, 596, 161, 322, 644, 257, 514, 13, 26, 52, 104, 208, 416, 832, 649, 283, 566, 101, 202, 404, 808, 601, 187, 374, 748, 465, 930, 845, 659, 303, 606, 181, 362, 724, 417, 834, 653, 275, 550, 69, 138, 276, 552, 89, 178, 356, 712, 409, 818, 621, 211, 422, 844, 657, 299, 598, 165, 330, 660, 289, 578, 141, 282, 564, 97, 194, 388, 776, 537, 59, 118, 236, 472, 944, 873, 731, 447, 894, 757, 483, 966, 901, 771, 527, 23, 46, 92, 184, 368, 736, 457, 914, 813, 595, 175, 350, 700, 369, 738, 461, 922, 829, 627, 239, 478, 956, 881, 747, 479, 958, 885, 739, 463, 926, 821, 611, 207, 414, 828, 625, 235, 470, 940, 849, 683, 351, 702, 373, 746, 477, 954, 893, 755, 495, 990, 949, 867, 719, 407, 814, 597, 163, 326, 652, 273, 546, 77, 154, 308, 616, 217, 434, 868, 705, 395, 790, 549, 67, 134, 268, 536, 57, 114, 228, 456, 912, 809, 603, 191, 382, 764, 497, 994, 973, 915, 815, 599, 167, 334, 668, 305, 610, 205, 410, 820, 609, 203, 406, 812, 593, 171, 342, 684, 337, 674, 333, 666, 317, 634, 253, 506, 1012, 993, 971, 927, 823, 615, 199, 398, 796, 561, 107, 214, 428, 856, 697, 379, 758, 485, 970, 925, 819, 623, 215, 430, 860, 689, 363, 726, 421, 842, 669, 307, 614, 197, 394, 788, 545, 75, 150, 300, 600, 185, 370, 740, 449, 898, 781, 531, 47, 94, 188, 376, 752, 489, 978, 941, 851, 687, 343, 686, 341, 682, 349, 698, 381, 762, 509, 1018, 1021, 1011, 1007, 983, 935, 839, 647, 263, 526, 21, 42, 84, 168, 336, 672, 329, 658, 301, 602, 189, 378, 756, 481, 962, 909, 787, 559, 87, 174, 348, 696, 377, 754, 493, 986, 957, 883, 751, 471, 942, 853, 675, 335, 670, 309, 618, 221, 442, 884, 737, 459, 918, 805, 579, 143, 286, 572, 113, 226, 452, 904, 793, 571, 127, 254, 508, 1016, 1017, 1019, 1023, 1015, 999, 967, 903, 775, 519, 7, 14, 28, 56, 112, 224, 448, 896, 777, 539, 63, 126, 252, 504, 1008, 1001, 987, 959, 887, 743, 455, 910, 789, 547, 79, 158, 316, 632, 249, 498, 996, 961, 907, 799, 567, 103, 206, 412, 824, 633, 251, 502, 1004, 977, 939, 863, 695, 359, 718, 405, 810, 605, 179, 358, 716, 401, 802, 589, 147, 294, 588, 145, 290, 580, 129, 258, 516, 1, 2, 4 -}; - - - -/** - * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). - * The logarithm of 0 is set to 1024 by convention. - */ -static const uint16_t log[1024] = { - 1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949 -}; - - - -/** - * @brief Generates exp and log lookup tables of GF(2^m). - * - * The logarithm of 0 is defined as 2^PARAM_M by convention.
- * The last two elements of the exp table are needed by the PQCLEAN_HQC192_CLEAN_gf_mul function from gf_lutmul.c - * (for example if both elements to multiply are zero). - * @param[out] exp Array of size 2^PARAM_M + 2 receiving the powers of the primitive element - * @param[out] log Array of size 2^PARAM_M receiving the logarithms of the elements of GF(2^m) - * @param[in] m Parameter of Galois field GF(2^m) - */ -void PQCLEAN_HQC192_CLEAN_gf_generate(uint16_t *exp, uint16_t *log, int16_t m) { - uint16_t elt = 1; - uint16_t alpha = 2; // primitive element of GF(2^PARAM_M) - uint16_t gf_poly = PARAM_GF_POLY; - - for (size_t i = 0 ; i < (1U << m) - 1 ; ++i) { - exp[i] = elt; - log[elt] = i; - - elt *= alpha; - if (elt >= 1 << m) { - elt ^= gf_poly; - } - } - - exp[(1 << m) - 1] = 1; - exp[1 << m] = 2; - exp[(1 << m) + 1] = 4; - log[0] = 1 << m; // by convention -} - - - /** * @brief Returns the integer i such that elt = a^i where a is the primitive element of GF(2^PARAM_M). * diff --git a/crypto_kem/hqc-192/clean/gf.h b/crypto_kem/hqc-192/clean/gf.h index 5176db14..ade45411 100644 --- a/crypto_kem/hqc-192/clean/gf.h +++ b/crypto_kem/hqc-192/clean/gf.h @@ -12,7 +12,20 @@ #include #include -void PQCLEAN_HQC192_CLEAN_gf_generate(uint16_t *exp, uint16_t *log, int16_t m); + +/** + * Powers of the root alpha of x^10 + x^3 + 1. + * The last two elements are needed by the PQCLEAN_HQC192_CLEAN_gf_mul function from gf_mul.c + * (for example if both elements to multiply are zero). + */ +static const uint16_t exp[1026] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 9, 18, 36, 72, 144, 288, 576, 137, 274, 548, 65, 130, 260, 520, 25, 50, 100, 200, 400, 800, 585, 155, 310, 620, 209, 418, 836, 641, 267, 534, 37, 74, 148, 296, 592, 169, 338, 676, 321, 642, 269, 538, 61, 122, 244, 488, 976, 937, 859, 703, 375, 750, 469, 938, 861, 691, 367, 734, 437, 874, 733, 435, 870, 709, 387, 774, 517, 3, 6, 12, 24, 48, 96, 192, 384, 768, 521, 27, 54, 108, 216, 432, 864, 713, 411, 822, 613, 195, 390, 780, 529, 43, 86, 172, 344, 688, 361, 722, 429, 858, 701, 371, 742, 453, 906, 797, 563, 111, 222, 444, 888, 761, 507, 1014, 997, 963, 911, 791, 551, 71, 142, 284, 568, 121, 242, 484, 968, 921, 827, 639, 247, 494, 988, 945, 875, 735, 439, 878, 725, 419, 838, 645, 259, 518, 5, 10, 20, 40, 80, 160, 320, 640, 265, 530, 45, 90, 180, 360, 720, 425, 850, 685, 339, 678, 325, 650, 285, 570, 125, 250, 500, 1000, 985, 955, 895, 759, 487, 974, 917, 803, 591, 151, 302, 604, 177, 354, 708, 385, 770, 525, 19, 38, 76, 152, 304, 608, 201, 402, 804, 577, 139, 278, 556, 81, 162, 324, 648, 281, 562, 109, 218, 436, 872, 729, 443, 886, 741, 451, 902, 773, 515, 15, 30, 60, 120, 240, 480, 960, 905, 795, 575, 119, 238, 476, 952, 889, 763, 511, 1022, 1013, 995, 975, 919, 807, 583, 135, 270, 540, 49, 98, 196, 392, 784, 553, 91, 182, 364, 728, 441, 882, 749, 467, 934, 837, 643, 271, 542, 53, 106, 212, 424, 848, 681, 347, 694, 357, 714, 413, 826, 637, 243, 486, 972, 913, 811, 607, 183, 366, 732, 433, 866, 717, 403, 806, 581, 131, 262, 524, 17, 34, 68, 136, 272, 544, 73, 146, 292, 584, 153, 306, 612, 193, 386, 772, 513, 11, 22, 44, 88, 176, 352, 704, 393, 786, 557, 83, 166, 332, 664, 313, 626, 237, 474, 948, 865, 715, 415, 830, 629, 227, 454, 908, 785, 555, 95, 190, 380, 760, 505, 1010, 1005, 979, 943, 855, 679, 327, 654, 277, 554, 93, 186, 372, 744, 473, 946, 877, 723, 431, 862, 693, 355, 710, 389, 778, 541, 51, 102, 204, 408, 816, 617, 219, 438, 876, 721, 427, 854, 677, 323, 646, 261, 522, 29, 58, 116, 232, 464, 928, 841, 667, 319, 638, 245, 490, 980, 929, 843, 671, 311, 622, 213, 426, 852, 673, 331, 662, 293, 586, 157, 314, 628, 225, 450, 900, 769, 523, 31, 62, 124, 248, 496, 992, 969, 923, 831, 631, 231, 462, 924, 817, 619, 223, 446, 892, 753, 491, 982, 933, 835, 655, 279, 558, 85, 170, 340, 680, 345, 690, 365, 730, 445, 890, 765, 499, 998, 965, 899, 783, 535, 39, 78, 156, 312, 624, 233, 466, 932, 833, 651, 287, 574, 117, 234, 468, 936, 857, 699, 383, 766, 501, 1002, 989, 947, 879, 727, 423, 846, 661, 291, 582, 133, 266, 532, 33, 66, 132, 264, 528, 41, 82, 164, 328, 656, 297, 594, 173, 346, 692, 353, 706, 397, 794, 573, 115, 230, 460, 920, 825, 635, 255, 510, 1020, 1009, 1003, 991, 951, 871, 711, 391, 782, 533, 35, 70, 140, 280, 560, 105, 210, 420, 840, 665, 315, 630, 229, 458, 916, 801, 587, 159, 318, 636, 241, 482, 964, 897, 779, 543, 55, 110, 220, 440, 880, 745, 475, 950, 869, 707, 399, 798, 565, 99, 198, 396, 792, 569, 123, 246, 492, 984, 953, 891, 767, 503, 1006, 981, 931, 847, 663, 295, 590, 149, 298, 596, 161, 322, 644, 257, 514, 13, 26, 52, 104, 208, 416, 832, 649, 283, 566, 101, 202, 404, 808, 601, 187, 374, 748, 465, 930, 845, 659, 303, 606, 181, 362, 724, 417, 834, 653, 275, 550, 69, 138, 276, 552, 89, 178, 356, 712, 409, 818, 621, 211, 422, 844, 657, 299, 598, 165, 330, 660, 289, 578, 141, 282, 564, 97, 194, 388, 776, 537, 59, 118, 236, 472, 944, 873, 731, 447, 894, 757, 483, 966, 901, 771, 527, 23, 46, 92, 184, 368, 736, 457, 914, 813, 595, 175, 350, 700, 369, 738, 461, 922, 829, 627, 239, 478, 956, 881, 747, 479, 958, 885, 739, 463, 926, 821, 611, 207, 414, 828, 625, 235, 470, 940, 849, 683, 351, 702, 373, 746, 477, 954, 893, 755, 495, 990, 949, 867, 719, 407, 814, 597, 163, 326, 652, 273, 546, 77, 154, 308, 616, 217, 434, 868, 705, 395, 790, 549, 67, 134, 268, 536, 57, 114, 228, 456, 912, 809, 603, 191, 382, 764, 497, 994, 973, 915, 815, 599, 167, 334, 668, 305, 610, 205, 410, 820, 609, 203, 406, 812, 593, 171, 342, 684, 337, 674, 333, 666, 317, 634, 253, 506, 1012, 993, 971, 927, 823, 615, 199, 398, 796, 561, 107, 214, 428, 856, 697, 379, 758, 485, 970, 925, 819, 623, 215, 430, 860, 689, 363, 726, 421, 842, 669, 307, 614, 197, 394, 788, 545, 75, 150, 300, 600, 185, 370, 740, 449, 898, 781, 531, 47, 94, 188, 376, 752, 489, 978, 941, 851, 687, 343, 686, 341, 682, 349, 698, 381, 762, 509, 1018, 1021, 1011, 1007, 983, 935, 839, 647, 263, 526, 21, 42, 84, 168, 336, 672, 329, 658, 301, 602, 189, 378, 756, 481, 962, 909, 787, 559, 87, 174, 348, 696, 377, 754, 493, 986, 957, 883, 751, 471, 942, 853, 675, 335, 670, 309, 618, 221, 442, 884, 737, 459, 918, 805, 579, 143, 286, 572, 113, 226, 452, 904, 793, 571, 127, 254, 508, 1016, 1017, 1019, 1023, 1015, 999, 967, 903, 775, 519, 7, 14, 28, 56, 112, 224, 448, 896, 777, 539, 63, 126, 252, 504, 1008, 1001, 987, 959, 887, 743, 455, 910, 789, 547, 79, 158, 316, 632, 249, 498, 996, 961, 907, 799, 567, 103, 206, 412, 824, 633, 251, 502, 1004, 977, 939, 863, 695, 359, 718, 405, 810, 605, 179, 358, 716, 401, 802, 589, 147, 294, 588, 145, 290, 580, 129, 258, 516, 1, 2, 4}; + + +/** + * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). + * The logarithm of 0 is set to 1024 by convention. + */ +static const uint16_t log[1024] = {1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949}; uint16_t PQCLEAN_HQC192_CLEAN_gf_log(uint16_t elt); diff --git a/crypto_kem/hqc-256/avx2/fft.c b/crypto_kem/hqc-256/avx2/fft.c index 4a12768c..167ac286 100644 --- a/crypto_kem/hqc-256/avx2/fft.c +++ b/crypto_kem/hqc-256/avx2/fft.c @@ -19,6 +19,7 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -28,7 +29,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -46,10 +48,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -89,7 +92,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -100,51 +103,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -160,25 +168,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQC256_AVX2_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -188,8 +198,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQC256_AVX2_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQC256_AVX2_gf_mul(beta_m_pow, f[i]); } @@ -199,7 +209,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQC256_AVX2_gf_mul(betas[i], PQCLEAN_HQC256_AVX2_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQC256_AVX2_gf_square(gammas[i]) ^ gammas[i]; } @@ -210,10 +220,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC256_AVX2_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -224,7 +235,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC256_AVX2_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -253,14 +264,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQC256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -276,7 +288,7 @@ void PQCLEAN_HQC256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQC256_AVX2_gf_square(betas[i]) ^ betas[i]; } @@ -284,6 +296,7 @@ void PQCLEAN_HQC256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -294,7 +307,7 @@ void PQCLEAN_HQC256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQC256_AVX2_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -309,25 +322,28 @@ void PQCLEAN_HQC256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { * @param[in] w Array of size 2^PARAM_M */ void PQCLEAN_HQC256_AVX2_fft_retrieve_bch_error_poly(uint64_t *error, const uint16_t *w) { - uint16_t gammas[PARAM_M - 1]; - uint16_t gammas_sums[1 << (PARAM_M - 1)]; - size_t k = 1 << (PARAM_M - 1); - size_t index = PARAM_GF_MUL_ORDER; + uint16_t gammas[PARAM_M - 1] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; + uint64_t bit; + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); - error[0] ^= ((uint64_t) 1) ^ ((uint16_t) - w[0] >> 15); - uint64_t bit = ((uint64_t) 1) ^ ((uint16_t) - w[k] >> 15); + error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); + + k = 1 << (PARAM_M - 1); + index = PARAM_GF_MUL_ORDER; + bit = 1 ^ ((uint16_t) - w[k] >> 15); error[index / 8] ^= bit << (index % 64); - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC256_AVX2_gf_log(gammas_sums[i]); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[i] >> 15); + bit = 1 ^ ((uint16_t) - w[i] >> 15); error[index / 64] ^= bit << (index % 64); index = PARAM_GF_MUL_ORDER - PQCLEAN_HQC256_AVX2_gf_log(gammas_sums[i] ^ 1); - bit = ((uint64_t) 1) ^ ((uint16_t) - w[k + i] >> 15); + bit = 1 ^ ((uint16_t) - w[k + i] >> 15); error[index / 64] ^= bit << (index % 64); } } diff --git a/crypto_kem/hqc-256/avx2/gf.c b/crypto_kem/hqc-256/avx2/gf.c index f112f62f..103ffff7 100644 --- a/crypto_kem/hqc-256/avx2/gf.c +++ b/crypto_kem/hqc-256/avx2/gf.c @@ -14,16 +14,6 @@ static uint16_t gf_quad(uint64_t a); -/** - * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). - * The logarithm of 0 is set to 1024 by convention. - */ -static const uint16_t log[1024] = { - 1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949 -}; - - - /** * Returns the integer i such that elt = a^i * where a is the primitive element of GF(2^GF_M). @@ -41,30 +31,29 @@ uint16_t PQCLEAN_HQC256_AVX2_gf_log(uint16_t elt) { * @param[in] x Polynomial of degree less than 64 * @param[in] deg_x The degree of polynomial x */ -uint16_t gf_reduce(uint64_t x, size_t deg_x) { - // Compute the distance between the primitive polynomial first two set bits - size_t lz1 = __builtin_clz(PARAM_GF_POLY); - size_t lz2 = __builtin_clz(PARAM_GF_POLY ^ 1 << PARAM_M); - size_t dist = lz2 - lz1; +static uint16_t gf_reduce(uint64_t x, size_t deg_x) { + uint16_t z1, z2, rmdr, dist; + uint64_t mod; + size_t steps, i, j; // Deduce the number of steps of reduction - size_t steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), dist); + steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); // Reduce - for (size_t i = 0; i < steps; ++i) { - uint64_t mod = x >> PARAM_M; + for (i = 0; i < steps; ++i) { + mod = x >> PARAM_M; x &= (1 << PARAM_M) - 1; x ^= mod; - size_t tz1 = 0; - uint16_t rmdr = PARAM_GF_POLY ^ 1; - for (size_t j = __builtin_popcount(PARAM_GF_POLY) - 2; j; --j) { - size_t tz2 = __builtin_ctz(rmdr); - size_t shift = tz2 - tz1; - mod <<= shift; + z1 = 0; + rmdr = PARAM_GF_POLY ^ 1; + for (j = PARAM_GF_POLY_WT - 2; j; --j) { + z2 = __tzcnt_u16(rmdr); + dist = (uint16_t) (z2 - z1); + mod <<= dist; x ^= mod; - rmdr ^= 1 << tz2; - tz1 = tz2; + rmdr ^= 1 << z2; + z1 = z2; } } @@ -113,7 +102,7 @@ uint16_t PQCLEAN_HQC256_AVX2_gf_square(uint16_t a) { * @returns a^4 * @param[in] a Element of GF(2^GF_M) */ -uint16_t gf_quad(uint64_t a) { +static uint16_t gf_quad(uint64_t a) { uint64_t q = a & 1; for (size_t i = 1; i < PARAM_M; ++i) { a <<= 3; @@ -158,10 +147,10 @@ uint16_t PQCLEAN_HQC256_AVX2_gf_inverse(uint16_t a) { * @param[in] i The integer whose modulo is taken */ uint16_t PQCLEAN_HQC256_AVX2_gf_mod(uint16_t i) { - uint16_t tmp = i - PARAM_GF_MUL_ORDER; + uint16_t tmp = (uint16_t) (i - PARAM_GF_MUL_ORDER); // mask = 0xffff if (i < GF_MUL_ORDER) - int16_t mask = -(tmp >> 15); + uint16_t mask = -(tmp >> 15); return tmp + (mask & PARAM_GF_MUL_ORDER); } diff --git a/crypto_kem/hqc-256/avx2/gf.h b/crypto_kem/hqc-256/avx2/gf.h index c2dbc118..06aed696 100644 --- a/crypto_kem/hqc-256/avx2/gf.h +++ b/crypto_kem/hqc-256/avx2/gf.h @@ -12,8 +12,13 @@ #include #include -void PQCLEAN_HQC256_AVX2_gf_generate(uint16_t *exp, uint16_t *log, int16_t m); - +/** + * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). + * The logarithm of 0 is set to 1024 by convention. + */ +static const uint16_t log[1024] = { + 1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949 +}; uint16_t PQCLEAN_HQC256_AVX2_gf_log(uint16_t elt); diff --git a/crypto_kem/hqc-256/avx2/gf2x.c b/crypto_kem/hqc-256/avx2/gf2x.c index 2332b606..4c117552 100644 --- a/crypto_kem/hqc-256/avx2/gf2x.c +++ b/crypto_kem/hqc-256/avx2/gf2x.c @@ -335,9 +335,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { static __m256i W0[2 * (T_TM3_3W_256)], W1[2 * (T_TM3_3W_256)], W2[2 * (T_TM3_3W_256)], W3[2 * (T_TM3_3W_256)], W4[2 * (T_TM3_3W_256)]; static __m256i tmp[2 * (T_TM3_3W_256)]; static __m256i ro256[6 * (T_TM3_3W_256)]; - const __m256i zero = (__m256i) { - 0ul, 0ul, 0ul, 0ul - }; + const __m256i zero = _mm256_setzero_si256(); int32_t T2 = T_TM3_3W_64 << 1; for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { @@ -354,24 +352,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { int32_t i4 = i << 2; int32_t i41 = i4 + 1; - U0[i] = (__m256i) { - A[i4], A[i41], 0x0ul, 0x0ul - }; - V0[i] = (__m256i) { - B[i4], B[i41], 0x0ul, 0x0ul - }; - U1[i] = (__m256i) { - A[i4 + T_TM3_3W_64 - 2], A[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - V1[i] = (__m256i) { - B[i4 + T_TM3_3W_64 - 2], B[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - U2[i] = (__m256i) { - A[i4 - 4 + T2], A[i4 - 3 + T2], 0x0ul, 0x0ul - }; - V2[i] = (__m256i) { - B[i4 - 4 + T2], B[i4 - 3 + T2], 0x0ul, 0x0ul - }; + U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); + V0[i] = _mm256_set_epi64x(0, 0, B[i41], B[i4]); + U1[i] = _mm256_set_epi64x(0, 0, A[i41 + T_TM3_3W_64 - 2], A[i4 + T_TM3_3W_64 - 2]); + V1[i] = _mm256_set_epi64x(0, 0, B[i41 + T_TM3_3W_64 - 2], B[i4 + T_TM3_3W_64 - 2]); + U2[i] = _mm256_set_epi64x(0, 0, A[i4 - 3 + T2], A[i4 - 4 + T2]); + V2[i] = _mm256_set_epi64x(0, 0, B[i4 - 3 + T2], B[i4 - 4 + T2]); } // Evaluation phase : x= X^64 @@ -459,9 +445,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { //W2 =(W2 + W3 + W4*(x^3+1))/(x+1) U1_64 = ((int64_t *) W4); __m256i *U1_256 = (__m256i *) (U1_64 + 1); - tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ (__m256i) { - 0x0ul, 0x0ul, 0x0ul, U1_64[0] - }; + tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); @@ -555,9 +539,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { __m256i W0[2 * (T_TM3R_3W_256 + 2)], W1[2 * (T_TM3R_3W_256 + 2)], W2[2 * (T_TM3R_3W_256 + 2)], W3[2 * (T_TM3R_3W_256 + 2)], W4[2 * (T_TM3R_3W_256 + 2)]; __m256i tmp[2 * (T_TM3R_3W_256 + 2) + 3]; __m256i ro256[tTM3R / 2]; - const __m256i zero = (__m256i) { - 0ul, 0ul, 0ul, 0ul - }; + const __m256i zero = _mm256_setzero_si256(); int32_t T2 = T_TM3R_3W_64 << 1; for (int32_t i = 0 ; i < T_TM3R_3W_256 ; i++) { diff --git a/crypto_kem/hqc-256/clean/gf.c b/crypto_kem/hqc-256/clean/gf.c index 41f630a7..027ac6b3 100644 --- a/crypto_kem/hqc-256/clean/gf.c +++ b/crypto_kem/hqc-256/clean/gf.c @@ -7,61 +7,6 @@ */ - -/** - * Powers of the root alpha of x^10 + x^3 + 1. - * The last two elements are needed by the PQCLEAN_HQC256_CLEAN_gf_mul function from gf_mul.c - * (for example if both elements to multiply are zero). - */ -static const uint16_t exp[1026] = { - 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 9, 18, 36, 72, 144, 288, 576, 137, 274, 548, 65, 130, 260, 520, 25, 50, 100, 200, 400, 800, 585, 155, 310, 620, 209, 418, 836, 641, 267, 534, 37, 74, 148, 296, 592, 169, 338, 676, 321, 642, 269, 538, 61, 122, 244, 488, 976, 937, 859, 703, 375, 750, 469, 938, 861, 691, 367, 734, 437, 874, 733, 435, 870, 709, 387, 774, 517, 3, 6, 12, 24, 48, 96, 192, 384, 768, 521, 27, 54, 108, 216, 432, 864, 713, 411, 822, 613, 195, 390, 780, 529, 43, 86, 172, 344, 688, 361, 722, 429, 858, 701, 371, 742, 453, 906, 797, 563, 111, 222, 444, 888, 761, 507, 1014, 997, 963, 911, 791, 551, 71, 142, 284, 568, 121, 242, 484, 968, 921, 827, 639, 247, 494, 988, 945, 875, 735, 439, 878, 725, 419, 838, 645, 259, 518, 5, 10, 20, 40, 80, 160, 320, 640, 265, 530, 45, 90, 180, 360, 720, 425, 850, 685, 339, 678, 325, 650, 285, 570, 125, 250, 500, 1000, 985, 955, 895, 759, 487, 974, 917, 803, 591, 151, 302, 604, 177, 354, 708, 385, 770, 525, 19, 38, 76, 152, 304, 608, 201, 402, 804, 577, 139, 278, 556, 81, 162, 324, 648, 281, 562, 109, 218, 436, 872, 729, 443, 886, 741, 451, 902, 773, 515, 15, 30, 60, 120, 240, 480, 960, 905, 795, 575, 119, 238, 476, 952, 889, 763, 511, 1022, 1013, 995, 975, 919, 807, 583, 135, 270, 540, 49, 98, 196, 392, 784, 553, 91, 182, 364, 728, 441, 882, 749, 467, 934, 837, 643, 271, 542, 53, 106, 212, 424, 848, 681, 347, 694, 357, 714, 413, 826, 637, 243, 486, 972, 913, 811, 607, 183, 366, 732, 433, 866, 717, 403, 806, 581, 131, 262, 524, 17, 34, 68, 136, 272, 544, 73, 146, 292, 584, 153, 306, 612, 193, 386, 772, 513, 11, 22, 44, 88, 176, 352, 704, 393, 786, 557, 83, 166, 332, 664, 313, 626, 237, 474, 948, 865, 715, 415, 830, 629, 227, 454, 908, 785, 555, 95, 190, 380, 760, 505, 1010, 1005, 979, 943, 855, 679, 327, 654, 277, 554, 93, 186, 372, 744, 473, 946, 877, 723, 431, 862, 693, 355, 710, 389, 778, 541, 51, 102, 204, 408, 816, 617, 219, 438, 876, 721, 427, 854, 677, 323, 646, 261, 522, 29, 58, 116, 232, 464, 928, 841, 667, 319, 638, 245, 490, 980, 929, 843, 671, 311, 622, 213, 426, 852, 673, 331, 662, 293, 586, 157, 314, 628, 225, 450, 900, 769, 523, 31, 62, 124, 248, 496, 992, 969, 923, 831, 631, 231, 462, 924, 817, 619, 223, 446, 892, 753, 491, 982, 933, 835, 655, 279, 558, 85, 170, 340, 680, 345, 690, 365, 730, 445, 890, 765, 499, 998, 965, 899, 783, 535, 39, 78, 156, 312, 624, 233, 466, 932, 833, 651, 287, 574, 117, 234, 468, 936, 857, 699, 383, 766, 501, 1002, 989, 947, 879, 727, 423, 846, 661, 291, 582, 133, 266, 532, 33, 66, 132, 264, 528, 41, 82, 164, 328, 656, 297, 594, 173, 346, 692, 353, 706, 397, 794, 573, 115, 230, 460, 920, 825, 635, 255, 510, 1020, 1009, 1003, 991, 951, 871, 711, 391, 782, 533, 35, 70, 140, 280, 560, 105, 210, 420, 840, 665, 315, 630, 229, 458, 916, 801, 587, 159, 318, 636, 241, 482, 964, 897, 779, 543, 55, 110, 220, 440, 880, 745, 475, 950, 869, 707, 399, 798, 565, 99, 198, 396, 792, 569, 123, 246, 492, 984, 953, 891, 767, 503, 1006, 981, 931, 847, 663, 295, 590, 149, 298, 596, 161, 322, 644, 257, 514, 13, 26, 52, 104, 208, 416, 832, 649, 283, 566, 101, 202, 404, 808, 601, 187, 374, 748, 465, 930, 845, 659, 303, 606, 181, 362, 724, 417, 834, 653, 275, 550, 69, 138, 276, 552, 89, 178, 356, 712, 409, 818, 621, 211, 422, 844, 657, 299, 598, 165, 330, 660, 289, 578, 141, 282, 564, 97, 194, 388, 776, 537, 59, 118, 236, 472, 944, 873, 731, 447, 894, 757, 483, 966, 901, 771, 527, 23, 46, 92, 184, 368, 736, 457, 914, 813, 595, 175, 350, 700, 369, 738, 461, 922, 829, 627, 239, 478, 956, 881, 747, 479, 958, 885, 739, 463, 926, 821, 611, 207, 414, 828, 625, 235, 470, 940, 849, 683, 351, 702, 373, 746, 477, 954, 893, 755, 495, 990, 949, 867, 719, 407, 814, 597, 163, 326, 652, 273, 546, 77, 154, 308, 616, 217, 434, 868, 705, 395, 790, 549, 67, 134, 268, 536, 57, 114, 228, 456, 912, 809, 603, 191, 382, 764, 497, 994, 973, 915, 815, 599, 167, 334, 668, 305, 610, 205, 410, 820, 609, 203, 406, 812, 593, 171, 342, 684, 337, 674, 333, 666, 317, 634, 253, 506, 1012, 993, 971, 927, 823, 615, 199, 398, 796, 561, 107, 214, 428, 856, 697, 379, 758, 485, 970, 925, 819, 623, 215, 430, 860, 689, 363, 726, 421, 842, 669, 307, 614, 197, 394, 788, 545, 75, 150, 300, 600, 185, 370, 740, 449, 898, 781, 531, 47, 94, 188, 376, 752, 489, 978, 941, 851, 687, 343, 686, 341, 682, 349, 698, 381, 762, 509, 1018, 1021, 1011, 1007, 983, 935, 839, 647, 263, 526, 21, 42, 84, 168, 336, 672, 329, 658, 301, 602, 189, 378, 756, 481, 962, 909, 787, 559, 87, 174, 348, 696, 377, 754, 493, 986, 957, 883, 751, 471, 942, 853, 675, 335, 670, 309, 618, 221, 442, 884, 737, 459, 918, 805, 579, 143, 286, 572, 113, 226, 452, 904, 793, 571, 127, 254, 508, 1016, 1017, 1019, 1023, 1015, 999, 967, 903, 775, 519, 7, 14, 28, 56, 112, 224, 448, 896, 777, 539, 63, 126, 252, 504, 1008, 1001, 987, 959, 887, 743, 455, 910, 789, 547, 79, 158, 316, 632, 249, 498, 996, 961, 907, 799, 567, 103, 206, 412, 824, 633, 251, 502, 1004, 977, 939, 863, 695, 359, 718, 405, 810, 605, 179, 358, 716, 401, 802, 589, 147, 294, 588, 145, 290, 580, 129, 258, 516, 1, 2, 4 -}; - - - -/** - * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). - * The logarithm of 0 is set to 1024 by convention. - */ -static const uint16_t log[1024] = { - 1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949 -}; - - - -/** - * @brief Generates exp and log lookup tables of GF(2^m). - * - * The logarithm of 0 is defined as 2^PARAM_M by convention.
- * The last two elements of the exp table are needed by the PQCLEAN_HQC256_CLEAN_gf_mul function from gf_lutmul.c - * (for example if both elements to multiply are zero). - * @param[out] exp Array of size 2^PARAM_M + 2 receiving the powers of the primitive element - * @param[out] log Array of size 2^PARAM_M receiving the logarithms of the elements of GF(2^m) - * @param[in] m Parameter of Galois field GF(2^m) - */ -void PQCLEAN_HQC256_CLEAN_gf_generate(uint16_t *exp, uint16_t *log, int16_t m) { - uint16_t elt = 1; - uint16_t alpha = 2; // primitive element of GF(2^PARAM_M) - uint16_t gf_poly = PARAM_GF_POLY; - - for (size_t i = 0 ; i < (1U << m) - 1 ; ++i) { - exp[i] = elt; - log[elt] = i; - - elt *= alpha; - if (elt >= 1 << m) { - elt ^= gf_poly; - } - } - - exp[(1 << m) - 1] = 1; - exp[1 << m] = 2; - exp[(1 << m) + 1] = 4; - log[0] = 1 << m; // by convention -} - - - /** * @brief Returns the integer i such that elt = a^i where a is the primitive element of GF(2^PARAM_M). * diff --git a/crypto_kem/hqc-256/clean/gf.h b/crypto_kem/hqc-256/clean/gf.h index f539652d..86051d1b 100644 --- a/crypto_kem/hqc-256/clean/gf.h +++ b/crypto_kem/hqc-256/clean/gf.h @@ -12,7 +12,20 @@ #include #include -void PQCLEAN_HQC256_CLEAN_gf_generate(uint16_t *exp, uint16_t *log, int16_t m); + +/** + * Powers of the root alpha of x^10 + x^3 + 1. + * The last two elements are needed by the PQCLEAN_HQC256_CLEAN_gf_mul function from gf_mul.c + * (for example if both elements to multiply are zero). + */ +static const uint16_t exp[1026] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 9, 18, 36, 72, 144, 288, 576, 137, 274, 548, 65, 130, 260, 520, 25, 50, 100, 200, 400, 800, 585, 155, 310, 620, 209, 418, 836, 641, 267, 534, 37, 74, 148, 296, 592, 169, 338, 676, 321, 642, 269, 538, 61, 122, 244, 488, 976, 937, 859, 703, 375, 750, 469, 938, 861, 691, 367, 734, 437, 874, 733, 435, 870, 709, 387, 774, 517, 3, 6, 12, 24, 48, 96, 192, 384, 768, 521, 27, 54, 108, 216, 432, 864, 713, 411, 822, 613, 195, 390, 780, 529, 43, 86, 172, 344, 688, 361, 722, 429, 858, 701, 371, 742, 453, 906, 797, 563, 111, 222, 444, 888, 761, 507, 1014, 997, 963, 911, 791, 551, 71, 142, 284, 568, 121, 242, 484, 968, 921, 827, 639, 247, 494, 988, 945, 875, 735, 439, 878, 725, 419, 838, 645, 259, 518, 5, 10, 20, 40, 80, 160, 320, 640, 265, 530, 45, 90, 180, 360, 720, 425, 850, 685, 339, 678, 325, 650, 285, 570, 125, 250, 500, 1000, 985, 955, 895, 759, 487, 974, 917, 803, 591, 151, 302, 604, 177, 354, 708, 385, 770, 525, 19, 38, 76, 152, 304, 608, 201, 402, 804, 577, 139, 278, 556, 81, 162, 324, 648, 281, 562, 109, 218, 436, 872, 729, 443, 886, 741, 451, 902, 773, 515, 15, 30, 60, 120, 240, 480, 960, 905, 795, 575, 119, 238, 476, 952, 889, 763, 511, 1022, 1013, 995, 975, 919, 807, 583, 135, 270, 540, 49, 98, 196, 392, 784, 553, 91, 182, 364, 728, 441, 882, 749, 467, 934, 837, 643, 271, 542, 53, 106, 212, 424, 848, 681, 347, 694, 357, 714, 413, 826, 637, 243, 486, 972, 913, 811, 607, 183, 366, 732, 433, 866, 717, 403, 806, 581, 131, 262, 524, 17, 34, 68, 136, 272, 544, 73, 146, 292, 584, 153, 306, 612, 193, 386, 772, 513, 11, 22, 44, 88, 176, 352, 704, 393, 786, 557, 83, 166, 332, 664, 313, 626, 237, 474, 948, 865, 715, 415, 830, 629, 227, 454, 908, 785, 555, 95, 190, 380, 760, 505, 1010, 1005, 979, 943, 855, 679, 327, 654, 277, 554, 93, 186, 372, 744, 473, 946, 877, 723, 431, 862, 693, 355, 710, 389, 778, 541, 51, 102, 204, 408, 816, 617, 219, 438, 876, 721, 427, 854, 677, 323, 646, 261, 522, 29, 58, 116, 232, 464, 928, 841, 667, 319, 638, 245, 490, 980, 929, 843, 671, 311, 622, 213, 426, 852, 673, 331, 662, 293, 586, 157, 314, 628, 225, 450, 900, 769, 523, 31, 62, 124, 248, 496, 992, 969, 923, 831, 631, 231, 462, 924, 817, 619, 223, 446, 892, 753, 491, 982, 933, 835, 655, 279, 558, 85, 170, 340, 680, 345, 690, 365, 730, 445, 890, 765, 499, 998, 965, 899, 783, 535, 39, 78, 156, 312, 624, 233, 466, 932, 833, 651, 287, 574, 117, 234, 468, 936, 857, 699, 383, 766, 501, 1002, 989, 947, 879, 727, 423, 846, 661, 291, 582, 133, 266, 532, 33, 66, 132, 264, 528, 41, 82, 164, 328, 656, 297, 594, 173, 346, 692, 353, 706, 397, 794, 573, 115, 230, 460, 920, 825, 635, 255, 510, 1020, 1009, 1003, 991, 951, 871, 711, 391, 782, 533, 35, 70, 140, 280, 560, 105, 210, 420, 840, 665, 315, 630, 229, 458, 916, 801, 587, 159, 318, 636, 241, 482, 964, 897, 779, 543, 55, 110, 220, 440, 880, 745, 475, 950, 869, 707, 399, 798, 565, 99, 198, 396, 792, 569, 123, 246, 492, 984, 953, 891, 767, 503, 1006, 981, 931, 847, 663, 295, 590, 149, 298, 596, 161, 322, 644, 257, 514, 13, 26, 52, 104, 208, 416, 832, 649, 283, 566, 101, 202, 404, 808, 601, 187, 374, 748, 465, 930, 845, 659, 303, 606, 181, 362, 724, 417, 834, 653, 275, 550, 69, 138, 276, 552, 89, 178, 356, 712, 409, 818, 621, 211, 422, 844, 657, 299, 598, 165, 330, 660, 289, 578, 141, 282, 564, 97, 194, 388, 776, 537, 59, 118, 236, 472, 944, 873, 731, 447, 894, 757, 483, 966, 901, 771, 527, 23, 46, 92, 184, 368, 736, 457, 914, 813, 595, 175, 350, 700, 369, 738, 461, 922, 829, 627, 239, 478, 956, 881, 747, 479, 958, 885, 739, 463, 926, 821, 611, 207, 414, 828, 625, 235, 470, 940, 849, 683, 351, 702, 373, 746, 477, 954, 893, 755, 495, 990, 949, 867, 719, 407, 814, 597, 163, 326, 652, 273, 546, 77, 154, 308, 616, 217, 434, 868, 705, 395, 790, 549, 67, 134, 268, 536, 57, 114, 228, 456, 912, 809, 603, 191, 382, 764, 497, 994, 973, 915, 815, 599, 167, 334, 668, 305, 610, 205, 410, 820, 609, 203, 406, 812, 593, 171, 342, 684, 337, 674, 333, 666, 317, 634, 253, 506, 1012, 993, 971, 927, 823, 615, 199, 398, 796, 561, 107, 214, 428, 856, 697, 379, 758, 485, 970, 925, 819, 623, 215, 430, 860, 689, 363, 726, 421, 842, 669, 307, 614, 197, 394, 788, 545, 75, 150, 300, 600, 185, 370, 740, 449, 898, 781, 531, 47, 94, 188, 376, 752, 489, 978, 941, 851, 687, 343, 686, 341, 682, 349, 698, 381, 762, 509, 1018, 1021, 1011, 1007, 983, 935, 839, 647, 263, 526, 21, 42, 84, 168, 336, 672, 329, 658, 301, 602, 189, 378, 756, 481, 962, 909, 787, 559, 87, 174, 348, 696, 377, 754, 493, 986, 957, 883, 751, 471, 942, 853, 675, 335, 670, 309, 618, 221, 442, 884, 737, 459, 918, 805, 579, 143, 286, 572, 113, 226, 452, 904, 793, 571, 127, 254, 508, 1016, 1017, 1019, 1023, 1015, 999, 967, 903, 775, 519, 7, 14, 28, 56, 112, 224, 448, 896, 777, 539, 63, 126, 252, 504, 1008, 1001, 987, 959, 887, 743, 455, 910, 789, 547, 79, 158, 316, 632, 249, 498, 996, 961, 907, 799, 567, 103, 206, 412, 824, 633, 251, 502, 1004, 977, 939, 863, 695, 359, 718, 405, 810, 605, 179, 358, 716, 401, 802, 589, 147, 294, 588, 145, 290, 580, 129, 258, 516, 1, 2, 4}; + + +/** + * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1). + * The logarithm of 0 is set to 1024 by convention. + */ +static const uint16_t log[1024] = {1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 646, 458, 36, 273, 150, 885, 559, 408, 841, 416, 663, 638, 506, 606, 281, 734, 170, 868, 422, 920, 396, 363, 825, 495, 109, 58, 836, 64, 378, 1001, 92, 344, 300, 747, 763, 585, 72, 546, 222, 685, 69, 144, 393, 375, 147, 503, 581, 717, 269, 916, 928, 721, 225, 974, 120, 245, 471, 600, 453, 742, 688, 184, 963, 574, 857, 476, 433, 692, 228, 953, 940, 238, 114, 988, 351, 904, 977, 126, 776, 293, 702, 785, 565, 188, 931, 252, 536, 137, 711, 443, 448, 831, 724, 815, 407, 415, 637, 605, 486, 457, 272, 884, 494, 57, 63, 1000, 733, 867, 919, 362, 684, 143, 374, 502, 343, 746, 584, 545, 244, 599, 741, 183, 716, 915, 720, 973, 237, 987, 903, 125, 573, 475, 691, 952, 136, 442, 830, 814, 292, 784, 187, 251, 56, 999, 866, 361, 414, 604, 456, 883, 598, 182, 914, 972, 142, 501, 745, 544, 441, 813, 783, 250, 986, 124, 474, 951, 181, 971, 500, 543, 998, 360, 603, 882, 970, 542, 359, 881, 812, 249, 123, 950, 946, 947, 879, 948, 541, 880, 248, 949}; uint16_t PQCLEAN_HQC256_CLEAN_gf_log(uint16_t elt); diff --git a/crypto_kem/hqc-rmrs-128/avx2/fft.c b/crypto_kem/hqc-rmrs-128/avx2/fft.c index 4e7ccedc..a9a23813 100644 --- a/crypto_kem/hqc-rmrs-128/avx2/fft.c +++ b/crypto_kem/hqc-rmrs-128/avx2/fft.c @@ -18,6 +18,7 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -27,7 +28,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -45,10 +47,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -88,7 +91,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -99,51 +102,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -159,25 +167,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -187,8 +197,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(beta_m_pow, f[i]); } @@ -198,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS128_AVX2_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(gammas[i]) ^ gammas[i]; } @@ -209,10 +219,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -223,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -252,14 +263,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -275,7 +287,7 @@ void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQCRMRS128_AVX2_gf_square(betas[i]) ^ betas[i]; } @@ -283,6 +295,7 @@ void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -293,7 +306,7 @@ void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS128_AVX2_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -311,17 +324,16 @@ void PQCLEAN_HQCRMRS128_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs void PQCLEAN_HQCRMRS128_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { uint16_t gammas[PARAM_M - 1] = {0}; uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; - size_t k = 1 << (PARAM_M - 1); + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + k = 1 << (PARAM_M - 1); error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); - size_t index = PARAM_GF_MUL_ORDER; - - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS128_AVX2_gf_log(gammas_sums[i]); error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); diff --git a/crypto_kem/hqc-rmrs-128/avx2/gf.c b/crypto_kem/hqc-rmrs-128/avx2/gf.c index 31195a00..818207cb 100644 --- a/crypto_kem/hqc-rmrs-128/avx2/gf.c +++ b/crypto_kem/hqc-rmrs-128/avx2/gf.c @@ -30,29 +30,28 @@ uint16_t PQCLEAN_HQCRMRS128_AVX2_gf_log(uint16_t elt) { * @param[in] deg_x The degree of polynomial x */ static uint16_t gf_reduce(uint64_t x, size_t deg_x) { - // Compute the distance between the primitive polynomial first two set bits - size_t lz1 = __builtin_clz(PARAM_GF_POLY); - size_t lz2 = __builtin_clz(PARAM_GF_POLY ^ 1 << PARAM_M); - size_t dist = lz2 - lz1; + uint16_t z1, z2, rmdr, dist; + uint64_t mod; + size_t steps, i, j; // Deduce the number of steps of reduction - size_t steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), dist); + steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); // Reduce - for (size_t i = 0; i < steps; ++i) { - uint64_t mod = x >> PARAM_M; + for (i = 0; i < steps; ++i) { + mod = x >> PARAM_M; x &= (1 << PARAM_M) - 1; x ^= mod; - size_t tz1 = 0; - uint16_t rmdr = PARAM_GF_POLY ^ 1; - for (size_t j = __builtin_popcount(PARAM_GF_POLY) - 2; j; --j) { - size_t tz2 = __builtin_ctz(rmdr); - size_t shift = tz2 - tz1; - mod <<= shift; + z1 = 0; + rmdr = PARAM_GF_POLY ^ 1; + for (j = PARAM_GF_POLY_WT - 2; j; --j) { + z2 = __tzcnt_u16(rmdr); + dist = (uint16_t) (z2 - z1); + mod <<= dist; x ^= mod; - rmdr ^= 1 << tz2; - tz1 = tz2; + rmdr ^= 1 << z2; + z1 = z2; } } diff --git a/crypto_kem/hqc-rmrs-128/avx2/gf2x.c b/crypto_kem/hqc-rmrs-128/avx2/gf2x.c index 6d7e932a..60aea53d 100644 --- a/crypto_kem/hqc-rmrs-128/avx2/gf2x.c +++ b/crypto_kem/hqc-rmrs-128/avx2/gf2x.c @@ -328,9 +328,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { static __m256i W0[2 * (T_TM3_3W_256)], W1[2 * (T_TM3_3W_256)], W2[2 * (T_TM3_3W_256)], W3[2 * (T_TM3_3W_256)], W4[2 * (T_TM3_3W_256)]; static __m256i tmp[2 * (T_TM3_3W_256)]; static __m256i ro256[6 * (T_TM3_3W_256)]; - const __m256i zero = (__m256i) { - 0ul, 0ul, 0ul, 0ul - }; + const __m256i zero = _mm256_setzero_si256(); int32_t T2 = T_TM3_3W_64 << 1; for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { @@ -347,24 +345,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { int32_t i4 = i << 2; int32_t i41 = i4 + 1; - U0[i] = (__m256i) { - A[i4], A[i41], 0x0ul, 0x0ul - }; - V0[i] = (__m256i) { - B[i4], B[i41], 0x0ul, 0x0ul - }; - U1[i] = (__m256i) { - A[i4 + T_TM3_3W_64 - 2], A[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - V1[i] = (__m256i) { - B[i4 + T_TM3_3W_64 - 2], B[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - U2[i] = (__m256i) { - A[i4 - 4 + T2], A[i4 - 3 + T2], 0x0ul, 0x0ul - }; - V2[i] = (__m256i) { - B[i4 - 4 + T2], B[i4 - 3 + T2], 0x0ul, 0x0ul - }; + U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); + V0[i] = _mm256_set_epi64x(0, 0, B[i41], B[i4]); + U1[i] = _mm256_set_epi64x(0, 0, A[i41 + T_TM3_3W_64 - 2], A[i4 + T_TM3_3W_64 - 2]); + V1[i] = _mm256_set_epi64x(0, 0, B[i41 + T_TM3_3W_64 - 2], B[i4 + T_TM3_3W_64 - 2]); + U2[i] = _mm256_set_epi64x(0, 0, A[i4 - 3 + T2], A[i4 - 4 + T2]); + V2[i] = _mm256_set_epi64x(0, 0, B[i4 - 3 + T2], B[i4 - 4 + T2]); } // Evaluation phase : x= X^64 @@ -452,9 +438,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { //W2 =(W2 + W3 + W4*(x^3+1))/(x+1) U1_64 = ((int64_t *) W4); __m256i *U1_256 = (__m256i *) (U1_64 + 1); - tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ (__m256i) { - 0x0ul, 0x0ul, 0x0ul, U1_64[0] - }; + tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); diff --git a/crypto_kem/hqc-rmrs-192/avx2/fft.c b/crypto_kem/hqc-rmrs-192/avx2/fft.c index 0d16a456..13968252 100644 --- a/crypto_kem/hqc-rmrs-192/avx2/fft.c +++ b/crypto_kem/hqc-rmrs-192/avx2/fft.c @@ -18,6 +18,7 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -27,7 +28,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -45,10 +47,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -88,7 +91,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -99,51 +102,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -159,25 +167,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -187,8 +197,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(beta_m_pow, f[i]); } @@ -198,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS192_AVX2_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_square(gammas[i]) ^ gammas[i]; } @@ -209,10 +219,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -223,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -252,14 +263,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -275,7 +287,7 @@ void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQCRMRS192_AVX2_gf_square(betas[i]) ^ betas[i]; } @@ -283,6 +295,7 @@ void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -293,7 +306,7 @@ void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -311,17 +324,16 @@ void PQCLEAN_HQCRMRS192_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs void PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { uint16_t gammas[PARAM_M - 1] = {0}; uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; - size_t k = 1 << (PARAM_M - 1); + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + k = 1 << (PARAM_M - 1); error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); - size_t index = PARAM_GF_MUL_ORDER; - - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS192_AVX2_gf_log(gammas_sums[i]); error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); diff --git a/crypto_kem/hqc-rmrs-192/avx2/gf.c b/crypto_kem/hqc-rmrs-192/avx2/gf.c index 2d74a30d..03ca20eb 100644 --- a/crypto_kem/hqc-rmrs-192/avx2/gf.c +++ b/crypto_kem/hqc-rmrs-192/avx2/gf.c @@ -30,29 +30,28 @@ uint16_t PQCLEAN_HQCRMRS192_AVX2_gf_log(uint16_t elt) { * @param[in] deg_x The degree of polynomial x */ static uint16_t gf_reduce(uint64_t x, size_t deg_x) { - // Compute the distance between the primitive polynomial first two set bits - size_t lz1 = __builtin_clz(PARAM_GF_POLY); - size_t lz2 = __builtin_clz(PARAM_GF_POLY ^ 1 << PARAM_M); - size_t dist = lz2 - lz1; + uint16_t z1, z2, rmdr, dist; + uint64_t mod; + size_t steps, i, j; // Deduce the number of steps of reduction - size_t steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), dist); + steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); // Reduce - for (size_t i = 0; i < steps; ++i) { - uint64_t mod = x >> PARAM_M; + for (i = 0; i < steps; ++i) { + mod = x >> PARAM_M; x &= (1 << PARAM_M) - 1; x ^= mod; - size_t tz1 = 0; - uint16_t rmdr = PARAM_GF_POLY ^ 1; - for (size_t j = __builtin_popcount(PARAM_GF_POLY) - 2; j; --j) { - size_t tz2 = __builtin_ctz(rmdr); - size_t shift = tz2 - tz1; - mod <<= shift; + z1 = 0; + rmdr = PARAM_GF_POLY ^ 1; + for (j = PARAM_GF_POLY_WT - 2; j; --j) { + z2 = __tzcnt_u16(rmdr); + dist = (uint16_t) (z2 - z1); + mod <<= dist; x ^= mod; - rmdr ^= 1 << tz2; - tz1 = tz2; + rmdr ^= 1 << z2; + z1 = z2; } } diff --git a/crypto_kem/hqc-rmrs-192/avx2/gf2x.c b/crypto_kem/hqc-rmrs-192/avx2/gf2x.c index e2c62fa2..75c24717 100644 --- a/crypto_kem/hqc-rmrs-192/avx2/gf2x.c +++ b/crypto_kem/hqc-rmrs-192/avx2/gf2x.c @@ -368,9 +368,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { static __m256i W0[2 * (T_TM3_3W_256)], W1[2 * (T_TM3_3W_256)], W2[2 * (T_TM3_3W_256)], W3[2 * (T_TM3_3W_256)], W4[2 * (T_TM3_3W_256)]; static __m256i tmp[2 * (T_TM3_3W_256)]; static __m256i ro256[6 * (T_TM3_3W_256)]; - const __m256i zero = (__m256i) { - 0ul, 0ul, 0ul, 0ul - }; + const __m256i zero = _mm256_setzero_si256(); int32_t T2 = T_TM3_3W_64 << 1; for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { @@ -387,24 +385,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { int32_t i4 = i << 2; int32_t i41 = i4 + 1; - U0[i] = (__m256i) { - A[i4], A[i41], 0x0ul, 0x0ul - }; - V0[i] = (__m256i) { - B[i4], B[i41], 0x0ul, 0x0ul - }; - U1[i] = (__m256i) { - A[i4 + T_TM3_3W_64 - 2], A[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - V1[i] = (__m256i) { - B[i4 + T_TM3_3W_64 - 2], B[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - U2[i] = (__m256i) { - A[i4 - 4 + T2], A[i4 - 3 + T2], 0x0ul, 0x0ul - }; - V2[i] = (__m256i) { - B[i4 - 4 + T2], B[i4 - 3 + T2], 0x0ul, 0x0ul - }; + U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); + V0[i] = _mm256_set_epi64x(0, 0, B[i41], B[i4]); + U1[i] = _mm256_set_epi64x(0, 0, A[i41 + T_TM3_3W_64 - 2], A[i4 + T_TM3_3W_64 - 2]); + V1[i] = _mm256_set_epi64x(0, 0, B[i41 + T_TM3_3W_64 - 2], B[i4 + T_TM3_3W_64 - 2]); + U2[i] = _mm256_set_epi64x(0, 0, A[i4 - 3 + T2], A[i4 - 4 + T2]); + V2[i] = _mm256_set_epi64x(0, 0, B[i4 - 3 + T2], B[i4 - 4 + T2]); } // Evaluation phase : x= X^64 @@ -492,9 +478,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { //W2 =(W2 + W3 + W4*(x^3+1))/(x+1) U1_64 = ((int64_t *) W4); __m256i *U1_256 = (__m256i *) (U1_64 + 1); - tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ (__m256i) { - 0x0ul, 0x0ul, 0x0ul, U1_64[0] - }; + tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); diff --git a/crypto_kem/hqc-rmrs-256/avx2/fft.c b/crypto_kem/hqc-rmrs-256/avx2/fft.c index 46f73955..c0985083 100644 --- a/crypto_kem/hqc-rmrs-256/avx2/fft.c +++ b/crypto_kem/hqc-rmrs-256/avx2/fft.c @@ -18,6 +18,7 @@ static void compute_fft_betas(uint16_t *betas); static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size); static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f); static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas); @@ -27,7 +28,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[out] betas Array of size PARAM_M-1 */ static void compute_fft_betas(uint16_t *betas) { - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + size_t i; + for (i = 0 ; i < PARAM_M - 1 ; ++i) { betas[i] = 1 << (PARAM_M - 1 - i); } } @@ -45,10 +47,11 @@ static void compute_fft_betas(uint16_t *betas) { * @param[in] set_size Size of the array set */ static void compute_subset_sums(uint16_t *subset_sums, const uint16_t *set, size_t set_size) { + size_t i, j; subset_sums[0] = 0; - for (size_t i = 0 ; i < set_size ; ++i) { - for (size_t j = 0 ; j < (1U << i) ; ++j) { + for (i = 0 ; i < set_size ; ++i) { + for (j = 0 ; j < (1U << i) ; ++j) { subset_sums[(1 << i) + j] = set[i] ^ subset_sums[j]; } } @@ -88,7 +91,7 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[2] = f[3] ^ f1[1] ^ f0[3]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 3: f0[0] = f[0]; @@ -99,51 +102,56 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { f1[3] = f[7]; f0[1] = f[2] ^ f0[2] ^ f1[1]; f1[0] = f[1] ^ f0[1]; - return; + break; case 2: f0[0] = f[0]; f0[1] = f[2] ^ f[3]; f1[0] = f[1] ^ f0[1]; f1[1] = f[3]; - return; + break; case 1: f0[0] = f[0]; f1[0] = f[1]; - return; + break; default: - ; - size_t n = 1 << (m_f - 2); - - uint16_t Q[2 * (1 << (PARAM_FFT - 2))]; - uint16_t R[2 * (1 << (PARAM_FFT - 2))]; - - uint16_t Q0[1 << (PARAM_FFT - 2)]; - uint16_t Q1[1 << (PARAM_FFT - 2)]; - uint16_t R0[1 << (PARAM_FFT - 2)]; - uint16_t R1[1 << (PARAM_FFT - 2)]; - - memcpy(Q, f + 3 * n, 2 * n); - memcpy(Q + n, f + 3 * n, 2 * n); - memcpy(R, f, 4 * n); - - for (size_t i = 0 ; i < n ; ++i) { - Q[i] ^= f[2 * n + i]; - R[n + i] ^= Q[i]; - } - - radix(Q0, Q1, Q, m_f - 1); - radix(R0, R1, R, m_f - 1); - - memcpy(f0, R0, 2 * n); - memcpy(f0 + n, Q0, 2 * n); - memcpy(f1, R1, 2 * n); - memcpy(f1 + n, Q1, 2 * n); + radix_big(f0, f1, f, m_f); + break; } } +static void radix_big(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { + uint16_t Q[2 * (1 << (PARAM_FFT - 2))] = {0}; + uint16_t R[2 * (1 << (PARAM_FFT - 2))] = {0}; + + uint16_t Q0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t Q1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t R1[1 << (PARAM_FFT - 2)] = {0}; + + size_t i, n; + + n = 1 << (m_f - 2); + memcpy(Q, f + 3 * n, 2 * n); + memcpy(Q + n, f + 3 * n, 2 * n); + memcpy(R, f, 4 * n); + + for (i = 0 ; i < n ; ++i) { + Q[i] ^= f[2 * n + i]; + R[n + i] ^= Q[i]; + } + + radix(Q0, Q1, Q, m_f - 1); + radix(R0, R1, R, m_f - 1); + + memcpy(f0, R0, 2 * n); + memcpy(f0 + n, Q0, 2 * n); + memcpy(f1, R1, 2 * n); + memcpy(f1 + n, Q1, 2 * n); +} + /** @@ -159,25 +167,27 @@ static void radix(uint16_t *f0, uint16_t *f1, const uint16_t *f, uint32_t m_f) { * @param[in] betas FFT constants */ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32_t m_f, const uint16_t *betas) { - uint16_t f0[1 << (PARAM_FFT - 2)]; - uint16_t f1[1 << (PARAM_FFT - 2)]; - uint16_t gammas[PARAM_M - 2]; - uint16_t deltas[PARAM_M - 2]; - size_t k = 1 << (m - 1); - uint16_t gammas_sums[1 << (PARAM_M - 2)]; + uint16_t f0[1 << (PARAM_FFT - 2)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 2)] = {0}; + uint16_t gammas[PARAM_M - 2] = {0}; + uint16_t deltas[PARAM_M - 2] = {0}; + uint16_t gammas_sums[1 << (PARAM_M - 2)] = {0}; uint16_t u[1 << (PARAM_M - 2)] = {0}; uint16_t v[1 << (PARAM_M - 2)] = {0}; + uint16_t tmp[PARAM_M - (PARAM_FFT - 1)] = {0}; + + uint16_t beta_m_pow; + size_t i, j, k; // Step 1 if (m_f == 1) { - uint16_t tmp[PARAM_M - (PARAM_FFT - 1)]; - for (size_t i = 0 ; i < m ; ++i) { + for (i = 0 ; i < m ; ++i) { tmp[i] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(betas[i], f[1]); } w[0] = f[0]; - for (size_t j = 0 ; j < m ; ++j) { - for (size_t k = 0 ; k < (1U << j) ; ++k) { + for (j = 0 ; j < m ; ++j) { + for (k = 0 ; k < (1U << j) ; ++k) { w[(1 << j) + k] = w[k] ^ tmp[j]; } } @@ -187,8 +197,8 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 2: compute g if (betas[m - 1] != 1) { - uint16_t beta_m_pow = 1; - for (size_t i = 1 ; i < (1U << m_f) ; ++i) { + beta_m_pow = 1; + for (i = 1 ; i < (1U << m_f) ; ++i) { beta_m_pow = PQCLEAN_HQCRMRS256_AVX2_gf_mul(beta_m_pow, betas[m - 1]); f[i] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(beta_m_pow, f[i]); } @@ -198,7 +208,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 radix(f0, f1, f, m_f); // Step 4: compute gammas and deltas - for (uint8_t i = 0 ; i < m - 1 ; ++i) { + for (i = 0 ; i + 1 < m ; ++i) { gammas[i] = PQCLEAN_HQCRMRS256_AVX2_gf_mul(betas[i], PQCLEAN_HQCRMRS256_AVX2_gf_inverse(betas[m - 1])); deltas[i] = PQCLEAN_HQCRMRS256_AVX2_gf_square(gammas[i]) ^ gammas[i]; } @@ -209,10 +219,11 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 // Step 5 fft_rec(u, f0, (f_coeffs + 1) / 2, m - 1, m_f - 1, deltas); + k = 1 << ((m - 1) & 0xf); // &0xf is to let the compiler know that m-1 is small. if (f_coeffs <= 3) { // 3-coefficient polynomial f case: f1 is constant w[0] = u[0]; w[k] = u[0] ^ f1[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(gammas_sums[i], f1[0]); w[k + i] = w[i] ^ f1[0]; } @@ -223,7 +234,7 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 memcpy(w + k, v, 2 * k); w[0] = u[0]; w[k] ^= u[0]; - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(gammas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -252,14 +263,15 @@ static void fft_rec(uint16_t *w, uint16_t *f, size_t f_coeffs, uint8_t m, uint32 * @param[in] f_coeffs Number coefficients of f (i.e. deg(f)+1) */ void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs) { - uint16_t betas[PARAM_M - 1]; - uint16_t betas_sums[1 << (PARAM_M - 1)]; - uint16_t f0[1 << (PARAM_FFT - 1)]; - uint16_t f1[1 << (PARAM_FFT - 1)]; - uint16_t deltas[PARAM_M - 1]; - size_t k = 1 << (PARAM_M - 1); - uint16_t u[1 << (PARAM_M - 1)]; - uint16_t v[1 << (PARAM_M - 1)]; + uint16_t betas[PARAM_M - 1] = {0}; + uint16_t betas_sums[1 << (PARAM_M - 1)] = {0}; + uint16_t f0[1 << (PARAM_FFT - 1)] = {0}; + uint16_t f1[1 << (PARAM_FFT - 1)] = {0}; + uint16_t deltas[PARAM_M - 1] = {0}; + uint16_t u[1 << (PARAM_M - 1)] = {0}; + uint16_t v[1 << (PARAM_M - 1)] = {0}; + + size_t i, k; // Follows Gao and Mateer algorithm compute_fft_betas(betas); @@ -275,7 +287,7 @@ void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs radix(f0, f1, f, PARAM_FFT); // Step 4: Compute deltas - for (size_t i = 0 ; i < PARAM_M - 1 ; ++i) { + for (i = 0 ; i < PARAM_M - 1 ; ++i) { deltas[i] = PQCLEAN_HQCRMRS256_AVX2_gf_square(betas[i]) ^ betas[i]; } @@ -283,6 +295,7 @@ void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs fft_rec(u, f0, (f_coeffs + 1) / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); fft_rec(v, f1, f_coeffs / 2, PARAM_M - 1, PARAM_FFT - 1, deltas); + k = 1 << (PARAM_M - 1); // Step 6, 7 and error polynomial computation memcpy(w + k, v, 2 * k); @@ -293,7 +306,7 @@ void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs w[k] ^= u[0]; // Find other roots - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { w[i] = u[i] ^ PQCLEAN_HQCRMRS256_AVX2_gf_mul(betas_sums[i], v[i]); w[k + i] ^= w[i]; } @@ -311,17 +324,16 @@ void PQCLEAN_HQCRMRS256_AVX2_fft(uint16_t *w, const uint16_t *f, size_t f_coeffs void PQCLEAN_HQCRMRS256_AVX2_fft_retrieve_error_poly(uint8_t *error, const uint16_t *w) { uint16_t gammas[PARAM_M - 1] = {0}; uint16_t gammas_sums[1 << (PARAM_M - 1)] = {0}; - size_t k = 1 << (PARAM_M - 1); + size_t i, k, index; compute_fft_betas(gammas); compute_subset_sums(gammas_sums, gammas, PARAM_M - 1); + k = 1 << (PARAM_M - 1); error[0] ^= 1 ^ ((uint16_t) - w[0] >> 15); error[0] ^= 1 ^ ((uint16_t) - w[k] >> 15); - size_t index = PARAM_GF_MUL_ORDER; - - for (size_t i = 1 ; i < k ; ++i) { + for (i = 1 ; i < k ; ++i) { index = PARAM_GF_MUL_ORDER - PQCLEAN_HQCRMRS256_AVX2_gf_log(gammas_sums[i]); error[index] ^= 1 ^ ((uint16_t) - w[i] >> 15); diff --git a/crypto_kem/hqc-rmrs-256/avx2/gf.c b/crypto_kem/hqc-rmrs-256/avx2/gf.c index 40bb9da7..f5ea0ef4 100644 --- a/crypto_kem/hqc-rmrs-256/avx2/gf.c +++ b/crypto_kem/hqc-rmrs-256/avx2/gf.c @@ -30,29 +30,28 @@ uint16_t PQCLEAN_HQCRMRS256_AVX2_gf_log(uint16_t elt) { * @param[in] deg_x The degree of polynomial x */ static uint16_t gf_reduce(uint64_t x, size_t deg_x) { - // Compute the distance between the primitive polynomial first two set bits - size_t lz1 = __builtin_clz(PARAM_GF_POLY); - size_t lz2 = __builtin_clz(PARAM_GF_POLY ^ 1 << PARAM_M); - size_t dist = lz2 - lz1; + uint16_t z1, z2, rmdr, dist; + uint64_t mod; + size_t steps, i, j; // Deduce the number of steps of reduction - size_t steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), dist); + steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), PARAM_GF_POLY_M2); // Reduce - for (size_t i = 0; i < steps; ++i) { - uint64_t mod = x >> PARAM_M; + for (i = 0; i < steps; ++i) { + mod = x >> PARAM_M; x &= (1 << PARAM_M) - 1; x ^= mod; - size_t tz1 = 0; - uint16_t rmdr = PARAM_GF_POLY ^ 1; - for (size_t j = __builtin_popcount(PARAM_GF_POLY) - 2; j; --j) { - size_t tz2 = __builtin_ctz(rmdr); - size_t shift = tz2 - tz1; - mod <<= shift; + z1 = 0; + rmdr = PARAM_GF_POLY ^ 1; + for (j = PARAM_GF_POLY_WT - 2; j; --j) { + z2 = __tzcnt_u16(rmdr); + dist = (uint16_t) (z2 - z1); + mod <<= dist; x ^= mod; - rmdr ^= 1 << tz2; - tz1 = tz2; + rmdr ^= 1 << z2; + z1 = z2; } } diff --git a/crypto_kem/hqc-rmrs-256/avx2/gf2x.c b/crypto_kem/hqc-rmrs-256/avx2/gf2x.c index 927c7ff7..0cc215cc 100644 --- a/crypto_kem/hqc-rmrs-256/avx2/gf2x.c +++ b/crypto_kem/hqc-rmrs-256/avx2/gf2x.c @@ -335,9 +335,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { static __m256i W0[2 * (T_TM3_3W_256)], W1[2 * (T_TM3_3W_256)], W2[2 * (T_TM3_3W_256)], W3[2 * (T_TM3_3W_256)], W4[2 * (T_TM3_3W_256)]; static __m256i tmp[2 * (T_TM3_3W_256)]; static __m256i ro256[6 * (T_TM3_3W_256)]; - const __m256i zero = (__m256i) { - 0ul, 0ul, 0ul, 0ul - }; + const __m256i zero = _mm256_setzero_si256(); int32_t T2 = T_TM3_3W_64 << 1; for (int32_t i = 0 ; i < T_TM3_3W_256 - 1 ; i++) { @@ -354,24 +352,12 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { for (int32_t i = T_TM3_3W_256 - 1 ; i < T_TM3_3W_256 ; i++) { int32_t i4 = i << 2; int32_t i41 = i4 + 1; - U0[i] = (__m256i) { - A[i4], A[i41], 0x0ul, 0x0ul - }; - V0[i] = (__m256i) { - B[i4], B[i41], 0x0ul, 0x0ul - }; - U1[i] = (__m256i) { - A[i4 + T_TM3_3W_64 - 2], A[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - V1[i] = (__m256i) { - B[i4 + T_TM3_3W_64 - 2], B[i41 + T_TM3_3W_64 - 2], 0x0ul, 0x0ul - }; - U2[i] = (__m256i) { - A[i4 - 4 + T2], A[i4 - 3 + T2], 0x0ul, 0x0ul - }; - V2[i] = (__m256i) { - B[i4 - 4 + T2], B[i4 - 3 + T2], 0x0ul, 0x0ul - }; + U0[i] = _mm256_set_epi64x(0, 0, A[i41], A[i4]); + V0[i] = _mm256_set_epi64x(0, 0, B[i41], B[i4]); + U1[i] = _mm256_set_epi64x(0, 0, A[i41 + T_TM3_3W_64 - 2], A[i4 + T_TM3_3W_64 - 2]); + V1[i] = _mm256_set_epi64x(0, 0, B[i41 + T_TM3_3W_64 - 2], B[i4 + T_TM3_3W_64 - 2]); + U2[i] = _mm256_set_epi64x(0, 0, A[i4 - 3 + T2], A[i4 - 4 + T2]); + V2[i] = _mm256_set_epi64x(0, 0, B[i4 - 3 + T2], B[i4 - 4 + T2]); } // Evaluation phase : x= X^64 @@ -459,9 +445,7 @@ static void TOOM3Mult(__m256i *Out, const uint64_t *A, const uint64_t *B) { //W2 =(W2 + W3 + W4*(x^3+1))/(x+1) U1_64 = ((int64_t *) W4); __m256i *U1_256 = (__m256i *) (U1_64 + 1); - tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ (__m256i) { - 0x0ul, 0x0ul, 0x0ul, U1_64[0] - }; + tmp[0] = W2[0] ^ W3[0] ^ W4[0] ^ _mm256_set_epi64x(U1_64[0], 0, 0, 0); for (int32_t i = 1 ; i < (T_TM3_3W_256 << 1) - 1 ; i++) { tmp[i] = W2[i] ^ W3[i] ^ W4[i] ^ _mm256_lddqu_si256(&U1_256[i - 1]); @@ -555,9 +539,7 @@ static void TOOM3RecMult(__m256i *Out, const uint64_t *A, const uint64_t *B) { __m256i W0[2 * (T_TM3R_3W_256 + 2)], W1[2 * (T_TM3R_3W_256 + 2)], W2[2 * (T_TM3R_3W_256 + 2)], W3[2 * (T_TM3R_3W_256 + 2)], W4[2 * (T_TM3R_3W_256 + 2)]; __m256i tmp[2 * (T_TM3R_3W_256 + 2) + 3]; __m256i ro256[tTM3R / 2]; - const __m256i zero = (__m256i) { - 0ul, 0ul, 0ul, 0ul - }; + const __m256i zero = _mm256_setzero_si256(); int32_t T2 = T_TM3R_3W_64 << 1; for (int32_t i = 0 ; i < T_TM3R_3W_256 ; i++) {