168 lines
9.0 KiB
C
168 lines
9.0 KiB
C
|
#include "gf.h"
|
||
|
#include "parameters.h"
|
||
|
#include <emmintrin.h>
|
||
|
#include <immintrin.h>
|
||
|
#include <stdint.h>
|
||
|
/**
 * @file gf.c
 * Galois field implementation with multiplication using the pclmulqdq instruction
 */
|
||
|
|
||
|
|
||
|
// Forward declaration: reduces a polynomial modulo the field polynomial (defined later in this file).
static uint16_t gf_reduce(uint64_t x, size_t deg_x);
|
||
|
// Forward declaration: computes the 4th power of a field element (defined later in this file).
static uint16_t gf_quad(uint64_t a);
|
||
|
|
||
|
|
||
|
|
||
|
/**
 * Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1).
 * Entry x of the table holds the logarithm of the element x.
 * The logarithm of 0 is set to 1024 by convention.
 */
|
||
|
static const uint16_t log[1024] = {
|
||
|
1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 
794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 
750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487,
|
||
|
};
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Returns the integer i such that elt = a^i
|
||
|
* where a is the primitive element of GF(2^GF_M).
|
||
|
*@returns the logarithm of the given element
|
||
|
*/
|
||
|
uint16_t PQCLEAN_HQC256_AVX2_gf_log(uint16_t elt) {
|
||
|
return log[elt];
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Reduces polynomial x modulo primitive polynomial GF_POLY.
|
||
|
* @returns x mod GF_POLY
|
||
|
* @param[in] x Polynomial of degree less than 64
|
||
|
* @param[in] deg_x The degree of polynomial x
|
||
|
*/
|
||
|
uint16_t gf_reduce(uint64_t x, size_t deg_x) {
|
||
|
// Compute the distance between the primitive polynomial first two set bits
|
||
|
size_t lz1 = __builtin_clz(PARAM_GF_POLY);
|
||
|
size_t lz2 = __builtin_clz(PARAM_GF_POLY ^ 1 << PARAM_M);
|
||
|
size_t dist = lz2 - lz1;
|
||
|
|
||
|
// Deduce the number of steps of reduction
|
||
|
size_t steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), dist);
|
||
|
|
||
|
// Reduce
|
||
|
for (size_t i = 0; i < steps; ++i) {
|
||
|
uint64_t mod = x >> PARAM_M;
|
||
|
x &= (1 << PARAM_M) - 1;
|
||
|
x ^= mod;
|
||
|
|
||
|
size_t tz1 = 0;
|
||
|
uint16_t rmdr = PARAM_GF_POLY ^ 1;
|
||
|
for (size_t j = __builtin_popcount(PARAM_GF_POLY) - 2; j; --j) {
|
||
|
size_t tz2 = __builtin_ctz(rmdr);
|
||
|
size_t shift = tz2 - tz1;
|
||
|
mod <<= shift;
|
||
|
x ^= mod;
|
||
|
rmdr ^= 1 << tz2;
|
||
|
tz1 = tz2;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return x;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Multiplies two elements of GF(2^GF_M).
|
||
|
* @returns the product a*b
|
||
|
* @param[in] a Element of GF(2^GF_M)
|
||
|
* @param[in] b Element of GF(2^GF_M)
|
||
|
*/
|
||
|
uint16_t PQCLEAN_HQC256_AVX2_gf_mul(uint16_t a, uint16_t b) {
|
||
|
__m128i va = _mm_cvtsi32_si128(a);
|
||
|
__m128i vb = _mm_cvtsi32_si128(b);
|
||
|
__m128i vab = _mm_clmulepi64_si128(va, vb, 0);
|
||
|
uint32_t ab = _mm_cvtsi128_si32(vab);
|
||
|
|
||
|
return gf_reduce(ab, 2 * (PARAM_M - 1));
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Squares an element of GF(2^GF_M).
|
||
|
* @returns a^2
|
||
|
* @param[in] a Element of GF(2^GF_M)
|
||
|
*/
|
||
|
uint16_t PQCLEAN_HQC256_AVX2_gf_square(uint16_t a) {
|
||
|
uint32_t b = a;
|
||
|
uint32_t s = b & 1;
|
||
|
for (size_t i = 1; i < PARAM_M; ++i) {
|
||
|
b <<= 1;
|
||
|
s ^= b & (1 << 2 * i);
|
||
|
}
|
||
|
|
||
|
return gf_reduce(s, 2 * (PARAM_M - 1));
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Computes the 4th power of an element of GF(2^GF_M).
|
||
|
* @returns a^4
|
||
|
* @param[in] a Element of GF(2^GF_M)
|
||
|
*/
|
||
|
uint16_t gf_quad(uint64_t a) {
|
||
|
uint64_t q = a & 1;
|
||
|
for (size_t i = 1; i < PARAM_M; ++i) {
|
||
|
a <<= 3;
|
||
|
q ^= a & (1ull << 4 * i);
|
||
|
}
|
||
|
|
||
|
return gf_reduce(q, 4 * (PARAM_M - 1));
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
 * Computes the inverse of an element of GF(2^10),
 * using a shorter chain of squares and multiplications than fast exponentiation.
 * The chain builds a^3, a^15, a^255 and finally a^1022, which is a^-1.
 * @returns the inverse of a
 * @param[in] a Element of GF(2^10)
 */
uint16_t PQCLEAN_HQC256_AVX2_gf_inverse(uint16_t a) {
    // a^2, kept for the last multiplication
    const uint16_t pow2 = PQCLEAN_HQC256_AVX2_gf_square(a);

    // a^3 = a^2.a
    const uint16_t pow3 = PQCLEAN_HQC256_AVX2_gf_mul(pow2, a);

    // a^15 = (a^3)^4.a^3
    const uint16_t pow12 = gf_quad(pow3);
    const uint16_t pow15 = PQCLEAN_HQC256_AVX2_gf_mul(pow12, pow3);

    // a^255 = (a^15)^16.a^15
    const uint16_t pow60 = gf_quad(pow15);
    const uint16_t pow240 = gf_quad(pow60);
    const uint16_t pow255 = PQCLEAN_HQC256_AVX2_gf_mul(pow240, pow15);

    // a^1022 = (a^255)^4.a^2 = a^-1
    const uint16_t pow1020 = gf_quad(pow255);

    return PQCLEAN_HQC256_AVX2_gf_mul(pow1020, pow2);
}
|
||
|
|
||
|
|
||
|
|
||
|
/**
|
||
|
* Returns i modulo 2^GF_M-1.
|
||
|
* i must be less than 2*(2^GF_M-1).
|
||
|
* Therefore, the return value is either i or i-2^GF_M+1.
|
||
|
* @returns i mod (2^GF_M-1)
|
||
|
* @param[in] i The integer whose modulo is taken
|
||
|
*/
|
||
|
uint16_t PQCLEAN_HQC256_AVX2_gf_mod(uint16_t i) {
|
||
|
uint16_t tmp = i - PARAM_GF_MUL_ORDER;
|
||
|
|
||
|
// mask = 0xffff if (i < GF_MUL_ORDER)
|
||
|
int16_t mask = -(tmp >> 15);
|
||
|
|
||
|
return tmp + (mask & PARAM_GF_MUL_ORDER);
|
||
|
}
|