pqc/crypto_kem/hqc-256/avx2/gf.c

#include "gf.h"
#include "parameters.h"
#include <emmintrin.h>
#include <immintrin.h>
#include <stdint.h>
/**
 * @file gf.c
 * Galois field implementation with multiplication using the pclmulqdq instruction
 */


static uint16_t gf_reduce(uint64_t x, size_t deg_x);
static uint16_t gf_quad(uint64_t a);


/**
 * Discrete logarithm table for GF(2^10): log[x] is the exponent i such that
 * alpha^i = x, where alpha is a root of x^10 + x^3 + 1.
 * Zero has no logarithm; its entry is set to 1024 by convention.
 */
static const uint16_t log[1024] = {
    1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 
794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 
750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487, 
};


/**
 * Looks up the discrete logarithm of a field element.
 * The result is the integer i such that elt = a^i, where a is the primitive
 * element of GF(2^GF_M); it is read directly from the precomputed log table.
 * @returns the logarithm of the given element
 * @param[in] elt Element of GF(2^GF_M); used unchecked as an index into the 1024-entry table
 */
uint16_t PQCLEAN_HQC256_AVX2_gf_log(uint16_t elt) {
    const uint16_t exponent = log[elt];
    return exponent;
}


/**
 * Reduces polynomial x modulo primitive polynomial GF_POLY.
 *
 * Each pass splits x at bit PARAM_M and folds the upper part back onto the
 * lower part once per non-leading term of PARAM_GF_POLY. The number of passes
 * is derived from deg_x and the gap between the two highest set bits of
 * PARAM_GF_POLY only, never from the value of x.
 *
 * @returns x mod GF_POLY
 * @param[in] x Polynomial of degree less than 64
 * @param[in] deg_x The degree of polynomial x (callers in this file pass the
 *                  largest degree their operation can produce)
 */
static uint16_t gf_reduce(uint64_t x, size_t deg_x) {
    // Degree already below PARAM_M: no pass is needed. Returning here also
    // prevents the unsigned subtraction below from wrapping around (and the
    // loop from running for an enormous step count) when deg_x < PARAM_M - 1.
    if (deg_x < PARAM_M) {
        return (uint16_t) x;
    }

    // Compute the distance between the primitive polynomial first two set bits
    size_t lz1 = __builtin_clz(PARAM_GF_POLY);
    size_t lz2 = __builtin_clz(PARAM_GF_POLY ^ (1 << PARAM_M));
    size_t dist = lz2 - lz1;

    // Deduce the number of steps of reduction
    size_t steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), dist);

    // Reduce
    for (size_t i = 0; i < steps; ++i) {
        // Split x: 'mod' holds the coefficients at or above X^PARAM_M,
        // x keeps the PARAM_M low coefficients.
        uint64_t mod = x >> PARAM_M;
        x &= (1 << PARAM_M) - 1;
        // Fold for the constant term of the polynomial (bit 0).
        x ^= mod;

        // Fold once more for every remaining set bit strictly between the
        // constant and the leading term; 'rmdr' tracks the bits still to
        // process, and 'mod' is shifted incrementally from one to the next.
        size_t tz1 = 0;
        uint16_t rmdr = PARAM_GF_POLY ^ 1;
        for (size_t j = __builtin_popcount(PARAM_GF_POLY) - 2; j; --j) {
            size_t tz2 = __builtin_ctz(rmdr);
            size_t shift = tz2 - tz1;
            mod <<= shift;
            x ^= mod;
            rmdr ^= 1 << tz2;
            tz1 = tz2;
        }
    }

    return (uint16_t) x;
}


/**
 * Multiplies two elements of GF(2^GF_M).
 * The operands are multiplied as binary polynomials with a single carry-less
 * multiplication (pclmulqdq), and the unreduced product is then brought back
 * into the field by gf_reduce.
 * @returns the product a*b
 * @param[in] a Element of GF(2^GF_M)
 * @param[in] b Element of GF(2^GF_M)
 */
uint16_t PQCLEAN_HQC256_AVX2_gf_mul(uint16_t a, uint16_t b) {
    const __m128i lhs = _mm_cvtsi32_si128((int) a);
    const __m128i rhs = _mm_cvtsi32_si128((int) b);

    // Immediate 0 selects the low 64-bit lane of both operands.
    const __m128i product = _mm_clmulepi64_si128(lhs, rhs, 0);

    // Only the low 32 bits of the product are kept before reduction.
    const uint32_t unreduced = (uint32_t) _mm_cvtsi128_si32(product);

    return gf_reduce(unreduced, 2 * (PARAM_M - 1));
}


/**
 * Squares an element of GF(2^GF_M).
 * Coefficient i of a (for i < PARAM_M) is moved to position 2*i, and the
 * resulting polynomial is reduced with gf_reduce.
 * @returns a^2
 * @param[in] a Element of GF(2^GF_M)
 */
uint16_t PQCLEAN_HQC256_AVX2_gf_square(uint16_t a) {
    uint32_t spread = 0;

    for (size_t bit = 0; bit < PARAM_M; ++bit) {
        // Isolate coefficient 'bit' and drop it at twice its position.
        uint32_t coeff = ((uint32_t) a >> bit) & 1u;
        spread |= coeff << (2 * bit);
    }

    return gf_reduce(spread, 2 * (PARAM_M - 1));
}


/**
 * Computes the 4th power of an element of GF(2^GF_M).
 * Coefficient i of a (for i < PARAM_M) is moved to position 4*i, and the
 * resulting polynomial is reduced with gf_reduce. Bits of a at or above
 * position PARAM_M do not contribute to the result.
 * @returns a^4
 * @param[in] a Element of GF(2^GF_M)
 */
uint16_t gf_quad(uint64_t a) {
    uint64_t spread = 0;

    for (size_t bit = 0; bit < PARAM_M; ++bit) {
        // Isolate coefficient 'bit' and drop it at four times its position.
        uint64_t coeff = (a >> bit) & 1ull;
        spread |= coeff << (4 * bit);
    }

    return gf_reduce(spread, 4 * (PARAM_M - 1));
}


/**
 * Computes the inverse of an element of GF(2^10),
 * using a shorter chain of squares and multiplications than fast exponentiation.
 * The chain builds a^3, a^15 and a^255 in turn and finishes with
 * (a^255)^4 * a^2 = a^1022, which is a^-1 in GF(2^10).
 * @returns the inverse of a
 * @param[in] a Element of GF(2^10)
 */
uint16_t PQCLEAN_HQC256_AVX2_gf_inverse(uint16_t a) {
    // a^2, kept for the final multiplication
    const uint16_t pow2 = PQCLEAN_HQC256_AVX2_gf_square(a);

    // a^3 = a^2 * a
    const uint16_t pow3 = PQCLEAN_HQC256_AVX2_gf_mul(pow2, a);

    // a^15 = (a^3)^4 * a^3
    const uint16_t pow12 = gf_quad(pow3);
    const uint16_t pow15 = PQCLEAN_HQC256_AVX2_gf_mul(pow12, pow3);

    // a^255 = (a^15)^16 * a^15
    const uint16_t pow60 = gf_quad(pow15);
    const uint16_t pow240 = gf_quad(pow60);
    const uint16_t pow255 = PQCLEAN_HQC256_AVX2_gf_mul(pow240, pow15);

    // a^1022 = (a^255)^4 * a^2 = a^-1
    const uint16_t pow1020 = gf_quad(pow255);
    return PQCLEAN_HQC256_AVX2_gf_mul(pow1020, pow2);
}


/**
 * Returns i modulo 2^GF_M-1.
 * i must be less than 2*(2^GF_M-1).
 * Therefore, the return value is either i or i-2^GF_M+1.
 * The selection between the two is done with a mask rather than a branch.
 * @returns i mod (2^GF_M-1)
 * @param[in] i The integer whose modulo is taken
 */
uint16_t PQCLEAN_HQC256_AVX2_gf_mod(uint16_t i) {
    // Subtract the order; the 16-bit result wraps around when i < GF_MUL_ORDER.
    const uint16_t reduced = (uint16_t) (i - PARAM_GF_MUL_ORDER);

    // The top bit of the wrapped difference tells whether the subtraction underflowed.
    const uint16_t wrapped = (uint16_t) (reduced >> 15);

    // select = 0xffff if (i < GF_MUL_ORDER), 0 otherwise
    const uint16_t select = (uint16_t) (0u - wrapped);

    // Add the order back only when the subtraction went below zero.
    return (uint16_t) (reduced + (select & PARAM_GF_MUL_ORDER));
}
New HQC and HQC-RMRS from upstream 2020-09-07 19:23:34 +01:00			`#include "gf.h"`
			`#include "parameters.h"`
			`#include <emmintrin.h>`
			`#include <immintrin.h>`
			`#include <stdint.h>`
			`/**`
			`* @file gf.c`
			`* Galois field implementation with multiplication using the pclmulqdq instruction`
			`*/`


			`static uint16_t gf_reduce(uint64_t x, size_t deg_x);`
			`static uint16_t gf_quad(uint64_t a);`



			`/**`
			`* Logarithm of elements of GF(2^10) to the base alpha (root of x^10 + x^3 + 1).`
			`* The logarithm of 0 is set to 1024 by convention.`
			`*/`
			`static const uint16_t log[1024] = {`
			1024, 0, 1, 77, 2, 154, 78, 956, 3, 10, 155, 325, 79, 618, 957, 231, 4, 308, 11, 200, 156, 889, 326, 695, 80, 24, 619, 87, 958, 402, 232, 436, 5, 513, 309, 551, 12, 40, 201, 479, 157, 518, 890, 101, 327, 164, 696, 860, 81, 258, 25, 385, 620, 277, 88, 577, 959, 772, 403, 680, 233, 52, 437, 966, 6, 20, 514, 768, 310, 650, 552, 129, 13, 314, 41, 849, 202, 757, 480, 980, 158, 213, 519, 335, 891, 462, 102, 907, 328, 654, 165, 264, 697, 369, 861, 354, 82, 675, 259, 590, 26, 628, 386, 991, 621, 556, 278, 822, 89, 219, 578, 117, 960, 937, 773, 533, 404, 491, 681, 241, 234, 133, 53, 595, 438, 178, 967, 943, 7, 1020, 21, 305, 515, 510, 769, 255, 311, 17, 651, 210, 553, 672, 130, 934, 14, 1017, 315, 1014, 42, 610, 850, 191, 203, 318, 758, 31, 481, 428, 981, 568, 159, 613, 214, 752, 520, 667, 336, 788, 892, 45, 463, 801, 103, 525, 908, 705, 329, 194, 655, 1008, 166, 642, 265, 296, 698, 853, 370, 633, 862, 899, 355, 779, 83, 321, 676, 97, 260, 845, 591, 818, 27, 206, 629, 797, 387, 793, 992, 727, 622, 34, 557, 661, 279, 420, 823, 834, 90, 761, 220, 391, 579, 926, 118, 451, 961, 431, 938, 349, 774, 563, 534, 446, 405, 484, 492, 731, 682, 341, 242, 714, 235, 571, 134, 290, 54, 412, 596, 140, 439, 984, 179, 996, 968, 810, 944, 539, 8, 616, 1021, 152, 22, 400, 306, 887, 516, 162, 511, 38, 770, 50, 256, 275, 312, 755, 18, 648, 652, 367, 211, 460, 554, 217, 673, 626, 131, 176, 935, 489, 15, 670, 1018, 508, 316, 426, 1015, 608, 43, 523, 611, 665, 851, 897, 192, 640, 204, 791, 319, 843, 759, 924, 32, 418, 482, 339, 429, 561, 982, 808, 569, 410, 160, 48, 614, 398, 215, 174, 753, 365, 521, 895, 668, 424, 337, 806, 789, 922, 893, 804, 46, 172, 464, 872, 802, 870, 104, 466, 526, 283, 909, 874, 706, 736, 330, 528, 195, 380, 656, 285, 1009, 1003, 167, 106, 643, 838, 266, 468, 297, 66, 699, 708, 854, 111, 371, 738, 634, 60, 863, 911, 900, 827, 356, 876, 780, 497, 84, 197, 322, 74, 677, 382, 98, 548, 261, 332, 846, 765, 592, 530, 819, 587, 28, 1011, 207, 302, 630, 1005, 798, 749, 388, 658, 
794, 94, 993, 287, 728, 346, 623, 645, 35, 149, 558, 840, 662, 505, 280, 169, 421, 395, 824, 108, 835, 377, 91, 299, 762, 71, 221, 68, 392, 146, 580, 268, 927, 224, 119, 470, 452, 687, 962, 856, 432, 227, 939, 113, 350, 976, 775, 701, 564, 930, 535, 710, 447, 723, 406, 636, 485, 271, 493, 62, 732, 918, 683, 373, 342, 583, 243, 740, 715, 719, 236, 902, 572, 690, 135, 829, 291, 186, 55, 865, 413, 455, 597, 913, 141, 744, 440, 782, 985, 473, 180, 499, 997, 602, 969, 358, 811, 122, 945, 878, 540, 247, 9, 324, 617, 230, 1022, 76, 153, 955, 23, 86, 401, 435, 307, 199, 888, 694, 517, 100, 163, 859, 512, 550, 39, 478, 771, 679, 51, 965, 257, 384, 276, 576, 313, 848, 756, 979, 19, 767, 649, 128, 653, 263, 368, 353, 212, 334, 461, 906, 555, 821, 218, 116, 674, 589, 627, 990, 132, 594, 177, 942, 936, 532, 490, 240, 16, 209, 671, 933, 1019, 304, 509, 254, 317, 30, 427, 567, 1016, 1013, 609, 190, 44, 800, 524, 704, 612, 751, 666, 787, 852, 632, 898, 778, 193, 1007, 641, 295, 205, 796, 792, 726, 320, 96, 844, 817, 760, 390, 925, 450, 33, 660, 419, 833, 483, 730, 340, 713, 430, 348, 562, 445, 983, 995, 809, 538, 570, 289, 411, 139, 161, 37, 49, 274, 615, 151, 399, 886, 216, 625, 175, 488, 754, 647, 366, 459, 522, 664, 896, 639, 669, 507, 425, 607, 338, 560, 807, 409, 790, 842, 923, 417, 894, 423, 805, 921, 47, 397, 173, 364, 465, 282, 873, 735, 803, 171, 871, 869, 105, 837, 467, 65, 527, 379, 284, 1002, 910, 826, 875, 496, 707, 110, 737, 59, 331, 764, 529, 586, 196, 73, 381, 547, 657, 93, 286, 345, 1010, 301, 1004, 748, 168, 394, 107, 376, 644, 148, 839, 504, 267, 223, 469, 686, 298, 70, 67, 145, 700, 929, 709, 722, 855, 226, 112, 975, 372, 582, 739, 718, 635, 270, 61, 917, 864, 454, 912, 743, 901, 689, 828, 185, 357, 121, 877, 246, 781, 472, 498, 601, 85, 434, 198, 693, 323, 229, 75, 954, 678, 964, 383, 575, 99, 858, 549, 477, 262, 352, 333, 905, 847, 978, 766, 127, 593, 941, 531, 239, 820, 115, 588, 989, 29, 566, 1012, 189, 208, 932, 303, 253, 631, 777, 1006, 294, 799, 703, 
750, 786, 389, 449, 659, 832, 795, 725, 95, 816, 994, 537, 288, 138, 729, 712, 347, 444, 624, 487,
			`};`



			`/**`
			`* Returns the integer i such that elt = a^i`
			`* where a is the primitive element of GF(2^GF_M).`
			`*@returns the logarithm of the given element`
			`*/`
			`uint16_t PQCLEAN_HQC256_AVX2_gf_log(uint16_t elt) {`
			`return log[elt];`
			`}`



			`/**`
			`* Reduces polynomial x modulo primitive polynomial GF_POLY.`
			`* @returns x mod GF_POLY`
			`* @param[in] x Polynomial of degree less than 64`
			`* @param[in] deg_x The degree of polynomial x`
			`*/`
			`uint16_t gf_reduce(uint64_t x, size_t deg_x) {`
			`// Compute the distance between the primitive polynomial first two set bits`
			`size_t lz1 = __builtin_clz(PARAM_GF_POLY);`
			`size_t lz2 = __builtin_clz(PARAM_GF_POLY ^ 1 << PARAM_M);`
			`size_t dist = lz2 - lz1;`

			`// Deduce the number of steps of reduction`
			`size_t steps = CEIL_DIVIDE(deg_x - (PARAM_M - 1), dist);`

			`// Reduce`
			`for (size_t i = 0; i < steps; ++i) {`
			`uint64_t mod = x >> PARAM_M;`
			`x &= (1 << PARAM_M) - 1;`
			`x ^= mod;`

			`size_t tz1 = 0;`
			`uint16_t rmdr = PARAM_GF_POLY ^ 1;`
			`for (size_t j = __builtin_popcount(PARAM_GF_POLY) - 2; j; --j) {`
			`size_t tz2 = __builtin_ctz(rmdr);`
			`size_t shift = tz2 - tz1;`
			`mod <<= shift;`
			`x ^= mod;`
			`rmdr ^= 1 << tz2;`
			`tz1 = tz2;`
			`}`
			`}`

			`return x;`
			`}`



			`/**`
			`* Multiplies two elements of GF(2^GF_M).`
			`* @returns the product a*b`
			`* @param[in] a Element of GF(2^GF_M)`
			`* @param[in] b Element of GF(2^GF_M)`
			`*/`
			`uint16_t PQCLEAN_HQC256_AVX2_gf_mul(uint16_t a, uint16_t b) {`
			`__m128i va = _mm_cvtsi32_si128(a);`
			`__m128i vb = _mm_cvtsi32_si128(b);`
			`__m128i vab = _mm_clmulepi64_si128(va, vb, 0);`
			`uint32_t ab = _mm_cvtsi128_si32(vab);`

			`return gf_reduce(ab, 2 * (PARAM_M - 1));`
			`}`



			`/**`
			`* Squares an element of GF(2^GF_M).`
			`* @returns a^2`
			`* @param[in] a Element of GF(2^GF_M)`
			`*/`
			`uint16_t PQCLEAN_HQC256_AVX2_gf_square(uint16_t a) {`
			`uint32_t b = a;`
			`uint32_t s = b & 1;`
			`for (size_t i = 1; i < PARAM_M; ++i) {`
			`b <<= 1;`
			`s ^= b & (1 << 2 * i);`
			`}`

			`return gf_reduce(s, 2 * (PARAM_M - 1));`
			`}`



			`/**`
			`* Computes the 4th power of an element of GF(2^GF_M).`
			`* @returns a^4`
			`* @param[in] a Element of GF(2^GF_M)`
			`*/`
			`uint16_t gf_quad(uint64_t a) {`
			`uint64_t q = a & 1;`
			`for (size_t i = 1; i < PARAM_M; ++i) {`
			`a <<= 3;`
			`q ^= a & (1ull << 4 * i);`
			`}`

			`return gf_reduce(q, 4 * (PARAM_M - 1));`
			`}`



			`/**`
			`* Computes the inverse of an element of GF(2^10),`
			`* using a shorter chain of squares and multiplications than fast exponentiation.`
			`* @returns the inverse of a`
			`* @param[in] a Element of GF(2^10)`
			`*/`
			`uint16_t PQCLEAN_HQC256_AVX2_gf_inverse(uint16_t a) {`
			`uint16_t p;`
			`uint16_t a2;`

			`a2 = PQCLEAN_HQC256_AVX2_gf_square(a); // a^2`
			`a = PQCLEAN_HQC256_AVX2_gf_mul(a2, a); // a^2.a`
			`p = gf_quad(a); // a^8.a^4`
			`a = PQCLEAN_HQC256_AVX2_gf_mul(p, a); // a^8.a^4.a^2.a`
			`p = gf_quad(a); // a^32.a^16.a^8.a^4`
			`p = gf_quad(p); // a^128.a^64.a^32.a^16`
			`a = PQCLEAN_HQC256_AVX2_gf_mul(p, a); // a^128.a^64.a^32.a^16.a^8.a^4.a^2.a`
			`p = gf_quad(a); // a^512.a^256.a^128.a^64.a^32.a^16.a^8.a^4`
			`p = PQCLEAN_HQC256_AVX2_gf_mul(p, a2); // a^-1`

			`return p;`
			`}`



			`/**`
			`* Returns i modulo 2^GF_M-1.`
			`* i must be less than 2*(2^GF_M-1).`
			`* Therefore, the return value is either i or i-2^GF_M+1.`
			`* @returns i mod (2^GF_M-1)`
			`* @param[in] i The integer whose modulo is taken`
			`*/`
			`uint16_t PQCLEAN_HQC256_AVX2_gf_mod(uint16_t i) {`
			`uint16_t tmp = i - PARAM_GF_MUL_ORDER;`

			`// mask = 0xffff if (i < GF_MUL_ORDER)`
			`int16_t mask = -(tmp >> 15);`

			`return tmp + (mask & PARAM_GF_MUL_ORDER);`
			`}`