Ви не можете вибрати більше 25 тем Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.
 
 
 

147 рядки
5.0 KiB

  1. /*
  2. This file is for functions related to 256-bit vectors
  3. including functions for bitsliced field operations
  4. */
  5. #include "vec256.h"
  6. extern void PQCLEAN_MCELIECE460896F_AVX_vec256_mul_asm(vec256 *, vec256 *, const vec256 *);
  7. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_set1_16b(uint16_t a) {
  8. return _mm256_set1_epi16(a);
  9. }
  10. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_setzero(void) {
  11. return _mm256_setzero_si256();
  12. }
  13. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_set4x(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) {
  14. return _mm256_set_epi64x(a3, a2, a1, a0);
  15. }
  16. int PQCLEAN_MCELIECE460896F_AVX_vec256_testz(vec256 a) {
  17. return _mm256_testz_si256(a, a);
  18. }
  19. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_and(vec256 a, vec256 b) {
  20. return _mm256_and_si256(a, b);
  21. }
  22. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_xor(vec256 a, vec256 b) {
  23. return _mm256_xor_si256(a, b);
  24. }
  25. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_or(vec256 a, vec256 b) {
  26. return _mm256_or_si256(a, b);
  27. }
  28. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_sll_4x(vec256 a, int s) {
  29. return _mm256_slli_epi64(a, s);
  30. }
  31. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_srl_4x(vec256 a, int s) {
  32. return _mm256_srli_epi64(a, s);
  33. }
  34. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_unpack_low(vec256 a, vec256 b) {
  35. return _mm256_permute2x128_si256 (a, b, 0x20);
  36. }
  37. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_unpack_high(vec256 a, vec256 b) {
  38. return _mm256_permute2x128_si256 (a, b, 0x31);
  39. }
  40. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_unpack_low_2x(vec256 a, vec256 b) {
  41. return _mm256_unpacklo_epi64 (a, b);
  42. }
  43. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_unpack_high_2x(vec256 a, vec256 b) {
  44. return _mm256_unpackhi_epi64 (a, b);
  45. }
  46. vec256 PQCLEAN_MCELIECE460896F_AVX_vec256_or_reduce(const vec256 *a) {
  47. int i;
  48. vec256 ret;
  49. ret = a[0];
  50. for (i = 1; i < GFBITS; i++) {
  51. ret = PQCLEAN_MCELIECE460896F_AVX_vec256_or(ret, a[i]);
  52. }
  53. return ret;
  54. }
  55. void PQCLEAN_MCELIECE460896F_AVX_vec256_copy(vec256 *dest, const vec256 *src) {
  56. int i;
  57. for (i = 0; i < GFBITS; i++) {
  58. dest[i] = src[i];
  59. }
  60. }
  61. void PQCLEAN_MCELIECE460896F_AVX_vec256_mul(vec256 *h, vec256 *f, const vec256 *g) {
  62. PQCLEAN_MCELIECE460896F_AVX_vec256_mul_asm(h, f, g);
  63. }
  64. /* bitsliced field squarings */
  65. void PQCLEAN_MCELIECE460896F_AVX_vec256_sq(vec256 *out, const vec256 *in) {
  66. int i;
  67. vec256 result[GFBITS], t;
  68. t = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[11], in[12]);
  69. result[0] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[0], in[11]);
  70. result[1] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[7], t);
  71. result[2] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[1], in[7]);
  72. result[3] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[8], t);
  73. result[4] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[2], in[7]);
  74. result[4] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(result[4], in[8]);
  75. result[4] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(result[4], t);
  76. result[5] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[7], in[9]);
  77. result[6] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[3], in[8]);
  78. result[6] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(result[6], in[9]);
  79. result[6] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(result[6], in[12]);
  80. result[7] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[8], in[10]);
  81. result[8] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[4], in[9]);
  82. result[8] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(result[8], in[10]);
  83. result[9] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[9], in[11]);
  84. result[10] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[5], in[10]);
  85. result[10] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(result[10], in[11]);
  86. result[11] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[10], in[12]);
  87. result[12] = PQCLEAN_MCELIECE460896F_AVX_vec256_xor(in[6], t);
  88. for (i = 0; i < GFBITS; i++) {
  89. out[i] = result[i];
  90. }
  91. }
  92. /* bitsliced field inverses */
  93. void PQCLEAN_MCELIECE460896F_AVX_vec256_inv(vec256 *out, const vec256 *in) {
  94. vec256 tmp_11[ GFBITS ];
  95. vec256 tmp_1111[ GFBITS ];
  96. PQCLEAN_MCELIECE460896F_AVX_vec256_copy(out, in);
  97. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out);
  98. PQCLEAN_MCELIECE460896F_AVX_vec256_mul(tmp_11, out, in); // ^11
  99. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, tmp_11);
  100. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out);
  101. PQCLEAN_MCELIECE460896F_AVX_vec256_mul(tmp_1111, out, tmp_11); // ^1111
  102. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, tmp_1111);
  103. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out);
  104. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out);
  105. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out);
  106. PQCLEAN_MCELIECE460896F_AVX_vec256_mul(out, out, tmp_1111); // ^11111111
  107. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out);
  108. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out);
  109. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out);
  110. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out);
  111. PQCLEAN_MCELIECE460896F_AVX_vec256_mul(out, out, tmp_1111); // ^111111111111
  112. PQCLEAN_MCELIECE460896F_AVX_vec256_sq(out, out); // ^1111111111110
  113. }