Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

thash_sha256_robustx8.c 13 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. #include <stdint.h>
  2. #include <string.h>
  3. #include "address.h"
  4. #include "params.h"
  5. #include "sha256.h"
  6. #include "sha256avx.h"
  7. #include "sha256x8.h"
  8. #include "thashx8.h"
  9. #include "utils.h"
  10. /**
  11. * 8-way parallel version of thash; takes 8x as much input and output
  12. */
  13. static void thashx8(uint8_t *out0,
  14. uint8_t *out1,
  15. uint8_t *out2,
  16. uint8_t *out3,
  17. uint8_t *out4,
  18. uint8_t *out5,
  19. uint8_t *out6,
  20. uint8_t *out7,
  21. const uint8_t *in0,
  22. const uint8_t *in1,
  23. const uint8_t *in2,
  24. const uint8_t *in3,
  25. const uint8_t *in4,
  26. const uint8_t *in5,
  27. const uint8_t *in6,
  28. const uint8_t *in7,
  29. unsigned int inblocks,
  30. const uint8_t *pub_seed,
  31. uint32_t addrx8[8 * 8],
  32. uint8_t *bufx8,
  33. uint8_t *bitmaskx8,
  34. const hash_state *state_seeded) {
  35. unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES];
  36. unsigned int i;
  37. sha256ctxx8 ctx;
  38. (void)pub_seed; /* Suppress an 'unused parameter' warning. */
  39. for (i = 0; i < 8; i++) {
  40. memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  41. pub_seed, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  42. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N +
  43. i * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  44. addrx8 + i * 8);
  45. }
  46. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N,
  47. bufx8 + 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  48. bufx8 + 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  49. bufx8 + 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  50. bufx8 + 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  51. bufx8 + 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  52. bufx8 + 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  53. bufx8 + 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  54. bufx8 + 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  55. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES
  56. );
  57. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8);
  58. for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; i++) {
  59. bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
  60. 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
  61. in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
  62. bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
  63. 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
  64. in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
  65. bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
  66. 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
  67. in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
  68. bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
  69. 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
  70. in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
  71. bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
  72. 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
  73. in4[i] ^ bitmaskx8[i + 4 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
  74. bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
  75. 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
  76. in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
  77. bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
  78. 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
  79. in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
  80. bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
  81. 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
  82. in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
  83. }
  84. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_update8x(&ctx,
  85. bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  86. bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  87. bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  88. bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  89. bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  90. bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  91. bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  92. bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
  93. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  94. PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_final8x(&ctx,
  95. outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
  96. outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
  97. outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
  98. outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
  99. outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
  100. outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
  101. outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
  102. outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES);
  103. memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  104. memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  105. memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  106. memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  107. memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  108. memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  109. memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  110. memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
  111. }
  112. #define thash_size_variant(name, size) \
  113. void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thashx8_##name(unsigned char *out0, \
  114. unsigned char *out1, \
  115. unsigned char *out2, \
  116. unsigned char *out3, \
  117. unsigned char *out4, \
  118. unsigned char *out5, \
  119. unsigned char *out6, \
  120. unsigned char *out7, \
  121. const unsigned char *in0, \
  122. const unsigned char *in1, \
  123. const unsigned char *in2, \
  124. const unsigned char *in3, \
  125. const unsigned char *in4, \
  126. const unsigned char *in5, \
  127. const unsigned char *in6, \
  128. const unsigned char *in7, \
  129. const unsigned char *pub_seed, \
  130. uint32_t addrx8[8*8], \
  131. const hash_state *state_seeded) { \
  132. const unsigned int inblocks = (size); \
  133. uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; \
  134. uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; \
  135. thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \
  136. in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \
  137. pub_seed, addrx8, bufx8, bitmaskx8, state_seeded); \
  138. }
  139. thash_size_variant(1, 1)
  140. thash_size_variant(2, 2)
  141. thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN)
  142. thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES)
  143. #undef thash_size_variant