您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 

130 行
8.9 KiB

  1. #include <stdint.h>
  2. #include <string.h>
  3. #include "address.h"
  4. #include "hash_state.h"
  5. #include "params.h"
  6. #include "sha256.h"
  7. #include "sha256avx.h"
  8. #include "sha256x8.h"
  9. #include "thashx8.h"
  10. #include "utils.h"
  11. /**
  12. * 8-way parallel version of thash; takes 8x as much input and output
  13. */
  14. static void thashx8(unsigned char *out0,
  15. unsigned char *out1,
  16. unsigned char *out2,
  17. unsigned char *out3,
  18. unsigned char *out4,
  19. unsigned char *out5,
  20. unsigned char *out6,
  21. unsigned char *out7,
  22. const unsigned char *in0,
  23. const unsigned char *in1,
  24. const unsigned char *in2,
  25. const unsigned char *in3,
  26. const unsigned char *in4,
  27. const unsigned char *in5,
  28. const unsigned char *in6,
  29. const unsigned char *in7, unsigned int inblocks,
  30. const unsigned char *pub_seed, uint32_t addrx8[8 * 8],
  31. uint8_t *bufx8,
  32. const hash_state *state_seeded) {
  33. unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES];
  34. unsigned int i;
  35. sha256ctxx8 ctx;
  36. (void)pub_seed; /* Suppress an 'unused parameter' warning. */
  37. PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8);
  38. for (i = 0; i < 8; i++) {
  39. PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_compress_address(bufx8 + i * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N),
  40. addrx8 + i * 8);
  41. }
  42. memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES +
  43. 0 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in0, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  44. memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES +
  45. 1 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in1, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  46. memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES +
  47. 2 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in2, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  48. memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES +
  49. 3 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in3, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  50. memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES +
  51. 4 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in4, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  52. memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES +
  53. 5 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in5, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  54. memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES +
  55. 6 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in6, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  56. memcpy(bufx8 + PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES +
  57. 7 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N), in7, inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  58. PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_update8x(&ctx,
  59. bufx8 + 0 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N),
  60. bufx8 + 1 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N),
  61. bufx8 + 2 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N),
  62. bufx8 + 3 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N),
  63. bufx8 + 4 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N),
  64. bufx8 + 5 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N),
  65. bufx8 + 6 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N),
  66. bufx8 + 7 * (PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N),
  67. PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  68. PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_sha256_final8x(&ctx,
  69. outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
  70. outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
  71. outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
  72. outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
  73. outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
  74. outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
  75. outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES,
  76. outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES);
  77. memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  78. memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  79. memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  80. memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  81. memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  82. memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  83. memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  84. memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N);
  85. }
  86. #define thashx8_variant_impl(name, size) \
  87. void PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_thashx8_##name(unsigned char *out0, \
  88. unsigned char *out1, \
  89. unsigned char *out2, \
  90. unsigned char *out3, \
  91. unsigned char *out4, \
  92. unsigned char *out5, \
  93. unsigned char *out6, \
  94. unsigned char *out7, \
  95. const unsigned char *in0, \
  96. const unsigned char *in1, \
  97. const unsigned char *in2, \
  98. const unsigned char *in3, \
  99. const unsigned char *in4, \
  100. const unsigned char *in5, \
  101. const unsigned char *in6, \
  102. const unsigned char *in7, \
  103. const unsigned char *pub_seed, \
  104. uint32_t addrx8[8*8], \
  105. const hash_state *state_seeded) \
  106. { \
  107. const unsigned int inblocks = (size); \
  108. uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_N)]; \
  109. thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \
  110. in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \
  111. pub_seed, addrx8, bufx8, state_seeded); \
  112. }
  113. thashx8_variant_impl(1, 1)
  114. thashx8_variant_impl(2, 2)
  115. thashx8_variant_impl(WOTS_LEN, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_WOTS_LEN)
  116. thashx8_variant_impl(FORS_TREES, PQCLEAN_SPHINCSSHA256256FSIMPLE_AVX2_FORS_TREES)
  117. #undef thashx8_variant_impl