No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.
 
 
 

206 líneas
5.7 KiB

  1. #include <immintrin.h>
  2. #include <stdint.h>
  3. #include <string.h>
  4. #include "fips202.h"
  5. #include "fips202x4.h"
  /* Number of rounds of the Keccak permutation. */
  #define NROUNDS 24
  /*
   * Rotate the 64-bit unsigned value `a` left by `offset` bits.
   *
   * Both shift counts are reduced modulo 64 so that an offset of 0 (or 64)
   * yields `a` unchanged instead of shifting by the full width of the type,
   * which is undefined behaviour in C. For offsets 1..63 the result is the
   * same as the plain shift-and-combine form.
   *
   * Note: `offset` is evaluated twice; do not pass expressions with side effects.
   */
  #define ROL(a, offset) (((a) << ((offset) & 63)) | ((a) >> ((64 - (offset)) & 63)))
  /*
   * Read the 8 bytes starting at `x` and combine them into one 64-bit value,
   * with x[0] as the least-significant byte and x[7] as the most-significant.
   */
  static uint64_t load64(const unsigned char *x) {
      uint64_t acc = 0;
      int k;

      /* Walk from the most-significant byte down, shifting earlier bytes up. */
      for (k = 7; k >= 0; --k) {
          acc = (acc << 8) | (uint64_t)x[k];
      }
      return acc;
  }
  /*
   * Write the 64-bit value `u` to the 8 bytes starting at `x`,
   * least-significant byte first.
   */
  static void store64(uint8_t *x, uint64_t u) {
      unsigned int k;

      for (k = 0; k < 8; ++k) {
          /* Byte k is bits [8k, 8k+7] of u. */
          x[k] = (uint8_t)(u >> (8 * k));
      }
  }
  /*
   * The permutation itself is not implemented in this file: it is taken from
   * the Keccak Code Package and resolved at link time. Judging by its name it
   * applies 24 rounds to every instance held in the state `s`.
   */
  extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s);
  /* Local alias under which the absorb and squeeze helpers call the permutation. */
  #define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds
  /*
   * Absorb four messages of the same length into an interleaved Keccak state.
   *
   * The state `s` is accessed as an array of 64-bit words in which word
   * 4*i + k is lane i of message k.
   *
   *   s              state to absorb into (updated in place)
   *   r              rate in bytes; must not exceed 200, the size of the
   *                  local padding buffers
   *   m0, m1, m2, m3 the four input messages
   *   mlen           length in bytes of each message
   *   p              padding/domain-separation byte placed directly after the
   *                  last message byte
   *
   * Every full block of `r` bytes is XORed into the state and followed by a
   * permutation. The final, padded block is XORed in WITHOUT a trailing
   * permutation; the squeeze routine permutes before it emits output.
   */
  static void keccak_absorb4x(__m256i *s,
                              unsigned int r,
                              const unsigned char *m0,
                              const unsigned char *m1,
                              const unsigned char *m2,
                              const unsigned char *m3,
                              size_t mlen,
                              unsigned char p) {
      unsigned long long *lanes = (unsigned long long *)s;
      const unsigned char *msg[4] = {m0, m1, m2, m3};
      unsigned char last[4][200] = {{0}};
      const size_t words = r / 8;
      size_t w;
      size_t k;

      /* Full blocks: XOR into the state, permute, advance all four cursors. */
      for (; mlen >= r; mlen -= r) {
          for (w = 0; w < words; ++w) {
              for (k = 0; k < 4; ++k) {
                  lanes[4 * w + k] ^= load64(msg[k] + 8 * w);
              }
          }
          KeccakF1600_StatePermute4x(s);
          for (k = 0; k < 4; ++k) {
              msg[k] += r;
          }
      }

      /* Build the final block of each message: remaining bytes, then the
       * byte p, zero fill, and the top bit of the last rate byte set. */
      for (k = 0; k < 4; ++k) {
          memcpy(last[k], msg[k], mlen);
          last[k][mlen] = p;
          last[k][r - 1] |= 128;
      }

      /* XOR the padded blocks into the state (no permutation here). */
      for (w = 0; w < words; ++w) {
          for (k = 0; k < 4; ++k) {
              lanes[4 * w + k] ^= load64(last[k] + 8 * w);
          }
      }
  }
  /*
   * Squeeze `nblocks` full blocks of `r` bytes from an interleaved Keccak
   * state into four output buffers.
   *
   *   h0, h1, h2, h3 output buffers; each receives nblocks * r bytes
   *   nblocks        number of blocks to produce per output
   *   s              state (permuted in place once per block)
   *   r              rate in bytes
   *
   * The permutation is applied BEFORE each block is written out. Word
   * 4*i + k of the state supplies bytes 8*i .. 8*i+7 of output k.
   */
  static void keccak_squeezeblocks4x(unsigned char *h0,
                                     unsigned char *h1,
                                     unsigned char *h2,
                                     unsigned char *h3,
                                     unsigned long long int nblocks,
                                     __m256i *s,
                                     unsigned int r) {
      unsigned long long *lanes = (unsigned long long *)s;
      unsigned char *dst[4] = {h0, h1, h2, h3};
      const unsigned int words = r >> 3;
      unsigned long long remaining;
      unsigned int w;
      unsigned int k;

      for (remaining = nblocks; remaining > 0; --remaining) {
          KeccakF1600_StatePermute4x(s);
          for (w = 0; w < words; ++w) {
              for (k = 0; k < 4; ++k) {
                  store64(dst[k] + 8 * w, lanes[4 * w + k]);
              }
          }
          /* Move every output cursor past the block just written. */
          for (k = 0; k < 4; ++k) {
              dst[k] += r;
          }
      }
  }
  95. void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_shake128x4(unsigned char *out0,
  96. unsigned char *out1,
  97. unsigned char *out2,
  98. unsigned char *out3,
  99. unsigned long long outlen,
  100. unsigned char *in0,
  101. unsigned char *in1,
  102. unsigned char *in2,
  103. unsigned char *in3,
  104. unsigned long long inlen) {
  105. __m256i s[25];
  106. unsigned char t0[SHAKE128_RATE];
  107. unsigned char t1[SHAKE128_RATE];
  108. unsigned char t2[SHAKE128_RATE];
  109. unsigned char t3[SHAKE128_RATE];
  110. unsigned int i;
  111. /* zero state */
  112. for (i = 0; i < 25; i++) {
  113. s[i] = _mm256_xor_si256(s[i], s[i]);
  114. }
  115. /* absorb 4 message of identical length in parallel */
  116. keccak_absorb4x(s, SHAKE128_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F);
  117. /* Squeeze output */
  118. keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE128_RATE, s, SHAKE128_RATE);
  119. out0 += (outlen / SHAKE128_RATE) * SHAKE128_RATE;
  120. out1 += (outlen / SHAKE128_RATE) * SHAKE128_RATE;
  121. out2 += (outlen / SHAKE128_RATE) * SHAKE128_RATE;
  122. out3 += (outlen / SHAKE128_RATE) * SHAKE128_RATE;
  123. if (outlen % SHAKE128_RATE) {
  124. keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE128_RATE);
  125. for (i = 0; i < outlen % SHAKE128_RATE; i++) {
  126. out0[i] = t0[i];
  127. out1[i] = t1[i];
  128. out2[i] = t2[i];
  129. out3[i] = t3[i];
  130. }
  131. }
  132. }
  133. void PQCLEAN_SPHINCSSHAKE256192SROBUST_AVX2_shake256x4(unsigned char *out0,
  134. unsigned char *out1,
  135. unsigned char *out2,
  136. unsigned char *out3,
  137. unsigned long long outlen,
  138. unsigned char *in0,
  139. unsigned char *in1,
  140. unsigned char *in2,
  141. unsigned char *in3,
  142. unsigned long long inlen) {
  143. __m256i s[25];
  144. unsigned char t0[SHAKE256_RATE];
  145. unsigned char t1[SHAKE256_RATE];
  146. unsigned char t2[SHAKE256_RATE];
  147. unsigned char t3[SHAKE256_RATE];
  148. unsigned int i;
  149. /* zero state */
  150. for (i = 0; i < 25; i++) {
  151. s[i] = _mm256_xor_si256(s[i], s[i]);
  152. }
  153. /* absorb 4 message of identical length in parallel */
  154. keccak_absorb4x(s, SHAKE256_RATE, in0, in1, in2, in3, (size_t)inlen, 0x1F);
  155. /* Squeeze output */
  156. keccak_squeezeblocks4x(out0, out1, out2, out3, outlen / SHAKE256_RATE, s, SHAKE256_RATE);
  157. out0 += (outlen / SHAKE256_RATE) * SHAKE256_RATE;
  158. out1 += (outlen / SHAKE256_RATE) * SHAKE256_RATE;
  159. out2 += (outlen / SHAKE256_RATE) * SHAKE256_RATE;
  160. out3 += (outlen / SHAKE256_RATE) * SHAKE256_RATE;
  161. if (outlen % SHAKE256_RATE) {
  162. keccak_squeezeblocks4x(t0, t1, t2, t3, 1, s, SHAKE256_RATE);
  163. for (i = 0; i < outlen % SHAKE256_RATE; i++) {
  164. out0[i] = t0[i];
  165. out1[i] = t1[i];
  166. out2[i] = t2[i];
  167. out3[i] = t3[i];
  168. }
  169. }
  170. }