|
|
@@ -91,17 +91,22 @@ static void keccakx4_squeezeblocks(uint8_t *out0, |
|
|
|
unsigned int r, |
|
|
|
__m256i s[25]) { |
|
|
|
unsigned int i; |
|
|
|
double temp0, temp1; |
|
|
|
__m128d t; |
|
|
|
|
|
|
|
while (nblocks > 0) { |
|
|
|
PQCLEAN_DILITHIUM2_AVX2_f1600x4(s, KeccakF_RoundConstants); |
|
|
|
for (i = 0; i < r / 8; ++i) { |
|
|
|
t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i])); |
|
|
|
_mm_storel_pd((double *)&out0[8 * i], t); |
|
|
|
_mm_storeh_pd((double *)&out1[8 * i], t); |
|
|
|
_mm_storel_pd(&temp0, t); |
|
|
|
_mm_storeh_pd(&temp1, t); |
|
|
|
memmove(&out0[8 * i], &temp0, sizeof(double)); |
|
|
|
memmove(&out1[8 * i], &temp1, sizeof(double)); |
|
|
|
t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1)); |
|
|
|
_mm_storel_pd((double *)&out2[8 * i], t); |
|
|
|
_mm_storeh_pd((double *)&out3[8 * i], t); |
|
|
|
_mm_storel_pd(&temp0, t); |
|
|
|
_mm_storeh_pd(&temp1, t); |
|
|
|
memmove(&out2[8 * i], &temp0, sizeof(double)); |
|
|
|
memmove(&out3[8 * i], &temp1, sizeof(double)); |
|
|
|
} |
|
|
|
|
|
|
|
out0 += r; |
|
|
|