dilithium/fips202x4: store _mm_storel_pd/_mm_storeh_pd results into stack doubles, then memmove them to the output buffers
This commit is contained in:
parent
52750b86b1
commit
52851284ab
@ -91,17 +91,22 @@ static void keccakx4_squeezeblocks(uint8_t *out0,
|
|||||||
unsigned int r,
|
unsigned int r,
|
||||||
__m256i s[25]) {
|
__m256i s[25]) {
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
double temp0, temp1;
|
||||||
__m128d t;
|
__m128d t;
|
||||||
|
|
||||||
while (nblocks > 0) {
|
while (nblocks > 0) {
|
||||||
PQCLEAN_DILITHIUM2_AVX2_f1600x4(s, KeccakF_RoundConstants);
|
PQCLEAN_DILITHIUM2_AVX2_f1600x4(s, KeccakF_RoundConstants);
|
||||||
for (i = 0; i < r / 8; ++i) {
|
for (i = 0; i < r / 8; ++i) {
|
||||||
t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i]));
|
t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i]));
|
||||||
_mm_storel_pd((double *)&out0[8 * i], t);
|
_mm_storel_pd(&temp0, t);
|
||||||
_mm_storeh_pd((double *)&out1[8 * i], t);
|
_mm_storeh_pd(&temp1, t);
|
||||||
|
memmove(&out0[8 * i], &temp0, sizeof(double));
|
||||||
|
memmove(&out1[8 * i], &temp1, sizeof(double));
|
||||||
t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1));
|
t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1));
|
||||||
_mm_storel_pd((double *)&out2[8 * i], t);
|
_mm_storel_pd(&temp0, t);
|
||||||
_mm_storeh_pd((double *)&out3[8 * i], t);
|
_mm_storeh_pd(&temp1, t);
|
||||||
|
memmove(&out2[8 * i], &temp0, sizeof(double));
|
||||||
|
memmove(&out3[8 * i], &temp1, sizeof(double));
|
||||||
}
|
}
|
||||||
|
|
||||||
out0 += r;
|
out0 += r;
|
||||||
|
@ -91,17 +91,22 @@ static void keccakx4_squeezeblocks(uint8_t *out0,
|
|||||||
unsigned int r,
|
unsigned int r,
|
||||||
__m256i s[25]) {
|
__m256i s[25]) {
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
double temp0, temp1;
|
||||||
__m128d t;
|
__m128d t;
|
||||||
|
|
||||||
while (nblocks > 0) {
|
while (nblocks > 0) {
|
||||||
PQCLEAN_DILITHIUM3_AVX2_f1600x4(s, KeccakF_RoundConstants);
|
PQCLEAN_DILITHIUM3_AVX2_f1600x4(s, KeccakF_RoundConstants);
|
||||||
for (i = 0; i < r / 8; ++i) {
|
for (i = 0; i < r / 8; ++i) {
|
||||||
t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i]));
|
t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i]));
|
||||||
_mm_storel_pd((double *)&out0[8 * i], t);
|
_mm_storel_pd(&temp0, t);
|
||||||
_mm_storeh_pd((double *)&out1[8 * i], t);
|
_mm_storeh_pd(&temp1, t);
|
||||||
|
memmove(&out0[8 * i], &temp0, sizeof(double));
|
||||||
|
memmove(&out1[8 * i], &temp1, sizeof(double));
|
||||||
t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1));
|
t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1));
|
||||||
_mm_storel_pd((double *)&out2[8 * i], t);
|
_mm_storel_pd(&temp0, t);
|
||||||
_mm_storeh_pd((double *)&out3[8 * i], t);
|
_mm_storeh_pd(&temp1, t);
|
||||||
|
memmove(&out2[8 * i], &temp0, sizeof(double));
|
||||||
|
memmove(&out3[8 * i], &temp1, sizeof(double));
|
||||||
}
|
}
|
||||||
|
|
||||||
out0 += r;
|
out0 += r;
|
||||||
|
@ -91,17 +91,22 @@ static void keccakx4_squeezeblocks(uint8_t *out0,
|
|||||||
unsigned int r,
|
unsigned int r,
|
||||||
__m256i s[25]) {
|
__m256i s[25]) {
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
|
double temp0, temp1;
|
||||||
__m128d t;
|
__m128d t;
|
||||||
|
|
||||||
while (nblocks > 0) {
|
while (nblocks > 0) {
|
||||||
PQCLEAN_DILITHIUM5_AVX2_f1600x4(s, KeccakF_RoundConstants);
|
PQCLEAN_DILITHIUM5_AVX2_f1600x4(s, KeccakF_RoundConstants);
|
||||||
for (i = 0; i < r / 8; ++i) {
|
for (i = 0; i < r / 8; ++i) {
|
||||||
t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i]));
|
t = _mm_castsi128_pd(_mm256_castsi256_si128(s[i]));
|
||||||
_mm_storel_pd((double *)&out0[8 * i], t);
|
_mm_storel_pd(&temp0, t);
|
||||||
_mm_storeh_pd((double *)&out1[8 * i], t);
|
_mm_storeh_pd(&temp1, t);
|
||||||
|
memmove(&out0[8 * i], &temp0, sizeof(double));
|
||||||
|
memmove(&out1[8 * i], &temp1, sizeof(double));
|
||||||
t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1));
|
t = _mm_castsi128_pd(_mm256_extracti128_si256(s[i], 1));
|
||||||
_mm_storel_pd((double *)&out2[8 * i], t);
|
_mm_storel_pd(&temp0, t);
|
||||||
_mm_storeh_pd((double *)&out3[8 * i], t);
|
_mm_storeh_pd(&temp1, t);
|
||||||
|
memmove(&out2[8 * i], &temp0, sizeof(double));
|
||||||
|
memmove(&out3[8 * i], &temp1, sizeof(double));
|
||||||
}
|
}
|
||||||
|
|
||||||
out0 += r;
|
out0 += r;
|
||||||
|
Loading…
Reference in New Issue
Block a user