1
1
mirror of https://github.com/henrydcase/pqc.git synced 2024-11-22 23:48:58 +00:00

dilithium: Remove leading underscore from some internal symbols

This commit is contained in:
John M. Schanck 2020-10-08 09:56:15 -04:00 committed by Kris Kwiatkowski
parent 351d17ae70
commit 3db4fa4876
21 changed files with 105 additions and 105 deletions

View File

@ -45,9 +45,9 @@ vpsrlq $32,%ymm\h3,%ymm\h3
.global cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx) .global cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx)
cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx): cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8x256q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8x256q)(%rip),%ymm2
#load #load
vmovdqa (%rsi),%ymm6 vmovdqa (%rsi),%ymm6
@ -165,9 +165,9 @@ ret
.global cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx) .global cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx)
cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx): cdecl(PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8x256q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8x256q)(%rip),%ymm2
#load #load
vmovdqa (%rsi),%ymm4 vmovdqa (%rsi),%ymm4
@ -237,7 +237,7 @@ vpbroadcastd 24(%rdx),%ymm3
butterfly 4,5,6,7,8,9,10,11,3,3 butterfly 4,5,6,7,8,9,10,11,3,3
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xdiv)(%rip),%ymm3 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xdiv)(%rip),%ymm3
vpmuludq %ymm3,%ymm4,%ymm4 vpmuludq %ymm3,%ymm4,%ymm4
vpmuludq %ymm3,%ymm5,%ymm5 vpmuludq %ymm3,%ymm5,%ymm5
@ -261,7 +261,7 @@ vpsrlq $32,%ymm6,%ymm6
vpsrlq $32,%ymm7,%ymm7 vpsrlq $32,%ymm7,%ymm7
#store #store
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_mask)(%rip),%ymm3 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_mask)(%rip),%ymm3
vpermd %ymm4,%ymm3,%ymm4 vpermd %ymm4,%ymm3,%ymm4
vpermd %ymm5,%ymm3,%ymm5 vpermd %ymm5,%ymm3,%ymm5
vpermd %ymm6,%ymm3,%ymm6 vpermd %ymm6,%ymm3,%ymm6

View File

@ -44,9 +44,9 @@ vpsubd %ymm15,%ymm\rh3,%ymm\rh3
.global cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx) .global cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx)
cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx): cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8x2q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8x2q)(%rip),%ymm2
level0: level0:
#zetas #zetas
@ -95,9 +95,9 @@ ret
.global cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx) .global cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx)
cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx): cdecl(PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8x2q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8x2q)(%rip),%ymm2
#load #load
vmovdqa (%rsi),%ymm4 vmovdqa (%rsi),%ymm4

View File

@ -5,19 +5,19 @@
#define DIV (((MONT*MONT % Q) * (Q-1) % Q) * ((Q-1) >> 8) % Q) #define DIV (((MONT*MONT % Q) * (Q-1) % Q) * ((Q-1) >> 8) % Q)
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xqinv = {.as_arr = {QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8xqinv = {.as_arr = {QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xq = {.as_arr = {Q, Q, Q, Q, Q, Q, Q, Q}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8xq = {.as_arr = {Q, Q, Q, Q, Q, Q, Q, Q}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x2q = {.as_arr = {2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8x2q = {.as_arr = {2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x256q = {.as_arr = {256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8x256q = {.as_arr = {256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q,
256 * Q 256 * Q
} }
}; };
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_mask = {.as_arr = {0, 2, 4, 6, 0, 0, 0, 0}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_mask = {.as_arr = {0, 2, 4, 6, 0, 0, 0, 0}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x23ones = {.as_arr = {0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8x23ones = {.as_arr = {0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF,
0x7FFFFF, 0x7FFFFF 0x7FFFFF, 0x7FFFFF
} }
}; };
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xdiv = { .as_arr = {DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8xdiv = { .as_arr = {DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV}};
#undef QINV #undef QINV
#undef MONT #undef MONT

View File

@ -12,13 +12,13 @@ typedef ALIGNED_UINT32(8) aligned_uint32x8_t;
typedef ALIGNED_UINT32(N) aligned_uint32xN_t; typedef ALIGNED_UINT32(N) aligned_uint32xN_t;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xqinv; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8xqinv;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xq; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8xq;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x2q; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8x2q;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x256q; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8x256q;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_mask; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_mask;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x23ones; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8x23ones;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xdiv; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM2_AVX2_8xdiv;
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM2_AVX2_zetas; extern const aligned_uint32xN_t PQCLEAN_DILITHIUM2_AVX2_zetas;
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM2_AVX2_zetas_inv; extern const aligned_uint32xN_t PQCLEAN_DILITHIUM2_AVX2_zetas_inv;

View File

@ -4,8 +4,8 @@
.global cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_avx) .global cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_avx)
cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_avx): cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1
xor %eax,%eax xor %eax,%eax
_looptop1: _looptop1:
@ -136,8 +136,8 @@ vpaddq %ymm9,%ymm5,%ymm5
.global cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx) .global cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx)
cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx): cdecl(PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm1
xor %eax,%eax xor %eax,%eax
_looptop2: _looptop2:

View File

@ -82,7 +82,7 @@ void PQCLEAN_DILITHIUM2_AVX2_poly_add(poly *c, const poly *a, const poly *b) {
**************************************************/ **************************************************/
void PQCLEAN_DILITHIUM2_AVX2_poly_sub(poly *c, const poly *a, const poly *b) { void PQCLEAN_DILITHIUM2_AVX2_poly_sub(poly *c, const poly *a, const poly *b) {
__m256i vec0, vec1; __m256i vec0, vec1;
const __m256i twoq = _mm256_load_si256(_PQCLEAN_DILITHIUM2_AVX2_8x2q.as_vec); const __m256i twoq = _mm256_load_si256(PQCLEAN_DILITHIUM2_AVX2_8x2q.as_vec);
for (size_t i = 0; i < N / 8; i++) { for (size_t i = 0; i < N / 8; i++) {
vec0 = _mm256_load_si256(&a->coeffs_x8[i]); vec0 = _mm256_load_si256(&a->coeffs_x8[i]);

View File

@ -3,7 +3,7 @@
.global cdecl(PQCLEAN_DILITHIUM2_AVX2_reduce_avx) .global cdecl(PQCLEAN_DILITHIUM2_AVX2_reduce_avx)
cdecl(PQCLEAN_DILITHIUM2_AVX2_reduce_avx): cdecl(PQCLEAN_DILITHIUM2_AVX2_reduce_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8x23ones)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8x23ones)(%rip),%ymm0
xor %eax,%eax xor %eax,%eax
_looptop_rdc32: _looptop_rdc32:
@ -51,7 +51,7 @@ ret
.global cdecl(PQCLEAN_DILITHIUM2_AVX2_csubq_avx) .global cdecl(PQCLEAN_DILITHIUM2_AVX2_csubq_avx)
cdecl(PQCLEAN_DILITHIUM2_AVX2_csubq_avx): cdecl(PQCLEAN_DILITHIUM2_AVX2_csubq_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM2_AVX2_8xq)(%rip),%ymm0
xor %eax,%eax xor %eax,%eax
_looptop_csubq: _looptop_csubq:

View File

@ -45,9 +45,9 @@ vpsrlq $32,%ymm\h3,%ymm\h3
.global cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx) .global cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx)
cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx): cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8x256q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8x256q)(%rip),%ymm2
#load #load
vmovdqa (%rsi),%ymm6 vmovdqa (%rsi),%ymm6
@ -165,9 +165,9 @@ ret
.global cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx) .global cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx)
cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx): cdecl(PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8x256q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8x256q)(%rip),%ymm2
#load #load
vmovdqa (%rsi),%ymm4 vmovdqa (%rsi),%ymm4
@ -237,7 +237,7 @@ vpbroadcastd 24(%rdx),%ymm3
butterfly 4,5,6,7,8,9,10,11,3,3 butterfly 4,5,6,7,8,9,10,11,3,3
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xdiv)(%rip),%ymm3 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xdiv)(%rip),%ymm3
vpmuludq %ymm3,%ymm4,%ymm4 vpmuludq %ymm3,%ymm4,%ymm4
vpmuludq %ymm3,%ymm5,%ymm5 vpmuludq %ymm3,%ymm5,%ymm5
@ -261,7 +261,7 @@ vpsrlq $32,%ymm6,%ymm6
vpsrlq $32,%ymm7,%ymm7 vpsrlq $32,%ymm7,%ymm7
#store #store
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_mask)(%rip),%ymm3 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_mask)(%rip),%ymm3
vpermd %ymm4,%ymm3,%ymm4 vpermd %ymm4,%ymm3,%ymm4
vpermd %ymm5,%ymm3,%ymm5 vpermd %ymm5,%ymm3,%ymm5
vpermd %ymm6,%ymm3,%ymm6 vpermd %ymm6,%ymm3,%ymm6

View File

@ -44,9 +44,9 @@ vpsubd %ymm15,%ymm\rh3,%ymm\rh3
.global cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx) .global cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx)
cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx): cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8x2q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8x2q)(%rip),%ymm2
level0: level0:
#zetas #zetas
@ -95,9 +95,9 @@ ret
.global cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx) .global cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx)
cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx): cdecl(PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8x2q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8x2q)(%rip),%ymm2
#load #load
vmovdqa (%rsi),%ymm4 vmovdqa (%rsi),%ymm4

View File

@ -5,19 +5,19 @@
#define DIV (((MONT*MONT % Q) * (Q-1) % Q) * ((Q-1) >> 8) % Q) #define DIV (((MONT*MONT % Q) * (Q-1) % Q) * ((Q-1) >> 8) % Q)
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xqinv = {.as_arr = {QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8xqinv = {.as_arr = {QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xq = {.as_arr = {Q, Q, Q, Q, Q, Q, Q, Q}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8xq = {.as_arr = {Q, Q, Q, Q, Q, Q, Q, Q}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x2q = {.as_arr = {2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8x2q = {.as_arr = {2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x256q = {.as_arr = {256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8x256q = {.as_arr = {256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q,
256 * Q 256 * Q
} }
}; };
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_mask = {.as_arr = {0, 2, 4, 6, 0, 0, 0, 0}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_mask = {.as_arr = {0, 2, 4, 6, 0, 0, 0, 0}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x23ones = {.as_arr = {0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8x23ones = {.as_arr = {0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF,
0x7FFFFF, 0x7FFFFF 0x7FFFFF, 0x7FFFFF
} }
}; };
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xdiv = { .as_arr = {DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8xdiv = { .as_arr = {DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV}};
#undef QINV #undef QINV
#undef MONT #undef MONT

View File

@ -12,13 +12,13 @@ typedef ALIGNED_UINT32(8) aligned_uint32x8_t;
typedef ALIGNED_UINT32(N) aligned_uint32xN_t; typedef ALIGNED_UINT32(N) aligned_uint32xN_t;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xqinv; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8xqinv;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xq; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8xq;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x2q; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8x2q;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x256q; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8x256q;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_mask; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_mask;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x23ones; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8x23ones;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xdiv; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM3_AVX2_8xdiv;
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM3_AVX2_zetas; extern const aligned_uint32xN_t PQCLEAN_DILITHIUM3_AVX2_zetas;
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM3_AVX2_zetas_inv; extern const aligned_uint32xN_t PQCLEAN_DILITHIUM3_AVX2_zetas_inv;

View File

@ -4,8 +4,8 @@
.global cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_avx) .global cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_avx)
cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_avx): cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1
xor %eax,%eax xor %eax,%eax
_looptop1: _looptop1:
@ -136,8 +136,8 @@ vpaddq %ymm9,%ymm5,%ymm5
.global cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx) .global cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx)
cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx): cdecl(PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm1
xor %eax,%eax xor %eax,%eax
_looptop2: _looptop2:

View File

@ -82,7 +82,7 @@ void PQCLEAN_DILITHIUM3_AVX2_poly_add(poly *c, const poly *a, const poly *b) {
**************************************************/ **************************************************/
void PQCLEAN_DILITHIUM3_AVX2_poly_sub(poly *c, const poly *a, const poly *b) { void PQCLEAN_DILITHIUM3_AVX2_poly_sub(poly *c, const poly *a, const poly *b) {
__m256i vec0, vec1; __m256i vec0, vec1;
const __m256i twoq = _mm256_load_si256(_PQCLEAN_DILITHIUM3_AVX2_8x2q.as_vec); const __m256i twoq = _mm256_load_si256(PQCLEAN_DILITHIUM3_AVX2_8x2q.as_vec);
for (size_t i = 0; i < N / 8; i++) { for (size_t i = 0; i < N / 8; i++) {
vec0 = _mm256_load_si256(&a->coeffs_x8[i]); vec0 = _mm256_load_si256(&a->coeffs_x8[i]);

View File

@ -3,7 +3,7 @@
.global cdecl(PQCLEAN_DILITHIUM3_AVX2_reduce_avx) .global cdecl(PQCLEAN_DILITHIUM3_AVX2_reduce_avx)
cdecl(PQCLEAN_DILITHIUM3_AVX2_reduce_avx): cdecl(PQCLEAN_DILITHIUM3_AVX2_reduce_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8x23ones)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8x23ones)(%rip),%ymm0
xor %eax,%eax xor %eax,%eax
_looptop_rdc32: _looptop_rdc32:
@ -51,7 +51,7 @@ ret
.global cdecl(PQCLEAN_DILITHIUM3_AVX2_csubq_avx) .global cdecl(PQCLEAN_DILITHIUM3_AVX2_csubq_avx)
cdecl(PQCLEAN_DILITHIUM3_AVX2_csubq_avx): cdecl(PQCLEAN_DILITHIUM3_AVX2_csubq_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM3_AVX2_8xq)(%rip),%ymm0
xor %eax,%eax xor %eax,%eax
_looptop_csubq: _looptop_csubq:

View File

@ -45,9 +45,9 @@ vpsrlq $32,%ymm\h3,%ymm\h3
.global cdecl(PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx) .global cdecl(PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx)
cdecl(PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx): cdecl(PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8x256q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8x256q)(%rip),%ymm2
#load #load
vmovdqa (%rsi),%ymm6 vmovdqa (%rsi),%ymm6
@ -165,9 +165,9 @@ ret
.global cdecl(PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx) .global cdecl(PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx)
cdecl(PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx): cdecl(PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8x256q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8x256q)(%rip),%ymm2
#load #load
vmovdqa (%rsi),%ymm4 vmovdqa (%rsi),%ymm4
@ -237,7 +237,7 @@ vpbroadcastd 24(%rdx),%ymm3
butterfly 4,5,6,7,8,9,10,11,3,3 butterfly 4,5,6,7,8,9,10,11,3,3
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xdiv)(%rip),%ymm3 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xdiv)(%rip),%ymm3
vpmuludq %ymm3,%ymm4,%ymm4 vpmuludq %ymm3,%ymm4,%ymm4
vpmuludq %ymm3,%ymm5,%ymm5 vpmuludq %ymm3,%ymm5,%ymm5
@ -261,7 +261,7 @@ vpsrlq $32,%ymm6,%ymm6
vpsrlq $32,%ymm7,%ymm7 vpsrlq $32,%ymm7,%ymm7
#store #store
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_mask)(%rip),%ymm3 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_mask)(%rip),%ymm3
vpermd %ymm4,%ymm3,%ymm4 vpermd %ymm4,%ymm3,%ymm4
vpermd %ymm5,%ymm3,%ymm5 vpermd %ymm5,%ymm3,%ymm5
vpermd %ymm6,%ymm3,%ymm6 vpermd %ymm6,%ymm3,%ymm6

View File

@ -44,9 +44,9 @@ vpsubd %ymm15,%ymm\rh3,%ymm\rh3
.global cdecl(PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx) .global cdecl(PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx)
cdecl(PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx): cdecl(PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8x2q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8x2q)(%rip),%ymm2
level0: level0:
#zetas #zetas
@ -95,9 +95,9 @@ ret
.global cdecl(PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx) .global cdecl(PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx)
cdecl(PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx): cdecl(PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8x2q)(%rip),%ymm2 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8x2q)(%rip),%ymm2
#load #load
vmovdqa (%rsi),%ymm4 vmovdqa (%rsi),%ymm4

View File

@ -5,19 +5,19 @@
#define DIV (((MONT*MONT % Q) * (Q-1) % Q) * ((Q-1) >> 8) % Q) #define DIV (((MONT*MONT % Q) * (Q-1) % Q) * ((Q-1) >> 8) % Q)
const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8xqinv = {.as_arr = {QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8xqinv = {.as_arr = {QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8xq = {.as_arr = {Q, Q, Q, Q, Q, Q, Q, Q}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8xq = {.as_arr = {Q, Q, Q, Q, Q, Q, Q, Q}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8x2q = {.as_arr = {2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8x2q = {.as_arr = {2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8x256q = {.as_arr = {256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8x256q = {.as_arr = {256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q,
256 * Q 256 * Q
} }
}; };
const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_mask = {.as_arr = {0, 2, 4, 6, 0, 0, 0, 0}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_mask = {.as_arr = {0, 2, 4, 6, 0, 0, 0, 0}};
const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8x23ones = {.as_arr = {0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8x23ones = {.as_arr = {0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF,
0x7FFFFF, 0x7FFFFF 0x7FFFFF, 0x7FFFFF
} }
}; };
const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8xdiv = { .as_arr = {DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV}}; const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8xdiv = { .as_arr = {DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV}};
#undef QINV #undef QINV
#undef MONT #undef MONT

View File

@ -12,13 +12,13 @@ typedef ALIGNED_UINT32(8) aligned_uint32x8_t;
typedef ALIGNED_UINT32(N) aligned_uint32xN_t; typedef ALIGNED_UINT32(N) aligned_uint32xN_t;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8xqinv; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8xqinv;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8xq; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8xq;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8x2q; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8x2q;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8x256q; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8x256q;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_mask; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_mask;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8x23ones; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8x23ones;
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM4_AVX2_8xdiv; extern const aligned_uint32x8_t PQCLEAN_DILITHIUM4_AVX2_8xdiv;
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM4_AVX2_zetas; extern const aligned_uint32xN_t PQCLEAN_DILITHIUM4_AVX2_zetas;
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM4_AVX2_zetas_inv; extern const aligned_uint32xN_t PQCLEAN_DILITHIUM4_AVX2_zetas_inv;

View File

@ -4,8 +4,8 @@
.global cdecl(PQCLEAN_DILITHIUM4_AVX2_pointwise_avx) .global cdecl(PQCLEAN_DILITHIUM4_AVX2_pointwise_avx)
cdecl(PQCLEAN_DILITHIUM4_AVX2_pointwise_avx): cdecl(PQCLEAN_DILITHIUM4_AVX2_pointwise_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1
xor %eax,%eax xor %eax,%eax
_looptop1: _looptop1:
@ -136,8 +136,8 @@ vpaddq %ymm9,%ymm5,%ymm5
.global cdecl(PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx) .global cdecl(PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx)
cdecl(PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx): cdecl(PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xqinv)(%rip),%ymm0
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm1
xor %eax,%eax xor %eax,%eax
_looptop2: _looptop2:

View File

@ -82,7 +82,7 @@ void PQCLEAN_DILITHIUM4_AVX2_poly_add(poly *c, const poly *a, const poly *b) {
**************************************************/ **************************************************/
void PQCLEAN_DILITHIUM4_AVX2_poly_sub(poly *c, const poly *a, const poly *b) { void PQCLEAN_DILITHIUM4_AVX2_poly_sub(poly *c, const poly *a, const poly *b) {
__m256i vec0, vec1; __m256i vec0, vec1;
const __m256i twoq = _mm256_load_si256(_PQCLEAN_DILITHIUM4_AVX2_8x2q.as_vec); const __m256i twoq = _mm256_load_si256(PQCLEAN_DILITHIUM4_AVX2_8x2q.as_vec);
for (size_t i = 0; i < N / 8; i++) { for (size_t i = 0; i < N / 8; i++) {
vec0 = _mm256_load_si256(&a->coeffs_x8[i]); vec0 = _mm256_load_si256(&a->coeffs_x8[i]);

View File

@ -3,7 +3,7 @@
.global cdecl(PQCLEAN_DILITHIUM4_AVX2_reduce_avx) .global cdecl(PQCLEAN_DILITHIUM4_AVX2_reduce_avx)
cdecl(PQCLEAN_DILITHIUM4_AVX2_reduce_avx): cdecl(PQCLEAN_DILITHIUM4_AVX2_reduce_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8x23ones)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8x23ones)(%rip),%ymm0
xor %eax,%eax xor %eax,%eax
_looptop_rdc32: _looptop_rdc32:
@ -51,7 +51,7 @@ ret
.global cdecl(PQCLEAN_DILITHIUM4_AVX2_csubq_avx) .global cdecl(PQCLEAN_DILITHIUM4_AVX2_csubq_avx)
cdecl(PQCLEAN_DILITHIUM4_AVX2_csubq_avx): cdecl(PQCLEAN_DILITHIUM4_AVX2_csubq_avx):
#consts #consts
vmovdqa cdecl(_PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm0 vmovdqa cdecl(PQCLEAN_DILITHIUM4_AVX2_8xq)(%rip),%ymm0
xor %eax,%eax xor %eax,%eax
_looptop_csubq: _looptop_csubq: