Use more robust macro syntax: separate every macro parameter and argument with a comma instead of a space

This commit is contained in:
Thom Wiggers 2019-12-11 13:01:07 +01:00
parent a1fb93da73
commit dbf2d34235
No known key found for this signature in database
GPG Key ID: 001BB0A7CE26E363
9 changed files with 108 additions and 78 deletions

View File

@ -1,6 +1,6 @@
.include "shuffle.inc" .include "shuffle.inc"
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3 .macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
vpaddd %ymm2,%ymm\l0,%ymm12 vpaddd %ymm2,%ymm\l0,%ymm12
vpaddd %ymm2,%ymm\l1,%ymm13 vpaddd %ymm2,%ymm\l1,%ymm13
vpaddd %ymm2,%ymm\l2,%ymm14 vpaddd %ymm2,%ymm\l2,%ymm14
@ -121,7 +121,7 @@ level2:
#PQCLEAN_DILITHIUM2_AVX2_zetas #PQCLEAN_DILITHIUM2_AVX2_zetas
vpmovzxdq 96(%rdx),%ymm3 vpmovzxdq 96(%rdx),%ymm3
butterfly 4,5,6,7,8,9,10,11 3,3 butterfly 4,5,6,7,8,9,10,11,3,3
#shuffle #shuffle
shuffle4 4,5,3,5 shuffle4 4,5,3,5
@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
vpbroadcastd 116(%rdx),%ymm15 vpbroadcastd 116(%rdx),%ymm15
vpblendd $0xF0,%ymm15,%ymm14,%ymm10 vpblendd $0xF0,%ymm15,%ymm14,%ymm10
butterfly 3,4,6,8,5,7,9,11 10,10 butterfly 3,4,6,8,5,7,9,11,10,10
#shuffle #shuffle
shuffle8 3,4,10,4 shuffle8 3,4,10,4
@ -147,7 +147,7 @@ level4:
#PQCLEAN_DILITHIUM2_AVX2_zetas #PQCLEAN_DILITHIUM2_AVX2_zetas
vpbroadcastd 120(%rdx),%ymm9 vpbroadcastd 120(%rdx),%ymm9
butterfly 10,3,6,5,4,8,7,11 9,9 butterfly 10,3,6,5,4,8,7,11,9,9
#store #store
vmovdqa %ymm10,(%rdi) vmovdqa %ymm10,(%rdi)
@ -233,7 +233,7 @@ level7:
#PQCLEAN_DILITHIUM2_AVX2_zetas #PQCLEAN_DILITHIUM2_AVX2_zetas
vpbroadcastd 24(%rdx),%ymm3 vpbroadcastd 24(%rdx),%ymm3
butterfly 4,5,6,7,8,9,10,11 3,3 butterfly 4,5,6,7,8,9,10,11,3,3
#consts #consts
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xdiv(%rip),%ymm3 vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xdiv(%rip),%ymm3

View File

@ -6,21 +6,31 @@
#include "nttconsts.h" #include "nttconsts.h"
#include "params.h" #include "params.h"
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(uint64_t *tmp, void PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(
uint64_t *tmp,
const uint32_t *a, const uint32_t *a,
const uint32_t *zetas); const uint32_t *zetas
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(uint32_t *a, );
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(
uint32_t *a,
const uint64_t *tmp, const uint64_t *tmp,
const uint32_t *zetas); const uint32_t *zetas
);
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(uint64_t *tmp, void PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(
uint64_t *tmp,
const uint32_t *a, const uint32_t *a,
const uint32_t *zetas_inv); const uint32_t *zetas_inv
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(uint32_t *a, );
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(
uint32_t *a,
const uint64_t *tmp, const uint64_t *tmp,
const uint32_t *zetas_inv); const uint32_t *zetas_inv
);
void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b); void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(
void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b); uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(
uint32_t *c, const uint32_t *a, const uint32_t *b);
#endif #endif

View File

@ -1,6 +1,6 @@
.include "shuffle.inc" .include "shuffle.inc"
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3 .macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
#mul #mul
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0 vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1 vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
@ -68,7 +68,7 @@ level1:
vpbroadcastd 4(%rdx),%ymm12 vpbroadcastd 4(%rdx),%ymm12
vpbroadcastd 8(%rdx),%ymm13 vpbroadcastd 8(%rdx),%ymm13
butterfly 4,5,8,9,6,7,10,11 12,12,13,13 butterfly 4,5,8,9,6,7,10,11,12,12,13,13
level2: level2:
#PQCLEAN_DILITHIUM2_AVX2_zetas #PQCLEAN_DILITHIUM2_AVX2_zetas
@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
vpbroadcastd 20(%rdx),%ymm14 vpbroadcastd 20(%rdx),%ymm14
vpbroadcastd 24(%rdx),%ymm15 vpbroadcastd 24(%rdx),%ymm15
butterfly 4,6,8,10,5,7,9,11 12,13,14,15 butterfly 4,6,8,10,5,7,9,11,12,13,14,15
#store #store
vmovdqa %ymm4,(%rdi) vmovdqa %ymm4,(%rdi)
@ -125,7 +125,7 @@ shuffle8 5,9,4,9
shuffle8 6,10,5,10 shuffle8 6,10,5,10
shuffle8 7,11,6,11 shuffle8 7,11,6,11
butterfly 3,8,4,9,5,10,6,11 12,12,12,12 butterfly 3,8,4,9,5,10,6,11,12,12,12,12
level5: level5:
#PQCLEAN_DILITHIUM2_AVX2_zetas #PQCLEAN_DILITHIUM2_AVX2_zetas
@ -136,14 +136,14 @@ shuffle4 8,10,3,10
shuffle4 4,6,8,6 shuffle4 4,6,8,6
shuffle4 9,11,4,11 shuffle4 9,11,4,11
butterfly 7,5,3,10,8,6,4,11 12,12,12,12 butterfly 7,5,3,10,8,6,4,11,12,12,12,12
level6: level6:
#PQCLEAN_DILITHIUM2_AVX2_zetas #PQCLEAN_DILITHIUM2_AVX2_zetas
vpmovzxdq 28(%rdx),%ymm12 vpmovzxdq 28(%rdx),%ymm12
vpmovzxdq 44(%rdx),%ymm13 vpmovzxdq 44(%rdx),%ymm13
butterfly 7,5,8,6,3,10,4,11 12,12,13,13 butterfly 7,5,8,6,3,10,4,11,12,12,13,13
level7: level7:
#PQCLEAN_DILITHIUM2_AVX2_zetas #PQCLEAN_DILITHIUM2_AVX2_zetas
@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
vpmovzxdq 92(%rdx),%ymm14 vpmovzxdq 92(%rdx),%ymm14
vpmovzxdq 108(%rdx),%ymm15 vpmovzxdq 108(%rdx),%ymm15
butterfly 7,3,8,4,5,10,6,11 12,13,14,15 butterfly 7,3,8,4,5,10,6,11,12,13,14,15
#store #store
vpsllq $32,%ymm5,%ymm5 vpsllq $32,%ymm5,%ymm5

View File

@ -1,6 +1,6 @@
.include "shuffle.inc" .include "shuffle.inc"
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3 .macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
vpaddd %ymm2,%ymm\l0,%ymm12 vpaddd %ymm2,%ymm\l0,%ymm12
vpaddd %ymm2,%ymm\l1,%ymm13 vpaddd %ymm2,%ymm\l1,%ymm13
vpaddd %ymm2,%ymm\l2,%ymm14 vpaddd %ymm2,%ymm\l2,%ymm14
@ -121,7 +121,7 @@ level2:
#PQCLEAN_DILITHIUM3_AVX2_zetas #PQCLEAN_DILITHIUM3_AVX2_zetas
vpmovzxdq 96(%rdx),%ymm3 vpmovzxdq 96(%rdx),%ymm3
butterfly 4,5,6,7,8,9,10,11 3,3 butterfly 4,5,6,7,8,9,10,11,3,3
#shuffle #shuffle
shuffle4 4,5,3,5 shuffle4 4,5,3,5
@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
vpbroadcastd 116(%rdx),%ymm15 vpbroadcastd 116(%rdx),%ymm15
vpblendd $0xF0,%ymm15,%ymm14,%ymm10 vpblendd $0xF0,%ymm15,%ymm14,%ymm10
butterfly 3,4,6,8,5,7,9,11 10,10 butterfly 3,4,6,8,5,7,9,11,10,10
#shuffle #shuffle
shuffle8 3,4,10,4 shuffle8 3,4,10,4
@ -147,7 +147,7 @@ level4:
#PQCLEAN_DILITHIUM3_AVX2_zetas #PQCLEAN_DILITHIUM3_AVX2_zetas
vpbroadcastd 120(%rdx),%ymm9 vpbroadcastd 120(%rdx),%ymm9
butterfly 10,3,6,5,4,8,7,11 9,9 butterfly 10,3,6,5,4,8,7,11,9,9
#store #store
vmovdqa %ymm10,(%rdi) vmovdqa %ymm10,(%rdi)
@ -233,7 +233,7 @@ level7:
#PQCLEAN_DILITHIUM3_AVX2_zetas #PQCLEAN_DILITHIUM3_AVX2_zetas
vpbroadcastd 24(%rdx),%ymm3 vpbroadcastd 24(%rdx),%ymm3
butterfly 4,5,6,7,8,9,10,11 3,3 butterfly 4,5,6,7,8,9,10,11,3,3
#consts #consts
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xdiv(%rip),%ymm3 vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xdiv(%rip),%ymm3

View File

@ -6,21 +6,31 @@
#include "nttconsts.h" #include "nttconsts.h"
#include "params.h" #include "params.h"
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(uint64_t *tmp, void PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(
uint64_t *tmp,
const uint32_t *a, const uint32_t *a,
const uint32_t *zetas); const uint32_t *zetas
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(uint32_t *a, );
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(
uint32_t *a,
const uint64_t *tmp, const uint64_t *tmp,
const uint32_t *zetas); const uint32_t *zetas
);
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(uint64_t *tmp, void PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(
uint64_t *tmp,
const uint32_t *a, const uint32_t *a,
const uint32_t *zetas_inv); const uint32_t *zetas_inv
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(uint32_t *a, );
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(
uint32_t *a,
const uint64_t *tmp, const uint64_t *tmp,
const uint32_t *zetas_inv); const uint32_t *zetas_inv
);
void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b); void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(
void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b); uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(
uint32_t *c, const uint32_t *a, const uint32_t *b);
#endif #endif

View File

@ -1,6 +1,6 @@
.include "shuffle.inc" .include "shuffle.inc"
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3 .macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
#mul #mul
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0 vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1 vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
@ -68,7 +68,7 @@ level1:
vpbroadcastd 4(%rdx),%ymm12 vpbroadcastd 4(%rdx),%ymm12
vpbroadcastd 8(%rdx),%ymm13 vpbroadcastd 8(%rdx),%ymm13
butterfly 4,5,8,9,6,7,10,11 12,12,13,13 butterfly 4,5,8,9,6,7,10,11,12,12,13,13
level2: level2:
#PQCLEAN_DILITHIUM3_AVX2_zetas #PQCLEAN_DILITHIUM3_AVX2_zetas
@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
vpbroadcastd 20(%rdx),%ymm14 vpbroadcastd 20(%rdx),%ymm14
vpbroadcastd 24(%rdx),%ymm15 vpbroadcastd 24(%rdx),%ymm15
butterfly 4,6,8,10,5,7,9,11 12,13,14,15 butterfly 4,6,8,10,5,7,9,11,12,13,14,15
#store #store
vmovdqa %ymm4,(%rdi) vmovdqa %ymm4,(%rdi)
@ -125,7 +125,7 @@ shuffle8 5,9,4,9
shuffle8 6,10,5,10 shuffle8 6,10,5,10
shuffle8 7,11,6,11 shuffle8 7,11,6,11
butterfly 3,8,4,9,5,10,6,11 12,12,12,12 butterfly 3,8,4,9,5,10,6,11,12,12,12,12
level5: level5:
#PQCLEAN_DILITHIUM3_AVX2_zetas #PQCLEAN_DILITHIUM3_AVX2_zetas
@ -136,14 +136,14 @@ shuffle4 8,10,3,10
shuffle4 4,6,8,6 shuffle4 4,6,8,6
shuffle4 9,11,4,11 shuffle4 9,11,4,11
butterfly 7,5,3,10,8,6,4,11 12,12,12,12 butterfly 7,5,3,10,8,6,4,11,12,12,12,12
level6: level6:
#PQCLEAN_DILITHIUM3_AVX2_zetas #PQCLEAN_DILITHIUM3_AVX2_zetas
vpmovzxdq 28(%rdx),%ymm12 vpmovzxdq 28(%rdx),%ymm12
vpmovzxdq 44(%rdx),%ymm13 vpmovzxdq 44(%rdx),%ymm13
butterfly 7,5,8,6,3,10,4,11 12,12,13,13 butterfly 7,5,8,6,3,10,4,11,12,12,13,13
level7: level7:
#PQCLEAN_DILITHIUM3_AVX2_zetas #PQCLEAN_DILITHIUM3_AVX2_zetas
@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
vpmovzxdq 92(%rdx),%ymm14 vpmovzxdq 92(%rdx),%ymm14
vpmovzxdq 108(%rdx),%ymm15 vpmovzxdq 108(%rdx),%ymm15
butterfly 7,3,8,4,5,10,6,11 12,13,14,15 butterfly 7,3,8,4,5,10,6,11,12,13,14,15
#store #store
vpsllq $32,%ymm5,%ymm5 vpsllq $32,%ymm5,%ymm5

View File

@ -1,6 +1,6 @@
.include "shuffle.inc" .include "shuffle.inc"
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3 .macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
vpaddd %ymm2,%ymm\l0,%ymm12 vpaddd %ymm2,%ymm\l0,%ymm12
vpaddd %ymm2,%ymm\l1,%ymm13 vpaddd %ymm2,%ymm\l1,%ymm13
vpaddd %ymm2,%ymm\l2,%ymm14 vpaddd %ymm2,%ymm\l2,%ymm14
@ -121,7 +121,7 @@ level2:
#PQCLEAN_DILITHIUM4_AVX2_zetas #PQCLEAN_DILITHIUM4_AVX2_zetas
vpmovzxdq 96(%rdx),%ymm3 vpmovzxdq 96(%rdx),%ymm3
butterfly 4,5,6,7,8,9,10,11 3,3 butterfly 4,5,6,7,8,9,10,11,3,3
#shuffle #shuffle
shuffle4 4,5,3,5 shuffle4 4,5,3,5
@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
vpbroadcastd 116(%rdx),%ymm15 vpbroadcastd 116(%rdx),%ymm15
vpblendd $0xF0,%ymm15,%ymm14,%ymm10 vpblendd $0xF0,%ymm15,%ymm14,%ymm10
butterfly 3,4,6,8,5,7,9,11 10,10 butterfly 3,4,6,8,5,7,9,11,10,10
#shuffle #shuffle
shuffle8 3,4,10,4 shuffle8 3,4,10,4
@ -147,7 +147,7 @@ level4:
#PQCLEAN_DILITHIUM4_AVX2_zetas #PQCLEAN_DILITHIUM4_AVX2_zetas
vpbroadcastd 120(%rdx),%ymm9 vpbroadcastd 120(%rdx),%ymm9
butterfly 10,3,6,5,4,8,7,11 9,9 butterfly 10,3,6,5,4,8,7,11,9,9
#store #store
vmovdqa %ymm10,(%rdi) vmovdqa %ymm10,(%rdi)
@ -233,7 +233,7 @@ level7:
#PQCLEAN_DILITHIUM4_AVX2_zetas #PQCLEAN_DILITHIUM4_AVX2_zetas
vpbroadcastd 24(%rdx),%ymm3 vpbroadcastd 24(%rdx),%ymm3
butterfly 4,5,6,7,8,9,10,11 3,3 butterfly 4,5,6,7,8,9,10,11,3,3
#consts #consts
vmovdqa _PQCLEAN_DILITHIUM4_AVX2_8xdiv(%rip),%ymm3 vmovdqa _PQCLEAN_DILITHIUM4_AVX2_8xdiv(%rip),%ymm3

View File

@ -6,21 +6,31 @@
#include "nttconsts.h" #include "nttconsts.h"
#include "params.h" #include "params.h"
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx(uint64_t *tmp, void PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx(
uint64_t *tmp,
const uint32_t *a, const uint32_t *a,
const uint32_t *zetas); const uint32_t *zetas
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx(uint32_t *a, );
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx(
uint32_t *a,
const uint64_t *tmp, const uint64_t *tmp,
const uint32_t *zetas); const uint32_t *zetas
);
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx(uint64_t *tmp, void PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx(
uint64_t *tmp,
const uint32_t *a, const uint32_t *a,
const uint32_t *zetas_inv); const uint32_t *zetas_inv
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx(uint32_t *a, );
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx(
uint32_t *a,
const uint64_t *tmp, const uint64_t *tmp,
const uint32_t *zetas_inv); const uint32_t *zetas_inv
);
void PQCLEAN_DILITHIUM4_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b); void PQCLEAN_DILITHIUM4_AVX2_pointwise_avx(
void PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b); uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx(
uint32_t *c, const uint32_t *a, const uint32_t *b);
#endif #endif

View File

@ -1,6 +1,6 @@
.include "shuffle.inc" .include "shuffle.inc"
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3 .macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
#mul #mul
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0 vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1 vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
@ -68,7 +68,7 @@ level1:
vpbroadcastd 4(%rdx),%ymm12 vpbroadcastd 4(%rdx),%ymm12
vpbroadcastd 8(%rdx),%ymm13 vpbroadcastd 8(%rdx),%ymm13
butterfly 4,5,8,9,6,7,10,11 12,12,13,13 butterfly 4,5,8,9,6,7,10,11,12,12,13,13
level2: level2:
#PQCLEAN_DILITHIUM4_AVX2_zetas #PQCLEAN_DILITHIUM4_AVX2_zetas
@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
vpbroadcastd 20(%rdx),%ymm14 vpbroadcastd 20(%rdx),%ymm14
vpbroadcastd 24(%rdx),%ymm15 vpbroadcastd 24(%rdx),%ymm15
butterfly 4,6,8,10,5,7,9,11 12,13,14,15 butterfly 4,6,8,10,5,7,9,11,12,13,14,15
#store #store
vmovdqa %ymm4,(%rdi) vmovdqa %ymm4,(%rdi)
@ -125,7 +125,7 @@ shuffle8 5,9,4,9
shuffle8 6,10,5,10 shuffle8 6,10,5,10
shuffle8 7,11,6,11 shuffle8 7,11,6,11
butterfly 3,8,4,9,5,10,6,11 12,12,12,12 butterfly 3,8,4,9,5,10,6,11,12,12,12,12
level5: level5:
#PQCLEAN_DILITHIUM4_AVX2_zetas #PQCLEAN_DILITHIUM4_AVX2_zetas
@ -136,14 +136,14 @@ shuffle4 8,10,3,10
shuffle4 4,6,8,6 shuffle4 4,6,8,6
shuffle4 9,11,4,11 shuffle4 9,11,4,11
butterfly 7,5,3,10,8,6,4,11 12,12,12,12 butterfly 7,5,3,10,8,6,4,11,12,12,12,12
level6: level6:
#PQCLEAN_DILITHIUM4_AVX2_zetas #PQCLEAN_DILITHIUM4_AVX2_zetas
vpmovzxdq 28(%rdx),%ymm12 vpmovzxdq 28(%rdx),%ymm12
vpmovzxdq 44(%rdx),%ymm13 vpmovzxdq 44(%rdx),%ymm13
butterfly 7,5,8,6,3,10,4,11 12,12,13,13 butterfly 7,5,8,6,3,10,4,11,12,12,13,13
level7: level7:
#PQCLEAN_DILITHIUM4_AVX2_zetas #PQCLEAN_DILITHIUM4_AVX2_zetas
@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
vpmovzxdq 92(%rdx),%ymm14 vpmovzxdq 92(%rdx),%ymm14
vpmovzxdq 108(%rdx),%ymm15 vpmovzxdq 108(%rdx),%ymm15
butterfly 7,3,8,4,5,10,6,11 12,13,14,15 butterfly 7,3,8,4,5,10,6,11,12,13,14,15
#store #store
vpsllq $32,%ymm5,%ymm5 vpsllq $32,%ymm5,%ymm5