Use more robust macro syntax
This commit is contained in:
parent
a1fb93da73
commit
dbf2d34235
@ -1,6 +1,6 @@
|
|||||||
.include "shuffle.inc"
|
.include "shuffle.inc"
|
||||||
|
|
||||||
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3
|
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
|
||||||
vpaddd %ymm2,%ymm\l0,%ymm12
|
vpaddd %ymm2,%ymm\l0,%ymm12
|
||||||
vpaddd %ymm2,%ymm\l1,%ymm13
|
vpaddd %ymm2,%ymm\l1,%ymm13
|
||||||
vpaddd %ymm2,%ymm\l2,%ymm14
|
vpaddd %ymm2,%ymm\l2,%ymm14
|
||||||
@ -121,7 +121,7 @@ level2:
|
|||||||
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
||||||
vpmovzxdq 96(%rdx),%ymm3
|
vpmovzxdq 96(%rdx),%ymm3
|
||||||
|
|
||||||
butterfly 4,5,6,7,8,9,10,11 3,3
|
butterfly 4,5,6,7,8,9,10,11,3,3
|
||||||
|
|
||||||
#shuffle
|
#shuffle
|
||||||
shuffle4 4,5,3,5
|
shuffle4 4,5,3,5
|
||||||
@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
|
|||||||
vpbroadcastd 116(%rdx),%ymm15
|
vpbroadcastd 116(%rdx),%ymm15
|
||||||
vpblendd $0xF0,%ymm15,%ymm14,%ymm10
|
vpblendd $0xF0,%ymm15,%ymm14,%ymm10
|
||||||
|
|
||||||
butterfly 3,4,6,8,5,7,9,11 10,10
|
butterfly 3,4,6,8,5,7,9,11,10,10
|
||||||
|
|
||||||
#shuffle
|
#shuffle
|
||||||
shuffle8 3,4,10,4
|
shuffle8 3,4,10,4
|
||||||
@ -147,7 +147,7 @@ level4:
|
|||||||
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
||||||
vpbroadcastd 120(%rdx),%ymm9
|
vpbroadcastd 120(%rdx),%ymm9
|
||||||
|
|
||||||
butterfly 10,3,6,5,4,8,7,11 9,9
|
butterfly 10,3,6,5,4,8,7,11,9,9
|
||||||
|
|
||||||
#store
|
#store
|
||||||
vmovdqa %ymm10,(%rdi)
|
vmovdqa %ymm10,(%rdi)
|
||||||
@ -233,7 +233,7 @@ level7:
|
|||||||
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
||||||
vpbroadcastd 24(%rdx),%ymm3
|
vpbroadcastd 24(%rdx),%ymm3
|
||||||
|
|
||||||
butterfly 4,5,6,7,8,9,10,11 3,3
|
butterfly 4,5,6,7,8,9,10,11,3,3
|
||||||
|
|
||||||
#consts
|
#consts
|
||||||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xdiv(%rip),%ymm3
|
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xdiv(%rip),%ymm3
|
||||||
|
@ -6,21 +6,31 @@
|
|||||||
#include "nttconsts.h"
|
#include "nttconsts.h"
|
||||||
#include "params.h"
|
#include "params.h"
|
||||||
|
|
||||||
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
|
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(
|
||||||
|
uint64_t *tmp,
|
||||||
const uint32_t *a,
|
const uint32_t *a,
|
||||||
const uint32_t *zetas);
|
const uint32_t *zetas
|
||||||
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(uint32_t *a,
|
);
|
||||||
|
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(
|
||||||
|
uint32_t *a,
|
||||||
const uint64_t *tmp,
|
const uint64_t *tmp,
|
||||||
const uint32_t *zetas);
|
const uint32_t *zetas
|
||||||
|
);
|
||||||
|
|
||||||
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
|
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(
|
||||||
|
uint64_t *tmp,
|
||||||
const uint32_t *a,
|
const uint32_t *a,
|
||||||
const uint32_t *zetas_inv);
|
const uint32_t *zetas_inv
|
||||||
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(uint32_t *a,
|
);
|
||||||
|
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(
|
||||||
|
uint32_t *a,
|
||||||
const uint64_t *tmp,
|
const uint64_t *tmp,
|
||||||
const uint32_t *zetas_inv);
|
const uint32_t *zetas_inv
|
||||||
|
);
|
||||||
|
|
||||||
void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
|
void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(
|
||||||
void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
|
uint32_t *c, const uint32_t *a, const uint32_t *b);
|
||||||
|
void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(
|
||||||
|
uint32_t *c, const uint32_t *a, const uint32_t *b);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
.include "shuffle.inc"
|
.include "shuffle.inc"
|
||||||
|
|
||||||
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3
|
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
|
||||||
#mul
|
#mul
|
||||||
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
|
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
|
||||||
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
|
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
|
||||||
@ -68,7 +68,7 @@ level1:
|
|||||||
vpbroadcastd 4(%rdx),%ymm12
|
vpbroadcastd 4(%rdx),%ymm12
|
||||||
vpbroadcastd 8(%rdx),%ymm13
|
vpbroadcastd 8(%rdx),%ymm13
|
||||||
|
|
||||||
butterfly 4,5,8,9,6,7,10,11 12,12,13,13
|
butterfly 4,5,8,9,6,7,10,11,12,12,13,13
|
||||||
|
|
||||||
level2:
|
level2:
|
||||||
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
||||||
@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
|
|||||||
vpbroadcastd 20(%rdx),%ymm14
|
vpbroadcastd 20(%rdx),%ymm14
|
||||||
vpbroadcastd 24(%rdx),%ymm15
|
vpbroadcastd 24(%rdx),%ymm15
|
||||||
|
|
||||||
butterfly 4,6,8,10,5,7,9,11 12,13,14,15
|
butterfly 4,6,8,10,5,7,9,11,12,13,14,15
|
||||||
|
|
||||||
#store
|
#store
|
||||||
vmovdqa %ymm4,(%rdi)
|
vmovdqa %ymm4,(%rdi)
|
||||||
@ -125,7 +125,7 @@ shuffle8 5,9,4,9
|
|||||||
shuffle8 6,10,5,10
|
shuffle8 6,10,5,10
|
||||||
shuffle8 7,11,6,11
|
shuffle8 7,11,6,11
|
||||||
|
|
||||||
butterfly 3,8,4,9,5,10,6,11 12,12,12,12
|
butterfly 3,8,4,9,5,10,6,11,12,12,12,12
|
||||||
|
|
||||||
level5:
|
level5:
|
||||||
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
||||||
@ -136,14 +136,14 @@ shuffle4 8,10,3,10
|
|||||||
shuffle4 4,6,8,6
|
shuffle4 4,6,8,6
|
||||||
shuffle4 9,11,4,11
|
shuffle4 9,11,4,11
|
||||||
|
|
||||||
butterfly 7,5,3,10,8,6,4,11 12,12,12,12
|
butterfly 7,5,3,10,8,6,4,11,12,12,12,12
|
||||||
|
|
||||||
level6:
|
level6:
|
||||||
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
||||||
vpmovzxdq 28(%rdx),%ymm12
|
vpmovzxdq 28(%rdx),%ymm12
|
||||||
vpmovzxdq 44(%rdx),%ymm13
|
vpmovzxdq 44(%rdx),%ymm13
|
||||||
|
|
||||||
butterfly 7,5,8,6,3,10,4,11 12,12,13,13
|
butterfly 7,5,8,6,3,10,4,11,12,12,13,13
|
||||||
|
|
||||||
level7:
|
level7:
|
||||||
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
#PQCLEAN_DILITHIUM2_AVX2_zetas
|
||||||
@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
|
|||||||
vpmovzxdq 92(%rdx),%ymm14
|
vpmovzxdq 92(%rdx),%ymm14
|
||||||
vpmovzxdq 108(%rdx),%ymm15
|
vpmovzxdq 108(%rdx),%ymm15
|
||||||
|
|
||||||
butterfly 7,3,8,4,5,10,6,11 12,13,14,15
|
butterfly 7,3,8,4,5,10,6,11,12,13,14,15
|
||||||
|
|
||||||
#store
|
#store
|
||||||
vpsllq $32,%ymm5,%ymm5
|
vpsllq $32,%ymm5,%ymm5
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
.include "shuffle.inc"
|
.include "shuffle.inc"
|
||||||
|
|
||||||
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3
|
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
|
||||||
vpaddd %ymm2,%ymm\l0,%ymm12
|
vpaddd %ymm2,%ymm\l0,%ymm12
|
||||||
vpaddd %ymm2,%ymm\l1,%ymm13
|
vpaddd %ymm2,%ymm\l1,%ymm13
|
||||||
vpaddd %ymm2,%ymm\l2,%ymm14
|
vpaddd %ymm2,%ymm\l2,%ymm14
|
||||||
@ -121,7 +121,7 @@ level2:
|
|||||||
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
||||||
vpmovzxdq 96(%rdx),%ymm3
|
vpmovzxdq 96(%rdx),%ymm3
|
||||||
|
|
||||||
butterfly 4,5,6,7,8,9,10,11 3,3
|
butterfly 4,5,6,7,8,9,10,11,3,3
|
||||||
|
|
||||||
#shuffle
|
#shuffle
|
||||||
shuffle4 4,5,3,5
|
shuffle4 4,5,3,5
|
||||||
@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
|
|||||||
vpbroadcastd 116(%rdx),%ymm15
|
vpbroadcastd 116(%rdx),%ymm15
|
||||||
vpblendd $0xF0,%ymm15,%ymm14,%ymm10
|
vpblendd $0xF0,%ymm15,%ymm14,%ymm10
|
||||||
|
|
||||||
butterfly 3,4,6,8,5,7,9,11 10,10
|
butterfly 3,4,6,8,5,7,9,11,10,10
|
||||||
|
|
||||||
#shuffle
|
#shuffle
|
||||||
shuffle8 3,4,10,4
|
shuffle8 3,4,10,4
|
||||||
@ -147,7 +147,7 @@ level4:
|
|||||||
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
||||||
vpbroadcastd 120(%rdx),%ymm9
|
vpbroadcastd 120(%rdx),%ymm9
|
||||||
|
|
||||||
butterfly 10,3,6,5,4,8,7,11 9,9
|
butterfly 10,3,6,5,4,8,7,11,9,9
|
||||||
|
|
||||||
#store
|
#store
|
||||||
vmovdqa %ymm10,(%rdi)
|
vmovdqa %ymm10,(%rdi)
|
||||||
@ -233,7 +233,7 @@ level7:
|
|||||||
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
||||||
vpbroadcastd 24(%rdx),%ymm3
|
vpbroadcastd 24(%rdx),%ymm3
|
||||||
|
|
||||||
butterfly 4,5,6,7,8,9,10,11 3,3
|
butterfly 4,5,6,7,8,9,10,11,3,3
|
||||||
|
|
||||||
#consts
|
#consts
|
||||||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xdiv(%rip),%ymm3
|
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xdiv(%rip),%ymm3
|
||||||
|
@ -6,21 +6,31 @@
|
|||||||
#include "nttconsts.h"
|
#include "nttconsts.h"
|
||||||
#include "params.h"
|
#include "params.h"
|
||||||
|
|
||||||
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
|
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(
|
||||||
|
uint64_t *tmp,
|
||||||
const uint32_t *a,
|
const uint32_t *a,
|
||||||
const uint32_t *zetas);
|
const uint32_t *zetas
|
||||||
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(uint32_t *a,
|
);
|
||||||
|
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(
|
||||||
|
uint32_t *a,
|
||||||
const uint64_t *tmp,
|
const uint64_t *tmp,
|
||||||
const uint32_t *zetas);
|
const uint32_t *zetas
|
||||||
|
);
|
||||||
|
|
||||||
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
|
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(
|
||||||
|
uint64_t *tmp,
|
||||||
const uint32_t *a,
|
const uint32_t *a,
|
||||||
const uint32_t *zetas_inv);
|
const uint32_t *zetas_inv
|
||||||
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(uint32_t *a,
|
);
|
||||||
|
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(
|
||||||
|
uint32_t *a,
|
||||||
const uint64_t *tmp,
|
const uint64_t *tmp,
|
||||||
const uint32_t *zetas_inv);
|
const uint32_t *zetas_inv
|
||||||
|
);
|
||||||
|
|
||||||
void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
|
void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(
|
||||||
void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
|
uint32_t *c, const uint32_t *a, const uint32_t *b);
|
||||||
|
void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(
|
||||||
|
uint32_t *c, const uint32_t *a, const uint32_t *b);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
.include "shuffle.inc"
|
.include "shuffle.inc"
|
||||||
|
|
||||||
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3
|
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
|
||||||
#mul
|
#mul
|
||||||
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
|
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
|
||||||
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
|
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
|
||||||
@ -68,7 +68,7 @@ level1:
|
|||||||
vpbroadcastd 4(%rdx),%ymm12
|
vpbroadcastd 4(%rdx),%ymm12
|
||||||
vpbroadcastd 8(%rdx),%ymm13
|
vpbroadcastd 8(%rdx),%ymm13
|
||||||
|
|
||||||
butterfly 4,5,8,9,6,7,10,11 12,12,13,13
|
butterfly 4,5,8,9,6,7,10,11,12,12,13,13
|
||||||
|
|
||||||
level2:
|
level2:
|
||||||
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
||||||
@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
|
|||||||
vpbroadcastd 20(%rdx),%ymm14
|
vpbroadcastd 20(%rdx),%ymm14
|
||||||
vpbroadcastd 24(%rdx),%ymm15
|
vpbroadcastd 24(%rdx),%ymm15
|
||||||
|
|
||||||
butterfly 4,6,8,10,5,7,9,11 12,13,14,15
|
butterfly 4,6,8,10,5,7,9,11,12,13,14,15
|
||||||
|
|
||||||
#store
|
#store
|
||||||
vmovdqa %ymm4,(%rdi)
|
vmovdqa %ymm4,(%rdi)
|
||||||
@ -125,7 +125,7 @@ shuffle8 5,9,4,9
|
|||||||
shuffle8 6,10,5,10
|
shuffle8 6,10,5,10
|
||||||
shuffle8 7,11,6,11
|
shuffle8 7,11,6,11
|
||||||
|
|
||||||
butterfly 3,8,4,9,5,10,6,11 12,12,12,12
|
butterfly 3,8,4,9,5,10,6,11,12,12,12,12
|
||||||
|
|
||||||
level5:
|
level5:
|
||||||
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
||||||
@ -136,14 +136,14 @@ shuffle4 8,10,3,10
|
|||||||
shuffle4 4,6,8,6
|
shuffle4 4,6,8,6
|
||||||
shuffle4 9,11,4,11
|
shuffle4 9,11,4,11
|
||||||
|
|
||||||
butterfly 7,5,3,10,8,6,4,11 12,12,12,12
|
butterfly 7,5,3,10,8,6,4,11,12,12,12,12
|
||||||
|
|
||||||
level6:
|
level6:
|
||||||
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
||||||
vpmovzxdq 28(%rdx),%ymm12
|
vpmovzxdq 28(%rdx),%ymm12
|
||||||
vpmovzxdq 44(%rdx),%ymm13
|
vpmovzxdq 44(%rdx),%ymm13
|
||||||
|
|
||||||
butterfly 7,5,8,6,3,10,4,11 12,12,13,13
|
butterfly 7,5,8,6,3,10,4,11,12,12,13,13
|
||||||
|
|
||||||
level7:
|
level7:
|
||||||
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
#PQCLEAN_DILITHIUM3_AVX2_zetas
|
||||||
@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
|
|||||||
vpmovzxdq 92(%rdx),%ymm14
|
vpmovzxdq 92(%rdx),%ymm14
|
||||||
vpmovzxdq 108(%rdx),%ymm15
|
vpmovzxdq 108(%rdx),%ymm15
|
||||||
|
|
||||||
butterfly 7,3,8,4,5,10,6,11 12,13,14,15
|
butterfly 7,3,8,4,5,10,6,11,12,13,14,15
|
||||||
|
|
||||||
#store
|
#store
|
||||||
vpsllq $32,%ymm5,%ymm5
|
vpsllq $32,%ymm5,%ymm5
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
.include "shuffle.inc"
|
.include "shuffle.inc"
|
||||||
|
|
||||||
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3
|
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
|
||||||
vpaddd %ymm2,%ymm\l0,%ymm12
|
vpaddd %ymm2,%ymm\l0,%ymm12
|
||||||
vpaddd %ymm2,%ymm\l1,%ymm13
|
vpaddd %ymm2,%ymm\l1,%ymm13
|
||||||
vpaddd %ymm2,%ymm\l2,%ymm14
|
vpaddd %ymm2,%ymm\l2,%ymm14
|
||||||
@ -121,7 +121,7 @@ level2:
|
|||||||
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
||||||
vpmovzxdq 96(%rdx),%ymm3
|
vpmovzxdq 96(%rdx),%ymm3
|
||||||
|
|
||||||
butterfly 4,5,6,7,8,9,10,11 3,3
|
butterfly 4,5,6,7,8,9,10,11,3,3
|
||||||
|
|
||||||
#shuffle
|
#shuffle
|
||||||
shuffle4 4,5,3,5
|
shuffle4 4,5,3,5
|
||||||
@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
|
|||||||
vpbroadcastd 116(%rdx),%ymm15
|
vpbroadcastd 116(%rdx),%ymm15
|
||||||
vpblendd $0xF0,%ymm15,%ymm14,%ymm10
|
vpblendd $0xF0,%ymm15,%ymm14,%ymm10
|
||||||
|
|
||||||
butterfly 3,4,6,8,5,7,9,11 10,10
|
butterfly 3,4,6,8,5,7,9,11,10,10
|
||||||
|
|
||||||
#shuffle
|
#shuffle
|
||||||
shuffle8 3,4,10,4
|
shuffle8 3,4,10,4
|
||||||
@ -147,7 +147,7 @@ level4:
|
|||||||
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
||||||
vpbroadcastd 120(%rdx),%ymm9
|
vpbroadcastd 120(%rdx),%ymm9
|
||||||
|
|
||||||
butterfly 10,3,6,5,4,8,7,11 9,9
|
butterfly 10,3,6,5,4,8,7,11,9,9
|
||||||
|
|
||||||
#store
|
#store
|
||||||
vmovdqa %ymm10,(%rdi)
|
vmovdqa %ymm10,(%rdi)
|
||||||
@ -233,7 +233,7 @@ level7:
|
|||||||
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
||||||
vpbroadcastd 24(%rdx),%ymm3
|
vpbroadcastd 24(%rdx),%ymm3
|
||||||
|
|
||||||
butterfly 4,5,6,7,8,9,10,11 3,3
|
butterfly 4,5,6,7,8,9,10,11,3,3
|
||||||
|
|
||||||
#consts
|
#consts
|
||||||
vmovdqa _PQCLEAN_DILITHIUM4_AVX2_8xdiv(%rip),%ymm3
|
vmovdqa _PQCLEAN_DILITHIUM4_AVX2_8xdiv(%rip),%ymm3
|
||||||
|
@ -6,21 +6,31 @@
|
|||||||
#include "nttconsts.h"
|
#include "nttconsts.h"
|
||||||
#include "params.h"
|
#include "params.h"
|
||||||
|
|
||||||
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
|
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx(
|
||||||
|
uint64_t *tmp,
|
||||||
const uint32_t *a,
|
const uint32_t *a,
|
||||||
const uint32_t *zetas);
|
const uint32_t *zetas
|
||||||
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx(uint32_t *a,
|
);
|
||||||
|
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx(
|
||||||
|
uint32_t *a,
|
||||||
const uint64_t *tmp,
|
const uint64_t *tmp,
|
||||||
const uint32_t *zetas);
|
const uint32_t *zetas
|
||||||
|
);
|
||||||
|
|
||||||
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
|
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx(
|
||||||
|
uint64_t *tmp,
|
||||||
const uint32_t *a,
|
const uint32_t *a,
|
||||||
const uint32_t *zetas_inv);
|
const uint32_t *zetas_inv
|
||||||
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx(uint32_t *a,
|
);
|
||||||
|
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx(
|
||||||
|
uint32_t *a,
|
||||||
const uint64_t *tmp,
|
const uint64_t *tmp,
|
||||||
const uint32_t *zetas_inv);
|
const uint32_t *zetas_inv
|
||||||
|
);
|
||||||
|
|
||||||
void PQCLEAN_DILITHIUM4_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
|
void PQCLEAN_DILITHIUM4_AVX2_pointwise_avx(
|
||||||
void PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
|
uint32_t *c, const uint32_t *a, const uint32_t *b);
|
||||||
|
void PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx(
|
||||||
|
uint32_t *c, const uint32_t *a, const uint32_t *b);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
.include "shuffle.inc"
|
.include "shuffle.inc"
|
||||||
|
|
||||||
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3
|
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
|
||||||
#mul
|
#mul
|
||||||
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
|
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
|
||||||
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
|
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
|
||||||
@ -68,7 +68,7 @@ level1:
|
|||||||
vpbroadcastd 4(%rdx),%ymm12
|
vpbroadcastd 4(%rdx),%ymm12
|
||||||
vpbroadcastd 8(%rdx),%ymm13
|
vpbroadcastd 8(%rdx),%ymm13
|
||||||
|
|
||||||
butterfly 4,5,8,9,6,7,10,11 12,12,13,13
|
butterfly 4,5,8,9,6,7,10,11,12,12,13,13
|
||||||
|
|
||||||
level2:
|
level2:
|
||||||
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
||||||
@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
|
|||||||
vpbroadcastd 20(%rdx),%ymm14
|
vpbroadcastd 20(%rdx),%ymm14
|
||||||
vpbroadcastd 24(%rdx),%ymm15
|
vpbroadcastd 24(%rdx),%ymm15
|
||||||
|
|
||||||
butterfly 4,6,8,10,5,7,9,11 12,13,14,15
|
butterfly 4,6,8,10,5,7,9,11,12,13,14,15
|
||||||
|
|
||||||
#store
|
#store
|
||||||
vmovdqa %ymm4,(%rdi)
|
vmovdqa %ymm4,(%rdi)
|
||||||
@ -125,7 +125,7 @@ shuffle8 5,9,4,9
|
|||||||
shuffle8 6,10,5,10
|
shuffle8 6,10,5,10
|
||||||
shuffle8 7,11,6,11
|
shuffle8 7,11,6,11
|
||||||
|
|
||||||
butterfly 3,8,4,9,5,10,6,11 12,12,12,12
|
butterfly 3,8,4,9,5,10,6,11,12,12,12,12
|
||||||
|
|
||||||
level5:
|
level5:
|
||||||
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
||||||
@ -136,14 +136,14 @@ shuffle4 8,10,3,10
|
|||||||
shuffle4 4,6,8,6
|
shuffle4 4,6,8,6
|
||||||
shuffle4 9,11,4,11
|
shuffle4 9,11,4,11
|
||||||
|
|
||||||
butterfly 7,5,3,10,8,6,4,11 12,12,12,12
|
butterfly 7,5,3,10,8,6,4,11,12,12,12,12
|
||||||
|
|
||||||
level6:
|
level6:
|
||||||
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
||||||
vpmovzxdq 28(%rdx),%ymm12
|
vpmovzxdq 28(%rdx),%ymm12
|
||||||
vpmovzxdq 44(%rdx),%ymm13
|
vpmovzxdq 44(%rdx),%ymm13
|
||||||
|
|
||||||
butterfly 7,5,8,6,3,10,4,11 12,12,13,13
|
butterfly 7,5,8,6,3,10,4,11,12,12,13,13
|
||||||
|
|
||||||
level7:
|
level7:
|
||||||
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
#PQCLEAN_DILITHIUM4_AVX2_zetas
|
||||||
@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
|
|||||||
vpmovzxdq 92(%rdx),%ymm14
|
vpmovzxdq 92(%rdx),%ymm14
|
||||||
vpmovzxdq 108(%rdx),%ymm15
|
vpmovzxdq 108(%rdx),%ymm15
|
||||||
|
|
||||||
butterfly 7,3,8,4,5,10,6,11 12,13,14,15
|
butterfly 7,3,8,4,5,10,6,11,12,13,14,15
|
||||||
|
|
||||||
#store
|
#store
|
||||||
vpsllq $32,%ymm5,%ymm5
|
vpsllq $32,%ymm5,%ymm5
|
||||||
|
Loading…
Reference in New Issue
Block a user