Use more robust macro syntax

这个提交包含在:
Thom Wiggers 2019-12-11 13:01:07 +01:00 提交者 Kris Kwiatkowski
父节点 a037d6ccf6
当前提交 ebb416a2ba
共有 9 个文件被更改,包括 108 次插入 和 78 次删除

查看文件

@ -1,6 +1,6 @@
.include "shuffle.inc"
-.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3
+.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
vpaddd %ymm2,%ymm\l0,%ymm12
vpaddd %ymm2,%ymm\l1,%ymm13
vpaddd %ymm2,%ymm\l2,%ymm14
@ -121,7 +121,7 @@ level2:
#PQCLEAN_DILITHIUM2_AVX2_zetas
vpmovzxdq 96(%rdx),%ymm3
-butterfly 4,5,6,7,8,9,10,11 3,3
+butterfly 4,5,6,7,8,9,10,11,3,3
#shuffle
shuffle4 4,5,3,5
@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
vpbroadcastd 116(%rdx),%ymm15
vpblendd $0xF0,%ymm15,%ymm14,%ymm10
-butterfly 3,4,6,8,5,7,9,11 10,10
+butterfly 3,4,6,8,5,7,9,11,10,10
#shuffle
shuffle8 3,4,10,4
@ -147,7 +147,7 @@ level4:
#PQCLEAN_DILITHIUM2_AVX2_zetas
vpbroadcastd 120(%rdx),%ymm9
-butterfly 10,3,6,5,4,8,7,11 9,9
+butterfly 10,3,6,5,4,8,7,11,9,9
#store
vmovdqa %ymm10,(%rdi)
@ -233,7 +233,7 @@ level7:
#PQCLEAN_DILITHIUM2_AVX2_zetas
vpbroadcastd 24(%rdx),%ymm3
-butterfly 4,5,6,7,8,9,10,11 3,3
+butterfly 4,5,6,7,8,9,10,11,3,3
#consts
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xdiv(%rip),%ymm3

查看文件

@ -6,21 +6,31 @@
#include "nttconsts.h"
#include "params.h"
-void PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
-const uint32_t *a,
-const uint32_t *zetas);
-void PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(uint32_t *a,
-const uint64_t *tmp,
-const uint32_t *zetas);
+void PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(
+uint64_t *tmp,
+const uint32_t *a,
+const uint32_t *zetas
+);
+void PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(
+uint32_t *a,
+const uint64_t *tmp,
+const uint32_t *zetas
+);
-void PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
-const uint32_t *a,
-const uint32_t *zetas_inv);
-void PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(uint32_t *a,
-const uint64_t *tmp,
-const uint32_t *zetas_inv);
+void PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(
+uint64_t *tmp,
+const uint32_t *a,
+const uint32_t *zetas_inv
+);
+void PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(
+uint32_t *a,
+const uint64_t *tmp,
+const uint32_t *zetas_inv
+);
-void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
-void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
+void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(
+uint32_t *c, const uint32_t *a, const uint32_t *b);
+void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(
+uint32_t *c, const uint32_t *a, const uint32_t *b);
#endif

查看文件

@ -1,6 +1,6 @@
.include "shuffle.inc"
-.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3
+.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
#mul
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
@ -68,7 +68,7 @@ level1:
vpbroadcastd 4(%rdx),%ymm12
vpbroadcastd 8(%rdx),%ymm13
-butterfly 4,5,8,9,6,7,10,11 12,12,13,13
+butterfly 4,5,8,9,6,7,10,11,12,12,13,13
level2:
#PQCLEAN_DILITHIUM2_AVX2_zetas
@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
vpbroadcastd 20(%rdx),%ymm14
vpbroadcastd 24(%rdx),%ymm15
-butterfly 4,6,8,10,5,7,9,11 12,13,14,15
+butterfly 4,6,8,10,5,7,9,11,12,13,14,15
#store
vmovdqa %ymm4,(%rdi)
@ -125,7 +125,7 @@ shuffle8 5,9,4,9
shuffle8 6,10,5,10
shuffle8 7,11,6,11
-butterfly 3,8,4,9,5,10,6,11 12,12,12,12
+butterfly 3,8,4,9,5,10,6,11,12,12,12,12
level5:
#PQCLEAN_DILITHIUM2_AVX2_zetas
@ -136,14 +136,14 @@ shuffle4 8,10,3,10
shuffle4 4,6,8,6
shuffle4 9,11,4,11
-butterfly 7,5,3,10,8,6,4,11 12,12,12,12
+butterfly 7,5,3,10,8,6,4,11,12,12,12,12
level6:
#PQCLEAN_DILITHIUM2_AVX2_zetas
vpmovzxdq 28(%rdx),%ymm12
vpmovzxdq 44(%rdx),%ymm13
-butterfly 7,5,8,6,3,10,4,11 12,12,13,13
+butterfly 7,5,8,6,3,10,4,11,12,12,13,13
level7:
#PQCLEAN_DILITHIUM2_AVX2_zetas
@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
vpmovzxdq 92(%rdx),%ymm14
vpmovzxdq 108(%rdx),%ymm15
-butterfly 7,3,8,4,5,10,6,11 12,13,14,15
+butterfly 7,3,8,4,5,10,6,11,12,13,14,15
#store
vpsllq $32,%ymm5,%ymm5

查看文件

@ -1,6 +1,6 @@
.include "shuffle.inc"
-.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3
+.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
vpaddd %ymm2,%ymm\l0,%ymm12
vpaddd %ymm2,%ymm\l1,%ymm13
vpaddd %ymm2,%ymm\l2,%ymm14
@ -121,7 +121,7 @@ level2:
#PQCLEAN_DILITHIUM3_AVX2_zetas
vpmovzxdq 96(%rdx),%ymm3
-butterfly 4,5,6,7,8,9,10,11 3,3
+butterfly 4,5,6,7,8,9,10,11,3,3
#shuffle
shuffle4 4,5,3,5
@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
vpbroadcastd 116(%rdx),%ymm15
vpblendd $0xF0,%ymm15,%ymm14,%ymm10
-butterfly 3,4,6,8,5,7,9,11 10,10
+butterfly 3,4,6,8,5,7,9,11,10,10
#shuffle
shuffle8 3,4,10,4
@ -147,7 +147,7 @@ level4:
#PQCLEAN_DILITHIUM3_AVX2_zetas
vpbroadcastd 120(%rdx),%ymm9
-butterfly 10,3,6,5,4,8,7,11 9,9
+butterfly 10,3,6,5,4,8,7,11,9,9
#store
vmovdqa %ymm10,(%rdi)
@ -233,7 +233,7 @@ level7:
#PQCLEAN_DILITHIUM3_AVX2_zetas
vpbroadcastd 24(%rdx),%ymm3
-butterfly 4,5,6,7,8,9,10,11 3,3
+butterfly 4,5,6,7,8,9,10,11,3,3
#consts
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xdiv(%rip),%ymm3

查看文件

@ -6,21 +6,31 @@
#include "nttconsts.h"
#include "params.h"
-void PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
-const uint32_t *a,
-const uint32_t *zetas);
-void PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(uint32_t *a,
-const uint64_t *tmp,
-const uint32_t *zetas);
+void PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(
+uint64_t *tmp,
+const uint32_t *a,
+const uint32_t *zetas
+);
+void PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(
+uint32_t *a,
+const uint64_t *tmp,
+const uint32_t *zetas
+);
-void PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
-const uint32_t *a,
-const uint32_t *zetas_inv);
-void PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(uint32_t *a,
-const uint64_t *tmp,
-const uint32_t *zetas_inv);
+void PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(
+uint64_t *tmp,
+const uint32_t *a,
+const uint32_t *zetas_inv
+);
+void PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(
+uint32_t *a,
+const uint64_t *tmp,
+const uint32_t *zetas_inv
+);
-void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
-void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
+void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(
+uint32_t *c, const uint32_t *a, const uint32_t *b);
+void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(
+uint32_t *c, const uint32_t *a, const uint32_t *b);
#endif

查看文件

@ -1,6 +1,6 @@
.include "shuffle.inc"
-.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3
+.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
#mul
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
@ -68,7 +68,7 @@ level1:
vpbroadcastd 4(%rdx),%ymm12
vpbroadcastd 8(%rdx),%ymm13
-butterfly 4,5,8,9,6,7,10,11 12,12,13,13
+butterfly 4,5,8,9,6,7,10,11,12,12,13,13
level2:
#PQCLEAN_DILITHIUM3_AVX2_zetas
@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
vpbroadcastd 20(%rdx),%ymm14
vpbroadcastd 24(%rdx),%ymm15
-butterfly 4,6,8,10,5,7,9,11 12,13,14,15
+butterfly 4,6,8,10,5,7,9,11,12,13,14,15
#store
vmovdqa %ymm4,(%rdi)
@ -125,7 +125,7 @@ shuffle8 5,9,4,9
shuffle8 6,10,5,10
shuffle8 7,11,6,11
-butterfly 3,8,4,9,5,10,6,11 12,12,12,12
+butterfly 3,8,4,9,5,10,6,11,12,12,12,12
level5:
#PQCLEAN_DILITHIUM3_AVX2_zetas
@ -136,14 +136,14 @@ shuffle4 8,10,3,10
shuffle4 4,6,8,6
shuffle4 9,11,4,11
-butterfly 7,5,3,10,8,6,4,11 12,12,12,12
+butterfly 7,5,3,10,8,6,4,11,12,12,12,12
level6:
#PQCLEAN_DILITHIUM3_AVX2_zetas
vpmovzxdq 28(%rdx),%ymm12
vpmovzxdq 44(%rdx),%ymm13
-butterfly 7,5,8,6,3,10,4,11 12,12,13,13
+butterfly 7,5,8,6,3,10,4,11,12,12,13,13
level7:
#PQCLEAN_DILITHIUM3_AVX2_zetas
@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
vpmovzxdq 92(%rdx),%ymm14
vpmovzxdq 108(%rdx),%ymm15
-butterfly 7,3,8,4,5,10,6,11 12,13,14,15
+butterfly 7,3,8,4,5,10,6,11,12,13,14,15
#store
vpsllq $32,%ymm5,%ymm5

查看文件

@ -1,6 +1,6 @@
.include "shuffle.inc"
-.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3
+.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
vpaddd %ymm2,%ymm\l0,%ymm12
vpaddd %ymm2,%ymm\l1,%ymm13
vpaddd %ymm2,%ymm\l2,%ymm14
@ -121,7 +121,7 @@ level2:
#PQCLEAN_DILITHIUM4_AVX2_zetas
vpmovzxdq 96(%rdx),%ymm3
-butterfly 4,5,6,7,8,9,10,11 3,3
+butterfly 4,5,6,7,8,9,10,11,3,3
#shuffle
shuffle4 4,5,3,5
@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
vpbroadcastd 116(%rdx),%ymm15
vpblendd $0xF0,%ymm15,%ymm14,%ymm10
-butterfly 3,4,6,8,5,7,9,11 10,10
+butterfly 3,4,6,8,5,7,9,11,10,10
#shuffle
shuffle8 3,4,10,4
@ -147,7 +147,7 @@ level4:
#PQCLEAN_DILITHIUM4_AVX2_zetas
vpbroadcastd 120(%rdx),%ymm9
-butterfly 10,3,6,5,4,8,7,11 9,9
+butterfly 10,3,6,5,4,8,7,11,9,9
#store
vmovdqa %ymm10,(%rdi)
@ -233,7 +233,7 @@ level7:
#PQCLEAN_DILITHIUM4_AVX2_zetas
vpbroadcastd 24(%rdx),%ymm3
-butterfly 4,5,6,7,8,9,10,11 3,3
+butterfly 4,5,6,7,8,9,10,11,3,3
#consts
vmovdqa _PQCLEAN_DILITHIUM4_AVX2_8xdiv(%rip),%ymm3

查看文件

@ -6,21 +6,31 @@
#include "nttconsts.h"
#include "params.h"
-void PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
-const uint32_t *a,
-const uint32_t *zetas);
-void PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx(uint32_t *a,
-const uint64_t *tmp,
-const uint32_t *zetas);
+void PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx(
+uint64_t *tmp,
+const uint32_t *a,
+const uint32_t *zetas
+);
+void PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx(
+uint32_t *a,
+const uint64_t *tmp,
+const uint32_t *zetas
+);
-void PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
-const uint32_t *a,
-const uint32_t *zetas_inv);
-void PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx(uint32_t *a,
-const uint64_t *tmp,
-const uint32_t *zetas_inv);
+void PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx(
+uint64_t *tmp,
+const uint32_t *a,
+const uint32_t *zetas_inv
+);
+void PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx(
+uint32_t *a,
+const uint64_t *tmp,
+const uint32_t *zetas_inv
+);
-void PQCLEAN_DILITHIUM4_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
-void PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
+void PQCLEAN_DILITHIUM4_AVX2_pointwise_avx(
+uint32_t *c, const uint32_t *a, const uint32_t *b);
+void PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx(
+uint32_t *c, const uint32_t *a, const uint32_t *b);
#endif

查看文件

@ -1,6 +1,6 @@
.include "shuffle.inc"
-.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3
+.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
#mul
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
@ -68,7 +68,7 @@ level1:
vpbroadcastd 4(%rdx),%ymm12
vpbroadcastd 8(%rdx),%ymm13
-butterfly 4,5,8,9,6,7,10,11 12,12,13,13
+butterfly 4,5,8,9,6,7,10,11,12,12,13,13
level2:
#PQCLEAN_DILITHIUM4_AVX2_zetas
@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
vpbroadcastd 20(%rdx),%ymm14
vpbroadcastd 24(%rdx),%ymm15
-butterfly 4,6,8,10,5,7,9,11 12,13,14,15
+butterfly 4,6,8,10,5,7,9,11,12,13,14,15
#store
vmovdqa %ymm4,(%rdi)
@ -125,7 +125,7 @@ shuffle8 5,9,4,9
shuffle8 6,10,5,10
shuffle8 7,11,6,11
-butterfly 3,8,4,9,5,10,6,11 12,12,12,12
+butterfly 3,8,4,9,5,10,6,11,12,12,12,12
level5:
#PQCLEAN_DILITHIUM4_AVX2_zetas
@ -136,14 +136,14 @@ shuffle4 8,10,3,10
shuffle4 4,6,8,6
shuffle4 9,11,4,11
-butterfly 7,5,3,10,8,6,4,11 12,12,12,12
+butterfly 7,5,3,10,8,6,4,11,12,12,12,12
level6:
#PQCLEAN_DILITHIUM4_AVX2_zetas
vpmovzxdq 28(%rdx),%ymm12
vpmovzxdq 44(%rdx),%ymm13
-butterfly 7,5,8,6,3,10,4,11 12,12,13,13
+butterfly 7,5,8,6,3,10,4,11,12,12,13,13
level7:
#PQCLEAN_DILITHIUM4_AVX2_zetas
@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
vpmovzxdq 92(%rdx),%ymm14
vpmovzxdq 108(%rdx),%ymm15
-butterfly 7,3,8,4,5,10,6,11 12,13,14,15
+butterfly 7,3,8,4,5,10,6,11,12,13,14,15
#store
vpsllq $32,%ymm5,%ymm5