Browse Source

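Use more robust macro syntax: separate every macro argument with a comma, in both the `.macro` definitions and their invocations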

kyber
Thom Wiggers 5 years ago
committed by Kris Kwiatkowski
parent
commit
ebb416a2ba
9 changed files with 108 additions and 78 deletions
  1. +5
    -5
      crypto_sign/dilithium2/avx2/invntt.s
  2. +24
    -14
      crypto_sign/dilithium2/avx2/ntt.h
  3. +7
    -7
      crypto_sign/dilithium2/avx2/ntt.s
  4. +5
    -5
      crypto_sign/dilithium3/avx2/invntt.s
  5. +24
    -14
      crypto_sign/dilithium3/avx2/ntt.h
  6. +7
    -7
      crypto_sign/dilithium3/avx2/ntt.s
  7. +5
    -5
      crypto_sign/dilithium4/avx2/invntt.s
  8. +24
    -14
      crypto_sign/dilithium4/avx2/ntt.h
  9. +7
    -7
      crypto_sign/dilithium4/avx2/ntt.s

+ 5
- 5
crypto_sign/dilithium2/avx2/invntt.s View File

@@ -1,6 +1,6 @@
.include "shuffle.inc"

.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
vpaddd %ymm2,%ymm\l0,%ymm12
vpaddd %ymm2,%ymm\l1,%ymm13
vpaddd %ymm2,%ymm\l2,%ymm14
@@ -121,7 +121,7 @@ level2:
#PQCLEAN_DILITHIUM2_AVX2_zetas
vpmovzxdq 96(%rdx),%ymm3

butterfly 4,5,6,7,8,9,10,11 3,3
butterfly 4,5,6,7,8,9,10,11,3,3

#shuffle
shuffle4 4,5,3,5
@@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
vpbroadcastd 116(%rdx),%ymm15
vpblendd $0xF0,%ymm15,%ymm14,%ymm10

butterfly 3,4,6,8,5,7,9,11 10,10
butterfly 3,4,6,8,5,7,9,11,10,10

#shuffle
shuffle8 3,4,10,4
@@ -147,7 +147,7 @@ level4:
#PQCLEAN_DILITHIUM2_AVX2_zetas
vpbroadcastd 120(%rdx),%ymm9

butterfly 10,3,6,5,4,8,7,11 9,9
butterfly 10,3,6,5,4,8,7,11,9,9

#store
vmovdqa %ymm10,(%rdi)
@@ -233,7 +233,7 @@ level7:
#PQCLEAN_DILITHIUM2_AVX2_zetas
vpbroadcastd 24(%rdx),%ymm3

butterfly 4,5,6,7,8,9,10,11 3,3
butterfly 4,5,6,7,8,9,10,11,3,3

#consts
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xdiv(%rip),%ymm3


+ 24
- 14
crypto_sign/dilithium2/avx2/ntt.h View File

@@ -6,21 +6,31 @@
#include "nttconsts.h"
#include "params.h"

void PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas);
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas);
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(
uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas
);
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(
uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas
);

void PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas_inv);
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas_inv);
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(
uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas_inv
);
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(
uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas_inv
);

void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(
uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(
uint32_t *c, const uint32_t *a, const uint32_t *b);

#endif

+ 7
- 7
crypto_sign/dilithium2/avx2/ntt.s View File

@@ -1,6 +1,6 @@
.include "shuffle.inc"

.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
#mul
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
@@ -68,7 +68,7 @@ level1:
vpbroadcastd 4(%rdx),%ymm12
vpbroadcastd 8(%rdx),%ymm13

butterfly 4,5,8,9,6,7,10,11 12,12,13,13
butterfly 4,5,8,9,6,7,10,11,12,12,13,13

level2:
#PQCLEAN_DILITHIUM2_AVX2_zetas
@@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
vpbroadcastd 20(%rdx),%ymm14
vpbroadcastd 24(%rdx),%ymm15

butterfly 4,6,8,10,5,7,9,11 12,13,14,15
butterfly 4,6,8,10,5,7,9,11,12,13,14,15

#store
vmovdqa %ymm4,(%rdi)
@@ -125,7 +125,7 @@ shuffle8 5,9,4,9
shuffle8 6,10,5,10
shuffle8 7,11,6,11

butterfly 3,8,4,9,5,10,6,11 12,12,12,12
butterfly 3,8,4,9,5,10,6,11,12,12,12,12

level5:
#PQCLEAN_DILITHIUM2_AVX2_zetas
@@ -136,14 +136,14 @@ shuffle4 8,10,3,10
shuffle4 4,6,8,6
shuffle4 9,11,4,11

butterfly 7,5,3,10,8,6,4,11 12,12,12,12
butterfly 7,5,3,10,8,6,4,11,12,12,12,12

level6:
#PQCLEAN_DILITHIUM2_AVX2_zetas
vpmovzxdq 28(%rdx),%ymm12
vpmovzxdq 44(%rdx),%ymm13

butterfly 7,5,8,6,3,10,4,11 12,12,13,13
butterfly 7,5,8,6,3,10,4,11,12,12,13,13

level7:
#PQCLEAN_DILITHIUM2_AVX2_zetas
@@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
vpmovzxdq 92(%rdx),%ymm14
vpmovzxdq 108(%rdx),%ymm15

butterfly 7,3,8,4,5,10,6,11 12,13,14,15
butterfly 7,3,8,4,5,10,6,11,12,13,14,15

#store
vpsllq $32,%ymm5,%ymm5


+ 5
- 5
crypto_sign/dilithium3/avx2/invntt.s View File

@@ -1,6 +1,6 @@
.include "shuffle.inc"

.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
vpaddd %ymm2,%ymm\l0,%ymm12
vpaddd %ymm2,%ymm\l1,%ymm13
vpaddd %ymm2,%ymm\l2,%ymm14
@@ -121,7 +121,7 @@ level2:
#PQCLEAN_DILITHIUM3_AVX2_zetas
vpmovzxdq 96(%rdx),%ymm3

butterfly 4,5,6,7,8,9,10,11 3,3
butterfly 4,5,6,7,8,9,10,11,3,3

#shuffle
shuffle4 4,5,3,5
@@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
vpbroadcastd 116(%rdx),%ymm15
vpblendd $0xF0,%ymm15,%ymm14,%ymm10

butterfly 3,4,6,8,5,7,9,11 10,10
butterfly 3,4,6,8,5,7,9,11,10,10

#shuffle
shuffle8 3,4,10,4
@@ -147,7 +147,7 @@ level4:
#PQCLEAN_DILITHIUM3_AVX2_zetas
vpbroadcastd 120(%rdx),%ymm9

butterfly 10,3,6,5,4,8,7,11 9,9
butterfly 10,3,6,5,4,8,7,11,9,9

#store
vmovdqa %ymm10,(%rdi)
@@ -233,7 +233,7 @@ level7:
#PQCLEAN_DILITHIUM3_AVX2_zetas
vpbroadcastd 24(%rdx),%ymm3

butterfly 4,5,6,7,8,9,10,11 3,3
butterfly 4,5,6,7,8,9,10,11,3,3

#consts
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xdiv(%rip),%ymm3


+ 24
- 14
crypto_sign/dilithium3/avx2/ntt.h View File

@@ -6,21 +6,31 @@
#include "nttconsts.h"
#include "params.h"

void PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas);
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas);
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(
uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas
);
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(
uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas
);

void PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas_inv);
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas_inv);
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(
uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas_inv
);
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(
uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas_inv
);

void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(
uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(
uint32_t *c, const uint32_t *a, const uint32_t *b);

#endif

+ 7
- 7
crypto_sign/dilithium3/avx2/ntt.s View File

@@ -1,6 +1,6 @@
.include "shuffle.inc"

.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
#mul
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
@@ -68,7 +68,7 @@ level1:
vpbroadcastd 4(%rdx),%ymm12
vpbroadcastd 8(%rdx),%ymm13

butterfly 4,5,8,9,6,7,10,11 12,12,13,13
butterfly 4,5,8,9,6,7,10,11,12,12,13,13

level2:
#PQCLEAN_DILITHIUM3_AVX2_zetas
@@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
vpbroadcastd 20(%rdx),%ymm14
vpbroadcastd 24(%rdx),%ymm15

butterfly 4,6,8,10,5,7,9,11 12,13,14,15
butterfly 4,6,8,10,5,7,9,11,12,13,14,15

#store
vmovdqa %ymm4,(%rdi)
@@ -125,7 +125,7 @@ shuffle8 5,9,4,9
shuffle8 6,10,5,10
shuffle8 7,11,6,11

butterfly 3,8,4,9,5,10,6,11 12,12,12,12
butterfly 3,8,4,9,5,10,6,11,12,12,12,12

level5:
#PQCLEAN_DILITHIUM3_AVX2_zetas
@@ -136,14 +136,14 @@ shuffle4 8,10,3,10
shuffle4 4,6,8,6
shuffle4 9,11,4,11

butterfly 7,5,3,10,8,6,4,11 12,12,12,12
butterfly 7,5,3,10,8,6,4,11,12,12,12,12

level6:
#PQCLEAN_DILITHIUM3_AVX2_zetas
vpmovzxdq 28(%rdx),%ymm12
vpmovzxdq 44(%rdx),%ymm13

butterfly 7,5,8,6,3,10,4,11 12,12,13,13
butterfly 7,5,8,6,3,10,4,11,12,12,13,13

level7:
#PQCLEAN_DILITHIUM3_AVX2_zetas
@@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
vpmovzxdq 92(%rdx),%ymm14
vpmovzxdq 108(%rdx),%ymm15

butterfly 7,3,8,4,5,10,6,11 12,13,14,15
butterfly 7,3,8,4,5,10,6,11,12,13,14,15

#store
vpsllq $32,%ymm5,%ymm5


+ 5
- 5
crypto_sign/dilithium4/avx2/invntt.s View File

@@ -1,6 +1,6 @@
.include "shuffle.inc"

.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3,z0=15,z1=3
vpaddd %ymm2,%ymm\l0,%ymm12
vpaddd %ymm2,%ymm\l1,%ymm13
vpaddd %ymm2,%ymm\l2,%ymm14
@@ -121,7 +121,7 @@ level2:
#PQCLEAN_DILITHIUM4_AVX2_zetas
vpmovzxdq 96(%rdx),%ymm3

butterfly 4,5,6,7,8,9,10,11 3,3
butterfly 4,5,6,7,8,9,10,11,3,3

#shuffle
shuffle4 4,5,3,5
@@ -135,7 +135,7 @@ vpbroadcastd 112(%rdx),%ymm14
vpbroadcastd 116(%rdx),%ymm15
vpblendd $0xF0,%ymm15,%ymm14,%ymm10

butterfly 3,4,6,8,5,7,9,11 10,10
butterfly 3,4,6,8,5,7,9,11,10,10

#shuffle
shuffle8 3,4,10,4
@@ -147,7 +147,7 @@ level4:
#PQCLEAN_DILITHIUM4_AVX2_zetas
vpbroadcastd 120(%rdx),%ymm9

butterfly 10,3,6,5,4,8,7,11 9,9
butterfly 10,3,6,5,4,8,7,11,9,9

#store
vmovdqa %ymm10,(%rdi)
@@ -233,7 +233,7 @@ level7:
#PQCLEAN_DILITHIUM4_AVX2_zetas
vpbroadcastd 24(%rdx),%ymm3

butterfly 4,5,6,7,8,9,10,11 3,3
butterfly 4,5,6,7,8,9,10,11,3,3

#consts
vmovdqa _PQCLEAN_DILITHIUM4_AVX2_8xdiv(%rip),%ymm3


+ 24
- 14
crypto_sign/dilithium4/avx2/ntt.h View File

@@ -6,21 +6,31 @@
#include "nttconsts.h"
#include "params.h"

void PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas);
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx(uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas);
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels0t2_avx(
uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas
);
void PQCLEAN_DILITHIUM4_AVX2_ntt_levels3t8_avx(
uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas
);

void PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas_inv);
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx(uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas_inv);
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels0t4_avx(
uint64_t *tmp,
const uint32_t *a,
const uint32_t *zetas_inv
);
void PQCLEAN_DILITHIUM4_AVX2_invntt_levels5t7_avx(
uint32_t *a,
const uint64_t *tmp,
const uint32_t *zetas_inv
);

void PQCLEAN_DILITHIUM4_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM4_AVX2_pointwise_avx(
uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM4_AVX2_pointwise_acc_avx(
uint32_t *c, const uint32_t *a, const uint32_t *b);

#endif

+ 7
- 7
crypto_sign/dilithium4/avx2/ntt.s View File

@@ -1,6 +1,6 @@
.include "shuffle.inc"

.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3,z0=3,z1=3,z2=3,z3=3
#mul
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1
@@ -68,7 +68,7 @@ level1:
vpbroadcastd 4(%rdx),%ymm12
vpbroadcastd 8(%rdx),%ymm13

butterfly 4,5,8,9,6,7,10,11 12,12,13,13
butterfly 4,5,8,9,6,7,10,11,12,12,13,13

level2:
#PQCLEAN_DILITHIUM4_AVX2_zetas
@@ -77,7 +77,7 @@ vpbroadcastd 16(%rdx),%ymm13
vpbroadcastd 20(%rdx),%ymm14
vpbroadcastd 24(%rdx),%ymm15

butterfly 4,6,8,10,5,7,9,11 12,13,14,15
butterfly 4,6,8,10,5,7,9,11,12,13,14,15

#store
vmovdqa %ymm4,(%rdi)
@@ -125,7 +125,7 @@ shuffle8 5,9,4,9
shuffle8 6,10,5,10
shuffle8 7,11,6,11

butterfly 3,8,4,9,5,10,6,11 12,12,12,12
butterfly 3,8,4,9,5,10,6,11,12,12,12,12

level5:
#PQCLEAN_DILITHIUM4_AVX2_zetas
@@ -136,14 +136,14 @@ shuffle4 8,10,3,10
shuffle4 4,6,8,6
shuffle4 9,11,4,11

butterfly 7,5,3,10,8,6,4,11 12,12,12,12
butterfly 7,5,3,10,8,6,4,11,12,12,12,12

level6:
#PQCLEAN_DILITHIUM4_AVX2_zetas
vpmovzxdq 28(%rdx),%ymm12
vpmovzxdq 44(%rdx),%ymm13

butterfly 7,5,8,6,3,10,4,11 12,12,13,13
butterfly 7,5,8,6,3,10,4,11,12,12,13,13

level7:
#PQCLEAN_DILITHIUM4_AVX2_zetas
@@ -152,7 +152,7 @@ vpmovzxdq 76(%rdx),%ymm13
vpmovzxdq 92(%rdx),%ymm14
vpmovzxdq 108(%rdx),%ymm15

butterfly 7,3,8,4,5,10,6,11 12,13,14,15
butterfly 7,3,8,4,5,10,6,11,12,13,14,15

#store
vpsllq $32,%ymm5,%ymm5


Loading…
Cancel
Save