1
1
mirror of https://github.com/henrydcase/pqc.git synced 2024-11-27 09:51:30 +00:00
pqcrypto/crypto_sign/dilithium3/avx2/reduce.s

92 lines
1.8 KiB
ArmAsm
Raw Normal View History

2019-12-06 15:16:41 +00:00
# -----------------------------------------------------------------------
# PQCLEAN_DILITHIUM3_AVX2_reduce_avx
#
# Syntax: GNU as, AT&T operand order (sources first, destination last).
# In:     %rdi = start of the buffer to rewrite in place. Every access is
#                a vmovdqa, so the address must be 32-byte aligned. The
#                loop runs eight times over 128 bytes each, i.e. 1024
#                bytes (256 dwords) in total.
# Out:    no result register is prepared; %eax simply ends up as the
#         spent loop counter (8).
# Clobb:  %rax, %rdi (left 1024 bytes past its entry value),
#         %ymm0-%ymm8, flags.
#
# Per 32-bit lane x:
#       t = x >> 23                     (logical shift)
#       x = (x & mask) - t + (t << 13)
# mask is the vector read from _PQCLEAN_DILITHIUM3_AVX2_8x23ones, which
# is defined outside this file; its name suggests 2^23 - 1 in each lane
# (TODO confirm). Under that assumption the lane becomes
# (x mod 2^23) + t * (2^13 - 1), i.e. every 2^23 is traded for 2^13 - 1,
# which leaves x unchanged modulo 2^23 - 2^13 + 1.
# -----------------------------------------------------------------------
.global PQCLEAN_DILITHIUM3_AVX2_reduce_avx
PQCLEAN_DILITHIUM3_AVX2_reduce_avx:
        vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8x23ones(%rip), %ymm0  # loop-invariant mask
        xor     %eax, %eax                      # eax = chunks finished so far

.Lreduce_avx_chunk:
        # Pull in four vectors; odd registers hold x, even ones will hold t.
        vmovdqa   (%rdi), %ymm1
        vmovdqa 32(%rdi), %ymm3
        vmovdqa 64(%rdi), %ymm5
        vmovdqa 96(%rdi), %ymm7

        # t = x >> 23. Must happen before x is masked below.
        vpsrld  $23, %ymm1, %ymm2
        vpsrld  $23, %ymm3, %ymm4
        vpsrld  $23, %ymm5, %ymm6
        vpsrld  $23, %ymm7, %ymm8

        # x &= mask
        vpand   %ymm0, %ymm1, %ymm1
        vpand   %ymm0, %ymm3, %ymm3
        vpand   %ymm0, %ymm5, %ymm5
        vpand   %ymm0, %ymm7, %ymm7

        # x -= t. Uses the unshifted t, so it precedes the left shift.
        vpsubd  %ymm2, %ymm1, %ymm1
        vpsubd  %ymm4, %ymm3, %ymm3
        vpsubd  %ymm6, %ymm5, %ymm5
        vpsubd  %ymm8, %ymm7, %ymm7

        # t <<= 13
        vpslld  $13, %ymm2, %ymm2
        vpslld  $13, %ymm4, %ymm4
        vpslld  $13, %ymm6, %ymm6
        vpslld  $13, %ymm8, %ymm8

        # x += t
        vpaddd  %ymm2, %ymm1, %ymm1
        vpaddd  %ymm4, %ymm3, %ymm3
        vpaddd  %ymm6, %ymm5, %ymm5
        vpaddd  %ymm8, %ymm7, %ymm7

        # Write the four vectors back over their source.
        vmovdqa %ymm1,   (%rdi)
        vmovdqa %ymm3, 32(%rdi)
        vmovdqa %ymm5, 64(%rdi)
        vmovdqa %ymm7, 96(%rdi)

        add     $1, %eax                        # one more chunk done
        add     $128, %rdi                      # step to the next 128-byte chunk
        cmp     $8, %eax
        jb      .Lreduce_avx_chunk              # unsigned: repeat while eax < 8
        ret
# -----------------------------------------------------------------------
# PQCLEAN_DILITHIUM3_AVX2_csubq_avx
#
# Syntax: GNU as, AT&T operand order (sources first, destination last).
# In:     %rdi = start of the buffer to rewrite in place. Every access is
#                a vmovdqa, so the address must be 32-byte aligned. The
#                loop runs eight times over 128 bytes each, i.e. 1024
#                bytes (256 dwords) in total.
# Out:    no result register is prepared; %eax simply ends up as the
#         spent loop counter (8).
# Clobb:  %rax, %rdi (left 1024 bytes past its entry value),
#         %ymm0-%ymm8, flags.
#
# Branch-free conditional subtract, per 32-bit lane x:
#       d = x - Q
#       m = d >> 31                     (arithmetic: all ones iff d < 0)
#       x = d + (m & Q)
# so a lane keeps x - Q when that difference has a clear sign bit and is
# restored to its original value otherwise. Q is the vector read from
# _PQCLEAN_DILITHIUM3_AVX2_8xq, defined outside this file; presumably the
# modulus q replicated in every lane (TODO confirm).
# -----------------------------------------------------------------------
.global PQCLEAN_DILITHIUM3_AVX2_csubq_avx
PQCLEAN_DILITHIUM3_AVX2_csubq_avx:
        vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xq(%rip), %ymm0       # loop-invariant Q
        xor     %eax, %eax                      # eax = chunks finished so far

.Lcsubq_avx_chunk:
        # Pull in four vectors; odd registers hold x, even ones the sign mask.
        vmovdqa   (%rdi), %ymm1
        vmovdqa 32(%rdi), %ymm3
        vmovdqa 64(%rdi), %ymm5
        vmovdqa 96(%rdi), %ymm7

        # d = x - Q
        vpsubd  %ymm0, %ymm1, %ymm1
        vpsubd  %ymm0, %ymm3, %ymm3
        vpsubd  %ymm0, %ymm5, %ymm5
        vpsubd  %ymm0, %ymm7, %ymm7

        # m = d >> 31, replicating the sign bit across the whole lane.
        vpsrad  $31, %ymm1, %ymm2
        vpsrad  $31, %ymm3, %ymm4
        vpsrad  $31, %ymm5, %ymm6
        vpsrad  $31, %ymm7, %ymm8

        # m &= Q: Q where the subtraction went negative, zero elsewhere.
        vpand   %ymm0, %ymm2, %ymm2
        vpand   %ymm0, %ymm4, %ymm4
        vpand   %ymm0, %ymm6, %ymm6
        vpand   %ymm0, %ymm8, %ymm8

        # x = d + m, undoing the subtraction only in the negative lanes.
        vpaddd  %ymm2, %ymm1, %ymm1
        vpaddd  %ymm4, %ymm3, %ymm3
        vpaddd  %ymm6, %ymm5, %ymm5
        vpaddd  %ymm8, %ymm7, %ymm7

        # Write the four vectors back over their source.
        vmovdqa %ymm1,   (%rdi)
        vmovdqa %ymm3, 32(%rdi)
        vmovdqa %ymm5, 64(%rdi)
        vmovdqa %ymm7, 96(%rdi)

        add     $1, %eax                        # one more chunk done
        add     $128, %rdi                      # step to the next 128-byte chunk
        cmp     $8, %eax
        jb      .Lcsubq_avx_chunk               # unsigned: repeat while eax < 8
        ret