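# Listing metadata: 113 lines, 2.2 KiB. The viewer labelled this file ArmAsm,
# but the content is x86-64 assembly using ymm registers, in GNU as AT&T syntax.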
.include "fq.inc"
|
|
|
|
#-----------------------------------------------------------------------
# PQCLEAN_KYBER76890S_AVX2_reduce_avx
#
# Applies the red16 macro (from fq.inc) to eight consecutive 32-byte
# vectors, that is 256 bytes, read from and written back to the
# buffer addressed by rdi.
#
# In:    rdi = buffer address. Must be 32-byte aligned, because every
#              access below uses vmovdqa. Presumably the first
#              argument under the SysV AMD64 ABI (TODO confirm with
#              the C callers).
# Out:   the 256 bytes at rdi are overwritten with the processed data.
# Regs:  ymm0      = vector loaded from ..._16xq
#        ymm1      = vector loaded from ..._16xv
#        ymm2-ymm9 = the eight data vectors
#        ymm10-ymm15 = second operand handed to red16, presumably a
#                      scratch register (verify against fq.inc)
# Note:  red16 is never given ymm0 or ymm1 explicitly, so it presumably
#        reads them implicitly (verify against fq.inc). No vzeroupper
#        is issued before ret.
#-----------------------------------------------------------------------
.global PQCLEAN_KYBER76890S_AVX2_reduce_avx
PQCLEAN_KYBER76890S_AVX2_reduce_avx:
        # Constant vectors, fetched RIP-relative.
        vmovdqa PQCLEAN_KYBER76890S_AVX2_16xq(%rip), %ymm0
        vmovdqa PQCLEAN_KYBER76890S_AVX2_16xv(%rip), %ymm1

        # Load phase: vector k of the buffer lands in ymm(k+2), k = 0..7.
        .irp    r,2,3,4,5,6,7,8,9
        vmovdqa 32*(\r-2)(%rdi), %ymm\r
        .endr

        # Process each data vector in register order. Arguments are the
        # data register followed by the second register operand, which
        # wraps back to ymm10 after ymm15.
        red16   2 10
        red16   3 11
        red16   4 12
        red16   5 13
        red16   6 14
        red16   7 15
        red16   8 10
        red16   9 11

        # Store phase: write every vector back to the slot it came from.
        .irp    r,2,3,4,5,6,7,8,9
        vmovdqa %ymm\r, 32*(\r-2)(%rdi)
        .endr

        ret
|
|
|
|
#-----------------------------------------------------------------------
# PQCLEAN_KYBER76890S_AVX2_csubq_avx
#
# Applies the csubq macro (from fq.inc) to eight consecutive 32-byte
# vectors, that is 256 bytes, read from and written back to the
# buffer addressed by rdi.
#
# In:    rdi = buffer address. Must be 32-byte aligned, because every
#              access below uses vmovdqa. Presumably the first
#              argument under the SysV AMD64 ABI (TODO confirm with
#              the C callers).
# Out:   the 256 bytes at rdi are overwritten with the processed data.
# Regs:  ymm0      = vector loaded from ..._16xq
#        ymm1-ymm8 = the eight data vectors
#        ymm9-ymm15 = second operand handed to csubq, presumably a
#                     scratch register (verify against fq.inc)
# Note:  csubq is never given ymm0 explicitly, so it presumably reads
#        it implicitly (verify against fq.inc). No vzeroupper is
#        issued before ret.
#-----------------------------------------------------------------------
.global PQCLEAN_KYBER76890S_AVX2_csubq_avx
PQCLEAN_KYBER76890S_AVX2_csubq_avx:
        # Constant vector, fetched RIP-relative.
        vmovdqa PQCLEAN_KYBER76890S_AVX2_16xq(%rip), %ymm0

        # Load phase: vector k of the buffer lands in ymm(k+1), k = 0..7.
        .irp    r,1,2,3,4,5,6,7,8
        vmovdqa 32*(\r-1)(%rdi), %ymm\r
        .endr

        # Process each data vector in register order. Arguments are the
        # data register followed by the second register operand, which
        # wraps back to ymm9 after ymm15.
        csubq   1 9
        csubq   2 10
        csubq   3 11
        csubq   4 12
        csubq   5 13
        csubq   6 14
        csubq   7 15
        csubq   8 9

        # Store phase: write every vector back to the slot it came from.
        .irp    r,1,2,3,4,5,6,7,8
        vmovdqa %ymm\r, 32*(\r-1)(%rdi)
        .endr

        ret
|
|
|
|
#-----------------------------------------------------------------------
# PQCLEAN_KYBER76890S_AVX2_frommont_avx
#
# Applies the fqmulprecomp macro (from fq.inc) to eight consecutive
# 32-byte vectors, that is 256 bytes, read from and written back to
# the buffer addressed by rdi. Each call is given the two constant
# registers ymm1 and ymm2 together with one data register.
#
# In:    rdi = buffer address. Must be 32-byte aligned, because every
#              access below uses vmovdqa. Presumably the first
#              argument under the SysV AMD64 ABI (TODO confirm with
#              the C callers).
# Out:   the 256 bytes at rdi are overwritten with the processed data.
# Regs:  ymm0       = vector loaded from ..._16xq
#        ymm1       = vector loaded from ..._16xmontsqlo
#        ymm2       = vector loaded from ..._16xmontsqhi
#        ymm3-ymm10 = the eight data vectors
#        ymm11-ymm15 = last operand handed to fqmulprecomp, presumably
#                      a scratch register (verify against fq.inc)
# Note:  fqmulprecomp is never given ymm0 explicitly, so it presumably
#        reads it implicitly (verify against fq.inc). No vzeroupper is
#        issued before ret.
#-----------------------------------------------------------------------
.global PQCLEAN_KYBER76890S_AVX2_frommont_avx
PQCLEAN_KYBER76890S_AVX2_frommont_avx:
        # Constant vectors, fetched RIP-relative.
        vmovdqa PQCLEAN_KYBER76890S_AVX2_16xq(%rip), %ymm0
        vmovdqa PQCLEAN_KYBER76890S_AVX2_16xmontsqlo(%rip), %ymm1
        vmovdqa PQCLEAN_KYBER76890S_AVX2_16xmontsqhi(%rip), %ymm2

        # Load phase: vector k of the buffer lands in ymm(k+3), k = 0..7.
        .irp    r,3,4,5,6,7,8,9,10
        vmovdqa 32*(\r-3)(%rdi), %ymm\r
        .endr

        # Process each data vector in register order. The last operand
        # wraps back to ymm11 after ymm15.
        fqmulprecomp 1,2,3 11
        fqmulprecomp 1,2,4 12
        fqmulprecomp 1,2,5 13
        fqmulprecomp 1,2,6 14
        fqmulprecomp 1,2,7 15
        fqmulprecomp 1,2,8 11
        fqmulprecomp 1,2,9 12
        fqmulprecomp 1,2,10 13

        # Store phase: write every vector back to the slot it came from.
        .irp    r,3,4,5,6,7,8,9,10
        vmovdqa %ymm\r, 32*(\r-3)(%rdi)
        .endr

        ret
|