|
- .include "fq.inc"
-
- # ---------------------------------------------------------------------
- # PQCLEAN_KYBER76890S_AVX2_reduce_avx
- #   In:   %rdi = base of a 256-byte buffer that is rewritten in place.
- #         vmovdqa with a ymm memory operand needs a 32-byte aligned
- #         address.
- #   Does: loads eight 32-byte vectors, runs the red16 macro (fq.inc) on
- #         each one, and stores them back at the same offsets.
- #   Regs: ymm0-ymm9 are written here; ymm10-ymm15 are passed to red16 as
- #         its second operand (presumably scratch -- confirm in fq.inc).
- #   NOTE(review): no vzeroupper is issued before ret.
- # ---------------------------------------------------------------------
- .global PQCLEAN_KYBER76890S_AVX2_reduce_avx
- PQCLEAN_KYBER76890S_AVX2_reduce_avx:
-     # Constant vectors; presumably read implicitly by red16
-     # (verify against the macro definition in fq.inc).
-     vmovdqa     PQCLEAN_KYBER76890S_AVX2_16xq(%rip), %ymm0
-     vmovdqa     PQCLEAN_KYBER76890S_AVX2_16xv(%rip), %ymm1
-
-     # Fetch: ymm2..ymm9 <- bytes [0, 256) at %rdi, 32 bytes per register.
-     .irp        r,2,3,4,5,6,7,8,9
-     vmovdqa     32*(\r-2)(%rdi), %ymm\r
-     .endr
-
-     # red16 <data reg>, <second reg>; the second register wraps back to
-     # ymm10/ymm11 for the last two vectors.
-     red16       2, 10
-     red16       3, 11
-     red16       4, 12
-     red16       5, 13
-     red16       6, 14
-     red16       7, 15
-     red16       8, 10
-     red16       9, 11
-
-     # Write back: bytes [0, 256) at %rdi <- ymm2..ymm9.
-     .irp        r,2,3,4,5,6,7,8,9
-     vmovdqa     %ymm\r, 32*(\r-2)(%rdi)
-     .endr
-
-     ret
-
- # ---------------------------------------------------------------------
- # PQCLEAN_KYBER76890S_AVX2_csubq_avx
- #   In:   %rdi = base of a 256-byte buffer that is rewritten in place.
- #         vmovdqa with a ymm memory operand needs a 32-byte aligned
- #         address.
- #   Does: loads eight 32-byte vectors, runs the csubq macro (fq.inc) on
- #         each one, and stores them back at the same offsets.
- #   Regs: ymm0-ymm8 are written here; ymm9-ymm15 are passed to csubq as
- #         its second operand (presumably scratch -- confirm in fq.inc).
- #   NOTE(review): no vzeroupper is issued before ret.
- # ---------------------------------------------------------------------
- .global PQCLEAN_KYBER76890S_AVX2_csubq_avx
- PQCLEAN_KYBER76890S_AVX2_csubq_avx:
-     # Constant vector; presumably read implicitly by csubq
-     # (verify against the macro definition in fq.inc).
-     vmovdqa     PQCLEAN_KYBER76890S_AVX2_16xq(%rip), %ymm0
-
-     # Fetch: ymm1..ymm8 <- bytes [0, 256) at %rdi, 32 bytes per register.
-     .irp        r,1,2,3,4,5,6,7,8
-     vmovdqa     32*(\r-1)(%rdi), %ymm\r
-     .endr
-
-     # csubq <data reg>, <second reg>; the second register wraps back to
-     # ymm9 for the last vector.
-     csubq       1, 9
-     csubq       2, 10
-     csubq       3, 11
-     csubq       4, 12
-     csubq       5, 13
-     csubq       6, 14
-     csubq       7, 15
-     csubq       8, 9
-
-     # Write back: bytes [0, 256) at %rdi <- ymm1..ymm8.
-     .irp        r,1,2,3,4,5,6,7,8
-     vmovdqa     %ymm\r, 32*(\r-1)(%rdi)
-     .endr
-
-     ret
-
- # ---------------------------------------------------------------------
- # PQCLEAN_KYBER76890S_AVX2_frommont_avx
- #   In:   %rdi = base of a 256-byte buffer that is rewritten in place.
- #         vmovdqa with a ymm memory operand needs a 32-byte aligned
- #         address.
- #   Does: loads eight 32-byte vectors, runs the fqmulprecomp macro
- #         (fq.inc) on each one with the 16xmontsqlo / 16xmontsqhi
- #         constants held in ymm1 / ymm2, and stores them back at the
- #         same offsets.
- #   Regs: ymm0-ymm10 are written here; ymm11-ymm15 are passed to
- #         fqmulprecomp as its last operand (presumably scratch --
- #         confirm in fq.inc).
- #   NOTE(review): no vzeroupper is issued before ret.
- # ---------------------------------------------------------------------
- .global PQCLEAN_KYBER76890S_AVX2_frommont_avx
- PQCLEAN_KYBER76890S_AVX2_frommont_avx:
-     # Constant vectors. ymm1/ymm2 are named explicitly in every
-     # fqmulprecomp call below; ymm0 is presumably read implicitly by
-     # the macro (verify against fq.inc).
-     vmovdqa     PQCLEAN_KYBER76890S_AVX2_16xq(%rip), %ymm0
-     vmovdqa     PQCLEAN_KYBER76890S_AVX2_16xmontsqlo(%rip), %ymm1
-     vmovdqa     PQCLEAN_KYBER76890S_AVX2_16xmontsqhi(%rip), %ymm2
-
-     # Fetch: ymm3..ymm10 <- bytes [0, 256) at %rdi, 32 bytes per register.
-     .irp        r,3,4,5,6,7,8,9,10
-     vmovdqa     32*(\r-3)(%rdi), %ymm\r
-     .endr
-
-     # fqmulprecomp <lo const reg>, <hi const reg>, <data reg>, <last reg>;
-     # the last register wraps back to ymm11 after ymm15.
-     fqmulprecomp 1, 2, 3, 11
-     fqmulprecomp 1, 2, 4, 12
-     fqmulprecomp 1, 2, 5, 13
-     fqmulprecomp 1, 2, 6, 14
-     fqmulprecomp 1, 2, 7, 15
-     fqmulprecomp 1, 2, 8, 11
-     fqmulprecomp 1, 2, 9, 12
-     fqmulprecomp 1, 2, 10, 13
-
-     # Write back: bytes [0, 256) at %rdi <- ymm3..ymm10.
-     .irp        r,3,4,5,6,7,8,9,10
-     vmovdqa     %ymm\r, 32*(\r-3)(%rdi)
-     .endr
-
-     ret
|