You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

113 lines
2.2 KiB

  1. .include "fq.inc"
  # PQCLEAN_KYBER76890S_AVX2_reduce_avx
  # Applies the red16 macro (defined in fq.inc) in place to the 256 bytes at
  # (%rdi), handled as eight 32-byte ymm vectors.
  # In:   %rdi = buffer address. Every access uses vmovdqa, so the address must
  #       be 32-byte aligned. Presumably the first SysV AMD64 pointer argument
  #       - confirm against the C prototype.
  # Out:  the same 256 bytes, overwritten with the red16 results.
  # Regs: %ymm0-%ymm15 are all named below; %rdi is read but never modified here.
  #       Anything else red16 touches is determined by fq.inc.
  # NOTE(review): there is no vzeroupper before ret - confirm callers do not
  # execute legacy-SSE code immediately after this returns.
  2. .global PQCLEAN_KYBER76890S_AVX2_reduce_avx
  3. PQCLEAN_KYBER76890S_AVX2_reduce_avx:
  4. # consts: two 32-byte constants loaded RIP-relative into ymm0 and ymm1.
  #    They are not passed to red16 explicitly; presumably the macro reads
  #    them by fixed register number - verify in fq.inc.
  5. vmovdqa PQCLEAN_KYBER76890S_AVX2_16xq(%rip),%ymm0
  6. vmovdqa PQCLEAN_KYBER76890S_AVX2_16xv(%rip),%ymm1
  7. # load: bytes 0..255 of the buffer into ymm2..ymm9 (aligned loads)
  8. vmovdqa (%rdi),%ymm2
  9. vmovdqa 32(%rdi),%ymm3
  10. vmovdqa 64(%rdi),%ymm4
  11. vmovdqa 96(%rdi),%ymm5
  12. vmovdqa 128(%rdi),%ymm6
  13. vmovdqa 160(%rdi),%ymm7
  14. vmovdqa 192(%rdi),%ymm8
  15. vmovdqa 224(%rdi),%ymm9
  # red16 <data reg> <second reg>: one invocation per loaded vector. The second
  # operand runs through ymm10..ymm15 and then reuses ymm10/ymm11 for the last
  # two vectors; presumably it is a scratch register - see fq.inc.
  16. red16 2 10
  17. red16 3 11
  18. red16 4 12
  19. red16 5 13
  20. red16 6 14
  21. red16 7 15
  22. red16 8 10
  23. red16 9 11
  24. # store: write ymm2..ymm9 back to the offsets they were loaded from
  25. vmovdqa %ymm2,(%rdi)
  26. vmovdqa %ymm3,32(%rdi)
  27. vmovdqa %ymm4,64(%rdi)
  28. vmovdqa %ymm5,96(%rdi)
  29. vmovdqa %ymm6,128(%rdi)
  30. vmovdqa %ymm7,160(%rdi)
  31. vmovdqa %ymm8,192(%rdi)
  32. vmovdqa %ymm9,224(%rdi)
  33. ret
  # PQCLEAN_KYBER76890S_AVX2_csubq_avx
  # Applies the csubq macro (defined in fq.inc) in place to the 256 bytes at
  # (%rdi), handled as eight 32-byte ymm vectors.
  # In:   %rdi = buffer address. Every access uses vmovdqa, so the address must
  #       be 32-byte aligned. Presumably the first SysV AMD64 pointer argument
  #       - confirm against the C prototype.
  # Out:  the same 256 bytes, overwritten with the csubq results.
  # Regs: %ymm0-%ymm15 are all named below; %rdi is read but never modified here.
  #       Anything else csubq touches is determined by fq.inc.
  # NOTE(review): there is no vzeroupper before ret - confirm callers do not
  # execute legacy-SSE code immediately after this returns.
  34. .global PQCLEAN_KYBER76890S_AVX2_csubq_avx
  35. PQCLEAN_KYBER76890S_AVX2_csubq_avx:
  36. # consts: one 32-byte constant loaded RIP-relative into ymm0. It is not
  #    passed to csubq explicitly; presumably the macro reads ymm0 by fixed
  #    register number - verify in fq.inc.
  37. vmovdqa PQCLEAN_KYBER76890S_AVX2_16xq(%rip),%ymm0
  38. # load: bytes 0..255 of the buffer into ymm1..ymm8 (aligned loads)
  39. vmovdqa (%rdi),%ymm1
  40. vmovdqa 32(%rdi),%ymm2
  41. vmovdqa 64(%rdi),%ymm3
  42. vmovdqa 96(%rdi),%ymm4
  43. vmovdqa 128(%rdi),%ymm5
  44. vmovdqa 160(%rdi),%ymm6
  45. vmovdqa 192(%rdi),%ymm7
  46. vmovdqa 224(%rdi),%ymm8
  # csubq <data reg> <second reg>: one invocation per loaded vector. The second
  # operand runs through ymm9..ymm15 and then reuses ymm9 for the last vector;
  # presumably it is a scratch register - see fq.inc.
  47. csubq 1 9
  48. csubq 2 10
  49. csubq 3 11
  50. csubq 4 12
  51. csubq 5 13
  52. csubq 6 14
  53. csubq 7 15
  54. csubq 8 9
  55. # store: write ymm1..ymm8 back to the offsets they were loaded from
  56. vmovdqa %ymm1,(%rdi)
  57. vmovdqa %ymm2,32(%rdi)
  58. vmovdqa %ymm3,64(%rdi)
  59. vmovdqa %ymm4,96(%rdi)
  60. vmovdqa %ymm5,128(%rdi)
  61. vmovdqa %ymm6,160(%rdi)
  62. vmovdqa %ymm7,192(%rdi)
  63. vmovdqa %ymm8,224(%rdi)
  64. ret
  # PQCLEAN_KYBER76890S_AVX2_frommont_avx
  # Applies the fqmulprecomp macro (defined in fq.inc) in place to the 256 bytes
  # at (%rdi), handled as eight 32-byte ymm vectors, with the 16xmontsqlo /
  # 16xmontsqhi constants as the macro's first two operands.
  # In:   %rdi = buffer address. Every access uses vmovdqa, so the address must
  #       be 32-byte aligned. Presumably the first SysV AMD64 pointer argument
  #       - confirm against the C prototype.
  # Out:  the same 256 bytes, overwritten with the fqmulprecomp results.
  # Regs: %ymm0-%ymm15 are all named below; %rdi is read but never modified here.
  #       Anything else fqmulprecomp touches is determined by fq.inc.
  # NOTE(review): there is no vzeroupper before ret - confirm callers do not
  # execute legacy-SSE code immediately after this returns.
  65. .global PQCLEAN_KYBER76890S_AVX2_frommont_avx
  66. PQCLEAN_KYBER76890S_AVX2_frommont_avx:
  67. # consts: three 32-byte constants loaded RIP-relative. ymm1/ymm2 are handed
  #    to fqmulprecomp by number below; ymm0 is not passed explicitly, so
  #    presumably the macro reads it by fixed register number - verify in fq.inc.
  68. vmovdqa PQCLEAN_KYBER76890S_AVX2_16xq(%rip),%ymm0
  69. vmovdqa PQCLEAN_KYBER76890S_AVX2_16xmontsqlo(%rip),%ymm1
  70. vmovdqa PQCLEAN_KYBER76890S_AVX2_16xmontsqhi(%rip),%ymm2
  71. # load: bytes 0..255 of the buffer into ymm3..ymm10 (aligned loads)
  72. vmovdqa (%rdi),%ymm3
  73. vmovdqa 32(%rdi),%ymm4
  74. vmovdqa 64(%rdi),%ymm5
  75. vmovdqa 96(%rdi),%ymm6
  76. vmovdqa 128(%rdi),%ymm7
  77. vmovdqa 160(%rdi),%ymm8
  78. vmovdqa 192(%rdi),%ymm9
  79. vmovdqa 224(%rdi),%ymm10
  # fqmulprecomp 1,2,<data reg> <fourth reg>: one invocation per loaded vector,
  # always with ymm1/ymm2 as the first two operands. The fourth operand runs
  # through ymm11..ymm15 and then reuses ymm11..ymm13 for the last three
  # vectors; presumably it is a scratch register - see fq.inc.
  80. fqmulprecomp 1,2,3 11
  81. fqmulprecomp 1,2,4 12
  82. fqmulprecomp 1,2,5 13
  83. fqmulprecomp 1,2,6 14
  84. fqmulprecomp 1,2,7 15
  85. fqmulprecomp 1,2,8 11
  86. fqmulprecomp 1,2,9 12
  87. fqmulprecomp 1,2,10 13
  88. # store: write ymm3..ymm10 back to the offsets they were loaded from
  89. vmovdqa %ymm3,(%rdi)
  90. vmovdqa %ymm4,32(%rdi)
  91. vmovdqa %ymm5,64(%rdi)
  92. vmovdqa %ymm6,96(%rdi)
  93. vmovdqa %ymm7,128(%rdi)
  94. vmovdqa %ymm8,160(%rdi)
  95. vmovdqa %ymm9,192(%rdi)
  96. vmovdqa %ymm10,224(%rdi)
  97. ret