You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

89 line
2.4 KiB

  1. // +build amd64,!noasm
  2. package p503
  3. import (
  4. . "github.com/cloudflare/p751sidh/internal/isogeny"
  5. "golang.org/x/sys/cpu"
  6. )
  7. // fp503ConditionalSwap leaves x,y unchanged if choice = 0 and sets x,y = y,x if choice = 1.
  8. // If choice is neither 0 nor 1 then behaviour is undefined.
  9. // This function executes in constant time.
  10. //go:noescape
  11. func fp503ConditionalSwap(x, y *FpElement, choice uint8)
  12. // fp503AddReduced computes z = x + y (mod p).
  13. //go:noescape
  14. func fp503AddReduced(z, x, y *FpElement)
  15. // fp503SubReduced computes z = x - y (mod p).
  16. //go:noescape
  17. func fp503SubReduced(z, x, y *FpElement)
  18. // fp503AddLazy computes z = x + y, without reducing mod p.
  19. //go:noescape
  20. func fp503AddLazy(z, x, y *FpElement)
  21. // fp503X2AddLazy computes z = x + y on FpElementX2 operands, without reducing mod p.
  22. //go:noescape
  23. func fp503X2AddLazy(z, x, y *FpElementX2)
  24. // fp503X2SubLazy computes z = x - y on FpElementX2 operands, without reducing mod p.
  25. //go:noescape
  26. func fp503X2SubLazy(z, x, y *FpElementX2)
  27. // fp503StrongReduce reduces the field element x from [0, 2*p) to one in [0,p); x is the only operand and there is no return value.
  28. //go:noescape
  29. func fp503StrongReduce(x *FpElement)
  30. // fp503Mul computes z = x * y.
  31. // The concrete implementation is chosen on each call from the CPU
  32. // capabilities resolved at runtime: CPUs with ADCX, ADOX and MULX
  33. // support run the most optimized implementation.
  34. func fp503Mul(z *FpElementX2, x, y *FpElement) {
  35. 	if !cpu.X86.HasBMI2 {
  36. 		mul(z, x, y)
  37. 	} else if cpu.X86.HasADX {
  38. 		mulWithMULXADX(z, x, y)
  39. 	} else {
  40. 		mulWithMULX(z, x, y)
  41. 	}
  42. }
  43. // mul is the Mul implementation for legacy CPUs; fp503Mul falls back to it when BMI2 is not reported.
  44. //go:noescape
  45. func mul(z *FpElementX2, x, y *FpElement)
  46. // mulWithMULX is the Mul implementation for CPUs supporting carry-less MULX multiplier; fp503Mul selects it when BMI2 is reported but ADX is not.
  47. //go:noescape
  48. func mulWithMULX(z *FpElementX2, x, y *FpElement)
  49. // mulWithMULXADX is the Mul implementation for CPUs supporting two independent carry chain
  50. // (ADOX/ADCX) instructions and carry-less MULX multiplier; fp503Mul selects it when both BMI2 and ADX are reported.
  51. //go:noescape
  52. func mulWithMULXADX(z *FpElementX2, x, y *FpElement)
  53. // fp503MontgomeryReduce computes the Montgomery reduction z = x R^{-1} (mod 2*p).
  54. // The value of x may be changed on return, and z=x is not allowed.
  55. func fp503MontgomeryReduce(z *FpElement, x *FpElementX2) {
  56. 	if !cpu.X86.HasBMI2 {
  57. 		redc(z, x)
  58. 	} else if cpu.X86.HasADX {
  59. 		redcWithMULXADX(z, x)
  60. 	} else {
  61. 		redcWithMULX(z, x)
  62. 	}
  63. }
  // redc is the Montgomery reduction implementation that fp503MontgomeryReduce
  // falls back to when the CPU does not report BMI2.
  // NOTE(review): unlike the sibling declarations, this one carries no go:noescape
  // directive — confirm whether that omission is intentional.
  64. func redc(z *FpElement, x *FpElementX2)
  65. // redcWithMULX is the Montgomery reduction implementation for CPUs supporting carry-less MULX multiplier; fp503MontgomeryReduce selects it when BMI2 is reported but ADX is not.
  66. //go:noescape
  67. func redcWithMULX(z *FpElement, x *FpElementX2)
  68. // redcWithMULXADX is the Montgomery reduction implementation for CPUs supporting two independent carry chain
  69. // (ADOX/ADCX) instructions and carry-less MULX multiplier; fp503MontgomeryReduce selects it when both BMI2 and ADX are reported.
  70. //go:noescape
  71. func redcWithMULXADX(z *FpElement, x *FpElementX2)