You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

289 lines
8.0 KiB

  1. package csidh
  2. import (
  3. "math/bits"
  4. "golang.org/x/sys/cpu"
  5. )
// CPU capabilities. These flags are referenced by assembly code. According to
// https://github.com/golang/go/issues/28230, variables referenced from
// assembly must be declared in the same package as that assembly.
// They are declared as variables rather than constants to facilitate testing.
var (
	// hasBMI2 signals support for BMI2 (MULX).
	hasBMI2 = cpu.X86.HasBMI2 //nolint
	// hasADXandBMI2 signals support for both ADX and BMI2.
	hasADXandBMI2 = cpu.X86.HasBMI2 && cpu.X86.HasADX
)
  16. // Constant time select.
  17. // if pick == 0xFF..FF (out = in1)
  18. // if pick == 0 (out = in2)
  19. // else out is undefined.
  20. func ctPick64(which uint64, in1, in2 uint64) uint64 {
  21. return (in1 & which) | (in2 & ^which)
  22. }
  23. // ctIsNonZero64 returns 0 in case i == 0, otherwise it returns 1.
  24. // Constant-time.
  25. func ctIsNonZero64(i uint64) int {
  26. // In case i==0 then i-1 will set MSB. Only in such case (i OR ~(i-1))
  27. // will result in MSB being not set (logical implication: (i-1)=>i is
  28. // false iff (i-1)==0 and i==non-zero). In every other case MSB is
  29. // set and hence function returns 1.
  30. return int((i | (^(i - 1))) >> 63)
  31. }
  32. func mulGeneric(r, x, y *fp) {
  33. var s fp // keeps intermediate results
  34. var t1, t2 [9]uint64
  35. var c, q uint64
  36. for i := 0; i < numWords-1; i++ {
  37. q = ((x[i] * y[0]) + s[0]) * pNegInv[0]
  38. mul576(&t1, &p, q)
  39. mul576(&t2, y, x[i])
  40. // x[i]*y + q_i*p
  41. t1[0], c = bits.Add64(t1[0], t2[0], 0)
  42. t1[1], c = bits.Add64(t1[1], t2[1], c)
  43. t1[2], c = bits.Add64(t1[2], t2[2], c)
  44. t1[3], c = bits.Add64(t1[3], t2[3], c)
  45. t1[4], c = bits.Add64(t1[4], t2[4], c)
  46. t1[5], c = bits.Add64(t1[5], t2[5], c)
  47. t1[6], c = bits.Add64(t1[6], t2[6], c)
  48. t1[7], c = bits.Add64(t1[7], t2[7], c)
  49. t1[8], _ = bits.Add64(t1[8], t2[8], c)
  50. // s = (s + x[i]*y + q_i * p) / R
  51. _, c = bits.Add64(t1[0], s[0], 0)
  52. s[0], c = bits.Add64(t1[1], s[1], c)
  53. s[1], c = bits.Add64(t1[2], s[2], c)
  54. s[2], c = bits.Add64(t1[3], s[3], c)
  55. s[3], c = bits.Add64(t1[4], s[4], c)
  56. s[4], c = bits.Add64(t1[5], s[5], c)
  57. s[5], c = bits.Add64(t1[6], s[6], c)
  58. s[6], c = bits.Add64(t1[7], s[7], c)
  59. s[7], _ = bits.Add64(t1[8], 0, c)
  60. }
  61. // last iteration stores result in r
  62. q = ((x[numWords-1] * y[0]) + s[0]) * pNegInv[0]
  63. mul576(&t1, &p, q)
  64. mul576(&t2, y, x[numWords-1])
  65. t1[0], c = bits.Add64(t1[0], t2[0], c)
  66. t1[1], c = bits.Add64(t1[1], t2[1], c)
  67. t1[2], c = bits.Add64(t1[2], t2[2], c)
  68. t1[3], c = bits.Add64(t1[3], t2[3], c)
  69. t1[4], c = bits.Add64(t1[4], t2[4], c)
  70. t1[5], c = bits.Add64(t1[5], t2[5], c)
  71. t1[6], c = bits.Add64(t1[6], t2[6], c)
  72. t1[7], c = bits.Add64(t1[7], t2[7], c)
  73. t1[8], _ = bits.Add64(t1[8], t2[8], c)
  74. _, c = bits.Add64(t1[0], s[0], 0)
  75. r[0], c = bits.Add64(t1[1], s[1], c)
  76. r[1], c = bits.Add64(t1[2], s[2], c)
  77. r[2], c = bits.Add64(t1[3], s[3], c)
  78. r[3], c = bits.Add64(t1[4], s[4], c)
  79. r[4], c = bits.Add64(t1[5], s[5], c)
  80. r[5], c = bits.Add64(t1[6], s[6], c)
  81. r[6], c = bits.Add64(t1[7], s[7], c)
  82. r[7], _ = bits.Add64(t1[8], 0, c)
  83. }
  84. // Returns result of x<y operation.
  85. func isLess(x, y *fp) bool {
  86. for i := numWords - 1; i >= 0; i-- {
  87. v, c := bits.Sub64(y[i], x[i], 0)
  88. if c != 0 {
  89. return false
  90. }
  91. if v != 0 {
  92. return true
  93. }
  94. }
  95. // x == y
  96. return false
  97. }
  98. // r = x + y mod p.
  99. func addRdc(r, x, y *fp) {
  100. var c uint64
  101. var t fp
  102. r[0], c = bits.Add64(x[0], y[0], 0)
  103. r[1], c = bits.Add64(x[1], y[1], c)
  104. r[2], c = bits.Add64(x[2], y[2], c)
  105. r[3], c = bits.Add64(x[3], y[3], c)
  106. r[4], c = bits.Add64(x[4], y[4], c)
  107. r[5], c = bits.Add64(x[5], y[5], c)
  108. r[6], c = bits.Add64(x[6], y[6], c)
  109. r[7], _ = bits.Add64(x[7], y[7], c)
  110. t[0], c = bits.Sub64(r[0], p[0], 0)
  111. t[1], c = bits.Sub64(r[1], p[1], c)
  112. t[2], c = bits.Sub64(r[2], p[2], c)
  113. t[3], c = bits.Sub64(r[3], p[3], c)
  114. t[4], c = bits.Sub64(r[4], p[4], c)
  115. t[5], c = bits.Sub64(r[5], p[5], c)
  116. t[6], c = bits.Sub64(r[6], p[6], c)
  117. t[7], c = bits.Sub64(r[7], p[7], c)
  118. var w = 0 - c
  119. r[0] = ctPick64(w, r[0], t[0])
  120. r[1] = ctPick64(w, r[1], t[1])
  121. r[2] = ctPick64(w, r[2], t[2])
  122. r[3] = ctPick64(w, r[3], t[3])
  123. r[4] = ctPick64(w, r[4], t[4])
  124. r[5] = ctPick64(w, r[5], t[5])
  125. r[6] = ctPick64(w, r[6], t[6])
  126. r[7] = ctPick64(w, r[7], t[7])
  127. }
  128. // r = x - y.
  129. func sub512(r, x, y *fp) uint64 {
  130. var c uint64
  131. r[0], c = bits.Sub64(x[0], y[0], 0)
  132. r[1], c = bits.Sub64(x[1], y[1], c)
  133. r[2], c = bits.Sub64(x[2], y[2], c)
  134. r[3], c = bits.Sub64(x[3], y[3], c)
  135. r[4], c = bits.Sub64(x[4], y[4], c)
  136. r[5], c = bits.Sub64(x[5], y[5], c)
  137. r[6], c = bits.Sub64(x[6], y[6], c)
  138. r[7], c = bits.Sub64(x[7], y[7], c)
  139. return c
  140. }
  141. // r = x - y mod p.
  142. func subRdc(r, x, y *fp) {
  143. var c uint64
  144. // Same as sub512(r,x,y). Unfortunately
  145. // compiler is not able to inline it.
  146. r[0], c = bits.Sub64(x[0], y[0], 0)
  147. r[1], c = bits.Sub64(x[1], y[1], c)
  148. r[2], c = bits.Sub64(x[2], y[2], c)
  149. r[3], c = bits.Sub64(x[3], y[3], c)
  150. r[4], c = bits.Sub64(x[4], y[4], c)
  151. r[5], c = bits.Sub64(x[5], y[5], c)
  152. r[6], c = bits.Sub64(x[6], y[6], c)
  153. r[7], c = bits.Sub64(x[7], y[7], c)
  154. // if x<y => r=x-y+p
  155. var w = 0 - c
  156. r[0], c = bits.Add64(r[0], ctPick64(w, p[0], 0), 0)
  157. r[1], c = bits.Add64(r[1], ctPick64(w, p[1], 0), c)
  158. r[2], c = bits.Add64(r[2], ctPick64(w, p[2], 0), c)
  159. r[3], c = bits.Add64(r[3], ctPick64(w, p[3], 0), c)
  160. r[4], c = bits.Add64(r[4], ctPick64(w, p[4], 0), c)
  161. r[5], c = bits.Add64(r[5], ctPick64(w, p[5], 0), c)
  162. r[6], c = bits.Add64(r[6], ctPick64(w, p[6], 0), c)
  163. r[7], _ = bits.Add64(r[7], ctPick64(w, p[7], 0), c)
  164. }
  165. // Fixed-window mod exp for fpBitLen bit value with 4 bit window. Returned
  166. // result is a number in montgomery domain.
  167. // r = b ^ e (mod p).
  168. // Constant time.
  169. func modExpRdcCommon(r, b, e *fp, fpBitLen int) {
  170. var precomp [16]fp
  171. var t fp
  172. var c uint64
  173. // Precompute step, computes an array of small powers of 'b'. As this
  174. // algorithm implements 4-bit window, we need 2^4=16 of such values.
  175. // b^0 = 1, which is equal to R from REDC.
  176. precomp[0] = one // b ^ 0
  177. precomp[1] = *b // b ^ 1
  178. for i := 2; i < 16; i = i + 2 {
  179. // OPTIMIZE: implement fast squering. Then interleaving fast squaring
  180. // with multiplication should improve performance.
  181. mulRdc(&precomp[i], &precomp[i/2], &precomp[i/2]) // sqr
  182. mulRdc(&precomp[i+1], &precomp[i], b)
  183. }
  184. *r = one
  185. for i := fpBitLen/4 - 1; i >= 0; i-- {
  186. for j := 0; j < 4; j++ {
  187. mulRdc(r, r, r)
  188. }
  189. // note: non resistant to cache SCA
  190. idx := (e[i/16] >> uint((i%16)*4)) & 15
  191. mulRdc(r, r, &precomp[idx])
  192. }
  193. // if p <= r < 2p then r = r-p
  194. t[0], c = bits.Sub64(r[0], p[0], 0)
  195. t[1], c = bits.Sub64(r[1], p[1], c)
  196. t[2], c = bits.Sub64(r[2], p[2], c)
  197. t[3], c = bits.Sub64(r[3], p[3], c)
  198. t[4], c = bits.Sub64(r[4], p[4], c)
  199. t[5], c = bits.Sub64(r[5], p[5], c)
  200. t[6], c = bits.Sub64(r[6], p[6], c)
  201. t[7], c = bits.Sub64(r[7], p[7], c)
  202. var w = 0 - c
  203. r[0] = ctPick64(w, r[0], t[0])
  204. r[1] = ctPick64(w, r[1], t[1])
  205. r[2] = ctPick64(w, r[2], t[2])
  206. r[3] = ctPick64(w, r[3], t[3])
  207. r[4] = ctPick64(w, r[4], t[4])
  208. r[5] = ctPick64(w, r[5], t[5])
  209. r[6] = ctPick64(w, r[6], t[6])
  210. r[7] = ctPick64(w, r[7], t[7])
  211. }
// modExpRdc512 performs modular exponentiation r = b^e (mod p) with a
// 512-bit exponent e by delegating to modExpRdcCommon with a bit length of
// 512. It has the same timing characteristics as modExpRdcCommon.
func modExpRdc512(r, b, e *fp) {
	modExpRdcCommon(r, b, e, 512)
}
  217. // modExpRdc does modular exponentation of 64-bit number.
  218. // Constant-time.
  219. func modExpRdc64(r, b *fp, e uint64) {
  220. modExpRdcCommon(r, b, &fp{e}, 64)
  221. }
  222. // isNonQuadRes checks whether value v is quadratic residue.
  223. // Implementation uses Fermat's little theorem (or
  224. // Euler's criterion)
  225. // a^(p-1) == 1, hence
  226. // (a^2) ((p-1)/2) == 1
  227. // Which means v is a quadratic residue iff v^((p-1)/2) == 1.
  228. // Caller provided v must be in montgomery domain.
  229. // Returns 0 in case v is quadratic residue or 1 in case
  230. // v is quadratic non-residue.
  231. func (v *fp) isNonQuadRes() int {
  232. var res fp
  233. var b uint64
  234. modExpRdc512(&res, v, &pMin1By2)
  235. for i := range res {
  236. b |= res[i] ^ one[i]
  237. }
  238. return ctIsNonZero64(b)
  239. }
  240. // isZero returns false in case v is equal to 0, otherwise
  241. // true. Constant time.
  242. func (v *fp) isZero() bool {
  243. var r uint64
  244. for i := 0; i < numWords; i++ {
  245. r |= v[i]
  246. }
  247. return ctIsNonZero64(r) == 0
  248. }
  249. // equal checks if v is equal to in. Constant time.
  250. func (v *fp) equal(in *fp) bool {
  251. var r uint64
  252. for i := range v {
  253. r |= v[i] ^ in[i]
  254. }
  255. return ctIsNonZero64(r) == 0
  256. }