You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

2378 regels
38 KiB

  1. #include "textflag.h"
  // Constants below are 64-bit words of multi-word integers, numbered from the
  // least significant word (word 0 lives at byte offset 0 of an operand).
  // Only the words that the routines in this file load are given names.
  2. // p751 + 1 (only words 5..11 are defined here)
  3. #define P751P1_5 $0xEEB0000000000000
  4. #define P751P1_6 $0xE3EC968549F878A8
  5. #define P751P1_7 $0xDA959B1A13F7CC76
  6. #define P751P1_8 $0x084E9867D6EBE876
  7. #define P751P1_9 $0x8562B5045CB25748
  8. #define P751P1_10 $0x0E12909F97BADC66
  9. #define P751P1_11 $0x00006FE5D541F71C
  // p751. Words 1..4 are equal to word 0 (all ones), so fp751StrongReduce
  // reuses P751_0 for them and no P751_1..P751_4 names exist.
  10. #define P751_0 $0xFFFFFFFFFFFFFFFF
  11. #define P751_5 $0xEEAFFFFFFFFFFFFF
  12. #define P751_6 $0xE3EC968549F878A8
  13. #define P751_7 $0xDA959B1A13F7CC76
  14. #define P751_8 $0x084E9867D6EBE876
  15. #define P751_9 $0x8562B5045CB25748
  16. #define P751_10 $0x0E12909F97BADC66
  17. #define P751_11 $0x00006FE5D541F71C
  // 2 * p751. The add/sub routines use P751X2_1 for words 1 through 4, so
  // P751X2_2..P751X2_4 are not defined.
  18. #define P751X2_0 $0xFFFFFFFFFFFFFFFE
  19. #define P751X2_1 $0xFFFFFFFFFFFFFFFF
  20. #define P751X2_5 $0xDD5FFFFFFFFFFFFF
  21. #define P751X2_6 $0xC7D92D0A93F0F151
  22. #define P751X2_7 $0xB52B363427EF98ED
  23. #define P751X2_8 $0x109D30CFADD7D0ED
  24. #define P751X2_9 $0x0AC56A08B964AE90
  25. #define P751X2_10 $0x1C25213F2F75B8CD
  26. #define P751X2_11 $0x0000DFCBAA83EE38
  27. // The MSR code uses these registers for parameter passing. Keep using
  28. // them to avoid significant code changes. This means that when the Go
  29. // assembler does something strange, we can diff the machine code
  30. // against a different assembler to find out what Go did.
  // Note that REG_P3 is DX, which MULQ overwrites; fp751Mul therefore keeps
  // its destination pointer in CX instead.
  31. #define REG_P1 DI
  32. #define REG_P2 SI
  33. #define REG_P3 DX
  34. // We can't write MOVQ $0, AX because Go's assembler incorrectly
  35. // optimizes this to XOR AX, AX, which clobbers the carry flags.
  36. //
  37. // This bug was defined to be "correct" behaviour (cf.
  38. // https://github.com/golang/go/issues/12405 ) by declaring that the MOV
  39. // pseudo-instruction clobbers flags, although this fact is mentioned
  40. // nowhere in the documentation for the Go assembler.
  41. //
  42. // Defining MOVQ to clobber flags has the effect that it is never safe
  43. // to interleave MOVQ with ADCQ and SBBQ instructions. Since this is
  44. // required to write a carry chain longer than registers' working set,
  45. // all of the below code therefore relies on the unspecified and
  46. // undocumented behaviour that MOV won't clobber flags, except in the
  47. // case of the above-mentioned bug.
  48. //
  49. // However, there's also no specification of which instructions
  50. // correspond to machine instructions, and which are
  51. // pseudo-instructions (i.e., no specification of what the assembler
  52. // actually does), so this doesn't seem much worse than usual.
  53. //
  54. // Avoid the bug by dropping the bytes for `mov eax, 0` in directly:
  // (opcode byte 0xB8 followed by a four-byte zero immediate). The routines
  // below place this between a carry chain and the SBBQ $0, AX that turns
  // the final carry/borrow into a mask.
  55. #define ZERO_AX_WITHOUT_CLOBBERING_FLAGS BYTE $0xB8; BYTE $0; BYTE $0; BYTE $0; BYTE $0;
  // fp751StrongReduce(x): branch-free conditional subtraction of p751, in place.
  //
  // The single pointer argument x is accessed as twelve 64-bit words at byte
  // offsets 0..88. The routine first stores x - p back into x; if that
  // subtraction borrowed (x < p) it then adds p back, restoring the original
  // value. Otherwise it adds zero, leaving x - p.
  //
  // No stack frame is used. Scratch registers: AX, REG_P1 (DI), R8-R15.
  // NOTE(review): the result is only fully reduced if the input is below 2p;
  // presumably callers guarantee that -- confirm on the Go side.
  56. TEXT ·fp751StrongReduce(SB), NOSPLIT, $0-8
  57. MOVQ x+0(FP), REG_P1
  58. // Zero AX now, before the borrow chain starts; it becomes the mask below.
  59. XORQ AX, AX
  60. // Load p into registers:
  61. MOVQ P751_0, R8
  62. // P751_{1,2,3,4} = P751_0, so reuse R8
  63. MOVQ P751_5, R9
  64. MOVQ P751_6, R10
  65. MOVQ P751_7, R11
  66. MOVQ P751_8, R12
  67. MOVQ P751_9, R13
  68. MOVQ P751_10, R14
  69. MOVQ P751_11, R15
  70. // Set x <- x - p (read-modify-write on memory, borrow carried in CF)
  71. SUBQ R8, (REG_P1)
  72. SBBQ R8, (8)(REG_P1)
  73. SBBQ R8, (16)(REG_P1)
  74. SBBQ R8, (24)(REG_P1)
  75. SBBQ R8, (32)(REG_P1)
  76. SBBQ R9, (40)(REG_P1)
  77. SBBQ R10, (48)(REG_P1)
  78. SBBQ R11, (56)(REG_P1)
  79. SBBQ R12, (64)(REG_P1)
  80. SBBQ R13, (72)(REG_P1)
  81. SBBQ R14, (80)(REG_P1)
  82. SBBQ R15, (88)(REG_P1)
  83. // Save carry flag indicating x-p < 0 as a mask in AX
  // (AX was 0, so AX = 0 - 0 - CF: all ones on borrow, zero otherwise)
  84. SBBQ $0, AX
  85. // Conditionally add p to x if x-p < 0
  // The mask turns the words of p held in R8-R15 into either p or 0.
  86. ANDQ AX, R8
  87. ANDQ AX, R9
  88. ANDQ AX, R10
  89. ANDQ AX, R11
  90. ANDQ AX, R12
  91. ANDQ AX, R13
  92. ANDQ AX, R14
  93. ANDQ AX, R15
  94. ADDQ R8, (REG_P1)
  95. ADCQ R8, (8)(REG_P1)
  96. ADCQ R8, (16)(REG_P1)
  97. ADCQ R8, (24)(REG_P1)
  98. ADCQ R8, (32)(REG_P1)
  99. ADCQ R9, (40)(REG_P1)
  100. ADCQ R10, (48)(REG_P1)
  101. ADCQ R11, (56)(REG_P1)
  102. ADCQ R12, (64)(REG_P1)
  103. ADCQ R13, (72)(REG_P1)
  104. ADCQ R14, (80)(REG_P1)
  105. ADCQ R15, (88)(REG_P1)
  106. RET
  // FP751_CSWAP_WORD(off): masked exchange of one 64-bit word.
  //
  // Reads the words at byte offset `off` behind REG_P1 and REG_P2, forms
  // t = (a ^ b) & AX, and stores a ^ t and b ^ t back. With AX = 0 both
  // words are written back unchanged; with AX = all ones they are exchanged.
  // Scratch: BX, CX, DX. AX, REG_P1 and REG_P2 are left untouched.
  #define FP751_CSWAP_WORD(off) \
      MOVQ (off)(REG_P1), BX; \
      MOVQ (off)(REG_P2), CX; \
      MOVQ CX, DX; \
      XORQ BX, DX; \
      ANDQ AX, DX; \
      XORQ DX, BX; \
      XORQ DX, CX; \
      MOVQ BX, (off)(REG_P1); \
      MOVQ CX, (off)(REG_P2)

  // fp751ConditionalSwap(x, y, choice): branch-free swap of two 12-word values.
  //
  // Arguments: x+0(FP) and y+8(FP) are pointers, choice+16(FP) is one byte
  // that is expected to be 0 or 1. When choice is 1 the twelve 64-bit words
  // behind x and y are exchanged; when it is 0 every word is rewritten with
  // its own value. The same instructions run in both cases.
  TEXT ·fp751ConditionalSwap(SB), NOSPLIT, $0-17
      MOVQ x+0(FP), REG_P1
      MOVQ y+8(FP), REG_P2

      // Expand the choice byte into a full-width mask in AX:
      // 0 -> 0x00..00, 1 -> 0xff..ff.
      MOVB choice+16(FP), AL
      MOVBLZX AL, AX
      NEGQ AX

      // Twelve words, least significant first.
      FP751_CSWAP_WORD(0*8)
      FP751_CSWAP_WORD(1*8)
      FP751_CSWAP_WORD(2*8)
      FP751_CSWAP_WORD(3*8)
      FP751_CSWAP_WORD(4*8)
      FP751_CSWAP_WORD(5*8)
      FP751_CSWAP_WORD(6*8)
      FP751_CSWAP_WORD(7*8)
      FP751_CSWAP_WORD(8*8)
      FP751_CSWAP_WORD(9*8)
      FP751_CSWAP_WORD(10*8)
      FP751_CSWAP_WORD(11*8)
      RET
  // FP751_CASSIGN_WORD(off): masked select of one 64-bit word.
  //
  // Loads a from REG_P1 and b from REG_P2 at byte offset `off` and stores
  // ((a ^ b) & AX) ^ a to REG_P3 at the same offset: a when AX = 0, b when
  // AX = all ones. Scratch: BX, CX.
  #define FP751_CASSIGN_WORD(off) \
      MOVQ (off)(REG_P1), BX; \
      MOVQ (off)(REG_P2), CX; \
      XORQ BX, CX; \
      ANDQ AX, CX; \
      XORQ BX, CX; \
      MOVQ CX, (off)(REG_P3)

  // fp751ConditionalAssign(z, x, y, choice): branch-free select into z.
  //
  // Arguments: z+0(FP), x+8(FP) and y+16(FP) are pointers, choice+24(FP) is
  // one byte that is expected to be 0 or 1. The twelve 64-bit words of z are
  // set to the words of x when choice is 0 and to the words of y when choice
  // is 1. The same instructions run in both cases.
  TEXT ·fp751ConditionalAssign(SB), NOSPLIT, $0-25
      MOVQ z+0(FP), REG_P3
      MOVQ x+8(FP), REG_P1
      MOVQ y+16(FP), REG_P2

      // Expand the choice byte into a full-width mask in AX:
      // 0 -> 0x00..00, 1 -> 0xff..ff.
      MOVB choice+24(FP), AL
      MOVBLZX AL, AX
      NEGQ AX

      // Twelve words, least significant first.
      FP751_CASSIGN_WORD(0*8)
      FP751_CASSIGN_WORD(1*8)
      FP751_CASSIGN_WORD(2*8)
      FP751_CASSIGN_WORD(3*8)
      FP751_CASSIGN_WORD(4*8)
      FP751_CASSIGN_WORD(5*8)
      FP751_CASSIGN_WORD(6*8)
      FP751_CASSIGN_WORD(7*8)
      FP751_CASSIGN_WORD(8*8)
      FP751_CASSIGN_WORD(9*8)
      FP751_CASSIGN_WORD(10*8)
      FP751_CASSIGN_WORD(11*8)
      RET
  // fp751AddReduced(z, x, y): z = x + y, followed by a branch-free conditional
  // subtraction of 2*p751.
  //
  // z, x and y are pointers to twelve 64-bit words each. The routine
  //   1. adds x and y with carry propagation,
  //   2. subtracts 2*p751 (the P751X2_* constants) from the sum,
  //   3. turns the final borrow into an all-ones/zero mask, and
  //   4. adds (2*p751 & mask) back, undoing step 2 when it went negative.
  //
  // No stack frame is used. Scratch: AX, CX, SI, R8-R15. SI is REG_P2, so the
  // y pointer is gone once the mask phase starts.
  // The carry out of the top word in step 1 is not captured.
  // NOTE(review): presumably inputs are below 2p so x + y fits in twelve
  // words -- confirm with the callers.
  302. TEXT ·fp751AddReduced(SB), NOSPLIT, $0-24
  303. MOVQ z+0(FP), REG_P3
  304. MOVQ x+8(FP), REG_P1
  305. MOVQ y+16(FP), REG_P2
  // Words 0..8 of x go into R8-R15 and CX.
  306. MOVQ (REG_P1), R8
  307. MOVQ (8)(REG_P1), R9
  308. MOVQ (16)(REG_P1), R10
  309. MOVQ (24)(REG_P1), R11
  310. MOVQ (32)(REG_P1), R12
  311. MOVQ (40)(REG_P1), R13
  312. MOVQ (48)(REG_P1), R14
  313. MOVQ (56)(REG_P1), R15
  314. MOVQ (64)(REG_P1), CX
  // Step 1: x + y. Words 0..8 stay in registers.
  315. ADDQ (REG_P2), R8
  316. ADCQ (8)(REG_P2), R9
  317. ADCQ (16)(REG_P2), R10
  318. ADCQ (24)(REG_P2), R11
  319. ADCQ (32)(REG_P2), R12
  320. ADCQ (40)(REG_P2), R13
  321. ADCQ (48)(REG_P2), R14
  322. ADCQ (56)(REG_P2), R15
  323. ADCQ (64)(REG_P2), CX
  // Words 9..11 go through AX and are parked in z. The MOVQs sit inside the
  // carry chain and must not disturb CF.
  324. MOVQ (72)(REG_P1), AX
  325. ADCQ (72)(REG_P2), AX
  326. MOVQ AX, (72)(REG_P3)
  327. MOVQ (80)(REG_P1), AX
  328. ADCQ (80)(REG_P2), AX
  329. MOVQ AX, (80)(REG_P3)
  330. MOVQ (88)(REG_P1), AX
  331. ADCQ (88)(REG_P2), AX
  332. MOVQ AX, (88)(REG_P3)
  // Step 2: subtract 2*p751. Each constant is loaded into AX between the
  // SBBQs; P751X2_1 serves words 1..4.
  333. MOVQ P751X2_0, AX
  334. SUBQ AX, R8
  335. MOVQ P751X2_1, AX
  336. SBBQ AX, R9
  337. SBBQ AX, R10
  338. SBBQ AX, R11
  339. SBBQ AX, R12
  340. MOVQ P751X2_5, AX
  341. SBBQ AX, R13
  342. MOVQ P751X2_6, AX
  343. SBBQ AX, R14
  344. MOVQ P751X2_7, AX
  345. SBBQ AX, R15
  346. MOVQ P751X2_8, AX
  347. SBBQ AX, CX
  // Spill words 0..8 to z, then reload the parked words 9..11 into the freed
  // registers to finish the borrow chain.
  348. MOVQ R8, (REG_P3)
  349. MOVQ R9, (8)(REG_P3)
  350. MOVQ R10, (16)(REG_P3)
  351. MOVQ R11, (24)(REG_P3)
  352. MOVQ R12, (32)(REG_P3)
  353. MOVQ R13, (40)(REG_P3)
  354. MOVQ R14, (48)(REG_P3)
  355. MOVQ R15, (56)(REG_P3)
  356. MOVQ CX, (64)(REG_P3)
  357. MOVQ (72)(REG_P3), R8
  358. MOVQ (80)(REG_P3), R9
  359. MOVQ (88)(REG_P3), R10
  360. MOVQ P751X2_9, AX
  361. SBBQ AX, R8
  362. MOVQ P751X2_10, AX
  363. SBBQ AX, R9
  364. MOVQ P751X2_11, AX
  365. SBBQ AX, R10
  366. MOVQ R8, (72)(REG_P3)
  367. MOVQ R9, (80)(REG_P3)
  368. MOVQ R10, (88)(REG_P3)
  // Step 3: AX = 0 without touching CF, then AX = -CF (all ones on borrow).
  369. ZERO_AX_WITHOUT_CLOBBERING_FLAGS
  370. SBBQ $0, AX
  // Mask the words of 2*p751. SI (the y pointer) is reused as scratch here.
  371. MOVQ P751X2_0, SI
  372. ANDQ AX, SI
  373. MOVQ P751X2_1, R8
  374. ANDQ AX, R8
  375. MOVQ P751X2_5, R9
  376. ANDQ AX, R9
  377. MOVQ P751X2_6, R10
  378. ANDQ AX, R10
  379. MOVQ P751X2_7, R11
  380. ANDQ AX, R11
  381. MOVQ P751X2_8, R12
  382. ANDQ AX, R12
  383. MOVQ P751X2_9, R13
  384. ANDQ AX, R13
  385. MOVQ P751X2_10, R14
  386. ANDQ AX, R14
  387. MOVQ P751X2_11, R15
  388. ANDQ AX, R15
  // Step 4: z += masked 2*p751, one word at a time through AX (the mask in
  // AX is no longer needed). R8 serves words 1..4.
  389. MOVQ (REG_P3), AX
  390. ADDQ SI, AX
  391. MOVQ AX, (REG_P3)
  392. MOVQ (8)(REG_P3), AX
  393. ADCQ R8, AX
  394. MOVQ AX, (8)(REG_P3)
  395. MOVQ (16)(REG_P3), AX
  396. ADCQ R8, AX
  397. MOVQ AX, (16)(REG_P3)
  398. MOVQ (24)(REG_P3), AX
  399. ADCQ R8, AX
  400. MOVQ AX, (24)(REG_P3)
  401. MOVQ (32)(REG_P3), AX
  402. ADCQ R8, AX
  403. MOVQ AX, (32)(REG_P3)
  404. MOVQ (40)(REG_P3), AX
  405. ADCQ R9, AX
  406. MOVQ AX, (40)(REG_P3)
  407. MOVQ (48)(REG_P3), AX
  408. ADCQ R10, AX
  409. MOVQ AX, (48)(REG_P3)
  410. MOVQ (56)(REG_P3), AX
  411. ADCQ R11, AX
  412. MOVQ AX, (56)(REG_P3)
  413. MOVQ (64)(REG_P3), AX
  414. ADCQ R12, AX
  415. MOVQ AX, (64)(REG_P3)
  416. MOVQ (72)(REG_P3), AX
  417. ADCQ R13, AX
  418. MOVQ AX, (72)(REG_P3)
  419. MOVQ (80)(REG_P3), AX
  420. ADCQ R14, AX
  421. MOVQ AX, (80)(REG_P3)
  422. MOVQ (88)(REG_P3), AX
  423. ADCQ R15, AX
  424. MOVQ AX, (88)(REG_P3)
  425. RET
  // fp751SubReduced(z, x, y): z = x - y, with 2*p751 added back branch-free
  // when the subtraction borrows.
  //
  // z, x and y are pointers to twelve 64-bit words each. The routine
  //   1. subtracts y from x with borrow propagation and stores the result in z,
  //   2. turns the final borrow into an all-ones/zero mask, and
  //   3. adds (2*p751 & mask) to z.
  //
  // No stack frame is used. Scratch: AX, CX, SI, R8-R15. SI is REG_P2, so the
  // y pointer is gone once the mask phase starts.
  426. TEXT ·fp751SubReduced(SB), NOSPLIT, $0-24
  427. MOVQ z+0(FP), REG_P3
  428. MOVQ x+8(FP), REG_P1
  429. MOVQ y+16(FP), REG_P2
  // Words 0..8 of x go into R8-R15 and CX.
  430. MOVQ (REG_P1), R8
  431. MOVQ (8)(REG_P1), R9
  432. MOVQ (16)(REG_P1), R10
  433. MOVQ (24)(REG_P1), R11
  434. MOVQ (32)(REG_P1), R12
  435. MOVQ (40)(REG_P1), R13
  436. MOVQ (48)(REG_P1), R14
  437. MOVQ (56)(REG_P1), R15
  438. MOVQ (64)(REG_P1), CX
  // Step 1: x - y, words 0..8 in registers.
  439. SUBQ (REG_P2), R8
  440. SBBQ (8)(REG_P2), R9
  441. SBBQ (16)(REG_P2), R10
  442. SBBQ (24)(REG_P2), R11
  443. SBBQ (32)(REG_P2), R12
  444. SBBQ (40)(REG_P2), R13
  445. SBBQ (48)(REG_P2), R14
  446. SBBQ (56)(REG_P2), R15
  447. SBBQ (64)(REG_P2), CX
  // Stores and loads from here to the end of the chain sit between SBBQs and
  // must not disturb CF.
  448. MOVQ R8, (REG_P3)
  449. MOVQ R9, (8)(REG_P3)
  450. MOVQ R10, (16)(REG_P3)
  451. MOVQ R11, (24)(REG_P3)
  452. MOVQ R12, (32)(REG_P3)
  453. MOVQ R13, (40)(REG_P3)
  454. MOVQ R14, (48)(REG_P3)
  455. MOVQ R15, (56)(REG_P3)
  456. MOVQ CX, (64)(REG_P3)
  // Words 9..11 go through AX.
  457. MOVQ (72)(REG_P1), AX
  458. SBBQ (72)(REG_P2), AX
  459. MOVQ AX, (72)(REG_P3)
  460. MOVQ (80)(REG_P1), AX
  461. SBBQ (80)(REG_P2), AX
  462. MOVQ AX, (80)(REG_P3)
  463. MOVQ (88)(REG_P1), AX
  464. SBBQ (88)(REG_P2), AX
  465. MOVQ AX, (88)(REG_P3)
  // Step 2: AX = 0 without touching CF, then AX = -CF (all ones on borrow).
  466. ZERO_AX_WITHOUT_CLOBBERING_FLAGS
  467. SBBQ $0, AX
  // Mask the words of 2*p751. SI (the y pointer) is reused as scratch here.
  468. MOVQ P751X2_0, SI
  469. ANDQ AX, SI
  470. MOVQ P751X2_1, R8
  471. ANDQ AX, R8
  472. MOVQ P751X2_5, R9
  473. ANDQ AX, R9
  474. MOVQ P751X2_6, R10
  475. ANDQ AX, R10
  476. MOVQ P751X2_7, R11
  477. ANDQ AX, R11
  478. MOVQ P751X2_8, R12
  479. ANDQ AX, R12
  480. MOVQ P751X2_9, R13
  481. ANDQ AX, R13
  482. MOVQ P751X2_10, R14
  483. ANDQ AX, R14
  484. MOVQ P751X2_11, R15
  485. ANDQ AX, R15
  // Step 3: z += masked 2*p751, one word at a time through AX (the mask in
  // AX is no longer needed). R8 serves words 1..4.
  486. MOVQ (REG_P3), AX
  487. ADDQ SI, AX
  488. MOVQ AX, (REG_P3)
  489. MOVQ (8)(REG_P3), AX
  490. ADCQ R8, AX
  491. MOVQ AX, (8)(REG_P3)
  492. MOVQ (16)(REG_P3), AX
  493. ADCQ R8, AX
  494. MOVQ AX, (16)(REG_P3)
  495. MOVQ (24)(REG_P3), AX
  496. ADCQ R8, AX
  497. MOVQ AX, (24)(REG_P3)
  498. MOVQ (32)(REG_P3), AX
  499. ADCQ R8, AX
  500. MOVQ AX, (32)(REG_P3)
  501. MOVQ (40)(REG_P3), AX
  502. ADCQ R9, AX
  503. MOVQ AX, (40)(REG_P3)
  504. MOVQ (48)(REG_P3), AX
  505. ADCQ R10, AX
  506. MOVQ AX, (48)(REG_P3)
  507. MOVQ (56)(REG_P3), AX
  508. ADCQ R11, AX
  509. MOVQ AX, (56)(REG_P3)
  510. MOVQ (64)(REG_P3), AX
  511. ADCQ R12, AX
  512. MOVQ AX, (64)(REG_P3)
  513. MOVQ (72)(REG_P3), AX
  514. ADCQ R13, AX
  515. MOVQ AX, (72)(REG_P3)
  516. MOVQ (80)(REG_P3), AX
  517. ADCQ R14, AX
  518. MOVQ AX, (80)(REG_P3)
  519. MOVQ (88)(REG_P3), AX
  520. ADCQ R15, AX
  521. MOVQ AX, (88)(REG_P3)
  522. RET
  // fp751Mul(z, x, y): full (unreduced) product z = x * y.
  //
  // x and y are pointers to twelve 64-bit words each; z is written as
  // twenty-four 64-bit words (byte offsets 0..184). The product is built
  // from three 6x6-word products:
  //
  //   AL = x[0..5], AH = x[6..11], BL = y[0..5], BH = y[6..11]
  //   z = AL*BL + ((AH+AL)*(BH+BL) - AL*BL - AH*BH) * 2^384 + AH*BH * 2^768
  //
  // A 96-byte stack frame (twelve words, SP[0]..SP[11] below) holds
  // (AH+AL)*(BH+BL). z itself is used as scratch for AH+AL and BH+BL before
  // being overwritten with AL*BL and AH*BH, so z must not overlap x or y.
  // Scratch registers: AX, CX, DX, SI, DI, R8-R15.
  //
  // Each 6x6 product is computed column by column: every partial product is
  // added into a three-word accumulator that rotates through R8/R9/R10 (or
  // R14/R9/R15 at the start), and the low accumulator word is stored as the
  // finished column cN.
  523. TEXT ·fp751Mul(SB), $96-24
  524. // Here we store the destination in CX instead of in REG_P3 because the
  525. // multiplication instructions use DX as an implicit destination
  526. // operand: MULQ $REG sets DX:AX <-- AX * $REG.
  527. MOVQ z+0(FP), CX
  528. MOVQ x+8(FP), REG_P1
  529. MOVQ y+16(FP), REG_P2
  // z[0..5] <- AH + AL; AX <- carry out as a mask (0 or all ones).
  // AX is zeroed before the chain so the closing SBBQ $0, AX yields -CF.
  530. XORQ AX, AX
  531. MOVQ (48)(REG_P1), R8
  532. MOVQ (56)(REG_P1), R9
  533. MOVQ (64)(REG_P1), R10
  534. MOVQ (72)(REG_P1), R11
  535. MOVQ (80)(REG_P1), R12
  536. MOVQ (88)(REG_P1), R13
  537. ADDQ (REG_P1), R8
  538. ADCQ (8)(REG_P1), R9
  539. ADCQ (16)(REG_P1), R10
  540. ADCQ (24)(REG_P1), R11
  541. ADCQ (32)(REG_P1), R12
  542. ADCQ (40)(REG_P1), R13
  543. MOVQ R8, (CX)
  544. MOVQ R9, (8)(CX)
  545. MOVQ R10, (16)(CX)
  546. MOVQ R11, (24)(CX)
  547. MOVQ R12, (32)(CX)
  548. MOVQ R13, (40)(CX)
  549. SBBQ $0, AX
  // z[6..11] <- BH + BL; DX <- carry out as a mask. R8-R13 keep the sum.
  550. XORQ DX, DX
  551. MOVQ (48)(REG_P2), R8
  552. MOVQ (56)(REG_P2), R9
  553. MOVQ (64)(REG_P2), R10
  554. MOVQ (72)(REG_P2), R11
  555. MOVQ (80)(REG_P2), R12
  556. MOVQ (88)(REG_P2), R13
  557. ADDQ (REG_P2), R8
  558. ADCQ (8)(REG_P2), R9
  559. ADCQ (16)(REG_P2), R10
  560. ADCQ (24)(REG_P2), R11
  561. ADCQ (32)(REG_P2), R12
  562. ADCQ (40)(REG_P2), R13
  563. MOVQ R8, (48)(CX)
  564. MOVQ R9, (56)(CX)
  565. MOVQ R10, (64)(CX)
  566. MOVQ R11, (72)(CX)
  567. MOVQ R12, (80)(CX)
  568. MOVQ R13, (88)(CX)
  569. SBBQ $0, DX
  // Park both carry masks in the last two frame slots until the product of
  // the low six words of each sum is done.
  570. MOVQ AX, (80)(SP)
  571. MOVQ DX, (88)(SP)
  572. // (SP[0-8],R10,R8,R9) <- (AH+AL)*(BH+BL)
  // Columns c0..c8 are stored to SP[0..8]; c9, c10, c11 stay in R10, R8, R9.
  // Operands: (AH+AL) words are at (0..40)(CX), (BH+BL) words at (48..88)(CX).
  573. MOVQ (CX), R11
  574. MOVQ R8, AX
  575. MULQ R11
  576. MOVQ AX, (SP) // c0
  577. MOVQ DX, R14
  578. XORQ R15, R15
  579. MOVQ R9, AX
  580. MULQ R11
  581. XORQ R9, R9
  582. ADDQ AX, R14
  583. ADCQ DX, R9
  584. MOVQ (8)(CX), R12
  585. MOVQ R8, AX
  586. MULQ R12
  587. ADDQ AX, R14
  588. MOVQ R14, (8)(SP) // c1
  589. ADCQ DX, R9
  590. ADCQ $0, R15
  591. XORQ R8, R8
  592. MOVQ R10, AX
  593. MULQ R11
  594. ADDQ AX, R9
  595. MOVQ (48)(CX), R13
  596. ADCQ DX, R15
  597. ADCQ $0, R8
  598. MOVQ (16)(CX), AX
  599. MULQ R13
  600. ADDQ AX, R9
  601. ADCQ DX, R15
  602. MOVQ (56)(CX), AX
  603. ADCQ $0, R8
  604. MULQ R12
  605. ADDQ AX, R9
  606. MOVQ R9, (16)(SP) // c2
  607. ADCQ DX, R15
  608. ADCQ $0, R8
  609. XORQ R9, R9
  610. MOVQ (72)(CX), AX
  611. MULQ R11
  612. ADDQ AX, R15
  613. ADCQ DX, R8
  614. ADCQ $0, R9
  615. MOVQ (24)(CX), AX
  616. MULQ R13
  617. ADDQ AX, R15
  618. ADCQ DX, R8
  619. ADCQ $0, R9
  620. MOVQ R10, AX
  621. MULQ R12
  622. ADDQ AX, R15
  623. ADCQ DX, R8
  624. ADCQ $0, R9
  625. MOVQ (16)(CX), R14
  626. MOVQ (56)(CX), AX
  627. MULQ R14
  628. ADDQ AX, R15
  629. MOVQ R15, (24)(SP) // c3
  630. ADCQ DX, R8
  631. ADCQ $0, R9
  632. XORQ R10, R10
  633. MOVQ (80)(CX), AX
  634. MULQ R11
  635. ADDQ AX, R8
  636. ADCQ DX, R9
  637. ADCQ $0, R10
  638. MOVQ (64)(CX), AX
  639. MULQ R14
  640. ADDQ AX, R8
  641. ADCQ DX, R9
  642. ADCQ $0, R10
  643. MOVQ (48)(CX), R15
  644. MOVQ (32)(CX), AX
  645. MULQ R15
  646. ADDQ AX, R8
  647. ADCQ DX, R9
  648. ADCQ $0, R10
  649. MOVQ (72)(CX), AX
  650. MULQ R12
  651. ADDQ AX, R8
  652. ADCQ DX, R9
  653. ADCQ $0, R10
  654. MOVQ (24)(CX), R13
  655. MOVQ (56)(CX), AX
  656. MULQ R13
  657. ADDQ AX, R8
  658. MOVQ R8, (32)(SP) // c4
  659. ADCQ DX, R9
  660. ADCQ $0, R10
  661. XORQ R8, R8
  662. MOVQ (88)(CX), AX
  663. MULQ R11
  664. ADDQ AX, R9
  665. ADCQ DX, R10
  666. ADCQ $0, R8
  667. MOVQ (64)(CX), AX
  668. MULQ R13
  669. ADDQ AX, R9
  670. ADCQ DX, R10
  671. ADCQ $0, R8
  672. MOVQ (72)(CX), AX
  673. MULQ R14
  674. ADDQ AX, R9
  675. ADCQ DX, R10
  676. ADCQ $0, R8
  677. MOVQ (40)(CX), AX
  678. MULQ R15
  679. ADDQ AX, R9
  680. ADCQ DX, R10
  681. ADCQ $0, R8
  682. MOVQ (80)(CX), AX
  683. MULQ R12
  684. ADDQ AX, R9
  685. ADCQ DX, R10
  686. ADCQ $0, R8
  687. MOVQ (32)(CX), R15
  688. MOVQ (56)(CX), AX
  689. MULQ R15
  690. ADDQ AX, R9
  691. MOVQ R9, (40)(SP) // c5
  692. ADCQ DX, R10
  693. ADCQ $0, R8
  694. XORQ R9, R9
  695. MOVQ (64)(CX), AX
  696. MULQ R15
  697. ADDQ AX, R10
  698. ADCQ DX, R8
  699. ADCQ $0, R9
  700. MOVQ (88)(CX), AX
  701. MULQ R12
  702. ADDQ AX, R10
  703. ADCQ DX, R8
  704. ADCQ $0, R9
  705. MOVQ (80)(CX), AX
  706. MULQ R14
  707. ADDQ AX, R10
  708. ADCQ DX, R8
  709. ADCQ $0, R9
  710. MOVQ (40)(CX), R11
  711. MOVQ (56)(CX), AX
  712. MULQ R11
  713. ADDQ AX, R10
  714. ADCQ DX, R8
  715. ADCQ $0, R9
  716. MOVQ (72)(CX), AX
  717. MULQ R13
  718. ADDQ AX, R10
  719. MOVQ R10, (48)(SP) // c6
  720. ADCQ DX, R8
  721. ADCQ $0, R9
  722. XORQ R10, R10
  723. MOVQ (88)(CX), AX
  724. MULQ R14
  725. ADDQ AX, R8
  726. ADCQ DX, R9
  727. ADCQ $0, R10
  728. MOVQ (64)(CX), AX
  729. MULQ R11
  730. ADDQ AX, R8
  731. ADCQ DX, R9
  732. ADCQ $0, R10
  733. MOVQ (80)(CX), AX
  734. MULQ R13
  735. ADDQ AX, R8
  736. ADCQ DX, R9
  737. ADCQ $0, R10
  738. MOVQ (72)(CX), AX
  739. MULQ R15
  740. ADDQ AX, R8
  741. MOVQ R8, (56)(SP) // c7
  742. ADCQ DX, R9
  743. ADCQ $0, R10
  744. XORQ R8, R8
  745. MOVQ (72)(CX), AX
  746. MULQ R11
  747. ADDQ AX, R9
  748. ADCQ DX, R10
  749. ADCQ $0, R8
  750. MOVQ (80)(CX), AX
  751. MULQ R15
  752. ADDQ AX, R9
  753. ADCQ DX, R10
  754. ADCQ $0, R8
  755. MOVQ (88)(CX), AX
  756. MULQ R13
  757. ADDQ AX, R9
  758. MOVQ R9, (64)(SP) // c8
  759. ADCQ DX, R10
  760. ADCQ $0, R8
  761. XORQ R9, R9
  762. MOVQ (88)(CX), AX
  763. MULQ R15
  764. ADDQ AX, R10
  765. ADCQ DX, R8
  766. ADCQ $0, R9
  767. MOVQ (80)(CX), AX
  768. MULQ R11
  769. ADDQ AX, R10 // c9
  770. ADCQ DX, R8
  771. ADCQ $0, R9
  772. MOVQ (88)(CX), AX
  773. MULQ R11
  774. ADDQ AX, R8 // c10
  775. ADCQ DX, R9 // c11
  // Account for the carry bits of the two sums. At this point R12, R14, R13,
  // R15, R11 hold words 1, 2, 3, 4, 5 of (AH+AL); word 0 is reloaded into DX.
  // First: if (BH+BL) carried, add (AH+AL) to the upper half c6..c11.
  776. MOVQ (88)(SP), AX
  777. MOVQ (CX), DX
  778. ANDQ AX, R12
  779. ANDQ AX, R14
  780. ANDQ AX, DX
  781. ANDQ AX, R13
  782. ANDQ AX, R15
  783. ANDQ AX, R11
  784. MOVQ (48)(SP), AX
  785. ADDQ AX, DX
  786. MOVQ (56)(SP), AX
  787. ADCQ AX, R12
  788. MOVQ (64)(SP), AX
  789. ADCQ AX, R14
  790. ADCQ R10, R13
  791. ADCQ R8, R15
  792. ADCQ R9, R11
  // Fetch the (AH+AL) carry mask before its frame slot is overwritten below.
  793. MOVQ (80)(SP), AX
  794. MOVQ DX, (48)(SP)
  795. MOVQ R12, (56)(SP)
  796. MOVQ R14, (64)(SP)
  797. MOVQ R13, (72)(SP)
  798. MOVQ R15, (80)(SP)
  799. MOVQ R11, (88)(SP)
  // Second: if (AH+AL) carried, add (BH+BL) to the upper half as well.
  // Carries out of the top word of either addition are not kept.
  800. MOVQ (48)(CX), R8
  801. MOVQ (56)(CX), R9
  802. MOVQ (64)(CX), R10
  803. MOVQ (72)(CX), R11
  804. MOVQ (80)(CX), R12
  805. MOVQ (88)(CX), R13
  806. ANDQ AX, R8
  807. ANDQ AX, R9
  808. ANDQ AX, R10
  809. ANDQ AX, R11
  810. ANDQ AX, R12
  811. ANDQ AX, R13
  812. MOVQ (48)(SP), AX
  813. ADDQ AX, R8
  814. MOVQ (56)(SP), AX
  815. ADCQ AX, R9
  816. MOVQ (64)(SP), AX
  817. ADCQ AX, R10
  818. MOVQ (72)(SP), AX
  819. ADCQ AX, R11
  820. MOVQ (80)(SP), AX
  821. ADCQ AX, R12
  822. MOVQ (88)(SP), AX
  823. ADCQ AX, R13
  // Only R8, R9 and R11 are written back here; R10, R12 and R13 are stored
  // to SP[8], SP[10], SP[11] a few instructions into the next product, just
  // before each of those registers is reused.
  824. MOVQ R8, (48)(SP)
  825. MOVQ R9, (56)(SP)
  826. MOVQ R11, (72)(SP)
  827. // CX[0-11] <- AL*BL
  // Same column-wise schedule as above, now reading x[0..5] via REG_P1 and
  // y[0..5] via REG_P2, and overwriting the scratch sums in z[0..11].
  828. MOVQ (REG_P1), R11
  829. MOVQ (REG_P2), AX
  830. MULQ R11
  831. XORQ R9, R9
  832. MOVQ AX, (CX) // c0
  833. MOVQ R10, (64)(SP)
  834. MOVQ DX, R8
  835. MOVQ (8)(REG_P2), AX
  836. MULQ R11
  837. XORQ R10, R10
  838. ADDQ AX, R8
  839. MOVQ R12, (80)(SP)
  840. ADCQ DX, R9
  841. MOVQ (8)(REG_P1), R12
  842. MOVQ (REG_P2), AX
  843. MULQ R12
  844. ADDQ AX, R8
  845. MOVQ R8, (8)(CX) // c1
  846. ADCQ DX, R9
  847. MOVQ R13, (88)(SP)
  848. ADCQ $0, R10
  849. XORQ R8, R8
  850. MOVQ (16)(REG_P2), AX
  851. MULQ R11
  852. ADDQ AX, R9
  853. ADCQ DX, R10
  854. ADCQ $0, R8
  855. MOVQ (REG_P2), R13
  856. MOVQ (16)(REG_P1), AX
  857. MULQ R13
  858. ADDQ AX, R9
  859. ADCQ DX, R10
  860. ADCQ $0, R8
  861. MOVQ (8)(REG_P2), AX
  862. MULQ R12
  863. ADDQ AX, R9
  864. MOVQ R9, (16)(CX) // c2
  865. ADCQ DX, R10
  866. ADCQ $0, R8
  867. XORQ R9, R9
  868. MOVQ (24)(REG_P2), AX
  869. MULQ R11
  870. ADDQ AX, R10
  871. ADCQ DX, R8
  872. ADCQ $0, R9
  873. MOVQ (24)(REG_P1), AX
  874. MULQ R13
  875. ADDQ AX, R10
  876. ADCQ DX, R8
  877. ADCQ $0, R9
  878. MOVQ (16)(REG_P2), AX
  879. MULQ R12
  880. ADDQ AX, R10
  881. ADCQ DX, R8
  882. ADCQ $0, R9
  883. MOVQ (16)(REG_P1), R14
  884. MOVQ (8)(REG_P2), AX
  885. MULQ R14
  886. ADDQ AX, R10
  887. MOVQ R10, (24)(CX) // c3
  888. ADCQ DX, R8
  889. ADCQ $0, R9
  890. XORQ R10, R10
  891. MOVQ (32)(REG_P2), AX
  892. MULQ R11
  893. ADDQ AX, R8
  894. ADCQ DX, R9
  895. ADCQ $0, R10
  896. MOVQ (16)(REG_P2), AX
  897. MULQ R14
  898. ADDQ AX, R8
  899. ADCQ DX, R9
  900. ADCQ $0, R10
  901. MOVQ (32)(REG_P1), AX
  902. MULQ R13
  903. ADDQ AX, R8
  904. ADCQ DX, R9
  905. ADCQ $0, R10
  906. MOVQ (24)(REG_P2), AX
  907. MULQ R12
  908. ADDQ AX, R8
  909. ADCQ DX, R9
  910. ADCQ $0, R10
  911. MOVQ (24)(REG_P1), R13
  912. MOVQ (8)(REG_P2), AX
  913. MULQ R13
  914. ADDQ AX, R8
  915. MOVQ R8, (32)(CX) // c4
  916. ADCQ DX, R9
  917. ADCQ $0, R10
  918. XORQ R8, R8
  919. MOVQ (40)(REG_P2), AX
  920. MULQ R11
  921. ADDQ AX, R9
  922. ADCQ DX, R10
  923. ADCQ $0, R8
  924. MOVQ (16)(REG_P2), AX
  925. MULQ R13
  926. ADDQ AX, R9
  927. ADCQ DX, R10
  928. ADCQ $0, R8
  929. MOVQ (24)(REG_P2), AX
  930. MULQ R14
  931. ADDQ AX, R9
  932. ADCQ DX, R10
  933. ADCQ $0, R8
  934. MOVQ (40)(REG_P1), R11
  935. MOVQ (REG_P2), AX
  936. MULQ R11
  937. ADDQ AX, R9
  938. ADCQ DX, R10
  939. ADCQ $0, R8
  940. MOVQ (32)(REG_P2), AX
  941. MULQ R12
  942. ADDQ AX, R9
  943. ADCQ DX, R10
  944. ADCQ $0, R8
  945. MOVQ (32)(REG_P1), R15
  946. MOVQ (8)(REG_P2), AX
  947. MULQ R15
  948. ADDQ AX, R9
  949. MOVQ R9, (40)(CX) //c5
  950. ADCQ DX, R10
  951. ADCQ $0, R8
  952. XORQ R9, R9
  953. MOVQ (16)(REG_P2), AX
  954. MULQ R15
  955. ADDQ AX, R10
  956. ADCQ DX, R8
  957. ADCQ $0, R9
  958. MOVQ (40)(REG_P2), AX
  959. MULQ R12
  960. ADDQ AX, R10
  961. ADCQ DX, R8
  962. ADCQ $0, R9
  963. MOVQ (32)(REG_P2), AX
  964. MULQ R14
  965. ADDQ AX, R10
  966. ADCQ DX, R8
  967. ADCQ $0, R9
  968. MOVQ (8)(REG_P2), AX
  969. MULQ R11
  970. ADDQ AX, R10
  971. ADCQ DX, R8
  972. ADCQ $0, R9
  973. MOVQ (24)(REG_P2), AX
  974. MULQ R13
  975. ADDQ AX, R10
  976. MOVQ R10, (48)(CX) // c6
  977. ADCQ DX, R8
  978. ADCQ $0, R9
  979. XORQ R10, R10
  980. MOVQ (40)(REG_P2), AX
  981. MULQ R14
  982. ADDQ AX, R8
  983. ADCQ DX, R9
  984. ADCQ $0, R10
  985. MOVQ (16)(REG_P2), AX
  986. MULQ R11
  987. ADDQ AX, R8
  988. ADCQ DX, R9
  989. ADCQ $0, R10
  990. MOVQ (32)(REG_P2), AX
  991. MULQ R13
  992. ADDQ AX, R8
  993. ADCQ DX, R9
  994. ADCQ $0, R10
  995. MOVQ (24)(REG_P2), AX
  996. MULQ R15
  997. ADDQ AX, R8
  998. MOVQ R8, (56)(CX) // c7
  999. ADCQ DX, R9
  1000. ADCQ $0, R10
  1001. XORQ R8, R8
  1002. MOVQ (24)(REG_P2), AX
  1003. MULQ R11
  1004. ADDQ AX, R9
  1005. ADCQ DX, R10
  1006. ADCQ $0, R8
  1007. MOVQ (32)(REG_P2), AX
  1008. MULQ R15
  1009. ADDQ AX, R9
  1010. ADCQ DX, R10
  1011. ADCQ $0, R8
  1012. MOVQ (40)(REG_P2), AX
  1013. MULQ R13
  1014. ADDQ AX, R9
  1015. MOVQ R9, (64)(CX) // c8
  1016. ADCQ DX, R10
  1017. ADCQ $0, R8
  1018. XORQ R9, R9
  1019. MOVQ (40)(REG_P2), AX
  1020. MULQ R15
  1021. ADDQ AX, R10
  1022. ADCQ DX, R8
  1023. ADCQ $0, R9
  1024. MOVQ (32)(REG_P2), AX
  1025. MULQ R11
  1026. ADDQ AX, R10
  1027. MOVQ R10, (72)(CX) // c9
  1028. ADCQ DX, R8
  1029. ADCQ $0, R9
  1030. MOVQ (40)(REG_P2), AX
  1031. MULQ R11
  1032. ADDQ AX, R8
  1033. MOVQ R8, (80)(CX) // c10
  1034. ADCQ DX, R9
  1035. MOVQ R9, (88)(CX) // c11
  1036. // CX[12-23] <- AH*BH
  // Reads x[6..11] and y[6..11]; columns go to byte offsets 96..184 of z.
  1037. MOVQ (48)(REG_P1), R11
  1038. MOVQ (48)(REG_P2), AX
  1039. MULQ R11
  1040. XORQ R9, R9
  1041. MOVQ AX, (96)(CX) // c0
  1042. MOVQ DX, R8
  1043. MOVQ (56)(REG_P2), AX
  1044. MULQ R11
  1045. XORQ R10, R10
  1046. ADDQ AX, R8
  1047. ADCQ DX, R9
  1048. MOVQ (56)(REG_P1), R12
  1049. MOVQ (48)(REG_P2), AX
  1050. MULQ R12
  1051. ADDQ AX, R8
  1052. MOVQ R8, (104)(CX) // c1
  1053. ADCQ DX, R9
  1054. ADCQ $0, R10
  1055. XORQ R8, R8
  1056. MOVQ (64)(REG_P2), AX
  1057. MULQ R11
  1058. ADDQ AX, R9
  1059. ADCQ DX, R10
  1060. ADCQ $0, R8
  1061. MOVQ (48)(REG_P2), R13
  1062. MOVQ (64)(REG_P1), AX
  1063. MULQ R13
  1064. ADDQ AX, R9
  1065. ADCQ DX, R10
  1066. ADCQ $0, R8
  1067. MOVQ (56)(REG_P2), AX
  1068. MULQ R12
  1069. ADDQ AX, R9
  1070. MOVQ R9, (112)(CX) // c2
  1071. ADCQ DX, R10
  1072. ADCQ $0, R8
  1073. XORQ R9, R9
  1074. MOVQ (72)(REG_P2), AX
  1075. MULQ R11
  1076. ADDQ AX, R10
  1077. ADCQ DX, R8
  1078. ADCQ $0, R9
  1079. MOVQ (72)(REG_P1), AX
  1080. MULQ R13
  1081. ADDQ AX, R10
  1082. ADCQ DX, R8
  1083. ADCQ $0, R9
  1084. MOVQ (64)(REG_P2), AX
  1085. MULQ R12
  1086. ADDQ AX, R10
  1087. ADCQ DX, R8
  1088. ADCQ $0, R9
  1089. MOVQ (64)(REG_P1), R14
  1090. MOVQ (56)(REG_P2), AX
  1091. MULQ R14
  1092. ADDQ AX, R10
  1093. MOVQ R10, (120)(CX) // c3
  1094. ADCQ DX, R8
  1095. ADCQ $0, R9
  1096. XORQ R10, R10
  1097. MOVQ (80)(REG_P2), AX
  1098. MULQ R11
  1099. ADDQ AX, R8
  1100. ADCQ DX, R9
  1101. ADCQ $0, R10
  1102. MOVQ (64)(REG_P2), AX
  1103. MULQ R14
  1104. ADDQ AX, R8
  1105. ADCQ DX, R9
  1106. ADCQ $0, R10
  1107. MOVQ (80)(REG_P1), R15
  1108. MOVQ R13, AX
  1109. MULQ R15
  1110. ADDQ AX, R8
  1111. ADCQ DX, R9
  1112. ADCQ $0, R10
  1113. MOVQ (72)(REG_P2), AX
  1114. MULQ R12
  1115. ADDQ AX, R8
  1116. ADCQ DX, R9
  1117. ADCQ $0, R10
  1118. MOVQ (72)(REG_P1), R13
  1119. MOVQ (56)(REG_P2), AX
  1120. MULQ R13
  1121. ADDQ AX, R8
  1122. MOVQ R8, (128)(CX) // c4
  1123. ADCQ DX, R9
  1124. ADCQ $0, R10
  1125. XORQ R8, R8
  1126. MOVQ (88)(REG_P2), AX
  1127. MULQ R11
  1128. ADDQ AX, R9
  1129. ADCQ DX, R10
  1130. ADCQ $0, R8
  1131. MOVQ (64)(REG_P2), AX
  1132. MULQ R13
  1133. ADDQ AX, R9
  1134. ADCQ DX, R10
  1135. ADCQ $0, R8
  1136. MOVQ (72)(REG_P2), AX
  1137. MULQ R14
  1138. ADDQ AX, R9
  1139. ADCQ DX, R10
  1140. ADCQ $0, R8
  1141. MOVQ (88)(REG_P1), R11
  1142. MOVQ (48)(REG_P2), AX
  1143. MULQ R11
  1144. ADDQ AX, R9
  1145. ADCQ DX, R10
  1146. ADCQ $0, R8
  1147. MOVQ (80)(REG_P2), AX
  1148. MULQ R12
  1149. ADDQ AX, R9
  1150. ADCQ DX, R10
  1151. ADCQ $0, R8
  1152. MOVQ (56)(REG_P2), AX
  1153. MULQ R15
  1154. ADDQ AX, R9
  1155. MOVQ R9, (136)(CX) // c5
  1156. ADCQ DX, R10
  1157. ADCQ $0, R8
  1158. XORQ R9, R9
  1159. MOVQ (64)(REG_P2), AX
  1160. MULQ R15
  1161. ADDQ AX, R10
  1162. ADCQ DX, R8
  1163. ADCQ $0, R9
  1164. MOVQ (88)(REG_P2), AX
  1165. MULQ R12
  1166. ADDQ AX, R10
  1167. ADCQ DX, R8
  1168. ADCQ $0, R9
  1169. MOVQ (80)(REG_P2), AX
  1170. MULQ R14
  1171. ADDQ AX, R10
  1172. ADCQ DX, R8
  1173. ADCQ $0, R9
  1174. MOVQ (56)(REG_P2), AX
  1175. MULQ R11
  1176. ADDQ AX, R10
  1177. ADCQ DX, R8
  1178. ADCQ $0, R9
  1179. MOVQ (72)(REG_P2), AX
  1180. MULQ R13
  1181. ADDQ AX, R10
  1182. MOVQ R10, (144)(CX) // c6
  1183. ADCQ DX, R8
  1184. ADCQ $0, R9
  1185. XORQ R10, R10
  1186. MOVQ (88)(REG_P2), AX
  1187. MULQ R14
  1188. ADDQ AX, R8
  1189. ADCQ DX, R9
  1190. ADCQ $0, R10
  1191. MOVQ (64)(REG_P2), AX
  1192. MULQ R11
  1193. ADDQ AX, R8
  1194. ADCQ DX, R9
  1195. ADCQ $0, R10
  1196. MOVQ (80)(REG_P2), AX
  1197. MULQ R13
  1198. ADDQ AX, R8
  1199. ADCQ DX, R9
  1200. ADCQ $0, R10
  1201. MOVQ (72)(REG_P2), AX
  1202. MULQ R15
  1203. ADDQ AX, R8
  1204. MOVQ R8, (152)(CX) // c7
  1205. ADCQ DX, R9
  1206. ADCQ $0, R10
  1207. XORQ R8, R8
  1208. MOVQ (72)(REG_P2), AX
  1209. MULQ R11
  1210. ADDQ AX, R9
  1211. ADCQ DX, R10
  1212. ADCQ $0, R8
  1213. MOVQ (80)(REG_P2), AX
  1214. MULQ R15
  1215. ADDQ AX, R9
  1216. ADCQ DX, R10
  1217. ADCQ $0, R8
  1218. MOVQ (88)(REG_P2), AX
  1219. MULQ R13
  1220. ADDQ AX, R9
  1221. MOVQ R9, (160)(CX) // c8
  1222. ADCQ DX, R10
  1223. ADCQ $0, R8
  // Unlike the two products above, columns c9..c11 here use only a two-word
  // accumulator: the carry out of each ADCQ DX, R8 is not collected.
  // NOTE(review): presumably safe because the top words x[11] and y[11] are
  // far below 2^64 (cf. the 47-bit top word of p751) -- confirm the input
  // bound with the callers.
  1224. MOVQ (88)(REG_P2), AX
  1225. MULQ R15
  1226. ADDQ AX, R10
  1227. ADCQ DX, R8
  1228. MOVQ (80)(REG_P2), AX
  1229. MULQ R11
  1230. ADDQ AX, R10
  1231. MOVQ R10, (168)(CX) // c9
  1232. ADCQ DX, R8
  1233. MOVQ (88)(REG_P2), AX
  1234. MULQ R11
  1235. ADDQ AX, R8
  1236. MOVQ R8, (176)(CX) // c10
  1237. ADCQ $0, DX
  1238. MOVQ DX, (184)(CX) // c11
  1239. // [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL)-AL*BL
  // DI and SI (the x and y pointers) are no longer needed and become data
  // registers. The top word is computed in SI and spilled to SP[0], whose
  // old contents were already consumed by the first subtraction.
  1240. MOVQ (SP), R8
  1241. SUBQ (CX), R8
  1242. MOVQ (8)(SP), R9
  1243. SBBQ (8)(CX), R9
  1244. MOVQ (16)(SP), R10
  1245. SBBQ (16)(CX), R10
  1246. MOVQ (24)(SP), R11
  1247. SBBQ (24)(CX), R11
  1248. MOVQ (32)(SP), R12
  1249. SBBQ (32)(CX), R12
  1250. MOVQ (40)(SP), R13
  1251. SBBQ (40)(CX), R13
  1252. MOVQ (48)(SP), R14
  1253. SBBQ (48)(CX), R14
  1254. MOVQ (56)(SP), R15
  1255. SBBQ (56)(CX), R15
  1256. MOVQ (64)(SP), AX
  1257. SBBQ (64)(CX), AX
  1258. MOVQ (72)(SP), DX
  1259. SBBQ (72)(CX), DX
  1260. MOVQ (80)(SP), DI
  1261. SBBQ (80)(CX), DI
  1262. MOVQ (88)(SP), SI
  1263. SBBQ (88)(CX), SI
  1264. MOVQ SI, (SP)
  1265. // [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH
  // SI carries each word of AH*BH in turn; the last step reloads the spilled
  // top word from SP[0], so the finished middle term ends in SI, not (SP).
  1266. MOVQ (96)(CX), SI
  1267. SUBQ SI, R8
  1268. MOVQ (104)(CX), SI
  1269. SBBQ SI, R9
  1270. MOVQ (112)(CX), SI
  1271. SBBQ SI, R10
  1272. MOVQ (120)(CX), SI
  1273. SBBQ SI, R11
  1274. MOVQ (128)(CX), SI
  1275. SBBQ SI, R12
  1276. MOVQ (136)(CX), SI
  1277. SBBQ SI, R13
  1278. MOVQ (144)(CX), SI
  1279. SBBQ SI, R14
  1280. MOVQ (152)(CX), SI
  1281. SBBQ SI, R15
  1282. MOVQ (160)(CX), SI
  1283. SBBQ SI, AX
  1284. MOVQ (168)(CX), SI
  1285. SBBQ SI, DX
  1286. MOVQ (176)(CX), SI
  1287. SBBQ SI, DI
  1288. MOVQ (SP), SI
  1289. SBBQ (184)(CX), SI
  1290. // FINAL RESULT
  // Add the twelve-word middle term into z[6..17] (byte offsets 48..136),
  // then ripple the carry through z[18..23] (144..184). The stores between
  // the ADCQs must not disturb CF.
  1291. ADDQ (48)(CX), R8
  1292. MOVQ R8, (48)(CX)
  1293. ADCQ (56)(CX), R9
  1294. MOVQ R9, (56)(CX)
  1295. ADCQ (64)(CX), R10
  1296. MOVQ R10, (64)(CX)
  1297. ADCQ (72)(CX), R11
  1298. MOVQ R11, (72)(CX)
  1299. ADCQ (80)(CX), R12
  1300. MOVQ R12, (80)(CX)
  1301. ADCQ (88)(CX), R13
  1302. MOVQ R13, (88)(CX)
  1303. ADCQ (96)(CX), R14
  1304. MOVQ R14, (96)(CX)
  1305. ADCQ (104)(CX), R15
  1306. MOVQ R15, (104)(CX)
  1307. ADCQ (112)(CX), AX
  1308. MOVQ AX, (112)(CX)
  1309. ADCQ (120)(CX), DX
  1310. MOVQ DX, (120)(CX)
  1311. ADCQ (128)(CX), DI
  1312. MOVQ DI, (128)(CX)
  1313. ADCQ (136)(CX), SI
  1314. MOVQ SI, (136)(CX)
  1315. MOVQ (144)(CX), AX
  1316. ADCQ $0, AX
  1317. MOVQ AX, (144)(CX)
  1318. MOVQ (152)(CX), AX
  1319. ADCQ $0, AX
  1320. MOVQ AX, (152)(CX)
  1321. MOVQ (160)(CX), AX
  1322. ADCQ $0, AX
  1323. MOVQ AX, (160)(CX)
  1324. MOVQ (168)(CX), AX
  1325. ADCQ $0, AX
  1326. MOVQ AX, (168)(CX)
  1327. MOVQ (176)(CX), AX
  1328. ADCQ $0, AX
  1329. MOVQ AX, (176)(CX)
  1330. MOVQ (184)(CX), AX
  1331. ADCQ $0, AX
  1332. MOVQ AX, (184)(CX)
  1333. RET
  // fp751MontgomeryReduce(z, x)
  //
  // Arguments (read from the Go argument frame via FP):
  //   z+0(FP) -> REG_P2 (SI): output, written at byte offsets 0..88 (12 words).
  //   x+8(FP) -> REG_P1 (DI): input, only read, at byte offsets 0..184 (24 words).
  //
  // NOTE(review): judging by the name and the P751P1_* constants ("p751 + 1"),
  // this is the Montgomery reduction of a 24-word value modulo p751 with
  // R = 2^768 -- confirm against the Go-side declaration.
  // NOTE(review): the TEXT directive carries no NOSPLIT flag although the
  // frame size is $0, unlike the fp751*Lazy routines in this file -- confirm
  // whether that is intentional.
  //
  // Notation used in the comments below:
  //   x[k]      64-bit word of x at byte offset 8*k.
  //   P5..P11   the constants P751P1_5..P751P1_11. No lower P751P1_* words are
  //             used here (presumably words 0..4 of p751+1 are zero -- verify).
  //   m0..m4    x[0..4], held in R11..R15.
  //   m5..m11   the results of columns 5..11. Each is parked in z[5..11] and
  //             later re-read as a multiplier (m5 -> CX, m6..m10 -> R11..R15,
  //             m11 -> CX), before z[5..11] are overwritten with final output.
  //
  // Structure: one column k = 5..23 at a time. Column k adds every product
  // m_i * P_j with i + j = k (MULQ leaves the 128-bit product in DX:AX), the
  // carry from the previous column and x[k]. R8, R9 and R10 rotate as the
  // low/middle/high words of the running sum: the low word is stored, then
  // that register is cleared with XORQ to become the next column's high word.
  // The XORQs sit where no carry is pending, because XORQ clears the carry
  // flag. Columns 5..11 produce m5..m11; columns 12..23 produce z[0..11].
  //
  // Registers written: AX, CX, DX, R8-R15 (plus SI and DI for the pointers).
  1334. TEXT ·fp751MontgomeryReduce(SB), $0-16
  1335. MOVQ z+0(FP), REG_P2
  1336. MOVQ x+8(FP), REG_P1
  // Column 5: m0*P5 + x[5] -> m5 (parked in z[5]).
  1337. MOVQ (REG_P1), R11
  1338. MOVQ P751P1_5, AX
  1339. MULQ R11
  1340. XORQ R8, R8
  1341. ADDQ (40)(REG_P1), AX
  1342. MOVQ AX, (40)(REG_P2) // Z5
  1343. ADCQ DX, R8
  // Column 6: m0*P6 + m1*P5 + x[6] -> m6 (parked in z[6]).
  1344. XORQ R9, R9
  1345. MOVQ P751P1_6, AX
  1346. MULQ R11
  1347. XORQ R10, R10
  1348. ADDQ AX, R8
  1349. ADCQ DX, R9
  1350. MOVQ (8)(REG_P1), R12
  1351. MOVQ P751P1_5, AX
  1352. MULQ R12
  1353. ADDQ AX, R8
  1354. ADCQ DX, R9
  1355. ADCQ $0, R10
  1356. ADDQ (48)(REG_P1), R8
  1357. MOVQ R8, (48)(REG_P2) // Z6
  1358. ADCQ $0, R9
  1359. ADCQ $0, R10
  // Column 7: m0*P7 + m1*P6 + m2*P5 + x[7] -> m7 (parked in z[7]).
  1360. XORQ R8, R8
  1361. MOVQ P751P1_7, AX
  1362. MULQ R11
  1363. ADDQ AX, R9
  1364. ADCQ DX, R10
  1365. ADCQ $0, R8
  1366. MOVQ P751P1_6, AX
  1367. MULQ R12
  1368. ADDQ AX, R9
  1369. ADCQ DX, R10
  1370. ADCQ $0, R8
  1371. MOVQ (16)(REG_P1), R13
  1372. MOVQ P751P1_5, AX
  1373. MULQ R13
  1374. ADDQ AX, R9
  1375. ADCQ DX, R10
  1376. ADCQ $0, R8
  1377. ADDQ (56)(REG_P1), R9
  1378. MOVQ R9, (56)(REG_P2) // Z7
  1379. ADCQ $0, R10
  1380. ADCQ $0, R8
  // Column 8: m0*P8 + m1*P7 + m2*P6 + m3*P5 + x[8] -> m8 (parked in z[8]).
  1381. XORQ R9, R9
  1382. MOVQ P751P1_8, AX
  1383. MULQ R11
  1384. ADDQ AX, R10
  1385. ADCQ DX, R8
  1386. ADCQ $0, R9
  1387. MOVQ P751P1_7, AX
  1388. MULQ R12
  1389. ADDQ AX, R10
  1390. ADCQ DX, R8
  1391. ADCQ $0, R9
  1392. MOVQ P751P1_6, AX
  1393. MULQ R13
  1394. ADDQ AX, R10
  1395. ADCQ DX, R8
  1396. ADCQ $0, R9
  1397. MOVQ (24)(REG_P1), R14
  1398. MOVQ P751P1_5, AX
  1399. MULQ R14
  1400. ADDQ AX, R10
  1401. ADCQ DX, R8
  1402. ADCQ $0, R9
  1403. ADDQ (64)(REG_P1), R10
  1404. MOVQ R10, (64)(REG_P2) // Z8
  1405. ADCQ $0, R8
  1406. ADCQ $0, R9
  // Column 9: m0*P9 + m1*P8 + m2*P7 + m3*P6 + m4*P5 + x[9] -> m9 (parked in z[9]).
  1407. XORQ R10, R10
  1408. MOVQ P751P1_9, AX
  1409. MULQ R11
  1410. ADDQ AX, R8
  1411. ADCQ DX, R9
  1412. ADCQ $0, R10
  1413. MOVQ P751P1_8, AX
  1414. MULQ R12
  1415. ADDQ AX, R8
  1416. ADCQ DX, R9
  1417. ADCQ $0, R10
  1418. MOVQ P751P1_7, AX
  1419. MULQ R13
  1420. ADDQ AX, R8
  1421. ADCQ DX, R9
  1422. ADCQ $0, R10
  1423. MOVQ P751P1_6, AX
  1424. MULQ R14
  1425. ADDQ AX, R8
  1426. ADCQ DX, R9
  1427. ADCQ $0, R10
  1428. MOVQ (32)(REG_P1), R15
  1429. MOVQ P751P1_5, AX
  1430. MULQ R15
  1431. ADDQ AX, R8
  1432. ADCQ DX, R9
  1433. ADCQ $0, R10
  1434. ADDQ (72)(REG_P1), R8
  1435. MOVQ R8, (72)(REG_P2) // Z9
  1436. ADCQ $0, R9
  1437. ADCQ $0, R10
  // Column 10: m0*P10 + m1*P9 + m2*P8 + m3*P7 + m4*P6 + m5*P5 + x[10] -> m10.
  // m5 is re-read from z[5] into CX.
  1438. XORQ R8, R8
  1439. MOVQ P751P1_10, AX
  1440. MULQ R11
  1441. ADDQ AX, R9
  1442. ADCQ DX, R10
  1443. ADCQ $0, R8
  1444. MOVQ P751P1_9, AX
  1445. MULQ R12
  1446. ADDQ AX, R9
  1447. ADCQ DX, R10
  1448. ADCQ $0, R8
  1449. MOVQ P751P1_8, AX
  1450. MULQ R13
  1451. ADDQ AX, R9
  1452. ADCQ DX, R10
  1453. ADCQ $0, R8
  1454. MOVQ P751P1_7, AX
  1455. MULQ R14
  1456. ADDQ AX, R9
  1457. ADCQ DX, R10
  1458. ADCQ $0, R8
  1459. MOVQ P751P1_6, AX
  1460. MULQ R15
  1461. ADDQ AX, R9
  1462. ADCQ DX, R10
  1463. ADCQ $0, R8
  1464. MOVQ (40)(REG_P2), CX
  1465. MOVQ P751P1_5, AX
  1466. MULQ CX
  1467. ADDQ AX, R9
  1468. ADCQ DX, R10
  1469. ADCQ $0, R8
  1470. ADDQ (80)(REG_P1), R9
  1471. MOVQ R9, (80)(REG_P2) // Z10
  1472. ADCQ $0, R10
  1473. ADCQ $0, R8
  // Column 11: m0*P11 + m1*P10 + m2*P9 + m3*P8 + m4*P7 + m5*P6 + m6*P5 + x[11] -> m11.
  // After its last use as m0, R11 is reloaded with m6 from z[6].
  1474. XORQ R9, R9
  1475. MOVQ P751P1_11, AX
  1476. MULQ R11
  1477. ADDQ AX, R10
  1478. ADCQ DX, R8
  1479. ADCQ $0, R9
  1480. MOVQ P751P1_10, AX
  1481. MULQ R12
  1482. ADDQ AX, R10
  1483. ADCQ DX, R8
  1484. ADCQ $0, R9
  1485. MOVQ P751P1_9, AX
  1486. MULQ R13
  1487. ADDQ AX, R10
  1488. ADCQ DX, R8
  1489. ADCQ $0, R9
  1490. MOVQ P751P1_8, AX
  1491. MULQ R14
  1492. ADDQ AX, R10
  1493. ADCQ DX, R8
  1494. ADCQ $0, R9
  1495. MOVQ P751P1_7, AX
  1496. MULQ R15
  1497. ADDQ AX, R10
  1498. ADCQ DX, R8
  1499. ADCQ $0, R9
  1500. MOVQ P751P1_6, AX
  1501. MULQ CX
  1502. ADDQ AX, R10
  1503. ADCQ DX, R8
  1504. ADCQ $0, R9
  1505. MOVQ (48)(REG_P2), R11
  1506. MOVQ P751P1_5, AX
  1507. MULQ R11
  1508. ADDQ AX, R10
  1509. ADCQ DX, R8
  1510. ADCQ $0, R9
  1511. ADDQ (88)(REG_P1), R10
  1512. MOVQ R10, (88)(REG_P2) // Z11
  1513. ADCQ $0, R8
  1514. ADCQ $0, R9
  // Column 12: m1*P11 + m2*P10 + m3*P9 + m4*P8 + m5*P7 + m6*P6 + m7*P5 + x[12] -> z[0].
  // After its last use as m1, R12 is reloaded with m7 from z[7].
  1515. XORQ R10, R10
  1516. MOVQ P751P1_11, AX
  1517. MULQ R12
  1518. ADDQ AX, R8
  1519. ADCQ DX, R9
  1520. ADCQ $0, R10
  1521. MOVQ P751P1_10, AX
  1522. MULQ R13
  1523. ADDQ AX, R8
  1524. ADCQ DX, R9
  1525. ADCQ $0, R10
  1526. MOVQ P751P1_9, AX
  1527. MULQ R14
  1528. ADDQ AX, R8
  1529. ADCQ DX, R9
  1530. ADCQ $0, R10
  1531. MOVQ P751P1_8, AX
  1532. MULQ R15
  1533. ADDQ AX, R8
  1534. ADCQ DX, R9
  1535. ADCQ $0, R10
  1536. MOVQ P751P1_7, AX
  1537. MULQ CX
  1538. ADDQ AX, R8
  1539. ADCQ DX, R9
  1540. ADCQ $0, R10
  1541. MOVQ P751P1_6, AX
  1542. MULQ R11
  1543. ADDQ AX, R8
  1544. ADCQ DX, R9
  1545. ADCQ $0, R10
  1546. MOVQ (56)(REG_P2), R12
  1547. MOVQ P751P1_5, AX
  1548. MULQ R12
  1549. ADDQ AX, R8
  1550. ADCQ DX, R9
  1551. ADCQ $0, R10
  1552. ADDQ (96)(REG_P1), R8
  1553. MOVQ R8, (REG_P2) // Z0
  1554. ADCQ $0, R9
  1555. ADCQ $0, R10
  // Column 13: m2*P11 + m3*P10 + m4*P9 + m5*P8 + m6*P7 + m7*P6 + m8*P5 + x[13] -> z[1].
  // After its last use as m2, R13 is reloaded with m8 from z[8].
  1556. XORQ R8, R8
  1557. MOVQ P751P1_11, AX
  1558. MULQ R13
  1559. ADDQ AX, R9
  1560. ADCQ DX, R10
  1561. ADCQ $0, R8
  1562. MOVQ P751P1_10, AX
  1563. MULQ R14
  1564. ADDQ AX, R9
  1565. ADCQ DX, R10
  1566. ADCQ $0, R8
  1567. MOVQ P751P1_9, AX
  1568. MULQ R15
  1569. ADDQ AX, R9
  1570. ADCQ DX, R10
  1571. ADCQ $0, R8
  1572. MOVQ P751P1_8, AX
  1573. MULQ CX
  1574. ADDQ AX, R9
  1575. ADCQ DX, R10
  1576. ADCQ $0, R8
  1577. MOVQ P751P1_7, AX
  1578. MULQ R11
  1579. ADDQ AX, R9
  1580. ADCQ DX, R10
  1581. ADCQ $0, R8
  1582. MOVQ P751P1_6, AX
  1583. MULQ R12
  1584. ADDQ AX, R9
  1585. ADCQ DX, R10
  1586. ADCQ $0, R8
  1587. MOVQ (64)(REG_P2), R13
  1588. MOVQ P751P1_5, AX
  1589. MULQ R13
  1590. ADDQ AX, R9
  1591. ADCQ DX, R10
  1592. ADCQ $0, R8
  1593. ADDQ (104)(REG_P1), R9
  1594. MOVQ R9, (8)(REG_P2) // Z1
  1595. ADCQ $0, R10
  1596. ADCQ $0, R8
  // Column 14: m3*P11 + m4*P10 + m5*P9 + m6*P8 + m7*P7 + m8*P6 + m9*P5 + x[14] -> z[2].
  // After its last use as m3, R14 is reloaded with m9 from z[9].
  1597. XORQ R9, R9
  1598. MOVQ P751P1_11, AX
  1599. MULQ R14
  1600. ADDQ AX, R10
  1601. ADCQ DX, R8
  1602. ADCQ $0, R9
  1603. MOVQ P751P1_10, AX
  1604. MULQ R15
  1605. ADDQ AX, R10
  1606. ADCQ DX, R8
  1607. ADCQ $0, R9
  1608. MOVQ P751P1_9, AX
  1609. MULQ CX
  1610. ADDQ AX, R10
  1611. ADCQ DX, R8
  1612. ADCQ $0, R9
  1613. MOVQ P751P1_8, AX
  1614. MULQ R11
  1615. ADDQ AX, R10
  1616. ADCQ DX, R8
  1617. ADCQ $0, R9
  1618. MOVQ P751P1_7, AX
  1619. MULQ R12
  1620. ADDQ AX, R10
  1621. ADCQ DX, R8
  1622. ADCQ $0, R9
  1623. MOVQ P751P1_6, AX
  1624. MULQ R13
  1625. ADDQ AX, R10
  1626. ADCQ DX, R8
  1627. ADCQ $0, R9
  1628. MOVQ (72)(REG_P2), R14
  1629. MOVQ P751P1_5, AX
  1630. MULQ R14
  1631. ADDQ AX, R10
  1632. ADCQ DX, R8
  1633. ADCQ $0, R9
  1634. ADDQ (112)(REG_P1), R10
  1635. MOVQ R10, (16)(REG_P2) // Z2
  1636. ADCQ $0, R8
  1637. ADCQ $0, R9
  // Column 15: m4*P11 + m5*P10 + m6*P9 + m7*P8 + m8*P7 + m9*P6 + m10*P5 + x[15] -> z[3].
  // After its last use as m4, R15 is reloaded with m10 from z[10].
  1638. XORQ R10, R10
  1639. MOVQ P751P1_11, AX
  1640. MULQ R15
  1641. ADDQ AX, R8
  1642. ADCQ DX, R9
  1643. ADCQ $0, R10
  1644. MOVQ P751P1_10, AX
  1645. MULQ CX
  1646. ADDQ AX, R8
  1647. ADCQ DX, R9
  1648. ADCQ $0, R10
  1649. MOVQ P751P1_9, AX
  1650. MULQ R11
  1651. ADDQ AX, R8
  1652. ADCQ DX, R9
  1653. ADCQ $0, R10
  1654. MOVQ P751P1_8, AX
  1655. MULQ R12
  1656. ADDQ AX, R8
  1657. ADCQ DX, R9
  1658. ADCQ $0, R10
  1659. MOVQ P751P1_7, AX
  1660. MULQ R13
  1661. ADDQ AX, R8
  1662. ADCQ DX, R9
  1663. ADCQ $0, R10
  1664. MOVQ P751P1_6, AX
  1665. MULQ R14
  1666. ADDQ AX, R8
  1667. ADCQ DX, R9
  1668. ADCQ $0, R10
  1669. MOVQ (80)(REG_P2), R15
  1670. MOVQ P751P1_5, AX
  1671. MULQ R15
  1672. ADDQ AX, R8
  1673. ADCQ DX, R9
  1674. ADCQ $0, R10
  1675. ADDQ (120)(REG_P1), R8
  1676. MOVQ R8, (24)(REG_P2) // Z3
  1677. ADCQ $0, R9
  1678. ADCQ $0, R10
  // Column 16: m5*P11 + m6*P10 + m7*P9 + m8*P8 + m9*P7 + m10*P6 + m11*P5 + x[16] -> z[4].
  // After its last use as m5, CX is reloaded with m11 from z[11].
  1679. XORQ R8, R8
  1680. MOVQ P751P1_11, AX
  1681. MULQ CX
  1682. ADDQ AX, R9
  1683. ADCQ DX, R10
  1684. ADCQ $0, R8
  1685. MOVQ P751P1_10, AX
  1686. MULQ R11
  1687. ADDQ AX, R9
  1688. ADCQ DX, R10
  1689. ADCQ $0, R8
  1690. MOVQ P751P1_9, AX
  1691. MULQ R12
  1692. ADDQ AX, R9
  1693. ADCQ DX, R10
  1694. ADCQ $0, R8
  1695. MOVQ P751P1_8, AX
  1696. MULQ R13
  1697. ADDQ AX, R9
  1698. ADCQ DX, R10
  1699. ADCQ $0, R8
  1700. MOVQ P751P1_7, AX
  1701. MULQ R14
  1702. ADDQ AX, R9
  1703. ADCQ DX, R10
  1704. ADCQ $0, R8
  1705. MOVQ P751P1_6, AX
  1706. MULQ R15
  1707. ADDQ AX, R9
  1708. ADCQ DX, R10
  1709. ADCQ $0, R8
  1710. MOVQ (88)(REG_P2), CX
  1711. MOVQ P751P1_5, AX
  1712. MULQ CX
  1713. ADDQ AX, R9
  1714. ADCQ DX, R10
  1715. ADCQ $0, R8
  1716. ADDQ (128)(REG_P1), R9
  1717. MOVQ R9, (32)(REG_P2) // Z4
  1718. ADCQ $0, R10
  1719. ADCQ $0, R8
  // Column 17: m6*P11 + m7*P10 + m8*P9 + m9*P8 + m10*P7 + m11*P6 + x[17] -> z[5].
  // From here on z[5..11] receive final output; m5..m11 are already in registers.
  1720. XORQ R9, R9
  1721. MOVQ P751P1_11, AX
  1722. MULQ R11
  1723. ADDQ AX, R10
  1724. ADCQ DX, R8
  1725. ADCQ $0, R9
  1726. MOVQ P751P1_10, AX
  1727. MULQ R12
  1728. ADDQ AX, R10
  1729. ADCQ DX, R8
  1730. ADCQ $0, R9
  1731. MOVQ P751P1_9, AX
  1732. MULQ R13
  1733. ADDQ AX, R10
  1734. ADCQ DX, R8
  1735. ADCQ $0, R9
  1736. MOVQ P751P1_8, AX
  1737. MULQ R14
  1738. ADDQ AX, R10
  1739. ADCQ DX, R8
  1740. ADCQ $0, R9
  1741. MOVQ P751P1_7, AX
  1742. MULQ R15
  1743. ADDQ AX, R10
  1744. ADCQ DX, R8
  1745. ADCQ $0, R9
  1746. MOVQ P751P1_6, AX
  1747. MULQ CX
  1748. ADDQ AX, R10
  1749. ADCQ DX, R8
  1750. ADCQ $0, R9
  1751. ADDQ (136)(REG_P1), R10
  1752. MOVQ R10, (40)(REG_P2) // Z5
  1753. ADCQ $0, R8
  1754. ADCQ $0, R9
  // Column 18: m7*P11 + m8*P10 + m9*P9 + m10*P8 + m11*P7 + x[18] -> z[6].
  1755. XORQ R10, R10
  1756. MOVQ P751P1_11, AX
  1757. MULQ R12
  1758. ADDQ AX, R8
  1759. ADCQ DX, R9
  1760. ADCQ $0, R10
  1761. MOVQ P751P1_10, AX
  1762. MULQ R13
  1763. ADDQ AX, R8
  1764. ADCQ DX, R9
  1765. ADCQ $0, R10
  1766. MOVQ P751P1_9, AX
  1767. MULQ R14
  1768. ADDQ AX, R8
  1769. ADCQ DX, R9
  1770. ADCQ $0, R10
  1771. MOVQ P751P1_8, AX
  1772. MULQ R15
  1773. ADDQ AX, R8
  1774. ADCQ DX, R9
  1775. ADCQ $0, R10
  1776. MOVQ P751P1_7, AX
  1777. MULQ CX
  1778. ADDQ AX, R8
  1779. ADCQ DX, R9
  1780. ADCQ $0, R10
  1781. ADDQ (144)(REG_P1), R8
  1782. MOVQ R8, (48)(REG_P2) // Z6
  1783. ADCQ $0, R9
  1784. ADCQ $0, R10
  // Column 19: m8*P11 + m9*P10 + m10*P9 + m11*P8 + x[19] -> z[7].
  1785. XORQ R8, R8
  1786. MOVQ P751P1_11, AX
  1787. MULQ R13
  1788. ADDQ AX, R9
  1789. ADCQ DX, R10
  1790. ADCQ $0, R8
  1791. MOVQ P751P1_10, AX
  1792. MULQ R14
  1793. ADDQ AX, R9
  1794. ADCQ DX, R10
  1795. ADCQ $0, R8
  1796. MOVQ P751P1_9, AX
  1797. MULQ R15
  1798. ADDQ AX, R9
  1799. ADCQ DX, R10
  1800. ADCQ $0, R8
  1801. MOVQ P751P1_8, AX
  1802. MULQ CX
  1803. ADDQ AX, R9
  1804. ADCQ DX, R10
  1805. ADCQ $0, R8
  1806. ADDQ (152)(REG_P1), R9
  1807. MOVQ R9, (56)(REG_P2) // Z7
  1808. ADCQ $0, R10
  1809. ADCQ $0, R8
  // Column 20: m9*P11 + m10*P10 + m11*P9 + x[20] -> z[8].
  1810. XORQ R9, R9
  1811. MOVQ P751P1_11, AX
  1812. MULQ R14
  1813. ADDQ AX, R10
  1814. ADCQ DX, R8
  1815. ADCQ $0, R9
  1816. MOVQ P751P1_10, AX
  1817. MULQ R15
  1818. ADDQ AX, R10
  1819. ADCQ DX, R8
  1820. ADCQ $0, R9
  1821. MOVQ P751P1_9, AX
  1822. MULQ CX
  1823. ADDQ AX, R10
  1824. ADCQ DX, R8
  1825. ADCQ $0, R9
  1826. ADDQ (160)(REG_P1), R10
  1827. MOVQ R10, (64)(REG_P2) // Z8
  1828. ADCQ $0, R8
  1829. ADCQ $0, R9
  // Column 21: m10*P11 + m11*P10 + x[21] -> z[9].
  1830. XORQ R10, R10
  1831. MOVQ P751P1_11, AX
  1832. MULQ R15
  1833. ADDQ AX, R8
  1834. ADCQ DX, R9
  1835. ADCQ $0, R10
  1836. MOVQ P751P1_10, AX
  1837. MULQ CX
  1838. ADDQ AX, R8
  1839. ADCQ DX, R9
  1840. ADCQ $0, R10
  1841. ADDQ (168)(REG_P1), R8 // Z9
  1842. MOVQ R8, (72)(REG_P2) // Z9
  1843. ADCQ $0, R9
  1844. ADCQ $0, R10
  // Column 22: m11*P11 + x[22] -> z[10]. Only two accumulator words remain live.
  1845. MOVQ P751P1_11, AX
  1846. MULQ CX
  1847. ADDQ AX, R9
  1848. ADCQ DX, R10
  1849. ADDQ (176)(REG_P1), R9 // Z10
  1850. MOVQ R9, (80)(REG_P2) // Z10
  1851. ADCQ $0, R10
  // Column 23: remaining carry word + x[23] -> z[11]. A carry out of this
  // final addition is not captured.
  1852. ADDQ (184)(REG_P1), R10 // Z11
  1853. MOVQ R10, (88)(REG_P2) // Z11
  1854. RET
  // fp751AddLazy(z, x, y): z = x + y on twelve 64-bit words.
  //
  //   z+0(FP)  -> REG_P3: destination, 12 words written
  //   x+8(FP)  -> REG_P1: first operand, 12 words read
  //   y+16(FP) -> REG_P2: second operand, 12 words read
  //
  // A plain multi-word addition: no modular reduction is applied here, and
  // the carry out of the top word is not stored anywhere. All of x is loaded
  // before anything is written to z.
  TEXT ·fp751AddLazy(SB), NOSPLIT, $0-24
      MOVQ z+0(FP), REG_P3
      MOVQ x+8(FP), REG_P1
      MOVQ y+16(FP), REG_P2

      // Phase 1: pull x into registers. The final load reuses DI (REG_P1),
      // which is fine because x is not addressed again afterwards.
      MOVQ 0(REG_P1), R8
      MOVQ 8(REG_P1), R9
      MOVQ 16(REG_P1), R10
      MOVQ 24(REG_P1), R11
      MOVQ 32(REG_P1), R12
      MOVQ 40(REG_P1), R13
      MOVQ 48(REG_P1), R14
      MOVQ 56(REG_P1), R15
      MOVQ 64(REG_P1), AX
      MOVQ 72(REG_P1), BX
      MOVQ 80(REG_P1), CX
      MOVQ 88(REG_P1), DI

      // Phase 2: add y word by word, rippling the carry upwards.
      ADDQ 0(REG_P2), R8
      ADCQ 8(REG_P2), R9
      ADCQ 16(REG_P2), R10
      ADCQ 24(REG_P2), R11
      ADCQ 32(REG_P2), R12
      ADCQ 40(REG_P2), R13
      ADCQ 48(REG_P2), R14
      ADCQ 56(REG_P2), R15
      ADCQ 64(REG_P2), AX
      ADCQ 72(REG_P2), BX
      ADCQ 80(REG_P2), CX
      ADCQ 88(REG_P2), DI

      // Phase 3: write the twelve sum words to z.
      MOVQ R8, 0(REG_P3)
      MOVQ R9, 8(REG_P3)
      MOVQ R10, 16(REG_P3)
      MOVQ R11, 24(REG_P3)
      MOVQ R12, 32(REG_P3)
      MOVQ R13, 40(REG_P3)
      MOVQ R14, 48(REG_P3)
      MOVQ R15, 56(REG_P3)
      MOVQ AX, 64(REG_P3)
      MOVQ BX, 72(REG_P3)
      MOVQ CX, 80(REG_P3)
      MOVQ DI, 88(REG_P3)
      RET
  // fp751X2AddLazy(z, x, y): z = x + y on twenty-four 64-bit words.
  //
  //   z+0(FP)  -> REG_P3: destination, 24 words written
  //   x+8(FP)  -> REG_P1: first operand, 24 words read
  //   y+16(FP) -> REG_P2: second operand, 24 words read
  //
  // One carry chain runs through all 24 words. The register loads and memory
  // stores placed between the ADCQ groups are plain MOVQs that leave the
  // carry flag alone, so the chain is never broken. No reduction is applied
  // and the carry out of the top word is not stored.
  TEXT ·fp751X2AddLazy(SB), NOSPLIT, $0-24
      MOVQ z+0(FP), REG_P3
      MOVQ x+8(FP), REG_P1
      MOVQ y+16(FP), REG_P2

      // Words 0..10: load x, add y, store to z.
      MOVQ 0(REG_P1), R8
      MOVQ 8(REG_P1), R9
      MOVQ 16(REG_P1), R10
      MOVQ 24(REG_P1), R11
      MOVQ 32(REG_P1), R12
      MOVQ 40(REG_P1), R13
      MOVQ 48(REG_P1), R14
      MOVQ 56(REG_P1), R15
      MOVQ 64(REG_P1), AX
      MOVQ 72(REG_P1), BX
      MOVQ 80(REG_P1), CX

      ADDQ 0(REG_P2), R8
      ADCQ 8(REG_P2), R9
      ADCQ 16(REG_P2), R10
      ADCQ 24(REG_P2), R11
      ADCQ 32(REG_P2), R12
      ADCQ 40(REG_P2), R13
      ADCQ 48(REG_P2), R14
      ADCQ 56(REG_P2), R15
      ADCQ 64(REG_P2), AX
      ADCQ 72(REG_P2), BX
      ADCQ 80(REG_P2), CX

      MOVQ R8, 0(REG_P3)
      MOVQ R9, 8(REG_P3)
      MOVQ R10, 16(REG_P3)
      MOVQ R11, 24(REG_P3)
      MOVQ R12, 32(REG_P3)
      MOVQ R13, 40(REG_P3)
      MOVQ R14, 48(REG_P3)
      MOVQ R15, 56(REG_P3)
      MOVQ AX, 64(REG_P3)
      MOVQ BX, 72(REG_P3)
      MOVQ CX, 80(REG_P3)

      // Word 11 is handled on its own through AX, with the carry from
      // word 10 still pending.
      MOVQ 88(REG_P1), AX
      ADCQ 88(REG_P2), AX
      MOVQ AX, 88(REG_P3)

      // Words 12..23: same pattern, continuing the carry chain. The last
      // load reuses DI (REG_P1) because x is not addressed again.
      MOVQ 96(REG_P1), R8
      MOVQ 104(REG_P1), R9
      MOVQ 112(REG_P1), R10
      MOVQ 120(REG_P1), R11
      MOVQ 128(REG_P1), R12
      MOVQ 136(REG_P1), R13
      MOVQ 144(REG_P1), R14
      MOVQ 152(REG_P1), R15
      MOVQ 160(REG_P1), AX
      MOVQ 168(REG_P1), BX
      MOVQ 176(REG_P1), CX
      MOVQ 184(REG_P1), DI

      ADCQ 96(REG_P2), R8
      ADCQ 104(REG_P2), R9
      ADCQ 112(REG_P2), R10
      ADCQ 120(REG_P2), R11
      ADCQ 128(REG_P2), R12
      ADCQ 136(REG_P2), R13
      ADCQ 144(REG_P2), R14
      ADCQ 152(REG_P2), R15
      ADCQ 160(REG_P2), AX
      ADCQ 168(REG_P2), BX
      ADCQ 176(REG_P2), CX
      ADCQ 184(REG_P2), DI

      MOVQ R8, 96(REG_P3)
      MOVQ R9, 104(REG_P3)
      MOVQ R10, 112(REG_P3)
      MOVQ R11, 120(REG_P3)
      MOVQ R12, 128(REG_P3)
      MOVQ R13, 136(REG_P3)
      MOVQ R14, 144(REG_P3)
      MOVQ R15, 152(REG_P3)
      MOVQ AX, 160(REG_P3)
      MOVQ BX, 168(REG_P3)
      MOVQ CX, 176(REG_P3)
      MOVQ DI, 184(REG_P3)
      RET
  // fp751X2SubLazy(z, x, y): z = x - y on twenty-four 64-bit words, followed
  // by a branch-free correction of the upper half when the subtraction
  // borrowed.
  //
  //   z+0(FP)  -> REG_P3: destination, 24 words written
  //   x+8(FP)  -> REG_P1: minuend, 24 words read
  //   y+16(FP) -> REG_P2: subtrahend, 24 words read
  //
  // One borrow chain runs through all 24 words; the MOVQ loads and stores
  // between the SBBQ groups do not touch the carry flag. If the final borrow
  // is set (x < y), p (the P751_* constants) is added to words 12..23, i.e.
  // p * 2^768 is added to the result. The carry out of that correcting
  // addition is not stored.
  TEXT ·fp751X2SubLazy(SB), NOSPLIT, $0-24
      MOVQ z+0(FP), REG_P3
      MOVQ x+8(FP), REG_P1
      MOVQ y+16(FP), REG_P2

      // Words 0..10: load x, subtract y, store to z.
      MOVQ 0(REG_P1), R8
      MOVQ 8(REG_P1), R9
      MOVQ 16(REG_P1), R10
      MOVQ 24(REG_P1), R11
      MOVQ 32(REG_P1), R12
      MOVQ 40(REG_P1), R13
      MOVQ 48(REG_P1), R14
      MOVQ 56(REG_P1), R15
      MOVQ 64(REG_P1), AX
      MOVQ 72(REG_P1), BX
      MOVQ 80(REG_P1), CX

      SUBQ 0(REG_P2), R8
      SBBQ 8(REG_P2), R9
      SBBQ 16(REG_P2), R10
      SBBQ 24(REG_P2), R11
      SBBQ 32(REG_P2), R12
      SBBQ 40(REG_P2), R13
      SBBQ 48(REG_P2), R14
      SBBQ 56(REG_P2), R15
      SBBQ 64(REG_P2), AX
      SBBQ 72(REG_P2), BX
      SBBQ 80(REG_P2), CX

      MOVQ R8, 0(REG_P3)
      MOVQ R9, 8(REG_P3)
      MOVQ R10, 16(REG_P3)
      MOVQ R11, 24(REG_P3)
      MOVQ R12, 32(REG_P3)
      MOVQ R13, 40(REG_P3)
      MOVQ R14, 48(REG_P3)
      MOVQ R15, 56(REG_P3)
      MOVQ AX, 64(REG_P3)
      MOVQ BX, 72(REG_P3)
      MOVQ CX, 80(REG_P3)

      // Word 11 goes through AX on its own, borrow still pending.
      MOVQ 88(REG_P1), AX
      SBBQ 88(REG_P2), AX
      MOVQ AX, 88(REG_P3)

      // Words 12..23: same pattern, continuing the borrow chain. The last
      // load reuses DI (REG_P1) because x is not addressed again.
      MOVQ 96(REG_P1), R8
      MOVQ 104(REG_P1), R9
      MOVQ 112(REG_P1), R10
      MOVQ 120(REG_P1), R11
      MOVQ 128(REG_P1), R12
      MOVQ 136(REG_P1), R13
      MOVQ 144(REG_P1), R14
      MOVQ 152(REG_P1), R15
      MOVQ 160(REG_P1), AX
      MOVQ 168(REG_P1), BX
      MOVQ 176(REG_P1), CX
      MOVQ 184(REG_P1), DI

      SBBQ 96(REG_P2), R8
      SBBQ 104(REG_P2), R9
      SBBQ 112(REG_P2), R10
      SBBQ 120(REG_P2), R11
      SBBQ 128(REG_P2), R12
      SBBQ 136(REG_P2), R13
      SBBQ 144(REG_P2), R14
      SBBQ 152(REG_P2), R15
      SBBQ 160(REG_P2), AX
      SBBQ 168(REG_P2), BX
      SBBQ 176(REG_P2), CX
      SBBQ 184(REG_P2), DI

      MOVQ R8, 96(REG_P3)
      MOVQ R9, 104(REG_P3)
      MOVQ R10, 112(REG_P3)
      MOVQ R11, 120(REG_P3)
      MOVQ R12, 128(REG_P3)
      MOVQ R13, 136(REG_P3)
      MOVQ R14, 144(REG_P3)
      MOVQ R15, 152(REG_P3)
      MOVQ AX, 160(REG_P3)
      MOVQ BX, 168(REG_P3)
      MOVQ CX, 176(REG_P3)
      MOVQ DI, 184(REG_P3)

      // The carry flag still holds the final borrow. Turn it into a mask in
      // AX: all ones when x < y, zero otherwise. AX has to be cleared through
      // the macro because a literal zero move would destroy the flag.
      ZERO_AX_WITHOUT_CLOBBERING_FLAGS
      SBBQ $0, AX

      // Fetch p. Words 1..4 of p equal word 0, so R8 stands in for all five.
      MOVQ P751_0, R8
      MOVQ P751_5, R9
      MOVQ P751_6, R10
      MOVQ P751_7, R11
      MOVQ P751_8, R12
      MOVQ P751_9, R13
      MOVQ P751_10, R14
      MOVQ P751_11, R15

      // Keep p when the mask is set, otherwise reduce it to zero.
      ANDQ AX, R8
      ANDQ AX, R9
      ANDQ AX, R10
      ANDQ AX, R11
      ANDQ AX, R12
      ANDQ AX, R13
      ANDQ AX, R14
      ANDQ AX, R15

      // Add the masked p into words 12..23 of z, in place.
      ADDQ R8, 96(REG_P3)
      ADCQ R8, 104(REG_P3)
      ADCQ R8, 112(REG_P3)
      ADCQ R8, 120(REG_P3)
      ADCQ R8, 128(REG_P3)
      ADCQ R9, 136(REG_P3)
      ADCQ R10, 144(REG_P3)
      ADCQ R11, 152(REG_P3)
      ADCQ R12, 160(REG_P3)
      ADCQ R13, 168(REG_P3)
      ADCQ R14, 176(REG_P3)
      ADCQ R15, 184(REG_P3)
      RET