You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

2285 rivejä
36 KiB

  1. // +build amd64,!noasm
  2. #include "textflag.h"
  // Field constants, one #define per 64-bit limb, written as immediates.
  // Limb 0 is the least significant: the routines in this file start their
  // carry/borrow chains at limb 0 (byte offset 0) and finish at limb 11.
  //
  3. // p751 + 1, limbs 5..11. Limbs 0..4 are not defined: they are zero,
  // because limbs 0..4 of p751 are all ones (see P751_0 below).
  4. #define P751P1_5 $0xEEB0000000000000
  5. #define P751P1_6 $0xE3EC968549F878A8
  6. #define P751P1_7 $0xDA959B1A13F7CC76
  7. #define P751P1_8 $0x084E9867D6EBE876
  8. #define P751P1_9 $0x8562B5045CB25748
  9. #define P751P1_10 $0x0E12909F97BADC66
  10. #define P751P1_11 $0x00006FE5D541F71C
  // p751 itself. Limbs 1..4 are equal to limb 0, so only P751_0 is defined
  // for them and the code reuses it.
  11. #define P751_0 $0xFFFFFFFFFFFFFFFF
  12. #define P751_5 $0xEEAFFFFFFFFFFFFF
  13. #define P751_6 $0xE3EC968549F878A8
  14. #define P751_7 $0xDA959B1A13F7CC76
  15. #define P751_8 $0x084E9867D6EBE876
  16. #define P751_9 $0x8562B5045CB25748
  17. #define P751_10 $0x0E12909F97BADC66
  18. #define P751_11 $0x00006FE5D541F71C
  // 2 * p751. Limbs 2..4 are equal to limb 1, so only P751X2_1 is defined
  // for them and the code reuses it.
  19. #define P751X2_0 $0xFFFFFFFFFFFFFFFE
  20. #define P751X2_1 $0xFFFFFFFFFFFFFFFF
  21. #define P751X2_5 $0xDD5FFFFFFFFFFFFF
  22. #define P751X2_6 $0xC7D92D0A93F0F151
  23. #define P751X2_7 $0xB52B363427EF98ED
  24. #define P751X2_8 $0x109D30CFADD7D0ED
  25. #define P751X2_9 $0x0AC56A08B964AE90
  26. #define P751X2_10 $0x1C25213F2F75B8CD
  27. #define P751X2_11 $0x0000DFCBAA83EE38
  28. // The MSR code uses these registers for parameter passing. Keep using
  29. // them to avoid significant code changes. This means that when the Go
  30. // assembler does something strange, we can diff the machine code
  31. // against a different assembler to find out what Go did.
  // Note that these are plain register aliases: every routine still loads
  // its arguments from the FP pseudo-register into them by hand.
  32. #define REG_P1 DI
  33. #define REG_P2 SI
  34. #define REG_P3 DX
  35. // We can't write MOVQ $0, AX because Go's assembler incorrectly
  36. // optimizes this to XOR AX, AX, which clobbers the carry flag.
  37. //
  38. // This bug was defined to be "correct" behaviour (cf.
  39. // https://github.com/golang/go/issues/12405 ) by declaring that the MOV
  40. // pseudo-instruction clobbers flags, although this fact is mentioned
  41. // nowhere in the documentation for the Go assembler.
  42. //
  43. // Defining MOVQ to clobber flags has the effect that it is never safe
  44. // to interleave MOVQ with ADCQ and SBBQ instructions. Since this is
  45. // required to write a carry chain longer than the registers' working set,
  46. // all of the code below therefore relies on the unspecified and
  47. // undocumented behaviour that MOV won't clobber flags, except in the
  48. // case of the above-mentioned bug.
  49. //
  50. // However, there's also no specification of which instructions
  51. // correspond to machine instructions, and which are
  52. // pseudo-instructions (i.e., no specification of what the assembler
  53. // actually does), so this doesn't seem much worse than usual.
  54. //
  55. // Avoid the bug by dropping the bytes for `mov eax, 0` in directly
  // (opcode 0xB8 followed by a 32-bit zero immediate):
  56. #define ZERO_AX_WITHOUT_CLOBBERING_FLAGS BYTE $0xB8; BYTE $0; BYTE $0; BYTE $0; BYTE $0;
  // fp751StrongReduce(x): one conditional subtraction of p751, in place.
  //
  // x+0(FP) points at twelve 64-bit limbs (byte offsets 0..88), least
  // significant limb first. The routine overwrites x with x - p751 and, if
  // that subtraction borrowed, adds p751 back, so an x that was already below
  // p751 comes out unchanged. The code is straight-line: no branches.
  TEXT ·fp751StrongReduce(SB), NOSPLIT, $0-8
      MOVQ x+0(FP), REG_P1

      // Stage the distinct limbs of p751, top limb first. Limbs 1..4 are
      // equal to limb 0, so R8 stands in for all five of them.
      MOVQ P751_11, R15
      MOVQ P751_10, R14
      MOVQ P751_9, R13
      MOVQ P751_8, R12
      MOVQ P751_7, R11
      MOVQ P751_6, R10
      MOVQ P751_5, R9
      MOVQ P751_0, R8

      // AX has to be zero before the borrow chain begins: XORQ rewrites the
      // flags, so it cannot be issued once SUBQ has produced a borrow.
      XORQ AX, AX

      // x <- x - p751, borrow rippling from limb 0 up to limb 11.
      SUBQ R8, (0*8)(REG_P1)
      SBBQ R8, (1*8)(REG_P1)
      SBBQ R8, (2*8)(REG_P1)
      SBBQ R8, (3*8)(REG_P1)
      SBBQ R8, (4*8)(REG_P1)
      SBBQ R9, (5*8)(REG_P1)
      SBBQ R10, (6*8)(REG_P1)
      SBBQ R11, (7*8)(REG_P1)
      SBBQ R12, (8*8)(REG_P1)
      SBBQ R13, (9*8)(REG_P1)
      SBBQ R14, (10*8)(REG_P1)
      SBBQ R15, (11*8)(REG_P1)

      // AX <- 0 - 0 - borrow: all ones when x - p751 went negative, else 0.
      SBBQ $0, AX

      // Keep p751 only when the subtraction has to be undone.
      ANDQ AX, R15
      ANDQ AX, R14
      ANDQ AX, R13
      ANDQ AX, R12
      ANDQ AX, R11
      ANDQ AX, R10
      ANDQ AX, R9
      ANDQ AX, R8

      // x <- x + (p751 & mask)
      ADDQ R8, (0*8)(REG_P1)
      ADCQ R8, (1*8)(REG_P1)
      ADCQ R8, (2*8)(REG_P1)
      ADCQ R8, (3*8)(REG_P1)
      ADCQ R8, (4*8)(REG_P1)
      ADCQ R9, (5*8)(REG_P1)
      ADCQ R10, (6*8)(REG_P1)
      ADCQ R11, (7*8)(REG_P1)
      ADCQ R12, (8*8)(REG_P1)
      ADCQ R13, (9*8)(REG_P1)
      ADCQ R14, (10*8)(REG_P1)
      ADCQ R15, (11*8)(REG_P1)
      RET
  // fp751ConditionalSwap(x, y, choice): swap x and y when choice is 1, leave
  // them alone when choice is 0.
  //
  // x+0(FP) and y+8(FP) each point at twelve 64-bit limbs (byte offsets
  // 0..88); choice+16(FP) is a single byte. The byte is zero-extended and
  // negated into a mask, which is all ones only for choice == 1, so callers
  // are expected to pass exactly 0 or 1. Every limb goes through the same
  // load / XOR / AND / XOR / store sequence whatever the mask is: there are
  // no branches.
  //
  // FP751_CSWAP_LIMB(off) handles the limb at byte offset off:
  //   DX = (x ^ y) & mask;  x ^= DX;  y ^= DX
  // It expects the mask in AX and clobbers BX, CX and DX.
  #define FP751_CSWAP_LIMB(off) \
      MOVQ off(REG_P1), BX; \
      MOVQ off(REG_P2), CX; \
      MOVQ CX, DX; \
      XORQ BX, DX; \
      ANDQ AX, DX; \
      XORQ DX, BX; \
      XORQ DX, CX; \
      MOVQ BX, off(REG_P1); \
      MOVQ CX, off(REG_P2)

  TEXT ·fp751ConditionalSwap(SB), NOSPLIT, $0-17
      MOVQ x+0(FP), REG_P1
      MOVQ y+8(FP), REG_P2

      // Build the mask: 0 -> 0x00..00, 1 -> 0xff..ff.
      MOVB choice+16(FP), AL
      MOVBLZX AL, AX
      NEGQ AX

      FP751_CSWAP_LIMB(0)
      FP751_CSWAP_LIMB(8)
      FP751_CSWAP_LIMB(16)
      FP751_CSWAP_LIMB(24)
      FP751_CSWAP_LIMB(32)
      FP751_CSWAP_LIMB(40)
      FP751_CSWAP_LIMB(48)
      FP751_CSWAP_LIMB(56)
      FP751_CSWAP_LIMB(64)
      FP751_CSWAP_LIMB(72)
      FP751_CSWAP_LIMB(80)
      FP751_CSWAP_LIMB(88)
      RET

  #undef FP751_CSWAP_LIMB
  // fp751AddReduced(z, x, y): z <- x + y - 2*p751, with 2*p751 added back
  // when that difference comes out negative.
  //
  // z+0(FP), x+8(FP) and y+16(FP) each point at twelve 64-bit limbs (byte
  // offsets 0..88), least significant limb first. Presumably x and y are
  // already below 2*p751, which would keep the result below 2*p751 as well --
  // confirm against the callers. The code is straight-line: no branches.
  //
  // The carry chains below are longer than the free register set, so loads
  // and stores are interleaved with ADCQ/SBBQ. That relies on MOVQ leaving
  // the flags untouched.
  TEXT ·fp751AddReduced(SB), NOSPLIT, $0-24
      MOVQ z+0(FP), REG_P3
      MOVQ x+8(FP), REG_P1
      MOVQ y+16(FP), REG_P2

      // Limbs 0..8 of x + y are kept in registers (R8..R15, CX).
      MOVQ (REG_P1), R8
      MOVQ (8)(REG_P1), R9
      MOVQ (16)(REG_P1), R10
      MOVQ (24)(REG_P1), R11
      MOVQ (32)(REG_P1), R12
      MOVQ (40)(REG_P1), R13
      MOVQ (48)(REG_P1), R14
      MOVQ (56)(REG_P1), R15
      MOVQ (64)(REG_P1), CX
      ADDQ (REG_P2), R8
      ADCQ (8)(REG_P2), R9
      ADCQ (16)(REG_P2), R10
      ADCQ (24)(REG_P2), R11
      ADCQ (32)(REG_P2), R12
      ADCQ (40)(REG_P2), R13
      ADCQ (48)(REG_P2), R14
      ADCQ (56)(REG_P2), R15
      ADCQ (64)(REG_P2), CX

      // Limbs 9..11 continue the same carry chain through AX and are parked
      // in z until registers free up.
      MOVQ (72)(REG_P1), AX
      ADCQ (72)(REG_P2), AX
      MOVQ AX, (72)(REG_P3)
      MOVQ (80)(REG_P1), AX
      ADCQ (80)(REG_P2), AX
      MOVQ AX, (80)(REG_P3)
      MOVQ (88)(REG_P1), AX
      ADCQ (88)(REG_P2), AX
      MOVQ AX, (88)(REG_P3)

      // Subtract 2*p751 from limbs 0..8. Limbs 1..4 of 2*p751 are identical,
      // so P751X2_1 is loaded once for all four.
      MOVQ P751X2_0, AX
      SUBQ AX, R8
      MOVQ P751X2_1, AX
      SBBQ AX, R9
      SBBQ AX, R10
      SBBQ AX, R11
      SBBQ AX, R12
      MOVQ P751X2_5, AX
      SBBQ AX, R13
      MOVQ P751X2_6, AX
      SBBQ AX, R14
      MOVQ P751X2_7, AX
      SBBQ AX, R15
      MOVQ P751X2_8, AX
      SBBQ AX, CX

      // Flush limbs 0..8 to z, then pull limbs 9..11 back in and finish the
      // borrow chain on them.
      MOVQ R8, (REG_P3)
      MOVQ R9, (8)(REG_P3)
      MOVQ R10, (16)(REG_P3)
      MOVQ R11, (24)(REG_P3)
      MOVQ R12, (32)(REG_P3)
      MOVQ R13, (40)(REG_P3)
      MOVQ R14, (48)(REG_P3)
      MOVQ R15, (56)(REG_P3)
      MOVQ CX, (64)(REG_P3)
      MOVQ (72)(REG_P3), R8
      MOVQ (80)(REG_P3), R9
      MOVQ (88)(REG_P3), R10
      MOVQ P751X2_9, AX
      SBBQ AX, R8
      MOVQ P751X2_10, AX
      SBBQ AX, R9
      MOVQ P751X2_11, AX
      SBBQ AX, R10
      MOVQ R8, (72)(REG_P3)
      MOVQ R9, (80)(REG_P3)
      MOVQ R10, (88)(REG_P3)

      // AX <- AX - AX - borrow: all ones if x + y - 2*p751 went negative,
      // zero otherwise. A single SBBQ of a register with itself needs no
      // flag-preserving zeroing of AX beforehand.
      SBBQ AX, AX

      // Masked copy of 2*p751. SI (the y pointer) is dead by now and is
      // reused for limb 0; R8 covers limbs 1..4.
      MOVQ P751X2_0, SI
      ANDQ AX, SI
      MOVQ P751X2_1, R8
      ANDQ AX, R8
      MOVQ P751X2_5, R9
      ANDQ AX, R9
      MOVQ P751X2_6, R10
      ANDQ AX, R10
      MOVQ P751X2_7, R11
      ANDQ AX, R11
      MOVQ P751X2_8, R12
      ANDQ AX, R12
      MOVQ P751X2_9, R13
      ANDQ AX, R13
      MOVQ P751X2_10, R14
      ANDQ AX, R14
      MOVQ P751X2_11, R15
      ANDQ AX, R15

      // z <- z + (2*p751 & mask), one limb at a time through AX.
      MOVQ (REG_P3), AX
      ADDQ SI, AX
      MOVQ AX, (REG_P3)
      MOVQ (8)(REG_P3), AX
      ADCQ R8, AX
      MOVQ AX, (8)(REG_P3)
      MOVQ (16)(REG_P3), AX
      ADCQ R8, AX
      MOVQ AX, (16)(REG_P3)
      MOVQ (24)(REG_P3), AX
      ADCQ R8, AX
      MOVQ AX, (24)(REG_P3)
      MOVQ (32)(REG_P3), AX
      ADCQ R8, AX
      MOVQ AX, (32)(REG_P3)
      MOVQ (40)(REG_P3), AX
      ADCQ R9, AX
      MOVQ AX, (40)(REG_P3)
      MOVQ (48)(REG_P3), AX
      ADCQ R10, AX
      MOVQ AX, (48)(REG_P3)
      MOVQ (56)(REG_P3), AX
      ADCQ R11, AX
      MOVQ AX, (56)(REG_P3)
      MOVQ (64)(REG_P3), AX
      ADCQ R12, AX
      MOVQ AX, (64)(REG_P3)
      MOVQ (72)(REG_P3), AX
      ADCQ R13, AX
      MOVQ AX, (72)(REG_P3)
      MOVQ (80)(REG_P3), AX
      ADCQ R14, AX
      MOVQ AX, (80)(REG_P3)
      MOVQ (88)(REG_P3), AX
      ADCQ R15, AX
      MOVQ AX, (88)(REG_P3)
      RET
  // fp751SubReduced(z, x, y): z <- x - y, with 2*p751 added back when the
  // difference comes out negative.
  //
  // z+0(FP), x+8(FP) and y+16(FP) each point at twelve 64-bit limbs (byte
  // offsets 0..88), least significant limb first. Presumably x and y are
  // already below 2*p751, which would keep the result in [0, 2*p751) --
  // confirm against the callers. The code is straight-line: no branches.
  //
  // Loads and stores are interleaved with the SBBQ/ADCQ chains. That relies
  // on MOVQ leaving the flags untouched.
  TEXT ·fp751SubReduced(SB), NOSPLIT, $0-24
      MOVQ z+0(FP), REG_P3
      MOVQ x+8(FP), REG_P1
      MOVQ y+16(FP), REG_P2

      // Limbs 0..8 of x - y are computed in registers (R8..R15, CX).
      MOVQ (REG_P1), R8
      MOVQ (8)(REG_P1), R9
      MOVQ (16)(REG_P1), R10
      MOVQ (24)(REG_P1), R11
      MOVQ (32)(REG_P1), R12
      MOVQ (40)(REG_P1), R13
      MOVQ (48)(REG_P1), R14
      MOVQ (56)(REG_P1), R15
      MOVQ (64)(REG_P1), CX
      SUBQ (REG_P2), R8
      SBBQ (8)(REG_P2), R9
      SBBQ (16)(REG_P2), R10
      SBBQ (24)(REG_P2), R11
      SBBQ (32)(REG_P2), R12
      SBBQ (40)(REG_P2), R13
      SBBQ (48)(REG_P2), R14
      SBBQ (56)(REG_P2), R15
      SBBQ (64)(REG_P2), CX

      // Flush limbs 0..8 to z; the borrow from limb 8 stays live in CF.
      MOVQ R8, (REG_P3)
      MOVQ R9, (8)(REG_P3)
      MOVQ R10, (16)(REG_P3)
      MOVQ R11, (24)(REG_P3)
      MOVQ R12, (32)(REG_P3)
      MOVQ R13, (40)(REG_P3)
      MOVQ R14, (48)(REG_P3)
      MOVQ R15, (56)(REG_P3)
      MOVQ CX, (64)(REG_P3)

      // Limbs 9..11 continue the same borrow chain through AX.
      MOVQ (72)(REG_P1), AX
      SBBQ (72)(REG_P2), AX
      MOVQ AX, (72)(REG_P3)
      MOVQ (80)(REG_P1), AX
      SBBQ (80)(REG_P2), AX
      MOVQ AX, (80)(REG_P3)
      MOVQ (88)(REG_P1), AX
      SBBQ (88)(REG_P2), AX
      MOVQ AX, (88)(REG_P3)

      // AX <- AX - AX - borrow: all ones if x - y went negative, zero
      // otherwise. A single SBBQ of a register with itself needs no
      // flag-preserving zeroing of AX beforehand.
      SBBQ AX, AX

      // Masked copy of 2*p751. SI (the y pointer) is dead by now and is
      // reused for limb 0; R8 covers limbs 1..4, which are identical.
      MOVQ P751X2_0, SI
      ANDQ AX, SI
      MOVQ P751X2_1, R8
      ANDQ AX, R8
      MOVQ P751X2_5, R9
      ANDQ AX, R9
      MOVQ P751X2_6, R10
      ANDQ AX, R10
      MOVQ P751X2_7, R11
      ANDQ AX, R11
      MOVQ P751X2_8, R12
      ANDQ AX, R12
      MOVQ P751X2_9, R13
      ANDQ AX, R13
      MOVQ P751X2_10, R14
      ANDQ AX, R14
      MOVQ P751X2_11, R15
      ANDQ AX, R15

      // z <- z + (2*p751 & mask), one limb at a time through AX.
      MOVQ (REG_P3), AX
      ADDQ SI, AX
      MOVQ AX, (REG_P3)
      MOVQ (8)(REG_P3), AX
      ADCQ R8, AX
      MOVQ AX, (8)(REG_P3)
      MOVQ (16)(REG_P3), AX
      ADCQ R8, AX
      MOVQ AX, (16)(REG_P3)
      MOVQ (24)(REG_P3), AX
      ADCQ R8, AX
      MOVQ AX, (24)(REG_P3)
      MOVQ (32)(REG_P3), AX
      ADCQ R8, AX
      MOVQ AX, (32)(REG_P3)
      MOVQ (40)(REG_P3), AX
      ADCQ R9, AX
      MOVQ AX, (40)(REG_P3)
      MOVQ (48)(REG_P3), AX
      ADCQ R10, AX
      MOVQ AX, (48)(REG_P3)
      MOVQ (56)(REG_P3), AX
      ADCQ R11, AX
      MOVQ AX, (56)(REG_P3)
      MOVQ (64)(REG_P3), AX
      ADCQ R12, AX
      MOVQ AX, (64)(REG_P3)
      MOVQ (72)(REG_P3), AX
      ADCQ R13, AX
      MOVQ AX, (72)(REG_P3)
      MOVQ (80)(REG_P3), AX
      ADCQ R14, AX
      MOVQ AX, (80)(REG_P3)
      MOVQ (88)(REG_P3), AX
      ADCQ R15, AX
      MOVQ AX, (88)(REG_P3)
      RET
  // fp751Mul(z, x, y): full 12-limb by 12-limb product into 24 limbs, using
  // one level of Karatsuba over 6-limb halves.
  //
  // x+8(FP) and y+16(FP) each point at twelve 64-bit limbs (byte offsets
  // 0..88 are read); z+0(FP) points at twenty-four limbs (byte offsets 0..184
  // are written). Limb 0 is least significant. With AL/AH the low/high six
  // limbs of x and BL/BH those of y, the routine proceeds as follows:
  //   1. AH+AL -> z[0..5] and BH+BL -> z[6..11]; each carry-out is turned
  //      into an all-ones/zero mask, saved at (80)(SP) and (88)(SP).
  //   2. (AH+AL)*(BH+BL) -> (0..88)(SP), then the masked sums are added to
  //      its upper six limbs to account for the two carry-outs.
  //   3. AL*BL -> z[0..11] and AH*BH -> z[12..23].
  //   4. (step 2) - AL*BL - AH*BH is added into z starting at limb 6, and
  //      the carry is rippled through limb 23.
  // z is used as scratch while x and y are still being read, so z must not
  // overlap either input. The 96-byte frame holds the 12-limb middle product.
  // Each 6x6 product is built column by column (c0, c1, ...): every MULQ is
  // accumulated into a three-register window that rotates among R8/R9/R10
  // (and R14/R15 for the first columns of the middle product).
  444. TEXT ·fp751Mul(SB), $96-24
  445. // Here we store the destination in CX instead of in REG_P3 because the
  446. // multiplication instructions use DX as an implicit destination
  447. // operand: MULQ $REG sets DX:AX <-- AX * $REG.
  448. MOVQ z+0(FP), CX
  449. MOVQ x+8(FP), REG_P1
  450. MOVQ y+16(FP), REG_P2
  // Step 1a: z[0..5] <- AH + AL. AX is zeroed first so that it can receive
  // the carry-out as a mask once the chain is done.
  451. XORQ AX, AX
  452. MOVQ (48)(REG_P1), R8
  453. MOVQ (56)(REG_P1), R9
  454. MOVQ (64)(REG_P1), R10
  455. MOVQ (72)(REG_P1), R11
  456. MOVQ (80)(REG_P1), R12
  457. MOVQ (88)(REG_P1), R13
  458. ADDQ (REG_P1), R8
  459. ADCQ (8)(REG_P1), R9
  460. ADCQ (16)(REG_P1), R10
  461. ADCQ (24)(REG_P1), R11
  462. ADCQ (32)(REG_P1), R12
  463. ADCQ (40)(REG_P1), R13
  464. MOVQ R8, (CX)
  465. MOVQ R9, (8)(CX)
  466. MOVQ R10, (16)(CX)
  467. MOVQ R11, (24)(CX)
  468. MOVQ R12, (32)(CX)
  469. MOVQ R13, (40)(CX)
  470. SBBQ $0, AX // AX = 0 - carry: all ones if AH+AL carried out, else 0
  // Step 1b: z[6..11] <- BH + BL, carry-out mask in DX. R8..R13 keep the sum.
  471. XORQ DX, DX
  472. MOVQ (48)(REG_P2), R8
  473. MOVQ (56)(REG_P2), R9
  474. MOVQ (64)(REG_P2), R10
  475. MOVQ (72)(REG_P2), R11
  476. MOVQ (80)(REG_P2), R12
  477. MOVQ (88)(REG_P2), R13
  478. ADDQ (REG_P2), R8
  479. ADCQ (8)(REG_P2), R9
  480. ADCQ (16)(REG_P2), R10
  481. ADCQ (24)(REG_P2), R11
  482. ADCQ (32)(REG_P2), R12
  483. ADCQ (40)(REG_P2), R13
  484. MOVQ R8, (48)(CX)
  485. MOVQ R9, (56)(CX)
  486. MOVQ R10, (64)(CX)
  487. MOVQ R11, (72)(CX)
  488. MOVQ R12, (80)(CX)
  489. MOVQ R13, (88)(CX)
  490. SBBQ $0, DX // DX = 0 - carry: all ones if BH+BL carried out, else 0
  491. MOVQ AX, (80)(SP) // park the AH+AL carry mask
  492. MOVQ DX, (88)(SP) // park the BH+BL carry mask
  493. // (SP[0-8],R10,R8,R9) <- (AH+AL)*(BH+BL)
  // Step 2: columns c0..c8 go to (0..64)(SP); c9, c10, c11 stay in R10, R8, R9.
  494. MOVQ (CX), R11
  495. MOVQ R8, AX
  496. MULQ R11
  497. MOVQ AX, (SP) // c0
  498. MOVQ DX, R14
  499. XORQ R15, R15
  500. MOVQ R9, AX
  501. MULQ R11
  502. XORQ R9, R9
  503. ADDQ AX, R14
  504. ADCQ DX, R9
  505. MOVQ (8)(CX), R12
  506. MOVQ R8, AX
  507. MULQ R12
  508. ADDQ AX, R14
  509. MOVQ R14, (8)(SP) // c1
  510. ADCQ DX, R9
  511. ADCQ $0, R15
  512. XORQ R8, R8
  513. MOVQ R10, AX
  514. MULQ R11
  515. ADDQ AX, R9
  516. MOVQ (48)(CX), R13
  517. ADCQ DX, R15
  518. ADCQ $0, R8
  519. MOVQ (16)(CX), AX
  520. MULQ R13
  521. ADDQ AX, R9
  522. ADCQ DX, R15
  523. MOVQ (56)(CX), AX
  524. ADCQ $0, R8
  525. MULQ R12
  526. ADDQ AX, R9
  527. MOVQ R9, (16)(SP) // c2
  528. ADCQ DX, R15
  529. ADCQ $0, R8
  530. XORQ R9, R9
  531. MOVQ (72)(CX), AX
  532. MULQ R11
  533. ADDQ AX, R15
  534. ADCQ DX, R8
  535. ADCQ $0, R9
  536. MOVQ (24)(CX), AX
  537. MULQ R13
  538. ADDQ AX, R15
  539. ADCQ DX, R8
  540. ADCQ $0, R9
  541. MOVQ R10, AX
  542. MULQ R12
  543. ADDQ AX, R15
  544. ADCQ DX, R8
  545. ADCQ $0, R9
  546. MOVQ (16)(CX), R14
  547. MOVQ (56)(CX), AX
  548. MULQ R14
  549. ADDQ AX, R15
  550. MOVQ R15, (24)(SP) // c3
  551. ADCQ DX, R8
  552. ADCQ $0, R9
  553. XORQ R10, R10
  554. MOVQ (80)(CX), AX
  555. MULQ R11
  556. ADDQ AX, R8
  557. ADCQ DX, R9
  558. ADCQ $0, R10
  559. MOVQ (64)(CX), AX
  560. MULQ R14
  561. ADDQ AX, R8
  562. ADCQ DX, R9
  563. ADCQ $0, R10
  564. MOVQ (48)(CX), R15
  565. MOVQ (32)(CX), AX
  566. MULQ R15
  567. ADDQ AX, R8
  568. ADCQ DX, R9
  569. ADCQ $0, R10
  570. MOVQ (72)(CX), AX
  571. MULQ R12
  572. ADDQ AX, R8
  573. ADCQ DX, R9
  574. ADCQ $0, R10
  575. MOVQ (24)(CX), R13
  576. MOVQ (56)(CX), AX
  577. MULQ R13
  578. ADDQ AX, R8
  579. MOVQ R8, (32)(SP) // c4
  580. ADCQ DX, R9
  581. ADCQ $0, R10
  582. XORQ R8, R8
  583. MOVQ (88)(CX), AX
  584. MULQ R11
  585. ADDQ AX, R9
  586. ADCQ DX, R10
  587. ADCQ $0, R8
  588. MOVQ (64)(CX), AX
  589. MULQ R13
  590. ADDQ AX, R9
  591. ADCQ DX, R10
  592. ADCQ $0, R8
  593. MOVQ (72)(CX), AX
  594. MULQ R14
  595. ADDQ AX, R9
  596. ADCQ DX, R10
  597. ADCQ $0, R8
  598. MOVQ (40)(CX), AX
  599. MULQ R15
  600. ADDQ AX, R9
  601. ADCQ DX, R10
  602. ADCQ $0, R8
  603. MOVQ (80)(CX), AX
  604. MULQ R12
  605. ADDQ AX, R9
  606. ADCQ DX, R10
  607. ADCQ $0, R8
  608. MOVQ (32)(CX), R15
  609. MOVQ (56)(CX), AX
  610. MULQ R15
  611. ADDQ AX, R9
  612. MOVQ R9, (40)(SP) // c5
  613. ADCQ DX, R10
  614. ADCQ $0, R8
  615. XORQ R9, R9
  616. MOVQ (64)(CX), AX
  617. MULQ R15
  618. ADDQ AX, R10
  619. ADCQ DX, R8
  620. ADCQ $0, R9
  621. MOVQ (88)(CX), AX
  622. MULQ R12
  623. ADDQ AX, R10
  624. ADCQ DX, R8
  625. ADCQ $0, R9
  626. MOVQ (80)(CX), AX
  627. MULQ R14
  628. ADDQ AX, R10
  629. ADCQ DX, R8
  630. ADCQ $0, R9
  631. MOVQ (40)(CX), R11
  632. MOVQ (56)(CX), AX
  633. MULQ R11
  634. ADDQ AX, R10
  635. ADCQ DX, R8
  636. ADCQ $0, R9
  637. MOVQ (72)(CX), AX
  638. MULQ R13
  639. ADDQ AX, R10
  640. MOVQ R10, (48)(SP) // c6
  641. ADCQ DX, R8
  642. ADCQ $0, R9
  643. XORQ R10, R10
  644. MOVQ (88)(CX), AX
  645. MULQ R14
  646. ADDQ AX, R8
  647. ADCQ DX, R9
  648. ADCQ $0, R10
  649. MOVQ (64)(CX), AX
  650. MULQ R11
  651. ADDQ AX, R8
  652. ADCQ DX, R9
  653. ADCQ $0, R10
  654. MOVQ (80)(CX), AX
  655. MULQ R13
  656. ADDQ AX, R8
  657. ADCQ DX, R9
  658. ADCQ $0, R10
  659. MOVQ (72)(CX), AX
  660. MULQ R15
  661. ADDQ AX, R8
  662. MOVQ R8, (56)(SP) // c7
  663. ADCQ DX, R9
  664. ADCQ $0, R10
  665. XORQ R8, R8
  666. MOVQ (72)(CX), AX
  667. MULQ R11
  668. ADDQ AX, R9
  669. ADCQ DX, R10
  670. ADCQ $0, R8
  671. MOVQ (80)(CX), AX
  672. MULQ R15
  673. ADDQ AX, R9
  674. ADCQ DX, R10
  675. ADCQ $0, R8
  676. MOVQ (88)(CX), AX
  677. MULQ R13
  678. ADDQ AX, R9
  679. MOVQ R9, (64)(SP) // c8
  680. ADCQ DX, R10
  681. ADCQ $0, R8
  682. XORQ R9, R9
  683. MOVQ (88)(CX), AX
  684. MULQ R15
  685. ADDQ AX, R10
  686. ADCQ DX, R8
  687. ADCQ $0, R9
  688. MOVQ (80)(CX), AX
  689. MULQ R11
  690. ADDQ AX, R10 // c9
  691. ADCQ DX, R8
  692. ADCQ $0, R9
  693. MOVQ (88)(CX), AX
  694. MULQ R11
  695. ADDQ AX, R8 // c10
  696. ADCQ DX, R9 // c11
  // Carry correction, part 1. At this point DX (loaded below), R12, R14, R13,
  // R15, R11 hold limbs 0..5 of AH+AL. They are masked with the BH+BL carry
  // mask from (88)(SP) and added to columns c6..c11 of the product.
  697. MOVQ (88)(SP), AX
  698. MOVQ (CX), DX
  699. ANDQ AX, R12
  700. ANDQ AX, R14
  701. ANDQ AX, DX
  702. ANDQ AX, R13
  703. ANDQ AX, R15
  704. ANDQ AX, R11
  705. MOVQ (48)(SP), AX
  706. ADDQ AX, DX
  707. MOVQ (56)(SP), AX
  708. ADCQ AX, R12
  709. MOVQ (64)(SP), AX
  710. ADCQ AX, R14
  711. ADCQ R10, R13
  712. ADCQ R8, R15
  713. ADCQ R9, R11
  714. MOVQ (80)(SP), AX // AH+AL carry mask, fetched before (80)(SP) is overwritten
  715. MOVQ DX, (48)(SP)
  716. MOVQ R12, (56)(SP)
  717. MOVQ R14, (64)(SP)
  718. MOVQ R13, (72)(SP)
  719. MOVQ R15, (80)(SP)
  720. MOVQ R11, (88)(SP)
  // Carry correction, part 2: BH+BL (z[6..11]) masked with the AH+AL carry
  // mask is added to the same six limbs.
  721. MOVQ (48)(CX), R8
  722. MOVQ (56)(CX), R9
  723. MOVQ (64)(CX), R10
  724. MOVQ (72)(CX), R11
  725. MOVQ (80)(CX), R12
  726. MOVQ (88)(CX), R13
  727. ANDQ AX, R8
  728. ANDQ AX, R9
  729. ANDQ AX, R10
  730. ANDQ AX, R11
  731. ANDQ AX, R12
  732. ANDQ AX, R13
  733. MOVQ (48)(SP), AX
  734. ADDQ AX, R8
  735. MOVQ (56)(SP), AX
  736. ADCQ AX, R9
  737. MOVQ (64)(SP), AX
  738. ADCQ AX, R10
  739. MOVQ (72)(SP), AX
  740. ADCQ AX, R11
  741. MOVQ (80)(SP), AX
  742. ADCQ AX, R12
  743. MOVQ (88)(SP), AX
  744. ADCQ AX, R13
  // Only R8, R9 and R11 are written back here; R10, R12 and R13 are stored
  // a little further down, just before each register is reused.
  745. MOVQ R8, (48)(SP)
  746. MOVQ R9, (56)(SP)
  747. MOVQ R11, (72)(SP)
  748. // CX[0-11] <- AL*BL
  // Step 3a: from here on z[0..11] is overwritten with the low product, read
  // directly from x (REG_P1) and y (REG_P2).
  749. MOVQ (REG_P1), R11
  750. MOVQ (REG_P2), AX
  751. MULQ R11
  752. XORQ R9, R9
  753. MOVQ AX, (CX) // c0
  754. MOVQ R10, (64)(SP) // deferred store of corrected middle-product limb 8
  755. MOVQ DX, R8
  756. MOVQ (8)(REG_P2), AX
  757. MULQ R11
  758. XORQ R10, R10
  759. ADDQ AX, R8
  760. MOVQ R12, (80)(SP) // deferred store of corrected middle-product limb 10
  761. ADCQ DX, R9
  762. MOVQ (8)(REG_P1), R12
  763. MOVQ (REG_P2), AX
  764. MULQ R12
  765. ADDQ AX, R8
  766. MOVQ R8, (8)(CX) // c1
  767. ADCQ DX, R9
  768. MOVQ R13, (88)(SP) // deferred store of corrected middle-product limb 11
  769. ADCQ $0, R10
  770. XORQ R8, R8
  771. MOVQ (16)(REG_P2), AX
  772. MULQ R11
  773. ADDQ AX, R9
  774. ADCQ DX, R10
  775. ADCQ $0, R8
  776. MOVQ (REG_P2), R13
  777. MOVQ (16)(REG_P1), AX
  778. MULQ R13
  779. ADDQ AX, R9
  780. ADCQ DX, R10
  781. ADCQ $0, R8
  782. MOVQ (8)(REG_P2), AX
  783. MULQ R12
  784. ADDQ AX, R9
  785. MOVQ R9, (16)(CX) // c2
  786. ADCQ DX, R10
  787. ADCQ $0, R8
  788. XORQ R9, R9
  789. MOVQ (24)(REG_P2), AX
  790. MULQ R11
  791. ADDQ AX, R10
  792. ADCQ DX, R8
  793. ADCQ $0, R9
  794. MOVQ (24)(REG_P1), AX
  795. MULQ R13
  796. ADDQ AX, R10
  797. ADCQ DX, R8
  798. ADCQ $0, R9
  799. MOVQ (16)(REG_P2), AX
  800. MULQ R12
  801. ADDQ AX, R10
  802. ADCQ DX, R8
  803. ADCQ $0, R9
  804. MOVQ (16)(REG_P1), R14
  805. MOVQ (8)(REG_P2), AX
  806. MULQ R14
  807. ADDQ AX, R10
  808. MOVQ R10, (24)(CX) // c3
  809. ADCQ DX, R8
  810. ADCQ $0, R9
  811. XORQ R10, R10
  812. MOVQ (32)(REG_P2), AX
  813. MULQ R11
  814. ADDQ AX, R8
  815. ADCQ DX, R9
  816. ADCQ $0, R10
  817. MOVQ (16)(REG_P2), AX
  818. MULQ R14
  819. ADDQ AX, R8
  820. ADCQ DX, R9
  821. ADCQ $0, R10
  822. MOVQ (32)(REG_P1), AX
  823. MULQ R13
  824. ADDQ AX, R8
  825. ADCQ DX, R9
  826. ADCQ $0, R10
  827. MOVQ (24)(REG_P2), AX
  828. MULQ R12
  829. ADDQ AX, R8
  830. ADCQ DX, R9
  831. ADCQ $0, R10
  832. MOVQ (24)(REG_P1), R13
  833. MOVQ (8)(REG_P2), AX
  834. MULQ R13
  835. ADDQ AX, R8
  836. MOVQ R8, (32)(CX) // c4
  837. ADCQ DX, R9
  838. ADCQ $0, R10
  839. XORQ R8, R8
  840. MOVQ (40)(REG_P2), AX
  841. MULQ R11
  842. ADDQ AX, R9
  843. ADCQ DX, R10
  844. ADCQ $0, R8
  845. MOVQ (16)(REG_P2), AX
  846. MULQ R13
  847. ADDQ AX, R9
  848. ADCQ DX, R10
  849. ADCQ $0, R8
  850. MOVQ (24)(REG_P2), AX
  851. MULQ R14
  852. ADDQ AX, R9
  853. ADCQ DX, R10
  854. ADCQ $0, R8
  855. MOVQ (40)(REG_P1), R11
  856. MOVQ (REG_P2), AX
  857. MULQ R11
  858. ADDQ AX, R9
  859. ADCQ DX, R10
  860. ADCQ $0, R8
  861. MOVQ (32)(REG_P2), AX
  862. MULQ R12
  863. ADDQ AX, R9
  864. ADCQ DX, R10
  865. ADCQ $0, R8
  866. MOVQ (32)(REG_P1), R15
  867. MOVQ (8)(REG_P2), AX
  868. MULQ R15
  869. ADDQ AX, R9
  870. MOVQ R9, (40)(CX) //c5
  871. ADCQ DX, R10
  872. ADCQ $0, R8
  873. XORQ R9, R9
  874. MOVQ (16)(REG_P2), AX
  875. MULQ R15
  876. ADDQ AX, R10
  877. ADCQ DX, R8
  878. ADCQ $0, R9
  879. MOVQ (40)(REG_P2), AX
  880. MULQ R12
  881. ADDQ AX, R10
  882. ADCQ DX, R8
  883. ADCQ $0, R9
  884. MOVQ (32)(REG_P2), AX
  885. MULQ R14
  886. ADDQ AX, R10
  887. ADCQ DX, R8
  888. ADCQ $0, R9
  889. MOVQ (8)(REG_P2), AX
  890. MULQ R11
  891. ADDQ AX, R10
  892. ADCQ DX, R8
  893. ADCQ $0, R9
  894. MOVQ (24)(REG_P2), AX
  895. MULQ R13
  896. ADDQ AX, R10
  897. MOVQ R10, (48)(CX) // c6
  898. ADCQ DX, R8
  899. ADCQ $0, R9
  900. XORQ R10, R10
  901. MOVQ (40)(REG_P2), AX
  902. MULQ R14
  903. ADDQ AX, R8
  904. ADCQ DX, R9
  905. ADCQ $0, R10
  906. MOVQ (16)(REG_P2), AX
  907. MULQ R11
  908. ADDQ AX, R8
  909. ADCQ DX, R9
  910. ADCQ $0, R10
  911. MOVQ (32)(REG_P2), AX
  912. MULQ R13
  913. ADDQ AX, R8
  914. ADCQ DX, R9
  915. ADCQ $0, R10
  916. MOVQ (24)(REG_P2), AX
  917. MULQ R15
  918. ADDQ AX, R8
  919. MOVQ R8, (56)(CX) // c7
  920. ADCQ DX, R9
  921. ADCQ $0, R10
  922. XORQ R8, R8
  923. MOVQ (24)(REG_P2), AX
  924. MULQ R11
  925. ADDQ AX, R9
  926. ADCQ DX, R10
  927. ADCQ $0, R8
  928. MOVQ (32)(REG_P2), AX
  929. MULQ R15
  930. ADDQ AX, R9
  931. ADCQ DX, R10
  932. ADCQ $0, R8
  933. MOVQ (40)(REG_P2), AX
  934. MULQ R13
  935. ADDQ AX, R9
  936. MOVQ R9, (64)(CX) // c8
  937. ADCQ DX, R10
  938. ADCQ $0, R8
  939. XORQ R9, R9
  940. MOVQ (40)(REG_P2), AX
  941. MULQ R15
  942. ADDQ AX, R10
  943. ADCQ DX, R8
  944. ADCQ $0, R9
  945. MOVQ (32)(REG_P2), AX
  946. MULQ R11
  947. ADDQ AX, R10
  948. MOVQ R10, (72)(CX) // c9
  949. ADCQ DX, R8
  950. ADCQ $0, R9
  951. MOVQ (40)(REG_P2), AX
  952. MULQ R11
  953. ADDQ AX, R8
  954. MOVQ R8, (80)(CX) // c10
  955. ADCQ DX, R9
  956. MOVQ R9, (88)(CX) // c11
  957. // CX[12-23] <- AH*BH
  // Step 3b: same column schedule as AL*BL, on the high halves of x and y
  // (byte offsets 48..88); the result lands at byte offsets 96..184 of z.
  958. MOVQ (48)(REG_P1), R11
  959. MOVQ (48)(REG_P2), AX
  960. MULQ R11
  961. XORQ R9, R9
  962. MOVQ AX, (96)(CX) // c0
  963. MOVQ DX, R8
  964. MOVQ (56)(REG_P2), AX
  965. MULQ R11
  966. XORQ R10, R10
  967. ADDQ AX, R8
  968. ADCQ DX, R9
  969. MOVQ (56)(REG_P1), R12
  970. MOVQ (48)(REG_P2), AX
  971. MULQ R12
  972. ADDQ AX, R8
  973. MOVQ R8, (104)(CX) // c1
  974. ADCQ DX, R9
  975. ADCQ $0, R10
  976. XORQ R8, R8
  977. MOVQ (64)(REG_P2), AX
  978. MULQ R11
  979. ADDQ AX, R9
  980. ADCQ DX, R10
  981. ADCQ $0, R8
  982. MOVQ (48)(REG_P2), R13
  983. MOVQ (64)(REG_P1), AX
  984. MULQ R13
  985. ADDQ AX, R9
  986. ADCQ DX, R10
  987. ADCQ $0, R8
  988. MOVQ (56)(REG_P2), AX
  989. MULQ R12
  990. ADDQ AX, R9
  991. MOVQ R9, (112)(CX) // c2
  992. ADCQ DX, R10
  993. ADCQ $0, R8
  994. XORQ R9, R9
  995. MOVQ (72)(REG_P2), AX
  996. MULQ R11
  997. ADDQ AX, R10
  998. ADCQ DX, R8
  999. ADCQ $0, R9
  1000. MOVQ (72)(REG_P1), AX
  1001. MULQ R13
  1002. ADDQ AX, R10
  1003. ADCQ DX, R8
  1004. ADCQ $0, R9
  1005. MOVQ (64)(REG_P2), AX
  1006. MULQ R12
  1007. ADDQ AX, R10
  1008. ADCQ DX, R8
  1009. ADCQ $0, R9
  1010. MOVQ (64)(REG_P1), R14
  1011. MOVQ (56)(REG_P2), AX
  1012. MULQ R14
  1013. ADDQ AX, R10
  1014. MOVQ R10, (120)(CX) // c3
  1015. ADCQ DX, R8
  1016. ADCQ $0, R9
  1017. XORQ R10, R10
  1018. MOVQ (80)(REG_P2), AX
  1019. MULQ R11
  1020. ADDQ AX, R8
  1021. ADCQ DX, R9
  1022. ADCQ $0, R10
  1023. MOVQ (64)(REG_P2), AX
  1024. MULQ R14
  1025. ADDQ AX, R8
  1026. ADCQ DX, R9
  1027. ADCQ $0, R10
  1028. MOVQ (80)(REG_P1), R15
  1029. MOVQ R13, AX
  1030. MULQ R15
  1031. ADDQ AX, R8
  1032. ADCQ DX, R9
  1033. ADCQ $0, R10
  1034. MOVQ (72)(REG_P2), AX
  1035. MULQ R12
  1036. ADDQ AX, R8
  1037. ADCQ DX, R9
  1038. ADCQ $0, R10
  1039. MOVQ (72)(REG_P1), R13
  1040. MOVQ (56)(REG_P2), AX
  1041. MULQ R13
  1042. ADDQ AX, R8
  1043. MOVQ R8, (128)(CX) // c4
  1044. ADCQ DX, R9
  1045. ADCQ $0, R10
  1046. XORQ R8, R8
  1047. MOVQ (88)(REG_P2), AX
  1048. MULQ R11
  1049. ADDQ AX, R9
  1050. ADCQ DX, R10
  1051. ADCQ $0, R8
  1052. MOVQ (64)(REG_P2), AX
  1053. MULQ R13
  1054. ADDQ AX, R9
  1055. ADCQ DX, R10
  1056. ADCQ $0, R8
  1057. MOVQ (72)(REG_P2), AX
  1058. MULQ R14
  1059. ADDQ AX, R9
  1060. ADCQ DX, R10
  1061. ADCQ $0, R8
  1062. MOVQ (88)(REG_P1), R11
  1063. MOVQ (48)(REG_P2), AX
  1064. MULQ R11
  1065. ADDQ AX, R9
  1066. ADCQ DX, R10
  1067. ADCQ $0, R8
  1068. MOVQ (80)(REG_P2), AX
  1069. MULQ R12
  1070. ADDQ AX, R9
  1071. ADCQ DX, R10
  1072. ADCQ $0, R8
  1073. MOVQ (56)(REG_P2), AX
  1074. MULQ R15
  1075. ADDQ AX, R9
  1076. MOVQ R9, (136)(CX) // c5
  1077. ADCQ DX, R10
  1078. ADCQ $0, R8
  1079. XORQ R9, R9
  1080. MOVQ (64)(REG_P2), AX
  1081. MULQ R15
  1082. ADDQ AX, R10
  1083. ADCQ DX, R8
  1084. ADCQ $0, R9
  1085. MOVQ (88)(REG_P2), AX
  1086. MULQ R12
  1087. ADDQ AX, R10
  1088. ADCQ DX, R8
  1089. ADCQ $0, R9
  1090. MOVQ (80)(REG_P2), AX
  1091. MULQ R14
  1092. ADDQ AX, R10
  1093. ADCQ DX, R8
  1094. ADCQ $0, R9
  1095. MOVQ (56)(REG_P2), AX
  1096. MULQ R11
  1097. ADDQ AX, R10
  1098. ADCQ DX, R8
  1099. ADCQ $0, R9
  1100. MOVQ (72)(REG_P2), AX
  1101. MULQ R13
  1102. ADDQ AX, R10
  1103. MOVQ R10, (144)(CX) // c6
  1104. ADCQ DX, R8
  1105. ADCQ $0, R9
  1106. XORQ R10, R10
  1107. MOVQ (88)(REG_P2), AX
  1108. MULQ R14
  1109. ADDQ AX, R8
  1110. ADCQ DX, R9
  1111. ADCQ $0, R10
  1112. MOVQ (64)(REG_P2), AX
  1113. MULQ R11
  1114. ADDQ AX, R8
  1115. ADCQ DX, R9
  1116. ADCQ $0, R10
  1117. MOVQ (80)(REG_P2), AX
  1118. MULQ R13
  1119. ADDQ AX, R8
  1120. ADCQ DX, R9
  1121. ADCQ $0, R10
  1122. MOVQ (72)(REG_P2), AX
  1123. MULQ R15
  1124. ADDQ AX, R8
  1125. MOVQ R8, (152)(CX) // c7
  1126. ADCQ DX, R9
  1127. ADCQ $0, R10
  1128. XORQ R8, R8
  1129. MOVQ (72)(REG_P2), AX
  1130. MULQ R11
  1131. ADDQ AX, R9
  1132. ADCQ DX, R10
  1133. ADCQ $0, R8
  1134. MOVQ (80)(REG_P2), AX
  1135. MULQ R15
  1136. ADDQ AX, R9
  1137. ADCQ DX, R10
  1138. ADCQ $0, R8
  1139. MOVQ (88)(REG_P2), AX
  1140. MULQ R13
  1141. ADDQ AX, R9
  1142. MOVQ R9, (160)(CX) // c8
  1143. ADCQ DX, R10
  1144. ADCQ $0, R8
  // Unlike the earlier columns, c9 and c10 here keep only a two-register
  // window (R10, R8); no third carry register is maintained.
  1145. MOVQ (88)(REG_P2), AX
  1146. MULQ R15
  1147. ADDQ AX, R10
  1148. ADCQ DX, R8
  1149. MOVQ (80)(REG_P2), AX
  1150. MULQ R11
  1151. ADDQ AX, R10
  1152. MOVQ R10, (168)(CX) // c9
  1153. ADCQ DX, R8
  1154. MOVQ (88)(REG_P2), AX
  1155. MULQ R11
  1156. ADDQ AX, R8
  1157. MOVQ R8, (176)(CX) // c10
  1158. ADCQ $0, DX // high half of the last product plus the carry from c10
  1159. MOVQ DX, (184)(CX) // c11
  1160. // [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL)-AL*BL
  // Step 4. The x and y pointers (DI, SI) are no longer needed and are reused
  // as data registers; the top limb is spilled to (SP) because SI also serves
  // as the scratch register for the second subtraction.
  1161. MOVQ (SP), R8
  1162. SUBQ (CX), R8
  1163. MOVQ (8)(SP), R9
  1164. SBBQ (8)(CX), R9
  1165. MOVQ (16)(SP), R10
  1166. SBBQ (16)(CX), R10
  1167. MOVQ (24)(SP), R11
  1168. SBBQ (24)(CX), R11
  1169. MOVQ (32)(SP), R12
  1170. SBBQ (32)(CX), R12
  1171. MOVQ (40)(SP), R13
  1172. SBBQ (40)(CX), R13
  1173. MOVQ (48)(SP), R14
  1174. SBBQ (48)(CX), R14
  1175. MOVQ (56)(SP), R15
  1176. SBBQ (56)(CX), R15
  1177. MOVQ (64)(SP), AX
  1178. SBBQ (64)(CX), AX
  1179. MOVQ (72)(SP), DX
  1180. SBBQ (72)(CX), DX
  1181. MOVQ (80)(SP), DI
  1182. SBBQ (80)(CX), DI
  1183. MOVQ (88)(SP), SI
  1184. SBBQ (88)(CX), SI
  1185. MOVQ SI, (SP)
  1186. // [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH
  1187. MOVQ (96)(CX), SI
  1188. SUBQ SI, R8
  1189. MOVQ (104)(CX), SI
  1190. SBBQ SI, R9
  1191. MOVQ (112)(CX), SI
  1192. SBBQ SI, R10
  1193. MOVQ (120)(CX), SI
  1194. SBBQ SI, R11
  1195. MOVQ (128)(CX), SI
  1196. SBBQ SI, R12
  1197. MOVQ (136)(CX), SI
  1198. SBBQ SI, R13
  1199. MOVQ (144)(CX), SI
  1200. SBBQ SI, R14
  1201. MOVQ (152)(CX), SI
  1202. SBBQ SI, R15
  1203. MOVQ (160)(CX), SI
  1204. SBBQ SI, AX
  1205. MOVQ (168)(CX), SI
  1206. SBBQ SI, DX
  1207. MOVQ (176)(CX), SI
  1208. SBBQ SI, DI
  1209. MOVQ (SP), SI
  1210. SBBQ (184)(CX), SI
  1211. // FINAL RESULT: add the 12-limb middle term into z starting at limb 6
  // (byte offset 48), then ripple the carry through limbs 18..23.
  1212. ADDQ (48)(CX), R8
  1213. MOVQ R8, (48)(CX)
  1214. ADCQ (56)(CX), R9
  1215. MOVQ R9, (56)(CX)
  1216. ADCQ (64)(CX), R10
  1217. MOVQ R10, (64)(CX)
  1218. ADCQ (72)(CX), R11
  1219. MOVQ R11, (72)(CX)
  1220. ADCQ (80)(CX), R12
  1221. MOVQ R12, (80)(CX)
  1222. ADCQ (88)(CX), R13
  1223. MOVQ R13, (88)(CX)
  1224. ADCQ (96)(CX), R14
  1225. MOVQ R14, (96)(CX)
  1226. ADCQ (104)(CX), R15
  1227. MOVQ R15, (104)(CX)
  1228. ADCQ (112)(CX), AX
  1229. MOVQ AX, (112)(CX)
  1230. ADCQ (120)(CX), DX
  1231. MOVQ DX, (120)(CX)
  1232. ADCQ (128)(CX), DI
  1233. MOVQ DI, (128)(CX)
  1234. ADCQ (136)(CX), SI
  1235. MOVQ SI, (136)(CX)
  1236. MOVQ (144)(CX), AX
  1237. ADCQ $0, AX
  1238. MOVQ AX, (144)(CX)
  1239. MOVQ (152)(CX), AX
  1240. ADCQ $0, AX
  1241. MOVQ AX, (152)(CX)
  1242. MOVQ (160)(CX), AX
  1243. ADCQ $0, AX
  1244. MOVQ AX, (160)(CX)
  1245. MOVQ (168)(CX), AX
  1246. ADCQ $0, AX
  1247. MOVQ AX, (168)(CX)
  1248. MOVQ (176)(CX), AX
  1249. ADCQ $0, AX
  1250. MOVQ AX, (176)(CX)
  1251. MOVQ (184)(CX), AX
  1252. ADCQ $0, AX
  1253. MOVQ AX, (184)(CX)
  1254. RET
  // fp751MontgomeryReduce(z, x)
  //
  // Frame $0-16: no stack locals, two pointer arguments.
  //   z -> REG_P2 (SI): written as 12 64-bit words (offsets 0..88).
  //   x -> REG_P1 (DI): read as 24 64-bit words (offsets 0..184).
  //
  // NOTE(review): presumably this is the Montgomery reduction of the 24-word
  // value x modulo p751 (result possibly not fully reduced) -- confirm against
  // the Go-side declaration and the reference implementation.
  //
  // Notation used in the comments below:
  //   P5..P11  = the constants P751P1_5..P751P1_11 (words 5..11 of "p751 + 1");
  //              no lower words of p751 + 1 are used by this routine.
  //   q0..q11  = multiplier words. q0..q4 are x[0..4]; q5..q11 are the low
  //              words produced by columns 5..11, parked in z[5..11] until
  //              they are reloaded into a register.
  //
  // The work is done column by column. Column k computes
  //   x[k] + sum(q_i * P_(k-i)) + carry from column k-1,
  // stores the low word and passes the upper two words on to column k+1.
  // The three-word accumulator rotates through R8, R9 and R10: after a
  // column's low word has been stored, that register is cleared and reused
  // as the top word of the next column.
  //
  // Columns 5..11 write scratch values to z[5..11]; columns 12..23 write the
  // final words z[0..11] (overwriting the scratch values in z[5..11]).
  //
  // Each MULQ leaves its 128-bit product in DX:AX. The MOVQ stores that sit
  // between an ADDQ and the following ADCQ do not disturb the carry flag.
  //
  // Clobbers AX, CX, DX, SI, DI, R8-R15 and flags.
  1255. TEXT ·fp751MontgomeryReduce(SB), $0-16
  1256. MOVQ z+0(FP), REG_P2
  1257. MOVQ x+8(FP), REG_P1
  // Column 5: x[5] + q0*P5, with q0 = x[0] kept in R11.
  // The low word is q5, parked in z[5].
  1258. MOVQ (REG_P1), R11
  1259. MOVQ P751P1_5, AX
  1260. MULQ R11
  1261. XORQ R8, R8
  1262. ADDQ (40)(REG_P1), AX
  1263. MOVQ AX, (40)(REG_P2) // Z5
  1264. ADCQ DX, R8
  // Column 6: x[6] + q0*P6 + q1*P5, with q1 = x[1] kept in R12.
  // The low word is q6, parked in z[6]. Accumulator: R8 (low), R9, R10.
  1265. XORQ R9, R9
  1266. MOVQ P751P1_6, AX
  1267. MULQ R11
  1268. XORQ R10, R10
  1269. ADDQ AX, R8
  1270. ADCQ DX, R9
  1271. MOVQ (8)(REG_P1), R12
  1272. MOVQ P751P1_5, AX
  1273. MULQ R12
  1274. ADDQ AX, R8
  1275. ADCQ DX, R9
  1276. ADCQ $0, R10
  1277. ADDQ (48)(REG_P1), R8
  1278. MOVQ R8, (48)(REG_P2) // Z6
  1279. ADCQ $0, R9
  1280. ADCQ $0, R10
  // Column 7: x[7] + q0*P7 + q1*P6 + q2*P5, with q2 = x[2] kept in R13.
  // The low word is q7, parked in z[7]. Accumulator: R9 (low), R10, R8.
  1281. XORQ R8, R8
  1282. MOVQ P751P1_7, AX
  1283. MULQ R11
  1284. ADDQ AX, R9
  1285. ADCQ DX, R10
  1286. ADCQ $0, R8
  1287. MOVQ P751P1_6, AX
  1288. MULQ R12
  1289. ADDQ AX, R9
  1290. ADCQ DX, R10
  1291. ADCQ $0, R8
  1292. MOVQ (16)(REG_P1), R13
  1293. MOVQ P751P1_5, AX
  1294. MULQ R13
  1295. ADDQ AX, R9
  1296. ADCQ DX, R10
  1297. ADCQ $0, R8
  1298. ADDQ (56)(REG_P1), R9
  1299. MOVQ R9, (56)(REG_P2) // Z7
  1300. ADCQ $0, R10
  1301. ADCQ $0, R8
  // Column 8: x[8] + q0*P8 + q1*P7 + q2*P6 + q3*P5, with q3 = x[3] kept in R14.
  // The low word is q8, parked in z[8]. Accumulator: R10 (low), R8, R9.
  1302. XORQ R9, R9
  1303. MOVQ P751P1_8, AX
  1304. MULQ R11
  1305. ADDQ AX, R10
  1306. ADCQ DX, R8
  1307. ADCQ $0, R9
  1308. MOVQ P751P1_7, AX
  1309. MULQ R12
  1310. ADDQ AX, R10
  1311. ADCQ DX, R8
  1312. ADCQ $0, R9
  1313. MOVQ P751P1_6, AX
  1314. MULQ R13
  1315. ADDQ AX, R10
  1316. ADCQ DX, R8
  1317. ADCQ $0, R9
  1318. MOVQ (24)(REG_P1), R14
  1319. MOVQ P751P1_5, AX
  1320. MULQ R14
  1321. ADDQ AX, R10
  1322. ADCQ DX, R8
  1323. ADCQ $0, R9
  1324. ADDQ (64)(REG_P1), R10
  1325. MOVQ R10, (64)(REG_P2) // Z8
  1326. ADCQ $0, R8
  1327. ADCQ $0, R9
  // Column 9: x[9] + q0*P9 + q1*P8 + q2*P7 + q3*P6 + q4*P5, with q4 = x[4]
  // kept in R15. The low word is q9, parked in z[9].
  // Accumulator: R8 (low), R9, R10.
  1328. XORQ R10, R10
  1329. MOVQ P751P1_9, AX
  1330. MULQ R11
  1331. ADDQ AX, R8
  1332. ADCQ DX, R9
  1333. ADCQ $0, R10
  1334. MOVQ P751P1_8, AX
  1335. MULQ R12
  1336. ADDQ AX, R8
  1337. ADCQ DX, R9
  1338. ADCQ $0, R10
  1339. MOVQ P751P1_7, AX
  1340. MULQ R13
  1341. ADDQ AX, R8
  1342. ADCQ DX, R9
  1343. ADCQ $0, R10
  1344. MOVQ P751P1_6, AX
  1345. MULQ R14
  1346. ADDQ AX, R8
  1347. ADCQ DX, R9
  1348. ADCQ $0, R10
  1349. MOVQ (32)(REG_P1), R15
  1350. MOVQ P751P1_5, AX
  1351. MULQ R15
  1352. ADDQ AX, R8
  1353. ADCQ DX, R9
  1354. ADCQ $0, R10
  1355. ADDQ (72)(REG_P1), R8
  1356. MOVQ R8, (72)(REG_P2) // Z9
  1357. ADCQ $0, R9
  1358. ADCQ $0, R10
  // Column 10: x[10] + q0*P10 + q1*P9 + q2*P8 + q3*P7 + q4*P6 + q5*P5.
  // q5 is reloaded from its scratch slot z[5] into CX.
  // The low word is q10, parked in z[10]. Accumulator: R9 (low), R10, R8.
  1359. XORQ R8, R8
  1360. MOVQ P751P1_10, AX
  1361. MULQ R11
  1362. ADDQ AX, R9
  1363. ADCQ DX, R10
  1364. ADCQ $0, R8
  1365. MOVQ P751P1_9, AX
  1366. MULQ R12
  1367. ADDQ AX, R9
  1368. ADCQ DX, R10
  1369. ADCQ $0, R8
  1370. MOVQ P751P1_8, AX
  1371. MULQ R13
  1372. ADDQ AX, R9
  1373. ADCQ DX, R10
  1374. ADCQ $0, R8
  1375. MOVQ P751P1_7, AX
  1376. MULQ R14
  1377. ADDQ AX, R9
  1378. ADCQ DX, R10
  1379. ADCQ $0, R8
  1380. MOVQ P751P1_6, AX
  1381. MULQ R15
  1382. ADDQ AX, R9
  1383. ADCQ DX, R10
  1384. ADCQ $0, R8
  1385. MOVQ (40)(REG_P2), CX
  1386. MOVQ P751P1_5, AX
  1387. MULQ CX
  1388. ADDQ AX, R9
  1389. ADCQ DX, R10
  1390. ADCQ $0, R8
  1391. ADDQ (80)(REG_P1), R9
  1392. MOVQ R9, (80)(REG_P2) // Z10
  1393. ADCQ $0, R10
  1394. ADCQ $0, R8
  // Column 11: x[11] + q0*P11 + q1*P10 + q2*P9 + q3*P8 + q4*P7 + q5*P6 + q6*P5.
  // This is the last use of q0, so R11 is then reloaded with q6 from z[6].
  // The low word is q11, parked in z[11]. Accumulator: R10 (low), R8, R9.
  1395. XORQ R9, R9
  1396. MOVQ P751P1_11, AX
  1397. MULQ R11
  1398. ADDQ AX, R10
  1399. ADCQ DX, R8
  1400. ADCQ $0, R9
  1401. MOVQ P751P1_10, AX
  1402. MULQ R12
  1403. ADDQ AX, R10
  1404. ADCQ DX, R8
  1405. ADCQ $0, R9
  1406. MOVQ P751P1_9, AX
  1407. MULQ R13
  1408. ADDQ AX, R10
  1409. ADCQ DX, R8
  1410. ADCQ $0, R9
  1411. MOVQ P751P1_8, AX
  1412. MULQ R14
  1413. ADDQ AX, R10
  1414. ADCQ DX, R8
  1415. ADCQ $0, R9
  1416. MOVQ P751P1_7, AX
  1417. MULQ R15
  1418. ADDQ AX, R10
  1419. ADCQ DX, R8
  1420. ADCQ $0, R9
  1421. MOVQ P751P1_6, AX
  1422. MULQ CX
  1423. ADDQ AX, R10
  1424. ADCQ DX, R8
  1425. ADCQ $0, R9
  1426. MOVQ (48)(REG_P2), R11
  1427. MOVQ P751P1_5, AX
  1428. MULQ R11
  1429. ADDQ AX, R10
  1430. ADCQ DX, R8
  1431. ADCQ $0, R9
  1432. ADDQ (88)(REG_P1), R10
  1433. MOVQ R10, (88)(REG_P2) // Z11
  1434. ADCQ $0, R8
  1435. ADCQ $0, R9
  // Column 12: x[12] + q1*P11 + q2*P10 + q3*P9 + q4*P8 + q5*P7 + q6*P6 + q7*P5.
  // Last use of q1, so R12 is then reloaded with q7 from z[7].
  // The low word is the final z[0]. Accumulator: R8 (low), R9, R10.
  1436. XORQ R10, R10
  1437. MOVQ P751P1_11, AX
  1438. MULQ R12
  1439. ADDQ AX, R8
  1440. ADCQ DX, R9
  1441. ADCQ $0, R10
  1442. MOVQ P751P1_10, AX
  1443. MULQ R13
  1444. ADDQ AX, R8
  1445. ADCQ DX, R9
  1446. ADCQ $0, R10
  1447. MOVQ P751P1_9, AX
  1448. MULQ R14
  1449. ADDQ AX, R8
  1450. ADCQ DX, R9
  1451. ADCQ $0, R10
  1452. MOVQ P751P1_8, AX
  1453. MULQ R15
  1454. ADDQ AX, R8
  1455. ADCQ DX, R9
  1456. ADCQ $0, R10
  1457. MOVQ P751P1_7, AX
  1458. MULQ CX
  1459. ADDQ AX, R8
  1460. ADCQ DX, R9
  1461. ADCQ $0, R10
  1462. MOVQ P751P1_6, AX
  1463. MULQ R11
  1464. ADDQ AX, R8
  1465. ADCQ DX, R9
  1466. ADCQ $0, R10
  1467. MOVQ (56)(REG_P2), R12
  1468. MOVQ P751P1_5, AX
  1469. MULQ R12
  1470. ADDQ AX, R8
  1471. ADCQ DX, R9
  1472. ADCQ $0, R10
  1473. ADDQ (96)(REG_P1), R8
  1474. MOVQ R8, (REG_P2) // Z0
  1475. ADCQ $0, R9
  1476. ADCQ $0, R10
  // Column 13: x[13] + q2*P11 + q3*P10 + q4*P9 + q5*P8 + q6*P7 + q7*P6 + q8*P5.
  // Last use of q2, so R13 is then reloaded with q8 from z[8].
  // The low word is the final z[1]. Accumulator: R9 (low), R10, R8.
  1477. XORQ R8, R8
  1478. MOVQ P751P1_11, AX
  1479. MULQ R13
  1480. ADDQ AX, R9
  1481. ADCQ DX, R10
  1482. ADCQ $0, R8
  1483. MOVQ P751P1_10, AX
  1484. MULQ R14
  1485. ADDQ AX, R9
  1486. ADCQ DX, R10
  1487. ADCQ $0, R8
  1488. MOVQ P751P1_9, AX
  1489. MULQ R15
  1490. ADDQ AX, R9
  1491. ADCQ DX, R10
  1492. ADCQ $0, R8
  1493. MOVQ P751P1_8, AX
  1494. MULQ CX
  1495. ADDQ AX, R9
  1496. ADCQ DX, R10
  1497. ADCQ $0, R8
  1498. MOVQ P751P1_7, AX
  1499. MULQ R11
  1500. ADDQ AX, R9
  1501. ADCQ DX, R10
  1502. ADCQ $0, R8
  1503. MOVQ P751P1_6, AX
  1504. MULQ R12
  1505. ADDQ AX, R9
  1506. ADCQ DX, R10
  1507. ADCQ $0, R8
  1508. MOVQ (64)(REG_P2), R13
  1509. MOVQ P751P1_5, AX
  1510. MULQ R13
  1511. ADDQ AX, R9
  1512. ADCQ DX, R10
  1513. ADCQ $0, R8
  1514. ADDQ (104)(REG_P1), R9
  1515. MOVQ R9, (8)(REG_P2) // Z1
  1516. ADCQ $0, R10
  1517. ADCQ $0, R8
  // Column 14: x[14] + q3*P11 + q4*P10 + q5*P9 + q6*P8 + q7*P7 + q8*P6 + q9*P5.
  // Last use of q3, so R14 is then reloaded with q9 from z[9].
  // The low word is the final z[2]. Accumulator: R10 (low), R8, R9.
  1518. XORQ R9, R9
  1519. MOVQ P751P1_11, AX
  1520. MULQ R14
  1521. ADDQ AX, R10
  1522. ADCQ DX, R8
  1523. ADCQ $0, R9
  1524. MOVQ P751P1_10, AX
  1525. MULQ R15
  1526. ADDQ AX, R10
  1527. ADCQ DX, R8
  1528. ADCQ $0, R9
  1529. MOVQ P751P1_9, AX
  1530. MULQ CX
  1531. ADDQ AX, R10
  1532. ADCQ DX, R8
  1533. ADCQ $0, R9
  1534. MOVQ P751P1_8, AX
  1535. MULQ R11
  1536. ADDQ AX, R10
  1537. ADCQ DX, R8
  1538. ADCQ $0, R9
  1539. MOVQ P751P1_7, AX
  1540. MULQ R12
  1541. ADDQ AX, R10
  1542. ADCQ DX, R8
  1543. ADCQ $0, R9
  1544. MOVQ P751P1_6, AX
  1545. MULQ R13
  1546. ADDQ AX, R10
  1547. ADCQ DX, R8
  1548. ADCQ $0, R9
  1549. MOVQ (72)(REG_P2), R14
  1550. MOVQ P751P1_5, AX
  1551. MULQ R14
  1552. ADDQ AX, R10
  1553. ADCQ DX, R8
  1554. ADCQ $0, R9
  1555. ADDQ (112)(REG_P1), R10
  1556. MOVQ R10, (16)(REG_P2) // Z2
  1557. ADCQ $0, R8
  1558. ADCQ $0, R9
  // Column 15: x[15] + q4*P11 + q5*P10 + q6*P9 + q7*P8 + q8*P7 + q9*P6 + q10*P5.
  // Last use of q4, so R15 is then reloaded with q10 from z[10].
  // The low word is the final z[3]. Accumulator: R8 (low), R9, R10.
  1559. XORQ R10, R10
  1560. MOVQ P751P1_11, AX
  1561. MULQ R15
  1562. ADDQ AX, R8
  1563. ADCQ DX, R9
  1564. ADCQ $0, R10
  1565. MOVQ P751P1_10, AX
  1566. MULQ CX
  1567. ADDQ AX, R8
  1568. ADCQ DX, R9
  1569. ADCQ $0, R10
  1570. MOVQ P751P1_9, AX
  1571. MULQ R11
  1572. ADDQ AX, R8
  1573. ADCQ DX, R9
  1574. ADCQ $0, R10
  1575. MOVQ P751P1_8, AX
  1576. MULQ R12
  1577. ADDQ AX, R8
  1578. ADCQ DX, R9
  1579. ADCQ $0, R10
  1580. MOVQ P751P1_7, AX
  1581. MULQ R13
  1582. ADDQ AX, R8
  1583. ADCQ DX, R9
  1584. ADCQ $0, R10
  1585. MOVQ P751P1_6, AX
  1586. MULQ R14
  1587. ADDQ AX, R8
  1588. ADCQ DX, R9
  1589. ADCQ $0, R10
  1590. MOVQ (80)(REG_P2), R15
  1591. MOVQ P751P1_5, AX
  1592. MULQ R15
  1593. ADDQ AX, R8
  1594. ADCQ DX, R9
  1595. ADCQ $0, R10
  1596. ADDQ (120)(REG_P1), R8
  1597. MOVQ R8, (24)(REG_P2) // Z3
  1598. ADCQ $0, R9
  1599. ADCQ $0, R10
  // Column 16: x[16] + q5*P11 + q6*P10 + q7*P9 + q8*P8 + q9*P7 + q10*P6 + q11*P5.
  // Last use of q5, so CX is then reloaded with q11 from z[11].
  // The low word is the final z[4]. Accumulator: R9 (low), R10, R8.
  1600. XORQ R8, R8
  1601. MOVQ P751P1_11, AX
  1602. MULQ CX
  1603. ADDQ AX, R9
  1604. ADCQ DX, R10
  1605. ADCQ $0, R8
  1606. MOVQ P751P1_10, AX
  1607. MULQ R11
  1608. ADDQ AX, R9
  1609. ADCQ DX, R10
  1610. ADCQ $0, R8
  1611. MOVQ P751P1_9, AX
  1612. MULQ R12
  1613. ADDQ AX, R9
  1614. ADCQ DX, R10
  1615. ADCQ $0, R8
  1616. MOVQ P751P1_8, AX
  1617. MULQ R13
  1618. ADDQ AX, R9
  1619. ADCQ DX, R10
  1620. ADCQ $0, R8
  1621. MOVQ P751P1_7, AX
  1622. MULQ R14
  1623. ADDQ AX, R9
  1624. ADCQ DX, R10
  1625. ADCQ $0, R8
  1626. MOVQ P751P1_6, AX
  1627. MULQ R15
  1628. ADDQ AX, R9
  1629. ADCQ DX, R10
  1630. ADCQ $0, R8
  1631. MOVQ (88)(REG_P2), CX
  1632. MOVQ P751P1_5, AX
  1633. MULQ CX
  1634. ADDQ AX, R9
  1635. ADCQ DX, R10
  1636. ADCQ $0, R8
  1637. ADDQ (128)(REG_P1), R9
  1638. MOVQ R9, (32)(REG_P2) // Z4
  1639. ADCQ $0, R10
  1640. ADCQ $0, R8
  // Column 17: x[17] + q6*P11 + q7*P10 + q8*P9 + q9*P8 + q10*P7 + q11*P6.
  // From here on no new multiplier words are loaded; q6..q11 are held in
  // R11, R12, R13, R14, R15 and CX. The low word is the final z[5], which
  // replaces the scratch value q5. Accumulator: R10 (low), R8, R9.
  1641. XORQ R9, R9
  1642. MOVQ P751P1_11, AX
  1643. MULQ R11
  1644. ADDQ AX, R10
  1645. ADCQ DX, R8
  1646. ADCQ $0, R9
  1647. MOVQ P751P1_10, AX
  1648. MULQ R12
  1649. ADDQ AX, R10
  1650. ADCQ DX, R8
  1651. ADCQ $0, R9
  1652. MOVQ P751P1_9, AX
  1653. MULQ R13
  1654. ADDQ AX, R10
  1655. ADCQ DX, R8
  1656. ADCQ $0, R9
  1657. MOVQ P751P1_8, AX
  1658. MULQ R14
  1659. ADDQ AX, R10
  1660. ADCQ DX, R8
  1661. ADCQ $0, R9
  1662. MOVQ P751P1_7, AX
  1663. MULQ R15
  1664. ADDQ AX, R10
  1665. ADCQ DX, R8
  1666. ADCQ $0, R9
  1667. MOVQ P751P1_6, AX
  1668. MULQ CX
  1669. ADDQ AX, R10
  1670. ADCQ DX, R8
  1671. ADCQ $0, R9
  1672. ADDQ (136)(REG_P1), R10
  1673. MOVQ R10, (40)(REG_P2) // Z5
  1674. ADCQ $0, R8
  1675. ADCQ $0, R9
  // Column 18: x[18] + q7*P11 + q8*P10 + q9*P9 + q10*P8 + q11*P7.
  // The low word is the final z[6]. Accumulator: R8 (low), R9, R10.
  1676. XORQ R10, R10
  1677. MOVQ P751P1_11, AX
  1678. MULQ R12
  1679. ADDQ AX, R8
  1680. ADCQ DX, R9
  1681. ADCQ $0, R10
  1682. MOVQ P751P1_10, AX
  1683. MULQ R13
  1684. ADDQ AX, R8
  1685. ADCQ DX, R9
  1686. ADCQ $0, R10
  1687. MOVQ P751P1_9, AX
  1688. MULQ R14
  1689. ADDQ AX, R8
  1690. ADCQ DX, R9
  1691. ADCQ $0, R10
  1692. MOVQ P751P1_8, AX
  1693. MULQ R15
  1694. ADDQ AX, R8
  1695. ADCQ DX, R9
  1696. ADCQ $0, R10
  1697. MOVQ P751P1_7, AX
  1698. MULQ CX
  1699. ADDQ AX, R8
  1700. ADCQ DX, R9
  1701. ADCQ $0, R10
  1702. ADDQ (144)(REG_P1), R8
  1703. MOVQ R8, (48)(REG_P2) // Z6
  1704. ADCQ $0, R9
  1705. ADCQ $0, R10
  // Column 19: x[19] + q8*P11 + q9*P10 + q10*P9 + q11*P8.
  // The low word is the final z[7]. Accumulator: R9 (low), R10, R8.
  1706. XORQ R8, R8
  1707. MOVQ P751P1_11, AX
  1708. MULQ R13
  1709. ADDQ AX, R9
  1710. ADCQ DX, R10
  1711. ADCQ $0, R8
  1712. MOVQ P751P1_10, AX
  1713. MULQ R14
  1714. ADDQ AX, R9
  1715. ADCQ DX, R10
  1716. ADCQ $0, R8
  1717. MOVQ P751P1_9, AX
  1718. MULQ R15
  1719. ADDQ AX, R9
  1720. ADCQ DX, R10
  1721. ADCQ $0, R8
  1722. MOVQ P751P1_8, AX
  1723. MULQ CX
  1724. ADDQ AX, R9
  1725. ADCQ DX, R10
  1726. ADCQ $0, R8
  1727. ADDQ (152)(REG_P1), R9
  1728. MOVQ R9, (56)(REG_P2) // Z7
  1729. ADCQ $0, R10
  1730. ADCQ $0, R8
  // Column 20: x[20] + q9*P11 + q10*P10 + q11*P9.
  // The low word is the final z[8]. Accumulator: R10 (low), R8, R9.
  1731. XORQ R9, R9
  1732. MOVQ P751P1_11, AX
  1733. MULQ R14
  1734. ADDQ AX, R10
  1735. ADCQ DX, R8
  1736. ADCQ $0, R9
  1737. MOVQ P751P1_10, AX
  1738. MULQ R15
  1739. ADDQ AX, R10
  1740. ADCQ DX, R8
  1741. ADCQ $0, R9
  1742. MOVQ P751P1_9, AX
  1743. MULQ CX
  1744. ADDQ AX, R10
  1745. ADCQ DX, R8
  1746. ADCQ $0, R9
  1747. ADDQ (160)(REG_P1), R10
  1748. MOVQ R10, (64)(REG_P2) // Z8
  1749. ADCQ $0, R8
  1750. ADCQ $0, R9
  // Column 21: x[21] + q10*P11 + q11*P10.
  // The low word is the final z[9]. Accumulator: R8 (low), R9, R10.
  1751. XORQ R10, R10
  1752. MOVQ P751P1_11, AX
  1753. MULQ R15
  1754. ADDQ AX, R8
  1755. ADCQ DX, R9
  1756. ADCQ $0, R10
  1757. MOVQ P751P1_10, AX
  1758. MULQ CX
  1759. ADDQ AX, R8
  1760. ADCQ DX, R9
  1761. ADCQ $0, R10
  1762. ADDQ (168)(REG_P1), R8 // Z9
  1763. MOVQ R8, (72)(REG_P2) // Z9
  1764. ADCQ $0, R9
  1765. ADCQ $0, R10
  // Column 22: x[22] + q11*P11. Only two accumulator words remain (R9, R10),
  // so no third register is cleared. The low word is the final z[10].
  1766. MOVQ P751P1_11, AX
  1767. MULQ CX
  1768. ADDQ AX, R9
  1769. ADCQ DX, R10
  1770. ADDQ (176)(REG_P1), R9 // Z10
  1771. MOVQ R9, (80)(REG_P2) // Z10
  1772. ADCQ $0, R10
  // Column 23: x[23] plus the remaining carry word gives the final z[11].
  // Any carry out of this last addition is not stored.
  1773. ADDQ (184)(REG_P1), R10 // Z11
  1774. MOVQ R10, (88)(REG_P2) // Z11
  1775. RET
  // fp751AddLazy(z, x, y): z = x + y on 12-word (768-bit) operands.
  //
  // Frame $0-24: no stack locals, three pointer arguments.
  //   z -> REG_P3 (DX), x -> REG_P1 (DI), y -> REG_P2 (SI).
  //
  // This is a plain multi-word addition: one ADDQ followed by eleven ADCQs.
  // No modular reduction is performed here, and the carry out of the top
  // word is discarded. All twelve words of x and y are consumed before the
  // first word of z is written.
  //
  // Clobbers AX, BX, CX, DX, SI, DI, R8-R15 and flags.
  1776. TEXT ·fp751AddLazy(SB), NOSPLIT, $0-24
  1777. MOVQ z+0(FP), REG_P3
  1778. MOVQ x+8(FP), REG_P1
  1779. MOVQ y+16(FP), REG_P2
  // Load x[0..11] into twelve registers.
  1780. MOVQ (REG_P1), R8
  1781. MOVQ (8)(REG_P1), R9
  1782. MOVQ (16)(REG_P1), R10
  1783. MOVQ (24)(REG_P1), R11
  1784. MOVQ (32)(REG_P1), R12
  1785. MOVQ (40)(REG_P1), R13
  1786. MOVQ (48)(REG_P1), R14
  1787. MOVQ (56)(REG_P1), R15
  1788. MOVQ (64)(REG_P1), AX
  1789. MOVQ (72)(REG_P1), BX
  1790. MOVQ (80)(REG_P1), CX
  // REG_P1 is DI, so this final load overwrites the x pointer with x[11].
  // That is safe only because x is not read again after this line.
  1791. MOVQ (88)(REG_P1), DI
  // Add y word by word, rippling the carry from word 0 up to word 11.
  1792. ADDQ (REG_P2), R8
  1793. ADCQ (8)(REG_P2), R9
  1794. ADCQ (16)(REG_P2), R10
  1795. ADCQ (24)(REG_P2), R11
  1796. ADCQ (32)(REG_P2), R12
  1797. ADCQ (40)(REG_P2), R13
  1798. ADCQ (48)(REG_P2), R14
  1799. ADCQ (56)(REG_P2), R15
  1800. ADCQ (64)(REG_P2), AX
  1801. ADCQ (72)(REG_P2), BX
  1802. ADCQ (80)(REG_P2), CX
  1803. ADCQ (88)(REG_P2), DI
  // Store the twelve sum words to z.
  1804. MOVQ R8, (REG_P3)
  1805. MOVQ R9, (8)(REG_P3)
  1806. MOVQ R10, (16)(REG_P3)
  1807. MOVQ R11, (24)(REG_P3)
  1808. MOVQ R12, (32)(REG_P3)
  1809. MOVQ R13, (40)(REG_P3)
  1810. MOVQ R14, (48)(REG_P3)
  1811. MOVQ R15, (56)(REG_P3)
  1812. MOVQ AX, (64)(REG_P3)
  1813. MOVQ BX, (72)(REG_P3)
  1814. MOVQ CX, (80)(REG_P3)
  1815. MOVQ DI, (88)(REG_P3)
  1816. RET
  // fp751X2AddLazy(z, x, y): z = x + y on 24-word (1536-bit) operands.
  //
  // Frame $0-24: no stack locals, three pointer arguments.
  //   z -> REG_P3 (DX), x -> REG_P1 (DI), y -> REG_P2 (SI).
  //
  // The addition is one carry chain across all 24 words, processed in three
  // pieces: words 0..10, word 11 on its own, then words 12..23. Between the
  // pieces only MOVQ loads and stores are executed, which leave the carry
  // flag intact, so each piece resumes the chain with ADCQ. No modular
  // reduction is performed here, and the carry out of word 23 is discarded.
  //
  // Clobbers AX, BX, CX, DX, SI, DI, R8-R15 and flags.
  1817. TEXT ·fp751X2AddLazy(SB), NOSPLIT, $0-24
  1818. MOVQ z+0(FP), REG_P3
  1819. MOVQ x+8(FP), REG_P1
  1820. MOVQ y+16(FP), REG_P2
  // Words 0..10: load x, add y, store to z.
  1821. MOVQ (REG_P1), R8
  1822. MOVQ (8)(REG_P1), R9
  1823. MOVQ (16)(REG_P1), R10
  1824. MOVQ (24)(REG_P1), R11
  1825. MOVQ (32)(REG_P1), R12
  1826. MOVQ (40)(REG_P1), R13
  1827. MOVQ (48)(REG_P1), R14
  1828. MOVQ (56)(REG_P1), R15
  1829. MOVQ (64)(REG_P1), AX
  1830. MOVQ (72)(REG_P1), BX
  1831. MOVQ (80)(REG_P1), CX
  1832. ADDQ (REG_P2), R8
  1833. ADCQ (8)(REG_P2), R9
  1834. ADCQ (16)(REG_P2), R10
  1835. ADCQ (24)(REG_P2), R11
  1836. ADCQ (32)(REG_P2), R12
  1837. ADCQ (40)(REG_P2), R13
  1838. ADCQ (48)(REG_P2), R14
  1839. ADCQ (56)(REG_P2), R15
  1840. ADCQ (64)(REG_P2), AX
  1841. ADCQ (72)(REG_P2), BX
  1842. ADCQ (80)(REG_P2), CX
  1843. MOVQ R8, (REG_P3)
  1844. MOVQ R9, (8)(REG_P3)
  1845. MOVQ R10, (16)(REG_P3)
  1846. MOVQ R11, (24)(REG_P3)
  1847. MOVQ R12, (32)(REG_P3)
  1848. MOVQ R13, (40)(REG_P3)
  1849. MOVQ R14, (48)(REG_P3)
  1850. MOVQ R15, (56)(REG_P3)
  1851. MOVQ AX, (64)(REG_P3)
  1852. MOVQ BX, (72)(REG_P3)
  1853. MOVQ CX, (80)(REG_P3)
  // Word 11, handled separately through AX. The carry from word 10 is still
  // live here, so this must be ADCQ.
  1854. MOVQ (88)(REG_P1), AX
  1855. ADCQ (88)(REG_P2), AX
  1856. MOVQ AX, (88)(REG_P3)
  // Words 12..23: the same load/add/store pattern, continuing the carry chain.
  1857. MOVQ (96)(REG_P1), R8
  1858. MOVQ (104)(REG_P1), R9
  1859. MOVQ (112)(REG_P1), R10
  1860. MOVQ (120)(REG_P1), R11
  1861. MOVQ (128)(REG_P1), R12
  1862. MOVQ (136)(REG_P1), R13
  1863. MOVQ (144)(REG_P1), R14
  1864. MOVQ (152)(REG_P1), R15
  1865. MOVQ (160)(REG_P1), AX
  1866. MOVQ (168)(REG_P1), BX
  1867. MOVQ (176)(REG_P1), CX
  // REG_P1 is DI, so this load overwrites the x pointer with x[23].
  // x is not read again after this line.
  1868. MOVQ (184)(REG_P1), DI
  1869. ADCQ (96)(REG_P2), R8
  1870. ADCQ (104)(REG_P2), R9
  1871. ADCQ (112)(REG_P2), R10
  1872. ADCQ (120)(REG_P2), R11
  1873. ADCQ (128)(REG_P2), R12
  1874. ADCQ (136)(REG_P2), R13
  1875. ADCQ (144)(REG_P2), R14
  1876. ADCQ (152)(REG_P2), R15
  1877. ADCQ (160)(REG_P2), AX
  1878. ADCQ (168)(REG_P2), BX
  1879. ADCQ (176)(REG_P2), CX
  1880. ADCQ (184)(REG_P2), DI
  1881. MOVQ R8, (96)(REG_P3)
  1882. MOVQ R9, (104)(REG_P3)
  1883. MOVQ R10, (112)(REG_P3)
  1884. MOVQ R11, (120)(REG_P3)
  1885. MOVQ R12, (128)(REG_P3)
  1886. MOVQ R13, (136)(REG_P3)
  1887. MOVQ R14, (144)(REG_P3)
  1888. MOVQ R15, (152)(REG_P3)
  1889. MOVQ AX, (160)(REG_P3)
  1890. MOVQ BX, (168)(REG_P3)
  1891. MOVQ CX, (176)(REG_P3)
  1892. MOVQ DI, (184)(REG_P3)
  1893. RET
  // fp751X2SubLazy(z, x, y): z = x - y on 24-word (1536-bit) operands,
  // followed by a conditional correction when the subtraction borrows.
  //
  // Frame $0-24: no stack locals, three pointer arguments.
  //   z -> REG_P3 (DX), x -> REG_P1 (DI), y -> REG_P2 (SI).
  //
  // Step 1: subtract with one borrow chain across all 24 words, processed as
  //         words 0..10, word 11 on its own, then words 12..23. Only MOVQ
  //         loads and stores run between the pieces, so the borrow flag
  //         carries through.
  // Step 2: turn the final borrow into a mask (all ones if x - y borrowed,
  //         zero otherwise), AND it with the words of p, and add the result
  //         to z[12..23], i.e. add p*2^768 when the difference was negative.
  //         This step uses no branches. The carry out of its addition chain
  //         is discarded.
  //
  // Clobbers AX, BX, CX, DX, SI, DI, R8-R15 and flags.
  1894. TEXT ·fp751X2SubLazy(SB), NOSPLIT, $0-24
  1895. MOVQ z+0(FP), REG_P3
  1896. MOVQ x+8(FP), REG_P1
  1897. MOVQ y+16(FP), REG_P2
  // Words 0..10: load x, subtract y, store to z.
  1898. MOVQ (REG_P1), R8
  1899. MOVQ (8)(REG_P1), R9
  1900. MOVQ (16)(REG_P1), R10
  1901. MOVQ (24)(REG_P1), R11
  1902. MOVQ (32)(REG_P1), R12
  1903. MOVQ (40)(REG_P1), R13
  1904. MOVQ (48)(REG_P1), R14
  1905. MOVQ (56)(REG_P1), R15
  1906. MOVQ (64)(REG_P1), AX
  1907. MOVQ (72)(REG_P1), BX
  1908. MOVQ (80)(REG_P1), CX
  1909. SUBQ (REG_P2), R8
  1910. SBBQ (8)(REG_P2), R9
  1911. SBBQ (16)(REG_P2), R10
  1912. SBBQ (24)(REG_P2), R11
  1913. SBBQ (32)(REG_P2), R12
  1914. SBBQ (40)(REG_P2), R13
  1915. SBBQ (48)(REG_P2), R14
  1916. SBBQ (56)(REG_P2), R15
  1917. SBBQ (64)(REG_P2), AX
  1918. SBBQ (72)(REG_P2), BX
  1919. SBBQ (80)(REG_P2), CX
  1920. MOVQ R8, (REG_P3)
  1921. MOVQ R9, (8)(REG_P3)
  1922. MOVQ R10, (16)(REG_P3)
  1923. MOVQ R11, (24)(REG_P3)
  1924. MOVQ R12, (32)(REG_P3)
  1925. MOVQ R13, (40)(REG_P3)
  1926. MOVQ R14, (48)(REG_P3)
  1927. MOVQ R15, (56)(REG_P3)
  1928. MOVQ AX, (64)(REG_P3)
  1929. MOVQ BX, (72)(REG_P3)
  1930. MOVQ CX, (80)(REG_P3)
  // Word 11, handled separately through AX. The borrow from word 10 is still
  // live here, so this must be SBBQ.
  1931. MOVQ (88)(REG_P1), AX
  1932. SBBQ (88)(REG_P2), AX
  1933. MOVQ AX, (88)(REG_P3)
  // Words 12..23: the same load/subtract/store pattern, continuing the
  // borrow chain.
  1934. MOVQ (96)(REG_P1), R8
  1935. MOVQ (104)(REG_P1), R9
  1936. MOVQ (112)(REG_P1), R10
  1937. MOVQ (120)(REG_P1), R11
  1938. MOVQ (128)(REG_P1), R12
  1939. MOVQ (136)(REG_P1), R13
  1940. MOVQ (144)(REG_P1), R14
  1941. MOVQ (152)(REG_P1), R15
  1942. MOVQ (160)(REG_P1), AX
  1943. MOVQ (168)(REG_P1), BX
  1944. MOVQ (176)(REG_P1), CX
  // REG_P1 is DI, so this load overwrites the x pointer with x[23].
  // x is not read again after this line.
  1945. MOVQ (184)(REG_P1), DI
  1946. SBBQ (96)(REG_P2), R8
  1947. SBBQ (104)(REG_P2), R9
  1948. SBBQ (112)(REG_P2), R10
  1949. SBBQ (120)(REG_P2), R11
  1950. SBBQ (128)(REG_P2), R12
  1951. SBBQ (136)(REG_P2), R13
  1952. SBBQ (144)(REG_P2), R14
  1953. SBBQ (152)(REG_P2), R15
  1954. SBBQ (160)(REG_P2), AX
  1955. SBBQ (168)(REG_P2), BX
  1956. SBBQ (176)(REG_P2), CX
  1957. SBBQ (184)(REG_P2), DI
  1958. MOVQ R8, (96)(REG_P3)
  1959. MOVQ R9, (104)(REG_P3)
  1960. MOVQ R10, (112)(REG_P3)
  1961. MOVQ R11, (120)(REG_P3)
  1962. MOVQ R12, (128)(REG_P3)
  1963. MOVQ R13, (136)(REG_P3)
  1964. MOVQ R14, (144)(REG_P3)
  1965. MOVQ R15, (152)(REG_P3)
  1966. MOVQ AX, (160)(REG_P3)
  1967. MOVQ BX, (168)(REG_P3)
  1968. MOVQ CX, (176)(REG_P3)
  1969. MOVQ DI, (184)(REG_P3)
  1970. // Now the carry flag is 1 if x-y < 0. If so, add p*2^768.
  // Build the mask in AX. The macro is defined outside this view; going by
  // its name it zeroes AX while leaving the carry flag untouched (the file
  // header explains why a plain MOVQ $0, AX cannot be used for this).
  // SBBQ then computes 0 - 0 - CF: all ones on borrow, zero otherwise.
  1971. ZERO_AX_WITHOUT_CLOBBERING_FLAGS
  1972. SBBQ $0, AX
  1973. // Load p into registers:
  1974. MOVQ P751_0, R8
  1975. // P751_{1,2,3,4} = P751_0, so reuse R8
  1976. MOVQ P751_5, R9
  1977. MOVQ P751_6, R10
  1978. MOVQ P751_7, R11
  1979. MOVQ P751_8, R12
  1980. MOVQ P751_9, R13
  1981. MOVQ P751_10, R14
  1982. MOVQ P751_11, R15
  // Mask p: every word becomes zero unless the subtraction borrowed.
  1983. ANDQ AX, R8
  1984. ANDQ AX, R9
  1985. ANDQ AX, R10
  1986. ANDQ AX, R11
  1987. ANDQ AX, R12
  1988. ANDQ AX, R13
  1989. ANDQ AX, R14
  1990. ANDQ AX, R15
  // Add the masked p into z[12..23] in place (read-modify-write on memory).
  // R8 serves for words 0..4 of p; R9..R15 hold words 5..11.
  1991. ADDQ R8, (96 )(REG_P3)
  1992. ADCQ R8, (96+ 8)(REG_P3)
  1993. ADCQ R8, (96+16)(REG_P3)
  1994. ADCQ R8, (96+24)(REG_P3)
  1995. ADCQ R8, (96+32)(REG_P3)
  1996. ADCQ R9, (96+40)(REG_P3)
  1997. ADCQ R10, (96+48)(REG_P3)
  1998. ADCQ R11, (96+56)(REG_P3)
  1999. ADCQ R12, (96+64)(REG_P3)
  2000. ADCQ R13, (96+72)(REG_P3)
  2001. ADCQ R14, (96+80)(REG_P3)
  2002. ADCQ R15, (96+88)(REG_P3)
  2003. RET