Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.
 
 
 
 
 
 

708 linhas
19 KiB

  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.] */
  56. #include <openssl/bn.h>
  57. #include <assert.h>
  58. #include "internal.h"
  59. /* This file has two other implementations: x86 assembly language in
  60. * asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c. */
  61. #if defined(OPENSSL_NO_ASM) || \
  62. !(defined(OPENSSL_X86) || (defined(OPENSSL_X86_64) && defined(__GNUC__)))
  63. #ifdef BN_ULLONG
  /* Word-level multiply helpers for builds that have the double-width type
   * BN_ULLONG. Lw() and Hw() are defined outside this file; they are used here
   * to split a double-width value into the word that is stored and the word
   * that is carried forward (presumably the low and high halves -- confirm
   * against the header). The r, c, r0 and r1 arguments are assigned to and
   * must therefore be side-effect-free lvalues. */

  /* mul_add(r, a, w, c): evaluates w * a + r + c in double width, then assigns
   * Lw() of that value to r and Hw() of it to c. */
  #define mul_add(r, a, w, c) \
    do { \
      const BN_ULLONG wide_sum = (BN_ULLONG)(w) * (a) + (r) + (c); \
      (r) = Lw(wide_sum); \
      (c) = Hw(wide_sum); \
    } while (0)

  /* mul(r, a, w, c): evaluates w * a + c in double width, then assigns Lw() of
   * that value to r and Hw() of it to c. */
  #define mul(r, a, w, c) \
    do { \
      const BN_ULLONG wide_sum = (BN_ULLONG)(w) * (a) + (c); \
      (r) = Lw(wide_sum); \
      (c) = Hw(wide_sum); \
    } while (0)

  /* sqr(r0, r1, a): evaluates a * a in double width, then assigns Lw() of the
   * product to r0 and Hw() of it to r1. */
  #define sqr(r0, r1, a) \
    do { \
      const BN_ULLONG wide_sq = (BN_ULLONG)(a) * (a); \
      (r0) = Lw(wide_sq); \
      (r1) = Hw(wide_sq); \
    } while (0)
  85. #else
  /* Word-level multiply helpers for builds without BN_ULLONG. They rely on
   * BN_UMULT_LOHI(low, high, x, y), which is defined outside this file and is
   * expected to leave the two words of the product x * y in low and high
   * (confirm against its definition). The r, c, r0 and r1 arguments are
   * assigned to and must therefore be side-effect-free lvalues. */

  /* mul_add(r, a, w, c): adds the product w * a and the incoming carry c to r.
   * r receives the resulting word; c receives the product's high word plus the
   * carries out of the two single-word additions. */
  #define mul_add(r, a, w, c) \
    do { \
      BN_ULONG prod_hi, prod_lo; \
      BN_ULONG factor = (a); \
      BN_ULONG sum = (r); \
      BN_UMULT_LOHI(prod_lo, prod_hi, w, factor); \
      sum += (c); \
      (c) = (sum < (c)) ? 1 : 0; \
      (c) += prod_hi; \
      sum += prod_lo; \
      (c) += (sum < prod_lo) ? 1 : 0; \
      (r) = sum; \
    } while (0)

  /* mul(r, a, w, c): r receives the low product word of w * a plus the
   * incoming carry c; c receives the high product word plus the carry out of
   * that addition. */
  #define mul(r, a, w, c) \
    do { \
      BN_ULONG prod_hi, prod_lo, sum; \
      BN_ULONG factor = (a); \
      BN_UMULT_LOHI(prod_lo, prod_hi, w, factor); \
      sum = prod_lo + (c); \
      (c) = prod_hi + ((sum < prod_lo) ? 1 : 0); \
      (r) = sum; \
    } while (0)

  /* sqr(r0, r1, a): hands a * a to BN_UMULT_LOHI with r0 and r1 as the two
   * output operands. a is evaluated exactly once. */
  #define sqr(r0, r1, a) \
    do { \
      BN_ULONG operand = (a); \
      BN_UMULT_LOHI(r0, r1, operand, operand); \
    } while (0)
  112. #endif /* !BN_ULLONG */
  113. BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
  114. BN_ULONG w) {
  115. BN_ULONG c1 = 0;
  116. assert(num >= 0);
  117. if (num <= 0) {
  118. return c1;
  119. }
  120. while (num & ~3) {
  121. mul_add(rp[0], ap[0], w, c1);
  122. mul_add(rp[1], ap[1], w, c1);
  123. mul_add(rp[2], ap[2], w, c1);
  124. mul_add(rp[3], ap[3], w, c1);
  125. ap += 4;
  126. rp += 4;
  127. num -= 4;
  128. }
  129. while (num) {
  130. mul_add(rp[0], ap[0], w, c1);
  131. ap++;
  132. rp++;
  133. num--;
  134. }
  135. return c1;
  136. }
  137. BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) {
  138. BN_ULONG c1 = 0;
  139. assert(num >= 0);
  140. if (num <= 0) {
  141. return c1;
  142. }
  143. while (num & ~3) {
  144. mul(rp[0], ap[0], w, c1);
  145. mul(rp[1], ap[1], w, c1);
  146. mul(rp[2], ap[2], w, c1);
  147. mul(rp[3], ap[3], w, c1);
  148. ap += 4;
  149. rp += 4;
  150. num -= 4;
  151. }
  152. while (num) {
  153. mul(rp[0], ap[0], w, c1);
  154. ap++;
  155. rp++;
  156. num--;
  157. }
  158. return c1;
  159. }
  160. void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) {
  161. assert(n >= 0);
  162. if (n <= 0) {
  163. return;
  164. }
  165. while (n & ~3) {
  166. sqr(r[0], r[1], a[0]);
  167. sqr(r[2], r[3], a[1]);
  168. sqr(r[4], r[5], a[2]);
  169. sqr(r[6], r[7], a[3]);
  170. a += 4;
  171. r += 8;
  172. n -= 4;
  173. }
  174. while (n) {
  175. sqr(r[0], r[1], a[0]);
  176. a++;
  177. r += 2;
  178. n--;
  179. }
  180. }
  181. #ifdef BN_ULLONG
  182. BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  183. int n) {
  184. BN_ULLONG ll = 0;
  185. assert(n >= 0);
  186. if (n <= 0) {
  187. return (BN_ULONG)0;
  188. }
  189. while (n & ~3) {
  190. ll += (BN_ULLONG)a[0] + b[0];
  191. r[0] = (BN_ULONG)ll & BN_MASK2;
  192. ll >>= BN_BITS2;
  193. ll += (BN_ULLONG)a[1] + b[1];
  194. r[1] = (BN_ULONG)ll & BN_MASK2;
  195. ll >>= BN_BITS2;
  196. ll += (BN_ULLONG)a[2] + b[2];
  197. r[2] = (BN_ULONG)ll & BN_MASK2;
  198. ll >>= BN_BITS2;
  199. ll += (BN_ULLONG)a[3] + b[3];
  200. r[3] = (BN_ULONG)ll & BN_MASK2;
  201. ll >>= BN_BITS2;
  202. a += 4;
  203. b += 4;
  204. r += 4;
  205. n -= 4;
  206. }
  207. while (n) {
  208. ll += (BN_ULLONG)a[0] + b[0];
  209. r[0] = (BN_ULONG)ll & BN_MASK2;
  210. ll >>= BN_BITS2;
  211. a++;
  212. b++;
  213. r++;
  214. n--;
  215. }
  216. return (BN_ULONG)ll;
  217. }
  218. #else /* !BN_ULLONG */
  219. BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  220. int n) {
  221. BN_ULONG c, l, t;
  222. assert(n >= 0);
  223. if (n <= 0) {
  224. return (BN_ULONG)0;
  225. }
  226. c = 0;
  227. while (n & ~3) {
  228. t = a[0];
  229. t = (t + c) & BN_MASK2;
  230. c = (t < c);
  231. l = (t + b[0]) & BN_MASK2;
  232. c += (l < t);
  233. r[0] = l;
  234. t = a[1];
  235. t = (t + c) & BN_MASK2;
  236. c = (t < c);
  237. l = (t + b[1]) & BN_MASK2;
  238. c += (l < t);
  239. r[1] = l;
  240. t = a[2];
  241. t = (t + c) & BN_MASK2;
  242. c = (t < c);
  243. l = (t + b[2]) & BN_MASK2;
  244. c += (l < t);
  245. r[2] = l;
  246. t = a[3];
  247. t = (t + c) & BN_MASK2;
  248. c = (t < c);
  249. l = (t + b[3]) & BN_MASK2;
  250. c += (l < t);
  251. r[3] = l;
  252. a += 4;
  253. b += 4;
  254. r += 4;
  255. n -= 4;
  256. }
  257. while (n) {
  258. t = a[0];
  259. t = (t + c) & BN_MASK2;
  260. c = (t < c);
  261. l = (t + b[0]) & BN_MASK2;
  262. c += (l < t);
  263. r[0] = l;
  264. a++;
  265. b++;
  266. r++;
  267. n--;
  268. }
  269. return (BN_ULONG)c;
  270. }
  271. #endif /* !BN_ULLONG */
  272. BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
  273. int n) {
  274. BN_ULONG t1, t2;
  275. int c = 0;
  276. assert(n >= 0);
  277. if (n <= 0) {
  278. return (BN_ULONG)0;
  279. }
  280. while (n & ~3) {
  281. t1 = a[0];
  282. t2 = b[0];
  283. r[0] = (t1 - t2 - c) & BN_MASK2;
  284. if (t1 != t2) {
  285. c = (t1 < t2);
  286. }
  287. t1 = a[1];
  288. t2 = b[1];
  289. r[1] = (t1 - t2 - c) & BN_MASK2;
  290. if (t1 != t2) {
  291. c = (t1 < t2);
  292. }
  293. t1 = a[2];
  294. t2 = b[2];
  295. r[2] = (t1 - t2 - c) & BN_MASK2;
  296. if (t1 != t2) {
  297. c = (t1 < t2);
  298. }
  299. t1 = a[3];
  300. t2 = b[3];
  301. r[3] = (t1 - t2 - c) & BN_MASK2;
  302. if (t1 != t2) {
  303. c = (t1 < t2);
  304. }
  305. a += 4;
  306. b += 4;
  307. r += 4;
  308. n -= 4;
  309. }
  310. while (n) {
  311. t1 = a[0];
  312. t2 = b[0];
  313. r[0] = (t1 - t2 - c) & BN_MASK2;
  314. if (t1 != t2) {
  315. c = (t1 < t2);
  316. }
  317. a++;
  318. b++;
  319. r++;
  320. n--;
  321. }
  322. return c;
  323. }
  324. /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
  325. /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
  326. /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
  327. /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
  328. #ifdef BN_ULLONG
  /* Three-word accumulator helpers for builds that have BN_ULLONG. In each
   * macro (c2,c1,c0) names three lvalues, c0 being the least significant.
   *
   * Adding one word to a double-width product can never overflow the
   * double-width type, because the high half of a product of two words cannot
   * be all-ones. */

  /* mul_add_c(a, b, c0, c1, c2): (c2,c1,c0) += a * b. */
  #define mul_add_c(a, b, c0, c1, c2) \
    do { \
      BN_ULONG upper; \
      BN_ULLONG prod = (BN_ULLONG)(a) * (b); \
      prod += (c0); /* cannot overflow */ \
      (c0) = (BN_ULONG)Lw(prod); \
      upper = (BN_ULONG)Hw(prod); \
      (c1) = ((c1) + (upper)) & BN_MASK2; \
      if ((c1) < upper) { \
        (c2)++; \
      } \
    } while (0)

  /* mul_add_c2(a, b, c0, c1, c2): (c2,c1,c0) += 2 * a * b, done by folding
   * the same product into the accumulator twice. */
  #define mul_add_c2(a, b, c0, c1, c2) \
    do { \
      BN_ULONG upper; \
      BN_ULLONG prod = (BN_ULLONG)(a) * (b); \
      BN_ULLONG first_pass = prod + (c0); /* cannot overflow */ \
      (c0) = (BN_ULONG)Lw(first_pass); \
      upper = (BN_ULONG)Hw(first_pass); \
      (c1) = ((c1) + upper) & BN_MASK2; \
      if ((c1) < upper) { \
        (c2)++; \
      } \
      prod += (c0); /* cannot overflow */ \
      (c0) = (BN_ULONG)Lw(prod); \
      upper = (BN_ULONG)Hw(prod); \
      (c1) = ((c1) + upper) & BN_MASK2; \
      if ((c1) < upper) { \
        (c2)++; \
      } \
    } while (0)

  /* sqr_add_c(a, i, c0, c1, c2): (c2,c1,c0) += a[i] * a[i]. */
  #define sqr_add_c(a, i, c0, c1, c2) \
    do { \
      BN_ULONG upper; \
      BN_ULLONG prod = (BN_ULLONG)(a)[i] * (a)[i]; \
      prod += (c0); /* cannot overflow */ \
      (c0) = (BN_ULONG)Lw(prod); \
      upper = (BN_ULONG)Hw(prod); \
      (c1) = ((c1) + upper) & BN_MASK2; \
      if ((c1) < upper) { \
        (c2)++; \
      } \
    } while (0)

  /* sqr_add_c2(a, i, j, c0, c1, c2): (c2,c1,c0) += 2 * a[i] * a[j]. */
  #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
  375. #else
  /* Three-word accumulator helpers for builds without BN_ULLONG. In each macro
   * (c2,c1,c0) names three lvalues, c0 being the least significant; they are
   * evaluated several times and must be free of side effects. The product
   * words come from BN_UMULT_LOHI, which is defined outside this file.
   *
   * Additions to hi cannot overflow, because the high word of a multiplication
   * result cannot be all-ones.
   *
   * Every use of a macro parameter is parenthesized so that the expansion does
   * not depend on the precedence of whatever expression the caller passes. */

  /* mul_add_c(a, b, c0, c1, c2): (c2,c1,c0) += a * b. */
  #define mul_add_c(a, b, c0, c1, c2) \
    do { \
      BN_ULONG ta = (a), tb = (b); \
      BN_ULONG lo, hi; \
      BN_UMULT_LOHI(lo, hi, ta, tb); \
      (c0) += lo; \
      hi += ((c0) < lo) ? 1 : 0; /* carry out of c0 */ \
      (c1) += hi; \
      (c2) += ((c1) < hi) ? 1 : 0; /* carry out of c1 */ \
    } while (0)

  /* mul_add_c2(a, b, c0, c1, c2): (c2,c1,c0) += 2 * a * b, done by folding
   * the same product into the accumulator twice. */
  #define mul_add_c2(a, b, c0, c1, c2) \
    do { \
      BN_ULONG ta = (a), tb = (b); \
      BN_ULONG lo, hi, tt; \
      BN_UMULT_LOHI(lo, hi, ta, tb); \
      (c0) += lo; \
      tt = hi + (((c0) < lo) ? 1 : 0); \
      (c1) += tt; \
      (c2) += ((c1) < tt) ? 1 : 0; \
      (c0) += lo; \
      hi += ((c0) < lo) ? 1 : 0; \
      (c1) += hi; \
      (c2) += ((c1) < hi) ? 1 : 0; \
    } while (0)

  /* sqr_add_c(a, i, c0, c1, c2): (c2,c1,c0) += a[i] * a[i]. */
  #define sqr_add_c(a, i, c0, c1, c2) \
    do { \
      BN_ULONG ta = (a)[i]; \
      BN_ULONG lo, hi; \
      BN_UMULT_LOHI(lo, hi, ta, ta); \
      (c0) += lo; \
      hi += ((c0) < lo) ? 1 : 0; \
      (c1) += hi; \
      (c2) += ((c1) < hi) ? 1 : 0; \
    } while (0)

  /* sqr_add_c2(a, i, j, c0, c1, c2): (c2,c1,c0) += 2 * a[i] * a[j]. */
  #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
  413. #endif /* !BN_ULLONG */
  414. void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
  415. BN_ULONG c1, c2, c3;
  416. c1 = 0;
  417. c2 = 0;
  418. c3 = 0;
  419. mul_add_c(a[0], b[0], c1, c2, c3);
  420. r[0] = c1;
  421. c1 = 0;
  422. mul_add_c(a[0], b[1], c2, c3, c1);
  423. mul_add_c(a[1], b[0], c2, c3, c1);
  424. r[1] = c2;
  425. c2 = 0;
  426. mul_add_c(a[2], b[0], c3, c1, c2);
  427. mul_add_c(a[1], b[1], c3, c1, c2);
  428. mul_add_c(a[0], b[2], c3, c1, c2);
  429. r[2] = c3;
  430. c3 = 0;
  431. mul_add_c(a[0], b[3], c1, c2, c3);
  432. mul_add_c(a[1], b[2], c1, c2, c3);
  433. mul_add_c(a[2], b[1], c1, c2, c3);
  434. mul_add_c(a[3], b[0], c1, c2, c3);
  435. r[3] = c1;
  436. c1 = 0;
  437. mul_add_c(a[4], b[0], c2, c3, c1);
  438. mul_add_c(a[3], b[1], c2, c3, c1);
  439. mul_add_c(a[2], b[2], c2, c3, c1);
  440. mul_add_c(a[1], b[3], c2, c3, c1);
  441. mul_add_c(a[0], b[4], c2, c3, c1);
  442. r[4] = c2;
  443. c2 = 0;
  444. mul_add_c(a[0], b[5], c3, c1, c2);
  445. mul_add_c(a[1], b[4], c3, c1, c2);
  446. mul_add_c(a[2], b[3], c3, c1, c2);
  447. mul_add_c(a[3], b[2], c3, c1, c2);
  448. mul_add_c(a[4], b[1], c3, c1, c2);
  449. mul_add_c(a[5], b[0], c3, c1, c2);
  450. r[5] = c3;
  451. c3 = 0;
  452. mul_add_c(a[6], b[0], c1, c2, c3);
  453. mul_add_c(a[5], b[1], c1, c2, c3);
  454. mul_add_c(a[4], b[2], c1, c2, c3);
  455. mul_add_c(a[3], b[3], c1, c2, c3);
  456. mul_add_c(a[2], b[4], c1, c2, c3);
  457. mul_add_c(a[1], b[5], c1, c2, c3);
  458. mul_add_c(a[0], b[6], c1, c2, c3);
  459. r[6] = c1;
  460. c1 = 0;
  461. mul_add_c(a[0], b[7], c2, c3, c1);
  462. mul_add_c(a[1], b[6], c2, c3, c1);
  463. mul_add_c(a[2], b[5], c2, c3, c1);
  464. mul_add_c(a[3], b[4], c2, c3, c1);
  465. mul_add_c(a[4], b[3], c2, c3, c1);
  466. mul_add_c(a[5], b[2], c2, c3, c1);
  467. mul_add_c(a[6], b[1], c2, c3, c1);
  468. mul_add_c(a[7], b[0], c2, c3, c1);
  469. r[7] = c2;
  470. c2 = 0;
  471. mul_add_c(a[7], b[1], c3, c1, c2);
  472. mul_add_c(a[6], b[2], c3, c1, c2);
  473. mul_add_c(a[5], b[3], c3, c1, c2);
  474. mul_add_c(a[4], b[4], c3, c1, c2);
  475. mul_add_c(a[3], b[5], c3, c1, c2);
  476. mul_add_c(a[2], b[6], c3, c1, c2);
  477. mul_add_c(a[1], b[7], c3, c1, c2);
  478. r[8] = c3;
  479. c3 = 0;
  480. mul_add_c(a[2], b[7], c1, c2, c3);
  481. mul_add_c(a[3], b[6], c1, c2, c3);
  482. mul_add_c(a[4], b[5], c1, c2, c3);
  483. mul_add_c(a[5], b[4], c1, c2, c3);
  484. mul_add_c(a[6], b[3], c1, c2, c3);
  485. mul_add_c(a[7], b[2], c1, c2, c3);
  486. r[9] = c1;
  487. c1 = 0;
  488. mul_add_c(a[7], b[3], c2, c3, c1);
  489. mul_add_c(a[6], b[4], c2, c3, c1);
  490. mul_add_c(a[5], b[5], c2, c3, c1);
  491. mul_add_c(a[4], b[6], c2, c3, c1);
  492. mul_add_c(a[3], b[7], c2, c3, c1);
  493. r[10] = c2;
  494. c2 = 0;
  495. mul_add_c(a[4], b[7], c3, c1, c2);
  496. mul_add_c(a[5], b[6], c3, c1, c2);
  497. mul_add_c(a[6], b[5], c3, c1, c2);
  498. mul_add_c(a[7], b[4], c3, c1, c2);
  499. r[11] = c3;
  500. c3 = 0;
  501. mul_add_c(a[7], b[5], c1, c2, c3);
  502. mul_add_c(a[6], b[6], c1, c2, c3);
  503. mul_add_c(a[5], b[7], c1, c2, c3);
  504. r[12] = c1;
  505. c1 = 0;
  506. mul_add_c(a[6], b[7], c2, c3, c1);
  507. mul_add_c(a[7], b[6], c2, c3, c1);
  508. r[13] = c2;
  509. c2 = 0;
  510. mul_add_c(a[7], b[7], c3, c1, c2);
  511. r[14] = c3;
  512. r[15] = c1;
  513. }
  514. void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
  515. BN_ULONG c1, c2, c3;
  516. c1 = 0;
  517. c2 = 0;
  518. c3 = 0;
  519. mul_add_c(a[0], b[0], c1, c2, c3);
  520. r[0] = c1;
  521. c1 = 0;
  522. mul_add_c(a[0], b[1], c2, c3, c1);
  523. mul_add_c(a[1], b[0], c2, c3, c1);
  524. r[1] = c2;
  525. c2 = 0;
  526. mul_add_c(a[2], b[0], c3, c1, c2);
  527. mul_add_c(a[1], b[1], c3, c1, c2);
  528. mul_add_c(a[0], b[2], c3, c1, c2);
  529. r[2] = c3;
  530. c3 = 0;
  531. mul_add_c(a[0], b[3], c1, c2, c3);
  532. mul_add_c(a[1], b[2], c1, c2, c3);
  533. mul_add_c(a[2], b[1], c1, c2, c3);
  534. mul_add_c(a[3], b[0], c1, c2, c3);
  535. r[3] = c1;
  536. c1 = 0;
  537. mul_add_c(a[3], b[1], c2, c3, c1);
  538. mul_add_c(a[2], b[2], c2, c3, c1);
  539. mul_add_c(a[1], b[3], c2, c3, c1);
  540. r[4] = c2;
  541. c2 = 0;
  542. mul_add_c(a[2], b[3], c3, c1, c2);
  543. mul_add_c(a[3], b[2], c3, c1, c2);
  544. r[5] = c3;
  545. c3 = 0;
  546. mul_add_c(a[3], b[3], c1, c2, c3);
  547. r[6] = c1;
  548. r[7] = c2;
  549. }
  550. void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) {
  551. BN_ULONG c1, c2, c3;
  552. c1 = 0;
  553. c2 = 0;
  554. c3 = 0;
  555. sqr_add_c(a, 0, c1, c2, c3);
  556. r[0] = c1;
  557. c1 = 0;
  558. sqr_add_c2(a, 1, 0, c2, c3, c1);
  559. r[1] = c2;
  560. c2 = 0;
  561. sqr_add_c(a, 1, c3, c1, c2);
  562. sqr_add_c2(a, 2, 0, c3, c1, c2);
  563. r[2] = c3;
  564. c3 = 0;
  565. sqr_add_c2(a, 3, 0, c1, c2, c3);
  566. sqr_add_c2(a, 2, 1, c1, c2, c3);
  567. r[3] = c1;
  568. c1 = 0;
  569. sqr_add_c(a, 2, c2, c3, c1);
  570. sqr_add_c2(a, 3, 1, c2, c3, c1);
  571. sqr_add_c2(a, 4, 0, c2, c3, c1);
  572. r[4] = c2;
  573. c2 = 0;
  574. sqr_add_c2(a, 5, 0, c3, c1, c2);
  575. sqr_add_c2(a, 4, 1, c3, c1, c2);
  576. sqr_add_c2(a, 3, 2, c3, c1, c2);
  577. r[5] = c3;
  578. c3 = 0;
  579. sqr_add_c(a, 3, c1, c2, c3);
  580. sqr_add_c2(a, 4, 2, c1, c2, c3);
  581. sqr_add_c2(a, 5, 1, c1, c2, c3);
  582. sqr_add_c2(a, 6, 0, c1, c2, c3);
  583. r[6] = c1;
  584. c1 = 0;
  585. sqr_add_c2(a, 7, 0, c2, c3, c1);
  586. sqr_add_c2(a, 6, 1, c2, c3, c1);
  587. sqr_add_c2(a, 5, 2, c2, c3, c1);
  588. sqr_add_c2(a, 4, 3, c2, c3, c1);
  589. r[7] = c2;
  590. c2 = 0;
  591. sqr_add_c(a, 4, c3, c1, c2);
  592. sqr_add_c2(a, 5, 3, c3, c1, c2);
  593. sqr_add_c2(a, 6, 2, c3, c1, c2);
  594. sqr_add_c2(a, 7, 1, c3, c1, c2);
  595. r[8] = c3;
  596. c3 = 0;
  597. sqr_add_c2(a, 7, 2, c1, c2, c3);
  598. sqr_add_c2(a, 6, 3, c1, c2, c3);
  599. sqr_add_c2(a, 5, 4, c1, c2, c3);
  600. r[9] = c1;
  601. c1 = 0;
  602. sqr_add_c(a, 5, c2, c3, c1);
  603. sqr_add_c2(a, 6, 4, c2, c3, c1);
  604. sqr_add_c2(a, 7, 3, c2, c3, c1);
  605. r[10] = c2;
  606. c2 = 0;
  607. sqr_add_c2(a, 7, 4, c3, c1, c2);
  608. sqr_add_c2(a, 6, 5, c3, c1, c2);
  609. r[11] = c3;
  610. c3 = 0;
  611. sqr_add_c(a, 6, c1, c2, c3);
  612. sqr_add_c2(a, 7, 5, c1, c2, c3);
  613. r[12] = c1;
  614. c1 = 0;
  615. sqr_add_c2(a, 7, 6, c2, c3, c1);
  616. r[13] = c2;
  617. c2 = 0;
  618. sqr_add_c(a, 7, c3, c1, c2);
  619. r[14] = c3;
  620. r[15] = c1;
  621. }
  622. void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) {
  623. BN_ULONG c1, c2, c3;
  624. c1 = 0;
  625. c2 = 0;
  626. c3 = 0;
  627. sqr_add_c(a, 0, c1, c2, c3);
  628. r[0] = c1;
  629. c1 = 0;
  630. sqr_add_c2(a, 1, 0, c2, c3, c1);
  631. r[1] = c2;
  632. c2 = 0;
  633. sqr_add_c(a, 1, c3, c1, c2);
  634. sqr_add_c2(a, 2, 0, c3, c1, c2);
  635. r[2] = c3;
  636. c3 = 0;
  637. sqr_add_c2(a, 3, 0, c1, c2, c3);
  638. sqr_add_c2(a, 2, 1, c1, c2, c3);
  639. r[3] = c1;
  640. c1 = 0;
  641. sqr_add_c(a, 2, c2, c3, c1);
  642. sqr_add_c2(a, 3, 1, c2, c3, c1);
  643. r[4] = c2;
  644. c2 = 0;
  645. sqr_add_c2(a, 3, 2, c3, c1, c2);
  646. r[5] = c3;
  647. c3 = 0;
  648. sqr_add_c(a, 3, c1, c2, c3);
  649. r[6] = c1;
  650. r[7] = c2;
  651. }
  652. #endif