Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.
 
 
 
 
 
 

1535 řádky
42 KiB

  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.]
  56. */
  57. /* ====================================================================
  58. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  59. *
  60. * Redistribution and use in source and binary forms, with or without
  61. * modification, are permitted provided that the following conditions
  62. * are met:
  63. *
  64. * 1. Redistributions of source code must retain the above copyright
  65. * notice, this list of conditions and the following disclaimer.
  66. *
  67. * 2. Redistributions in binary form must reproduce the above copyright
  68. * notice, this list of conditions and the following disclaimer in
  69. * the documentation and/or other materials provided with the
  70. * distribution.
  71. *
  72. * 3. All advertising materials mentioning features or use of this
  73. * software must display the following acknowledgment:
  74. * "This product includes software developed by the OpenSSL Project
  75. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  76. *
  77. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  78. * endorse or promote products derived from this software without
  79. * prior written permission. For written permission, please contact
  80. * openssl-core@openssl.org.
  81. *
  82. * 5. Products derived from this software may not be called "OpenSSL"
  83. * nor may "OpenSSL" appear in their names without prior written
  84. * permission of the OpenSSL Project.
  85. *
  86. * 6. Redistributions of any form whatsoever must retain the following
  87. * acknowledgment:
  88. * "This product includes software developed by the OpenSSL Project
  89. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  90. *
  91. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  92. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  93. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  94. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  95. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  96. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  97. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  98. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  99. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  100. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  101. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  102. * OF THE POSSIBILITY OF SUCH DAMAGE.
  103. * ====================================================================
  104. *
  105. * This product includes cryptographic software written by Eric Young
  106. * (eay@cryptsoft.com). This product includes software written by Tim
  107. * Hudson (tjh@cryptsoft.com). */
  108. #include <openssl/bn.h>
  109. #include <assert.h>
  110. #include <openssl/cpu.h>
  111. #include <openssl/err.h>
  112. #include <openssl/mem.h>
  113. #include "internal.h"
  114. #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
  115. #define OPENSSL_BN_ASM_MONT5
  116. #define RSAZ_ENABLED
  117. #include "rsaz_exp.h"
  118. #endif
  119. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  120. int i, bits, ret = 0;
  121. BIGNUM *v, *rr;
  122. if ((p->flags & BN_FLG_CONSTTIME) != 0) {
  123. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  124. OPENSSL_PUT_ERROR(BN, BN_exp, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  125. return 0;
  126. }
  127. BN_CTX_start(ctx);
  128. if (r == a || r == p) {
  129. rr = BN_CTX_get(ctx);
  130. } else {
  131. rr = r;
  132. }
  133. v = BN_CTX_get(ctx);
  134. if (rr == NULL || v == NULL) {
  135. goto err;
  136. }
  137. if (BN_copy(v, a) == NULL) {
  138. goto err;
  139. }
  140. bits = BN_num_bits(p);
  141. if (BN_is_odd(p)) {
  142. if (BN_copy(rr, a) == NULL) {
  143. goto err;
  144. }
  145. } else {
  146. if (!BN_one(rr)) {
  147. goto err;
  148. }
  149. }
  150. for (i = 1; i < bits; i++) {
  151. if (!BN_sqr(v, v, ctx)) {
  152. goto err;
  153. }
  154. if (BN_is_bit_set(p, i)) {
  155. if (!BN_mul(rr, rr, v, ctx)) {
  156. goto err;
  157. }
  158. }
  159. }
  160. ret = 1;
  161. err:
  162. if (r != rr) {
  163. BN_copy(r, rr);
  164. }
  165. BN_CTX_end(ctx);
  166. return ret;
  167. }
/* Maximum precomputation table size for *variable* sliding windows. The
 * val[] tables of the sliding-window exponentiation routines in this file are
 * declared with this many entries. */
#define TABLE_SIZE 32
/* BN_RECP_CTX caches a divisor together with a scaled reciprocal of it, so
 * that repeated reductions by the same divisor can be carried out with
 * multiplications and shifts (see BN_div_recp). */
typedef struct bn_recp_ctx_st {
  BIGNUM N;  /* the divisor */
  BIGNUM Nr; /* the reciprocal */
  int num_bits; /* BN_num_bits(N), recorded by BN_RECP_CTX_set */
  int shift;    /* exponent i for which Nr was last computed as 2^i / N */
  int flags;    /* cleared by BN_RECP_CTX_init */
} BN_RECP_CTX;
  177. static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
  178. BN_init(&recp->N);
  179. BN_init(&recp->Nr);
  180. recp->num_bits = 0;
  181. recp->flags = 0;
  182. }
  183. static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
  184. if (recp == NULL) {
  185. return;
  186. }
  187. BN_free(&recp->N);
  188. BN_free(&recp->Nr);
  189. }
  190. static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
  191. if (!BN_copy(&(recp->N), d)) {
  192. return 0;
  193. }
  194. BN_zero(&recp->Nr);
  195. recp->num_bits = BN_num_bits(d);
  196. recp->shift = 0;
  197. return 1;
  198. }
  199. /* len is the expected size of the result We actually calculate with an extra
  200. * word of precision, so we can do faster division if the remainder is not
  201. * required.
  202. * r := 2^len / m */
  203. static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
  204. int ret = -1;
  205. BIGNUM *t;
  206. BN_CTX_start(ctx);
  207. t = BN_CTX_get(ctx);
  208. if (t == NULL) {
  209. goto err;
  210. }
  211. if (!BN_set_bit(t, len)) {
  212. goto err;
  213. }
  214. if (!BN_div(r, NULL, t, m, ctx)) {
  215. goto err;
  216. }
  217. ret = len;
  218. err:
  219. BN_CTX_end(ctx);
  220. return ret;
  221. }
  222. static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
  223. BN_RECP_CTX *recp, BN_CTX *ctx) {
  224. int i, j, ret = 0;
  225. BIGNUM *a, *b, *d, *r;
  226. BN_CTX_start(ctx);
  227. a = BN_CTX_get(ctx);
  228. b = BN_CTX_get(ctx);
  229. if (dv != NULL) {
  230. d = dv;
  231. } else {
  232. d = BN_CTX_get(ctx);
  233. }
  234. if (rem != NULL) {
  235. r = rem;
  236. } else {
  237. r = BN_CTX_get(ctx);
  238. }
  239. if (a == NULL || b == NULL || d == NULL || r == NULL) {
  240. goto err;
  241. }
  242. if (BN_ucmp(m, &(recp->N)) < 0) {
  243. BN_zero(d);
  244. if (!BN_copy(r, m)) {
  245. return 0;
  246. }
  247. BN_CTX_end(ctx);
  248. return 1;
  249. }
  250. /* We want the remainder
  251. * Given input of ABCDEF / ab
  252. * we need multiply ABCDEF by 3 digests of the reciprocal of ab */
  253. /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
  254. i = BN_num_bits(m);
  255. j = recp->num_bits << 1;
  256. if (j > i) {
  257. i = j;
  258. }
  259. /* Nr := round(2^i / N) */
  260. if (i != recp->shift) {
  261. recp->shift =
  262. BN_reciprocal(&(recp->Nr), &(recp->N), i,
  263. ctx); /* BN_reciprocal returns i, or -1 for an error */
  264. }
  265. if (recp->shift == -1) {
  266. goto err;
  267. }
  268. /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
  269. * BN_num_bits(N)))|
  270. * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
  271. * BN_num_bits(N)))|
  272. * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
  273. * = |m/N| */
  274. if (!BN_rshift(a, m, recp->num_bits)) {
  275. goto err;
  276. }
  277. if (!BN_mul(b, a, &(recp->Nr), ctx)) {
  278. goto err;
  279. }
  280. if (!BN_rshift(d, b, i - recp->num_bits)) {
  281. goto err;
  282. }
  283. d->neg = 0;
  284. if (!BN_mul(b, &(recp->N), d, ctx)) {
  285. goto err;
  286. }
  287. if (!BN_usub(r, m, b)) {
  288. goto err;
  289. }
  290. r->neg = 0;
  291. j = 0;
  292. while (BN_ucmp(r, &(recp->N)) >= 0) {
  293. if (j++ > 2) {
  294. OPENSSL_PUT_ERROR(BN, BN_div_recp, BN_R_BAD_RECIPROCAL);
  295. goto err;
  296. }
  297. if (!BN_usub(r, r, &(recp->N))) {
  298. goto err;
  299. }
  300. if (!BN_add_word(d, 1)) {
  301. goto err;
  302. }
  303. }
  304. r->neg = BN_is_zero(r) ? 0 : m->neg;
  305. d->neg = m->neg ^ recp->N.neg;
  306. ret = 1;
  307. err:
  308. BN_CTX_end(ctx);
  309. return ret;
  310. }
  311. static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
  312. BN_RECP_CTX *recp, BN_CTX *ctx) {
  313. int ret = 0;
  314. BIGNUM *a;
  315. const BIGNUM *ca;
  316. BN_CTX_start(ctx);
  317. a = BN_CTX_get(ctx);
  318. if (a == NULL) {
  319. goto err;
  320. }
  321. if (y != NULL) {
  322. if (x == y) {
  323. if (!BN_sqr(a, x, ctx)) {
  324. goto err;
  325. }
  326. } else {
  327. if (!BN_mul(a, x, y, ctx)) {
  328. goto err;
  329. }
  330. }
  331. ca = a;
  332. } else {
  333. ca = x; /* Just do the mod */
  334. }
  335. ret = BN_div_recp(NULL, r, ca, recp, ctx);
  336. err:
  337. BN_CTX_end(ctx);
  338. return ret;
  339. }
  340. /* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp
  341. * functions
  342. *
  343. * For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
  344. * multiplications is a constant plus on average
  345. *
  346. * 2^(w-1) + (b-w)/(w+1);
  347. *
  348. * here 2^(w-1) is for precomputing the table (we actually need entries only
  349. * for windows that have the lowest bit set), and (b-w)/(w+1) is an
  350. * approximation for the expected number of w-bit windows, not counting the
  351. * first one.
  352. *
  353. * Thus we should use
  354. *
  355. * w >= 6 if b > 671
  356. * w = 5 if 671 > b > 239
  357. * w = 4 if 239 > b > 79
  358. * w = 3 if 79 > b > 23
  359. * w <= 2 if 23 > b
  360. *
  361. * (with draws in between). Very small exponents are often selected
  362. * with low Hamming weight, so we use w = 1 for b <= 23. */
  363. #define BN_window_bits_for_exponent_size(b) \
  364. ((b) > 671 ? 6 : \
  365. (b) > 239 ? 5 : \
  366. (b) > 79 ? 4 : \
  367. (b) > 23 ? 3 : 1)
  368. static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  369. const BIGNUM *m, BN_CTX *ctx) {
  370. int i, j, bits, ret = 0, wstart, window;
  371. int start = 1;
  372. BIGNUM *aa;
  373. /* Table of variables obtained from 'ctx' */
  374. BIGNUM *val[TABLE_SIZE];
  375. BN_RECP_CTX recp;
  376. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  377. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  378. OPENSSL_PUT_ERROR(BN, mod_exp_recp, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  379. return 0;
  380. }
  381. bits = BN_num_bits(p);
  382. if (bits == 0) {
  383. ret = BN_one(r);
  384. return ret;
  385. }
  386. BN_CTX_start(ctx);
  387. aa = BN_CTX_get(ctx);
  388. val[0] = BN_CTX_get(ctx);
  389. if (!aa || !val[0]) {
  390. goto err;
  391. }
  392. BN_RECP_CTX_init(&recp);
  393. if (m->neg) {
  394. /* ignore sign of 'm' */
  395. if (!BN_copy(aa, m)) {
  396. goto err;
  397. }
  398. aa->neg = 0;
  399. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
  400. goto err;
  401. }
  402. } else {
  403. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
  404. goto err;
  405. }
  406. }
  407. if (!BN_nnmod(val[0], a, m, ctx)) {
  408. goto err; /* 1 */
  409. }
  410. if (BN_is_zero(val[0])) {
  411. BN_zero(r);
  412. ret = 1;
  413. goto err;
  414. }
  415. window = BN_window_bits_for_exponent_size(bits);
  416. if (window > 1) {
  417. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
  418. goto err; /* 2 */
  419. }
  420. j = 1 << (window - 1);
  421. for (i = 1; i < j; i++) {
  422. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  423. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
  424. goto err;
  425. }
  426. }
  427. }
  428. start = 1; /* This is used to avoid multiplication etc
  429. * when there is only the value '1' in the
  430. * buffer. */
  431. wstart = bits - 1; /* The top bit of the window */
  432. if (!BN_one(r)) {
  433. goto err;
  434. }
  435. for (;;) {
  436. int wvalue; /* The 'value' of the window */
  437. int wend; /* The bottom bit of the window */
  438. if (BN_is_bit_set(p, wstart) == 0) {
  439. if (!start) {
  440. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  441. goto err;
  442. }
  443. }
  444. if (wstart == 0) {
  445. break;
  446. }
  447. wstart--;
  448. continue;
  449. }
  450. /* We now have wstart on a 'set' bit, we now need to work out
  451. * how bit a window to do. To do this we need to scan
  452. * forward until the last set bit before the end of the
  453. * window */
  454. wvalue = 1;
  455. wend = 0;
  456. for (i = 1; i < window; i++) {
  457. if (wstart - i < 0) {
  458. break;
  459. }
  460. if (BN_is_bit_set(p, wstart - i)) {
  461. wvalue <<= (i - wend);
  462. wvalue |= 1;
  463. wend = i;
  464. }
  465. }
  466. /* wend is the size of the current window */
  467. j = wend + 1;
  468. /* add the 'bytes above' */
  469. if (!start) {
  470. for (i = 0; i < j; i++) {
  471. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  472. goto err;
  473. }
  474. }
  475. }
  476. /* wvalue will be an odd number < 2^window */
  477. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
  478. goto err;
  479. }
  480. /* move the 'window' down further */
  481. wstart -= wend + 1;
  482. start = 0;
  483. if (wstart < 0) {
  484. break;
  485. }
  486. }
  487. ret = 1;
  488. err:
  489. BN_CTX_end(ctx);
  490. BN_RECP_CTX_free(&recp);
  491. return ret;
  492. }
  493. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  494. BN_CTX *ctx) {
  495. /* For even modulus m = 2^k*m_odd, it might make sense to compute
  496. * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
  497. * exponentiation for the odd part), using appropriate exponent
  498. * reductions, and combine the results using the CRT.
  499. *
  500. * For now, we use Montgomery only if the modulus is odd; otherwise,
  501. * exponentiation using the reciprocal-based quick remaindering
  502. * algorithm is used.
  503. *
  504. * (Timing obtained with expspeed.c [computations a^p mod m
  505. * where a, p, m are of the same length: 256, 512, 1024, 2048,
  506. * 4096, 8192 bits], compared to the running time of the
  507. * standard algorithm:
  508. *
  509. * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
  510. * 55 .. 77 % [UltraSparc processor, but
  511. * debug-solaris-sparcv8-gcc conf.]
  512. *
  513. * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
  514. * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
  515. *
  516. * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
  517. * at 2048 and more bits, but at 512 and 1024 bits, it was
  518. * slower even than the standard algorithm!
  519. *
  520. * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
  521. * should be obtained when the new Montgomery reduction code
  522. * has been integrated into OpenSSL.) */
  523. if (BN_is_odd(m)) {
  524. if (a->top == 1 && !a->neg && BN_get_flags(p, BN_FLG_CONSTTIME) == 0) {
  525. BN_ULONG A = a->d[0];
  526. return BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
  527. }
  528. return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  529. }
  530. return mod_exp_recp(r, a, p, m, ctx);
  531. }
  532. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  533. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) {
  534. int i, j, bits, ret = 0, wstart, window;
  535. int start = 1;
  536. BIGNUM *d, *r;
  537. const BIGNUM *aa;
  538. /* Table of variables obtained from 'ctx' */
  539. BIGNUM *val[TABLE_SIZE];
  540. BN_MONT_CTX *mont = NULL;
  541. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  542. return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
  543. }
  544. if (!BN_is_odd(m)) {
  545. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont, BN_R_CALLED_WITH_EVEN_MODULUS);
  546. return 0;
  547. }
  548. bits = BN_num_bits(p);
  549. if (bits == 0) {
  550. ret = BN_one(rr);
  551. return ret;
  552. }
  553. BN_CTX_start(ctx);
  554. d = BN_CTX_get(ctx);
  555. r = BN_CTX_get(ctx);
  556. val[0] = BN_CTX_get(ctx);
  557. if (!d || !r || !val[0]) {
  558. goto err;
  559. }
  560. /* If this is not done, things will break in the montgomery part */
  561. if (in_mont != NULL) {
  562. mont = in_mont;
  563. } else {
  564. mont = BN_MONT_CTX_new();
  565. if (mont == NULL) {
  566. goto err;
  567. }
  568. if (!BN_MONT_CTX_set(mont, m, ctx)) {
  569. goto err;
  570. }
  571. }
  572. if (a->neg || BN_ucmp(a, m) >= 0) {
  573. if (!BN_nnmod(val[0], a, m, ctx)) {
  574. goto err;
  575. }
  576. aa = val[0];
  577. } else {
  578. aa = a;
  579. }
  580. if (BN_is_zero(aa)) {
  581. BN_zero(rr);
  582. ret = 1;
  583. goto err;
  584. }
  585. if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
  586. goto err; /* 1 */
  587. }
  588. window = BN_window_bits_for_exponent_size(bits);
  589. if (window > 1) {
  590. if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
  591. goto err; /* 2 */
  592. }
  593. j = 1 << (window - 1);
  594. for (i = 1; i < j; i++) {
  595. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  596. !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
  597. goto err;
  598. }
  599. }
  600. }
  601. start = 1; /* This is used to avoid multiplication etc
  602. * when there is only the value '1' in the
  603. * buffer. */
  604. wstart = bits - 1; /* The top bit of the window */
  605. j = m->top; /* borrow j */
  606. if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  607. if (bn_wexpand(r, j) == NULL)
  608. goto err;
  609. /* 2^(top*BN_BITS2) - m */
  610. r->d[0] = (0 - m->d[0]) & BN_MASK2;
  611. for (i = 1; i < j; i++)
  612. r->d[i] = (~m->d[i]) & BN_MASK2;
  613. r->top = j;
  614. /* Upper words will be zero if the corresponding words of 'm'
  615. * were 0xfff[...], so decrement r->top accordingly. */
  616. bn_correct_top(r);
  617. } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  618. goto err;
  619. }
  620. for (;;) {
  621. int wvalue; /* The 'value' of the window */
  622. int wend; /* The bottom bit of the window */
  623. if (BN_is_bit_set(p, wstart) == 0) {
  624. if (!start) {
  625. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  626. goto err;
  627. }
  628. if (wstart == 0) {
  629. break;
  630. }
  631. wstart--;
  632. continue;
  633. }
  634. /* We now have wstart on a 'set' bit, we now need to work out how bit a
  635. * window to do. To do this we need to scan forward until the last set bit
  636. * before the end of the window */
  637. wvalue = 1;
  638. wend = 0;
  639. for (i = 1; i < window; i++) {
  640. if (wstart - i < 0) {
  641. break;
  642. }
  643. if (BN_is_bit_set(p, wstart - i)) {
  644. wvalue <<= (i - wend);
  645. wvalue |= 1;
  646. wend = i;
  647. }
  648. }
  649. /* wend is the size of the current window */
  650. j = wend + 1;
  651. /* add the 'bytes above' */
  652. if (!start) {
  653. for (i = 0; i < j; i++) {
  654. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  655. goto err;
  656. }
  657. }
  658. }
  659. /* wvalue will be an odd number < 2^window */
  660. if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
  661. goto err;
  662. }
  663. /* move the 'window' down further */
  664. wstart -= wend + 1;
  665. start = 0;
  666. if (wstart < 0) {
  667. break;
  668. }
  669. }
  670. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  671. goto err;
  672. }
  673. ret = 1;
  674. err:
  675. if (in_mont == NULL && mont != NULL) {
  676. BN_MONT_CTX_free(mont);
  677. }
  678. BN_CTX_end(ctx);
  679. return ret;
  680. }
  681. /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  682. * layout so that accessing any of these table values shows the same access
  683. * pattern as far as cache lines are concerned. The following functions are
  684. * used to transfer a BIGNUM from/to that table. */
  685. static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
  686. int width) {
  687. size_t i, j;
  688. if (top > b->top) {
  689. top = b->top; /* this works because 'buf' is explicitly zeroed */
  690. }
  691. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  692. buf[j] = ((unsigned char *)b->d)[i];
  693. }
  694. return 1;
  695. }
  696. static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
  697. int width) {
  698. size_t i, j;
  699. if (bn_wexpand(b, top) == NULL) {
  700. return 0;
  701. }
  702. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  703. ((unsigned char *)b->d)[i] = buf[j];
  704. }
  705. b->top = top;
  706. bn_correct_top(b);
  707. return 1;
  708. }
  709. /* BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
  710. * line width of the target processor is at least the following value. */
  711. #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
  712. #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
  713. (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
  714. /* Window sizes optimized for fixed window size modular exponentiation
  715. * algorithm (BN_mod_exp_mont_consttime).
  716. *
  717. * To achieve the security goals of BN_mode_exp_mont_consttime, the maximum
  718. * size of the window must not exceed
  719. * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
  720. *
  721. * Window size thresholds are defined for cache line sizes of 32 and 64, cache
  722. * line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
  723. * 7 should only be used on processors that have a 128 byte or greater cache
  724. * line size. */
  725. #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
  726. #define BN_window_bits_for_ctime_exponent_size(b) \
  727. ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
  728. #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
  729. #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
  730. #define BN_window_bits_for_ctime_exponent_size(b) \
  731. ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
  732. #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
  733. #endif
  734. /* Given a pointer value, compute the next address that is a cache line
  735. * multiple. */
  736. #define MOD_EXP_CTIME_ALIGN(x_) \
  737. ((unsigned char *)(x_) + \
  738. (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
  739. (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  740. /* This variant of BN_mod_exp_mont() uses fixed windows and the special
  741. * precomputation memory layout to limit data-dependency to a minimum
  742. * to protect secret exponents (cf. the hyper-threading timing attacks
  743. * pointed out by Colin Percival,
  744. * http://www.daemonology.net/hyperthreading-considered-harmful/)
  745. */
  746. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  747. const BIGNUM *m, BN_CTX *ctx,
  748. BN_MONT_CTX *in_mont) {
  749. int i, bits, ret = 0, window, wvalue;
  750. int top;
  751. BN_MONT_CTX *mont = NULL;
  752. int numPowers;
  753. unsigned char *powerbufFree = NULL;
  754. int powerbufLen = 0;
  755. unsigned char *powerbuf = NULL;
  756. BIGNUM tmp, am;
  757. top = m->top;
  758. if (!(m->d[0] & 1)) {
  759. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_consttime,
  760. BN_R_CALLED_WITH_EVEN_MODULUS);
  761. return 0;
  762. }
  763. bits = BN_num_bits(p);
  764. if (bits == 0) {
  765. ret = BN_one(rr);
  766. return ret;
  767. }
  768. BN_CTX_start(ctx);
  769. /* Allocate a montgomery context if it was not supplied by the caller.
  770. * If this is not done, things will break in the montgomery part.
  771. */
  772. if (in_mont != NULL)
  773. mont = in_mont;
  774. else {
  775. if ((mont = BN_MONT_CTX_new()) == NULL)
  776. goto err;
  777. if (!BN_MONT_CTX_set(mont, m, ctx))
  778. goto err;
  779. }
  780. #ifdef RSAZ_ENABLED
  781. /* If the size of the operands allow it, perform the optimized
  782. * RSAZ exponentiation. For further information see
  783. * crypto/bn/rsaz_exp.c and accompanying assembly modules. */
  784. if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
  785. rsaz_avx2_eligible()) {
  786. if (NULL == bn_wexpand(rr, 16))
  787. goto err;
  788. RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
  789. rr->top = 16;
  790. rr->neg = 0;
  791. bn_correct_top(rr);
  792. ret = 1;
  793. goto err;
  794. } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
  795. if (NULL == bn_wexpand(rr, 8))
  796. goto err;
  797. RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
  798. rr->top = 8;
  799. rr->neg = 0;
  800. bn_correct_top(rr);
  801. ret = 1;
  802. goto err;
  803. }
  804. #endif
  805. /* Get the window size to use with size of p. */
  806. window = BN_window_bits_for_ctime_exponent_size(bits);
  807. #if defined(OPENSSL_BN_ASM_MONT5)
  808. if (window >= 5) {
  809. window = 5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */
  810. if ((top & 7) == 0)
  811. powerbufLen += 2 * top * sizeof(m->d[0]);
  812. }
  813. #endif
  814. (void)0;
  815. /* Allocate a buffer large enough to hold all of the pre-computed
  816. * powers of am, am itself and tmp.
  817. */
  818. numPowers = 1 << window;
  819. powerbufLen +=
  820. sizeof(m->d[0]) *
  821. (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
  822. #ifdef alloca
  823. if (powerbufLen < 3072)
  824. powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  825. else
  826. #endif
  827. if ((powerbufFree = (unsigned char *)OPENSSL_malloc(
  828. powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
  829. goto err;
  830. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  831. memset(powerbuf, 0, powerbufLen);
  832. #ifdef alloca
  833. if (powerbufLen < 3072)
  834. powerbufFree = NULL;
  835. #endif
  836. /* lay down tmp and am right after powers table */
  837. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  838. am.d = tmp.d + top;
  839. tmp.top = am.top = 0;
  840. tmp.dmax = am.dmax = top;
  841. tmp.neg = am.neg = 0;
  842. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  843. /* prepare a^0 in Montgomery domain */
  844. /* by Shay Gueron's suggestion */
  845. if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  846. /* 2^(top*BN_BITS2) - m */
  847. tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
  848. for (i = 1; i < top; i++)
  849. tmp.d[i] = (~m->d[i]) & BN_MASK2;
  850. tmp.top = top;
  851. } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
  852. goto err;
  853. /* prepare a^1 in Montgomery domain */
  854. if (a->neg || BN_ucmp(a, m) >= 0) {
  855. if (!BN_mod(&am, a, m, ctx))
  856. goto err;
  857. if (!BN_to_montgomery(&am, &am, mont, ctx))
  858. goto err;
  859. } else if (!BN_to_montgomery(&am, a, mont, ctx))
  860. goto err;
  861. #if defined(OPENSSL_BN_ASM_MONT5)
  862. /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
  863. * specifically optimization of cache-timing attack countermeasures
  864. * and pre-computation optimization. */
  865. /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  866. * 512-bit RSA is hardly relevant, we omit it to spare size... */
  867. if (window == 5 && top > 1) {
  868. void bn_mul_mont_gather5(BN_ULONG * rp, const BN_ULONG * ap,
  869. const void * table, const BN_ULONG * np,
  870. const BN_ULONG * n0, int num, int power);
  871. void bn_scatter5(const BN_ULONG * inp, size_t num, void * table,
  872. size_t power);
  873. void bn_gather5(BN_ULONG * out, size_t num, void * table, size_t power);
  874. void bn_power5(BN_ULONG * rp, const BN_ULONG * ap, const void * table,
  875. const BN_ULONG * np, const BN_ULONG * n0, int num,
  876. int power);
  877. int bn_from_montgomery(BN_ULONG * rp, const BN_ULONG * ap,
  878. const BN_ULONG * not_used, const BN_ULONG * np,
  879. const BN_ULONG * n0, int num);
  880. BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
  881. /* BN_to_montgomery can contaminate words above .top
  882. * [in BN_DEBUG[_DEBUG] build]... */
  883. for (i = am.top; i < top; i++)
  884. am.d[i] = 0;
  885. for (i = tmp.top; i < top; i++)
  886. tmp.d[i] = 0;
  887. if (top & 7)
  888. np2 = np;
  889. else
  890. for (np2 = am.d + top, i = 0; i < top; i++)
  891. np2[2 * i] = np[i];
  892. bn_scatter5(tmp.d, top, powerbuf, 0);
  893. bn_scatter5(am.d, am.top, powerbuf, 1);
  894. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  895. bn_scatter5(tmp.d, top, powerbuf, 2);
  896. /* same as above, but uses squaring for 1/2 of operations */
  897. for (i = 4; i < 32; i *= 2) {
  898. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  899. bn_scatter5(tmp.d, top, powerbuf, i);
  900. }
  901. for (i = 3; i < 8; i += 2) {
  902. int j;
  903. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  904. bn_scatter5(tmp.d, top, powerbuf, i);
  905. for (j = 2 * i; j < 32; j *= 2) {
  906. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  907. bn_scatter5(tmp.d, top, powerbuf, j);
  908. }
  909. }
  910. for (; i < 16; i += 2) {
  911. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  912. bn_scatter5(tmp.d, top, powerbuf, i);
  913. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  914. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  915. }
  916. for (; i < 32; i += 2) {
  917. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  918. bn_scatter5(tmp.d, top, powerbuf, i);
  919. }
  920. bits--;
  921. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
  922. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  923. bn_gather5(tmp.d, top, powerbuf, wvalue);
  924. /* At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
  925. * that has not been read yet.) */
  926. assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
  927. /* Scan the exponent one window at a time starting from the most
  928. * significant bits.
  929. */
  930. if (top & 7) {
  931. while (bits >= 0) {
  932. for (wvalue = 0, i = 0; i < 5; i++, bits--)
  933. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  934. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  935. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  936. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  937. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  938. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  939. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  940. }
  941. } else {
  942. const uint8_t *p_bytes = (const uint8_t *)p->d;
  943. int max_bits = p->top * BN_BITS2;
  944. assert(bits < max_bits);
  945. /* |p = 0| has been handled as a special case, so |max_bits| is at least
  946. * one word. */
  947. assert(max_bits >= 64);
  948. /* If the first bit to be read lands in the last byte, unroll the first
  949. * iteration to avoid reading past the bounds of |p->d|. (After the first
  950. * iteration, we are guaranteed to be past the last byte.) Note |bits|
  951. * here is the top bit, inclusive. */
  952. if (bits - 4 >= max_bits - 8) {
  953. /* Read five bits from |bits-4| through |bits|, inclusive. */
  954. wvalue = p_bytes[p->top * BN_BYTES - 1];
  955. wvalue >>= (bits - 4) & 7;
  956. wvalue &= 0x1f;
  957. bits -= 5;
  958. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  959. }
  960. while (bits >= 0) {
  961. /* Read five bits from |bits-4| through |bits|, inclusive. */
  962. int first_bit = bits - 4;
  963. wvalue = *(const uint16_t *) (p_bytes + (first_bit >> 3));
  964. wvalue >>= first_bit & 7;
  965. wvalue &= 0x1f;
  966. bits -= 5;
  967. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  968. }
  969. }
  970. ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
  971. tmp.top = top;
  972. bn_correct_top(&tmp);
  973. if (ret) {
  974. if (!BN_copy(rr, &tmp))
  975. ret = 0;
  976. goto err; /* non-zero ret means it's not error */
  977. }
  978. } else
  979. #endif
  980. {
  981. if (!copy_to_prebuf(&tmp, top, powerbuf, 0, numPowers))
  982. goto err;
  983. if (!copy_to_prebuf(&am, top, powerbuf, 1, numPowers))
  984. goto err;
  985. /* If the window size is greater than 1, then calculate
  986. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
  987. * (even powers could instead be computed as (a^(i/2))^2
  988. * to use the slight performance advantage of sqr over mul).
  989. */
  990. if (window > 1) {
  991. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
  992. goto err;
  993. if (!copy_to_prebuf(&tmp, top, powerbuf, 2, numPowers))
  994. goto err;
  995. for (i = 3; i < numPowers; i++) {
  996. /* Calculate a^i = a^(i-1) * a */
  997. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
  998. goto err;
  999. if (!copy_to_prebuf(&tmp, top, powerbuf, i, numPowers))
  1000. goto err;
  1001. }
  1002. }
  1003. bits--;
  1004. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
  1005. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1006. if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, numPowers))
  1007. goto err;
  1008. /* Scan the exponent one window at a time starting from the most
  1009. * significant bits.
  1010. */
  1011. while (bits >= 0) {
  1012. wvalue = 0; /* The 'value' of the window */
  1013. /* Scan the window, squaring the result as we go */
  1014. for (i = 0; i < window; i++, bits--) {
  1015. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
  1016. goto err;
  1017. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1018. }
  1019. /* Fetch the appropriate pre-computed value from the pre-buf */
  1020. if (!copy_from_prebuf(&am, top, powerbuf, wvalue, numPowers))
  1021. goto err;
  1022. /* Multiply the result into the intermediate result */
  1023. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
  1024. goto err;
  1025. }
  1026. }
  1027. /* Convert the final result from montgomery to standard format */
  1028. if (!BN_from_montgomery(rr, &tmp, mont, ctx))
  1029. goto err;
  1030. ret = 1;
  1031. err:
  1032. if ((in_mont == NULL) && (mont != NULL))
  1033. BN_MONT_CTX_free(mont);
  1034. if (powerbuf != NULL) {
  1035. OPENSSL_cleanse(powerbuf, powerbufLen);
  1036. if (powerbufFree)
  1037. OPENSSL_free(powerbufFree);
  1038. }
  1039. BN_CTX_end(ctx);
  1040. return (ret);
  1041. }
  1042. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  1043. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) {
  1044. BN_MONT_CTX *mont = NULL;
  1045. int b, bits, ret = 0;
  1046. int r_is_one;
  1047. BN_ULONG w, next_w;
  1048. BIGNUM *d, *r, *t;
  1049. BIGNUM *swap_tmp;
  1050. #define BN_MOD_MUL_WORD(r, w, m) \
  1051. (BN_mul_word(r, (w)) && \
  1052. (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
  1053. (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
  1054. /* BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
  1055. * probably more overhead than always using BN_mod (which uses BN_copy if a
  1056. * similar test returns true). We can use BN_mod and do not need BN_nnmod
  1057. * because our accumulator is never negative (the result of BN_mod does not
  1058. * depend on the sign of the modulus). */
  1059. #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
  1060. (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
  1061. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  1062. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  1063. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_word,
  1064. ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  1065. return 0;
  1066. }
  1067. if (!BN_is_odd(m)) {
  1068. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_word, BN_R_CALLED_WITH_EVEN_MODULUS);
  1069. return 0;
  1070. }
  1071. if (m->top == 1) {
  1072. a %= m->d[0]; /* make sure that 'a' is reduced */
  1073. }
  1074. bits = BN_num_bits(p);
  1075. if (bits == 0) {
  1076. /* x**0 mod 1 is still zero. */
  1077. if (BN_is_one(m)) {
  1078. ret = 1;
  1079. BN_zero(rr);
  1080. } else {
  1081. ret = BN_one(rr);
  1082. }
  1083. return ret;
  1084. }
  1085. if (a == 0) {
  1086. BN_zero(rr);
  1087. ret = 1;
  1088. return ret;
  1089. }
  1090. BN_CTX_start(ctx);
  1091. d = BN_CTX_get(ctx);
  1092. r = BN_CTX_get(ctx);
  1093. t = BN_CTX_get(ctx);
  1094. if (d == NULL || r == NULL || t == NULL) {
  1095. goto err;
  1096. }
  1097. if (in_mont != NULL)
  1098. mont = in_mont;
  1099. else {
  1100. if ((mont = BN_MONT_CTX_new()) == NULL) {
  1101. goto err;
  1102. }
  1103. if (!BN_MONT_CTX_set(mont, m, ctx)) {
  1104. goto err;
  1105. }
  1106. }
  1107. r_is_one = 1; /* except for Montgomery factor */
  1108. /* bits-1 >= 0 */
  1109. /* The result is accumulated in the product r*w. */
  1110. w = a; /* bit 'bits-1' of 'p' is always set */
  1111. for (b = bits - 2; b >= 0; b--) {
  1112. /* First, square r*w. */
  1113. next_w = w * w;
  1114. if ((next_w / w) != w) {
  1115. /* overflow */
  1116. if (r_is_one) {
  1117. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1118. goto err;
  1119. }
  1120. r_is_one = 0;
  1121. } else {
  1122. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1123. goto err;
  1124. }
  1125. }
  1126. next_w = 1;
  1127. }
  1128. w = next_w;
  1129. if (!r_is_one) {
  1130. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1131. goto err;
  1132. }
  1133. }
  1134. /* Second, multiply r*w by 'a' if exponent bit is set. */
  1135. if (BN_is_bit_set(p, b)) {
  1136. next_w = w * a;
  1137. if ((next_w / a) != w) {
  1138. /* overflow */
  1139. if (r_is_one) {
  1140. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1141. goto err;
  1142. }
  1143. r_is_one = 0;
  1144. } else {
  1145. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1146. goto err;
  1147. }
  1148. }
  1149. next_w = a;
  1150. }
  1151. w = next_w;
  1152. }
  1153. }
  1154. /* Finally, set r:=r*w. */
  1155. if (w != 1) {
  1156. if (r_is_one) {
  1157. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1158. goto err;
  1159. }
  1160. r_is_one = 0;
  1161. } else {
  1162. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1163. goto err;
  1164. }
  1165. }
  1166. }
  1167. if (r_is_one) {
  1168. /* can happen only if a == 1*/
  1169. if (!BN_one(rr)) {
  1170. goto err;
  1171. }
  1172. } else {
  1173. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1174. goto err;
  1175. }
  1176. }
  1177. ret = 1;
  1178. err:
  1179. if (in_mont == NULL && mont != NULL) {
  1180. BN_MONT_CTX_free(mont);
  1181. }
  1182. BN_CTX_end(ctx);
  1183. return ret;
  1184. }
  1185. #define TABLE_SIZE 32
  1186. int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
  1187. const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
  1188. BN_CTX *ctx, BN_MONT_CTX *in_mont) {
  1189. int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2,
  1190. wvalue1, wvalue2;
  1191. int r_is_one = 1;
  1192. BIGNUM *d, *r;
  1193. const BIGNUM *a_mod_m;
  1194. /* Tables of variables obtained from 'ctx' */
  1195. BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
  1196. BN_MONT_CTX *mont = NULL;
  1197. if (!(m->d[0] & 1)) {
  1198. OPENSSL_PUT_ERROR(BN, BN_mod_exp2_mont, BN_R_CALLED_WITH_EVEN_MODULUS);
  1199. return 0;
  1200. }
  1201. bits1 = BN_num_bits(p1);
  1202. bits2 = BN_num_bits(p2);
  1203. if (bits1 == 0 && bits2 == 0) {
  1204. ret = BN_one(rr);
  1205. return ret;
  1206. }
  1207. bits = (bits1 > bits2) ? bits1 : bits2;
  1208. BN_CTX_start(ctx);
  1209. d = BN_CTX_get(ctx);
  1210. r = BN_CTX_get(ctx);
  1211. val1[0] = BN_CTX_get(ctx);
  1212. val2[0] = BN_CTX_get(ctx);
  1213. if (!d || !r || !val1[0] || !val2[0]) {
  1214. goto err;
  1215. }
  1216. if (in_mont != NULL) {
  1217. mont = in_mont;
  1218. } else {
  1219. mont = BN_MONT_CTX_new();
  1220. if (mont == NULL) {
  1221. goto err;
  1222. }
  1223. if (!BN_MONT_CTX_set(mont, m, ctx)) {
  1224. goto err;
  1225. }
  1226. }
  1227. window1 = BN_window_bits_for_exponent_size(bits1);
  1228. window2 = BN_window_bits_for_exponent_size(bits2);
  1229. /* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 ..
  1230. * 2^(window1-1) */
  1231. if (a1->neg || BN_ucmp(a1, m) >= 0) {
  1232. if (!BN_mod(val1[0], a1, m, ctx)) {
  1233. goto err;
  1234. }
  1235. a_mod_m = val1[0];
  1236. } else {
  1237. a_mod_m = a1;
  1238. }
  1239. if (BN_is_zero(a_mod_m)) {
  1240. BN_zero(rr);
  1241. ret = 1;
  1242. goto err;
  1243. }
  1244. if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) {
  1245. goto err;
  1246. }
  1247. if (window1 > 1) {
  1248. if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) {
  1249. goto err;
  1250. }
  1251. j = 1 << (window1 - 1);
  1252. for (i = 1; i < j; i++) {
  1253. if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
  1254. !BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx)) {
  1255. goto err;
  1256. }
  1257. }
  1258. }
  1259. /* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 ..
  1260. * 2^(window2-1) */
  1261. if (a2->neg || BN_ucmp(a2, m) >= 0) {
  1262. if (!BN_mod(val2[0], a2, m, ctx)) {
  1263. goto err;
  1264. }
  1265. a_mod_m = val2[0];
  1266. } else {
  1267. a_mod_m = a2;
  1268. }
  1269. if (BN_is_zero(a_mod_m)) {
  1270. BN_zero(rr);
  1271. ret = 1;
  1272. goto err;
  1273. }
  1274. if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) {
  1275. goto err;
  1276. }
  1277. if (window2 > 1) {
  1278. if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) {
  1279. goto err;
  1280. }
  1281. j = 1 << (window2 - 1);
  1282. for (i = 1; i < j; i++) {
  1283. if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
  1284. !BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx)) {
  1285. goto err;
  1286. }
  1287. }
  1288. }
  1289. /* Now compute the power product, using independent windows. */
  1290. r_is_one = 1;
  1291. wvalue1 = 0; /* The 'value' of the first window */
  1292. wvalue2 = 0; /* The 'value' of the second window */
  1293. wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */
  1294. wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */
  1295. if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  1296. goto err;
  1297. }
  1298. for (b = bits - 1; b >= 0; b--) {
  1299. if (!r_is_one) {
  1300. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1301. goto err;
  1302. }
  1303. }
  1304. if (!wvalue1 && BN_is_bit_set(p1, b)) {
  1305. /* consider bits b-window1+1 .. b for this window */
  1306. i = b - window1 + 1;
  1307. while (!BN_is_bit_set(p1, i)) /* works for i<0 */
  1308. i++;
  1309. wpos1 = i;
  1310. wvalue1 = 1;
  1311. for (i = b - 1; i >= wpos1; i--) {
  1312. wvalue1 <<= 1;
  1313. if (BN_is_bit_set(p1, i))
  1314. wvalue1++;
  1315. }
  1316. }
  1317. if (!wvalue2 && BN_is_bit_set(p2, b)) {
  1318. /* consider bits b-window2+1 .. b for this window */
  1319. i = b - window2 + 1;
  1320. while (!BN_is_bit_set(p2, i))
  1321. i++;
  1322. wpos2 = i;
  1323. wvalue2 = 1;
  1324. for (i = b - 1; i >= wpos2; i--) {
  1325. wvalue2 <<= 1;
  1326. if (BN_is_bit_set(p2, i))
  1327. wvalue2++;
  1328. }
  1329. }
  1330. if (wvalue1 && b == wpos1) {
  1331. /* wvalue1 is odd and < 2^window1 */
  1332. if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx)) {
  1333. goto err;
  1334. }
  1335. wvalue1 = 0;
  1336. r_is_one = 0;
  1337. }
  1338. if (wvalue2 && b == wpos2) {
  1339. /* wvalue2 is odd and < 2^window2 */
  1340. if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx)) {
  1341. goto err;
  1342. }
  1343. wvalue2 = 0;
  1344. r_is_one = 0;
  1345. }
  1346. }
  1347. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1348. goto err;
  1349. }
  1350. ret = 1;
  1351. err:
  1352. if (in_mont == NULL && mont != NULL) {
  1353. BN_MONT_CTX_free(mont);
  1354. }
  1355. BN_CTX_end(ctx);
  1356. return ret;
  1357. }