Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 
 
 

1536 rader
42 KiB

  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.]
  56. */
  57. /* ====================================================================
  58. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  59. *
  60. * Redistribution and use in source and binary forms, with or without
  61. * modification, are permitted provided that the following conditions
  62. * are met:
  63. *
  64. * 1. Redistributions of source code must retain the above copyright
  65. * notice, this list of conditions and the following disclaimer.
  66. *
  67. * 2. Redistributions in binary form must reproduce the above copyright
  68. * notice, this list of conditions and the following disclaimer in
  69. * the documentation and/or other materials provided with the
  70. * distribution.
  71. *
  72. * 3. All advertising materials mentioning features or use of this
  73. * software must display the following acknowledgment:
  74. * "This product includes software developed by the OpenSSL Project
  75. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  76. *
  77. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  78. * endorse or promote products derived from this software without
  79. * prior written permission. For written permission, please contact
  80. * openssl-core@openssl.org.
  81. *
  82. * 5. Products derived from this software may not be called "OpenSSL"
  83. * nor may "OpenSSL" appear in their names without prior written
  84. * permission of the OpenSSL Project.
  85. *
  86. * 6. Redistributions of any form whatsoever must retain the following
  87. * acknowledgment:
  88. * "This product includes software developed by the OpenSSL Project
  89. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  90. *
  91. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  92. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  93. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  94. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  95. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  96. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  97. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  98. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  99. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  100. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  101. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  102. * OF THE POSSIBILITY OF SUCH DAMAGE.
  103. * ====================================================================
  104. *
  105. * This product includes cryptographic software written by Eric Young
  106. * (eay@cryptsoft.com). This product includes software written by Tim
  107. * Hudson (tjh@cryptsoft.com). */
  108. #include <openssl/bn.h>
  109. #include <assert.h>
  110. #include <openssl/cpu.h>
  111. #include <openssl/err.h>
  112. #include <openssl/mem.h>
  113. #include "internal.h"
  114. #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
  115. #define OPENSSL_BN_ASM_MONT5
  116. #define RSAZ_ENABLED
  117. #include "rsaz_exp.h"
  118. #endif
  119. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  120. int i, bits, ret = 0;
  121. BIGNUM *v, *rr;
  122. if ((p->flags & BN_FLG_CONSTTIME) != 0) {
  123. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  124. OPENSSL_PUT_ERROR(BN, BN_exp, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  125. return 0;
  126. }
  127. BN_CTX_start(ctx);
  128. if (r == a || r == p) {
  129. rr = BN_CTX_get(ctx);
  130. } else {
  131. rr = r;
  132. }
  133. v = BN_CTX_get(ctx);
  134. if (rr == NULL || v == NULL) {
  135. goto err;
  136. }
  137. if (BN_copy(v, a) == NULL) {
  138. goto err;
  139. }
  140. bits = BN_num_bits(p);
  141. if (BN_is_odd(p)) {
  142. if (BN_copy(rr, a) == NULL) {
  143. goto err;
  144. }
  145. } else {
  146. if (!BN_one(rr)) {
  147. goto err;
  148. }
  149. }
  150. for (i = 1; i < bits; i++) {
  151. if (!BN_sqr(v, v, ctx)) {
  152. goto err;
  153. }
  154. if (BN_is_bit_set(p, i)) {
  155. if (!BN_mul(rr, rr, v, ctx)) {
  156. goto err;
  157. }
  158. }
  159. }
  160. ret = 1;
  161. err:
  162. if (r != rr) {
  163. BN_copy(r, rr);
  164. }
  165. BN_CTX_end(ctx);
  166. return ret;
  167. }
  /* Upper bound on the number of precomputed powers held by the *variable*
   * sliding-window exponentiation routines. */
  #define TABLE_SIZE (32)
  170. typedef struct bn_recp_ctx_st {
  171. BIGNUM N; /* the divisor */
  172. BIGNUM Nr; /* the reciprocal */
  173. int num_bits;
  174. int shift;
  175. int flags;
  176. } BN_RECP_CTX;
  177. static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
  178. BN_init(&recp->N);
  179. BN_init(&recp->Nr);
  180. recp->num_bits = 0;
  181. recp->flags = 0;
  182. }
  183. static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
  184. if (recp == NULL) {
  185. return;
  186. }
  187. BN_free(&recp->N);
  188. BN_free(&recp->Nr);
  189. }
  190. static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
  191. if (!BN_copy(&(recp->N), d)) {
  192. return 0;
  193. }
  194. BN_zero(&recp->Nr);
  195. recp->num_bits = BN_num_bits(d);
  196. recp->shift = 0;
  197. return 1;
  198. }
  199. /* len is the expected size of the result We actually calculate with an extra
  200. * word of precision, so we can do faster division if the remainder is not
  201. * required.
  202. * r := 2^len / m */
  203. static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
  204. int ret = -1;
  205. BIGNUM *t;
  206. BN_CTX_start(ctx);
  207. t = BN_CTX_get(ctx);
  208. if (t == NULL) {
  209. goto err;
  210. }
  211. if (!BN_set_bit(t, len)) {
  212. goto err;
  213. }
  214. if (!BN_div(r, NULL, t, m, ctx)) {
  215. goto err;
  216. }
  217. ret = len;
  218. err:
  219. BN_CTX_end(ctx);
  220. return ret;
  221. }
  222. static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
  223. BN_RECP_CTX *recp, BN_CTX *ctx) {
  224. int i, j, ret = 0;
  225. BIGNUM *a, *b, *d, *r;
  226. BN_CTX_start(ctx);
  227. a = BN_CTX_get(ctx);
  228. b = BN_CTX_get(ctx);
  229. if (dv != NULL) {
  230. d = dv;
  231. } else {
  232. d = BN_CTX_get(ctx);
  233. }
  234. if (rem != NULL) {
  235. r = rem;
  236. } else {
  237. r = BN_CTX_get(ctx);
  238. }
  239. if (a == NULL || b == NULL || d == NULL || r == NULL) {
  240. goto err;
  241. }
  242. if (BN_ucmp(m, &(recp->N)) < 0) {
  243. BN_zero(d);
  244. if (!BN_copy(r, m)) {
  245. return 0;
  246. }
  247. BN_CTX_end(ctx);
  248. return 1;
  249. }
  250. /* We want the remainder
  251. * Given input of ABCDEF / ab
  252. * we need multiply ABCDEF by 3 digests of the reciprocal of ab */
  253. /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
  254. i = BN_num_bits(m);
  255. j = recp->num_bits << 1;
  256. if (j > i) {
  257. i = j;
  258. }
  259. /* Nr := round(2^i / N) */
  260. if (i != recp->shift) {
  261. recp->shift =
  262. BN_reciprocal(&(recp->Nr), &(recp->N), i,
  263. ctx); /* BN_reciprocal returns i, or -1 for an error */
  264. }
  265. if (recp->shift == -1) {
  266. goto err;
  267. }
  268. /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
  269. * BN_num_bits(N)))|
  270. * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
  271. * BN_num_bits(N)))|
  272. * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
  273. * = |m/N| */
  274. if (!BN_rshift(a, m, recp->num_bits)) {
  275. goto err;
  276. }
  277. if (!BN_mul(b, a, &(recp->Nr), ctx)) {
  278. goto err;
  279. }
  280. if (!BN_rshift(d, b, i - recp->num_bits)) {
  281. goto err;
  282. }
  283. d->neg = 0;
  284. if (!BN_mul(b, &(recp->N), d, ctx)) {
  285. goto err;
  286. }
  287. if (!BN_usub(r, m, b)) {
  288. goto err;
  289. }
  290. r->neg = 0;
  291. j = 0;
  292. while (BN_ucmp(r, &(recp->N)) >= 0) {
  293. if (j++ > 2) {
  294. OPENSSL_PUT_ERROR(BN, BN_div_recp, BN_R_BAD_RECIPROCAL);
  295. goto err;
  296. }
  297. if (!BN_usub(r, r, &(recp->N))) {
  298. goto err;
  299. }
  300. if (!BN_add_word(d, 1)) {
  301. goto err;
  302. }
  303. }
  304. r->neg = BN_is_zero(r) ? 0 : m->neg;
  305. d->neg = m->neg ^ recp->N.neg;
  306. ret = 1;
  307. err:
  308. BN_CTX_end(ctx);
  309. return ret;
  310. }
  311. static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
  312. BN_RECP_CTX *recp, BN_CTX *ctx) {
  313. int ret = 0;
  314. BIGNUM *a;
  315. const BIGNUM *ca;
  316. BN_CTX_start(ctx);
  317. a = BN_CTX_get(ctx);
  318. if (a == NULL) {
  319. goto err;
  320. }
  321. if (y != NULL) {
  322. if (x == y) {
  323. if (!BN_sqr(a, x, ctx)) {
  324. goto err;
  325. }
  326. } else {
  327. if (!BN_mul(a, x, y, ctx)) {
  328. goto err;
  329. }
  330. }
  331. ca = a;
  332. } else {
  333. ca = x; /* Just do the mod */
  334. }
  335. ret = BN_div_recp(NULL, r, ca, recp, ctx);
  336. err:
  337. BN_CTX_end(ctx);
  338. return ret;
  339. }
  /* BN_window_bits_for_exponent_size -- window width selection for the
   * sliding-window mod_exp functions.
   *
   * For window size 'w' (w >= 2) and a random 'b'-bit exponent, the number of
   * multiplications is a constant plus, on average,
   *
   *    2^(w-1) + (b-w)/(w+1);
   *
   * where 2^(w-1) is the cost of precomputing the table (only entries for
   * windows with the lowest bit set are needed), and (b-w)/(w+1) approximates
   * the expected number of w-bit windows, not counting the first one.
   *
   * Thus we should use
   *
   *    w >= 6  if        b > 671
   *     w = 5  if  671 > b > 239
   *     w = 4  if  239 > b >  79
   *     w = 3  if   79 > b >  23
   *    w <= 2  if   23 > b
   *
   * (with draws in between). Very small exponents are often selected with low
   * Hamming weight, so w = 1 is used for b <= 23. */
  #define BN_window_bits_for_exponent_size(b) \
    ((b) > 671 ? 6                            \
               : ((b) > 239 ? 5               \
                            : ((b) > 79 ? 4 : ((b) > 23 ? 3 : 1))))
  368. static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  369. const BIGNUM *m, BN_CTX *ctx) {
  370. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  371. int start = 1;
  372. BIGNUM *aa;
  373. /* Table of variables obtained from 'ctx' */
  374. BIGNUM *val[TABLE_SIZE];
  375. BN_RECP_CTX recp;
  376. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  377. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  378. OPENSSL_PUT_ERROR(BN, mod_exp_recp, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  379. return 0;
  380. }
  381. bits = BN_num_bits(p);
  382. if (bits == 0) {
  383. ret = BN_one(r);
  384. return ret;
  385. }
  386. BN_CTX_start(ctx);
  387. aa = BN_CTX_get(ctx);
  388. val[0] = BN_CTX_get(ctx);
  389. if (!aa || !val[0]) {
  390. goto err;
  391. }
  392. BN_RECP_CTX_init(&recp);
  393. if (m->neg) {
  394. /* ignore sign of 'm' */
  395. if (!BN_copy(aa, m)) {
  396. goto err;
  397. }
  398. aa->neg = 0;
  399. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
  400. goto err;
  401. }
  402. } else {
  403. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
  404. goto err;
  405. }
  406. }
  407. if (!BN_nnmod(val[0], a, m, ctx)) {
  408. goto err; /* 1 */
  409. }
  410. if (BN_is_zero(val[0])) {
  411. BN_zero(r);
  412. ret = 1;
  413. goto err;
  414. }
  415. window = BN_window_bits_for_exponent_size(bits);
  416. if (window > 1) {
  417. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
  418. goto err; /* 2 */
  419. }
  420. j = 1 << (window - 1);
  421. for (i = 1; i < j; i++) {
  422. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  423. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
  424. goto err;
  425. }
  426. }
  427. }
  428. start = 1; /* This is used to avoid multiplication etc
  429. * when there is only the value '1' in the
  430. * buffer. */
  431. wvalue = 0; /* The 'value' of the window */
  432. wstart = bits - 1; /* The top bit of the window */
  433. wend = 0; /* The bottom bit of the window */
  434. if (!BN_one(r)) {
  435. goto err;
  436. }
  437. for (;;) {
  438. if (BN_is_bit_set(p, wstart) == 0) {
  439. if (!start) {
  440. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  441. goto err;
  442. }
  443. }
  444. if (wstart == 0) {
  445. break;
  446. }
  447. wstart--;
  448. continue;
  449. }
  450. /* We now have wstart on a 'set' bit, we now need to work out
  451. * how bit a window to do. To do this we need to scan
  452. * forward until the last set bit before the end of the
  453. * window */
  454. wvalue = 1;
  455. wend = 0;
  456. for (i = 1; i < window; i++) {
  457. if (wstart - i < 0) {
  458. break;
  459. }
  460. if (BN_is_bit_set(p, wstart - i)) {
  461. wvalue <<= (i - wend);
  462. wvalue |= 1;
  463. wend = i;
  464. }
  465. }
  466. /* wend is the size of the current window */
  467. j = wend + 1;
  468. /* add the 'bytes above' */
  469. if (!start) {
  470. for (i = 0; i < j; i++) {
  471. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  472. goto err;
  473. }
  474. }
  475. }
  476. /* wvalue will be an odd number < 2^window */
  477. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
  478. goto err;
  479. }
  480. /* move the 'window' down further */
  481. wstart -= wend + 1;
  482. wvalue = 0;
  483. start = 0;
  484. if (wstart < 0) {
  485. break;
  486. }
  487. }
  488. ret = 1;
  489. err:
  490. BN_CTX_end(ctx);
  491. BN_RECP_CTX_free(&recp);
  492. return ret;
  493. }
  494. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  495. BN_CTX *ctx) {
  496. /* For even modulus m = 2^k*m_odd, it might make sense to compute
  497. * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
  498. * exponentiation for the odd part), using appropriate exponent
  499. * reductions, and combine the results using the CRT.
  500. *
  501. * For now, we use Montgomery only if the modulus is odd; otherwise,
  502. * exponentiation using the reciprocal-based quick remaindering
  503. * algorithm is used.
  504. *
  505. * (Timing obtained with expspeed.c [computations a^p mod m
  506. * where a, p, m are of the same length: 256, 512, 1024, 2048,
  507. * 4096, 8192 bits], compared to the running time of the
  508. * standard algorithm:
  509. *
  510. * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
  511. * 55 .. 77 % [UltraSparc processor, but
  512. * debug-solaris-sparcv8-gcc conf.]
  513. *
  514. * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
  515. * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
  516. *
  517. * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
  518. * at 2048 and more bits, but at 512 and 1024 bits, it was
  519. * slower even than the standard algorithm!
  520. *
  521. * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
  522. * should be obtained when the new Montgomery reduction code
  523. * has been integrated into OpenSSL.) */
  524. if (BN_is_odd(m)) {
  525. if (a->top == 1 && !a->neg && BN_get_flags(p, BN_FLG_CONSTTIME) == 0) {
  526. BN_ULONG A = a->d[0];
  527. return BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
  528. }
  529. return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  530. }
  531. return mod_exp_recp(r, a, p, m, ctx);
  532. }
  533. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  534. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) {
  535. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  536. int start = 1;
  537. BIGNUM *d, *r;
  538. const BIGNUM *aa;
  539. /* Table of variables obtained from 'ctx' */
  540. BIGNUM *val[TABLE_SIZE];
  541. BN_MONT_CTX *mont = NULL;
  542. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  543. return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
  544. }
  545. if (!BN_is_odd(m)) {
  546. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont, BN_R_CALLED_WITH_EVEN_MODULUS);
  547. return 0;
  548. }
  549. bits = BN_num_bits(p);
  550. if (bits == 0) {
  551. ret = BN_one(rr);
  552. return ret;
  553. }
  554. BN_CTX_start(ctx);
  555. d = BN_CTX_get(ctx);
  556. r = BN_CTX_get(ctx);
  557. val[0] = BN_CTX_get(ctx);
  558. if (!d || !r || !val[0]) {
  559. goto err;
  560. }
  561. /* If this is not done, things will break in the montgomery part */
  562. if (in_mont != NULL) {
  563. mont = in_mont;
  564. } else {
  565. mont = BN_MONT_CTX_new();
  566. if (mont == NULL) {
  567. goto err;
  568. }
  569. if (!BN_MONT_CTX_set(mont, m, ctx)) {
  570. goto err;
  571. }
  572. }
  573. if (a->neg || BN_ucmp(a, m) >= 0) {
  574. if (!BN_nnmod(val[0], a, m, ctx)) {
  575. goto err;
  576. }
  577. aa = val[0];
  578. } else {
  579. aa = a;
  580. }
  581. if (BN_is_zero(aa)) {
  582. BN_zero(rr);
  583. ret = 1;
  584. goto err;
  585. }
  586. if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
  587. goto err; /* 1 */
  588. }
  589. window = BN_window_bits_for_exponent_size(bits);
  590. if (window > 1) {
  591. if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
  592. goto err; /* 2 */
  593. }
  594. j = 1 << (window - 1);
  595. for (i = 1; i < j; i++) {
  596. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  597. !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
  598. goto err;
  599. }
  600. }
  601. }
  602. start = 1; /* This is used to avoid multiplication etc
  603. * when there is only the value '1' in the
  604. * buffer. */
  605. wvalue = 0; /* The 'value' of the window */
  606. wstart = bits - 1; /* The top bit of the window */
  607. wend = 0; /* The bottom bit of the window */
  608. j = m->top; /* borrow j */
  609. if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  610. if (bn_wexpand(r, j) == NULL)
  611. goto err;
  612. /* 2^(top*BN_BITS2) - m */
  613. r->d[0] = (0 - m->d[0]) & BN_MASK2;
  614. for (i = 1; i < j; i++)
  615. r->d[i] = (~m->d[i]) & BN_MASK2;
  616. r->top = j;
  617. /* Upper words will be zero if the corresponding words of 'm'
  618. * were 0xfff[...], so decrement r->top accordingly. */
  619. bn_correct_top(r);
  620. } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  621. goto err;
  622. }
  623. for (;;) {
  624. if (BN_is_bit_set(p, wstart) == 0) {
  625. if (!start) {
  626. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
  627. goto err;
  628. }
  629. if (wstart == 0) {
  630. break;
  631. }
  632. wstart--;
  633. continue;
  634. }
  635. /* We now have wstart on a 'set' bit, we now need to work out how bit a
  636. * window to do. To do this we need to scan forward until the last set bit
  637. * before the end of the window */
  638. j = wstart;
  639. wvalue = 1;
  640. wend = 0;
  641. for (i = 1; i < window; i++) {
  642. if (wstart - i < 0) {
  643. break;
  644. }
  645. if (BN_is_bit_set(p, wstart - i)) {
  646. wvalue <<= (i - wend);
  647. wvalue |= 1;
  648. wend = i;
  649. }
  650. }
  651. /* wend is the size of the current window */
  652. j = wend + 1;
  653. /* add the 'bytes above' */
  654. if (!start) {
  655. for (i = 0; i < j; i++) {
  656. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  657. goto err;
  658. }
  659. }
  660. }
  661. /* wvalue will be an odd number < 2^window */
  662. if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
  663. goto err;
  664. }
  665. /* move the 'window' down further */
  666. wstart -= wend + 1;
  667. wvalue = 0;
  668. start = 0;
  669. if (wstart < 0) {
  670. break;
  671. }
  672. }
  673. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  674. goto err;
  675. }
  676. ret = 1;
  677. err:
  678. if (in_mont == NULL && mont != NULL) {
  679. BN_MONT_CTX_free(mont);
  680. }
  681. BN_CTX_end(ctx);
  682. return ret;
  683. }
  684. /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  685. * layout so that accessing any of these table values shows the same access
  686. * pattern as far as cache lines are concerned. The following functions are
  687. * used to transfer a BIGNUM from/to that table. */
  688. static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
  689. int width) {
  690. size_t i, j;
  691. if (top > b->top) {
  692. top = b->top; /* this works because 'buf' is explicitly zeroed */
  693. }
  694. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  695. buf[j] = ((unsigned char *)b->d)[i];
  696. }
  697. return 1;
  698. }
  699. static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
  700. int width) {
  701. size_t i, j;
  702. if (bn_wexpand(b, top) == NULL) {
  703. return 0;
  704. }
  705. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  706. ((unsigned char *)b->d)[i] = buf[j];
  707. }
  708. b->top = top;
  709. bn_correct_top(b);
  710. return 1;
  711. }
  /* BN_mod_exp_mont_consttime is based on the assumption that the L1 data
   * cache line width of the target processor is at least the following value.
   * The mask selects the offset of an address within such a line. */
  #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
  #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK ((MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH) - 1)
  /* Window sizes optimized for the fixed window size modular exponentiation
   * algorithm (BN_mod_exp_mont_consttime).
   *
   * To achieve the security goals of BN_mod_exp_mont_consttime, the maximum
   * size of the window must not exceed
   * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
   *
   * Window size thresholds are defined for cache line sizes of 32 and 64,
   * where log_2(32)=5 and log_2(64)=6 respectively. A window size of 7 should
   * only be used on processors that have a 128 byte or greater cache line
   * size. */
  #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

  #define BN_window_bits_for_ctime_exponent_size(b) \
    ((b) > 306 ? 5 : ((b) > 89 ? 4 : ((b) > 22 ? 3 : 1)))
  #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)

  #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

  #define BN_window_bits_for_ctime_exponent_size(b) \
    ((b) > 937 ? 6 : ((b) > 306 ? 5 : ((b) > 89 ? 4 : ((b) > 22 ? 3 : 1))))
  #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)

  #endif
  /* Given a pointer value, compute the next address that is a multiple of the
   * cache line width. The result is always strictly greater than the input: a
   * pointer that is already aligned is advanced by one full
   * MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH. */
  #define MOD_EXP_CTIME_ALIGN(x_)                \
    ((unsigned char *)(x_) +                     \
     (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH -       \
      ((size_t)(x_) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  743. /* This variant of BN_mod_exp_mont() uses fixed windows and the special
  744. * precomputation memory layout to limit data-dependency to a minimum
  745. * to protect secret exponents (cf. the hyper-threading timing attacks
  746. * pointed out by Colin Percival,
  747. * http://www.daemonology.net/hyperthreading-considered-harmful/)
  748. */
  749. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  750. const BIGNUM *m, BN_CTX *ctx,
  751. BN_MONT_CTX *in_mont) {
  752. int i, bits, ret = 0, window, wvalue;
  753. int top;
  754. BN_MONT_CTX *mont = NULL;
  755. int numPowers;
  756. unsigned char *powerbufFree = NULL;
  757. int powerbufLen = 0;
  758. unsigned char *powerbuf = NULL;
  759. BIGNUM tmp, am;
  760. top = m->top;
  761. if (!(m->d[0] & 1)) {
  762. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_consttime,
  763. BN_R_CALLED_WITH_EVEN_MODULUS);
  764. return 0;
  765. }
  766. bits = BN_num_bits(p);
  767. if (bits == 0) {
  768. ret = BN_one(rr);
  769. return ret;
  770. }
  771. BN_CTX_start(ctx);
  772. /* Allocate a montgomery context if it was not supplied by the caller.
  773. * If this is not done, things will break in the montgomery part.
  774. */
  775. if (in_mont != NULL)
  776. mont = in_mont;
  777. else {
  778. if ((mont = BN_MONT_CTX_new()) == NULL)
  779. goto err;
  780. if (!BN_MONT_CTX_set(mont, m, ctx))
  781. goto err;
  782. }
  783. #ifdef RSAZ_ENABLED
  784. /* If the size of the operands allow it, perform the optimized
  785. * RSAZ exponentiation. For further information see
  786. * crypto/bn/rsaz_exp.c and accompanying assembly modules. */
  787. if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
  788. rsaz_avx2_eligible()) {
  789. if (NULL == bn_wexpand(rr, 16))
  790. goto err;
  791. RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
  792. rr->top = 16;
  793. rr->neg = 0;
  794. bn_correct_top(rr);
  795. ret = 1;
  796. goto err;
  797. } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
  798. if (NULL == bn_wexpand(rr, 8))
  799. goto err;
  800. RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
  801. rr->top = 8;
  802. rr->neg = 0;
  803. bn_correct_top(rr);
  804. ret = 1;
  805. goto err;
  806. }
  807. #endif
  808. /* Get the window size to use with size of p. */
  809. window = BN_window_bits_for_ctime_exponent_size(bits);
  810. #if defined(OPENSSL_BN_ASM_MONT5)
  811. if (window >= 5) {
  812. window = 5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */
  813. if ((top & 7) == 0)
  814. powerbufLen += 2 * top * sizeof(m->d[0]);
  815. }
  816. #endif
  817. (void)0;
  818. /* Allocate a buffer large enough to hold all of the pre-computed
  819. * powers of am, am itself and tmp.
  820. */
  821. numPowers = 1 << window;
  822. powerbufLen +=
  823. sizeof(m->d[0]) *
  824. (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
  825. #ifdef alloca
  826. if (powerbufLen < 3072)
  827. powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  828. else
  829. #endif
  830. if ((powerbufFree = (unsigned char *)OPENSSL_malloc(
  831. powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
  832. goto err;
  833. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  834. memset(powerbuf, 0, powerbufLen);
  835. #ifdef alloca
  836. if (powerbufLen < 3072)
  837. powerbufFree = NULL;
  838. #endif
  839. /* lay down tmp and am right after powers table */
  840. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  841. am.d = tmp.d + top;
  842. tmp.top = am.top = 0;
  843. tmp.dmax = am.dmax = top;
  844. tmp.neg = am.neg = 0;
  845. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  846. /* prepare a^0 in Montgomery domain */
  847. /* by Shay Gueron's suggestion */
  848. if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  849. /* 2^(top*BN_BITS2) - m */
  850. tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
  851. for (i = 1; i < top; i++)
  852. tmp.d[i] = (~m->d[i]) & BN_MASK2;
  853. tmp.top = top;
  854. } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
  855. goto err;
  856. /* prepare a^1 in Montgomery domain */
  857. if (a->neg || BN_ucmp(a, m) >= 0) {
  858. if (!BN_mod(&am, a, m, ctx))
  859. goto err;
  860. if (!BN_to_montgomery(&am, &am, mont, ctx))
  861. goto err;
  862. } else if (!BN_to_montgomery(&am, a, mont, ctx))
  863. goto err;
  864. #if defined(OPENSSL_BN_ASM_MONT5)
  865. /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
  866. * specifically optimization of cache-timing attack countermeasures
  867. * and pre-computation optimization. */
  868. /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  869. * 512-bit RSA is hardly relevant, we omit it to spare size... */
  870. if (window == 5 && top > 1) {
  871. void bn_mul_mont_gather5(BN_ULONG * rp, const BN_ULONG * ap,
  872. const void * table, const BN_ULONG * np,
  873. const BN_ULONG * n0, int num, int power);
  874. void bn_scatter5(const BN_ULONG * inp, size_t num, void * table,
  875. size_t power);
  876. void bn_gather5(BN_ULONG * out, size_t num, void * table, size_t power);
  877. void bn_power5(BN_ULONG * rp, const BN_ULONG * ap, const void * table,
  878. const BN_ULONG * np, const BN_ULONG * n0, int num,
  879. int power);
  880. int bn_from_montgomery(BN_ULONG * rp, const BN_ULONG * ap,
  881. const BN_ULONG * not_used, const BN_ULONG * np,
  882. const BN_ULONG * n0, int num);
  883. BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
  884. /* BN_to_montgomery can contaminate words above .top
  885. * [in BN_DEBUG[_DEBUG] build]... */
  886. for (i = am.top; i < top; i++)
  887. am.d[i] = 0;
  888. for (i = tmp.top; i < top; i++)
  889. tmp.d[i] = 0;
  890. if (top & 7)
  891. np2 = np;
  892. else
  893. for (np2 = am.d + top, i = 0; i < top; i++)
  894. np2[2 * i] = np[i];
  895. bn_scatter5(tmp.d, top, powerbuf, 0);
  896. bn_scatter5(am.d, am.top, powerbuf, 1);
  897. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  898. bn_scatter5(tmp.d, top, powerbuf, 2);
  899. /* same as above, but uses squaring for 1/2 of operations */
  900. for (i = 4; i < 32; i *= 2) {
  901. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  902. bn_scatter5(tmp.d, top, powerbuf, i);
  903. }
  904. for (i = 3; i < 8; i += 2) {
  905. int j;
  906. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  907. bn_scatter5(tmp.d, top, powerbuf, i);
  908. for (j = 2 * i; j < 32; j *= 2) {
  909. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  910. bn_scatter5(tmp.d, top, powerbuf, j);
  911. }
  912. }
  913. for (; i < 16; i += 2) {
  914. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  915. bn_scatter5(tmp.d, top, powerbuf, i);
  916. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  917. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  918. }
  919. for (; i < 32; i += 2) {
  920. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  921. bn_scatter5(tmp.d, top, powerbuf, i);
  922. }
  923. bits--;
  924. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
  925. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  926. bn_gather5(tmp.d, top, powerbuf, wvalue);
  927. /* At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
  928. * that has not been read yet.) */
  929. assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
  930. /* Scan the exponent one window at a time starting from the most
  931. * significant bits.
  932. */
  933. if (top & 7) {
  934. while (bits >= 0) {
  935. for (wvalue = 0, i = 0; i < 5; i++, bits--)
  936. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  937. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  938. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  939. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  940. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  941. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  942. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  943. }
  944. } else {
  945. const uint8_t *p_bytes = (const uint8_t *)p->d;
  946. int max_bits = p->top * BN_BITS2;
  947. assert(bits < max_bits);
  948. /* |p = 0| has been handled as a special case, so |max_bits| is at least
  949. * one word. */
  950. assert(max_bits >= 64);
  951. /* If the first bit to be read lands in the last byte, unroll the first
  952. * iteration to avoid reading past the bounds of |p->d|. (After the first
  953. * iteration, we are guaranteed to be past the last byte.) Note |bits|
  954. * here is the top bit, inclusive. */
  955. if (bits - 4 >= max_bits - 8) {
  956. /* Read five bits from |bits-4| through |bits|, inclusive. */
  957. wvalue = p_bytes[p->top * BN_BYTES - 1];
  958. wvalue >>= (bits - 4) & 7;
  959. wvalue &= 0x1f;
  960. bits -= 5;
  961. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  962. }
  963. while (bits >= 0) {
  964. /* Read five bits from |bits-4| through |bits|, inclusive. */
  965. int first_bit = bits - 4;
  966. wvalue = *(const uint16_t *) (p_bytes + (first_bit >> 3));
  967. wvalue >>= first_bit & 7;
  968. wvalue &= 0x1f;
  969. bits -= 5;
  970. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  971. }
  972. }
  973. ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
  974. tmp.top = top;
  975. bn_correct_top(&tmp);
  976. if (ret) {
  977. if (!BN_copy(rr, &tmp))
  978. ret = 0;
  979. goto err; /* non-zero ret means it's not error */
  980. }
  981. } else
  982. #endif
  983. {
  984. if (!copy_to_prebuf(&tmp, top, powerbuf, 0, numPowers))
  985. goto err;
  986. if (!copy_to_prebuf(&am, top, powerbuf, 1, numPowers))
  987. goto err;
  988. /* If the window size is greater than 1, then calculate
  989. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
  990. * (even powers could instead be computed as (a^(i/2))^2
  991. * to use the slight performance advantage of sqr over mul).
  992. */
  993. if (window > 1) {
  994. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
  995. goto err;
  996. if (!copy_to_prebuf(&tmp, top, powerbuf, 2, numPowers))
  997. goto err;
  998. for (i = 3; i < numPowers; i++) {
  999. /* Calculate a^i = a^(i-1) * a */
  1000. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
  1001. goto err;
  1002. if (!copy_to_prebuf(&tmp, top, powerbuf, i, numPowers))
  1003. goto err;
  1004. }
  1005. }
  1006. bits--;
  1007. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
  1008. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1009. if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, numPowers))
  1010. goto err;
  1011. /* Scan the exponent one window at a time starting from the most
  1012. * significant bits.
  1013. */
  1014. while (bits >= 0) {
  1015. wvalue = 0; /* The 'value' of the window */
  1016. /* Scan the window, squaring the result as we go */
  1017. for (i = 0; i < window; i++, bits--) {
  1018. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
  1019. goto err;
  1020. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1021. }
  1022. /* Fetch the appropriate pre-computed value from the pre-buf */
  1023. if (!copy_from_prebuf(&am, top, powerbuf, wvalue, numPowers))
  1024. goto err;
  1025. /* Multiply the result into the intermediate result */
  1026. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
  1027. goto err;
  1028. }
  1029. }
  1030. /* Convert the final result from montgomery to standard format */
  1031. if (!BN_from_montgomery(rr, &tmp, mont, ctx))
  1032. goto err;
  1033. ret = 1;
  1034. err:
  1035. if ((in_mont == NULL) && (mont != NULL))
  1036. BN_MONT_CTX_free(mont);
  1037. if (powerbuf != NULL) {
  1038. OPENSSL_cleanse(powerbuf, powerbufLen);
  1039. if (powerbufFree)
  1040. OPENSSL_free(powerbufFree);
  1041. }
  1042. BN_CTX_end(ctx);
  1043. return (ret);
  1044. }
  1045. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  1046. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) {
  1047. BN_MONT_CTX *mont = NULL;
  1048. int b, bits, ret = 0;
  1049. int r_is_one;
  1050. BN_ULONG w, next_w;
  1051. BIGNUM *d, *r, *t;
  1052. BIGNUM *swap_tmp;
  1053. #define BN_MOD_MUL_WORD(r, w, m) \
  1054. (BN_mul_word(r, (w)) && \
  1055. (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
  1056. (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
  1057. /* BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
  1058. * probably more overhead than always using BN_mod (which uses BN_copy if a
  1059. * similar test returns true). We can use BN_mod and do not need BN_nnmod
  1060. * because our accumulator is never negative (the result of BN_mod does not
  1061. * depend on the sign of the modulus). */
  1062. #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
  1063. (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
  1064. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  1065. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  1066. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_word,
  1067. ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  1068. return 0;
  1069. }
  1070. if (!BN_is_odd(m)) {
  1071. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_word, BN_R_CALLED_WITH_EVEN_MODULUS);
  1072. return 0;
  1073. }
  1074. if (m->top == 1) {
  1075. a %= m->d[0]; /* make sure that 'a' is reduced */
  1076. }
  1077. bits = BN_num_bits(p);
  1078. if (bits == 0) {
  1079. /* x**0 mod 1 is still zero. */
  1080. if (BN_is_one(m)) {
  1081. ret = 1;
  1082. BN_zero(rr);
  1083. } else {
  1084. ret = BN_one(rr);
  1085. }
  1086. return ret;
  1087. }
  1088. if (a == 0) {
  1089. BN_zero(rr);
  1090. ret = 1;
  1091. return ret;
  1092. }
  1093. BN_CTX_start(ctx);
  1094. d = BN_CTX_get(ctx);
  1095. r = BN_CTX_get(ctx);
  1096. t = BN_CTX_get(ctx);
  1097. if (d == NULL || r == NULL || t == NULL) {
  1098. goto err;
  1099. }
  1100. if (in_mont != NULL)
  1101. mont = in_mont;
  1102. else {
  1103. if ((mont = BN_MONT_CTX_new()) == NULL) {
  1104. goto err;
  1105. }
  1106. if (!BN_MONT_CTX_set(mont, m, ctx)) {
  1107. goto err;
  1108. }
  1109. }
  1110. r_is_one = 1; /* except for Montgomery factor */
  1111. /* bits-1 >= 0 */
  1112. /* The result is accumulated in the product r*w. */
  1113. w = a; /* bit 'bits-1' of 'p' is always set */
  1114. for (b = bits - 2; b >= 0; b--) {
  1115. /* First, square r*w. */
  1116. next_w = w * w;
  1117. if ((next_w / w) != w) {
  1118. /* overflow */
  1119. if (r_is_one) {
  1120. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1121. goto err;
  1122. }
  1123. r_is_one = 0;
  1124. } else {
  1125. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1126. goto err;
  1127. }
  1128. }
  1129. next_w = 1;
  1130. }
  1131. w = next_w;
  1132. if (!r_is_one) {
  1133. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1134. goto err;
  1135. }
  1136. }
  1137. /* Second, multiply r*w by 'a' if exponent bit is set. */
  1138. if (BN_is_bit_set(p, b)) {
  1139. next_w = w * a;
  1140. if ((next_w / a) != w) {
  1141. /* overflow */
  1142. if (r_is_one) {
  1143. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1144. goto err;
  1145. }
  1146. r_is_one = 0;
  1147. } else {
  1148. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1149. goto err;
  1150. }
  1151. }
  1152. next_w = a;
  1153. }
  1154. w = next_w;
  1155. }
  1156. }
  1157. /* Finally, set r:=r*w. */
  1158. if (w != 1) {
  1159. if (r_is_one) {
  1160. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1161. goto err;
  1162. }
  1163. r_is_one = 0;
  1164. } else {
  1165. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1166. goto err;
  1167. }
  1168. }
  1169. }
  1170. if (r_is_one) {
  1171. /* can happen only if a == 1*/
  1172. if (!BN_one(rr)) {
  1173. goto err;
  1174. }
  1175. } else {
  1176. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1177. goto err;
  1178. }
  1179. }
  1180. ret = 1;
  1181. err:
  1182. if (in_mont == NULL && mont != NULL) {
  1183. BN_MONT_CTX_free(mont);
  1184. }
  1185. BN_CTX_end(ctx);
  1186. return ret;
  1187. }
  1188. #define TABLE_SIZE 32
  1189. int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
  1190. const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
  1191. BN_CTX *ctx, BN_MONT_CTX *in_mont) {
  1192. int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2,
  1193. wvalue1, wvalue2;
  1194. int r_is_one = 1;
  1195. BIGNUM *d, *r;
  1196. const BIGNUM *a_mod_m;
  1197. /* Tables of variables obtained from 'ctx' */
  1198. BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
  1199. BN_MONT_CTX *mont = NULL;
  1200. if (!(m->d[0] & 1)) {
  1201. OPENSSL_PUT_ERROR(BN, BN_mod_exp2_mont, BN_R_CALLED_WITH_EVEN_MODULUS);
  1202. return 0;
  1203. }
  1204. bits1 = BN_num_bits(p1);
  1205. bits2 = BN_num_bits(p2);
  1206. if (bits1 == 0 && bits2 == 0) {
  1207. ret = BN_one(rr);
  1208. return ret;
  1209. }
  1210. bits = (bits1 > bits2) ? bits1 : bits2;
  1211. BN_CTX_start(ctx);
  1212. d = BN_CTX_get(ctx);
  1213. r = BN_CTX_get(ctx);
  1214. val1[0] = BN_CTX_get(ctx);
  1215. val2[0] = BN_CTX_get(ctx);
  1216. if (!d || !r || !val1[0] || !val2[0]) {
  1217. goto err;
  1218. }
  1219. if (in_mont != NULL) {
  1220. mont = in_mont;
  1221. } else {
  1222. mont = BN_MONT_CTX_new();
  1223. if (mont == NULL) {
  1224. goto err;
  1225. }
  1226. if (!BN_MONT_CTX_set(mont, m, ctx)) {
  1227. goto err;
  1228. }
  1229. }
  1230. window1 = BN_window_bits_for_exponent_size(bits1);
  1231. window2 = BN_window_bits_for_exponent_size(bits2);
  1232. /* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 ..
  1233. * 2^(window1-1) */
  1234. if (a1->neg || BN_ucmp(a1, m) >= 0) {
  1235. if (!BN_mod(val1[0], a1, m, ctx)) {
  1236. goto err;
  1237. }
  1238. a_mod_m = val1[0];
  1239. } else {
  1240. a_mod_m = a1;
  1241. }
  1242. if (BN_is_zero(a_mod_m)) {
  1243. BN_zero(rr);
  1244. ret = 1;
  1245. goto err;
  1246. }
  1247. if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) {
  1248. goto err;
  1249. }
  1250. if (window1 > 1) {
  1251. if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) {
  1252. goto err;
  1253. }
  1254. j = 1 << (window1 - 1);
  1255. for (i = 1; i < j; i++) {
  1256. if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
  1257. !BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx)) {
  1258. goto err;
  1259. }
  1260. }
  1261. }
  1262. /* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 ..
  1263. * 2^(window2-1) */
  1264. if (a2->neg || BN_ucmp(a2, m) >= 0) {
  1265. if (!BN_mod(val2[0], a2, m, ctx)) {
  1266. goto err;
  1267. }
  1268. a_mod_m = val2[0];
  1269. } else {
  1270. a_mod_m = a2;
  1271. }
  1272. if (BN_is_zero(a_mod_m)) {
  1273. BN_zero(rr);
  1274. ret = 1;
  1275. goto err;
  1276. }
  1277. if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) {
  1278. goto err;
  1279. }
  1280. if (window2 > 1) {
  1281. if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) {
  1282. goto err;
  1283. }
  1284. j = 1 << (window2 - 1);
  1285. for (i = 1; i < j; i++) {
  1286. if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
  1287. !BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx)) {
  1288. goto err;
  1289. }
  1290. }
  1291. }
  1292. /* Now compute the power product, using independent windows. */
  1293. r_is_one = 1;
  1294. wvalue1 = 0; /* The 'value' of the first window */
  1295. wvalue2 = 0; /* The 'value' of the second window */
  1296. wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */
  1297. wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */
  1298. if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  1299. goto err;
  1300. }
  1301. for (b = bits - 1; b >= 0; b--) {
  1302. if (!r_is_one) {
  1303. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1304. goto err;
  1305. }
  1306. }
  1307. if (!wvalue1 && BN_is_bit_set(p1, b)) {
  1308. /* consider bits b-window1+1 .. b for this window */
  1309. i = b - window1 + 1;
  1310. while (!BN_is_bit_set(p1, i)) /* works for i<0 */
  1311. i++;
  1312. wpos1 = i;
  1313. wvalue1 = 1;
  1314. for (i = b - 1; i >= wpos1; i--) {
  1315. wvalue1 <<= 1;
  1316. if (BN_is_bit_set(p1, i))
  1317. wvalue1++;
  1318. }
  1319. }
  1320. if (!wvalue2 && BN_is_bit_set(p2, b)) {
  1321. /* consider bits b-window2+1 .. b for this window */
  1322. i = b - window2 + 1;
  1323. while (!BN_is_bit_set(p2, i))
  1324. i++;
  1325. wpos2 = i;
  1326. wvalue2 = 1;
  1327. for (i = b - 1; i >= wpos2; i--) {
  1328. wvalue2 <<= 1;
  1329. if (BN_is_bit_set(p2, i))
  1330. wvalue2++;
  1331. }
  1332. }
  1333. if (wvalue1 && b == wpos1) {
  1334. /* wvalue1 is odd and < 2^window1 */
  1335. if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx)) {
  1336. goto err;
  1337. }
  1338. wvalue1 = 0;
  1339. r_is_one = 0;
  1340. }
  1341. if (wvalue2 && b == wpos2) {
  1342. /* wvalue2 is odd and < 2^window2 */
  1343. if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx)) {
  1344. goto err;
  1345. }
  1346. wvalue2 = 0;
  1347. r_is_one = 0;
  1348. }
  1349. }
  1350. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1351. goto err;
  1352. }
  1353. ret = 1;
  1354. err:
  1355. if (in_mont == NULL && mont != NULL) {
  1356. BN_MONT_CTX_free(mont);
  1357. }
  1358. BN_CTX_end(ctx);
  1359. return ret;
  1360. }