Não pode escolher mais do que 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

exponentiation.c 41 KiB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509
  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.]
  56. */
  57. /* ====================================================================
  58. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  59. *
  60. * Redistribution and use in source and binary forms, with or without
  61. * modification, are permitted provided that the following conditions
  62. * are met:
  63. *
  64. * 1. Redistributions of source code must retain the above copyright
  65. * notice, this list of conditions and the following disclaimer.
  66. *
  67. * 2. Redistributions in binary form must reproduce the above copyright
  68. * notice, this list of conditions and the following disclaimer in
  69. * the documentation and/or other materials provided with the
  70. * distribution.
  71. *
  72. * 3. All advertising materials mentioning features or use of this
  73. * software must display the following acknowledgment:
  74. * "This product includes software developed by the OpenSSL Project
  75. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  76. *
  77. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  78. * endorse or promote products derived from this software without
  79. * prior written permission. For written permission, please contact
  80. * openssl-core@openssl.org.
  81. *
  82. * 5. Products derived from this software may not be called "OpenSSL"
  83. * nor may "OpenSSL" appear in their names without prior written
  84. * permission of the OpenSSL Project.
  85. *
  86. * 6. Redistributions of any form whatsoever must retain the following
  87. * acknowledgment:
  88. * "This product includes software developed by the OpenSSL Project
  89. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  90. *
  91. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  92. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  93. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  94. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  95. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  96. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  97. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  98. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  99. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  100. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  101. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  102. * OF THE POSSIBILITY OF SUCH DAMAGE.
  103. * ====================================================================
  104. *
  105. * This product includes cryptographic software written by Eric Young
  106. * (eay@cryptsoft.com). This product includes software written by Tim
  107. * Hudson (tjh@cryptsoft.com). */
  108. #include <openssl/bn.h>
  109. #include <openssl/cpu.h>
  110. #include <openssl/err.h>
  111. #include <openssl/mem.h>
  112. #include "internal.h"
  113. #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
  114. #define OPENSSL_BN_ASM_MONT5
  115. #define RSAZ_ENABLED
  116. #include "rsaz_exp.h"
  117. #endif
  118. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  119. int i, bits, ret = 0;
  120. BIGNUM *v, *rr;
  121. if ((p->flags & BN_FLG_CONSTTIME) != 0) {
  122. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  123. OPENSSL_PUT_ERROR(BN, BN_exp, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  124. return 0;
  125. }
  126. BN_CTX_start(ctx);
  127. if (r == a || r == p) {
  128. rr = BN_CTX_get(ctx);
  129. } else {
  130. rr = r;
  131. }
  132. v = BN_CTX_get(ctx);
  133. if (rr == NULL || v == NULL) {
  134. goto err;
  135. }
  136. if (BN_copy(v, a) == NULL) {
  137. goto err;
  138. }
  139. bits = BN_num_bits(p);
  140. if (BN_is_odd(p)) {
  141. if (BN_copy(rr, a) == NULL) {
  142. goto err;
  143. }
  144. } else {
  145. if (!BN_one(rr)) {
  146. goto err;
  147. }
  148. }
  149. for (i = 1; i < bits; i++) {
  150. if (!BN_sqr(v, v, ctx)) {
  151. goto err;
  152. }
  153. if (BN_is_bit_set(p, i)) {
  154. if (!BN_mul(rr, rr, v, ctx)) {
  155. goto err;
  156. }
  157. }
  158. }
  159. ret = 1;
  160. err:
  161. if (r != rr) {
  162. BN_copy(r, rr);
  163. }
  164. BN_CTX_end(ctx);
  165. return ret;
  166. }
/* Maximum precomputation table size for *variable* sliding windows.
 *
 * The table holds one entry per odd window value, i.e. 2^(window-1) entries.
 * The largest window chosen by BN_window_bits_for_exponent_size() is 6 bits,
 * which needs 2^5 = 32 entries. */
#define TABLE_SIZE 32
  169. typedef struct bn_recp_ctx_st {
  170. BIGNUM N; /* the divisor */
  171. BIGNUM Nr; /* the reciprocal */
  172. int num_bits;
  173. int shift;
  174. int flags;
  175. } BN_RECP_CTX;
  176. static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
  177. BN_init(&recp->N);
  178. BN_init(&recp->Nr);
  179. recp->num_bits = 0;
  180. recp->flags = 0;
  181. }
  182. static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
  183. if (recp == NULL) {
  184. return;
  185. }
  186. BN_free(&recp->N);
  187. BN_free(&recp->Nr);
  188. }
  189. static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
  190. if (!BN_copy(&(recp->N), d)) {
  191. return 0;
  192. }
  193. BN_zero(&recp->Nr);
  194. recp->num_bits = BN_num_bits(d);
  195. recp->shift = 0;
  196. return 1;
  197. }
  198. /* len is the expected size of the result We actually calculate with an extra
  199. * word of precision, so we can do faster division if the remainder is not
  200. * required.
  201. * r := 2^len / m */
  202. static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
  203. int ret = -1;
  204. BIGNUM *t;
  205. BN_CTX_start(ctx);
  206. t = BN_CTX_get(ctx);
  207. if (t == NULL) {
  208. goto err;
  209. }
  210. if (!BN_set_bit(t, len)) {
  211. goto err;
  212. }
  213. if (!BN_div(r, NULL, t, m, ctx)) {
  214. goto err;
  215. }
  216. ret = len;
  217. err:
  218. BN_CTX_end(ctx);
  219. return ret;
  220. }
  221. static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
  222. BN_RECP_CTX *recp, BN_CTX *ctx) {
  223. int i, j, ret = 0;
  224. BIGNUM *a, *b, *d, *r;
  225. BN_CTX_start(ctx);
  226. a = BN_CTX_get(ctx);
  227. b = BN_CTX_get(ctx);
  228. if (dv != NULL) {
  229. d = dv;
  230. } else {
  231. d = BN_CTX_get(ctx);
  232. }
  233. if (rem != NULL) {
  234. r = rem;
  235. } else {
  236. r = BN_CTX_get(ctx);
  237. }
  238. if (a == NULL || b == NULL || d == NULL || r == NULL) {
  239. goto err;
  240. }
  241. if (BN_ucmp(m, &(recp->N)) < 0) {
  242. BN_zero(d);
  243. if (!BN_copy(r, m)) {
  244. return 0;
  245. }
  246. BN_CTX_end(ctx);
  247. return 1;
  248. }
  249. /* We want the remainder
  250. * Given input of ABCDEF / ab
  251. * we need multiply ABCDEF by 3 digests of the reciprocal of ab */
  252. /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
  253. i = BN_num_bits(m);
  254. j = recp->num_bits << 1;
  255. if (j > i) {
  256. i = j;
  257. }
  258. /* Nr := round(2^i / N) */
  259. if (i != recp->shift) {
  260. recp->shift =
  261. BN_reciprocal(&(recp->Nr), &(recp->N), i,
  262. ctx); /* BN_reciprocal returns i, or -1 for an error */
  263. }
  264. if (recp->shift == -1) {
  265. goto err;
  266. }
  267. /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
  268. * BN_num_bits(N)))|
  269. * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
  270. * BN_num_bits(N)))|
  271. * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
  272. * = |m/N| */
  273. if (!BN_rshift(a, m, recp->num_bits)) {
  274. goto err;
  275. }
  276. if (!BN_mul(b, a, &(recp->Nr), ctx)) {
  277. goto err;
  278. }
  279. if (!BN_rshift(d, b, i - recp->num_bits)) {
  280. goto err;
  281. }
  282. d->neg = 0;
  283. if (!BN_mul(b, &(recp->N), d, ctx)) {
  284. goto err;
  285. }
  286. if (!BN_usub(r, m, b)) {
  287. goto err;
  288. }
  289. r->neg = 0;
  290. j = 0;
  291. while (BN_ucmp(r, &(recp->N)) >= 0) {
  292. if (j++ > 2) {
  293. OPENSSL_PUT_ERROR(BN, BN_div_recp, BN_R_BAD_RECIPROCAL);
  294. goto err;
  295. }
  296. if (!BN_usub(r, r, &(recp->N))) {
  297. goto err;
  298. }
  299. if (!BN_add_word(d, 1)) {
  300. goto err;
  301. }
  302. }
  303. r->neg = BN_is_zero(r) ? 0 : m->neg;
  304. d->neg = m->neg ^ recp->N.neg;
  305. ret = 1;
  306. err:
  307. BN_CTX_end(ctx);
  308. return ret;
  309. }
  310. static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
  311. BN_RECP_CTX *recp, BN_CTX *ctx) {
  312. int ret = 0;
  313. BIGNUM *a;
  314. const BIGNUM *ca;
  315. BN_CTX_start(ctx);
  316. a = BN_CTX_get(ctx);
  317. if (a == NULL) {
  318. goto err;
  319. }
  320. if (y != NULL) {
  321. if (x == y) {
  322. if (!BN_sqr(a, x, ctx)) {
  323. goto err;
  324. }
  325. } else {
  326. if (!BN_mul(a, x, y, ctx)) {
  327. goto err;
  328. }
  329. }
  330. ca = a;
  331. } else {
  332. ca = x; /* Just do the mod */
  333. }
  334. ret = BN_div_recp(NULL, r, ca, recp, ctx);
  335. err:
  336. BN_CTX_end(ctx);
  337. return ret;
  338. }
  339. /* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp
  340. * functions
  341. *
  342. * For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
  343. * multiplications is a constant plus on average
  344. *
  345. * 2^(w-1) + (b-w)/(w+1);
  346. *
  347. * here 2^(w-1) is for precomputing the table (we actually need entries only
  348. * for windows that have the lowest bit set), and (b-w)/(w+1) is an
  349. * approximation for the expected number of w-bit windows, not counting the
  350. * first one.
  351. *
  352. * Thus we should use
  353. *
  354. * w >= 6 if b > 671
  355. * w = 5 if 671 > b > 239
  356. * w = 4 if 239 > b > 79
  357. * w = 3 if 79 > b > 23
  358. * w <= 2 if 23 > b
  359. *
  360. * (with draws in between). Very small exponents are often selected
  361. * with low Hamming weight, so we use w = 1 for b <= 23. */
  362. #define BN_window_bits_for_exponent_size(b) \
  363. ((b) > 671 ? 6 : \
  364. (b) > 239 ? 5 : \
  365. (b) > 79 ? 4 : \
  366. (b) > 23 ? 3 : 1)
  367. static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  368. const BIGNUM *m, BN_CTX *ctx) {
  369. int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  370. int start = 1;
  371. BIGNUM *aa;
  372. /* Table of variables obtained from 'ctx' */
  373. BIGNUM *val[TABLE_SIZE];
  374. BN_RECP_CTX recp;
  375. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  376. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  377. OPENSSL_PUT_ERROR(BN, mod_exp_recp, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  378. return 0;
  379. }
  380. bits = BN_num_bits(p);
  381. if (bits == 0) {
  382. ret = BN_one(r);
  383. return ret;
  384. }
  385. BN_CTX_start(ctx);
  386. aa = BN_CTX_get(ctx);
  387. val[0] = BN_CTX_get(ctx);
  388. if (!aa || !val[0]) {
  389. goto err;
  390. }
  391. BN_RECP_CTX_init(&recp);
  392. if (m->neg) {
  393. /* ignore sign of 'm' */
  394. if (!BN_copy(aa, m)) {
  395. goto err;
  396. }
  397. aa->neg = 0;
  398. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
  399. goto err;
  400. }
  401. } else {
  402. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
  403. goto err;
  404. }
  405. }
  406. if (!BN_nnmod(val[0], a, m, ctx)) {
  407. goto err; /* 1 */
  408. }
  409. if (BN_is_zero(val[0])) {
  410. BN_zero(r);
  411. ret = 1;
  412. goto err;
  413. }
  414. window = BN_window_bits_for_exponent_size(bits);
  415. if (window > 1) {
  416. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
  417. goto err; /* 2 */
  418. }
  419. j = 1 << (window - 1);
  420. for (i = 1; i < j; i++) {
  421. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  422. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
  423. goto err;
  424. }
  425. }
  426. }
  427. start = 1; /* This is used to avoid multiplication etc
  428. * when there is only the value '1' in the
  429. * buffer. */
  430. wvalue = 0; /* The 'value' of the window */
  431. wstart = bits - 1; /* The top bit of the window */
  432. wend = 0; /* The bottom bit of the window */
  433. if (!BN_one(r)) {
  434. goto err;
  435. }
  436. for (;;) {
  437. if (BN_is_bit_set(p, wstart) == 0) {
  438. if (!start) {
  439. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  440. goto err;
  441. }
  442. }
  443. if (wstart == 0) {
  444. break;
  445. }
  446. wstart--;
  447. continue;
  448. }
  449. /* We now have wstart on a 'set' bit, we now need to work out
  450. * how bit a window to do. To do this we need to scan
  451. * forward until the last set bit before the end of the
  452. * window */
  453. j = wstart;
  454. wvalue = 1;
  455. wend = 0;
  456. for (i = 1; i < window; i++) {
  457. if (wstart - i < 0) {
  458. break;
  459. }
  460. if (BN_is_bit_set(p, wstart - i)) {
  461. wvalue <<= (i - wend);
  462. wvalue |= 1;
  463. wend = i;
  464. }
  465. }
  466. /* wend is the size of the current window */
  467. j = wend + 1;
  468. /* add the 'bytes above' */
  469. if (!start) {
  470. for (i = 0; i < j; i++) {
  471. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  472. goto err;
  473. }
  474. }
  475. }
  476. /* wvalue will be an odd number < 2^window */
  477. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
  478. goto err;
  479. }
  480. /* move the 'window' down further */
  481. wstart -= wend + 1;
  482. wvalue = 0;
  483. start = 0;
  484. if (wstart < 0) {
  485. break;
  486. }
  487. }
  488. ret = 1;
  489. err:
  490. BN_CTX_end(ctx);
  491. BN_RECP_CTX_free(&recp);
  492. return ret;
  493. }
  494. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  495. BN_CTX *ctx) {
  496. /* For even modulus m = 2^k*m_odd, it might make sense to compute
  497. * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
  498. * exponentiation for the odd part), using appropriate exponent
  499. * reductions, and combine the results using the CRT.
  500. *
  501. * For now, we use Montgomery only if the modulus is odd; otherwise,
  502. * exponentiation using the reciprocal-based quick remaindering
  503. * algorithm is used.
  504. *
  505. * (Timing obtained with expspeed.c [computations a^p mod m
  506. * where a, p, m are of the same length: 256, 512, 1024, 2048,
  507. * 4096, 8192 bits], compared to the running time of the
  508. * standard algorithm:
  509. *
  510. * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
  511. * 55 .. 77 % [UltraSparc processor, but
  512. * debug-solaris-sparcv8-gcc conf.]
  513. *
  514. * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
  515. * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
  516. *
  517. * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
  518. * at 2048 and more bits, but at 512 and 1024 bits, it was
  519. * slower even than the standard algorithm!
  520. *
  521. * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
  522. * should be obtained when the new Montgomery reduction code
  523. * has been integrated into OpenSSL.) */
  524. if (BN_is_odd(m)) {
  525. if (a->top == 1 && !a->neg && BN_get_flags(p, BN_FLG_CONSTTIME) == 0) {
  526. BN_ULONG A = a->d[0];
  527. return BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
  528. }
  529. return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  530. }
  531. return mod_exp_recp(r, a, p, m, ctx);
  532. }
/* BN_mod_exp_mont sets |rr| to a^p mod m for odd |m|, using sliding-window
 * exponentiation carried out with Montgomery multiplication.
 *
 * |in_mont| may supply a Montgomery context to use; if it is NULL, a
 * temporary context is created for |m| and freed before returning.
 * Exponents flagged BN_FLG_CONSTTIME are handed to
 * BN_mod_exp_mont_consttime() instead. An even modulus is an error, and a
 * zero-length exponent yields 1.
 *
 * Returns 1 on success and 0 on error. */
int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
                    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) {
  int i, j, bits, ret = 0, wstart, wend, window, wvalue;
  int start = 1;
  BIGNUM *d, *r;
  const BIGNUM *aa;
  /* Table of variables obtained from 'ctx' */
  BIGNUM *val[TABLE_SIZE];
  BN_MONT_CTX *mont = NULL;

  if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
    return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
  }

  if (!BN_is_odd(m)) {
    OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont, BN_R_CALLED_WITH_EVEN_MODULUS);
    return 0;
  }

  bits = BN_num_bits(p);
  if (bits == 0) {
    ret = BN_one(rr);
    return ret;
  }

  BN_CTX_start(ctx);
  d = BN_CTX_get(ctx);
  r = BN_CTX_get(ctx);
  val[0] = BN_CTX_get(ctx);
  if (!d || !r || !val[0]) {
    goto err;
  }

  /* If this is not done, things will break in the montgomery part */
  if (in_mont != NULL) {
    mont = in_mont;
  } else {
    mont = BN_MONT_CTX_new();
    if (mont == NULL) {
      goto err;
    }
    if (!BN_MONT_CTX_set(mont, m, ctx)) {
      goto err;
    }
  }

  /* Reduce the base into [0, m) only when it is not already there. */
  if (a->neg || BN_ucmp(a, m) >= 0) {
    if (!BN_nnmod(val[0], a, m, ctx)) {
      goto err;
    }
    aa = val[0];
  } else {
    aa = a;
  }

  if (BN_is_zero(aa)) {
    /* 0^p == 0 for p > 0; nothing further to compute. */
    BN_zero(rr);
    ret = 1;
    goto err;
  }
  /* val[0] := base in Montgomery form; first entry of the odd-power table. */
  if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
    goto err; /* 1 */
  }

  window = BN_window_bits_for_exponent_size(bits);
  if (window > 1) {
    /* d := base^2, then val[i] := val[i-1] * base^2, so val[i] holds
     * base^(2i+1) (all in Montgomery form). */
    if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
      goto err; /* 2 */
    }
    j = 1 << (window - 1);
    for (i = 1; i < j; i++) {
      if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
          !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
        goto err;
      }
    }
  }

  start = 1;         /* This is used to avoid multiplication etc
                      * when there is only the value '1' in the
                      * buffer. */
  wvalue = 0;        /* The 'value' of the window */
  wstart = bits - 1; /* The top bit of the window */
  wend = 0;          /* The bottom bit of the window */

  /* Initialise the accumulator r to 1 in Montgomery form. When the top bit
   * of m's most significant word is set, this is computed directly as
   * 2^(top*BN_BITS2) - m instead of calling BN_to_montgomery.
   * NOTE(review): presumably a shortcut for R mod m with
   * R = 2^(top*BN_BITS2) -- confirm against BN_MONT_CTX_set. */
  j = m->top; /* borrow j */
  if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
    if (bn_wexpand(r, j) == NULL)
      goto err;
    /* 2^(top*BN_BITS2) - m */
    r->d[0] = (0 - m->d[0]) & BN_MASK2;
    for (i = 1; i < j; i++)
      r->d[i] = (~m->d[i]) & BN_MASK2;
    r->top = j;
    /* Upper words will be zero if the corresponding words of 'm'
     * were 0xfff[...], so decrement r->top accordingly. */
    bn_correct_top(r);
  } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
    goto err;
  }

  for (;;) {
    if (BN_is_bit_set(p, wstart) == 0) {
      /* A zero bit outside any window: just square (skipped while r is
       * still the initial 1). */
      if (!start) {
        if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
          goto err;
      }
      if (wstart == 0) {
        break;
      }
      wstart--;
      continue;
    }

    /* We now have wstart on a 'set' bit, we now need to work out how big a
     * window to do.  To do this we need to scan forward until the last set bit
     * before the end of the window */
    j = wstart;
    wvalue = 1;
    wend = 0;
    for (i = 1; i < window; i++) {
      if (wstart - i < 0) {
        break;
      }
      if (BN_is_bit_set(p, wstart - i)) {
        wvalue <<= (i - wend);
        wvalue |= 1;
        wend = i;
      }
    }

    /* wend is the size of the current window */
    j = wend + 1;
    /* add the 'bytes above': one squaring per bit of the window */
    if (!start) {
      for (i = 0; i < j; i++) {
        if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
          goto err;
        }
      }
    }

    /* wvalue will be an odd number < 2^window */
    if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
      goto err;
    }

    /* move the 'window' down further */
    wstart -= wend + 1;
    wvalue = 0;
    start = 0;
    if (wstart < 0) {
      break;
    }
  }

  /* Convert the accumulator out of Montgomery form into the caller's rr. */
  if (!BN_from_montgomery(rr, r, mont, ctx)) {
    goto err;
  }
  ret = 1;

err:
  /* Free the Montgomery context only if it was created here. */
  if (in_mont == NULL && mont != NULL) {
    BN_MONT_CTX_free(mont);
  }
  BN_CTX_end(ctx);
  return ret;
}
  684. /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  685. * layout so that accessing any of these table values shows the same access
  686. * pattern as far as cache lines are concerned. The following functions are
  687. * used to transfer a BIGNUM from/to that table. */
  688. static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
  689. int width) {
  690. size_t i, j;
  691. if (top > b->top) {
  692. top = b->top; /* this works because 'buf' is explicitly zeroed */
  693. }
  694. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  695. buf[j] = ((unsigned char *)b->d)[i];
  696. }
  697. return 1;
  698. }
  699. static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
  700. int width) {
  701. size_t i, j;
  702. if (bn_wexpand(b, top) == NULL) {
  703. return 0;
  704. }
  705. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  706. ((unsigned char *)b->d)[i] = buf[j];
  707. }
  708. b->top = top;
  709. bn_correct_top(b);
  710. return 1;
  711. }
/* BN_mod_exp_mont_consttime is based on the assumption that the L1 data cache
 * line width of the target processor is at least the following value (in
 * bytes). The mask is the width minus one, used to extract an address's
 * offset within a cache line. */
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
  (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
/* Window sizes optimized for fixed window size modular exponentiation
 * algorithm (BN_mod_exp_mont_consttime).
 *
 * To achieve the security goals of BN_mod_exp_mont_consttime, the maximum
 * size of the window must not exceed
 * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
 *
 * Window size thresholds are defined for cache line sizes of 32 and 64, cache
 * line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
 * 7 should only be used on processors that have a 128 byte or greater cache
 * line size.
 *
 * Note that no definitions are provided for any other cache line width. */
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

#define BN_window_bits_for_ctime_exponent_size(b) \
  ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
#define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)

#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

#define BN_window_bits_for_ctime_exponent_size(b) \
  ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
#define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)

#endif
/* Given a pointer value, compute the next address that is a cache line
 * multiple.
 *
 * The result is always strictly greater than |x_|: a pointer that is already
 * cache-line aligned is advanced by one full
 * MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH, so the underlying allocation must
 * include that much slack. */
#define MOD_EXP_CTIME_ALIGN(x_)          \
  ((unsigned char *)(x_) +               \
   (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
    (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  743. /* This variant of BN_mod_exp_mont() uses fixed windows and the special
  744. * precomputation memory layout to limit data-dependency to a minimum
  745. * to protect secret exponents (cf. the hyper-threading timing attacks
  746. * pointed out by Colin Percival,
  747. * http://www.daemonology.net/hyperthreading-considered-harmful/)
  748. */
  749. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  750. const BIGNUM *m, BN_CTX *ctx,
  751. BN_MONT_CTX *in_mont) {
  752. int i, bits, ret = 0, window, wvalue;
  753. int top;
  754. BN_MONT_CTX *mont = NULL;
  755. int numPowers;
  756. unsigned char *powerbufFree = NULL;
  757. int powerbufLen = 0;
  758. unsigned char *powerbuf = NULL;
  759. BIGNUM tmp, am;
  760. top = m->top;
  761. if (!(m->d[0] & 1)) {
  762. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_consttime,
  763. BN_R_CALLED_WITH_EVEN_MODULUS);
  764. return 0;
  765. }
  766. bits = BN_num_bits(p);
  767. if (bits == 0) {
  768. ret = BN_one(rr);
  769. return ret;
  770. }
  771. BN_CTX_start(ctx);
  772. /* Allocate a montgomery context if it was not supplied by the caller.
  773. * If this is not done, things will break in the montgomery part.
  774. */
  775. if (in_mont != NULL)
  776. mont = in_mont;
  777. else {
  778. if ((mont = BN_MONT_CTX_new()) == NULL)
  779. goto err;
  780. if (!BN_MONT_CTX_set(mont, m, ctx))
  781. goto err;
  782. }
  783. #ifdef RSAZ_ENABLED
  784. /* If the size of the operands allow it, perform the optimized
  785. * RSAZ exponentiation. For further information see
  786. * crypto/bn/rsaz_exp.c and accompanying assembly modules. */
  787. if (((OPENSSL_ia32cap_P[2] & 0x80100) != 0x80100) /* check for MULX/AD*X */
  788. && (16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
  789. rsaz_avx2_eligible()) {
  790. if (NULL == bn_wexpand(rr, 16))
  791. goto err;
  792. RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
  793. rr->top = 16;
  794. rr->neg = 0;
  795. bn_correct_top(rr);
  796. ret = 1;
  797. goto err;
  798. } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
  799. if (NULL == bn_wexpand(rr, 8))
  800. goto err;
  801. RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
  802. rr->top = 8;
  803. rr->neg = 0;
  804. bn_correct_top(rr);
  805. ret = 1;
  806. goto err;
  807. }
  808. #endif
  809. /* Get the window size to use with size of p. */
  810. window = BN_window_bits_for_ctime_exponent_size(bits);
  811. #if defined(OPENSSL_BN_ASM_MONT5)
  812. if (window >= 5) {
  813. window = 5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */
  814. if ((top & 7) == 0)
  815. powerbufLen += 2 * top * sizeof(m->d[0]);
  816. }
  817. #endif
  818. (void)0;
  819. /* Allocate a buffer large enough to hold all of the pre-computed
  820. * powers of am, am itself and tmp.
  821. */
  822. numPowers = 1 << window;
  823. powerbufLen +=
  824. sizeof(m->d[0]) *
  825. (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
  826. #ifdef alloca
  827. if (powerbufLen < 3072)
  828. powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  829. else
  830. #endif
  831. if ((powerbufFree = (unsigned char *)OPENSSL_malloc(
  832. powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
  833. goto err;
  834. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  835. memset(powerbuf, 0, powerbufLen);
  836. #ifdef alloca
  837. if (powerbufLen < 3072)
  838. powerbufFree = NULL;
  839. #endif
  840. /* lay down tmp and am right after powers table */
  841. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  842. am.d = tmp.d + top;
  843. tmp.top = am.top = 0;
  844. tmp.dmax = am.dmax = top;
  845. tmp.neg = am.neg = 0;
  846. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  847. /* prepare a^0 in Montgomery domain */
  848. /* by Shay Gueron's suggestion */
  849. if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  850. /* 2^(top*BN_BITS2) - m */
  851. tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
  852. for (i = 1; i < top; i++)
  853. tmp.d[i] = (~m->d[i]) & BN_MASK2;
  854. tmp.top = top;
  855. } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx))
  856. goto err;
  857. /* prepare a^1 in Montgomery domain */
  858. if (a->neg || BN_ucmp(a, m) >= 0) {
  859. if (!BN_mod(&am, a, m, ctx))
  860. goto err;
  861. if (!BN_to_montgomery(&am, &am, mont, ctx))
  862. goto err;
  863. } else if (!BN_to_montgomery(&am, a, mont, ctx))
  864. goto err;
  865. #if defined(OPENSSL_BN_ASM_MONT5)
  866. /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
  867. * specifically optimization of cache-timing attack countermeasures
  868. * and pre-computation optimization. */
  869. /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  870. * 512-bit RSA is hardly relevant, we omit it to spare size... */
  871. if (window == 5) {
  872. void bn_mul_mont_gather5(BN_ULONG * rp, const BN_ULONG * ap,
  873. const void * table, const BN_ULONG * np,
  874. const BN_ULONG * n0, int num, int power);
  875. void bn_scatter5(const BN_ULONG * inp, size_t num, void * table,
  876. size_t power);
  877. void bn_gather5(BN_ULONG * out, size_t num, void * table, size_t power);
  878. void bn_power5(BN_ULONG * rp, const BN_ULONG * ap, const void * table,
  879. const BN_ULONG * np, const BN_ULONG * n0, int num,
  880. int power);
  881. int bn_get_bits5(const BN_ULONG * ap, int off);
  882. int bn_from_montgomery(BN_ULONG * rp, const BN_ULONG * ap,
  883. const BN_ULONG * not_used, const BN_ULONG * np,
  884. const BN_ULONG * n0, int num);
  885. BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
  886. /* BN_to_montgomery can contaminate words above .top
  887. * [in BN_DEBUG[_DEBUG] build]... */
  888. for (i = am.top; i < top; i++)
  889. am.d[i] = 0;
  890. for (i = tmp.top; i < top; i++)
  891. tmp.d[i] = 0;
  892. if (top & 7)
  893. np2 = np;
  894. else
  895. for (np2 = am.d + top, i = 0; i < top; i++)
  896. np2[2 * i] = np[i];
  897. bn_scatter5(tmp.d, top, powerbuf, 0);
  898. bn_scatter5(am.d, am.top, powerbuf, 1);
  899. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  900. bn_scatter5(tmp.d, top, powerbuf, 2);
  901. /* same as above, but uses squaring for 1/2 of operations */
  902. for (i = 4; i < 32; i *= 2) {
  903. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  904. bn_scatter5(tmp.d, top, powerbuf, i);
  905. }
  906. for (i = 3; i < 8; i += 2) {
  907. int j;
  908. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  909. bn_scatter5(tmp.d, top, powerbuf, i);
  910. for (j = 2 * i; j < 32; j *= 2) {
  911. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  912. bn_scatter5(tmp.d, top, powerbuf, j);
  913. }
  914. }
  915. for (; i < 16; i += 2) {
  916. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  917. bn_scatter5(tmp.d, top, powerbuf, i);
  918. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  919. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  920. }
  921. for (; i < 32; i += 2) {
  922. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  923. bn_scatter5(tmp.d, top, powerbuf, i);
  924. }
  925. bits--;
  926. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--)
  927. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  928. bn_gather5(tmp.d, top, powerbuf, wvalue);
  929. /* Scan the exponent one window at a time starting from the most
  930. * significant bits.
  931. */
  932. if (top & 7)
  933. while (bits >= 0) {
  934. for (wvalue = 0, i = 0; i < 5; i++, bits--)
  935. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  936. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  937. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  938. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  939. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  940. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  941. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  942. }
  943. else {
  944. while (bits >= 0) {
  945. wvalue = bn_get_bits5(p->d, bits - 4);
  946. bits -= 5;
  947. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  948. }
  949. }
  950. ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
  951. tmp.top = top;
  952. bn_correct_top(&tmp);
  953. if (ret) {
  954. if (!BN_copy(rr, &tmp))
  955. ret = 0;
  956. goto err; /* non-zero ret means it's not error */
  957. }
  958. } else
  959. #endif
  960. {
  961. if (!copy_to_prebuf(&tmp, top, powerbuf, 0, numPowers))
  962. goto err;
  963. if (!copy_to_prebuf(&am, top, powerbuf, 1, numPowers))
  964. goto err;
  965. /* If the window size is greater than 1, then calculate
  966. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
  967. * (even powers could instead be computed as (a^(i/2))^2
  968. * to use the slight performance advantage of sqr over mul).
  969. */
  970. if (window > 1) {
  971. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
  972. goto err;
  973. if (!copy_to_prebuf(&tmp, top, powerbuf, 2, numPowers))
  974. goto err;
  975. for (i = 3; i < numPowers; i++) {
  976. /* Calculate a^i = a^(i-1) * a */
  977. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
  978. goto err;
  979. if (!copy_to_prebuf(&tmp, top, powerbuf, i, numPowers))
  980. goto err;
  981. }
  982. }
  983. bits--;
  984. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
  985. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  986. if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, numPowers))
  987. goto err;
  988. /* Scan the exponent one window at a time starting from the most
  989. * significant bits.
  990. */
  991. while (bits >= 0) {
  992. wvalue = 0; /* The 'value' of the window */
  993. /* Scan the window, squaring the result as we go */
  994. for (i = 0; i < window; i++, bits--) {
  995. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx))
  996. goto err;
  997. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  998. }
  999. /* Fetch the appropriate pre-computed value from the pre-buf */
  1000. if (!copy_from_prebuf(&am, top, powerbuf, wvalue, numPowers))
  1001. goto err;
  1002. /* Multiply the result into the intermediate result */
  1003. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx))
  1004. goto err;
  1005. }
  1006. }
  1007. /* Convert the final result from montgomery to standard format */
  1008. if (!BN_from_montgomery(rr, &tmp, mont, ctx))
  1009. goto err;
  1010. ret = 1;
  1011. err:
  1012. if ((in_mont == NULL) && (mont != NULL))
  1013. BN_MONT_CTX_free(mont);
  1014. if (powerbuf != NULL) {
  1015. OPENSSL_cleanse(powerbuf, powerbufLen);
  1016. if (powerbufFree)
  1017. OPENSSL_free(powerbufFree);
  1018. }
  1019. BN_CTX_end(ctx);
  1020. return (ret);
  1021. }
  1022. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  1023. const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) {
  1024. BN_MONT_CTX *mont = NULL;
  1025. int b, bits, ret = 0;
  1026. int r_is_one;
  1027. BN_ULONG w, next_w;
  1028. BIGNUM *d, *r, *t;
  1029. BIGNUM *swap_tmp;
  1030. #define BN_MOD_MUL_WORD(r, w, m) \
  1031. (BN_mul_word(r, (w)) && \
  1032. (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
  1033. (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
  1034. /* BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
  1035. * probably more overhead than always using BN_mod (which uses BN_copy if a
  1036. * similar test returns true). We can use BN_mod and do not need BN_nnmod
  1037. * because our accumulator is never negative (the result of BN_mod does not
  1038. * depend on the sign of the modulus). */
  1039. #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
  1040. (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
  1041. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  1042. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  1043. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_word,
  1044. ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  1045. return 0;
  1046. }
  1047. if (!BN_is_odd(m)) {
  1048. OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_word, BN_R_CALLED_WITH_EVEN_MODULUS);
  1049. return 0;
  1050. }
  1051. if (m->top == 1) {
  1052. a %= m->d[0]; /* make sure that 'a' is reduced */
  1053. }
  1054. bits = BN_num_bits(p);
  1055. if (bits == 0) {
  1056. /* x**0 mod 1 is still zero. */
  1057. if (BN_is_one(m)) {
  1058. ret = 1;
  1059. BN_zero(rr);
  1060. } else {
  1061. ret = BN_one(rr);
  1062. }
  1063. return ret;
  1064. }
  1065. if (a == 0) {
  1066. BN_zero(rr);
  1067. ret = 1;
  1068. return ret;
  1069. }
  1070. BN_CTX_start(ctx);
  1071. d = BN_CTX_get(ctx);
  1072. r = BN_CTX_get(ctx);
  1073. t = BN_CTX_get(ctx);
  1074. if (d == NULL || r == NULL || t == NULL) {
  1075. goto err;
  1076. }
  1077. if (in_mont != NULL)
  1078. mont = in_mont;
  1079. else {
  1080. if ((mont = BN_MONT_CTX_new()) == NULL) {
  1081. goto err;
  1082. }
  1083. if (!BN_MONT_CTX_set(mont, m, ctx)) {
  1084. goto err;
  1085. }
  1086. }
  1087. r_is_one = 1; /* except for Montgomery factor */
  1088. /* bits-1 >= 0 */
  1089. /* The result is accumulated in the product r*w. */
  1090. w = a; /* bit 'bits-1' of 'p' is always set */
  1091. for (b = bits - 2; b >= 0; b--) {
  1092. /* First, square r*w. */
  1093. next_w = w * w;
  1094. if ((next_w / w) != w) {
  1095. /* overflow */
  1096. if (r_is_one) {
  1097. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1098. goto err;
  1099. }
  1100. r_is_one = 0;
  1101. } else {
  1102. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1103. goto err;
  1104. }
  1105. }
  1106. next_w = 1;
  1107. }
  1108. w = next_w;
  1109. if (!r_is_one) {
  1110. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1111. goto err;
  1112. }
  1113. }
  1114. /* Second, multiply r*w by 'a' if exponent bit is set. */
  1115. if (BN_is_bit_set(p, b)) {
  1116. next_w = w * a;
  1117. if ((next_w / a) != w) {
  1118. /* overflow */
  1119. if (r_is_one) {
  1120. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1121. goto err;
  1122. }
  1123. r_is_one = 0;
  1124. } else {
  1125. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1126. goto err;
  1127. }
  1128. }
  1129. next_w = a;
  1130. }
  1131. w = next_w;
  1132. }
  1133. }
  1134. /* Finally, set r:=r*w. */
  1135. if (w != 1) {
  1136. if (r_is_one) {
  1137. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1138. goto err;
  1139. }
  1140. r_is_one = 0;
  1141. } else {
  1142. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1143. goto err;
  1144. }
  1145. }
  1146. }
  1147. if (r_is_one) {
  1148. /* can happen only if a == 1*/
  1149. if (!BN_one(rr)) {
  1150. goto err;
  1151. }
  1152. } else {
  1153. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1154. goto err;
  1155. }
  1156. }
  1157. ret = 1;
  1158. err:
  1159. if (in_mont == NULL && mont != NULL) {
  1160. BN_MONT_CTX_free(mont);
  1161. }
  1162. BN_CTX_end(ctx);
  1163. return ret;
  1164. }
  1165. #define TABLE_SIZE 32
  1166. int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
  1167. const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
  1168. BN_CTX *ctx, BN_MONT_CTX *in_mont) {
  1169. int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2,
  1170. wvalue1, wvalue2;
  1171. int r_is_one = 1;
  1172. BIGNUM *d, *r;
  1173. const BIGNUM *a_mod_m;
  1174. /* Tables of variables obtained from 'ctx' */
  1175. BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
  1176. BN_MONT_CTX *mont = NULL;
  1177. if (!(m->d[0] & 1)) {
  1178. OPENSSL_PUT_ERROR(BN, BN_mod_exp2_mont, BN_R_CALLED_WITH_EVEN_MODULUS);
  1179. return 0;
  1180. }
  1181. bits1 = BN_num_bits(p1);
  1182. bits2 = BN_num_bits(p2);
  1183. if (bits1 == 0 && bits2 == 0) {
  1184. ret = BN_one(rr);
  1185. return ret;
  1186. }
  1187. bits = (bits1 > bits2) ? bits1 : bits2;
  1188. BN_CTX_start(ctx);
  1189. d = BN_CTX_get(ctx);
  1190. r = BN_CTX_get(ctx);
  1191. val1[0] = BN_CTX_get(ctx);
  1192. val2[0] = BN_CTX_get(ctx);
  1193. if (!d || !r || !val1[0] || !val2[0]) {
  1194. goto err;
  1195. }
  1196. if (in_mont != NULL) {
  1197. mont = in_mont;
  1198. } else {
  1199. mont = BN_MONT_CTX_new();
  1200. if (mont == NULL) {
  1201. goto err;
  1202. }
  1203. if (!BN_MONT_CTX_set(mont, m, ctx)) {
  1204. goto err;
  1205. }
  1206. }
  1207. window1 = BN_window_bits_for_exponent_size(bits1);
  1208. window2 = BN_window_bits_for_exponent_size(bits2);
  1209. /* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 ..
  1210. * 2^(window1-1) */
  1211. if (a1->neg || BN_ucmp(a1, m) >= 0) {
  1212. if (!BN_mod(val1[0], a1, m, ctx)) {
  1213. goto err;
  1214. }
  1215. a_mod_m = val1[0];
  1216. } else {
  1217. a_mod_m = a1;
  1218. }
  1219. if (BN_is_zero(a_mod_m)) {
  1220. BN_zero(rr);
  1221. ret = 1;
  1222. goto err;
  1223. }
  1224. if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) {
  1225. goto err;
  1226. }
  1227. if (window1 > 1) {
  1228. if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) {
  1229. goto err;
  1230. }
  1231. j = 1 << (window1 - 1);
  1232. for (i = 1; i < j; i++) {
  1233. if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
  1234. !BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx)) {
  1235. goto err;
  1236. }
  1237. }
  1238. }
  1239. /* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 ..
  1240. * 2^(window2-1) */
  1241. if (a2->neg || BN_ucmp(a2, m) >= 0) {
  1242. if (!BN_mod(val2[0], a2, m, ctx)) {
  1243. goto err;
  1244. }
  1245. a_mod_m = val2[0];
  1246. } else {
  1247. a_mod_m = a2;
  1248. }
  1249. if (BN_is_zero(a_mod_m)) {
  1250. BN_zero(rr);
  1251. ret = 1;
  1252. goto err;
  1253. }
  1254. if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) {
  1255. goto err;
  1256. }
  1257. if (window2 > 1) {
  1258. if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) {
  1259. goto err;
  1260. }
  1261. j = 1 << (window2 - 1);
  1262. for (i = 1; i < j; i++) {
  1263. if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
  1264. !BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx)) {
  1265. goto err;
  1266. }
  1267. }
  1268. }
  1269. /* Now compute the power product, using independent windows. */
  1270. r_is_one = 1;
  1271. wvalue1 = 0; /* The 'value' of the first window */
  1272. wvalue2 = 0; /* The 'value' of the second window */
  1273. wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */
  1274. wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */
  1275. if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  1276. goto err;
  1277. }
  1278. for (b = bits - 1; b >= 0; b--) {
  1279. if (!r_is_one) {
  1280. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1281. goto err;
  1282. }
  1283. }
  1284. if (!wvalue1 && BN_is_bit_set(p1, b)) {
  1285. /* consider bits b-window1+1 .. b for this window */
  1286. i = b - window1 + 1;
  1287. while (!BN_is_bit_set(p1, i)) /* works for i<0 */
  1288. i++;
  1289. wpos1 = i;
  1290. wvalue1 = 1;
  1291. for (i = b - 1; i >= wpos1; i--) {
  1292. wvalue1 <<= 1;
  1293. if (BN_is_bit_set(p1, i))
  1294. wvalue1++;
  1295. }
  1296. }
  1297. if (!wvalue2 && BN_is_bit_set(p2, b)) {
  1298. /* consider bits b-window2+1 .. b for this window */
  1299. i = b - window2 + 1;
  1300. while (!BN_is_bit_set(p2, i))
  1301. i++;
  1302. wpos2 = i;
  1303. wvalue2 = 1;
  1304. for (i = b - 1; i >= wpos2; i--) {
  1305. wvalue2 <<= 1;
  1306. if (BN_is_bit_set(p2, i))
  1307. wvalue2++;
  1308. }
  1309. }
  1310. if (wvalue1 && b == wpos1) {
  1311. /* wvalue1 is odd and < 2^window1 */
  1312. if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx)) {
  1313. goto err;
  1314. }
  1315. wvalue1 = 0;
  1316. r_is_one = 0;
  1317. }
  1318. if (wvalue2 && b == wpos2) {
  1319. /* wvalue2 is odd and < 2^window2 */
  1320. if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx)) {
  1321. goto err;
  1322. }
  1323. wvalue2 = 0;
  1324. r_is_one = 0;
  1325. }
  1326. }
  1327. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1328. goto err;
  1329. }
  1330. ret = 1;
  1331. err:
  1332. if (in_mont == NULL && mont != NULL) {
  1333. BN_MONT_CTX_free(mont);
  1334. }
  1335. BN_CTX_end(ctx);
  1336. return ret;
  1337. }