Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

exponentiation.c 42 KiB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544
  1. /* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
  2. * All rights reserved.
  3. *
  4. * This package is an SSL implementation written
  5. * by Eric Young (eay@cryptsoft.com).
  6. * The implementation was written so as to conform with Netscapes SSL.
  7. *
  8. * This library is free for commercial and non-commercial use as long as
  9. * the following conditions are aheared to. The following conditions
  10. * apply to all code found in this distribution, be it the RC4, RSA,
  11. * lhash, DES, etc., code; not just the SSL code. The SSL documentation
  12. * included with this distribution is covered by the same copyright terms
  13. * except that the holder is Tim Hudson (tjh@cryptsoft.com).
  14. *
  15. * Copyright remains Eric Young's, and as such any Copyright notices in
  16. * the code are not to be removed.
  17. * If this package is used in a product, Eric Young should be given attribution
  18. * as the author of the parts of the library used.
  19. * This can be in the form of a textual message at program startup or
  20. * in documentation (online or textual) provided with the package.
  21. *
  22. * Redistribution and use in source and binary forms, with or without
  23. * modification, are permitted provided that the following conditions
  24. * are met:
  25. * 1. Redistributions of source code must retain the copyright
  26. * notice, this list of conditions and the following disclaimer.
  27. * 2. Redistributions in binary form must reproduce the above copyright
  28. * notice, this list of conditions and the following disclaimer in the
  29. * documentation and/or other materials provided with the distribution.
  30. * 3. All advertising materials mentioning features or use of this software
  31. * must display the following acknowledgement:
  32. * "This product includes cryptographic software written by
  33. * Eric Young (eay@cryptsoft.com)"
  34. * The word 'cryptographic' can be left out if the rouines from the library
  35. * being used are not cryptographic related :-).
  36. * 4. If you include any Windows specific code (or a derivative thereof) from
  37. * the apps directory (application code) you must include an acknowledgement:
  38. * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
  39. *
  40. * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  41. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  42. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  43. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  44. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  45. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  46. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  47. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  48. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  49. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  50. * SUCH DAMAGE.
  51. *
  52. * The licence and distribution terms for any publically available version or
  53. * derivative of this code cannot be changed. i.e. this code cannot simply be
  54. * copied and put under another distribution licence
  55. * [including the GNU Public Licence.]
  56. */
  57. /* ====================================================================
  58. * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
  59. *
  60. * Redistribution and use in source and binary forms, with or without
  61. * modification, are permitted provided that the following conditions
  62. * are met:
  63. *
  64. * 1. Redistributions of source code must retain the above copyright
  65. * notice, this list of conditions and the following disclaimer.
  66. *
  67. * 2. Redistributions in binary form must reproduce the above copyright
  68. * notice, this list of conditions and the following disclaimer in
  69. * the documentation and/or other materials provided with the
  70. * distribution.
  71. *
  72. * 3. All advertising materials mentioning features or use of this
  73. * software must display the following acknowledgment:
  74. * "This product includes software developed by the OpenSSL Project
  75. * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
  76. *
  77. * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
  78. * endorse or promote products derived from this software without
  79. * prior written permission. For written permission, please contact
  80. * openssl-core@openssl.org.
  81. *
  82. * 5. Products derived from this software may not be called "OpenSSL"
  83. * nor may "OpenSSL" appear in their names without prior written
  84. * permission of the OpenSSL Project.
  85. *
  86. * 6. Redistributions of any form whatsoever must retain the following
  87. * acknowledgment:
  88. * "This product includes software developed by the OpenSSL Project
  89. * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
  90. *
  91. * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
  92. * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  93. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  94. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
  95. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  96. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  97. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  98. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  99. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  100. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  101. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
  102. * OF THE POSSIBILITY OF SUCH DAMAGE.
  103. * ====================================================================
  104. *
  105. * This product includes cryptographic software written by Eric Young
  106. * (eay@cryptsoft.com). This product includes software written by Tim
  107. * Hudson (tjh@cryptsoft.com). */
  108. #include <openssl/bn.h>
  109. #include <assert.h>
  110. #include <string.h>
  111. #include <openssl/cpu.h>
  112. #include <openssl/err.h>
  113. #include <openssl/mem.h>
  114. #include "internal.h"
  #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)

  /* Feature switches for the x86-64 build with assembly enabled. They are
   * tested by preprocessor conditionals further down in this file. */
  #define OPENSSL_BN_ASM_MONT5
  #define RSAZ_ENABLED

  #include "rsaz_exp.h"

  /* The routines below are only declared here; their definitions are not part
   * of this file. NOTE(review): presumably they live in the x86-64 assembly
   * modules -- confirm against the build files. */
  void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
                           const BN_ULONG *np, const BN_ULONG *n0, int num,
                           int power);

  void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power);

  void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);

  void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
                 const BN_ULONG *np, const BN_ULONG *n0, int num, int power);

  int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
                         const BN_ULONG *not_used, const BN_ULONG *np,
                         const BN_ULONG *n0, int num);

  #endif
  130. int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
  131. int i, bits, ret = 0;
  132. BIGNUM *v, *rr;
  133. if ((p->flags & BN_FLG_CONSTTIME) != 0) {
  134. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  135. OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  136. return 0;
  137. }
  138. BN_CTX_start(ctx);
  139. if (r == a || r == p) {
  140. rr = BN_CTX_get(ctx);
  141. } else {
  142. rr = r;
  143. }
  144. v = BN_CTX_get(ctx);
  145. if (rr == NULL || v == NULL) {
  146. goto err;
  147. }
  148. if (BN_copy(v, a) == NULL) {
  149. goto err;
  150. }
  151. bits = BN_num_bits(p);
  152. if (BN_is_odd(p)) {
  153. if (BN_copy(rr, a) == NULL) {
  154. goto err;
  155. }
  156. } else {
  157. if (!BN_one(rr)) {
  158. goto err;
  159. }
  160. }
  161. for (i = 1; i < bits; i++) {
  162. if (!BN_sqr(v, v, ctx)) {
  163. goto err;
  164. }
  165. if (BN_is_bit_set(p, i)) {
  166. if (!BN_mul(rr, rr, v, ctx)) {
  167. goto err;
  168. }
  169. }
  170. }
  171. if (r != rr && !BN_copy(r, rr)) {
  172. goto err;
  173. }
  174. ret = 1;
  175. err:
  176. BN_CTX_end(ctx);
  177. return ret;
  178. }
  179. /* maximum precomputation table size for *variable* sliding windows */
  180. #define TABLE_SIZE 32
  181. typedef struct bn_recp_ctx_st {
  182. BIGNUM N; /* the divisor */
  183. BIGNUM Nr; /* the reciprocal */
  184. int num_bits;
  185. int shift;
  186. int flags;
  187. } BN_RECP_CTX;
  188. static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
  189. BN_init(&recp->N);
  190. BN_init(&recp->Nr);
  191. recp->num_bits = 0;
  192. recp->flags = 0;
  193. }
  194. static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
  195. if (recp == NULL) {
  196. return;
  197. }
  198. BN_free(&recp->N);
  199. BN_free(&recp->Nr);
  200. }
  201. static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
  202. if (!BN_copy(&(recp->N), d)) {
  203. return 0;
  204. }
  205. BN_zero(&recp->Nr);
  206. recp->num_bits = BN_num_bits(d);
  207. recp->shift = 0;
  208. return 1;
  209. }
  210. /* len is the expected size of the result We actually calculate with an extra
  211. * word of precision, so we can do faster division if the remainder is not
  212. * required.
  213. * r := 2^len / m */
  214. static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
  215. int ret = -1;
  216. BIGNUM *t;
  217. BN_CTX_start(ctx);
  218. t = BN_CTX_get(ctx);
  219. if (t == NULL) {
  220. goto err;
  221. }
  222. if (!BN_set_bit(t, len)) {
  223. goto err;
  224. }
  225. if (!BN_div(r, NULL, t, m, ctx)) {
  226. goto err;
  227. }
  228. ret = len;
  229. err:
  230. BN_CTX_end(ctx);
  231. return ret;
  232. }
  233. static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
  234. BN_RECP_CTX *recp, BN_CTX *ctx) {
  235. int i, j, ret = 0;
  236. BIGNUM *a, *b, *d, *r;
  237. BN_CTX_start(ctx);
  238. a = BN_CTX_get(ctx);
  239. b = BN_CTX_get(ctx);
  240. if (dv != NULL) {
  241. d = dv;
  242. } else {
  243. d = BN_CTX_get(ctx);
  244. }
  245. if (rem != NULL) {
  246. r = rem;
  247. } else {
  248. r = BN_CTX_get(ctx);
  249. }
  250. if (a == NULL || b == NULL || d == NULL || r == NULL) {
  251. goto err;
  252. }
  253. if (BN_ucmp(m, &recp->N) < 0) {
  254. BN_zero(d);
  255. if (!BN_copy(r, m)) {
  256. goto err;
  257. }
  258. BN_CTX_end(ctx);
  259. return 1;
  260. }
  261. /* We want the remainder
  262. * Given input of ABCDEF / ab
  263. * we need multiply ABCDEF by 3 digests of the reciprocal of ab */
  264. /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
  265. i = BN_num_bits(m);
  266. j = recp->num_bits << 1;
  267. if (j > i) {
  268. i = j;
  269. }
  270. /* Nr := round(2^i / N) */
  271. if (i != recp->shift) {
  272. recp->shift =
  273. BN_reciprocal(&(recp->Nr), &(recp->N), i,
  274. ctx); /* BN_reciprocal returns i, or -1 for an error */
  275. }
  276. if (recp->shift == -1) {
  277. goto err;
  278. }
  279. /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
  280. * BN_num_bits(N)))|
  281. * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
  282. * BN_num_bits(N)))|
  283. * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
  284. * = |m/N| */
  285. if (!BN_rshift(a, m, recp->num_bits)) {
  286. goto err;
  287. }
  288. if (!BN_mul(b, a, &(recp->Nr), ctx)) {
  289. goto err;
  290. }
  291. if (!BN_rshift(d, b, i - recp->num_bits)) {
  292. goto err;
  293. }
  294. d->neg = 0;
  295. if (!BN_mul(b, &(recp->N), d, ctx)) {
  296. goto err;
  297. }
  298. if (!BN_usub(r, m, b)) {
  299. goto err;
  300. }
  301. r->neg = 0;
  302. j = 0;
  303. while (BN_ucmp(r, &(recp->N)) >= 0) {
  304. if (j++ > 2) {
  305. OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL);
  306. goto err;
  307. }
  308. if (!BN_usub(r, r, &(recp->N))) {
  309. goto err;
  310. }
  311. if (!BN_add_word(d, 1)) {
  312. goto err;
  313. }
  314. }
  315. r->neg = BN_is_zero(r) ? 0 : m->neg;
  316. d->neg = m->neg ^ recp->N.neg;
  317. ret = 1;
  318. err:
  319. BN_CTX_end(ctx);
  320. return ret;
  321. }
  322. static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
  323. BN_RECP_CTX *recp, BN_CTX *ctx) {
  324. int ret = 0;
  325. BIGNUM *a;
  326. const BIGNUM *ca;
  327. BN_CTX_start(ctx);
  328. a = BN_CTX_get(ctx);
  329. if (a == NULL) {
  330. goto err;
  331. }
  332. if (y != NULL) {
  333. if (x == y) {
  334. if (!BN_sqr(a, x, ctx)) {
  335. goto err;
  336. }
  337. } else {
  338. if (!BN_mul(a, x, y, ctx)) {
  339. goto err;
  340. }
  341. }
  342. ca = a;
  343. } else {
  344. ca = x; /* Just do the mod */
  345. }
  346. ret = BN_div_recp(NULL, r, ca, recp, ctx);
  347. err:
  348. BN_CTX_end(ctx);
  349. return ret;
  350. }
  /* BN_window_bits_for_exponent_size -- window width selection for the sliding
   * window mod_exp functions.
   *
   * For a window size 'w' (w >= 2) and a random exponent of 'b' bits, the
   * number of multiplications is a constant plus, on average,
   *
   *    2^(w-1) + (b-w)/(w+1)
   *
   * The 2^(w-1) term is the cost of precomputing the table (entries are only
   * needed for windows whose lowest bit is set); (b-w)/(w+1) approximates the
   * expected number of w-bit windows, not counting the first one.
   *
   * Minimising that expression gives
   *
   *    w >= 6  if        b > 671
   *    w  = 5  if  671 > b > 239
   *    w  = 4  if  239 > b >  79
   *    w  = 3  if   79 > b >  23
   *    w <= 2  if   23 > b
   *
   * (with draws in between). Very small exponents are often selected with low
   * Hamming weight, so w = 1 is used for b <= 23. */
  #define BN_window_bits_for_exponent_size(b) \
    ((b) > 671 ? 6                            \
               : ((b) > 239 ? 5 : ((b) > 79 ? 4 : ((b) > 23 ? 3 : 1))))
  379. static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
  380. const BIGNUM *m, BN_CTX *ctx) {
  381. int i, j, bits, ret = 0, wstart, window;
  382. int start = 1;
  383. BIGNUM *aa;
  384. /* Table of variables obtained from 'ctx' */
  385. BIGNUM *val[TABLE_SIZE];
  386. BN_RECP_CTX recp;
  387. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  388. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  389. OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  390. return 0;
  391. }
  392. bits = BN_num_bits(p);
  393. if (bits == 0) {
  394. ret = BN_one(r);
  395. return ret;
  396. }
  397. BN_CTX_start(ctx);
  398. aa = BN_CTX_get(ctx);
  399. val[0] = BN_CTX_get(ctx);
  400. if (!aa || !val[0]) {
  401. goto err;
  402. }
  403. BN_RECP_CTX_init(&recp);
  404. if (m->neg) {
  405. /* ignore sign of 'm' */
  406. if (!BN_copy(aa, m)) {
  407. goto err;
  408. }
  409. aa->neg = 0;
  410. if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
  411. goto err;
  412. }
  413. } else {
  414. if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
  415. goto err;
  416. }
  417. }
  418. if (!BN_nnmod(val[0], a, m, ctx)) {
  419. goto err; /* 1 */
  420. }
  421. if (BN_is_zero(val[0])) {
  422. BN_zero(r);
  423. ret = 1;
  424. goto err;
  425. }
  426. window = BN_window_bits_for_exponent_size(bits);
  427. if (window > 1) {
  428. if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
  429. goto err; /* 2 */
  430. }
  431. j = 1 << (window - 1);
  432. for (i = 1; i < j; i++) {
  433. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  434. !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
  435. goto err;
  436. }
  437. }
  438. }
  439. start = 1; /* This is used to avoid multiplication etc
  440. * when there is only the value '1' in the
  441. * buffer. */
  442. wstart = bits - 1; /* The top bit of the window */
  443. if (!BN_one(r)) {
  444. goto err;
  445. }
  446. for (;;) {
  447. int wvalue; /* The 'value' of the window */
  448. int wend; /* The bottom bit of the window */
  449. if (BN_is_bit_set(p, wstart) == 0) {
  450. if (!start) {
  451. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  452. goto err;
  453. }
  454. }
  455. if (wstart == 0) {
  456. break;
  457. }
  458. wstart--;
  459. continue;
  460. }
  461. /* We now have wstart on a 'set' bit, we now need to work out
  462. * how bit a window to do. To do this we need to scan
  463. * forward until the last set bit before the end of the
  464. * window */
  465. wvalue = 1;
  466. wend = 0;
  467. for (i = 1; i < window; i++) {
  468. if (wstart - i < 0) {
  469. break;
  470. }
  471. if (BN_is_bit_set(p, wstart - i)) {
  472. wvalue <<= (i - wend);
  473. wvalue |= 1;
  474. wend = i;
  475. }
  476. }
  477. /* wend is the size of the current window */
  478. j = wend + 1;
  479. /* add the 'bytes above' */
  480. if (!start) {
  481. for (i = 0; i < j; i++) {
  482. if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
  483. goto err;
  484. }
  485. }
  486. }
  487. /* wvalue will be an odd number < 2^window */
  488. if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
  489. goto err;
  490. }
  491. /* move the 'window' down further */
  492. wstart -= wend + 1;
  493. start = 0;
  494. if (wstart < 0) {
  495. break;
  496. }
  497. }
  498. ret = 1;
  499. err:
  500. BN_CTX_end(ctx);
  501. BN_RECP_CTX_free(&recp);
  502. return ret;
  503. }
  504. int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
  505. BN_CTX *ctx) {
  506. /* For even modulus m = 2^k*m_odd, it might make sense to compute
  507. * a^p mod m_odd and a^p mod 2^k separately (with Montgomery
  508. * exponentiation for the odd part), using appropriate exponent
  509. * reductions, and combine the results using the CRT.
  510. *
  511. * For now, we use Montgomery only if the modulus is odd; otherwise,
  512. * exponentiation using the reciprocal-based quick remaindering
  513. * algorithm is used.
  514. *
  515. * (Timing obtained with expspeed.c [computations a^p mod m
  516. * where a, p, m are of the same length: 256, 512, 1024, 2048,
  517. * 4096, 8192 bits], compared to the running time of the
  518. * standard algorithm:
  519. *
  520. * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration]
  521. * 55 .. 77 % [UltraSparc processor, but
  522. * debug-solaris-sparcv8-gcc conf.]
  523. *
  524. * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration]
  525. * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
  526. *
  527. * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
  528. * at 2048 and more bits, but at 512 and 1024 bits, it was
  529. * slower even than the standard algorithm!
  530. *
  531. * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
  532. * should be obtained when the new Montgomery reduction code
  533. * has been integrated into OpenSSL.) */
  534. if (BN_is_odd(m)) {
  535. if (a->top == 1 && !a->neg && BN_get_flags(p, BN_FLG_CONSTTIME) == 0) {
  536. BN_ULONG A = a->d[0];
  537. return BN_mod_exp_mont_word(r, A, p, m, ctx, NULL);
  538. }
  539. return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
  540. }
  541. return mod_exp_recp(r, a, p, m, ctx);
  542. }
  543. int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  544. const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) {
  545. int i, j, bits, ret = 0, wstart, window;
  546. int start = 1;
  547. BIGNUM *d, *r;
  548. const BIGNUM *aa;
  549. /* Table of variables obtained from 'ctx' */
  550. BIGNUM *val[TABLE_SIZE];
  551. BN_MONT_CTX *new_mont = NULL;
  552. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  553. return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, mont);
  554. }
  555. if (!BN_is_odd(m)) {
  556. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  557. return 0;
  558. }
  559. bits = BN_num_bits(p);
  560. if (bits == 0) {
  561. ret = BN_one(rr);
  562. return ret;
  563. }
  564. BN_CTX_start(ctx);
  565. d = BN_CTX_get(ctx);
  566. r = BN_CTX_get(ctx);
  567. val[0] = BN_CTX_get(ctx);
  568. if (!d || !r || !val[0]) {
  569. goto err;
  570. }
  571. /* Allocate a montgomery context if it was not supplied by the caller. */
  572. if (mont == NULL) {
  573. new_mont = BN_MONT_CTX_new();
  574. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  575. goto err;
  576. }
  577. mont = new_mont;
  578. }
  579. if (a->neg || BN_ucmp(a, m) >= 0) {
  580. if (!BN_nnmod(val[0], a, m, ctx)) {
  581. goto err;
  582. }
  583. aa = val[0];
  584. } else {
  585. aa = a;
  586. }
  587. if (BN_is_zero(aa)) {
  588. BN_zero(rr);
  589. ret = 1;
  590. goto err;
  591. }
  592. if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
  593. goto err; /* 1 */
  594. }
  595. window = BN_window_bits_for_exponent_size(bits);
  596. if (window > 1) {
  597. if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
  598. goto err; /* 2 */
  599. }
  600. j = 1 << (window - 1);
  601. for (i = 1; i < j; i++) {
  602. if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
  603. !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
  604. goto err;
  605. }
  606. }
  607. }
  608. start = 1; /* This is used to avoid multiplication etc
  609. * when there is only the value '1' in the
  610. * buffer. */
  611. wstart = bits - 1; /* The top bit of the window */
  612. j = m->top; /* borrow j */
  613. if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  614. if (bn_wexpand(r, j) == NULL) {
  615. goto err;
  616. }
  617. /* 2^(top*BN_BITS2) - m */
  618. r->d[0] = (0 - m->d[0]) & BN_MASK2;
  619. for (i = 1; i < j; i++) {
  620. r->d[i] = (~m->d[i]) & BN_MASK2;
  621. }
  622. r->top = j;
  623. /* Upper words will be zero if the corresponding words of 'm'
  624. * were 0xfff[...], so decrement r->top accordingly. */
  625. bn_correct_top(r);
  626. } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  627. goto err;
  628. }
  629. for (;;) {
  630. int wvalue; /* The 'value' of the window */
  631. int wend; /* The bottom bit of the window */
  632. if (BN_is_bit_set(p, wstart) == 0) {
  633. if (!start && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  634. goto err;
  635. }
  636. if (wstart == 0) {
  637. break;
  638. }
  639. wstart--;
  640. continue;
  641. }
  642. /* We now have wstart on a 'set' bit, we now need to work out how bit a
  643. * window to do. To do this we need to scan forward until the last set bit
  644. * before the end of the window */
  645. wvalue = 1;
  646. wend = 0;
  647. for (i = 1; i < window; i++) {
  648. if (wstart - i < 0) {
  649. break;
  650. }
  651. if (BN_is_bit_set(p, wstart - i)) {
  652. wvalue <<= (i - wend);
  653. wvalue |= 1;
  654. wend = i;
  655. }
  656. }
  657. /* wend is the size of the current window */
  658. j = wend + 1;
  659. /* add the 'bytes above' */
  660. if (!start) {
  661. for (i = 0; i < j; i++) {
  662. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  663. goto err;
  664. }
  665. }
  666. }
  667. /* wvalue will be an odd number < 2^window */
  668. if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
  669. goto err;
  670. }
  671. /* move the 'window' down further */
  672. wstart -= wend + 1;
  673. start = 0;
  674. if (wstart < 0) {
  675. break;
  676. }
  677. }
  678. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  679. goto err;
  680. }
  681. ret = 1;
  682. err:
  683. BN_MONT_CTX_free(new_mont);
  684. BN_CTX_end(ctx);
  685. return ret;
  686. }
  687. /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
  688. * layout so that accessing any of these table values shows the same access
  689. * pattern as far as cache lines are concerned. The following functions are
  690. * used to transfer a BIGNUM from/to that table. */
  691. static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
  692. int width) {
  693. size_t i, j;
  694. if (top > b->top) {
  695. top = b->top; /* this works because 'buf' is explicitly zeroed */
  696. }
  697. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  698. buf[j] = ((unsigned char *)b->d)[i];
  699. }
  700. return 1;
  701. }
  702. static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
  703. int width) {
  704. size_t i, j;
  705. if (bn_wexpand(b, top) == NULL) {
  706. return 0;
  707. }
  708. for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
  709. ((unsigned char *)b->d)[i] = buf[j];
  710. }
  711. b->top = top;
  712. bn_correct_top(b);
  713. return 1;
  714. }
  /* BN_mod_exp_mont_consttime is based on the assumption that the L1 data
   * cache line width of the target processor is at least the following
   * value. */
  #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
  #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
    (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)

  /* Window sizes optimized for the fixed window size modular exponentiation
   * algorithm (BN_mod_exp_mont_consttime).
   *
   * To achieve the security goals of BN_mod_exp_mont_consttime, the maximum
   * size of the window must not exceed
   * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
   *
   * Window size thresholds are defined for cache line sizes of 32 and 64,
   * where log_2(32)=5 and log_2(64)=6 respectively. A window size of 7 should
   * only be used on processors that have a 128 byte or greater cache line
   * size. */
  #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64

  #define BN_window_bits_for_ctime_exponent_size(b) \
    ((b) > 937 ? 6                                  \
               : ((b) > 306 ? 5 : ((b) > 89 ? 4 : ((b) > 22 ? 3 : 1))))
  #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)

  #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32

  #define BN_window_bits_for_ctime_exponent_size(b) \
    ((b) > 306 ? 5 : ((b) > 89 ? 4 : ((b) > 22 ? 3 : 1)))
  #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)

  #endif

  /* Given a pointer value, compute the next address that is a cache line
   * multiple. A pointer that is already aligned is advanced by one full cache
   * line, so the result is always strictly greater than the input. */
  #define MOD_EXP_CTIME_ALIGN(x_)                 \
    ((unsigned char *)(x_) +                      \
     (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH -        \
      (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
  746. /* This variant of BN_mod_exp_mont() uses fixed windows and the special
  747. * precomputation memory layout to limit data-dependency to a minimum
  748. * to protect secret exponents (cf. the hyper-threading timing attacks
  749. * pointed out by Colin Percival,
  750. * http://www.daemonology.net/hyperthreading-considered-harmful/)
  751. */
  752. int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
  753. const BIGNUM *m, BN_CTX *ctx,
  754. const BN_MONT_CTX *mont) {
  755. int i, bits, ret = 0, window, wvalue;
  756. int top;
  757. BN_MONT_CTX *new_mont = NULL;
  758. int numPowers;
  759. unsigned char *powerbufFree = NULL;
  760. int powerbufLen = 0;
  761. unsigned char *powerbuf = NULL;
  762. BIGNUM tmp, am;
  763. if (!BN_is_odd(m)) {
  764. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  765. return 0;
  766. }
  767. top = m->top;
  768. bits = BN_num_bits(p);
  769. if (bits == 0) {
  770. ret = BN_one(rr);
  771. return ret;
  772. }
  773. BN_CTX_start(ctx);
  774. /* Allocate a montgomery context if it was not supplied by the caller. */
  775. if (mont == NULL) {
  776. new_mont = BN_MONT_CTX_new();
  777. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  778. goto err;
  779. }
  780. mont = new_mont;
  781. }
  782. #ifdef RSAZ_ENABLED
  783. /* If the size of the operands allow it, perform the optimized
  784. * RSAZ exponentiation. For further information see
  785. * crypto/bn/rsaz_exp.c and accompanying assembly modules. */
  786. if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
  787. rsaz_avx2_eligible()) {
  788. if (NULL == bn_wexpand(rr, 16)) {
  789. goto err;
  790. }
  791. RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
  792. rr->top = 16;
  793. rr->neg = 0;
  794. bn_correct_top(rr);
  795. ret = 1;
  796. goto err;
  797. } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) {
  798. if (NULL == bn_wexpand(rr, 8)) {
  799. goto err;
  800. }
  801. RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d);
  802. rr->top = 8;
  803. rr->neg = 0;
  804. bn_correct_top(rr);
  805. ret = 1;
  806. goto err;
  807. }
  808. #endif
  809. /* Get the window size to use with size of p. */
  810. window = BN_window_bits_for_ctime_exponent_size(bits);
  811. #if defined(OPENSSL_BN_ASM_MONT5)
  812. if (window >= 5) {
  813. window = 5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */
  814. if ((top & 7) == 0) {
  815. powerbufLen += 2 * top * sizeof(m->d[0]);
  816. }
  817. }
  818. #endif
  819. /* Allocate a buffer large enough to hold all of the pre-computed
  820. * powers of am, am itself and tmp.
  821. */
  822. numPowers = 1 << window;
  823. powerbufLen +=
  824. sizeof(m->d[0]) *
  825. (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
  826. #ifdef alloca
  827. if (powerbufLen < 3072) {
  828. powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
  829. } else
  830. #endif
  831. {
  832. if ((powerbufFree = (unsigned char *)OPENSSL_malloc(
  833. powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) {
  834. goto err;
  835. }
  836. }
  837. powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
  838. memset(powerbuf, 0, powerbufLen);
  839. #ifdef alloca
  840. if (powerbufLen < 3072) {
  841. powerbufFree = NULL;
  842. }
  843. #endif
  844. /* lay down tmp and am right after powers table */
  845. tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
  846. am.d = tmp.d + top;
  847. tmp.top = am.top = 0;
  848. tmp.dmax = am.dmax = top;
  849. tmp.neg = am.neg = 0;
  850. tmp.flags = am.flags = BN_FLG_STATIC_DATA;
  851. /* prepare a^0 in Montgomery domain */
  852. /* by Shay Gueron's suggestion */
  853. if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
  854. /* 2^(top*BN_BITS2) - m */
  855. tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
  856. for (i = 1; i < top; i++) {
  857. tmp.d[i] = (~m->d[i]) & BN_MASK2;
  858. }
  859. tmp.top = top;
  860. } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) {
  861. goto err;
  862. }
  863. /* prepare a^1 in Montgomery domain */
  864. if (a->neg || BN_ucmp(a, m) >= 0) {
  865. if (!BN_mod(&am, a, m, ctx) ||
  866. !BN_to_montgomery(&am, &am, mont, ctx)) {
  867. goto err;
  868. }
  869. } else if (!BN_to_montgomery(&am, a, mont, ctx)) {
  870. goto err;
  871. }
  872. #if defined(OPENSSL_BN_ASM_MONT5)
  873. /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
  874. * specifically optimization of cache-timing attack countermeasures
  875. * and pre-computation optimization. */
  876. /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
  877. * 512-bit RSA is hardly relevant, we omit it to spare size... */
  878. if (window == 5 && top > 1) {
  879. const BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
  880. /* BN_to_montgomery can contaminate words above .top
  881. * [in BN_DEBUG[_DEBUG] build]... */
  882. for (i = am.top; i < top; i++) {
  883. am.d[i] = 0;
  884. }
  885. for (i = tmp.top; i < top; i++) {
  886. tmp.d[i] = 0;
  887. }
  888. if (top & 7) {
  889. np2 = np;
  890. } else {
  891. BN_ULONG *np_double = am.d + top;
  892. for (i = 0; i < top; i++) {
  893. np_double[2 * i] = np[i];
  894. }
  895. np2 = np_double;
  896. }
  897. bn_scatter5(tmp.d, top, powerbuf, 0);
  898. bn_scatter5(am.d, am.top, powerbuf, 1);
  899. bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
  900. bn_scatter5(tmp.d, top, powerbuf, 2);
  901. /* same as above, but uses squaring for 1/2 of operations */
  902. for (i = 4; i < 32; i *= 2) {
  903. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  904. bn_scatter5(tmp.d, top, powerbuf, i);
  905. }
  906. for (i = 3; i < 8; i += 2) {
  907. int j;
  908. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  909. bn_scatter5(tmp.d, top, powerbuf, i);
  910. for (j = 2 * i; j < 32; j *= 2) {
  911. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  912. bn_scatter5(tmp.d, top, powerbuf, j);
  913. }
  914. }
  915. for (; i < 16; i += 2) {
  916. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  917. bn_scatter5(tmp.d, top, powerbuf, i);
  918. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  919. bn_scatter5(tmp.d, top, powerbuf, 2 * i);
  920. }
  921. for (; i < 32; i += 2) {
  922. bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
  923. bn_scatter5(tmp.d, top, powerbuf, i);
  924. }
  925. bits--;
  926. for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {
  927. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  928. }
  929. bn_gather5(tmp.d, top, powerbuf, wvalue);
  930. /* At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
  931. * that has not been read yet.) */
  932. assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
  933. /* Scan the exponent one window at a time starting from the most
  934. * significant bits.
  935. */
  936. if (top & 7) {
  937. while (bits >= 0) {
  938. for (wvalue = 0, i = 0; i < 5; i++, bits--) {
  939. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  940. }
  941. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  942. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  943. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  944. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  945. bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
  946. bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
  947. }
  948. } else {
  949. const uint8_t *p_bytes = (const uint8_t *)p->d;
  950. int max_bits = p->top * BN_BITS2;
  951. assert(bits < max_bits);
  952. /* |p = 0| has been handled as a special case, so |max_bits| is at least
  953. * one word. */
  954. assert(max_bits >= 64);
  955. /* If the first bit to be read lands in the last byte, unroll the first
  956. * iteration to avoid reading past the bounds of |p->d|. (After the first
  957. * iteration, we are guaranteed to be past the last byte.) Note |bits|
  958. * here is the top bit, inclusive. */
  959. if (bits - 4 >= max_bits - 8) {
  960. /* Read five bits from |bits-4| through |bits|, inclusive. */
  961. wvalue = p_bytes[p->top * BN_BYTES - 1];
  962. wvalue >>= (bits - 4) & 7;
  963. wvalue &= 0x1f;
  964. bits -= 5;
  965. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  966. }
  967. while (bits >= 0) {
  968. /* Read five bits from |bits-4| through |bits|, inclusive. */
  969. int first_bit = bits - 4;
  970. wvalue = *(const uint16_t *) (p_bytes + (first_bit >> 3));
  971. wvalue >>= first_bit & 7;
  972. wvalue &= 0x1f;
  973. bits -= 5;
  974. bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
  975. }
  976. }
  977. ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
  978. tmp.top = top;
  979. bn_correct_top(&tmp);
  980. if (ret) {
  981. if (!BN_copy(rr, &tmp)) {
  982. ret = 0;
  983. }
  984. goto err; /* non-zero ret means it's not error */
  985. }
  986. } else
  987. #endif
  988. {
  989. if (!copy_to_prebuf(&tmp, top, powerbuf, 0, numPowers) ||
  990. !copy_to_prebuf(&am, top, powerbuf, 1, numPowers)) {
  991. goto err;
  992. }
  993. /* If the window size is greater than 1, then calculate
  994. * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
  995. * (even powers could instead be computed as (a^(i/2))^2
  996. * to use the slight performance advantage of sqr over mul).
  997. */
  998. if (window > 1) {
  999. if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx) ||
  1000. !copy_to_prebuf(&tmp, top, powerbuf, 2, numPowers)) {
  1001. goto err;
  1002. }
  1003. for (i = 3; i < numPowers; i++) {
  1004. /* Calculate a^i = a^(i-1) * a */
  1005. if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx) ||
  1006. !copy_to_prebuf(&tmp, top, powerbuf, i, numPowers)) {
  1007. goto err;
  1008. }
  1009. }
  1010. }
  1011. bits--;
  1012. for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
  1013. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1014. }
  1015. if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, numPowers)) {
  1016. goto err;
  1017. }
  1018. /* Scan the exponent one window at a time starting from the most
  1019. * significant bits.
  1020. */
  1021. while (bits >= 0) {
  1022. wvalue = 0; /* The 'value' of the window */
  1023. /* Scan the window, squaring the result as we go */
  1024. for (i = 0; i < window; i++, bits--) {
  1025. if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) {
  1026. goto err;
  1027. }
  1028. wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
  1029. }
  1030. /* Fetch the appropriate pre-computed value from the pre-buf */
  1031. if (!copy_from_prebuf(&am, top, powerbuf, wvalue, numPowers)) {
  1032. goto err;
  1033. }
  1034. /* Multiply the result into the intermediate result */
  1035. if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) {
  1036. goto err;
  1037. }
  1038. }
  1039. }
  1040. /* Convert the final result from montgomery to standard format */
  1041. if (!BN_from_montgomery(rr, &tmp, mont, ctx)) {
  1042. goto err;
  1043. }
  1044. ret = 1;
  1045. err:
  1046. BN_MONT_CTX_free(new_mont);
  1047. if (powerbuf != NULL) {
  1048. OPENSSL_cleanse(powerbuf, powerbufLen);
  1049. OPENSSL_free(powerbufFree);
  1050. }
  1051. BN_CTX_end(ctx);
  1052. return (ret);
  1053. }
  1054. int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
  1055. const BIGNUM *m, BN_CTX *ctx,
  1056. const BN_MONT_CTX *mont) {
  1057. BN_MONT_CTX *new_mont = NULL;
  1058. int b, bits, ret = 0;
  1059. int r_is_one;
  1060. BN_ULONG w, next_w;
  1061. BIGNUM *d, *r, *t;
  1062. BIGNUM *swap_tmp;
  1063. #define BN_MOD_MUL_WORD(r, w, m) \
  1064. (BN_mul_word(r, (w)) && \
  1065. (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \
  1066. (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
  1067. /* BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is
  1068. * probably more overhead than always using BN_mod (which uses BN_copy if a
  1069. * similar test returns true). We can use BN_mod and do not need BN_nnmod
  1070. * because our accumulator is never negative (the result of BN_mod does not
  1071. * depend on the sign of the modulus). */
  1072. #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
  1073. (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
  1074. if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) {
  1075. /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
  1076. OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
  1077. return 0;
  1078. }
  1079. if (!BN_is_odd(m)) {
  1080. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  1081. return 0;
  1082. }
  1083. if (m->top == 1) {
  1084. a %= m->d[0]; /* make sure that 'a' is reduced */
  1085. }
  1086. bits = BN_num_bits(p);
  1087. if (bits == 0) {
  1088. /* x**0 mod 1 is still zero. */
  1089. if (BN_is_one(m)) {
  1090. ret = 1;
  1091. BN_zero(rr);
  1092. } else {
  1093. ret = BN_one(rr);
  1094. }
  1095. return ret;
  1096. }
  1097. if (a == 0) {
  1098. BN_zero(rr);
  1099. ret = 1;
  1100. return ret;
  1101. }
  1102. BN_CTX_start(ctx);
  1103. d = BN_CTX_get(ctx);
  1104. r = BN_CTX_get(ctx);
  1105. t = BN_CTX_get(ctx);
  1106. if (d == NULL || r == NULL || t == NULL) {
  1107. goto err;
  1108. }
  1109. /* Allocate a montgomery context if it was not supplied by the caller. */
  1110. if (mont == NULL) {
  1111. new_mont = BN_MONT_CTX_new();
  1112. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  1113. goto err;
  1114. }
  1115. mont = new_mont;
  1116. }
  1117. r_is_one = 1; /* except for Montgomery factor */
  1118. /* bits-1 >= 0 */
  1119. /* The result is accumulated in the product r*w. */
  1120. w = a; /* bit 'bits-1' of 'p' is always set */
  1121. for (b = bits - 2; b >= 0; b--) {
  1122. /* First, square r*w. */
  1123. next_w = w * w;
  1124. if ((next_w / w) != w) {
  1125. /* overflow */
  1126. if (r_is_one) {
  1127. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1128. goto err;
  1129. }
  1130. r_is_one = 0;
  1131. } else {
  1132. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1133. goto err;
  1134. }
  1135. }
  1136. next_w = 1;
  1137. }
  1138. w = next_w;
  1139. if (!r_is_one) {
  1140. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1141. goto err;
  1142. }
  1143. }
  1144. /* Second, multiply r*w by 'a' if exponent bit is set. */
  1145. if (BN_is_bit_set(p, b)) {
  1146. next_w = w * a;
  1147. if ((next_w / a) != w) {
  1148. /* overflow */
  1149. if (r_is_one) {
  1150. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1151. goto err;
  1152. }
  1153. r_is_one = 0;
  1154. } else {
  1155. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1156. goto err;
  1157. }
  1158. }
  1159. next_w = a;
  1160. }
  1161. w = next_w;
  1162. }
  1163. }
  1164. /* Finally, set r:=r*w. */
  1165. if (w != 1) {
  1166. if (r_is_one) {
  1167. if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) {
  1168. goto err;
  1169. }
  1170. r_is_one = 0;
  1171. } else {
  1172. if (!BN_MOD_MUL_WORD(r, w, m)) {
  1173. goto err;
  1174. }
  1175. }
  1176. }
  1177. if (r_is_one) {
  1178. /* can happen only if a == 1*/
  1179. if (!BN_one(rr)) {
  1180. goto err;
  1181. }
  1182. } else {
  1183. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1184. goto err;
  1185. }
  1186. }
  1187. ret = 1;
  1188. err:
  1189. BN_MONT_CTX_free(new_mont);
  1190. BN_CTX_end(ctx);
  1191. return ret;
  1192. }
  1193. #define TABLE_SIZE 32
  1194. int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
  1195. const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
  1196. BN_CTX *ctx, const BN_MONT_CTX *mont) {
  1197. int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2,
  1198. wvalue1, wvalue2;
  1199. int r_is_one = 1;
  1200. BIGNUM *d, *r;
  1201. const BIGNUM *a_mod_m;
  1202. /* Tables of variables obtained from 'ctx' */
  1203. BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
  1204. BN_MONT_CTX *new_mont = NULL;
  1205. if (!(m->d[0] & 1)) {
  1206. OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
  1207. return 0;
  1208. }
  1209. bits1 = BN_num_bits(p1);
  1210. bits2 = BN_num_bits(p2);
  1211. if (bits1 == 0 && bits2 == 0) {
  1212. ret = BN_one(rr);
  1213. return ret;
  1214. }
  1215. bits = (bits1 > bits2) ? bits1 : bits2;
  1216. BN_CTX_start(ctx);
  1217. d = BN_CTX_get(ctx);
  1218. r = BN_CTX_get(ctx);
  1219. val1[0] = BN_CTX_get(ctx);
  1220. val2[0] = BN_CTX_get(ctx);
  1221. if (!d || !r || !val1[0] || !val2[0]) {
  1222. goto err;
  1223. }
  1224. /* Allocate a montgomery context if it was not supplied by the caller. */
  1225. if (mont == NULL) {
  1226. new_mont = BN_MONT_CTX_new();
  1227. if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
  1228. goto err;
  1229. }
  1230. mont = new_mont;
  1231. }
  1232. window1 = BN_window_bits_for_exponent_size(bits1);
  1233. window2 = BN_window_bits_for_exponent_size(bits2);
  1234. /* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 ..
  1235. * 2^(window1-1) */
  1236. if (a1->neg || BN_ucmp(a1, m) >= 0) {
  1237. if (!BN_mod(val1[0], a1, m, ctx)) {
  1238. goto err;
  1239. }
  1240. a_mod_m = val1[0];
  1241. } else {
  1242. a_mod_m = a1;
  1243. }
  1244. if (BN_is_zero(a_mod_m)) {
  1245. BN_zero(rr);
  1246. ret = 1;
  1247. goto err;
  1248. }
  1249. if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) {
  1250. goto err;
  1251. }
  1252. if (window1 > 1) {
  1253. if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) {
  1254. goto err;
  1255. }
  1256. j = 1 << (window1 - 1);
  1257. for (i = 1; i < j; i++) {
  1258. if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
  1259. !BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx)) {
  1260. goto err;
  1261. }
  1262. }
  1263. }
  1264. /* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 ..
  1265. * 2^(window2-1) */
  1266. if (a2->neg || BN_ucmp(a2, m) >= 0) {
  1267. if (!BN_mod(val2[0], a2, m, ctx)) {
  1268. goto err;
  1269. }
  1270. a_mod_m = val2[0];
  1271. } else {
  1272. a_mod_m = a2;
  1273. }
  1274. if (BN_is_zero(a_mod_m)) {
  1275. BN_zero(rr);
  1276. ret = 1;
  1277. goto err;
  1278. }
  1279. if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) {
  1280. goto err;
  1281. }
  1282. if (window2 > 1) {
  1283. if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) {
  1284. goto err;
  1285. }
  1286. j = 1 << (window2 - 1);
  1287. for (i = 1; i < j; i++) {
  1288. if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
  1289. !BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx)) {
  1290. goto err;
  1291. }
  1292. }
  1293. }
  1294. /* Now compute the power product, using independent windows. */
  1295. r_is_one = 1;
  1296. wvalue1 = 0; /* The 'value' of the first window */
  1297. wvalue2 = 0; /* The 'value' of the second window */
  1298. wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */
  1299. wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */
  1300. if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
  1301. goto err;
  1302. }
  1303. for (b = bits - 1; b >= 0; b--) {
  1304. if (!r_is_one) {
  1305. if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
  1306. goto err;
  1307. }
  1308. }
  1309. if (!wvalue1 && BN_is_bit_set(p1, b)) {
  1310. /* consider bits b-window1+1 .. b for this window */
  1311. i = b - window1 + 1;
  1312. /* works for i<0 */
  1313. while (!BN_is_bit_set(p1, i)) {
  1314. i++;
  1315. }
  1316. wpos1 = i;
  1317. wvalue1 = 1;
  1318. for (i = b - 1; i >= wpos1; i--) {
  1319. wvalue1 <<= 1;
  1320. if (BN_is_bit_set(p1, i)) {
  1321. wvalue1++;
  1322. }
  1323. }
  1324. }
  1325. if (!wvalue2 && BN_is_bit_set(p2, b)) {
  1326. /* consider bits b-window2+1 .. b for this window */
  1327. i = b - window2 + 1;
  1328. while (!BN_is_bit_set(p2, i)) {
  1329. i++;
  1330. }
  1331. wpos2 = i;
  1332. wvalue2 = 1;
  1333. for (i = b - 1; i >= wpos2; i--) {
  1334. wvalue2 <<= 1;
  1335. if (BN_is_bit_set(p2, i)) {
  1336. wvalue2++;
  1337. }
  1338. }
  1339. }
  1340. if (wvalue1 && b == wpos1) {
  1341. /* wvalue1 is odd and < 2^window1 */
  1342. if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx)) {
  1343. goto err;
  1344. }
  1345. wvalue1 = 0;
  1346. r_is_one = 0;
  1347. }
  1348. if (wvalue2 && b == wpos2) {
  1349. /* wvalue2 is odd and < 2^window2 */
  1350. if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx)) {
  1351. goto err;
  1352. }
  1353. wvalue2 = 0;
  1354. r_is_one = 0;
  1355. }
  1356. }
  1357. if (!BN_from_montgomery(rr, r, mont, ctx)) {
  1358. goto err;
  1359. }
  1360. ret = 1;
  1361. err:
  1362. BN_MONT_CTX_free(new_mont);
  1363. BN_CTX_end(ctx);
  1364. return ret;
  1365. }