boringssl/crypto/fipsmodule/bn/montgomery.c

510 lines
16 KiB
C
Raw Normal View History

/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <openssl/bn.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <openssl/err.h>
#include <openssl/mem.h>
#include <openssl/thread.h>
#include <openssl/type_check.h>
#include "internal.h"
#include "../../internal.h"
// OPENSSL_BN_ASM_MONT is defined when assembly is not disabled via
// |OPENSSL_NO_ASM| and the target is x86, x86-64, ARM, or AArch64. It gates
// the |bn_mul_mont| code paths used later in this file.
#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
#define OPENSSL_BN_ASM_MONT
#endif
// BN_MONT_CTX_new allocates a zero-filled |BN_MONT_CTX| and initializes its
// |RR| and |N| members. It returns the new object, or NULL if the allocation
// fails. Release the result with |BN_MONT_CTX_free|.
BN_MONT_CTX *BN_MONT_CTX_new(void) {
  BN_MONT_CTX *mont = OPENSSL_malloc(sizeof(BN_MONT_CTX));
  if (mont != NULL) {
    // Start from an all-zero structure so |n0| is zero until a modulus is set.
    OPENSSL_memset(mont, 0, sizeof(BN_MONT_CTX));
    BN_init(&mont->RR);
    BN_init(&mont->N);
  }
  return mont;
}
// BN_MONT_CTX_free releases the |RR| and |N| members of |mont| and then |mont|
// itself. Passing NULL is a no-op.
void BN_MONT_CTX_free(BN_MONT_CTX *mont) {
  if (mont != NULL) {
    BN_free(&mont->RR);
    BN_free(&mont->N);
    OPENSSL_free(mont);
  }
}
// BN_MONT_CTX_copy copies |RR|, |N| and both |n0| words from |from| into |to|.
// It returns |to| on success (including when |to| and |from| are the same
// object, in which case nothing is copied) and NULL if either |BN_copy| fails.
BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, const BN_MONT_CTX *from) {
  if (to != from) {
    if (BN_copy(&to->RR, &from->RR) == NULL) {
      return NULL;
    }
    if (BN_copy(&to->N, &from->N) == NULL) {
      return NULL;
    }
    to->n0[0] = from->n0[0];
    to->n0[1] = from->n0[1];
  }
  return to;
}
// History (commit annotation, 2018-04-10 21:46:33 +01:00):
//
// Restore the BN_mod codepath for public Montgomery moduli.
//
// https://boringssl-review.googlesource.com/10520 and then later
// https://boringssl-review.googlesource.com/25285 made BN_MONT_CTX_set
// constant-time, which is necessary for RSA's mont_p and mont_q. However, due
// to a typo in the benchmark, they did not correctly measure.
//
// Split BN_MONT_CTX creation into a constant-time and variable-time one. The
// constant-time one uses our current algorithm and the latter restores the
// original BN_mod codepath.
//
// Should we wish to avoid BN_mod, I have an alternate version lying around:
// First, BN_set_bit + bn_mod_lshift1_consttime as now to count up to 2*R.
// Next, observe that 2*R = BN_to_montgomery(2) and
// R*R = BN_to_montgomery(R) = BN_to_montgomery(2^r_bits). Also observe that
// BN_mod_mul_montgomery only needs n0, not RR. Split the core of
// BN_mod_exp_mont into its own function so the caller handles conversion.
// Raise 2*R to the r_bits power to get 2^r_bits*R = R*R.
//
// The advantage of that algorithm is that it is still constant-time, so we
// only need one BN_MONT_CTX_new. Additionally, it avoids BN_mod which is
// otherwise (almost, but the remaining links should be easy to cut) out of the
// critical path for correctness. One less operation to worry about.
//
// The disadvantage is that it gives a 25% (RSA-2048) or 32% (RSA-4096) slower
// RSA verification speed. I went with the BN_mod one for the time being.
//
// Before:
//   Did 9204 RSA 2048 signing operations in 10052053us (915.6 ops/sec)
//   Did 326000 RSA 2048 verify (same key) operations in 10028823us (32506.3 ops/sec)
//   Did 50830 RSA 2048 verify (fresh key) operations in 10033794us (5065.9 ops/sec)
//   Did 1269 RSA 4096 signing operations in 10019204us (126.7 ops/sec)
//   Did 88435 RSA 4096 verify (same key) operations in 10031129us (8816.1 ops/sec)
//   Did 14552 RSA 4096 verify (fresh key) operations in 10053411us (1447.5 ops/sec)
// After:
//   Did 9150 RSA 2048 signing operations in 10022831us (912.9 ops/sec)
//   Did 322000 RSA 2048 verify (same key) operations in 10028604us (32108.2 ops/sec)
//   Did 289000 RSA 2048 verify (fresh key) operations in 10017205us (28850.4 ops/sec)
//   Did 1270 RSA 4096 signing operations in 10072950us (126.1 ops/sec)
//   Did 87480 RSA 4096 verify (same key) operations in 10036328us (8716.3 ops/sec)
//   Did 80730 RSA 4096 verify (fresh key) operations in 10073614us (8014.0 ops/sec)
//
// Change-Id: Ie8916d1634ccf8513ceda458fa302f09f3e93c07
// Reviewed-on: https://boringssl-review.googlesource.com/27287
// Commit-Queue: David Benjamin <davidben@google.com>
// CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
// Reviewed-by: Adam Langley <agl@google.com>
// bn_mont_ctx_set_N_and_n0 validates |mod|, stores a minimal-width copy of it
// in |mont->N|, and fills in |mont->n0|. |mont->RR| is not touched.
//
// It returns one on success. It returns zero and pushes an error if |mod| is
// zero (BN_R_DIV_BY_ZERO), even (BN_R_CALLED_WITH_EVEN_MODULUS), or negative
// (BN_R_NEGATIVE_NUMBER), or if copying the modulus fails
// (ERR_R_INTERNAL_ERROR).
static int bn_mont_ctx_set_N_and_n0(BN_MONT_CTX *mont, const BIGNUM *mod) {
  if (BN_is_zero(mod)) {
    OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
    return 0;
  }
  if (!BN_is_odd(mod)) {
    OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
    return 0;
  }
  if (BN_is_negative(mod)) {
    OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
    return 0;
  }

  // Save the modulus.
  if (!BN_copy(&mont->N, mod)) {
    OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
    return 0;
  }
  // |mont->N| is always stored minimally. Computing RR efficiently leaks the
  // size of the modulus. While the modulus may be private in RSA (one of the
  // primes), their sizes are public, so this is fine.
  bn_set_minimal_width(&mont->N);

  // Find n0 such that n0 * N == -1 (mod r).
  //
  // Only certain BN_BITS2<=32 platforms actually make use of n0[1]. For the
  // others, we could use a shorter R value and use faster |BN_ULONG|-based
  // math instead of |uint64_t|-based math, which would be double-precision.
  // However, currently only the assembler files know which is which.
  OPENSSL_STATIC_ASSERT(BN_MONT_CTX_N0_LIMBS == 1 || BN_MONT_CTX_N0_LIMBS == 2,
                        "BN_MONT_CTX_N0_LIMBS value is invalid");
  OPENSSL_STATIC_ASSERT(
      sizeof(BN_ULONG) * BN_MONT_CTX_N0_LIMBS == sizeof(uint64_t),
      "uint64_t is insufficient precision for n0");
  uint64_t n0 = bn_mont_n0(&mont->N);
  // Split the 64-bit value across the |n0| limbs; the high limb is zero when
  // a single limb already holds all 64 bits.
  mont->n0[0] = (BN_ULONG)n0;
#if BN_MONT_CTX_N0_LIMBS == 2
  mont->n0[1] = (BN_ULONG)(n0 >> BN_BITS2);
#else
  mont->n0[1] = 0;
#endif
  return 1;
}
// BN_MONT_CTX_set configures |mont| for the modulus |mod|: it stores the
// modulus and |n0| via |bn_mont_ctx_set_N_and_n0| and then computes
// |mont->RR| using |BN_mod|. If |ctx| is NULL, a temporary |BN_CTX| is
// allocated for the call and freed before returning.
//
// It returns one on success and zero on error (invalid modulus, allocation
// failure, or failure of any of the bignum operations).
int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) {
  if (!bn_mont_ctx_set_N_and_n0(mont, mod)) {
    return 0;
  }

  BN_CTX *new_ctx = NULL;
  if (ctx == NULL) {
    new_ctx = BN_CTX_new();
    if (new_ctx == NULL) {
      return 0;
    }
    ctx = new_ctx;
  }

  // Save RR = R**2 (mod N). R is the smallest power of 2**BN_BITS2 such that R
  // > mod. Even though the assembly on some 32-bit platforms works with 64-bit
  // values, using |BN_BITS2| here, rather than |BN_MONT_CTX_N0_LIMBS *
  // BN_BITS2|, is correct because R**2 will still be a multiple of the latter
  // as |BN_MONT_CTX_N0_LIMBS| is either one or two.
  unsigned lgBigR = mont->N.width * BN_BITS2;
  BN_zero(&mont->RR);
  // Set RR = 2^(2*lgBigR) = R^2, reduce it modulo N, then resize it to the
  // same word count as N.
  int ok = BN_set_bit(&mont->RR, lgBigR * 2) &&
           BN_mod(&mont->RR, &mont->RR, &mont->N, ctx) &&
           bn_resize_words(&mont->RR, mont->N.width);
  // |new_ctx| is NULL when the caller supplied |ctx|.
  BN_CTX_free(new_ctx);
  return ok;
}
// BN_MONT_CTX_new_for_modulus allocates a |BN_MONT_CTX| and configures it for
// |mod| with |BN_MONT_CTX_set|. It returns the new context, or NULL if
// allocation or configuration fails; nothing is leaked on failure.
BN_MONT_CTX *BN_MONT_CTX_new_for_modulus(const BIGNUM *mod, BN_CTX *ctx) {
  BN_MONT_CTX *mont = BN_MONT_CTX_new();
  if (mont != NULL && BN_MONT_CTX_set(mont, mod, ctx)) {
    return mont;
  }
  // |BN_MONT_CTX_free| accepts NULL, so this covers both failure modes.
  BN_MONT_CTX_free(mont);
  return NULL;
}
// History (commit annotation, 2018-04-10 21:46:33 +01:00):
//
// Restore the BN_mod codepath for public Montgomery moduli.
//
// https://boringssl-review.googlesource.com/10520 and then later
// https://boringssl-review.googlesource.com/25285 made BN_MONT_CTX_set
// constant-time, which is necessary for RSA's mont_p and mont_q. However, due
// to a typo in the benchmark, they did not correctly measure.
//
// Split BN_MONT_CTX creation into a constant-time and variable-time one. The
// constant-time one uses our current algorithm and the latter restores the
// original BN_mod codepath.
//
// Should we wish to avoid BN_mod, I have an alternate version lying around:
// First, BN_set_bit + bn_mod_lshift1_consttime as now to count up to 2*R.
// Next, observe that 2*R = BN_to_montgomery(2) and
// R*R = BN_to_montgomery(R) = BN_to_montgomery(2^r_bits). Also observe that
// BN_mod_mul_montgomery only needs n0, not RR. Split the core of
// BN_mod_exp_mont into its own function so the caller handles conversion.
// Raise 2*R to the r_bits power to get 2^r_bits*R = R*R.
//
// The advantage of that algorithm is that it is still constant-time, so we
// only need one BN_MONT_CTX_new. Additionally, it avoids BN_mod which is
// otherwise (almost, but the remaining links should be easy to cut) out of the
// critical path for correctness. One less operation to worry about.
//
// The disadvantage is that it gives a 25% (RSA-2048) or 32% (RSA-4096) slower
// RSA verification speed. I went with the BN_mod one for the time being.
//
// Before:
//   Did 9204 RSA 2048 signing operations in 10052053us (915.6 ops/sec)
//   Did 326000 RSA 2048 verify (same key) operations in 10028823us (32506.3 ops/sec)
//   Did 50830 RSA 2048 verify (fresh key) operations in 10033794us (5065.9 ops/sec)
//   Did 1269 RSA 4096 signing operations in 10019204us (126.7 ops/sec)
//   Did 88435 RSA 4096 verify (same key) operations in 10031129us (8816.1 ops/sec)
//   Did 14552 RSA 4096 verify (fresh key) operations in 10053411us (1447.5 ops/sec)
// After:
//   Did 9150 RSA 2048 signing operations in 10022831us (912.9 ops/sec)
//   Did 322000 RSA 2048 verify (same key) operations in 10028604us (32108.2 ops/sec)
//   Did 289000 RSA 2048 verify (fresh key) operations in 10017205us (28850.4 ops/sec)
//   Did 1270 RSA 4096 signing operations in 10072950us (126.1 ops/sec)
//   Did 87480 RSA 4096 verify (same key) operations in 10036328us (8716.3 ops/sec)
//   Did 80730 RSA 4096 verify (fresh key) operations in 10073614us (8014.0 ops/sec)
//
// Change-Id: Ie8916d1634ccf8513ceda458fa302f09f3e93c07
// Reviewed-on: https://boringssl-review.googlesource.com/27287
// Commit-Queue: David Benjamin <davidben@google.com>
// CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
// Reviewed-by: Adam Langley <agl@google.com>
// BN_MONT_CTX_new_consttime allocates a |BN_MONT_CTX| for |mod|, computing
// |RR| with |bn_mod_exp_base_2_consttime| instead of the |BN_mod| path used by
// |BN_MONT_CTX_set|.
//
// It returns the new context on success. On any failure (allocation, invalid
// modulus, or a bignum operation failing) the partially-built context is
// freed and NULL is returned.
BN_MONT_CTX *BN_MONT_CTX_new_consttime(const BIGNUM *mod, BN_CTX *ctx) {
  BN_MONT_CTX *mont = BN_MONT_CTX_new();
  if (mont == NULL ||
      !bn_mont_ctx_set_N_and_n0(mont, mod)) {
    goto err;
  }

  // RR = 2^(2*lgBigR) mod N, where lgBigR is the bit length of R, i.e. the
  // word count of the stored modulus times |BN_BITS2|.
  unsigned lgBigR = mont->N.width * BN_BITS2;
  if (!bn_mod_exp_base_2_consttime(&mont->RR, lgBigR * 2, &mont->N, ctx) ||
      !bn_resize_words(&mont->RR, mont->N.width)) {
    goto err;
  }
  return mont;

err:
  // |BN_MONT_CTX_free| accepts NULL, so this is safe if allocation failed.
  BN_MONT_CTX_free(mont);
  return NULL;
}
// BN_MONT_CTX_set_locked ensures |*pmont| holds a Montgomery context for
// |mod|. It first reads |*pmont| under the read lock and returns one
// immediately if it is already set. Otherwise it takes the write lock, checks
// |*pmont| again, and creates the context with |BN_MONT_CTX_new_for_modulus|
// if it is still NULL.
//
// It returns one if |*pmont| is non-NULL on exit and zero otherwise.
int BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_MUTEX *lock,
                           const BIGNUM *mod, BN_CTX *bn_ctx) {
  CRYPTO_MUTEX_lock_read(lock);
  const int already_set = *pmont != NULL;
  CRYPTO_MUTEX_unlock_read(lock);
  if (already_set) {
    return 1;
  }

  CRYPTO_MUTEX_lock_write(lock);
  BN_MONT_CTX *mont = *pmont;
  if (mont == NULL) {
    // Still unset under the write lock; create it now.
    mont = BN_MONT_CTX_new_for_modulus(mod, bn_ctx);
    *pmont = mont;
  }
  CRYPTO_MUTEX_unlock_write(lock);
  return mont != NULL;
}
// BN_to_montgomery sets |ret| to the Montgomery product of |a| and |mont->RR|
// and returns the result of |BN_mod_mul_montgomery|.
int BN_to_montgomery(BIGNUM *ret, const BIGNUM *a, const BN_MONT_CTX *mont,
                     BN_CTX *ctx) {
  const BIGNUM *rr = &mont->RR;
  return BN_mod_mul_montgomery(ret, a, rr, mont, ctx);
}
// bn_from_montgomery_in_place performs Montgomery reduction of the |num_a|-word
// value in |a| and writes the |num_r|-word result to |r|. |a| is used as
// working space and is overwritten.
//
// It returns one on success. It returns zero and pushes
// ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED if |num_r| differs from the modulus width
// or |num_a| is not exactly twice the modulus width.
static int bn_from_montgomery_in_place(BN_ULONG *r, size_t num_r, BN_ULONG *a,
size_t num_a, const BN_MONT_CTX *mont) {
const BN_ULONG *n = mont->N.d;
size_t num_n = mont->N.width;
if (num_r != num_n || num_a != 2 * num_n) {
OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return 0;
}
// Add multiples of |n| to |a| until R = 2^(num_n * BN_BITS2) divides it. On
// input, we had |a| < |n| * R, so now |a| < 2 * |n| * R. Note that |a|
// includes |carry| which is stored separately.
BN_ULONG n0 = mont->n0[0];
BN_ULONG carry = 0;
for (size_t i = 0; i < num_n; i++) {
// Add |a[i] * n0| times |n| into |a| starting at word |i|. |v| presumably
// receives the word carried out of that |num_n|-word multiply-accumulate;
// confirm against |bn_mul_add_words|.
BN_ULONG v = bn_mul_add_words(a + i, n, num_n, a[i] * n0);
v += carry + a[i + num_n];
// Update |carry| using only comparisons and bitwise operations. After these
// two statements |carry| is one if the new word is smaller than the old one
// (the addition wrapped), zero if it is larger, and unchanged if they are
// equal.
carry |= (v != a[i + num_n]);
carry &= (v <= a[i + num_n]);
a[i + num_n] = v;
}
// Shift |num_n| words to divide by R. We have |a| < 2 * |n|. Note that |a|
// includes |carry| which is stored separately.
a += num_n;
// |a| thus requires at most one additional subtraction |n| to be reduced.
bn_reduce_once(r, a, carry, n, num_n);
return 1;
}
// BN_from_montgomery_word Montgomery-reduces |r| into |ret|. |r| is resized
// to twice the modulus width and is overwritten as working space.
//
// It returns one on success and zero on error: a negative |r| pushes
// BN_R_NEGATIVE_NUMBER, and resize/expand failures return zero directly. If
// the modulus has zero width, |ret| is set to zero width and one is returned.
static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r,
                                   const BN_MONT_CTX *mont) {
  if (r->neg) {
    OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
    return 0;
  }

  const int n_width = mont->N.width;
  if (n_width == 0) {
    ret->width = 0;
    return 1;
  }

  // The input must span 2 * n_width words; the carry is stored separately.
  if (!bn_resize_words(r, 2 * n_width)) {
    return 0;
  }
  if (!bn_wexpand(ret, n_width)) {
    return 0;
  }

  ret->width = n_width;
  ret->neg = 0;
  return bn_from_montgomery_in_place(ret->d, ret->width, r->d, r->width, mont);
}
// BN_from_montgomery Montgomery-reduces |a| into |r| without modifying |a|:
// the reduction is run on a copy of |a| held in a temporary taken from |ctx|.
// It returns one on success and zero on error.
int BN_from_montgomery(BIGNUM *r, const BIGNUM *a, const BN_MONT_CTX *mont,
                       BN_CTX *ctx) {
  BN_CTX_start(ctx);
  BIGNUM *scratch = BN_CTX_get(ctx);
  // Each step only runs if the previous one succeeded.
  const int ok = scratch != NULL &&
                 BN_copy(scratch, a) != NULL &&
                 BN_from_montgomery_word(r, scratch, mont);
  BN_CTX_end(ctx);
  return ok;
}
// bn_one_to_montgomery sets |r| to one in Montgomery form, i.e. R mod |n|.
// It returns one on success and zero on error.
int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx) {
  const BIGNUM *n = &mont->N;
  const int width = n->width;
  const int high_bit_set =
      width > 0 && (n->d[width - 1] >> (BN_BITS2 - 1)) != 0;

  if (!high_bit_set) {
    // General case: Montgomery-reduce RR.
    return BN_from_montgomery(r, &mont->RR, mont, ctx);
  }

  // If the high bit of |n| is set, R = 2^(width*BN_BITS2) < 2 * |n|, so we
  // compute R - |n| rather than perform Montgomery reduction.
  if (!bn_wexpand(r, width)) {
    return 0;
  }
  // Negate the low word and complement the rest. NOTE(review): this equals
  // R - |n| only when the low word of |n| is non-zero, which holds for the odd
  // moduli accepted when the context is set up; confirm no other setup path.
  r->d[0] = 0 - n->d[0];
  for (int i = 1; i < width; i++) {
    r->d[i] = ~n->d[i];
  }
  r->width = width;
  r->neg = 0;
  return 1;
}
// bn_mod_mul_montgomery_fallback computes the Montgomery product of |a| and
// |b| into |r| without the assembly routine: it forms the full product in a
// temporary from |ctx| (using the squaring routine when |a| and |b| are the
// same object) and then Montgomery-reduces it. It returns one on success and
// zero on error.
static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a,
                                          const BIGNUM *b,
                                          const BN_MONT_CTX *mont,
                                          BN_CTX *ctx) {
  int ok = 0;
  BN_CTX_start(ctx);
  BIGNUM *product = BN_CTX_get(ctx);
  if (product != NULL) {
    const int multiplied = (a == b)
                               ? bn_sqr_consttime(product, a, ctx)
                               : bn_mul_consttime(product, a, b, ctx);
    // reduce from aRR to aR
    if (multiplied && BN_from_montgomery_word(r, product, mont)) {
      ok = 1;
    }
  }
  BN_CTX_end(ctx);
  return ok;
}
// BN_mod_mul_montgomery sets |r| to the Montgomery product of |a| and |b|
// under |mont|. Negative inputs are rejected with BN_R_NEGATIVE_NUMBER. When
// |OPENSSL_BN_ASM_MONT| is defined and both inputs have exactly the modulus
// width (and that width covers at least 128 bits), |bn_mul_mont| is used;
// otherwise the work is delegated to |bn_mod_mul_montgomery_fallback|.
// It returns one on success and zero on error.
int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
                          const BN_MONT_CTX *mont, BN_CTX *ctx) {
  if (a->neg || b->neg) {
    OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
    return 0;
  }

#if defined(OPENSSL_BN_ASM_MONT)
  // |bn_mul_mont| requires at least 128 bits of limbs, at least for x86.
  const int num = mont->N.width;
  const int use_asm =
      num >= (128 / BN_BITS2) && a->width == num && b->width == num;
  if (use_asm) {
    if (!bn_wexpand(r, num)) {
      return 0;
    }
    if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
      r->neg = 0;
      r->width = num;
      return 1;
    }
    // The check above ensures this won't happen.
    assert(0);
    OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
    return 0;
  }
#endif

  return bn_mod_mul_montgomery_fallback(r, a, b, mont, ctx);
}
// bn_less_than_montgomery_R returns one if |bn| is non-negative and fits in
// as many words as the modulus of |mont|, and zero otherwise.
int bn_less_than_montgomery_R(const BIGNUM *bn, const BN_MONT_CTX *mont) {
  if (BN_is_negative(bn)) {
    return 0;
  }
  return bn_fits_in_words(bn, mont->N.width) != 0;
}
// bn_to_montgomery_small sets the |num|-word array |r| to the Montgomery
// product of |a| and the words of |mont->RR|, via
// |bn_mod_mul_montgomery_small|.
void bn_to_montgomery_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
                            const BN_MONT_CTX *mont) {
  const BN_ULONG *rr_words = mont->RR.d;
  bn_mod_mul_montgomery_small(r, a, rr_words, num, mont);
}
// bn_from_montgomery_small Montgomery-reduces the |num|-word array |a| into
// the |num|-word array |r|. It aborts the process if |num| differs from the
// modulus width, exceeds |BN_SMALL_MAX_WORDS|, or the reduction reports
// failure.
void bn_from_montgomery_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
                              const BN_MONT_CTX *mont) {
  if (num > BN_SMALL_MAX_WORDS || num != (size_t)mont->N.width) {
    abort();
  }

  // Build the double-width input: |a| in the low half, zeros in the high half.
  BN_ULONG scratch[BN_SMALL_MAX_WORDS * 2];
  const size_t half_bytes = num * sizeof(BN_ULONG);
  OPENSSL_memcpy(scratch, a, half_bytes);
  OPENSSL_memset(scratch + num, 0, half_bytes);

  if (!bn_from_montgomery_in_place(r, num, scratch, 2 * num, mont)) {
    abort();
  }
  // Wipe the used portion of the stack buffer before returning.
  OPENSSL_cleanse(scratch, 2 * half_bytes);
}
// bn_mod_mul_montgomery_small sets the |num|-word array |r| to the Montgomery
// product of the |num|-word arrays |a| and |b|. It aborts the process if |num|
// differs from the modulus width, exceeds |BN_SMALL_MAX_WORDS|, or an
// underlying routine reports failure.
void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
                                 const BN_ULONG *b, size_t num,
                                 const BN_MONT_CTX *mont) {
  if (num > BN_SMALL_MAX_WORDS || num != (size_t)mont->N.width) {
    abort();
  }

#if defined(OPENSSL_BN_ASM_MONT)
  // |bn_mul_mont| requires at least 128 bits of limbs, at least for x86.
  if (num >= (128 / BN_BITS2)) {
    if (!bn_mul_mont(r, a, b, mont->N.d, mont->n0, num)) {
      abort();  // The check above ensures this won't happen.
    }
    return;
  }
#endif

  // Compute the double-width product, squaring when both operands are the
  // same array.
  BN_ULONG product[2 * BN_SMALL_MAX_WORDS];
  const size_t product_words = 2 * num;
  if (a != b) {
    bn_mul_small(product, product_words, a, num, b, num);
  } else {
    bn_sqr_small(product, product_words, a, num);
  }

  // Reduce.
  if (!bn_from_montgomery_in_place(r, num, product, product_words, mont)) {
    abort();
  }
  // Wipe the used portion of the stack buffer before returning.
  OPENSSL_cleanse(product, product_words * sizeof(BN_ULONG));
}