2014-06-20 20:00:00 +01:00
|
|
|
/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* This package is an SSL implementation written
|
|
|
|
* by Eric Young (eay@cryptsoft.com).
|
|
|
|
* The implementation was written so as to conform with Netscapes SSL.
|
|
|
|
*
|
|
|
|
* This library is free for commercial and non-commercial use as long as
|
|
|
|
* the following conditions are aheared to. The following conditions
|
|
|
|
* apply to all code found in this distribution, be it the RC4, RSA,
|
|
|
|
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
|
|
|
|
* included with this distribution is covered by the same copyright terms
|
|
|
|
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
|
|
|
|
*
|
|
|
|
* Copyright remains Eric Young's, and as such any Copyright notices in
|
|
|
|
* the code are not to be removed.
|
|
|
|
* If this package is used in a product, Eric Young should be given attribution
|
|
|
|
* as the author of the parts of the library used.
|
|
|
|
* This can be in the form of a textual message at program startup or
|
|
|
|
* in documentation (online or textual) provided with the package.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
|
|
* must display the following acknowledgement:
|
|
|
|
* "This product includes cryptographic software written by
|
|
|
|
* Eric Young (eay@cryptsoft.com)"
|
|
|
|
* The word 'cryptographic' can be left out if the rouines from the library
|
|
|
|
* being used are not cryptographic related :-).
|
|
|
|
* 4. If you include any Windows specific code (or a derivative thereof) from
|
|
|
|
* the apps directory (application code) you must include an acknowledgement:
|
|
|
|
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* The licence and distribution terms for any publically available version or
|
|
|
|
* derivative of this code cannot be changed. i.e. this code cannot simply be
|
|
|
|
* copied and put under another distribution licence
|
|
|
|
* [including the GNU Public Licence.]
|
|
|
|
*/
|
|
|
|
/* ====================================================================
|
|
|
|
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
*
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
*
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in
|
|
|
|
* the documentation and/or other materials provided with the
|
|
|
|
* distribution.
|
|
|
|
*
|
|
|
|
* 3. All advertising materials mentioning features or use of this
|
|
|
|
* software must display the following acknowledgment:
|
|
|
|
* "This product includes software developed by the OpenSSL Project
|
|
|
|
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
|
|
|
*
|
|
|
|
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
|
|
|
* endorse or promote products derived from this software without
|
|
|
|
* prior written permission. For written permission, please contact
|
|
|
|
* openssl-core@openssl.org.
|
|
|
|
*
|
|
|
|
* 5. Products derived from this software may not be called "OpenSSL"
|
|
|
|
* nor may "OpenSSL" appear in their names without prior written
|
|
|
|
* permission of the OpenSSL Project.
|
|
|
|
*
|
|
|
|
* 6. Redistributions of any form whatsoever must retain the following
|
|
|
|
* acknowledgment:
|
|
|
|
* "This product includes software developed by the OpenSSL Project
|
|
|
|
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
|
|
|
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
|
|
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
|
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
* ====================================================================
|
|
|
|
*
|
|
|
|
* This product includes cryptographic software written by Eric Young
|
|
|
|
* (eay@cryptsoft.com). This product includes software written by Tim
|
|
|
|
* Hudson (tjh@cryptsoft.com).
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
/* ====================================================================
|
|
|
|
* Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
|
|
|
|
*
|
|
|
|
* Portions of the attached software ("Contribution") are developed by
|
|
|
|
* SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project.
|
|
|
|
*
|
|
|
|
* The Contribution is licensed pursuant to the Eric Young open source
|
|
|
|
* license provided above.
|
|
|
|
*
|
|
|
|
* The binary polynomial arithmetic software is originally written by
|
|
|
|
* Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems
|
|
|
|
* Laboratories. */
|
|
|
|
|
|
|
|
#ifndef OPENSSL_HEADER_BN_INTERNAL_H
|
|
|
|
#define OPENSSL_HEADER_BN_INTERNAL_H
|
|
|
|
|
|
|
|
#include <openssl/base.h>
|
|
|
|
|
2016-02-04 06:26:25 +00:00
|
|
|
#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
|
2016-06-09 21:48:33 +01:00
|
|
|
OPENSSL_MSVC_PRAGMA(warning(push, 3))
|
2014-12-03 22:57:04 +00:00
|
|
|
#include <intrin.h>
|
2016-06-09 21:48:33 +01:00
|
|
|
OPENSSL_MSVC_PRAGMA(warning(pop))
|
2014-12-03 22:57:04 +00:00
|
|
|
#pragma intrinsic(__umulh, _umul128)
|
|
|
|
#endif
|
|
|
|
|
2017-04-28 22:47:06 +01:00
|
|
|
#include "../../internal.h"
|
2016-01-27 01:16:37 +00:00
|
|
|
|
2014-06-20 20:00:00 +01:00
|
|
|
#if defined(__cplusplus)
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if defined(OPENSSL_64_BIT)
|
|
|
|
|
2017-12-08 02:11:24 +00:00
|
|
|
#if defined(BORINGSSL_HAS_UINT128)
|
2017-08-18 19:06:02 +01:00
|
|
|
// MSVC doesn't support two-word integers on 64-bit.
|
2017-11-09 22:07:54 +00:00
|
|
|
#define BN_ULLONG uint128_t
|
2017-12-08 02:11:24 +00:00
|
|
|
#if defined(BORINGSSL_CAN_DIVIDE_UINT128)
|
|
|
|
#define BN_CAN_DIVIDE_ULLONG
|
|
|
|
#endif
|
2015-01-09 23:44:37 +00:00
|
|
|
#endif
|
|
|
|
|
2017-11-09 22:07:54 +00:00
|
|
|
#define BN_BITS2 64
|
|
|
|
#define BN_BYTES 8
|
|
|
|
#define BN_BITS4 32
|
|
|
|
#define BN_MASK2 (0xffffffffffffffffUL)
|
|
|
|
#define BN_MASK2l (0xffffffffUL)
|
|
|
|
#define BN_MASK2h (0xffffffff00000000UL)
|
|
|
|
#define BN_MASK2h1 (0xffffffff80000000UL)
|
2016-07-30 03:19:46 +01:00
|
|
|
#define BN_MONT_CTX_N0_LIMBS 1
|
2017-11-09 22:07:54 +00:00
|
|
|
#define BN_DEC_CONV (10000000000000000000UL)
|
|
|
|
#define BN_DEC_NUM 19
|
2016-10-24 20:25:11 +01:00
|
|
|
// TOBN (64-bit words) combines two 32-bit halves into a single word: |hi| is
// cast to BN_ULONG and shifted into the upper 32 bits, then ORed with |lo|.
// |lo| is ORed in as-is (no cast or mask), so it must already fit in 32 bits.
#define TOBN(hi, lo) ((BN_ULONG)(hi) << 32 | (lo))
|
2014-06-20 20:00:00 +01:00
|
|
|
|
|
|
|
#elif defined(OPENSSL_32_BIT)
|
|
|
|
|
2017-11-09 22:07:54 +00:00
|
|
|
#define BN_ULLONG uint64_t
|
2017-12-08 02:11:24 +00:00
|
|
|
#define BN_CAN_DIVIDE_ULLONG
|
2017-11-09 22:07:54 +00:00
|
|
|
#define BN_BITS2 32
|
|
|
|
#define BN_BYTES 4
|
|
|
|
#define BN_BITS4 16
|
|
|
|
#define BN_MASK2 (0xffffffffUL)
|
|
|
|
#define BN_MASK2l (0xffffUL)
|
|
|
|
#define BN_MASK2h1 (0xffff8000UL)
|
|
|
|
#define BN_MASK2h (0xffff0000UL)
|
2017-08-18 19:06:02 +01:00
|
|
|
// On some 32-bit platforms, Montgomery multiplication is done using 64-bit
|
|
|
|
// arithmetic with SIMD instructions. On such platforms, |BN_MONT_CTX::n0|
|
|
|
|
// needs to be two words long. Only certain 32-bit platforms actually make use
|
|
|
|
// of n0[1] and a shorter R value would suffice for the others. However,
|
|
|
|
// currently only the assembly files know which is which.
|
2016-07-30 03:19:46 +01:00
|
|
|
#define BN_MONT_CTX_N0_LIMBS 2
|
2017-11-09 22:07:54 +00:00
|
|
|
#define BN_DEC_CONV (1000000000UL)
|
|
|
|
#define BN_DEC_NUM 9
|
2016-10-24 20:25:11 +01:00
|
|
|
// TOBN (32-bit words) expands to the two comma-separated values |lo|, |hi|,
// low half first, so a single use yields two words rather than one.
// NOTE(review): presumably meant for BN_ULONG array initializers so the same
// table source works for both word sizes - confirm against callers.
#define TOBN(hi, lo) (lo), (hi)
|
2014-06-20 20:00:00 +01:00
|
|
|
|
|
|
|
#else
|
|
|
|
#error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT"
|
|
|
|
#endif
|
|
|
|
|
2015-11-18 08:00:09 +00:00
|
|
|
|
2016-10-18 18:05:01 +01:00
|
|
|
// STATIC_BIGNUM expands to a brace-enclosed initializer built from the array
// |x|: a pointer to |x| cast to BN_ULONG *, the length of |x| in BN_ULONG words
// (given twice), a literal 0, and BN_FLG_STATIC_DATA. Because the length is
// computed with sizeof, |x| must be an actual array, not a pointer.
// NOTE(review): presumably initializes a BIGNUM's data pointer, used width,
// allocated width, sign and flags in that order - confirm against the struct
// definition, which is not visible here.
#define STATIC_BIGNUM(x) \
|
|
|
|
{ \
|
|
|
|
(BN_ULONG *)(x), sizeof(x) / sizeof(BN_ULONG), \
|
|
|
|
sizeof(x) / sizeof(BN_ULONG), 0, BN_FLG_STATIC_DATA \
|
2015-11-18 08:00:09 +00:00
|
|
|
}
|
|
|
|
|
2015-11-18 20:57:00 +00:00
|
|
|
#if defined(BN_ULLONG)
|
2017-10-12 04:55:18 +01:00
|
|
|
// Lw returns the low word of the double-word (BN_ULLONG) value |t| by
// truncating it to BN_ULONG. Only available when BN_ULLONG is defined.
#define Lw(t) ((BN_ULONG)(t))
|
|
|
|
// Hw returns the high word of the double-word (BN_ULLONG) value |t|: |t| is
// shifted right by BN_BITS2 bits and the result is truncated to BN_ULONG.
#define Hw(t) ((BN_ULONG)((t) >> BN_BITS2))
|
2015-01-09 23:44:37 +00:00
|
|
|
#endif
|
|
|
|
|
Add initial support for non-minimal BIGNUMs.
Thanks to Andres Erbsen for extremely helpful suggestions on how finally
plug this long-standing hole!
OpenSSL BIGNUMs are currently minimal-width, which means they cannot be
constant-time. We'll need to either excise BIGNUM from RSA and EC or
somehow fix BIGNUM. EC_SCALAR and later EC_FELEM work will excise it
from EC, but RSA's BIGNUMs are more transparent. Teaching BIGNUM to
handle non-minimal word widths is probably simpler.
The main constraint is BIGNUM's large "calculator" API surface. One
could, in theory, do arbitrary math on RSA components, which means all
public functions must tolerate non-minimal inputs. This is also useful
for EC; https://boringssl-review.googlesource.com/c/boringssl/+/24445 is
silly.
As a first step, fix comparison-type functions that were assuming
minimal BIGNUMs. I've also added bn_resize_words, but it is testing-only
until the rest of the library is fixed.
bn->top is now a loose upper bound we carry around. It does not affect
numerical results, only performance and secrecy. This is a departure
from the original meaning, and compiler help in auditing everything is
nice, so the final change in this series will rename bn->top to
bn->width. Thus these new functions are named per "width", not "top".
Looking further ahead, how are output BIGNUM widths determined? There's
three notions of correctness here:
1. Do I compute the right answer for all widths?
2. Do I handle secret data in constant time?
3. Does my memory usage not balloon absurdly?
For (1), a BIGNUM function must give the same answer for all input
widths. BN_mod_add_quick may assume |a| < |m|, but |a| may still be
wider than |m| by way of leading zeros. The simplest approach is to
write code in a width-agnostic way and rely on functions to accept all
widths. Where functions need to look at bn->d, we'll add a few helper
functions to smooth over funny widths.
For (2), (1) is a little cumbersome. Consider constant-time modular
addition. A sane type system would guarantee input widths match. But C
is weak here, and bifurcating the internals is a lot of work. Thus, at
least for now, I do not propose we move RSA's internal computation out
of BIGNUM. (EC_SCALAR/EC_FELEM are valuable for EC because we get to
stack-allocate, curves were already specialized, and EC only has two
types with many operations on those types. None of these apply to RSA.
We've got numbers mod n, mod p, mod q, and their corresponding
exponents, each of which is used for basically one operation.)
Instead, constant-time BIGNUM functions will output non-minimal widths.
This is trivial for BN_bin2bn or modular arithmetic. But for BN_mul,
constant-time[*] would dictate r->top = a->top + b->top. A calculator
repeatedly multiplying by one would then run out of memory. Those we'll
split into a private BN_mul_fixed for crypto, leaving BN_mul for
calculators. BN_mul is just BN_mul_fixed followed by bn_correct_top.
[*] BN_mul is not constant-time for other reasons, but that will be
fixed separately.
Bug: 232
Change-Id: Ide2258ae8c09a9a41bb71d6777908d1c27917069
Reviewed-on: https://boringssl-review.googlesource.com/25244
Reviewed-by: Adam Langley <agl@google.com>
2018-01-20 20:56:53 +00:00
|
|
|
// bn_minimal_width returns the minimal value of |bn->top| which fits the
|
|
|
|
// value of |bn|.
|
|
|
|
int bn_minimal_width(const BIGNUM *bn);
|
|
|
|
|
2018-01-15 10:23:24 +00:00
|
|
|
// bn_set_minimal_width sets |bn->width| to |bn_minimal_width(bn)|. If |bn| is
|
|
|
|
// zero, |bn->neg| is set to zero.
|
|
|
|
void bn_set_minimal_width(BIGNUM *bn);
|
2017-04-21 16:20:25 +01:00
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// bn_wexpand ensures that |bn| has at least |words| words of space without
|
|
|
|
// altering its value. It returns one on success or zero on allocation
|
|
|
|
// failure.
|
2017-04-21 16:26:30 +01:00
|
|
|
int bn_wexpand(BIGNUM *bn, size_t words);
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// bn_expand acts the same as |bn_wexpand|, but takes a number of bits rather
|
|
|
|
// than a number of words.
|
2017-04-21 16:26:30 +01:00
|
|
|
int bn_expand(BIGNUM *bn, size_t bits);
|
2017-04-21 16:20:25 +01:00
|
|
|
|
Add initial support for non-minimal BIGNUMs.
Thanks to Andres Erbsen for extremely helpful suggestions on how finally
plug this long-standing hole!
OpenSSL BIGNUMs are currently minimal-width, which means they cannot be
constant-time. We'll need to either excise BIGNUM from RSA and EC or
somehow fix BIGNUM. EC_SCALAR and later EC_FELEM work will excise it
from EC, but RSA's BIGNUMs are more transparent. Teaching BIGNUM to
handle non-minimal word widths is probably simpler.
The main constraint is BIGNUM's large "calculator" API surface. One
could, in theory, do arbitrary math on RSA components, which means all
public functions must tolerate non-minimal inputs. This is also useful
for EC; https://boringssl-review.googlesource.com/c/boringssl/+/24445 is
silly.
As a first step, fix comparison-type functions that were assuming
minimal BIGNUMs. I've also added bn_resize_words, but it is testing-only
until the rest of the library is fixed.
bn->top is now a loose upper bound we carry around. It does not affect
numerical results, only performance and secrecy. This is a departure
from the original meaning, and compiler help in auditing everything is
nice, so the final change in this series will rename bn->top to
bn->width. Thus these new functions are named per "width", not "top".
Looking further ahead, how are output BIGNUM widths determined? There's
three notions of correctness here:
1. Do I compute the right answer for all widths?
2. Do I handle secret data in constant time?
3. Does my memory usage not balloon absurdly?
For (1), a BIGNUM function must give the same answer for all input
widths. BN_mod_add_quick may assume |a| < |m|, but |a| may still be
wider than |m| by way of leading zeros. The simplest approach is to
write code in a width-agnostic way and rely on functions to accept all
widths. Where functions need to look at bn->d, we'll add a few helper
functions to smooth over funny widths.
For (2), (1) is a little cumbersome. Consider constant-time modular
addition. A sane type system would guarantee input widths match. But C
is weak here, and bifurcating the internals is a lot of work. Thus, at
least for now, I do not propose we move RSA's internal computation out
of BIGNUM. (EC_SCALAR/EC_FELEM are valuable for EC because we get to
stack-allocate, curves were already specialized, and EC only has two
types with many operations on those types. None of these apply to RSA.
We've got numbers mod n, mod p, mod q, and their corresponding
exponents, each of which is used for basically one operation.)
Instead, constant-time BIGNUM functions will output non-minimal widths.
This is trivial for BN_bin2bn or modular arithmetic. But for BN_mul,
constant-time[*] would dictate r->top = a->top + b->top. A calculator
repeatedly multiplying by one would then run out of memory. Those we'll
split into a private BN_mul_fixed for crypto, leaving BN_mul for
calculators. BN_mul is just BN_mul_fixed followed by bn_correct_top.
[*] BN_mul is not constant-time for other reasons, but that will be
fixed separately.
Bug: 232
Change-Id: Ide2258ae8c09a9a41bb71d6777908d1c27917069
Reviewed-on: https://boringssl-review.googlesource.com/25244
Reviewed-by: Adam Langley <agl@google.com>
2018-01-20 20:56:53 +00:00
|
|
|
// bn_resize_words adjusts |bn->top| to be |words|. It returns one on success
|
|
|
|
// and zero on allocation error or if |bn|'s value is too large.
|
2018-01-23 23:51:42 +00:00
|
|
|
OPENSSL_EXPORT int bn_resize_words(BIGNUM *bn, size_t words);
|
Add initial support for non-minimal BIGNUMs.
Thanks to Andres Erbsen for extremely helpful suggestions on how finally
plug this long-standing hole!
OpenSSL BIGNUMs are currently minimal-width, which means they cannot be
constant-time. We'll need to either excise BIGNUM from RSA and EC or
somehow fix BIGNUM. EC_SCALAR and later EC_FELEM work will excise it
from EC, but RSA's BIGNUMs are more transparent. Teaching BIGNUM to
handle non-minimal word widths is probably simpler.
The main constraint is BIGNUM's large "calculator" API surface. One
could, in theory, do arbitrary math on RSA components, which means all
public functions must tolerate non-minimal inputs. This is also useful
for EC; https://boringssl-review.googlesource.com/c/boringssl/+/24445 is
silly.
As a first step, fix comparison-type functions that were assuming
minimal BIGNUMs. I've also added bn_resize_words, but it is testing-only
until the rest of the library is fixed.
bn->top is now a loose upper bound we carry around. It does not affect
numerical results, only performance and secrecy. This is a departure
from the original meaning, and compiler help in auditing everything is
nice, so the final change in this series will rename bn->top to
bn->width. Thus these new functions are named per "width", not "top".
Looking further ahead, how are output BIGNUM widths determined? There's
three notions of correctness here:
1. Do I compute the right answer for all widths?
2. Do I handle secret data in constant time?
3. Does my memory usage not balloon absurdly?
For (1), a BIGNUM function must give the same answer for all input
widths. BN_mod_add_quick may assume |a| < |m|, but |a| may still be
wider than |m| by way of leading zeros. The simplest approach is to
write code in a width-agnostic way and rely on functions to accept all
widths. Where functions need to look at bn->d, we'll add a few helper
functions to smooth over funny widths.
For (2), (1) is a little cumbersome. Consider constant-time modular
addition. A sane type system would guarantee input widths match. But C
is weak here, and bifurcating the internals is a lot of work. Thus, at
least for now, I do not propose we move RSA's internal computation out
of BIGNUM. (EC_SCALAR/EC_FELEM are valuable for EC because we get to
stack-allocate, curves were already specialized, and EC only has two
types with many operations on those types. None of these apply to RSA.
We've got numbers mod n, mod p, mod q, and their corresponding
exponents, each of which is used for basically one operation.)
Instead, constant-time BIGNUM functions will output non-minimal widths.
This is trivial for BN_bin2bn or modular arithmetic. But for BN_mul,
constant-time[*] would dictate r->top = a->top + b->top. A calculator
repeatedly multiplying by one would then run out of memory. Those we'll
split into a private BN_mul_fixed for crypto, leaving BN_mul for
calculators. BN_mul is just BN_mul_fixed followed by bn_correct_top.
[*] BN_mul is not constant-time for other reasons, but that will be
fixed separately.
Bug: 232
Change-Id: Ide2258ae8c09a9a41bb71d6777908d1c27917069
Reviewed-on: https://boringssl-review.googlesource.com/25244
Reviewed-by: Adam Langley <agl@google.com>
2018-01-20 20:56:53 +00:00
|
|
|
|
2018-01-26 04:56:35 +00:00
|
|
|
// bn_select_words sets |r| to |a| if |mask| is all ones or |b| if |mask| is
|
|
|
|
// all zeros.
|
|
|
|
void bn_select_words(BN_ULONG *r, BN_ULONG mask, const BN_ULONG *a,
|
|
|
|
const BN_ULONG *b, size_t num);
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// bn_set_words sets |bn| to the value encoded in the |num| words in |words|,
|
|
|
|
// least significant word first.
|
2016-03-09 03:09:40 +00:00
|
|
|
int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num);
|
|
|
|
|
2018-01-23 21:17:55 +00:00
|
|
|
// bn_fits_in_words returns one if |bn| may be represented in |num| words, plus
|
|
|
|
// a sign bit, and zero otherwise.
|
|
|
|
int bn_fits_in_words(const BIGNUM *bn, size_t num);
|
|
|
|
|
2018-01-20 21:51:54 +00:00
|
|
|
// bn_copy_words copies the value of |bn| to |out| and returns one if the value
|
|
|
|
// is representable in |num| words. Otherwise, it returns zero.
|
|
|
|
int bn_copy_words(BN_ULONG *out, size_t num, const BIGNUM *bn);
|
|
|
|
|
2017-11-09 22:07:54 +00:00
|
|
|
// bn_mul_add_words multiplies |ap| by |w|, adds the result to |rp|, and places
|
|
|
|
// the result in |rp|. |ap| and |rp| must both be |num| words long. It returns
|
|
|
|
// the carry word of the operation. |ap| and |rp| may be equal but otherwise may
|
|
|
|
// not alias.
|
2017-11-12 01:18:42 +00:00
|
|
|
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
|
2017-11-09 22:07:54 +00:00
|
|
|
BN_ULONG w);
|
|
|
|
|
|
|
|
// bn_mul_words multiplies |ap| by |w| and places the result in |rp|. |ap| and
|
|
|
|
// |rp| must both be |num| words long. It returns the carry word of the
|
|
|
|
// operation. |ap| and |rp| may be equal but otherwise may not alias.
|
2017-11-12 01:18:42 +00:00
|
|
|
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, BN_ULONG w);
|
2017-11-09 22:07:54 +00:00
|
|
|
|
|
|
|
// bn_sqr_words sets |rp[2*i]| and |rp[2*i+1]| to |ap[i]|'s square, for all |i|
|
|
|
|
// up to |num|. |ap| is an array of |num| words and |rp| an array of |2*num|
|
|
|
|
// words. |ap| and |rp| may not alias.
|
|
|
|
//
|
|
|
|
// This gives the contribution of the |ap[i]*ap[i]| terms when squaring |ap|.
|
2017-11-12 01:18:42 +00:00
|
|
|
void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num);
|
2017-11-09 22:07:54 +00:00
|
|
|
|
|
|
|
// bn_add_words adds |ap| to |bp| and places the result in |rp|, each of which
|
|
|
|
// is |num| words long. It returns the carry bit, which is one if the operation
|
|
|
|
// overflowed and zero otherwise. Any pair of |ap|, |bp|, and |rp| may be equal
|
|
|
|
// to each other but otherwise may not alias.
|
|
|
|
BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
2017-11-12 01:18:42 +00:00
|
|
|
size_t num);
|
2017-11-09 22:07:54 +00:00
|
|
|
|
|
|
|
// bn_sub_words subtracts |bp| from |ap| and places the result in |rp|. It
|
|
|
|
// returns the borrow bit, which is one if the computation underflowed and zero
|
|
|
|
// otherwise. Any pair of |ap|, |bp|, and |rp| may be equal to each other but
|
|
|
|
// otherwise may not alias.
|
|
|
|
BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
2017-11-12 01:18:42 +00:00
|
|
|
size_t num);
|
2017-11-09 22:07:54 +00:00
|
|
|
|
|
|
|
// bn_mul_comba4 sets |r| to the product of |a| and |b|.
|
2017-11-12 03:12:08 +00:00
|
|
|
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]);
|
2017-11-09 22:07:54 +00:00
|
|
|
|
|
|
|
// bn_mul_comba8 sets |r| to the product of |a| and |b|.
|
2017-11-12 03:12:08 +00:00
|
|
|
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]);
|
2017-11-09 22:07:54 +00:00
|
|
|
|
|
|
|
// bn_sqr_comba8 sets |r| to |a|^2.
|
|
|
|
// |a| is 8 words long; its square occupies all 16 words of |r|.
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]);
|
|
|
|
|
|
|
|
// bn_sqr_comba4 sets |r| to |a|^2.
|
|
|
|
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]);
|
2014-06-20 20:00:00 +01:00
|
|
|
|
2017-11-09 17:31:03 +00:00
|
|
|
// bn_less_than_words returns one if |a| < |b| and zero otherwise, where |a|
|
|
|
|
// and |b| both are |len| words long. It runs in constant time.
|
|
|
|
int bn_less_than_words(const BN_ULONG *a, const BN_ULONG *b, size_t len);
|
|
|
|
|
|
|
|
// bn_in_range_words returns one if |min_inclusive| <= |a| < |max_exclusive|,
|
Blind the range check for finding a Rabin-Miller witness.
Rabin-Miller requires selecting a random number from 2 to |w|-1.
This is done by picking an N-bit number and discarding out-of-range
values. This leaks information about |w|, so apply blinding. Rather than
discard bad values, adjust them to be in range.
Though not uniformly selected, these adjusted values
are still usable as Rabin-Miller checks.
Rabin-Miller is already probabilistic, so we could reach the desired
confidence levels by just suitably increasing the iteration count.
However, to align with FIPS 186-4, we use a more pessimal analysis: we
do not count the non-uniform values towards the iteration count. As a
result, this function is more complex and has more timing risk than
necessary.
We count both total iterations and uniform ones and iterate until we've
reached at least |BN_PRIME_CHECKS_BLINDED| and |iterations|,
respectively. If the latter is large enough, it will be the limiting
factor with high probability and we won't leak information.
Note this blinding does not impact most calls when picking primes
because composites are rejected early. Only the two secret primes see
extra work. So while this does make the BNTest.PrimeChecking test take
about 2x longer to run on debug mode, RSA key generation time is fine.
Another, perhaps simpler, option here would have been to run
bn_rand_range_words to the full 100 count, select an arbitrary
successful try, and declare failure of the entire keygen process (as we
do already) if all tries failed. I went with the option in this CL
because I happened to come up with it first, and because the failure
probability decreases much faster. Additionally, the option in this CL
does not affect composite numbers, while the alternate would. This gives
a smaller multiplier on our entropy draw. We also continue to use the
"wasted" work for stronger assurance on primality. FIPS' numbers are
remarkably low, considering the increase has negligible cost.
Thanks to Nathan Benjamin for helping me explore the failure rate as the
target count and blinding count change.
Now we're down to the rest of RSA keygen, which will require all the
operations we've traditionally just avoided in constant-time code!
Median of 29 RSA keygens: 0m0.169s -> 0m0.298s
(Accuracy beyond 0.1s is questionable. The runs at subsequent test- and
rename-only CLs were 0m0.217s, 0m0.245s, 0m0.244s, 0m0.247s.)
Bug: 238
Change-Id: Id6406c3020f2585b86946eb17df64ac42f30ebab
Reviewed-on: https://boringssl-review.googlesource.com/25890
Commit-Queue: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2018-02-05 04:48:36 +00:00
|
|
|
// where |a| and |max_exclusive| both are |len| words long. |a| and
|
|
|
|
// |max_exclusive| are treated as secret.
|
2017-11-09 17:31:03 +00:00
|
|
|
int bn_in_range_words(const BN_ULONG *a, BN_ULONG min_inclusive,
|
|
|
|
const BN_ULONG *max_exclusive, size_t len);
|
|
|
|
|
Make ECDSA signing 10% faster and plug some timing leaks.
None of the asymmetric crypto we inherited from OpenSSL is
constant-time because of BIGNUM. BIGNUM chops leading zeros off the
front of everything, so we end up leaking information about the first
word, in theory. BIGNUM functions additionally tend to take the full
range of inputs and then call into BN_nnmod at various points.
All our secret values should be acted on in constant-time, but k in
ECDSA is a particularly sensitive value. So, ecdsa_sign_setup, in an
attempt to mitigate the BIGNUM leaks, would add a couple copies of the
order.
This does not work at all. k is used to compute two values: k^-1 and kG.
The first operation when computing k^-1 is to call BN_nnmod if k is out
of range. The entry point to our tuned constant-time curve
implementations is to call BN_nnmod if the scalar has too many bits,
which this causes. The result is both corrections are immediately undone
but cause us to do more variable-time work in the meantime.
Replace all these computations around k with the word-based functions
added in the various preceding CLs. In doing so, replace the BN_mod_mul
calls (which internally call BN_nnmod) with Montgomery reduction. We can
avoid taking k^-1 out of Montgomery form, which combines nicely with
Brian Smith's trick in 3426d1011946b26ff1bb2fd98a081ba4753c9cc8. Along
the way, we avoid some unnecessary mallocs.
BIGNUM still affects the private key itself, as well as the EC_POINTs.
But this should hopefully be much better now. Also it's 10% faster:
Before:
Did 15000 ECDSA P-224 signing operations in 1069117us (14030.3 ops/sec)
Did 18000 ECDSA P-256 signing operations in 1053908us (17079.3 ops/sec)
Did 1078 ECDSA P-384 signing operations in 1087853us (990.9 ops/sec)
Did 473 ECDSA P-521 signing operations in 1069835us (442.1 ops/sec)
After:
Did 16000 ECDSA P-224 signing operations in 1064799us (15026.3 ops/sec)
Did 19000 ECDSA P-256 signing operations in 1007839us (18852.2 ops/sec)
Did 1078 ECDSA P-384 signing operations in 1079413us (998.7 ops/sec)
Did 484 ECDSA P-521 signing operations in 1083616us (446.7 ops/sec)
Change-Id: I2a25e90fc99dac13c0616d0ea45e125a4bd8cca1
Reviewed-on: https://boringssl-review.googlesource.com/23075
Reviewed-by: Adam Langley <agl@google.com>
2017-11-13 03:58:00 +00:00
|
|
|
// bn_rand_range_words sets |out| to a uniformly distributed random number from
|
|
|
|
// |min_inclusive| to |max_exclusive|. Both |out| and |max_exclusive| are |len|
|
|
|
|
// words long.
|
|
|
|
//
|
|
|
|
// This function runs in time independent of the result, but |min_inclusive| and
|
|
|
|
// |max_exclusive| are public data. (Information about the range is unavoidably
|
|
|
|
// leaked by how many iterations it took to select a number.)
|
|
|
|
int bn_rand_range_words(BN_ULONG *out, BN_ULONG min_inclusive,
|
|
|
|
const BN_ULONG *max_exclusive, size_t len,
|
|
|
|
const uint8_t additional_data[32]);
|
|
|
|
|
Blind the range check for finding a Rabin-Miller witness.
Rabin-Miller requires selecting a random number from 2 to |w|-1.
This is done by picking an N-bit number and discarding out-of-range
values. This leaks information about |w|, so apply blinding. Rather than
discard bad values, adjust them to be in range.
Though not uniformly selected, these adjusted values
are still usable as Rabin-Miller checks.
Rabin-Miller is already probabilistic, so we could reach the desired
confidence levels by just suitably increasing the iteration count.
However, to align with FIPS 186-4, we use a more pessimal analysis: we
do not count the non-uniform values towards the iteration count. As a
result, this function is more complex and has more timing risk than
necessary.
We count both total iterations and uniform ones and iterate until we've
reached at least |BN_PRIME_CHECKS_BLINDED| and |iterations|,
respectively. If the latter is large enough, it will be the limiting
factor with high probability and we won't leak information.
Note this blinding does not impact most calls when picking primes
because composites are rejected early. Only the two secret primes see
extra work. So while this does make the BNTest.PrimeChecking test take
about 2x longer to run on debug mode, RSA key generation time is fine.
Another, perhaps simpler, option here would have been to run
bn_rand_range_words to the full 100 count, select an arbitrary
successful try, and declare failure of the entire keygen process (as we
do already) if all tries failed. I went with the option in this CL
because I happened to come up with it first, and because the failure
probability decreases much faster. Additionally, the option in this CL
does not affect composite numbers, while the alternate would. This gives
a smaller multiplier on our entropy draw. We also continue to use the
"wasted" work for stronger assurance on primality. FIPS' numbers are
remarkably low, considering the increase has negligible cost.
Thanks to Nathan Benjamin for helping me explore the failure rate as the
target count and blinding count change.
Now we're down to the rest of RSA keygen, which will require all the
operations we've traditionally just avoided in constant-time code!
Median of 29 RSA keygens: 0m0.169s -> 0m0.298s
(Accuracy beyond 0.1s is questionable. The runs at subsequent test- and
rename-only CLs were 0m0.217s, 0m0.245s, 0m0.244s, 0m0.247s.)
Bug: 238
Change-Id: Id6406c3020f2585b86946eb17df64ac42f30ebab
Reviewed-on: https://boringssl-review.googlesource.com/25890
Commit-Queue: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2018-02-05 04:48:36 +00:00
|
|
|
// bn_rand_secret_range behaves like |BN_rand_range_ex|, but treats
|
|
|
|
// |max_exclusive| as secret. Because of this constraint, the distribution of
|
|
|
|
// values returned is more complex.
|
|
|
|
//
|
|
|
|
// Rather than repeatedly generating values until one is in range, which would
|
|
|
|
// leak information, it generates one value. If the value is in range, it sets
|
|
|
|
// |*out_is_uniform| to one. Otherwise, it sets |*out_is_uniform| to zero,
|
|
|
|
// fixing up the value to force it in range.
|
|
|
|
//
|
|
|
|
// The subset of calls to |bn_rand_secret_range| which set |*out_is_uniform| to
|
|
|
|
// one are uniformly distributed in the target range. Calls overall are not.
|
|
|
|
// This function is intended for use in situations where the extra values are
|
|
|
|
// still usable and where the number of iterations needed to reach the target
|
|
|
|
// number of uniform outputs may be blinded for negligible probabilities of
|
|
|
|
// timing leaks.
|
|
|
|
//
|
|
|
|
// Although this function treats |max_exclusive| as secret, it treats the number
|
|
|
|
// of bits in |max_exclusive| as public.
|
|
|
|
int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive,
|
|
|
|
const BIGNUM *max_exclusive);
|
|
|
|
|
2014-06-20 20:00:00 +01:00
|
|
|
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
|
|
|
|
const BN_ULONG *np, const BN_ULONG *n0, int num);
|
|
|
|
|
2016-07-30 03:19:46 +01:00
|
|
|
uint64_t bn_mont_n0(const BIGNUM *n);
|
2018-01-25 20:04:22 +00:00
|
|
|
|
|
|
|
// bn_mod_exp_base_2_consttime calculates r = 2**p (mod n). |p| must be larger
|
|
|
|
// than log_2(n); i.e. 2**p must be larger than |n|. |n| must be positive and
|
|
|
|
// odd. |p| and the bit width of |n| are assumed public, but |n| is otherwise
|
|
|
|
// treated as secret.
|
|
|
|
int bn_mod_exp_base_2_consttime(BIGNUM *r, unsigned p, const BIGNUM *n,
|
|
|
|
BN_CTX *ctx);
|
2016-07-30 03:19:46 +01:00
|
|
|
|
2016-02-04 07:12:08 +00:00
|
|
|
#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
|
2016-02-04 07:12:37 +00:00
|
|
|
// BN_UMULT_LOHI sets |low| and |high| to the low and high words, respectively,
// of the full double-width product |a| * |b|, using the MSVC |_umul128|
// intrinsic. |high| must be an lvalue because its address is taken.
#define BN_UMULT_LOHI(low, high, a, b) ((low) = _umul128((a), (b), &(high)))
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#if !defined(BN_ULLONG) && !defined(BN_UMULT_LOHI)
|
|
|
|
#error "Either BN_ULLONG or BN_UMULT_LOHI must be defined on every platform."
|
2014-06-20 20:00:00 +01:00
|
|
|
#endif
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// bn_mod_inverse_prime sets |out| to the modular inverse of |a| modulo |p|,
|
|
|
|
// computed with Fermat's Little Theorem. It returns one on success and zero on
|
|
|
|
// error. If |mont_p| is NULL, one will be computed temporarily.
|
2016-12-17 19:27:16 +00:00
|
|
|
int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
|
|
|
|
BN_CTX *ctx, const BN_MONT_CTX *mont_p);
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// bn_mod_inverse_secret_prime behaves like |bn_mod_inverse_prime| but uses
|
|
|
|
// |BN_mod_exp_mont_consttime| instead of |BN_mod_exp_mont| in hopes of
|
|
|
|
// protecting the exponent.
|
2016-12-17 19:27:16 +00:00
|
|
|
int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
|
|
|
|
BN_CTX *ctx, const BN_MONT_CTX *mont_p);
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// bn_jacobi returns the Jacobi symbol of |a| and |b| (which is -1, 0 or 1), or
|
|
|
|
// -2 on error.
|
2016-12-09 02:55:39 +00:00
|
|
|
int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
|
|
|
|
|
2017-11-12 10:03:24 +00:00
|
|
|
// bn_is_bit_set_words returns one if bit |bit| is set in |a| and zero
|
|
|
|
// otherwise.
|
|
|
|
int bn_is_bit_set_words(const BN_ULONG *a, size_t num, unsigned bit);
|
|
|
|
|
2018-01-19 14:37:13 +00:00
|
|
|
// bn_one_to_montgomery sets |r| to one in Montgomery form. It returns one on
|
|
|
|
// success and zero on error. This function treats the bit width of the modulus
|
|
|
|
// as public.
|
|
|
|
int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx);
|
|
|
|
|
2018-01-23 21:17:55 +00:00
|
|
|
// bn_less_than_montgomery_R returns one if |bn| is less than the Montgomery R
|
|
|
|
// value for |mont| and zero otherwise.
|
|
|
|
int bn_less_than_montgomery_R(const BIGNUM *bn, const BN_MONT_CTX *mont);
|
|
|
|
|
2018-02-04 01:32:44 +00:00
|
|
|
// bn_mod_u16_consttime returns |bn| mod |d|, ignoring |bn|'s sign bit. It runs
|
|
|
|
// in time independent of the value of |bn|, but it treats |d| as public.
|
|
|
|
OPENSSL_EXPORT uint16_t bn_mod_u16_consttime(const BIGNUM *bn, uint16_t d);
|
|
|
|
|
2018-02-06 23:32:30 +00:00
|
|
|
// bn_odd_number_is_obviously_composite returns one if |bn| is divisible by one
|
|
|
|
// of the first several odd primes and zero otherwise.
|
|
|
|
int bn_odd_number_is_obviously_composite(const BIGNUM *bn);
|
|
|
|
|
2018-02-04 03:39:35 +00:00
|
|
|
// bn_rshift_secret_shift behaves like |BN_rshift| but runs in time independent
|
|
|
|
// of both |a| and |n|.
|
|
|
|
OPENSSL_EXPORT int bn_rshift_secret_shift(BIGNUM *r, const BIGNUM *a,
|
|
|
|
unsigned n, BN_CTX *ctx);
|
|
|
|
|
2014-06-20 20:00:00 +01:00
|
|
|
|
2018-02-06 23:56:10 +00:00
|
|
|
// Constant-time non-modular arithmetic.
|
2018-01-24 17:10:56 +00:00
|
|
|
//
|
|
|
|
// The following functions implement non-modular arithmetic in constant-time
|
|
|
|
// and pessimally set |r->width| to the largest possible word size.
|
|
|
|
//
|
|
|
|
// Note this means that, e.g., repeatedly multiplying by one will cause widths
|
|
|
|
// to increase without bound. The corresponding public API functions minimize
|
|
|
|
// their outputs to avoid regressing calculator consumers.
|
|
|
|
|
2018-02-06 23:56:10 +00:00
|
|
|
// bn_uadd_consttime behaves like |BN_uadd|, but it pessimally sets
|
2018-01-25 20:01:39 +00:00
|
|
|
// |r->width| = |a->width| + |b->width| + 1.
|
2018-02-06 23:56:10 +00:00
|
|
|
int bn_uadd_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
|
2018-01-25 20:01:39 +00:00
|
|
|
|
2018-02-06 23:56:10 +00:00
|
|
|
// bn_usub_consttime behaves like |BN_usub|, but it pessimally sets
|
2018-02-04 00:43:33 +00:00
|
|
|
// |r->width| = |a->width|.
|
2018-02-06 23:56:10 +00:00
|
|
|
int bn_usub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
|
2018-02-04 00:43:33 +00:00
|
|
|
|
2018-02-06 23:56:10 +00:00
|
|
|
// bn_mul_consttime behaves like |BN_mul|, but it rejects negative inputs and
|
2018-01-24 17:10:56 +00:00
|
|
|
// pessimally sets |r->width| to |a->width| + |b->width|, to avoid leaking
|
|
|
|
// information about |a| and |b|.
|
2018-02-06 23:56:10 +00:00
|
|
|
int bn_mul_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
|
2018-01-24 17:10:56 +00:00
|
|
|
|
2018-02-06 23:56:10 +00:00
|
|
|
// bn_sqr_consttime behaves like |BN_sqr|, but it pessimally sets |r->width|
|
|
|
|
// to 2*|a->width|, to avoid leaking information about |a|.
|
|
|
|
int bn_sqr_consttime(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx);
|
2018-01-24 17:10:56 +00:00
|
|
|
|
|
|
|
|
Make BN_mod_*_quick constant-time.
As the EC code will ultimately want to use these in "words" form by way
of EC_FELEM, and because it's much easier, I've implemented these as
low-level words-based functions that require all inputs have the same
width. The BIGNUM versions which RSA and, for now, EC calls are
implemented on top of that.
Unfortunately, doing such things in constant-time and accounting for
undersized inputs requires some scratch space, and these functions don't
take BN_CTX. So I've added internal bn_mod_*_quick_ctx functions that
take a BN_CTX and the old functions now allocate a bit unnecessarily.
RSA only needs lshift (for BN_MONT_CTX) and sub (for CRT), but the
generic EC code wants add as well.
The generic EC code isn't even remotely constant-time, and I hope to
ultimately use stack-allocated EC_FELEMs, so I've made the actual
implementations here implemented in "words", which is much simpler
anyway due to not having to take care of widths.
I've also gone ahead and switched the EC code to these functions,
largely as a test of their performance (an earlier iteration made the EC
code noticeably slower). These operations are otherwise not
performance-critical in RSA.
The conversion from BIGNUM to BIGNUM+BN_CTX should be dropped by the
static linker already, and the unused BIGNUM+BN_CTX functions will fall
off when EC_FELEM happens.
Update-Note: BN_mod_*_quick bounce on malloc a bit now, but they're not
really used externally. The one caller I found was wpa_supplicant
which bounces on malloc already. They appear to be implementing
compressed coordinates by hand? We may be able to convince them to
call EC_POINT_set_compressed_coordinates_GFp.
Bug: 233, 236
Change-Id: I2bf361e9c089e0211b97d95523dbc06f1168e12b
Reviewed-on: https://boringssl-review.googlesource.com/25261
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2018-01-24 20:29:00 +00:00
|
|
|
// Constant-time modular arithmetic.
|
|
|
|
//
|
2018-02-06 23:56:10 +00:00
|
|
|
// The following functions implement basic constant-time modular arithmetic.
|
Make BN_mod_*_quick constant-time.
As the EC code will ultimately want to use these in "words" form by way
of EC_FELEM, and because it's much easier, I've implemented these as
low-level words-based functions that require all inputs have the same
width. The BIGNUM versions which RSA and, for now, EC calls are
implemented on top of that.
Unfortunately, doing such things in constant-time and accounting for
undersized inputs requires some scratch space, and these functions don't
take BN_CTX. So I've added internal bn_mod_*_quick_ctx functions that
take a BN_CTX and the old functions now allocate a bit unnecessarily.
RSA only needs lshift (for BN_MONT_CTX) and sub (for CRT), but the
generic EC code wants add as well.
The generic EC code isn't even remotely constant-time, and I hope to
ultimately use stack-allocated EC_FELEMs, so I've made the actual
implementations here implemented in "words", which is much simpler
anyway due to not having to take care of widths.
I've also gone ahead and switched the EC code to these functions,
largely as a test of their performance (an earlier iteration made the EC
code noticeably slower). These operations are otherwise not
performance-critical in RSA.
The conversion from BIGNUM to BIGNUM+BN_CTX should be dropped by the
static linker already, and the unused BIGNUM+BN_CTX functions will fall
off when EC_FELEM happens.
Update-Note: BN_mod_*_quick bounce on malloc a bit now, but they're not
really used externally. The one caller I found was wpa_supplicant
which bounces on malloc already. They appear to be implementing
compressed coordinates by hand? We may be able to convince them to
call EC_POINT_set_compressed_coordinates_GFp.
Bug: 233, 236
Change-Id: I2bf361e9c089e0211b97d95523dbc06f1168e12b
Reviewed-on: https://boringssl-review.googlesource.com/25261
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2018-01-24 20:29:00 +00:00
|
|
|
|
2018-02-06 23:56:10 +00:00
|
|
|
// bn_mod_add_consttime acts like |BN_mod_add_quick| but takes a |BN_CTX|.
|
|
|
|
int bn_mod_add_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
|
Make BN_mod_*_quick constant-time.
As the EC code will ultimately want to use these in "words" form by way
of EC_FELEM, and because it's much easier, I've implemented these as
low-level words-based functions that require all inputs have the same
width. The BIGNUM versions which RSA and, for now, EC calls are
implemented on top of that.
Unfortunately, doing such things in constant-time and accounting for
undersized inputs requires some scratch space, and these functions don't
take BN_CTX. So I've added internal bn_mod_*_quick_ctx functions that
take a BN_CTX and the old functions now allocate a bit unnecessarily.
RSA only needs lshift (for BN_MONT_CTX) and sub (for CRT), but the
generic EC code wants add as well.
The generic EC code isn't even remotely constant-time, and I hope to
ultimately use stack-allocated EC_FELEMs, so I've made the actual
implementations here implemented in "words", which is much simpler
anyway due to not having to take care of widths.
I've also gone ahead and switched the EC code to these functions,
largely as a test of their performance (an earlier iteration made the EC
code noticeably slower). These operations are otherwise not
performance-critical in RSA.
The conversion from BIGNUM to BIGNUM+BN_CTX should be dropped by the
static linker already, and the unused BIGNUM+BN_CTX functions will fall
off when EC_FELEM happens.
Update-Note: BN_mod_*_quick bounce on malloc a bit now, but they're not
really used externally. The one caller I found was wpa_supplicant
which bounces on malloc already. They appear to be implementing
compressed coordinates by hand? We may be able to convince them to
call EC_POINT_set_compressed_coordinates_GFp.
Bug: 233, 236
Change-Id: I2bf361e9c089e0211b97d95523dbc06f1168e12b
Reviewed-on: https://boringssl-review.googlesource.com/25261
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2018-01-24 20:29:00 +00:00
|
|
|
const BIGNUM *m, BN_CTX *ctx);
|
|
|
|
|
2018-02-06 23:56:10 +00:00
|
|
|
// bn_mod_sub_consttime acts like |BN_mod_sub_quick| but takes a |BN_CTX|.
|
|
|
|
int bn_mod_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
|
Make BN_mod_*_quick constant-time.
As the EC code will ultimately want to use these in "words" form by way
of EC_FELEM, and because it's much easier, I've implemented these as
low-level words-based functions that require all inputs have the same
width. The BIGNUM versions which RSA and, for now, EC calls are
implemented on top of that.
Unfortunately, doing such things in constant-time and accounting for
undersized inputs requires some scratch space, and these functions don't
take BN_CTX. So I've added internal bn_mod_*_quick_ctx functions that
take a BN_CTX and the old functions now allocate a bit unnecessarily.
RSA only needs lshift (for BN_MONT_CTX) and sub (for CRT), but the
generic EC code wants add as well.
The generic EC code isn't even remotely constant-time, and I hope to
ultimately use stack-allocated EC_FELEMs, so I've made the actual
implementations here implemented in "words", which is much simpler
anyway due to not having to take care of widths.
I've also gone ahead and switched the EC code to these functions,
largely as a test of their performance (an earlier iteration made the EC
code noticeably slower). These operations are otherwise not
performance-critical in RSA.
The conversion from BIGNUM to BIGNUM+BN_CTX should be dropped by the
static linker already, and the unused BIGNUM+BN_CTX functions will fall
off when EC_FELEM happens.
Update-Note: BN_mod_*_quick bounce on malloc a bit now, but they're not
really used externally. The one caller I found was wpa_supplicant
which bounces on malloc already. They appear to be implementing
compressed coordinates by hand? We may be able to convince them to
call EC_POINT_set_compressed_coordinates_GFp.
Bug: 233, 236
Change-Id: I2bf361e9c089e0211b97d95523dbc06f1168e12b
Reviewed-on: https://boringssl-review.googlesource.com/25261
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2018-01-24 20:29:00 +00:00
|
|
|
const BIGNUM *m, BN_CTX *ctx);
|
|
|
|
|
2018-02-06 23:56:10 +00:00
|
|
|
// bn_mod_lshift1_consttime acts like |BN_mod_lshift1_quick| but takes a
|
Make BN_mod_*_quick constant-time.
As the EC code will ultimately want to use these in "words" form by way
of EC_FELEM, and because it's much easier, I've implemented these as
low-level words-based functions that require all inputs have the same
width. The BIGNUM versions which RSA and, for now, EC calls are
implemented on top of that.
Unfortunately, doing such things in constant-time and accounting for
undersized inputs requires some scratch space, and these functions don't
take BN_CTX. So I've added internal bn_mod_*_quick_ctx functions that
take a BN_CTX and the old functions now allocate a bit unnecessarily.
RSA only needs lshift (for BN_MONT_CTX) and sub (for CRT), but the
generic EC code wants add as well.
The generic EC code isn't even remotely constant-time, and I hope to
ultimately use stack-allocated EC_FELEMs, so I've made the actual
implementations here implemented in "words", which is much simpler
anyway due to not having to take care of widths.
I've also gone ahead and switched the EC code to these functions,
largely as a test of their performance (an earlier iteration made the EC
code noticeably slower). These operations are otherwise not
performance-critical in RSA.
The conversion from BIGNUM to BIGNUM+BN_CTX should be dropped by the
static linker already, and the unused BIGNUM+BN_CTX functions will fall
off when EC_FELEM happens.
Update-Note: BN_mod_*_quick bounce on malloc a bit now, but they're not
really used externally. The one caller I found was wpa_supplicant
which bounces on malloc already. They appear to be implementing
compressed coordinates by hand? We may be able to convince them to
call EC_POINT_set_compressed_coordinates_GFp.
Bug: 233, 236
Change-Id: I2bf361e9c089e0211b97d95523dbc06f1168e12b
Reviewed-on: https://boringssl-review.googlesource.com/25261
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2018-01-24 20:29:00 +00:00
|
|
|
// |BN_CTX|.
|
2018-02-06 23:56:10 +00:00
|
|
|
int bn_mod_lshift1_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *m,
|
Make BN_mod_*_quick constant-time.
As the EC code will ultimately want to use these in "words" form by way
of EC_FELEM, and because it's much easier, I've implemented these as
low-level words-based functions that require all inputs have the same
width. The BIGNUM versions which RSA and, for now, EC calls are
implemented on top of that.
Unfortunately, doing such things in constant-time and accounting for
undersized inputs requires some scratch space, and these functions don't
take BN_CTX. So I've added internal bn_mod_*_quick_ctx functions that
take a BN_CTX and the old functions now allocate a bit unnecessarily.
RSA only needs lshift (for BN_MONT_CTX) and sub (for CRT), but the
generic EC code wants add as well.
The generic EC code isn't even remotely constant-time, and I hope to
ultimately use stack-allocated EC_FELEMs, so I've made the actual
implementations here implemented in "words", which is much simpler
anyway due to not having to take care of widths.
I've also gone ahead and switched the EC code to these functions,
largely as a test of their performance (an earlier iteration made the EC
code noticeably slower). These operations are otherwise not
performance-critical in RSA.
The conversion from BIGNUM to BIGNUM+BN_CTX should be dropped by the
static linker already, and the unused BIGNUM+BN_CTX functions will fall
off when EC_FELEM happens.
Update-Note: BN_mod_*_quick bounce on malloc a bit now, but they're not
really used externally. The one caller I found was wpa_supplicant
which bounces on malloc already. They appear to be implementing
compressed coordinates by hand? We may be able to convince them to
call EC_POINT_set_compressed_coordinates_GFp.
Bug: 233, 236
Change-Id: I2bf361e9c089e0211b97d95523dbc06f1168e12b
Reviewed-on: https://boringssl-review.googlesource.com/25261
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2018-01-24 20:29:00 +00:00
|
|
|
BN_CTX *ctx);
|
|
|
|
|
2018-02-06 23:56:10 +00:00
|
|
|
// bn_mod_lshift_consttime acts like |BN_mod_lshift_quick| but takes a |BN_CTX|.
|
|
|
|
int bn_mod_lshift_consttime(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
|
Make BN_mod_*_quick constant-time.
As the EC code will ultimately want to use these in "words" form by way
of EC_FELEM, and because it's much easier, I've implemented these as
low-level words-based functions that require all inputs have the same
width. The BIGNUM versions which RSA and, for now, EC calls are
implemented on top of that.
Unfortunately, doing such things in constant-time and accounting for
undersized inputs requires some scratch space, and these functions don't
take BN_CTX. So I've added internal bn_mod_*_quick_ctx functions that
take a BN_CTX and the old functions now allocate a bit unnecessarily.
RSA only needs lshift (for BN_MONT_CTX) and sub (for CRT), but the
generic EC code wants add as well.
The generic EC code isn't even remotely constant-time, and I hope to
ultimately use stack-allocated EC_FELEMs, so I've made the actual
implementations here implemented in "words", which is much simpler
anyway due to not having to take care of widths.
I've also gone ahead and switched the EC code to these functions,
largely as a test of their performance (an earlier iteration made the EC
code noticeably slower). These operations are otherwise not
performance-critical in RSA.
The conversion from BIGNUM to BIGNUM+BN_CTX should be dropped by the
static linker already, and the unused BIGNUM+BN_CTX functions will fall
off when EC_FELEM happens.
Update-Note: BN_mod_*_quick bounce on malloc a bit now, but they're not
really used externally. The one caller I found was wpa_supplicant
which bounces on malloc already. They appear to be implementing
compressed coordinates by hand? We may be able to convince them to
call EC_POINT_set_compressed_coordinates_GFp.
Bug: 233, 236
Change-Id: I2bf361e9c089e0211b97d95523dbc06f1168e12b
Reviewed-on: https://boringssl-review.googlesource.com/25261
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2018-01-24 20:29:00 +00:00
|
|
|
BN_CTX *ctx);
|
|
|
|
|
|
|
|
|
2017-11-12 03:41:17 +00:00
|
|
|
// Low-level operations for small numbers.
|
|
|
|
//
|
|
|
|
// The following functions implement algorithms suitable for use with scalars
|
|
|
|
// and field elements in elliptic curves. They rely on the number being small
|
|
|
|
// both to stack-allocate various temporaries and because they do not implement
|
|
|
|
// optimizations useful for the larger values used in RSA.
|
|
|
|
|
|
|
|
// BN_SMALL_MAX_WORDS is the largest size input these functions handle. This
|
|
|
|
// limit allows temporaries to be more easily stack-allocated. This limit is set
|
|
|
|
// to accommodate P-521.
|
|
|
|
#if defined(OPENSSL_32_BIT)
|
|
|
|
#define BN_SMALL_MAX_WORDS 17
|
|
|
|
#else
|
|
|
|
#define BN_SMALL_MAX_WORDS 9
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// bn_mul_small sets |r| to |a|*|b|. |num_r| must be |num_a| + |num_b|. |r| may
|
|
|
|
// not alias with |a| or |b|. This function returns one on success and zero if
|
|
|
|
// lengths are inconsistent.
|
|
|
|
int bn_mul_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a,
|
|
|
|
const BN_ULONG *b, size_t num_b);
|
|
|
|
|
|
|
|
// bn_sqr_small sets |r| to |a|^2. |num_a| must be at most |BN_SMALL_MAX_WORDS|.
|
|
|
|
// |num_r| must be |num_a|*2. |r| and |a| may not alias. This function returns
|
|
|
|
// one on success and zero on programmer error.
|
|
|
|
int bn_sqr_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a);
|
|
|
|
|
2017-11-12 05:58:13 +00:00
|
|
|
// In the following functions, the modulus must be at most |BN_SMALL_MAX_WORDS|
|
|
|
|
// words long.
|
|
|
|
|
|
|
|
// bn_to_montgomery_small sets |r| to |a| translated to the Montgomery domain.
|
|
|
|
// |num_a| and |num_r| must be the length of the modulus, which is
|
|
|
|
// |mont->N.top|. |a| must be fully reduced. This function returns one on
|
|
|
|
// success and zero if lengths are inconsistent. |r| and |a| may alias.
|
|
|
|
int bn_to_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
|
|
|
|
size_t num_a, const BN_MONT_CTX *mont);
|
|
|
|
|
|
|
|
// bn_from_montgomery_small sets |r| to |a| translated out of the Montgomery
|
|
|
|
// domain. |num_r| must be the length of the modulus, which is |mont->N.top|.
|
|
|
|
// |a| must be at most |mont->N| * R and |num_a| must be at most 2 *
|
|
|
|
// |mont->N.top|. This function returns one on success and zero if lengths are
|
|
|
|
// inconsistent. |r| and |a| may alias.
|
|
|
|
int bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
|
|
|
|
size_t num_a, const BN_MONT_CTX *mont);
|
|
|
|
|
2018-01-19 14:37:13 +00:00
|
|
|
// bn_one_to_montgomery_small sets |r| to one in Montgomery form. It returns one
|
|
|
|
// on success and zero on error. |num_r| must be the length of the modulus,
|
|
|
|
// which is |mont->N.top|. This function treats the bit width of the modulus as
|
|
|
|
// public.
|
|
|
|
int bn_one_to_montgomery_small(BN_ULONG *r, size_t num_r,
|
|
|
|
const BN_MONT_CTX *mont);
|
|
|
|
|
2017-11-12 05:58:13 +00:00
|
|
|
// bn_mod_mul_montgomery_small sets |r| to |a| * |b| mod |mont->N|. Both inputs
|
|
|
|
// and outputs are in the Montgomery domain. |num_r| must be the length of the
|
|
|
|
// modulus, which is |mont->N.top|. This function returns one on success and
|
|
|
|
// zero on internal error or inconsistent lengths. Any two of |r|, |a|, and |b|
|
|
|
|
// may alias.
|
|
|
|
//
|
|
|
|
// This function requires |a| * |b| < N * R, where N is the modulus and R is the
|
|
|
|
// Montgomery divisor, 2^(N.top * BN_BITS2). This should generally be satisfied
|
|
|
|
// by ensuring |a| and |b| are fully reduced, however ECDSA has one computation
|
|
|
|
// which requires the more general bound.
|
|
|
|
int bn_mod_mul_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
|
|
|
|
size_t num_a, const BN_ULONG *b, size_t num_b,
|
|
|
|
const BN_MONT_CTX *mont);
|
|
|
|
|
2017-11-12 10:03:24 +00:00
|
|
|
// bn_mod_exp_mont_small sets |r| to |a|^|p| mod |mont->N|. It returns one on
|
|
|
|
// success and zero on programmer or internal error. Both inputs and outputs are
|
|
|
|
// in the Montgomery domain. |num_r| and |num_a| must be |mont->N.top|, which
|
|
|
|
// must be at most |BN_SMALL_MAX_WORDS|. |a| must be fully-reduced. This
|
|
|
|
// function runs in time independent of |a|, but |p| and |mont->N| are public
|
|
|
|
// values.
|
|
|
|
//
|
|
|
|
// Note this function differs from |BN_mod_exp_mont| which uses Montgomery
|
|
|
|
// reduction but takes input and output outside the Montgomery domain. Combine
|
|
|
|
// this function with |bn_from_montgomery_small| and |bn_to_montgomery_small|
|
|
|
|
// if necessary.
|
|
|
|
int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
|
|
|
|
size_t num_a, const BN_ULONG *p, size_t num_p,
|
|
|
|
const BN_MONT_CTX *mont);
|
|
|
|
|
|
|
|
// bn_mod_inverse_prime_mont_small sets |r| to |a|^-1 mod |mont->N|. |mont->N|
|
|
|
|
// must be a prime. |num_r| and |num_a| must be |mont->N.top|, which must be at
|
|
|
|
// most |BN_SMALL_MAX_WORDS|. |a| must be fully-reduced. This function runs in
|
|
|
|
// time independent of |a|, but |mont->N| is a public value.
|
|
|
|
int bn_mod_inverse_prime_mont_small(BN_ULONG *r, size_t num_r,
|
|
|
|
const BN_ULONG *a, size_t num_a,
|
|
|
|
const BN_MONT_CTX *mont);
|
|
|
|
|
2017-11-12 03:41:17 +00:00
|
|
|
|
2014-06-20 20:00:00 +01:00
|
|
|
#if defined(__cplusplus)
|
2017-08-18 19:06:02 +01:00
|
|
|
} // extern C
|
2014-06-20 20:00:00 +01:00
|
|
|
#endif
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
#endif // OPENSSL_HEADER_BN_INTERNAL_H
|