2014-06-20 20:00:00 +01:00
|
|
|
/* Originally written by Bodo Moeller and Nils Larsch for the OpenSSL project.
|
|
|
|
* ====================================================================
|
|
|
|
* Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
*
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
*
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in
|
|
|
|
* the documentation and/or other materials provided with the
|
|
|
|
* distribution.
|
|
|
|
*
|
|
|
|
* 3. All advertising materials mentioning features or use of this
|
|
|
|
* software must display the following acknowledgment:
|
|
|
|
* "This product includes software developed by the OpenSSL Project
|
|
|
|
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
|
|
|
*
|
|
|
|
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
|
|
|
* endorse or promote products derived from this software without
|
|
|
|
* prior written permission. For written permission, please contact
|
|
|
|
* openssl-core@openssl.org.
|
|
|
|
*
|
|
|
|
* 5. Products derived from this software may not be called "OpenSSL"
|
|
|
|
* nor may "OpenSSL" appear in their names without prior written
|
|
|
|
* permission of the OpenSSL Project.
|
|
|
|
*
|
|
|
|
* 6. Redistributions of any form whatsoever must retain the following
|
|
|
|
* acknowledgment:
|
|
|
|
* "This product includes software developed by the OpenSSL Project
|
|
|
|
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
|
|
|
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
|
|
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
|
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
* ====================================================================
|
|
|
|
*
|
|
|
|
* This product includes cryptographic software written by Eric Young
|
|
|
|
* (eay@cryptsoft.com). This product includes software written by Tim
|
|
|
|
* Hudson (tjh@cryptsoft.com).
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
/* ====================================================================
|
|
|
|
* Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
|
|
|
|
*
|
|
|
|
* Portions of the attached software ("Contribution") are developed by
|
|
|
|
* SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project.
|
|
|
|
*
|
|
|
|
* The Contribution is licensed pursuant to the OpenSSL open source
|
|
|
|
* license provided above.
|
|
|
|
*
|
|
|
|
* The elliptic curve binary polynomial software is originally written by
|
|
|
|
* Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems
|
|
|
|
* Laboratories. */
|
|
|
|
|
|
|
|
#include <openssl/ec.h>
|
|
|
|
|
|
|
|
#include <openssl/bn.h>
|
|
|
|
#include <openssl/err.h>
|
|
|
|
#include <openssl/mem.h>
|
|
|
|
|
2017-05-02 22:25:39 +01:00
|
|
|
#include "../bn/internal.h"
|
|
|
|
#include "../delocate.h"
|
2014-06-20 20:00:00 +01:00
|
|
|
#include "internal.h"
|
|
|
|
|
|
|
|
|
|
|
|
int ec_GFp_mont_group_init(EC_GROUP *group) {
|
|
|
|
int ok;
|
|
|
|
|
|
|
|
ok = ec_GFp_simple_group_init(group);
|
2015-04-08 22:11:16 +01:00
|
|
|
group->mont = NULL;
|
2014-06-20 20:00:00 +01:00
|
|
|
return ok;
|
|
|
|
}
|
|
|
|
|
|
|
|
void ec_GFp_mont_group_finish(EC_GROUP *group) {
|
2015-04-22 20:08:19 +01:00
|
|
|
BN_MONT_CTX_free(group->mont);
|
|
|
|
group->mont = NULL;
|
2014-06-20 20:00:00 +01:00
|
|
|
ec_GFp_simple_group_finish(group);
|
|
|
|
}
|
|
|
|
|
|
|
|
int ec_GFp_mont_group_set_curve(EC_GROUP *group, const BIGNUM *p,
|
|
|
|
const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
|
|
|
|
BN_CTX *new_ctx = NULL;
|
|
|
|
int ret = 0;
|
|
|
|
|
2015-04-22 20:08:19 +01:00
|
|
|
BN_MONT_CTX_free(group->mont);
|
|
|
|
group->mont = NULL;
|
2014-06-20 20:00:00 +01:00
|
|
|
|
|
|
|
if (ctx == NULL) {
|
|
|
|
ctx = new_ctx = BN_CTX_new();
|
2015-02-11 06:17:18 +00:00
|
|
|
if (ctx == NULL) {
|
2014-06-20 20:00:00 +01:00
|
|
|
return 0;
|
2015-02-11 06:17:18 +00:00
|
|
|
}
|
2014-06-20 20:00:00 +01:00
|
|
|
}
|
|
|
|
|
2018-01-23 22:03:26 +00:00
|
|
|
group->mont = BN_MONT_CTX_new_for_modulus(p, ctx);
|
|
|
|
if (group->mont == NULL) {
|
2015-06-29 05:28:17 +01:00
|
|
|
OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
|
2014-06-20 20:00:00 +01:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = ec_GFp_simple_group_set_curve(group, p, a, b, ctx);
|
|
|
|
|
|
|
|
if (!ret) {
|
2015-04-08 22:11:16 +01:00
|
|
|
BN_MONT_CTX_free(group->mont);
|
|
|
|
group->mont = NULL;
|
2014-06-20 20:00:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
err:
|
2015-04-22 20:08:19 +01:00
|
|
|
BN_CTX_free(new_ctx);
|
2014-06-20 20:00:00 +01:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
static void ec_GFp_mont_felem_to_montgomery(const EC_GROUP *group,
|
|
|
|
EC_FELEM *out, const EC_FELEM *in) {
|
|
|
|
bn_to_montgomery_small(out->words, in->words, group->field.width,
|
|
|
|
group->mont);
|
|
|
|
}
|
2014-06-20 20:00:00 +01:00
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
static void ec_GFp_mont_felem_from_montgomery(const EC_GROUP *group,
|
|
|
|
EC_FELEM *out,
|
|
|
|
const EC_FELEM *in) {
|
|
|
|
bn_from_montgomery_small(out->words, in->words, group->field.width,
|
|
|
|
group->mont);
|
2014-06-20 20:00:00 +01:00
|
|
|
}
|
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
static void ec_GFp_mont_felem_inv(const EC_GROUP *group, EC_FELEM *out,
|
|
|
|
const EC_FELEM *a) {
|
|
|
|
bn_mod_inverse_prime_mont_small(out->words, a->words, group->field.width,
|
|
|
|
group->mont);
|
|
|
|
}
|
2014-06-20 20:00:00 +01:00
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
void ec_GFp_mont_felem_mul(const EC_GROUP *group, EC_FELEM *r,
|
|
|
|
const EC_FELEM *a, const EC_FELEM *b) {
|
|
|
|
bn_mod_mul_montgomery_small(r->words, a->words, b->words, group->field.width,
|
|
|
|
group->mont);
|
2014-06-20 20:00:00 +01:00
|
|
|
}
|
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
void ec_GFp_mont_felem_sqr(const EC_GROUP *group, EC_FELEM *r,
|
|
|
|
const EC_FELEM *a) {
|
|
|
|
bn_mod_mul_montgomery_small(r->words, a->words, a->words, group->field.width,
|
|
|
|
group->mont);
|
|
|
|
}
|
|
|
|
|
|
|
|
int ec_GFp_mont_bignum_to_felem(const EC_GROUP *group, EC_FELEM *out,
|
|
|
|
const BIGNUM *in) {
|
2015-04-08 22:11:16 +01:00
|
|
|
if (group->mont == NULL) {
|
2015-06-29 05:28:17 +01:00
|
|
|
OPENSSL_PUT_ERROR(EC, EC_R_NOT_INITIALIZED);
|
2014-06-20 20:00:00 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
if (!bn_copy_words(out->words, group->field.width, in)) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
ec_GFp_mont_felem_to_montgomery(group, out, out);
|
|
|
|
return 1;
|
2014-06-20 20:00:00 +01:00
|
|
|
}
|
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
int ec_GFp_mont_felem_to_bignum(const EC_GROUP *group, BIGNUM *out,
|
|
|
|
const EC_FELEM *in) {
|
2015-04-08 22:11:16 +01:00
|
|
|
if (group->mont == NULL) {
|
2015-06-29 05:28:17 +01:00
|
|
|
OPENSSL_PUT_ERROR(EC, EC_R_NOT_INITIALIZED);
|
2014-06-20 20:00:00 +01:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
EC_FELEM tmp;
|
|
|
|
ec_GFp_mont_felem_from_montgomery(group, &tmp, in);
|
|
|
|
return bn_set_words(out, tmp.words, group->field.width);
|
2014-06-20 20:00:00 +01:00
|
|
|
}
|
|
|
|
|
2016-03-11 23:12:11 +00:00
|
|
|
static int ec_GFp_mont_point_get_affine_coordinates(const EC_GROUP *group,
|
2018-04-25 03:53:07 +01:00
|
|
|
const EC_RAW_POINT *point,
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
BIGNUM *x, BIGNUM *y) {
|
2018-04-25 03:53:07 +01:00
|
|
|
if (ec_GFp_simple_is_at_infinity(group, point)) {
|
2016-03-11 23:12:11 +00:00
|
|
|
OPENSSL_PUT_ERROR(EC, EC_R_POINT_AT_INFINITY);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
// Transform (X, Y, Z) into (x, y) := (X/Z^2, Y/Z^3).
|
2016-03-11 23:12:11 +00:00
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
EC_FELEM z1, z2;
|
|
|
|
ec_GFp_mont_felem_inv(group, &z2, &point->Z);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &z1, &z2);
|
2016-03-11 23:12:11 +00:00
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
// Instead of using |ec_GFp_mont_felem_from_montgomery| to convert the |x|
|
|
|
|
// coordinate and then calling |ec_GFp_mont_felem_from_montgomery| again to
|
|
|
|
// convert the |y| coordinate below, convert the common factor |z1| once now,
|
|
|
|
// saving one reduction.
|
|
|
|
ec_GFp_mont_felem_from_montgomery(group, &z1, &z1);
|
2016-03-12 00:04:14 +00:00
|
|
|
|
2018-04-23 23:33:20 +01:00
|
|
|
if (x != NULL) {
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
EC_FELEM tmp;
|
|
|
|
ec_GFp_mont_felem_mul(group, &tmp, &point->X, &z1);
|
|
|
|
if (!bn_set_words(x, tmp.words, group->field.width)) {
|
|
|
|
return 0;
|
2016-03-12 00:04:14 +00:00
|
|
|
}
|
2018-04-23 23:33:20 +01:00
|
|
|
}
|
2016-03-11 23:12:11 +00:00
|
|
|
|
2018-04-23 23:33:20 +01:00
|
|
|
if (y != NULL) {
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
EC_FELEM tmp;
|
|
|
|
ec_GFp_mont_felem_mul(group, &z1, &z1, &z2);
|
|
|
|
ec_GFp_mont_felem_mul(group, &tmp, &point->Y, &z1);
|
|
|
|
if (!bn_set_words(y, tmp.words, group->field.width)) {
|
|
|
|
return 0;
|
2016-03-11 23:12:11 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
return 1;
|
2016-03-11 23:12:11 +00:00
|
|
|
}
|
|
|
|
|
2018-11-05 23:37:29 +00:00
|
|
|
void ec_GFp_mont_add(const EC_GROUP *group, EC_RAW_POINT *out,
|
|
|
|
const EC_RAW_POINT *a, const EC_RAW_POINT *b) {
|
|
|
|
if (a == b) {
|
|
|
|
ec_GFp_mont_dbl(group, out, a);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// The method is taken from:
|
|
|
|
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#addition-add-2007-bl
|
|
|
|
//
|
|
|
|
// Coq transcription and correctness proof:
|
|
|
|
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L467>
|
|
|
|
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L544>
|
|
|
|
EC_FELEM x_out, y_out, z_out;
|
|
|
|
BN_ULONG z1nz = ec_felem_non_zero_mask(group, &a->Z);
|
|
|
|
BN_ULONG z2nz = ec_felem_non_zero_mask(group, &b->Z);
|
|
|
|
|
|
|
|
// z1z1 = z1z1 = z1**2
|
|
|
|
EC_FELEM z1z1;
|
|
|
|
ec_GFp_mont_felem_sqr(group, &z1z1, &a->Z);
|
|
|
|
|
|
|
|
// z2z2 = z2**2
|
|
|
|
EC_FELEM z2z2;
|
|
|
|
ec_GFp_mont_felem_sqr(group, &z2z2, &b->Z);
|
|
|
|
|
|
|
|
// u1 = x1*z2z2
|
|
|
|
EC_FELEM u1;
|
|
|
|
ec_GFp_mont_felem_mul(group, &u1, &a->X, &z2z2);
|
|
|
|
|
|
|
|
// two_z1z2 = (z1 + z2)**2 - (z1z1 + z2z2) = 2z1z2
|
|
|
|
EC_FELEM two_z1z2;
|
|
|
|
ec_felem_add(group, &two_z1z2, &a->Z, &b->Z);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &two_z1z2, &two_z1z2);
|
|
|
|
ec_felem_sub(group, &two_z1z2, &two_z1z2, &z1z1);
|
|
|
|
ec_felem_sub(group, &two_z1z2, &two_z1z2, &z2z2);
|
|
|
|
|
|
|
|
// s1 = y1 * z2**3
|
|
|
|
EC_FELEM s1;
|
|
|
|
ec_GFp_mont_felem_mul(group, &s1, &b->Z, &z2z2);
|
|
|
|
ec_GFp_mont_felem_mul(group, &s1, &s1, &a->Y);
|
|
|
|
|
|
|
|
// u2 = x2*z1z1
|
|
|
|
EC_FELEM u2;
|
|
|
|
ec_GFp_mont_felem_mul(group, &u2, &b->X, &z1z1);
|
|
|
|
|
|
|
|
// h = u2 - u1
|
|
|
|
EC_FELEM h;
|
|
|
|
ec_felem_sub(group, &h, &u2, &u1);
|
|
|
|
|
|
|
|
BN_ULONG xneq = ec_felem_non_zero_mask(group, &h);
|
|
|
|
|
|
|
|
// z_out = two_z1z2 * h
|
|
|
|
ec_GFp_mont_felem_mul(group, &z_out, &h, &two_z1z2);
|
|
|
|
|
|
|
|
// z1z1z1 = z1 * z1z1
|
|
|
|
EC_FELEM z1z1z1;
|
|
|
|
ec_GFp_mont_felem_mul(group, &z1z1z1, &a->Z, &z1z1);
|
|
|
|
|
|
|
|
// s2 = y2 * z1**3
|
|
|
|
EC_FELEM s2;
|
|
|
|
ec_GFp_mont_felem_mul(group, &s2, &b->Y, &z1z1z1);
|
|
|
|
|
|
|
|
// r = (s2 - s1)*2
|
|
|
|
EC_FELEM r;
|
|
|
|
ec_felem_sub(group, &r, &s2, &s1);
|
|
|
|
ec_felem_add(group, &r, &r, &r);
|
|
|
|
|
|
|
|
BN_ULONG yneq = ec_felem_non_zero_mask(group, &r);
|
|
|
|
|
|
|
|
// This case will never occur in the constant-time |ec_GFp_mont_mul|.
|
|
|
|
if (!xneq && !yneq && z1nz && z2nz) {
|
|
|
|
ec_GFp_mont_dbl(group, out, a);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// I = (2h)**2
|
|
|
|
EC_FELEM i;
|
|
|
|
ec_felem_add(group, &i, &h, &h);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &i, &i);
|
|
|
|
|
|
|
|
// J = h * I
|
|
|
|
EC_FELEM j;
|
|
|
|
ec_GFp_mont_felem_mul(group, &j, &h, &i);
|
|
|
|
|
|
|
|
// V = U1 * I
|
|
|
|
EC_FELEM v;
|
|
|
|
ec_GFp_mont_felem_mul(group, &v, &u1, &i);
|
|
|
|
|
|
|
|
// x_out = r**2 - J - 2V
|
|
|
|
ec_GFp_mont_felem_sqr(group, &x_out, &r);
|
|
|
|
ec_felem_sub(group, &x_out, &x_out, &j);
|
|
|
|
ec_felem_sub(group, &x_out, &x_out, &v);
|
|
|
|
ec_felem_sub(group, &x_out, &x_out, &v);
|
|
|
|
|
|
|
|
// y_out = r(V-x_out) - 2 * s1 * J
|
|
|
|
ec_felem_sub(group, &y_out, &v, &x_out);
|
|
|
|
ec_GFp_mont_felem_mul(group, &y_out, &y_out, &r);
|
|
|
|
EC_FELEM s1j;
|
|
|
|
ec_GFp_mont_felem_mul(group, &s1j, &s1, &j);
|
|
|
|
ec_felem_sub(group, &y_out, &y_out, &s1j);
|
|
|
|
ec_felem_sub(group, &y_out, &y_out, &s1j);
|
|
|
|
|
|
|
|
ec_felem_select(group, &x_out, z1nz, &x_out, &b->X);
|
|
|
|
ec_felem_select(group, &out->X, z2nz, &x_out, &a->X);
|
|
|
|
ec_felem_select(group, &y_out, z1nz, &y_out, &b->Y);
|
|
|
|
ec_felem_select(group, &out->Y, z2nz, &y_out, &a->Y);
|
|
|
|
ec_felem_select(group, &z_out, z1nz, &z_out, &b->Z);
|
|
|
|
ec_felem_select(group, &out->Z, z2nz, &z_out, &a->Z);
|
|
|
|
}
|
|
|
|
|
|
|
|
void ec_GFp_mont_dbl(const EC_GROUP *group, EC_RAW_POINT *r,
|
|
|
|
const EC_RAW_POINT *a) {
|
|
|
|
if (group->a_is_minus3) {
|
|
|
|
// The method is taken from:
|
|
|
|
// http://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#doubling-dbl-2001-b
|
|
|
|
//
|
|
|
|
// Coq transcription and correctness proof:
|
|
|
|
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L93>
|
|
|
|
// <https://github.com/mit-plv/fiat-crypto/blob/79f8b5f39ed609339f0233098dee1a3c4e6b3080/src/Curves/Weierstrass/Jacobian.v#L201>
|
|
|
|
EC_FELEM delta, gamma, beta, ftmp, ftmp2, tmptmp, alpha, fourbeta;
|
|
|
|
// delta = z^2
|
|
|
|
ec_GFp_mont_felem_sqr(group, &delta, &a->Z);
|
|
|
|
// gamma = y^2
|
|
|
|
ec_GFp_mont_felem_sqr(group, &gamma, &a->Y);
|
|
|
|
// beta = x*gamma
|
|
|
|
ec_GFp_mont_felem_mul(group, &beta, &a->X, &gamma);
|
|
|
|
|
|
|
|
// alpha = 3*(x-delta)*(x+delta)
|
|
|
|
ec_felem_sub(group, &ftmp, &a->X, &delta);
|
|
|
|
ec_felem_add(group, &ftmp2, &a->X, &delta);
|
|
|
|
|
|
|
|
ec_felem_add(group, &tmptmp, &ftmp2, &ftmp2);
|
|
|
|
ec_felem_add(group, &ftmp2, &ftmp2, &tmptmp);
|
|
|
|
ec_GFp_mont_felem_mul(group, &alpha, &ftmp, &ftmp2);
|
|
|
|
|
|
|
|
// x' = alpha^2 - 8*beta
|
|
|
|
ec_GFp_mont_felem_sqr(group, &r->X, &alpha);
|
|
|
|
ec_felem_add(group, &fourbeta, &beta, &beta);
|
|
|
|
ec_felem_add(group, &fourbeta, &fourbeta, &fourbeta);
|
|
|
|
ec_felem_add(group, &tmptmp, &fourbeta, &fourbeta);
|
|
|
|
ec_felem_sub(group, &r->X, &r->X, &tmptmp);
|
|
|
|
|
|
|
|
// z' = (y + z)^2 - gamma - delta
|
|
|
|
ec_felem_add(group, &delta, &gamma, &delta);
|
|
|
|
ec_felem_add(group, &ftmp, &a->Y, &a->Z);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &r->Z, &ftmp);
|
|
|
|
ec_felem_sub(group, &r->Z, &r->Z, &delta);
|
|
|
|
|
|
|
|
// y' = alpha*(4*beta - x') - 8*gamma^2
|
|
|
|
ec_felem_sub(group, &r->Y, &fourbeta, &r->X);
|
|
|
|
ec_felem_add(group, &gamma, &gamma, &gamma);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &gamma, &gamma);
|
|
|
|
ec_GFp_mont_felem_mul(group, &r->Y, &alpha, &r->Y);
|
|
|
|
ec_felem_add(group, &gamma, &gamma, &gamma);
|
|
|
|
ec_felem_sub(group, &r->Y, &r->Y, &gamma);
|
|
|
|
} else {
|
|
|
|
// The method is taken from:
|
|
|
|
// http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-2007-bl
|
|
|
|
//
|
|
|
|
// Coq transcription and correctness proof:
|
|
|
|
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L102>
|
|
|
|
// <https://github.com/davidben/fiat-crypto/blob/c7b95f62b2a54b559522573310e9b487327d219a/src/Curves/Weierstrass/Jacobian.v#L534>
|
|
|
|
EC_FELEM xx, yy, yyyy, zz;
|
|
|
|
ec_GFp_mont_felem_sqr(group, &xx, &a->X);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &yy, &a->Y);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &yyyy, &yy);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &zz, &a->Z);
|
|
|
|
|
|
|
|
// s = 2*((x_in + yy)^2 - xx - yyyy)
|
|
|
|
EC_FELEM s;
|
|
|
|
ec_felem_add(group, &s, &a->X, &yy);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &s, &s);
|
|
|
|
ec_felem_sub(group, &s, &s, &xx);
|
|
|
|
ec_felem_sub(group, &s, &s, &yyyy);
|
|
|
|
ec_felem_add(group, &s, &s, &s);
|
|
|
|
|
|
|
|
// m = 3*xx + a*zz^2
|
|
|
|
EC_FELEM m;
|
|
|
|
ec_GFp_mont_felem_sqr(group, &m, &zz);
|
|
|
|
ec_GFp_mont_felem_mul(group, &m, &group->a, &m);
|
|
|
|
ec_felem_add(group, &m, &m, &xx);
|
|
|
|
ec_felem_add(group, &m, &m, &xx);
|
|
|
|
ec_felem_add(group, &m, &m, &xx);
|
|
|
|
|
|
|
|
// x_out = m^2 - 2*s
|
|
|
|
ec_GFp_mont_felem_sqr(group, &r->X, &m);
|
|
|
|
ec_felem_sub(group, &r->X, &r->X, &s);
|
|
|
|
ec_felem_sub(group, &r->X, &r->X, &s);
|
|
|
|
|
|
|
|
// z_out = (y_in + z_in)^2 - yy - zz
|
|
|
|
ec_felem_add(group, &r->Z, &a->Y, &a->Z);
|
|
|
|
ec_GFp_mont_felem_sqr(group, &r->Z, &r->Z);
|
|
|
|
ec_felem_sub(group, &r->Z, &r->Z, &yy);
|
|
|
|
ec_felem_sub(group, &r->Z, &r->Z, &zz);
|
|
|
|
|
|
|
|
// y_out = m*(s-x_out) - 8*yyyy
|
|
|
|
ec_felem_add(group, &yyyy, &yyyy, &yyyy);
|
|
|
|
ec_felem_add(group, &yyyy, &yyyy, &yyyy);
|
|
|
|
ec_felem_add(group, &yyyy, &yyyy, &yyyy);
|
|
|
|
ec_felem_sub(group, &r->Y, &s, &r->X);
|
|
|
|
ec_GFp_mont_felem_mul(group, &r->Y, &r->Y, &m);
|
|
|
|
ec_felem_sub(group, &r->Y, &r->Y, &yyyy);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-02 22:25:39 +01:00
|
|
|
DEFINE_METHOD_FUNCTION(EC_METHOD, EC_GFp_mont_method) {
|
|
|
|
out->group_init = ec_GFp_mont_group_init;
|
|
|
|
out->group_finish = ec_GFp_mont_group_finish;
|
|
|
|
out->group_set_curve = ec_GFp_mont_group_set_curve;
|
|
|
|
out->point_get_affine_coordinates = ec_GFp_mont_point_get_affine_coordinates;
|
2018-11-05 23:37:29 +00:00
|
|
|
out->add = ec_GFp_mont_add;
|
|
|
|
out->dbl = ec_GFp_mont_dbl;
|
|
|
|
out->mul = ec_GFp_mont_mul;
|
|
|
|
out->mul_public = ec_GFp_mont_mul_public;
|
Add EC_FELEM for EC_POINTs and related temporaries.
This introduces EC_FELEM, which is analogous to EC_SCALAR. It is used
for EC_POINT's representation in the generic EC_METHOD, as well as
random operations on tuned EC_METHODs that still are implemented
genericly.
Unlike EC_SCALAR, EC_FELEM's exact representation is awkwardly specific
to the EC_METHOD, analogous to how the old values were BIGNUMs but may
or may not have been in Montgomery form. This is kind of a nuisance, but
no more than before. (If p224-64.c were easily convertable to Montgomery
form, we could say |EC_FELEM| is always in Montgomery form. If we
exposed the internal add and double implementations in each of the
curves, we could give |EC_POINT| an |EC_METHOD|-specific representation
and |EC_FELEM| is purely a |EC_GFp_mont_method| type. I'll leave this
for later.)
The generic add and doubling formulas are aligned with the formulas
proved in fiat-crypto. Those only applied to a = -3, so I've proved a
generic one in https://github.com/mit-plv/fiat-crypto/pull/356, in case
someone uses a custom curve. The new formulas are verified,
constant-time, and swap a multiply for a square. As expressed in
fiat-crypto they do use more temporaries, but this seems to be fine with
stack-allocated EC_FELEMs. (We can try to help the compiler later,
but benchamrks below suggest this isn't necessary.)
Unlike BIGNUM, EC_FELEM can be stack-allocated. It also captures the
bounds in the type system and, in particular, that the width is correct,
which will make it easier to select a point in constant-time in the
future. (Indeed the old code did not always have the correct width. Its
point formula involved halving and implemented this in variable time and
variable width.)
Before:
Did 77274 ECDH P-256 operations in 10046087us (7692.0 ops/sec)
Did 5959 ECDH P-384 operations in 10031701us (594.0 ops/sec)
Did 10815 ECDSA P-384 signing operations in 10087892us (1072.1 ops/sec)
Did 8976 ECDSA P-384 verify operations in 10071038us (891.3 ops/sec)
Did 2600 ECDH P-521 operations in 10091688us (257.6 ops/sec)
Did 4590 ECDSA P-521 signing operations in 10055195us (456.5 ops/sec)
Did 3811 ECDSA P-521 verify operations in 10003574us (381.0 ops/sec)
After:
Did 77736 ECDH P-256 operations in 10029858us (7750.5 ops/sec) [+0.8%]
Did 7519 ECDH P-384 operations in 10068076us (746.8 ops/sec) [+25.7%]
Did 13335 ECDSA P-384 signing operations in 10029962us (1329.5 ops/sec) [+24.0%]
Did 11021 ECDSA P-384 verify operations in 10088600us (1092.4 ops/sec) [+22.6%]
Did 2912 ECDH P-521 operations in 10001325us (291.2 ops/sec) [+13.0%]
Did 5150 ECDSA P-521 signing operations in 10027462us (513.6 ops/sec) [+12.5%]
Did 4264 ECDSA P-521 verify operations in 10069694us (423.4 ops/sec) [+11.1%]
This more than pays for removing points_make_affine previously and even
speeds up ECDH P-256 slightly. (The point-on-curve check uses the
generic code.)
Next is to push the stack-allocating up to ec_wNAF_mul, followed by a
constant-time single-point multiplication.
Bug: 239
Change-Id: I44a2dff7c52522e491d0f8cffff64c4ab5cd353c
Reviewed-on: https://boringssl-review.googlesource.com/27668
Reviewed-by: Adam Langley <agl@google.com>
2018-04-23 02:39:34 +01:00
|
|
|
out->felem_mul = ec_GFp_mont_felem_mul;
|
|
|
|
out->felem_sqr = ec_GFp_mont_felem_sqr;
|
|
|
|
out->bignum_to_felem = ec_GFp_mont_bignum_to_felem;
|
|
|
|
out->felem_to_bignum = ec_GFp_mont_felem_to_bignum;
|
2018-04-07 00:43:29 +01:00
|
|
|
out->scalar_inv_montgomery = ec_simple_scalar_inv_montgomery;
|
2017-05-02 22:25:39 +01:00
|
|
|
}
|