2018-02-01 21:49:18 +00:00
|
|
|
/*
|
|
|
|
* Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
|
|
|
* Copyright (c) 2014, Intel Corporation. All Rights Reserved.
|
2016-10-25 01:02:26 +01:00
|
|
|
*
|
2018-02-01 21:49:18 +00:00
|
|
|
* Licensed under the OpenSSL license (the "License"). You may not use
|
|
|
|
* this file except in compliance with the License. You can obtain a copy
|
|
|
|
* in the file LICENSE in the source distribution or at
|
|
|
|
* https://www.openssl.org/source/license.html
|
2016-10-25 01:02:26 +01:00
|
|
|
*
|
2018-02-01 21:49:18 +00:00
|
|
|
* Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
|
|
|
|
* (1) Intel Corporation, Israel Development Center, Haifa, Israel
|
|
|
|
* (2) University of Haifa, Israel
|
|
|
|
*
|
|
|
|
* Reference:
|
|
|
|
* S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with
|
|
|
|
* 256 Bit Primes"
|
|
|
|
*/
|
2016-10-25 01:02:26 +01:00
|
|
|
|
|
|
|
#ifndef OPENSSL_HEADER_EC_P256_X86_64_H
|
|
|
|
#define OPENSSL_HEADER_EC_P256_X86_64_H
|
|
|
|
|
|
|
|
#include <openssl/base.h>
|
|
|
|
|
|
|
|
#include <openssl/bn.h>
|
|
|
|
|
Add an ABI testing framework.
Dear reader, I must apologize in advance. This CL contains the following:
- A new 256-line perlasm file with non-trivial perl bits and a dual-ABI
variadic function caller.
- C preprocessor gymnastics, with variadic macros and fun facts about
__VA_ARGS__'s behavior on empty argument lists.
- C++ template gymnastics, including variadic arguments, template
specialization, std::enable_if, and machinery to control template argument
deduction.
Enjoy.
This tests that our assembly functions correctly honor platform ABI
conventions. Right now this only tests callee-saved registers, but it should be
extendable to SEH/CFI unwind testing with single-step debugging APIs.
Register-checking does not involve anything funny and should be compatible with
SDE. (The future unwind testing is unlikely to be compatible.)
This CL adds support for x86_64 SysV and Win64 ABIs. ARM, AArch64, and x86 can
be added in the future. The testing is injected in two places. First, all the
assembly tests in p256-x86_64-test.cc are now instrumented. This is the
intended workflow and should capture all registers.
However, we currently do not unit-test our assembly much directly. We should do
that as follow-up work[0] but, in the meantime, I've also wrapped all of the GTest
main function in an ABI test. This is imperfect as ABI failures may be masked
by other stack frames, but it costs nothing[1] and is pretty reliable at
catching Win64 xmm register failures.
[0] An alternate strategy would be, in debug builds, unconditionally instrument
every assembly call in libcrypto. But the CHECK_ABI macro would be difficult to
replicate in pure C, and unwind testing may be too invasive for this. Still,
something to consider when we C++ libcrypto.
[1] When single-stepped unwind testing exists, it won't cost nothing. The
gtest_main.cc call will turn unwind testing off.
Change-Id: I6643b26445891fd46abfacac52bc024024c8d7f6
Reviewed-on: https://boringssl-review.googlesource.com/c/33764
Reviewed-by: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <alangley@gmail.com>
Commit-Queue: David Benjamin <davidben@google.com>
2018-12-16 00:58:43 +00:00
|
|
|
#include "../bn/internal.h"
|
|
|
|
|
2016-10-25 01:02:26 +01:00
|
|
|
#if defined(__cplusplus)
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
|
|
|
|
!defined(OPENSSL_SMALL)
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// P-256 field operations.
|
|
|
|
//
|
|
|
|
// An element mod P in P-256 is represented as a little-endian array of
|
|
|
|
// |P256_LIMBS| |BN_ULONG|s, spanning the full range of values.
|
|
|
|
//
|
|
|
|
// The following functions take fully-reduced inputs mod P and give
|
|
|
|
// fully-reduced outputs. They may be used in-place.
|
2016-10-25 01:02:26 +01:00
|
|
|
|
|
|
|
// P256_LIMBS is the length of the |BN_ULONG| arrays used for P-256 values in
// this header: 256 divided by |BN_BITS2|. |BN_BITS2| is defined outside this
// file; presumably it is the bit width of one |BN_ULONG| -- confirm in bn.h.
#define P256_LIMBS (256 / BN_BITS2)
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// ecp_nistz256_neg sets |res| to -|a| mod P.
|
2016-10-25 01:02:26 +01:00
|
|
|
// |res| is the output and |a| the input; each is a |P256_LIMBS|-limb array.
void ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]);
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// ecp_nistz256_mul_mont sets |res| to |a| * |b| * 2^-256 mod P.
|
2016-10-25 01:02:26 +01:00
|
|
|
void ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS],
|
|
|
|
const BN_ULONG a[P256_LIMBS],
|
|
|
|
const BN_ULONG b[P256_LIMBS]);
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// ecp_nistz256_sqr_mont sets |res| to |a| * |a| * 2^-256 mod P.
|
2016-10-25 01:02:26 +01:00
|
|
|
void ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS],
|
|
|
|
const BN_ULONG a[P256_LIMBS]);
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// ecp_nistz256_from_mont sets |res| to |in|, converted from Montgomery domain
|
|
|
|
// by multiplying with 1.
|
2016-12-22 08:01:40 +00:00
|
|
|
static inline void ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS],
|
|
|
|
const BN_ULONG in[P256_LIMBS]) {
|
|
|
|
static const BN_ULONG ONE[P256_LIMBS] = { 1 };
|
|
|
|
ecp_nistz256_mul_mont(res, in, ONE);
|
|
|
|
}
|
2016-10-25 01:02:26 +01:00
|
|
|
|
2018-11-06 23:18:56 +00:00
|
|
|
// ecp_nistz256_to_mont sets |res| to |in|, converted to Montgomery domain
|
|
|
|
// by multiplying with RR = 2^512 mod P precomputed for NIST P256 curve.
|
|
|
|
static inline void ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS],
|
|
|
|
const BN_ULONG in[P256_LIMBS]) {
|
|
|
|
static const BN_ULONG RR[P256_LIMBS] = {
|
|
|
|
TOBN(0x00000000, 0x00000003), TOBN(0xfffffffb, 0xffffffff),
|
|
|
|
TOBN(0xffffffff, 0xfffffffe), TOBN(0x00000004, 0xfffffffd)};
|
|
|
|
ecp_nistz256_mul_mont(res, in, RR);
|
|
|
|
}
|
|
|
|
|
2016-10-25 01:02:26 +01:00
|
|
|
|
2018-04-21 06:20:15 +01:00
|
|
|
// P-256 scalar operations.
|
|
|
|
//
|
|
|
|
// The following functions compute modulo N, where N is the order of P-256. They
|
|
|
|
// take fully-reduced inputs and give fully-reduced outputs.
|
|
|
|
|
|
|
|
// ecp_nistz256_ord_mul_mont sets |res| to |a| * |b| where inputs and outputs
|
|
|
|
// are in Montgomery form. That is, |res| is |a| * |b| * 2^-256 mod N.
|
|
|
|
void ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS],
|
|
|
|
const BN_ULONG a[P256_LIMBS],
|
|
|
|
const BN_ULONG b[P256_LIMBS]);
|
|
|
|
|
|
|
|
// ecp_nistz256_ord_sqr_mont sets |res| to |a|^(2^|rep|) where inputs and
|
|
|
|
// outputs are in Montgomery form. That is, |res| is
|
|
|
|
// (|a| * 2^-256)^(2^|rep|) * 2^256 mod N.
|
|
|
|
void ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS],
|
Test and fix an ABI issue with small parameters.
Calling conventions must specify how to handle arguments smaller than a
machine word. Should the caller pad them up to a machine word size with
predictable values (zero/sign-extended), or should the callee tolerate
an arbitrary bit pattern?
Annoyingly, I found no text in either SysV or Win64 ABI documentation
describing any of this and resorted to experiment. The short answer is
that callees must tolerate an arbitrary bit pattern on x86_64, which
means we must test this. See the comment in abi_test::internal::ToWord
for the long answer.
CHECK_ABI now, if the type of the parameter is smaller than
crypto_word_t, fills the remaining bytes with 0xaa. This is so the
number is out of bounds for code expecting either zero or sign
extension. (Not that crypto assembly has any business seeing negative
numbers.)
Doing so reveals a bug in ecp_nistz256_ord_sqr_mont. The rep parameter
is typed int, but the code expected uint64_t. In practice, the compiler
will always compile this correctly because:
- On both Win64 and SysV, rep is a register parameter.
- The rep parameter is always a constant, so the compiler has no reason
to leave garbage in the upper half.
However, I was indeed able to get a bug out of GCC via:
uint64_t foo = (1ull << 63) | 2; // Some global the compiler can't
// prove constant.
ecp_nistz256_ord_sqr_mont(res, a, foo >> 1);
Were ecp_nistz256_ord_sqr_mont a true int-taking function, this would
act like ecp_nistz256_ord_sqr_mont(res, a, 1). Instead, it hung. Fix
this by having it take a full-width word.
This mess has several consequences:
- ABI testing now ideally needs a functional testing component to fully cover
this case. A bad input might merely produce the wrong answer. Still,
this is fairly effective as it will cause most code to either segfault
or loop forever. (Not the enc parameter to AES however...)
- We cannot freely change the type of assembly function prototypes. If the
prototype says int or unsigned, it must be ignoring the upper half and
thus "fixing" it to size_t cannot have handled the full range. (Unless
it was simply wrong or the parameter is already bounded.) If the
prototype says size_t, switching to int or unsigned will hit this type
of bug. The former is a safer failure mode though.
- The simplest path out of this mess: new assembly code should *only*
ever take word-sized parameters. This is not a tall order as the bad
parameters are usually ints that should have been size_t.
Calling conventions are hard.
Change-Id: If8254aff8953844679fbce4bd3e345e5e2fa5213
Reviewed-on: https://boringssl-review.googlesource.com/c/34627
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
2019-01-28 04:06:57 +00:00
|
|
|
const BN_ULONG a[P256_LIMBS], BN_ULONG rep);
|
2018-04-21 06:20:15 +01:00
|
|
|
|
2018-11-06 23:18:56 +00:00
|
|
|
// beeu_mod_inverse_vartime sets out = a^-1 mod p using a Euclidean algorithm.
|
|
|
|
// Assumption: 0 < a < p < 2^(256) and p is odd.
|
|
|
|
int beeu_mod_inverse_vartime(BN_ULONG out[P256_LIMBS],
|
|
|
|
const BN_ULONG a[P256_LIMBS],
|
|
|
|
const BN_ULONG p[P256_LIMBS]);
|
|
|
|
|
2018-04-21 06:20:15 +01:00
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// P-256 point operations.
|
|
|
|
//
|
|
|
|
// The following functions may be used in-place. All coordinates are in the
|
|
|
|
// Montgomery domain.
|
2016-10-25 01:02:26 +01:00
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// A P256_POINT represents a P-256 point in Jacobian coordinates.
|
2016-10-25 01:02:26 +01:00
|
|
|
typedef struct {
|
|
|
|
BN_ULONG X[P256_LIMBS];
|
|
|
|
BN_ULONG Y[P256_LIMBS];
|
|
|
|
BN_ULONG Z[P256_LIMBS];
|
|
|
|
} P256_POINT;
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// A P256_POINT_AFFINE represents a P-256 point in affine coordinates. Infinity
|
|
|
|
// is encoded as (0, 0).
|
2016-10-25 01:02:26 +01:00
|
|
|
typedef struct {
|
|
|
|
BN_ULONG X[P256_LIMBS];
|
|
|
|
BN_ULONG Y[P256_LIMBS];
|
|
|
|
} P256_POINT_AFFINE;
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// ecp_nistz256_select_w5 sets |*val| to |in_t[index-1]| if 1 <= |index| <= 16
|
|
|
|
// and all zeros (the point at infinity) if |index| is 0. This is done in
|
|
|
|
// constant time.
|
2016-10-25 01:02:26 +01:00
|
|
|
void ecp_nistz256_select_w5(
    P256_POINT *val,              // Output: the selected table entry.
    const P256_POINT in_t[16],    // Table of 16 points.
    int index);                   // 0 selects all zeros; 1..16 select an entry.
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// ecp_nistz256_select_w7 sets |*val| to |in_t[index-1]| if 1 <= |index| <= 64
|
|
|
|
// and all zeros (the point at infinity) if |index| is 0. This is done in
|
|
|
|
// constant time.
|
2016-10-25 01:02:26 +01:00
|
|
|
void ecp_nistz256_select_w7(
    P256_POINT_AFFINE *val,             // Output: the selected table entry.
    const P256_POINT_AFFINE in_t[64],   // Table of 64 affine points.
    int index);  // 0 selects all zeros; 1..64 select an entry.
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// ecp_nistz256_point_double sets |r| to |a| doubled.
|
2016-10-25 01:02:26 +01:00
|
|
|
// |r| is the output point and |a| the input point.
void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a);
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// ecp_nistz256_point_add adds |a| to |b| and places the result in |r|.
|
2016-10-25 01:02:26 +01:00
|
|
|
void ecp_nistz256_point_add(
    P256_POINT *r,          // Output: |a| + |b|.
    const P256_POINT *a,    // First summand.
    const P256_POINT *b);   // Second summand.
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
// ecp_nistz256_point_add_affine adds |a| to |b| and places the result in
|
|
|
|
// |r|. |a| and |b| must not represent the same point unless they are both
|
|
|
|
// infinity.
|
2016-10-25 01:02:26 +01:00
|
|
|
void ecp_nistz256_point_add_affine(P256_POINT *r, const P256_POINT *a,
|
|
|
|
const P256_POINT_AFFINE *b);
|
|
|
|
|
|
|
|
#endif /* !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
|
|
|
|
!defined(OPENSSL_SMALL) */
|
|
|
|
|
|
|
|
|
|
|
|
#if defined(__cplusplus)
|
2017-08-18 19:06:02 +01:00
|
|
|
} // extern "C"
|
2016-10-25 01:02:26 +01:00
|
|
|
#endif
|
|
|
|
|
2017-08-18 19:06:02 +01:00
|
|
|
#endif // OPENSSL_HEADER_EC_P256_X86_64_H
|