Преглед на файлове

Speed up variable windowed exponentation a bit.

The first non-zero window (which we can condition on for public
exponents) always multiplies by one. This means we can cut out one
Montgomery multiplication. It also means we never actually need to
initialize r to one, saving another Montgomery multiplication for P-521.

This, in turn, means we don't need the bn_one_to_montgomery optimization
for the public-exponent exponentations, so we can delete
bn_one_to_montgomery_small. (The function does currently promise to
handle p = 0, but this is not actually reachable, so it can just do a
reduction on RR.)

For RSA, where we're not doing many multiplications to begin with,
saving one is noticeable.

Before:
Did 92000 RSA 2048 verify (same key) operations in 3002557us (30640.6 ops/sec)
Did 25165 RSA 4096 verify (same key) operations in 3045046us (8264.2 ops/sec)

After:
Did 100000 RSA 2048 verify (same key) operations in 3002483us (33305.8 ops/sec)
Did 26603 RSA 4096 verify (same key) operations in 3010942us (8835.4 ops/sec)

(Not looking at the fresh key number yet as that still needs to be
fixed.)

Change-Id: I81a025a68d9b0f8eb0f9c6c04ec4eedf0995a345
Reviewed-on: https://boringssl-review.googlesource.com/27286
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
kris/onging/CECPQ3_patch15
David Benjamin преди 6 години
committed by CQ bot account: commit-bot@chromium.org
родител
ревизия
7e2a8a34ba
променени са 3 файла, в които са добавени 27 реда и са изтрити 49 реда
  1. +27
    -20
      crypto/fipsmodule/bn/exponentiation.c
  2. +0
    -7
      crypto/fipsmodule/bn/internal.h
  3. +0
    -22
      crypto/fipsmodule/bn/montgomery.c

+ 27
- 20
crypto/fipsmodule/bn/exponentiation.c Целия файл

@@ -660,10 +660,8 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
}

if (!bn_one_to_montgomery(r, mont, ctx)) {
goto err;
}

// |p| is non-zero, so at least one window is non-zero. To save some
// multiplications, defer initializing |r| until then.
int r_is_one = 1;
int wstart = bits - 1; // The top bit of the window.
for (;;) {
@@ -700,7 +698,11 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,

assert(wvalue & 1);
assert(wvalue < (1 << window));
if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
if (r_is_one) {
if (!BN_copy(r, val[wvalue >> 1])) {
goto err;
}
} else if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
goto err;
}

@@ -711,6 +713,9 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
wstart -= wsize + 1;
}

// |p| is non-zero, so |r_is_one| must be cleared at some point.
assert(!r_is_one);

if (!BN_from_montgomery(rr, r, mont, ctx)) {
goto err;
}
@@ -730,17 +735,17 @@ int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return 0;
}
if (!BN_is_odd(&mont->N)) {
OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
return 0;
}
unsigned bits = 0;
if (num_p != 0) {
bits = BN_num_bits_word(p[num_p - 1]) + (num_p - 1) * BN_BITS2;
assert(BN_is_odd(&mont->N));

// Count the number of bits in |p|. Note this function treats |p| as public.
while (num_p != 0 && p[num_p - 1] == 0) {
num_p--;
}
if (bits == 0) {
return bn_one_to_montgomery_small(r, num_r, mont);
if (num_p == 0) {
return bn_from_montgomery_small(r, num_r, mont->RR.d, mont->RR.width, mont);
}
unsigned bits = BN_num_bits_word(p[num_p - 1]) + (num_p - 1) * BN_BITS2;
assert(bits != 0);

// We exponentiate by looking at sliding windows of the exponent and
// precomputing powers of |a|. Windows may be shifted so they always end on a
@@ -767,10 +772,8 @@ int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
}
}

if (!bn_one_to_montgomery_small(r, num_r, mont)) {
goto err;
}

// |p| is non-zero, so at least one window is non-zero. To save some
// multiplications, defer initializing |r| until then.
int r_is_one = 1;
unsigned wstart = bits - 1; // The top bit of the window.
for (;;) {
@@ -808,8 +811,10 @@ int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,

assert(wvalue & 1);
assert(wvalue < (1u << window));
if (!bn_mod_mul_montgomery_small(r, num_r, r, num_r, val[wvalue >> 1],
num_n, mont)) {
if (r_is_one) {
OPENSSL_memcpy(r, val[wvalue >> 1], num_r * sizeof(BN_ULONG));
} else if (!bn_mod_mul_montgomery_small(r, num_r, r, num_r,
val[wvalue >> 1], num_n, mont)) {
goto err;
}

@@ -820,6 +825,8 @@ int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
wstart -= wsize + 1;
}

// |p| is non-zero, so |r_is_one| must be cleared at some point.
assert(!r_is_one);
ret = 1;

err:


+ 0
- 7
crypto/fipsmodule/bn/internal.h Целия файл

@@ -547,13 +547,6 @@ int bn_to_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
int bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
size_t num_a, const BN_MONT_CTX *mont);

// bn_one_to_montgomery_small sets |r| to one in Montgomery form. It returns one
// on success and zero on error. |num_r| must be the length of the modulus,
// which is |mont->N.top|. This function treats the bit width of the modulus as
// public.
int bn_one_to_montgomery_small(BN_ULONG *r, size_t num_r,
const BN_MONT_CTX *mont);

// bn_mod_mul_montgomery_small sets |r| to |a| * |b| mod |mont->N|. Both inputs
// and outputs are in the Montgomery domain. |num_r| must be the length of the
// modulus, which is |mont->N.top|. This function returns one on success and


+ 0
- 22
crypto/fipsmodule/bn/montgomery.c Целия файл

@@ -449,28 +449,6 @@ int bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
return ret;
}

int bn_one_to_montgomery_small(BN_ULONG *r, size_t num_r,
const BN_MONT_CTX *mont) {
const BN_ULONG *n = mont->N.d;
size_t num_n = mont->N.width;
if (num_n == 0 || num_r != num_n) {
OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return 0;
}

// If the high bit of |n| is set, R = 2^(num_n*BN_BITS2) < 2 * |n|, so we
// compute R - |n| rather than perform Montgomery reduction.
if (num_n > 0 && (n[num_n - 1] >> (BN_BITS2 - 1)) != 0) {
r[0] = 0 - n[0];
for (size_t i = 1; i < num_n; i++) {
r[i] = ~n[i];
}
return 1;
}

return bn_from_montgomery_small(r, num_r, mont->RR.d, mont->RR.width, mont);
}

int bn_mod_mul_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
size_t num_a, const BN_ULONG *b, size_t num_b,
const BN_MONT_CTX *mont) {


Зареждане…
Отказ
Запис