Make copy_from_prebuf constant time.

(Imported from upstream's 708dc2f1291e104fe4eef810bb8ffc1fae5b19c1.)

The performance penalty varies from platform to platform, and even with key
length. For RSA-2048 signing it was observed to reach almost 10%.

This is part of the fix for CVE-2016-0702.

Change-Id: Ie0860bf3e531196f03102db1bc48eeaf30ab1d58
Reviewed-on: https://boringssl-review.googlesource.com/7241
Reviewed-by: Adam Langley <agl@google.com>
This commit is contained in:
Adam Langley 2016-03-01 07:54:10 -08:00
parent aeb69a02b8
commit 82bdaa89f0

View File

@ -788,29 +788,65 @@ err:
* pattern as far as cache lines are concerned. The following functions are
* used to transfer a BIGNUM from/to that table. */
static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
int width) {
size_t i, j;
int window) {
int i, j;
const int width = 1 << window;
BN_ULONG *table = (BN_ULONG *) buf;
if (top > b->top) {
top = b->top; /* this works because 'buf' is explicitly zeroed */
}
for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
buf[j] = ((unsigned char *)b->d)[i];
for (i = 0, j = idx; i < top; i++, j += width) {
table[j] = b->d[i];
}
return 1;
}
static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
int width) {
size_t i, j;
int window) {
int i, j;
const int width = 1 << window;
volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
if (bn_wexpand(b, top) == NULL) {
return 0;
}
for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
((unsigned char *)b->d)[i] = buf[j];
if (window <= 3) {
for (i = 0; i < top; i++, table += width) {
BN_ULONG acc = 0;
for (j = 0; j < width; j++) {
acc |= table[j] & ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
}
b->d[i] = acc;
}
} else {
int xstride = 1 << (window - 2);
BN_ULONG y0, y1, y2, y3;
i = idx >> (window - 2); /* equivalent of idx / xstride */
idx &= xstride - 1; /* equivalent of idx % xstride */
y0 = (BN_ULONG)0 - (constant_time_eq_int(i, 0) & 1);
y1 = (BN_ULONG)0 - (constant_time_eq_int(i, 1) & 1);
y2 = (BN_ULONG)0 - (constant_time_eq_int(i, 2) & 1);
y3 = (BN_ULONG)0 - (constant_time_eq_int(i, 3) & 1);
for (i = 0; i < top; i++, table += width) {
BN_ULONG acc = 0;
for (j = 0; j < xstride; j++) {
acc |= ((table[j + 0 * xstride] & y0) | (table[j + 1 * xstride] & y1) |
(table[j + 2 * xstride] & y2) | (table[j + 3 * xstride] & y3)) &
((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
}
b->d[i] = acc;
}
}
b->top = top;
@ -1129,8 +1165,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
} else
#endif
{
if (!copy_to_prebuf(&tmp, top, powerbuf, 0, numPowers) ||
!copy_to_prebuf(&am, top, powerbuf, 1, numPowers)) {
if (!copy_to_prebuf(&tmp, top, powerbuf, 0, window) ||
!copy_to_prebuf(&am, top, powerbuf, 1, window)) {
goto err;
}
@ -1141,13 +1177,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
*/
if (window > 1) {
if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx) ||
!copy_to_prebuf(&tmp, top, powerbuf, 2, numPowers)) {
!copy_to_prebuf(&tmp, top, powerbuf, 2, window)) {
goto err;
}
for (i = 3; i < numPowers; i++) {
/* Calculate a^i = a^(i-1) * a */
if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx) ||
!copy_to_prebuf(&tmp, top, powerbuf, i, numPowers)) {
!copy_to_prebuf(&tmp, top, powerbuf, i, window)) {
goto err;
}
}
@ -1157,7 +1193,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
}
if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, numPowers)) {
if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) {
goto err;
}
@ -1176,7 +1212,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
/* Fetch the appropriate pre-computed value from the pre-buf */
if (!copy_from_prebuf(&am, top, powerbuf, wvalue, numPowers)) {
if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) {
goto err;
}