Remove non-ASM version of |bn_mul_mont| in bn/generic.c.

When building in |OPENSSL_NO_ASM| mode, MSVC complains about unreachable
code. The redundant initialization of |i| is the main problem, and
skipping the first test of the |i < num| condition with |goto| is also
confusing.
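
For readers without the file at hand, the control-flow shape in question
looks roughly like the following minimal sketch (the function, its names,
and its body are illustrative stand-ins, not the real generic.c code): the
first pass is done before the loop and control then jumps into the loop
body, so the first evaluation of |i < num| is skipped and the loop header
is only ever reached from below.

/* Illustrative only: a goto-into-the-loop shape like the one described
 * above. Assumes num >= 1. */
static int sum_with_goto_enter(const int *w, int num) {
  int acc;
  int i = 0;

  acc = w[0]; /* first pass, done outside the loop */
  goto enter;

  for (; i < num; i++) {
    acc += w[i]; /* work done only on the later passes */
  enter:
    acc ^= i; /* tail work shared by every pass, including the first */
  }
  return acc;
}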

It turns out that |bn_mul_mont| is only called when assembly language
optimizations are available, and in that case the assembly language
versions are always used instead. So, although this code is still
compiled in |OPENSSL_NO_ASM| builds, it is never called in those builds
and can simply be removed.
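
The call-site pattern this relies on looks roughly like the sketch below.
Only |bn_mul_mont|'s prototype and the |OPENSSL_BN_ASM_MONT| macro come
from the tree itself; the wrapper function, the |BN_ULONG| stand-in
typedef, and the surrounding control flow are assumptions made for
illustration.

/* Sketch, not BoringSSL source: the only caller of bn_mul_mont sits
 * behind OPENSSL_BN_ASM_MONT, which is defined exactly when an assembly
 * implementation is available, so the C fallback being removed here is
 * never reached. */
typedef unsigned long BN_ULONG; /* stand-in; the real type is per-platform */

int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0p, int num);

static int mont_mul(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
                    const BN_ULONG *n, const BN_ULONG *n0, int num) {
#if defined(OPENSSL_BN_ASM_MONT)
  /* When this is defined, an assembly bn_mul_mont is what gets linked. */
  return bn_mul_mont(r, a, b, n, n0, num);
#else
  /* OPENSSL_NO_ASM builds take a separate, portable path entirely. */
  (void)r; (void)a; (void)b; (void)n; (void)n0; (void)num;
  return 0;
#endif
}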

Change-Id: Id551899b2602824978edc1a1cb0703b76516808d
Reviewed-on: https://boringssl-review.googlesource.com/5550
Reviewed-by: Adam Langley <agl@google.com>
Brian Smith 2015-07-26 16:25:26 -04:00 committed by Adam Langley
parent 59b0fccb51
commit 5d5e39f5d2

@@ -1022,110 +1022,4 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) {
  r[7] = c2;
}
#if defined(OPENSSL_NO_ASM) || (!defined(OPENSSL_ARM) && !defined(OPENSSL_X86_64))
/* This is essentially reference implementation, which may or may not
 * result in performance improvement. E.g. on IA-32 this routine was
 * observed to give 40% faster rsa1024 private key operations and 10%
 * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
 * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
 * reference implementation, one to be used as starting point for
 * platform-specific assembler. Mentioned numbers apply to compiler
 * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
 * can vary not only from platform to platform, but even for compiler
 * versions. Assembler vs. assembler improvement coefficients can
 * [and are known to] differ and are to be documented elsewhere. */
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                const BN_ULONG *np, const BN_ULONG *n0p, int num) {
  BN_ULONG c0, c1, ml, *tp, n0;
#ifdef mul64
  BN_ULONG mh;
#endif
  volatile BN_ULONG *vp;
  int i = 0, j;
#if 0 /* template for platform-specific implementation */
  if (ap == bp) return bn_sqr_mont(rp, ap, np, n0p, num);
#endif
  vp = tp = alloca((num + 2) * sizeof(BN_ULONG));
  n0 = *n0p;
  c0 = 0;
  ml = bp[0];
#ifdef mul64
  mh = HBITS(ml);
  ml = LBITS(ml);
  for (j = 0; j < num; ++j) {
    mul(tp[j], ap[j], ml, mh, c0);
  }
#else
  for (j = 0; j < num; ++j) {
    mul(tp[j], ap[j], ml, c0);
  }
#endif
  tp[num] = c0;
  tp[num + 1] = 0;
  goto enter;
  for (; i < num; i++) {
    c0 = 0;
    ml = bp[i];
#ifdef mul64
    mh = HBITS(ml);
    ml = LBITS(ml);
    for (j = 0; j < num; ++j) {
      mul_add(tp[j], ap[j], ml, mh, c0);
    }
#else
    for (j = 0; j < num; ++j) {
      mul_add(tp[j], ap[j], ml, c0);
    }
#endif
    c1 = (tp[num] + c0) & BN_MASK2;
    tp[num] = c1;
    tp[num + 1] = (c1 < c0 ? 1 : 0);
  enter:
    c1 = tp[0];
    ml = (c1 * n0) & BN_MASK2;
    c0 = 0;
#ifdef mul64
    mh = HBITS(ml);
    ml = LBITS(ml);
    mul_add(c1, np[0], ml, mh, c0);
#else
    mul_add(c1, ml, np[0], c0);
#endif
    for (j = 1; j < num; j++) {
      c1 = tp[j];
#ifdef mul64
      mul_add(c1, np[j], ml, mh, c0);
#else
      mul_add(c1, ml, np[j], c0);
#endif
      tp[j - 1] = c1 & BN_MASK2;
    }
    c1 = (tp[num] + c0) & BN_MASK2;
    tp[num - 1] = c1;
    tp[num] = tp[num + 1] + (c1 < c0 ? 1 : 0);
  }
  if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) {
    c0 = bn_sub_words(rp, tp, np, num);
    if (tp[num] != 0 || c0 == 0) {
      for (i = 0; i < num + 2; i++) {
        vp[i] = 0;
      }
      return 1;
    }
  }
  for (i = 0; i < num; i++) {
    rp[i] = tp[i], vp[i] = 0;
  }
  vp[num] = 0;
  vp[num + 1] = 0;
  return 1;
}
#endif
#endif
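
For context on what the removed reference code computed: it is an ordinary
word-by-word Montgomery multiplication, rp = ap * bp * R^-1 mod np with
R = 2^(BN_BITS2 * num). The following self-contained sketch shows the same
reduction specialised to a single 32-bit word so every intermediate fits
in uint64_t; everything in it, including the demo modulus, is illustrative
and not BoringSSL code.

#include <stdint.h>
#include <stdio.h>

/* n0 = -n^-1 mod 2^32 by Newton iteration; n must be odd. Each pass
 * doubles the number of correct low bits (1 -> 2 -> 4 -> 8 -> 16 -> 32). */
static uint32_t neg_inverse_mod_2_32(uint32_t n) {
  uint32_t inv = 1;
  for (int i = 0; i < 5; i++) {
    inv *= 2u - n * inv;
  }
  return 0u - inv;
}

/* Returns a * b * 2^-32 mod n, for odd n < 2^31 and a, b < n. */
static uint32_t mont_mul_one_word(uint32_t a, uint32_t b, uint32_t n,
                                  uint32_t n0) {
  uint64_t t = (uint64_t)a * b;
  uint32_t m = (uint32_t)t * n0;            /* m = t * (-n^-1) mod 2^32 */
  uint64_t u = (t + (uint64_t)m * n) >> 32; /* low 32 bits cancel to zero */
  return u >= n ? (uint32_t)(u - n) : (uint32_t)u;
}

int main(void) {
  const uint32_t n = 0x7fffffedu; /* some odd modulus below 2^31 */
  const uint32_t a = 123456789u, b = 987654321u;
  const uint32_t n0 = neg_inverse_mod_2_32(n);

  /* Convert into the Montgomery domain, multiply there, convert back. */
  uint32_t aR = (uint32_t)(((uint64_t)a << 32) % n); /* a * 2^32 mod n */
  uint32_t bR = (uint32_t)(((uint64_t)b << 32) % n); /* b * 2^32 mod n */
  uint32_t abR = mont_mul_one_word(aR, bR, n, n0);   /* a * b * 2^32 mod n */
  uint32_t ab = mont_mul_one_word(abR, 1, n, n0);    /* a * b mod n */

  printf("%u (expected %u)\n", ab, (uint32_t)(((uint64_t)a * b) % n));
  return 0;
}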