move check for AD*X to rsaz-avx2.pl.

This ensures high performance in situations when assembler supports
AVX2, but not AD*X.

(Imported from upstream's 82a9dafe32e1e39b5adff18f9061e43d8df3d3c5)

Change-Id: Ie67f49a1c5467807139b6a8a0d4e62162d8a974f
This commit is contained in:
Adam Langley 2014-07-24 16:05:32 -07:00
parent 05b7377065
commit 25ba90e34a
2 changed files with 13 additions and 2 deletions

View File

@ -78,16 +78,19 @@ die "can't locate x86_64-xlate.pl";
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1` if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
=~ /GNU assembler version ([2-9]\.[0-9]+)/) { =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
$avx = ($1>=2.19) + ($1>=2.22); $avx = ($1>=2.19) + ($1>=2.22);
$addx = ($1>=2.23);
} }
if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) && if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
`nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) { `nasm -v 2>&1` =~ /NASM version ([2-9]\.[0-9]+)/) {
$avx = ($1>=2.09) + ($1>=2.10); $avx = ($1>=2.09) + ($1>=2.10);
$addx = ($1>=2.10);
} }
if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
`ml64 2>&1` =~ /Version ([0-9]+)\./) { `ml64 2>&1` =~ /Version ([0-9]+)\./) {
$avx = ($1>=10) + ($1>=11); $avx = ($1>=10) + ($1>=11);
$addx = ($1>=11);
} }
open OUT,"| $^X $xlate $flavour $output"; open OUT,"| $^X $xlate $flavour $output";
@ -1673,6 +1676,15 @@ $code.=<<___;
.align 32 .align 32
rsaz_avx2_eligible: rsaz_avx2_eligible:
mov OPENSSL_ia32cap_P+8(%rip),%eax mov OPENSSL_ia32cap_P+8(%rip),%eax
___
$code.=<<___ if ($addx);
mov \$`1<<8|1<<19`,%ecx
mov \$0,%edx
and %eax,%ecx
cmp \$`1<<8|1<<19`,%ecx # check for BMI2+AD*X
cmove %edx,%eax
___
$code.=<<___;
and \$`1<<5`,%eax and \$`1<<5`,%eax
shr \$5,%eax shr \$5,%eax
ret ret

View File

@ -890,8 +890,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
/* If the size of the operands allow it, perform the optimized /* If the size of the operands allow it, perform the optimized
* RSAZ exponentiation. For further information see * RSAZ exponentiation. For further information see
* crypto/bn/rsaz_exp.c and accompanying assembly modules. */ * crypto/bn/rsaz_exp.c and accompanying assembly modules. */
if (((OPENSSL_ia32cap_P[2] & 0x80100) != 0x80100) /* check for MULX/AD*X */ if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
&& (16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
rsaz_avx2_eligible()) { rsaz_avx2_eligible()) {
if (NULL == bn_wexpand(rr, 16)) if (NULL == bn_wexpand(rr, 16))
goto err; goto err;