Enable AVX2 and ADX in p256-x86_64-asm.pl.
We can test these with Intel SDE now. The AVX2 code just affects the two select functions while the ADX code is a separate implementation.

Haswell numbers:

Before:
Did 84630 ECDH P-256 operations in 10031494us (8436.4 ops/sec)
Did 206000 ECDSA P-256 signing operations in 10015055us (20569.0 ops/sec)
Did 77256 ECDSA P-256 verify operations in 10064556us (7676.0 ops/sec)

After:
Did 86112 ECDH P-256 operations in 10015008us (8598.3 ops/sec)
Did 211000 ECDSA P-256 signing operations in 10025104us (21047.2 ops/sec)
Did 79344 ECDSA P-256 verify operations in 10017076us (7920.9 ops/sec)

Skylake numbers:

Before:
Did 75684 ECDH P-256 operations in 10016019us (7556.3 ops/sec)
Did 185000 ECDSA P-256 signing operations in 10012090us (18477.7 ops/sec)
Did 72885 ECDSA P-256 verify operations in 10027154us (7268.8 ops/sec)

After:
Did 89598 ECDH P-256 operations in 10032162us (8931.1 ops/sec)
Did 203000 ECDSA P-256 signing operations in 10019739us (20260.0 ops/sec)
Did 87040 ECDSA P-256 verify operations in 10000441us (8703.6 ops/sec)

The code was slightly patched for delocate.go compatibility.

Change-Id: Ic44ced4eca65c656bbe07d5a7fee91ec6925eb59
Reviewed-on: https://boringssl-review.googlesource.com/18967
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
This commit is contained in:
parent
488ca0eace
commit
78f5e75739
@ -54,9 +54,8 @@ die "can't locate x86_64-xlate.pl";
|
||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
# TODO: enable these after testing. $avx goes to two and $addx to one.
|
||||
$avx=0;
|
||||
$addx=0;
|
||||
$avx = 2;
|
||||
$addx = 1;
|
||||
|
||||
$code.=<<___;
|
||||
.text
|
||||
@ -150,8 +149,9 @@ $code.=<<___;
|
||||
ecp_nistz256_mul_mont:
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
mov \$0x80100, %ecx
|
||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
||||
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||
mov 8(%rcx), %rcx
|
||||
and \$0x80100, %ecx
|
||||
___
|
||||
$code.=<<___;
|
||||
.Lmul_mont:
|
||||
@ -431,8 +431,9 @@ __ecp_nistz256_mul_montq:
|
||||
ecp_nistz256_sqr_mont:
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
mov \$0x80100, %ecx
|
||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
||||
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||
mov 8(%rcx), %rcx
|
||||
and \$0x80100, %ecx
|
||||
___
|
||||
$code.=<<___;
|
||||
push %rbp
|
||||
@ -955,7 +956,8 @@ $code.=<<___;
|
||||
ecp_nistz256_select_w5:
|
||||
___
|
||||
$code.=<<___ if ($avx>1);
|
||||
mov OPENSSL_ia32cap_P+8(%rip), %eax
|
||||
leaq OPENSSL_ia32cap_P(%rip), %rax
|
||||
mov 8(%rax), %rax
|
||||
test \$`1<<5`, %eax
|
||||
jnz .Lavx2_select_w5
|
||||
___
|
||||
@ -1052,7 +1054,8 @@ $code.=<<___;
|
||||
ecp_nistz256_select_w7:
|
||||
___
|
||||
$code.=<<___ if ($avx>1);
|
||||
mov OPENSSL_ia32cap_P+8(%rip), %eax
|
||||
leaq OPENSSL_ia32cap_P(%rip), %rax
|
||||
mov 8(%rax), %rax
|
||||
test \$`1<<5`, %eax
|
||||
jnz .Lavx2_select_w7
|
||||
___
|
||||
@ -1555,8 +1558,9 @@ $code.=<<___;
|
||||
ecp_nistz256_point_double:
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
mov \$0x80100, %ecx
|
||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
||||
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||
mov 8(%rcx), %rcx
|
||||
and \$0x80100, %ecx
|
||||
cmp \$0x80100, %ecx
|
||||
je .Lpoint_doublex
|
||||
___
|
||||
@ -1785,8 +1789,9 @@ $code.=<<___;
|
||||
ecp_nistz256_point_add:
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
mov \$0x80100, %ecx
|
||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
||||
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||
mov 8(%rcx), %rcx
|
||||
and \$0x80100, %ecx
|
||||
cmp \$0x80100, %ecx
|
||||
je .Lpoint_addx
|
||||
___
|
||||
@ -2152,8 +2157,9 @@ $code.=<<___;
|
||||
ecp_nistz256_point_add_affine:
|
||||
___
|
||||
$code.=<<___ if ($addx);
|
||||
mov \$0x80100, %ecx
|
||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
||||
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||
mov 8(%rcx), %rcx
|
||||
and \$0x80100, %ecx
|
||||
cmp \$0x80100, %ecx
|
||||
je .Lpoint_add_affinex
|
||||
___
|
||||
|
Loading…
Reference in New Issue
Block a user