Enable AVX2 and ADX in p256-x86_64-asm.pl.
We can test these with Intel SDE now. The AVX2 code just affects the two select functions while the ADX code is a separate implementation.

Haswell numbers:

Before:
Did 84630 ECDH P-256 operations in 10031494us (8436.4 ops/sec)
Did 206000 ECDSA P-256 signing operations in 10015055us (20569.0 ops/sec)
Did 77256 ECDSA P-256 verify operations in 10064556us (7676.0 ops/sec)

After:
Did 86112 ECDH P-256 operations in 10015008us (8598.3 ops/sec)
Did 211000 ECDSA P-256 signing operations in 10025104us (21047.2 ops/sec)
Did 79344 ECDSA P-256 verify operations in 10017076us (7920.9 ops/sec)

Skylake numbers:

Before:
Did 75684 ECDH P-256 operations in 10016019us (7556.3 ops/sec)
Did 185000 ECDSA P-256 signing operations in 10012090us (18477.7 ops/sec)
Did 72885 ECDSA P-256 verify operations in 10027154us (7268.8 ops/sec)

After:
Did 89598 ECDH P-256 operations in 10032162us (8931.1 ops/sec)
Did 203000 ECDSA P-256 signing operations in 10019739us (20260.0 ops/sec)
Did 87040 ECDSA P-256 verify operations in 10000441us (8703.6 ops/sec)

The code was slightly patched for delocate.go compatibility.

Change-Id: Ic44ced4eca65c656bbe07d5a7fee91ec6925eb59
Reviewed-on: https://boringssl-review.googlesource.com/18967
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
This commit is contained in:
parent
488ca0eace
commit
78f5e75739
@ -54,9 +54,8 @@ die "can't locate x86_64-xlate.pl";
|
|||||||
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||||
*STDOUT=*OUT;
|
*STDOUT=*OUT;
|
||||||
|
|
||||||
# TODO: enable these after testing. $avx goes to two and $addx to one.
|
$avx = 2;
|
||||||
$avx=0;
|
$addx = 1;
|
||||||
$addx=0;
|
|
||||||
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.text
|
.text
|
||||||
@ -150,8 +149,9 @@ $code.=<<___;
|
|||||||
ecp_nistz256_mul_mont:
|
ecp_nistz256_mul_mont:
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($addx);
|
$code.=<<___ if ($addx);
|
||||||
mov \$0x80100, %ecx
|
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
mov 8(%rcx), %rcx
|
||||||
|
and \$0x80100, %ecx
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.Lmul_mont:
|
.Lmul_mont:
|
||||||
@ -431,8 +431,9 @@ __ecp_nistz256_mul_montq:
|
|||||||
ecp_nistz256_sqr_mont:
|
ecp_nistz256_sqr_mont:
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($addx);
|
$code.=<<___ if ($addx);
|
||||||
mov \$0x80100, %ecx
|
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
mov 8(%rcx), %rcx
|
||||||
|
and \$0x80100, %ecx
|
||||||
___
|
___
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
push %rbp
|
push %rbp
|
||||||
@ -955,7 +956,8 @@ $code.=<<___;
|
|||||||
ecp_nistz256_select_w5:
|
ecp_nistz256_select_w5:
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($avx>1);
|
$code.=<<___ if ($avx>1);
|
||||||
mov OPENSSL_ia32cap_P+8(%rip), %eax
|
leaq OPENSSL_ia32cap_P(%rip), %rax
|
||||||
|
mov 8(%rax), %rax
|
||||||
test \$`1<<5`, %eax
|
test \$`1<<5`, %eax
|
||||||
jnz .Lavx2_select_w5
|
jnz .Lavx2_select_w5
|
||||||
___
|
___
|
||||||
@ -1052,7 +1054,8 @@ $code.=<<___;
|
|||||||
ecp_nistz256_select_w7:
|
ecp_nistz256_select_w7:
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($avx>1);
|
$code.=<<___ if ($avx>1);
|
||||||
mov OPENSSL_ia32cap_P+8(%rip), %eax
|
leaq OPENSSL_ia32cap_P(%rip), %rax
|
||||||
|
mov 8(%rax), %rax
|
||||||
test \$`1<<5`, %eax
|
test \$`1<<5`, %eax
|
||||||
jnz .Lavx2_select_w7
|
jnz .Lavx2_select_w7
|
||||||
___
|
___
|
||||||
@ -1555,8 +1558,9 @@ $code.=<<___;
|
|||||||
ecp_nistz256_point_double:
|
ecp_nistz256_point_double:
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($addx);
|
$code.=<<___ if ($addx);
|
||||||
mov \$0x80100, %ecx
|
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
mov 8(%rcx), %rcx
|
||||||
|
and \$0x80100, %ecx
|
||||||
cmp \$0x80100, %ecx
|
cmp \$0x80100, %ecx
|
||||||
je .Lpoint_doublex
|
je .Lpoint_doublex
|
||||||
___
|
___
|
||||||
@ -1785,8 +1789,9 @@ $code.=<<___;
|
|||||||
ecp_nistz256_point_add:
|
ecp_nistz256_point_add:
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($addx);
|
$code.=<<___ if ($addx);
|
||||||
mov \$0x80100, %ecx
|
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
mov 8(%rcx), %rcx
|
||||||
|
and \$0x80100, %ecx
|
||||||
cmp \$0x80100, %ecx
|
cmp \$0x80100, %ecx
|
||||||
je .Lpoint_addx
|
je .Lpoint_addx
|
||||||
___
|
___
|
||||||
@ -2152,8 +2157,9 @@ $code.=<<___;
|
|||||||
ecp_nistz256_point_add_affine:
|
ecp_nistz256_point_add_affine:
|
||||||
___
|
___
|
||||||
$code.=<<___ if ($addx);
|
$code.=<<___ if ($addx);
|
||||||
mov \$0x80100, %ecx
|
leaq OPENSSL_ia32cap_P(%rip), %rcx
|
||||||
and OPENSSL_ia32cap_P+8(%rip), %ecx
|
mov 8(%rcx), %rcx
|
||||||
|
and \$0x80100, %ecx
|
||||||
cmp \$0x80100, %ecx
|
cmp \$0x80100, %ecx
|
||||||
je .Lpoint_add_affinex
|
je .Lpoint_add_affinex
|
||||||
___
|
___
|
||||||
|
Loading…
Reference in New Issue
Block a user