WIP
Change-Id: Ibe1eaf5ec006d270d8f08bd2bd1a877a340793fc
This commit is contained in:
parent
77f0c0b35d
commit
929a29ed0c
46
third_party/sike/asm/fp-x86_64.pl
vendored
46
third_party/sike/asm/fp-x86_64.pl
vendored
@ -20,7 +20,6 @@ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
|
||||
*STDOUT=*OUT;
|
||||
|
||||
$PREFIX="sike";
|
||||
$addx = 1;
|
||||
$bmi2_adx = 1;
|
||||
|
||||
# Swaps 16 bytes pointed to by %rdi and %rsi
|
||||
@ -42,22 +41,7 @@ ___
|
||||
}
|
||||
|
||||
sub MUL256_SCHOOL {
|
||||
my $idxM0 = shift;
|
||||
my $M0 = shift;
|
||||
my $idxM1 = shift;
|
||||
my $M1 = shift;
|
||||
my $idxDST = shift;
|
||||
my $DST = shift;
|
||||
my $T0 = shift;
|
||||
my $T1 = shift;
|
||||
my $T2 = shift;
|
||||
my $T3 = shift;
|
||||
my $T4 = shift;
|
||||
my $T5 = shift;
|
||||
my $T6 = shift;
|
||||
my $T7 = shift;
|
||||
my $T8 = shift;
|
||||
my $T9 = shift;
|
||||
my ($idxM0,$M0,$idxM1,$M1,$idxDST,$DST,$T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8,$T9)=@_;
|
||||
my ($ML0,$ML8,$ML16,$ML24)=map("$idxM0+$_(%$M0)",(0,8,16,24));
|
||||
my ($MR0,$MR8,$MR16,$MR24)=map("$idxM1+$_(%$M1)",(0,8,16,24));
|
||||
my ($D0,$D1,$D2,$D3,$D4,$D5,$D6,$D7)=map("$idxDST+$_(%$DST)",(0,8,16,24,32,40,48,56));
|
||||
@ -134,19 +118,7 @@ ___
|
||||
}
|
||||
|
||||
sub MUL128x320_SCHOOL {
|
||||
my $idxM0 = shift;
|
||||
my $M0 = shift;
|
||||
my $M1 = shift;
|
||||
my $T0 = shift;
|
||||
my $T1 = shift;
|
||||
my $T2 = shift;
|
||||
my $T3 = shift;
|
||||
my $T4 = shift;
|
||||
my $T5 = shift;
|
||||
my $T6 = shift;
|
||||
my $T7 = shift;
|
||||
my $T8 = shift;
|
||||
my $T9 = shift;
|
||||
my ($idxM0,$M0,$M1,$T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8,$T9)=@_;
|
||||
my ($MUL0,$MUL8)=map("$idxM0+$_(%$M0)", (0,8));
|
||||
$code.=<<___;
|
||||
mov $MUL0, %rdx
|
||||
@ -661,7 +633,7 @@ ${PREFIX}_mpdblsubx2_asm:
|
||||
# Montgomery multiplication
|
||||
# 503-bit multiplication using Karatsuba
|
||||
# (one level), schoolbook (one level)
|
||||
OZAPTFmul_mulx_asm:
|
||||
.Lmul_mulx_asm:
|
||||
.cfi_startproc
|
||||
# sike_mpmul has already pushed r12--15 by this point.
|
||||
.cfi_adjust_cfa_offset 32
|
||||
@ -738,13 +710,13 @@ OZAPTFmul_mulx_asm:
|
||||
___
|
||||
|
||||
# [rcx+64] <- (AH+AL) x (BH+BL), low part
|
||||
&MUL256_SCHOOL( 0, "rsp",32,"rsp",64,"rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "rbx", "rbp") if ($addx);
|
||||
&MUL256_SCHOOL(0,"rsp",32,"rsp",64,"rcx",map("r$_",(8..15)),"rbx","rbp") if ($bmi2_adx);
|
||||
# [rcx] <- AL x BL (Result c0-c3)
|
||||
&MUL256_SCHOOL( 0, "rdi", 0,"rsi", 0,"rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "rbx", "rbp") if ($addx);
|
||||
&MUL256_SCHOOL(0,"rdi",0,"rsi",0,"rcx",map("r$_",(8..15)),"rbx","rbp") if ($bmi2_adx);
|
||||
# [rsp] <- AH x BH
|
||||
&MUL256_SCHOOL(32, "rdi",32,"rsi", 0,"rsp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "rbx", "rbp") if ($addx);
|
||||
&MUL256_SCHOOL(32,"rdi",32,"rsi",0,"rsp",map("r$_",(8..15)),"rbx","rbp") if ($bmi2_adx);
|
||||
|
||||
$code.=<<___ if ($addx);
|
||||
$code.=<<___ if ($bmi2_adx);
|
||||
# r8-r11 <- (AH+AL) x (BH+BL), final step
|
||||
mov 0x40(%rsp), %r8
|
||||
mov 0x48(%rsp), %r9
|
||||
@ -864,12 +836,12 @@ ${PREFIX}_mpmul:
|
||||
|
||||
___
|
||||
|
||||
$code.=<<___ if ($addx);
|
||||
$code.=<<___ if ($bmi2_adx);
|
||||
lea OPENSSL_ia32cap_P(%rip), %rcx
|
||||
mov 8(%rcx), %rcx
|
||||
and \$0x80100, %ecx
|
||||
cmp \$0x80100, %ecx
|
||||
je OZAPTFmul_mulx_asm
|
||||
je .Lmul_mulx_asm
|
||||
|
||||
___
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user