WIP
Change-Id: I4c1365af614cb17578005ee45866173f201c732a
This commit is contained in:
parent
de62115a49
commit
70e42e8f74
82
third_party/sike/asm/fp-x86_64.pl
vendored
82
third_party/sike/asm/fp-x86_64.pl
vendored
@ -57,42 +57,43 @@ ___
|
||||
# mul128x320_school: produces the x86-64 instruction text (mulx together with
# the adox/adcx carry chains) for a schoolbook multiplication of two 64-bit
# words by five 64-bit words.
#
# Arguments (all are spliced textually into the assembly template):
#   $idxM0     byte offset of the first multiplier word; the two words are
#              read from "$idxM0+0(%$M0)" and "$idxM0+8(%$M0)".
#   $M0        name, without the leading '%', of the register used as the base
#              address of those two words.
#   $M1        address expression of the second operand; the offsets 0, 8, 16,
#              24 and 32 are prefixed to it as "<offset>+$M1".
#   $T0..$T9   register names without the leading '%'.
#
# Registers touched besides $T0..$T9: %rdx is loaded with each multiplier word
# (mulx takes its implicit source from %rdx) and is also the low-half
# destination of the last mulx; %rax is zeroed by "xor %rax, %rax", which also
# clears CF and OF before each adox chain, and is then added as the zero
# addend that closes each carry chain.
#
# Result: the inline comments mark $T0 and $T1 as the final low words, and the
# remaining additions accumulate into $T2..$T6.
# NOTE(review): $T7..$T9 look like pure temporaries; the rdc_mulx call site
# describes the result as r8:r14, i.e. $T0..$T6 -- confirm against callers.
#
# NOTE(review): this listing shows two variants of the body side by side: one
# appends the template to $code, the other collects it in a lexical $body and
# returns it. In the latter, "my $body.=<<___;" declares and appends in one
# statement; a plain "=" would presumably express the same thing -- confirm.
sub mul128x320_school {
|
||||
my ($idxM0,$M0,$M1,$T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8,$T9)=@_;
|
||||
my ($MUL0,$MUL8)=map("$idxM0+$_(%$M0)", (0,8));
|
||||
$code.=<<___;
|
||||
mov $MUL0, %rdx
|
||||
mulx 0+$M1, %$T0, %$T1 # T0 <- C0_final
|
||||
mulx 8+$M1, %$T4, %$T2
|
||||
my $body.=<<___;
|
||||
mov $MUL0, %rdx
|
||||
mulx 0+$M1, %$T0, %$T1 # T0 <- C0_final
|
||||
mulx 8+$M1, %$T4, %$T2
|
||||

||||
xor %rax, %rax
|
||||
mulx 16+$M1, %$T5, %$T3
|
||||
adox %$T4, %$T1
|
||||
adox %$T5, %$T2
|
||||
mulx 24+$M1, %$T7, %$T4
|
||||
adox %$T7, %$T3
|
||||
mulx 32+$M1, %$T6, %$T5
|
||||
adox %$T6, %$T4
|
||||
adox %rax, %$T5
|
||||
xor %rax, %rax
|
||||
mulx 16+$M1, %$T5, %$T3
|
||||
adox %$T4, %$T1
|
||||
adox %$T5, %$T2
|
||||
mulx 24+$M1, %$T7, %$T4
|
||||
adox %$T7, %$T3
|
||||
mulx 32+$M1, %$T6, %$T5
|
||||
adox %$T6, %$T4
|
||||
adox %rax, %$T5
|
||||

||||
mov $MUL8, %rdx
|
||||
mulx 0+$M1, %$T6, %$T7
|
||||
adcx %$T6, %$T1 # T1 <- C1_final
|
||||
adcx %$T7, %$T2
|
||||
mulx 8+$M1, %$T8, %$T6
|
||||
adcx %$T6, %$T3
|
||||
mulx 16+$M1, %$T7, %$T9
|
||||
adcx %$T9, %$T4
|
||||
mulx 24+$M1, %$T9, %$T6
|
||||
adcx %$T6, %$T5
|
||||
mulx 32+$M1, %rdx, %$T6
|
||||
adcx %rax, %$T6
|
||||
mov $MUL8, %rdx
|
||||
mulx 0+$M1, %$T6, %$T7
|
||||
adcx %$T6, %$T1 # T1 <- C1_final
|
||||
adcx %$T7, %$T2
|
||||
mulx 8+$M1, %$T8, %$T6
|
||||
adcx %$T6, %$T3
|
||||
mulx 16+$M1, %$T7, %$T9
|
||||
adcx %$T9, %$T4
|
||||
mulx 24+$M1, %$T9, %$T6
|
||||
adcx %$T6, %$T5
|
||||
mulx 32+$M1, %rdx, %$T6
|
||||
adcx %rax, %$T6
|
||||

||||
xor %rax, %rax
|
||||
adox %$T8, %$T2
|
||||
adox %$T7, %$T3
|
||||
adox %$T9, %$T4
|
||||
adox %rdx, %$T5
|
||||
adox %rax, %$T6
|
||||
xor %rax, %rax
|
||||
adox %$T8, %$T2
|
||||
adox %$T7, %$T3
|
||||
adox %$T9, %$T4
|
||||
adox %rdx, %$T5
|
||||
adox %rax, %$T6
|
||||

||||
___
|
||||
return $body;
|
||||
}
|
||||
|
||||
# Compute z = x + y (mod p).
|
||||
@ -840,12 +841,10 @@ ___
|
||||
# Operation: c [rdx] = a [rdi] * b [rsi]
|
||||
# NOTE: a=c or b=c are not allowed
|
||||
sub mul {
|
||||
if ($bmi2_adx) {
|
||||
my $jump_optim=&alt_impl(".Lmul_mulx");
|
||||
$body=&mul_mulx();
|
||||
}
|
||||
my $jump_optim.=&alt_impl(".Lmul_mulx") if ($bmi2_adx);
|
||||
my $body.=&mul_mulx() if ($bmi2_adx);
|
||||
|
||||
my $body.=<<___;
|
||||
$body.=<<___;
|
||||
.globl ${PREFIX}_mpmul
|
||||
.type ${PREFIX}_mpmul,\@function,3
|
||||
${PREFIX}_mpmul:
|
||||
@ -1349,7 +1348,7 @@ sub rdc_mulx {
|
||||
# a[6-7] x p503p1_nz --> result: r8:r14
|
||||
my $mul67=&mul128x320_school(48,"rdi","p503p1_nz(%rip)",map("r$_", (8..14)),"rbx","rcx","r15");
|
||||
|
||||
$body=<<___;
|
||||
my $body=<<___;
|
||||
.Lrdc_mulx_asm:
|
||||
.cfi_startproc
|
||||
# sike_fprdc has already pushed r12--15 and rbx by this point.
|
||||
@ -1483,6 +1482,7 @@ sub rdc_mulx {
|
||||
ret
|
||||
.cfi_endproc
|
||||
___
|
||||
return $body;
|
||||
}
|
||||
|
||||
# Montgomery reduction
|
||||
@ -1490,12 +1490,10 @@ ___
|
||||
# Operation: c [rsi] = a [rdi]
|
||||
# NOTE: a=c is not allowed
|
||||
sub rdc {
|
||||
if ($bmi2_adx) {
|
||||
my $jump_optim=&alt_impl(".Lrdc_mulx_asm");
|
||||
$body=&rdc_mulx();
|
||||
}
|
||||
my $jump_optim=&alt_impl(".Lrdc_mulx_asm") if ($bmi2_adx);
|
||||
my $body=&rdc_mulx() if ($bmi2_adx);
|
||||
|
||||
my $body.=<<___;
|
||||
$body.=<<___;
|
||||
.globl ${PREFIX}_fprdc
|
||||
.type ${PREFIX}_fprdc,\@function,3
|
||||
${PREFIX}_fprdc:
|
||||
|
Loading…
Reference in New Issue
Block a user