Change-Id: I4c1365af614cb17578005ee45866173f201c732a
This commit is contained in:
Henry Case 2019-04-20 23:50:22 +01:00
parent de62115a49
commit 70e42e8f74

View File

@ -57,42 +57,43 @@ ___
# Generates the assembly for a schoolbook multiplication of a 128-bit operand
# (two 64-bit words) by a 320-bit operand (five 64-bit words), using MULX
# together with the ADOX and ADCX carry chains.
#
# Arguments:
#   $idxM0   - byte offset, relative to register $M0, of the 128-bit operand;
#              its words are read from $idxM0+0($M0) and $idxM0+8($M0).
#   $M0      - name (without the '%') of the register addressing that operand.
#   $M1      - memory operand of the 320-bit value, e.g. "sym(%rip)"; its
#              words are read at offsets 0, 8, 16, 24 and 32.
#   $T0..$T9 - register names (without the '%').  The product is accumulated
#              in T0..T6 (T0 is the least significant word); T7..T9 only hold
#              intermediate partial products.
#
# The emitted code overwrites %rax, %rdx and the flags.
#
# Returns the generated assembly as a string.  Nothing is appended to the
# global $code: the caller is expected to splice the returned text into the
# body it is building, so each instruction must appear here exactly once.
sub mul128x320_school {
    my ($idxM0,$M0,$M1,$T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8,$T9)=@_;

    # Memory operands for the low and high 64-bit words of the 128-bit input.
    my ($MUL0,$MUL8)=map("$idxM0+$_(%$M0)", (0,8));

    # Plain assignment: a freshly declared lexical has nothing to append to.
    my $body=<<___;
    mov $MUL0, %rdx
    mulx 0+$M1, %$T0, %$T1 # T0 <- C0_final
    mulx 8+$M1, %$T4, %$T2
    xor %rax, %rax
    mulx 16+$M1, %$T5, %$T3
    adox %$T4, %$T1
    adox %$T5, %$T2
    mulx 24+$M1, %$T7, %$T4
    adox %$T7, %$T3
    mulx 32+$M1, %$T6, %$T5
    adox %$T6, %$T4
    adox %rax, %$T5
    mov $MUL8, %rdx
    mulx 0+$M1, %$T6, %$T7
    adcx %$T6, %$T1 # T1 <- C1_final
    adcx %$T7, %$T2
    mulx 8+$M1, %$T8, %$T6
    adcx %$T6, %$T3
    mulx 16+$M1, %$T7, %$T9
    adcx %$T9, %$T4
    mulx 24+$M1, %$T9, %$T6
    adcx %$T6, %$T5
    mulx 32+$M1, %rdx, %$T6
    adcx %rax, %$T6
    xor %rax, %rax
    adox %$T8, %$T2
    adox %$T7, %$T3
    adox %$T9, %$T4
    adox %rdx, %$T5
    adox %rax, %$T6
___

    return $body;
}
# Compute z = x + y (mod p).
@ -840,12 +841,10 @@ ___
# Operation: c [rdx] = a [rdi] * b [rsi]
# NOTE: a=c or b=c are not allowed
sub mul {
if ($bmi2_adx) {
my $jump_optim=&alt_impl(".Lmul_mulx");
$body=&mul_mulx();
}
my $jump_optim.=&alt_impl(".Lmul_mulx") if ($bmi2_adx);
my $body.=&mul_mulx() if ($bmi2_adx);
my $body.=<<___;
$body.=<<___;
.globl ${PREFIX}_mpmul
.type ${PREFIX}_mpmul,\@function,3
${PREFIX}_mpmul:
@ -1349,7 +1348,7 @@ sub rdc_mulx {
# a[6-7] x p503p1_nz --> result: r8:r14
my $mul67=&mul128x320_school(48,"rdi","p503p1_nz(%rip)",map("r$_", (8..14)),"rbx","rcx","r15");
$body=<<___;
my $body=<<___;
.Lrdc_mulx_asm:
.cfi_startproc
# sike_fprdc has already pushed r12--15 and rbx by this point.
@ -1483,6 +1482,7 @@ sub rdc_mulx {
ret
.cfi_endproc
___
return $body;
}
# Montgomery reduction
@ -1490,12 +1490,10 @@ ___
# Operation: c [rsi] = a [rdi]
# NOTE: a=c is not allowed
sub rdc {
if ($bmi2_adx) {
my $jump_optim=&alt_impl(".Lrdc_mulx_asm");
$body=&rdc_mulx();
}
my $jump_optim=&alt_impl(".Lrdc_mulx_asm") if ($bmi2_adx);
my $body=&rdc_mulx() if ($bmi2_adx);
my $body.=<<___;
$body.=<<___;
.globl ${PREFIX}_fprdc
.type ${PREFIX}_fprdc,\@function,3
${PREFIX}_fprdc: