Change-Id: I714b2727fb54824d0a3025ec6f4ad4a5e5d64187
This commit is contained in:
Henry Case 2019-04-24 18:55:23 +01:00
parent 16012c0b1d
commit 4674a7ec65

View File

@ -296,45 +296,47 @@ ___
}
$code.=&mul();
# Computes C0-C4 = (A0-A1) * (B0-B3)
# Inputs A0-A1 and B0-B3 remain intact; T0-T3 are scratch registers and are overwritten
sub mul128x256_comba {
my ($A0,$A1,$B0,$B1,$B2,$B3,$C1,$C2,$C3,$C4,$C5,$T0,$T1,$T2,$T3)=@_;
my ($A0,$A1,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$C4,$T0,$T1,$T2,$T3)=@_;
my $body=<<___;
mul $T0, $A1, $B0
umulh $T1, $A1, $B0
adds $C1, $C1, $C3
adc $C2, $C2, xzr
adds $C0, $C0, $C2
adc $C1, $C1, xzr
mul $T2, $A0, $B2
umulh $T3, $A0, $B2
adds $C1, $C1, $T0
adcs $C2, $C2, $T1
adc $C3, xzr, xzr
adds $C0, $C0, $T0
adcs $C1, $C1, $T1
adc $C2, xzr, xzr
mul $T0, $A1, $B1
umulh $T1, $A1, $B1
adds $C1, $C1, $T2
adcs $C2, $C2, $T3
adc $C3, xzr, xzr
mul $T2, $A0, $B3
umulh $T3, $A0, $B3
adds $C1, $C1, $T0
adcs $C2, $C2, $T1
adc $C3, $C3, xzr
mul $T0, $A1, $B2
umulh $T1, $A1, $B2
adds $C2, $C2, $T2
adcs $C3, $C3, $T3
adc $C4, xzr, xzr
mul $T2, $A0, $B3
umulh $T3, $A0, $B3
mul $T2, $A1, $B3
umulh $T3, $A1, $B3
adds $C2, $C2, $T0
adcs $C3, $C3, $T1
adc $C4, $C4, xzr
mul $T0, $A1, $B2
umulh $T1, $A1, $B2
adds $C3, $C3, $T2
adcs $C4, $C4, $T3
adc $C5, xzr, xzr
mul $T2, $A1, $B3
umulh $T3, $A1, $B3
adds $C3, $C3, $T0
adcs $C4, $C4, $T1
adc $C5, $C5, xzr
adds $C4, $C4, $T2
adc $C5, $C5, $T3
adc $C4, $C4, $T3
___
return $body;
@ -345,10 +347,10 @@ ___
# Operation: mc [x1] = ma [x0]
# NOTE: ma and mc must not alias (in-place operation, ma == mc, is not supported)
sub rdc {
my $mul01=&mul128x256_comba("x2","x3","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x30","x10");
my $mul23=&mul128x256_comba("x3","x11","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x30","x10");
my $mul45=&mul128x256_comba("x12","x13","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x30","x10");
my $mul67=&mul128x256_comba("x14","x15","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x30","x10");
my $mul01=&mul128x256_comba("x2","x3","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x1","x10");
my $mul23=&mul128x256_comba("x3","x11","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x1","x10");
my $mul45=&mul128x256_comba("x12","x13","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x1","x10");
my $mul67=&mul128x256_comba("x14","x15","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x1","x10");
my $body=<<___;
.global ${PREFIX}_fprdc
.align 4
@ -401,12 +403,12 @@ sub rdc {
adcs x14, x7, x14
adcs x15, x8, x15
ldp x16, x17, [x0,#64]
ldp x18, x19, [x0,#80]
ldp x30, x19, [x0,#80]
mul x4, x3, x24 // a[2] x .Lp503p1_nz_s8[0]
umulh x7, x3, x24
adcs x16, x9, x16
adcs x17, x10, x17
adcs x18, xzr, x18
adcs x30, xzr, x30
adcs x19, xzr, x19
ldp x20, x21, [x0,#96]
ldp x22, x23, [x0,#112]
@ -440,7 +442,7 @@ sub rdc {
mul x4, x12, x24 // a[4] x .Lp503p1_nz_s8[0]
umulh x7, x12, x24
adcs x17, x8, x17
adcs x18, x9, x18
adcs x30, x9, x30
adcs x19, x10, x19
adcs x20, xzr, x20
mul x5, x12, x25 // a[4] x .Lp503p1_nz_s8[1]
@ -468,7 +470,7 @@ sub rdc {
adds x15, x4, x15 // a[7]
adcs x16, x5, x16 // a[8]
adcs x17, x6, x17
adcs x18, x7, x18
adcs x30, x7, x30
mul x4, x14, x24 // a[6] x .Lp503p1_nz_s8[0]
umulh x7, x14, x24
adcs x19, x8, x19
@ -496,12 +498,12 @@ sub rdc {
lsl x4, x4, #56
adds x17, x4, x17
adcs x18, x5, x18
adcs x30, x5, x30
ldr x1, [sp,#96]
adcs x19, x6, x19
adcs x20, x7, x20
stp x16, x17, [x1,#0] // Final result
stp x18, x19, [x1,#16]
stp x30, x19, [x1,#16]
adcs x21, x8, x21
adcs x22, x9, x22
adc x23, x10, x23