WIP6
Change-Id: I714b2727fb54824d0a3025ec6f4ad4a5e5d64187
This commit is contained in:
parent
16012c0b1d
commit
4674a7ec65
64
third_party/sike/asm/fp-armv8.pl
vendored
64
third_party/sike/asm/fp-armv8.pl
vendored
@ -296,45 +296,47 @@ ___
|
|||||||
}
|
}
|
||||||
$code.=&mul();
|
$code.=&mul();
|
||||||
|
|
||||||
|
# Computes C0-C4 = (A0-A1) * (B0-B3)
|
||||||
|
# Inputs remain intact
|
||||||
sub mul128x256_comba {
|
sub mul128x256_comba {
|
||||||
my ($A0,$A1,$B0,$B1,$B2,$B3,$C1,$C2,$C3,$C4,$C5,$T0,$T1,$T2,$T3)=@_;
|
my ($A0,$A1,$B0,$B1,$B2,$B3,$C0,$C1,$C2,$C3,$C4,$T0,$T1,$T2,$T3)=@_;
|
||||||
my $body=<<___;
|
my $body=<<___;
|
||||||
mul $T0, $A1, $B0
|
mul $T0, $A1, $B0
|
||||||
umulh $T1, $A1, $B0
|
umulh $T1, $A1, $B0
|
||||||
adds $C1, $C1, $C3
|
adds $C0, $C0, $C2
|
||||||
adc $C2, $C2, xzr
|
adc $C1, $C1, xzr
|
||||||
|
|
||||||
mul $T2, $A0, $B2
|
mul $T2, $A0, $B2
|
||||||
umulh $T3, $A0, $B2
|
umulh $T3, $A0, $B2
|
||||||
adds $C1, $C1, $T0
|
adds $C0, $C0, $T0
|
||||||
adcs $C2, $C2, $T1
|
adcs $C1, $C1, $T1
|
||||||
adc $C3, xzr, xzr
|
adc $C2, xzr, xzr
|
||||||
|
|
||||||
mul $T0, $A1, $B1
|
mul $T0, $A1, $B1
|
||||||
umulh $T1, $A1, $B1
|
umulh $T1, $A1, $B1
|
||||||
|
adds $C1, $C1, $T2
|
||||||
|
adcs $C2, $C2, $T3
|
||||||
|
adc $C3, xzr, xzr
|
||||||
|
|
||||||
|
mul $T2, $A0, $B3
|
||||||
|
umulh $T3, $A0, $B3
|
||||||
|
adds $C1, $C1, $T0
|
||||||
|
adcs $C2, $C2, $T1
|
||||||
|
adc $C3, $C3, xzr
|
||||||
|
|
||||||
|
mul $T0, $A1, $B2
|
||||||
|
umulh $T1, $A1, $B2
|
||||||
adds $C2, $C2, $T2
|
adds $C2, $C2, $T2
|
||||||
adcs $C3, $C3, $T3
|
adcs $C3, $C3, $T3
|
||||||
adc $C4, xzr, xzr
|
adc $C4, xzr, xzr
|
||||||
|
|
||||||
mul $T2, $A0, $B3
|
mul $T2, $A1, $B3
|
||||||
umulh $T3, $A0, $B3
|
umulh $T3, $A1, $B3
|
||||||
adds $C2, $C2, $T0
|
adds $C2, $C2, $T0
|
||||||
adcs $C3, $C3, $T1
|
adcs $C3, $C3, $T1
|
||||||
adc $C4, $C4, xzr
|
adc $C4, $C4, xzr
|
||||||
|
|
||||||
mul $T0, $A1, $B2
|
|
||||||
umulh $T1, $A1, $B2
|
|
||||||
adds $C3, $C3, $T2
|
adds $C3, $C3, $T2
|
||||||
adcs $C4, $C4, $T3
|
adc $C4, $C4, $T3
|
||||||
adc $C5, xzr, xzr
|
|
||||||
|
|
||||||
mul $T2, $A1, $B3
|
|
||||||
umulh $T3, $A1, $B3
|
|
||||||
adds $C3, $C3, $T0
|
|
||||||
adcs $C4, $C4, $T1
|
|
||||||
adc $C5, $C5, xzr
|
|
||||||
adds $C4, $C4, $T2
|
|
||||||
adc $C5, $C5, $T3
|
|
||||||
|
|
||||||
___
|
___
|
||||||
return $body;
|
return $body;
|
||||||
@ -345,10 +347,10 @@ ___
|
|||||||
# Operation: mc [x1] = ma [x0]
|
# Operation: mc [x1] = ma [x0]
|
||||||
# NOTE: ma=mc is not allowed
|
# NOTE: ma=mc is not allowed
|
||||||
sub rdc {
|
sub rdc {
|
||||||
my $mul01=&mul128x256_comba("x2","x3","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x30","x10");
|
my $mul01=&mul128x256_comba("x2","x3","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x1","x10");
|
||||||
my $mul23=&mul128x256_comba("x3","x11","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x30","x10");
|
my $mul23=&mul128x256_comba("x3","x11","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x1","x10");
|
||||||
my $mul45=&mul128x256_comba("x12","x13","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x30","x10");
|
my $mul45=&mul128x256_comba("x12","x13","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x1","x10");
|
||||||
my $mul67=&mul128x256_comba("x14","x15","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x30","x10");
|
my $mul67=&mul128x256_comba("x14","x15","x24","x25","x26","x27","x5","x6","x7","x8","x9","x28","x29","x1","x10");
|
||||||
my $body=<<___;
|
my $body=<<___;
|
||||||
.global ${PREFIX}_fprdc
|
.global ${PREFIX}_fprdc
|
||||||
.align 4
|
.align 4
|
||||||
@ -401,12 +403,12 @@ sub rdc {
|
|||||||
adcs x14, x7, x14
|
adcs x14, x7, x14
|
||||||
adcs x15, x8, x15
|
adcs x15, x8, x15
|
||||||
ldp x16, x17, [x0,#64]
|
ldp x16, x17, [x0,#64]
|
||||||
ldp x18, x19, [x0,#80]
|
ldp x30, x19, [x0,#80]
|
||||||
mul x4, x3, x24 // a[2] x .Lp503p1_nz_s8[0]
|
mul x4, x3, x24 // a[2] x .Lp503p1_nz_s8[0]
|
||||||
umulh x7, x3, x24
|
umulh x7, x3, x24
|
||||||
adcs x16, x9, x16
|
adcs x16, x9, x16
|
||||||
adcs x17, x10, x17
|
adcs x17, x10, x17
|
||||||
adcs x18, xzr, x18
|
adcs x30, xzr, x30
|
||||||
adcs x19, xzr, x19
|
adcs x19, xzr, x19
|
||||||
ldp x20, x21, [x0,#96]
|
ldp x20, x21, [x0,#96]
|
||||||
ldp x22, x23, [x0,#112]
|
ldp x22, x23, [x0,#112]
|
||||||
@ -440,7 +442,7 @@ sub rdc {
|
|||||||
mul x4, x12, x24 // a[4] x .Lp503p1_nz_s8[0]
|
mul x4, x12, x24 // a[4] x .Lp503p1_nz_s8[0]
|
||||||
umulh x7, x12, x24
|
umulh x7, x12, x24
|
||||||
adcs x17, x8, x17
|
adcs x17, x8, x17
|
||||||
adcs x18, x9, x18
|
adcs x30, x9, x30
|
||||||
adcs x19, x10, x19
|
adcs x19, x10, x19
|
||||||
adcs x20, xzr, x20
|
adcs x20, xzr, x20
|
||||||
mul x5, x12, x25 // a[4] x .Lp503p1_nz_s8[1]
|
mul x5, x12, x25 // a[4] x .Lp503p1_nz_s8[1]
|
||||||
@ -468,7 +470,7 @@ sub rdc {
|
|||||||
adds x15, x4, x15 // a[7]
|
adds x15, x4, x15 // a[7]
|
||||||
adcs x16, x5, x16 // a[8]
|
adcs x16, x5, x16 // a[8]
|
||||||
adcs x17, x6, x17
|
adcs x17, x6, x17
|
||||||
adcs x18, x7, x18
|
adcs x30, x7, x30
|
||||||
mul x4, x14, x24 // a[6] x .Lp503p1_nz_s8[0]
|
mul x4, x14, x24 // a[6] x .Lp503p1_nz_s8[0]
|
||||||
umulh x7, x14, x24
|
umulh x7, x14, x24
|
||||||
adcs x19, x8, x19
|
adcs x19, x8, x19
|
||||||
@ -496,12 +498,12 @@ sub rdc {
|
|||||||
lsl x4, x4, #56
|
lsl x4, x4, #56
|
||||||
|
|
||||||
adds x17, x4, x17
|
adds x17, x4, x17
|
||||||
adcs x18, x5, x18
|
adcs x30, x5, x30
|
||||||
ldr x1, [sp,#96]
|
ldr x1, [sp,#96]
|
||||||
adcs x19, x6, x19
|
adcs x19, x6, x19
|
||||||
adcs x20, x7, x20
|
adcs x20, x7, x20
|
||||||
stp x16, x17, [x1,#0] // Final result
|
stp x16, x17, [x1,#0] // Final result
|
||||||
stp x18, x19, [x1,#16]
|
stp x30, x19, [x1,#16]
|
||||||
adcs x21, x8, x21
|
adcs x21, x8, x21
|
||||||
adcs x22, x9, x22
|
adcs x22, x9, x22
|
||||||
adc x23, x10, x23
|
adc x23, x10, x23
|
||||||
|
Loading…
Reference in New Issue
Block a user