p256-x86_64-asm.pl: minor sqr_montx cleanup.
Drop some redundant instructions from the reduction steps in ecp_nistz256_sqr_montx. (Imported from upstream's 8fc063dcc9668589fd95533d25932396d60987f9.) I believe this is a no-op for us, as we do not currently enable the ADX-based optimizations. Change-Id: I34a5f5ffb965d59c67f6b9f0ca7937e49ba6e820 Reviewed-on: https://boringssl-review.googlesource.com/16884 Commit-Queue: Adam Langley <agl@google.com> Reviewed-by: Adam Langley <agl@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
This commit is contained in:
parent
2b56981b64
commit
a51912f7fe
@@ -863,19 +863,18 @@ __ecp_nistz256_sqr_montx:
|
|||||||
adox $t1, $acc5
|
adox $t1, $acc5
|
||||||
.byte 0x67,0x67
|
.byte 0x67,0x67
|
||||||
mulx %rdx, $t0, $t4
|
mulx %rdx, $t0, $t4
|
||||||
mov $acc0, %rdx
|
mov .Lpoly+8*3(%rip), %rdx
|
||||||
adox $t0, $acc6
|
adox $t0, $acc6
|
||||||
shlx $a_ptr, $acc0, $t0
|
shlx $a_ptr, $acc0, $t0
|
||||||
adox $t4, $acc7
|
adox $t4, $acc7
|
||||||
shrx $a_ptr, $acc0, $t4
|
shrx $a_ptr, $acc0, $t4
|
||||||
mov .Lpoly+8*3(%rip), $t1
|
mov %rdx,$t1
|
||||||
|
|
||||||
# reduction step 1
|
# reduction step 1
|
||||||
add $t0, $acc1
|
add $t0, $acc1
|
||||||
adc $t4, $acc2
|
adc $t4, $acc2
|
||||||
|
|
||||||
mulx $t1, $t0, $acc0
|
mulx $acc0, $t0, $acc0
|
||||||
mov $acc1, %rdx
|
|
||||||
adc $t0, $acc3
|
adc $t0, $acc3
|
||||||
shlx $a_ptr, $acc1, $t0
|
shlx $a_ptr, $acc1, $t0
|
||||||
adc \$0, $acc0
|
adc \$0, $acc0
|
||||||
@@ -885,8 +884,7 @@ __ecp_nistz256_sqr_montx:
|
|||||||
add $t0, $acc2
|
add $t0, $acc2
|
||||||
adc $t4, $acc3
|
adc $t4, $acc3
|
||||||
|
|
||||||
mulx $t1, $t0, $acc1
|
mulx $acc1, $t0, $acc1
|
||||||
mov $acc2, %rdx
|
|
||||||
adc $t0, $acc0
|
adc $t0, $acc0
|
||||||
shlx $a_ptr, $acc2, $t0
|
shlx $a_ptr, $acc2, $t0
|
||||||
adc \$0, $acc1
|
adc \$0, $acc1
|
||||||
@@ -896,8 +894,7 @@ __ecp_nistz256_sqr_montx:
|
|||||||
add $t0, $acc3
|
add $t0, $acc3
|
||||||
adc $t4, $acc0
|
adc $t4, $acc0
|
||||||
|
|
||||||
mulx $t1, $t0, $acc2
|
mulx $acc2, $t0, $acc2
|
||||||
mov $acc3, %rdx
|
|
||||||
adc $t0, $acc1
|
adc $t0, $acc1
|
||||||
shlx $a_ptr, $acc3, $t0
|
shlx $a_ptr, $acc3, $t0
|
||||||
adc \$0, $acc2
|
adc \$0, $acc2
|
||||||
@@ -907,12 +904,12 @@ __ecp_nistz256_sqr_montx:
|
|||||||
add $t0, $acc0
|
add $t0, $acc0
|
||||||
adc $t4, $acc1
|
adc $t4, $acc1
|
||||||
|
|
||||||
mulx $t1, $t0, $acc3
|
mulx $acc3, $t0, $acc3
|
||||||
adc $t0, $acc2
|
adc $t0, $acc2
|
||||||
adc \$0, $acc3
|
adc \$0, $acc3
|
||||||
|
|
||||||
xor $t3, $t3 # cf=0
|
xor $t3, $t3
|
||||||
adc $acc0, $acc4 # accumulate upper half
|
add $acc0, $acc4 # accumulate upper half
|
||||||
mov .Lpoly+8*1(%rip), $a_ptr
|
mov .Lpoly+8*1(%rip), $a_ptr
|
||||||
adc $acc1, $acc5
|
adc $acc1, $acc5
|
||||||
mov $acc4, $acc0
|
mov $acc4, $acc0
|
||||||
@@ -921,8 +918,7 @@ __ecp_nistz256_sqr_montx:
|
|||||||
mov $acc5, $acc1
|
mov $acc5, $acc1
|
||||||
adc \$0, $t3
|
adc \$0, $t3
|
||||||
|
|
||||||
xor %eax, %eax # cf=0
|
sub \$-1, $acc4 # .Lpoly[0]
|
||||||
sbb \$-1, $acc4 # .Lpoly[0]
|
|
||||||
mov $acc6, $acc2
|
mov $acc6, $acc2
|
||||||
sbb $a_ptr, $acc5 # .Lpoly[1]
|
sbb $a_ptr, $acc5 # .Lpoly[1]
|
||||||
sbb \$0, $acc6 # .Lpoly[2]
|
sbb \$0, $acc6 # .Lpoly[2]
|
||||||
|
Loading…
Reference in New Issue
Block a user