p256-x86_64-asm.pl: minor sqr_montx cleanup.

Drop some redundant instructions from the reduction in ecp_nistz256_sqr_montx.
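
For context, a minimal sketch of why those instructions are redundant, using
the register names from the perlasm source: mulx always multiplies the
implicit %rdx by its explicit source operand (without touching flags), and
multiplication commutes, so once .Lpoly+8*3 is parked in %rdx for the whole
reduction, the per-step reloads of %rdx can go:

	# old: %rdx holds the limb, $t1 holds .Lpoly+8*3
	mov	$acc1, %rdx
	mulx	$t1, $t0, $acc0		# $acc1 * .Lpoly[3] -> $acc0:$t0
	# new: %rdx holds .Lpoly+8*3 throughout, no reload needed
	mulx	$acc1, $t0, $acc0	# .Lpoly[3] * $acc1 -> $acc0:$t0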

(Imported from upstream's 8fc063dcc9668589fd95533d25932396d60987f9.)

I believe this is a no-op for us as we do not currently enable the
ADX-based optimizations.
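
(For reference: upstream gates the *_montx code paths behind a run-time CPUID
check for both ADX and BMI2, roughly along these lines — a sketch of
upstream's dispatch, with the label name assumed rather than copied from this
tree:

	mov	\$0x80100, %ecx
	and	OPENSSL_ia32cap_P+8(%rip), %ecx
	cmp	\$0x80100, %ecx		# ADX (bit 19) and BMI2 (bit 8) of CPUID(7).EBX
	je	.Lsqr_montx		# take the mulx/adcx/adox path

Presumably the $addx flag in the perlasm is what keeps this path from being
emitted here.)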

Change-Id: I34a5f5ffb965d59c67f6b9f0ca7937e49ba6e820
Reviewed-on: https://boringssl-review.googlesource.com/16884
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Author: David Benjamin
Date: 2017-06-05 13:25:46 -04:00
Committed-by: CQ bot account <commit-bot@chromium.org>
parent 2b56981b64
commit a51912f7fe

@@ -863,19 +863,18 @@ __ecp_nistz256_sqr_montx:
 	adox	$t1, $acc5
 	.byte	0x67,0x67
 	mulx	%rdx, $t0, $t4
-	mov	$acc0, %rdx
+	mov	.Lpoly+8*3(%rip), %rdx
 	adox	$t0, $acc6
 	shlx	$a_ptr, $acc0, $t0
 	adox	$t4, $acc7
 	shrx	$a_ptr, $acc0, $t4
-	mov	.Lpoly+8*3(%rip), $t1
+	mov	%rdx,$t1
 
 	# reduction step 1
 	add	$t0, $acc1
 	adc	$t4, $acc2
 
-	mulx	$t1, $t0, $acc0
-	mov	$acc1, %rdx
+	mulx	$acc0, $t0, $acc0
 	adc	$t0, $acc3
 	shlx	$a_ptr, $acc1, $t0
 	adc	\$0, $acc0
@@ -885,8 +884,7 @@ __ecp_nistz256_sqr_montx:
 	add	$t0, $acc2
 	adc	$t4, $acc3
 
-	mulx	$t1, $t0, $acc1
-	mov	$acc2, %rdx
+	mulx	$acc1, $t0, $acc1
 	adc	$t0, $acc0
 	shlx	$a_ptr, $acc2, $t0
 	adc	\$0, $acc1
@@ -896,8 +894,7 @@ __ecp_nistz256_sqr_montx:
 	add	$t0, $acc3
 	adc	$t4, $acc0
 
-	mulx	$t1, $t0, $acc2
-	mov	$acc3, %rdx
+	mulx	$acc2, $t0, $acc2
 	adc	$t0, $acc1
 	shlx	$a_ptr, $acc3, $t0
 	adc	\$0, $acc2
@@ -907,12 +904,12 @@ __ecp_nistz256_sqr_montx:
 	add	$t0, $acc0
 	adc	$t4, $acc1
 
-	mulx	$t1, $t0, $acc3
+	mulx	$acc3, $t0, $acc3
 	adc	$t0, $acc2
 	adc	\$0, $acc3
 
-	xor	$t3, $t3		# cf=0
-	adc	$acc0, $acc4		# accumulate upper half
+	xor	$t3, $t3
+	add	$acc0, $acc4		# accumulate upper half
 	mov	.Lpoly+8*1(%rip), $a_ptr
 	adc	$acc1, $acc5
 	mov	$acc4, $acc0
@@ -921,8 +918,7 @@ __ecp_nistz256_sqr_montx:
 	mov	$acc5, $acc1
 	adc	\$0, $t3
 
-	xor	%eax, %eax		# cf=0
-	sbb	\$-1, $acc4		# .Lpoly[0]
+	sub	\$-1, $acc4		# .Lpoly[0]
 	mov	$acc6, $acc2
 	sbb	$a_ptr, $acc5		# .Lpoly[1]
 	sbb	\$0, $acc6		# .Lpoly[2]
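
The last two hunks rely on the same flag identity: xor clears CF, so an
adc/sbb executed immediately afterwards behaves exactly like add/sub, and
folding the pair lets the now-pointless xor disappear. A sketch of the
arithmetic for the final hunk:

	xor	%eax, %eax	# CF := 0
	sbb	\$-1, $acc4	# $acc4 - (-1) - CF = $acc4 + 1
	# with CF known to be 0, the pair above is exactly
	sub	\$-1, $acc4	# same value, same resulting CF, one insn fewer

The adc -> add change after xor $t3, $t3 in the previous hunk is the same
simplification.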