Change-Id: I4f8225da42a82e7a3cd1a0ace18525fb9647b36f
This commit is contained in:
Henry Case 2019-04-20 00:39:39 +01:00
parent fd3bcd4d72
commit aa5445c035

View File

@ -55,7 +55,6 @@ p503p1_nz:
___
# Swaps the 16 bytes pointed to by %rdi and %rsi
# in constant time
sub CSWAP16() {
@ -232,30 +231,28 @@ ${PREFIX}_fpadd:
adc 0x30(%rsi), %r14
adc 0x38(%rsi), %r15
lea p503x2(%rip), %rbx
mov 0(%rbx), %rcx;
mov p503x2(%rip), %rcx;
sub %rcx, %r8
mov 8(%rbx), %rcx;
mov 8+p503x2(%rip), %rcx;
sbb %rcx, %r9
sbb %rcx, %r10
mov 16(%rbx), %rcx;
mov 16+p503x2(%rip), %rcx;
sbb %rcx, %r11
mov 24(%rbx), %rcx;
mov 24+p503x2(%rip), %rcx;
sbb %rcx, %r12
mov 32(%rbx), %rcx;
mov 32+p503x2(%rip), %rcx;
sbb %rcx, %r13
mov 40(%rbx), %rcx;
mov 40+p503x2(%rip), %rcx;
sbb %rcx, %r14
mov 48(%rbx), %rcx;
mov 48+p503x2(%rip), %rcx;
sbb %rcx, %r15
sbb \$0, %rax
mov 0(%rbx), %rdi
mov p503x2(%rip), %rdi
and %rax, %rdi
mov 8(%rbx), %rsi
mov 8+p503x2(%rip), %rsi
and %rax, %rsi
mov 16(%rbx), %rcx
mov 16+p503x2(%rip), %rcx
and %rax, %rcx
add %rdi, %r8
@ -269,13 +266,13 @@ ${PREFIX}_fpadd:
setc %cl
mov 24(%rbx), %r8
mov 24+p503x2(%rip), %r8
and %rax, %r8
mov 32(%rbx), %r9
mov 32+p503x2(%rip), %r9
and %rax, %r9
mov 40(%rbx), %r10
mov 40+p503x2(%rip), %r10
and %rax, %r10
mov 48(%rbx), %r11
mov 48+p503x2(%rip), %r11
and %rax, %r11
bt \$0, %rcx
@ -346,9 +343,6 @@ ${PREFIX}_fpsub:
push %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15, -40
push %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx, -48
xor %rax, %rax
@ -371,13 +365,11 @@ ${PREFIX}_fpsub:
sbb 0x38(%rsi), %r15
sbb \$0x0, %rax
lea p503x2(%rip), %rbx
mov p503x2(%rip), %rdi
and %rax, %rdi
mov 8+p503x2(%rip), %rsi
mov 0x8+p503x2(%rip), %rsi
and %rax, %rsi
mov 0x10(%rbx), %rcx
mov 0x10+p503x2(%rip), %rcx
and %rax, %rcx
add %rdi, %r8
@ -391,13 +383,13 @@ ${PREFIX}_fpsub:
setc %cl
mov 0x18(%rbx), %r8
mov 0x18+p503x2(%rip), %r8
and %rax, %r8
mov 0x20(%rbx), %r9
mov 0x20+p503x2(%rip), %r9
and %rax, %r9
mov 0x28(%rbx), %r10
mov 0x28+p503x2(%rip), %r10
and %rax, %r10
mov 0x30(%rbx), %r11
mov 0x30+p503x2(%rip), %r11
and %rax, %r11
bt \$0x0, %rcx
@ -411,8 +403,6 @@ ${PREFIX}_fpsub:
mov %r14, 0x30(%rdx)
mov %r15, 0x38(%rdx)
pop %rbx
.cfi_adjust_cfa_offset -8
pop %r15
.cfi_adjust_cfa_offset -8
pop %r14
@ -1332,7 +1322,7 @@ $code.=<<___ if ($bmi2_adx);
___
# a[0-1] x p503p1_nz --> result: r8:r14
&MUL128x320_SCHOOL(0, "rdi", "p503p1_nz(%rip)", map("r$_",(8..14)), "rbx", "rcx", "r15") if($bmi2_adx);
&MUL128x320_SCHOOL(0,"rdi","p503p1_nz(%rip)",map("r$_",(8..14)),"rbx","rcx","r15") if($bmi2_adx);
$code.=<<___ if ($bmi2_adx);
xor %r15, %r15
add 0x18(%rdi), %r8
@ -1369,7 +1359,7 @@ $code.=<<___ if ($bmi2_adx);
___
# a[2-3] x p503p1_nz --> result: r8:r14
&MUL128x320_SCHOOL(16, "rdi", "p503p1_nz(%rip)", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "rbx", "rcx", "r15") if($bmi2_adx);
&MUL128x320_SCHOOL(16,"rdi","p503p1_nz(%rip)",map("r$_",(8..14)),"rbx","rcx","r15") if($bmi2_adx);
$code.=<<___ if ($bmi2_adx);
xor %r15, %r15
@ -1401,7 +1391,7 @@ $code.=<<___ if ($bmi2_adx);
___
# a[4-5] x p503p1_nz --> result: r8:r14
&MUL128x320_SCHOOL(32, "rdi", "p503p1_nz(%rip)", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "rbx", "rcx", "r15") if($bmi2_adx);
&MUL128x320_SCHOOL(32,"rdi","p503p1_nz(%rip)",map("r$_",(8..14)),"rbx","rcx","r15") if($bmi2_adx);
$code.=<<___ if ($bmi2_adx);
xor %r15, %r15
@ -1427,7 +1417,7 @@ $code.=<<___ if ($bmi2_adx);
___
# a[6-7] x p503p1_nz --> result: r8:r14
&MUL128x320_SCHOOL(48, "rdi", "p503p1_nz(%rip)", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "rbx", "rcx", "r15") if($bmi2_adx);
&MUL128x320_SCHOOL(48,"rdi","p503p1_nz(%rip)",map("r$_", (8..14)),"rbx","rcx","r15") if($bmi2_adx);
# Final result c1:c7
$code.=<<___ if ($bmi2_adx);