(Imports upstream's 76e624a003db22db2d99ece04a15e20fe44c1fbe.) Also includes the following fixes: https://github.com/openssl/openssl/pull/2582 https://github.com/openssl/openssl/pull/2655 Change-Id: I6086a87a534d152cdbff104c62ad9dcd9b4e012a Reviewed-on: https://boringssl-review.googlesource.com/13783 Reviewed-by: David Benjamin <davidben@google.com> Commit-Queue: David Benjamin <davidben@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org> kris/onging/CECPQ3_patch15
@@ -145,13 +145,21 @@ $code.=<<___; | |||
.type rsaz_1024_sqr_avx2,\@function,5 | |||
.align 64 | |||
rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2 | |||
.cfi_startproc | |||
lea (%rsp), %rax | |||
.cfi_def_cfa_register %rax | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
vzeroupper | |||
___ | |||
$code.=<<___ if ($win64); | |||
@@ -170,6 +178,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov %rax,%rbp | |||
.cfi_def_cfa_register %rbp | |||
mov %rdx, $np # reassigned argument | |||
sub \$$FrameSize, %rsp | |||
mov $np, $tmp | |||
@@ -802,6 +811,7 @@ $code.=<<___; | |||
vzeroall | |||
mov %rbp, %rax | |||
.cfi_def_cfa_register %rax | |||
___ | |||
$code.=<<___ if ($win64); | |||
.Lsqr_1024_in_tail: | |||
@@ -818,14 +828,22 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -48(%rax),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rax),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rax),%rbx | |||
.cfi_restore %rbx | |||
lea (%rax),%rsp # restore %rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lsqr_1024_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 | |||
___ | |||
} | |||
@@ -878,13 +896,21 @@ $code.=<<___; | |||
.type rsaz_1024_mul_avx2,\@function,5 | |||
.align 64 | |||
rsaz_1024_mul_avx2: | |||
.cfi_startproc | |||
lea (%rsp), %rax | |||
.cfi_def_cfa_register %rax | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
___ | |||
$code.=<<___ if ($win64); | |||
vzeroupper | |||
@@ -903,6 +929,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov %rax,%rbp | |||
.cfi_def_cfa_register %rbp | |||
vzeroall | |||
mov %rdx, $bp # reassigned argument | |||
sub \$64,%rsp | |||
@@ -1436,6 +1463,7 @@ $code.=<<___; | |||
vzeroupper | |||
mov %rbp, %rax | |||
.cfi_def_cfa_register %rax | |||
___ | |||
$code.=<<___ if ($win64); | |||
.Lmul_1024_in_tail: | |||
@@ -1452,14 +1480,22 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -48(%rax),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rax),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rax),%rbx | |||
.cfi_restore %rbx | |||
lea (%rax),%rsp # restore %rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lmul_1024_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 | |||
___ | |||
} | |||
@@ -1578,8 +1614,10 @@ rsaz_1024_scatter5_avx2: | |||
.type rsaz_1024_gather5_avx2,\@abi-omnipotent | |||
.align 32 | |||
rsaz_1024_gather5_avx2: | |||
.cfi_startproc | |||
vzeroupper | |||
mov %rsp,%r11 | |||
.cfi_def_cfa_register %r11 | |||
___ | |||
$code.=<<___ if ($win64); | |||
lea -0x88(%rsp),%rax | |||
@@ -1720,7 +1758,9 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
lea (%r11),%rsp | |||
.cfi_def_cfa_register %rsp | |||
ret | |||
.cfi_endproc | |||
.LSEH_end_rsaz_1024_gather5: | |||
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 | |||
___ | |||
@@ -84,8 +84,10 @@ $code=<<___; | |||
.type bn_mul_mont,\@function,6 | |||
.align 16 | |||
bn_mul_mont: | |||
.cfi_startproc | |||
mov ${num}d,${num}d | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
test \$3,${num}d | |||
jnz .Lmul_enter | |||
cmp \$8,${num}d | |||
@@ -104,11 +106,17 @@ $code.=<<___; | |||
.align 16 | |||
.Lmul_enter: | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
neg $num | |||
mov %rsp,%r11 | |||
@@ -141,6 +149,7 @@ $code.=<<___; | |||
.Lmul_page_walk_done: | |||
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp | |||
.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 | |||
.Lmul_body: | |||
mov $bp,%r12 # reassign $bp | |||
___ | |||
@@ -311,16 +320,25 @@ $code.=<<___; | |||
jnz .Lcopy | |||
mov 8(%rsp,$num,8),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi,8 | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lmul_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_mul_mont,.-bn_mul_mont | |||
___ | |||
{{{ | |||
@@ -330,8 +348,10 @@ $code.=<<___; | |||
.type bn_mul4x_mont,\@function,6 | |||
.align 16 | |||
bn_mul4x_mont: | |||
.cfi_startproc | |||
mov ${num}d,${num}d | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
.Lmul4x_enter: | |||
___ | |||
$code.=<<___ if ($addx); | |||
@@ -341,11 +361,17 @@ $code.=<<___ if ($addx); | |||
___ | |||
$code.=<<___; | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
neg $num | |||
mov %rsp,%r11 | |||
@@ -369,6 +395,7 @@ $code.=<<___; | |||
.Lmul4x_page_walk_done: | |||
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp | |||
.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 | |||
.Lmul4x_body: | |||
mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp | |||
mov %rdx,%r12 # reassign $bp | |||
@@ -747,16 +774,25 @@ ___ | |||
} | |||
$code.=<<___; | |||
mov 8(%rsp,$num,8),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi, 8 | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lmul4x_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_mul4x_mont,.-bn_mul4x_mont | |||
___ | |||
}}} | |||
@@ -784,14 +820,22 @@ $code.=<<___; | |||
.type bn_sqr8x_mont,\@function,6 | |||
.align 32 | |||
bn_sqr8x_mont: | |||
.cfi_startproc | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
.Lsqr8x_enter: | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
.Lsqr8x_prologue: | |||
mov ${num}d,%r10d | |||
@@ -847,6 +891,7 @@ bn_sqr8x_mont: | |||
mov $n0, 32(%rsp) | |||
mov %rax, 40(%rsp) # save original %rsp | |||
.cfi_cfa_expression %rsp+40,deref,+8 | |||
.Lsqr8x_body: | |||
movq $nptr, %xmm2 # save pointer to modulus | |||
@@ -916,6 +961,7 @@ $code.=<<___; | |||
pxor %xmm0,%xmm0 | |||
pshufd \$0,%xmm1,%xmm1 | |||
mov 40(%rsp),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi,8 | |||
jmp .Lsqr8x_cond_copy | |||
.align 32 | |||
@@ -945,14 +991,22 @@ $code.=<<___; | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lsqr8x_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_sqr8x_mont,.-bn_sqr8x_mont | |||
___ | |||
}}} | |||
@@ -964,14 +1018,22 @@ $code.=<<___; | |||
.type bn_mulx4x_mont,\@function,6 | |||
.align 32 | |||
bn_mulx4x_mont: | |||
.cfi_startproc | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
.Lmulx4x_enter: | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
.Lmulx4x_prologue: | |||
shl \$3,${num}d # convert $num to bytes | |||
@@ -1017,6 +1079,7 @@ bn_mulx4x_mont: | |||
mov $n0, 24(%rsp) # save *n0 | |||
mov $rp, 32(%rsp) # save $rp | |||
mov %rax,40(%rsp) # save original %rsp | |||
.cfi_cfa_expression %rsp+40,deref,+8 | |||
mov $num,48(%rsp) # inner counter | |||
jmp .Lmulx4x_body | |||
@@ -1266,6 +1329,7 @@ $code.=<<___; | |||
pxor %xmm0,%xmm0 | |||
pshufd \$0,%xmm1,%xmm1 | |||
mov 40(%rsp),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi,8 | |||
jmp .Lmulx4x_cond_copy | |||
.align 32 | |||
@@ -1295,14 +1359,22 @@ $code.=<<___; | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lmulx4x_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_mulx4x_mont,.-bn_mulx4x_mont | |||
___ | |||
}}} | |||
@@ -73,8 +73,10 @@ $code=<<___; | |||
.type bn_mul_mont_gather5,\@function,6 | |||
.align 64 | |||
bn_mul_mont_gather5: | |||
.cfi_startproc | |||
mov ${num}d,${num}d | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
test \$7,${num}d | |||
jnz .Lmul_enter | |||
___ | |||
@@ -88,11 +90,17 @@ $code.=<<___; | |||
.Lmul_enter: | |||
movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
neg $num | |||
mov %rsp,%r11 | |||
@@ -125,6 +133,7 @@ $code.=<<___; | |||
lea .Linc(%rip),%r10 | |||
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp | |||
.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 | |||
.Lmul_body: | |||
lea 128($bp),%r12 # reassign $bp (+size optimization) | |||
@@ -411,17 +420,26 @@ $code.=<<___; | |||
jnz .Lcopy | |||
mov 8(%rsp,$num,8),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi,8 | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lmul_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5 | |||
___ | |||
{{{ | |||
@@ -431,8 +449,10 @@ $code.=<<___; | |||
.type bn_mul4x_mont_gather5,\@function,6 | |||
.align 32 | |||
bn_mul4x_mont_gather5: | |||
.cfi_startproc | |||
.byte 0x67 | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
.Lmul4x_enter: | |||
___ | |||
$code.=<<___ if ($addx); | |||
@@ -442,11 +462,17 @@ $code.=<<___ if ($addx); | |||
___ | |||
$code.=<<___; | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
.Lmul4x_prologue: | |||
.byte 0x67 | |||
@@ -502,22 +528,32 @@ $code.=<<___; | |||
neg $num | |||
mov %rax,40(%rsp) | |||
.cfi_cfa_expression %rsp+40,deref,+8 | |||
.Lmul4x_body: | |||
call mul4x_internal | |||
mov 40(%rsp),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi,8 | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lmul4x_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 | |||
.type mul4x_internal,\@abi-omnipotent | |||
@@ -1041,7 +1077,9 @@ $code.=<<___; | |||
.type bn_power5,\@function,6 | |||
.align 32 | |||
bn_power5: | |||
.cfi_startproc | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
___ | |||
$code.=<<___ if ($addx); | |||
mov OPENSSL_ia32cap_P+8(%rip),%r11d | |||
@@ -1051,11 +1089,17 @@ $code.=<<___ if ($addx); | |||
___ | |||
$code.=<<___; | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
.Lpower5_prologue: | |||
shl \$3,${num}d # convert $num to bytes | |||
@@ -1120,6 +1164,7 @@ $code.=<<___; | |||
# | |||
mov $n0, 32(%rsp) | |||
mov %rax, 40(%rsp) # save original %rsp | |||
.cfi_cfa_expression %rsp+40,deref,+8 | |||
.Lpower5_body: | |||
movq $rptr,%xmm1 # save $rptr, used in sqr8x | |||
movq $nptr,%xmm2 # save $nptr | |||
@@ -1146,16 +1191,25 @@ $code.=<<___; | |||
call mul4x_internal | |||
mov 40(%rsp),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi,8 | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lpower5_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_power5,.-bn_power5 | |||
.globl bn_sqr8x_internal | |||
@@ -2035,14 +2089,22 @@ bn_from_montgomery: | |||
.type bn_from_mont8x,\@function,6 | |||
.align 32 | |||
bn_from_mont8x: | |||
.cfi_startproc | |||
.byte 0x67 | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
.Lfrom_prologue: | |||
shl \$3,${num}d # convert $num to bytes | |||
@@ -2107,6 +2169,7 @@ bn_from_mont8x: | |||
# | |||
mov $n0, 32(%rsp) | |||
mov %rax, 40(%rsp) # save original %rsp | |||
.cfi_cfa_expression %rsp+40,deref,+8 | |||
.Lfrom_body: | |||
mov $num,%r11 | |||
lea 48(%rsp),%rax | |||
@@ -2150,7 +2213,6 @@ $code.=<<___ if ($addx); | |||
pxor %xmm0,%xmm0 | |||
lea 48(%rsp),%rax | |||
mov 40(%rsp),%rsi # restore %rsp | |||
jmp .Lfrom_mont_zero | |||
.align 32 | |||
@@ -2162,11 +2224,12 @@ $code.=<<___; | |||
pxor %xmm0,%xmm0 | |||
lea 48(%rsp),%rax | |||
mov 40(%rsp),%rsi # restore %rsp | |||
jmp .Lfrom_mont_zero | |||
.align 32 | |||
.Lfrom_mont_zero: | |||
mov 40(%rsp),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi,8 | |||
movdqa %xmm0,16*0(%rax) | |||
movdqa %xmm0,16*1(%rax) | |||
movdqa %xmm0,16*2(%rax) | |||
@@ -2177,14 +2240,22 @@ $code.=<<___; | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lfrom_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_from_mont8x,.-bn_from_mont8x | |||
___ | |||
} | |||
@@ -2197,14 +2268,22 @@ $code.=<<___; | |||
.type bn_mulx4x_mont_gather5,\@function,6 | |||
.align 32 | |||
bn_mulx4x_mont_gather5: | |||
.cfi_startproc | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
.Lmulx4x_enter: | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
.Lmulx4x_prologue: | |||
shl \$3,${num}d # convert $num to bytes | |||
@@ -2270,21 +2349,31 @@ bn_mulx4x_mont_gather5: | |||
# | |||
mov $n0, 32(%rsp) # save *n0 | |||
mov %rax,40(%rsp) # save original %rsp | |||
.cfi_cfa_expression %rsp+40,deref,+8 | |||
.Lmulx4x_body: | |||
call mulx4x_internal | |||
mov 40(%rsp),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi,8 | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lmulx4x_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 | |||
.type mulx4x_internal,\@abi-omnipotent | |||
@@ -2662,14 +2751,22 @@ $code.=<<___; | |||
.type bn_powerx5,\@function,6 | |||
.align 32 | |||
bn_powerx5: | |||
.cfi_startproc | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
.Lpowerx5_enter: | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
.Lpowerx5_prologue: | |||
shl \$3,${num}d # convert $num to bytes | |||
@@ -2741,6 +2838,7 @@ bn_powerx5: | |||
movq $bptr,%xmm4 | |||
mov $n0, 32(%rsp) | |||
mov %rax, 40(%rsp) # save original %rsp | |||
.cfi_cfa_expression %rsp+40,deref,+8 | |||
.Lpowerx5_body: | |||
call __bn_sqrx8x_internal | |||
@@ -2763,17 +2861,26 @@ bn_powerx5: | |||
call mulx4x_internal | |||
mov 40(%rsp),%rsi # restore %rsp | |||
.cfi_def_cfa %rsi,8 | |||
mov \$1,%rax | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lpowerx5_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bn_powerx5,.-bn_powerx5 | |||
.globl bn_sqrx8x_internal | |||