From 628f518cdc46726b3159aaa7ea4f6b48f6890f7f Mon Sep 17 00:00:00 2001 From: Adam Langley Date: Thu, 9 Feb 2017 14:25:02 -0800 Subject: [PATCH] bn/asm/x86_64*: add DWARF CFI directives. (Imports upstream's 76e624a003db22db2d99ece04a15e20fe44c1fbe.) Also includes the following fixes: https://github.com/openssl/openssl/pull/2582 https://github.com/openssl/openssl/pull/2655 Change-Id: I6086a87a534d152cdbff104c62ad9dcd9b4e012a Reviewed-on: https://boringssl-review.googlesource.com/13783 Reviewed-by: David Benjamin Commit-Queue: David Benjamin CQ-Verified: CQ bot account: commit-bot@chromium.org --- crypto/bn/asm/rsaz-avx2.pl | 40 ++++++++++++ crypto/bn/asm/x86_64-mont.pl | 72 ++++++++++++++++++++++ crypto/bn/asm/x86_64-mont5.pl | 111 +++++++++++++++++++++++++++++++++- 3 files changed, 221 insertions(+), 2 deletions(-) diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index 08d2d6cb..5562d691 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -145,13 +145,21 @@ $code.=<<___; .type rsaz_1024_sqr_avx2,\@function,5 .align 64 rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2 +.cfi_startproc lea (%rsp), %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 vzeroupper ___ $code.=<<___ if ($win64); @@ -170,6 +178,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rax,%rbp +.cfi_def_cfa_register %rbp mov %rdx, $np # reassigned argument sub \$$FrameSize, %rsp mov $np, $tmp @@ -802,6 +811,7 @@ $code.=<<___; vzeroall mov %rbp, %rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($win64); .Lsqr_1024_in_tail: @@ -818,14 +828,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lsqr_1024_epilogue: ret +.cfi_endproc .size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2 ___ } @@ -878,13 +896,21 @@ $code.=<<___; .type rsaz_1024_mul_avx2,\@function,5 .align 64 rsaz_1024_mul_avx2: +.cfi_startproc lea (%rsp), %rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); vzeroupper @@ -903,6 +929,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rax,%rbp +.cfi_def_cfa_register %rbp vzeroall mov %rdx, $bp # reassigned argument sub \$64,%rsp @@ -1436,6 +1463,7 @@ $code.=<<___; vzeroupper mov %rbp, %rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($win64); .Lmul_1024_in_tail: @@ -1452,14 +1480,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lmul_1024_epilogue: ret +.cfi_endproc .size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2 ___ } @@ -1578,8 +1614,10 @@ rsaz_1024_scatter5_avx2: .type rsaz_1024_gather5_avx2,\@abi-omnipotent .align 32 rsaz_1024_gather5_avx2: +.cfi_startproc vzeroupper mov %rsp,%r11 +.cfi_def_cfa_register %r11 ___ $code.=<<___ if ($win64); lea -0x88(%rsp),%rax @@ -1720,7 +1758,9 @@ 
$code.=<<___ if ($win64); ___ $code.=<<___; lea (%r11),%rsp +.cfi_def_cfa_register %rsp ret +.cfi_endproc .LSEH_end_rsaz_1024_gather5: .size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 ___ diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index 6b7c35e7..5775f658 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -84,8 +84,10 @@ $code=<<___; .type bn_mul_mont,\@function,6 .align 16 bn_mul_mont: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax test \$3,${num}d jnz .Lmul_enter cmp \$8,${num}d @@ -104,11 +106,17 @@ $code.=<<___; .align 16 .Lmul_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -141,6 +149,7 @@ $code.=<<___; .Lmul_page_walk_done: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul_body: mov $bp,%r12 # reassign $bp ___ @@ -311,16 +320,25 @@ $code.=<<___; jnz .Lcopy mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size bn_mul_mont,.-bn_mul_mont ___ {{{ @@ -330,8 +348,10 @@ $code.=<<___; .type bn_mul4x_mont,\@function,6 .align 16 bn_mul4x_mont: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmul4x_enter: ___ $code.=<<___ if ($addx); @@ -341,11 +361,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -369,6 +395,7 @@ $code.=<<___; .Lmul4x_page_walk_done: mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul4x_body: mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp mov %rdx,%r12 # reassign $bp @@ -747,16 +774,25 @@ ___ } $code.=<<___; mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi, 8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul4x_epilogue: ret +.cfi_endproc .size bn_mul4x_mont,.-bn_mul4x_mont ___ }}} @@ -784,14 +820,22 @@ $code.=<<___; .type bn_sqr8x_mont,\@function,6 .align 32 bn_sqr8x_mont: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lsqr8x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lsqr8x_prologue: mov ${num}d,%r10d @@ -847,6 +891,7 @@ bn_sqr8x_mont: mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lsqr8x_body: movq $nptr, %xmm2 # save pointer to modulus @@ -916,6 +961,7 @@ $code.=<<___; pxor %xmm0,%xmm0 pshufd \$0,%xmm1,%xmm1 mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 jmp .Lsqr8x_cond_copy .align 32 @@ -945,14 +991,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov 
-16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lsqr8x_epilogue: ret +.cfi_endproc .size bn_sqr8x_mont,.-bn_sqr8x_mont ___ }}} @@ -964,14 +1018,22 @@ $code.=<<___; .type bn_mulx4x_mont,\@function,6 .align 32 bn_mulx4x_mont: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmulx4x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmulx4x_prologue: shl \$3,${num}d # convert $num to bytes @@ -1017,6 +1079,7 @@ bn_mulx4x_mont: mov $n0, 24(%rsp) # save *n0 mov $rp, 32(%rsp) # save $rp mov %rax,40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 mov $num,48(%rsp) # inner counter jmp .Lmulx4x_body @@ -1266,6 +1329,7 @@ $code.=<<___; pxor %xmm0,%xmm0 pshufd \$0,%xmm1,%xmm1 mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 jmp .Lmulx4x_cond_copy .align 32 @@ -1295,14 +1359,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmulx4x_epilogue: ret +.cfi_endproc .size bn_mulx4x_mont,.-bn_mulx4x_mont ___ }}} diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index e089d8ec..bf68aadd 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -73,8 +73,10 @@ $code=<<___; .type bn_mul_mont_gather5,\@function,6 .align 64 bn_mul_mont_gather5: +.cfi_startproc mov ${num}d,${num}d mov %rsp,%rax +.cfi_def_cfa_register %rax test \$7,${num}d jnz .Lmul_enter ___ @@ -88,11 +90,17 @@ $code.=<<___; .Lmul_enter: movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 neg $num mov %rsp,%r11 @@ -125,6 +133,7 @@ $code.=<<___; lea .Linc(%rip),%r10 mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp +.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8 .Lmul_body: lea 128($bp),%r12 # reassign $bp (+size optimization) @@ -411,17 +420,26 @@ $code.=<<___; jnz .Lcopy mov 8(%rsp,$num,8),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul_epilogue: ret +.cfi_endproc .size bn_mul_mont_gather5,.-bn_mul_mont_gather5 ___ {{{ @@ -431,8 +449,10 @@ $code.=<<___; .type bn_mul4x_mont_gather5,\@function,6 .align 32 bn_mul4x_mont_gather5: +.cfi_startproc .byte 0x67 mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmul4x_enter: ___ $code.=<<___ if ($addx); @@ -442,11 +462,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmul4x_prologue: .byte 0x67 @@ -502,22 +528,32 @@ $code.=<<___; neg $num mov %rax,40(%rsp) +.cfi_cfa_expression %rsp+40,deref,+8 .Lmul4x_body: call mul4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 
+.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmul4x_epilogue: ret +.cfi_endproc .size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5 .type mul4x_internal,\@abi-omnipotent @@ -1041,7 +1077,9 @@ $code.=<<___; .type bn_power5,\@function,6 .align 32 bn_power5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax ___ $code.=<<___ if ($addx); mov OPENSSL_ia32cap_P+8(%rip),%r11d @@ -1051,11 +1089,17 @@ $code.=<<___ if ($addx); ___ $code.=<<___; push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lpower5_prologue: shl \$3,${num}d # convert $num to bytes @@ -1120,6 +1164,7 @@ $code.=<<___; # mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lpower5_body: movq $rptr,%xmm1 # save $rptr, used in sqr8x movq $nptr,%xmm2 # save $nptr @@ -1146,16 +1191,25 @@ $code.=<<___; call mul4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lpower5_epilogue: ret +.cfi_endproc .size bn_power5,.-bn_power5 .globl bn_sqr8x_internal @@ -2035,14 +2089,22 @@ bn_from_montgomery: .type bn_from_mont8x,\@function,6 .align 32 bn_from_mont8x: +.cfi_startproc .byte 0x67 mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lfrom_prologue: shl \$3,${num}d # convert $num to bytes @@ -2107,6 +2169,7 @@ bn_from_mont8x: # mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lfrom_body: mov $num,%r11 lea 48(%rsp),%rax @@ -2150,7 +2213,6 @@ $code.=<<___ if ($addx); pxor %xmm0,%xmm0 lea 48(%rsp),%rax - mov 40(%rsp),%rsi # restore %rsp jmp .Lfrom_mont_zero .align 32 @@ -2162,11 +2224,12 @@ $code.=<<___; pxor %xmm0,%xmm0 lea 48(%rsp),%rax - mov 40(%rsp),%rsi # restore %rsp jmp .Lfrom_mont_zero .align 32 .Lfrom_mont_zero: + mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 movdqa %xmm0,16*0(%rax) movdqa %xmm0,16*1(%rax) movdqa %xmm0,16*2(%rax) @@ -2177,14 +2240,22 @@ $code.=<<___; mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lfrom_epilogue: ret +.cfi_endproc .size bn_from_mont8x,.-bn_from_mont8x ___ } @@ -2197,14 +2268,22 @@ $code.=<<___; .type bn_mulx4x_mont_gather5,\@function,6 .align 32 bn_mulx4x_mont_gather5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lmulx4x_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lmulx4x_prologue: shl \$3,${num}d # convert $num to bytes @@ -2270,21 +2349,31 @@ bn_mulx4x_mont_gather5: # mov $n0, 32(%rsp) # save *n0 mov %rax,40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lmulx4x_body: call mulx4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 
+.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lmulx4x_epilogue: ret +.cfi_endproc .size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5 .type mulx4x_internal,\@abi-omnipotent @@ -2662,14 +2751,22 @@ $code.=<<___; .type bn_powerx5,\@function,6 .align 32 bn_powerx5: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax .Lpowerx5_enter: push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lpowerx5_prologue: shl \$3,${num}d # convert $num to bytes @@ -2741,6 +2838,7 @@ bn_powerx5: movq $bptr,%xmm4 mov $n0, 32(%rsp) mov %rax, 40(%rsp) # save original %rsp +.cfi_cfa_expression %rsp+40,deref,+8 .Lpowerx5_body: call __bn_sqrx8x_internal @@ -2763,17 +2861,26 @@ bn_powerx5: call mulx4x_internal mov 40(%rsp),%rsi # restore %rsp +.cfi_def_cfa %rsi,8 mov \$1,%rax mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lpowerx5_epilogue: ret +.cfi_endproc .size bn_powerx5,.-bn_powerx5 .globl bn_sqrx8x_internal
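
The prologue/epilogue pattern is the same in every function touched here: the incoming %rsp is kept in a scratch register (or spilled to the frame, where `.cfi_cfa_expression %rsp+40,deref,+8` tells the unwinder to load the CFA back through that slot), each push is annotated, and the epilogue mirrors the restores. Note that `.cfi_push` and `.cfi_restore` as used above are perlasm shorthands handled by the x86_64 translator (x86_64-xlate.pl), which expands them into standard GAS directives. For reference, here is a minimal standalone sketch of the equivalent plain-assembly output — hypothetical function name, and simplified to conventional %rsp-then-%rbp tracking rather than the %rax copy the patch uses:

	.text
	.globl	example_fn
	.type	example_fn,@function
example_fn:
	.cfi_startproc                  # on entry: CFA = %rsp+8
	push	%rbp
	.cfi_adjust_cfa_offset	8       # push moved %rsp down by 8
	.cfi_offset	%rbp,-16        # caller's %rbp saved at CFA-16
	push	%rbx
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbx,-24        # caller's %rbx saved at CFA-24
	mov	%rsp,%rbp
	.cfi_def_cfa_register	%rbp    # CFA now tracked via %rbp, so the
	sub	$32,%rsp                # variable stack adjustment below
	and	$-32,%rsp               # needs no further annotation
	# ... body ...
	lea	(%rbp),%rsp             # restore %rsp from the saved copy
	.cfi_def_cfa_register	%rsp    # track the CFA via %rsp again
	pop	%rbx
	.cfi_adjust_cfa_offset	-8
	.cfi_restore	%rbx            # %rbx holds its caller value again
	pop	%rbp
	.cfi_adjust_cfa_offset	-8
	.cfi_restore	%rbp
	ret
	.cfi_endproc
	.size	example_fn,.-example_fn

The emitted .eh_frame records can be inspected with readelf --debug-dump=frames to confirm the CFA rule and saved-register slots at each instruction boundary match the annotations.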