Selaa lähdekoodia

bn/asm/x86_64*: add DWARF CFI directives.

(Imports upstream's 76e624a003db22db2d99ece04a15e20fe44c1fbe.)

Also includes the following fixes:
https://github.com/openssl/openssl/pull/2582
https://github.com/openssl/openssl/pull/2655

Change-Id: I6086a87a534d152cdbff104c62ad9dcd9b4e012a
Reviewed-on: https://boringssl-review.googlesource.com/13783
Reviewed-by: David Benjamin <davidben@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
kris/onging/CECPQ3_patch15
Adam Langley 7 vuotta sitten
committed by CQ bot account: commit-bot@chromium.org
vanhempi
commit
628f518cdc
3 muutettua tiedostoa jossa 221 lisäystä ja 2 poistoa
  1. +40
    -0
      crypto/bn/asm/rsaz-avx2.pl
  2. +72
    -0
      crypto/bn/asm/x86_64-mont.pl
  3. +109
    -2
      crypto/bn/asm/x86_64-mont5.pl

+ 40
- 0
crypto/bn/asm/rsaz-avx2.pl Näytä tiedosto

@@ -145,13 +145,21 @@ $code.=<<___;
.type rsaz_1024_sqr_avx2,\@function,5
.align 64
rsaz_1024_sqr_avx2: # 702 cycles, 14% faster than rsaz_1024_mul_avx2
.cfi_startproc
lea (%rsp), %rax
.cfi_def_cfa_register %rax
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
vzeroupper
___
$code.=<<___ if ($win64);
@@ -170,6 +178,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %rax,%rbp
.cfi_def_cfa_register %rbp
mov %rdx, $np # reassigned argument
sub \$$FrameSize, %rsp
mov $np, $tmp
@@ -802,6 +811,7 @@ $code.=<<___;

vzeroall
mov %rbp, %rax
.cfi_def_cfa_register %rax
___
$code.=<<___ if ($win64);
.Lsqr_1024_in_tail:
@@ -818,14 +828,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
.cfi_restore %r15
mov -40(%rax),%r14
.cfi_restore %r14
mov -32(%rax),%r13
.cfi_restore %r13
mov -24(%rax),%r12
.cfi_restore %r12
mov -16(%rax),%rbp
.cfi_restore %rbp
mov -8(%rax),%rbx
.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
.cfi_def_cfa_register %rsp
.Lsqr_1024_epilogue:
ret
.cfi_endproc
.size rsaz_1024_sqr_avx2,.-rsaz_1024_sqr_avx2
___
}
@@ -878,13 +896,21 @@ $code.=<<___;
.type rsaz_1024_mul_avx2,\@function,5
.align 64
rsaz_1024_mul_avx2:
.cfi_startproc
lea (%rsp), %rax
.cfi_def_cfa_register %rax
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
___
$code.=<<___ if ($win64);
vzeroupper
@@ -903,6 +929,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %rax,%rbp
.cfi_def_cfa_register %rbp
vzeroall
mov %rdx, $bp # reassigned argument
sub \$64,%rsp
@@ -1436,6 +1463,7 @@ $code.=<<___;
vzeroupper

mov %rbp, %rax
.cfi_def_cfa_register %rax
___
$code.=<<___ if ($win64);
.Lmul_1024_in_tail:
@@ -1452,14 +1480,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov -48(%rax),%r15
.cfi_restore %r15
mov -40(%rax),%r14
.cfi_restore %r14
mov -32(%rax),%r13
.cfi_restore %r13
mov -24(%rax),%r12
.cfi_restore %r12
mov -16(%rax),%rbp
.cfi_restore %rbp
mov -8(%rax),%rbx
.cfi_restore %rbx
lea (%rax),%rsp # restore %rsp
.cfi_def_cfa_register %rsp
.Lmul_1024_epilogue:
ret
.cfi_endproc
.size rsaz_1024_mul_avx2,.-rsaz_1024_mul_avx2
___
}
@@ -1578,8 +1614,10 @@ rsaz_1024_scatter5_avx2:
.type rsaz_1024_gather5_avx2,\@abi-omnipotent
.align 32
rsaz_1024_gather5_avx2:
.cfi_startproc
vzeroupper
mov %rsp,%r11
.cfi_def_cfa_register %r11
___
$code.=<<___ if ($win64);
lea -0x88(%rsp),%rax
@@ -1720,7 +1758,9 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r11),%rsp
.cfi_def_cfa_register %rsp
ret
.cfi_endproc
.LSEH_end_rsaz_1024_gather5:
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
___


+ 72
- 0
crypto/bn/asm/x86_64-mont.pl Näytä tiedosto

@@ -84,8 +84,10 @@ $code=<<___;
.type bn_mul_mont,\@function,6
.align 16
bn_mul_mont:
.cfi_startproc
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
test \$3,${num}d
jnz .Lmul_enter
cmp \$8,${num}d
@@ -104,11 +106,17 @@ $code.=<<___;
.align 16
.Lmul_enter:
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15

neg $num
mov %rsp,%r11
@@ -141,6 +149,7 @@ $code.=<<___;
.Lmul_page_walk_done:

mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul_body:
mov $bp,%r12 # reassign $bp
___
@@ -311,16 +320,25 @@ $code.=<<___;
jnz .Lcopy

mov 8(%rsp,$num,8),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
ret
.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont
___
{{{
@@ -330,8 +348,10 @@ $code.=<<___;
.type bn_mul4x_mont,\@function,6
.align 16
bn_mul4x_mont:
.cfi_startproc
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
@@ -341,11 +361,17 @@ $code.=<<___ if ($addx);
___
$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15

neg $num
mov %rsp,%r11
@@ -369,6 +395,7 @@ $code.=<<___;
.Lmul4x_page_walk_done:

mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul4x_body:
mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
mov %rdx,%r12 # reassign $bp
@@ -747,16 +774,25 @@ ___
}
$code.=<<___;
mov 8(%rsp,$num,8),%rsi # restore %rsp
.cfi_def_cfa %rsi, 8
mov \$1,%rax
mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
ret
.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont
___
}}}
@@ -784,14 +820,22 @@ $code.=<<___;
.type bn_sqr8x_mont,\@function,6
.align 32
bn_sqr8x_mont:
.cfi_startproc
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lsqr8x_enter:
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
.Lsqr8x_prologue:

mov ${num}d,%r10d
@@ -847,6 +891,7 @@ bn_sqr8x_mont:

mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lsqr8x_body:

movq $nptr, %xmm2 # save pointer to modulus
@@ -916,6 +961,7 @@ $code.=<<___;
pxor %xmm0,%xmm0
pshufd \$0,%xmm1,%xmm1
mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
jmp .Lsqr8x_cond_copy

.align 32
@@ -945,14 +991,22 @@ $code.=<<___;

mov \$1,%rax
mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lsqr8x_epilogue:
ret
.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont
___
}}}
@@ -964,14 +1018,22 @@ $code.=<<___;
.type bn_mulx4x_mont,\@function,6
.align 32
bn_mulx4x_mont:
.cfi_startproc
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
.Lmulx4x_prologue:

shl \$3,${num}d # convert $num to bytes
@@ -1017,6 +1079,7 @@ bn_mulx4x_mont:
mov $n0, 24(%rsp) # save *n0
mov $rp, 32(%rsp) # save $rp
mov %rax,40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
mov $num,48(%rsp) # inner counter
jmp .Lmulx4x_body

@@ -1266,6 +1329,7 @@ $code.=<<___;
pxor %xmm0,%xmm0
pshufd \$0,%xmm1,%xmm1
mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
jmp .Lmulx4x_cond_copy

.align 32
@@ -1295,14 +1359,22 @@ $code.=<<___;

mov \$1,%rax
mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
ret
.cfi_endproc
.size bn_mulx4x_mont,.-bn_mulx4x_mont
___
}}}


+ 109
- 2
crypto/bn/asm/x86_64-mont5.pl Näytä tiedosto

@@ -73,8 +73,10 @@ $code=<<___;
.type bn_mul_mont_gather5,\@function,6
.align 64
bn_mul_mont_gather5:
.cfi_startproc
mov ${num}d,${num}d
mov %rsp,%rax
.cfi_def_cfa_register %rax
test \$7,${num}d
jnz .Lmul_enter
___
@@ -88,11 +90,17 @@ $code.=<<___;
.Lmul_enter:
movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15

neg $num
mov %rsp,%r11
@@ -125,6 +133,7 @@ $code.=<<___;

lea .Linc(%rip),%r10
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.cfi_cfa_expression %rsp+8,$num,8,mul,plus,deref,+8
.Lmul_body:

lea 128($bp),%r12 # reassign $bp (+size optimization)
@@ -411,17 +420,26 @@ $code.=<<___;
jnz .Lcopy

mov 8(%rsp,$num,8),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax

mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
ret
.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
___
{{{
@@ -431,8 +449,10 @@ $code.=<<___;
.type bn_mul4x_mont_gather5,\@function,6
.align 32
bn_mul4x_mont_gather5:
.cfi_startproc
.byte 0x67
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
@@ -442,11 +462,17 @@ $code.=<<___ if ($addx);
___
$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
.Lmul4x_prologue:

.byte 0x67
@@ -502,22 +528,32 @@ $code.=<<___;
neg $num

mov %rax,40(%rsp)
.cfi_cfa_expression %rsp+40,deref,+8
.Lmul4x_body:

call mul4x_internal

mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax

mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
ret
.cfi_endproc
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5

.type mul4x_internal,\@abi-omnipotent
@@ -1041,7 +1077,9 @@ $code.=<<___;
.type bn_power5,\@function,6
.align 32
bn_power5:
.cfi_startproc
mov %rsp,%rax
.cfi_def_cfa_register %rax
___
$code.=<<___ if ($addx);
mov OPENSSL_ia32cap_P+8(%rip),%r11d
@@ -1051,11 +1089,17 @@ $code.=<<___ if ($addx);
___
$code.=<<___;
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
.Lpower5_prologue:

shl \$3,${num}d # convert $num to bytes
@@ -1120,6 +1164,7 @@ $code.=<<___;
#
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lpower5_body:
movq $rptr,%xmm1 # save $rptr, used in sqr8x
movq $nptr,%xmm2 # save $nptr
@@ -1146,16 +1191,25 @@ $code.=<<___;
call mul4x_internal

mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax
mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpower5_epilogue:
ret
.cfi_endproc
.size bn_power5,.-bn_power5

.globl bn_sqr8x_internal
@@ -2035,14 +2089,22 @@ bn_from_montgomery:
.type bn_from_mont8x,\@function,6
.align 32
bn_from_mont8x:
.cfi_startproc
.byte 0x67
mov %rsp,%rax
.cfi_def_cfa_register %rax
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
.Lfrom_prologue:

shl \$3,${num}d # convert $num to bytes
@@ -2107,6 +2169,7 @@ bn_from_mont8x:
#
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lfrom_body:
mov $num,%r11
lea 48(%rsp),%rax
@@ -2150,7 +2213,6 @@ $code.=<<___ if ($addx);

pxor %xmm0,%xmm0
lea 48(%rsp),%rax
mov 40(%rsp),%rsi # restore %rsp
jmp .Lfrom_mont_zero

.align 32
@@ -2162,11 +2224,12 @@ $code.=<<___;

pxor %xmm0,%xmm0
lea 48(%rsp),%rax
mov 40(%rsp),%rsi # restore %rsp
jmp .Lfrom_mont_zero

.align 32
.Lfrom_mont_zero:
mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
movdqa %xmm0,16*0(%rax)
movdqa %xmm0,16*1(%rax)
movdqa %xmm0,16*2(%rax)
@@ -2177,14 +2240,22 @@ $code.=<<___;

mov \$1,%rax
mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lfrom_epilogue:
ret
.cfi_endproc
.size bn_from_mont8x,.-bn_from_mont8x
___
}
@@ -2197,14 +2268,22 @@ $code.=<<___;
.type bn_mulx4x_mont_gather5,\@function,6
.align 32
bn_mulx4x_mont_gather5:
.cfi_startproc
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
.Lmulx4x_prologue:

shl \$3,${num}d # convert $num to bytes
@@ -2270,21 +2349,31 @@ bn_mulx4x_mont_gather5:
#
mov $n0, 32(%rsp) # save *n0
mov %rax,40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lmulx4x_body:
call mulx4x_internal

mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax

mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
ret
.cfi_endproc
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5

.type mulx4x_internal,\@abi-omnipotent
@@ -2662,14 +2751,22 @@ $code.=<<___;
.type bn_powerx5,\@function,6
.align 32
bn_powerx5:
.cfi_startproc
mov %rsp,%rax
.cfi_def_cfa_register %rax
.Lpowerx5_enter:
push %rbx
.cfi_push %rbx
push %rbp
.cfi_push %rbp
push %r12
.cfi_push %r12
push %r13
.cfi_push %r13
push %r14
.cfi_push %r14
push %r15
.cfi_push %r15
.Lpowerx5_prologue:

shl \$3,${num}d # convert $num to bytes
@@ -2741,6 +2838,7 @@ bn_powerx5:
movq $bptr,%xmm4
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
.cfi_cfa_expression %rsp+40,deref,+8
.Lpowerx5_body:

call __bn_sqrx8x_internal
@@ -2763,17 +2861,26 @@ bn_powerx5:
call mulx4x_internal

mov 40(%rsp),%rsi # restore %rsp
.cfi_def_cfa %rsi,8
mov \$1,%rax

mov -48(%rsi),%r15
.cfi_restore %r15
mov -40(%rsi),%r14
.cfi_restore %r14
mov -32(%rsi),%r13
.cfi_restore %r13
mov -24(%rsi),%r12
.cfi_restore %r12
mov -16(%rsi),%rbp
.cfi_restore %rbp
mov -8(%rsi),%rbx
.cfi_restore %rbx
lea (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpowerx5_epilogue:
ret
.cfi_endproc
.size bn_powerx5,.-bn_powerx5

.globl bn_sqrx8x_internal


Ladataan…
Peruuta
Tallenna