Add CFI information to the x86-64 X25519 asm.
This change serves to check that all our consumers can process assembly with CFI directives in it. For the first change I picked a file that's not perlasm to keep things slightly simpler, but that might have been a mistake: DJB's tooling always aligns the stack to 32 bytes and it's not possible to express this in DWARF format (without using a register to store the old stack pointer). Since none of the functions here appear to care about that alignment, I removed it from each of them. I also trimmed the set of saved registers where possible and used the redzone for functions that didn't need much stack. Overall, this appears to have slightly improved the performance (by about 0.7%): Before: Did 46000 Curve25519 base-point multiplication operations in 3023288us (15215.2 ops/sec) Did 46000 Curve25519 arbitrary point multiplication operations in 3017315us (15245.3 ops/sec) Did 46000 Curve25519 base-point multiplication operations in 3015346us (15255.3 ops/sec) Did 46000 Curve25519 arbitrary point multiplication operations in 3018609us (15238.8 ops/sec) Did 46000 Curve25519 base-point multiplication operations in 3019004us (15236.8 ops/sec) Did 46000 Curve25519 arbitrary point multiplication operations in 3013135us (15266.5 ops/sec) After: Did 46000 Curve25519 base-point multiplication operations in 3007659us (15294.3 ops/sec) Did 47000 Curve25519 arbitrary point multiplication operations in 3054202us (15388.6 ops/sec) Did 46000 Curve25519 base-point multiplication operations in 3008714us (15288.9 ops/sec) Did 46000 Curve25519 arbitrary point multiplication operations in 3004740us (15309.1 ops/sec) Did 46000 Curve25519 base-point multiplication operations in 3009140us (15286.8 ops/sec) Did 47000 Curve25519 arbitrary point multiplication operations in 3057518us (15371.9 ops/sec) Change-Id: I31df11c45b2ea0bf44dde861d52c27f848331691 Reviewed-on: https://boringssl-review.googlesource.com/13200 CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org> 
Reviewed-by: David Benjamin <davidben@google.com> Reviewed-by: Adam Langley <agl@google.com> Commit-Queue: Adam Langley <agl@google.com>
This commit is contained in:
parent
8c2480f740
commit
3f38d80b2f
@ -60,17 +60,10 @@ x25519_x86_64__38: .quad 38
|
|||||||
.globl C_ABI(x25519_x86_64_freeze)
|
.globl C_ABI(x25519_x86_64_freeze)
|
||||||
HIDDEN C_ABI(x25519_x86_64_freeze)
|
HIDDEN C_ABI(x25519_x86_64_freeze)
|
||||||
C_ABI(x25519_x86_64_freeze):
|
C_ABI(x25519_x86_64_freeze):
|
||||||
mov %rsp,%r11
|
.cfi_startproc
|
||||||
and $31,%r11
|
/* This is a leaf function and uses the redzone for saving registers. */
|
||||||
add $64,%r11
|
movq %r12,-8(%rsp)
|
||||||
sub %r11,%rsp
|
.cfi_rel_offset r12, -8
|
||||||
movq %r11,0(%rsp)
|
|
||||||
movq %r12,8(%rsp)
|
|
||||||
movq %r13,16(%rsp)
|
|
||||||
movq %r14,24(%rsp)
|
|
||||||
movq %r15,32(%rsp)
|
|
||||||
movq %rbx,40(%rsp)
|
|
||||||
movq %rbp,48(%rsp)
|
|
||||||
movq 0(%rdi),%rsi
|
movq 0(%rdi),%rsi
|
||||||
movq 8(%rdi),%rdx
|
movq 8(%rdi),%rdx
|
||||||
movq 16(%rdi),%rcx
|
movq 16(%rdi),%rcx
|
||||||
@ -128,44 +121,40 @@ movq %rdx,8(%rdi)
|
|||||||
movq %rcx,16(%rdi)
|
movq %rcx,16(%rdi)
|
||||||
movq %r8,24(%rdi)
|
movq %r8,24(%rdi)
|
||||||
movq %r9,32(%rdi)
|
movq %r9,32(%rdi)
|
||||||
movq 0(%rsp),%r11
|
movq -8(%rsp),%r12
|
||||||
movq 8(%rsp),%r12
|
|
||||||
movq 16(%rsp),%r13
|
|
||||||
movq 24(%rsp),%r14
|
|
||||||
movq 32(%rsp),%r15
|
|
||||||
movq 40(%rsp),%rbx
|
|
||||||
movq 48(%rsp),%rbp
|
|
||||||
add %r11,%rsp
|
|
||||||
mov %rdi,%rax
|
|
||||||
mov %rsi,%rdx
|
|
||||||
ret
|
ret
|
||||||
|
.cfi_endproc
|
||||||
|
|
||||||
.p2align 5
|
.p2align 5
|
||||||
.globl C_ABI(x25519_x86_64_mul)
|
.globl C_ABI(x25519_x86_64_mul)
|
||||||
HIDDEN C_ABI(x25519_x86_64_mul)
|
HIDDEN C_ABI(x25519_x86_64_mul)
|
||||||
C_ABI(x25519_x86_64_mul):
|
C_ABI(x25519_x86_64_mul):
|
||||||
mov %rsp,%r11
|
.cfi_startproc
|
||||||
and $31,%r11
|
/* This is a leaf function and uses the redzone for saving registers. */
|
||||||
add $96,%r11
|
movq %r12,-8(%rsp)
|
||||||
sub %r11,%rsp
|
.cfi_rel_offset r12, -8
|
||||||
movq %r11,0(%rsp)
|
movq %r13,-16(%rsp)
|
||||||
movq %r12,8(%rsp)
|
.cfi_rel_offset r13, -16
|
||||||
movq %r13,16(%rsp)
|
movq %r14,-24(%rsp)
|
||||||
movq %r14,24(%rsp)
|
.cfi_rel_offset r14, -24
|
||||||
movq %r15,32(%rsp)
|
movq %r15,-32(%rsp)
|
||||||
movq %rbx,40(%rsp)
|
.cfi_rel_offset r15, -32
|
||||||
movq %rbp,48(%rsp)
|
movq %rbx,-40(%rsp)
|
||||||
movq %rdi,56(%rsp)
|
.cfi_rel_offset rbx, -40
|
||||||
|
movq %rbp,-48(%rsp)
|
||||||
|
.cfi_rel_offset rbp, -48
|
||||||
|
movq %rdi,-56(%rsp)
|
||||||
|
.cfi_rel_offset rdi, -56
|
||||||
mov %rdx,%rcx
|
mov %rdx,%rcx
|
||||||
movq 24(%rsi),%rdx
|
movq 24(%rsi),%rdx
|
||||||
imulq $19,%rdx,%rax
|
imulq $19,%rdx,%rax
|
||||||
movq %rax,64(%rsp)
|
movq %rax,-64(%rsp)
|
||||||
mulq 16(%rcx)
|
mulq 16(%rcx)
|
||||||
mov %rax,%r8
|
mov %rax,%r8
|
||||||
mov %rdx,%r9
|
mov %rdx,%r9
|
||||||
movq 32(%rsi),%rdx
|
movq 32(%rsi),%rdx
|
||||||
imulq $19,%rdx,%rax
|
imulq $19,%rdx,%rax
|
||||||
movq %rax,72(%rsp)
|
movq %rax,-72(%rsp)
|
||||||
mulq 8(%rcx)
|
mulq 8(%rcx)
|
||||||
add %rax,%r8
|
add %rax,%r8
|
||||||
adc %rdx,%r9
|
adc %rdx,%r9
|
||||||
@ -240,11 +229,11 @@ movq 24(%rsi),%rax
|
|||||||
mulq 8(%rcx)
|
mulq 8(%rcx)
|
||||||
add %rax,%rbx
|
add %rax,%rbx
|
||||||
adc %rdx,%rbp
|
adc %rdx,%rbp
|
||||||
movq 64(%rsp),%rax
|
movq -64(%rsp),%rax
|
||||||
mulq 24(%rcx)
|
mulq 24(%rcx)
|
||||||
add %rax,%r10
|
add %rax,%r10
|
||||||
adc %rdx,%r11
|
adc %rdx,%r11
|
||||||
movq 64(%rsp),%rax
|
movq -64(%rsp),%rax
|
||||||
mulq 32(%rcx)
|
mulq 32(%rcx)
|
||||||
add %rax,%r12
|
add %rax,%r12
|
||||||
adc %rdx,%r13
|
adc %rdx,%r13
|
||||||
@ -252,15 +241,15 @@ movq 32(%rsi),%rax
|
|||||||
mulq 0(%rcx)
|
mulq 0(%rcx)
|
||||||
add %rax,%rbx
|
add %rax,%rbx
|
||||||
adc %rdx,%rbp
|
adc %rdx,%rbp
|
||||||
movq 72(%rsp),%rax
|
movq -72(%rsp),%rax
|
||||||
mulq 16(%rcx)
|
mulq 16(%rcx)
|
||||||
add %rax,%r10
|
add %rax,%r10
|
||||||
adc %rdx,%r11
|
adc %rdx,%r11
|
||||||
movq 72(%rsp),%rax
|
movq -72(%rsp),%rax
|
||||||
mulq 24(%rcx)
|
mulq 24(%rcx)
|
||||||
add %rax,%r12
|
add %rax,%r12
|
||||||
adc %rdx,%r13
|
adc %rdx,%r13
|
||||||
movq 72(%rsp),%rax
|
movq -72(%rsp),%rax
|
||||||
mulq 32(%rcx)
|
mulq 32(%rcx)
|
||||||
add %rax,%r14
|
add %rax,%r14
|
||||||
adc %rdx,%r15
|
adc %rdx,%r15
|
||||||
@ -307,33 +296,31 @@ movq %rcx,8(%rdi)
|
|||||||
movq %r9,16(%rdi)
|
movq %r9,16(%rdi)
|
||||||
movq %rax,24(%rdi)
|
movq %rax,24(%rdi)
|
||||||
movq %r10,32(%rdi)
|
movq %r10,32(%rdi)
|
||||||
movq 0(%rsp),%r11
|
movq -8(%rsp),%r12
|
||||||
movq 8(%rsp),%r12
|
movq -16(%rsp),%r13
|
||||||
movq 16(%rsp),%r13
|
movq -24(%rsp),%r14
|
||||||
movq 24(%rsp),%r14
|
movq -32(%rsp),%r15
|
||||||
movq 32(%rsp),%r15
|
movq -40(%rsp),%rbx
|
||||||
movq 40(%rsp),%rbx
|
movq -48(%rsp),%rbp
|
||||||
movq 48(%rsp),%rbp
|
|
||||||
add %r11,%rsp
|
|
||||||
mov %rdi,%rax
|
|
||||||
mov %rsi,%rdx
|
|
||||||
ret
|
ret
|
||||||
|
.cfi_endproc
|
||||||
|
|
||||||
.p2align 5
|
.p2align 5
|
||||||
.globl C_ABI(x25519_x86_64_square)
|
.globl C_ABI(x25519_x86_64_square)
|
||||||
HIDDEN C_ABI(x25519_x86_64_square)
|
HIDDEN C_ABI(x25519_x86_64_square)
|
||||||
C_ABI(x25519_x86_64_square):
|
C_ABI(x25519_x86_64_square):
|
||||||
mov %rsp,%r11
|
.cfi_startproc
|
||||||
and $31,%r11
|
/* This is a leaf function and uses the redzone for saving registers. */
|
||||||
add $64,%r11
|
movq %r12,-8(%rsp)
|
||||||
sub %r11,%rsp
|
.cfi_rel_offset r12, -8
|
||||||
movq %r11,0(%rsp)
|
movq %r13,-16(%rsp)
|
||||||
movq %r12,8(%rsp)
|
.cfi_rel_offset r13, -16
|
||||||
movq %r13,16(%rsp)
|
movq %r14,-24(%rsp)
|
||||||
movq %r14,24(%rsp)
|
.cfi_rel_offset r14, -24
|
||||||
movq %r15,32(%rsp)
|
movq %r15,-32(%rsp)
|
||||||
movq %rbx,40(%rsp)
|
.cfi_rel_offset r15, -32
|
||||||
movq %rbp,48(%rsp)
|
movq %rbx,-40(%rsp)
|
||||||
|
.cfi_rel_offset rbx, -40
|
||||||
movq 0(%rsi),%rax
|
movq 0(%rsi),%rax
|
||||||
mulq 0(%rsi)
|
mulq 0(%rsi)
|
||||||
mov %rax,%rcx
|
mov %rax,%rcx
|
||||||
@ -449,33 +436,33 @@ movq %r8,8(%rdi)
|
|||||||
movq %r9,16(%rdi)
|
movq %r9,16(%rdi)
|
||||||
movq %rax,24(%rdi)
|
movq %rax,24(%rdi)
|
||||||
movq %r10,32(%rdi)
|
movq %r10,32(%rdi)
|
||||||
movq 0(%rsp),%r11
|
movq -8(%rsp),%r12
|
||||||
movq 8(%rsp),%r12
|
movq -16(%rsp),%r13
|
||||||
movq 16(%rsp),%r13
|
movq -24(%rsp),%r14
|
||||||
movq 24(%rsp),%r14
|
movq -32(%rsp),%r15
|
||||||
movq 32(%rsp),%r15
|
movq -40(%rsp),%rbx
|
||||||
movq 40(%rsp),%rbx
|
|
||||||
movq 48(%rsp),%rbp
|
|
||||||
add %r11,%rsp
|
|
||||||
mov %rdi,%rax
|
|
||||||
mov %rsi,%rdx
|
|
||||||
ret
|
ret
|
||||||
|
.cfi_endproc
|
||||||
|
|
||||||
.p2align 5
|
.p2align 5
|
||||||
.globl C_ABI(x25519_x86_64_ladderstep)
|
.globl C_ABI(x25519_x86_64_ladderstep)
|
||||||
HIDDEN C_ABI(x25519_x86_64_ladderstep)
|
HIDDEN C_ABI(x25519_x86_64_ladderstep)
|
||||||
C_ABI(x25519_x86_64_ladderstep):
|
C_ABI(x25519_x86_64_ladderstep):
|
||||||
mov %rsp,%r11
|
.cfi_startproc
|
||||||
and $31,%r11
|
sub $352,%rsp
|
||||||
add $352,%r11
|
.cfi_adjust_cfa_offset 352
|
||||||
sub %r11,%rsp
|
|
||||||
movq %r11,0(%rsp)
|
|
||||||
movq %r12,8(%rsp)
|
movq %r12,8(%rsp)
|
||||||
|
.cfi_rel_offset r12, 8
|
||||||
movq %r13,16(%rsp)
|
movq %r13,16(%rsp)
|
||||||
|
.cfi_rel_offset r13, 16
|
||||||
movq %r14,24(%rsp)
|
movq %r14,24(%rsp)
|
||||||
|
.cfi_rel_offset r14, 24
|
||||||
movq %r15,32(%rsp)
|
movq %r15,32(%rsp)
|
||||||
|
.cfi_rel_offset r15, 32
|
||||||
movq %rbx,40(%rsp)
|
movq %rbx,40(%rsp)
|
||||||
|
.cfi_rel_offset rbx, 40
|
||||||
movq %rbp,48(%rsp)
|
movq %rbp,48(%rsp)
|
||||||
|
.cfi_rel_offset rbp, 48
|
||||||
movq 40(%rdi),%rsi
|
movq 40(%rdi),%rsi
|
||||||
movq 48(%rdi),%rdx
|
movq 48(%rdi),%rdx
|
||||||
movq 56(%rdi),%rcx
|
movq 56(%rdi),%rcx
|
||||||
@ -1837,26 +1824,22 @@ movq %r8,88(%rdi)
|
|||||||
movq %r9,96(%rdi)
|
movq %r9,96(%rdi)
|
||||||
movq %rax,104(%rdi)
|
movq %rax,104(%rdi)
|
||||||
movq %r10,112(%rdi)
|
movq %r10,112(%rdi)
|
||||||
movq 0(%rsp),%r11
|
|
||||||
movq 8(%rsp),%r12
|
movq 8(%rsp),%r12
|
||||||
movq 16(%rsp),%r13
|
movq 16(%rsp),%r13
|
||||||
movq 24(%rsp),%r14
|
movq 24(%rsp),%r14
|
||||||
movq 32(%rsp),%r15
|
movq 32(%rsp),%r15
|
||||||
movq 40(%rsp),%rbx
|
movq 40(%rsp),%rbx
|
||||||
movq 48(%rsp),%rbp
|
movq 48(%rsp),%rbp
|
||||||
add %r11,%rsp
|
add $352,%rsp
|
||||||
mov %rdi,%rax
|
.cfi_adjust_cfa_offset -352
|
||||||
mov %rsi,%rdx
|
|
||||||
ret
|
ret
|
||||||
|
.cfi_endproc
|
||||||
|
|
||||||
.p2align 5
|
.p2align 5
|
||||||
.globl C_ABI(x25519_x86_64_work_cswap)
|
.globl C_ABI(x25519_x86_64_work_cswap)
|
||||||
HIDDEN C_ABI(x25519_x86_64_work_cswap)
|
HIDDEN C_ABI(x25519_x86_64_work_cswap)
|
||||||
C_ABI(x25519_x86_64_work_cswap):
|
C_ABI(x25519_x86_64_work_cswap):
|
||||||
mov %rsp,%r11
|
.cfi_startproc
|
||||||
and $31,%r11
|
|
||||||
add $0,%r11
|
|
||||||
sub %r11,%rsp
|
|
||||||
cmp $1,%rsi
|
cmp $1,%rsi
|
||||||
movq 0(%rdi),%rsi
|
movq 0(%rdi),%rsi
|
||||||
movq 80(%rdi),%rdx
|
movq 80(%rdi),%rdx
|
||||||
@ -1928,10 +1911,10 @@ movq %rsi,64(%rdi)
|
|||||||
movq %rdx,144(%rdi)
|
movq %rdx,144(%rdi)
|
||||||
movq %rcx,72(%rdi)
|
movq %rcx,72(%rdi)
|
||||||
movq %r8,152(%rdi)
|
movq %r8,152(%rdi)
|
||||||
add %r11,%rsp
|
|
||||||
mov %rdi,%rax
|
mov %rdi,%rax
|
||||||
mov %rsi,%rdx
|
mov %rsi,%rdx
|
||||||
ret
|
ret
|
||||||
|
.cfi_endproc
|
||||||
|
|
||||||
#endif /* __x86_64__ */
|
#endif /* __x86_64__ */
|
||||||
#endif /* !OPENSSL_NO_ASM */
|
#endif /* !OPENSSL_NO_ASM */
|
||||||
|
Loading…
Reference in New Issue
Block a user