boringssl/crypto/curve25519/asm/x25519-asm-x86_64.S
Andreas Auernhammer e7d3922b43 Improve Curve25519 cswap x64 assembly
This change replaces the cmovq scheme with slightly faster SSE2 code.
The SSE2 code was first introduced in Go's curve25519 implementation.
See: https://go-review.googlesource.com/c/39693/

The implementation is basically copied from the Go assembly.

Change-Id: I25931a421ba141ce33809875699f048b0941c061
Reviewed-on: https://boringssl-review.googlesource.com/16564
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: Adam Langley <agl@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
2017-05-23 22:51:48 +00:00
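For reference, the conditional swap this commit is about reduces to a standard
masked-XOR pattern. A minimal C sketch of the idea (illustrative only: the
function name is made up, and the real code at the bottom of the file works
128 bits at a time with SSE2):

    #include <stdint.h>

    /* Swap the 5-limb field elements a and b iff swap == 1, without
     * branching on the secret bit. */
    static void cswap_sketch(uint64_t a[5], uint64_t b[5], uint64_t swap) {
      uint64_t mask = 0 - swap;  /* 1 -> all ones, 0 -> zero; the assembly
                                    builds the same mask as ~(swap - 1) */
      for (int i = 0; i < 5; i++) {
        uint64_t t = (a[i] ^ b[i]) & mask;  /* a[i]^b[i] if swapping, else 0 */
        a[i] ^= t;
        b[i] ^= t;
      }
    }

The SSE2 version broadcasts the mask into %xmm15 and applies the same
xor/and/xor sequence to five 16-byte lanes per 80-byte half.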


/* Copyright (c) 2015, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
/* This file is adapted from crypto_scalarmult/curve25519/amd64-51/ in
* SUPERCOP 20141124 (http://bench.cr.yp.to/supercop.html). That code is public
* domain licensed but the standard ISC license is included above to keep
* licensing simple. */
#if !defined(OPENSSL_NO_ASM)
#if defined(__x86_64__)
.data
.p2align 4
#if defined(__APPLE__)
/* OS X's C ABI prefixes function names with an underscore. */
#define C_ABI(x) _ ## x
#define HIDDEN .private_extern
#else
#define C_ABI(x) x
#define HIDDEN .hidden
#endif
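/* Field elements below are five 64-bit limbs in radix 2^51; REDMASK51 is the
 * per-limb mask 2^51 - 1. 2P0/2P1234 and 4P0/4P1234 hold the limbs of 2*p
 * and 4*p, p = 2^255 - 19, which are added before subtractions so limbs never
 * go negative. 121666_213 is 121666 * 2^13; the extra 2^13 aligns the
 * radix-51 split of a product with the 64-bit rdx:rax boundary (see the a24
 * multiplication in ladderstep). _38 is 2*19. MU0-MU4 and ORDER0-ORDER3 (the
 * group order and, apparently, its Barrett-reduction constant) and
 * EC2D0-EC2D4 (seemingly 2*d of edwards25519) are kept from the SUPERCOP code
 * this file was adapted from; of all these constants, only REDMASK51,
 * 2P0/2P1234 and 121666_213 are actually referenced below. */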
x25519_x86_64_REDMASK51: .quad 0x0007FFFFFFFFFFFF
x25519_x86_64_121666_213: .quad 996687872
x25519_x86_64_2P0: .quad 0xFFFFFFFFFFFDA
x25519_x86_64_2P1234: .quad 0xFFFFFFFFFFFFE
x25519_x86_64_4P0: .quad 0x1FFFFFFFFFFFB4
x25519_x86_64_4P1234: .quad 0x1FFFFFFFFFFFFC
x25519_x86_64_MU0: .quad 0xED9CE5A30A2C131B
x25519_x86_64_MU1: .quad 0x2106215D086329A7
x25519_x86_64_MU2: .quad 0xFFFFFFFFFFFFFFEB
x25519_x86_64_MU3: .quad 0xFFFFFFFFFFFFFFFF
x25519_x86_64_MU4: .quad 0x000000000000000F
x25519_x86_64_ORDER0: .quad 0x5812631A5CF5D3ED
x25519_x86_64_ORDER1: .quad 0x14DEF9DEA2F79CD6
x25519_x86_64_ORDER2: .quad 0x0000000000000000
x25519_x86_64_ORDER3: .quad 0x1000000000000000
x25519_x86_64_EC2D0: .quad 1859910466990425
x25519_x86_64_EC2D1: .quad 932731440258426
x25519_x86_64_EC2D2: .quad 1072319116312658
x25519_x86_64_EC2D3: .quad 1815898335770999
x25519_x86_64_EC2D4: .quad 633789495995903
x25519_x86_64__38: .quad 38
.text
.p2align 5
.globl C_ABI(x25519_x86_64_freeze)
HIDDEN C_ABI(x25519_x86_64_freeze)
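/* x25519_x86_64_freeze(uint64_t a[5]): fully reduce the radix-2^51 element at
 * rdi to its canonical value mod p = 2^255 - 19, in place. Three carry passes
 * bring every limb below 2^51; the final conditional subtraction of p is
 * selected with cmov-generated masks, so no branch depends on secret data. */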
C_ABI(x25519_x86_64_freeze):
.cfi_startproc
/* This is a leaf function and uses the redzone for saving registers. */
movq %r12,-8(%rsp)
.cfi_rel_offset r12, -8
movq 0(%rdi),%rsi
movq 8(%rdi),%rdx
movq 16(%rdi),%rcx
movq 24(%rdi),%r8
movq 32(%rdi),%r9
movq x25519_x86_64_REDMASK51(%rip),%rax
mov %rax,%r10
sub $18,%r10
mov $3,%r11
._reduceloop:
mov %rsi,%r12
shr $51,%r12
and %rax,%rsi
add %r12,%rdx
mov %rdx,%r12
shr $51,%r12
and %rax,%rdx
add %r12,%rcx
mov %rcx,%r12
shr $51,%r12
and %rax,%rcx
add %r12,%r8
mov %r8,%r12
shr $51,%r12
and %rax,%r8
add %r12,%r9
mov %r9,%r12
shr $51,%r12
and %rax,%r9
imulq $19,%r12,%r12
add %r12,%rsi
sub $1,%r11
ja ._reduceloop
mov $1,%r12
cmp %r10,%rsi
cmovl %r11,%r12
cmp %rax,%rdx
cmovne %r11,%r12
cmp %rax,%rcx
cmovne %r11,%r12
cmp %rax,%r8
cmovne %r11,%r12
cmp %rax,%r9
cmovne %r11,%r12
neg %r12
and %r12,%rax
and %r12,%r10
sub %r10,%rsi
sub %rax,%rdx
sub %rax,%rcx
sub %rax,%r8
sub %rax,%r9
movq %rsi,0(%rdi)
movq %rdx,8(%rdi)
movq %rcx,16(%rdi)
movq %r8,24(%rdi)
movq %r9,32(%rdi)
movq -8(%rsp),%r12
ret
.cfi_endproc
.p2align 5
.globl C_ABI(x25519_x86_64_mul)
HIDDEN C_ABI(x25519_x86_64_mul)
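/* x25519_x86_64_mul(uint64_t out[5], const uint64_t a[5], const uint64_t b[5]):
 * out = a*b mod 2^255 - 19, by schoolbook 5x5 limb multiplication. Partial
 * products that would land in limb 5 or above wrap around multiplied by 19,
 * since 2^255 = 19 (mod p); 19*a[3] and 19*a[4] are cached in the redzone
 * because each is needed more than once. */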
C_ABI(x25519_x86_64_mul):
.cfi_startproc
/* This is a leaf function and uses the redzone for saving registers. */
movq %r12,-8(%rsp)
.cfi_rel_offset r12, -8
movq %r13,-16(%rsp)
.cfi_rel_offset r13, -16
movq %r14,-24(%rsp)
.cfi_rel_offset r14, -24
movq %r15,-32(%rsp)
.cfi_rel_offset r15, -32
movq %rbx,-40(%rsp)
.cfi_rel_offset rbx, -40
movq %rbp,-48(%rsp)
.cfi_rel_offset rbp, -48
mov %rdx,%rcx
movq 24(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,-64(%rsp)
mulq 16(%rcx)
mov %rax,%r8
mov %rdx,%r9
movq 32(%rsi),%rdx
imulq $19,%rdx,%rax
movq %rax,-72(%rsp)
mulq 8(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 0(%rsi),%rax
mulq 0(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 0(%rsi),%rax
mulq 8(%rcx)
mov %rax,%r10
mov %rdx,%r11
movq 0(%rsi),%rax
mulq 16(%rcx)
mov %rax,%r12
mov %rdx,%r13
movq 0(%rsi),%rax
mulq 24(%rcx)
mov %rax,%r14
mov %rdx,%r15
movq 0(%rsi),%rax
mulq 32(%rcx)
mov %rax,%rbx
mov %rdx,%rbp
movq 8(%rsi),%rax
mulq 0(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 8(%rsi),%rax
mulq 8(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 8(%rsi),%rax
mulq 16(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 8(%rsi),%rax
mulq 24(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 8(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 16(%rsi),%rax
mulq 0(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 16(%rsi),%rax
mulq 8(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 16(%rsi),%rax
mulq 16(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 24(%rcx)
add %rax,%r8
adc %rdx,%r9
movq 16(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rcx)
add %rax,%r10
adc %rdx,%r11
movq 24(%rsi),%rax
mulq 0(%rcx)
add %rax,%r14
adc %rdx,%r15
movq 24(%rsi),%rax
mulq 8(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq -64(%rsp),%rax
mulq 24(%rcx)
add %rax,%r10
adc %rdx,%r11
movq -64(%rsp),%rax
mulq 32(%rcx)
add %rax,%r12
adc %rdx,%r13
movq 32(%rsi),%rax
mulq 0(%rcx)
add %rax,%rbx
adc %rdx,%rbp
movq -72(%rsp),%rax
mulq 16(%rcx)
add %rax,%r10
adc %rdx,%r11
movq -72(%rsp),%rax
mulq 24(%rcx)
add %rax,%r12
adc %rdx,%r13
movq -72(%rsp),%rax
mulq 32(%rcx)
add %rax,%r14
adc %rdx,%r15
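/* Carry/reduce: each (lo,hi) register pair holds one 128-bit column. shld
 * shifts bits 51..114 of the column into the high register, the low register
 * is masked to 51 bits, each carry is added into the next column, and the
 * carry out of the top column re-enters limb 0 multiplied by 19. A final
 * linear chain of shr $51 steps propagates the remaining carries. */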
movq x25519_x86_64_REDMASK51(%rip),%rsi
shld $13,%r8,%r9
and %rsi,%r8
shld $13,%r10,%r11
and %rsi,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rsi,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rsi,%r14
add %r13,%r14
shld $13,%rbx,%rbp
and %rsi,%rbx
add %r15,%rbx
imulq $19,%rbp,%rdx
add %rdx,%r8
mov %r8,%rdx
shr $51,%rdx
add %r10,%rdx
mov %rdx,%rcx
shr $51,%rdx
and %rsi,%r8
add %r12,%rdx
mov %rdx,%r9
shr $51,%rdx
and %rsi,%rcx
add %r14,%rdx
mov %rdx,%rax
shr $51,%rdx
and %rsi,%r9
add %rbx,%rdx
mov %rdx,%r10
shr $51,%rdx
and %rsi,%rax
imulq $19,%rdx,%rdx
add %rdx,%r8
and %rsi,%r10
movq %r8,0(%rdi)
movq %rcx,8(%rdi)
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %r10,32(%rdi)
movq -8(%rsp),%r12
movq -16(%rsp),%r13
movq -24(%rsp),%r14
movq -32(%rsp),%r15
movq -40(%rsp),%rbx
movq -48(%rsp),%rbp
ret
.cfi_endproc
.p2align 5
.globl C_ABI(x25519_x86_64_square)
HIDDEN C_ABI(x25519_x86_64_square)
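/* x25519_x86_64_square(uint64_t out[5], const uint64_t a[5]):
 * out = a^2 mod 2^255 - 19. Same scheme as mul, but each cross term a[i]*a[j]
 * (i != j) occurs twice, so one factor is doubled up front (shl $1); for the
 * wrapped cross terms the doubling is folded into the factor 38 = 2*19. */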
C_ABI(x25519_x86_64_square):
.cfi_startproc
/* This is a leaf function and uses the redzone for saving registers. */
movq %r12,-8(%rsp)
.cfi_rel_offset r12, -8
movq %r13,-16(%rsp)
.cfi_rel_offset r13, -16
movq %r14,-24(%rsp)
.cfi_rel_offset r14, -24
movq %r15,-32(%rsp)
.cfi_rel_offset r15, -32
movq %rbx,-40(%rsp)
.cfi_rel_offset rbx, -40
movq 0(%rsi),%rax
mulq 0(%rsi)
mov %rax,%rcx
mov %rdx,%r8
movq 0(%rsi),%rax
shl $1,%rax
mulq 8(%rsi)
mov %rax,%r9
mov %rdx,%r10
movq 0(%rsi),%rax
shl $1,%rax
mulq 16(%rsi)
mov %rax,%r11
mov %rdx,%r12
movq 0(%rsi),%rax
shl $1,%rax
mulq 24(%rsi)
mov %rax,%r13
mov %rdx,%r14
movq 0(%rsi),%rax
shl $1,%rax
mulq 32(%rsi)
mov %rax,%r15
mov %rdx,%rbx
movq 8(%rsi),%rax
mulq 8(%rsi)
add %rax,%r11
adc %rdx,%r12
movq 8(%rsi),%rax
shl $1,%rax
mulq 16(%rsi)
add %rax,%r13
adc %rdx,%r14
movq 8(%rsi),%rax
shl $1,%rax
mulq 24(%rsi)
add %rax,%r15
adc %rdx,%rbx
movq 8(%rsi),%rdx
imulq $38,%rdx,%rax
mulq 32(%rsi)
add %rax,%rcx
adc %rdx,%r8
movq 16(%rsi),%rax
mulq 16(%rsi)
add %rax,%r15
adc %rdx,%rbx
movq 16(%rsi),%rdx
imulq $38,%rdx,%rax
mulq 24(%rsi)
add %rax,%rcx
adc %rdx,%r8
movq 16(%rsi),%rdx
imulq $38,%rdx,%rax
mulq 32(%rsi)
add %rax,%r9
adc %rdx,%r10
movq 24(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 24(%rsi)
add %rax,%r9
adc %rdx,%r10
movq 24(%rsi),%rdx
imulq $38,%rdx,%rax
mulq 32(%rsi)
add %rax,%r11
adc %rdx,%r12
movq 32(%rsi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rsi)
add %rax,%r13
adc %rdx,%r14
movq x25519_x86_64_REDMASK51(%rip),%rsi
shld $13,%rcx,%r8
and %rsi,%rcx
shld $13,%r9,%r10
and %rsi,%r9
add %r8,%r9
shld $13,%r11,%r12
and %rsi,%r11
add %r10,%r11
shld $13,%r13,%r14
and %rsi,%r13
add %r12,%r13
shld $13,%r15,%rbx
and %rsi,%r15
add %r14,%r15
imulq $19,%rbx,%rdx
add %rdx,%rcx
mov %rcx,%rdx
shr $51,%rdx
add %r9,%rdx
and %rsi,%rcx
mov %rdx,%r8
shr $51,%rdx
add %r11,%rdx
and %rsi,%r8
mov %rdx,%r9
shr $51,%rdx
add %r13,%rdx
and %rsi,%r9
mov %rdx,%rax
shr $51,%rdx
add %r15,%rdx
and %rsi,%rax
mov %rdx,%r10
shr $51,%rdx
imulq $19,%rdx,%rdx
add %rdx,%rcx
and %rsi,%r10
movq %rcx,0(%rdi)
movq %r8,8(%rdi)
movq %r9,16(%rdi)
movq %rax,24(%rdi)
movq %r10,32(%rdi)
movq -8(%rsp),%r12
movq -16(%rsp),%r13
movq -24(%rsp),%r14
movq -32(%rsp),%r15
movq -40(%rsp),%rbx
ret
.cfi_endproc
.p2align 5
.globl C_ABI(x25519_x86_64_ladderstep)
HIDDEN C_ABI(x25519_x86_64_ladderstep)
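/* x25519_x86_64_ladderstep(uint64_t work[25]): one combined double-and-add
 * step of the Montgomery ladder. The work area holds five field elements of
 * 40 bytes each: x1 at byte offset 0, x2 at 40, z2 at 80, x3 at 120 and z3 at
 * 160. In the RFC 7748 names A, B, AA, BB, E, C, D, DA, CB, the code computes
 * x2 = AA*BB, z2 = E*(BB + 121666*E), x3 = (DA+CB)^2 and z3 = x1*(DA-CB)^2;
 * the stage comments below track this. */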
C_ABI(x25519_x86_64_ladderstep):
.cfi_startproc
sub $344,%rsp
.cfi_adjust_cfa_offset 344
movq %r12,296(%rsp)
.cfi_rel_offset r12, 296
movq %r13,304(%rsp)
.cfi_rel_offset r13, 304
movq %r14,312(%rsp)
.cfi_rel_offset r14, 312
movq %r15,320(%rsp)
.cfi_rel_offset r15, 320
movq %rbx,328(%rsp)
.cfi_rel_offset rbx, 328
movq %rbp,336(%rsp)
.cfi_rel_offset rbp, 336
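/* A = x2 + z2 -> 0(%rsp); B = x2 - z2 -> 40(%rsp) (2p is added to the
 * minuend first so no limb of the difference goes negative). */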
movq 40(%rdi),%rsi
movq 48(%rdi),%rdx
movq 56(%rdi),%rcx
movq 64(%rdi),%r8
movq 72(%rdi),%r9
mov %rsi,%rax
mov %rdx,%r10
mov %rcx,%r11
mov %r8,%r12
mov %r9,%r13
add x25519_x86_64_2P0(%rip),%rax
add x25519_x86_64_2P1234(%rip),%r10
add x25519_x86_64_2P1234(%rip),%r11
add x25519_x86_64_2P1234(%rip),%r12
add x25519_x86_64_2P1234(%rip),%r13
addq 80(%rdi),%rsi
addq 88(%rdi),%rdx
addq 96(%rdi),%rcx
addq 104(%rdi),%r8
addq 112(%rdi),%r9
subq 80(%rdi),%rax
subq 88(%rdi),%r10
subq 96(%rdi),%r11
subq 104(%rdi),%r12
subq 112(%rdi),%r13
movq %rsi,0(%rsp)
movq %rdx,8(%rsp)
movq %rcx,16(%rsp)
movq %r8,24(%rsp)
movq %r9,32(%rsp)
movq %rax,40(%rsp)
movq %r10,48(%rsp)
movq %r11,56(%rsp)
movq %r12,64(%rsp)
movq %r13,72(%rsp)
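/* BB = B^2 -> 80(%rsp). */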
movq 40(%rsp),%rax
mulq 40(%rsp)
mov %rax,%rsi
mov %rdx,%rcx
movq 40(%rsp),%rax
shl $1,%rax
mulq 48(%rsp)
mov %rax,%r8
mov %rdx,%r9
movq 40(%rsp),%rax
shl $1,%rax
mulq 56(%rsp)
mov %rax,%r10
mov %rdx,%r11
movq 40(%rsp),%rax
shl $1,%rax
mulq 64(%rsp)
mov %rax,%r12
mov %rdx,%r13
movq 40(%rsp),%rax
shl $1,%rax
mulq 72(%rsp)
mov %rax,%r14
mov %rdx,%r15
movq 48(%rsp),%rax
mulq 48(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 48(%rsp),%rax
shl $1,%rax
mulq 56(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 48(%rsp),%rax
shl $1,%rax
mulq 64(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 48(%rsp),%rdx
imulq $38,%rdx,%rax
mulq 72(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 56(%rsp),%rax
mulq 56(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 56(%rsp),%rdx
imulq $38,%rdx,%rax
mulq 64(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 56(%rsp),%rdx
imulq $38,%rdx,%rax
mulq 72(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 64(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 64(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 64(%rsp),%rdx
imulq $38,%rdx,%rax
mulq 72(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 72(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 72(%rsp)
add %rax,%r12
adc %rdx,%r13
movq x25519_x86_64_REDMASK51(%rip),%rdx
shld $13,%rsi,%rcx
and %rdx,%rsi
shld $13,%r8,%r9
and %rdx,%r8
add %rcx,%r8
shld $13,%r10,%r11
and %rdx,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rdx,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rdx,%r14
add %r13,%r14
imulq $19,%r15,%rcx
add %rcx,%rsi
mov %rsi,%rcx
shr $51,%rcx
add %r8,%rcx
and %rdx,%rsi
mov %rcx,%r8
shr $51,%rcx
add %r10,%rcx
and %rdx,%r8
mov %rcx,%r9
shr $51,%rcx
add %r12,%rcx
and %rdx,%r9
mov %rcx,%rax
shr $51,%rcx
add %r14,%rcx
and %rdx,%rax
mov %rcx,%r10
shr $51,%rcx
imulq $19,%rcx,%rcx
add %rcx,%rsi
and %rdx,%r10
movq %rsi,80(%rsp)
movq %r8,88(%rsp)
movq %r9,96(%rsp)
movq %rax,104(%rsp)
movq %r10,112(%rsp)
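/* AA = A^2 -> 120(%rsp). */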
movq 0(%rsp),%rax
mulq 0(%rsp)
mov %rax,%rsi
mov %rdx,%rcx
movq 0(%rsp),%rax
shl $1,%rax
mulq 8(%rsp)
mov %rax,%r8
mov %rdx,%r9
movq 0(%rsp),%rax
shl $1,%rax
mulq 16(%rsp)
mov %rax,%r10
mov %rdx,%r11
movq 0(%rsp),%rax
shl $1,%rax
mulq 24(%rsp)
mov %rax,%r12
mov %rdx,%r13
movq 0(%rsp),%rax
shl $1,%rax
mulq 32(%rsp)
mov %rax,%r14
mov %rdx,%r15
movq 8(%rsp),%rax
mulq 8(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 8(%rsp),%rax
shl $1,%rax
mulq 16(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 8(%rsp),%rax
shl $1,%rax
mulq 24(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 8(%rsp),%rdx
imulq $38,%rdx,%rax
mulq 32(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 16(%rsp),%rax
mulq 16(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 16(%rsp),%rdx
imulq $38,%rdx,%rax
mulq 24(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 16(%rsp),%rdx
imulq $38,%rdx,%rax
mulq 32(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 24(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 24(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 24(%rsp),%rdx
imulq $38,%rdx,%rax
mulq 32(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 32(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 32(%rsp)
add %rax,%r12
adc %rdx,%r13
movq x25519_x86_64_REDMASK51(%rip),%rdx
shld $13,%rsi,%rcx
and %rdx,%rsi
shld $13,%r8,%r9
and %rdx,%r8
add %rcx,%r8
shld $13,%r10,%r11
and %rdx,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rdx,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rdx,%r14
add %r13,%r14
imulq $19,%r15,%rcx
add %rcx,%rsi
mov %rsi,%rcx
shr $51,%rcx
add %r8,%rcx
and %rdx,%rsi
mov %rcx,%r8
shr $51,%rcx
add %r10,%rcx
and %rdx,%r8
mov %rcx,%r9
shr $51,%rcx
add %r12,%rcx
and %rdx,%r9
mov %rcx,%rax
shr $51,%rcx
add %r14,%rcx
and %rdx,%rax
mov %rcx,%r10
shr $51,%rcx
imulq $19,%rcx,%rcx
add %rcx,%rsi
and %rdx,%r10
movq %rsi,120(%rsp)
movq %r8,128(%rsp)
movq %r9,136(%rsp)
movq %rax,144(%rsp)
movq %r10,152(%rsp)
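/* E = AA - BB -> 160(%rsp) (again with 2p added before subtracting). */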
mov %rsi,%rsi
mov %r8,%rdx
mov %r9,%rcx
mov %rax,%r8
mov %r10,%r9
add x25519_x86_64_2P0(%rip),%rsi
add x25519_x86_64_2P1234(%rip),%rdx
add x25519_x86_64_2P1234(%rip),%rcx
add x25519_x86_64_2P1234(%rip),%r8
add x25519_x86_64_2P1234(%rip),%r9
subq 80(%rsp),%rsi
subq 88(%rsp),%rdx
subq 96(%rsp),%rcx
subq 104(%rsp),%r8
subq 112(%rsp),%r9
movq %rsi,160(%rsp)
movq %rdx,168(%rsp)
movq %rcx,176(%rsp)
movq %r8,184(%rsp)
movq %r9,192(%rsp)
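/* C = x3 + z3 -> 200(%rsp); D = x3 - z3 (again with 2p added) -> 240(%rsp). */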
movq 120(%rdi),%rsi
movq 128(%rdi),%rdx
movq 136(%rdi),%rcx
movq 144(%rdi),%r8
movq 152(%rdi),%r9
mov %rsi,%rax
mov %rdx,%r10
mov %rcx,%r11
mov %r8,%r12
mov %r9,%r13
add x25519_x86_64_2P0(%rip),%rax
add x25519_x86_64_2P1234(%rip),%r10
add x25519_x86_64_2P1234(%rip),%r11
add x25519_x86_64_2P1234(%rip),%r12
add x25519_x86_64_2P1234(%rip),%r13
addq 160(%rdi),%rsi
addq 168(%rdi),%rdx
addq 176(%rdi),%rcx
addq 184(%rdi),%r8
addq 192(%rdi),%r9
subq 160(%rdi),%rax
subq 168(%rdi),%r10
subq 176(%rdi),%r11
subq 184(%rdi),%r12
subq 192(%rdi),%r13
movq %rsi,200(%rsp)
movq %rdx,208(%rsp)
movq %rcx,216(%rsp)
movq %r8,224(%rsp)
movq %r9,232(%rsp)
movq %rax,240(%rsp)
movq %r10,248(%rsp)
movq %r11,256(%rsp)
movq %r12,264(%rsp)
movq %r13,272(%rsp)
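/* CB = C*B -> 40(%rsp), overwriting B. */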
movq 224(%rsp),%rsi
imulq $19,%rsi,%rax
movq %rax,280(%rsp)
mulq 56(%rsp)
mov %rax,%rsi
mov %rdx,%rcx
movq 232(%rsp),%rdx
imulq $19,%rdx,%rax
movq %rax,288(%rsp)
mulq 48(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 200(%rsp),%rax
mulq 40(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 200(%rsp),%rax
mulq 48(%rsp)
mov %rax,%r8
mov %rdx,%r9
movq 200(%rsp),%rax
mulq 56(%rsp)
mov %rax,%r10
mov %rdx,%r11
movq 200(%rsp),%rax
mulq 64(%rsp)
mov %rax,%r12
mov %rdx,%r13
movq 200(%rsp),%rax
mulq 72(%rsp)
mov %rax,%r14
mov %rdx,%r15
movq 208(%rsp),%rax
mulq 40(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 208(%rsp),%rax
mulq 48(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 208(%rsp),%rax
mulq 56(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 208(%rsp),%rax
mulq 64(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 208(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 72(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 216(%rsp),%rax
mulq 40(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 216(%rsp),%rax
mulq 48(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 216(%rsp),%rax
mulq 56(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 216(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 64(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 216(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 72(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 224(%rsp),%rax
mulq 40(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 224(%rsp),%rax
mulq 48(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 280(%rsp),%rax
mulq 64(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 280(%rsp),%rax
mulq 72(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 232(%rsp),%rax
mulq 40(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 288(%rsp),%rax
mulq 56(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 288(%rsp),%rax
mulq 64(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 288(%rsp),%rax
mulq 72(%rsp)
add %rax,%r12
adc %rdx,%r13
movq x25519_x86_64_REDMASK51(%rip),%rdx
shld $13,%rsi,%rcx
and %rdx,%rsi
shld $13,%r8,%r9
and %rdx,%r8
add %rcx,%r8
shld $13,%r10,%r11
and %rdx,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rdx,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rdx,%r14
add %r13,%r14
imulq $19,%r15,%rcx
add %rcx,%rsi
mov %rsi,%rcx
shr $51,%rcx
add %r8,%rcx
mov %rcx,%r8
shr $51,%rcx
and %rdx,%rsi
add %r10,%rcx
mov %rcx,%r9
shr $51,%rcx
and %rdx,%r8
add %r12,%rcx
mov %rcx,%rax
shr $51,%rcx
and %rdx,%r9
add %r14,%rcx
mov %rcx,%r10
shr $51,%rcx
and %rdx,%rax
imulq $19,%rcx,%rcx
add %rcx,%rsi
and %rdx,%r10
movq %rsi,40(%rsp)
movq %r8,48(%rsp)
movq %r9,56(%rsp)
movq %rax,64(%rsp)
movq %r10,72(%rsp)
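/* DA = D*A; then DA + CB -> 120(%rdi) and DA - CB -> 160(%rdi), both still
 * to be squared. */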
movq 264(%rsp),%rsi
imulq $19,%rsi,%rax
movq %rax,200(%rsp)
mulq 16(%rsp)
mov %rax,%rsi
mov %rdx,%rcx
movq 272(%rsp),%rdx
imulq $19,%rdx,%rax
movq %rax,208(%rsp)
mulq 8(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 240(%rsp),%rax
mulq 0(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 240(%rsp),%rax
mulq 8(%rsp)
mov %rax,%r8
mov %rdx,%r9
movq 240(%rsp),%rax
mulq 16(%rsp)
mov %rax,%r10
mov %rdx,%r11
movq 240(%rsp),%rax
mulq 24(%rsp)
mov %rax,%r12
mov %rdx,%r13
movq 240(%rsp),%rax
mulq 32(%rsp)
mov %rax,%r14
mov %rdx,%r15
movq 248(%rsp),%rax
mulq 0(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 248(%rsp),%rax
mulq 8(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 248(%rsp),%rax
mulq 16(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 248(%rsp),%rax
mulq 24(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 248(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 32(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 256(%rsp),%rax
mulq 0(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 256(%rsp),%rax
mulq 8(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 256(%rsp),%rax
mulq 16(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 256(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 24(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 256(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 32(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 264(%rsp),%rax
mulq 0(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 264(%rsp),%rax
mulq 8(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 200(%rsp),%rax
mulq 24(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 200(%rsp),%rax
mulq 32(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 272(%rsp),%rax
mulq 0(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 208(%rsp),%rax
mulq 16(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 208(%rsp),%rax
mulq 24(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 208(%rsp),%rax
mulq 32(%rsp)
add %rax,%r12
adc %rdx,%r13
movq x25519_x86_64_REDMASK51(%rip),%rdx
shld $13,%rsi,%rcx
and %rdx,%rsi
shld $13,%r8,%r9
and %rdx,%r8
add %rcx,%r8
shld $13,%r10,%r11
and %rdx,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rdx,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rdx,%r14
add %r13,%r14
imulq $19,%r15,%rcx
add %rcx,%rsi
mov %rsi,%rcx
shr $51,%rcx
add %r8,%rcx
mov %rcx,%r8
shr $51,%rcx
and %rdx,%rsi
add %r10,%rcx
mov %rcx,%r9
shr $51,%rcx
and %rdx,%r8
add %r12,%rcx
mov %rcx,%rax
shr $51,%rcx
and %rdx,%r9
add %r14,%rcx
mov %rcx,%r10
shr $51,%rcx
and %rdx,%rax
imulq $19,%rcx,%rcx
add %rcx,%rsi
and %rdx,%r10
mov %rsi,%rdx
mov %r8,%rcx
mov %r9,%r11
mov %rax,%r12
mov %r10,%r13
add x25519_x86_64_2P0(%rip),%rdx
add x25519_x86_64_2P1234(%rip),%rcx
add x25519_x86_64_2P1234(%rip),%r11
add x25519_x86_64_2P1234(%rip),%r12
add x25519_x86_64_2P1234(%rip),%r13
addq 40(%rsp),%rsi
addq 48(%rsp),%r8
addq 56(%rsp),%r9
addq 64(%rsp),%rax
addq 72(%rsp),%r10
subq 40(%rsp),%rdx
subq 48(%rsp),%rcx
subq 56(%rsp),%r11
subq 64(%rsp),%r12
subq 72(%rsp),%r13
movq %rsi,120(%rdi)
movq %r8,128(%rdi)
movq %r9,136(%rdi)
movq %rax,144(%rdi)
movq %r10,152(%rdi)
movq %rdx,160(%rdi)
movq %rcx,168(%rdi)
movq %r11,176(%rdi)
movq %r12,184(%rdi)
movq %r13,192(%rdi)
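/* x3 = (DA + CB)^2, squared in place at 120(%rdi). */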
movq 120(%rdi),%rax
mulq 120(%rdi)
mov %rax,%rsi
mov %rdx,%rcx
movq 120(%rdi),%rax
shl $1,%rax
mulq 128(%rdi)
mov %rax,%r8
mov %rdx,%r9
movq 120(%rdi),%rax
shl $1,%rax
mulq 136(%rdi)
mov %rax,%r10
mov %rdx,%r11
movq 120(%rdi),%rax
shl $1,%rax
mulq 144(%rdi)
mov %rax,%r12
mov %rdx,%r13
movq 120(%rdi),%rax
shl $1,%rax
mulq 152(%rdi)
mov %rax,%r14
mov %rdx,%r15
movq 128(%rdi),%rax
mulq 128(%rdi)
add %rax,%r10
adc %rdx,%r11
movq 128(%rdi),%rax
shl $1,%rax
mulq 136(%rdi)
add %rax,%r12
adc %rdx,%r13
movq 128(%rdi),%rax
shl $1,%rax
mulq 144(%rdi)
add %rax,%r14
adc %rdx,%r15
movq 128(%rdi),%rdx
imulq $38,%rdx,%rax
mulq 152(%rdi)
add %rax,%rsi
adc %rdx,%rcx
movq 136(%rdi),%rax
mulq 136(%rdi)
add %rax,%r14
adc %rdx,%r15
movq 136(%rdi),%rdx
imulq $38,%rdx,%rax
mulq 144(%rdi)
add %rax,%rsi
adc %rdx,%rcx
movq 136(%rdi),%rdx
imulq $38,%rdx,%rax
mulq 152(%rdi)
add %rax,%r8
adc %rdx,%r9
movq 144(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 144(%rdi)
add %rax,%r8
adc %rdx,%r9
movq 144(%rdi),%rdx
imulq $38,%rdx,%rax
mulq 152(%rdi)
add %rax,%r10
adc %rdx,%r11
movq 152(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 152(%rdi)
add %rax,%r12
adc %rdx,%r13
movq x25519_x86_64_REDMASK51(%rip),%rdx
shld $13,%rsi,%rcx
and %rdx,%rsi
shld $13,%r8,%r9
and %rdx,%r8
add %rcx,%r8
shld $13,%r10,%r11
and %rdx,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rdx,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rdx,%r14
add %r13,%r14
imulq $19,%r15,%rcx
add %rcx,%rsi
mov %rsi,%rcx
shr $51,%rcx
add %r8,%rcx
and %rdx,%rsi
mov %rcx,%r8
shr $51,%rcx
add %r10,%rcx
and %rdx,%r8
mov %rcx,%r9
shr $51,%rcx
add %r12,%rcx
and %rdx,%r9
mov %rcx,%rax
shr $51,%rcx
add %r14,%rcx
and %rdx,%rax
mov %rcx,%r10
shr $51,%rcx
imulq $19,%rcx,%rcx
add %rcx,%rsi
and %rdx,%r10
movq %rsi,120(%rdi)
movq %r8,128(%rdi)
movq %r9,136(%rdi)
movq %rax,144(%rdi)
movq %r10,152(%rdi)
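/* (DA - CB)^2, squared in place at 160(%rdi). */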
movq 160(%rdi),%rax
mulq 160(%rdi)
mov %rax,%rsi
mov %rdx,%rcx
movq 160(%rdi),%rax
shl $1,%rax
mulq 168(%rdi)
mov %rax,%r8
mov %rdx,%r9
movq 160(%rdi),%rax
shl $1,%rax
mulq 176(%rdi)
mov %rax,%r10
mov %rdx,%r11
movq 160(%rdi),%rax
shl $1,%rax
mulq 184(%rdi)
mov %rax,%r12
mov %rdx,%r13
movq 160(%rdi),%rax
shl $1,%rax
mulq 192(%rdi)
mov %rax,%r14
mov %rdx,%r15
movq 168(%rdi),%rax
mulq 168(%rdi)
add %rax,%r10
adc %rdx,%r11
movq 168(%rdi),%rax
shl $1,%rax
mulq 176(%rdi)
add %rax,%r12
adc %rdx,%r13
movq 168(%rdi),%rax
shl $1,%rax
mulq 184(%rdi)
add %rax,%r14
adc %rdx,%r15
movq 168(%rdi),%rdx
imulq $38,%rdx,%rax
mulq 192(%rdi)
add %rax,%rsi
adc %rdx,%rcx
movq 176(%rdi),%rax
mulq 176(%rdi)
add %rax,%r14
adc %rdx,%r15
movq 176(%rdi),%rdx
imulq $38,%rdx,%rax
mulq 184(%rdi)
add %rax,%rsi
adc %rdx,%rcx
movq 176(%rdi),%rdx
imulq $38,%rdx,%rax
mulq 192(%rdi)
add %rax,%r8
adc %rdx,%r9
movq 184(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 184(%rdi)
add %rax,%r8
adc %rdx,%r9
movq 184(%rdi),%rdx
imulq $38,%rdx,%rax
mulq 192(%rdi)
add %rax,%r10
adc %rdx,%r11
movq 192(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 192(%rdi)
add %rax,%r12
adc %rdx,%r13
movq x25519_x86_64_REDMASK51(%rip),%rdx
shld $13,%rsi,%rcx
and %rdx,%rsi
shld $13,%r8,%r9
and %rdx,%r8
add %rcx,%r8
shld $13,%r10,%r11
and %rdx,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rdx,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rdx,%r14
add %r13,%r14
imulq $19,%r15,%rcx
add %rcx,%rsi
mov %rsi,%rcx
shr $51,%rcx
add %r8,%rcx
and %rdx,%rsi
mov %rcx,%r8
shr $51,%rcx
add %r10,%rcx
and %rdx,%r8
mov %rcx,%r9
shr $51,%rcx
add %r12,%rcx
and %rdx,%r9
mov %rcx,%rax
shr $51,%rcx
add %r14,%rcx
and %rdx,%rax
mov %rcx,%r10
shr $51,%rcx
imulq $19,%rcx,%rcx
add %rcx,%rsi
and %rdx,%r10
movq %rsi,160(%rdi)
movq %r8,168(%rdi)
movq %r9,176(%rdi)
movq %rax,184(%rdi)
movq %r10,192(%rdi)
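/* z3 = x1 * (DA - CB)^2 -> 160(%rdi). */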
movq 184(%rdi),%rsi
imulq $19,%rsi,%rax
movq %rax,0(%rsp)
mulq 16(%rdi)
mov %rax,%rsi
mov %rdx,%rcx
movq 192(%rdi),%rdx
imulq $19,%rdx,%rax
movq %rax,8(%rsp)
mulq 8(%rdi)
add %rax,%rsi
adc %rdx,%rcx
movq 160(%rdi),%rax
mulq 0(%rdi)
add %rax,%rsi
adc %rdx,%rcx
movq 160(%rdi),%rax
mulq 8(%rdi)
mov %rax,%r8
mov %rdx,%r9
movq 160(%rdi),%rax
mulq 16(%rdi)
mov %rax,%r10
mov %rdx,%r11
movq 160(%rdi),%rax
mulq 24(%rdi)
mov %rax,%r12
mov %rdx,%r13
movq 160(%rdi),%rax
mulq 32(%rdi)
mov %rax,%r14
mov %rdx,%r15
movq 168(%rdi),%rax
mulq 0(%rdi)
add %rax,%r8
adc %rdx,%r9
movq 168(%rdi),%rax
mulq 8(%rdi)
add %rax,%r10
adc %rdx,%r11
movq 168(%rdi),%rax
mulq 16(%rdi)
add %rax,%r12
adc %rdx,%r13
movq 168(%rdi),%rax
mulq 24(%rdi)
add %rax,%r14
adc %rdx,%r15
movq 168(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rdi)
add %rax,%rsi
adc %rdx,%rcx
movq 176(%rdi),%rax
mulq 0(%rdi)
add %rax,%r10
adc %rdx,%r11
movq 176(%rdi),%rax
mulq 8(%rdi)
add %rax,%r12
adc %rdx,%r13
movq 176(%rdi),%rax
mulq 16(%rdi)
add %rax,%r14
adc %rdx,%r15
movq 176(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 24(%rdi)
add %rax,%rsi
adc %rdx,%rcx
movq 176(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 32(%rdi)
add %rax,%r8
adc %rdx,%r9
movq 184(%rdi),%rax
mulq 0(%rdi)
add %rax,%r12
adc %rdx,%r13
movq 184(%rdi),%rax
mulq 8(%rdi)
add %rax,%r14
adc %rdx,%r15
movq 0(%rsp),%rax
mulq 24(%rdi)
add %rax,%r8
adc %rdx,%r9
movq 0(%rsp),%rax
mulq 32(%rdi)
add %rax,%r10
adc %rdx,%r11
movq 192(%rdi),%rax
mulq 0(%rdi)
add %rax,%r14
adc %rdx,%r15
movq 8(%rsp),%rax
mulq 16(%rdi)
add %rax,%r8
adc %rdx,%r9
movq 8(%rsp),%rax
mulq 24(%rdi)
add %rax,%r10
adc %rdx,%r11
movq 8(%rsp),%rax
mulq 32(%rdi)
add %rax,%r12
adc %rdx,%r13
movq x25519_x86_64_REDMASK51(%rip),%rdx
shld $13,%rsi,%rcx
and %rdx,%rsi
shld $13,%r8,%r9
and %rdx,%r8
add %rcx,%r8
shld $13,%r10,%r11
and %rdx,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rdx,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rdx,%r14
add %r13,%r14
imulq $19,%r15,%rcx
add %rcx,%rsi
mov %rsi,%rcx
shr $51,%rcx
add %r8,%rcx
mov %rcx,%r8
shr $51,%rcx
and %rdx,%rsi
add %r10,%rcx
mov %rcx,%r9
shr $51,%rcx
and %rdx,%r8
add %r12,%rcx
mov %rcx,%rax
shr $51,%rcx
and %rdx,%r9
add %r14,%rcx
mov %rcx,%r10
shr $51,%rcx
and %rdx,%rax
imulq $19,%rcx,%rcx
add %rcx,%rsi
and %rdx,%r10
movq %rsi,160(%rdi)
movq %r8,168(%rdi)
movq %r9,176(%rdi)
movq %rax,184(%rdi)
movq %r10,192(%rdi)
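/* x2 = AA * BB -> 40(%rdi). */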
movq 144(%rsp),%rsi
imulq $19,%rsi,%rax
movq %rax,0(%rsp)
mulq 96(%rsp)
mov %rax,%rsi
mov %rdx,%rcx
movq 152(%rsp),%rdx
imulq $19,%rdx,%rax
movq %rax,8(%rsp)
mulq 88(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 120(%rsp),%rax
mulq 80(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 120(%rsp),%rax
mulq 88(%rsp)
mov %rax,%r8
mov %rdx,%r9
movq 120(%rsp),%rax
mulq 96(%rsp)
mov %rax,%r10
mov %rdx,%r11
movq 120(%rsp),%rax
mulq 104(%rsp)
mov %rax,%r12
mov %rdx,%r13
movq 120(%rsp),%rax
mulq 112(%rsp)
mov %rax,%r14
mov %rdx,%r15
movq 128(%rsp),%rax
mulq 80(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 128(%rsp),%rax
mulq 88(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 128(%rsp),%rax
mulq 96(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 128(%rsp),%rax
mulq 104(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 128(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 112(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 136(%rsp),%rax
mulq 80(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 136(%rsp),%rax
mulq 88(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 136(%rsp),%rax
mulq 96(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 136(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 104(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 136(%rsp),%rdx
imulq $19,%rdx,%rax
mulq 112(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 144(%rsp),%rax
mulq 80(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 144(%rsp),%rax
mulq 88(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 0(%rsp),%rax
mulq 104(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 0(%rsp),%rax
mulq 112(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 152(%rsp),%rax
mulq 80(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 8(%rsp),%rax
mulq 96(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 8(%rsp),%rax
mulq 104(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 8(%rsp),%rax
mulq 112(%rsp)
add %rax,%r12
adc %rdx,%r13
movq x25519_x86_64_REDMASK51(%rip),%rdx
shld $13,%rsi,%rcx
and %rdx,%rsi
shld $13,%r8,%r9
and %rdx,%r8
add %rcx,%r8
shld $13,%r10,%r11
and %rdx,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rdx,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rdx,%r14
add %r13,%r14
imulq $19,%r15,%rcx
add %rcx,%rsi
mov %rsi,%rcx
shr $51,%rcx
add %r8,%rcx
mov %rcx,%r8
shr $51,%rcx
and %rdx,%rsi
add %r10,%rcx
mov %rcx,%r9
shr $51,%rcx
and %rdx,%r8
add %r12,%rcx
mov %rcx,%rax
shr $51,%rcx
and %rdx,%r9
add %r14,%rcx
mov %rcx,%r10
shr $51,%rcx
and %rdx,%rax
imulq $19,%rcx,%rcx
add %rcx,%rsi
and %rdx,%r10
movq %rsi,40(%rdi)
movq %r8,48(%rdi)
movq %r9,56(%rdi)
movq %rax,64(%rdi)
movq %r10,72(%rdi)
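/* z2 = E * (BB + 121666*E) -> 80(%rdi); this is the (A+2)/4 = 121666 form of
 * the ladder, equivalent to the usual E*(AA + 121665*E) since AA = BB + E.
 * The multiply by 121666 uses the precomputed 121666 * 2^13: the extra 2^13
 * puts the radix-51 boundary of each 128-bit product exactly at bit 64, so
 * rdx is directly the carry into the next limb and rax >> 13 is the low 51
 * bits. */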
movq 160(%rsp),%rax
mulq x25519_x86_64_121666_213(%rip)
shr $13,%rax
mov %rax,%rsi
mov %rdx,%rcx
movq 168(%rsp),%rax
mulq x25519_x86_64_121666_213(%rip)
shr $13,%rax
add %rax,%rcx
mov %rdx,%r8
movq 176(%rsp),%rax
mulq x25519_x86_64_121666_213(%rip)
shr $13,%rax
add %rax,%r8
mov %rdx,%r9
movq 184(%rsp),%rax
mulq x25519_x86_64_121666_213(%rip)
shr $13,%rax
add %rax,%r9
mov %rdx,%r10
movq 192(%rsp),%rax
mulq x25519_x86_64_121666_213(%rip)
shr $13,%rax
add %rax,%r10
imulq $19,%rdx,%rdx
add %rdx,%rsi
addq 80(%rsp),%rsi
addq 88(%rsp),%rcx
addq 96(%rsp),%r8
addq 104(%rsp),%r9
addq 112(%rsp),%r10
movq %rsi,80(%rdi)
movq %rcx,88(%rdi)
movq %r8,96(%rdi)
movq %r9,104(%rdi)
movq %r10,112(%rdi)
movq 104(%rdi),%rsi
imulq $19,%rsi,%rax
movq %rax,0(%rsp)
mulq 176(%rsp)
mov %rax,%rsi
mov %rdx,%rcx
movq 112(%rdi),%rdx
imulq $19,%rdx,%rax
movq %rax,8(%rsp)
mulq 168(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 80(%rdi),%rax
mulq 160(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 80(%rdi),%rax
mulq 168(%rsp)
mov %rax,%r8
mov %rdx,%r9
movq 80(%rdi),%rax
mulq 176(%rsp)
mov %rax,%r10
mov %rdx,%r11
movq 80(%rdi),%rax
mulq 184(%rsp)
mov %rax,%r12
mov %rdx,%r13
movq 80(%rdi),%rax
mulq 192(%rsp)
mov %rax,%r14
mov %rdx,%r15
movq 88(%rdi),%rax
mulq 160(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 88(%rdi),%rax
mulq 168(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 88(%rdi),%rax
mulq 176(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 88(%rdi),%rax
mulq 184(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 88(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 192(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 96(%rdi),%rax
mulq 160(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 96(%rdi),%rax
mulq 168(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 96(%rdi),%rax
mulq 176(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 96(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 184(%rsp)
add %rax,%rsi
adc %rdx,%rcx
movq 96(%rdi),%rdx
imulq $19,%rdx,%rax
mulq 192(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 104(%rdi),%rax
mulq 160(%rsp)
add %rax,%r12
adc %rdx,%r13
movq 104(%rdi),%rax
mulq 168(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 0(%rsp),%rax
mulq 184(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 0(%rsp),%rax
mulq 192(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 112(%rdi),%rax
mulq 160(%rsp)
add %rax,%r14
adc %rdx,%r15
movq 8(%rsp),%rax
mulq 176(%rsp)
add %rax,%r8
adc %rdx,%r9
movq 8(%rsp),%rax
mulq 184(%rsp)
add %rax,%r10
adc %rdx,%r11
movq 8(%rsp),%rax
mulq 192(%rsp)
add %rax,%r12
adc %rdx,%r13
movq x25519_x86_64_REDMASK51(%rip),%rdx
shld $13,%rsi,%rcx
and %rdx,%rsi
shld $13,%r8,%r9
and %rdx,%r8
add %rcx,%r8
shld $13,%r10,%r11
and %rdx,%r10
add %r9,%r10
shld $13,%r12,%r13
and %rdx,%r12
add %r11,%r12
shld $13,%r14,%r15
and %rdx,%r14
add %r13,%r14
imulq $19,%r15,%rcx
add %rcx,%rsi
mov %rsi,%rcx
shr $51,%rcx
add %r8,%rcx
mov %rcx,%r8
shr $51,%rcx
and %rdx,%rsi
add %r10,%rcx
mov %rcx,%r9
shr $51,%rcx
and %rdx,%r8
add %r12,%rcx
mov %rcx,%rax
shr $51,%rcx
and %rdx,%r9
add %r14,%rcx
mov %rcx,%r10
shr $51,%rcx
and %rdx,%rax
imulq $19,%rcx,%rcx
add %rcx,%rsi
and %rdx,%r10
movq %rsi,80(%rdi)
movq %r8,88(%rdi)
movq %r9,96(%rdi)
movq %rax,104(%rdi)
movq %r10,112(%rdi)
movq 296(%rsp),%r12
movq 304(%rsp),%r13
movq 312(%rsp),%r14
movq 320(%rsp),%r15
movq 328(%rsp),%rbx
movq 336(%rsp),%rbp
add $344,%rsp
.cfi_adjust_cfa_offset -344
ret
.cfi_endproc
.p2align 5
.globl C_ABI(x25519_x86_64_work_cswap)
HIDDEN C_ABI(x25519_x86_64_work_cswap)
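/* x25519_x86_64_work_cswap(uint64_t work[20], uint64_t swap): constant-time
 * conditional swap of the two 80-byte halves at rdi, i.e. (x2,z2) with
 * (x3,z3) in the ladder, iff swap == 1. subq/notq turn the 0/1 flag into an
 * all-zero or all-one mask (~(swap - 1) = -swap), pshufd $0x44 broadcasts it
 * to both 64-bit lanes of %xmm15, and the pxor/pand/pxor sequence performs a
 * masked XOR swap with no secret-dependent branch. */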
C_ABI(x25519_x86_64_work_cswap):
.cfi_startproc
subq $1,%rsi
notq %rsi
movq %rsi,%xmm15
pshufd $0x44,%xmm15,%xmm15
movdqu 0(%rdi),%xmm0
movdqu 16(%rdi),%xmm2
movdqu 32(%rdi),%xmm4
movdqu 48(%rdi),%xmm6
movdqu 64(%rdi),%xmm8
movdqu 80(%rdi),%xmm1
movdqu 96(%rdi),%xmm3
movdqu 112(%rdi),%xmm5
movdqu 128(%rdi),%xmm7
movdqu 144(%rdi),%xmm9
movdqa %xmm1,%xmm10
movdqa %xmm3,%xmm11
movdqa %xmm5,%xmm12
movdqa %xmm7,%xmm13
movdqa %xmm9,%xmm14
pxor %xmm0,%xmm10
pxor %xmm2,%xmm11
pxor %xmm4,%xmm12
pxor %xmm6,%xmm13
pxor %xmm8,%xmm14
pand %xmm15,%xmm10
pand %xmm15,%xmm11
pand %xmm15,%xmm12
pand %xmm15,%xmm13
pand %xmm15,%xmm14
pxor %xmm10,%xmm0
pxor %xmm10,%xmm1
pxor %xmm11,%xmm2
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
pxor %xmm12,%xmm5
pxor %xmm13,%xmm6
pxor %xmm13,%xmm7
pxor %xmm14,%xmm8
pxor %xmm14,%xmm9
movdqu %xmm0,0(%rdi)
movdqu %xmm2,16(%rdi)
movdqu %xmm4,32(%rdi)
movdqu %xmm6,48(%rdi)
movdqu %xmm8,64(%rdi)
movdqu %xmm1,80(%rdi)
movdqu %xmm3,96(%rdi)
movdqu %xmm5,112(%rdi)
movdqu %xmm7,128(%rdi)
movdqu %xmm9,144(%rdi)
ret
.cfi_endproc
#endif /* __x86_64__ */
#endif /* !OPENSSL_NO_ASM */