boringssl/crypto/chacha/chacha_vec_arm.S

1424 lines
28 KiB
ArmAsm
Raw Normal View History

2014-06-20 20:00:00 +01:00
# Copyright (c) 2014, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# This file contains a pre-compiled version of chacha_vec.c for ARM. This is
# needed to support switching on NEON code at runtime. If the whole of OpenSSL
# were to be compiled with the needed flags to build chacha_vec.c, then it
# wouldn't be possible to run on non-NEON systems.
#
# This file was generated by:
#
# /opt/gcc-linaro-4.9-2014.11-x86_64_arm-linux-gnueabihf/bin/arm-linux-gnueabihf-gcc -O3 -mcpu=cortex-a8 -mfpu=neon -S chacha_vec.c -DASM_GEN -I ../../include -fpic -o chacha_vec_arm.S
2014-06-20 20:00:00 +01:00
#if !defined(OPENSSL_NO_ASM)
2014-06-20 20:00:00 +01:00
# Assembler setup as emitted by the compiler: unified syntax, Cortex-A8 target.
.syntax unified
.cpu cortex-a8
# Attribute 27 is Tag_ABI_HardFP_use in the ARM EABI build-attribute numbering.
.eabi_attribute 27, 3
# EABI attribute 28 sets whether VFP register arguments were used to build this
# file. If object files are inconsistent on this point, the linker will refuse
# to link them. Thus we report whatever the compiler expects since we don't use
# VFP arguments.
#if defined(__ARM_PCS_VFP)
.eabi_attribute 28, 1
#else
.eabi_attribute 28, 0
#endif
.fpu neon
# The remaining build attributes are compiler output. Tag numbers, per the ARM
# EABI build-attribute addenda: Tag 20 = ABI_FP_denormal, 21 = ABI_FP_exceptions,
# Tag 23 = ABI_FP_number_model, 24 = ABI_align_needed, 25 = ABI_align_preserved,
# Tag 26 = ABI_enum_size, 30 = ABI_optimization_goals, 34 = CPU_unaligned_access,
# Tag 18 = ABI_PCS_wchar_t.
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.thumb
.file "chacha_vec.c"
.text
.align 2
# The entry point is exported but given hidden visibility.
.global CRYPTO_chacha_20_neon
.hidden CRYPTO_chacha_20_neon
2014-06-20 20:00:00 +01:00
@ CRYPTO_chacha_20_neon: pre-compiled NEON ChaCha20 routine (Thumb-2 code).
@
@ Arguments, described by how the code below consumes them:
@   r0         - destination pointer (saved at [r7, #60]; output is stored through it)
@   r1         - source pointer (saved at [r7, #12] and [r7, #64]; input is loaded from it)
@   r2         - byte count (saved at [r7, #8])
@   r3         - pointer to 32 bytes that are loaded into q13/q14 (saved at [r7, #44])
@   [r7, #232] - first stack argument: pointer to two 32-bit words
@   [r7, #236] - second stack argument: 32-bit value placed in lane 0 of q11
@ NOTE(review): presumably (out, in, in_len, key, nonce, counter) of the C
@ function in chacha_vec.c - confirm against the prototype.
@
@ The work is split into three phases: 192 bytes per iteration at .L4 (two
@ NEON states plus one state held in core registers), then whole 64-byte
@ blocks at .L8, then a final partial block at .L6.
@ r4-r10, fp, lr and d8-d15 are saved here and restored at .L1.
.thumb
.thumb_func
.type CRYPTO_chacha_20_neon, %function
CRYPTO_chacha_20_neon:
@ args = 8, pretend = 0, frame = 128
2014-06-20 20:00:00 +01:00
@ frame_needed = 1, uses_anonymous_args = 0
@ Saved registers: 9 core registers (36 bytes) + d8-d15 (64 bytes). Together
@ with the 132-byte frame this puts the stack arguments at r7 + 232.
push {r4, r5, r6, r7, r8, r9, r10, fp, lr}
mov r4, r2
vpush.64 {d8, d9, d10, d11, d12, d13, d14, d15}
@ r8 = 0xAAAAAAAB; the high word of r2 * r8, shifted right by 7 (below),
@ is the byte count divided by 192.
movw r8, #43691
movt r8, 43690
mov ip, r3
umull r8, r9, r4, r8
sub sp, sp, #132
2014-06-20 20:00:00 +01:00
add r7, sp, #0 @ r7 = frame base for all [r7, #n] slots
sub sp, sp, #112 @ extra area that holds the 16-byte aligned scratch buffer
mov fp, r0
mov r10, r1
str r2, [r7, #8]
2014-06-20 20:00:00 +01:00
@ r4 = sp rounded up to a multiple of 16: base of the aligned scratch buffer
@ (saved at [r7, #72]).
add r4, sp, #15
ldr r2, .L92+16 @ PC-relative offset of .LANCHOR0 from the literal pool
2014-06-20 20:00:00 +01:00
bic r4, r4, #15
ldr r5, [r7, #232] @ first stack argument (pointer)
add lr, r4, #64
.LPIC16:
add r2, pc @ r2 = address of .LANCHOR0
str r0, [r7, #60]
str r1, [r7, #12]
str r3, [r7, #44]
2014-06-20 20:00:00 +01:00
ldmia r2, {r0, r1, r2, r3} @ the four constant words at .LANCHOR0
ldr r6, [r5]
str r4, [r7, #72]
ldr r5, [r5, #4]
ldr r4, [r7, #236] @ second stack argument (value)
@ Build the 16 bytes at [r7, #112]: {second stack arg, 0, word 0, word 1 of
@ the first stack argument}.
str r6, [r7, #120]
str r5, [r7, #124]
str r4, [r7, #112]
stmia lr, {r0, r1, r2, r3} @ copy the constants to scratch + 64
movs r3, #0
ldr r0, [r7, #72]
str r3, [r7, #116]
@ r3 = byte count / 192. The flags set here are consumed by the beq below;
@ the vector loads in between do not change them.
lsrs r3, r9, #7
vldr d22, [r7, #112] @ q11 = the 16 bytes assembled at [r7, #112]
vldr d23, [r7, #120]
vldr d24, [r0, #64] @ q12 = the four constant words
vldr d25, [r0, #72]
@ q13/q14 = the 32 bytes addressed by the r3 argument. The :64 qualifier
@ asserts that this address is 8-byte aligned.
@ NOTE(review): confirm that every caller guarantees that alignment.
vld1.64 {d26-d27}, [ip:64]
vldr d28, [ip, #16]
vldr d29, [ip, #24]
beq .L26 @ fewer than 192 bytes: skip the 192-byte loop
ldr r1, [r0, #64]
@ r3 = 256*n - 64*n = 192*n, the number of bytes the .L4 loop will consume
@ (saved at [r7, #4]).
lsls r2, r3, #8
sub r3, r2, r3, lsl #6
str r3, [r7, #4]
ldr r2, [r0, #72]
str r1, [r7, #40]
mov r1, r3
ldr r3, [r0, #68]
vldr d0, .L92 @ q0 = {1, 0, 0, 0}
vldr d1, .L92+8
str r2, [r7, #32]
@ [r7, #48] = second stack argument + 2; it is used as word 12 of the state
@ that is computed in core registers.
adds r2, r4, #2
str r3, [r7, #36]
ldr r3, [r0, #76]
str r2, [r7, #48]
mov r2, r0
mov r0, fp
str r10, [r7, #64]
str r3, [r7, #28]
adds r3, r0, r1 @ destination + 192*n: end pointer for the .L4 loop
mov r1, r6
str r3, [r7, #16]
add r3, r2, #80 @ scratch + 80: 16-byte staging area for the XOR step
mov r2, r5
str r3, [r7, #68]
2014-06-20 20:00:00 +01:00
@ .L4: top of the 192-byte loop. On entry r1 and r2 hold the two words read
@ through the first stack argument. Sets up two NEON working states
@ (q9/q1/q8/q3 and q10/q5/q15/q2) and one working state in core registers
@ and stack slots, then jumps over the literal pool to the round loop.
.L4:
ldr r0, [r7, #44] @ pointer passed in r3
add r8, r7, #28
str r2, [r7, #108]
vadd.i32 q3, q11, q0 @ q3 = q11 with lane 0 incremented by one
ldmia r8, {r8, r9, r10, fp} @ reload the constant words saved at [r7, #28..#40]
vmov q8, q14 @ v4si
ldr r3, [r0]
2014-06-20 20:00:00 +01:00
vmov q1, q13 @ v4si
vmov q9, q12 @ v4si
vmov q2, q11 @ v4si
str r3, [r7, #52]
mov r3, r0
ldr r5, [r3, #8]
vmov q15, q14 @ v4si
ldr lr, [r3, #20]
vmov q5, q13 @ v4si
ldr r6, [r3, #12]
vmov q10, q12 @ v4si
str r5, [r7, #92]
mov r5, r3
ldr r4, [r5, #28]
movs r5, #10 @ round-loop counter, kept at [r7, #76]
ldr ip, [r3, #16]
ldr r3, [r3, #24]
str r4, [r7, #104]
ldr r4, [r7, #48]
str r3, [r7, #100]
mov r3, r1
str r6, [r7, #56]
str r4, [r7, #96]
str r8, [r7, #80]
mov r8, r10
ldr r0, [r0, #4]
mov r10, r9
ldr r1, [r7, #92]
ldr r2, [r7, #56]
ldr r9, [r7, #100]
ldr r4, [r7, #52]
str lr, [r7, #88]
mov lr, r3
str r5, [r7, #76]
movs r5, #0
str r5, [r7, #84]
b .L93 @ skip the literal pool
.L94:
2014-06-20 20:00:00 +01:00
.align 3
@ Literal pool: the vector {1, 0, 0, 0} loaded into q0, followed by the
@ PC-relative offset used at .LPIC16 to locate .LANCHOR0.
.L92:
2014-06-20 20:00:00 +01:00
.word 1
.word 0
.word 0
.word 0
.word .LANCHOR0-(.LPIC16+4)
.L93:
2014-06-20 20:00:00 +01:00
@ .L3: round loop, executed 10 times (counter at [r7, #76]).
@ Each pass applies the add / xor / rotate sequence twice to every state,
@ with the vext lane rotations between the two halves re-arranging the rows
@ and the second set of vext instructions restoring them.
@ Vector rotations: vrev32.16 = rotate by 16; vshl #12 + vsri #20 = rotate
@ left by 12; vshl #8 + vsri #24 = rotate left by 8; vshl #7 + vsri #25 =
@ rotate left by 7. The scalar state uses ror #16, #20, #24 and #25 for the
@ same left rotations.
@ Vector state 1 uses q9, q1, q8, q3 and state 2 uses q10, q5, q15, q2
@ (q4 and q6 are temporaries). The scalar state lives in core registers and
@ the stack slots [r7, #80..#108]; the instruction streams are interleaved.
.L3:
vadd.i32 q9, q9, q1
add r3, r8, r0
vadd.i32 q10, q10, q5
add r5, fp, r4
veor q3, q3, q9
mov r6, r3
veor q2, q2, q10
ldr r3, [r7, #80]
str r5, [r7, #100]
add r10, r10, r1
vrev32.16 q3, q3
eor lr, lr, r10
vadd.i32 q8, q8, q3
vrev32.16 q2, q2
vadd.i32 q15, q15, q2
mov fp, r3
ldr r3, [r7, #96]
veor q4, q8, q1
str r6, [r7, #96]
veor q6, q15, q5
eors r3, r3, r5
mov r5, r6
ldr r6, [r7, #84]
vshl.i32 q1, q4, #12
vshl.i32 q5, q6, #12
add fp, fp, r2
eors r6, r6, r5
ror r3, r3, #16
vsri.32 q1, q4, #20
ror lr, lr, #16
mov r5, r6
ldr r6, [r7, #108]
vsri.32 q5, q6, #20
str r3, [r7, #108]
eor r6, r6, fp
ror r5, r5, #16
vadd.i32 q9, q9, q1
add r9, r9, lr
ror r3, r6, #16
ldr r6, [r7, #108]
vadd.i32 q10, q10, q5
str r3, [r7, #92]
veor q4, q9, q3
add ip, ip, r6
ldr r6, [r7, #88]
veor q6, q10, q2
eor r4, ip, r4
eor r1, r9, r1
vshl.i32 q3, q4, #8
mov r8, r6
ldr r6, [r7, #104]
vshl.i32 q2, q6, #8
ror r4, r4, #20
add r6, r6, r3
vsri.32 q3, q4, #24
str r6, [r7, #88]
eors r2, r2, r6
ldr r6, [r7, #100]
vsri.32 q2, q6, #24
add r8, r8, r5
ror r2, r2, #20
adds r6, r4, r6
vadd.i32 q4, q8, q3
eor r0, r8, r0
vadd.i32 q15, q15, q2
mov r3, r6
ldr r6, [r7, #96]
veor q6, q4, q1
2014-06-20 20:00:00 +01:00
ror r0, r0, #20
str r3, [r7, #96]
veor q5, q15, q5
2014-06-20 20:00:00 +01:00
adds r6, r0, r6
str r6, [r7, #104]
mov r6, r3
ldr r3, [r7, #108]
vshl.i32 q8, q6, #7
add fp, fp, r2
eors r3, r3, r6
ldr r6, [r7, #104]
vshl.i32 q1, q5, #7
ror r1, r1, #20
eors r5, r5, r6
vsri.32 q8, q6, #25
ldr r6, [r7, #92]
ror r3, r3, #24
ror r5, r5, #24
vsri.32 q1, q5, #25
str r5, [r7, #100]
eor r6, fp, r6
ldr r5, [r7, #100]
add r10, r10, r1
add ip, r3, ip
@ First set of lane rotations (by 1, 3 and 2 lanes) before the second half.
vext.32 q8, q8, q8, #1
str ip, [r7, #108]
add ip, r5, r8
ldr r5, [r7, #88]
eor lr, r10, lr
ror r6, r6, #24
vext.32 q1, q1, q1, #1
add r8, r6, r5
vadd.i32 q9, q9, q8
ldr r5, [r7, #108]
2014-06-20 20:00:00 +01:00
vext.32 q3, q3, q3, #3
vadd.i32 q10, q10, q1
ror lr, lr, #24
eor r0, ip, r0
vext.32 q2, q2, q2, #3
add r9, r9, lr
eors r4, r4, r5
veor q3, q9, q3
ldr r5, [r7, #96]
eor r1, r9, r1
ror r0, r0, #25
veor q2, q10, q2
adds r5, r0, r5
vext.32 q4, q4, q4, #2
str r5, [r7, #96]
ldr r5, [r7, #104]
2014-06-20 20:00:00 +01:00
ror r1, r1, #25
vrev32.16 q3, q3
eor r2, r8, r2
2014-06-20 20:00:00 +01:00
vext.32 q15, q15, q15, #2
adds r5, r1, r5
vadd.i32 q4, q4, q3
ror r4, r4, #25
vrev32.16 q2, q2
str r5, [r7, #84]
vadd.i32 q15, q15, q2
2014-06-20 20:00:00 +01:00
eors r3, r3, r5
ldr r5, [r7, #96]
add fp, fp, r4
veor q8, q4, q8
2014-06-20 20:00:00 +01:00
ror r2, r2, #25
veor q1, q15, q1
eor lr, fp, lr
eors r6, r6, r5
ror r3, r3, #16
ldr r5, [r7, #100]
add r10, r10, r2
str r3, [r7, #104]
ror lr, lr, #16
ldr r3, [r7, #104]
eor r5, r10, r5
vshl.i32 q5, q8, #12
add ip, lr, ip
vshl.i32 q6, q1, #12
str ip, [r7, #88]
add ip, r3, r8
str ip, [r7, #100]
ldr r3, [r7, #108]
ror r5, r5, #16
vsri.32 q5, q8, #20
ror r6, r6, #16
add ip, r5, r3
ldr r3, [r7, #88]
vsri.32 q6, q1, #20
add r9, r9, r6
eor r2, ip, r2
eors r4, r4, r3
ldr r3, [r7, #100]
eor r0, r9, r0
vadd.i32 q9, q9, q5
ror r4, r4, #20
eors r1, r1, r3
vadd.i32 q10, q10, q6
ror r3, r2, #20
str r3, [r7, #92]
ldr r3, [r7, #96]
veor q3, q9, q3
2014-06-20 20:00:00 +01:00
ror r0, r0, #20
add r8, r4, fp
veor q2, q10, q2
add fp, r0, r3
ldr r3, [r7, #84]
2014-06-20 20:00:00 +01:00
ror r1, r1, #20
mov r2, r8
vshl.i32 q8, q3, #8
str r8, [r7, #80]
add r8, r1, r3
ldr r3, [r7, #92]
vmov q1, q6 @ v4si
vshl.i32 q6, q2, #8
eor r6, fp, r6
add r10, r10, r3
ldr r3, [r7, #104]
vsri.32 q8, q3, #24
eor lr, r2, lr
eor r3, r8, r3
ror r2, r6, #24
vsri.32 q6, q2, #24
eor r5, r10, r5
str r2, [r7, #108]
ror r2, r3, #24
ldr r3, [r7, #88]
vmov q3, q8 @ v4si
vadd.i32 q15, q15, q6
ror lr, lr, #24
vadd.i32 q8, q4, q8
ror r6, r5, #24
add r5, lr, r3
ldr r3, [r7, #108]
veor q4, q8, q5
add ip, ip, r6
vmov q2, q6 @ v4si
add r9, r9, r3
veor q6, q15, q1
ldr r3, [r7, #100]
vshl.i32 q1, q4, #7
str r2, [r7, #96]
add r3, r3, r2
str r3, [r7, #104]
vshl.i32 q5, q6, #7
eors r1, r1, r3
ldr r3, [r7, #92]
vsri.32 q1, q4, #25
eors r4, r4, r5
eor r0, r9, r0
eor r2, ip, r3
vsri.32 q5, q6, #25
ldr r3, [r7, #76]
ror r4, r4, #25
str r6, [r7, #84]
ror r0, r0, #25
@ Decrement the round counter. None of the instructions between here and
@ the bne below (ror, str, vext) modify the flags.
subs r3, r3, #1
str r5, [r7, #88]
2014-06-20 20:00:00 +01:00
ror r1, r1, #25
ror r2, r2, #25
2014-06-20 20:00:00 +01:00
@ Second set of lane rotations, undoing the first set.
vext.32 q15, q15, q15, #2
str r3, [r7, #76]
vext.32 q2, q2, q2, #1
vext.32 q8, q8, q8, #2
vext.32 q3, q3, q3, #1
vext.32 q5, q5, q5, #3
vext.32 q1, q1, q1, #3
2014-06-20 20:00:00 +01:00
bne .L3
@ After the 10 passes of .L3: add the input rows (q12, q13, q14, q11) back
@ into both vector states, XOR 192 source bytes with the result and store
@ them to the destination.
@ Bytes 0..63 use state 2 (q4, q5, q15, q2), bytes 64..127 use state 1
@ (q6, q1, q7, q3) and bytes 128..191 use the scalar state.
@ q11 is incremented by q0 three times in this block, so lane 0 of q11
@ advances by 3 per 192-byte iteration.
ldr r3, [r7, #68]
vadd.i32 q4, q12, q10
str r9, [r7, #100]
mov r9, r10
mov r10, r8
ldr r8, [r7, #80]
str lr, [r7, #80]
mov lr, r5
ldr r5, [r7, #40]
vadd.i32 q5, q13, q5
ldr r6, [r7, #64] @ r6 = current source pointer
vadd.i32 q15, q14, q15
add fp, fp, r5
ldr r5, [r7, #36]
str r4, [r7, #52]
vadd.i32 q7, q14, q8
ldr r4, [r7, #96]
add r5, r10, r5
str r3, [r7, #96]
vadd.i32 q2, q11, q2
ldr r3, [r6, #12] @ unaligned
vadd.i32 q6, q12, q9
str r0, [r7, #76]
2014-06-20 20:00:00 +01:00
vadd.i32 q1, q13, q1
ldr r0, [r6] @ unaligned
vadd.i32 q11, q11, q0
str r1, [r7, #92]
str r2, [r7, #56]
vadd.i32 q3, q11, q3
ldr r1, [r6, #4] @ unaligned
vadd.i32 q11, q11, q0
ldr r2, [r6, #8] @ unaligned
str r5, [r7, #88]
vadd.i32 q11, q11, q0
ldr r5, [r7, #96]
ldr r10, [r7, #68]
@ Pattern repeated for each of the eight 16-byte chunks below: four word
@ loads from the source, stmia into the staging area at scratch + 80, veor
@ with one keystream row, then ldmia and four word stores to the destination.
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r2, [r7, #72]
ldr r1, [r7, #32]
ldr r3, [r7, #48]
vldr d20, [r2, #80]
vldr d21, [r2, #88]
add r9, r9, r1
veor q10, q10, q4
ldr r1, [r7, #28]
add r0, r8, r1
str r0, [r7, #24]
vstr d20, [r2, #80]
vstr d21, [r2, #88]
adds r0, r4, r3
str r0, [r7, #20]
ldmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r4, [r7, #60] @ current destination pointer
str r0, [r4] @ unaligned
mov r4, r10
ldr r0, [r7, #60]
str r1, [r0, #4] @ unaligned
mov r8, r0
str r2, [r0, #8] @ unaligned
str r3, [r0, #12] @ unaligned
ldr r0, [r6, #16]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r3, [r7, #72]
vldr d20, [r3, #80]
vldr d21, [r3, #88]
veor q10, q10, q5
vstr d20, [r3, #80]
vstr d21, [r3, #88]
ldmia r4!, {r0, r1, r2, r3}
mov r4, r8
str r0, [r8, #16] @ unaligned
str r1, [r8, #20] @ unaligned
str r2, [r8, #24] @ unaligned
str r3, [r8, #28] @ unaligned
ldr r0, [r6, #32]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r0, [r7, #72]
vldr d16, [r0, #80]
vldr d17, [r0, #88]
veor q15, q8, q15
vstr d30, [r0, #80]
vstr d31, [r0, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #32] @ unaligned
str r1, [r4, #36] @ unaligned
str r2, [r4, #40] @ unaligned
str r3, [r4, #44] @ unaligned
ldr r0, [r6, #48]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r2, [r7, #72]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q9, q9, q2
vstr d18, [r2, #80]
vstr d19, [r2, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #48] @ unaligned
str r1, [r4, #52] @ unaligned
str r2, [r4, #56] @ unaligned
str r3, [r4, #60] @ unaligned
ldr r0, [r6, #64]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r2, [r7, #72]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q9, q9, q6
vstr d18, [r2, #80]
vstr d19, [r2, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #64] @ unaligned
str r1, [r4, #68] @ unaligned
str r2, [r4, #72] @ unaligned
str r3, [r4, #76] @ unaligned
ldr r0, [r6, #80]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r2, [r7, #72]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q1, q9, q1
vstr d2, [r2, #80]
vstr d3, [r2, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #80] @ unaligned
str r1, [r4, #84] @ unaligned
str r2, [r4, #88] @ unaligned
str r3, [r4, #92] @ unaligned
ldr r0, [r6, #96]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r3, [r7, #72]
vldr d16, [r3, #80]
vldr d17, [r3, #88]
veor q8, q8, q7
vstr d16, [r3, #80]
vstr d17, [r3, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #96] @ unaligned
str r1, [r4, #100] @ unaligned
str r2, [r4, #104] @ unaligned
str r3, [r4, #108] @ unaligned
ldr r0, [r6, #112]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r0, [r7, #72]
ldr r6, [r7, #44]
vldr d16, [r0, #80]
vldr d17, [r0, #88]
veor q8, q8, q3
vstr d16, [r0, #80]
vstr d17, [r0, #88]
ldmia r5!, {r0, r1, r2, r3}
mov r5, r4
mov r8, r5
str r1, [r4, #116] @ unaligned
ldr r1, [r7, #64]
str r0, [r4, #112] @ unaligned
2014-06-20 20:00:00 +01:00
mov r0, r5
str r2, [r4, #120] @ unaligned
str r3, [r4, #124] @ unaligned
@ Bytes 128..191 (scalar state): each output word is the working word,
@ with the matching input-state word added, XORed with the source word at
@ the same offset. Words at offsets 144..172 add the words read through
@ [r7, #44] (r6 here); offsets 184 and 188 add the words read through the
@ first stack argument.
ldr r3, [r1, #128]
ldr r2, [r7, #88]
eor r3, fp, r3
str r3, [r4, #128]
ldr r3, [r1, #132]
mov r4, r1
mov r1, r5
eors r2, r2, r3
str r2, [r8, #132]
ldr r3, [r4, #136]
ldr r2, [r7, #24]
eor r3, r9, r3
str r3, [r5, #136]
ldr r3, [r4, #140]
2014-06-20 20:00:00 +01:00
eors r3, r3, r2
str r3, [r5, #140]
mov r5, r4
ldr r3, [r6]
ldr r2, [r4, #144]
ldr r4, [r7, #52]
add r4, r4, r3
eors r2, r2, r4
mov r4, r1
str r2, [r1, #144]
ldr r1, [r7, #76]
ldr r2, [r6, #4]
ldr r3, [r5, #148]
mov r8, r1
add r8, r8, r2
mov r2, r8
eors r3, r3, r2
str r3, [r0, #148]
mov r0, r4
ldr r2, [r6, #8]
ldr r1, [r7, #92]
ldr r3, [r5, #152]
mov r8, r1
add r8, r8, r2
ldr r1, [r7, #56]
mov r2, r8
eors r3, r3, r2
str r3, [r4, #152]
mov r8, r6
ldr r2, [r6, #12]
mov r4, r5
ldr r3, [r5, #156]
add r1, r1, r2
eors r3, r3, r1
str r3, [r0, #156]
ldr r2, [r6, #16]
mov r1, r0
ldr r3, [r5, #160]
add ip, ip, r2
2014-06-20 20:00:00 +01:00
eor r3, ip, r3
str r3, [r0, #160]
ldr r2, [r6, #20]
mov ip, r0
ldr r3, [r5, #164]
add lr, lr, r2
ldr r2, [r7, #100]
eor r3, lr, r3
str r3, [r1, #164]
ldr r6, [r6, #24]
ldr r3, [r4, #168]
add r2, r2, r6
eors r3, r3, r2
ldr r2, [r7, #104]
str r3, [r0, #168]
ldr r5, [r8, #28]
ldr r3, [r4, #172]
add r2, r2, r5
mov r5, r4
eors r3, r3, r2
mov r2, r0
str r3, [r0, #172]
ldr r3, [r7, #48]
ldr r4, [r4, #176]
ldr r0, [r7, #20]
adds r1, r3, #3 @ the scalar-state word at [r7, #48] advances by 3 per iteration
ldr r3, [r7, #84]
eors r4, r4, r0
str r4, [r2, #176]
ldr r0, [r5, #180]
mov r4, r2
str r1, [r7, #48]
eors r3, r3, r0
mov r0, r3
ldr r3, [r7, #232]
str r0, [r2, #180]
ldr r1, [r3]
ldr r3, [r5, #184]
ldr r2, [r7, #80]
add r2, r2, r1
mov r1, r5
eors r3, r3, r2
str r3, [ip, #184]
ldr r3, [r7, #232]
adds r1, r1, #192
str r1, [r7, #64] @ source pointer += 192
ldr r1, [r7, #108]
ldr r2, [r3, #4]
ldr r3, [r5, #188]
add r1, r1, r2
mov r2, r1
eors r2, r2, r3
str r2, [ip, #188]
mov r3, r4
ldr r2, [r7, #16]
adds r3, r3, #192
str r3, [r7, #60] @ destination pointer += 192
cmp r2, r3 @ reached destination + 192*n ?
beq .L85
@ Not finished: reload the two words behind the first stack argument into
@ r1 and r2, as .L4 expects, and run another iteration.
ldr r3, [r7, #232]
ldmia r3, {r1, r2}
b .L4
@ .L85: the 192-byte loop is done; advance the source pointer kept at
@ [r7, #12] by the 192*n bytes consumed ([r7, #4]).
.L85:
ldr r3, [r7, #12]
ldr r2, [r7, #4]
add r3, r3, r2
str r3, [r7, #12]
2014-06-20 20:00:00 +01:00
@ .L2: process the whole 64-byte blocks left after the 192-byte loop.
@ Source pointer is [r7, #12], destination pointer is [r7, #16].
.L2:
ldr r1, [r7, #8] @ r1 = byte count
@ fp = count / 192 via the 0xAAAAAAAB multiply, then fp = 192 * that,
@ then fp = count - fp (bytes left), then fp = bytes left / 64.
movw r2, #43691
movt r2, 43690
umull r2, r3, r1, r2
lsr fp, r3, #7
lsl r3, fp, #8
sub fp, r3, fp, lsl #6
rsb fp, fp, r1
lsrs fp, fp, #6
beq .L6 @ no whole block left
ldr r6, [r7, #72] @ r6 = aligned scratch buffer
ldr r5, [r7, #12] @ r5 = source pointer
ldr r4, [r7, #16] @ r4 = destination pointer
mov r3, r6
adds r3, r3, #80
vldr d30, .L95 @ q15 = {1, 0, 0, 0}
vldr d31, .L95+8
mov lr, r3 @ lr = staging area at scratch + 80
@ [r7, #104] keeps the block count for .L88; [r7, #108] is the loop control
@ word tested at the bottom of .L8.
str fp, [r7, #104]
str fp, [r7, #108]
@ .L8: one 64-byte block per iteration, using a single NEON state
@ (q10, q9, q8, q2 with q3 as temporary).
.L8:
vmov q2, q11 @ v4si
2014-06-20 20:00:00 +01:00
movs r3, #10 @ round-loop counter
vmov q8, q14 @ v4si
vmov q9, q13 @ v4si
vmov q10, q12 @ v4si
2014-06-20 20:00:00 +01:00
@ .L7: round loop, 10 passes; same add / xor / rotate (16, 12, 8, 7) and
@ vext lane-rotation structure as .L3, for one state only. The subs sets
@ the flags for the bne at the bottom; the NEON instructions in between
@ leave them unchanged.
.L7:
vadd.i32 q10, q10, q9
2014-06-20 20:00:00 +01:00
subs r3, r3, #1
veor q3, q2, q10
vrev32.16 q3, q3
vadd.i32 q8, q8, q3
veor q9, q8, q9
vshl.i32 q2, q9, #12
vsri.32 q2, q9, #20
vadd.i32 q10, q10, q2
veor q3, q10, q3
vshl.i32 q9, q3, #8
vsri.32 q9, q3, #24
2014-06-20 20:00:00 +01:00
vadd.i32 q8, q8, q9
vext.32 q9, q9, q9, #3
veor q2, q8, q2
vext.32 q8, q8, q8, #2
vshl.i32 q3, q2, #7
vsri.32 q3, q2, #25
vext.32 q3, q3, q3, #1
vadd.i32 q10, q10, q3
veor q9, q10, q9
vrev32.16 q9, q9
vadd.i32 q8, q8, q9
veor q3, q8, q3
vshl.i32 q2, q3, #12
vsri.32 q2, q3, #20
vadd.i32 q10, q10, q2
vmov q3, q2 @ v4si
veor q9, q10, q9
vshl.i32 q2, q9, #8
vsri.32 q2, q9, #24
vadd.i32 q8, q8, q2
vext.32 q2, q2, q2, #1
veor q3, q8, q3
vext.32 q8, q8, q8, #2
vshl.i32 q9, q3, #7
vsri.32 q9, q3, #25
vext.32 q9, q9, q9, #3
2014-06-20 20:00:00 +01:00
bne .L7
@ Add the input rows back (q12, q13, q14, q11), then XOR and store 64 bytes
@ in four 16-byte chunks through the staging area at scratch + 80.
ldr r0, [r5] @ unaligned
vadd.i32 q1, q12, q10
ldr r1, [r5, #4] @ unaligned
mov ip, lr
ldr r2, [r5, #8] @ unaligned
mov r9, lr
ldr r3, [r5, #12] @ unaligned
mov r10, r5
vadd.i32 q9, q13, q9
mov r8, lr
vadd.i32 q8, q14, q8
stmia ip!, {r0, r1, r2, r3}
mov ip, lr
vldr d20, [r6, #80]
vldr d21, [r6, #88]
vadd.i32 q3, q11, q2
veor q10, q10, q1
vadd.i32 q11, q11, q15 @ lane 0 of q11 += 1 for the next block
vstr d20, [r6, #80]
vstr d21, [r6, #88]
ldmia r9!, {r0, r1, r2, r3}
mov r9, r5
str r0, [r4] @ unaligned
str r1, [r4, #4] @ unaligned
str r2, [r4, #8] @ unaligned
str r3, [r4, #12] @ unaligned
ldr r0, [r10, #16]! @ unaligned
ldr r1, [r10, #4] @ unaligned
ldr r2, [r10, #8] @ unaligned
ldr r3, [r10, #12] @ unaligned
add r10, r4, #48
adds r4, r4, #64 @ destination pointer += 64; later stores use negative offsets
stmia r8!, {r0, r1, r2, r3}
mov r8, lr
vldr d20, [r6, #80]
vldr d21, [r6, #88]
veor q10, q10, q9
vstr d20, [r6, #80]
vstr d21, [r6, #88]
ldmia ip!, {r0, r1, r2, r3}
mov ip, lr
str r0, [r4, #-48] @ unaligned
str r1, [r4, #-44] @ unaligned
str r2, [r4, #-40] @ unaligned
str r3, [r4, #-36] @ unaligned
ldr r0, [r9, #32]! @ unaligned
ldr r1, [r9, #4] @ unaligned
ldr r2, [r9, #8] @ unaligned
ldr r3, [r9, #12] @ unaligned
mov r9, r5
adds r5, r5, #64 @ source pointer += 64
stmia r8!, {r0, r1, r2, r3}
mov r8, lr
vldr d18, [r6, #80]
vldr d19, [r6, #88]
veor q9, q9, q8
vstr d18, [r6, #80]
vstr d19, [r6, #88]
ldmia ip!, {r0, r1, r2, r3}
mov ip, lr
str r0, [r4, #-32] @ unaligned
str r1, [r4, #-28] @ unaligned
str r2, [r4, #-24] @ unaligned
str r3, [r4, #-20] @ unaligned
ldr r0, [r9, #48]! @ unaligned
ldr r1, [r9, #4] @ unaligned
ldr r2, [r9, #8] @ unaligned
ldr r3, [r9, #12] @ unaligned
stmia r8!, {r0, r1, r2, r3}
vldr d16, [r6, #80]
vldr d17, [r6, #88]
veor q8, q8, q3
vstr d16, [r6, #80]
vstr d17, [r6, #88]
ldmia ip!, {r0, r1, r2, r3}
str r0, [r4, #-16] @ unaligned
str r1, [r4, #-12] @ unaligned
str r3, [r10, #12] @ unaligned
ldr r3, [r7, #108]
str r2, [r10, #8] @ unaligned
@ Loop control: exit when the control word is 1; otherwise set it to 1 and
@ run one more block. With at most 191 bytes left after the 192-byte loop
@ the initial value is 1 or 2.
cmp r3, #1
beq .L88
movs r3, #1
str r3, [r7, #108]
b .L8
.L96:
.align 3
@ Literal pool: the vector {1, 0, 0, 0} loaded into q15 above.
.L95:
.word 1
.word 0
.word 0
.word 0
@ .L88: advance the saved source ([r7, #12]) and destination ([r7, #16])
@ pointers by 64 bytes per block processed.
.L88:
ldr fp, [r7, #104]
ldr r3, [r7, #12]
lsl fp, fp, #6
add r3, r3, fp
str r3, [r7, #12]
ldr r3, [r7, #16]
add r3, r3, fp
str r3, [r7, #16]
@ .L6: final partial block. r9 = byte count & 63; nothing to do if zero.
@ Otherwise one more state is run through the round loop at .L10 and the
@ resulting keystream is applied to the remaining bytes.
.L6:
ldr r3, [r7, #8]
ands r9, r3, #63
2014-06-20 20:00:00 +01:00
beq .L1
vmov q3, q11 @ v4si
2014-06-20 20:00:00 +01:00
movs r3, #10 @ round-loop counter
vmov q8, q14 @ v4si
mov r5, r9
vmov q15, q13 @ v4si
vmov q10, q12 @ v4si
2014-06-20 20:00:00 +01:00
@ .L10: round loop, 10 passes, same structure as .L7 with the state held in
@ q10, q15, q8, q3 (q9 as temporary).
.L10:
vadd.i32 q10, q10, q15
2014-06-20 20:00:00 +01:00
subs r3, r3, #1
veor q9, q3, q10
vrev32.16 q9, q9
vadd.i32 q8, q8, q9
veor q15, q8, q15
vshl.i32 q3, q15, #12
vsri.32 q3, q15, #20
vadd.i32 q10, q10, q3
veor q15, q10, q9
vshl.i32 q9, q15, #8
vsri.32 q9, q15, #24
vadd.i32 q8, q8, q9
2014-06-20 20:00:00 +01:00
vext.32 q9, q9, q9, #3
veor q3, q8, q3
vext.32 q8, q8, q8, #2
vshl.i32 q15, q3, #7
vsri.32 q15, q3, #25
vext.32 q15, q15, q15, #1
vadd.i32 q10, q10, q15
veor q9, q10, q9
vrev32.16 q9, q9
vadd.i32 q8, q8, q9
veor q15, q8, q15
vshl.i32 q3, q15, #12
vsri.32 q3, q15, #20
vadd.i32 q10, q10, q3
vmov q15, q3 @ v4si
veor q9, q10, q9
vshl.i32 q3, q9, #8
vsri.32 q3, q9, #24
vadd.i32 q8, q8, q3
vext.32 q3, q3, q3, #1
veor q9, q8, q15
vext.32 q8, q8, q8, #2
vshl.i32 q15, q9, #7
vsri.32 q15, q9, #25
vext.32 q15, q15, q15, #3
2014-06-20 20:00:00 +01:00
bne .L10
cmp r5, #15
mov r9, r5
bhi .L89 @ 16 or more bytes left: handle full 16-byte chunks first
@ Fewer than 16 bytes left: store the first keystream row (q12 + q10) at
@ the start of the aligned scratch buffer for the byte-wise XOR at .L14.
vadd.i32 q12, q12, q10
ldr r3, [r7, #72]
vst1.64 {d24-d25}, [r3:128]
2014-06-20 20:00:00 +01:00
@ .L14: XOR the bytes of the tail that were not covered by full 16-byte
@ chunks. r9 = byte count & 63 (tail length), r2 = byte count & 48 (bytes
@ of the tail already written by .L89/.L90/.L91). The keystream for offset
@ r2 onward is in the scratch buffer at [r7, #72].
.L14:
ldr r3, [r7, #8]
and r2, r3, #48
cmp r9, r2
2014-06-20 20:00:00 +01:00
bls .L1 @ nothing left
ldr r6, [r7, #16]
add r3, r2, #16
ldr r1, [r7, #12]
rsb ip, r2, r9 @ ip = r9 - r2 = bytes still to process
adds r0, r1, r2 @ r0 = source + r2
mov r4, r6
add r1, r1, r3 @ r1 = source + r2 + 16
add r4, r4, r2 @ r4 = destination + r2
add r3, r3, r6 @ r3 = destination + r2 + 16
@ r3 = 1 when the 16-byte source and destination windows do not overlap
@ (source start >= destination end, or destination start >= source end).
cmp r0, r3
2014-06-20 20:00:00 +01:00
it cc
cmpcc r4, r1
ite cs
movcs r3, #1
movcc r3, #0
@ The vector path is used only if additionally ip > 18.
@ NOTE(review): ip is (count & 63) - (count & 48), i.e. count & 15, so it
@ never exceeds 15 here and the branch to .L16 below appears to be always
@ taken; the code from here down to .L19 then never executes. Confirm.
cmp ip, #18
2014-06-20 20:00:00 +01:00
ite ls
movls r3, #0
andhi r3, r3, #1
cmp r3, #0
beq .L16
@ Peel r1 = min((-(source address & 7)) & 15, ip) leading bytes one at a
@ time (unrolled up to 15) so that the source address used at .L17 is
@ 8-byte aligned. r3 tracks the current offset into the tail.
and r1, r0, #7
mov r3, r2
negs r1, r1
and r1, r1, #15
cmp r1, ip
it cs
movcs r1, ip
cmp r1, #0
beq .L17
ldr r5, [r7, #72]
cmp r1, #1
ldrb r0, [r0] @ zero_extendqisi2
add r3, r2, #1
ldrb lr, [r5, r2] @ zero_extendqisi2
mov r6, r5
eor r0, lr, r0
strb r0, [r4]
beq .L17
ldr r0, [r7, #12]
cmp r1, #2
ldrb r4, [r5, r3] @ zero_extendqisi2
ldr r5, [r7, #16]
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #2
beq .L17
ldr r0, [r7, #12]
cmp r1, #3
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #3
beq .L17
ldr r0, [r7, #12]
cmp r1, #4
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #4
beq .L17
ldr r0, [r7, #12]
cmp r1, #5
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #5
beq .L17
ldr r0, [r7, #12]
cmp r1, #6
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #6
beq .L17
ldr r0, [r7, #12]
cmp r1, #7
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #7
beq .L17
ldr r0, [r7, #12]
cmp r1, #8
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #8
beq .L17
ldr r0, [r7, #12]
cmp r1, #9
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #9
beq .L17
ldr r0, [r7, #12]
cmp r1, #10
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #10
beq .L17
ldr r0, [r7, #12]
cmp r1, #11
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #11
beq .L17
ldr r0, [r7, #12]
cmp r1, #12
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #12
beq .L17
ldr r0, [r7, #12]
cmp r1, #13
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #13
beq .L17
ldr r0, [r7, #12]
@ The 14th byte is always processed here; only a peel count of exactly 15
@ continues to the 15th byte below.
cmp r1, #15
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #14
bne .L17
ldr r0, [r7, #12]
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eors r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #15
2014-06-20 20:00:00 +01:00
@ .L17: vector part of the tail XOR. On entry r1 = number of bytes peeled,
@ ip = bytes that were left before peeling, r2 = starting offset, r3 =
@ current offset. r4 = bytes left after peeling, r6 = number of 16-byte
@ chunks computed from it, lr = 16 * r6. If fewer than 16 bytes are left
@ (ip - 1 - r1 <= 14) go straight to the byte loop at .L19.
.L17:
rsb r4, r1, ip
add r0, ip, #-1
sub r6, r4, #16
subs r0, r0, r1
cmp r0, #14
lsr r6, r6, #4
add r6, r6, #1
lsl lr, r6, #4
bls .L19
add r2, r2, r1
ldr r1, [r7, #12]
ldr r5, [r7, #16]
cmp r6, #1
add r0, r1, r2 @ r0 = source + offset
ldr r1, [r7, #72]
add r1, r1, r2 @ r1 = keystream (scratch) + offset
@ Source is loaded with a :64 alignment qualifier (8-byte aligned after the
@ peel); keystream and destination use byte-aligned vld1.8 / vst1.8.
vld1.64 {d18-d19}, [r0:64]
add r2, r2, r5 @ r2 = destination + offset
vld1.8 {q8}, [r1]
veor q8, q8, q9
vst1.8 {q8}, [r2]
beq .L20
add r8, r1, #16
add ip, r2, #16
vldr d18, [r0, #16]
vldr d19, [r0, #24]
cmp r6, #2
vld1.8 {q8}, [r8]
veor q8, q8, q9
vst1.8 {q8}, [ip]
beq .L20
add r8, r1, #32
add ip, r2, #32
vldr d18, [r0, #32]
vldr d19, [r0, #40]
cmp r6, #3
vld1.8 {q8}, [r8]
veor q8, q8, q9
vst1.8 {q8}, [ip]
beq .L20
adds r1, r1, #48
adds r2, r2, #48
vldr d18, [r0, #48]
vldr d19, [r0, #56]
vld1.8 {q8}, [r1]
veor q8, q8, q9
vst1.8 {q8}, [r2]
@ .L20: advance the offset past the vector chunks; finished if they covered
@ everything that was left.
.L20:
cmp lr, r4
add r3, r3, lr
2014-06-20 20:00:00 +01:00
beq .L1
@ .L19: unrolled byte-wise XOR of up to 15 trailing bytes. r3 = current
@ offset, r9 = tail length; for each byte, destination[i] = source[i] ^
@ scratch[i], stopping as soon as the next offset reaches r9.
.L19:
ldr r4, [r7, #72]
adds r2, r3, #1
ldr r1, [r7, #12]
cmp r2, r9
ldr r5, [r7, #16]
ldrb r0, [r4, r3] @ zero_extendqisi2
ldrb r1, [r1, r3] @ zero_extendqisi2
eor r1, r1, r0
strb r1, [r5, r3]
bcs .L1
ldr r0, [r7, #12]
adds r1, r3, #2
mov r6, r4
cmp r9, r1
ldrb r4, [r4, r2] @ zero_extendqisi2
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
adds r2, r3, #3
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
adds r1, r3, #4
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
adds r2, r3, #5
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
adds r1, r3, #6
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
adds r2, r3, #7
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
add r1, r3, #8
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
add r2, r3, #9
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
add r1, r3, #10
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
add r2, r3, #11
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
add r1, r3, #12
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
add r2, r3, #13
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r1, [r7, #12]
adds r3, r3, #14
ldrb r0, [r6, r2] @ zero_extendqisi2
cmp r9, r3
ldrb r1, [r1, r2] @ zero_extendqisi2
eor r1, r1, r0
strb r1, [r5, r2]
bls .L1
@ 15th and last possible byte; execution then continues into the epilogue
@ at .L1.
ldr r2, [r7, #72]
ldrb r1, [r2, r3] @ zero_extendqisi2
ldr r2, [r7, #12]
ldrb r2, [r2, r3] @ zero_extendqisi2
2014-06-20 20:00:00 +01:00
eors r2, r2, r1
ldr r1, [r7, #16]
strb r2, [r1, r3]
2014-06-20 20:00:00 +01:00
@ .L1: common epilogue. Releases the frame (r7 + 132 is the stack pointer
@ value right after the vpush in the prologue), restores d8-d15 and the
@ saved core registers, and returns by popping the saved lr into pc.
.L1:
adds r7, r7, #132
2014-06-20 20:00:00 +01:00
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
@ .L89: the tail has at least 16 bytes. XOR bytes 0..15 of the tail with the
@ first keystream row (q12 + q10) through the staging area at scratch + 80.
@ The cmp r9, #31 result is consumed by the bhi at the end of this block;
@ the loads, stores, mov/add and NEON instructions in between leave the
@ flags unchanged.
.L89:
ldr r4, [r7, #12]
vadd.i32 q12, q12, q10
ldr r5, [r7, #72]
cmp r9, #31
ldr r0, [r4] @ unaligned
add r6, r5, #80
ldr r1, [r4, #4] @ unaligned
ldr r2, [r4, #8] @ unaligned
mov r5, r6
ldr r3, [r4, #12] @ unaligned
mov r4, r6
str r6, [r7, #68]
stmia r6!, {r0, r1, r2, r3}
ldr r2, [r7, #72]
ldr r6, [r7, #16]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q9, q9, q12
vstr d18, [r2, #80]
vstr d19, [r2, #88]
ldmia r4!, {r0, r1, r2, r3}
str r1, [r6, #4] @ unaligned
mov r1, r6
str r0, [r6] @ unaligned
str r2, [r6, #8] @ unaligned
str r3, [r6, #12] @ unaligned
bhi .L90 @ 32 or more bytes in the tail
@ 16..31 bytes: store the second keystream row (q13 + q15) at scratch + 16
@ for the byte-wise XOR at .L14.
vadd.i32 q13, q13, q15
ldr r3, [r7, #72]
vstr d26, [r3, #16]
vstr d27, [r3, #24]
2014-06-20 20:00:00 +01:00
b .L14
@ .L16: plain byte loop for the tail XOR. On entry r0 = source + r2,
@ r4 = destination + r2, r2 = starting offset, r9 = tail length.
@ r5 = source + r9 is the end marker; r3 = scratch + r2 - 1 is advanced by
@ the pre-indexed load in the loop.
.L16:
subs r3, r2, #1
ldr r2, [r7, #12]
add r2, r2, r9
mov r5, r2
ldr r2, [r7, #72]
add r2, r2, r3
mov r3, r2
@ .L24: destination byte = source byte ^ keystream byte, until the source
@ pointer reaches the end marker. Followed by a copy of the .L1 epilogue.
.L24:
ldrb r1, [r0], #1 @ zero_extendqisi2
ldrb r2, [r3, #1]! @ zero_extendqisi2
cmp r0, r5
eor r2, r2, r1
strb r2, [r4], #1
bne .L24
adds r7, r7, #132
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
@ .L26: entered from the prologue when the byte count is below 192. The
@ destination pointer (still in fp) becomes the working pointer at
@ [r7, #16]; [r7, #12] already holds the source pointer.
.L26:
str fp, [r7, #16]
2014-06-20 20:00:00 +01:00
b .L2
@ .L90: the tail has at least 32 bytes. XOR bytes 16..31 of the tail with
@ the second keystream row (q13 + q15). On entry r1 = destination pointer
@ and r5 = staging area (scratch + 80), as left by .L89.
.L90:
ldr r3, [r7, #12]
add lr, r1, #16
mov r4, r5
mov r6, r5
mov r5, r1
vadd.i32 q13, q13, q15
ldr r0, [r3, #16]! @ unaligned
cmp r9, #47 @ consumed by the bhi below
ldr r1, [r3, #4] @ unaligned
ldr r2, [r3, #8] @ unaligned
ldr r3, [r3, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
ldr r2, [r7, #72]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q13, q9, q13
vstr d26, [r2, #80]
vstr d27, [r2, #88]
ldmia r4!, {r0, r1, r2, r3}
str r0, [r5, #16] @ unaligned
str r1, [lr, #4] @ unaligned
str r2, [lr, #8] @ unaligned
str r3, [lr, #12] @ unaligned
bhi .L91 @ 48 or more bytes in the tail
@ 32..47 bytes: store the third keystream row (q14 + q8) at scratch + 32
@ for the byte-wise XOR at .L14.
vadd.i32 q8, q14, q8
ldr r3, [r7, #72]
vstr d16, [r3, #32]
vstr d17, [r3, #40]
b .L14
@ .L91: the tail has 48..63 bytes. XOR bytes 32..47 with the third
@ keystream row (q14 + q8) and store the fourth row (q11 + q3) at
@ scratch + 48 for the byte-wise XOR at .L14.
.L91:
ldr r3, [r7, #12]
add lr, r5, #32
ldr r4, [r7, #68]
vadd.i32 q8, q14, q8
ldr r5, [r7, #72]
vadd.i32 q11, q11, q3
ldr r0, [r3, #32]! @ unaligned
mov r6, r4
vstr d22, [r5, #48]
vstr d23, [r5, #56]
ldr r1, [r3, #4] @ unaligned
ldr r2, [r3, #8] @ unaligned
ldr r3, [r3, #12] @ unaligned
stmia r4!, {r0, r1, r2, r3}
vldr d18, [r5, #80]
vldr d19, [r5, #88]
veor q9, q9, q8
ldr r4, [r7, #16]
vstr d18, [r5, #80]
vstr d19, [r5, #88]
ldmia r6!, {r0, r1, r2, r3}
str r0, [r4, #32] @ unaligned
str r1, [lr, #4] @ unaligned
str r2, [lr, #8] @ unaligned
str r3, [lr, #12] @ unaligned
2014-06-20 20:00:00 +01:00
b .L14
.size CRYPTO_chacha_20_neon, .-CRYPTO_chacha_20_neon
@ Read-only constants. .LANCHOR0 is located PC-relatively at .LPIC16 and its
@ four words are loaded into q12. In hexadecimal they are 0x61707865,
@ 0x3320646e, 0x79622d32 and 0x6b206574, which as little-endian bytes spell
@ the ASCII string "expand 32-byte k".
.section .rodata
.align 2
2014-06-20 20:00:00 +01:00
.LANCHOR0 = . + 0
.LC0:
.word 1634760805
.word 857760878
.word 2036477234
.word 1797285236
.ident "GCC: (Linaro GCC 2014.11) 4.9.3 20141031 (prerelease)"
2014-06-20 20:00:00 +01:00
@ Empty .note.GNU-stack section, as emitted by the compiler.
.section .note.GNU-stack,"",%progbits