boringssl/crypto/chacha/chacha_vec_arm.S
Adam Langley 868c7ef1f4 Don't assume alignment of ChaCha key on ARM.
When addressing [1], I checked the AEAD code but brain-farted: a key is
aligned in that code, but it's the Poly1305 key, which doesn't matter
here.

It would be nice to align the ChaCha key too, but Android doesn't have
|posix_memalign| in the versions that we care about. It does have
|memalign|, but that's documented as "obsolete" and we don't have a
concept of an Android OS yet and I don't want to add one just for this.

So this change uses the buffer for loading the key again.

(Note that we never used to check for alignment of the |key| before
calling this. We must have gotten it for free somehow when checking the
alignment of |in| and |out|. But there are clearly some paths that don't
have an aligned key:
https://code.google.com/p/chromium/issues/detail?id=454308.)

At least the generation script started paying off immediately ☺.

[1] https://boringssl-review.googlesource.com/#/c/3132/1/crypto/chacha/chacha_vec.c@185

Change-Id: I4f893ba0733440fddd453f9636cc2aeaf05076ed
Reviewed-on: https://boringssl-review.googlesource.com/3270
Reviewed-by: Adam Langley <agl@google.com>
2015-02-03 00:34:17 +00:00

1436 lines
29 KiB
ArmAsm

# Copyright (c) 2014, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
# This file contains a pre-compiled version of chacha_vec.c for ARM. This is
# needed to support switching on NEON code at runtime. If the whole of OpenSSL
# were to be compiled with the needed flags to build chacha_vec.c, then it
# wouldn't be possible to run on non-NEON systems.
#
# This file was generated by chacha_vec_arm_generate.go using the following
# compiler command:
#
# /opt/gcc-linaro-4.9-2014.11-x86_64_arm-linux-gnueabihf/bin/arm-linux-gnueabihf-gcc -O3 -mcpu=cortex-a8 -mfpu=neon -fpic -DASM_GEN -I ../../include -S chacha_vec.c -o -
#if !defined(OPENSSL_NO_ASM)
.syntax unified
.cpu cortex-a8
.eabi_attribute 27, 3
# EABI attribute 28 sets whether VFP register arguments were used to build this
# file. If object files are inconsistent on this point, the linker will refuse
# to link them. Thus we report whatever the compiler expects since we don't use
# VFP arguments.
#if defined(__ARM_PCS_VFP)
.eabi_attribute 28, 1
#else
.eabi_attribute 28, 0
#endif
.fpu neon
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.thumb
.file "chacha_vec.c"
.text
.align 2
.global CRYPTO_chacha_20_neon
.hidden CRYPTO_chacha_20_neon
.thumb
.thumb_func
.type CRYPTO_chacha_20_neon, %function
CRYPTO_chacha_20_neon:
@ args = 8, pretend = 0, frame = 128
@ frame_needed = 1, uses_anonymous_args = 0
push {r4, r5, r6, r7, r8, r9, r10, fp, lr}
mov ip, r3
vpush.64 {d8, d9, d10, d11, d12, d13, d14, d15}
mov r9, r2
ldr r4, .L92+16
mov fp, r0
mov r10, r1
mov lr, ip
.LPIC16:
add r4, pc
movw r8, #43691
sub sp, sp, #132
add r7, sp, #0
sub sp, sp, #112
movt r8, 43690
str r0, [r7, #60]
str r1, [r7, #12]
str r2, [r7, #8]
ldmia r4, {r0, r1, r2, r3}
add r4, sp, #15
bic r4, r4, #15
str ip, [r7, #44]
umull r8, r9, r9, r8
mov r6, r4
adds r4, r4, #64
add r5, r6, #80
str r5, [r7, #68]
stmia r4, {r0, r1, r2, r3}
movs r4, #0
ldr r0, [ip] @ unaligned
ldr r1, [ip, #4] @ unaligned
ldr r2, [ip, #8] @ unaligned
ldr r3, [ip, #12] @ unaligned
vldr d24, [r6, #64]
vldr d25, [r6, #72]
str r4, [r7, #116]
mov r4, r5
stmia r5!, {r0, r1, r2, r3}
ldr r0, [lr, #16]! @ unaligned
ldr r3, [r7, #232]
str r6, [r7, #72]
ldr r2, [lr, #8] @ unaligned
ldr ip, [r3]
ldr r6, [r7, #236]
ldr r1, [lr, #4] @ unaligned
ldr r3, [lr, #12] @ unaligned
ldr r5, [r7, #72]
vldr d26, [r5, #80]
vldr d27, [r5, #88]
str ip, [r7, #120]
stmia r4!, {r0, r1, r2, r3}
lsrs r2, r9, #7
ldr r3, [r7, #232]
str r6, [r7, #112]
vldr d28, [r5, #80]
vldr d29, [r5, #88]
ldr r3, [r3, #4]
str r3, [r7, #124]
vldr d22, [r7, #112]
vldr d23, [r7, #120]
beq .L26
lsls r1, r2, #8
ldr r4, [r5, #64]
sub r2, r1, r2, lsl #6
str r2, [r7, #4]
vldr d0, .L92
vldr d1, .L92+8
mov r0, fp
mov r1, r2
ldr r2, [r5, #68]
str r4, [r7, #40]
ldr r4, [r5, #72]
str r2, [r7, #36]
ldr r2, [r5, #76]
str r4, [r7, #32]
adds r4, r6, #2
str r10, [r7, #64]
str r2, [r7, #28]
adds r2, r0, r1
mov r1, ip
str r2, [r7, #16]
mov r2, r3
str r4, [r7, #48]
.L4:
ldr r0, [r7, #44]
add r8, r7, #28
str r2, [r7, #108]
vadd.i32 q3, q11, q0
ldmia r8, {r8, r9, r10, fp}
vmov q8, q14 @ v4si
ldr r3, [r0]
vmov q1, q13 @ v4si
vmov q9, q12 @ v4si
vmov q2, q11 @ v4si
str r3, [r7, #52]
mov r3, r0
ldr r5, [r3, #8]
vmov q15, q14 @ v4si
ldr lr, [r3, #20]
vmov q5, q13 @ v4si
ldr r6, [r3, #12]
vmov q10, q12 @ v4si
str r5, [r7, #92]
mov r5, r3
ldr r4, [r5, #28]
movs r5, #10
ldr ip, [r3, #16]
ldr r3, [r3, #24]
str r4, [r7, #104]
ldr r4, [r7, #48]
str r3, [r7, #100]
mov r3, r1
str r6, [r7, #56]
str r4, [r7, #96]
str r8, [r7, #80]
mov r8, r10
ldr r0, [r0, #4]
mov r10, r9
ldr r1, [r7, #92]
ldr r2, [r7, #56]
ldr r9, [r7, #100]
ldr r4, [r7, #52]
str lr, [r7, #88]
mov lr, r3
str r5, [r7, #76]
movs r5, #0
str r5, [r7, #84]
b .L93
.L94:
.align 3
.L92:
.word 1
.word 0
.word 0
.word 0
.word .LANCHOR0-(.LPIC16+4)
.L93:
.L3:
vadd.i32 q9, q9, q1
add r3, r8, r0
vadd.i32 q10, q10, q5
add r5, fp, r4
veor q3, q3, q9
mov r6, r3
veor q2, q2, q10
ldr r3, [r7, #80]
str r5, [r7, #100]
add r10, r10, r1
vrev32.16 q3, q3
eor lr, lr, r10
vadd.i32 q8, q8, q3
vrev32.16 q2, q2
vadd.i32 q15, q15, q2
mov fp, r3
ldr r3, [r7, #96]
veor q4, q8, q1
str r6, [r7, #96]
veor q6, q15, q5
eors r3, r3, r5
mov r5, r6
ldr r6, [r7, #84]
vshl.i32 q1, q4, #12
vshl.i32 q5, q6, #12
add fp, fp, r2
eors r6, r6, r5
ror r3, r3, #16
vsri.32 q1, q4, #20
ror lr, lr, #16
mov r5, r6
ldr r6, [r7, #108]
vsri.32 q5, q6, #20
str r3, [r7, #108]
eor r6, r6, fp
ror r5, r5, #16
vadd.i32 q9, q9, q1
add r9, r9, lr
ror r3, r6, #16
ldr r6, [r7, #108]
vadd.i32 q10, q10, q5
str r3, [r7, #92]
veor q4, q9, q3
add ip, ip, r6
ldr r6, [r7, #88]
veor q6, q10, q2
eor r4, ip, r4
eor r1, r9, r1
vshl.i32 q3, q4, #8
mov r8, r6
ldr r6, [r7, #104]
vshl.i32 q2, q6, #8
ror r4, r4, #20
add r6, r6, r3
vsri.32 q3, q4, #24
str r6, [r7, #88]
eors r2, r2, r6
ldr r6, [r7, #100]
vsri.32 q2, q6, #24
add r8, r8, r5
ror r2, r2, #20
adds r6, r4, r6
vadd.i32 q4, q8, q3
eor r0, r8, r0
vadd.i32 q15, q15, q2
mov r3, r6
ldr r6, [r7, #96]
veor q6, q4, q1
ror r0, r0, #20
str r3, [r7, #96]
veor q5, q15, q5
adds r6, r0, r6
str r6, [r7, #104]
mov r6, r3
ldr r3, [r7, #108]
vshl.i32 q8, q6, #7
add fp, fp, r2
eors r3, r3, r6
ldr r6, [r7, #104]
vshl.i32 q1, q5, #7
ror r1, r1, #20
eors r5, r5, r6
vsri.32 q8, q6, #25
ldr r6, [r7, #92]
ror r3, r3, #24
ror r5, r5, #24
vsri.32 q1, q5, #25
str r5, [r7, #100]
eor r6, fp, r6
ldr r5, [r7, #100]
add r10, r10, r1
add ip, r3, ip
vext.32 q8, q8, q8, #1
str ip, [r7, #108]
add ip, r5, r8
ldr r5, [r7, #88]
eor lr, r10, lr
ror r6, r6, #24
vext.32 q1, q1, q1, #1
add r8, r6, r5
vadd.i32 q9, q9, q8
ldr r5, [r7, #108]
vext.32 q3, q3, q3, #3
vadd.i32 q10, q10, q1
ror lr, lr, #24
eor r0, ip, r0
vext.32 q2, q2, q2, #3
add r9, r9, lr
eors r4, r4, r5
veor q3, q9, q3
ldr r5, [r7, #96]
eor r1, r9, r1
ror r0, r0, #25
veor q2, q10, q2
adds r5, r0, r5
vext.32 q4, q4, q4, #2
str r5, [r7, #96]
ldr r5, [r7, #104]
ror r1, r1, #25
vrev32.16 q3, q3
eor r2, r8, r2
vext.32 q15, q15, q15, #2
adds r5, r1, r5
vadd.i32 q4, q4, q3
ror r4, r4, #25
vrev32.16 q2, q2
str r5, [r7, #84]
vadd.i32 q15, q15, q2
eors r3, r3, r5
ldr r5, [r7, #96]
add fp, fp, r4
veor q8, q4, q8
ror r2, r2, #25
veor q1, q15, q1
eor lr, fp, lr
eors r6, r6, r5
ror r3, r3, #16
ldr r5, [r7, #100]
add r10, r10, r2
str r3, [r7, #104]
ror lr, lr, #16
ldr r3, [r7, #104]
eor r5, r10, r5
vshl.i32 q5, q8, #12
add ip, lr, ip
vshl.i32 q6, q1, #12
str ip, [r7, #88]
add ip, r3, r8
str ip, [r7, #100]
ldr r3, [r7, #108]
ror r5, r5, #16
vsri.32 q5, q8, #20
ror r6, r6, #16
add ip, r5, r3
ldr r3, [r7, #88]
vsri.32 q6, q1, #20
add r9, r9, r6
eor r2, ip, r2
eors r4, r4, r3
ldr r3, [r7, #100]
eor r0, r9, r0
vadd.i32 q9, q9, q5
ror r4, r4, #20
eors r1, r1, r3
vadd.i32 q10, q10, q6
ror r3, r2, #20
str r3, [r7, #92]
ldr r3, [r7, #96]
veor q3, q9, q3
ror r0, r0, #20
add r8, r4, fp
veor q2, q10, q2
add fp, r0, r3
ldr r3, [r7, #84]
ror r1, r1, #20
mov r2, r8
vshl.i32 q8, q3, #8
str r8, [r7, #80]
add r8, r1, r3
ldr r3, [r7, #92]
vmov q1, q6 @ v4si
vshl.i32 q6, q2, #8
eor r6, fp, r6
add r10, r10, r3
ldr r3, [r7, #104]
vsri.32 q8, q3, #24
eor lr, r2, lr
eor r3, r8, r3
ror r2, r6, #24
vsri.32 q6, q2, #24
eor r5, r10, r5
str r2, [r7, #108]
ror r2, r3, #24
ldr r3, [r7, #88]
vmov q3, q8 @ v4si
vadd.i32 q15, q15, q6
ror lr, lr, #24
vadd.i32 q8, q4, q8
ror r6, r5, #24
add r5, lr, r3
ldr r3, [r7, #108]
veor q4, q8, q5
add ip, ip, r6
vmov q2, q6 @ v4si
add r9, r9, r3
veor q6, q15, q1
ldr r3, [r7, #100]
vshl.i32 q1, q4, #7
str r2, [r7, #96]
add r3, r3, r2
str r3, [r7, #104]
vshl.i32 q5, q6, #7
eors r1, r1, r3
ldr r3, [r7, #92]
vsri.32 q1, q4, #25
eors r4, r4, r5
eor r0, r9, r0
eor r2, ip, r3
vsri.32 q5, q6, #25
ldr r3, [r7, #76]
ror r4, r4, #25
str r6, [r7, #84]
ror r0, r0, #25
subs r3, r3, #1
str r5, [r7, #88]
ror r1, r1, #25
ror r2, r2, #25
vext.32 q15, q15, q15, #2
str r3, [r7, #76]
vext.32 q2, q2, q2, #1
vext.32 q8, q8, q8, #2
vext.32 q3, q3, q3, #1
vext.32 q5, q5, q5, #3
vext.32 q1, q1, q1, #3
bne .L3
ldr r3, [r7, #68]
vadd.i32 q4, q12, q10
str r9, [r7, #100]
mov r9, r10
mov r10, r8
ldr r8, [r7, #80]
str lr, [r7, #80]
mov lr, r5
ldr r5, [r7, #40]
vadd.i32 q5, q13, q5
ldr r6, [r7, #64]
vadd.i32 q15, q14, q15
add fp, fp, r5
ldr r5, [r7, #36]
str r4, [r7, #52]
vadd.i32 q7, q14, q8
ldr r4, [r7, #96]
add r5, r10, r5
str r3, [r7, #96]
vadd.i32 q2, q11, q2
ldr r3, [r6, #12] @ unaligned
vadd.i32 q6, q12, q9
str r0, [r7, #76]
vadd.i32 q1, q13, q1
ldr r0, [r6] @ unaligned
vadd.i32 q11, q11, q0
str r1, [r7, #92]
str r2, [r7, #56]
vadd.i32 q3, q11, q3
ldr r1, [r6, #4] @ unaligned
vadd.i32 q11, q11, q0
ldr r2, [r6, #8] @ unaligned
str r5, [r7, #88]
vadd.i32 q11, q11, q0
ldr r5, [r7, #96]
ldr r10, [r7, #68]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r2, [r7, #72]
ldr r1, [r7, #32]
ldr r3, [r7, #48]
vldr d20, [r2, #80]
vldr d21, [r2, #88]
add r9, r9, r1
veor q10, q10, q4
ldr r1, [r7, #28]
add r0, r8, r1
str r0, [r7, #24]
vstr d20, [r2, #80]
vstr d21, [r2, #88]
adds r0, r4, r3
str r0, [r7, #20]
ldmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r4, [r7, #60]
str r0, [r4] @ unaligned
mov r4, r10
ldr r0, [r7, #60]
str r1, [r0, #4] @ unaligned
mov r8, r0
str r2, [r0, #8] @ unaligned
str r3, [r0, #12] @ unaligned
ldr r0, [r6, #16]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r3, [r7, #72]
vldr d20, [r3, #80]
vldr d21, [r3, #88]
veor q10, q10, q5
vstr d20, [r3, #80]
vstr d21, [r3, #88]
ldmia r4!, {r0, r1, r2, r3}
mov r4, r8
str r0, [r8, #16] @ unaligned
str r1, [r8, #20] @ unaligned
str r2, [r8, #24] @ unaligned
str r3, [r8, #28] @ unaligned
ldr r0, [r6, #32]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r0, [r7, #72]
vldr d16, [r0, #80]
vldr d17, [r0, #88]
veor q15, q8, q15
vstr d30, [r0, #80]
vstr d31, [r0, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #32] @ unaligned
str r1, [r4, #36] @ unaligned
str r2, [r4, #40] @ unaligned
str r3, [r4, #44] @ unaligned
ldr r0, [r6, #48]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r2, [r7, #72]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q9, q9, q2
vstr d18, [r2, #80]
vstr d19, [r2, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #48] @ unaligned
str r1, [r4, #52] @ unaligned
str r2, [r4, #56] @ unaligned
str r3, [r4, #60] @ unaligned
ldr r0, [r6, #64]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r2, [r7, #72]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q9, q9, q6
vstr d18, [r2, #80]
vstr d19, [r2, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #64] @ unaligned
str r1, [r4, #68] @ unaligned
str r2, [r4, #72] @ unaligned
str r3, [r4, #76] @ unaligned
ldr r0, [r6, #80]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r2, [r7, #72]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q1, q9, q1
vstr d2, [r2, #80]
vstr d3, [r2, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #80] @ unaligned
str r1, [r4, #84] @ unaligned
str r2, [r4, #88] @ unaligned
str r3, [r4, #92] @ unaligned
ldr r0, [r6, #96]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
ldr r6, [r7, #64]
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r3, [r7, #72]
vldr d16, [r3, #80]
vldr d17, [r3, #88]
veor q8, q8, q7
vstr d16, [r3, #80]
vstr d17, [r3, #88]
ldmia r10!, {r0, r1, r2, r3}
mov r10, r5
str r0, [r4, #96] @ unaligned
str r1, [r4, #100] @ unaligned
str r2, [r4, #104] @ unaligned
str r3, [r4, #108] @ unaligned
ldr r0, [r6, #112]! @ unaligned
ldr r1, [r6, #4] @ unaligned
ldr r2, [r6, #8] @ unaligned
ldr r3, [r6, #12] @ unaligned
stmia r5!, {r0, r1, r2, r3}
mov r5, r10
ldr r0, [r7, #72]
ldr r6, [r7, #44]
vldr d16, [r0, #80]
vldr d17, [r0, #88]
veor q8, q8, q3
vstr d16, [r0, #80]
vstr d17, [r0, #88]
ldmia r5!, {r0, r1, r2, r3}
mov r5, r4
mov r8, r5
str r1, [r4, #116] @ unaligned
ldr r1, [r7, #64]
str r0, [r4, #112] @ unaligned
mov r0, r5
str r2, [r4, #120] @ unaligned
str r3, [r4, #124] @ unaligned
ldr r3, [r1, #128]
ldr r2, [r7, #88]
eor r3, fp, r3
str r3, [r4, #128]
ldr r3, [r1, #132]
mov r4, r1
mov r1, r5
eors r2, r2, r3
str r2, [r8, #132]
ldr r3, [r4, #136]
ldr r2, [r7, #24]
eor r3, r9, r3
str r3, [r5, #136]
ldr r3, [r4, #140]
eors r3, r3, r2
str r3, [r5, #140]
mov r5, r4
ldr r3, [r6]
ldr r2, [r4, #144]
ldr r4, [r7, #52]
add r4, r4, r3
eors r2, r2, r4
mov r4, r1
str r2, [r1, #144]
ldr r1, [r7, #76]
ldr r2, [r6, #4]
ldr r3, [r5, #148]
mov r8, r1
add r8, r8, r2
mov r2, r8
eors r3, r3, r2
str r3, [r0, #148]
mov r0, r4
ldr r2, [r6, #8]
ldr r1, [r7, #92]
ldr r3, [r5, #152]
mov r8, r1
add r8, r8, r2
ldr r1, [r7, #56]
mov r2, r8
eors r3, r3, r2
str r3, [r4, #152]
mov r8, r6
ldr r2, [r6, #12]
mov r4, r5
ldr r3, [r5, #156]
add r1, r1, r2
eors r3, r3, r1
str r3, [r0, #156]
ldr r2, [r6, #16]
mov r1, r0
ldr r3, [r5, #160]
add ip, ip, r2
eor r3, ip, r3
str r3, [r0, #160]
ldr r2, [r6, #20]
mov ip, r0
ldr r3, [r5, #164]
add lr, lr, r2
ldr r2, [r7, #100]
eor r3, lr, r3
str r3, [r1, #164]
ldr r6, [r6, #24]
ldr r3, [r4, #168]
add r2, r2, r6
eors r3, r3, r2
ldr r2, [r7, #104]
str r3, [r0, #168]
ldr r5, [r8, #28]
ldr r3, [r4, #172]
add r2, r2, r5
mov r5, r4
eors r3, r3, r2
mov r2, r0
str r3, [r0, #172]
ldr r3, [r7, #48]
ldr r4, [r4, #176]
ldr r0, [r7, #20]
adds r1, r3, #3
ldr r3, [r7, #84]
eors r4, r4, r0
str r4, [r2, #176]
ldr r0, [r5, #180]
mov r4, r2
str r1, [r7, #48]
eors r3, r3, r0
mov r0, r3
ldr r3, [r7, #232]
str r0, [r2, #180]
ldr r1, [r3]
ldr r3, [r5, #184]
ldr r2, [r7, #80]
add r2, r2, r1
mov r1, r5
eors r3, r3, r2
str r3, [ip, #184]
ldr r3, [r7, #232]
adds r1, r1, #192
str r1, [r7, #64]
ldr r1, [r7, #108]
ldr r2, [r3, #4]
ldr r3, [r5, #188]
add r1, r1, r2
mov r2, r1
eors r2, r2, r3
str r2, [ip, #188]
mov r3, r4
ldr r2, [r7, #16]
adds r3, r3, #192
str r3, [r7, #60]
cmp r2, r3
beq .L85
ldr r3, [r7, #232]
ldmia r3, {r1, r2}
b .L4
.L85:
ldr r3, [r7, #12]
ldr r2, [r7, #4]
add r3, r3, r2
str r3, [r7, #12]
.L2:
ldr r1, [r7, #8]
movw r2, #43691
movt r2, 43690
umull r2, r3, r1, r2
lsr fp, r3, #7
lsl r3, fp, #8
sub fp, r3, fp, lsl #6
rsb fp, fp, r1
lsrs fp, fp, #6
beq .L6
ldr r5, [r7, #12]
ldr r4, [r7, #16]
ldr r6, [r7, #72]
ldr lr, [r7, #68]
vldr d30, .L95
vldr d31, .L95+8
str fp, [r7, #104]
str fp, [r7, #108]
.L8:
vmov q2, q11 @ v4si
movs r3, #10
vmov q8, q14 @ v4si
vmov q9, q13 @ v4si
vmov q10, q12 @ v4si
.L7:
vadd.i32 q10, q10, q9
subs r3, r3, #1
veor q3, q2, q10
vrev32.16 q3, q3
vadd.i32 q8, q8, q3
veor q9, q8, q9
vshl.i32 q2, q9, #12
vsri.32 q2, q9, #20
vadd.i32 q10, q10, q2
veor q3, q10, q3
vshl.i32 q9, q3, #8
vsri.32 q9, q3, #24
vadd.i32 q8, q8, q9
vext.32 q9, q9, q9, #3
veor q2, q8, q2
vext.32 q8, q8, q8, #2
vshl.i32 q3, q2, #7
vsri.32 q3, q2, #25
vext.32 q3, q3, q3, #1
vadd.i32 q10, q10, q3
veor q9, q10, q9
vrev32.16 q9, q9
vadd.i32 q8, q8, q9
veor q3, q8, q3
vshl.i32 q2, q3, #12
vsri.32 q2, q3, #20
vadd.i32 q10, q10, q2
vmov q3, q2 @ v4si
veor q9, q10, q9
vshl.i32 q2, q9, #8
vsri.32 q2, q9, #24
vadd.i32 q8, q8, q2
vext.32 q2, q2, q2, #1
veor q3, q8, q3
vext.32 q8, q8, q8, #2
vshl.i32 q9, q3, #7
vsri.32 q9, q3, #25
vext.32 q9, q9, q9, #3
bne .L7
ldr r0, [r5] @ unaligned
vadd.i32 q1, q12, q10
ldr r1, [r5, #4] @ unaligned
mov ip, lr
ldr r2, [r5, #8] @ unaligned
mov r9, lr
ldr r3, [r5, #12] @ unaligned
mov r10, r5
vadd.i32 q9, q13, q9
mov r8, lr
vadd.i32 q8, q14, q8
stmia ip!, {r0, r1, r2, r3}
mov ip, lr
vldr d20, [r6, #80]
vldr d21, [r6, #88]
vadd.i32 q3, q11, q2
veor q10, q10, q1
vadd.i32 q11, q11, q15
vstr d20, [r6, #80]
vstr d21, [r6, #88]
ldmia r9!, {r0, r1, r2, r3}
mov r9, r5
str r0, [r4] @ unaligned
str r1, [r4, #4] @ unaligned
str r2, [r4, #8] @ unaligned
str r3, [r4, #12] @ unaligned
ldr r0, [r10, #16]! @ unaligned
ldr r1, [r10, #4] @ unaligned
ldr r2, [r10, #8] @ unaligned
ldr r3, [r10, #12] @ unaligned
add r10, r4, #48
adds r4, r4, #64
stmia r8!, {r0, r1, r2, r3}
mov r8, lr
vldr d20, [r6, #80]
vldr d21, [r6, #88]
veor q10, q10, q9
vstr d20, [r6, #80]
vstr d21, [r6, #88]
ldmia ip!, {r0, r1, r2, r3}
mov ip, lr
str r0, [r4, #-48] @ unaligned
str r1, [r4, #-44] @ unaligned
str r2, [r4, #-40] @ unaligned
str r3, [r4, #-36] @ unaligned
ldr r0, [r9, #32]! @ unaligned
ldr r1, [r9, #4] @ unaligned
ldr r2, [r9, #8] @ unaligned
ldr r3, [r9, #12] @ unaligned
mov r9, r5
adds r5, r5, #64
stmia r8!, {r0, r1, r2, r3}
mov r8, lr
vldr d18, [r6, #80]
vldr d19, [r6, #88]
veor q9, q9, q8
vstr d18, [r6, #80]
vstr d19, [r6, #88]
ldmia ip!, {r0, r1, r2, r3}
mov ip, lr
str r0, [r4, #-32] @ unaligned
str r1, [r4, #-28] @ unaligned
str r2, [r4, #-24] @ unaligned
str r3, [r4, #-20] @ unaligned
ldr r0, [r9, #48]! @ unaligned
ldr r1, [r9, #4] @ unaligned
ldr r2, [r9, #8] @ unaligned
ldr r3, [r9, #12] @ unaligned
stmia r8!, {r0, r1, r2, r3}
vldr d16, [r6, #80]
vldr d17, [r6, #88]
veor q8, q8, q3
vstr d16, [r6, #80]
vstr d17, [r6, #88]
ldmia ip!, {r0, r1, r2, r3}
str r0, [r4, #-16] @ unaligned
str r1, [r4, #-12] @ unaligned
str r3, [r10, #12] @ unaligned
ldr r3, [r7, #108]
str r2, [r10, #8] @ unaligned
cmp r3, #1
beq .L88
movs r3, #1
str r3, [r7, #108]
b .L8
.L96:
.align 3
.L95:
.word 1
.word 0
.word 0
.word 0
.L88:
ldr fp, [r7, #104]
ldr r3, [r7, #12]
lsl fp, fp, #6
add r3, r3, fp
str r3, [r7, #12]
ldr r3, [r7, #16]
add r3, r3, fp
str r3, [r7, #16]
.L6:
ldr r3, [r7, #8]
ands r9, r3, #63
beq .L1
vmov q3, q11 @ v4si
movs r3, #10
vmov q8, q14 @ v4si
mov r5, r9
vmov q15, q13 @ v4si
vmov q10, q12 @ v4si
.L10:
vadd.i32 q10, q10, q15
subs r3, r3, #1
veor q9, q3, q10
vrev32.16 q9, q9
vadd.i32 q8, q8, q9
veor q15, q8, q15
vshl.i32 q3, q15, #12
vsri.32 q3, q15, #20
vadd.i32 q10, q10, q3
veor q15, q10, q9
vshl.i32 q9, q15, #8
vsri.32 q9, q15, #24
vadd.i32 q8, q8, q9
vext.32 q9, q9, q9, #3
veor q3, q8, q3
vext.32 q8, q8, q8, #2
vshl.i32 q15, q3, #7
vsri.32 q15, q3, #25
vext.32 q15, q15, q15, #1
vadd.i32 q10, q10, q15
veor q9, q10, q9
vrev32.16 q9, q9
vadd.i32 q8, q8, q9
veor q15, q8, q15
vshl.i32 q3, q15, #12
vsri.32 q3, q15, #20
vadd.i32 q10, q10, q3
vmov q15, q3 @ v4si
veor q9, q10, q9
vshl.i32 q3, q9, #8
vsri.32 q3, q9, #24
vadd.i32 q8, q8, q3
vext.32 q3, q3, q3, #1
veor q9, q8, q15
vext.32 q8, q8, q8, #2
vshl.i32 q15, q9, #7
vsri.32 q15, q9, #25
vext.32 q15, q15, q15, #3
bne .L10
cmp r5, #15
mov r9, r5
bhi .L89
vadd.i32 q12, q12, q10
ldr r3, [r7, #72]
vst1.64 {d24-d25}, [r3:128]
.L14:
ldr r3, [r7, #8]
and r2, r3, #48
cmp r9, r2
bls .L1
ldr r6, [r7, #16]
add r3, r2, #16
ldr r1, [r7, #12]
rsb ip, r2, r9
adds r0, r1, r2
mov r4, r6
add r1, r1, r3
add r4, r4, r2
add r3, r3, r6
cmp r0, r3
it cc
cmpcc r4, r1
ite cs
movcs r3, #1
movcc r3, #0
cmp ip, #18
ite ls
movls r3, #0
andhi r3, r3, #1
cmp r3, #0
beq .L16
and r1, r0, #7
mov r3, r2
negs r1, r1
and r1, r1, #15
cmp r1, ip
it cs
movcs r1, ip
cmp r1, #0
beq .L17
ldr r5, [r7, #72]
cmp r1, #1
ldrb r0, [r0] @ zero_extendqisi2
add r3, r2, #1
ldrb lr, [r5, r2] @ zero_extendqisi2
mov r6, r5
eor r0, lr, r0
strb r0, [r4]
beq .L17
ldr r0, [r7, #12]
cmp r1, #2
ldrb r4, [r5, r3] @ zero_extendqisi2
ldr r5, [r7, #16]
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #2
beq .L17
ldr r0, [r7, #12]
cmp r1, #3
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #3
beq .L17
ldr r0, [r7, #12]
cmp r1, #4
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #4
beq .L17
ldr r0, [r7, #12]
cmp r1, #5
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #5
beq .L17
ldr r0, [r7, #12]
cmp r1, #6
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #6
beq .L17
ldr r0, [r7, #12]
cmp r1, #7
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #7
beq .L17
ldr r0, [r7, #12]
cmp r1, #8
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #8
beq .L17
ldr r0, [r7, #12]
cmp r1, #9
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #9
beq .L17
ldr r0, [r7, #12]
cmp r1, #10
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #10
beq .L17
ldr r0, [r7, #12]
cmp r1, #11
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #11
beq .L17
ldr r0, [r7, #12]
cmp r1, #12
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #12
beq .L17
ldr r0, [r7, #12]
cmp r1, #13
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #13
beq .L17
ldr r0, [r7, #12]
cmp r1, #15
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #14
bne .L17
ldr r0, [r7, #12]
ldrb r4, [r6, r3] @ zero_extendqisi2
ldrb r0, [r0, r3] @ zero_extendqisi2
eors r0, r0, r4
strb r0, [r5, r3]
add r3, r2, #15
.L17:
rsb r4, r1, ip
add r0, ip, #-1
sub r6, r4, #16
subs r0, r0, r1
cmp r0, #14
lsr r6, r6, #4
add r6, r6, #1
lsl lr, r6, #4
bls .L19
add r2, r2, r1
ldr r1, [r7, #12]
ldr r5, [r7, #16]
cmp r6, #1
add r0, r1, r2
ldr r1, [r7, #72]
add r1, r1, r2
vld1.64 {d18-d19}, [r0:64]
add r2, r2, r5
vld1.8 {q8}, [r1]
veor q8, q8, q9
vst1.8 {q8}, [r2]
beq .L20
add r8, r1, #16
add ip, r2, #16
vldr d18, [r0, #16]
vldr d19, [r0, #24]
cmp r6, #2
vld1.8 {q8}, [r8]
veor q8, q8, q9
vst1.8 {q8}, [ip]
beq .L20
add r8, r1, #32
add ip, r2, #32
vldr d18, [r0, #32]
vldr d19, [r0, #40]
cmp r6, #3
vld1.8 {q8}, [r8]
veor q8, q8, q9
vst1.8 {q8}, [ip]
beq .L20
adds r1, r1, #48
adds r2, r2, #48
vldr d18, [r0, #48]
vldr d19, [r0, #56]
vld1.8 {q8}, [r1]
veor q8, q8, q9
vst1.8 {q8}, [r2]
.L20:
cmp lr, r4
add r3, r3, lr
beq .L1
.L19:
ldr r4, [r7, #72]
adds r2, r3, #1
ldr r1, [r7, #12]
cmp r2, r9
ldr r5, [r7, #16]
ldrb r0, [r4, r3] @ zero_extendqisi2
ldrb r1, [r1, r3] @ zero_extendqisi2
eor r1, r1, r0
strb r1, [r5, r3]
bcs .L1
ldr r0, [r7, #12]
adds r1, r3, #2
mov r6, r4
cmp r9, r1
ldrb r4, [r4, r2] @ zero_extendqisi2
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
adds r2, r3, #3
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
adds r1, r3, #4
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
adds r2, r3, #5
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
adds r1, r3, #6
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
adds r2, r3, #7
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
add r1, r3, #8
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
add r2, r3, #9
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
add r1, r3, #10
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
add r2, r3, #11
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r0, [r7, #12]
add r1, r3, #12
ldrb r4, [r6, r2] @ zero_extendqisi2
cmp r9, r1
ldrb r0, [r0, r2] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r2]
bls .L1
ldr r0, [r7, #12]
add r2, r3, #13
ldrb r4, [r6, r1] @ zero_extendqisi2
cmp r9, r2
ldrb r0, [r0, r1] @ zero_extendqisi2
eor r0, r0, r4
strb r0, [r5, r1]
bls .L1
ldr r1, [r7, #12]
adds r3, r3, #14
ldrb r0, [r6, r2] @ zero_extendqisi2
cmp r9, r3
ldrb r1, [r1, r2] @ zero_extendqisi2
eor r1, r1, r0
strb r1, [r5, r2]
bls .L1
ldr r2, [r7, #72]
ldrb r1, [r2, r3] @ zero_extendqisi2
ldr r2, [r7, #12]
ldrb r2, [r2, r3] @ zero_extendqisi2
eors r2, r2, r1
ldr r1, [r7, #16]
strb r2, [r1, r3]
.L1:
adds r7, r7, #132
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
.L89:
ldr r5, [r7, #12]
vadd.i32 q12, q12, q10
ldr r4, [r7, #68]
cmp r9, #31
ldr r0, [r5] @ unaligned
ldr r1, [r5, #4] @ unaligned
mov r6, r4
ldr r2, [r5, #8] @ unaligned
ldr r3, [r5, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
ldr r2, [r7, #72]
ldr r6, [r7, #16]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q9, q9, q12
vstr d18, [r2, #80]
vstr d19, [r2, #88]
ldmia r4!, {r0, r1, r2, r3}
str r1, [r6, #4] @ unaligned
mov r1, r6
str r0, [r6] @ unaligned
str r2, [r6, #8] @ unaligned
str r3, [r6, #12] @ unaligned
bhi .L90
vadd.i32 q13, q13, q15
ldr r3, [r7, #72]
vstr d26, [r3, #16]
vstr d27, [r3, #24]
b .L14
.L16:
subs r3, r2, #1
ldr r2, [r7, #12]
add r2, r2, r9
mov r5, r2
ldr r2, [r7, #72]
add r2, r2, r3
mov r3, r2
.L24:
ldrb r1, [r0], #1 @ zero_extendqisi2
ldrb r2, [r3, #1]! @ zero_extendqisi2
cmp r0, r5
eor r2, r2, r1
strb r2, [r4], #1
bne .L24
adds r7, r7, #132
mov sp, r7
@ sp needed
vldm sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
.L26:
str fp, [r7, #16]
b .L2
.L90:
mov r3, r5
ldr r4, [r7, #68]
ldr r0, [r3, #16]! @ unaligned
add lr, r1, #16
mov r5, r1
vadd.i32 q13, q13, q15
mov r6, r4
cmp r9, #47
ldr r1, [r3, #4] @ unaligned
ldr r2, [r3, #8] @ unaligned
ldr r3, [r3, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
ldr r2, [r7, #72]
vldr d18, [r2, #80]
vldr d19, [r2, #88]
veor q13, q9, q13
vstr d26, [r2, #80]
vstr d27, [r2, #88]
ldmia r4!, {r0, r1, r2, r3}
str r0, [r5, #16] @ unaligned
str r1, [lr, #4] @ unaligned
str r2, [lr, #8] @ unaligned
str r3, [lr, #12] @ unaligned
bhi .L91
vadd.i32 q8, q14, q8
ldr r3, [r7, #72]
vstr d16, [r3, #32]
vstr d17, [r3, #40]
b .L14
.L91:
ldr r3, [r7, #12]
add lr, r5, #32
ldr r4, [r7, #68]
vadd.i32 q8, q14, q8
ldr r5, [r7, #72]
vadd.i32 q11, q11, q3
ldr r0, [r3, #32]! @ unaligned
mov r6, r4
vstr d22, [r5, #48]
vstr d23, [r5, #56]
ldr r1, [r3, #4] @ unaligned
ldr r2, [r3, #8] @ unaligned
ldr r3, [r3, #12] @ unaligned
stmia r4!, {r0, r1, r2, r3}
vldr d18, [r5, #80]
vldr d19, [r5, #88]
veor q9, q9, q8
ldr r4, [r7, #16]
vstr d18, [r5, #80]
vstr d19, [r5, #88]
ldmia r6!, {r0, r1, r2, r3}
str r0, [r4, #32] @ unaligned
str r1, [lr, #4] @ unaligned
str r2, [lr, #8] @ unaligned
str r3, [lr, #12] @ unaligned
b .L14
.size CRYPTO_chacha_20_neon, .-CRYPTO_chacha_20_neon
.section .rodata
.align 2
.LANCHOR0 = . + 0
.LC0:
.word 1634760805
.word 857760878
.word 2036477234
.word 1797285236
.ident "GCC: (Linaro GCC 2014.11) 4.9.3 20141031 (prerelease)"
.section .note.GNU-stack,"",%progbits
#endif /* !OPENSSL_NO_ASM */