# Copyright (c) 2014, Google Inc.
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

# This file contains a pre-compiled version of chacha_vec.c for ARM. This is
# needed to support switching on NEON code at runtime. If the whole of OpenSSL
# were to be compiled with the needed flags to build chacha_vec.c, then it
# wouldn't be possible to run on non-NEON systems.
#
# This file was generated by:
#
# /opt/gcc-linaro-arm-linux-gnueabihf-4.7-2012.10-20121022_linux/bin/arm-linux-gnueabihf-gcc -O3 -mcpu=cortex-a8 -mfpu=neon -S chacha_vec.c -I ../../include -fpic -o chacha_vec_arm.S
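#
# This routine is only selected after a runtime check that NEON is present.
# A minimal sketch of that dispatch as done at the C level (the
# CRYPTO_is_NEON_capable helper is BoringSSL's CPU probe; the exact prototype
# and the generic fallback name here are illustrative):
#
@   void CRYPTO_chacha_20(uint8_t *out, const uint8_t *in, size_t in_len,
@                         const uint8_t key[32], const uint8_t nonce[8],
@                         size_t counter) {
@     if (CRYPTO_is_NEON_capable()) {
@       CRYPTO_chacha_20_neon(out, in, in_len, key, nonce, counter);
@       return;
@     }
@     chacha_20_generic(out, in, in_len, key, nonce, counter);  /* scalar path */
@   }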
#if !defined(OPENSSL_NO_ASM)
.syntax unified
.cpu cortex-a8
.eabi_attribute 27, 3
# EABI attribute 28 sets whether VFP register arguments were used to build this
# file. If object files are inconsistent on this point, the linker will refuse
# to link them. Thus we report whatever the compiler expects since we don't use
# VFP arguments.
#if defined(__ARM_PCS_VFP)
.eabi_attribute 28, 1
#else
.eabi_attribute 28, 0
#endif
.fpu neon
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 2
.eabi_attribute 34, 1
.eabi_attribute 18, 4
.thumb
.file "chacha_vec.c"
.text
.align 2
.global CRYPTO_chacha_20_neon
.hidden CRYPTO_chacha_20_neon
.thumb
.thumb_func
.type CRYPTO_chacha_20_neon, %function
CRYPTO_chacha_20_neon:
@ args = 8, pretend = 0, frame = 304
@ frame_needed = 1, uses_anonymous_args = 0
@ link register save eliminated.
push {r4, r5, r6, r7, r8, r9, sl, fp}
fstmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15}
sub sp, sp, #304
add r7, sp, #0
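@ ip = 0xAAAAAAAB, the rounded-up reciprocal of 3: together with the umull
@ below and the later "lsrs ip, ip, #7" this computes len / 192, i.e. how
@ many 192-byte (three 64-byte block) iterations the main loop will run.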
movw ip, #43691
movt ip, 43690
str r2, [r7, #196]
sub sp, sp, #96
ldr r4, [r7, #196]
ldr r6, [r7, #400]
ldr r2, .L38+16
umull r4, ip, ip, r4
ldr r6, [r6, #0]
ldr r8, [r7, #400]
.LPIC24:
add r2, pc
add r4, sp, #15
str r3, [r7, #244]
str r6, [r7, #176]
bic r4, r4, #15
str r0, [r7, #188]
str r4, [r7, #200]
lsrs ip, ip, #7
str r1, [r7, #184]
ldmia r2, {r0, r1, r2, r3}
ldr r4, [r8, #4]
ldr r5, [r7, #244]
vld1.64 {d24-d25}, [r5:64]
vldr d26, [r5, #16]
vldr d27, [r5, #24]
ldr r9, [r7, #200]
ldr r8, [r7, #404]
ldr r5, [r7, #176]
add r6, r9, #64
str r4, [r7, #300]
mov r4, #0
str r8, [r7, #288]
str r5, [r7, #296]
str r4, [r7, #292]
stmia r6, {r0, r1, r2, r3}
vldr d22, [r9, #64]
vldr d23, [r9, #72]
vldr d20, [r7, #288]
vldr d21, [r7, #296]
str ip, [r7, #192]
beq .L20
lsl r6, ip, #1
ldr r1, [r9, #68]
add r3, r6, ip
str r6, [r7, #180]
ldr r2, [r9, #72]
add r8, r8, #2
ldr r5, [r9, #76]
vldr d18, .L38
vldr d19, .L38+8
str r4, [r7, #240]
ldr r6, [r7, #184]
ldr r4, [r7, #188]
str r0, [r7, #224]
str r1, [r7, #220]
str r8, [r7, #208]
str r2, [r7, #216]
str r3, [r7, #204]
str r5, [r7, #212]
str r6, [r7, #252]
str r4, [r7, #248]
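@ Main loop: each iteration of .L4 produces 192 bytes of keystream, three
@ ChaCha blocks at once. Two blocks live in NEON q registers and the third
@ is carried in scalar ARM registers, keeping the Cortex-A8's integer and
@ NEON pipelines busy in parallel.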
.L4:
ldr r2, [r7, #244]
add r9, r7, #216
ldr r3, [r7, #244]
vadd.i32 q8, q10, q9
ldr r6, [r7, #208]
vmov q15, q13 @ v4si
ldr r5, [r7, #240]
vmov q3, q12 @ v4si
ldr r4, [r7, #244]
vmov q2, q11 @ v4si
adds r5, r5, r6
ldr r2, [r2, #8]
ldr r6, [r7, #400]
vmov q5, q10 @ v4si
ldr r3, [r3, #12]
vmov q1, q13 @ v4si
ldr r0, [r7, #244]
vmov q0, q12 @ v4si
ldr r1, [r7, #244]
vmov q4, q11 @ v4si
ldmia r9, {r9, sl, fp}
str r5, [r7, #228]
ldr r5, [r4, #24]
ldr r0, [r0, #0]
ldr r1, [r1, #4]
str r2, [r7, #264]
str r3, [r7, #236]
ldr r2, [r6, #4]
ldr r3, [r4, #28]
str r5, [r7, #280]
ldr r5, [r6, #0]
movs r6, #0
ldr ip, [r7, #228]
ldr r8, [r7, #212]
str r0, [r7, #232]
str r1, [r7, #268]
ldr r0, [r4, #16]
ldr r1, [r4, #20]
movs r4, #10
str r2, [r7, #24]
str r3, [r7, #284]
str r4, [r7, #256]
ldr r2, [r7, #264]
str r9, [r7, #276]
mov r9, r6
ldr r6, [r7, #280]
str r8, [r7, #260]
mov r8, sl
str r1, [r7, #272]
mov sl, ip
str r6, [r7, #264]
mov r6, r5
ldr r3, [r7, #236]
mov r5, r0
ldr ip, [r7, #24]
ldr r1, [r7, #268]
ldr r0, [r7, #232]
b .L39
.L40:
.align 3
.L38:
.word 1
.word 0
.word 0
.word 0
.word .LANCHOR0-(.LPIC24+4)
.L39:
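@ .L3 is one ChaCha double round over all three in-flight blocks; the round
@ counter at [r7, #256] was seeded with 10 above, giving the 20 rounds of
@ ChaCha20.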
.L3:
vadd.i32 q4, q4, q0
add r8, r8, r1
vadd.i32 q2, q2, q3
str r8, [r7, #268]
veor q5, q5, q4
ldr r8, [r7, #276]
veor q8, q8, q2
add fp, fp, r0
str fp, [r7, #280]
add r8, r8, r2
vrev32.16 q5, q5
str r8, [r7, #276]
vrev32.16 q8, q8
vadd.i32 q1, q1, q5
vadd.i32 q15, q15, q8
ldr r8, [r7, #280]
veor q0, q1, q0
ldr r4, [r7, #260]
veor q3, q15, q3
eor sl, sl, r8
ldr r8, [r7, #276]
add fp, r4, r3
vshl.i32 q7, q0, #12
ldr r4, [r7, #268]
vshl.i32 q6, q3, #12
eor r6, r6, r8
eor r9, r9, r4
ldr r4, [r7, #272]
vsri.32 q7, q0, #20
ror r8, r6, #16
ldr r6, [r7, #264]
eor ip, ip, fp
vsri.32 q6, q3, #20
ror sl, sl, #16
ror r9, r9, #16
add r5, r5, sl
vadd.i32 q4, q4, q7
str r5, [r7, #236]
vadd.i32 q2, q2, q6
add r5, r4, r9
add r4, r6, r8
ldr r6, [r7, #284]
ror ip, ip, #16
veor q5, q4, q5
veor q8, q2, q8
add r6, r6, ip
str r6, [r7, #264]
eors r1, r1, r5
ldr r6, [r7, #236]
vshl.i32 q3, q5, #8
vshl.i32 q14, q8, #8
eors r2, r2, r4
eors r0, r0, r6
ldr r6, [r7, #264]
vsri.32 q3, q5, #24
ror r1, r1, #20
eors r3, r3, r6
ldr r6, [r7, #280]
ror r0, r0, #20
vsri.32 q14, q8, #24
adds r6, r0, r6
str r6, [r7, #284]
ldr r6, [r7, #268]
vadd.i32 q1, q1, q3
vadd.i32 q15, q15, q14
ror r2, r2, #20
adds r6, r1, r6
str r6, [r7, #260]
ldr r6, [r7, #276]
veor q6, q15, q6
veor q7, q1, q7
ror r3, r3, #20
adds r6, r2, r6
str r6, [r7, #280]
ldr r6, [r7, #284]
vshl.i32 q0, q6, #7
vshl.i32 q5, q7, #7
add fp, r3, fp
eor sl, r6, sl
ldr r6, [r7, #260]
eor ip, fp, ip
vsri.32 q0, q6, #25
eor r9, r6, r9
ldr r6, [r7, #280]
ror sl, sl, #24
vsri.32 q5, q7, #25
eor r8, r6, r8
ldr r6, [r7, #236]
ror r9, r9, #24
ror ip, ip, #24
add r6, sl, r6
str r6, [r7, #276]
ldr r6, [r7, #264]
add r5, r9, r5
str r5, [r7, #272]
vext.32 q5, q5, q5, #1
add r5, ip, r6
ldr r6, [r7, #276]
vext.32 q0, q0, q0, #1
vadd.i32 q4, q4, q5
eors r0, r0, r6
ldr r6, [r7, #272]
vadd.i32 q2, q2, q0
vext.32 q3, q3, q3, #3
ror r8, r8, #24
eors r1, r1, r6
vext.32 q14, q14, q14, #3
add r4, r8, r4
ldr r6, [r7, #284]
veor q3, q4, q3
veor q14, q2, q14
eors r2, r2, r4
ror r1, r1, #25
vext.32 q1, q1, q1, #2
adds r6, r1, r6
str r6, [r7, #284]
vext.32 q15, q15, q15, #2
ldr r6, [r7, #260]
eors r3, r3, r5
ror r2, r2, #25
vrev32.16 q8, q14
adds r6, r2, r6
vrev32.16 q3, q3
str r6, [r7, #268]
vadd.i32 q1, q1, q3
ldr r6, [r7, #280]
vadd.i32 q15, q15, q8
ror r3, r3, #25
veor q5, q1, q5
adds r6, r3, r6
veor q0, q15, q0
str r6, [r7, #264]
ldr r6, [r7, #268]
ror r0, r0, #25
add fp, r0, fp
vshl.i32 q6, q5, #12
eor sl, r6, sl
ldr r6, [r7, #284]
vshl.i32 q14, q0, #12
eor r8, fp, r8
eor ip, r6, ip
ldr r6, [r7, #264]
vsri.32 q6, q5, #20
ror sl, sl, #16
eor r9, r6, r9
ror r6, r8, #16
vsri.32 q14, q0, #20
ldr r8, [r7, #272]
ror ip, ip, #16
add r5, sl, r5
add r8, r6, r8
add r4, ip, r4
str r4, [r7, #236]
eor r0, r8, r0
str r5, [r7, #280]
vadd.i32 q4, q4, q6
ldr r5, [r7, #236]
vadd.i32 q2, q2, q14
ldr r4, [r7, #276]
ror r0, r0, #20
veor q3, q4, q3
eors r1, r1, r5
veor q0, q2, q8
str r8, [r7, #272]
str r0, [r7, #24]
add fp, r0, fp
ldr r8, [r7, #280]
ror r9, r9, #16
ldr r0, [r7, #284]
add r4, r9, r4
str fp, [r7, #260]
ror r1, r1, #20
add fp, r1, r0
eor r2, r8, r2
ldr r0, [r7, #260]
eors r3, r3, r4
vshl.i32 q5, q3, #8
str r4, [r7, #232]
vshl.i32 q8, q0, #8
ldr r4, [r7, #268]
ldr r5, [r7, #264]
ror r2, r2, #20
ror r3, r3, #20
eors r6, r6, r0
adds r5, r3, r5
add r8, r2, r4
vsri.32 q5, q3, #24
ldr r4, [r7, #272]
eor r9, r5, r9
eor ip, fp, ip
vsri.32 q8, q0, #24
eor sl, r8, sl
ror r6, r6, #24
ldr r0, [r7, #280]
str r5, [r7, #276]
adds r4, r6, r4
ldr r5, [r7, #236]
vadd.i32 q1, q1, q5
str r4, [r7, #272]
vadd.i32 q15, q15, q8
ldr r4, [r7, #232]
ror ip, ip, #24
ror sl, sl, #24
ror r9, r9, #24
add r5, ip, r5
add r0, sl, r0
str r5, [r7, #264]
add r5, r9, r4
str r0, [r7, #284]
veor q6, q1, q6
ldr r4, [r7, #24]
veor q14, q15, q14
ldr r0, [r7, #272]
eors r3, r3, r5
vshl.i32 q0, q6, #7
vext.32 q1, q1, q1, #2
eors r0, r0, r4
ldr r4, [r7, #284]
str r0, [r7, #280]
vshl.i32 q3, q14, #7
eors r2, r2, r4
ldr r4, [r7, #280]
ldr r0, [r7, #264]
vsri.32 q0, q6, #25
ror r2, r2, #25
ror r3, r3, #25
eors r1, r1, r0
vsri.32 q3, q14, #25
ror r0, r4, #25
ldr r4, [r7, #256]
ror r1, r1, #25
vext.32 q5, q5, q5, #1
subs r4, r4, #1
str r4, [r7, #256]
vext.32 q15, q15, q15, #2
vext.32 q8, q8, q8, #1
vext.32 q0, q0, q0, #3
vext.32 q3, q3, q3, #3
bne .L3
ldr r4, [r7, #264]
vadd.i32 q14, q10, q9
str r2, [r7, #264]
vadd.i32 q10, q10, q5
ldr r2, [r7, #252]
vld1.64 {d12-d13}, [r2:64]
ldr r2, [r7, #220]
vadd.i32 q4, q11, q4
str ip, [r7, #24]
mov ip, sl
mov sl, r8
ldr r8, [r7, #260]
add sl, sl, r2
ldr r2, [r7, #212]
str r4, [r7, #280]
vadd.i32 q0, q12, q0
ldr r4, [r7, #224]
add r8, r8, r2
ldr r2, [r7, #240]
vadd.i32 q1, q13, q1
str r0, [r7, #232]
add fp, fp, r4
mov r0, r5
ldr r4, [r7, #216]
mov r5, r6
mov r6, r9
ldr r9, [r7, #276]
adds r2, r2, #3
str r2, [r7, #240]
vadd.i32 q2, q11, q2
ldr r2, [r7, #252]
add r9, r9, r4
vadd.i32 q3, q12, q3
ldr r4, [r7, #228]
vadd.i32 q15, q13, q15
str r1, [r7, #268]
vadd.i32 q8, q14, q8
str r3, [r7, #236]
veor q4, q4, q6
ldr r3, [r7, #284]
ldr r1, [r7, #272]
add ip, r4, ip
ldr r4, [r7, #248]
vst1.64 {d8-d9}, [r4:64]
vldr d8, [r2, #16]
vldr d9, [r2, #24]
veor q0, q0, q4
vstr d0, [r4, #16]
vstr d1, [r4, #24]
vldr d0, [r2, #32]
vldr d1, [r2, #40]
veor q1, q1, q0
vstr d2, [r4, #32]
vstr d3, [r4, #40]
vldr d2, [r2, #48]
vldr d3, [r2, #56]
veor q10, q10, q1
vstr d20, [r4, #48]
vstr d21, [r4, #56]
vldr d8, [r2, #64]
vldr d9, [r2, #72]
veor q2, q2, q4
vstr d4, [r4, #64]
vstr d5, [r4, #72]
vldr d10, [r2, #80]
vldr d11, [r2, #88]
veor q3, q3, q5
vstr d6, [r4, #80]
vstr d7, [r4, #88]
vldr d12, [r2, #96]
vldr d13, [r2, #104]
veor q15, q15, q6
vstr d30, [r4, #96]
vstr d31, [r4, #104]
vldr d20, [r2, #112]
vldr d21, [r2, #120]
veor q8, q8, q10
vstr d16, [r4, #112]
vstr d17, [r4, #120]
ldr r4, [r2, #128]
ldr r2, [r7, #248]
vadd.i32 q10, q14, q9
eor r4, fp, r4
vadd.i32 q10, q10, q9
str r4, [r2, #128]
ldr r4, [r7, #252]
ldr r2, [r4, #132]
eor r2, sl, r2
ldr sl, [r7, #248]
str r2, [sl, #132]
ldr r2, [r4, #136]
eor r2, r9, r2
str r2, [sl, #136]
ldr r2, [r4, #140]
eor r2, r8, r2
str r2, [sl, #140]
ldr r2, [r7, #244]
ldr r4, [r4, #144]
ldr r2, [r2, #0]
str r4, [r7, #44]
ldr r4, [r7, #232]
add r8, r4, r2
ldr r2, [r7, #44]
ldr r4, [r7, #244]
eor r8, r8, r2
ldr r2, [r7, #252]
str r8, [sl, #144]
ldr r4, [r4, #4]
ldr r2, [r2, #148]
str r2, [r7, #40]
ldr r2, [r7, #268]
add r8, r2, r4
ldr r4, [r7, #40]
ldr r2, [r7, #244]
eor r8, r8, r4
ldr r4, [r7, #252]
str r8, [sl, #148]
ldr r2, [r2, #8]
ldr r4, [r4, #152]
str r4, [r7, #36]
ldr r4, [r7, #264]
add r8, r4, r2
ldr r2, [r7, #36]
eor r8, r8, r2
str r8, [sl, #152]
ldr r2, [r7, #252]
ldr r4, [r7, #244]
ldr r2, [r2, #156]
ldr r4, [r4, #12]
str r2, [r7, #32]
ldr r2, [r7, #236]
add r8, r2, r4
ldr r4, [r7, #32]
ldr r2, [r7, #252]
eor r8, r8, r4
str r8, [sl, #156]
ldr r8, [r7, #244]
ldr r2, [r2, #160]
ldr r4, [r8, #16]
adds r0, r0, r4
ldr r4, [r7, #252]
eors r0, r0, r2
str r0, [sl, #160]
ldr r0, [r8, #20]
ldr r2, [r4, #164]
adds r1, r1, r0
ldr r0, [r7, #280]
eors r1, r1, r2
str r1, [sl, #164]
ldr r2, [r8, #24]
ldr r1, [r4, #168]
adds r2, r0, r2
eors r2, r2, r1
str r2, [sl, #168]
ldr r1, [r8, #28]
ldr r2, [r4, #172]
adds r3, r3, r1
eors r3, r3, r2
str r3, [sl, #172]
ldr r3, [r4, #176]
eor r3, ip, r3
str r3, [sl, #176]
ldr r3, [r4, #180]
ldr r4, [r7, #400]
eors r6, r6, r3
str r6, [sl, #180]
ldr r6, [r7, #252]
ldr r2, [r4, #0]
ldr r3, [r6, #184]
adds r5, r5, r2
eors r5, r5, r3
str r5, [sl, #184]
ldr r2, [r6, #188]
adds r6, r6, #192
ldr r3, [r4, #4]
str r6, [r7, #252]
ldr r0, [r7, #24]
ldr r1, [r7, #240]
adds r4, r0, r3
eors r4, r4, r2
ldr r2, [r7, #204]
str r4, [sl, #188]
add sl, sl, #192
cmp r1, r2
str sl, [r7, #248]
bne .L4
ldr r4, [r7, #192]
ldr r3, [r7, #180]
ldr r6, [r7, #188]
adds r5, r3, r4
ldr r8, [r7, #184]
lsls r5, r5, #6
adds r4, r6, r5
add r5, r8, r5
.L2:
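@ Tail: r3 = len mod 192 via the same reciprocal-multiply division, then
@ r6 = r3 / 64, the number of whole 64-byte blocks handled one at a time by
@ the .L6/.L7 loop below.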
ldr r9, [r7, #196]
movw r3, #43691
movt r3, 43690
ldr sl, [r7, #196]
umull r9, r3, r3, r9
lsrs r3, r3, #7
add r3, r3, r3, lsl #1
sub r3, sl, r3, lsl #6
lsrs r6, r3, #6
beq .L5
add r1, r5, #16
add r2, r4, #16
mov r0, r6
vldr d30, .L41
vldr d31, .L41+8
.L6:
vmov q8, q10 @ v4si
movs r3, #10
vmov q1, q13 @ v4si
vmov q14, q12 @ v4si
vmov q3, q11 @ v4si
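@ .L7 runs one ChaCha double round on a single block: initially a = q3
@ (constant row), b = q14 (key words 0-3), c = q1 (key words 4-7) and
@ d = q8 (counter/nonce row). Lane-wise, each quarter-round computes:
@   a += b; d ^= a; d = ROTL32(d, 16);   @ vrev32.16
@   c += d; b ^= c; b = ROTL32(b, 12);   @ vshl #12 + vsri #20
@   a += b; d ^= a; d = ROTL32(d,  8);   @ vshl #8  + vsri #24
@   c += d; b ^= c; b = ROTL32(b,  7);   @ vshl #7  + vsri #25
@ The vext.32 shuffles rotate the rows between the column half and the
@ diagonal half of the double round.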
.L7:
vadd.i32 q3, q3, q14
subs r3, r3, #1
veor q2, q8, q3
vrev32.16 q2, q2
vadd.i32 q8, q1, q2
veor q9, q8, q14
vshl.i32 q14, q9, #12
vsri.32 q14, q9, #20
vadd.i32 q3, q3, q14
veor q2, q3, q2
vshl.i32 q9, q2, #8
vsri.32 q9, q2, #24
vadd.i32 q8, q8, q9
vext.32 q9, q9, q9, #3
veor q14, q8, q14
vext.32 q1, q8, q8, #2
vshl.i32 q8, q14, #7
vsri.32 q8, q14, #25
vext.32 q8, q8, q8, #1
vadd.i32 q3, q3, q8
veor q2, q3, q9
vrev32.16 q2, q2
vadd.i32 q9, q1, q2
veor q8, q9, q8
vshl.i32 q14, q8, #12
vsri.32 q14, q8, #20
vadd.i32 q3, q3, q14
veor q2, q3, q2
vshl.i32 q8, q2, #8
vsri.32 q8, q2, #24
vadd.i32 q9, q9, q8
vext.32 q8, q8, q8, #1
veor q14, q9, q14
vext.32 q1, q9, q9, #2
vshl.i32 q9, q14, #7
vsri.32 q9, q14, #25
vext.32 q14, q9, q9, #3
bne .L7
vadd.i32 q8, q10, q8
subs r0, r0, #1
vadd.i32 q3, q11, q3
vldr d0, [r1, #-16]
vldr d1, [r1, #-8]
vadd.i32 q14, q12, q14
vadd.i32 q1, q13, q1
veor q3, q3, q0
vstr d6, [r2, #-16]
vstr d7, [r2, #-8]
vadd.i32 q10, q10, q15
vld1.64 {d8-d9}, [r1:64]
veor q14, q14, q4
vst1.64 {d28-d29}, [r2:64]
vldr d10, [r1, #16]
vldr d11, [r1, #24]
veor q1, q1, q5
vstr d2, [r2, #16]
vstr d3, [r2, #24]
vldr d18, [r1, #32]
vldr d19, [r1, #40]
add r1, r1, #64
veor q8, q8, q9
vstr d16, [r2, #32]
vstr d17, [r2, #40]
add r2, r2, #64
bne .L6
lsls r6, r6, #6
adds r4, r4, r6
adds r5, r5, r6
.L5:
ldr r6, [r7, #196]
ands ip, r6, #63
beq .L1
vmov q8, q10 @ v4si
movs r3, #10
vmov q14, q13 @ v4si
vmov q9, q12 @ v4si
vmov q15, q11 @ v4si
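@ Final partial block (len mod 64 != 0): the same double round as .L7
@ generates one last block of keystream, which is XORed into the remaining
@ bytes piecemeal below.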
.L10:
vadd.i32 q15, q15, q9
subs r3, r3, #1
veor q8, q8, q15
vrev32.16 q8, q8
vadd.i32 q3, q14, q8
veor q9, q3, q9
vshl.i32 q14, q9, #12
vsri.32 q14, q9, #20
vadd.i32 q15, q15, q14
veor q9, q15, q8
vshl.i32 q8, q9, #8
vsri.32 q8, q9, #24
vadd.i32 q9, q3, q8
vext.32 q8, q8, q8, #3
veor q2, q9, q14
vext.32 q14, q9, q9, #2
vshl.i32 q9, q2, #7
vsri.32 q9, q2, #25
vext.32 q9, q9, q9, #1
vadd.i32 q15, q15, q9
veor q3, q15, q8
vrev32.16 q3, q3
vadd.i32 q14, q14, q3
veor q8, q14, q9
vshl.i32 q9, q8, #12
vsri.32 q9, q8, #20
vadd.i32 q15, q15, q9
veor q3, q15, q3
vshl.i32 q8, q3, #8
vsri.32 q8, q3, #24
vadd.i32 q14, q14, q8
vext.32 q8, q8, q8, #1
veor q3, q14, q9
vext.32 q14, q14, q14, #2
vshl.i32 q9, q3, #7
vsri.32 q9, q3, #25
vext.32 q9, q9, q9, #3
bne .L10
cmp ip, #15
vadd.i32 q11, q11, q15
bhi .L37
ldr r9, [r7, #200]
vst1.64 {d22-d23}, [r9:128]
.L14:
ldr sl, [r7, #196]
and r3, sl, #48
cmp ip, r3
bls .L1
adds r0, r5, r3
adds r1, r4, r3
add r2, r0, #16
add r6, r1, #16
cmp r1, r2
it cc
cmpcc r0, r6
rsb r9, r3, ip
ite cc
movcc r2, #0
movcs r2, #1
cmp r9, #15
ite ls
movls r2, #0
andhi r2, r2, #1
lsr r8, r9, #4
eor r2, r2, #1
cmp r8, #0
it eq
orreq r2, r2, #1
lsl sl, r8, #4
cbnz r2, .L35
ldr fp, [r7, #200]
add r6, fp, r3
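@ Overlap and size checks passed: XOR the input against the keystream
@ spilled to the stack buffer, 16 bytes per .L17 iteration.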
.L17:
vld1.8 {q8}, [r0]!
adds r2, r2, #1
cmp r8, r2
vld1.8 {q9}, [r6]!
veor q8, q9, q8
vst1.8 {q8}, [r1]!
bhi .L17
cmp r9, sl
add r3, r3, sl
beq .L1
.L35:
ldr r0, [r7, #200]
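@ Byte-wise XOR for whatever tail remains (also the fallback when the
@ 16-byte copy above cannot be used).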
.L25:
ldrb r2, [r5, r3] @ zero_extendqisi2
ldrb r1, [r3, r0] @ zero_extendqisi2
eors r2, r2, r1
strb r2, [r4, r3]
adds r3, r3, #1
cmp ip, r3
bhi .L25
.L1:
add r7, r7, #304
mov sp, r7
fldmfdd sp!, {d8, d9, d10, d11, d12, d13, d14, d15}
pop {r4, r5, r6, r7, r8, r9, sl, fp}
bx lr
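@ .L37: the final block is more than 15 bytes; XOR and store it in full
@ 16-byte pieces, spilling any leftover keystream words to the stack buffer
@ at [r7, #200] for the tail loops above.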
.L37:
cmp ip, #31
vld1.64 {d0-d1}, [r5:64]
vadd.i32 q9, q12, q9
veor q11, q11, q0
vst1.64 {d22-d23}, [r4:64]
bls .L12
cmp ip, #47
vldr d2, [r5, #16]
vldr d3, [r5, #24]
vadd.i32 q13, q13, q14
veor q9, q9, q1
vstr d18, [r4, #16]
vstr d19, [r4, #24]
bls .L13
vadd.i32 q8, q8, q10
vldr d0, [r5, #32]
vldr d1, [r5, #40]
ldr r6, [r7, #200]
vstr d16, [r6, #48]
vstr d17, [r6, #56]
veor q8, q13, q0
vstr d16, [r4, #32]
vstr d17, [r4, #40]
b .L14
.L12:
ldr r8, [r7, #200]
vstr d18, [r8, #16]
vstr d19, [r8, #24]
b .L14
.L20:
ldr r5, [r7, #184]
ldr r4, [r7, #188]
b .L2
.L13:
ldr r6, [r7, #200]
vstr d26, [r6, #32]
vstr d27, [r6, #40]
b .L14
.L42:
.align 3
.L41:
.word 1
.word 0
.word 0
.word 0
.size CRYPTO_chacha_20_neon, .-CRYPTO_chacha_20_neon
.section .rodata
.align 3
.LANCHOR0 = . + 0
.LC0:
.word 1634760805
.word 857760878
.word 2036477234
.word 1797285236
.ident "GCC: (crosstool-NG linaro-1.13.1-4.7-2012.10-20121022 - Linaro GCC 2012.10) 4.7.3 20121001 (prerelease)"
.section .note.GNU-stack,"",%progbits
#endif /* !OPENSSL_NO_ASM */