Browse Source

Don't assume alignment of ChaCha key on ARM.

When addressing [1], I checked the AEAD code but brain-farted: a key is
aligned in that code, but it's the Poly1305 key, which doesn't matter
here.

It would be nice to align the ChaCha key too, but Android doesn't have
|posix_memalign| in the versions that we care about. It does have
|memalign|, but that's documented as "obsolete" and we don't have a
concept of an Android OS yet and I don't want to add one just for this.

So this change uses the buffer for loading the key again.

(Note that we never used to check for alignment of the |key| before
calling this. We must have gotten it for free somehow when checking the
alignment of |in| and |out|. But there are clearly some paths that don't
have an aligned key:
https://code.google.com/p/chromium/issues/detail?id=454308.)

At least the generation script started paying off immediately ☺.

[1] https://boringssl-review.googlesource.com/#/c/3132/1/crypto/chacha/chacha_vec.c@185

Change-Id: I4f893ba0733440fddd453f9636cc2aeaf05076ed
Reviewed-on: https://boringssl-review.googlesource.com/3270
Reviewed-by: Adam Langley <agl@google.com>
kris/onging/CECPQ3_patch15
Adam Langley 9 years ago
parent
commit
868c7ef1f4
2 changed files with 83 additions and 87 deletions
  1. +3
    -16
      crypto/chacha/chacha_vec.c
  2. +80
    -71
      crypto/chacha/chacha_vec_arm.S

+ 3
- 16
crypto/chacha/chacha_vec.c View File

@@ -40,6 +40,7 @@ typedef unsigned vec __attribute__((vector_size(16)));
* This implementation supports parallel processing of multiple blocks,
* including potentially using general-purpose registers. */
#if __ARM_NEON__
#include <string.h>
#include <arm_neon.h>
#define GPR_TOO 1
#define VBPI 2
@@ -162,29 +163,15 @@ void CRYPTO_chacha_20(
uint8_t alignment_buffer[16] __attribute__((aligned(16)));
#endif
vec s0, s1, s2, s3;
#if !defined(__ARM_NEON__) && !defined(__SSE2__)
__attribute__ ((aligned (16))) unsigned key[8], nonce[4];
#endif
__attribute__ ((aligned (16))) unsigned chacha_const[] =
{0x61707865,0x3320646E,0x79622D32,0x6B206574};
#if defined(__ARM_NEON__) || defined(__SSE2__)
kp = (unsigned *)key;
#else
((vec *)key)[0] = REVV_BE(((vec *)key)[0]);
((vec *)key)[1] = REVV_BE(((vec *)key)[1]);
nonce[0] = REVW_BE(((unsigned *)nonce)[0]);
nonce[1] = REVW_BE(((unsigned *)nonce)[1]);
nonce[2] = REVW_BE(((unsigned *)nonce)[2]);
nonce[3] = REVW_BE(((unsigned *)nonce)[3]);
kp = (unsigned *)key;
np = (unsigned *)nonce;
#endif
#if defined(__ARM_NEON__)
np = (unsigned*) nonce;
#endif
s0 = LOAD_ALIGNED(chacha_const);
s1 = LOAD_ALIGNED(&((vec*)kp)[0]);
s2 = LOAD_ALIGNED(&((vec*)kp)[1]);
s1 = LOAD(&((vec*)kp)[0]);
s2 = LOAD(&((vec*)kp)[1]);
s3 = (vec){
counter & 0xffffffff,
#if __ARM_NEON__ || defined(OPENSSL_X86)


+ 80
- 71
crypto/chacha/chacha_vec_arm.S View File

@@ -62,74 +62,88 @@ CRYPTO_chacha_20_neon:
@ args = 8, pretend = 0, frame = 128
@ frame_needed = 1, uses_anonymous_args = 0
push {r4, r5, r6, r7, r8, r9, r10, fp, lr}
mov r4, r2
mov ip, r3
vpush.64 {d8, d9, d10, d11, d12, d13, d14, d15}
mov r9, r2
ldr r4, .L92+16
mov fp, r0
mov r10, r1
mov lr, ip
.LPIC16:
add r4, pc
movw r8, #43691
movt r8, 43690
mov ip, r3
umull r8, r9, r4, r8
sub sp, sp, #132
add r7, sp, #0
sub sp, sp, #112
mov fp, r0
mov r10, r1
movt r8, 43690
str r0, [r7, #60]
str r1, [r7, #12]
str r2, [r7, #8]
ldmia r4, {r0, r1, r2, r3}
add r4, sp, #15
ldr r2, .L92+16
bic r4, r4, #15
ldr r5, [r7, #232]
add lr, r4, #64
.LPIC16:
add r2, pc
str r0, [r7, #60]
str r1, [r7, #12]
str r3, [r7, #44]
ldmia r2, {r0, r1, r2, r3}
ldr r6, [r5]
str r4, [r7, #72]
ldr r5, [r5, #4]
ldr r4, [r7, #236]
str r6, [r7, #120]
str r5, [r7, #124]
str r4, [r7, #112]
stmia lr, {r0, r1, r2, r3}
movs r3, #0
ldr r0, [r7, #72]
str r3, [r7, #116]
lsrs r3, r9, #7
str ip, [r7, #44]
umull r8, r9, r9, r8
mov r6, r4
adds r4, r4, #64
add r5, r6, #80
str r5, [r7, #68]
stmia r4, {r0, r1, r2, r3}
movs r4, #0
ldr r0, [ip] @ unaligned
ldr r1, [ip, #4] @ unaligned
ldr r2, [ip, #8] @ unaligned
ldr r3, [ip, #12] @ unaligned
vldr d24, [r6, #64]
vldr d25, [r6, #72]
str r4, [r7, #116]
mov r4, r5
stmia r5!, {r0, r1, r2, r3}
ldr r0, [lr, #16]! @ unaligned
ldr r3, [r7, #232]
str r6, [r7, #72]
ldr r2, [lr, #8] @ unaligned
ldr ip, [r3]
ldr r6, [r7, #236]
ldr r1, [lr, #4] @ unaligned
ldr r3, [lr, #12] @ unaligned
ldr r5, [r7, #72]
vldr d26, [r5, #80]
vldr d27, [r5, #88]
str ip, [r7, #120]
stmia r4!, {r0, r1, r2, r3}
lsrs r2, r9, #7
ldr r3, [r7, #232]
str r6, [r7, #112]
vldr d28, [r5, #80]
vldr d29, [r5, #88]
ldr r3, [r3, #4]
str r3, [r7, #124]
vldr d22, [r7, #112]
vldr d23, [r7, #120]
vldr d24, [r0, #64]
vldr d25, [r0, #72]
vld1.64 {d26-d27}, [ip:64]
vldr d28, [ip, #16]
vldr d29, [ip, #24]
beq .L26
ldr r1, [r0, #64]
lsls r2, r3, #8
sub r3, r2, r3, lsl #6
str r3, [r7, #4]
ldr r2, [r0, #72]
str r1, [r7, #40]
mov r1, r3
ldr r3, [r0, #68]
lsls r1, r2, #8
ldr r4, [r5, #64]
sub r2, r1, r2, lsl #6
str r2, [r7, #4]
vldr d0, .L92
vldr d1, .L92+8
str r2, [r7, #32]
adds r2, r4, #2
str r3, [r7, #36]
ldr r3, [r0, #76]
str r2, [r7, #48]
mov r2, r0
mov r0, fp
mov r1, r2
ldr r2, [r5, #68]
str r4, [r7, #40]
ldr r4, [r5, #72]
str r2, [r7, #36]
ldr r2, [r5, #76]
str r4, [r7, #32]
adds r4, r6, #2
str r10, [r7, #64]
str r3, [r7, #28]
adds r3, r0, r1
mov r1, r6
str r3, [r7, #16]
add r3, r2, #80
mov r2, r5
str r3, [r7, #68]
str r2, [r7, #28]
adds r2, r0, r1
mov r1, ip
str r2, [r7, #16]
mov r2, r3
str r4, [r7, #48]
.L4:
ldr r0, [r7, #44]
add r8, r7, #28
@@ -749,14 +763,12 @@ CRYPTO_chacha_20_neon:
rsb fp, fp, r1
lsrs fp, fp, #6
beq .L6
ldr r6, [r7, #72]
ldr r5, [r7, #12]
ldr r4, [r7, #16]
mov r3, r6
adds r3, r3, #80
ldr r6, [r7, #72]
ldr lr, [r7, #68]
vldr d30, .L95
vldr d31, .L95+8
mov lr, r3
str fp, [r7, #104]
str fp, [r7, #108]
.L8:
@@ -1299,18 +1311,15 @@ CRYPTO_chacha_20_neon:
vldm sp!, {d8-d15}
pop {r4, r5, r6, r7, r8, r9, r10, fp, pc}
.L89:
ldr r4, [r7, #12]
ldr r5, [r7, #12]
vadd.i32 q12, q12, q10
ldr r5, [r7, #72]
ldr r4, [r7, #68]
cmp r9, #31
ldr r0, [r4] @ unaligned
add r6, r5, #80
ldr r1, [r4, #4] @ unaligned
ldr r2, [r4, #8] @ unaligned
mov r5, r6
ldr r3, [r4, #12] @ unaligned
mov r4, r6
str r6, [r7, #68]
ldr r0, [r5] @ unaligned
ldr r1, [r5, #4] @ unaligned
mov r6, r4
ldr r2, [r5, #8] @ unaligned
ldr r3, [r5, #12] @ unaligned
stmia r6!, {r0, r1, r2, r3}
ldr r2, [r7, #72]
ldr r6, [r7, #16]
@@ -1355,13 +1364,13 @@ CRYPTO_chacha_20_neon:
str fp, [r7, #16]
b .L2
.L90:
ldr r3, [r7, #12]
mov r3, r5
ldr r4, [r7, #68]
ldr r0, [r3, #16]! @ unaligned
add lr, r1, #16
mov r4, r5
mov r6, r5
mov r5, r1
vadd.i32 q13, q13, q15
ldr r0, [r3, #16]! @ unaligned
mov r6, r4
cmp r9, #47
ldr r1, [r3, #4] @ unaligned
ldr r2, [r3, #8] @ unaligned


Loading…
Cancel
Save