As some ARM processors, more specifically the Cortex-Mx series, are Thumb2-only, we need to support Thumb2-only builds even in assembly. (Imported from upstream's 11208dcfb9105e8afa37233185decefd45e89e17.) Change-Id: I7cb48ce6a842cf3cfdf553f6e6e6227d52d525c0 Reviewed-on: https://boringssl-review.googlesource.com/17108 Reviewed-by: Adam Langley <agl@google.com> kris/onging/CECPQ3_patch15
@@ -70,15 +70,11 @@ $code=<<___; | |||||
#endif | #endif | ||||
.text | .text | ||||
#if __ARM_ARCH__<7 | |||||
.code 32 | |||||
#else | |||||
#if defined(__thumb2__) && !defined(__APPLE__) | |||||
.syntax unified | .syntax unified | ||||
# if defined(__thumb2__) && !defined(__APPLE__) | |||||
.thumb | .thumb | ||||
# else | |||||
#else | |||||
.code 32 | .code 32 | ||||
# endif | |||||
#endif | #endif | ||||
.type AES_Te,%object | .type AES_Te,%object | ||||
@@ -193,7 +189,7 @@ AES_Te: | |||||
.type asm_AES_encrypt,%function | .type asm_AES_encrypt,%function | ||||
.align 5 | .align 5 | ||||
asm_AES_encrypt: | asm_AES_encrypt: | ||||
#if __ARM_ARCH__<7 | |||||
#ifndef __thumb2__ | |||||
sub r3,pc,#8 @ asm_AES_encrypt | sub r3,pc,#8 @ asm_AES_encrypt | ||||
#else | #else | ||||
adr r3,asm_AES_encrypt | adr r3,asm_AES_encrypt | ||||
@@ -443,19 +439,19 @@ _armv4_AES_encrypt: | |||||
.align 5 | .align 5 | ||||
asm_AES_set_encrypt_key: | asm_AES_set_encrypt_key: | ||||
_armv4_AES_set_encrypt_key: | _armv4_AES_set_encrypt_key: | ||||
#if __ARM_ARCH__<7 | |||||
#ifndef __thumb2__ | |||||
sub r3,pc,#8 @ asm_AES_set_encrypt_key | sub r3,pc,#8 @ asm_AES_set_encrypt_key | ||||
#else | #else | ||||
adr r3,asm_AES_set_encrypt_key | adr r3,asm_AES_set_encrypt_key | ||||
#endif | #endif | ||||
teq r0,#0 | teq r0,#0 | ||||
#if __ARM_ARCH__>=7 | |||||
#ifdef __thumb2__ | |||||
itt eq @ Thumb2 thing, sanity check in ARM | itt eq @ Thumb2 thing, sanity check in ARM | ||||
#endif | #endif | ||||
moveq r0,#-1 | moveq r0,#-1 | ||||
beq .Labrt | beq .Labrt | ||||
teq r2,#0 | teq r2,#0 | ||||
#if __ARM_ARCH__>=7 | |||||
#ifdef __thumb2__ | |||||
itt eq @ Thumb2 thing, sanity check in ARM | itt eq @ Thumb2 thing, sanity check in ARM | ||||
#endif | #endif | ||||
moveq r0,#-1 | moveq r0,#-1 | ||||
@@ -466,7 +462,7 @@ _armv4_AES_set_encrypt_key: | |||||
teq r1,#192 | teq r1,#192 | ||||
beq .Lok | beq .Lok | ||||
teq r1,#256 | teq r1,#256 | ||||
#if __ARM_ARCH__>=7 | |||||
#ifdef __thumb2__ | |||||
itt ne @ Thumb2 thing, sanity check in ARM | itt ne @ Thumb2 thing, sanity check in ARM | ||||
#endif | #endif | ||||
movne r0,#-1 | movne r0,#-1 | ||||
@@ -627,7 +623,7 @@ _armv4_AES_set_encrypt_key: | |||||
str $s2,[$key,#-16] | str $s2,[$key,#-16] | ||||
subs $rounds,$rounds,#1 | subs $rounds,$rounds,#1 | ||||
str $s3,[$key,#-12] | str $s3,[$key,#-12] | ||||
#if __ARM_ARCH__>=7 | |||||
#ifdef __thumb2__ | |||||
itt eq @ Thumb2 thing, sanity check in ARM | itt eq @ Thumb2 thing, sanity check in ARM | ||||
#endif | #endif | ||||
subeq r2,$key,#216 | subeq r2,$key,#216 | ||||
@@ -699,7 +695,7 @@ _armv4_AES_set_encrypt_key: | |||||
str $s2,[$key,#-24] | str $s2,[$key,#-24] | ||||
subs $rounds,$rounds,#1 | subs $rounds,$rounds,#1 | ||||
str $s3,[$key,#-20] | str $s3,[$key,#-20] | ||||
#if __ARM_ARCH__>=7 | |||||
#ifdef __thumb2__ | |||||
itt eq @ Thumb2 thing, sanity check in ARM | itt eq @ Thumb2 thing, sanity check in ARM | ||||
#endif | #endif | ||||
subeq r2,$key,#256 | subeq r2,$key,#256 | ||||
@@ -969,7 +965,7 @@ AES_Td: | |||||
.type asm_AES_decrypt,%function | .type asm_AES_decrypt,%function | ||||
.align 5 | .align 5 | ||||
asm_AES_decrypt: | asm_AES_decrypt: | ||||
#if __ARM_ARCH__<7 | |||||
#ifndef __thumb2__ | |||||
sub r3,pc,#8 @ asm_AES_decrypt | sub r3,pc,#8 @ asm_AES_decrypt | ||||
#else | #else | ||||
adr r3,asm_AES_decrypt | adr r3,asm_AES_decrypt | ||||
@@ -82,7 +82,12 @@ $code=<<___; | |||||
#include <openssl/arm_arch.h> | #include <openssl/arm_arch.h> | ||||
.text | .text | ||||
#if defined(__thumb2__) && !defined(__APPLE__) | |||||
.syntax unified | |||||
.thumb | |||||
#else | |||||
.code 32 | .code 32 | ||||
#endif | |||||
#if __ARM_MAX_ARCH__>=7 | #if __ARM_MAX_ARCH__>=7 | ||||
.align 5 | .align 5 | ||||
@@ -101,7 +106,7 @@ bn_mul_mont: | |||||
#if __ARM_MAX_ARCH__>=7 | #if __ARM_MAX_ARCH__>=7 | ||||
tst ip,#7 | tst ip,#7 | ||||
bne .Lialu | bne .Lialu | ||||
adr r0,bn_mul_mont | |||||
adr r0,.Lbn_mul_mont | |||||
ldr r2,.LOPENSSL_armcap | ldr r2,.LOPENSSL_armcap | ||||
ldr r0,[r0,r2] | ldr r0,[r0,r2] | ||||
#ifdef __APPLE__ | #ifdef __APPLE__ | ||||
@@ -117,6 +122,9 @@ bn_mul_mont: | |||||
#endif | #endif | ||||
cmp ip,#2 | cmp ip,#2 | ||||
mov $num,ip @ load num | mov $num,ip @ load num | ||||
#ifdef __thumb2__ | |||||
ittt lt | |||||
#endif | |||||
movlt r0,#0 | movlt r0,#0 | ||||
addlt sp,sp,#2*4 | addlt sp,sp,#2*4 | ||||
blt .Labrt | blt .Labrt | ||||
@@ -164,10 +172,11 @@ bn_mul_mont: | |||||
ldr $n0,[$_n0] @ restore n0 | ldr $n0,[$_n0] @ restore n0 | ||||
adc $nhi,$nhi,#0 | adc $nhi,$nhi,#0 | ||||
str $nlo,[$num] @ tp[num-1]= | str $nlo,[$num] @ tp[num-1]= | ||||
mov $tj,sp | |||||
str $nhi,[$num,#4] @ tp[num]= | str $nhi,[$num,#4] @ tp[num]= | ||||
.Louter: | .Louter: | ||||
sub $tj,$num,sp @ "original" $num-1 value | |||||
sub $tj,$num,$tj @ "original" $num-1 value | |||||
sub $ap,$ap,$tj @ "rewind" ap to &ap[1] | sub $ap,$ap,$tj @ "rewind" ap to &ap[1] | ||||
ldr $bi,[$tp,#4]! @ *(++bp) | ldr $bi,[$tp,#4]! @ *(++bp) | ||||
sub $np,$np,$tj @ "rewind" np to &np[1] | sub $np,$np,$tj @ "rewind" np to &np[1] | ||||
@@ -212,11 +221,16 @@ bn_mul_mont: | |||||
str $nhi,[$num,#4] @ tp[num]= | str $nhi,[$num,#4] @ tp[num]= | ||||
cmp $tp,$tj | cmp $tp,$tj | ||||
#ifdef __thumb2__ | |||||
itt ne | |||||
#endif | |||||
movne $tj,sp | |||||
bne .Louter | bne .Louter | ||||
ldr $rp,[$_rp] @ pull rp | ldr $rp,[$_rp] @ pull rp | ||||
mov $aj,sp | |||||
add $num,$num,#4 @ $num to point at &tp[num] | add $num,$num,#4 @ $num to point at &tp[num] | ||||
sub $aj,$num,sp @ "original" num value | |||||
sub $aj,$num,$aj @ "original" num value | |||||
mov $tp,sp @ "rewind" $tp | mov $tp,sp @ "rewind" $tp | ||||
mov $ap,$tp @ "borrow" $ap | mov $ap,$tp @ "borrow" $ap | ||||
sub $np,$np,$aj @ "rewind" $np to &np[0] | sub $np,$np,$aj @ "rewind" $np to &np[0] | ||||
@@ -242,7 +256,8 @@ bn_mul_mont: | |||||
cmp $tp,$num | cmp $tp,$num | ||||
bne .Lcopy | bne .Lcopy | ||||
add sp,$num,#4 @ skip over tp[num+1] | |||||
mov sp,$num | |||||
add sp,sp,#4 @ skip over tp[num+1] | |||||
ldmia sp!,{r4-r12,lr} @ restore registers | ldmia sp!,{r4-r12,lr} @ restore registers | ||||
add sp,sp,#2*4 @ skip over {r0,r2} | add sp,sp,#2*4 @ skip over {r0,r2} | ||||
mov r0,#1 | mov r0,#1 | ||||
@@ -283,6 +298,7 @@ bn_mul8x_mont_neon: | |||||
stmdb sp!,{r4-r11} | stmdb sp!,{r4-r11} | ||||
vstmdb sp!,{d8-d15} @ ABI specification says so | vstmdb sp!,{d8-d15} @ ABI specification says so | ||||
ldmia ip,{r4-r5} @ load rest of parameter block | ldmia ip,{r4-r5} @ load rest of parameter block | ||||
mov ip,sp | |||||
sub $toutptr,sp,#16 | sub $toutptr,sp,#16 | ||||
vld1.32 {${Bi}[0]}, [$bptr,:32]! | vld1.32 {${Bi}[0]}, [$bptr,:32]! | ||||
@@ -638,8 +654,9 @@ bn_mul8x_mont_neon: | |||||
bne .LNEON_sub | bne .LNEON_sub | ||||
ldr r10, [$aptr] @ load top-most bit | ldr r10, [$aptr] @ load top-most bit | ||||
mov r11,sp | |||||
veor q0,q0,q0 | veor q0,q0,q0 | ||||
sub r11,$bptr,sp @ this is num*4 | |||||
sub r11,$bptr,r11 @ this is num*4 | |||||
veor q1,q1,q1 | veor q1,q1,q1 | ||||
mov $aptr,sp | mov $aptr,sp | ||||
sub $rptr,$rptr,r11 @ rewind $rptr | sub $rptr,$rptr,r11 @ rewind $rptr | ||||
@@ -649,27 +666,33 @@ bn_mul8x_mont_neon: | |||||
.LNEON_copy_n_zap: | .LNEON_copy_n_zap: | ||||
ldmia $aptr!, {r4-r7} | ldmia $aptr!, {r4-r7} | ||||
ldmia $rptr, {r8-r11} | ldmia $rptr, {r8-r11} | ||||
it cc | |||||
movcc r8, r4 | movcc r8, r4 | ||||
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe | vst1.64 {q0-q1}, [$nptr,:256]! @ wipe | ||||
itt cc | |||||
movcc r9, r5 | movcc r9, r5 | ||||
movcc r10,r6 | movcc r10,r6 | ||||
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe | vst1.64 {q0-q1}, [$nptr,:256]! @ wipe | ||||
it cc | |||||
movcc r11,r7 | movcc r11,r7 | ||||
ldmia $aptr, {r4-r7} | ldmia $aptr, {r4-r7} | ||||
stmia $rptr!, {r8-r11} | stmia $rptr!, {r8-r11} | ||||
sub $aptr,$aptr,#16 | sub $aptr,$aptr,#16 | ||||
ldmia $rptr, {r8-r11} | ldmia $rptr, {r8-r11} | ||||
it cc | |||||
movcc r8, r4 | movcc r8, r4 | ||||
vst1.64 {q0-q1}, [$aptr,:256]! @ wipe | vst1.64 {q0-q1}, [$aptr,:256]! @ wipe | ||||
itt cc | |||||
movcc r9, r5 | movcc r9, r5 | ||||
movcc r10,r6 | movcc r10,r6 | ||||
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe | vst1.64 {q0-q1}, [$nptr,:256]! @ wipe | ||||
it cc | |||||
movcc r11,r7 | movcc r11,r7 | ||||
teq $aptr,$bptr @ preserves carry | teq $aptr,$bptr @ preserves carry | ||||
stmia $rptr!, {r8-r11} | stmia $rptr!, {r8-r11} | ||||
bne .LNEON_copy_n_zap | bne .LNEON_copy_n_zap | ||||
sub sp,ip,#96 | |||||
mov sp,ip | |||||
vldmia sp!,{d8-d15} | vldmia sp!,{d8-d15} | ||||
ldmia sp!,{r4-r11} | ldmia sp!,{r4-r11} | ||||
ret @ bx lr | ret @ bx lr | ||||
@@ -136,7 +136,12 @@ $code=<<___; | |||||
#include <openssl/arm_arch.h> | #include <openssl/arm_arch.h> | ||||
.text | .text | ||||
#if defined(__thumb2__) && !defined(__APPLE__) | |||||
.syntax unified | |||||
.thumb | |||||
#else | |||||
.code 32 | .code 32 | ||||
#endif | |||||
#ifdef __clang__ | #ifdef __clang__ | ||||
#define ldrplb ldrbpl | #define ldrplb ldrbpl | ||||
@@ -154,19 +159,27 @@ rem_4bit: | |||||
.type rem_4bit_get,%function | .type rem_4bit_get,%function | ||||
rem_4bit_get: | rem_4bit_get: | ||||
sub $rem_4bit,pc,#8 | |||||
sub $rem_4bit,$rem_4bit,#32 @ &rem_4bit | |||||
#if defined(__thumb2__) | |||||
adr $rem_4bit,rem_4bit | |||||
#else | |||||
sub $rem_4bit,pc,#8+32 @ &rem_4bit | |||||
#endif | |||||
b .Lrem_4bit_got | b .Lrem_4bit_got | ||||
nop | nop | ||||
nop | |||||
.size rem_4bit_get,.-rem_4bit_get | .size rem_4bit_get,.-rem_4bit_get | ||||
.global gcm_ghash_4bit | .global gcm_ghash_4bit | ||||
.type gcm_ghash_4bit,%function | .type gcm_ghash_4bit,%function | ||||
.align 4 | |||||
gcm_ghash_4bit: | gcm_ghash_4bit: | ||||
sub r12,pc,#8 | |||||
#if defined(__thumb2__) | |||||
adr r12,rem_4bit | |||||
#else | |||||
sub r12,pc,#8+48 @ &rem_4bit | |||||
#endif | |||||
add $len,$inp,$len @ $len to point at the end | add $len,$inp,$len @ $len to point at the end | ||||
stmdb sp!,{r3-r11,lr} @ save $len/end too | stmdb sp!,{r3-r11,lr} @ save $len/end too | ||||
sub r12,r12,#48 @ &rem_4bit | |||||
ldmia r12,{r4-r11} @ copy rem_4bit ... | ldmia r12,{r4-r11} @ copy rem_4bit ... | ||||
stmdb sp!,{r4-r11} @ ... to stack | stmdb sp!,{r4-r11} @ ... to stack | ||||
@@ -213,6 +226,9 @@ gcm_ghash_4bit: | |||||
eor $Zlh,$Zlh,$Zhl,lsl#28 | eor $Zlh,$Zlh,$Zhl,lsl#28 | ||||
ldrh $Tll,[sp,$nlo] @ rem_4bit[rem] | ldrh $Tll,[sp,$nlo] @ rem_4bit[rem] | ||||
eor $Zhl,$Thl,$Zhl,lsr#4 | eor $Zhl,$Thl,$Zhl,lsr#4 | ||||
#ifdef __thumb2__ | |||||
it pl | |||||
#endif | |||||
ldrplb $nlo,[$inp,$cnt] | ldrplb $nlo,[$inp,$cnt] | ||||
eor $Zhl,$Zhl,$Zhh,lsl#28 | eor $Zhl,$Zhl,$Zhh,lsl#28 | ||||
eor $Zhh,$Thh,$Zhh,lsr#4 | eor $Zhh,$Thh,$Zhh,lsr#4 | ||||
@@ -223,6 +239,9 @@ gcm_ghash_4bit: | |||||
add $nhi,$nhi,$nhi | add $nhi,$nhi,$nhi | ||||
ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] | ldmia $Thh,{$Tll-$Thh} @ load Htbl[nhi] | ||||
eor $Zll,$Tll,$Zll,lsr#4 | eor $Zll,$Tll,$Zll,lsr#4 | ||||
#ifdef __thumb2__ | |||||
it pl | |||||
#endif | |||||
ldrplb $Tll,[$Xi,$cnt] | ldrplb $Tll,[$Xi,$cnt] | ||||
eor $Zll,$Zll,$Zlh,lsl#28 | eor $Zll,$Zll,$Zlh,lsl#28 | ||||
eor $Zlh,$Tlh,$Zlh,lsr#4 | eor $Zlh,$Tlh,$Zlh,lsr#4 | ||||
@@ -230,8 +249,14 @@ gcm_ghash_4bit: | |||||
eor $Zlh,$Zlh,$Zhl,lsl#28 | eor $Zlh,$Zlh,$Zhl,lsl#28 | ||||
eor $Zhl,$Thl,$Zhl,lsr#4 | eor $Zhl,$Thl,$Zhl,lsr#4 | ||||
eor $Zhl,$Zhl,$Zhh,lsl#28 | eor $Zhl,$Zhl,$Zhh,lsl#28 | ||||
#ifdef __thumb2__ | |||||
it pl | |||||
#endif | |||||
eorpl $nlo,$nlo,$Tll | eorpl $nlo,$nlo,$Tll | ||||
eor $Zhh,$Thh,$Zhh,lsr#4 | eor $Zhh,$Thh,$Zhh,lsr#4 | ||||
#ifdef __thumb2__ | |||||
itt pl | |||||
#endif | |||||
andpl $nhi,$nlo,#0xf0 | andpl $nhi,$nlo,#0xf0 | ||||
andpl $nlo,$nlo,#0x0f | andpl $nlo,$nlo,#0x0f | ||||
eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem] | eor $Zhh,$Zhh,$Tlh,lsl#16 @ ^= rem_4bit[rem] | ||||
@@ -241,7 +266,11 @@ gcm_ghash_4bit: | |||||
add $inp,$inp,#16 | add $inp,$inp,#16 | ||||
mov $nhi,$Zll | mov $nhi,$Zll | ||||
___ | ___ | ||||
&Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]"); | |||||
&Zsmash("cmp\t$inp,$len","\n". | |||||
"#ifdef __thumb2__\n". | |||||
" it ne\n". | |||||
"#endif\n". | |||||
" ldrneb $nlo,[$inp,#15]"); | |||||
$code.=<<___; | $code.=<<___; | ||||
bne .Louter | bne .Louter | ||||
@@ -299,6 +328,9 @@ gcm_gmult_4bit: | |||||
eor $Zlh,$Zlh,$Zhl,lsl#28 | eor $Zlh,$Zlh,$Zhl,lsl#28 | ||||
ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem] | ldrh $Tll,[$rem_4bit,$nlo] @ rem_4bit[rem] | ||||
eor $Zhl,$Thl,$Zhl,lsr#4 | eor $Zhl,$Thl,$Zhl,lsr#4 | ||||
#ifdef __thumb2__ | |||||
it pl | |||||
#endif | |||||
ldrplb $nlo,[$Xi,$cnt] | ldrplb $nlo,[$Xi,$cnt] | ||||
eor $Zhl,$Zhl,$Zhh,lsl#28 | eor $Zhl,$Zhl,$Zhh,lsl#28 | ||||
eor $Zhh,$Thh,$Zhh,lsr#4 | eor $Zhh,$Thh,$Zhh,lsr#4 | ||||
@@ -316,6 +348,9 @@ gcm_gmult_4bit: | |||||
eor $Zhl,$Thl,$Zhl,lsr#4 | eor $Zhl,$Thl,$Zhl,lsr#4 | ||||
eor $Zhl,$Zhl,$Zhh,lsl#28 | eor $Zhl,$Zhl,$Zhh,lsl#28 | ||||
eor $Zhh,$Thh,$Zhh,lsr#4 | eor $Zhh,$Thh,$Zhh,lsr#4 | ||||
#ifdef __thumb2__ | |||||
itt pl | |||||
#endif | |||||
andpl $nhi,$nlo,#0xf0 | andpl $nhi,$nlo,#0xf0 | ||||
andpl $nlo,$nlo,#0x0f | andpl $nlo,$nlo,#0x0f | ||||
eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem] | eor $Zhh,$Zhh,$Tll,lsl#16 @ ^= rem_4bit[rem] | ||||
@@ -181,7 +181,12 @@ $code=<<___; | |||||
#include <openssl/arm_arch.h> | #include <openssl/arm_arch.h> | ||||
.text | .text | ||||
#if defined(__thumb2__) && !defined(__APPLE__) | |||||
.syntax unified | |||||
.thumb | |||||
#else | |||||
.code 32 | .code 32 | ||||
#endif | |||||
.global sha1_block_data_order | .global sha1_block_data_order | ||||
.type sha1_block_data_order,%function | .type sha1_block_data_order,%function | ||||
@@ -189,7 +194,8 @@ $code=<<___; | |||||
.align 5 | .align 5 | ||||
sha1_block_data_order: | sha1_block_data_order: | ||||
#if __ARM_MAX_ARCH__>=7 | #if __ARM_MAX_ARCH__>=7 | ||||
sub r3,pc,#8 @ sha1_block_data_order | |||||
.Lsha1_block: | |||||
adr r3,.Lsha1_block | |||||
ldr r12,.LOPENSSL_armcap | ldr r12,.LOPENSSL_armcap | ||||
ldr r12,[r3,r12] @ OPENSSL_armcap_P | ldr r12,[r3,r12] @ OPENSSL_armcap_P | ||||
#ifdef __APPLE__ | #ifdef __APPLE__ | ||||
@@ -216,7 +222,12 @@ for($i=0;$i<5;$i++) { | |||||
&BODY_00_15(@V); unshift(@V,pop(@V)); | &BODY_00_15(@V); unshift(@V,pop(@V)); | ||||
} | } | ||||
$code.=<<___; | $code.=<<___; | ||||
#if defined(__thumb2__) && !defined(__APPLE__) | |||||
mov $t3,sp | |||||
teq $Xi,$t3 | |||||
#else | |||||
teq $Xi,sp | teq $Xi,sp | ||||
#endif | |||||
bne .L_00_15 @ [((11+4)*5+2)*3] | bne .L_00_15 @ [((11+4)*5+2)*3] | ||||
sub sp,sp,#25*4 | sub sp,sp,#25*4 | ||||
___ | ___ | ||||
@@ -235,7 +246,12 @@ for($i=0;$i<5;$i++) { | |||||
&BODY_20_39(@V); unshift(@V,pop(@V)); | &BODY_20_39(@V); unshift(@V,pop(@V)); | ||||
} | } | ||||
$code.=<<___; | $code.=<<___; | ||||
#if defined(__thumb2__) && !defined(__APPLE__) | |||||
mov $t3,sp | |||||
teq $Xi,$t3 | |||||
#else | |||||
teq $Xi,sp @ preserve carry | teq $Xi,sp @ preserve carry | ||||
#endif | |||||
bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] | bne .L_20_39_or_60_79 @ [+((12+3)*5+2)*4] | ||||
bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes | bcs .L_done @ [+((12+3)*5+2)*4], spare 300 bytes | ||||
@@ -247,7 +263,12 @@ for($i=0;$i<5;$i++) { | |||||
&BODY_40_59(@V); unshift(@V,pop(@V)); | &BODY_40_59(@V); unshift(@V,pop(@V)); | ||||
} | } | ||||
$code.=<<___; | $code.=<<___; | ||||
#if defined(__thumb2__) && !defined(__APPLE__) | |||||
mov $t3,sp | |||||
teq $Xi,$t3 | |||||
#else | |||||
teq $Xi,sp | teq $Xi,sp | ||||
#endif | |||||
bne .L_40_59 @ [+((12+5)*5+2)*4] | bne .L_40_59 @ [+((12+5)*5+2)*4] | ||||
ldr $K,.LK_60_79 | ldr $K,.LK_60_79 | ||||
@@ -283,7 +304,7 @@ $code.=<<___; | |||||
.LK_60_79: .word 0xca62c1d6 | .LK_60_79: .word 0xca62c1d6 | ||||
#if __ARM_MAX_ARCH__>=7 | #if __ARM_MAX_ARCH__>=7 | ||||
.LOPENSSL_armcap: | .LOPENSSL_armcap: | ||||
.word OPENSSL_armcap_P-sha1_block_data_order | |||||
.word OPENSSL_armcap_P-.Lsha1_block | |||||
#endif | #endif | ||||
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" | .asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>" | ||||
.align 5 | .align 5 | ||||
@@ -458,6 +479,7 @@ sub Xuplast_80 () | |||||
&teq ($inp,$len); | &teq ($inp,$len); | ||||
&sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX | &sub ($K_XX_XX,$K_XX_XX,16); # rewind $K_XX_XX | ||||
&it ("eq"); | |||||
&subeq ($inp,$inp,64); # reload last block to avoid SEGV | &subeq ($inp,$inp,64); # reload last block to avoid SEGV | ||||
&vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!"); | &vld1_8 ("{@X[-4&7]-@X[-3&7]}","[$inp]!"); | ||||
eval(shift(@insns)); | eval(shift(@insns)); | ||||
@@ -508,12 +530,12 @@ sha1_block_data_order_neon: | |||||
@ dmb @ errata #451034 on early Cortex A8 | @ dmb @ errata #451034 on early Cortex A8 | ||||
@ vstmdb sp!,{d8-d15} @ ABI specification says so | @ vstmdb sp!,{d8-d15} @ ABI specification says so | ||||
mov $saved_sp,sp | mov $saved_sp,sp | ||||
sub sp,sp,#64 @ alloca | |||||
sub $Xfer,sp,#64 | |||||
adr $K_XX_XX,.LK_00_19 | adr $K_XX_XX,.LK_00_19 | ||||
bic sp,sp,#15 @ align for 128-bit stores | |||||
bic $Xfer,$Xfer,#15 @ align for 128-bit stores | |||||
ldmia $ctx,{$a,$b,$c,$d,$e} @ load context | ldmia $ctx,{$a,$b,$c,$d,$e} @ load context | ||||
mov $Xfer,sp | |||||
mov sp,$Xfer @ alloca | |||||
vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned | vld1.8 {@X[-4&7]-@X[-3&7]},[$inp]! @ handles unaligned | ||||
veor $zero,$zero,$zero | veor $zero,$zero,$zero | ||||
@@ -560,10 +582,13 @@ $code.=<<___; | |||||
add $b,$b,$t0 | add $b,$b,$t0 | ||||
add $c,$c,$t1 | add $c,$c,$t1 | ||||
add $d,$d,$Xfer | add $d,$d,$Xfer | ||||
it eq | |||||
moveq sp,$saved_sp | moveq sp,$saved_sp | ||||
add $e,$e,$Ki | add $e,$e,$Ki | ||||
it ne | |||||
ldrne $Ki,[sp] | ldrne $Ki,[sp] | ||||
stmia $ctx,{$a,$b,$c,$d,$e} | stmia $ctx,{$a,$b,$c,$d,$e} | ||||
itt ne | |||||
addne $Xfer,sp,#3*16 | addne $Xfer,sp,#3*16 | ||||
bne .Loop_neon | bne .Loop_neon | ||||
@@ -584,6 +609,13 @@ my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14)); | |||||
$code.=<<___; | $code.=<<___; | ||||
#if __ARM_MAX_ARCH__>=7 | #if __ARM_MAX_ARCH__>=7 | ||||
# if defined(__thumb2__) && !defined(__APPLE__) | |||||
# define INST(a,b,c,d) .byte c,d|0xf,a,b | |||||
# else | |||||
# define INST(a,b,c,d) .byte a,b,c,d|0x10 | |||||
# endif | |||||
.type sha1_block_data_order_armv8,%function | .type sha1_block_data_order_armv8,%function | ||||
.align 5 | .align 5 | ||||
sha1_block_data_order_armv8: | sha1_block_data_order_armv8: | ||||
@@ -678,7 +710,10 @@ ___ | |||||
# since ARMv7 instructions are always encoded little-endian. | # since ARMv7 instructions are always encoded little-endian. | ||||
# correct solution is to use .inst directive, but older | # correct solution is to use .inst directive, but older | ||||
# assemblers don't implement it:-( | # assemblers don't implement it:-( | ||||
sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", | |||||
# this fix-up provides Thumb encoding in conjunction with INST | |||||
$word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000); | |||||
sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", | |||||
$word&0xff,($word>>8)&0xff, | $word&0xff,($word>>8)&0xff, | ||||
($word>>16)&0xff,($word>>24)&0xff, | ($word>>16)&0xff,($word>>24)&0xff, | ||||
$mnemonic,$arg; | $mnemonic,$arg; | ||||
@@ -212,16 +212,12 @@ $code=<<___; | |||||
#endif | #endif | ||||
.text | .text | ||||
#if __ARM_ARCH__<7 || defined(__APPLE__) | |||||
.code 32 | |||||
#else | |||||
#if defined(__thumb2__) && !defined(__APPLE__) | |||||
.syntax unified | .syntax unified | ||||
# ifdef __thumb2__ | |||||
# define adrl adr | |||||
.thumb | .thumb | ||||
# else | |||||
.code 32 | |||||
# endif | |||||
# define adrl adr | |||||
#else | |||||
.code 32 | |||||
#endif | #endif | ||||
.type K512,%object | .type K512,%object | ||||
@@ -280,10 +276,10 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) | |||||
.type sha512_block_data_order,%function | .type sha512_block_data_order,%function | ||||
sha512_block_data_order: | sha512_block_data_order: | ||||
.Lsha512_block_data_order: | .Lsha512_block_data_order: | ||||
#if __ARM_ARCH__<7 | |||||
#if __ARM_ARCH__<7 && !defined(__thumb2__) | |||||
sub r3,pc,#8 @ sha512_block_data_order | sub r3,pc,#8 @ sha512_block_data_order | ||||
#else | #else | ||||
adr r3,sha512_block_data_order | |||||
adr r3,.Lsha512_block_data_order | |||||
#endif | #endif | ||||
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) | ||||
ldr r12,.LOPENSSL_armcap | ldr r12,.LOPENSSL_armcap | ||||