ARMv4 assembly pack: implement support for Thumb2.

As some ARM processors, more specifically the Cortex-Mx series, are
Thumb2-only, we need to support Thumb2-only builds even in assembly.

(Imported from upstream's 11208dcfb9105e8afa37233185decefd45e89e17.)

Change-Id: I7cb48ce6a842cf3cfdf553f6e6e6227d52d525c0
Reviewed-on: https://boringssl-review.googlesource.com/17108
Reviewed-by: Adam Langley <agl@google.com>
David Benjamin, 7 years ago
parent commit ae96383af3
5 changed files with 126 additions and 41 deletions
1. +10 -14  crypto/fipsmodule/aes/asm/aes-armv4.pl
2. +29 -6   crypto/fipsmodule/bn/asm/armv4-mont.pl
3. +40 -5   crypto/fipsmodule/modes/asm/ghash-armv4.pl
4. +41 -6   crypto/fipsmodule/sha/asm/sha1-armv4-large.pl
5. +6  -10  crypto/fipsmodule/sha/asm/sha512-armv4.pl
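
The common thread across all five files: ARM mode encodes a condition in every instruction, while Thumb2 predicates instructions with an IT (If-Then) block, so each conditionally executed instruction gains an it/itt guard that is compiled in only for __thumb2__ builds. A minimal sketch of the pattern, assuming a GNU-as Thumb2 target (the function name is illustrative, not from the commit):

	.syntax	unified
	.thumb
	.type	clamp_to_error,%function
clamp_to_error:
	teq	r0,#0		@ Z := (r0 == 0)
	it	eq		@ mandatory in Thumb2 before a predicated instruction
	moveq	r0,#-1		@ executes only when Z is set
	bx	lr
	.size	clamp_to_error,.-clamp_to_error

In ARM builds the #ifdef __thumb2__ guards compile the it/itt away, so the generated ARM code is unchanged.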

+10 -14  crypto/fipsmodule/aes/asm/aes-armv4.pl

@@ -70,15 +70,11 @@ $code=<<___;
 #endif
 
 .text
-#if __ARM_ARCH__<7
-.code	32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
 .syntax	unified
-# if defined(__thumb2__) && !defined(__APPLE__)
 .thumb
-# else
+#else
 .code	32
-# endif
 #endif
 
 .type	AES_Te,%object
@@ -193,7 +189,7 @@ AES_Te:
 .type	asm_AES_encrypt,%function
 .align	5
 asm_AES_encrypt:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
 	sub	r3,pc,#8	@ asm_AES_encrypt
 #else
 	adr	r3,asm_AES_encrypt
@@ -443,19 +439,19 @@ _armv4_AES_encrypt:
 .align	5
 asm_AES_set_encrypt_key:
 _armv4_AES_set_encrypt_key:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
 	sub	r3,pc,#8	@ asm_AES_set_encrypt_key
 #else
 	adr	r3,asm_AES_set_encrypt_key
 #endif
 	teq	r0,#0
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	eq	@ Thumb2 thing, sanity check in ARM
 #endif
 	moveq	r0,#-1
 	beq	.Labrt
 	teq	r2,#0
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	eq	@ Thumb2 thing, sanity check in ARM
 #endif
 	moveq	r0,#-1
@@ -466,7 +462,7 @@ _armv4_AES_set_encrypt_key:
 	teq	r1,#192
 	beq	.Lok
 	teq	r1,#256
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	ne	@ Thumb2 thing, sanity check in ARM
 #endif
 	movne	r0,#-1
@@ -627,7 +623,7 @@ _armv4_AES_set_encrypt_key:
 	str	$s2,[$key,#-16]
 	subs	$rounds,$rounds,#1
 	str	$s3,[$key,#-12]
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	eq	@ Thumb2 thing, sanity check in ARM
 #endif
 	subeq	r2,$key,#216
@@ -699,7 +695,7 @@ _armv4_AES_set_encrypt_key:
 	str	$s2,[$key,#-24]
 	subs	$rounds,$rounds,#1
 	str	$s3,[$key,#-20]
-#if __ARM_ARCH__>=7
+#ifdef __thumb2__
 	itt	eq	@ Thumb2 thing, sanity check in ARM
 #endif
 	subeq	r2,$key,#256
@@ -969,7 +965,7 @@ AES_Td:
 .type	asm_AES_decrypt,%function
 .align	5
 asm_AES_decrypt:
-#if __ARM_ARCH__<7
+#ifndef __thumb2__
 	sub	r3,pc,#8	@ asm_AES_decrypt
 #else
 	adr	r3,asm_AES_decrypt
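
Why the #if __ARM_ARCH__<7 guards become #ifndef __thumb2__ here: sub r3,pc,#8 assumes ARM state, where reading pc yields the current instruction's address plus 8; in Thumb state the pipeline offset is 4, so the idiom computes the wrong address. adr lets the assembler emit the correct pc-relative arithmetic for whichever state is in effect. A sketch of the idiom (label and register are illustrative):

	.syntax	unified
	.thumb
	.type	table_addr,%function
table_addr:
.Ltable_addr:
	adr	r3,.Ltable_addr	@ assembler picks the right pc offset
	bx	lr
	.size	table_addr,.-table_addr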


+29 -6  crypto/fipsmodule/bn/asm/armv4-mont.pl

@@ -82,7 +82,12 @@ $code=<<___;
 #include <openssl/arm_arch.h>
 
 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif
 
 #if __ARM_MAX_ARCH__>=7
 .align	5
@@ -101,7 +106,7 @@ bn_mul_mont:
 #if __ARM_MAX_ARCH__>=7
 	tst	ip,#7
 	bne	.Lialu
-	adr	r0,bn_mul_mont
+	adr	r0,.Lbn_mul_mont
 	ldr	r2,.LOPENSSL_armcap
 	ldr	r0,[r0,r2]
 #ifdef	__APPLE__
@@ -117,6 +122,9 @@ bn_mul_mont:
 #endif
 	cmp	ip,#2
 	mov	$num,ip	@ load num
+#ifdef	__thumb2__
+	ittt	lt
+#endif
 	movlt	r0,#0
 	addlt	sp,sp,#2*4
 	blt	.Labrt
@@ -164,10 +172,11 @@ bn_mul_mont:
 	ldr	$n0,[$_n0]	@ restore n0
 	adc	$nhi,$nhi,#0
 	str	$nlo,[$num]	@ tp[num-1]=
+	mov	$tj,sp
 	str	$nhi,[$num,#4]	@ tp[num]=
 
 .Louter:
-	sub	$tj,$num,sp	@ "original" $num-1 value
+	sub	$tj,$num,$tj	@ "original" $num-1 value
 	sub	$ap,$ap,$tj	@ "rewind" ap to &ap[1]
 	ldr	$bi,[$tp,#4]!	@ *(++bp)
 	sub	$np,$np,$tj	@ "rewind" np to &np[1]
@@ -212,11 +221,16 @@ bn_mul_mont:
 	str	$nhi,[$num,#4]	@ tp[num]=
 
 	cmp	$tp,$tj
+#ifdef	__thumb2__
+	itt	ne
+#endif
 	movne	$tj,sp
 	bne	.Louter
 	ldr	$rp,[$_rp]	@ pull rp
+	mov	$aj,sp
 	add	$num,$num,#4	@ $num to point at &tp[num]
-	sub	$aj,$num,sp	@ "original" num value
+	sub	$aj,$num,$aj	@ "original" num value
 	mov	$tp,sp	@ "rewind" $tp
 	mov	$ap,$tp	@ "borrow" $ap
 	sub	$np,$np,$aj	@ "rewind" $np to &np[0]
@@ -242,7 +256,8 @@ bn_mul_mont:
 	cmp	$tp,$num
 	bne	.Lcopy
 
-	add	sp,$num,#4	@ skip over tp[num+1]
+	mov	sp,$num
+	add	sp,sp,#4	@ skip over tp[num+1]
 	ldmia	sp!,{r4-r12,lr}	@ restore registers
 	add	sp,sp,#2*4	@ skip over {r0,r2}
 	mov	r0,#1
@@ -283,6 +298,7 @@ bn_mul8x_mont_neon:
 	stmdb	sp!,{r4-r11}
 	vstmdb	sp!,{d8-d15}	@ ABI specification says so
 	ldmia	ip,{r4-r5}	@ load rest of parameter block
+	mov	ip,sp
 
 	sub	$toutptr,sp,#16
 	vld1.32	{${Bi}[0]}, [$bptr,:32]!
@@ -638,8 +654,9 @@ bn_mul8x_mont_neon:
 	bne	.LNEON_sub
 
 	ldr	r10, [$aptr]	@ load top-most bit
+	mov	r11,sp
 	veor	q0,q0,q0
-	sub	r11,$bptr,sp	@ this is num*4
+	sub	r11,$bptr,r11	@ this is num*4
 	veor	q1,q1,q1
 	mov	$aptr,sp
 	sub	$rptr,$rptr,r11	@ rewind $rptr
@@ -649,27 +666,33 @@ bn_mul8x_mont_neon:
 .LNEON_copy_n_zap:
 	ldmia	$aptr!, {r4-r7}
 	ldmia	$rptr, {r8-r11}
+	it	cc
 	movcc	r8, r4
 	vst1.64	{q0-q1}, [$nptr,:256]!	@ wipe
+	itt	cc
 	movcc	r9, r5
 	movcc	r10,r6
 	vst1.64	{q0-q1}, [$nptr,:256]!	@ wipe
+	it	cc
 	movcc	r11,r7
 	ldmia	$aptr, {r4-r7}
 	stmia	$rptr!, {r8-r11}
 	sub	$aptr,$aptr,#16
 	ldmia	$rptr, {r8-r11}
+	it	cc
 	movcc	r8, r4
 	vst1.64	{q0-q1}, [$aptr,:256]!	@ wipe
+	itt	cc
 	movcc	r9, r5
 	movcc	r10,r6
 	vst1.64	{q0-q1}, [$nptr,:256]!	@ wipe
+	it	cc
 	movcc	r11,r7
 	teq	$aptr,$bptr	@ preserves carry
 	stmia	$rptr!, {r8-r11}
 	bne	.LNEON_copy_n_zap
 
-	sub	sp,ip,#96
+	mov	sp,ip
 	vldmia	sp!,{d8-d15}
 	ldmia	sp!,{r4-r11}
 	ret	@ bx lr
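
The mov $tj,sp / sub $tj,$num,$tj pairs and the split of add sp,$num,#4 into mov sp,$num; add sp,sp,#4 work around a Thumb2 restriction: sp is not a general-purpose operand there, so forms like sub rX,rY,sp or add sp,rY,#imm that assemble fine in ARM mode are not encodable. A sketch of the rewrite, assuming the "num" pointer lives in r2 and r3 is free (registers are illustrative):

	.syntax	unified
	.thumb
	mov	r3,sp		@ copy sp into an ordinary register first
	sub	r3,r2,r3	@ r3 := r2 - sp (sp as second operand is not allowed)
	mov	sp,r2		@ likewise, split "add sp,r2,#4" in two:
	add	sp,sp,#4	@ sp-relative immediate add is encodable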


+40 -5  crypto/fipsmodule/modes/asm/ghash-armv4.pl

@@ -136,7 +136,12 @@ $code=<<___;
 #include <openssl/arm_arch.h>
 
 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif
 
 #ifdef	__clang__
 #define	ldrplb	ldrbpl
@@ -154,19 +159,27 @@ rem_4bit:
 
 .type	rem_4bit_get,%function
 rem_4bit_get:
-	sub	$rem_4bit,pc,#8
-	sub	$rem_4bit,$rem_4bit,#32	@ &rem_4bit
+#if defined(__thumb2__)
+	adr	$rem_4bit,rem_4bit
+#else
+	sub	$rem_4bit,pc,#8+32	@ &rem_4bit
+#endif
 	b	.Lrem_4bit_got
 	nop
 	nop
 .size	rem_4bit_get,.-rem_4bit_get
 
 .global	gcm_ghash_4bit
 .type	gcm_ghash_4bit,%function
 .align	4
 gcm_ghash_4bit:
-	sub	r12,pc,#8
+#if defined(__thumb2__)
+	adr	r12,rem_4bit
+#else
+	sub	r12,pc,#8+48	@ &rem_4bit
+#endif
 	add	$len,$inp,$len	@ $len to point at the end
 	stmdb	sp!,{r3-r11,lr}	@ save $len/end too
-	sub	r12,r12,#48	@ &rem_4bit
 
 	ldmia	r12,{r4-r11}	@ copy rem_4bit ...
 	stmdb	sp!,{r4-r11}	@ ... to stack
@@ -213,6 +226,9 @@ gcm_ghash_4bit:
 	eor	$Zlh,$Zlh,$Zhl,lsl#28
 	ldrh	$Tll,[sp,$nlo]	@ rem_4bit[rem]
 	eor	$Zhl,$Thl,$Zhl,lsr#4
+#ifdef	__thumb2__
+	it	pl
+#endif
 	ldrplb	$nlo,[$inp,$cnt]
 	eor	$Zhl,$Zhl,$Zhh,lsl#28
 	eor	$Zhh,$Thh,$Zhh,lsr#4
@@ -223,6 +239,9 @@ gcm_ghash_4bit:
 	add	$nhi,$nhi,$nhi
 	ldmia	$Thh,{$Tll-$Thh}	@ load Htbl[nhi]
 	eor	$Zll,$Tll,$Zll,lsr#4
+#ifdef	__thumb2__
+	it	pl
+#endif
 	ldrplb	$Tll,[$Xi,$cnt]
 	eor	$Zll,$Zll,$Zlh,lsl#28
 	eor	$Zlh,$Tlh,$Zlh,lsr#4
@@ -230,8 +249,14 @@ gcm_ghash_4bit:
 	eor	$Zlh,$Zlh,$Zhl,lsl#28
 	eor	$Zhl,$Thl,$Zhl,lsr#4
 	eor	$Zhl,$Zhl,$Zhh,lsl#28
+#ifdef	__thumb2__
+	it	pl
+#endif
 	eorpl	$nlo,$nlo,$Tll
 	eor	$Zhh,$Thh,$Zhh,lsr#4
+#ifdef	__thumb2__
+	itt	pl
+#endif
 	andpl	$nhi,$nlo,#0xf0
 	andpl	$nlo,$nlo,#0x0f
 	eor	$Zhh,$Zhh,$Tlh,lsl#16	@ ^= rem_4bit[rem]
@@ -241,7 +266,11 @@ gcm_ghash_4bit:
 	add	$inp,$inp,#16
 	mov	$nhi,$Zll
 ___
-	&Zsmash("cmp\t$inp,$len","ldrneb\t$nlo,[$inp,#15]");
+	&Zsmash("cmp\t$inp,$len","\n".
+		"#ifdef __thumb2__\n".
+		"	it	ne\n".
+		"#endif\n".
+		"	ldrneb	$nlo,[$inp,#15]");
 $code.=<<___;
 	bne	.Louter
 
@@ -299,6 +328,9 @@ gcm_gmult_4bit:
 	eor	$Zlh,$Zlh,$Zhl,lsl#28
 	ldrh	$Tll,[$rem_4bit,$nlo]	@ rem_4bit[rem]
 	eor	$Zhl,$Thl,$Zhl,lsr#4
+#ifdef	__thumb2__
+	it	pl
+#endif
 	ldrplb	$nlo,[$Xi,$cnt]
 	eor	$Zhl,$Zhl,$Zhh,lsl#28
 	eor	$Zhh,$Thh,$Zhh,lsr#4
@@ -316,6 +348,9 @@ gcm_gmult_4bit:
 	eor	$Zhl,$Thl,$Zhl,lsr#4
 	eor	$Zhl,$Zhl,$Zhh,lsl#28
 	eor	$Zhh,$Thh,$Zhh,lsr#4
+#ifdef	__thumb2__
+	itt	pl
+#endif
 	andpl	$nhi,$nlo,#0xf0
 	andpl	$nlo,$nlo,#0x0f
 	eor	$Zhh,$Zhh,$Tll,lsl#16	@ ^= rem_4bit[rem]
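
The it pl insertions above predicate the conditional byte loads: ldrplb is the pre-unified spelling of "load register byte if plus" (the pre-existing #define ldrplb ldrbpl maps it to the unified-syntax operand order that clang requires), and in Thumb2 such a predicated load must sit inside an IT block. A sketch with illustrative registers:

	.syntax	unified
	.thumb
	subs	r1,r1,#1	@ decrement index; N is set once it goes negative
	it	pl		@ predicate the next instruction on "plus" (N clear)
	ldrbpl	r4,[r0,r1]	@ load the next byte only while r1 >= 0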


+41 -6  crypto/fipsmodule/sha/asm/sha1-armv4-large.pl

@@ -181,7 +181,12 @@ $code=<<___;
 #include <openssl/arm_arch.h>
 
 .text
+#if defined(__thumb2__) && !defined(__APPLE__)
+.syntax	unified
+.thumb
+#else
 .code	32
+#endif
 
 .global	sha1_block_data_order
 .type	sha1_block_data_order,%function
@@ -189,7 +194,8 @@ $code=<<___;
 .align	5
 sha1_block_data_order:
 #if __ARM_MAX_ARCH__>=7
-	sub	r3,pc,#8	@ sha1_block_data_order
+.Lsha1_block:
+	adr	r3,.Lsha1_block
 	ldr	r12,.LOPENSSL_armcap
 	ldr	r12,[r3,r12]	@ OPENSSL_armcap_P
 #ifdef	__APPLE__
@@ -216,7 +222,12 @@ for($i=0;$i<5;$i++) {
 	&BODY_00_15(@V);	unshift(@V,pop(@V));
 }
 $code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+	mov	$t3,sp
+	teq	$Xi,$t3
+#else
 	teq	$Xi,sp
+#endif
 	bne	.L_00_15	@ [((11+4)*5+2)*3]
 	sub	sp,sp,#25*4
@@ -235,7 +246,12 @@ for($i=0;$i<5;$i++) {
 	&BODY_20_39(@V);	unshift(@V,pop(@V));
 }
 $code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+	mov	$t3,sp
+	teq	$Xi,$t3
+#else
 	teq	$Xi,sp	@ preserve carry
+#endif
 	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
 	bcs	.L_done	@ [+((12+3)*5+2)*4], spare 300 bytes
 
@@ -247,7 +263,12 @@ for($i=0;$i<5;$i++) {
 	&BODY_40_59(@V);	unshift(@V,pop(@V));
 }
 $code.=<<___;
+#if defined(__thumb2__) && !defined(__APPLE__)
+	mov	$t3,sp
+	teq	$Xi,$t3
+#else
 	teq	$Xi,sp
+#endif
 	bne	.L_40_59	@ [+((12+5)*5+2)*4]
 
 	ldr	$K,.LK_60_79
@@ -283,7 +304,7 @@ $code.=<<___;
 .LK_60_79:	.word	0xca62c1d6
 #if __ARM_MAX_ARCH__>=7
 .LOPENSSL_armcap:
-.word	OPENSSL_armcap_P-sha1_block_data_order
+.word	OPENSSL_armcap_P-.Lsha1_block
 #endif
 .asciz	"SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align	5
@@ -458,6 +479,7 @@ sub Xuplast_80 ()
 
 	&teq	($inp,$len);
 	&sub	($K_XX_XX,$K_XX_XX,16);	# rewind $K_XX_XX
+	&it	("eq");
 	&subeq	($inp,$inp,64);	# reload last block to avoid SEGV
 	&vld1_8	("{@X[-4&7]-@X[-3&7]}","[$inp]!");
 	eval(shift(@insns));
@@ -508,12 +530,12 @@ sha1_block_data_order_neon:
 	@ dmb	@ errata #451034 on early Cortex A8
 	@ vstmdb	sp!,{d8-d15}	@ ABI specification says so
 	mov	$saved_sp,sp
-	sub	sp,sp,#64	@ alloca
+	sub	$Xfer,sp,#64
 	adr	$K_XX_XX,.LK_00_19
-	bic	sp,sp,#15	@ align for 128-bit stores
+	bic	$Xfer,$Xfer,#15	@ align for 128-bit stores
 
 	ldmia	$ctx,{$a,$b,$c,$d,$e}	@ load context
-	mov	$Xfer,sp
+	mov	sp,$Xfer	@ alloca
 
 	vld1.8	{@X[-4&7]-@X[-3&7]},[$inp]!	@ handles unaligned
 	veor	$zero,$zero,$zero
@@ -560,10 +582,13 @@ $code.=<<___;
 	add	$b,$b,$t0
 	add	$c,$c,$t1
 	add	$d,$d,$Xfer
+	it	eq
 	moveq	sp,$saved_sp
 	add	$e,$e,$Ki
+	it	ne
 	ldrne	$Ki,[sp]
 	stmia	$ctx,{$a,$b,$c,$d,$e}
+	itt	ne
 	addne	$Xfer,sp,#3*16
 	bne	.Loop_neon
@@ -584,6 +609,13 @@ my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
 
 $code.=<<___;
 #if __ARM_MAX_ARCH__>=7
+
+# if defined(__thumb2__) && !defined(__APPLE__)
+#  define INST(a,b,c,d)	.byte	c,d|0xf,a,b
+# else
+#  define INST(a,b,c,d)	.byte	a,b,c,d|0x10
+# endif
+
 .type	sha1_block_data_order_armv8,%function
 .align	5
 sha1_block_data_order_armv8:
@@ -678,7 +710,10 @@ ___
 	# since ARMv7 instructions are always encoded little-endian.
 	# correct solution is to use .inst directive, but older
 	# assemblers don't implement it:-(
-	sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+	# this fix-up provides Thumb encoding in conjunction with INST
+	$word &= ~0x10000000 if (($word & 0x0f000000) == 0x02000000);
+	sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
 			$word&0xff,($word>>8)&0xff,
 			($word>>16)&0xff,($word>>24)&0xff,
 			$mnemonic,$arg;
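
The INST() indirection exists because these ARMv8 SHA instructions are emitted as raw bytes for assemblers that predate them, and the two instruction sets lay those bytes out differently. As I read the macro: ARM state stores an instruction as one little-endian 32-bit word, Thumb2 stores the same bits as two little-endian 16-bit halfwords with the high halfword first, and the |0xf / |0x10 masks together with the bit-28 fix-up translate the ARM-state 0xf2/0xf3 NEON prefixes into the 0xef/0xff prefixes Thumb2 uses. For a hypothetical opcode word 0xAABBCCDD, the halfword reordering alone (ignoring the prefix masks) would give:

	.byte	0xdd,0xcc,0xbb,0xaa	@ ARM state: one LE 32-bit word
	.byte	0xbb,0xaa,0xdd,0xcc	@ Thumb2: LE halfwords 0xAABB,0xCCDD

which is exactly the a,b,c,d versus c,d,a,b byte orders in the two INST definitions.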


+6 -10  crypto/fipsmodule/sha/asm/sha512-armv4.pl

@@ -212,16 +212,12 @@ $code=<<___;
 #endif
 
 .text
-#if __ARM_ARCH__<7 || defined(__APPLE__)
-.code	32
-#else
+#if defined(__thumb2__) && !defined(__APPLE__)
 .syntax	unified
-# ifdef __thumb2__
-# define adrl adr
 .thumb
-# else
-.code	32
-# endif
+# define adrl adr
+#else
+.code	32
 #endif
 
 .type	K512,%object
@@ -280,10 +276,10 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .type	sha512_block_data_order,%function
 sha512_block_data_order:
 .Lsha512_block_data_order:
-#if __ARM_ARCH__<7
+#if __ARM_ARCH__<7 && !defined(__thumb2__)
 	sub	r3,pc,#8	@ sha512_block_data_order
 #else
-	adr	r3,sha512_block_data_order
+	adr	r3,.Lsha512_block_data_order
 #endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
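
Two details in this last file: sub r3,pc,#8 is now additionally guarded by !defined(__thumb2__), since the ARM-state rule that pc reads as the instruction's address plus 8 does not hold in Thumb, and adr now targets a local label rather than the global function symbol, because taking the address of a Thumb function symbol can pick up the interworking bit (bit 0). A sketch of the local-label idiom, assuming standard GNU as behavior (names illustrative):

	.syntax	unified
	.thumb
	.globl	digest_block
	.type	digest_block,%function
digest_block:
.Ldigest_block:			@ plain local label, no Thumb bit
	adr	r3,.Ldigest_block	@ address of the code, not symbol|1
	bx	lr
	.size	digest_block,.-digest_block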

