sha/asm/sha512-armv4.pl: adapt for use in Linux kernel context.

Follow-up to sha256-armv4.pl in cooperation with Ard Biesheuvel
(Linaro) and Sami Tolvanen (Google).

(Imported from upstream's b1a5d1c652086257930a1f62ae51c9cdee654b2c.)

Change-Id: Ibc4f289cc8f499924ade8d6b8d494f53bc08bda7
Reviewed-on: https://boringssl-review.googlesource.com/4467
Reviewed-by: Adam Langley <agl@google.com>
Author:    David Benjamin
Date:      2015-04-20 15:48:03 -04:00
Committer: Adam Langley
Parent:    0fd37062b6
Commit:    7af16eb49f

@@ -5,6 +5,8 @@
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
+#
+# Permission to use under GPL terms is granted.
 # ====================================================================
 
 # SHA512 block procedure for ARMv4. September 2007.
@@ -136,6 +138,9 @@ $code.=<<___;
 	teq	$t0,#$magic
 	ldr	$t3,[sp,#$Coff+0]	@ c.lo
+#if __ARM_ARCH__>=7
+	it	eq			@ Thumb2 thing, sanity check in ARM
+#endif
 	orreq	$Ktbl,$Ktbl,#1
 
 	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
 	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
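(Aside, not part of the patch: the "Thumb2 thing" comments refer to unified-syntax rules. A conditional instruction such as "orreq" must sit inside an IT block when assembled as Thumb-2, while the same "it eq" emits no code in ARM state and only lets the assembler check that the following instruction really carries the EQ condition. A minimal standalone sketch:)

	.syntax	unified
	.thumb				@ also assembles unchanged with .arm
	cmp	r0,#0
	it	eq			@ required in Thumb-2; pure sanity check in ARM state
	orreq	r1,r1,#1		@ executed only when r0 was zero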
@@ -173,7 +178,17 @@ $code.=<<___;
 ___
 }
 $code=<<___;
-#include "arm_arch.h"
+#ifndef __KERNEL__
+# include "arm_arch.h"
+# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
+# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
+# define VFP_ABI_PUSH
+# define VFP_ABI_POP
+#endif
+
 #ifdef __ARMEL__
 # define LO 0
 # define HI 4
@@ -185,7 +200,18 @@ $code=<<___;
 #endif
 
 .text
+#if __ARM_ARCH__<7
 .code	32
+#else
+.syntax	unified
+# ifdef __thumb2__
+#  define adrl adr
+.thumb
+# else
+.code	32
+# endif
+#endif
+
 .type	K512,%object
 .align	5
 K512:
@@ -230,7 +256,7 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
 WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
 WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .size	K512,.-K512
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 .LOPENSSL_armcap:
 .word	OPENSSL_armcap_P-sha512_block_data_order
 .skip	32-4
@@ -241,14 +267,18 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .global	sha512_block_data_order
 .type	sha512_block_data_order,%function
 sha512_block_data_order:
+#if __ARM_ARCH__<7
 	sub	r3,pc,#8		@ sha512_block_data_order
-	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
-#if __ARM_MAX_ARCH__>=7
+#else
+	adr	r3,sha512_block_data_order
+#endif
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
 	tst	r12,#1
 	bne	.LNEON
 #endif
+	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
 	stmdb	sp!,{r4-r12,lr}
 	sub	$Ktbl,r3,#672		@ K512
 	sub	sp,sp,#9*8
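(Aside, not part of the patch: the pre-v7 path keeps "sub r3,pc,#8" because, in ARM state, reading the PC from a data-processing instruction yields that instruction's address plus 8, so the subtraction recovers the entry point's own address. Thumb-2 code cannot rely on that fixed offset, hence the "adr" on the newer path. A minimal standalone sketch, with hypothetical labels:)

	.syntax	unified
	.arm
entry_arm:
	sub	r3,pc,#8		@ ARM state: pc reads as '.'+8, so r3 = entry_arm
	.thumb
entry_thumb:
	adr	r3,entry_thumb		@ Thumb-2: let the assembler compute the address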
@@ -362,6 +392,9 @@ $code.=<<___;
 ___
 &BODY_00_15(0x17);
 $code.=<<___;
+#if __ARM_ARCH__>=7
+	ittt	eq			@ Thumb2 thing, sanity check in ARM
+#endif
 	ldreq	$t0,[sp,#`$Xoff+8*(16-1)`+0]
 	ldreq	$t1,[sp,#`$Xoff+8*(16-1)`+4]
 	beq	.L16_79
@@ -446,6 +479,7 @@ $code.=<<___;
 	moveq	pc,lr			@ be binary compatible with V4, yet
 	bx	lr			@ interoperable with Thumb ISA:-)
 #endif
+.size	sha512_block_data_order,.-sha512_block_data_order
 ___
 
 {
@@ -552,11 +586,15 @@ $code.=<<___;
 .arch	armv7-a
 .fpu	neon
 
+.global	sha512_block_data_order_neon
+.type	sha512_block_data_order_neon,%function
 .align	4
+sha512_block_data_order_neon:
 .LNEON:
 	dmb				@ errata #451034 on early Cortex A8
-	vstmdb	sp!,{d8-d15}		@ ABI specification says so
-	sub	$Ktbl,r3,#672		@ K512
+	add	$len,$inp,$len,lsl#7	@ len to point at the end of inp
+	VFP_ABI_PUSH
+	adrl	$Ktbl,K512
 	vldmia	$ctx,{$A-$H}		@ load context
 .Loop_neon:
 ___
@@ -581,16 +619,16 @@ $code.=<<___;
 	sub	$Ktbl,#640	@ rewind K512
 	bne	.Loop_neon
 
-	vldmia	sp!,{d8-d15}		@ epilogue
+	VFP_ABI_POP
 	ret				@ bx lr
+.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
 #endif
 ___
 }
 $code.=<<___;
-.size	sha512_block_data_order,.-sha512_block_data_order
 .asciz	"SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
 .align	2
-#if __ARM_MAX_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 .comm	OPENSSL_armcap_P,4,4
 .hidden	OPENSSL_armcap_P
 #endif
@@ -599,5 +637,14 @@ ___
 $code =~ s/\`([^\`]*)\`/eval $1/gem;
 $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
 $code =~ s/\bret\b/bx	lr/gm;
+
+open SELF,$0;
+while(<SELF>) {
+	next if (/^#!/);
+	last if (!s/^#/@/ and !/^$/);
+	print;
+}
+close SELF;
+
 print $code;
 close STDOUT;			# enforce flush