From 7af16eb49fdb9383512362c663449e3833fd05b0 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Mon, 20 Apr 2015 15:48:03 -0400 Subject: [PATCH] sha/asm/sha512-armv4.pl: adapt for use in Linux kernel context. Follow-up to sha256-armv4.pl in cooperation with Ard Biesheuvel (Linaro) and Sami Tolvanen (Google). (Imported from upstream's b1a5d1c652086257930a1f62ae51c9cdee654b2c.) Change-Id: Ibc4f289cc8f499924ade8d6b8d494f53bc08bda7 Reviewed-on: https://boringssl-review.googlesource.com/4467 Reviewed-by: Adam Langley --- crypto/sha/asm/sha512-armv4.pl | 65 +++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl index 4a12b493..3c574e99 100644 --- a/crypto/sha/asm/sha512-armv4.pl +++ b/crypto/sha/asm/sha512-armv4.pl @@ -5,6 +5,8 @@ # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. +# +# Permission to use under GPL terms is granted. # ==================================================================== # SHA512 block procedure for ARMv4. September 2007. @@ -136,6 +138,9 @@ $code.=<<___; teq $t0,#$magic ldr $t3,[sp,#$Coff+0] @ c.lo +#if __ARM_ARCH__>=7 + it eq @ Thumb2 thing, sanity check in ARM +#endif orreq $Ktbl,$Ktbl,#1 @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 @@ -173,7 +178,17 @@ $code.=<<___; ___ } $code=<<___; -#include "arm_arch.h" +#ifndef __KERNEL__ +# include "arm_arch.h" +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +# define VFP_ABI_PUSH +# define VFP_ABI_POP +#endif + #ifdef __ARMEL__ # define LO 0 # define HI 4 @@ -185,7 +200,18 @@ $code=<<___; #endif .text +#if __ARM_ARCH__<7 .code 32 +#else +.syntax unified +# ifdef __thumb2__ +# define adrl adr +.thumb +# else +.code 32 +# endif +#endif + .type K512,%object .align 5 K512: @@ -230,7 +256,7 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .size K512,.-K512 -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .LOPENSSL_armcap: .word OPENSSL_armcap_P-sha512_block_data_order .skip 32-4 @@ -241,14 +267,18 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ sha512_block_data_order - add $len,$inp,$len,lsl#7 @ len to point at the end of inp -#if __ARM_MAX_ARCH__>=7 +#else + adr r3,sha512_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P tst r12,#1 bne .LNEON #endif + add $len,$inp,$len,lsl#7 @ len to point at the end of inp stmdb sp!,{r4-r12,lr} sub $Ktbl,r3,#672 @ K512 sub sp,sp,#9*8 @@ -362,6 +392,9 @@ $code.=<<___; ___ &BODY_00_15(0x17); $code.=<<___; +#if __ARM_ARCH__>=7 + ittt eq @ Thumb2 thing, sanity check in ARM +#endif ldreq $t0,[sp,#`$Xoff+8*(16-1)`+0] ldreq $t1,[sp,#`$Xoff+8*(16-1)`+4] beq .L16_79 @@ -446,6 +479,7 @@ $code.=<<___; moveq pc,lr @ be binary compatible with V4, yet bx lr @ interoperable with Thumb ISA:-) #endif +.size sha512_block_data_order,.-sha512_block_data_order ___ { @@ -552,11 +586,15 @@ $code.=<<___; .arch armv7-a .fpu neon +.global sha512_block_data_order_neon +.type sha512_block_data_order_neon,%function .align 4 +sha512_block_data_order_neon: .LNEON: dmb @ errata #451034 on early Cortex A8 - vstmdb sp!,{d8-d15} @ ABI specification says so - sub $Ktbl,r3,#672 @ K512 + add $len,$inp,$len,lsl#7 @ len to point at the end of inp + VFP_ABI_PUSH + adrl $Ktbl,K512 vldmia $ctx,{$A-$H} @ load context .Loop_neon: ___ @@ -581,16 +619,16 @@ $code.=<<___; sub $Ktbl,#640 @ rewind K512 bne .Loop_neon - vldmia sp!,{d8-d15} @ epilogue + VFP_ABI_POP ret @ bx lr +.size sha512_block_data_order_neon,.-sha512_block_data_order_neon #endif ___ } $code.=<<___; -.size sha512_block_data_order,.-sha512_block_data_order .asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by " .align 2 -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .comm OPENSSL_armcap_P,4,4 .hidden OPENSSL_armcap_P #endif @@ -599,5 +637,14 @@ ___ $code =~ s/\`([^\`]*)\`/eval $1/gem; $code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4 $code =~ s/\bret\b/bx lr/gm; + +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + print $code; close STDOUT; # enforce flush