From 0a87c4982c617799d48cad210945ec6429c13b8f Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Mon, 7 Jan 2019 14:29:48 -0600 Subject: [PATCH] Implement ABI testing for ARM. Update-Note: There's some chance this'll break iOS since I was unable to test it there. The iPad I have to test on is too new to run 32-bit code at all. Change-Id: I6593f91b67a5e8a82828237d3b69ed948b07922d Reviewed-on: https://boringssl-review.googlesource.com/c/34725 Commit-Queue: David Benjamin Reviewed-by: Adam Langley --- crypto/CMakeLists.txt | 2 + crypto/abi_self_test.cc | 97 +++++++++++++++ crypto/fipsmodule/modes/gcm_test.cc | 18 +++ crypto/test/abi_test.h | 42 ++++++- crypto/test/asm/trampoline-armv4.pl | 181 ++++++++++++++++++++++++++++ 5 files changed, 339 insertions(+), 1 deletion(-) create mode 100755 crypto/test/asm/trampoline-armv4.pl diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt index a3cdc089..2b9479b9 100644 --- a/crypto/CMakeLists.txt +++ b/crypto/CMakeLists.txt @@ -105,6 +105,7 @@ if(${ARCH} STREQUAL "arm") chacha/chacha-armv4.${ASM_EXT} curve25519/asm/x25519-asm-arm.S poly1305/poly1305_arm_asm.S + test/trampoline-armv4.${ASM_EXT} ) endif() @@ -143,6 +144,7 @@ perlasm(chacha/chacha-x86.${ASM_EXT} chacha/asm/chacha-x86.pl) perlasm(chacha/chacha-x86_64.${ASM_EXT} chacha/asm/chacha-x86_64.pl) perlasm(cipher_extra/aes128gcmsiv-x86_64.${ASM_EXT} cipher_extra/asm/aes128gcmsiv-x86_64.pl) perlasm(cipher_extra/chacha20_poly1305_x86_64.${ASM_EXT} cipher_extra/asm/chacha20_poly1305_x86_64.pl) +perlasm(test/trampoline-armv4.${ASM_EXT} test/asm/trampoline-armv4.pl) perlasm(test/trampoline-x86.${ASM_EXT} test/asm/trampoline-x86.pl) perlasm(test/trampoline-x86_64.${ASM_EXT} test/asm/trampoline-x86_64.pl) diff --git a/crypto/abi_self_test.cc b/crypto/abi_self_test.cc index c5bace15..d47f37c6 100644 --- a/crypto/abi_self_test.cc +++ b/crypto/abi_self_test.cc @@ -243,3 +243,100 @@ TEST(ABITest, X86) { << "CHECK_ABI did not insulate the caller from direction flag errors"; } 
#endif // OPENSSL_X86 && SUPPORTS_ABI_TEST + +#if defined(OPENSSL_ARM) && defined(SUPPORTS_ABI_TEST) +extern "C" { +void abi_test_clobber_r0(void); +void abi_test_clobber_r1(void); +void abi_test_clobber_r2(void); +void abi_test_clobber_r3(void); +void abi_test_clobber_r4(void); +void abi_test_clobber_r5(void); +void abi_test_clobber_r6(void); +void abi_test_clobber_r7(void); +void abi_test_clobber_r8(void); +void abi_test_clobber_r9(void); +void abi_test_clobber_r10(void); +void abi_test_clobber_r11(void); +void abi_test_clobber_r12(void); +// r13, r14, and r15, are sp, lr, and pc, respectively. + +void abi_test_clobber_d0(void); +void abi_test_clobber_d1(void); +void abi_test_clobber_d2(void); +void abi_test_clobber_d3(void); +void abi_test_clobber_d4(void); +void abi_test_clobber_d5(void); +void abi_test_clobber_d6(void); +void abi_test_clobber_d7(void); +void abi_test_clobber_d8(void); +void abi_test_clobber_d9(void); +void abi_test_clobber_d10(void); +void abi_test_clobber_d11(void); +void abi_test_clobber_d12(void); +void abi_test_clobber_d13(void); +void abi_test_clobber_d14(void); +void abi_test_clobber_d15(void); +} // extern "C" + +TEST(ABITest, ARM) { + // abi_test_trampoline hides unsaved registers from the caller, so we can + // safely call the abi_test_clobber_* functions below. 
+ abi_test::internal::CallerState state; + RAND_bytes(reinterpret_cast<uint8_t *>(&state), sizeof(state)); + CHECK_ABI_NO_UNWIND(abi_test_trampoline, + reinterpret_cast<crypto_word_t>(abi_test_clobber_r4), + &state, nullptr, 0, 0 /* no breakpoint */); + + CHECK_ABI_NO_UNWIND(abi_test_clobber_r0); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r1); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r2); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r3); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r4), + "r4 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r5), + "r5 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r6), + "r6 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r7), + "r7 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r8), + "r8 was not restored after return"); +#if defined(OPENSSL_APPLE) + CHECK_ABI_NO_UNWIND(abi_test_clobber_r9); +#else + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r9), + "r9 was not restored after return"); +#endif + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r10), + "r10 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_r11), + "r11 was not restored after return"); + CHECK_ABI_NO_UNWIND(abi_test_clobber_r12); + + CHECK_ABI_NO_UNWIND(abi_test_clobber_d0); + CHECK_ABI_NO_UNWIND(abi_test_clobber_d1); + CHECK_ABI_NO_UNWIND(abi_test_clobber_d2); + CHECK_ABI_NO_UNWIND(abi_test_clobber_d3); + CHECK_ABI_NO_UNWIND(abi_test_clobber_d4); + CHECK_ABI_NO_UNWIND(abi_test_clobber_d5); + CHECK_ABI_NO_UNWIND(abi_test_clobber_d6); + CHECK_ABI_NO_UNWIND(abi_test_clobber_d7); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d8), + "d8 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d9), + "d9 was not restored after return"); + 
EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d10), + "d10 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d11), + "d11 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d12), + "d12 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d13), + "d13 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d14), + "d14 was not restored after return"); + EXPECT_NONFATAL_FAILURE(CHECK_ABI_NO_UNWIND(abi_test_clobber_d15), + "d15 was not restored after return"); +} +#endif // OPENSSL_ARM && SUPPORTS_ABI_TEST diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc index 9283cd26..7110f1c4 100644 --- a/crypto/fipsmodule/modes/gcm_test.cc +++ b/crypto/fipsmodule/modes/gcm_test.cc @@ -176,5 +176,23 @@ TEST(GCMTest, ABI) { #endif // GHASH_ASM_X86_64 } #endif // GHASH_ASM_X86 || GHASH_ASM_X86_64 + +#if defined(GHASH_ASM_ARM) + if (gcm_neon_capable()) { + CHECK_ABI(gcm_init_neon, Htable, kH); + CHECK_ABI(gcm_gmult_neon, X, Htable); + for (size_t blocks : kBlockCounts) { + CHECK_ABI(gcm_ghash_neon, X, Htable, buf, 16 * blocks); + } + } + + if (gcm_pmull_capable()) { + CHECK_ABI(gcm_init_v8, Htable, kH); + CHECK_ABI(gcm_gmult_v8, X, Htable); + for (size_t blocks : kBlockCounts) { + CHECK_ABI(gcm_ghash_v8, X, Htable, buf, 16 * blocks); + } + } +#endif // GHASH_ASM_ARM } #endif // SUPPORTS_ABI_TEST && GHASH_ASM diff --git a/crypto/test/abi_test.h b/crypto/test/abi_test.h index bf255520..e04b26c1 100644 --- a/crypto/test/abi_test.h +++ b/crypto/test/abi_test.h @@ -98,7 +98,47 @@ struct alignas(16) Reg128 { CALLER_STATE_REGISTER(uint32_t, edi) \ CALLER_STATE_REGISTER(uint32_t, ebx) \ CALLER_STATE_REGISTER(uint32_t, ebp) -#endif // X86_64 || X86 +#elif defined(OPENSSL_ARM) +// Unlike x86, ARM has a common ABI across all platforms, described in +// 
http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf +// It almost specifies the callee-saved registers, except r9 is left to the +// platform. Android and iOS differ in handling of r9. +#define LOOP_CALLER_STATE_REGISTERS_PRE_R9() \ + CALLER_STATE_REGISTER(uint64_t, d8) \ + CALLER_STATE_REGISTER(uint64_t, d9) \ + CALLER_STATE_REGISTER(uint64_t, d10) \ + CALLER_STATE_REGISTER(uint64_t, d11) \ + CALLER_STATE_REGISTER(uint64_t, d12) \ + CALLER_STATE_REGISTER(uint64_t, d13) \ + CALLER_STATE_REGISTER(uint64_t, d14) \ + CALLER_STATE_REGISTER(uint64_t, d15) \ + CALLER_STATE_REGISTER(uint32_t, r4) \ + CALLER_STATE_REGISTER(uint32_t, r5) \ + CALLER_STATE_REGISTER(uint32_t, r6) \ + CALLER_STATE_REGISTER(uint32_t, r7) \ + CALLER_STATE_REGISTER(uint32_t, r8) +#define LOOP_CALLER_STATE_REGISTERS_POST_R9() \ + CALLER_STATE_REGISTER(uint32_t, r10) \ + CALLER_STATE_REGISTER(uint32_t, r11) +#if defined(OPENSSL_APPLE) +// Starting iOS 3, r9 is treated as a caller-saved register. Before that, it +// could not be used at all. Most of our assembly treats it as callee-saved +// anyway to be uniform, but we match the platform to avoid false positives when +// testing compiler-generated output. +// +// https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html +#define LOOP_CALLER_STATE_REGISTERS() \ + LOOP_CALLER_STATE_REGISTERS_PRE_R9() \ + LOOP_CALLER_STATE_REGISTERS_POST_R9() +#else +// We found no clear reference which defines Linux's use of r9, but LLVM treats +// r9 as callee-saved on non-Apple ARM platforms. +#define LOOP_CALLER_STATE_REGISTERS() \ + LOOP_CALLER_STATE_REGISTERS_PRE_R9() \ + CALLER_STATE_REGISTER(uint32_t, r9) \ + LOOP_CALLER_STATE_REGISTERS_POST_R9() +#endif // OPENSSL_APPLE +#endif // X86_64 || X86 || ARM // Enable ABI testing if all of the following are true. 
// diff --git a/crypto/test/asm/trampoline-armv4.pl b/crypto/test/asm/trampoline-armv4.pl new file mode 100755 index 00000000..bfa67e42 --- /dev/null +++ b/crypto/test/asm/trampoline-armv4.pl @@ -0,0 +1,181 @@ +#!/usr/bin/env perl +# Copyright (c) 2019, Google Inc. +# +# Permission to use, copy, modify, and/or distribute this software for any +# purpose with or without fee is hereby granted, provided that the above +# copyright notice and this permission notice appear in all copies. +# +# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +# This file defines helper functions for crypto/test/abi_test.h on 32-bit +# ARM. See that header for details on how to use this. +# +# For convenience, this file is linked into libcrypto, where consuming builds +# already support architecture-specific sources. The static linker should drop +# this code in non-test binaries. This includes a shared library build of +# libcrypto, provided --gc-sections (ELF), -dead_strip (iOS), or equivalent is +# used. 
+# +# References: +# +# AAPCS: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf +# iOS ARMv6: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv6FunctionCallingConventions.html +# iOS ARMv7: https://developer.apple.com/library/archive/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARMv7FunctionCallingConventions.html + +use strict; + +my $flavour = shift; +my $output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; +my $dir = $1; +my $xlate; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""; +*STDOUT = *OUT; + +my ($func, $state, $argv, $argc) = ("r0", "r1", "r2", "r3"); +my $code = <<____; +.syntax unified + +.arch armv7-a +.fpu vfp + +.text + +@ abi_test_trampoline loads callee-saved registers from |state|, calls |func| +@ with |argv|, then saves the callee-saved registers into |state|. It returns +@ the result of |func|. The |unwind| argument is unused. +@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state, +@ const uint32_t *argv, size_t argc, +@ int unwind); +.type abi_test_trampoline, %function +.globl abi_test_trampoline +.align 4 +abi_test_trampoline: +.Labi_test_trampoline_begin: + @ Save parameters and all callee-saved registers. For convenience, we + @ save r9 on iOS even though it's volatile. + vstmdb sp!, {d8-d15} + stmdb sp!, {r0-r11,lr} + + @ Reserve stack space for six (10-4) stack parameters, plus an extra 4 + @ bytes to keep it 8-byte-aligned (see APCS, section 5.3). + sub sp, sp, #28 + + @ Every register in APCS is either non-volatile or a parameter (except + @ r9 on iOS), so this code, by the actual call, loses all its scratch + @ registers. First fill in stack parameters while there are registers + @ to spare. 
+ cmp $argc, #4 + bls .Lstack_args_done + mov r4, sp @ r4 is the output pointer. + add r5, $argv, $argc, lsl #2 @ Set r5 to the end of argv. + add $argv, $argv, #16 @ Skip four arguments. +.Lstack_args_loop: + ldr r6, [$argv], #4 + cmp $argv, r5 + str r6, [r4], #4 + bne .Lstack_args_loop + +.Lstack_args_done: + @ Load registers from |$state|. + vldmia $state!, {d8-d15} +#if defined(__APPLE__) + @ r9 is volatile (caller-saved) on iOS, so skip it. + ldmia $state!, {r4-r8,r10-r11} +#else + ldmia $state!, {r4-r11} +#endif + + @ Load register parameters. This uses up our remaining registers, so we + @ repurpose lr as scratch space. + ldr $argc, [sp, #40] @ Reload argc. + ldr lr, [sp, #36] @ Load argv into lr. + cmp $argc, #3 + bhi .Larg_r3 + beq .Larg_r2 + cmp $argc, #1 + bhi .Larg_r1 + beq .Larg_r0 + b .Largs_done + +.Larg_r3: + ldr r3, [lr, #12] @ argv[3] +.Larg_r2: + ldr r2, [lr, #8] @ argv[2] +.Larg_r1: + ldr r1, [lr, #4] @ argv[1] +.Larg_r0: + ldr r0, [lr] @ argv[0] +.Largs_done: + + @ With every other register in use, load the function pointer into lr + @ and call the function. + ldr lr, [sp, #28] + blx lr + + @ r1-r3 are free for use again. The trampoline only supports + @ single-return functions. Pass r4-r11 to the caller. + ldr $state, [sp, #32] + vstmia $state!, {d8-d15} +#if defined(__APPLE__) + @ r9 is volatile (caller-saved) on iOS, so skip it. + stmia $state!, {r4-r8,r10-r11} +#else + stmia $state!, {r4-r11} +#endif + + @ Unwind the stack and restore registers. + add sp, sp, #44 @ 44 = 28+16 + ldmia sp!, {r4-r11,lr} @ Skip r0-r3 (see +16 above). + vldmia sp!, {d8-d15} + + bx lr +.size abi_test_trampoline,.-abi_test_trampoline +____ + +# abi_test_clobber_* zeros the corresponding register. These are used to test +# the ABI-testing framework. +foreach (0..12) { + # This loop skips r13 (sp), r14 (lr, implicitly clobbered by every call), and + # r15 (pc). 
+ $code .= <<____; +.type abi_test_clobber_r$_, %function +.globl abi_test_clobber_r$_ +.align 4 +abi_test_clobber_r$_: + mov r$_, #0 + bx lr +.size abi_test_clobber_r$_,.-abi_test_clobber_r$_ +____ +} + +foreach (0..15) { + my $lo = "s".(2*$_); + my $hi = "s".(2*$_+1); + $code .= <<____; +.type abi_test_clobber_d$_, %function +.globl abi_test_clobber_d$_ +.align 4 +abi_test_clobber_d$_: + mov r0, #0 + vmov $lo, r0 + vmov $hi, r0 + bx lr +.size abi_test_clobber_d$_,.-abi_test_clobber_d$_ +____ +} + +print $code; +close STDOUT;