From def85b403db7bc2f40b4a7a9d83ae43331c0617c Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Tue, 25 Apr 2017 18:32:32 -0400 Subject: [PATCH] Revise OPENSSL_ia32cap_P strategy to avoid TEXTRELs. OPENSSL_ia32cap_addr avoids any relocations within the module, at the cost of a runtime TEXTREL, which causes problems in some cases. (Notably, if someone links us into a binary which uses the GCC "ifunc" attribute, the loader crashes.) We add a OPENSSL_ia32cap_addr_delta symbol (which is reachable relocation-free from the module) stores the difference between OPENSSL_ia32cap_P and its own address. Next, reference OPENSSL_ia32cap_P in code as usual, but always doing LEAQ (or the equivalent GOTPCREL MOVQ) into a register first. This pattern we can then transform into a LEAQ and ADDQ on OPENSSL_ia32cap_addr_delta. ADDQ modifies the FLAGS register, so this is only a safe transformation if we safe and restore flags first. That, in turn, is only a safe transformation if code always uses %rsp as a stack pointer (specifically everything below the stack must be fair game for scribbling over). Linux delivers signals on %rsp, so this should already be an ABI requirement. Further, we must clear the red zone (using LEAQ to avoid touching FLAGS) which signal handlers may not scribble over. This also fixes the GOTTPOFF logic to clear the red zone. Change-Id: I4ca6133ab936d5a13d5c8ef265a12ab6bd0073c9 Reviewed-on: https://boringssl-review.googlesource.com/15545 Reviewed-by: Adam Langley Commit-Queue: Adam Langley CQ-Verified: CQ bot account: commit-bot@chromium.org --- crypto/crypto.c | 3 - crypto/fipsmodule/aes/asm/aes-x86_64.pl | 4 +- crypto/fipsmodule/aes/asm/aesni-x86_64.pl | 8 +-- crypto/fipsmodule/delocate.go | 65 ++++++++++++--------- crypto/fipsmodule/modes/asm/ghash-x86_64.pl | 4 +- crypto/fipsmodule/sha/asm/sha1-x86_64.pl | 5 +- crypto/fipsmodule/sha/asm/sha512-x86_64.pl | 5 +- 7 files changed, 51 insertions(+), 43 deletions(-) diff --git a/crypto/crypto.c b/crypto/crypto.c index 5d352892..a52b8089 100644 --- a/crypto/crypto.c +++ b/crypto/crypto.c @@ -58,9 +58,6 @@ * initialising it to zero, it becomes a "data symbol", which isn't so * affected. */ uint32_t OPENSSL_ia32cap_P[4] = {0}; -#if !defined(BORINGSSL_FIPS) -uint32_t *OPENSSL_ia32cap_addr = OPENSSL_ia32cap_P; -#endif #elif defined(OPENSSL_PPC64LE) diff --git a/crypto/fipsmodule/aes/asm/aes-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-x86_64.pl index 362c1c9e..4bf7db3f 100755 --- a/crypto/fipsmodule/aes/asm/aes-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/aes-x86_64.pl @@ -1642,7 +1642,7 @@ $code.=<<___; .align 16 .globl asm_AES_cbc_encrypt .type asm_AES_cbc_encrypt,\@function,6 -.extern OPENSSL_ia32cap_addr +.extern OPENSSL_ia32cap_P .hidden asm_AES_cbc_encrypt asm_AES_cbc_encrypt: cmp \$0,%rdx # check length @@ -1664,7 +1664,7 @@ asm_AES_cbc_encrypt: cmp \$0,%r9 cmoveq %r10,$sbox - mov OPENSSL_ia32cap_addr(%rip),%r10 + leaq OPENSSL_ia32cap_P(%rip),%r10 mov (%r10), %r10d cmp \$$speed_limit,%rdx jb .Lcbc_slow_prologue diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl index 58109802..4ad0fb11 100644 --- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl @@ -209,7 +209,7 @@ $movkey = $PREFIX eq "aesni" ? "movups" : "movups"; ("%rdi","%rsi","%rdx","%rcx"); # Unix order $code=".text\n"; -$code.=".extern OPENSSL_ia32cap_addr\n"; +$code.=".extern OPENSSL_ia32cap_P\n"; $rounds="%eax"; # input to and changed by aesni_[en|de]cryptN !!! # this is natural Unix argument order for public $PREFIX_[ecb|cbc]_encrypt ... @@ -1271,7 +1271,7 @@ $code.=<<___; lea 7($ctr),%r9 mov %r10d,0x60+12(%rsp) bswap %r9d - movq OPENSSL_ia32cap_addr(%rip),%r10 + leaq OPENSSL_ia32cap_P(%rip),%r10 mov 4(%r10),%r10d xor $key0,%r9d and \$`1<<26|1<<22`,%r10d # isolate XSAVE+MOVBE @@ -3772,7 +3772,7 @@ $code.=<<___; movdqa $inout3,$in3 movdqu 0x50($inp),$inout5 movdqa $inout4,$in4 - movq OPENSSL_ia32cap_addr(%rip),%r9 + leaq OPENSSL_ia32cap_P(%rip),%r9 mov 4(%r9),%r9d cmp \$0x70,$len jbe .Lcbc_dec_six_or_seven @@ -4278,7 +4278,7 @@ __aesni_set_encrypt_key: movups ($inp),%xmm0 # pull first 128 bits of *userKey xorps %xmm4,%xmm4 # low dword of xmm4 is assumed 0 - movq OPENSSL_ia32cap_addr(%rip),%r10 + leaq OPENSSL_ia32cap_P(%rip),%r10 movl 4(%r10),%r10d and \$`1<<28|1<<11`,%r10d # AVX and XOP bits lea 16($key),%rax # %rax is used as modifiable copy of $key diff --git a/crypto/fipsmodule/delocate.go b/crypto/fipsmodule/delocate.go index 2a692559..7b5454c5 100644 --- a/crypto/fipsmodule/delocate.go +++ b/crypto/fipsmodule/delocate.go @@ -121,16 +121,6 @@ func definedSymbols(lines []string) map[string]bool { return symbols } -func referencesIA32CapDirectly(line string) bool { - const symbol = "OPENSSL_ia32cap_P" - i := strings.Index(line, symbol) - if i < 0 { - return false - } - i += len(symbol) - return i == len(line) || line[i] == '+' || line[i] == '(' || line[i] == '@' -} - // threadLocalOffsetFunc describes a function that fetches the offset to symbol // in the thread-local space and writes it to the given target register. type threadLocalOffsetFunc struct { @@ -231,9 +221,9 @@ func transform(lines []string, symbols map[string]bool) (ret []string) { // redirector function for that symbol. redirectors := make(map[string]string) - // ia32capAddrNeeded is true iff OPENSSL_ia32cap_addr has been - // referenced and thus needs to be emitted outside the module. - ia32capAddrNeeded := false + // ia32capAddrDeltaNeeded is true iff OPENSSL_ia32cap_addr_delta has + // been referenced and thus needs to be emitted outside the module. + ia32capAddrDeltaNeeded := false // ia32capGetNeeded is true iff OPENSSL_ia32cap_get has been referenced // and thus needs to be emitted outside the module. @@ -258,14 +248,6 @@ func transform(lines []string, symbols map[string]bool) (ret []string) { break } - if referencesIA32CapDirectly(line) { - panic("reference to OPENSSL_ia32cap_P needs to be changed to indirect via OPENSSL_ia32cap_addr") - } - - if strings.Contains(line, "OPENSSL_ia32cap_addr(%rip)") { - ia32capAddrNeeded = true - } - if strings.Contains(line, "OPENSSL_ia32cap_get@PLT") { ia32capGetNeeded = true } @@ -326,7 +308,9 @@ func transform(lines []string, symbols map[string]bool) (ret []string) { symbol := strings.SplitN(args[0], "@", 2)[0] functionName := fmt.Sprintf("BORINGSSL_bcm_tpoff_to_%s_for_%s", targetRegister, symbol) threadLocalOffsets[functionName] = threadLocalOffsetFunc{target: targetRegister, symbol: symbol} + ret = append(ret, "leaq -128(%rsp), %rsp") // Clear the red zone. ret = append(ret, "\tcallq "+functionName+"\n") + ret = append(ret, "leaq 128(%rsp), %rsp") continue } @@ -351,6 +335,35 @@ func transform(lines []string, symbols map[string]bool) (ret []string) { // referencing the symbol directly, // needs to be transformed into an LEA. line = strings.Replace(line, "movq", "leaq", 1) + instr = "leaq" + } + + if target == "OPENSSL_ia32cap_P" { + if instr != "leaq" { + panic("reference to OPENSSL_ia32cap_P needs to be changed to go through leaq or GOTPCREL") + } + if args[1][0] != '%' { + panic("reference to OPENSSL_ia32cap_P must target a register.") + } + + // We assume pushfq is safe, after + // clearing the red zone, because any + // signals will be delivered using + // %rsp. Thus perlasm and + // compiler-generated code must not use + // %rsp as a general-purpose register. + // + // TODO(davidben): This messes up CFI + // for a small window if %rsp is the CFI + // register. + ia32capAddrDeltaNeeded = true + ret = append(ret, "leaq -128(%rsp), %rsp") // Clear the red zone. + ret = append(ret, "pushfq") + ret = append(ret, fmt.Sprintf("leaq OPENSSL_ia32cap_addr_delta(%%rip), %s", args[1])) + ret = append(ret, fmt.Sprintf("addq OPENSSL_ia32cap_addr_delta(%%rip), %s", args[1])) + ret = append(ret, "popfq") + ret = append(ret, "leaq 128(%rsp), %rsp") + continue } } @@ -468,12 +481,12 @@ func transform(lines []string, symbols map[string]bool) (ret []string) { } // Emit an indirect reference to OPENSSL_ia32cap_P. - if ia32capAddrNeeded { + if ia32capAddrDeltaNeeded { ret = append(ret, ".extern OPENSSL_ia32cap_P") - ret = append(ret, ".type OPENSSL_ia32cap_addr,@object") - ret = append(ret, ".size OPENSSL_ia32cap_addr,8") - ret = append(ret, "OPENSSL_ia32cap_addr:") - ret = append(ret, "\t.quad OPENSSL_ia32cap_P") + ret = append(ret, ".type OPENSSL_ia32cap_addr_delta,@object") + ret = append(ret, ".size OPENSSL_ia32cap_addr_delta,8") + ret = append(ret, "OPENSSL_ia32cap_addr_delta:") + ret = append(ret, "\t.quad OPENSSL_ia32cap_P-OPENSSL_ia32cap_addr_delta") } // Emit accessors for thread-local offsets. diff --git a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl index 1a74edfc..1778ac0b 100644 --- a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl +++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl @@ -212,7 +212,7 @@ ___ $code=<<___; .text -.extern OPENSSL_ia32cap_addr +.extern OPENSSL_ia32cap_P .globl gcm_gmult_4bit .type gcm_gmult_4bit,\@function,2 @@ -644,7 +644,7 @@ if ($do4xaggr) { my ($Xl,$Xm,$Xh,$Hkey3,$Hkey4)=map("%xmm$_",(11..15)); $code.=<<___; - mov OPENSSL_ia32cap_addr(%rip),%rax + leaq OPENSSL_ia32cap_P(%rip),%rax mov 4(%rax),%eax cmp \$0x30,$len jb .Lskip4x diff --git a/crypto/fipsmodule/sha/asm/sha1-x86_64.pl b/crypto/fipsmodule/sha/asm/sha1-x86_64.pl index 25a1cad0..b269e84d 100755 --- a/crypto/fipsmodule/sha/asm/sha1-x86_64.pl +++ b/crypto/fipsmodule/sha/asm/sha1-x86_64.pl @@ -236,14 +236,13 @@ push(@xi,shift(@xi)); $code.=<<___; .text -.extern OPENSSL_ia32cap_addr +.extern OPENSSL_ia32cap_P .globl sha1_block_data_order .type sha1_block_data_order,\@function,3 .align 16 sha1_block_data_order: - lea OPENSSL_ia32cap_addr(%rip),%r10 - mov (%r10),%r10 + leaq OPENSSL_ia32cap_P(%rip),%r10 mov 0(%r10),%r9d mov 4(%r10),%r8d mov 8(%r10),%r10d diff --git a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl index c6385958..7ad74916 100755 --- a/crypto/fipsmodule/sha/asm/sha512-x86_64.pl +++ b/crypto/fipsmodule/sha/asm/sha512-x86_64.pl @@ -249,15 +249,14 @@ ___ $code=<<___; .text -.extern OPENSSL_ia32cap_addr +.extern OPENSSL_ia32cap_P .globl $func .type $func,\@function,3 .align 16 $func: ___ $code.=<<___ if ($SZ==4 || $avx); - lea OPENSSL_ia32cap_addr(%rip),%r11 - mov (%r11),%r11 + leaq OPENSSL_ia32cap_P(%rip),%r11 mov 0(%r11),%r9d mov 4(%r11),%r10d mov 8(%r11),%r11d