fb3f0638ba
perlasm's bizarre mix of asm and perl indentation and clever editors always mess me up. Change-Id: Iac906a636207867939cc327b4c21b8a982abce29 Reviewed-on: https://boringssl-review.googlesource.com/c/33844 Commit-Queue: Adam Langley <agl@google.com> Reviewed-by: Adam Langley <agl@google.com>
367 lines
11 KiB
Perl
Executable File
367 lines
11 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
# Copyright (c) 2018, Google Inc.
|
|
#
|
|
# Permission to use, copy, modify, and/or distribute this software for any
|
|
# purpose with or without fee is hereby granted, provided that the above
|
|
# copyright notice and this permission notice appear in all copies.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
|
# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
# This file defines helper functions for crypto/test/abi_test.h on x86_64. See
|
|
# that header for details on how to use this.
|
|
#
|
|
# For convenience, this file is linked into libcrypto, where consuming builds
|
|
# already support architecture-specific sources. The static linker should drop
|
|
# this code in non-test binaries. This includes a shared library build of
|
|
# libcrypto, provided --gc-sections (ELF), -dead_strip (Mac), or equivalent is
|
|
# used.
|
|
#
|
|
# References:
|
|
#
|
|
# SysV ABI: https://github.com/hjl-tools/x86-psABI/wiki/x86-64-psABI-1.0.pdf
|
|
# Win64 ABI: https://docs.microsoft.com/en-us/cpp/build/x64-software-conventions?view=vs-2017
|
|
|
|
use strict;
|
|
|
|
# Command line: an optional flavour followed by the output path. If the first
# argument contains a '.', it is treated as the output path and the flavour is
# left undefined.
my $flavour = shift;
my $output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# $win64 selects the Win64 register assignments and SEH metadata below. It is
# set for flavours matching nasm/masm/mingw64 or for an output ending in .asm.
my $win64 = 0;
$win64 = 1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Locate x86_64-xlate.pl, first next to this script and then under
# ../../perlasm/ relative to it. $dir is the directory part of $0, including
# the trailing separator (undefined when $0 contains no separator).
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
my $dir = $1;
my $xlate;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# Fail loudly if the pipe cannot be set up rather than silently generating
# nothing.
open OUT, "| \"$^X\" \"$xlate\" $flavour \"$output\""
  or die "can't call $xlate: $!";
*STDOUT = *OUT;
|
|
|
|
# @inp holds the registers that carry function inputs, in parameter order.
# The SysV list is the default; Win64 uses a shorter, different list.
my @inp = ("%rdi", "%rsi", "%rdx", "%rcx", "%r8", "%r9");
@inp = ("%rcx", "%rdx", "%r8", "%r9") if ($win64);

# @caller_state names every register the callee must preserve for its caller.
# The order must match the definition of CallerState in abi_test.h.
my @caller_state = $win64
  ? ("%rbx", "%rbp", "%rdi", "%rsi", "%r12", "%r13", "%r14",
     "%r15", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10",
     "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15")
  : ("%rbx", "%rbp", "%r12", "%r13", "%r14", "%r15");

# $caller_state_size is the size of CallerState, in bytes: 8 for each %r*
# register and 16 for each %xmm* register.
my $caller_state_size = 0;
for my $saved (@caller_state) {
  if ($saved =~ /^%xmm/) {
    $caller_state_size += 16;
  } elsif ($saved =~ /^%r/) {
    $caller_state_size += 8;
  } else {
    die "unknown register $saved";
  }
}
|
|
|
|
# load_caller_state returns assembly text that loads the CallerState structure
# at $off($reg) into the registers listed in @caller_state: a movq for each
# %r* register (8 bytes) and a movdqa for each %xmm* register (16 bytes). No
# other registers are touched, but $reg may not itself be a register in
# CallerState.
#
# $cb is an optional callback. When defined, it is invoked after each emitted
# instruction with the offset (relative to $reg) the register was loaded from
# and the register's name; whatever it returns is appended to the output.
sub load_caller_state {
  my ($off, $reg, $cb) = @_;
  my $asm = "";
  for my $saved (@caller_state) {
    my ($insn, $width) = $saved =~ /^%r/   ? ("movq", 8)
                       : $saved =~ /^%xmm/ ? ("movdqa", 16)
                       :                     die "unknown register $saved";
    my $slot = $off;
    $asm .= "\t$insn\t$slot($reg), $saved\n";
    $off += $width;
    $asm .= $cb->($slot, $saved) if (defined($cb));
  }
  return $asm;
}
|
|
|
|
# store_caller_state is the mirror image of load_caller_state: it returns
# assembly text that writes the current values of the registers listed in
# @caller_state into the CallerState structure at $off($reg), using movq for
# %r* registers (8 bytes) and movdqa for %xmm* registers (16 bytes).
#
# $cb is an optional callback. When defined, it is invoked after each emitted
# instruction with the offset (relative to $reg) the register was stored to
# and the register's name; whatever it returns is appended to the output.
sub store_caller_state {
  my ($off, $reg, $cb) = @_;
  my $asm = "";
  for my $saved (@caller_state) {
    my ($insn, $width) = $saved =~ /^%r/   ? ("movq", 8)
                       : $saved =~ /^%xmm/ ? ("movdqa", 16)
                       :                     die "unknown register $saved";
    my $slot = $off;
    $asm .= "\t$insn\t$saved, $slot($reg)\n";
    $off += $width;
    $asm .= $cb->($slot, $saved) if (defined($cb));
  }
  return $asm;
}
|
|
|
|
# $max_params is the maximum number of parameters abi_test_trampoline supports.
my $max_params = 10;

# Windows reserves stack space for the register-based parameters as well,
# while SysV only reserves space for the ones that overflow the registers.
# $stack_params_skip counts the leading stack slots that are not written with
# parameters; $num_stack_params is the total number of parameter slots.
my ($stack_params_skip, $num_stack_params);
if ($win64) {
  $stack_params_skip = scalar(@inp);
  $num_stack_params = $max_params;
} else {
  $stack_params_skip = 0;
  $num_stack_params = $max_params - scalar(@inp);
}

# The trampoline's own four arguments arrive in the first four input registers.
my ($func, $state, $argv, $argc) = @inp[0 .. 3];
|
|
# $code accumulates the perlasm source that is printed at the end of the file.
# It starts with the header of abi_test_trampoline.
my $code = <<____;
.text

# abi_test_trampoline loads callee-saved registers from |state|, calls |func|
# with |argv|, then saves the callee-saved registers into |state|. It returns
# the result of |func|.
# uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
# const uint64_t *argv, size_t argc);
.type abi_test_trampoline, \@abi-omnipotent
.globl abi_test_trampoline
.align 16
abi_test_trampoline:
.Labi_test_trampoline_begin:
.cfi_startproc
	# Stack layout:
	# 8 bytes - align
	# $caller_state_size bytes - saved caller registers
	# 8 bytes - scratch space
	# 8 bytes - saved copy of \$state
	# 8 bytes - saved copy of \$func
	# 8 bytes - if needed for stack alignment
	# 8*$num_stack_params bytes - parameters for \$func
____

# Frame size: alignment slot, saved caller registers, three 8-byte slots
# (scratch, \$state, \$func) and the outgoing parameter area.
my $stack_alloc_size = 8 + $caller_state_size + 8*3 + 8*$num_stack_params;

# SysV and Windows both require the stack to be 16-byte-aligned. The call
# instruction offsets it by 8, so stack allocations must be 8 mod 16. When the
# size is not, grow the parameter area by one padding slot.
unless ($stack_alloc_size % 16 == 8) {
  $stack_alloc_size += 8;
  $num_stack_params++;
}

# Offsets from %rsp after the allocation: parameters sit at the bottom of the
# frame, followed by \$func, \$state, the scratch slot and the saved registers,
# each 8 bytes above the previous one.
my $stack_params_offset = 8 * $stack_params_skip;
my $func_offset = 8 * $num_stack_params;
my ($state_offset, $scratch_offset, $caller_state_offset) =
  map { $func_offset + 8 * $_ } (1 .. 3);

# Allocate the frame and mark the end of the allocation for the unwind tables.
$code .= <<____;
	subq \$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset $stack_alloc_size
.Labi_test_trampoline_prolog_alloc:
____
|
|
# Store our caller's state. This is needed because we modify it ourselves, and
# also to isolate the test infrastructure from the function under test failing
# to save some register.
#
# %reg_offsets records, per register name (without the leading '%'), the
# offset from %rsp at which that register was saved.
my %reg_offsets;

# Callback run after each store: remember the slot, then emit the CFI
# annotation and a per-register prologue label. The CFI offset is the slot
# offset minus ($stack_alloc_size + 8).
my $annotate_save = sub {
  my ($slot, $saved) = @_;
  my $name = substr($saved, 1);
  $reg_offsets{$name} = $slot;
  my $cfa_rel = $slot - ($stack_alloc_size + 8);
  return ".cfi_offset $name, $cfa_rel\n" .
         ".Labi_test_trampoline_prolog_${name}:\n";
};

$code .= store_caller_state($caller_state_offset, "%rsp", $annotate_save);
$code .= ".Labi_test_trampoline_prolog_end:\n";
|
|
|
|
# Load the callee-saved registers from the CallerState that \$state points to.
$code .= load_caller_state(0, $state);

$code .= <<____;
	# Stash \$func and \$state, so they are available after the call returns.
	movq $func, $func_offset(%rsp)
	movq $state, $state_offset(%rsp)

	# Load parameters. Note this will clobber \$argv and \$argc, so we can
	# only use non-parameter volatile registers. There are three, and they
	# are the same between SysV and Win64: %rax, %r10, and %r11.
	movq $argv, %r10
	movq $argc, %r11
____

# Fill each input register in turn, jumping to the call as soon as the
# remaining-argument counter in %r11 goes negative.
for my $param_reg (@inp) {
  $code .= <<____;
	dec %r11
	js .Lcall
	movq (%r10), $param_reg
	addq \$8, %r10
____
}

# Any remaining arguments are copied onto the stack, then \$func is called and
# \$state is reloaded from its stack slot.
$code .= <<____;
	leaq $stack_params_offset(%rsp), %rax
.Largs_loop:
	dec %r11
	js .Lcall

	# This block should be:
	# movq (%r10), %rtmp
	# movq %rtmp, (%rax)
	# There are no spare registers available, so we spill into the scratch
	# space.
	movq %r11, $scratch_offset(%rsp)
	movq (%r10), %r11
	movq %r11, (%rax)
	movq $scratch_offset(%rsp), %r11

	addq \$8, %r10
	addq \$8, %rax
	jmp .Largs_loop

.Lcall:
	movq $func_offset(%rsp), %rax
	call *%rax

	# Store what \$func did our state, so our caller can check.
	movq $state_offset(%rsp), $state
____
|
|
# Write the register values left behind by \$func into the CallerState that
# \$state points to.
$code .= store_caller_state(0, $state);

# Restore our caller's state from the frame, emitting a .cfi_restore after
# each register is reloaded.
my $annotate_restore = sub {
  my ($slot, $saved) = @_;
  my $name = substr($saved, 1);
  return ".cfi_restore\t$name\n";
};
$code .= load_caller_state($caller_state_offset, "%rsp", $annotate_restore);

# Release the frame and return.
$code .= <<____;
	addq \$$stack_alloc_size, %rsp
.cfi_adjust_cfa_offset -$stack_alloc_size

	# %rax already contains \$func's return value, unmodified.
	ret
.cfi_endproc
.Labi_test_trampoline_end:
.size abi_test_trampoline,.-abi_test_trampoline
____
|
|
|
|
# abi_test_clobber_* zeros the corresponding register. These are used to test
# the ABI-testing framework.
#
# One function is emitted per register: the general-purpose registers first
# (zeroed with xorq), then %xmm0 through %xmm15 (zeroed with pxor).
my @clobber_gprs = map { "r$_" } ("ax", "bx", "cx", "dx", "di", "si", "bp", 8..15);
my @clobber_xmms = map { "xmm$_" } (0..15);

for my $group ([ "xorq", \@clobber_gprs ], [ "pxor", \@clobber_xmms ]) {
  my ($insn, $names) = @$group;
  for my $name (@$names) {
    $code .= <<____;
.type abi_test_clobber_$name, \@abi-omnipotent
.globl abi_test_clobber_$name
.align 16
abi_test_clobber_$name:
	$insn %$name, %$name
	ret
.size abi_test_clobber_$name,.-abi_test_clobber_$name
____
  }
}
|
|
|
|
if ($win64) {
  # Add unwind metadata for SEH.
  #
  # TODO(davidben): This is all manual right now. Once we've added SEH tests,
  # add support for emitting these in x86_64-xlate.pl, probably based on MASM
  # and Yasm's unwind directives, and unify with CFI. (Sadly, NASM does not
  # support these directives.) Then push that upstream to replace the
  # error-prone and non-standard custom handlers.

  # Unwind operation codes.
  # See https://docs.microsoft.com/en-us/cpp/build/struct-unwind-code?view=vs-2017
  my $UWOP_ALLOC_LARGE = 1;
  my $UWOP_ALLOC_SMALL = 2;
  my $UWOP_SAVE_NONVOL = 4;
  my $UWOP_SAVE_XMM128 = 8;

  # Register numbers used in the operation-info field of an unwind code.
  my %UWOP_REG_NUMBER = (rax => 0, rcx => 1, rdx => 2, rbx => 3, rsp => 4,
                         rbp => 5, rsi => 6, rdi => 7,
                         map(("r$_" => $_), (8..15)));

  # $unwind_codes is the assembly for the unwind code array and $num_slots the
  # number of 2-byte slots it occupies.
  my $unwind_codes = "";
  my $num_slots = 0;

  # The unwind code array must be sorted by descending prolog offset, i.e. the
  # last prolog instruction comes first. The prolog allocates the stack and
  # then saves @caller_state in order, so emit the register saves in reverse
  # followed by the allocation.
  foreach my $saved (reverse @caller_state) {
    # Work on a copy so that @caller_state itself is left untouched.
    my $reg = substr($saved, 1);
    die "unknown register $reg" unless exists($reg_offsets{$reg});

    # Both save operations take two slots: the code itself plus a scaled
    # offset from %rsp (divided by 8 for GPRs, by 16 for XMM registers).
    my ($info, $value);
    if ($reg =~ /^r/) {
      die "unknown register $reg" unless exists($UWOP_REG_NUMBER{$reg});
      $info = $UWOP_SAVE_NONVOL | ($UWOP_REG_NUMBER{$reg} << 4);
      $value = $reg_offsets{$reg} / 8;
    } elsif ($reg =~ /^xmm/) {
      $info = $UWOP_SAVE_XMM128 | (substr($reg, 3) << 4);
      $value = $reg_offsets{$reg} / 16;
    } else {
      die "unknown register $reg";
    }
    $unwind_codes .= <<____;
	.byte .Labi_test_trampoline_prolog_$reg-.Labi_test_trampoline_begin
	.byte $info
	.value $value
____
    $num_slots += 2;
  }

  # The stack allocation is the first prolog instruction, so its code is last.
  if ($stack_alloc_size <= 128) {
    # UWOP_ALLOC_SMALL encodes (size - 8) / 8 in the operation info.
    my $info = $UWOP_ALLOC_SMALL | ((($stack_alloc_size - 8) / 8) << 4);
    $unwind_codes .= <<____;
	.byte .Labi_test_trampoline_prolog_alloc-.Labi_test_trampoline_begin
	.byte $info
____
    $num_slots++;
  } else {
    # UWOP_ALLOC_LARGE with operation info 0 stores size / 8 in one 16-bit
    # slot, which covers allocations up to 512K - 8 bytes.
    die "stack allocation needs three unwind slots" if ($stack_alloc_size > 512 * 1024 - 8);
    my $info = $UWOP_ALLOC_LARGE;
    my $value = $stack_alloc_size / 8;
    $unwind_codes .= <<____;
	.byte .Labi_test_trampoline_prolog_alloc-.Labi_test_trampoline_begin
	.byte $info
	.value $value
____
    $num_slots += 2;
  }

  # Emit the RUNTIME_FUNCTION entry (.pdata) and the UNWIND_INFO header
  # followed by the unwind codes (.xdata).
  $code .= <<____;
.section .pdata
.align 4
	# https://docs.microsoft.com/en-us/cpp/build/struct-runtime-function?view=vs-2017
	.rva .Labi_test_trampoline_begin
	.rva .Labi_test_trampoline_end
	.rva .Labi_test_trampoline_info

.section .xdata
.align 8
.Labi_test_trampoline_info:
	# https://docs.microsoft.com/en-us/cpp/build/struct-unwind-info?view=vs-2017
	.byte 1 # version 1, no flags
	.byte .Labi_test_trampoline_prolog_end-.Labi_test_trampoline_begin
	.byte $num_slots
	.byte 0 # no frame register
$unwind_codes
____
}
|
|
|
|
# Send the generated perlasm to the translator. STDOUT is a pipe to
# x86_64-xlate.pl, so buffered write errors and a failing translator only
# surface when the pipe is closed; check the result instead of ignoring it.
print $code;
close STDOUT or die "error closing STDOUT: $! (status $?)";
|