This syncs up with OpenSSL master as of 1550ea9d2b35
. The non-license non-spelling changes are CFI bits, which were added in upstream in b84460ad3a
. Change-Id: I42280985f834d5b9133eacafc8ff9dbd2f0ea59a Reviewed-on: https://boringssl-review.googlesource.com/25704 Reviewed-by: Adam Langley <agl@google.com>
@@ -1,7 +1,14 @@ | |||
#!/usr/bin/env perl | |||
#! /usr/bin/env perl | |||
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. | |||
# | |||
# Licensed under the OpenSSL license (the "License"). You may not use | |||
# this file except in compliance with the License. You can obtain a copy | |||
# in the file LICENSE in the source distribution or at | |||
# https://www.openssl.org/source/license.html | |||
# | |||
# ==================================================================== | |||
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |||
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL | |||
# project. The module is, however, dual licensed under OpenSSL and | |||
# CRYPTOGAMS licenses depending on where you obtain it. For further | |||
# details see http://www.openssl.org/~appro/cryptogams/. | |||
@@ -590,13 +597,21 @@ $code.=<<___; | |||
.type asm_AES_encrypt,\@function,3 | |||
.hidden asm_AES_encrypt | |||
asm_AES_encrypt: | |||
.cfi_startproc | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
# allocate frame "above" key schedule | |||
lea -63(%rdx),%rcx # %rdx is key argument | |||
@@ -609,6 +624,7 @@ asm_AES_encrypt: | |||
mov %rsi,16(%rsp) # save out | |||
mov %rax,24(%rsp) # save original stack pointer | |||
.cfi_cfa_expression %rsp+24,deref,+8 | |||
.Lenc_prologue: | |||
mov %rdx,$key | |||
@@ -635,20 +651,29 @@ asm_AES_encrypt: | |||
mov 16(%rsp),$out # restore out | |||
mov 24(%rsp),%rsi # restore saved stack pointer | |||
.cfi_def_cfa %rsi,8 | |||
mov $s0,0($out) # write output vector | |||
mov $s1,4($out) | |||
mov $s2,8($out) | |||
mov $s3,12($out) | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lenc_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size asm_AES_encrypt,.-asm_AES_encrypt | |||
___ | |||
@@ -1186,13 +1211,21 @@ $code.=<<___; | |||
.type asm_AES_decrypt,\@function,3 | |||
.hidden asm_AES_decrypt | |||
asm_AES_decrypt: | |||
.cfi_startproc | |||
mov %rsp,%rax | |||
.cfi_def_cfa_register %rax | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
# allocate frame "above" key schedule | |||
lea -63(%rdx),%rcx # %rdx is key argument | |||
@@ -1205,6 +1238,7 @@ asm_AES_decrypt: | |||
mov %rsi,16(%rsp) # save out | |||
mov %rax,24(%rsp) # save original stack pointer | |||
.cfi_cfa_expression %rsp+24,deref,+8 | |||
.Ldec_prologue: | |||
mov %rdx,$key | |||
@@ -1233,20 +1267,29 @@ asm_AES_decrypt: | |||
mov 16(%rsp),$out # restore out | |||
mov 24(%rsp),%rsi # restore saved stack pointer | |||
.cfi_def_cfa %rsi,8 | |||
mov $s0,0($out) # write output vector | |||
mov $s1,4($out) | |||
mov $s2,8($out) | |||
mov $s3,12($out) | |||
mov -48(%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov -40(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea (%rsi),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Ldec_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size asm_AES_decrypt,.-asm_AES_decrypt | |||
___ | |||
#------------------------------------------------------------------# | |||
@@ -1284,22 +1327,34 @@ $code.=<<___; | |||
.globl asm_AES_set_encrypt_key | |||
.type asm_AES_set_encrypt_key,\@function,3 | |||
asm_AES_set_encrypt_key: | |||
.cfi_startproc | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 # redundant, but allows to share | |||
.cfi_push %r12 | |||
push %r13 # exception handler... | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
sub \$8,%rsp | |||
.cfi_adjust_cfa_offset 8 | |||
.Lenc_key_prologue: | |||
call _x86_64_AES_set_encrypt_key | |||
mov 40(%rsp),%rbp | |||
.cfi_restore %rbp | |||
mov 48(%rsp),%rbx | |||
.cfi_restore %rbx | |||
add \$56,%rsp | |||
.cfi_adjust_cfa_offset -56 | |||
.Lenc_key_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key | |||
.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent | |||
@@ -1549,13 +1604,21 @@ $code.=<<___; | |||
.globl asm_AES_set_decrypt_key | |||
.type asm_AES_set_decrypt_key,\@function,3 | |||
asm_AES_set_decrypt_key: | |||
.cfi_startproc | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
push %rdx # save key schedule | |||
.cfi_adjust_cfa_offset 8 | |||
.Ldec_key_prologue: | |||
call _x86_64_AES_set_encrypt_key | |||
@@ -1609,14 +1672,22 @@ $code.=<<___; | |||
xor %rax,%rax | |||
.Labort: | |||
mov 8(%rsp),%r15 | |||
.cfi_restore %r15 | |||
mov 16(%rsp),%r14 | |||
.cfi_restore %r14 | |||
mov 24(%rsp),%r13 | |||
.cfi_restore %r13 | |||
mov 32(%rsp),%r12 | |||
.cfi_restore %r12 | |||
mov 40(%rsp),%rbp | |||
.cfi_restore %rbp | |||
mov 48(%rsp),%rbx | |||
.cfi_restore %rbx | |||
add \$56,%rsp | |||
.cfi_adjust_cfa_offset -56 | |||
.Ldec_key_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key | |||
___ | |||
@@ -1645,15 +1716,23 @@ $code.=<<___; | |||
.extern OPENSSL_ia32cap_P | |||
.hidden asm_AES_cbc_encrypt | |||
asm_AES_cbc_encrypt: | |||
.cfi_startproc | |||
cmp \$0,%rdx # check length | |||
je .Lcbc_epilogue | |||
pushfq | |||
.cfi_push 49 # %rflags | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
.Lcbc_prologue: | |||
cld | |||
@@ -1699,8 +1778,10 @@ asm_AES_cbc_encrypt: | |||
.Lcbc_te_ok: | |||
xchg %rsp,$key | |||
.cfi_def_cfa_register $key | |||
#add \$8,%rsp # reserve for return address! | |||
mov $key,$_rsp # save %rsp | |||
.cfi_cfa_expression $_rsp,deref,+64 | |||
.Lcbc_fast_body: | |||
mov %rdi,$_inp # save copy of inp | |||
mov %rsi,$_out # save copy of out | |||
@@ -1930,7 +2011,7 @@ asm_AES_cbc_encrypt: | |||
lea ($key,%rax),%rax | |||
mov %rax,$keyend | |||
# pick Te4 copy which can't "overlap" with stack frame or key scdedule | |||
# pick Te4 copy which can't "overlap" with stack frame or key schedule | |||
lea 2048($sbox),$sbox | |||
lea 768-8(%rsp),%rax | |||
sub $sbox,%rax | |||
@@ -2082,17 +2163,27 @@ asm_AES_cbc_encrypt: | |||
.align 16 | |||
.Lcbc_exit: | |||
mov $_rsp,%rsi | |||
.cfi_def_cfa %rsi,64 | |||
mov (%rsi),%r15 | |||
.cfi_restore %r15 | |||
mov 8(%rsi),%r14 | |||
.cfi_restore %r14 | |||
mov 16(%rsi),%r13 | |||
.cfi_restore %r13 | |||
mov 24(%rsi),%r12 | |||
.cfi_restore %r12 | |||
mov 32(%rsi),%rbp | |||
.cfi_restore %rbp | |||
mov 40(%rsi),%rbx | |||
.cfi_restore %rbx | |||
lea 48(%rsi),%rsp | |||
.cfi_def_cfa %rsp,16 | |||
.Lcbc_popfq: | |||
popfq | |||
.cfi_pop 49 # %rflags | |||
.Lcbc_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size asm_AES_cbc_encrypt,.-asm_AES_cbc_encrypt | |||
___ | |||
} | |||
@@ -60,7 +60,7 @@ | |||
# identical to CBC, because CBC-MAC is essentially CBC encrypt without | |||
# saving output. CCM CTR "stays invisible," because it's neatly | |||
# interleaved wih CBC-MAC. This provides ~30% improvement over | |||
# "straghtforward" CCM implementation with CTR and CBC-MAC performed | |||
# "straightforward" CCM implementation with CTR and CBC-MAC performed | |||
# disjointly. Parallelizable modes practically achieve the theoretical | |||
# limit. | |||
# | |||
@@ -143,14 +143,14 @@ | |||
# asymptotic, if it can be surpassed, isn't it? What happens there? | |||
# Rewind to CBC paragraph for the answer. Yes, out-of-order execution | |||
# magic is responsible for this. Processor overlaps not only the | |||
# additional instructions with AES ones, but even AES instuctions | |||
# additional instructions with AES ones, but even AES instructions | |||
# processing adjacent triplets of independent blocks. In the 6x case | |||
# additional instructions still claim disproportionally small amount | |||
# of additional cycles, but in 8x case number of instructions must be | |||
# a tad too high for out-of-order logic to cope with, and AES unit | |||
# remains underutilized... As you can see 8x interleave is hardly | |||
# justifiable, so there no need to feel bad that 32-bit aesni-x86.pl | |||
# utilizies 6x interleave because of limited register bank capacity. | |||
# utilizes 6x interleave because of limited register bank capacity. | |||
# | |||
# Higher interleave factors do have negative impact on Westmere | |||
# performance. While for ECB mode it's negligible ~1.5%, other | |||
@@ -1182,6 +1182,7 @@ $code.=<<___; | |||
.type aesni_ctr32_encrypt_blocks,\@function,5 | |||
.align 16 | |||
aesni_ctr32_encrypt_blocks: | |||
.cfi_startproc | |||
cmp \$1,$len | |||
jne .Lctr32_bulk | |||
@@ -1204,7 +1205,9 @@ $code.=<<___; | |||
.align 16 | |||
.Lctr32_bulk: | |||
lea (%rsp),$key_ # use $key_ as frame pointer | |||
.cfi_def_cfa_register $key_ | |||
push %rbp | |||
.cfi_push %rbp | |||
sub \$$frame_size,%rsp | |||
and \$-16,%rsp # Linux kernel stack can be incorrectly seeded | |||
___ | |||
@@ -1548,7 +1551,7 @@ $code.=<<___; | |||
sub \$8,$len | |||
jnc .Lctr32_loop8 # loop if $len-=8 didn't borrow | |||
add \$8,$len # restore real remainig $len | |||
add \$8,$len # restore real remaining $len | |||
jz .Lctr32_done # done if ($len==0) | |||
lea -0x80($key),$key | |||
@@ -1665,7 +1668,7 @@ $code.=<<___; | |||
movups $inout2,0x20($out) # $len was 3, stop store | |||
.Lctr32_done: | |||
xorps %xmm0,%xmm0 # clear regiser bank | |||
xorps %xmm0,%xmm0 # clear register bank | |||
xor $key0,$key0 | |||
pxor %xmm1,%xmm1 | |||
pxor %xmm2,%xmm2 | |||
@@ -1725,9 +1728,12 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -8($key_),%rbp | |||
.cfi_restore %rbp | |||
lea ($key_),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lctr32_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks | |||
___ | |||
} | |||
@@ -1749,8 +1755,11 @@ $code.=<<___; | |||
.type aesni_xts_encrypt,\@function,6 | |||
.align 16 | |||
aesni_xts_encrypt: | |||
.cfi_startproc | |||
lea (%rsp),%r11 # frame pointer | |||
.cfi_def_cfa_register %r11 | |||
push %rbp | |||
.cfi_push %rbp | |||
sub \$$frame_size,%rsp | |||
and \$-16,%rsp # Linux kernel stack can be incorrectly seeded | |||
___ | |||
@@ -1848,7 +1857,7 @@ $code.=<<___; | |||
lea `16*6`($inp),$inp | |||
pxor $twmask,$inout5 | |||
pxor $twres,@tweak[0] # calclulate tweaks^round[last] | |||
pxor $twres,@tweak[0] # calculate tweaks^round[last] | |||
aesenc $rndkey1,$inout4 | |||
pxor $twres,@tweak[1] | |||
movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^round[last] | |||
@@ -2215,9 +2224,12 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -8(%r11),%rbp | |||
.cfi_restore %rbp | |||
lea (%r11),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lxts_enc_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size aesni_xts_encrypt,.-aesni_xts_encrypt | |||
___ | |||
@@ -2226,8 +2238,11 @@ $code.=<<___; | |||
.type aesni_xts_decrypt,\@function,6 | |||
.align 16 | |||
aesni_xts_decrypt: | |||
.cfi_startproc | |||
lea (%rsp),%r11 # frame pointer | |||
.cfi_def_cfa_register %r11 | |||
push %rbp | |||
.cfi_push %rbp | |||
sub \$$frame_size,%rsp | |||
and \$-16,%rsp # Linux kernel stack can be incorrectly seeded | |||
___ | |||
@@ -2328,7 +2343,7 @@ $code.=<<___; | |||
lea `16*6`($inp),$inp | |||
pxor $twmask,$inout5 | |||
pxor $twres,@tweak[0] # calclulate tweaks^round[last] | |||
pxor $twres,@tweak[0] # calculate tweaks^round[last] | |||
aesdec $rndkey1,$inout4 | |||
pxor $twres,@tweak[1] | |||
movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^last round key | |||
@@ -2718,9 +2733,12 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -8(%r11),%rbp | |||
.cfi_restore %rbp | |||
lea (%r11),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lxts_dec_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size aesni_xts_decrypt,.-aesni_xts_decrypt | |||
___ | |||
} | |||
@@ -2745,12 +2763,18 @@ $code.=<<___; | |||
.type aesni_ocb_encrypt,\@function,6 | |||
.align 32 | |||
aesni_ocb_encrypt: | |||
.cfi_startproc | |||
lea (%rsp),%rax | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
___ | |||
$code.=<<___ if ($win64); | |||
lea -0xa0(%rsp),%rsp | |||
@@ -2945,6 +2969,7 @@ $code.=<<___ if (!$win64); | |||
pxor %xmm14,%xmm14 | |||
pxor %xmm15,%xmm15 | |||
lea 0x28(%rsp),%rax | |||
.cfi_def_cfa %rax,8 | |||
___ | |||
$code.=<<___ if ($win64); | |||
movaps 0x00(%rsp),%xmm6 | |||
@@ -2972,13 +2997,20 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -40(%rax),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rax),%rbx | |||
.cfi_restore %rbx | |||
lea (%rax),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Locb_enc_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size aesni_ocb_encrypt,.-aesni_ocb_encrypt | |||
.type __ocb_encrypt6,\@abi-omnipotent | |||
@@ -3191,12 +3223,18 @@ __ocb_encrypt1: | |||
.type aesni_ocb_decrypt,\@function,6 | |||
.align 32 | |||
aesni_ocb_decrypt: | |||
.cfi_startproc | |||
lea (%rsp),%rax | |||
push %rbx | |||
.cfi_push %rbx | |||
push %rbp | |||
.cfi_push %rbp | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
___ | |||
$code.=<<___ if ($win64); | |||
lea -0xa0(%rsp),%rsp | |||
@@ -3413,6 +3451,7 @@ $code.=<<___ if (!$win64); | |||
pxor %xmm14,%xmm14 | |||
pxor %xmm15,%xmm15 | |||
lea 0x28(%rsp),%rax | |||
.cfi_def_cfa %rax,8 | |||
___ | |||
$code.=<<___ if ($win64); | |||
movaps 0x00(%rsp),%xmm6 | |||
@@ -3440,13 +3479,20 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -40(%rax),%r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax),%r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax),%r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax),%rbp | |||
.cfi_restore %rbp | |||
mov -8(%rax),%rbx | |||
.cfi_restore %rbx | |||
lea (%rax),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Locb_dec_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size aesni_ocb_decrypt,.-aesni_ocb_decrypt | |||
.type __ocb_decrypt6,\@abi-omnipotent | |||
@@ -3659,6 +3705,7 @@ $code.=<<___; | |||
.type ${PREFIX}_cbc_encrypt,\@function,6 | |||
.align 16 | |||
${PREFIX}_cbc_encrypt: | |||
.cfi_startproc | |||
test $len,$len # check length | |||
jz .Lcbc_ret | |||
@@ -3735,7 +3782,9 @@ $code.=<<___; | |||
.align 16 | |||
.Lcbc_decrypt_bulk: | |||
lea (%rsp),%r11 # frame pointer | |||
.cfi_def_cfa_register %r11 | |||
push %rbp | |||
.cfi_push %rbp | |||
sub \$$frame_size,%rsp | |||
and \$-16,%rsp # Linux kernel stack can be incorrectly seeded | |||
___ | |||
@@ -4179,9 +4228,12 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -8(%r11),%rbp | |||
.cfi_restore %rbp | |||
lea (%r11),%rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lcbc_ret: | |||
ret | |||
.cfi_endproc | |||
.size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt | |||
___ | |||
} | |||
@@ -4202,7 +4254,9 @@ $code.=<<___; | |||
.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent | |||
.align 16 | |||
${PREFIX}_set_decrypt_key: | |||
.cfi_startproc | |||
.byte 0x48,0x83,0xEC,0x08 # sub rsp,8 | |||
.cfi_adjust_cfa_offset 8 | |||
call __aesni_set_encrypt_key | |||
shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key | |||
test %eax,%eax | |||
@@ -4235,15 +4289,16 @@ ${PREFIX}_set_decrypt_key: | |||
pxor %xmm0,%xmm0 | |||
.Ldec_key_ret: | |||
add \$8,%rsp | |||
.cfi_adjust_cfa_offset -8 | |||
ret | |||
.cfi_endproc | |||
.LSEH_end_set_decrypt_key: | |||
.size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key | |||
___ | |||
# This is based on submission by | |||
# | |||
# Huang Ying <ying.huang@intel.com> | |||
# Vinodh Gopal <vinodh.gopal@intel.com> | |||
# This is based on submission from Intel by | |||
# Huang Ying | |||
# Vinodh Gopal | |||
# Kahraman Akdemir | |||
# | |||
# Aggressively optimized in respect to aeskeygenassist's critical path | |||
@@ -4271,7 +4326,9 @@ $code.=<<___; | |||
.align 16 | |||
${PREFIX}_set_encrypt_key: | |||
__aesni_set_encrypt_key: | |||
.cfi_startproc | |||
.byte 0x48,0x83,0xEC,0x08 # sub rsp,8 | |||
.cfi_adjust_cfa_offset 8 | |||
mov \$-1,%rax | |||
test $inp,$inp | |||
jz .Lenc_key_ret | |||
@@ -4461,7 +4518,7 @@ __aesni_set_encrypt_key: | |||
.align 16 | |||
.L14rounds: | |||
movups 16($inp),%xmm2 # remaning half of *userKey | |||
movups 16($inp),%xmm2 # remaining half of *userKey | |||
mov \$13,$bits # 14 rounds for 256 | |||
lea 16(%rax),%rax | |||
cmp \$`1<<28`,%r10d # AVX, but no XOP | |||
@@ -4565,7 +4622,9 @@ __aesni_set_encrypt_key: | |||
pxor %xmm4,%xmm4 | |||
pxor %xmm5,%xmm5 | |||
add \$8,%rsp | |||
.cfi_adjust_cfa_offset -8 | |||
ret | |||
.cfi_endproc | |||
.LSEH_end_set_encrypt_key: | |||
.align 16 | |||
@@ -929,7 +929,7 @@ if ($flavour =~ /64/) { ######## 64-bit code | |||
s/^(\s+)v/$1/o or # strip off v prefix | |||
s/\bbx\s+lr\b/ret/o; | |||
# fix up remainig legacy suffixes | |||
# fix up remaining legacy suffixes | |||
s/\.[ui]?8//o; | |||
m/\],#8/o and s/\.16b/\.8b/go; | |||
s/\.[ui]?32//o and s/\.16b/\.4s/go; | |||
@@ -988,7 +988,7 @@ if ($flavour =~ /64/) { ######## 64-bit code | |||
s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers | |||
s/\/\/\s?/@ /o; # new->old style commentary | |||
# fix up remainig new-style suffixes | |||
# fix up remaining new-style suffixes | |||
s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo or | |||
s/\],#[0-9]+/]!/o; | |||
@@ -1,4 +1,11 @@ | |||
#!/usr/bin/env perl | |||
#! /usr/bin/env perl | |||
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. | |||
# | |||
# Licensed under the OpenSSL license (the "License"). You may not use | |||
# this file except in compliance with the License. You can obtain a copy | |||
# in the file LICENSE in the source distribution or at | |||
# https://www.openssl.org/source/license.html | |||
################################################################### | |||
### AES-128 [originally in CTR mode] ### | |||
@@ -1158,15 +1165,23 @@ $code.=<<___; | |||
.type bsaes_ecb_encrypt_blocks,\@abi-omnipotent | |||
.align 16 | |||
bsaes_ecb_encrypt_blocks: | |||
.cfi_startproc | |||
mov %rsp, %rax | |||
.Lecb_enc_prologue: | |||
push %rbp | |||
.cfi_push %rbp | |||
push %rbx | |||
.cfi_push %rbx | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
lea -0x48(%rsp),%rsp | |||
.cfi_adjust_cfa_offset 0x48 | |||
___ | |||
$code.=<<___ if ($win64); | |||
lea -0xa0(%rsp), %rsp | |||
@@ -1184,6 +1199,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov %rsp,%rbp # backup %rsp | |||
.cfi_def_cfa_register %rbp | |||
mov 240($arg4),%eax # rounds | |||
mov $arg1,$inp # backup arguments | |||
mov $arg2,$out | |||
@@ -1328,6 +1344,7 @@ $code.=<<___; | |||
jb .Lecb_enc_bzero | |||
lea 0x78(%rbp),%rax | |||
.cfi_def_cfa %rax,8 | |||
___ | |||
$code.=<<___ if ($win64); | |||
movaps 0x40(%rbp), %xmm6 | |||
@@ -1345,29 +1362,45 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -48(%rax), %r15 | |||
.cfi_restore %r15 | |||
mov -40(%rax), %r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax), %r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax), %r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax), %rbx | |||
.cfi_restore %rbx | |||
mov -8(%rax), %rbp | |||
.cfi_restore %rbp | |||
lea (%rax), %rsp # restore %rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lecb_enc_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks | |||
.globl bsaes_ecb_decrypt_blocks | |||
.type bsaes_ecb_decrypt_blocks,\@abi-omnipotent | |||
.align 16 | |||
bsaes_ecb_decrypt_blocks: | |||
.cfi_startproc | |||
mov %rsp, %rax | |||
.Lecb_dec_prologue: | |||
push %rbp | |||
.cfi_push %rbp | |||
push %rbx | |||
.cfi_push %rbx | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
lea -0x48(%rsp),%rsp | |||
.cfi_adjust_cfa_offset 0x48 | |||
___ | |||
$code.=<<___ if ($win64); | |||
lea -0xa0(%rsp), %rsp | |||
@@ -1385,6 +1418,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov %rsp,%rbp # backup %rsp | |||
.cfi_def_cfa_register %rbp | |||
mov 240($arg4),%eax # rounds | |||
mov $arg1,$inp # backup arguments | |||
mov $arg2,$out | |||
@@ -1530,6 +1564,7 @@ $code.=<<___; | |||
jb .Lecb_dec_bzero | |||
lea 0x78(%rbp),%rax | |||
.cfi_def_cfa %rax,8 | |||
___ | |||
$code.=<<___ if ($win64); | |||
movaps 0x40(%rbp), %xmm6 | |||
@@ -1547,14 +1582,22 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -48(%rax), %r15 | |||
.cfi_restore %r15 | |||
mov -40(%rax), %r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax), %r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax), %r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax), %rbx | |||
.cfi_restore %rbx | |||
mov -8(%rax), %rbp | |||
.cfi_restore %rbp | |||
lea (%rax), %rsp # restore %rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lecb_dec_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks | |||
___ | |||
} | |||
@@ -1564,6 +1607,7 @@ $code.=<<___; | |||
.type bsaes_cbc_encrypt,\@abi-omnipotent | |||
.align 16 | |||
bsaes_cbc_encrypt: | |||
.cfi_startproc | |||
___ | |||
$code.=<<___ if ($win64); | |||
mov 48(%rsp),$arg6 # pull direction flag | |||
@@ -1577,12 +1621,19 @@ $code.=<<___; | |||
mov %rsp, %rax | |||
.Lcbc_dec_prologue: | |||
push %rbp | |||
.cfi_push %rbp | |||
push %rbx | |||
.cfi_push %rbx | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
lea -0x48(%rsp), %rsp | |||
.cfi_adjust_cfa_offset 0x48 | |||
___ | |||
$code.=<<___ if ($win64); | |||
mov 0xa0(%rsp),$arg5 # pull ivp | |||
@@ -1601,6 +1652,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov %rsp, %rbp # backup %rsp | |||
.cfi_def_cfa_register %rbp | |||
mov 240($arg4), %eax # rounds | |||
mov $arg1, $inp # backup arguments | |||
mov $arg2, $out | |||
@@ -1820,6 +1872,7 @@ $code.=<<___; | |||
ja .Lcbc_dec_bzero | |||
lea 0x78(%rbp),%rax | |||
.cfi_def_cfa %rax,8 | |||
___ | |||
$code.=<<___ if ($win64); | |||
movaps 0x40(%rbp), %xmm6 | |||
@@ -1837,29 +1890,45 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -48(%rax), %r15 | |||
.cfi_restore %r15 | |||
mov -40(%rax), %r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax), %r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax), %r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax), %rbx | |||
.cfi_restore %rbx | |||
mov -8(%rax), %rbp | |||
.cfi_restore %rbp | |||
lea (%rax), %rsp # restore %rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lcbc_dec_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt | |||
.globl bsaes_ctr32_encrypt_blocks | |||
.type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent | |||
.align 16 | |||
bsaes_ctr32_encrypt_blocks: | |||
.cfi_startproc | |||
mov %rsp, %rax | |||
.Lctr_enc_prologue: | |||
push %rbp | |||
.cfi_push %rbp | |||
push %rbx | |||
.cfi_push %rbx | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
lea -0x48(%rsp), %rsp | |||
.cfi_adjust_cfa_offset 0x48 | |||
___ | |||
$code.=<<___ if ($win64); | |||
mov 0xa0(%rsp),$arg5 # pull ivp | |||
@@ -1878,6 +1947,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov %rsp, %rbp # backup %rsp | |||
.cfi_def_cfa_register %rbp | |||
movdqu ($arg5), %xmm0 # load counter | |||
mov 240($arg4), %eax # rounds | |||
mov $arg1, $inp # backup arguments | |||
@@ -2052,6 +2122,7 @@ $code.=<<___; | |||
ja .Lctr_enc_bzero | |||
lea 0x78(%rbp),%rax | |||
.cfi_def_cfa %rax,8 | |||
___ | |||
$code.=<<___ if ($win64); | |||
movaps 0x40(%rbp), %xmm6 | |||
@@ -2069,14 +2140,22 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -48(%rax), %r15 | |||
.cfi_restore %r15 | |||
mov -40(%rax), %r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax), %r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax), %r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax), %rbx | |||
.cfi_restore %rbx | |||
mov -8(%rax), %rbp | |||
.cfi_restore %rbp | |||
lea (%rax), %rsp # restore %rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lctr_enc_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks | |||
___ | |||
###################################################################### | |||
@@ -2092,15 +2171,23 @@ $code.=<<___; | |||
.type bsaes_xts_encrypt,\@abi-omnipotent | |||
.align 16 | |||
bsaes_xts_encrypt: | |||
.cfi_startproc | |||
mov %rsp, %rax | |||
.Lxts_enc_prologue: | |||
push %rbp | |||
.cfi_push %rbp | |||
push %rbx | |||
.cfi_push %rbx | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
lea -0x48(%rsp), %rsp | |||
.cfi_adjust_cfa_offset 0x48 | |||
___ | |||
$code.=<<___ if ($win64); | |||
mov 0xa0(%rsp),$arg5 # pull key2 | |||
@@ -2120,6 +2207,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov %rsp, %rbp # backup %rsp | |||
.cfi_def_cfa_register %rbp | |||
mov $arg1, $inp # backup arguments | |||
mov $arg2, $out | |||
mov $arg3, $len | |||
@@ -2442,6 +2530,7 @@ $code.=<<___; | |||
ja .Lxts_enc_bzero | |||
lea 0x78(%rbp),%rax | |||
.cfi_def_cfa %rax,8 | |||
___ | |||
$code.=<<___ if ($win64); | |||
movaps 0x40(%rbp), %xmm6 | |||
@@ -2459,29 +2548,45 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -48(%rax), %r15 | |||
.cfi_restore %r15 | |||
mov -40(%rax), %r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax), %r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax), %r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax), %rbx | |||
.cfi_restore %rbx | |||
mov -8(%rax), %rbp | |||
.cfi_restore %rbp | |||
lea (%rax), %rsp # restore %rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lxts_enc_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bsaes_xts_encrypt,.-bsaes_xts_encrypt | |||
.globl bsaes_xts_decrypt | |||
.type bsaes_xts_decrypt,\@abi-omnipotent | |||
.align 16 | |||
bsaes_xts_decrypt: | |||
.cfi_startproc | |||
mov %rsp, %rax | |||
.Lxts_dec_prologue: | |||
push %rbp | |||
.cfi_push %rbp | |||
push %rbx | |||
.cfi_push %rbx | |||
push %r12 | |||
.cfi_push %r12 | |||
push %r13 | |||
.cfi_push %r13 | |||
push %r14 | |||
.cfi_push %r14 | |||
push %r15 | |||
.cfi_push %r15 | |||
lea -0x48(%rsp), %rsp | |||
.cfi_adjust_cfa_offset 0x48 | |||
___ | |||
$code.=<<___ if ($win64); | |||
mov 0xa0(%rsp),$arg5 # pull key2 | |||
@@ -2849,6 +2954,7 @@ $code.=<<___; | |||
ja .Lxts_dec_bzero | |||
lea 0x78(%rbp),%rax | |||
.cfi_def_cfa %rax,8 | |||
___ | |||
$code.=<<___ if ($win64); | |||
movaps 0x40(%rbp), %xmm6 | |||
@@ -2866,14 +2972,22 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
mov -48(%rax), %r15 | |||
.cfi_restore %r15 | |||
mov -40(%rax), %r14 | |||
.cfi_restore %r14 | |||
mov -32(%rax), %r13 | |||
.cfi_restore %r13 | |||
mov -24(%rax), %r12 | |||
.cfi_restore %r12 | |||
mov -16(%rax), %rbx | |||
.cfi_restore %rbx | |||
mov -8(%rax), %rbp | |||
.cfi_restore %rbp | |||
lea (%rax), %rsp # restore %rsp | |||
.cfi_def_cfa_register %rsp | |||
.Lxts_dec_epilogue: | |||
ret | |||
.cfi_endproc | |||
.size bsaes_xts_decrypt,.-bsaes_xts_decrypt | |||
___ | |||
} | |||