Patch out the aes_nohw fallback in bsaes_cbc_encrypt.

This plugs all bsaes fallback leaks for CBC outside of the key schedule,
because the CBC EVP_CIPHERs never call the block function directly when
a stream.cbc function is available.

This affects CBC decryptions whose length is < 128 bytes or congruent to
16 mod 128 (i.e. those that leave a single trailing 16-byte block).
Performance-wise, we don't really care about CBC apart from passing
glances at its use in TLS. There, the Lucky13 workaround mutes the
performance effects of this change (see the benchmarks below).

Cortex-A53 (Raspberry Pi 3 Model B+)
Before:
Did 78000 AES-128-CBC-SHA1 (16 bytes) open operations in 3020254us (25825.6 ops/sec): 0.4 MB/s
Did 75000 AES-128-CBC-SHA1 (32 bytes) open operations in 3005760us (24952.1 ops/sec): 0.8 MB/s
Did 71000 AES-128-CBC-SHA1 (64 bytes) open operations in 3038137us (23369.6 ops/sec): 1.5 MB/s
Did 67000 AES-128-CBC-SHA1 (96 bytes) open operations in 3027686us (22129.1 ops/sec): 2.1 MB/s
Did 64000 AES-128-CBC-SHA1 (112 bytes) open operations in 3005491us (21294.4 ops/sec): 2.4 MB/s
Did 59000 AES-128-CBC-SHA1 (128 bytes) open operations in 3020083us (19535.9 ops/sec): 2.5 MB/s
Did 53000 AES-128-CBC-SHA1 (240 bytes) open operations in 3020105us (17549.1 ops/sec): 4.2 MB/s
After:
Did 71668 AES-128-CBC-SHA1 (16 bytes) open operations in 3020896us (23724.1 ops/sec): 0.4 MB/s
Did 71000 AES-128-CBC-SHA1 (32 bytes) open operations in 3040826us (23348.9 ops/sec): 0.7 MB/s
Did 68000 AES-128-CBC-SHA1 (64 bytes) open operations in 3009913us (22592.0 ops/sec): 1.4 MB/s
Did 66000 AES-128-CBC-SHA1 (96 bytes) open operations in 3007597us (21944.4 ops/sec): 2.1 MB/s
Did 59000 AES-128-CBC-SHA1 (112 bytes) open operations in 3002878us (19647.8 ops/sec): 2.2 MB/s
Did 59000 AES-128-CBC-SHA1 (128 bytes) open operations in 3046786us (19364.7 ops/sec): 2.5 MB/s
Did 50000 AES-128-CBC-SHA1 (240 bytes) open operations in 3043643us (16427.7 ops/sec): 3.9 MB/s

Penryn (Mac mini, mid 2010)
Before:
Did 152000 AES-128-CBC-SHA1 (16 bytes) open operations in 1004422us (151330.8 ops/sec): 2.4 MB/s
Did 143000 AES-128-CBC-SHA1 (32 bytes) open operations in 1000443us (142936.7 ops/sec): 4.6 MB/s
Did 136000 AES-128-CBC-SHA1 (48 bytes) open operations in 1006580us (135111.0 ops/sec): 6.5 MB/s
Did 146000 AES-128-CBC-SHA1 (96 bytes) open operations in 1005731us (145168.0 ops/sec): 13.9 MB/s
Did 138000 AES-128-CBC-SHA1 (112 bytes) open operations in 1003330us (137542.0 ops/sec): 15.4 MB/s
Did 133000 AES-128-CBC-SHA1 (128 bytes) open operations in 1005876us (132223.1 ops/sec): 16.9 MB/s
Did 117000 AES-128-CBC-SHA1 (240 bytes) open operations in 1004922us (116426.9 ops/sec): 27.9 MB/s
After:
Did 159000 AES-128-CBC-SHA1 (16 bytes) open operations in 1000505us (158919.7 ops/sec): 2.5 MB/s
Did 157000 AES-128-CBC-SHA1 (32 bytes) open operations in 1006091us (156049.5 ops/sec): 5.0 MB/s
Did 154000 AES-128-CBC-SHA1 (48 bytes) open operations in 1002720us (153582.3 ops/sec): 7.4 MB/s
Did 146000 AES-128-CBC-SHA1 (96 bytes) open operations in 1002567us (145626.2 ops/sec): 14.0 MB/s
Did 135000 AES-128-CBC-SHA1 (112 bytes) open operations in 1001212us (134836.6 ops/sec): 15.1 MB/s
Did 133000 AES-128-CBC-SHA1 (128 bytes) open operations in 1006441us (132148.8 ops/sec): 16.9 MB/s
Did 115000 AES-128-CBC-SHA1 (240 bytes) open operations in 1005246us (114399.9 ops/sec): 27.5 MB/s

Bug: 256
Change-Id: I864b4455ada0d4d245380fce6f869dabb0686354
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/35167
Reviewed-by: Adam Langley <agl@google.com>
This commit is contained in:
David Benjamin 2019-01-12 15:20:22 +00:00 committed by Adam Langley
parent 885a63fb74
commit 4851041967
3 changed files with 26 additions and 51 deletions

View File

@ -1113,26 +1113,12 @@ my ($inp,$out,$len,$key, $ivp,$fp,$rounds)=map("r$_",(0..3,8..10));
my ($keysched)=("sp"); my ($keysched)=("sp");
$code.=<<___; $code.=<<___;
@ TODO(davidben): This should be aes_nohw_cbc_encrypt, but that function does
@ not exist. Rather than add it, patch this fallback out. See
@ https://crbug.com/boringssl/256.
.extern AES_cbc_encrypt
.extern aes_nohw_decrypt
.global bsaes_cbc_encrypt .global bsaes_cbc_encrypt
.type bsaes_cbc_encrypt,%function .type bsaes_cbc_encrypt,%function
.align 5 .align 5
bsaes_cbc_encrypt: bsaes_cbc_encrypt:
#ifndef __KERNEL__ @ In OpenSSL, this function had a fallback to aes_nohw_cbc_encrypt for
cmp $len, #128 @ short inputs. We patch this out, using bsaes for all input sizes.
#ifndef __thumb__
blo AES_cbc_encrypt
#else
bhs 1f
b AES_cbc_encrypt
1:
#endif
#endif
@ it is up to the caller to make sure we are called with enc == 0 @ it is up to the caller to make sure we are called with enc == 0
@ -1230,10 +1216,7 @@ bsaes_cbc_encrypt:
adds $len, $len, #8 adds $len, $len, #8
beq .Lcbc_dec_done beq .Lcbc_dec_done
vld1.8 {@XMM[0]}, [$inp]! @ load input @ Set up most parameters for the _bsaes_decrypt8 call.
cmp $len, #2
blo .Lcbc_dec_one
vld1.8 {@XMM[1]}, [$inp]!
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY
mov r4, $keysched @ pass the key mov r4, $keysched @ pass the key
#else #else
@ -1241,6 +1224,11 @@ bsaes_cbc_encrypt:
#endif #endif
mov r5, $rounds mov r5, $rounds
vstmia $fp, {@XMM[15]} @ put aside IV vstmia $fp, {@XMM[15]} @ put aside IV
vld1.8 {@XMM[0]}, [$inp]! @ load input
cmp $len, #2
blo .Lcbc_dec_one
vld1.8 {@XMM[1]}, [$inp]!
beq .Lcbc_dec_two beq .Lcbc_dec_two
vld1.8 {@XMM[2]}, [$inp]! vld1.8 {@XMM[2]}, [$inp]!
cmp $len, #4 cmp $len, #4
@ -1358,16 +1346,11 @@ bsaes_cbc_encrypt:
.align 4 .align 4
.Lcbc_dec_one: .Lcbc_dec_one:
sub $inp, $inp, #0x10 sub $inp, $inp, #0x10
mov $rounds, $out @ save original out pointer bl _bsaes_decrypt8
mov $out, $fp @ use the iv scratch space as out buffer vldmia $fp, {@XMM[14]} @ reload IV
mov r2, $key vld1.8 {@XMM[15]}, [$inp]! @ reload input
vmov @XMM[4],@XMM[15] @ just in case ensure that IV veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV
vmov @XMM[5],@XMM[0] @ and input are preserved vst1.8 {@XMM[0]}, [$out]! @ write output
bl aes_nohw_decrypt
vld1.8 {@XMM[0]}, [$fp] @ load result
veor @XMM[0], @XMM[0], @XMM[4] @ ^= IV
vmov @XMM[15], @XMM[5] @ @XMM[5] holds input
vst1.8 {@XMM[0]}, [$rounds] @ write output
.Lcbc_dec_done: .Lcbc_dec_done:
#ifndef BSAES_ASM_EXTENDED_KEY #ifndef BSAES_ASM_EXTENDED_KEY

View File

@ -811,8 +811,6 @@ ___
$code.=<<___; $code.=<<___;
.text .text
.extern aes_nohw_decrypt
.type _bsaes_encrypt8,\@abi-omnipotent .type _bsaes_encrypt8,\@abi-omnipotent
.align 64 .align 64
_bsaes_encrypt8: _bsaes_encrypt8:
@ -1608,22 +1606,14 @@ $code.=<<___;
___ ___
} }
$code.=<<___; $code.=<<___;
.extern aes_nohw_cbc_encrypt
.globl bsaes_cbc_encrypt .globl bsaes_cbc_encrypt
.type bsaes_cbc_encrypt,\@abi-omnipotent .type bsaes_cbc_encrypt,\@abi-omnipotent
.align 16 .align 16
bsaes_cbc_encrypt: bsaes_cbc_encrypt:
.cfi_startproc .cfi_startproc
___ # In OpenSSL, this function had a fallback to aes_nohw_cbc_encrypt for
$code.=<<___ if ($win64); # short inputs or if enc is one. We patch this out, using bsaes for all
mov 48(%rsp),$arg6 # pull direction flag # input sizes. The caller is required to ensure enc is zero.
___
$code.=<<___;
cmp \$0,$arg6
jne aes_nohw_cbc_encrypt
cmp \$128,$arg3
jb aes_nohw_cbc_encrypt
mov %rsp, %rax mov %rsp, %rax
.Lcbc_dec_prologue: .Lcbc_dec_prologue:
push %rbp push %rbp
@ -1682,6 +1672,8 @@ $code.=<<___;
movdqu (%rbx), @XMM[15] # load IV movdqu (%rbx), @XMM[15] # load IV
sub \$8,$len sub \$8,$len
jc .Lcbc_dec_loop_done
.Lcbc_dec_loop: .Lcbc_dec_loop:
movdqu 0x00($inp), @XMM[0] # load input movdqu 0x00($inp), @XMM[0] # load input
movdqu 0x10($inp), @XMM[1] movdqu 0x10($inp), @XMM[1]
@ -1726,6 +1718,7 @@ $code.=<<___;
sub \$8,$len sub \$8,$len
jnc .Lcbc_dec_loop jnc .Lcbc_dec_loop
.Lcbc_dec_loop_done:
add \$8,$len add \$8,$len
jz .Lcbc_dec_done jz .Lcbc_dec_done
@ -1858,13 +1851,12 @@ $code.=<<___;
jmp .Lcbc_dec_done jmp .Lcbc_dec_done
.align 16 .align 16
.Lcbc_dec_one: .Lcbc_dec_one:
lea ($inp), $arg1 movdqa @XMM[15], 0x20(%rbp) # put aside IV
lea 0x20(%rbp), $arg2 # buffer output call _bsaes_decrypt8
lea ($key), $arg3 pxor 0x20(%rbp), @XMM[0] # ^= IV
call aes_nohw_decrypt # doesn't touch %xmm movdqu 0x00($inp), @XMM[15] # IV
pxor 0x20(%rbp), @XMM[15] # ^= IV movdqu @XMM[0], 0x00($out) # write output
movdqu @XMM[15], ($out) # write output jmp .Lcbc_dec_done
movdqa @XMM[0], @XMM[15] # IV
.Lcbc_dec_done: .Lcbc_dec_done:
movdqu @XMM[15], (%rbx) # return IV movdqu @XMM[15], (%rbx) # return IV

View File

@ -133,7 +133,7 @@ void aes_hw_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
#if defined(BSAES) #if defined(BSAES)
// On platforms where BSAES gets defined (just above), then these functions are // On platforms where BSAES gets defined (just above), then these functions are
// provided by asm. // provided by asm. Note |bsaes_cbc_encrypt| requires |enc| to be zero.
void bsaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length, void bsaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t ivec[16], int enc); const AES_KEY *key, uint8_t ivec[16], int enc);
void bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len, void bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,