Optimized Seal/Open routines for ChaCha20-Poly1305 for x86-64
This is basically the same implementation I wrote for Go.

The Go implementation: https://github.com/golang/crypto/blob/master/chacha20poly1305/chacha20poly1305_amd64.s
The Cloudflare patch for OpenSSL: https://github.com/cloudflare/sslconfig/blob/master/patches/openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch

The Seal/Open is only available for the new version, the old one uses the bundled Poly1305, and the existing ChaCha20 implementations.

The benefits of this code, compared to the optimized code currently disabled in BoringSSL:

* Passes test vectors
* Faster performance: The AVX2 code (on Haswell), is 55% faster for 16B, 15% for 1350 and 6% for 8192 byte buffers
* Even faster on pre-AVX2 CPUs

Feel free to put whatever license, etc. is appropriate, under the existing CLA.

Benchmarks are for 16/1350/8192 chunk sizes and given in MB/s:

Before (Ivy Bridge): 34.2 589.5 739.4
After: 68.4 692.1 799.4

Before (Skylake): 50 1233 1649
After: 119.4 1736 2196
After (Andy's): 63.6 1608 2261

Change-Id: I9186f721812655011fc17698b67ddbe8a1c7203b
Reviewed-on: https://boringssl-review.googlesource.com/13142
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
This commit is contained in:
parent
358baeb9a4
commit
8d56558031
@ -1,5 +1,13 @@
|
||||
include_directories(../../include)
|
||||
|
||||
if (${ARCH} STREQUAL "x86_64")
|
||||
set(
|
||||
CIPHER_ARCH_SOURCES
|
||||
|
||||
chacha20_poly1305_x86_64.${ASM_EXT}
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library(
|
||||
cipher
|
||||
|
||||
@ -19,6 +27,8 @@ add_library(
|
||||
tls_cbc.c
|
||||
e_tls.c
|
||||
e_ssl3.c
|
||||
|
||||
${CIPHER_ARCH_SOURCES}
|
||||
)
|
||||
|
||||
add_executable(
|
||||
@ -35,6 +45,8 @@ add_executable(
|
||||
$<TARGET_OBJECTS:test_support>
|
||||
)
|
||||
|
||||
perlasm(chacha20_poly1305_x86_64.${ASM_EXT} asm/chacha20_poly1305_x86_64.pl)
|
||||
|
||||
target_link_libraries(cipher_test crypto)
|
||||
target_link_libraries(aead_test crypto)
|
||||
add_dependencies(all_tests cipher_test aead_test)
|
||||
|
2371
crypto/cipher/asm/chacha20_poly1305_x86_64.pl
Normal file
2371
crypto/cipher/asm/chacha20_poly1305_x86_64.pl
Normal file
File diff suppressed because it is too large
Load Diff
@ -33,6 +33,42 @@ struct aead_chacha20_poly1305_ctx {
|
||||
unsigned char tag_len;
|
||||
};
|
||||
|
||||
#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) && \
|
||||
!defined(OPENSSL_WINDOWS)
|
||||
static const int kHaveAsm = 1;
|
||||
// chacha20_poly1305_open is defined in chacha20_poly1305_x86_64.pl. It
|
||||
// decrypts |plaintext_len| bytes from |ciphertext| and writes them to
|
||||
// |out_plaintext|. On entry, |aead_data| must contain the final 48 bytes of
|
||||
// the initial ChaCha20 block, i.e. the key, followed by four zeros, followed
|
||||
// by the nonce. On exit, it will contain the calculated tag value, which the
|
||||
// caller must check.
|
||||
void chacha20_poly1305_open(uint8_t *out_plaintext, const uint8_t *ciphertext,
|
||||
size_t plaintext_len, const uint8_t *ad,
|
||||
size_t ad_len, uint8_t *aead_data);
|
||||
|
||||
// chacha20_poly1305_seal is defined in chacha20_poly1305_x86_64.pl. It
|
||||
// encrypts |plaintext_len| bytes from |plaintext| and writes them to
|
||||
// |out_ciphertext|. On entry, |aead_data| must contain the final 48 bytes of
|
||||
// the initial ChaCha20 block, i.e. the key, followed by four zeros, followed
|
||||
// by the nonce. On exit, it will contain the calculated tag value, which the
|
||||
// caller must append to the ciphertext.
|
||||
void chacha20_poly1305_seal(uint8_t *out_ciphertext, const uint8_t *plaintext,
|
||||
size_t plaintext_len, const uint8_t *ad,
|
||||
size_t ad_len, uint8_t *aead_data);
|
||||
#else
|
||||
static const int kHaveAsm = 0;
|
||||
|
||||
// No-op placeholder for chacha20_poly1305_open, used in the branch where
// kHaveAsm is 0. The body is intentionally empty: it writes nothing to
// |out_plaintext| and leaves |aead_data| untouched. The call visible in this
// file is guarded by |if (kHaveAsm)|, so this stub only lets that call site
// compile and is not expected to run.
static void chacha20_poly1305_open(uint8_t *out_plaintext,
|
||||
const uint8_t *ciphertext,
|
||||
size_t plaintext_len, const uint8_t *ad,
|
||||
size_t ad_len, uint8_t *aead_data) {}
|
||||
|
||||
// No-op placeholder for chacha20_poly1305_seal, used in the branch where
// kHaveAsm is 0. The body is intentionally empty: it writes nothing to
// |out_ciphertext| and leaves |aead_data| untouched. The call visible in this
// file is guarded by |if (kHaveAsm)|, so this stub only lets that call site
// compile and is not expected to run.
static void chacha20_poly1305_seal(uint8_t *out_ciphertext,
|
||||
const uint8_t *plaintext,
|
||||
size_t plaintext_len, const uint8_t *ad,
|
||||
size_t ad_len, uint8_t *aead_data) {}
|
||||
#endif
|
||||
|
||||
static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
|
||||
size_t key_len, size_t tag_len) {
|
||||
struct aead_chacha20_poly1305_ctx *c20_ctx;
|
||||
@ -143,10 +179,17 @@ static int aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, uint8_t *out,
|
||||
return 0;
|
||||
}
|
||||
|
||||
CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1);
|
||||
alignas(16) uint8_t tag[48];
|
||||
|
||||
alignas(16) uint8_t tag[POLY1305_TAG_LEN];
|
||||
calc_tag(tag, c20_ctx, nonce, ad, ad_len, out, in_len);
|
||||
if (kHaveAsm) {
|
||||
OPENSSL_memcpy(tag, c20_ctx->key, 32);
|
||||
OPENSSL_memset(tag + 32, 0, 4);
|
||||
OPENSSL_memcpy(tag + 32 + 4, nonce, 12);
|
||||
chacha20_poly1305_seal(out, in, in_len, ad, ad_len, tag);
|
||||
} else {
|
||||
CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1);
|
||||
calc_tag(tag, c20_ctx, nonce, ad, ad_len, out, in_len);
|
||||
}
|
||||
|
||||
OPENSSL_memcpy(out + in_len, tag, c20_ctx->tag_len);
|
||||
*out_len = in_len + c20_ctx->tag_len;
|
||||
@ -184,14 +227,23 @@ static int aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
|
||||
}
|
||||
|
||||
plaintext_len = in_len - c20_ctx->tag_len;
|
||||
alignas(16) uint8_t tag[POLY1305_TAG_LEN];
|
||||
calc_tag(tag, c20_ctx, nonce, ad, ad_len, in, plaintext_len);
|
||||
alignas(16) uint8_t tag[48];
|
||||
|
||||
if (kHaveAsm) {
|
||||
OPENSSL_memcpy(tag, c20_ctx->key, 32);
|
||||
OPENSSL_memset(tag + 32, 0, 4);
|
||||
OPENSSL_memcpy(tag + 32 + 4, nonce, 12);
|
||||
chacha20_poly1305_open(out, in, plaintext_len, ad, ad_len, tag);
|
||||
} else {
|
||||
calc_tag(tag, c20_ctx, nonce, ad, ad_len, in, plaintext_len);
|
||||
CRYPTO_chacha_20(out, in, plaintext_len, c20_ctx->key, nonce, 1);
|
||||
}
|
||||
|
||||
if (CRYPTO_memcmp(tag, in + plaintext_len, c20_ctx->tag_len) != 0) {
|
||||
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
|
||||
return 0;
|
||||
}
|
||||
|
||||
CRYPTO_chacha_20(out, in, plaintext_len, c20_ctx->key, nonce, 1);
|
||||
*out_len = plaintext_len;
|
||||
return 1;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user