瀏覽代碼

Optimized Seal/Open routines for ChaCha20-Poly1305 for x86-64

This is basically the same implementation I wrote for Go

The Go implementation:
https://github.com/golang/crypto/blob/master/chacha20poly1305/chacha20poly1305_amd64.s
The Cloudflare patch for OpenSSL:
https://github.com/cloudflare/sslconfig/blob/master/patches/openssl__chacha20_poly1305_draft_and_rfc_ossl102j.patch

The Seal/Open routines are only available for the new version; the old one
uses the bundled Poly1305 and the existing ChaCha20 implementations.

The benefits of this code, compared to the optimized code currently
disabled in BoringSSL:

* Passes test vectors
* Faster performance: the AVX2 code (on Haswell) is 55% faster for 16-byte,
  15% for 1350-byte and 6% for 8192-byte buffers
* Even faster on pre-AVX2 CPUs

Feel free to put whatever license, etc. is appropriate, under the
existing CLA.

Benchmarks are for 16/1350/8192 chunk sizes and given in MB/s:

Before (Ivy Bridge): 34.2   589.5  739.4
After:               68.4   692.1  799.4
Before (Skylake):    50    1233   1649
After:              119.4  1736   2196
After (Andy's):      63.6  1608   2261

Change-Id: I9186f721812655011fc17698b67ddbe8a1c7203b
Reviewed-on: https://boringssl-review.googlesource.com/13142
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
kris/onging/CECPQ3_patch15
vkrasnov 7 年之前
committed by Adam Langley
父節點
當前提交
8d56558031
共有 3 個文件被更改,包括 2441 次插入6 次删除
  1. +12
    -0
      crypto/cipher/CMakeLists.txt
  2. +2371
    -0
      crypto/cipher/asm/chacha20_poly1305_x86_64.pl
  3. +58
    -6
      crypto/cipher/e_chacha20poly1305.c

+ 12
- 0
crypto/cipher/CMakeLists.txt 查看文件

@@ -1,5 +1,13 @@
include_directories(../../include)

# Architecture-specific sources for the cipher library. The ChaCha20-Poly1305
# assembly (generated by the perlasm() rule for chacha20_poly1305_x86_64.pl)
# is only added when ARCH is x86_64; on other architectures
# CIPHER_ARCH_SOURCES is left unset here.
if (${ARCH} STREQUAL "x86_64")
set(
CIPHER_ARCH_SOURCES

chacha20_poly1305_x86_64.${ASM_EXT}
)
endif()

add_library(
cipher

@@ -19,6 +27,8 @@ add_library(
tls_cbc.c
e_tls.c
e_ssl3.c

${CIPHER_ARCH_SOURCES}
)

add_executable(
@@ -35,6 +45,8 @@ add_executable(
$<TARGET_OBJECTS:test_support>
)

perlasm(chacha20_poly1305_x86_64.${ASM_EXT} asm/chacha20_poly1305_x86_64.pl)

target_link_libraries(cipher_test crypto)
target_link_libraries(aead_test crypto)
add_dependencies(all_tests cipher_test aead_test)

+ 2371
- 0
crypto/cipher/asm/chacha20_poly1305_x86_64.pl
文件差異過大導致無法顯示
查看文件


+ 58
- 6
crypto/cipher/e_chacha20poly1305.c 查看文件

@@ -33,6 +33,42 @@ struct aead_chacha20_poly1305_ctx {
unsigned char tag_len;
};

// The combined ChaCha20-Poly1305 assembly routines are declared only when
// OPENSSL_X86_64 is defined and neither OPENSSL_NO_ASM nor OPENSSL_WINDOWS is.
// In every other configuration |kHaveAsm| is zero and empty stubs with the
// same signatures are provided so that the call sites still compile.
#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) && \
!defined(OPENSSL_WINDOWS)
// kHaveAsm is non-zero when the assembly Seal/Open routines are available.
// The AEAD seal/open functions in this file branch on it to choose between
// the assembly and the separate CRYPTO_chacha_20 + calc_tag path.
static const int kHaveAsm = 1;
// chacha20_poly1305_open is defined in chacha20_poly1305_x86_64.pl. It
// decrypts |plaintext_len| bytes from |ciphertext| and writes them to
// |out_plaintext|. On entry, |aead_data| must contain the final 48 bytes of
// the initial ChaCha20 block, i.e. the key (32 bytes), followed by four zero
// bytes, followed by the nonce (12 bytes). On exit, it will contain the
// calculated tag value, which the caller must check.
void chacha20_poly1305_open(uint8_t *out_plaintext, const uint8_t *ciphertext,
size_t plaintext_len, const uint8_t *ad,
size_t ad_len, uint8_t *aead_data);

// chacha20_poly1305_seal is defined in chacha20_poly1305_x86_64.pl. It
// encrypts |plaintext_len| bytes from |plaintext| and writes them to
// |out_ciphertext|. On entry, |aead_data| must contain the final 48 bytes of
// the initial ChaCha20 block, i.e. the key (32 bytes), followed by four zero
// bytes, followed by the nonce (12 bytes). On exit, it will contain the
// calculated tag value, which the caller must append to the ciphertext.
void chacha20_poly1305_seal(uint8_t *out_ciphertext, const uint8_t *plaintext,
size_t plaintext_len, const uint8_t *ad,
size_t ad_len, uint8_t *aead_data);
#else
// No assembly implementation in this configuration; callers must take the
// generic path.
static const int kHaveAsm = 0;

// Placeholder for the assembly routine. The call site in this file is guarded
// by |kHaveAsm|, which is zero here, so this stub is not reached; it exists
// only so that the guarded call compiles. It does nothing and leaves all of
// its buffers untouched.
static void chacha20_poly1305_open(uint8_t *out_plaintext,
const uint8_t *ciphertext,
size_t plaintext_len, const uint8_t *ad,
size_t ad_len, uint8_t *aead_data) {}

// Placeholder for the assembly routine. The call site in this file is guarded
// by |kHaveAsm|, which is zero here, so this stub is not reached; it exists
// only so that the guarded call compiles. It does nothing and leaves all of
// its buffers untouched.
static void chacha20_poly1305_seal(uint8_t *out_ciphertext,
const uint8_t *plaintext,
size_t plaintext_len, const uint8_t *ad,
size_t ad_len, uint8_t *aead_data) {}
#endif

static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
size_t key_len, size_t tag_len) {
struct aead_chacha20_poly1305_ctx *c20_ctx;
@@ -143,10 +179,17 @@ static int aead_chacha20_poly1305_seal(const EVP_AEAD_CTX *ctx, uint8_t *out,
return 0;
}

CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1);
alignas(16) uint8_t tag[48];

alignas(16) uint8_t tag[POLY1305_TAG_LEN];
calc_tag(tag, c20_ctx, nonce, ad, ad_len, out, in_len);
if (kHaveAsm) {
OPENSSL_memcpy(tag, c20_ctx->key, 32);
OPENSSL_memset(tag + 32, 0, 4);
OPENSSL_memcpy(tag + 32 + 4, nonce, 12);
chacha20_poly1305_seal(out, in, in_len, ad, ad_len, tag);
} else {
CRYPTO_chacha_20(out, in, in_len, c20_ctx->key, nonce, 1);
calc_tag(tag, c20_ctx, nonce, ad, ad_len, out, in_len);
}

OPENSSL_memcpy(out + in_len, tag, c20_ctx->tag_len);
*out_len = in_len + c20_ctx->tag_len;
@@ -184,14 +227,23 @@ static int aead_chacha20_poly1305_open(const EVP_AEAD_CTX *ctx, uint8_t *out,
}

plaintext_len = in_len - c20_ctx->tag_len;
alignas(16) uint8_t tag[POLY1305_TAG_LEN];
calc_tag(tag, c20_ctx, nonce, ad, ad_len, in, plaintext_len);
alignas(16) uint8_t tag[48];

if (kHaveAsm) {
OPENSSL_memcpy(tag, c20_ctx->key, 32);
OPENSSL_memset(tag + 32, 0, 4);
OPENSSL_memcpy(tag + 32 + 4, nonce, 12);
chacha20_poly1305_open(out, in, plaintext_len, ad, ad_len, tag);
} else {
calc_tag(tag, c20_ctx, nonce, ad, ad_len, in, plaintext_len);
CRYPTO_chacha_20(out, in, plaintext_len, c20_ctx->key, nonce, 1);
}

if (CRYPTO_memcmp(tag, in + plaintext_len, c20_ctx->tag_len) != 0) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
return 0;
}

CRYPTO_chacha_20(out, in, plaintext_len, c20_ctx->key, nonce, 1);
*out_len = plaintext_len;
return 1;
}


Loading…
取消
儲存