Change-Id: If28096e677104c6109e31e31a636fee82ef4ba11 Reviewed-on: https://boringssl-review.googlesource.com/c/34266 Commit-Queue: David Benjamin <davidben@google.com> Reviewed-by: Adam Langley <agl@google.com> kris/onging/CECPQ3_patch15
@@ -518,6 +518,7 @@ $code.=<<___; | |||
.type gcm_init_clmul,\@abi-omnipotent | |||
.align 16 | |||
gcm_init_clmul: | |||
.cfi_startproc | |||
.L_init_clmul: | |||
___ | |||
$code.=<<___ if ($win64); | |||
@@ -587,6 +588,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
ret | |||
.cfi_endproc | |||
.size gcm_init_clmul,.-gcm_init_clmul | |||
___ | |||
} | |||
@@ -598,6 +600,7 @@ $code.=<<___; | |||
.type gcm_gmult_clmul,\@abi-omnipotent | |||
.align 16 | |||
gcm_gmult_clmul: | |||
.cfi_startproc | |||
.L_gmult_clmul: | |||
movdqu ($Xip),$Xi | |||
movdqa .Lbswap_mask(%rip),$T3 | |||
@@ -634,6 +637,7 @@ $code.=<<___; | |||
pshufb $T3,$Xi | |||
movdqu $Xi,($Xip) | |||
ret | |||
.cfi_endproc | |||
.size gcm_gmult_clmul,.-gcm_gmult_clmul | |||
___ | |||
} | |||
@@ -647,6 +651,7 @@ $code.=<<___; | |||
.type gcm_ghash_clmul,\@abi-omnipotent | |||
.align 32 | |||
gcm_ghash_clmul: | |||
.cfi_startproc | |||
.L_ghash_clmul: | |||
___ | |||
$code.=<<___ if ($win64); | |||
@@ -995,6 +1000,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
ret | |||
.cfi_endproc | |||
.size gcm_ghash_clmul,.-gcm_ghash_clmul | |||
___ | |||
} | |||
@@ -1004,6 +1010,7 @@ $code.=<<___; | |||
.type gcm_init_avx,\@abi-omnipotent | |||
.align 32 | |||
gcm_init_avx: | |||
.cfi_startproc | |||
___ | |||
if ($avx) { | |||
my ($Htbl,$Xip)=@_4args; | |||
@@ -1132,6 +1139,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
ret | |||
.cfi_endproc | |||
.size gcm_init_avx,.-gcm_init_avx | |||
___ | |||
} else { | |||
@@ -1146,7 +1154,9 @@ $code.=<<___; | |||
.type gcm_gmult_avx,\@abi-omnipotent | |||
.align 32 | |||
gcm_gmult_avx: | |||
.cfi_startproc | |||
jmp .L_gmult_clmul | |||
.cfi_endproc | |||
.size gcm_gmult_avx,.-gcm_gmult_avx | |||
___ | |||
@@ -1155,6 +1165,7 @@ $code.=<<___; | |||
.type gcm_ghash_avx,\@abi-omnipotent | |||
.align 32 | |||
gcm_ghash_avx: | |||
.cfi_startproc | |||
___ | |||
if ($avx) { | |||
my ($Xip,$Htbl,$inp,$len)=@_4args; | |||
@@ -1567,6 +1578,7 @@ $code.=<<___ if ($win64); | |||
___ | |||
$code.=<<___; | |||
ret | |||
.cfi_endproc | |||
.size gcm_ghash_avx,.-gcm_ghash_avx | |||
___ | |||
} else { | |||
@@ -57,12 +57,6 @@ | |||
#include "internal.h" | |||
#include "../../internal.h" | |||
#if !defined(OPENSSL_NO_ASM) && \ | |||
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \ | |||
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \ | |||
defined(OPENSSL_PPC64LE)) | |||
#define GHASH_ASM | |||
#endif | |||
#define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16)) | |||
#define REDUCE1BIT(V) \ | |||
@@ -82,7 +76,7 @@ | |||
// bits of a |size_t|. | |||
static const size_t kSizeTWithoutLower4Bits = (size_t) -16; | |||
static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) { | |||
void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]) { | |||
u128 V; | |||
Htable[0].hi = 0; | |||
@@ -127,7 +121,7 @@ static const size_t rem_4bit[16] = { | |||
PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), | |||
PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)}; | |||
static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) { | |||
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) { | |||
u128 Z; | |||
int cnt = 15; | |||
size_t rem, nlo, nhi; | |||
@@ -182,8 +176,8 @@ static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) { | |||
// performance improvement, at least not on x86[_64]. It's here | |||
// mostly as reference and a placeholder for possible future | |||
// non-trivial optimization[s]... | |||
static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], | |||
const uint8_t *inp, size_t len) { | |||
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len) { | |||
u128 Z; | |||
int cnt; | |||
size_t rem, nlo, nhi; | |||
@@ -237,11 +231,7 @@ static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], | |||
Xi[1] = CRYPTO_bswap8(Z.lo); | |||
} while (inp += 16, len -= 16); | |||
} | |||
#else // GHASH_ASM | |||
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#endif | |||
#endif // !GHASH_ASM || AARCH64 || PPC64LE | |||
#define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->gcm_key.Htable) | |||
#if defined(GHASH_ASM) | |||
@@ -251,90 +241,7 @@ void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
// trashing effect. In other words idea is to hash data while it's | |||
// still in L1 cache after encryption pass... | |||
#define GHASH_CHUNK (3 * 1024) | |||
#endif | |||
#if defined(GHASH_ASM) | |||
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) | |||
#define GCM_FUNCREF_4BIT | |||
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#if defined(OPENSSL_X86_64) | |||
#define GHASH_ASM_X86_64 | |||
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in, | |||
size_t len); | |||
#define AESNI_GCM | |||
size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len, | |||
const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi); | |||
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len, | |||
const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi); | |||
#endif | |||
#if defined(OPENSSL_X86) | |||
#define GHASH_ASM_X86 | |||
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#endif | |||
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) | |||
#include <openssl/arm_arch.h> | |||
#if __ARM_ARCH__ >= 7 | |||
#define GHASH_ASM_ARM | |||
#define GCM_FUNCREF_4BIT | |||
// Returns the result of CRYPTO_is_ARMv8_PMULL_capable(). CRYPTO_ghash_init
// tests this before installing the gcm_*_v8 functions.
static int pmull_capable(void) { | |||
return CRYPTO_is_ARMv8_PMULL_capable(); | |||
} | |||
void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#if defined(OPENSSL_ARM) | |||
// 32-bit ARM also has support for doing GCM with NEON instructions. | |||
// Returns the result of CRYPTO_is_NEON_capable(). This definition is the one
// compiled when OPENSSL_ARM is defined.
static int neon_capable(void) { | |||
return CRYPTO_is_NEON_capable(); | |||
} | |||
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#else | |||
// AArch64 only has the ARMv8 versions of functions. | |||
// Always reports no NEON GHASH support in this (non-OPENSSL_ARM) branch.
static int neon_capable(void) { | |||
return 0; | |||
} | |||
// The three gcm_*_neon definitions below are placeholders so the names exist
// in this branch. Each one aborts if it is ever called; CRYPTO_ghash_init only
// reaches them inside an `if (neon_capable())` check, and neon_capable()
// returns 0 here.
static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) { | |||
abort(); | |||
} | |||
static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) { | |||
abort(); | |||
} | |||
static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], | |||
const uint8_t *inp, size_t len) { | |||
abort(); | |||
} | |||
#endif | |||
#endif | |||
#elif defined(OPENSSL_PPC64LE) | |||
#define GHASH_ASM_PPC64LE | |||
#define GCM_FUNCREF_4BIT | |||
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#endif | |||
#endif | |||
#endif // GHASH_ASM | |||
#ifdef GCM_FUNCREF_4BIT | |||
#undef GCM_MUL | |||
@@ -344,12 +251,11 @@ void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
#define GHASH(ctx, in, len) \ | |||
(*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len) | |||
#endif | |||
#endif | |||
#endif // GCM_FUNCREF_4BIT | |||
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash, | |||
u128 *out_key, u128 out_table[16], | |||
int *out_is_avx, | |||
const uint8_t *gcm_key) { | |||
u128 *out_key, u128 out_table[16], int *out_is_avx, | |||
const uint8_t gcm_key[16]) { | |||
*out_is_avx = 0; | |||
union { | |||
@@ -387,14 +293,14 @@ void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash, | |||
return; | |||
} | |||
#elif defined(GHASH_ASM_ARM) | |||
if (pmull_capable()) { | |||
if (gcm_pmull_capable()) { | |||
gcm_init_v8(out_table, H.u); | |||
*out_mult = gcm_gmult_v8; | |||
*out_hash = gcm_ghash_v8; | |||
return; | |||
} | |||
if (neon_capable()) { | |||
if (gcm_neon_capable()) { | |||
gcm_init_neon(out_table, H.u); | |||
*out_mult = gcm_gmult_neon; | |||
*out_hash = gcm_ghash_neon; | |||
@@ -54,8 +54,10 @@ | |||
#include <gtest/gtest.h> | |||
#include <openssl/aes.h> | |||
#include <openssl/cpu.h> | |||
#include "internal.h" | |||
#include "../../test/abi_test.h" | |||
#include "../../test/file_test.h" | |||
#include "../../test/test_util.h" | |||
@@ -115,3 +117,43 @@ TEST(GCMTest, ByteSwap) { | |||
EXPECT_EQ(UINT64_C(0x0807060504030201), | |||
CRYPTO_bswap8(UINT64_C(0x0102030405060708))); | |||
} | |||
#if defined(GHASH_ASM_X86_64) && defined(SUPPORTS_ABI_TEST) | |||
// Passes each x86-64 GHASH entry point (init, gmult, ghash) through CHECK_ABI.
// The 4-bit functions are always exercised; the CLMUL functions only when
// crypto_gcm_clmul_enabled() is true; the AVX functions only when the
// capability bits tested below are also set.
// NOTE(review): CHECK_ABI comes from abi_test.h, which is not visible here;
// presumably it calls the function and verifies calling-convention compliance.
TEST(GCMTest, ABI) { | |||
// Fixed key material handed to every gcm_init_* call.
static const uint64_t kH[2] = { | |||
UINT64_C(0x66e94bd4ef8a2c3b), | |||
UINT64_C(0x884cfa59ca342b2e), | |||
}; | |||
// Input lengths, in 16-byte blocks, handed to every gcm_ghash_* call.
static const size_t kBlockCounts[] = {1, 2, 3, 4, 7, 8, 15, 16, 31, 32}; | |||
// Sized for the largest entry in kBlockCounts (32 blocks of 16 bytes).
uint8_t buf[16 * 32]; | |||
OPENSSL_memset(buf, 42, sizeof(buf)); | |||
// Passed as the Xi argument to every gmult/ghash call below.
uint64_t X[2] = { | |||
UINT64_C(0x0388dace60b6a392), | |||
UINT64_C(0xf328c2b971b2fe78), | |||
}; | |||
// Table filled by each gcm_init_* call and then consumed by the matching
// gmult/ghash calls; it is re-initialised before each implementation.
u128 Htable[16]; | |||
CHECK_ABI(gcm_init_4bit, Htable, kH); | |||
CHECK_ABI(gcm_gmult_4bit, X, Htable); | |||
for (size_t blocks : kBlockCounts) { | |||
CHECK_ABI(gcm_ghash_4bit, X, Htable, buf, 16 * blocks); | |||
} | |||
if (crypto_gcm_clmul_enabled()) { | |||
CHECK_ABI(gcm_init_clmul, Htable, kH); | |||
CHECK_ABI(gcm_gmult_clmul, X, Htable); | |||
for (size_t blocks : kBlockCounts) { | |||
CHECK_ABI(gcm_ghash_clmul, X, Htable, buf, 16 * blocks); | |||
} | |||
// After shifting right by 22, mask 0x41 selects bits 22 and 28 of the
// capability word; both must be set for the AVX functions to be tested.
if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { // AVX+MOVBE | |||
CHECK_ABI(gcm_init_avx, Htable, kH); | |||
CHECK_ABI(gcm_gmult_avx, X, Htable); | |||
for (size_t blocks : kBlockCounts) { | |||
CHECK_ABI(gcm_ghash_avx, X, Htable, buf, 16 * blocks); | |||
} | |||
} | |||
} | |||
} | |||
#endif // GHASH_ASM_X86_64 && SUPPORTS_ABI_TEST |
@@ -50,8 +50,11 @@ | |||
#define OPENSSL_HEADER_MODES_INTERNAL_H | |||
#include <openssl/base.h> | |||
#include <openssl/aes.h> | |||
#include <openssl/cpu.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
#include "../../internal.h" | |||
@@ -199,7 +202,7 @@ int crypto_gcm_clmul_enabled(void); | |||
// AVX implementation was used |*out_is_avx| will be true. | |||
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash, | |||
u128 *out_key, u128 out_table[16], int *out_is_avx, | |||
const uint8_t *gcm_key); | |||
const uint8_t gcm_key[16]); | |||
// CRYPTO_gcm128_init_key initialises |gcm_key| to use |block| (typically AES) | |||
// with the given key. |block_is_hwaes| is one if |block| is |aes_hw_encrypt|. | |||
@@ -263,6 +266,99 @@ OPENSSL_EXPORT void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, uint8_t *tag, | |||
size_t len); | |||
// GCM assembly. | |||
#if !defined(OPENSSL_NO_ASM) && \ | |||
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \ | |||
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \ | |||
defined(OPENSSL_PPC64LE)) | |||
#define GHASH_ASM | |||
#endif | |||
void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]); | |||
void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#if defined(GHASH_ASM) | |||
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) | |||
#define GCM_FUNCREF_4BIT | |||
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#if defined(OPENSSL_X86_64) | |||
#define GHASH_ASM_X86_64 | |||
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in, | |||
size_t len); | |||
#define AESNI_GCM | |||
size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len, | |||
const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi); | |||
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len, | |||
const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi); | |||
#endif // OPENSSL_X86_64 | |||
#if defined(OPENSSL_X86) | |||
#define GHASH_ASM_X86 | |||
void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#endif // OPENSSL_X86 | |||
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) | |||
#include <openssl/arm_arch.h> | |||
#if __ARM_ARCH__ >= 7 | |||
#define GHASH_ASM_ARM | |||
#define GCM_FUNCREF_4BIT | |||
// Returns the result of CRYPTO_is_ARMv8_PMULL_capable(). CRYPTO_ghash_init
// tests this before installing the gcm_*_v8 functions.
OPENSSL_INLINE int gcm_pmull_capable(void) { | |||
return CRYPTO_is_ARMv8_PMULL_capable(); | |||
} | |||
void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#if defined(OPENSSL_ARM) | |||
// 32-bit ARM also has support for doing GCM with NEON instructions. | |||
// Returns the result of CRYPTO_is_NEON_capable(). This definition is the one
// compiled when OPENSSL_ARM is defined.
OPENSSL_INLINE int gcm_neon_capable(void) { return CRYPTO_is_NEON_capable(); } | |||
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#else | |||
// AArch64 only has the ARMv8 versions of functions. | |||
// Always reports no NEON GHASH support in this (non-OPENSSL_ARM) branch.
OPENSSL_INLINE int gcm_neon_capable(void) { return 0; } | |||
// The three gcm_*_neon definitions below are placeholders so the names exist
// in this branch. Each one aborts if it is ever called; CRYPTO_ghash_init only
// reaches them inside an `if (gcm_neon_capable())` check, and
// gcm_neon_capable() returns 0 here.
OPENSSL_INLINE void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) { | |||
abort(); | |||
} | |||
OPENSSL_INLINE void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) { | |||
abort(); | |||
} | |||
OPENSSL_INLINE void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], | |||
const uint8_t *inp, size_t len) { | |||
abort(); | |||
} | |||
#endif // OPENSSL_ARM | |||
#endif // __ARM_ARCH__ >= 7 | |||
#elif defined(OPENSSL_PPC64LE) | |||
#define GHASH_ASM_PPC64LE | |||
#define GCM_FUNCREF_4BIT | |||
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]); | |||
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]); | |||
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp, | |||
size_t len); | |||
#endif | |||
#endif // GHASH_ASM | |||
// CCM. | |||
typedef struct ccm128_context { | |||