diff --git a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
index fc631c77..b2676985 100644
--- a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
@@ -518,6 +518,7 @@ $code.=<<___;
 .type	gcm_init_clmul,\@abi-omnipotent
 .align	16
 gcm_init_clmul:
+.cfi_startproc
 .L_init_clmul:
 ___
 $code.=<<___ if ($win64);
@@ -587,6 +588,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_init_clmul,.-gcm_init_clmul
 ___
 }
@@ -598,6 +600,7 @@ $code.=<<___;
 .type	gcm_gmult_clmul,\@abi-omnipotent
 .align	16
 gcm_gmult_clmul:
+.cfi_startproc
 .L_gmult_clmul:
 	movdqu	($Xip),$Xi
 	movdqa	.Lbswap_mask(%rip),$T3
@@ -634,6 +637,7 @@ $code.=<<___;
 	pshufb	$T3,$Xi
 	movdqu	$Xi,($Xip)
 	ret
+.cfi_endproc
 .size	gcm_gmult_clmul,.-gcm_gmult_clmul
 ___
 }
@@ -647,6 +651,7 @@ $code.=<<___;
 .type	gcm_ghash_clmul,\@abi-omnipotent
 .align	32
 gcm_ghash_clmul:
+.cfi_startproc
 .L_ghash_clmul:
 ___
 $code.=<<___ if ($win64);
@@ -995,6 +1000,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_ghash_clmul,.-gcm_ghash_clmul
 ___
 }
@@ -1004,6 +1010,7 @@ $code.=<<___;
 .type	gcm_init_avx,\@abi-omnipotent
 .align	32
 gcm_init_avx:
+.cfi_startproc
 ___
 if ($avx) {
 my ($Htbl,$Xip)=@_4args;
@@ -1132,6 +1139,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_init_avx,.-gcm_init_avx
 ___
 } else {
@@ -1146,7 +1154,9 @@ $code.=<<___;
 .type	gcm_gmult_avx,\@abi-omnipotent
 .align	32
 gcm_gmult_avx:
+.cfi_startproc
 	jmp	.L_gmult_clmul
+.cfi_endproc
 .size	gcm_gmult_avx,.-gcm_gmult_avx
 ___

@@ -1155,6 +1165,7 @@ $code.=<<___;
 .type	gcm_ghash_avx,\@abi-omnipotent
 .align	32
 gcm_ghash_avx:
+.cfi_startproc
 ___
 if ($avx) {
 my ($Xip,$Htbl,$inp,$len)=@_4args;
@@ -1567,6 +1578,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	gcm_ghash_avx,.-gcm_ghash_avx
 ___
 } else {
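Review note (not part of the patch): the .cfi_startproc/.cfi_endproc pairs above make the assembler emit DWARF unwind entries (.eh_frame FDEs) for each GHASH entry point. These functions are \@abi-omnipotent and, apart from the Win64 XMM spills, never move the stack pointer, so the default unwind rules already describe every instruction and no further directives are needed; the markers exist so the unwind-driven ABI tester added below can unwind through them. From C they remain ordinary functions with the prototypes now centralized in modes/internal.h. A hypothetical standalone driver for the CLMUL path, assuming an x86-64 BoringSSL checkout (illustrative only, not part of the patch):

    #include <inttypes.h>
    #include <stdio.h>

    #include "crypto/fipsmodule/modes/internal.h"

    int main(void) {
      // H = AES_K(0^128) for the all-zero AES-128 key; Xi = one ciphertext
      // block (the same well-known vectors the new test below uses).
      static const uint64_t kH[2] = {UINT64_C(0x66e94bd4ef8a2c3b),
                                     UINT64_C(0x884cfa59ca342b2e)};
      uint64_t Xi[2] = {UINT64_C(0x0388dace60b6a392),
                        UINT64_C(0xf328c2b971b2fe78)};
      u128 Htable[16];
      if (crypto_gcm_clmul_enabled()) {
        gcm_init_clmul(Htable, kH);   // expand H into the 16-entry table
        gcm_gmult_clmul(Xi, Htable);  // Xi <- Xi * H in GF(2^128), in place
      } else {
        gcm_init_4bit(Htable, kH);    // portable fallback, same contract
        gcm_gmult_4bit(Xi, Htable);
      }
      printf("%016" PRIx64 "%016" PRIx64 "\n", Xi[0], Xi[1]);
      return 0;
    }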
diff --git a/crypto/fipsmodule/modes/gcm.c b/crypto/fipsmodule/modes/gcm.c
index 5e556df2..2a450cd5 100644
--- a/crypto/fipsmodule/modes/gcm.c
+++ b/crypto/fipsmodule/modes/gcm.c
@@ -57,12 +57,6 @@
 #include "internal.h"
 #include "../../internal.h"

-#if !defined(OPENSSL_NO_ASM) && \
-    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
-     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
-     defined(OPENSSL_PPC64LE))
-#define GHASH_ASM
-#endif

 #define PACK(s) ((size_t)(s) << (sizeof(size_t) * 8 - 16))
 #define REDUCE1BIT(V) \
@@ -82,7 +76,7 @@
 // bits of a |size_t|.
 static const size_t kSizeTWithoutLower4Bits = (size_t) -16;

-static void gcm_init_4bit(u128 Htable[16], uint64_t H[2]) {
+void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]) {
   u128 V;

   Htable[0].hi = 0;
@@ -127,7 +121,7 @@ static const size_t rem_4bit[16] = {
     PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
     PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)};

-static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
+void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
   u128 Z;
   int cnt = 15;
   size_t rem, nlo, nhi;
@@ -182,8 +176,8 @@ static void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]) {
 // performance improvement, at least not on x86[_64]. It's here
 // mostly as reference and a placeholder for possible future
 // non-trivial optimization[s]...
-static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
-                           const uint8_t *inp, size_t len) {
+void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                    size_t len) {
   u128 Z;
   int cnt;
   size_t rem, nlo, nhi;
@@ -237,11 +231,7 @@ static void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16],
     Xi[1] = CRYPTO_bswap8(Z.lo);
   } while (inp += 16, len -= 16);
 }
-#else  // GHASH_ASM
-void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                    size_t len);
-#endif
+#endif  // !GHASH_ASM || AARCH64 || PPC64LE

 #define GCM_MUL(ctx, Xi) gcm_gmult_4bit((ctx)->Xi.u, (ctx)->gcm_key.Htable)
 #if defined(GHASH_ASM)
@@ -251,90 +241,7 @@ void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
 // trashing effect. In other words idea is to hash data while it's
 // still in L1 cache after encryption pass...
 #define GHASH_CHUNK (3 * 1024)
-#endif
-
-
-#if defined(GHASH_ASM)
-
-#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
-#define GCM_FUNCREF_4BIT
-void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                     size_t len);
-
-#if defined(OPENSSL_X86_64)
-#define GHASH_ASM_X86_64
-void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
-                   size_t len);
-#define AESNI_GCM
-size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
-                         const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
-size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
-                         const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
-#endif
-
-#if defined(OPENSSL_X86)
-#define GHASH_ASM_X86
-void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                        size_t len);
-#endif
-
-#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
-#include <openssl/arm_arch.h>
-#if __ARM_ARCH__ >= 7
-#define GHASH_ASM_ARM
-#define GCM_FUNCREF_4BIT
-
-static int pmull_capable(void) {
-  return CRYPTO_is_ARMv8_PMULL_capable();
-}
-
-void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                  size_t len);
-
-#if defined(OPENSSL_ARM)
-// 32-bit ARM also has support for doing GCM with NEON instructions.
-static int neon_capable(void) {
-  return CRYPTO_is_NEON_capable();
-}
-
-void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                    size_t len);
-#else
-// AArch64 only has the ARMv8 versions of functions.
-static int neon_capable(void) {
-  return 0;
-}
-static void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
-  abort();
-}
-static void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
-  abort();
-}
-static void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
-                           const uint8_t *inp, size_t len) {
-  abort();
-}
-#endif
-
-#endif
-#elif defined(OPENSSL_PPC64LE)
-#define GHASH_ASM_PPC64LE
-#define GCM_FUNCREF_4BIT
-void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
-void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
-void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
-                  size_t len);
-#endif
-#endif
+#endif  // GHASH_ASM

 #ifdef GCM_FUNCREF_4BIT
 #undef GCM_MUL
@@ -344,12 +251,11 @@ void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
 #define GHASH(ctx, in, len) \
   (*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
 #endif
-#endif
+#endif  // GCM_FUNCREF_4BIT

 void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
-                       u128 *out_key, u128 out_table[16],
-                       int *out_is_avx,
-                       const uint8_t *gcm_key) {
+                       u128 *out_key, u128 out_table[16], int *out_is_avx,
+                       const uint8_t gcm_key[16]) {
   *out_is_avx = 0;

   union {
@@ -387,14 +293,14 @@ void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
     return;
   }
 #elif defined(GHASH_ASM_ARM)
-  if (pmull_capable()) {
+  if (gcm_pmull_capable()) {
     gcm_init_v8(out_table, H.u);
     *out_mult = gcm_gmult_v8;
     *out_hash = gcm_ghash_v8;
     return;
   }

-  if (neon_capable()) {
+  if (gcm_neon_capable()) {
     gcm_init_neon(out_table, H.u);
     *out_mult = gcm_gmult_neon;
     *out_hash = gcm_ghash_neon;
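Review note (not part of the patch): gcm_init_4bit expands H into a 16-entry table of nibble multiples using the shift-and-reduce step in REDUCE1BIT, and gcm_gmult_4bit then consumes Xi four bits at a time. The result equals the textbook bit-at-a-time multiply in GF(2^128) with GCM's reduction polynomial x^128 + x^7 + x^2 + x + 1 (NIST SP 800-38D, Algorithm 1). A self-contained reference sketch with hypothetical names, handy for cross-checking the assembly implementations:

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } u128_ref;  // hi holds GCM bits 0..63

    // Z <- X * H in GF(2^128), GCM bit order. Equivalent in result to
    // gcm_gmult_4bit above, but one bit at a time instead of table-driven.
    static u128_ref ghash_mul_ref(u128_ref X, u128_ref H) {
      u128_ref Z = {0, 0}, V = H;
      for (int i = 0; i < 128; i++) {
        // GCM numbers bits MSB-first within each 64-bit half.
        uint64_t bit = (i < 64) ? (X.hi >> (63 - i)) & 1
                                : (X.lo >> (127 - i)) & 1;
        if (bit) {
          Z.hi ^= V.hi;
          Z.lo ^= V.lo;
        }
        // V <- V * x: shift right one bit and fold the carry back in with
        // 0xE1 << 56, exactly the REDUCE1BIT step in gcm.c.
        uint64_t carry = V.lo & 1;
        V.lo = (V.lo >> 1) | (V.hi << 63);
        V.hi = (V.hi >> 1) ^ (carry ? UINT64_C(0xe100000000000000) : 0);
      }
      return Z;
    }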
diff --git a/crypto/fipsmodule/modes/gcm_test.cc b/crypto/fipsmodule/modes/gcm_test.cc
index 16d9d097..fb17bbbf 100644
--- a/crypto/fipsmodule/modes/gcm_test.cc
+++ b/crypto/fipsmodule/modes/gcm_test.cc
@@ -54,8 +54,10 @@

 #include <gtest/gtest.h>

 #include <openssl/aes.h>
+#include <openssl/cpu.h>

 #include "internal.h"
+#include "../../test/abi_test.h"
 #include "../../test/file_test.h"
 #include "../../test/test_util.h"
@@ -115,3 +117,43 @@ TEST(GCMTest, ByteSwap) {
   EXPECT_EQ(UINT64_C(0x0807060504030201),
             CRYPTO_bswap8(UINT64_C(0x0102030405060708)));
 }
+
+#if defined(GHASH_ASM_X86_64) && defined(SUPPORTS_ABI_TEST)
+TEST(GCMTest, ABI) {
+  static const uint64_t kH[2] = {
+      UINT64_C(0x66e94bd4ef8a2c3b),
+      UINT64_C(0x884cfa59ca342b2e),
+  };
+  static const size_t kBlockCounts[] = {1, 2, 3, 4, 7, 8, 15, 16, 31, 32};
+  uint8_t buf[16 * 32];
+  OPENSSL_memset(buf, 42, sizeof(buf));
+
+  uint64_t X[2] = {
+      UINT64_C(0x0388dace60b6a392),
+      UINT64_C(0xf328c2b971b2fe78),
+  };
+
+  u128 Htable[16];
+  CHECK_ABI(gcm_init_4bit, Htable, kH);
+  CHECK_ABI(gcm_gmult_4bit, X, Htable);
+  for (size_t blocks : kBlockCounts) {
+    CHECK_ABI(gcm_ghash_4bit, X, Htable, buf, 16 * blocks);
+  }
+
+  if (crypto_gcm_clmul_enabled()) {
+    CHECK_ABI(gcm_init_clmul, Htable, kH);
+    CHECK_ABI(gcm_gmult_clmul, X, Htable);
+    for (size_t blocks : kBlockCounts) {
+      CHECK_ABI(gcm_ghash_clmul, X, Htable, buf, 16 * blocks);
+    }
+
+    if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) {  // AVX+MOVBE
+      CHECK_ABI(gcm_init_avx, Htable, kH);
+      CHECK_ABI(gcm_gmult_avx, X, Htable);
+      for (size_t blocks : kBlockCounts) {
+        CHECK_ABI(gcm_ghash_avx, X, Htable, buf, 16 * blocks);
+      }
+    }
+  }
+}
+#endif  // GHASH_ASM_X86_64 && SUPPORTS_ABI_TEST
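Review note (not part of the patch): CHECK_ABI both calls the function and, where the platform supports it, unwinds through it at every instruction while verifying callee-saved registers, which is why the assembly above needed the CFI markers. The AVX+MOVBE guard mirrors the dispatch condition in CRYPTO_ghash_init: on x86-64, OPENSSL_ia32cap_get()[1] is the ECX output of CPUID leaf 1, in which bit 28 is AVX and bit 22 is MOVBE, and 0x41 is (1 << 6) | 1, so the expression tests both bits at once. Spelled out with a hypothetical helper:

    #include <stdint.h>

    #include <openssl/cpu.h>

    // Same predicate as ((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41.
    static int gcm_avx_movbe_usable(void) {
      const uint32_t ecx = OPENSSL_ia32cap_get()[1];  // CPUID(1).ECX
      const int has_movbe = (ecx >> 22) & 1;          // ECX bit 22: MOVBE
      const int has_avx = (ecx >> 28) & 1;            // ECX bit 28: AVX
      return has_movbe && has_avx;
    }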
diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h
index 23aaca2c..3163c502 100644
--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@@ -50,8 +50,11 @@
 #define OPENSSL_HEADER_MODES_INTERNAL_H

 #include <openssl/base.h>
+
 #include <string.h>

+#include <openssl/aes.h>
+#include <openssl/cpu.h>

 #include "../../internal.h"
@@ -199,7 +202,7 @@ int crypto_gcm_clmul_enabled(void);
 // AVX implementation was used |*out_is_avx| will be true.
 void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
                        u128 *out_key, u128 out_table[16], int *out_is_avx,
-                       const uint8_t *gcm_key);
+                       const uint8_t gcm_key[16]);

 // CRYPTO_gcm128_init_key initialises |gcm_key| to use |block| (typically AES)
 // with the given key. |block_is_hwaes| is one if |block| is |aes_hw_encrypt|.
@@ -263,6 +266,99 @@ OPENSSL_EXPORT void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, uint8_t *tag,
                                       size_t len);


+// GCM assembly.
+
+#if !defined(OPENSSL_NO_ASM) && \
+    (defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
+     defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
+     defined(OPENSSL_PPC64LE))
+#define GHASH_ASM
+#endif
+
+void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]);
+void gcm_gmult_4bit(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_4bit(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                    size_t len);
+
+#if defined(GHASH_ASM)
+
+#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
+#define GCM_FUNCREF_4BIT
+void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                     size_t len);
+
+#if defined(OPENSSL_X86_64)
+#define GHASH_ASM_X86_64
+void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
+                   size_t len);
+#define AESNI_GCM
+size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
+                         const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
+size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
+                         const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
+#endif  // OPENSSL_X86_64
+
+#if defined(OPENSSL_X86)
+#define GHASH_ASM_X86
+void gcm_gmult_4bit_mmx(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_4bit_mmx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                        size_t len);
+#endif  // OPENSSL_X86
+
+#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
+#include <openssl/arm_arch.h>
+#if __ARM_ARCH__ >= 7
+#define GHASH_ASM_ARM
+#define GCM_FUNCREF_4BIT
+
+OPENSSL_INLINE int gcm_pmull_capable(void) {
+  return CRYPTO_is_ARMv8_PMULL_capable();
+}
+
+void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                  size_t len);
+
+#if defined(OPENSSL_ARM)
+// 32-bit ARM also has support for doing GCM with NEON instructions.
+OPENSSL_INLINE int gcm_neon_capable(void) { return CRYPTO_is_NEON_capable(); }
+
+void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                    size_t len);
+#else
+// AArch64 only has the ARMv8 versions of functions.
+OPENSSL_INLINE int gcm_neon_capable(void) { return 0; }
+OPENSSL_INLINE void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]) {
+  abort();
+}
+OPENSSL_INLINE void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]) {
+  abort();
+}
+OPENSSL_INLINE void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16],
+                                   const uint8_t *inp, size_t len) {
+  abort();
+}
+#endif  // OPENSSL_ARM
+
+#endif  // __ARM_ARCH__ >= 7
+#elif defined(OPENSSL_PPC64LE)
+#define GHASH_ASM_PPC64LE
+#define GCM_FUNCREF_4BIT
+void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
+void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
+void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+                  size_t len);
+#endif
+#endif  // GHASH_ASM
+
+
 // CCM.

 typedef struct ccm128_context {
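Review note (not part of the patch): with the prototypes and capability helpers now in this header (the helpers renamed with a gcm_ prefix and made OPENSSL_INLINE so both gcm.c and the new test can include them), CRYPTO_ghash_init in gcm.c reduces to picking an (init, gmult, ghash) triple at key setup. A condensed sketch of that dispatch shape under the declarations above, using a hypothetical helper rather than the library's actual code:

    // Assumes modes/internal.h, which defines gmult_func/ghash_func and
    // declares the implementations referenced below.
    static void gcm_pick_impl_sketch(gmult_func *out_mult, ghash_func *out_hash,
                                     u128 out_table[16], const uint64_t H[2]) {
    #if defined(GHASH_ASM_X86_64)
      if (crypto_gcm_clmul_enabled()) {
        gcm_init_clmul(out_table, H);  // PCLMULQDQ carry-less multiply path
        *out_mult = gcm_gmult_clmul;
        *out_hash = gcm_ghash_clmul;
        return;
      }
    #endif
      gcm_init_4bit(out_table, H);  // portable 4-bit table fallback
      *out_mult = gcm_gmult_4bit;
      *out_hash = gcm_ghash_4bit;
    }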