From 104306f587751f34852838915fb61ce5551c2332 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Sat, 9 Feb 2019 22:14:30 +0000 Subject: [PATCH] Remove STRICT_ALIGNMENT code from modes. STRICT_ALIGNMENT is a remnant of OpenSSL code would cast pointers to size_t* and load more than one byte at a time. Not all architectures support unaligned access, so it did an alignment check and only enterred this path if aligned or the underlying architecture didn't care. This is UB. Unaligned casts in C are undefined on all architectures, so we switch these to memcpy some time ago. Compilers can optimize memcpy to the unaligned accesses we wanted. That left our modes logic as: - If STRICT_ALIGNMENT is 1 and things are unaligned, work byte-by-byte. - Otherwise, use the memcpy-based word-by-word code, which now works independent of STRICT_ALIGNMENT. Remove the first check to simplify things. On x86, x86_64, and aarch64, STRICT_ALIGNMENT is zero and this is a no-op. ARM is more complex. Per [0], ARMv7 and up support unaligned access. ARMv5 do not. ARMv6 does, but can run in a mode where it looks more like ARMv5. For ARMv7 and up, STRICT_ALIGNMENT should have been zero, but was one. Thus this change should be an improvement for ARMv7 (right now unaligned inputs lose bsaes-armv7). The Android NDK does not even support the pre-ARMv7 ABI anymore[1]. Nonetheless, Cronet still supports ARMv6 as a library. It builds with -march=armv6 which GCC interprets as supporting unaligned access, so it too did not want this code. For completeness, should anyone still care about ARMv5 or be building with an overly permissive -march flag, GCC does appear unable to inline the memcpy calls. However, GCC also does not interpret (uintptr_t)ptr % sizeof(size_t) as an alignment assertion, so such consumers have already been paying for the memcpy here and throughout the library. In general, C's arcane pointer rules mean we must resort to memcpy often, so, realistically, we must require that the compiler optimize memcpy well. [0] https://medium.com/@iLevex/the-curious-case-of-unaligned-access-on-arm-5dd0ebe24965 [1] https://developer.android.com/ndk/guides/abis#armeabi Change-Id: I3c7dea562adaeb663032e395499e69530dd8e145 Reviewed-on: https://boringssl-review.googlesource.com/c/34873 Reviewed-by: Adam Langley --- crypto/fipsmodule/modes/cbc.c | 110 +++++++++-------------------- crypto/fipsmodule/modes/cfb.c | 33 --------- crypto/fipsmodule/modes/ctr.c | 20 ------ crypto/fipsmodule/modes/gcm.c | 39 ---------- crypto/fipsmodule/modes/internal.h | 6 -- 5 files changed, 33 insertions(+), 175 deletions(-) diff --git a/crypto/fipsmodule/modes/cbc.c b/crypto/fipsmodule/modes/cbc.c index 64ea5056..3f1d7776 100644 --- a/crypto/fipsmodule/modes/cbc.c +++ b/crypto/fipsmodule/modes/cbc.c @@ -49,6 +49,8 @@ #include #include +#include + #include "internal.h" @@ -61,30 +63,15 @@ void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len, assert(key != NULL && ivec != NULL); assert(len == 0 || (in != NULL && out != NULL)); - if (STRICT_ALIGNMENT && - ((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) != - 0) { - while (len >= 16) { - for (n = 0; n < 16; ++n) { - out[n] = in[n] ^ iv[n]; - } - (*block)(out, out, key); - iv = out; - len -= 16; - in += 16; - out += 16; - } - } else { - while (len >= 16) { - for (n = 0; n < 16; n += sizeof(size_t)) { - store_word_le(out + n, load_word_le(in + n) ^ load_word_le(iv + n)); - } - (*block)(out, out, key); - iv = out; - len -= 16; - in += 16; - out += 16; + while (len >= 16) { + for (n = 0; n < 16; n += sizeof(size_t)) { + store_word_le(out + n, load_word_le(in + n) ^ load_word_le(iv + n)); } + (*block)(out, out, key); + iv = out; + len -= 16; + in += 16; + out += 16; } while (len) { @@ -127,66 +114,35 @@ void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len, if ((inptr >= 32 && outptr <= inptr - 32) || inptr < outptr) { // If |out| is at least two blocks behind |in| or completely disjoint, there // is no need to decrypt to a temporary block. + OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0, + "block cannot be evenly divided into words"); const uint8_t *iv = ivec; - - if (STRICT_ALIGNMENT && - ((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) != - 0) { - while (len >= 16) { - (*block)(in, out, key); - for (n = 0; n < 16; ++n) { - out[n] ^= iv[n]; - } - iv = in; - len -= 16; - in += 16; - out += 16; - } - } else if (16 % sizeof(size_t) == 0) { // always true - while (len >= 16) { - (*block)(in, out, key); - for (n = 0; n < 16; n += sizeof(size_t)) { - store_word_le(out + n, load_word_le(out + n) ^ load_word_le(iv + n)); - } - iv = in; - len -= 16; - in += 16; - out += 16; + while (len >= 16) { + (*block)(in, out, key); + for (n = 0; n < 16; n += sizeof(size_t)) { + store_word_le(out + n, load_word_le(out + n) ^ load_word_le(iv + n)); } + iv = in; + len -= 16; + in += 16; + out += 16; } OPENSSL_memcpy(ivec, iv, 16); } else { - // |out| is less than two blocks behind |in|. Decrypting an input block - // directly to |out| would overwrite a ciphertext block before it is used as - // the next block's IV. Decrypt to a temporary block instead. - if (STRICT_ALIGNMENT && - ((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) != - 0) { - uint8_t c; - while (len >= 16) { - (*block)(in, tmp.c, key); - for (n = 0; n < 16; ++n) { - c = in[n]; - out[n] = tmp.c[n] ^ ivec[n]; - ivec[n] = c; - } - len -= 16; - in += 16; - out += 16; - } - } else if (16 % sizeof(size_t) == 0) { // always true - while (len >= 16) { - (*block)(in, tmp.c, key); - for (n = 0; n < 16; n += sizeof(size_t)) { - size_t c = load_word_le(in + n); - store_word_le(out + n, - tmp.t[n / sizeof(size_t)] ^ load_word_le(ivec + n)); - store_word_le(ivec + n, c); - } - len -= 16; - in += 16; - out += 16; + OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0, + "block cannot be evenly divided into words"); + + while (len >= 16) { + (*block)(in, tmp.c, key); + for (n = 0; n < 16; n += sizeof(size_t)) { + size_t c = load_word_le(in + n); + store_word_le(out + n, + tmp.t[n / sizeof(size_t)] ^ load_word_le(ivec + n)); + store_word_le(ivec + n, c); } + len -= 16; + in += 16; + out += 16; } } diff --git a/crypto/fipsmodule/modes/cfb.c b/crypto/fipsmodule/modes/cfb.c index 0a81f3b2..8ca90041 100644 --- a/crypto/fipsmodule/modes/cfb.c +++ b/crypto/fipsmodule/modes/cfb.c @@ -60,8 +60,6 @@ OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0, void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len, const AES_KEY *key, uint8_t ivec[16], unsigned *num, int enc, block128_f block) { - size_t l = 0; - assert(in && out && key && ivec && num); unsigned n = *num; @@ -72,21 +70,6 @@ void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len, --len; n = (n + 1) % 16; } -#if STRICT_ALIGNMENT - if (((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) != - 0) { - while (l < len) { - if (n == 0) { - (*block)(ivec, ivec, key); - } - out[l] = ivec[n] ^= in[l]; - ++l; - n = (n + 1) % 16; - } - *num = n; - return; - } -#endif while (len >= 16) { (*block)(ivec, ivec, key); for (; n < 16; n += sizeof(size_t)) { @@ -116,22 +99,6 @@ void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len, --len; n = (n + 1) % 16; } - if (STRICT_ALIGNMENT && - ((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) != - 0) { - while (l < len) { - uint8_t c; - if (n == 0) { - (*block)(ivec, ivec, key); - } - out[l] = ivec[n] ^ (c = in[l]); - ivec[n] = c; - ++l; - n = (n + 1) % 16; - } - *num = n; - return; - } while (len >= 16) { (*block)(ivec, ivec, key); for (; n < 16; n += sizeof(size_t)) { diff --git a/crypto/fipsmodule/modes/ctr.c b/crypto/fipsmodule/modes/ctr.c index b806b9a3..8b0e0594 100644 --- a/crypto/fipsmodule/modes/ctr.c +++ b/crypto/fipsmodule/modes/ctr.c @@ -99,26 +99,6 @@ void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len, --len; n = (n + 1) % 16; } - -#if STRICT_ALIGNMENT - if (((uintptr_t)in | (uintptr_t)out | - (uintptr_t)ecount_buf) % sizeof(size_t) != 0) { - size_t l = 0; - while (l < len) { - if (n == 0) { - (*block)(ivec, ecount_buf, key); - ctr128_inc(ivec); - } - out[l] = in[l] ^ ecount_buf[n]; - ++l; - n = (n + 1) % 16; - } - - *num = n; - return; - } -#endif - while (len >= 16) { (*block)(ivec, ecount_buf, key); ctr128_inc(ivec); diff --git a/crypto/fipsmodule/modes/gcm.c b/crypto/fipsmodule/modes/gcm.c index 97fde3e4..f92f6750 100644 --- a/crypto/fipsmodule/modes/gcm.c +++ b/crypto/fipsmodule/modes/gcm.c @@ -514,24 +514,6 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key, } uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]); - if (STRICT_ALIGNMENT && - ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) { - for (size_t i = 0; i < len; ++i) { - if (n == 0) { - (*block)(ctx->Yi.c, ctx->EKi.c, key); - ++ctr; - ctx->Yi.d[3] = CRYPTO_bswap4(ctr); - } - ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n]; - n = (n + 1) % 16; - if (n == 0) { - GCM_MUL(ctx, Xi); - } - } - - ctx->mres = n; - return 1; - } while (len >= GHASH_CHUNK) { size_t j = GHASH_CHUNK; @@ -622,27 +604,6 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key, } uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]); - if (STRICT_ALIGNMENT && - ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) { - for (size_t i = 0; i < len; ++i) { - uint8_t c; - if (n == 0) { - (*block)(ctx->Yi.c, ctx->EKi.c, key); - ++ctr; - ctx->Yi.d[3] = CRYPTO_bswap4(ctr); - } - c = in[i]; - out[i] = c ^ ctx->EKi.c[n]; - ctx->Xi.c[n] ^= c; - n = (n + 1) % 16; - if (n == 0) { - GCM_MUL(ctx, Xi); - } - } - - ctx->mres = n; - return 1; - } while (len >= GHASH_CHUNK) { size_t j = GHASH_CHUNK; diff --git a/crypto/fipsmodule/modes/internal.h b/crypto/fipsmodule/modes/internal.h index 18ec60ca..ed1160b8 100644 --- a/crypto/fipsmodule/modes/internal.h +++ b/crypto/fipsmodule/modes/internal.h @@ -64,12 +64,6 @@ extern "C" { #endif -#define STRICT_ALIGNMENT 1 -#if defined(OPENSSL_X86_64) || defined(OPENSSL_X86) || defined(OPENSSL_AARCH64) -#undef STRICT_ALIGNMENT -#define STRICT_ALIGNMENT 0 -#endif - static inline uint32_t GETU32(const void *in) { uint32_t v; OPENSSL_memcpy(&v, in, sizeof(v));