Remove STRICT_ALIGNMENT code from modes.

STRICT_ALIGNMENT is a remnant of OpenSSL code would cast pointers to
size_t* and load more than one byte at a time. Not all architectures
support unaligned access, so it did an alignment check and only enterred
this path if aligned or the underlying architecture didn't care.

This is UB. Unaligned casts in C are undefined on all architectures, so
we switch these to memcpy some time ago. Compilers can optimize memcpy
to the unaligned accesses we wanted. That left our modes logic as:

- If STRICT_ALIGNMENT is 1 and things are unaligned, work byte-by-byte.

- Otherwise, use the memcpy-based word-by-word code, which now works
  independent of STRICT_ALIGNMENT.

Remove the first check to simplify things. On x86, x86_64, and aarch64,
STRICT_ALIGNMENT is zero and this is a no-op. ARM is more complex. Per
[0], ARMv7 and up support unaligned access. ARMv5 do not. ARMv6 does,
but can run in a mode where it looks more like ARMv5.

For ARMv7 and up, STRICT_ALIGNMENT should have been zero, but was one.
Thus this change should be an improvement for ARMv7 (right now unaligned
inputs lose bsaes-armv7). The Android NDK does not even support the
pre-ARMv7 ABI anymore[1]. Nonetheless, Cronet still supports ARMv6 as a
library. It builds with -march=armv6 which GCC interprets as supporting
unaligned access, so it too did not want this code.

For completeness, should anyone still care about ARMv5 or be building
with an overly permissive -march flag, GCC does appear unable to inline
the memcpy calls. However, GCC also does not interpret
(uintptr_t)ptr % sizeof(size_t) as an alignment assertion, so such
consumers have already been paying for the memcpy here and throughout
the library.

In general, C's arcane pointer rules mean we must resort to memcpy
often, so, realistically, we must require that the compiler optimize
memcpy well.

[0] https://medium.com/@iLevex/the-curious-case-of-unaligned-access-on-arm-5dd0ebe24965
[1] https://developer.android.com/ndk/guides/abis#armeabi

Change-Id: I3c7dea562adaeb663032e395499e69530dd8e145
Reviewed-on: https://boringssl-review.googlesource.com/c/34873
Reviewed-by: Adam Langley <agl@google.com>
This commit is contained in:
David Benjamin 2019-02-09 22:14:30 +00:00 committed by Adam Langley
parent d8598ce03f
commit 104306f587
5 changed files with 33 additions and 175 deletions

View File

@ -49,6 +49,8 @@
#include <assert.h>
#include <string.h>
#include <openssl/type_check.h>
#include "internal.h"
@ -61,30 +63,15 @@ void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
assert(key != NULL && ivec != NULL);
assert(len == 0 || (in != NULL && out != NULL));
if (STRICT_ALIGNMENT &&
((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
0) {
while (len >= 16) {
for (n = 0; n < 16; ++n) {
out[n] = in[n] ^ iv[n];
}
(*block)(out, out, key);
iv = out;
len -= 16;
in += 16;
out += 16;
}
} else {
while (len >= 16) {
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(in + n) ^ load_word_le(iv + n));
}
(*block)(out, out, key);
iv = out;
len -= 16;
in += 16;
out += 16;
while (len >= 16) {
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(in + n) ^ load_word_le(iv + n));
}
(*block)(out, out, key);
iv = out;
len -= 16;
in += 16;
out += 16;
}
while (len) {
@ -127,66 +114,35 @@ void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
if ((inptr >= 32 && outptr <= inptr - 32) || inptr < outptr) {
// If |out| is at least two blocks behind |in| or completely disjoint, there
// is no need to decrypt to a temporary block.
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be evenly divided into words");
const uint8_t *iv = ivec;
if (STRICT_ALIGNMENT &&
((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
0) {
while (len >= 16) {
(*block)(in, out, key);
for (n = 0; n < 16; ++n) {
out[n] ^= iv[n];
}
iv = in;
len -= 16;
in += 16;
out += 16;
}
} else if (16 % sizeof(size_t) == 0) { // always true
while (len >= 16) {
(*block)(in, out, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(out + n) ^ load_word_le(iv + n));
}
iv = in;
len -= 16;
in += 16;
out += 16;
while (len >= 16) {
(*block)(in, out, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(out + n) ^ load_word_le(iv + n));
}
iv = in;
len -= 16;
in += 16;
out += 16;
}
OPENSSL_memcpy(ivec, iv, 16);
} else {
// |out| is less than two blocks behind |in|. Decrypting an input block
// directly to |out| would overwrite a ciphertext block before it is used as
// the next block's IV. Decrypt to a temporary block instead.
if (STRICT_ALIGNMENT &&
((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
0) {
uint8_t c;
while (len >= 16) {
(*block)(in, tmp.c, key);
for (n = 0; n < 16; ++n) {
c = in[n];
out[n] = tmp.c[n] ^ ivec[n];
ivec[n] = c;
}
len -= 16;
in += 16;
out += 16;
}
} else if (16 % sizeof(size_t) == 0) { // always true
while (len >= 16) {
(*block)(in, tmp.c, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
size_t c = load_word_le(in + n);
store_word_le(out + n,
tmp.t[n / sizeof(size_t)] ^ load_word_le(ivec + n));
store_word_le(ivec + n, c);
}
len -= 16;
in += 16;
out += 16;
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be evenly divided into words");
while (len >= 16) {
(*block)(in, tmp.c, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
size_t c = load_word_le(in + n);
store_word_le(out + n,
tmp.t[n / sizeof(size_t)] ^ load_word_le(ivec + n));
store_word_le(ivec + n, c);
}
len -= 16;
in += 16;
out += 16;
}
}

View File

@ -60,8 +60,6 @@ OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
int enc, block128_f block) {
size_t l = 0;
assert(in && out && key && ivec && num);
unsigned n = *num;
@ -72,21 +70,6 @@ void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
--len;
n = (n + 1) % 16;
}
#if STRICT_ALIGNMENT
if (((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
0) {
while (l < len) {
if (n == 0) {
(*block)(ivec, ivec, key);
}
out[l] = ivec[n] ^= in[l];
++l;
n = (n + 1) % 16;
}
*num = n;
return;
}
#endif
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {
@ -116,22 +99,6 @@ void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
--len;
n = (n + 1) % 16;
}
if (STRICT_ALIGNMENT &&
((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
0) {
while (l < len) {
uint8_t c;
if (n == 0) {
(*block)(ivec, ivec, key);
}
out[l] = ivec[n] ^ (c = in[l]);
ivec[n] = c;
++l;
n = (n + 1) % 16;
}
*num = n;
return;
}
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {

View File

@ -99,26 +99,6 @@ void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
--len;
n = (n + 1) % 16;
}
#if STRICT_ALIGNMENT
if (((uintptr_t)in | (uintptr_t)out |
(uintptr_t)ecount_buf) % sizeof(size_t) != 0) {
size_t l = 0;
while (l < len) {
if (n == 0) {
(*block)(ivec, ecount_buf, key);
ctr128_inc(ivec);
}
out[l] = in[l] ^ ecount_buf[n];
++l;
n = (n + 1) % 16;
}
*num = n;
return;
}
#endif
while (len >= 16) {
(*block)(ivec, ecount_buf, key);
ctr128_inc(ivec);

View File

@ -514,24 +514,6 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
}
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
if (STRICT_ALIGNMENT &&
((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
for (size_t i = 0; i < len; ++i) {
if (n == 0) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}
ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
n = (n + 1) % 16;
if (n == 0) {
GCM_MUL(ctx, Xi);
}
}
ctx->mres = n;
return 1;
}
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;
@ -622,27 +604,6 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
}
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
if (STRICT_ALIGNMENT &&
((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
for (size_t i = 0; i < len; ++i) {
uint8_t c;
if (n == 0) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}
c = in[i];
out[i] = c ^ ctx->EKi.c[n];
ctx->Xi.c[n] ^= c;
n = (n + 1) % 16;
if (n == 0) {
GCM_MUL(ctx, Xi);
}
}
ctx->mres = n;
return 1;
}
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;

View File

@ -64,12 +64,6 @@ extern "C" {
#endif
#define STRICT_ALIGNMENT 1
#if defined(OPENSSL_X86_64) || defined(OPENSSL_X86) || defined(OPENSSL_AARCH64)
#undef STRICT_ALIGNMENT
#define STRICT_ALIGNMENT 0
#endif
static inline uint32_t GETU32(const void *in) {
uint32_t v;
OPENSSL_memcpy(&v, in, sizeof(v));