Remove STRICT_ALIGNMENT code from modes.

STRICT_ALIGNMENT is a remnant of OpenSSL code would cast pointers to size_t* and load more than one byte at a time. Not all architectures support unaligned access, so it did an alignment check and only enterred this path if aligned or the underlying architecture didn't care. This is UB. Unaligned casts in C are undefined on all architectures, so we switch these to memcpy some time ago. Compilers can optimize memcpy to the unaligned accesses we wanted. That left our modes logic as: - If STRICT_ALIGNMENT is 1 and things are unaligned, work byte-by-byte. - Otherwise, use the memcpy-based word-by-word code, which now works independent of STRICT_ALIGNMENT. Remove the first check to simplify things. On x86, x86_64, and aarch64, STRICT_ALIGNMENT is zero and this is a no-op. ARM is more complex. Per [0], ARMv7 and up support unaligned access. ARMv5 do not. ARMv6 does, but can run in a mode where it looks more like ARMv5. For ARMv7 and up, STRICT_ALIGNMENT should have been zero, but was one. Thus this change should be an improvement for ARMv7 (right now unaligned inputs lose bsaes-armv7). The Android NDK does not even support the pre-ARMv7 ABI anymore[1]. Nonetheless, Cronet still supports ARMv6 as a library. It builds with -march=armv6 which GCC interprets as supporting unaligned access, so it too did not want this code. For completeness, should anyone still care about ARMv5 or be building with an overly permissive -march flag, GCC does appear unable to inline the memcpy calls. However, GCC also does not interpret (uintptr_t)ptr % sizeof(size_t) as an alignment assertion, so such consumers have already been paying for the memcpy here and throughout the library. In general, C's arcane pointer rules mean we must resort to memcpy often, so, realistically, we must require that the compiler optimize memcpy well. [0] https://medium.com/@iLevex/the-curious-case-of-unaligned-access-on-arm-5dd0ebe24965 [1] https://developer.android.com/ndk/guides/abis#armeabi Change-Id: I3c7dea562adaeb663032e395499e69530dd8e145 Reviewed-on: https://boringssl-review.googlesource.com/c/34873 Reviewed-by: Adam Langley <agl@google.com>
2019-02-09 22:14:30 +00:00 · 2019-02-09 22:14:30 +00:00 · 104306f587
commit 104306f587
parent d8598ce03f
5 changed files with 33 additions and 175 deletions
--- a/crypto/fipsmodule/modes/cbc.c
+++ b/crypto/fipsmodule/modes/cbc.c
@ -49,6 +49,8 @@
 #include <assert.h>
 #include <string.h>

+#include <openssl/type_check.h>
+
 #include "internal.h"


@ -61,20 +63,6 @@ void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
  assert(key != NULL && ivec != NULL);
  assert(len == 0 || (in != NULL && out != NULL));

-  if (STRICT_ALIGNMENT &&
-      ((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
-          0) {
-    while (len >= 16) {
-      for (n = 0; n < 16; ++n) {
-        out[n] = in[n] ^ iv[n];
-      }
-      (*block)(out, out, key);
-      iv = out;
-      len -= 16;
-      in += 16;
-      out += 16;
-    }
-  } else {
  while (len >= 16) {
    for (n = 0; n < 16; n += sizeof(size_t)) {
      store_word_le(out + n, load_word_le(in + n) ^ load_word_le(iv + n));
@ -85,7 +73,6 @@ void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
    in += 16;
    out += 16;
  }
-  }

  while (len) {
    for (n = 0; n < 16 && n < len; ++n) {
@ -127,22 +114,9 @@ void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
  if ((inptr >= 32 && outptr <= inptr - 32) || inptr < outptr) {
    // If |out| is at least two blocks behind |in| or completely disjoint, there
    // is no need to decrypt to a temporary block.
+    OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
+                          "block cannot be evenly divided into words");
    const uint8_t *iv = ivec;
-
-    if (STRICT_ALIGNMENT &&
-        ((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
-            0) {
-      while (len >= 16) {
-        (*block)(in, out, key);
-        for (n = 0; n < 16; ++n) {
-          out[n] ^= iv[n];
-        }
-        iv = in;
-        len -= 16;
-        in += 16;
-        out += 16;
-      }
-    } else if (16 % sizeof(size_t) == 0) {  // always true
    while (len >= 16) {
      (*block)(in, out, key);
      for (n = 0; n < 16; n += sizeof(size_t)) {
@ -153,28 +127,11 @@ void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
      in += 16;
      out += 16;
    }
-    }
    OPENSSL_memcpy(ivec, iv, 16);
  } else {
-    // |out| is less than two blocks behind |in|. Decrypting an input block
-    // directly to |out| would overwrite a ciphertext block before it is used as
-    // the next block's IV. Decrypt to a temporary block instead.
-    if (STRICT_ALIGNMENT &&
-        ((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
-            0) {
-      uint8_t c;
-      while (len >= 16) {
-        (*block)(in, tmp.c, key);
-        for (n = 0; n < 16; ++n) {
-          c = in[n];
-          out[n] = tmp.c[n] ^ ivec[n];
-          ivec[n] = c;
-        }
-        len -= 16;
-        in += 16;
-        out += 16;
-      }
-    } else if (16 % sizeof(size_t) == 0) {  // always true
+    OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
+                          "block cannot be evenly divided into words");
+
    while (len >= 16) {
      (*block)(in, tmp.c, key);
      for (n = 0; n < 16; n += sizeof(size_t)) {
@ -188,7 +145,6 @@ void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
      out += 16;
    }
  }
-  }

  while (len) {
    uint8_t c;
--- a/crypto/fipsmodule/modes/cfb.c
+++ b/crypto/fipsmodule/modes/cfb.c
@ -60,8 +60,6 @@ OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
 void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                           const AES_KEY *key, uint8_t ivec[16], unsigned *num,
                           int enc, block128_f block) {
-  size_t l = 0;
-
  assert(in && out && key && ivec && num);

  unsigned n = *num;
@ -72,21 +70,6 @@ void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
      --len;
      n = (n + 1) % 16;
    }
-#if STRICT_ALIGNMENT
-    if (((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
-        0) {
-      while (l < len) {
-        if (n == 0) {
-          (*block)(ivec, ivec, key);
-        }
-        out[l] = ivec[n] ^= in[l];
-        ++l;
-        n = (n + 1) % 16;
-      }
-      *num = n;
-      return;
-    }
-#endif
    while (len >= 16) {
      (*block)(ivec, ivec, key);
      for (; n < 16; n += sizeof(size_t)) {
@ -116,22 +99,6 @@ void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
      --len;
      n = (n + 1) % 16;
    }
-    if (STRICT_ALIGNMENT &&
-        ((uintptr_t)in | (uintptr_t)out | (uintptr_t)ivec) % sizeof(size_t) !=
-            0) {
-      while (l < len) {
-        uint8_t c;
-        if (n == 0) {
-          (*block)(ivec, ivec, key);
-        }
-        out[l] = ivec[n] ^ (c = in[l]);
-        ivec[n] = c;
-        ++l;
-        n = (n + 1) % 16;
-      }
-      *num = n;
-      return;
-    }
    while (len >= 16) {
      (*block)(ivec, ivec, key);
      for (; n < 16; n += sizeof(size_t)) {
--- a/crypto/fipsmodule/modes/ctr.c
+++ b/crypto/fipsmodule/modes/ctr.c
@ -99,26 +99,6 @@ void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
    --len;
    n = (n + 1) % 16;
  }
-
-#if STRICT_ALIGNMENT
-  if (((uintptr_t)in | (uintptr_t)out |
-        (uintptr_t)ecount_buf) % sizeof(size_t) != 0) {
-    size_t l = 0;
-    while (l < len) {
-      if (n == 0) {
-        (*block)(ivec, ecount_buf, key);
-        ctr128_inc(ivec);
-      }
-      out[l] = in[l] ^ ecount_buf[n];
-      ++l;
-      n = (n + 1) % 16;
-    }
-
-    *num = n;
-    return;
-  }
-#endif
-
  while (len >= 16) {
    (*block)(ivec, ecount_buf, key);
    ctr128_inc(ivec);
--- a/crypto/fipsmodule/modes/gcm.c
+++ b/crypto/fipsmodule/modes/gcm.c
@ -514,24 +514,6 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
  }

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
-  if (STRICT_ALIGNMENT &&
-      ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
-    for (size_t i = 0; i < len; ++i) {
-      if (n == 0) {
-        (*block)(ctx->Yi.c, ctx->EKi.c, key);
-        ++ctr;
-        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
-      }
-      ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
-      n = (n + 1) % 16;
-      if (n == 0) {
-        GCM_MUL(ctx, Xi);
-      }
-    }
-
-    ctx->mres = n;
-    return 1;
-  }
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

@ -622,27 +604,6 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
  }

  uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
-  if (STRICT_ALIGNMENT &&
-      ((uintptr_t)in | (uintptr_t)out) % sizeof(size_t) != 0) {
-    for (size_t i = 0; i < len; ++i) {
-      uint8_t c;
-      if (n == 0) {
-        (*block)(ctx->Yi.c, ctx->EKi.c, key);
-        ++ctr;
-        ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
-      }
-      c = in[i];
-      out[i] = c ^ ctx->EKi.c[n];
-      ctx->Xi.c[n] ^= c;
-      n = (n + 1) % 16;
-      if (n == 0) {
-        GCM_MUL(ctx, Xi);
-      }
-    }
-
-    ctx->mres = n;
-    return 1;
-  }
  while (len >= GHASH_CHUNK) {
    size_t j = GHASH_CHUNK;

--- a/crypto/fipsmodule/modes/internal.h
+++ b/crypto/fipsmodule/modes/internal.h
@ -64,12 +64,6 @@ extern "C" {
 #endif


-#define STRICT_ALIGNMENT 1
-#if defined(OPENSSL_X86_64) || defined(OPENSSL_X86) || defined(OPENSSL_AARCH64)
-#undef STRICT_ALIGNMENT
-#define STRICT_ALIGNMENT 0
-#endif
-
 static inline uint32_t GETU32(const void *in) {
  uint32_t v;
  OPENSSL_memcpy(&v, in, sizeof(v));