104306f587
STRICT_ALIGNMENT is a remnant of OpenSSL code that would cast pointers to size_t* and load more than one byte at a time. Not all architectures support unaligned access, so it did an alignment check and only entered this path if aligned or the underlying architecture didn't care. This is UB. Unaligned casts in C are undefined on all architectures, so we switched these to memcpy some time ago. Compilers can optimize memcpy to the unaligned accesses we wanted. That left our modes logic as: - If STRICT_ALIGNMENT is 1 and things are unaligned, work byte-by-byte. - Otherwise, use the memcpy-based word-by-word code, which now works independent of STRICT_ALIGNMENT. Remove the first check to simplify things. On x86, x86_64, and aarch64, STRICT_ALIGNMENT is zero and this is a no-op. ARM is more complex. Per [0], ARMv7 and up support unaligned access. ARMv5 does not. ARMv6 does, but can run in a mode where it looks more like ARMv5. For ARMv7 and up, STRICT_ALIGNMENT should have been zero, but was one. Thus this change should be an improvement for ARMv7 (right now unaligned inputs lose bsaes-armv7). The Android NDK does not even support the pre-ARMv7 ABI anymore[1]. Nonetheless, Cronet still supports ARMv6 as a library. It builds with -march=armv6 which GCC interprets as supporting unaligned access, so it too did not want this code. For completeness, should anyone still care about ARMv5 or be building with an overly permissive -march flag, GCC does appear unable to inline the memcpy calls. However, GCC also does not interpret (uintptr_t)ptr % sizeof(size_t) as an alignment assertion, so such consumers have already been paying for the memcpy here and throughout the library. In general, C's arcane pointer rules mean we must resort to memcpy often, so, realistically, we must require that the compiler optimize memcpy well. 
[0] https://medium.com/@iLevex/the-curious-case-of-unaligned-access-on-arm-5dd0ebe24965 [1] https://developer.android.com/ndk/guides/abis#armeabi Change-Id: I3c7dea562adaeb663032e395499e69530dd8e145 Reviewed-on: https://boringssl-review.googlesource.com/c/34873 Reviewed-by: Adam Langley <agl@google.com>
201 lines
6.4 KiB
C
201 lines
6.4 KiB
C
/* ====================================================================
|
|
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
*
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in
|
|
* the documentation and/or other materials provided with the
|
|
* distribution.
|
|
*
|
|
* 3. All advertising materials mentioning features or use of this
|
|
* software must display the following acknowledgment:
|
|
* "This product includes software developed by the OpenSSL Project
|
|
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
|
|
*
|
|
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
|
|
* endorse or promote products derived from this software without
|
|
* prior written permission. For written permission, please contact
|
|
* openssl-core@openssl.org.
|
|
*
|
|
* 5. Products derived from this software may not be called "OpenSSL"
|
|
* nor may "OpenSSL" appear in their names without prior written
|
|
* permission of the OpenSSL Project.
|
|
*
|
|
* 6. Redistributions of any form whatsoever must retain the following
|
|
* acknowledgment:
|
|
* "This product includes software developed by the OpenSSL Project
|
|
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
|
|
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
|
|
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
|
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
|
|
* OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
* ==================================================================== */
|
|
|
|
#include <openssl/type_check.h>
|
|
|
|
#include <assert.h>
|
|
#include <string.h>
|
|
|
|
#include "internal.h"
|
|
|
|
|
|
// NOTE: the IV/counter in CTR mode is big-endian. The code itself
// is endian-neutral.
|
|
|
|
// ctr128_inc increments the 16-byte counter, treated as a 128-bit big-endian
// integer, by 1. An all-0xff counter wraps around to all zeros.
static void ctr128_inc(uint8_t *counter) {
  uint32_t n = 16, c = 1;

  // Walk from the last byte (index 15) down to the first (index 0). c starts
  // as the value to add and afterwards holds the carry out of each byte. Every
  // byte is visited regardless of whether a carry remains.
  do {
    --n;
    c += counter[n];
    counter[n] = (uint8_t)c;
    c >>= 8;
  } while (n);
}
|
|
|
|
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
|
|
"block cannot be divided into size_t");
|
|
|
|
// The input encrypted as though 128bit counter mode is being used. The extra
|
|
// state information to record how much of the 128bit block we have used is
|
|
// contained in *num, and the encrypted counter is kept in ecount_buf. Both
|
|
// *num and ecount_buf must be initialised with zeros before the first call to
|
|
// CRYPTO_ctr128_encrypt().
|
|
//
|
|
// This algorithm assumes that the counter is in the x lower bits of the IV
|
|
// (ivec), and that the application has full control over overflow and the rest
|
|
// of the IV. This implementation takes NO responsibility for checking that
|
|
// the counter doesn't overflow into the rest of the IV when incremented.
|
|
void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
|
|
const AES_KEY *key, uint8_t ivec[16],
|
|
uint8_t ecount_buf[16], unsigned int *num,
|
|
block128_f block) {
|
|
unsigned int n;
|
|
|
|
assert(key && ecount_buf && num);
|
|
assert(len == 0 || (in && out));
|
|
assert(*num < 16);
|
|
|
|
n = *num;
|
|
|
|
while (n && len) {
|
|
*(out++) = *(in++) ^ ecount_buf[n];
|
|
--len;
|
|
n = (n + 1) % 16;
|
|
}
|
|
while (len >= 16) {
|
|
(*block)(ivec, ecount_buf, key);
|
|
ctr128_inc(ivec);
|
|
for (n = 0; n < 16; n += sizeof(size_t)) {
|
|
store_word_le(out + n,
|
|
load_word_le(in + n) ^ load_word_le(ecount_buf + n));
|
|
}
|
|
len -= 16;
|
|
out += 16;
|
|
in += 16;
|
|
n = 0;
|
|
}
|
|
if (len) {
|
|
(*block)(ivec, ecount_buf, key);
|
|
ctr128_inc(ivec);
|
|
while (len--) {
|
|
out[n] = in[n] ^ ecount_buf[n];
|
|
++n;
|
|
}
|
|
}
|
|
*num = n;
|
|
}
|
|
|
|
// ctr96_inc increments the upper 96 bits (bytes 0..11) of a 128-bit counter,
// treated as a big-endian integer, by 1. Bytes 12..15 are not touched. An
// all-0xff 96-bit value wraps around to all zeros.
static void ctr96_inc(uint8_t *counter) {
  uint32_t n = 12, c = 1;

  // Walk from byte 11 down to byte 0. c starts as the value to add and
  // afterwards holds the carry out of each byte.
  do {
    --n;
    c += counter[n];
    counter[n] = (uint8_t)c;
    c >>= 8;
  } while (n);
}
|
|
|
|
void CRYPTO_ctr128_encrypt_ctr32(const uint8_t *in, uint8_t *out, size_t len,
|
|
const AES_KEY *key, uint8_t ivec[16],
|
|
uint8_t ecount_buf[16], unsigned int *num,
|
|
ctr128_f func) {
|
|
unsigned int n, ctr32;
|
|
|
|
assert(key && ecount_buf && num);
|
|
assert(len == 0 || (in && out));
|
|
assert(*num < 16);
|
|
|
|
n = *num;
|
|
|
|
while (n && len) {
|
|
*(out++) = *(in++) ^ ecount_buf[n];
|
|
--len;
|
|
n = (n + 1) % 16;
|
|
}
|
|
|
|
ctr32 = GETU32(ivec + 12);
|
|
while (len >= 16) {
|
|
size_t blocks = len / 16;
|
|
// 1<<28 is just a not-so-small yet not-so-large number...
|
|
// Below condition is practically never met, but it has to
|
|
// be checked for code correctness.
|
|
if (sizeof(size_t) > sizeof(unsigned int) && blocks > (1U << 28)) {
|
|
blocks = (1U << 28);
|
|
}
|
|
// As (*func) operates on 32-bit counter, caller
|
|
// has to handle overflow. 'if' below detects the
|
|
// overflow, which is then handled by limiting the
|
|
// amount of blocks to the exact overflow point...
|
|
ctr32 += (uint32_t)blocks;
|
|
if (ctr32 < blocks) {
|
|
blocks -= ctr32;
|
|
ctr32 = 0;
|
|
}
|
|
(*func)(in, out, blocks, key, ivec);
|
|
// (*func) does not update ivec, caller does:
|
|
PUTU32(ivec + 12, ctr32);
|
|
// ... overflow was detected, propogate carry.
|
|
if (ctr32 == 0) {
|
|
ctr96_inc(ivec);
|
|
}
|
|
blocks *= 16;
|
|
len -= blocks;
|
|
out += blocks;
|
|
in += blocks;
|
|
}
|
|
if (len) {
|
|
OPENSSL_memset(ecount_buf, 0, 16);
|
|
(*func)(ecount_buf, ecount_buf, 1, key, ivec);
|
|
++ctr32;
|
|
PUTU32(ivec + 12, ctr32);
|
|
if (ctr32 == 0) {
|
|
ctr96_inc(ivec);
|
|
}
|
|
while (len--) {
|
|
out[n] = in[n] ^ ecount_buf[n];
|
|
++n;
|
|
}
|
|
}
|
|
|
|
*num = n;
|
|
}
|