boringssl/crypto/poly1305/poly1305_arm.c
Adam Langley d9e817309a Fix several warnings that arise in Android.
Android is now using Ninja so it doesn't spew so much to the terminal
and thus any warnings in BoringSSL (which builds really early in the
process) and much more obvious.

Thus this change fixes a few warnings that appear in the Android build.

Change-Id: Id255ace90fece772a1c3a718c877559ce920b960
Reviewed-on: https://boringssl-review.googlesource.com/6400
Reviewed-by: David Benjamin <davidben@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
2015-10-30 21:11:48 +00:00

302 lines
7.4 KiB
C

/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
/* This implementation was taken from the public domain, neon2 version in
* SUPERCOP by D. J. Bernstein and Peter Schwabe. */
#include <openssl/poly1305.h>
#if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM)
#include <string.h>
typedef struct {
uint32_t v[12]; /* for alignment; only using 10 */
} fe1305x2;
#define addmulmod openssl_poly1305_neon2_addmulmod
#define blocks openssl_poly1305_neon2_blocks
extern void addmulmod(fe1305x2 *r, const fe1305x2 *x, const fe1305x2 *y,
const fe1305x2 *c);
extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const uint8_t *in,
unsigned int inlen);
static void freeze(fe1305x2 *r) {
int i;
uint32_t x0 = r->v[0];
uint32_t x1 = r->v[2];
uint32_t x2 = r->v[4];
uint32_t x3 = r->v[6];
uint32_t x4 = r->v[8];
uint32_t y0;
uint32_t y1;
uint32_t y2;
uint32_t y3;
uint32_t y4;
uint32_t swap;
for (i = 0; i < 3; ++i) {
x1 += x0 >> 26;
x0 &= 0x3ffffff;
x2 += x1 >> 26;
x1 &= 0x3ffffff;
x3 += x2 >> 26;
x2 &= 0x3ffffff;
x4 += x3 >> 26;
x3 &= 0x3ffffff;
x0 += 5 * (x4 >> 26);
x4 &= 0x3ffffff;
}
y0 = x0 + 5;
y1 = x1 + (y0 >> 26);
y0 &= 0x3ffffff;
y2 = x2 + (y1 >> 26);
y1 &= 0x3ffffff;
y3 = x3 + (y2 >> 26);
y2 &= 0x3ffffff;
y4 = x4 + (y3 >> 26);
y3 &= 0x3ffffff;
swap = -(y4 >> 26);
y4 &= 0x3ffffff;
y0 ^= x0;
y1 ^= x1;
y2 ^= x2;
y3 ^= x3;
y4 ^= x4;
y0 &= swap;
y1 &= swap;
y2 &= swap;
y3 &= swap;
y4 &= swap;
y0 ^= x0;
y1 ^= x1;
y2 ^= x2;
y3 ^= x3;
y4 ^= x4;
r->v[0] = y0;
r->v[2] = y1;
r->v[4] = y2;
r->v[6] = y3;
r->v[8] = y4;
}
static void fe1305x2_tobytearray(uint8_t *r, fe1305x2 *x) {
uint32_t x0 = x->v[0];
uint32_t x1 = x->v[2];
uint32_t x2 = x->v[4];
uint32_t x3 = x->v[6];
uint32_t x4 = x->v[8];
x1 += x0 >> 26;
x0 &= 0x3ffffff;
x2 += x1 >> 26;
x1 &= 0x3ffffff;
x3 += x2 >> 26;
x2 &= 0x3ffffff;
x4 += x3 >> 26;
x3 &= 0x3ffffff;
*(uint32_t *)r = x0 + (x1 << 26);
*(uint32_t *)(r + 4) = (x1 >> 6) + (x2 << 20);
*(uint32_t *)(r + 8) = (x2 >> 12) + (x3 << 14);
*(uint32_t *)(r + 12) = (x3 >> 18) + (x4 << 8);
}
/* load32 exists to avoid breaking strict aliasing rules in
* fe1305x2_frombytearray. */
static uint32_t load32(uint8_t *t) {
uint32_t tmp;
memcpy(&tmp, t, sizeof(tmp));
return tmp;
}
static void fe1305x2_frombytearray(fe1305x2 *r, const uint8_t *x,
unsigned long long xlen) {
unsigned i;
uint8_t t[17];
for (i = 0; (i < 16) && (i < xlen); i++) {
t[i] = x[i];
}
xlen -= i;
x += i;
t[i++] = 1;
for (; i < 17; i++) {
t[i] = 0;
}
r->v[0] = 0x3ffffff & load32(t);
r->v[2] = 0x3ffffff & (load32(t + 3) >> 2);
r->v[4] = 0x3ffffff & (load32(t + 6) >> 4);
r->v[6] = 0x3ffffff & (load32(t + 9) >> 6);
r->v[8] = load32(t + 13);
if (xlen) {
for (i = 0; (i < 16) && (i < xlen); i++) {
t[i] = x[i];
}
t[i++] = 1;
for (; i < 17; i++) {
t[i] = 0;
}
r->v[1] = 0x3ffffff & load32(t);
r->v[3] = 0x3ffffff & (load32(t + 3) >> 2);
r->v[5] = 0x3ffffff & (load32(t + 6) >> 4);
r->v[7] = 0x3ffffff & (load32(t + 9) >> 6);
r->v[9] = load32(t + 13);
} else {
r->v[1] = r->v[3] = r->v[5] = r->v[7] = r->v[9] = 0;
}
}
static const fe1305x2 zero __attribute__((aligned(16)));
struct poly1305_state_st {
uint8_t data[sizeof(fe1305x2[5]) + 128];
uint8_t buf[32];
unsigned int buf_used;
uint8_t key[16];
};
void CRYPTO_poly1305_init_neon(poly1305_state *state, const uint8_t key[32]) {
struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
fe1305x2 *const h = r + 1;
fe1305x2 *const c = h + 1;
fe1305x2 *const precomp = c + 1;
unsigned int j;
r->v[1] = r->v[0] = 0x3ffffff & *(uint32_t *)key;
r->v[3] = r->v[2] = 0x3ffff03 & ((*(uint32_t *)(key + 3)) >> 2);
r->v[5] = r->v[4] = 0x3ffc0ff & ((*(uint32_t *)(key + 6)) >> 4);
r->v[7] = r->v[6] = 0x3f03fff & ((*(uint32_t *)(key + 9)) >> 6);
r->v[9] = r->v[8] = 0x00fffff & ((*(uint32_t *)(key + 12)) >> 8);
for (j = 0; j < 10; j++) {
h->v[j] = 0; /* XXX: should fast-forward a bit */
}
addmulmod(precomp, r, r, &zero); /* precompute r^2 */
addmulmod(precomp + 1, precomp, precomp, &zero); /* precompute r^4 */
memcpy(st->key, key + 16, 16);
st->buf_used = 0;
}
void CRYPTO_poly1305_update_neon(poly1305_state *state, const uint8_t *in,
size_t in_len) {
struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
fe1305x2 *const h = r + 1;
fe1305x2 *const c = h + 1;
fe1305x2 *const precomp = c + 1;
unsigned int i;
if (st->buf_used) {
unsigned int todo = 32 - st->buf_used;
if (todo > in_len) {
todo = in_len;
}
for (i = 0; i < todo; i++) {
st->buf[st->buf_used + i] = in[i];
}
st->buf_used += todo;
in_len -= todo;
in += todo;
if (st->buf_used == sizeof(st->buf) && in_len) {
addmulmod(h, h, precomp, &zero);
fe1305x2_frombytearray(c, st->buf, sizeof(st->buf));
for (i = 0; i < 10; i++) {
h->v[i] += c->v[i];
}
st->buf_used = 0;
}
}
while (in_len > 32) {
unsigned int tlen = 1048576;
if (in_len < tlen) {
tlen = in_len;
}
tlen -= blocks(h, precomp, in, tlen);
in_len -= tlen;
in += tlen;
}
if (in_len) {
for (i = 0; i < in_len; i++) {
st->buf[i] = in[i];
}
st->buf_used = in_len;
}
}
void CRYPTO_poly1305_finish_neon(poly1305_state *state, uint8_t mac[16]) {
struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
fe1305x2 *const h = r + 1;
fe1305x2 *const c = h + 1;
fe1305x2 *const precomp = c + 1;
addmulmod(h, h, precomp, &zero);
if (st->buf_used > 16) {
fe1305x2_frombytearray(c, st->buf, st->buf_used);
precomp->v[1] = r->v[1];
precomp->v[3] = r->v[3];
precomp->v[5] = r->v[5];
precomp->v[7] = r->v[7];
precomp->v[9] = r->v[9];
addmulmod(h, h, precomp, c);
} else if (st->buf_used > 0) {
fe1305x2_frombytearray(c, st->buf, st->buf_used);
r->v[1] = 1;
r->v[3] = 0;
r->v[5] = 0;
r->v[7] = 0;
r->v[9] = 0;
addmulmod(h, h, r, c);
}
h->v[0] += h->v[1];
h->v[2] += h->v[3];
h->v[4] += h->v[5];
h->v[6] += h->v[7];
h->v[8] += h->v[9];
freeze(h);
fe1305x2_frombytearray(c, st->key, 16);
c->v[8] ^= (1 << 24);
h->v[0] += c->v[0];
h->v[2] += c->v[2];
h->v[4] += c->v[4];
h->v[6] += c->v[6];
h->v[8] += c->v[8];
fe1305x2_tobytearray(mac, h);
}
#endif /* OPENSSL_ARM && !OPENSSL_NO_ASM */