diff --git a/Makefile b/Makefile
index 13ba925..471e7ac 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ OBJS = sha3.o main.o
 DIST = tiny_sha3
 
 CC = gcc
-CFLAGS = -Wall -O3
+CFLAGS = -Wall -O3 -DBIT_INTERLEAVING
 LIBS =
 LDFLAGS =
 INCLUDES =
@@ -18,7 +18,7 @@ $(BINARY): $(OBJS)
 	$(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@
 
 clean:
-	rm -rf $(DIST)-*.txz $(OBJS) $(BINARY) *~
+	rm -rf $(DIST)-*.txz $(OBJS) $(BINARY) *~
 
 dist: clean
 	cd ..; \
diff --git a/sha3.c b/sha3.c
index 931ae02..f5b66cf 100644
--- a/sha3.c
+++ b/sha3.c
@@ -1,12 +1,42 @@
 // sha3.c
 // 19-Nov-11 Markku-Juhani O. Saarinen
+// 22-May-25 Kris Kwiatkowski
 
 // Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3"
 // Revised 03-Sep-15 for portability + OpenSSL - style API
+// Revised 22-May-25 Added bit-interleaved implementation optimized for 32-bit architectures.
 
 #include "sha3.h"
 
-// update the state with given number of rounds
+// Rebuild a 64-bit lane: bit i of 'even' goes to bit 2i, bit i of 'odd' goes to bit 2i+1.
+uint64_t unshuffle(uint32_t even, uint32_t odd) {
+    uint64_t result = 0;
+    for (int i = 0; i < 32; i++) {
+        result |= ((uint64_t)(even >> i) & 1) << (2 * i);
+        result |= ((uint64_t)(odd >> i) & 1) << (2 * i + 1);
+    }
+    return result;
+}
+
+/* Gather the 32 bits of 'x' located at even positions (bits 0, 2, ..., 62).
+ * Example: for x={1,0,1,0,1,0}, listed from bit 0 upward,
+ * this function returns {1,1,1}. */
+uint32_t shuffle_even(uint64_t x) {
+    x &= 0x5555555555555555ULL;
+    x = (x | (x >> 1)) & 0x3333333333333333ULL;
+    x = (x | (x >> 2)) & 0x0F0F0F0F0F0F0F0FULL;
+    x = (x | (x >> 4)) & 0x00FF00FF00FF00FFULL;
+    x = (x | (x >> 8)) & 0x0000FFFF0000FFFFULL;
+    x = (x | (x >> 16)) & 0x00000000FFFFFFFFULL;
+    return (uint32_t)x;
+}
+
+/* Gather the 32 bits of 'x' located at odd positions (bits 1, 3, ..., 63).
+ * Example: for x={1,0,1,0,1,0}, listed from bit 0 upward,
+ * this function returns {0,0,0}. */
+uint32_t shuffle_odd(uint64_t x) {
+    return shuffle_even(x >> 1);
+}
 
 void sha3_keccakf(uint64_t st[25])
 {
@@ -98,6 +128,145 @@ void sha3_keccakf(uint64_t st[25])
 #endif
 }
 
+// Keccak-f permutation over the 25-lane state 'st' (KECCAKF_ROUNDS rounds),
+// computed on bit-interleaved 32-bit halves (even[]/odd[]) rather than on
+// 64-bit lanes. 'st' is read on entry and overwritten in place with the result.
+void sha3_keccakf_bi(uint64_t st[25])
+{
+    // constants
+    const uint64_t keccakf_rndc[24] = {
+        0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
+        0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
+        0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
+        0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
+        0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
+        0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
+        0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
+        0x8000000000008080, 0x0000000080000001, 0x8000000080008008
+    };
+    const int keccakf_rotc[24] = {
+        1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
+        27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44
+    };
+    const int keccakf_piln[24] = {
+        10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
+        15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
+    };
+
+    // variables
+    int i, j, r;
+    uint32_t t1, t2;
+    uint32_t even[25], odd[25];
+    uint32_t bc_even[5], bc_odd[5];
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+    uint8_t *v;
+
+    // endianness conversion. this is redundant on little-endian targets
+    for (i = 0; i < 25; i++) {
+        v = (uint8_t *) &st[i];
+        st[i] = ((uint64_t) v[0]) | (((uint64_t) v[1]) << 8) |
+            (((uint64_t) v[2]) << 16) | (((uint64_t) v[3]) << 24) |
+            (((uint64_t) v[4]) << 32) | (((uint64_t) v[5]) << 40) |
+            (((uint64_t) v[6]) << 48) | (((uint64_t) v[7]) << 56);
+    }
+#endif
+
+    for (i = 0; i < 25; i++) {
+        even[i] = shuffle_even(st[i]);
+        odd[i] = shuffle_odd(st[i]);
+    }
+
+    // actual iteration
+    for (r = 0; r < KECCAKF_ROUNDS; r++) {
+        // Theta: column parities
+        for (i = 0; i < 5; i++) {
+            bc_even[i] = even[i] ^ even[i + 5] ^ even[i + 10] ^ even[i + 15] ^ even[i + 20];
+            bc_odd[i] = odd[i] ^ odd[i + 5] ^ odd[i + 10] ^ odd[i + 15] ^ odd[i + 20];
+        }
+
+        // Theta: mix the neighbouring column parities into every lane
+        for (i = 0; i < 5; i++) {
+
+            /* A 64-bit rotation by 1 turns the odd half, rotated by 1, into the
+             * new even half; the even half becomes the new odd half unrotated. */
+            uint32_t rot32 = ROTL32(bc_odd[(i + 1) % 5], 1);
+            t1 = bc_even[(i + 4) % 5] ^ rot32;
+            t2 = bc_odd[(i + 4) % 5] ^ bc_even[(i + 1) % 5];
+
+            for (j = 0; j < 25; j += 5) {
+                even[j + i] ^= t1;
+                odd[j + i] ^= t2;
+            }
+        }
+
+        // Rho Pi
+        t1 = even[1]; t2 = odd[1];
+        for (i = 0; i < 24; i++) {
+            j = keccakf_piln[i];
+            bc_even[0] = even[j]; bc_odd[0] = odd[j];
+
+            int half = keccakf_rotc[i] >> 1;
+            if (keccakf_rotc[i]&1) {
+                // odd rotation: new odd half = old even half rotated by 'half'
+                odd[j] = ROTL32(t1, half);
+                // odd rotation: new even half = old odd half rotated by 'half + 1'
+                even[j] = ROTL32(t2, half + 1);
+            } else {
+                // even rotation: odd half stays odd, rotated by 'half'
+                odd[j] = ROTL32(t2, half);
+                // even rotation: even half stays even, rotated by 'half'
+                even[j] = ROTL32(t1, half);
+            }
+
+            t1 = bc_even[0]; t2 = bc_odd[0];
+        }
+
+        // Chi
+        for (j = 0; j < 25; j += 5) {
+            for (i = 0; i < 5; i++) {
+                bc_even[i] = even[j + i];
+                bc_odd[i] = odd[j + i];
+            }
+            for (i = 0; i < 5; i++) {
+                even[j + i] ^= (~bc_even[(i + 1) % 5]) & bc_even[(i + 2) % 5];
+                odd[j + i] ^= (~bc_odd[(i + 1) % 5]) & bc_odd[(i + 2) % 5];
+            }
+        }
+
+        // Iota (can be precomputed)
+        even[0] ^= shuffle_even(keccakf_rndc[r]);
+        odd[0] ^= shuffle_odd(keccakf_rndc[r]);
+    }
+
+    for (i = 0; i < 25; i++) {
+        st[i] = unshuffle(even[i], odd[i]);
+    }
+
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+    // endianness conversion. this is redundant on little-endian targets
+    for (i = 0; i < 25; i++) {
+        v = (uint8_t *) &st[i];
+        uint64_t t = st[i];
+        v[0] = t & 0xFF;
+        v[1] = (t >> 8) & 0xFF;
+        v[2] = (t >> 16) & 0xFF;
+        v[3] = (t >> 24) & 0xFF;
+        v[4] = (t >> 32) & 0xFF;
+        v[5] = (t >> 40) & 0xFF;
+        v[6] = (t >> 48) & 0xFF;
+        v[7] = (t >> 56) & 0xFF;
+    }
+#endif
+}
+
+// NOTE(review): no hunk visible in this patch makes the sha3_keccakf() callers use KECCAK_F -- confirm.
+#ifdef BIT_INTERLEAVING
+#define KECCAK_F sha3_keccakf_bi
+#else
+#define KECCAK_F sha3_keccakf
+#endif
+
 // Initialize the context for SHA3
 
 int sha3_init(sha3_ctx_t *c, int mdlen)
diff --git a/sha3.h b/sha3.h
index ba24f43..c75adf0 100644
--- a/sha3.h
+++ b/sha3.h
@@ -15,6 +15,11 @@
 #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
 #endif
 
+// Shift counts are masked to 0..31 so ROTL32(x, 0) never shifts by 32 (undefined in C).
+#ifndef ROTL32
+#define ROTL32(x, y) (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
+#endif
+
 // state context
 typedef struct {
     union { // state: