diff --git a/sha3.c b/sha3.c
index 931ae02..e359707 100644
--- a/sha3.c
+++ b/sha3.c
@@ -1,12 +1,43 @@
 // sha3.c
 // 19-Nov-11 Markku-Juhani O. Saarinen
+// 22-May-25 Kris Kwiatkowski
 
 // Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3"
 // Revised 03-Sep-15 for portability + OpenSSL - style API
+// Revised 22-May-25 Added bit-interleaved implementation optimized for 32-bit architectures.
 
 #include "sha3.h"
 
-// update the state with given number of rounds
+// Bit-interleaved lane representation: a 64-bit lane is kept as two 32-bit
+// words, one holding its even-indexed bits and one holding its odd-indexed bits.
+
+// Interleave even and odd bits into one 64-bit lane:
+// bit i of even lands at bit 2*i, bit i of odd lands at bit 2*i + 1.
+uint64_t unshuffle(uint32_t even, uint32_t odd) {
+    uint64_t result = 0;
+    for (int i = 0; i < 32; i++) {
+        result |= ((uint64_t)(even >> i) & 1) << (2 * i);
+        result |= ((uint64_t)(odd >> i) & 1) << (2 * i + 1);
+    }
+    return result;
+}
+
+// Gather the even-indexed bits of x (bits 0, 2, ..., 62) into a 32-bit word.
+// Each masked shift-and-OR step doubles the width of the packed bit groups.
+uint32_t shuffle_even(uint64_t x) {
+    x &= 0x5555555555555555ULL;
+    x = (x | (x >> 1)) & 0x3333333333333333ULL;
+    x = (x | (x >> 2)) & 0x0F0F0F0F0F0F0F0FULL;
+    x = (x | (x >> 4)) & 0x00FF00FF00FF00FFULL;
+    x = (x | (x >> 8)) & 0x0000FFFF0000FFFFULL;
+    x = (x | (x >> 16)) & 0x00000000FFFFFFFFULL;
+    return (uint32_t)x;
+}
+
+// Gather the odd-indexed bits of x (bits 1, 3, ..., 63) into a 32-bit word.
+uint32_t shuffle_odd(uint64_t x) {
+    return shuffle_even(x >> 1);
+}
 
 void sha3_keccakf(uint64_t st[25])
 {
@@ -32,7 +63,9 @@ void sha3_keccakf(uint64_t st[25])
 
     // variables
     int i, j, r;
-    uint64_t t, bc[5];
+    uint32_t t1, t2;
+    uint32_t even[25], odd[25];
+    uint32_t bc_even[5], bc_odd[5];
 
 #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
     uint8_t *v;
@@ -47,38 +80,79 @@ void sha3_keccakf(uint64_t st[25])
     }
 #endif
 
+    // split each 64-bit lane into its even-bit and odd-bit 32-bit halves
+    for (i = 0; i < 25; i++) {
+        even[i] = shuffle_even(st[i]);
+        odd[i] = shuffle_odd(st[i]);
+    }
+
     // actual iteration
     for (r = 0; r < KECCAKF_ROUNDS; r++) {
-
         // Theta
-        for (i = 0; i < 5; i++)
-            bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
-
         for (i = 0; i < 5; i++) {
-            t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
-            for (j = 0; j < 25; j += 5)
-                st[j + i] ^= t;
+            bc_even[i] = even[i] ^ even[i + 5] ^ even[i + 10] ^ even[i + 15] ^ even[i + 20];
+            bc_odd[i] = odd[i] ^ odd[i + 5] ^ odd[i + 10] ^ odd[i + 15] ^ odd[i + 20];
+        }
+
+        // Theta (continued): fold the column parities into every lane.
+        // A 64-bit rotate-left by 1 sends the odd half, rotated by 1, to the
+        // even half and the even half, unrotated, to the odd half.
+        for (i = 0; i < 5; i++) {
+
+            uint32_t rot32 = ROTL32(bc_odd[(i + 1) % 5], 1);
+            t1 = bc_even[(i + 4) % 5] ^ rot32;
+            t2 = bc_odd[(i + 4) % 5] ^ bc_even[(i + 1) % 5];
+
+            for (j = 0; j < 25; j += 5) {
+                even[j + i] ^= t1;
+                odd[j + i] ^= t2;
+            }
         }
 
         // Rho Pi
-        t = st[1];
+        t1 = even[1]; t2 = odd[1];
         for (i = 0; i < 24; i++) {
             j = keccakf_piln[i];
-            bc[0] = st[j];
-            st[j] = ROTL64(t, keccakf_rotc[i]);
-            t = bc[0];
+            bc_even[0] = even[j]; bc_odd[0] = odd[j];
+
+            // A 64-bit rotation by 2*half (+1) becomes two 32-bit rotations.
+            // half is 0 for an offset of 1, so ROTL32 must accept a zero count.
+            int half = keccakf_rotc[i] >> 1;
+            if (keccakf_rotc[i]&1) {
+                // odd offset: the halves swap places
+                // new odd = ROTL32(old even, half)
+                odd[j] = ROTL32(t1, half);
+                // new even = ROTL32(old odd, half + 1)
+                even[j] = ROTL32(t2, half + 1);
+            } else {
+                // even offset: each half is rotated in place
+                odd[j] = ROTL32(t2, half);
+                even[j] = ROTL32(t1, half);
+            }
+
+            t1 = bc_even[0]; t2 = bc_odd[0];
         }
 
         // Chi
         for (j = 0; j < 25; j += 5) {
-            for (i = 0; i < 5; i++)
-                bc[i] = st[j + i];
-            for (i = 0; i < 5; i++)
-                st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
+            for (i = 0; i < 5; i++) {
+                bc_even[i] = even[j + i];
+                bc_odd[i] = odd[j + i];
+            }
+            for (i = 0; i < 5; i++) {
+                even[j + i] ^= (~bc_even[(i + 1) % 5]) & bc_even[(i + 2) % 5];
+                odd[j + i] ^= (~bc_odd[(i + 1) % 5]) & bc_odd[(i + 2) % 5];
+            }
         }
 
-        // Iota
-        st[0] ^= keccakf_rndc[r];
+        // Iota (can be precomputed)
+        even[0] ^= shuffle_even(keccakf_rndc[r]);
+        odd[0] ^= shuffle_odd(keccakf_rndc[r]);
+    }
+
+    // merge the two halves of every lane back into the 64-bit state
+    for (i = 0; i < 25; i++) {
+        st[i] = unshuffle(even[i], odd[i]);
     }
 
 #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
diff --git a/sha3.h b/sha3.h
index ba24f43..c75adf0 100644
--- a/sha3.h
+++ b/sha3.h
@@ -15,6 +15,12 @@
 #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
 #endif
 
+// 32-bit rotate-left; both shift counts are masked so that y == 0 does not
+// shift a 32-bit value by 32 bits (undefined behaviour in C).
+#ifndef ROTL32
+#define ROTL32(x, y) (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
+#endif
+
 // state context
 typedef struct {
     union {                                 // state: