From e948dbbac6c83dfe9df7d9c659840892a1a8a316 Mon Sep 17 00:00:00 2001 From: Kris Kwiatkowski Date: Thu, 22 May 2025 10:03:13 +0100 Subject: [PATCH 1/2] WIP --- sha3.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++----------- sha3.h | 4 +++ 2 files changed, 84 insertions(+), 19 deletions(-) diff --git a/sha3.c b/sha3.c index 931ae02..e359707 100644 --- a/sha3.c +++ b/sha3.c @@ -1,12 +1,36 @@ // sha3.c // 19-Nov-11 Markku-Juhani O. Saarinen +// 22-May-25 Kris Kwiatkowski // Revised 07-Aug-15 to match with official release of FIPS PUB 202 "SHA3" // Revised 03-Sep-15 for portability + OpenSSL - style API +// Revised 22-May-25 Added bit-interleaved implementation optimized for 32-bit architectures. #include "sha3.h" -// update the state with given number of rounds +// Interleave even and odd bits into one 64-bit lane +uint64_t unshuffle(uint32_t even, uint32_t odd) { + uint64_t result = 0; + for (int i = 0; i < 32; i++) { + result |= ((uint64_t)(even >> i) & 1) << (2 * i); + result |= ((uint64_t)(odd >> i) & 1) << (2 * i + 1); + } + return result; +} + +uint32_t shuffle_even(uint64_t x) { + x &= 0x5555555555555555ULL; + x = (x | (x >> 1)) & 0x3333333333333333ULL; + x = (x | (x >> 2)) & 0x0F0F0F0F0F0F0F0FULL; + x = (x | (x >> 4)) & 0x00FF00FF00FF00FFULL; + x = (x | (x >> 8)) & 0x0000FFFF0000FFFFULL; + x = (x | (x >> 16)) & 0x00000000FFFFFFFFULL; + return (uint32_t)x; +} + +uint32_t shuffle_odd(uint64_t x) { + return shuffle_even(x >> 1); +} void sha3_keccakf(uint64_t st[25]) { @@ -32,7 +56,9 @@ void sha3_keccakf(uint64_t st[25]) // variables int i, j, r; - uint64_t t, bc[5]; + uint32_t t1, t2; + uint32_t even[25], odd[25]; + uint32_t bc_even[5], bc_odd[5]; #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ uint8_t *v; @@ -47,38 +73,73 @@ void sha3_keccakf(uint64_t st[25]) } #endif + for (i = 0; i < 25; i++) { + even[i] = shuffle_even(st[i]); + odd[i] = shuffle_odd(st[i]); + } + // actual iteration for (r = 0; r < KECCAKF_ROUNDS; r++) { - // Theta - for (i = 0; i < 5; 
i++) - bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20]; - for (i = 0; i < 5; i++) { - t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); - for (j = 0; j < 25; j += 5) - st[j + i] ^= t; + bc_even[i] = even[i] ^ even[i + 5] ^ even[i + 10] ^ even[i + 15] ^ even[i + 20]; + bc_odd[i] = odd[i] ^ odd[i + 5] ^ odd[i + 10] ^ odd[i + 15] ^ odd[i + 20]; + } + + // Theta: apply the column parities + for (i = 0; i < 5; i++) { + + uint32_t rot32 = ROTL32(bc_odd[(i + 1) % 5], 1); + t1 = bc_even[(i + 4) % 5] ^ rot32; + t2 = bc_odd[(i + 4) % 5] ^ bc_even[(i + 1) % 5]; + + for (j = 0; j < 25; j += 5) { + even[j + i] ^= t1; + odd[j + i] ^= t2; + } } // Rho Pi - t = st[1]; + t1 = even[1]; t2 = odd[1]; for (i = 0; i < 24; i++) { j = keccakf_piln[i]; - bc[0] = st[j]; - st[j] = ROTL64(t, keccakf_rotc[i]); - t = bc[0]; + bc_even[0] = even[j]; bc_odd[0] = odd[j]; + + int half = keccakf_rotc[i] >> 1; + if (keccakf_rotc[i]&1) { + // U0 = ROT32(U1, tau) + odd[j] = ROTL32(t1, half); + // U1 = ROT32(U0, tau + 1) + even[j] = ROTL32(t2, half + 1); + } else { + // U0 = ROT32(U0, tau) + odd[j] = ROTL32(t2, half); + // U1 = ROT32(U1, tau) + even[j] = ROTL32(t1, half); + } + + t1 = bc_even[0]; t2 = bc_odd[0]; } // Chi for (j = 0; j < 25; j += 5) { - for (i = 0; i < 5; i++) - bc[i] = st[j + i]; - for (i = 0; i < 5; i++) - st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5]; + for (i = 0; i < 5; i++) { + bc_even[i] = even[j + i]; + bc_odd[i] = odd[j + i]; + } + for (i = 0; i < 5; i++) { + even[j + i] ^= (~bc_even[(i + 1) % 5]) & bc_even[(i + 2) % 5]; + odd[j + i] ^= (~bc_odd[(i + 1) % 5]) & bc_odd[(i + 2) % 5]; + } } - // Iota - st[0] ^= keccakf_rndc[r]; + // Iota (can be precomputed) + even[0] ^= shuffle_even(keccakf_rndc[r]); + odd[0] ^= shuffle_odd(keccakf_rndc[r]); + } + + for (i = 0; i < 25; i++) { + st[i] = unshuffle(even[i], odd[i]); } #if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ diff --git a/sha3.h b/sha3.h index ba24f43..c75adf0 100644 --- a/sha3.h +++ b/sha3.h @@ -15,6 +15,10 @@ #define ROTL64(x, y) (((x) 
<< (y)) | ((x) >> (64 - (y)))) #endif +#ifndef ROTL32 +#define ROTL32(x, y) (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31))) +#endif + // state context typedef struct { union { // state: From 77ba73658bf0aac9147cf3a9ba482195a373ad33 Mon Sep 17 00:00:00 2001 From: Kris Kwiatkowski Date: Thu, 22 May 2025 10:13:29 +0100 Subject: [PATCH 2/2] Implements bit interleaving --- Makefile | 4 +-- sha3.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 13ba925..471e7ac 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ OBJS = sha3.o main.o DIST = tiny_sha3 CC = gcc -CFLAGS = -Wall -O3 +CFLAGS = -Wall -O3 -DBIT_INTERLEAVING LIBS = LDFLAGS = INCLUDES = @@ -18,7 +18,7 @@ $(BINARY): $(OBJS) $(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@ clean: - rm -rf $(DIST)-*.txz $(OBJS) $(BINARY) *~ + rm -rf $(DIST)-*.txz $(OBJS) $(BINARY) *~ dist: clean cd ..; \ diff --git a/sha3.c b/sha3.c index e359707..f5b66cf 100644 --- a/sha3.c +++ b/sha3.c @@ -18,6 +18,9 @@ uint64_t unshuffle(uint32_t even, uint32_t odd) { return result; } +/* Extract the 32 bits of 'x' located at even positions (0, 2, ..., 62). + * Example: for x = 0b101010, where bit 0 is the least significant bit, + * this function returns 0b000. */ uint32_t shuffle_even(uint64_t x) { x &= 0x5555555555555555ULL; x = (x | (x >> 1)) & 0x3333333333333333ULL; @@ -28,6 +31,9 @@ uint32_t shuffle_even(uint64_t x) { return (uint32_t)x; } +/* Extract the 32 bits of 'x' located at odd positions (1, 3, ..., 63). + * Example: for x = 0b101010, where bit 0 is the least significant bit, + * this function returns 0b111. */ uint32_t shuffle_odd(uint64_t x) { return shuffle_even(x >> 1); } @@ -54,6 +60,96 @@ void sha3_keccakf(uint64_t st[25]) 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 }; + // variables + int i, j, r; + uint64_t t, bc[5]; + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + uint8_t *v; + + // endianess conversion. 
this is redundant on little-endian targets + for (i = 0; i < 25; i++) { + v = (uint8_t *) &st[i]; + st[i] = ((uint64_t) v[0]) | (((uint64_t) v[1]) << 8) | + (((uint64_t) v[2]) << 16) | (((uint64_t) v[3]) << 24) | + (((uint64_t) v[4]) << 32) | (((uint64_t) v[5]) << 40) | + (((uint64_t) v[6]) << 48) | (((uint64_t) v[7]) << 56); + } +#endif + + // actual iteration + for (r = 0; r < KECCAKF_ROUNDS; r++) { + + // Theta + for (i = 0; i < 5; i++) + bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20]; + + for (i = 0; i < 5; i++) { + t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1); + for (j = 0; j < 25; j += 5) + st[j + i] ^= t; + } + + // Rho Pi + t = st[1]; + for (i = 0; i < 24; i++) { + j = keccakf_piln[i]; + bc[0] = st[j]; + st[j] = ROTL64(t, keccakf_rotc[i]); + t = bc[0]; + } + + // Chi + for (j = 0; j < 25; j += 5) { + for (i = 0; i < 5; i++) + bc[i] = st[j + i]; + for (i = 0; i < 5; i++) + st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5]; + } + + // Iota + st[0] ^= keccakf_rndc[r]; + } + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + // endianess conversion. 
this is redundant on little-endian targets + for (i = 0; i < 25; i++) { + v = (uint8_t *) &st[i]; + t = st[i]; + v[0] = t & 0xFF; + v[1] = (t >> 8) & 0xFF; + v[2] = (t >> 16) & 0xFF; + v[3] = (t >> 24) & 0xFF; + v[4] = (t >> 32) & 0xFF; + v[5] = (t >> 40) & 0xFF; + v[6] = (t >> 48) & 0xFF; + v[7] = (t >> 56) & 0xFF; + } +#endif +} + +void sha3_keccakf_bi(uint64_t st[25]) +{ + // constants + const uint64_t keccakf_rndc[24] = { + 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, + 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, + 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, + 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, + 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, + 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, + 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 + }; + const int keccakf_rotc[24] = { + 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14, + 27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44 + }; + const int keccakf_piln[24] = { + 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, + 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 + }; + // variables int i, j, r; uint32_t t1, t2; @@ -89,6 +185,8 @@ void sha3_keccakf(uint64_t st[25]) // Theta: apply the column parities for (i = 0; i < 5; i++) { + /* Note that we are rotating by 1. In this case we only care about + * "odd" bits. */ uint32_t rot32 = ROTL32(bc_odd[(i + 1) % 5], 1); t1 = bc_even[(i + 4) % 5] ^ rot32; t2 = bc_odd[(i + 4) % 5] ^ bc_even[(i + 1) % 5]; @@ -159,6 +257,12 @@ void sha3_keccakf(uint64_t st[25]) #endif } +#ifdef BIT_INTERLEAVING +#define KECCAK_F sha3_keccakf_bi +#else +#define KECCAK_F sha3_keccakf +#endif + // Initialize the context for SHA3 int sha3_init(sha3_ctx_t *c, int mdlen)