/*
  This file is for Benes network related functions
*/

#include "benes.h"

#include "params.h"
#include "transpose.h"
#include "util.h"

/*
 * Apply one layer of masked swaps to the 64 words in bs.
 *
 * With stride = 1 << log_stride, every index i in [0, 64) whose `stride`
 * bit is clear is paired with i + stride. The pairs are visited in ascending
 * order of i and each pair consumes the next word of cond (32 words in
 * total). For every bit set in that word, the corresponding bits of the two
 * paired words are exchanged; bits whose mask bit is clear are left alone.
 *
 * The exchange is done with XOR/AND only, so the control words never drive
 * a branch or an index.
 */
static void swap_layer(uint64_t *bs, const uint64_t *cond, int log_stride) {
    const int stride = 1 << log_stride;
    int base, lane;

    for (base = 0; base < 64; base += 2 * stride) {
        for (lane = base; lane < base + stride; lane++) {
            const uint64_t mask = *cond++;
            const uint64_t delta = (bs[lane] ^ bs[lane + stride]) & mask;

            bs[lane] ^= delta;
            bs[lane + stride] ^= delta;
        }
    }
}

/*
 * Fill one 32-word block of control bits from 256 bytes at src, going
 * through a 64x64 transpose: 64 values are read with load4 (4 bytes apart),
 * the 64-word scratch array is transposed, and only its first 32 words are
 * copied to dst.
 */
static void load_block_transposed(uint64_t *dst, const unsigned char *src) {
    uint64_t rows[64];
    int i;

    for (i = 0; i < 64; i++) {
        rows[i] = PQCLEAN_MCELIECE348864F_AVX_load4(src + i * 4);
    }

    PQCLEAN_MCELIECE348864F_AVX_transpose_64x64(rows);

    for (i = 0; i < 32; i++) {
        dst[i] = rows[i];
    }
}

/*
 * Fill one 32-word block of control bits from 256 bytes at src by reading
 * 32 values with load8 (8 bytes apart), without any transpose.
 */
static void load_block_direct(uint64_t *dst, const unsigned char *src) {
    int i;

    for (i = 0; i < 32; i++) {
        dst[i] = PQCLEAN_MCELIECE348864F_AVX_load8(src + i * 8);
    }
}

/* input: bits, control bits as array of bytes (256 bytes per block) */
/* output: out, control bits as 23 blocks of 32 uint64_t words each */
/* Blocks 0..5 and 17..22 are loaded through a 64x64 transpose; */
/* blocks 6..16 are loaded directly. */
void PQCLEAN_MCELIECE348864F_AVX_load_bits(uint64_t out[][32], const unsigned char *bits) {
    int block;

    /* first 6 blocks: transposed */
    for (block = 0; block < 6; block++) {
        load_block_transposed(out[block], bits + block * 256);
    }

    /* next 6 + 5 blocks: direct */
    for (; block < 17; block++) {
        load_block_direct(out[block], bits + block * 256);
    }

    /* last 6 blocks: transposed */
    for (; block < 23; block++) {
        load_block_transposed(out[block], bits + block * 256);
    }
}

/* input: r, sequence of bits to be permuted (64 uint64_t words) */
/*        cond, control bits as 23 blocks of 32 uint64_t words */
/*        rev, 0 for normal application; !0 for inverse */
/* output: r, permuted bits */
/* The layer order is the same in both directions; only the order in which */
/* the blocks of cond are consumed changes (0..22 forward, 22..0 reversed). */
void PQCLEAN_MCELIECE348864F_AVX_benes(uint64_t *r, uint64_t cond[][32], int rev) {
    const int step = (rev == 0) ? 1 : -1;
    int block = (rev == 0) ? 0 : 22;
    int k;

    PQCLEAN_MCELIECE348864F_AVX_transpose_64x64(r);

    /* layers 0..5 on the transposed data */
    for (k = 0; k <= 5; k++) {
        swap_layer(r, cond[block], k);
        block += step;
    }

    PQCLEAN_MCELIECE348864F_AVX_transpose_64x64(r);

    /* layers 0..5 and back down 4..0 on the untransposed data */
    for (k = 0; k <= 5; k++) {
        swap_layer(r, cond[block], k);
        block += step;
    }
    for (k = 4; k >= 0; k--) {
        swap_layer(r, cond[block], k);
        block += step;
    }

    PQCLEAN_MCELIECE348864F_AVX_transpose_64x64(r);

    /* layers 5..0 on the transposed data */
    for (k = 5; k >= 0; k--) {
        swap_layer(r, cond[block], k);
        block += step;
    }

    PQCLEAN_MCELIECE348864F_AVX_transpose_64x64(r);
}