ac2c20045c
* Add McEliece reference implementations * Add Vec implementations of McEliece * Add sse implementations * Add AVX2 implementations * Get rid of stuff not supported by Mac ABI * restrict to two cores * Ditch .data files * Remove .hidden from all .S files * speed up duplicate consistency tests by batching * make cpuinfo more robust * Hope to stabilize macos cpuinfo without ccache * Revert "Hope to stabilize macos cpuinfo without ccache" This reverts commit 6129c3cabe1abbc8b956bc87e902a698e32bf322. * Just hardcode what's available at travis * Fixed-size types in api.h * namespace all header files in mceliece * Ditch operations.h * Get rid of static inline functions * fixup! Ditch operations.h
36 lines
1019 B
C
36 lines
1019 B
C
#include "transpose.h"
|
|
|
|
/*
 * Transpose a 64x64 bit matrix over GF(2).
 *
 * input:  in, a 64x64 matrix over GF(2); word in[r] is row r and bit c of
 *         that word is the entry in column c
 * output: out, transpose of in, i.e. bit r of out[c] equals bit c of in[r]
 *
 * Both arrays must hold 64 words. `out` may be the very same array as `in`:
 * the input is copied into `out` first and all further work happens on `out`.
 */
void PQCLEAN_MCELIECE6688128_VEC_transpose_64x64(uint64_t *out, const uint64_t *in) {
    /*
     * masks[d][0] keeps the bit positions whose index has bit d clear,
     * masks[d][1] keeps those whose index has bit d set. The table is never
     * modified, so it lives in read-only static storage instead of being
     * rebuilt on the stack at every call.
     */
    static const uint64_t masks[6][2] = {
        {0x5555555555555555, 0xAAAAAAAAAAAAAAAA},
        {0x3333333333333333, 0xCCCCCCCCCCCCCCCC},
        {0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0},
        {0x00FF00FF00FF00FF, 0xFF00FF00FF00FF00},
        {0x0000FFFF0000FFFF, 0xFFFF0000FFFF0000},
        {0x00000000FFFFFFFF, 0xFFFFFFFF00000000}
    };

    int i, j, s, d;
    uint64_t x, y;

    for (i = 0; i < 64; i++) {
        out[i] = in[i];
    }

    /*
     * Recursive block transpose, from 32x32 blocks (d = 5) down to single
     * bits (d = 0). At each level, rows j and j + s form a 2x2 arrangement
     * of s-bit wide blocks; the two off-diagonal blocks are exchanged.
     */
    for (d = 5; d >= 0; d--) {
        s = 1 << d;

        for (i = 0; i < 64; i += s * 2) {
            for (j = i; j < i + s; j++) {
                /* x: low blocks of row j stay, low blocks of row j + s move up */
                x = (out[j] & masks[d][0]) | ((out[j + s] & masks[d][0]) << s);
                /* y: high blocks of row j move down, high blocks of row j + s stay */
                y = ((out[j] & masks[d][1]) >> s) | (out[j + s] & masks[d][1]);

                out[j + 0] = x;
                out[j + s] = y;
            }
        }
    }
}
|