pqc/crypto_kem/mceliece348864f/avx/util.c
Thom Wiggers b3f9d4f8d6
Classic McEliece (#259)
* Add McEliece reference implementations

* Add Vec implementations of McEliece

* Add sse implementations

* Add AVX2 implementations

* Get rid of stuff not supported by Mac ABI

* restrict to two cores

* Ditch .data files

* Remove .hidden from all .S files

* speed up duplicate consistency tests by batching

* make cpuinfo more robust

* Hope to stabilize macos cpuinfo without ccache

* Revert "Hope to stabilize macos cpuinfo without ccache"

This reverts commit 6129c3cabe1abbc8b956bc87e902a698e32bf322.

* Just hardcode what's available at travis

* Fixed-size types in api.h

* namespace all header files in mceliece

* Ditch operations.h

* Get rid of static inline functions

* fixup! Ditch operations.h
2020-02-05 13:09:56 +01:00

107 lines
2.5 KiB
C

/*
This file is for loading/storing data in a little-endian fashion
*/
#include "util.h"
/*
 * Write the low `i` bytes of `in` to `out`, least-significant byte first.
 *
 * out: destination buffer; bytes out[0] .. out[i-1] are written.
 * in:  value to serialize.
 * i:   number of bytes to write. Nothing is written when i <= 0. Byte k is
 *      obtained with a shift of 8*k bits, so i must not exceed 8: shifting a
 *      64-bit value by 64 or more bits is undefined.
 */
void PQCLEAN_MCELIECE348864F_AVX_store_i(unsigned char *out, uint64_t in, int i) {
    int pos = 0;

    while (pos < i) {
        out[pos] = (unsigned char)((in >> (8 * pos)) & 0xFF);
        pos++;
    }
}
/*
 * Serialize `a` into two bytes, least-significant byte first.
 *
 * dest: destination buffer; dest[0] and dest[1] are written.
 * a:    value to store. dest[0] receives its low 8 bits and dest[1] receives
 *       (a >> 8) converted to a byte. The type `gf` is declared outside this
 *       file (presumably a 16-bit unsigned integer; verify in the header).
 */
void PQCLEAN_MCELIECE348864F_AVX_store2(unsigned char *dest, gf a) {
    *dest++ = (unsigned char)(a & 0xFF);
    *dest = (unsigned char)(a >> 8);
}
/*
 * Read two bytes as a little-endian 16-bit value and mask it with GFMASK.
 *
 * src: source buffer; src[0] is the low byte and src[1] the high byte.
 *
 * Returns the assembled value ANDed with GFMASK. GFMASK is defined outside
 * this file (presumably the mask covering one field element; verify in the
 * parameter header).
 */
uint16_t PQCLEAN_MCELIECE348864F_AVX_load2(const unsigned char *src) {
    const uint16_t value = (uint16_t)(((uint16_t)src[1] << 8) | src[0]);

    return value & GFMASK;
}
/*
 * Read four bytes as a little-endian 32-bit value.
 *
 * src: source buffer; src[0] is the least-significant byte and src[3] the
 *      most-significant byte.
 *
 * Returns the assembled 32-bit value.
 */
uint32_t PQCLEAN_MCELIECE348864F_AVX_load4(const unsigned char *src) {
    /* Widen each byte before shifting so no shift happens in a signed int. */
    return ((uint32_t)src[0])
           | ((uint32_t)src[1] << 8)
           | ((uint32_t)src[2] << 16)
           | ((uint32_t)src[3] << 24);
}
/*
 * Load SYS_T two-byte coefficients from `in`, append a final coefficient
 * equal to 1, and write them to `out` transposed by bit position.
 *
 * out: receives GFBITS 64-bit words. In out[b], bit k holds bit b of
 *      coefficient k (coefficient 0 ends up in the least-significant bit).
 *      Each word is built with SYS_T + 1 left shifts, so if SYS_T + 1 exceeds
 *      64 the highest-index coefficients are shifted out of the word.
 * in:  2 * SYS_T bytes, each pair read with
 *      PQCLEAN_MCELIECE348864F_AVX_load2 (little-endian, masked by GFMASK).
 *
 * SYS_T, GFBITS and GFMASK are defined outside this file.
 */
void PQCLEAN_MCELIECE348864F_AVX_irr_load(uint64_t *out, const unsigned char *in) {
    uint16_t coeffs[ SYS_T + 1 ];
    int idx, bit;

    /* Read every coefficient before writing any output word. */
    for (idx = 0; idx < SYS_T; idx++) {
        coeffs[idx] = PQCLEAN_MCELIECE348864F_AVX_load2(in + 2 * idx);
        coeffs[idx] &= GFMASK;
    }

    /* The extra top coefficient is fixed to 1. */
    coeffs[ SYS_T ] = 1;

    /* Build each output word independently, highest coefficient first. */
    for (bit = 0; bit < GFBITS; bit++) {
        uint64_t plane = 0;

        for (idx = SYS_T; idx >= 0; idx--) {
            plane <<= 1;
            plane |= (coeffs[idx] >> bit) & 1;
        }

        out[bit] = plane;
    }
}
/*
 * Serialize a 64-bit value into eight bytes, least-significant byte first.
 *
 * out: destination buffer; out[0] .. out[7] are written.
 * in:  value to store.
 */
void PQCLEAN_MCELIECE348864F_AVX_store8(unsigned char *out, uint64_t in) {
    int idx;

    for (idx = 0; idx < 8; idx++) {
        out[idx] = (unsigned char)(in & 0xFF);
        in >>= 8; /* expose the next byte in the low position */
    }
}
/*
 * Read eight bytes as a little-endian 64-bit value.
 *
 * in: source buffer; in[0] is the least-significant byte and in[7] the
 *     most-significant byte.
 *
 * Returns the assembled 64-bit value.
 */
uint64_t PQCLEAN_MCELIECE348864F_AVX_load8(const unsigned char *in) {
    /* Widen each byte to 64 bits before shifting it into place. */
    return ((uint64_t)in[0])
           | ((uint64_t)in[1] << 8)
           | ((uint64_t)in[2] << 16)
           | ((uint64_t)in[3] << 24)
           | ((uint64_t)in[4] << 32)
           | ((uint64_t)in[5] << 40)
           | ((uint64_t)in[6] << 48)
           | ((uint64_t)in[7] << 56);
}
/*
 * Reverse the order of the low 12 bits of `a`.
 *
 * The low 16 bits are reversed by swapping progressively smaller groups
 * (bytes, nibbles, bit pairs, single bits); the result is then shifted right
 * by 4, so bit k of the return value (k = 0..11) is bit 11-k of the input.
 * Input bits 12..15 land in the four positions removed by the final shift
 * and do not affect the result.
 *
 * The type `gf` is declared outside this file.
 */
gf PQCLEAN_MCELIECE348864F_AVX_bitrev(gf a) {
    /* swap the two bytes */
    a = ((a & 0xFF00) >> 8) | ((a & 0x00FF) << 8);

    /* swap the nibbles inside each byte */
    a = ((a & 0xF0F0) >> 4) | ((a & 0x0F0F) << 4);

    /* swap the bit pairs inside each nibble */
    a = ((a & 0xCCCC) >> 2) | ((a & 0x3333) << 2);

    /* swap adjacent bits */
    a = ((a & 0xAAAA) >> 1) | ((a & 0x5555) << 1);

    /* drop the four low positions left over from the 16-bit reversal */
    return a >> 4;
}
/*
 * Read sixteen bytes into a vec128.
 *
 * in: source buffer. Bytes in[0..7] and in[8..15] are each read as a
 *     little-endian 64-bit word with PQCLEAN_MCELIECE348864F_AVX_load8.
 *
 * Returns PQCLEAN_MCELIECE348864F_AVX_vec128_set2x(first, second), where
 * `first` comes from in[0..7] and `second` from in[8..15]. How set2x places
 * its two arguments inside the vector is defined outside this file.
 */
vec128 PQCLEAN_MCELIECE348864F_AVX_load16(const unsigned char *in) {
    const uint64_t first = PQCLEAN_MCELIECE348864F_AVX_load8(in);
    const uint64_t second = PQCLEAN_MCELIECE348864F_AVX_load8(in + 8);

    return PQCLEAN_MCELIECE348864F_AVX_vec128_set2x(first, second);
}
/*
 * Write a vec128 to sixteen bytes.
 *
 * out: destination buffer; out[0..15] are written.
 * in:  vector to store. The value returned by
 *      PQCLEAN_MCELIECE348864F_AVX_vec128_extract(in, 0) goes to out[0..7]
 *      and the value for index 1 to out[8..15], each written little-endian by
 *      PQCLEAN_MCELIECE348864F_AVX_store8.
 *
 * The extract indices are kept as literal constants.
 * NOTE(review): vec128_extract is defined outside this file and may require a
 * compile-time constant index; confirm before replacing these with a loop.
 */
void PQCLEAN_MCELIECE348864F_AVX_store16(unsigned char *out, vec128 in) {
    uint64_t half;

    half = PQCLEAN_MCELIECE348864F_AVX_vec128_extract(in, 0);
    PQCLEAN_MCELIECE348864F_AVX_store8(out, half);

    half = PQCLEAN_MCELIECE348864F_AVX_vec128_extract(in, 1);
    PQCLEAN_MCELIECE348864F_AVX_store8(out + 8, half);
}