pqc/crypto_sign/sphincs-haraka-256s-robust/aesni/thash_haraka_robustx4.c
Thom Wiggers a655ec8a9d SPHINCS+ optimized implementations (#253)
* Add state destroy to SHA2 API

* Include optimized SPHINCS+ implementations

I've generated new implementations from the sphincsplus repository.

* Don't destroy sha256ctx after finalize

* Attempt to shut up MSVC

* Make sure to drop errors in rmtree
2019-12-12 14:29:29 +01:00

93 lines
8.4 KiB
C

#include <stdint.h>
#include <string.h>
#include "address.h"
#include "haraka.h"
#include "params.h"
#include "thashx4.h"
/**
* 4-way parallel version of thash; takes 4x as much input and output
*/
#define thashx4_variant(name, inblocks) \
void PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_thashx4_##name(unsigned char *out0, unsigned char *out1, unsigned char *out2, \
unsigned char *out3, const unsigned char *in0, \
const unsigned char *in1, const unsigned char *in2, \
const unsigned char *in3, const unsigned char *pub_seed, \
uint32_t addrx4[4 * 8], const harakactx *state) { \
unsigned char buf0[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N]; \
unsigned char buf1[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N]; \
unsigned char buf2[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N]; \
unsigned char buf3[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N]; \
unsigned char bitmask0[(inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N]; \
unsigned char bitmask1[(inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N]; \
unsigned char bitmask2[(inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N]; \
unsigned char bitmask3[(inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N]; \
unsigned char outbuf[32 * 4]; \
unsigned char buf_tmp[64 * 4]; \
unsigned int i; \
\
(void)pub_seed; /* Suppress an 'unused parameter' warning. */ \
\
if ((inblocks) == 1) { \
memset(buf_tmp, 0, 64 * 4); \
\
/* Generate masks first in buffer */ \
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_addr_to_bytes(buf_tmp, addrx4 + 0 * 8); \
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_addr_to_bytes(buf_tmp + 32, addrx4 + 1 * 8); \
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_addr_to_bytes(buf_tmp + 64, addrx4 + 2 * 8); \
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_addr_to_bytes(buf_tmp + 96, addrx4 + 3 * 8); \
\
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_haraka256x4(outbuf, buf_tmp, state); \
\
/* move addresses to make room for inputs; zero old values */ \
memcpy(buf_tmp + 192, buf_tmp + 96, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES); \
memcpy(buf_tmp + 128, buf_tmp + 64, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES); \
memcpy(buf_tmp + 64, buf_tmp + 32, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES); \
/* skip memcpy(buf_tmp, buf_tmp, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES); already in place */ \
\
/* skip memset(buf_tmp, 0, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES); remained untouched */ \
memset(buf_tmp + 32, 0, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES); \
/* skip memset(buf_tmp + 64, 0, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES); contains addr1 */ \
memset(buf_tmp + 96, 0, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES); \
\
for (i = 0; i < PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N; i++) { \
buf_tmp[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + i] = in0[i] ^ outbuf[i]; \
buf_tmp[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + i + 64] = in1[i] ^ outbuf[i + 32]; \
buf_tmp[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + i + 128] = in2[i] ^ outbuf[i + 64]; \
buf_tmp[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + i + 192] = in3[i] ^ outbuf[i + 96]; \
} \
\
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_haraka512x4(outbuf, buf_tmp, state); \
\
memcpy(out0, outbuf, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N); \
memcpy(out1, outbuf + 32, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N); \
memcpy(out2, outbuf + 64, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N); \
memcpy(out3, outbuf + 96, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N); \
} else { \
/* All other tweakable hashes*/ \
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_addr_to_bytes(buf0, addrx4 + 0 * 8); \
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_addr_to_bytes(buf1, addrx4 + 1 * 8); \
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_addr_to_bytes(buf2, addrx4 + 2 * 8); \
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_addr_to_bytes(buf3, addrx4 + 3 * 8); \
\
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_haraka_Sx4(bitmask0, bitmask1, bitmask2, bitmask3, (inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N, buf0, buf1, \
buf2, buf3, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES, state); \
\
for (i = 0; i < (inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N; i++) { \
buf0[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + i] = in0[i] ^ bitmask0[i]; \
buf1[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + i] = in1[i] ^ bitmask1[i]; \
buf2[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + i] = in2[i] ^ bitmask2[i]; \
buf3[PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + i] = in3[i] ^ bitmask3[i]; \
} \
\
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_haraka_Sx4(out0, out1, out2, out3, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N, buf0, buf1, buf2, buf3, \
PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_ADDR_BYTES + (inblocks)*PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_N, state); \
} \
}
thashx4_variant(1, 1)
thashx4_variant(2, 2)
thashx4_variant(WOTS_LEN, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_WOTS_LEN)
thashx4_variant(FORS_TREES, PQCLEAN_SPHINCSHARAKA256SROBUST_AESNI_FORS_TREES)