@@ -17,4 +17,13 @@ auxiliary-submitters: | |||
- Damien Stehlé | |||
implementations:
  # Each entry pins the upstream commit the implementation was imported from.
  # A mapping may contain a given key only once, so the stale duplicate
  # `version` entry for `clean` has been dropped in favour of the newer commit.
  - name: clean
    version: https://github.com/pq-crystals/dilithium/commit/c1b40fd599e71f65aa18be64dd6c3fc8e84b0c08
  - name: avx2
    version: https://github.com/pq-crystals/dilithium/commit/c1b40fd599e71f65aa18be64dd6c3fc8e84b0c08
    supported_platforms:
      - architecture: x86_64
        operating_systems:
          - Linux
        # NOTE(review): the avx2 Makefile compiles with -mavx2 -mbmi -mpopcnt;
        # confirm that this flag list matches what the build actually requires.
        required_flags:
          - avx2
          - bmi2
@@ -0,0 +1,6 @@ | |||
Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) | |||
For Keccak and the random number generator | |||
we are using public-domain code from sources | |||
and by authors listed in comments on top of | |||
the respective files. |
@@ -0,0 +1,43 @@ | |||
# This Makefile can be used with GNU Make or BSD Make

# Static library produced by the default target.
LIB=libdilithium2_avx2.a

SOURCES = fips202x4.c invntt.s nttconsts.c ntt.s packing.c pointwise.S poly.c \
          polyvec.c reduce.s rejsample.c rounding.c sign.c stream.c
OBJECTS = fips202x4.o invntt.o nttconsts.o ntt.o packing.o pointwise.o poly.o \
          polyvec.o reduce.o rejsample.o rounding.o sign.o stream.o
# Every object is rebuilt when any of these headers/includes changes.
HEADERS = alignment.h api.h params.h sign.h polyvec.h poly.h packing.h ntt.h \
          nttconsts.h reduce.h rounding.h rejsample.h symmetric.h stream.h \
          fips202x4.h shuffle.inc

CFLAGS=-O3 -Wall -Wextra -Wpedantic -Wvla -Werror \
       -Wmissing-prototypes -Wredundant-decls -std=c99 \
       -Wcast-align \
       -mavx2 -mbmi -mpopcnt -I../../../common $(EXTRAFLAGS)

all: $(LIB)

# `all` and `clean` do not name files; declaring them phony keeps them working
# even if a file with one of those names appears in this directory.
.PHONY: all clean

# The 4-way Keccak permutation object is built in the shared common directory
# and added to the archive.
KECCAK4XDIR=../../../common/keccak4x
KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.o
KECCAK4X=$(KECCAK4XDIR)/$(KECCAK4XOBJ)

%.o: %.c $(HEADERS)
	$(CC) $(CFLAGS) -c -o $@ $<

%.o: %.s $(HEADERS)
	$(AS) -o $@ $<

%.o: %.S $(HEADERS)
	$(AS) -c -o $@ $<

$(LIB): $(OBJECTS) $(KECCAK4X)
	$(AR) -r $@ $^

$(KECCAK4X):
	$(MAKE) -C $(KECCAK4XDIR) $(KECCAK4XOBJ)

clean:
	$(RM) $(OBJECTS)
	$(RM) $(LIB)
	$(MAKE) -C $(KECCAK4XDIR) clean
@@ -0,0 +1,22 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_ALIGNMENT_H | |||
#define PQCLEAN_DILITHIUM2_AVX2_ALIGNMENT_H | |||
/*
 * Anonymous union type holding N bytes that can also be viewed as N/32
 * 256-bit vectors. The __m256i member gives the whole object the alignment
 * of __m256i. N should be a multiple of 32 so both views cover the same bytes.
 * The including file must provide <stdint.h> and <immintrin.h>.
 */
#define ALIGNED_UINT8(N)        \
    union {                     \
        uint8_t as_arr[N];      \
        __m256i as_vec[(N)/32]; \
    }
/*
 * Anonymous union type holding N 32-bit words (as_arr) that can also be
 * viewed as N/8 256-bit vectors (as_vec); eight uint32_t fill one __m256i.
 * The __m256i member gives the object the alignment of __m256i.
 * The including file must provide <stdint.h> and <immintrin.h>.
 */
#define ALIGNED_UINT32(N) \ | |||
union { \ | |||
uint32_t as_arr[N]; \ | |||
__m256i as_vec[(N)/8]; \ | |||
} | |||
/*
 * Anonymous union type holding N 64-bit words (as_arr) that can also be
 * viewed as 256-bit vectors (as_vec). Four uint64_t fill one __m256i, so the
 * vector view has N/4 elements; this makes both views cover the same bytes
 * and avoids a zero-length array for N in 4..7.
 * The including file must provide <stdint.h> and <immintrin.h>.
 */
#define ALIGNED_UINT64(N)       \
    union {                     \
        uint64_t as_arr[N];     \
        __m256i as_vec[(N)/4];  \
    }
#endif //PQCLEAN_DILITHIUM2_AVX2_ALIGNMENT_H |
@@ -0,0 +1,37 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_API_H | |||
#define PQCLEAN_DILITHIUM2_AVX2_API_H | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
/* Sizes in bytes of the serialized public key, secret key and signature,
 * and the algorithm name string. */
#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES 1184U | |||
#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES 2800U | |||
#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES 2044U | |||
#define PQCLEAN_DILITHIUM2_AVX2_CRYPTO_ALGNAME "Dilithium2" | |||
/* Key generation. NOTE(review): pk and sk are presumably output buffers of
 * CRYPTO_PUBLICKEYBYTES and CRYPTO_SECRETKEYBYTES bytes; the implementation
 * is not visible here, confirm against sign.c. */
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_keypair( | |||
uint8_t *pk, uint8_t *sk); | |||
/* Sign msg (len bytes) with sk, producing a signed message in sm and its
 * length in *smlen. NOTE(review): return-value convention is not visible in
 * this header; confirm against sign.c. */
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *msg, size_t len, | |||
const uint8_t *sk); | |||
/* Open the signed message sm (smlen bytes) with pk, producing the message in
 * m and its length in *mlen. NOTE(review): failure signalling is not visible
 * in this header; confirm against sign.c. */
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, | |||
const uint8_t *pk); | |||
/* Detached variant: writes only the signature of m (mlen bytes) to sig and
 * its length to *siglen. */
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk); | |||
/* Detached variant: checks the signature sig (siglen bytes) over m (mlen
 * bytes) with pk. All arguments are read-only. */
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk); | |||
#endif |
@@ -0,0 +1,239 @@ | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "fips202.h" | |||
#include "fips202x4.h" | |||
#include "params.h" | |||
#define NROUNDS 24 | |||
#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset)))) | |||
/* Read the eight bytes at x as a little-endian 64-bit unsigned integer. */
static uint64_t load64(const uint8_t *x) {
    uint64_t word = 0;
    unsigned int pos = 8;

    /* Fold from the most significant byte (x[7]) down to x[0]. */
    while (pos-- > 0) {
        word = (word << 8) | x[pos];
    }
    return word;
}
/* Write u to the eight bytes at x in little-endian byte order. */
static void store64(uint8_t *x, uint64_t u) {
    uint8_t *const end = x + 8;

    /* Emit the least significant byte first, then shift the next one down. */
    while (x != end) {
        *x++ = (uint8_t)u;
        u >>= 8;
    }
}
/* Use implementation from the Keccak Code Package */ | |||
extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); | |||
#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds | |||
/*
 * Absorb four equal-length messages into four interleaved Keccak states.
 *
 * The state is first cleared, then every full block of r bytes is XORed in
 * and permuted. The remaining bytes are padded (domain byte p after the
 * message, bit 0x80 in the last byte of the block) and XORed in WITHOUT a
 * final permutation; keccak_squeezeblocks4x permutes before it outputs.
 *
 * Arguments: - __m256i *s:        output, 25 vectors; 64-bit lane j of s[i]
 *                                 is word i of the state for message j
 *            - unsigned int r:    rate in bytes; must not exceed 200 (size
 *                                 of the padding buffers)
 *            - const uint8_t *m0..m3: the four input messages
 *            - unsigned long long mlen: length in bytes of each message
 *            - uint8_t p:         domain separation / padding byte
 */
static void keccak_absorb4x(__m256i *s,
                            unsigned int r,
                            const uint8_t *m0,
                            const uint8_t *m1,
                            const uint8_t *m2,
                            const uint8_t *m3,
                            unsigned long long mlen,
                            uint8_t p) {
    unsigned long long i;
    uint8_t t0[200];
    uint8_t t1[200];
    uint8_t t2[200];
    uint8_t t3[200];
    uint64_t *ss = (uint64_t *)s;

    /* Callers hand in an uninitialised state array, so it must be cleared
     * without reading it (the previous s[i] ^ s[i] read indeterminate data). */
    for (i = 0; i < 25; ++i) {
        s[i] = _mm256_setzero_si256();
    }

    /* Full blocks: XOR into the four states, then permute all four at once. */
    while (mlen >= r) {
        for (i = 0; i < r / 8; ++i) {
            ss[4 * i + 0] ^= load64(m0 + 8 * i);
            ss[4 * i + 1] ^= load64(m1 + 8 * i);
            ss[4 * i + 2] ^= load64(m2 + 8 * i);
            ss[4 * i + 3] ^= load64(m3 + 8 * i);
        }

        KeccakF1600_StatePermute4x(s);
        mlen -= r;
        m0 += r;
        m1 += r;
        m2 += r;
        m3 += r;
    }

    /* Build the final, padded block for each message. */
    for (i = 0; i < r; ++i) {
        t0[i] = 0;
        t1[i] = 0;
        t2[i] = 0;
        t3[i] = 0;
    }
    for (i = 0; i < mlen; ++i) {
        t0[i] = m0[i];
        t1[i] = m1[i];
        t2[i] = m2[i];
        t3[i] = m3[i];
    }

    /* Here i == mlen < r: the domain byte directly follows the message. */
    t0[i] = p;
    t1[i] = p;
    t2[i] = p;
    t3[i] = p;

    /* OR, not assignment: for mlen == r - 1 both marks share one byte. */
    t0[r - 1] |= 128;
    t1[r - 1] |= 128;
    t2[r - 1] |= 128;
    t3[r - 1] |= 128;

    for (i = 0; i < r / 8; ++i) {
        ss[4 * i + 0] ^= load64(t0 + 8 * i);
        ss[4 * i + 1] ^= load64(t1 + 8 * i);
        ss[4 * i + 2] ^= load64(t2 + 8 * i);
        ss[4 * i + 3] ^= load64(t3 + 8 * i);
    }
}
/*
 * Squeeze nblocks blocks of r bytes from each of the four interleaved Keccak
 * states in s. The state is permuted before every block is written, and it
 * is updated in place so squeezing can be continued by a later call.
 *
 * Arguments: - uint8_t *h0..h3:       the four output buffers, each receiving
 *                                     nblocks * r bytes
 *            - unsigned long nblocks: number of blocks to produce per output
 *            - unsigned int r:        rate in bytes
 *            - __m256i *s:            in/out state (25 vectors)
 */
static void keccak_squeezeblocks4x(uint8_t *h0,
                                   uint8_t *h1,
                                   uint8_t *h2,
                                   uint8_t *h3,
                                   unsigned long nblocks,
                                   unsigned int r,
                                   __m256i *s) {
    uint64_t *lanes = (uint64_t *)s;
    unsigned int w;

    for (; nblocks > 0; --nblocks) {
        KeccakF1600_StatePermute4x(s);

        /* Word w of state j lives in lanes[4 * w + j]. */
        for (w = 0; w < r / 8; ++w) {
            store64(h0 + 8 * w, lanes[4 * w + 0]);
            store64(h1 + 8 * w, lanes[4 * w + 1]);
            store64(h2 + 8 * w, lanes[4 * w + 2]);
            store64(h3 + 8 * w, lanes[4 * w + 3]);
        }

        h0 += r;
        h1 += r;
        h2 += r;
        h3 += r;
    }
}
void PQCLEAN_DILITHIUM2_AVX2_shake128_absorb4x( | |||
__m256i *s, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen) { | |||
keccak_absorb4x(s, SHAKE128_RATE, m0, m1, m2, m3, mlen, 0x1F); | |||
} | |||
void PQCLEAN_DILITHIUM2_AVX2_shake128_squeezeblocks4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long nblocks, | |||
__m256i *s) { | |||
keccak_squeezeblocks4x(h0, h1, h2, h3, nblocks, SHAKE128_RATE, s); | |||
} | |||
void PQCLEAN_DILITHIUM2_AVX2_shake256_absorb4x( | |||
__m256i *s, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen) { | |||
keccak_absorb4x(s, SHAKE256_RATE, m0, m1, m2, m3, mlen, 0x1F); | |||
} | |||
void PQCLEAN_DILITHIUM2_AVX2_shake256_squeezeblocks4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long nblocks, | |||
__m256i *s) { | |||
keccak_squeezeblocks4x(h0, h1, h2, h3, nblocks, SHAKE256_RATE, s); | |||
} | |||
void PQCLEAN_DILITHIUM2_AVX2_shake128_4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long long hlen, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen) { | |||
unsigned int i; | |||
unsigned long nblocks = hlen / SHAKE128_RATE; | |||
uint8_t t[4][SHAKE128_RATE]; | |||
__m256i s[25]; | |||
PQCLEAN_DILITHIUM2_AVX2_shake128_absorb4x(s, m0, m1, m2, m3, mlen); | |||
PQCLEAN_DILITHIUM2_AVX2_shake128_squeezeblocks4x(h0, h1, h2, h3, nblocks, s); | |||
h0 += nblocks * SHAKE128_RATE; | |||
h1 += nblocks * SHAKE128_RATE; | |||
h2 += nblocks * SHAKE128_RATE; | |||
h3 += nblocks * SHAKE128_RATE; | |||
hlen -= nblocks * SHAKE128_RATE; | |||
if (hlen) { | |||
PQCLEAN_DILITHIUM2_AVX2_shake128_squeezeblocks4x(t[0], t[1], t[2], t[3], 1, s); | |||
for (i = 0; i < hlen; ++i) { | |||
h0[i] = t[0][i]; | |||
h1[i] = t[1][i]; | |||
h2[i] = t[2][i]; | |||
h3[i] = t[3][i]; | |||
} | |||
} | |||
} | |||
void PQCLEAN_DILITHIUM2_AVX2_shake256_4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long long hlen, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen) { | |||
unsigned int i; | |||
unsigned long nblocks = hlen / SHAKE256_RATE; | |||
uint8_t t[4][SHAKE256_RATE]; | |||
__m256i s[25]; | |||
PQCLEAN_DILITHIUM2_AVX2_shake256_absorb4x(s, m0, m1, m2, m3, mlen); | |||
PQCLEAN_DILITHIUM2_AVX2_shake256_squeezeblocks4x(h0, h1, h2, h3, nblocks, s); | |||
h0 += nblocks * SHAKE256_RATE; | |||
h1 += nblocks * SHAKE256_RATE; | |||
h2 += nblocks * SHAKE256_RATE; | |||
h3 += nblocks * SHAKE256_RATE; | |||
hlen -= nblocks * SHAKE256_RATE; | |||
if (hlen) { | |||
PQCLEAN_DILITHIUM2_AVX2_shake256_squeezeblocks4x(t[0], t[1], t[2], t[3], 1, s); | |||
for (i = 0; i < hlen; ++i) { | |||
h0[i] = t[0][i]; | |||
h1[i] = t[1][i]; | |||
h2[i] = t[2][i]; | |||
h3[i] = t[3][i]; | |||
} | |||
} | |||
} |
@@ -0,0 +1,65 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_FIPS202X4_H | |||
#define PQCLEAN_DILITHIUM2_AVX2_FIPS202X4_H | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "params.h" | |||
/* 4-way SHAKE128 absorb: takes four messages m0..m3 of mlen bytes each and
 * the 4-way state s (an array of __m256i) to absorb them into. */
void PQCLEAN_DILITHIUM2_AVX2_shake128_absorb4x( | |||
__m256i *s, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen); | |||
/* 4-way SHAKE128 squeeze: takes four output buffers h0..h3, a block count
 * nblocks and the 4-way state s. */
void PQCLEAN_DILITHIUM2_AVX2_shake128_squeezeblocks4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long nblocks, | |||
__m256i *s); | |||
/* 4-way SHAKE256 absorb; same parameters as the SHAKE128 variant. */
void PQCLEAN_DILITHIUM2_AVX2_shake256_absorb4x( | |||
__m256i *s, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen); | |||
/* 4-way SHAKE256 squeeze; same parameters as the SHAKE128 variant. */
void PQCLEAN_DILITHIUM2_AVX2_shake256_squeezeblocks4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long nblocks, | |||
__m256i *s); | |||
/* One-shot 4-way SHAKE128: four outputs h0..h3 of hlen bytes each, four
 * inputs m0..m3 of mlen bytes each. */
void PQCLEAN_DILITHIUM2_AVX2_shake128_4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long long hlen, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen); | |||
/* One-shot 4-way SHAKE256: four outputs h0..h3 of hlen bytes each, four
 * inputs m0..m3 of mlen bytes each. */
void PQCLEAN_DILITHIUM2_AVX2_shake256_4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long long hlen, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen); | |||
#endif |
@@ -0,0 +1,281 @@ | |||
.include "shuffle.inc" | |||
# Inverse-NTT butterfly applied to four register pairs (l_i, h_i), i = 0..3.
# Register conventions established by the routines in this file:
#   ymm0 = 8xqinv, ymm1 = 8xq, ymm2 = 8x256q; ymm12-ymm15 are scratch.
# For every pair:
#   t   = (l + ymm2 - h) * zeta      (32-bit add/sub, then vpmuludq)
#   l   = l + h
#   h   = (t + ((t * ymm0) mod 2^32) * ymm1) >> 32   (Montgomery-style reduction)
# vpmuludq only multiplies the low dword of each 64-bit lane, so the inputs
# are expected to hold one coefficient per 64-bit lane.
# z0 is the zeta register used for pairs 0 and 1, z1 for pairs 2 and 3.
# With the default z0=15 the zeta sits in scratch register ymm15, which is why
# both z0 multiplications are issued before ymm15 receives the pair-3 difference.
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3 | |||
vpaddd %ymm2,%ymm\l0,%ymm12 | |||
vpaddd %ymm2,%ymm\l1,%ymm13 | |||
vpaddd %ymm2,%ymm\l2,%ymm14 | |||
vpsubd %ymm\h0,%ymm12,%ymm12 | |||
vpsubd %ymm\h1,%ymm13,%ymm13 | |||
vpsubd %ymm\h2,%ymm14,%ymm14 | |||
vpmuludq %ymm\z0,%ymm12,%ymm12 | |||
vpmuludq %ymm\z0,%ymm13,%ymm13 | |||
# z0 is no longer needed from here on; ymm15 may now be overwritten.
vpaddd %ymm2,%ymm\l3,%ymm15 | |||
vpmuludq %ymm\z1,%ymm14,%ymm14 | |||
vpsubd %ymm\h3,%ymm15,%ymm15 | |||
vpaddd %ymm\l0,%ymm\h0,%ymm\l0 | |||
vpmuludq %ymm\z1,%ymm15,%ymm15 | |||
vpaddd %ymm\l1,%ymm\h1,%ymm\l1 | |||
vpaddd %ymm\l2,%ymm\h2,%ymm\l2 | |||
vpaddd %ymm\l3,%ymm\h3,%ymm\l3 | |||
# reduce the four products held in ymm12-ymm15 into the h registers
vpmuludq %ymm0,%ymm12,%ymm\h0 | |||
vpmuludq %ymm0,%ymm13,%ymm\h1 | |||
vpmuludq %ymm0,%ymm14,%ymm\h2 | |||
vpmuludq %ymm0,%ymm15,%ymm\h3 | |||
vpmuludq %ymm1,%ymm\h0,%ymm\h0 | |||
vpmuludq %ymm1,%ymm\h1,%ymm\h1 | |||
vpmuludq %ymm1,%ymm\h2,%ymm\h2 | |||
vpmuludq %ymm1,%ymm\h3,%ymm\h3 | |||
vpaddq %ymm12,%ymm\h0,%ymm\h0 | |||
vpaddq %ymm13,%ymm\h1,%ymm\h1 | |||
vpaddq %ymm14,%ymm\h2,%ymm\h2 | |||
vpaddq %ymm15,%ymm\h3,%ymm\h3 | |||
vpsrlq $32,%ymm\h0,%ymm\h0 | |||
vpsrlq $32,%ymm\h1,%ymm\h1 | |||
vpsrlq $32,%ymm\h2,%ymm\h2 | |||
vpsrlq $32,%ymm\h3,%ymm\h3 | |||
.endm | |||
# First inverse-NTT pass (stages labelled level0..level4) over one chunk.
# C prototype (ntt.h):
#   void ..._invntt_levels0t4_avx(uint64_t *tmp, const uint32_t *a, const uint32_t *zetas_inv)
# With the System V AMD64 calling convention: rdi = tmp, rsi = a, rdx = zetas_inv.
# Reads 128 bytes (32 dword coefficients) from (%rsi), reads the dwords at byte
# offsets 0..123 of (%rdx), and writes eight 32-byte vectors (256 bytes) to (%rdi).
# vmovdqa is used for the input, the output and the constants, so rsi, rdi and the
# constant tables must be 32-byte aligned.
# shuffle8/shuffle4 come from shuffle.inc (not shown here) -- presumably they
# interleave 8-dword / 4-dword halves of two registers; confirm against that file.
.global PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx | |||
PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx: | |||
#consts | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xq(%rip),%ymm1 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8x256q(%rip),%ymm2 | |||
#load | |||
vmovdqa (%rsi),%ymm6 | |||
vmovdqa 32(%rsi),%ymm7 | |||
vmovdqa 64(%rsi),%ymm5 | |||
vmovdqa 96(%rsi),%ymm10 | |||
#reorder | |||
shuffle8 6,5,8,5 | |||
shuffle8 7,10,6,10 | |||
shuffle4 8,6,4,6 | |||
shuffle4 5,10,8,10 | |||
# move the odd dword of every 64-bit lane into the low dword of a second
# register, so each register holds one coefficient per 64-bit lane
vpsrlq $32,%ymm4,%ymm5 | |||
vpsrlq $32,%ymm6,%ymm7 | |||
vpsrlq $32,%ymm8,%ymm9 | |||
vpsrlq $32,%ymm10,%ymm11 | |||
# level0: same arithmetic as the butterfly macro, written out because each of
# the four pairs uses its own vector of four zetas (rdx offsets 0, 16, 32, 48)
level0: | |||
vpmovzxdq (%rdx),%ymm3 | |||
vpmovzxdq 16(%rdx),%ymm15 | |||
vpaddd %ymm2,%ymm4,%ymm12 | |||
vpaddd %ymm2,%ymm6,%ymm13 | |||
vpaddd %ymm2,%ymm8,%ymm14 | |||
vpsubd %ymm5,%ymm12,%ymm12 | |||
vpsubd %ymm7,%ymm13,%ymm13 | |||
vpsubd %ymm9,%ymm14,%ymm14 | |||
vpmuludq %ymm3,%ymm12,%ymm12 | |||
vpmuludq %ymm15,%ymm13,%ymm13 | |||
vpaddd %ymm2,%ymm10,%ymm15 | |||
vpsubd %ymm11,%ymm15,%ymm15 | |||
vpaddd %ymm4,%ymm5,%ymm4 | |||
vpaddd %ymm6,%ymm7,%ymm6 | |||
# ymm5/ymm7 have been consumed above and are reused for the next two zeta vectors
vpmovzxdq 32(%rdx),%ymm5 | |||
vpmovzxdq 48(%rdx),%ymm7 | |||
vpmuludq %ymm5,%ymm14,%ymm14 | |||
vpmuludq %ymm7,%ymm15,%ymm15 | |||
vpaddd %ymm8,%ymm9,%ymm8 | |||
vpaddd %ymm10,%ymm11,%ymm10 | |||
vpmuludq %ymm0,%ymm12,%ymm5 | |||
vpmuludq %ymm0,%ymm13,%ymm7 | |||
vpmuludq %ymm0,%ymm14,%ymm9 | |||
vpmuludq %ymm0,%ymm15,%ymm11 | |||
vpmuludq %ymm1,%ymm5,%ymm5 | |||
vpmuludq %ymm1,%ymm7,%ymm7 | |||
vpmuludq %ymm1,%ymm9,%ymm9 | |||
vpmuludq %ymm1,%ymm11,%ymm11 | |||
vpaddq %ymm12,%ymm5,%ymm5 | |||
vpaddq %ymm13,%ymm7,%ymm7 | |||
vpaddq %ymm14,%ymm9,%ymm9 | |||
vpaddq %ymm15,%ymm11,%ymm11 | |||
vpsrlq $32,%ymm5,%ymm5 | |||
vpsrlq $32,%ymm7,%ymm7 | |||
vpsrlq $32,%ymm9,%ymm9 | |||
vpsrlq $32,%ymm11,%ymm11 | |||
# level1: two zeta vectors (defaults z0 = ymm15, z1 = ymm3)
level1: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpmovzxdq 64(%rdx),%ymm15 | |||
vpmovzxdq 80(%rdx),%ymm3 | |||
butterfly 4,5,8,9,6,7,10,11 | |||
# level2: one zeta vector shared by all four pairs
level2: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpmovzxdq 96(%rdx),%ymm3 | |||
butterfly 4,5,6,7,8,9,10,11 3,3 | |||
#shuffle | |||
shuffle4 4,5,3,5 | |||
shuffle4 6,7,4,7 | |||
shuffle4 8,9,6,9 | |||
shuffle4 10,11,8,11 | |||
# level3: two zetas, one broadcast into the low four dwords and the other
# into the high four dwords of ymm10
level3: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpbroadcastd 112(%rdx),%ymm14 | |||
vpbroadcastd 116(%rdx),%ymm15 | |||
vpblendd $0xF0,%ymm15,%ymm14,%ymm10 | |||
butterfly 3,4,6,8,5,7,9,11 10,10 | |||
#shuffle | |||
shuffle8 3,4,10,4 | |||
shuffle8 6,8,3,8 | |||
shuffle8 5,7,6,7 | |||
shuffle8 9,11,5,11 | |||
# level4: a single zeta broadcast to all lanes
level4: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpbroadcastd 120(%rdx),%ymm9 | |||
butterfly 10,3,6,5,4,8,7,11 9,9 | |||
#store | |||
vmovdqa %ymm10,(%rdi) | |||
vmovdqa %ymm3,32(%rdi) | |||
vmovdqa %ymm6,64(%rdi) | |||
vmovdqa %ymm5,96(%rdi) | |||
vmovdqa %ymm4,128(%rdi) | |||
vmovdqa %ymm8,160(%rdi) | |||
vmovdqa %ymm7,192(%rdi) | |||
vmovdqa %ymm11,224(%rdi) | |||
ret | |||
# Second inverse-NTT pass (stages labelled level5..level7) plus final scaling.
# C prototype (ntt.h):
#   void ..._invntt_levels5t7_avx(uint32_t *a, const uint64_t *tmp, const uint32_t *zetas_inv)
# With the System V AMD64 calling convention: rdi = a, rsi = tmp, rdx = zetas_inv.
# Reads eight 32-byte vectors from (%rsi) at a stride of 256 bytes, reads the
# seven dwords at byte offsets 0..27 of (%rdx), and writes eight 16-byte
# vectors (four dword coefficients each) to (%rdi) at a stride of 128 bytes.
# All vmovdqa operands (rsi, rdi, constant tables) must be suitably aligned.
.global PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx | |||
PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx: | |||
#consts | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xq(%rip),%ymm1 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8x256q(%rip),%ymm2 | |||
#load | |||
vmovdqa (%rsi),%ymm4 | |||
vmovdqa 256(%rsi),%ymm5 | |||
vmovdqa 512(%rsi),%ymm6 | |||
vmovdqa 768(%rsi),%ymm7 | |||
vmovdqa 1024(%rsi),%ymm8 | |||
vmovdqa 1280(%rsi),%ymm9 | |||
vmovdqa 1536(%rsi),%ymm10 | |||
vmovdqa 1792(%rsi),%ymm11 | |||
# level5: same arithmetic as the butterfly macro, written out because each of
# the four pairs uses its own broadcast zeta (rdx offsets 0, 4, 8, 12)
level5: | |||
vpbroadcastd (%rdx),%ymm3 | |||
vpbroadcastd 4(%rdx),%ymm15 | |||
vpaddd %ymm2,%ymm4,%ymm12 | |||
vpaddd %ymm2,%ymm6,%ymm13 | |||
vpaddd %ymm2,%ymm8,%ymm14 | |||
vpsubd %ymm5,%ymm12,%ymm12 | |||
vpsubd %ymm7,%ymm13,%ymm13 | |||
vpsubd %ymm9,%ymm14,%ymm14 | |||
vpmuludq %ymm3,%ymm12,%ymm12 | |||
vpmuludq %ymm15,%ymm13,%ymm13 | |||
vpaddd %ymm2,%ymm10,%ymm15 | |||
vpsubd %ymm11,%ymm15,%ymm15 | |||
vpaddd %ymm4,%ymm5,%ymm4 | |||
vpaddd %ymm6,%ymm7,%ymm6 | |||
# ymm5/ymm7 have been consumed above and are reused for the next two zetas
vpbroadcastd 8(%rdx),%ymm5 | |||
vpbroadcastd 12(%rdx),%ymm7 | |||
vpmuludq %ymm5,%ymm14,%ymm14 | |||
vpmuludq %ymm7,%ymm15,%ymm15 | |||
vpaddd %ymm8,%ymm9,%ymm8 | |||
vpaddd %ymm10,%ymm11,%ymm10 | |||
vpmuludq %ymm0,%ymm12,%ymm5 | |||
vpmuludq %ymm0,%ymm13,%ymm7 | |||
vpmuludq %ymm0,%ymm14,%ymm9 | |||
vpmuludq %ymm0,%ymm15,%ymm11 | |||
vpmuludq %ymm1,%ymm5,%ymm5 | |||
vpmuludq %ymm1,%ymm7,%ymm7 | |||
vpmuludq %ymm1,%ymm9,%ymm9 | |||
vpmuludq %ymm1,%ymm11,%ymm11 | |||
vpaddq %ymm12,%ymm5,%ymm5 | |||
vpaddq %ymm13,%ymm7,%ymm7 | |||
vpaddq %ymm14,%ymm9,%ymm9 | |||
vpaddq %ymm15,%ymm11,%ymm11 | |||
vpsrlq $32,%ymm5,%ymm5 | |||
vpsrlq $32,%ymm7,%ymm7 | |||
vpsrlq $32,%ymm9,%ymm9 | |||
vpsrlq $32,%ymm11,%ymm11 | |||
# level6: two broadcast zetas (defaults z0 = ymm15, z1 = ymm3)
level6: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpbroadcastd 16(%rdx),%ymm15 | |||
vpbroadcastd 20(%rdx),%ymm3 | |||
butterfly 4,5,8,9,6,7,10,11 | |||
# level7: one broadcast zeta shared by all four pairs
level7: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpbroadcastd 24(%rdx),%ymm3 | |||
butterfly 4,5,6,7,8,9,10,11 3,3 | |||
# Multiply ymm4-ymm7 (the sum outputs of level7) by the 8xdiv constant and
# reduce them like the butterfly does. ymm8-ymm11 are not scaled here --
# NOTE(review): presumably the level7 zeta already carries that factor; confirm.
#consts | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xdiv(%rip),%ymm3 | |||
vpmuludq %ymm3,%ymm4,%ymm4 | |||
vpmuludq %ymm3,%ymm5,%ymm5 | |||
vpmuludq %ymm3,%ymm6,%ymm6 | |||
vpmuludq %ymm3,%ymm7,%ymm7 | |||
vpmuludq %ymm0,%ymm4,%ymm12 | |||
vpmuludq %ymm0,%ymm5,%ymm13 | |||
vpmuludq %ymm0,%ymm6,%ymm14 | |||
vpmuludq %ymm0,%ymm7,%ymm15 | |||
vpmuludq %ymm1,%ymm12,%ymm12 | |||
vpmuludq %ymm1,%ymm13,%ymm13 | |||
vpmuludq %ymm1,%ymm14,%ymm14 | |||
vpmuludq %ymm1,%ymm15,%ymm15 | |||
vpaddq %ymm12,%ymm4,%ymm4 | |||
vpaddq %ymm13,%ymm5,%ymm5 | |||
vpaddq %ymm14,%ymm6,%ymm6 | |||
vpaddq %ymm15,%ymm7,%ymm7 | |||
vpsrlq $32,%ymm4,%ymm4 | |||
vpsrlq $32,%ymm5,%ymm5 | |||
vpsrlq $32,%ymm6,%ymm6 | |||
vpsrlq $32,%ymm7,%ymm7 | |||
# The mask constant is {0,2,4,6,0,0,0,0}: vpermd gathers the low dword of each
# 64-bit lane into the low 128 bits, which is all that gets stored below.
#store | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_mask(%rip),%ymm3 | |||
vpermd %ymm4,%ymm3,%ymm4 | |||
vpermd %ymm5,%ymm3,%ymm5 | |||
vpermd %ymm6,%ymm3,%ymm6 | |||
vpermd %ymm7,%ymm3,%ymm7 | |||
vpermd %ymm8,%ymm3,%ymm8 | |||
vpermd %ymm9,%ymm3,%ymm9 | |||
vpermd %ymm10,%ymm3,%ymm10 | |||
vpermd %ymm11,%ymm3,%ymm11 | |||
vmovdqa %xmm4,(%rdi) | |||
vmovdqa %xmm5,128(%rdi) | |||
vmovdqa %xmm6,256(%rdi) | |||
vmovdqa %xmm7,384(%rdi) | |||
vmovdqa %xmm8,512(%rdi) | |||
vmovdqa %xmm9,640(%rdi) | |||
vmovdqa %xmm10,768(%rdi) | |||
vmovdqa %xmm11,896(%rdi) | |||
ret |
@@ -0,0 +1,26 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_NTT_H
#define PQCLEAN_DILITHIUM2_AVX2_NTT_H

/*
 * Prototypes of the AVX2 assembly routines for the forward / inverse NTT and
 * pointwise multiplication. The include guard carries the implementation
 * prefix, like the other headers in this directory, so it cannot collide
 * with another scheme's ntt.h.
 */

#include <stdint.h>

#include "nttconsts.h"
#include "params.h"

/* Forward NTT, first pass: reads coefficients from a, writes the 64-bit-lane
 * intermediate to tmp. zetas points at the constants for this pass. */
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
        const uint32_t *a,
        const uint32_t *zetas);

/* Forward NTT, second pass: reads the intermediate tmp, writes coefficients
 * to a. zetas points at the constants for this pass. */
void PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(uint32_t *a,
        const uint64_t *tmp,
        const uint32_t *zetas);

/* Inverse NTT, first pass: reads coefficients from a, writes the
 * 64-bit-lane intermediate to tmp. */
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
        const uint32_t *a,
        const uint32_t *zetas_inv);

/* Inverse NTT, second pass: reads the intermediate tmp, writes coefficients
 * to a. */
void PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(uint32_t *a,
        const uint64_t *tmp,
        const uint32_t *zetas_inv);

/* Pointwise product of a and b written to c.
 * NOTE(review): implemented in pointwise.S, which is not visible here;
 * confirm the exact reduction and accumulation semantics there. */
void PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);

#endif
@@ -0,0 +1,178 @@ | |||
.include "shuffle.inc" | |||
# Forward-NTT butterfly applied to four register pairs (rl_i, rh_i), i = 0..3.
# Register conventions established by the routines in this file:
#   ymm0 = 8xqinv, ymm1 = 8xq, ymm2 = 8x2q; ymm12-ymm15 are scratch.
# For every pair:
#   t  = (rh * zeta + ((rh * zeta * ymm0) mod 2^32) * ymm1) >> 32   (Montgomery-style reduction)
#   rh = rl + ymm2 - t
#   rl = rl + t
# vpmuludq only multiplies the low dword of each 64-bit lane, so the inputs
# are expected to hold one coefficient per 64-bit lane.
# z0..z3 name the zeta register for each pair (default ymm3). Callers may pass
# ymm12-ymm15 as zetas: all four zeta multiplications are done before the
# reduction overwrites those scratch registers.
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3 | |||
#mul | |||
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0 | |||
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1 | |||
vpmuludq %ymm\z2,%ymm\rh2,%ymm\rh2 | |||
vpmuludq %ymm\z3,%ymm\rh3,%ymm\rh3 | |||
#reduce | |||
vpmuludq %ymm0,%ymm\rh0,%ymm12 | |||
vpmuludq %ymm0,%ymm\rh1,%ymm13 | |||
vpmuludq %ymm0,%ymm\rh2,%ymm14 | |||
vpmuludq %ymm0,%ymm\rh3,%ymm15 | |||
vpmuludq %ymm1,%ymm12,%ymm12 | |||
vpmuludq %ymm1,%ymm13,%ymm13 | |||
vpmuludq %ymm1,%ymm14,%ymm14 | |||
vpmuludq %ymm1,%ymm15,%ymm15 | |||
vpaddq %ymm\rh0,%ymm12,%ymm12 | |||
vpaddq %ymm\rh1,%ymm13,%ymm13 | |||
vpaddq %ymm\rh2,%ymm14,%ymm14 | |||
vpaddq %ymm\rh3,%ymm15,%ymm15 | |||
vpsrlq $32,%ymm12,%ymm12 | |||
vpsrlq $32,%ymm13,%ymm13 | |||
vpsrlq $32,%ymm14,%ymm14 | |||
vpsrlq $32,%ymm15,%ymm15 | |||
#update | |||
vpaddd %ymm2,%ymm\rl0,%ymm\rh0 | |||
vpaddd %ymm2,%ymm\rl1,%ymm\rh1 | |||
vpaddd %ymm2,%ymm\rl2,%ymm\rh2 | |||
vpaddd %ymm2,%ymm\rl3,%ymm\rh3 | |||
vpaddd %ymm12,%ymm\rl0,%ymm\rl0 | |||
vpaddd %ymm13,%ymm\rl1,%ymm\rl1 | |||
vpaddd %ymm14,%ymm\rl2,%ymm\rl2 | |||
vpaddd %ymm15,%ymm\rl3,%ymm\rl3 | |||
vpsubd %ymm12,%ymm\rh0,%ymm\rh0 | |||
vpsubd %ymm13,%ymm\rh1,%ymm\rh1 | |||
vpsubd %ymm14,%ymm\rh2,%ymm\rh2 | |||
vpsubd %ymm15,%ymm\rh3,%ymm\rh3 | |||
.endm | |||
# First forward-NTT pass (stages labelled level0..level2).
# C prototype (ntt.h):
#   void ..._ntt_levels0t2_avx(uint64_t *tmp, const uint32_t *a, const uint32_t *zetas)
# With the System V AMD64 calling convention: rdi = tmp, rsi = a, rdx = zetas.
# Reads eight groups of four dword coefficients from (%rsi) at a stride of
# 128 bytes (zero-extended to one coefficient per 64-bit lane), reads the seven
# dwords at byte offsets 0..27 of (%rdx), and writes eight 32-byte vectors to
# (%rdi) at a stride of 256 bytes. The vmovdqa stores require rdi to be
# 32-byte aligned.
.global PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx | |||
PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx: | |||
#consts | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xq(%rip),%ymm1 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8x2q(%rip),%ymm2 | |||
# level0: one zeta (in the default zeta register ymm3) for all four pairs
level0: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpbroadcastd (%rdx),%ymm3 | |||
#load | |||
vpmovzxdq (%rsi),%ymm4 | |||
vpmovzxdq 128(%rsi),%ymm5 | |||
vpmovzxdq 256(%rsi),%ymm6 | |||
vpmovzxdq 384(%rsi),%ymm7 | |||
vpmovzxdq 512(%rsi),%ymm8 | |||
vpmovzxdq 640(%rsi),%ymm9 | |||
vpmovzxdq 768(%rsi),%ymm10 | |||
vpmovzxdq 896(%rsi),%ymm11 | |||
butterfly 4,5,6,7,8,9,10,11 | |||
# level1: two zetas, each shared by two pairs
level1: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpbroadcastd 4(%rdx),%ymm12 | |||
vpbroadcastd 8(%rdx),%ymm13 | |||
butterfly 4,5,8,9,6,7,10,11 12,12,13,13 | |||
# level2: four zetas, one per pair
level2: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpbroadcastd 12(%rdx),%ymm12 | |||
vpbroadcastd 16(%rdx),%ymm13 | |||
vpbroadcastd 20(%rdx),%ymm14 | |||
vpbroadcastd 24(%rdx),%ymm15 | |||
butterfly 4,6,8,10,5,7,9,11 12,13,14,15 | |||
#store | |||
vmovdqa %ymm4,(%rdi) | |||
vmovdqa %ymm5,256(%rdi) | |||
vmovdqa %ymm6,512(%rdi) | |||
vmovdqa %ymm7,768(%rdi) | |||
vmovdqa %ymm8,1024(%rdi) | |||
vmovdqa %ymm9,1280(%rdi) | |||
vmovdqa %ymm10,1536(%rdi) | |||
vmovdqa %ymm11,1792(%rdi) | |||
ret | |||
# Second forward-NTT pass. The symbol is named levels3t8, but the stages
# present in this routine are the five labelled level3..level7.
# C prototype (ntt.h):
#   void ..._ntt_levels3t8_avx(uint32_t *a, const uint64_t *tmp, const uint32_t *zetas)
# With the System V AMD64 calling convention: rdi = a, rsi = tmp, rdx = zetas.
# Reads eight consecutive 32-byte vectors (256 bytes) from (%rsi), reads the
# dwords at byte offsets 0..123 of (%rdx), and writes four 32-byte vectors
# (128 bytes, 32 dword coefficients) to (%rdi). rsi and rdi must be 32-byte
# aligned because vmovdqa is used.
# shuffle8/shuffle4 come from shuffle.inc (not shown here) -- presumably they
# interleave 8-dword / 4-dword halves of two registers; confirm against that file.
.global PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx | |||
PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx: | |||
#consts | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xq(%rip),%ymm1 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8x2q(%rip),%ymm2 | |||
#load | |||
vmovdqa (%rsi),%ymm4 | |||
vmovdqa 32(%rsi),%ymm5 | |||
vmovdqa 64(%rsi),%ymm6 | |||
vmovdqa 96(%rsi),%ymm7 | |||
vmovdqa 128(%rsi),%ymm8 | |||
vmovdqa 160(%rsi),%ymm9 | |||
vmovdqa 192(%rsi),%ymm10 | |||
vmovdqa 224(%rsi),%ymm11 | |||
# level3: one zeta (in the default zeta register ymm3) for all four pairs
level3: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpbroadcastd (%rdx),%ymm3 | |||
butterfly 4,5,6,7,8,9,10,11 | |||
# level4: two zetas; the blend keeps the first in the low four dwords of ymm12
# and puts the second into the high four dwords
level4: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpbroadcastd 4(%rdx),%ymm12 | |||
vpbroadcastd 8(%rdx),%ymm13 | |||
vpblendd $0xF0,%ymm13,%ymm12,%ymm12 | |||
shuffle8 4,8,3,8 | |||
shuffle8 5,9,4,9 | |||
shuffle8 6,10,5,10 | |||
shuffle8 7,11,6,11 | |||
butterfly 3,8,4,9,5,10,6,11 12,12,12,12 | |||
# level5: one vector of four zetas (one per 64-bit lane)
level5: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpmovzxdq 12(%rdx),%ymm12 | |||
shuffle4 3,5,7,5 | |||
shuffle4 8,10,3,10 | |||
shuffle4 4,6,8,6 | |||
shuffle4 9,11,4,11 | |||
butterfly 7,5,3,10,8,6,4,11 12,12,12,12 | |||
# level6: two vectors of four zetas, each shared by two pairs
level6: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpmovzxdq 28(%rdx),%ymm12 | |||
vpmovzxdq 44(%rdx),%ymm13 | |||
butterfly 7,5,8,6,3,10,4,11 12,12,13,13 | |||
# level7: four vectors of four zetas, one per pair
level7: | |||
#PQCLEAN_DILITHIUM2_AVX2_zetas | |||
vpmovzxdq 60(%rdx),%ymm12 | |||
vpmovzxdq 76(%rdx),%ymm13 | |||
vpmovzxdq 92(%rdx),%ymm14 | |||
vpmovzxdq 108(%rdx),%ymm15 | |||
butterfly 7,3,8,4,5,10,6,11 12,13,14,15 | |||
# Repack to 32-bit coefficients: shift one register of each pair into the
# high dword of every 64-bit lane and blend it (mask 0xAA = odd dwords) with
# its partner, then undo the lane shuffles before storing.
#store | |||
vpsllq $32,%ymm5,%ymm5 | |||
vpsllq $32,%ymm10,%ymm10 | |||
vpsllq $32,%ymm6,%ymm6 | |||
vpsllq $32,%ymm11,%ymm11 | |||
vpblendd $0xAA,%ymm5,%ymm7,%ymm7 | |||
vpblendd $0xAA,%ymm10,%ymm3,%ymm3 | |||
vpblendd $0xAA,%ymm6,%ymm8,%ymm8 | |||
vpblendd $0xAA,%ymm11,%ymm4,%ymm4 | |||
shuffle4 7,3,5,3 | |||
shuffle4 8,4,7,4 | |||
shuffle8 5,7,6,7 | |||
shuffle8 3,4,5,4 | |||
vmovdqa %ymm6,(%rdi) | |||
vmovdqa %ymm5,32(%rdi) | |||
vmovdqa %ymm7,64(%rdi) | |||
vmovdqa %ymm4,96(%rdi) | |||
ret |
@@ -0,0 +1,80 @@ | |||
#include "nttconsts.h" | |||
/* Scalar values used only to build the vector constants below. */
#define QINV 4236238847 // -q^(-1) mod 2^32
#define MONT 4193792ULL
#define DIV (((MONT*MONT % Q) * (Q-1) % Q) * ((Q-1) >> 8) % Q)

/* Initializer that replicates one 32-bit value into all eight lanes. */
#define BROADCAST8(v) {.as_arr = {(v), (v), (v), (v), (v), (v), (v), (v)}}

/*
 * 256-bit constants loaded by the assembly routines (ntt.s / invntt.s) via
 * their symbol names, so names and layout must not change.
 */
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xqinv = BROADCAST8(QINV);
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xq = BROADCAST8(Q);
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x2q = BROADCAST8(2 * Q);
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x256q = BROADCAST8(256 * Q);

/* vpermd index vector: selects dwords 0, 2, 4, 6 into the low four lanes. */
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_mask = {
    .as_arr = {0, 2, 4, 6, 0, 0, 0, 0}
};

/* Eight copies of a mask with the low 23 bits set. */
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x23ones = BROADCAST8(0x7FFFFF);

/* Scaling factor applied at the end of the inverse NTT. */
const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xdiv = BROADCAST8(DIV);

#undef BROADCAST8
#undef QINV
#undef MONT
#undef DIV
/*
 * Precomputed constant table for the forward NTT, stored in the N-element,
 * vector-aligned union type aligned_uint32xN_t.
 * NOTE(review): presumably this is what callers pass as the `zetas` argument
 * of the ntt_levels*_avx routines, with the values ordered to match the
 * offsets those routines read; confirm against the callers in poly.c.
 */
const aligned_uint32xN_t PQCLEAN_DILITHIUM2_AVX2_zetas = { | |||
.as_arr = { | |||
0, 25847, 5771523, 7861508, 237124, 7602457, 7504169, 466468, 1826347, 2725464, 1024112, 2706023, 95776, | |||
3077325, 3530437, 4450022, 4702672, 6927966, 2176455, 6851714, 5339162, 3475950, 6795196, 2091667, | |||
5037939, 266997, 4860065, 3407706, 2244091, 2434439, 4621053, 2316500, 5933984, 7144689, 7183191, | |||
3817976, 4817955, 3513181, 5187039, 2353451, 7300517, 3585928, 6718724, 4788269, 5842901, 3915439, | |||
7122806, 4296819, 5190273, 4747489, 1939314, 7380215, 5223087, 126922, 900702, 495491, 7725090, 4823422, | |||
1859098, 6767243, 5257975, 7855319, 909542, 8337157, 2031748, 7611795, 819034, 7857917, 3207046, 4784579, | |||
8021166, 7830929, 7260833, 4519302, 5336701, 3574422, 5512770, 3412210, 2147896, 5412772, 7969390, | |||
7396998, 2715295, 4686924, 5903370, 342297, 3437287, 2842341, 4055324, 286988, 5038140, 2691481, 1247620, | |||
5942594, 1735879, 5790267, 2486353, 4108315, 203044, 1265009, 1595974, 6288512, 2619752, 6271868, | |||
3539968, 8079950, 2348700, 7841118, 7709315, 8357436, 7998430, 1852771, 7151892, 7072248, 1349076, | |||
6949987, 4613401, 5386378, 7047359, 7929317, 1250494, 1869119, 1237275, 1312455, 2635921, 1903435, | |||
5062207, 3306115, 4832145, 7329447, 6950192, 6417775, 3119733, 6262231, 4520680, 6681150, 6736599, | |||
3505694, 4558682, 5037034, 508951, 44288, 904516, 264944, 3097992, 7280319, 3958618, 7100756, 1500165, | |||
7838005, 5796124, 1917081, 777191, 5548557, 4656147, 5834105, 2235880, 6709241, 594136, 7005614, 3406031, | |||
6533464, 4603424, 5495562, 6980856, 5102745, 3507263, 6239768, 6779997, 3699596, 4656075, 1653064, | |||
2389356, 759969, 8371839, 5130689, 8169440, 7063561, 6366809, 1957272, 5196991, 810149, 2432395, 3369112, | |||
162844, 1652634, 2454455, 185531, 1616392, 4686184, 8215696, 7173032, 3014001, 6581310, 3111497, 1757237, | |||
8360995, 811944, 531354, 954230, 3881043, 189548, 3159746, 5971092, 1315589, 4827145, 6529015, 8202977, | |||
1341330, 5341501, 2213111, 7953734, 6712985, 3523897, 7404533, 1723600, 7276084, 3866901, 1717735, | |||
6577327, 8119771, 269760, 472078, 1910376, 4546524, 2680103, 4010497, 280005, 3900724, 5823537, 2071892, | |||
5582638, 1285669, 7567685, 5361315, 4751448, 6795489, 6940675, 4499357, 3839961, 5441381, 183443, | |||
7826001, 3937738, 6144432, 7403526, 3919660, 1400424, 7959518, 1612842, 8332111, 7534263, 6094090, | |||
4834730, 7018208, 1976782 | |||
} | |||
}; | |||
/*
 * Precomputed constant table for the inverse NTT, stored in the N-element,
 * vector-aligned union type aligned_uint32xN_t.
 * NOTE(review): presumably this is what callers pass as the `zetas_inv`
 * argument of the invntt_levels*_avx routines, with the values ordered to
 * match the offsets those routines read; confirm against the callers in poly.c.
 */
const aligned_uint32xN_t PQCLEAN_DILITHIUM2_AVX2_zetas_inv = { | |||
.as_arr = { | |||
6403635, 1362209, 3545687, 2286327, 846154, 48306, 6767575, 420899, 6979993, 4460757, 976891, 2235985, | |||
4442679, 554416, 8196974, 2939036, 4540456, 3881060, 1439742, 1584928, 3628969, 3019102, 812732, 7094748, | |||
2797779, 6308525, 2556880, 4479693, 8100412, 4369920, 5700314, 3833893, 6470041, 7908339, 8110657, 260646, | |||
1803090, 6662682, 4513516, 1104333, 6656817, 975884, 4856520, 1667432, 426683, 6167306, 3038916, 7039087, | |||
177440, 1851402, 3553272, 7064828, 2409325, 5220671, 8190869, 4499374, 7426187, 7849063, 7568473, 19422, | |||
6623180, 5268920, 1799107, 5366416, 1207385, 164721, 3694233, 6764025, 8194886, 5925962, 6727783, 8217573, | |||
5011305, 5948022, 7570268, 3183426, 6423145, 2013608, 1316856, 210977, 3249728, 8578, 7620448, 5991061, | |||
6727353, 3724342, 4680821, 1600420, 2140649, 4873154, 3277672, 1399561, 2884855, 3776993, 1846953, 4974386, | |||
1374803, 7786281, 1671176, 6144537, 2546312, 3724270, 2831860, 7603226, 6463336, 2584293, 542412, 6880252, | |||
1279661, 4421799, 1100098, 5282425, 8115473, 7475901, 8336129, 7871466, 3343383, 3821735, 4874723, 1643818, | |||
1699267, 3859737, 2118186, 5260684, 1962642, 1430225, 1050970, 3548272, 5074302, 3318210, 6476982, 5744496, | |||
7067962, 7143142, 6511298, 7129923, 451100, 1333058, 2994039, 3767016, 1430430, 7031341, 1308169, 1228525, | |||
6527646, 381987, 22981, 671102, 539299, 6031717, 300467, 4840449, 2108549, 5760665, 2091905, 6784443, | |||
7115408, 8177373, 4272102, 5894064, 2590150, 6644538, 2437823, 7132797, 5688936, 3342277, 8093429, 4325093, | |||
5538076, 4943130, 8038120, 2477047, 3693493, 5665122, 983419, 411027, 2967645, 6232521, 4968207, 2867647, | |||
4805995, 3043716, 3861115, 1119584, 549488, 359251, 3595838, 5173371, 522500, 7561383, 768622, 6348669, | |||
43260, 7470875, 525098, 3122442, 1613174, 6521319, 3556995, 655327, 7884926, 7479715, 8253495, 3157330, | |||
1000202, 6441103, 3632928, 3190144, 4083598, 1257611, 4464978, 2537516, 3592148, 1661693, 4794489, 1079900, | |||
6026966, 3193378, 4867236, 3562462, 4562441, 1197226, 1235728, 2446433, 6063917, 3759364, 5945978, 6136326, | |||
4972711, 3520352, 8113420, 3342478, 6288750, 1585221, 4904467, 3041255, 1528703, 6203962, 1452451, 3677745, | |||
3930395, 4849980, 5303092, 8284641, 5674394, 7356305, 5654953, 6554070, 7913949, 876248, 777960, 8143293, | |||
518909, 2608894, 3975713 | |||
} | |||
}; |
@@ -0,0 +1,27 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_NTTCONSTS_H | |||
#define PQCLEAN_DILITHIUM2_AVX2_NTTCONSTS_H | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "alignment.h" | |||
#include "params.h" | |||
/* Vector-aligned arrays of eight and of N 32-bit words (see alignment.h);
 * access the words through .as_arr and the vectors through .as_vec. */
typedef ALIGNED_UINT32(8) aligned_uint32x8_t; | |||
typedef ALIGNED_UINT32(N) aligned_uint32xN_t; | |||
/* Eight-lane constants defined in nttconsts.c. The assembly sources refer to
 * these symbols by name, so they must keep external linkage and these exact
 * names. */
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xqinv; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xq; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x2q; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x256q; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_mask; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8x23ones; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM2_AVX2_8xdiv; | |||
/* N-element constant tables for the forward and the inverse NTT. */
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM2_AVX2_zetas; | |||
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM2_AVX2_zetas_inv; | |||
#endif //PQCLEAN_DILITHIUM2_AVX2_NTTCONSTS_H | |||
@@ -0,0 +1,305 @@ | |||
#include "packing.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "polyvec.h" | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_pack_pk | |||
* | |||
* Description: Bit-pack public key pk = (rho, t1). | |||
* | |||
* Arguments: - uint8_t pk[]: output byte array | |||
* - const uint8_t rho[]: byte array containing rho | |||
* - const polyveck *t1: pointer to vector t1 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_pack_pk( | |||
uint8_t *pk, | |||
const uint8_t *rho, | |||
const polyveck *t1) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
pk[i] = rho[i]; | |||
} | |||
pk += SEEDBYTES; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyt1_pack(pk + i * POLT1_SIZE_PACKED, &t1->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_unpack_pk | |||
* | |||
* Description: Unpack public key pk = (rho, t1). | |||
* | |||
* Arguments: - const uint8_t rho[]: output byte array for rho | |||
* - const polyveck *t1: pointer to output vector t1 | |||
* - uint8_t pk[]: byte array containing bit-packed pk | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_unpack_pk( | |||
uint8_t *rho, | |||
polyveck *t1, | |||
const uint8_t *pk) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
rho[i] = pk[i]; | |||
} | |||
pk += SEEDBYTES; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyt1_unpack(&t1->vec[i], pk + i * POLT1_SIZE_PACKED); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_pack_sk | |||
* | |||
* Description: Bit-pack secret key sk = (rho, key, tr, s1, s2, t0). | |||
* | |||
* Arguments: - uint8_t sk[]: output byte array | |||
* - const uint8_t rho[]: byte array containing rho | |||
* - const uint8_t key[]: byte array containing key | |||
* - const uint8_t tr[]: byte array containing tr | |||
* - const polyvecl *s1: pointer to vector s1 | |||
* - const polyveck *s2: pointer to vector s2 | |||
* - const polyveck *t0: pointer to vector t0 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_pack_sk( | |||
uint8_t *sk, | |||
const uint8_t *rho, | |||
const uint8_t *key, | |||
const uint8_t *tr, | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
sk[i] = rho[i]; | |||
} | |||
sk += SEEDBYTES; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
sk[i] = key[i]; | |||
} | |||
sk += SEEDBYTES; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
sk[i] = tr[i]; | |||
} | |||
sk += CRHBYTES; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyeta_pack(sk + i * POLETA_SIZE_PACKED, &s1->vec[i]); | |||
} | |||
sk += L * POLETA_SIZE_PACKED; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyeta_pack(sk + i * POLETA_SIZE_PACKED, &s2->vec[i]); | |||
} | |||
sk += K * POLETA_SIZE_PACKED; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyt0_pack(sk + i * POLT0_SIZE_PACKED, &t0->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_unpack_sk | |||
* | |||
* Description: Unpack secret key sk = (rho, key, tr, s1, s2, t0). | |||
* | |||
* Arguments: - const uint8_t rho[]: output byte array for rho | |||
* - const uint8_t key[]: output byte array for key | |||
* - const uint8_t tr[]: output byte array for tr | |||
* - const polyvecl *s1: pointer to output vector s1 | |||
* - const polyveck *s2: pointer to output vector s2 | |||
* - const polyveck *r0: pointer to output vector t0 | |||
* - uint8_t sk[]: byte array containing bit-packed sk | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_unpack_sk( | |||
uint8_t *rho, | |||
uint8_t *key, | |||
uint8_t *tr, | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const uint8_t *sk) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
rho[i] = sk[i]; | |||
} | |||
sk += SEEDBYTES; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
key[i] = sk[i]; | |||
} | |||
sk += SEEDBYTES; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
tr[i] = sk[i]; | |||
} | |||
sk += CRHBYTES; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack(&s1->vec[i], sk + i * POLETA_SIZE_PACKED); | |||
} | |||
sk += L * POLETA_SIZE_PACKED; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack(&s2->vec[i], sk + i * POLETA_SIZE_PACKED); | |||
} | |||
sk += K * POLETA_SIZE_PACKED; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyt0_unpack(&t0->vec[i], sk + i * POLT0_SIZE_PACKED); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_pack_sig | |||
* | |||
* Description: Bit-pack signature sig = (z, h, c). | |||
* | |||
* Arguments: - uint8_t sig[]: output byte array | |||
* - const polyvecl *z: pointer to vector z | |||
* - const polyveck *h: pointer to hint vector h | |||
* - const poly *c: pointer to PQCLEAN_DILITHIUM2_AVX2_challenge polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_pack_sig( | |||
uint8_t *sig, | |||
const polyvecl *z, | |||
const polyveck *h, | |||
const poly *c) { | |||
unsigned int i, j, k; | |||
uint64_t signs, mask; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyz_pack(sig + i * POLZ_SIZE_PACKED, &z->vec[i]); | |||
} | |||
sig += L * POLZ_SIZE_PACKED; | |||
/* Encode h */ | |||
k = 0; | |||
for (i = 0; i < K; ++i) { | |||
for (j = 0; j < N; ++j) { | |||
if (h->vec[i].coeffs[j] != 0) { | |||
sig[k++] = (uint8_t)j; | |||
} | |||
} | |||
sig[OMEGA + i] = (uint8_t)k; | |||
} | |||
while (k < OMEGA) { | |||
sig[k++] = 0; | |||
} | |||
sig += OMEGA + K; | |||
/* Encode c */ | |||
signs = 0; | |||
mask = 1; | |||
for (i = 0; i < N / 8; ++i) { | |||
sig[i] = 0; | |||
for (j = 0; j < 8; ++j) { | |||
if (c->coeffs[8 * i + j] != 0) { | |||
sig[i] |= (uint8_t)(1u << j); | |||
if (c->coeffs[8 * i + j] == (Q - 1)) { | |||
signs |= mask; | |||
} | |||
mask <<= 1; | |||
} | |||
} | |||
} | |||
sig += N / 8; | |||
for (i = 0; i < 8; ++i) { | |||
sig[i] = (uint8_t)(signs >> 8u * i); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_unpack_sig | |||
* | |||
* Description: Unpack signature sig = (z, h, c). | |||
* | |||
* Arguments: - polyvecl *z: pointer to output vector z | |||
* - polyveck *h: pointer to output hint vector h | |||
* - poly *c: pointer to output PQCLEAN_DILITHIUM2_AVX2_challenge polynomial | |||
* - const uint8_t sig[]: byte array containing | |||
* bit-packed signature | |||
* | |||
* Returns 1 in case of malformed signature; otherwise 0. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_AVX2_unpack_sig( | |||
polyvecl *z, | |||
polyveck *h, | |||
poly *c, | |||
const uint8_t *sig) { | |||
unsigned int i, j, k; | |||
uint64_t signs; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(&z->vec[i], sig + i * POLZ_SIZE_PACKED); | |||
} | |||
sig += L * POLZ_SIZE_PACKED; | |||
/* Decode h */ | |||
k = 0; | |||
for (i = 0; i < K; ++i) { | |||
for (j = 0; j < N; ++j) { | |||
h->vec[i].coeffs[j] = 0; | |||
} | |||
if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) { | |||
return 1; | |||
} | |||
for (j = k; j < sig[OMEGA + i]; ++j) { | |||
/* Coefficients are ordered for strong unforgeability */ | |||
if (j > k && sig[j] <= sig[j - 1]) { | |||
return 1; | |||
} | |||
h->vec[i].coeffs[sig[j]] = 1; | |||
} | |||
k = sig[OMEGA + i]; | |||
} | |||
/* Extra indices are zero for strong unforgeability */ | |||
for (j = k; j < OMEGA; ++j) { | |||
if (sig[j]) { | |||
return 1; | |||
} | |||
} | |||
sig += OMEGA + K; | |||
/* Decode c */ | |||
for (i = 0; i < N; ++i) { | |||
c->coeffs[i] = 0; | |||
} | |||
signs = 0; | |||
for (i = 0; i < 8; ++i) { | |||
signs |= (uint64_t)sig[N / 8 + i] << 8 * i; | |||
} | |||
/* Extra sign bits are zero for strong unforgeability */ | |||
if (signs >> 60) { | |||
return 1; | |||
} | |||
for (i = 0; i < N / 8; ++i) { | |||
for (j = 0; j < 8; ++j) { | |||
if ((sig[i] >> j) & 0x01) { | |||
c->coeffs[8 * i + j] = 1; | |||
c->coeffs[8 * i + j] ^= -((int32_t) signs & 1) & (1 ^ (Q - 1)); | |||
signs >>= 1; | |||
} | |||
} | |||
} | |||
return 0; | |||
} |
@@ -0,0 +1,36 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_PACKING_H | |||
#define PQCLEAN_DILITHIUM2_AVX2_PACKING_H | |||
#include "params.h" | |||
#include "polyvec.h" | |||
/* Serialize the public key pk = (rho, t1) into `pk`. */
void PQCLEAN_DILITHIUM2_AVX2_pack_pk( | |||
uint8_t *pk, | |||
const uint8_t *rho, const polyveck *t1); | |||
/* Serialize the secret key sk = (rho, key, tr, s1, s2, t0) into `sk`. */
void PQCLEAN_DILITHIUM2_AVX2_pack_sk( | |||
uint8_t *sk, | |||
const uint8_t *rho, | |||
const uint8_t *key, | |||
const uint8_t *tr, | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0); | |||
/* Serialize the signature sig = (z, h, c) into `sig`. */
void PQCLEAN_DILITHIUM2_AVX2_pack_sig( | |||
uint8_t *sig, | |||
const polyvecl *z, const polyveck *h, const poly *c); | |||
/* Deserialize a public key; `rho` and `t1` are outputs. */
void PQCLEAN_DILITHIUM2_AVX2_unpack_pk( | |||
uint8_t *rho, polyveck *t1, | |||
const uint8_t *pk); | |||
/* Deserialize a secret key; every argument except `sk` is an output. */
void PQCLEAN_DILITHIUM2_AVX2_unpack_sk( | |||
uint8_t *rho, | |||
uint8_t *key, | |||
uint8_t *tr, | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const uint8_t *sk); | |||
/* Deserialize a signature; returns 1 if the encoding is malformed and
 * 0 otherwise. */
int PQCLEAN_DILITHIUM2_AVX2_unpack_sig( | |||
polyvecl *z, polyveck *h, poly *c, const uint8_t *sig); | |||
#endif |
@@ -0,0 +1,33 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_PARAMS_H | |||
#define PQCLEAN_DILITHIUM2_AVX2_PARAMS_H | |||
/* Byte lengths of the raw fields stored in keys: rho and key use
 * SEEDBYTES, tr uses CRHBYTES (see pack_sk). */
#define SEEDBYTES 32 | |||
#define CRHBYTES 48 | |||
/* N: number of coefficients per polynomial. Q: the modulus; QBITS is the
 * number of bits sampled per candidate coefficient before rejection. */
#define N 256 | |||
#define Q 8380417 | |||
#define QBITS 23 | |||
/* D: number of low bits split off by power2round. */
#define D 14 | |||
/* ALPHA is the divisor used by decompose; GAMMA1/GAMMA2 are the values it
 * is derived from -- TODO confirm their roles against sign.c. */
#define GAMMA1 ((Q - 1)/16) | |||
#define GAMMA2 (GAMMA1/2) | |||
#define ALPHA (2*GAMMA2) | |||
/* Vector lengths: polyveck holds K polynomials, polyvecl holds L. */
#define K 4 | |||
#define L 3 | |||
/* ETA: sampled small coefficients lie in [-ETA, ETA]; each is packed into
 * SETABITS bits. */
#define ETA 6 | |||
#define SETABITS 4 | |||
/* BETA: presumably a norm-bound offset used during signing -- TODO confirm. */
#define BETA 325 | |||
/* OMEGA: number of hint-position slots in an encoded signature. */
#define OMEGA 80 | |||
/* Packed size in bytes of one polynomial for each encoding. */
#define POLT1_SIZE_PACKED ((N*(QBITS - D))/8) | |||
#define POLT0_SIZE_PACKED ((N*D)/8) | |||
#define POLETA_SIZE_PACKED ((N*SETABITS)/8) | |||
#define POLZ_SIZE_PACKED ((N*(QBITS - 3))/8) | |||
#define POLW1_SIZE_PACKED ((N*4)/8) | |||
/* Total encoded sizes; these mirror the layouts written by pack_pk,
 * pack_sk and pack_sig. */
#define CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLT1_SIZE_PACKED) | |||
#define CRYPTO_SECRETKEYBYTES (2*SEEDBYTES + (L + K)*POLETA_SIZE_PACKED + CRHBYTES + K*POLT0_SIZE_PACKED) | |||
#define CRYPTO_BYTES (L*POLZ_SIZE_PACKED + (OMEGA + K) + (N/8 + 8)) | |||
#endif |
@@ -0,0 +1,189 @@ | |||
#include "params.h" | |||
/*
 * PQCLEAN_DILITHIUM2_AVX2_pointwise_avx
 *
 * Coefficient-wise product of two arrays of 32-bit words, each product
 * reduced by: t = low32(p * qinv); r = (p + t * q) >> 32.
 * The C caller passes (c->coeffs, a->coeffs, b->coeffs); the routine reads
 * its inputs through %rsi and %rdx and writes the result through %rdi.
 * Every memory access is a vmovdqa, so all three buffers (and the two
 * constants) must be 32-byte aligned.
 * Work done: 10 loop iterations of 96 bytes plus a 64-byte tail, i.e.
 * 1024 bytes = 256 words per buffer.
 * Clobbers: %eax, flags, %ymm0-%ymm7, %ymm10-%ymm15; %rdi/%rsi/%rdx are
 * advanced by 960.
 */
.global PQCLEAN_DILITHIUM2_AVX2_pointwise_avx | |||
PQCLEAN_DILITHIUM2_AVX2_pointwise_avx: | |||
#consts | |||
/* ymm0 = qinv constant, ymm1 = q constant (one copy per 32-bit lane). */
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xq(%rip),%ymm1 | |||
/* eax = loop counter. */
xor %eax,%eax | |||
_looptop1: | |||
#load | |||
vmovdqa (%rsi),%ymm2 | |||
vmovdqa 32(%rsi),%ymm4 | |||
vmovdqa 64(%rsi),%ymm6 | |||
vmovdqa (%rdx),%ymm10 | |||
vmovdqa 32(%rdx),%ymm12 | |||
vmovdqa 64(%rdx),%ymm14 | |||
/* vpmuludq only multiplies the low 32 bits of each 64-bit lane, so the
   odd-indexed words are moved down into separate registers. */
vpsrlq $32,%ymm2,%ymm3 | |||
vpsrlq $32,%ymm4,%ymm5 | |||
vpsrlq $32,%ymm6,%ymm7 | |||
vpsrlq $32,%ymm10,%ymm11 | |||
vpsrlq $32,%ymm12,%ymm13 | |||
vpsrlq $32,%ymm14,%ymm15 | |||
#mul | |||
/* 64-bit products: even words in ymm2/4/6, odd words in ymm3/5/7. */
vpmuludq %ymm2,%ymm10,%ymm2 | |||
vpmuludq %ymm3,%ymm11,%ymm3 | |||
vpmuludq %ymm4,%ymm12,%ymm4 | |||
vpmuludq %ymm5,%ymm13,%ymm5 | |||
vpmuludq %ymm6,%ymm14,%ymm6 | |||
vpmuludq %ymm7,%ymm15,%ymm7 | |||
#reduce | |||
/* t = low32(product) * qinv */
vpmuludq %ymm0,%ymm2,%ymm10 | |||
vpmuludq %ymm0,%ymm3,%ymm11 | |||
vpmuludq %ymm0,%ymm4,%ymm12 | |||
vpmuludq %ymm0,%ymm5,%ymm13 | |||
vpmuludq %ymm0,%ymm6,%ymm14 | |||
vpmuludq %ymm0,%ymm7,%ymm15 | |||
/* t = low32(t) * q */
vpmuludq %ymm1,%ymm10,%ymm10 | |||
vpmuludq %ymm1,%ymm11,%ymm11 | |||
vpmuludq %ymm1,%ymm12,%ymm12 | |||
vpmuludq %ymm1,%ymm13,%ymm13 | |||
vpmuludq %ymm1,%ymm14,%ymm14 | |||
vpmuludq %ymm1,%ymm15,%ymm15 | |||
/* product + t; the reduced value is now in the high 32 bits of each lane. */
vpaddq %ymm2,%ymm10,%ymm2 | |||
vpaddq %ymm3,%ymm11,%ymm3 | |||
vpaddq %ymm4,%ymm12,%ymm4 | |||
vpaddq %ymm5,%ymm13,%ymm5 | |||
vpaddq %ymm6,%ymm14,%ymm6 | |||
vpaddq %ymm7,%ymm15,%ymm7 | |||
/* Only the even-word results are shifted down; the odd-word results
   already sit in the (odd) high halves where they belong. */
vpsrlq $32,%ymm2,%ymm2 | |||
vpsrlq $32,%ymm4,%ymm4 | |||
vpsrlq $32,%ymm6,%ymm6 | |||
#store | |||
/* 0xAA selects the odd 32-bit words from the first source register. */
vpblendd $0xAA,%ymm3,%ymm2,%ymm2 | |||
vpblendd $0xAA,%ymm5,%ymm4,%ymm4 | |||
vpblendd $0xAA,%ymm7,%ymm6,%ymm6 | |||
vmovdqa %ymm2,(%rdi) | |||
vmovdqa %ymm4,32(%rdi) | |||
vmovdqa %ymm6,64(%rdi) | |||
add $96,%rdi | |||
add $96,%rsi | |||
add $96,%rdx | |||
add $1,%eax | |||
cmp $10,%eax | |||
jb _looptop1 | |||
/* Tail: the remaining 64 bytes, same steps with two registers per input. */
vmovdqa (%rsi),%ymm2 | |||
vmovdqa 32(%rsi),%ymm4 | |||
vmovdqa (%rdx),%ymm10 | |||
vmovdqa 32(%rdx),%ymm12 | |||
vpsrlq $32,%ymm2,%ymm3 | |||
vpsrlq $32,%ymm4,%ymm5 | |||
vpsrlq $32,%ymm10,%ymm11 | |||
vpsrlq $32,%ymm12,%ymm13 | |||
#mul | |||
vpmuludq %ymm2,%ymm10,%ymm2 | |||
vpmuludq %ymm3,%ymm11,%ymm3 | |||
vpmuludq %ymm4,%ymm12,%ymm4 | |||
vpmuludq %ymm5,%ymm13,%ymm5 | |||
#reduce | |||
vpmuludq %ymm0,%ymm2,%ymm10 | |||
vpmuludq %ymm0,%ymm3,%ymm11 | |||
vpmuludq %ymm0,%ymm4,%ymm12 | |||
vpmuludq %ymm0,%ymm5,%ymm13 | |||
vpmuludq %ymm1,%ymm10,%ymm10 | |||
vpmuludq %ymm1,%ymm11,%ymm11 | |||
vpmuludq %ymm1,%ymm12,%ymm12 | |||
vpmuludq %ymm1,%ymm13,%ymm13 | |||
vpaddq %ymm2,%ymm10,%ymm2 | |||
vpaddq %ymm3,%ymm11,%ymm3 | |||
vpaddq %ymm4,%ymm12,%ymm4 | |||
vpaddq %ymm5,%ymm13,%ymm5 | |||
vpsrlq $32,%ymm2,%ymm2 | |||
vpsrlq $32,%ymm4,%ymm4 | |||
#store | |||
/* Same selection as the 0xAA blends above, written with the operands
   swapped: 0x55 takes the even words from ymm2/ymm4. */
vpblendd $0x55,%ymm2,%ymm3,%ymm2 | |||
vpblendd $0x55,%ymm4,%ymm5,%ymm4 | |||
vmovdqa %ymm2,(%rdi) | |||
vmovdqa %ymm4,32(%rdi) | |||
ret | |||
/*
 * pointwise off
 *
 * Loads 64 bytes from \off(%rsi) and from \off(%rdx) and forms the
 * unreduced 64-bit products of corresponding 32-bit words:
 * even-indexed words end up in ymm6/ymm8, odd-indexed words in ymm7/ymm9.
 * Clobbers ymm6-ymm13.
 */
.macro pointwise off | |||
#load | |||
vmovdqa \off(%rsi),%ymm6 | |||
vmovdqa \off+32(%rsi),%ymm8 | |||
vmovdqa \off(%rdx),%ymm10 | |||
vmovdqa \off+32(%rdx),%ymm12 | |||
/* Bring the odd-indexed words into the low halves for vpmuludq. */
vpsrlq $32,%ymm6,%ymm7 | |||
vpsrlq $32,%ymm8,%ymm9 | |||
vpsrlq $32,%ymm10,%ymm11 | |||
vpsrlq $32,%ymm12,%ymm13 | |||
#mul | |||
vpmuludq %ymm6,%ymm10,%ymm6 | |||
vpmuludq %ymm7,%ymm11,%ymm7 | |||
vpmuludq %ymm8,%ymm12,%ymm8 | |||
vpmuludq %ymm9,%ymm13,%ymm9 | |||
.endm | |||
/*
 * acc
 *
 * Adds the 64-bit products left in ymm6-ymm9 by the `pointwise` macro to
 * the running sums held in ymm2-ymm5.
 */
.macro acc | |||
vpaddq %ymm6,%ymm2,%ymm2 | |||
vpaddq %ymm7,%ymm3,%ymm3 | |||
vpaddq %ymm8,%ymm4,%ymm4 | |||
vpaddq %ymm9,%ymm5,%ymm5 | |||
.endm | |||
/*
 * PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx
 *
 * For each group of 16 words, sums three coefficient-wise products taken
 * at byte offsets 0, 1024 and 2048 from %rsi and %rdx, then applies one
 * qinv/q reduction to the sum and stores the result through %rdi.
 * 16 iterations of 64 bytes cover 1024 bytes = 256 output words.
 * Presumably %rsi and %rdx point at vectors of three consecutive
 * 1024-byte polynomials -- confirm against the C caller.
 * NOTE(review): the three 64-bit products are added before reduction, so
 * the inputs must be small enough that the sum cannot wrap -- confirm the
 * bound the callers guarantee.
 * All accesses are vmovdqa and require 32-byte alignment.
 * Clobbers: %eax, flags, %ymm0-%ymm13; %rdi/%rsi/%rdx are advanced by 1024.
 */
.global PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx | |||
PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx: | |||
#consts | |||
/* ymm0 = qinv constant, ymm1 = q constant. */
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xq(%rip),%ymm1 | |||
xor %eax,%eax | |||
_looptop2: | |||
pointwise 0 | |||
#mov | |||
/* The first product initialises the accumulators ymm2-ymm5. */
vmovdqa %ymm6,%ymm2 | |||
vmovdqa %ymm7,%ymm3 | |||
vmovdqa %ymm8,%ymm4 | |||
vmovdqa %ymm9,%ymm5 | |||
pointwise 1024 | |||
acc | |||
pointwise 2048 | |||
acc | |||
#reduce | |||
/* t = low32(sum) * qinv; t = low32(t) * q; sum += t; the result is the
   high 32 bits of each 64-bit lane. */
vpmuludq %ymm0,%ymm2,%ymm6 | |||
vpmuludq %ymm0,%ymm3,%ymm7 | |||
vpmuludq %ymm0,%ymm4,%ymm8 | |||
vpmuludq %ymm0,%ymm5,%ymm9 | |||
vpmuludq %ymm1,%ymm6,%ymm6 | |||
vpmuludq %ymm1,%ymm7,%ymm7 | |||
vpmuludq %ymm1,%ymm8,%ymm8 | |||
vpmuludq %ymm1,%ymm9,%ymm9 | |||
vpaddq %ymm2,%ymm6,%ymm2 | |||
vpaddq %ymm3,%ymm7,%ymm3 | |||
vpaddq %ymm4,%ymm8,%ymm4 | |||
vpaddq %ymm5,%ymm9,%ymm5 | |||
/* Even-word results move down; odd-word results stay in the high halves. */
vpsrlq $32,%ymm2,%ymm2 | |||
vpsrlq $32,%ymm4,%ymm4 | |||
#store | |||
/* Interleave: odd words from ymm3/ymm5, even words from ymm2/ymm4. */
vpblendd $0xAA,%ymm3,%ymm2,%ymm2 | |||
vpblendd $0xAA,%ymm5,%ymm4,%ymm4 | |||
vmovdqa %ymm2,(%rdi) | |||
vmovdqa %ymm4,32(%rdi) | |||
add $64,%rsi | |||
add $64,%rdx | |||
add $64,%rdi | |||
add $1,%eax | |||
cmp $16,%eax | |||
jb _looptop2 | |||
ret | |||
@@ -0,0 +1,914 @@ | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "fips202x4.h" | |||
#include "ntt.h" | |||
#include "nttconsts.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "reduce.h" | |||
#include "rejsample.h" | |||
#include "rounding.h" | |||
#include "symmetric.h" | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_reduce | |||
* | |||
* Description: Reduce all coefficients of input polynomial to representative | |||
* in [0,2*Q[. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_reduce(poly *a) { | |||
PQCLEAN_DILITHIUM2_AVX2_reduce_avx(a->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_csubq | |||
* | |||
* Description: For all coefficients of input polynomial subtract Q if | |||
* coefficient is bigger than Q. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_csubq(poly *a) { | |||
PQCLEAN_DILITHIUM2_AVX2_csubq_avx(a->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_freeze | |||
* | |||
* Description: Reduce all coefficients of the polynomial to standard | |||
* representatives. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_freeze(poly *a) { | |||
PQCLEAN_DILITHIUM2_AVX2_reduce_avx(a->coeffs); | |||
PQCLEAN_DILITHIUM2_AVX2_csubq_avx(a->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_add | |||
* | |||
* Description: Add polynomials. No modular reduction is performed. | |||
* | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const poly *a: pointer to first summand | |||
* - const poly *b: pointer to second summand | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_add(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
__m256i vec0, vec1; | |||
for (i = 0; i < N / 8; i++) { | |||
vec0 = _mm256_load_si256(&a->coeffs_x8[i]); | |||
vec1 = _mm256_load_si256(&b->coeffs_x8[i]); | |||
vec0 = _mm256_add_epi32(vec0, vec1); | |||
_mm256_store_si256(&c->coeffs_x8[i], vec0); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_sub | |||
* | |||
* Description: Subtract polynomials. Assumes coefficients of second input | |||
* polynomial to be less than 2*Q. No modular reduction is | |||
* performed. | |||
* | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const poly *a: pointer to first input polynomial | |||
* - const poly *b: pointer to second input polynomial to be | |||
* subtraced from first input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_sub(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
__m256i vec0, vec1; | |||
const __m256i twoq = _mm256_load_si256(_PQCLEAN_DILITHIUM2_AVX2_8x2q.as_vec); | |||
for (i = 0; i < N / 8; i++) { | |||
vec0 = _mm256_load_si256(&a->coeffs_x8[i]); | |||
vec1 = _mm256_load_si256(&b->coeffs_x8[i]); | |||
vec0 = _mm256_add_epi32(vec0, twoq); | |||
vec0 = _mm256_sub_epi32(vec0, vec1); | |||
_mm256_store_si256(&c->coeffs_x8[i], vec0); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_shiftl | |||
* | |||
* Description: Multiply polynomial by 2^D without modular reduction. Assumes | |||
* input coefficients to be less than 2^{32-D}. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_shiftl(poly *a) { | |||
unsigned int i; | |||
__m256i vec; | |||
for (i = 0; i < N / 8; i++) { | |||
vec = _mm256_load_si256(&a->coeffs_x8[i]); | |||
vec = _mm256_slli_epi32(vec, D); | |||
_mm256_store_si256(&a->coeffs_x8[i], vec); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_ntt | |||
* | |||
* Description: Forward NTT. Output coefficients can be up to 16*Q larger than | |||
* input coefficients. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_ntt(poly *a) { | |||
unsigned int i; | |||
ALIGNED_UINT64(N) tmp; | |||
for (i = 0; i < N / 32; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_ntt_levels0t2_avx(tmp.as_arr + 4 * i, a->coeffs + 4 * i, PQCLEAN_DILITHIUM2_AVX2_zetas.as_arr + 1); | |||
} | |||
for (i = 0; i < N / 32; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_ntt_levels3t8_avx(a->coeffs + 32 * i, tmp.as_arr + 32 * i, PQCLEAN_DILITHIUM2_AVX2_zetas.as_arr + 8 + 31 * i); | |||
} | |||
} | |||
/************************************************* | |||
* Name: poly_invntt_montgomery | |||
* | |||
* Description: Inverse NTT and multiplication with 2^{32}. Input coefficients | |||
* need to be less than 2*Q. Output coefficients are less than 2*Q. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_invntt_montgomery(poly *a) { | |||
unsigned int i; | |||
ALIGNED_UINT64(N) tmp; | |||
for (i = 0; i < N / 32; i++) { | |||
PQCLEAN_DILITHIUM2_AVX2_invntt_levels0t4_avx(tmp.as_arr + 32 * i, a->coeffs + 32 * i, PQCLEAN_DILITHIUM2_AVX2_zetas_inv.as_arr + 31 * i); | |||
} | |||
for (i = 0; i < N / 32; i++) { | |||
PQCLEAN_DILITHIUM2_AVX2_invntt_levels5t7_avx(a->coeffs + 4 * i, tmp.as_arr + 4 * i, PQCLEAN_DILITHIUM2_AVX2_zetas_inv.as_arr + 248); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_invmontgomery | |||
* | |||
* Description: Pointwise multiplication of polynomials in NTT domain | |||
* representation and multiplication of resulting polynomial | |||
* with 2^{-32}. Output coefficients are less than 2*Q if input | |||
* coefficient are less than 22*Q. | |||
* | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const poly *a: pointer to first input polynomial | |||
* - const poly *b: pointer to second input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_invmontgomery(poly *c, const poly *a, const poly *b) { | |||
PQCLEAN_DILITHIUM2_AVX2_pointwise_avx(c->coeffs, a->coeffs, b->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_power2round | |||
* | |||
* Description: For all coefficients c of the input polynomial, | |||
* compute c0, c1 such that c mod Q = c1*2^D + c0 | |||
* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be | |||
* standard representatives. | |||
* | |||
* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 | |||
* - poly *a0: pointer to output polynomial with coefficients Q + a0 | |||
* - const poly *v: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_power2round(poly *restrict a1, | |||
poly *restrict a0, | |||
const poly *restrict a) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a1->coeffs[i] = PQCLEAN_DILITHIUM2_AVX2_power2round(a->coeffs[i], &a0->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_decompose | |||
* | |||
* Description: For all coefficients c of the input polynomial, | |||
* compute high and low bits c0, c1 such c mod Q = c1*ALPHA + c0 | |||
* with -ALPHA/2 < c0 <= ALPHA/2 except c1 = (Q-1)/ALPHA where we | |||
* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. | |||
* Assumes coefficients to be standard representatives. | |||
* | |||
* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 | |||
* - poly *a0: pointer to output polynomial with coefficients Q + a0 | |||
* - const poly *c: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_decompose( | |||
poly *restrict a1, | |||
poly *restrict a0, | |||
const poly *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
a1->coeffs[i] = PQCLEAN_DILITHIUM2_AVX2_decompose(a->coeffs[i], &a0->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_make_hint | |||
* | |||
* Description: Compute hint polynomial. The coefficients of which indicate | |||
* whether the low bits of the corresponding coefficient of | |||
* the input polynomial overflow into the high bits. | |||
* | |||
* Arguments: - poly *h: pointer to output hint polynomial | |||
* - const poly *a0: pointer to low part of input polynomial | |||
* - const poly *a1: pointer to high part of input polynomial | |||
* | |||
* Returns number of 1 bits. | |||
**************************************************/ | |||
unsigned int PQCLEAN_DILITHIUM2_AVX2_poly_make_hint( | |||
poly *restrict h, | |||
const poly *restrict a0, | |||
const poly *restrict a1) { | |||
unsigned int i, s = 0; | |||
for (i = 0; i < N; ++i) { | |||
h->coeffs[i] = PQCLEAN_DILITHIUM2_AVX2_make_hint(a0->coeffs[i], a1->coeffs[i]); | |||
s += h->coeffs[i]; | |||
} | |||
return s; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_use_hint | |||
* | |||
* Description: Use hint polynomial to correct the high bits of a polynomial. | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial with corrected high bits | |||
* - const poly *b: pointer to input polynomial | |||
* - const poly *h: pointer to input hint polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_use_hint( | |||
poly *restrict a, | |||
const poly *restrict b, | |||
const poly *restrict h) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM2_AVX2_use_hint(b->coeffs[i], h->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_chknorm | |||
* | |||
* Description: Check infinity norm of polynomial against given bound. | |||
* Assumes input coefficients to be standard representatives. | |||
* | |||
* Arguments: - const poly *a: pointer to polynomial | |||
* - uint32_t B: norm bound | |||
* | |||
* Returns 0 if norm is strictly smaller than B and 1 otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(const poly *a, uint32_t B) { | |||
unsigned int i; | |||
int32_t t; | |||
/* It is ok to leak which coefficient violates the bound since | |||
the probability for each coefficient is independent of secret | |||
data but we must not leak the sign of the centralized representative. */ | |||
for (i = 0; i < N; ++i) { | |||
/* Absolute value of centralized representative */ | |||
t = (Q - 1) / 2 - a->coeffs[i]; | |||
t ^= (t >> 31); | |||
t = (Q - 1) / 2 - t; | |||
if ((uint32_t)t >= B) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: rej_uniform_ref | |||
* | |||
* Description: Sample uniformly random coefficients in [0, Q-1] by | |||
* performing rejection sampling using array of random bytes. | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
* random bytes were given. | |||
**************************************************/ | |||
static unsigned int rej_uniform_ref(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t; | |||
ctr = pos = 0; | |||
while (ctr < len && pos + 3 <= buflen) { | |||
t = buf[pos++]; | |||
t |= (uint32_t)buf[pos++] << 8; | |||
t |= (uint32_t)buf[pos++] << 16; | |||
t &= 0x7FFFFF; | |||
if (t < Q) { | |||
a[ctr++] = t; | |||
} | |||
} | |||
return ctr; | |||
} | |||
/************************************************* | |||
* Name: poly_uniform | |||
* | |||
* Description: Sample polynomial with uniformly random coefficients | |||
* in [0,Q-1] by performing rejection sampling using the | |||
* output stream from SHAKE256(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* SEEDBYTES | |||
* - uint16_t nonce: 2-byte nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_NBLOCKS ((769 + STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_BUFLEN (POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
uint16_t nonce) { | |||
unsigned int i, ctr, off; | |||
unsigned int nblocks = POLY_UNIFORM_NBLOCKS; | |||
unsigned int buflen = POLY_UNIFORM_BUFLEN; | |||
unsigned char buf[POLY_UNIFORM_BUFLEN + 2]; | |||
stream128_state state; | |||
stream128_init(&state, seed, nonce); | |||
stream128_squeezeblocks(buf, nblocks, &state); | |||
ctr = PQCLEAN_DILITHIUM2_AVX2_rej_uniform(a->coeffs, N, buf, buflen); | |||
while (ctr < N) { | |||
off = buflen % 3; | |||
for (i = 0; i < off; ++i) { | |||
buf[i] = buf[buflen - off + i]; | |||
} | |||
buflen = STREAM128_BLOCKBYTES + off; | |||
stream128_squeezeblocks(buf + off, 1, &state); | |||
ctr += rej_uniform_ref(a->coeffs + ctr, N - ctr, buf, buflen); | |||
} | |||
} | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(poly *a0, | |||
poly *a1, | |||
poly *a2, | |||
poly *a3, | |||
const unsigned char seed[SEEDBYTES], | |||
uint16_t nonce0, | |||
uint16_t nonce1, | |||
uint16_t nonce2, | |||
uint16_t nonce3) { | |||
unsigned int i, ctr0, ctr1, ctr2, ctr3; | |||
unsigned char inbuf[4][SEEDBYTES + 2]; | |||
unsigned char outbuf[4][5 * SHAKE128_RATE]; | |||
__m256i state[25]; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
inbuf[0][i] = seed[i]; | |||
inbuf[1][i] = seed[i]; | |||
inbuf[2][i] = seed[i]; | |||
inbuf[3][i] = seed[i]; | |||
} | |||
inbuf[0][SEEDBYTES + 0] = nonce0; | |||
inbuf[0][SEEDBYTES + 1] = nonce0 >> 8; | |||
inbuf[1][SEEDBYTES + 0] = nonce1; | |||
inbuf[1][SEEDBYTES + 1] = nonce1 >> 8; | |||
inbuf[2][SEEDBYTES + 0] = nonce2; | |||
inbuf[2][SEEDBYTES + 1] = nonce2 >> 8; | |||
inbuf[3][SEEDBYTES + 0] = nonce3; | |||
inbuf[3][SEEDBYTES + 1] = nonce3 >> 8; | |||
PQCLEAN_DILITHIUM2_AVX2_shake128_absorb4x(state, inbuf[0], inbuf[1], inbuf[2], inbuf[3], | |||
SEEDBYTES + 2); | |||
PQCLEAN_DILITHIUM2_AVX2_shake128_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 5, | |||
state); | |||
ctr0 = PQCLEAN_DILITHIUM2_AVX2_rej_uniform(a0->coeffs, N, outbuf[0], 5 * SHAKE128_RATE); | |||
ctr1 = PQCLEAN_DILITHIUM2_AVX2_rej_uniform(a1->coeffs, N, outbuf[1], 5 * SHAKE128_RATE); | |||
ctr2 = PQCLEAN_DILITHIUM2_AVX2_rej_uniform(a2->coeffs, N, outbuf[2], 5 * SHAKE128_RATE); | |||
ctr3 = PQCLEAN_DILITHIUM2_AVX2_rej_uniform(a3->coeffs, N, outbuf[3], 5 * SHAKE128_RATE); | |||
while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { | |||
PQCLEAN_DILITHIUM2_AVX2_shake128_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 1, | |||
state); | |||
ctr0 += rej_uniform_ref(a0->coeffs + ctr0, N - ctr0, outbuf[0], | |||
SHAKE128_RATE); | |||
ctr1 += rej_uniform_ref(a1->coeffs + ctr1, N - ctr1, outbuf[1], | |||
SHAKE128_RATE); | |||
ctr2 += rej_uniform_ref(a2->coeffs + ctr2, N - ctr2, outbuf[2], | |||
SHAKE128_RATE); | |||
ctr3 += rej_uniform_ref(a3->coeffs + ctr3, N - ctr3, outbuf[3], | |||
SHAKE128_RATE); | |||
} | |||
} | |||
/************************************************* | |||
* Name: rej_eta | |||
* | |||
* Description: Sample uniformly random coefficients in [-ETA, ETA] by | |||
* performing rejection sampling using array of random bytes. | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
* random bytes were given. | |||
**************************************************/ | |||
static unsigned int rej_eta_ref(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t0, t1; | |||
ctr = pos = 0; | |||
while (ctr < len && pos < buflen) { | |||
t0 = buf[pos] & 0x0F; | |||
t1 = buf[pos++] >> 4; | |||
if (t0 <= 2 * ETA) { | |||
a[ctr++] = Q + ETA - t0; | |||
} | |||
if (t1 <= 2 * ETA && ctr < len) { | |||
a[ctr++] = Q + ETA - t1; | |||
} | |||
} | |||
return ctr; | |||
} | |||
/************************************************* | |||
* Name: poly_uniform_eta | |||
* | |||
* Description: Sample polynomial with uniformly random coefficients | |||
* in [-ETA,ETA] by performing rejection sampling using the | |||
* output stream from SHAKE256(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* SEEDBYTES | |||
* - uint16_t nonce: 2-byte nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_ETA_NBLOCKS (((N / 2 * (1u << SETABITS)) / (2 * ETA + 1) + STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_ETA_BUFLEN (POLY_UNIFORM_ETA_NBLOCKS*STREAM128_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
uint16_t nonce) { | |||
unsigned int ctr; | |||
unsigned char buf[POLY_UNIFORM_ETA_BUFLEN]; | |||
stream128_state state; | |||
stream128_init(&state, seed, nonce); | |||
stream128_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state); | |||
ctr = PQCLEAN_DILITHIUM2_AVX2_rej_eta(a->coeffs, N, buf, POLY_UNIFORM_ETA_BUFLEN); | |||
while (ctr < N) { | |||
stream128_squeezeblocks(buf, 1, &state); | |||
ctr += rej_eta_ref(a->coeffs + ctr, N - ctr, buf, STREAM128_BLOCKBYTES); | |||
} | |||
} | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_4x(poly *a0, | |||
poly *a1, | |||
poly *a2, | |||
poly *a3, | |||
const unsigned char seed[SEEDBYTES], | |||
uint16_t nonce0, | |||
uint16_t nonce1, | |||
uint16_t nonce2, | |||
uint16_t nonce3) { | |||
unsigned int i, ctr0, ctr1, ctr2, ctr3; | |||
unsigned char inbuf[4][SEEDBYTES + 2]; | |||
unsigned char outbuf[4][2 * SHAKE128_RATE]; | |||
__m256i state[25]; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
inbuf[0][i] = seed[i]; | |||
inbuf[1][i] = seed[i]; | |||
inbuf[2][i] = seed[i]; | |||
inbuf[3][i] = seed[i]; | |||
} | |||
inbuf[0][SEEDBYTES + 0] = nonce0; | |||
inbuf[0][SEEDBYTES + 1] = nonce0 >> 8; | |||
inbuf[1][SEEDBYTES + 0] = nonce1; | |||
inbuf[1][SEEDBYTES + 1] = nonce1 >> 8; | |||
inbuf[2][SEEDBYTES + 0] = nonce2; | |||
inbuf[2][SEEDBYTES + 1] = nonce2 >> 8; | |||
inbuf[3][SEEDBYTES + 0] = nonce3; | |||
inbuf[3][SEEDBYTES + 1] = nonce3 >> 8; | |||
PQCLEAN_DILITHIUM2_AVX2_shake128_absorb4x(state, inbuf[0], inbuf[1], inbuf[2], inbuf[3], | |||
SEEDBYTES + 2); | |||
PQCLEAN_DILITHIUM2_AVX2_shake128_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 2, | |||
state); | |||
ctr0 = PQCLEAN_DILITHIUM2_AVX2_rej_eta(a0->coeffs, N, outbuf[0], 2 * SHAKE128_RATE); | |||
ctr1 = PQCLEAN_DILITHIUM2_AVX2_rej_eta(a1->coeffs, N, outbuf[1], 2 * SHAKE128_RATE); | |||
ctr2 = PQCLEAN_DILITHIUM2_AVX2_rej_eta(a2->coeffs, N, outbuf[2], 2 * SHAKE128_RATE); | |||
ctr3 = PQCLEAN_DILITHIUM2_AVX2_rej_eta(a3->coeffs, N, outbuf[3], 2 * SHAKE128_RATE); | |||
while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { | |||
PQCLEAN_DILITHIUM2_AVX2_shake128_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 1, | |||
state); | |||
ctr0 += rej_eta_ref(a0->coeffs + ctr0, N - ctr0, outbuf[0], SHAKE128_RATE); | |||
ctr1 += rej_eta_ref(a1->coeffs + ctr1, N - ctr1, outbuf[1], SHAKE128_RATE); | |||
ctr2 += rej_eta_ref(a2->coeffs + ctr2, N - ctr2, outbuf[2], SHAKE128_RATE); | |||
ctr3 += rej_eta_ref(a3->coeffs + ctr3, N - ctr3, outbuf[3], SHAKE128_RATE); | |||
} | |||
} | |||
/************************************************* | |||
* Name: rej_gamma1m1_ref | |||
* | |||
* Description: Sample uniformly random coefficients | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1] by performing rejection sampling | |||
* using array of random bytes. | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
* random bytes were given. | |||
**************************************************/ | |||
static unsigned int rej_gamma1m1_ref(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t0, t1; | |||
ctr = pos = 0; | |||
while (ctr < len && pos + 5 <= buflen) { | |||
t0 = buf[pos]; | |||
t0 |= (uint32_t)buf[pos + 1] << 8; | |||
t0 |= (uint32_t)buf[pos + 2] << 16; | |||
t0 &= 0xFFFFF; | |||
t1 = buf[pos + 2] >> 4; | |||
t1 |= (uint32_t)buf[pos + 3] << 4; | |||
t1 |= (uint32_t)buf[pos + 4] << 12; | |||
pos += 5; | |||
if (t0 <= 2 * GAMMA1 - 2) { | |||
a[ctr++] = Q + GAMMA1 - 1 - t0; | |||
} | |||
if (t1 <= 2 * GAMMA1 - 2 && ctr < len) { | |||
a[ctr++] = Q + GAMMA1 - 1 - t1; | |||
} | |||
} | |||
return ctr; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1m1 | |||
* | |||
* Description: Sample polynomial with uniformly random coefficients | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1] by performing rejection | |||
* sampling on output stream of SHAKE256(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* CRHBYTES | |||
* - uint16_t nonce: 16-bit nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_GAMMA1M1_NBLOCKS ((641 + STREAM256_BLOCKBYTES) / STREAM256_BLOCKBYTES) | |||
#define POLY_UNIFORM_GAMMA1M1_BUFLEN (POLY_UNIFORM_GAMMA1M1_NBLOCKS * STREAM256_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1m1(poly *a, | |||
const unsigned char seed[CRHBYTES], | |||
uint16_t nonce) { | |||
unsigned int i, ctr, off; | |||
unsigned int buflen = POLY_UNIFORM_GAMMA1M1_BUFLEN; | |||
unsigned char buf[POLY_UNIFORM_GAMMA1M1_BUFLEN + 4]; | |||
stream256_state state; | |||
stream256_init(&state, seed, nonce); | |||
stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1M1_NBLOCKS, &state); | |||
ctr = PQCLEAN_DILITHIUM2_AVX2_rej_gamma1m1(a->coeffs, N, buf, POLY_UNIFORM_GAMMA1M1_BUFLEN); | |||
while (ctr < N) { | |||
off = buflen % 5; | |||
for (i = 0; i < off; ++i) { | |||
buf[i] = buf[buflen - off + i]; | |||
} | |||
buflen = STREAM256_BLOCKBYTES + off; | |||
stream256_squeezeblocks(buf + off, 1, &state); | |||
ctr += rej_gamma1m1_ref(a->coeffs + ctr, N - ctr, buf, buflen); | |||
} | |||
} | |||
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1m1_4x(poly *a0, | |||
poly *a1, | |||
poly *a2, | |||
poly *a3, | |||
const unsigned char seed[CRHBYTES], | |||
uint16_t nonce0, | |||
uint16_t nonce1, | |||
uint16_t nonce2, | |||
uint16_t nonce3) { | |||
unsigned int i, ctr0, ctr1, ctr2, ctr3; | |||
unsigned char inbuf[4][CRHBYTES + 2]; | |||
unsigned char outbuf[4][5 * SHAKE256_RATE]; | |||
__m256i state[25]; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
inbuf[0][i] = seed[i]; | |||
inbuf[1][i] = seed[i]; | |||
inbuf[2][i] = seed[i]; | |||
inbuf[3][i] = seed[i]; | |||
} | |||
inbuf[0][CRHBYTES + 0] = nonce0 & 0xFF; | |||
inbuf[0][CRHBYTES + 1] = nonce0 >> 8; | |||
inbuf[1][CRHBYTES + 0] = nonce1 & 0xFF; | |||
inbuf[1][CRHBYTES + 1] = nonce1 >> 8; | |||
inbuf[2][CRHBYTES + 0] = nonce2 & 0xFF; | |||
inbuf[2][CRHBYTES + 1] = nonce2 >> 8; | |||
inbuf[3][CRHBYTES + 0] = nonce3 & 0xFF; | |||
inbuf[3][CRHBYTES + 1] = nonce3 >> 8; | |||
PQCLEAN_DILITHIUM2_AVX2_shake256_absorb4x(state, inbuf[0], inbuf[1], inbuf[2], inbuf[3], | |||
CRHBYTES + 2); | |||
PQCLEAN_DILITHIUM2_AVX2_shake256_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 5, | |||
state); | |||
ctr0 = rej_gamma1m1_ref(a0->coeffs, N, outbuf[0], 5 * SHAKE256_RATE); | |||
ctr1 = rej_gamma1m1_ref(a1->coeffs, N, outbuf[1], 5 * SHAKE256_RATE); | |||
ctr2 = rej_gamma1m1_ref(a2->coeffs, N, outbuf[2], 5 * SHAKE256_RATE); | |||
ctr3 = rej_gamma1m1_ref(a3->coeffs, N, outbuf[3], 5 * SHAKE256_RATE); | |||
while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { | |||
PQCLEAN_DILITHIUM2_AVX2_shake256_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 1, | |||
state); | |||
ctr0 += rej_gamma1m1_ref(a0->coeffs + ctr0, N - ctr0, outbuf[0], | |||
SHAKE256_RATE); | |||
ctr1 += rej_gamma1m1_ref(a1->coeffs + ctr1, N - ctr1, outbuf[1], | |||
SHAKE256_RATE); | |||
ctr2 += rej_gamma1m1_ref(a2->coeffs + ctr2, N - ctr2, outbuf[2], | |||
SHAKE256_RATE); | |||
ctr3 += rej_gamma1m1_ref(a3->coeffs + ctr3, N - ctr3, outbuf[3], | |||
SHAKE256_RATE); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyeta_pack | |||
* | |||
* Description: Bit-pack polynomial with coefficients in [-ETA,ETA]. | |||
* Input coefficients are assumed to lie in [Q-ETA,Q+ETA]. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLETA_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyeta_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
unsigned char t[8]; | |||
for (i = 0; i < N / 2; ++i) { | |||
t[0] = Q + ETA - a->coeffs[2 * i + 0]; | |||
t[1] = Q + ETA - a->coeffs[2 * i + 1]; | |||
r[i] = t[0] | (t[1] << 4); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack | |||
* | |||
* Description: Unpack polynomial with coefficients in [-ETA,ETA]. | |||
* Output coefficients lie in [Q-ETA,Q+ETA]. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack(poly *restrict r, const unsigned char *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r->coeffs[2 * i + 0] = a[i] & 0x0F; | |||
r->coeffs[2 * i + 1] = a[i] >> 4; | |||
r->coeffs[2 * i + 0] = Q + ETA - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 1] = Q + ETA - r->coeffs[2 * i + 1]; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyt1_pack | |||
* | |||
* Description: Bit-pack polynomial t1 with coefficients fitting in 9 bits. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLT1_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyt1_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 8; ++i) { | |||
r[9 * i + 0] = (uint8_t)((a->coeffs[8 * i + 0] >> 0)); | |||
r[9 * i + 1] = (uint8_t)((a->coeffs[8 * i + 0] >> 8) | (a->coeffs[8 * i + 1] << 1)); | |||
r[9 * i + 2] = (uint8_t)((a->coeffs[8 * i + 1] >> 7) | (a->coeffs[8 * i + 2] << 2)); | |||
r[9 * i + 3] = (uint8_t)((a->coeffs[8 * i + 2] >> 6) | (a->coeffs[8 * i + 3] << 3)); | |||
r[9 * i + 4] = (uint8_t)((a->coeffs[8 * i + 3] >> 5) | (a->coeffs[8 * i + 4] << 4)); | |||
r[9 * i + 5] = (uint8_t)((a->coeffs[8 * i + 4] >> 4) | (a->coeffs[8 * i + 5] << 5)); | |||
r[9 * i + 6] = (uint8_t)((a->coeffs[8 * i + 5] >> 3) | (a->coeffs[8 * i + 6] << 6)); | |||
r[9 * i + 7] = (uint8_t)((a->coeffs[8 * i + 6] >> 2) | (a->coeffs[8 * i + 7] << 7)); | |||
r[9 * i + 8] = (uint8_t)((a->coeffs[8 * i + 7] >> 1)); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyt1_unpack | |||
* | |||
* Description: Unpack polynomial t1 with 9-bit coefficients. | |||
* Output coefficients are standard representatives. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyt1_unpack(poly *restrict r, const unsigned char *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 8; ++i) { | |||
r->coeffs[8 * i + 0] = ((a[9 * i + 0] >> 0) | ((uint32_t)a[9 * i + 1] << 8)) & 0x1FF; | |||
r->coeffs[8 * i + 1] = ((a[9 * i + 1] >> 1) | ((uint32_t)a[9 * i + 2] << 7)) & 0x1FF; | |||
r->coeffs[8 * i + 2] = ((a[9 * i + 2] >> 2) | ((uint32_t)a[9 * i + 3] << 6)) & 0x1FF; | |||
r->coeffs[8 * i + 3] = ((a[9 * i + 3] >> 3) | ((uint32_t)a[9 * i + 4] << 5)) & 0x1FF; | |||
r->coeffs[8 * i + 4] = ((a[9 * i + 4] >> 4) | ((uint32_t)a[9 * i + 5] << 4)) & 0x1FF; | |||
r->coeffs[8 * i + 5] = ((a[9 * i + 5] >> 5) | ((uint32_t)a[9 * i + 6] << 3)) & 0x1FF; | |||
r->coeffs[8 * i + 6] = ((a[9 * i + 6] >> 6) | ((uint32_t)a[9 * i + 7] << 2)) & 0x1FF; | |||
r->coeffs[8 * i + 7] = ((a[9 * i + 7] >> 7) | ((uint32_t)a[9 * i + 8] << 1)) & 0x1FF; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyt0_pack | |||
* | |||
* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. | |||
* Input coefficients are assumed to lie in ]Q-2^{D-1}, Q+2^{D-1}]. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLT0_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyt0_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
uint32_t t[4]; | |||
for (i = 0; i < N / 4; ++i) { | |||
t[0] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 0]; | |||
t[1] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 1]; | |||
t[2] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 2]; | |||
t[3] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 3]; | |||
r[7 * i + 0] = t[0]; | |||
r[7 * i + 1] = t[0] >> 8; | |||
r[7 * i + 1] |= t[1] << 6; | |||
r[7 * i + 2] = t[1] >> 2; | |||
r[7 * i + 3] = t[1] >> 10; | |||
r[7 * i + 3] |= t[2] << 4; | |||
r[7 * i + 4] = t[2] >> 4; | |||
r[7 * i + 5] = t[2] >> 12; | |||
r[7 * i + 5] |= t[3] << 2; | |||
r[7 * i + 6] = t[3] >> 6; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyt0_unpack | |||
* | |||
* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. | |||
* Output coefficients lie in ]Q-2^{D-1},Q+2^{D-1}]. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyt0_unpack(poly *restrict r, const unsigned char *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 4; ++i) { | |||
r->coeffs[4 * i + 0] = a[7 * i + 0]; | |||
r->coeffs[4 * i + 0] |= (uint32_t)(a[7 * i + 1] & 0x3F) << 8; | |||
r->coeffs[4 * i + 1] = a[7 * i + 1] >> 6; | |||
r->coeffs[4 * i + 1] |= (uint32_t)a[7 * i + 2] << 2; | |||
r->coeffs[4 * i + 1] |= (uint32_t)(a[7 * i + 3] & 0x0F) << 10; | |||
r->coeffs[4 * i + 2] = a[7 * i + 3] >> 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t)a[7 * i + 4] << 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t)(a[7 * i + 5] & 0x03) << 12; | |||
r->coeffs[4 * i + 3] = a[7 * i + 5] >> 2; | |||
r->coeffs[4 * i + 3] |= (uint32_t)a[7 * i + 6] << 6; | |||
r->coeffs[4 * i + 0] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 0]; | |||
r->coeffs[4 * i + 1] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 1]; | |||
r->coeffs[4 * i + 2] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 2]; | |||
r->coeffs[4 * i + 3] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 3]; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyz_pack | |||
* | |||
* Description: Bit-pack polynomial z with coefficients | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1]. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLZ_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyz_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
uint32_t t[2]; | |||
for (i = 0; i < N / 2; ++i) { | |||
/* Map to {0,...,2*GAMMA1 - 2} */ | |||
t[0] = GAMMA1 - 1 - a->coeffs[2 * i + 0]; | |||
t[0] += ((int32_t)t[0] >> 31) & Q; | |||
t[1] = GAMMA1 - 1 - a->coeffs[2 * i + 1]; | |||
t[1] += ((int32_t)t[1] >> 31) & Q; | |||
r[5 * i + 0] = t[0]; | |||
r[5 * i + 1] = t[0] >> 8; | |||
r[5 * i + 2] = t[0] >> 16; | |||
r[5 * i + 2] |= t[1] << 4; | |||
r[5 * i + 3] = t[1] >> 4; | |||
r[5 * i + 4] = t[1] >> 12; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyz_unpack | |||
* | |||
* Description: Unpack polynomial z with coefficients | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1]. | |||
* Output coefficients are standard representatives. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(poly *restrict r, const unsigned char *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r->coeffs[2 * i + 0] = a[5 * i + 0]; | |||
r->coeffs[2 * i + 0] |= (uint32_t)a[5 * i + 1] << 8; | |||
r->coeffs[2 * i + 0] |= (uint32_t)(a[5 * i + 2] & 0x0F) << 16; | |||
r->coeffs[2 * i + 1] = a[5 * i + 2] >> 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 3] << 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 4] << 12; | |||
r->coeffs[2 * i + 0] = GAMMA1 - 1 - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 0] += ((int32_t)r->coeffs[2 * i + 0] >> 31) & Q; | |||
r->coeffs[2 * i + 1] = GAMMA1 - 1 - r->coeffs[2 * i + 1]; | |||
r->coeffs[2 * i + 1] += ((int32_t)r->coeffs[2 * i + 1] >> 31) & Q; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyw1_pack | |||
* | |||
* Description: Bit-pack polynomial w1 with coefficients in [0, 15]. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLW1_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyw1_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r[i] = a->coeffs[2 * i + 0] | (a->coeffs[2 * i + 1] << 4); | |||
} | |||
} |
@@ -0,0 +1,83 @@ | |||
#ifndef POLY_H
#define POLY_H

#include <immintrin.h>
#include <stdint.h>

#include "alignment.h"
#include "params.h"

/*
 * Polynomial with N coefficients. The same storage is exposed either as
 * N 32-bit words (coeffs) or as N / 8 256-bit vectors (coeffs_x8).
 */
typedef union {
    uint32_t coeffs[N];
    __m256i coeffs_x8[N / 8];
} poly;

/* Coefficient reduction. */
void PQCLEAN_DILITHIUM2_AVX2_poly_reduce(poly *a);
void PQCLEAN_DILITHIUM2_AVX2_poly_csubq(poly *a);
void PQCLEAN_DILITHIUM2_AVX2_poly_freeze(poly *a);

/* Coefficient-wise arithmetic. */
void PQCLEAN_DILITHIUM2_AVX2_poly_add(poly *c, const poly *a, const poly *b);
void PQCLEAN_DILITHIUM2_AVX2_poly_sub(poly *c, const poly *a, const poly *b);
void PQCLEAN_DILITHIUM2_AVX2_poly_shiftl(poly *a);

/* NTT-domain operations. */
void PQCLEAN_DILITHIUM2_AVX2_poly_ntt(poly *a);
void PQCLEAN_DILITHIUM2_AVX2_poly_invntt_montgomery(poly *a);
void PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_invmontgomery(poly *c, const poly *a, const poly *b);

/* Rounding, hints and norm check. */
void PQCLEAN_DILITHIUM2_AVX2_poly_power2round(poly *a1, poly *a0, const poly *a);
void PQCLEAN_DILITHIUM2_AVX2_poly_decompose(poly *a1, poly *a0, const poly *a);
unsigned int PQCLEAN_DILITHIUM2_AVX2_poly_make_hint(poly *h, const poly *a0, const poly *a1);
void PQCLEAN_DILITHIUM2_AVX2_poly_use_hint(poly *a, const poly *b, const poly *h);
int PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(const poly *a, uint32_t B);

/* Sampling from a seed and nonce; the _4x variants fill four polynomials per call. */
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform(
    poly *a, const uint8_t *seed, uint16_t nonce);
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(
    poly *a0, poly *a1, poly *a2, poly *a3,
    const uint8_t *seed,
    uint16_t nonce0, uint16_t nonce1, uint16_t nonce2, uint16_t nonce3);
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta(
    poly *a, const uint8_t *seed, uint16_t nonce);
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_4x(
    poly *a0, poly *a1, poly *a2, poly *a3,
    const uint8_t *seed,
    uint16_t nonce0, uint16_t nonce1, uint16_t nonce2, uint16_t nonce3);
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1m1(
    poly *a, const uint8_t *seed, uint16_t nonce);
void PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1m1_4x(
    poly *a0, poly *a1, poly *a2, poly *a3,
    const uint8_t *seed,
    uint16_t nonce0, uint16_t nonce1, uint16_t nonce2, uint16_t nonce3);

/* Bit-packing to and from byte arrays. */
void PQCLEAN_DILITHIUM2_AVX2_polyeta_pack(uint8_t *r, const poly *a);
void PQCLEAN_DILITHIUM2_AVX2_polyeta_unpack(poly *r, const uint8_t *a);
void PQCLEAN_DILITHIUM2_AVX2_polyt1_pack(uint8_t *r, const poly *a);
void PQCLEAN_DILITHIUM2_AVX2_polyt1_unpack(poly *r, const uint8_t *a);
void PQCLEAN_DILITHIUM2_AVX2_polyt0_pack(uint8_t *r, const poly *a);
void PQCLEAN_DILITHIUM2_AVX2_polyt0_unpack(poly *r, const uint8_t *a);
void PQCLEAN_DILITHIUM2_AVX2_polyz_pack(uint8_t *r, const poly *a);
void PQCLEAN_DILITHIUM2_AVX2_polyz_unpack(poly *r, const uint8_t *a);
void PQCLEAN_DILITHIUM2_AVX2_polyw1_pack(uint8_t *r, const poly *a);

#endif
@@ -0,0 +1,353 @@ | |||
#include <stdint.h> | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "polyvec.h" | |||
/**************************************************************/ | |||
/************ Vectors of polynomials of length L **************/ | |||
/**************************************************************/ | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_freeze | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length L | |||
* to standard representatives. | |||
* | |||
* Arguments: - polyvecl *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyvecl_freeze(polyvecl *v) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_freeze(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_add | |||
* | |||
* Description: Add vectors of polynomials of length L. | |||
* No modular reduction is performed. | |||
* | |||
* Arguments: - polyvecl *w: pointer to output vector | |||
* - const polyvecl *u: pointer to first summand | |||
* - const polyvecl *v: pointer to second summand | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt | |||
* | |||
* Description: Forward NTT of all polynomials in vector of length L. Output | |||
* coefficients can be up to 16*Q larger than input coefficients. | |||
* | |||
* Arguments: - polyvecl *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(polyvecl *v) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_invmontgomery | |||
* | |||
* Description: Pointwise multiply vectors of polynomials of length L, multiply | |||
* resulting vector by 2^{-32} and add (accumulate) polynomials | |||
* in it. Input/output vectors are in NTT domain representation. | |||
* Input coefficients are assumed to be less than 22*Q. Output | |||
* coeffcient are less than 2*L*Q. | |||
* | |||
* Arguments: - poly *w: output polynomial | |||
* - const polyvecl *u: pointer to first input vector | |||
* - const polyvecl *v: pointer to second input vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_invmontgomery(poly *w, | |||
const polyvecl *u, | |||
const polyvecl *v) { | |||
PQCLEAN_DILITHIUM2_AVX2_pointwise_acc_avx(w->coeffs, u->vec->coeffs, v->vec->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyvecl_chknorm | |||
* | |||
* Description: Check infinity norm of polynomials in vector of length L. | |||
* Assumes input coefficients to be standard representatives. | |||
* | |||
* Arguments: - const polyvecl *v: pointer to vector | |||
* - uint32_t B: norm bound | |||
* | |||
* Returns 0 if norm of all polynomials is strictly smaller than B and 1 | |||
* otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_AVX2_polyvecl_chknorm(const polyvecl *v, uint32_t bound) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
if (PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(&v->vec[i], bound)) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/**************************************************************/ | |||
/************ Vectors of polynomials of length K **************/ | |||
/**************************************************************/ | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_reduce | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length K | |||
* to representatives in [0,2*Q[. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_reduce(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_reduce(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_csubq | |||
* | |||
* Description: For all coefficients of polynomials in vector of length K | |||
* subtract Q if coefficient is bigger than Q. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_csubq(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_csubq(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: polyveck_freeze | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length K | |||
* to standard representatives. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_freeze(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_freeze(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_add | |||
* | |||
* Description: Add vectors of polynomials of length K. | |||
* No modular reduction is performed. | |||
* | |||
* Arguments: - polyveck *w: pointer to output vector | |||
* - const polyveck *u: pointer to first summand | |||
* - const polyveck *v: pointer to second summand | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_sub | |||
* | |||
* Description: Subtract vectors of polynomials of length K. | |||
* Assumes coefficients of polynomials in second input vector | |||
* to be less than 2*Q. No modular reduction is performed. | |||
* | |||
* Arguments: - polyveck *w: pointer to output vector | |||
* - const polyveck *u: pointer to first input vector | |||
* - const polyveck *v: pointer to second input vector to be | |||
* subtracted from first input vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_shiftl | |||
* | |||
* Description: Multiply vector of polynomials of Length K by 2^D without modular | |||
* reduction. Assumes input coefficients to be less than 2^{32-D}. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_shiftl(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_shiftl(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt | |||
* | |||
* Description: Forward NTT of all polynomials in vector of length K. Output | |||
* coefficients can be up to 16*Q larger than input coefficients. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_invntt_montgomery | |||
* | |||
* Description: Inverse NTT and multiplication by 2^{32} of polynomials | |||
* in vector of length K. Input coefficients need to be less | |||
* than 2*Q. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_invntt_montgomery(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_invntt_montgomery(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_chknorm | |||
* | |||
* Description: Check infinity norm of polynomials in vector of length K. | |||
* Assumes input coefficients to be standard representatives. | |||
* | |||
* Arguments: - const polyveck *v: pointer to vector | |||
* - uint32_t B: norm bound | |||
* | |||
* Returns 0 if norm of all polynomials are strictly smaller than B and 1 | |||
* otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_AVX2_polyveck_chknorm(const polyveck *v, uint32_t bound) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
if (PQCLEAN_DILITHIUM2_AVX2_poly_chknorm(&v->vec[i], bound)) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_power2round | |||
* | |||
* Description: For all coefficients a of polynomials in vector of length K, | |||
* compute a0, a1 such that a mod Q = a1*2^D + a0 | |||
* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be | |||
* standard representatives. | |||
* | |||
* Arguments: - polyveck *v1: pointer to output vector of polynomials with | |||
* coefficients a1 | |||
* - polyveck *v0: pointer to output vector of polynomials with | |||
* coefficients Q + a0 | |||
* - const polyveck *v: pointer to input vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_decompose | |||
* | |||
* Description: For all coefficients a of polynomials in vector of length K, | |||
* compute high and low bits a0, a1 such a mod Q = a1*ALPHA + a0 | |||
* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we | |||
* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. | |||
* Assumes coefficients to be standard representatives. | |||
* | |||
* Arguments: - polyveck *v1: pointer to output vector of polynomials with | |||
* coefficients a1 | |||
* - polyveck *v0: pointer to output vector of polynomials with | |||
* coefficients Q + a0 | |||
* - const polyveck *v: pointer to input vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_polyveck_make_hint | |||
* | |||
* Description: Compute hint vector. | |||
* | |||
* Arguments: - polyveck *h: pointer to output vector | |||
* - const polyveck *v0: pointer to low part of input vector | |||
* - const polyveck *v1: pointer to high part of input vector | |||
* | |||
* Returns number of 1 bits. | |||
**************************************************/ | |||
unsigned int PQCLEAN_DILITHIUM2_AVX2_polyveck_make_hint(polyveck *h, | |||
const polyveck *v0, | |||
const polyveck *v1) { | |||
unsigned int i, s = 0; | |||
for (i = 0; i < K; ++i) { | |||
s += PQCLEAN_DILITHIUM2_AVX2_poly_make_hint(&h->vec[i], &v0->vec[i], &v1->vec[i]); | |||
} | |||
return s; | |||
} | |||
/************************************************* | |||
* Name: polyveck_use_hint | |||
* | |||
* Description: Use hint vector to correct the high bits of input vector. | |||
* | |||
* Arguments: - polyveck *w: pointer to output vector of polynomials with | |||
* corrected high bits | |||
* - const polyveck *v: pointer to input vector | |||
* - const polyveck *h: pointer to input hint vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_use_hint(&w->vec[i], &v->vec[i], &h->vec[i]); | |||
} | |||
} |
@@ -0,0 +1,52 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_POLYVEC_H | |||
#define PQCLEAN_DILITHIUM2_AVX2_POLYVEC_H | |||
#include <stdint.h> | |||
#include "params.h" | |||
#include "poly.h" | |||
/* Vectors of polynomials of length L */ | |||
typedef struct { | |||
poly vec[L]; | |||
} polyvecl; | |||
void PQCLEAN_DILITHIUM2_AVX2_polyvecl_freeze(polyvecl *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(polyvecl *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_invmontgomery(poly *w, | |||
const polyvecl *u, | |||
const polyvecl *v); | |||
int PQCLEAN_DILITHIUM2_AVX2_polyvecl_chknorm(const polyvecl *v, uint32_t B); | |||
/* Vectors of polynomials of length K */ | |||
typedef struct { | |||
poly vec[K]; | |||
} polyveck; | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_reduce(polyveck *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_csubq(polyveck *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_freeze(polyveck *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_shiftl(polyveck *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt(polyveck *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_invntt_montgomery(polyveck *v); | |||
int PQCLEAN_DILITHIUM2_AVX2_polyveck_chknorm(const polyveck *v, uint32_t B); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v); | |||
unsigned int PQCLEAN_DILITHIUM2_AVX2_polyveck_make_hint(polyveck *h, | |||
const polyveck *v0, | |||
const polyveck *v1); | |||
void PQCLEAN_DILITHIUM2_AVX2_polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h); | |||
#endif |
@@ -0,0 +1,9 @@ | |||
#ifndef REDUCE_H | |||
#define REDUCE_H | |||
#include <stdint.h> | |||
void PQCLEAN_DILITHIUM2_AVX2_reduce_avx(uint32_t a[N]); | |||
void PQCLEAN_DILITHIUM2_AVX2_csubq_avx(uint32_t a[N]); | |||
#endif |
@@ -0,0 +1,91 @@ | |||
# PQCLEAN_DILITHIUM2_AVX2_reduce_avx
#
# Updates 256 consecutive 32-bit words in place, starting at the address in
# %rdi (first pointer argument under the System V AMD64 calling convention).
# With t = a >> 23 (logical shift) and m the per-lane constant loaded from
# _PQCLEAN_DILITHIUM2_AVX2_8x23ones, every word a becomes
#     (a & m) - t + (t << 13)  =  (a & m) + t * (2^13 - 1).
# NOTE(review): m is presumably 2^23 - 1 in every lane, and the result is
# congruent to a modulo Q only if Q = 2^23 - 2^13 + 1; neither the constant
# nor Q is defined in this file -- confirm against their definitions.
#
# The buffer is accessed with vmovdqa and therefore must be 32-byte aligned.
# Modifies %eax, %rdi (left pointing past the buffer) and %ymm0-%ymm8.
.global PQCLEAN_DILITHIUM2_AVX2_reduce_avx | |||
PQCLEAN_DILITHIUM2_AVX2_reduce_avx: | |||
#consts | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8x23ones(%rip),%ymm0 | |||
# %eax counts loop iterations (8 iterations of 128 bytes each)
xor %eax,%eax | |||
_looptop_rdc32: | |||
#load | |||
# four vectors of eight 32-bit words per iteration
vmovdqa (%rdi),%ymm1 | |||
vmovdqa 32(%rdi),%ymm3 | |||
vmovdqa 64(%rdi),%ymm5 | |||
vmovdqa 96(%rdi),%ymm7 | |||
#reduce | |||
# t = a >> 23 (bits above the low 23)
vpsrld $23,%ymm1,%ymm2 | |||
vpsrld $23,%ymm3,%ymm4 | |||
vpsrld $23,%ymm5,%ymm6 | |||
vpsrld $23,%ymm7,%ymm8 | |||
# a &= mask constant
vpand %ymm0,%ymm1,%ymm1 | |||
vpand %ymm0,%ymm3,%ymm3 | |||
vpand %ymm0,%ymm5,%ymm5 | |||
vpand %ymm0,%ymm7,%ymm7 | |||
# a -= t
vpsubd %ymm2,%ymm1,%ymm1 | |||
vpsubd %ymm4,%ymm3,%ymm3 | |||
vpsubd %ymm6,%ymm5,%ymm5 | |||
vpsubd %ymm8,%ymm7,%ymm7 | |||
# t <<= 13
vpslld $13,%ymm2,%ymm2 | |||
vpslld $13,%ymm4,%ymm4 | |||
vpslld $13,%ymm6,%ymm6 | |||
vpslld $13,%ymm8,%ymm8 | |||
# a += t, giving (a & mask) + t * (2^13 - 1) overall
vpaddd %ymm2,%ymm1,%ymm1 | |||
vpaddd %ymm4,%ymm3,%ymm3 | |||
vpaddd %ymm6,%ymm5,%ymm5 | |||
vpaddd %ymm8,%ymm7,%ymm7 | |||
#store | |||
vmovdqa %ymm1,(%rdi) | |||
vmovdqa %ymm3,32(%rdi) | |||
vmovdqa %ymm5,64(%rdi) | |||
vmovdqa %ymm7,96(%rdi) | |||
# advance to the next 128-byte chunk; stop after 8 chunks
add $128,%rdi | |||
add $1,%eax | |||
cmp $8,%eax | |||
jb _looptop_rdc32 | |||
ret | |||
# PQCLEAN_DILITHIUM2_AVX2_csubq_avx
#
# Conditional subtraction applied in place to 256 consecutive 32-bit words
# starting at the address in %rdi (first pointer argument under the System V
# AMD64 calling convention). With q the per-lane constant loaded from
# _PQCLEAN_DILITHIUM2_AVX2_8xq (presumably Q in every lane -- defined outside
# this file, confirm), every word a becomes a - q when that difference is
# non-negative as a signed 32-bit value, and stays a otherwise. No branch
# depends on the data.
#
# The buffer is accessed with vmovdqa and therefore must be 32-byte aligned.
# Modifies %eax, %rdi (left pointing past the buffer) and %ymm0-%ymm8.
.global PQCLEAN_DILITHIUM2_AVX2_csubq_avx | |||
PQCLEAN_DILITHIUM2_AVX2_csubq_avx: | |||
#consts | |||
vmovdqa _PQCLEAN_DILITHIUM2_AVX2_8xq(%rip),%ymm0 | |||
# %eax counts loop iterations (8 iterations of 128 bytes each)
xor %eax,%eax | |||
_looptop_csubq: | |||
#load | |||
vmovdqa (%rdi),%ymm1 | |||
vmovdqa 32(%rdi),%ymm3 | |||
vmovdqa 64(%rdi),%ymm5 | |||
vmovdqa 96(%rdi),%ymm7 | |||
#PQCLEAN_DILITHIUM2_AVX2_csubq | |||
# a -= q
vpsubd %ymm0,%ymm1,%ymm1 | |||
vpsubd %ymm0,%ymm3,%ymm3 | |||
vpsubd %ymm0,%ymm5,%ymm5 | |||
vpsubd %ymm0,%ymm7,%ymm7 | |||
# mask = a >> 31 (arithmetic): all ones where the difference is negative
vpsrad $31,%ymm1,%ymm2 | |||
vpsrad $31,%ymm3,%ymm4 | |||
vpsrad $31,%ymm5,%ymm6 | |||
vpsrad $31,%ymm7,%ymm8 | |||
# mask &= q
vpand %ymm0,%ymm2,%ymm2 | |||
vpand %ymm0,%ymm4,%ymm4 | |||
vpand %ymm0,%ymm6,%ymm6 | |||
vpand %ymm0,%ymm8,%ymm8 | |||
# add q back only in the lanes that went negative
vpaddd %ymm2,%ymm1,%ymm1 | |||
vpaddd %ymm4,%ymm3,%ymm3 | |||
vpaddd %ymm6,%ymm5,%ymm5 | |||
vpaddd %ymm8,%ymm7,%ymm7 | |||
#store | |||
vmovdqa %ymm1,(%rdi) | |||
vmovdqa %ymm3,32(%rdi) | |||
vmovdqa %ymm5,64(%rdi) | |||
vmovdqa %ymm7,96(%rdi) | |||
# advance to the next 128-byte chunk; stop after 8 chunks
add $128,%rdi | |||
add $1,%eax | |||
cmp $8,%eax | |||
jb _looptop_csubq | |||
ret |
@@ -0,0 +1,443 @@ | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "params.h" | |||
#include "rejsample.h" | |||
/*
 * Lane-compaction table used by the rejection samplers in this file.
 * Row m lists, in ascending order, the positions of the bits that are set
 * in the 8-bit mask m; the remaining entries of the row are zero. Used as a
 * permutation/shuffle control, a row moves the accepted lanes to the front.
 * Rows are laid out four per line, sixteen per group (one group per value
 * of the high nibble of the mask).
 */
static const uint8_t idx[256][8] = {
    /* masks 0x00 - 0x0F */
    {0, 0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0, 0}, {1, 0, 0, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 0, 0},
    {2, 0, 0, 0, 0, 0, 0, 0}, {0, 2, 0, 0, 0, 0, 0, 0}, {1, 2, 0, 0, 0, 0, 0, 0}, {0, 1, 2, 0, 0, 0, 0, 0},
    {3, 0, 0, 0, 0, 0, 0, 0}, {0, 3, 0, 0, 0, 0, 0, 0}, {1, 3, 0, 0, 0, 0, 0, 0}, {0, 1, 3, 0, 0, 0, 0, 0},
    {2, 3, 0, 0, 0, 0, 0, 0}, {0, 2, 3, 0, 0, 0, 0, 0}, {1, 2, 3, 0, 0, 0, 0, 0}, {0, 1, 2, 3, 0, 0, 0, 0},
    /* masks 0x10 - 0x1F */
    {4, 0, 0, 0, 0, 0, 0, 0}, {0, 4, 0, 0, 0, 0, 0, 0}, {1, 4, 0, 0, 0, 0, 0, 0}, {0, 1, 4, 0, 0, 0, 0, 0},
    {2, 4, 0, 0, 0, 0, 0, 0}, {0, 2, 4, 0, 0, 0, 0, 0}, {1, 2, 4, 0, 0, 0, 0, 0}, {0, 1, 2, 4, 0, 0, 0, 0},
    {3, 4, 0, 0, 0, 0, 0, 0}, {0, 3, 4, 0, 0, 0, 0, 0}, {1, 3, 4, 0, 0, 0, 0, 0}, {0, 1, 3, 4, 0, 0, 0, 0},
    {2, 3, 4, 0, 0, 0, 0, 0}, {0, 2, 3, 4, 0, 0, 0, 0}, {1, 2, 3, 4, 0, 0, 0, 0}, {0, 1, 2, 3, 4, 0, 0, 0},
    /* masks 0x20 - 0x2F */
    {5, 0, 0, 0, 0, 0, 0, 0}, {0, 5, 0, 0, 0, 0, 0, 0}, {1, 5, 0, 0, 0, 0, 0, 0}, {0, 1, 5, 0, 0, 0, 0, 0},
    {2, 5, 0, 0, 0, 0, 0, 0}, {0, 2, 5, 0, 0, 0, 0, 0}, {1, 2, 5, 0, 0, 0, 0, 0}, {0, 1, 2, 5, 0, 0, 0, 0},
    {3, 5, 0, 0, 0, 0, 0, 0}, {0, 3, 5, 0, 0, 0, 0, 0}, {1, 3, 5, 0, 0, 0, 0, 0}, {0, 1, 3, 5, 0, 0, 0, 0},
    {2, 3, 5, 0, 0, 0, 0, 0}, {0, 2, 3, 5, 0, 0, 0, 0}, {1, 2, 3, 5, 0, 0, 0, 0}, {0, 1, 2, 3, 5, 0, 0, 0},
    /* masks 0x30 - 0x3F */
    {4, 5, 0, 0, 0, 0, 0, 0}, {0, 4, 5, 0, 0, 0, 0, 0}, {1, 4, 5, 0, 0, 0, 0, 0}, {0, 1, 4, 5, 0, 0, 0, 0},
    {2, 4, 5, 0, 0, 0, 0, 0}, {0, 2, 4, 5, 0, 0, 0, 0}, {1, 2, 4, 5, 0, 0, 0, 0}, {0, 1, 2, 4, 5, 0, 0, 0},
    {3, 4, 5, 0, 0, 0, 0, 0}, {0, 3, 4, 5, 0, 0, 0, 0}, {1, 3, 4, 5, 0, 0, 0, 0}, {0, 1, 3, 4, 5, 0, 0, 0},
    {2, 3, 4, 5, 0, 0, 0, 0}, {0, 2, 3, 4, 5, 0, 0, 0}, {1, 2, 3, 4, 5, 0, 0, 0}, {0, 1, 2, 3, 4, 5, 0, 0},
    /* masks 0x40 - 0x4F */
    {6, 0, 0, 0, 0, 0, 0, 0}, {0, 6, 0, 0, 0, 0, 0, 0}, {1, 6, 0, 0, 0, 0, 0, 0}, {0, 1, 6, 0, 0, 0, 0, 0},
    {2, 6, 0, 0, 0, 0, 0, 0}, {0, 2, 6, 0, 0, 0, 0, 0}, {1, 2, 6, 0, 0, 0, 0, 0}, {0, 1, 2, 6, 0, 0, 0, 0},
    {3, 6, 0, 0, 0, 0, 0, 0}, {0, 3, 6, 0, 0, 0, 0, 0}, {1, 3, 6, 0, 0, 0, 0, 0}, {0, 1, 3, 6, 0, 0, 0, 0},
    {2, 3, 6, 0, 0, 0, 0, 0}, {0, 2, 3, 6, 0, 0, 0, 0}, {1, 2, 3, 6, 0, 0, 0, 0}, {0, 1, 2, 3, 6, 0, 0, 0},
    /* masks 0x50 - 0x5F */
    {4, 6, 0, 0, 0, 0, 0, 0}, {0, 4, 6, 0, 0, 0, 0, 0}, {1, 4, 6, 0, 0, 0, 0, 0}, {0, 1, 4, 6, 0, 0, 0, 0},
    {2, 4, 6, 0, 0, 0, 0, 0}, {0, 2, 4, 6, 0, 0, 0, 0}, {1, 2, 4, 6, 0, 0, 0, 0}, {0, 1, 2, 4, 6, 0, 0, 0},
    {3, 4, 6, 0, 0, 0, 0, 0}, {0, 3, 4, 6, 0, 0, 0, 0}, {1, 3, 4, 6, 0, 0, 0, 0}, {0, 1, 3, 4, 6, 0, 0, 0},
    {2, 3, 4, 6, 0, 0, 0, 0}, {0, 2, 3, 4, 6, 0, 0, 0}, {1, 2, 3, 4, 6, 0, 0, 0}, {0, 1, 2, 3, 4, 6, 0, 0},
    /* masks 0x60 - 0x6F */
    {5, 6, 0, 0, 0, 0, 0, 0}, {0, 5, 6, 0, 0, 0, 0, 0}, {1, 5, 6, 0, 0, 0, 0, 0}, {0, 1, 5, 6, 0, 0, 0, 0},
    {2, 5, 6, 0, 0, 0, 0, 0}, {0, 2, 5, 6, 0, 0, 0, 0}, {1, 2, 5, 6, 0, 0, 0, 0}, {0, 1, 2, 5, 6, 0, 0, 0},
    {3, 5, 6, 0, 0, 0, 0, 0}, {0, 3, 5, 6, 0, 0, 0, 0}, {1, 3, 5, 6, 0, 0, 0, 0}, {0, 1, 3, 5, 6, 0, 0, 0},
    {2, 3, 5, 6, 0, 0, 0, 0}, {0, 2, 3, 5, 6, 0, 0, 0}, {1, 2, 3, 5, 6, 0, 0, 0}, {0, 1, 2, 3, 5, 6, 0, 0},
    /* masks 0x70 - 0x7F */
    {4, 5, 6, 0, 0, 0, 0, 0}, {0, 4, 5, 6, 0, 0, 0, 0}, {1, 4, 5, 6, 0, 0, 0, 0}, {0, 1, 4, 5, 6, 0, 0, 0},
    {2, 4, 5, 6, 0, 0, 0, 0}, {0, 2, 4, 5, 6, 0, 0, 0}, {1, 2, 4, 5, 6, 0, 0, 0}, {0, 1, 2, 4, 5, 6, 0, 0},
    {3, 4, 5, 6, 0, 0, 0, 0}, {0, 3, 4, 5, 6, 0, 0, 0}, {1, 3, 4, 5, 6, 0, 0, 0}, {0, 1, 3, 4, 5, 6, 0, 0},
    {2, 3, 4, 5, 6, 0, 0, 0}, {0, 2, 3, 4, 5, 6, 0, 0}, {1, 2, 3, 4, 5, 6, 0, 0}, {0, 1, 2, 3, 4, 5, 6, 0},
    /* masks 0x80 - 0x8F */
    {7, 0, 0, 0, 0, 0, 0, 0}, {0, 7, 0, 0, 0, 0, 0, 0}, {1, 7, 0, 0, 0, 0, 0, 0}, {0, 1, 7, 0, 0, 0, 0, 0},
    {2, 7, 0, 0, 0, 0, 0, 0}, {0, 2, 7, 0, 0, 0, 0, 0}, {1, 2, 7, 0, 0, 0, 0, 0}, {0, 1, 2, 7, 0, 0, 0, 0},
    {3, 7, 0, 0, 0, 0, 0, 0}, {0, 3, 7, 0, 0, 0, 0, 0}, {1, 3, 7, 0, 0, 0, 0, 0}, {0, 1, 3, 7, 0, 0, 0, 0},
    {2, 3, 7, 0, 0, 0, 0, 0}, {0, 2, 3, 7, 0, 0, 0, 0}, {1, 2, 3, 7, 0, 0, 0, 0}, {0, 1, 2, 3, 7, 0, 0, 0},
    /* masks 0x90 - 0x9F */
    {4, 7, 0, 0, 0, 0, 0, 0}, {0, 4, 7, 0, 0, 0, 0, 0}, {1, 4, 7, 0, 0, 0, 0, 0}, {0, 1, 4, 7, 0, 0, 0, 0},
    {2, 4, 7, 0, 0, 0, 0, 0}, {0, 2, 4, 7, 0, 0, 0, 0}, {1, 2, 4, 7, 0, 0, 0, 0}, {0, 1, 2, 4, 7, 0, 0, 0},
    {3, 4, 7, 0, 0, 0, 0, 0}, {0, 3, 4, 7, 0, 0, 0, 0}, {1, 3, 4, 7, 0, 0, 0, 0}, {0, 1, 3, 4, 7, 0, 0, 0},
    {2, 3, 4, 7, 0, 0, 0, 0}, {0, 2, 3, 4, 7, 0, 0, 0}, {1, 2, 3, 4, 7, 0, 0, 0}, {0, 1, 2, 3, 4, 7, 0, 0},
    /* masks 0xA0 - 0xAF */
    {5, 7, 0, 0, 0, 0, 0, 0}, {0, 5, 7, 0, 0, 0, 0, 0}, {1, 5, 7, 0, 0, 0, 0, 0}, {0, 1, 5, 7, 0, 0, 0, 0},
    {2, 5, 7, 0, 0, 0, 0, 0}, {0, 2, 5, 7, 0, 0, 0, 0}, {1, 2, 5, 7, 0, 0, 0, 0}, {0, 1, 2, 5, 7, 0, 0, 0},
    {3, 5, 7, 0, 0, 0, 0, 0}, {0, 3, 5, 7, 0, 0, 0, 0}, {1, 3, 5, 7, 0, 0, 0, 0}, {0, 1, 3, 5, 7, 0, 0, 0},
    {2, 3, 5, 7, 0, 0, 0, 0}, {0, 2, 3, 5, 7, 0, 0, 0}, {1, 2, 3, 5, 7, 0, 0, 0}, {0, 1, 2, 3, 5, 7, 0, 0},
    /* masks 0xB0 - 0xBF */
    {4, 5, 7, 0, 0, 0, 0, 0}, {0, 4, 5, 7, 0, 0, 0, 0}, {1, 4, 5, 7, 0, 0, 0, 0}, {0, 1, 4, 5, 7, 0, 0, 0},
    {2, 4, 5, 7, 0, 0, 0, 0}, {0, 2, 4, 5, 7, 0, 0, 0}, {1, 2, 4, 5, 7, 0, 0, 0}, {0, 1, 2, 4, 5, 7, 0, 0},
    {3, 4, 5, 7, 0, 0, 0, 0}, {0, 3, 4, 5, 7, 0, 0, 0}, {1, 3, 4, 5, 7, 0, 0, 0}, {0, 1, 3, 4, 5, 7, 0, 0},
    {2, 3, 4, 5, 7, 0, 0, 0}, {0, 2, 3, 4, 5, 7, 0, 0}, {1, 2, 3, 4, 5, 7, 0, 0}, {0, 1, 2, 3, 4, 5, 7, 0},
    /* masks 0xC0 - 0xCF */
    {6, 7, 0, 0, 0, 0, 0, 0}, {0, 6, 7, 0, 0, 0, 0, 0}, {1, 6, 7, 0, 0, 0, 0, 0}, {0, 1, 6, 7, 0, 0, 0, 0},
    {2, 6, 7, 0, 0, 0, 0, 0}, {0, 2, 6, 7, 0, 0, 0, 0}, {1, 2, 6, 7, 0, 0, 0, 0}, {0, 1, 2, 6, 7, 0, 0, 0},
    {3, 6, 7, 0, 0, 0, 0, 0}, {0, 3, 6, 7, 0, 0, 0, 0}, {1, 3, 6, 7, 0, 0, 0, 0}, {0, 1, 3, 6, 7, 0, 0, 0},
    {2, 3, 6, 7, 0, 0, 0, 0}, {0, 2, 3, 6, 7, 0, 0, 0}, {1, 2, 3, 6, 7, 0, 0, 0}, {0, 1, 2, 3, 6, 7, 0, 0},
    /* masks 0xD0 - 0xDF */
    {4, 6, 7, 0, 0, 0, 0, 0}, {0, 4, 6, 7, 0, 0, 0, 0}, {1, 4, 6, 7, 0, 0, 0, 0}, {0, 1, 4, 6, 7, 0, 0, 0},
    {2, 4, 6, 7, 0, 0, 0, 0}, {0, 2, 4, 6, 7, 0, 0, 0}, {1, 2, 4, 6, 7, 0, 0, 0}, {0, 1, 2, 4, 6, 7, 0, 0},
    {3, 4, 6, 7, 0, 0, 0, 0}, {0, 3, 4, 6, 7, 0, 0, 0}, {1, 3, 4, 6, 7, 0, 0, 0}, {0, 1, 3, 4, 6, 7, 0, 0},
    {2, 3, 4, 6, 7, 0, 0, 0}, {0, 2, 3, 4, 6, 7, 0, 0}, {1, 2, 3, 4, 6, 7, 0, 0}, {0, 1, 2, 3, 4, 6, 7, 0},
    /* masks 0xE0 - 0xEF */
    {5, 6, 7, 0, 0, 0, 0, 0}, {0, 5, 6, 7, 0, 0, 0, 0}, {1, 5, 6, 7, 0, 0, 0, 0}, {0, 1, 5, 6, 7, 0, 0, 0},
    {2, 5, 6, 7, 0, 0, 0, 0}, {0, 2, 5, 6, 7, 0, 0, 0}, {1, 2, 5, 6, 7, 0, 0, 0}, {0, 1, 2, 5, 6, 7, 0, 0},
    {3, 5, 6, 7, 0, 0, 0, 0}, {0, 3, 5, 6, 7, 0, 0, 0}, {1, 3, 5, 6, 7, 0, 0, 0}, {0, 1, 3, 5, 6, 7, 0, 0},
    {2, 3, 5, 6, 7, 0, 0, 0}, {0, 2, 3, 5, 6, 7, 0, 0}, {1, 2, 3, 5, 6, 7, 0, 0}, {0, 1, 2, 3, 5, 6, 7, 0},
    /* masks 0xF0 - 0xFF */
    {4, 5, 6, 7, 0, 0, 0, 0}, {0, 4, 5, 6, 7, 0, 0, 0}, {1, 4, 5, 6, 7, 0, 0, 0}, {0, 1, 4, 5, 6, 7, 0, 0},
    {2, 4, 5, 6, 7, 0, 0, 0}, {0, 2, 4, 5, 6, 7, 0, 0}, {1, 2, 4, 5, 6, 7, 0, 0}, {0, 1, 2, 4, 5, 6, 7, 0},
    {3, 4, 5, 6, 7, 0, 0, 0}, {0, 3, 4, 5, 6, 7, 0, 0}, {1, 3, 4, 5, 6, 7, 0, 0}, {0, 1, 3, 4, 5, 6, 7, 0},
    {2, 3, 4, 5, 6, 7, 0, 0}, {0, 2, 3, 4, 5, 6, 7, 0}, {1, 2, 3, 4, 5, 6, 7, 0}, {0, 1, 2, 3, 4, 5, 6, 7}
};
unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_uniform( | |||
uint32_t *r, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int i, ctr, pos; | |||
uint32_t vec[8]; | |||
__m256i d, tmp; | |||
uint32_t good; | |||
const __m256i bound = _mm256_set1_epi32(Q); | |||
ctr = pos = 0; | |||
while (ctr + 8 <= len && pos + 24 <= buflen) { | |||
for (i = 0; i < 8; i++) { | |||
vec[i] = buf[pos++]; | |||
vec[i] |= (uint32_t)buf[pos++] << 8; | |||
vec[i] |= (uint32_t)buf[pos++] << 16; | |||
vec[i] &= 0x7FFFFF; | |||
} | |||
d = _mm256_loadu_si256((__m256i_u *)vec); | |||
tmp = _mm256_cmpgt_epi32(bound, d); | |||
good = _mm256_movemask_ps((__m256)tmp); | |||
__m128i rid = _mm_loadl_epi64((__m128i_u *)&idx[good]); | |||
tmp = _mm256_cvtepu8_epi32(rid); | |||
d = _mm256_permutevar8x32_epi32(d, tmp); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], d); | |||
ctr += __builtin_popcount(good); | |||
} | |||
while (ctr < len && pos + 3 <= buflen) { | |||
vec[0] = buf[pos++]; | |||
vec[0] |= (uint32_t)buf[pos++] << 8; | |||
vec[0] |= (uint32_t)buf[pos++] << 16; | |||
vec[0] &= 0x7FFFFF; | |||
if (vec[0] < Q) { | |||
r[ctr++] = vec[0]; | |||
} | |||
} | |||
return ctr; | |||
} | |||
unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_eta( | |||
uint32_t *r, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int i, ctr, pos; | |||
uint8_t vec[32]; | |||
__m256i tmp0, tmp1; | |||
__m128i d0, d1, rid; | |||
uint32_t good; | |||
const __m256i bound = _mm256_set1_epi8(2 * ETA + 1); | |||
const __m256i off = _mm256_set1_epi32(Q + ETA); | |||
ctr = pos = 0; | |||
while (ctr + 32 <= len && pos + 16 <= buflen) { | |||
for (i = 0; i < 16; i++) { | |||
vec[2 * i + 0] = buf[pos] & 0x0F; | |||
vec[2 * i + 1] = buf[pos++] >> 4; | |||
} | |||
tmp0 = _mm256_loadu_si256((__m256i_u *)vec); | |||
tmp1 = _mm256_cmpgt_epi8(bound, tmp0); | |||
good = _mm256_movemask_epi8(tmp1); | |||
d0 = _mm256_castsi256_si128(tmp0); | |||
rid = _mm_loadl_epi64((__m128i_u *)&idx[good & 0xFF]); | |||
d1 = _mm_shuffle_epi8(d0, rid); | |||
tmp1 = _mm256_cvtepu8_epi32(d1); | |||
tmp1 = _mm256_sub_epi32(off, tmp1); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], tmp1); | |||
ctr += __builtin_popcount(good & 0xFF); | |||
d0 = _mm_bsrli_si128(d0, 8); | |||
rid = _mm_loadl_epi64((__m128i_u *)&idx[(good >> 8) & 0xFF]); | |||
d1 = _mm_shuffle_epi8(d0, rid); | |||
tmp1 = _mm256_cvtepu8_epi32(d1); | |||
tmp1 = _mm256_sub_epi32(off, tmp1); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], tmp1); | |||
ctr += __builtin_popcount((good >> 8) & 0xFF); | |||
d0 = _mm256_extracti128_si256(tmp0, 1); | |||
rid = _mm_loadl_epi64((__m128i_u *)&idx[(good >> 16) & 0xFF]); | |||
d1 = _mm_shuffle_epi8(d0, rid); | |||
tmp1 = _mm256_cvtepu8_epi32(d1); | |||
tmp1 = _mm256_sub_epi32(off, tmp1); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], tmp1); | |||
ctr += __builtin_popcount((good >> 16) & 0xFF); | |||
d0 = _mm_bsrli_si128(d0, 8); | |||
rid = _mm_loadl_epi64((__m128i_u *)&idx[(good >> 24) & 0xFF]); | |||
d1 = _mm_shuffle_epi8(d0, rid); | |||
tmp1 = _mm256_cvtepu8_epi32(d1); | |||
tmp1 = _mm256_sub_epi32(off, tmp1); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], tmp1); | |||
ctr += __builtin_popcount((good >> 24) & 0xFF); | |||
} | |||
while (ctr < len && pos < buflen) { | |||
vec[0] = buf[pos] & 0x0F; | |||
vec[1] = buf[pos++] >> 4; | |||
if (vec[0] <= 2 * ETA) { | |||
r[ctr++] = Q + ETA - vec[0]; | |||
} | |||
if (vec[1] <= 2 * ETA && ctr < len) { | |||
r[ctr++] = Q + ETA - vec[1]; | |||
} | |||
} | |||
return ctr; | |||
} | |||
unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_gamma1m1( | |||
uint32_t *r, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int i, ctr, pos; | |||
uint32_t vec[8]; | |||
__m256i d, tmp; | |||
uint32_t good; | |||
const __m256i bound = _mm256_set1_epi32(2 * GAMMA1 - 1); | |||
const __m256i off = _mm256_set1_epi32(Q + GAMMA1 - 1); | |||
ctr = pos = 0; | |||
while (ctr + 8 <= len && pos + 20 <= buflen) { | |||
for (i = 0; i < 4; i++) { | |||
vec[2 * i + 0] = buf[pos + 0]; | |||
vec[2 * i + 0] |= (uint32_t)buf[pos + 1] << 8; | |||
vec[2 * i + 0] |= (uint32_t)buf[pos + 2] << 16; | |||
vec[2 * i + 0] &= 0xFFFFF; | |||
vec[2 * i + 1] = buf[pos + 2] >> 4; | |||
vec[2 * i + 1] |= (uint32_t)buf[pos + 3] << 4; | |||
vec[2 * i + 1] |= (uint32_t)buf[pos + 4] << 12; | |||
pos += 5; | |||
} | |||
d = _mm256_loadu_si256((__m256i_u *)vec); | |||
tmp = _mm256_cmpgt_epi32(bound, d); | |||
good = _mm256_movemask_ps((__m256)tmp); | |||
d = _mm256_sub_epi32(off, d); | |||
__m128i rid = _mm_loadl_epi64((__m128i_u *)&idx[good]); | |||
tmp = _mm256_cvtepu8_epi32(rid); | |||
d = _mm256_permutevar8x32_epi32(d, tmp); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], d); | |||
ctr += __builtin_popcount(good); | |||
} | |||
while (ctr < len && pos + 5 <= buflen) { | |||
vec[0] = buf[pos + 0]; | |||
vec[0] |= (uint32_t)buf[pos + 1] << 8; | |||
vec[0] |= (uint32_t)buf[pos + 2] << 16; | |||
vec[0] &= 0xFFFFF; | |||
vec[1] = buf[pos + 2] >> 4; | |||
vec[1] |= (uint32_t)buf[pos + 3] << 4; | |||
vec[1] |= (uint32_t)buf[pos + 4] << 12; | |||
pos += 5; | |||
if (vec[0] <= 2 * GAMMA1 - 2) { | |||
r[ctr++] = Q + GAMMA1 - 1 - vec[0]; | |||
} | |||
if (vec[1] <= 2 * GAMMA1 - 2 && ctr < len) { | |||
r[ctr++] = Q + GAMMA1 - 1 - vec[1]; | |||
} | |||
} | |||
return ctr; | |||
} |
@@ -0,0 +1,26 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_REJSAMPLE_H
#define PQCLEAN_DILITHIUM2_AVX2_REJSAMPLE_H

#include <stdint.h>

#include "poly.h"

/*
 * Rejection samplers. Each one reads candidates from buf (buflen bytes),
 * writes the accepted values to r and returns how many values it wrote,
 * which never exceeds len.
 */
unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_uniform(
    uint32_t *r,
    unsigned int len,
    const uint8_t *buf,
    unsigned int buflen);

unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_eta(
    uint32_t *r,
    unsigned int len,
    const uint8_t *buf,
    unsigned int buflen);

unsigned int PQCLEAN_DILITHIUM2_AVX2_rej_gamma1m1(
    uint32_t *r,
    unsigned int len,
    const uint8_t *buf,
    unsigned int buflen);

#endif
@@ -0,0 +1,115 @@ | |||
#include "rounding.h" | |||
/************************************************* | |||
* Name: power2round | |||
* | |||
* Description: For finite field element a, compute a0, a1 such that | |||
* a mod Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. | |||
* Assumes a to be standard representative. | |||
* | |||
* Arguments: - uint32_t a: input element | |||
* - uint32_t *a0: pointer to output element Q + a0 | |||
* | |||
* Returns a1. | |||
**************************************************/ | |||
uint32_t PQCLEAN_DILITHIUM2_AVX2_power2round(uint32_t a, uint32_t *a0) { | |||
int32_t t; | |||
/* Centralized remainder mod 2^D */ | |||
t = a & ((1U << D) - 1); | |||
t -= (1U << (D - 1)) + 1; | |||
t += (t >> 31) & (1U << D); | |||
t -= (1U << (D - 1)) - 1; | |||
*a0 = Q + t; | |||
a = (a - t) >> D; | |||
return a; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_decompose | |||
* | |||
* Description: For finite field element a, compute high and low bits a0, a1 such | |||
* that a mod Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except | |||
* if a1 = (Q-1)/ALPHA where we set a1 = 0 and | |||
* -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be standard | |||
* representative. | |||
* | |||
* Arguments: - uint32_t a: input element | |||
* - uint32_t *a0: pointer to output element Q + a0 | |||
* | |||
* Returns a1. | |||
**************************************************/ | |||
uint32_t PQCLEAN_DILITHIUM2_AVX2_decompose(uint32_t a, uint32_t *a0) { | |||
int32_t t, u; | |||
/* Centralized remainder mod ALPHA */ | |||
t = a & 0x7FFFF; | |||
t += (a >> 19) << 9; | |||
t -= ALPHA / 2 + 1; | |||
t += (t >> 31) & ALPHA; | |||
t -= ALPHA / 2 - 1; | |||
a -= t; | |||
/* Divide by ALPHA (possible to avoid) */ | |||
u = a - 1; | |||
u >>= 31; | |||
a = (a >> 19) + 1; | |||
a -= u & 1; | |||
/* Border case */ | |||
*a0 = Q + t - (a >> 4); | |||
a &= 0xF; | |||
return a; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_make_hint | |||
* | |||
* Description: Compute hint bit indicating whether the low bits of the | |||
* input element overflow into the high bits. Inputs assumed to be | |||
* standard representatives. | |||
* | |||
* Arguments: - uint32_t a0: low bits of input element | |||
* - uint32_t a1: high bits of input element | |||
* | |||
* Returns 1 if high bits of a and b differ and 0 otherwise. | |||
**************************************************/ | |||
unsigned int PQCLEAN_DILITHIUM2_AVX2_make_hint(const uint32_t a0, const uint32_t a1) { | |||
if (a0 <= GAMMA2 || a0 > Q - GAMMA2 || (a0 == Q - GAMMA2 && a1 == 0)) { | |||
return 0; | |||
} | |||
return 1; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_use_hint | |||
* | |||
* Description: Correct high bits according to hint. | |||
* | |||
* Arguments: - uint32_t a: input element | |||
* - unsigned int hint: hint bit | |||
* | |||
* Returns corrected high bits. | |||
**************************************************/ | |||
uint32_t PQCLEAN_DILITHIUM2_AVX2_use_hint(const uint32_t a, const unsigned int hint) { | |||
uint32_t a0, a1; | |||
a1 = PQCLEAN_DILITHIUM2_AVX2_decompose(a, &a0); | |||
if (hint == 0) { | |||
return a1; | |||
} | |||
if (a0 > Q) { | |||
return (a1 + 1) & 0xF; | |||
} | |||
return (a1 - 1) & 0xF; | |||
/* If decompose does not divide out ALPHA: | |||
if(hint == 0) | |||
return a1; | |||
else if(a0 > Q) | |||
return (a1 + ALPHA) % (Q - 1); | |||
else | |||
return (a1 - ALPHA) % (Q - 1); | |||
*/ | |||
} |
@@ -0,0 +1,12 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_AVX2_ROUNDING_H
#define PQCLEAN_DILITHIUM2_AVX2_ROUNDING_H

#include "params.h"
#include <stdint.h>

/* Split a into high bits (return value) and low bits (*a0 = Q + a0). */
uint32_t PQCLEAN_DILITHIUM2_AVX2_power2round(uint32_t a, uint32_t *a0);
uint32_t PQCLEAN_DILITHIUM2_AVX2_decompose(uint32_t a, uint32_t *a0);

/* Hint bit computation and its use to correct the high bits of a. */
unsigned int PQCLEAN_DILITHIUM2_AVX2_make_hint(uint32_t a0, uint32_t a1);
uint32_t PQCLEAN_DILITHIUM2_AVX2_use_hint(uint32_t a, unsigned int hint);

#endif
@@ -0,0 +1,23 @@ | |||
# shuffle8 r0,r1,r2,r3 -- arguments are ymm register numbers.
# Regroups the 128-bit halves of two registers:
#   ymm<r2> = { low half of ymm<r0>,  low half of ymm<r1>  }
#   ymm<r3> = { high half of ymm<r0>, high half of ymm<r1> }
# ymm<r2> is written before the second instruction reads ymm<r0> and
# ymm<r1>, so r2 must not be the same register as r0 or r1.
.macro shuffle8 r0,r1,r2,r3 | |||
vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2 | |||
vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3 | |||
.endm | |||
# shuffle4 r0,r1,r2,r3 -- arguments are ymm register numbers.
# Interleaves 64-bit elements within each 128-bit lane:
#   ymm<r2> = per lane { low qword of ymm<r0>,  low qword of ymm<r1>  }
#   ymm<r3> = per lane { high qword of ymm<r0>, high qword of ymm<r1> }
# ymm<r2> is written before the second instruction reads ymm<r0> and
# ymm<r1>, so r2 must not be the same register as r0 or r1.
.macro shuffle4 r0,r1,r2,r3 | |||
vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2 | |||
vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3 | |||
.endm | |||
# shuffle2 r0,r1,r2,r3 -- arguments are ymm register numbers.
# Interleaves 32-bit elements of two registers:
#   ymm<r2> = even-indexed dwords of ymm<r0> in the even positions,
#             even-indexed dwords of ymm<r1> in the odd positions
#   ymm<r3> = odd-indexed dwords of ymm<r0> in the even positions,
#             odd-indexed dwords of ymm<r1> in the odd positions
# Uses ymm12 and ymm13 as scratch, so neither may be passed as r0 or r1.
# ymm<r2> is written before the last instruction reads ymm<r1> and ymm13,
# so r2 must not be r1 or 13.
.macro shuffle2 r0,r1,r2,r3 | |||
vpsllq $32,%ymm\r1,%ymm12 | |||
vpsrlq $32,%ymm\r0,%ymm13 | |||
vpblendd $0xAA,%ymm12,%ymm\r0,%ymm\r2 | |||
vpblendd $0xAA,%ymm\r1,%ymm13,%ymm\r3 | |||
.endm | |||
# shuffle1 r0,r1,r2,r3 -- arguments are ymm register numbers.
# Interleaves 16-bit elements of two registers:
#   ymm<r2> = even-indexed words of ymm<r0> in the even positions,
#             even-indexed words of ymm<r1> in the odd positions
#   ymm<r3> = odd-indexed words of ymm<r0> in the even positions,
#             odd-indexed words of ymm<r1> in the odd positions
# Uses ymm12 and ymm13 as scratch, so neither may be passed as r0 or r1.
# ymm<r2> is written before the last instruction reads ymm<r1> and ymm13,
# so r2 must not be r1 or 13.
.macro shuffle1 r0,r1,r2,r3 | |||
vpslld $16,%ymm\r1,%ymm12 | |||
vpsrld $16,%ymm\r0,%ymm13 | |||
vpblendw $0xAA,%ymm12,%ymm\r0,%ymm\r2 | |||
vpblendw $0xAA,%ymm\r1,%ymm13,%ymm\r3 | |||
.endm |
@@ -0,0 +1,433 @@ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "fips202.h" | |||
#include "packing.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "polyvec.h" | |||
#include "randombytes.h" | |||
#include "sign.h" | |||
#include "symmetric.h" | |||
/************************************************* | |||
* Name: expand_mat | |||
* | |||
* Description: Implementation of ExpandA. Generates matrix A with uniformly | |||
* random coefficients a_{i,j} by performing rejection | |||
* sampling on the output stream of SHAKE128(rho|i|j). | |||
* | |||
* Arguments: - polyvecl mat[K]: output matrix | |||
* - const uint8_t rho[]: byte array containing seed rho | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_expand_mat(polyvecl mat[4], const uint8_t rho[SEEDBYTES]) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(&mat[0].vec[0], | |||
&mat[0].vec[1], | |||
&mat[0].vec[2], | |||
&mat[1].vec[0], | |||
rho, 0, 1, 2, 256); | |||
PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(&mat[1].vec[1], | |||
&mat[1].vec[2], | |||
&mat[2].vec[0], | |||
&mat[2].vec[1], | |||
rho, 257, 258, 512, 513); | |||
PQCLEAN_DILITHIUM2_AVX2_poly_uniform_4x(&mat[2].vec[2], | |||
&mat[3].vec[0], | |||
&mat[3].vec[1], | |||
&mat[3].vec[2], | |||
rho, 514, 768, 769, 770); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_challenge | |||
* | |||
* Description: Implementation of H. Samples polynomial with 60 nonzero | |||
* coefficients in {-1,1} using the output stream of | |||
* SHAKE256(mu|w1). | |||
* | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const uint8_t mu[]: byte array containing mu | |||
* - const polyveck *w1: pointer to vector w1 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_AVX2_challenge(poly *c, | |||
const uint8_t mu[CRHBYTES], | |||
const polyveck *w1) { | |||
unsigned int i, b, pos; | |||
uint64_t signs; | |||
uint8_t inbuf[CRHBYTES + K * POLW1_SIZE_PACKED]; | |||
uint8_t outbuf[SHAKE256_RATE]; | |||
shake256ctx state; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
inbuf[i] = mu[i]; | |||
} | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyw1_pack(inbuf + CRHBYTES + i * POLW1_SIZE_PACKED, &w1->vec[i]); | |||
} | |||
shake256_absorb(&state, inbuf, sizeof(inbuf)); | |||
shake256_squeezeblocks(outbuf, 1, &state); | |||
signs = 0; | |||
for (i = 0; i < 8; ++i) { | |||
signs |= (uint64_t) outbuf[i] << 8 * i; | |||
} | |||
pos = 8; | |||
for (i = 0; i < N; ++i) { | |||
c->coeffs[i] = 0; | |||
} | |||
for (i = 196; i < 256; ++i) { | |||
do { | |||
if (pos >= SHAKE256_RATE) { | |||
shake256_squeezeblocks(outbuf, 1, &state); | |||
pos = 0; | |||
} | |||
b = outbuf[pos++]; | |||
} while (b > i); | |||
c->coeffs[i] = c->coeffs[b]; | |||
c->coeffs[b] = 1; | |||
c->coeffs[b] ^= -(signs & 1) & (1 ^ (Q - 1)); | |||
signs >>= 1; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_crypto_sign_keypair | |||
* | |||
* Description: Generates public and private key. | |||
* | |||
* Arguments: - uint8_t *pk: pointer to output public key (allocated | |||
* array of PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES bytes) | |||
* - uint8_t *sk: pointer to output private key (allocated | |||
* array of PQCLEAN_DILITHIUM2_AVX2_CRYPTO_SECRETKEYBYTES bytes) | |||
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { | |||
unsigned int i; | |||
uint8_t seedbuf[3 * SEEDBYTES]; | |||
uint8_t tr[CRHBYTES]; | |||
const uint8_t *rho, *rhoprime, *key; | |||
uint16_t nonce = 0; | |||
polyvecl mat[K]; | |||
polyvecl s1, s1hat; | |||
polyveck s2, t, t1, t0; | |||
/* Expand 32 bytes of randomness into rho, rhoprime and key */ | |||
randombytes(seedbuf, 3 * SEEDBYTES); | |||
rho = seedbuf; | |||
rhoprime = seedbuf + SEEDBYTES; | |||
key = seedbuf + 2 * SEEDBYTES; | |||
/* Expand matrix */ | |||
PQCLEAN_DILITHIUM2_AVX2_expand_mat(mat, rho); | |||
/* Sample short vectors s1 and s2 */ | |||
PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s2.vec[0], rhoprime, | |||
nonce, nonce + 1, nonce + 2, nonce + 3); | |||
PQCLEAN_DILITHIUM2_AVX2_poly_uniform_eta_4x(&s2.vec[1], &s2.vec[2], &s2.vec[3], &t.vec[0], rhoprime, | |||
nonce + 4, nonce + 5, nonce + 6, 0); | |||
/* Matrix-vector multiplication */ | |||
s1hat = s1; | |||
PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(&s1hat); | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_invmontgomery(&t.vec[i], &mat[i], &s1hat); | |||
//PQCLEAN_DILITHIUM2_AVX2_poly_reduce(&t.vec[i]); | |||
PQCLEAN_DILITHIUM2_AVX2_poly_invntt_montgomery(&t.vec[i]); | |||
} | |||
/* Add error vector s2 */ | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_add(&t, &t, &s2); | |||
/* Extract t1 and write public key */ | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_freeze(&t); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_power2round(&t1, &t0, &t); | |||
PQCLEAN_DILITHIUM2_AVX2_pack_pk(pk, rho, &t1); | |||
/* Compute CRH(rho, t1) and write secret key */ | |||
crh(tr, pk, PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES); | |||
PQCLEAN_DILITHIUM2_AVX2_pack_sk(sk, rho, key, tr, &s1, &s2, &t0); | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_crypto_sign_signature | |||
* | |||
* Description: Compute signed message. | |||
* | |||
* Arguments: - uint8_t *sig: pointer to output signature (PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES | |||
* of len) | |||
* - size_t *siglen: pointer to output length of signed message | |||
* (should be PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES) | |||
* - uint8_t *m: pointer to message to be signed | |||
* - size_t mlen: length of message | |||
* - uint8_t *sk: pointer to bit-packed secret key | |||
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, | |||
const uint8_t *sk) { | |||
size_t i; | |||
unsigned int n; | |||
uint8_t seedbuf[2 * SEEDBYTES + 3 * CRHBYTES]; | |||
uint8_t *rho, *tr, *key, *mu, *rhoprime; | |||
uint16_t nonce = 0; | |||
poly c, chat; | |||
polyvecl mat[K], s1, y, yhat, z; | |||
polyveck t0, s2, w, w1, w0; | |||
polyveck h, cs2, ct0; | |||
rho = seedbuf; | |||
tr = rho + SEEDBYTES; | |||
key = tr + CRHBYTES; | |||
mu = key + SEEDBYTES; | |||
rhoprime = mu + CRHBYTES; | |||
PQCLEAN_DILITHIUM2_AVX2_unpack_sk(rho, key, tr, &s1, &s2, &t0, sk); | |||
// use incremental hash API instead of copying around buffers | |||
/* Compute CRH(tr, m) */ | |||
shake256incctx state; | |||
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, tr, CRHBYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(mu, CRHBYTES, &state); | |||
crh(rhoprime, key, SEEDBYTES + CRHBYTES); | |||
/* Expand matrix and transform vectors */ | |||
PQCLEAN_DILITHIUM2_AVX2_expand_mat(mat, rho); | |||
PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(&s1); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt(&s2); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt(&t0); | |||
rej: | |||
/* Sample intermediate vector y */ | |||
PQCLEAN_DILITHIUM2_AVX2_poly_uniform_gamma1m1_4x(&y.vec[0], &y.vec[1], &y.vec[2], &yhat.vec[0], | |||
rhoprime, nonce, nonce + 1, nonce + 2, 0); | |||
nonce += 3; | |||
/* Matrix-vector multiplication */ | |||
yhat = y; | |||
PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(&yhat); | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_invmontgomery(&w.vec[i], &mat[i], &yhat); | |||
PQCLEAN_DILITHIUM2_AVX2_poly_reduce(&w.vec[i]); | |||
PQCLEAN_DILITHIUM2_AVX2_poly_invntt_montgomery(&w.vec[i]); | |||
} | |||
/* Decompose w and call the random oracle */ | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_csubq(&w); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_decompose(&w1, &w0, &w); | |||
PQCLEAN_DILITHIUM2_AVX2_challenge(&c, mu, &w1); | |||
chat = c; | |||
PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&chat); | |||
/* Check that subtracting cs2 does not change high bits of w and low bits | |||
* do not reveal secret information */ | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_invmontgomery(&cs2.vec[i], &chat, &s2.vec[i]); | |||
PQCLEAN_DILITHIUM2_AVX2_poly_invntt_montgomery(&cs2.vec[i]); | |||
} | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_sub(&w0, &w0, &cs2); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_freeze(&w0); | |||
if (PQCLEAN_DILITHIUM2_AVX2_polyveck_chknorm(&w0, GAMMA2 - BETA)) { | |||
goto rej; | |||
} | |||
/* Compute z, reject if it reveals secret */ | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_invmontgomery(&z.vec[i], &chat, &s1.vec[i]); | |||
PQCLEAN_DILITHIUM2_AVX2_poly_invntt_montgomery(&z.vec[i]); | |||
} | |||
PQCLEAN_DILITHIUM2_AVX2_polyvecl_add(&z, &z, &y); | |||
PQCLEAN_DILITHIUM2_AVX2_polyvecl_freeze(&z); | |||
if (PQCLEAN_DILITHIUM2_AVX2_polyvecl_chknorm(&z, GAMMA1 - BETA)) { | |||
goto rej; | |||
} | |||
/* Compute hints for w1 */ | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_invmontgomery(&ct0.vec[i], &chat, &t0.vec[i]); | |||
PQCLEAN_DILITHIUM2_AVX2_poly_invntt_montgomery(&ct0.vec[i]); | |||
} | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_csubq(&ct0); | |||
if (PQCLEAN_DILITHIUM2_AVX2_polyveck_chknorm(&ct0, GAMMA2)) { | |||
goto rej; | |||
} | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_add(&w0, &w0, &ct0); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_csubq(&w0); | |||
n = PQCLEAN_DILITHIUM2_AVX2_polyveck_make_hint(&h, &w0, &w1); | |||
if (n > OMEGA) { | |||
goto rej; | |||
} | |||
/* Write signature */ | |||
PQCLEAN_DILITHIUM2_AVX2_pack_sig(sig, &z, &h, &c); | |||
*siglen = PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES; | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_crypto_sign | |||
* | |||
* Description: Compute signed message. | |||
* | |||
* Arguments: - uint8_t *sm: pointer to output signed message (allocated | |||
* array with PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES + mlen bytes), | |||
* can be equal to m | |||
* - unsigned long long *smlen: pointer to output length of signed | |||
* message | |||
* - const uint8_t *m: pointer to message to be signed | |||
* - unsigned long long mlen: length of message | |||
* - const uint8_t *sk: pointer to bit-packed secret key | |||
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, | |||
const uint8_t *sk) { | |||
int rc; | |||
memmove(sm + PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES, m, mlen); | |||
rc = PQCLEAN_DILITHIUM2_AVX2_crypto_sign_signature(sm, smlen, m, mlen, sk); | |||
*smlen += mlen; | |||
return rc; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_crypto_sign_verify | |||
* | |||
* Description: Verify signed message. | |||
* | |||
* Arguments: - uint8_t *sig: signature | |||
* - size_t siglen: length of signature (PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES) | |||
* - uint8_t *m: pointer to message | |||
* - size_t *mlen: pointer to output length of message | |||
* - uint8_t *pk: pointer to bit-packed public key | |||
* | |||
* Returns 0 if signed message could be verified correctly and -1 otherwise | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, | |||
const uint8_t *pk) { | |||
size_t i; | |||
uint8_t rho[SEEDBYTES]; | |||
uint8_t mu[CRHBYTES]; | |||
poly c, chat, cp; | |||
polyvecl mat[K], z; | |||
polyveck t1, w1, h, tmp1, tmp2; | |||
if (siglen < PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES) { | |||
return -1; | |||
} | |||
PQCLEAN_DILITHIUM2_AVX2_unpack_pk(rho, &t1, pk); | |||
if (PQCLEAN_DILITHIUM2_AVX2_unpack_sig(&z, &h, &c, sig)) { | |||
return -1; | |||
} | |||
if (PQCLEAN_DILITHIUM2_AVX2_polyvecl_chknorm(&z, GAMMA1 - BETA)) { | |||
return -1; | |||
} | |||
/* Compute CRH(CRH(rho, t1), msg) */ | |||
crh(mu, pk, PQCLEAN_DILITHIUM2_AVX2_CRYPTO_PUBLICKEYBYTES); | |||
shake256incctx state; | |||
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, mu, CRHBYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(mu, CRHBYTES, &state); | |||
/* Matrix-vector multiplication; compute Az - c2^dt1 */ | |||
PQCLEAN_DILITHIUM2_AVX2_expand_mat(mat, rho); | |||
PQCLEAN_DILITHIUM2_AVX2_polyvecl_ntt(&z); | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_polyvecl_pointwise_acc_invmontgomery(&tmp1.vec[i], &mat[i], &z); | |||
} | |||
chat = c; | |||
PQCLEAN_DILITHIUM2_AVX2_poly_ntt(&chat); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_shiftl(&t1); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_ntt(&t1); | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_AVX2_poly_pointwise_invmontgomery(&tmp2.vec[i], &chat, &t1.vec[i]); | |||
} | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_sub(&tmp1, &tmp1, &tmp2); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_reduce(&tmp1); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_invntt_montgomery(&tmp1); | |||
/* Reconstruct w1 */ | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_csubq(&tmp1); | |||
PQCLEAN_DILITHIUM2_AVX2_polyveck_use_hint(&w1, &tmp1, &h); | |||
/* Call random oracle and verify challenge */ | |||
PQCLEAN_DILITHIUM2_AVX2_challenge(&cp, mu, &w1); | |||
for (i = 0; i < N; ++i) { | |||
if (c.coeffs[i] != cp.coeffs[i]) { | |||
return -1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_AVX2_crypto_sign_open | |||
* | |||
* Description: Verify signed message. | |||
* | |||
* Arguments: - unsigned char *m: pointer to output message (allocated | |||
* array with smlen bytes), can be equal to sm | |||
* - unsigned long long *mlen: pointer to output length of message | |||
* - const unsigned char *sm: pointer to signed message | |||
* - unsigned long long smlen: length of signed message | |||
* - const unsigned char *pk: pointer to bit-packed public key | |||
* | |||
* Returns 0 if signed message could be verified correctly and -1 otherwise | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_AVX2_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, | |||
const uint8_t *pk) { | |||
size_t i; | |||
if (smlen < PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES) { | |||
goto badsig; | |||
} | |||
*mlen = smlen - PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES; | |||
if (PQCLEAN_DILITHIUM2_AVX2_crypto_sign_verify(sm, PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES, | |||
sm + PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES, *mlen, pk)) { | |||
goto badsig; | |||
} else { | |||
/* All good, copy msg, return 0 */ | |||
for (i = 0; i < *mlen; ++i) { | |||
m[i] = sm[PQCLEAN_DILITHIUM2_AVX2_CRYPTO_BYTES + i]; | |||
} | |||
return 0; | |||
} | |||
/* Signature verification failed */ | |||
badsig: | |||
*mlen = (size_t) -1; | |||
for (i = 0; i < smlen; ++i) { | |||
m[i] = 0; | |||
} | |||
return -1; | |||
} |
@@ -0,0 +1,15 @@ | |||
/* Internal helpers of the Dilithium2 AVX2 signing code that are shared
 * between translation units. The include guard carries the implementation
 * prefix, matching stream.h and symmetric.h in this directory. */
#ifndef PQCLEAN_DILITHIUM2_AVX2_SIGN_H
#define PQCLEAN_DILITHIUM2_AVX2_SIGN_H

#include <stdint.h>

#include "api.h"
#include "params.h"
#include "poly.h"
#include "polyvec.h"

/* Expands the K-row public matrix `mat` from the seed `rho`. */
void PQCLEAN_DILITHIUM2_AVX2_expand_mat(polyvecl mat[K], const uint8_t rho[SEEDBYTES]);

/* Derives the challenge polynomial `c` from `mu` and the vector `w1`. */
void PQCLEAN_DILITHIUM2_AVX2_challenge(poly *c, const uint8_t mu[CRHBYTES],
                                       const polyveck *w1);

#endif
@@ -0,0 +1,26 @@ | |||
#include "stream.h" | |||
#include <string.h> | |||
void PQCLEAN_DILITHIUM2_AVX2_shake128_stream_init( | |||
shake128ctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) { | |||
uint8_t buf[SEEDBYTES + 2]; | |||
memcpy(buf, seed, SEEDBYTES); | |||
buf[SEEDBYTES] = (uint8_t)nonce; | |||
buf[SEEDBYTES + 1] = (uint8_t)(nonce >> 8); | |||
shake128_absorb(state, buf, SEEDBYTES + 2); | |||
} | |||
void PQCLEAN_DILITHIUM2_AVX2_shake256_stream_init( | |||
shake256ctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) { | |||
uint8_t buf[CRHBYTES + 2]; | |||
memcpy(buf, seed, CRHBYTES); | |||
buf[CRHBYTES] = (uint8_t)nonce; | |||
buf[CRHBYTES + 1] = (uint8_t)(nonce >> 8); | |||
shake256_absorb(state, buf, CRHBYTES + 2); | |||
} |
@@ -0,0 +1,15 @@ | |||
/* Seed-and-nonce initialisation of the SHAKE states used as stream
 * generators by the Dilithium2 AVX2 implementation. */
#ifndef PQCLEAN_DILITHIUM2_AVX2_STREAM_H
#define PQCLEAN_DILITHIUM2_AVX2_STREAM_H

#include <stdint.h>

#include "fips202.h"
#include "params.h"

/* Absorbs a SEEDBYTES-byte seed plus a 16-bit nonce into a SHAKE128 state. */
void PQCLEAN_DILITHIUM2_AVX2_shake128_stream_init(
    shake128ctx *state,
    const uint8_t seed[SEEDBYTES],
    uint16_t nonce);

/* Absorbs a CRHBYTES-byte seed plus a 16-bit nonce into a SHAKE256 state. */
void PQCLEAN_DILITHIUM2_AVX2_shake256_stream_init(
    shake256ctx *state,
    const uint8_t seed[CRHBYTES],
    uint16_t nonce);

#endif
@@ -0,0 +1,23 @@ | |||
/* Maps the generic symmetric-primitive names used by the Dilithium code
 * (crh, stream128_*, stream256_*) onto their SHAKE-based implementations. */
#ifndef PQCLEAN_DILITHIUM2_AVX2_SYMMETRIC_H
#define PQCLEAN_DILITHIUM2_AVX2_SYMMETRIC_H

#include "params.h"
#include "stream.h"
#include "fips202.h"

/* Hash: SHAKE256 with a fixed output length of CRHBYTES */
#define crh(OUT, IN, INBYTES) shake256(OUT, CRHBYTES, IN, INBYTES)

/* 128-bit stream: SHAKE128 */
typedef shake128ctx stream128_state;
#define STREAM128_BLOCKBYTES SHAKE128_RATE
#define stream128_init(STATE, SEED, NONCE) PQCLEAN_DILITHIUM2_AVX2_shake128_stream_init(STATE, SEED, NONCE)
#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE)

/* 256-bit stream: SHAKE256 */
typedef shake256ctx stream256_state;
#define STREAM256_BLOCKBYTES SHAKE256_RATE
#define stream256_init(STATE, SEED, NONCE) PQCLEAN_DILITHIUM2_AVX2_shake256_stream_init(STATE, SEED, NONCE)
#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) shake256_squeezeblocks(OUT, OUTBLOCKS, STATE)

#endif
@@ -1,2 +1,6 @@ | |||
Public Domain | |||
Authors: Léo Ducas, Eike Kiltz, Tancrède Lepoint, Vadim Lyubashevsky, Gregor Seiler, Peter Schwabe, Damien Stehlé | |||
Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) | |||
For Keccak and the random number generator | |||
we are using public-domain code from sources | |||
and by authors listed in comments on top of | |||
the respective files. |
@@ -2,10 +2,10 @@ | |||
LIB=libdilithium2_clean.a | |||
SOURCES = sign.c polyvec.c poly.c packing.c ntt.c reduce.c rounding.c symmetric.c | |||
OBJECTS = sign.o polyvec.o poly.o packing.o ntt.o reduce.o rounding.o symmetric.o | |||
SOURCES = sign.c polyvec.c poly.c packing.c ntt.c reduce.c rounding.c stream.c | |||
OBJECTS = sign.o polyvec.o poly.o packing.o ntt.o reduce.o rounding.o stream.o | |||
HEADERS = api.h params.h sign.h polyvec.h poly.h packing.h ntt.h \ | |||
reduce.h rounding.h symmetric.h | |||
reduce.h rounding.h symmetric.h stream.h | |||
CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror -Wmissing-prototypes -Wredundant-decls -std=c99 -I../../../common $(EXTRAFLAGS) | |||
@@ -2,7 +2,7 @@ | |||
# nmake /f Makefile.Microsoft_nmake | |||
LIBRARY=libdilithium2_clean.lib | |||
OBJECTS=sign.obj polyvec.obj poly.obj packing.obj ntt.obj reduce.obj rounding.obj symmetric.obj | |||
OBJECTS=sign.obj polyvec.obj poly.obj packing.obj ntt.obj reduce.obj rounding.obj stream.obj | |||
CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX | |||
all: $(LIBRARY) | |||
@@ -4,14 +4,25 @@ | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#define MODE 2 | |||
#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES 1184U | |||
#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_SECRETKEYBYTES 2800U | |||
#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES 2044U | |||
#define PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_ALGNAME "Dilithium2" | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair( | |||
uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *msg, size_t len, | |||
const uint8_t *sk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, | |||
const uint8_t *pk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
@@ -21,13 +32,6 @@ int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign(uint8_t *sm, size_t *smlen, | |||
const uint8_t *msg, size_t len, | |||
const uint8_t *sk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open(uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, | |||
const uint8_t *pk); | |||
#endif |
@@ -1,11 +1,12 @@ | |||
#include <stdint.h> | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "reduce.h" | |||
#include <stdint.h> | |||
/* Roots of unity in order needed by forward ntt */ | |||
static const uint32_t zetas[N] = { | |||
/* Roots of unity in order needed by forward PQCLEAN_DILITHIUM2_CLEAN_ntt */ | |||
static const uint32_t PQCLEAN_DILITHIUM2_CLEAN_zetas[N] = { | |||
0, 25847, 5771523, 7861508, 237124, 7602457, 7504169, 466468, 1826347, | |||
2353451, 8021166, 6288512, 3119733, 5495562, 3111497, 2680103, 2725464, | |||
1024112, 7300517, 3585928, 7830929, 7260833, 2619752, 6271868, 6262231, | |||
@@ -40,8 +41,8 @@ static const uint32_t zetas[N] = { | |||
8332111, 7018208, 3937738, 1400424, 7534263, 1976782 | |||
}; | |||
/* Roots of unity in order needed by inverse ntt */ | |||
static const uint32_t zetas_inv[N] = { | |||
/* Roots of unity in order needed by inverse PQCLEAN_DILITHIUM2_CLEAN_ntt */ | |||
static const uint32_t PQCLEAN_DILITHIUM2_CLEAN_zetas_inv[N] = { | |||
6403635, 846154, 6979993, 4442679, 1362209, 48306, 4460757, 554416, | |||
3545687, 6767575, 976891, 8196974, 2286327, 420899, 2235985, 2939036, | |||
3833893, 260646, 1104333, 1667432, 6470041, 1803090, 6656817, 426683, | |||
@@ -77,7 +78,7 @@ static const uint32_t zetas_inv[N] = { | |||
}; | |||
/************************************************* | |||
* Name: ntt | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_ntt | |||
* | |||
* Description: Forward NTT, in-place. No modular reduction is performed after | |||
* additions or subtractions. Hence output coefficients can be up | |||
@@ -86,16 +87,16 @@ static const uint32_t zetas_inv[N] = { | |||
* | |||
* Arguments: - uint32_t p[N]: input/output coefficient array | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_ntt(uint32_t p[N]) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_ntt(uint32_t *p) { | |||
unsigned int len, start, j, k; | |||
uint32_t zeta, t; | |||
k = 1; | |||
for (len = 128; len > 0; len >>= 1) { | |||
for (start = 0; start < N; start = j + len) { | |||
zeta = zetas[k++]; | |||
zeta = PQCLEAN_DILITHIUM2_CLEAN_zetas[k++]; | |||
for (j = start; j < start + len; ++j) { | |||
t = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((uint64_t)zeta * p[j + len]); | |||
t = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((uint64_t) zeta * p[j + len]); | |||
p[j + len] = p[j] + 2 * Q - t; | |||
p[j] = p[j] + t; | |||
} | |||
@@ -104,7 +105,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_ntt(uint32_t p[N]) { | |||
} | |||
/************************************************* | |||
* Name: invntt_frominvmont | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_invntt_frominvmont | |||
* | |||
* Description: Inverse NTT and multiplication by Montgomery factor 2^32. | |||
* In-place. No modular reductions after additions or | |||
@@ -113,7 +114,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_ntt(uint32_t p[N]) { | |||
* | |||
* Arguments: - uint32_t p[N]: input/output coefficient array | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_invntt_frominvmont(uint32_t p[N]) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_invntt_frominvmont(uint32_t *p) { | |||
unsigned int start, len, j, k; | |||
uint32_t t, zeta; | |||
const uint32_t f = (((uint64_t)MONT * MONT % Q) * (Q - 1) % Q) * ((Q - 1) >> 8) % Q; | |||
@@ -121,17 +122,17 @@ void PQCLEAN_DILITHIUM2_CLEAN_invntt_frominvmont(uint32_t p[N]) { | |||
k = 0; | |||
for (len = 1; len < N; len <<= 1) { | |||
for (start = 0; start < N; start = j + len) { | |||
zeta = zetas_inv[k++]; | |||
zeta = PQCLEAN_DILITHIUM2_CLEAN_zetas_inv[k++]; | |||
for (j = start; j < start + len; ++j) { | |||
t = p[j]; | |||
p[j] = t + p[j + len]; | |||
p[j + len] = t + 256 * Q - p[j + len]; | |||
p[j + len] = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((uint64_t)zeta * p[j + len]); | |||
p[j + len] = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((uint64_t) zeta * p[j + len]); | |||
} | |||
} | |||
} | |||
for (j = 0; j < N; ++j) { | |||
p[j] = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((uint64_t)f * p[j]); | |||
p[j] = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((uint64_t) f * p[j]); | |||
} | |||
} |
@@ -1,9 +1,10 @@ | |||
#ifndef NTT_H | |||
#define NTT_H | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_NTT_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_NTT_H | |||
#include "params.h" | |||
#include <stdint.h> | |||
#include "params.h" | |||
void PQCLEAN_DILITHIUM2_CLEAN_ntt(uint32_t p[N]); | |||
void PQCLEAN_DILITHIUM2_CLEAN_invntt_frominvmont(uint32_t p[N]); | |||
@@ -4,17 +4,18 @@ | |||
#include "polyvec.h" | |||
/************************************************* | |||
* Name: pack_pk | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_pack_pk | |||
* | |||
* Description: Bit-pack public key pk = (rho, t1). | |||
* | |||
* Arguments: - unsigned char pk[]: output byte array | |||
* - const unsigned char rho[]: byte array containing rho | |||
* Arguments: - uint8_t pk[]: output byte array | |||
* - const uint8_t rho[]: byte array containing rho | |||
* - const polyveck *t1: pointer to vector t1 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_pk(unsigned char pk[CRYPTO_PUBLICKEYBYTES], | |||
const unsigned char rho[SEEDBYTES], | |||
const polyveck *t1) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_pk( | |||
uint8_t *pk, | |||
const uint8_t *rho, | |||
const polyveck *t1) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
@@ -28,17 +29,18 @@ void PQCLEAN_DILITHIUM2_CLEAN_pack_pk(unsigned char pk[CRYPTO_PUBLICKEYBYTES], | |||
} | |||
/************************************************* | |||
* Name: unpack_pk | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_unpack_pk | |||
* | |||
* Description: Unpack public key pk = (rho, t1). | |||
* | |||
* Arguments: - const unsigned char rho[]: output byte array for rho | |||
* Arguments: - const uint8_t rho[]: output byte array for rho | |||
* - const polyveck *t1: pointer to output vector t1 | |||
* - unsigned char pk[]: byte array containing bit-packed pk | |||
* - uint8_t pk[]: byte array containing bit-packed pk | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_unpack_pk(unsigned char rho[SEEDBYTES], | |||
polyveck *t1, | |||
const unsigned char pk[CRYPTO_PUBLICKEYBYTES]) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_unpack_pk( | |||
uint8_t *rho, | |||
polyveck *t1, | |||
const uint8_t *pk) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
@@ -52,25 +54,26 @@ void PQCLEAN_DILITHIUM2_CLEAN_unpack_pk(unsigned char rho[SEEDBYTES], | |||
} | |||
/************************************************* | |||
* Name: pack_sk | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_pack_sk | |||
* | |||
* Description: Bit-pack secret key sk = (rho, key, tr, s1, s2, t0). | |||
* | |||
* Arguments: - unsigned char sk[]: output byte array | |||
* - const unsigned char rho[]: byte array containing rho | |||
* - const unsigned char key[]: byte array containing key | |||
* - const unsigned char tr[]: byte array containing tr | |||
* Arguments: - uint8_t sk[]: output byte array | |||
* - const uint8_t rho[]: byte array containing rho | |||
* - const uint8_t key[]: byte array containing key | |||
* - const uint8_t tr[]: byte array containing tr | |||
* - const polyvecl *s1: pointer to vector s1 | |||
* - const polyveck *s2: pointer to vector s2 | |||
* - const polyveck *t0: pointer to vector t0 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_sk(unsigned char sk[CRYPTO_SECRETKEYBYTES], | |||
const unsigned char rho[SEEDBYTES], | |||
const unsigned char key[SEEDBYTES], | |||
const unsigned char tr[CRHBYTES], | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_sk( | |||
uint8_t *sk, | |||
const uint8_t *rho, | |||
const uint8_t *key, | |||
const uint8_t *tr, | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
@@ -104,25 +107,26 @@ void PQCLEAN_DILITHIUM2_CLEAN_pack_sk(unsigned char sk[CRYPTO_SECRETKEYBYTES], | |||
} | |||
/************************************************* | |||
* Name: unpack_sk | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_unpack_sk | |||
* | |||
* Description: Unpack secret key sk = (rho, key, tr, s1, s2, t0). | |||
* | |||
* Arguments: - const unsigned char rho[]: output byte array for rho | |||
* - const unsigned char key[]: output byte array for key | |||
* - const unsigned char tr[]: output byte array for tr | |||
* Arguments: - const uint8_t rho[]: output byte array for rho | |||
* - const uint8_t key[]: output byte array for key | |||
* - const uint8_t tr[]: output byte array for tr | |||
* - const polyvecl *s1: pointer to output vector s1 | |||
* - const polyveck *s2: pointer to output vector s2 | |||
* - const polyveck *r0: pointer to output vector t0 | |||
* - unsigned char sk[]: byte array containing bit-packed sk | |||
* - uint8_t sk[]: byte array containing bit-packed sk | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_unpack_sk(unsigned char rho[SEEDBYTES], | |||
unsigned char key[SEEDBYTES], | |||
unsigned char tr[CRHBYTES], | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const unsigned char sk[CRYPTO_SECRETKEYBYTES]) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_unpack_sk( | |||
uint8_t *rho, | |||
uint8_t *key, | |||
uint8_t *tr, | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const uint8_t *sk) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
@@ -156,19 +160,20 @@ void PQCLEAN_DILITHIUM2_CLEAN_unpack_sk(unsigned char rho[SEEDBYTES], | |||
} | |||
/************************************************* | |||
* Name: pack_sig | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_pack_sig | |||
* | |||
* Description: Bit-pack signature sig = (z, h, c). | |||
* | |||
* Arguments: - unsigned char sig[]: output byte array | |||
* Arguments: - uint8_t sig[]: output byte array | |||
* - const polyvecl *z: pointer to vector z | |||
* - const polyveck *h: pointer to hint vector h | |||
* - const poly *c: pointer to challenge polynomial | |||
* - const poly *c: pointer to PQCLEAN_DILITHIUM2_CLEAN_challenge polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
const polyvecl *z, | |||
const polyveck *h, | |||
const poly *c) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_sig( | |||
uint8_t *sig, | |||
const polyvecl *z, | |||
const polyveck *h, | |||
const poly *c) { | |||
unsigned int i, j, k; | |||
uint64_t signs, mask; | |||
@@ -182,10 +187,11 @@ void PQCLEAN_DILITHIUM2_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
for (i = 0; i < K; ++i) { | |||
for (j = 0; j < N; ++j) { | |||
if (h->vec[i].coeffs[j] != 0) { | |||
sig[k++] = (unsigned char) j; | |||
sig[k++] = (uint8_t)j; | |||
} | |||
} | |||
sig[OMEGA + i] = (unsigned char) k; | |||
sig[OMEGA + i] = (uint8_t)k; | |||
} | |||
while (k < OMEGA) { | |||
sig[k++] = 0; | |||
@@ -199,7 +205,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
sig[i] = 0; | |||
for (j = 0; j < 8; ++j) { | |||
if (c->coeffs[8 * i + j] != 0) { | |||
sig[i] |= (unsigned char) (1U << j); | |||
sig[i] |= (uint8_t)(1u << j); | |||
if (c->coeffs[8 * i + j] == (Q - 1)) { | |||
signs |= mask; | |||
} | |||
@@ -209,27 +215,28 @@ void PQCLEAN_DILITHIUM2_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
} | |||
sig += N / 8; | |||
for (i = 0; i < 8; ++i) { | |||
sig[i] = (unsigned char) (signs >> 8 * i); | |||
sig[i] = (uint8_t)(signs >> 8u * i); | |||
} | |||
} | |||
/************************************************* | |||
* Name: unpack_sig | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_unpack_sig | |||
* | |||
* Description: Unpack signature sig = (z, h, c). | |||
* | |||
* Arguments: - polyvecl *z: pointer to output vector z | |||
* - polyveck *h: pointer to output hint vector h | |||
* - poly *c: pointer to output challenge polynomial | |||
* - const unsigned char sig[]: byte array containing | |||
* - poly *c: pointer to output PQCLEAN_DILITHIUM2_CLEAN_challenge polynomial | |||
* - const uint8_t sig[]: byte array containing | |||
* bit-packed signature | |||
* | |||
* Returns 1 in case of malformed signature; otherwise 0. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_unpack_sig(polyvecl *z, | |||
polyveck *h, | |||
poly *c, | |||
const unsigned char sig[CRYPTO_BYTES]) { | |||
int PQCLEAN_DILITHIUM2_CLEAN_unpack_sig( | |||
polyvecl *z, | |||
polyveck *h, | |||
poly *c, | |||
const uint8_t *sig) { | |||
unsigned int i, j, k; | |||
uint64_t signs; | |||
@@ -266,6 +273,7 @@ int PQCLEAN_DILITHIUM2_CLEAN_unpack_sig(polyvecl *z, | |||
return 1; | |||
} | |||
} | |||
sig += OMEGA + K; | |||
/* Decode c */ | |||
@@ -1,31 +1,36 @@ | |||
#ifndef PACKING_H | |||
#define PACKING_H | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_PACKING_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_PACKING_H | |||
#include "params.h" | |||
#include "polyvec.h" | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_pk(unsigned char pk[CRYPTO_PUBLICKEYBYTES], | |||
const unsigned char rho[SEEDBYTES], const polyveck *t1); | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_sk(unsigned char sk[CRYPTO_SECRETKEYBYTES], | |||
const unsigned char rho[SEEDBYTES], | |||
const unsigned char key[SEEDBYTES], | |||
const unsigned char tr[CRHBYTES], | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0); | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
const polyvecl *z, const polyveck *h, const poly *c); | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_pk( | |||
uint8_t *pk, | |||
const uint8_t *rho, const polyveck *t1); | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_sk( | |||
uint8_t *sk, | |||
const uint8_t *rho, | |||
const uint8_t *key, | |||
const uint8_t *tr, | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0); | |||
void PQCLEAN_DILITHIUM2_CLEAN_pack_sig( | |||
uint8_t *sig, | |||
const polyvecl *z, const polyveck *h, const poly *c); | |||
void PQCLEAN_DILITHIUM2_CLEAN_unpack_pk(unsigned char rho[SEEDBYTES], polyveck *t1, | |||
const unsigned char pk[CRYPTO_PUBLICKEYBYTES]); | |||
void PQCLEAN_DILITHIUM2_CLEAN_unpack_sk(unsigned char rho[SEEDBYTES], | |||
unsigned char key[SEEDBYTES], | |||
unsigned char tr[CRHBYTES], | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const unsigned char sk[CRYPTO_SECRETKEYBYTES]); | |||
int PQCLEAN_DILITHIUM2_CLEAN_unpack_sig(polyvecl *z, polyveck *h, poly *c, | |||
const unsigned char sig[CRYPTO_BYTES]); | |||
void PQCLEAN_DILITHIUM2_CLEAN_unpack_pk( | |||
uint8_t *rho, polyveck *t1, | |||
const uint8_t *pk); | |||
void PQCLEAN_DILITHIUM2_CLEAN_unpack_sk( | |||
uint8_t *rho, | |||
uint8_t *key, | |||
uint8_t *tr, | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const uint8_t *sk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_unpack_sig( | |||
polyvecl *z, polyveck *h, poly *c, const uint8_t *sig); | |||
#endif |
@@ -1,19 +1,17 @@ | |||
#ifndef PARAMS_H | |||
#define PARAMS_H | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_PARAMS_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_PARAMS_H | |||
#define SEEDBYTES 32 | |||
#define CRHBYTES 48 | |||
#define N 256 | |||
#define Q 8380417 | |||
#define QBITS 23 | |||
#define ROOT_OF_UNITY 1753 | |||
#define D 14 | |||
#define GAMMA1 ((Q - 1)/16) | |||
#define GAMMA2 (GAMMA1/2) | |||
#define ALPHA (2*GAMMA2) | |||
// DilithiumII parameters | |||
#define K 4 | |||
#define L 3 | |||
#define ETA 6 | |||
@@ -1,10 +1,11 @@ | |||
#include <stdint.h> | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "reduce.h" | |||
#include "rounding.h" | |||
#include "symmetric.h" | |||
#include <stdint.h> | |||
/************************************************* | |||
@@ -16,8 +17,7 @@ | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_reduce(poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_reduce32(a->coeffs[i]); | |||
} | |||
} | |||
@@ -31,8 +31,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_reduce(poly *a) { | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_csubq(poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_csubq(a->coeffs[i]); | |||
} | |||
} | |||
@@ -46,8 +45,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_csubq(poly *a) { | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_freeze(poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_freeze(a->coeffs[i]); | |||
} | |||
} | |||
@@ -61,9 +59,8 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_freeze(poly *a) { | |||
* - const poly *a: pointer to first summand | |||
* - const poly *b: pointer to second summand | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_add(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_add(poly *c, const poly *a, const poly *b) { | |||
for (size_t i = 0; i < N; ++i) { | |||
c->coeffs[i] = a->coeffs[i] + b->coeffs[i]; | |||
} | |||
} | |||
@@ -78,11 +75,10 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_add(poly *c, const poly *a, const poly *b) { | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const poly *a: pointer to first input polynomial | |||
* - const poly *b: pointer to second input polynomial to be | |||
* subtraced from first input polynomial | |||
* subtracted from first input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_sub(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
c->coeffs[i] = a->coeffs[i] + 2 * Q - b->coeffs[i]; | |||
} | |||
} | |||
@@ -96,8 +92,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_sub(poly *c, const poly *a, const poly *b) { | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_shiftl(poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] <<= D; | |||
} | |||
} | |||
@@ -139,9 +134,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_invntt_montgomery(poly *a) { | |||
* - const poly *b: pointer to second input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_pointwise_invmontgomery(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
c->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce((uint64_t)a->coeffs[i] * b->coeffs[i]); | |||
} | |||
@@ -160,12 +153,9 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_pointwise_invmontgomery(poly *c, const poly * | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a1->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_power2round(a->coeffs[i], &a0->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
@@ -182,12 +172,9 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a1->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_decompose(a->coeffs[i], &a0->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
@@ -204,13 +191,11 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a) | |||
* Returns number of 1 bits. | |||
**************************************************/ | |||
unsigned int PQCLEAN_DILITHIUM2_CLEAN_poly_make_hint(poly *h, const poly *a0, const poly *a1) { | |||
unsigned int i, s = 0; | |||
for (i = 0; i < N; ++i) { | |||
unsigned int s = 0; | |||
for (size_t i = 0; i < N; ++i) { | |||
h->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_make_hint(a0->coeffs[i], a1->coeffs[i]); | |||
s += h->coeffs[i]; | |||
} | |||
return s; | |||
} | |||
@@ -224,12 +209,9 @@ unsigned int PQCLEAN_DILITHIUM2_CLEAN_poly_make_hint(poly *h, const poly *a0, co | |||
* - const poly *h: pointer to input hint polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_use_hint(poly *a, const poly *b, const poly *h) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM2_CLEAN_use_hint(b->coeffs[i], h->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
@@ -244,15 +226,13 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_use_hint(poly *a, const poly *b, const poly * | |||
* Returns 0 if norm is strictly smaller than B and 1 otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(const poly *a, uint32_t B) { | |||
unsigned int i; | |||
int32_t t; | |||
/* It is ok to leak which coefficient violates the bound since | |||
the probability for each coefficient is independent of secret | |||
data but we must not leak the sign of the centralized representative. */ | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
/* Absolute value of centralized representative */ | |||
t = (int32_t) ((Q - 1) / 2 - a->coeffs[i]); | |||
t = (int32_t)((Q - 1) / 2 - a->coeffs[i]); | |||
t ^= (t >> 31); | |||
t = (Q - 1) / 2 - t; | |||
@@ -260,7 +240,6 @@ int PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(const poly *a, uint32_t B) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
@@ -272,7 +251,7 @@ int PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(const poly *a, uint32_t B) { | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - const uint8_t *buf: array of random bytes | |||
* - size_t buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
@@ -280,8 +259,8 @@ int PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(const poly *a, uint32_t B) { | |||
**************************************************/ | |||
static unsigned int rej_uniform(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
const uint8_t *buf, | |||
size_t buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t; | |||
@@ -308,19 +287,20 @@ static unsigned int rej_uniform(uint32_t *a, | |||
* output stream from SHAKE128(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* - const uint8_t seed[]: byte array with seed of length | |||
* SEEDBYTES | |||
* - uint16_t nonce: 2-byte nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_NBLOCKS ((769 + STREAM128_BLOCKBYTES)/STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_BUFLEN (POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_NBLOCKS ((769 + STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_BUFLEN (POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
const uint8_t seed[SEEDBYTES], | |||
uint16_t nonce) { | |||
unsigned int i, ctr, off; | |||
unsigned int buflen = POLY_UNIFORM_BUFLEN; | |||
unsigned char buf[POLY_UNIFORM_BUFLEN + 2]; | |||
shake128ctx state; | |||
unsigned int i, ctr; | |||
size_t buflen = POLY_UNIFORM_BUFLEN; | |||
uint8_t buf[POLY_UNIFORM_BUFLEN + 2]; | |||
stream128_state state; | |||
size_t off; | |||
stream128_init(&state, seed, nonce); | |||
stream128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state); | |||
@@ -347,7 +327,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform(poly *a, | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - const uint8_t *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
@@ -355,7 +335,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform(poly *a, | |||
**************************************************/ | |||
static unsigned int rej_eta(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
const uint8_t *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t0, t1; | |||
@@ -384,19 +364,18 @@ static unsigned int rej_eta(uint32_t *a, | |||
* output stream from SHAKE128(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* - const uint8_t seed[]: byte array with seed of length | |||
* SEEDBYTES | |||
* - uint16_t nonce: 2-byte nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_ETA_NBLOCKS (((N/2 * (1U << SETABITS)) / (2*ETA + 1)\ | |||
+ STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_ETA_NBLOCKS (((N / 2 * (1u << SETABITS)) / (2 * ETA + 1) + STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_ETA_BUFLEN (POLY_UNIFORM_ETA_NBLOCKS*STREAM128_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_eta(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
const uint8_t *seed, | |||
uint16_t nonce) { | |||
unsigned int ctr; | |||
unsigned char buf[POLY_UNIFORM_ETA_BUFLEN]; | |||
shake128ctx state; | |||
uint8_t buf[POLY_UNIFORM_ETA_BUFLEN]; | |||
stream128_state state; | |||
stream128_init(&state, seed, nonce); | |||
stream128_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state); | |||
@@ -418,7 +397,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_eta(poly *a, | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - const uint8_t *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
@@ -426,7 +405,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_eta(poly *a, | |||
**************************************************/ | |||
static unsigned int rej_gamma1m1(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
const uint8_t *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t0, t1; | |||
@@ -438,7 +417,7 @@ static unsigned int rej_gamma1m1(uint32_t *a, | |||
t0 |= (uint32_t)buf[pos + 2] << 16; | |||
t0 &= 0xFFFFF; | |||
t1 = buf[pos + 2] >> 4; | |||
t1 = buf[pos + 2] >> 4; | |||
t1 |= (uint32_t)buf[pos + 3] << 4; | |||
t1 |= (uint32_t)buf[pos + 4] << 12; | |||
@@ -463,19 +442,19 @@ static unsigned int rej_gamma1m1(uint32_t *a, | |||
* sampling on output stream of SHAKE256(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* - const uint8_t seed[]: byte array with seed of length | |||
* CRHBYTES | |||
* - uint16_t nonce: 16-bit nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_GAMMA1M1_NBLOCKS ((641 + STREAM256_BLOCKBYTES) / STREAM256_BLOCKBYTES) | |||
#define POLY_UNIFORM_GAMMA1M1_BUFLEN (POLY_UNIFORM_GAMMA1M1_NBLOCKS * STREAM256_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_gamma1m1(poly *a, | |||
const unsigned char seed[CRHBYTES], | |||
const uint8_t seed[CRHBYTES], | |||
uint16_t nonce) { | |||
unsigned int i, ctr, off; | |||
unsigned int buflen = POLY_UNIFORM_GAMMA1M1_BUFLEN; | |||
unsigned char buf[POLY_UNIFORM_GAMMA1M1_BUFLEN + 4]; | |||
shake256ctx state; | |||
uint8_t buf[POLY_UNIFORM_GAMMA1M1_BUFLEN + 4]; | |||
stream256_state state; | |||
stream256_init(&state, seed, nonce); | |||
stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1M1_NBLOCKS, &state); | |||
@@ -500,18 +479,18 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_gamma1m1(poly *a, | |||
* Description: Bit-pack polynomial with coefficients in [-ETA,ETA]. | |||
* Input coefficients are assumed to lie in [Q-ETA,Q+ETA]. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLETA_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack(unsigned char *r, const poly *a) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack(uint8_t *r, const poly *a) { | |||
unsigned int i; | |||
unsigned char t[8]; | |||
uint8_t t[8]; | |||
for (i = 0; i < N / 2; ++i) { | |||
t[0] = (uint8_t) (Q + ETA - a->coeffs[2 * i + 0]); | |||
t[1] = (uint8_t) (Q + ETA - a->coeffs[2 * i + 1]); | |||
r[i] = (uint8_t) (t[0] | (t[1] << 4)); | |||
t[0] = (uint8_t)(Q + ETA - a->coeffs[2 * i + 0]); | |||
t[1] = (uint8_t)(Q + ETA - a->coeffs[2 * i + 1]); | |||
r[i] = (uint8_t)(t[0] | (t[1] << 4)); | |||
} | |||
} | |||
@@ -522,68 +501,67 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack(unsigned char *r, const poly *a) { | |||
* Output coefficients lie in [Q-ETA,Q+ETA]. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
* - const uint8_t *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyeta_unpack(poly *r, const unsigned char *a) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyeta_unpack(poly *r, const uint8_t *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r->coeffs[2 * i + 0] = a[i] & 0x0F; | |||
r->coeffs[2 * i + 1] = a[i] >> 4; | |||
r->coeffs[2 * i + 0] = Q + ETA - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 1] = Q + ETA - r->coeffs[2 * i + 1]; | |||
} | |||
} | |||
/************************************************* | |||
* Name: polyt1_pack | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyt1_pack | |||
* | |||
* Description: Bit-pack polynomial t1 with coefficients fitting in 9 bits. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLT1_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt1_pack(unsigned char *r, const poly *a) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt1_pack(uint8_t *r, const poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 8; ++i) { | |||
r[9 * i + 0] = (uint8_t) ((a->coeffs[8 * i + 0] >> 0)); | |||
r[9 * i + 1] = (uint8_t) ((a->coeffs[8 * i + 0] >> 8) | (a->coeffs[8 * i + 1] << 1)); | |||
r[9 * i + 2] = (uint8_t) ((a->coeffs[8 * i + 1] >> 7) | (a->coeffs[8 * i + 2] << 2)); | |||
r[9 * i + 3] = (uint8_t) ((a->coeffs[8 * i + 2] >> 6) | (a->coeffs[8 * i + 3] << 3)); | |||
r[9 * i + 4] = (uint8_t) ((a->coeffs[8 * i + 3] >> 5) | (a->coeffs[8 * i + 4] << 4)); | |||
r[9 * i + 5] = (uint8_t) ((a->coeffs[8 * i + 4] >> 4) | (a->coeffs[8 * i + 5] << 5)); | |||
r[9 * i + 6] = (uint8_t) ((a->coeffs[8 * i + 5] >> 3) | (a->coeffs[8 * i + 6] << 6)); | |||
r[9 * i + 7] = (uint8_t) ((a->coeffs[8 * i + 6] >> 2) | (a->coeffs[8 * i + 7] << 7)); | |||
r[9 * i + 8] = (uint8_t) ((a->coeffs[8 * i + 7] >> 1)); | |||
r[9 * i + 0] = (uint8_t)((a->coeffs[8 * i + 0] >> 0)); | |||
r[9 * i + 1] = (uint8_t)((a->coeffs[8 * i + 0] >> 8) | (a->coeffs[8 * i + 1] << 1)); | |||
r[9 * i + 2] = (uint8_t)((a->coeffs[8 * i + 1] >> 7) | (a->coeffs[8 * i + 2] << 2)); | |||
r[9 * i + 3] = (uint8_t)((a->coeffs[8 * i + 2] >> 6) | (a->coeffs[8 * i + 3] << 3)); | |||
r[9 * i + 4] = (uint8_t)((a->coeffs[8 * i + 3] >> 5) | (a->coeffs[8 * i + 4] << 4)); | |||
r[9 * i + 5] = (uint8_t)((a->coeffs[8 * i + 4] >> 4) | (a->coeffs[8 * i + 5] << 5)); | |||
r[9 * i + 6] = (uint8_t)((a->coeffs[8 * i + 5] >> 3) | (a->coeffs[8 * i + 6] << 6)); | |||
r[9 * i + 7] = (uint8_t)((a->coeffs[8 * i + 6] >> 2) | (a->coeffs[8 * i + 7] << 7)); | |||
r[9 * i + 8] = (uint8_t)((a->coeffs[8 * i + 7] >> 1)); | |||
} | |||
} | |||
/************************************************* | |||
* Name: polyt1_unpack | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack | |||
* | |||
* Description: Unpack polynomial t1 with 9-bit coefficients. | |||
* Output coefficients are standard representatives. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
* - const uint8_t *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack(poly *r, const unsigned char *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 8; ++i) { | |||
r->coeffs[8 * i + 0] = ((a[9 * i + 0] ) | ((uint32_t)a[9 * i + 1] << 8)) & 0x1FF; | |||
r->coeffs[8 * i + 1] = ((a[9 * i + 1] >> 1) | ((uint32_t)a[9 * i + 2] << 7)) & 0x1FF; | |||
r->coeffs[8 * i + 2] = ((a[9 * i + 2] >> 2) | ((uint32_t)a[9 * i + 3] << 6)) & 0x1FF; | |||
r->coeffs[8 * i + 3] = ((a[9 * i + 3] >> 3) | ((uint32_t)a[9 * i + 4] << 5)) & 0x1FF; | |||
r->coeffs[8 * i + 4] = ((a[9 * i + 4] >> 4) | ((uint32_t)a[9 * i + 5] << 4)) & 0x1FF; | |||
r->coeffs[8 * i + 5] = ((a[9 * i + 5] >> 5) | ((uint32_t)a[9 * i + 6] << 3)) & 0x1FF; | |||
r->coeffs[8 * i + 6] = ((a[9 * i + 6] >> 6) | ((uint32_t)a[9 * i + 7] << 2)) & 0x1FF; | |||
r->coeffs[8 * i + 7] = ((a[9 * i + 7] >> 7) | ((uint32_t)a[9 * i + 8] << 1)) & 0x1FF; | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack(poly *r, const uint8_t *a) { | |||
for (size_t i = 0; i < N / 8; ++i) { | |||
r->coeffs[8 * i + 0] = ((a[9 * i + 0] ) | ((uint32_t) a[9 * i + 1] << 8)) & 0x1FF; | |||
r->coeffs[8 * i + 1] = ((a[9 * i + 1] >> 1) | ((uint32_t) a[9 * i + 2] << 7)) & 0x1FF; | |||
r->coeffs[8 * i + 2] = ((a[9 * i + 2] >> 2) | ((uint32_t) a[9 * i + 3] << 6)) & 0x1FF; | |||
r->coeffs[8 * i + 3] = ((a[9 * i + 3] >> 3) | ((uint32_t) a[9 * i + 4] << 5)) & 0x1FF; | |||
r->coeffs[8 * i + 4] = ((a[9 * i + 4] >> 4) | ((uint32_t) a[9 * i + 5] << 4)) & 0x1FF; | |||
r->coeffs[8 * i + 5] = ((a[9 * i + 5] >> 5) | ((uint32_t) a[9 * i + 6] << 3)) & 0x1FF; | |||
r->coeffs[8 * i + 6] = ((a[9 * i + 6] >> 6) | ((uint32_t) a[9 * i + 7] << 2)) & 0x1FF; | |||
r->coeffs[8 * i + 7] = ((a[9 * i + 7] >> 7) | ((uint32_t) a[9 * i + 8] << 1)) & 0x1FF; | |||
} | |||
} | |||
/************************************************* | |||
@@ -592,32 +570,30 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack(poly *r, const unsigned char *a) { | |||
* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. | |||
* Input coefficients are assumed to lie in ]Q-2^{D-1}, Q+2^{D-1}]. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLT0_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt0_pack(unsigned char *r, const poly *a) { | |||
unsigned int i; | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt0_pack(uint8_t *r, const poly *a) { | |||
uint32_t t[4]; | |||
for (i = 0; i < N / 4; ++i) { | |||
t[0] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 0]; | |||
t[1] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 1]; | |||
t[2] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 2]; | |||
t[3] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 3]; | |||
r[7 * i + 0] = (uint8_t) (t[0]); | |||
r[7 * i + 1] = (uint8_t) (t[0] >> 8); | |||
r[7 * i + 1] |= (uint8_t) (t[1] << 6); | |||
r[7 * i + 2] = (uint8_t) (t[1] >> 2); | |||
r[7 * i + 3] = (uint8_t) (t[1] >> 10); | |||
r[7 * i + 3] |= (uint8_t) (t[2] << 4); | |||
r[7 * i + 4] = (uint8_t) (t[2] >> 4); | |||
r[7 * i + 5] = (uint8_t) (t[2] >> 12); | |||
r[7 * i + 5] |= (uint8_t) (t[3] << 2); | |||
r[7 * i + 6] = (uint8_t) (t[3] >> 6); | |||
for (size_t i = 0; i < N / 4; ++i) { | |||
t[0] = Q + (1u << (D - 1)) - a->coeffs[4 * i + 0]; | |||
t[1] = Q + (1u << (D - 1)) - a->coeffs[4 * i + 1]; | |||
t[2] = Q + (1u << (D - 1)) - a->coeffs[4 * i + 2]; | |||
t[3] = Q + (1u << (D - 1)) - a->coeffs[4 * i + 3]; | |||
r[7 * i + 0] = (uint8_t)(t[0]); | |||
r[7 * i + 1] = (uint8_t)(t[0] >> 8); | |||
r[7 * i + 1] |= (uint8_t)(t[1] << 6); | |||
r[7 * i + 2] = (uint8_t)(t[1] >> 2); | |||
r[7 * i + 3] = (uint8_t)(t[1] >> 10); | |||
r[7 * i + 3] |= (uint8_t)(t[2] << 4); | |||
r[7 * i + 4] = (uint8_t)(t[2] >> 4); | |||
r[7 * i + 5] = (uint8_t)(t[2] >> 12); | |||
r[7 * i + 5] |= (uint8_t)(t[3] << 2); | |||
r[7 * i + 6] = (uint8_t)(t[3] >> 6); | |||
} | |||
} | |||
/************************************************* | |||
@@ -627,32 +603,30 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyt0_pack(unsigned char *r, const poly *a) { | |||
* Output coefficients lie in ]Q-2^{D-1},Q+2^{D-1}]. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
* - const uint8_t *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt0_unpack(poly *r, const unsigned char *a) { | |||
unsigned int i; | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt0_unpack(poly *r, const uint8_t *a) { | |||
for (i = 0; i < N / 4; ++i) { | |||
r->coeffs[4 * i + 0] = a[7 * i + 0]; | |||
r->coeffs[4 * i + 0] |= (uint32_t)(a[7 * i + 1] & 0x3F) << 8; | |||
for (size_t i = 0; i < N / 4; ++i) { | |||
r->coeffs[4 * i + 0] = a[7 * i + 0]; | |||
r->coeffs[4 * i + 0] |= (uint32_t) (a[7 * i + 1] & 0x3F) << 8; | |||
r->coeffs[4 * i + 1] = a[7 * i + 1] >> 6; | |||
r->coeffs[4 * i + 1] |= (uint32_t)a[7 * i + 2] << 2; | |||
r->coeffs[4 * i + 1] |= (uint32_t)(a[7 * i + 3] & 0x0F) << 10; | |||
r->coeffs[4 * i + 1] = a[7 * i + 1] >> 6; | |||
r->coeffs[4 * i + 1] |= (uint32_t) a[7 * i + 2] << 2; | |||
r->coeffs[4 * i + 1] |= (uint32_t) (a[7 * i + 3] & 0x0F) << 10; | |||
r->coeffs[4 * i + 2] = a[7 * i + 3] >> 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t)a[7 * i + 4] << 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t)(a[7 * i + 5] & 0x03) << 12; | |||
r->coeffs[4 * i + 2] = a[7 * i + 3] >> 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t) a[7 * i + 4] << 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t) (a[7 * i + 5] & 0x03) << 12; | |||
r->coeffs[4 * i + 3] = a[7 * i + 5] >> 2; | |||
r->coeffs[4 * i + 3] |= (uint32_t)a[7 * i + 6] << 6; | |||
r->coeffs[4 * i + 3] = a[7 * i + 5] >> 2; | |||
r->coeffs[4 * i + 3] |= (uint32_t) a[7 * i + 6] << 6; | |||
r->coeffs[4 * i + 0] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 0]; | |||
r->coeffs[4 * i + 1] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 1]; | |||
r->coeffs[4 * i + 2] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 2]; | |||
r->coeffs[4 * i + 3] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 3]; | |||
} | |||
} | |||
/************************************************* | |||
@@ -662,29 +636,27 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyt0_unpack(poly *r, const unsigned char *a) { | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1]. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLZ_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyz_pack(unsigned char *r, const poly *a) { | |||
unsigned int i; | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyz_pack(uint8_t *r, const poly *a) { | |||
uint32_t t[2]; | |||
for (i = 0; i < N / 2; ++i) { | |||
for (size_t i = 0; i < N / 2; ++i) { | |||
/* Map to {0,...,2*GAMMA1 - 2} */ | |||
t[0] = GAMMA1 - 1 - a->coeffs[2 * i + 0]; | |||
t[0] += ((int32_t)t[0] >> 31) & Q; | |||
t[1] = GAMMA1 - 1 - a->coeffs[2 * i + 1]; | |||
t[1] += ((int32_t)t[1] >> 31) & Q; | |||
r[5 * i + 0] = (uint8_t) (t[0]); | |||
r[5 * i + 1] = (uint8_t) (t[0] >> 8); | |||
r[5 * i + 2] = (uint8_t) (t[0] >> 16); | |||
r[5 * i + 2] |= (uint8_t) (t[1] << 4); | |||
r[5 * i + 3] = (uint8_t) (t[1] >> 4); | |||
r[5 * i + 4] = (uint8_t) (t[1] >> 12); | |||
r[5 * i + 0] = (uint8_t)t[0]; | |||
r[5 * i + 1] = (uint8_t)(t[0] >> 8); | |||
r[5 * i + 2] = (uint8_t)(t[0] >> 16); | |||
r[5 * i + 2] |= (uint8_t)(t[1] << 4); | |||
r[5 * i + 3] = (uint8_t)(t[1] >> 4); | |||
r[5 * i + 4] = (uint8_t)(t[1] >> 12); | |||
} | |||
} | |||
/************************************************* | |||
@@ -695,26 +667,23 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyz_pack(unsigned char *r, const poly *a) { | |||
* Output coefficients are standard representatives. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
* - const uint8_t *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack(poly *r, const unsigned char *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack(poly *r, const uint8_t *a) { | |||
for (size_t i = 0; i < N / 2; ++i) { | |||
r->coeffs[2 * i + 0] = a[5 * i + 0]; | |||
r->coeffs[2 * i + 0] |= (uint32_t)a[5 * i + 1] << 8; | |||
r->coeffs[2 * i + 0] |= (uint32_t)(a[5 * i + 2] & 0x0F) << 16; | |||
r->coeffs[2 * i + 0] |= (uint32_t) a[5 * i + 1] << 8; | |||
r->coeffs[2 * i + 0] |= (uint32_t) (a[5 * i + 2] & 0x0F) << 16; | |||
r->coeffs[2 * i + 1] = a[5 * i + 2] >> 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 3] << 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 4] << 12; | |||
r->coeffs[2 * i + 1] |= (uint32_t) a[5 * i + 3] << 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t) a[5 * i + 4] << 12; | |||
r->coeffs[2 * i + 0] = GAMMA1 - 1 - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 0] = GAMMA1 - 1 - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 0] += ((int32_t)r->coeffs[2 * i + 0] >> 31) & Q; | |||
r->coeffs[2 * i + 1] = GAMMA1 - 1 - r->coeffs[2 * i + 1]; | |||
r->coeffs[2 * i + 1] = GAMMA1 - 1 - r->coeffs[2 * i + 1]; | |||
r->coeffs[2 * i + 1] += ((int32_t)r->coeffs[2 * i + 1] >> 31) & Q; | |||
} | |||
} | |||
/************************************************* | |||
@@ -723,15 +692,13 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack(poly *r, const unsigned char *a) { | |||
* Description: Bit-pack polynomial w1 with coefficients in [0, 15]. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLW1_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyw1_pack(unsigned char *r, const poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r[i] = (uint8_t) (a->coeffs[2 * i + 0] | (a->coeffs[2 * i + 1] << 4)); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyw1_pack(uint8_t *r, const poly *a) { | |||
for (size_t i = 0; i < N / 2; ++i) { | |||
r[i] = (uint8_t)(a->coeffs[2 * i + 0] | a->coeffs[2 * i + 1] << 4); | |||
} | |||
} |
@@ -1,9 +1,10 @@ | |||
#ifndef POLY_H | |||
#define POLY_H | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_POLY_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_POLY_H | |||
#include "params.h" | |||
#include <stdint.h> | |||
#include "params.h" | |||
typedef struct { | |||
uint32_t coeffs[N]; | |||
} poly; | |||
@@ -27,27 +28,27 @@ void PQCLEAN_DILITHIUM2_CLEAN_poly_use_hint(poly *a, const poly *b, const poly * | |||
int PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(const poly *a, uint32_t B); | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
const uint8_t *seed, | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_eta(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
const uint8_t *seed, | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM2_CLEAN_poly_uniform_gamma1m1(poly *a, | |||
const unsigned char seed[CRHBYTES], | |||
const uint8_t seed[CRHBYTES], | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyeta_unpack(poly *r, const unsigned char *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyeta_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyeta_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt1_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack(poly *r, const unsigned char *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt1_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt1_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt0_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt0_unpack(poly *r, const unsigned char *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt0_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyt0_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyz_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack(poly *r, const unsigned char *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyz_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyz_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyw1_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyw1_pack(uint8_t *r, const poly *a); | |||
#endif |
@@ -1,14 +1,15 @@ | |||
#include <stdint.h> | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "polyvec.h" | |||
#include <stdint.h> | |||
/**************************************************************/ | |||
/************ Vectors of polynomials of length L **************/ | |||
/**************************************************************/ | |||
/************************************************* | |||
* Name: polyvecl_freeze | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyvecl_freeze | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length L | |||
* to standard representatives. | |||
@@ -24,7 +25,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_freeze(polyvecl *v) { | |||
} | |||
/************************************************* | |||
* Name: polyvecl_add | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyvecl_add | |||
* | |||
* Description: Add vectors of polynomials of length L. | |||
* No modular reduction is performed. | |||
@@ -42,7 +43,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_add(polyvecl *w, const polyvecl *u, const | |||
} | |||
/************************************************* | |||
* Name: polyvecl_ntt | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyvecl_ntt | |||
* | |||
* Description: Forward NTT of all polynomials in vector of length L. Output | |||
* coefficients can be up to 16*Q larger than input coefficients. | |||
@@ -58,7 +59,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_ntt(polyvecl *v) { | |||
} | |||
/************************************************* | |||
* Name: polyvecl_pointwise_acc_invmontgomery | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_acc_invmontgomery | |||
* | |||
* Description: Pointwise multiply vectors of polynomials of length L, multiply | |||
* resulting vector by 2^{-32} and add (accumulate) polynomials | |||
@@ -85,7 +86,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_acc_invmontgomery(poly *w, | |||
} | |||
/************************************************* | |||
* Name: polyvecl_chknorm | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyvecl_chknorm | |||
* | |||
* Description: Check infinity norm of polynomials in vector of length L. | |||
* Assumes input coefficients to be standard representatives. | |||
@@ -96,14 +97,15 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyvecl_pointwise_acc_invmontgomery(poly *w, | |||
* Returns 0 if norm of all polynomials is strictly smaller than B and 1 | |||
* otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_polyvecl_chknorm(const polyvecl *v, uint32_t bound) { | |||
int PQCLEAN_DILITHIUM2_CLEAN_polyvecl_chknorm(const polyvecl *v, uint32_t B) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
if (PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(&v->vec[i], bound)) { | |||
if (PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(&v->vec[i], B)) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
@@ -113,7 +115,7 @@ int PQCLEAN_DILITHIUM2_CLEAN_polyvecl_chknorm(const polyvecl *v, uint32_t bound) | |||
/************************************************* | |||
* Name: polyveck_reduce | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length K | |||
* to representatives in [0,2*Q[. | |||
@@ -129,7 +131,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_reduce(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_csubq | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_csubq | |||
* | |||
* Description: For all coefficients of polynomials in vector of length K | |||
* subtract Q if coefficient is bigger than Q. | |||
@@ -145,7 +147,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_csubq(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_freeze | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_freeze | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length K | |||
* to standard representatives. | |||
@@ -161,7 +163,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_freeze(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_add | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_add | |||
* | |||
* Description: Add vectors of polynomials of length K. | |||
* No modular reduction is performed. | |||
@@ -179,7 +181,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_add(polyveck *w, const polyveck *u, const | |||
} | |||
/************************************************* | |||
* Name: polyveck_sub | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_sub | |||
* | |||
* Description: Subtract vectors of polynomials of length K. | |||
* Assumes coefficients of polynomials in second input vector | |||
@@ -199,7 +201,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_sub(polyveck *w, const polyveck *u, const | |||
} | |||
/************************************************* | |||
* Name: polyveck_shiftl | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_shiftl | |||
* | |||
* Description: Multiply vector of polynomials of Length K by 2^D without modular | |||
* reduction. Assumes input coefficients to be less than 2^{32-D}. | |||
@@ -215,7 +217,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_shiftl(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_ntt | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_ntt | |||
* | |||
* Description: Forward NTT of all polynomials in vector of length K. Output | |||
* coefficients can be up to 16*Q larger than input coefficients. | |||
@@ -231,7 +233,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_ntt(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_invntt_montgomery | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_montgomery | |||
* | |||
* Description: Inverse NTT and multiplication by 2^{32} of polynomials | |||
* in vector of length K. Input coefficients need to be less | |||
@@ -248,7 +250,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_montgomery(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_chknorm | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_chknorm | |||
* | |||
* Description: Check infinity norm of polynomials in vector of length K. | |||
* Assumes input coefficients to be standard representatives. | |||
@@ -259,19 +261,20 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_invntt_montgomery(polyveck *v) { | |||
* Returns 0 if norm of all polynomials are strictly smaller than B and 1 | |||
* otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_polyveck_chknorm(const polyveck *v, uint32_t bound) { | |||
int PQCLEAN_DILITHIUM2_CLEAN_polyveck_chknorm(const polyveck *v, uint32_t B) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
if (PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(&v->vec[i], bound)) { | |||
if (PQCLEAN_DILITHIUM2_CLEAN_poly_chknorm(&v->vec[i], B)) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: polyveck_power2round | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_power2round | |||
* | |||
* Description: For all coefficients a of polynomials in vector of length K, | |||
* compute a0, a1 such that a mod Q = a1*2^D + a0 | |||
@@ -293,7 +296,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_power2round(polyveck *v1, polyveck *v0, c | |||
} | |||
/************************************************* | |||
* Name: polyveck_decompose | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_decompose | |||
* | |||
* Description: For all coefficients a of polynomials in vector of length K, | |||
* compute high and low bits a0, a1 such a mod Q = a1*ALPHA + a0 | |||
@@ -316,7 +319,7 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, con | |||
} | |||
/************************************************* | |||
* Name: polyveck_make_hint | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_make_hint | |||
* | |||
* Description: Compute hint vector. | |||
* | |||
@@ -339,19 +342,19 @@ unsigned int PQCLEAN_DILITHIUM2_CLEAN_polyveck_make_hint(polyveck *h, | |||
} | |||
/************************************************* | |||
* Name: polyveck_use_hint | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_polyveck_use_hint | |||
* | |||
* Description: Use hint vector to correct the high bits of input vector. | |||
* | |||
* Arguments: - polyveck *w: pointer to output vector of polynomials with | |||
* corrected high bits | |||
* - const polyveck *u: pointer to input vector | |||
* - const polyveck *v: pointer to input vector | |||
* - const polyveck *h: pointer to input hint vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM2_CLEAN_poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); | |||
PQCLEAN_DILITHIUM2_CLEAN_poly_use_hint(&w->vec[i], &v->vec[i], &h->vec[i]); | |||
} | |||
} |
@@ -1,9 +1,10 @@ | |||
#ifndef POLYVEC_H | |||
#define POLYVEC_H | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_POLYVEC_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_POLYVEC_H | |||
#include <stdint.h> | |||
#include "params.h" | |||
#include "poly.h" | |||
#include <stdint.h> | |||
/* Vectors of polynomials of length L */ | |||
typedef struct { | |||
@@ -46,6 +47,6 @@ void PQCLEAN_DILITHIUM2_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, con | |||
unsigned int PQCLEAN_DILITHIUM2_CLEAN_polyveck_make_hint(polyveck *h, | |||
const polyveck *v0, | |||
const polyveck *v1); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h); | |||
void PQCLEAN_DILITHIUM2_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h); | |||
#endif |
@@ -1,9 +1,10 @@ | |||
#include <stdint.h> | |||
#include "params.h" | |||
#include "reduce.h" | |||
#include <stdint.h> | |||
/************************************************* | |||
* Name: montgomery_reduce | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce | |||
* | |||
* Description: For finite field element a with 0 <= a <= Q*2^32, | |||
* compute r \equiv a*2^{-32} (mod Q) such that 0 <= r < 2*Q. | |||
@@ -20,11 +21,11 @@ uint32_t PQCLEAN_DILITHIUM2_CLEAN_montgomery_reduce(uint64_t a) { | |||
t *= Q; | |||
t = a + t; | |||
t >>= 32; | |||
return (uint32_t) t; | |||
return (uint32_t)t; | |||
} | |||
/************************************************* | |||
* Name: reduce32 | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_reduce32 | |||
* | |||
* Description: For finite field element a, compute r \equiv a (mod Q) | |||
* such that 0 <= r < 2*Q. | |||
@@ -43,7 +44,7 @@ uint32_t PQCLEAN_DILITHIUM2_CLEAN_reduce32(uint32_t a) { | |||
} | |||
/************************************************* | |||
* Name: csubq | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_csubq | |||
* | |||
* Description: Subtract Q if input coefficient is bigger than Q. | |||
* | |||
@@ -58,7 +59,7 @@ uint32_t PQCLEAN_DILITHIUM2_CLEAN_csubq(uint32_t a) { | |||
} | |||
/************************************************* | |||
* Name: freeze | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_freeze | |||
* | |||
* Description: For finite field element a, compute standard | |||
* representative r = a mod Q. | |||
@@ -1,5 +1,5 @@ | |||
#ifndef REDUCE_H | |||
#define REDUCE_H | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_REDUCE_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_REDUCE_H | |||
#include <stdint.h> | |||
@@ -1,7 +1,10 @@ | |||
#include <stdint.h> | |||
#include "params.h" | |||
#include "rounding.h" | |||
/************************************************* | |||
* Name: power2round | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_power2round | |||
* | |||
* Description: For finite field element a, compute a0, a1 such that | |||
* a mod Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. | |||
@@ -17,16 +20,16 @@ uint32_t PQCLEAN_DILITHIUM2_CLEAN_power2round(uint32_t a, uint32_t *a0) { | |||
/* Centralized remainder mod 2^D */ | |||
t = a & ((1U << D) - 1); | |||
t -= ((1U << (D - 1)) + 1); | |||
t += ((uint32_t)((int32_t)t >> 31) & (1U << D)); | |||
t -= ((1U << (D - 1)) - 1); | |||
*a0 = (Q + t); | |||
t -= (1U << (D - 1)) + 1; | |||
t += ((uint32_t)((int32_t)t >> 31) & (1 << D)); | |||
t -= (1U << (D - 1)) - 1; | |||
*a0 = Q + t; | |||
a = (a - t) >> D; | |||
return a; | |||
} | |||
/************************************************* | |||
* Name: decompose | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_decompose | |||
* | |||
* Description: For finite field element a, compute high and low bits a0, a1 such | |||
* that a mod Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except | |||
@@ -41,28 +44,29 @@ uint32_t PQCLEAN_DILITHIUM2_CLEAN_power2round(uint32_t a, uint32_t *a0) { | |||
**************************************************/ | |||
uint32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(uint32_t a, uint32_t *a0) { | |||
int32_t t, u; | |||
/* Centralized remainder mod ALPHA */ | |||
t = a & 0x7FFFF; | |||
t += (int32_t) ((a >> 19) << 9); | |||
t = a & 0x7FFFFu; | |||
t += (int32_t)((a >> 19u) << 9u); | |||
t -= ALPHA / 2 + 1; | |||
t += (t >> 31) & ALPHA; | |||
t -= ALPHA / 2 - 1; | |||
a -= (uint32_t) t; | |||
a -= (uint32_t)t; | |||
/* Divide by ALPHA (possible to avoid) */ | |||
u = (int32_t) a - 1; | |||
u = (int32_t)(a - 1); | |||
u >>= 31; | |||
a = (a >> 19) + 1; | |||
a -= u & 1; | |||
/* Border case */ | |||
*a0 = Q + (uint32_t)t - (a >> 4); | |||
a &= 0xF; | |||
*a0 = (uint32_t)(Q + t - (int32_t)(a >> 4u)); | |||
a &= 0xFu; | |||
return a; | |||
} | |||
/************************************************* | |||
* Name: make_hint | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_make_hint | |||
* | |||
* Description: Compute hint bit indicating whether the low bits of the | |||
* input element overflow into the high bits. Inputs assumed to be | |||
@@ -73,7 +77,7 @@ uint32_t PQCLEAN_DILITHIUM2_CLEAN_decompose(uint32_t a, uint32_t *a0) { | |||
* | |||
* Returns 1 if high bits of a and b differ and 0 otherwise. | |||
**************************************************/ | |||
unsigned int PQCLEAN_DILITHIUM2_CLEAN_make_hint(uint32_t a0, uint32_t a1) { | |||
unsigned int PQCLEAN_DILITHIUM2_CLEAN_make_hint(const uint32_t a0, const uint32_t a1) { | |||
if (a0 <= GAMMA2 || a0 > Q - GAMMA2 || (a0 == Q - GAMMA2 && a1 == 0)) { | |||
return 0; | |||
} | |||
@@ -82,7 +86,7 @@ unsigned int PQCLEAN_DILITHIUM2_CLEAN_make_hint(uint32_t a0, uint32_t a1) { | |||
} | |||
/************************************************* | |||
* Name: use_hint | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_use_hint | |||
* | |||
* Description: Correct high bits according to hint. | |||
* | |||
@@ -91,7 +95,7 @@ unsigned int PQCLEAN_DILITHIUM2_CLEAN_make_hint(uint32_t a0, uint32_t a1) { | |||
* | |||
* Returns corrected high bits. | |||
**************************************************/ | |||
uint32_t PQCLEAN_DILITHIUM2_CLEAN_use_hint(uint32_t a, unsigned int hint) { | |||
uint32_t PQCLEAN_DILITHIUM2_CLEAN_use_hint(const uint32_t a, const unsigned int hint) { | |||
uint32_t a0, a1; | |||
a1 = PQCLEAN_DILITHIUM2_CLEAN_decompose(a, &a0); | |||
@@ -101,5 +105,15 @@ uint32_t PQCLEAN_DILITHIUM2_CLEAN_use_hint(uint32_t a, unsigned int hint) { | |||
if (a0 > Q) { | |||
return (a1 + 1) & 0xF; | |||
} | |||
return (a1 - 1) & 0xF; | |||
/* If PQCLEAN_DILITHIUM2_CLEAN_decompose does not divide out ALPHA: | |||
if(hint == 0) | |||
return a1; | |||
else if(a0 > Q) | |||
return (a1 + ALPHA) % (Q - 1); | |||
else | |||
return (a1 - ALPHA) % (Q - 1); | |||
*/ | |||
} |
@@ -1,5 +1,5 @@ | |||
#ifndef ROUNDING_H | |||
#define ROUNDING_H | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_ROUNDING_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_ROUNDING_H | |||
#include <stdint.h> | |||
@@ -1,3 +1,6 @@ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "fips202.h" | |||
#include "packing.h" | |||
#include "params.h" | |||
@@ -7,19 +10,17 @@ | |||
#include "sign.h" | |||
#include "symmetric.h" | |||
#include <stdint.h> | |||
/************************************************* | |||
* Name: expand_mat | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_expand_mat | |||
* | |||
* Description: Implementation of ExpandA. Generates matrix A with uniformly | |||
* random coefficients a_{i,j} by performing rejection | |||
* sampling on the output stream of SHAKE128(rho|i|j). | |||
* | |||
* Arguments: - polyvecl mat[K]: output matrix | |||
* - const unsigned char rho[]: byte array containing seed rho | |||
* - const uint8_t rho[]: byte array containing seed rho | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_expand_mat(polyvecl mat[K], const unsigned char rho[SEEDBYTES]) { | |||
void PQCLEAN_DILITHIUM2_CLEAN_expand_mat(polyvecl mat[K], const uint8_t rho[SEEDBYTES]) { | |||
unsigned int i, j; | |||
for (i = 0; i < K; ++i) { | |||
@@ -30,23 +31,23 @@ void PQCLEAN_DILITHIUM2_CLEAN_expand_mat(polyvecl mat[K], const unsigned char rh | |||
} | |||
/************************************************* | |||
* Name: challenge | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_challenge | |||
* | |||
* Description: Implementation of H. Samples polynomial with 60 nonzero | |||
* coefficients in {-1,1} using the output stream of | |||
* SHAKE256(mu|w1). | |||
* | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const unsigned char mu[]: byte array containing mu | |||
* - const uint8_t mu[]: byte array containing mu | |||
* - const polyveck *w1: pointer to vector w1 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM2_CLEAN_challenge(poly *c, | |||
const unsigned char mu[CRHBYTES], | |||
const uint8_t mu[CRHBYTES], | |||
const polyveck *w1) { | |||
unsigned int i, b, pos; | |||
uint64_t signs; | |||
unsigned char inbuf[CRHBYTES + K * POLW1_SIZE_PACKED]; | |||
unsigned char outbuf[SHAKE256_RATE]; | |||
uint8_t inbuf[CRHBYTES + K * POLW1_SIZE_PACKED]; | |||
uint8_t outbuf[SHAKE256_RATE]; | |||
shake256ctx state; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
@@ -88,22 +89,22 @@ void PQCLEAN_DILITHIUM2_CLEAN_challenge(poly *c, | |||
} | |||
/************************************************* | |||
* Name: crypto_sign_keypair | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair | |||
* | |||
* Description: Generates public and private key. | |||
* | |||
* Arguments: - unsigned char *pk: pointer to output public key (allocated | |||
* array of CRYPTO_PUBLICKEYBYTES bytes) | |||
* - unsigned char *sk: pointer to output private key (allocated | |||
* array of CRYPTO_SECRETKEYBYTES bytes) | |||
* Arguments: - uint8_t *pk: pointer to output public key (allocated | |||
* array of PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES bytes) | |||
* - uint8_t *sk: pointer to output private key (allocated | |||
* array of PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_SECRETKEYBYTES bytes) | |||
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { | |||
unsigned int i; | |||
unsigned char seedbuf[3 * SEEDBYTES]; | |||
unsigned char tr[CRHBYTES]; | |||
const unsigned char *rho, *rhoprime, *key; | |||
uint8_t seedbuf[3 * SEEDBYTES]; | |||
uint8_t tr[CRHBYTES]; | |||
const uint8_t *rho, *rhoprime, *key; | |||
uint16_t nonce = 0; | |||
polyvecl mat[K]; | |||
polyvecl s1, s1hat; | |||
@@ -144,19 +145,35 @@ int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { | |||
PQCLEAN_DILITHIUM2_CLEAN_pack_pk(pk, rho, &t1); | |||
/* Compute CRH(rho, t1) and write secret key */ | |||
crh(tr, pk, CRYPTO_PUBLICKEYBYTES); | |||
crh(tr, pk, PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES); | |||
PQCLEAN_DILITHIUM2_CLEAN_pack_sk(sk, rho, key, tr, &s1, &s2, &t0); | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature | |||
* | |||
* Description: Compute signature.
*
* Arguments:   - uint8_t *sig: pointer to output signature (PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES
*                              of len)
*              - size_t *siglen: pointer to output length of signature
*                                (set to PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES)
*              - const uint8_t *msg: pointer to message to be signed
*              - size_t mlen: length of message
*              - const uint8_t *sk: pointer to bit-packed secret key
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
const uint8_t *msg, size_t mlen, | |||
const uint8_t *sk) { | |||
unsigned long long i; | |||
unsigned int n; | |||
unsigned char seedbuf[2 * SEEDBYTES + 3 * CRHBYTES]; | |||
unsigned char *rho, *tr, *key, *mu, *rhoprime; | |||
uint8_t seedbuf[2 * SEEDBYTES + 3 * CRHBYTES]; | |||
uint8_t *rho, *tr, *key, *mu, *rhoprime; | |||
uint16_t nonce = 0; | |||
poly c, chat; | |||
polyvecl mat[K], s1, y, yhat, z; | |||
@@ -170,13 +187,12 @@ int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature( | |||
rhoprime = mu + CRHBYTES; | |||
PQCLEAN_DILITHIUM2_CLEAN_unpack_sk(rho, key, tr, &s1, &s2, &t0, sk); | |||
// use incremental hash API instead of copying around buffers | |||
/* Compute CRH(tr, msg) */ | |||
shake256incctx state; | |||
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, tr, CRHBYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_absorb(&state, msg, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(mu, CRHBYTES, &state); | |||
@@ -253,11 +269,51 @@ rej: | |||
/* Write signature */ | |||
PQCLEAN_DILITHIUM2_CLEAN_pack_sig(sig, &z, &h, &c); | |||
*siglen = CRYPTO_BYTES; | |||
*siglen = PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES; | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign | |||
* | |||
* Description: Compute signed message. | |||
* | |||
* Arguments: - uint8_t *sm: pointer to output signed message (allocated | |||
* array with PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES + mlen bytes), | |||
* can be equal to m | |||
* - unsigned long long *smlen: pointer to output length of signed | |||
* message | |||
* - const uint8_t *m: pointer to message to be signed | |||
* - unsigned long long mlen: length of message | |||
* - const uint8_t *sk: pointer to bit-packed secret key | |||
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, | |||
const uint8_t *sk) { | |||
int rc; | |||
memmove(sm + PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES, m, mlen); | |||
rc = PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature(sm, smlen, m, mlen, sk); | |||
*smlen += mlen; | |||
return rc; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify | |||
* | |||
* Description: Verify signature of a message.
*
* Arguments:   - const uint8_t *sig: signature
*              - size_t siglen: length of signature (at least PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES)
*              - const uint8_t *m: pointer to message
*              - size_t mlen: length of message
*              - const uint8_t *pk: pointer to bit-packed public key
* | |||
* Returns 0 if signed message could be verified correctly and -1 otherwise | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
@@ -268,7 +324,7 @@ int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify( | |||
polyvecl mat[K], z; | |||
polyveck t1, w1, h, tmp1, tmp2; | |||
if (siglen < CRYPTO_BYTES) { | |||
if (siglen < PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES) { | |||
return -1; | |||
} | |||
@@ -281,7 +337,7 @@ int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify( | |||
} | |||
/* Compute CRH(CRH(rho, t1), msg) */ | |||
crh(mu, pk, CRYPTO_PUBLICKEYBYTES); | |||
crh(mu, pk, PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_PUBLICKEYBYTES); | |||
shake256incctx state; | |||
shake256_inc_init(&state); | |||
@@ -325,40 +381,9 @@ int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify( | |||
// All good | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: crypto_sign | |||
* | |||
* Description: Compute signed message. | |||
* | |||
* Arguments: - unsigned char *sm: pointer to output signed message (allocated | |||
* array with CRYPTO_BYTES + mlen bytes), | |||
* can be equal to m | |||
* - unsigned long long *smlen: pointer to output length of signed | |||
* message | |||
* - const unsigned char *m: pointer to message to be signed | |||
* - unsigned long long mlen: length of message | |||
* - const unsigned char *sk: pointer to bit-packed secret key | |||
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign(uint8_t *sm, | |||
size_t *smlen, | |||
const uint8_t *m, | |||
size_t mlen, | |||
const uint8_t *sk) { | |||
size_t i; | |||
int rc; | |||
for (i = 0; i < mlen; i++) { | |||
sm[CRYPTO_BYTES + i] = m[i]; | |||
} | |||
rc = PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature(sm, smlen, m, mlen, sk); | |||
*smlen += mlen; | |||
return rc; | |||
} | |||
/************************************************* | |||
* Name: crypto_sign_open | |||
* Name: PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open | |||
* | |||
* Description: Verify signed message. | |||
* | |||
@@ -371,24 +396,23 @@ int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign(uint8_t *sm, | |||
* | |||
* Returns 0 if signed message could be verified correctly and -1 otherwise | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open(uint8_t *m, | |||
size_t *mlen, | |||
const uint8_t *sm, | |||
size_t smlen, | |||
const uint8_t *pk) { | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, | |||
const uint8_t *pk) { | |||
size_t i; | |||
if (smlen < CRYPTO_BYTES) { | |||
if (smlen < PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES) { | |||
goto badsig; | |||
} | |||
*mlen = smlen - CRYPTO_BYTES; | |||
*mlen = smlen - PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES; | |||
if (PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify(sm, CRYPTO_BYTES, | |||
sm + CRYPTO_BYTES, *mlen, pk)) { | |||
if (PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify(sm, PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES, | |||
sm + PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES, *mlen, pk)) { | |||
goto badsig; | |||
} else { | |||
/* All good, copy msg, return 0 */ | |||
for (i = 0; i < *mlen; ++i) { | |||
m[i] = sm[CRYPTO_BYTES + i]; | |||
m[i] = sm[PQCLEAN_DILITHIUM2_CLEAN_CRYPTO_BYTES + i]; | |||
} | |||
return 0; | |||
} | |||
@@ -1,30 +1,12 @@ | |||
#ifndef SIGN_H | |||
#define SIGN_H | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_SIGN_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_SIGN_H | |||
#include "api.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "polyvec.h" | |||
void PQCLEAN_DILITHIUM2_CLEAN_expand_mat(polyvecl mat[K], const unsigned char rho[SEEDBYTES]); | |||
void PQCLEAN_DILITHIUM2_CLEAN_challenge(poly *c, const unsigned char mu[CRHBYTES], | |||
void PQCLEAN_DILITHIUM2_CLEAN_expand_mat(polyvecl mat[K], const uint8_t rho[SEEDBYTES]); | |||
void PQCLEAN_DILITHIUM2_CLEAN_challenge(poly *c, const uint8_t mu[CRHBYTES], | |||
const polyveck *w1); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign(uint8_t *sm, size_t *smlen, | |||
const uint8_t *msg, size_t len, | |||
const uint8_t *sk); | |||
int PQCLEAN_DILITHIUM2_CLEAN_crypto_sign_open(uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, | |||
const uint8_t *pk); | |||
#endif |
@@ -0,0 +1,26 @@ | |||
#include "stream.h" | |||
#include <string.h> | |||
void PQCLEAN_DILITHIUM2_CLEAN_shake128_stream_init( | |||
shake128ctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) { | |||
uint8_t buf[SEEDBYTES + 2]; | |||
memcpy(buf, seed, SEEDBYTES); | |||
buf[SEEDBYTES] = (uint8_t)nonce; | |||
buf[SEEDBYTES + 1] = (uint8_t)(nonce >> 8); | |||
shake128_absorb(state, buf, SEEDBYTES + 2); | |||
} | |||
void PQCLEAN_DILITHIUM2_CLEAN_shake256_stream_init( | |||
shake256ctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) { | |||
uint8_t buf[CRHBYTES + 2]; | |||
memcpy(buf, seed, CRHBYTES); | |||
buf[CRHBYTES] = (uint8_t)nonce; | |||
buf[CRHBYTES + 1] = (uint8_t)(nonce >> 8); | |||
shake256_absorb(state, buf, CRHBYTES + 2); | |||
} |
@@ -0,0 +1,15 @@ | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_STREAM_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_STREAM_H | |||
#include <stdint.h> | |||
#include "fips202.h" | |||
#include "params.h" | |||
void PQCLEAN_DILITHIUM2_CLEAN_shake128_stream_init( | |||
shake128ctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce); | |||
void PQCLEAN_DILITHIUM2_CLEAN_shake256_stream_init( | |||
shake256ctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce); | |||
#endif |
@@ -1,32 +0,0 @@ | |||
#include "symmetric.h" | |||
#include "fips202.h" | |||
void PQCLEAN_DILITHIUM2_CLEAN_shake128_stream_init(shake128ctx *state, | |||
const unsigned char seed[SEEDBYTES], | |||
uint16_t nonce) { | |||
unsigned int i; | |||
unsigned char buf[SEEDBYTES + 2]; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
buf[i] = seed[i]; | |||
} | |||
buf[SEEDBYTES] = (uint8_t) nonce; | |||
buf[SEEDBYTES + 1] = (uint8_t) (nonce >> 8); | |||
shake128_absorb(state, buf, sizeof(buf)); | |||
} | |||
void PQCLEAN_DILITHIUM2_CLEAN_shake256_stream_init(shake256ctx *state, | |||
const unsigned char seed[CRHBYTES], | |||
uint16_t nonce) { | |||
unsigned int i; | |||
unsigned char buf[CRHBYTES + 2]; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
buf[i] = seed[i]; | |||
} | |||
buf[CRHBYTES] = (uint8_t) nonce; | |||
buf[CRHBYTES + 1] = (uint8_t) (nonce >> 8); | |||
shake256_absorb(state, buf, sizeof(buf)); | |||
} |
@@ -1,8 +1,11 @@ | |||
#ifndef SYMMETRIC_H | |||
#define SYMMETRIC_H | |||
#ifndef PQCLEAN_DILITHIUM2_CLEAN_SYMMETRIC_H | |||
#define PQCLEAN_DILITHIUM2_CLEAN_SYMMETRIC_H | |||
#include "fips202.h" | |||
#include "params.h" | |||
#include "stream.h" | |||
#include "fips202.h" | |||
#define crh(OUT, IN, INBYTES) shake256(OUT, CRHBYTES, IN, INBYTES) | |||
#define stream128_init(STATE, SEED, NONCE) PQCLEAN_DILITHIUM2_CLEAN_shake128_stream_init(STATE, SEED, NONCE) | |||
@@ -13,11 +16,8 @@ | |||
#define STREAM128_BLOCKBYTES SHAKE128_RATE | |||
#define STREAM256_BLOCKBYTES SHAKE256_RATE | |||
void PQCLEAN_DILITHIUM2_CLEAN_shake128_stream_init(shake128ctx *state, | |||
const unsigned char *seed, | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM2_CLEAN_shake256_stream_init(shake256ctx *state, | |||
const unsigned char *seed, | |||
uint16_t nonce); | |||
typedef shake128ctx stream128_state; | |||
typedef shake256ctx stream256_state; | |||
#endif |
@@ -17,4 +17,13 @@ auxiliary-submitters: | |||
- Damien Stehlé | |||
implementations: | |||
- name: clean | |||
version: https://github.com/pq-crystals/dilithium/commit/40f79645879b5c69835cd91d06945d7c24f39922 | |||
version: https://github.com/pq-crystals/dilithium/commit/c1b40fd599e71f65aa18be64dd6c3fc8e84b0c08 | |||
- name: avx2 | |||
version: https://github.com/pq-crystals/dilithium/commit/c1b40fd599e71f65aa18be64dd6c3fc8e84b0c08 | |||
supported_platforms: | |||
- architecture: x86_64 | |||
operating_systems: | |||
- Linux | |||
required_flags: | |||
- avx2 | |||
- bmi2 |
@@ -0,0 +1,6 @@ | |||
Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) | |||
For Keccak and the random number generator | |||
we are using public-domain code from sources | |||
and by authors listed in comments on top of | |||
the respective files. |
@@ -0,0 +1,43 @@ | |||
# This Makefile can be used with GNU Make or BSD Make | |||
# Static archive produced by the default target `all`.
LIB=libdilithium3_avx2.a | |||
# C and assembly translation units of this implementation.
# NOTE(review): SOURCES is not referenced by any rule below; OBJECTS drives the build.
SOURCES = fips202x4.c invntt.s nttconsts.c ntt.s packing.c pointwise.S poly.c \ | |||
polyvec.c reduce.s rejsample.c rounding.c sign.c stream.c | |||
# One object per entry of SOURCES; these are the archive members.
OBJECTS = fips202x4.o invntt.o nttconsts.o ntt.o packing.o pointwise.o poly.o \ | |||
polyvec.o reduce.o rejsample.o rounding.o sign.o stream.o | |||
# Every object is rebuilt when any of these headers/includes changes.
HEADERS = alignment.h api.h params.h sign.h polyvec.h poly.h packing.h ntt.h \ | |||
nttconsts.h reduce.h rounding.h rejsample.h symmetric.h stream.h \ | |||
fips202x4.h shuffle.inc | |||
# Strict C99 with warnings as errors; enables AVX2, BMI and POPCNT code generation
# and adds the shared ../../../common directory to the include path.
# EXTRAFLAGS lets the caller append further options.
CFLAGS=-O3 -Wall -Wextra -Wpedantic -Wvla -Werror \ | |||
-Wmissing-prototypes -Wredundant-decls -std=c99 \ | |||
-Wcast-align \ | |||
-mavx2 -mbmi -mpopcnt -I../../../common $(EXTRAFLAGS) | |||
all: $(LIB) | |||
# Four-way Keccak permutation object; it is built by a recursive make in
# KECCAK4XDIR and added to the archive alongside OBJECTS.
KECCAK4XDIR=../../../common/keccak4x | |||
KECCAK4XOBJ=KeccakP-1600-times4-SIMD256.o | |||
KECCAK4X=$(KECCAK4XDIR)/$(KECCAK4XOBJ) | |||
# Compile C sources.
%.o: %.c $(HEADERS) | |||
$(CC) $(CFLAGS) -c -o $@ $< | |||
# Assemble plain assembly sources.
%.o: %.s $(HEADERS) | |||
$(AS) -o $@ $< | |||
# Assemble .S sources.
# NOTE(review): this rule invokes $(AS) directly with -c; confirm the configured AS accepts it.
%.o: %.S $(HEADERS) | |||
$(AS) -c -o $@ $< | |||
# Archive all objects plus the Keccak object into the library.
$(LIB): $(OBJECTS) $(KECCAK4X) | |||
$(AR) -r $@ $^ | |||
$(KECCAK4X): | |||
$(MAKE) -C $(KECCAK4XDIR) $(KECCAK4XOBJ) | |||
# Remove local build products and clean the shared Keccak directory as well.
clean: | |||
$(RM) $(OBJECTS) | |||
$(RM) $(LIB) | |||
$(MAKE) -C $(KECCAK4XDIR) clean | |||
@@ -0,0 +1,22 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_ALIGNMENT_H
#define PQCLEAN_DILITHIUM3_AVX2_ALIGNMENT_H

/*
 * Anonymous union types that overlay a plain integer array (`as_arr`) with an
 * array of 256-bit vectors (`as_vec`). Because the union contains `__m256i`
 * members it inherits their alignment, so `as_arr` can be handed to aligned
 * AVX2 loads/stores.
 *
 * N is the number of array elements and should be a multiple of the number of
 * elements per 256-bit vector (32 bytes, 8 dwords or 4 qwords) so that both
 * views cover exactly the same storage.
 */

/* The macros below need uint8_t/uint32_t/uint64_t and __m256i. */
#include <immintrin.h>
#include <stdint.h>

/* N bytes viewed as N/32 vectors. */
#define ALIGNED_UINT8(N)        \
    union {                     \
        uint8_t as_arr[N];      \
        __m256i as_vec[(N)/32]; \
    }

/* N 32-bit words viewed as N/8 vectors. */
#define ALIGNED_UINT32(N)       \
    union {                     \
        uint32_t as_arr[N];     \
        __m256i as_vec[(N)/8];  \
    }

/* N 64-bit words viewed as N/4 vectors (four 64-bit words per vector). */
#define ALIGNED_UINT64(N)       \
    union {                     \
        uint64_t as_arr[N];     \
        __m256i as_vec[(N)/4];  \
    }

#endif //PQCLEAN_DILITHIUM3_AVX2_ALIGNMENT_H
@@ -0,0 +1,37 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_API_H
#define PQCLEAN_DILITHIUM3_AVX2_API_H

/*
 * Public interface of the Dilithium3 AVX2 implementation.
 * Only declarations live here; the definitions are provided elsewhere.
 */

#include <stddef.h>
#include <stdint.h>

/* Size constants as named (public key, secret key, signature) and the algorithm name. */
#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES 1472U
#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_SECRETKEYBYTES 3504U
#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES          2701U
#define PQCLEAN_DILITHIUM3_AVX2_CRYPTO_ALGNAME        "Dilithium3"

/* Key generation: writes a key pair to pk / sk. */
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk);

/* Combined interface: signed message sm (length via smlen) from msg of len bytes. */
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign(uint8_t *sm, size_t *smlen,
                                        const uint8_t *msg, size_t len,
                                        const uint8_t *sk);

/* Combined interface: message m (length via mlen) from a signed message sm. */
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_open(uint8_t *m, size_t *mlen,
                                             const uint8_t *sm, size_t smlen,
                                             const uint8_t *pk);

/* Detached interface: signature sig (length via siglen) over m. */
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_signature(uint8_t *sig, size_t *siglen,
                                                  const uint8_t *m, size_t mlen,
                                                  const uint8_t *sk);

/* Detached interface: check sig over m against pk. */
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_verify(const uint8_t *sig, size_t siglen,
                                               const uint8_t *m, size_t mlen,
                                               const uint8_t *pk);

#endif
@@ -0,0 +1,239 @@ | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "fips202.h" | |||
#include "fips202x4.h" | |||
#include "params.h" | |||
/* NOTE(review): neither NROUNDS nor ROL is referenced by the remaining code
 * visible in this file; the permutation itself is provided externally. */
#define NROUNDS 24 | |||
/* Rotate-left expression written for 64-bit operands; an offset of 0 would
 * make the right-hand shift count equal to 64. */
#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset)))) | |||
/* Read eight bytes at x as one 64-bit word, least-significant byte first. */
static uint64_t load64(const uint8_t *x) {
    uint64_t word = 0;
    unsigned int pos = 8;

    /* Walk from the most significant byte down, shifting earlier bytes up. */
    while (pos > 0) {
        --pos;
        word = (word << 8) | (uint64_t)x[pos];
    }
    return word;
}
/* Write the 64-bit word u to x[0..7], least-significant byte first. */
static void store64(uint8_t *x, uint64_t u) {
    uint8_t *dst = x;
    uint8_t *const stop = x + 8;

    for (; dst != stop; ++dst) {
        *dst = (uint8_t)u;
        u >>= 8;
    }
}
/* Use implementation from the Keccak Code Package */ | |||
/* Declared here and defined outside this file; it is applied to the 25-vector
 * state `s` used by the absorb/squeeze helpers below. */
extern void KeccakP1600times4_PermuteAll_24rounds(__m256i *s); | |||
/* Local alias under which the helpers below call the permutation. */
#define KeccakF1600_StatePermute4x KeccakP1600times4_PermuteAll_24rounds | |||
/*
 * Absorb four messages of identical length into four interleaved Keccak
 * states.
 *
 * s      - 25 vectors; 64-bit lane j of s[i] holds word i of state j. The
 *          state is cleared here, so the caller may pass uninitialised storage.
 * r      - rate in bytes. Blocks are consumed as r/8 64-bit words and the
 *          scratch buffers hold 200 bytes, so r is expected to be a multiple
 *          of 8 and at most 200.
 * m0..m3 - the four inputs, each mlen bytes long.
 * mlen   - length of every input in bytes.
 * p      - padding byte written directly after the last message byte.
 *
 * The final padded block is XORed into the state but not permuted; the
 * squeeze routine applies the permutation before emitting each block.
 */
static void keccak_absorb4x(__m256i *s,
                            unsigned int r,
                            const uint8_t *m0,
                            const uint8_t *m1,
                            const uint8_t *m2,
                            const uint8_t *m3,
                            unsigned long long mlen,
                            uint8_t p) {
    unsigned long long i;
    uint8_t t0[200];
    uint8_t t1[200];
    uint8_t t2[200];
    uint8_t t3[200];

    uint64_t *ss = (uint64_t *)s;

    /* Clear the state without reading it: callers hand in an uninitialised
     * array, so XORing each element with itself would read indeterminate
     * values. */
    for (i = 0; i < 25; ++i) {
        s[i] = _mm256_setzero_si256();
    }

    /* Full blocks: XOR r bytes of every message into its lane, then permute. */
    while (mlen >= r) {
        for (i = 0; i < r / 8; ++i) {
            ss[4 * i + 0] ^= load64(m0 + 8 * i);
            ss[4 * i + 1] ^= load64(m1 + 8 * i);
            ss[4 * i + 2] ^= load64(m2 + 8 * i);
            ss[4 * i + 3] ^= load64(m3 + 8 * i);
        }

        KeccakF1600_StatePermute4x(s);
        mlen -= r;
        m0 += r;
        m1 += r;
        m2 += r;
        m3 += r;
    }

    /* Build the last block: zero fill, message tail, padding byte p ... */
    for (i = 0; i < r; ++i) {
        t0[i] = 0;
        t1[i] = 0;
        t2[i] = 0;
        t3[i] = 0;
    }
    for (i = 0; i < mlen; ++i) {
        t0[i] = m0[i];
        t1[i] = m1[i];
        t2[i] = m2[i];
        t3[i] = m3[i];
    }

    /* i == mlen here, i.e. the first byte after the message tail. */
    t0[i] = p;
    t1[i] = p;
    t2[i] = p;
    t3[i] = p;

    /* ... and the top bit of the block's last byte (OR, since it may coincide
     * with the padding byte when mlen == r - 1). */
    t0[r - 1] |= 128;
    t1[r - 1] |= 128;
    t2[r - 1] |= 128;
    t3[r - 1] |= 128;

    for (i = 0; i < r / 8; ++i) {
        ss[4 * i + 0] ^= load64(t0 + 8 * i);
        ss[4 * i + 1] ^= load64(t1 + 8 * i);
        ss[4 * i + 2] ^= load64(t2 + 8 * i);
        ss[4 * i + 3] ^= load64(t3 + 8 * i);
    }
}
/*
 * Squeeze nblocks blocks of r bytes from each of the four interleaved Keccak
 * states in s. The permutation is applied before every block is extracted.
 * Output for state j goes to hj; each buffer receives nblocks * r bytes.
 */
static void keccak_squeezeblocks4x(uint8_t *h0, uint8_t *h1, uint8_t *h2, uint8_t *h3,
                                   unsigned long nblocks, unsigned int r, __m256i *s) {
    uint64_t *lanes = (uint64_t *)s;
    unsigned long remaining;
    unsigned int w;

    for (remaining = nblocks; remaining > 0; --remaining) {
        KeccakF1600_StatePermute4x(s);

        /* Word w of state j sits at lanes[4 * w + j]. */
        for (w = 0; w < r / 8; ++w) {
            const uint64_t *quad = lanes + 4 * w;
            store64(h0 + 8 * w, quad[0]);
            store64(h1 + 8 * w, quad[1]);
            store64(h2 + 8 * w, quad[2]);
            store64(h3 + 8 * w, quad[3]);
        }

        h0 += r;
        h1 += r;
        h2 += r;
        h3 += r;
    }
}
/* Four-way SHAKE128 absorb: forwards to keccak_absorb4x with rate
 * SHAKE128_RATE and padding byte 0x1F. The state s is (re)initialised by the
 * callee; m0..m3 are four inputs of mlen bytes each. */
void PQCLEAN_DILITHIUM3_AVX2_shake128_absorb4x( | |||
__m256i *s, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen) { | |||
keccak_absorb4x(s, SHAKE128_RATE, m0, m1, m2, m3, mlen, 0x1F); | |||
} | |||
/* Four-way SHAKE128 squeeze: forwards to keccak_squeezeblocks4x with rate
 * SHAKE128_RATE, writing nblocks * SHAKE128_RATE bytes to each of h0..h3. */
void PQCLEAN_DILITHIUM3_AVX2_shake128_squeezeblocks4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long nblocks, | |||
__m256i *s) { | |||
keccak_squeezeblocks4x(h0, h1, h2, h3, nblocks, SHAKE128_RATE, s); | |||
} | |||
/* Four-way SHAKE256 absorb: forwards to keccak_absorb4x with rate
 * SHAKE256_RATE and padding byte 0x1F. The state s is (re)initialised by the
 * callee; m0..m3 are four inputs of mlen bytes each. */
void PQCLEAN_DILITHIUM3_AVX2_shake256_absorb4x( | |||
__m256i *s, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen) { | |||
keccak_absorb4x(s, SHAKE256_RATE, m0, m1, m2, m3, mlen, 0x1F); | |||
} | |||
/* Four-way SHAKE256 squeeze: forwards to keccak_squeezeblocks4x with rate
 * SHAKE256_RATE, writing nblocks * SHAKE256_RATE bytes to each of h0..h3. */
void PQCLEAN_DILITHIUM3_AVX2_shake256_squeezeblocks4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long nblocks, | |||
__m256i *s) { | |||
keccak_squeezeblocks4x(h0, h1, h2, h3, nblocks, SHAKE256_RATE, s); | |||
} | |||
void PQCLEAN_DILITHIUM3_AVX2_shake128_4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long long hlen, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen) { | |||
unsigned int i; | |||
unsigned long nblocks = hlen / SHAKE128_RATE; | |||
uint8_t t[4][SHAKE128_RATE]; | |||
__m256i s[25]; | |||
PQCLEAN_DILITHIUM3_AVX2_shake128_absorb4x(s, m0, m1, m2, m3, mlen); | |||
PQCLEAN_DILITHIUM3_AVX2_shake128_squeezeblocks4x(h0, h1, h2, h3, nblocks, s); | |||
h0 += nblocks * SHAKE128_RATE; | |||
h1 += nblocks * SHAKE128_RATE; | |||
h2 += nblocks * SHAKE128_RATE; | |||
h3 += nblocks * SHAKE128_RATE; | |||
hlen -= nblocks * SHAKE128_RATE; | |||
if (hlen) { | |||
PQCLEAN_DILITHIUM3_AVX2_shake128_squeezeblocks4x(t[0], t[1], t[2], t[3], 1, s); | |||
for (i = 0; i < hlen; ++i) { | |||
h0[i] = t[0][i]; | |||
h1[i] = t[1][i]; | |||
h2[i] = t[2][i]; | |||
h3[i] = t[3][i]; | |||
} | |||
} | |||
} | |||
void PQCLEAN_DILITHIUM3_AVX2_shake256_4x( | |||
uint8_t *h0, | |||
uint8_t *h1, | |||
uint8_t *h2, | |||
uint8_t *h3, | |||
unsigned long long hlen, | |||
const uint8_t *m0, | |||
const uint8_t *m1, | |||
const uint8_t *m2, | |||
const uint8_t *m3, | |||
unsigned long long mlen) { | |||
unsigned int i; | |||
unsigned long nblocks = hlen / SHAKE256_RATE; | |||
uint8_t t[4][SHAKE256_RATE]; | |||
__m256i s[25]; | |||
PQCLEAN_DILITHIUM3_AVX2_shake256_absorb4x(s, m0, m1, m2, m3, mlen); | |||
PQCLEAN_DILITHIUM3_AVX2_shake256_squeezeblocks4x(h0, h1, h2, h3, nblocks, s); | |||
h0 += nblocks * SHAKE256_RATE; | |||
h1 += nblocks * SHAKE256_RATE; | |||
h2 += nblocks * SHAKE256_RATE; | |||
h3 += nblocks * SHAKE256_RATE; | |||
hlen -= nblocks * SHAKE256_RATE; | |||
if (hlen) { | |||
PQCLEAN_DILITHIUM3_AVX2_shake256_squeezeblocks4x(t[0], t[1], t[2], t[3], 1, s); | |||
for (i = 0; i < hlen; ++i) { | |||
h0[i] = t[0][i]; | |||
h1[i] = t[1][i]; | |||
h2[i] = t[2][i]; | |||
h3[i] = t[3][i]; | |||
} | |||
} | |||
} |
@@ -0,0 +1,65 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_FIPS202X4_H
#define PQCLEAN_DILITHIUM3_AVX2_FIPS202X4_H

/*
 * Four-way parallel SHAKE128 / SHAKE256.
 *
 * Every routine processes four independent inputs (m0..m3, all mlen bytes) or
 * outputs (h0..h3) at once. The state `s` is an array of 25 __m256i vectors.
 */

#include <immintrin.h>
#include <stdint.h>

#include "params.h"

/* Initialise s and absorb four mlen-byte messages (SHAKE128 rate). */
void PQCLEAN_DILITHIUM3_AVX2_shake128_absorb4x(__m256i *s,
        const uint8_t *m0, const uint8_t *m1,
        const uint8_t *m2, const uint8_t *m3,
        unsigned long long mlen);

/* Squeeze nblocks SHAKE128-rate blocks from s into each of h0..h3. */
void PQCLEAN_DILITHIUM3_AVX2_shake128_squeezeblocks4x(
    uint8_t *h0, uint8_t *h1, uint8_t *h2, uint8_t *h3,
    unsigned long nblocks, __m256i *s);

/* Initialise s and absorb four mlen-byte messages (SHAKE256 rate). */
void PQCLEAN_DILITHIUM3_AVX2_shake256_absorb4x(__m256i *s,
        const uint8_t *m0, const uint8_t *m1,
        const uint8_t *m2, const uint8_t *m3,
        unsigned long long mlen);

/* Squeeze nblocks SHAKE256-rate blocks from s into each of h0..h3. */
void PQCLEAN_DILITHIUM3_AVX2_shake256_squeezeblocks4x(
    uint8_t *h0, uint8_t *h1, uint8_t *h2, uint8_t *h3,
    unsigned long nblocks, __m256i *s);

/* One-shot SHAKE128: hlen output bytes per lane from mlen input bytes per lane. */
void PQCLEAN_DILITHIUM3_AVX2_shake128_4x(
    uint8_t *h0, uint8_t *h1, uint8_t *h2, uint8_t *h3,
    unsigned long long hlen,
    const uint8_t *m0, const uint8_t *m1,
    const uint8_t *m2, const uint8_t *m3,
    unsigned long long mlen);

/* One-shot SHAKE256: hlen output bytes per lane from mlen input bytes per lane. */
void PQCLEAN_DILITHIUM3_AVX2_shake256_4x(
    uint8_t *h0, uint8_t *h1, uint8_t *h2, uint8_t *h3,
    unsigned long long hlen,
    const uint8_t *m0, const uint8_t *m1,
    const uint8_t *m2, const uint8_t *m3,
    unsigned long long mlen);

#endif
@@ -0,0 +1,281 @@ | |||
.include "shuffle.inc" | |||
# Inverse-NTT butterfly on four register pairs (l_i, h_i), i = 0..3.
# For every pair, using the constants the callers keep in ymm0/ymm1/ymm2:
#   t   = (l + ymm2 - h) * z
#   l  <- l + h
#   h  <- (t + low32(low32(t) * ymm0) * ymm1) >> 32
# vpmuludq only multiplies the low dword of each 64-bit lane, so the products
# are formed per 64-bit lane.
# z0 is the multiplier register for pairs 0 and 1, z1 for pairs 2 and 3.
# Clobbers ymm12-ymm15. With the default z0=15, ymm15 is read as a multiplier
# before it is overwritten as scratch.
.macro butterfly l0,l1,l2,l3,h0,h1,h2,h3 z0=15,z1=3 | |||
# t = l + ymm2 - h (32-bit adds/subs), results in ymm12-ymm15
vpaddd %ymm2,%ymm\l0,%ymm12 | |||
vpaddd %ymm2,%ymm\l1,%ymm13 | |||
vpaddd %ymm2,%ymm\l2,%ymm14 | |||
vpsubd %ymm\h0,%ymm12,%ymm12 | |||
vpsubd %ymm\h1,%ymm13,%ymm13 | |||
vpsubd %ymm\h2,%ymm14,%ymm14 | |||
# t *= z, interleaved with the remaining difference and the sums l += h
vpmuludq %ymm\z0,%ymm12,%ymm12 | |||
vpmuludq %ymm\z0,%ymm13,%ymm13 | |||
vpaddd %ymm2,%ymm\l3,%ymm15 | |||
vpmuludq %ymm\z1,%ymm14,%ymm14 | |||
vpsubd %ymm\h3,%ymm15,%ymm15 | |||
vpaddd %ymm\l0,%ymm\h0,%ymm\l0 | |||
vpmuludq %ymm\z1,%ymm15,%ymm15 | |||
vpaddd %ymm\l1,%ymm\h1,%ymm\l1 | |||
vpaddd %ymm\l2,%ymm\h2,%ymm\l2 | |||
vpaddd %ymm\l3,%ymm\h3,%ymm\l3 | |||
# reduction step 1: h = low32(t) * ymm0
vpmuludq %ymm0,%ymm12,%ymm\h0 | |||
vpmuludq %ymm0,%ymm13,%ymm\h1 | |||
vpmuludq %ymm0,%ymm14,%ymm\h2 | |||
vpmuludq %ymm0,%ymm15,%ymm\h3 | |||
# reduction step 2: h = low32(h) * ymm1
vpmuludq %ymm1,%ymm\h0,%ymm\h0 | |||
vpmuludq %ymm1,%ymm\h1,%ymm\h1 | |||
vpmuludq %ymm1,%ymm\h2,%ymm\h2 | |||
vpmuludq %ymm1,%ymm\h3,%ymm\h3 | |||
# reduction step 3: h = (h + t) >> 32
vpaddq %ymm12,%ymm\h0,%ymm\h0 | |||
vpaddq %ymm13,%ymm\h1,%ymm\h1 | |||
vpaddq %ymm14,%ymm\h2,%ymm\h2 | |||
vpaddq %ymm15,%ymm\h3,%ymm\h3 | |||
vpsrlq $32,%ymm\h0,%ymm\h0 | |||
vpsrlq $32,%ymm\h1,%ymm\h1 | |||
vpsrlq $32,%ymm\h2,%ymm\h2 | |||
vpsrlq $32,%ymm\h3,%ymm\h3 | |||
.endm | |||
# First five levels (labels level0..level4) of the inverse NTT on one group of
# coefficients.
# Register use: %rdi is the store base, %rsi the load base, %rdx the base of
# the twiddle factors. Assuming the System V AMD64 argument order, that is
# (tmp, a, zetas_inv) of the C prototype -- TODO confirm against the caller.
# Memory traffic: loads 4 x 32 bytes at 0..127(%rsi), stores 8 x 32 bytes at
# 0..255(%rdi), reads twiddles at 0..123(%rdx). The vmovdqa accesses require
# 32-byte aligned %rsi and %rdi.
.global PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx | |||
PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx: | |||
#consts | |||
# ymm0 = 8xqinv, ymm1 = 8xq, ymm2 = 8x256q; they stay live for the whole routine
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xq(%rip),%ymm1 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8x256q(%rip),%ymm2 | |||
#load | |||
vmovdqa (%rsi),%ymm6 | |||
vmovdqa 32(%rsi),%ymm7 | |||
vmovdqa 64(%rsi),%ymm5 | |||
vmovdqa 96(%rsi),%ymm10 | |||
#reorder | |||
# shuffle8/shuffle4 come from shuffle.inc (not visible here)
shuffle8 6,5,8,5 | |||
shuffle8 7,10,6,10 | |||
shuffle4 8,6,4,6 | |||
shuffle4 5,10,8,10 | |||
# move the odd dwords of ymm4/6/8/10 into the low halves of ymm5/7/9/11
vpsrlq $32,%ymm4,%ymm5 | |||
vpsrlq $32,%ymm6,%ymm7 | |||
vpsrlq $32,%ymm8,%ymm9 | |||
vpsrlq $32,%ymm10,%ymm11 | |||
level0: | |||
# Same arithmetic as the butterfly macro, written out because each of the four
# pairs uses its own twiddle vector (four dwords zero-extended to qwords).
vpmovzxdq (%rdx),%ymm3 | |||
vpmovzxdq 16(%rdx),%ymm15 | |||
vpaddd %ymm2,%ymm4,%ymm12 | |||
vpaddd %ymm2,%ymm6,%ymm13 | |||
vpaddd %ymm2,%ymm8,%ymm14 | |||
vpsubd %ymm5,%ymm12,%ymm12 | |||
vpsubd %ymm7,%ymm13,%ymm13 | |||
vpsubd %ymm9,%ymm14,%ymm14 | |||
vpmuludq %ymm3,%ymm12,%ymm12 | |||
vpmuludq %ymm15,%ymm13,%ymm13 | |||
vpaddd %ymm2,%ymm10,%ymm15 | |||
vpsubd %ymm11,%ymm15,%ymm15 | |||
vpaddd %ymm4,%ymm5,%ymm4 | |||
vpaddd %ymm6,%ymm7,%ymm6 | |||
# ymm5/ymm7 are free after the sums above and are reused for twiddles
vpmovzxdq 32(%rdx),%ymm5 | |||
vpmovzxdq 48(%rdx),%ymm7 | |||
vpmuludq %ymm5,%ymm14,%ymm14 | |||
vpmuludq %ymm7,%ymm15,%ymm15 | |||
vpaddd %ymm8,%ymm9,%ymm8 | |||
vpaddd %ymm10,%ymm11,%ymm10 | |||
# reduce the four products into ymm5/7/9/11
vpmuludq %ymm0,%ymm12,%ymm5 | |||
vpmuludq %ymm0,%ymm13,%ymm7 | |||
vpmuludq %ymm0,%ymm14,%ymm9 | |||
vpmuludq %ymm0,%ymm15,%ymm11 | |||
vpmuludq %ymm1,%ymm5,%ymm5 | |||
vpmuludq %ymm1,%ymm7,%ymm7 | |||
vpmuludq %ymm1,%ymm9,%ymm9 | |||
vpmuludq %ymm1,%ymm11,%ymm11 | |||
vpaddq %ymm12,%ymm5,%ymm5 | |||
vpaddq %ymm13,%ymm7,%ymm7 | |||
vpaddq %ymm14,%ymm9,%ymm9 | |||
vpaddq %ymm15,%ymm11,%ymm11 | |||
vpsrlq $32,%ymm5,%ymm5 | |||
vpsrlq $32,%ymm7,%ymm7 | |||
vpsrlq $32,%ymm9,%ymm9 | |||
vpsrlq $32,%ymm11,%ymm11 | |||
level1: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
# default macro multipliers: ymm15 for pairs 0/1, ymm3 for pairs 2/3
vpmovzxdq 64(%rdx),%ymm15 | |||
vpmovzxdq 80(%rdx),%ymm3 | |||
butterfly 4,5,8,9,6,7,10,11 | |||
level2: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
vpmovzxdq 96(%rdx),%ymm3 | |||
butterfly 4,5,6,7,8,9,10,11 3,3 | |||
#shuffle | |||
shuffle4 4,5,3,5 | |||
shuffle4 6,7,4,7 | |||
shuffle4 8,9,6,9 | |||
shuffle4 10,11,8,11 | |||
level3: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
# ymm10 = first twiddle in its low 128 bits, second twiddle in its high 128 bits
vpbroadcastd 112(%rdx),%ymm14 | |||
vpbroadcastd 116(%rdx),%ymm15 | |||
vpblendd $0xF0,%ymm15,%ymm14,%ymm10 | |||
butterfly 3,4,6,8,5,7,9,11 10,10 | |||
#shuffle | |||
shuffle8 3,4,10,4 | |||
shuffle8 6,8,3,8 | |||
shuffle8 5,7,6,7 | |||
shuffle8 9,11,5,11 | |||
level4: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
vpbroadcastd 120(%rdx),%ymm9 | |||
butterfly 10,3,6,5,4,8,7,11 9,9 | |||
#store | |||
vmovdqa %ymm10,(%rdi) | |||
vmovdqa %ymm3,32(%rdi) | |||
vmovdqa %ymm6,64(%rdi) | |||
vmovdqa %ymm5,96(%rdi) | |||
vmovdqa %ymm4,128(%rdi) | |||
vmovdqa %ymm8,160(%rdi) | |||
vmovdqa %ymm7,192(%rdi) | |||
vmovdqa %ymm11,224(%rdi) | |||
ret | |||
# Last three levels (labels level5..level7) of the inverse NTT.
# Register use: %rdi is the store base, %rsi the load base, %rdx the base of
# the twiddle factors. Assuming the System V AMD64 argument order, that is
# (a, tmp, zetas_inv) of the C prototype -- TODO confirm against the caller.
# Memory traffic: loads 8 x 32 bytes at a stride of 256 bytes from %rsi, reads
# twiddles at 0..27(%rdx), stores 8 x 16 bytes at a stride of 128 bytes to
# %rdi. The vmovdqa accesses require aligned %rsi (32 bytes) and %rdi (16 bytes).
.global PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx | |||
PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx: | |||
#consts | |||
# ymm0 = 8xqinv, ymm1 = 8xq, ymm2 = 8x256q
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xq(%rip),%ymm1 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8x256q(%rip),%ymm2 | |||
#load | |||
vmovdqa (%rsi),%ymm4 | |||
vmovdqa 256(%rsi),%ymm5 | |||
vmovdqa 512(%rsi),%ymm6 | |||
vmovdqa 768(%rsi),%ymm7 | |||
vmovdqa 1024(%rsi),%ymm8 | |||
vmovdqa 1280(%rsi),%ymm9 | |||
vmovdqa 1536(%rsi),%ymm10 | |||
vmovdqa 1792(%rsi),%ymm11 | |||
level5: | |||
# Same arithmetic as the butterfly macro, written out because each of the four
# pairs uses its own broadcast twiddle.
vpbroadcastd (%rdx),%ymm3 | |||
vpbroadcastd 4(%rdx),%ymm15 | |||
vpaddd %ymm2,%ymm4,%ymm12 | |||
vpaddd %ymm2,%ymm6,%ymm13 | |||
vpaddd %ymm2,%ymm8,%ymm14 | |||
vpsubd %ymm5,%ymm12,%ymm12 | |||
vpsubd %ymm7,%ymm13,%ymm13 | |||
vpsubd %ymm9,%ymm14,%ymm14 | |||
vpmuludq %ymm3,%ymm12,%ymm12 | |||
vpmuludq %ymm15,%ymm13,%ymm13 | |||
vpaddd %ymm2,%ymm10,%ymm15 | |||
vpsubd %ymm11,%ymm15,%ymm15 | |||
vpaddd %ymm4,%ymm5,%ymm4 | |||
vpaddd %ymm6,%ymm7,%ymm6 | |||
# ymm5/ymm7 are free after the sums above and are reused for twiddles
vpbroadcastd 8(%rdx),%ymm5 | |||
vpbroadcastd 12(%rdx),%ymm7 | |||
vpmuludq %ymm5,%ymm14,%ymm14 | |||
vpmuludq %ymm7,%ymm15,%ymm15 | |||
vpaddd %ymm8,%ymm9,%ymm8 | |||
vpaddd %ymm10,%ymm11,%ymm10 | |||
# reduce the four products into ymm5/7/9/11
vpmuludq %ymm0,%ymm12,%ymm5 | |||
vpmuludq %ymm0,%ymm13,%ymm7 | |||
vpmuludq %ymm0,%ymm14,%ymm9 | |||
vpmuludq %ymm0,%ymm15,%ymm11 | |||
vpmuludq %ymm1,%ymm5,%ymm5 | |||
vpmuludq %ymm1,%ymm7,%ymm7 | |||
vpmuludq %ymm1,%ymm9,%ymm9 | |||
vpmuludq %ymm1,%ymm11,%ymm11 | |||
vpaddq %ymm12,%ymm5,%ymm5 | |||
vpaddq %ymm13,%ymm7,%ymm7 | |||
vpaddq %ymm14,%ymm9,%ymm9 | |||
vpaddq %ymm15,%ymm11,%ymm11 | |||
vpsrlq $32,%ymm5,%ymm5 | |||
vpsrlq $32,%ymm7,%ymm7 | |||
vpsrlq $32,%ymm9,%ymm9 | |||
vpsrlq $32,%ymm11,%ymm11 | |||
level6: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
# default macro multipliers: ymm15 for pairs 0/1, ymm3 for pairs 2/3
vpbroadcastd 16(%rdx),%ymm15 | |||
vpbroadcastd 20(%rdx),%ymm3 | |||
butterfly 4,5,8,9,6,7,10,11 | |||
level7: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
vpbroadcastd 24(%rdx),%ymm3 | |||
butterfly 4,5,6,7,8,9,10,11 3,3 | |||
#consts | |||
# Only ymm4-ymm7 are multiplied by 8xdiv and reduced below; ymm8-ymm11 are
# stored as the butterfly left them.
# NOTE(review): presumably the corresponding factor for ymm8-ymm11 is folded
# into the last twiddle -- confirm against the table generator.
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xdiv(%rip),%ymm3 | |||
vpmuludq %ymm3,%ymm4,%ymm4 | |||
vpmuludq %ymm3,%ymm5,%ymm5 | |||
vpmuludq %ymm3,%ymm6,%ymm6 | |||
vpmuludq %ymm3,%ymm7,%ymm7 | |||
vpmuludq %ymm0,%ymm4,%ymm12 | |||
vpmuludq %ymm0,%ymm5,%ymm13 | |||
vpmuludq %ymm0,%ymm6,%ymm14 | |||
vpmuludq %ymm0,%ymm7,%ymm15 | |||
vpmuludq %ymm1,%ymm12,%ymm12 | |||
vpmuludq %ymm1,%ymm13,%ymm13 | |||
vpmuludq %ymm1,%ymm14,%ymm14 | |||
vpmuludq %ymm1,%ymm15,%ymm15 | |||
vpaddq %ymm12,%ymm4,%ymm4 | |||
vpaddq %ymm13,%ymm5,%ymm5 | |||
vpaddq %ymm14,%ymm6,%ymm6 | |||
vpaddq %ymm15,%ymm7,%ymm7 | |||
vpsrlq $32,%ymm4,%ymm4 | |||
vpsrlq $32,%ymm5,%ymm5 | |||
vpsrlq $32,%ymm6,%ymm6 | |||
vpsrlq $32,%ymm7,%ymm7 | |||
#store | |||
# vpermd with the index vector in ymm3 gathers dwords 0,2,4,6 (the low dword of
# every 64-bit lane) into the low 128 bits, which is all that gets stored.
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_mask(%rip),%ymm3 | |||
vpermd %ymm4,%ymm3,%ymm4 | |||
vpermd %ymm5,%ymm3,%ymm5 | |||
vpermd %ymm6,%ymm3,%ymm6 | |||
vpermd %ymm7,%ymm3,%ymm7 | |||
vpermd %ymm8,%ymm3,%ymm8 | |||
vpermd %ymm9,%ymm3,%ymm9 | |||
vpermd %ymm10,%ymm3,%ymm10 | |||
vpermd %ymm11,%ymm3,%ymm11 | |||
vmovdqa %xmm4,(%rdi) | |||
vmovdqa %xmm5,128(%rdi) | |||
vmovdqa %xmm6,256(%rdi) | |||
vmovdqa %xmm7,384(%rdi) | |||
vmovdqa %xmm8,512(%rdi) | |||
vmovdqa %xmm9,640(%rdi) | |||
vmovdqa %xmm10,768(%rdi) | |||
vmovdqa %xmm11,896(%rdi) | |||
ret |
@@ -0,0 +1,26 @@ | |||
/* The include guard carries the same PQCLEAN_DILITHIUM3_AVX2_ prefix as the
 * other headers of this implementation, so it cannot collide with the ntt.h
 * of another scheme or parameter set included in the same translation unit. */
#ifndef PQCLEAN_DILITHIUM3_AVX2_NTT_H
#define PQCLEAN_DILITHIUM3_AVX2_NTT_H

/*
 * Prototypes of the assembly NTT kernels. Each transform is split into two
 * passes that communicate through a 64-bit-per-coefficient scratch buffer
 * `tmp`.
 */

#include <stdint.h>

#include "nttconsts.h"
#include "params.h"

/* Forward NTT, first pass: reads a and twiddles from zetas, writes tmp. */
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(uint64_t *tmp,
        const uint32_t *a,
        const uint32_t *zetas);

/* Forward NTT, second pass: reads tmp and twiddles from zetas, writes a. */
void PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(uint32_t *a,
        const uint64_t *tmp,
        const uint32_t *zetas);

/* Inverse NTT, first pass: reads a and twiddles from zetas_inv, writes tmp. */
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(uint64_t *tmp,
        const uint32_t *a,
        const uint32_t *zetas_inv);

/* Inverse NTT, second pass: reads tmp and twiddles from zetas_inv, writes a. */
void PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(uint32_t *a,
        const uint64_t *tmp,
        const uint32_t *zetas_inv);

/* Pointwise kernels on coefficient arrays; c is the output, a and b the inputs.
 * NOTE(review): their definitions (pointwise.S) are not visible from here. */
void PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);
void PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(uint32_t *c, const uint32_t *a, const uint32_t *b);

#endif
@@ -0,0 +1,178 @@ | |||
.include "shuffle.inc" | |||
# Forward-NTT butterfly on four register pairs (rl_i, rh_i), i = 0..3.
# For every pair, using the constants the callers keep in ymm0/ymm1/ymm2:
#   p    = rh * z
#   t    = (p + low32(low32(p) * ymm0) * ymm1) >> 32
#   rh  <- rl + ymm2 - t
#   rl  <- rl + t
# vpmuludq only multiplies the low dword of each 64-bit lane, so the products
# are formed per 64-bit lane.
# z0..z3 name the multiplier register of each pair (default ymm3 for all).
# Clobbers ymm12-ymm15; all four multiplier registers are read before those
# are overwritten, so callers may pass ymm12-ymm15 as multipliers.
.macro butterfly rl0,rl1,rl2,rl3,rh0,rh1,rh2,rh3 z0=3,z1=3,z2=3,z3=3 | |||
#mul | |||
vpmuludq %ymm\z0,%ymm\rh0,%ymm\rh0 | |||
vpmuludq %ymm\z1,%ymm\rh1,%ymm\rh1 | |||
vpmuludq %ymm\z2,%ymm\rh2,%ymm\rh2 | |||
vpmuludq %ymm\z3,%ymm\rh3,%ymm\rh3 | |||
#reduce | |||
vpmuludq %ymm0,%ymm\rh0,%ymm12 | |||
vpmuludq %ymm0,%ymm\rh1,%ymm13 | |||
vpmuludq %ymm0,%ymm\rh2,%ymm14 | |||
vpmuludq %ymm0,%ymm\rh3,%ymm15 | |||
vpmuludq %ymm1,%ymm12,%ymm12 | |||
vpmuludq %ymm1,%ymm13,%ymm13 | |||
vpmuludq %ymm1,%ymm14,%ymm14 | |||
vpmuludq %ymm1,%ymm15,%ymm15 | |||
vpaddq %ymm\rh0,%ymm12,%ymm12 | |||
vpaddq %ymm\rh1,%ymm13,%ymm13 | |||
vpaddq %ymm\rh2,%ymm14,%ymm14 | |||
vpaddq %ymm\rh3,%ymm15,%ymm15 | |||
vpsrlq $32,%ymm12,%ymm12 | |||
vpsrlq $32,%ymm13,%ymm13 | |||
vpsrlq $32,%ymm14,%ymm14 | |||
vpsrlq $32,%ymm15,%ymm15 | |||
#update | |||
# rh = rl + ymm2 is formed first so the subtraction below stays non-negative
# as long as t does not exceed that sum
vpaddd %ymm2,%ymm\rl0,%ymm\rh0 | |||
vpaddd %ymm2,%ymm\rl1,%ymm\rh1 | |||
vpaddd %ymm2,%ymm\rl2,%ymm\rh2 | |||
vpaddd %ymm2,%ymm\rl3,%ymm\rh3 | |||
vpaddd %ymm12,%ymm\rl0,%ymm\rl0 | |||
vpaddd %ymm13,%ymm\rl1,%ymm\rl1 | |||
vpaddd %ymm14,%ymm\rl2,%ymm\rl2 | |||
vpaddd %ymm15,%ymm\rl3,%ymm\rl3 | |||
vpsubd %ymm12,%ymm\rh0,%ymm\rh0 | |||
vpsubd %ymm13,%ymm\rh1,%ymm\rh1 | |||
vpsubd %ymm14,%ymm\rh2,%ymm\rh2 | |||
vpsubd %ymm15,%ymm\rh3,%ymm\rh3 | |||
.endm | |||
# First three levels (labels level0..level2) of the forward NTT on four
# coefficients from each of eight positions 128 bytes apart.
# Register use: %rdi is the store base, %rsi the load base, %rdx the base of
# the twiddle factors. Assuming the System V AMD64 argument order, that is
# (tmp, a, zetas) of the C prototype -- TODO confirm against the caller.
# Memory traffic: loads 8 x 16 bytes at a stride of 128 bytes from %rsi, reads
# twiddles at 0..27(%rdx), stores 8 x 32 bytes at a stride of 256 bytes to
# %rdi (vmovdqa: %rdi must be 32-byte aligned).
.global PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx | |||
PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx: | |||
#consts | |||
# ymm0 = 8xqinv, ymm1 = 8xq, ymm2 = 8x2q
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xq(%rip),%ymm1 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8x2q(%rip),%ymm2 | |||
level0: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
vpbroadcastd (%rdx),%ymm3 | |||
#load | |||
# each load zero-extends four 32-bit coefficients to four 64-bit lanes
vpmovzxdq (%rsi),%ymm4 | |||
vpmovzxdq 128(%rsi),%ymm5 | |||
vpmovzxdq 256(%rsi),%ymm6 | |||
vpmovzxdq 384(%rsi),%ymm7 | |||
vpmovzxdq 512(%rsi),%ymm8 | |||
vpmovzxdq 640(%rsi),%ymm9 | |||
vpmovzxdq 768(%rsi),%ymm10 | |||
vpmovzxdq 896(%rsi),%ymm11 | |||
butterfly 4,5,6,7,8,9,10,11 | |||
level1: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
vpbroadcastd 4(%rdx),%ymm12 | |||
vpbroadcastd 8(%rdx),%ymm13 | |||
butterfly 4,5,8,9,6,7,10,11 12,12,13,13 | |||
level2: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
vpbroadcastd 12(%rdx),%ymm12 | |||
vpbroadcastd 16(%rdx),%ymm13 | |||
vpbroadcastd 20(%rdx),%ymm14 | |||
vpbroadcastd 24(%rdx),%ymm15 | |||
butterfly 4,6,8,10,5,7,9,11 12,13,14,15 | |||
#store | |||
vmovdqa %ymm4,(%rdi) | |||
vmovdqa %ymm5,256(%rdi) | |||
vmovdqa %ymm6,512(%rdi) | |||
vmovdqa %ymm7,768(%rdi) | |||
vmovdqa %ymm8,1024(%rdi) | |||
vmovdqa %ymm9,1280(%rdi) | |||
vmovdqa %ymm10,1536(%rdi) | |||
vmovdqa %ymm11,1792(%rdi) | |||
ret | |||
# Remaining levels (labels level3..level7) of the forward NTT on one group of
# coefficients.
# Register use: %rdi is the store base, %rsi the load base, %rdx the base of
# the twiddle factors. Assuming the System V AMD64 argument order, that is
# (a, tmp, zetas) of the C prototype -- TODO confirm against the caller.
# Memory traffic: loads 8 x 32 contiguous bytes from %rsi, reads twiddles at
# 0..123(%rdx), stores 4 x 32 contiguous bytes to %rdi. The vmovdqa accesses
# require 32-byte aligned %rsi and %rdi.
.global PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx | |||
PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx: | |||
#consts | |||
# ymm0 = 8xqinv, ymm1 = 8xq, ymm2 = 8x2q
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xq(%rip),%ymm1 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8x2q(%rip),%ymm2 | |||
#load | |||
vmovdqa (%rsi),%ymm4 | |||
vmovdqa 32(%rsi),%ymm5 | |||
vmovdqa 64(%rsi),%ymm6 | |||
vmovdqa 96(%rsi),%ymm7 | |||
vmovdqa 128(%rsi),%ymm8 | |||
vmovdqa 160(%rsi),%ymm9 | |||
vmovdqa 192(%rsi),%ymm10 | |||
vmovdqa 224(%rsi),%ymm11 | |||
level3: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
vpbroadcastd (%rdx),%ymm3 | |||
butterfly 4,5,6,7,8,9,10,11 | |||
level4: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
# ymm12 = first twiddle in its low 128 bits, second twiddle in its high 128 bits
vpbroadcastd 4(%rdx),%ymm12 | |||
vpbroadcastd 8(%rdx),%ymm13 | |||
vpblendd $0xF0,%ymm13,%ymm12,%ymm12 | |||
# shuffle8/shuffle4 come from shuffle.inc (not visible here)
shuffle8 4,8,3,8 | |||
shuffle8 5,9,4,9 | |||
shuffle8 6,10,5,10 | |||
shuffle8 7,11,6,11 | |||
butterfly 3,8,4,9,5,10,6,11 12,12,12,12 | |||
level5: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
# four consecutive twiddles, one per 64-bit lane
vpmovzxdq 12(%rdx),%ymm12 | |||
shuffle4 3,5,7,5 | |||
shuffle4 8,10,3,10 | |||
shuffle4 4,6,8,6 | |||
shuffle4 9,11,4,11 | |||
butterfly 7,5,3,10,8,6,4,11 12,12,12,12 | |||
level6: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
vpmovzxdq 28(%rdx),%ymm12 | |||
vpmovzxdq 44(%rdx),%ymm13 | |||
butterfly 7,5,8,6,3,10,4,11 12,12,13,13 | |||
level7: | |||
#PQCLEAN_DILITHIUM3_AVX2_zetas | |||
vpmovzxdq 60(%rdx),%ymm12 | |||
vpmovzxdq 76(%rdx),%ymm13 | |||
vpmovzxdq 92(%rdx),%ymm14 | |||
vpmovzxdq 108(%rdx),%ymm15 | |||
butterfly 7,3,8,4,5,10,6,11 12,13,14,15 | |||
#store | |||
# Pack two result vectors into one: the second vector of each pair is shifted
# into the odd dwords and blended (mask 0xAA) over the first.
vpsllq $32,%ymm5,%ymm5 | |||
vpsllq $32,%ymm10,%ymm10 | |||
vpsllq $32,%ymm6,%ymm6 | |||
vpsllq $32,%ymm11,%ymm11 | |||
vpblendd $0xAA,%ymm5,%ymm7,%ymm7 | |||
vpblendd $0xAA,%ymm10,%ymm3,%ymm3 | |||
vpblendd $0xAA,%ymm6,%ymm8,%ymm8 | |||
vpblendd $0xAA,%ymm11,%ymm4,%ymm4 | |||
shuffle4 7,3,5,3 | |||
shuffle4 8,4,7,4 | |||
shuffle8 5,7,6,7 | |||
shuffle8 3,4,5,4 | |||
vmovdqa %ymm6,(%rdi) | |||
vmovdqa %ymm5,32(%rdi) | |||
vmovdqa %ymm7,64(%rdi) | |||
vmovdqa %ymm4,96(%rdi) | |||
ret |
@@ -0,0 +1,80 @@ | |||
#include "nttconsts.h" | |||
/* File-local helper constants; all three are #undef'd again after the vector
 * constants below have been defined. */
#define QINV 4236238847 // -q^(-1) mod 2^32 | |||
/* NOTE(review): presumably 2^32 mod Q (Q comes from params.h) -- confirm. */
#define MONT 4193792ULL | |||
/* Scaling factor stored in _8xdiv; evaluated in 64-bit arithmetic because
 * MONT is unsigned long long. */
#define DIV (((MONT*MONT % Q) * (Q-1) % Q) * ((Q-1) >> 8) % Q) | |||
/* Eight-dword vector constants loaded by the assembly routines via
 * RIP-relative vmovdqa. */
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xqinv = {.as_arr = {QINV, QINV, QINV, QINV, QINV, QINV, QINV, QINV}}; | |||
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xq = {.as_arr = {Q, Q, Q, Q, Q, Q, Q, Q}}; | |||
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x2q = {.as_arr = {2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q, 2 * Q}}; | |||
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x256q = {.as_arr = {256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, 256 * Q, | |||
256 * Q | |||
} | |||
}; | |||
/* Index vector for vpermd: selects dwords 0, 2, 4, 6 into the low four
 * positions (the upper four indices are 0). */
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_mask = {.as_arr = {0, 2, 4, 6, 0, 0, 0, 0}}; | |||
/* 0x7FFFFF = 2^23 - 1, i.e. the low 23 bits set in every dword. */
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x23ones = {.as_arr = {0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, 0x7FFFFF, | |||
0x7FFFFF, 0x7FFFFF | |||
} | |||
}; | |||
const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xdiv = { .as_arr = {DIV, DIV, DIV, DIV, DIV, DIV, DIV, DIV}}; | |||
#undef QINV | |||
#undef MONT | |||
#undef DIV | |||
/* Twiddle-factor table for the forward NTT (per its name). Stored in an
 * aligned_uint32xN_t so the array inherits 32-byte vector alignment.
 * NOTE(review): the values are precomputed constants; do not edit by hand. */
const aligned_uint32xN_t PQCLEAN_DILITHIUM3_AVX2_zetas = { | |||
.as_arr = { | |||
0, 25847, 5771523, 7861508, 237124, 7602457, 7504169, 466468, 1826347, 2725464, 1024112, 2706023, 95776, | |||
3077325, 3530437, 4450022, 4702672, 6927966, 2176455, 6851714, 5339162, 3475950, 6795196, 2091667, | |||
5037939, 266997, 4860065, 3407706, 2244091, 2434439, 4621053, 2316500, 5933984, 7144689, 7183191, | |||
3817976, 4817955, 3513181, 5187039, 2353451, 7300517, 3585928, 6718724, 4788269, 5842901, 3915439, | |||
7122806, 4296819, 5190273, 4747489, 1939314, 7380215, 5223087, 126922, 900702, 495491, 7725090, 4823422, | |||
1859098, 6767243, 5257975, 7855319, 909542, 8337157, 2031748, 7611795, 819034, 7857917, 3207046, 4784579, | |||
8021166, 7830929, 7260833, 4519302, 5336701, 3574422, 5512770, 3412210, 2147896, 5412772, 7969390, | |||
7396998, 2715295, 4686924, 5903370, 342297, 3437287, 2842341, 4055324, 286988, 5038140, 2691481, 1247620, | |||
5942594, 1735879, 5790267, 2486353, 4108315, 203044, 1265009, 1595974, 6288512, 2619752, 6271868, | |||
3539968, 8079950, 2348700, 7841118, 7709315, 8357436, 7998430, 1852771, 7151892, 7072248, 1349076, | |||
6949987, 4613401, 5386378, 7047359, 7929317, 1250494, 1869119, 1237275, 1312455, 2635921, 1903435, | |||
5062207, 3306115, 4832145, 7329447, 6950192, 6417775, 3119733, 6262231, 4520680, 6681150, 6736599, | |||
3505694, 4558682, 5037034, 508951, 44288, 904516, 264944, 3097992, 7280319, 3958618, 7100756, 1500165, | |||
7838005, 5796124, 1917081, 777191, 5548557, 4656147, 5834105, 2235880, 6709241, 594136, 7005614, 3406031, | |||
6533464, 4603424, 5495562, 6980856, 5102745, 3507263, 6239768, 6779997, 3699596, 4656075, 1653064, | |||
2389356, 759969, 8371839, 5130689, 8169440, 7063561, 6366809, 1957272, 5196991, 810149, 2432395, 3369112, | |||
162844, 1652634, 2454455, 185531, 1616392, 4686184, 8215696, 7173032, 3014001, 6581310, 3111497, 1757237, | |||
8360995, 811944, 531354, 954230, 3881043, 189548, 3159746, 5971092, 1315589, 4827145, 6529015, 8202977, | |||
1341330, 5341501, 2213111, 7953734, 6712985, 3523897, 7404533, 1723600, 7276084, 3866901, 1717735, | |||
6577327, 8119771, 269760, 472078, 1910376, 4546524, 2680103, 4010497, 280005, 3900724, 5823537, 2071892, | |||
5582638, 1285669, 7567685, 5361315, 4751448, 6795489, 6940675, 4499357, 3839961, 5441381, 183443, | |||
7826001, 3937738, 6144432, 7403526, 3919660, 1400424, 7959518, 1612842, 8332111, 7534263, 6094090, | |||
4834730, 7018208, 1976782 | |||
} | |||
}; | |||
/* Twiddle-factor table for the inverse NTT (per its name). Stored in an
 * aligned_uint32xN_t so the array inherits 32-byte vector alignment.
 * NOTE(review): the values are precomputed constants; do not edit by hand. */
const aligned_uint32xN_t PQCLEAN_DILITHIUM3_AVX2_zetas_inv = { | |||
.as_arr = { | |||
6403635, 1362209, 3545687, 2286327, 846154, 48306, 6767575, 420899, 6979993, 4460757, 976891, 2235985, | |||
4442679, 554416, 8196974, 2939036, 4540456, 3881060, 1439742, 1584928, 3628969, 3019102, 812732, 7094748, | |||
2797779, 6308525, 2556880, 4479693, 8100412, 4369920, 5700314, 3833893, 6470041, 7908339, 8110657, 260646, | |||
1803090, 6662682, 4513516, 1104333, 6656817, 975884, 4856520, 1667432, 426683, 6167306, 3038916, 7039087, | |||
177440, 1851402, 3553272, 7064828, 2409325, 5220671, 8190869, 4499374, 7426187, 7849063, 7568473, 19422, | |||
6623180, 5268920, 1799107, 5366416, 1207385, 164721, 3694233, 6764025, 8194886, 5925962, 6727783, 8217573, | |||
5011305, 5948022, 7570268, 3183426, 6423145, 2013608, 1316856, 210977, 3249728, 8578, 7620448, 5991061, | |||
6727353, 3724342, 4680821, 1600420, 2140649, 4873154, 3277672, 1399561, 2884855, 3776993, 1846953, 4974386, | |||
1374803, 7786281, 1671176, 6144537, 2546312, 3724270, 2831860, 7603226, 6463336, 2584293, 542412, 6880252, | |||
1279661, 4421799, 1100098, 5282425, 8115473, 7475901, 8336129, 7871466, 3343383, 3821735, 4874723, 1643818, | |||
1699267, 3859737, 2118186, 5260684, 1962642, 1430225, 1050970, 3548272, 5074302, 3318210, 6476982, 5744496, | |||
7067962, 7143142, 6511298, 7129923, 451100, 1333058, 2994039, 3767016, 1430430, 7031341, 1308169, 1228525, | |||
6527646, 381987, 22981, 671102, 539299, 6031717, 300467, 4840449, 2108549, 5760665, 2091905, 6784443, | |||
7115408, 8177373, 4272102, 5894064, 2590150, 6644538, 2437823, 7132797, 5688936, 3342277, 8093429, 4325093, | |||
5538076, 4943130, 8038120, 2477047, 3693493, 5665122, 983419, 411027, 2967645, 6232521, 4968207, 2867647, | |||
4805995, 3043716, 3861115, 1119584, 549488, 359251, 3595838, 5173371, 522500, 7561383, 768622, 6348669, | |||
43260, 7470875, 525098, 3122442, 1613174, 6521319, 3556995, 655327, 7884926, 7479715, 8253495, 3157330, | |||
1000202, 6441103, 3632928, 3190144, 4083598, 1257611, 4464978, 2537516, 3592148, 1661693, 4794489, 1079900, | |||
6026966, 3193378, 4867236, 3562462, 4562441, 1197226, 1235728, 2446433, 6063917, 3759364, 5945978, 6136326, | |||
4972711, 3520352, 8113420, 3342478, 6288750, 1585221, 4904467, 3041255, 1528703, 6203962, 1452451, 3677745, | |||
3930395, 4849980, 5303092, 8284641, 5674394, 7356305, 5654953, 6554070, 7913949, 876248, 777960, 8143293, | |||
518909, 2608894, 3975713 | |||
} | |||
}; |
@@ -0,0 +1,27 @@ | |||
/* Declarations of the constants shared between the C code and the assembly
 * NTT routines; the definitions live in nttconsts.c. */
#ifndef PQCLEAN_DILITHIUM3_AVX2_NTTCONSTS_H | |||
#define PQCLEAN_DILITHIUM3_AVX2_NTTCONSTS_H | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "alignment.h" | |||
#include "params.h" | |||
/* Vector-aligned arrays of 8 and of N 32-bit words (N comes from params.h). */
typedef ALIGNED_UINT32(8) aligned_uint32x8_t; | |||
typedef ALIGNED_UINT32(N) aligned_uint32xN_t; | |||
/* Eight-dword vector constants referenced by symbol name from the assembly.
 * NOTE(review): identifiers beginning with an underscore followed by an
 * uppercase letter are reserved in C; renaming would need matching changes in
 * the .s files. */
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xqinv; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xq; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x2q; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x256q; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_mask; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8x23ones; | |||
extern const aligned_uint32x8_t _PQCLEAN_DILITHIUM3_AVX2_8xdiv; | |||
/* Twiddle-factor tables for the forward and inverse NTT. */
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM3_AVX2_zetas; | |||
extern const aligned_uint32xN_t PQCLEAN_DILITHIUM3_AVX2_zetas_inv; | |||
#endif //PQCLEAN_DILITHIUM3_AVX2_NTTCONSTS_H | |||
@@ -0,0 +1,305 @@ | |||
#include "packing.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "polyvec.h" | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_pack_pk | |||
* | |||
* Description: Bit-pack public key pk = (rho, t1). | |||
* | |||
* Arguments: - uint8_t pk[]: output byte array | |||
* - const uint8_t rho[]: byte array containing rho | |||
* - const polyveck *t1: pointer to vector t1 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_pack_pk( | |||
uint8_t *pk, | |||
const uint8_t *rho, | |||
const polyveck *t1) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
pk[i] = rho[i]; | |||
} | |||
pk += SEEDBYTES; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyt1_pack(pk + i * POLT1_SIZE_PACKED, &t1->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_unpack_pk | |||
* | |||
* Description: Unpack public key pk = (rho, t1). | |||
* | |||
* Arguments: - const uint8_t rho[]: output byte array for rho | |||
* - const polyveck *t1: pointer to output vector t1 | |||
* - uint8_t pk[]: byte array containing bit-packed pk | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_unpack_pk( | |||
uint8_t *rho, | |||
polyveck *t1, | |||
const uint8_t *pk) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
rho[i] = pk[i]; | |||
} | |||
pk += SEEDBYTES; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyt1_unpack(&t1->vec[i], pk + i * POLT1_SIZE_PACKED); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_pack_sk | |||
* | |||
* Description: Bit-pack secret key sk = (rho, key, tr, s1, s2, t0). | |||
* | |||
* Arguments: - uint8_t sk[]: output byte array | |||
* - const uint8_t rho[]: byte array containing rho | |||
* - const uint8_t key[]: byte array containing key | |||
* - const uint8_t tr[]: byte array containing tr | |||
* - const polyvecl *s1: pointer to vector s1 | |||
* - const polyveck *s2: pointer to vector s2 | |||
* - const polyveck *t0: pointer to vector t0 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_pack_sk( | |||
uint8_t *sk, | |||
const uint8_t *rho, | |||
const uint8_t *key, | |||
const uint8_t *tr, | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
sk[i] = rho[i]; | |||
} | |||
sk += SEEDBYTES; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
sk[i] = key[i]; | |||
} | |||
sk += SEEDBYTES; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
sk[i] = tr[i]; | |||
} | |||
sk += CRHBYTES; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(sk + i * POLETA_SIZE_PACKED, &s1->vec[i]); | |||
} | |||
sk += L * POLETA_SIZE_PACKED; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(sk + i * POLETA_SIZE_PACKED, &s2->vec[i]); | |||
} | |||
sk += K * POLETA_SIZE_PACKED; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyt0_pack(sk + i * POLT0_SIZE_PACKED, &t0->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_unpack_sk | |||
* | |||
* Description: Unpack secret key sk = (rho, key, tr, s1, s2, t0). | |||
* | |||
* Arguments: - const uint8_t rho[]: output byte array for rho | |||
* - const uint8_t key[]: output byte array for key | |||
* - const uint8_t tr[]: output byte array for tr | |||
* - const polyvecl *s1: pointer to output vector s1 | |||
* - const polyveck *s2: pointer to output vector s2 | |||
* - const polyveck *r0: pointer to output vector t0 | |||
* - uint8_t sk[]: byte array containing bit-packed sk | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_unpack_sk( | |||
uint8_t *rho, | |||
uint8_t *key, | |||
uint8_t *tr, | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const uint8_t *sk) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
rho[i] = sk[i]; | |||
} | |||
sk += SEEDBYTES; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
key[i] = sk[i]; | |||
} | |||
sk += SEEDBYTES; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
tr[i] = sk[i]; | |||
} | |||
sk += CRHBYTES; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack(&s1->vec[i], sk + i * POLETA_SIZE_PACKED); | |||
} | |||
sk += L * POLETA_SIZE_PACKED; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack(&s2->vec[i], sk + i * POLETA_SIZE_PACKED); | |||
} | |||
sk += K * POLETA_SIZE_PACKED; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyt0_unpack(&t0->vec[i], sk + i * POLT0_SIZE_PACKED); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_pack_sig | |||
* | |||
* Description: Bit-pack signature sig = (z, h, c). | |||
* | |||
* Arguments: - uint8_t sig[]: output byte array | |||
* - const polyvecl *z: pointer to vector z | |||
* - const polyveck *h: pointer to hint vector h | |||
* - const poly *c: pointer to PQCLEAN_DILITHIUM3_AVX2_challenge polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_pack_sig( | |||
uint8_t *sig, | |||
const polyvecl *z, | |||
const polyveck *h, | |||
const poly *c) { | |||
unsigned int i, j, k; | |||
uint64_t signs, mask; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyz_pack(sig + i * POLZ_SIZE_PACKED, &z->vec[i]); | |||
} | |||
sig += L * POLZ_SIZE_PACKED; | |||
/* Encode h */ | |||
k = 0; | |||
for (i = 0; i < K; ++i) { | |||
for (j = 0; j < N; ++j) { | |||
if (h->vec[i].coeffs[j] != 0) { | |||
sig[k++] = (uint8_t)j; | |||
} | |||
} | |||
sig[OMEGA + i] = (uint8_t)k; | |||
} | |||
while (k < OMEGA) { | |||
sig[k++] = 0; | |||
} | |||
sig += OMEGA + K; | |||
/* Encode c */ | |||
signs = 0; | |||
mask = 1; | |||
for (i = 0; i < N / 8; ++i) { | |||
sig[i] = 0; | |||
for (j = 0; j < 8; ++j) { | |||
if (c->coeffs[8 * i + j] != 0) { | |||
sig[i] |= (uint8_t)(1u << j); | |||
if (c->coeffs[8 * i + j] == (Q - 1)) { | |||
signs |= mask; | |||
} | |||
mask <<= 1; | |||
} | |||
} | |||
} | |||
sig += N / 8; | |||
for (i = 0; i < 8; ++i) { | |||
sig[i] = (uint8_t)(signs >> 8u * i); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_unpack_sig | |||
* | |||
* Description: Unpack signature sig = (z, h, c). | |||
* | |||
* Arguments: - polyvecl *z: pointer to output vector z | |||
* - polyveck *h: pointer to output hint vector h | |||
* - poly *c: pointer to output PQCLEAN_DILITHIUM3_AVX2_challenge polynomial | |||
* - const uint8_t sig[]: byte array containing | |||
* bit-packed signature | |||
* | |||
* Returns 1 in case of malformed signature; otherwise 0. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_AVX2_unpack_sig( | |||
polyvecl *z, | |||
polyveck *h, | |||
poly *c, | |||
const uint8_t *sig) { | |||
unsigned int i, j, k; | |||
uint64_t signs; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(&z->vec[i], sig + i * POLZ_SIZE_PACKED); | |||
} | |||
sig += L * POLZ_SIZE_PACKED; | |||
/* Decode h */ | |||
k = 0; | |||
for (i = 0; i < K; ++i) { | |||
for (j = 0; j < N; ++j) { | |||
h->vec[i].coeffs[j] = 0; | |||
} | |||
if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) { | |||
return 1; | |||
} | |||
for (j = k; j < sig[OMEGA + i]; ++j) { | |||
/* Coefficients are ordered for strong unforgeability */ | |||
if (j > k && sig[j] <= sig[j - 1]) { | |||
return 1; | |||
} | |||
h->vec[i].coeffs[sig[j]] = 1; | |||
} | |||
k = sig[OMEGA + i]; | |||
} | |||
/* Extra indices are zero for strong unforgeability */ | |||
for (j = k; j < OMEGA; ++j) { | |||
if (sig[j]) { | |||
return 1; | |||
} | |||
} | |||
sig += OMEGA + K; | |||
/* Decode c */ | |||
for (i = 0; i < N; ++i) { | |||
c->coeffs[i] = 0; | |||
} | |||
signs = 0; | |||
for (i = 0; i < 8; ++i) { | |||
signs |= (uint64_t)sig[N / 8 + i] << 8 * i; | |||
} | |||
/* Extra sign bits are zero for strong unforgeability */ | |||
if (signs >> 60) { | |||
return 1; | |||
} | |||
for (i = 0; i < N / 8; ++i) { | |||
for (j = 0; j < 8; ++j) { | |||
if ((sig[i] >> j) & 0x01) { | |||
c->coeffs[8 * i + j] = 1; | |||
c->coeffs[8 * i + j] ^= -((int32_t) signs & 1) & (1 ^ (Q - 1)); | |||
signs >>= 1; | |||
} | |||
} | |||
} | |||
return 0; | |||
} |
@@ -0,0 +1,36 @@ | |||
/* Serialization of public keys, secret keys and signatures to and from
 * byte arrays. NOTE(review): uint8_t is not included here directly; it is
 * presumably provided through polyvec.h - confirm. */
#ifndef PQCLEAN_DILITHIUM3_AVX2_PACKING_H | |||
#define PQCLEAN_DILITHIUM3_AVX2_PACKING_H | |||
#include "params.h" | |||
#include "polyvec.h" | |||
/* Write pk = (rho, t1) to the byte array pk. */
void PQCLEAN_DILITHIUM3_AVX2_pack_pk( | |||
uint8_t *pk, | |||
const uint8_t *rho, const polyveck *t1); | |||
/* Write sk = (rho, key, tr, s1, s2, t0) to the byte array sk. */
void PQCLEAN_DILITHIUM3_AVX2_pack_sk( | |||
uint8_t *sk, | |||
const uint8_t *rho, | |||
const uint8_t *key, | |||
const uint8_t *tr, | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0); | |||
/* Write sig = (z, h, c) to the byte array sig. */
void PQCLEAN_DILITHIUM3_AVX2_pack_sig( | |||
uint8_t *sig, | |||
const polyvecl *z, const polyveck *h, const poly *c); | |||
/* Read rho and t1 back from a packed public key; rho and t1 are outputs. */
void PQCLEAN_DILITHIUM3_AVX2_unpack_pk( | |||
uint8_t *rho, polyveck *t1, | |||
const uint8_t *pk); | |||
/* Read all secret-key components back from a packed secret key; every
 * argument except sk is an output. */
void PQCLEAN_DILITHIUM3_AVX2_unpack_sk( | |||
uint8_t *rho, | |||
uint8_t *key, | |||
uint8_t *tr, | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const uint8_t *sk); | |||
/* Read z, h and c back from a packed signature. Returns 1 if the encoding
 * is malformed and 0 otherwise. */
int PQCLEAN_DILITHIUM3_AVX2_unpack_sig( | |||
polyvecl *z, polyveck *h, poly *c, const uint8_t *sig); | |||
#endif | |||
@@ -0,0 +1,33 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_PARAMS_H | |||
#define PQCLEAN_DILITHIUM3_AVX2_PARAMS_H | |||
#define SEEDBYTES 32 | |||
#define CRHBYTES 48 | |||
#define N 256 | |||
#define Q 8380417 | |||
#define QBITS 23 | |||
#define D 14 | |||
#define GAMMA1 ((Q - 1)/16) | |||
#define GAMMA2 (GAMMA1/2) | |||
#define ALPHA (2*GAMMA2) | |||
#define K 5 | |||
#define L 4 | |||
#define ETA 5 | |||
#define SETABITS 4 | |||
#define BETA 275 | |||
#define OMEGA 96 | |||
#define POLT1_SIZE_PACKED ((N*(QBITS - D))/8) | |||
#define POLT0_SIZE_PACKED ((N*D)/8) | |||
#define POLETA_SIZE_PACKED ((N*SETABITS)/8) | |||
#define POLZ_SIZE_PACKED ((N*(QBITS - 3))/8) | |||
#define POLW1_SIZE_PACKED ((N*4)/8) | |||
#define CRYPTO_PUBLICKEYBYTES (SEEDBYTES + K*POLT1_SIZE_PACKED) | |||
#define CRYPTO_SECRETKEYBYTES (2*SEEDBYTES + (L + K)*POLETA_SIZE_PACKED + CRHBYTES + K*POLT0_SIZE_PACKED) | |||
#define CRYPTO_BYTES (L*POLZ_SIZE_PACKED + (OMEGA + K) + (N/8 + 8)) | |||
#endif |
@@ -0,0 +1,191 @@ | |||
#include "params.h" | |||
# PQCLEAN_DILITHIUM3_AVX2_pointwise_avx
#
# Multiplies two arrays of 256 unsigned 32-bit coefficients element by
# element and reduces every 64-bit product p to
#     (p + ((p * qinv) mod 2^32) * q) >> 32
# where qinv and q come from the _8xqinv and _8xq constant vectors.
# NOTE(review): presumably qinv = -q^-1 mod 2^32, which makes this a
# Montgomery reduction - confirm against the constant table.
#
# Registers on entry (first three integer arguments under the System V
# AMD64 calling convention):
#   rdi  destination array (written)
#   rsi  first source array (read)
#   rdx  second source array (read)
# All memory accesses use vmovdqa, so the three arrays and the constants
# must be 32-byte aligned.
#
# Writes eax, ymm0-ymm7 and ymm10-ymm15; rdi, rsi and rdx are advanced.
.global PQCLEAN_DILITHIUM3_AVX2_pointwise_avx | |||
PQCLEAN_DILITHIUM3_AVX2_pointwise_avx: | |||
#consts | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xq(%rip),%ymm1 | |||
xor %eax,%eax | |||
# Main loop: ten rounds, each handling 24 coefficients (96 bytes) per array.
_looptop1: | |||
#load | |||
vmovdqa (%rsi),%ymm2 | |||
vmovdqa 32(%rsi),%ymm4 | |||
vmovdqa 64(%rsi),%ymm6 | |||
vmovdqa (%rdx),%ymm10 | |||
vmovdqa 32(%rdx),%ymm12 | |||
vmovdqa 64(%rdx),%ymm14 | |||
# vpmuludq only reads the low dword of each qword lane, so the odd-indexed
# coefficients are shifted down into separate registers.
vpsrlq $32,%ymm2,%ymm3 | |||
vpsrlq $32,%ymm4,%ymm5 | |||
vpsrlq $32,%ymm6,%ymm7 | |||
vpsrlq $32,%ymm10,%ymm11 | |||
vpsrlq $32,%ymm12,%ymm13 | |||
vpsrlq $32,%ymm14,%ymm15 | |||
#mul | |||
vpmuludq %ymm2,%ymm10,%ymm2 | |||
vpmuludq %ymm3,%ymm11,%ymm3 | |||
vpmuludq %ymm4,%ymm12,%ymm4 | |||
vpmuludq %ymm5,%ymm13,%ymm5 | |||
vpmuludq %ymm6,%ymm14,%ymm6 | |||
vpmuludq %ymm7,%ymm15,%ymm7 | |||
#reduce | |||
# t = low dword of product times qinv
vpmuludq %ymm0,%ymm2,%ymm10 | |||
vpmuludq %ymm0,%ymm3,%ymm11 | |||
vpmuludq %ymm0,%ymm4,%ymm12 | |||
vpmuludq %ymm0,%ymm5,%ymm13 | |||
vpmuludq %ymm0,%ymm6,%ymm14 | |||
vpmuludq %ymm0,%ymm7,%ymm15 | |||
# t = low dword of t times q
vpmuludq %ymm1,%ymm10,%ymm10 | |||
vpmuludq %ymm1,%ymm11,%ymm11 | |||
vpmuludq %ymm1,%ymm12,%ymm12 | |||
vpmuludq %ymm1,%ymm13,%ymm13 | |||
vpmuludq %ymm1,%ymm14,%ymm14 | |||
vpmuludq %ymm1,%ymm15,%ymm15 | |||
# product + t; the result is the high dword of each qword lane
vpaddq %ymm2,%ymm10,%ymm2 | |||
vpaddq %ymm3,%ymm11,%ymm3 | |||
vpaddq %ymm4,%ymm12,%ymm4 | |||
vpaddq %ymm5,%ymm13,%ymm5 | |||
vpaddq %ymm6,%ymm14,%ymm6 | |||
vpaddq %ymm7,%ymm15,%ymm7 | |||
# Only the even-lane results are shifted down; the odd-lane results already
# sit in the odd dword positions where they will be stored.
vpsrlq $32,%ymm2,%ymm2 | |||
vpsrlq $32,%ymm4,%ymm4 | |||
vpsrlq $32,%ymm6,%ymm6 | |||
#store | |||
# Mask 0xAA takes the odd dwords from the odd-lane register and the even
# dwords from the even-lane register.
vpblendd $0xAA,%ymm3,%ymm2,%ymm2 | |||
vpblendd $0xAA,%ymm5,%ymm4,%ymm4 | |||
vpblendd $0xAA,%ymm7,%ymm6,%ymm6 | |||
vmovdqa %ymm2,(%rdi) | |||
vmovdqa %ymm4,32(%rdi) | |||
vmovdqa %ymm6,64(%rdi) | |||
add $96,%rdi | |||
add $96,%rsi | |||
add $96,%rdx | |||
add $1,%eax | |||
cmp $10,%eax | |||
jb _looptop1 | |||
# Tail: the remaining 16 coefficients (64 bytes), same steps with two
# vectors per operand instead of three.
vmovdqa (%rsi),%ymm2 | |||
vmovdqa 32(%rsi),%ymm4 | |||
vmovdqa (%rdx),%ymm10 | |||
vmovdqa 32(%rdx),%ymm12 | |||
vpsrlq $32,%ymm2,%ymm3 | |||
vpsrlq $32,%ymm4,%ymm5 | |||
vpsrlq $32,%ymm10,%ymm11 | |||
vpsrlq $32,%ymm12,%ymm13 | |||
#mul | |||
vpmuludq %ymm2,%ymm10,%ymm2 | |||
vpmuludq %ymm3,%ymm11,%ymm3 | |||
vpmuludq %ymm4,%ymm12,%ymm4 | |||
vpmuludq %ymm5,%ymm13,%ymm5 | |||
#reduce | |||
vpmuludq %ymm0,%ymm2,%ymm10 | |||
vpmuludq %ymm0,%ymm3,%ymm11 | |||
vpmuludq %ymm0,%ymm4,%ymm12 | |||
vpmuludq %ymm0,%ymm5,%ymm13 | |||
vpmuludq %ymm1,%ymm10,%ymm10 | |||
vpmuludq %ymm1,%ymm11,%ymm11 | |||
vpmuludq %ymm1,%ymm12,%ymm12 | |||
vpmuludq %ymm1,%ymm13,%ymm13 | |||
vpaddq %ymm2,%ymm10,%ymm2 | |||
vpaddq %ymm3,%ymm11,%ymm3 | |||
vpaddq %ymm4,%ymm12,%ymm4 | |||
vpaddq %ymm5,%ymm13,%ymm5 | |||
vpsrlq $32,%ymm2,%ymm2 | |||
vpsrlq $32,%ymm4,%ymm4 | |||
#store | |||
# Mask 0x55 with swapped sources selects the same dwords as the 0xAA blend
# used in the main loop.
vpblendd $0x55,%ymm2,%ymm3,%ymm2 | |||
vpblendd $0x55,%ymm4,%ymm5,%ymm4 | |||
vmovdqa %ymm2,(%rdi) | |||
vmovdqa %ymm4,32(%rdi) | |||
ret | |||
# Macro pointwise, with argument off: a byte offset applied to both rsi and
# rdx. Loads 16 coefficients (two 32-byte vectors) from each operand at that
# offset and leaves the unreduced 64-bit products in ymm6-ymm9:
#   ymm6, ymm8  products of the even-indexed coefficients
#   ymm7, ymm9  products of the odd-indexed coefficients
# ymm10-ymm13 are overwritten with the second operand.
.macro pointwise off | |||
#load | |||
vmovdqa \off(%rsi),%ymm6 | |||
vmovdqa \off+32(%rsi),%ymm8 | |||
vmovdqa \off(%rdx),%ymm10 | |||
vmovdqa \off+32(%rdx),%ymm12 | |||
vpsrlq $32,%ymm6,%ymm7 | |||
vpsrlq $32,%ymm8,%ymm9 | |||
vpsrlq $32,%ymm10,%ymm11 | |||
vpsrlq $32,%ymm12,%ymm13 | |||
#mul | |||
vpmuludq %ymm6,%ymm10,%ymm6 | |||
vpmuludq %ymm7,%ymm11,%ymm7 | |||
vpmuludq %ymm8,%ymm12,%ymm8 | |||
vpmuludq %ymm9,%ymm13,%ymm9 | |||
.endm | |||
# Macro acc: adds the four 64-bit product vectors ymm6-ymm9 (as left by the
# pointwise macro) onto the running sums held in ymm2-ymm5, qword lane by
# qword lane.
.macro acc | |||
vpaddq %ymm6,%ymm2,%ymm2 | |||
vpaddq %ymm7,%ymm3,%ymm3 | |||
vpaddq %ymm8,%ymm4,%ymm4 | |||
vpaddq %ymm9,%ymm5,%ymm5 | |||
.endm | |||
# PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx
#
# For each of 256 coefficient positions, multiplies the coefficients found
# at byte offsets 0, 1024, 2048 and 3072 from rsi with those at the same
# offsets from rdx, sums the four 64-bit products and applies a single
# reduction (same formula as in the pointwise routine) to the sum.
# NOTE(review): the 1024-byte stride equals 256 coefficients of 4 bytes, so
# rsi and rdx presumably point at vectors of four consecutive polynomials
# (L = 4 in params.h) - confirm against the caller.
#
#   rdi  destination array of 256 coefficients (written)
#   rsi  first source (read)
#   rdx  second source (read)
# All memory accesses use vmovdqa and therefore need 32-byte alignment.
#
# Writes eax and ymm0-ymm13; rdi, rsi and rdx are advanced by 1024 bytes.
.global PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx | |||
PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx: | |||
#consts | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xqinv(%rip),%ymm0 | |||
vmovdqa _PQCLEAN_DILITHIUM3_AVX2_8xq(%rip),%ymm1 | |||
xor %eax,%eax | |||
# Sixteen rounds, each producing 16 output coefficients (64 bytes).
_looptop2: | |||
pointwise 0 | |||
#mov | |||
# The first set of products initialises the running sums in ymm2-ymm5.
vmovdqa %ymm6,%ymm2 | |||
vmovdqa %ymm7,%ymm3 | |||
vmovdqa %ymm8,%ymm4 | |||
vmovdqa %ymm9,%ymm5 | |||
pointwise 1024 | |||
acc | |||
pointwise 2048 | |||
acc | |||
pointwise 3072 | |||
acc | |||
#reduce | |||
# One reduction for the accumulated sum rather than one per product.
vpmuludq %ymm0,%ymm2,%ymm6 | |||
vpmuludq %ymm0,%ymm3,%ymm7 | |||
vpmuludq %ymm0,%ymm4,%ymm8 | |||
vpmuludq %ymm0,%ymm5,%ymm9 | |||
vpmuludq %ymm1,%ymm6,%ymm6 | |||
vpmuludq %ymm1,%ymm7,%ymm7 | |||
vpmuludq %ymm1,%ymm8,%ymm8 | |||
vpmuludq %ymm1,%ymm9,%ymm9 | |||
vpaddq %ymm2,%ymm6,%ymm2 | |||
vpaddq %ymm3,%ymm7,%ymm3 | |||
vpaddq %ymm4,%ymm8,%ymm4 | |||
vpaddq %ymm5,%ymm9,%ymm5 | |||
# Even-lane results are shifted down; odd-lane results are already in the
# odd dword positions.
vpsrlq $32,%ymm2,%ymm2 | |||
vpsrlq $32,%ymm4,%ymm4 | |||
#store | |||
vpblendd $0xAA,%ymm3,%ymm2,%ymm2 | |||
vpblendd $0xAA,%ymm5,%ymm4,%ymm4 | |||
vmovdqa %ymm2,(%rdi) | |||
vmovdqa %ymm4,32(%rdi) | |||
add $64,%rsi | |||
add $64,%rdx | |||
add $64,%rdi | |||
add $1,%eax | |||
cmp $16,%eax | |||
jb _looptop2 | |||
ret | |||
@@ -0,0 +1,914 @@ | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "fips202x4.h" | |||
#include "ntt.h" | |||
#include "nttconsts.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "reduce.h" | |||
#include "rejsample.h" | |||
#include "rounding.h" | |||
#include "symmetric.h" | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_reduce | |||
* | |||
* Description: Reduce all coefficients of input polynomial to representative | |||
* in [0,2*Q[. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_reduce(poly *a) { | |||
PQCLEAN_DILITHIUM3_AVX2_reduce_avx(a->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_csubq | |||
* | |||
* Description: For all coefficients of input polynomial subtract Q if | |||
* coefficient is bigger than Q. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_csubq(poly *a) { | |||
PQCLEAN_DILITHIUM3_AVX2_csubq_avx(a->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_freeze | |||
* | |||
* Description: Reduce all coefficients of the polynomial to standard | |||
* representatives. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_freeze(poly *a) { | |||
PQCLEAN_DILITHIUM3_AVX2_reduce_avx(a->coeffs); | |||
PQCLEAN_DILITHIUM3_AVX2_csubq_avx(a->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_add | |||
* | |||
* Description: Add polynomials. No modular reduction is performed. | |||
* | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const poly *a: pointer to first summand | |||
* - const poly *b: pointer to second summand | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_add(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
__m256i vec0, vec1; | |||
for (i = 0; i < N / 8; i++) { | |||
vec0 = _mm256_load_si256(&a->coeffs_x8[i]); | |||
vec1 = _mm256_load_si256(&b->coeffs_x8[i]); | |||
vec0 = _mm256_add_epi32(vec0, vec1); | |||
_mm256_store_si256(&c->coeffs_x8[i], vec0); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_sub | |||
* | |||
* Description: Subtract polynomials. Assumes coefficients of second input | |||
* polynomial to be less than 2*Q. No modular reduction is | |||
* performed. | |||
* | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const poly *a: pointer to first input polynomial | |||
* - const poly *b: pointer to second input polynomial to be | |||
* subtraced from first input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_sub(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
__m256i vec0, vec1; | |||
const __m256i twoq = _mm256_load_si256(_PQCLEAN_DILITHIUM3_AVX2_8x2q.as_vec); | |||
for (i = 0; i < N / 8; i++) { | |||
vec0 = _mm256_load_si256(&a->coeffs_x8[i]); | |||
vec1 = _mm256_load_si256(&b->coeffs_x8[i]); | |||
vec0 = _mm256_add_epi32(vec0, twoq); | |||
vec0 = _mm256_sub_epi32(vec0, vec1); | |||
_mm256_store_si256(&c->coeffs_x8[i], vec0); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_shiftl | |||
* | |||
* Description: Multiply polynomial by 2^D without modular reduction. Assumes | |||
* input coefficients to be less than 2^{32-D}. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_shiftl(poly *a) { | |||
unsigned int i; | |||
__m256i vec; | |||
for (i = 0; i < N / 8; i++) { | |||
vec = _mm256_load_si256(&a->coeffs_x8[i]); | |||
vec = _mm256_slli_epi32(vec, D); | |||
_mm256_store_si256(&a->coeffs_x8[i], vec); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_ntt | |||
* | |||
* Description: Forward NTT. Output coefficients can be up to 16*Q larger than | |||
* input coefficients. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_ntt(poly *a) { | |||
unsigned int i; | |||
ALIGNED_UINT64(N) tmp; | |||
for (i = 0; i < N / 32; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_ntt_levels0t2_avx(tmp.as_arr + 4 * i, a->coeffs + 4 * i, PQCLEAN_DILITHIUM3_AVX2_zetas.as_arr + 1); | |||
} | |||
for (i = 0; i < N / 32; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_ntt_levels3t8_avx(a->coeffs + 32 * i, tmp.as_arr + 32 * i, PQCLEAN_DILITHIUM3_AVX2_zetas.as_arr + 8 + 31 * i); | |||
} | |||
} | |||
/************************************************* | |||
* Name: poly_invntt_montgomery | |||
* | |||
* Description: Inverse NTT and multiplication with 2^{32}. Input coefficients | |||
* need to be less than 2*Q. Output coefficients are less than 2*Q. | |||
* | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_invntt_montgomery(poly *a) { | |||
unsigned int i; | |||
ALIGNED_UINT64(N) tmp; | |||
for (i = 0; i < N / 32; i++) { | |||
PQCLEAN_DILITHIUM3_AVX2_invntt_levels0t4_avx(tmp.as_arr + 32 * i, a->coeffs + 32 * i, PQCLEAN_DILITHIUM3_AVX2_zetas_inv.as_arr + 31 * i); | |||
} | |||
for (i = 0; i < N / 32; i++) { | |||
PQCLEAN_DILITHIUM3_AVX2_invntt_levels5t7_avx(a->coeffs + 4 * i, tmp.as_arr + 4 * i, PQCLEAN_DILITHIUM3_AVX2_zetas_inv.as_arr + 248); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_invmontgomery | |||
* | |||
* Description: Pointwise multiplication of polynomials in NTT domain | |||
* representation and multiplication of resulting polynomial | |||
* with 2^{-32}. Output coefficients are less than 2*Q if input | |||
* coefficient are less than 22*Q. | |||
* | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const poly *a: pointer to first input polynomial | |||
* - const poly *b: pointer to second input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_invmontgomery(poly *c, const poly *a, const poly *b) { | |||
PQCLEAN_DILITHIUM3_AVX2_pointwise_avx(c->coeffs, a->coeffs, b->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_power2round | |||
* | |||
* Description: For all coefficients c of the input polynomial, | |||
* compute c0, c1 such that c mod Q = c1*2^D + c0 | |||
* with -2^{D-1} < c0 <= 2^{D-1}. Assumes coefficients to be | |||
* standard representatives. | |||
* | |||
* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 | |||
* - poly *a0: pointer to output polynomial with coefficients Q + a0 | |||
* - const poly *v: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_power2round(poly *restrict a1, | |||
poly *restrict a0, | |||
const poly *restrict a) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a1->coeffs[i] = PQCLEAN_DILITHIUM3_AVX2_power2round(a->coeffs[i], &a0->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_decompose | |||
* | |||
* Description: For all coefficients c of the input polynomial, | |||
* compute high and low bits c0, c1 such c mod Q = c1*ALPHA + c0 | |||
* with -ALPHA/2 < c0 <= ALPHA/2 except c1 = (Q-1)/ALPHA where we | |||
* set c1 = 0 and -ALPHA/2 <= c0 = c mod Q - Q < 0. | |||
* Assumes coefficients to be standard representatives. | |||
* | |||
* Arguments: - poly *a1: pointer to output polynomial with coefficients c1 | |||
* - poly *a0: pointer to output polynomial with coefficients Q + a0 | |||
* - const poly *c: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_decompose( | |||
poly *restrict a1, | |||
poly *restrict a0, | |||
const poly *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
a1->coeffs[i] = PQCLEAN_DILITHIUM3_AVX2_decompose(a->coeffs[i], &a0->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_make_hint | |||
* | |||
* Description: Compute hint polynomial. The coefficients of which indicate | |||
* whether the low bits of the corresponding coefficient of | |||
* the input polynomial overflow into the high bits. | |||
* | |||
* Arguments: - poly *h: pointer to output hint polynomial | |||
* - const poly *a0: pointer to low part of input polynomial | |||
* - const poly *a1: pointer to high part of input polynomial | |||
* | |||
* Returns number of 1 bits. | |||
**************************************************/ | |||
unsigned int PQCLEAN_DILITHIUM3_AVX2_poly_make_hint( | |||
poly *restrict h, | |||
const poly *restrict a0, | |||
const poly *restrict a1) { | |||
unsigned int i, s = 0; | |||
for (i = 0; i < N; ++i) { | |||
h->coeffs[i] = PQCLEAN_DILITHIUM3_AVX2_make_hint(a0->coeffs[i], a1->coeffs[i]); | |||
s += h->coeffs[i]; | |||
} | |||
return s; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_use_hint | |||
* | |||
* Description: Use hint polynomial to correct the high bits of a polynomial. | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial with corrected high bits | |||
* - const poly *b: pointer to input polynomial | |||
* - const poly *h: pointer to input hint polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_use_hint( | |||
poly *restrict a, | |||
const poly *restrict b, | |||
const poly *restrict h) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM3_AVX2_use_hint(b->coeffs[i], h->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_chknorm | |||
* | |||
* Description: Check infinity norm of polynomial against given bound. | |||
* Assumes input coefficients to be standard representatives. | |||
* | |||
* Arguments: - const poly *a: pointer to polynomial | |||
* - uint32_t B: norm bound | |||
* | |||
* Returns 0 if norm is strictly smaller than B and 1 otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(const poly *a, uint32_t B) { | |||
unsigned int i; | |||
int32_t t; | |||
/* It is ok to leak which coefficient violates the bound since | |||
the probability for each coefficient is independent of secret | |||
data but we must not leak the sign of the centralized representative. */ | |||
for (i = 0; i < N; ++i) { | |||
/* Absolute value of centralized representative */ | |||
t = (Q - 1) / 2 - a->coeffs[i]; | |||
t ^= (t >> 31); | |||
t = (Q - 1) / 2 - t; | |||
if ((uint32_t)t >= B) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: rej_uniform_ref | |||
* | |||
* Description: Sample uniformly random coefficients in [0, Q-1] by | |||
* performing rejection sampling using array of random bytes. | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
* random bytes were given. | |||
**************************************************/ | |||
static unsigned int rej_uniform_ref(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t; | |||
ctr = pos = 0; | |||
while (ctr < len && pos + 3 <= buflen) { | |||
t = buf[pos++]; | |||
t |= (uint32_t)buf[pos++] << 8; | |||
t |= (uint32_t)buf[pos++] << 16; | |||
t &= 0x7FFFFF; | |||
if (t < Q) { | |||
a[ctr++] = t; | |||
} | |||
} | |||
return ctr; | |||
} | |||
/************************************************* | |||
* Name: poly_uniform | |||
* | |||
* Description: Sample polynomial with uniformly random coefficients | |||
* in [0,Q-1] by performing rejection sampling using the | |||
* output stream from SHAKE256(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* SEEDBYTES | |||
* - uint16_t nonce: 2-byte nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_NBLOCKS ((769 + STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_BUFLEN (POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
uint16_t nonce) { | |||
unsigned int i, ctr, off; | |||
unsigned int nblocks = POLY_UNIFORM_NBLOCKS; | |||
unsigned int buflen = POLY_UNIFORM_BUFLEN; | |||
unsigned char buf[POLY_UNIFORM_BUFLEN + 2]; | |||
stream128_state state; | |||
stream128_init(&state, seed, nonce); | |||
stream128_squeezeblocks(buf, nblocks, &state); | |||
ctr = PQCLEAN_DILITHIUM3_AVX2_rej_uniform(a->coeffs, N, buf, buflen); | |||
while (ctr < N) { | |||
off = buflen % 3; | |||
for (i = 0; i < off; ++i) { | |||
buf[i] = buf[buflen - off + i]; | |||
} | |||
buflen = STREAM128_BLOCKBYTES + off; | |||
stream128_squeezeblocks(buf + off, 1, &state); | |||
ctr += rej_uniform_ref(a->coeffs + ctr, N - ctr, buf, buflen); | |||
} | |||
} | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(poly *a0, | |||
poly *a1, | |||
poly *a2, | |||
poly *a3, | |||
const unsigned char seed[SEEDBYTES], | |||
uint16_t nonce0, | |||
uint16_t nonce1, | |||
uint16_t nonce2, | |||
uint16_t nonce3) { | |||
unsigned int i, ctr0, ctr1, ctr2, ctr3; | |||
unsigned char inbuf[4][SEEDBYTES + 2]; | |||
unsigned char outbuf[4][5 * SHAKE128_RATE]; | |||
__m256i state[25]; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
inbuf[0][i] = seed[i]; | |||
inbuf[1][i] = seed[i]; | |||
inbuf[2][i] = seed[i]; | |||
inbuf[3][i] = seed[i]; | |||
} | |||
inbuf[0][SEEDBYTES + 0] = nonce0; | |||
inbuf[0][SEEDBYTES + 1] = nonce0 >> 8; | |||
inbuf[1][SEEDBYTES + 0] = nonce1; | |||
inbuf[1][SEEDBYTES + 1] = nonce1 >> 8; | |||
inbuf[2][SEEDBYTES + 0] = nonce2; | |||
inbuf[2][SEEDBYTES + 1] = nonce2 >> 8; | |||
inbuf[3][SEEDBYTES + 0] = nonce3; | |||
inbuf[3][SEEDBYTES + 1] = nonce3 >> 8; | |||
PQCLEAN_DILITHIUM3_AVX2_shake128_absorb4x(state, inbuf[0], inbuf[1], inbuf[2], inbuf[3], | |||
SEEDBYTES + 2); | |||
PQCLEAN_DILITHIUM3_AVX2_shake128_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 5, | |||
state); | |||
ctr0 = PQCLEAN_DILITHIUM3_AVX2_rej_uniform(a0->coeffs, N, outbuf[0], 5 * SHAKE128_RATE); | |||
ctr1 = PQCLEAN_DILITHIUM3_AVX2_rej_uniform(a1->coeffs, N, outbuf[1], 5 * SHAKE128_RATE); | |||
ctr2 = PQCLEAN_DILITHIUM3_AVX2_rej_uniform(a2->coeffs, N, outbuf[2], 5 * SHAKE128_RATE); | |||
ctr3 = PQCLEAN_DILITHIUM3_AVX2_rej_uniform(a3->coeffs, N, outbuf[3], 5 * SHAKE128_RATE); | |||
while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { | |||
PQCLEAN_DILITHIUM3_AVX2_shake128_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 1, | |||
state); | |||
ctr0 += rej_uniform_ref(a0->coeffs + ctr0, N - ctr0, outbuf[0], | |||
SHAKE128_RATE); | |||
ctr1 += rej_uniform_ref(a1->coeffs + ctr1, N - ctr1, outbuf[1], | |||
SHAKE128_RATE); | |||
ctr2 += rej_uniform_ref(a2->coeffs + ctr2, N - ctr2, outbuf[2], | |||
SHAKE128_RATE); | |||
ctr3 += rej_uniform_ref(a3->coeffs + ctr3, N - ctr3, outbuf[3], | |||
SHAKE128_RATE); | |||
} | |||
} | |||
/************************************************* | |||
* Name: rej_eta | |||
* | |||
* Description: Sample uniformly random coefficients in [-ETA, ETA] by | |||
* performing rejection sampling using array of random bytes. | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
* random bytes were given. | |||
**************************************************/ | |||
static unsigned int rej_eta_ref(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t0, t1; | |||
ctr = pos = 0; | |||
while (ctr < len && pos < buflen) { | |||
t0 = buf[pos] & 0x0F; | |||
t1 = buf[pos++] >> 4; | |||
if (t0 <= 2 * ETA) { | |||
a[ctr++] = Q + ETA - t0; | |||
} | |||
if (t1 <= 2 * ETA && ctr < len) { | |||
a[ctr++] = Q + ETA - t1; | |||
} | |||
} | |||
return ctr; | |||
} | |||
/************************************************* | |||
* Name: poly_uniform_eta | |||
* | |||
* Description: Sample polynomial with uniformly random coefficients | |||
* in [-ETA,ETA] by performing rejection sampling using the | |||
* output stream from SHAKE256(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* SEEDBYTES | |||
* - uint16_t nonce: 2-byte nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_ETA_NBLOCKS (((N / 2 * (1u << SETABITS)) / (2 * ETA + 1) + STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_ETA_BUFLEN (POLY_UNIFORM_ETA_NBLOCKS*STREAM128_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
uint16_t nonce) { | |||
unsigned int ctr; | |||
unsigned char buf[POLY_UNIFORM_ETA_BUFLEN]; | |||
stream128_state state; | |||
stream128_init(&state, seed, nonce); | |||
stream128_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state); | |||
ctr = PQCLEAN_DILITHIUM3_AVX2_rej_eta(a->coeffs, N, buf, POLY_UNIFORM_ETA_BUFLEN); | |||
while (ctr < N) { | |||
stream128_squeezeblocks(buf, 1, &state); | |||
ctr += rej_eta_ref(a->coeffs + ctr, N - ctr, buf, STREAM128_BLOCKBYTES); | |||
} | |||
} | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_4x(poly *a0, | |||
poly *a1, | |||
poly *a2, | |||
poly *a3, | |||
const unsigned char seed[SEEDBYTES], | |||
uint16_t nonce0, | |||
uint16_t nonce1, | |||
uint16_t nonce2, | |||
uint16_t nonce3) { | |||
unsigned int i, ctr0, ctr1, ctr2, ctr3; | |||
unsigned char inbuf[4][SEEDBYTES + 2]; | |||
unsigned char outbuf[4][2 * SHAKE128_RATE]; | |||
__m256i state[25]; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
inbuf[0][i] = seed[i]; | |||
inbuf[1][i] = seed[i]; | |||
inbuf[2][i] = seed[i]; | |||
inbuf[3][i] = seed[i]; | |||
} | |||
inbuf[0][SEEDBYTES + 0] = nonce0; | |||
inbuf[0][SEEDBYTES + 1] = nonce0 >> 8; | |||
inbuf[1][SEEDBYTES + 0] = nonce1; | |||
inbuf[1][SEEDBYTES + 1] = nonce1 >> 8; | |||
inbuf[2][SEEDBYTES + 0] = nonce2; | |||
inbuf[2][SEEDBYTES + 1] = nonce2 >> 8; | |||
inbuf[3][SEEDBYTES + 0] = nonce3; | |||
inbuf[3][SEEDBYTES + 1] = nonce3 >> 8; | |||
PQCLEAN_DILITHIUM3_AVX2_shake128_absorb4x(state, inbuf[0], inbuf[1], inbuf[2], inbuf[3], | |||
SEEDBYTES + 2); | |||
PQCLEAN_DILITHIUM3_AVX2_shake128_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 2, | |||
state); | |||
ctr0 = PQCLEAN_DILITHIUM3_AVX2_rej_eta(a0->coeffs, N, outbuf[0], 2 * SHAKE128_RATE); | |||
ctr1 = PQCLEAN_DILITHIUM3_AVX2_rej_eta(a1->coeffs, N, outbuf[1], 2 * SHAKE128_RATE); | |||
ctr2 = PQCLEAN_DILITHIUM3_AVX2_rej_eta(a2->coeffs, N, outbuf[2], 2 * SHAKE128_RATE); | |||
ctr3 = PQCLEAN_DILITHIUM3_AVX2_rej_eta(a3->coeffs, N, outbuf[3], 2 * SHAKE128_RATE); | |||
while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { | |||
PQCLEAN_DILITHIUM3_AVX2_shake128_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 1, | |||
state); | |||
ctr0 += rej_eta_ref(a0->coeffs + ctr0, N - ctr0, outbuf[0], SHAKE128_RATE); | |||
ctr1 += rej_eta_ref(a1->coeffs + ctr1, N - ctr1, outbuf[1], SHAKE128_RATE); | |||
ctr2 += rej_eta_ref(a2->coeffs + ctr2, N - ctr2, outbuf[2], SHAKE128_RATE); | |||
ctr3 += rej_eta_ref(a3->coeffs + ctr3, N - ctr3, outbuf[3], SHAKE128_RATE); | |||
} | |||
} | |||
/************************************************* | |||
* Name: rej_gamma1m1_ref | |||
* | |||
* Description: Sample uniformly random coefficients | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1] by performing rejection sampling | |||
* using array of random bytes. | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
* random bytes were given. | |||
**************************************************/ | |||
static unsigned int rej_gamma1m1_ref(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t0, t1; | |||
ctr = pos = 0; | |||
while (ctr < len && pos + 5 <= buflen) { | |||
t0 = buf[pos]; | |||
t0 |= (uint32_t)buf[pos + 1] << 8; | |||
t0 |= (uint32_t)buf[pos + 2] << 16; | |||
t0 &= 0xFFFFF; | |||
t1 = buf[pos + 2] >> 4; | |||
t1 |= (uint32_t)buf[pos + 3] << 4; | |||
t1 |= (uint32_t)buf[pos + 4] << 12; | |||
pos += 5; | |||
if (t0 <= 2 * GAMMA1 - 2) { | |||
a[ctr++] = Q + GAMMA1 - 1 - t0; | |||
} | |||
if (t1 <= 2 * GAMMA1 - 2 && ctr < len) { | |||
a[ctr++] = Q + GAMMA1 - 1 - t1; | |||
} | |||
} | |||
return ctr; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1m1 | |||
* | |||
* Description: Sample polynomial with uniformly random coefficients | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1] by performing rejection | |||
* sampling on output stream of SHAKE256(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* CRHBYTES | |||
* - uint16_t nonce: 16-bit nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_GAMMA1M1_NBLOCKS ((641 + STREAM256_BLOCKBYTES) / STREAM256_BLOCKBYTES) | |||
#define POLY_UNIFORM_GAMMA1M1_BUFLEN (POLY_UNIFORM_GAMMA1M1_NBLOCKS * STREAM256_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1m1(poly *a, | |||
const unsigned char seed[CRHBYTES], | |||
uint16_t nonce) { | |||
unsigned int i, ctr, off; | |||
unsigned int buflen = POLY_UNIFORM_GAMMA1M1_BUFLEN; | |||
unsigned char buf[POLY_UNIFORM_GAMMA1M1_BUFLEN + 4]; | |||
stream256_state state; | |||
stream256_init(&state, seed, nonce); | |||
stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1M1_NBLOCKS, &state); | |||
ctr = PQCLEAN_DILITHIUM3_AVX2_rej_gamma1m1(a->coeffs, N, buf, POLY_UNIFORM_GAMMA1M1_BUFLEN); | |||
while (ctr < N) { | |||
off = buflen % 5; | |||
for (i = 0; i < off; ++i) { | |||
buf[i] = buf[buflen - off + i]; | |||
} | |||
buflen = STREAM256_BLOCKBYTES + off; | |||
stream256_squeezeblocks(buf + off, 1, &state); | |||
ctr += rej_gamma1m1_ref(a->coeffs + ctr, N - ctr, buf, buflen); | |||
} | |||
} | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1m1_4x(poly *a0, | |||
poly *a1, | |||
poly *a2, | |||
poly *a3, | |||
const unsigned char seed[CRHBYTES], | |||
uint16_t nonce0, | |||
uint16_t nonce1, | |||
uint16_t nonce2, | |||
uint16_t nonce3) { | |||
unsigned int i, ctr0, ctr1, ctr2, ctr3; | |||
unsigned char inbuf[4][CRHBYTES + 2]; | |||
unsigned char outbuf[4][5 * SHAKE256_RATE]; | |||
__m256i state[25]; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
inbuf[0][i] = seed[i]; | |||
inbuf[1][i] = seed[i]; | |||
inbuf[2][i] = seed[i]; | |||
inbuf[3][i] = seed[i]; | |||
} | |||
inbuf[0][CRHBYTES + 0] = nonce0 & 0xFF; | |||
inbuf[0][CRHBYTES + 1] = nonce0 >> 8; | |||
inbuf[1][CRHBYTES + 0] = nonce1 & 0xFF; | |||
inbuf[1][CRHBYTES + 1] = nonce1 >> 8; | |||
inbuf[2][CRHBYTES + 0] = nonce2 & 0xFF; | |||
inbuf[2][CRHBYTES + 1] = nonce2 >> 8; | |||
inbuf[3][CRHBYTES + 0] = nonce3 & 0xFF; | |||
inbuf[3][CRHBYTES + 1] = nonce3 >> 8; | |||
PQCLEAN_DILITHIUM3_AVX2_shake256_absorb4x(state, inbuf[0], inbuf[1], inbuf[2], inbuf[3], | |||
CRHBYTES + 2); | |||
PQCLEAN_DILITHIUM3_AVX2_shake256_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 5, | |||
state); | |||
ctr0 = rej_gamma1m1_ref(a0->coeffs, N, outbuf[0], 5 * SHAKE256_RATE); | |||
ctr1 = rej_gamma1m1_ref(a1->coeffs, N, outbuf[1], 5 * SHAKE256_RATE); | |||
ctr2 = rej_gamma1m1_ref(a2->coeffs, N, outbuf[2], 5 * SHAKE256_RATE); | |||
ctr3 = rej_gamma1m1_ref(a3->coeffs, N, outbuf[3], 5 * SHAKE256_RATE); | |||
while (ctr0 < N || ctr1 < N || ctr2 < N || ctr3 < N) { | |||
PQCLEAN_DILITHIUM3_AVX2_shake256_squeezeblocks4x(outbuf[0], outbuf[1], outbuf[2], outbuf[3], 1, | |||
state); | |||
ctr0 += rej_gamma1m1_ref(a0->coeffs + ctr0, N - ctr0, outbuf[0], | |||
SHAKE256_RATE); | |||
ctr1 += rej_gamma1m1_ref(a1->coeffs + ctr1, N - ctr1, outbuf[1], | |||
SHAKE256_RATE); | |||
ctr2 += rej_gamma1m1_ref(a2->coeffs + ctr2, N - ctr2, outbuf[2], | |||
SHAKE256_RATE); | |||
ctr3 += rej_gamma1m1_ref(a3->coeffs + ctr3, N - ctr3, outbuf[3], | |||
SHAKE256_RATE); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyeta_pack | |||
* | |||
* Description: Bit-pack polynomial with coefficients in [-ETA,ETA]. | |||
* Input coefficients are assumed to lie in [Q-ETA,Q+ETA]. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLETA_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
unsigned char t[8]; | |||
for (i = 0; i < N / 2; ++i) { | |||
t[0] = Q + ETA - a->coeffs[2 * i + 0]; | |||
t[1] = Q + ETA - a->coeffs[2 * i + 1]; | |||
r[i] = t[0] | (t[1] << 4); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack | |||
* | |||
* Description: Unpack polynomial with coefficients in [-ETA,ETA]. | |||
* Output coefficients lie in [Q-ETA,Q+ETA]. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack(poly *restrict r, const unsigned char *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r->coeffs[2 * i + 0] = a[i] & 0x0F; | |||
r->coeffs[2 * i + 1] = a[i] >> 4; | |||
r->coeffs[2 * i + 0] = Q + ETA - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 1] = Q + ETA - r->coeffs[2 * i + 1]; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyt1_pack | |||
* | |||
* Description: Bit-pack polynomial t1 with coefficients fitting in 9 bits. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLT1_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyt1_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 8; ++i) { | |||
r[9 * i + 0] = (uint8_t)((a->coeffs[8 * i + 0] >> 0)); | |||
r[9 * i + 1] = (uint8_t)((a->coeffs[8 * i + 0] >> 8) | (a->coeffs[8 * i + 1] << 1)); | |||
r[9 * i + 2] = (uint8_t)((a->coeffs[8 * i + 1] >> 7) | (a->coeffs[8 * i + 2] << 2)); | |||
r[9 * i + 3] = (uint8_t)((a->coeffs[8 * i + 2] >> 6) | (a->coeffs[8 * i + 3] << 3)); | |||
r[9 * i + 4] = (uint8_t)((a->coeffs[8 * i + 3] >> 5) | (a->coeffs[8 * i + 4] << 4)); | |||
r[9 * i + 5] = (uint8_t)((a->coeffs[8 * i + 4] >> 4) | (a->coeffs[8 * i + 5] << 5)); | |||
r[9 * i + 6] = (uint8_t)((a->coeffs[8 * i + 5] >> 3) | (a->coeffs[8 * i + 6] << 6)); | |||
r[9 * i + 7] = (uint8_t)((a->coeffs[8 * i + 6] >> 2) | (a->coeffs[8 * i + 7] << 7)); | |||
r[9 * i + 8] = (uint8_t)((a->coeffs[8 * i + 7] >> 1)); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyt1_unpack | |||
* | |||
* Description: Unpack polynomial t1 with 9-bit coefficients. | |||
* Output coefficients are standard representatives. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyt1_unpack(poly *restrict r, const unsigned char *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 8; ++i) { | |||
r->coeffs[8 * i + 0] = ((a[9 * i + 0] >> 0) | ((uint32_t)a[9 * i + 1] << 8)) & 0x1FF; | |||
r->coeffs[8 * i + 1] = ((a[9 * i + 1] >> 1) | ((uint32_t)a[9 * i + 2] << 7)) & 0x1FF; | |||
r->coeffs[8 * i + 2] = ((a[9 * i + 2] >> 2) | ((uint32_t)a[9 * i + 3] << 6)) & 0x1FF; | |||
r->coeffs[8 * i + 3] = ((a[9 * i + 3] >> 3) | ((uint32_t)a[9 * i + 4] << 5)) & 0x1FF; | |||
r->coeffs[8 * i + 4] = ((a[9 * i + 4] >> 4) | ((uint32_t)a[9 * i + 5] << 4)) & 0x1FF; | |||
r->coeffs[8 * i + 5] = ((a[9 * i + 5] >> 5) | ((uint32_t)a[9 * i + 6] << 3)) & 0x1FF; | |||
r->coeffs[8 * i + 6] = ((a[9 * i + 6] >> 6) | ((uint32_t)a[9 * i + 7] << 2)) & 0x1FF; | |||
r->coeffs[8 * i + 7] = ((a[9 * i + 7] >> 7) | ((uint32_t)a[9 * i + 8] << 1)) & 0x1FF; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyt0_pack | |||
* | |||
* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. | |||
* Input coefficients are assumed to lie in ]Q-2^{D-1}, Q+2^{D-1}]. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLT0_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyt0_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
uint32_t t[4]; | |||
for (i = 0; i < N / 4; ++i) { | |||
t[0] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 0]; | |||
t[1] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 1]; | |||
t[2] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 2]; | |||
t[3] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 3]; | |||
r[7 * i + 0] = t[0]; | |||
r[7 * i + 1] = t[0] >> 8; | |||
r[7 * i + 1] |= t[1] << 6; | |||
r[7 * i + 2] = t[1] >> 2; | |||
r[7 * i + 3] = t[1] >> 10; | |||
r[7 * i + 3] |= t[2] << 4; | |||
r[7 * i + 4] = t[2] >> 4; | |||
r[7 * i + 5] = t[2] >> 12; | |||
r[7 * i + 5] |= t[3] << 2; | |||
r[7 * i + 6] = t[3] >> 6; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyt0_unpack | |||
* | |||
* Description: Unpack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. | |||
* Output coefficients lie in ]Q-2^{D-1},Q+2^{D-1}]. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyt0_unpack(poly *restrict r, const unsigned char *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 4; ++i) { | |||
r->coeffs[4 * i + 0] = a[7 * i + 0]; | |||
r->coeffs[4 * i + 0] |= (uint32_t)(a[7 * i + 1] & 0x3F) << 8; | |||
r->coeffs[4 * i + 1] = a[7 * i + 1] >> 6; | |||
r->coeffs[4 * i + 1] |= (uint32_t)a[7 * i + 2] << 2; | |||
r->coeffs[4 * i + 1] |= (uint32_t)(a[7 * i + 3] & 0x0F) << 10; | |||
r->coeffs[4 * i + 2] = a[7 * i + 3] >> 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t)a[7 * i + 4] << 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t)(a[7 * i + 5] & 0x03) << 12; | |||
r->coeffs[4 * i + 3] = a[7 * i + 5] >> 2; | |||
r->coeffs[4 * i + 3] |= (uint32_t)a[7 * i + 6] << 6; | |||
r->coeffs[4 * i + 0] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 0]; | |||
r->coeffs[4 * i + 1] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 1]; | |||
r->coeffs[4 * i + 2] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 2]; | |||
r->coeffs[4 * i + 3] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 3]; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyz_pack | |||
* | |||
* Description: Bit-pack polynomial z with coefficients | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1]. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLZ_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyz_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
uint32_t t[2]; | |||
for (i = 0; i < N / 2; ++i) { | |||
/* Map to {0,...,2*GAMMA1 - 2} */ | |||
t[0] = GAMMA1 - 1 - a->coeffs[2 * i + 0]; | |||
t[0] += ((int32_t)t[0] >> 31) & Q; | |||
t[1] = GAMMA1 - 1 - a->coeffs[2 * i + 1]; | |||
t[1] += ((int32_t)t[1] >> 31) & Q; | |||
r[5 * i + 0] = t[0]; | |||
r[5 * i + 1] = t[0] >> 8; | |||
r[5 * i + 2] = t[0] >> 16; | |||
r[5 * i + 2] |= t[1] << 4; | |||
r[5 * i + 3] = t[1] >> 4; | |||
r[5 * i + 4] = t[1] >> 12; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyz_unpack | |||
* | |||
* Description: Unpack polynomial z with coefficients | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1]. | |||
* Output coefficients are standard representatives. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(poly *restrict r, const unsigned char *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r->coeffs[2 * i + 0] = a[5 * i + 0]; | |||
r->coeffs[2 * i + 0] |= (uint32_t)a[5 * i + 1] << 8; | |||
r->coeffs[2 * i + 0] |= (uint32_t)(a[5 * i + 2] & 0x0F) << 16; | |||
r->coeffs[2 * i + 1] = a[5 * i + 2] >> 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 3] << 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 4] << 12; | |||
r->coeffs[2 * i + 0] = GAMMA1 - 1 - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 0] += ((int32_t)r->coeffs[2 * i + 0] >> 31) & Q; | |||
r->coeffs[2 * i + 1] = GAMMA1 - 1 - r->coeffs[2 * i + 1]; | |||
r->coeffs[2 * i + 1] += ((int32_t)r->coeffs[2 * i + 1] >> 31) & Q; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyw1_pack | |||
* | |||
* Description: Bit-pack polynomial w1 with coefficients in [0, 15]. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* POLW1_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyw1_pack(unsigned char *restrict r, const poly *restrict a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r[i] = a->coeffs[2 * i + 0] | (a->coeffs[2 * i + 1] << 4); | |||
} | |||
} |
@@ -0,0 +1,83 @@ | |||
#ifndef POLY_H | |||
#define POLY_H | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "alignment.h" | |||
#include "params.h" | |||
/* Polynomial with N 32-bit coefficients. The union exposes the same storage
 * either as N scalar uint32_t values (coeffs) or as N/8 256-bit vectors
 * (coeffs_x8). */
typedef union { | |||
uint32_t coeffs[N]; | |||
__m256i coeffs_x8[N / 8]; | |||
} poly; | |||
/* Coefficient reduction routines operating on a single polynomial. */
void PQCLEAN_DILITHIUM3_AVX2_poly_reduce(poly *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_csubq(poly *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_freeze(poly *a); | |||
/* Coefficient-wise arithmetic; c is the output, a and b are the inputs. */
void PQCLEAN_DILITHIUM3_AVX2_poly_add(poly *c, const poly *a, const poly *b); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_sub(poly *c, const poly *a, const poly *b); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_shiftl(poly *a); | |||
/* NTT-domain transforms and multiplication. */
void PQCLEAN_DILITHIUM3_AVX2_poly_ntt(poly *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_invntt_montgomery(poly *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_invmontgomery(poly *c, const poly *a, const poly *b); | |||
/* Rounding, hint and norm-check helpers. */
void PQCLEAN_DILITHIUM3_AVX2_poly_power2round(poly *a1, poly *a0, const poly *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_decompose(poly *a1, poly *a0, const poly *a); | |||
unsigned int PQCLEAN_DILITHIUM3_AVX2_poly_make_hint(poly *h, const poly *a0, const poly *a1); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_use_hint(poly *a, const poly *b, const poly *h); | |||
int PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(const poly *a, uint32_t B); | |||
/* Samplers that fill polynomials from a seed and a 16-bit nonce. The _4x
 * variants fill four polynomials from one seed and four nonces. */
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform(poly *a, | |||
const uint8_t *seed, | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(poly *a0, | |||
poly *a1, | |||
poly *a2, | |||
poly *a3, | |||
const uint8_t *seed, | |||
uint16_t nonce0, | |||
uint16_t nonce1, | |||
uint16_t nonce2, | |||
uint16_t nonce3); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta(poly *a, | |||
const uint8_t *seed, | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_4x(poly *a0, | |||
poly *a1, | |||
poly *a2, | |||
poly *a3, | |||
const uint8_t *seed, | |||
uint16_t nonce0, | |||
uint16_t nonce1, | |||
uint16_t nonce2, | |||
uint16_t nonce3); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1m1(poly *a, | |||
const uint8_t *seed, | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1m1_4x(poly *a0, | |||
poly *a1, | |||
poly *a2, | |||
poly *a3, | |||
const uint8_t *seed, | |||
uint16_t nonce0, | |||
uint16_t nonce1, | |||
uint16_t nonce2, | |||
uint16_t nonce3); | |||
/* Bit-packing of polynomials into byte arrays (pack) and back (unpack). */
void PQCLEAN_DILITHIUM3_AVX2_polyeta_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_polyeta_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_polyt1_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_polyt1_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_polyt0_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_polyt0_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_polyz_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_polyz_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM3_AVX2_polyw1_pack(uint8_t *r, const poly *a); | |||
#endif |
@@ -0,0 +1,353 @@ | |||
#include <stdint.h> | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "polyvec.h" | |||
/**************************************************************/ | |||
/************ Vectors of polynomials of length L **************/ | |||
/**************************************************************/ | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyvecl_freeze | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length L | |||
* to standard representatives. | |||
* | |||
* Arguments: - polyvecl *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyvecl_freeze(polyvecl *v) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_freeze(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyvecl_add | |||
* | |||
* Description: Add vectors of polynomials of length L. | |||
* No modular reduction is performed. | |||
* | |||
* Arguments: - polyvecl *w: pointer to output vector | |||
* - const polyvecl *u: pointer to first summand | |||
* - const polyvecl *v: pointer to second summand | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt | |||
* | |||
* Description: Forward NTT of all polynomials in vector of length L. Output | |||
* coefficients can be up to 16*Q larger than input coefficients. | |||
* | |||
* Arguments: - polyvecl *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(polyvecl *v) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_ntt(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_invmontgomery | |||
* | |||
* Description: Pointwise multiply vectors of polynomials of length L, multiply | |||
* resulting vector by 2^{-32} and add (accumulate) polynomials | |||
* in it. Input/output vectors are in NTT domain representation. | |||
* Input coefficients are assumed to be less than 22*Q. Output | |||
* coeffcient are less than 2*L*Q. | |||
* | |||
* Arguments: - poly *w: output polynomial | |||
* - const polyvecl *u: pointer to first input vector | |||
* - const polyvecl *v: pointer to second input vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_invmontgomery(poly *w, | |||
const polyvecl *u, | |||
const polyvecl *v) { | |||
PQCLEAN_DILITHIUM3_AVX2_pointwise_acc_avx(w->coeffs, u->vec->coeffs, v->vec->coeffs); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyvecl_chknorm | |||
* | |||
* Description: Check infinity norm of polynomials in vector of length L. | |||
* Assumes input coefficients to be standard representatives. | |||
* | |||
* Arguments: - const polyvecl *v: pointer to vector | |||
* - uint32_t B: norm bound | |||
* | |||
* Returns 0 if norm of all polynomials is strictly smaller than B and 1 | |||
* otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_AVX2_polyvecl_chknorm(const polyvecl *v, uint32_t bound) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
if (PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(&v->vec[i], bound)) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/**************************************************************/ | |||
/************ Vectors of polynomials of length K **************/ | |||
/**************************************************************/ | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_reduce | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length K | |||
* to representatives in [0,2*Q[. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_reduce(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_reduce(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_csubq | |||
* | |||
* Description: For all coefficients of polynomials in vector of length K | |||
* subtract Q if coefficient is bigger than Q. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_csubq(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_csubq(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: polyveck_freeze | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length K | |||
* to standard representatives. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_freeze(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_freeze(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_add | |||
* | |||
* Description: Add vectors of polynomials of length K. | |||
* No modular reduction is performed. | |||
* | |||
* Arguments: - polyveck *w: pointer to output vector | |||
* - const polyveck *u: pointer to first summand | |||
* - const polyveck *v: pointer to second summand | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_add(&w->vec[i], &u->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_sub | |||
* | |||
* Description: Subtract vectors of polynomials of length K. | |||
* Assumes coefficients of polynomials in second input vector | |||
* to be less than 2*Q. No modular reduction is performed. | |||
* | |||
* Arguments: - polyveck *w: pointer to output vector | |||
* - const polyveck *u: pointer to first input vector | |||
* - const polyveck *v: pointer to second input vector to be | |||
* subtracted from first input vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_sub(&w->vec[i], &u->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_shiftl | |||
* | |||
* Description: Multiply vector of polynomials of Length K by 2^D without modular | |||
* reduction. Assumes input coefficients to be less than 2^{32-D}. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_shiftl(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_shiftl(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt | |||
* | |||
* Description: Forward NTT of all polynomials in vector of length K. Output | |||
* coefficients can be up to 16*Q larger than input coefficients. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_ntt(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_invntt_montgomery | |||
* | |||
* Description: Inverse NTT and multiplication by 2^{32} of polynomials | |||
* in vector of length K. Input coefficients need to be less | |||
* than 2*Q. | |||
* | |||
* Arguments: - polyveck *v: pointer to input/output vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_invntt_montgomery(polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_invntt_montgomery(&v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_chknorm | |||
* | |||
* Description: Check infinity norm of polynomials in vector of length K. | |||
* Assumes input coefficients to be standard representatives. | |||
* | |||
* Arguments: - const polyveck *v: pointer to vector | |||
* - uint32_t B: norm bound | |||
* | |||
* Returns 0 if norm of all polynomials are strictly smaller than B and 1 | |||
* otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_AVX2_polyveck_chknorm(const polyveck *v, uint32_t bound) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
if (PQCLEAN_DILITHIUM3_AVX2_poly_chknorm(&v->vec[i], bound)) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_power2round | |||
* | |||
* Description: For all coefficients a of polynomials in vector of length K, | |||
* compute a0, a1 such that a mod Q = a1*2^D + a0 | |||
* with -2^{D-1} < a0 <= 2^{D-1}. Assumes coefficients to be | |||
* standard representatives. | |||
* | |||
* Arguments: - polyveck *v1: pointer to output vector of polynomials with | |||
* coefficients a1 | |||
* - polyveck *v0: pointer to output vector of polynomials with | |||
* coefficients Q + a0 | |||
* - const polyveck *v: pointer to input vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_power2round(&v1->vec[i], &v0->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_decompose | |||
* | |||
* Description: For all coefficients a of polynomials in vector of length K, | |||
* compute high and low bits a0, a1 such a mod Q = a1*ALPHA + a0 | |||
* with -ALPHA/2 < a0 <= ALPHA/2 except a1 = (Q-1)/ALPHA where we | |||
* set a1 = 0 and -ALPHA/2 <= a0 = a mod Q - Q < 0. | |||
* Assumes coefficients to be standard representatives. | |||
* | |||
* Arguments: - polyveck *v1: pointer to output vector of polynomials with | |||
* coefficients a1 | |||
* - polyveck *v0: pointer to output vector of polynomials with | |||
* coefficients Q + a0 | |||
* - const polyveck *v: pointer to input vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_decompose(&v1->vec[i], &v0->vec[i], &v->vec[i]); | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_polyveck_make_hint | |||
* | |||
* Description: Compute hint vector. | |||
* | |||
* Arguments: - polyveck *h: pointer to output vector | |||
* - const polyveck *v0: pointer to low part of input vector | |||
* - const polyveck *v1: pointer to high part of input vector | |||
* | |||
* Returns number of 1 bits. | |||
**************************************************/ | |||
unsigned int PQCLEAN_DILITHIUM3_AVX2_polyveck_make_hint(polyveck *h, | |||
const polyveck *v0, | |||
const polyveck *v1) { | |||
unsigned int i, s = 0; | |||
for (i = 0; i < K; ++i) { | |||
s += PQCLEAN_DILITHIUM3_AVX2_poly_make_hint(&h->vec[i], &v0->vec[i], &v1->vec[i]); | |||
} | |||
return s; | |||
} | |||
/************************************************* | |||
* Name: polyveck_use_hint | |||
* | |||
* Description: Use hint vector to correct the high bits of input vector. | |||
* | |||
* Arguments: - polyveck *w: pointer to output vector of polynomials with | |||
* corrected high bits | |||
* - const polyveck *v: pointer to input vector | |||
* - const polyveck *h: pointer to input hint vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_use_hint(&w->vec[i], &v->vec[i], &h->vec[i]); | |||
} | |||
} |
@@ -0,0 +1,52 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_POLYVEC_H
#define PQCLEAN_DILITHIUM3_AVX2_POLYVEC_H

#include <stdint.h>

#include "params.h"
#include "poly.h"

/* ---- Vectors of polynomials of length L -------------------------------- */

typedef struct {
    poly vec[L];
} polyvecl;

void PQCLEAN_DILITHIUM3_AVX2_polyvecl_freeze(polyvecl *v);

void PQCLEAN_DILITHIUM3_AVX2_polyvecl_add(polyvecl *w, const polyvecl *u, const polyvecl *v);

void PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(polyvecl *v);
void PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_invmontgomery(
    poly *w,
    const polyvecl *u,
    const polyvecl *v);

/* Norm check; B is the norm bound. */
int PQCLEAN_DILITHIUM3_AVX2_polyvecl_chknorm(const polyvecl *v, uint32_t B);

/* ---- Vectors of polynomials of length K -------------------------------- */

typedef struct {
    poly vec[K];
} polyveck;

void PQCLEAN_DILITHIUM3_AVX2_polyveck_reduce(polyveck *v);
void PQCLEAN_DILITHIUM3_AVX2_polyveck_csubq(polyveck *v);
void PQCLEAN_DILITHIUM3_AVX2_polyveck_freeze(polyveck *v);

void PQCLEAN_DILITHIUM3_AVX2_polyveck_add(polyveck *w, const polyveck *u, const polyveck *v);
void PQCLEAN_DILITHIUM3_AVX2_polyveck_sub(polyveck *w, const polyveck *u, const polyveck *v);
void PQCLEAN_DILITHIUM3_AVX2_polyveck_shiftl(polyveck *v);

void PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt(polyveck *v);
void PQCLEAN_DILITHIUM3_AVX2_polyveck_invntt_montgomery(polyveck *v);

/* Returns 0 if every polynomial passes the norm check against B, 1 otherwise. */
int PQCLEAN_DILITHIUM3_AVX2_polyveck_chknorm(const polyveck *v, uint32_t B);

/* Per-coefficient rounding helpers: v1 receives the high parts, v0 the low parts. */
void PQCLEAN_DILITHIUM3_AVX2_polyveck_power2round(polyveck *v1, polyveck *v0, const polyveck *v);
void PQCLEAN_DILITHIUM3_AVX2_polyveck_decompose(polyveck *v1, polyveck *v0, const polyveck *v);

/* Fills h with the hint vector and returns the number of 1 bits. */
unsigned int PQCLEAN_DILITHIUM3_AVX2_polyveck_make_hint(
    polyveck *h,
    const polyveck *v0,
    const polyveck *v1);

/* Writes to w the high bits of v corrected by the hint vector h. */
void PQCLEAN_DILITHIUM3_AVX2_polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h);

#endif
@@ -0,0 +1,9 @@ | |||
#ifndef REDUCE_H | |||
#define REDUCE_H | |||
#include <stdint.h> | |||
void PQCLEAN_DILITHIUM3_AVX2_reduce_avx(uint32_t a[N]); | |||
void PQCLEAN_DILITHIUM3_AVX2_csubq_avx(uint32_t a[N]); | |||
#endif |
@@ -0,0 +1,91 @@ | |||
# Partial reduction of a block of packed 32-bit words, in place.
#
# Input:  %rdi points to the words. The loop below runs 8 times over
#         128 bytes, i.e. 256 words in total, and every access is a
#         vmovdqa, so the buffer has to be 32-byte aligned.
# Effect: each word x is replaced by
#             (x & m) - (x >> 23) + ((x >> 23) << 13)
#         where m is the per-lane mask loaded from
#         _PQCLEAN_DILITHIUM3_AVX2_8x23ones (defined outside this file;
#         presumably 2^23 - 1 in every lane, as the name suggests).
# Writes: %eax, %rdi, %ymm0-%ymm8 and the flags.
.global PQCLEAN_DILITHIUM3_AVX2_reduce_avx
PQCLEAN_DILITHIUM3_AVX2_reduce_avx:
    # constants: %ymm0 = mask, %eax = iteration counter
    vmovdqa     _PQCLEAN_DILITHIUM3_AVX2_8x23ones(%rip),%ymm0
    xorl        %eax,%eax

_looptop_rdc32:
    # load four vectors (32 words)
    vmovdqa     (%rdi),%ymm1
    vmovdqa     32(%rdi),%ymm3
    vmovdqa     64(%rdi),%ymm5
    vmovdqa     96(%rdi),%ymm7

    # high part: hi = x >> 23 (logical shift)
    vpsrld      $23,%ymm1,%ymm2
    vpsrld      $23,%ymm3,%ymm4
    vpsrld      $23,%ymm5,%ymm6
    vpsrld      $23,%ymm7,%ymm8

    # low part: lo = x & mask
    vpand       %ymm0,%ymm1,%ymm1
    vpand       %ymm0,%ymm3,%ymm3
    vpand       %ymm0,%ymm5,%ymm5
    vpand       %ymm0,%ymm7,%ymm7

    # lo -= hi
    vpsubd      %ymm2,%ymm1,%ymm1
    vpsubd      %ymm4,%ymm3,%ymm3
    vpsubd      %ymm6,%ymm5,%ymm5
    vpsubd      %ymm8,%ymm7,%ymm7

    # hi <<= 13
    vpslld      $13,%ymm2,%ymm2
    vpslld      $13,%ymm4,%ymm4
    vpslld      $13,%ymm6,%ymm6
    vpslld      $13,%ymm8,%ymm8

    # lo += hi
    vpaddd      %ymm2,%ymm1,%ymm1
    vpaddd      %ymm4,%ymm3,%ymm3
    vpaddd      %ymm6,%ymm5,%ymm5
    vpaddd      %ymm8,%ymm7,%ymm7

    # store the four result vectors back
    vmovdqa     %ymm1,(%rdi)
    vmovdqa     %ymm3,32(%rdi)
    vmovdqa     %ymm5,64(%rdi)
    vmovdqa     %ymm7,96(%rdi)

    # advance by 128 bytes; loop while counter < 8
    addq        $128,%rdi
    addl        $1,%eax
    cmpl        $8,%eax
    jb          _looptop_rdc32

    ret
# Conditional subtraction on a block of packed 32-bit words, in place.
#
# Input:  %rdi points to the words. The loop below runs 8 times over
#         128 bytes, i.e. 256 words in total, and every access is a
#         vmovdqa, so the buffer has to be 32-byte aligned.
# Effect: with q the per-lane constant loaded from
#         _PQCLEAN_DILITHIUM3_AVX2_8xq (defined outside this file), each
#         word x becomes x - q, and q is added back whenever x - q is
#         negative as a signed 32-bit value.
# Writes: %eax, %rdi, %ymm0-%ymm8 and the flags.
.global PQCLEAN_DILITHIUM3_AVX2_csubq_avx
PQCLEAN_DILITHIUM3_AVX2_csubq_avx:
    # constants: %ymm0 = q in every lane, %eax = iteration counter
    vmovdqa     _PQCLEAN_DILITHIUM3_AVX2_8xq(%rip),%ymm0
    xorl        %eax,%eax

_looptop_csubq:
    # load four vectors (32 words)
    vmovdqa     (%rdi),%ymm1
    vmovdqa     32(%rdi),%ymm3
    vmovdqa     64(%rdi),%ymm5
    vmovdqa     96(%rdi),%ymm7

    # x -= q
    vpsubd      %ymm0,%ymm1,%ymm1
    vpsubd      %ymm0,%ymm3,%ymm3
    vpsubd      %ymm0,%ymm5,%ymm5
    vpsubd      %ymm0,%ymm7,%ymm7

    # sign mask: all ones in the lanes where x went negative, else zero
    vpsrad      $31,%ymm1,%ymm2
    vpsrad      $31,%ymm3,%ymm4
    vpsrad      $31,%ymm5,%ymm6
    vpsrad      $31,%ymm7,%ymm8

    # mask &= q
    vpand       %ymm0,%ymm2,%ymm2
    vpand       %ymm0,%ymm4,%ymm4
    vpand       %ymm0,%ymm6,%ymm6
    vpand       %ymm0,%ymm8,%ymm8

    # x += mask, which undoes the subtraction in the negative lanes only
    vpaddd      %ymm2,%ymm1,%ymm1
    vpaddd      %ymm4,%ymm3,%ymm3
    vpaddd      %ymm6,%ymm5,%ymm5
    vpaddd      %ymm8,%ymm7,%ymm7

    # store the four result vectors back
    vmovdqa     %ymm1,(%rdi)
    vmovdqa     %ymm3,32(%rdi)
    vmovdqa     %ymm5,64(%rdi)
    vmovdqa     %ymm7,96(%rdi)

    # advance by 128 bytes; loop while counter < 8
    addq        $128,%rdi
    addl        $1,%eax
    cmpl        $8,%eax
    jb          _looptop_csubq

    ret
@@ -0,0 +1,443 @@ | |||
#include <immintrin.h> | |||
#include <stdint.h> | |||
#include "params.h" | |||
#include "rejsample.h" | |||
/*
 * Lane-compaction table used by the vectorised rejection samplers.
 *
 * Entry m (an 8-bit acceptance mask) lists, in ascending order, the
 * positions of the bits that are set in m, zero-padded to eight bytes.
 * Used as a permutation / byte-shuffle control it moves the accepted
 * lanes to the front of the vector.
 *
 * Layout: four entries per row; the comment gives the index of the first
 * entry in the row.
 */
static const uint8_t idx[256][8] = {
    /* 0x00 */ {0,0,0,0,0,0,0,0}, {0,0,0,0,0,0,0,0}, {1,0,0,0,0,0,0,0}, {0,1,0,0,0,0,0,0},
    /* 0x04 */ {2,0,0,0,0,0,0,0}, {0,2,0,0,0,0,0,0}, {1,2,0,0,0,0,0,0}, {0,1,2,0,0,0,0,0},
    /* 0x08 */ {3,0,0,0,0,0,0,0}, {0,3,0,0,0,0,0,0}, {1,3,0,0,0,0,0,0}, {0,1,3,0,0,0,0,0},
    /* 0x0c */ {2,3,0,0,0,0,0,0}, {0,2,3,0,0,0,0,0}, {1,2,3,0,0,0,0,0}, {0,1,2,3,0,0,0,0},
    /* 0x10 */ {4,0,0,0,0,0,0,0}, {0,4,0,0,0,0,0,0}, {1,4,0,0,0,0,0,0}, {0,1,4,0,0,0,0,0},
    /* 0x14 */ {2,4,0,0,0,0,0,0}, {0,2,4,0,0,0,0,0}, {1,2,4,0,0,0,0,0}, {0,1,2,4,0,0,0,0},
    /* 0x18 */ {3,4,0,0,0,0,0,0}, {0,3,4,0,0,0,0,0}, {1,3,4,0,0,0,0,0}, {0,1,3,4,0,0,0,0},
    /* 0x1c */ {2,3,4,0,0,0,0,0}, {0,2,3,4,0,0,0,0}, {1,2,3,4,0,0,0,0}, {0,1,2,3,4,0,0,0},
    /* 0x20 */ {5,0,0,0,0,0,0,0}, {0,5,0,0,0,0,0,0}, {1,5,0,0,0,0,0,0}, {0,1,5,0,0,0,0,0},
    /* 0x24 */ {2,5,0,0,0,0,0,0}, {0,2,5,0,0,0,0,0}, {1,2,5,0,0,0,0,0}, {0,1,2,5,0,0,0,0},
    /* 0x28 */ {3,5,0,0,0,0,0,0}, {0,3,5,0,0,0,0,0}, {1,3,5,0,0,0,0,0}, {0,1,3,5,0,0,0,0},
    /* 0x2c */ {2,3,5,0,0,0,0,0}, {0,2,3,5,0,0,0,0}, {1,2,3,5,0,0,0,0}, {0,1,2,3,5,0,0,0},
    /* 0x30 */ {4,5,0,0,0,0,0,0}, {0,4,5,0,0,0,0,0}, {1,4,5,0,0,0,0,0}, {0,1,4,5,0,0,0,0},
    /* 0x34 */ {2,4,5,0,0,0,0,0}, {0,2,4,5,0,0,0,0}, {1,2,4,5,0,0,0,0}, {0,1,2,4,5,0,0,0},
    /* 0x38 */ {3,4,5,0,0,0,0,0}, {0,3,4,5,0,0,0,0}, {1,3,4,5,0,0,0,0}, {0,1,3,4,5,0,0,0},
    /* 0x3c */ {2,3,4,5,0,0,0,0}, {0,2,3,4,5,0,0,0}, {1,2,3,4,5,0,0,0}, {0,1,2,3,4,5,0,0},
    /* 0x40 */ {6,0,0,0,0,0,0,0}, {0,6,0,0,0,0,0,0}, {1,6,0,0,0,0,0,0}, {0,1,6,0,0,0,0,0},
    /* 0x44 */ {2,6,0,0,0,0,0,0}, {0,2,6,0,0,0,0,0}, {1,2,6,0,0,0,0,0}, {0,1,2,6,0,0,0,0},
    /* 0x48 */ {3,6,0,0,0,0,0,0}, {0,3,6,0,0,0,0,0}, {1,3,6,0,0,0,0,0}, {0,1,3,6,0,0,0,0},
    /* 0x4c */ {2,3,6,0,0,0,0,0}, {0,2,3,6,0,0,0,0}, {1,2,3,6,0,0,0,0}, {0,1,2,3,6,0,0,0},
    /* 0x50 */ {4,6,0,0,0,0,0,0}, {0,4,6,0,0,0,0,0}, {1,4,6,0,0,0,0,0}, {0,1,4,6,0,0,0,0},
    /* 0x54 */ {2,4,6,0,0,0,0,0}, {0,2,4,6,0,0,0,0}, {1,2,4,6,0,0,0,0}, {0,1,2,4,6,0,0,0},
    /* 0x58 */ {3,4,6,0,0,0,0,0}, {0,3,4,6,0,0,0,0}, {1,3,4,6,0,0,0,0}, {0,1,3,4,6,0,0,0},
    /* 0x5c */ {2,3,4,6,0,0,0,0}, {0,2,3,4,6,0,0,0}, {1,2,3,4,6,0,0,0}, {0,1,2,3,4,6,0,0},
    /* 0x60 */ {5,6,0,0,0,0,0,0}, {0,5,6,0,0,0,0,0}, {1,5,6,0,0,0,0,0}, {0,1,5,6,0,0,0,0},
    /* 0x64 */ {2,5,6,0,0,0,0,0}, {0,2,5,6,0,0,0,0}, {1,2,5,6,0,0,0,0}, {0,1,2,5,6,0,0,0},
    /* 0x68 */ {3,5,6,0,0,0,0,0}, {0,3,5,6,0,0,0,0}, {1,3,5,6,0,0,0,0}, {0,1,3,5,6,0,0,0},
    /* 0x6c */ {2,3,5,6,0,0,0,0}, {0,2,3,5,6,0,0,0}, {1,2,3,5,6,0,0,0}, {0,1,2,3,5,6,0,0},
    /* 0x70 */ {4,5,6,0,0,0,0,0}, {0,4,5,6,0,0,0,0}, {1,4,5,6,0,0,0,0}, {0,1,4,5,6,0,0,0},
    /* 0x74 */ {2,4,5,6,0,0,0,0}, {0,2,4,5,6,0,0,0}, {1,2,4,5,6,0,0,0}, {0,1,2,4,5,6,0,0},
    /* 0x78 */ {3,4,5,6,0,0,0,0}, {0,3,4,5,6,0,0,0}, {1,3,4,5,6,0,0,0}, {0,1,3,4,5,6,0,0},
    /* 0x7c */ {2,3,4,5,6,0,0,0}, {0,2,3,4,5,6,0,0}, {1,2,3,4,5,6,0,0}, {0,1,2,3,4,5,6,0},
    /* 0x80 */ {7,0,0,0,0,0,0,0}, {0,7,0,0,0,0,0,0}, {1,7,0,0,0,0,0,0}, {0,1,7,0,0,0,0,0},
    /* 0x84 */ {2,7,0,0,0,0,0,0}, {0,2,7,0,0,0,0,0}, {1,2,7,0,0,0,0,0}, {0,1,2,7,0,0,0,0},
    /* 0x88 */ {3,7,0,0,0,0,0,0}, {0,3,7,0,0,0,0,0}, {1,3,7,0,0,0,0,0}, {0,1,3,7,0,0,0,0},
    /* 0x8c */ {2,3,7,0,0,0,0,0}, {0,2,3,7,0,0,0,0}, {1,2,3,7,0,0,0,0}, {0,1,2,3,7,0,0,0},
    /* 0x90 */ {4,7,0,0,0,0,0,0}, {0,4,7,0,0,0,0,0}, {1,4,7,0,0,0,0,0}, {0,1,4,7,0,0,0,0},
    /* 0x94 */ {2,4,7,0,0,0,0,0}, {0,2,4,7,0,0,0,0}, {1,2,4,7,0,0,0,0}, {0,1,2,4,7,0,0,0},
    /* 0x98 */ {3,4,7,0,0,0,0,0}, {0,3,4,7,0,0,0,0}, {1,3,4,7,0,0,0,0}, {0,1,3,4,7,0,0,0},
    /* 0x9c */ {2,3,4,7,0,0,0,0}, {0,2,3,4,7,0,0,0}, {1,2,3,4,7,0,0,0}, {0,1,2,3,4,7,0,0},
    /* 0xa0 */ {5,7,0,0,0,0,0,0}, {0,5,7,0,0,0,0,0}, {1,5,7,0,0,0,0,0}, {0,1,5,7,0,0,0,0},
    /* 0xa4 */ {2,5,7,0,0,0,0,0}, {0,2,5,7,0,0,0,0}, {1,2,5,7,0,0,0,0}, {0,1,2,5,7,0,0,0},
    /* 0xa8 */ {3,5,7,0,0,0,0,0}, {0,3,5,7,0,0,0,0}, {1,3,5,7,0,0,0,0}, {0,1,3,5,7,0,0,0},
    /* 0xac */ {2,3,5,7,0,0,0,0}, {0,2,3,5,7,0,0,0}, {1,2,3,5,7,0,0,0}, {0,1,2,3,5,7,0,0},
    /* 0xb0 */ {4,5,7,0,0,0,0,0}, {0,4,5,7,0,0,0,0}, {1,4,5,7,0,0,0,0}, {0,1,4,5,7,0,0,0},
    /* 0xb4 */ {2,4,5,7,0,0,0,0}, {0,2,4,5,7,0,0,0}, {1,2,4,5,7,0,0,0}, {0,1,2,4,5,7,0,0},
    /* 0xb8 */ {3,4,5,7,0,0,0,0}, {0,3,4,5,7,0,0,0}, {1,3,4,5,7,0,0,0}, {0,1,3,4,5,7,0,0},
    /* 0xbc */ {2,3,4,5,7,0,0,0}, {0,2,3,4,5,7,0,0}, {1,2,3,4,5,7,0,0}, {0,1,2,3,4,5,7,0},
    /* 0xc0 */ {6,7,0,0,0,0,0,0}, {0,6,7,0,0,0,0,0}, {1,6,7,0,0,0,0,0}, {0,1,6,7,0,0,0,0},
    /* 0xc4 */ {2,6,7,0,0,0,0,0}, {0,2,6,7,0,0,0,0}, {1,2,6,7,0,0,0,0}, {0,1,2,6,7,0,0,0},
    /* 0xc8 */ {3,6,7,0,0,0,0,0}, {0,3,6,7,0,0,0,0}, {1,3,6,7,0,0,0,0}, {0,1,3,6,7,0,0,0},
    /* 0xcc */ {2,3,6,7,0,0,0,0}, {0,2,3,6,7,0,0,0}, {1,2,3,6,7,0,0,0}, {0,1,2,3,6,7,0,0},
    /* 0xd0 */ {4,6,7,0,0,0,0,0}, {0,4,6,7,0,0,0,0}, {1,4,6,7,0,0,0,0}, {0,1,4,6,7,0,0,0},
    /* 0xd4 */ {2,4,6,7,0,0,0,0}, {0,2,4,6,7,0,0,0}, {1,2,4,6,7,0,0,0}, {0,1,2,4,6,7,0,0},
    /* 0xd8 */ {3,4,6,7,0,0,0,0}, {0,3,4,6,7,0,0,0}, {1,3,4,6,7,0,0,0}, {0,1,3,4,6,7,0,0},
    /* 0xdc */ {2,3,4,6,7,0,0,0}, {0,2,3,4,6,7,0,0}, {1,2,3,4,6,7,0,0}, {0,1,2,3,4,6,7,0},
    /* 0xe0 */ {5,6,7,0,0,0,0,0}, {0,5,6,7,0,0,0,0}, {1,5,6,7,0,0,0,0}, {0,1,5,6,7,0,0,0},
    /* 0xe4 */ {2,5,6,7,0,0,0,0}, {0,2,5,6,7,0,0,0}, {1,2,5,6,7,0,0,0}, {0,1,2,5,6,7,0,0},
    /* 0xe8 */ {3,5,6,7,0,0,0,0}, {0,3,5,6,7,0,0,0}, {1,3,5,6,7,0,0,0}, {0,1,3,5,6,7,0,0},
    /* 0xec */ {2,3,5,6,7,0,0,0}, {0,2,3,5,6,7,0,0}, {1,2,3,5,6,7,0,0}, {0,1,2,3,5,6,7,0},
    /* 0xf0 */ {4,5,6,7,0,0,0,0}, {0,4,5,6,7,0,0,0}, {1,4,5,6,7,0,0,0}, {0,1,4,5,6,7,0,0},
    /* 0xf4 */ {2,4,5,6,7,0,0,0}, {0,2,4,5,6,7,0,0}, {1,2,4,5,6,7,0,0}, {0,1,2,4,5,6,7,0},
    /* 0xf8 */ {3,4,5,6,7,0,0,0}, {0,3,4,5,6,7,0,0}, {1,3,4,5,6,7,0,0}, {0,1,3,4,5,6,7,0},
    /* 0xfc */ {2,3,4,5,6,7,0,0}, {0,2,3,4,5,6,7,0}, {1,2,3,4,5,6,7,0}, {0,1,2,3,4,5,6,7}
};
unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_uniform( | |||
uint32_t *r, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int i, ctr, pos; | |||
uint32_t vec[8]; | |||
__m256i d, tmp; | |||
uint32_t good; | |||
const __m256i bound = _mm256_set1_epi32(Q); | |||
ctr = pos = 0; | |||
while (ctr + 8 <= len && pos + 24 <= buflen) { | |||
for (i = 0; i < 8; i++) { | |||
vec[i] = buf[pos++]; | |||
vec[i] |= (uint32_t)buf[pos++] << 8; | |||
vec[i] |= (uint32_t)buf[pos++] << 16; | |||
vec[i] &= 0x7FFFFF; | |||
} | |||
d = _mm256_loadu_si256((__m256i_u *)vec); | |||
tmp = _mm256_cmpgt_epi32(bound, d); | |||
good = _mm256_movemask_ps((__m256)tmp); | |||
__m128i rid = _mm_loadl_epi64((__m128i_u *)&idx[good]); | |||
tmp = _mm256_cvtepu8_epi32(rid); | |||
d = _mm256_permutevar8x32_epi32(d, tmp); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], d); | |||
ctr += __builtin_popcount(good); | |||
} | |||
while (ctr < len && pos + 3 <= buflen) { | |||
vec[0] = buf[pos++]; | |||
vec[0] |= (uint32_t)buf[pos++] << 8; | |||
vec[0] |= (uint32_t)buf[pos++] << 16; | |||
vec[0] &= 0x7FFFFF; | |||
if (vec[0] < Q) { | |||
r[ctr++] = vec[0]; | |||
} | |||
} | |||
return ctr; | |||
} | |||
unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_eta( | |||
uint32_t *r, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int i, ctr, pos; | |||
uint8_t vec[32]; | |||
__m256i tmp0, tmp1; | |||
__m128i d0, d1, rid; | |||
uint32_t good; | |||
const __m256i bound = _mm256_set1_epi8(2 * ETA + 1); | |||
const __m256i off = _mm256_set1_epi32(Q + ETA); | |||
ctr = pos = 0; | |||
while (ctr + 32 <= len && pos + 16 <= buflen) { | |||
for (i = 0; i < 16; i++) { | |||
vec[2 * i + 0] = buf[pos] & 0x0F; | |||
vec[2 * i + 1] = buf[pos++] >> 4; | |||
} | |||
tmp0 = _mm256_loadu_si256((__m256i_u *)vec); | |||
tmp1 = _mm256_cmpgt_epi8(bound, tmp0); | |||
good = _mm256_movemask_epi8(tmp1); | |||
d0 = _mm256_castsi256_si128(tmp0); | |||
rid = _mm_loadl_epi64((__m128i_u *)&idx[good & 0xFF]); | |||
d1 = _mm_shuffle_epi8(d0, rid); | |||
tmp1 = _mm256_cvtepu8_epi32(d1); | |||
tmp1 = _mm256_sub_epi32(off, tmp1); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], tmp1); | |||
ctr += __builtin_popcount(good & 0xFF); | |||
d0 = _mm_bsrli_si128(d0, 8); | |||
rid = _mm_loadl_epi64((__m128i_u *)&idx[(good >> 8) & 0xFF]); | |||
d1 = _mm_shuffle_epi8(d0, rid); | |||
tmp1 = _mm256_cvtepu8_epi32(d1); | |||
tmp1 = _mm256_sub_epi32(off, tmp1); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], tmp1); | |||
ctr += __builtin_popcount((good >> 8) & 0xFF); | |||
d0 = _mm256_extracti128_si256(tmp0, 1); | |||
rid = _mm_loadl_epi64((__m128i_u *)&idx[(good >> 16) & 0xFF]); | |||
d1 = _mm_shuffle_epi8(d0, rid); | |||
tmp1 = _mm256_cvtepu8_epi32(d1); | |||
tmp1 = _mm256_sub_epi32(off, tmp1); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], tmp1); | |||
ctr += __builtin_popcount((good >> 16) & 0xFF); | |||
d0 = _mm_bsrli_si128(d0, 8); | |||
rid = _mm_loadl_epi64((__m128i_u *)&idx[(good >> 24) & 0xFF]); | |||
d1 = _mm_shuffle_epi8(d0, rid); | |||
tmp1 = _mm256_cvtepu8_epi32(d1); | |||
tmp1 = _mm256_sub_epi32(off, tmp1); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], tmp1); | |||
ctr += __builtin_popcount((good >> 24) & 0xFF); | |||
} | |||
while (ctr < len && pos < buflen) { | |||
vec[0] = buf[pos] & 0x0F; | |||
vec[1] = buf[pos++] >> 4; | |||
if (vec[0] <= 2 * ETA) { | |||
r[ctr++] = Q + ETA - vec[0]; | |||
} | |||
if (vec[1] <= 2 * ETA && ctr < len) { | |||
r[ctr++] = Q + ETA - vec[1]; | |||
} | |||
} | |||
return ctr; | |||
} | |||
unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_gamma1m1( | |||
uint32_t *r, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
unsigned int i, ctr, pos; | |||
uint32_t vec[8]; | |||
__m256i d, tmp; | |||
uint32_t good; | |||
const __m256i bound = _mm256_set1_epi32(2 * GAMMA1 - 1); | |||
const __m256i off = _mm256_set1_epi32(Q + GAMMA1 - 1); | |||
ctr = pos = 0; | |||
while (ctr + 8 <= len && pos + 20 <= buflen) { | |||
for (i = 0; i < 4; i++) { | |||
vec[2 * i + 0] = buf[pos + 0]; | |||
vec[2 * i + 0] |= (uint32_t)buf[pos + 1] << 8; | |||
vec[2 * i + 0] |= (uint32_t)buf[pos + 2] << 16; | |||
vec[2 * i + 0] &= 0xFFFFF; | |||
vec[2 * i + 1] = buf[pos + 2] >> 4; | |||
vec[2 * i + 1] |= (uint32_t)buf[pos + 3] << 4; | |||
vec[2 * i + 1] |= (uint32_t)buf[pos + 4] << 12; | |||
pos += 5; | |||
} | |||
d = _mm256_loadu_si256((__m256i_u *)vec); | |||
tmp = _mm256_cmpgt_epi32(bound, d); | |||
good = _mm256_movemask_ps((__m256)tmp); | |||
d = _mm256_sub_epi32(off, d); | |||
__m128i rid = _mm_loadl_epi64((__m128i_u *)&idx[good]); | |||
tmp = _mm256_cvtepu8_epi32(rid); | |||
d = _mm256_permutevar8x32_epi32(d, tmp); | |||
_mm256_storeu_si256((__m256i_u *)&r[ctr], d); | |||
ctr += __builtin_popcount(good); | |||
} | |||
while (ctr < len && pos + 5 <= buflen) { | |||
vec[0] = buf[pos + 0]; | |||
vec[0] |= (uint32_t)buf[pos + 1] << 8; | |||
vec[0] |= (uint32_t)buf[pos + 2] << 16; | |||
vec[0] &= 0xFFFFF; | |||
vec[1] = buf[pos + 2] >> 4; | |||
vec[1] |= (uint32_t)buf[pos + 3] << 4; | |||
vec[1] |= (uint32_t)buf[pos + 4] << 12; | |||
pos += 5; | |||
if (vec[0] <= 2 * GAMMA1 - 2) { | |||
r[ctr++] = Q + GAMMA1 - 1 - vec[0]; | |||
} | |||
if (vec[1] <= 2 * GAMMA1 - 2 && ctr < len) { | |||
r[ctr++] = Q + GAMMA1 - 1 - vec[1]; | |||
} | |||
} | |||
return ctr; | |||
} |
@@ -0,0 +1,26 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_REJSAMPLE_H
#define PQCLEAN_DILITHIUM3_AVX2_REJSAMPLE_H

#include <stdint.h>

#include "poly.h"

/*
 * Rejection samplers. Each one reads candidates from buf[0..buflen),
 * writes the accepted values to r and returns how many were written
 * (at most len).
 */

/* 23-bit candidates (3 bytes each); accepted when smaller than Q. */
unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_uniform(
    uint32_t *r,
    unsigned int len,
    const uint8_t *buf,
    unsigned int buflen);

/* 4-bit candidates (two per byte); accepted when <= 2*ETA. */
unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_eta(
    uint32_t *r,
    unsigned int len,
    const uint8_t *buf,
    unsigned int buflen);

/* 20-bit candidates (two per 5 bytes); accepted when <= 2*GAMMA1 - 2. */
unsigned int PQCLEAN_DILITHIUM3_AVX2_rej_gamma1m1(
    uint32_t *r,
    unsigned int len,
    const uint8_t *buf,
    unsigned int buflen);

#endif
@@ -0,0 +1,115 @@ | |||
#include "rounding.h" | |||
/************************************************* | |||
* Name: power2round | |||
* | |||
* Description: For finite field element a, compute a0, a1 such that | |||
* a mod Q = a1*2^D + a0 with -2^{D-1} < a0 <= 2^{D-1}. | |||
* Assumes a to be standard representative. | |||
* | |||
* Arguments: - uint32_t a: input element | |||
* - uint32_t *a0: pointer to output element Q + a0 | |||
* | |||
* Returns a1. | |||
**************************************************/ | |||
uint32_t PQCLEAN_DILITHIUM3_AVX2_power2round(uint32_t a, uint32_t *a0) { | |||
int32_t t; | |||
/* Centralized remainder mod 2^D */ | |||
t = a & ((1U << D) - 1); | |||
t -= (1U << (D - 1)) + 1; | |||
t += (t >> 31) & (1U << D); | |||
t -= (1U << (D - 1)) - 1; | |||
*a0 = Q + t; | |||
a = (a - t) >> D; | |||
return a; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_decompose | |||
* | |||
* Description: For finite field element a, compute high and low bits a0, a1 such | |||
* that a mod Q = a1*ALPHA + a0 with -ALPHA/2 < a0 <= ALPHA/2 except | |||
* if a1 = (Q-1)/ALPHA where we set a1 = 0 and | |||
* -ALPHA/2 <= a0 = a mod Q - Q < 0. Assumes a to be standard | |||
* representative. | |||
* | |||
* Arguments: - uint32_t a: input element | |||
* - uint32_t *a0: pointer to output element Q + a0 | |||
* | |||
* Returns a1. | |||
**************************************************/ | |||
uint32_t PQCLEAN_DILITHIUM3_AVX2_decompose(uint32_t a, uint32_t *a0) { | |||
int32_t t, u; | |||
/* Centralized remainder mod ALPHA */ | |||
t = a & 0x7FFFF; | |||
t += (a >> 19) << 9; | |||
t -= ALPHA / 2 + 1; | |||
t += (t >> 31) & ALPHA; | |||
t -= ALPHA / 2 - 1; | |||
a -= t; | |||
/* Divide by ALPHA (possible to avoid) */ | |||
u = a - 1; | |||
u >>= 31; | |||
a = (a >> 19) + 1; | |||
a -= u & 1; | |||
/* Border case */ | |||
*a0 = Q + t - (a >> 4); | |||
a &= 0xF; | |||
return a; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_make_hint | |||
* | |||
* Description: Compute hint bit indicating whether the low bits of the | |||
* input element overflow into the high bits. Inputs assumed to be | |||
* standard representatives. | |||
* | |||
* Arguments: - uint32_t a0: low bits of input element | |||
* - uint32_t a1: high bits of input element | |||
* | |||
* Returns 1 if high bits of a and b differ and 0 otherwise. | |||
**************************************************/ | |||
unsigned int PQCLEAN_DILITHIUM3_AVX2_make_hint(const uint32_t a0, const uint32_t a1) { | |||
if (a0 <= GAMMA2 || a0 > Q - GAMMA2 || (a0 == Q - GAMMA2 && a1 == 0)) { | |||
return 0; | |||
} | |||
return 1; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_use_hint | |||
* | |||
* Description: Correct high bits according to hint. | |||
* | |||
* Arguments: - uint32_t a: input element | |||
* - unsigned int hint: hint bit | |||
* | |||
* Returns corrected high bits. | |||
**************************************************/ | |||
uint32_t PQCLEAN_DILITHIUM3_AVX2_use_hint(const uint32_t a, const unsigned int hint) { | |||
uint32_t a0, a1; | |||
a1 = PQCLEAN_DILITHIUM3_AVX2_decompose(a, &a0); | |||
if (hint == 0) { | |||
return a1; | |||
} | |||
if (a0 > Q) { | |||
return (a1 + 1) & 0xF; | |||
} | |||
return (a1 - 1) & 0xF; | |||
/* If decompose does not divide out ALPHA: | |||
if(hint == 0) | |||
return a1; | |||
else if(a0 > Q) | |||
return (a1 + ALPHA) % (Q - 1); | |||
else | |||
return (a1 - ALPHA) % (Q - 1); | |||
*/ | |||
} |
@@ -0,0 +1,12 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_ROUNDING_H
#define PQCLEAN_DILITHIUM3_AVX2_ROUNDING_H

#include "params.h"
#include <stdint.h>

/* Split a into a1*2^D + a0; stores Q + a0 through a0 and returns a1. */
uint32_t PQCLEAN_DILITHIUM3_AVX2_power2round(uint32_t a, uint32_t *a0);

/* Split a into a1*ALPHA + a0; stores Q + a0 through a0 and returns a1. */
uint32_t PQCLEAN_DILITHIUM3_AVX2_decompose(uint32_t a, uint32_t *a0);

/* Hint bit (0 or 1) for the given low part a0 and high part a1. */
unsigned int PQCLEAN_DILITHIUM3_AVX2_make_hint(uint32_t a0, uint32_t a1);

/* High bits of a, corrected according to the hint bit. */
uint32_t PQCLEAN_DILITHIUM3_AVX2_use_hint(uint32_t a, unsigned int hint);

#endif
@@ -0,0 +1,23 @@ | |||
/*
 * shuffle8 r0,r1,r2,r3 (arguments are ymm register numbers)
 *   ymm<r2> = low  128-bit halves of ymm<r0> and ymm<r1>, in that order
 *   ymm<r3> = high 128-bit halves of ymm<r0> and ymm<r1>, in that order
 * ymm<r2> is written before ymm<r3> is computed, so r2 must not name the
 * same register as r0 or r1; r3 may.
 */
.macro shuffle8 r0,r1,r2,r3
    vperm2i128  $0x20,%ymm\r1,%ymm\r0,%ymm\r2
    vperm2i128  $0x31,%ymm\r1,%ymm\r0,%ymm\r3
.endm
/*
 * shuffle4 r0,r1,r2,r3 (arguments are ymm register numbers)
 *   Within each 128-bit lane:
 *   ymm<r2> = low  quadword of ymm<r0>, then low  quadword of ymm<r1>
 *   ymm<r3> = high quadword of ymm<r0>, then high quadword of ymm<r1>
 * ymm<r2> is written before ymm<r3> is computed, so r2 must not name the
 * same register as r0 or r1; r3 may.
 */
.macro shuffle4 r0,r1,r2,r3
    vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2
    vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3
.endm
/*
 * shuffle2 r0,r1,r2,r3 (arguments are ymm register numbers)
 *   Interleaves 32-bit words pairwise:
 *   ymm<r2> = even words of ymm<r0> in the even positions,
 *             even words of ymm<r1> in the odd positions
 *   ymm<r3> = odd words of ymm<r0> in the even positions,
 *             odd words of ymm<r1> in the odd positions
 * Uses ymm12 and ymm13 as scratch. ymm<r1> is read again after ymm<r2>
 * has been written, so r2 must not name the same register as r1.
 */
.macro shuffle2 r0,r1,r2,r3
    vpsllq      $32,%ymm\r1,%ymm12
    vpsrlq      $32,%ymm\r0,%ymm13
    vpblendd    $0xAA,%ymm12,%ymm\r0,%ymm\r2
    vpblendd    $0xAA,%ymm\r1,%ymm13,%ymm\r3
.endm
/*
 * shuffle1 r0,r1,r2,r3 (arguments are ymm register numbers)
 *   Interleaves 16-bit words pairwise:
 *   ymm<r2> = even words of ymm<r0> in the even positions,
 *             even words of ymm<r1> in the odd positions
 *   ymm<r3> = odd words of ymm<r0> in the even positions,
 *             odd words of ymm<r1> in the odd positions
 * Uses ymm12 and ymm13 as scratch. ymm<r1> is read again after ymm<r2>
 * has been written, so r2 must not name the same register as r1.
 */
.macro shuffle1 r0,r1,r2,r3
    vpslld      $16,%ymm\r1,%ymm12
    vpsrld      $16,%ymm\r0,%ymm13
    vpblendw    $0xAA,%ymm12,%ymm\r0,%ymm\r2
    vpblendw    $0xAA,%ymm\r1,%ymm13,%ymm\r3
.endm
@@ -0,0 +1,446 @@ | |||
#include <stdint.h> | |||
#include <string.h> | |||
#include "fips202.h" | |||
#include "packing.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "polyvec.h" | |||
#include "randombytes.h" | |||
#include "sign.h" | |||
#include "symmetric.h" | |||
/************************************************* | |||
* Name: expand_mat | |||
* | |||
* Description: Implementation of ExpandA. Generates matrix A with uniformly | |||
* random coefficients a_{i,j} by performing rejection | |||
* sampling on the output stream of SHAKE128(rho|i|j). | |||
* | |||
* Arguments: - polyvecl mat[K]: output matrix | |||
* - const uint8_t rho[]: byte array containing seed rho | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_expand_mat(polyvecl mat[5], const uint8_t rho[SEEDBYTES]) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&mat[0].vec[0], | |||
&mat[0].vec[1], | |||
&mat[0].vec[2], | |||
&mat[0].vec[3], | |||
rho, 0, 1, 2, 3); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&mat[1].vec[0], | |||
&mat[1].vec[1], | |||
&mat[1].vec[2], | |||
&mat[1].vec[3], | |||
rho, 256, 257, 258, 259); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&mat[2].vec[0], | |||
&mat[2].vec[1], | |||
&mat[2].vec[2], | |||
&mat[2].vec[3], | |||
rho, 512, 513, 514, 515); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&mat[3].vec[0], | |||
&mat[3].vec[1], | |||
&mat[3].vec[2], | |||
&mat[3].vec[3], | |||
rho, 768, 769, 770, 771); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_uniform_4x(&mat[4].vec[0], | |||
&mat[4].vec[1], | |||
&mat[4].vec[2], | |||
&mat[4].vec[3], | |||
rho, 1024, 1025, 1026, 1027); | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_challenge | |||
* | |||
* Description: Implementation of H. Samples polynomial with 60 nonzero | |||
* coefficients in {-1,1} using the output stream of | |||
* SHAKE256(mu|w1). | |||
* | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const uint8_t mu[]: byte array containing mu | |||
* - const polyveck *w1: pointer to vector w1 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_AVX2_challenge(poly *c, | |||
const uint8_t mu[CRHBYTES], | |||
const polyveck *w1) { | |||
unsigned int i, b, pos; | |||
uint64_t signs; | |||
uint8_t inbuf[CRHBYTES + K * POLW1_SIZE_PACKED]; | |||
uint8_t outbuf[SHAKE256_RATE]; | |||
shake256ctx state; | |||
for (i = 0; i < CRHBYTES; ++i) { | |||
inbuf[i] = mu[i]; | |||
} | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyw1_pack(inbuf + CRHBYTES + i * POLW1_SIZE_PACKED, &w1->vec[i]); | |||
} | |||
shake256_absorb(&state, inbuf, sizeof(inbuf)); | |||
shake256_squeezeblocks(outbuf, 1, &state); | |||
signs = 0; | |||
for (i = 0; i < 8; ++i) { | |||
signs |= (uint64_t) outbuf[i] << 8 * i; | |||
} | |||
pos = 8; | |||
for (i = 0; i < N; ++i) { | |||
c->coeffs[i] = 0; | |||
} | |||
for (i = 196; i < 256; ++i) { | |||
do { | |||
if (pos >= SHAKE256_RATE) { | |||
shake256_squeezeblocks(outbuf, 1, &state); | |||
pos = 0; | |||
} | |||
b = outbuf[pos++]; | |||
} while (b > i); | |||
c->coeffs[i] = c->coeffs[b]; | |||
c->coeffs[b] = 1; | |||
c->coeffs[b] ^= -(signs & 1) & (1 ^ (Q - 1)); | |||
signs >>= 1; | |||
} | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign_keypair | |||
* | |||
* Description: Generates public and private key. | |||
* | |||
* Arguments: - uint8_t *pk: pointer to output public key (allocated | |||
* array of PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES bytes) | |||
* - uint8_t *sk: pointer to output private key (allocated | |||
* array of PQCLEAN_DILITHIUM3_AVX2_CRYPTO_SECRETKEYBYTES bytes) | |||
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_keypair(uint8_t *pk, uint8_t *sk) { | |||
unsigned int i; | |||
uint8_t seedbuf[3 * SEEDBYTES]; | |||
uint8_t tr[CRHBYTES]; | |||
const uint8_t *rho, *rhoprime, *key; | |||
uint16_t nonce = 0; | |||
polyvecl mat[K]; | |||
polyvecl s1, s1hat; | |||
polyveck s2, t, t1, t0; | |||
/* Expand 32 bytes of randomness into rho, rhoprime and key */ | |||
randombytes(seedbuf, 3 * SEEDBYTES); | |||
rho = seedbuf; | |||
rhoprime = seedbuf + SEEDBYTES; | |||
key = seedbuf + 2 * SEEDBYTES; | |||
/* Expand matrix */ | |||
PQCLEAN_DILITHIUM3_AVX2_expand_mat(mat, rho); | |||
/* Sample short vectors s1 and s2 */ | |||
PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_4x(&s1.vec[0], &s1.vec[1], &s1.vec[2], &s1.vec[3], rhoprime, | |||
nonce, nonce + 1, nonce + 2, nonce + 3); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta_4x(&s2.vec[0], &s2.vec[1], &s2.vec[2], &s2.vec[3], rhoprime, | |||
nonce + 4, nonce + 5, nonce + 6, nonce + 7); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_uniform_eta(&s2.vec[4], rhoprime, nonce + 8); | |||
/* Matrix-vector multiplication */ | |||
s1hat = s1; | |||
PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(&s1hat); | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_invmontgomery(&t.vec[i], &mat[i], &s1hat); | |||
//PQCLEAN_DILITHIUM3_AVX2_poly_reduce(&t.vec[i]); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_invntt_montgomery(&t.vec[i]); | |||
} | |||
/* Add error vector s2 */ | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_add(&t, &t, &s2); | |||
/* Extract t1 and write public key */ | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_freeze(&t); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_power2round(&t1, &t0, &t); | |||
PQCLEAN_DILITHIUM3_AVX2_pack_pk(pk, rho, &t1); | |||
/* Compute CRH(rho, t1) and write secret key */ | |||
crh(tr, pk, PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES); | |||
PQCLEAN_DILITHIUM3_AVX2_pack_sk(sk, rho, key, tr, &s1, &s2, &t0); | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign_signature | |||
* | |||
* Description: Compute signed message. | |||
* | |||
* Arguments: - uint8_t *sig: pointer to output signature (PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES | |||
* of len) | |||
* - size_t *siglen: pointer to output length of signed message | |||
* (should be PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES) | |||
* - uint8_t *m: pointer to message to be signed | |||
* - size_t mlen: length of message | |||
* - uint8_t *sk: pointer to bit-packed secret key | |||
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, | |||
const uint8_t *sk) { | |||
size_t i; | |||
unsigned int n; | |||
uint8_t seedbuf[2 * SEEDBYTES + 3 * CRHBYTES]; | |||
uint8_t *rho, *tr, *key, *mu, *rhoprime; | |||
uint16_t nonce = 0; | |||
poly c, chat; | |||
polyvecl mat[K], s1, y, yhat, z; | |||
polyveck t0, s2, w, w1, w0; | |||
polyveck h, cs2, ct0; | |||
rho = seedbuf; | |||
tr = rho + SEEDBYTES; | |||
key = tr + CRHBYTES; | |||
mu = key + SEEDBYTES; | |||
rhoprime = mu + CRHBYTES; | |||
PQCLEAN_DILITHIUM3_AVX2_unpack_sk(rho, key, tr, &s1, &s2, &t0, sk); | |||
// use incremental hash API instead of copying around buffers | |||
/* Compute CRH(tr, m) */ | |||
shake256incctx state; | |||
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, tr, CRHBYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(mu, CRHBYTES, &state); | |||
crh(rhoprime, key, SEEDBYTES + CRHBYTES); | |||
/* Expand matrix and transform vectors */ | |||
PQCLEAN_DILITHIUM3_AVX2_expand_mat(mat, rho); | |||
PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(&s1); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt(&s2); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt(&t0); | |||
rej: | |||
/* Sample intermediate vector y */ | |||
PQCLEAN_DILITHIUM3_AVX2_poly_uniform_gamma1m1_4x(&y.vec[0], &y.vec[1], &y.vec[2], &y.vec[3], | |||
rhoprime, nonce, nonce + 1, nonce + 2, nonce + 3); | |||
nonce += 4; | |||
/* Matrix-vector multiplication */ | |||
yhat = y; | |||
PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(&yhat); | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_invmontgomery(&w.vec[i], &mat[i], &yhat); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_reduce(&w.vec[i]); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_invntt_montgomery(&w.vec[i]); | |||
} | |||
/* Decompose w and call the random oracle */ | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_csubq(&w); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_decompose(&w1, &w0, &w); | |||
PQCLEAN_DILITHIUM3_AVX2_challenge(&c, mu, &w1); | |||
chat = c; | |||
PQCLEAN_DILITHIUM3_AVX2_poly_ntt(&chat); | |||
/* Check that subtracting cs2 does not change high bits of w and low bits | |||
* do not reveal secret information */ | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_invmontgomery(&cs2.vec[i], &chat, &s2.vec[i]); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_invntt_montgomery(&cs2.vec[i]); | |||
} | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_sub(&w0, &w0, &cs2); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_freeze(&w0); | |||
if (PQCLEAN_DILITHIUM3_AVX2_polyveck_chknorm(&w0, GAMMA2 - BETA)) { | |||
goto rej; | |||
} | |||
/* Compute z, reject if it reveals secret */ | |||
for (i = 0; i < L; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_invmontgomery(&z.vec[i], &chat, &s1.vec[i]); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_invntt_montgomery(&z.vec[i]); | |||
} | |||
PQCLEAN_DILITHIUM3_AVX2_polyvecl_add(&z, &z, &y); | |||
PQCLEAN_DILITHIUM3_AVX2_polyvecl_freeze(&z); | |||
if (PQCLEAN_DILITHIUM3_AVX2_polyvecl_chknorm(&z, GAMMA1 - BETA)) { | |||
goto rej; | |||
} | |||
/* Compute hints for w1 */ | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_invmontgomery(&ct0.vec[i], &chat, &t0.vec[i]); | |||
PQCLEAN_DILITHIUM3_AVX2_poly_invntt_montgomery(&ct0.vec[i]); | |||
} | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_csubq(&ct0); | |||
if (PQCLEAN_DILITHIUM3_AVX2_polyveck_chknorm(&ct0, GAMMA2)) { | |||
goto rej; | |||
} | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_add(&w0, &w0, &ct0); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_csubq(&w0); | |||
n = PQCLEAN_DILITHIUM3_AVX2_polyveck_make_hint(&h, &w0, &w1); | |||
if (n > OMEGA) { | |||
goto rej; | |||
} | |||
/* Write signature */ | |||
PQCLEAN_DILITHIUM3_AVX2_pack_sig(sig, &z, &h, &c); | |||
*siglen = PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES; | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign | |||
* | |||
* Description: Compute signed message. | |||
* | |||
* Arguments: - uint8_t *sm: pointer to output signed message (allocated | |||
* array with PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES + mlen bytes), | |||
* can be equal to m | |||
* - unsigned long long *smlen: pointer to output length of signed | |||
* message | |||
* - const uint8_t *m: pointer to message to be signed | |||
* - unsigned long long mlen: length of message | |||
* - const uint8_t *sk: pointer to bit-packed secret key | |||
* | |||
* Returns 0 (success) | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, | |||
const uint8_t *sk) { | |||
int rc; | |||
memmove(sm + PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES, m, mlen); | |||
rc = PQCLEAN_DILITHIUM3_AVX2_crypto_sign_signature(sm, smlen, m, mlen, sk); | |||
*smlen += mlen; | |||
return rc; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign_verify | |||
* | |||
* Description: Verify signed message. | |||
* | |||
* Arguments: - uint8_t *sig: signature | |||
* - size_t siglen: length of signature (PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES) | |||
* - uint8_t *m: pointer to message | |||
* - size_t *mlen: pointer to output length of message | |||
* - uint8_t *pk: pointer to bit-packed public key | |||
* | |||
* Returns 0 if signed message could be verified correctly and -1 otherwise | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, | |||
const uint8_t *pk) { | |||
size_t i; | |||
uint8_t rho[SEEDBYTES]; | |||
uint8_t mu[CRHBYTES]; | |||
poly c, chat, cp; | |||
polyvecl mat[K], z; | |||
polyveck t1, w1, h, tmp1, tmp2; | |||
if (siglen < PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES) { | |||
return -1; | |||
} | |||
PQCLEAN_DILITHIUM3_AVX2_unpack_pk(rho, &t1, pk); | |||
if (PQCLEAN_DILITHIUM3_AVX2_unpack_sig(&z, &h, &c, sig)) { | |||
return -1; | |||
} | |||
if (PQCLEAN_DILITHIUM3_AVX2_polyvecl_chknorm(&z, GAMMA1 - BETA)) { | |||
return -1; | |||
} | |||
/* Compute CRH(CRH(rho, t1), msg) */ | |||
crh(mu, pk, PQCLEAN_DILITHIUM3_AVX2_CRYPTO_PUBLICKEYBYTES); | |||
shake256incctx state; | |||
shake256_inc_init(&state); | |||
shake256_inc_absorb(&state, mu, CRHBYTES); | |||
shake256_inc_absorb(&state, m, mlen); | |||
shake256_inc_finalize(&state); | |||
shake256_inc_squeeze(mu, CRHBYTES, &state); | |||
/* Matrix-vector multiplication; compute Az - c2^dt1 */ | |||
PQCLEAN_DILITHIUM3_AVX2_expand_mat(mat, rho); | |||
PQCLEAN_DILITHIUM3_AVX2_polyvecl_ntt(&z); | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_polyvecl_pointwise_acc_invmontgomery(&tmp1.vec[i], &mat[i], &z); | |||
} | |||
chat = c; | |||
PQCLEAN_DILITHIUM3_AVX2_poly_ntt(&chat); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_shiftl(&t1); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_ntt(&t1); | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_AVX2_poly_pointwise_invmontgomery(&tmp2.vec[i], &chat, &t1.vec[i]); | |||
} | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_sub(&tmp1, &tmp1, &tmp2); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_reduce(&tmp1); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_invntt_montgomery(&tmp1); | |||
/* Reconstruct w1 */ | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_csubq(&tmp1); | |||
PQCLEAN_DILITHIUM3_AVX2_polyveck_use_hint(&w1, &tmp1, &h); | |||
/* Call random oracle and verify challenge */ | |||
PQCLEAN_DILITHIUM3_AVX2_challenge(&cp, mu, &w1); | |||
for (i = 0; i < N; ++i) { | |||
if (c.coeffs[i] != cp.coeffs[i]) { | |||
return -1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: PQCLEAN_DILITHIUM3_AVX2_crypto_sign_open | |||
* | |||
* Description: Verify signed message. | |||
* | |||
* Arguments: - unsigned char *m: pointer to output message (allocated | |||
* array with smlen bytes), can be equal to sm | |||
* - unsigned long long *mlen: pointer to output length of message | |||
* - const unsigned char *sm: pointer to signed message | |||
* - unsigned long long smlen: length of signed message | |||
* - const unsigned char *pk: pointer to bit-packed public key | |||
* | |||
* Returns 0 if signed message could be verified correctly and -1 otherwise | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_AVX2_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, | |||
const uint8_t *pk) { | |||
size_t i; | |||
if (smlen < PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES) { | |||
goto badsig; | |||
} | |||
*mlen = smlen - PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES; | |||
if (PQCLEAN_DILITHIUM3_AVX2_crypto_sign_verify(sm, PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES, | |||
sm + PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES, *mlen, pk)) { | |||
goto badsig; | |||
} else { | |||
/* All good, copy msg, return 0 */ | |||
for (i = 0; i < *mlen; ++i) { | |||
m[i] = sm[PQCLEAN_DILITHIUM3_AVX2_CRYPTO_BYTES + i]; | |||
} | |||
return 0; | |||
} | |||
/* Signature verification failed */ | |||
badsig: | |||
*mlen = (size_t) -1; | |||
for (i = 0; i < smlen; ++i) { | |||
m[i] = 0; | |||
} | |||
return -1; | |||
} |
@@ -0,0 +1,15 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_SIGN_H
#define PQCLEAN_DILITHIUM3_AVX2_SIGN_H

/* Internal helpers of the Dilithium3 AVX2 signature code. The include guard
 * is namespaced like the other headers of this implementation so it cannot
 * collide with a sign.h of another scheme or variant. */

#include <stdint.h>

#include "api.h"
#include "params.h"
#include "poly.h"
#include "polyvec.h"

/* Expands the seed rho into the matrix, stored as K rows of type polyvecl. */
void PQCLEAN_DILITHIUM3_AVX2_expand_mat(polyvecl mat[K], const uint8_t rho[SEEDBYTES]);

/* Random-oracle step: derives the challenge polynomial c from mu and w1. */
void PQCLEAN_DILITHIUM3_AVX2_challenge(poly *c, const uint8_t mu[CRHBYTES],
                                       const polyveck *w1);

#endif
@@ -0,0 +1,26 @@ | |||
#include "stream.h" | |||
#include <string.h> | |||
void PQCLEAN_DILITHIUM3_AVX2_shake128_stream_init( | |||
shake128ctx *state, const uint8_t seed[SEEDBYTES], uint16_t nonce) { | |||
uint8_t buf[SEEDBYTES + 2]; | |||
memcpy(buf, seed, SEEDBYTES); | |||
buf[SEEDBYTES] = (uint8_t)nonce; | |||
buf[SEEDBYTES + 1] = (uint8_t)(nonce >> 8); | |||
shake128_absorb(state, buf, SEEDBYTES + 2); | |||
} | |||
void PQCLEAN_DILITHIUM3_AVX2_shake256_stream_init( | |||
shake256ctx *state, const uint8_t seed[CRHBYTES], uint16_t nonce) { | |||
uint8_t buf[CRHBYTES + 2]; | |||
memcpy(buf, seed, CRHBYTES); | |||
buf[CRHBYTES] = (uint8_t)nonce; | |||
buf[CRHBYTES + 1] = (uint8_t)(nonce >> 8); | |||
shake256_absorb(state, buf, CRHBYTES + 2); | |||
} |
@@ -0,0 +1,15 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_STREAM_H
#define PQCLEAN_DILITHIUM3_AVX2_STREAM_H

/* Seed-and-nonce initialisation of the SHAKE states used as streams. */

#include <stdint.h>

#include "fips202.h"
#include "params.h"

/* Absorbs a SEEDBYTES-byte seed followed by the 2-byte nonce into a
 * SHAKE128 state. */
void PQCLEAN_DILITHIUM3_AVX2_shake128_stream_init(
    shake128ctx *state,
    const uint8_t seed[SEEDBYTES],
    uint16_t nonce);

/* Absorbs a CRHBYTES-byte seed followed by the 2-byte nonce into a
 * SHAKE256 state. */
void PQCLEAN_DILITHIUM3_AVX2_shake256_stream_init(
    shake256ctx *state,
    const uint8_t seed[CRHBYTES],
    uint16_t nonce);

#endif
@@ -0,0 +1,23 @@ | |||
#ifndef PQCLEAN_DILITHIUM3_AVX2_SYMMETRIC_H
#define PQCLEAN_DILITHIUM3_AVX2_SYMMETRIC_H

/* Maps the generic crh / stream128 / stream256 names used by the scheme
 * onto their SHAKE-based implementations. */

#include "params.h"
#include "stream.h"

#include "fips202.h"

/* Stream state types and block sizes. */
typedef shake128ctx stream128_state;
typedef shake256ctx stream256_state;

#define STREAM128_BLOCKBYTES SHAKE128_RATE
#define STREAM256_BLOCKBYTES SHAKE256_RATE

/* crh: SHAKE256 with a fixed output length of CRHBYTES bytes. */
#define crh(OUT, IN, INBYTES) shake256(OUT, CRHBYTES, IN, INBYTES)

/* 128-bit stream: SHAKE128 keyed with seed and nonce. */
#define stream128_init(STATE, SEED, NONCE) PQCLEAN_DILITHIUM3_AVX2_shake128_stream_init(STATE, SEED, NONCE)
#define stream128_squeezeblocks(OUT, OUTBLOCKS, STATE) shake128_squeezeblocks(OUT, OUTBLOCKS, STATE)

/* 256-bit stream: SHAKE256 keyed with seed and nonce. */
#define stream256_init(STATE, SEED, NONCE) PQCLEAN_DILITHIUM3_AVX2_shake256_stream_init(STATE, SEED, NONCE)
#define stream256_squeezeblocks(OUT, OUTBLOCKS, STATE) shake256_squeezeblocks(OUT, OUTBLOCKS, STATE)

#endif
@@ -1,2 +1,6 @@ | |||
Public Domain | |||
Authors: Léo Ducas, Eike Kiltz, Tancrède Lepoint, Vadim Lyubashevsky, Gregor Seiler, Peter Schwabe, Damien Stehlé | |||
Public Domain (https://creativecommons.org/share-your-work/public-domain/cc0/) | |||
For Keccak and the random number generator | |||
we are using public-domain code from sources | |||
and by authors listed in comments on top of | |||
the respective files. |
@@ -2,10 +2,10 @@ | |||
LIB=libdilithium3_clean.a | |||
SOURCES = sign.c polyvec.c poly.c packing.c ntt.c reduce.c rounding.c symmetric.c | |||
OBJECTS = sign.o polyvec.o poly.o packing.o ntt.o reduce.o rounding.o symmetric.o | |||
SOURCES = sign.c polyvec.c poly.c packing.c ntt.c reduce.c rounding.c stream.c | |||
OBJECTS = sign.o polyvec.o poly.o packing.o ntt.o reduce.o rounding.o stream.o | |||
HEADERS = api.h params.h sign.h polyvec.h poly.h packing.h ntt.h \ | |||
reduce.h rounding.h symmetric.h | |||
reduce.h rounding.h symmetric.h stream.h | |||
CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror -Wmissing-prototypes -Wredundant-decls -std=c99 -I../../../common $(EXTRAFLAGS) | |||
@@ -2,7 +2,7 @@ | |||
# nmake /f Makefile.Microsoft_nmake | |||
LIBRARY=libdilithium3_clean.lib | |||
OBJECTS=sign.obj polyvec.obj poly.obj packing.obj ntt.obj reduce.obj rounding.obj symmetric.obj | |||
OBJECTS=sign.obj polyvec.obj poly.obj packing.obj ntt.obj reduce.obj rounding.obj stream.obj | |||
CFLAGS=/nologo /O2 /I ..\..\..\common /W4 /WX | |||
all: $(LIBRARY) | |||
@@ -4,14 +4,25 @@ | |||
#include <stddef.h> | |||
#include <stdint.h> | |||
#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_PUBLICKEYBYTES 1472U | |||
#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_SECRETKEYBYTES 3504U | |||
#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_BYTES 2701U | |||
#define PQCLEAN_DILITHIUM3_CLEAN_CRYPTO_ALGNAME "Dilithium3" | |||
int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_keypair( | |||
uint8_t *pk, uint8_t *sk); | |||
int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *msg, size_t len, | |||
const uint8_t *sk); | |||
int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, | |||
const uint8_t *pk); | |||
int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
@@ -21,13 +32,6 @@ int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk); | |||
int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign(uint8_t *sm, size_t *smlen, | |||
const uint8_t *msg, size_t len, | |||
const uint8_t *sk); | |||
int PQCLEAN_DILITHIUM3_CLEAN_crypto_sign_open(uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, | |||
const uint8_t *pk); | |||
#endif |
@@ -1,11 +1,12 @@ | |||
#include <stdint.h> | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "reduce.h" | |||
#include <stdint.h> | |||
/* Roots of unity in order needed by forward ntt */ | |||
static const uint32_t zetas[N] = { | |||
/* Roots of unity in order needed by forward PQCLEAN_DILITHIUM3_CLEAN_ntt */ | |||
static const uint32_t PQCLEAN_DILITHIUM3_CLEAN_zetas[N] = { | |||
0, 25847, 5771523, 7861508, 237124, 7602457, 7504169, 466468, 1826347, | |||
2353451, 8021166, 6288512, 3119733, 5495562, 3111497, 2680103, 2725464, | |||
1024112, 7300517, 3585928, 7830929, 7260833, 2619752, 6271868, 6262231, | |||
@@ -40,8 +41,8 @@ static const uint32_t zetas[N] = { | |||
8332111, 7018208, 3937738, 1400424, 7534263, 1976782 | |||
}; | |||
/* Roots of unity in order needed by inverse ntt */ | |||
static const uint32_t zetas_inv[N] = { | |||
/* Roots of unity in order needed by inverse PQCLEAN_DILITHIUM3_CLEAN_ntt */ | |||
static const uint32_t PQCLEAN_DILITHIUM3_CLEAN_zetas_inv[N] = { | |||
6403635, 846154, 6979993, 4442679, 1362209, 48306, 4460757, 554416, | |||
3545687, 6767575, 976891, 8196974, 2286327, 420899, 2235985, 2939036, | |||
3833893, 260646, 1104333, 1667432, 6470041, 1803090, 6656817, 426683, | |||
@@ -77,7 +78,7 @@ static const uint32_t zetas_inv[N] = { | |||
}; | |||
/************************************************* | |||
* Name: ntt | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_ntt | |||
* | |||
* Description: Forward NTT, in-place. No modular reduction is performed after | |||
* additions or subtractions. Hence output coefficients can be up | |||
@@ -86,16 +87,16 @@ static const uint32_t zetas_inv[N] = { | |||
* | |||
* Arguments: - uint32_t p[N]: input/output coefficient array | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_ntt(uint32_t p[N]) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_ntt(uint32_t *p) { | |||
unsigned int len, start, j, k; | |||
uint32_t zeta, t; | |||
k = 1; | |||
for (len = 128; len > 0; len >>= 1) { | |||
for (start = 0; start < N; start = j + len) { | |||
zeta = zetas[k++]; | |||
zeta = PQCLEAN_DILITHIUM3_CLEAN_zetas[k++]; | |||
for (j = start; j < start + len; ++j) { | |||
t = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((uint64_t)zeta * p[j + len]); | |||
t = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((uint64_t) zeta * p[j + len]); | |||
p[j + len] = p[j] + 2 * Q - t; | |||
p[j] = p[j] + t; | |||
} | |||
@@ -104,7 +105,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_ntt(uint32_t p[N]) { | |||
} | |||
/************************************************* | |||
* Name: invntt_frominvmont | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_invntt_frominvmont | |||
* | |||
* Description: Inverse NTT and multiplication by Montgomery factor 2^32. | |||
* In-place. No modular reductions after additions or | |||
@@ -113,7 +114,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_ntt(uint32_t p[N]) { | |||
* | |||
* Arguments: - uint32_t p[N]: input/output coefficient array | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_invntt_frominvmont(uint32_t p[N]) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_invntt_frominvmont(uint32_t *p) { | |||
unsigned int start, len, j, k; | |||
uint32_t t, zeta; | |||
const uint32_t f = (((uint64_t)MONT * MONT % Q) * (Q - 1) % Q) * ((Q - 1) >> 8) % Q; | |||
@@ -121,17 +122,17 @@ void PQCLEAN_DILITHIUM3_CLEAN_invntt_frominvmont(uint32_t p[N]) { | |||
k = 0; | |||
for (len = 1; len < N; len <<= 1) { | |||
for (start = 0; start < N; start = j + len) { | |||
zeta = zetas_inv[k++]; | |||
zeta = PQCLEAN_DILITHIUM3_CLEAN_zetas_inv[k++]; | |||
for (j = start; j < start + len; ++j) { | |||
t = p[j]; | |||
p[j] = t + p[j + len]; | |||
p[j + len] = t + 256 * Q - p[j + len]; | |||
p[j + len] = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((uint64_t)zeta * p[j + len]); | |||
p[j + len] = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((uint64_t) zeta * p[j + len]); | |||
} | |||
} | |||
} | |||
for (j = 0; j < N; ++j) { | |||
p[j] = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((uint64_t)f * p[j]); | |||
p[j] = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((uint64_t) f * p[j]); | |||
} | |||
} |
@@ -1,9 +1,10 @@ | |||
#ifndef NTT_H | |||
#define NTT_H | |||
#ifndef PQCLEAN_DILITHIUM3_CLEAN_NTT_H | |||
#define PQCLEAN_DILITHIUM3_CLEAN_NTT_H | |||
#include "params.h" | |||
#include <stdint.h> | |||
#include "params.h" | |||
void PQCLEAN_DILITHIUM3_CLEAN_ntt(uint32_t p[N]); | |||
void PQCLEAN_DILITHIUM3_CLEAN_invntt_frominvmont(uint32_t p[N]); | |||
@@ -4,17 +4,18 @@ | |||
#include "polyvec.h" | |||
/************************************************* | |||
* Name: pack_pk | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_pack_pk | |||
* | |||
* Description: Bit-pack public key pk = (rho, t1). | |||
* | |||
* Arguments: - unsigned char pk[]: output byte array | |||
* - const unsigned char rho[]: byte array containing rho | |||
* Arguments: - uint8_t pk[]: output byte array | |||
* - const uint8_t rho[]: byte array containing rho | |||
* - const polyveck *t1: pointer to vector t1 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_pk(unsigned char pk[CRYPTO_PUBLICKEYBYTES], | |||
const unsigned char rho[SEEDBYTES], | |||
const polyveck *t1) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_pk( | |||
uint8_t *pk, | |||
const uint8_t *rho, | |||
const polyveck *t1) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
@@ -28,17 +29,18 @@ void PQCLEAN_DILITHIUM3_CLEAN_pack_pk(unsigned char pk[CRYPTO_PUBLICKEYBYTES], | |||
} | |||
/************************************************* | |||
* Name: unpack_pk | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_unpack_pk | |||
* | |||
* Description: Unpack public key pk = (rho, t1). | |||
* | |||
* Arguments: - const unsigned char rho[]: output byte array for rho | |||
* Arguments: - const uint8_t rho[]: output byte array for rho | |||
* - const polyveck *t1: pointer to output vector t1 | |||
* - unsigned char pk[]: byte array containing bit-packed pk | |||
* - uint8_t pk[]: byte array containing bit-packed pk | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_unpack_pk(unsigned char rho[SEEDBYTES], | |||
polyveck *t1, | |||
const unsigned char pk[CRYPTO_PUBLICKEYBYTES]) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_unpack_pk( | |||
uint8_t *rho, | |||
polyveck *t1, | |||
const uint8_t *pk) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
@@ -52,25 +54,26 @@ void PQCLEAN_DILITHIUM3_CLEAN_unpack_pk(unsigned char rho[SEEDBYTES], | |||
} | |||
/************************************************* | |||
* Name: pack_sk | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_pack_sk | |||
* | |||
* Description: Bit-pack secret key sk = (rho, key, tr, s1, s2, t0). | |||
* | |||
* Arguments: - unsigned char sk[]: output byte array | |||
* - const unsigned char rho[]: byte array containing rho | |||
* - const unsigned char key[]: byte array containing key | |||
* - const unsigned char tr[]: byte array containing tr | |||
* Arguments: - uint8_t sk[]: output byte array | |||
* - const uint8_t rho[]: byte array containing rho | |||
* - const uint8_t key[]: byte array containing key | |||
* - const uint8_t tr[]: byte array containing tr | |||
* - const polyvecl *s1: pointer to vector s1 | |||
* - const polyveck *s2: pointer to vector s2 | |||
* - const polyveck *t0: pointer to vector t0 | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_sk(unsigned char sk[CRYPTO_SECRETKEYBYTES], | |||
const unsigned char rho[SEEDBYTES], | |||
const unsigned char key[SEEDBYTES], | |||
const unsigned char tr[CRHBYTES], | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_sk( | |||
uint8_t *sk, | |||
const uint8_t *rho, | |||
const uint8_t *key, | |||
const uint8_t *tr, | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
@@ -104,25 +107,26 @@ void PQCLEAN_DILITHIUM3_CLEAN_pack_sk(unsigned char sk[CRYPTO_SECRETKEYBYTES], | |||
} | |||
/************************************************* | |||
* Name: unpack_sk | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_unpack_sk | |||
* | |||
* Description: Unpack secret key sk = (rho, key, tr, s1, s2, t0). | |||
* | |||
* Arguments: - const unsigned char rho[]: output byte array for rho | |||
* - const unsigned char key[]: output byte array for key | |||
* - const unsigned char tr[]: output byte array for tr | |||
* Arguments: - const uint8_t rho[]: output byte array for rho | |||
* - const uint8_t key[]: output byte array for key | |||
* - const uint8_t tr[]: output byte array for tr | |||
* - const polyvecl *s1: pointer to output vector s1 | |||
* - const polyveck *s2: pointer to output vector s2 | |||
* - const polyveck *t0: pointer to output vector t0 | |||
* - unsigned char sk[]: byte array containing bit-packed sk | |||
* - uint8_t sk[]: byte array containing bit-packed sk | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_unpack_sk(unsigned char rho[SEEDBYTES], | |||
unsigned char key[SEEDBYTES], | |||
unsigned char tr[CRHBYTES], | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const unsigned char sk[CRYPTO_SECRETKEYBYTES]) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_unpack_sk( | |||
uint8_t *rho, | |||
uint8_t *key, | |||
uint8_t *tr, | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const uint8_t *sk) { | |||
unsigned int i; | |||
for (i = 0; i < SEEDBYTES; ++i) { | |||
@@ -156,19 +160,20 @@ void PQCLEAN_DILITHIUM3_CLEAN_unpack_sk(unsigned char rho[SEEDBYTES], | |||
} | |||
/************************************************* | |||
* Name: pack_sig | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_pack_sig | |||
* | |||
* Description: Bit-pack signature sig = (z, h, c). | |||
* | |||
* Arguments: - unsigned char sig[]: output byte array | |||
* Arguments: - uint8_t sig[]: output byte array | |||
* - const polyvecl *z: pointer to vector z | |||
* - const polyveck *h: pointer to hint vector h | |||
* - const poly *c: pointer to challenge polynomial | |||
* - const poly *c: pointer to PQCLEAN_DILITHIUM3_CLEAN_challenge polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
const polyvecl *z, | |||
const polyveck *h, | |||
const poly *c) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_sig( | |||
uint8_t *sig, | |||
const polyvecl *z, | |||
const polyveck *h, | |||
const poly *c) { | |||
unsigned int i, j, k; | |||
uint64_t signs, mask; | |||
@@ -182,10 +187,11 @@ void PQCLEAN_DILITHIUM3_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
for (i = 0; i < K; ++i) { | |||
for (j = 0; j < N; ++j) { | |||
if (h->vec[i].coeffs[j] != 0) { | |||
sig[k++] = (unsigned char) j; | |||
sig[k++] = (uint8_t)j; | |||
} | |||
} | |||
sig[OMEGA + i] = (unsigned char) k; | |||
sig[OMEGA + i] = (uint8_t)k; | |||
} | |||
while (k < OMEGA) { | |||
sig[k++] = 0; | |||
@@ -199,7 +205,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
sig[i] = 0; | |||
for (j = 0; j < 8; ++j) { | |||
if (c->coeffs[8 * i + j] != 0) { | |||
sig[i] |= (unsigned char) (1U << j); | |||
sig[i] |= (uint8_t)(1u << j); | |||
if (c->coeffs[8 * i + j] == (Q - 1)) { | |||
signs |= mask; | |||
} | |||
@@ -209,27 +215,28 @@ void PQCLEAN_DILITHIUM3_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
} | |||
sig += N / 8; | |||
for (i = 0; i < 8; ++i) { | |||
sig[i] = (unsigned char) (signs >> 8 * i); | |||
sig[i] = (uint8_t)(signs >> 8u * i); | |||
} | |||
} | |||
/************************************************* | |||
* Name: unpack_sig | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_unpack_sig | |||
* | |||
* Description: Unpack signature sig = (z, h, c). | |||
* | |||
* Arguments: - polyvecl *z: pointer to output vector z | |||
* - polyveck *h: pointer to output hint vector h | |||
* - poly *c: pointer to output challenge polynomial | |||
* - const unsigned char sig[]: byte array containing | |||
* - poly *c: pointer to output PQCLEAN_DILITHIUM3_CLEAN_challenge polynomial | |||
* - const uint8_t sig[]: byte array containing | |||
* bit-packed signature | |||
* | |||
* Returns 1 in case of malformed signature; otherwise 0. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_CLEAN_unpack_sig(polyvecl *z, | |||
polyveck *h, | |||
poly *c, | |||
const unsigned char sig[CRYPTO_BYTES]) { | |||
int PQCLEAN_DILITHIUM3_CLEAN_unpack_sig( | |||
polyvecl *z, | |||
polyveck *h, | |||
poly *c, | |||
const uint8_t *sig) { | |||
unsigned int i, j, k; | |||
uint64_t signs; | |||
@@ -266,6 +273,7 @@ int PQCLEAN_DILITHIUM3_CLEAN_unpack_sig(polyvecl *z, | |||
return 1; | |||
} | |||
} | |||
sig += OMEGA + K; | |||
/* Decode c */ | |||
@@ -1,31 +1,36 @@ | |||
#ifndef PACKING_H | |||
#define PACKING_H | |||
#ifndef PQCLEAN_DILITHIUM3_CLEAN_PACKING_H | |||
#define PQCLEAN_DILITHIUM3_CLEAN_PACKING_H | |||
#include "params.h" | |||
#include "polyvec.h" | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_pk(unsigned char pk[CRYPTO_PUBLICKEYBYTES], | |||
const unsigned char rho[SEEDBYTES], const polyveck *t1); | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_sk(unsigned char sk[CRYPTO_SECRETKEYBYTES], | |||
const unsigned char rho[SEEDBYTES], | |||
const unsigned char key[SEEDBYTES], | |||
const unsigned char tr[CRHBYTES], | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0); | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_sig(unsigned char sig[CRYPTO_BYTES], | |||
const polyvecl *z, const polyveck *h, const poly *c); | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_pk( | |||
uint8_t *pk, | |||
const uint8_t *rho, const polyveck *t1); | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_sk( | |||
uint8_t *sk, | |||
const uint8_t *rho, | |||
const uint8_t *key, | |||
const uint8_t *tr, | |||
const polyvecl *s1, | |||
const polyveck *s2, | |||
const polyveck *t0); | |||
void PQCLEAN_DILITHIUM3_CLEAN_pack_sig( | |||
uint8_t *sig, | |||
const polyvecl *z, const polyveck *h, const poly *c); | |||
void PQCLEAN_DILITHIUM3_CLEAN_unpack_pk(unsigned char rho[SEEDBYTES], polyveck *t1, | |||
const unsigned char pk[CRYPTO_PUBLICKEYBYTES]); | |||
void PQCLEAN_DILITHIUM3_CLEAN_unpack_sk(unsigned char rho[SEEDBYTES], | |||
unsigned char key[SEEDBYTES], | |||
unsigned char tr[CRHBYTES], | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const unsigned char sk[CRYPTO_SECRETKEYBYTES]); | |||
int PQCLEAN_DILITHIUM3_CLEAN_unpack_sig(polyvecl *z, polyveck *h, poly *c, | |||
const unsigned char sig[CRYPTO_BYTES]); | |||
void PQCLEAN_DILITHIUM3_CLEAN_unpack_pk( | |||
uint8_t *rho, polyveck *t1, | |||
const uint8_t *pk); | |||
void PQCLEAN_DILITHIUM3_CLEAN_unpack_sk( | |||
uint8_t *rho, | |||
uint8_t *key, | |||
uint8_t *tr, | |||
polyvecl *s1, | |||
polyveck *s2, | |||
polyveck *t0, | |||
const uint8_t *sk); | |||
int PQCLEAN_DILITHIUM3_CLEAN_unpack_sig( | |||
polyvecl *z, polyveck *h, poly *c, const uint8_t *sig); | |||
#endif |
@@ -1,18 +1,17 @@ | |||
#ifndef PARAMS_H | |||
#define PARAMS_H | |||
#ifndef PQCLEAN_DILITHIUM3_CLEAN_PARAMS_H | |||
#define PQCLEAN_DILITHIUM3_CLEAN_PARAMS_H | |||
#define SEEDBYTES 32 | |||
#define CRHBYTES 48 | |||
#define N 256 | |||
#define Q 8380417 | |||
#define QBITS 23 | |||
#define ROOT_OF_UNITY 1753 | |||
#define D 14 | |||
#define GAMMA1 ((Q - 1)/16) | |||
#define GAMMA2 (GAMMA1/2) | |||
#define ALPHA (2*GAMMA2) | |||
// DilithiumIII parameters | |||
#define K 5 | |||
#define L 4 | |||
#define ETA 5 | |||
@@ -20,6 +19,7 @@ | |||
#define BETA 275 | |||
#define OMEGA 96 | |||
#define POLT1_SIZE_PACKED ((N*(QBITS - D))/8) | |||
#define POLT0_SIZE_PACKED ((N*D)/8) | |||
#define POLETA_SIZE_PACKED ((N*SETABITS)/8) | |||
@@ -1,10 +1,11 @@ | |||
#include <stdint.h> | |||
#include "ntt.h" | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "reduce.h" | |||
#include "rounding.h" | |||
#include "symmetric.h" | |||
#include <stdint.h> | |||
/************************************************* | |||
@@ -16,8 +17,7 @@ | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_reduce(poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_reduce32(a->coeffs[i]); | |||
} | |||
} | |||
@@ -31,8 +31,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_reduce(poly *a) { | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_csubq(poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_csubq(a->coeffs[i]); | |||
} | |||
} | |||
@@ -46,8 +45,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_csubq(poly *a) { | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_freeze(poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_freeze(a->coeffs[i]); | |||
} | |||
} | |||
@@ -61,9 +59,8 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_freeze(poly *a) { | |||
* - const poly *a: pointer to first summand | |||
* - const poly *b: pointer to second summand | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_add(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_add(poly *c, const poly *a, const poly *b) { | |||
for (size_t i = 0; i < N; ++i) { | |||
c->coeffs[i] = a->coeffs[i] + b->coeffs[i]; | |||
} | |||
} | |||
@@ -78,11 +75,10 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_add(poly *c, const poly *a, const poly *b) { | |||
* Arguments: - poly *c: pointer to output polynomial | |||
* - const poly *a: pointer to first input polynomial | |||
* - const poly *b: pointer to second input polynomial to be | |||
* subtraced from first input polynomial | |||
* subtracted from first input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_sub(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
c->coeffs[i] = a->coeffs[i] + 2 * Q - b->coeffs[i]; | |||
} | |||
} | |||
@@ -96,8 +92,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_sub(poly *c, const poly *a, const poly *b) { | |||
* Arguments: - poly *a: pointer to input/output polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_shiftl(poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] <<= D; | |||
} | |||
} | |||
@@ -139,9 +134,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_invntt_montgomery(poly *a) { | |||
* - const poly *b: pointer to second input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_pointwise_invmontgomery(poly *c, const poly *a, const poly *b) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
c->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_montgomery_reduce((uint64_t)a->coeffs[i] * b->coeffs[i]); | |||
} | |||
@@ -160,12 +153,9 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_pointwise_invmontgomery(poly *c, const poly * | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a1->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_power2round(a->coeffs[i], &a0->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
@@ -182,12 +172,9 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_power2round(poly *a1, poly *a0, const poly *a | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a1->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_decompose(a->coeffs[i], &a0->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
@@ -204,13 +191,11 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_decompose(poly *a1, poly *a0, const poly *a) | |||
* Returns number of 1 bits. | |||
**************************************************/ | |||
unsigned int PQCLEAN_DILITHIUM3_CLEAN_poly_make_hint(poly *h, const poly *a0, const poly *a1) { | |||
unsigned int i, s = 0; | |||
for (i = 0; i < N; ++i) { | |||
unsigned int s = 0; | |||
for (size_t i = 0; i < N; ++i) { | |||
h->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_make_hint(a0->coeffs[i], a1->coeffs[i]); | |||
s += h->coeffs[i]; | |||
} | |||
return s; | |||
} | |||
@@ -224,12 +209,9 @@ unsigned int PQCLEAN_DILITHIUM3_CLEAN_poly_make_hint(poly *h, const poly *a0, co | |||
* - const poly *h: pointer to input hint polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_use_hint(poly *a, const poly *b, const poly *h) { | |||
unsigned int i; | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
a->coeffs[i] = PQCLEAN_DILITHIUM3_CLEAN_use_hint(b->coeffs[i], h->coeffs[i]); | |||
} | |||
} | |||
/************************************************* | |||
@@ -244,15 +226,13 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_use_hint(poly *a, const poly *b, const poly * | |||
* Returns 0 if norm is strictly smaller than B and 1 otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(const poly *a, uint32_t B) { | |||
unsigned int i; | |||
int32_t t; | |||
/* It is ok to leak which coefficient violates the bound since | |||
the probability for each coefficient is independent of secret | |||
data but we must not leak the sign of the centralized representative. */ | |||
for (i = 0; i < N; ++i) { | |||
for (size_t i = 0; i < N; ++i) { | |||
/* Absolute value of centralized representative */ | |||
t = (int32_t) ((Q - 1) / 2 - a->coeffs[i]); | |||
t = (int32_t)((Q - 1) / 2 - a->coeffs[i]); | |||
t ^= (t >> 31); | |||
t = (Q - 1) / 2 - t; | |||
@@ -260,7 +240,6 @@ int PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(const poly *a, uint32_t B) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
@@ -272,7 +251,7 @@ int PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(const poly *a, uint32_t B) { | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - const uint8_t *buf: array of random bytes | |||
* - size_t buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
@@ -280,8 +259,8 @@ int PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(const poly *a, uint32_t B) { | |||
**************************************************/ | |||
static unsigned int rej_uniform(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
unsigned int buflen) { | |||
const uint8_t *buf, | |||
size_t buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t; | |||
@@ -308,19 +287,20 @@ static unsigned int rej_uniform(uint32_t *a, | |||
* output stream from SHAKE128(seed|nonce), read through the stream128 interface. | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* - const uint8_t seed[]: byte array with seed of length | |||
* SEEDBYTES | |||
* - uint16_t nonce: 2-byte nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_NBLOCKS ((769 + STREAM128_BLOCKBYTES)/STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_BUFLEN (POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_NBLOCKS ((769 + STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_BUFLEN (POLY_UNIFORM_NBLOCKS * STREAM128_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
const uint8_t seed[SEEDBYTES], | |||
uint16_t nonce) { | |||
unsigned int i, ctr, off; | |||
unsigned int buflen = POLY_UNIFORM_BUFLEN; | |||
unsigned char buf[POLY_UNIFORM_BUFLEN + 2]; | |||
shake128ctx state; | |||
unsigned int i, ctr; | |||
size_t buflen = POLY_UNIFORM_BUFLEN; | |||
uint8_t buf[POLY_UNIFORM_BUFLEN + 2]; | |||
stream128_state state; | |||
size_t off; | |||
stream128_init(&state, seed, nonce); | |||
stream128_squeezeblocks(buf, POLY_UNIFORM_NBLOCKS, &state); | |||
@@ -347,7 +327,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform(poly *a, | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - const uint8_t *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
@@ -355,7 +335,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform(poly *a, | |||
**************************************************/ | |||
static unsigned int rej_eta(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
const uint8_t *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t0, t1; | |||
@@ -384,19 +364,18 @@ static unsigned int rej_eta(uint32_t *a, | |||
* output stream from SHAKE128(seed|nonce), read through the stream128 interface. | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* - const uint8_t seed[]: byte array with seed of length | |||
* SEEDBYTES | |||
* - uint16_t nonce: 2-byte nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_ETA_NBLOCKS (((N/2 * (1U << SETABITS)) / (2*ETA + 1)\ | |||
+ STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_ETA_NBLOCKS (((N / 2 * (1u << SETABITS)) / (2 * ETA + 1) + STREAM128_BLOCKBYTES) / STREAM128_BLOCKBYTES) | |||
#define POLY_UNIFORM_ETA_BUFLEN (POLY_UNIFORM_ETA_NBLOCKS*STREAM128_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_eta(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
const uint8_t *seed, | |||
uint16_t nonce) { | |||
unsigned int ctr; | |||
unsigned char buf[POLY_UNIFORM_ETA_BUFLEN]; | |||
shake128ctx state; | |||
uint8_t buf[POLY_UNIFORM_ETA_BUFLEN]; | |||
stream128_state state; | |||
stream128_init(&state, seed, nonce); | |||
stream128_squeezeblocks(buf, POLY_UNIFORM_ETA_NBLOCKS, &state); | |||
@@ -418,7 +397,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_eta(poly *a, | |||
* | |||
* Arguments: - uint32_t *a: pointer to output array (allocated) | |||
* - unsigned int len: number of coefficients to be sampled | |||
* - const unsigned char *buf: array of random bytes | |||
* - const uint8_t *buf: array of random bytes | |||
* - unsigned int buflen: length of array of random bytes | |||
* | |||
* Returns number of sampled coefficients. Can be smaller than len if not enough | |||
@@ -426,7 +405,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_eta(poly *a, | |||
**************************************************/ | |||
static unsigned int rej_gamma1m1(uint32_t *a, | |||
unsigned int len, | |||
const unsigned char *buf, | |||
const uint8_t *buf, | |||
unsigned int buflen) { | |||
unsigned int ctr, pos; | |||
uint32_t t0, t1; | |||
@@ -438,7 +417,7 @@ static unsigned int rej_gamma1m1(uint32_t *a, | |||
t0 |= (uint32_t)buf[pos + 2] << 16; | |||
t0 &= 0xFFFFF; | |||
t1 = buf[pos + 2] >> 4; | |||
t1 = buf[pos + 2] >> 4; | |||
t1 |= (uint32_t)buf[pos + 3] << 4; | |||
t1 |= (uint32_t)buf[pos + 4] << 12; | |||
@@ -463,19 +442,19 @@ static unsigned int rej_gamma1m1(uint32_t *a, | |||
* sampling on output stream of SHAKE256(seed|nonce). | |||
* | |||
* Arguments: - poly *a: pointer to output polynomial | |||
* - const unsigned char seed[]: byte array with seed of length | |||
* - const uint8_t seed[]: byte array with seed of length | |||
* CRHBYTES | |||
* - uint16_t nonce: 16-bit nonce | |||
**************************************************/ | |||
#define POLY_UNIFORM_GAMMA1M1_NBLOCKS ((641 + STREAM256_BLOCKBYTES) / STREAM256_BLOCKBYTES) | |||
#define POLY_UNIFORM_GAMMA1M1_BUFLEN (POLY_UNIFORM_GAMMA1M1_NBLOCKS * STREAM256_BLOCKBYTES) | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_gamma1m1(poly *a, | |||
const unsigned char seed[CRHBYTES], | |||
const uint8_t seed[CRHBYTES], | |||
uint16_t nonce) { | |||
unsigned int i, ctr, off; | |||
unsigned int buflen = POLY_UNIFORM_GAMMA1M1_BUFLEN; | |||
unsigned char buf[POLY_UNIFORM_GAMMA1M1_BUFLEN + 4]; | |||
shake256ctx state; | |||
uint8_t buf[POLY_UNIFORM_GAMMA1M1_BUFLEN + 4]; | |||
stream256_state state; | |||
stream256_init(&state, seed, nonce); | |||
stream256_squeezeblocks(buf, POLY_UNIFORM_GAMMA1M1_NBLOCKS, &state); | |||
@@ -500,18 +479,18 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_gamma1m1(poly *a, | |||
* Description: Bit-pack polynomial with coefficients in [-ETA,ETA]. | |||
* Input coefficients are assumed to lie in [Q-ETA,Q+ETA]. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLETA_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack(unsigned char *r, const poly *a) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack(uint8_t *r, const poly *a) { | |||
unsigned int i; | |||
unsigned char t[8]; | |||
uint8_t t[8]; | |||
for (i = 0; i < N / 2; ++i) { | |||
t[0] = (uint8_t) (Q + ETA - a->coeffs[2 * i + 0]); | |||
t[1] = (uint8_t) (Q + ETA - a->coeffs[2 * i + 1]); | |||
r[i] = (uint8_t) (t[0] | (t[1] << 4)); | |||
t[0] = (uint8_t)(Q + ETA - a->coeffs[2 * i + 0]); | |||
t[1] = (uint8_t)(Q + ETA - a->coeffs[2 * i + 1]); | |||
r[i] = (uint8_t)(t[0] | (t[1] << 4)); | |||
} | |||
} | |||
@@ -522,68 +501,67 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack(unsigned char *r, const poly *a) { | |||
* Output coefficients lie in [Q-ETA,Q+ETA]. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
* - const uint8_t *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyeta_unpack(poly *r, const unsigned char *a) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyeta_unpack(poly *r, const uint8_t *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r->coeffs[2 * i + 0] = a[i] & 0x0F; | |||
r->coeffs[2 * i + 1] = a[i] >> 4; | |||
r->coeffs[2 * i + 0] = Q + ETA - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 1] = Q + ETA - r->coeffs[2 * i + 1]; | |||
} | |||
} | |||
/************************************************* | |||
* Name: polyt1_pack | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyt1_pack | |||
* | |||
* Description: Bit-pack polynomial t1 with coefficients fitting in 9 bits. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLT1_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt1_pack(unsigned char *r, const poly *a) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt1_pack(uint8_t *r, const poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 8; ++i) { | |||
r[9 * i + 0] = (uint8_t) ((a->coeffs[8 * i + 0] >> 0)); | |||
r[9 * i + 1] = (uint8_t) ((a->coeffs[8 * i + 0] >> 8) | (a->coeffs[8 * i + 1] << 1)); | |||
r[9 * i + 2] = (uint8_t) ((a->coeffs[8 * i + 1] >> 7) | (a->coeffs[8 * i + 2] << 2)); | |||
r[9 * i + 3] = (uint8_t) ((a->coeffs[8 * i + 2] >> 6) | (a->coeffs[8 * i + 3] << 3)); | |||
r[9 * i + 4] = (uint8_t) ((a->coeffs[8 * i + 3] >> 5) | (a->coeffs[8 * i + 4] << 4)); | |||
r[9 * i + 5] = (uint8_t) ((a->coeffs[8 * i + 4] >> 4) | (a->coeffs[8 * i + 5] << 5)); | |||
r[9 * i + 6] = (uint8_t) ((a->coeffs[8 * i + 5] >> 3) | (a->coeffs[8 * i + 6] << 6)); | |||
r[9 * i + 7] = (uint8_t) ((a->coeffs[8 * i + 6] >> 2) | (a->coeffs[8 * i + 7] << 7)); | |||
r[9 * i + 8] = (uint8_t) ((a->coeffs[8 * i + 7] >> 1)); | |||
r[9 * i + 0] = (uint8_t)((a->coeffs[8 * i + 0] >> 0)); | |||
r[9 * i + 1] = (uint8_t)((a->coeffs[8 * i + 0] >> 8) | (a->coeffs[8 * i + 1] << 1)); | |||
r[9 * i + 2] = (uint8_t)((a->coeffs[8 * i + 1] >> 7) | (a->coeffs[8 * i + 2] << 2)); | |||
r[9 * i + 3] = (uint8_t)((a->coeffs[8 * i + 2] >> 6) | (a->coeffs[8 * i + 3] << 3)); | |||
r[9 * i + 4] = (uint8_t)((a->coeffs[8 * i + 3] >> 5) | (a->coeffs[8 * i + 4] << 4)); | |||
r[9 * i + 5] = (uint8_t)((a->coeffs[8 * i + 4] >> 4) | (a->coeffs[8 * i + 5] << 5)); | |||
r[9 * i + 6] = (uint8_t)((a->coeffs[8 * i + 5] >> 3) | (a->coeffs[8 * i + 6] << 6)); | |||
r[9 * i + 7] = (uint8_t)((a->coeffs[8 * i + 6] >> 2) | (a->coeffs[8 * i + 7] << 7)); | |||
r[9 * i + 8] = (uint8_t)((a->coeffs[8 * i + 7] >> 1)); | |||
} | |||
} | |||
/************************************************* | |||
* Name: polyt1_unpack | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack | |||
* | |||
* Description: Unpack polynomial t1 with 9-bit coefficients. | |||
* Output coefficients are standard representatives. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
* - const uint8_t *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack(poly *r, const unsigned char *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 8; ++i) { | |||
r->coeffs[8 * i + 0] = ((a[9 * i + 0] ) | ((uint32_t)a[9 * i + 1] << 8)) & 0x1FF; | |||
r->coeffs[8 * i + 1] = ((a[9 * i + 1] >> 1) | ((uint32_t)a[9 * i + 2] << 7)) & 0x1FF; | |||
r->coeffs[8 * i + 2] = ((a[9 * i + 2] >> 2) | ((uint32_t)a[9 * i + 3] << 6)) & 0x1FF; | |||
r->coeffs[8 * i + 3] = ((a[9 * i + 3] >> 3) | ((uint32_t)a[9 * i + 4] << 5)) & 0x1FF; | |||
r->coeffs[8 * i + 4] = ((a[9 * i + 4] >> 4) | ((uint32_t)a[9 * i + 5] << 4)) & 0x1FF; | |||
r->coeffs[8 * i + 5] = ((a[9 * i + 5] >> 5) | ((uint32_t)a[9 * i + 6] << 3)) & 0x1FF; | |||
r->coeffs[8 * i + 6] = ((a[9 * i + 6] >> 6) | ((uint32_t)a[9 * i + 7] << 2)) & 0x1FF; | |||
r->coeffs[8 * i + 7] = ((a[9 * i + 7] >> 7) | ((uint32_t)a[9 * i + 8] << 1)) & 0x1FF; | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack(poly *r, const uint8_t *a) { | |||
for (size_t i = 0; i < N / 8; ++i) { | |||
r->coeffs[8 * i + 0] = ((a[9 * i + 0] ) | ((uint32_t) a[9 * i + 1] << 8)) & 0x1FF; | |||
r->coeffs[8 * i + 1] = ((a[9 * i + 1] >> 1) | ((uint32_t) a[9 * i + 2] << 7)) & 0x1FF; | |||
r->coeffs[8 * i + 2] = ((a[9 * i + 2] >> 2) | ((uint32_t) a[9 * i + 3] << 6)) & 0x1FF; | |||
r->coeffs[8 * i + 3] = ((a[9 * i + 3] >> 3) | ((uint32_t) a[9 * i + 4] << 5)) & 0x1FF; | |||
r->coeffs[8 * i + 4] = ((a[9 * i + 4] >> 4) | ((uint32_t) a[9 * i + 5] << 4)) & 0x1FF; | |||
r->coeffs[8 * i + 5] = ((a[9 * i + 5] >> 5) | ((uint32_t) a[9 * i + 6] << 3)) & 0x1FF; | |||
r->coeffs[8 * i + 6] = ((a[9 * i + 6] >> 6) | ((uint32_t) a[9 * i + 7] << 2)) & 0x1FF; | |||
r->coeffs[8 * i + 7] = ((a[9 * i + 7] >> 7) | ((uint32_t) a[9 * i + 8] << 1)) & 0x1FF; | |||
} | |||
} | |||
/************************************************* | |||
@@ -592,32 +570,30 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack(poly *r, const unsigned char *a) { | |||
* Description: Bit-pack polynomial t0 with coefficients in ]-2^{D-1}, 2^{D-1}]. | |||
* Input coefficients are assumed to lie in ]Q-2^{D-1}, Q+2^{D-1}]. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLT0_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt0_pack(unsigned char *r, const poly *a) { | |||
unsigned int i; | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt0_pack(uint8_t *r, const poly *a) { | |||
uint32_t t[4]; | |||
for (i = 0; i < N / 4; ++i) { | |||
t[0] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 0]; | |||
t[1] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 1]; | |||
t[2] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 2]; | |||
t[3] = Q + (1U << (D - 1)) - a->coeffs[4 * i + 3]; | |||
r[7 * i + 0] = (uint8_t) (t[0]); | |||
r[7 * i + 1] = (uint8_t) (t[0] >> 8); | |||
r[7 * i + 1] |= (uint8_t) (t[1] << 6); | |||
r[7 * i + 2] = (uint8_t) (t[1] >> 2); | |||
r[7 * i + 3] = (uint8_t) (t[1] >> 10); | |||
r[7 * i + 3] |= (uint8_t) (t[2] << 4); | |||
r[7 * i + 4] = (uint8_t) (t[2] >> 4); | |||
r[7 * i + 5] = (uint8_t) (t[2] >> 12); | |||
r[7 * i + 5] |= (uint8_t) (t[3] << 2); | |||
r[7 * i + 6] = (uint8_t) (t[3] >> 6); | |||
for (size_t i = 0; i < N / 4; ++i) { | |||
t[0] = Q + (1u << (D - 1)) - a->coeffs[4 * i + 0]; | |||
t[1] = Q + (1u << (D - 1)) - a->coeffs[4 * i + 1]; | |||
t[2] = Q + (1u << (D - 1)) - a->coeffs[4 * i + 2]; | |||
t[3] = Q + (1u << (D - 1)) - a->coeffs[4 * i + 3]; | |||
r[7 * i + 0] = (uint8_t)(t[0]); | |||
r[7 * i + 1] = (uint8_t)(t[0] >> 8); | |||
r[7 * i + 1] |= (uint8_t)(t[1] << 6); | |||
r[7 * i + 2] = (uint8_t)(t[1] >> 2); | |||
r[7 * i + 3] = (uint8_t)(t[1] >> 10); | |||
r[7 * i + 3] |= (uint8_t)(t[2] << 4); | |||
r[7 * i + 4] = (uint8_t)(t[2] >> 4); | |||
r[7 * i + 5] = (uint8_t)(t[2] >> 12); | |||
r[7 * i + 5] |= (uint8_t)(t[3] << 2); | |||
r[7 * i + 6] = (uint8_t)(t[3] >> 6); | |||
} | |||
} | |||
/************************************************* | |||
@@ -627,32 +603,30 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyt0_pack(unsigned char *r, const poly *a) { | |||
* Output coefficients lie in ]Q-2^{D-1},Q+2^{D-1}]. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
* - const uint8_t *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt0_unpack(poly *r, const unsigned char *a) { | |||
unsigned int i; | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt0_unpack(poly *r, const uint8_t *a) { | |||
for (i = 0; i < N / 4; ++i) { | |||
r->coeffs[4 * i + 0] = a[7 * i + 0]; | |||
r->coeffs[4 * i + 0] |= (uint32_t)(a[7 * i + 1] & 0x3F) << 8; | |||
for (size_t i = 0; i < N / 4; ++i) { | |||
r->coeffs[4 * i + 0] = a[7 * i + 0]; | |||
r->coeffs[4 * i + 0] |= (uint32_t) (a[7 * i + 1] & 0x3F) << 8; | |||
r->coeffs[4 * i + 1] = a[7 * i + 1] >> 6; | |||
r->coeffs[4 * i + 1] |= (uint32_t)a[7 * i + 2] << 2; | |||
r->coeffs[4 * i + 1] |= (uint32_t)(a[7 * i + 3] & 0x0F) << 10; | |||
r->coeffs[4 * i + 1] = a[7 * i + 1] >> 6; | |||
r->coeffs[4 * i + 1] |= (uint32_t) a[7 * i + 2] << 2; | |||
r->coeffs[4 * i + 1] |= (uint32_t) (a[7 * i + 3] & 0x0F) << 10; | |||
r->coeffs[4 * i + 2] = a[7 * i + 3] >> 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t)a[7 * i + 4] << 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t)(a[7 * i + 5] & 0x03) << 12; | |||
r->coeffs[4 * i + 2] = a[7 * i + 3] >> 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t) a[7 * i + 4] << 4; | |||
r->coeffs[4 * i + 2] |= (uint32_t) (a[7 * i + 5] & 0x03) << 12; | |||
r->coeffs[4 * i + 3] = a[7 * i + 5] >> 2; | |||
r->coeffs[4 * i + 3] |= (uint32_t)a[7 * i + 6] << 6; | |||
r->coeffs[4 * i + 3] = a[7 * i + 5] >> 2; | |||
r->coeffs[4 * i + 3] |= (uint32_t) a[7 * i + 6] << 6; | |||
r->coeffs[4 * i + 0] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 0]; | |||
r->coeffs[4 * i + 1] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 1]; | |||
r->coeffs[4 * i + 2] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 2]; | |||
r->coeffs[4 * i + 3] = Q + (1U << (D - 1)) - r->coeffs[4 * i + 3]; | |||
} | |||
} | |||
/************************************************* | |||
@@ -662,29 +636,27 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyt0_unpack(poly *r, const unsigned char *a) { | |||
* in [-(GAMMA1 - 1), GAMMA1 - 1]. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLZ_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyz_pack(unsigned char *r, const poly *a) { | |||
unsigned int i; | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyz_pack(uint8_t *r, const poly *a) { | |||
uint32_t t[2]; | |||
for (i = 0; i < N / 2; ++i) { | |||
for (size_t i = 0; i < N / 2; ++i) { | |||
/* Map to {0,...,2*GAMMA1 - 2} */ | |||
t[0] = GAMMA1 - 1 - a->coeffs[2 * i + 0]; | |||
t[0] += ((int32_t)t[0] >> 31) & Q; | |||
t[1] = GAMMA1 - 1 - a->coeffs[2 * i + 1]; | |||
t[1] += ((int32_t)t[1] >> 31) & Q; | |||
r[5 * i + 0] = (uint8_t) (t[0]); | |||
r[5 * i + 1] = (uint8_t) (t[0] >> 8); | |||
r[5 * i + 2] = (uint8_t) (t[0] >> 16); | |||
r[5 * i + 2] |= (uint8_t) (t[1] << 4); | |||
r[5 * i + 3] = (uint8_t) (t[1] >> 4); | |||
r[5 * i + 4] = (uint8_t) (t[1] >> 12); | |||
r[5 * i + 0] = (uint8_t)t[0]; | |||
r[5 * i + 1] = (uint8_t)(t[0] >> 8); | |||
r[5 * i + 2] = (uint8_t)(t[0] >> 16); | |||
r[5 * i + 2] |= (uint8_t)(t[1] << 4); | |||
r[5 * i + 3] = (uint8_t)(t[1] >> 4); | |||
r[5 * i + 4] = (uint8_t)(t[1] >> 12); | |||
} | |||
} | |||
/************************************************* | |||
@@ -695,26 +667,23 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyz_pack(unsigned char *r, const poly *a) { | |||
* Output coefficients are standard representatives. | |||
* | |||
* Arguments: - poly *r: pointer to output polynomial | |||
* - const unsigned char *a: byte array with bit-packed polynomial | |||
* - const uint8_t *a: byte array with bit-packed polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack(poly *r, const unsigned char *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack(poly *r, const uint8_t *a) { | |||
for (size_t i = 0; i < N / 2; ++i) { | |||
r->coeffs[2 * i + 0] = a[5 * i + 0]; | |||
r->coeffs[2 * i + 0] |= (uint32_t)a[5 * i + 1] << 8; | |||
r->coeffs[2 * i + 0] |= (uint32_t)(a[5 * i + 2] & 0x0F) << 16; | |||
r->coeffs[2 * i + 0] |= (uint32_t) a[5 * i + 1] << 8; | |||
r->coeffs[2 * i + 0] |= (uint32_t) (a[5 * i + 2] & 0x0F) << 16; | |||
r->coeffs[2 * i + 1] = a[5 * i + 2] >> 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 3] << 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t)a[5 * i + 4] << 12; | |||
r->coeffs[2 * i + 1] |= (uint32_t) a[5 * i + 3] << 4; | |||
r->coeffs[2 * i + 1] |= (uint32_t) a[5 * i + 4] << 12; | |||
r->coeffs[2 * i + 0] = GAMMA1 - 1 - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 0] = GAMMA1 - 1 - r->coeffs[2 * i + 0]; | |||
r->coeffs[2 * i + 0] += ((int32_t)r->coeffs[2 * i + 0] >> 31) & Q; | |||
r->coeffs[2 * i + 1] = GAMMA1 - 1 - r->coeffs[2 * i + 1]; | |||
r->coeffs[2 * i + 1] = GAMMA1 - 1 - r->coeffs[2 * i + 1]; | |||
r->coeffs[2 * i + 1] += ((int32_t)r->coeffs[2 * i + 1] >> 31) & Q; | |||
} | |||
} | |||
/************************************************* | |||
@@ -723,15 +692,13 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack(poly *r, const unsigned char *a) { | |||
* Description: Bit-pack polynomial w1 with coefficients in [0, 15]. | |||
* Input coefficients are assumed to be standard representatives. | |||
* | |||
* Arguments: - unsigned char *r: pointer to output byte array with at least | |||
* Arguments: - uint8_t *r: pointer to output byte array with at least | |||
* POLW1_SIZE_PACKED bytes | |||
* - const poly *a: pointer to input polynomial | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyw1_pack(unsigned char *r, const poly *a) { | |||
unsigned int i; | |||
for (i = 0; i < N / 2; ++i) { | |||
r[i] = (uint8_t) (a->coeffs[2 * i + 0] | (a->coeffs[2 * i + 1] << 4)); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyw1_pack(uint8_t *r, const poly *a) { | |||
for (size_t i = 0; i < N / 2; ++i) { | |||
r[i] = (uint8_t)(a->coeffs[2 * i + 0] | a->coeffs[2 * i + 1] << 4); | |||
} | |||
} |
@@ -1,9 +1,10 @@ | |||
#ifndef POLY_H | |||
#define POLY_H | |||
#ifndef PQCLEAN_DILITHIUM3_CLEAN_POLY_H | |||
#define PQCLEAN_DILITHIUM3_CLEAN_POLY_H | |||
#include "params.h" | |||
#include <stdint.h> | |||
#include "params.h" | |||
typedef struct { | |||
uint32_t coeffs[N]; | |||
} poly; | |||
@@ -27,27 +28,27 @@ void PQCLEAN_DILITHIUM3_CLEAN_poly_use_hint(poly *a, const poly *b, const poly * | |||
int PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(const poly *a, uint32_t B); | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
const uint8_t *seed, | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_eta(poly *a, | |||
const unsigned char seed[SEEDBYTES], | |||
const uint8_t *seed, | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM3_CLEAN_poly_uniform_gamma1m1(poly *a, | |||
const unsigned char seed[CRHBYTES], | |||
const uint8_t seed[CRHBYTES], | |||
uint16_t nonce); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyeta_unpack(poly *r, const unsigned char *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyeta_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyeta_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt1_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack(poly *r, const unsigned char *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt1_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt1_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt0_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt0_unpack(poly *r, const unsigned char *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt0_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyt0_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyz_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack(poly *r, const unsigned char *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyz_pack(uint8_t *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyz_unpack(poly *r, const uint8_t *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyw1_pack(unsigned char *r, const poly *a); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyw1_pack(uint8_t *r, const poly *a); | |||
#endif |
@@ -1,14 +1,15 @@ | |||
#include <stdint.h> | |||
#include "params.h" | |||
#include "poly.h" | |||
#include "polyvec.h" | |||
#include <stdint.h> | |||
/**************************************************************/ | |||
/************ Vectors of polynomials of length L **************/ | |||
/**************************************************************/ | |||
/************************************************* | |||
* Name: polyvecl_freeze | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyvecl_freeze | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length L | |||
* to standard representatives. | |||
@@ -24,7 +25,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_freeze(polyvecl *v) { | |||
} | |||
/************************************************* | |||
* Name: polyvecl_add | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyvecl_add | |||
* | |||
* Description: Add vectors of polynomials of length L. | |||
* No modular reduction is performed. | |||
@@ -42,7 +43,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_add(polyvecl *w, const polyvecl *u, const | |||
} | |||
/************************************************* | |||
* Name: polyvecl_ntt | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyvecl_ntt | |||
* | |||
* Description: Forward NTT of all polynomials in vector of length L. Output | |||
* coefficients can be up to 16*Q larger than input coefficients. | |||
@@ -58,7 +59,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_ntt(polyvecl *v) { | |||
} | |||
/************************************************* | |||
* Name: polyvecl_pointwise_acc_invmontgomery | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_acc_invmontgomery | |||
* | |||
* Description: Pointwise multiply vectors of polynomials of length L, multiply | |||
* resulting vector by 2^{-32} and add (accumulate) polynomials | |||
@@ -85,7 +86,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_acc_invmontgomery(poly *w, | |||
} | |||
/************************************************* | |||
* Name: polyvecl_chknorm | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyvecl_chknorm | |||
* | |||
* Description: Check infinity norm of polynomials in vector of length L. | |||
* Assumes input coefficients to be standard representatives. | |||
@@ -96,14 +97,15 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyvecl_pointwise_acc_invmontgomery(poly *w, | |||
* Returns 0 if norm of all polynomials is strictly smaller than B and 1 | |||
* otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_CLEAN_polyvecl_chknorm(const polyvecl *v, uint32_t bound) { | |||
int PQCLEAN_DILITHIUM3_CLEAN_polyvecl_chknorm(const polyvecl *v, uint32_t B) { | |||
unsigned int i; | |||
for (i = 0; i < L; ++i) { | |||
if (PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(&v->vec[i], bound)) { | |||
if (PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(&v->vec[i], B)) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
@@ -113,7 +115,7 @@ int PQCLEAN_DILITHIUM3_CLEAN_polyvecl_chknorm(const polyvecl *v, uint32_t bound) | |||
/************************************************* | |||
* Name: polyveck_reduce | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length K | |||
* to representatives in [0,2*Q[. | |||
@@ -129,7 +131,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_reduce(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_csubq | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_csubq | |||
* | |||
* Description: For all coefficients of polynomials in vector of length K | |||
* subtract Q if coefficient is bigger than Q. | |||
@@ -145,7 +147,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_csubq(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_freeze | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_freeze | |||
* | |||
* Description: Reduce coefficients of polynomials in vector of length K | |||
* to standard representatives. | |||
@@ -161,7 +163,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_freeze(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_add | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_add | |||
* | |||
* Description: Add vectors of polynomials of length K. | |||
* No modular reduction is performed. | |||
@@ -179,7 +181,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_add(polyveck *w, const polyveck *u, const | |||
} | |||
/************************************************* | |||
* Name: polyveck_sub | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_sub | |||
* | |||
* Description: Subtract vectors of polynomials of length K. | |||
* Assumes coefficients of polynomials in second input vector | |||
@@ -199,7 +201,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_sub(polyveck *w, const polyveck *u, const | |||
} | |||
/************************************************* | |||
* Name: polyveck_shiftl | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_shiftl | |||
* | |||
* Description: Multiply vector of polynomials of length K by 2^D without modular | |||
* reduction. Assumes input coefficients to be less than 2^{32-D}. | |||
@@ -215,7 +217,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_shiftl(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_ntt | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_ntt | |||
* | |||
* Description: Forward NTT of all polynomials in vector of length K. Output | |||
* coefficients can be up to 16*Q larger than input coefficients. | |||
@@ -231,7 +233,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_ntt(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_invntt_montgomery | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_montgomery | |||
* | |||
* Description: Inverse NTT and multiplication by 2^{32} of polynomials | |||
* in vector of length K. Input coefficients need to be less | |||
@@ -248,7 +250,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_montgomery(polyveck *v) { | |||
} | |||
/************************************************* | |||
* Name: polyveck_chknorm | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_chknorm | |||
* | |||
* Description: Check infinity norm of polynomials in vector of length K. | |||
* Assumes input coefficients to be standard representatives. | |||
@@ -259,19 +261,20 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_invntt_montgomery(polyveck *v) { | |||
* Returns 0 if norm of all polynomials is strictly smaller than B and 1 | |||
* otherwise. | |||
**************************************************/ | |||
int PQCLEAN_DILITHIUM3_CLEAN_polyveck_chknorm(const polyveck *v, uint32_t bound) { | |||
int PQCLEAN_DILITHIUM3_CLEAN_polyveck_chknorm(const polyveck *v, uint32_t B) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
if (PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(&v->vec[i], bound)) { | |||
if (PQCLEAN_DILITHIUM3_CLEAN_poly_chknorm(&v->vec[i], B)) { | |||
return 1; | |||
} | |||
} | |||
return 0; | |||
} | |||
/************************************************* | |||
* Name: polyveck_power2round | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_power2round | |||
* | |||
* Description: For all coefficients a of polynomials in vector of length K, | |||
* compute a0, a1 such that a mod Q = a1*2^D + a0 | |||
@@ -293,7 +296,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_power2round(polyveck *v1, polyveck *v0, c | |||
} | |||
/************************************************* | |||
* Name: polyveck_decompose | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_decompose | |||
* | |||
* Description: For all coefficients a of polynomials in vector of length K, | |||
* compute high and low bits a0, a1 such that a mod Q = a1*ALPHA + a0 | |||
@@ -316,7 +319,7 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, con | |||
} | |||
/************************************************* | |||
* Name: polyveck_make_hint | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_make_hint | |||
* | |||
* Description: Compute hint vector. | |||
* | |||
@@ -339,19 +342,19 @@ unsigned int PQCLEAN_DILITHIUM3_CLEAN_polyveck_make_hint(polyveck *h, | |||
} | |||
/************************************************* | |||
* Name: polyveck_use_hint | |||
* Name: PQCLEAN_DILITHIUM3_CLEAN_polyveck_use_hint | |||
* | |||
* Description: Use hint vector to correct the high bits of input vector. | |||
* | |||
* Arguments: - polyveck *w: pointer to output vector of polynomials with | |||
* corrected high bits | |||
* - const polyveck *u: pointer to input vector | |||
* - const polyveck *v: pointer to input vector | |||
* - const polyveck *h: pointer to input hint vector | |||
**************************************************/ | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h) { | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h) { | |||
unsigned int i; | |||
for (i = 0; i < K; ++i) { | |||
PQCLEAN_DILITHIUM3_CLEAN_poly_use_hint(&w->vec[i], &u->vec[i], &h->vec[i]); | |||
PQCLEAN_DILITHIUM3_CLEAN_poly_use_hint(&w->vec[i], &v->vec[i], &h->vec[i]); | |||
} | |||
} |
@@ -1,9 +1,10 @@ | |||
#ifndef POLYVEC_H | |||
#define POLYVEC_H | |||
#ifndef PQCLEAN_DILITHIUM3_CLEAN_POLYVEC_H | |||
#define PQCLEAN_DILITHIUM3_CLEAN_POLYVEC_H | |||
#include <stdint.h> | |||
#include "params.h" | |||
#include "poly.h" | |||
#include <stdint.h> | |||
/* Vectors of polynomials of length L */ | |||
typedef struct { | |||
@@ -46,6 +47,6 @@ void PQCLEAN_DILITHIUM3_CLEAN_polyveck_decompose(polyveck *v1, polyveck *v0, con | |||
unsigned int PQCLEAN_DILITHIUM3_CLEAN_polyveck_make_hint(polyveck *h, | |||
const polyveck *v0, | |||
const polyveck *v1); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *u, const polyveck *h); | |||
void PQCLEAN_DILITHIUM3_CLEAN_polyveck_use_hint(polyveck *w, const polyveck *v, const polyveck *h); | |||
#endif |