@@ -0,0 +1,23 @@ | |||||
name: Falcon-1024 | |||||
type: signature | |||||
claimed-nist-level: 5 | |||||
length-public-key: 1793 | |||||
length-secret-key: 2305 | |||||
length-signature: 1330 | |||||
nistkat-sha256: ad3d17869fdc05deae13ffa2ef26bde125b42f61b2dcd861a1ae20adcb2accc5 | |||||
testvectors-sha256: bd8076c13722d8c555c68fc6bd7763e1a9dd5483ee7c8d1c74dd2df459c72a40 | |||||
principal-submitters: | |||||
- Thomas Prest | |||||
auxiliary-submitters: | |||||
- Pierre-Alain Fouque | |||||
- Jeffrey Hoffstein | |||||
- Paul Kirchner | |||||
- Vadim Lyubashevsky | |||||
- Thomas Pornin | |||||
- Thomas Ricosset | |||||
- Gregor Seiler | |||||
- William Whyte | |||||
- Zhenfei Zhang | |||||
implementations: | |||||
- name: clean | |||||
version: https://github.com/FIX-THIS/YES-BUT-HOW/DUNNO-GUV |
@@ -0,0 +1,22 @@ | |||||
MIT License | |||||
Copyright (c) 2017-2019 Falcon Project | |||||
Permission is hereby granted, free of charge, to any person obtaining | |||||
a copy of this software and associated documentation files (the | |||||
"Software"), to deal in the Software without restriction, including | |||||
without limitation the rights to use, copy, modify, merge, publish, | |||||
distribute, sublicense, and/or sell copies of the Software, and to | |||||
permit persons to whom the Software is furnished to do so, subject to | |||||
the following conditions: | |||||
The above copyright notice and this permission notice shall be | |||||
included in all copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
@@ -0,0 +1,21 @@ | |||||
# This Makefile can be used with GNU Make or BSD Make | |||||
# Name of the static library archive produced by the default target.
LIB=libfalcon-1024_clean.a | |||||
# Translation units and their object files; the two lists are kept in
# parallel by hand (OBJECTS is not derived from SOURCES).
SOURCES = codec.c common.c fft.c fpr.c keygen.c pqclean.c rng.c sign.c vrfy.c | |||||
OBJECTS = codec.o common.o fft.o fpr.o keygen.o pqclean.o rng.o sign.o vrfy.o | |||||
# Headers listed as prerequisites of every object file.
HEADERS = api.h fpr.h inner.h | |||||
# Strict warning set with warnings treated as errors (-Werror), C99 mode,
# and the shared "common" directory on the include path. EXTRAFLAGS lets
# the caller append additional options.
CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror -Wmissing-prototypes -Wredundant-decls -std=c99 -I../../../common $(EXTRAFLAGS) | |||||
# Default target: build the static library.
all: $(LIB) | |||||
# Compile one source file into one object; rebuilt when any listed header
# changes.
%.o: %.c $(HEADERS) | |||||
$(CC) $(CFLAGS) -c -o $@ $< | |||||
# Archive all objects into the library.
$(LIB): $(OBJECTS) | |||||
$(AR) -r $@ $(OBJECTS) | |||||
# Remove the object files and the library.
clean: | |||||
$(RM) $(OBJECTS) | |||||
$(RM) $(LIB) |
@@ -0,0 +1,18 @@ | |||||
# This Makefile can be used with Microsoft Visual Studio's nmake using the command: | |||||
# nmake /f Makefile.Microsoft_nmake | |||||
# Name of the static library produced by the default target.
LIBRARY=libfalcon-1024_clean.lib | |||||
# Object files that make up the library.
OBJECTS=codec.obj common.obj fft.obj fpr.obj keygen.obj pqclean.obj rng.obj sign.obj vrfy.obj | |||||
# Highest warning level (/W4), warnings treated as errors (/WX), and the
# shared "common" directory on the include path.
CFLAGS=/nologo /I ..\..\..\common /W4 /WX | |||||
# Default target: build the static library.
all: $(LIBRARY) | |||||
# Make sure objects are recompiled if headers change. | |||||
$(OBJECTS): *.h | |||||
# Build the library from all of its dependents ($** in nmake).
$(LIBRARY): $(OBJECTS) | |||||
LIB.EXE /NOLOGO /WX /OUT:$@ $** | |||||
# Remove the object files and the library; the leading '-' makes nmake
# ignore a failing DEL.
clean: | |||||
-DEL $(OBJECTS) | |||||
-DEL $(LIBRARY) |
@@ -0,0 +1,80 @@ | |||||
#ifndef PQCLEAN_FALCON1024_CLEAN_API_H
#define PQCLEAN_FALCON1024_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

/*
 * Public API of the Falcon-1024 "clean" implementation.
 *
 * Sizes are in bytes. Key sizes are exact; CRYPTO_BYTES is the maximum
 * length of a signature (signatures have variable length).
 */
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES   2305
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES   1793
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES            1330

#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_ALGNAME          "Falcon-1024"

/*
 * Key pair generation.
 *
 *   pk   receives the public key; exactly
 *        PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES bytes
 *   sk   receives the private key; exactly
 *        PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES bytes
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk);

/*
 * Detached signature generation.
 *
 * Signs the message (m, mlen) with the private key sk. The signature is
 * written into sig[] and its length into *siglen. The length varies from
 * one signature to the next, but never exceeds
 * PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES bytes.
 *
 * The sig[], m[] and sk[] buffers may overlap each other arbitrarily.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen,
    const uint8_t *sk);

/*
 * Detached signature verification.
 *
 * Checks the signature (sig, siglen) on the message (m, mlen) against
 * the public key pk.
 *
 * The sig[], m[] and pk[] buffers may overlap each other arbitrarily.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen,
    const uint8_t *pk);

/*
 * Combined sign-and-pack.
 *
 * Signs the message (m, mlen) with sk, and writes a single object that
 * contains both the signature and the message into sm[]. The size of
 * that object is written into *smlen; it may exceed mlen by up to
 * PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES bytes.
 *
 * sm[] and m[] may overlap each other arbitrarily, but sm[] shall not
 * overlap with sk[].
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign(
    uint8_t *sm, size_t *smlen,
    const uint8_t *m, size_t mlen,
    const uint8_t *sk);

/*
 * Combined verify-and-unpack.
 *
 * Verifies the signed message object (sm, smlen) against pk. On
 * success, the embedded message is written into m[] and its length into
 * *mlen. The message is shorter than the signed object by an amount
 * that depends on the signature value, up to
 * PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES bytes.
 *
 * The m[], sm[] and pk[] buffers may overlap each other arbitrarily.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_open(
    uint8_t *m, size_t *mlen,
    const uint8_t *sm, size_t smlen,
    const uint8_t *pk);

#endif
@@ -0,0 +1,549 @@ | |||||
/* | |||||
* Encoding/decoding of keys and signatures. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include "inner.h" | |||||
/*
 * see inner.h
 *
 * Pack 2^logn values, each in the 0..12288 range, as consecutive 14-bit
 * big-endian fields. The last byte is padded with zero bits.
 *
 * Returns the encoded length in bytes. If out is NULL, nothing is
 * written and only the length is returned. Returns 0 if any value is
 * 12289 or more, or if the encoding does not fit in max_out_len bytes.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * 14) + 7) >> 3;
    uint8_t *dst;
    uint32_t pending = 0;
    int npending = 0;
    size_t i;

    /* Refuse any value that is not reduced modulo 12289. */
    for (i = 0; i < count; i ++) {
        if (x[i] >= 12289) {
            return 0;
        }
    }

    if (out == NULL) {
        return needed;
    }
    if (needed > max_out_len) {
        return 0;
    }

    dst = out;
    for (i = 0; i < count; i ++) {
        /* Append 14 bits, then emit every complete byte. */
        pending = (pending << 14) | x[i];
        for (npending += 14; npending >= 8; ) {
            npending -= 8;
            *dst ++ = (uint8_t)(pending >> npending);
        }
    }

    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (npending > 0) {
        *dst = (uint8_t)(pending << (8 - npending));
    }
    return needed;
}
/*
 * see inner.h
 *
 * Unpack 2^logn consecutive 14-bit big-endian fields into x[].
 *
 * Returns the number of bytes consumed. Returns 0 if the input is
 * shorter than required, if any decoded value is 12289 or more, or if
 * the padding bits of the last byte are not all zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t in_max_len) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * 14) + 7) >> 3;
    const uint8_t *src = in;
    uint32_t pending = 0;
    int npending = 0;
    size_t i = 0;

    if (needed > in_max_len) {
        return 0;
    }

    while (i < count) {
        unsigned v;

        pending = (pending << 8) | (*src ++);
        npending += 8;
        if (npending < 14) {
            /* Not enough bits for a full value yet. */
            continue;
        }
        npending -= 14;
        v = (pending >> npending) & 0x3FFF;
        if (v >= 12289) {
            return 0;
        }
        x[i ++] = (uint16_t)v;
    }

    /* Bits left over in the last byte are padding and must be zero. */
    if ((pending & (((uint32_t)1 << npending) - 1)) != 0) {
        return 0;
    }
    return needed;
}
/*
 * see inner.h
 *
 * Pack 2^logn signed 16-bit values as consecutive fields of 'bits' bits
 * each (two's complement, big-endian bit order). The last byte is padded
 * with zero bits.
 *
 * Every value must lie in -(2^(bits-1) - 1) .. +(2^(bits-1) - 1);
 * 'bits' must be at least 1 since the code shifts by (bits - 1).
 *
 * Returns the encoded length in bytes. If out is NULL, nothing is
 * written and only the length is returned. Returns 0 if a value is out
 * of range or if the encoding does not fit in max_out_len bytes.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int limit = (1 << (bits - 1)) - 1;
    size_t needed, i;
    uint8_t *dst;
    uint32_t pending, field;
    unsigned npending;

    for (i = 0; i < count; i ++) {
        if (x[i] < -limit || x[i] > limit) {
            return 0;
        }
    }

    needed = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return needed;
    }
    if (needed > max_out_len) {
        return 0;
    }

    dst = out;
    pending = 0;
    npending = 0;
    field = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        /* Append the low 'bits' bits, then emit every complete byte. */
        pending = (pending << bits) | ((uint16_t)x[i] & field);
        npending += bits;
        while (npending >= 8) {
            npending -= 8;
            *dst ++ = (uint8_t)(pending >> npending);
        }
    }

    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (npending > 0) {
        *dst = (uint8_t)(pending << (8 - npending));
    }
    return needed;
}
/*
 * see inner.h
 *
 * Unpack 2^logn consecutive fields of 'bits' bits each (two's
 * complement, big-endian bit order) into signed 16-bit values.
 *
 * 'bits' must be at least 1 since the code shifts by (bits - 1); the
 * arithmetic below additionally assumes bits <= 16 so that every value
 * fits in an int16_t (NOTE(review): confirm against callers).
 *
 * Returns the number of bytes consumed. Returns 0 if the input is
 * shorter than required, if a field holds the forbidden value
 * -2^(bits-1), or if the padding bits of the last byte are not zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t in_max_len) {
    size_t n, in_len, u;
    const uint8_t *buf;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > in_max_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;  /* all bits of one field */
    mask2 = (uint32_t)1 << (bits - 1);  /* sign bit of one field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;
            int32_t v;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            if (w == mask2) {
                /*
                 * Sign bit set and all other bits clear: this is
                 * the -2^(bits-1) value, which is forbidden.
                 */
                return 0;
            }
            /*
             * Sign-extend with plain arithmetic: a field with its
             * sign bit set stands for w - 2^bits. This needs neither
             * a pointer type-pun nor an out-of-range unsigned-to-
             * signed conversion.
             */
            v = (int32_t)w - (int32_t)((w & mask2) << 1);
            x[u ++] = (int16_t)v;
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}
/*
 * see inner.h
 *
 * Pack 2^logn signed 8-bit values as consecutive fields of 'bits' bits
 * each (two's complement, big-endian bit order). The last byte is padded
 * with zero bits.
 *
 * Every value must lie in -(2^(bits-1) - 1) .. +(2^(bits-1) - 1);
 * 'bits' must be at least 1 since the code shifts by (bits - 1).
 *
 * Returns the encoded length in bytes. If out is NULL, nothing is
 * written and only the length is returned. Returns 0 if a value is out
 * of range or if the encoding does not fit in max_out_len bytes.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int limit = (1 << (bits - 1)) - 1;
    size_t needed, i;
    uint8_t *dst;
    uint32_t pending, field;
    unsigned npending;

    for (i = 0; i < count; i ++) {
        if (x[i] < -limit || x[i] > limit) {
            return 0;
        }
    }

    needed = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return needed;
    }
    if (needed > max_out_len) {
        return 0;
    }

    dst = out;
    pending = 0;
    npending = 0;
    field = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        /* Append the low 'bits' bits, then emit every complete byte. */
        pending = (pending << bits) | ((uint8_t)x[i] & field);
        npending += bits;
        while (npending >= 8) {
            npending -= 8;
            *dst ++ = (uint8_t)(pending >> npending);
        }
    }

    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (npending > 0) {
        *dst = (uint8_t)(pending << (8 - npending));
    }
    return needed;
}
/*
 * see inner.h
 *
 * Unpack 2^logn consecutive fields of 'bits' bits each (two's
 * complement, big-endian bit order) into signed 8-bit values.
 *
 * 'bits' must be at least 1 since the code shifts by (bits - 1); the
 * arithmetic below additionally assumes bits <= 8 so that every value
 * fits in an int8_t (NOTE(review): confirm against callers).
 *
 * Returns the number of bytes consumed. Returns 0 if the input is
 * shorter than required, if a field holds the forbidden value
 * -2^(bits-1), or if the padding bits of the last byte are not zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t in_max_len) {
    size_t n, in_len, u;
    const uint8_t *buf;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > in_max_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;  /* all bits of one field */
    mask2 = (uint32_t)1 << (bits - 1);  /* sign bit of one field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;
            int32_t v;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            if (w == mask2) {
                /*
                 * Sign bit set and all other bits clear: this is
                 * the -2^(bits-1) value, which is forbidden.
                 */
                return 0;
            }
            /*
             * Sign-extend with plain arithmetic: a field with its
             * sign bit set stands for w - 2^bits. This needs neither
             * a pointer type-pun nor an out-of-range unsigned-to-
             * signed conversion.
             */
            v = (int32_t)w - (int32_t)((w & mask2) << 1);
            x[u ++] = (int8_t)v;
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}
/*
 * see inner.h
 *
 * Variable-length ("compressed") encoding of 2^logn signed values, each
 * in the -2047..+2047 range. Every value is written as:
 *   - one sign bit (1 for negative);
 *   - the low 7 bits of the absolute value;
 *   - the remaining high part of the absolute value in unary: that
 *     many zero bits, followed by a single one bit.
 * The last byte is padded with zero bits.
 *
 * Returns the encoded length in bytes. If out is NULL, nothing is
 * written and only the length is computed. Returns 0 if a value is out
 * of range or if the encoding does not fit in max_out_len bytes.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    uint8_t *dst = out;
    const size_t count = (size_t)1 << logn;
    size_t i, written = 0;
    uint32_t pending = 0;
    unsigned npending = 0;

    /*
     * All values must be within the -2047..+2047 range.
     */
    for (i = 0; i < count; i ++) {
        if (x[i] < -2047 || x[i] > +2047) {
            return 0;
        }
    }

    for (i = 0; i < count; i ++) {
        int val = x[i];
        unsigned sign = 0;
        unsigned mag, high;

        if (val < 0) {
            val = -val;
            sign = 1;
        }
        mag = (unsigned)val;
        high = mag >> 7;

        /*
         * First byte-sized chunk: sign bit, then the low 7 bits of
         * the absolute value.
         */
        pending = (pending << 8) | (sign << 7) | (mag & 127u);
        npending += 8;

        /*
         * Unary part: 'high' zeros and a terminating one. 'high' is
         * at most 15, so this adds at most 16 bits; with the 8 bits
         * above and at most 7 bits carried over from the previous
         * value, no more than 31 bits are ever pending, which fits
         * in the 32-bit accumulator.
         */
        pending = (pending << (high + 1)) | 1;
        npending += high + 1;

        /*
         * Emit every complete byte. The output index always
         * advances, so that the length is computed even when no
         * output buffer was provided.
         */
        while (npending >= 8) {
            npending -= 8;
            if (dst != NULL) {
                if (written >= max_out_len) {
                    return 0;
                }
                dst[written] = (uint8_t)(pending >> npending);
            }
            written ++;
        }
    }

    /*
     * Left-align the leftover bits (if any) in a final, zero-padded
     * byte.
     */
    if (npending > 0) {
        if (dst != NULL) {
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(pending << (8 - npending));
        }
        written ++;
    }
    return written;
}
/*
 * see inner.h
 *
 * Decode 2^logn values from the variable-length ("compressed") format:
 * for each value, one sign bit, the low 7 bits of the absolute value,
 * then the high part of the absolute value in unary (zero bits ended by
 * a one bit).
 *
 * Only canonical encodings are accepted, so that a given sequence of
 * values has exactly one valid byte representation:
 *   - absolute values above 2047 are rejected;
 *   - "negative zero" (sign bit set, absolute value 0) is rejected;
 *   - unused bits in the last consumed byte must be zero.
 *
 * Returns the number of bytes consumed, or 0 on error (truncated input
 * or invalid encoding). On error, the contents of x[] are unspecified.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t in_max_len) {
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value.
         */
        if (v >= in_max_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached. Each 0 bit adds 128
         * to the absolute value.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= in_max_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: zero has a single valid encoding, with
         * a cleared sign bit.
         */
        if (s != 0 && m == 0) {
            return 0;
        }

        x[u] = (int16_t)(s ? -(int)m : (int)m);
    }

    /*
     * Unused bits in the last byte must be zero.
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }

    return v;
}
/* | |||||
* Key elements and signatures are polynomials with small integer | |||||
* coefficients. Here are some statistics gathered over many | |||||
* generated key pairs (10000 or more for each degree): | |||||
* | |||||
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G) | |||||
* 1 2 129 56.31 143 60.02 | |||||
* 2 4 123 40.93 160 46.52 | |||||
* 3 8 97 28.97 159 38.01 | |||||
* 4 16 100 21.48 154 32.50 | |||||
* 5 32 71 15.41 151 29.36 | |||||
* 6 64 59 11.07 138 27.77 | |||||
* 7 128 39 7.91 144 27.00 | |||||
* 8 256 32 5.63 148 26.61 | |||||
* 9 512 22 4.00 137 26.46 | |||||
* 10 1024 15 2.84 146 26.41 | |||||
* | |||||
* We want a compact storage format for private key, and, as part of | |||||
* key generation, we are allowed to reject some keys which would | |||||
* otherwise be fine (this does not induce any noticeable vulnerability | |||||
* as long as we reject only a small proportion of possible keys). | |||||
* Hence, we enforce at key generation time maximum values for the | |||||
* elements of f, g, F and G, so that their encoding can be expressed | |||||
* in fixed-width values. Limits have been chosen so that generated | |||||
* keys are almost always within bounds, thus impacting neither
* security nor performance.
* | |||||
* IMPORTANT: the code assumes that all coefficients of f, g, F and G | |||||
* ultimately fit in the -127..+127 range. Thus, none of the elements | |||||
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8. | |||||
*/ | |||||
/*
 * Field width, in bits, for the coefficients of f and g. Entries follow
 * the log(n) rows of the statistics table (1 to 10); entry 0 is unused.
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[] = {
    0,              /* 0: unused */
    8, 8, 8, 8, 8,  /* 1 to 5 */
    7, 7,           /* 6 and 7 */
    6, 6,           /* 8 and 9 */
    5               /* 10 */
};
/*
 * Field width, in bits, for the coefficients of F and G. Entries follow
 * the log(n) rows of the statistics table (1 to 10); entry 0 is unused.
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[] = {
    0,              /* 0: unused */
    8, 8, 8, 8, 8,  /* 1 to 5 */
    8, 8, 8, 8, 8   /* 6 to 10 */
};
/* | |||||
* When generating a new key pair, we can always reject keys which | |||||
* feature an abnormally large coefficient. This can also be done for | |||||
* signatures, albeit with some care: in case the signature process is | |||||
* used in a derandomized setup (explicitly seeded with the message and | |||||
* private key), we have to follow the specification faithfully, and the | |||||
* specification only enforces a limit on the L2 norm of the signature | |||||
* vector. The limit on the L2 norm implies that the absolute value of | |||||
* a coefficient of the signature cannot be more than the following: | |||||
* | |||||
* log(n) n max sig coeff (theoretical) | |||||
* 1 2 412 | |||||
* 2 4 583 | |||||
* 3 8 824 | |||||
* 4 16 1166 | |||||
* 5 32 1649 | |||||
* 6 64 2332 | |||||
* 7 128 3299 | |||||
* 8 256 4665 | |||||
* 9 512 6598 | |||||
* 10 1024 9331 | |||||
* | |||||
* However, the largest signature coefficient observed during our
* experiments was 1077 (in absolute value), hence we can assume that,
* with overwhelming probability, signature coefficients will fit | |||||
* in -2047..2047, i.e. 12 bits. | |||||
*/ | |||||
/*
 * Field width, in bits, for signature coefficients. Entries follow the
 * log(n) rows of the table of theoretical maxima (1 to 10); entry 0 is
 * unused.
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[] = {
    0,                          /* 0: unused */
    10,                         /* 1 */
    11, 11,                     /* 2 and 3 */
    12, 12, 12, 12, 12, 12, 12  /* 4 to 10 */
};
@@ -0,0 +1,261 @@ | |||||
/* | |||||
* Support functions for signatures (hash-to-point, norm). | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include "inner.h" | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point( | |||||
shake256_context *sc, | |||||
uint16_t *x, unsigned logn, uint8_t *tmp) { | |||||
/* | |||||
* Each 16-bit sample is a value in 0..65535. The value is | |||||
* kept if it falls in 0..61444 (because 61445 = 5*12289) | |||||
* and rejected otherwise; thus, each sample has probability | |||||
* about 0.93758 of being selected. | |||||
* | |||||
* We want to oversample enough to be sure that we will | |||||
* have enough values with probability at least 1 - 2^(-256). | |||||
* Depending on degree N, this leads to the following | |||||
* required oversampling: | |||||
* | |||||
* logn n oversampling | |||||
* 1 2 65 | |||||
* 2 4 67 | |||||
* 3 8 71 | |||||
* 4 16 77 | |||||
* 5 32 86 | |||||
* 6 64 100 | |||||
* 7 128 122 | |||||
* 8 256 154 | |||||
* 9 512 205 | |||||
* 10 1024 287 | |||||
* | |||||
* If logn >= 7, then the provided temporary buffer is large | |||||
* enough. Otherwise, we use a stack buffer of 63 entries | |||||
* (i.e. 126 bytes) for the values that do not fit in tmp[]. | |||||
*/ | |||||
static const uint16_t overtab[] = { | |||||
0, /* unused */ | |||||
65, | |||||
67, | |||||
71, | |||||
77, | |||||
86, | |||||
100, | |||||
122, | |||||
154, | |||||
205, | |||||
287 | |||||
}; | |||||
unsigned n, n2, u, m, p, over; | |||||
uint16_t *tt1, tt2[63]; | |||||
/* | |||||
* We first generate m 16-bit value. Values 0..n-1 go to x[]. | |||||
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. | |||||
* We also reduce modulo q the values; rejected values are set | |||||
* to 0xFFFF. | |||||
*/ | |||||
n = 1U << logn; | |||||
n2 = n << 1; | |||||
over = overtab[logn]; | |||||
m = n + over; | |||||
tt1 = (uint16_t *)tmp; | |||||
for (u = 0; u < m; u ++) { | |||||
uint8_t buf[2]; | |||||
uint32_t w, wr; | |||||
shake256_extract(sc, buf, sizeof buf); | |||||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; | |||||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); | |||||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); | |||||
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); | |||||
wr |= ((w - 61445) >> 31) - 1; | |||||
if (u < n) { | |||||
x[u] = (uint16_t)wr; | |||||
} else if (u < n2) { | |||||
tt1[u - n] = (uint16_t)wr; | |||||
} else { | |||||
tt2[u - n2] = (uint16_t)wr; | |||||
} | |||||
} | |||||
/* | |||||
* Now we must "squeeze out" the invalid values. We do this in | |||||
* a logarithmic sequence of passes; each pass computes where a | |||||
* value should go, and moves it down by 'p' slots if necessary, | |||||
* where 'p' uses an increasing powers-of-two scale. It can be | |||||
* shown that in all cases where the loop decides that a value | |||||
* has to be moved down by p slots, the destination slot is | |||||
* "free" (i.e. contains an invalid value). | |||||
*/ | |||||
for (p = 1; p <= over; p <<= 1) { | |||||
unsigned v; | |||||
/* | |||||
* In the loop below: | |||||
* | |||||
* - v contains the index of the final destination of | |||||
* the value; it is recomputed dynamically based on | |||||
* whether values are valid or not. | |||||
* | |||||
* - u is the index of the value we consider ("source"); | |||||
* its address is s. | |||||
* | |||||
* - The loop may swap the value with the one at index | |||||
* u-p. The address of the swap destination is d. | |||||
*/ | |||||
v = 0; | |||||
for (u = 0; u < m; u ++) { | |||||
uint16_t *s, *d; | |||||
unsigned j, sv, dv, m; | |||||
if (u < n) { | |||||
s = &x[u]; | |||||
} else if (u < n2) { | |||||
s = &tt1[u - n]; | |||||
} else { | |||||
s = &tt2[u - n2]; | |||||
} | |||||
sv = *s; | |||||
/* | |||||
* The value in sv should ultimately go to | |||||
* address v, i.e. jump back by u-v slots. | |||||
*/ | |||||
j = u - v; | |||||
/* | |||||
* We increment v for the next iteration, but | |||||
* only if the source value is valid. The mask | |||||
* 'm' is -1 if the value is valid, 0 otherwise, | |||||
* so we _subtract_ m. | |||||
*/ | |||||
m = (sv >> 15) - 1U; | |||||
v -= m; | |||||
/* | |||||
* In this loop we consider jumps by p slots; if | |||||
* u < p then there is nothing more to do. | |||||
*/ | |||||
if (u < p) { | |||||
continue; | |||||
} | |||||
/* | |||||
* Destination for the swap: value at address u-p. | |||||
*/ | |||||
if ((u - p) < n) { | |||||
d = &x[u - p]; | |||||
} else if ((u - p) < n2) { | |||||
d = &tt1[(u - p) - n]; | |||||
} else { | |||||
d = &tt2[(u - p) - n2]; | |||||
} | |||||
dv = *d; | |||||
/* | |||||
* The swap should be performed only if the source | |||||
* is valid AND the jump j has its 'p' bit set. | |||||
*/ | |||||
m &= -(((j & p) + 0x1FF) >> 9); | |||||
*s = (uint16_t)(sv ^ (m & (sv ^ dv))); | |||||
*d = (uint16_t)(dv ^ (m & (sv ^ dv))); | |||||
} | |||||
} | |||||
} | |||||
/*
 * see inner.h
 *
 * Test whether the vector (s1, s2), made of two arrays of 2^logn
 * coefficients each, is short enough: returns 1 if its squared l2-norm
 * is strictly below the acceptance bound, 0 otherwise.
 */
int
PQCLEAN_FALCON1024_CLEAN_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    /*
     * The squared norm is accumulated with 32-bit operations only.
     * 'seen' collects every intermediate sum; if any of them had its
     * top bit set (i.e. exceeded 2^31-1), the final value is
     * saturated to 2^32-1.
     */
    const size_t count = (size_t)1 << logn;
    uint32_t sqnorm = 0;
    uint32_t seen = 0;
    uint32_t bound;
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t c1 = s1[i];
        int32_t c2 = s2[i];

        sqnorm += (uint32_t)(c1 * c1);
        seen |= sqnorm;
        sqnorm += (uint32_t)(c2 * c2);
        seen |= sqnorm;
    }
    sqnorm |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024); the shift scales
     * the squared bound down for degrees below 1024.
     */
    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return sqnorm < bound;
}
/*
 * see inner.h
 *
 * Same test as is_short(), for the case where part of the squared norm
 * is already known: sqn is the partial squared norm, to which the
 * squares of the 2^logn coefficients of a[] are added. Returns 1 if the
 * total is strictly below the acceptance bound, 0 otherwise.
 *
 * A partial value with its top bit set is treated as saturated, as is
 * any intermediate sum that exceeds 2^31-1.
 */
int
PQCLEAN_FALCON1024_CLEAN_is_short_half(
    uint32_t sqn, const int16_t *a, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    uint32_t seen = -(sqn >> 31);
    uint32_t bound;
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t c = a[i];

        sqn += (uint32_t)(c * c);
        seen |= sqn;
    }
    sqn |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024); the shift scales
     * the squared bound down for degrees below 1024.
     */
    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return sqn < bound;
}
@@ -0,0 +1,699 @@ | |||||
/* | |||||
* FFT code. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include "inner.h" | |||||
/* | |||||
* Rules for complex number macros: | |||||
* -------------------------------- | |||||
* | |||||
* Operand order is: destination, source1, source2... | |||||
* | |||||
* Each operand is a real and an imaginary part. | |||||
* | |||||
* All overlaps are allowed. | |||||
*/ | |||||
/*
 * Complex addition: d = a + b.
 *
 * Both sums are computed into temporaries before either destination
 * part is written, so the destination may overlap the operands.
 */
#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_sum_re = fpr_add(a_re, b_re); \
        fpr fpct_sum_im = fpr_add(a_im, b_im); \
        (d_re) = fpct_sum_re; \
        (d_im) = fpct_sum_im; \
    } while (0)
/*
 * Complex subtraction: d = a - b.
 *
 * Both differences are computed into temporaries before either
 * destination part is written, so the destination may overlap the
 * operands.
 */
#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_diff_re = fpr_sub(a_re, b_re); \
        fpr fpct_diff_im = fpr_sub(a_im, b_im); \
        (d_re) = fpct_diff_re; \
        (d_im) = fpct_diff_im; \
    } while (0)
/*
 * Complex multiplication: (d_re, d_im) <- (a_re, a_im) * (b_re, b_im).
 *
 * All four source operands are copied first, and the four partial
 * products are computed before anything is stored, so the destination
 * may overlap with the sources:
 *    re = a_re*b_re - a_im*b_im
 *    im = a_re*b_im + a_im*b_re
 */
#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_a_re = (a_re); \
        fpr fpct_a_im = (a_im); \
        fpr fpct_b_re = (b_re); \
        fpr fpct_b_im = (b_im); \
        fpr fpct_rr = fpr_mul(fpct_a_re, fpct_b_re); \
        fpr fpct_ii = fpr_mul(fpct_a_im, fpct_b_im); \
        fpr fpct_ri = fpr_mul(fpct_a_re, fpct_b_im); \
        fpr fpct_ir = fpr_mul(fpct_a_im, fpct_b_re); \
        (d_re) = fpr_sub(fpct_rr, fpct_ii); \
        (d_im) = fpr_add(fpct_ri, fpct_ir); \
    } while (0)
/*
 * Complex squaring: (d_re, d_im) <- (a_re, a_im)^2.
 *
 *    re = a_re^2 - a_im^2
 *    im = 2 * (a_re * a_im)
 *
 * The source is copied before the destination is written, so the two
 * may overlap.
 */
#define FPC_SQR(d_re, d_im, a_re, a_im)   do { \
        fpr fpct_a_re = (a_re); \
        fpr fpct_a_im = (a_im); \
        fpr fpct_d_re = fpr_sub(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im)); \
        fpr fpct_d_im = fpr_double(fpr_mul(fpct_a_re, fpct_a_im)); \
        (d_re) = fpct_d_re; \
        (d_im) = fpct_d_im; \
    } while (0)
/*
 * Complex inversion: (d_re, d_im) <- 1 / (a_re, a_im).
 *
 * Computed as conj(a) / |a|^2: the squared modulus is inverted once,
 * then applied to a_re and to -a_im. The source is copied before the
 * destination is written, so the two may overlap.
 */
#define FPC_INV(d_re, d_im, a_re, a_im)   do { \
        fpr fpct_a_re = (a_re); \
        fpr fpct_a_im = (a_im); \
        fpr fpct_m = fpr_inv( \
            fpr_add(fpr_sqr(fpct_a_re), fpr_sqr(fpct_a_im))); \
        fpr fpct_d_re = fpr_mul(fpct_a_re, fpct_m); \
        fpr fpct_d_im = fpr_mul(fpr_neg(fpct_a_im), fpct_m); \
        (d_re) = fpct_d_re; \
        (d_im) = fpct_d_im; \
    } while (0)
/*
 * Complex division: (d_re, d_im) <- (a_re, a_im) / (b_re, b_im).
 *
 * The divisor is first inverted (conj(b) / |b|^2), then the dividend
 * is multiplied by that inverse. All sources are copied before the
 * destination is written, so overlaps are allowed.
 */
#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_a_re = (a_re); \
        fpr fpct_a_im = (a_im); \
        fpr fpct_b_re = (b_re); \
        fpr fpct_b_im = (b_im); \
        fpr fpct_m = fpr_inv( \
            fpr_add(fpr_sqr(fpct_b_re), fpr_sqr(fpct_b_im))); \
        fpr fpct_ib_re = fpr_mul(fpct_b_re, fpct_m); \
        fpr fpct_ib_im = fpr_mul(fpr_neg(fpct_b_im), fpct_m); \
        fpr fpct_d_re = fpr_sub( \
            fpr_mul(fpct_a_re, fpct_ib_re), \
            fpr_mul(fpct_a_im, fpct_ib_im)); \
        fpr fpct_d_im = fpr_add( \
            fpr_mul(fpct_a_re, fpct_ib_im), \
            fpr_mul(fpct_a_im, fpct_ib_re)); \
        (d_re) = fpct_d_re; \
        (d_im) = fpct_d_im; \
    } while (0)
/* | |||||
* Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the | |||||
* values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots | |||||
* of X^N+1 in the field of complex numbers. A crucial property is that | |||||
* w_{N-1-j} = conj(w_j) = 1/w_j for all j. | |||||
* | |||||
* FFT representation of a polynomial f (taken modulo X^N+1) is the | |||||
* set of values f(w_j). Since f is real, conj(f(w_j)) = f(conj(w_j)), | |||||
* thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values, | |||||
* for j = 0 to N/2-1; the other half can be recomputed easily when (if) | |||||
* needed. A consequence is that FFT representation has the same size | |||||
* as normal representation: N/2 complex numbers use N real numbers (each | |||||
* complex number is the combination of a real and an imaginary part). | |||||
* | |||||
* We use a specific ordering which makes computations easier. Let rev() | |||||
* be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we | |||||
* store the real and imaginary parts of f(w_j) in slots: | |||||
* | |||||
* Re(f(w_j)) -> slot rev(j)/2 | |||||
* Im(f(w_j)) -> slot rev(j)/2+N/2 | |||||
* | |||||
* (Note that rev(j) is even for j < N/2.) | |||||
*/ | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn) { | |||||
/* | |||||
* FFT algorithm in bit-reversal order uses the following | |||||
* iterative algorithm: | |||||
* | |||||
* t = N | |||||
* for m = 1; m < N; m *= 2: | |||||
* ht = t/2 | |||||
* for i1 = 0; i1 < m; i1 ++: | |||||
* j1 = i1 * t | |||||
* s = GM[m + i1] | |||||
* for j = j1; j < (j1 + ht); j ++: | |||||
* x = f[j] | |||||
* y = s * f[j + ht] | |||||
* f[j] = x + y | |||||
* f[j + ht] = x - y | |||||
* t = ht | |||||
* | |||||
* GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N). | |||||
* | |||||
* In the description above, f[] is supposed to contain complex | |||||
* numbers. In our in-memory representation, the real and | |||||
* imaginary parts of f[k] are in array slots k and k+N/2. | |||||
* | |||||
* We only keep the first half of the complex numbers. We can | |||||
* see that after the first iteration, the first and second halves | |||||
* of the array of complex numbers have separate lives, so we | |||||
* simply ignore the second part. | |||||
*/ | |||||
unsigned u; | |||||
size_t t, n, hn, m; | |||||
/* | |||||
* First iteration: compute f[j] + i * f[j+N/2] for all j < N/2 | |||||
* (because GM[1] = w^rev(1) = w^(N/2) = i). | |||||
* In our chosen representation, this is a no-op: everything is | |||||
* already where it should be. | |||||
*/ | |||||
/* | |||||
* Subsequent iterations are truncated to use only the first | |||||
* half of values. | |||||
*/ | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
t = hn; | |||||
for (u = 1, m = 2; u < logn; u ++, m <<= 1) { | |||||
size_t ht, hm, i1, j1; | |||||
ht = t >> 1; | |||||
hm = m >> 1; | |||||
for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) { | |||||
size_t j, j2; | |||||
j2 = j1 + ht; | |||||
fpr s_re, s_im; | |||||
s_re = fpr_gm_tab[((m + i1) << 1) + 0]; | |||||
s_im = fpr_gm_tab[((m + i1) << 1) + 1]; | |||||
for (j = j1; j < j2; j ++) { | |||||
fpr x_re, x_im, y_re, y_im; | |||||
x_re = f[j]; | |||||
x_im = f[j + hn]; | |||||
y_re = f[j + ht]; | |||||
y_im = f[j + ht + hn]; | |||||
FPC_MUL(y_re, y_im, y_re, y_im, s_re, s_im); | |||||
FPC_ADD(f[j], f[j + hn], | |||||
x_re, x_im, y_re, y_im); | |||||
FPC_SUB(f[j + ht], f[j + ht + hn], | |||||
x_re, x_im, y_re, y_im); | |||||
} | |||||
} | |||||
t = ht; | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn) { | |||||
/* | |||||
* Inverse FFT algorithm in bit-reversal order uses the following | |||||
* iterative algorithm: | |||||
* | |||||
* t = 1 | |||||
* for m = N; m > 1; m /= 2: | |||||
* hm = m/2 | |||||
* dt = t*2 | |||||
* for i1 = 0; i1 < hm; i1 ++: | |||||
* j1 = i1 * dt | |||||
* s = iGM[hm + i1] | |||||
* for j = j1; j < (j1 + t); j ++: | |||||
* x = f[j] | |||||
* y = f[j + t] | |||||
* f[j] = x + y | |||||
* f[j + t] = s * (x - y) | |||||
* t = dt | |||||
* for i1 = 0; i1 < N; i1 ++: | |||||
* f[i1] = f[i1] / N | |||||
* | |||||
* iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N) | |||||
* (actually, iGM[k] = 1/GM[k] = conj(GM[k])). | |||||
* | |||||
* In the main loop (not counting the final division loop), in | |||||
* all iterations except the last, the first and second half of f[] | |||||
* (as an array of complex numbers) are separate. In our chosen | |||||
* representation, we do not keep the second half. | |||||
* | |||||
* The last iteration recombines the recomputed half with the | |||||
* implicit half, and should yield only real numbers since the | |||||
* target polynomial is real; moreover, s = i at that step. | |||||
* Thus, when considering x and y: | |||||
* y = conj(x) since the final f[j] must be real | |||||
* Therefore, f[j] is filled with 2*Re(x), and f[j + t] is | |||||
* filled with 2*Im(x). | |||||
* But we already have Re(x) and Im(x) in array slots j and j+t | |||||
* in our chosen representation. That last iteration is thus a | |||||
* simple doubling of the values in all the array. | |||||
* | |||||
* We make the last iteration a no-op by tweaking the final | |||||
* division into a division by N/2, not N. | |||||
*/ | |||||
size_t u, n, hn, t, m; | |||||
n = (size_t)1 << logn; | |||||
t = 1; | |||||
m = n; | |||||
hn = n >> 1; | |||||
for (u = logn; u > 1; u --) { | |||||
size_t hm, dt, i1, j1; | |||||
hm = m >> 1; | |||||
dt = t << 1; | |||||
for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) { | |||||
size_t j, j2; | |||||
j2 = j1 + t; | |||||
fpr s_re, s_im; | |||||
s_re = fpr_gm_tab[((hm + i1) << 1) + 0]; | |||||
s_im = fpr_neg(fpr_gm_tab[((hm + i1) << 1) + 1]); | |||||
for (j = j1; j < j2; j ++) { | |||||
fpr x_re, x_im, y_re, y_im; | |||||
x_re = f[j]; | |||||
x_im = f[j + hn]; | |||||
y_re = f[j + t]; | |||||
y_im = f[j + t + hn]; | |||||
FPC_ADD(f[j], f[j + hn], | |||||
x_re, x_im, y_re, y_im); | |||||
FPC_SUB(x_re, x_im, x_re, x_im, y_re, y_im); | |||||
FPC_MUL(f[j + t], f[j + t + hn], | |||||
x_re, x_im, s_re, s_im); | |||||
} | |||||
} | |||||
t = dt; | |||||
m = hm; | |||||
} | |||||
/* | |||||
* Last iteration is a no-op, provided that we divide by N/2 | |||||
* instead of N. We need to make a special case for logn = 0. | |||||
*/ | |||||
if (logn > 0) { | |||||
fpr ni; | |||||
ni = fpr_p2_tab[logn]; | |||||
for (u = 0; u < n; u ++) { | |||||
f[u] = fpr_mul(f[u], ni); | |||||
} | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_add( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
a[u] = fpr_add(a[u], b[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_sub( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
a[u] = fpr_sub(a[u], b[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
a[u] = fpr_neg(a[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = (n >> 1); u < n; u ++) { | |||||
a[u] = fpr_neg(a[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_mul_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
b_re = b[u]; | |||||
b_im = b[u + hn]; | |||||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
b_re = b[u]; | |||||
b_im = fpr_neg(b[u + hn]); | |||||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn) { | |||||
/* | |||||
* Since each coefficient is multiplied with its own conjugate, | |||||
* the result contains only real values. | |||||
*/ | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
a[u] = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)); | |||||
a[u + hn] = fpr_zero; | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
a[u] = fpr_mul(a[u], x); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_div_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
b_re = b[u]; | |||||
b_im = b[u + hn]; | |||||
FPC_DIV(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *restrict d, | |||||
const fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im; | |||||
fpr b_re, b_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
b_re = b[u]; | |||||
b_im = b[u + hn]; | |||||
d[u] = fpr_inv(fpr_add( | |||||
fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)), | |||||
fpr_add(fpr_sqr(b_re), fpr_sqr(b_im)))); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *restrict d, | |||||
const fpr *restrict F, const fpr *restrict G, | |||||
const fpr *restrict f, const fpr *restrict g, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr F_re, F_im, G_re, G_im; | |||||
fpr f_re, f_im, g_re, g_im; | |||||
fpr a_re, a_im, b_re, b_im; | |||||
F_re = F[u]; | |||||
F_im = F[u + hn]; | |||||
G_re = G[u]; | |||||
G_im = G[u + hn]; | |||||
f_re = f[u]; | |||||
f_im = f[u + hn]; | |||||
g_re = g[u]; | |||||
g_im = g[u + hn]; | |||||
FPC_MUL(a_re, a_im, F_re, F_im, f_re, fpr_neg(f_im)); | |||||
FPC_MUL(b_re, b_im, G_re, G_im, g_re, fpr_neg(g_im)); | |||||
d[u] = fpr_add(a_re, b_re); | |||||
d[u + hn] = fpr_add(a_im, b_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
a[u] = fpr_mul(a[u], b[u]); | |||||
a[u + hn] = fpr_mul(a[u + hn], b[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr ib; | |||||
ib = fpr_inv(b[u]); | |||||
a[u] = fpr_mul(a[u], ib); | |||||
a[u + hn] = fpr_mul(a[u + hn], ib); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft( | |||||
const fpr *restrict g00, | |||||
fpr *restrict g01, fpr *restrict g11, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||||
fpr mu_re, mu_im; | |||||
g00_re = g00[u]; | |||||
g00_im = g00[u + hn]; | |||||
g01_re = g01[u]; | |||||
g01_im = g01[u + hn]; | |||||
g11_re = g11[u]; | |||||
g11_im = g11[u + hn]; | |||||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||||
FPC_SUB(g11[u], g11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||||
g01[u] = mu_re; | |||||
g01[u + hn] = fpr_neg(mu_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft( | |||||
fpr *restrict d11, fpr *restrict l10, | |||||
const fpr *restrict g00, const fpr *restrict g01, | |||||
const fpr *restrict g11, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||||
fpr mu_re, mu_im; | |||||
g00_re = g00[u]; | |||||
g00_im = g00[u + hn]; | |||||
g01_re = g01[u]; | |||||
g01_im = g01[u + hn]; | |||||
g11_re = g11[u]; | |||||
g11_im = g11[u + hn]; | |||||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||||
FPC_SUB(d11[u], d11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||||
l10[u] = mu_re; | |||||
l10[u + hn] = fpr_neg(mu_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_split_fft( | |||||
fpr *restrict f0, fpr *restrict f1, | |||||
const fpr *restrict f, unsigned logn) { | |||||
/* | |||||
* The FFT representation we use is in bit-reversed order | |||||
* (element i contains f(w^(rev(i))), where rev() is the | |||||
* bit-reversal function over the ring degree. This changes | |||||
* indexes with regards to the Falcon specification. | |||||
*/ | |||||
size_t n, hn, qn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
qn = hn >> 1; | |||||
/* | |||||
* We process complex values by pairs. For logn = 1, there is only | |||||
* one complex value (the other one is the implicit conjugate), | |||||
* so we add the two lines below because the loop will be | |||||
* skipped. | |||||
*/ | |||||
f0[0] = f[0]; | |||||
f1[0] = f[hn]; | |||||
for (u = 0; u < qn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
fpr t_re, t_im; | |||||
a_re = f[(u << 1) + 0]; | |||||
a_im = f[(u << 1) + 0 + hn]; | |||||
b_re = f[(u << 1) + 1]; | |||||
b_im = f[(u << 1) + 1 + hn]; | |||||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||||
f0[u] = fpr_half(t_re); | |||||
f0[u + qn] = fpr_half(t_im); | |||||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||||
FPC_MUL(t_re, t_im, t_re, t_im, | |||||
fpr_gm_tab[((u + hn) << 1) + 0], | |||||
fpr_neg(fpr_gm_tab[((u + hn) << 1) + 1])); | |||||
f1[u] = fpr_half(t_re); | |||||
f1[u + qn] = fpr_half(t_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_poly_merge_fft( | |||||
fpr *restrict f, | |||||
const fpr *restrict f0, const fpr *restrict f1, unsigned logn) { | |||||
size_t n, hn, qn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
qn = hn >> 1; | |||||
/* | |||||
* An extra copy to handle the special case logn = 1. | |||||
*/ | |||||
f[0] = f0[0]; | |||||
f[hn] = f1[0]; | |||||
for (u = 0; u < qn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
fpr t_re, t_im; | |||||
a_re = f0[u]; | |||||
a_im = f0[u + qn]; | |||||
FPC_MUL(b_re, b_im, f1[u], f1[u + qn], | |||||
fpr_gm_tab[((u + hn) << 1) + 0], | |||||
fpr_gm_tab[((u + hn) << 1) + 1]); | |||||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||||
f[(u << 1) + 0] = t_re; | |||||
f[(u << 1) + 0 + hn] = t_im; | |||||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||||
f[(u << 1) + 1] = t_re; | |||||
f[(u << 1) + 1 + hn] = t_im; | |||||
} | |||||
} |
@@ -0,0 +1,457 @@ | |||||
/* | |||||
* Floating-point operations. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
/* ====================================================================== */ | |||||
/* | |||||
* Custom floating-point implementation with integer arithmetics. We | |||||
* use IEEE-754 "binary64" format, with some simplifications: | |||||
* | |||||
* - Top bit is s = 1 for negative, 0 for positive. | |||||
* | |||||
* - Exponent e uses the next 11 bits (bits 52 to 62, inclusive). | |||||
* | |||||
* - Mantissa m uses the 52 low bits. | |||||
* | |||||
* Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52)) | |||||
* i.e. the mantissa really is a 53-bit number (less than 2.0, but not | |||||
* less than 1.0), but the top bit (equal to 1 by definition) is omitted | |||||
* in the encoding. | |||||
* | |||||
* In IEEE-754, there are some special values: | |||||
* | |||||
* - If e = 2047, then the value is either an infinite (m = 0) or | |||||
* a NaN (m != 0). | |||||
* | |||||
* - If e = 0, then the value is either a zero (m = 0) or a subnormal, | |||||
* aka "denormalized number" (m != 0). | |||||
* | |||||
* Of these, we only need the zeros. The caller is responsible for not | |||||
* providing operands that would lead to infinites, NaNs or subnormals. | |||||
* If inputs are such that values go out of range, then indeterminate | |||||
* values are returned (it would still be deterministic, but no specific | |||||
* value may be relied upon). | |||||
* | |||||
* At the C level, the three parts are stored in a 64-bit unsigned | |||||
* word. | |||||
* | |||||
* One may note that a property of the IEEE-754 format is that order | |||||
* is preserved for positive values: if two positive floating-point | |||||
* values x and y are such that x < y, then their respective encodings | |||||
* as _signed_ 64-bit integers i64(x) and i64(y) will be such that | |||||
* i64(x) < i64(y). For negative values, order is reversed: if x < 0, | |||||
* y < 0, and x < y, then i64(x) > i64(y).
* | |||||
* IMPORTANT ASSUMPTIONS: | |||||
* ====================== | |||||
* | |||||
* For proper computations, and constant-time behaviour, we assume the | |||||
* following: | |||||
* | |||||
* - 32x32->64 multiplication (unsigned) has an execution time that | |||||
* is independent of its operands. This is true of most modern | |||||
* x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+ | |||||
* and M3 (in the M0 and M0+, this is done in software, so it depends | |||||
* on that routine), and the PowerPC cores from the G3/G4 lines. | |||||
* For more info, see: https://www.bearssl.org/ctmul.html | |||||
* | |||||
* - Left-shifts and right-shifts of 32-bit values have an execution | |||||
* time which does not depend on the shifted value nor on the | |||||
* shift count. An historical exception is the Pentium IV, but most | |||||
* modern CPUs have barrel shifters. Some small microcontrollers
* might have varying-time shifts (not the ARM Cortex M*, though). | |||||
* | |||||
* - Right-shift of a signed negative value performs a sign extension. | |||||
* As per the C standard, this operation returns an | |||||
* implementation-defined result (this is NOT an "undefined | |||||
* behaviour"). On most/all systems, an arithmetic shift is | |||||
* performed, because this is what makes most sense. | |||||
*/ | |||||
/* | |||||
* Normally we should declare the 'fpr' type to be a struct or union | |||||
* around the internal 64-bit value; however, we want to use the | |||||
* direct 64-bit integer type to enable a lighter call convention on | |||||
* ARM platforms. This means that direct (invalid) use of operators | |||||
* such as '*' or '+' will not be caught by the compiler. We rely on | |||||
* the "normal" (non-emulated) code to detect such instances. | |||||
*/ | |||||
/* Raw 64-bit word holding the sign, exponent and mantissa bit fields. */
typedef uint64_t fpr;
/* | |||||
* For computations, we split values into an integral mantissa in the | |||||
* 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is | |||||
* "sticky" (it is set to 1 if any of the bits below it is 1); when | |||||
* re-encoding, the low two bits are dropped, but may induce an | |||||
* increment in the value for proper rounding. | |||||
*/ | |||||
/* | |||||
* Right-shift a 64-bit unsigned value by a possibly secret shift count. | |||||
* We assumed that the underlying architecture had a barrel shifter for | |||||
* 32-bit shifts, but for 64-bit shifts on a 32-bit system, this will | |||||
* typically invoke a software routine that is not necessarily | |||||
* constant-time; hence the function below. | |||||
* | |||||
* Shift count n MUST be in the 0..63 range. | |||||
*/ | |||||
static inline uint64_t
fpr_ursh(uint64_t x, int n) {
    /*
     * Two-step shift: first an optional shift by 32, selected with
     * an all-ones/all-zeros mask built from bit 5 of n (no branch),
     * then a shift by the low five bits of n.
     */
    uint64_t sel = -(uint64_t)(n >> 5);
    uint64_t by32 = x >> 32;

    x ^= (x ^ by32) & sel;
    return x >> (n & 31);
}
/* | |||||
* Right-shift a 64-bit signed value by a possibly secret shift count | |||||
* (see fpr_ursh() for the rationale). | |||||
* | |||||
* Shift count n MUST be in the 0..63 range. | |||||
*/ | |||||
static inline int64_t
fpr_irsh(int64_t x, int n) {
    /*
     * Same two-step scheme as the unsigned variant: an optional
     * shift by 32 selected with a mask built from bit 5 of n, then
     * a shift by the low five bits of n. The right shifts act on a
     * signed value and are assumed to sign-extend.
     */
    int64_t sel = -(int64_t)(n >> 5);
    int64_t by32 = x >> 32;

    x ^= (x ^ by32) & sel;
    return x >> (n & 31);
}
/* | |||||
* Left-shift a 64-bit unsigned value by a possibly secret shift count | |||||
* (see fpr_ursh() for the rationale). | |||||
* | |||||
* Shift count n MUST be in the 0..63 range. | |||||
*/ | |||||
static inline uint64_t
fpr_ulsh(uint64_t x, int n) {
    /*
     * Two-step shift: first an optional shift by 32, selected with
     * an all-ones/all-zeros mask built from bit 5 of n (no branch),
     * then a shift by the low five bits of n.
     */
    uint64_t sel = -(uint64_t)(n >> 5);
    uint64_t by32 = x << 32;

    x ^= (x ^ by32) & sel;
    return x << (n & 31);
}
/* | |||||
* Expectations: | |||||
* s = 0 or 1 | |||||
* exponent e is "arbitrary" and unbiased | |||||
* 2^54 <= m < 2^55 | |||||
* Numerical value is (-1)^s * m * 2^e
* | |||||
* Exponents which are too low lead to value zero. If the exponent is | |||||
* too large, the returned value is indeterminate. | |||||
* | |||||
* If m = 0, then a zero is returned (using the provided sign). | |||||
* If e < -1076, then a zero is returned (regardless of the value of m). | |||||
* If e >= -1076 and e != 0, m must be within the expected range | |||||
* (2^54 to 2^55-1). | |||||
*/ | |||||
static inline fpr | |||||
FPR(int s, int e, uint64_t m) { | |||||
fpr x; | |||||
uint32_t t; | |||||
unsigned f; | |||||
/* | |||||
* If e >= -1076, then the value is "normal"; otherwise, it | |||||
* should be a subnormal, which we clamp down to zero. | |||||
*/ | |||||
e += 1076; | |||||
t = (uint32_t)e >> 31; | |||||
m &= (uint64_t)t - 1; | |||||
/* | |||||
* If m = 0 then we want a zero; make e = 0 too, but conserve | |||||
* the sign. | |||||
*/ | |||||
t = (uint32_t)(m >> 54); | |||||
e &= -(int)t; | |||||
/* | |||||
* The 52 mantissa bits come from m. Value m has its top bit set | |||||
* (unless it is a zero); we leave it "as is": the top bit will | |||||
* increment the exponent by 1, except when m = 0, which is | |||||
* exactly what we want. | |||||
*/ | |||||
x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); | |||||
/* | |||||
* Rounding: if the low three bits of m are 011, 110 or 111, | |||||
* then the value should be incremented to get the next | |||||
* representable value. This implements the usual | |||||
* round-to-nearest rule (with preference to even values in case | |||||
* of a tie). Note that the increment may make a carry spill | |||||
* into the exponent field, which is again exactly what we want | |||||
* in that case. | |||||
*/ | |||||
f = (unsigned)m & 7U; | |||||
x += (0xC8U >> f) & 1; | |||||
return x; | |||||
} | |||||
/*
 * fpr_scaled is renamed to a prefixed external symbol; only its
 * prototype appears here, the definition lives elsewhere.
 * NOTE(review): presumably returns i * 2^sc as an fpr -- confirm
 * against the definition.
 */
#define fpr_scaled PQCLEAN_FALCON1024_CLEAN_fpr_scaled
fpr fpr_scaled(int64_t i, int sc);
/*
 * Convert a signed 64-bit integer to fpr: fpr_scaled() with a scaling
 * argument of 0.
 */
static inline fpr
fpr_of(int64_t i) {
    return fpr_scaled(i, 0);
}
/*
 * Constants, given as raw binary64 bit patterns written in decimal.
 * NOTE(review): the meaning of the scheme-specific entries
 * (inverse_of_q, inv_2sqrsigma0, inv_sigma, sigma_min_*, log2,
 * inv_log2, bnorm_max) is inferred from their names only -- confirm
 * against the specification before relying on it.
 */
/* 12289.0 (exponent field 1036, mantissa 4097 * 2^39). */
static const fpr fpr_q = 4667981563525332992;
static const fpr fpr_inverse_of_q = 4545632735260551042;
static const fpr fpr_inv_2sqrsigma0 = 4594603506513722306;
static const fpr fpr_inv_sigma = 4573359825155195350;
static const fpr fpr_sigma_min_9 = 4608495221497168882;
static const fpr fpr_sigma_min_10 = 4608586345619182117;
static const fpr fpr_log2 = 4604418534313441775;
static const fpr fpr_inv_log2 = 4609176140021203710;
static const fpr fpr_bnorm_max = 4670353323383631276;
/* +0.0 */
static const fpr fpr_zero = 0;
/* 1.0 = 0x3FF0000000000000 */
static const fpr fpr_one = 4607182418800017408;
/* 2.0 = 0x4000000000000000 */
static const fpr fpr_two = 4611686018427387904;
/* 0.5 = 0x3FE0000000000000 */
static const fpr fpr_onehalf = 4602678819172646912;
/* 2^31 */
static const fpr fpr_ptwo31 = 4746794007248502784;
/* 2^31 - 1 */
static const fpr fpr_ptwo31m1 = 4746794007244308480;
/* -(2^31 - 1): same as fpr_ptwo31m1 with the sign bit set. */
static const fpr fpr_mtwo31m1 = 13970166044099084288U;
/*
 * Same bit pattern as fpr_ptwo63 below: 2^63 - 1 is not exactly
 * representable with a 53-bit mantissa.
 */
static const fpr fpr_ptwo63m1 = 4890909195324358656;
/* Same as fpr_ptwo63m1 with the sign bit set. */
static const fpr fpr_mtwo63m1 = 14114281232179134464U;
/* 2^63 */
static const fpr fpr_ptwo63 = 4890909195324358656;
static inline int64_t | |||||
fpr_rint(fpr x) { | |||||
uint64_t m, d; | |||||
int e; | |||||
uint32_t s, dd; | |||||
unsigned f; | |||||
/* | |||||
* We assume that the value fits in -(2^63-1)..+(2^63-1). We can | |||||
* thus extract the mantissa as a 63-bit integer, then right-shift | |||||
* it as needed. | |||||
*/ | |||||
m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||||
e = 1085 - ((int)(x >> 52) & 0x7FF); | |||||
/* | |||||
* If a shift of more than 63 bits is needed, then simply set m | |||||
* to zero. This also covers the case of an input operand equal | |||||
* to zero. | |||||
*/ | |||||
m &= -(uint64_t)((uint32_t)(e - 64) >> 31); | |||||
e &= 63; | |||||
/* | |||||
* Right-shift m as needed. Shift count is e. Proper rounding | |||||
* mandates that: | |||||
* - If the highest dropped bit is zero, then round low. | |||||
* - If the highest dropped bit is one, and at least one of the | |||||
* other dropped bits is one, then round up. | |||||
* - If the highest dropped bit is one, and all other dropped | |||||
* bits are zero, then round up if the lowest kept bit is 1, | |||||
* or low otherwise (i.e. ties are broken by "rounding to even"). | |||||
* | |||||
* We thus first extract a word consisting of all the dropped bit | |||||
* AND the lowest kept bit; then we shrink it down to three bits, | |||||
* the lowest being "sticky". | |||||
*/ | |||||
d = fpr_ulsh(m, 63 - e); | |||||
dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); | |||||
f = (unsigned)(d >> 61) | (unsigned)((dd | -dd) >> 31); | |||||
m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); | |||||
/* | |||||
* Apply the sign bit. | |||||
*/ | |||||
s = (uint32_t)(x >> 63); | |||||
return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; | |||||
} | |||||
static inline long | |||||
fpr_floor(fpr x) { | |||||
uint64_t t; | |||||
int64_t xi; | |||||
int e, cc; | |||||
/* | |||||
* We extract the integer as a _signed_ 64-bit integer with | |||||
* a scaling factor. Since we assume that the value fits | |||||
* in the -(2^63-1)..+(2^63-1) range, we can left-shift the | |||||
* absolute value to make it in the 2^62..2^63-1 range: we | |||||
* will only need a right-shift afterwards. | |||||
*/ | |||||
e = (int)(x >> 52) & 0x7FF; | |||||
t = x >> 63; | |||||
xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) | |||||
& (((uint64_t)1 << 63) - 1)); | |||||
xi = (xi ^ -(int64_t)t) + (int64_t)t; | |||||
cc = 1085 - e; | |||||
/* | |||||
* We perform an arithmetic right-shift on the value. This | |||||
* applies floor() semantics on both positive and negative values | |||||
* (rounding toward minus infinity). | |||||
*/ | |||||
xi = fpr_irsh(xi, cc & 63); | |||||
/* | |||||
* If the true shift count was 64 or more, then we should instead | |||||
* replace xi with 0 (if nonnegative) or -1 (if negative). Edge | |||||
* case: -0 will be floored to -1, not 0 (whether this is correct | |||||
* is debatable; in any case, the other functions normalize zero | |||||
* to +0). | |||||
* | |||||
* For an input of zero, the non-shifted xi was incorrect (we used | |||||
* a top implicit bit of value 1, not 0), but this does not matter | |||||
* since this operation will clamp it down. | |||||
*/ | |||||
xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); | |||||
return xi; | |||||
} | |||||
static inline int64_t | |||||
fpr_trunc(fpr x) { | |||||
uint64_t t, xu; | |||||
int e, cc; | |||||
/* | |||||
* Extract the absolute value. Since we assume that the value | |||||
* fits in the -(2^63-1)..+(2^63-1) range, we can left-shift | |||||
* the absolute value into the 2^62..2^63-1 range, and then | |||||
* do a right shift afterwards. | |||||
*/ | |||||
e = (int)(x >> 52) & 0x7FF; | |||||
xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||||
cc = 1085 - e; | |||||
xu = fpr_ursh(xu, cc & 63); | |||||
/* | |||||
* If the exponent is too low (cc > 63), then the shift was wrong | |||||
* and we must clamp the value to 0. This also covers the case | |||||
* of an input equal to zero. | |||||
*/ | |||||
xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); | |||||
/* | |||||
* Apply back the sign, if the source value is negative. | |||||
*/ | |||||
t = x >> 63; | |||||
xu = (xu ^ -t) + t; | |||||
return *(int64_t *)&xu; | |||||
} | |||||
#define fpr_add PQCLEAN_FALCON1024_CLEAN_fpr_add | |||||
fpr fpr_add(fpr x, fpr y); | |||||
static inline fpr | |||||
fpr_sub(fpr x, fpr y) { | |||||
y ^= (uint64_t)1 << 63; | |||||
return fpr_add(x, y); | |||||
} | |||||
static inline fpr | |||||
fpr_neg(fpr x) { | |||||
x ^= (uint64_t)1 << 63; | |||||
return x; | |||||
} | |||||
static inline fpr | |||||
fpr_half(fpr x) { | |||||
/* | |||||
* To divide a value by 2, we just have to subtract 1 from its | |||||
* exponent, but we have to take care of zero. | |||||
*/ | |||||
uint32_t t; | |||||
x -= (uint64_t)1 << 52; | |||||
t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; | |||||
x &= (uint64_t)t - 1; | |||||
return x; | |||||
} | |||||
static inline fpr | |||||
fpr_double(fpr x) { | |||||
/* | |||||
* To double a value, we just increment by one the exponent. We | |||||
* don't care about infinites or NaNs; however, 0 is a | |||||
* special case. | |||||
*/ | |||||
x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; | |||||
return x; | |||||
} | |||||
#define fpr_mul PQCLEAN_FALCON1024_CLEAN_fpr_mul | |||||
fpr fpr_mul(fpr x, fpr y); | |||||
static inline fpr | |||||
fpr_sqr(fpr x) { | |||||
return fpr_mul(x, x); | |||||
} | |||||
#define fpr_div PQCLEAN_FALCON1024_CLEAN_fpr_div | |||||
fpr fpr_div(fpr x, fpr y); | |||||
static inline fpr | |||||
fpr_inv(fpr x) { | |||||
return fpr_div(4607182418800017408u, x); | |||||
} | |||||
#define fpr_sqrt PQCLEAN_FALCON1024_CLEAN_fpr_sqrt | |||||
fpr fpr_sqrt(fpr x); | |||||
static inline int | |||||
fpr_lt(fpr x, fpr y) { | |||||
/* | |||||
* If x >= 0 or y >= 0, a signed comparison yields the proper | |||||
* result: | |||||
* - For positive values, the order is preserved. | |||||
* - The sign bit is at the same place as in integers, so | |||||
* sign is preserved. | |||||
* | |||||
* If both x and y are negative, then the order is reversed. | |||||
* We cannot simply invert the comparison result in that case | |||||
* because it would not handle the edge case x = y properly. | |||||
*/ | |||||
int cc0, cc1; | |||||
cc0 = *(int64_t *)&x < *(int64_t *)&y; | |||||
cc1 = *(int64_t *)&x > *(int64_t *)&y; | |||||
return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); | |||||
} | |||||
/* | |||||
* Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50 | |||||
* bits or so. | |||||
*/ | |||||
#define fpr_expm_p63 PQCLEAN_FALCON1024_CLEAN_fpr_expm_p63 | |||||
uint64_t fpr_expm_p63(fpr x); | |||||
#define fpr_gm_tab PQCLEAN_FALCON1024_CLEAN_fpr_gm_tab | |||||
extern const fpr fpr_gm_tab[]; | |||||
#define fpr_p2_tab PQCLEAN_FALCON1024_CLEAN_fpr_p2_tab | |||||
extern const fpr fpr_p2_tab[]; | |||||
/* ====================================================================== */ | |||||
@@ -0,0 +1,663 @@ | |||||
#ifndef FALCON_INNER_H__ | |||||
#define FALCON_INNER_H__ | |||||
/* | |||||
* Internal functions for Falcon. This is not the API intended to be | |||||
* used by applications; instead, this internal API provides all the | |||||
* primitives on which wrappers build to provide external APIs. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include <stdint.h> | |||||
#include <stdlib.h> | |||||
#include <string.h> | |||||
/* ==================================================================== */ | |||||
/* | |||||
* SHAKE256 implementation (shake.c). | |||||
* | |||||
* API is defined to be easily replaced with the fips202.h API defined | |||||
* as part of PQ Clean. | |||||
*/ | |||||
#include "fips202.h" | |||||
#define shake256_context shake256incctx | |||||
#define shake256_init(sc) shake256_inc_init(sc) | |||||
#define shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len) | |||||
#define shake256_flip(sc) shake256_inc_finalize(sc) | |||||
#define shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc) | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Encoding/decoding functions (codec.c). | |||||
* | |||||
* Encoding functions take as parameters an output buffer (out) with | |||||
* a given maximum length (max_out_len); returned value is the actual | |||||
* number of bytes which have been written. If the output buffer is | |||||
* not large enough, then 0 is returned (some bytes may have been | |||||
* written to the buffer). If 'out' is NULL, then 'max_out_len' is | |||||
* ignored; instead, the function computes and returns the actual | |||||
* required output length (in bytes). | |||||
* | |||||
* Decoding functions take as parameters an input buffer (in) with | |||||
* its maximum length (max_in_len); returned value is the actual number | |||||
* of bytes that have been read from the buffer. If the provided length | |||||
* is too short, then 0 is returned. | |||||
* | |||||
* Values to encode or decode are vectors of integers, with N = 2^logn | |||||
* elements. | |||||
* | |||||
* Three encoding formats are defined: | |||||
* | |||||
* - modq: sequence of values modulo 12289, each encoded over exactly | |||||
* 14 bits. The encoder and decoder verify that integers are within | |||||
* the valid range (0..12288). Values are arrays of uint16. | |||||
* | |||||
* - trim: sequence of signed integers, a specified number of bits | |||||
* each. The number of bits is provided as parameter and includes | |||||
* the sign bit. Each integer x must be such that |x| < 2^(bits-1) | |||||
* (which means that the -2^(bits-1) value is forbidden); encode and | |||||
* decode functions check that property. Values are arrays of | |||||
* int16_t or int8_t, corresponding to names 'trim_i16' and | |||||
* 'trim_i8', respectively. | |||||
* | |||||
* - comp: variable-length encoding for signed integers; each integer | |||||
* uses a minimum of 9 bits, possibly more. This is normally used | |||||
* only for signatures. | |||||
* | |||||
*/ | |||||
size_t PQCLEAN_FALCON1024_CLEAN_modq_encode(void *out, size_t max_out_len, | |||||
const uint16_t *x, unsigned logn); | |||||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_encode(void *out, size_t max_out_len, | |||||
const int16_t *x, unsigned logn, unsigned bits); | |||||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(void *out, size_t max_out_len, | |||||
const int8_t *x, unsigned logn, unsigned bits); | |||||
size_t PQCLEAN_FALCON1024_CLEAN_comp_encode(void *out, size_t max_out_len, | |||||
const int16_t *x, unsigned logn); | |||||
size_t PQCLEAN_FALCON1024_CLEAN_modq_decode(uint16_t *x, unsigned logn, | |||||
const void *in, size_t max_in_len); | |||||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, | |||||
const void *in, size_t max_in_len); | |||||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, | |||||
const void *in, size_t max_in_len); | |||||
size_t PQCLEAN_FALCON1024_CLEAN_comp_decode(int16_t *x, unsigned logn, | |||||
const void *in, size_t max_in_len); | |||||
/* | |||||
* Number of bits for key elements, indexed by logn (1 to 10). This | |||||
* is at most 8 bits for all degrees, but some degrees may have shorter | |||||
* elements. | |||||
*/ | |||||
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[]; | |||||
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[]; | |||||
/* | |||||
* Maximum size, in bits, of elements in a signature, indexed by logn | |||||
* (1 to 10). The size includes the sign bit. | |||||
*/ | |||||
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[]; | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Support functions used for both signature generation and signature | |||||
* verification (common.c). | |||||
*/ | |||||
/* | |||||
* From a SHAKE256 context (must be already flipped), produce a new | |||||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_hash_to_point(shake256_context *sc, | |||||
uint16_t *x, unsigned logn, uint8_t *tmp); | |||||
/* | |||||
* Tell whether a given vector (2N coordinates, in two halves) is | |||||
* acceptable as a signature. This compares the appropriate norm of the | |||||
* vector with the acceptance bound. Returned value is 1 on success | |||||
* (vector is short enough to be acceptable), 0 otherwise. | |||||
*/ | |||||
int PQCLEAN_FALCON1024_CLEAN_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); | |||||
/* | |||||
* Tell whether a given vector (2N coordinates, in two halves) is | |||||
* acceptable as a signature. Instead of the first half s1, this | |||||
* function receives the "saturated squared norm" of s1, i.e. the | |||||
* sum of the squares of the coordinates of s1 (saturated at 2^32-1 | |||||
* if the sum exceeds 2^31-1). | |||||
* | |||||
* Returned value is 1 on success (vector is short enough to be | |||||
* acceptable), 0 otherwise. | |||||
*/ | |||||
int PQCLEAN_FALCON1024_CLEAN_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Signature verification functions (vrfy.c). | |||||
*/ | |||||
/* | |||||
* Convert a public key to NTT + Montgomery format. Conversion is done | |||||
* in place. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn); | |||||
/* | |||||
* Internal signature verification code: | |||||
* c0[] contains the hashed nonce+message | |||||
* s2[] is the decoded signature | |||||
* h[] contains the public key, in NTT + Montgomery format | |||||
* logn is the degree log | |||||
* tmp[] temporary, must have at least 2*2^logn bytes | |||||
* Returned value is 1 on success, 0 on error. | |||||
*/ | |||||
int PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||||
const uint16_t *h, unsigned logn, uint8_t *tmp); | |||||
/* | |||||
* Compute the public key h[], given the private key elements f[] and | |||||
* g[]. This computes h = g/f mod phi mod q, where phi is the polynomial | |||||
* modulus. This function returns 1 on success, 0 on error (an error is | |||||
* reported if f is not invertible mod phi mod q). | |||||
* | |||||
* The tmp[] array must have room for at least 2*2^logn elements. | |||||
*/ | |||||
int PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h, | |||||
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); | |||||
/* | |||||
* Recompute the fourth private key element. Private key consists in | |||||
* four polynomials with small coefficients f, g, F and G, which are | |||||
* such that fG - gF = q mod phi; furthermore, f is invertible modulo | |||||
* phi and modulo q. This function recomputes G from f, g and F. | |||||
* | |||||
* The tmp[] array must have room for at least 4*2^logn bytes. | |||||
* | |||||
* Returned value is 1 on success, 0 on error (f not invertible).
*/ | |||||
int PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G, | |||||
const int8_t *f, const int8_t *g, const int8_t *F, | |||||
unsigned logn, uint8_t *tmp); | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Implementation of floating-point real numbers (fpr.h, fpr.c). | |||||
*/ | |||||
/* | |||||
* Real numbers are implemented by an extra header file, included below. | |||||
* This is meant to support pluggable implementations. The default | |||||
* implementation relies on the C type 'double'. | |||||
* | |||||
* The included file must define the following types, functions and | |||||
* constants: | |||||
* | |||||
* fpr | |||||
* type for a real number | |||||
* | |||||
* fpr fpr_of(int64_t i) | |||||
* cast an integer into a real number; source must be in the | |||||
* -(2^63-1)..+(2^63-1) range | |||||
* | |||||
* fpr fpr_scaled(int64_t i, int sc) | |||||
* compute i*2^sc as a real number; source 'i' must be in the | |||||
* -(2^63-1)..+(2^63-1) range | |||||
* | |||||
* fpr fpr_ldexp(fpr x, int e) | |||||
* compute x*2^e | |||||
* | |||||
* int64_t fpr_rint(fpr x) | |||||
* round x to the nearest integer; x must be in the -(2^63-1) | |||||
* to +(2^63-1) range | |||||
* | |||||
* int64_t fpr_trunc(fpr x) | |||||
* round to an integer; this rounds towards zero; value must | |||||
* be in the -(2^63-1) to +(2^63-1) range | |||||
* | |||||
* fpr fpr_add(fpr x, fpr y) | |||||
* compute x + y | |||||
* | |||||
* fpr fpr_sub(fpr x, fpr y) | |||||
* compute x - y | |||||
* | |||||
* fpr fpr_neg(fpr x) | |||||
* compute -x | |||||
* | |||||
* fpr fpr_half(fpr x) | |||||
* compute x/2 | |||||
* | |||||
* fpr fpr_double(fpr x) | |||||
* compute x*2 | |||||
* | |||||
* fpr fpr_mul(fpr x, fpr y) | |||||
* compute x * y | |||||
* | |||||
* fpr fpr_sqr(fpr x) | |||||
* compute x * x | |||||
* | |||||
* fpr fpr_inv(fpr x) | |||||
* compute 1/x | |||||
* | |||||
* fpr fpr_div(fpr x, fpr y) | |||||
* compute x/y | |||||
* | |||||
* fpr fpr_sqrt(fpr x) | |||||
* compute the square root of x | |||||
* | |||||
* int fpr_lt(fpr x, fpr y) | |||||
* return 1 if x < y, 0 otherwise | |||||
* | |||||
* uint64_t fpr_expm_p63(fpr x)
* return exp(-x), assuming that 0 <= x < log(2). The returned value
* is scaled to 63 bits (i.e. the function returns 2^63*exp(-x),
* rounded to the nearest integer). Computation should have a
* precision of at least 45 bits.
* | |||||
* const fpr fpr_gm_tab[] | |||||
* array of constants for FFT / iFFT | |||||
* | |||||
* const fpr fpr_p2_tab[] | |||||
* precomputed powers of 2 (by index, 0 to 10) | |||||
* | |||||
* Constants of type 'fpr': | |||||
* | |||||
* fpr fpr_q 12289 | |||||
* fpr fpr_inverse_of_q 1/12289 | |||||
* fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) | |||||
* fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) | |||||
* fpr fpr_sigma_min_9 1.291500756233514568549480827642 | |||||
* fpr fpr_sigma_min_10 1.311734375905083682667395805765 | |||||
* fpr fpr_log2 log(2) | |||||
* fpr fpr_inv_log2 1/log(2) | |||||
* fpr fpr_bnorm_max 16822.4121 | |||||
* fpr fpr_zero 0 | |||||
* fpr fpr_one 1 | |||||
* fpr fpr_two 2 | |||||
* fpr fpr_onehalf 0.5 | |||||
* fpr fpr_ptwo31 2^31 | |||||
* fpr fpr_ptwo31m1 2^31-1 | |||||
* fpr fpr_mtwo31m1 -(2^31-1) | |||||
* fpr fpr_ptwo63m1 2^63-1 | |||||
* fpr fpr_mtwo63m1 -(2^63-1) | |||||
* fpr fpr_ptwo63 2^63 | |||||
*/ | |||||
#include "fpr.h" | |||||
/* ==================================================================== */ | |||||
/* | |||||
* RNG (rng.c). | |||||
* | |||||
* A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 | |||||
* context (flipped) and is used for bulk pseudorandom generation. | |||||
* A system-dependent seed generator is also provided. | |||||
*/ | |||||
/* | |||||
* Obtain a random seed from the system RNG. | |||||
* | |||||
* Returned value is 1 on success, 0 on error. | |||||
*/ | |||||
int PQCLEAN_FALCON1024_CLEAN_get_seed(void *seed, size_t seed_len); | |||||
/*
 * PRNG context.
 *
 * Output is produced in advance into a large buffer ('buf') and then
 * consumed from it; 'ptr' is the read offset into that buffer. 'state'
 * holds the state of the underlying PRNG algorithm, whose layout
 * depends on the selected algorithm.
 *
 * Both byte arrays are wrapped in a union with a uint64_t member
 * ('dummy_u64') so that they are suitably aligned for direct 64-bit
 * accesses.
 */
typedef struct {
    /* Pre-generated output bytes. */
    union {
        unsigned char d[512]; /* MUST be 512, exactly */
        uint64_t dummy_u64;
    } buf;

    /* Offset in buf.d of the next byte to hand out. */
    size_t ptr;

    /* Algorithm-specific internal state. */
    union {
        unsigned char d[256];
        uint64_t dummy_u64;
    } state;

    /* NOTE(review): presumably identifies the selected algorithm -- confirm in rng.c. */
    int type;
} prng;
/* | |||||
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 | |||||
* context (in "flipped" state) to obtain its initial state. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, shake256_context *src); | |||||
/* | |||||
* Refill the PRNG buffer. This is normally invoked automatically, and | |||||
* is declared here only so that prng_get_u64() may be inlined. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p); | |||||
/* | |||||
* Get some bytes from a PRNG. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len); | |||||
/* | |||||
* Get a 64-bit random value from a PRNG. | |||||
*/ | |||||
static inline uint64_t | |||||
prng_get_u64(prng *p) { | |||||
size_t u; | |||||
/* | |||||
* If there are less than 9 bytes in the buffer, we refill it. | |||||
* This means that we may drop the last few bytes, but this allows | |||||
* for faster extraction code. Also, it means that we never leave | |||||
* an empty buffer. | |||||
*/ | |||||
u = p->ptr; | |||||
if (u >= (sizeof p->buf.d) - 9) { | |||||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||||
u = 0; | |||||
} | |||||
p->ptr = u + 8; | |||||
/* | |||||
* On systems that use little-endian encoding and allow | |||||
* unaligned accesses, we can simply read the data where it is. | |||||
*/ | |||||
return (uint64_t)p->buf.d[u + 0] | |||||
| ((uint64_t)p->buf.d[u + 1] << 8) | |||||
| ((uint64_t)p->buf.d[u + 2] << 16) | |||||
| ((uint64_t)p->buf.d[u + 3] << 24) | |||||
| ((uint64_t)p->buf.d[u + 4] << 32) | |||||
| ((uint64_t)p->buf.d[u + 5] << 40) | |||||
| ((uint64_t)p->buf.d[u + 6] << 48) | |||||
| ((uint64_t)p->buf.d[u + 7] << 56); | |||||
} | |||||
/* | |||||
* Get an 8-bit random value from a PRNG. | |||||
*/ | |||||
static inline unsigned | |||||
prng_get_u8(prng *p) { | |||||
unsigned v; | |||||
v = p->buf.d[p->ptr ++]; | |||||
if (p->ptr == sizeof p->buf.d) { | |||||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||||
} | |||||
return v; | |||||
} | |||||
/* ==================================================================== */ | |||||
/* | |||||
* FFT (falcon-fft.c). | |||||
* | |||||
* A real polynomial is represented as an array of N 'fpr' elements. | |||||
* The FFT representation of a real polynomial contains N/2 complex | |||||
* elements; each is stored as two real numbers, for the real and | |||||
* imaginary parts, respectively. See falcon-fft.c for details on the | |||||
* internal representation. | |||||
*/ | |||||
/* | |||||
* Compute FFT in-place: the source array should contain a real | |||||
* polynomial (N coefficients); its storage area is reused to store | |||||
* the FFT representation of that polynomial (N/2 complex numbers). | |||||
* | |||||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn); | |||||
/* | |||||
* Compute the inverse FFT in-place: the source array should contain the | |||||
* FFT representation of a real polynomial (N/2 elements); the resulting | |||||
* real polynomial (N coefficients of type 'fpr') is written over the | |||||
* array. | |||||
* | |||||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn); | |||||
/* | |||||
* Add polynomial b to polynomial a. a and b MUST NOT overlap. This | |||||
* function works in both normal and FFT representations. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_add(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This | |||||
* function works in both normal and FFT representations. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_sub(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Negate polynomial a. This function works in both normal and FFT | |||||
* representations. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn); | |||||
/* | |||||
* Compute adjoint of polynomial a. This function works only in FFT | |||||
* representation. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn); | |||||
/* | |||||
* Multiply polynomial a with polynomial b. a and b MUST NOT overlap. | |||||
* This function works only in FFT representation. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT | |||||
* overlap. This function works only in FFT representation. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Multiply polynomial with its own adjoint. This function works only in FFT | |||||
* representation. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn); | |||||
/* | |||||
* Multiply polynomial with a real constant. This function works in both | |||||
* normal and FFT representations. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn); | |||||
/* | |||||
* Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). | |||||
* a and b MUST NOT overlap. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_div_fft(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) | |||||
* (also in FFT representation). Since the result is auto-adjoint, all its | |||||
* coordinates in FFT representation are real; as such, only the first N/2 | |||||
* values of d[] are filled (the imaginary parts are skipped). | |||||
* | |||||
* Array d MUST NOT overlap with either a or b. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *restrict d, | |||||
const fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) | |||||
* (also in FFT representation). Destination d MUST NOT overlap with | |||||
* any of the source arrays. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *restrict d, | |||||
const fpr *restrict F, const fpr *restrict G, | |||||
const fpr *restrict f, const fpr *restrict g, unsigned logn); | |||||
/* | |||||
* Multiply polynomial a by polynomial b, where b is autoadjoint. Both | |||||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||||
* a and b MUST NOT overlap. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(fpr *restrict a, | |||||
const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Divide polynomial a by polynomial b, where b is autoadjoint. Both | |||||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||||
* a and b MUST NOT overlap. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft(fpr *restrict a, | |||||
const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||||
* representation. On input, g00, g01 and g11 are provided (where the | |||||
* matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 | |||||
* and d11 values are written in g00, g01 and g11, respectively | |||||
* (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). | |||||
* (In fact, d00 = g00, so the g00 operand is left unmodified.) | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft(const fpr *restrict g00, | |||||
fpr *restrict g01, fpr *restrict g11, unsigned logn); | |||||
/* | |||||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||||
* representation. This is identical to poly_LDL_fft() except that | |||||
* g00, g01 and g11 are unmodified; the outputs d11 and l10 are written | |||||
* in two other separate buffers provided as extra parameters. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft(fpr *restrict d11, fpr *restrict l10, | |||||
const fpr *restrict g00, const fpr *restrict g01, | |||||
const fpr *restrict g11, unsigned logn); | |||||
/* | |||||
* Apply "split" operation on a polynomial in FFT representation: | |||||
* f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 | |||||
* (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_split_fft(fpr *restrict t0, fpr *restrict t1, | |||||
const fpr *restrict f, unsigned logn); | |||||
/* | |||||
* Apply "merge" operation on two polynomials in FFT representation: | |||||
* given f0 and f1, polynomials modulo X^(N/2)+1, this function computes
* f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. | |||||
* f MUST NOT overlap with either f0 or f1. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(fpr *restrict f, | |||||
const fpr *restrict f0, const fpr *restrict f1, unsigned logn); | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Key pair generation. | |||||
*/ | |||||
/* | |||||
* Required sizes of the temporary buffer (in bytes). | |||||
*/ | |||||
#define FALCON_KEYGEN_TEMP_1 136 | |||||
#define FALCON_KEYGEN_TEMP_2 272 | |||||
#define FALCON_KEYGEN_TEMP_3 224 | |||||
#define FALCON_KEYGEN_TEMP_4 448 | |||||
#define FALCON_KEYGEN_TEMP_5 896 | |||||
#define FALCON_KEYGEN_TEMP_6 1792 | |||||
#define FALCON_KEYGEN_TEMP_7 3584 | |||||
#define FALCON_KEYGEN_TEMP_8 7168 | |||||
#define FALCON_KEYGEN_TEMP_9 14336 | |||||
#define FALCON_KEYGEN_TEMP_10 28672 | |||||
/* | |||||
* Generate a new key pair. Randomness is extracted from the provided | |||||
* SHAKE256 context, which must have already been seeded and flipped. | |||||
* The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* | |||||
* macros) and be aligned for the uint32_t, uint64_t and fpr types. | |||||
* | |||||
* The private key elements are written in f, g, F and G, and the | |||||
* public key is written in h. Either or both of G and h may be NULL, | |||||
* in which case the corresponding element is not returned (they can | |||||
* be recomputed from f, g and F). | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_keygen(shake256_context *rng, | |||||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, | |||||
unsigned logn, uint8_t *tmp); | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Signature generation. | |||||
*/ | |||||
/* | |||||
* Expand a private key into the B0 matrix in FFT representation and | |||||
* the LDL tree. All the values are written in 'expanded_key', for | |||||
* a total of (8*logn+40)*2^logn bytes. | |||||
* | |||||
* The tmp[] array must have room for at least 48*2^logn bytes. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_expand_privkey(fpr *restrict expanded_key, | |||||
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, | |||||
unsigned logn, uint8_t *restrict tmp); | |||||
/* | |||||
* Compute a signature over the provided hashed message (hm); the | |||||
* signature value is one short vector. This function uses an | |||||
* expanded key (as generated by PQCLEAN_FALCON1024_CLEAN_expand_privkey()). | |||||
* | |||||
* The sig[] and hm[] buffers may overlap. | |||||
* | |||||
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, shake256_context *rng, | |||||
const fpr *restrict expanded_key, | |||||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||||
/* | |||||
* Compute a signature over the provided hashed message (hm); the | |||||
* signature value is one short vector. This function uses a raw
* key and dynamically recomputes the B0 matrix and LDL tree; this
* saves RAM since there is no need for an expanded key, but
* increases the signature cost.
* | |||||
* The sig[] and hm[] buffers may overlap. | |||||
* | |||||
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes. | |||||
*/ | |||||
void PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng, | |||||
const int8_t *restrict f, const int8_t *restrict g, | |||||
const int8_t *restrict F, const int8_t *restrict G, | |||||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||||
/* ==================================================================== */ | |||||
#endif |
@@ -0,0 +1,381 @@ | |||||
/* | |||||
* Wrapper for implementing the PQClean API. | |||||
*/ | |||||
#include <stddef.h> | |||||
#include <string.h> | |||||
#include "api.h" | |||||
#include "inner.h" | |||||
#define NONCELEN 40 | |||||
#include "randombytes.h" | |||||
/* | |||||
* Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) | |||||
* | |||||
* private key: | |||||
* header byte: 0101nnnn | |||||
* private f (6 or 5 bits by element, depending on degree) | |||||
* private g (6 or 5 bits by element, depending on degree) | |||||
* private F (8 bits by element) | |||||
* | |||||
* public key: | |||||
* header byte: 0000nnnn | |||||
* public h (14 bits by element) | |||||
* | |||||
* signature: | |||||
* header byte: 0011nnnn | |||||
* nonce 40 bytes | |||||
* value (12 bits by element) | |||||
* | |||||
* message + signature: | |||||
* signature length (2 bytes, big-endian) | |||||
* nonce 40 bytes | |||||
* message | |||||
* header byte: 0010nnnn | |||||
* value (12 bits by element) | |||||
* (signature length is 1+len(value), not counting the nonce) | |||||
*/ | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair( | |||||
uint8_t *pk, uint8_t *sk) { | |||||
union { | |||||
uint8_t b[FALCON_KEYGEN_TEMP_10]; | |||||
uint64_t dummy_u64; | |||||
fpr dummy_fpr; | |||||
} tmp; | |||||
int8_t f[1024], g[1024], F[1024]; | |||||
uint16_t h[1024]; | |||||
unsigned char seed[48]; | |||||
shake256_context rng; | |||||
size_t u, v; | |||||
/* | |||||
* Generate key pair. | |||||
*/ | |||||
randombytes(seed, sizeof seed); | |||||
shake256_init(&rng); | |||||
shake256_inject(&rng, seed, sizeof seed); | |||||
shake256_flip(&rng); | |||||
PQCLEAN_FALCON1024_CLEAN_keygen(&rng, f, g, F, NULL, h, 10, tmp.b); | |||||
/* | |||||
* Encode private key. | |||||
*/ | |||||
sk[0] = 0x50 + 10; | |||||
u = 1; | |||||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( | |||||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||||
f, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10]); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( | |||||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||||
g, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10]); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( | |||||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||||
F, 10, PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10]); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
if (u != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||||
return -1; | |||||
} | |||||
/* | |||||
* Encode public key. | |||||
*/ | |||||
pk[0] = 0x00 + 10; | |||||
v = PQCLEAN_FALCON1024_CLEAN_modq_encode( | |||||
pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1, | |||||
h, 10); | |||||
if (v != PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||||
return -1; | |||||
} | |||||
return 0; | |||||
} | |||||
/* | |||||
* Compute the signature. nonce[] receives the nonce and must have length | |||||
* NONCELEN bytes. sigbuf[] receives the signature value (without nonce | |||||
* or header byte), with *sigbuflen providing the maximum value length and | |||||
* receiving the actual value length. | |||||
* | |||||
* If a signature could be computed but not encoded because it would | |||||
* exceed the output buffer size, then a new signature is computed. If | |||||
* the provided buffer size is too low, this could loop indefinitely, so | |||||
* the caller must provide a size that can accommodate signatures with a | |||||
* large enough probability. | |||||
* | |||||
* Return value: 0 on success, -1 on error. | |||||
*/ | |||||
static int | |||||
do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||||
union { | |||||
uint8_t b[72 * 1024]; | |||||
uint64_t dummy_u64; | |||||
fpr dummy_fpr; | |||||
} tmp; | |||||
int8_t f[1024], g[1024], F[1024], G[1024]; | |||||
union { | |||||
int16_t sig[1024]; | |||||
uint16_t hm[1024]; | |||||
} r; | |||||
unsigned char seed[48]; | |||||
shake256_context sc; | |||||
size_t u, v; | |||||
/* | |||||
* Decode the private key. | |||||
*/ | |||||
if (sk[0] != 0x50 + 10) { | |||||
return -1; | |||||
} | |||||
u = 1; | |||||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( | |||||
f, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10], | |||||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( | |||||
g, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10], | |||||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( | |||||
F, 10, PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10], | |||||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
if (u != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||||
return -1; | |||||
} | |||||
if (!PQCLEAN_FALCON1024_CLEAN_complete_private(G, f, g, F, 10, tmp.b)) { | |||||
return -1; | |||||
} | |||||
/* | |||||
* Create a random nonce (40 bytes). | |||||
*/ | |||||
randombytes(nonce, NONCELEN); | |||||
/* | |||||
* Hash message nonce + message into a vector. | |||||
*/ | |||||
shake256_init(&sc); | |||||
shake256_inject(&sc, nonce, NONCELEN); | |||||
shake256_inject(&sc, m, mlen); | |||||
shake256_flip(&sc); | |||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point(&sc, r.hm, 10, tmp.b); | |||||
/* | |||||
* Initialize a RNG. | |||||
*/ | |||||
randombytes(seed, sizeof seed); | |||||
shake256_init(&sc); | |||||
shake256_inject(&sc, seed, sizeof seed); | |||||
shake256_flip(&sc); | |||||
/* | |||||
* Compute and return the signature. This loops until a signature | |||||
* value is found that fits in the provided buffer. | |||||
*/ | |||||
for (;;) { | |||||
PQCLEAN_FALCON1024_CLEAN_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 10, tmp.b); | |||||
v = PQCLEAN_FALCON1024_CLEAN_comp_encode(sigbuf, *sigbuflen, r.sig, 10); | |||||
if (v != 0) { | |||||
*sigbuflen = v; | |||||
return 0; | |||||
} | |||||
} | |||||
} | |||||
/* | |||||
* Verify a sigature. The nonce has size NONCELEN bytes. sigbuf[] | |||||
* (of size sigbuflen) contains the signature value, not including the | |||||
* header byte or nonce. Return value is 0 on success, -1 on error. | |||||
*/ | |||||
static int | |||||
do_verify( | |||||
const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||||
union { | |||||
uint8_t b[2 * 1024]; | |||||
uint64_t dummy_u64; | |||||
fpr dummy_fpr; | |||||
} tmp; | |||||
uint16_t h[1024], hm[1024]; | |||||
int16_t sig[1024]; | |||||
shake256_context sc; | |||||
/* | |||||
* Decode public key. | |||||
*/ | |||||
if (pk[0] != 0x00 + 10) { | |||||
return -1; | |||||
} | |||||
if (PQCLEAN_FALCON1024_CLEAN_modq_decode(h, 10, | |||||
pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) | |||||
!= PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||||
return -1; | |||||
} | |||||
PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(h, 10); | |||||
/* | |||||
* Decode signature. | |||||
*/ | |||||
if (sigbuflen == 0) { | |||||
return -1; | |||||
} | |||||
if (PQCLEAN_FALCON1024_CLEAN_comp_decode(sig, 10, sigbuf, sigbuflen) != sigbuflen) { | |||||
return -1; | |||||
} | |||||
/* | |||||
* Hash nonce + message into a vector. | |||||
*/ | |||||
shake256_init(&sc); | |||||
shake256_inject(&sc, nonce, NONCELEN); | |||||
shake256_inject(&sc, m, mlen); | |||||
shake256_flip(&sc); | |||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point(&sc, hm, 10, tmp.b); | |||||
/* | |||||
* Verify signature. | |||||
*/ | |||||
if (!PQCLEAN_FALCON1024_CLEAN_verify_raw(hm, sig, h, 10, tmp.b)) { | |||||
return -1; | |||||
} | |||||
return 0; | |||||
} | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature( | |||||
uint8_t *sig, size_t *siglen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||||
/* | |||||
* The PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES constant is used for | |||||
* the signed message object (as produced by crypto_sign()) | |||||
* and includes a two-byte length value, so we take care here | |||||
* to only generate signatures that are two bytes shorter than | |||||
* the maximum. This is done to ensure that crypto_sign() | |||||
* and crypto_sign_signature() produce the exact same signature | |||||
* value, if used on the same message, with the same private key, | |||||
* and using the same output from randombytes() (this is for | |||||
* reproducibility of tests). | |||||
*/ | |||||
size_t vlen; | |||||
vlen = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||||
if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { | |||||
return -1; | |||||
} | |||||
sig[0] = 0x30 + 10; | |||||
*siglen = 1 + NONCELEN + vlen; | |||||
return 0; | |||||
} | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify( | |||||
const uint8_t *sig, size_t siglen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||||
if (siglen < 1 + NONCELEN) { | |||||
return -1; | |||||
} | |||||
if (sig[0] != 0x30 + 10) { | |||||
return -1; | |||||
} | |||||
return do_verify(sig + 1, | |||||
sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); | |||||
} | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON1024_CLEAN_crypto_sign( | |||||
uint8_t *sm, size_t *smlen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||||
uint8_t *pm, *sigbuf; | |||||
size_t sigbuflen; | |||||
/* | |||||
* Move the message to its final location; this is a memmove() so | |||||
* it handles overlaps properly. | |||||
*/ | |||||
memmove(sm + 2 + NONCELEN, m, mlen); | |||||
pm = sm + 2 + NONCELEN; | |||||
sigbuf = pm + 1 + mlen; | |||||
sigbuflen = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||||
if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { | |||||
return -1; | |||||
} | |||||
pm[mlen] = 0x20 + 10; | |||||
sigbuflen ++; | |||||
sm[0] = (uint8_t)(sigbuflen >> 8); | |||||
sm[1] = (uint8_t)sigbuflen; | |||||
*smlen = mlen + 2 + NONCELEN + sigbuflen; | |||||
return 0; | |||||
} | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_open( | |||||
uint8_t *m, size_t *mlen, | |||||
const uint8_t *sm, size_t smlen, const uint8_t *pk) { | |||||
const uint8_t *sigbuf; | |||||
size_t pmlen, sigbuflen; | |||||
if (smlen < 3 + NONCELEN) { | |||||
return -1; | |||||
} | |||||
sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; | |||||
if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { | |||||
return -1; | |||||
} | |||||
sigbuflen --; | |||||
pmlen = smlen - NONCELEN - 3 - sigbuflen; | |||||
if (sm[2 + NONCELEN + pmlen] != 0x20 + 10) { | |||||
return -1; | |||||
} | |||||
sigbuf = sm + 2 + NONCELEN + pmlen + 1; | |||||
/* | |||||
* The 2-byte length header and the one-byte signature header | |||||
* have been verified. Nonce is at sm+2, followed by the message | |||||
* itself. Message length is in pmlen. sigbuf/sigbuflen point to | |||||
* the signature value (excluding the header byte). | |||||
*/ | |||||
if (do_verify(sm + 2, sigbuf, sigbuflen, | |||||
sm + 2 + NONCELEN, pmlen, pk) < 0) { | |||||
return -1; | |||||
} | |||||
/* | |||||
* Signature is correct, we just have to copy/move the message | |||||
* to its final destination. The memmove() properly handles | |||||
* overlaps. | |||||
*/ | |||||
memmove(m, sm + 2 + NONCELEN, pmlen); | |||||
*mlen = pmlen; | |||||
return 0; | |||||
} |
@@ -0,0 +1,187 @@ | |||||
/* | |||||
* PRNG and interface to the system RNG. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include <assert.h> | |||||
#include "inner.h" | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, shake256_context *src) { | |||||
/* | |||||
* To ensure reproducibility for a given seed, we | |||||
* must enforce little-endian interpretation of | |||||
* the state words. | |||||
*/ | |||||
unsigned char tmp[56]; | |||||
uint64_t th, tl; | |||||
int i; | |||||
shake256_extract(src, tmp, 56); | |||||
for (i = 0; i < 14; i ++) { | |||||
uint32_t w; | |||||
w = (uint32_t)tmp[(i << 2) + 0] | |||||
| ((uint32_t)tmp[(i << 2) + 1] << 8) | |||||
| ((uint32_t)tmp[(i << 2) + 2] << 16) | |||||
| ((uint32_t)tmp[(i << 2) + 3] << 24); | |||||
*(uint32_t *)(p->state.d + (i << 2)) = w; | |||||
} | |||||
tl = *(uint32_t *)(p->state.d + 48); | |||||
th = *(uint32_t *)(p->state.d + 52); | |||||
*(uint64_t *)(p->state.d + 48) = tl + (th << 32); | |||||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||||
} | |||||
/* | |||||
* PRNG based on ChaCha20. | |||||
* | |||||
* State consists in key (32 bytes) then IV (16 bytes) and block counter | |||||
* (8 bytes). Normally, we should not care about local endianness (this | |||||
* is for a PRNG), but for the NIST competition we need reproducible KAT | |||||
* vectors that work across architectures, so we enforce little-endian | |||||
* interpretation where applicable. Moreover, output words are "spread | |||||
* out" over the output buffer with the interleaving pattern that is | |||||
* naturally obtained from the AVX2 implementation that runs eight | |||||
* ChaCha20 instances in parallel. | |||||
* | |||||
* The block counter is XORed into the first 8 bytes of the IV. | |||||
*/ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p) { | |||||
static const uint32_t CW[] = { | |||||
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 | |||||
}; | |||||
uint64_t cc; | |||||
size_t u; | |||||
/* | |||||
* State uses local endianness. Only the output bytes must be | |||||
* converted to little endian (if used on a big-endian machine). | |||||
*/ | |||||
cc = *(uint64_t *)(p->state.d + 48); | |||||
for (u = 0; u < 8; u ++) { | |||||
uint32_t state[16]; | |||||
size_t v; | |||||
int i; | |||||
memcpy(&state[0], CW, sizeof CW); | |||||
memcpy(&state[4], p->state.d, 48); | |||||
state[14] ^= (uint32_t)cc; | |||||
state[15] ^= (uint32_t)(cc >> 32); | |||||
for (i = 0; i < 10; i ++) { | |||||
#define QROUND(a, b, c, d) do { \ | |||||
state[a] += state[b]; \ | |||||
state[d] ^= state[a]; \ | |||||
state[d] = (state[d] << 16) | (state[d] >> 16); \ | |||||
state[c] += state[d]; \ | |||||
state[b] ^= state[c]; \ | |||||
state[b] = (state[b] << 12) | (state[b] >> 20); \ | |||||
state[a] += state[b]; \ | |||||
state[d] ^= state[a]; \ | |||||
state[d] = (state[d] << 8) | (state[d] >> 24); \ | |||||
state[c] += state[d]; \ | |||||
state[b] ^= state[c]; \ | |||||
state[b] = (state[b] << 7) | (state[b] >> 25); \ | |||||
} while (0) | |||||
QROUND( 0, 4, 8, 12); | |||||
QROUND( 1, 5, 9, 13); | |||||
QROUND( 2, 6, 10, 14); | |||||
QROUND( 3, 7, 11, 15); | |||||
QROUND( 0, 5, 10, 15); | |||||
QROUND( 1, 6, 11, 12); | |||||
QROUND( 2, 7, 8, 13); | |||||
QROUND( 3, 4, 9, 14); | |||||
#undef QROUND | |||||
} | |||||
for (v = 0; v < 4; v ++) { | |||||
state[v] += CW[v]; | |||||
} | |||||
for (v = 4; v < 14; v ++) { | |||||
state[v] += ((uint32_t *)p->state.d)[v - 4]; | |||||
} | |||||
state[14] += ((uint32_t *)p->state.d)[10] | |||||
^ (uint32_t)cc; | |||||
state[15] += ((uint32_t *)p->state.d)[11] | |||||
^ (uint32_t)(cc >> 32); | |||||
cc ++; | |||||
/* | |||||
* We mimic the interleaving that is used in the AVX2 | |||||
* implementation. | |||||
*/ | |||||
for (v = 0; v < 16; v ++) { | |||||
p->buf.d[(u << 2) + (v << 5) + 0] = | |||||
(unsigned char)state[v]; | |||||
p->buf.d[(u << 2) + (v << 5) + 1] = | |||||
(unsigned char)(state[v] >> 8); | |||||
p->buf.d[(u << 2) + (v << 5) + 2] = | |||||
(unsigned char)(state[v] >> 16); | |||||
p->buf.d[(u << 2) + (v << 5) + 3] = | |||||
(unsigned char)(state[v] >> 24); | |||||
} | |||||
} | |||||
*(uint64_t *)(p->state.d + 48) = cc; | |||||
p->ptr = 0; | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len) { | |||||
unsigned char *buf; | |||||
buf = dst; | |||||
while (len > 0) { | |||||
size_t clen; | |||||
clen = (sizeof p->buf.d) - p->ptr; | |||||
if (clen > len) { | |||||
clen = len; | |||||
} | |||||
memcpy(buf, p->buf.d, clen); | |||||
buf += clen; | |||||
len -= clen; | |||||
p->ptr += clen; | |||||
if (p->ptr == sizeof p->buf.d) { | |||||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||||
} | |||||
} | |||||
} |
@@ -0,0 +1,745 @@ | |||||
/* | |||||
* Falcon signature verification. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include "inner.h" | |||||
/* ===================================================================== */ | |||||
/* | |||||
* Constants for NTT. | |||||
* | |||||
* n = 2^logn (2 <= n <= 1024) | |||||
* phi = X^n + 1 | |||||
* q = 12289 | |||||
* q0i = -1/q mod 2^16 | |||||
* R = 2^16 mod q | |||||
* R2 = 2^32 mod q | |||||
*/ | |||||
#define Q 12289 | |||||
#define Q0I 12287 | |||||
#define R 4091 | |||||
#define R2 10952 | |||||
/* | |||||
* Table for NTT, binary case: | |||||
* GMb[x] = R*(g^rev(x)) mod q | |||||
* where g = 7 (it is a 2048-th primitive root of 1 modulo q) | |||||
* and rev() is the bit-reversal function over 10 bits. | |||||
*/ | |||||
static const uint16_t GMb[] = { | |||||
4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759, | |||||
1591, 6399, 9477, 5266, 586, 5825, 7538, 9710, | |||||
1134, 6407, 1711, 965, 7099, 7674, 3743, 6442, | |||||
10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180, | |||||
12210, 6240, 997, 117, 4783, 4407, 1549, 7072, | |||||
2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042, | |||||
12189, 432, 10751, 1237, 7610, 1534, 3983, 7863, | |||||
2181, 6308, 8720, 6570, 4843, 1690, 14, 3872, | |||||
5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340, | |||||
1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045, | |||||
3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180, | |||||
9277, 6130, 3323, 883, 10469, 489, 1502, 2851, | |||||
11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195, | |||||
730, 1762, 3854, 2030, 5892, 10922, 9020, 5274, | |||||
9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446, | |||||
7613, 9386, 834, 7703, 6836, 3403, 5351, 12276, | |||||
3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525, | |||||
10401, 2749, 7338, 10574, 6040, 943, 9330, 1477, | |||||
6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680, | |||||
8188, 6902, 3533, 9807, 6090, 727, 10099, 7003, | |||||
6945, 1949, 9731, 10559, 6057, 378, 7871, 8763, | |||||
8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821, | |||||
5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159, | |||||
1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188, | |||||
737, 3698, 4699, 5753, 9046, 3687, 16, 914, | |||||
5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381, | |||||
10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357, | |||||
6409, 8197, 2962, 6375, 5064, 6634, 5625, 278, | |||||
932, 10229, 8927, 7642, 351, 9298, 237, 5858, | |||||
7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204, | |||||
4602, 1748, 11300, 340, 3711, 4614, 300, 10993, | |||||
5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654, | |||||
3835, 5553, 1224, 8476, 9237, 3845, 250, 11209, | |||||
4225, 6326, 9680, 12254, 4136, 2778, 692, 8808, | |||||
6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433, | |||||
6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416, | |||||
8418, 10824, 11986, 5733, 876, 7030, 2167, 2436, | |||||
3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434, | |||||
7389, 8879, 10661, 11457, 4220, 1432, 10832, 4328, | |||||
8557, 1867, 9454, 2416, 3816, 9076, 686, 5393, | |||||
2523, 4339, 6115, 619, 937, 2834, 7775, 3279, | |||||
2363, 7488, 6112, 5056, 824, 10204, 11690, 1113, | |||||
2727, 9848, 896, 2028, 5075, 2654, 10464, 7884, | |||||
12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520, | |||||
1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399, | |||||
11192, 315, 4511, 1158, 6061, 6751, 11865, 357, | |||||
7367, 4550, 983, 8534, 8352, 10126, 7530, 9253, | |||||
4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652, | |||||
3374, 11477, 1753, 292, 8681, 2806, 10378, 12188, | |||||
5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928, | |||||
4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650, | |||||
7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344, | |||||
8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561, | |||||
6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114, | |||||
7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323, | |||||
10438, 9471, 1271, 408, 6911, 3079, 360, 8276, | |||||
11535, 9156, 9049, 11539, 850, 8617, 784, 7919, | |||||
8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600, | |||||
9779, 1012, 721, 2784, 6676, 6552, 5348, 4424, | |||||
6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333, | |||||
8801, 9661, 7308, 5788, 4910, 909, 11613, 4395, | |||||
8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216, | |||||
4296, 11918, 695, 4371, 9793, 4884, 2411, 10230, | |||||
2650, 841, 3890, 10231, 7248, 8505, 11196, 6688, | |||||
4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868, | |||||
11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525, | |||||
7938, 7982, 11977, 6755, 537, 4562, 1623, 8227, | |||||
11453, 7544, 906, 11816, 9548, 10858, 9703, 2815, | |||||
11736, 6813, 6979, 819, 8903, 6271, 10843, 348, | |||||
7514, 8339, 6439, 694, 852, 5659, 2781, 3716, | |||||
11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885, | |||||
2978, 7289, 11884, 9123, 9323, 11830, 98, 2526, | |||||
2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224, | |||||
10871, 8092, 9651, 5989, 7140, 8480, 1670, 159, | |||||
10923, 4918, 128, 7312, 725, 9157, 5006, 6393, | |||||
3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668, | |||||
3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365, | |||||
5110, 45, 2400, 1921, 4377, 2720, 1695, 51, | |||||
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833, | |||||
4135, 4257, 5838, 4765, 10985, 11532, 590, 12198, | |||||
482, 12173, 2006, 7064, 10018, 3912, 12016, 10519, | |||||
11362, 6954, 2210, 284, 5413, 6601, 3865, 10339, | |||||
11188, 6231, 517, 9564, 11281, 3863, 1210, 4604, | |||||
8160, 11447, 153, 7204, 5763, 5089, 9248, 12154, | |||||
11748, 1354, 6672, 179, 5532, 2646, 5941, 12185, | |||||
862, 3158, 477, 7279, 5678, 7914, 4254, 302, | |||||
2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824, | |||||
10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449, | |||||
5159, 1308, 8315, 3404, 1877, 1231, 112, 6398, | |||||
11724, 12272, 7286, 1459, 12274, 9896, 3456, 800, | |||||
1397, 10678, 103, 7420, 7976, 936, 764, 632, | |||||
7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946, | |||||
6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139, | |||||
4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850, | |||||
7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217, | |||||
10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711, | |||||
2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729, | |||||
4997, 7415, 6315, 12044, 4374, 7157, 4844, 211, | |||||
8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875, | |||||
8192, 986, 7527, 1401, 870, 3615, 8465, 2756, | |||||
9770, 2034, 10168, 3264, 6132, 54, 2880, 4763, | |||||
11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038, | |||||
2567, 708, 893, 6465, 4962, 10024, 2090, 5718, | |||||
10743, 780, 4733, 4623, 2134, 2087, 4802, 884, | |||||
5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664, | |||||
4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791, | |||||
6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032, | |||||
11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062, | |||||
8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348, | |||||
4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499, | |||||
2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326, | |||||
5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830, | |||||
11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582, | |||||
3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762, | |||||
7496, 10383, 755, 1654, 12053, 4952, 10134, 4394, | |||||
6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674, | |||||
10358, 4901, 7414, 8771, 710, 6764, 8462, 7193, | |||||
5371, 7274, 11084, 290, 7864, 6827, 11822, 2509, | |||||
6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105, | |||||
11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776, | |||||
7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277, | |||||
9182, 11456, 633, 10046, 11554, 5633, 9587, 2333, | |||||
7008, 7084, 5047, 7199, 9865, 8997, 569, 6390, | |||||
10845, 9679, 8268, 11472, 4203, 1997, 2, 9331, | |||||
162, 6182, 2000, 3649, 9792, 6363, 7557, 6187, | |||||
8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067, | |||||
5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165, | |||||
6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949 | |||||
}; | |||||
/* | |||||
* Table for inverse NTT, binary case: | |||||
* iGMb[x] = R*((1/g)^rev(x)) mod q | |||||
* Since g = 7, 1/g = 8778 mod 12289. | |||||
*/ | |||||
static const uint16_t iGMb[] = { | |||||
4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329, | |||||
2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698, | |||||
3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875, | |||||
5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155, | |||||
8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108, | |||||
4426, 8306, 10755, 4679, 11052, 1538, 11857, 100, | |||||
8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460, | |||||
5217, 10740, 7882, 7506, 12172, 11292, 6049, 79, | |||||
13, 6938, 8886, 5453, 4586, 11455, 2903, 4676, | |||||
9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110, | |||||
7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559, | |||||
11094, 2211, 1808, 7319, 48, 9547, 2560, 1228, | |||||
9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012, | |||||
6109, 2796, 2203, 1652, 711, 7004, 1053, 8973, | |||||
5244, 1517, 9322, 11269, 900, 3888, 11133, 10736, | |||||
4949, 7616, 9974, 4746, 10270, 126, 2921, 6720, | |||||
6635, 6543, 1582, 4868, 42, 673, 2240, 7219, | |||||
1296, 11989, 7675, 8578, 11949, 989, 10541, 7687, | |||||
7085, 8487, 1004, 10236, 4703, 163, 9143, 4597, | |||||
6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357, | |||||
12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880, | |||||
6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556, | |||||
6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103, | |||||
11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552, | |||||
6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822, | |||||
9130, 9948, 3566, 2133, 3901, 6038, 7333, 6609, | |||||
3468, 4659, 625, 2700, 7738, 3443, 3060, 3388, | |||||
3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344, | |||||
5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101, | |||||
4609, 8605, 8226, 144, 5656, 8704, 2621, 5424, | |||||
10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888, | |||||
3764, 39, 8219, 2080, 2502, 1469, 10550, 8709, | |||||
5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639, | |||||
2059, 9878, 7405, 2496, 7918, 11594, 371, 7993, | |||||
3073, 10326, 40, 10004, 9245, 7987, 5603, 4051, | |||||
7894, 676, 11380, 7379, 6501, 4981, 2628, 3488, | |||||
10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473, | |||||
7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510, | |||||
6689, 386, 4462, 105, 2076, 10443, 119, 3955, | |||||
4370, 11505, 3672, 11439, 750, 3240, 3133, 754, | |||||
4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851, | |||||
4966, 8181, 2688, 6205, 6814, 926, 2936, 4327, | |||||
10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255, | |||||
728, 7569, 6056, 10432, 11036, 2452, 2811, 3787, | |||||
945, 8998, 1244, 8815, 11017, 11218, 5894, 4325, | |||||
4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707, | |||||
1361, 9812, 2949, 11265, 10301, 9108, 478, 6489, | |||||
101, 1911, 9483, 3608, 11997, 10536, 812, 8915, | |||||
637, 8159, 5299, 9128, 3512, 8290, 7068, 7922, | |||||
3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922, | |||||
11932, 424, 5538, 6228, 11131, 7778, 11974, 1097, | |||||
2890, 10027, 2569, 2250, 2352, 821, 2550, 11016, | |||||
7769, 136, 617, 3157, 5889, 9219, 6855, 120, | |||||
4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562, | |||||
11176, 599, 2085, 11465, 7233, 6177, 4801, 9926, | |||||
9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766, | |||||
6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732, | |||||
7961, 1457, 10857, 8069, 832, 1628, 3410, 4900, | |||||
10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847, | |||||
9853, 10122, 5259, 11413, 6556, 303, 1465, 3871, | |||||
4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852, | |||||
3856, 928, 4933, 8530, 1871, 2184, 5571, 5879, | |||||
3481, 11597, 9511, 8153, 35, 2609, 5963, 8064, | |||||
1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454, | |||||
2340, 7651, 1910, 10709, 2117, 9637, 6402, 6028, | |||||
2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795, | |||||
9222, 10837, 280, 8583, 3270, 6753, 2354, 3779, | |||||
6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127, | |||||
2958, 12287, 10292, 8086, 817, 4021, 2610, 1444, | |||||
5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281, | |||||
9956, 2702, 6656, 735, 2243, 11656, 833, 3107, | |||||
6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278, | |||||
3513, 9769, 3025, 779, 9433, 3392, 7437, 668, | |||||
10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711, | |||||
9780, 467, 5462, 4425, 11999, 1205, 5015, 6918, | |||||
5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931, | |||||
6615, 1541, 8708, 260, 3385, 4792, 4391, 5697, | |||||
7895, 2155, 7337, 236, 10635, 11534, 1906, 4793, | |||||
9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556, | |||||
707, 1088, 4936, 678, 10245, 18, 5684, 960, | |||||
4459, 7957, 226, 2451, 6, 8874, 320, 6298, | |||||
8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876, | |||||
9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679, | |||||
7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378, | |||||
5227, 952, 4319, 9810, 4356, 3088, 11118, 840, | |||||
6257, 486, 6000, 1342, 10382, 6017, 4798, 5489, | |||||
4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037, | |||||
1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917, | |||||
11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546, | |||||
6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722, | |||||
2251, 11199, 5356, 7408, 2861, 4003, 9215, 484, | |||||
7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519, | |||||
9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097, | |||||
2414, 6496, 9953, 10554, 808, 2999, 2130, 4286, | |||||
12078, 7445, 5132, 7915, 245, 5974, 4874, 7292, | |||||
7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022, | |||||
9578, 8934, 11074, 9498, 294, 4711, 3391, 1377, | |||||
9072, 10189, 4569, 10890, 9909, 6923, 53, 4653, | |||||
439, 10253, 7028, 10207, 8343, 1141, 2556, 7601, | |||||
8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765, | |||||
10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293, | |||||
11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892, | |||||
11489, 8833, 2393, 15, 10830, 5003, 17, 565, | |||||
5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130, | |||||
5840, 10482, 8338, 6035, 6964, 1574, 10936, 2020, | |||||
2465, 8191, 384, 2642, 2729, 5399, 2175, 9396, | |||||
11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427, | |||||
104, 6348, 9643, 6757, 12110, 5617, 10935, 541, | |||||
135, 3041, 7200, 6526, 5085, 12136, 842, 4129, | |||||
7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101, | |||||
1950, 8424, 5688, 6876, 12005, 10079, 5335, 927, | |||||
1770, 273, 8377, 2271, 5225, 10283, 116, 11807, | |||||
91, 11699, 757, 1304, 7524, 6451, 8032, 8154, | |||||
7456, 4191, 309, 2318, 2292, 10393, 11639, 9481, | |||||
12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179, | |||||
3924, 3188, 367, 2077, 336, 5384, 5631, 8596, | |||||
4621, 1775, 8866, 451, 6108, 1317, 6246, 8795, | |||||
5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366, | |||||
12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418, | |||||
10065, 4156, 8373, 8644, 10445, 882, 8158, 10173, | |||||
9763, 12191, 459, 2966, 3166, 405, 5000, 9311, | |||||
6404, 8986, 1551, 8175, 3630, 10766, 9265, 700, | |||||
8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775, | |||||
11941, 1446, 6018, 3386, 11470, 5310, 5476, 553, | |||||
9474, 2586, 1431, 2741, 473, 11383, 4745, 836, | |||||
4062, 10666, 7727, 11752, 5534, 312, 4307, 4351, | |||||
5764, 8679, 8381, 8187, 5, 7395, 4363, 1152, | |||||
5421, 5231, 6473, 436, 7567, 8603, 6229, 8230 | |||||
}; | |||||
/* | |||||
* Reduce a small signed integer modulo q. The source integer MUST | |||||
* be between -q/2 and +q/2. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_conv_small(int x) { | |||||
/* | |||||
* If x < 0, the cast to uint32_t will set the high bit to 1. | |||||
*/ | |||||
uint32_t y; | |||||
y = (uint32_t)x; | |||||
y += Q & -(y >> 31); | |||||
return y; | |||||
} | |||||
/* | |||||
* Addition modulo q. Operands must be in the 0..q-1 range. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_add(uint32_t x, uint32_t y) { | |||||
/* | |||||
* We compute x + y - q. If the result is negative, then the | |||||
* high bit will be set, and 'd >> 31' will be equal to 1; | |||||
* thus '-(d >> 31)' will be an all-one pattern. Otherwise, | |||||
* it will be an all-zero pattern. In other words, this | |||||
* implements a conditional addition of q. | |||||
*/ | |||||
uint32_t d; | |||||
d = x + y - Q; | |||||
d += Q & -(d >> 31); | |||||
return d; | |||||
} | |||||
/* | |||||
* Subtraction modulo q. Operands must be in the 0..q-1 range. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_sub(uint32_t x, uint32_t y) { | |||||
/* | |||||
* As in mq_add(), we use a conditional addition to ensure the | |||||
* result is in the 0..q-1 range. | |||||
*/ | |||||
uint32_t d; | |||||
d = x - y; | |||||
d += Q & -(d >> 31); | |||||
return d; | |||||
} | |||||
/* | |||||
* Division by 2 modulo q. Operand must be in the 0..q-1 range. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_rshift1(uint32_t x) { | |||||
x += Q & -(x & 1); | |||||
return (x >> 1); | |||||
} | |||||
/* | |||||
* Montgomery multiplication modulo q. If we set R = 2^16 mod q, then | |||||
* this function computes: x * y / R mod q | |||||
* Operands must be in the 0..q-1 range. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_montymul(uint32_t x, uint32_t y) { | |||||
uint32_t z, w; | |||||
/* | |||||
* We compute x*y + k*q with a value of k chosen so that the 16 | |||||
* low bits of the result are 0. We can then shift the value. | |||||
* After the shift, result may still be larger than q, but it | |||||
* will be lower than 2*q, so a conditional subtraction works. | |||||
*/ | |||||
z = x * y; | |||||
w = ((z * Q0I) & 0xFFFF) * Q; | |||||
/* | |||||
* When adding z and w, the result will have its low 16 bits | |||||
* equal to 0. Since x, y and z are lower than q, the sum will | |||||
* be no more than (2^15 - 1) * q + (q - 1)^2, which will | |||||
* fit on 29 bits. | |||||
*/ | |||||
z = (z + w) >> 16; | |||||
/* | |||||
* After the shift, analysis shows that the value will be less | |||||
* than 2q. We do a subtraction then conditional subtraction to | |||||
* ensure the result is in the expected range. | |||||
*/ | |||||
z -= Q; | |||||
z += Q & -(z >> 31); | |||||
return z; | |||||
} | |||||
/*
 * Montgomery squaring: returns (x^2)/R, computed as the Montgomery
 * product of x with itself.
 */
static inline uint32_t
mq_montysqr(uint32_t x) {
    uint32_t sq;

    sq = mq_montymul(x, x);
    return sq;
}
/* | |||||
* Divide x by y modulo q = 12289. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_div_12289(uint32_t x, uint32_t y) { | |||||
/* | |||||
* We invert y by computing y^(q-2) mod q. | |||||
* | |||||
* We use the following addition chain for exponent e = 12287: | |||||
* | |||||
* e0 = 1 | |||||
* e1 = 2 * e0 = 2 | |||||
* e2 = e1 + e0 = 3 | |||||
* e3 = e2 + e1 = 5 | |||||
* e4 = 2 * e3 = 10 | |||||
* e5 = 2 * e4 = 20 | |||||
* e6 = 2 * e5 = 40 | |||||
* e7 = 2 * e6 = 80 | |||||
* e8 = 2 * e7 = 160 | |||||
* e9 = e8 + e2 = 163 | |||||
* e10 = e9 + e8 = 323 | |||||
* e11 = 2 * e10 = 646 | |||||
* e12 = 2 * e11 = 1292 | |||||
* e13 = e12 + e9 = 1455 | |||||
* e14 = 2 * e13 = 2910 | |||||
* e15 = 2 * e14 = 5820 | |||||
* e16 = e15 + e10 = 6143 | |||||
* e17 = 2 * e16 = 12286 | |||||
* e18 = e17 + e0 = 12287 | |||||
* | |||||
* Additions on exponents are converted to Montgomery | |||||
* multiplications. We define all intermediate results as so | |||||
* many local variables, and let the C compiler work out which | |||||
* must be kept around. | |||||
*/ | |||||
uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; | |||||
uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; | |||||
y0 = mq_montymul(y, R2); | |||||
y1 = mq_montysqr(y0); | |||||
y2 = mq_montymul(y1, y0); | |||||
y3 = mq_montymul(y2, y1); | |||||
y4 = mq_montysqr(y3); | |||||
y5 = mq_montysqr(y4); | |||||
y6 = mq_montysqr(y5); | |||||
y7 = mq_montysqr(y6); | |||||
y8 = mq_montysqr(y7); | |||||
y9 = mq_montymul(y8, y2); | |||||
y10 = mq_montymul(y9, y8); | |||||
y11 = mq_montysqr(y10); | |||||
y12 = mq_montysqr(y11); | |||||
y13 = mq_montymul(y12, y9); | |||||
y14 = mq_montysqr(y13); | |||||
y15 = mq_montysqr(y14); | |||||
y16 = mq_montymul(y15, y10); | |||||
y17 = mq_montysqr(y16); | |||||
y18 = mq_montymul(y17, y0); | |||||
/* | |||||
* Final multiplication with x, which is not in Montgomery | |||||
* representation, computes the correct division result. | |||||
*/ | |||||
return mq_montymul(y18, x); | |||||
} | |||||
/* | |||||
* Compute NTT on a ring element. | |||||
*/ | |||||
static void | |||||
mq_NTT(uint16_t *a, unsigned logn) { | |||||
size_t n, t, m; | |||||
n = (size_t)1 << logn; | |||||
t = n; | |||||
for (m = 1; m < n; m <<= 1) { | |||||
size_t ht, i, j1; | |||||
ht = t >> 1; | |||||
for (i = 0, j1 = 0; i < m; i ++, j1 += t) { | |||||
size_t j, j2; | |||||
uint32_t s; | |||||
s = GMb[m + i]; | |||||
j2 = j1 + ht; | |||||
for (j = j1; j < j2; j ++) { | |||||
uint32_t u, v; | |||||
u = a[j]; | |||||
v = mq_montymul(a[j + ht], s); | |||||
a[j] = (uint16_t)mq_add(u, v); | |||||
a[j + ht] = (uint16_t)mq_sub(u, v); | |||||
} | |||||
} | |||||
t = ht; | |||||
} | |||||
} | |||||
/* | |||||
* Compute the inverse NTT on a ring element, binary case. | |||||
*/ | |||||
static void | |||||
mq_iNTT(uint16_t *a, unsigned logn) { | |||||
size_t n, t, m; | |||||
uint32_t ni; | |||||
n = (size_t)1 << logn; | |||||
t = 1; | |||||
m = n; | |||||
while (m > 1) { | |||||
size_t hm, dt, i, j1; | |||||
hm = m >> 1; | |||||
dt = t << 1; | |||||
for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { | |||||
size_t j, j2; | |||||
uint32_t s; | |||||
j2 = j1 + t; | |||||
s = iGMb[hm + i]; | |||||
for (j = j1; j < j2; j ++) { | |||||
uint32_t u, v, w; | |||||
u = a[j]; | |||||
v = a[j + t]; | |||||
a[j] = (uint16_t)mq_add(u, v); | |||||
w = mq_sub(u, v); | |||||
a[j + t] = (uint16_t) | |||||
mq_montymul(w, s); | |||||
} | |||||
} | |||||
t = dt; | |||||
m = hm; | |||||
} | |||||
/* | |||||
* To complete the inverse NTT, we must now divide all values by | |||||
* n (the vector size). We thus need the inverse of n, i.e. we | |||||
* need to divide 1 by 2 logn times. But we also want it in | |||||
* Montgomery representation, i.e. we also want to multiply it | |||||
* by R = 2^16. In the common case, this should be a simple right | |||||
* shift. The loop below is generic and works also in corner cases; | |||||
* its computation time is negligible. | |||||
*/ | |||||
ni = R; | |||||
for (m = n; m > 1; m >>= 1) { | |||||
ni = mq_rshift1(ni); | |||||
} | |||||
for (m = 0; m < n; m ++) { | |||||
a[m] = (uint16_t)mq_montymul(a[m], ni); | |||||
} | |||||
} | |||||
/* | |||||
* Convert a polynomial (mod q) to Montgomery representation. | |||||
*/ | |||||
static void | |||||
mq_poly_tomonty(uint16_t *f, unsigned logn) { | |||||
size_t u, n; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
f[u] = (uint16_t)mq_montymul(f[u], R2); | |||||
} | |||||
} | |||||
/*
 * Coefficient-wise Montgomery product of two polynomials in NTT
 * representation. The product f*g replaces f; g is only read.
 */
static void
mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    size_t k = 0;

    while (k < count) {
        uint32_t prod = mq_montymul(f[k], g[k]);

        f[k] = (uint16_t)prod;
        k ++;
    }
}
/*
 * Coefficient-wise subtraction modulo q: f - g replaces f; g is only
 * read.
 */
static void
mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    size_t k = 0;

    while (k < count) {
        uint32_t diff = mq_sub(f[k], g[k]);

        f[k] = (uint16_t)diff;
        k ++;
    }
}
/* ===================================================================== */ | |||||
/*
 * see inner.h
 *
 * Converts h[] (2^logn coefficients), in place, to NTT representation
 * and then to Montgomery representation.
 */
void
PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn) {
    /* Step 1: NTT. */
    mq_NTT(h, logn);

    /* Step 2: Montgomery form of every coefficient. */
    mq_poly_tomonty(h, logn);
}
/* see inner.h */ | |||||
int | |||||
PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||||
const uint16_t *h, unsigned logn, uint8_t *tmp) { | |||||
size_t u, n; | |||||
uint16_t *tt; | |||||
n = (size_t)1 << logn; | |||||
tt = (uint16_t *)tmp; | |||||
/* | |||||
* Reduce s2 elements modulo q ([0..q-1] range). | |||||
*/ | |||||
for (u = 0; u < n; u ++) { | |||||
uint32_t w; | |||||
w = (uint32_t)s2[u]; | |||||
w += Q & -(w >> 31); | |||||
tt[u] = (uint16_t)w; | |||||
} | |||||
/* | |||||
* Compute s1 = s2*h - c0 mod phi mod q (in tt[]). | |||||
*/ | |||||
mq_NTT(tt, logn); | |||||
mq_poly_montymul_ntt(tt, h, logn); | |||||
mq_iNTT(tt, logn); | |||||
mq_poly_sub(tt, c0, logn); | |||||
/* | |||||
* Normalize s1 elements into the [-q/2..q/2] range. | |||||
*/ | |||||
for (u = 0; u < n; u ++) { | |||||
int32_t w; | |||||
w = (int32_t)tt[u]; | |||||
w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); | |||||
((int16_t *)tt)[u] = (int16_t)w; | |||||
} | |||||
/* | |||||
* Signature is valid if and only if the aggregate (s1,s2) vector | |||||
* is short enough. | |||||
*/ | |||||
return PQCLEAN_FALCON1024_CLEAN_is_short((int16_t *)tt, s2, logn); | |||||
} | |||||
/*
 * see inner.h
 *
 * Computes h = g/f modulo q. f and g hold 2^logn small signed
 * coefficients each; tmp[] receives 2^logn 16-bit values.
 *
 * Returned value is 1 on success, or 0 if one of the NTT coefficients
 * of f is zero (the division is then not possible). In the latter case
 * h[] has been partially overwritten.
 */
int
PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h,
                                        const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) {
    uint16_t *fq = (uint16_t *)tmp;
    const size_t count = (size_t)1 << logn;
    size_t k;

    /* Both polynomials, reduced into 0..q-1, then in NTT form. */
    for (k = 0; k < count; k ++) {
        fq[k] = (uint16_t)mq_conv_small(f[k]);
        h[k] = (uint16_t)mq_conv_small(g[k]);
    }
    mq_NTT(h, logn);
    mq_NTT(fq, logn);

    /* Coefficient-wise division; a zero divisor is a failure. */
    for (k = 0; k < count; k ++) {
        uint32_t den = fq[k];

        if (den == 0) {
            return 0;
        }
        h[k] = (uint16_t)mq_div_12289(h[k], den);
    }

    mq_iNTT(h, logn);
    return 1;
}
/* see inner.h */
int | |||||
PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G, | |||||
const int8_t *f, const int8_t *g, const int8_t *F, | |||||
unsigned logn, uint8_t *tmp) { | |||||
size_t u, n; | |||||
uint16_t *t1, *t2; | |||||
n = (size_t)1 << logn; | |||||
t1 = (uint16_t *)tmp; | |||||
t2 = t1 + n; | |||||
for (u = 0; u < n; u ++) { | |||||
t1[u] = (uint16_t)mq_conv_small(g[u]); | |||||
t2[u] = (uint16_t)mq_conv_small(F[u]); | |||||
} | |||||
mq_NTT(t1, logn); | |||||
mq_NTT(t2, logn); | |||||
mq_poly_tomonty(t1, logn); | |||||
mq_poly_montymul_ntt(t1, t2, logn); | |||||
for (u = 0; u < n; u ++) { | |||||
t2[u] = (uint16_t)mq_conv_small(f[u]); | |||||
} | |||||
mq_NTT(t2, logn); | |||||
for (u = 0; u < n; u ++) { | |||||
if (t2[u] == 0) { | |||||
return 0; | |||||
} | |||||
t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); | |||||
} | |||||
mq_iNTT(t1, logn); | |||||
for (u = 0; u < n; u ++) { | |||||
uint32_t w; | |||||
int32_t gi; | |||||
w = t1[u]; | |||||
w -= (Q & ~ -((w - (Q >> 1)) >> 31)); | |||||
gi = *(int32_t *)&w; | |||||
if (gi < -127 || gi > +127) { | |||||
return 0; | |||||
} | |||||
G[u] = (int8_t)gi; | |||||
} | |||||
return 1; | |||||
} |
@@ -0,0 +1,23 @@ | |||||
name: Falcon-512 | |||||
type: signature | |||||
claimed-nist-level: 1 | |||||
length-public-key: 897 | |||||
length-secret-key: 1281 | |||||
length-signature: 690 | |||||
nistkat-sha256: abc62e7be3d7c1db757ba3cbb771cfdc89c6b36fb5efc885593db89ec2ea8bc4 | |||||
testvectors-sha256: 1a1b170fc9e4623e7ff519c15ec7a2dda55e94a175756b7c72429451bd226b09 | |||||
principal-submitters: | |||||
- Thomas Prest | |||||
auxiliary-submitters: | |||||
- Pierre-Alain Fouque | |||||
- Jeffrey Hoffstein | |||||
- Paul Kirchner | |||||
- Vadim Lyubashevsky | |||||
- Thomas Pornin | |||||
- Thomas Ricosset | |||||
- Gregor Seiler | |||||
- William Whyte | |||||
- Zhenfei Zhang | |||||
implementations: | |||||
- name: clean | |||||
version: https://github.com/FIX-THIS/YES-BUT-HOW/DUNNO-GUV |
@@ -0,0 +1,22 @@ | |||||
MIT License | |||||
Copyright (c) 2017-2019 Falcon Project | |||||
Permission is hereby granted, free of charge, to any person obtaining | |||||
a copy of this software and associated documentation files (the | |||||
"Software"), to deal in the Software without restriction, including | |||||
without limitation the rights to use, copy, modify, merge, publish, | |||||
distribute, sublicense, and/or sell copies of the Software, and to | |||||
permit persons to whom the Software is furnished to do so, subject to | |||||
the following conditions: | |||||
The above copyright notice and this permission notice shall be | |||||
included in all copies or substantial portions of the Software. | |||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
@@ -0,0 +1,21 @@ | |||||
# This Makefile can be used with GNU Make or BSD Make

# Static library built from the object files listed below.
LIB=libfalcon-512_clean.a

SOURCES = codec.c common.c fft.c fpr.c keygen.c pqclean.c rng.c sign.c vrfy.c
OBJECTS = codec.o common.o fft.o fpr.o keygen.o pqclean.o rng.o sign.o vrfy.o
HEADERS = api.h fpr.h inner.h

CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror -Wmissing-prototypes -Wredundant-decls -std=c99 -I../../../common $(EXTRAFLAGS)

# 'all' and 'clean' name actions, not files: a file with one of these
# names in the directory must not prevent the target from running.
.PHONY: all clean

all: $(LIB)

# Every object is rebuilt when its source or any header changes.
# Recipe lines must begin with a tab character.
%.o: %.c $(HEADERS)
	$(CC) $(CFLAGS) -c -o $@ $<

$(LIB): $(OBJECTS)
	$(AR) -r $@ $(OBJECTS)

clean:
	$(RM) $(OBJECTS)
	$(RM) $(LIB)
@@ -0,0 +1,18 @@ | |||||
# This Makefile can be used with Microsoft Visual Studio's nmake using the command:
#    nmake /f Makefile.Microsoft_nmake

# Static library built from the object files listed below.
LIBRARY=libfalcon-512_clean.lib
OBJECTS=codec.obj common.obj fft.obj fpr.obj keygen.obj pqclean.obj rng.obj sign.obj vrfy.obj

CFLAGS=/nologo /I ..\..\..\common /W4 /WX

all: $(LIBRARY)

# Make sure objects are recompiled if headers change.
$(OBJECTS): *.h

# Command lines must begin with whitespace.
$(LIBRARY): $(OBJECTS)
	LIB.EXE /NOLOGO /WX /OUT:$@ $**

# The leading '-' lets nmake carry on when DEL reports an error.
clean:
	-DEL $(OBJECTS)
	-DEL $(LIBRARY)
@@ -0,0 +1,80 @@ | |||||
#ifndef PQCLEAN_FALCON512_CLEAN_API_H
#define PQCLEAN_FALCON512_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

/*
 * Object sizes, in bytes. Key sizes are exact; CRYPTO_BYTES is the
 * maximum length of a signature (actual signature length varies).
 */
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES   1281
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES   897
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES            690

#define PQCLEAN_FALCON512_CLEAN_CRYPTO_ALGNAME          "Falcon-512"

/*
 * Key pair generation.
 *
 *   pk   receives the public key; exactly
 *        PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES bytes
 *   sk   receives the private key; exactly
 *        PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES bytes
 *
 * Returned value is 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair(
    uint8_t *pk, uint8_t *sk);

/*
 * Detached signature generation.
 *
 *   sig, siglen   receive the signature and its length; the length
 *                 varies but is never more than
 *                 PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES bytes
 *   m, mlen       message to sign
 *   sk            private key
 *
 * Arbitrary overlap between sig[], m[] and sk[] is allowed.
 *
 * Returned value is 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
 * Detached signature verification.
 *
 *   sig, siglen   signature to verify
 *   m, mlen       message
 *   pk            public key
 *
 * Arbitrary overlap between sig[], m[] and pk[] is allowed.
 *
 * Returned value is 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen, const uint8_t *pk);

/*
 * Signature generation, with the signature and the message packed
 * together into one object.
 *
 *   sm, smlen     receive the signed message object and its length;
 *                 the length may exceed mlen by up to
 *                 PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES bytes
 *   m, mlen       message to sign
 *   sk            private key
 *
 * Arbitrary overlap between sm[] and m[] is allowed, but sm[] shall
 * not overlap with sk[].
 *
 * Returned value is 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign(
    uint8_t *sm, size_t *smlen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
 * Verification of a signed message object, with message recovery.
 *
 *   m, mlen       receive, on success, the message and its length;
 *                 the message is shorter than the signed message
 *                 object, by an amount which depends on the signature
 *                 value and may range up to
 *                 PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES bytes
 *   sm, smlen     signed message object
 *   pk            public key
 *
 * Arbitrary overlap between m[], sm[] and pk[] is allowed.
 *
 * Returned value is 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_open(
    uint8_t *m, size_t *mlen,
    const uint8_t *sm, size_t smlen, const uint8_t *pk);

#endif
@@ -0,0 +1,549 @@ | |||||
/* | |||||
* Encoding/decoding of keys and signatures. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include "inner.h" | |||||
/*
 * see inner.h
 *
 * Packs the 2^logn values of x[] over 14 bits each, most significant
 * bit first. Unused bits of the last byte are set to zero.
 *
 * Returned value is the encoded length in bytes. If out is NULL,
 * nothing is written and the length is still returned. Returned value
 * is 0 if a value of x[] is 12289 or more, or if the encoded length
 * exceeds max_out_len.
 */
size_t
PQCLEAN_FALCON512_CLEAN_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * 14) + 7) >> 3;
    uint8_t *dst;
    uint32_t window;
    int pending;
    size_t k;

    /* All values are checked before anything is written. */
    for (k = 0; k < count; k ++) {
        if (x[k] >= 12289) {
            return 0;
        }
    }
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    /*
     * 'window' accumulates bits; 'pending' is the number of bits in
     * it which have not been written out yet.
     */
    dst = out;
    window = 0;
    pending = 0;
    for (k = 0; k < count; k ++) {
        window = (window << 14) | x[k];
        pending += 14;
        while (pending >= 8) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }

    /* Left-align what remains in a final byte. */
    if (pending > 0) {
        *dst = (uint8_t)(window << (8 - pending));
    }
    return need;
}
/*
 * see inner.h
 *
 * Unpacks 2^logn values of 14 bits each (most significant bit first)
 * from in[] into x[].
 *
 * Returned value is the number of bytes read. It is 0 if in_max_len
 * is too short, if a decoded value is 12289 or more, or if the unused
 * bits of the last byte are not all zero.
 */
size_t
PQCLEAN_FALCON512_CLEAN_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t in_max_len) {
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * 14) + 7) >> 3;
    const uint8_t *src = in;
    uint32_t window = 0;
    int have = 0;
    size_t k = 0;

    if (need > in_max_len) {
        return 0;
    }

    /*
     * One byte is read per iteration; a value is produced whenever
     * at least 14 bits are available.
     */
    while (k < count) {
        unsigned v;

        window = (window << 8) | (*src ++);
        have += 8;
        if (have < 14) {
            continue;
        }
        have -= 14;
        v = (window >> have) & 0x3FFF;
        if (v >= 12289) {
            return 0;
        }
        x[k ++] = (uint16_t)v;
    }

    /* Leftover bits must be zero. */
    if ((window & (((uint32_t)1 << have) - 1)) != 0) {
        return 0;
    }
    return need;
}
/*
 * see inner.h
 *
 * Packs the 2^logn signed values of x[] over 'bits' bits each (two's
 * complement, truncated), most significant bit first. Unused bits of
 * the last byte are set to zero.
 *
 * Every value must be in the -(2^(bits-1)-1)..+(2^(bits-1)-1) range.
 *
 * Returned value is the encoded length in bytes. If out is NULL,
 * nothing is written and the length is still returned. Returned value
 * is 0 if a value is out of range, or if the encoded length exceeds
 * max_out_len.
 */
size_t
PQCLEAN_FALCON512_CLEAN_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * bits) + 7) >> 3;
    const int limit = (1 << (bits - 1)) - 1;
    const uint32_t field = ((uint32_t)1 << bits) - 1;
    uint8_t *dst;
    uint32_t window;
    unsigned pending;
    size_t k;

    for (k = 0; k < count; k ++) {
        if (x[k] < -limit || x[k] > limit) {
            return 0;
        }
    }
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    for (k = 0; k < count; k ++) {
        window = (window << bits) | ((uint16_t)x[k] & field);
        pending += bits;
        while (pending >= 8) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        *dst ++ = (uint8_t)(window << (8 - pending));
    }
    return need;
}
/*
 * see inner.h
 *
 * Unpacks 2^logn signed values of 'bits' bits each (most significant
 * bit first) from in[] into x[].
 *
 * Returned value is the number of bytes read. It is 0 if in_max_len
 * is too short, if a decoded value is -2^(bits-1), or if the unused
 * bits of the last byte are not all zero.
 */
size_t
PQCLEAN_FALCON512_CLEAN_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t in_max_len) {
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * bits) + 7) >> 3;
    const uint32_t field = ((uint32_t)1 << bits) - 1;
    const uint32_t sign = (uint32_t)1 << (bits - 1);
    const uint8_t *src = in;
    uint32_t window = 0;
    unsigned have = 0;
    size_t k = 0;

    if (need > in_max_len) {
        return 0;
    }

    while (k < count) {
        window = (window << 8) | *src ++;
        have += 8;
        while (have >= bits && k < count) {
            uint32_t v;

            have -= bits;
            v = (window >> have) & field;

            /*
             * Sign extension: if the top bit of the field is
             * set, all upper bits of v get set as well.
             */
            v |= -(v & sign);
            if (v == -sign) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            x[k ++] = (int16_t) * (int32_t *)&v;
        }
    }

    /*
     * Extra bits in the last byte must be zero.
     */
    if ((window & (((uint32_t)1 << have) - 1)) != 0) {
        return 0;
    }
    return need;
}
/*
 * see inner.h
 *
 * Packs the 2^logn signed values of x[] over 'bits' bits each (two's
 * complement, truncated), most significant bit first. Unused bits of
 * the last byte are set to zero.
 *
 * Every value must be in the -(2^(bits-1)-1)..+(2^(bits-1)-1) range.
 *
 * Returned value is the encoded length in bytes. If out is NULL,
 * nothing is written and the length is still returned. Returned value
 * is 0 if a value is out of range, or if the encoded length exceeds
 * max_out_len.
 */
size_t
PQCLEAN_FALCON512_CLEAN_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * bits) + 7) >> 3;
    const int limit = (1 << (bits - 1)) - 1;
    const uint32_t field = ((uint32_t)1 << bits) - 1;
    uint8_t *dst;
    uint32_t window;
    unsigned pending;
    size_t k;

    for (k = 0; k < count; k ++) {
        if (x[k] < -limit || x[k] > limit) {
            return 0;
        }
    }
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    for (k = 0; k < count; k ++) {
        window = (window << bits) | ((uint8_t)x[k] & field);
        pending += bits;
        while (pending >= 8) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        *dst ++ = (uint8_t)(window << (8 - pending));
    }
    return need;
}
/*
 * see inner.h
 *
 * Unpacks 2^logn signed values of 'bits' bits each (most significant
 * bit first) from in[] into x[].
 *
 * Returned value is the number of bytes read. It is 0 if in_max_len
 * is too short, if a decoded value is -2^(bits-1), or if the unused
 * bits of the last byte are not all zero.
 */
size_t
PQCLEAN_FALCON512_CLEAN_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t in_max_len) {
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * bits) + 7) >> 3;
    const uint32_t field = ((uint32_t)1 << bits) - 1;
    const uint32_t sign = (uint32_t)1 << (bits - 1);
    const uint8_t *src = in;
    uint32_t window = 0;
    unsigned have = 0;
    size_t k = 0;

    if (need > in_max_len) {
        return 0;
    }

    while (k < count) {
        window = (window << 8) | *src ++;
        have += 8;
        while (have >= bits && k < count) {
            uint32_t v;

            have -= bits;
            v = (window >> have) & field;

            /*
             * Sign extension: if the top bit of the field is
             * set, all upper bits of v get set as well.
             */
            v |= -(v & sign);
            if (v == -sign) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            x[k ++] = (int8_t) * (int32_t *)&v;
        }
    }

    /*
     * Extra bits in the last byte must be zero.
     */
    if ((window & (((uint32_t)1 << have) - 1)) != 0) {
        return 0;
    }
    return need;
}
/*
 * see inner.h
 *
 * Variable-length encoding of the 2^logn values of x[]. Each value
 * yields, in that order: one sign bit (1 for negative), the seven low
 * bits of the absolute value, then the remaining high bits of the
 * absolute value in unary (that many zeros, followed by a one).
 * Unused bits of the last byte are set to zero.
 *
 * Returned value is the encoded length in bytes. If out is NULL,
 * nothing is written but the length is still computed and returned.
 * Returned value is 0 if a value is not in the -2047..+2047 range, or
 * if the output does not fit in max_out_len bytes.
 */
size_t
PQCLEAN_FALCON512_CLEAN_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    uint8_t *dst = out;
    size_t k, written;
    uint32_t window;
    unsigned pending;

    /*
     * All values are checked first.
     */
    for (k = 0; k < count; k ++) {
        if (x[k] < -2047 || x[k] > +2047) {
            return 0;
        }
    }

    window = 0;
    pending = 0;
    written = 0;
    for (k = 0; k < count; k ++) {
        int val = x[k];
        unsigned neg = (unsigned)(val < 0);
        unsigned mag = (unsigned)(neg ? -val : val);
        unsigned high = mag >> 7;

        /*
         * Sign bit and seven low bits: eight bits. Then 'high'
         * zeros and a one. Since the absolute value is at most
         * 2047, high is at most 15, so at most 16 more bits are
         * pushed; with up to 7 bits left over from the previous
         * iteration this makes 31 bits at most, which fits in
         * the 32-bit window.
         */
        window = (window << 1) | neg;
        window = (window << 7) | (mag & 127u);
        window = (window << (high + 1)) | 1;
        pending += 8 + high + 1;

        /*
         * Emit all complete bytes.
         */
        while (pending >= 8) {
            pending -= 8;
            if (dst != NULL) {
                if (written >= max_out_len) {
                    return 0;
                }
                dst[written] = (uint8_t)(window >> pending);
            }
            written ++;
        }
    }

    /*
     * Emit the last, partial byte (if any), left-aligned.
     */
    if (pending > 0) {
        if (dst != NULL) {
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(window << (8 - pending));
        }
        written ++;
    }
    return written;
}
/*
 * see inner.h
 *
 * Decodes 2^logn values from the variable-length format, into x[].
 * Each value is read as: one sign bit (1 for negative), the seven low
 * bits of the absolute value, then the high bits of the absolute
 * value in unary (zeros terminated by a one).
 *
 * Returned value is the number of bytes read. It is 0 on error, i.e.
 * if the input is truncated (more than in_max_len bytes would be
 * needed), if an absolute value exceeds 2047, or if the encoding is
 * not canonical: a zero with the sign bit set ("-0"), or nonzero
 * unused bits in the last byte. Non-canonical encodings are rejected
 * so that a given sequence of values has a single valid encoding.
 */
size_t
PQCLEAN_FALCON512_CLEAN_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t in_max_len) {
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value. acc_len (0 to 7) bits from the previous
         * byte are still pending, hence the shift.
         */
        if (v >= in_max_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached; each 0 stands for
         * 128 more in the absolute value.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= in_max_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: zero must be encoded with a cleared
         * sign bit.
         */
        if (s && m == 0) {
            return 0;
        }

        x[u] = (int16_t)(s ? -(int)m : (int)m);
    }

    /*
     * Unused bits in the last byte must be zero.
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }

    return v;
}
/* | |||||
* Key elements and signatures are polynomials with small integer | |||||
* coefficients. Here are some statistics gathered over many | |||||
* generated key pairs (10000 or more for each degree): | |||||
* | |||||
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G) | |||||
* 1 2 129 56.31 143 60.02 | |||||
* 2 4 123 40.93 160 46.52 | |||||
* 3 8 97 28.97 159 38.01 | |||||
* 4 16 100 21.48 154 32.50 | |||||
* 5 32 71 15.41 151 29.36 | |||||
* 6 64 59 11.07 138 27.77 | |||||
* 7 128 39 7.91 144 27.00 | |||||
* 8 256 32 5.63 148 26.61 | |||||
* 9 512 22 4.00 137 26.46 | |||||
* 10 1024 15 2.84 146 26.41 | |||||
* | |||||
* We want a compact storage format for private key, and, as part of | |||||
* key generation, we are allowed to reject some keys which would | |||||
* otherwise be fine (this does not induce any noticeable vulnerability | |||||
* as long as we reject only a small proportion of possible keys). | |||||
* Hence, we enforce at key generation time maximum values for the | |||||
* elements of f, g, F and G, so that their encoding can be expressed | |||||
* in fixed-width values. Limits have been chosen so that generated | |||||
 * keys are almost always within bounds, thus impacting neither
 * security nor performance.
* | |||||
* IMPORTANT: the code assumes that all coefficients of f, g, F and G | |||||
* ultimately fit in the -127..+127 range. Thus, none of the elements | |||||
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8. | |||||
*/ | |||||
/* Encoding width (in bits) of f and g coefficients, indexed by logn. */
const uint8_t PQCLEAN_FALCON512_CLEAN_max_fg_bits[] = {
    [0]  = 0, /* unused */
    [1]  = 8,
    [2]  = 8,
    [3]  = 8,
    [4]  = 8,
    [5]  = 8,
    [6]  = 7,
    [7]  = 7,
    [8]  = 6,
    [9]  = 6,
    [10] = 5
};
/*
 * Fixed encoding width, in bits, enforced for the coefficients of F and
 * G. The table is indexed by logn (1 to 10); slot 0 is a placeholder.
 * Every degree uses the same width of 8 bits.
 */
const uint8_t PQCLEAN_FALCON512_CLEAN_max_FG_bits[] = {
    [0]  = 0, /* unused */
    [1]  = 8, [2] = 8, [3] = 8, [4] = 8, [5] = 8,
    [6]  = 8, [7] = 8, [8] = 8, [9] = 8,
    [10] = 8
};
/* | |||||
* When generating a new key pair, we can always reject keys which | |||||
* feature an abnormally large coefficient. This can also be done for | |||||
* signatures, albeit with some care: in case the signature process is | |||||
* used in a derandomized setup (explicitly seeded with the message and | |||||
* private key), we have to follow the specification faithfully, and the | |||||
* specification only enforces a limit on the L2 norm of the signature | |||||
* vector. The limit on the L2 norm implies that the absolute value of | |||||
* a coefficient of the signature cannot be more than the following: | |||||
* | |||||
* log(n) n max sig coeff (theoretical) | |||||
* 1 2 412 | |||||
* 2 4 583 | |||||
* 3 8 824 | |||||
* 4 16 1166 | |||||
* 5 32 1649 | |||||
* 6 64 2332 | |||||
* 7 128 3299 | |||||
* 8 256 4665 | |||||
* 9 512 6598 | |||||
* 10 1024 9331 | |||||
* | |||||
 * However, the largest signature coefficient observed during our
 * experiments was 1077 (in absolute value), hence we can assume that,
* with overwhelming probability, signature coefficients will fit | |||||
* in -2047..2047, i.e. 12 bits. | |||||
*/ | |||||
/*
 * Bit width assumed for signature coefficients. The table is indexed by
 * logn (1 to 10); slot 0 is a placeholder. No entry exceeds 12 bits.
 */
const uint8_t PQCLEAN_FALCON512_CLEAN_max_sig_bits[] = {
    [0]  = 0, /* unused */
    [1]  = 10,
    [2]  = 11, [3] = 11,
    [4]  = 12, [5] = 12, [6] = 12, [7] = 12,
    [8]  = 12, [9] = 12,
    [10] = 12
};
@@ -0,0 +1,261 @@ | |||||
/* | |||||
* Support functions for signatures (hash-to-point, norm). | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include "inner.h" | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_hash_to_point( | |||||
shake256_context *sc, | |||||
uint16_t *x, unsigned logn, uint8_t *tmp) { | |||||
/* | |||||
* Each 16-bit sample is a value in 0..65535. The value is | |||||
* kept if it falls in 0..61444 (because 61445 = 5*12289) | |||||
* and rejected otherwise; thus, each sample has probability | |||||
* about 0.93758 of being selected. | |||||
* | |||||
* We want to oversample enough to be sure that we will | |||||
* have enough values with probability at least 1 - 2^(-256). | |||||
* Depending on degree N, this leads to the following | |||||
* required oversampling: | |||||
* | |||||
* logn n oversampling | |||||
* 1 2 65 | |||||
* 2 4 67 | |||||
* 3 8 71 | |||||
* 4 16 77 | |||||
* 5 32 86 | |||||
* 6 64 100 | |||||
* 7 128 122 | |||||
* 8 256 154 | |||||
* 9 512 205 | |||||
* 10 1024 287 | |||||
* | |||||
* If logn >= 7, then the provided temporary buffer is large | |||||
* enough. Otherwise, we use a stack buffer of 63 entries | |||||
* (i.e. 126 bytes) for the values that do not fit in tmp[]. | |||||
*/ | |||||
static const uint16_t overtab[] = { | |||||
0, /* unused */ | |||||
65, | |||||
67, | |||||
71, | |||||
77, | |||||
86, | |||||
100, | |||||
122, | |||||
154, | |||||
205, | |||||
287 | |||||
}; | |||||
unsigned n, n2, u, m, p, over; | |||||
uint16_t *tt1, tt2[63]; | |||||
/* | |||||
* We first generate m 16-bit value. Values 0..n-1 go to x[]. | |||||
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. | |||||
* We also reduce modulo q the values; rejected values are set | |||||
* to 0xFFFF. | |||||
*/ | |||||
n = 1U << logn; | |||||
n2 = n << 1; | |||||
over = overtab[logn]; | |||||
m = n + over; | |||||
tt1 = (uint16_t *)tmp; | |||||
for (u = 0; u < m; u ++) { | |||||
uint8_t buf[2]; | |||||
uint32_t w, wr; | |||||
shake256_extract(sc, buf, sizeof buf); | |||||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; | |||||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); | |||||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); | |||||
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); | |||||
wr |= ((w - 61445) >> 31) - 1; | |||||
if (u < n) { | |||||
x[u] = (uint16_t)wr; | |||||
} else if (u < n2) { | |||||
tt1[u - n] = (uint16_t)wr; | |||||
} else { | |||||
tt2[u - n2] = (uint16_t)wr; | |||||
} | |||||
} | |||||
/* | |||||
* Now we must "squeeze out" the invalid values. We do this in | |||||
* a logarithmic sequence of passes; each pass computes where a | |||||
* value should go, and moves it down by 'p' slots if necessary, | |||||
* where 'p' uses an increasing powers-of-two scale. It can be | |||||
* shown that in all cases where the loop decides that a value | |||||
* has to be moved down by p slots, the destination slot is | |||||
* "free" (i.e. contains an invalid value). | |||||
*/ | |||||
for (p = 1; p <= over; p <<= 1) { | |||||
unsigned v; | |||||
/* | |||||
* In the loop below: | |||||
* | |||||
* - v contains the index of the final destination of | |||||
* the value; it is recomputed dynamically based on | |||||
* whether values are valid or not. | |||||
* | |||||
* - u is the index of the value we consider ("source"); | |||||
* its address is s. | |||||
* | |||||
* - The loop may swap the value with the one at index | |||||
* u-p. The address of the swap destination is d. | |||||
*/ | |||||
v = 0; | |||||
for (u = 0; u < m; u ++) { | |||||
uint16_t *s, *d; | |||||
unsigned j, sv, dv, m; | |||||
if (u < n) { | |||||
s = &x[u]; | |||||
} else if (u < n2) { | |||||
s = &tt1[u - n]; | |||||
} else { | |||||
s = &tt2[u - n2]; | |||||
} | |||||
sv = *s; | |||||
/* | |||||
* The value in sv should ultimately go to | |||||
* address v, i.e. jump back by u-v slots. | |||||
*/ | |||||
j = u - v; | |||||
/* | |||||
* We increment v for the next iteration, but | |||||
* only if the source value is valid. The mask | |||||
* 'm' is -1 if the value is valid, 0 otherwise, | |||||
* so we _subtract_ m. | |||||
*/ | |||||
m = (sv >> 15) - 1U; | |||||
v -= m; | |||||
/* | |||||
* In this loop we consider jumps by p slots; if | |||||
* u < p then there is nothing more to do. | |||||
*/ | |||||
if (u < p) { | |||||
continue; | |||||
} | |||||
/* | |||||
* Destination for the swap: value at address u-p. | |||||
*/ | |||||
if ((u - p) < n) { | |||||
d = &x[u - p]; | |||||
} else if ((u - p) < n2) { | |||||
d = &tt1[(u - p) - n]; | |||||
} else { | |||||
d = &tt2[(u - p) - n2]; | |||||
} | |||||
dv = *d; | |||||
/* | |||||
* The swap should be performed only if the source | |||||
* is valid AND the jump j has its 'p' bit set. | |||||
*/ | |||||
m &= -(((j & p) + 0x1FF) >> 9); | |||||
*s = (uint16_t)(sv ^ (m & (sv ^ dv))); | |||||
*d = (uint16_t)(dv ^ (m & (sv ^ dv))); | |||||
} | |||||
} | |||||
} | |||||
/*
 * Signature norm check (see inner.h).
 *
 * Returns 1 when the squared l2-norm of the vector (s1, s2), each of
 * 2^logn coefficients, is strictly below the acceptance bound, and 0
 * otherwise.
 */
int
PQCLEAN_FALCON512_CLEAN_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    /*
     * Only 32-bit operations are used. 'seen' accumulates the OR of
     * every partial sum: if its top bit is ever set, a partial sum
     * went beyond 2^31-1 and the result is saturated to 2^32-1, which
     * is above every acceptance bound.
     */
    size_t count = (size_t)1 << logn;
    size_t i = 0;
    uint32_t sqn = 0;
    uint32_t seen = 0;
    uint32_t bound;

    while (i < count) {
        int32_t c1 = s1[i];
        int32_t c2 = s2[i];

        sqn += (uint32_t)(c1 * c1);
        seen |= sqn;
        sqn += (uint32_t)(c2 * c2);
        seen |= sqn;
        i ++;
    }
    sqn |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024); the shift scales
     * the degree-1024 bound down to degree 2^logn.
     */
    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return sqn < bound;
}
/*
 * Signature norm check with a precomputed partial norm (see inner.h).
 *
 * sqn is a squared norm already accumulated by the caller; the squares
 * of the 2^logn coefficients of a[] are added to it. Returns 1 when
 * the total is strictly below the acceptance bound, 0 otherwise. A
 * value of sqn with its top bit set, or any partial sum with its top
 * bit set, saturates the total to 2^32-1.
 */
int
PQCLEAN_FALCON512_CLEAN_is_short_half(
    uint32_t sqn, const int16_t *a, unsigned logn) {
    size_t count = (size_t)1 << logn;
    size_t i = 0;
    /* All-ones if the incoming value already has its top bit set. */
    uint32_t seen = -(sqn >> 31);
    uint32_t bound;

    while (i < count) {
        int32_t c = a[i];

        sqn += (uint32_t)(c * c);
        seen |= sqn;
        i ++;
    }
    sqn |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return sqn < bound;
}
@@ -0,0 +1,699 @@ | |||||
/* | |||||
* FFT code. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include "inner.h" | |||||
/* | |||||
* Rules for complex number macros: | |||||
* -------------------------------- | |||||
* | |||||
* Operand order is: destination, source1, source2... | |||||
* | |||||
* Each operand is a real and an imaginary part. | |||||
* | |||||
* All overlaps are allowed. | |||||
*/ | |||||
/*
 * Complex addition: (d_re, d_im) = (a_re, a_im) + (b_re, b_im).
 * Both sums are computed before either destination is written, so a
 * destination may be the same lvalue as a source.
 */
#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_sum_re = fpr_add(a_re, b_re); \
        fpr fpct_sum_im = fpr_add(a_im, b_im); \
        (d_re) = fpct_sum_re; \
        (d_im) = fpct_sum_im; \
    } while (0)
/*
 * Complex subtraction: (d_re, d_im) = (a_re, a_im) - (b_re, b_im).
 * Both differences are computed before either destination is written,
 * so a destination may be the same lvalue as a source.
 */
#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_dif_re = fpr_sub(a_re, b_re); \
        fpr fpct_dif_im = fpr_sub(a_im, b_im); \
        (d_re) = fpct_dif_re; \
        (d_im) = fpct_dif_im; \
    } while (0)
/*
 * Complex multiplication: (d_re, d_im) = (a_re, a_im) * (b_re, b_im).
 * Every operand is copied into a local before any computation, so each
 * argument is evaluated once and destinations may overlap sources.
 */
#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_x_re = (a_re); \
        fpr fpct_x_im = (a_im); \
        fpr fpct_y_re = (b_re); \
        fpr fpct_y_im = (b_im); \
        fpr fpct_p_re = fpr_sub( \
                fpr_mul(fpct_x_re, fpct_y_re), \
                fpr_mul(fpct_x_im, fpct_y_im)); \
        fpr fpct_p_im = fpr_add( \
                fpr_mul(fpct_x_re, fpct_y_im), \
                fpr_mul(fpct_x_im, fpct_y_re)); \
        (d_re) = fpct_p_re; \
        (d_im) = fpct_p_im; \
    } while (0)
/*
 * Complex squaring: (d_re, d_im) = (a_re, a_im)^2.
 * The real part is re^2 - im^2 and the imaginary part is the doubled
 * product re*im. The operand is copied first, so overlap is allowed.
 */
#define FPC_SQR(d_re, d_im, a_re, a_im)   do { \
        fpr fpct_x_re = (a_re); \
        fpr fpct_x_im = (a_im); \
        fpr fpct_q_re = fpr_sub(fpr_sqr(fpct_x_re), fpr_sqr(fpct_x_im)); \
        fpr fpct_q_im = fpr_double(fpr_mul(fpct_x_re, fpct_x_im)); \
        (d_re) = fpct_q_re; \
        (d_im) = fpct_q_im; \
    } while (0)
/*
 * Complex inversion: (d_re, d_im) = 1 / (a_re, a_im).
 * Computed as conj(a) scaled by the inverse of re^2 + im^2. The
 * operand is copied first, so overlap is allowed.
 */
#define FPC_INV(d_re, d_im, a_re, a_im)   do { \
        fpr fpct_x_re = (a_re); \
        fpr fpct_x_im = (a_im); \
        fpr fpct_scale = fpr_inv( \
                fpr_add(fpr_sqr(fpct_x_re), fpr_sqr(fpct_x_im))); \
        fpr fpct_r_re = fpr_mul(fpct_x_re, fpct_scale); \
        fpr fpct_r_im = fpr_mul(fpr_neg(fpct_x_im), fpct_scale); \
        (d_re) = fpct_r_re; \
        (d_im) = fpct_r_im; \
    } while (0)
/*
 * Complex division: (d_re, d_im) = (a_re, a_im) / (b_re, b_im).
 * The divisor is first inverted (conj(b) scaled by the inverse of its
 * squared modulus) and the dividend is then multiplied by that inverse.
 * All operands are copied first, so overlap is allowed.
 */
#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_x_re = (a_re); \
        fpr fpct_x_im = (a_im); \
        fpr fpct_y_re = (b_re); \
        fpr fpct_y_im = (b_im); \
        fpr fpct_scale = fpr_inv( \
                fpr_add(fpr_sqr(fpct_y_re), fpr_sqr(fpct_y_im))); \
        fpr fpct_iy_re = fpr_mul(fpct_y_re, fpct_scale); \
        fpr fpct_iy_im = fpr_mul(fpr_neg(fpct_y_im), fpct_scale); \
        fpr fpct_q_re = fpr_sub( \
                fpr_mul(fpct_x_re, fpct_iy_re), \
                fpr_mul(fpct_x_im, fpct_iy_im)); \
        fpr fpct_q_im = fpr_add( \
                fpr_mul(fpct_x_re, fpct_iy_im), \
                fpr_mul(fpct_x_im, fpct_iy_re)); \
        (d_re) = fpct_q_re; \
        (d_im) = fpct_q_im; \
    } while (0)
/* | |||||
* Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the | |||||
* values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots | |||||
* of X^N+1 in the field of complex numbers. A crucial property is that | |||||
* w_{N-1-j} = conj(w_j) = 1/w_j for all j. | |||||
* | |||||
* FFT representation of a polynomial f (taken modulo X^N+1) is the | |||||
* set of values f(w_j). Since f is real, conj(f(w_j)) = f(conj(w_j)), | |||||
* thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values, | |||||
* for j = 0 to N/2-1; the other half can be recomputed easily when (if) | |||||
* needed. A consequence is that FFT representation has the same size | |||||
* as normal representation: N/2 complex numbers use N real numbers (each | |||||
* complex number is the combination of a real and an imaginary part). | |||||
* | |||||
* We use a specific ordering which makes computations easier. Let rev() | |||||
* be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we | |||||
* store the real and imaginary parts of f(w_j) in slots: | |||||
* | |||||
* Re(f(w_j)) -> slot rev(j)/2 | |||||
* Im(f(w_j)) -> slot rev(j)/2+N/2 | |||||
* | |||||
* (Note that rev(j) is even for j < N/2.) | |||||
*/ | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_FFT(fpr *f, unsigned logn) { | |||||
/* | |||||
* FFT algorithm in bit-reversal order uses the following | |||||
* iterative algorithm: | |||||
* | |||||
* t = N | |||||
* for m = 1; m < N; m *= 2: | |||||
* ht = t/2 | |||||
* for i1 = 0; i1 < m; i1 ++: | |||||
* j1 = i1 * t | |||||
* s = GM[m + i1] | |||||
* for j = j1; j < (j1 + ht); j ++: | |||||
* x = f[j] | |||||
* y = s * f[j + ht] | |||||
* f[j] = x + y | |||||
* f[j + ht] = x - y | |||||
* t = ht | |||||
* | |||||
* GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N). | |||||
* | |||||
* In the description above, f[] is supposed to contain complex | |||||
* numbers. In our in-memory representation, the real and | |||||
* imaginary parts of f[k] are in array slots k and k+N/2. | |||||
* | |||||
* We only keep the first half of the complex numbers. We can | |||||
* see that after the first iteration, the first and second halves | |||||
* of the array of complex numbers have separate lives, so we | |||||
* simply ignore the second part. | |||||
*/ | |||||
unsigned u; | |||||
size_t t, n, hn, m; | |||||
/* | |||||
* First iteration: compute f[j] + i * f[j+N/2] for all j < N/2 | |||||
* (because GM[1] = w^rev(1) = w^(N/2) = i). | |||||
* In our chosen representation, this is a no-op: everything is | |||||
* already where it should be. | |||||
*/ | |||||
/* | |||||
* Subsequent iterations are truncated to use only the first | |||||
* half of values. | |||||
*/ | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
t = hn; | |||||
for (u = 1, m = 2; u < logn; u ++, m <<= 1) { | |||||
size_t ht, hm, i1, j1; | |||||
ht = t >> 1; | |||||
hm = m >> 1; | |||||
for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) { | |||||
size_t j, j2; | |||||
j2 = j1 + ht; | |||||
fpr s_re, s_im; | |||||
s_re = fpr_gm_tab[((m + i1) << 1) + 0]; | |||||
s_im = fpr_gm_tab[((m + i1) << 1) + 1]; | |||||
for (j = j1; j < j2; j ++) { | |||||
fpr x_re, x_im, y_re, y_im; | |||||
x_re = f[j]; | |||||
x_im = f[j + hn]; | |||||
y_re = f[j + ht]; | |||||
y_im = f[j + ht + hn]; | |||||
FPC_MUL(y_re, y_im, y_re, y_im, s_re, s_im); | |||||
FPC_ADD(f[j], f[j + hn], | |||||
x_re, x_im, y_re, y_im); | |||||
FPC_SUB(f[j + ht], f[j + ht + hn], | |||||
x_re, x_im, y_re, y_im); | |||||
} | |||||
} | |||||
t = ht; | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_iFFT(fpr *f, unsigned logn) { | |||||
/* | |||||
* Inverse FFT algorithm in bit-reversal order uses the following | |||||
* iterative algorithm: | |||||
* | |||||
* t = 1 | |||||
* for m = N; m > 1; m /= 2: | |||||
* hm = m/2 | |||||
* dt = t*2 | |||||
* for i1 = 0; i1 < hm; i1 ++: | |||||
* j1 = i1 * dt | |||||
* s = iGM[hm + i1] | |||||
* for j = j1; j < (j1 + t); j ++: | |||||
* x = f[j] | |||||
* y = f[j + t] | |||||
* f[j] = x + y | |||||
* f[j + t] = s * (x - y) | |||||
* t = dt | |||||
* for i1 = 0; i1 < N; i1 ++: | |||||
* f[i1] = f[i1] / N | |||||
* | |||||
* iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N) | |||||
* (actually, iGM[k] = 1/GM[k] = conj(GM[k])). | |||||
* | |||||
* In the main loop (not counting the final division loop), in | |||||
* all iterations except the last, the first and second half of f[] | |||||
* (as an array of complex numbers) are separate. In our chosen | |||||
* representation, we do not keep the second half. | |||||
* | |||||
* The last iteration recombines the recomputed half with the | |||||
* implicit half, and should yield only real numbers since the | |||||
* target polynomial is real; moreover, s = i at that step. | |||||
* Thus, when considering x and y: | |||||
* y = conj(x) since the final f[j] must be real | |||||
* Therefore, f[j] is filled with 2*Re(x), and f[j + t] is | |||||
* filled with 2*Im(x). | |||||
* But we already have Re(x) and Im(x) in array slots j and j+t | |||||
* in our chosen representation. That last iteration is thus a | |||||
* simple doubling of the values in all the array. | |||||
* | |||||
* We make the last iteration a no-op by tweaking the final | |||||
* division into a division by N/2, not N. | |||||
*/ | |||||
size_t u, n, hn, t, m; | |||||
n = (size_t)1 << logn; | |||||
t = 1; | |||||
m = n; | |||||
hn = n >> 1; | |||||
for (u = logn; u > 1; u --) { | |||||
size_t hm, dt, i1, j1; | |||||
hm = m >> 1; | |||||
dt = t << 1; | |||||
for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) { | |||||
size_t j, j2; | |||||
j2 = j1 + t; | |||||
fpr s_re, s_im; | |||||
s_re = fpr_gm_tab[((hm + i1) << 1) + 0]; | |||||
s_im = fpr_neg(fpr_gm_tab[((hm + i1) << 1) + 1]); | |||||
for (j = j1; j < j2; j ++) { | |||||
fpr x_re, x_im, y_re, y_im; | |||||
x_re = f[j]; | |||||
x_im = f[j + hn]; | |||||
y_re = f[j + t]; | |||||
y_im = f[j + t + hn]; | |||||
FPC_ADD(f[j], f[j + hn], | |||||
x_re, x_im, y_re, y_im); | |||||
FPC_SUB(x_re, x_im, x_re, x_im, y_re, y_im); | |||||
FPC_MUL(f[j + t], f[j + t + hn], | |||||
x_re, x_im, s_re, s_im); | |||||
} | |||||
} | |||||
t = dt; | |||||
m = hm; | |||||
} | |||||
/* | |||||
* Last iteration is a no-op, provided that we divide by N/2 | |||||
* instead of N. We need to make a special case for logn = 0. | |||||
*/ | |||||
if (logn > 0) { | |||||
fpr ni; | |||||
ni = fpr_p2_tab[logn]; | |||||
for (u = 0; u < n; u ++) { | |||||
f[u] = fpr_mul(f[u], ni); | |||||
} | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_add( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
a[u] = fpr_add(a[u], b[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_sub( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
a[u] = fpr_sub(a[u], b[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_neg(fpr *a, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
a[u] = fpr_neg(a[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_adj_fft(fpr *a, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = (n >> 1); u < n; u ++) { | |||||
a[u] = fpr_neg(a[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_mul_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
b_re = b[u]; | |||||
b_im = b[u + hn]; | |||||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_muladj_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
b_re = b[u]; | |||||
b_im = fpr_neg(b[u + hn]); | |||||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn) { | |||||
/* | |||||
* Since each coefficient is multiplied with its own conjugate, | |||||
* the result contains only real values. | |||||
*/ | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
a[u] = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)); | |||||
a[u + hn] = fpr_zero; | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn) { | |||||
size_t n, u; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
a[u] = fpr_mul(a[u], x); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_div_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
b_re = b[u]; | |||||
b_im = b[u + hn]; | |||||
FPC_DIV(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_invnorm2_fft(fpr *restrict d, | |||||
const fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr a_re, a_im; | |||||
fpr b_re, b_im; | |||||
a_re = a[u]; | |||||
a_im = a[u + hn]; | |||||
b_re = b[u]; | |||||
b_im = b[u + hn]; | |||||
d[u] = fpr_inv(fpr_add( | |||||
fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)), | |||||
fpr_add(fpr_sqr(b_re), fpr_sqr(b_im)))); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_add_muladj_fft(fpr *restrict d, | |||||
const fpr *restrict F, const fpr *restrict G, | |||||
const fpr *restrict f, const fpr *restrict g, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr F_re, F_im, G_re, G_im; | |||||
fpr f_re, f_im, g_re, g_im; | |||||
fpr a_re, a_im, b_re, b_im; | |||||
F_re = F[u]; | |||||
F_im = F[u + hn]; | |||||
G_re = G[u]; | |||||
G_im = G[u + hn]; | |||||
f_re = f[u]; | |||||
f_im = f[u + hn]; | |||||
g_re = g[u]; | |||||
g_im = g[u + hn]; | |||||
FPC_MUL(a_re, a_im, F_re, F_im, f_re, fpr_neg(f_im)); | |||||
FPC_MUL(b_re, b_im, G_re, G_im, g_re, fpr_neg(g_im)); | |||||
d[u] = fpr_add(a_re, b_re); | |||||
d[u + hn] = fpr_add(a_im, b_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
a[u] = fpr_mul(a[u], b[u]); | |||||
a[u + hn] = fpr_mul(a[u + hn], b[u]); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_div_autoadj_fft( | |||||
fpr *restrict a, const fpr *restrict b, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr ib; | |||||
ib = fpr_inv(b[u]); | |||||
a[u] = fpr_mul(a[u], ib); | |||||
a[u + hn] = fpr_mul(a[u + hn], ib); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_LDL_fft( | |||||
const fpr *restrict g00, | |||||
fpr *restrict g01, fpr *restrict g11, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||||
fpr mu_re, mu_im; | |||||
g00_re = g00[u]; | |||||
g00_im = g00[u + hn]; | |||||
g01_re = g01[u]; | |||||
g01_im = g01[u + hn]; | |||||
g11_re = g11[u]; | |||||
g11_im = g11[u + hn]; | |||||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||||
FPC_SUB(g11[u], g11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||||
g01[u] = mu_re; | |||||
g01[u + hn] = fpr_neg(mu_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_LDLmv_fft( | |||||
fpr *restrict d11, fpr *restrict l10, | |||||
const fpr *restrict g00, const fpr *restrict g01, | |||||
const fpr *restrict g11, unsigned logn) { | |||||
size_t n, hn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
for (u = 0; u < hn; u ++) { | |||||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||||
fpr mu_re, mu_im; | |||||
g00_re = g00[u]; | |||||
g00_im = g00[u + hn]; | |||||
g01_re = g01[u]; | |||||
g01_im = g01[u + hn]; | |||||
g11_re = g11[u]; | |||||
g11_im = g11[u + hn]; | |||||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||||
FPC_SUB(d11[u], d11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||||
l10[u] = mu_re; | |||||
l10[u + hn] = fpr_neg(mu_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_split_fft( | |||||
fpr *restrict f0, fpr *restrict f1, | |||||
const fpr *restrict f, unsigned logn) { | |||||
/* | |||||
* The FFT representation we use is in bit-reversed order | |||||
* (element i contains f(w^(rev(i))), where rev() is the | |||||
* bit-reversal function over the ring degree. This changes | |||||
* indexes with regards to the Falcon specification. | |||||
*/ | |||||
size_t n, hn, qn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
qn = hn >> 1; | |||||
/* | |||||
* We process complex values by pairs. For logn = 1, there is only | |||||
* one complex value (the other one is the implicit conjugate), | |||||
* so we add the two lines below because the loop will be | |||||
* skipped. | |||||
*/ | |||||
f0[0] = f[0]; | |||||
f1[0] = f[hn]; | |||||
for (u = 0; u < qn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
fpr t_re, t_im; | |||||
a_re = f[(u << 1) + 0]; | |||||
a_im = f[(u << 1) + 0 + hn]; | |||||
b_re = f[(u << 1) + 1]; | |||||
b_im = f[(u << 1) + 1 + hn]; | |||||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||||
f0[u] = fpr_half(t_re); | |||||
f0[u + qn] = fpr_half(t_im); | |||||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||||
FPC_MUL(t_re, t_im, t_re, t_im, | |||||
fpr_gm_tab[((u + hn) << 1) + 0], | |||||
fpr_neg(fpr_gm_tab[((u + hn) << 1) + 1])); | |||||
f1[u] = fpr_half(t_re); | |||||
f1[u + qn] = fpr_half(t_im); | |||||
} | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_poly_merge_fft( | |||||
fpr *restrict f, | |||||
const fpr *restrict f0, const fpr *restrict f1, unsigned logn) { | |||||
size_t n, hn, qn, u; | |||||
n = (size_t)1 << logn; | |||||
hn = n >> 1; | |||||
qn = hn >> 1; | |||||
/* | |||||
* An extra copy to handle the special case logn = 1. | |||||
*/ | |||||
f[0] = f0[0]; | |||||
f[hn] = f1[0]; | |||||
for (u = 0; u < qn; u ++) { | |||||
fpr a_re, a_im, b_re, b_im; | |||||
fpr t_re, t_im; | |||||
a_re = f0[u]; | |||||
a_im = f0[u + qn]; | |||||
FPC_MUL(b_re, b_im, f1[u], f1[u + qn], | |||||
fpr_gm_tab[((u + hn) << 1) + 0], | |||||
fpr_gm_tab[((u + hn) << 1) + 1]); | |||||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||||
f[(u << 1) + 0] = t_re; | |||||
f[(u << 1) + 0 + hn] = t_im; | |||||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||||
f[(u << 1) + 1] = t_re; | |||||
f[(u << 1) + 1 + hn] = t_im; | |||||
} | |||||
} |
@@ -0,0 +1,457 @@ | |||||
/* | |||||
* Floating-point operations. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
/* ====================================================================== */ | |||||
/* | |||||
* Custom floating-point implementation with integer arithmetics. We | |||||
* use IEEE-754 "binary64" format, with some simplifications: | |||||
* | |||||
* - Top bit is s = 1 for negative, 0 for positive. | |||||
* | |||||
* - Exponent e uses the next 11 bits (bits 52 to 62, inclusive). | |||||
* | |||||
* - Mantissa m uses the 52 low bits. | |||||
* | |||||
* Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52)) | |||||
* i.e. the mantissa really is a 53-bit number (less than 2.0, but not | |||||
* less than 1.0), but the top bit (equal to 1 by definition) is omitted | |||||
* in the encoding. | |||||
* | |||||
* In IEEE-754, there are some special values: | |||||
* | |||||
* - If e = 2047, then the value is either an infinite (m = 0) or | |||||
* a NaN (m != 0). | |||||
* | |||||
* - If e = 0, then the value is either a zero (m = 0) or a subnormal, | |||||
* aka "denormalized number" (m != 0). | |||||
* | |||||
* Of these, we only need the zeros. The caller is responsible for not | |||||
* providing operands that would lead to infinites, NaNs or subnormals. | |||||
* If inputs are such that values go out of range, then indeterminate | |||||
* values are returned (it would still be deterministic, but no specific | |||||
* value may be relied upon). | |||||
* | |||||
* At the C level, the three parts are stored in a 64-bit unsigned | |||||
* word. | |||||
* | |||||
* One may note that a property of the IEEE-754 format is that order | |||||
* is preserved for positive values: if two positive floating-point | |||||
* values x and y are such that x < y, then their respective encodings | |||||
* as _signed_ 64-bit integers i64(x) and i64(y) will be such that | |||||
* i64(x) < i64(y). For negative values, order is reversed: if x < 0, | |||||
* y < 0, and x < y, then i64(x) > i64(y).
* | |||||
* IMPORTANT ASSUMPTIONS: | |||||
* ====================== | |||||
* | |||||
* For proper computations, and constant-time behaviour, we assume the | |||||
* following: | |||||
* | |||||
* - 32x32->64 multiplication (unsigned) has an execution time that | |||||
* is independent of its operands. This is true of most modern | |||||
* x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+ | |||||
* and M3 (in the M0 and M0+, this is done in software, so it depends | |||||
* on that routine), and the PowerPC cores from the G3/G4 lines. | |||||
* For more info, see: https://www.bearssl.org/ctmul.html | |||||
* | |||||
* - Left-shifts and right-shifts of 32-bit values have an execution | |||||
* time which does not depend on the shifted value nor on the | |||||
* shift count. An historical exception is the Pentium IV, but most | |||||
* modern CPU have barrel shifters. Some small microcontrollers | |||||
* might have varying-time shifts (not the ARM Cortex M*, though). | |||||
* | |||||
* - Right-shift of a signed negative value performs a sign extension. | |||||
* As per the C standard, this operation returns an | |||||
* implementation-defined result (this is NOT an "undefined | |||||
* behaviour"). On most/all systems, an arithmetic shift is | |||||
* performed, because this is what makes most sense. | |||||
*/ | |||||
/*
 * Type for an emulated floating-point value: the raw 64-bit word that
 * holds the sign, exponent and mantissa fields described above.
 *
 * Normally we should declare the 'fpr' type to be a struct or union
 * around the internal 64-bit value; however, we want to use the
 * direct 64-bit integer type to enable a lighter call convention on
 * ARM platforms. This means that direct (invalid) use of operators
 * such as '*' or '+' will not be caught by the compiler. We rely on
 * the "normal" (non-emulated) code to detect such instances.
 */
typedef uint64_t fpr;
/* | |||||
* For computations, we split values into an integral mantissa in the | |||||
* 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is | |||||
* "sticky" (it is set to 1 if any of the bits below it is 1); when | |||||
* re-encoding, the low two bits are dropped, but may induce an | |||||
* increment in the value for proper rounding. | |||||
*/ | |||||
/*
 * Logical right shift of a 64-bit unsigned word by a shift count that
 * may be secret.
 *
 * The architecture is assumed to have a barrel shifter for 32-bit
 * shifts only; a native 64-bit shift on a 32-bit system typically calls
 * a software routine that is not necessarily constant-time. The shift
 * is therefore split into an optional, branch-free shift by 32 followed
 * by a shift by 0..31.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ursh(uint64_t x, int n) {
    uint64_t sel;

    /* All-ones when n >= 32, all-zeros otherwise. */
    sel = -(uint64_t)(n >> 5);
    /* Select either x or x >> 32, without branching. */
    x = (x & ~sel) | ((x >> 32) & sel);
    /* Apply the remaining count (0..31). */
    return x >> (n & 31);
}
/*
 * Arithmetic right shift of a 64-bit signed word by a shift count that
 * may be secret (same rationale as fpr_ursh()). This relies on the
 * right shift of a negative value performing sign extension.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline int64_t
fpr_irsh(int64_t x, int n) {
    int64_t sel;

    /* -1 (all-ones) when n >= 32, 0 otherwise. */
    sel = -(int64_t)(n >> 5);
    /* Select either x or x >> 32, without branching. */
    x = (x & ~sel) | ((x >> 32) & sel);
    /* Apply the remaining count (0..31). */
    return x >> (n & 31);
}
/*
 * Left shift of a 64-bit unsigned word by a shift count that may be
 * secret (same rationale as fpr_ursh()).
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ulsh(uint64_t x, int n) {
    uint64_t sel;

    /* All-ones when n >= 32, all-zeros otherwise. */
    sel = -(uint64_t)(n >> 5);
    /* Select either x or x << 32, without branching. */
    x = (x & ~sel) | ((x << 32) & sel);
    /* Apply the remaining count (0..31). */
    return x << (n & 31);
}
/* | |||||
* Expectations: | |||||
* s = 0 or 1 | |||||
* exponent e is "arbitrary" and unbiased | |||||
* 2^54 <= m < 2^55 | |||||
* Numerical value is (-1)^2 * m * 2^e | |||||
* | |||||
* Exponents which are too low lead to value zero. If the exponent is | |||||
* too large, the returned value is indeterminate. | |||||
* | |||||
* If m = 0, then a zero is returned (using the provided sign). | |||||
* If e < -1076, then a zero is returned (regardless of the value of m). | |||||
* If e >= -1076 and e != 0, m must be within the expected range | |||||
* (2^54 to 2^55-1). | |||||
*/ | |||||
static inline fpr | |||||
FPR(int s, int e, uint64_t m) { | |||||
fpr x; | |||||
uint32_t t; | |||||
unsigned f; | |||||
/* | |||||
* If e >= -1076, then the value is "normal"; otherwise, it | |||||
* should be a subnormal, which we clamp down to zero. | |||||
*/ | |||||
e += 1076; | |||||
t = (uint32_t)e >> 31; | |||||
m &= (uint64_t)t - 1; | |||||
/* | |||||
* If m = 0 then we want a zero; make e = 0 too, but conserve | |||||
* the sign. | |||||
*/ | |||||
t = (uint32_t)(m >> 54); | |||||
e &= -(int)t; | |||||
/* | |||||
* The 52 mantissa bits come from m. Value m has its top bit set | |||||
* (unless it is a zero); we leave it "as is": the top bit will | |||||
* increment the exponent by 1, except when m = 0, which is | |||||
* exactly what we want. | |||||
*/ | |||||
x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); | |||||
/* | |||||
* Rounding: if the low three bits of m are 011, 110 or 111, | |||||
* then the value should be incremented to get the next | |||||
* representable value. This implements the usual | |||||
* round-to-nearest rule (with preference to even values in case | |||||
* of a tie). Note that the increment may make a carry spill | |||||
* into the exponent field, which is again exactly what we want | |||||
* in that case. | |||||
*/ | |||||
f = (unsigned)m & 7U; | |||||
x += (0xC8U >> f) & 1; | |||||
return x; | |||||
} | |||||
#define fpr_scaled PQCLEAN_FALCON512_CLEAN_fpr_scaled | |||||
fpr fpr_scaled(int64_t i, int sc); | |||||
static inline fpr | |||||
fpr_of(int64_t i) { | |||||
return fpr_scaled(i, 0); | |||||
} | |||||
/*
 * Constants, given as raw 64-bit encodings. The value noted next to
 * each one is the real number it is documented to represent.
 */
static const fpr fpr_q = UINT64_C(4667981563525332992);              /* 12289 */
static const fpr fpr_inverse_of_q = UINT64_C(4545632735260551042);   /* 1/12289 */
static const fpr fpr_inv_2sqrsigma0 = UINT64_C(4594603506513722306); /* 1/(2*(1.8205^2)) */
static const fpr fpr_inv_sigma = UINT64_C(4573359825155195350);      /* 1/(1.55*sqrt(12289)) */
static const fpr fpr_sigma_min_9 = UINT64_C(4608495221497168882);    /* 1.2915007562... */
static const fpr fpr_sigma_min_10 = UINT64_C(4608586345619182117);   /* 1.3117343759... */
static const fpr fpr_log2 = UINT64_C(4604418534313441775);           /* log(2) */
static const fpr fpr_inv_log2 = UINT64_C(4609176140021203710);       /* 1/log(2) */
static const fpr fpr_bnorm_max = UINT64_C(4670353323383631276);      /* 16822.4121 */
static const fpr fpr_zero = UINT64_C(0);                             /* 0 */
static const fpr fpr_one = UINT64_C(4607182418800017408);            /* 1 */
static const fpr fpr_two = UINT64_C(4611686018427387904);            /* 2 */
static const fpr fpr_onehalf = UINT64_C(4602678819172646912);        /* 0.5 */
static const fpr fpr_ptwo31 = UINT64_C(4746794007248502784);         /* 2^31 */
static const fpr fpr_ptwo31m1 = UINT64_C(4746794007244308480);       /* 2^31-1 */
static const fpr fpr_mtwo31m1 = UINT64_C(13970166044099084288);      /* -(2^31-1) */
/*
 * Note: fpr_ptwo63m1 and fpr_ptwo63 share the same encoding, and
 * fpr_mtwo63m1 is that encoding with the sign bit set.
 */
static const fpr fpr_ptwo63m1 = UINT64_C(4890909195324358656);       /* 2^63-1 */
static const fpr fpr_mtwo63m1 = UINT64_C(14114281232179134464);      /* -(2^63-1) */
static const fpr fpr_ptwo63 = UINT64_C(4890909195324358656);         /* 2^63 */
static inline int64_t | |||||
fpr_rint(fpr x) { | |||||
uint64_t m, d; | |||||
int e; | |||||
uint32_t s, dd; | |||||
unsigned f; | |||||
/* | |||||
* We assume that the value fits in -(2^63-1)..+(2^63-1). We can | |||||
* thus extract the mantissa as a 63-bit integer, then right-shift | |||||
* it as needed. | |||||
*/ | |||||
m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||||
e = 1085 - ((int)(x >> 52) & 0x7FF); | |||||
/* | |||||
* If a shift of more than 63 bits is needed, then simply set m | |||||
* to zero. This also covers the case of an input operand equal | |||||
* to zero. | |||||
*/ | |||||
m &= -(uint64_t)((uint32_t)(e - 64) >> 31); | |||||
e &= 63; | |||||
/* | |||||
* Right-shift m as needed. Shift count is e. Proper rounding | |||||
* mandates that: | |||||
* - If the highest dropped bit is zero, then round low. | |||||
* - If the highest dropped bit is one, and at least one of the | |||||
* other dropped bits is one, then round up. | |||||
* - If the highest dropped bit is one, and all other dropped | |||||
* bits are zero, then round up if the lowest kept bit is 1, | |||||
* or low otherwise (i.e. ties are broken by "rounding to even"). | |||||
* | |||||
* We thus first extract a word consisting of all the dropped bit | |||||
* AND the lowest kept bit; then we shrink it down to three bits, | |||||
* the lowest being "sticky". | |||||
*/ | |||||
d = fpr_ulsh(m, 63 - e); | |||||
dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); | |||||
f = (unsigned)(d >> 61) | (unsigned)((dd | -dd) >> 31); | |||||
m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); | |||||
/* | |||||
* Apply the sign bit. | |||||
*/ | |||||
s = (uint32_t)(x >> 63); | |||||
return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; | |||||
} | |||||
static inline long | |||||
fpr_floor(fpr x) { | |||||
uint64_t t; | |||||
int64_t xi; | |||||
int e, cc; | |||||
/* | |||||
* We extract the integer as a _signed_ 64-bit integer with | |||||
* a scaling factor. Since we assume that the value fits | |||||
* in the -(2^63-1)..+(2^63-1) range, we can left-shift the | |||||
* absolute value to make it in the 2^62..2^63-1 range: we | |||||
* will only need a right-shift afterwards. | |||||
*/ | |||||
e = (int)(x >> 52) & 0x7FF; | |||||
t = x >> 63; | |||||
xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) | |||||
& (((uint64_t)1 << 63) - 1)); | |||||
xi = (xi ^ -(int64_t)t) + (int64_t)t; | |||||
cc = 1085 - e; | |||||
/* | |||||
* We perform an arithmetic right-shift on the value. This | |||||
* applies floor() semantics on both positive and negative values | |||||
* (rounding toward minus infinity). | |||||
*/ | |||||
xi = fpr_irsh(xi, cc & 63); | |||||
/* | |||||
* If the true shift count was 64 or more, then we should instead | |||||
* replace xi with 0 (if nonnegative) or -1 (if negative). Edge | |||||
* case: -0 will be floored to -1, not 0 (whether this is correct | |||||
* is debatable; in any case, the other functions normalize zero | |||||
* to +0). | |||||
* | |||||
* For an input of zero, the non-shifted xi was incorrect (we used | |||||
* a top implicit bit of value 1, not 0), but this does not matter | |||||
* since this operation will clamp it down. | |||||
*/ | |||||
xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); | |||||
return xi; | |||||
} | |||||
static inline int64_t | |||||
fpr_trunc(fpr x) { | |||||
uint64_t t, xu; | |||||
int e, cc; | |||||
/* | |||||
* Extract the absolute value. Since we assume that the value | |||||
* fits in the -(2^63-1)..+(2^63-1) range, we can left-shift | |||||
* the absolute value into the 2^62..2^63-1 range, and then | |||||
* do a right shift afterwards. | |||||
*/ | |||||
e = (int)(x >> 52) & 0x7FF; | |||||
xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||||
cc = 1085 - e; | |||||
xu = fpr_ursh(xu, cc & 63); | |||||
/* | |||||
* If the exponent is too low (cc > 63), then the shift was wrong | |||||
* and we must clamp the value to 0. This also covers the case | |||||
* of an input equal to zero. | |||||
*/ | |||||
xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); | |||||
/* | |||||
* Apply back the sign, if the source value is negative. | |||||
*/ | |||||
t = x >> 63; | |||||
xu = (xu ^ -t) + t; | |||||
return *(int64_t *)&xu; | |||||
} | |||||
#define fpr_add PQCLEAN_FALCON512_CLEAN_fpr_add | |||||
fpr fpr_add(fpr x, fpr y); | |||||
static inline fpr | |||||
fpr_sub(fpr x, fpr y) { | |||||
y ^= (uint64_t)1 << 63; | |||||
return fpr_add(x, y); | |||||
} | |||||
static inline fpr | |||||
fpr_neg(fpr x) { | |||||
x ^= (uint64_t)1 << 63; | |||||
return x; | |||||
} | |||||
static inline fpr | |||||
fpr_half(fpr x) { | |||||
/* | |||||
* To divide a value by 2, we just have to subtract 1 from its | |||||
* exponent, but we have to take care of zero. | |||||
*/ | |||||
uint32_t t; | |||||
x -= (uint64_t)1 << 52; | |||||
t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; | |||||
x &= (uint64_t)t - 1; | |||||
return x; | |||||
} | |||||
static inline fpr | |||||
fpr_double(fpr x) { | |||||
/* | |||||
* To double a value, we just increment by one the exponent. We | |||||
* don't care about infinites or NaNs; however, 0 is a | |||||
* special case. | |||||
*/ | |||||
x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; | |||||
return x; | |||||
} | |||||
#define fpr_mul PQCLEAN_FALCON512_CLEAN_fpr_mul | |||||
fpr fpr_mul(fpr x, fpr y); | |||||
static inline fpr | |||||
fpr_sqr(fpr x) { | |||||
return fpr_mul(x, x); | |||||
} | |||||
#define fpr_div PQCLEAN_FALCON512_CLEAN_fpr_div | |||||
fpr fpr_div(fpr x, fpr y); | |||||
static inline fpr | |||||
fpr_inv(fpr x) { | |||||
return fpr_div(4607182418800017408u, x); | |||||
} | |||||
#define fpr_sqrt PQCLEAN_FALCON512_CLEAN_fpr_sqrt | |||||
fpr fpr_sqrt(fpr x); | |||||
static inline int | |||||
fpr_lt(fpr x, fpr y) { | |||||
/* | |||||
* If x >= 0 or y >= 0, a signed comparison yields the proper | |||||
* result: | |||||
* - For positive values, the order is preserved. | |||||
* - The sign bit is at the same place as in integers, so | |||||
* sign is preserved. | |||||
* | |||||
* If both x and y are negative, then the order is reversed. | |||||
* We cannot simply invert the comparison result in that case | |||||
* because it would not handle the edge case x = y properly. | |||||
*/ | |||||
int cc0, cc1; | |||||
cc0 = *(int64_t *)&x < *(int64_t *)&y; | |||||
cc1 = *(int64_t *)&x > *(int64_t *)&y; | |||||
return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); | |||||
} | |||||
/* | |||||
* Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50 | |||||
* bits or so. | |||||
*/ | |||||
#define fpr_expm_p63 PQCLEAN_FALCON512_CLEAN_fpr_expm_p63 | |||||
uint64_t fpr_expm_p63(fpr x); | |||||
#define fpr_gm_tab PQCLEAN_FALCON512_CLEAN_fpr_gm_tab | |||||
extern const fpr fpr_gm_tab[]; | |||||
#define fpr_p2_tab PQCLEAN_FALCON512_CLEAN_fpr_p2_tab | |||||
extern const fpr fpr_p2_tab[]; | |||||
/* ====================================================================== */ | |||||
@@ -0,0 +1,663 @@ | |||||
#ifndef FALCON_INNER_H__ | |||||
#define FALCON_INNER_H__ | |||||
/* | |||||
* Internal functions for Falcon. This is not the API intended to be | |||||
* used by applications; instead, this internal API provides all the | |||||
* primitives on which wrappers build to provide external APIs. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include <stdint.h> | |||||
#include <stdlib.h> | |||||
#include <string.h> | |||||
/* ==================================================================== */ | |||||
/* | |||||
* SHAKE256 implementation (shake.c). | |||||
* | |||||
* API is defined to be easily replaced with the fips202.h API defined | |||||
* as part of PQ Clean. | |||||
*/ | |||||
#include "fips202.h" | |||||
#define shake256_context shake256incctx | |||||
#define shake256_init(sc) shake256_inc_init(sc) | |||||
#define shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len) | |||||
#define shake256_flip(sc) shake256_inc_finalize(sc) | |||||
#define shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc) | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Encoding/decoding functions (codec.c). | |||||
* | |||||
* Encoding functions take as parameters an output buffer (out) with | |||||
* a given maximum length (max_out_len); returned value is the actual | |||||
* number of bytes which have been written. If the output buffer is | |||||
* not large enough, then 0 is returned (some bytes may have been | |||||
* written to the buffer). If 'out' is NULL, then 'max_out_len' is | |||||
* ignored; instead, the function computes and returns the actual | |||||
* required output length (in bytes). | |||||
* | |||||
* Decoding functions take as parameters an input buffer (in) with | |||||
* its maximum length (max_in_len); returned value is the actual number | |||||
* of bytes that have been read from the buffer. If the provided length | |||||
* is too short, then 0 is returned. | |||||
* | |||||
* Values to encode or decode are vectors of integers, with N = 2^logn | |||||
* elements. | |||||
* | |||||
* Three encoding formats are defined: | |||||
* | |||||
* - modq: sequence of values modulo 12289, each encoded over exactly | |||||
* 14 bits. The encoder and decoder verify that integers are within | |||||
* the valid range (0..12288). Values are arrays of uint16. | |||||
* | |||||
* - trim: sequence of signed integers, a specified number of bits | |||||
* each. The number of bits is provided as parameter and includes | |||||
* the sign bit. Each integer x must be such that |x| < 2^(bits-1) | |||||
* (which means that the -2^(bits-1) value is forbidden); encode and | |||||
* decode functions check that property. Values are arrays of | |||||
* int16_t or int8_t, corresponding to names 'trim_i16' and | |||||
* 'trim_i8', respectively. | |||||
* | |||||
* - comp: variable-length encoding for signed integers; each integer | |||||
* uses a minimum of 9 bits, possibly more. This is normally used | |||||
* only for signatures. | |||||
* | |||||
*/ | |||||
size_t PQCLEAN_FALCON512_CLEAN_modq_encode(void *out, size_t max_out_len, | |||||
const uint16_t *x, unsigned logn); | |||||
size_t PQCLEAN_FALCON512_CLEAN_trim_i16_encode(void *out, size_t max_out_len, | |||||
const int16_t *x, unsigned logn, unsigned bits); | |||||
size_t PQCLEAN_FALCON512_CLEAN_trim_i8_encode(void *out, size_t max_out_len, | |||||
const int8_t *x, unsigned logn, unsigned bits); | |||||
size_t PQCLEAN_FALCON512_CLEAN_comp_encode(void *out, size_t max_out_len, | |||||
const int16_t *x, unsigned logn); | |||||
size_t PQCLEAN_FALCON512_CLEAN_modq_decode(uint16_t *x, unsigned logn, | |||||
const void *in, size_t max_in_len); | |||||
size_t PQCLEAN_FALCON512_CLEAN_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, | |||||
const void *in, size_t max_in_len); | |||||
size_t PQCLEAN_FALCON512_CLEAN_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, | |||||
const void *in, size_t max_in_len); | |||||
size_t PQCLEAN_FALCON512_CLEAN_comp_decode(int16_t *x, unsigned logn, | |||||
const void *in, size_t max_in_len); | |||||
/* | |||||
* Number of bits for key elements, indexed by logn (1 to 10). This | |||||
* is at most 8 bits for all degrees, but some degrees may have shorter | |||||
* elements. | |||||
*/ | |||||
extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_fg_bits[]; | |||||
extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_FG_bits[]; | |||||
/* | |||||
* Maximum size, in bits, of elements in a signature, indexed by logn | |||||
* (1 to 10). The size includes the sign bit. | |||||
*/ | |||||
extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_sig_bits[]; | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Support functions used for both signature generation and signature | |||||
* verification (common.c). | |||||
*/ | |||||
/* | |||||
* From a SHAKE256 context (must be already flipped), produce a new | |||||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_hash_to_point(shake256_context *sc, | |||||
uint16_t *x, unsigned logn, uint8_t *tmp); | |||||
/* | |||||
* Tell whether a given vector (2N coordinates, in two halves) is | |||||
* acceptable as a signature. This compares the appropriate norm of the | |||||
* vector with the acceptance bound. Returned value is 1 on success | |||||
* (vector is short enough to be acceptable), 0 otherwise. | |||||
*/ | |||||
int PQCLEAN_FALCON512_CLEAN_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); | |||||
/* | |||||
* Tell whether a given vector (2N coordinates, in two halves) is | |||||
* acceptable as a signature. Instead of the first half s1, this | |||||
* function receives the "saturated squared norm" of s1, i.e. the | |||||
* sum of the squares of the coordinates of s1 (saturated at 2^32-1 | |||||
* if the sum exceeds 2^31-1). | |||||
* | |||||
* Returned value is 1 on success (vector is short enough to be | |||||
* acceptable), 0 otherwise. | |||||
*/ | |||||
int PQCLEAN_FALCON512_CLEAN_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Signature verification functions (vrfy.c). | |||||
*/ | |||||
/* | |||||
* Convert a public key to NTT + Montgomery format. Conversion is done | |||||
* in place. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn); | |||||
/* | |||||
* Internal signature verification code: | |||||
* c0[] contains the hashed nonce+message | |||||
* s2[] is the decoded signature | |||||
* h[] contains the public key, in NTT + Montgomery format | |||||
* logn is the degree log | |||||
* tmp[] temporary, must have at least 2*2^logn bytes | |||||
* Returned value is 1 on success, 0 on error. | |||||
*/ | |||||
int PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||||
const uint16_t *h, unsigned logn, uint8_t *tmp); | |||||
/* | |||||
* Compute the public key h[], given the private key elements f[] and | |||||
* g[]. This computes h = g/f mod phi mod q, where phi is the polynomial | |||||
* modulus. This function returns 1 on success, 0 on error (an error is | |||||
* reported if f is not invertible mod phi mod q). | |||||
* | |||||
* The tmp[] array must have room for at least 2*2^logn elements. | |||||
*/ | |||||
int PQCLEAN_FALCON512_CLEAN_compute_public(uint16_t *h, | |||||
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); | |||||
/* | |||||
* Recompute the fourth private key element. Private key consists in | |||||
* four polynomials with small coefficients f, g, F and G, which are | |||||
* such that fG - gF = q mod phi; furthermore, f is invertible modulo | |||||
* phi and modulo q. This function recomputes G from f, g and F. | |||||
* | |||||
* The tmp[] array must have room for at least 4*2^logn bytes. | |||||
* | |||||
* Returned value is 1 on success, 0 on error (f not invertible).
*/ | |||||
int PQCLEAN_FALCON512_CLEAN_complete_private(int8_t *G, | |||||
const int8_t *f, const int8_t *g, const int8_t *F, | |||||
unsigned logn, uint8_t *tmp); | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Implementation of floating-point real numbers (fpr.h, fpr.c). | |||||
*/ | |||||
/* | |||||
* Real numbers are implemented by an extra header file, included below. | |||||
* This is meant to support pluggable implementations. The default | |||||
* implementation relies on the C type 'double'. | |||||
* | |||||
* The included file must define the following types, functions and | |||||
* constants: | |||||
* | |||||
* fpr | |||||
* type for a real number | |||||
* | |||||
* fpr fpr_of(int64_t i) | |||||
* cast an integer into a real number; source must be in the | |||||
* -(2^63-1)..+(2^63-1) range | |||||
* | |||||
* fpr fpr_scaled(int64_t i, int sc) | |||||
* compute i*2^sc as a real number; source 'i' must be in the | |||||
* -(2^63-1)..+(2^63-1) range | |||||
* | |||||
* fpr fpr_ldexp(fpr x, int e) | |||||
* compute x*2^e | |||||
* | |||||
* int64_t fpr_rint(fpr x) | |||||
* round x to the nearest integer; x must be in the -(2^63-1) | |||||
* to +(2^63-1) range | |||||
* | |||||
* int64_t fpr_trunc(fpr x) | |||||
* round to an integer; this rounds towards zero; value must | |||||
* be in the -(2^63-1) to +(2^63-1) range | |||||
* | |||||
* fpr fpr_add(fpr x, fpr y) | |||||
* compute x + y | |||||
* | |||||
* fpr fpr_sub(fpr x, fpr y) | |||||
* compute x - y | |||||
* | |||||
* fpr fpr_neg(fpr x) | |||||
* compute -x | |||||
* | |||||
* fpr fpr_half(fpr x) | |||||
* compute x/2 | |||||
* | |||||
* fpr fpr_double(fpr x) | |||||
* compute x*2 | |||||
* | |||||
* fpr fpr_mul(fpr x, fpr y) | |||||
* compute x * y | |||||
* | |||||
* fpr fpr_sqr(fpr x) | |||||
* compute x * x | |||||
* | |||||
* fpr fpr_inv(fpr x) | |||||
* compute 1/x | |||||
* | |||||
* fpr fpr_div(fpr x, fpr y) | |||||
* compute x/y | |||||
* | |||||
* fpr fpr_sqrt(fpr x) | |||||
* compute the square root of x | |||||
* | |||||
* int fpr_lt(fpr x, fpr y) | |||||
* return 1 if x < y, 0 otherwise | |||||
* | |||||
* uint64_t fpr_expm_p63(fpr x) | |||||
* return exp(-x), assuming that 0 <= x < log(2). Returned value
* is scaled to 63 bits (i.e. it really returns 2^63*exp(-x), | |||||
* rounded to the nearest integer). Computation should have a | |||||
* precision of at least 45 bits. | |||||
* | |||||
* const fpr fpr_gm_tab[] | |||||
* array of constants for FFT / iFFT | |||||
* | |||||
* const fpr fpr_p2_tab[] | |||||
* precomputed powers of 2 (by index, 0 to 10) | |||||
* | |||||
* Constants of type 'fpr': | |||||
* | |||||
* fpr fpr_q 12289 | |||||
* fpr fpr_inverse_of_q 1/12289 | |||||
* fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) | |||||
* fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) | |||||
* fpr fpr_sigma_min_9 1.291500756233514568549480827642 | |||||
* fpr fpr_sigma_min_10 1.311734375905083682667395805765 | |||||
* fpr fpr_log2 log(2) | |||||
* fpr fpr_inv_log2 1/log(2) | |||||
* fpr fpr_bnorm_max 16822.4121 | |||||
* fpr fpr_zero 0 | |||||
* fpr fpr_one 1 | |||||
* fpr fpr_two 2 | |||||
* fpr fpr_onehalf 0.5 | |||||
* fpr fpr_ptwo31 2^31 | |||||
* fpr fpr_ptwo31m1 2^31-1 | |||||
* fpr fpr_mtwo31m1 -(2^31-1) | |||||
* fpr fpr_ptwo63m1 2^63-1 | |||||
* fpr fpr_mtwo63m1 -(2^63-1) | |||||
* fpr fpr_ptwo63 2^63 | |||||
*/ | |||||
#include "fpr.h" | |||||
/* ==================================================================== */ | |||||
/* | |||||
* RNG (rng.c). | |||||
* | |||||
* A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 | |||||
* context (flipped) and is used for bulk pseudorandom generation. | |||||
* A system-dependent seed generator is also provided. | |||||
*/ | |||||
/* | |||||
* Obtain a random seed from the system RNG. | |||||
* | |||||
* Returned value is 1 on success, 0 on error. | |||||
*/ | |||||
int PQCLEAN_FALCON512_CLEAN_get_seed(void *seed, size_t seed_len); | |||||
/*
 * Structure for a PRNG. This includes a large buffer so that values
 * get generated in advance. The 'state' is used to keep the current
 * PRNG algorithm state (contents depend on the selected algorithm).
 *
 * The unions with 'dummy_u64' are there to ensure proper alignment for
 * 64-bit direct access.
 */
typedef struct {
    /* Buffer of pre-generated output bytes. */
    union {
        unsigned char d[512]; /* MUST be 512, exactly */
        uint64_t dummy_u64;   /* alignment only */
    } buf;
    /* Offset in buf.d of the next byte to hand out. */
    size_t ptr;
    /* Internal state of the PRNG algorithm. */
    union {
        unsigned char d[256];
        uint64_t dummy_u64;   /* alignment only */
    } state;
    /* NOTE(review): presumably identifies the selected algorithm -- confirm. */
    int type;
} prng;
/* | |||||
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 | |||||
* context (in "flipped" state) to obtain its initial state. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, shake256_context *src); | |||||
/* | |||||
* Refill the PRNG buffer. This is normally invoked automatically, and | |||||
* is declared here only so that prng_get_u64() may be inlined. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_prng_refill(prng *p); | |||||
/* | |||||
* Get some bytes from a PRNG. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len); | |||||
/* | |||||
* Get a 64-bit random value from a PRNG. | |||||
*/ | |||||
static inline uint64_t | |||||
prng_get_u64(prng *p) { | |||||
size_t u; | |||||
/* | |||||
* If there are less than 9 bytes in the buffer, we refill it. | |||||
* This means that we may drop the last few bytes, but this allows | |||||
* for faster extraction code. Also, it means that we never leave | |||||
* an empty buffer. | |||||
*/ | |||||
u = p->ptr; | |||||
if (u >= (sizeof p->buf.d) - 9) { | |||||
PQCLEAN_FALCON512_CLEAN_prng_refill(p); | |||||
u = 0; | |||||
} | |||||
p->ptr = u + 8; | |||||
/* | |||||
* On systems that use little-endian encoding and allow | |||||
* unaligned accesses, we can simply read the data where it is. | |||||
*/ | |||||
return (uint64_t)p->buf.d[u + 0] | |||||
| ((uint64_t)p->buf.d[u + 1] << 8) | |||||
| ((uint64_t)p->buf.d[u + 2] << 16) | |||||
| ((uint64_t)p->buf.d[u + 3] << 24) | |||||
| ((uint64_t)p->buf.d[u + 4] << 32) | |||||
| ((uint64_t)p->buf.d[u + 5] << 40) | |||||
| ((uint64_t)p->buf.d[u + 6] << 48) | |||||
| ((uint64_t)p->buf.d[u + 7] << 56); | |||||
} | |||||
/* | |||||
* Get an 8-bit random value from a PRNG. | |||||
*/ | |||||
static inline unsigned | |||||
prng_get_u8(prng *p) { | |||||
unsigned v; | |||||
v = p->buf.d[p->ptr ++]; | |||||
if (p->ptr == sizeof p->buf.d) { | |||||
PQCLEAN_FALCON512_CLEAN_prng_refill(p); | |||||
} | |||||
return v; | |||||
} | |||||
/* ==================================================================== */ | |||||
/* | |||||
* FFT (falcon-fft.c). | |||||
* | |||||
* A real polynomial is represented as an array of N 'fpr' elements. | |||||
* The FFT representation of a real polynomial contains N/2 complex | |||||
* elements; each is stored as two real numbers, for the real and | |||||
* imaginary parts, respectively. See falcon-fft.c for details on the | |||||
* internal representation. | |||||
*/ | |||||
/* | |||||
* Compute FFT in-place: the source array should contain a real | |||||
* polynomial (N coefficients); its storage area is reused to store | |||||
* the FFT representation of that polynomial (N/2 complex numbers). | |||||
* | |||||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_FFT(fpr *f, unsigned logn); | |||||
/* | |||||
* Compute the inverse FFT in-place: the source array should contain the | |||||
* FFT representation of a real polynomial (N/2 elements); the resulting | |||||
* real polynomial (N coefficients of type 'fpr') is written over the | |||||
* array. | |||||
* | |||||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_iFFT(fpr *f, unsigned logn); | |||||
/* | |||||
* Add polynomial b to polynomial a. a and b MUST NOT overlap. This | |||||
* function works in both normal and FFT representations. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_add(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This | |||||
* function works in both normal and FFT representations. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_sub(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Negate polynomial a. This function works in both normal and FFT | |||||
* representations. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_neg(fpr *a, unsigned logn); | |||||
/* | |||||
* Compute adjoint of polynomial a. This function works only in FFT | |||||
* representation. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_adj_fft(fpr *a, unsigned logn); | |||||
/* | |||||
* Multiply polynomial a with polynomial b. a and b MUST NOT overlap. | |||||
* This function works only in FFT representation. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_mul_fft(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT | |||||
* overlap. This function works only in FFT representation. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_muladj_fft(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Multiply polynomial with its own adjoint. This function works only in FFT | |||||
* representation. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn); | |||||
/* | |||||
* Multiply polynomial with a real constant. This function works in both | |||||
* normal and FFT representations. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn); | |||||
/* | |||||
* Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). | |||||
* a and b MUST NOT overlap. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_div_fft(fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) | |||||
* (also in FFT representation). Since the result is auto-adjoint, all its | |||||
* coordinates in FFT representation are real; as such, only the first N/2 | |||||
* values of d[] are filled (the imaginary parts are skipped). | |||||
* | |||||
* Array d MUST NOT overlap with either a or b. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_invnorm2_fft(fpr *restrict d, | |||||
const fpr *restrict a, const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) | |||||
* (also in FFT representation). Destination d MUST NOT overlap with | |||||
* any of the source arrays. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_add_muladj_fft(fpr *restrict d, | |||||
const fpr *restrict F, const fpr *restrict G, | |||||
const fpr *restrict f, const fpr *restrict g, unsigned logn); | |||||
/* | |||||
* Multiply polynomial a by polynomial b, where b is autoadjoint. Both | |||||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||||
* a and b MUST NOT overlap. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft(fpr *restrict a, | |||||
const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Divide polynomial a by polynomial b, where b is autoadjoint. Both | |||||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||||
* a and b MUST NOT overlap. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_div_autoadj_fft(fpr *restrict a, | |||||
const fpr *restrict b, unsigned logn); | |||||
/* | |||||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||||
* representation. On input, g00, g01 and g11 are provided (where the | |||||
* matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 | |||||
* and d11 values are written in g00, g01 and g11, respectively | |||||
* (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). | |||||
* (In fact, d00 = g00, so the g00 operand is left unmodified.) | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_LDL_fft(const fpr *restrict g00, | |||||
fpr *restrict g01, fpr *restrict g11, unsigned logn); | |||||
/* | |||||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||||
* representation. This is identical to poly_LDL_fft() except that | |||||
* g00, g01 and g11 are unmodified; the outputs d11 and l10 are written | |||||
* in two other separate buffers provided as extra parameters. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_LDLmv_fft(fpr *restrict d11, fpr *restrict l10, | |||||
const fpr *restrict g00, const fpr *restrict g01, | |||||
const fpr *restrict g11, unsigned logn); | |||||
/* | |||||
* Apply "split" operation on a polynomial in FFT representation: | |||||
* f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 | |||||
* (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_split_fft(fpr *restrict t0, fpr *restrict t1, | |||||
const fpr *restrict f, unsigned logn); | |||||
/* | |||||
* Apply "merge" operation on two polynomials in FFT representation: | |||||
* given f0 and f1, polynomials modulo X^(N/2)+1, this function computes
* f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. | |||||
* f MUST NOT overlap with either f0 or f1. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_poly_merge_fft(fpr *restrict f, | |||||
const fpr *restrict f0, const fpr *restrict f1, unsigned logn); | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Key pair generation. | |||||
*/ | |||||
/* | |||||
* Required sizes of the temporary buffer (in bytes). | |||||
*/ | |||||
#define FALCON_KEYGEN_TEMP_1 136 | |||||
#define FALCON_KEYGEN_TEMP_2 272 | |||||
#define FALCON_KEYGEN_TEMP_3 224 | |||||
#define FALCON_KEYGEN_TEMP_4 448 | |||||
#define FALCON_KEYGEN_TEMP_5 896 | |||||
#define FALCON_KEYGEN_TEMP_6 1792 | |||||
#define FALCON_KEYGEN_TEMP_7 3584 | |||||
#define FALCON_KEYGEN_TEMP_8 7168 | |||||
#define FALCON_KEYGEN_TEMP_9 14336 | |||||
#define FALCON_KEYGEN_TEMP_10 28672 | |||||
/* | |||||
* Generate a new key pair. Randomness is extracted from the provided | |||||
* SHAKE256 context, which must have already been seeded and flipped. | |||||
* The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* | |||||
* macros) and be aligned for the uint32_t, uint64_t and fpr types. | |||||
* | |||||
* The private key elements are written in f, g, F and G, and the | |||||
* public key is written in h. Either or both of G and h may be NULL, | |||||
* in which case the corresponding element is not returned (they can | |||||
* be recomputed from f, g and F). | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_keygen(shake256_context *rng, | |||||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, | |||||
unsigned logn, uint8_t *tmp); | |||||
/* ==================================================================== */ | |||||
/* | |||||
* Signature generation. | |||||
*/ | |||||
/* | |||||
* Expand a private key into the B0 matrix in FFT representation and | |||||
* the LDL tree. All the values are written in 'expanded_key', for | |||||
* a total of (8*logn+40)*2^logn bytes. | |||||
* | |||||
* The tmp[] array must have room for at least 48*2^logn bytes. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_expand_privkey(fpr *restrict expanded_key, | |||||
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, | |||||
unsigned logn, uint8_t *restrict tmp); | |||||
/* | |||||
* Compute a signature over the provided hashed message (hm); the | |||||
* signature value is one short vector. This function uses an | |||||
* expanded key (as generated by PQCLEAN_FALCON512_CLEAN_expand_privkey()). | |||||
* | |||||
* The sig[] and hm[] buffers may overlap. | |||||
* | |||||
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, shake256_context *rng, | |||||
const fpr *restrict expanded_key, | |||||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||||
/* | |||||
* Compute a signature over the provided hashed message (hm); the | |||||
* signature value is one short vector. This function uses a raw
* key and dynamically recomputes the B0 matrix and LDL tree; this
* saves RAM since there is no need for an expanded key, but
* increases the signature cost.
* | |||||
* The sig[] and hm[] buffers may overlap. | |||||
* | |||||
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes. | |||||
*/ | |||||
void PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng, | |||||
const int8_t *restrict f, const int8_t *restrict g, | |||||
const int8_t *restrict F, const int8_t *restrict G, | |||||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||||
/* ==================================================================== */ | |||||
#endif |
@@ -0,0 +1,381 @@ | |||||
/* | |||||
* Wrapper for implementing the PQClean API. | |||||
*/ | |||||
#include <stddef.h> | |||||
#include <string.h> | |||||
#include "api.h" | |||||
#include "inner.h" | |||||
#define NONCELEN 40 | |||||
#include "randombytes.h" | |||||
/* | |||||
* Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) | |||||
* | |||||
* private key: | |||||
* header byte: 0101nnnn | |||||
* private f (6 or 5 bits by element, depending on degree) | |||||
* private g (6 or 5 bits by element, depending on degree) | |||||
* private F (8 bits by element) | |||||
* | |||||
* public key: | |||||
* header byte: 0000nnnn | |||||
* public h (14 bits by element) | |||||
* | |||||
* signature: | |||||
* header byte: 0011nnnn | |||||
* nonce 40 bytes | |||||
* value (12 bits by element) | |||||
* | |||||
* message + signature: | |||||
* signature length (2 bytes, big-endian) | |||||
* nonce 40 bytes | |||||
* message | |||||
* header byte: 0010nnnn | |||||
* value (12 bits by element) | |||||
* (signature length is 1+len(value), not counting the nonce) | |||||
*/ | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair( | |||||
uint8_t *pk, uint8_t *sk) { | |||||
union { | |||||
uint8_t b[FALCON_KEYGEN_TEMP_9]; | |||||
uint64_t dummy_u64; | |||||
fpr dummy_fpr; | |||||
} tmp; | |||||
int8_t f[512], g[512], F[512]; | |||||
uint16_t h[512]; | |||||
unsigned char seed[48]; | |||||
shake256_context rng; | |||||
size_t u, v; | |||||
/* | |||||
* Generate key pair. | |||||
*/ | |||||
randombytes(seed, sizeof seed); | |||||
shake256_init(&rng); | |||||
shake256_inject(&rng, seed, sizeof seed); | |||||
shake256_flip(&rng); | |||||
PQCLEAN_FALCON512_CLEAN_keygen(&rng, f, g, F, NULL, h, 9, tmp.b); | |||||
/* | |||||
* Encode private key. | |||||
*/ | |||||
sk[0] = 0x50 + 9; | |||||
u = 1; | |||||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_encode( | |||||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||||
f, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9]); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_encode( | |||||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||||
g, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9]); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_encode( | |||||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||||
F, 9, PQCLEAN_FALCON512_CLEAN_max_FG_bits[9]); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
if (u != PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||||
return -1; | |||||
} | |||||
/* | |||||
* Encode public key. | |||||
*/ | |||||
pk[0] = 0x00 + 9; | |||||
v = PQCLEAN_FALCON512_CLEAN_modq_encode( | |||||
pk + 1, PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1, | |||||
h, 9); | |||||
if (v != PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||||
return -1; | |||||
} | |||||
return 0; | |||||
} | |||||
/* | |||||
* Compute the signature. nonce[] receives the nonce and must have length | |||||
* NONCELEN bytes. sigbuf[] receives the signature value (without nonce | |||||
* or header byte), with *sigbuflen providing the maximum value length and | |||||
* receiving the actual value length. | |||||
* | |||||
* If a signature could be computed but not encoded because it would | |||||
* exceed the output buffer size, then a new signature is computed. If | |||||
* the provided buffer size is too low, this could loop indefinitely, so | |||||
* the caller must provide a size that can accommodate signatures with a | |||||
* large enough probability. | |||||
* | |||||
* Return value: 0 on success, -1 on error. | |||||
*/ | |||||
static int | |||||
do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||||
union { | |||||
uint8_t b[72 * 512]; | |||||
uint64_t dummy_u64; | |||||
fpr dummy_fpr; | |||||
} tmp; | |||||
int8_t f[512], g[512], F[512], G[512]; | |||||
union { | |||||
int16_t sig[512]; | |||||
uint16_t hm[512]; | |||||
} r; | |||||
unsigned char seed[48]; | |||||
shake256_context sc; | |||||
size_t u, v; | |||||
/* | |||||
* Decode the private key. | |||||
*/ | |||||
if (sk[0] != 0x50 + 9) { | |||||
return -1; | |||||
} | |||||
u = 1; | |||||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_decode( | |||||
f, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9], | |||||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_decode( | |||||
g, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9], | |||||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_decode( | |||||
F, 9, PQCLEAN_FALCON512_CLEAN_max_FG_bits[9], | |||||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||||
if (v == 0) { | |||||
return -1; | |||||
} | |||||
u += v; | |||||
if (u != PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||||
return -1; | |||||
} | |||||
if (!PQCLEAN_FALCON512_CLEAN_complete_private(G, f, g, F, 9, tmp.b)) { | |||||
return -1; | |||||
} | |||||
/* | |||||
* Create a random nonce (40 bytes). | |||||
*/ | |||||
randombytes(nonce, NONCELEN); | |||||
/* | |||||
* Hash message nonce + message into a vector. | |||||
*/ | |||||
shake256_init(&sc); | |||||
shake256_inject(&sc, nonce, NONCELEN); | |||||
shake256_inject(&sc, m, mlen); | |||||
shake256_flip(&sc); | |||||
PQCLEAN_FALCON512_CLEAN_hash_to_point(&sc, r.hm, 9, tmp.b); | |||||
/* | |||||
* Initialize a RNG. | |||||
*/ | |||||
randombytes(seed, sizeof seed); | |||||
shake256_init(&sc); | |||||
shake256_inject(&sc, seed, sizeof seed); | |||||
shake256_flip(&sc); | |||||
/* | |||||
* Compute and return the signature. This loops until a signature | |||||
* value is found that fits in the provided buffer. | |||||
*/ | |||||
for (;;) { | |||||
PQCLEAN_FALCON512_CLEAN_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 9, tmp.b); | |||||
v = PQCLEAN_FALCON512_CLEAN_comp_encode(sigbuf, *sigbuflen, r.sig, 9); | |||||
if (v != 0) { | |||||
*sigbuflen = v; | |||||
return 0; | |||||
} | |||||
} | |||||
} | |||||
/* | |||||
* Verify a sigature. The nonce has size NONCELEN bytes. sigbuf[] | |||||
* (of size sigbuflen) contains the signature value, not including the | |||||
* header byte or nonce. Return value is 0 on success, -1 on error. | |||||
*/ | |||||
static int | |||||
do_verify( | |||||
const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||||
union { | |||||
uint8_t b[2 * 512]; | |||||
uint64_t dummy_u64; | |||||
fpr dummy_fpr; | |||||
} tmp; | |||||
uint16_t h[512], hm[512]; | |||||
int16_t sig[512]; | |||||
shake256_context sc; | |||||
/* | |||||
* Decode public key. | |||||
*/ | |||||
if (pk[0] != 0x00 + 9) { | |||||
return -1; | |||||
} | |||||
if (PQCLEAN_FALCON512_CLEAN_modq_decode(h, 9, | |||||
pk + 1, PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) | |||||
!= PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||||
return -1; | |||||
} | |||||
PQCLEAN_FALCON512_CLEAN_to_ntt_monty(h, 9); | |||||
/* | |||||
* Decode signature. | |||||
*/ | |||||
if (sigbuflen == 0) { | |||||
return -1; | |||||
} | |||||
if (PQCLEAN_FALCON512_CLEAN_comp_decode(sig, 9, sigbuf, sigbuflen) != sigbuflen) { | |||||
return -1; | |||||
} | |||||
/* | |||||
* Hash nonce + message into a vector. | |||||
*/ | |||||
shake256_init(&sc); | |||||
shake256_inject(&sc, nonce, NONCELEN); | |||||
shake256_inject(&sc, m, mlen); | |||||
shake256_flip(&sc); | |||||
PQCLEAN_FALCON512_CLEAN_hash_to_point(&sc, hm, 9, tmp.b); | |||||
/* | |||||
* Verify signature. | |||||
*/ | |||||
if (!PQCLEAN_FALCON512_CLEAN_verify_raw(hm, sig, h, 9, tmp.b)) { | |||||
return -1; | |||||
} | |||||
return 0; | |||||
} | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON512_CLEAN_crypto_sign_signature( | |||||
uint8_t *sig, size_t *siglen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||||
/* | |||||
* The PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES constant is used for | |||||
* the signed message object (as produced by crypto_sign()) | |||||
* and includes a two-byte length value, so we take care here | |||||
* to only generate signatures that are two bytes shorter than | |||||
* the maximum. This is done to ensure that crypto_sign() | |||||
* and crypto_sign_signature() produce the exact same signature | |||||
* value, if used on the same message, with the same private key, | |||||
* and using the same output from randombytes() (this is for | |||||
* reproducibility of tests). | |||||
*/ | |||||
size_t vlen; | |||||
vlen = PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||||
if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { | |||||
return -1; | |||||
} | |||||
sig[0] = 0x30 + 9; | |||||
*siglen = 1 + NONCELEN + vlen; | |||||
return 0; | |||||
} | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON512_CLEAN_crypto_sign_verify( | |||||
const uint8_t *sig, size_t siglen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||||
if (siglen < 1 + NONCELEN) { | |||||
return -1; | |||||
} | |||||
if (sig[0] != 0x30 + 9) { | |||||
return -1; | |||||
} | |||||
return do_verify(sig + 1, | |||||
sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); | |||||
} | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON512_CLEAN_crypto_sign( | |||||
uint8_t *sm, size_t *smlen, | |||||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||||
uint8_t *pm, *sigbuf; | |||||
size_t sigbuflen; | |||||
/* | |||||
* Move the message to its final location; this is a memmove() so | |||||
* it handles overlaps properly. | |||||
*/ | |||||
memmove(sm + 2 + NONCELEN, m, mlen); | |||||
pm = sm + 2 + NONCELEN; | |||||
sigbuf = pm + 1 + mlen; | |||||
sigbuflen = PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||||
if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { | |||||
return -1; | |||||
} | |||||
pm[mlen] = 0x20 + 9; | |||||
sigbuflen ++; | |||||
sm[0] = (uint8_t)(sigbuflen >> 8); | |||||
sm[1] = (uint8_t)sigbuflen; | |||||
*smlen = mlen + 2 + NONCELEN + sigbuflen; | |||||
return 0; | |||||
} | |||||
/* see api.h */ | |||||
int | |||||
PQCLEAN_FALCON512_CLEAN_crypto_sign_open( | |||||
uint8_t *m, size_t *mlen, | |||||
const uint8_t *sm, size_t smlen, const uint8_t *pk) { | |||||
const uint8_t *sigbuf; | |||||
size_t pmlen, sigbuflen; | |||||
if (smlen < 3 + NONCELEN) { | |||||
return -1; | |||||
} | |||||
sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; | |||||
if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { | |||||
return -1; | |||||
} | |||||
sigbuflen --; | |||||
pmlen = smlen - NONCELEN - 3 - sigbuflen; | |||||
if (sm[2 + NONCELEN + pmlen] != 0x20 + 9) { | |||||
return -1; | |||||
} | |||||
sigbuf = sm + 2 + NONCELEN + pmlen + 1; | |||||
/* | |||||
* The 2-byte length header and the one-byte signature header | |||||
* have been verified. Nonce is at sm+2, followed by the message | |||||
* itself. Message length is in pmlen. sigbuf/sigbuflen point to | |||||
* the signature value (excluding the header byte). | |||||
*/ | |||||
if (do_verify(sm + 2, sigbuf, sigbuflen, | |||||
sm + 2 + NONCELEN, pmlen, pk) < 0) { | |||||
return -1; | |||||
} | |||||
/* | |||||
* Signature is correct, we just have to copy/move the message | |||||
* to its final destination. The memmove() properly handles | |||||
* overlaps. | |||||
*/ | |||||
memmove(m, sm + 2 + NONCELEN, pmlen); | |||||
*mlen = pmlen; | |||||
return 0; | |||||
} |
@@ -0,0 +1,187 @@ | |||||
/* | |||||
* PRNG and interface to the system RNG. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include <assert.h> | |||||
#include "inner.h" | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, shake256_context *src) { | |||||
/* | |||||
* To ensure reproducibility for a given seed, we | |||||
* must enforce little-endian interpretation of | |||||
* the state words. | |||||
*/ | |||||
unsigned char tmp[56]; | |||||
uint64_t th, tl; | |||||
int i; | |||||
shake256_extract(src, tmp, 56); | |||||
for (i = 0; i < 14; i ++) { | |||||
uint32_t w; | |||||
w = (uint32_t)tmp[(i << 2) + 0] | |||||
| ((uint32_t)tmp[(i << 2) + 1] << 8) | |||||
| ((uint32_t)tmp[(i << 2) + 2] << 16) | |||||
| ((uint32_t)tmp[(i << 2) + 3] << 24); | |||||
*(uint32_t *)(p->state.d + (i << 2)) = w; | |||||
} | |||||
tl = *(uint32_t *)(p->state.d + 48); | |||||
th = *(uint32_t *)(p->state.d + 52); | |||||
*(uint64_t *)(p->state.d + 48) = tl + (th << 32); | |||||
PQCLEAN_FALCON512_CLEAN_prng_refill(p); | |||||
} | |||||
/* | |||||
* PRNG based on ChaCha20. | |||||
* | |||||
* State consists in key (32 bytes) then IV (16 bytes) and block counter | |||||
* (8 bytes). Normally, we should not care about local endianness (this | |||||
* is for a PRNG), but for the NIST competition we need reproducible KAT | |||||
* vectors that work across architectures, so we enforce little-endian | |||||
* interpretation where applicable. Moreover, output words are "spread | |||||
* out" over the output buffer with the interleaving pattern that is | |||||
* naturally obtained from the AVX2 implementation that runs eight | |||||
* ChaCha20 instances in parallel. | |||||
* | |||||
* The block counter is XORed into the first 8 bytes of the IV. | |||||
*/ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_prng_refill(prng *p) { | |||||
static const uint32_t CW[] = { | |||||
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 | |||||
}; | |||||
uint64_t cc; | |||||
size_t u; | |||||
/* | |||||
* State uses local endianness. Only the output bytes must be | |||||
* converted to little endian (if used on a big-endian machine). | |||||
*/ | |||||
cc = *(uint64_t *)(p->state.d + 48); | |||||
for (u = 0; u < 8; u ++) { | |||||
uint32_t state[16]; | |||||
size_t v; | |||||
int i; | |||||
memcpy(&state[0], CW, sizeof CW); | |||||
memcpy(&state[4], p->state.d, 48); | |||||
state[14] ^= (uint32_t)cc; | |||||
state[15] ^= (uint32_t)(cc >> 32); | |||||
for (i = 0; i < 10; i ++) { | |||||
#define QROUND(a, b, c, d) do { \ | |||||
state[a] += state[b]; \ | |||||
state[d] ^= state[a]; \ | |||||
state[d] = (state[d] << 16) | (state[d] >> 16); \ | |||||
state[c] += state[d]; \ | |||||
state[b] ^= state[c]; \ | |||||
state[b] = (state[b] << 12) | (state[b] >> 20); \ | |||||
state[a] += state[b]; \ | |||||
state[d] ^= state[a]; \ | |||||
state[d] = (state[d] << 8) | (state[d] >> 24); \ | |||||
state[c] += state[d]; \ | |||||
state[b] ^= state[c]; \ | |||||
state[b] = (state[b] << 7) | (state[b] >> 25); \ | |||||
} while (0) | |||||
QROUND( 0, 4, 8, 12); | |||||
QROUND( 1, 5, 9, 13); | |||||
QROUND( 2, 6, 10, 14); | |||||
QROUND( 3, 7, 11, 15); | |||||
QROUND( 0, 5, 10, 15); | |||||
QROUND( 1, 6, 11, 12); | |||||
QROUND( 2, 7, 8, 13); | |||||
QROUND( 3, 4, 9, 14); | |||||
#undef QROUND | |||||
} | |||||
for (v = 0; v < 4; v ++) { | |||||
state[v] += CW[v]; | |||||
} | |||||
for (v = 4; v < 14; v ++) { | |||||
state[v] += ((uint32_t *)p->state.d)[v - 4]; | |||||
} | |||||
state[14] += ((uint32_t *)p->state.d)[10] | |||||
^ (uint32_t)cc; | |||||
state[15] += ((uint32_t *)p->state.d)[11] | |||||
^ (uint32_t)(cc >> 32); | |||||
cc ++; | |||||
/* | |||||
* We mimic the interleaving that is used in the AVX2 | |||||
* implementation. | |||||
*/ | |||||
for (v = 0; v < 16; v ++) { | |||||
p->buf.d[(u << 2) + (v << 5) + 0] = | |||||
(unsigned char)state[v]; | |||||
p->buf.d[(u << 2) + (v << 5) + 1] = | |||||
(unsigned char)(state[v] >> 8); | |||||
p->buf.d[(u << 2) + (v << 5) + 2] = | |||||
(unsigned char)(state[v] >> 16); | |||||
p->buf.d[(u << 2) + (v << 5) + 3] = | |||||
(unsigned char)(state[v] >> 24); | |||||
} | |||||
} | |||||
*(uint64_t *)(p->state.d + 48) = cc; | |||||
p->ptr = 0; | |||||
} | |||||
/* see inner.h */ | |||||
void | |||||
PQCLEAN_FALCON512_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len) { | |||||
unsigned char *buf; | |||||
buf = dst; | |||||
while (len > 0) { | |||||
size_t clen; | |||||
clen = (sizeof p->buf.d) - p->ptr; | |||||
if (clen > len) { | |||||
clen = len; | |||||
} | |||||
memcpy(buf, p->buf.d, clen); | |||||
buf += clen; | |||||
len -= clen; | |||||
p->ptr += clen; | |||||
if (p->ptr == sizeof p->buf.d) { | |||||
PQCLEAN_FALCON512_CLEAN_prng_refill(p); | |||||
} | |||||
} | |||||
} |
@@ -0,0 +1,745 @@ | |||||
/* | |||||
* Falcon signature verification. | |||||
* | |||||
* ==========================(LICENSE BEGIN)============================ | |||||
* | |||||
* Copyright (c) 2017-2019 Falcon Project | |||||
* | |||||
* Permission is hereby granted, free of charge, to any person obtaining | |||||
* a copy of this software and associated documentation files (the | |||||
* "Software"), to deal in the Software without restriction, including | |||||
* without limitation the rights to use, copy, modify, merge, publish, | |||||
* distribute, sublicense, and/or sell copies of the Software, and to | |||||
* permit persons to whom the Software is furnished to do so, subject to | |||||
* the following conditions: | |||||
* | |||||
* The above copyright notice and this permission notice shall be | |||||
* included in all copies or substantial portions of the Software. | |||||
* | |||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||||
* | |||||
* ===========================(LICENSE END)============================= | |||||
* | |||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||||
*/ | |||||
#include "inner.h" | |||||
/* ===================================================================== */ | |||||
/*
 * Parameters of the NTT over Z_q[X]/(X^n + 1):
 *
 *   n   = 2^logn  (2 <= n <= 1024)
 *   phi = X^n + 1
 *   Q   = q = 12289        (the modulus)
 *   Q0I = -1/q mod 2^16    (factor used by the Montgomery reduction)
 *   R   = 2^16 mod q       (Montgomery representation of 1)
 *   R2  = 2^32 mod q       (R squared modulo q)
 */
#define Q     12289
#define Q0I   12287
#define R     4091
#define R2    10952
/*
 * Forward NTT twiddle factors (binary case), stored in Montgomery
 * representation:
 *
 *   GMb[x] = R * g^rev(x) mod q
 *
 * with g = 7, a primitive 2048-th root of unity modulo q, and rev()
 * the bit-reversal permutation over 10 bits.
 */
static const uint16_t GMb[1024] = {
    4091,  7888, 11060, 11208,  6960,  4342,  6275,  9759,
    1591,  6399,  9477,  5266,   586,  5825,  7538,  9710,
    1134,  6407,  1711,   965,  7099,  7674,  3743,  6442,
    10414,  8100,  1885,  1688,  1364, 10329, 10164,  9180,
    12210,  6240,   997,   117,  4783,  4407,  1549,  7072,
    2829,  6458,  4431,  8877,  7144,  2564,  5664,  4042,
    12189,   432, 10751,  1237,  7610,  1534,  3983,  7863,
    2181,  6308,  8720,  6570,  4843,  1690,    14,  3872,
    5569,  9368, 12163,  2019,  7543,  2315,  4673,  7340,
    1553,  1156,  8401, 11389,  1020,  2967, 10772,  7045,
    3316, 11236,  5285, 11578, 10637, 10086,  9493,  6180,
    9277,  6130,  3323,   883, 10469,   489,  1502,  2851,
    11061,  9729,  2742, 12241,  4970, 10481, 10078,  1195,
    730,  1762,  3854,  2030,  5892, 10922,  9020,  5274,
    9179,  3604,  3782, 10206,  3180,  3467,  4668,  2446,
    7613,  9386,   834,  7703,  6836,  3403,  5351, 12276,
    3580,  1739, 10820,  9787, 10209,  4070, 12250,  8525,
    10401,  2749,  7338, 10574,  6040,   943,  9330,  1477,
    6865,  9668,  3585,  6633, 12145,  4063,  3684,  7680,
    8188,  6902,  3533,  9807,  6090,   727, 10099,  7003,
    6945,  1949,  9731, 10559,  6057,   378,  7871,  8763,
    8901,  9229,  8846,  4551,  9589, 11664,  7630,  8821,
    5680,  4956,  6251,  8388, 10156,  8723,  2341,  3159,
    1467,  5460,  8553,  7783,  2649,  2320,  9036,  6188,
    737,  3698,  4699,  5753,  9046,  3687,    16,   914,
    5186, 10531,  4552,  1964,  3509,  8436,  7516,  5381,
    10733,  3281,  7037,  1060,  2895,  7156,  8887,  5357,
    6409,  8197,  2962,  6375,  5064,  6634,  5625,   278,
    932, 10229,  8927,  7642,   351,  9298,   237,  5858,
    7692,  3146, 12126,  7586,  2053, 11285,  3802,  5204,
    4602,  1748, 11300,   340,  3711,  4614,   300, 10993,
    5070, 10049, 11616, 12247,  7421, 10707,  5746,  5654,
    3835,  5553,  1224,  8476,  9237,  3845,   250, 11209,
    4225,  6326,  9680, 12254,  4136,  2778,   692,  8808,
    6410,  6718, 10105, 10418,  3759,  7356, 11361,  8433,
    6437,  3652,  6342,  8978,  5391,  2272,  6476,  7416,
    8418, 10824, 11986,  5733,   876,  7030,  2167,  2436,
    3442,  9217,  8206,  4858,  5964,  2746,  7178,  1434,
    7389,  8879, 10661, 11457,  4220,  1432, 10832,  4328,
    8557,  1867,  9454,  2416,  3816,  9076,   686,  5393,
    2523,  4339,  6115,   619,   937,  2834,  7775,  3279,
    2363,  7488,  6112,  5056,   824, 10204, 11690,  1113,
    2727,  9848,   896,  2028,  5075,  2654, 10464,  7884,
    12169,  5434,  3070,  6400,  9132, 11672, 12153,  4520,
    1273,  9739, 11468,  9937, 10039,  9720,  2262,  9399,
    11192,   315,  4511,  1158,  6061,  6751, 11865,   357,
    7367,  4550,   983,  8534,  8352, 10126,  7530,  9253,
    4367,  5221,  3999,  8777,  3161,  6990,  4130, 11652,
    3374, 11477,  1753,   292,  8681,  2806, 10378, 12188,
    5800, 11811,  3181,  1988,  1024,  9340,  2477, 10928,
    4582,  6750,  3619,  5503,  5233,  2463,  8470,  7650,
    7964,  6395,  1071,  1272,  3474, 11045,  3291, 11344,
    8502,  9478,  9837,  1253,  1857,  6233,  4720, 11561,
    6034,  9817,  3339,  1797,  2879,  6242,  5200,  2114,
    7962,  9353, 11363,  5475,  6084,  9601,  4108,  7323,
    10438,  9471,  1271,   408,  6911,  3079,   360,  8276,
    11535,  9156,  9049, 11539,   850,  8617,   784,  7919,
    8334, 12170,  1846, 10213, 12184,  7827, 11903,  5600,
    9779,  1012,   721,  2784,  6676,  6552,  5348,  4424,
    6816,  8405,  9959,  5150,  2356,  5552,  5267,  1333,
    8801,  9661,  7308,  5788,  4910,   909, 11613,  4395,
    8238,  6686,  4302,  3044,  2285, 12249,  1963,  9216,
    4296, 11918,   695,  4371,  9793,  4884,  2411, 10230,
    2650,   841,  3890, 10231,  7248,  8505, 11196,  6688,
    4059,  6060,  3686,  4722, 11853,  5816,  7058,  6868,
    11137,  7926,  4894, 12284,  4102,  3908,  3610,  6525,
    7938,  7982, 11977,  6755,   537,  4562,  1623,  8227,
    11453,  7544,   906, 11816,  9548, 10858,  9703,  2815,
    11736,  6813,  6979,   819,  8903,  6271, 10843,   348,
    7514,  8339,  6439,   694,   852,  5659,  2781,  3716,
    11589,  3024,  1523,  8659,  4114, 10738,  3303,  5885,
    2978,  7289, 11884,  9123,  9323, 11830,    98,  2526,
    2116,  4131, 11407,  1844,  3645,  3916,  8133,  2224,
    10871,  8092,  9651,  5989,  7140,  8480,  1670,   159,
    10923,  4918,   128,  7312,   725,  9157,  5006,  6393,
    3494,  6043, 10972,  6181, 11838,  3423, 10514,  7668,
    3693,  6658,  6905, 11953, 10212, 11922,  9101,  8365,
    5110,    45,  2400,  1921,  4377,  2720,  1695,    51,
    2808,   650,  1896,  9997,  9971, 11980,  8098,  4833,
    4135,  4257,  5838,  4765, 10985, 11532,   590, 12198,
    482, 12173,  2006,  7064, 10018,  3912, 12016, 10519,
    11362,  6954,  2210,   284,  5413,  6601,  3865, 10339,
    11188,  6231,   517,  9564, 11281,  3863,  1210,  4604,
    8160, 11447,   153,  7204,  5763,  5089,  9248, 12154,
    11748,  1354,  6672,   179,  5532,  2646,  5941, 12185,
    862,  3158,   477,  7279,  5678,  7914,  4254,   302,
    2893, 10114,  6890,  9560,  9647, 11905,  4098,  9824,
    10269,  1353, 10715,  5325,  6254,  3951,  1807,  6449,
    5159,  1308,  8315,  3404,  1877,  1231,   112,  6398,
    11724, 12272,  7286,  1459, 12274,  9896,  3456,   800,
    1397, 10678,   103,  7420,  7976,   936,   764,   632,
    7996,  8223,  8445,  7758, 10870,  9571,  2508,  1946,
    6524, 10158,  1044,  4338,  2457,  3641,  1659,  4139,
    4688,  9733, 11148,  3946,  2082,  5261,  2036, 11850,
    7636, 12236,  5366,  2380,  1399,  7720,  2100,  3217,
    10912,  8898,  7578, 11995,  2791,  1215,  3355,  2711,
    2267,  2004,  8568, 10176,  3214,  2337,  1750,  4729,
    4997,  7415,  6315, 12044,  4374,  7157,  4844,   211,
    8003, 10159,  9290, 11481,  1735,  2336,  5793,  9875,
    8192,   986,  7527,  1401,   870,  3615,  8465,  2756,
    9770,  2034, 10168,  3264,  6132,    54,  2880,  4763,
    11805,  3074,  8286,  9428,  4881,  6933,  1090, 10038,
    2567,   708,   893,  6465,  4962, 10024,  2090,  5718,
    10743,   780,  4733,  4623,  2134,  2087,  4802,   884,
    5372,  5795,  5938,  4333,  6559,  7549,  5269, 10664,
    4252,  3260,  5917, 10814,  5768,  9983,  8096,  7791,
    6800,  7491,  6272,  1907, 10947,  6289, 11803,  6032,
    11449,  1171,  9201,  7933,  2479,  7970, 11337,  7062,
    8911,  6728,  6542,  8114,  8828,  6595,  3545,  4348,
    4610,  2205,  6999,  8106,  5560, 10390,  9321,  2499,
    2413,  7272,  6881, 10582,  9308,  9437,  3554,  3326,
    5991, 11969,  3415, 12283,  9838, 12063,  4332,  7830,
    11329,  6605, 12271,  2044, 11611,  7353, 11201, 11582,
    3733,  8943,  9978,  1627,  7168,  3935,  5050,  2762,
    7496, 10383,   755,  1654, 12053,  4952, 10134,  4394,
    6592,  7898,  7497,  8904, 12029,  3581, 10748,  5674,
    10358,  4901,  7414,  8771,   710,  6764,  8462,  7193,
    5371,  7274, 11084,   290,  7864,  6827, 11822,  2509,
    6578,  4026,  5807,  1458,  5721,  5762,  4178,  2105,
    11621,  4852,  8897,  2856, 11510,  9264,  2520,  8776,
    7011,  2647,  1898,  7039,  5950, 11163,  5488,  6277,
    9182, 11456,   633, 10046, 11554,  5633,  9587,  2333,
    7008,  7084,  5047,  7199,  9865,  8997,   569,  6390,
    10845,  9679,  8268, 11472,  4203,  1997,     2,  9331,
    162,  6182,  2000,  3649,  9792,  6363,  7557,  6187,
    8510,  9935,  5536,  9019,  3706, 12009,  1452,  3067,
    5494,  9692,  4865,  6019,  7106,  9610,  4588, 10165,
    6261,  5887,  2652, 10172,  1580, 10379,  4638,  9949
};
/*
 * Inverse NTT twiddle factors (binary case), stored in Montgomery
 * representation:
 *
 *   iGMb[x] = R * (1/g)^rev(x) mod q
 *
 * With g = 7, the inverse is 1/g = 8778 mod 12289; rev() is the
 * bit-reversal permutation over 10 bits.
 */
static const uint16_t iGMb[1024] = {
    4091,  4401,  1081,  1229,  2530,  6014,  7947,  5329,
    2579,  4751,  6464, 11703,  7023,  2812,  5890, 10698,
    3109,  2125,  1960, 10925, 10601, 10404,  4189,  1875,
    5847,  8546,  4615,  5190, 11324, 10578,  5882, 11155,
    8417, 12275, 10599,  7446,  5719,  3569,  5981, 10108,
    4426,  8306, 10755,  4679, 11052,  1538, 11857,   100,
    8247,  6625,  9725,  5145,  3412,  7858,  5831,  9460,
    5217, 10740,  7882,  7506, 12172, 11292,  6049,    79,
    13,  6938,  8886,  5453,  4586, 11455,  2903,  4676,
    9843,  7621,  8822,  9109,  2083,  8507,  8685,  3110,
    7015,  3269,  1367,  6397, 10259,  8435, 10527, 11559,
    11094,  2211,  1808,  7319,    48,  9547,  2560,  1228,
    9438, 10787, 11800,  1820, 11406,  8966,  6159,  3012,
    6109,  2796,  2203,  1652,   711,  7004,  1053,  8973,
    5244,  1517,  9322, 11269,   900,  3888, 11133, 10736,
    4949,  7616,  9974,  4746, 10270,   126,  2921,  6720,
    6635,  6543,  1582,  4868,    42,   673,  2240,  7219,
    1296, 11989,  7675,  8578, 11949,   989, 10541,  7687,
    7085,  8487,  1004, 10236,  4703,   163,  9143,  4597,
    6431, 12052,  2991, 11938,  4647,  3362,  2060, 11357,
    12011,  6664,  5655,  7225,  5914,  9327,  4092,  5880,
    6932,  3402,  5133,  9394, 11229,  5252,  9008,  1556,
    6908,  4773,  3853,  8780, 10325,  7737,  1758,  7103,
    11375, 12273,  8602,  3243,  6536,  7590,  8591, 11552,
    6101,  3253,  9969,  9640,  4506,  3736,  6829, 10822,
    9130,  9948,  3566,  2133,  3901,  6038,  7333,  6609,
    3468,  4659,   625,  2700,  7738,  3443,  3060,  3388,
    3526,  4418, 11911,  6232,  1730,  2558, 10340,  5344,
    5286,  2190, 11562,  6199,  2482,  8756,  5387,  4101,
    4609,  8605,  8226,   144,  5656,  8704,  2621,  5424,
    10812,  2959, 11346,  6249,  1715,  4951,  9540,  1888,
    3764,    39,  8219,  2080,  2502,  1469, 10550,  8709,
    5601,  1093,  3784,  5041,  2058,  8399, 11448,  9639,
    2059,  9878,  7405,  2496,  7918, 11594,   371,  7993,
    3073, 10326,    40, 10004,  9245,  7987,  5603,  4051,
    7894,   676, 11380,  7379,  6501,  4981,  2628,  3488,
    10956,  7022,  6737,  9933,  7139,  2330,  3884,  5473,
    7865,  6941,  5737,  5613,  9505, 11568, 11277,  2510,
    6689,   386,  4462,   105,  2076, 10443,   119,  3955,
    4370, 11505,  3672, 11439,   750,  3240,  3133,   754,
    4013, 11929,  9210,  5378, 11881, 11018,  2818,  1851,
    4966,  8181,  2688,  6205,  6814,   926,  2936,  4327,
    10175,  7089,  6047,  9410, 10492,  8950,  2472,  6255,
    728,  7569,  6056, 10432, 11036,  2452,  2811,  3787,
    945,  8998,  1244,  8815, 11017, 11218,  5894,  4325,
    4639,  3819,  9826,  7056,  6786,  8670,  5539,  7707,
    1361,  9812,  2949, 11265, 10301,  9108,   478,  6489,
    101,  1911,  9483,  3608, 11997, 10536,   812,  8915,
    637,  8159,  5299,  9128,  3512,  8290,  7068,  7922,
    3036,  4759,  2163,  3937,  3755, 11306,  7739,  4922,
    11932,   424,  5538,  6228, 11131,  7778, 11974,  1097,
    2890, 10027,  2569,  2250,  2352,   821,  2550, 11016,
    7769,   136,   617,  3157,  5889,  9219,  6855,   120,
    4405,  1825,  9635,  7214, 10261, 11393,  2441,  9562,
    11176,   599,  2085, 11465,  7233,  6177,  4801,  9926,
    9010,  4514,  9455, 11352, 11670,  6174,  7950,  9766,
    6896, 11603,  3213,  8473,  9873,  2835, 10422,  3732,
    7961,  1457, 10857,  8069,   832,  1628,  3410,  4900,
    10855,  5111,  9543,  6325,  7431,  4083,  3072,  8847,
    9853, 10122,  5259, 11413,  6556,   303,  1465,  3871,
    4873,  5813, 10017,  6898,  3311,  5947,  8637,  5852,
    3856,   928,  4933,  8530,  1871,  2184,  5571,  5879,
    3481, 11597,  9511,  8153,    35,  2609,  5963,  8064,
    1080, 12039,  8444,  3052,  3813, 11065,  6736,  8454,
    2340,  7651,  1910, 10709,  2117,  9637,  6402,  6028,
    2124,  7701,  2679,  5183,  6270,  7424,  2597,  6795,
    9222, 10837,   280,  8583,  3270,  6753,  2354,  3779,
    6102,  4732,  5926,  2497,  8640, 10289,  6107, 12127,
    2958, 12287, 10292,  8086,   817,  4021,  2610,  1444,
    5899, 11720,  3292,  2424,  5090,  7242,  5205,  5281,
    9956,  2702,  6656,   735,  2243, 11656,   833,  3107,
    6012,  6801,  1126,  6339,  5250, 10391,  9642,  5278,
    3513,  9769,  3025,   779,  9433,  3392,  7437,   668,
    10184,  8111,  6527,  6568, 10831,  6482,  8263,  5711,
    9780,   467,  5462,  4425, 11999,  1205,  5015,  6918,
    5096,  3827,  5525, 11579,  3518,  4875,  7388,  1931,
    6615,  1541,  8708,   260,  3385,  4792,  4391,  5697,
    7895,  2155,  7337,   236, 10635, 11534,  1906,  4793,
    9527,  7239,  8354,  5121, 10662,  2311,  3346,  8556,
    707,  1088,  4936,   678, 10245,    18,  5684,   960,
    4459,  7957,   226,  2451,     6,  8874,   320,  6298,
    8963,  8735,  2852,  2981,  1707,  5408,  5017,  9876,
    9790,  2968,  1899,  6729,  4183,  5290, 10084,  7679,
    7941,  8744,  5694,  3461,  4175,  5747,  5561,  3378,
    5227,   952,  4319,  9810,  4356,  3088, 11118,   840,
    6257,   486,  6000,  1342, 10382,  6017,  4798,  5489,
    4498,  4193,  2306,  6521,  1475,  6372,  9029,  8037,
    1625,  7020,  4740,  5730,  7956,  6351,  6494,  6917,
    11405,  7487, 10202, 10155,  7666,  7556, 11509,  1546,
    6571, 10199,  2265,  7327,  5824, 11396, 11581,  9722,
    2251, 11199,  5356,  7408,  2861,  4003,  9215,   484,
    7526,  9409, 12235,  6157,  9025,  2121, 10255,  2519,
    9533,  3824,  8674, 11419, 10888,  4762, 11303,  4097,
    2414,  6496,  9953, 10554,   808,  2999,  2130,  4286,
    12078,  7445,  5132,  7915,   245,  5974,  4874,  7292,
    7560, 10539,  9952,  9075,  2113,  3721, 10285, 10022,
    9578,  8934, 11074,  9498,   294,  4711,  3391,  1377,
    9072, 10189,  4569, 10890,  9909,  6923,    53,  4653,
    439, 10253,  7028, 10207,  8343,  1141,  2556,  7601,
    8150, 10630,  8648,  9832,  7951, 11245,  2131,  5765,
    10343,  9781,  2718,  1419,  4531,  3844,  4066,  4293,
    11657, 11525, 11353,  4313,  4869, 12186,  1611, 10892,
    11489,  8833,  2393,    15, 10830,  5003,    17,   565,
    5891, 12177, 11058, 10412,  8885,  3974, 10981,  7130,
    5840, 10482,  8338,  6035,  6964,  1574, 10936,  2020,
    2465,  8191,   384,  2642,  2729,  5399,  2175,  9396,
    11987,  8035,  4375,  6611,  5010, 11812,  9131, 11427,
    104,  6348,  9643,  6757, 12110,  5617, 10935,   541,
    135,  3041,  7200,  6526,  5085, 12136,   842,  4129,
    7685, 11079,  8426,  1008,  2725, 11772,  6058,  1101,
    1950,  8424,  5688,  6876, 12005, 10079,  5335,   927,
    1770,   273,  8377,  2271,  5225, 10283,   116, 11807,
    91, 11699,   757,  1304,  7524,  6451,  8032,  8154,
    7456,  4191,   309,  2318,  2292, 10393, 11639,  9481,
    12238, 10594,  9569,  7912, 10368,  9889, 12244,  7179,
    3924,  3188,   367,  2077,   336,  5384,  5631,  8596,
    4621,  1775,  8866,   451,  6108,  1317,  6246,  8795,
    5896,  7283,  3132, 11564,  4977, 12161,  7371,  1366,
    12130, 10619,  3809,  5149,  6300,  2638,  4197,  1418,
    10065,  4156,  8373,  8644, 10445,   882,  8158, 10173,
    9763, 12191,   459,  2966,  3166,   405,  5000,  9311,
    6404,  8986,  1551,  8175,  3630, 10766,  9265,   700,
    8573,  9508,  6630, 11437, 11595,  5850,  3950,  4775,
    11941,  1446,  6018,  3386, 11470,  5310,  5476,   553,
    9474,  2586,  1431,  2741,   473, 11383,  4745,   836,
    4062, 10666,  7727, 11752,  5534,   312,  4307,  4351,
    5764,  8679,  8381,  8187,     5,  7395,  4363,  1152,
    5421,  5231,  6473,   436,  7567,  8603,  6229,  8230
};
/* | |||||
* Reduce a small signed integer modulo q. The source integer MUST | |||||
* be between -q/2 and +q/2. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_conv_small(int x) { | |||||
/* | |||||
* If x < 0, the cast to uint32_t will set the high bit to 1. | |||||
*/ | |||||
uint32_t y; | |||||
y = (uint32_t)x; | |||||
y += Q & -(y >> 31); | |||||
return y; | |||||
} | |||||
/* | |||||
* Addition modulo q. Operands must be in the 0..q-1 range. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_add(uint32_t x, uint32_t y) { | |||||
/* | |||||
* We compute x + y - q. If the result is negative, then the | |||||
* high bit will be set, and 'd >> 31' will be equal to 1; | |||||
* thus '-(d >> 31)' will be an all-one pattern. Otherwise, | |||||
* it will be an all-zero pattern. In other words, this | |||||
* implements a conditional addition of q. | |||||
*/ | |||||
uint32_t d; | |||||
d = x + y - Q; | |||||
d += Q & -(d >> 31); | |||||
return d; | |||||
} | |||||
/* | |||||
* Subtraction modulo q. Operands must be in the 0..q-1 range. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_sub(uint32_t x, uint32_t y) { | |||||
/* | |||||
* As in mq_add(), we use a conditional addition to ensure the | |||||
* result is in the 0..q-1 range. | |||||
*/ | |||||
uint32_t d; | |||||
d = x - y; | |||||
d += Q & -(d >> 31); | |||||
return d; | |||||
} | |||||
/* | |||||
* Division by 2 modulo q. Operand must be in the 0..q-1 range. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_rshift1(uint32_t x) { | |||||
x += Q & -(x & 1); | |||||
return (x >> 1); | |||||
} | |||||
/* | |||||
* Montgomery multiplication modulo q. If we set R = 2^16 mod q, then | |||||
* this function computes: x * y / R mod q | |||||
* Operands must be in the 0..q-1 range. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_montymul(uint32_t x, uint32_t y) { | |||||
uint32_t z, w; | |||||
/* | |||||
* We compute x*y + k*q with a value of k chosen so that the 16 | |||||
* low bits of the result are 0. We can then shift the value. | |||||
* After the shift, result may still be larger than q, but it | |||||
* will be lower than 2*q, so a conditional subtraction works. | |||||
*/ | |||||
z = x * y; | |||||
w = ((z * Q0I) & 0xFFFF) * Q; | |||||
/* | |||||
* When adding z and w, the result will have its low 16 bits | |||||
* equal to 0. Since x, y and z are lower than q, the sum will | |||||
* be no more than (2^15 - 1) * q + (q - 1)^2, which will | |||||
* fit on 29 bits. | |||||
*/ | |||||
z = (z + w) >> 16; | |||||
/* | |||||
* After the shift, analysis shows that the value will be less | |||||
* than 2q. We do a subtraction then conditional subtraction to | |||||
* ensure the result is in the expected range. | |||||
*/ | |||||
z -= Q; | |||||
z += Q & -(z >> 31); | |||||
return z; | |||||
} | |||||
/*
 * Montgomery squaring: returns (x * x) / R mod q, computed as the
 * Montgomery product of x with itself.
 */
static inline uint32_t
mq_montysqr(uint32_t x) {
    return mq_montymul(x, x);
}
/* | |||||
* Divide x by y modulo q = 12289. | |||||
*/ | |||||
static inline uint32_t | |||||
mq_div_12289(uint32_t x, uint32_t y) { | |||||
/* | |||||
* We invert y by computing y^(q-2) mod q. | |||||
* | |||||
* We use the following addition chain for exponent e = 12287: | |||||
* | |||||
* e0 = 1 | |||||
* e1 = 2 * e0 = 2 | |||||
* e2 = e1 + e0 = 3 | |||||
* e3 = e2 + e1 = 5 | |||||
* e4 = 2 * e3 = 10 | |||||
* e5 = 2 * e4 = 20 | |||||
* e6 = 2 * e5 = 40 | |||||
* e7 = 2 * e6 = 80 | |||||
* e8 = 2 * e7 = 160 | |||||
* e9 = e8 + e2 = 163 | |||||
* e10 = e9 + e8 = 323 | |||||
* e11 = 2 * e10 = 646 | |||||
* e12 = 2 * e11 = 1292 | |||||
* e13 = e12 + e9 = 1455 | |||||
* e14 = 2 * e13 = 2910 | |||||
* e15 = 2 * e14 = 5820 | |||||
* e16 = e15 + e10 = 6143 | |||||
* e17 = 2 * e16 = 12286 | |||||
* e18 = e17 + e0 = 12287 | |||||
* | |||||
* Additions on exponents are converted to Montgomery | |||||
* multiplications. We define all intermediate results as so | |||||
* many local variables, and let the C compiler work out which | |||||
* must be kept around. | |||||
*/ | |||||
uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; | |||||
uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; | |||||
y0 = mq_montymul(y, R2); | |||||
y1 = mq_montysqr(y0); | |||||
y2 = mq_montymul(y1, y0); | |||||
y3 = mq_montymul(y2, y1); | |||||
y4 = mq_montysqr(y3); | |||||
y5 = mq_montysqr(y4); | |||||
y6 = mq_montysqr(y5); | |||||
y7 = mq_montysqr(y6); | |||||
y8 = mq_montysqr(y7); | |||||
y9 = mq_montymul(y8, y2); | |||||
y10 = mq_montymul(y9, y8); | |||||
y11 = mq_montysqr(y10); | |||||
y12 = mq_montysqr(y11); | |||||
y13 = mq_montymul(y12, y9); | |||||
y14 = mq_montysqr(y13); | |||||
y15 = mq_montysqr(y14); | |||||
y16 = mq_montymul(y15, y10); | |||||
y17 = mq_montysqr(y16); | |||||
y18 = mq_montymul(y17, y0); | |||||
/* | |||||
* Final multiplication with x, which is not in Montgomery | |||||
* representation, computes the correct division result. | |||||
*/ | |||||
return mq_montymul(y18, x); | |||||
} | |||||
/* | |||||
* Compute NTT on a ring element. | |||||
*/ | |||||
static void | |||||
mq_NTT(uint16_t *a, unsigned logn) { | |||||
size_t n, t, m; | |||||
n = (size_t)1 << logn; | |||||
t = n; | |||||
for (m = 1; m < n; m <<= 1) { | |||||
size_t ht, i, j1; | |||||
ht = t >> 1; | |||||
for (i = 0, j1 = 0; i < m; i ++, j1 += t) { | |||||
size_t j, j2; | |||||
uint32_t s; | |||||
s = GMb[m + i]; | |||||
j2 = j1 + ht; | |||||
for (j = j1; j < j2; j ++) { | |||||
uint32_t u, v; | |||||
u = a[j]; | |||||
v = mq_montymul(a[j + ht], s); | |||||
a[j] = (uint16_t)mq_add(u, v); | |||||
a[j + ht] = (uint16_t)mq_sub(u, v); | |||||
} | |||||
} | |||||
t = ht; | |||||
} | |||||
} | |||||
/* | |||||
* Compute the inverse NTT on a ring element, binary case. | |||||
*/ | |||||
static void | |||||
mq_iNTT(uint16_t *a, unsigned logn) { | |||||
size_t n, t, m; | |||||
uint32_t ni; | |||||
n = (size_t)1 << logn; | |||||
t = 1; | |||||
m = n; | |||||
while (m > 1) { | |||||
size_t hm, dt, i, j1; | |||||
hm = m >> 1; | |||||
dt = t << 1; | |||||
for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { | |||||
size_t j, j2; | |||||
uint32_t s; | |||||
j2 = j1 + t; | |||||
s = iGMb[hm + i]; | |||||
for (j = j1; j < j2; j ++) { | |||||
uint32_t u, v, w; | |||||
u = a[j]; | |||||
v = a[j + t]; | |||||
a[j] = (uint16_t)mq_add(u, v); | |||||
w = mq_sub(u, v); | |||||
a[j + t] = (uint16_t) | |||||
mq_montymul(w, s); | |||||
} | |||||
} | |||||
t = dt; | |||||
m = hm; | |||||
} | |||||
/* | |||||
* To complete the inverse NTT, we must now divide all values by | |||||
* n (the vector size). We thus need the inverse of n, i.e. we | |||||
* need to divide 1 by 2 logn times. But we also want it in | |||||
* Montgomery representation, i.e. we also want to multiply it | |||||
* by R = 2^16. In the common case, this should be a simple right | |||||
* shift. The loop below is generic and works also in corner cases; | |||||
* its computation time is negligible. | |||||
*/ | |||||
ni = R; | |||||
for (m = n; m > 1; m >>= 1) { | |||||
ni = mq_rshift1(ni); | |||||
} | |||||
for (m = 0; m < n; m ++) { | |||||
a[m] = (uint16_t)mq_montymul(a[m], ni); | |||||
} | |||||
} | |||||
/* | |||||
* Convert a polynomial (mod q) to Montgomery representation. | |||||
*/ | |||||
static void | |||||
mq_poly_tomonty(uint16_t *f, unsigned logn) { | |||||
size_t u, n; | |||||
n = (size_t)1 << logn; | |||||
for (u = 0; u < n; u ++) { | |||||
f[u] = (uint16_t)mq_montymul(f[u], R2); | |||||
} | |||||
} | |||||
/*
 * Coefficient-wise Montgomery product of two polynomials given in
 * NTT representation (2^logn values each). The product overwrites f;
 * g is left untouched.
 */
static void
mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    size_t i;

    for (i = 0; i != count; i ++) {
        uint32_t lhs = f[i];
        uint32_t rhs = g[i];

        f[i] = (uint16_t)mq_montymul(lhs, rhs);
    }
}
/*
 * Coefficient-wise subtraction modulo q: f <- f - g, over 2^logn
 * coefficients. g is left untouched.
 */
static void
mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    size_t i;

    for (i = 0; i != count; i ++) {
        uint32_t lhs = f[i];
        uint32_t rhs = g[i];

        f[i] = (uint16_t)mq_sub(lhs, rhs);
    }
}
/* ===================================================================== */ | |||||
/*
 * see inner.h
 *
 * Converts h (2^logn coefficients modulo q) in place: first to NTT
 * representation, then to Montgomery representation.
 */
void
PQCLEAN_FALCON512_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn) {
    mq_NTT(h, logn);
    mq_poly_tomonty(h, logn);
}
/* see inner.h */ | |||||
int | |||||
PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||||
const uint16_t *h, unsigned logn, uint8_t *tmp) { | |||||
size_t u, n; | |||||
uint16_t *tt; | |||||
n = (size_t)1 << logn; | |||||
tt = (uint16_t *)tmp; | |||||
/* | |||||
* Reduce s2 elements modulo q ([0..q-1] range). | |||||
*/ | |||||
for (u = 0; u < n; u ++) { | |||||
uint32_t w; | |||||
w = (uint32_t)s2[u]; | |||||
w += Q & -(w >> 31); | |||||
tt[u] = (uint16_t)w; | |||||
} | |||||
/* | |||||
* Compute s1 = s2*h - c0 mod phi mod q (in tt[]). | |||||
*/ | |||||
mq_NTT(tt, logn); | |||||
mq_poly_montymul_ntt(tt, h, logn); | |||||
mq_iNTT(tt, logn); | |||||
mq_poly_sub(tt, c0, logn); | |||||
/* | |||||
* Normalize s1 elements into the [-q/2..q/2] range. | |||||
*/ | |||||
for (u = 0; u < n; u ++) { | |||||
int32_t w; | |||||
w = (int32_t)tt[u]; | |||||
w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); | |||||
((int16_t *)tt)[u] = (int16_t)w; | |||||
} | |||||
/* | |||||
* Signature is valid if and only if the aggregate (s1,s2) vector | |||||
* is short enough. | |||||
*/ | |||||
return PQCLEAN_FALCON512_CLEAN_is_short((int16_t *)tt, s2, logn); | |||||
} | |||||
/*
 * see inner.h
 *
 * Computes h = g/f mod phi mod q from the small polynomials f and g
 * (2^logn coefficients each). tmp is used as scratch space for 2^logn
 * 16-bit values.
 *
 * Returns 1 on success, 0 if f is not invertible (one of its NTT
 * coefficients is zero); in the latter case h holds partial results.
 */
int
PQCLEAN_FALCON512_CLEAN_compute_public(uint16_t *h,
                                       const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) {
    const size_t n = (size_t)1 << logn;
    uint16_t *fq = (uint16_t *)tmp;
    size_t i;

    /* Lift f and g to the 0..q-1 range. */
    for (i = 0; i < n; i ++) {
        fq[i] = (uint16_t)mq_conv_small(f[i]);
        h[i] = (uint16_t)mq_conv_small(g[i]);
    }

    mq_NTT(h, logn);
    mq_NTT(fq, logn);

    /* Pointwise division in the NTT domain; a zero divisor aborts. */
    for (i = 0; i < n; i ++) {
        uint32_t den = fq[i];

        if (den == 0) {
            return 0;
        }
        h[i] = (uint16_t)mq_div_12289(h[i], den);
    }

    mq_iNTT(h, logn);
    return 1;
}
/* see internal.h */ | |||||
int | |||||
PQCLEAN_FALCON512_CLEAN_complete_private(int8_t *G, | |||||
const int8_t *f, const int8_t *g, const int8_t *F, | |||||
unsigned logn, uint8_t *tmp) { | |||||
size_t u, n; | |||||
uint16_t *t1, *t2; | |||||
n = (size_t)1 << logn; | |||||
t1 = (uint16_t *)tmp; | |||||
t2 = t1 + n; | |||||
for (u = 0; u < n; u ++) { | |||||
t1[u] = (uint16_t)mq_conv_small(g[u]); | |||||
t2[u] = (uint16_t)mq_conv_small(F[u]); | |||||
} | |||||
mq_NTT(t1, logn); | |||||
mq_NTT(t2, logn); | |||||
mq_poly_tomonty(t1, logn); | |||||
mq_poly_montymul_ntt(t1, t2, logn); | |||||
for (u = 0; u < n; u ++) { | |||||
t2[u] = (uint16_t)mq_conv_small(f[u]); | |||||
} | |||||
mq_NTT(t2, logn); | |||||
for (u = 0; u < n; u ++) { | |||||
if (t2[u] == 0) { | |||||
return 0; | |||||
} | |||||
t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); | |||||
} | |||||
mq_iNTT(t1, logn); | |||||
for (u = 0; u < n; u ++) { | |||||
uint32_t w; | |||||
int32_t gi; | |||||
w = t1[u]; | |||||
w -= (Q & ~ -((w - (Q >> 1)) >> 31)); | |||||
gi = *(int32_t *)&w; | |||||
if (gi < -127 || gi > +127) { | |||||
return 0; | |||||
} | |||||
G[u] = (int8_t)gi; | |||||
} | |||||
return 1; | |||||
} |