Falcon implementations (integer-only code, constant-time).master
@@ -0,0 +1,23 @@ | |||
name: Falcon-1024 | |||
type: signature | |||
claimed-nist-level: 5 | |||
length-public-key: 1793 | |||
length-secret-key: 2305 | |||
length-signature: 1330 | |||
nistkat-sha256: ad3d17869fdc05deae13ffa2ef26bde125b42f61b2dcd861a1ae20adcb2accc5 | |||
testvectors-sha256: bd8076c13722d8c555c68fc6bd7763e1a9dd5483ee7c8d1c74dd2df459c72a40 | |||
principal-submitters: | |||
- Thomas Prest | |||
auxiliary-submitters: | |||
- Pierre-Alain Fouque | |||
- Jeffrey Hoffstein | |||
- Paul Kirchner | |||
- Vadim Lyubashevsky | |||
- Thomas Pornin | |||
- Thomas Ricosset | |||
- Gregor Seiler | |||
- William Whyte | |||
- Zhenfei Zhang | |||
implementations: | |||
- name: clean | |||
version: round two |
@@ -0,0 +1,22 @@ | |||
MIT License | |||
Copyright (c) 2017-2019 Falcon Project | |||
Permission is hereby granted, free of charge, to any person obtaining | |||
a copy of this software and associated documentation files (the | |||
"Software"), to deal in the Software without restriction, including | |||
without limitation the rights to use, copy, modify, merge, publish, | |||
distribute, sublicense, and/or sell copies of the Software, and to | |||
permit persons to whom the Software is furnished to do so, subject to | |||
the following conditions: | |||
The above copyright notice and this permission notice shall be | |||
included in all copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
@@ -0,0 +1,21 @@ | |||
# This Makefile can be used with GNU Make or BSD Make

LIB=libfalcon-1024_clean.a

SOURCES = codec.c common.c fft.c fpr.c keygen.c pqclean.c rng.c sign.c vrfy.c
OBJECTS = codec.o common.o fft.o fpr.o keygen.o pqclean.o rng.o sign.o vrfy.o
HEADERS = api.h fpr.h inner.h

CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror -Wmissing-prototypes -Wredundant-decls -std=c99 -I../../../common $(EXTRAFLAGS)

# Default target: build the static library.
all: $(LIB)

# 'all' and 'clean' name actions, not files: declare them phony so that a
# stray file called 'all' or 'clean' cannot make the targets look up to date.
.PHONY: all clean

# Every object depends on all headers, so a header change rebuilds everything.
%.o: %.c $(HEADERS)
	$(CC) $(CFLAGS) -c -o $@ $<

$(LIB): $(OBJECTS)
	$(AR) -r $@ $(OBJECTS)

# Remove all build products.
clean:
	$(RM) $(OBJECTS)
	$(RM) $(LIB)
@@ -0,0 +1,23 @@ | |||
# Build file for Microsoft Visual Studio's nmake. Invoke it with:
#    nmake /f Makefile.Microsoft_nmake

LIBRARY=libfalcon-1024_clean.lib
OBJECTS=codec.obj common.obj fft.obj fpr.obj keygen.obj pqclean.obj rng.obj sign.obj vrfy.obj

# MSVC raises warning C4146 whenever unary minus is applied to an unsigned
# operand. That construct has been standard, portable C for as long as
# there has been a C standard, and this code relies on it heavily (notably
# for constant-time computations), so the warning is spurious here and is
# switched off.
CFLAGS=/nologo /I ..\..\..\common /W4 /wd4146 /WX

all: $(LIBRARY)

# Any header change forces all objects to be recompiled.
$(OBJECTS): *.h

$(LIBRARY): $(OBJECTS)
	LIB.EXE /NOLOGO /WX /OUT:$@ $**

clean:
	-DEL $(OBJECTS)
	-DEL $(LIBRARY)
@@ -0,0 +1,80 @@ | |||
#ifndef PQCLEAN_FALCON1024_CLEAN_API_H
#define PQCLEAN_FALCON1024_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

/* Sizes, in bytes, of the private key, the public key and the signature. */
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES   2305
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES   1793
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES            1330

/* Human-readable algorithm name. */
#define PQCLEAN_FALCON1024_CLEAN_CRYPTO_ALGNAME          "Falcon-1024"

/*
 * Key pair generation.
 *
 * The public key is written to pk[] and the private key to sk[]. Both
 * have an exact size (in bytes):
 *    pk: PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES
 *    sk: PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(
    uint8_t *pk, uint8_t *sk);

/*
 * Detached signature generation.
 *
 * Signs the message (m, mlen) with the private key sk. The signature is
 * written to sig[] and its length to *siglen. The signature length
 * varies; it is at most PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES bytes.
 *
 * sig[], m[] and sk[] may overlap each other arbitrarily.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
 * Detached signature verification.
 *
 * Checks the signature (sig, siglen) on the message (m, mlen) against
 * the public key pk.
 *
 * sig[], m[] and pk[] may overlap each other arbitrarily.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen, const uint8_t *pk);

/*
 * Signed-message generation.
 *
 * Signs the message (m, mlen) and packs signature and message together
 * into one object written to sm[]; the object length is written to
 * *smlen. That length may exceed mlen by up to
 * PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES.
 *
 * sm[] and m[] may overlap each other arbitrarily, but sm[] shall not
 * overlap with sk[].
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign(
    uint8_t *sm, size_t *smlen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
 * Signed-message opening.
 *
 * Verifies the signature contained in the signed message object
 * (sm, smlen); on success the message is written to m[] and its length
 * to *mlen. The message is shorter than the signed object by an amount
 * that depends on the signature value and may be as large as
 * PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES.
 *
 * m[], sm[] and pk[] may overlap each other arbitrarily.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON1024_CLEAN_crypto_sign_open(
    uint8_t *m, size_t *mlen,
    const uint8_t *sm, size_t smlen, const uint8_t *pk);

#endif
@@ -0,0 +1,551 @@ | |||
/* | |||
* Encoding/decoding of keys and signatures. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include "inner.h" | |||
/*
 * see inner.h
 *
 * Packs 2^logn values, each in 0..12288, as consecutive big-endian
 * 14-bit fields. Returns the encoded length in bytes, or 0 if a value
 * is out of range or the output buffer is too small. If out is NULL,
 * only the required length is computed and returned.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * 14) + 7) >> 3;
    uint8_t *dst;
    uint32_t window = 0;
    int pending = 0;
    size_t i;

    /* Every value must already be reduced modulo q = 12289. */
    for (i = 0; i < count; i ++) {
        if (x[i] >= 12289) {
            return 0;
        }
    }
    if (out == NULL) {
        return needed;
    }
    if (needed > max_out_len) {
        return 0;
    }

    dst = out;
    for (i = 0; i < count; i ++) {
        window = (window << 14) | x[i];
        pending += 14;
        /* Emit every complete byte held in the window. */
        for (; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (pending > 0) {
        *dst = (uint8_t)(window << (8 - pending));
    }
    return needed;
}
/*
 * see inner.h
 *
 * Unpacks 2^logn big-endian 14-bit fields into x[]. Returns the number
 * of bytes consumed, or 0 if the input is too short, a decoded value
 * is not lower than 12289, or the unused low bits of the last byte are
 * not all zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const size_t count = (size_t)1 << logn;
    const size_t needed = ((count * 14) + 7) >> 3;
    const uint8_t *src = in;
    uint32_t window = 0;
    int avail = 0;
    size_t done = 0;

    if (needed > max_in_len) {
        return 0;
    }
    while (done < count) {
        unsigned val;

        window = (window << 8) | (*src ++);
        avail += 8;
        if (avail < 14) {
            /* Not enough bits for a full field yet. */
            continue;
        }
        avail -= 14;
        val = (window >> avail) & 0x3FFF;
        if (val >= 12289) {
            return 0;
        }
        x[done ++] = (uint16_t)val;
    }
    /* Padding bits after the last field must be zero. */
    if ((window & (((uint32_t)1 << avail) - 1)) != 0) {
        return 0;
    }
    return needed;
}
/*
 * see inner.h
 *
 * Packs 2^logn signed values as consecutive big-endian fields of
 * 'bits' bits each (two's complement, truncated). Each value must lie
 * in -(2^(bits-1)-1)..+(2^(bits-1)-1). Returns the encoded length in
 * bytes, or 0 if a value is out of range or the output buffer is too
 * small. If out is NULL, only the required length is returned.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int hi = (1 << (bits - 1)) - 1;
    const int lo = -hi;
    size_t i, needed;
    uint8_t *dst;
    uint32_t window, field;
    unsigned pending;

    for (i = 0; i < count; i ++) {
        if (!(x[i] >= lo && x[i] <= hi)) {
            return 0;
        }
    }
    needed = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return needed;
    }
    if (needed > max_out_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    field = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint16_t)x[i] & field);
        pending += bits;
        /* Emit every complete byte held in the window. */
        for (; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (pending > 0) {
        *dst ++ = (uint8_t)(window << (8 - pending));
    }
    return needed;
}
/*
 * see inner.h
 *
 * Unpacks 2^logn signed values stored as consecutive big-endian fields
 * of 'bits' bits each (two's complement) into x[]. Returns the number
 * of bytes consumed, or 0 if the input is too short, a field holds the
 * forbidden value -2^(bits-1), or the unused low bits of the last byte
 * are not all zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    size_t n, in_len;
    const uint8_t *buf;
    size_t u;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;   /* selects one field */
    mask2 = (uint32_t)1 << (bits - 1);   /* sign bit of a field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            if (w == mask2) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            /*
             * Sign-extend without branching and without pointer
             * punning: when the sign bit is set, subtract 2^bits.
             */
            x[u ++] = (int16_t)((int32_t)w - (int32_t)((w & mask2) << 1));
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}
/*
 * see inner.h
 *
 * Packs 2^logn signed 8-bit values as consecutive big-endian fields of
 * 'bits' bits each (two's complement, truncated). Each value must lie
 * in -(2^(bits-1)-1)..+(2^(bits-1)-1). Returns the encoded length in
 * bytes, or 0 if a value is out of range or the output buffer is too
 * small. If out is NULL, only the required length is returned.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    const size_t count = (size_t)1 << logn;
    const int hi = (1 << (bits - 1)) - 1;
    const int lo = -hi;
    size_t i, needed;
    uint8_t *dst;
    uint32_t window, field;
    unsigned pending;

    for (i = 0; i < count; i ++) {
        if (!(x[i] >= lo && x[i] <= hi)) {
            return 0;
        }
    }
    needed = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return needed;
    }
    if (needed > max_out_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    field = ((uint32_t)1 << bits) - 1;
    for (i = 0; i < count; i ++) {
        window = (window << bits) | ((uint8_t)x[i] & field);
        pending += bits;
        /* Emit every complete byte held in the window. */
        for (; pending >= 8; ) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    /* Left-align the leftover bits in a final, zero-padded byte. */
    if (pending > 0) {
        *dst ++ = (uint8_t)(window << (8 - pending));
    }
    return needed;
}
/*
 * see inner.h
 *
 * Unpacks 2^logn signed values stored as consecutive big-endian fields
 * of 'bits' bits each (two's complement) into x[]. Returns the number
 * of bytes consumed, or 0 if the input is too short, a field holds the
 * forbidden value -2^(bits-1), or the unused low bits of the last byte
 * are not all zero.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    size_t n, in_len;
    const uint8_t *buf;
    size_t u;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;   /* selects one field */
    mask2 = (uint32_t)1 << (bits - 1);   /* sign bit of a field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            if (w == mask2) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            /*
             * Sign-extend without branching and without pointer
             * punning: when the sign bit is set, subtract 2^bits.
             */
            x[u ++] = (int8_t)((int32_t)w - (int32_t)((w & mask2) << 1));
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}
/*
 * see inner.h
 *
 * Compressed encoding of 2^logn values in -2047..+2047. Each value is
 * written as: one sign bit, the low 7 bits of the absolute value, then
 * the remaining high part of the absolute value in unary (that many
 * zeros followed by a one). Returns the encoded length in bytes, or 0
 * if a value is out of range or the output buffer is too small. If out
 * is NULL, nothing is written and only the length is computed.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    uint8_t *dst = out;
    const size_t count = (size_t)1 << logn;
    size_t i, written;
    uint32_t window;
    unsigned pending;

    /*
     * All values must fit in the -2047..+2047 range.
     */
    for (i = 0; i < count; i ++) {
        if (x[i] < -2047 || x[i] > +2047) {
            return 0;
        }
    }

    window = 0;
    pending = 0;
    written = 0;
    for (i = 0; i < count; i ++) {
        int val = x[i];
        unsigned sign = 0;
        unsigned mag, high;

        if (val < 0) {
            val = -val;
            sign = 1;
        }
        mag = (unsigned)val;
        high = mag >> 7;

        /*
         * Sign bit, then the low 7 bits of the absolute value:
         * exactly 8 bits.
         */
        window = (window << 1) | sign;
        window = (window << 7) | (mag & 127u);
        pending += 8;

        /*
         * Unary part: 'high' zeros followed by a one. The absolute
         * value is at most 2047, so high is at most 15 and this adds
         * at most 16 bits. With the 8 bits above and up to 7 bits
         * left over from earlier iterations, at most 31 bits are
         * live, which fits the 32-bit window.
         */
        window = (window << (high + 1)) | 1;
        pending += high + 1;

        /*
         * Emit every complete byte.
         */
        for (; pending >= 8; written ++) {
            pending -= 8;
            if (dst != NULL) {
                if (written >= max_out_len) {
                    return 0;
                }
                dst[written] = (uint8_t)(window >> pending);
            }
        }
    }

    /*
     * Flush the leftover bits, if any, into a zero-padded byte.
     */
    if (pending > 0) {
        if (dst != NULL) {
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(window << (8 - pending));
        }
        written ++;
    }
    return written;
}
/*
 * see inner.h
 *
 * Decodes 2^logn values from the compressed format: for each value,
 * one sign bit, the low 7 bits of the absolute value, then the high
 * part of the absolute value in unary (zeros terminated by a one).
 *
 * Returns the number of bytes consumed, or 0 on error. Errors are:
 * truncated input, an absolute value above 2047, and any non-canonical
 * encoding (a "minus zero", or non-zero unused bits in the last byte).
 * Rejecting non-canonical encodings ensures each coefficient vector
 * has exactly one valid byte representation.
 */
size_t
PQCLEAN_FALCON1024_CLEAN_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value.
         */
        if (v >= max_in_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached; each 0 adds 128 to
         * the absolute value.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= max_in_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: zero must be encoded with a clear
         * sign bit (the encoder never sets it for zero).
         */
        if (s && m == 0) {
            return 0;
        }

        x[u] = (int16_t)(s ? -(int)m : (int)m);
    }

    /*
     * Unused bits in the last byte must be zero (the encoder pads
     * with zeros).
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }
    return v;
}
/* | |||
* Key elements and signatures are polynomials with small integer | |||
* coefficients. Here are some statistics gathered over many | |||
* generated key pairs (10000 or more for each degree): | |||
* | |||
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G) | |||
* 1 2 129 56.31 143 60.02 | |||
* 2 4 123 40.93 160 46.52 | |||
* 3 8 97 28.97 159 38.01 | |||
* 4 16 100 21.48 154 32.50 | |||
* 5 32 71 15.41 151 29.36 | |||
* 6 64 59 11.07 138 27.77 | |||
* 7 128 39 7.91 144 27.00 | |||
* 8 256 32 5.63 148 26.61 | |||
* 9 512 22 4.00 137 26.46 | |||
* 10 1024 15 2.84 146 26.41 | |||
* | |||
* We want a compact storage format for private key, and, as part of | |||
* key generation, we are allowed to reject some keys which would | |||
* otherwise be fine (this does not induce any noticeable vulnerability | |||
* as long as we reject only a small proportion of possible keys). | |||
* Hence, we enforce at key generation time maximum values for the | |||
* elements of f, g, F and G, so that their encoding can be expressed | |||
* in fixed-width values. Limits have been chosen so that generated | |||
* keys are almost always within bounds, thus impacting neither
* security nor performance.
* | |||
* IMPORTANT: the code assumes that all coefficients of f, g, F and G | |||
* ultimately fit in the -127..+127 range. Thus, none of the elements | |||
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8. | |||
*/ | |||
/*
 * Encoding width, in bits, of the coefficients of f and g, indexed by
 * logn (index 0 is unused).
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[] = {
    0, /* logn =  0 (unused) */
    8, /* logn =  1 */
    8, /* logn =  2 */
    8, /* logn =  3 */
    8, /* logn =  4 */
    8, /* logn =  5 */
    7, /* logn =  6 */
    7, /* logn =  7 */
    6, /* logn =  8 */
    6, /* logn =  9 */
    5  /* logn = 10 */
};
/*
 * Encoding width, in bits, of the coefficients of F and G, indexed by
 * logn (index 0 is unused).
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[] = {
    0, /* logn =  0 (unused) */
    8, /* logn =  1 */
    8, /* logn =  2 */
    8, /* logn =  3 */
    8, /* logn =  4 */
    8, /* logn =  5 */
    8, /* logn =  6 */
    8, /* logn =  7 */
    8, /* logn =  8 */
    8, /* logn =  9 */
    8  /* logn = 10 */
};
/* | |||
* When generating a new key pair, we can always reject keys which | |||
* feature an abnormally large coefficient. This can also be done for | |||
* signatures, albeit with some care: in case the signature process is | |||
* used in a derandomized setup (explicitly seeded with the message and | |||
* private key), we have to follow the specification faithfully, and the | |||
* specification only enforces a limit on the L2 norm of the signature | |||
* vector. The limit on the L2 norm implies that the absolute value of | |||
* a coefficient of the signature cannot be more than the following: | |||
* | |||
* log(n) n max sig coeff (theoretical) | |||
* 1 2 412 | |||
* 2 4 583 | |||
* 3 8 824 | |||
* 4 16 1166 | |||
* 5 32 1649 | |||
* 6 64 2332 | |||
* 7 128 3299 | |||
* 8 256 4665 | |||
* 9 512 6598 | |||
* 10 1024 9331 | |||
* | |||
* However, the largest signature coefficient observed during our
* experiments was 1077 (in absolute value), hence we can assume that,
* with overwhelming probability, signature coefficients will fit | |||
* in -2047..2047, i.e. 12 bits. | |||
*/ | |||
/*
 * Encoding width, in bits, of signature coefficients, indexed by logn
 * (index 0 is unused).
 */
const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[] = {
    0,  /* logn =  0 (unused) */
    10, /* logn =  1 */
    11, /* logn =  2 */
    11, /* logn =  3 */
    12, /* logn =  4 */
    12, /* logn =  5 */
    12, /* logn =  6 */
    12, /* logn =  7 */
    12, /* logn =  8 */
    12, /* logn =  9 */
    12  /* logn = 10 */
};
@@ -0,0 +1,261 @@ | |||
/* | |||
* Support functions for signatures (hash-to-point, norm). | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include "inner.h" | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_hash_to_point( | |||
shake256_context *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp) { | |||
/* | |||
* Each 16-bit sample is a value in 0..65535. The value is | |||
* kept if it falls in 0..61444 (because 61445 = 5*12289) | |||
* and rejected otherwise; thus, each sample has probability | |||
* about 0.93758 of being selected. | |||
* | |||
* We want to oversample enough to be sure that we will | |||
* have enough values with probability at least 1 - 2^(-256). | |||
* Depending on degree N, this leads to the following | |||
* required oversampling: | |||
* | |||
* logn n oversampling | |||
* 1 2 65 | |||
* 2 4 67 | |||
* 3 8 71 | |||
* 4 16 77 | |||
* 5 32 86 | |||
* 6 64 100 | |||
* 7 128 122 | |||
* 8 256 154 | |||
* 9 512 205 | |||
* 10 1024 287 | |||
* | |||
* If logn >= 7, then the provided temporary buffer is large | |||
* enough. Otherwise, we use a stack buffer of 63 entries | |||
* (i.e. 126 bytes) for the values that do not fit in tmp[]. | |||
*/ | |||
static const uint16_t overtab[] = { | |||
0, /* unused */ | |||
65, | |||
67, | |||
71, | |||
77, | |||
86, | |||
100, | |||
122, | |||
154, | |||
205, | |||
287 | |||
}; | |||
unsigned n, n2, u, m, p, over; | |||
uint16_t *tt1, tt2[63]; | |||
/* | |||
* We first generate m 16-bit value. Values 0..n-1 go to x[]. | |||
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. | |||
* We also reduce modulo q the values; rejected values are set | |||
* to 0xFFFF. | |||
*/ | |||
n = 1U << logn; | |||
n2 = n << 1; | |||
over = overtab[logn]; | |||
m = n + over; | |||
tt1 = (uint16_t *)tmp; | |||
for (u = 0; u < m; u ++) { | |||
uint8_t buf[2]; | |||
uint32_t w, wr; | |||
shake256_extract(sc, buf, sizeof buf); | |||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; | |||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); | |||
wr |= ((w - 61445) >> 31) - 1; | |||
if (u < n) { | |||
x[u] = (uint16_t)wr; | |||
} else if (u < n2) { | |||
tt1[u - n] = (uint16_t)wr; | |||
} else { | |||
tt2[u - n2] = (uint16_t)wr; | |||
} | |||
} | |||
/* | |||
* Now we must "squeeze out" the invalid values. We do this in | |||
* a logarithmic sequence of passes; each pass computes where a | |||
* value should go, and moves it down by 'p' slots if necessary, | |||
* where 'p' uses an increasing powers-of-two scale. It can be | |||
* shown that in all cases where the loop decides that a value | |||
* has to be moved down by p slots, the destination slot is | |||
* "free" (i.e. contains an invalid value). | |||
*/ | |||
for (p = 1; p <= over; p <<= 1) { | |||
unsigned v; | |||
/* | |||
* In the loop below: | |||
* | |||
* - v contains the index of the final destination of | |||
* the value; it is recomputed dynamically based on | |||
* whether values are valid or not. | |||
* | |||
* - u is the index of the value we consider ("source"); | |||
* its address is s. | |||
* | |||
* - The loop may swap the value with the one at index | |||
* u-p. The address of the swap destination is d. | |||
*/ | |||
v = 0; | |||
for (u = 0; u < m; u ++) { | |||
uint16_t *s, *d; | |||
unsigned j, sv, dv, mk; | |||
if (u < n) { | |||
s = &x[u]; | |||
} else if (u < n2) { | |||
s = &tt1[u - n]; | |||
} else { | |||
s = &tt2[u - n2]; | |||
} | |||
sv = *s; | |||
/* | |||
* The value in sv should ultimately go to | |||
* address v, i.e. jump back by u-v slots. | |||
*/ | |||
j = u - v; | |||
/* | |||
* We increment v for the next iteration, but | |||
* only if the source value is valid. The mask | |||
* 'mk' is -1 if the value is valid, 0 otherwise, | |||
* so we _subtract_ mk. | |||
*/ | |||
mk = (sv >> 15) - 1U; | |||
v -= mk; | |||
/* | |||
* In this loop we consider jumps by p slots; if | |||
* u < p then there is nothing more to do. | |||
*/ | |||
if (u < p) { | |||
continue; | |||
} | |||
/* | |||
* Destination for the swap: value at address u-p. | |||
*/ | |||
if ((u - p) < n) { | |||
d = &x[u - p]; | |||
} else if ((u - p) < n2) { | |||
d = &tt1[(u - p) - n]; | |||
} else { | |||
d = &tt2[(u - p) - n2]; | |||
} | |||
dv = *d; | |||
/* | |||
* The swap should be performed only if the source | |||
* is valid AND the jump j has its 'p' bit set. | |||
*/ | |||
mk &= -(((j & p) + 0x1FF) >> 9); | |||
*s = (uint16_t)(sv ^ (mk & (sv ^ dv))); | |||
*d = (uint16_t)(dv ^ (mk & (sv ^ dv))); | |||
} | |||
} | |||
} | |||
/*
 * see inner.h
 *
 * Returns 1 if the vector (s1, s2) is short enough, 0 otherwise.
 */
int
PQCLEAN_FALCON1024_CLEAN_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    /*
     * The l2-norm is used. Only 32-bit operations are involved: the
     * squared norm is accumulated in 'sum', and 'seen' ORs together
     * every intermediate sum so that its top bit records whether the
     * value ever exceeded 2^31-1; in that case the result saturates
     * to 2^32-1.
     */
    const size_t count = (size_t)1 << logn;
    uint32_t sum = 0;
    uint32_t seen = 0;
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t a = s1[i];
        int32_t b = s2[i];

        sum += (uint32_t)(a * a);
        seen |= sum;
        sum += (uint32_t)(b * b);
        seen |= sum;
    }
    sum |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    return sum < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
}
/*
 * see inner.h
 *
 * Same test as is_short(), except that the contribution of the first
 * half is supplied already squared and summed in sqn. A value of sqn
 * with its top bit set is treated as saturated. Returns 1 if the
 * total is below the acceptance bound, 0 otherwise.
 */
int
PQCLEAN_FALCON1024_CLEAN_is_short_half(
    uint32_t sqn, const int16_t *s2, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    uint32_t seen = -(sqn >> 31);
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t c = s2[i];

        sqn += (uint32_t)(c * c);
        seen |= sqn;
    }
    /* Saturate to 2^32-1 if any partial sum reached 2^31. */
    sqn |= -(seen >> 31);

    /*
     * Acceptance bound on the l2-norm is:
     *   1.2*1.55*sqrt(q)*sqrt(2*N)
     * Value 7085 is floor((1.2^2)*(1.55^2)*2*1024).
     */
    return sqn < (((uint32_t)7085 * (uint32_t)12289) >> (10 - logn));
}
@@ -0,0 +1,699 @@ | |||
/* | |||
* FFT code. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include "inner.h" | |||
/* | |||
* Rules for complex number macros: | |||
* -------------------------------- | |||
* | |||
* Operand order is: destination, source1, source2... | |||
* | |||
* Each operand is a real and an imaginary part. | |||
* | |||
* All overlaps are allowed. | |||
*/ | |||
/*
 * Complex addition: (d_re, d_im) = (a_re, a_im) + (b_re, b_im).
 * Both sums are computed before either destination is written, so
 * the destination may overlap the sources.
 */
#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im) do { \
        fpr fpct_sum_re = fpr_add((a_re), (b_re)); \
        fpr fpct_sum_im = fpr_add((a_im), (b_im)); \
        (d_re) = fpct_sum_re; \
        (d_im) = fpct_sum_im; \
    } while (0)
/*
 * Complex subtraction: (d_re, d_im) = (a_re, a_im) - (b_re, b_im).
 * Both differences are computed before either destination is written,
 * so the destination may overlap the sources.
 */
#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im) do { \
        fpr fpct_diff_re = fpr_sub((a_re), (b_re)); \
        fpr fpct_diff_im = fpr_sub((a_im), (b_im)); \
        (d_re) = fpct_diff_re; \
        (d_im) = fpct_diff_im; \
    } while (0)
/*
 * Complex multiplication: (d_re, d_im) = (a_re, a_im) * (b_re, b_im).
 * All operands are copied into temporaries first, so the destination
 * may overlap the sources.
 */
#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im) do { \
        fpr fpct_x_re = (a_re); \
        fpr fpct_x_im = (a_im); \
        fpr fpct_y_re = (b_re); \
        fpr fpct_y_im = (b_im); \
        fpr fpct_p_re = fpr_sub( \
                            fpr_mul(fpct_x_re, fpct_y_re), \
                            fpr_mul(fpct_x_im, fpct_y_im)); \
        fpr fpct_p_im = fpr_add( \
                            fpr_mul(fpct_x_re, fpct_y_im), \
                            fpr_mul(fpct_x_im, fpct_y_re)); \
        (d_re) = fpct_p_re; \
        (d_im) = fpct_p_im; \
    } while (0)
/*
 * Complex squaring: (d_re, d_im) = (a_re, a_im)^2.
 * The operand is copied into temporaries first, so the destination
 * may overlap the source.
 */
#define FPC_SQR(d_re, d_im, a_re, a_im) do { \
        fpr fpct_x_re = (a_re); \
        fpr fpct_x_im = (a_im); \
        fpr fpct_s_re = fpr_sub(fpr_sqr(fpct_x_re), fpr_sqr(fpct_x_im)); \
        fpr fpct_s_im = fpr_double(fpr_mul(fpct_x_re, fpct_x_im)); \
        (d_re) = fpct_s_re; \
        (d_im) = fpct_s_im; \
    } while (0)
/*
 * Complex inversion: (d_re, d_im) = 1 / (a_re, a_im), computed as the
 * conjugate of a multiplied by the inverse of its squared modulus.
 * The operand is copied into temporaries first, so the destination
 * may overlap the source.
 */
#define FPC_INV(d_re, d_im, a_re, a_im) do { \
        fpr fpct_x_re = (a_re); \
        fpr fpct_x_im = (a_im); \
        fpr fpct_scale = fpr_inv( \
                             fpr_add(fpr_sqr(fpct_x_re), fpr_sqr(fpct_x_im))); \
        fpr fpct_i_re = fpr_mul(fpct_x_re, fpct_scale); \
        fpr fpct_i_im = fpr_mul(fpr_neg(fpct_x_im), fpct_scale); \
        (d_re) = fpct_i_re; \
        (d_im) = fpct_i_im; \
    } while (0)
/*
 * Complex division: (d_re, d_im) = (a_re, a_im) / (b_re, b_im).
 *
 * The divisor is first inverted (conj(b) / |b|^2), then the dividend is
 * multiplied by that inverse. All operands are copied into temporaries
 * first, so destinations may overlap the sources.
 */
#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im)   do { \
        fpr fpct_num_re = (a_re); \
        fpr fpct_num_im = (a_im); \
        fpr fpct_den_re = (b_re); \
        fpr fpct_den_im = (b_im); \
        fpr fpct_scale = fpr_inv( \
                fpr_add(fpr_sqr(fpct_den_re), fpr_sqr(fpct_den_im))); \
        fpr fpct_rcp_re = fpr_mul(fpct_den_re, fpct_scale); \
        fpr fpct_rcp_im = fpr_mul(fpr_neg(fpct_den_im), fpct_scale); \
        fpr fpct_out_re = fpr_sub( \
                fpr_mul(fpct_num_re, fpct_rcp_re), \
                fpr_mul(fpct_num_im, fpct_rcp_im)); \
        fpr fpct_out_im = fpr_add( \
                fpr_mul(fpct_num_re, fpct_rcp_im), \
                fpr_mul(fpct_num_im, fpct_rcp_re)); \
        (d_re) = fpct_out_re; \
        (d_im) = fpct_out_im; \
    } while (0)
/* | |||
* Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the | |||
* values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots | |||
* of X^N+1 in the field of complex numbers. A crucial property is that | |||
* w_{N-1-j} = conj(w_j) = 1/w_j for all j. | |||
* | |||
* FFT representation of a polynomial f (taken modulo X^N+1) is the | |||
* set of values f(w_j). Since f is real, conj(f(w_j)) = f(conj(w_j)), | |||
* thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values, | |||
* for j = 0 to N/2-1; the other half can be recomputed easily when (if) | |||
* needed. A consequence is that FFT representation has the same size | |||
* as normal representation: N/2 complex numbers use N real numbers (each | |||
* complex number is the combination of a real and an imaginary part). | |||
* | |||
* We use a specific ordering which makes computations easier. Let rev() | |||
* be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we | |||
* store the real and imaginary parts of f(w_j) in slots: | |||
* | |||
* Re(f(w_j)) -> slot rev(j)/2 | |||
* Im(f(w_j)) -> slot rev(j)/2+N/2 | |||
* | |||
* (Note that rev(j) is even for j < N/2.) | |||
*/ | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn) { | |||
/* | |||
* FFT algorithm in bit-reversal order uses the following | |||
* iterative algorithm: | |||
* | |||
* t = N | |||
* for m = 1; m < N; m *= 2: | |||
* ht = t/2 | |||
* for i1 = 0; i1 < m; i1 ++: | |||
* j1 = i1 * t | |||
* s = GM[m + i1] | |||
* for j = j1; j < (j1 + ht); j ++: | |||
* x = f[j] | |||
* y = s * f[j + ht] | |||
* f[j] = x + y | |||
* f[j + ht] = x - y | |||
* t = ht | |||
* | |||
* GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N). | |||
* | |||
* In the description above, f[] is supposed to contain complex | |||
* numbers. In our in-memory representation, the real and | |||
* imaginary parts of f[k] are in array slots k and k+N/2. | |||
* | |||
* We only keep the first half of the complex numbers. We can | |||
* see that after the first iteration, the first and second halves | |||
* of the array of complex numbers have separate lives, so we | |||
* simply ignore the second part. | |||
*/ | |||
unsigned u; | |||
size_t t, n, hn, m; | |||
/* | |||
* First iteration: compute f[j] + i * f[j+N/2] for all j < N/2 | |||
* (because GM[1] = w^rev(1) = w^(N/2) = i). | |||
* In our chosen representation, this is a no-op: everything is | |||
* already where it should be. | |||
*/ | |||
/* | |||
* Subsequent iterations are truncated to use only the first | |||
* half of values. | |||
*/ | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
t = hn; | |||
for (u = 1, m = 2; u < logn; u ++, m <<= 1) { | |||
size_t ht, hm, i1, j1; | |||
ht = t >> 1; | |||
hm = m >> 1; | |||
for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) { | |||
size_t j, j2; | |||
j2 = j1 + ht; | |||
fpr s_re, s_im; | |||
s_re = fpr_gm_tab[((m + i1) << 1) + 0]; | |||
s_im = fpr_gm_tab[((m + i1) << 1) + 1]; | |||
for (j = j1; j < j2; j ++) { | |||
fpr x_re, x_im, y_re, y_im; | |||
x_re = f[j]; | |||
x_im = f[j + hn]; | |||
y_re = f[j + ht]; | |||
y_im = f[j + ht + hn]; | |||
FPC_MUL(y_re, y_im, y_re, y_im, s_re, s_im); | |||
FPC_ADD(f[j], f[j + hn], | |||
x_re, x_im, y_re, y_im); | |||
FPC_SUB(f[j + ht], f[j + ht + hn], | |||
x_re, x_im, y_re, y_im); | |||
} | |||
} | |||
t = ht; | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn) { | |||
/* | |||
* Inverse FFT algorithm in bit-reversal order uses the following | |||
* iterative algorithm: | |||
* | |||
* t = 1 | |||
* for m = N; m > 1; m /= 2: | |||
* hm = m/2 | |||
* dt = t*2 | |||
* for i1 = 0; i1 < hm; i1 ++: | |||
* j1 = i1 * dt | |||
* s = iGM[hm + i1] | |||
* for j = j1; j < (j1 + t); j ++: | |||
* x = f[j] | |||
* y = f[j + t] | |||
* f[j] = x + y | |||
* f[j + t] = s * (x - y) | |||
* t = dt | |||
* for i1 = 0; i1 < N; i1 ++: | |||
* f[i1] = f[i1] / N | |||
* | |||
* iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N) | |||
* (actually, iGM[k] = 1/GM[k] = conj(GM[k])). | |||
* | |||
* In the main loop (not counting the final division loop), in | |||
* all iterations except the last, the first and second half of f[] | |||
* (as an array of complex numbers) are separate. In our chosen | |||
* representation, we do not keep the second half. | |||
* | |||
* The last iteration recombines the recomputed half with the | |||
* implicit half, and should yield only real numbers since the | |||
* target polynomial is real; moreover, s = i at that step. | |||
* Thus, when considering x and y: | |||
* y = conj(x) since the final f[j] must be real | |||
* Therefore, f[j] is filled with 2*Re(x), and f[j + t] is | |||
* filled with 2*Im(x). | |||
* But we already have Re(x) and Im(x) in array slots j and j+t | |||
* in our chosen representation. That last iteration is thus a | |||
* simple doubling of the values in all the array. | |||
* | |||
* We make the last iteration a no-op by tweaking the final | |||
* division into a division by N/2, not N. | |||
*/ | |||
size_t u, n, hn, t, m; | |||
n = (size_t)1 << logn; | |||
t = 1; | |||
m = n; | |||
hn = n >> 1; | |||
for (u = logn; u > 1; u --) { | |||
size_t hm, dt, i1, j1; | |||
hm = m >> 1; | |||
dt = t << 1; | |||
for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) { | |||
size_t j, j2; | |||
j2 = j1 + t; | |||
fpr s_re, s_im; | |||
s_re = fpr_gm_tab[((hm + i1) << 1) + 0]; | |||
s_im = fpr_neg(fpr_gm_tab[((hm + i1) << 1) + 1]); | |||
for (j = j1; j < j2; j ++) { | |||
fpr x_re, x_im, y_re, y_im; | |||
x_re = f[j]; | |||
x_im = f[j + hn]; | |||
y_re = f[j + t]; | |||
y_im = f[j + t + hn]; | |||
FPC_ADD(f[j], f[j + hn], | |||
x_re, x_im, y_re, y_im); | |||
FPC_SUB(x_re, x_im, x_re, x_im, y_re, y_im); | |||
FPC_MUL(f[j + t], f[j + t + hn], | |||
x_re, x_im, s_re, s_im); | |||
} | |||
} | |||
t = dt; | |||
m = hm; | |||
} | |||
/* | |||
* Last iteration is a no-op, provided that we divide by N/2 | |||
* instead of N. We need to make a special case for logn = 0. | |||
*/ | |||
if (logn > 0) { | |||
fpr ni; | |||
ni = fpr_p2_tab[logn]; | |||
for (u = 0; u < n; u ++) { | |||
f[u] = fpr_mul(f[u], ni); | |||
} | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_add( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_add(a[u], b[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_sub( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_sub(a[u], b[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_neg(a[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = (n >> 1); u < n; u ++) { | |||
a[u] = fpr_neg(a[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_mul_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = b[u + hn]; | |||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = fpr_neg(b[u + hn]); | |||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn) { | |||
/* | |||
* Since each coefficient is multiplied with its own conjugate, | |||
* the result contains only real values. | |||
*/ | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
a[u] = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)); | |||
a[u + hn] = fpr_zero; | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_mul(a[u], x); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_div_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = b[u + hn]; | |||
FPC_DIV(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *d, | |||
const fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im; | |||
fpr b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = b[u + hn]; | |||
d[u] = fpr_inv(fpr_add( | |||
fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)), | |||
fpr_add(fpr_sqr(b_re), fpr_sqr(b_im)))); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *d, | |||
const fpr *F, const fpr *G, | |||
const fpr *f, const fpr *g, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr F_re, F_im, G_re, G_im; | |||
fpr f_re, f_im, g_re, g_im; | |||
fpr a_re, a_im, b_re, b_im; | |||
F_re = F[u]; | |||
F_im = F[u + hn]; | |||
G_re = G[u]; | |||
G_im = G[u + hn]; | |||
f_re = f[u]; | |||
f_im = f[u + hn]; | |||
g_re = g[u]; | |||
g_im = g[u + hn]; | |||
FPC_MUL(a_re, a_im, F_re, F_im, f_re, fpr_neg(f_im)); | |||
FPC_MUL(b_re, b_im, G_re, G_im, g_re, fpr_neg(g_im)); | |||
d[u] = fpr_add(a_re, b_re); | |||
d[u + hn] = fpr_add(a_im, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
a[u] = fpr_mul(a[u], b[u]); | |||
a[u + hn] = fpr_mul(a[u + hn], b[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr ib; | |||
ib = fpr_inv(b[u]); | |||
a[u] = fpr_mul(a[u], ib); | |||
a[u + hn] = fpr_mul(a[u + hn], ib); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft( | |||
const fpr *g00, | |||
fpr *g01, fpr *g11, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||
fpr mu_re, mu_im; | |||
g00_re = g00[u]; | |||
g00_im = g00[u + hn]; | |||
g01_re = g01[u]; | |||
g01_im = g01[u + hn]; | |||
g11_re = g11[u]; | |||
g11_im = g11[u + hn]; | |||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||
FPC_SUB(g11[u], g11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||
g01[u] = mu_re; | |||
g01[u + hn] = fpr_neg(mu_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft( | |||
fpr *d11, fpr *l10, | |||
const fpr *g00, const fpr *g01, | |||
const fpr *g11, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||
fpr mu_re, mu_im; | |||
g00_re = g00[u]; | |||
g00_im = g00[u + hn]; | |||
g01_re = g01[u]; | |||
g01_im = g01[u + hn]; | |||
g11_re = g11[u]; | |||
g11_im = g11[u + hn]; | |||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||
FPC_SUB(d11[u], d11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||
l10[u] = mu_re; | |||
l10[u + hn] = fpr_neg(mu_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_split_fft( | |||
fpr *f0, fpr *f1, | |||
const fpr *f, unsigned logn) { | |||
/* | |||
* The FFT representation we use is in bit-reversed order | |||
* (element i contains f(w^(rev(i))), where rev() is the | |||
* bit-reversal function over the ring degree. This changes | |||
* indexes with regards to the Falcon specification. | |||
*/ | |||
size_t n, hn, qn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
qn = hn >> 1; | |||
/* | |||
* We process complex values by pairs. For logn = 1, there is only | |||
* one complex value (the other one is the implicit conjugate), | |||
* so we add the two lines below because the loop will be | |||
* skipped. | |||
*/ | |||
f0[0] = f[0]; | |||
f1[0] = f[hn]; | |||
for (u = 0; u < qn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
fpr t_re, t_im; | |||
a_re = f[(u << 1) + 0]; | |||
a_im = f[(u << 1) + 0 + hn]; | |||
b_re = f[(u << 1) + 1]; | |||
b_im = f[(u << 1) + 1 + hn]; | |||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||
f0[u] = fpr_half(t_re); | |||
f0[u + qn] = fpr_half(t_im); | |||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||
FPC_MUL(t_re, t_im, t_re, t_im, | |||
fpr_gm_tab[((u + hn) << 1) + 0], | |||
fpr_neg(fpr_gm_tab[((u + hn) << 1) + 1])); | |||
f1[u] = fpr_half(t_re); | |||
f1[u + qn] = fpr_half(t_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_poly_merge_fft( | |||
fpr *f, | |||
const fpr *f0, const fpr *f1, unsigned logn) { | |||
size_t n, hn, qn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
qn = hn >> 1; | |||
/* | |||
* An extra copy to handle the special case logn = 1. | |||
*/ | |||
f[0] = f0[0]; | |||
f[hn] = f1[0]; | |||
for (u = 0; u < qn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
fpr t_re, t_im; | |||
a_re = f0[u]; | |||
a_im = f0[u + qn]; | |||
FPC_MUL(b_re, b_im, f1[u], f1[u + qn], | |||
fpr_gm_tab[((u + hn) << 1) + 0], | |||
fpr_gm_tab[((u + hn) << 1) + 1]); | |||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||
f[(u << 1) + 0] = t_re; | |||
f[(u << 1) + 0 + hn] = t_im; | |||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||
f[(u << 1) + 1] = t_re; | |||
f[(u << 1) + 1 + hn] = t_im; | |||
} | |||
} |
@@ -0,0 +1,456 @@ | |||
/* | |||
* Floating-point operations. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* ====================================================================== */ | |||
/* | |||
* Custom floating-point implementation with integer arithmetics. We | |||
* use IEEE-754 "binary64" format, with some simplifications: | |||
* | |||
* - Top bit is s = 1 for negative, 0 for positive. | |||
* | |||
* - Exponent e uses the next 11 bits (bits 52 to 62, inclusive). | |||
* | |||
* - Mantissa m uses the 52 low bits. | |||
* | |||
* Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52)) | |||
* i.e. the mantissa really is a 53-bit number (less than 2.0, but not | |||
* less than 1.0), but the top bit (equal to 1 by definition) is omitted | |||
* in the encoding. | |||
* | |||
* In IEEE-754, there are some special values: | |||
* | |||
* - If e = 2047, then the value is either an infinite (m = 0) or | |||
* a NaN (m != 0). | |||
* | |||
* - If e = 0, then the value is either a zero (m = 0) or a subnormal, | |||
* aka "denormalized number" (m != 0). | |||
* | |||
* Of these, we only need the zeros. The caller is responsible for not | |||
* providing operands that would lead to infinites, NaNs or subnormals. | |||
* If inputs are such that values go out of range, then indeterminate | |||
* values are returned (it would still be deterministic, but no specific | |||
* value may be relied upon). | |||
* | |||
* At the C level, the three parts are stored in a 64-bit unsigned | |||
* word. | |||
* | |||
* One may note that a property of the IEEE-754 format is that order | |||
* is preserved for positive values: if two positive floating-point | |||
* values x and y are such that x < y, then their respective encodings | |||
* as _signed_ 64-bit integers i64(x) and i64(y) will be such that | |||
* i64(x) < i64(y). For negative values, order is reversed: if x < 0, | |||
* y < 0, and x < y, then i64(x) > i64(y).
* | |||
* IMPORTANT ASSUMPTIONS: | |||
* ====================== | |||
* | |||
* For proper computations, and constant-time behaviour, we assume the | |||
* following: | |||
* | |||
* - 32x32->64 multiplication (unsigned) has an execution time that | |||
* is independent of its operands. This is true of most modern | |||
* x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+ | |||
* and M3 (in the M0 and M0+, this is done in software, so it depends | |||
* on that routine), and the PowerPC cores from the G3/G4 lines. | |||
* For more info, see: https://www.bearssl.org/ctmul.html | |||
* | |||
* - Left-shifts and right-shifts of 32-bit values have an execution | |||
* time which does not depend on the shifted value nor on the | |||
* shift count. An historical exception is the Pentium IV, but most | |||
* modern CPU have barrel shifters. Some small microcontrollers | |||
* might have varying-time shifts (not the ARM Cortex M*, though). | |||
* | |||
* - Right-shift of a signed negative value performs a sign extension. | |||
* As per the C standard, this operation returns an | |||
* implementation-defined result (this is NOT an "undefined | |||
* behaviour"). On most/all systems, an arithmetic shift is | |||
* performed, because this is what makes most sense. | |||
*/ | |||
/*
 * The 'fpr' type is the raw 64-bit encoding of an emulated
 * floating-point value. Wrapping it in a struct or union would be the
 * type-safe choice, but a plain 64-bit integer is used on purpose: it
 * allows a lighter calling convention on ARM platforms. The price is
 * that accidental use of native operators ('*', '+', ...) on fpr values
 * compiles silently; the "normal" (non-emulated) code is relied upon to
 * expose such mistakes.
 */
typedef uint64_t fpr;
/* | |||
* For computations, we split values into an integral mantissa in the | |||
* 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is | |||
* "sticky" (it is set to 1 if any of the bits below it is 1); when | |||
* re-encoding, the low two bits are dropped, but may induce an | |||
* increment in the value for proper rounding. | |||
*/ | |||
/*
 * Logical right shift of a 64-bit unsigned value by a shift count that
 * may be secret.
 *
 * 32-bit shifts are assumed to be constant-time (barrel shifter), but a
 * 64-bit shift on a 32-bit target usually goes through a software
 * routine that may not be. The shift is therefore split in two
 * branch-free steps: an optional shift by exactly 32 (selected with a
 * mask), then a shift by the low five bits of the count.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ursh(uint64_t x, int n) {
    /* All-ones when n >= 32, zero otherwise. */
    uint64_t wide = -(uint64_t)(n >> 5);
    uint64_t staged = (x & ~wide) | ((x >> 32) & wide);

    return staged >> (n & 31);
}
/*
 * Right shift of a 64-bit signed value by a shift count that may be
 * secret. Same two-step, branch-free construction as fpr_ursh(): an
 * optional shift by exactly 32 selected with a mask, followed by a
 * shift by the low five bits of the count. Sign extension relies on
 * the compiler implementing '>>' on negative values as an arithmetic
 * shift.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline int64_t
fpr_irsh(int64_t x, int n) {
    /* All-ones when n >= 32, zero otherwise. */
    int64_t wide = -(int64_t)(n >> 5);
    int64_t staged = (x & ~wide) | ((x >> 32) & wide);

    return staged >> (n & 31);
}
/*
 * Left shift of a 64-bit unsigned value by a shift count that may be
 * secret. Same two-step, branch-free construction as fpr_ursh(): an
 * optional shift by exactly 32 selected with a mask, followed by a
 * shift by the low five bits of the count.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ulsh(uint64_t x, int n) {
    /* All-ones when n >= 32, zero otherwise. */
    uint64_t wide = -(uint64_t)(n >> 5);
    uint64_t staged = (x & ~wide) | ((x << 32) & wide);

    return staged << (n & 31);
}
/* | |||
* Expectations: | |||
* s = 0 or 1 | |||
* exponent e is "arbitrary" and unbiased | |||
* 2^54 <= m < 2^55 | |||
* Numerical value is (-1)^2 * m * 2^e | |||
* | |||
* Exponents which are too low lead to value zero. If the exponent is | |||
* too large, the returned value is indeterminate. | |||
* | |||
* If m = 0, then a zero is returned (using the provided sign). | |||
* If e < -1076, then a zero is returned (regardless of the value of m). | |||
* If e >= -1076 and e != 0, m must be within the expected range | |||
* (2^54 to 2^55-1). | |||
*/ | |||
static inline fpr | |||
FPR(int s, int e, uint64_t m) { | |||
fpr x; | |||
uint32_t t; | |||
unsigned f; | |||
/* | |||
* If e >= -1076, then the value is "normal"; otherwise, it | |||
* should be a subnormal, which we clamp down to zero. | |||
*/ | |||
e += 1076; | |||
t = (uint32_t)e >> 31; | |||
m &= (uint64_t)t - 1; | |||
/* | |||
* If m = 0 then we want a zero; make e = 0 too, but conserve | |||
* the sign. | |||
*/ | |||
t = (uint32_t)(m >> 54); | |||
e &= -(int)t; | |||
/* | |||
* The 52 mantissa bits come from m. Value m has its top bit set | |||
* (unless it is a zero); we leave it "as is": the top bit will | |||
* increment the exponent by 1, except when m = 0, which is | |||
* exactly what we want. | |||
*/ | |||
x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); | |||
/* | |||
* Rounding: if the low three bits of m are 011, 110 or 111, | |||
* then the value should be incremented to get the next | |||
* representable value. This implements the usual | |||
* round-to-nearest rule (with preference to even values in case | |||
* of a tie). Note that the increment may make a carry spill | |||
* into the exponent field, which is again exactly what we want | |||
* in that case. | |||
*/ | |||
f = (unsigned)m & 7U; | |||
x += (0xC8U >> f) & 1; | |||
return x; | |||
} | |||
/*
 * fpr_scaled() is implemented out of line; the macro maps the short
 * name onto the namespaced symbol.
 * NOTE(review): presumably returns i * 2^sc as an fpr -- confirm
 * against its definition.
 */
#define fpr_scaled   PQCLEAN_FALCON1024_CLEAN_fpr_scaled
fpr fpr_scaled(int64_t i, int sc);

/*
 * Convert a signed 64-bit integer to fpr: fpr_scaled() with a scaling
 * argument of zero.
 */
static inline fpr
fpr_of(int64_t i) {
    const int no_scaling = 0;

    return fpr_scaled(i, no_scaling);
}
/*
 * Constants, given as the raw 64-bit encodings (in decimal) described
 * at the top of this file. Values at or above 2^63 (sign bit set, i.e.
 * negative numbers) carry a 'U' suffix.
 *
 * Meanings not spelled out below are as suggested by the names; the
 * defining computations are not visible in this file.
 */

/* 12289.0 (0x40C8008000000000). */
static const fpr fpr_q              = 4667981563525332992;
static const fpr fpr_inverse_of_q   = 4545632735260551042;
static const fpr fpr_inv_2sqrsigma0 = 4594603506513722306;
static const fpr fpr_inv_sigma      = 4573359825155195350;
static const fpr fpr_sigma_min_9    = 4608495221497168882;
static const fpr fpr_sigma_min_10   = 4608586345619182117;
static const fpr fpr_log2           = 4604418534313441775;
static const fpr fpr_inv_log2       = 4609176140021203710;
static const fpr fpr_bnorm_max      = 4670353323383631276;

/* 0.0, 1.0, 2.0 and 0.5. */
static const fpr fpr_zero           = 0;
static const fpr fpr_one            = 4607182418800017408;
static const fpr fpr_two            = 4611686018427387904;
static const fpr fpr_onehalf        = 4602678819172646912;

/* 2^31 (0x41E0000000000000) and, by name, +/-(2^31 - 1). */
static const fpr fpr_ptwo31         = 4746794007248502784;
static const fpr fpr_ptwo31m1       = 4746794007244308480;
static const fpr fpr_mtwo31m1       = 13970166044099084288U;

/*
 * Note that fpr_ptwo63m1 and fpr_ptwo63 are the same encoding
 * (presumably because 2^63 - 1 is not representable with a 53-bit
 * mantissa and rounds to 2^63).
 */
static const fpr fpr_ptwo63m1       = 4890909195324358656;
static const fpr fpr_mtwo63m1       = 14114281232179134464U;
static const fpr fpr_ptwo63         = 4890909195324358656;
static inline int64_t | |||
fpr_rint(fpr x) { | |||
uint64_t m, d; | |||
int e; | |||
uint32_t s, dd, f; | |||
/* | |||
* We assume that the value fits in -(2^63-1)..+(2^63-1). We can | |||
* thus extract the mantissa as a 63-bit integer, then right-shift | |||
* it as needed. | |||
*/ | |||
m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||
e = 1085 - ((int)(x >> 52) & 0x7FF); | |||
/* | |||
* If a shift of more than 63 bits is needed, then simply set m | |||
* to zero. This also covers the case of an input operand equal | |||
* to zero. | |||
*/ | |||
m &= -(uint64_t)((uint32_t)(e - 64) >> 31); | |||
e &= 63; | |||
/* | |||
* Right-shift m as needed. Shift count is e. Proper rounding | |||
* mandates that: | |||
* - If the highest dropped bit is zero, then round low. | |||
* - If the highest dropped bit is one, and at least one of the | |||
* other dropped bits is one, then round up. | |||
* - If the highest dropped bit is one, and all other dropped | |||
* bits are zero, then round up if the lowest kept bit is 1, | |||
* or low otherwise (i.e. ties are broken by "rounding to even"). | |||
* | |||
* We thus first extract a word consisting of all the dropped bit | |||
* AND the lowest kept bit; then we shrink it down to three bits, | |||
* the lowest being "sticky". | |||
*/ | |||
d = fpr_ulsh(m, 63 - e); | |||
dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); | |||
f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31); | |||
m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); | |||
/* | |||
* Apply the sign bit. | |||
*/ | |||
s = (uint32_t)(x >> 63); | |||
return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; | |||
} | |||
static inline int64_t | |||
fpr_floor(fpr x) { | |||
uint64_t t; | |||
int64_t xi; | |||
int e, cc; | |||
/* | |||
* We extract the integer as a _signed_ 64-bit integer with | |||
* a scaling factor. Since we assume that the value fits | |||
* in the -(2^63-1)..+(2^63-1) range, we can left-shift the | |||
* absolute value to make it in the 2^62..2^63-1 range: we | |||
* will only need a right-shift afterwards. | |||
*/ | |||
e = (int)(x >> 52) & 0x7FF; | |||
t = x >> 63; | |||
xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) | |||
& (((uint64_t)1 << 63) - 1)); | |||
xi = (xi ^ -(int64_t)t) + (int64_t)t; | |||
cc = 1085 - e; | |||
/* | |||
* We perform an arithmetic right-shift on the value. This | |||
* applies floor() semantics on both positive and negative values | |||
* (rounding toward minus infinity). | |||
*/ | |||
xi = fpr_irsh(xi, cc & 63); | |||
/* | |||
* If the true shift count was 64 or more, then we should instead | |||
* replace xi with 0 (if nonnegative) or -1 (if negative). Edge | |||
* case: -0 will be floored to -1, not 0 (whether this is correct | |||
* is debatable; in any case, the other functions normalize zero | |||
* to +0). | |||
* | |||
* For an input of zero, the non-shifted xi was incorrect (we used | |||
* a top implicit bit of value 1, not 0), but this does not matter | |||
* since this operation will clamp it down. | |||
*/ | |||
xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); | |||
return xi; | |||
} | |||
static inline int64_t | |||
fpr_trunc(fpr x) { | |||
uint64_t t, xu; | |||
int e, cc; | |||
/* | |||
* Extract the absolute value. Since we assume that the value | |||
* fits in the -(2^63-1)..+(2^63-1) range, we can left-shift | |||
* the absolute value into the 2^62..2^63-1 range, and then | |||
* do a right shift afterwards. | |||
*/ | |||
e = (int)(x >> 52) & 0x7FF; | |||
xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||
cc = 1085 - e; | |||
xu = fpr_ursh(xu, cc & 63); | |||
/* | |||
* If the exponent is too low (cc > 63), then the shift was wrong | |||
* and we must clamp the value to 0. This also covers the case | |||
* of an input equal to zero. | |||
*/ | |||
xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); | |||
/* | |||
* Apply back the sign, if the source value is negative. | |||
*/ | |||
t = x >> 63; | |||
xu = (xu ^ -t) + t; | |||
return *(int64_t *)&xu; | |||
} | |||
/*
 * fpr_add() is implemented out of line; the macro maps the short name
 * onto the namespaced symbol.
 */
#define fpr_add   PQCLEAN_FALCON1024_CLEAN_fpr_add
fpr fpr_add(fpr x, fpr y);

/*
 * x - y, computed as x + (-y): the sign bit (bit 63) of y is flipped
 * and the sum is delegated to fpr_add().
 */
static inline fpr
fpr_sub(fpr x, fpr y) {
    return fpr_add(x, y ^ ((uint64_t)1 << 63));
}
static inline fpr | |||
fpr_neg(fpr x) { | |||
x ^= (uint64_t)1 << 63; | |||
return x; | |||
} | |||
static inline fpr | |||
fpr_half(fpr x) { | |||
/* | |||
* To divide a value by 2, we just have to subtract 1 from its | |||
* exponent, but we have to take care of zero. | |||
*/ | |||
uint32_t t; | |||
x -= (uint64_t)1 << 52; | |||
t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; | |||
x &= (uint64_t)t - 1; | |||
return x; | |||
} | |||
static inline fpr | |||
fpr_double(fpr x) { | |||
/* | |||
* To double a value, we just increment by one the exponent. We | |||
* don't care about infinites or NaNs; however, 0 is a | |||
* special case. | |||
*/ | |||
x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; | |||
return x; | |||
} | |||
/*
 * fpr_mul() is implemented out of line; the macro maps the short name
 * onto the namespaced symbol.
 */
#define fpr_mul   PQCLEAN_FALCON1024_CLEAN_fpr_mul
fpr fpr_mul(fpr x, fpr y);

/*
 * x^2, as the product of x with itself.
 */
static inline fpr
fpr_sqr(fpr x) {
    fpr squared = fpr_mul(x, x);

    return squared;
}
/*
 * fpr_div() is implemented out of line; the macro maps the short name
 * onto the namespaced symbol.
 */
#define fpr_div   PQCLEAN_FALCON1024_CLEAN_fpr_div
fpr fpr_div(fpr x, fpr y);

/*
 * 1 / x, as a division of the constant 1.0 by x.
 */
static inline fpr
fpr_inv(fpr x) {
    /* Encoding of 1.0 (same bit pattern as fpr_one). */
    const fpr unity = 4607182418800017408u;

    return fpr_div(unity, x);
}

/*
 * fpr_sqrt() is implemented out of line; the macro maps the short name
 * onto the namespaced symbol.
 */
#define fpr_sqrt   PQCLEAN_FALCON1024_CLEAN_fpr_sqrt
fpr fpr_sqrt(fpr x);
static inline int | |||
fpr_lt(fpr x, fpr y) { | |||
/* | |||
* If x >= 0 or y >= 0, a signed comparison yields the proper | |||
* result: | |||
* - For positive values, the order is preserved. | |||
* - The sign bit is at the same place as in integers, so | |||
* sign is preserved. | |||
* | |||
* If both x and y are negative, then the order is reversed. | |||
* We cannot simply invert the comparison result in that case | |||
* because it would not handle the edge case x = y properly. | |||
*/ | |||
int cc0, cc1; | |||
cc0 = *(int64_t *)&x < *(int64_t *)&y; | |||
cc1 = *(int64_t *)&x > *(int64_t *)&y; | |||
return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); | |||
} | |||
/* | |||
* Compute 2^63*exp(-x) (rounded to an integer) for x such that | |||
* 0 <= x < ln 2. We want a precision of 50 bits or so. | |||
*/ | |||
#define fpr_expm_p63 PQCLEAN_FALCON1024_CLEAN_fpr_expm_p63 | |||
uint64_t fpr_expm_p63(fpr x); | |||
#define fpr_gm_tab PQCLEAN_FALCON1024_CLEAN_fpr_gm_tab | |||
extern const fpr fpr_gm_tab[]; | |||
#define fpr_p2_tab PQCLEAN_FALCON1024_CLEAN_fpr_p2_tab | |||
extern const fpr fpr_p2_tab[]; | |||
/* ====================================================================== */ | |||
@@ -0,0 +1,663 @@ | |||
#ifndef FALCON_INNER_H__ | |||
#define FALCON_INNER_H__ | |||
/* | |||
* Internal functions for Falcon. This is not the API intended to be | |||
* used by applications; instead, this internal API provides all the | |||
* primitives on which wrappers build to provide external APIs. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include <stdint.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
/* ==================================================================== */ | |||
/* | |||
* SHAKE256 implementation (shake.c). | |||
* | |||
* API is defined to be easily replaced with the fips202.h API defined | |||
* as part of PQ Clean. | |||
*/ | |||
#include "fips202.h" | |||
#define shake256_context shake256incctx | |||
#define shake256_init(sc) shake256_inc_init(sc) | |||
#define shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len) | |||
#define shake256_flip(sc) shake256_inc_finalize(sc) | |||
#define shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc) | |||
/* ==================================================================== */ | |||
/* | |||
* Encoding/decoding functions (codec.c). | |||
* | |||
* Encoding functions take as parameters an output buffer (out) with | |||
* a given maximum length (max_out_len); returned value is the actual | |||
* number of bytes which have been written. If the output buffer is | |||
* not large enough, then 0 is returned (some bytes may have been | |||
* written to the buffer). If 'out' is NULL, then 'max_out_len' is | |||
* ignored; instead, the function computes and returns the actual | |||
* required output length (in bytes). | |||
* | |||
* Decoding functions take as parameters an input buffer (in) with | |||
* its maximum length (max_in_len); returned value is the actual number | |||
* of bytes that have been read from the buffer. If the provided length | |||
* is too short, then 0 is returned. | |||
* | |||
* Values to encode or decode are vectors of integers, with N = 2^logn | |||
* elements. | |||
* | |||
* Three encoding formats are defined: | |||
* | |||
* - modq: sequence of values modulo 12289, each encoded over exactly | |||
* 14 bits. The encoder and decoder verify that integers are within | |||
* the valid range (0..12288). Values are arrays of uint16. | |||
* | |||
* - trim: sequence of signed integers, a specified number of bits | |||
* each. The number of bits is provided as parameter and includes | |||
* the sign bit. Each integer x must be such that |x| < 2^(bits-1) | |||
* (which means that the -2^(bits-1) value is forbidden); encode and | |||
* decode functions check that property. Values are arrays of | |||
* int16_t or int8_t, corresponding to names 'trim_i16' and | |||
* 'trim_i8', respectively. | |||
* | |||
* - comp: variable-length encoding for signed integers; each integer | |||
* uses a minimum of 9 bits, possibly more. This is normally used | |||
* only for signatures. | |||
* | |||
*/ | |||
size_t PQCLEAN_FALCON1024_CLEAN_modq_encode(void *out, size_t max_out_len, | |||
const uint16_t *x, unsigned logn); | |||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_encode(void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn, unsigned bits); | |||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_encode(void *out, size_t max_out_len, | |||
const int8_t *x, unsigned logn, unsigned bits); | |||
size_t PQCLEAN_FALCON1024_CLEAN_comp_encode(void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn); | |||
size_t PQCLEAN_FALCON1024_CLEAN_modq_decode(uint16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON1024_CLEAN_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON1024_CLEAN_comp_decode(int16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len); | |||
/* | |||
* Number of bits for key elements, indexed by logn (1 to 10). This | |||
* is at most 8 bits for all degrees, but some degrees may have shorter | |||
* elements. | |||
*/ | |||
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_fg_bits[]; | |||
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_FG_bits[]; | |||
/* | |||
* Maximum size, in bits, of elements in a signature, indexed by logn | |||
* (1 to 10). The size includes the sign bit. | |||
*/ | |||
extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[]; | |||
/* ==================================================================== */ | |||
/* | |||
* Support functions used for both signature generation and signature | |||
* verification (common.c). | |||
*/ | |||
/* | |||
* From a SHAKE256 context (must be already flipped), produce a new | |||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_hash_to_point(shake256_context *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Tell whether a given vector (2N coordinates, in two halves) is | |||
* acceptable as a signature. This compares the appropriate norm of the | |||
* vector with the acceptance bound. Returned value is 1 on success | |||
* (vector is short enough to be acceptable), 0 otherwise. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); | |||
/* | |||
* Tell whether a given vector (2N coordinates, in two halves) is | |||
* acceptable as a signature. Instead of the first half s1, this | |||
* function receives the "saturated squared norm" of s1, i.e. the | |||
* sum of the squares of the coordinates of s1 (saturated at 2^32-1 | |||
* if the sum exceeds 2^31-1). | |||
* | |||
* Returned value is 1 on success (vector is short enough to be | |||
* acceptable), 0 otherwise. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); | |||
/* ==================================================================== */ | |||
/* | |||
* Signature verification functions (vrfy.c). | |||
*/ | |||
/* | |||
* Convert a public key to NTT + Montgomery format. Conversion is done | |||
* in place. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn); | |||
/* | |||
* Internal signature verification code: | |||
* c0[] contains the hashed nonce+message | |||
* s2[] is the decoded signature | |||
* h[] contains the public key, in NTT + Montgomery format | |||
* logn is the degree log | |||
* tmp[] temporary, must have at least 2*2^logn bytes | |||
* Returned value is 1 on success, 0 on error. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||
const uint16_t *h, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute the public key h[], given the private key elements f[] and | |||
* g[]. This computes h = g/f mod phi mod q, where phi is the polynomial | |||
* modulus. This function returns 1 on success, 0 on error (an error is | |||
* reported if f is not invertible mod phi mod q). | |||
* | |||
* The tmp[] array must have room for at least 2*2^logn elements. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h, | |||
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Recompute the fourth private key element. Private key consists in | |||
* four polynomials with small coefficients f, g, F and G, which are | |||
* such that fG - gF = q mod phi; furthermore, f is invertible modulo | |||
* phi and modulo q. This function recomputes G from f, g and F. | |||
* | |||
* The tmp[] array must have room for at least 4*2^logn bytes. | |||
* | |||
* Returned value is 1 on success, 0 on error (f not invertible). | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G, | |||
const int8_t *f, const int8_t *g, const int8_t *F, | |||
unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
/* | |||
* Implementation of floating-point real numbers (fpr.h, fpr.c). | |||
*/ | |||
/* | |||
* Real numbers are implemented by an extra header file, included below. | |||
* This is meant to support pluggable implementations. The default | |||
* implementation relies on the C type 'double'. | |||
* | |||
* The included file must define the following types, functions and | |||
* constants: | |||
* | |||
* fpr | |||
* type for a real number | |||
* | |||
* fpr fpr_of(int64_t i) | |||
* cast an integer into a real number; source must be in the | |||
* -(2^63-1)..+(2^63-1) range | |||
* | |||
* fpr fpr_scaled(int64_t i, int sc) | |||
* compute i*2^sc as a real number; source 'i' must be in the | |||
* -(2^63-1)..+(2^63-1) range | |||
* | |||
* fpr fpr_ldexp(fpr x, int e) | |||
* compute x*2^e | |||
* | |||
* int64_t fpr_rint(fpr x) | |||
* round x to the nearest integer; x must be in the -(2^63-1) | |||
* to +(2^63-1) range | |||
* | |||
* int64_t fpr_trunc(fpr x) | |||
* round to an integer; this rounds towards zero; value must | |||
* be in the -(2^63-1) to +(2^63-1) range | |||
* | |||
* fpr fpr_add(fpr x, fpr y) | |||
* compute x + y | |||
* | |||
* fpr fpr_sub(fpr x, fpr y) | |||
* compute x - y | |||
* | |||
* fpr fpr_neg(fpr x) | |||
* compute -x | |||
* | |||
* fpr fpr_half(fpr x) | |||
* compute x/2 | |||
* | |||
* fpr fpr_double(fpr x) | |||
* compute x*2 | |||
* | |||
* fpr fpr_mul(fpr x, fpr y) | |||
* compute x * y | |||
* | |||
* fpr fpr_sqr(fpr x) | |||
* compute x * x | |||
* | |||
* fpr fpr_inv(fpr x) | |||
* compute 1/x | |||
* | |||
* fpr fpr_div(fpr x, fpr y) | |||
* compute x/y | |||
* | |||
* fpr fpr_sqrt(fpr x) | |||
* compute the square root of x | |||
* | |||
* int fpr_lt(fpr x, fpr y) | |||
* return 1 if x < y, 0 otherwise | |||
* | |||
* uint64_t fpr_expm_p63(fpr x) | |||
* return exp(-x), assuming that 0 <= x < log(2). Returned value | |||
* is scaled to 63 bits (i.e. it really returns 2^63*exp(-x), | |||
* rounded to the nearest integer). Computation should have a | |||
* precision of at least 45 bits. | |||
* | |||
* const fpr fpr_gm_tab[] | |||
* array of constants for FFT / iFFT | |||
* | |||
* const fpr fpr_p2_tab[] | |||
* precomputed powers of 2 (by index, 0 to 10) | |||
* | |||
* Constants of type 'fpr': | |||
* | |||
* fpr fpr_q 12289 | |||
* fpr fpr_inverse_of_q 1/12289 | |||
* fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) | |||
* fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) | |||
* fpr fpr_sigma_min_9 1.291500756233514568549480827642 | |||
* fpr fpr_sigma_min_10 1.311734375905083682667395805765 | |||
* fpr fpr_log2 log(2) | |||
* fpr fpr_inv_log2 1/log(2) | |||
* fpr fpr_bnorm_max 16822.4121 | |||
* fpr fpr_zero 0 | |||
* fpr fpr_one 1 | |||
* fpr fpr_two 2 | |||
* fpr fpr_onehalf 0.5 | |||
* fpr fpr_ptwo31 2^31 | |||
* fpr fpr_ptwo31m1 2^31-1 | |||
* fpr fpr_mtwo31m1 -(2^31-1) | |||
* fpr fpr_ptwo63m1 2^63-1 | |||
* fpr fpr_mtwo63m1 -(2^63-1) | |||
* fpr fpr_ptwo63 2^63 | |||
*/ | |||
#include "fpr.h" | |||
/* ==================================================================== */ | |||
/* | |||
* RNG (rng.c). | |||
* | |||
* A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 | |||
* context (flipped) and is used for bulk pseudorandom generation. | |||
* A system-dependent seed generator is also provided. | |||
*/ | |||
/* | |||
* Obtain a random seed from the system RNG. | |||
* | |||
* Returned value is 1 on success, 0 on error. | |||
*/ | |||
int PQCLEAN_FALCON1024_CLEAN_get_seed(void *seed, size_t seed_len); | |||
/* | |||
* Structure for a PRNG. This includes a large buffer so that values | |||
* get generated in advance. The 'state' is used to keep the current | |||
* PRNG algorithm state (contents depend on the selected algorithm). | |||
* | |||
* The unions with 'dummy_u64' are there to ensure proper alignment for | |||
* 64-bit direct access. | |||
*/ | |||
typedef struct { | |||
/* Output buffer: bytes are handed out from d[], starting at index 'ptr'. */
union { | |||
uint8_t d[512]; /* MUST be 512, exactly */ | |||
uint64_t dummy_u64; /* present only to force 64-bit alignment of d[] */ | |||
} buf; | |||
size_t ptr; /* index in buf.d[] of the next byte to return */ | |||
/* Current PRNG algorithm state; contents depend on the selected algorithm. */
union { | |||
uint8_t d[256]; | |||
uint64_t dummy_u64; /* present only to force 64-bit alignment of d[] */ | |||
} state; | |||
int type; /* NOTE(review): presumably identifies the selected PRNG algorithm; confirm in rng.c */ | |||
} prng; | |||
/* | |||
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 | |||
* context (in "flipped" state) to obtain its initial state. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, shake256_context *src); | |||
/* | |||
* Refill the PRNG buffer. This is normally invoked automatically, and | |||
* is declared here only so that prng_get_u64() may be inlined. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p); | |||
/* | |||
* Get some bytes from a PRNG. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len); | |||
/* | |||
* Get a 64-bit random value from a PRNG. | |||
*/ | |||
static inline uint64_t | |||
prng_get_u64(prng *p) { | |||
size_t u; | |||
/* | |||
* If there are less than 9 bytes in the buffer, we refill it. | |||
* This means that we may drop the last few bytes, but this allows | |||
* for faster extraction code. Also, it means that we never leave | |||
* an empty buffer. | |||
*/ | |||
u = p->ptr; | |||
if (u >= (sizeof p->buf.d) - 9) { | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||
u = 0; | |||
} | |||
p->ptr = u + 8; | |||
/* | |||
* On systems that use little-endian encoding and allow | |||
* unaligned accesses, we can simply read the data where it is. | |||
*/ | |||
return (uint64_t)p->buf.d[u + 0] | |||
| ((uint64_t)p->buf.d[u + 1] << 8) | |||
| ((uint64_t)p->buf.d[u + 2] << 16) | |||
| ((uint64_t)p->buf.d[u + 3] << 24) | |||
| ((uint64_t)p->buf.d[u + 4] << 32) | |||
| ((uint64_t)p->buf.d[u + 5] << 40) | |||
| ((uint64_t)p->buf.d[u + 6] << 48) | |||
| ((uint64_t)p->buf.d[u + 7] << 56); | |||
} | |||
/* | |||
* Get an 8-bit random value from a PRNG. | |||
*/ | |||
static inline unsigned | |||
prng_get_u8(prng *p) { | |||
unsigned v; | |||
v = p->buf.d[p->ptr ++]; | |||
if (p->ptr == sizeof p->buf.d) { | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||
} | |||
return v; | |||
} | |||
/* ==================================================================== */ | |||
/* | |||
* FFT (falcon-fft.c). | |||
* | |||
* A real polynomial is represented as an array of N 'fpr' elements. | |||
* The FFT representation of a real polynomial contains N/2 complex | |||
* elements; each is stored as two real numbers, for the real and | |||
* imaginary parts, respectively. See falcon-fft.c for details on the | |||
* internal representation. | |||
*/ | |||
/* | |||
* Compute FFT in-place: the source array should contain a real | |||
* polynomial (N coefficients); its storage area is reused to store | |||
* the FFT representation of that polynomial (N/2 complex numbers). | |||
* | |||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_FFT(fpr *f, unsigned logn); | |||
/* | |||
* Compute the inverse FFT in-place: the source array should contain the | |||
* FFT representation of a real polynomial (N/2 elements); the resulting | |||
* real polynomial (N coefficients of type 'fpr') is written over the | |||
* array. | |||
* | |||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_iFFT(fpr *f, unsigned logn); | |||
/* | |||
* Add polynomial b to polynomial a. a and b MUST NOT overlap. This | |||
* function works in both normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_add(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This | |||
* function works in both normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_sub(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Negate polynomial a. This function works in both normal and FFT | |||
* representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_neg(fpr *a, unsigned logn); | |||
/* | |||
* Compute adjoint of polynomial a. This function works only in FFT | |||
* representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_adj_fft(fpr *a, unsigned logn); | |||
/* | |||
* Multiply polynomial a with polynomial b. a and b MUST NOT overlap. | |||
* This function works only in FFT representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_mul_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT | |||
* overlap. This function works only in FFT representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Multiply polynomial with its own adjoint. This function works only in FFT | |||
* representation. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn); | |||
/* | |||
* Multiply polynomial with a real constant. This function works in both | |||
* normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn); | |||
/* | |||
* Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_div_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) | |||
* (also in FFT representation). Since the result is auto-adjoint, all its | |||
* coordinates in FFT representation are real; as such, only the first N/2 | |||
* values of d[] are filled (the imaginary parts are skipped). | |||
* | |||
* Array d MUST NOT overlap with either a or b. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_invnorm2_fft(fpr *d, | |||
const fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) | |||
* (also in FFT representation). Destination d MUST NOT overlap with | |||
* any of the source arrays. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_add_muladj_fft(fpr *d, | |||
const fpr *F, const fpr *G, | |||
const fpr *f, const fpr *g, unsigned logn); | |||
/* | |||
* Multiply polynomial a by polynomial b, where b is autoadjoint. Both | |||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_mul_autoadj_fft(fpr *a, | |||
const fpr *b, unsigned logn); | |||
/* | |||
* Divide polynomial a by polynomial b, where b is autoadjoint. Both | |||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_div_autoadj_fft(fpr *a, | |||
const fpr *b, unsigned logn); | |||
/* | |||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||
* representation. On input, g00, g01 and g11 are provided (where the | |||
* matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 | |||
* and d11 values are written in g00, g01 and g11, respectively | |||
* (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). | |||
* (In fact, d00 = g00, so the g00 operand is left unmodified.) | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_LDL_fft(const fpr *g00, | |||
fpr *g01, fpr *g11, unsigned logn); | |||
/* | |||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||
* representation. This is identical to poly_LDL_fft() except that | |||
* g00, g01 and g11 are unmodified; the outputs d11 and l10 are written | |||
* in two other separate buffers provided as extra parameters. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_LDLmv_fft(fpr *d11, fpr *l10, | |||
const fpr *g00, const fpr *g01, | |||
const fpr *g11, unsigned logn); | |||
/* | |||
* Apply "split" operation on a polynomial in FFT representation: | |||
* f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 | |||
* (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_split_fft(fpr *f0, fpr *f1, | |||
const fpr *f, unsigned logn); | |||
/* | |||
* Apply "merge" operation on two polynomials in FFT representation: | |||
* given f0 and f1, polynomials modulo X^(N/2)+1, this function computes | |||
* f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. | |||
* f MUST NOT overlap with either f0 or f1. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(fpr *f, | |||
const fpr *f0, const fpr *f1, unsigned logn); | |||
/* ==================================================================== */ | |||
/* | |||
* Key pair generation. | |||
*/ | |||
/* | |||
* Required sizes of the temporary buffer (in bytes). | |||
*/ | |||
#define FALCON_KEYGEN_TEMP_1 136 | |||
#define FALCON_KEYGEN_TEMP_2 272 | |||
#define FALCON_KEYGEN_TEMP_3 224 | |||
#define FALCON_KEYGEN_TEMP_4 448 | |||
#define FALCON_KEYGEN_TEMP_5 896 | |||
#define FALCON_KEYGEN_TEMP_6 1792 | |||
#define FALCON_KEYGEN_TEMP_7 3584 | |||
#define FALCON_KEYGEN_TEMP_8 7168 | |||
#define FALCON_KEYGEN_TEMP_9 14336 | |||
#define FALCON_KEYGEN_TEMP_10 28672 | |||
/* | |||
* Generate a new key pair. Randomness is extracted from the provided | |||
* SHAKE256 context, which must have already been seeded and flipped. | |||
* The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* | |||
* macros) and be aligned for the uint32_t, uint64_t and fpr types. | |||
* | |||
* The private key elements are written in f, g, F and G, and the | |||
* public key is written in h. Either or both of G and h may be NULL, | |||
* in which case the corresponding element is not returned (they can | |||
* be recomputed from f, g and F). | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_keygen(shake256_context *rng, | |||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, | |||
unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
/* | |||
* Signature generation. | |||
*/ | |||
/* | |||
* Expand a private key into the B0 matrix in FFT representation and | |||
* the LDL tree. All the values are written in 'expanded_key', for | |||
* a total of (8*logn+40)*2^logn bytes. | |||
* | |||
* The tmp[] array must have room for at least 48*2^logn bytes. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_expand_privkey(fpr *expanded_key, | |||
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, | |||
unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute a signature over the provided hashed message (hm); the | |||
* signature value is one short vector. This function uses an | |||
* expanded key (as generated by PQCLEAN_FALCON1024_CLEAN_expand_privkey()). | |||
* | |||
* The sig[] and hm[] buffers may overlap. | |||
* | |||
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, shake256_context *rng, | |||
const fpr *expanded_key, | |||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute a signature over the provided hashed message (hm); the | |||
* signature value is one short vector. This function uses a raw | |||
* key and dynamically recomputes the B0 matrix and LDL tree; this | |||
* saves RAM since there is no need for an expanded key, but | |||
* increases the signature cost. | |||
* | |||
* The sig[] and hm[] buffers may overlap. | |||
* | |||
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes. | |||
*/ | |||
void PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng, | |||
const int8_t *f, const int8_t *g, | |||
const int8_t *F, const int8_t *G, | |||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
#endif |
@@ -0,0 +1,381 @@ | |||
/* | |||
* Wrapper for implementing the PQClean API. | |||
*/ | |||
#include <stddef.h> | |||
#include <string.h> | |||
#include "api.h" | |||
#include "inner.h" | |||
#define NONCELEN 40 | |||
#include "randombytes.h" | |||
/* | |||
* Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) | |||
* | |||
* private key: | |||
* header byte: 0101nnnn | |||
* private f (6 or 5 bits by element, depending on degree) | |||
* private g (6 or 5 bits by element, depending on degree) | |||
* private F (8 bits by element) | |||
* | |||
* public key: | |||
* header byte: 0000nnnn | |||
* public h (14 bits by element) | |||
* | |||
* signature: | |||
* header byte: 0011nnnn | |||
* nonce 40 bytes | |||
* value (12 bits by element) | |||
* | |||
* message + signature: | |||
* signature length (2 bytes, big-endian) | |||
* nonce 40 bytes | |||
* message | |||
* header byte: 0010nnnn | |||
* value (12 bits by element) | |||
* (signature length is 1+len(value), not counting the nonce) | |||
*/ | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair( | |||
uint8_t *pk, uint8_t *sk) { | |||
union { | |||
uint8_t b[FALCON_KEYGEN_TEMP_10]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
int8_t f[1024], g[1024], F[1024]; | |||
uint16_t h[1024]; | |||
unsigned char seed[48]; | |||
shake256_context rng; | |||
size_t u, v; | |||
/* | |||
* Generate key pair. | |||
*/ | |||
randombytes(seed, sizeof seed); | |||
shake256_init(&rng); | |||
shake256_inject(&rng, seed, sizeof seed); | |||
shake256_flip(&rng); | |||
PQCLEAN_FALCON1024_CLEAN_keygen(&rng, f, g, F, NULL, h, 10, tmp.b); | |||
/* | |||
* Encode private key. | |||
*/ | |||
sk[0] = 0x50 + 10; | |||
u = 1; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||
f, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||
g, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||
F, 10, PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
if (u != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||
return -1; | |||
} | |||
/* | |||
* Encode public key. | |||
*/ | |||
pk[0] = 0x00 + 10; | |||
v = PQCLEAN_FALCON1024_CLEAN_modq_encode( | |||
pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1, | |||
h, 10); | |||
if (v != PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* | |||
* Compute the signature. nonce[] receives the nonce and must have length | |||
* NONCELEN bytes. sigbuf[] receives the signature value (without nonce | |||
* or header byte), with *sigbuflen providing the maximum value length and | |||
* receiving the actual value length. | |||
* | |||
* If a signature could be computed but not encoded because it would | |||
* exceed the output buffer size, then a new signature is computed. If | |||
* the provided buffer size is too low, this could loop indefinitely, so | |||
* the caller must provide a size that can accommodate signatures with a | |||
* large enough probability. | |||
* | |||
* Return value: 0 on success, -1 on error. | |||
*/ | |||
static int | |||
do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
union { | |||
uint8_t b[72 * 1024]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
int8_t f[1024], g[1024], F[1024], G[1024]; | |||
union { | |||
int16_t sig[1024]; | |||
uint16_t hm[1024]; | |||
} r; | |||
unsigned char seed[48]; | |||
shake256_context sc; | |||
size_t u, v; | |||
/* | |||
* Decode the private key. | |||
*/ | |||
if (sk[0] != 0x50 + 10) { | |||
return -1; | |||
} | |||
u = 1; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( | |||
f, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10], | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( | |||
g, 10, PQCLEAN_FALCON1024_CLEAN_max_fg_bits[10], | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON1024_CLEAN_trim_i8_decode( | |||
F, 10, PQCLEAN_FALCON1024_CLEAN_max_FG_bits[10], | |||
sk + u, PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
if (u != PQCLEAN_FALCON1024_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||
return -1; | |||
} | |||
if (!PQCLEAN_FALCON1024_CLEAN_complete_private(G, f, g, F, 10, tmp.b)) { | |||
return -1; | |||
} | |||
/* | |||
* Create a random nonce (40 bytes). | |||
*/ | |||
randombytes(nonce, NONCELEN); | |||
/* | |||
* Hash message nonce + message into a vector. | |||
*/ | |||
shake256_init(&sc); | |||
shake256_inject(&sc, nonce, NONCELEN); | |||
shake256_inject(&sc, m, mlen); | |||
shake256_flip(&sc); | |||
PQCLEAN_FALCON1024_CLEAN_hash_to_point(&sc, r.hm, 10, tmp.b); | |||
/* | |||
* Initialize a RNG. | |||
*/ | |||
randombytes(seed, sizeof seed); | |||
shake256_init(&sc); | |||
shake256_inject(&sc, seed, sizeof seed); | |||
shake256_flip(&sc); | |||
/* | |||
* Compute and return the signature. This loops until a signature | |||
* value is found that fits in the provided buffer. | |||
*/ | |||
for (;;) { | |||
PQCLEAN_FALCON1024_CLEAN_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 10, tmp.b); | |||
v = PQCLEAN_FALCON1024_CLEAN_comp_encode(sigbuf, *sigbuflen, r.sig, 10); | |||
if (v != 0) { | |||
*sigbuflen = v; | |||
return 0; | |||
} | |||
} | |||
} | |||
/* | |||
* Verify a sigature. The nonce has size NONCELEN bytes. sigbuf[] | |||
* (of size sigbuflen) contains the signature value, not including the | |||
* header byte or nonce. Return value is 0 on success, -1 on error. | |||
*/ | |||
static int | |||
do_verify( | |||
const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
union { | |||
uint8_t b[2 * 1024]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
uint16_t h[1024], hm[1024]; | |||
int16_t sig[1024]; | |||
shake256_context sc; | |||
/* | |||
* Decode public key. | |||
*/ | |||
if (pk[0] != 0x00 + 10) { | |||
return -1; | |||
} | |||
if (PQCLEAN_FALCON1024_CLEAN_modq_decode(h, 10, | |||
pk + 1, PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) | |||
!= PQCLEAN_FALCON1024_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||
return -1; | |||
} | |||
PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(h, 10); | |||
/* | |||
* Decode signature. | |||
*/ | |||
if (sigbuflen == 0) { | |||
return -1; | |||
} | |||
if (PQCLEAN_FALCON1024_CLEAN_comp_decode(sig, 10, sigbuf, sigbuflen) != sigbuflen) { | |||
return -1; | |||
} | |||
/* | |||
* Hash nonce + message into a vector. | |||
*/ | |||
shake256_init(&sc); | |||
shake256_inject(&sc, nonce, NONCELEN); | |||
shake256_inject(&sc, m, mlen); | |||
shake256_flip(&sc); | |||
PQCLEAN_FALCON1024_CLEAN_hash_to_point(&sc, hm, 10, tmp.b); | |||
/* | |||
* Verify signature. | |||
*/ | |||
if (!PQCLEAN_FALCON1024_CLEAN_verify_raw(hm, sig, h, 10, tmp.b)) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
/* | |||
* The PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES constant is used for | |||
* the signed message object (as produced by crypto_sign()) | |||
* and includes a two-byte length value, so we take care here | |||
* to only generate signatures that are two bytes shorter than | |||
* the maximum. This is done to ensure that crypto_sign() | |||
* and crypto_sign_signature() produce the exact same signature | |||
* value, if used on the same message, with the same private key, | |||
* and using the same output from randombytes() (this is for | |||
* reproducibility of tests). | |||
*/ | |||
size_t vlen; | |||
vlen = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||
if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { | |||
return -1; | |||
} | |||
sig[0] = 0x30 + 10; | |||
*siglen = 1 + NONCELEN + vlen; | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
if (siglen < 1 + NONCELEN) { | |||
return -1; | |||
} | |||
if (sig[0] != 0x30 + 10) { | |||
return -1; | |||
} | |||
return do_verify(sig + 1, | |||
sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
uint8_t *pm, *sigbuf; | |||
size_t sigbuflen; | |||
/* | |||
* Move the message to its final location; this is a memmove() so | |||
* it handles overlaps properly. | |||
*/ | |||
memmove(sm + 2 + NONCELEN, m, mlen); | |||
pm = sm + 2 + NONCELEN; | |||
sigbuf = pm + 1 + mlen; | |||
sigbuflen = PQCLEAN_FALCON1024_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||
if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { | |||
return -1; | |||
} | |||
pm[mlen] = 0x20 + 10; | |||
sigbuflen ++; | |||
sm[0] = (uint8_t)(sigbuflen >> 8); | |||
sm[1] = (uint8_t)sigbuflen; | |||
*smlen = mlen + 2 + NONCELEN + sigbuflen; | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, const uint8_t *pk) { | |||
const uint8_t *sigbuf; | |||
size_t pmlen, sigbuflen; | |||
if (smlen < 3 + NONCELEN) { | |||
return -1; | |||
} | |||
sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; | |||
if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { | |||
return -1; | |||
} | |||
sigbuflen --; | |||
pmlen = smlen - NONCELEN - 3 - sigbuflen; | |||
if (sm[2 + NONCELEN + pmlen] != 0x20 + 10) { | |||
return -1; | |||
} | |||
sigbuf = sm + 2 + NONCELEN + pmlen + 1; | |||
/* | |||
* The 2-byte length header and the one-byte signature header | |||
* have been verified. Nonce is at sm+2, followed by the message | |||
* itself. Message length is in pmlen. sigbuf/sigbuflen point to | |||
* the signature value (excluding the header byte). | |||
*/ | |||
if (do_verify(sm + 2, sigbuf, sigbuflen, | |||
sm + 2 + NONCELEN, pmlen, pk) < 0) { | |||
return -1; | |||
} | |||
/* | |||
* Signature is correct, we just have to copy/move the message | |||
* to its final destination. The memmove() properly handles | |||
* overlaps. | |||
*/ | |||
memmove(m, sm + 2 + NONCELEN, pmlen); | |||
*mlen = pmlen; | |||
return 0; | |||
} |
@@ -0,0 +1,187 @@ | |||
/* | |||
* PRNG and interface to the system RNG. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include <assert.h> | |||
#include "inner.h" | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, shake256_context *src) { | |||
/* | |||
* To ensure reproducibility for a given seed, we | |||
* must enforce little-endian interpretation of | |||
* the state words. | |||
*/ | |||
uint8_t tmp[56]; | |||
uint64_t th, tl; | |||
int i; | |||
shake256_extract(src, tmp, 56); | |||
for (i = 0; i < 14; i ++) { | |||
uint32_t w; | |||
w = (uint32_t)tmp[(i << 2) + 0] | |||
| ((uint32_t)tmp[(i << 2) + 1] << 8) | |||
| ((uint32_t)tmp[(i << 2) + 2] << 16) | |||
| ((uint32_t)tmp[(i << 2) + 3] << 24); | |||
*(uint32_t *)(p->state.d + (i << 2)) = w; | |||
} | |||
tl = *(uint32_t *)(p->state.d + 48); | |||
th = *(uint32_t *)(p->state.d + 52); | |||
*(uint64_t *)(p->state.d + 48) = tl + (th << 32); | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||
} | |||
/* | |||
* PRNG based on ChaCha20. | |||
* | |||
* State consists in key (32 bytes) then IV (16 bytes) and block counter | |||
* (8 bytes). Normally, we should not care about local endianness (this | |||
* is for a PRNG), but for the NIST competition we need reproducible KAT | |||
* vectors that work across architectures, so we enforce little-endian | |||
* interpretation where applicable. Moreover, output words are "spread | |||
* out" over the output buffer with the interleaving pattern that is | |||
* naturally obtained from the AVX2 implementation that runs eight | |||
* ChaCha20 instances in parallel. | |||
* | |||
* The block counter is XORed into the first 8 bytes of the IV. | |||
*/ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(prng *p) { | |||
static const uint32_t CW[] = { | |||
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 | |||
}; | |||
uint64_t cc; | |||
size_t u; | |||
/* | |||
* State uses local endianness. Only the output bytes must be | |||
* converted to little endian (if used on a big-endian machine). | |||
*/ | |||
cc = *(uint64_t *)(p->state.d + 48); | |||
for (u = 0; u < 8; u ++) { | |||
uint32_t state[16]; | |||
size_t v; | |||
int i; | |||
memcpy(&state[0], CW, sizeof CW); | |||
memcpy(&state[4], p->state.d, 48); | |||
state[14] ^= (uint32_t)cc; | |||
state[15] ^= (uint32_t)(cc >> 32); | |||
for (i = 0; i < 10; i ++) { | |||
#define QROUND(a, b, c, d) do { \ | |||
state[a] += state[b]; \ | |||
state[d] ^= state[a]; \ | |||
state[d] = (state[d] << 16) | (state[d] >> 16); \ | |||
state[c] += state[d]; \ | |||
state[b] ^= state[c]; \ | |||
state[b] = (state[b] << 12) | (state[b] >> 20); \ | |||
state[a] += state[b]; \ | |||
state[d] ^= state[a]; \ | |||
state[d] = (state[d] << 8) | (state[d] >> 24); \ | |||
state[c] += state[d]; \ | |||
state[b] ^= state[c]; \ | |||
state[b] = (state[b] << 7) | (state[b] >> 25); \ | |||
} while (0) | |||
QROUND( 0, 4, 8, 12); | |||
QROUND( 1, 5, 9, 13); | |||
QROUND( 2, 6, 10, 14); | |||
QROUND( 3, 7, 11, 15); | |||
QROUND( 0, 5, 10, 15); | |||
QROUND( 1, 6, 11, 12); | |||
QROUND( 2, 7, 8, 13); | |||
QROUND( 3, 4, 9, 14); | |||
#undef QROUND | |||
} | |||
for (v = 0; v < 4; v ++) { | |||
state[v] += CW[v]; | |||
} | |||
for (v = 4; v < 14; v ++) { | |||
state[v] += ((uint32_t *)p->state.d)[v - 4]; | |||
} | |||
state[14] += ((uint32_t *)p->state.d)[10] | |||
^ (uint32_t)cc; | |||
state[15] += ((uint32_t *)p->state.d)[11] | |||
^ (uint32_t)(cc >> 32); | |||
cc ++; | |||
/* | |||
* We mimic the interleaving that is used in the AVX2 | |||
* implementation. | |||
*/ | |||
for (v = 0; v < 16; v ++) { | |||
p->buf.d[(u << 2) + (v << 5) + 0] = | |||
(uint8_t)state[v]; | |||
p->buf.d[(u << 2) + (v << 5) + 1] = | |||
(uint8_t)(state[v] >> 8); | |||
p->buf.d[(u << 2) + (v << 5) + 2] = | |||
(uint8_t)(state[v] >> 16); | |||
p->buf.d[(u << 2) + (v << 5) + 3] = | |||
(uint8_t)(state[v] >> 24); | |||
} | |||
} | |||
*(uint64_t *)(p->state.d + 48) = cc; | |||
p->ptr = 0; | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len) { | |||
uint8_t *buf; | |||
buf = dst; | |||
while (len > 0) { | |||
size_t clen; | |||
clen = (sizeof p->buf.d) - p->ptr; | |||
if (clen > len) { | |||
clen = len; | |||
} | |||
memcpy(buf, p->buf.d, clen); | |||
buf += clen; | |||
len -= clen; | |||
p->ptr += clen; | |||
if (p->ptr == sizeof p->buf.d) { | |||
PQCLEAN_FALCON1024_CLEAN_prng_refill(p); | |||
} | |||
} | |||
} |
@@ -0,0 +1,745 @@ | |||
/* | |||
* Falcon signature verification. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include "inner.h" | |||
/* ===================================================================== */ | |||
/*
 * Constants for NTT.
 *
 *   n = 2^logn  (2 <= n <= 1024)
 *   phi = X^n + 1
 *   q = 12289
 *   q0i = -1/q mod 2^16   (q * q0i = -1 mod 2^16)
 *   R = 2^16 mod q        (Montgomery radix, reduced)
 *   R2 = 2^32 mod q       (R squared, reduced)
 */
#define Q     12289
#define Q0I   12287
#define R      4091
#define R2    10952
/*
 * Table for NTT, binary case:
 *   GMb[x] = R*(g^rev(x)) mod q
 * where g = 7 is a primitive 2048-th root of unity modulo q,
 * and rev() is the bit-reversal function over 10 bits.
 */
static const uint16_t GMb[] = { | |||
4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759, | |||
1591, 6399, 9477, 5266, 586, 5825, 7538, 9710, | |||
1134, 6407, 1711, 965, 7099, 7674, 3743, 6442, | |||
10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180, | |||
12210, 6240, 997, 117, 4783, 4407, 1549, 7072, | |||
2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042, | |||
12189, 432, 10751, 1237, 7610, 1534, 3983, 7863, | |||
2181, 6308, 8720, 6570, 4843, 1690, 14, 3872, | |||
5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340, | |||
1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045, | |||
3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180, | |||
9277, 6130, 3323, 883, 10469, 489, 1502, 2851, | |||
11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195, | |||
730, 1762, 3854, 2030, 5892, 10922, 9020, 5274, | |||
9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446, | |||
7613, 9386, 834, 7703, 6836, 3403, 5351, 12276, | |||
3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525, | |||
10401, 2749, 7338, 10574, 6040, 943, 9330, 1477, | |||
6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680, | |||
8188, 6902, 3533, 9807, 6090, 727, 10099, 7003, | |||
6945, 1949, 9731, 10559, 6057, 378, 7871, 8763, | |||
8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821, | |||
5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159, | |||
1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188, | |||
737, 3698, 4699, 5753, 9046, 3687, 16, 914, | |||
5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381, | |||
10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357, | |||
6409, 8197, 2962, 6375, 5064, 6634, 5625, 278, | |||
932, 10229, 8927, 7642, 351, 9298, 237, 5858, | |||
7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204, | |||
4602, 1748, 11300, 340, 3711, 4614, 300, 10993, | |||
5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654, | |||
3835, 5553, 1224, 8476, 9237, 3845, 250, 11209, | |||
4225, 6326, 9680, 12254, 4136, 2778, 692, 8808, | |||
6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433, | |||
6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416, | |||
8418, 10824, 11986, 5733, 876, 7030, 2167, 2436, | |||
3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434, | |||
7389, 8879, 10661, 11457, 4220, 1432, 10832, 4328, | |||
8557, 1867, 9454, 2416, 3816, 9076, 686, 5393, | |||
2523, 4339, 6115, 619, 937, 2834, 7775, 3279, | |||
2363, 7488, 6112, 5056, 824, 10204, 11690, 1113, | |||
2727, 9848, 896, 2028, 5075, 2654, 10464, 7884, | |||
12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520, | |||
1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399, | |||
11192, 315, 4511, 1158, 6061, 6751, 11865, 357, | |||
7367, 4550, 983, 8534, 8352, 10126, 7530, 9253, | |||
4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652, | |||
3374, 11477, 1753, 292, 8681, 2806, 10378, 12188, | |||
5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928, | |||
4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650, | |||
7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344, | |||
8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561, | |||
6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114, | |||
7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323, | |||
10438, 9471, 1271, 408, 6911, 3079, 360, 8276, | |||
11535, 9156, 9049, 11539, 850, 8617, 784, 7919, | |||
8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600, | |||
9779, 1012, 721, 2784, 6676, 6552, 5348, 4424, | |||
6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333, | |||
8801, 9661, 7308, 5788, 4910, 909, 11613, 4395, | |||
8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216, | |||
4296, 11918, 695, 4371, 9793, 4884, 2411, 10230, | |||
2650, 841, 3890, 10231, 7248, 8505, 11196, 6688, | |||
4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868, | |||
11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525, | |||
7938, 7982, 11977, 6755, 537, 4562, 1623, 8227, | |||
11453, 7544, 906, 11816, 9548, 10858, 9703, 2815, | |||
11736, 6813, 6979, 819, 8903, 6271, 10843, 348, | |||
7514, 8339, 6439, 694, 852, 5659, 2781, 3716, | |||
11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885, | |||
2978, 7289, 11884, 9123, 9323, 11830, 98, 2526, | |||
2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224, | |||
10871, 8092, 9651, 5989, 7140, 8480, 1670, 159, | |||
10923, 4918, 128, 7312, 725, 9157, 5006, 6393, | |||
3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668, | |||
3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365, | |||
5110, 45, 2400, 1921, 4377, 2720, 1695, 51, | |||
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833, | |||
4135, 4257, 5838, 4765, 10985, 11532, 590, 12198, | |||
482, 12173, 2006, 7064, 10018, 3912, 12016, 10519, | |||
11362, 6954, 2210, 284, 5413, 6601, 3865, 10339, | |||
11188, 6231, 517, 9564, 11281, 3863, 1210, 4604, | |||
8160, 11447, 153, 7204, 5763, 5089, 9248, 12154, | |||
11748, 1354, 6672, 179, 5532, 2646, 5941, 12185, | |||
862, 3158, 477, 7279, 5678, 7914, 4254, 302, | |||
2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824, | |||
10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449, | |||
5159, 1308, 8315, 3404, 1877, 1231, 112, 6398, | |||
11724, 12272, 7286, 1459, 12274, 9896, 3456, 800, | |||
1397, 10678, 103, 7420, 7976, 936, 764, 632, | |||
7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946, | |||
6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139, | |||
4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850, | |||
7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217, | |||
10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711, | |||
2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729, | |||
4997, 7415, 6315, 12044, 4374, 7157, 4844, 211, | |||
8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875, | |||
8192, 986, 7527, 1401, 870, 3615, 8465, 2756, | |||
9770, 2034, 10168, 3264, 6132, 54, 2880, 4763, | |||
11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038, | |||
2567, 708, 893, 6465, 4962, 10024, 2090, 5718, | |||
10743, 780, 4733, 4623, 2134, 2087, 4802, 884, | |||
5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664, | |||
4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791, | |||
6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032, | |||
11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062, | |||
8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348, | |||
4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499, | |||
2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326, | |||
5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830, | |||
11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582, | |||
3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762, | |||
7496, 10383, 755, 1654, 12053, 4952, 10134, 4394, | |||
6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674, | |||
10358, 4901, 7414, 8771, 710, 6764, 8462, 7193, | |||
5371, 7274, 11084, 290, 7864, 6827, 11822, 2509, | |||
6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105, | |||
11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776, | |||
7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277, | |||
9182, 11456, 633, 10046, 11554, 5633, 9587, 2333, | |||
7008, 7084, 5047, 7199, 9865, 8997, 569, 6390, | |||
10845, 9679, 8268, 11472, 4203, 1997, 2, 9331, | |||
162, 6182, 2000, 3649, 9792, 6363, 7557, 6187, | |||
8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067, | |||
5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165, | |||
6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949 | |||
}; | |||
/* | |||
* Table for inverse NTT, binary case: | |||
* iGMb[x] = R*((1/g)^rev(x)) mod q | |||
* Since g = 7, 1/g = 8778 mod 12289. | |||
*/ | |||
static const uint16_t iGMb[] = { | |||
4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329, | |||
2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698, | |||
3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875, | |||
5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155, | |||
8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108, | |||
4426, 8306, 10755, 4679, 11052, 1538, 11857, 100, | |||
8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460, | |||
5217, 10740, 7882, 7506, 12172, 11292, 6049, 79, | |||
13, 6938, 8886, 5453, 4586, 11455, 2903, 4676, | |||
9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110, | |||
7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559, | |||
11094, 2211, 1808, 7319, 48, 9547, 2560, 1228, | |||
9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012, | |||
6109, 2796, 2203, 1652, 711, 7004, 1053, 8973, | |||
5244, 1517, 9322, 11269, 900, 3888, 11133, 10736, | |||
4949, 7616, 9974, 4746, 10270, 126, 2921, 6720, | |||
6635, 6543, 1582, 4868, 42, 673, 2240, 7219, | |||
1296, 11989, 7675, 8578, 11949, 989, 10541, 7687, | |||
7085, 8487, 1004, 10236, 4703, 163, 9143, 4597, | |||
6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357, | |||
12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880, | |||
6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556, | |||
6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103, | |||
11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552, | |||
6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822, | |||
9130, 9948, 3566, 2133, 3901, 6038, 7333, 6609, | |||
3468, 4659, 625, 2700, 7738, 3443, 3060, 3388, | |||
3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344, | |||
5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101, | |||
4609, 8605, 8226, 144, 5656, 8704, 2621, 5424, | |||
10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888, | |||
3764, 39, 8219, 2080, 2502, 1469, 10550, 8709, | |||
5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639, | |||
2059, 9878, 7405, 2496, 7918, 11594, 371, 7993, | |||
3073, 10326, 40, 10004, 9245, 7987, 5603, 4051, | |||
7894, 676, 11380, 7379, 6501, 4981, 2628, 3488, | |||
10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473, | |||
7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510, | |||
6689, 386, 4462, 105, 2076, 10443, 119, 3955, | |||
4370, 11505, 3672, 11439, 750, 3240, 3133, 754, | |||
4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851, | |||
4966, 8181, 2688, 6205, 6814, 926, 2936, 4327, | |||
10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255, | |||
728, 7569, 6056, 10432, 11036, 2452, 2811, 3787, | |||
945, 8998, 1244, 8815, 11017, 11218, 5894, 4325, | |||
4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707, | |||
1361, 9812, 2949, 11265, 10301, 9108, 478, 6489, | |||
101, 1911, 9483, 3608, 11997, 10536, 812, 8915, | |||
637, 8159, 5299, 9128, 3512, 8290, 7068, 7922, | |||
3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922, | |||
11932, 424, 5538, 6228, 11131, 7778, 11974, 1097, | |||
2890, 10027, 2569, 2250, 2352, 821, 2550, 11016, | |||
7769, 136, 617, 3157, 5889, 9219, 6855, 120, | |||
4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562, | |||
11176, 599, 2085, 11465, 7233, 6177, 4801, 9926, | |||
9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766, | |||
6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732, | |||
7961, 1457, 10857, 8069, 832, 1628, 3410, 4900, | |||
10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847, | |||
9853, 10122, 5259, 11413, 6556, 303, 1465, 3871, | |||
4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852, | |||
3856, 928, 4933, 8530, 1871, 2184, 5571, 5879, | |||
3481, 11597, 9511, 8153, 35, 2609, 5963, 8064, | |||
1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454, | |||
2340, 7651, 1910, 10709, 2117, 9637, 6402, 6028, | |||
2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795, | |||
9222, 10837, 280, 8583, 3270, 6753, 2354, 3779, | |||
6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127, | |||
2958, 12287, 10292, 8086, 817, 4021, 2610, 1444, | |||
5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281, | |||
9956, 2702, 6656, 735, 2243, 11656, 833, 3107, | |||
6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278, | |||
3513, 9769, 3025, 779, 9433, 3392, 7437, 668, | |||
10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711, | |||
9780, 467, 5462, 4425, 11999, 1205, 5015, 6918, | |||
5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931, | |||
6615, 1541, 8708, 260, 3385, 4792, 4391, 5697, | |||
7895, 2155, 7337, 236, 10635, 11534, 1906, 4793, | |||
9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556, | |||
707, 1088, 4936, 678, 10245, 18, 5684, 960, | |||
4459, 7957, 226, 2451, 6, 8874, 320, 6298, | |||
8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876, | |||
9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679, | |||
7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378, | |||
5227, 952, 4319, 9810, 4356, 3088, 11118, 840, | |||
6257, 486, 6000, 1342, 10382, 6017, 4798, 5489, | |||
4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037, | |||
1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917, | |||
11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546, | |||
6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722, | |||
2251, 11199, 5356, 7408, 2861, 4003, 9215, 484, | |||
7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519, | |||
9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097, | |||
2414, 6496, 9953, 10554, 808, 2999, 2130, 4286, | |||
12078, 7445, 5132, 7915, 245, 5974, 4874, 7292, | |||
7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022, | |||
9578, 8934, 11074, 9498, 294, 4711, 3391, 1377, | |||
9072, 10189, 4569, 10890, 9909, 6923, 53, 4653, | |||
439, 10253, 7028, 10207, 8343, 1141, 2556, 7601, | |||
8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765, | |||
10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293, | |||
11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892, | |||
11489, 8833, 2393, 15, 10830, 5003, 17, 565, | |||
5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130, | |||
5840, 10482, 8338, 6035, 6964, 1574, 10936, 2020, | |||
2465, 8191, 384, 2642, 2729, 5399, 2175, 9396, | |||
11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427, | |||
104, 6348, 9643, 6757, 12110, 5617, 10935, 541, | |||
135, 3041, 7200, 6526, 5085, 12136, 842, 4129, | |||
7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101, | |||
1950, 8424, 5688, 6876, 12005, 10079, 5335, 927, | |||
1770, 273, 8377, 2271, 5225, 10283, 116, 11807, | |||
91, 11699, 757, 1304, 7524, 6451, 8032, 8154, | |||
7456, 4191, 309, 2318, 2292, 10393, 11639, 9481, | |||
12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179, | |||
3924, 3188, 367, 2077, 336, 5384, 5631, 8596, | |||
4621, 1775, 8866, 451, 6108, 1317, 6246, 8795, | |||
5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366, | |||
12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418, | |||
10065, 4156, 8373, 8644, 10445, 882, 8158, 10173, | |||
9763, 12191, 459, 2966, 3166, 405, 5000, 9311, | |||
6404, 8986, 1551, 8175, 3630, 10766, 9265, 700, | |||
8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775, | |||
11941, 1446, 6018, 3386, 11470, 5310, 5476, 553, | |||
9474, 2586, 1431, 2741, 473, 11383, 4745, 836, | |||
4062, 10666, 7727, 11752, 5534, 312, 4307, 4351, | |||
5764, 8679, 8381, 8187, 5, 7395, 4363, 1152, | |||
5421, 5231, 6473, 436, 7567, 8603, 6229, 8230 | |||
}; | |||
/* | |||
* Reduce a small signed integer modulo q. The source integer MUST | |||
* be between -q/2 and +q/2. | |||
*/ | |||
static inline uint32_t | |||
mq_conv_small(int x) { | |||
/* | |||
* If x < 0, the cast to uint32_t will set the high bit to 1. | |||
*/ | |||
uint32_t y; | |||
y = (uint32_t)x; | |||
y += Q & -(y >> 31); | |||
return y; | |||
} | |||
/* | |||
* Addition modulo q. Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_add(uint32_t x, uint32_t y) { | |||
/* | |||
* We compute x + y - q. If the result is negative, then the | |||
* high bit will be set, and 'd >> 31' will be equal to 1; | |||
* thus '-(d >> 31)' will be an all-one pattern. Otherwise, | |||
* it will be an all-zero pattern. In other words, this | |||
* implements a conditional addition of q. | |||
*/ | |||
uint32_t d; | |||
d = x + y - Q; | |||
d += Q & -(d >> 31); | |||
return d; | |||
} | |||
/* | |||
* Subtraction modulo q. Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_sub(uint32_t x, uint32_t y) { | |||
/* | |||
* As in mq_add(), we use a conditional addition to ensure the | |||
* result is in the 0..q-1 range. | |||
*/ | |||
uint32_t d; | |||
d = x - y; | |||
d += Q & -(d >> 31); | |||
return d; | |||
} | |||
/* | |||
* Division by 2 modulo q. Operand must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_rshift1(uint32_t x) { | |||
x += Q & -(x & 1); | |||
return (x >> 1); | |||
} | |||
/* | |||
* Montgomery multiplication modulo q. If we set R = 2^16 mod q, then | |||
* this function computes: x * y / R mod q | |||
* Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_montymul(uint32_t x, uint32_t y) { | |||
uint32_t z, w; | |||
/* | |||
* We compute x*y + k*q with a value of k chosen so that the 16 | |||
* low bits of the result are 0. We can then shift the value. | |||
* After the shift, result may still be larger than q, but it | |||
* will be lower than 2*q, so a conditional subtraction works. | |||
*/ | |||
z = x * y; | |||
w = ((z * Q0I) & 0xFFFF) * Q; | |||
/* | |||
* When adding z and w, the result will have its low 16 bits | |||
* equal to 0. Since x, y and z are lower than q, the sum will | |||
* be no more than (2^15 - 1) * q + (q - 1)^2, which will | |||
* fit on 29 bits. | |||
*/ | |||
z = (z + w) >> 16; | |||
/* | |||
* After the shift, analysis shows that the value will be less | |||
* than 2q. We do a subtraction then conditional subtraction to | |||
* ensure the result is in the expected range. | |||
*/ | |||
z -= Q; | |||
z += Q & -(z >> 31); | |||
return z; | |||
} | |||
/*
 * Montgomery squaring: returns (x * x) / R mod q, i.e. the Montgomery
 * product of x with itself.
 */
static inline uint32_t
mq_montysqr(uint32_t x) {
    return mq_montymul(x, x);
}
/* | |||
* Divide x by y modulo q = 12289. | |||
*/ | |||
static inline uint32_t | |||
mq_div_12289(uint32_t x, uint32_t y) { | |||
/* | |||
* We invert y by computing y^(q-2) mod q. | |||
* | |||
* We use the following addition chain for exponent e = 12287: | |||
* | |||
* e0 = 1 | |||
* e1 = 2 * e0 = 2 | |||
* e2 = e1 + e0 = 3 | |||
* e3 = e2 + e1 = 5 | |||
* e4 = 2 * e3 = 10 | |||
* e5 = 2 * e4 = 20 | |||
* e6 = 2 * e5 = 40 | |||
* e7 = 2 * e6 = 80 | |||
* e8 = 2 * e7 = 160 | |||
* e9 = e8 + e2 = 163 | |||
* e10 = e9 + e8 = 323 | |||
* e11 = 2 * e10 = 646 | |||
* e12 = 2 * e11 = 1292 | |||
* e13 = e12 + e9 = 1455 | |||
* e14 = 2 * e13 = 2910 | |||
* e15 = 2 * e14 = 5820 | |||
* e16 = e15 + e10 = 6143 | |||
* e17 = 2 * e16 = 12286 | |||
* e18 = e17 + e0 = 12287 | |||
* | |||
* Additions on exponents are converted to Montgomery | |||
* multiplications. We define all intermediate results as so | |||
* many local variables, and let the C compiler work out which | |||
* must be kept around. | |||
*/ | |||
uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; | |||
uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; | |||
y0 = mq_montymul(y, R2); | |||
y1 = mq_montysqr(y0); | |||
y2 = mq_montymul(y1, y0); | |||
y3 = mq_montymul(y2, y1); | |||
y4 = mq_montysqr(y3); | |||
y5 = mq_montysqr(y4); | |||
y6 = mq_montysqr(y5); | |||
y7 = mq_montysqr(y6); | |||
y8 = mq_montysqr(y7); | |||
y9 = mq_montymul(y8, y2); | |||
y10 = mq_montymul(y9, y8); | |||
y11 = mq_montysqr(y10); | |||
y12 = mq_montysqr(y11); | |||
y13 = mq_montymul(y12, y9); | |||
y14 = mq_montysqr(y13); | |||
y15 = mq_montysqr(y14); | |||
y16 = mq_montymul(y15, y10); | |||
y17 = mq_montysqr(y16); | |||
y18 = mq_montymul(y17, y0); | |||
/* | |||
* Final multiplication with x, which is not in Montgomery | |||
* representation, computes the correct division result. | |||
*/ | |||
return mq_montymul(y18, x); | |||
} | |||
/* | |||
* Compute NTT on a ring element. | |||
*/ | |||
static void | |||
mq_NTT(uint16_t *a, unsigned logn) { | |||
size_t n, t, m; | |||
n = (size_t)1 << logn; | |||
t = n; | |||
for (m = 1; m < n; m <<= 1) { | |||
size_t ht, i, j1; | |||
ht = t >> 1; | |||
for (i = 0, j1 = 0; i < m; i ++, j1 += t) { | |||
size_t j, j2; | |||
uint32_t s; | |||
s = GMb[m + i]; | |||
j2 = j1 + ht; | |||
for (j = j1; j < j2; j ++) { | |||
uint32_t u, v; | |||
u = a[j]; | |||
v = mq_montymul(a[j + ht], s); | |||
a[j] = (uint16_t)mq_add(u, v); | |||
a[j + ht] = (uint16_t)mq_sub(u, v); | |||
} | |||
} | |||
t = ht; | |||
} | |||
} | |||
/* | |||
* Compute the inverse NTT on a ring element, binary case. | |||
*/ | |||
static void | |||
mq_iNTT(uint16_t *a, unsigned logn) { | |||
size_t n, t, m; | |||
uint32_t ni; | |||
n = (size_t)1 << logn; | |||
t = 1; | |||
m = n; | |||
while (m > 1) { | |||
size_t hm, dt, i, j1; | |||
hm = m >> 1; | |||
dt = t << 1; | |||
for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { | |||
size_t j, j2; | |||
uint32_t s; | |||
j2 = j1 + t; | |||
s = iGMb[hm + i]; | |||
for (j = j1; j < j2; j ++) { | |||
uint32_t u, v, w; | |||
u = a[j]; | |||
v = a[j + t]; | |||
a[j] = (uint16_t)mq_add(u, v); | |||
w = mq_sub(u, v); | |||
a[j + t] = (uint16_t) | |||
mq_montymul(w, s); | |||
} | |||
} | |||
t = dt; | |||
m = hm; | |||
} | |||
/* | |||
* To complete the inverse NTT, we must now divide all values by | |||
* n (the vector size). We thus need the inverse of n, i.e. we | |||
* need to divide 1 by 2 logn times. But we also want it in | |||
* Montgomery representation, i.e. we also want to multiply it | |||
* by R = 2^16. In the common case, this should be a simple right | |||
* shift. The loop below is generic and works also in corner cases; | |||
* its computation time is negligible. | |||
*/ | |||
ni = R; | |||
for (m = n; m > 1; m >>= 1) { | |||
ni = mq_rshift1(ni); | |||
} | |||
for (m = 0; m < n; m ++) { | |||
a[m] = (uint16_t)mq_montymul(a[m], ni); | |||
} | |||
} | |||
/* | |||
* Convert a polynomial (mod q) to Montgomery representation. | |||
*/ | |||
static void | |||
mq_poly_tomonty(uint16_t *f, unsigned logn) { | |||
size_t u, n; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
f[u] = (uint16_t)mq_montymul(f[u], R2); | |||
} | |||
} | |||
/*
 * Coefficient-wise Montgomery product of f[] and g[] (both in NTT
 * representation, 2^logn values each). The product replaces f[];
 * g[] is left untouched.
 */
static void
mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) {
    size_t left;

    for (left = (size_t)1 << logn; left > 0; left --) {
        *f = (uint16_t)mq_montymul(*f, *g);
        f ++;
        g ++;
    }
}
/*
 * Coefficient-wise subtraction modulo q: f[] <- f[] - g[]
 * (2^logn values each).
 */
static void
mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) {
    size_t left;

    for (left = (size_t)1 << logn; left > 0; left --) {
        *f = (uint16_t)mq_sub(*f, *g);
        f ++;
        g ++;
    }
}
/* ===================================================================== */ | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn) { | |||
mq_NTT(h, logn); | |||
mq_poly_tomonty(h, logn); | |||
} | |||
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||
const uint16_t *h, unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *tt; | |||
n = (size_t)1 << logn; | |||
tt = (uint16_t *)tmp; | |||
/* | |||
* Reduce s2 elements modulo q ([0..q-1] range). | |||
*/ | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u]; | |||
w += Q & -(w >> 31); | |||
tt[u] = (uint16_t)w; | |||
} | |||
/* | |||
* Compute s1 = s2*h - c0 mod phi mod q (in tt[]). | |||
*/ | |||
mq_NTT(tt, logn); | |||
mq_poly_montymul_ntt(tt, h, logn); | |||
mq_iNTT(tt, logn); | |||
mq_poly_sub(tt, c0, logn); | |||
/* | |||
* Normalize s1 elements into the [-q/2..q/2] range. | |||
*/ | |||
for (u = 0; u < n; u ++) { | |||
int32_t w; | |||
w = (int32_t)tt[u]; | |||
w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); | |||
((int16_t *)tt)[u] = (int16_t)w; | |||
} | |||
/* | |||
* Signature is valid if and only if the aggregate (s1,s2) vector | |||
* is short enough. | |||
*/ | |||
return PQCLEAN_FALCON1024_CLEAN_is_short((int16_t *)tt, s2, logn); | |||
} | |||
/* see inner.h */
int
PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h,
                                        const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) {
    /*
     * Computes h = g/f mod phi mod q. tmp[] is used as an array of
     * 2^logn 16-bit words holding f. Returns 1 on success, 0 if f
     * is not invertible (one of its NTT coefficients is zero).
     */
    const size_t len = (size_t)1 << logn;
    uint16_t *fq = (uint16_t *)tmp;
    size_t k;

    for (k = 0; k < len; k ++) {
        fq[k] = (uint16_t)mq_conv_small(f[k]);
        h[k] = (uint16_t)mq_conv_small(g[k]);
    }
    mq_NTT(h, logn);
    mq_NTT(fq, logn);

    /*
     * In NTT representation the division is coefficient-wise.
     */
    for (k = 0; k < len; k ++) {
        if (fq[k] == 0) {
            return 0;
        }
        h[k] = (uint16_t)mq_div_12289(h[k], fq[k]);
    }
    mq_iNTT(h, logn);
    return 1;
}
/* see internal.h */ | |||
int | |||
PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G, | |||
const int8_t *f, const int8_t *g, const int8_t *F, | |||
unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *t1, *t2; | |||
n = (size_t)1 << logn; | |||
t1 = (uint16_t *)tmp; | |||
t2 = t1 + n; | |||
for (u = 0; u < n; u ++) { | |||
t1[u] = (uint16_t)mq_conv_small(g[u]); | |||
t2[u] = (uint16_t)mq_conv_small(F[u]); | |||
} | |||
mq_NTT(t1, logn); | |||
mq_NTT(t2, logn); | |||
mq_poly_tomonty(t1, logn); | |||
mq_poly_montymul_ntt(t1, t2, logn); | |||
for (u = 0; u < n; u ++) { | |||
t2[u] = (uint16_t)mq_conv_small(f[u]); | |||
} | |||
mq_NTT(t2, logn); | |||
for (u = 0; u < n; u ++) { | |||
if (t2[u] == 0) { | |||
return 0; | |||
} | |||
t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); | |||
} | |||
mq_iNTT(t1, logn); | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
int32_t gi; | |||
w = t1[u]; | |||
w -= (Q & ~ -((w - (Q >> 1)) >> 31)); | |||
gi = *(int32_t *)&w; | |||
if (gi < -127 || gi > +127) { | |||
return 0; | |||
} | |||
G[u] = (int8_t)gi; | |||
} | |||
return 1; | |||
} |
@@ -0,0 +1,23 @@ | |||
name: Falcon-512 | |||
type: signature | |||
claimed-nist-level: 1 | |||
length-public-key: 897 | |||
length-secret-key: 1281 | |||
length-signature: 690 | |||
nistkat-sha256: abc62e7be3d7c1db757ba3cbb771cfdc89c6b36fb5efc885593db89ec2ea8bc4 | |||
testvectors-sha256: 1a1b170fc9e4623e7ff519c15ec7a2dda55e94a175756b7c72429451bd226b09 | |||
principal-submitters: | |||
- Thomas Prest | |||
auxiliary-submitters: | |||
- Pierre-Alain Fouque | |||
- Jeffrey Hoffstein | |||
- Paul Kirchner | |||
- Vadim Lyubashevsky | |||
- Thomas Pornin | |||
- Thomas Ricosset | |||
- Gregor Seiler | |||
- William Whyte | |||
- Zhenfei Zhang | |||
implementations: | |||
- name: clean | |||
version: round two |
@@ -0,0 +1,22 @@ | |||
MIT License | |||
Copyright (c) 2017-2019 Falcon Project | |||
Permission is hereby granted, free of charge, to any person obtaining | |||
a copy of this software and associated documentation files (the | |||
"Software"), to deal in the Software without restriction, including | |||
without limitation the rights to use, copy, modify, merge, publish, | |||
distribute, sublicense, and/or sell copies of the Software, and to | |||
permit persons to whom the Software is furnished to do so, subject to | |||
the following conditions: | |||
The above copyright notice and this permission notice shall be | |||
included in all copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
@@ -0,0 +1,21 @@ | |||
# This Makefile can be used with GNU Make or BSD Make

# Static library produced by the default target.
LIB=libfalcon-512_clean.a

SOURCES = codec.c common.c fft.c fpr.c keygen.c pqclean.c rng.c sign.c vrfy.c
OBJECTS = codec.o common.o fft.o fpr.o keygen.o pqclean.o rng.o sign.o vrfy.o
HEADERS = api.h fpr.h inner.h

CFLAGS=-O3 -Wall -Wconversion -Wextra -Wpedantic -Wvla -Werror -Wmissing-prototypes -Wredundant-decls -std=c99 -I../../../common $(EXTRAFLAGS)

all: $(LIB)

# Every object is rebuilt when its source or any header changes.
# Recipe lines must begin with a tab character.
%.o: %.c $(HEADERS)
	$(CC) $(CFLAGS) -c -o $@ $<

$(LIB): $(OBJECTS)
	$(AR) -r $@ $(OBJECTS)

clean:
	$(RM) $(OBJECTS)
	$(RM) $(LIB)
@@ -0,0 +1,23 @@ | |||
# This Makefile can be used with Microsoft Visual Studio's nmake using the command:
#    nmake /f Makefile.Microsoft_nmake

LIBRARY=libfalcon-512_clean.lib
OBJECTS=codec.obj common.obj fft.obj fpr.obj keygen.obj pqclean.obj rng.obj sign.obj vrfy.obj

# Warning C4146 is raised when a unary minus operator is applied to an
# unsigned type; this has nonetheless been standard and portable for as
# long as there has been a C standard, and we do that a lot, especially
# for constant-time computations. Thus, we disable that spurious warning.
CFLAGS=/nologo /I ..\..\..\common /W4 /wd4146 /WX

all: $(LIBRARY)

# Make sure objects are recompiled if headers change.
$(OBJECTS): *.h

# Command lines must be indented.
$(LIBRARY): $(OBJECTS)
	LIB.EXE /NOLOGO /WX /OUT:$@ $**

clean:
	-DEL $(OBJECTS)
	-DEL $(LIBRARY)
@@ -0,0 +1,80 @@ | |||
#ifndef PQCLEAN_FALCON512_CLEAN_API_H
#define PQCLEAN_FALCON512_CLEAN_API_H

#include <stddef.h>
#include <stdint.h>

/*
 * Object sizes, in bytes. Key sizes are exact; the signature size is
 * a maximum (actual signatures have a variable length).
 */
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES   1281
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES   897
#define PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES            690

#define PQCLEAN_FALCON512_CLEAN_CRYPTO_ALGNAME          "Falcon-512"

/*
 * Key pair generation.
 *
 *   pk   receives the public key
 *        (PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES bytes)
 *   sk   receives the private key
 *        (PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES bytes)
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair(uint8_t *pk, uint8_t *sk);

/*
 * Detached signature generation.
 *
 * The message (m, mlen) is signed with the private key sk. The
 * signature is written into sig[] and its length into *siglen; that
 * length varies, and is at most PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES.
 *
 * sig[], m[] and sk[] may overlap each other arbitrarily.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_signature(
    uint8_t *sig, size_t *siglen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
 * Detached signature verification.
 *
 * The signature (sig, siglen) is checked against the message
 * (m, mlen) and the public key pk.
 *
 * sig[], m[] and pk[] may overlap each other arbitrarily.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_verify(
    const uint8_t *sig, size_t siglen,
    const uint8_t *m, size_t mlen, const uint8_t *pk);

/*
 * Signed message generation.
 *
 * The message (m, mlen) is signed with sk, and signature and message
 * are packed together into sm[]. The total length is written into
 * *smlen; it may exceed mlen by up to
 * PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES.
 *
 * sm[] and m[] may overlap each other arbitrarily, but sm[] shall not
 * overlap with sk[].
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign(
    uint8_t *sm, size_t *smlen,
    const uint8_t *m, size_t mlen, const uint8_t *sk);

/*
 * Signed message opening.
 *
 * The signature contained in the signed message (sm, smlen) is
 * verified against pk. On success, the message is written into m[]
 * and its length into *mlen. The message is shorter than the signed
 * message object; the difference depends on the signature value and
 * may range up to PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES.
 *
 * m[], sm[] and pk[] may overlap each other arbitrarily.
 *
 * Returns 0 on success, -1 on error.
 */
int PQCLEAN_FALCON512_CLEAN_crypto_sign_open(
    uint8_t *m, size_t *mlen,
    const uint8_t *sm, size_t smlen, const uint8_t *pk);

#endif
@@ -0,0 +1,551 @@ | |||
/* | |||
* Encoding/decoding of keys and signatures. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include "inner.h" | |||
/* see inner.h */
size_t
PQCLEAN_FALCON512_CLEAN_modq_encode(
    void *out, size_t max_out_len,
    const uint16_t *x, unsigned logn) {
    /*
     * Each of the 2^logn values is written over exactly 14 bits,
     * most significant bit first; the last byte is padded with
     * zeros. Returned value is the encoded length in bytes, or 0
     * on error (a value is not lower than 12289, or the output
     * buffer is too small). If out is NULL, only the length is
     * computed.
     */
    const size_t count = (size_t)1 << logn;
    size_t need, k;
    uint8_t *dst;
    uint32_t window;
    int pending;

    for (k = 0; k < count; k ++) {
        if (x[k] >= 12289) {
            return 0;
        }
    }
    need = ((count * 14) + 7) >> 3;
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    for (k = 0; k < count; k ++) {
        window = (window << 14) | x[k];
        for (pending += 14; pending >= 8;) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        /* Left-align the remaining bits in a final byte. */
        *dst = (uint8_t)(window << (8 - pending));
    }
    return need;
}
/* see inner.h */
size_t
PQCLEAN_FALCON512_CLEAN_modq_decode(
    uint16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    /*
     * Reads 2^logn values of 14 bits each. Returned value is the
     * number of bytes consumed, or 0 on error (input too short, a
     * value not lower than 12289, or non-zero padding bits in the
     * last byte).
     */
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * 14) + 7) >> 3;
    const uint8_t *src;
    uint32_t window;
    int pending;
    size_t k;

    if (need > max_in_len) {
        return 0;
    }
    src = in;
    window = 0;
    pending = 0;
    for (k = 0; k < count; k ++) {
        unsigned val;

        /* Pull bytes until a complete 14-bit value is available. */
        do {
            window = (window << 8) | (*src ++);
            pending += 8;
        } while (pending < 14);
        pending -= 14;
        val = (window >> pending) & 0x3FFF;
        if (val >= 12289) {
            return 0;
        }
        x[k] = (uint16_t)val;
    }
    if ((window & (((uint32_t)1 << pending) - 1)) != 0) {
        return 0;
    }
    return need;
}
/* see inner.h */
size_t
PQCLEAN_FALCON512_CLEAN_trim_i16_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn, unsigned bits) {
    /*
     * Each of the 2^logn values is written as its low 'bits' bits
     * (two's complement), most significant bit first. Values must
     * lie in -(2^(bits-1)-1)..+(2^(bits-1)-1). Returned value is
     * the encoded length in bytes, or 0 on error (value out of
     * range, or output buffer too small). If out is NULL, only
     * the length is computed.
     */
    const size_t count = (size_t)1 << logn;
    const int limit = (1 << (bits - 1)) - 1;
    size_t need, k;
    uint8_t *dst;
    uint32_t window, field;
    unsigned pending;

    for (k = 0; k < count; k ++) {
        if (x[k] < -limit || x[k] > limit) {
            return 0;
        }
    }
    need = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    field = ((uint32_t)1 << bits) - 1;
    for (k = 0; k < count; k ++) {
        window = (window << bits) | ((uint16_t)x[k] & field);
        for (pending += bits; pending >= 8;) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        /* Left-align the remaining bits in a final byte. */
        *dst ++ = (uint8_t)(window << (8 - pending));
    }
    return need;
}
/* see inner.h */
size_t
PQCLEAN_FALCON512_CLEAN_trim_i16_decode(
    int16_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    /*
     * Reads 2^logn signed values of 'bits' bits each (two's
     * complement, most significant bit first). Returned value is
     * the number of bytes consumed, or 0 on error (input too
     * short, forbidden value -2^(bits-1) encountered, or non-zero
     * padding bits in the last byte).
     */
    size_t n, in_len;
    const uint8_t *buf;
    size_t u;
    uint32_t acc, mask1, mask2;
    unsigned acc_len;

    n = (size_t)1 << logn;
    in_len = ((n * bits) + 7) >> 3;
    if (in_len > max_in_len) {
        return 0;
    }
    buf = in;
    u = 0;
    acc = 0;
    acc_len = 0;
    mask1 = ((uint32_t)1 << bits) - 1;      /* all bits of a field */
    mask2 = (uint32_t)1 << (bits - 1);      /* sign bit of a field */
    while (u < n) {
        acc = (acc << 8) | *buf ++;
        acc_len += 8;
        while (acc_len >= bits && u < n) {
            uint32_t w;

            acc_len -= bits;
            w = (acc >> acc_len) & mask1;
            if (w == mask2) {
                /*
                 * The -2^(bits-1) value is forbidden.
                 */
                return 0;
            }
            /*
             * Sign extension, done with plain arithmetic: the
             * sign bit has weight -2^(bits-1) instead of
             * +2^(bits-1), hence we subtract twice its value.
             */
            x[u ++] = (int16_t)((int32_t)w - (int32_t)((w & mask2) << 1));
        }
    }
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        /*
         * Extra bits in the last byte must be zero.
         */
        return 0;
    }
    return in_len;
}
/* see inner.h */
size_t
PQCLEAN_FALCON512_CLEAN_trim_i8_encode(
    void *out, size_t max_out_len,
    const int8_t *x, unsigned logn, unsigned bits) {
    /*
     * Same format as the 16-bit variant, for 8-bit inputs: each of
     * the 2^logn values is written as its low 'bits' bits, most
     * significant bit first. Values must lie in
     * -(2^(bits-1)-1)..+(2^(bits-1)-1). Returned value is the
     * encoded length in bytes, or 0 on error (value out of range,
     * or output buffer too small). If out is NULL, only the length
     * is computed.
     */
    const size_t count = (size_t)1 << logn;
    const int limit = (1 << (bits - 1)) - 1;
    size_t need, k;
    uint8_t *dst;
    uint32_t window, field;
    unsigned pending;

    for (k = 0; k < count; k ++) {
        if (x[k] < -limit || x[k] > limit) {
            return 0;
        }
    }
    need = ((count * bits) + 7) >> 3;
    if (out == NULL) {
        return need;
    }
    if (need > max_out_len) {
        return 0;
    }

    dst = out;
    window = 0;
    pending = 0;
    field = ((uint32_t)1 << bits) - 1;
    for (k = 0; k < count; k ++) {
        window = (window << bits) | ((uint8_t)x[k] & field);
        for (pending += bits; pending >= 8;) {
            pending -= 8;
            *dst ++ = (uint8_t)(window >> pending);
        }
    }
    if (pending > 0) {
        /* Left-align the remaining bits in a final byte. */
        *dst ++ = (uint8_t)(window << (8 - pending));
    }
    return need;
}
/* see inner.h */
size_t
PQCLEAN_FALCON512_CLEAN_trim_i8_decode(
    int8_t *x, unsigned logn, unsigned bits,
    const void *in, size_t max_in_len) {
    /*
     * Reads 2^logn signed values of 'bits' bits each (two's
     * complement, most significant bit first). Returned value is
     * the number of bytes consumed, or 0 on error (input too
     * short, forbidden value -2^(bits-1) encountered, or non-zero
     * padding bits in the last byte).
     */
    const size_t count = (size_t)1 << logn;
    const size_t need = ((count * bits) + 7) >> 3;
    const uint8_t *src;
    uint32_t window, field, sign;
    unsigned pending;
    size_t k;

    if (need > max_in_len) {
        return 0;
    }
    src = in;
    window = 0;
    pending = 0;
    field = ((uint32_t)1 << bits) - 1;
    sign = (uint32_t)1 << (bits - 1);
    for (k = 0; k < count;) {
        window = (window << 8) | *src ++;
        pending += 8;
        for (; pending >= bits && k < count; k ++) {
            uint32_t raw;

            pending -= bits;
            raw = (window >> pending) & field;
            if (raw == sign) {
                /*
                 * Sign bit alone is -2^(bits-1), which is
                 * not allowed.
                 */
                return 0;
            }
            /*
             * The sign bit weighs -2^(bits-1), not
             * +2^(bits-1): subtract it twice.
             */
            x[k] = (int8_t)((int32_t)raw - (int32_t)((raw & sign) << 1));
        }
    }
    if ((window & (((uint32_t)1 << pending) - 1)) != 0) {
        /*
         * Padding bits of the last byte are not all zero.
         */
        return 0;
    }
    return need;
}
/* see inner.h */
size_t
PQCLEAN_FALCON512_CLEAN_comp_encode(
    void *out, size_t max_out_len,
    const int16_t *x, unsigned logn) {
    /*
     * Variable-length encoding of 2^logn values in -2047..+2047.
     * Each value yields: one sign bit, the low 7 bits of the
     * absolute value, then the high bits of the absolute value in
     * unary (that many zeros, followed by a one). Returned value
     * is the encoded length in bytes, or 0 on error (value out of
     * range, or output buffer too small). If out is NULL, nothing
     * is written and only the length is computed.
     */
    const size_t count = (size_t)1 << logn;
    uint8_t *dst = out;
    size_t k, written;
    uint32_t window;
    unsigned pending;

    for (k = 0; k < count; k ++) {
        if (x[k] < -2047 || x[k] > +2047) {
            return 0;
        }
    }

    window = 0;
    pending = 0;
    written = 0;
    for (k = 0; k < count; k ++) {
        int val;
        unsigned neg, mag, high;

        val = x[k];
        neg = (val < 0) ? 1u : 0u;
        mag = (unsigned)(neg ? -val : val);
        high = mag >> 7;

        /*
         * Sign bit and low 7 bits: exactly 8 bits.
         */
        window = (window << 1) | neg;
        window = (window << 7) | (mag & 127u);
        pending += 8;

        /*
         * 'high' zeros, then a one. The absolute value is at most
         * 2047, so high <= 15 and at most 16 bits are added here;
         * with the 8 bits above and up to 7 bits left over from
         * the previous value, the 32-bit window holds at most 31
         * meaningful bits.
         */
        window = (window << (high + 1)) | 1;
        pending += high + 1;

        /*
         * Emit all complete bytes.
         */
        for (; pending >= 8; written ++) {
            pending -= 8;
            if (dst != NULL) {
                if (written >= max_out_len) {
                    return 0;
                }
                dst[written] = (uint8_t)(window >> pending);
            }
        }
    }

    /*
     * Emit the last, partial byte (if any), padded with zeros.
     */
    if (pending > 0) {
        if (dst != NULL) {
            if (written >= max_out_len) {
                return 0;
            }
            dst[written] = (uint8_t)(window << (8 - pending));
        }
        written ++;
    }
    return written;
}
/* see inner.h */
size_t
PQCLEAN_FALCON512_CLEAN_comp_decode(
    int16_t *x, unsigned logn,
    const void *in, size_t max_in_len) {
    /*
     * Decodes 2^logn values from the variable-length format: for
     * each value, one sign bit, the low 7 bits of the absolute
     * value, then the high bits in unary (zeros followed by a
     * one). Returned value is the number of bytes consumed, or 0
     * on error. Errors are: input too short, absolute value
     * greater than 2047, and any non-canonical encoding (a zero
     * with its sign bit set, or non-zero unused bits in the last
     * byte). Rejecting non-canonical encodings guarantees that a
     * given sequence of values has a single accepted encoding.
     */
    const uint8_t *buf;
    size_t n, u, v;
    uint32_t acc;
    unsigned acc_len;

    n = (size_t)1 << logn;
    buf = in;
    acc = 0;
    acc_len = 0;
    v = 0;
    for (u = 0; u < n; u ++) {
        unsigned b, s, m;

        /*
         * Get next eight bits: sign and low seven bits of the
         * absolute value.
         */
        if (v >= max_in_len) {
            return 0;
        }
        acc = (acc << 8) | (uint32_t)buf[v ++];
        b = acc >> acc_len;
        s = b & 128;
        m = b & 127;

        /*
         * Get next bits until a 1 is reached.
         */
        for (;;) {
            if (acc_len == 0) {
                if (v >= max_in_len) {
                    return 0;
                }
                acc = (acc << 8) | (uint32_t)buf[v ++];
                acc_len = 8;
            }
            acc_len --;
            if (((acc >> acc_len) & 1) != 0) {
                break;
            }
            m += 128;
            if (m > 2047) {
                return 0;
            }
        }

        /*
         * "-0" is forbidden: zero must be encoded with a clear
         * sign bit.
         */
        if (s && m == 0) {
            return 0;
        }
        x[u] = (int16_t)(s ? -(int)m : (int)m);
    }

    /*
     * Unused bits in the last byte must be zero.
     */
    if ((acc & (((uint32_t)1 << acc_len) - 1)) != 0) {
        return 0;
    }
    return v;
}
/* | |||
* Key elements and signatures are polynomials with small integer | |||
* coefficients. Here are some statistics gathered over many | |||
* generated key pairs (10000 or more for each degree): | |||
* | |||
* log(n) n max(f,g) std(f,g) max(F,G) std(F,G) | |||
* 1 2 129 56.31 143 60.02 | |||
* 2 4 123 40.93 160 46.52 | |||
* 3 8 97 28.97 159 38.01 | |||
* 4 16 100 21.48 154 32.50 | |||
* 5 32 71 15.41 151 29.36 | |||
* 6 64 59 11.07 138 27.77 | |||
* 7 128 39 7.91 144 27.00 | |||
* 8 256 32 5.63 148 26.61 | |||
* 9 512 22 4.00 137 26.46 | |||
* 10 1024 15 2.84 146 26.41 | |||
* | |||
* We want a compact storage format for private key, and, as part of | |||
* key generation, we are allowed to reject some keys which would | |||
* otherwise be fine (this does not induce any noticeable vulnerability | |||
* as long as we reject only a small proportion of possible keys). | |||
* Hence, we enforce at key generation time maximum values for the | |||
* elements of f, g, F and G, so that their encoding can be expressed | |||
* in fixed-width values. Limits have been chosen so that generated
* keys are almost always within bounds, thus impacting neither
* security nor performance.
* | |||
* IMPORTANT: the code assumes that all coefficients of f, g, F and G | |||
* ultimately fit in the -127..+127 range. Thus, none of the elements | |||
* of max_fg_bits[] and max_FG_bits[] shall be greater than 8. | |||
*/ | |||
/*
 * Encoding width (in bits) enforced for the coefficients of f and g.
 * Entry 0 is unused; the ten other entries follow the rows of the
 * statistics table above (NOTE(review): i.e. presumably indexed by
 * logn = 1..10 -- confirm against users of this table).
 */
const uint8_t PQCLEAN_FALCON512_CLEAN_max_fg_bits[] = {
    0,              /* unused */
    8, 8, 8, 8, 8,  /* entries 1..5 */
    7, 7,           /* entries 6..7 */
    6, 6,           /* entries 8..9 */
    5               /* entry 10 */
};
/*
 * Encoding width (in bits) enforced for the coefficients of F and G:
 * 8 bits for every entry. Entry 0 is unused.
 */
const uint8_t PQCLEAN_FALCON512_CLEAN_max_FG_bits[] = {
    0,              /* unused */
    8, 8, 8, 8, 8,  /* entries 1..5 */
    8, 8, 8, 8, 8   /* entries 6..10 */
};
/* | |||
* When generating a new key pair, we can always reject keys which | |||
* feature an abnormally large coefficient. This can also be done for | |||
* signatures, albeit with some care: in case the signature process is | |||
* used in a derandomized setup (explicitly seeded with the message and | |||
* private key), we have to follow the specification faithfully, and the | |||
* specification only enforces a limit on the L2 norm of the signature | |||
* vector. The limit on the L2 norm implies that the absolute value of | |||
* a coefficient of the signature cannot be more than the following: | |||
* | |||
* log(n) n max sig coeff (theoretical) | |||
* 1 2 412 | |||
* 2 4 583 | |||
* 3 8 824 | |||
* 4 16 1166 | |||
* 5 32 1649 | |||
* 6 64 2332 | |||
* 7 128 3299 | |||
* 8 256 4665 | |||
* 9 512 6598 | |||
* 10 1024 9331 | |||
* | |||
* However, the largest signature coefficient observed during our
* experiments was 1077 (in absolute value), hence we can assume that,
* with overwhelming probability, signature coefficients will fit
* in -2047..2047, i.e. 12 bits.
*/ | |||
/*
 * Width (in bits) assumed for signature coefficients. Entry 0 is
 * unused; widths grow from 10 bits up to the 12-bit ceiling.
 */
const uint8_t PQCLEAN_FALCON512_CLEAN_max_sig_bits[] = {
    0,                          /* unused */
    10,                         /* entry 1 */
    11, 11,                     /* entries 2..3 */
    12, 12, 12, 12, 12, 12, 12  /* entries 4..10 */
};
@@ -0,0 +1,261 @@ | |||
/* | |||
* Support functions for signatures (hash-to-point, norm). | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include "inner.h" | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_hash_to_point( | |||
shake256_context *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp) { | |||
/* | |||
* Each 16-bit sample is a value in 0..65535. The value is | |||
* kept if it falls in 0..61444 (because 61445 = 5*12289) | |||
* and rejected otherwise; thus, each sample has probability | |||
* about 0.93758 of being selected. | |||
* | |||
* We want to oversample enough to be sure that we will | |||
* have enough values with probability at least 1 - 2^(-256). | |||
* Depending on degree N, this leads to the following | |||
* required oversampling: | |||
* | |||
* logn n oversampling | |||
* 1 2 65 | |||
* 2 4 67 | |||
* 3 8 71 | |||
* 4 16 77 | |||
* 5 32 86 | |||
* 6 64 100 | |||
* 7 128 122 | |||
* 8 256 154 | |||
* 9 512 205 | |||
* 10 1024 287 | |||
* | |||
* If logn >= 7, then the provided temporary buffer is large | |||
* enough. Otherwise, we use a stack buffer of 63 entries | |||
* (i.e. 126 bytes) for the values that do not fit in tmp[]. | |||
*/ | |||
static const uint16_t overtab[] = { | |||
0, /* unused */ | |||
65, | |||
67, | |||
71, | |||
77, | |||
86, | |||
100, | |||
122, | |||
154, | |||
205, | |||
287 | |||
}; | |||
unsigned n, n2, u, m, p, over; | |||
uint16_t *tt1, tt2[63]; | |||
/* | |||
* We first generate m 16-bit value. Values 0..n-1 go to x[]. | |||
* Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[]. | |||
* We also reduce modulo q the values; rejected values are set | |||
* to 0xFFFF. | |||
*/ | |||
n = 1U << logn; | |||
n2 = n << 1; | |||
over = overtab[logn]; | |||
m = n + over; | |||
tt1 = (uint16_t *)tmp; | |||
for (u = 0; u < m; u ++) { | |||
uint8_t buf[2]; | |||
uint32_t w, wr; | |||
shake256_extract(sc, buf, sizeof buf); | |||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1]; | |||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1)); | |||
wr = wr - ((uint32_t)12289 & (((wr - 12289) >> 31) - 1)); | |||
wr |= ((w - 61445) >> 31) - 1; | |||
if (u < n) { | |||
x[u] = (uint16_t)wr; | |||
} else if (u < n2) { | |||
tt1[u - n] = (uint16_t)wr; | |||
} else { | |||
tt2[u - n2] = (uint16_t)wr; | |||
} | |||
} | |||
/* | |||
* Now we must "squeeze out" the invalid values. We do this in | |||
* a logarithmic sequence of passes; each pass computes where a | |||
* value should go, and moves it down by 'p' slots if necessary, | |||
* where 'p' uses an increasing powers-of-two scale. It can be | |||
* shown that in all cases where the loop decides that a value | |||
* has to be moved down by p slots, the destination slot is | |||
* "free" (i.e. contains an invalid value). | |||
*/ | |||
for (p = 1; p <= over; p <<= 1) { | |||
unsigned v; | |||
/* | |||
* In the loop below: | |||
* | |||
* - v contains the index of the final destination of | |||
* the value; it is recomputed dynamically based on | |||
* whether values are valid or not. | |||
* | |||
* - u is the index of the value we consider ("source"); | |||
* its address is s. | |||
* | |||
* - The loop may swap the value with the one at index | |||
* u-p. The address of the swap destination is d. | |||
*/ | |||
v = 0; | |||
for (u = 0; u < m; u ++) { | |||
uint16_t *s, *d; | |||
unsigned j, sv, dv, mk; | |||
if (u < n) { | |||
s = &x[u]; | |||
} else if (u < n2) { | |||
s = &tt1[u - n]; | |||
} else { | |||
s = &tt2[u - n2]; | |||
} | |||
sv = *s; | |||
/* | |||
* The value in sv should ultimately go to | |||
* address v, i.e. jump back by u-v slots. | |||
*/ | |||
j = u - v; | |||
/* | |||
* We increment v for the next iteration, but | |||
* only if the source value is valid. The mask | |||
* 'mk' is -1 if the value is valid, 0 otherwise, | |||
* so we _subtract_ mk. | |||
*/ | |||
mk = (sv >> 15) - 1U; | |||
v -= mk; | |||
/* | |||
* In this loop we consider jumps by p slots; if | |||
* u < p then there is nothing more to do. | |||
*/ | |||
if (u < p) { | |||
continue; | |||
} | |||
/* | |||
* Destination for the swap: value at address u-p. | |||
*/ | |||
if ((u - p) < n) { | |||
d = &x[u - p]; | |||
} else if ((u - p) < n2) { | |||
d = &tt1[(u - p) - n]; | |||
} else { | |||
d = &tt2[(u - p) - n2]; | |||
} | |||
dv = *d; | |||
/* | |||
* The swap should be performed only if the source | |||
* is valid AND the jump j has its 'p' bit set. | |||
*/ | |||
mk &= -(((j & p) + 0x1FF) >> 9); | |||
*s = (uint16_t)(sv ^ (mk & (sv ^ dv))); | |||
*d = (uint16_t)(dv ^ (mk & (sv ^ dv))); | |||
} | |||
} | |||
} | |||
/*
 * Tell whether the vector (s1, s2) is short enough (API contract is
 * described in inner.h).
 *
 * The squared l2-norm is accumulated using 32-bit operations only. Each
 * square of an int16_t is at most 2^30, so a running total that goes
 * beyond 2^31-1 is necessarily observed with its top bit set before it
 * can wrap around; the OR of all running totals remembers that event
 * and the final total is then saturated to 2^32-1.
 *
 * Acceptance bound on the l2-norm is 1.2*1.55*sqrt(q)*sqrt(2*N); the
 * constant 7085 is floor((1.2^2)*(1.55^2)*2*1024), and the product with
 * q = 12289 is scaled down from degree 1024 to degree 2^logn.
 *
 * Returned value is 1 if the squared norm is strictly lower than the
 * bound, 0 otherwise.
 */
int
PQCLEAN_FALCON512_CLEAN_is_short(
    const int16_t *s1, const int16_t *s2, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    uint32_t bound;
    uint32_t sqnorm = 0;
    uint32_t seen = 0;
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t c1 = s1[i];
        int32_t c2 = s2[i];

        sqnorm += (uint32_t)(c1 * c1);
        seen |= sqnorm;
        sqnorm += (uint32_t)(c2 * c2);
        seen |= sqnorm;
    }

    /* Saturate: all-ones if any running total had its top bit set. */
    sqnorm |= -(seen >> 31);

    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return sqnorm < bound;
}
/*
 * Variant of the shortness test in which the squared norm of the first
 * half is already available in sqn (API contract is in inner.h).
 *
 * The squares of the s2 coefficients are added to sqn with 32-bit
 * operations. If sqn, or any running total, has its top bit set, the
 * total is saturated to 2^32-1 so that the comparison fails.
 *
 * Acceptance bound on the l2-norm is 1.2*1.55*sqrt(q)*sqrt(2*N); the
 * constant 7085 is floor((1.2^2)*(1.55^2)*2*1024).
 *
 * Returned value is 1 if the total is strictly lower than the bound,
 * 0 otherwise.
 */
int
PQCLEAN_FALCON512_CLEAN_is_short_half(
    uint32_t sqn, const int16_t *s2, unsigned logn) {
    const size_t count = (size_t)1 << logn;
    uint32_t seen = -(sqn >> 31);
    uint32_t bound;
    size_t i;

    for (i = 0; i < count; i ++) {
        int32_t c = s2[i];

        sqn += (uint32_t)(c * c);
        seen |= sqn;
    }
    sqn |= -(seen >> 31);

    bound = ((uint32_t)7085 * (uint32_t)12289) >> (10 - logn);
    return sqn < bound;
}
@@ -0,0 +1,699 @@ | |||
/* | |||
* FFT code. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include "inner.h" | |||
/* | |||
* Rules for complex number macros: | |||
* -------------------------------- | |||
* | |||
* Operand order is: destination, source1, source2... | |||
* | |||
* Each operand is a real and an imaginary part. | |||
* | |||
* All overlaps are allowed. | |||
*/ | |||
/*
 * Complex addition: d = a + b.
 *
 * Both sums are computed into temporaries before either destination is
 * written, so the destination may overlap the sources.
 */
#define FPC_ADD(d_re, d_im, a_re, a_im, b_re, b_im) do { \
        fpr fpct_sum_re = fpr_add((a_re), (b_re)); \
        fpr fpct_sum_im = fpr_add((a_im), (b_im)); \
        (d_re) = fpct_sum_re; \
        (d_im) = fpct_sum_im; \
    } while (0)
/*
 * Complex subtraction: d = a - b.
 *
 * Both differences are computed into temporaries before either
 * destination is written, so the destination may overlap the sources.
 */
#define FPC_SUB(d_re, d_im, a_re, a_im, b_re, b_im) do { \
        fpr fpct_dif_re = fpr_sub((a_re), (b_re)); \
        fpr fpct_dif_im = fpr_sub((a_im), (b_im)); \
        (d_re) = fpct_dif_re; \
        (d_im) = fpct_dif_im; \
    } while (0)
/*
 * Multiplication of two complex numbers: d = a * b.
 *
 *   Re(d) = Re(a)*Re(b) - Im(a)*Im(b)
 *   Im(d) = Re(a)*Im(b) + Im(a)*Re(b)
 *
 * Each source operand is evaluated exactly once and captured in a
 * temporary before the destination is written; overlaps are allowed.
 */
#define FPC_MUL(d_re, d_im, a_re, a_im, b_re, b_im) do { \
        fpr fpct_xr = (a_re); \
        fpr fpct_xi = (a_im); \
        fpr fpct_yr = (b_re); \
        fpr fpct_yi = (b_im); \
        fpr fpct_pr = fpr_sub( \
            fpr_mul(fpct_xr, fpct_yr), \
            fpr_mul(fpct_xi, fpct_yi)); \
        fpr fpct_pi = fpr_add( \
            fpr_mul(fpct_xr, fpct_yi), \
            fpr_mul(fpct_xi, fpct_yr)); \
        (d_re) = fpct_pr; \
        (d_im) = fpct_pi; \
    } while (0)
/*
 * Complex squaring: d = a * a.
 *
 *   Re(d) = Re(a)^2 - Im(a)^2
 *   Im(d) = 2 * Re(a) * Im(a)
 *
 * The source is captured in temporaries first; overlaps are allowed.
 */
#define FPC_SQR(d_re, d_im, a_re, a_im) do { \
        fpr fpct_xr = (a_re); \
        fpr fpct_xi = (a_im); \
        fpr fpct_sr = fpr_sub(fpr_sqr(fpct_xr), fpr_sqr(fpct_xi)); \
        fpr fpct_si = fpr_double(fpr_mul(fpct_xr, fpct_xi)); \
        (d_re) = fpct_sr; \
        (d_im) = fpct_si; \
    } while (0)
/*
 * Complex inversion: d = 1 / a.
 *
 * With k = 1 / (Re(a)^2 + Im(a)^2), the result is conj(a) * k:
 *   Re(d) = Re(a) * k
 *   Im(d) = -Im(a) * k
 *
 * The source is captured in temporaries first; overlaps are allowed.
 */
#define FPC_INV(d_re, d_im, a_re, a_im) do { \
        fpr fpct_xr = (a_re); \
        fpr fpct_xi = (a_im); \
        fpr fpct_k = fpr_inv( \
            fpr_add(fpr_sqr(fpct_xr), fpr_sqr(fpct_xi))); \
        fpr fpct_ir = fpr_mul(fpct_xr, fpct_k); \
        fpr fpct_ii = fpr_mul(fpr_neg(fpct_xi), fpct_k); \
        (d_re) = fpct_ir; \
        (d_im) = fpct_ii; \
    } while (0)
/*
 * Complex division: d = a / b.
 *
 * The divisor is first inverted (1/b = conj(b) / |b|^2), then the
 * dividend is multiplied by that inverse.
 *
 * All source operands are captured in temporaries before the
 * destination is written; overlaps are allowed.
 */
#define FPC_DIV(d_re, d_im, a_re, a_im, b_re, b_im) do { \
        fpr fpct_xr = (a_re); \
        fpr fpct_xi = (a_im); \
        fpr fpct_yr = (b_re); \
        fpr fpct_yi = (b_im); \
        fpr fpct_k = fpr_inv( \
            fpr_add(fpr_sqr(fpct_yr), fpr_sqr(fpct_yi))); \
        fpr fpct_vr = fpr_mul(fpct_yr, fpct_k); \
        fpr fpct_vi = fpr_mul(fpr_neg(fpct_yi), fpct_k); \
        fpr fpct_qr = fpr_sub( \
            fpr_mul(fpct_xr, fpct_vr), \
            fpr_mul(fpct_xi, fpct_vi)); \
        fpr fpct_qi = fpr_add( \
            fpr_mul(fpct_xr, fpct_vi), \
            fpr_mul(fpct_xi, fpct_vr)); \
        (d_re) = fpct_qr; \
        (d_im) = fpct_qi; \
    } while (0)
/* | |||
* Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the | |||
* values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots | |||
* of X^N+1 in the field of complex numbers. A crucial property is that | |||
* w_{N-1-j} = conj(w_j) = 1/w_j for all j. | |||
* | |||
* FFT representation of a polynomial f (taken modulo X^N+1) is the | |||
* set of values f(w_j). Since f is real, conj(f(w_j)) = f(conj(w_j)), | |||
* thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values, | |||
* for j = 0 to N/2-1; the other half can be recomputed easily when (if) | |||
* needed. A consequence is that FFT representation has the same size | |||
* as normal representation: N/2 complex numbers use N real numbers (each | |||
* complex number is the combination of a real and an imaginary part). | |||
* | |||
* We use a specific ordering which makes computations easier. Let rev() | |||
* be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we | |||
* store the real and imaginary parts of f(w_j) in slots: | |||
* | |||
* Re(f(w_j)) -> slot rev(j)/2 | |||
* Im(f(w_j)) -> slot rev(j)/2+N/2 | |||
* | |||
* (Note that rev(j) is even for j < N/2.) | |||
*/ | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_FFT(fpr *f, unsigned logn) { | |||
/* | |||
* FFT algorithm in bit-reversal order uses the following | |||
* iterative algorithm: | |||
* | |||
* t = N | |||
* for m = 1; m < N; m *= 2: | |||
* ht = t/2 | |||
* for i1 = 0; i1 < m; i1 ++: | |||
* j1 = i1 * t | |||
* s = GM[m + i1] | |||
* for j = j1; j < (j1 + ht); j ++: | |||
* x = f[j] | |||
* y = s * f[j + ht] | |||
* f[j] = x + y | |||
* f[j + ht] = x - y | |||
* t = ht | |||
* | |||
* GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N). | |||
* | |||
* In the description above, f[] is supposed to contain complex | |||
* numbers. In our in-memory representation, the real and | |||
* imaginary parts of f[k] are in array slots k and k+N/2. | |||
* | |||
* We only keep the first half of the complex numbers. We can | |||
* see that after the first iteration, the first and second halves | |||
* of the array of complex numbers have separate lives, so we | |||
* simply ignore the second part. | |||
*/ | |||
unsigned u; | |||
size_t t, n, hn, m; | |||
/* | |||
* First iteration: compute f[j] + i * f[j+N/2] for all j < N/2 | |||
* (because GM[1] = w^rev(1) = w^(N/2) = i). | |||
* In our chosen representation, this is a no-op: everything is | |||
* already where it should be. | |||
*/ | |||
/* | |||
* Subsequent iterations are truncated to use only the first | |||
* half of values. | |||
*/ | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
t = hn; | |||
for (u = 1, m = 2; u < logn; u ++, m <<= 1) { | |||
size_t ht, hm, i1, j1; | |||
ht = t >> 1; | |||
hm = m >> 1; | |||
for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) { | |||
size_t j, j2; | |||
j2 = j1 + ht; | |||
fpr s_re, s_im; | |||
s_re = fpr_gm_tab[((m + i1) << 1) + 0]; | |||
s_im = fpr_gm_tab[((m + i1) << 1) + 1]; | |||
for (j = j1; j < j2; j ++) { | |||
fpr x_re, x_im, y_re, y_im; | |||
x_re = f[j]; | |||
x_im = f[j + hn]; | |||
y_re = f[j + ht]; | |||
y_im = f[j + ht + hn]; | |||
FPC_MUL(y_re, y_im, y_re, y_im, s_re, s_im); | |||
FPC_ADD(f[j], f[j + hn], | |||
x_re, x_im, y_re, y_im); | |||
FPC_SUB(f[j + ht], f[j + ht + hn], | |||
x_re, x_im, y_re, y_im); | |||
} | |||
} | |||
t = ht; | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_iFFT(fpr *f, unsigned logn) { | |||
/* | |||
* Inverse FFT algorithm in bit-reversal order uses the following | |||
* iterative algorithm: | |||
* | |||
* t = 1 | |||
* for m = N; m > 1; m /= 2: | |||
* hm = m/2 | |||
* dt = t*2 | |||
* for i1 = 0; i1 < hm; i1 ++: | |||
* j1 = i1 * dt | |||
* s = iGM[hm + i1] | |||
* for j = j1; j < (j1 + t); j ++: | |||
* x = f[j] | |||
* y = f[j + t] | |||
* f[j] = x + y | |||
* f[j + t] = s * (x - y) | |||
* t = dt | |||
* for i1 = 0; i1 < N; i1 ++: | |||
* f[i1] = f[i1] / N | |||
* | |||
* iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N) | |||
* (actually, iGM[k] = 1/GM[k] = conj(GM[k])). | |||
* | |||
* In the main loop (not counting the final division loop), in | |||
* all iterations except the last, the first and second half of f[] | |||
* (as an array of complex numbers) are separate. In our chosen | |||
* representation, we do not keep the second half. | |||
* | |||
* The last iteration recombines the recomputed half with the | |||
* implicit half, and should yield only real numbers since the | |||
* target polynomial is real; moreover, s = i at that step. | |||
* Thus, when considering x and y: | |||
* y = conj(x) since the final f[j] must be real | |||
* Therefore, f[j] is filled with 2*Re(x), and f[j + t] is | |||
* filled with 2*Im(x). | |||
* But we already have Re(x) and Im(x) in array slots j and j+t | |||
* in our chosen representation. That last iteration is thus a | |||
* simple doubling of the values in all the array. | |||
* | |||
* We make the last iteration a no-op by tweaking the final | |||
* division into a division by N/2, not N. | |||
*/ | |||
size_t u, n, hn, t, m; | |||
n = (size_t)1 << logn; | |||
t = 1; | |||
m = n; | |||
hn = n >> 1; | |||
for (u = logn; u > 1; u --) { | |||
size_t hm, dt, i1, j1; | |||
hm = m >> 1; | |||
dt = t << 1; | |||
for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) { | |||
size_t j, j2; | |||
j2 = j1 + t; | |||
fpr s_re, s_im; | |||
s_re = fpr_gm_tab[((hm + i1) << 1) + 0]; | |||
s_im = fpr_neg(fpr_gm_tab[((hm + i1) << 1) + 1]); | |||
for (j = j1; j < j2; j ++) { | |||
fpr x_re, x_im, y_re, y_im; | |||
x_re = f[j]; | |||
x_im = f[j + hn]; | |||
y_re = f[j + t]; | |||
y_im = f[j + t + hn]; | |||
FPC_ADD(f[j], f[j + hn], | |||
x_re, x_im, y_re, y_im); | |||
FPC_SUB(x_re, x_im, x_re, x_im, y_re, y_im); | |||
FPC_MUL(f[j + t], f[j + t + hn], | |||
x_re, x_im, s_re, s_im); | |||
} | |||
} | |||
t = dt; | |||
m = hm; | |||
} | |||
/* | |||
* Last iteration is a no-op, provided that we divide by N/2 | |||
* instead of N. We need to make a special case for logn = 0. | |||
*/ | |||
if (logn > 0) { | |||
fpr ni; | |||
ni = fpr_p2_tab[logn]; | |||
for (u = 0; u < n; u ++) { | |||
f[u] = fpr_mul(f[u], ni); | |||
} | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_add( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_add(a[u], b[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_sub( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_sub(a[u], b[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_neg(fpr *a, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_neg(a[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_adj_fft(fpr *a, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = (n >> 1); u < n; u ++) { | |||
a[u] = fpr_neg(a[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_mul_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = b[u + hn]; | |||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_muladj_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = fpr_neg(b[u + hn]); | |||
FPC_MUL(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn) { | |||
/* | |||
* Since each coefficient is multiplied with its own conjugate, | |||
* the result contains only real values. | |||
*/ | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
a[u] = fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)); | |||
a[u + hn] = fpr_zero; | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn) { | |||
size_t n, u; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
a[u] = fpr_mul(a[u], x); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_div_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = b[u + hn]; | |||
FPC_DIV(a[u], a[u + hn], a_re, a_im, b_re, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_invnorm2_fft(fpr *d, | |||
const fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr a_re, a_im; | |||
fpr b_re, b_im; | |||
a_re = a[u]; | |||
a_im = a[u + hn]; | |||
b_re = b[u]; | |||
b_im = b[u + hn]; | |||
d[u] = fpr_inv(fpr_add( | |||
fpr_add(fpr_sqr(a_re), fpr_sqr(a_im)), | |||
fpr_add(fpr_sqr(b_re), fpr_sqr(b_im)))); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_add_muladj_fft(fpr *d, | |||
const fpr *F, const fpr *G, | |||
const fpr *f, const fpr *g, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr F_re, F_im, G_re, G_im; | |||
fpr f_re, f_im, g_re, g_im; | |||
fpr a_re, a_im, b_re, b_im; | |||
F_re = F[u]; | |||
F_im = F[u + hn]; | |||
G_re = G[u]; | |||
G_im = G[u + hn]; | |||
f_re = f[u]; | |||
f_im = f[u + hn]; | |||
g_re = g[u]; | |||
g_im = g[u + hn]; | |||
FPC_MUL(a_re, a_im, F_re, F_im, f_re, fpr_neg(f_im)); | |||
FPC_MUL(b_re, b_im, G_re, G_im, g_re, fpr_neg(g_im)); | |||
d[u] = fpr_add(a_re, b_re); | |||
d[u + hn] = fpr_add(a_im, b_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
a[u] = fpr_mul(a[u], b[u]); | |||
a[u + hn] = fpr_mul(a[u + hn], b[u]); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_div_autoadj_fft( | |||
fpr *a, const fpr *b, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr ib; | |||
ib = fpr_inv(b[u]); | |||
a[u] = fpr_mul(a[u], ib); | |||
a[u + hn] = fpr_mul(a[u + hn], ib); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_LDL_fft( | |||
const fpr *g00, | |||
fpr *g01, fpr *g11, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||
fpr mu_re, mu_im; | |||
g00_re = g00[u]; | |||
g00_im = g00[u + hn]; | |||
g01_re = g01[u]; | |||
g01_im = g01[u + hn]; | |||
g11_re = g11[u]; | |||
g11_im = g11[u + hn]; | |||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||
FPC_SUB(g11[u], g11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||
g01[u] = mu_re; | |||
g01[u + hn] = fpr_neg(mu_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_LDLmv_fft( | |||
fpr *d11, fpr *l10, | |||
const fpr *g00, const fpr *g01, | |||
const fpr *g11, unsigned logn) { | |||
size_t n, hn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
for (u = 0; u < hn; u ++) { | |||
fpr g00_re, g00_im, g01_re, g01_im, g11_re, g11_im; | |||
fpr mu_re, mu_im; | |||
g00_re = g00[u]; | |||
g00_im = g00[u + hn]; | |||
g01_re = g01[u]; | |||
g01_im = g01[u + hn]; | |||
g11_re = g11[u]; | |||
g11_im = g11[u + hn]; | |||
FPC_DIV(mu_re, mu_im, g01_re, g01_im, g00_re, g00_im); | |||
FPC_MUL(g01_re, g01_im, mu_re, mu_im, g01_re, fpr_neg(g01_im)); | |||
FPC_SUB(d11[u], d11[u + hn], g11_re, g11_im, g01_re, g01_im); | |||
l10[u] = mu_re; | |||
l10[u + hn] = fpr_neg(mu_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_split_fft( | |||
fpr *f0, fpr *f1, | |||
const fpr *f, unsigned logn) { | |||
/* | |||
* The FFT representation we use is in bit-reversed order | |||
* (element i contains f(w^(rev(i))), where rev() is the | |||
* bit-reversal function over the ring degree. This changes | |||
* indexes with regards to the Falcon specification. | |||
*/ | |||
size_t n, hn, qn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
qn = hn >> 1; | |||
/* | |||
* We process complex values by pairs. For logn = 1, there is only | |||
* one complex value (the other one is the implicit conjugate), | |||
* so we add the two lines below because the loop will be | |||
* skipped. | |||
*/ | |||
f0[0] = f[0]; | |||
f1[0] = f[hn]; | |||
for (u = 0; u < qn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
fpr t_re, t_im; | |||
a_re = f[(u << 1) + 0]; | |||
a_im = f[(u << 1) + 0 + hn]; | |||
b_re = f[(u << 1) + 1]; | |||
b_im = f[(u << 1) + 1 + hn]; | |||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||
f0[u] = fpr_half(t_re); | |||
f0[u + qn] = fpr_half(t_im); | |||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||
FPC_MUL(t_re, t_im, t_re, t_im, | |||
fpr_gm_tab[((u + hn) << 1) + 0], | |||
fpr_neg(fpr_gm_tab[((u + hn) << 1) + 1])); | |||
f1[u] = fpr_half(t_re); | |||
f1[u + qn] = fpr_half(t_im); | |||
} | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_poly_merge_fft( | |||
fpr *f, | |||
const fpr *f0, const fpr *f1, unsigned logn) { | |||
size_t n, hn, qn, u; | |||
n = (size_t)1 << logn; | |||
hn = n >> 1; | |||
qn = hn >> 1; | |||
/* | |||
* An extra copy to handle the special case logn = 1. | |||
*/ | |||
f[0] = f0[0]; | |||
f[hn] = f1[0]; | |||
for (u = 0; u < qn; u ++) { | |||
fpr a_re, a_im, b_re, b_im; | |||
fpr t_re, t_im; | |||
a_re = f0[u]; | |||
a_im = f0[u + qn]; | |||
FPC_MUL(b_re, b_im, f1[u], f1[u + qn], | |||
fpr_gm_tab[((u + hn) << 1) + 0], | |||
fpr_gm_tab[((u + hn) << 1) + 1]); | |||
FPC_ADD(t_re, t_im, a_re, a_im, b_re, b_im); | |||
f[(u << 1) + 0] = t_re; | |||
f[(u << 1) + 0 + hn] = t_im; | |||
FPC_SUB(t_re, t_im, a_re, a_im, b_re, b_im); | |||
f[(u << 1) + 1] = t_re; | |||
f[(u << 1) + 1 + hn] = t_im; | |||
} | |||
} |
@@ -0,0 +1,456 @@ | |||
/* | |||
* Floating-point operations. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
/* ====================================================================== */ | |||
/* | |||
* Custom floating-point implementation with integer arithmetics. We | |||
* use IEEE-754 "binary64" format, with some simplifications: | |||
* | |||
* - Top bit is s = 1 for negative, 0 for positive. | |||
* | |||
* - Exponent e uses the next 11 bits (bits 52 to 62, inclusive). | |||
* | |||
* - Mantissa m uses the 52 low bits. | |||
* | |||
* Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52)) | |||
* i.e. the mantissa really is a 53-bit number (less than 2.0, but not | |||
* less than 1.0), but the top bit (equal to 1 by definition) is omitted | |||
* in the encoding. | |||
* | |||
* In IEEE-754, there are some special values: | |||
* | |||
* - If e = 2047, then the value is either an infinite (m = 0) or | |||
* a NaN (m != 0). | |||
* | |||
* - If e = 0, then the value is either a zero (m = 0) or a subnormal, | |||
* aka "denormalized number" (m != 0). | |||
* | |||
* Of these, we only need the zeros. The caller is responsible for not | |||
* providing operands that would lead to infinites, NaNs or subnormals. | |||
* If inputs are such that values go out of range, then indeterminate | |||
* values are returned (it would still be deterministic, but no specific | |||
* value may be relied upon). | |||
* | |||
* At the C level, the three parts are stored in a 64-bit unsigned | |||
* word. | |||
* | |||
* One may note that a property of the IEEE-754 format is that order | |||
* is preserved for positive values: if two positive floating-point | |||
* values x and y are such that x < y, then their respective encodings | |||
* as _signed_ 64-bit integers i64(x) and i64(y) will be such that | |||
* i64(x) < i64(y). For negative values, order is reversed: if x < 0, | |||
* y < 0, and x < y, then i64(x) > i64(y). | |||
* | |||
* IMPORTANT ASSUMPTIONS: | |||
* ====================== | |||
* | |||
* For proper computations, and constant-time behaviour, we assume the | |||
* following: | |||
* | |||
* - 32x32->64 multiplication (unsigned) has an execution time that | |||
* is independent of its operands. This is true of most modern | |||
* x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+ | |||
* and M3 (in the M0 and M0+, this is done in software, so it depends | |||
* on that routine), and the PowerPC cores from the G3/G4 lines. | |||
* For more info, see: https://www.bearssl.org/ctmul.html | |||
* | |||
* - Left-shifts and right-shifts of 32-bit values have an execution | |||
* time which does not depend on the shifted value nor on the | |||
* shift count. An historical exception is the Pentium IV, but most | |||
* modern CPU have barrel shifters. Some small microcontrollers | |||
* might have varying-time shifts (not the ARM Cortex M*, though). | |||
* | |||
* - Right-shift of a signed negative value performs a sign extension. | |||
* As per the C standard, this operation returns an | |||
* implementation-defined result (this is NOT an "undefined | |||
* behaviour"). On most/all systems, an arithmetic shift is | |||
* performed, because this is what makes most sense. | |||
*/ | |||
/*
 * The 'fpr' type is the raw 64-bit word that holds the encoded
 * floating-point value. Wrapping it in a struct or union would be the
 * type-safe choice, but a plain integer type allows a lighter call
 * convention on ARM platforms. The drawback is that the compiler will
 * not reject a direct (invalid) use of operators such as '*' or '+' on
 * fpr values; such mistakes are expected to be caught when building
 * the "normal" (non-emulated) code instead.
 */
typedef uint64_t fpr;
/* | |||
* For computations, we split values into an integral mantissa in the | |||
* 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is | |||
* "sticky" (it is set to 1 if any of the bits below it is 1); when | |||
* re-encoding, the low two bits are dropped, but may induce an | |||
* increment in the value for proper rounding. | |||
*/ | |||
/*
 * Logical right shift of a 64-bit unsigned value by a possibly secret
 * count.
 *
 * 32-bit shifts are assumed to be constant-time (barrel shifter), but
 * a 64-bit shift on a 32-bit system typically goes through a software
 * routine that is not necessarily constant-time. The shift is thus
 * split in two steps: a branchless conditional shift by 32 (selected
 * by bit 5 of the count), then a shift by the low five bits of the
 * count.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ursh(uint64_t x, int n) {
    /* All-ones if n >= 32, all-zeros otherwise. */
    const uint64_t sel = -(uint64_t)(n >> 5);

    x = (x & ~sel) | ((x >> 32) & sel);
    return x >> (n & 31);
}
/*
 * Right shift of a 64-bit signed value by a possibly secret count.
 * The rationale is the same as for fpr_ursh(): a branchless
 * conditional shift by 32, then a shift by the low five bits of the
 * count. The shifts are applied to a signed value; sign extension on
 * negative inputs is one of the stated assumptions of this file.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline int64_t
fpr_irsh(int64_t x, int n) {
    /* -1 (all-ones) if n >= 32, 0 otherwise. */
    const int64_t sel = -(int64_t)(n >> 5);

    x = (x & ~sel) | ((x >> 32) & sel);
    return x >> (n & 31);
}
/*
 * Left shift of a 64-bit unsigned value by a possibly secret count.
 * The rationale is the same as for fpr_ursh(): a branchless
 * conditional shift by 32 (selected by bit 5 of the count), then a
 * shift by the low five bits of the count.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ulsh(uint64_t x, int n) {
    /* All-ones if n >= 32, all-zeros otherwise. */
    const uint64_t sel = -(uint64_t)(n >> 5);

    x = (x & ~sel) | ((x << 32) & sel);
    return x << (n & 31);
}
/* | |||
* Expectations: | |||
* s = 0 or 1 | |||
* exponent e is "arbitrary" and unbiased | |||
* 2^54 <= m < 2^55 | |||
* Numerical value is (-1)^2 * m * 2^e | |||
* | |||
* Exponents which are too low lead to value zero. If the exponent is | |||
* too large, the returned value is indeterminate. | |||
* | |||
* If m = 0, then a zero is returned (using the provided sign). | |||
* If e < -1076, then a zero is returned (regardless of the value of m). | |||
* If e >= -1076 and e != 0, m must be within the expected range | |||
* (2^54 to 2^55-1). | |||
*/ | |||
static inline fpr | |||
FPR(int s, int e, uint64_t m) { | |||
fpr x; | |||
uint32_t t; | |||
unsigned f; | |||
/* | |||
* If e >= -1076, then the value is "normal"; otherwise, it | |||
* should be a subnormal, which we clamp down to zero. | |||
*/ | |||
e += 1076; | |||
t = (uint32_t)e >> 31; | |||
m &= (uint64_t)t - 1; | |||
/* | |||
* If m = 0 then we want a zero; make e = 0 too, but conserve | |||
* the sign. | |||
*/ | |||
t = (uint32_t)(m >> 54); | |||
e &= -(int)t; | |||
/* | |||
* The 52 mantissa bits come from m. Value m has its top bit set | |||
* (unless it is a zero); we leave it "as is": the top bit will | |||
* increment the exponent by 1, except when m = 0, which is | |||
* exactly what we want. | |||
*/ | |||
x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52); | |||
/* | |||
* Rounding: if the low three bits of m are 011, 110 or 111, | |||
* then the value should be incremented to get the next | |||
* representable value. This implements the usual | |||
* round-to-nearest rule (with preference to even values in case | |||
* of a tie). Note that the increment may make a carry spill | |||
* into the exponent field, which is again exactly what we want | |||
* in that case. | |||
*/ | |||
f = (unsigned)m & 7U; | |||
x += (0xC8U >> f) & 1; | |||
return x; | |||
} | |||
/*
 * fpr_scaled() is implemented out of line; the macro maps the short
 * name to the namespaced external symbol.
 */
#define fpr_scaled   PQCLEAN_FALCON512_CLEAN_fpr_scaled
fpr fpr_scaled(int64_t i, int sc);

/*
 * Convert a signed 64-bit integer into an fpr value. This is
 * fpr_scaled() with a scaling parameter of 0 (presumably "no scaling";
 * see the definition of fpr_scaled() for the exact semantics).
 */
static inline fpr
fpr_of(int64_t i) {
    const int no_scaling = 0;

    return fpr_scaled(i, no_scaling);
}
/*
 * Precomputed constants, given as raw binary64 bit patterns.
 */

/* The modulus q = 12289, and (per its name) its inverse. */
static const fpr fpr_q = UINT64_C(4667981563525332992);
static const fpr fpr_inverse_of_q = UINT64_C(4545632735260551042);

/*
 * Sampler-related constants; their meaning is taken from their names
 * (values are not re-derived here).
 */
static const fpr fpr_inv_2sqrsigma0 = UINT64_C(4594603506513722306);
static const fpr fpr_inv_sigma = UINT64_C(4573359825155195350);
static const fpr fpr_sigma_min_9 = UINT64_C(4608495221497168882);
static const fpr fpr_sigma_min_10 = UINT64_C(4608586345619182117);
static const fpr fpr_log2 = UINT64_C(4604418534313441775);
static const fpr fpr_inv_log2 = UINT64_C(4609176140021203710);
static const fpr fpr_bnorm_max = UINT64_C(4670353323383631276);

/* 0, 1.0, 2.0 and 0.5. */
static const fpr fpr_zero = UINT64_C(0);
static const fpr fpr_one = UINT64_C(4607182418800017408);
static const fpr fpr_two = UINT64_C(4611686018427387904);
static const fpr fpr_onehalf = UINT64_C(4602678819172646912);

/* 2^31, 2^31-1 and -(2^31-1). */
static const fpr fpr_ptwo31 = UINT64_C(4746794007248502784);
static const fpr fpr_ptwo31m1 = UINT64_C(4746794007244308480);
static const fpr fpr_mtwo31m1 = UINT64_C(13970166044099084288);

/*
 * Bounds around 2^63. The value 2^63-1 is not representable with a
 * 53-bit mantissa, so fpr_ptwo63m1 has the same bit pattern as
 * fpr_ptwo63, and fpr_mtwo63m1 is that pattern with the sign bit set.
 */
static const fpr fpr_ptwo63m1 = UINT64_C(4890909195324358656);
static const fpr fpr_mtwo63m1 = UINT64_C(14114281232179134464);
static const fpr fpr_ptwo63 = UINT64_C(4890909195324358656);
/*
 * Round x to the nearest integer (ties are rounded to even) and return
 * it as a signed 64-bit value. The input is assumed to fit in the
 * -(2^63-1)..+(2^63-1) range. All steps use masks and shifts only;
 * there is no branch on the value of x.
 */
static inline int64_t | |||
fpr_rint(fpr x) { | |||
uint64_t m, d; | |||
int e; | |||
uint32_t s, dd, f; | |||
/* | |||
* We assume that the value fits in -(2^63-1)..+(2^63-1). We can | |||
* thus extract the mantissa as a 63-bit integer, then right-shift | |||
* it as needed. | |||
*/ | |||
m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||
e = 1085 - ((int)(x >> 52) & 0x7FF); | |||
/* | |||
* If a shift of more than 63 bits is needed, then simply set m | |||
* to zero. This also covers the case of an input operand equal | |||
* to zero. | |||
*/ | |||
/* (uint32_t)(e - 64) >> 31 is 1 when e < 64, so the mask is all-ones
 * in that case and zero when e >= 64. */
m &= -(uint64_t)((uint32_t)(e - 64) >> 31); | |||
e &= 63; | |||
/* | |||
* Right-shift m as needed. Shift count is e. Proper rounding | |||
* mandates that: | |||
* - If the highest dropped bit is zero, then round low. | |||
* - If the highest dropped bit is one, and at least one of the | |||
* other dropped bits is one, then round up. | |||
* - If the highest dropped bit is one, and all other dropped | |||
* bits are zero, then round up if the lowest kept bit is 1, | |||
* or low otherwise (i.e. ties are broken by "rounding to even"). | |||
* | |||
* We thus first extract a word consisting of all the dropped bits | |||
* AND the lowest kept bit; then we shrink it down to three bits, | |||
* the lowest being "sticky". | |||
*/ | |||
/* d: lowest kept bit at bit 63, dropped bits right below it. */
d = fpr_ulsh(m, 63 - e); | |||
/* dd is nonzero if and only if any of bits 0..60 of d is set. */
dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF); | |||
/* f: top three bits of d, with the sticky flag ORed into bit 0. */
f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31); | |||
/* 0xC8 has bits 3, 6 and 7 set: add one when f is 011, 110 or 111. */
m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U); | |||
/* | |||
* Apply the sign bit. | |||
*/ | |||
s = (uint32_t)(x >> 63); | |||
/* Conditional negation: (m ^ -s) + s is m when s = 0, -m when s = 1. */
return ((int64_t)m ^ -(int64_t)s) + (int64_t)s; | |||
} | |||
/*
 * Round x toward minus infinity and return the result as a signed
 * 64-bit integer. The input is assumed to fit in the
 * -(2^63-1)..+(2^63-1) range. As noted below, an input of -0 yields -1.
 */
static inline int64_t | |||
fpr_floor(fpr x) { | |||
uint64_t t; | |||
int64_t xi; | |||
int e, cc; | |||
/* | |||
* We extract the integer as a _signed_ 64-bit integer with | |||
* a scaling factor. Since we assume that the value fits | |||
* in the -(2^63-1)..+(2^63-1) range, we can left-shift the | |||
* absolute value to make it in the 2^62..2^63-1 range: we | |||
* will only need a right-shift afterwards. | |||
*/ | |||
e = (int)(x >> 52) & 0x7FF; | |||
/* t is the sign bit of x (0 or 1). */
t = x >> 63; | |||
xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62)) | |||
& (((uint64_t)1 << 63) - 1)); | |||
/* Negate xi when the sign bit is set (two's complement negation). */
xi = (xi ^ -(int64_t)t) + (int64_t)t; | |||
cc = 1085 - e; | |||
/* | |||
* We perform an arithmetic right-shift on the value. This | |||
* applies floor() semantics on both positive and negative values | |||
* (rounding toward minus infinity). | |||
*/ | |||
xi = fpr_irsh(xi, cc & 63); | |||
/* | |||
* If the true shift count was 64 or more, then we should instead | |||
* replace xi with 0 (if nonnegative) or -1 (if negative). Edge | |||
* case: -0 will be floored to -1, not 0 (whether this is correct | |||
* is debatable; in any case, the other functions normalize zero | |||
* to +0). | |||
* | |||
* For an input of zero, the non-shifted xi was incorrect (we used | |||
* a top implicit bit of value 1, not 0), but this does not matter | |||
* since this operation will clamp it down. | |||
*/ | |||
/* (uint32_t)(63 - cc) >> 31 is 1 when cc > 63; the mask then selects
 * -t (0 or -1) in place of the shifted value. */
xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31); | |||
return xi; | |||
} | |||
static inline int64_t | |||
fpr_trunc(fpr x) { | |||
uint64_t t, xu; | |||
int e, cc; | |||
/* | |||
* Extract the absolute value. Since we assume that the value | |||
* fits in the -(2^63-1)..+(2^63-1) range, we can left-shift | |||
* the absolute value into the 2^62..2^63-1 range, and then | |||
* do a right shift afterwards. | |||
*/ | |||
e = (int)(x >> 52) & 0x7FF; | |||
xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1); | |||
cc = 1085 - e; | |||
xu = fpr_ursh(xu, cc & 63); | |||
/* | |||
* If the exponent is too low (cc > 63), then the shift was wrong | |||
* and we must clamp the value to 0. This also covers the case | |||
* of an input equal to zero. | |||
*/ | |||
xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31); | |||
/* | |||
* Apply back the sign, if the source value is negative. | |||
*/ | |||
t = x >> 63; | |||
xu = (xu ^ -t) + t; | |||
return *(int64_t *)&xu; | |||
} | |||
#define fpr_add PQCLEAN_FALCON512_CLEAN_fpr_add | |||
fpr fpr_add(fpr x, fpr y); | |||
static inline fpr | |||
fpr_sub(fpr x, fpr y) { | |||
y ^= (uint64_t)1 << 63; | |||
return fpr_add(x, y); | |||
} | |||
static inline fpr | |||
fpr_neg(fpr x) { | |||
x ^= (uint64_t)1 << 63; | |||
return x; | |||
} | |||
static inline fpr | |||
fpr_half(fpr x) { | |||
/* | |||
* To divide a value by 2, we just have to subtract 1 from its | |||
* exponent, but we have to take care of zero. | |||
*/ | |||
uint32_t t; | |||
x -= (uint64_t)1 << 52; | |||
t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11; | |||
x &= (uint64_t)t - 1; | |||
return x; | |||
} | |||
static inline fpr | |||
fpr_double(fpr x) { | |||
/* | |||
* To double a value, we just increment by one the exponent. We | |||
* don't care about infinites or NaNs; however, 0 is a | |||
* special case. | |||
*/ | |||
x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52; | |||
return x; | |||
} | |||
#define fpr_mul PQCLEAN_FALCON512_CLEAN_fpr_mul | |||
fpr fpr_mul(fpr x, fpr y); | |||
static inline fpr | |||
fpr_sqr(fpr x) { | |||
return fpr_mul(x, x); | |||
} | |||
#define fpr_div PQCLEAN_FALCON512_CLEAN_fpr_div | |||
fpr fpr_div(fpr x, fpr y); | |||
static inline fpr | |||
fpr_inv(fpr x) { | |||
return fpr_div(4607182418800017408u, x); | |||
} | |||
#define fpr_sqrt PQCLEAN_FALCON512_CLEAN_fpr_sqrt | |||
fpr fpr_sqrt(fpr x); | |||
static inline int | |||
fpr_lt(fpr x, fpr y) { | |||
/* | |||
* If x >= 0 or y >= 0, a signed comparison yields the proper | |||
* result: | |||
* - For positive values, the order is preserved. | |||
* - The sign bit is at the same place as in integers, so | |||
* sign is preserved. | |||
* | |||
* If both x and y are negative, then the order is reversed. | |||
* We cannot simply invert the comparison result in that case | |||
* because it would not handle the edge case x = y properly. | |||
*/ | |||
int cc0, cc1; | |||
cc0 = *(int64_t *)&x < *(int64_t *)&y; | |||
cc1 = *(int64_t *)&x > *(int64_t *)&y; | |||
return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63)); | |||
} | |||
/* | |||
* Compute exp(-x), scaled by 2^63, for x such that 0 <= x < ln 2 (this is | |||
* the contract stated for fpr_expm_p63() in inner.h). We want a precision | |||
* of 50 bits or so. | |||
*/ | |||
#define fpr_expm_p63 PQCLEAN_FALCON512_CLEAN_fpr_expm_p63 | |||
uint64_t fpr_expm_p63(fpr x); | |||
#define fpr_gm_tab PQCLEAN_FALCON512_CLEAN_fpr_gm_tab | |||
extern const fpr fpr_gm_tab[]; | |||
#define fpr_p2_tab PQCLEAN_FALCON512_CLEAN_fpr_p2_tab | |||
extern const fpr fpr_p2_tab[]; | |||
/* ====================================================================== */ | |||
@@ -0,0 +1,663 @@ | |||
#ifndef FALCON_INNER_H__ | |||
#define FALCON_INNER_H__ | |||
/* | |||
* Internal functions for Falcon. This is not the API intended to be | |||
* used by applications; instead, this internal API provides all the | |||
* primitives on which wrappers build to provide external APIs. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include <stdint.h> | |||
#include <stdlib.h> | |||
#include <string.h> | |||
/* ==================================================================== */ | |||
/* | |||
* SHAKE256 implementation (shake.c). | |||
* | |||
* API is defined to be easily replaced with the fips202.h API defined | |||
* as part of PQ Clean. | |||
*/ | |||
#include "fips202.h" | |||
#define shake256_context shake256incctx | |||
#define shake256_init(sc) shake256_inc_init(sc) | |||
#define shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len) | |||
#define shake256_flip(sc) shake256_inc_finalize(sc) | |||
#define shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc) | |||
/* ==================================================================== */ | |||
/* | |||
* Encoding/decoding functions (codec.c). | |||
* | |||
* Encoding functions take as parameters an output buffer (out) with | |||
* a given maximum length (max_out_len); returned value is the actual | |||
* number of bytes which have been written. If the output buffer is | |||
* not large enough, then 0 is returned (some bytes may have been | |||
* written to the buffer). If 'out' is NULL, then 'max_out_len' is | |||
* ignored; instead, the function computes and returns the actual | |||
* required output length (in bytes). | |||
* | |||
* Decoding functions take as parameters an input buffer (in) with | |||
* its maximum length (max_in_len); returned value is the actual number | |||
* of bytes that have been read from the buffer. If the provided length | |||
* is too short, then 0 is returned. | |||
* | |||
* Values to encode or decode are vectors of integers, with N = 2^logn | |||
* elements. | |||
* | |||
* Three encoding formats are defined: | |||
* | |||
* - modq: sequence of values modulo 12289, each encoded over exactly | |||
* 14 bits. The encoder and decoder verify that integers are within | |||
* the valid range (0..12288). Values are arrays of uint16. | |||
* | |||
* - trim: sequence of signed integers, a specified number of bits | |||
* each. The number of bits is provided as parameter and includes | |||
* the sign bit. Each integer x must be such that |x| < 2^(bits-1) | |||
* (which means that the -2^(bits-1) value is forbidden); encode and | |||
* decode functions check that property. Values are arrays of | |||
* int16_t or int8_t, corresponding to names 'trim_i16' and | |||
* 'trim_i8', respectively. | |||
* | |||
* - comp: variable-length encoding for signed integers; each integer | |||
* uses a minimum of 9 bits, possibly more. This is normally used | |||
* only for signatures. | |||
* | |||
*/ | |||
size_t PQCLEAN_FALCON512_CLEAN_modq_encode(void *out, size_t max_out_len, | |||
const uint16_t *x, unsigned logn); | |||
size_t PQCLEAN_FALCON512_CLEAN_trim_i16_encode(void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn, unsigned bits); | |||
size_t PQCLEAN_FALCON512_CLEAN_trim_i8_encode(void *out, size_t max_out_len, | |||
const int8_t *x, unsigned logn, unsigned bits); | |||
size_t PQCLEAN_FALCON512_CLEAN_comp_encode(void *out, size_t max_out_len, | |||
const int16_t *x, unsigned logn); | |||
size_t PQCLEAN_FALCON512_CLEAN_modq_decode(uint16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON512_CLEAN_trim_i16_decode(int16_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON512_CLEAN_trim_i8_decode(int8_t *x, unsigned logn, unsigned bits, | |||
const void *in, size_t max_in_len); | |||
size_t PQCLEAN_FALCON512_CLEAN_comp_decode(int16_t *x, unsigned logn, | |||
const void *in, size_t max_in_len); | |||
/* | |||
* Number of bits for key elements, indexed by logn (1 to 10). This | |||
* is at most 8 bits for all degrees, but some degrees may have shorter | |||
* elements. | |||
*/ | |||
extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_fg_bits[]; | |||
extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_FG_bits[]; | |||
/* | |||
* Maximum size, in bits, of elements in a signature, indexed by logn | |||
* (1 to 10). The size includes the sign bit. | |||
*/ | |||
extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_sig_bits[]; | |||
/* ==================================================================== */ | |||
/* | |||
* Support functions used for both signature generation and signature | |||
* verification (common.c). | |||
*/ | |||
/* | |||
* From a SHAKE256 context (must be already flipped), produce a new | |||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_hash_to_point(shake256_context *sc, | |||
uint16_t *x, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Tell whether a given vector (2N coordinates, in two halves) is | |||
* acceptable as a signature. This compares the appropriate norm of the | |||
* vector with the acceptance bound. Returned value is 1 on success | |||
* (vector is short enough to be acceptable), 0 otherwise. | |||
*/ | |||
int PQCLEAN_FALCON512_CLEAN_is_short(const int16_t *s1, const int16_t *s2, unsigned logn); | |||
/* | |||
* Tell whether a given vector (2N coordinates, in two halves) is | |||
* acceptable as a signature. Instead of the first half s1, this | |||
* function receives the "saturated squared norm" of s1, i.e. the | |||
* sum of the squares of the coordinates of s1 (saturated at 2^32-1 | |||
* if the sum exceeds 2^31-1). | |||
* | |||
* Returned value is 1 on success (vector is short enough to be | |||
* acceptable), 0 otherwise. | |||
*/ | |||
int PQCLEAN_FALCON512_CLEAN_is_short_half(uint32_t sqn, const int16_t *s2, unsigned logn); | |||
/* ==================================================================== */ | |||
/* | |||
* Signature verification functions (vrfy.c). | |||
*/ | |||
/* | |||
* Convert a public key to NTT + Montgomery format. Conversion is done | |||
* in place. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn); | |||
/* | |||
* Internal signature verification code: | |||
* c0[] contains the hashed nonce+message | |||
* s2[] is the decoded signature | |||
* h[] contains the public key, in NTT + Montgomery format | |||
* logn is the degree log | |||
* tmp[] temporary, must have at least 2*2^logn bytes | |||
* Returned value is 1 on success, 0 on error. | |||
*/ | |||
int PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||
const uint16_t *h, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute the public key h[], given the private key elements f[] and | |||
* g[]. This computes h = g/f mod phi mod q, where phi is the polynomial | |||
* modulus. This function returns 1 on success, 0 on error (an error is | |||
* reported if f is not invertible mod phi mod q). | |||
* | |||
* The tmp[] array must have room for at least 2*2^logn elements. | |||
*/ | |||
int PQCLEAN_FALCON512_CLEAN_compute_public(uint16_t *h, | |||
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Recompute the fourth private key element. Private key consists in | |||
* four polynomials with small coefficients f, g, F and G, which are | |||
* such that fG - gF = q mod phi; furthermore, f is invertible modulo | |||
* phi and modulo q. This function recomputes G from f, g and F. | |||
* | |||
* The tmp[] array must have room for at least 4*2^logn bytes. | |||
* | |||
* Returned value is 1 on success, 0 on error (f not invertible). | |||
*/ | |||
int PQCLEAN_FALCON512_CLEAN_complete_private(int8_t *G, | |||
const int8_t *f, const int8_t *g, const int8_t *F, | |||
unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
/* | |||
* Implementation of floating-point real numbers (fpr.h, fpr.c). | |||
*/ | |||
/* | |||
* Real numbers are implemented by an extra header file, included below. | |||
* This is meant to support pluggable implementations. The default | |||
* implementation relies on the C type 'double'. | |||
* | |||
* The included file must define the following types, functions and | |||
* constants: | |||
* | |||
* fpr | |||
* type for a real number | |||
* | |||
* fpr fpr_of(int64_t i) | |||
* cast an integer into a real number; source must be in the | |||
* -(2^63-1)..+(2^63-1) range | |||
* | |||
* fpr fpr_scaled(int64_t i, int sc) | |||
* compute i*2^sc as a real number; source 'i' must be in the | |||
* -(2^63-1)..+(2^63-1) range | |||
* | |||
* fpr fpr_ldexp(fpr x, int e) | |||
* compute x*2^e | |||
* | |||
* int64_t fpr_rint(fpr x) | |||
* round x to the nearest integer; x must be in the -(2^63-1) | |||
* to +(2^63-1) range | |||
* | |||
* int64_t fpr_trunc(fpr x) | |||
* round to an integer; this rounds towards zero; value must | |||
* be in the -(2^63-1) to +(2^63-1) range | |||
* | |||
* fpr fpr_add(fpr x, fpr y) | |||
* compute x + y | |||
* | |||
* fpr fpr_sub(fpr x, fpr y) | |||
* compute x - y | |||
* | |||
* fpr fpr_neg(fpr x) | |||
* compute -x | |||
* | |||
* fpr fpr_half(fpr x) | |||
* compute x/2 | |||
* | |||
* fpr fpr_double(fpr x) | |||
* compute x*2 | |||
* | |||
* fpr fpr_mul(fpr x, fpr y) | |||
* compute x * y | |||
* | |||
* fpr fpr_sqr(fpr x) | |||
* compute x * x | |||
* | |||
* fpr fpr_inv(fpr x) | |||
* compute 1/x | |||
* | |||
* fpr fpr_div(fpr x, fpr y) | |||
* compute x/y | |||
* | |||
* fpr fpr_sqrt(fpr x) | |||
* compute the square root of x | |||
* | |||
* int fpr_lt(fpr x, fpr y) | |||
* return 1 if x < y, 0 otherwise | |||
* | |||
* uint64_t fpr_expm_p63(fpr x) | |||
* return exp(-x), assuming that 0 <= x < log(2). Returned value | |||
* is scaled to 63 bits (i.e. it really returns 2^63*exp(-x), | |||
* rounded to the nearest integer). Computation should have a | |||
* precision of at least 45 bits. | |||
* | |||
* const fpr fpr_gm_tab[] | |||
* array of constants for FFT / iFFT | |||
* | |||
* const fpr fpr_p2_tab[] | |||
* precomputed powers of 2 (by index, 0 to 10) | |||
* | |||
* Constants of type 'fpr': | |||
* | |||
* fpr fpr_q 12289 | |||
* fpr fpr_inverse_of_q 1/12289 | |||
* fpr fpr_inv_2sqrsigma0 1/(2*(1.8205^2)) | |||
* fpr fpr_inv_sigma 1/(1.55*sqrt(12289)) | |||
* fpr fpr_sigma_min_9 1.291500756233514568549480827642 | |||
* fpr fpr_sigma_min_10 1.311734375905083682667395805765 | |||
* fpr fpr_log2 log(2) | |||
* fpr fpr_inv_log2 1/log(2) | |||
* fpr fpr_bnorm_max 16822.4121 | |||
* fpr fpr_zero 0 | |||
* fpr fpr_one 1 | |||
* fpr fpr_two 2 | |||
* fpr fpr_onehalf 0.5 | |||
* fpr fpr_ptwo31 2^31 | |||
* fpr fpr_ptwo31m1 2^31-1 | |||
* fpr fpr_mtwo31m1 -(2^31-1) | |||
* fpr fpr_ptwo63m1 2^63-1 | |||
* fpr fpr_mtwo63m1 -(2^63-1) | |||
* fpr fpr_ptwo63 2^63 | |||
*/ | |||
#include "fpr.h" | |||
/* ==================================================================== */ | |||
/* | |||
* RNG (rng.c). | |||
* | |||
* A PRNG based on ChaCha20 is implemented; it is seeded from a SHAKE256 | |||
* context (flipped) and is used for bulk pseudorandom generation. | |||
* A system-dependent seed generator is also provided. | |||
*/ | |||
/* | |||
* Obtain a random seed from the system RNG. | |||
* | |||
* Returned value is 1 on success, 0 on error. | |||
*/ | |||
int PQCLEAN_FALCON512_CLEAN_get_seed(void *seed, size_t seed_len); | |||
/* | |||
* Structure for a PRNG. This includes a large buffer so that values | |||
* get generated in advance. The 'state' is used to keep the current | |||
* PRNG algorithm state (contents depend on the selected algorithm). | |||
* | |||
* The unions with 'dummy_u64' are there to ensure proper alignment for | |||
* 64-bit direct access. | |||
*/ | |||
typedef struct { | |||
/* Output buffer; the inline getters read generated bytes from buf.d. */
union { | |||
uint8_t d[512]; /* MUST be 512, exactly */ | |||
uint64_t dummy_u64; | |||
} buf; | |||
/* Index in buf.d of the next byte to return (advanced by
 * prng_get_u8() and prng_get_u64()). */
size_t ptr; | |||
/* Algorithm state; its contents depend on the selected algorithm. */
union { | |||
uint8_t d[256]; | |||
uint64_t dummy_u64; | |||
} state; | |||
/* NOTE(review): presumably identifies the selected PRNG algorithm;
 * not used by the code visible in this header -- confirm in rng.c. */
int type; | |||
} prng; | |||
/* | |||
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256 | |||
* context (in "flipped" state) to obtain its initial state. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, shake256_context *src); | |||
/* | |||
* Refill the PRNG buffer. This is normally invoked automatically, and | |||
* is declared here only so that prng_get_u64() may be inlined. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_prng_refill(prng *p); | |||
/* | |||
* Get some bytes from a PRNG. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len); | |||
/* | |||
* Get a 64-bit random value from a PRNG. | |||
*/ | |||
static inline uint64_t | |||
prng_get_u64(prng *p) { | |||
size_t u; | |||
/* | |||
* If there are less than 9 bytes in the buffer, we refill it. | |||
* This means that we may drop the last few bytes, but this allows | |||
* for faster extraction code. Also, it means that we never leave | |||
* an empty buffer. | |||
*/ | |||
u = p->ptr; | |||
if (u >= (sizeof p->buf.d) - 9) { | |||
PQCLEAN_FALCON512_CLEAN_prng_refill(p); | |||
u = 0; | |||
} | |||
p->ptr = u + 8; | |||
/* | |||
* On systems that use little-endian encoding and allow | |||
* unaligned accesses, we can simply read the data where it is. | |||
*/ | |||
return (uint64_t)p->buf.d[u + 0] | |||
| ((uint64_t)p->buf.d[u + 1] << 8) | |||
| ((uint64_t)p->buf.d[u + 2] << 16) | |||
| ((uint64_t)p->buf.d[u + 3] << 24) | |||
| ((uint64_t)p->buf.d[u + 4] << 32) | |||
| ((uint64_t)p->buf.d[u + 5] << 40) | |||
| ((uint64_t)p->buf.d[u + 6] << 48) | |||
| ((uint64_t)p->buf.d[u + 7] << 56); | |||
} | |||
/* | |||
* Get an 8-bit random value from a PRNG. | |||
*/ | |||
static inline unsigned | |||
prng_get_u8(prng *p) { | |||
unsigned v; | |||
v = p->buf.d[p->ptr ++]; | |||
if (p->ptr == sizeof p->buf.d) { | |||
PQCLEAN_FALCON512_CLEAN_prng_refill(p); | |||
} | |||
return v; | |||
} | |||
/* ==================================================================== */ | |||
/* | |||
* FFT (falcon-fft.c). | |||
* | |||
* A real polynomial is represented as an array of N 'fpr' elements. | |||
* The FFT representation of a real polynomial contains N/2 complex | |||
* elements; each is stored as two real numbers, for the real and | |||
* imaginary parts, respectively. See falcon-fft.c for details on the | |||
* internal representation. | |||
*/ | |||
/* | |||
* Compute FFT in-place: the source array should contain a real | |||
* polynomial (N coefficients); its storage area is reused to store | |||
* the FFT representation of that polynomial (N/2 complex numbers). | |||
* | |||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_FFT(fpr *f, unsigned logn); | |||
/* | |||
* Compute the inverse FFT in-place: the source array should contain the | |||
* FFT representation of a real polynomial (N/2 elements); the resulting | |||
* real polynomial (N coefficients of type 'fpr') is written over the | |||
* array. | |||
* | |||
* 'logn' MUST lie between 1 and 10 (inclusive). | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_iFFT(fpr *f, unsigned logn); | |||
/* | |||
* Add polynomial b to polynomial a. a and b MUST NOT overlap. This | |||
* function works in both normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_add(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Subtract polynomial b from polynomial a. a and b MUST NOT overlap. This | |||
* function works in both normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_sub(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Negate polynomial a. This function works in both normal and FFT | |||
* representations. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_neg(fpr *a, unsigned logn); | |||
/* | |||
* Compute adjoint of polynomial a. This function works only in FFT | |||
* representation. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_adj_fft(fpr *a, unsigned logn); | |||
/* | |||
* Multiply polynomial a with polynomial b. a and b MUST NOT overlap. | |||
* This function works only in FFT representation. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_mul_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Multiply polynomial a with the adjoint of polynomial b. a and b MUST NOT | |||
* overlap. This function works only in FFT representation. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_muladj_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Multiply polynomial with its own adjoint. This function works only in FFT | |||
* representation. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_mulselfadj_fft(fpr *a, unsigned logn); | |||
/* | |||
* Multiply polynomial with a real constant. This function works in both | |||
* normal and FFT representations. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_mulconst(fpr *a, fpr x, unsigned logn); | |||
/* | |||
* Divide polynomial a by polynomial b, modulo X^N+1 (FFT representation). | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_div_fft(fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Given f and g (in FFT representation), compute 1/(f*adj(f)+g*adj(g)) | |||
* (also in FFT representation). Since the result is auto-adjoint, all its | |||
* coordinates in FFT representation are real; as such, only the first N/2 | |||
* values of d[] are filled (the imaginary parts are skipped). | |||
* | |||
* Array d MUST NOT overlap with either a or b. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_invnorm2_fft(fpr *d, | |||
const fpr *a, const fpr *b, unsigned logn); | |||
/* | |||
* Given F, G, f and g (in FFT representation), compute F*adj(f)+G*adj(g) | |||
* (also in FFT representation). Destination d MUST NOT overlap with | |||
* any of the source arrays. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_add_muladj_fft(fpr *d, | |||
const fpr *F, const fpr *G, | |||
const fpr *f, const fpr *g, unsigned logn); | |||
/* | |||
* Multiply polynomial a by polynomial b, where b is autoadjoint. Both | |||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_mul_autoadj_fft(fpr *a, | |||
const fpr *b, unsigned logn); | |||
/* | |||
* Divide polynomial a by polynomial b, where b is autoadjoint. Both | |||
* a and b are in FFT representation. Since b is autoadjoint, all its | |||
* FFT coefficients are real, and the array b contains only N/2 elements. | |||
* a and b MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_div_autoadj_fft(fpr *a, | |||
const fpr *b, unsigned logn); | |||
/* | |||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||
* representation. On input, g00, g01 and g11 are provided (where the | |||
* matrix G = [[g00, g01], [adj(g01), g11]]). On output, the d00, l10 | |||
* and d11 values are written in g00, g01 and g11, respectively | |||
* (with D = [[d00, 0], [0, d11]] and L = [[1, 0], [l10, 1]]). | |||
* (In fact, d00 = g00, so the g00 operand is left unmodified.) | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_LDL_fft(const fpr *g00, | |||
fpr *g01, fpr *g11, unsigned logn); | |||
/* | |||
* Perform an LDL decomposition of an auto-adjoint matrix G, in FFT | |||
* representation. This is identical to poly_LDL_fft() except that | |||
* g00, g01 and g11 are unmodified; the outputs d11 and l10 are written | |||
* in two other separate buffers provided as extra parameters. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_LDLmv_fft(fpr *d11, fpr *l10, | |||
const fpr *g00, const fpr *g01, | |||
const fpr *g11, unsigned logn); | |||
/* | |||
* Apply "split" operation on a polynomial in FFT representation: | |||
* f = f0(x^2) + x*f1(x^2), for half-size polynomials f0 and f1 | |||
* (polynomials modulo X^(N/2)+1). f0, f1 and f MUST NOT overlap. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_split_fft(fpr *f0, fpr *f1, | |||
const fpr *f, unsigned logn); | |||
/* | |||
* Apply "merge" operation on two polynomials in FFT representation: | |||
* given f0 and f1, polynomials modulo X^(N/2)+1, this function computes | |||
* f = f0(x^2) + x*f1(x^2), in FFT representation modulo X^N+1. | |||
* f MUST NOT overlap with either f0 or f1. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_poly_merge_fft(fpr *f, | |||
const fpr *f0, const fpr *f1, unsigned logn); | |||
/* ==================================================================== */ | |||
/* | |||
* Key pair generation. | |||
*/ | |||
/* | |||
* Required sizes of the temporary buffer (in bytes). | |||
* | |||
* From FALCON_KEYGEN_TEMP_3 onwards, each size is twice the previous | |||
* one and equals 28*2^k for the macro suffix k (224 = 28*8, ..., | |||
* 28672 = 28*1024). The first two entries (136 and 272) are larger | |||
* than that formula would give (56 and 112). | |||
* | |||
* NOTE(review): the numeric suffix is presumably the 'logn' value | |||
* passed to the key pair generation function -- confirm. | |||
*/ | |||
#define FALCON_KEYGEN_TEMP_1 136 | |||
#define FALCON_KEYGEN_TEMP_2 272 | |||
#define FALCON_KEYGEN_TEMP_3 224 | |||
#define FALCON_KEYGEN_TEMP_4 448 | |||
#define FALCON_KEYGEN_TEMP_5 896 | |||
#define FALCON_KEYGEN_TEMP_6 1792 | |||
#define FALCON_KEYGEN_TEMP_7 3584 | |||
#define FALCON_KEYGEN_TEMP_8 7168 | |||
#define FALCON_KEYGEN_TEMP_9 14336 | |||
#define FALCON_KEYGEN_TEMP_10 28672 | |||
/* | |||
* Generate a new key pair. Randomness is extracted from the provided | |||
* SHAKE256 context, which must have already been seeded and flipped. | |||
* The tmp[] array must have suitable size (see FALCON_KEYGEN_TEMP_* | |||
* macros) and be aligned for the uint32_t, uint64_t and fpr types. | |||
* | |||
* The private key elements are written in f, g, F and G, and the | |||
* public key is written in h. Either or both of G and h may be NULL, | |||
* in which case the corresponding element is not returned (they can | |||
* be recomputed from f, g and F). | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_keygen(shake256_context *rng, | |||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h, | |||
unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
/* | |||
* Signature generation. | |||
*/ | |||
/* | |||
* Expand a private key into the B0 matrix in FFT representation and | |||
* the LDL tree. All the values are written in 'expanded_key', for | |||
* a total of (8*logn+40)*2^logn bytes. | |||
* | |||
* The tmp[] array must have room for at least 48*2^logn bytes. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_expand_privkey(fpr *expanded_key, | |||
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G, | |||
unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute a signature over the provided hashed message (hm); the | |||
* signature value is one short vector. This function uses an | |||
* expanded key (as generated by PQCLEAN_FALCON512_CLEAN_expand_privkey()). | |||
* | |||
* The sig[] and hm[] buffers may overlap. | |||
* | |||
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, shake256_context *rng, | |||
const fpr *expanded_key, | |||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||
/* | |||
* Compute a signature over the provided hashed message (hm); the | |||
* signature value is one short vector. This function uses a raw | |||
 * key and dynamically recomputes the B0 matrix and LDL tree; this
 * saves RAM since there is no need for an expanded key, but
* increases the signature cost. | |||
* | |||
* The sig[] and hm[] buffers may overlap. | |||
* | |||
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes. | |||
*/ | |||
void PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng, | |||
const int8_t *f, const int8_t *g, | |||
const int8_t *F, const int8_t *G, | |||
const uint16_t *hm, unsigned logn, uint8_t *tmp); | |||
/* ==================================================================== */ | |||
#endif |
@@ -0,0 +1,381 @@ | |||
/* | |||
* Wrapper for implementing the PQClean API. | |||
*/ | |||
#include <stddef.h> | |||
#include <string.h> | |||
#include "api.h" | |||
#include "inner.h" | |||
#define NONCELEN 40 | |||
#include "randombytes.h" | |||
/* | |||
* Encoding formats (nnnn = log of degree, 9 for Falcon-512, 10 for Falcon-1024) | |||
* | |||
* private key: | |||
* header byte: 0101nnnn | |||
* private f (6 or 5 bits by element, depending on degree) | |||
* private g (6 or 5 bits by element, depending on degree) | |||
* private F (8 bits by element) | |||
* | |||
* public key: | |||
* header byte: 0000nnnn | |||
* public h (14 bits by element) | |||
* | |||
* signature: | |||
* header byte: 0011nnnn | |||
* nonce 40 bytes | |||
* value (12 bits by element) | |||
* | |||
* message + signature: | |||
* signature length (2 bytes, big-endian) | |||
* nonce 40 bytes | |||
* message | |||
* header byte: 0010nnnn | |||
* value (12 bits by element) | |||
* (signature length is 1+len(value), not counting the nonce) | |||
*/ | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair( | |||
uint8_t *pk, uint8_t *sk) { | |||
union { | |||
uint8_t b[FALCON_KEYGEN_TEMP_9]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
int8_t f[512], g[512], F[512]; | |||
uint16_t h[512]; | |||
unsigned char seed[48]; | |||
shake256_context rng; | |||
size_t u, v; | |||
/* | |||
* Generate key pair. | |||
*/ | |||
randombytes(seed, sizeof seed); | |||
shake256_init(&rng); | |||
shake256_inject(&rng, seed, sizeof seed); | |||
shake256_flip(&rng); | |||
PQCLEAN_FALCON512_CLEAN_keygen(&rng, f, g, F, NULL, h, 9, tmp.b); | |||
/* | |||
* Encode private key. | |||
*/ | |||
sk[0] = 0x50 + 9; | |||
u = 1; | |||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||
f, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||
g, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_encode( | |||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u, | |||
F, 9, PQCLEAN_FALCON512_CLEAN_max_FG_bits[9]); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
if (u != PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||
return -1; | |||
} | |||
/* | |||
* Encode public key. | |||
*/ | |||
pk[0] = 0x00 + 9; | |||
v = PQCLEAN_FALCON512_CLEAN_modq_encode( | |||
pk + 1, PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1, | |||
h, 9); | |||
if (v != PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* | |||
* Compute the signature. nonce[] receives the nonce and must have length | |||
* NONCELEN bytes. sigbuf[] receives the signature value (without nonce | |||
* or header byte), with *sigbuflen providing the maximum value length and | |||
* receiving the actual value length. | |||
* | |||
* If a signature could be computed but not encoded because it would | |||
* exceed the output buffer size, then a new signature is computed. If | |||
* the provided buffer size is too low, this could loop indefinitely, so | |||
* the caller must provide a size that can accommodate signatures with a | |||
* large enough probability. | |||
* | |||
* Return value: 0 on success, -1 on error. | |||
*/ | |||
static int | |||
do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
union { | |||
uint8_t b[72 * 512]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
int8_t f[512], g[512], F[512], G[512]; | |||
union { | |||
int16_t sig[512]; | |||
uint16_t hm[512]; | |||
} r; | |||
unsigned char seed[48]; | |||
shake256_context sc; | |||
size_t u, v; | |||
/* | |||
* Decode the private key. | |||
*/ | |||
if (sk[0] != 0x50 + 9) { | |||
return -1; | |||
} | |||
u = 1; | |||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_decode( | |||
f, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9], | |||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_decode( | |||
g, 9, PQCLEAN_FALCON512_CLEAN_max_fg_bits[9], | |||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
v = PQCLEAN_FALCON512_CLEAN_trim_i8_decode( | |||
F, 9, PQCLEAN_FALCON512_CLEAN_max_FG_bits[9], | |||
sk + u, PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES - u); | |||
if (v == 0) { | |||
return -1; | |||
} | |||
u += v; | |||
if (u != PQCLEAN_FALCON512_CLEAN_CRYPTO_SECRETKEYBYTES) { | |||
return -1; | |||
} | |||
if (!PQCLEAN_FALCON512_CLEAN_complete_private(G, f, g, F, 9, tmp.b)) { | |||
return -1; | |||
} | |||
/* | |||
* Create a random nonce (40 bytes). | |||
*/ | |||
randombytes(nonce, NONCELEN); | |||
/* | |||
* Hash message nonce + message into a vector. | |||
*/ | |||
shake256_init(&sc); | |||
shake256_inject(&sc, nonce, NONCELEN); | |||
shake256_inject(&sc, m, mlen); | |||
shake256_flip(&sc); | |||
PQCLEAN_FALCON512_CLEAN_hash_to_point(&sc, r.hm, 9, tmp.b); | |||
/* | |||
* Initialize a RNG. | |||
*/ | |||
randombytes(seed, sizeof seed); | |||
shake256_init(&sc); | |||
shake256_inject(&sc, seed, sizeof seed); | |||
shake256_flip(&sc); | |||
/* | |||
* Compute and return the signature. This loops until a signature | |||
* value is found that fits in the provided buffer. | |||
*/ | |||
for (;;) { | |||
PQCLEAN_FALCON512_CLEAN_sign_dyn(r.sig, &sc, f, g, F, G, r.hm, 9, tmp.b); | |||
v = PQCLEAN_FALCON512_CLEAN_comp_encode(sigbuf, *sigbuflen, r.sig, 9); | |||
if (v != 0) { | |||
*sigbuflen = v; | |||
return 0; | |||
} | |||
} | |||
} | |||
/* | |||
* Verify a sigature. The nonce has size NONCELEN bytes. sigbuf[] | |||
* (of size sigbuflen) contains the signature value, not including the | |||
* header byte or nonce. Return value is 0 on success, -1 on error. | |||
*/ | |||
static int | |||
do_verify( | |||
const uint8_t *nonce, const uint8_t *sigbuf, size_t sigbuflen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
union { | |||
uint8_t b[2 * 512]; | |||
uint64_t dummy_u64; | |||
fpr dummy_fpr; | |||
} tmp; | |||
uint16_t h[512], hm[512]; | |||
int16_t sig[512]; | |||
shake256_context sc; | |||
/* | |||
* Decode public key. | |||
*/ | |||
if (pk[0] != 0x00 + 9) { | |||
return -1; | |||
} | |||
if (PQCLEAN_FALCON512_CLEAN_modq_decode(h, 9, | |||
pk + 1, PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) | |||
!= PQCLEAN_FALCON512_CLEAN_CRYPTO_PUBLICKEYBYTES - 1) { | |||
return -1; | |||
} | |||
PQCLEAN_FALCON512_CLEAN_to_ntt_monty(h, 9); | |||
/* | |||
* Decode signature. | |||
*/ | |||
if (sigbuflen == 0) { | |||
return -1; | |||
} | |||
if (PQCLEAN_FALCON512_CLEAN_comp_decode(sig, 9, sigbuf, sigbuflen) != sigbuflen) { | |||
return -1; | |||
} | |||
/* | |||
* Hash nonce + message into a vector. | |||
*/ | |||
shake256_init(&sc); | |||
shake256_inject(&sc, nonce, NONCELEN); | |||
shake256_inject(&sc, m, mlen); | |||
shake256_flip(&sc); | |||
PQCLEAN_FALCON512_CLEAN_hash_to_point(&sc, hm, 9, tmp.b); | |||
/* | |||
* Verify signature. | |||
*/ | |||
if (!PQCLEAN_FALCON512_CLEAN_verify_raw(hm, sig, h, 9, tmp.b)) { | |||
return -1; | |||
} | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON512_CLEAN_crypto_sign_signature( | |||
uint8_t *sig, size_t *siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
/* | |||
* The PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES constant is used for | |||
* the signed message object (as produced by crypto_sign()) | |||
* and includes a two-byte length value, so we take care here | |||
* to only generate signatures that are two bytes shorter than | |||
* the maximum. This is done to ensure that crypto_sign() | |||
* and crypto_sign_signature() produce the exact same signature | |||
* value, if used on the same message, with the same private key, | |||
* and using the same output from randombytes() (this is for | |||
* reproducibility of tests). | |||
*/ | |||
size_t vlen; | |||
vlen = PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||
if (do_sign(sig + 1, sig + 1 + NONCELEN, &vlen, m, mlen, sk) < 0) { | |||
return -1; | |||
} | |||
sig[0] = 0x30 + 9; | |||
*siglen = 1 + NONCELEN + vlen; | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON512_CLEAN_crypto_sign_verify( | |||
const uint8_t *sig, size_t siglen, | |||
const uint8_t *m, size_t mlen, const uint8_t *pk) { | |||
if (siglen < 1 + NONCELEN) { | |||
return -1; | |||
} | |||
if (sig[0] != 0x30 + 9) { | |||
return -1; | |||
} | |||
return do_verify(sig + 1, | |||
sig + 1 + NONCELEN, siglen - 1 - NONCELEN, m, mlen, pk); | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON512_CLEAN_crypto_sign( | |||
uint8_t *sm, size_t *smlen, | |||
const uint8_t *m, size_t mlen, const uint8_t *sk) { | |||
uint8_t *pm, *sigbuf; | |||
size_t sigbuflen; | |||
/* | |||
* Move the message to its final location; this is a memmove() so | |||
* it handles overlaps properly. | |||
*/ | |||
memmove(sm + 2 + NONCELEN, m, mlen); | |||
pm = sm + 2 + NONCELEN; | |||
sigbuf = pm + 1 + mlen; | |||
sigbuflen = PQCLEAN_FALCON512_CLEAN_CRYPTO_BYTES - NONCELEN - 3; | |||
if (do_sign(sm + 2, sigbuf, &sigbuflen, pm, mlen, sk) < 0) { | |||
return -1; | |||
} | |||
pm[mlen] = 0x20 + 9; | |||
sigbuflen ++; | |||
sm[0] = (uint8_t)(sigbuflen >> 8); | |||
sm[1] = (uint8_t)sigbuflen; | |||
*smlen = mlen + 2 + NONCELEN + sigbuflen; | |||
return 0; | |||
} | |||
/* see api.h */ | |||
int | |||
PQCLEAN_FALCON512_CLEAN_crypto_sign_open( | |||
uint8_t *m, size_t *mlen, | |||
const uint8_t *sm, size_t smlen, const uint8_t *pk) { | |||
const uint8_t *sigbuf; | |||
size_t pmlen, sigbuflen; | |||
if (smlen < 3 + NONCELEN) { | |||
return -1; | |||
} | |||
sigbuflen = ((size_t)sm[0] << 8) | (size_t)sm[1]; | |||
if (sigbuflen < 2 || sigbuflen > (smlen - NONCELEN - 2)) { | |||
return -1; | |||
} | |||
sigbuflen --; | |||
pmlen = smlen - NONCELEN - 3 - sigbuflen; | |||
if (sm[2 + NONCELEN + pmlen] != 0x20 + 9) { | |||
return -1; | |||
} | |||
sigbuf = sm + 2 + NONCELEN + pmlen + 1; | |||
/* | |||
* The 2-byte length header and the one-byte signature header | |||
* have been verified. Nonce is at sm+2, followed by the message | |||
* itself. Message length is in pmlen. sigbuf/sigbuflen point to | |||
* the signature value (excluding the header byte). | |||
*/ | |||
if (do_verify(sm + 2, sigbuf, sigbuflen, | |||
sm + 2 + NONCELEN, pmlen, pk) < 0) { | |||
return -1; | |||
} | |||
/* | |||
* Signature is correct, we just have to copy/move the message | |||
* to its final destination. The memmove() properly handles | |||
* overlaps. | |||
*/ | |||
memmove(m, sm + 2 + NONCELEN, pmlen); | |||
*mlen = pmlen; | |||
return 0; | |||
} |
@@ -0,0 +1,187 @@ | |||
/* | |||
* PRNG and interface to the system RNG. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include <assert.h> | |||
#include "inner.h" | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, shake256_context *src) { | |||
/* | |||
* To ensure reproducibility for a given seed, we | |||
* must enforce little-endian interpretation of | |||
* the state words. | |||
*/ | |||
uint8_t tmp[56]; | |||
uint64_t th, tl; | |||
int i; | |||
shake256_extract(src, tmp, 56); | |||
for (i = 0; i < 14; i ++) { | |||
uint32_t w; | |||
w = (uint32_t)tmp[(i << 2) + 0] | |||
| ((uint32_t)tmp[(i << 2) + 1] << 8) | |||
| ((uint32_t)tmp[(i << 2) + 2] << 16) | |||
| ((uint32_t)tmp[(i << 2) + 3] << 24); | |||
*(uint32_t *)(p->state.d + (i << 2)) = w; | |||
} | |||
tl = *(uint32_t *)(p->state.d + 48); | |||
th = *(uint32_t *)(p->state.d + 52); | |||
*(uint64_t *)(p->state.d + 48) = tl + (th << 32); | |||
PQCLEAN_FALCON512_CLEAN_prng_refill(p); | |||
} | |||
/* | |||
* PRNG based on ChaCha20. | |||
* | |||
* State consists in key (32 bytes) then IV (16 bytes) and block counter | |||
* (8 bytes). Normally, we should not care about local endianness (this | |||
* is for a PRNG), but for the NIST competition we need reproducible KAT | |||
* vectors that work across architectures, so we enforce little-endian | |||
* interpretation where applicable. Moreover, output words are "spread | |||
* out" over the output buffer with the interleaving pattern that is | |||
* naturally obtained from the AVX2 implementation that runs eight | |||
* ChaCha20 instances in parallel. | |||
* | |||
* The block counter is XORed into the first 8 bytes of the IV. | |||
*/ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_prng_refill(prng *p) { | |||
static const uint32_t CW[] = { | |||
0x61707865, 0x3320646e, 0x79622d32, 0x6b206574 | |||
}; | |||
uint64_t cc; | |||
size_t u; | |||
/* | |||
* State uses local endianness. Only the output bytes must be | |||
* converted to little endian (if used on a big-endian machine). | |||
*/ | |||
cc = *(uint64_t *)(p->state.d + 48); | |||
for (u = 0; u < 8; u ++) { | |||
uint32_t state[16]; | |||
size_t v; | |||
int i; | |||
memcpy(&state[0], CW, sizeof CW); | |||
memcpy(&state[4], p->state.d, 48); | |||
state[14] ^= (uint32_t)cc; | |||
state[15] ^= (uint32_t)(cc >> 32); | |||
for (i = 0; i < 10; i ++) { | |||
#define QROUND(a, b, c, d) do { \ | |||
state[a] += state[b]; \ | |||
state[d] ^= state[a]; \ | |||
state[d] = (state[d] << 16) | (state[d] >> 16); \ | |||
state[c] += state[d]; \ | |||
state[b] ^= state[c]; \ | |||
state[b] = (state[b] << 12) | (state[b] >> 20); \ | |||
state[a] += state[b]; \ | |||
state[d] ^= state[a]; \ | |||
state[d] = (state[d] << 8) | (state[d] >> 24); \ | |||
state[c] += state[d]; \ | |||
state[b] ^= state[c]; \ | |||
state[b] = (state[b] << 7) | (state[b] >> 25); \ | |||
} while (0) | |||
QROUND( 0, 4, 8, 12); | |||
QROUND( 1, 5, 9, 13); | |||
QROUND( 2, 6, 10, 14); | |||
QROUND( 3, 7, 11, 15); | |||
QROUND( 0, 5, 10, 15); | |||
QROUND( 1, 6, 11, 12); | |||
QROUND( 2, 7, 8, 13); | |||
QROUND( 3, 4, 9, 14); | |||
#undef QROUND | |||
} | |||
for (v = 0; v < 4; v ++) { | |||
state[v] += CW[v]; | |||
} | |||
for (v = 4; v < 14; v ++) { | |||
state[v] += ((uint32_t *)p->state.d)[v - 4]; | |||
} | |||
state[14] += ((uint32_t *)p->state.d)[10] | |||
^ (uint32_t)cc; | |||
state[15] += ((uint32_t *)p->state.d)[11] | |||
^ (uint32_t)(cc >> 32); | |||
cc ++; | |||
/* | |||
* We mimic the interleaving that is used in the AVX2 | |||
* implementation. | |||
*/ | |||
for (v = 0; v < 16; v ++) { | |||
p->buf.d[(u << 2) + (v << 5) + 0] = | |||
(uint8_t)state[v]; | |||
p->buf.d[(u << 2) + (v << 5) + 1] = | |||
(uint8_t)(state[v] >> 8); | |||
p->buf.d[(u << 2) + (v << 5) + 2] = | |||
(uint8_t)(state[v] >> 16); | |||
p->buf.d[(u << 2) + (v << 5) + 3] = | |||
(uint8_t)(state[v] >> 24); | |||
} | |||
} | |||
*(uint64_t *)(p->state.d + 48) = cc; | |||
p->ptr = 0; | |||
} | |||
/* see inner.h */ | |||
void | |||
PQCLEAN_FALCON512_CLEAN_prng_get_bytes(prng *p, void *dst, size_t len) { | |||
uint8_t *buf; | |||
buf = dst; | |||
while (len > 0) { | |||
size_t clen; | |||
clen = (sizeof p->buf.d) - p->ptr; | |||
if (clen > len) { | |||
clen = len; | |||
} | |||
memcpy(buf, p->buf.d, clen); | |||
buf += clen; | |||
len -= clen; | |||
p->ptr += clen; | |||
if (p->ptr == sizeof p->buf.d) { | |||
PQCLEAN_FALCON512_CLEAN_prng_refill(p); | |||
} | |||
} | |||
} |
@@ -0,0 +1,745 @@ | |||
/* | |||
* Falcon signature verification. | |||
* | |||
* ==========================(LICENSE BEGIN)============================ | |||
* | |||
* Copyright (c) 2017-2019 Falcon Project | |||
* | |||
* Permission is hereby granted, free of charge, to any person obtaining | |||
* a copy of this software and associated documentation files (the | |||
* "Software"), to deal in the Software without restriction, including | |||
* without limitation the rights to use, copy, modify, merge, publish, | |||
* distribute, sublicense, and/or sell copies of the Software, and to | |||
* permit persons to whom the Software is furnished to do so, subject to | |||
* the following conditions: | |||
* | |||
* The above copyright notice and this permission notice shall be | |||
* included in all copies or substantial portions of the Software. | |||
* | |||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. | |||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY | |||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, | |||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE | |||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |||
* | |||
* ===========================(LICENSE END)============================= | |||
* | |||
* @author Thomas Pornin <thomas.pornin@nccgroup.com> | |||
*/ | |||
#include "inner.h" | |||
/* ===================================================================== */ | |||
/* | |||
* Constants for NTT. | |||
* | |||
* n = 2^logn (2 <= n <= 1024) | |||
* phi = X^n + 1 | |||
* q = 12289 | |||
* q0i = -1/q mod 2^16 | |||
* R = 2^16 mod q | |||
* R2 = 2^32 mod q | |||
*/ | |||
#define Q 12289 | |||
#define Q0I 12287 | |||
#define R 4091 | |||
#define R2 10952 | |||
/* | |||
* Table for NTT, binary case: | |||
* GMb[x] = R*(g^rev(x)) mod q | |||
* where g = 7 (it is a 2048-th primitive root of 1 modulo q) | |||
* and rev() is the bit-reversal function over 10 bits. | |||
*/ | |||
static const uint16_t GMb[] = { | |||
4091, 7888, 11060, 11208, 6960, 4342, 6275, 9759, | |||
1591, 6399, 9477, 5266, 586, 5825, 7538, 9710, | |||
1134, 6407, 1711, 965, 7099, 7674, 3743, 6442, | |||
10414, 8100, 1885, 1688, 1364, 10329, 10164, 9180, | |||
12210, 6240, 997, 117, 4783, 4407, 1549, 7072, | |||
2829, 6458, 4431, 8877, 7144, 2564, 5664, 4042, | |||
12189, 432, 10751, 1237, 7610, 1534, 3983, 7863, | |||
2181, 6308, 8720, 6570, 4843, 1690, 14, 3872, | |||
5569, 9368, 12163, 2019, 7543, 2315, 4673, 7340, | |||
1553, 1156, 8401, 11389, 1020, 2967, 10772, 7045, | |||
3316, 11236, 5285, 11578, 10637, 10086, 9493, 6180, | |||
9277, 6130, 3323, 883, 10469, 489, 1502, 2851, | |||
11061, 9729, 2742, 12241, 4970, 10481, 10078, 1195, | |||
730, 1762, 3854, 2030, 5892, 10922, 9020, 5274, | |||
9179, 3604, 3782, 10206, 3180, 3467, 4668, 2446, | |||
7613, 9386, 834, 7703, 6836, 3403, 5351, 12276, | |||
3580, 1739, 10820, 9787, 10209, 4070, 12250, 8525, | |||
10401, 2749, 7338, 10574, 6040, 943, 9330, 1477, | |||
6865, 9668, 3585, 6633, 12145, 4063, 3684, 7680, | |||
8188, 6902, 3533, 9807, 6090, 727, 10099, 7003, | |||
6945, 1949, 9731, 10559, 6057, 378, 7871, 8763, | |||
8901, 9229, 8846, 4551, 9589, 11664, 7630, 8821, | |||
5680, 4956, 6251, 8388, 10156, 8723, 2341, 3159, | |||
1467, 5460, 8553, 7783, 2649, 2320, 9036, 6188, | |||
737, 3698, 4699, 5753, 9046, 3687, 16, 914, | |||
5186, 10531, 4552, 1964, 3509, 8436, 7516, 5381, | |||
10733, 3281, 7037, 1060, 2895, 7156, 8887, 5357, | |||
6409, 8197, 2962, 6375, 5064, 6634, 5625, 278, | |||
932, 10229, 8927, 7642, 351, 9298, 237, 5858, | |||
7692, 3146, 12126, 7586, 2053, 11285, 3802, 5204, | |||
4602, 1748, 11300, 340, 3711, 4614, 300, 10993, | |||
5070, 10049, 11616, 12247, 7421, 10707, 5746, 5654, | |||
3835, 5553, 1224, 8476, 9237, 3845, 250, 11209, | |||
4225, 6326, 9680, 12254, 4136, 2778, 692, 8808, | |||
6410, 6718, 10105, 10418, 3759, 7356, 11361, 8433, | |||
6437, 3652, 6342, 8978, 5391, 2272, 6476, 7416, | |||
8418, 10824, 11986, 5733, 876, 7030, 2167, 2436, | |||
3442, 9217, 8206, 4858, 5964, 2746, 7178, 1434, | |||
7389, 8879, 10661, 11457, 4220, 1432, 10832, 4328, | |||
8557, 1867, 9454, 2416, 3816, 9076, 686, 5393, | |||
2523, 4339, 6115, 619, 937, 2834, 7775, 3279, | |||
2363, 7488, 6112, 5056, 824, 10204, 11690, 1113, | |||
2727, 9848, 896, 2028, 5075, 2654, 10464, 7884, | |||
12169, 5434, 3070, 6400, 9132, 11672, 12153, 4520, | |||
1273, 9739, 11468, 9937, 10039, 9720, 2262, 9399, | |||
11192, 315, 4511, 1158, 6061, 6751, 11865, 357, | |||
7367, 4550, 983, 8534, 8352, 10126, 7530, 9253, | |||
4367, 5221, 3999, 8777, 3161, 6990, 4130, 11652, | |||
3374, 11477, 1753, 292, 8681, 2806, 10378, 12188, | |||
5800, 11811, 3181, 1988, 1024, 9340, 2477, 10928, | |||
4582, 6750, 3619, 5503, 5233, 2463, 8470, 7650, | |||
7964, 6395, 1071, 1272, 3474, 11045, 3291, 11344, | |||
8502, 9478, 9837, 1253, 1857, 6233, 4720, 11561, | |||
6034, 9817, 3339, 1797, 2879, 6242, 5200, 2114, | |||
7962, 9353, 11363, 5475, 6084, 9601, 4108, 7323, | |||
10438, 9471, 1271, 408, 6911, 3079, 360, 8276, | |||
11535, 9156, 9049, 11539, 850, 8617, 784, 7919, | |||
8334, 12170, 1846, 10213, 12184, 7827, 11903, 5600, | |||
9779, 1012, 721, 2784, 6676, 6552, 5348, 4424, | |||
6816, 8405, 9959, 5150, 2356, 5552, 5267, 1333, | |||
8801, 9661, 7308, 5788, 4910, 909, 11613, 4395, | |||
8238, 6686, 4302, 3044, 2285, 12249, 1963, 9216, | |||
4296, 11918, 695, 4371, 9793, 4884, 2411, 10230, | |||
2650, 841, 3890, 10231, 7248, 8505, 11196, 6688, | |||
4059, 6060, 3686, 4722, 11853, 5816, 7058, 6868, | |||
11137, 7926, 4894, 12284, 4102, 3908, 3610, 6525, | |||
7938, 7982, 11977, 6755, 537, 4562, 1623, 8227, | |||
11453, 7544, 906, 11816, 9548, 10858, 9703, 2815, | |||
11736, 6813, 6979, 819, 8903, 6271, 10843, 348, | |||
7514, 8339, 6439, 694, 852, 5659, 2781, 3716, | |||
11589, 3024, 1523, 8659, 4114, 10738, 3303, 5885, | |||
2978, 7289, 11884, 9123, 9323, 11830, 98, 2526, | |||
2116, 4131, 11407, 1844, 3645, 3916, 8133, 2224, | |||
10871, 8092, 9651, 5989, 7140, 8480, 1670, 159, | |||
10923, 4918, 128, 7312, 725, 9157, 5006, 6393, | |||
3494, 6043, 10972, 6181, 11838, 3423, 10514, 7668, | |||
3693, 6658, 6905, 11953, 10212, 11922, 9101, 8365, | |||
5110, 45, 2400, 1921, 4377, 2720, 1695, 51, | |||
2808, 650, 1896, 9997, 9971, 11980, 8098, 4833, | |||
4135, 4257, 5838, 4765, 10985, 11532, 590, 12198, | |||
482, 12173, 2006, 7064, 10018, 3912, 12016, 10519, | |||
11362, 6954, 2210, 284, 5413, 6601, 3865, 10339, | |||
11188, 6231, 517, 9564, 11281, 3863, 1210, 4604, | |||
8160, 11447, 153, 7204, 5763, 5089, 9248, 12154, | |||
11748, 1354, 6672, 179, 5532, 2646, 5941, 12185, | |||
862, 3158, 477, 7279, 5678, 7914, 4254, 302, | |||
2893, 10114, 6890, 9560, 9647, 11905, 4098, 9824, | |||
10269, 1353, 10715, 5325, 6254, 3951, 1807, 6449, | |||
5159, 1308, 8315, 3404, 1877, 1231, 112, 6398, | |||
11724, 12272, 7286, 1459, 12274, 9896, 3456, 800, | |||
1397, 10678, 103, 7420, 7976, 936, 764, 632, | |||
7996, 8223, 8445, 7758, 10870, 9571, 2508, 1946, | |||
6524, 10158, 1044, 4338, 2457, 3641, 1659, 4139, | |||
4688, 9733, 11148, 3946, 2082, 5261, 2036, 11850, | |||
7636, 12236, 5366, 2380, 1399, 7720, 2100, 3217, | |||
10912, 8898, 7578, 11995, 2791, 1215, 3355, 2711, | |||
2267, 2004, 8568, 10176, 3214, 2337, 1750, 4729, | |||
4997, 7415, 6315, 12044, 4374, 7157, 4844, 211, | |||
8003, 10159, 9290, 11481, 1735, 2336, 5793, 9875, | |||
8192, 986, 7527, 1401, 870, 3615, 8465, 2756, | |||
9770, 2034, 10168, 3264, 6132, 54, 2880, 4763, | |||
11805, 3074, 8286, 9428, 4881, 6933, 1090, 10038, | |||
2567, 708, 893, 6465, 4962, 10024, 2090, 5718, | |||
10743, 780, 4733, 4623, 2134, 2087, 4802, 884, | |||
5372, 5795, 5938, 4333, 6559, 7549, 5269, 10664, | |||
4252, 3260, 5917, 10814, 5768, 9983, 8096, 7791, | |||
6800, 7491, 6272, 1907, 10947, 6289, 11803, 6032, | |||
11449, 1171, 9201, 7933, 2479, 7970, 11337, 7062, | |||
8911, 6728, 6542, 8114, 8828, 6595, 3545, 4348, | |||
4610, 2205, 6999, 8106, 5560, 10390, 9321, 2499, | |||
2413, 7272, 6881, 10582, 9308, 9437, 3554, 3326, | |||
5991, 11969, 3415, 12283, 9838, 12063, 4332, 7830, | |||
11329, 6605, 12271, 2044, 11611, 7353, 11201, 11582, | |||
3733, 8943, 9978, 1627, 7168, 3935, 5050, 2762, | |||
7496, 10383, 755, 1654, 12053, 4952, 10134, 4394, | |||
6592, 7898, 7497, 8904, 12029, 3581, 10748, 5674, | |||
10358, 4901, 7414, 8771, 710, 6764, 8462, 7193, | |||
5371, 7274, 11084, 290, 7864, 6827, 11822, 2509, | |||
6578, 4026, 5807, 1458, 5721, 5762, 4178, 2105, | |||
11621, 4852, 8897, 2856, 11510, 9264, 2520, 8776, | |||
7011, 2647, 1898, 7039, 5950, 11163, 5488, 6277, | |||
9182, 11456, 633, 10046, 11554, 5633, 9587, 2333, | |||
7008, 7084, 5047, 7199, 9865, 8997, 569, 6390, | |||
10845, 9679, 8268, 11472, 4203, 1997, 2, 9331, | |||
162, 6182, 2000, 3649, 9792, 6363, 7557, 6187, | |||
8510, 9935, 5536, 9019, 3706, 12009, 1452, 3067, | |||
5494, 9692, 4865, 6019, 7106, 9610, 4588, 10165, | |||
6261, 5887, 2652, 10172, 1580, 10379, 4638, 9949 | |||
}; | |||
/* | |||
* Table for inverse NTT, binary case: | |||
* iGMb[x] = R*((1/g)^rev(x)) mod q | |||
* Since g = 7, 1/g = 8778 mod 12289. | |||
*/ | |||
static const uint16_t iGMb[] = { | |||
4091, 4401, 1081, 1229, 2530, 6014, 7947, 5329, | |||
2579, 4751, 6464, 11703, 7023, 2812, 5890, 10698, | |||
3109, 2125, 1960, 10925, 10601, 10404, 4189, 1875, | |||
5847, 8546, 4615, 5190, 11324, 10578, 5882, 11155, | |||
8417, 12275, 10599, 7446, 5719, 3569, 5981, 10108, | |||
4426, 8306, 10755, 4679, 11052, 1538, 11857, 100, | |||
8247, 6625, 9725, 5145, 3412, 7858, 5831, 9460, | |||
5217, 10740, 7882, 7506, 12172, 11292, 6049, 79, | |||
13, 6938, 8886, 5453, 4586, 11455, 2903, 4676, | |||
9843, 7621, 8822, 9109, 2083, 8507, 8685, 3110, | |||
7015, 3269, 1367, 6397, 10259, 8435, 10527, 11559, | |||
11094, 2211, 1808, 7319, 48, 9547, 2560, 1228, | |||
9438, 10787, 11800, 1820, 11406, 8966, 6159, 3012, | |||
6109, 2796, 2203, 1652, 711, 7004, 1053, 8973, | |||
5244, 1517, 9322, 11269, 900, 3888, 11133, 10736, | |||
4949, 7616, 9974, 4746, 10270, 126, 2921, 6720, | |||
6635, 6543, 1582, 4868, 42, 673, 2240, 7219, | |||
1296, 11989, 7675, 8578, 11949, 989, 10541, 7687, | |||
7085, 8487, 1004, 10236, 4703, 163, 9143, 4597, | |||
6431, 12052, 2991, 11938, 4647, 3362, 2060, 11357, | |||
12011, 6664, 5655, 7225, 5914, 9327, 4092, 5880, | |||
6932, 3402, 5133, 9394, 11229, 5252, 9008, 1556, | |||
6908, 4773, 3853, 8780, 10325, 7737, 1758, 7103, | |||
11375, 12273, 8602, 3243, 6536, 7590, 8591, 11552, | |||
6101, 3253, 9969, 9640, 4506, 3736, 6829, 10822, | |||
9130, 9948, 3566, 2133, 3901, 6038, 7333, 6609, | |||
3468, 4659, 625, 2700, 7738, 3443, 3060, 3388, | |||
3526, 4418, 11911, 6232, 1730, 2558, 10340, 5344, | |||
5286, 2190, 11562, 6199, 2482, 8756, 5387, 4101, | |||
4609, 8605, 8226, 144, 5656, 8704, 2621, 5424, | |||
10812, 2959, 11346, 6249, 1715, 4951, 9540, 1888, | |||
3764, 39, 8219, 2080, 2502, 1469, 10550, 8709, | |||
5601, 1093, 3784, 5041, 2058, 8399, 11448, 9639, | |||
2059, 9878, 7405, 2496, 7918, 11594, 371, 7993, | |||
3073, 10326, 40, 10004, 9245, 7987, 5603, 4051, | |||
7894, 676, 11380, 7379, 6501, 4981, 2628, 3488, | |||
10956, 7022, 6737, 9933, 7139, 2330, 3884, 5473, | |||
7865, 6941, 5737, 5613, 9505, 11568, 11277, 2510, | |||
6689, 386, 4462, 105, 2076, 10443, 119, 3955, | |||
4370, 11505, 3672, 11439, 750, 3240, 3133, 754, | |||
4013, 11929, 9210, 5378, 11881, 11018, 2818, 1851, | |||
4966, 8181, 2688, 6205, 6814, 926, 2936, 4327, | |||
10175, 7089, 6047, 9410, 10492, 8950, 2472, 6255, | |||
728, 7569, 6056, 10432, 11036, 2452, 2811, 3787, | |||
945, 8998, 1244, 8815, 11017, 11218, 5894, 4325, | |||
4639, 3819, 9826, 7056, 6786, 8670, 5539, 7707, | |||
1361, 9812, 2949, 11265, 10301, 9108, 478, 6489, | |||
101, 1911, 9483, 3608, 11997, 10536, 812, 8915, | |||
637, 8159, 5299, 9128, 3512, 8290, 7068, 7922, | |||
3036, 4759, 2163, 3937, 3755, 11306, 7739, 4922, | |||
11932, 424, 5538, 6228, 11131, 7778, 11974, 1097, | |||
2890, 10027, 2569, 2250, 2352, 821, 2550, 11016, | |||
7769, 136, 617, 3157, 5889, 9219, 6855, 120, | |||
4405, 1825, 9635, 7214, 10261, 11393, 2441, 9562, | |||
11176, 599, 2085, 11465, 7233, 6177, 4801, 9926, | |||
9010, 4514, 9455, 11352, 11670, 6174, 7950, 9766, | |||
6896, 11603, 3213, 8473, 9873, 2835, 10422, 3732, | |||
7961, 1457, 10857, 8069, 832, 1628, 3410, 4900, | |||
10855, 5111, 9543, 6325, 7431, 4083, 3072, 8847, | |||
9853, 10122, 5259, 11413, 6556, 303, 1465, 3871, | |||
4873, 5813, 10017, 6898, 3311, 5947, 8637, 5852, | |||
3856, 928, 4933, 8530, 1871, 2184, 5571, 5879, | |||
3481, 11597, 9511, 8153, 35, 2609, 5963, 8064, | |||
1080, 12039, 8444, 3052, 3813, 11065, 6736, 8454, | |||
2340, 7651, 1910, 10709, 2117, 9637, 6402, 6028, | |||
2124, 7701, 2679, 5183, 6270, 7424, 2597, 6795, | |||
9222, 10837, 280, 8583, 3270, 6753, 2354, 3779, | |||
6102, 4732, 5926, 2497, 8640, 10289, 6107, 12127, | |||
2958, 12287, 10292, 8086, 817, 4021, 2610, 1444, | |||
5899, 11720, 3292, 2424, 5090, 7242, 5205, 5281, | |||
9956, 2702, 6656, 735, 2243, 11656, 833, 3107, | |||
6012, 6801, 1126, 6339, 5250, 10391, 9642, 5278, | |||
3513, 9769, 3025, 779, 9433, 3392, 7437, 668, | |||
10184, 8111, 6527, 6568, 10831, 6482, 8263, 5711, | |||
9780, 467, 5462, 4425, 11999, 1205, 5015, 6918, | |||
5096, 3827, 5525, 11579, 3518, 4875, 7388, 1931, | |||
6615, 1541, 8708, 260, 3385, 4792, 4391, 5697, | |||
7895, 2155, 7337, 236, 10635, 11534, 1906, 4793, | |||
9527, 7239, 8354, 5121, 10662, 2311, 3346, 8556, | |||
707, 1088, 4936, 678, 10245, 18, 5684, 960, | |||
4459, 7957, 226, 2451, 6, 8874, 320, 6298, | |||
8963, 8735, 2852, 2981, 1707, 5408, 5017, 9876, | |||
9790, 2968, 1899, 6729, 4183, 5290, 10084, 7679, | |||
7941, 8744, 5694, 3461, 4175, 5747, 5561, 3378, | |||
5227, 952, 4319, 9810, 4356, 3088, 11118, 840, | |||
6257, 486, 6000, 1342, 10382, 6017, 4798, 5489, | |||
4498, 4193, 2306, 6521, 1475, 6372, 9029, 8037, | |||
1625, 7020, 4740, 5730, 7956, 6351, 6494, 6917, | |||
11405, 7487, 10202, 10155, 7666, 7556, 11509, 1546, | |||
6571, 10199, 2265, 7327, 5824, 11396, 11581, 9722, | |||
2251, 11199, 5356, 7408, 2861, 4003, 9215, 484, | |||
7526, 9409, 12235, 6157, 9025, 2121, 10255, 2519, | |||
9533, 3824, 8674, 11419, 10888, 4762, 11303, 4097, | |||
2414, 6496, 9953, 10554, 808, 2999, 2130, 4286, | |||
12078, 7445, 5132, 7915, 245, 5974, 4874, 7292, | |||
7560, 10539, 9952, 9075, 2113, 3721, 10285, 10022, | |||
9578, 8934, 11074, 9498, 294, 4711, 3391, 1377, | |||
9072, 10189, 4569, 10890, 9909, 6923, 53, 4653, | |||
439, 10253, 7028, 10207, 8343, 1141, 2556, 7601, | |||
8150, 10630, 8648, 9832, 7951, 11245, 2131, 5765, | |||
10343, 9781, 2718, 1419, 4531, 3844, 4066, 4293, | |||
11657, 11525, 11353, 4313, 4869, 12186, 1611, 10892, | |||
11489, 8833, 2393, 15, 10830, 5003, 17, 565, | |||
5891, 12177, 11058, 10412, 8885, 3974, 10981, 7130, | |||
5840, 10482, 8338, 6035, 6964, 1574, 10936, 2020, | |||
2465, 8191, 384, 2642, 2729, 5399, 2175, 9396, | |||
11987, 8035, 4375, 6611, 5010, 11812, 9131, 11427, | |||
104, 6348, 9643, 6757, 12110, 5617, 10935, 541, | |||
135, 3041, 7200, 6526, 5085, 12136, 842, 4129, | |||
7685, 11079, 8426, 1008, 2725, 11772, 6058, 1101, | |||
1950, 8424, 5688, 6876, 12005, 10079, 5335, 927, | |||
1770, 273, 8377, 2271, 5225, 10283, 116, 11807, | |||
91, 11699, 757, 1304, 7524, 6451, 8032, 8154, | |||
7456, 4191, 309, 2318, 2292, 10393, 11639, 9481, | |||
12238, 10594, 9569, 7912, 10368, 9889, 12244, 7179, | |||
3924, 3188, 367, 2077, 336, 5384, 5631, 8596, | |||
4621, 1775, 8866, 451, 6108, 1317, 6246, 8795, | |||
5896, 7283, 3132, 11564, 4977, 12161, 7371, 1366, | |||
12130, 10619, 3809, 5149, 6300, 2638, 4197, 1418, | |||
10065, 4156, 8373, 8644, 10445, 882, 8158, 10173, | |||
9763, 12191, 459, 2966, 3166, 405, 5000, 9311, | |||
6404, 8986, 1551, 8175, 3630, 10766, 9265, 700, | |||
8573, 9508, 6630, 11437, 11595, 5850, 3950, 4775, | |||
11941, 1446, 6018, 3386, 11470, 5310, 5476, 553, | |||
9474, 2586, 1431, 2741, 473, 11383, 4745, 836, | |||
4062, 10666, 7727, 11752, 5534, 312, 4307, 4351, | |||
5764, 8679, 8381, 8187, 5, 7395, 4363, 1152, | |||
5421, 5231, 6473, 436, 7567, 8603, 6229, 8230 | |||
}; | |||
/* | |||
* Reduce a small signed integer modulo q. The source integer MUST | |||
* be between -q/2 and +q/2. | |||
*/ | |||
static inline uint32_t | |||
mq_conv_small(int x) { | |||
/* | |||
* If x < 0, the cast to uint32_t will set the high bit to 1. | |||
*/ | |||
uint32_t y; | |||
y = (uint32_t)x; | |||
y += Q & -(y >> 31); | |||
return y; | |||
} | |||
/* | |||
* Addition modulo q. Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_add(uint32_t x, uint32_t y) { | |||
/* | |||
* We compute x + y - q. If the result is negative, then the | |||
* high bit will be set, and 'd >> 31' will be equal to 1; | |||
* thus '-(d >> 31)' will be an all-one pattern. Otherwise, | |||
* it will be an all-zero pattern. In other words, this | |||
* implements a conditional addition of q. | |||
*/ | |||
uint32_t d; | |||
d = x + y - Q; | |||
d += Q & -(d >> 31); | |||
return d; | |||
} | |||
/* | |||
* Subtraction modulo q. Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_sub(uint32_t x, uint32_t y) { | |||
/* | |||
* As in mq_add(), we use a conditional addition to ensure the | |||
* result is in the 0..q-1 range. | |||
*/ | |||
uint32_t d; | |||
d = x - y; | |||
d += Q & -(d >> 31); | |||
return d; | |||
} | |||
/* | |||
* Division by 2 modulo q. Operand must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_rshift1(uint32_t x) { | |||
x += Q & -(x & 1); | |||
return (x >> 1); | |||
} | |||
/* | |||
* Montgomery multiplication modulo q. If we set R = 2^16 mod q, then | |||
* this function computes: x * y / R mod q | |||
* Operands must be in the 0..q-1 range. | |||
*/ | |||
static inline uint32_t | |||
mq_montymul(uint32_t x, uint32_t y) { | |||
uint32_t z, w; | |||
/* | |||
* We compute x*y + k*q with a value of k chosen so that the 16 | |||
* low bits of the result are 0. We can then shift the value. | |||
* After the shift, result may still be larger than q, but it | |||
* will be lower than 2*q, so a conditional subtraction works. | |||
*/ | |||
z = x * y; | |||
w = ((z * Q0I) & 0xFFFF) * Q; | |||
/* | |||
* When adding z and w, the result will have its low 16 bits | |||
* equal to 0. Since x, y and z are lower than q, the sum will | |||
* be no more than (2^15 - 1) * q + (q - 1)^2, which will | |||
* fit on 29 bits. | |||
*/ | |||
z = (z + w) >> 16; | |||
/* | |||
* After the shift, analysis shows that the value will be less | |||
* than 2q. We do a subtraction then conditional subtraction to | |||
* ensure the result is in the expected range. | |||
*/ | |||
z -= Q; | |||
z += Q & -(z >> 31); | |||
return z; | |||
} | |||
/*
 * Montgomery squaring: returns (x^2)/R mod q, i.e. the Montgomery
 * product of x with itself.
 */
static inline uint32_t
mq_montysqr(uint32_t x) {
    uint32_t sq;

    sq = mq_montymul(x, x);
    return sq;
}
/* | |||
* Divide x by y modulo q = 12289. | |||
*/ | |||
static inline uint32_t | |||
mq_div_12289(uint32_t x, uint32_t y) { | |||
/* | |||
* We invert y by computing y^(q-2) mod q. | |||
* | |||
* We use the following addition chain for exponent e = 12287: | |||
* | |||
* e0 = 1 | |||
* e1 = 2 * e0 = 2 | |||
* e2 = e1 + e0 = 3 | |||
* e3 = e2 + e1 = 5 | |||
* e4 = 2 * e3 = 10 | |||
* e5 = 2 * e4 = 20 | |||
* e6 = 2 * e5 = 40 | |||
* e7 = 2 * e6 = 80 | |||
* e8 = 2 * e7 = 160 | |||
* e9 = e8 + e2 = 163 | |||
* e10 = e9 + e8 = 323 | |||
* e11 = 2 * e10 = 646 | |||
* e12 = 2 * e11 = 1292 | |||
* e13 = e12 + e9 = 1455 | |||
* e14 = 2 * e13 = 2910 | |||
* e15 = 2 * e14 = 5820 | |||
* e16 = e15 + e10 = 6143 | |||
* e17 = 2 * e16 = 12286 | |||
* e18 = e17 + e0 = 12287 | |||
* | |||
* Additions on exponents are converted to Montgomery | |||
* multiplications. We define all intermediate results as so | |||
* many local variables, and let the C compiler work out which | |||
* must be kept around. | |||
*/ | |||
uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; | |||
uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18; | |||
y0 = mq_montymul(y, R2); | |||
y1 = mq_montysqr(y0); | |||
y2 = mq_montymul(y1, y0); | |||
y3 = mq_montymul(y2, y1); | |||
y4 = mq_montysqr(y3); | |||
y5 = mq_montysqr(y4); | |||
y6 = mq_montysqr(y5); | |||
y7 = mq_montysqr(y6); | |||
y8 = mq_montysqr(y7); | |||
y9 = mq_montymul(y8, y2); | |||
y10 = mq_montymul(y9, y8); | |||
y11 = mq_montysqr(y10); | |||
y12 = mq_montysqr(y11); | |||
y13 = mq_montymul(y12, y9); | |||
y14 = mq_montysqr(y13); | |||
y15 = mq_montysqr(y14); | |||
y16 = mq_montymul(y15, y10); | |||
y17 = mq_montysqr(y16); | |||
y18 = mq_montymul(y17, y0); | |||
/* | |||
* Final multiplication with x, which is not in Montgomery | |||
* representation, computes the correct division result. | |||
*/ | |||
return mq_montymul(y18, x); | |||
} | |||
/* | |||
* Compute NTT on a ring element. | |||
*/ | |||
static void | |||
mq_NTT(uint16_t *a, unsigned logn) { | |||
size_t n, t, m; | |||
n = (size_t)1 << logn; | |||
t = n; | |||
for (m = 1; m < n; m <<= 1) { | |||
size_t ht, i, j1; | |||
ht = t >> 1; | |||
for (i = 0, j1 = 0; i < m; i ++, j1 += t) { | |||
size_t j, j2; | |||
uint32_t s; | |||
s = GMb[m + i]; | |||
j2 = j1 + ht; | |||
for (j = j1; j < j2; j ++) { | |||
uint32_t u, v; | |||
u = a[j]; | |||
v = mq_montymul(a[j + ht], s); | |||
a[j] = (uint16_t)mq_add(u, v); | |||
a[j + ht] = (uint16_t)mq_sub(u, v); | |||
} | |||
} | |||
t = ht; | |||
} | |||
} | |||
/* | |||
* Compute the inverse NTT on a ring element, binary case. | |||
*/ | |||
static void | |||
mq_iNTT(uint16_t *a, unsigned logn) { | |||
size_t n, t, m; | |||
uint32_t ni; | |||
n = (size_t)1 << logn; | |||
t = 1; | |||
m = n; | |||
while (m > 1) { | |||
size_t hm, dt, i, j1; | |||
hm = m >> 1; | |||
dt = t << 1; | |||
for (i = 0, j1 = 0; i < hm; i ++, j1 += dt) { | |||
size_t j, j2; | |||
uint32_t s; | |||
j2 = j1 + t; | |||
s = iGMb[hm + i]; | |||
for (j = j1; j < j2; j ++) { | |||
uint32_t u, v, w; | |||
u = a[j]; | |||
v = a[j + t]; | |||
a[j] = (uint16_t)mq_add(u, v); | |||
w = mq_sub(u, v); | |||
a[j + t] = (uint16_t) | |||
mq_montymul(w, s); | |||
} | |||
} | |||
t = dt; | |||
m = hm; | |||
} | |||
/* | |||
* To complete the inverse NTT, we must now divide all values by | |||
* n (the vector size). We thus need the inverse of n, i.e. we | |||
* need to divide 1 by 2 logn times. But we also want it in | |||
* Montgomery representation, i.e. we also want to multiply it | |||
* by R = 2^16. In the common case, this should be a simple right | |||
* shift. The loop below is generic and works also in corner cases; | |||
* its computation time is negligible. | |||
*/ | |||
ni = R; | |||
for (m = n; m > 1; m >>= 1) { | |||
ni = mq_rshift1(ni); | |||
} | |||
for (m = 0; m < n; m ++) { | |||
a[m] = (uint16_t)mq_montymul(a[m], ni); | |||
} | |||
} | |||
/* | |||
* Convert a polynomial (mod q) to Montgomery representation. | |||
*/ | |||
static void | |||
mq_poly_tomonty(uint16_t *f, unsigned logn) { | |||
size_t u, n; | |||
n = (size_t)1 << logn; | |||
for (u = 0; u < n; u ++) { | |||
f[u] = (uint16_t)mq_montymul(f[u], R2); | |||
} | |||
} | |||
/*
 * Coefficient-wise Montgomery product of two polynomials given in
 * NTT representation. The product f*g overwrites f; g is left
 * untouched. Both polynomials have 2^logn coefficients.
 */
static void
mq_poly_montymul_ntt(uint16_t *f, const uint16_t *g, unsigned logn) {
    size_t len, k;

    len = (size_t)1 << logn;
    k = 0;
    while (k < len) {
        f[k] = (uint16_t)mq_montymul(f[k], g[k]);
        k ++;
    }
}
/*
 * Coefficient-wise subtraction modulo q: f <- f - g. Both
 * polynomials have 2^logn coefficients.
 */
static void
mq_poly_sub(uint16_t *f, const uint16_t *g, unsigned logn) {
    size_t len, k;

    len = (size_t)1 << logn;
    k = 0;
    while (k < len) {
        f[k] = (uint16_t)mq_sub(f[k], g[k]);
        k ++;
    }
}
/* ===================================================================== */ | |||
/* see inner.h */
void
PQCLEAN_FALCON512_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn) {
    /*
     * In-place conversion of h (2^logn coefficients): first to NTT
     * representation, then to Montgomery representation.
     */
    mq_NTT(h, logn);
    mq_poly_tomonty(h, logn);
}
/* see inner.h */ | |||
int | |||
PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2, | |||
const uint16_t *h, unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *tt; | |||
n = (size_t)1 << logn; | |||
tt = (uint16_t *)tmp; | |||
/* | |||
* Reduce s2 elements modulo q ([0..q-1] range). | |||
*/ | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
w = (uint32_t)s2[u]; | |||
w += Q & -(w >> 31); | |||
tt[u] = (uint16_t)w; | |||
} | |||
/* | |||
* Compute s1 = s2*h - c0 mod phi mod q (in tt[]). | |||
*/ | |||
mq_NTT(tt, logn); | |||
mq_poly_montymul_ntt(tt, h, logn); | |||
mq_iNTT(tt, logn); | |||
mq_poly_sub(tt, c0, logn); | |||
/* | |||
* Normalize s1 elements into the [-q/2..q/2] range. | |||
*/ | |||
for (u = 0; u < n; u ++) { | |||
int32_t w; | |||
w = (int32_t)tt[u]; | |||
w -= (int32_t)(Q & -(((Q >> 1) - (uint32_t)w) >> 31)); | |||
((int16_t *)tt)[u] = (int16_t)w; | |||
} | |||
/* | |||
* Signature is valid if and only if the aggregate (s1,s2) vector | |||
* is short enough. | |||
*/ | |||
return PQCLEAN_FALCON512_CLEAN_is_short((int16_t *)tt, s2, logn); | |||
} | |||
/* see inner.h */
int
PQCLEAN_FALCON512_CLEAN_compute_public(uint16_t *h,
                                       const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp) {
    size_t n, k;
    uint16_t *fq;

    n = (size_t)1 << logn;
    fq = (uint16_t *)tmp;

    /*
     * Reduce f (into the temporary area) and g (into h) modulo q,
     * then move both to NTT representation.
     */
    for (k = 0; k < n; k ++) {
        fq[k] = (uint16_t)mq_conv_small(f[k]);
        h[k] = (uint16_t)mq_conv_small(g[k]);
    }
    mq_NTT(h, logn);
    mq_NTT(fq, logn);

    /*
     * h = g/f, coefficient by coefficient. A zero coefficient in
     * the NTT representation of f makes the division impossible;
     * 0 is then returned immediately.
     */
    for (k = 0; k < n; k ++) {
        uint32_t den;

        den = fq[k];
        if (den == 0) {
            return 0;
        }
        h[k] = (uint16_t)mq_div_12289(h[k], den);
    }

    /* Back to the coefficient representation; report success. */
    mq_iNTT(h, logn);
    return 1;
}
/* see internal.h */ | |||
int | |||
PQCLEAN_FALCON512_CLEAN_complete_private(int8_t *G, | |||
const int8_t *f, const int8_t *g, const int8_t *F, | |||
unsigned logn, uint8_t *tmp) { | |||
size_t u, n; | |||
uint16_t *t1, *t2; | |||
n = (size_t)1 << logn; | |||
t1 = (uint16_t *)tmp; | |||
t2 = t1 + n; | |||
for (u = 0; u < n; u ++) { | |||
t1[u] = (uint16_t)mq_conv_small(g[u]); | |||
t2[u] = (uint16_t)mq_conv_small(F[u]); | |||
} | |||
mq_NTT(t1, logn); | |||
mq_NTT(t2, logn); | |||
mq_poly_tomonty(t1, logn); | |||
mq_poly_montymul_ntt(t1, t2, logn); | |||
for (u = 0; u < n; u ++) { | |||
t2[u] = (uint16_t)mq_conv_small(f[u]); | |||
} | |||
mq_NTT(t2, logn); | |||
for (u = 0; u < n; u ++) { | |||
if (t2[u] == 0) { | |||
return 0; | |||
} | |||
t1[u] = (uint16_t)mq_div_12289(t1[u], t2[u]); | |||
} | |||
mq_iNTT(t1, logn); | |||
for (u = 0; u < n; u ++) { | |||
uint32_t w; | |||
int32_t gi; | |||
w = t1[u]; | |||
w -= (Q & ~ -((w - (Q >> 1)) >> 31)); | |||
gi = *(int32_t *)&w; | |||
if (gi < -127 || gi > +127) { | |||
return 0; | |||
} | |||
G[u] = (int8_t)gi; | |||
} | |||
return 1; | |||
} |