mirror of
https://github.com/henrydcase/pqc.git
synced 2024-11-22 07:35:38 +00:00
Fixed sampler bug (update to new upstream Falcon code 2019-09-18).
This commit is contained in:
parent
4be5e497dc
commit
44a050106a
@ -4,8 +4,8 @@ claimed-nist-level: 5
|
||||
length-public-key: 1793
|
||||
length-secret-key: 2305
|
||||
length-signature: 1330
|
||||
nistkat-sha256: ad3d17869fdc05deae13ffa2ef26bde125b42f61b2dcd861a1ae20adcb2accc5
|
||||
testvectors-sha256: bd8076c13722d8c555c68fc6bd7763e1a9dd5483ee7c8d1c74dd2df459c72a40
|
||||
nistkat-sha256: ef2104e326221515621638ca03cd99802271bdd9907e2ae5fc7b8d19d696c584
|
||||
testvectors-sha256: 14ee0e3f0ea4b9b25193a54eed9100b1bb1cf5dbc7813fd9dc9180c1ea1a1042
|
||||
principal-submitters:
|
||||
- Thomas Prest
|
||||
auxiliary-submitters:
|
||||
|
@ -33,10 +33,43 @@
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point(
|
||||
shake256_context *sc,
|
||||
uint16_t *x, unsigned logn, uint8_t *tmp) {
|
||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(
|
||||
inner_shake256_context *sc,
|
||||
uint16_t *x, unsigned logn) {
|
||||
/*
|
||||
* This is the straightforward per-the-spec implementation. It
|
||||
* is not constant-time, thus it might reveal information on the
|
||||
* plaintext (at least, enough to check the plaintext against a
|
||||
* list of potential plaintexts) in a scenario where the
|
||||
* attacker does not have access to the signature value or to
|
||||
* the public key, but knows the nonce (without knowledge of the
|
||||
* nonce, the hashed output cannot be matched against potential
|
||||
* plaintexts).
|
||||
*/
|
||||
size_t n;
|
||||
|
||||
/* n counts the 2^logn output coefficients that remain to be written to x[]. */
n = (size_t)1 << logn;
|
||||
while (n > 0) {
|
||||
uint8_t buf[2];
|
||||
uint32_t w;
|
||||
|
||||
/* Extract two bytes from sc and combine them as a big-endian 16-bit value. */
inner_shake256_extract(sc, (void *)buf, sizeof buf);
|
||||
w = ((unsigned)buf[0] << 8) | (unsigned)buf[1];
|
||||
/*
 * Rejection step: a sample is kept only if it is below
 * 61445 = 5*12289, so that every residue modulo 12289 has exactly
 * five accepted preimages. Rejected samples consume no output
 * slot (n is not decremented), which is why the number of loop
 * iterations depends on the hashed data.
 */
if (w < 61445) {
|
||||
/* Reduce w into 0..12288; at most four subtractions since w < 5*12289. */
while (w >= 12289) {
|
||||
w -= 12289;
|
||||
}
|
||||
*x ++ = (uint16_t)w;
|
||||
n --;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(
|
||||
inner_shake256_context *sc,
|
||||
uint16_t *x, unsigned logn, uint8_t *tmp) {
|
||||
/*
|
||||
* Each 16-bit sample is a value in 0..65535. The value is
|
||||
* kept if it falls in 0..61444 (because 61445 = 5*12289)
|
||||
@ -97,7 +130,7 @@ PQCLEAN_FALCON1024_CLEAN_hash_to_point(
|
||||
uint8_t buf[2];
|
||||
uint32_t w, wr;
|
||||
|
||||
shake256_extract(sc, buf, sizeof buf);
|
||||
inner_shake256_extract(sc, buf, sizeof buf);
|
||||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1];
|
||||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1));
|
||||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1));
|
||||
@ -196,7 +229,6 @@ PQCLEAN_FALCON1024_CLEAN_hash_to_point(
|
||||
*d = (uint16_t)(dv ^ (mk & (sv ^ dv)));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
|
@ -507,7 +507,7 @@ fpr_sqrt(fpr x) {
|
||||
|
||||
|
||||
uint64_t
|
||||
fpr_expm_p63(fpr x) {
|
||||
fpr_expm_p63(fpr x, fpr ccs) {
|
||||
/*
|
||||
* Polynomial approximation of exp(-x) is taken from FACCT:
|
||||
* https://eprint.iacr.org/2018/1234
|
||||
@ -539,6 +539,8 @@ fpr_expm_p63(fpr x) {
|
||||
|
||||
uint64_t z, y;
|
||||
unsigned u;
|
||||
uint32_t z0, z1, y0, y1;
|
||||
uint64_t a, b;
|
||||
|
||||
y = C[0];
|
||||
z = (uint64_t)fpr_trunc(fpr_mul(x, fpr_ptwo63)) << 1;
|
||||
@ -554,8 +556,7 @@ fpr_expm_p63(fpr x) {
|
||||
* also have appropriate IEEE754 floating-point support,
|
||||
* which is better.
|
||||
*/
|
||||
uint32_t z0, z1, y0, y1;
|
||||
uint64_t a, b, c;
|
||||
uint64_t c;
|
||||
|
||||
z0 = (uint32_t)z;
|
||||
z1 = (uint32_t)(z >> 32);
|
||||
@ -569,6 +570,24 @@ fpr_expm_p63(fpr x) {
|
||||
c += (uint64_t)z1 * (uint64_t)y1;
|
||||
y = C[u] - c;
|
||||
}
|
||||
|
||||
/*
|
||||
* The scaling factor must be applied at the end. Since y is now
|
||||
* in fixed-point notation, we have to convert the factor to the
|
||||
* same format, and do an extra integer multiplication.
|
||||
*/
|
||||
z = (uint64_t)fpr_trunc(fpr_mul(ccs, fpr_ptwo63)) << 1;
|
||||
z0 = (uint32_t)z;
|
||||
z1 = (uint32_t)(z >> 32);
|
||||
y0 = (uint32_t)y;
|
||||
y1 = (uint32_t)(y >> 32);
|
||||
a = ((uint64_t)z0 * (uint64_t)y1)
|
||||
+ (((uint64_t)z0 * (uint64_t)y0) >> 32);
|
||||
b = ((uint64_t)z1 * (uint64_t)y0);
|
||||
y = (a >> 32) + (b >> 32);
|
||||
y += (((uint64_t)(uint32_t)a + (uint64_t)(uint32_t)b) >> 32);
|
||||
y += (uint64_t)z1 * (uint64_t)y1;
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
|
@ -232,6 +232,8 @@ static const fpr fpr_zero = 0;
|
||||
static const fpr fpr_one = 4607182418800017408;
|
||||
static const fpr fpr_two = 4611686018427387904;
|
||||
static const fpr fpr_onehalf = 4602678819172646912;
|
||||
static const fpr fpr_invsqrt2 = 4604544271217802189;
|
||||
static const fpr fpr_invsqrt8 = 4600040671590431693;
|
||||
static const fpr fpr_ptwo31 = 4746794007248502784;
|
||||
static const fpr fpr_ptwo31m1 = 4746794007244308480;
|
||||
static const fpr fpr_mtwo31m1 = 13970166044099084288U;
|
||||
@ -444,7 +446,7 @@ fpr_lt(fpr x, fpr y) {
|
||||
* bits or so.
|
||||
*/
|
||||
#define fpr_expm_p63 PQCLEAN_FALCON1024_CLEAN_fpr_expm_p63
|
||||
uint64_t fpr_expm_p63(fpr x);
|
||||
uint64_t fpr_expm_p63(fpr x, fpr ccs);
|
||||
|
||||
#define fpr_gm_tab PQCLEAN_FALCON1024_CLEAN_fpr_gm_tab
|
||||
extern const fpr fpr_gm_tab[];
|
||||
|
@ -34,6 +34,45 @@
|
||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
|
||||
*/
|
||||
|
||||
/*
|
||||
* IMPORTANT API RULES
|
||||
* -------------------
|
||||
*
|
||||
* This API has some non-trivial usage rules:
|
||||
*
|
||||
*
|
||||
* - All public functions (i.e. the non-static ones) must be referenced
|
||||
* with the PQCLEAN_FALCON1024_CLEAN_ macro (e.g. PQCLEAN_FALCON1024_CLEAN_verify_raw for the verify_raw()
|
||||
* function). That macro adds a prefix to the name, which is
|
||||
* configurable with the FALCON_PREFIX macro. This allows compiling
|
||||
* the code into a specific "namespace" and potentially including
|
||||
* several versions of this code into a single application (e.g. to
|
||||
* have an AVX2 and a non-AVX2 variants and select the one to use at
|
||||
* runtime based on availability of AVX2 opcodes).
|
||||
*
|
||||
* - Functions that need temporary buffers expect them as a final
|
||||
* tmp[] array of type uint8_t*, with a size which is documented for
|
||||
* each function. However, most have some alignment requirements,
|
||||
* because they will use the array to store 16-bit, 32-bit or 64-bit
|
||||
* values (e.g. uint64_t or double). The caller must ensure proper
|
||||
* alignment. What happens on unaligned access depends on the
|
||||
* underlying architecture, ranging from a slight time penalty
|
||||
* to immediate termination of the process.
|
||||
*
|
||||
* - Some functions rely on specific rounding rules and precision for
|
||||
* floating-point numbers. On some systems (in particular 32-bit x86
|
||||
* with the 387 FPU), this requires setting a hardware control
|
||||
* word. The caller MUST use set_fpu_cw() to ensure proper precision:
|
||||
*
|
||||
* oldcw = set_fpu_cw(2);
|
||||
* PQCLEAN_FALCON1024_CLEAN_sign_dyn(...);
|
||||
* set_fpu_cw(oldcw);
|
||||
*
|
||||
* On systems where the native floating-point precision is already
|
||||
* proper, or integer-based emulation is used, the set_fpu_cw()
|
||||
* function does nothing, so it can be called systematically.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
@ -42,22 +81,47 @@
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Some computations with floating-point elements, in particular
|
||||
* rounding to the nearest integer, rely on operations using _exactly_
|
||||
* the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit
|
||||
* x86, the 387 FPU may be used (depending on the target OS) and, in
|
||||
* that case, may use more precision bits (i.e. 64 bits, for an 80-bit
|
||||
* total type length); to prevent miscomputations, we define an explicit
|
||||
* function that modifies the precision in the FPU control word.
|
||||
*
|
||||
* set_fpu_cw() sets the precision to the provided value, and returns
|
||||
* the previously set precision; callers are supposed to restore the
|
||||
* previous precision on exit. The correct (52-bit) precision is
|
||||
* configured with the value "2". On unsupported compilers, or on
|
||||
* targets other than 32-bit x86, or when the native 'double' type is
|
||||
* not used, the set_fpu_cw() function does nothing at all.
|
||||
*/
|
||||
/*
 * In this build set_fpu_cw() is a no-op: it touches no FPU state and
 * simply hands its argument back to the caller.
 */
static inline unsigned
|
||||
set_fpu_cw(unsigned x) {
|
||||
/* Nothing to configure here; return the argument unchanged. */
return x;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* ==================================================================== */
|
||||
/*
|
||||
* SHAKE256 implementation (shake.c).
|
||||
*
|
||||
* API is defined to be easily replaced with the fips202.h API defined
|
||||
* as part of PQ Clean.
|
||||
* as part of PQClean.
|
||||
*/
|
||||
|
||||
|
||||
#include "fips202.h"
|
||||
|
||||
#define shake256_context shake256incctx
|
||||
#define shake256_init(sc) shake256_inc_init(sc)
|
||||
#define shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len)
|
||||
#define shake256_flip(sc) shake256_inc_finalize(sc)
|
||||
#define shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc)
|
||||
#define inner_shake256_context shake256incctx
|
||||
#define inner_shake256_init(sc) shake256_inc_init(sc)
|
||||
#define inner_shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len)
|
||||
#define inner_shake256_flip(sc) shake256_inc_finalize(sc)
|
||||
#define inner_shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc)
|
||||
|
||||
|
||||
/* ==================================================================== */
|
||||
@ -140,9 +204,22 @@ extern const uint8_t PQCLEAN_FALCON1024_CLEAN_max_sig_bits[];
|
||||
|
||||
/*
|
||||
* From a SHAKE256 context (must be already flipped), produce a new
|
||||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes.
|
||||
* point. This is the non-constant-time version, which may leak enough
|
||||
* information to serve as a stop condition on a brute force attack on
|
||||
* the hashed message (provided that the nonce value is known).
|
||||
*/
|
||||
void PQCLEAN_FALCON1024_CLEAN_hash_to_point(shake256_context *sc,
|
||||
void PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime(inner_shake256_context *sc,
|
||||
uint16_t *x, unsigned logn);
|
||||
|
||||
/*
|
||||
* From a SHAKE256 context (must be already flipped), produce a new
|
||||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes.
|
||||
* This function is constant-time but is typically more expensive than
|
||||
* PQCLEAN_FALCON1024_CLEAN_hash_to_point_vartime().
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
void PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(inner_shake256_context *sc,
|
||||
uint16_t *x, unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
@ -184,6 +261,8 @@ void PQCLEAN_FALCON1024_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn);
|
||||
* logn is the degree log
|
||||
* tmp[] temporary, must have at least 2*2^logn bytes
|
||||
* Returned value is 1 on success, 0 on error.
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
const uint16_t *h, unsigned logn, uint8_t *tmp);
|
||||
@ -195,6 +274,7 @@ int PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
* reported if f is not invertible mod phi mod q).
|
||||
*
|
||||
* The tmp[] array must have room for at least 2*2^logn elements.
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h,
|
||||
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp);
|
||||
@ -208,11 +288,53 @@ int PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h,
|
||||
* The tmp[] array must have room for at least 4*2^logn bytes.
|
||||
*
|
||||
* Returned value is 1 on success, 0 on error (f not invertible).
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G,
|
||||
const int8_t *f, const int8_t *g, const int8_t *F,
|
||||
unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
* Test whether a given polynomial is invertible modulo phi and q.
|
||||
* Polynomial coefficients are small integers.
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON1024_CLEAN_is_invertible(
|
||||
const int16_t *s2, unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
* Count the number of elements of value zero in the NTT representation
|
||||
* of the given polynomial: this is the number of primitive 2n-th roots
|
||||
* of unity (modulo q = 12289) that are roots of the provided polynomial
|
||||
* (taken modulo q).
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON1024_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
* Internal signature verification with public key recovery:
|
||||
* h[] receives the public key (NOT in NTT/Montgomery format)
|
||||
* c0[] contains the hashed nonce+message
|
||||
* s1[] is the first signature half
|
||||
* s2[] is the second signature half
|
||||
* logn is the degree log
|
||||
* tmp[] temporary, must have at least 2*2^logn bytes
|
||||
* Returned value is 1 on success, 0 on error. Success is returned if
|
||||
* the signature is a short enough vector; in that case, the public
|
||||
* key has been written to h[]. However, the caller must still
|
||||
* verify that h[] is the correct value (e.g. with regards to a known
|
||||
* hash of the public key).
|
||||
*
|
||||
* h[] may not overlap with any of the other arrays.
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON1024_CLEAN_verify_recover(uint16_t *h,
|
||||
const uint16_t *c0, const int16_t *s1, const int16_t *s2,
|
||||
unsigned logn, uint8_t *tmp);
|
||||
|
||||
/* ==================================================================== */
|
||||
/*
|
||||
* Implementation of floating-point real numbers (fpr.h, fpr.c).
|
||||
@ -358,7 +480,7 @@ typedef struct {
|
||||
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256
|
||||
* context (in "flipped" state) to obtain its initial state.
|
||||
*/
|
||||
void PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, shake256_context *src);
|
||||
void PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, inner_shake256_context *src);
|
||||
|
||||
/*
|
||||
* Refill the PRNG buffer. This is normally invoked automatically, and
|
||||
@ -586,6 +708,9 @@ void PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(fpr *f,
|
||||
|
||||
/*
|
||||
* Required sizes of the temporary buffer (in bytes).
|
||||
*
|
||||
* This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1
|
||||
* or 2) where it is slightly greater.
|
||||
*/
|
||||
#define FALCON_KEYGEN_TEMP_1 136
|
||||
#define FALCON_KEYGEN_TEMP_2 272
|
||||
@ -608,8 +733,11 @@ void PQCLEAN_FALCON1024_CLEAN_poly_merge_fft(fpr *f,
|
||||
* public key is written in h. Either or both of G and h may be NULL,
|
||||
* in which case the corresponding element is not returned (they can
|
||||
* be recomputed from f, g and F).
|
||||
*
|
||||
* tmp[] must have 64-bit alignment.
|
||||
* This function uses floating-point rounding (see set_fpu_cw()).
|
||||
*/
|
||||
void PQCLEAN_FALCON1024_CLEAN_keygen(shake256_context *rng,
|
||||
void PQCLEAN_FALCON1024_CLEAN_keygen(inner_shake256_context *rng,
|
||||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h,
|
||||
unsigned logn, uint8_t *tmp);
|
||||
|
||||
@ -624,6 +752,9 @@ void PQCLEAN_FALCON1024_CLEAN_keygen(shake256_context *rng,
|
||||
* a total of (8*logn+40)*2^logn bytes.
|
||||
*
|
||||
* The tmp[] array must have room for at least 48*2^logn bytes.
|
||||
*
|
||||
* tmp[] must have 64-bit alignment.
|
||||
* This function uses floating-point rounding (see set_fpu_cw()).
|
||||
*/
|
||||
void PQCLEAN_FALCON1024_CLEAN_expand_privkey(fpr *expanded_key,
|
||||
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G,
|
||||
@ -636,9 +767,15 @@ void PQCLEAN_FALCON1024_CLEAN_expand_privkey(fpr *expanded_key,
|
||||
*
|
||||
* The sig[] and hm[] buffers may overlap.
|
||||
*
|
||||
* On successful output, the start of the tmp[] buffer contains the s1
|
||||
* vector (as int16_t elements).
|
||||
*
|
||||
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes.
|
||||
*
|
||||
* tmp[] must have 64-bit alignment.
|
||||
* This function uses floating-point rounding (see set_fpu_cw()).
|
||||
*/
|
||||
void PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
void PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng,
|
||||
const fpr *expanded_key,
|
||||
const uint16_t *hm, unsigned logn, uint8_t *tmp);
|
||||
|
||||
@ -651,13 +788,47 @@ void PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
*
|
||||
* The sig[] and hm[] buffers may overlap.
|
||||
*
|
||||
* On successful output, the start of the tmp[] buffer contains the s1
|
||||
* vector (as int16_t elements).
|
||||
*
|
||||
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes.
|
||||
*
|
||||
* tmp[] must have 64-bit alignment.
|
||||
* This function uses floating-point rounding (see set_fpu_cw()).
|
||||
*/
|
||||
void PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng,
|
||||
void PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng,
|
||||
const int8_t *f, const int8_t *g,
|
||||
const int8_t *F, const int8_t *G,
|
||||
const uint16_t *hm, unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
* Internal sampler engine. Exported for tests.
|
||||
*
|
||||
* sampler_context wraps around a source of random numbers (PRNG) and
|
||||
* the sigma_min value (nominally dependent on the degree).
|
||||
*
|
||||
* sampler() takes as parameters:
|
||||
* ctx pointer to the sampler_context structure
|
||||
* mu center for the distribution
|
||||
* isigma inverse of the distribution standard deviation
|
||||
* It returns an integer sampled along the Gaussian distribution centered
|
||||
* on mu and of standard deviation sigma = 1/isigma.
|
||||
*
|
||||
* gaussian0_sampler() takes as parameter a pointer to a PRNG, and
|
||||
* returns an integer sampled along a half-Gaussian with standard
|
||||
* deviation sigma0 = 1.8205 (center is 0, returned value is
|
||||
* nonnegative).
|
||||
*/
|
||||
|
||||
/* Sampler state: a PRNG together with the sigma_min value. */
typedef struct {
|
||||
/* Source of random numbers used by the sampler. */
prng p;
|
||||
/* sigma_min value (nominally dependent on the degree). */
fpr sigma_min;
|
||||
} sampler_context;
|
||||
|
||||
int PQCLEAN_FALCON1024_CLEAN_sampler(void *ctx, fpr mu, fpr isigma);
|
||||
|
||||
int PQCLEAN_FALCON1024_CLEAN_gaussian0_sampler(prng *p);
|
||||
|
||||
/* ==================================================================== */
|
||||
|
||||
#endif
|
||||
|
@ -2171,6 +2171,9 @@ poly_sub_scaled_ntt(uint32_t *F, size_t Flen, size_t Fstride,
|
||||
|
||||
/* ==================================================================== */
|
||||
|
||||
|
||||
#define RNG_CONTEXT inner_shake256_context
|
||||
|
||||
/*
|
||||
* Get a random 8-byte integer from a SHAKE-based RNG. This function
|
||||
* ensures consistent interpretation of the SHAKE output so that
|
||||
@ -2178,14 +2181,14 @@ poly_sub_scaled_ntt(uint32_t *F, size_t Flen, size_t Fstride,
|
||||
* a known seed is used.
|
||||
*/
|
||||
static inline uint64_t
|
||||
get_rng_u64(shake256_context *rng) {
|
||||
get_rng_u64(inner_shake256_context *rng) {
|
||||
/*
|
||||
* We enforce little-endian representation.
|
||||
*/
|
||||
|
||||
uint8_t tmp[8];
|
||||
|
||||
shake256_extract(rng, tmp, sizeof tmp);
|
||||
inner_shake256_extract(rng, tmp, sizeof tmp);
|
||||
return (uint64_t)tmp[0]
|
||||
| ((uint64_t)tmp[1] << 8)
|
||||
| ((uint64_t)tmp[2] << 16)
|
||||
@ -2196,6 +2199,7 @@ get_rng_u64(shake256_context *rng) {
|
||||
| ((uint64_t)tmp[7] << 56);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Table below incarnates a discrete Gaussian distribution:
|
||||
* D(x) = exp(-(x^2)/(2*sigma^2))
|
||||
@ -2227,7 +2231,7 @@ static const uint64_t gauss_1024_12289[] = {
|
||||
* together for lower dimensions.
|
||||
*/
|
||||
static int
|
||||
mkgauss(shake256_context *rng, unsigned logn) {
|
||||
mkgauss(RNG_CONTEXT *rng, unsigned logn) {
|
||||
unsigned u, g;
|
||||
int val;
|
||||
|
||||
@ -3156,6 +3160,7 @@ solve_NTRU_intermediate(unsigned logn_top,
|
||||
fpr xv;
|
||||
|
||||
xv = fpr_mul(rt2[u], pdc);
|
||||
|
||||
/*
|
||||
* Sometimes the values can be out-of-bounds if
|
||||
* the algorithm fails; we must not call
|
||||
@ -4006,7 +4011,7 @@ solve_NTRU(unsigned logn, int8_t *F, int8_t *G,
|
||||
* also makes sure that the resultant of the polynomial with phi is odd.
|
||||
*/
|
||||
static void
|
||||
poly_small_mkgauss(shake256_context *rng, int8_t *f, unsigned logn) {
|
||||
poly_small_mkgauss(RNG_CONTEXT *rng, int8_t *f, unsigned logn) {
|
||||
size_t n, u;
|
||||
unsigned mod2;
|
||||
|
||||
@ -4046,7 +4051,7 @@ restart:
|
||||
|
||||
/* see falcon.h */
|
||||
void
|
||||
PQCLEAN_FALCON1024_CLEAN_keygen(shake256_context *rng,
|
||||
PQCLEAN_FALCON1024_CLEAN_keygen(inner_shake256_context *rng,
|
||||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h,
|
||||
unsigned logn, uint8_t *tmp) {
|
||||
/*
|
||||
@ -4070,8 +4075,10 @@ PQCLEAN_FALCON1024_CLEAN_keygen(shake256_context *rng,
|
||||
*/
|
||||
size_t n, u;
|
||||
uint16_t *h2, *tmp2;
|
||||
RNG_CONTEXT *rc;
|
||||
|
||||
n = MKN(logn);
|
||||
rc = rng;
|
||||
|
||||
/*
|
||||
* We need to generate f and g randomly, until we find values
|
||||
@ -4104,8 +4111,8 @@ PQCLEAN_FALCON1024_CLEAN_keygen(shake256_context *rng,
|
||||
* (i.e. the resultant of the polynomial with phi
|
||||
* will be odd).
|
||||
*/
|
||||
poly_small_mkgauss(rng, f, logn);
|
||||
poly_small_mkgauss(rng, g, logn);
|
||||
poly_small_mkgauss(rc, f, logn);
|
||||
poly_small_mkgauss(rc, g, logn);
|
||||
|
||||
/*
|
||||
* Verify that all coefficients are within the bounds
|
||||
|
@ -51,16 +51,16 @@ PQCLEAN_FALCON1024_CLEAN_crypto_sign_keypair(
|
||||
int8_t f[1024], g[1024], F[1024];
|
||||
uint16_t h[1024];
|
||||
unsigned char seed[48];
|
||||
shake256_context rng;
|
||||
inner_shake256_context rng;
|
||||
size_t u, v;
|
||||
|
||||
/*
|
||||
* Generate key pair.
|
||||
*/
|
||||
randombytes(seed, sizeof seed);
|
||||
shake256_init(&rng);
|
||||
shake256_inject(&rng, seed, sizeof seed);
|
||||
shake256_flip(&rng);
|
||||
inner_shake256_init(&rng);
|
||||
inner_shake256_inject(&rng, seed, sizeof seed);
|
||||
inner_shake256_flip(&rng);
|
||||
PQCLEAN_FALCON1024_CLEAN_keygen(&rng, f, g, F, NULL, h, 10, tmp.b);
|
||||
|
||||
/*
|
||||
@ -135,7 +135,7 @@ do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen,
|
||||
uint16_t hm[1024];
|
||||
} r;
|
||||
unsigned char seed[48];
|
||||
shake256_context sc;
|
||||
inner_shake256_context sc;
|
||||
size_t u, v;
|
||||
|
||||
/*
|
||||
@ -181,19 +181,19 @@ do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen,
|
||||
/*
|
||||
* Hash message nonce + message into a vector.
|
||||
*/
|
||||
shake256_init(&sc);
|
||||
shake256_inject(&sc, nonce, NONCELEN);
|
||||
shake256_inject(&sc, m, mlen);
|
||||
shake256_flip(&sc);
|
||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point(&sc, r.hm, 10, tmp.b);
|
||||
inner_shake256_init(&sc);
|
||||
inner_shake256_inject(&sc, nonce, NONCELEN);
|
||||
inner_shake256_inject(&sc, m, mlen);
|
||||
inner_shake256_flip(&sc);
|
||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(&sc, r.hm, 10, tmp.b);
|
||||
|
||||
/*
|
||||
* Initialize a RNG.
|
||||
*/
|
||||
randombytes(seed, sizeof seed);
|
||||
shake256_init(&sc);
|
||||
shake256_inject(&sc, seed, sizeof seed);
|
||||
shake256_flip(&sc);
|
||||
inner_shake256_init(&sc);
|
||||
inner_shake256_inject(&sc, seed, sizeof seed);
|
||||
inner_shake256_flip(&sc);
|
||||
|
||||
/*
|
||||
* Compute and return the signature. This loops until a signature
|
||||
@ -225,7 +225,7 @@ do_verify(
|
||||
} tmp;
|
||||
uint16_t h[1024], hm[1024];
|
||||
int16_t sig[1024];
|
||||
shake256_context sc;
|
||||
inner_shake256_context sc;
|
||||
|
||||
/*
|
||||
* Decode public key.
|
||||
@ -253,11 +253,11 @@ do_verify(
|
||||
/*
|
||||
* Hash nonce + message into a vector.
|
||||
*/
|
||||
shake256_init(&sc);
|
||||
shake256_inject(&sc, nonce, NONCELEN);
|
||||
shake256_inject(&sc, m, mlen);
|
||||
shake256_flip(&sc);
|
||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point(&sc, hm, 10, tmp.b);
|
||||
inner_shake256_init(&sc);
|
||||
inner_shake256_inject(&sc, nonce, NONCELEN);
|
||||
inner_shake256_inject(&sc, m, mlen);
|
||||
inner_shake256_flip(&sc);
|
||||
PQCLEAN_FALCON1024_CLEAN_hash_to_point_ct(&sc, hm, 10, tmp.b);
|
||||
|
||||
/*
|
||||
* Verify signature.
|
||||
|
@ -36,7 +36,7 @@
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, shake256_context *src) {
|
||||
PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, inner_shake256_context *src) {
|
||||
/*
|
||||
* To ensure reproducibility for a given seed, we
|
||||
* must enforce little-endian interpretation of
|
||||
@ -46,7 +46,7 @@ PQCLEAN_FALCON1024_CLEAN_prng_init(prng *p, shake256_context *src) {
|
||||
uint64_t th, tl;
|
||||
int i;
|
||||
|
||||
shake256_extract(src, tmp, 56);
|
||||
inner_shake256_extract(src, tmp, 56);
|
||||
for (i = 0; i < 14; i ++) {
|
||||
uint32_t w;
|
||||
|
||||
|
@ -417,8 +417,170 @@ ffSampling_fft(samplerZ samp, void *samp_ctx,
|
||||
size_t n, hn;
|
||||
const fpr *tree0, *tree1;
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
if (n == 1) {
|
||||
/*
|
||||
* When logn == 2, we inline the last two recursion levels.
|
||||
*/
|
||||
if (logn == 2) {
|
||||
fpr x0, x1, y0, y1, w0, w1, w2, w3, sigma;
|
||||
fpr a_re, a_im, b_re, b_im, c_re, c_im;
|
||||
|
||||
tree0 = tree + 4;
|
||||
tree1 = tree + 8;
|
||||
|
||||
/*
|
||||
* We split t1 into w*, then do the recursive invocation,
|
||||
* with output in w*. We finally merge back into z1.
|
||||
*/
|
||||
a_re = t1[0];
|
||||
a_im = t1[2];
|
||||
b_re = t1[1];
|
||||
b_im = t1[3];
|
||||
c_re = fpr_add(a_re, b_re);
|
||||
c_im = fpr_add(a_im, b_im);
|
||||
w0 = fpr_half(c_re);
|
||||
w1 = fpr_half(c_im);
|
||||
c_re = fpr_sub(a_re, b_re);
|
||||
c_im = fpr_sub(a_im, b_im);
|
||||
w2 = fpr_mul(fpr_add(c_re, c_im), fpr_invsqrt8);
|
||||
w3 = fpr_mul(fpr_sub(c_im, c_re), fpr_invsqrt8);
|
||||
|
||||
x0 = w2;
|
||||
x1 = w3;
|
||||
sigma = tree1[3];
|
||||
w2 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
w3 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
a_re = fpr_sub(x0, w2);
|
||||
a_im = fpr_sub(x1, w3);
|
||||
b_re = tree1[0];
|
||||
b_im = tree1[1];
|
||||
c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
x0 = fpr_add(c_re, w0);
|
||||
x1 = fpr_add(c_im, w1);
|
||||
sigma = tree1[2];
|
||||
w0 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
w1 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
|
||||
a_re = w0;
|
||||
a_im = w1;
|
||||
b_re = w2;
|
||||
b_im = w3;
|
||||
c_re = fpr_mul(fpr_sub(b_re, b_im), fpr_invsqrt2);
|
||||
c_im = fpr_mul(fpr_add(b_re, b_im), fpr_invsqrt2);
|
||||
z1[0] = w0 = fpr_add(a_re, c_re);
|
||||
z1[2] = w2 = fpr_add(a_im, c_im);
|
||||
z1[1] = w1 = fpr_sub(a_re, c_re);
|
||||
z1[3] = w3 = fpr_sub(a_im, c_im);
|
||||
|
||||
/*
|
||||
* Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in w*.
|
||||
*/
|
||||
w0 = fpr_sub(t1[0], w0);
|
||||
w1 = fpr_sub(t1[1], w1);
|
||||
w2 = fpr_sub(t1[2], w2);
|
||||
w3 = fpr_sub(t1[3], w3);
|
||||
|
||||
a_re = w0;
|
||||
a_im = w2;
|
||||
b_re = tree[0];
|
||||
b_im = tree[2];
|
||||
w0 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
w2 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
a_re = w1;
|
||||
a_im = w3;
|
||||
b_re = tree[1];
|
||||
b_im = tree[3];
|
||||
w1 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
w3 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
|
||||
w0 = fpr_add(w0, t0[0]);
|
||||
w1 = fpr_add(w1, t0[1]);
|
||||
w2 = fpr_add(w2, t0[2]);
|
||||
w3 = fpr_add(w3, t0[3]);
|
||||
|
||||
/*
|
||||
* Second recursive invocation.
|
||||
*/
|
||||
a_re = w0;
|
||||
a_im = w2;
|
||||
b_re = w1;
|
||||
b_im = w3;
|
||||
c_re = fpr_add(a_re, b_re);
|
||||
c_im = fpr_add(a_im, b_im);
|
||||
w0 = fpr_half(c_re);
|
||||
w1 = fpr_half(c_im);
|
||||
c_re = fpr_sub(a_re, b_re);
|
||||
c_im = fpr_sub(a_im, b_im);
|
||||
w2 = fpr_mul(fpr_add(c_re, c_im), fpr_invsqrt8);
|
||||
w3 = fpr_mul(fpr_sub(c_im, c_re), fpr_invsqrt8);
|
||||
|
||||
x0 = w2;
|
||||
x1 = w3;
|
||||
sigma = tree0[3];
|
||||
w2 = y0 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
w3 = y1 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
a_re = fpr_sub(x0, y0);
|
||||
a_im = fpr_sub(x1, y1);
|
||||
b_re = tree0[0];
|
||||
b_im = tree0[1];
|
||||
c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
x0 = fpr_add(c_re, w0);
|
||||
x1 = fpr_add(c_im, w1);
|
||||
sigma = tree0[2];
|
||||
w0 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
w1 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
|
||||
a_re = w0;
|
||||
a_im = w1;
|
||||
b_re = w2;
|
||||
b_im = w3;
|
||||
c_re = fpr_mul(fpr_sub(b_re, b_im), fpr_invsqrt2);
|
||||
c_im = fpr_mul(fpr_add(b_re, b_im), fpr_invsqrt2);
|
||||
z0[0] = fpr_add(a_re, c_re);
|
||||
z0[2] = fpr_add(a_im, c_im);
|
||||
z0[1] = fpr_sub(a_re, c_re);
|
||||
z0[3] = fpr_sub(a_im, c_im);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Case logn == 1 is reachable only when using Falcon-2 (the
|
||||
* smallest size for which Falcon is mathematically defined, but
|
||||
* of course way too insecure to be of any use).
|
||||
*/
|
||||
if (logn == 1) {
|
||||
fpr x0, x1, y0, y1, sigma;
|
||||
fpr a_re, a_im, b_re, b_im, c_re, c_im;
|
||||
|
||||
x0 = t1[0];
|
||||
x1 = t1[1];
|
||||
sigma = tree[3];
|
||||
z1[0] = y0 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
z1[1] = y1 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
a_re = fpr_sub(x0, y0);
|
||||
a_im = fpr_sub(x1, y1);
|
||||
b_re = tree[0];
|
||||
b_im = tree[1];
|
||||
c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
x0 = fpr_add(c_re, t0[0]);
|
||||
x1 = fpr_add(c_im, t0[1]);
|
||||
sigma = tree[2];
|
||||
z0[0] = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
z0[1] = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Normal end of recursion is for logn == 0. Since the last
|
||||
* steps of the recursions were inlined in the blocks above
|
||||
* (when logn == 1 or 2), this case is not reachable, and is
|
||||
* retained here only for documentation purposes.
|
||||
|
||||
if (logn == 0) {
|
||||
fpr x0, x1, sigma;
|
||||
|
||||
x0 = t0[0];
|
||||
@ -429,6 +591,13 @@ ffSampling_fft(samplerZ samp, void *samp_ctx,
|
||||
return;
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
* General recursive case (logn >= 3).
|
||||
*/
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
hn = n >> 1;
|
||||
tree0 = tree + n;
|
||||
tree1 = tree + n + ffLDL_treesize(logn - 1);
|
||||
@ -480,7 +649,7 @@ do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
const fpr *b00, *b01, *b10, *b11, *tree;
|
||||
fpr ni;
|
||||
uint32_t sqn, ng;
|
||||
int16_t *s2tmp;
|
||||
int16_t *s1tmp, *s2tmp;
|
||||
|
||||
n = MKN(logn);
|
||||
t0 = tmp;
|
||||
@ -542,6 +711,7 @@ do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
/*
|
||||
* Compute the signature.
|
||||
*/
|
||||
s1tmp = (int16_t *)tx;
|
||||
sqn = 0;
|
||||
ng = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
@ -550,6 +720,7 @@ do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]);
|
||||
sqn += (uint32_t)(z * z);
|
||||
ng |= sqn;
|
||||
s1tmp[u] = (int16_t)z;
|
||||
}
|
||||
sqn |= -(ng >> 31);
|
||||
|
||||
@ -568,6 +739,7 @@ do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
}
|
||||
if (PQCLEAN_FALCON1024_CLEAN_is_short_half(sqn, s2tmp, logn)) {
|
||||
memcpy(s2, s2tmp, n * sizeof * s2);
|
||||
memcpy(tmp, s1tmp, n * sizeof * s1tmp);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
@ -592,7 +764,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
fpr *b00, *b01, *b10, *b11, *g00, *g01, *g11;
|
||||
fpr ni;
|
||||
uint32_t sqn, ng;
|
||||
int16_t *s2tmp;
|
||||
int16_t *s1tmp, *s2tmp;
|
||||
|
||||
n = MKN(logn);
|
||||
|
||||
@ -745,6 +917,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
PQCLEAN_FALCON1024_CLEAN_iFFT(t0, logn);
|
||||
PQCLEAN_FALCON1024_CLEAN_iFFT(t1, logn);
|
||||
|
||||
s1tmp = (int16_t *)tx;
|
||||
sqn = 0;
|
||||
ng = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
@ -753,6 +926,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]);
|
||||
sqn += (uint32_t)(z * z);
|
||||
ng |= sqn;
|
||||
s1tmp[u] = (int16_t)z;
|
||||
}
|
||||
sqn |= -(ng >> 31);
|
||||
|
||||
@ -771,6 +945,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
}
|
||||
if (PQCLEAN_FALCON1024_CLEAN_is_short_half(sqn, s2tmp, logn)) {
|
||||
memcpy(s2, s2tmp, n * sizeof * s2);
|
||||
memcpy(tmp, s1tmp, n * sizeof * s1tmp);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
@ -780,29 +955,28 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
* Sample an integer value along a half-gaussian distribution centered
|
||||
* on zero and standard deviation 1.8205, with a precision of 72 bits.
|
||||
*/
|
||||
static int
|
||||
gaussian0_sampler(prng *p) {
|
||||
int
|
||||
PQCLEAN_FALCON1024_CLEAN_gaussian0_sampler(prng *p) {
|
||||
|
||||
static const uint32_t dist[] = {
|
||||
6031371U, 13708371U, 13035518U,
|
||||
5186761U, 1487980U, 12270720U,
|
||||
3298653U, 4688887U, 5511555U,
|
||||
1551448U, 9247616U, 9467675U,
|
||||
539632U, 14076116U, 5909365U,
|
||||
138809U, 10836485U, 13263376U,
|
||||
26405U, 15335617U, 16601723U,
|
||||
3714U, 14514117U, 13240074U,
|
||||
386U, 8324059U, 3276722U,
|
||||
29U, 12376792U, 7821247U,
|
||||
1U, 11611789U, 3398254U,
|
||||
0U, 1194629U, 4532444U,
|
||||
0U, 37177U, 2973575U,
|
||||
0U, 855U, 10369757U,
|
||||
0U, 14U, 9441597U,
|
||||
0U, 0U, 3075302U,
|
||||
0U, 0U, 28626U,
|
||||
0U, 0U, 197U,
|
||||
0U, 0U, 1U
|
||||
10745844u, 3068844u, 3741698u,
|
||||
5559083u, 1580863u, 8248194u,
|
||||
2260429u, 13669192u, 2736639u,
|
||||
708981u, 4421575u, 10046180u,
|
||||
169348u, 7122675u, 4136815u,
|
||||
30538u, 13063405u, 7650655u,
|
||||
4132u, 14505003u, 7826148u,
|
||||
417u, 16768101u, 11363290u,
|
||||
31u, 8444042u, 8086568u,
|
||||
1u, 12844466u, 265321u,
|
||||
0u, 1232676u, 13644283u,
|
||||
0u, 38047u, 9111839u,
|
||||
0u, 870u, 6138264u,
|
||||
0u, 14u, 12545723u,
|
||||
0u, 0u, 3104126u,
|
||||
0u, 0u, 28824u,
|
||||
0u, 0u, 198u,
|
||||
0u, 0u, 1u
|
||||
};
|
||||
|
||||
uint32_t v0, v1, v2, hi;
|
||||
@ -843,7 +1017,7 @@ gaussian0_sampler(prng *p) {
|
||||
* Sample a bit with probability exp(-x) for some x >= 0.
|
||||
*/
|
||||
static int
|
||||
BerExp(prng *p, fpr x) {
|
||||
BerExp(prng *p, fpr x, fpr ccs) {
|
||||
int s, i;
|
||||
fpr r;
|
||||
uint32_t sw, w;
|
||||
@ -880,7 +1054,7 @@ BerExp(prng *p, fpr x) {
|
||||
* case). The bias is negligible since fpr_expm_p63() only computes
|
||||
* with 51 bits of precision or so.
|
||||
*/
|
||||
z = ((fpr_expm_p63(r) << 1) - 1) >> s;
|
||||
z = ((fpr_expm_p63(r, ccs) << 1) - 1) >> s;
|
||||
|
||||
/*
|
||||
* Sample a bit with probability exp(-x). Since x = s*log(2) + r,
|
||||
@ -896,11 +1070,6 @@ BerExp(prng *p, fpr x) {
|
||||
return (int)(w >> 31);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
prng p;
|
||||
fpr sigma_min;
|
||||
} sampler_context;
|
||||
|
||||
/*
|
||||
* The sampler produces a random integer that follows a discrete Gaussian
|
||||
* distribution, centered on mu, and with standard deviation sigma. The
|
||||
@ -909,8 +1078,8 @@ typedef struct {
|
||||
* The value of sigma MUST lie between 1 and 2 (i.e. isigma lies between
|
||||
* 0.5 and 1); in Falcon, sigma should always be between 1.2 and 1.9.
|
||||
*/
|
||||
static int
|
||||
sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
int
|
||||
PQCLEAN_FALCON1024_CLEAN_sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
sampler_context *spc;
|
||||
int s;
|
||||
fpr r, dss, ccs;
|
||||
@ -952,7 +1121,7 @@ sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
* - b = 0: z <= 0 and sampled against a Gaussian
|
||||
* centered on 0.
|
||||
*/
|
||||
z0 = gaussian0_sampler(&spc->p);
|
||||
z0 = PQCLEAN_FALCON1024_CLEAN_gaussian0_sampler(&spc->p);
|
||||
b = prng_get_u8(&spc->p) & 1;
|
||||
z = b + ((b << 1) - 1) * z0;
|
||||
|
||||
@ -983,8 +1152,7 @@ sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
*/
|
||||
x = fpr_mul(fpr_sqr(fpr_sub(fpr_of(z), r)), dss);
|
||||
x = fpr_sub(x, fpr_mul(fpr_of(z0 * z0), fpr_inv_2sqrsigma0));
|
||||
x = fpr_mul(x, ccs);
|
||||
if (BerExp(&spc->p, x)) {
|
||||
if (BerExp(&spc->p, x, ccs)) {
|
||||
/*
|
||||
* Rejection sampling was centered on r, but the
|
||||
* actual center is mu = s + r.
|
||||
@ -996,7 +1164,7 @@ sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng,
|
||||
const fpr *expanded_key,
|
||||
const uint16_t *hm, unsigned logn, uint8_t *tmp) {
|
||||
fpr *ftmp;
|
||||
@ -1025,7 +1193,7 @@ PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
? fpr_sigma_min_10
|
||||
: fpr_sigma_min_9;
|
||||
PQCLEAN_FALCON1024_CLEAN_prng_init(&spc.p, rng);
|
||||
samp = sampler;
|
||||
samp = PQCLEAN_FALCON1024_CLEAN_sampler;
|
||||
samp_ctx = &spc;
|
||||
|
||||
/*
|
||||
@ -1040,7 +1208,7 @@ PQCLEAN_FALCON1024_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng,
|
||||
PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng,
|
||||
const int8_t *f, const int8_t *g,
|
||||
const int8_t *F, const int8_t *G,
|
||||
const uint16_t *hm, unsigned logn, uint8_t *tmp) {
|
||||
@ -1070,7 +1238,7 @@ PQCLEAN_FALCON1024_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng,
|
||||
? fpr_sigma_min_10
|
||||
: fpr_sigma_min_9;
|
||||
PQCLEAN_FALCON1024_CLEAN_prng_init(&spc.p, rng);
|
||||
samp = sampler;
|
||||
samp = PQCLEAN_FALCON1024_CLEAN_sampler;
|
||||
samp_ctx = &spc;
|
||||
|
||||
/*
|
||||
|
@ -649,7 +649,7 @@ PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute s1 = s2*h - c0 mod phi mod q (in tt[]).
|
||||
* Compute -s1 = s2*h - c0 mod phi mod q (in tt[]).
|
||||
*/
|
||||
mq_NTT(tt, logn);
|
||||
mq_poly_montymul_ntt(tt, h, logn);
|
||||
@ -657,7 +657,7 @@ PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
mq_poly_sub(tt, c0, logn);
|
||||
|
||||
/*
|
||||
* Normalize s1 elements into the [-q/2..q/2] range.
|
||||
* Normalize -s1 elements into the [-q/2..q/2] range.
|
||||
*/
|
||||
for (u = 0; u < n; u ++) {
|
||||
int32_t w;
|
||||
@ -668,7 +668,7 @@ PQCLEAN_FALCON1024_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
}
|
||||
|
||||
/*
|
||||
* Signature is valid if and only if the aggregate (s1,s2) vector
|
||||
* Signature is valid if and only if the aggregate (-s1,s2) vector
|
||||
* is short enough.
|
||||
*/
|
||||
return PQCLEAN_FALCON1024_CLEAN_is_short((int16_t *)tt, s2, logn);
|
||||
@ -699,7 +699,7 @@ PQCLEAN_FALCON1024_CLEAN_compute_public(uint16_t *h,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* see internal.h */
|
||||
/* see inner.h */
|
||||
int
|
||||
PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G,
|
||||
const int8_t *f, const int8_t *g, const int8_t *F,
|
||||
@ -743,3 +743,110 @@ PQCLEAN_FALCON1024_CLEAN_complete_private(int8_t *G,
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
int
|
||||
PQCLEAN_FALCON1024_CLEAN_is_invertible(
|
||||
const int16_t *s2, unsigned logn, uint8_t *tmp) {
|
||||
size_t u, n;
|
||||
uint16_t *tt;
|
||||
uint32_t r;
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
tt = (uint16_t *)tmp;
|
||||
for (u = 0; u < n; u ++) {
|
||||
uint32_t w;
|
||||
|
||||
w = (uint32_t)s2[u];
|
||||
w += Q & -(w >> 31);
|
||||
tt[u] = (uint16_t)w;
|
||||
}
|
||||
mq_NTT(tt, logn);
|
||||
r = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
r |= (uint32_t)(tt[u] - 1);
|
||||
}
|
||||
return (int)(1u - (r >> 31));
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
int
|
||||
PQCLEAN_FALCON1024_CLEAN_verify_recover(uint16_t *h,
|
||||
const uint16_t *c0, const int16_t *s1, const int16_t *s2,
|
||||
unsigned logn, uint8_t *tmp) {
|
||||
size_t u, n;
|
||||
uint16_t *tt;
|
||||
uint32_t r;
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
|
||||
/*
|
||||
* Reduce elements of s1 and s2 modulo q; then write s2 into tt[]
|
||||
* and c0 - s1 into h[].
|
||||
*/
|
||||
tt = (uint16_t *)tmp;
|
||||
for (u = 0; u < n; u ++) {
|
||||
uint32_t w;
|
||||
|
||||
w = (uint32_t)s2[u];
|
||||
w += Q & -(w >> 31);
|
||||
tt[u] = (uint16_t)w;
|
||||
|
||||
w = (uint32_t)s1[u];
|
||||
w += Q & -(w >> 31);
|
||||
w = mq_sub(c0[u], w);
|
||||
h[u] = (uint16_t)w;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute h = (c0 - s1) / s2. If one of the coefficients of s2
|
||||
* is zero (in NTT representation) then the operation fails. We
|
||||
* keep that information into a flag so that we do not deviate
|
||||
* from strict constant-time processing; if all coefficients of
|
||||
* s2 are non-zero, then the high bit of r will be zero.
|
||||
*/
|
||||
mq_NTT(tt, logn);
|
||||
mq_NTT(h, logn);
|
||||
r = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
r |= (uint32_t)(tt[u] - 1);
|
||||
h[u] = (uint16_t)mq_div_12289(h[u], tt[u]);
|
||||
}
|
||||
mq_iNTT(h, logn);
|
||||
|
||||
/*
|
||||
* Signature is acceptable if and only if it is short enough,
|
||||
* and s2 was invertible mod phi mod q. The caller must still
|
||||
* check that the rebuilt public key matches the expected
|
||||
* value (e.g. through a hash).
|
||||
*/
|
||||
r = ~r & (uint32_t) - PQCLEAN_FALCON1024_CLEAN_is_short(s1, s2, logn);
|
||||
return (int)(r >> 31);
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
int
|
||||
PQCLEAN_FALCON1024_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) {
|
||||
uint16_t *s2;
|
||||
size_t u, n;
|
||||
uint32_t r;
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
s2 = (uint16_t *)tmp;
|
||||
for (u = 0; u < n; u ++) {
|
||||
uint32_t w;
|
||||
|
||||
w = (uint32_t)sig[u];
|
||||
w += Q & -(w >> 31);
|
||||
s2[u] = (uint16_t)w;
|
||||
}
|
||||
mq_NTT(s2, logn);
|
||||
r = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
uint32_t w;
|
||||
|
||||
w = (uint32_t)s2[u] - 1u;
|
||||
r += (w >> 31);
|
||||
}
|
||||
return (int)r;
|
||||
}
|
||||
|
@ -4,8 +4,8 @@ claimed-nist-level: 1
|
||||
length-public-key: 897
|
||||
length-secret-key: 1281
|
||||
length-signature: 690
|
||||
nistkat-sha256: abc62e7be3d7c1db757ba3cbb771cfdc89c6b36fb5efc885593db89ec2ea8bc4
|
||||
testvectors-sha256: 1a1b170fc9e4623e7ff519c15ec7a2dda55e94a175756b7c72429451bd226b09
|
||||
nistkat-sha256: e9c3985f1ce732e29ca81aeca091f20d4dbb5beb456ee1a7ab41d04add4dab10
|
||||
testvectors-sha256: 036b5e803ab825146502513b7460b24cc9493f8e366323cd5e30e2dc6d4ca6a7
|
||||
principal-submitters:
|
||||
- Thomas Prest
|
||||
auxiliary-submitters:
|
||||
|
@ -33,10 +33,43 @@
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON512_CLEAN_hash_to_point(
|
||||
shake256_context *sc,
|
||||
uint16_t *x, unsigned logn, uint8_t *tmp) {
|
||||
PQCLEAN_FALCON512_CLEAN_hash_to_point_vartime(
|
||||
inner_shake256_context *sc,
|
||||
uint16_t *x, unsigned logn) {
|
||||
/*
|
||||
* This is the straightforward per-the-spec implementation. It
|
||||
* is not constant-time, thus it might reveal information on the
|
||||
* plaintext (at least, enough to check the plaintext against a
|
||||
* list of potential plaintexts) in a scenario where the
|
||||
* attacker does not have access to the signature value or to
|
||||
* the public key, but knows the nonce (without knowledge of the
|
||||
* nonce, the hashed output cannot be matched against potential
|
||||
* plaintexts).
|
||||
*/
|
||||
size_t n;
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
while (n > 0) {
|
||||
uint8_t buf[2];
|
||||
uint32_t w;
|
||||
|
||||
inner_shake256_extract(sc, (void *)buf, sizeof buf);
|
||||
w = ((unsigned)buf[0] << 8) | (unsigned)buf[1];
|
||||
if (w < 61445) {
|
||||
while (w >= 12289) {
|
||||
w -= 12289;
|
||||
}
|
||||
*x ++ = (uint16_t)w;
|
||||
n --;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON512_CLEAN_hash_to_point_ct(
|
||||
inner_shake256_context *sc,
|
||||
uint16_t *x, unsigned logn, uint8_t *tmp) {
|
||||
/*
|
||||
* Each 16-bit sample is a value in 0..65535. The value is
|
||||
* kept if it falls in 0..61444 (because 61445 = 5*12289)
|
||||
@ -97,7 +130,7 @@ PQCLEAN_FALCON512_CLEAN_hash_to_point(
|
||||
uint8_t buf[2];
|
||||
uint32_t w, wr;
|
||||
|
||||
shake256_extract(sc, buf, sizeof buf);
|
||||
inner_shake256_extract(sc, buf, sizeof buf);
|
||||
w = ((uint32_t)buf[0] << 8) | (uint32_t)buf[1];
|
||||
wr = w - ((uint32_t)24578 & (((w - 24578) >> 31) - 1));
|
||||
wr = wr - ((uint32_t)24578 & (((wr - 24578) >> 31) - 1));
|
||||
@ -196,7 +229,6 @@ PQCLEAN_FALCON512_CLEAN_hash_to_point(
|
||||
*d = (uint16_t)(dv ^ (mk & (sv ^ dv)));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
|
@ -507,7 +507,7 @@ fpr_sqrt(fpr x) {
|
||||
|
||||
|
||||
uint64_t
|
||||
fpr_expm_p63(fpr x) {
|
||||
fpr_expm_p63(fpr x, fpr ccs) {
|
||||
/*
|
||||
* Polynomial approximation of exp(-x) is taken from FACCT:
|
||||
* https://eprint.iacr.org/2018/1234
|
||||
@ -539,6 +539,8 @@ fpr_expm_p63(fpr x) {
|
||||
|
||||
uint64_t z, y;
|
||||
unsigned u;
|
||||
uint32_t z0, z1, y0, y1;
|
||||
uint64_t a, b;
|
||||
|
||||
y = C[0];
|
||||
z = (uint64_t)fpr_trunc(fpr_mul(x, fpr_ptwo63)) << 1;
|
||||
@ -554,8 +556,7 @@ fpr_expm_p63(fpr x) {
|
||||
* also have appropriate IEEE754 floating-point support,
|
||||
* which is better.
|
||||
*/
|
||||
uint32_t z0, z1, y0, y1;
|
||||
uint64_t a, b, c;
|
||||
uint64_t c;
|
||||
|
||||
z0 = (uint32_t)z;
|
||||
z1 = (uint32_t)(z >> 32);
|
||||
@ -569,6 +570,24 @@ fpr_expm_p63(fpr x) {
|
||||
c += (uint64_t)z1 * (uint64_t)y1;
|
||||
y = C[u] - c;
|
||||
}
|
||||
|
||||
/*
|
||||
* The scaling factor must be applied at the end. Since y is now
|
||||
* in fixed-point notation, we have to convert the factor to the
|
||||
* same format, and do an extra integer multiplication.
|
||||
*/
|
||||
z = (uint64_t)fpr_trunc(fpr_mul(ccs, fpr_ptwo63)) << 1;
|
||||
z0 = (uint32_t)z;
|
||||
z1 = (uint32_t)(z >> 32);
|
||||
y0 = (uint32_t)y;
|
||||
y1 = (uint32_t)(y >> 32);
|
||||
a = ((uint64_t)z0 * (uint64_t)y1)
|
||||
+ (((uint64_t)z0 * (uint64_t)y0) >> 32);
|
||||
b = ((uint64_t)z1 * (uint64_t)y0);
|
||||
y = (a >> 32) + (b >> 32);
|
||||
y += (((uint64_t)(uint32_t)a + (uint64_t)(uint32_t)b) >> 32);
|
||||
y += (uint64_t)z1 * (uint64_t)y1;
|
||||
|
||||
return y;
|
||||
}
|
||||
|
||||
|
@ -232,6 +232,8 @@ static const fpr fpr_zero = 0;
|
||||
static const fpr fpr_one = 4607182418800017408;
|
||||
static const fpr fpr_two = 4611686018427387904;
|
||||
static const fpr fpr_onehalf = 4602678819172646912;
|
||||
static const fpr fpr_invsqrt2 = 4604544271217802189;
|
||||
static const fpr fpr_invsqrt8 = 4600040671590431693;
|
||||
static const fpr fpr_ptwo31 = 4746794007248502784;
|
||||
static const fpr fpr_ptwo31m1 = 4746794007244308480;
|
||||
static const fpr fpr_mtwo31m1 = 13970166044099084288U;
|
||||
@ -444,7 +446,7 @@ fpr_lt(fpr x, fpr y) {
|
||||
* bits or so.
|
||||
*/
|
||||
#define fpr_expm_p63 PQCLEAN_FALCON512_CLEAN_fpr_expm_p63
|
||||
uint64_t fpr_expm_p63(fpr x);
|
||||
uint64_t fpr_expm_p63(fpr x, fpr ccs);
|
||||
|
||||
#define fpr_gm_tab PQCLEAN_FALCON512_CLEAN_fpr_gm_tab
|
||||
extern const fpr fpr_gm_tab[];
|
||||
|
@ -34,6 +34,45 @@
|
||||
* @author Thomas Pornin <thomas.pornin@nccgroup.com>
|
||||
*/
|
||||
|
||||
/*
|
||||
* IMPORTANT API RULES
|
||||
* -------------------
|
||||
*
|
||||
* This API has some non-trivial usage rules:
|
||||
*
|
||||
*
|
||||
* - All public functions (i.e. the non-static ones) must be referenced
|
||||
* with the PQCLEAN_FALCON512_CLEAN_ macro (e.g. PQCLEAN_FALCON512_CLEAN_verify_raw for the verify_raw()
|
||||
* function). That macro adds a prefix to the name, which is
|
||||
* configurable with the FALCON_PREFIX macro. This allows compiling
|
||||
* the code into a specific "namespace" and potentially including
|
||||
* several versions of this code into a single application (e.g. to
|
||||
* have an AVX2 and a non-AVX2 variants and select the one to use at
|
||||
* runtime based on availability of AVX2 opcodes).
|
||||
*
|
||||
* - Functions that need temporary buffers expect them as a final
|
||||
* tmp[] array of type uint8_t*, with a size which is documented for
|
||||
* each function. However, most have some alignment requirements,
|
||||
* because they will use the array to store 16-bit, 32-bit or 64-bit
|
||||
* values (e.g. uint64_t or double). The caller must ensure proper
|
||||
* alignment. What happens on unaligned access depends on the
|
||||
* underlying architecture, ranging from a slight time penalty
|
||||
* to immediate termination of the process.
|
||||
*
|
||||
* - Some functions rely on specific rounding rules and precision for
|
||||
* floating-point numbers. On some systems (in particular 32-bit x86
|
||||
* with the 387 FPU), this requires setting a hardware control
|
||||
* word. The caller MUST use set_fpu_cw() to ensure proper precision:
|
||||
*
|
||||
* oldcw = set_fpu_cw(2);
|
||||
* PQCLEAN_FALCON512_CLEAN_sign_dyn(...);
|
||||
* set_fpu_cw(oldcw);
|
||||
*
|
||||
* On systems where the native floating-point precision is already
|
||||
* proper, or integer-based emulation is used, the set_fpu_cw()
|
||||
* function does nothing, so it can be called systematically.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
@ -42,22 +81,47 @@
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Some computations with floating-point elements, in particular
|
||||
* rounding to the nearest integer, rely on operations using _exactly_
|
||||
* the precision of IEEE-754 binary64 type (i.e. 52 bits). On 32-bit
|
||||
* x86, the 387 FPU may be used (depending on the target OS) and, in
|
||||
* that case, may use more precision bits (i.e. 64 bits, for an 80-bit
|
||||
* total type length); to prevent miscomputations, we define an explicit
|
||||
* function that modifies the precision in the FPU control word.
|
||||
*
|
||||
* set_fpu_cw() sets the precision to the provided value, and returns
|
||||
* the previously set precision; callers are supposed to restore the
|
||||
* previous precision on exit. The correct (52-bit) precision is
|
||||
* configured with the value "2". On unsupported compilers, or on
|
||||
* targets other than 32-bit x86, or when the native 'double' type is
|
||||
* not used, the set_fpu_cw() function does nothing at all.
|
||||
*/
|
||||
static inline unsigned
set_fpu_cw(unsigned x) {
    /*
     * No FPU control word is modified by this implementation: the
     * value received is handed straight back to the caller.
     */
    unsigned unchanged = x;

    return unchanged;
}
|
||||
|
||||
|
||||
|
||||
|
||||
/* ==================================================================== */
|
||||
/*
|
||||
* SHAKE256 implementation (shake.c).
|
||||
*
|
||||
* API is defined to be easily replaced with the fips202.h API defined
|
||||
* as part of PQ Clean.
|
||||
* as part of PQClean.
|
||||
*/
|
||||
|
||||
|
||||
#include "fips202.h"
|
||||
|
||||
#define shake256_context shake256incctx
|
||||
#define shake256_init(sc) shake256_inc_init(sc)
|
||||
#define shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len)
|
||||
#define shake256_flip(sc) shake256_inc_finalize(sc)
|
||||
#define shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc)
|
||||
#define inner_shake256_context shake256incctx
|
||||
#define inner_shake256_init(sc) shake256_inc_init(sc)
|
||||
#define inner_shake256_inject(sc, in, len) shake256_inc_absorb(sc, in, len)
|
||||
#define inner_shake256_flip(sc) shake256_inc_finalize(sc)
|
||||
#define inner_shake256_extract(sc, out, len) shake256_inc_squeeze(out, len, sc)
|
||||
|
||||
|
||||
/* ==================================================================== */
|
||||
@ -140,9 +204,22 @@ extern const uint8_t PQCLEAN_FALCON512_CLEAN_max_sig_bits[];
|
||||
|
||||
/*
|
||||
* From a SHAKE256 context (must be already flipped), produce a new
|
||||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes.
|
||||
* point. This is the non-constant-time version, which may leak enough
|
||||
* information to serve as a stop condition on a brute force attack on
|
||||
* the hashed message (provided that the nonce value is known).
|
||||
*/
|
||||
void PQCLEAN_FALCON512_CLEAN_hash_to_point(shake256_context *sc,
|
||||
void PQCLEAN_FALCON512_CLEAN_hash_to_point_vartime(inner_shake256_context *sc,
|
||||
uint16_t *x, unsigned logn);
|
||||
|
||||
/*
|
||||
* From a SHAKE256 context (must be already flipped), produce a new
|
||||
* point. The temporary buffer (tmp) must have room for 2*2^logn bytes.
|
||||
* This function is constant-time but is typically more expensive than
|
||||
* PQCLEAN_FALCON512_CLEAN_hash_to_point_vartime().
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
void PQCLEAN_FALCON512_CLEAN_hash_to_point_ct(inner_shake256_context *sc,
|
||||
uint16_t *x, unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
@ -184,6 +261,8 @@ void PQCLEAN_FALCON512_CLEAN_to_ntt_monty(uint16_t *h, unsigned logn);
|
||||
* logn is the degree log
|
||||
* tmp[] temporary, must have at least 2*2^logn bytes
|
||||
* Returned value is 1 on success, 0 on error.
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
const uint16_t *h, unsigned logn, uint8_t *tmp);
|
||||
@ -195,6 +274,7 @@ int PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
* reported if f is not invertible mod phi mod q).
|
||||
*
|
||||
* The tmp[] array must have room for at least 2*2^logn elements.
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON512_CLEAN_compute_public(uint16_t *h,
|
||||
const int8_t *f, const int8_t *g, unsigned logn, uint8_t *tmp);
|
||||
@ -208,11 +288,53 @@ int PQCLEAN_FALCON512_CLEAN_compute_public(uint16_t *h,
|
||||
* The tmp[] array must have room for at least 4*2^logn bytes.
|
||||
*
|
||||
* Returned value is 1 on success, 0 on error (f not invertible).
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON512_CLEAN_complete_private(int8_t *G,
|
||||
const int8_t *f, const int8_t *g, const int8_t *F,
|
||||
unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
* Test whether a given polynomial is invertible modulo phi and q.
|
||||
* Polynomial coefficients are small integers.
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON512_CLEAN_is_invertible(
|
||||
const int16_t *s2, unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
* Count the number of elements of value zero in the NTT representation
|
||||
* of the given polynomial: this is the number of primitive 2n-th roots
|
||||
* of unity (modulo q = 12289) that are roots of the provided polynomial
|
||||
* (taken modulo q).
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON512_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
* Internal signature verification with public key recovery:
|
||||
* h[] receives the public key (NOT in NTT/Montgomery format)
|
||||
* c0[] contains the hashed nonce+message
|
||||
* s1[] is the first signature half
|
||||
* s2[] is the second signature half
|
||||
* logn is the degree log
|
||||
* tmp[] temporary, must have at least 2*2^logn bytes
|
||||
* Returned value is 1 on success, 0 on error. Success is returned if
|
||||
* the signature is a short enough vector; in that case, the public
|
||||
* key has been written to h[]. However, the caller must still
|
||||
* verify that h[] is the correct value (e.g. with regards to a known
|
||||
* hash of the public key).
|
||||
*
|
||||
* h[] may not overlap with any of the other arrays.
|
||||
*
|
||||
* tmp[] must have 16-bit alignment.
|
||||
*/
|
||||
int PQCLEAN_FALCON512_CLEAN_verify_recover(uint16_t *h,
|
||||
const uint16_t *c0, const int16_t *s1, const int16_t *s2,
|
||||
unsigned logn, uint8_t *tmp);
|
||||
|
||||
/* ==================================================================== */
|
||||
/*
|
||||
* Implementation of floating-point real numbers (fpr.h, fpr.c).
|
||||
@ -358,7 +480,7 @@ typedef struct {
|
||||
* Instantiate a PRNG. That PRNG will feed over the provided SHAKE256
|
||||
* context (in "flipped" state) to obtain its initial state.
|
||||
*/
|
||||
void PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, shake256_context *src);
|
||||
void PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, inner_shake256_context *src);
|
||||
|
||||
/*
|
||||
* Refill the PRNG buffer. This is normally invoked automatically, and
|
||||
@ -586,6 +708,9 @@ void PQCLEAN_FALCON512_CLEAN_poly_merge_fft(fpr *f,
|
||||
|
||||
/*
|
||||
* Required sizes of the temporary buffer (in bytes).
|
||||
*
|
||||
* This size is 28*2^logn bytes, except for degrees 2 and 4 (logn = 1
|
||||
* or 2) where it is slightly greater.
|
||||
*/
|
||||
#define FALCON_KEYGEN_TEMP_1 136
|
||||
#define FALCON_KEYGEN_TEMP_2 272
|
||||
@ -608,8 +733,11 @@ void PQCLEAN_FALCON512_CLEAN_poly_merge_fft(fpr *f,
|
||||
* public key is written in h. Either or both of G and h may be NULL,
|
||||
* in which case the corresponding element is not returned (they can
|
||||
* be recomputed from f, g and F).
|
||||
*
|
||||
* tmp[] must have 64-bit alignment.
|
||||
* This function uses floating-point rounding (see set_fpu_cw()).
|
||||
*/
|
||||
void PQCLEAN_FALCON512_CLEAN_keygen(shake256_context *rng,
|
||||
void PQCLEAN_FALCON512_CLEAN_keygen(inner_shake256_context *rng,
|
||||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h,
|
||||
unsigned logn, uint8_t *tmp);
|
||||
|
||||
@ -624,6 +752,9 @@ void PQCLEAN_FALCON512_CLEAN_keygen(shake256_context *rng,
|
||||
* a total of (8*logn+40)*2^logn bytes.
|
||||
*
|
||||
* The tmp[] array must have room for at least 48*2^logn bytes.
|
||||
*
|
||||
* tmp[] must have 64-bit alignment.
|
||||
* This function uses floating-point rounding (see set_fpu_cw()).
|
||||
*/
|
||||
void PQCLEAN_FALCON512_CLEAN_expand_privkey(fpr *expanded_key,
|
||||
const int8_t *f, const int8_t *g, const int8_t *F, const int8_t *G,
|
||||
@ -636,9 +767,15 @@ void PQCLEAN_FALCON512_CLEAN_expand_privkey(fpr *expanded_key,
|
||||
*
|
||||
* The sig[] and hm[] buffers may overlap.
|
||||
*
|
||||
* On successful output, the start of the tmp[] buffer contains the s1
|
||||
* vector (as int16_t elements).
|
||||
*
|
||||
* The minimal size (in bytes) of tmp[] is 48*2^logn bytes.
|
||||
*
|
||||
* tmp[] must have 64-bit alignment.
|
||||
* This function uses floating-point rounding (see set_fpu_cw()).
|
||||
*/
|
||||
void PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
void PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng,
|
||||
const fpr *expanded_key,
|
||||
const uint16_t *hm, unsigned logn, uint8_t *tmp);
|
||||
|
||||
@ -651,13 +788,47 @@ void PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
*
|
||||
* The sig[] and hm[] buffers may overlap.
|
||||
*
|
||||
* On successful output, the start of the tmp[] buffer contains the s1
|
||||
* vector (as int16_t elements).
|
||||
*
|
||||
* The minimal size (in bytes) of tmp[] is 72*2^logn bytes.
|
||||
*
|
||||
* tmp[] must have 64-bit alignment.
|
||||
* This function uses floating-point rounding (see set_fpu_cw()).
|
||||
*/
|
||||
void PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng,
|
||||
void PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng,
|
||||
const int8_t *f, const int8_t *g,
|
||||
const int8_t *F, const int8_t *G,
|
||||
const uint16_t *hm, unsigned logn, uint8_t *tmp);
|
||||
|
||||
/*
|
||||
* Internal sampler engine. Exported for tests.
|
||||
*
|
||||
* sampler_context wraps around a source of random numbers (PRNG) and
|
||||
* the sigma_min value (nominally dependent on the degree).
|
||||
*
|
||||
* sampler() takes as parameters:
|
||||
* ctx pointer to the sampler_context structure
|
||||
* mu center for the distribution
|
||||
* isigma inverse of the distribution standard deviation
|
||||
* It returns an integer sampled along the Gaussian distribution centered
|
||||
* on mu and of standard deviation sigma = 1/isigma.
|
||||
*
|
||||
* gaussian0_sampler() takes as parameter a pointer to a PRNG, and
|
||||
* returns an integer sampled along a half-Gaussian with standard
|
||||
* deviation sigma0 = 1.8205 (center is 0, returned value is
|
||||
* nonnegative).
|
||||
*/
|
||||
|
||||
typedef struct {
|
||||
prng p; /* PRNG supplying the randomness consumed by sampler() */
|
||||
fpr sigma_min; /* sigma_min value for the sampler; nominally depends on the degree */
|
||||
} sampler_context;
|
||||
|
||||
int PQCLEAN_FALCON512_CLEAN_sampler(void *ctx, fpr mu, fpr isigma);
|
||||
|
||||
int PQCLEAN_FALCON512_CLEAN_gaussian0_sampler(prng *p);
|
||||
|
||||
/* ==================================================================== */
|
||||
|
||||
#endif
|
||||
|
@ -2171,6 +2171,9 @@ poly_sub_scaled_ntt(uint32_t *F, size_t Flen, size_t Fstride,
|
||||
|
||||
/* ==================================================================== */
|
||||
|
||||
|
||||
#define RNG_CONTEXT inner_shake256_context
|
||||
|
||||
/*
|
||||
* Get a random 8-byte integer from a SHAKE-based RNG. This function
|
||||
* ensures consistent interpretation of the SHAKE output so that
|
||||
@ -2178,14 +2181,14 @@ poly_sub_scaled_ntt(uint32_t *F, size_t Flen, size_t Fstride,
|
||||
* a known seed is used.
|
||||
*/
|
||||
static inline uint64_t
|
||||
get_rng_u64(shake256_context *rng) {
|
||||
get_rng_u64(inner_shake256_context *rng) {
|
||||
/*
|
||||
* We enforce little-endian representation.
|
||||
*/
|
||||
|
||||
uint8_t tmp[8];
|
||||
|
||||
shake256_extract(rng, tmp, sizeof tmp);
|
||||
inner_shake256_extract(rng, tmp, sizeof tmp);
|
||||
return (uint64_t)tmp[0]
|
||||
| ((uint64_t)tmp[1] << 8)
|
||||
| ((uint64_t)tmp[2] << 16)
|
||||
@ -2196,6 +2199,7 @@ get_rng_u64(shake256_context *rng) {
|
||||
| ((uint64_t)tmp[7] << 56);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Table below incarnates a discrete Gaussian distribution:
|
||||
* D(x) = exp(-(x^2)/(2*sigma^2))
|
||||
@ -2227,7 +2231,7 @@ static const uint64_t gauss_1024_12289[] = {
|
||||
* together for lower dimensions.
|
||||
*/
|
||||
static int
|
||||
mkgauss(shake256_context *rng, unsigned logn) {
|
||||
mkgauss(RNG_CONTEXT *rng, unsigned logn) {
|
||||
unsigned u, g;
|
||||
int val;
|
||||
|
||||
@ -3156,6 +3160,7 @@ solve_NTRU_intermediate(unsigned logn_top,
|
||||
fpr xv;
|
||||
|
||||
xv = fpr_mul(rt2[u], pdc);
|
||||
|
||||
/*
|
||||
* Sometimes the values can be out-of-bounds if
|
||||
* the algorithm fails; we must not call
|
||||
@ -4006,7 +4011,7 @@ solve_NTRU(unsigned logn, int8_t *F, int8_t *G,
|
||||
* also makes sure that the resultant of the polynomial with phi is odd.
|
||||
*/
|
||||
static void
|
||||
poly_small_mkgauss(shake256_context *rng, int8_t *f, unsigned logn) {
|
||||
poly_small_mkgauss(RNG_CONTEXT *rng, int8_t *f, unsigned logn) {
|
||||
size_t n, u;
|
||||
unsigned mod2;
|
||||
|
||||
@ -4046,7 +4051,7 @@ restart:
|
||||
|
||||
/* see falcon.h */
|
||||
void
|
||||
PQCLEAN_FALCON512_CLEAN_keygen(shake256_context *rng,
|
||||
PQCLEAN_FALCON512_CLEAN_keygen(inner_shake256_context *rng,
|
||||
int8_t *f, int8_t *g, int8_t *F, int8_t *G, uint16_t *h,
|
||||
unsigned logn, uint8_t *tmp) {
|
||||
/*
|
||||
@ -4070,8 +4075,10 @@ PQCLEAN_FALCON512_CLEAN_keygen(shake256_context *rng,
|
||||
*/
|
||||
size_t n, u;
|
||||
uint16_t *h2, *tmp2;
|
||||
RNG_CONTEXT *rc;
|
||||
|
||||
n = MKN(logn);
|
||||
rc = rng;
|
||||
|
||||
/*
|
||||
* We need to generate f and g randomly, until we find values
|
||||
@ -4104,8 +4111,8 @@ PQCLEAN_FALCON512_CLEAN_keygen(shake256_context *rng,
|
||||
* (i.e. the resultant of the polynomial with phi
|
||||
* will be odd).
|
||||
*/
|
||||
poly_small_mkgauss(rng, f, logn);
|
||||
poly_small_mkgauss(rng, g, logn);
|
||||
poly_small_mkgauss(rc, f, logn);
|
||||
poly_small_mkgauss(rc, g, logn);
|
||||
|
||||
/*
|
||||
* Verify that all coefficients are within the bounds
|
||||
|
@ -51,16 +51,16 @@ PQCLEAN_FALCON512_CLEAN_crypto_sign_keypair(
|
||||
int8_t f[512], g[512], F[512];
|
||||
uint16_t h[512];
|
||||
unsigned char seed[48];
|
||||
shake256_context rng;
|
||||
inner_shake256_context rng;
|
||||
size_t u, v;
|
||||
|
||||
/*
|
||||
* Generate key pair.
|
||||
*/
|
||||
randombytes(seed, sizeof seed);
|
||||
shake256_init(&rng);
|
||||
shake256_inject(&rng, seed, sizeof seed);
|
||||
shake256_flip(&rng);
|
||||
inner_shake256_init(&rng);
|
||||
inner_shake256_inject(&rng, seed, sizeof seed);
|
||||
inner_shake256_flip(&rng);
|
||||
PQCLEAN_FALCON512_CLEAN_keygen(&rng, f, g, F, NULL, h, 9, tmp.b);
|
||||
|
||||
/*
|
||||
@ -135,7 +135,7 @@ do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen,
|
||||
uint16_t hm[512];
|
||||
} r;
|
||||
unsigned char seed[48];
|
||||
shake256_context sc;
|
||||
inner_shake256_context sc;
|
||||
size_t u, v;
|
||||
|
||||
/*
|
||||
@ -181,19 +181,19 @@ do_sign(uint8_t *nonce, uint8_t *sigbuf, size_t *sigbuflen,
|
||||
/*
|
||||
* Hash message nonce + message into a vector.
|
||||
*/
|
||||
shake256_init(&sc);
|
||||
shake256_inject(&sc, nonce, NONCELEN);
|
||||
shake256_inject(&sc, m, mlen);
|
||||
shake256_flip(&sc);
|
||||
PQCLEAN_FALCON512_CLEAN_hash_to_point(&sc, r.hm, 9, tmp.b);
|
||||
inner_shake256_init(&sc);
|
||||
inner_shake256_inject(&sc, nonce, NONCELEN);
|
||||
inner_shake256_inject(&sc, m, mlen);
|
||||
inner_shake256_flip(&sc);
|
||||
PQCLEAN_FALCON512_CLEAN_hash_to_point_ct(&sc, r.hm, 9, tmp.b);
|
||||
|
||||
/*
|
||||
* Initialize a RNG.
|
||||
*/
|
||||
randombytes(seed, sizeof seed);
|
||||
shake256_init(&sc);
|
||||
shake256_inject(&sc, seed, sizeof seed);
|
||||
shake256_flip(&sc);
|
||||
inner_shake256_init(&sc);
|
||||
inner_shake256_inject(&sc, seed, sizeof seed);
|
||||
inner_shake256_flip(&sc);
|
||||
|
||||
/*
|
||||
* Compute and return the signature. This loops until a signature
|
||||
@ -225,7 +225,7 @@ do_verify(
|
||||
} tmp;
|
||||
uint16_t h[512], hm[512];
|
||||
int16_t sig[512];
|
||||
shake256_context sc;
|
||||
inner_shake256_context sc;
|
||||
|
||||
/*
|
||||
* Decode public key.
|
||||
@ -253,11 +253,11 @@ do_verify(
|
||||
/*
|
||||
* Hash nonce + message into a vector.
|
||||
*/
|
||||
shake256_init(&sc);
|
||||
shake256_inject(&sc, nonce, NONCELEN);
|
||||
shake256_inject(&sc, m, mlen);
|
||||
shake256_flip(&sc);
|
||||
PQCLEAN_FALCON512_CLEAN_hash_to_point(&sc, hm, 9, tmp.b);
|
||||
inner_shake256_init(&sc);
|
||||
inner_shake256_inject(&sc, nonce, NONCELEN);
|
||||
inner_shake256_inject(&sc, m, mlen);
|
||||
inner_shake256_flip(&sc);
|
||||
PQCLEAN_FALCON512_CLEAN_hash_to_point_ct(&sc, hm, 9, tmp.b);
|
||||
|
||||
/*
|
||||
* Verify signature.
|
||||
|
@ -36,7 +36,7 @@
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, shake256_context *src) {
|
||||
PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, inner_shake256_context *src) {
|
||||
/*
|
||||
* To ensure reproducibility for a given seed, we
|
||||
* must enforce little-endian interpretation of
|
||||
@ -46,7 +46,7 @@ PQCLEAN_FALCON512_CLEAN_prng_init(prng *p, shake256_context *src) {
|
||||
uint64_t th, tl;
|
||||
int i;
|
||||
|
||||
shake256_extract(src, tmp, 56);
|
||||
inner_shake256_extract(src, tmp, 56);
|
||||
for (i = 0; i < 14; i ++) {
|
||||
uint32_t w;
|
||||
|
||||
|
@ -417,8 +417,170 @@ ffSampling_fft(samplerZ samp, void *samp_ctx,
|
||||
size_t n, hn;
|
||||
const fpr *tree0, *tree1;
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
if (n == 1) {
|
||||
/*
|
||||
* When logn == 2, we inline the last two recursion levels.
|
||||
*/
|
||||
if (logn == 2) {
|
||||
fpr x0, x1, y0, y1, w0, w1, w2, w3, sigma;
|
||||
fpr a_re, a_im, b_re, b_im, c_re, c_im;
|
||||
|
||||
tree0 = tree + 4;
|
||||
tree1 = tree + 8;
|
||||
|
||||
/*
|
||||
* We split t1 into w*, then do the recursive invocation,
|
||||
* with output in w*. We finally merge back into z1.
|
||||
*/
|
||||
a_re = t1[0];
|
||||
a_im = t1[2];
|
||||
b_re = t1[1];
|
||||
b_im = t1[3];
|
||||
c_re = fpr_add(a_re, b_re);
|
||||
c_im = fpr_add(a_im, b_im);
|
||||
w0 = fpr_half(c_re);
|
||||
w1 = fpr_half(c_im);
|
||||
c_re = fpr_sub(a_re, b_re);
|
||||
c_im = fpr_sub(a_im, b_im);
|
||||
w2 = fpr_mul(fpr_add(c_re, c_im), fpr_invsqrt8);
|
||||
w3 = fpr_mul(fpr_sub(c_im, c_re), fpr_invsqrt8);
|
||||
|
||||
x0 = w2;
|
||||
x1 = w3;
|
||||
sigma = tree1[3];
|
||||
w2 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
w3 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
a_re = fpr_sub(x0, w2);
|
||||
a_im = fpr_sub(x1, w3);
|
||||
b_re = tree1[0];
|
||||
b_im = tree1[1];
|
||||
c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
x0 = fpr_add(c_re, w0);
|
||||
x1 = fpr_add(c_im, w1);
|
||||
sigma = tree1[2];
|
||||
w0 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
w1 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
|
||||
a_re = w0;
|
||||
a_im = w1;
|
||||
b_re = w2;
|
||||
b_im = w3;
|
||||
c_re = fpr_mul(fpr_sub(b_re, b_im), fpr_invsqrt2);
|
||||
c_im = fpr_mul(fpr_add(b_re, b_im), fpr_invsqrt2);
|
||||
z1[0] = w0 = fpr_add(a_re, c_re);
|
||||
z1[2] = w2 = fpr_add(a_im, c_im);
|
||||
z1[1] = w1 = fpr_sub(a_re, c_re);
|
||||
z1[3] = w3 = fpr_sub(a_im, c_im);
|
||||
|
||||
/*
|
||||
* Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in w*.
|
||||
*/
|
||||
w0 = fpr_sub(t1[0], w0);
|
||||
w1 = fpr_sub(t1[1], w1);
|
||||
w2 = fpr_sub(t1[2], w2);
|
||||
w3 = fpr_sub(t1[3], w3);
|
||||
|
||||
a_re = w0;
|
||||
a_im = w2;
|
||||
b_re = tree[0];
|
||||
b_im = tree[2];
|
||||
w0 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
w2 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
a_re = w1;
|
||||
a_im = w3;
|
||||
b_re = tree[1];
|
||||
b_im = tree[3];
|
||||
w1 = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
w3 = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
|
||||
w0 = fpr_add(w0, t0[0]);
|
||||
w1 = fpr_add(w1, t0[1]);
|
||||
w2 = fpr_add(w2, t0[2]);
|
||||
w3 = fpr_add(w3, t0[3]);
|
||||
|
||||
/*
|
||||
* Second recursive invocation.
|
||||
*/
|
||||
a_re = w0;
|
||||
a_im = w2;
|
||||
b_re = w1;
|
||||
b_im = w3;
|
||||
c_re = fpr_add(a_re, b_re);
|
||||
c_im = fpr_add(a_im, b_im);
|
||||
w0 = fpr_half(c_re);
|
||||
w1 = fpr_half(c_im);
|
||||
c_re = fpr_sub(a_re, b_re);
|
||||
c_im = fpr_sub(a_im, b_im);
|
||||
w2 = fpr_mul(fpr_add(c_re, c_im), fpr_invsqrt8);
|
||||
w3 = fpr_mul(fpr_sub(c_im, c_re), fpr_invsqrt8);
|
||||
|
||||
x0 = w2;
|
||||
x1 = w3;
|
||||
sigma = tree0[3];
|
||||
w2 = y0 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
w3 = y1 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
a_re = fpr_sub(x0, y0);
|
||||
a_im = fpr_sub(x1, y1);
|
||||
b_re = tree0[0];
|
||||
b_im = tree0[1];
|
||||
c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
x0 = fpr_add(c_re, w0);
|
||||
x1 = fpr_add(c_im, w1);
|
||||
sigma = tree0[2];
|
||||
w0 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
w1 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
|
||||
a_re = w0;
|
||||
a_im = w1;
|
||||
b_re = w2;
|
||||
b_im = w3;
|
||||
c_re = fpr_mul(fpr_sub(b_re, b_im), fpr_invsqrt2);
|
||||
c_im = fpr_mul(fpr_add(b_re, b_im), fpr_invsqrt2);
|
||||
z0[0] = fpr_add(a_re, c_re);
|
||||
z0[2] = fpr_add(a_im, c_im);
|
||||
z0[1] = fpr_sub(a_re, c_re);
|
||||
z0[3] = fpr_sub(a_im, c_im);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Case logn == 1 is reachable only when using Falcon-2 (the
|
||||
* smallest size for which Falcon is mathematically defined, but
|
||||
* of course way too insecure to be of any use).
|
||||
*/
|
||||
if (logn == 1) {
|
||||
fpr x0, x1, y0, y1, sigma;
|
||||
fpr a_re, a_im, b_re, b_im, c_re, c_im;
|
||||
|
||||
x0 = t1[0];
|
||||
x1 = t1[1];
|
||||
sigma = tree[3];
|
||||
z1[0] = y0 = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
z1[1] = y1 = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
a_re = fpr_sub(x0, y0);
|
||||
a_im = fpr_sub(x1, y1);
|
||||
b_re = tree[0];
|
||||
b_im = tree[1];
|
||||
c_re = fpr_sub(fpr_mul(a_re, b_re), fpr_mul(a_im, b_im));
|
||||
c_im = fpr_add(fpr_mul(a_re, b_im), fpr_mul(a_im, b_re));
|
||||
x0 = fpr_add(c_re, t0[0]);
|
||||
x1 = fpr_add(c_im, t0[1]);
|
||||
sigma = tree[2];
|
||||
z0[0] = fpr_of(samp(samp_ctx, x0, sigma));
|
||||
z0[1] = fpr_of(samp(samp_ctx, x1, sigma));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Normal end of recursion is for logn == 0. Since the last
|
||||
* steps of the recursions were inlined in the blocks above
|
||||
* (when logn == 1 or 2), this case is not reachable, and is
|
||||
* retained here only for documentation purposes.
|
||||
|
||||
if (logn == 0) {
|
||||
fpr x0, x1, sigma;
|
||||
|
||||
x0 = t0[0];
|
||||
@ -429,6 +591,13 @@ ffSampling_fft(samplerZ samp, void *samp_ctx,
|
||||
return;
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
/*
|
||||
* General recursive case (logn >= 3).
|
||||
*/
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
hn = n >> 1;
|
||||
tree0 = tree + n;
|
||||
tree1 = tree + n + ffLDL_treesize(logn - 1);
|
||||
@ -480,7 +649,7 @@ do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
const fpr *b00, *b01, *b10, *b11, *tree;
|
||||
fpr ni;
|
||||
uint32_t sqn, ng;
|
||||
int16_t *s2tmp;
|
||||
int16_t *s1tmp, *s2tmp;
|
||||
|
||||
n = MKN(logn);
|
||||
t0 = tmp;
|
||||
@ -542,6 +711,7 @@ do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
/*
|
||||
* Compute the signature.
|
||||
*/
|
||||
s1tmp = (int16_t *)tx;
|
||||
sqn = 0;
|
||||
ng = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
@ -550,6 +720,7 @@ do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]);
|
||||
sqn += (uint32_t)(z * z);
|
||||
ng |= sqn;
|
||||
s1tmp[u] = (int16_t)z;
|
||||
}
|
||||
sqn |= -(ng >> 31);
|
||||
|
||||
@ -568,6 +739,7 @@ do_sign_tree(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
}
|
||||
if (PQCLEAN_FALCON512_CLEAN_is_short_half(sqn, s2tmp, logn)) {
|
||||
memcpy(s2, s2tmp, n * sizeof * s2);
|
||||
memcpy(tmp, s1tmp, n * sizeof * s1tmp);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
@ -592,7 +764,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
fpr *b00, *b01, *b10, *b11, *g00, *g01, *g11;
|
||||
fpr ni;
|
||||
uint32_t sqn, ng;
|
||||
int16_t *s2tmp;
|
||||
int16_t *s1tmp, *s2tmp;
|
||||
|
||||
n = MKN(logn);
|
||||
|
||||
@ -745,6 +917,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
PQCLEAN_FALCON512_CLEAN_iFFT(t0, logn);
|
||||
PQCLEAN_FALCON512_CLEAN_iFFT(t1, logn);
|
||||
|
||||
s1tmp = (int16_t *)tx;
|
||||
sqn = 0;
|
||||
ng = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
@ -753,6 +926,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
z = (int32_t)hm[u] - (int32_t)fpr_rint(t0[u]);
|
||||
sqn += (uint32_t)(z * z);
|
||||
ng |= sqn;
|
||||
s1tmp[u] = (int16_t)z;
|
||||
}
|
||||
sqn |= -(ng >> 31);
|
||||
|
||||
@ -771,6 +945,7 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
}
|
||||
if (PQCLEAN_FALCON512_CLEAN_is_short_half(sqn, s2tmp, logn)) {
|
||||
memcpy(s2, s2tmp, n * sizeof * s2);
|
||||
memcpy(tmp, s1tmp, n * sizeof * s1tmp);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
@ -780,29 +955,28 @@ do_sign_dyn(samplerZ samp, void *samp_ctx, int16_t *s2,
|
||||
* Sample an integer value along a half-gaussian distribution centered
|
||||
* on zero and standard deviation 1.8205, with a precision of 72 bits.
|
||||
*/
|
||||
static int
|
||||
gaussian0_sampler(prng *p) {
|
||||
int
|
||||
PQCLEAN_FALCON512_CLEAN_gaussian0_sampler(prng *p) {
|
||||
|
||||
static const uint32_t dist[] = {
|
||||
6031371U, 13708371U, 13035518U,
|
||||
5186761U, 1487980U, 12270720U,
|
||||
3298653U, 4688887U, 5511555U,
|
||||
1551448U, 9247616U, 9467675U,
|
||||
539632U, 14076116U, 5909365U,
|
||||
138809U, 10836485U, 13263376U,
|
||||
26405U, 15335617U, 16601723U,
|
||||
3714U, 14514117U, 13240074U,
|
||||
386U, 8324059U, 3276722U,
|
||||
29U, 12376792U, 7821247U,
|
||||
1U, 11611789U, 3398254U,
|
||||
0U, 1194629U, 4532444U,
|
||||
0U, 37177U, 2973575U,
|
||||
0U, 855U, 10369757U,
|
||||
0U, 14U, 9441597U,
|
||||
0U, 0U, 3075302U,
|
||||
0U, 0U, 28626U,
|
||||
0U, 0U, 197U,
|
||||
0U, 0U, 1U
|
||||
10745844u, 3068844u, 3741698u,
|
||||
5559083u, 1580863u, 8248194u,
|
||||
2260429u, 13669192u, 2736639u,
|
||||
708981u, 4421575u, 10046180u,
|
||||
169348u, 7122675u, 4136815u,
|
||||
30538u, 13063405u, 7650655u,
|
||||
4132u, 14505003u, 7826148u,
|
||||
417u, 16768101u, 11363290u,
|
||||
31u, 8444042u, 8086568u,
|
||||
1u, 12844466u, 265321u,
|
||||
0u, 1232676u, 13644283u,
|
||||
0u, 38047u, 9111839u,
|
||||
0u, 870u, 6138264u,
|
||||
0u, 14u, 12545723u,
|
||||
0u, 0u, 3104126u,
|
||||
0u, 0u, 28824u,
|
||||
0u, 0u, 198u,
|
||||
0u, 0u, 1u
|
||||
};
|
||||
|
||||
uint32_t v0, v1, v2, hi;
|
||||
@ -843,7 +1017,7 @@ gaussian0_sampler(prng *p) {
|
||||
* Sample a bit with probability exp(-x) for some x >= 0.
|
||||
*/
|
||||
static int
|
||||
BerExp(prng *p, fpr x) {
|
||||
BerExp(prng *p, fpr x, fpr ccs) {
|
||||
int s, i;
|
||||
fpr r;
|
||||
uint32_t sw, w;
|
||||
@ -880,7 +1054,7 @@ BerExp(prng *p, fpr x) {
|
||||
* case). The bias is negligible since fpr_expm_p63() only computes
|
||||
* with 51 bits of precision or so.
|
||||
*/
|
||||
z = ((fpr_expm_p63(r) << 1) - 1) >> s;
|
||||
z = ((fpr_expm_p63(r, ccs) << 1) - 1) >> s;
|
||||
|
||||
/*
|
||||
* Sample a bit with probability exp(-x). Since x = s*log(2) + r,
|
||||
@ -896,11 +1070,6 @@ BerExp(prng *p, fpr x) {
|
||||
return (int)(w >> 31);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
prng p;
|
||||
fpr sigma_min;
|
||||
} sampler_context;
|
||||
|
||||
/*
|
||||
* The sampler produces a random integer that follows a discrete Gaussian
|
||||
* distribution, centered on mu, and with standard deviation sigma. The
|
||||
@ -909,8 +1078,8 @@ typedef struct {
|
||||
* The value of sigma MUST lie between 1 and 2 (i.e. isigma lies between
|
||||
* 0.5 and 1); in Falcon, sigma should always be between 1.2 and 1.9.
|
||||
*/
|
||||
static int
|
||||
sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
int
|
||||
PQCLEAN_FALCON512_CLEAN_sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
sampler_context *spc;
|
||||
int s;
|
||||
fpr r, dss, ccs;
|
||||
@ -952,7 +1121,7 @@ sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
* - b = 0: z <= 0 and sampled against a Gaussian
|
||||
* centered on 0.
|
||||
*/
|
||||
z0 = gaussian0_sampler(&spc->p);
|
||||
z0 = PQCLEAN_FALCON512_CLEAN_gaussian0_sampler(&spc->p);
|
||||
b = prng_get_u8(&spc->p) & 1;
|
||||
z = b + ((b << 1) - 1) * z0;
|
||||
|
||||
@ -983,8 +1152,7 @@ sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
*/
|
||||
x = fpr_mul(fpr_sqr(fpr_sub(fpr_of(z), r)), dss);
|
||||
x = fpr_sub(x, fpr_mul(fpr_of(z0 * z0), fpr_inv_2sqrsigma0));
|
||||
x = fpr_mul(x, ccs);
|
||||
if (BerExp(&spc->p, x)) {
|
||||
if (BerExp(&spc->p, x, ccs)) {
|
||||
/*
|
||||
* Rejection sampling was centered on r, but the
|
||||
* actual center is mu = s + r.
|
||||
@ -996,7 +1164,7 @@ sampler(void *ctx, fpr mu, fpr isigma) {
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, inner_shake256_context *rng,
|
||||
const fpr *expanded_key,
|
||||
const uint16_t *hm, unsigned logn, uint8_t *tmp) {
|
||||
fpr *ftmp;
|
||||
@ -1025,7 +1193,7 @@ PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
? fpr_sigma_min_10
|
||||
: fpr_sigma_min_9;
|
||||
PQCLEAN_FALCON512_CLEAN_prng_init(&spc.p, rng);
|
||||
samp = sampler;
|
||||
samp = PQCLEAN_FALCON512_CLEAN_sampler;
|
||||
samp_ctx = &spc;
|
||||
|
||||
/*
|
||||
@ -1040,7 +1208,7 @@ PQCLEAN_FALCON512_CLEAN_sign_tree(int16_t *sig, shake256_context *rng,
|
||||
|
||||
/* see inner.h */
|
||||
void
|
||||
PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng,
|
||||
PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, inner_shake256_context *rng,
|
||||
const int8_t *f, const int8_t *g,
|
||||
const int8_t *F, const int8_t *G,
|
||||
const uint16_t *hm, unsigned logn, uint8_t *tmp) {
|
||||
@ -1070,7 +1238,7 @@ PQCLEAN_FALCON512_CLEAN_sign_dyn(int16_t *sig, shake256_context *rng,
|
||||
? fpr_sigma_min_10
|
||||
: fpr_sigma_min_9;
|
||||
PQCLEAN_FALCON512_CLEAN_prng_init(&spc.p, rng);
|
||||
samp = sampler;
|
||||
samp = PQCLEAN_FALCON512_CLEAN_sampler;
|
||||
samp_ctx = &spc;
|
||||
|
||||
/*
|
||||
|
@ -649,7 +649,7 @@ PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute s1 = s2*h - c0 mod phi mod q (in tt[]).
|
||||
* Compute -s1 = s2*h - c0 mod phi mod q (in tt[]).
|
||||
*/
|
||||
mq_NTT(tt, logn);
|
||||
mq_poly_montymul_ntt(tt, h, logn);
|
||||
@ -657,7 +657,7 @@ PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
mq_poly_sub(tt, c0, logn);
|
||||
|
||||
/*
|
||||
* Normalize s1 elements into the [-q/2..q/2] range.
|
||||
* Normalize -s1 elements into the [-q/2..q/2] range.
|
||||
*/
|
||||
for (u = 0; u < n; u ++) {
|
||||
int32_t w;
|
||||
@ -668,7 +668,7 @@ PQCLEAN_FALCON512_CLEAN_verify_raw(const uint16_t *c0, const int16_t *s2,
|
||||
}
|
||||
|
||||
/*
|
||||
* Signature is valid if and only if the aggregate (s1,s2) vector
|
||||
* Signature is valid if and only if the aggregate (-s1,s2) vector
|
||||
* is short enough.
|
||||
*/
|
||||
return PQCLEAN_FALCON512_CLEAN_is_short((int16_t *)tt, s2, logn);
|
||||
@ -699,7 +699,7 @@ PQCLEAN_FALCON512_CLEAN_compute_public(uint16_t *h,
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* see internal.h */
|
||||
/* see inner.h */
|
||||
int
|
||||
PQCLEAN_FALCON512_CLEAN_complete_private(int8_t *G,
|
||||
const int8_t *f, const int8_t *g, const int8_t *F,
|
||||
@ -743,3 +743,110 @@ PQCLEAN_FALCON512_CLEAN_complete_private(int8_t *G,
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
int
|
||||
PQCLEAN_FALCON512_CLEAN_is_invertible(
|
||||
const int16_t *s2, unsigned logn, uint8_t *tmp) {
|
||||
size_t u, n;
|
||||
uint16_t *tt;
|
||||
uint32_t r;
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
tt = (uint16_t *)tmp;
|
||||
for (u = 0; u < n; u ++) {
|
||||
uint32_t w;
|
||||
|
||||
w = (uint32_t)s2[u];
|
||||
w += Q & -(w >> 31);
|
||||
tt[u] = (uint16_t)w;
|
||||
}
|
||||
mq_NTT(tt, logn);
|
||||
r = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
r |= (uint32_t)(tt[u] - 1);
|
||||
}
|
||||
return (int)(1u - (r >> 31));
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
int
|
||||
PQCLEAN_FALCON512_CLEAN_verify_recover(uint16_t *h,
|
||||
const uint16_t *c0, const int16_t *s1, const int16_t *s2,
|
||||
unsigned logn, uint8_t *tmp) {
|
||||
size_t u, n;
|
||||
uint16_t *tt;
|
||||
uint32_t r;
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
|
||||
/*
|
||||
* Reduce elements of s1 and s2 modulo q; then write s2 into tt[]
|
||||
* and c0 - s1 into h[].
|
||||
*/
|
||||
tt = (uint16_t *)tmp;
|
||||
for (u = 0; u < n; u ++) {
|
||||
uint32_t w;
|
||||
|
||||
w = (uint32_t)s2[u];
|
||||
w += Q & -(w >> 31);
|
||||
tt[u] = (uint16_t)w;
|
||||
|
||||
w = (uint32_t)s1[u];
|
||||
w += Q & -(w >> 31);
|
||||
w = mq_sub(c0[u], w);
|
||||
h[u] = (uint16_t)w;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute h = (c0 - s1) / s2. If one of the coefficients of s2
|
||||
* is zero (in NTT representation) then the operation fails. We
|
||||
* keep that information into a flag so that we do not deviate
|
||||
* from strict constant-time processing; if all coefficients of
|
||||
* s2 are non-zero, then the high bit of r will be zero.
|
||||
*/
|
||||
mq_NTT(tt, logn);
|
||||
mq_NTT(h, logn);
|
||||
r = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
r |= (uint32_t)(tt[u] - 1);
|
||||
h[u] = (uint16_t)mq_div_12289(h[u], tt[u]);
|
||||
}
|
||||
mq_iNTT(h, logn);
|
||||
|
||||
/*
|
||||
* Signature is acceptable if and only if it is short enough,
|
||||
* and s2 was invertible mod phi mod q. The caller must still
|
||||
* check that the rebuilt public key matches the expected
|
||||
* value (e.g. through a hash).
|
||||
*/
|
||||
r = ~r & (uint32_t) - PQCLEAN_FALCON512_CLEAN_is_short(s1, s2, logn);
|
||||
return (int)(r >> 31);
|
||||
}
|
||||
|
||||
/* see inner.h */
|
||||
int
|
||||
PQCLEAN_FALCON512_CLEAN_count_nttzero(const int16_t *sig, unsigned logn, uint8_t *tmp) {
|
||||
uint16_t *s2;
|
||||
size_t u, n;
|
||||
uint32_t r;
|
||||
|
||||
n = (size_t)1 << logn;
|
||||
s2 = (uint16_t *)tmp;
|
||||
for (u = 0; u < n; u ++) {
|
||||
uint32_t w;
|
||||
|
||||
w = (uint32_t)sig[u];
|
||||
w += Q & -(w >> 31);
|
||||
s2[u] = (uint16_t)w;
|
||||
}
|
||||
mq_NTT(s2, logn);
|
||||
r = 0;
|
||||
for (u = 0; u < n; u ++) {
|
||||
uint32_t w;
|
||||
|
||||
w = (uint32_t)s2[u] - 1u;
|
||||
r += (w >> 31);
|
||||
}
|
||||
return (int)r;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user