/*
 * Floating-point operations.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2017-2019 Falcon Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author   Thomas Pornin <thomas.pornin@nccgroup.com>
 */

/* ====================================================================== */
/*
 * Custom floating-point implementation with integer arithmetics. We
 * use IEEE-754 "binary64" format, with some simplifications:
 *
 *   - Top bit is s = 1 for negative, 0 for positive.
 *
 *   - Exponent e uses the next 11 bits (bits 52 to 62, inclusive).
 *
 *   - Mantissa m uses the 52 low bits.
 *
 * Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52))
 * i.e. the mantissa really is a 53-bit number (less than 2.0, but not
 * less than 1.0), but the top bit (equal to 1 by definition) is omitted
 * in the encoding.
 *
 * In IEEE-754, there are some special values:
 *
 *   - If e = 2047, then the value is either an infinite (m = 0) or
 *     a NaN (m != 0).
 *
 *   - If e = 0, then the value is either a zero (m = 0) or a subnormal,
 *     aka "denormalized number" (m != 0).
 *
 * Of these, we only need the zeros. The caller is responsible for not
 * providing operands that would lead to infinites, NaNs or subnormals.
 * If inputs are such that values go out of range, then indeterminate
 * values are returned (it would still be deterministic, but no specific
 * value may be relied upon).
 *
 * At the C level, the three parts are stored in a 64-bit unsigned
 * word.
 *
 * One may note that a property of the IEEE-754 format is that order
 * is preserved for positive values: if two positive floating-point
 * values x and y are such that x < y, then their respective encodings
 * as _signed_ 64-bit integers i64(x) and i64(y) will be such that
 * i64(x) < i64(y). For negative values, order is reversed: if x < 0,
 * y < 0, and x < y, then i64(x) > i64(y).
 *
 * IMPORTANT ASSUMPTIONS:
 * ======================
 *
 * For proper computations, and constant-time behaviour, we assume the
 * following:
 *
 *   - 32x32->64 multiplication (unsigned) has an execution time that
 *     is independent of its operands. This is true of most modern
 *     x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+
 *     and M3 (in the M0 and M0+, this is done in software, so it depends
 *     on that routine), and the PowerPC cores from the G3/G4 lines.
 *     For more info, see: https://www.bearssl.org/ctmul.html
 *
 *   - Left-shifts and right-shifts of 32-bit values have an execution
 *     time which does not depend on the shifted value nor on the
 *     shift count. An historical exception is the Pentium IV, but most
 *     modern CPUs have barrel shifters. Some small microcontrollers
 *     might have varying-time shifts (not the ARM Cortex M*, though).
 *
 *   - Right-shift of a signed negative value performs a sign extension.
 *     As per the C standard, this operation returns an
 *     implementation-defined result (this is NOT an "undefined
 *     behaviour"). On most/all systems, an arithmetic shift is
 *     performed, because this is what makes most sense.
 */

/*
 * Normally we should declare the 'fpr' type to be a struct or union
 * around the internal 64-bit value; however, we want to use the
 * direct 64-bit integer type to enable a lighter call convention on
 * ARM platforms. This means that direct (invalid) use of operators
 * such as '*' or '+' will not be caught by the compiler. We rely on
 * the "normal" (non-emulated) code to detect such instances.
 */
typedef uint64_t fpr;

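/*
 * Illustrative sketch (not part of the original API): extracting the
 * sign, biased exponent and mantissa fields described in the format
 * comment above. The helper name below is hypothetical and exists only
 * to make the bit layout explicit.
 */
static inline void
fpr_example_split(fpr x, int *s, int *e, uint64_t *m) {
    *s = (int)(x >> 63);                  /* sign bit (1 = negative) */
    *e = (int)(x >> 52) & 0x7FF;          /* 11-bit biased exponent */
    *m = x & (((uint64_t)1 << 52) - 1);   /* 52-bit mantissa field */
    /*
     * For a normal value, the represented number is
     * (-1)^s * 2^(e - 1023) * (1 + m * 2^(-52)).
     */
}
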
/*
 * For computations, we split values into an integral mantissa in the
 * 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is
 * "sticky" (it is set to 1 if any of the bits below it is 1); when
 * re-encoding, the low two bits are dropped, but may induce an
 * increment in the value for proper rounding.
 */

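/*
 * Sketch (illustrative, not from the original code): producing a
 * "sticky" low bit when dropping the k low bits of a mantissa. The
 * plain comparison below is for exposition only; the actual routines
 * use branchless constructions.
 */
static inline uint64_t
fpr_example_shr_sticky(uint64_t m, unsigned k) {
    /* k MUST be in the 1..63 range */
    uint64_t dropped = m & (((uint64_t)1 << k) - 1);
    return (m >> k) | (uint64_t)(dropped != 0);
}
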
/*
 * Right-shift a 64-bit unsigned value by a possibly secret shift count.
 * We assumed that the underlying architecture had a barrel shifter for
 * 32-bit shifts, but for 64-bit shifts on a 32-bit system, this will
 * typically invoke a software routine that is not necessarily
 * constant-time; hence the function below.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ursh(uint64_t x, int n) {
    x ^= (x ^ (x >> 32)) & -(uint64_t)(n >> 5);
    return x >> (n & 31);
}

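/*
 * For reference (assumption, not part of the original code): for every
 * n in 0..63, the branchless sequence above computes the same result as
 * the plain shift below. The first line of fpr_ursh() replaces x with
 * x >> 32 exactly when n >= 32 (i.e. when n >> 5 is 1), and the second
 * line shifts by the remaining n & 31 bits.
 */
static inline uint64_t
fpr_example_ursh_ref(uint64_t x, int n) {
    /* n MUST be in 0..63; not necessarily constant-time on 32-bit CPUs */
    return x >> n;
}
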
/*
 * Right-shift a 64-bit signed value by a possibly secret shift count
 * (see fpr_ursh() for the rationale).
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline int64_t
fpr_irsh(int64_t x, int n) {
    x ^= (x ^ (x >> 32)) & -(int64_t)(n >> 5);
    return x >> (n & 31);
}

/*
 * Left-shift a 64-bit unsigned value by a possibly secret shift count
 * (see fpr_ursh() for the rationale).
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ulsh(uint64_t x, int n) {
    x ^= (x ^ (x << 32)) & -(uint64_t)(n >> 5);
    return x << (n & 31);
}

/*
 * Expectations:
 *   s = 0 or 1
 *   exponent e is "arbitrary" and unbiased
 *   2^54 <= m < 2^55
 * Numerical value is (-1)^s * m * 2^e
 *
 * Exponents which are too low lead to value zero. If the exponent is
 * too large, the returned value is indeterminate.
 *
 * If m = 0, then a zero is returned (using the provided sign).
 * If e < -1076, then a zero is returned (regardless of the value of m).
 * If e >= -1076 and e != 0, m must be within the expected range
 * (2^54 to 2^55-1).
 */
static inline fpr
FPR(int s, int e, uint64_t m) {
    fpr x;
    uint32_t t;
    unsigned f;

    /*
     * If e >= -1076, then the value is "normal"; otherwise, it
     * should be a subnormal, which we clamp down to zero.
     */
    e += 1076;
    t = (uint32_t)e >> 31;
    m &= (uint64_t)t - 1;

    /*
     * If m = 0 then we want a zero; make e = 0 too, but conserve
     * the sign.
     */
    t = (uint32_t)(m >> 54);
    e &= -(int)t;

    /*
     * The 52 mantissa bits come from m. Value m has its top bit set
     * (unless it is a zero); we leave it "as is": the top bit will
     * increment the exponent by 1, except when m = 0, which is
     * exactly what we want.
     */
    x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52);

    /*
     * Rounding: if the low three bits of m are 011, 110 or 111,
     * then the value should be incremented to get the next
     * representable value. This implements the usual
     * round-to-nearest rule (with preference to even values in case
     * of a tie). Note that the increment may make a carry spill
     * into the exponent field, which is again exactly what we want
     * in that case.
     */
    f = (unsigned)m & 7U;
    x += (0xC8U >> f) & 1;
    return x;
}

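/*
 * Illustrative sketch (assumption, not from the original code): in the
 * low three bits f of m, bit 2 is the lowest kept bit, bit 1 is the
 * highest dropped bit and bit 0 is the sticky bit. Round-to-nearest-even
 * increments exactly for f = 3 (011), 6 (110) or 7 (111); the constant
 * 0xC8 = 11001000b has precisely bits 3, 6 and 7 set, so (0xC8 >> f) & 1
 * is that increment. The hypothetical helper below spells out the rule.
 */
static inline unsigned
fpr_example_round_incr(unsigned f) {
    /* f MUST be in 0..7; plain specification of the rounding rule */
    return (f == 3 || f == 6 || f == 7) ? 1U : 0U;
}
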
#define fpr_scaled PQCLEAN_FALCON512_CLEAN_fpr_scaled
fpr fpr_scaled(int64_t i, int sc);

static inline fpr
fpr_of(int64_t i) {
    return fpr_scaled(i, 0);
}

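/*
 * Note (assumption, describing the external helper): fpr_scaled(i, sc)
 * computes i * 2^sc as an fpr value (with rounding when i has more than
 * 53 significant bits); fpr_of(i) is thus the encoding of the integer i
 * itself.
 */
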
static const fpr fpr_q = 4667981563525332992;
static const fpr fpr_inverse_of_q = 4545632735260551042;
static const fpr fpr_inv_2sqrsigma0 = 4594603506513722306;
static const fpr fpr_inv_sigma = 4573359825155195350;
static const fpr fpr_sigma_min_9 = 4608495221497168882;
static const fpr fpr_sigma_min_10 = 4608586345619182117;
static const fpr fpr_log2 = 4604418534313441775;
static const fpr fpr_inv_log2 = 4609176140021203710;
static const fpr fpr_bnorm_max = 4670353323383631276;
static const fpr fpr_zero = 0;
static const fpr fpr_one = 4607182418800017408;
static const fpr fpr_two = 4611686018427387904;
static const fpr fpr_onehalf = 4602678819172646912;
static const fpr fpr_ptwo31 = 4746794007248502784;
static const fpr fpr_ptwo31m1 = 4746794007244308480;
static const fpr fpr_mtwo31m1 = 13970166044099084288U;
static const fpr fpr_ptwo63m1 = 4890909195324358656;
static const fpr fpr_mtwo63m1 = 14114281232179134464U;
static const fpr fpr_ptwo63 = 4890909195324358656;

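/*
 * Sketch (assumption): the decimal literals above are the raw IEEE-754
 * binary64 bit patterns of the corresponding double values; for example
 * fpr_one is 0x3FF0000000000000 (1.0) and fpr_two is 0x4000000000000000
 * (2.0). On a platform with a native binary64 'double', such a constant
 * can be reproduced with the hypothetical helper below (for inspection
 * only; the constant-time code itself never goes through the FPU).
 */
static inline fpr
fpr_example_of_double(double d) {
    /* type-pun through a union; assumes 'double' is IEEE-754 binary64 */
    union {
        double f;
        fpr u;
    } z;
    z.f = d;
    return z.u;
}
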
static inline int64_t
fpr_rint(fpr x) {
    uint64_t m, d;
    int e;
    uint32_t s, dd, f;

    /*
     * We assume that the value fits in -(2^63-1)..+(2^63-1). We can
     * thus extract the mantissa as a 63-bit integer, then right-shift
     * it as needed.
     */
    m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1);
    e = 1085 - ((int)(x >> 52) & 0x7FF);

    /*
     * If a shift of more than 63 bits is needed, then simply set m
     * to zero. This also covers the case of an input operand equal
     * to zero.
     */
    m &= -(uint64_t)((uint32_t)(e - 64) >> 31);
    e &= 63;

    /*
     * Right-shift m as needed. Shift count is e. Proper rounding
     * mandates that:
     *   - If the highest dropped bit is zero, then round low.
     *   - If the highest dropped bit is one, and at least one of the
     *     other dropped bits is one, then round up.
     *   - If the highest dropped bit is one, and all other dropped
     *     bits are zero, then round up if the lowest kept bit is 1,
     *     or low otherwise (i.e. ties are broken by "rounding to even").
     *
     * We thus first extract a word consisting of all the dropped bits
     * AND the lowest kept bit; then we shrink it down to three bits,
     * the lowest being "sticky".
     */
    d = fpr_ulsh(m, 63 - e);
    dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF);
    f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31);
    m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U);

    /*
     * Apply the sign bit.
     */
    s = (uint32_t)(x >> 63);
    return ((int64_t)m ^ -(int64_t)s) + (int64_t)s;
}

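/*
 * Sketch (illustrative, not from the original code): in the expression
 * for f above, (dd | -dd) >> 31 is a branchless "is dd nonzero" test on
 * a 32-bit word, since dd or its two's complement has the top bit set
 * exactly when dd != 0. It merges all the dropped bits below the top
 * three bits of d into the sticky bit of f.
 */
static inline uint32_t
fpr_example_nonzero_u32(uint32_t v) {
    return (v | -v) >> 31;   /* 1 if v != 0, 0 otherwise */
}
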
static inline int64_t
fpr_floor(fpr x) {
    uint64_t t;
    int64_t xi;
    int e, cc;

    /*
     * We extract the integer as a _signed_ 64-bit integer with
     * a scaling factor. Since we assume that the value fits
     * in the -(2^63-1)..+(2^63-1) range, we can left-shift the
     * absolute value to make it in the 2^62..2^63-1 range: we
     * will only need a right-shift afterwards.
     */
    e = (int)(x >> 52) & 0x7FF;
    t = x >> 63;
    xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62))
                   & (((uint64_t)1 << 63) - 1));
    xi = (xi ^ -(int64_t)t) + (int64_t)t;
    cc = 1085 - e;

    /*
     * We perform an arithmetic right-shift on the value. This
     * applies floor() semantics on both positive and negative values
     * (rounding toward minus infinity).
     */
    xi = fpr_irsh(xi, cc & 63);

    /*
     * If the true shift count was 64 or more, then we should instead
     * replace xi with 0 (if nonnegative) or -1 (if negative). Edge
     * case: -0 will be floored to -1, not 0 (whether this is correct
     * is debatable; in any case, the other functions normalize zero
     * to +0).
     *
     * For an input of zero, the non-shifted xi was incorrect (we used
     * a top implicit bit of value 1, not 0), but this does not matter
     * since this operation will clamp it down.
     */
    xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31);
    return xi;
}

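/*
 * Sketch (illustrative, not from the original code): the expression
 * (v ^ -s) + s, with s equal to 0 or 1, is the branchless conditional
 * negation used above and in fpr_rint()/fpr_trunc(): it leaves v
 * unchanged when s = 0 and returns its two's complement when s = 1.
 */
static inline int64_t
fpr_example_cond_neg(int64_t v, uint64_t s) {
    /* s MUST be 0 or 1; the arithmetic is done on uint64_t values */
    return (int64_t)(((uint64_t)v ^ -s) + s);
}
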
static inline int64_t
fpr_trunc(fpr x) {
    uint64_t t, xu;
    int e, cc;

    /*
     * Extract the absolute value. Since we assume that the value
     * fits in the -(2^63-1)..+(2^63-1) range, we can left-shift
     * the absolute value into the 2^62..2^63-1 range, and then
     * do a right shift afterwards.
     */
    e = (int)(x >> 52) & 0x7FF;
    xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1);
    cc = 1085 - e;
    xu = fpr_ursh(xu, cc & 63);

    /*
     * If the exponent is too low (cc > 63), then the shift was wrong
     * and we must clamp the value to 0. This also covers the case
     * of an input equal to zero.
     */
    xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31);

    /*
     * Apply back the sign, if the source value is negative.
     */
    t = x >> 63;
    xu = (xu ^ -t) + t;
    return *(int64_t *)&xu;
}

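/*
 * Sketch (illustrative, not from the original code): the clamping mask
 * -(uint64_t)((uint32_t)(cc - 64) >> 31) used above (and in fpr_rint())
 * is all-ones when cc <= 63 and all-zeros when cc >= 64, computed
 * without a branch: cc - 64 wraps to a 32-bit value with its top bit
 * set exactly when cc < 64.
 */
static inline uint64_t
fpr_example_mask_if_le63(uint32_t cc) {
    /* valid for cc below 2^31 + 64, which covers the exponent range here */
    return -(uint64_t)((cc - 64) >> 31);
}
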
#define fpr_add PQCLEAN_FALCON512_CLEAN_fpr_add
fpr fpr_add(fpr x, fpr y);

static inline fpr
fpr_sub(fpr x, fpr y) {
    y ^= (uint64_t)1 << 63;
    return fpr_add(x, y);
}

static inline fpr
fpr_neg(fpr x) {
    x ^= (uint64_t)1 << 63;
    return x;
}

static inline fpr
fpr_half(fpr x) {
    /*
     * To divide a value by 2, we just have to subtract 1 from its
     * exponent, but we have to take care of zero.
     */
    uint32_t t;

    x -= (uint64_t)1 << 52;
    t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11;
    x &= (uint64_t)t - 1;
    return x;
}

static inline fpr
fpr_double(fpr x) {
    /*
     * To double a value, we just increment by one the exponent. We
     * don't care about infinites or NaNs; however, 0 is a
     * special case.
     */
    x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52;
    return x;
}

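/*
 * Sketch (illustrative, not from the original code): in fpr_double(),
 * ((e + 0x7FF) >> 11) is 0 when the biased exponent field e is 0 (our
 * canonical zero) and 1 for e in 1..2047, so 2^52 is added only for
 * nonzero values. fpr_half() uses the mirror-image trick: after
 * subtracting 2^52, ((e + 1) >> 11) detects the underflowed exponent
 * field 0x7FF and forces the result to zero.
 */
static inline unsigned
fpr_example_exp_incr(unsigned e) {
    /* e MUST be a biased exponent field value in 0..2047 */
    return (e + 0x7FFU) >> 11;   /* 0 for e == 0, 1 otherwise */
}
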
#define fpr_mul PQCLEAN_FALCON512_CLEAN_fpr_mul
fpr fpr_mul(fpr x, fpr y);

static inline fpr
fpr_sqr(fpr x) {
    return fpr_mul(x, x);
}

#define fpr_div PQCLEAN_FALCON512_CLEAN_fpr_div
fpr fpr_div(fpr x, fpr y);

static inline fpr
fpr_inv(fpr x) {
    return fpr_div(4607182418800017408u, x);
}

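/*
 * Note (descriptive): the literal 4607182418800017408u is the same bit
 * pattern as fpr_one above (the encoding of 1.0), so fpr_inv(x) returns
 * 1/x.
 */
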
#define fpr_sqrt PQCLEAN_FALCON512_CLEAN_fpr_sqrt
fpr fpr_sqrt(fpr x);

static inline int
fpr_lt(fpr x, fpr y) {
    /*
     * If x >= 0 or y >= 0, a signed comparison yields the proper
     * result:
     *   - For positive values, the order is preserved.
     *   - The sign bit is at the same place as in integers, so
     *     sign is preserved.
     *
     * If both x and y are negative, then the order is reversed.
     * We cannot simply invert the comparison result in that case
     * because it would not handle the edge case x = y properly.
     */
    int cc0, cc1;

    cc0 = *(int64_t *)&x < *(int64_t *)&y;
    cc1 = *(int64_t *)&x > *(int64_t *)&y;
    return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63));
}

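/*
 * Sketch (illustrative, not from the original code): the return
 * expression above is a single-bit select. With a, b and ctl each equal
 * to 0 or 1, a ^ ((a ^ b) & ctl) yields a when ctl = 0 and b when
 * ctl = 1; here ctl = (x & y) >> 63 is 1 exactly when both operands are
 * negative, which is when the signed-integer order must be reversed.
 */
static inline int
fpr_example_bit_select(int a, int b, int ctl) {
    /* a, b and ctl MUST each be 0 or 1 */
    return a ^ ((a ^ b) & ctl);
}
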
/*
 * Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50
 * bits or so.
 */
#define fpr_expm_p63 PQCLEAN_FALCON512_CLEAN_fpr_expm_p63
uint64_t fpr_expm_p63(fpr x);

#define fpr_gm_tab PQCLEAN_FALCON512_CLEAN_fpr_gm_tab
extern const fpr fpr_gm_tab[];

#define fpr_p2_tab PQCLEAN_FALCON512_CLEAN_fpr_p2_tab
extern const fpr fpr_p2_tab[];

/* ====================================================================== */