pqcrypto/crypto_sign/falcon-512/clean/fpr.h

/*
 * Floating-point operations.
 *
 * ==========================(LICENSE BEGIN)============================
 *
 * Copyright (c) 2017-2019  Falcon Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * ===========================(LICENSE END)=============================
 *
 * @author   Thomas Pornin <thomas.pornin@nccgroup.com>
 */


/* ====================================================================== */
/*
 * Custom floating-point implementation with integer arithmetics. We
 * use IEEE-754 "binary64" format, with some simplifications:
 *
 *   - Top bit is s = 1 for negative, 0 for positive.
 *
 *   - Exponent e uses the next 11 bits (bits 52 to 62, inclusive).
 *
 *   - Mantissa m uses the 52 low bits.
 *
 * Encoded value is, in general: (-1)^s * 2^(e-1023) * (1 + m*2^(-52))
 * i.e. the mantissa really is a 53-bit number (less than 2.0, but not
 * less than 1.0), but the top bit (equal to 1 by definition) is omitted
 * in the encoding.
 *
 * In IEEE-754, there are some special values:
 *
 *   - If e = 2047, then the value is either an infinite (m = 0) or
 *     a NaN (m != 0).
 *
 *   - If e = 0, then the value is either a zero (m = 0) or a subnormal,
 *     aka "denormalized number" (m != 0).
 *
 * Of these, we only need the zeros. The caller is responsible for not
 * providing operands that would lead to infinites, NaNs or subnormals.
 * If inputs are such that values go out of range, then indeterminate
 * values are returned (it would still be deterministic, but no specific
 * value may be relied upon).
 *
 * At the C level, the three parts are stored in a 64-bit unsigned
 * word.
 *
 * One may note that a property of the IEEE-754 format is that order
 * is preserved for positive values: if two positive floating-point
 * values x and y are such that x < y, then their respective encodings
 * as _signed_ 64-bit integers i64(x) and i64(y) will be such that
 * i64(x) < i64(y). For negative values, order is reversed: if x < 0,
 * y < 0, and x < y, then ia64(x) > ia64(y).
 *
 * IMPORTANT ASSUMPTIONS:
 * ======================
 *
 * For proper computations, and constant-time behaviour, we assume the
 * following:
 *
 *   - 32x32->64 multiplication (unsigned) has an execution time that
 *     is independent of its operands. This is true of most modern
 *     x86 and ARM cores. Notable exceptions are the ARM Cortex M0, M0+
 *     and M3 (in the M0 and M0+, this is done in software, so it depends
 *     on that routine), and the PowerPC cores from the G3/G4 lines.
 *     For more info, see: https://www.bearssl.org/ctmul.html
 *
 *   - Left-shifts and right-shifts of 32-bit values have an execution
 *     time which does not depend on the shifted value nor on the
 *     shift count. An historical exception is the Pentium IV, but most
 *     modern CPU have barrel shifters. Some small microcontrollers
 *     might have varying-time shifts (not the ARM Cortex M*, though).
 *
 *   - Right-shift of a signed negative value performs a sign extension.
 *     As per the C standard, this operation returns an
 *     implementation-defined result (this is NOT an "undefined
 *     behaviour"). On most/all systems, an arithmetic shift is
 *     performed, because this is what makes most sense.
 */

/*
 * Normally we should declare the 'fpr' type to be a struct or union
 * around the internal 64-bit value; however, we want to use the
 * direct 64-bit integer type to enable a lighter call convention on
 * ARM platforms. This means that direct (invalid) use of operators
 * such as '*' or '+' will not be caught by the compiler. We rely on
 * the "normal" (non-emulated) code to detect such instances.
 */
typedef uint64_t fpr;

/*
 * For computations, we split values into an integral mantissa in the
 * 2^54..2^55 range, and an (adjusted) exponent. The lowest bit is
 * "sticky" (it is set to 1 if any of the bits below it is 1); when
 * re-encoding, the low two bits are dropped, but may induce an
 * increment in the value for proper rounding.
 */

/*
 * Right-shift a 64-bit unsigned value by a possibly secret shift count.
 * We assumed that the underlying architecture had a barrel shifter for
 * 32-bit shifts, but for 64-bit shifts on a 32-bit system, this will
 * typically invoke a software routine that is not necessarily
 * constant-time; hence the function below.
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ursh(uint64_t x, int n) {
    x ^= (x ^ (x >> 32)) & -(uint64_t)(n >> 5);
    return x >> (n & 31);
}

/*
 * Right-shift a 64-bit signed value by a possibly secret shift count
 * (see fpr_ursh() for the rationale).
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline int64_t
fpr_irsh(int64_t x, int n) {
    x ^= (x ^ (x >> 32)) & -(int64_t)(n >> 5);
    return x >> (n & 31);
}

/*
 * Left-shift a 64-bit unsigned value by a possibly secret shift count
 * (see fpr_ursh() for the rationale).
 *
 * Shift count n MUST be in the 0..63 range.
 */
static inline uint64_t
fpr_ulsh(uint64_t x, int n) {
    x ^= (x ^ (x << 32)) & -(uint64_t)(n >> 5);
    return x << (n & 31);
}

/*
 * Expectations:
 *   s = 0 or 1
 *   exponent e is "arbitrary" and unbiased
 *   2^54 <= m < 2^55
 * Numerical value is (-1)^2 * m * 2^e
 *
 * Exponents which are too low lead to value zero. If the exponent is
 * too large, the returned value is indeterminate.
 *
 * If m = 0, then a zero is returned (using the provided sign).
 * If e < -1076, then a zero is returned (regardless of the value of m).
 * If e >= -1076 and e != 0, m must be within the expected range
 * (2^54 to 2^55-1).
 */
static inline fpr
FPR(int s, int e, uint64_t m) {
    fpr x;
    uint32_t t;
    unsigned f;

    /*
     * If e >= -1076, then the value is "normal"; otherwise, it
     * should be a subnormal, which we clamp down to zero.
     */
    e += 1076;
    t = (uint32_t)e >> 31;
    m &= (uint64_t)t - 1;

    /*
     * If m = 0 then we want a zero; make e = 0 too, but conserve
     * the sign.
     */
    t = (uint32_t)(m >> 54);
    e &= -(int)t;

    /*
     * The 52 mantissa bits come from m. Value m has its top bit set
     * (unless it is a zero); we leave it "as is": the top bit will
     * increment the exponent by 1, except when m = 0, which is
     * exactly what we want.
     */
    x = (((uint64_t)s << 63) | (m >> 2)) + ((uint64_t)(uint32_t)e << 52);

    /*
     * Rounding: if the low three bits of m are 011, 110 or 111,
     * then the value should be incremented to get the next
     * representable value. This implements the usual
     * round-to-nearest rule (with preference to even values in case
     * of a tie). Note that the increment may make a carry spill
     * into the exponent field, which is again exactly what we want
     * in that case.
     */
    f = (unsigned)m & 7U;
    x += (0xC8U >> f) & 1;
    return x;
}

#define fpr_scaled   PQCLEAN_FALCON512_CLEAN_fpr_scaled
fpr fpr_scaled(int64_t i, int sc);

static inline fpr
fpr_of(int64_t i) {
    return fpr_scaled(i, 0);
}

static const fpr fpr_q = 4667981563525332992;
static const fpr fpr_inverse_of_q = 4545632735260551042;
static const fpr fpr_inv_2sqrsigma0 = 4594603506513722306;
static const fpr fpr_inv_sigma = 4573359825155195350;
static const fpr fpr_sigma_min_9 = 4608495221497168882;
static const fpr fpr_sigma_min_10 = 4608586345619182117;
static const fpr fpr_log2 = 4604418534313441775;
static const fpr fpr_inv_log2 = 4609176140021203710;
static const fpr fpr_bnorm_max = 4670353323383631276;
static const fpr fpr_zero = 0;
static const fpr fpr_one = 4607182418800017408;
static const fpr fpr_two = 4611686018427387904;
static const fpr fpr_onehalf = 4602678819172646912;
static const fpr fpr_invsqrt2 = 4604544271217802189;
static const fpr fpr_invsqrt8 = 4600040671590431693;
static const fpr fpr_ptwo31 = 4746794007248502784;
static const fpr fpr_ptwo31m1 = 4746794007244308480;
static const fpr fpr_mtwo31m1 = 13970166044099084288U;
static const fpr fpr_ptwo63m1 = 4890909195324358656;
static const fpr fpr_mtwo63m1 = 14114281232179134464U;
static const fpr fpr_ptwo63 = 4890909195324358656;

static inline int64_t
fpr_rint(fpr x) {
    uint64_t m, d;
    int e;
    uint32_t s, dd, f;

    /*
     * We assume that the value fits in -(2^63-1)..+(2^63-1). We can
     * thus extract the mantissa as a 63-bit integer, then right-shift
     * it as needed.
     */
    m = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1);
    e = 1085 - ((int)(x >> 52) & 0x7FF);

    /*
     * If a shift of more than 63 bits is needed, then simply set m
     * to zero. This also covers the case of an input operand equal
     * to zero.
     */
    m &= -(uint64_t)((uint32_t)(e - 64) >> 31);
    e &= 63;

    /*
     * Right-shift m as needed. Shift count is e. Proper rounding
     * mandates that:
     *   - If the highest dropped bit is zero, then round low.
     *   - If the highest dropped bit is one, and at least one of the
     *     other dropped bits is one, then round up.
     *   - If the highest dropped bit is one, and all other dropped
     *     bits are zero, then round up if the lowest kept bit is 1,
     *     or low otherwise (i.e. ties are broken by "rounding to even").
     *
     * We thus first extract a word consisting of all the dropped bit
     * AND the lowest kept bit; then we shrink it down to three bits,
     * the lowest being "sticky".
     */
    d = fpr_ulsh(m, 63 - e);
    dd = (uint32_t)d | ((uint32_t)(d >> 32) & 0x1FFFFFFF);
    f = (uint32_t)(d >> 61) | ((dd | -dd) >> 31);
    m = fpr_ursh(m, e) + (uint64_t)((0xC8U >> f) & 1U);

    /*
     * Apply the sign bit.
     */
    s = (uint32_t)(x >> 63);
    return ((int64_t)m ^ -(int64_t)s) + (int64_t)s;
}

static inline int64_t
fpr_floor(fpr x) {
    uint64_t t;
    int64_t xi;
    int e, cc;

    /*
     * We extract the integer as a _signed_ 64-bit integer with
     * a scaling factor. Since we assume that the value fits
     * in the -(2^63-1)..+(2^63-1) range, we can left-shift the
     * absolute value to make it in the 2^62..2^63-1 range: we
     * will only need a right-shift afterwards.
     */
    e = (int)(x >> 52) & 0x7FF;
    t = x >> 63;
    xi = (int64_t)(((x << 10) | ((uint64_t)1 << 62))
                   & (((uint64_t)1 << 63) - 1));
    xi = (xi ^ -(int64_t)t) + (int64_t)t;
    cc = 1085 - e;

    /*
     * We perform an arithmetic right-shift on the value. This
     * applies floor() semantics on both positive and negative values
     * (rounding toward minus infinity).
     */
    xi = fpr_irsh(xi, cc & 63);

    /*
     * If the true shift count was 64 or more, then we should instead
     * replace xi with 0 (if nonnegative) or -1 (if negative). Edge
     * case: -0 will be floored to -1, not 0 (whether this is correct
     * is debatable; in any case, the other functions normalize zero
     * to +0).
     *
     * For an input of zero, the non-shifted xi was incorrect (we used
     * a top implicit bit of value 1, not 0), but this does not matter
     * since this operation will clamp it down.
     */
    xi ^= (xi ^ -(int64_t)t) & -(int64_t)((uint32_t)(63 - cc) >> 31);
    return xi;
}

static inline int64_t
fpr_trunc(fpr x) {
    uint64_t t, xu;
    int e, cc;

    /*
     * Extract the absolute value. Since we assume that the value
     * fits in the -(2^63-1)..+(2^63-1) range, we can left-shift
     * the absolute value into the 2^62..2^63-1 range, and then
     * do a right shift afterwards.
     */
    e = (int)(x >> 52) & 0x7FF;
    xu = ((x << 10) | ((uint64_t)1 << 62)) & (((uint64_t)1 << 63) - 1);
    cc = 1085 - e;
    xu = fpr_ursh(xu, cc & 63);

    /*
     * If the exponent is too low (cc > 63), then the shift was wrong
     * and we must clamp the value to 0. This also covers the case
     * of an input equal to zero.
     */
    xu &= -(uint64_t)((uint32_t)(cc - 64) >> 31);

    /*
     * Apply back the sign, if the source value is negative.
     */
    t = x >> 63;
    xu = (xu ^ -t) + t;
    return *(int64_t *)&xu;
}

#define fpr_add   PQCLEAN_FALCON512_CLEAN_fpr_add
fpr fpr_add(fpr x, fpr y);

static inline fpr
fpr_sub(fpr x, fpr y) {
    y ^= (uint64_t)1 << 63;
    return fpr_add(x, y);
}

static inline fpr
fpr_neg(fpr x) {
    x ^= (uint64_t)1 << 63;
    return x;
}

static inline fpr
fpr_half(fpr x) {
    /*
     * To divide a value by 2, we just have to subtract 1 from its
     * exponent, but we have to take care of zero.
     */
    uint32_t t;

    x -= (uint64_t)1 << 52;
    t = (((uint32_t)(x >> 52) & 0x7FF) + 1) >> 11;
    x &= (uint64_t)t - 1;
    return x;
}

static inline fpr
fpr_double(fpr x) {
    /*
     * To double a value, we just increment by one the exponent. We
     * don't care about infinites or NaNs; however, 0 is a
     * special case.
     */
    x += (uint64_t)((((unsigned)(x >> 52) & 0x7FFU) + 0x7FFU) >> 11) << 52;
    return x;
}

#define fpr_mul   PQCLEAN_FALCON512_CLEAN_fpr_mul
fpr fpr_mul(fpr x, fpr y);

static inline fpr
fpr_sqr(fpr x) {
    return fpr_mul(x, x);
}

#define fpr_div   PQCLEAN_FALCON512_CLEAN_fpr_div
fpr fpr_div(fpr x, fpr y);

static inline fpr
fpr_inv(fpr x) {
    return fpr_div(4607182418800017408u, x);
}

#define fpr_sqrt   PQCLEAN_FALCON512_CLEAN_fpr_sqrt
fpr fpr_sqrt(fpr x);

static inline int
fpr_lt(fpr x, fpr y) {
    /*
     * If x >= 0 or y >= 0, a signed comparison yields the proper
     * result:
     *   - For positive values, the order is preserved.
     *   - The sign bit is at the same place as in integers, so
     *     sign is preserved.
     *
     * If both x and y are negative, then the order is reversed.
     * We cannot simply invert the comparison result in that case
     * because it would not handle the edge case x = y properly.
     */
    int cc0, cc1;

    cc0 = *(int64_t *)&x < *(int64_t *)&y;
    cc1 = *(int64_t *)&x > *(int64_t *)&y;
    return cc0 ^ ((cc0 ^ cc1) & (int)((x & y) >> 63));
}

/*
 * Compute exp(x) for x such that |x| <= ln 2. We want a precision of 50
 * bits or so.
 */
#define fpr_expm_p63   PQCLEAN_FALCON512_CLEAN_fpr_expm_p63
uint64_t fpr_expm_p63(fpr x, fpr ccs);

#define fpr_gm_tab   PQCLEAN_FALCON512_CLEAN_fpr_gm_tab
extern const fpr fpr_gm_tab[];

#define fpr_p2_tab   PQCLEAN_FALCON512_CLEAN_fpr_p2_tab
extern const fpr fpr_p2_tab[];

/* ====================================================================== */