add karatsuba + toom-cook-3 without VLAs

5 роки тому · e4a5cc3cf2
--- a/crypto_kem/ledakemlt12/leaktime/gf2x_arith.c
+++ b/crypto_kem/ledakemlt12/leaktime/gf2x_arith.c
@@ -8,6 +8,15 @@ void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_add(DIGIT Res[], const DIGIT A[], const D
    }
 }

 /* Conditional move: replaces r[0..len) with a[0..len) when c == 1 and
 * leaves r untouched when c == 0. The choice is made with a bit mask
 * derived from c instead of a branch. */
 void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_cmov(DIGIT *r, const DIGIT *a, size_t len, int c) {
    const DIGIT select = -(DIGIT)c; /* all ones for c == 1, zero for c == 0 */
    for (size_t idx = 0; idx < len; idx++) {
        r[idx] = (a[idx] & select) | (r[idx] & ~select);
    }
 }

 /* PRE: MAX ALLOWED ROTATION AMOUNT : DIGIT_SIZE_b */
 void PQCLEAN_LEDAKEMLT12_LEAKTIME_right_bit_shift_n(int length, DIGIT in[], unsigned int amount) {
    if ( amount == 0 ) {
@@ -38,9 +47,10 @@ void PQCLEAN_LEDAKEMLT12_LEAKTIME_left_bit_shift_n(int length, DIGIT in[], unsig
    in[j] <<= amount;
 }

 void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mul_comb(int nr, DIGIT Res[],
        int na, const DIGIT A[],
        int nb, const DIGIT B[]) {
 static void gf2x_mul_comb(int nr, DIGIT Res[],
                          int na, const DIGIT A[],
                          int nb, const DIGIT B[]) {

    int i, j, k;
    DIGIT u, h;

@@ -71,3 +81,182 @@ void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mul_comb(int nr, DIGIT Res[],
        }
    }
 }

 /* Copies the first len digits of A into R, walking both arrays in
 * ascending index order. */
 static void gf2x_cpy(DIGIT *R, const DIGIT *A, size_t len) {
    size_t pos = 0;
    while (pos < len) {
        R[pos] = A[pos];
        pos++;
    }
 }

 /* Short file-local aliases for the exported digit-wise addition routine.
 * gf2x_add(R, A, B, n) forwards its arguments unchanged.
 * gf2x_acc(R, B, n) is the accumulate form: R is passed both as the
 * destination and as the first operand. */
 #define gf2x_add(R, A, B, n) PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_add(R, A, B, n)
 #define gf2x_acc(R, B, n) PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_add(R, R, B, n)

 /* Adds two operands of different length, aligning their LAST digits.
 * A (na digits) must be the longer operand; B has nb <= na digits.
 * The leading na - nb digits of A are copied to R as they are, the
 * remaining nb digits are combined with B by the digit-wise addition. */
 static inline void gf2x_add_asymm(DIGIT *R,
                                   int na, const DIGIT *A,
                                   int nb, const DIGIT *B) {
    const size_t head = na - nb; /* digits of A that have no partner in B */
    gf2x_cpy(R, A, head);
    PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_add(&R[head], &A[head], B, nb);
 }

 /* Adds two operands of different length, aligning their FIRST digits.
 * A (na digits) must be the longer operand; B has nb <= na digits.
 * The first nb digits are combined by the digit-wise addition, then the
 * trailing na - nb digits of A are copied to R as they are. */
 static inline void gf2x_add_asymm2(DIGIT *R,
                                    int na, const DIGIT *A,
                                    int nb, const DIGIT *B) {
    const size_t tail = na - nb; /* digits of A that have no partner in B */
    PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_add(R, A, B, nb);
    gf2x_cpy(&R[nb], &A[nb], tail);
 }

 /*  Karatsuba with lowered space complexity
 *  T(n) = 3 * ceil(n/2) + T(ceil(n / 2))
 *
 *  Multiplies the n-digit operands A and B into R.
 *  For n < MIN_KAR_DIGITS the product is delegated to gf2x_mul_comb, which
 *  is given a result length of 2 * n digits.
 *  Otherwise each operand is split into a leading part of l - d digits
 *  (a1, b1) and a trailing part of l digits (a0, b0), with l = ceil(n / 2)
 *  and d = 1 for odd n, and three half-size products are computed.
 *  stack supplies 3l digits of scratch (aa, bb, cc) at this level; the rest
 *  is handed to the recursive calls, which is what T(n) above counts.
 *  NOTE(review): the capacity of stack and R is not checked here. */
 static void gf2x_mul_kar(DIGIT *R,
                         const DIGIT *A,
                         const DIGIT *B,
                         size_t n,
                         DIGIT *stack) {

    if (n < MIN_KAR_DIGITS) {
        gf2x_mul_comb(2 * n, R, n, A, n, B);
        return;
    }

    size_t l = (n + 1) / 2; // limb size = ceil(n / 2)
    size_t d = n & 1;

    const DIGIT *a1 = A;            // leading part, l - d digits
    const DIGIT *a0 = A + l - d;    // trailing part, l digits
    const DIGIT *b1 = B;
    const DIGIT *b0 = B + l - d;

    DIGIT *aa = stack;
    DIGIT *bb = aa + l;
    DIGIT *cc = bb + l;
    stack = cc + l; // 3l space requirement at each level

    // c3, c2, c1 are consecutive l-digit windows of R starting at R + l - 2d
    DIGIT *c3 = R + l - 2 * d;
    DIGIT *c2 = c3 + l;
    DIGIT *c1 = c2 + l;

    gf2x_mul_kar(c2, a0, b0, l, stack);      // L in low part of R
    gf2x_mul_kar(R, a1, b1, l - d, stack);   // H in higher part of R
    gf2x_add_asymm(aa, l, a0, l - d, a1);    // AH + AL
    gf2x_add_asymm(bb, l, b0, l - d, b1);    // BH + BL
    gf2x_add(cc, c3, c2, l);                 // HL + LH in cc
    gf2x_mul_kar(c3, aa, bb, l, stack);      // M = (AH + AL) x (BH + BL)
    gf2x_add_asymm(c3, l, c3, l - 2 * d, R); // add HH
    gf2x_acc(c2, c1, l);                     // add LL
    gf2x_acc(c3, cc, l);                     // add HL + LH
    gf2x_acc(c2, cc, l);                     // add HL + LH
 }

 /* In-place running XOR: A[i + 1] ^= A[i] for i = 0 .. n - 3, so the last
 * digit A[n - 1] is never modified. The Toom-Cook interpolation in this
 * file uses it as the division by (W + 1).
 * Nothing is done for n < 2: the bound is written as i + 2 < n because
 * n - 2 would wrap around for an unsigned n below 2. */
 static void gf2x_div_w_plus_one(DIGIT *A, size_t n) {
    for (size_t i = 0; i + 2 < n; i++) {
        A[i + 1] ^= A[i]; // runs n - 2 times for n >= 2
    }
 }

 /* Shifts the n-digit array A by one whole digit towards index 0:
 * A[i] = A[i + 1] for i = 0 .. n - 2, then A[n - 1] = 0. The old A[0]
 * is discarded.
 * An empty array (n == 0) is left alone; without the guard n - 1 would
 * wrap around for the unsigned n. */
 static void gf2x_shift_left_w(DIGIT *A, size_t n) {
    if (n == 0) {
        return;
    }
    size_t i;
    for (i = 0; i + 1 < n; i++) {
        A[i] = A[i + 1];
    }
    A[i] = 0; // i == n - 1 here
 }

 /* Word-aligned Toom-Cook 3, source:
 * Brent, Richard P., et al. "Faster multiplication in GF (2)[x]."
 * International Algorithmic Number Theory Symposium.
 * Springer, Berlin, Heidelberg, 2008.
 *
 * Multiplies the n-digit operands A and B into R, recursing on itself and
 * falling back to gf2x_mul_kar when n < MIN_TOOM_DIGITS.
 * Each operand is split into two l-digit parts (a0, a1 / b0, b1) and one
 * r-digit part (a2 / b2), with l = ceil(n / 3) and r = n - 2l.
 * Five products c0..c4 are computed (Evaluation), combined (Interpolation)
 * and placed in R at digit offsets 0, l, 2l, 3l and 4l (Recombination).
 * R itself serves as scratch for the evaluation values held in c0 and c4
 * until the products a0*b0 and a2*b2 overwrite them.
 * stack provides the scratch for c1, c2, c3 and c5 (3x + z digits at this
 * level); the remainder is handed to the recursive calls.
 * NOTE(review): the capacity of R and stack is not checked here; R
 * presumably needs room for 2n digits -- confirm against the caller. */
 static void gf2x_mul_tc3w(DIGIT *R,
                          const DIGIT *A,
                          const DIGIT *B,
                          size_t n,
                          DIGIT *stack) {

    if (n < MIN_TOOM_DIGITS) {
        gf2x_mul_kar(R, A, B, n, stack);
        return;
    }

    size_t l = (n + 2) / 3;                     // size of a0, a1, b0, b1
    size_t r = n - 2 * l;                       // remaining sizes (a2, b2)
    size_t x = 2 * l + 4;                       // stack slot size of c1, c2, c3
    size_t z = r + 2 > l + 1 ? r + 2 : l + 1;   // size of c5

    const DIGIT *a0 = A;
    const DIGIT *a1 = A + l;
    const DIGIT *a2 = A + 2 * l;
    const DIGIT *b0 = B;
    const DIGIT *b1 = B + l;
    const DIGIT *b2 = B + 2 * l;

    DIGIT *c0 = R;                              // c0 and c4 in the result
    DIGIT *c4 = R + 4 * l;
    DIGIT *c1 = stack;                          // the rest in the stack
    DIGIT *c2 = c1 + x;
    DIGIT *c3 = c2 + x;
    DIGIT *c5 = c3 + x;
    stack = c5 + z;                             // Worst-case 7l + 14

    // Evaluation
    c0[0] = 0;                                  // c0[z] = a1*W + a2*W^2
    c0[l + 1] = 0;
    gf2x_cpy(c0 + 1, a1, l);
    gf2x_acc(c0 + 2, a2, r);

    c4[0] = 0;                                  // c4[z] = b1*W + b2*W^2
    c4[l + 1] = 0;
    gf2x_cpy(c4 + 1, b1, l);
    gf2x_acc(c4 + 2, b2, r);

    gf2x_cpy(c5, a0, l);                        // c5[l] = a0 + a1 + a2
    gf2x_acc(c5, a1, l);
    gf2x_acc(c5, a2, r);

    gf2x_cpy(c2, b0, l);                        // c2[l] = b0 + b1 + b2
    gf2x_acc(c2, b1, l);
    gf2x_acc(c2, b2, r);

    gf2x_mul_tc3w(c1, c2, c5, l, stack);        // c1[2l] = c2 * c5
    gf2x_add_asymm2(c5, z, c0, l, c5);          // c5[z] += c0, z >= l
    gf2x_add_asymm2(c2, z, c4, l, c2);          // c2[z] += c4, idem
    gf2x_acc(c0, a0, l);                        // c0[l] += a0
    gf2x_acc(c4, b0, l);                        // c4[l] += b0
    gf2x_mul_tc3w(c3, c2, c5, z, stack);        // c3[2z] = c2 * c5
    gf2x_mul_tc3w(c2, c0, c4, z, stack);        // c2[2z] = c0 * c4
    gf2x_mul_tc3w(c0, a0, b0, l, stack);        // c0[2l] = a0 * b0
    gf2x_mul_tc3w(c4, a2, b2, r, stack);        // c4[2r] = a2 * b2

    // Interpolation
    gf2x_acc(c3, c2, 2 * z);                    // c3[2z] += c2
    gf2x_acc(c2, c0, 2 * l);                    // c2[2z] += c0
    gf2x_shift_left_w(c2, 2 * z);               // c2[2z] = c2/y + c3
    gf2x_acc(c2, c3, 2 * z);
    gf2x_acc(c2, c4, 2 * r);                    // c2[2z] += c4 at word offsets 0 and 3
    gf2x_acc(c2 + 3, c4, 2 * r);
    gf2x_div_w_plus_one(c2, 2 * z);             // c2[2z-1] = c2/(W+1)
    gf2x_acc(c1, c0, 2 * l);                    // c1[2l] += c0
    gf2x_acc(c3, c1, 2 * l);                    // c3[2z] += c1
    gf2x_shift_left_w(c3, 2 * z);               // c3[2z-2] = c3/(W^2 + W)
    gf2x_div_w_plus_one(c3, 2 * z - 1);
    gf2x_add_asymm2(c1, 2 * z, c2, 2 * l, c1);  // c1[2z-1] += c2 + c4
    gf2x_acc(c1, c4, 2 * r);                    // size c2 >= c1 >= c4
    gf2x_acc(c2, c3, 2 * z - 1);                // c2[2z-1] += c3

    // Recombination
    gf2x_cpy(R + 2 * l, c2, 2 * l);
    gf2x_acc(R + l, c1, 2 * z - 1);
    gf2x_acc(R + 3 * l, c3, 2 * z - 1);
 }

 /* Exported multiplication entry point: multiplies the n-digit operands A
 * and B into R via gf2x_mul_tc3w, lending it a fixed-size local workspace
 * of STACK_WORDS digits instead of a variable-length array.
 * NOTE(review): n is not checked against the workspace size. */
 void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mul(DIGIT *R, const DIGIT *A, const DIGIT *B, size_t n) {
    DIGIT scratch[STACK_WORDS]; /* shared by every recursion level */
    gf2x_mul_tc3w(R, A, B, n, &scratch[0]);
 }

--- a/crypto_kem/ledakemlt12/leaktime/gf2x_arith.h
+++ b/crypto_kem/ledakemlt12/leaktime/gf2x_arith.h
@@ -48,11 +48,16 @@ typedef uint64_t DIGIT;
 #define DIGIT_SIZE_b (DIGIT_SIZE_B << 3)
 #define POSITION_T uint32_t

 #define GF2X_MUL PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mul_comb
 /* Operand sizes (in digits) below which the recursive multipliers switch
 * to a simpler algorithm: gf2x_mul_kar uses the comb method for
 * n < MIN_KAR_DIGITS, gf2x_mul_tc3w uses Karatsuba for n < MIN_TOOM_DIGITS. */
 #define MIN_KAR_DIGITS      10
 #define MIN_TOOM_DIGITS     42

 /* Workspace sizes in digits. STACK_WORDS is the length of the local
 * workspace array in PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mul.
 * STACK_KAR_ONLY is not referenced by the code shown with this change --
 * presumably the requirement when only Karatsuba is used (TODO confirm). */
 #define STACK_KAR_ONLY      2433
 #define STACK_WORDS         2892

 void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_add(DIGIT Res[], const DIGIT A[], const DIGIT B[], int nr);
 void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_cmov(DIGIT *r, const DIGIT *a, size_t len, int c);
 void PQCLEAN_LEDAKEMLT12_LEAKTIME_right_bit_shift_n(int length, DIGIT in[], unsigned int amount);
 void PQCLEAN_LEDAKEMLT12_LEAKTIME_left_bit_shift_n(int length, DIGIT in[], unsigned int amount);
 void GF2X_MUL(int nr, DIGIT Res[], int na, const DIGIT A[], int nb, const DIGIT B[]);
 void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mul(DIGIT *R, const DIGIT *A, const DIGIT *B, size_t n);

 #endif
--- a/crypto_kem/ledakemlt12/leaktime/gf2x_arith_mod_xPplusOne.c
+++ b/crypto_kem/ledakemlt12/leaktime/gf2x_arith_mod_xPplusOne.c
@@ -259,9 +259,7 @@ int PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mod_inverse(DIGIT out[], const DIGIT in[])
 /* Modular multiplication: builds the product of A and B in the local
 * buffer aux (2 * NUM_DIGITS_GF2X_ELEMENT digits) and hands it to gf2x_mod,
 * which writes Res.
 * NOTE(review): as shown, aux is filled twice -- first by GF2X_MUL, then by
 * PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mul. The hunk header (9 old lines,
 * 7 new lines) suggests the GF2X_MUL call is the removed side of this
 * patch; confirm that only the gf2x_mul call remains in the final file. */
 void PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mod_mul(DIGIT Res[], const DIGIT A[], const DIGIT B[]) {

    DIGIT aux[2 * NUM_DIGITS_GF2X_ELEMENT];
    GF2X_MUL(2 * NUM_DIGITS_GF2X_ELEMENT, aux,
             NUM_DIGITS_GF2X_ELEMENT, A,
             NUM_DIGITS_GF2X_ELEMENT, B);
    PQCLEAN_LEDAKEMLT12_LEAKTIME_gf2x_mul(aux, A, B, NUM_DIGITS_GF2X_ELEMENT);
    gf2x_mod(Res, aux); // reduce the double-width product into Res

 }
--- a/crypto_kem/ledakemlt32/leaktime/gf2x_arith.c
+++ b/crypto_kem/ledakemlt32/leaktime/gf2x_arith.c
@@ -8,6 +8,15 @@ void PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_add(DIGIT Res[], const DIGIT A[], const D
    }
 }

 /* Conditional move: replaces r[0..len) with a[0..len) when c == 1 and
 * leaves r untouched when c == 0. The choice is made with a bit mask
 * derived from c instead of a branch. */
 void PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_cmov(DIGIT *r, const DIGIT *a, size_t len, int c) {
    const DIGIT select = -(DIGIT)c; /* all ones for c == 1, zero for c == 0 */
    for (size_t idx = 0; idx < len; idx++) {
        r[idx] = (a[idx] & select) | (r[idx] & ~select);
    }
 }

 /* PRE: MAX ALLOWED ROTATION AMOUNT : DIGIT_SIZE_b */
 void PQCLEAN_LEDAKEMLT32_LEAKTIME_right_bit_shift_n(int length, DIGIT in[], unsigned int amount) {
    if ( amount == 0 ) {
@@ -38,9 +47,10 @@ void PQCLEAN_LEDAKEMLT32_LEAKTIME_left_bit_shift_n(int length, DIGIT in[], unsig
    in[j] <<= amount;
 }

 void PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mul_comb(int nr, DIGIT Res[],
        int na, const DIGIT A[],
        int nb, const DIGIT B[]) {
 static void gf2x_mul_comb(int nr, DIGIT Res[],
                          int na, const DIGIT A[],
                          int nb, const DIGIT B[]) {

    int i, j, k;
    DIGIT u, h;

@@ -71,3 +81,182 @@ void PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mul_comb(int nr, DIGIT Res[],
        }
    }
 }

 /* Copies the first len digits of A into R, walking both arrays in
 * ascending index order. */
 static void gf2x_cpy(DIGIT *R, const DIGIT *A, size_t len) {
    size_t pos = 0;
    while (pos < len) {
        R[pos] = A[pos];
        pos++;
    }
 }

 /* Short file-local aliases for the exported digit-wise addition routine.
 * gf2x_add(R, A, B, n) forwards its arguments unchanged.
 * gf2x_acc(R, B, n) is the accumulate form: R is passed both as the
 * destination and as the first operand. */
 #define gf2x_add(R, A, B, n) PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_add(R, A, B, n)
 #define gf2x_acc(R, B, n) PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_add(R, R, B, n)

 /* Adds two operands of different length, aligning their LAST digits.
 * A (na digits) must be the longer operand; B has nb <= na digits.
 * The leading na - nb digits of A are copied to R as they are, the
 * remaining nb digits are combined with B by the digit-wise addition. */
 static inline void gf2x_add_asymm(DIGIT *R,
                                   int na, const DIGIT *A,
                                   int nb, const DIGIT *B) {
    const size_t head = na - nb; /* digits of A that have no partner in B */
    gf2x_cpy(R, A, head);
    PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_add(&R[head], &A[head], B, nb);
 }

 /* Adds two operands of different length, aligning their FIRST digits.
 * A (na digits) must be the longer operand; B has nb <= na digits.
 * The first nb digits are combined by the digit-wise addition, then the
 * trailing na - nb digits of A are copied to R as they are. */
 static inline void gf2x_add_asymm2(DIGIT *R,
                                    int na, const DIGIT *A,
                                    int nb, const DIGIT *B) {
    const size_t tail = na - nb; /* digits of A that have no partner in B */
    PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_add(R, A, B, nb);
    gf2x_cpy(&R[nb], &A[nb], tail);
 }

 /*  Karatsuba with lowered space complexity
 *  T(n) = 3 * ceil(n/2) + T(ceil(n / 2))
 *
 *  Multiplies the n-digit operands A and B into R.
 *  For n < MIN_KAR_DIGITS the product is delegated to gf2x_mul_comb, which
 *  is given a result length of 2 * n digits.
 *  Otherwise each operand is split into a leading part of l - d digits
 *  (a1, b1) and a trailing part of l digits (a0, b0), with l = ceil(n / 2)
 *  and d = 1 for odd n, and three half-size products are computed.
 *  stack supplies 3l digits of scratch (aa, bb, cc) at this level; the rest
 *  is handed to the recursive calls, which is what T(n) above counts.
 *  NOTE(review): the capacity of stack and R is not checked here. */
 static void gf2x_mul_kar(DIGIT *R,
                         const DIGIT *A,
                         const DIGIT *B,
                         size_t n,
                         DIGIT *stack) {

    if (n < MIN_KAR_DIGITS) {
        gf2x_mul_comb(2 * n, R, n, A, n, B);
        return;
    }

    size_t l = (n + 1) / 2; // limb size = ceil(n / 2)
    size_t d = n & 1;

    const DIGIT *a1 = A;            // leading part, l - d digits
    const DIGIT *a0 = A + l - d;    // trailing part, l digits
    const DIGIT *b1 = B;
    const DIGIT *b0 = B + l - d;

    DIGIT *aa = stack;
    DIGIT *bb = aa + l;
    DIGIT *cc = bb + l;
    stack = cc + l; // 3l space requirement at each level

    // c3, c2, c1 are consecutive l-digit windows of R starting at R + l - 2d
    DIGIT *c3 = R + l - 2 * d;
    DIGIT *c2 = c3 + l;
    DIGIT *c1 = c2 + l;

    gf2x_mul_kar(c2, a0, b0, l, stack);      // L in low part of R
    gf2x_mul_kar(R, a1, b1, l - d, stack);   // H in higher part of R
    gf2x_add_asymm(aa, l, a0, l - d, a1);    // AH + AL
    gf2x_add_asymm(bb, l, b0, l - d, b1);    // BH + BL
    gf2x_add(cc, c3, c2, l);                 // HL + LH in cc
    gf2x_mul_kar(c3, aa, bb, l, stack);      // M = (AH + AL) x (BH + BL)
    gf2x_add_asymm(c3, l, c3, l - 2 * d, R); // add HH
    gf2x_acc(c2, c1, l);                     // add LL
    gf2x_acc(c3, cc, l);                     // add HL + LH
    gf2x_acc(c2, cc, l);                     // add HL + LH
 }

 /* In-place running XOR: A[i + 1] ^= A[i] for i = 0 .. n - 3, so the last
 * digit A[n - 1] is never modified. The Toom-Cook interpolation in this
 * file uses it as the division by (W + 1).
 * Nothing is done for n < 2: the bound is written as i + 2 < n because
 * n - 2 would wrap around for an unsigned n below 2. */
 static void gf2x_div_w_plus_one(DIGIT *A, size_t n) {
    for (size_t i = 0; i + 2 < n; i++) {
        A[i + 1] ^= A[i]; // runs n - 2 times for n >= 2
    }
 }

 /* Shifts the n-digit array A by one whole digit towards index 0:
 * A[i] = A[i + 1] for i = 0 .. n - 2, then A[n - 1] = 0. The old A[0]
 * is discarded.
 * An empty array (n == 0) is left alone; without the guard n - 1 would
 * wrap around for the unsigned n. */
 static void gf2x_shift_left_w(DIGIT *A, size_t n) {
    if (n == 0) {
        return;
    }
    size_t i;
    for (i = 0; i + 1 < n; i++) {
        A[i] = A[i + 1];
    }
    A[i] = 0; // i == n - 1 here
 }

 /* Word-aligned Toom-Cook 3, source:
 * Brent, Richard P., et al. "Faster multiplication in GF (2)[x]."
 * International Algorithmic Number Theory Symposium.
 * Springer, Berlin, Heidelberg, 2008.
 *
 * Multiplies the n-digit operands A and B into R, recursing on itself and
 * falling back to gf2x_mul_kar when n < MIN_TOOM_DIGITS.
 * Each operand is split into two l-digit parts (a0, a1 / b0, b1) and one
 * r-digit part (a2 / b2), with l = ceil(n / 3) and r = n - 2l.
 * Five products c0..c4 are computed (Evaluation), combined (Interpolation)
 * and placed in R at digit offsets 0, l, 2l, 3l and 4l (Recombination).
 * R itself serves as scratch for the evaluation values held in c0 and c4
 * until the products a0*b0 and a2*b2 overwrite them.
 * stack provides the scratch for c1, c2, c3 and c5 (3x + z digits at this
 * level); the remainder is handed to the recursive calls.
 * NOTE(review): the capacity of R and stack is not checked here; R
 * presumably needs room for 2n digits -- confirm against the caller. */
 static void gf2x_mul_tc3w(DIGIT *R,
                          const DIGIT *A,
                          const DIGIT *B,
                          size_t n,
                          DIGIT *stack) {

    if (n < MIN_TOOM_DIGITS) {
        gf2x_mul_kar(R, A, B, n, stack);
        return;
    }

    size_t l = (n + 2) / 3;                     // size of a0, a1, b0, b1
    size_t r = n - 2 * l;                       // remaining sizes (a2, b2)
    size_t x = 2 * l + 4;                       // stack slot size of c1, c2, c3
    size_t z = r + 2 > l + 1 ? r + 2 : l + 1;   // size of c5

    const DIGIT *a0 = A;
    const DIGIT *a1 = A + l;
    const DIGIT *a2 = A + 2 * l;
    const DIGIT *b0 = B;
    const DIGIT *b1 = B + l;
    const DIGIT *b2 = B + 2 * l;

    DIGIT *c0 = R;                              // c0 and c4 in the result
    DIGIT *c4 = R + 4 * l;
    DIGIT *c1 = stack;                          // the rest in the stack
    DIGIT *c2 = c1 + x;
    DIGIT *c3 = c2 + x;
    DIGIT *c5 = c3 + x;
    stack = c5 + z;                             // Worst-case 7l + 14

    // Evaluation
    c0[0] = 0;                                  // c0[z] = a1*W + a2*W^2
    c0[l + 1] = 0;
    gf2x_cpy(c0 + 1, a1, l);
    gf2x_acc(c0 + 2, a2, r);

    c4[0] = 0;                                  // c4[z] = b1*W + b2*W^2
    c4[l + 1] = 0;
    gf2x_cpy(c4 + 1, b1, l);
    gf2x_acc(c4 + 2, b2, r);

    gf2x_cpy(c5, a0, l);                        // c5[l] = a0 + a1 + a2
    gf2x_acc(c5, a1, l);
    gf2x_acc(c5, a2, r);

    gf2x_cpy(c2, b0, l);                        // c2[l] = b0 + b1 + b2
    gf2x_acc(c2, b1, l);
    gf2x_acc(c2, b2, r);

    gf2x_mul_tc3w(c1, c2, c5, l, stack);        // c1[2l] = c2 * c5
    gf2x_add_asymm2(c5, z, c0, l, c5);          // c5[z] += c0, z >= l
    gf2x_add_asymm2(c2, z, c4, l, c2);          // c2[z] += c4, idem
    gf2x_acc(c0, a0, l);                        // c0[l] += a0
    gf2x_acc(c4, b0, l);                        // c4[l] += b0
    gf2x_mul_tc3w(c3, c2, c5, z, stack);        // c3[2z] = c2 * c5
    gf2x_mul_tc3w(c2, c0, c4, z, stack);        // c2[2z] = c0 * c4
    gf2x_mul_tc3w(c0, a0, b0, l, stack);        // c0[2l] = a0 * b0
    gf2x_mul_tc3w(c4, a2, b2, r, stack);        // c4[2r] = a2 * b2

    // Interpolation
    gf2x_acc(c3, c2, 2 * z);                    // c3[2z] += c2
    gf2x_acc(c2, c0, 2 * l);                    // c2[2z] += c0
    gf2x_shift_left_w(c2, 2 * z);               // c2[2z] = c2/y + c3
    gf2x_acc(c2, c3, 2 * z);
    gf2x_acc(c2, c4, 2 * r);                    // c2[2z] += c4 at word offsets 0 and 3
    gf2x_acc(c2 + 3, c4, 2 * r);
    gf2x_div_w_plus_one(c2, 2 * z);             // c2[2z-1] = c2/(W+1)
    gf2x_acc(c1, c0, 2 * l);                    // c1[2l] += c0
    gf2x_acc(c3, c1, 2 * l);                    // c3[2z] += c1
    gf2x_shift_left_w(c3, 2 * z);               // c3[2z-2] = c3/(W^2 + W)
    gf2x_div_w_plus_one(c3, 2 * z - 1);
    gf2x_add_asymm2(c1, 2 * z, c2, 2 * l, c1);  // c1[2z-1] += c2 + c4
    gf2x_acc(c1, c4, 2 * r);                    // size c2 >= c1 >= c4
    gf2x_acc(c2, c3, 2 * z - 1);                // c2[2z-1] += c3

    // Recombination
    gf2x_cpy(R + 2 * l, c2, 2 * l);
    gf2x_acc(R + l, c1, 2 * z - 1);
    gf2x_acc(R + 3 * l, c3, 2 * z - 1);
 }

 /* Exported multiplication entry point: multiplies the n-digit operands A
 * and B into R via gf2x_mul_tc3w, lending it a fixed-size local workspace
 * of STACK_WORDS digits instead of a variable-length array.
 * NOTE(review): n is not checked against the workspace size. */
 void PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mul(DIGIT *R, const DIGIT *A, const DIGIT *B, size_t n) {
    DIGIT scratch[STACK_WORDS]; /* shared by every recursion level */
    gf2x_mul_tc3w(R, A, B, n, &scratch[0]);
 }

--- a/crypto_kem/ledakemlt32/leaktime/gf2x_arith.h
+++ b/crypto_kem/ledakemlt32/leaktime/gf2x_arith.h
@@ -48,11 +48,16 @@ typedef uint64_t DIGIT;
 #define DIGIT_SIZE_b (DIGIT_SIZE_B << 3)
 #define POSITION_T uint32_t

 #define GF2X_MUL PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mul_comb
 /* Operand sizes (in digits) below which the recursive multipliers switch
 * to a simpler algorithm: gf2x_mul_kar uses the comb method for
 * n < MIN_KAR_DIGITS, gf2x_mul_tc3w uses Karatsuba for n < MIN_TOOM_DIGITS. */
 #define MIN_KAR_DIGITS      10
 #define MIN_TOOM_DIGITS     42

 /* Workspace sizes in digits. STACK_WORDS is the length of the local
 * workspace array in PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mul.
 * STACK_KAR_ONLY is not referenced by the code shown with this change --
 * presumably the requirement when only Karatsuba is used (TODO confirm). */
 #define STACK_KAR_ONLY      4497
 #define STACK_WORDS         5336

 void PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_add(DIGIT Res[], const DIGIT A[], const DIGIT B[], int nr);
 void PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_cmov(DIGIT *r, const DIGIT *a, size_t len, int c);
 void PQCLEAN_LEDAKEMLT32_LEAKTIME_right_bit_shift_n(int length, DIGIT in[], unsigned int amount);
 void PQCLEAN_LEDAKEMLT32_LEAKTIME_left_bit_shift_n(int length, DIGIT in[], unsigned int amount);
 void GF2X_MUL(int nr, DIGIT Res[], int na, const DIGIT A[], int nb, const DIGIT B[]);
 void PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mul(DIGIT *R, const DIGIT *A, const DIGIT *B, size_t n);

 #endif
--- a/crypto_kem/ledakemlt32/leaktime/gf2x_arith_mod_xPplusOne.c
+++ b/crypto_kem/ledakemlt32/leaktime/gf2x_arith_mod_xPplusOne.c
@@ -257,9 +257,7 @@ int PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mod_inverse(DIGIT out[], const DIGIT in[])
 /* Modular multiplication: builds the product of A and B in the local
 * buffer aux (2 * NUM_DIGITS_GF2X_ELEMENT digits) and hands it to gf2x_mod,
 * which writes Res.
 * NOTE(review): as shown, aux is filled twice -- first by GF2X_MUL, then by
 * PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mul. The hunk header (9 old lines,
 * 7 new lines) suggests the GF2X_MUL call is the removed side of this
 * patch; confirm that only the gf2x_mul call remains in the final file. */
 void PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mod_mul(DIGIT Res[], const DIGIT A[], const DIGIT B[]) {

    DIGIT aux[2 * NUM_DIGITS_GF2X_ELEMENT];
    GF2X_MUL(2 * NUM_DIGITS_GF2X_ELEMENT, aux,
             NUM_DIGITS_GF2X_ELEMENT, A,
             NUM_DIGITS_GF2X_ELEMENT, B);
    PQCLEAN_LEDAKEMLT32_LEAKTIME_gf2x_mul(aux, A, B, NUM_DIGITS_GF2X_ELEMENT);
    gf2x_mod(Res, aux); // reduce the double-width product into Res

 }
--- a/crypto_kem/ledakemlt52/leaktime/gf2x_arith.c
+++ b/crypto_kem/ledakemlt52/leaktime/gf2x_arith.c
@@ -8,6 +8,15 @@ void PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_add(DIGIT Res[], const DIGIT A[], const D
    }
 }

 /* Conditional move: replaces r[0..len) with a[0..len) when c == 1 and
 * leaves r untouched when c == 0. The choice is made with a bit mask
 * derived from c instead of a branch. */
 void PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_cmov(DIGIT *r, const DIGIT *a, size_t len, int c) {
    const DIGIT select = -(DIGIT)c; /* all ones for c == 1, zero for c == 0 */
    for (size_t idx = 0; idx < len; idx++) {
        r[idx] = (a[idx] & select) | (r[idx] & ~select);
    }
 }

 /* PRE: MAX ALLOWED ROTATION AMOUNT : DIGIT_SIZE_b */
 void PQCLEAN_LEDAKEMLT52_LEAKTIME_right_bit_shift_n(int length, DIGIT in[], unsigned int amount) {
    if ( amount == 0 ) {
@@ -38,9 +47,10 @@ void PQCLEAN_LEDAKEMLT52_LEAKTIME_left_bit_shift_n(int length, DIGIT in[], unsig
    in[j] <<= amount;
 }

 void PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mul_comb(int nr, DIGIT Res[],
        int na, const DIGIT A[],
        int nb, const DIGIT B[]) {
 static void gf2x_mul_comb(int nr, DIGIT Res[],
                          int na, const DIGIT A[],
                          int nb, const DIGIT B[]) {

    int i, j, k;
    DIGIT u, h;

@@ -71,3 +81,182 @@ void PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mul_comb(int nr, DIGIT Res[],
        }
    }
 }

 /* Copies the first len digits of A into R, walking both arrays in
 * ascending index order. */
 static void gf2x_cpy(DIGIT *R, const DIGIT *A, size_t len) {
    size_t pos = 0;
    while (pos < len) {
        R[pos] = A[pos];
        pos++;
    }
 }

 /* Short file-local aliases for the exported digit-wise addition routine.
 * gf2x_add(R, A, B, n) forwards its arguments unchanged.
 * gf2x_acc(R, B, n) is the accumulate form: R is passed both as the
 * destination and as the first operand. */
 #define gf2x_add(R, A, B, n) PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_add(R, A, B, n)
 #define gf2x_acc(R, B, n) PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_add(R, R, B, n)

 /* Adds two operands of different length, aligning their LAST digits.
 * A (na digits) must be the longer operand; B has nb <= na digits.
 * The leading na - nb digits of A are copied to R as they are, the
 * remaining nb digits are combined with B by the digit-wise addition. */
 static inline void gf2x_add_asymm(DIGIT *R,
                                   int na, const DIGIT *A,
                                   int nb, const DIGIT *B) {
    const size_t head = na - nb; /* digits of A that have no partner in B */
    gf2x_cpy(R, A, head);
    PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_add(&R[head], &A[head], B, nb);
 }

 /* Adds two operands of different length, aligning their FIRST digits.
 * A (na digits) must be the longer operand; B has nb <= na digits.
 * The first nb digits are combined by the digit-wise addition, then the
 * trailing na - nb digits of A are copied to R as they are. */
 static inline void gf2x_add_asymm2(DIGIT *R,
                                    int na, const DIGIT *A,
                                    int nb, const DIGIT *B) {
    const size_t tail = na - nb; /* digits of A that have no partner in B */
    PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_add(R, A, B, nb);
    gf2x_cpy(&R[nb], &A[nb], tail);
 }

 /*  Karatsuba with lowered space complexity
 *  T(n) = 3 * ceil(n/2) + T(ceil(n / 2))
 *
 *  Multiplies the n-digit operands A and B into R.
 *  For n < MIN_KAR_DIGITS the product is delegated to gf2x_mul_comb, which
 *  is given a result length of 2 * n digits.
 *  Otherwise each operand is split into a leading part of l - d digits
 *  (a1, b1) and a trailing part of l digits (a0, b0), with l = ceil(n / 2)
 *  and d = 1 for odd n, and three half-size products are computed.
 *  stack supplies 3l digits of scratch (aa, bb, cc) at this level; the rest
 *  is handed to the recursive calls, which is what T(n) above counts.
 *  NOTE(review): the capacity of stack and R is not checked here. */
 static void gf2x_mul_kar(DIGIT *R,
                         const DIGIT *A,
                         const DIGIT *B,
                         size_t n,
                         DIGIT *stack) {

    if (n < MIN_KAR_DIGITS) {
        gf2x_mul_comb(2 * n, R, n, A, n, B);
        return;
    }

    size_t l = (n + 1) / 2; // limb size = ceil(n / 2)
    size_t d = n & 1;

    const DIGIT *a1 = A;            // leading part, l - d digits
    const DIGIT *a0 = A + l - d;    // trailing part, l digits
    const DIGIT *b1 = B;
    const DIGIT *b0 = B + l - d;

    DIGIT *aa = stack;
    DIGIT *bb = aa + l;
    DIGIT *cc = bb + l;
    stack = cc + l; // 3l space requirement at each level

    // c3, c2, c1 are consecutive l-digit windows of R starting at R + l - 2d
    DIGIT *c3 = R + l - 2 * d;
    DIGIT *c2 = c3 + l;
    DIGIT *c1 = c2 + l;

    gf2x_mul_kar(c2, a0, b0, l, stack);      // L in low part of R
    gf2x_mul_kar(R, a1, b1, l - d, stack);   // H in higher part of R
    gf2x_add_asymm(aa, l, a0, l - d, a1);    // AH + AL
    gf2x_add_asymm(bb, l, b0, l - d, b1);    // BH + BL
    gf2x_add(cc, c3, c2, l);                 // HL + LH in cc
    gf2x_mul_kar(c3, aa, bb, l, stack);      // M = (AH + AL) x (BH + BL)
    gf2x_add_asymm(c3, l, c3, l - 2 * d, R); // add HH
    gf2x_acc(c2, c1, l);                     // add LL
    gf2x_acc(c3, cc, l);                     // add HL + LH
    gf2x_acc(c2, cc, l);                     // add HL + LH
 }

 /* In-place running XOR: A[i + 1] ^= A[i] for i = 0 .. n - 3, so the last
 * digit A[n - 1] is never modified. The Toom-Cook interpolation in this
 * file uses it as the division by (W + 1).
 * Nothing is done for n < 2: the bound is written as i + 2 < n because
 * n - 2 would wrap around for an unsigned n below 2. */
 static void gf2x_div_w_plus_one(DIGIT *A, size_t n) {
    for (size_t i = 0; i + 2 < n; i++) {
        A[i + 1] ^= A[i]; // runs n - 2 times for n >= 2
    }
 }

 /* Shifts the n-digit array A by one whole digit towards index 0:
 * A[i] = A[i + 1] for i = 0 .. n - 2, then A[n - 1] = 0. The old A[0]
 * is discarded.
 * An empty array (n == 0) is left alone; without the guard n - 1 would
 * wrap around for the unsigned n. */
 static void gf2x_shift_left_w(DIGIT *A, size_t n) {
    if (n == 0) {
        return;
    }
    size_t i;
    for (i = 0; i + 1 < n; i++) {
        A[i] = A[i + 1];
    }
    A[i] = 0; // i == n - 1 here
 }

 /* Word-aligned Toom-Cook 3, source:
 * Brent, Richard P., et al. "Faster multiplication in GF (2)[x]."
 * International Algorithmic Number Theory Symposium.
 * Springer, Berlin, Heidelberg, 2008.
 *
 * Multiplies the n-digit operands A and B into R, recursing on itself and
 * falling back to gf2x_mul_kar when n < MIN_TOOM_DIGITS.
 * Each operand is split into two l-digit parts (a0, a1 / b0, b1) and one
 * r-digit part (a2 / b2), with l = ceil(n / 3) and r = n - 2l.
 * Five products c0..c4 are computed (Evaluation), combined (Interpolation)
 * and placed in R at digit offsets 0, l, 2l, 3l and 4l (Recombination).
 * R itself serves as scratch for the evaluation values held in c0 and c4
 * until the products a0*b0 and a2*b2 overwrite them.
 * stack provides the scratch for c1, c2, c3 and c5 (3x + z digits at this
 * level); the remainder is handed to the recursive calls.
 * NOTE(review): the capacity of R and stack is not checked here; R
 * presumably needs room for 2n digits -- confirm against the caller. */
 static void gf2x_mul_tc3w(DIGIT *R,
                          const DIGIT *A,
                          const DIGIT *B,
                          size_t n,
                          DIGIT *stack) {

    if (n < MIN_TOOM_DIGITS) {
        gf2x_mul_kar(R, A, B, n, stack);
        return;
    }

    size_t l = (n + 2) / 3;                     // size of a0, a1, b0, b1
    size_t r = n - 2 * l;                       // remaining sizes (a2, b2)
    size_t x = 2 * l + 4;                       // stack slot size of c1, c2, c3
    size_t z = r + 2 > l + 1 ? r + 2 : l + 1;   // size of c5

    const DIGIT *a0 = A;
    const DIGIT *a1 = A + l;
    const DIGIT *a2 = A + 2 * l;
    const DIGIT *b0 = B;
    const DIGIT *b1 = B + l;
    const DIGIT *b2 = B + 2 * l;

    DIGIT *c0 = R;                              // c0 and c4 in the result
    DIGIT *c4 = R + 4 * l;
    DIGIT *c1 = stack;                          // the rest in the stack
    DIGIT *c2 = c1 + x;
    DIGIT *c3 = c2 + x;
    DIGIT *c5 = c3 + x;
    stack = c5 + z;                             // Worst-case 7l + 14

    // Evaluation
    c0[0] = 0;                                  // c0[z] = a1*W + a2*W^2
    c0[l + 1] = 0;
    gf2x_cpy(c0 + 1, a1, l);
    gf2x_acc(c0 + 2, a2, r);

    c4[0] = 0;                                  // c4[z] = b1*W + b2*W^2
    c4[l + 1] = 0;
    gf2x_cpy(c4 + 1, b1, l);
    gf2x_acc(c4 + 2, b2, r);

    gf2x_cpy(c5, a0, l);                        // c5[l] = a0 + a1 + a2
    gf2x_acc(c5, a1, l);
    gf2x_acc(c5, a2, r);

    gf2x_cpy(c2, b0, l);                        // c2[l] = b0 + b1 + b2
    gf2x_acc(c2, b1, l);
    gf2x_acc(c2, b2, r);

    gf2x_mul_tc3w(c1, c2, c5, l, stack);        // c1[2l] = c2 * c5
    gf2x_add_asymm2(c5, z, c0, l, c5);          // c5[z] += c0, z >= l
    gf2x_add_asymm2(c2, z, c4, l, c2);          // c2[z] += c4, idem
    gf2x_acc(c0, a0, l);                        // c0[l] += a0
    gf2x_acc(c4, b0, l);                        // c4[l] += b0
    gf2x_mul_tc3w(c3, c2, c5, z, stack);        // c3[2z] = c2 * c5
    gf2x_mul_tc3w(c2, c0, c4, z, stack);        // c2[2z] = c0 * c4
    gf2x_mul_tc3w(c0, a0, b0, l, stack);        // c0[2l] = a0 * b0
    gf2x_mul_tc3w(c4, a2, b2, r, stack);        // c4[2r] = a2 * b2

    // Interpolation
    gf2x_acc(c3, c2, 2 * z);                    // c3[2z] += c2
    gf2x_acc(c2, c0, 2 * l);                    // c2[2z] += c0
    gf2x_shift_left_w(c2, 2 * z);               // c2[2z] = c2/y + c3
    gf2x_acc(c2, c3, 2 * z);
    gf2x_acc(c2, c4, 2 * r);                    // c2[2z] += c4 at word offsets 0 and 3
    gf2x_acc(c2 + 3, c4, 2 * r);
    gf2x_div_w_plus_one(c2, 2 * z);             // c2[2z-1] = c2/(W+1)
    gf2x_acc(c1, c0, 2 * l);                    // c1[2l] += c0
    gf2x_acc(c3, c1, 2 * l);                    // c3[2z] += c1
    gf2x_shift_left_w(c3, 2 * z);               // c3[2z-2] = c3/(W^2 + W)
    gf2x_div_w_plus_one(c3, 2 * z - 1);
    gf2x_add_asymm2(c1, 2 * z, c2, 2 * l, c1);  // c1[2z-1] += c2 + c4
    gf2x_acc(c1, c4, 2 * r);                    // size c2 >= c1 >= c4
    gf2x_acc(c2, c3, 2 * z - 1);                // c2[2z-1] += c3

    // Recombination
    gf2x_cpy(R + 2 * l, c2, 2 * l);
    gf2x_acc(R + l, c1, 2 * z - 1);
    gf2x_acc(R + 3 * l, c3, 2 * z - 1);
 }

 /* Exported multiplication entry point: multiplies the n-digit operands A
 * and B into R via gf2x_mul_tc3w, lending it a fixed-size local workspace
 * of STACK_WORDS digits instead of a variable-length array.
 * NOTE(review): n is not checked against the workspace size. */
 void PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mul(DIGIT *R, const DIGIT *A, const DIGIT *B, size_t n) {
    DIGIT scratch[STACK_WORDS]; /* shared by every recursion level */
    gf2x_mul_tc3w(R, A, B, n, &scratch[0]);
 }

--- a/crypto_kem/ledakemlt52/leaktime/gf2x_arith.h
+++ b/crypto_kem/ledakemlt52/leaktime/gf2x_arith.h
@@ -48,11 +48,16 @@ typedef uint64_t DIGIT;
 #define DIGIT_SIZE_b (DIGIT_SIZE_B << 3)
 #define POSITION_T uint32_t

 #define GF2X_MUL PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mul_comb
 /* Operand sizes (in digits) below which the recursive multipliers switch
 * to a simpler algorithm: gf2x_mul_kar uses the comb method for
 * n < MIN_KAR_DIGITS, gf2x_mul_tc3w uses Karatsuba for n < MIN_TOOM_DIGITS. */
 #define MIN_KAR_DIGITS      10
 #define MIN_TOOM_DIGITS     42

 /* Workspace sizes in digits. STACK_WORDS is the length of the local
 * workspace array in PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mul.
 * STACK_KAR_ONLY is not referenced by the code shown with this change --
 * presumably the requirement when only Karatsuba is used (TODO confirm). */
 #define STACK_KAR_ONLY      7137
 #define STACK_WORDS         8401

 void PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_add(DIGIT Res[], const DIGIT A[], const DIGIT B[], int nr);
 void PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_cmov(DIGIT *r, const DIGIT *a, size_t len, int c);
 void PQCLEAN_LEDAKEMLT52_LEAKTIME_right_bit_shift_n(int length, DIGIT in[], unsigned int amount);
 void PQCLEAN_LEDAKEMLT52_LEAKTIME_left_bit_shift_n(int length, DIGIT in[], unsigned int amount);
 void GF2X_MUL(int nr, DIGIT Res[], int na, const DIGIT A[], int nb, const DIGIT B[]);
 void PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mul(DIGIT *R, const DIGIT *A, const DIGIT *B, size_t n);

 #endif
--- a/crypto_kem/ledakemlt52/leaktime/gf2x_arith_mod_xPplusOne.c
+++ b/crypto_kem/ledakemlt52/leaktime/gf2x_arith_mod_xPplusOne.c
@@ -257,9 +257,7 @@ int PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mod_inverse(DIGIT out[], const DIGIT in[])
 /* Modular multiplication: builds the product of A and B in the local
 * buffer aux (2 * NUM_DIGITS_GF2X_ELEMENT digits) and hands it to gf2x_mod,
 * which writes Res.
 * NOTE(review): as shown, aux is filled twice -- first by GF2X_MUL, then by
 * PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mul. The hunk header (9 old lines,
 * 7 new lines) suggests the GF2X_MUL call is the removed side of this
 * patch; confirm that only the gf2x_mul call remains in the final file. */
 void PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mod_mul(DIGIT Res[], const DIGIT A[], const DIGIT B[]) {

    DIGIT aux[2 * NUM_DIGITS_GF2X_ELEMENT];
    GF2X_MUL(2 * NUM_DIGITS_GF2X_ELEMENT, aux,
             NUM_DIGITS_GF2X_ELEMENT, A,
             NUM_DIGITS_GF2X_ELEMENT, B);
    PQCLEAN_LEDAKEMLT52_LEAKTIME_gf2x_mul(aux, A, B, NUM_DIGITS_GF2X_ELEMENT);
    gf2x_mod(Res, aux); // reduce the double-width product into Res

 }
--- a/test/duplicate_consistency/ledakemlt12_leaktime.yml
+++ b/test/duplicate_consistency/ledakemlt12_leaktime.yml
@@ -7,7 +7,6 @@ consistency_checks:
    - dfr_test.c
    - dfr_test.h
    - gf2x_arith.c
    - gf2x_arith.h
    - H_Q_matrices_generation.c
    - H_Q_matrices_generation.h
    - kem.c
@@ -23,7 +22,6 @@ consistency_checks:
    - dfr_test.c
    - dfr_test.h
    - gf2x_arith.c
    - gf2x_arith.h
    - H_Q_matrices_generation.c
    - H_Q_matrices_generation.h
    - kem.c
--- a/test/duplicate_consistency/ledakemlt32_leaktime.yml
+++ b/test/duplicate_consistency/ledakemlt32_leaktime.yml
@@ -7,7 +7,6 @@ consistency_checks:
    - dfr_test.c
    - dfr_test.h
    - gf2x_arith.c
    - gf2x_arith.h
    - H_Q_matrices_generation.c
    - H_Q_matrices_generation.h
    - kem.c
@@ -23,7 +22,6 @@ consistency_checks:
    - dfr_test.c
    - dfr_test.h
    - gf2x_arith.c
    - gf2x_arith.h
    - H_Q_matrices_generation.c
    - H_Q_matrices_generation.h
    - kem.c
--- a/test/duplicate_consistency/ledakemlt52_leaktime.yml
+++ b/test/duplicate_consistency/ledakemlt52_leaktime.yml
@@ -7,7 +7,6 @@ consistency_checks:
    - dfr_test.c
    - dfr_test.h
    - gf2x_arith.c
    - gf2x_arith.h
    - H_Q_matrices_generation.c
    - H_Q_matrices_generation.h
    - kem.c
@@ -22,7 +21,6 @@ consistency_checks:
    - dfr_test.c
    - dfr_test.h
    - gf2x_arith.c
    - gf2x_arith.h
    - gf2x_arith_mod_xPplusOne.c
    - H_Q_matrices_generation.c
    - H_Q_matrices_generation.h