#include <stdint.h>
#include <string.h>

#define NWORDS_FIELD 15
#define MAX_INT_POINTS_ALICE 8
#define MAX_INT_POINTS_BOB 10

#define ALICE 0
#define BOB 1
#define OALICE_BITS 372
#define OBOB_BITS 379

#define MAX_Alice 186
#define MAX_Bob 239

#define NBITS_FIELD 751
#define MAXBITS_FIELD 768
#define FP2_ENCODED_BYTES 2 * ((NBITS_FIELD + 7) / 8)

typedef uint64_t felm_t[NWORDS_FIELD];
typedef felm_t f2elm_t[2];

typedef struct
{
    f2elm_t X;
    f2elm_t Z;
} point_proj; // Point representation in projective XZ Montgomery coordinates.

typedef point_proj point_proj_t[1];

const uint64_t A_gen_ifma[5 * NWORDS_FIELD] = {
    0x000ceab50ad8bc0d, 0x0005e457b1c2fc08, 0x000cd6e1d7d710f5, 0x000ae8738d92953d, 0x000a7ebee8a3418a, 0x0008345f03f46fba, 0x0007cfe2616c9a28, 0x000b4be50c8b9e16, 0x00039b6799643b2e, 0x000597a7ff9d56d5, 0x00021d410d97fe0a, 0x000a4a92a8f2ad52, 0x00054508e42abde4, 0x000ebf7d0178c137, 0x00000000004a0a75,
    0x000d21582e4118ad, 0x0005df400ae6cc41, 0x000aec407c2ecb7c, 0x000de8e34b521432, 0x000761e2ab085167, 0x000bcaa6094b3c50, 0x000df9ddd71032cf, 0x00057d905265605f, 0x000f7dba2681f9d7, 0x0009e9732def416c, 0x0006f77956ce00ce, 0x000576fb3094772b, 0x000b2d166e2a949f, 0x0002f665c6588ea2, 0x0000000000337a25,
    0x00026279148626cd, 0x0006b5baead56fe5, 0x000ab911fad60dc9, 0x000401e137d0bf07, 0x0004d3e925216196, 0x0005e4cd09a33740, 0x00069e4af733c538, 0x000d1169f6821367, 0x000c64ecfc721111, 0x000ba56507cd0dc7, 0x000995e4ae04dfad, 0x0007b992deeceab8, 0x0007bccd256aff1e, 0x000207f5fde1824c, 0x0000000000345cc7,
    0x00041dffd19b3e7f, 0x000b48c18e0bb844, 0x000380584b4dea99, 0x0000692de648ad31, 0x000d72761b6dfaee, 0x0005c672c3058de6, 0x000cba26fdc22397, 0x000e15f9133d4bc3, 0x000d5ae123793466, 0x000bb494276e321d, 0x000c9c99fb74cd99, 0x0005da6e4fd03f75, 0x000b95feb24d0937, 0x000e6a307e03cd17, 0x000000000044ad2e,
    0x0007f1ec71be8c36, 0x00053859b1ed78c1, 0x000529ff824d6df7, 0x000633a10839b2a8, 0x00003e9e25fdea79, 0x000a8054df1762fc, 0x000034c6467c4708, 0x000acb63530b60ec, 0x0000c6fc8c19bf71, 0x0005aca92467c3cb, 0x000d42050ba154a2, 0x000b4d5baa4ab074, 0x00044ba4962ac622, 0x0002bbf250aa70e6, 0x0000000000457f51};

const uint64_t B_gen_ifma[5 * NWORDS_FIELD] = {
    0x0001ef867ab0bcb9, 0x0009a45c76cfb6d7, 0x0001f034a5fdd76e, 0x000038b1ee69194b, 0x000e7b18a7761f3f, 0x000a486a52c84cf6, 0x0005aa75466fcf01, 0x00044164f797233f, 0x000331aeaec77db1, 0x0005185f83d9a22f, 0x000e2d4dc94f5b17, 0x0000f7b3858b15a4, 0x000635ac44515c99, 0x000a5b14eaf4ee2e, 0x000000000048e907,
    0x0004e7c075cc3a24, 0x00004aa430a49203, 0x00094c8677baf00b, 0x000b3aae0c9a755c, 0x000c4b064e9ebb08, 0x000dd04e826c661d, 0x00061f01b223684e, 0x000d43bc8a6360b6, 0x00008c633a79ab30, 0x0008e0092fbd6f39, 0x0002b9ba797337f8, 0x000fcb3252ddaf84, 0x000467ded2ca9dce, 0x0006117350e479f4, 0x00000000001ae9d1,
    0x000ed7b96c4ab279, 0x000178486ef1a8c9, 0x000c2f4299429da5, 0x000aef4926f20cd5, 0x0003b2e2858b4716, 0x000bcc3cac3eeb68, 0x0003a600460dda2f, 0x00050e6650a24c9f, 0x0004cb60c61775f8, 0x00082b196ebc78b3, 0x000cc7fec8cce966, 0x000d9b778d801d65, 0x0005324630f74af3, 0x0009018193e7592e, 0x00000000003aef05,
    0x00033769d0f314ef, 0x000e2659d11c0d67, 0x000d133f084c3086, 0x0005e23d5da27bcb, 0x0008ec9a8d586402, 0x000c781b3b645bf3, 0x000c9fb03ee6426d, 0x000ddc7bb40b83e3, 0x000bb7b4ab585e3a, 0x0006c2672e53eeaf, 0x0000397a1e62b655, 0x0004ac383daab923, 0x0008eb1ecdd2f39e, 0x000f1516da469247, 0x00000000003693cf,
    0x0007d8f72bd956dc, 0x000e9934884ae37e, 0x0003c3edd2d504b3, 0x00005d14e7fa1ecb, 0x0007610ceb75d635, 0x000b4cac446b1112, 0x000c1f70caf255b4, 0x00057d3e324d2f36, 0x0006181c3bb1a700, 0x000db2f2916ccc40, 0x00021ee51d1c92f1, 0x000c07c22031c32a, 0x000e4310e5103473, 0x00069c1148de9ef5, 0x00000000004d1227};

const uint64_t One[NWORDS_FIELD] = {
    0x00000000249ad67c, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0001f9800c542c00, 0x000b326488fe3b2a, 0x000e6176236db777, 0x000dd6e970232b83, 0x000d4d762277573f, 0x00054cd16c015f35, 0x0009fc72438c4fc7, 0x00000000001bf8f6};

const uint64_t Two[NWORDS_FIELD] = {
    0x000000004935acf8, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0003f30018a85800, 0x000664c911fc7654, 0x000cc2ec46db6eef, 0x000badd2e0465707, 0x000a9aec44eeae7f, 0x000a99a2d802be6b, 0x0003f8e487189f8e, 0x000000000037f1ed};

// Fixed parameters for isogeny tree computation
extern const unsigned int strat_Alice[MAX_Alice - 1];
extern const unsigned int strat_Bob[MAX_Bob - 1];

void norm2red(uint64_t *res, const uint64_t *a);
void red2norm(uint64_t out[12], const uint64_t in[15])
{
    out[0] = in[0] ^ in[1] << 52;

    out[1] = in[1] >> 12 ^ in[2] << 40;
    out[2] = in[2] >> 24 ^ in[3] << 28;
    out[3] = in[3] >> 36 ^ in[4] << 16;
    out[4] = in[4] >> 48 ^ in[5] << 4 ^ in[6] << 56;

    out[5] = in[6] >> 8 ^ in[7] << 44;
    out[6] = in[7] >> 20 ^ in[8] << 32;
    out[7] = in[8] >> 32 ^ in[9] << 20;
    out[8] = in[9] >> 44 ^ in[10] << 8 ^ in[11] << 60;

    out[9] = in[11] >> 4 ^ in[12] << 48;
    out[10] = in[12] >> 16 ^ in[13] << 36;
    out[11] = in[13] >> 28 ^ in[14] << 24;
}

static void init_basis(const uint64_t *gen, f2elm_t XP, f2elm_t XQ, f2elm_t XR)
{ // Initialization of basis points

    memcpy(XP[0], &gen[0 * NWORDS_FIELD], sizeof(felm_t));
    memcpy(XP[1], &gen[1 * NWORDS_FIELD], sizeof(felm_t));

    memcpy(XQ[0], &gen[2 * NWORDS_FIELD], sizeof(felm_t));
    memset(XQ[1], 0, sizeof(felm_t));

    memcpy(XR[0], &gen[3 * NWORDS_FIELD], sizeof(felm_t));
    memcpy(XR[1], &gen[4 * NWORDS_FIELD], sizeof(felm_t));
}

void fp2_mul_ifma(f2elm_t res, const f2elm_t a, const f2elm_t b);
void fp2_mul_ifma_x2(f2elm_t res1, const f2elm_t a1, const f2elm_t b1, f2elm_t res2, const f2elm_t a2, const f2elm_t b2);
void fp2_sqr_ifma(f2elm_t res, const f2elm_t a);
void fp2_add(f2elm_t res, const f2elm_t a, const f2elm_t b);
void fp2_sub(f2elm_t res, const f2elm_t a, const f2elm_t b);

void fp2_swap(point_proj_t a, point_proj_t b, int swap);

void fp_mul_ifma(felm_t res, felm_t a, felm_t b);
void fp_add(felm_t res, const felm_t a, const felm_t b);
void fp_sub(felm_t res, const felm_t a, const felm_t b);

void to_mont_ifma(felm_t rp, const felm_t ap);
void from_mont_ifma(felm_t rp, const felm_t ap);

void red2norm(uint64_t out[12], const felm_t in);

#define fp2mul_mont(a, b, r) fp2_mul_ifma(r, a, b)
#define fp2sqr_mont(a, r) fp2_sqr_ifma(r, a)
#define fp2add(a, b, r) fp2_add(r, a, b)
#define fp2sub(a, b, r) fp2_sub(r, a, b)
#define fp2correction

#define fpsqr_mont(a, r) fp_mul_ifma(r, a, a)
#define fpmul_mont(a, b, r) fp_mul_ifma(r, a, b)

#define fpadd(a, b, r) fp_add(r, a, b)
#define fpsub(a, b, r) fp_sub(r, a, b)

void fpinv_chain_mont(felm_t a)
{ // Chain to compute a^(p-3)/4 using Montgomery arithmetic.
    unsigned int i, j;
    felm_t t[27], tt;

    // Precomputed table
    fpsqr_mont(a, tt);
    fpmul_mont(a, tt, t[0]);
    fpmul_mont(t[0], tt, t[1]);
    fpmul_mont(t[1], tt, t[2]);
    fpmul_mont(t[2], tt, t[3]);
    fpmul_mont(t[3], tt, t[3]);
    for (i = 3; i <= 8; i++)
        fpmul_mont(t[i], tt, t[i + 1]);
    fpmul_mont(t[9], tt, t[9]);
    for (i = 9; i <= 20; i++)
        fpmul_mont(t[i], tt, t[i + 1]);
    fpmul_mont(t[21], tt, t[21]);
    for (i = 21; i <= 24; i++)
        fpmul_mont(t[i], tt, t[i + 1]);
    fpmul_mont(t[25], tt, t[25]);
    fpmul_mont(t[25], tt, t[26]);

    memcpy(tt, a, sizeof(felm_t));
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[20], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[24], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[11], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[8], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[2], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[23], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[2], tt, tt);
    for (i = 0; i < 9; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[2], tt, tt);
    for (i = 0; i < 10; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[15], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[13], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[26], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[20], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[11], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[10], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[14], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[4], tt, tt);
    for (i = 0; i < 10; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[18], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[1], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[22], tt, tt);
    for (i = 0; i < 10; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[6], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[24], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[9], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[18], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[17], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(a, tt, tt);
    for (i = 0; i < 10; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[16], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[7], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[0], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[12], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[19], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[22], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[25], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[2], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[10], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[22], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[18], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[4], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[14], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[13], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[5], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[23], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[21], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[2], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[23], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[12], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[9], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[3], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[13], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[17], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[26], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[5], tt, tt);
    for (i = 0; i < 8; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[8], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[2], tt, tt);
    for (i = 0; i < 6; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[11], tt, tt);
    for (i = 0; i < 7; i++)
        fpsqr_mont(tt, tt);
    fpmul_mont(t[20], tt, tt);
    for (j = 0; j < 61; j++)
    {
        for (i = 0; i < 6; i++)
            fpsqr_mont(tt, tt);
        fpmul_mont(t[26], tt, tt);
    }
    memcpy(a, tt, sizeof(felm_t));
}

void fpinv_mont(felm_t a)
{ // Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p.
    felm_t tt;
    memcpy(tt, a, sizeof(felm_t));
    fpinv_chain_mont(tt);
    fpsqr_mont(tt, tt);
    fpsqr_mont(tt, tt);
    fpmul_mont(a, tt, a);
}

void fp2inv_mont(f2elm_t a)
{ // GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2).
    f2elm_t t1;
    felm_t zero = {0};
    fpsqr_mont(a[0], t1[0]);    // t10 = a0^2
    fpsqr_mont(a[1], t1[1]);    // t11 = a1^2
    fpadd(t1[0], t1[1], t1[0]); // t10 = a0^2+a1^2
    fpinv_mont(t1[0]);          // t10 = (a0^2+a1^2)^-1
    fp_sub(a[1], zero, a[1]);   // a = a0-i*a1
    fpmul_mont(a[0], t1[0], a[0]);
    fpmul_mont(a[1], t1[0], a[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1
}

void inv_3_way_ifma(f2elm_t z1, f2elm_t z2, f2elm_t z3)
{ // 3-way simultaneous inversion
    // Input:  z1,z2,z3
    // Output: 1/z1,1/z2,1/z3 (override inputs).
    f2elm_t t0, t1, t2, t3;

    fp2mul_mont(z1, z2, t0); // t0 = z1*z2
    fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3
    fp2inv_mont(t1);         // t1 = 1/(z1*z2*z3)
    fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2)
    fp2_mul_ifma_x2(t3, t2, z2, z2, t2, z1);
    //fp2mul_mont(t2, z2, t3); // t3 = 1/z1
    //fp2mul_mont(t2, z1, z2); // z2 = 1/z2
    fp2mul_mont(t0, t1, z3); // z3 = 1/z3
    memcpy(z1, t3, sizeof(f2elm_t));
}

void xDBLADD_ifma(point_proj_t P, point_proj_t Q, const f2elm_t xPQ, const f2elm_t A24)
{ // Simultaneous doubling and differential addition.
    // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4.
    // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP.
    f2elm_t t0, t1, t2, t3;

    fp2add(P->X, P->Z, t0); // t0 = XP+ZP
    fp2sub(P->X, P->Z, t1); // t1 = XP-ZP

    fp2_mul_ifma_x2(P->X, t0, t0, P->Z, t1, t1);
    //fp2sqr_mont(t0, P->X); // XP = (XP+ZP)^2
    //fp2sqr_mont(t1, P->Z); // ZP = (XP-ZP)^2

    fp2add(Q->X, Q->Z, t2); // XQ = XQ+ZQ
    fp2sub(Q->X, Q->Z, t3); // t2 = XQ-ZQ

    fp2_mul_ifma_x2(t1, t1, t2, t0, t0, t3);
    //fp2mul_mont(t2, t1, t1); // t1 = (XP-ZP)*(XQ+ZQ)
    //fp2mul_mont(t3, t0, t0); // t0 = (XP+ZP)*(XQ-ZQ)

    fp2sub(P->X, P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2
    fp2sub(t0, t1, Q->Z);   // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)

    fp2_mul_ifma_x2(P->X, P->X, P->Z, Q->X, A24, t2);
    //fp2mul_mont(P->X, P->Z, P->X); // XP = (XP+ZP)^2*(XP-ZP)^2
    //fp2mul_mont(t2, A24, Q->X);    // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2]

    fp2add(Q->X, P->Z, P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2
    fp2add(t0, t1, Q->X);     // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)

    fp2_mul_ifma_x2(Q->Z, Q->Z, Q->Z, Q->X, Q->X, Q->X);
    //fp2sqr_mont(Q->Z, Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
    //fp2sqr_mont(Q->X, Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2

    fp2_mul_ifma_x2(P->Z, P->Z, t2, Q->Z, Q->Z, xPQ);
    //fp2mul_mont(P->Z, t2, P->Z);  // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]
    //fp2mul_mont(Q->Z, xPQ, Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2
}

static void LADDER3PT_ifma(const f2elm_t xP, const f2elm_t xQ, const f2elm_t xPQ, const uint64_t *m, const unsigned int AliceOrBob, point_proj_t R)
{
    point_proj_t R0 = {0}, R2 = {0};
    const f2elm_t A24 = {
        {0x00000000124d6b3e, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000fcc0062a1600, 0x000d9932447f1d95, 0x000f30bb11b6dbbb, 0x000eeb74b81195c1, 0x000ea6bb113bab9f, 0x000aa668b600af9a, 0x0004fe3921c627e3, 0x00000000000dfc7b},
        {0}};

    uint64_t mask;
    int i, nbits, bit, swap, prevbit = 0;

    if (AliceOrBob == ALICE)
    {
        nbits = OALICE_BITS;
    }
    else
    {
        nbits = OBOB_BITS;
    }

    // Initializing points
    memcpy(R0->X, xQ, sizeof(f2elm_t));
    memcpy(R0->Z[0], One, sizeof(felm_t));

    memcpy(R2->X, xPQ, sizeof(f2elm_t));
    memcpy(R2->Z[0], One, sizeof(felm_t));

    memcpy(R->X, xP, sizeof(f2elm_t));
    memcpy(R->Z[0], One, sizeof(felm_t));
    memset(R->Z[1], 0, sizeof(felm_t));

    // Main loop
    for (i = 0; i < nbits; i++)
    {
        bit = (m[i >> 6] >> (i & (64 - 1))) & 1;
        swap = bit ^ prevbit;
        prevbit = bit;
        fp2_swap(R, R2, swap);

        xDBLADD_ifma(R0, R2, R->X, A24);
        fp2_mul_ifma(R2->X, R->Z, R2->X);
    }
}

static void xDBL_ifma(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24)
{ // Doubling of a Montgomery point in projective coordinates (X:Z).
    // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C.
    // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2).
    f2elm_t t0, t1, t2;

    fp2sub(P->X, P->Z, t0); // t0 = X1-Z1
    fp2add(P->X, P->Z, t1); // t1 = X1+Z1

    fp2_mul_ifma_x2(t0, t0, t0, t1, t1, t1);
    //fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2
    //fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2

    fp2sub(t1, t0, t2); // t1 = (X1+Z1)^2-(X1-Z1)^2

    fp2_mul_ifma_x2(Q->Z, t0, C24, t0, t2, A24plus);
    //fp2mul_mont(C24, t0, Q->Z);   // Z2 = C24*(X1-Z1)^2
    //fp2mul_mont(A24plus, t2, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2]

    fp2add(Q->Z, t0, t0); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2

    fp2_mul_ifma_x2(Q->X, Q->Z, t1, Q->Z, t2, t0);
    //fp2mul_mont(t1, Q->Z, Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2
    //fp2mul_mont(t0, t2, Q->Z);   // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2]
}

static void xDBLe_ifma(const point_proj_t P, point_proj_t Q, const f2elm_t A24plus, const f2elm_t C24, const int e)
{ // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings.
    // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C.
    // Output: projective Montgomery x-coordinates Q <- (2^e)*P.
    int i;

    memcpy(Q, P, sizeof(point_proj));

    for (i = 0; i < e; i++)
    {
        xDBL_ifma(Q, Q, A24plus, C24);
    }
}

static void xTPL_ifma(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus)
{ // Tripling of a Montgomery point in projective coordinates (X:Z).
    // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
    // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3).
    f2elm_t t0, t1, t2, t3, t4, t5, t6, t7, t8;

    fp2sub(P->X, P->Z, t0); // t0 = X-Z
    fp2add(P->X, P->Z, t1); // t1 = X+Z
    fp2_mul_ifma_x2(t2, t0, t0, t3, t1, t1);
    //fp2sqr_mont(t0, t2);           // t2 = (X-Z)^2
    //fp2sqr_mont(t1, t3);           // t3 = (X+Z)^2
    fp2_mul_ifma_x2(t5, A24plus, t3, t6, A24minus, t2);
    //fp2mul_mont(t3, A24plus, t5);  // t5 = A24plus*(X+Z)^2
    //fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2
    fp2_mul_ifma_x2(t7, t3, t5, t8, t2, t6);
    //fp2mul_mont(t3, t5, t7); // t3 = A24plus*(X+Z)^3
    //fp2mul_mont(t2, t6, t8); // t2 = A24minus*(X-Z)^3
    fp2add(t0, t1, t4);      // t4 = 2*X
    fp2sub(t1, t0, t0);      // t0 = 2*Z
    fp2sqr_mont(t4, t1);     // t1 = 4*X^2
    fp2sub(t1, t3, t1);      // t1 = 4*X^2 - (X+Z)^2
    fp2sub(t1, t2, t1);      // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2
    fp2sub(t8, t7, t7);      // t3 = A24minus*(X-Z)^3 - coeff*(X+Z)^3
    fp2sub(t5, t6, t8);      // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2
    fp2mul_mont(t1, t8, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
    fp2add(t7, t1, t8);      // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^3 - coeff*(X+Z)^3
    fp2sub(t7, t1, t1);      // t1 = A24minus*(X-Z)^3 - A24plus*(X+Z)^3 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2]
    fp2_mul_ifma_x2(t8, t8, t8, t1, t1, t1);
    //fp2sqr_mont(t8, t8); // t2 = t2^2
    //fp2sqr_mont(t1, t1); // t1 = t1^2
    fp2_mul_ifma_x2(Q->X, t4, t8, Q->Z, t1, t0);
    //fp2mul_mont(t4, t8, Q->X); // X3 = 2*X*t2
    //fp2mul_mont(t0, t1, Q->Z); // Z3 = 2*Z*t1
}

void xTPLe_ifma(const point_proj_t P, point_proj_t Q, const f2elm_t A24minus, const f2elm_t A24plus, const int e)
{ // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings.
    // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C.
    // Output: projective Montgomery x-coordinates Q <- (3^e)*P.
    int i;

    memcpy(Q, P, sizeof(point_proj));

    for (i = 0; i < e; i++)
    {
        xTPL_ifma(Q, Q, A24minus, A24plus);
    }
}

static void get_4_isog_ifma(const point_proj_t P, f2elm_t A24plus, f2elm_t C24, f2elm_t *coeff)
{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4.
    // Input:  projective point of order four P = (X4:Z4).
    // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients
    //         that are used to evaluate the isogeny at a point in eval_4_isog().

    fp2sub(P->X, P->Z, coeff[1]); // coeff[1] = X4-Z4
    fp2add(P->X, P->Z, coeff[2]); // coeff[2] = X4+Z4

    fp2_mul_ifma_x2(coeff[0], P->Z, P->Z, A24plus, P->X, P->X);
    //fp2sqr_mont(P->Z, coeff[0]); // coeff[0] = Z4^2
    //fp2sqr_mont(P->X, A24plus);  // A24plus = X4^2

    fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 2*Z4^2
    fp2add(A24plus, A24plus, A24plus);    // A24plus = 2*X4^2

    fp2_mul_ifma_x2(C24, coeff[0], coeff[0], A24plus, A24plus, A24plus);
    //fp2sqr_mont(coeff[0], C24);    // C24 = 4*Z4^4
    //fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4

    fp2add(coeff[0], coeff[0], coeff[0]); // coeff[0] = 4*Z4^2
}

static void eval_4_isog_ifma(point_proj_t P, f2elm_t *coeff)
{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined
    // by the 3 coefficients in coeff (computed in the function get_4_isog()).
    // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z).
    // Output: the projective point P = phi(P) = (X:Z) in the codomain.
    f2elm_t t0, t1, t2;

    fp2add(P->X, P->Z, t0); // t0 = X+Z
    fp2sub(P->X, P->Z, t1); // t1 = X-Z

    fp2_mul_ifma_x2(P->X, t0, coeff[1], t0, t0, t1);
    //fp2mul_mont(t0, coeff[1], P->X); // X = (X+Z)*coeff[1]
    //fp2mul_mont(t0, t1, t0);             // t0 = (X+Z)*(X-Z)

    fp2_mul_ifma_x2(P->Z, coeff[2], t1, t0, coeff[0], t0);
    //fp2mul_mont(t1, coeff[2], P->Z); // Z = (X-Z)*coeff[2]
    //fp2mul_mont(t0, coeff[0], t0);   // t0 = coeff[0]*(X+Z)*(X-Z)

    fp2add(P->X, P->Z, t1);   // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1]
    fp2sub(P->X, P->Z, P->Z); // Z = (X-Z)*coeff[2] - (X+Z)*coeff[1]

    fp2_mul_ifma_x2(t1, t1, t1, P->Z, P->Z, P->Z);
    //fp2sqr_mont(t1, t1);     // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
    //fp2sqr_mont(P->Z, P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2

    fp2add(t1, t0, P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2
    fp2sub(P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z)

    fp2_mul_ifma_x2(P->X, P->X, t1, P->Z, P->Z, t0);
    //fp2mul_mont(P->X, t1, P->X); // Xfinal
    //fp2mul_mont(P->Z, t0, P->Z); // Zfinal
}

static void get_3_isog_ifma(const point_proj_t P, f2elm_t A24minus, f2elm_t A24plus, f2elm_t *coeff)
{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3.
    // Input:  projective point of order three P = (X3:Z3).
    // Output: the 3-isogenous Montgomery curve with projective coefficient A/C.
    f2elm_t t0, t1, t2, t3, t4, t5;

    fp2sub(P->X, P->Z, coeff[0]); // coeff0 = X-Z
    fp2add(P->X, P->Z, coeff[1]); // coeff1 = X+Z
    fp2_mul_ifma_x2(t0, coeff[0], coeff[0], t1, coeff[1], coeff[1]);
    //fp2sqr_mont(coeff[0], t0);      // t0 = (X-Z)^2
    //fp2sqr_mont(coeff[1], t1);      // t1 = (X+Z)^2
    fp2add(t0, t1, t2);             // t2 = (X+Z)^2 + (X-Z)^2
    fp2add(coeff[0], coeff[1], t3); // t3 = 2*X
    fp2sqr_mont(t3, t3);            // t3 = 4*X^2
    fp2sub(t3, t2, t3);             // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2
    fp2add(t1, t3, t2);             // t2 = 4*X^2 - (X-Z)^2
    fp2add(t3, t0, t3);             // t3 = 4*X^2 - (X+Z)^2
    fp2add(t0, t3, t4);             // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2
    fp2add(t4, t4, t4);             // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2)
    fp2add(t1, t4, t4);             // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
    fp2add(t1, t2, t5);             // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2
    fp2add(t5, t5, t5);             // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2)
    fp2add(t0, t5, t5);             // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2
    fp2_mul_ifma_x2(A24minus, t2, t4, t5, t5, t3);
    // fp2mul_mont(t2, t4, A24minus);  // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
    // fp2mul_mont(t3, t5, t5);        // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]
    fp2sub(t5, A24minus, t0);      // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2]
    fp2add(A24minus, t0, A24plus); // A24plus = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2
}

static void eval_3_isog_ifma(point_proj_t Q, const f2elm_t *coeff)
{ // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and
    // a point P with 2 coefficients in coeff (computed in the function get_3_isog()).
    // Inputs: projective points P = (X3:Z3) and Q = (X:Z).
    // Output: the projective point Q <- phi(Q) = (X3:Z3).
    f2elm_t t0, t1, t2;

    fp2add(Q->X, Q->Z, t0); // t0 = X+Z
    fp2sub(Q->X, Q->Z, t1); // t1 = X-Z
    fp2_mul_ifma_x2(t0, t0, coeff[0], t1, t1, coeff[1]);
    //fp2mul_mont(t0, coeff[0], t0); // t0 = coeff0*(X+Z)
    //fp2mul_mont(t1, coeff[1], t1); // t1 = coeff1*(X-Z)
    fp2add(t0, t1, t2); // t2 = coeff0*(X-Z) + coeff1*(X+Z)
    fp2sub(t1, t0, t0); // t0 = coeff0*(X-Z) - coeff1*(X+Z)
    fp2_mul_ifma_x2(t2, t2, t2, t0, t0, t0);
    //fp2sqr_mont(t2, t2);         // t2 = [coeff0*(X-Z) + coeff1*(X+Z)]^2
    //fp2sqr_mont(t0, t0);         // t1 = [coeff0*(X-Z) - coeff1*(X+Z)]^2
    fp2_mul_ifma_x2(Q->X, Q->X, t2, Q->Z, Q->Z, t0);
    //fp2mul_mont(Q->X, t2, Q->X); // X3final = X*[coeff0*(X-Z) + coeff1*(X+Z)]^2
    //fp2mul_mont(Q->Z, t0, Q->Z); // Z3final = Z*[coeff0*(X-Z) - coeff1*(X+Z)]^2
}

static void fp2_encode(const f2elm_t x, unsigned char *enc)
{ // Conversion of GF(p^2) element from Montgomery to standard representation, and encoding by removing leading 0 bytes
    unsigned int i;
    f2elm_t tt;
    uint64_t t[12 * 2];

    from_mont_ifma(tt[0], x[0]);
    from_mont_ifma(tt[1], x[1]);

    red2norm(t, tt[0]);
    red2norm(&t[12], tt[1]);

    for (i = 0; i < FP2_ENCODED_BYTES / 2; i++)
    {
        enc[i] = ((unsigned char *)t)[i];
        enc[i + FP2_ENCODED_BYTES / 2] = ((unsigned char *)t)[i + MAXBITS_FIELD / 8];
    }
}

static void fp2_decode(const unsigned char *enc, f2elm_t x)
{
    unsigned int i;
    uint64_t t[12 * 2];

    memset(x, 0, sizeof(f2elm_t));
    for (i = 0; i < FP2_ENCODED_BYTES / 2; i++)
    {
        ((unsigned char *)t)[i] = enc[i];
        ((unsigned char *)t)[i + MAXBITS_FIELD / 8] = enc[i + FP2_ENCODED_BYTES / 2];
    }

    norm2red(x[0], t);
    norm2red(x[1], &t[12]);
    to_mont_ifma(x[0], x[0]);
    to_mont_ifma(x[1], x[1]);
}

int EphemeralKeyGeneration_A_ifma(const unsigned char *PrivateKeyA, unsigned char *PublicKeyA)
{ // Alice's ephemeral public key generation
    // Input:  a private key PrivateKeyA in the range [0, 2^eA - 1].
    // Output: the public key PublicKeyA consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes.
    point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_ALICE];
    f2elm_t XPA, XQA, XRA, coeff[3];
    unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0;

    f2elm_t C24 = {
        {0x000000004935acf8, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0003f30018a85800, 0x000664c911fc7654, 0x000cc2ec46db6eef, 0x000badd2e0465707, 0x000a9aec44eeae7f, 0x000a99a2d802be6b, 0x0003f8e487189f8e, 0x000000000037f1ed},
        {0}};

    f2elm_t A24plus = {
        {0x00000000249ad67c, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0001f9800c542c00, 0x000b326488fe3b2a, 0x000e6176236db777, 0x000dd6e970232b83, 0x000d4d762277573f, 0x00054cd16c015f35, 0x0009fc72438c4fc7, 0x00000000001bf8f6},
        {0}};

    // Initialize basis points
    init_basis(A_gen_ifma, XPA, XQA, XRA);
    init_basis(B_gen_ifma, phiP->X, phiQ->X, phiR->X);
    memcpy(phiP->Z, One, sizeof(felm_t));
    memcpy(phiQ->Z, One, sizeof(felm_t));
    memcpy(phiR->Z, One, sizeof(felm_t));

    // Retrieve kernel point
    LADDER3PT_ifma(XPA, XQA, XRA, (uint64_t *)PrivateKeyA, ALICE, R);

    // Traverse tree
    index = 0;
    for (row = 1; row < MAX_Alice; row++)
    {
        while (index < MAX_Alice - row)
        {
            memcpy(pts[npts]->X, R->X, sizeof(f2elm_t));
            memcpy(pts[npts]->Z, R->Z, sizeof(f2elm_t));
            pts_index[npts++] = index;
            m = strat_Alice[ii++];
            xDBLe_ifma(R, R, A24plus, C24, (int)(2 * m));
            index += m;
        }
        get_4_isog_ifma(R, A24plus, C24, coeff);

        for (i = 0; i < npts; i++)
        {
            eval_4_isog_ifma(pts[i], coeff);
        }
        eval_4_isog_ifma(phiP, coeff);
        eval_4_isog_ifma(phiQ, coeff);
        eval_4_isog_ifma(phiR, coeff);

        memcpy(R->X, pts[npts - 1]->X, sizeof(f2elm_t));
        memcpy(R->Z, pts[npts - 1]->Z, sizeof(f2elm_t));
        index = pts_index[npts - 1];
        npts -= 1;
    }

    get_4_isog_ifma(R, A24plus, C24, coeff);
    eval_4_isog_ifma(phiP, coeff);
    eval_4_isog_ifma(phiQ, coeff);
    eval_4_isog_ifma(phiR, coeff);

    inv_3_way_ifma(phiP->Z, phiQ->Z, phiR->Z);
    fp2_mul_ifma_x2(phiP->X, phiP->X, phiP->Z, phiQ->X, phiQ->X, phiQ->Z);
    //fp2mul_mont(phiP->X, phiP->Z, phiP->X);
    //fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
    fp2mul_mont(phiR->X, phiR->Z, phiR->X);

    // Format public key
    fp2_encode(phiP->X, PublicKeyA);
    fp2_encode(phiQ->X, PublicKeyA + FP2_ENCODED_BYTES);
    fp2_encode(phiR->X, PublicKeyA + 2 * FP2_ENCODED_BYTES);

    return 0;
}

int EphemeralKeyGeneration_B_ifma(const unsigned char *PrivateKeyB, unsigned char *PublicKeyB)
{ // Bob's ephemeral public key generation
    // Input:  a private key PrivateKeyB in the range [0, 2^Floor(Log(2,oB)) - 1].
    // Output: the public key PublicKeyB consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes.
    point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_BOB];
    f2elm_t XPB, XQB, XRB, coeff[3], A = {0};
    unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0;

    f2elm_t A24plus = {{0x000000004935acf8, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0003f30018a85800, 0x000664c911fc7654, 0x000cc2ec46db6eef, 0x000badd2e0465707, 0x000a9aec44eeae7f, 0x000a99a2d802be6b, 0x0003f8e487189f8e, 0x000000000037f1ed},
                       {0}};

    f2elm_t A24minus = {{0x000fffffb6ca5307, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x000fffffffffffff, 0x0000ac8771e692ff, 0x000167add1f02031, 0x000aaabd12d63250, 0x000ca0c5879094e0, 0x0000b5598636c600, 0x0004fe180463c6f7, 0x0000268d39c8897b, 0x000000000037f3e8},
                        {0}};

    uint64_t temp[12];
    uint64_t ifma_temp[15];
    // Initialize basis points
    init_basis(B_gen_ifma, XPB, XQB, XRB);
    init_basis(A_gen_ifma, phiP->X, phiQ->X, phiR->X);
    memcpy(phiP->Z, One, sizeof(felm_t));
    memcpy(phiQ->Z, One, sizeof(felm_t));
    memcpy(phiR->Z, One, sizeof(felm_t));

    // Retrieve kernel point
    LADDER3PT_ifma(XPB, XQB, XRB, (uint64_t *)PrivateKeyB, BOB, R);

    // Traverse tree
    index = 0;
    for (row = 1; row < MAX_Bob; row++)
    {
        while (index < MAX_Bob - row)
        {
            memcpy(pts[npts]->X, R->X, sizeof(f2elm_t));
            memcpy(pts[npts]->Z, R->Z, sizeof(f2elm_t));
            pts_index[npts++] = index;
            m = strat_Bob[ii++];
            xTPLe_ifma(R, R, A24minus, A24plus, (int)m);
            index += m;
        }
        get_3_isog_ifma(R, A24minus, A24plus, coeff);

        for (i = 0; i < npts; i++)
        {
            eval_3_isog_ifma(pts[i], coeff);
        }
        eval_3_isog_ifma(phiP, coeff);
        eval_3_isog_ifma(phiQ, coeff);
        eval_3_isog_ifma(phiR, coeff);

        memcpy(R->X, pts[npts - 1]->X, sizeof(f2elm_t));
        memcpy(R->Z, pts[npts - 1]->Z, sizeof(f2elm_t));

        index = pts_index[npts - 1];
        npts -= 1;
    }

    get_3_isog_ifma(R, A24minus, A24plus, coeff);
    eval_3_isog_ifma(phiP, coeff);
    eval_3_isog_ifma(phiQ, coeff);
    eval_3_isog_ifma(phiR, coeff);

    inv_3_way_ifma(phiP->Z, phiQ->Z, phiR->Z);
    fp2mul_mont(phiP->X, phiP->Z, phiP->X);
    fp2mul_mont(phiQ->X, phiQ->Z, phiQ->X);
    fp2mul_mont(phiR->X, phiR->Z, phiR->X);

    // Format public key
    fp2_encode(phiP->X, PublicKeyB);
    fp2_encode(phiQ->X, PublicKeyB + FP2_ENCODED_BYTES);
    fp2_encode(phiR->X, PublicKeyB + 2 * FP2_ENCODED_BYTES);

    return 0;
}