From 9c85e12796ba3df388696d9b35af7f6927e1cfe9 Mon Sep 17 00:00:00 2001 From: Kris Kwiatkowski Date: Sun, 2 Sep 2018 20:36:11 +0100 Subject: [PATCH] WIP --- p503toolbox/consts.go | 173 +++ p503toolbox/curve.go | 293 +++++ p503toolbox/curve_test.go | 372 ++++++ p503toolbox/field.go | 405 ++++++ p503toolbox/field.o | Bin 0 -> 39652 bytes p503toolbox/field_amd64.s | 2284 ++++++++++++++++++++++++++++++++++ p503toolbox/field_decl.go | 42 + p503toolbox/field_generic.go | 254 ++++ p503toolbox/field_test.go | 419 +++++++ p503toolbox/isogeny.go | 144 +++ p503toolbox/isogeny_test.go | 114 ++ p503toolbox/print_test.go | 19 + 12 files changed, 4519 insertions(+) create mode 100644 p503toolbox/consts.go create mode 100644 p503toolbox/curve.go create mode 100644 p503toolbox/curve_test.go create mode 100644 p503toolbox/field.go create mode 100644 p503toolbox/field.o create mode 100644 p503toolbox/field_amd64.s create mode 100644 p503toolbox/field_decl.go create mode 100644 p503toolbox/field_generic.go create mode 100644 p503toolbox/field_test.go create mode 100644 p503toolbox/isogeny.go create mode 100644 p503toolbox/isogeny_test.go create mode 100644 p503toolbox/print_test.go diff --git a/p503toolbox/consts.go b/p503toolbox/consts.go new file mode 100644 index 0000000..8359556 --- /dev/null +++ b/p503toolbox/consts.go @@ -0,0 +1,173 @@ +package p503toolbox + +const ( + // The secret key size, in bytes. Secret key is actually different for + // torsion group 2 and 3. For 2-torsion group, last byte of secret key + // is always set to 0. + P503_SecretKeySize = 48 + // SIDH public key byte size + P503_PublicKeySize = 564 + // SIDH shared secret byte size. + P503_SharedSecretSize = 188 + // Max size of secret key for 2-torsion group, corresponds to 2^e2 + P503_SecretBitLenA = 372 + // Size of secret key for 3-torsion group, corresponds to log_2(3^e3) + P503_SecretBitLenB = 379 + // Corresponds to (8 - e2 % 8). Used for ensuring bitlength equal to e2 + P503_MaskAliceByte1 = 0x00 + P503_MaskAliceByte2 = 0x0f + P503_MaskAliceByte3 = 0xfe + // Corresponds to (8 - e3 % 8). Used for ensuring bitlength equal to e3 + P503_MaskBobByte = 0x03 + // Sample rate to obtain a value in [0,3^238] + P503_SampleRate = 102 + // Size of a compuatation strategy for 2-torsion group + strategySizeA = 185 + // Size of a compuatation strategy for 3-torsion group + strategySizeB = 238 +) + +// The x-coordinate of PA +var P503_affine_PA = ExtensionFieldElement{ + A: Fp751Element{ + 0xC2FC08CEAB50AD8B, 0x1D7D710F55E457B1, 0xE8738D92953DCD6E, + 0xBAA7EBEE8A3418AA, 0xC9A288345F03F46F, 0xC8D18D167CFE2616, + 0x02043761F6B1C045, 0xAA1975E13180E7E9, 0x9E13D3FDC6690DE6, + 0x3A024640A3A3BB4F, 0x4E5AD44E6ACBBDAE, 0x0000544BEB561DAD, + }, + B: Fp751Element{ + 0xE6CC41D21582E411, 0x07C2ECB7C5DF400A, 0xE8E34B521432AEC4, + 0x50761E2AB085167D, 0x032CFBCAA6094B3C, 0x6C522F5FDF9DDD71, + 0x1319217DC3A1887D, 0xDC4FB25803353A86, 0x362C8D7B63A6AB09, + 0x39DCDFBCE47EA488, 0x4C27C99A2C28D409, 0x00003CB0075527C4, + }, +} + +// The x-coordinate of QA +var P503_affine_QA = ExtensionFieldElement{ + A: Fp751Element{ + 0xD56FE52627914862, 0x1FAD60DC96B5BAEA, 0x01E137D0BF07AB91, + 0x404D3E9252161964, 0x3C5385E4CD09A337, 0x4476426769E4AF73, + 0x9790C6DB989DFE33, 0xE06E1C04D2AA8B5E, 0x38C08185EDEA73B9, + 0xAA41F678A4396CA6, 0x92B9259B2229E9A0, 0x00002F9326818BE0, + }, + B: Fp751Element{ + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + }, +} + +// The x-coordinate of RA = PA-QA +var P503_affine_RA = ExtensionFieldElement{ + A: Fp751Element{ + 0x0BB84441DFFD19B3, 0x84B4DEA99B48C18E, 0x692DE648AD313805, + 0xE6D72761B6DFAEE0, 0x223975C672C3058D, 0xA0FDE0C3CBA26FDC, + 0xA5326132A922A3CA, 0xCA5E7F5D5EA96FA4, 0x127C7EFE33FFA8C6, + 0x4749B1567E2A23C4, 0x2B7DF5B4AF413BFA, 0x0000656595B9623C, + }, + B: Fp751Element{ + 0xED78C17F1EC71BE8, 0xF824D6DF753859B1, 0x33A10839B2A8529F, + 0xFC03E9E25FDEA796, 0xC4708A8054DF1762, 0x4034F2EC034C6467, + 0xABFB70FBF06ECC79, 0xDABE96636EC108B7, 0x49CBCFB090605FD3, + 0x20B89711819A45A7, 0xFB8E1590B2B0F63E, 0x0000556A5F964AB2, + }, +} + +// The x-coordinate of PB +var P503_affine_PB = ExtensionFieldElement{ + A: Fp751Element{ + 0xCFB6D71EF867AB0B, 0x4A5FDD76E9A45C76, 0x38B1EE69194B1F03, + 0xF6E7B18A7761F3F0, 0xFCF01A486A52C84C, 0xCBE2F63F5AA75466, + 0x6487BCE837B5E4D6, 0x7747F5A8C622E9B8, 0x4CBFE1E4EE6AEBBA, + 0x8A8616A13FA91512, 0x53DB980E1579E0A5, 0x000058FEBFF3BE69, + }, + B: Fp751Element{ + 0xA492034E7C075CC3, 0x677BAF00B04AA430, 0x3AAE0C9A755C94C8, + 0x1DC4B064E9EBB08B, 0x3684EDD04E826C66, 0x9BAA6CB661F01B22, + 0x20285A00AD2EFE35, 0xDCE95ABD0497065F, 0x16C7FBB3778E3794, + 0x26B3AC29CEF25AAF, 0xFB3C28A31A30AC1D, 0x000046ED190624EE, + }, +} + +// The x-coordinate of QB +var P503_affine_QB = ExtensionFieldElement{ + A: Fp751Element{ + 0xF1A8C9ED7B96C4AB, 0x299429DA5178486E, 0xEF4926F20CD5C2F4, + 0x683B2E2858B4716A, 0xDDA2FBCC3CAC3EEB, 0xEC055F9F3A600460, + 0xD5A5A17A58C3848B, 0x4652D836F42EAED5, 0x2F2E71ED78B3A3B3, + 0xA771C057180ADD1D, 0xC780A5D2D835F512, 0x0000114EA3B55AC1, + }, + B: Fp751Element{ + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + }, +} + +// The x-coordinate of RB = PB - QB +var P503_affine_RB = ExtensionFieldElement{ + A: Fp751Element{ + 0x1C0D6733769D0F31, 0xF084C3086E2659D1, 0xE23D5DA27BCBD133, + 0xF38EC9A8D5864025, 0x6426DC781B3B645B, 0x4B24E8E3C9FB03EE, + 0x6432792F9D2CEA30, 0x7CC8E8B1AE76E857, 0x7F32BFB626BB8963, + 0xB9F05995B48D7B74, 0x4D71200A7D67E042, 0x0000228457AF0637, + }, + B: Fp751Element{ + 0x4AE37E7D8F72BD95, 0xDD2D504B3E993488, 0x5D14E7FA1ECB3C3E, + 0x127610CEB75D6350, 0x255B4B4CAC446B11, 0x9EA12336C1F70CAF, + 0x79FA68A2147BC2F8, 0x11E895CFDADBBC49, 0xE4B9D3C4D6356C18, + 0x44B25856A67F951C, 0x5851541F61308D0B, 0x00002FFD994F7E4C, + }, +} + +// 2-torsion group computation strategy +var P503_AliceIsogenyStrategy = [strategySizeA]uint32{ + 0x50, 0x30, 0x1B, 0x0F, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, + 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x07, + 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x03, 0x02, 0x01, + 0x01, 0x01, 0x01, 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, + 0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, + 0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x15, + 0x0C, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x03, + 0x02, 0x01, 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01, 0x01, + 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, 0x03, 0x02, + 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, + 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x21, 0x14, 0x0C, 0x07, + 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x03, 0x02, 0x01, + 0x01, 0x01, 0x01, 0x05, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, + 0x02, 0x01, 0x01, 0x01, 0x08, 0x05, 0x03, 0x02, 0x01, 0x01, + 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, + 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x01, + 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, + 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, + 0x01, 0x01, 0x02, 0x01, 0x01} + +// 3-torsion group computation strategy +var P503_BobIsogenyStrategy = [strategySizeB]uint32{ + 0x70, 0x3F, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, + 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, + 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, + 0x01, 0x02, 0x01, 0x01, 0x10, 0x08, 0x04, 0x02, 0x01, 0x01, + 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, + 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, + 0x01, 0x01, 0x02, 0x01, 0x01, 0x1F, 0x10, 0x08, 0x04, 0x02, + 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, + 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, + 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x0F, 0x08, 0x04, + 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, + 0x02, 0x01, 0x01, 0x07, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, + 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x31, 0x1F, 0x10, + 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, 0x02, + 0x01, 0x01, 0x02, 0x01, 0x01, 0x08, 0x04, 0x02, 0x01, 0x01, + 0x02, 0x01, 0x01, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, + 0x0F, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x04, + 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x07, 0x04, 0x02, 0x01, + 0x01, 0x02, 0x01, 0x01, 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, + 0x15, 0x0C, 0x08, 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, + 0x04, 0x02, 0x01, 0x01, 0x02, 0x01, 0x01, 0x05, 0x03, 0x02, + 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, 0x09, 0x05, + 0x03, 0x02, 0x01, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01, 0x01, + 0x04, 0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01} diff --git a/p503toolbox/curve.go b/p503toolbox/curve.go new file mode 100644 index 0000000..7b65620 --- /dev/null +++ b/p503toolbox/curve.go @@ -0,0 +1,293 @@ +package p503toolbox + +// A point on the projective line P^1(F_{p^2}). +// +// This is used to work projectively with the curve coefficients. +type ProjectiveCurveParameters struct { + A ExtensionFieldElement + C ExtensionFieldElement +} + +// Stores curve projective parameters equivalent to A/C. Meaning of the +// values depends on the context. When working with isogenies over +// subgroup that are powers of: +// * three then (A:C) ~ (A+2C:A-2C) +// * four then (A:C) ~ (A+2C: 4C) +// See Appendix A of SIKE for more details +type CurveCoefficientsEquiv struct { + A ExtensionFieldElement + C ExtensionFieldElement +} + +// A point on the projective line P^1(F_{p^2}). +// +// This represents a point on the Kummer line of a Montgomery curve. The +// curve is specified by a ProjectiveCurveParameters struct. +type ProjectivePoint struct { + X ExtensionFieldElement + Z ExtensionFieldElement +} + +func (params *ProjectiveCurveParameters) FromAffine(a *ExtensionFieldElement) { + params.A = *a + params.C.One() +} + +// Computes j-invariant for a curve y2=x3+A/Cx+x with A,C in F_(p^2). Result +// is returned in jBytes buffer, encoded in little-endian format. Caller +// provided jBytes buffer has to be big enough to j-invariant value. In case +// of SIDH, buffer size must be at least size of shared secret. +// Implementation corresponds to Algorithm 9 from SIKE. +func (cparams *ProjectiveCurveParameters) Jinvariant(jBytes []byte) { + var j, t0, t1 ExtensionFieldElement + + j.Square(&cparams.A) // j = A^2 + t1.Square(&cparams.C) // t1 = C^2 + t0.Add(&t1, &t1) // t0 = t1 + t1 + t0.Sub(&j, &t0) // t0 = j - t0 + t0.Sub(&t0, &t1) // t0 = t0 - t1 + j.Sub(&t0, &t1) // t0 = t0 - t1 + t1.Square(&t1) // t1 = t1^2 + j.Mul(&j, &t1) // t0 = t0 * t1 + t0.Add(&t0, &t0) // t0 = t0 + t0 + t0.Add(&t0, &t0) // t0 = t0 + t0 + t1.Square(&t0) // t1 = t0^2 + t0.Mul(&t0, &t1) // t0 = t0 * t1 + t0.Add(&t0, &t0) // t0 = t0 + t0 + t0.Add(&t0, &t0) // t0 = t0 + t0 + j.Inv(&j) // j = 1/j + j.Mul(&t0, &j) // j = t0 * j + + j.ToBytes(jBytes) +} + +// Given affine points x(P), x(Q) and x(Q-P) in a extension field F_{p^2}, function +// recorvers projective coordinate A of a curve. This is Algorithm 10 from SIKE. +func (curve *ProjectiveCurveParameters) RecoverCoordinateA(xp, xq, xr *ExtensionFieldElement) { + var t0, t1 ExtensionFieldElement + + t1.Add(xp, xq) // t1 = Xp + Xq + t0.Mul(xp, xq) // t0 = Xp * Xq + curve.A.Mul(xr, &t1) // A = X(q-p) * t1 + curve.A.Add(&curve.A, &t0) // A = A + t0 + t0.Mul(&t0, xr) // t0 = t0 * X(q-p) + curve.A.Sub(&curve.A, &oneExtensionField) // A = A - 1 + t0.Add(&t0, &t0) // t0 = t0 + t0 + t1.Add(&t1, xr) // t1 = t1 + X(q-p) + t0.Add(&t0, &t0) // t0 = t0 + t0 + curve.A.Square(&curve.A) // A = A^2 + t0.Inv(&t0) // t0 = 1/t0 + curve.A.Mul(&curve.A, &t0) // A = A * t0 + curve.A.Sub(&curve.A, &t1) // A = A - t1 +} + +// Computes equivalence (A:C) ~ (A+2C : A-2C) +func (curve *ProjectiveCurveParameters) CalcCurveParamsEquiv3() CurveCoefficientsEquiv { + var coef CurveCoefficientsEquiv + var c2 ExtensionFieldElement + + c2.Add(&curve.C, &curve.C) + // A24p = A+2*C + coef.A.Add(&curve.A, &c2) + // A24m = A-2*C + coef.C.Sub(&curve.A, &c2) + return coef +} + +// Computes equivalence (A:C) ~ (A+2C : 4C) +func (cparams *ProjectiveCurveParameters) CalcCurveParamsEquiv4() CurveCoefficientsEquiv { + var coefEq CurveCoefficientsEquiv + + coefEq.C.Add(&cparams.C, &cparams.C) + // A24p = A+2C + coefEq.A.Add(&cparams.A, &coefEq.C) + // C24 = 4*C + coefEq.C.Add(&coefEq.C, &coefEq.C) + return coefEq +} + +// Helper function for RightToLeftLadder(). Returns A+2C / 4. +func (cparams *ProjectiveCurveParameters) calcAplus2Over4() (ret ExtensionFieldElement) { + var tmp ExtensionFieldElement + // 2C + tmp.Add(&cparams.C, &cparams.C) + // A+2C + ret.Add(&cparams.A, &tmp) + // 1/4C + tmp.Add(&tmp, &tmp).Inv(&tmp) + // A+2C/4C + ret.Mul(&ret, &tmp) + return +} + +// Recovers (A:C) curve parameters from projectively equivalent (A+2C:A-2C). +func (cparams *ProjectiveCurveParameters) RecoverCurveCoefficients3(coefEq *CurveCoefficientsEquiv) { + cparams.A.Add(&coefEq.A, &coefEq.C) + // cparams.A = 2*(A+2C+A-2C) = 4A + cparams.A.Add(&cparams.A, &cparams.A) + // cparams.C = (A+2C-A+2C) = 4C + cparams.C.Sub(&coefEq.A, &coefEq.C) + return +} + +// Recovers (A:C) curve parameters from projectively equivalent (A+2C:4C). +func (cparams *ProjectiveCurveParameters) RecoverCurveCoefficients4(coefEq *CurveCoefficientsEquiv) { + var half = ExtensionFieldElement{ + A: Fp751Element{ + 0x00000000000124D6, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0xB8E0000000000000, + 0x9C8A2434C0AA7287, 0xA206996CA9A378A3, 0x6876280D41A41B52, + 0xE903B49F175CE04F, 0x0F8511860666D227, 0x00004EA07CFF6E7F}, + B: Fp751Element{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + } + // cparams.C = (4C)*1/2=2C + cparams.C.Mul(&coefEq.C, &half) + // cparams.A = A+2C - 2C = A + cparams.A.Sub(&coefEq.A, &cparams.C) + // cparams.C = 2C * 1/2 = C + cparams.C.Mul(&cparams.C, &half) + return +} + +func (point *ProjectivePoint) FromAffine(x *ExtensionFieldElement) { + point.X = *x + point.Z = oneExtensionField +} + +func (point *ProjectivePoint) ToAffine() *ExtensionFieldElement { + affine_x := new(ExtensionFieldElement) + affine_x.Inv(&point.Z).Mul(affine_x, &point.X) + return affine_x +} + +func (lhs *ProjectivePoint) VartimeEq(rhs *ProjectivePoint) bool { + var t0, t1 ExtensionFieldElement + t0.Mul(&lhs.X, &rhs.Z) + t1.Mul(&lhs.Z, &rhs.X) + return t0.VartimeEq(&t1) +} + +func ProjectivePointConditionalSwap(xP, xQ *ProjectivePoint, choice uint8) { + ExtensionFieldConditionalSwap(&xP.X, &xQ.X, choice) + ExtensionFieldConditionalSwap(&xP.Z, &xQ.Z, choice) +} + +// Combined coordinate doubling and differential addition. Takes projective points +// P,Q,Q-P and (A+2C)/4C curve E coefficient. Returns 2*P and P+Q calculated on E. +// Function is used only by RightToLeftLadder. Corresponds to Algorithm 5 of SIKE +func xDblAdd(P, Q, QmP *ProjectivePoint, a24 *ExtensionFieldElement) (dblP, PaQ ProjectivePoint) { + var t0, t1, t2 ExtensionFieldElement + xQmP, zQmP := &QmP.X, &QmP.Z + xPaQ, zPaQ := &PaQ.X, &PaQ.Z + x2P, z2P := &dblP.X, &dblP.Z + xP, zP := &P.X, &P.Z + xQ, zQ := &Q.X, &Q.Z + + t0.Add(xP, zP) // t0 = Xp+Zp + t1.Sub(xP, zP) // t1 = Xp-Zp + x2P.Square(&t0) // 2P.X = t0^2 + t2.Sub(xQ, zQ) // t2 = Xq-Zq + xPaQ.Add(xQ, zQ) // Xp+q = Xq+Zq + t0.Mul(&t0, &t2) // t0 = t0 * t2 + z2P.Mul(&t1, &t1) // 2P.Z = t1 * t1 + t1.Mul(&t1, xPaQ) // t1 = t1 * Xp+q + t2.Sub(x2P, z2P) // t2 = 2P.X - 2P.Z + x2P.Mul(x2P, z2P) // 2P.X = 2P.X * 2P.Z + xPaQ.Mul(a24, &t2) // Xp+q = A24 * t2 + zPaQ.Sub(&t0, &t1) // Zp+q = t0 - t1 + z2P.Add(xPaQ, z2P) // 2P.Z = Xp+q + 2P.Z + xPaQ.Add(&t0, &t1) // Xp+q = t0 + t1 + z2P.Mul(z2P, &t2) // 2P.Z = 2P.Z * t2 + zPaQ.Square(zPaQ) // Zp+q = Zp+q ^ 2 + xPaQ.Square(xPaQ) // Xp+q = Xp+q ^ 2 + zPaQ.Mul(xQmP, zPaQ) // Zp+q = Xq-p * Zp+q + xPaQ.Mul(zQmP, xPaQ) // Xp+q = Zq-p * Xp+q + return +} + +// Given the curve parameters, xP = x(P), and k >= 0, compute x2P = x([2^k]P). +// +// Returns x2P to allow chaining. Safe to overlap xP, x2P. +func (x2P *ProjectivePoint) Pow2k(params *CurveCoefficientsEquiv, xP *ProjectivePoint, k uint32) *ProjectivePoint { + var t0, t1 ExtensionFieldElement + + *x2P = *xP + x, z := &x2P.X, &x2P.Z + + for i := uint32(0); i < k; i++ { + t0.Sub(x, z) // t0 = Xp - Zp + t1.Add(x, z) // t1 = Xp + Zp + t0.Square(&t0) // t0 = t0 ^ 2 + t1.Square(&t1) // t1 = t1 ^ 2 + z.Mul(¶ms.C, &t0) // Z2p = C24 * t0 + x.Mul(z, &t1) // X2p = Z2p * t1 + t1.Sub(&t1, &t0) // t1 = t1 - t0 + t0.Mul(¶ms.A, &t1) // t0 = A24+ * t1 + z.Add(z, &t0) // Z2p = Z2p + t0 + z.Mul(z, &t1) // Zp = Z2p * t1 + } + + return x2P +} + +// Given the curve parameters, xP = x(P), and k >= 0, compute x3P = x([3^k]P). +// +// Returns x3P to allow chaining. Safe to overlap xP, xR. +func (x3P *ProjectivePoint) Pow3k(params *CurveCoefficientsEquiv, xP *ProjectivePoint, k uint32) *ProjectivePoint { + var t0, t1, t2, t3, t4, t5, t6 ExtensionFieldElement + + *x3P = *xP + x, z := &x3P.X, &x3P.Z + + for i := uint32(0); i < k; i++ { + t0.Sub(x, z) // t0 = Xp - Zp + t2.Square(&t0) // t2 = t0^2 + t1.Add(x, z) // t1 = Xp + Zp + t3.Square(&t1) // t3 = t1^2 + t4.Add(&t1, &t0) // t4 = t1 + t0 + t0.Sub(&t1, &t0) // t0 = t1 - t0 + t1.Square(&t4) // t1 = t4^2 + t1.Sub(&t1, &t3) // t1 = t1 - t3 + t1.Sub(&t1, &t2) // t1 = t1 - t2 + t5.Mul(&t3, ¶ms.A) // t5 = t3 * A24+ + t3.Mul(&t3, &t5) // t3 = t5 * t3 + t6.Mul(&t2, ¶ms.C) // t6 = t2 * A24- + t2.Mul(&t2, &t6) // t2 = t2 * t6 + t3.Sub(&t2, &t3) // t3 = t2 - t3 + t2.Sub(&t5, &t6) // t2 = t5 - t6 + t1.Mul(&t2, &t1) // t1 = t2 * t1 + t2.Add(&t3, &t1) // t2 = t3 + t1 + t2.Square(&t2) // t2 = t2^2 + x.Mul(&t2, &t4) // X3p = t2 * t4 + t1.Sub(&t3, &t1) // t1 = t3 - t1 + t1.Square(&t1) // t1 = t1^2 + z.Mul(&t1, &t0) // Z3p = t1 * t0 + } + return x3P +} + +// RightToLeftLadder is a right-to-left point multiplication that given the +// x-coordinate of P, Q and P-Q calculates the x-coordinate of R=Q+[scalar]P. +// nbits must be smaller or equal to len(scalar). +func RightToLeftLadder(c *ProjectiveCurveParameters, P, Q, PmQ *ProjectivePoint, + nbits uint, scalar []uint8) ProjectivePoint { + var R0, R2, R1 ProjectivePoint + + aPlus2Over4 := c.calcAplus2Over4() + R1 = *P + R2 = *PmQ + R0 = *Q + + // Iterate over the bits of the scalar, bottom to top + prevBit := uint8(0) + for i := uint(0); i < nbits; i++ { + bit := (scalar[i>>3] >> (i & 7) & 1) + swap := prevBit ^ bit + prevBit = bit + ProjectivePointConditionalSwap(&R1, &R2, swap) + R0, R2 = xDblAdd(&R0, &R2, &R1, &aPlus2Over4) + } + + ProjectivePointConditionalSwap(&R1, &R2, prevBit) + return R1 +} diff --git a/p503toolbox/curve_test.go b/p503toolbox/curve_test.go new file mode 100644 index 0000000..737c002 --- /dev/null +++ b/p503toolbox/curve_test.go @@ -0,0 +1,372 @@ +package p503toolbox + +import ( + "bytes" + "math/rand" + "reflect" + "testing" + "testing/quick" +) + +// Sage script for generating test vectors: +// sage: p = 2^372 * 3^239 - 1; Fp = GF(p) +// sage: R. = Fp[] +// sage: Fp2 = Fp.extension(x^2 + 1, 'i') +// sage: i = Fp2.gen() +// sage: A = 4385300808024233870220415655826946795549183378139271271040522089756750951667981765872679172832050962894122367066234419550072004266298327417513857609747116903999863022476533671840646615759860564818837299058134292387429068536219*i + 1408083354499944307008104531475821995920666351413327060806684084512082259107262519686546161682384352696826343970108773343853651664489352092568012759783386151707999371397181344707721407830640876552312524779901115054295865393760 +// sage: C = 933177602672972392833143808100058748100491911694554386487433154761658932801917030685312352302083870852688835968069519091048283111836766101703759957146191882367397129269726925521881467635358356591977198680477382414690421049768*i + 9088894745865170214288643088620446862479558967886622582768682946704447519087179261631044546285104919696820250567182021319063155067584445633834024992188567423889559216759336548208016316396859149888322907914724065641454773776307 +// sage: E = EllipticCurve(Fp2, [0,A/C,0,1,0]) +// sage: XP, YP, ZP = (8172151271761071554796221948801462094972242987811852753144865524899433583596839357223411088919388342364651632180452081960511516040935428737829624206426287774255114241789158000915683252363913079335550843837650671094705509470594*i + 9326574858039944121604015439381720195556183422719505497448541073272720545047742235526963773359004021838961919129020087515274115525812121436661025030481584576474033630899768377131534320053412545346268645085054880212827284581557, 2381174772709336084066332457520782192315178511983342038392622832616744048226360647551642232950959910067260611740876401494529727990031260499974773548012283808741733925525689114517493995359390158666069816204787133942283380884077*i + 5378956232034228335189697969144556552783858755832284194802470922976054645696324118966333158267442767138528227968841257817537239745277092206433048875637709652271370008564179304718555812947398374153513738054572355903547642836171, 1) +// sage: XQ, YQ, ZQ = (58415083458086949460774631288254059520198050925769753726965257584725433404854146588020043440874302516770401547098747946831855304325931998303167820332782040123488940125615738529117852871565495005635582483848203360379709783913*i + 5253691516070829381103946367549411712646323371150903710970418364086793242952116417060222617507143384179331507713214357463046698181663711723374230655213762988214127964685312538735038552802853885461137159568726585741937776693865 : 5158855522131677856751020091923296498106687563952623634063620100943451774747711264382913073742659403103540173656392945587315798626256921403472228775673617431559609635074280718249654789362069112718012186738875122436282913402060*i + 6207982879267706771450280080677554401823496760317291100742934673980817977221212113336809046153999230028113719924529054308271353271269929392876016936836430864080079684360816968551808465413552611322477628209203066093559492022329 : 1) +// sage: P = E((XP,YP,ZP)) +// sage: X2, Y2, Z2 = 2*P +// sage: X3, Y3, Z3 = 3*P +// sage: m = 96550223052359874398280314003345143371473380422728857598463622014420884224892 + +// A = 4385300808024233870220415655826946795549183378139271271040522089756750951667981765872679172832050962894122367066234419550072004266298327417513857609747116903999863022476533671840646615759860564818837299058134292387429068536219*i + 1408083354499944307008104531475821995920666351413327060806684084512082259107262519686546161682384352696826343970108773343853651664489352092568012759783386151707999371397181344707721407830640876552312524779901115054295865393760 +var curve_A = ExtensionFieldElement{ + A: Fp751Element{0x8319eb18ca2c435e, 0x3a93beae72cd0267, 0x5e465e1f72fd5a84, 0x8617fa4150aa7272, 0x887da24799d62a13, 0xb079b31b3c7667fe, 0xc4661b150fa14f2e, 0xd4d2b2967bc6efd6, 0x854215a8b7239003, 0x61c5302ccba656c2, 0xf93194a27d6f97a2, 0x1ed9532bca75}, + B: Fp751Element{0xb6f541040e8c7db6, 0x99403e7365342e15, 0x457e9cee7c29cced, 0x8ece72dc073b1d67, 0x6e73cef17ad28d28, 0x7aed836ca317472, 0x89e1de9454263b54, 0x745329277aa0071b, 0xf623dfc73bc86b9b, 0xb8e3c1d8a9245882, 0x6ad0b3d317770bec, 0x5b406e8d502b}} + +// C = 933177602672972392833143808100058748100491911694554386487433154761658932801917030685312352302083870852688835968069519091048283111836766101703759957146191882367397129269726925521881467635358356591977198680477382414690421049768*i + 9088894745865170214288643088620446862479558967886622582768682946704447519087179261631044546285104919696820250567182021319063155067584445633834024992188567423889559216759336548208016316396859149888322907914724065641454773776307 +var curve_C = ExtensionFieldElement{ + A: Fp751Element{0x4fb2358bbf723107, 0x3a791521ac79e240, 0x283e24ef7c4c922f, 0xc89baa1205e33cc, 0x3031be81cff6fee1, 0xaf7a494a2f6a95c4, 0x248d251eaac83a1d, 0xc122fca1e2550c88, 0xbc0451b11b6cfd3d, 0x9c0a114ab046222c, 0x43b957b32f21f6ea, 0x5b9c87fa61de}, + B: Fp751Element{0xacf142afaac15ec6, 0xfd1322a504a071d5, 0x56bb205e10f6c5c6, 0xe204d2849a97b9bd, 0x40b0122202fe7f2e, 0xecf72c6fafacf2cb, 0x45dfc681f869f60a, 0x11814c9aff4af66c, 0x9278b0c4eea54fe7, 0x9a633d5baf7f2e2e, 0x69a329e6f1a05112, 0x1d874ace23e4}} + +var curve = ProjectiveCurveParameters{A: curve_A, C: curve_C} + +// x(P) = 8172151271761071554796221948801462094972242987811852753144865524899433583596839357223411088919388342364651632180452081960511516040935428737829624206426287774255114241789158000915683252363913079335550843837650671094705509470594*i + 9326574858039944121604015439381720195556183422719505497448541073272720545047742235526963773359004021838961919129020087515274115525812121436661025030481584576474033630899768377131534320053412545346268645085054880212827284581557 +var affine_xP = ExtensionFieldElement{ + A: Fp751Element{0xe8d05f30aac47247, 0x576ec00c55441de7, 0xbf1a8ec5fe558518, 0xd77cb17f77515881, 0x8e9852837ee73ec4, 0x8159634ad4f44a6b, 0x2e4eb5533a798c5, 0x9be8c4354d5bc849, 0xf47dc61806496b84, 0x25d0e130295120e0, 0xdbef54095f8139e3, 0x5a724f20862c}, + B: Fp751Element{0x3ca30d7623602e30, 0xfb281eddf45f07b7, 0xd2bf62d5901a45bc, 0xc67c9baf86306dd2, 0x4e2bd93093f538ca, 0xcfd92075c25b9cbe, 0xceafe9a3095bcbab, 0x7d928ad380c85414, 0x37c5f38b2afdc095, 0x75325899a7b779f4, 0xf130568249f20fdd, 0x178f264767d1}} + +// x([2]P) = 1476586462090705633631615225226507185986710728845281579274759750260315746890216330325246185232948298241128541272709769576682305216876843626191069809810990267291824247158062860010264352034514805065784938198193493333201179504845*i + 3623708673253635214546781153561465284135688791018117615357700171724097420944592557655719832228709144190233454198555848137097153934561706150196041331832421059972652530564323645509890008896574678228045006354394485640545367112224 +var affine_xP2 = ExtensionFieldElement{ + A: Fp751Element{0x2a77afa8576ce979, 0xab1360e69b0aeba0, 0xd79e3e3cbffad660, 0x5fd0175aa10f106b, 0x1800ebafce9fbdbc, 0x228fc9142bdd6166, 0x867cf907314e34c3, 0xa58d18c94c13c31c, 0x699a5bc78b11499f, 0xa29fc29a01f7ccf1, 0x6c69c0c5347eebce, 0x38ecee0cc57}, + B: Fp751Element{0x43607fd5f4837da0, 0x560bad4ce27f8f4a, 0x2164927f8495b4dd, 0x621103fdb831a997, 0xad740c4eea7db2db, 0x2cde0442205096cd, 0x2af51a70ede8324e, 0x41a4e680b9f3466, 0x5481f74660b8f476, 0xfcb2f3e656ff4d18, 0x42e3ce0837171acc, 0x44238c30530c}} + +// x([3]P) = 9351941061182433396254169746041546943662317734130813745868897924918150043217746763025923323891372857734564353401396667570940585840576256269386471444236630417779544535291208627646172485976486155620044292287052393847140181703665*i + 9010417309438761934687053906541862978676948345305618417255296028956221117900864204687119686555681136336037659036201780543527957809743092793196559099050594959988453765829339642265399496041485088089691808244290286521100323250273 +var affine_xP3 = ExtensionFieldElement{ + A: Fp751Element{0x2096e3f23feca947, 0xf36f635aa4ad8634, 0xdae3b1c6983c5e9a, 0xe08df6c262cb74b4, 0xd2ca4edc37452d3d, 0xfb5f3fe42f500c79, 0x73740aa3abc2b21f, 0xd535fd869f914cca, 0x4a558466823fb67f, 0x3e50a7a0e3bfc715, 0xf43c6da9183a132f, 0x61aca1e1b8b9}, + B: Fp751Element{0x1e54ec26ea5077bd, 0x61380572d8769f9a, 0xc615170684f59818, 0x6309c3b93e84ef6e, 0x33c74b1318c3fcd0, 0xfe8d7956835afb14, 0x2d5a7b55423c1ecc, 0x869db67edfafea68, 0x1292632394f0a628, 0x10bba48225bfd141, 0x6466c28b408daba, 0x63cacfdb7c43}} + +// x([2^2]P) = 441719501189485559222919502512761433931671682884872259563221427434901842337947564993718830905758163254463901652874331063768876314142359813382575876106725244985607032091781306919778265250690045578695338669105227100119314831452*i + 6961734028200975729170216310486458180126343885294922940439352055937945948015840788921225114530454649744697857047401608073256634790353321931728699534700109268264491160589480994022419317695690866764726967221310990488404411684053 +var affine_xP4 = ExtensionFieldElement{ + A: Fp751Element{0x6f9dbe4c39175153, 0xf2fec757eb99e88, 0x43d7361a93733d91, 0x3abd10ed19c85a3d, 0xc4de9ab9c5ef7181, 0x53e375901684c900, 0x68ffc3e7d71c41ff, 0x47adab62c8d942fe, 0x226a33fd6fbb381d, 0x87ef4c8fdd83309a, 0xaca1cf44c5fa8799, 0x6cbae86c755f}, + B: Fp751Element{0x4c80c37fe68282a7, 0xbd8b9d7248bf553a, 0x1fb0e8e74d5e1762, 0xb63fa0e4e5f91482, 0xc675ab8a45a1439, 0xdfa6772deace7820, 0xf0d813d71d9a9255, 0x53a1a58c634534bd, 0x4ebfc6485fdfd888, 0x6991fe4358bcf169, 0xc0547bdaca85b6fd, 0xf461548d632}} + +// x([3^2]P) = 3957171963425208493644602380039721164492341594850197356580248639045894821895524981729970650520936632013218950972842867220898274664982599375786979902471523505057611521217523103474682939638645404445093536997296151472632038973463*i + 1357869545269286021642168835877253886774707209614159162748874474269328421720121175566245719916322684751967981171882659798149072149161259103020057556362998810229937432814792024248155991141511691087135859252304684633946087474060 +var affine_xP9 = ExtensionFieldElement{ + A: Fp751Element{0x7c0daa0f04ded4e0, 0x52dc4f883d85e065, 0x91afbdc2c1714d0b, 0xb7b3db8e658cfeba, 0x43d4e72a692882f3, 0x535c56d83753da30, 0xc8a58724433cbf5d, 0x351153c0a5e74219, 0x2c81827d19f93dd5, 0x26ef8aca3370ea1a, 0x1cf939a6dd225dec, 0x3403cb28ad41}, + B: Fp751Element{0x93e7bc373a9ff7b, 0x57b8cc47635ebc0f, 0x92eab55689106cf3, 0x93643111d421f24c, 0x1c58b519506f6b7a, 0xebd409fb998faa13, 0x5c86ed799d09d80e, 0xd9a1d764d6363562, 0xf95e87f92fb0c4cc, 0x6b2bbaf5632a5609, 0x2d9b6a809dfaff7f, 0x29c0460348b}} + +// m = 96550223052359874398280314003345143371473380422728857598463622014420884224892 +var mScalarBytes = [...]uint8{0x7c, 0x7b, 0x95, 0xfa, 0xb4, 0x75, 0x6c, 0x48, 0x8c, 0x17, 0x55, 0xb4, 0x49, 0xf5, 0x1e, 0xa3, 0xb, 0x31, 0xf0, 0xa4, 0xa6, 0x81, 0xad, 0x94, 0x51, 0x11, 0xe7, 0xf5, 0x5b, 0x7d, 0x75, 0xd5} + +// x([m]P) = 7893578558852400052689739833699289348717964559651707250677393044951777272628231794999463214496545377542328262828965953246725804301238040891993859185944339366910592967840967752138115122568615081881937109746463885908097382992642*i + 8293895847098220389503562888233557012043261770526854885191188476280014204211818299871679993460086974249554528517413590157845430186202704783785316202196966198176323445986064452630594623103149383929503089342736311904030571524837 +var affine_xaP = ExtensionFieldElement{ + A: Fp751Element{0x2112f3c7d7f938bb, 0x704a677f0a4df08f, 0x825370e31fb4ef00, 0xddbf79b7469f902, 0x27640c899ea739fd, 0xfb7b8b19f244108e, 0x546a6679dd3baebc, 0xe9f0ecf398d5265f, 0x223d2b350e75e461, 0x84b322a0b6aff016, 0xfabe426f539f8b39, 0x4507a0604f50}, + B: Fp751Element{0xac77737e5618a5fe, 0xf91c0e08c436ca52, 0xd124037bc323533c, 0xc9a772bf52c58b63, 0x3b30c8f38ef6af4d, 0xb9eed160e134f36e, 0x24e3836393b25017, 0xc828be1b11baf1d9, 0x7b7dab585df50e93, 0x1ca3852c618bd8e0, 0x4efa73bcb359fa00, 0x50b6a923c2d4}} + +// Inputs for testing 3-point-ladder +var threePointLadderInputs = []ProjectivePoint{ + // x(P) + ProjectivePoint{ + X: ExtensionFieldElement{ + A: Fp751Element{0xe8d05f30aac47247, 0x576ec00c55441de7, 0xbf1a8ec5fe558518, 0xd77cb17f77515881, 0x8e9852837ee73ec4, 0x8159634ad4f44a6b, 0x2e4eb5533a798c5, 0x9be8c4354d5bc849, 0xf47dc61806496b84, 0x25d0e130295120e0, 0xdbef54095f8139e3, 0x5a724f20862c}, + B: Fp751Element{0x3ca30d7623602e30, 0xfb281eddf45f07b7, 0xd2bf62d5901a45bc, 0xc67c9baf86306dd2, 0x4e2bd93093f538ca, 0xcfd92075c25b9cbe, 0xceafe9a3095bcbab, 0x7d928ad380c85414, 0x37c5f38b2afdc095, 0x75325899a7b779f4, 0xf130568249f20fdd, 0x178f264767d1}}, + Z: oneExtensionField, + }, + // x(Q) + ProjectivePoint{ + X: ExtensionFieldElement{ + A: Fp751Element{0x2b71a2a93ad1e10e, 0xf0b9842a92cfb333, 0xae17373615a27f5c, 0x3039239f428330c4, 0xa0c4b735ed7dcf98, 0x6e359771ddf6af6a, 0xe986e4cac4584651, 0x8233a2b622d5518, 0xbfd67bf5f06b818b, 0xdffe38d0f5b966a6, 0xa86b36a3272ee00a, 0x193e2ea4f68f}, + B: Fp751Element{0x5a0f396459d9d998, 0x479f42250b1b7dda, 0x4016b57e2a15bf75, 0xc59f915203fa3749, 0xd5f90257399cf8da, 0x1fb2dadfd86dcef4, 0x600f20e6429021dc, 0x17e347d380c57581, 0xc1b0d5fa8fe3e440, 0xbcf035330ac20e8, 0x50c2eb5f6a4f03e6, 0x86b7c4571}}, + Z: oneExtensionField, + }, + // x(P-Q) + ProjectivePoint{ + X: ExtensionFieldElement{ + A: Fp751Element{0x4aafa9f378f7b5ff, 0x1172a683aa8eee0, 0xea518d8cbec2c1de, 0xe191bcbb63674557, 0x97bc19637b259011, 0xdbeae5c9f4a2e454, 0x78f64d1b72a42f95, 0xe71cb4ea7e181e54, 0xe4169d4c48543994, 0x6198c2286a98730f, 0xd21d675bbab1afa5, 0x2e7269fce391}, + B: Fp751Element{0x23355783ce1d0450, 0x683164cf4ce3d93f, 0xae6d1c4d25970fd8, 0x7807007fb80b48cf, 0xa005a62ec2bbb8a2, 0x6b5649bd016004cb, 0xbb1a13fa1330176b, 0xbf38e51087660461, 0xe577fddc5dd7b930, 0x5f38116f56947cd3, 0x3124f30b98c36fde, 0x4ca9b6e6db37}}, + Z: oneExtensionField, + }, +} + +func (P ProjectivePoint) Generate(rand *rand.Rand, size int) reflect.Value { + f := ExtensionFieldElement{} + x, _ := f.Generate(rand, size).Interface().(ExtensionFieldElement) + z, _ := f.Generate(rand, size).Interface().(ExtensionFieldElement) + return reflect.ValueOf(ProjectivePoint{ + X: x, + Z: z, + }) +} + +func (curve ProjectiveCurveParameters) Generate(rand *rand.Rand, size int) reflect.Value { + f := ExtensionFieldElement{} + A, _ := f.Generate(rand, size).Interface().(ExtensionFieldElement) + C, _ := f.Generate(rand, size).Interface().(ExtensionFieldElement) + return reflect.ValueOf(ProjectiveCurveParameters{ + A: A, + C: C, + }) +} + +// Helpers + +// Given xP = x(P), xQ = x(Q), and xPmQ = x(P-Q), compute xR = x(P+Q). +// +// Returns xR to allow chaining. Safe to overlap xP, xQ, xR. +func (xR *ProjectivePoint) Add(xP, xQ, xPmQ *ProjectivePoint) *ProjectivePoint { + // Algorithm 1 of Costello-Smith. + var v0, v1, v2, v3, v4 ExtensionFieldElement + v0.Add(&xP.X, &xP.Z) // X_P + Z_P + v1.Sub(&xQ.X, &xQ.Z).Mul(&v1, &v0) // (X_Q - Z_Q)(X_P + Z_P) + v0.Sub(&xP.X, &xP.Z) // X_P - Z_P + v2.Add(&xQ.X, &xQ.Z).Mul(&v2, &v0) // (X_Q + Z_Q)(X_P - Z_P) + v3.Add(&v1, &v2).Square(&v3) // 4(X_Q X_P - Z_Q Z_P)^2 + v4.Sub(&v1, &v2).Square(&v4) // 4(X_Q Z_P - Z_Q X_P)^2 + v0.Mul(&xPmQ.Z, &v3) // 4X_{P-Q}(X_Q X_P - Z_Q Z_P)^2 + xR.Z.Mul(&xPmQ.X, &v4) // 4Z_{P-Q}(X_Q Z_P - Z_Q X_P)^2 + xR.X = v0 + return xR +} + +// Given xP = x(P) and cached curve parameters Aplus2C = A + 2*C, C4 = 4*C, +// compute xQ = x([2]P). +// +// Returns xQ to allow chaining. Safe to overlap xP, xQ. +func (xQ *ProjectivePoint) Double(xP *ProjectivePoint, Aplus2C, C4 *ExtensionFieldElement) *ProjectivePoint { + // Algorithm 2 of Costello-Smith, amended to work with projective curve coefficients. + var v1, v2, v3, xz4 ExtensionFieldElement + v1.Add(&xP.X, &xP.Z).Square(&v1) // (X+Z)^2 + v2.Sub(&xP.X, &xP.Z).Square(&v2) // (X-Z)^2 + xz4.Sub(&v1, &v2) // 4XZ = (X+Z)^2 - (X-Z)^2 + v2.Mul(&v2, C4) // 4C(X-Z)^2 + xQ.X.Mul(&v1, &v2) // 4C(X+Z)^2(X-Z)^2 + v3.Mul(&xz4, Aplus2C) // 4XZ(A + 2C) + v3.Add(&v3, &v2) // 4XZ(A + 2C) + 4C(X-Z)^2 + xQ.Z.Mul(&v3, &xz4) // (4XZ(A + 2C) + 4C(X-Z)^2)4XZ + // Now (xQ.x : xQ.z) + // = (4C(X+Z)^2(X-Z)^2 : (4XZ(A + 2C) + 4C(X-Z)^2)4XZ ) + // = ((X+Z)^2(X-Z)^2 : (4XZ((A + 2C)/4C) + (X-Z)^2)4XZ ) + // = ((X+Z)^2(X-Z)^2 : (4XZ((a + 2)/4) + (X-Z)^2)4XZ ) + return xQ +} + +// Given x(P) and a scalar m in little-endian bytes, compute x([m]P) using the +// Montgomery ladder. This is described in Algorithm 8 of Costello-Smith. +// +// This function's execution time is dependent only on the byte-length of the +// input scalar. All scalars of the same input length execute in uniform time. +// The scalar can be padded with zero bytes to ensure a uniform length. +// +// Safe to overlap the source with the destination. +func (xQ *ProjectivePoint) ScalarMult(curve *ProjectiveCurveParameters, xP *ProjectivePoint, scalar []uint8) *ProjectivePoint { + var x0, x1, tmp ProjectivePoint + var Aplus2C, C4 ExtensionFieldElement + + Aplus2C.Add(&curve.C, &curve.C) // = 2*C + C4.Add(&Aplus2C, &Aplus2C) // = 4*C + Aplus2C.Add(&Aplus2C, &curve.A) // = 2*C + A + + x0.X.One() + x0.Z.Zero() + x1 = *xP + + // Iterate over the bits of the scalar, top to bottom + prevBit := uint8(0) + for i := len(scalar) - 1; i >= 0; i-- { + scalarByte := scalar[i] + for j := 7; j >= 0; j-- { + bit := (scalarByte >> uint(j)) & 0x1 + ProjectivePointConditionalSwap(&x0, &x1, (bit ^ prevBit)) + tmp.Double(&x0, &Aplus2C, &C4) + x1.Add(&x0, &x1, xP) + x0 = tmp + prevBit = bit + } + } + // now prevBit is the lowest bit of the scalar + ProjectivePointConditionalSwap(&x0, &x1, prevBit) + *xQ = x0 + return xQ +} + +// Tests + +func TestOne(t *testing.T) { + var tmp ExtensionFieldElement + + tmp.Mul(&oneExtensionField, &affine_xP) + if !tmp.VartimeEq(&affine_xP) { + t.Error("Not equal 1") + } +} + +func TestScalarMultVersusSage(t *testing.T) { + var xP ProjectivePoint + + xP.FromAffine(&affine_xP) + affine_xQ := xP.ScalarMult(&curve, &xP, mScalarBytes[:]).ToAffine() // = x([m]P) + if !affine_xaP.VartimeEq(affine_xQ) { + t.Error("\nExpected\n", affine_xaP, "\nfound\n", affine_xQ) + } +} + +func Test_jInvariant(t *testing.T) { + var curve = ProjectiveCurveParameters{A: curve_A, C: curve_C} + var jbufRes = make([]byte, P503_SharedSecretSize) + var jbufExp = make([]byte, P503_SharedSecretSize) + // Computed using Sage + // j = 3674553797500778604587777859668542828244523188705960771798425843588160903687122861541242595678107095655647237100722594066610650373491179241544334443939077738732728884873568393760629500307797547379838602108296735640313894560419*i + 3127495302417548295242630557836520229396092255080675419212556702820583041296798857582303163183558315662015469648040494128968509467224910895884358424271180055990446576645240058960358037224785786494172548090318531038910933793845 + var known_j = ExtensionFieldElement{ + A: Fp751Element{0xc7a8921c1fb23993, 0xa20aea321327620b, 0xf1caa17ed9676fa8, 0x61b780e6b1a04037, 0x47784af4c24acc7a, 0x83926e2e300b9adf, 0xcd891d56fae5b66, 0x49b66985beb733bc, 0xd4bcd2a473d518f, 0xe242239991abe224, 0xa8af5b20f98672f8, 0x139e4d4e4d98}, + B: Fp751Element{0xb5b52a21f81f359, 0x715e3a865db6d920, 0x9bac2f9d8911978b, 0xef14acd8ac4c1e3d, 0xe81aacd90cfb09c8, 0xaf898288de4a09d9, 0xb85a7fb88c5c4601, 0x2c37c3f1dd303387, 0x7ad3277fe332367c, 0xd4cbee7f25a8e6f8, 0x36eacbe979eaeffa, 0x59eb5a13ac33}, + } + + curve.Jinvariant(jbufRes) + known_j.ToBytes(jbufExp) + + if !bytes.Equal(jbufRes, jbufExp) { + t.Error("Computed incorrect j-invariant: found\n", jbufRes, "\nexpected\n", jbufExp) + } +} + +func TestProjectivePointVartimeEq(t *testing.T) { + var xP ProjectivePoint + + xP.FromAffine(&affine_xP) + xQ := xP + // Scale xQ, which results in the same projective point + xQ.X.Mul(&xQ.X, &curve_A) + xQ.Z.Mul(&xQ.Z, &curve_A) + if !xQ.VartimeEq(&xP) { + t.Error("Expected the scaled point to be equal to the original") + } +} + +func TestPointDoubleVersusSage(t *testing.T) { + var curve = ProjectiveCurveParameters{A: curve_A, C: curve_C} + var params = curve.CalcCurveParamsEquiv4() + var xP, xQ ProjectivePoint + + xP.FromAffine(&affine_xP) + affine_xQ := xQ.Pow2k(¶ms, &xP, 1).ToAffine() + if !affine_xQ.VartimeEq(&affine_xP2) { + t.Error("\nExpected\n", affine_xP2, "\nfound\n", affine_xQ) + } +} + +func TestPointMul4VersusSage(t *testing.T) { + var params = curve.CalcCurveParamsEquiv4() + var xP, xQ ProjectivePoint + + xP.FromAffine(&affine_xP) + affine_xQ := xQ.Pow2k(¶ms, &xP, 2).ToAffine() + if !affine_xQ.VartimeEq(&affine_xP4) { + t.Error("\nExpected\n", affine_xP4, "\nfound\n", affine_xQ) + } +} + +func TestPointMul9VersusSage(t *testing.T) { + var params = curve.CalcCurveParamsEquiv3() + var xP, xQ ProjectivePoint + + xP.FromAffine(&affine_xP) + affine_xQ := xQ.Pow3k(¶ms, &xP, 2).ToAffine() + if !affine_xQ.VartimeEq(&affine_xP9) { + t.Error("\nExpected\n", affine_xP9, "\nfound\n", affine_xQ) + } +} + +func TestPointPow2kVersusScalarMult(t *testing.T) { + var xP, xQ, xR ProjectivePoint + var params = curve.CalcCurveParamsEquiv4() + + xP.FromAffine(&affine_xP) + affine_xQ := xQ.Pow2k(¶ms, &xP, 5).ToAffine() // = x([32]P) + affine_xR := xR.ScalarMult(&curve, &xP, []byte{32}).ToAffine() // = x([32]P) + + if !affine_xQ.VartimeEq(affine_xR) { + t.Error("\nExpected\n", affine_xQ, "\nfound\n", affine_xR) + } +} + +func TestRecoverCoordinateA(t *testing.T) { + var cparam ProjectiveCurveParameters + // Vectors generated with SIKE reference implementation + var a = ExtensionFieldElement{ + A: Fp751Element{0x9331D9C5AAF59EA4, 0xB32B702BE4046931, 0xCEBB333912ED4D34, 0x5628CE37CD29C7A2, 0x0BEAC5ED48B7F58E, 0x1FB9D3E281D65B07, 0x9C0CFACC1E195662, 0xAE4BCE0F6B70F7D9, 0x59E4E63D43FE71A0, 0xEF7CE57560CC8615, 0xE44A8FB7901E74E8, 0x000069D13C8366D1}, + B: Fp751Element{0xF6DA1070279AB966, 0xA78FB0CE7268C762, 0x19B40F044A57ABFA, 0x7AC8EE6160C0C233, 0x93D4993442947072, 0x757D2B3FA4E44860, 0x073A920F8C4D5257, 0x2031F1B054734037, 0xDEFAA1D2406555CD, 0x26F9C70E1496BE3D, 0x5B3F335A0A4D0976, 0x000013628B2E9C59}} + var affine_xP = ExtensionFieldElement{ + A: Fp751Element{0xea6b2d1e2aebb250, 0x35d0b205dc4f6386, 0xb198e93cb1830b8d, 0x3b5b456b496ddcc6, 0x5be3f0d41132c260, 0xce5f188807516a00, 0x54f3e7469ea8866d, 0x33809ef47f36286, 0x6fa45f83eabe1edb, 0x1b3391ae5d19fd86, 0x1e66daf48584af3f, 0xb430c14aaa87}, + B: Fp751Element{0x97b41ebc61dcb2ad, 0x80ead31cb932f641, 0x40a940099948b642, 0x2a22fd16cdc7fe84, 0xaabf35b17579667f, 0x76c1d0139feb4032, 0x71467e1e7b1949be, 0x678ca8dadd0d6d81, 0x14445daea9064c66, 0x92d161eab4fa4691, 0x8dfbb01b6b238d36, 0x2e3718434e4e}} + var affine_xQ = ExtensionFieldElement{ + A: Fp751Element{0xb055cf0ca1943439, 0xa9ff5de2fa6c69ed, 0x4f2761f934e5730a, 0x61a1dcaa1f94aa4b, 0xce3c8fadfd058543, 0xeac432aaa6701b8e, 0x8491d523093aea8b, 0xba273f9bd92b9b7f, 0xd8f59fd34439bb5a, 0xdc0350261c1fe600, 0x99375ab1eb151311, 0x14d175bbdbc5}, + B: Fp751Element{0xffb0ef8c2111a107, 0x55ceca3825991829, 0xdbf8a1ccc075d34b, 0xb8e9187bd85d8494, 0x670aa2d5c34a03b0, 0xef9fe2ed2b064953, 0xc911f5311d645aee, 0xf4411f409e410507, 0x934a0a852d03e1a8, 0xe6274e67ae1ad544, 0x9f4bc563c69a87bc, 0x6f316019681e}} + var affine_xQmP = ExtensionFieldElement{ + A: Fp751Element{0x6ffb44306a153779, 0xc0ffef21f2f918f3, 0x196c46d35d77f778, 0x4a73f80452edcfe6, 0x9b00836bce61c67f, 0x387879418d84219e, 0x20700cf9fc1ec5d1, 0x1dfe2356ec64155e, 0xf8b9e33038256b1c, 0xd2aaf2e14bada0f0, 0xb33b226e79a4e313, 0x6be576fad4e5}, + B: Fp751Element{0x7db5dbc88e00de34, 0x75cc8cb9f8b6e11e, 0x8c8001c04ebc52ac, 0x67ef6c981a0b5a94, 0xc3654fbe73230738, 0xc6a46ee82983ceca, 0xed1aa61a27ef49f0, 0x17fe5a13b0858fe0, 0x9ae0ca945a4c6b3c, 0x234104a218ad8878, 0xa619627166104394, 0x556a01ff2e7e}} + + cparam.RecoverCoordinateA(&affine_xP, &affine_xQ, &affine_xQmP) + cparam.C.One() + + // Check A is correct + if !cparam.A.VartimeEq(&a) { + t.Error("\nExpected\n", a, "\nfound\n", cparam.A) + } + + // Check C is not changed + if !cparam.C.VartimeEq(&oneExtensionField) { + t.Error("\nExpected\n", cparam.C, "\nfound\n", oneExtensionField) + } +} + +func TestR2LVersusSage(t *testing.T) { + var xR ProjectivePoint + + sageAffine_xR := ExtensionFieldElement{ + A: Fp751Element{0x729465ba800d4fd5, 0x9398015b59e514a1, 0x1a59dd6be76c748e, 0x1a7db94eb28dd55c, 0x444686e680b1b8ec, 0xcc3d4ace2a2454ff, 0x51d3dab4ec95a419, 0xc3b0f33594acac6a, 0x9598a74e7fd44f8a, 0x4fbf8c638f1c2e37, 0x844e347033052f51, 0x6cd6de3eafcf}, + B: Fp751Element{0x85da145412d73430, 0xd83c0e3b66eb3232, 0xd08ff2d453ec1369, 0xa64aaacfdb395b13, 0xe9cba211a20e806e, 0xa4f80b175d937cfc, 0x556ce5c64b1f7937, 0xb59b39ea2b3fdf7a, 0xc2526b869a4196b3, 0x8dad90bca9371750, 0xdfb4a30c9d9147a2, 0x346d2130629b}} + xR = RightToLeftLadder(&curve, &threePointLadderInputs[0], &threePointLadderInputs[1], &threePointLadderInputs[2], uint(len(mScalarBytes)*8), mScalarBytes[:]) + affine_xR := xR.ToAffine() + + if !affine_xR.VartimeEq(&sageAffine_xR) { + t.Error("\nExpected\n", sageAffine_xR, "\nfound\n", affine_xR) + } +} + +func TestPointTripleVersusAddDouble(t *testing.T) { + tripleEqualsAddDouble := func(curve ProjectiveCurveParameters, P ProjectivePoint) bool { + var P2, P3, P2plusP ProjectivePoint + + eqivParams4 := curve.CalcCurveParamsEquiv4() + eqivParams3 := curve.CalcCurveParamsEquiv3() + P2.Pow2k(&eqivParams4, &P, 1) // = x([2]P) + P3.Pow3k(&eqivParams3, &P, 1) // = x([3]P) + P2plusP.Add(&P2, &P, &P) // = x([2]P + P) + return P3.VartimeEq(&P2plusP) + } + + if err := quick.Check(tripleEqualsAddDouble, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func BenchmarkThreePointLadder379BitScalar(b *testing.B) { + var mScalarBytes = [...]uint8{84, 222, 146, 63, 85, 18, 173, 162, 167, 38, 10, 8, 143, 176, 93, 228, 247, 128, 50, 128, 205, 42, 15, 137, 119, 67, 43, 3, 61, 91, 237, 24, 235, 12, 53, 96, 186, 164, 232, 223, 197, 224, 64, 109, 137, 63, 246, 4} + + for n := 0; n < b.N; n++ { + RightToLeftLadder(&curve, &threePointLadderInputs[0], &threePointLadderInputs[1], &threePointLadderInputs[2], uint(len(mScalarBytes)*8), mScalarBytes[:]) + } +} + +func BenchmarkR2L379BitScalar(b *testing.B) { + var mScalarBytes = [...]uint8{84, 222, 146, 63, 85, 18, 173, 162, 167, 38, 10, 8, 143, 176, 93, 228, 247, 128, 50, 128, 205, 42, 15, 137, 119, 67, 43, 3, 61, 91, 237, 24, 235, 12, 53, 96, 186, 164, 232, 223, 197, 224, 64, 109, 137, 63, 246, 4} + + for n := 0; n < b.N; n++ { + RightToLeftLadder(&curve, &threePointLadderInputs[0], &threePointLadderInputs[1], &threePointLadderInputs[2], uint(len(mScalarBytes)*8), mScalarBytes[:]) + } +} diff --git a/p503toolbox/field.go b/p503toolbox/field.go new file mode 100644 index 0000000..efa9d7d --- /dev/null +++ b/p503toolbox/field.go @@ -0,0 +1,405 @@ +package p503toolbox + +//------------------------------------------------------------------------------ +// Extension Field +//------------------------------------------------------------------------------ + +// Represents an element of the extension field F_{p^2}. +type ExtensionFieldElement struct { + // This field element is in Montgomery form, so that the value `A` is + // represented by `aR mod p`. + A Fp751Element + // This field element is in Montgomery form, so that the value `B` is + // represented by `bR mod p`. + B Fp751Element +} + +var zeroExtensionField = ExtensionFieldElement{ + A: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + B: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, +} + +var oneExtensionField = ExtensionFieldElement{ + A: Fp751Element{0x249ad, 0x0, 0x0, 0x0, 0x0, 0x8310000000000000, 0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x2d5b24bce5e2}, + B: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, +} + +// 2*p751 +var p751x2 = Fp751Element{ + 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, + 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xDD5FFFFFFFFFFFFF, + 0xC7D92D0A93F0F151, 0xB52B363427EF98ED, 0x109D30CFADD7D0ED, + 0x0AC56A08B964AE90, 0x1C25213F2F75B8CD, 0x0000DFCBAA83EE38} + +// p751 +var p751 = Fp751Element{ + 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, + 0xffffffffffffffff, 0xffffffffffffffff, 0xeeafffffffffffff, + 0xe3ec968549f878a8, 0xda959b1a13f7cc76, 0x084e9867d6ebe876, + 0x8562b5045cb25748, 0x0e12909f97badc66, 0x00006fe5d541f71c} + +// p751 + 1 +var p751p1 = Fp751Element{ + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0xeeb0000000000000, + 0xe3ec968549f878a8, 0xda959b1a13f7cc76, 0x084e9867d6ebe876, + 0x8562b5045cb25748, 0x0e12909f97badc66, 0x00006fe5d541f71c} + +// Set dest = 0. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Zero() *ExtensionFieldElement { + *dest = zeroExtensionField + return dest +} + +// Set dest = 1. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) One() *ExtensionFieldElement { + *dest = oneExtensionField + return dest +} + +// Set dest = lhs * rhs. +// +// Allowed to overlap lhs or rhs with dest. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Mul(lhs, rhs *ExtensionFieldElement) *ExtensionFieldElement { + // Let (a,b,c,d) = (lhs.a,lhs.b,rhs.a,rhs.b). + a := &lhs.A + b := &lhs.B + c := &rhs.A + d := &rhs.B + + // We want to compute + // + // (a + bi)*(c + di) = (a*c - b*d) + (a*d + b*c)i + // + // Use Karatsuba's trick: note that + // + // (b - a)*(c - d) = (b*c + a*d) - a*c - b*d + // + // so (a*d + b*c) = (b-a)*(c-d) + a*c + b*d. + + var ac, bd fp751X2 + fp751Mul(&ac, a, c) // = a*c*R*R + fp751Mul(&bd, b, d) // = b*d*R*R + + var b_minus_a, c_minus_d Fp751Element + fp751SubReduced(&b_minus_a, b, a) // = (b-a)*R + fp751SubReduced(&c_minus_d, c, d) // = (c-d)*R + + var ad_plus_bc fp751X2 + fp751Mul(&ad_plus_bc, &b_minus_a, &c_minus_d) // = (b-a)*(c-d)*R*R + fp751X2AddLazy(&ad_plus_bc, &ad_plus_bc, &ac) // = ((b-a)*(c-d) + a*c)*R*R + fp751X2AddLazy(&ad_plus_bc, &ad_plus_bc, &bd) // = ((b-a)*(c-d) + a*c + b*d)*R*R + + fp751MontgomeryReduce(&dest.B, &ad_plus_bc) // = (a*d + b*c)*R mod p + + var ac_minus_bd fp751X2 + fp751X2SubLazy(&ac_minus_bd, &ac, &bd) // = (a*c - b*d)*R*R + fp751MontgomeryReduce(&dest.A, &ac_minus_bd) // = (a*c - b*d)*R mod p + + return dest +} + +// Set dest = 1/x +// +// Allowed to overlap dest with x. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Inv(x *ExtensionFieldElement) *ExtensionFieldElement { + a := &x.A + b := &x.B + + // We want to compute + // + // 1 1 (a - bi) (a - bi) + // -------- = -------- -------- = ----------- + // (a + bi) (a + bi) (a - bi) (a^2 + b^2) + // + // Letting c = 1/(a^2 + b^2), this is + // + // 1/(a+bi) = a*c - b*ci. + + var asq_plus_bsq PrimeFieldElement + var asq, bsq fp751X2 + fp751Mul(&asq, a, a) // = a*a*R*R + fp751Mul(&bsq, b, b) // = b*b*R*R + fp751X2AddLazy(&asq, &asq, &bsq) // = (a^2 + b^2)*R*R + fp751MontgomeryReduce(&asq_plus_bsq.A, &asq) // = (a^2 + b^2)*R mod p + // Now asq_plus_bsq = a^2 + b^2 + + // Invert asq_plus_bsq + inv := asq_plus_bsq + inv.Mul(&asq_plus_bsq, &asq_plus_bsq) + inv.P34(&inv) + inv.Mul(&inv, &inv) + inv.Mul(&inv, &asq_plus_bsq) + + var ac fp751X2 + fp751Mul(&ac, a, &inv.A) + fp751MontgomeryReduce(&dest.A, &ac) + + var minus_b Fp751Element + fp751SubReduced(&minus_b, &minus_b, b) + var minus_bc fp751X2 + fp751Mul(&minus_bc, &minus_b, &inv.A) + fp751MontgomeryReduce(&dest.B, &minus_bc) + + return dest +} + +// Set (y1, y2, y3) = (1/x1, 1/x2, 1/x3). +// +// All xi, yi must be distinct. +func ExtensionFieldBatch3Inv(x1, x2, x3, y1, y2, y3 *ExtensionFieldElement) { + var x1x2, t ExtensionFieldElement + x1x2.Mul(x1, x2) // x1*x2 + t.Mul(&x1x2, x3).Inv(&t) // 1/(x1*x2*x3) + y1.Mul(&t, x2).Mul(y1, x3) // 1/x1 + y2.Mul(&t, x1).Mul(y2, x3) // 1/x2 + y3.Mul(&t, &x1x2) // 1/x3 +} + +// Set dest = x * x +// +// Allowed to overlap dest with x. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Square(x *ExtensionFieldElement) *ExtensionFieldElement { + a := &x.A + b := &x.B + + // We want to compute + // + // (a + bi)*(a + bi) = (a^2 - b^2) + 2abi. + + var a2, a_plus_b, a_minus_b Fp751Element + fp751AddReduced(&a2, a, a) // = a*R + a*R = 2*a*R + fp751AddReduced(&a_plus_b, a, b) // = a*R + b*R = (a+b)*R + fp751SubReduced(&a_minus_b, a, b) // = a*R - b*R = (a-b)*R + + var asq_minus_bsq, ab2 fp751X2 + fp751Mul(&asq_minus_bsq, &a_plus_b, &a_minus_b) // = (a+b)*(a-b)*R*R = (a^2 - b^2)*R*R + fp751Mul(&ab2, &a2, b) // = 2*a*b*R*R + + fp751MontgomeryReduce(&dest.A, &asq_minus_bsq) // = (a^2 - b^2)*R mod p + fp751MontgomeryReduce(&dest.B, &ab2) // = 2*a*b*R mod p + + return dest +} + +// Set dest = lhs + rhs. +// +// Allowed to overlap lhs or rhs with dest. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Add(lhs, rhs *ExtensionFieldElement) *ExtensionFieldElement { + fp751AddReduced(&dest.A, &lhs.A, &rhs.A) + fp751AddReduced(&dest.B, &lhs.B, &rhs.B) + + return dest +} + +// Set dest = lhs - rhs. +// +// Allowed to overlap lhs or rhs with dest. +// +// Returns dest to allow chaining operations. +func (dest *ExtensionFieldElement) Sub(lhs, rhs *ExtensionFieldElement) *ExtensionFieldElement { + fp751SubReduced(&dest.A, &lhs.A, &rhs.A) + fp751SubReduced(&dest.B, &lhs.B, &rhs.B) + + return dest +} + +// If choice = 1u8, set (x,y) = (y,x). If choice = 0u8, set (x,y) = (x,y). +// +// Returns dest to allow chaining operations. +func ExtensionFieldConditionalSwap(x, y *ExtensionFieldElement, choice uint8) { + fp751ConditionalSwap(&x.A, &y.A, choice) + fp751ConditionalSwap(&x.B, &y.B, choice) +} + +// Returns true if lhs = rhs. Takes variable time. +func (lhs *ExtensionFieldElement) VartimeEq(rhs *ExtensionFieldElement) bool { + return lhs.A.vartimeEq(rhs.A) && lhs.B.vartimeEq(rhs.B) +} + +// Convert the input to wire format. +// +// The output byte slice must be at least 188 bytes long. +func (x *ExtensionFieldElement) ToBytes(output []byte) { + if len(output) < 188 { + panic("output byte slice too short, need 188 bytes") + } + var a,b Fp751Element + var aR fp751X2 + + // convert from montgomery domain + copy(aR[:], x.A[:]) // = a*R + fp751MontgomeryReduce(&a, &aR) // = a mod p in [0, 2p) + fp751StrongReduce(&a) // = a mod p in [0, p) + copy(aR[:], x.B[:]) + fp751MontgomeryReduce(&b, &aR) + fp751StrongReduce(&b) + + // convert to bytes in little endian form. 8*12 = 96, but we drop the last two bytes + // since p is 751 < 752=94*8 bits. + for i := 0; i < 94; i++ { + // set i = j*8 + k + j := i / 8 + k := uint64(i % 8) + + output[i] = byte(a[j] >> (8 * k)) + output[i+94] = byte(b[j] >> (8 * k)) + } +} + +// Read 188 bytes into the given ExtensionFieldElement. +// +// It is an error to call this function if the input byte slice is less than 188 bytes long. +func (x *ExtensionFieldElement) FromBytes(input []byte) { + if len(input) < 188 { + panic("input byte slice too short, need 188 bytes") + } + + for i:=0; i<94; i++ { + j := i / 8 + k := uint64(i % 8) + x.A[j] |= uint64(input[i]) << (8 * k) + x.B[j] |= uint64(input[i+94]) << (8 * k) + } + + // convert to montgomery domain + var aRR fp751X2 + fp751Mul(&aRR, &x.A, &montgomeryRsq) // = a*R*R + fp751MontgomeryReduce(&x.A, &aRR) // = a*R mod p + fp751Mul(&aRR, &x.B, &montgomeryRsq) // = a*R*R + fp751MontgomeryReduce(&x.B, &aRR) // = a*R mod p +} + +//------------------------------------------------------------------------------ +// Prime Field +//------------------------------------------------------------------------------ + +// Represents an element of the prime field F_p. +type PrimeFieldElement struct { + // This field element is in Montgomery form, so that the value `A` is + // represented by `aR mod p`. + A Fp751Element +} + +var zeroPrimeField = PrimeFieldElement{ + A: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, +} + +var onePrimeField = PrimeFieldElement{ + A: Fp751Element{0x249ad, 0x0, 0x0, 0x0, 0x0, 0x8310000000000000, 0x5527b1e4375c6c66, 0x697797bf3f4f24d0, 0xc89db7b2ac5c4e2e, 0x4ca4b439d2076956, 0x10f7926c7512c7e9, 0x2d5b24bce5e2}, +} + +// Set dest = lhs * rhs. +// +// Allowed to overlap lhs or rhs with dest. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) Mul(lhs, rhs *PrimeFieldElement) *PrimeFieldElement { + a := &lhs.A // = a*R + b := &rhs.A // = b*R + + var ab fp751X2 + fp751Mul(&ab, a, b) // = a*b*R*R + fp751MontgomeryReduce(&dest.A, &ab) // = a*b*R mod p + + return dest +} + +// Set dest = x^(2^k), for k >= 1, by repeated squarings. +// +// Allowed to overlap x with dest. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) Pow2k(x *PrimeFieldElement, k uint8) *PrimeFieldElement { + dest.Mul(x, x) + for i := uint8(1); i < k; i++ { + dest.Mul(dest, dest) + } + + return dest +} + +// Set dest = x^((p-3)/4). If x is square, this is 1/sqrt(x). +// +// Allowed to overlap x with dest. +// +// Returns dest to allow chaining operations. +func (dest *PrimeFieldElement) P34(x *PrimeFieldElement) *PrimeFieldElement { + // Sliding-window strategy computed with Sage, awk, sed, and tr. + // + // This performs sum(powStrategy) = 744 squarings and len(mulStrategy) + // = 137 multiplications, in addition to 1 squaring and 15 + // multiplications to build a lookup table. + // + // In total this is 745 squarings, 152 multiplications. Since squaring + // is not implemented for the prime field, this is 897 multiplications + // in total. + powStrategy := [137]uint8{5, 7, 6, 2, 10, 4, 6, 9, 8, 5, 9, 4, 7, 5, 5, 4, 8, 3, 9, 5, 5, 4, 10, 4, 6, 6, 6, 5, 8, 9, 3, 4, 9, 4, 5, 6, 6, 2, 9, 4, 5, 5, 5, 7, 7, 9, 4, 6, 4, 8, 5, 8, 6, 6, 2, 9, 7, 4, 8, 8, 8, 4, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2} + mulStrategy := [137]uint8{31, 23, 21, 1, 31, 7, 7, 7, 9, 9, 19, 15, 23, 23, 11, 7, 25, 5, 21, 17, 11, 5, 17, 7, 11, 9, 23, 9, 1, 19, 5, 3, 25, 15, 11, 29, 31, 1, 29, 11, 13, 9, 11, 27, 13, 19, 15, 31, 3, 29, 23, 31, 25, 11, 1, 21, 19, 15, 15, 21, 29, 13, 23, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 3} + initialMul := uint8(27) + + // Build a lookup table of odd multiples of x. + lookup := [16]PrimeFieldElement{} + xx := &PrimeFieldElement{} + xx.Mul(x, x) // Set xx = x^2 + lookup[0] = *x + for i := 1; i < 16; i++ { + lookup[i].Mul(&lookup[i-1], xx) + } + // Now lookup = {x, x^3, x^5, ... } + // so that lookup[i] = x^{2*i + 1} + // so that lookup[k/2] = x^k, for odd k + + *dest = lookup[initialMul/2] + for i := uint8(0); i < 137; i++ { + dest.Pow2k(dest, powStrategy[i]) + dest.Mul(dest, &lookup[mulStrategy[i]/2]) + } + + return dest +} + +//------------------------------------------------------------------------------ +// Internals +//------------------------------------------------------------------------------ + +const fp751NumWords = 12 + +// (2^768)^2 mod p +// This can't be a constant because Go doesn't allow array constants, so try +// not to modify it. +var montgomeryRsq = Fp751Element{2535603850726686808, 15780896088201250090, 6788776303855402382, 17585428585582356230, 5274503137951975249, 2266259624764636289, 11695651972693921304, 13072885652150159301, 4908312795585420432, 6229583484603254826, 488927695601805643, 72213483953973} + +// Internal representation of an element of the base field F_p. +// +// This type is distinct from PrimeFieldElement in that no particular meaning +// is assigned to the representation -- it could represent an element in +// Montgomery form, or not. Tracking the meaning of the field element is left +// to higher types. +type Fp751Element [fp751NumWords]uint64 + +// Represents an intermediate product of two elements of the base field F_p. +type fp751X2 [2 * fp751NumWords]uint64 + +func (x Fp751Element) vartimeEq(y Fp751Element) bool { + fp751StrongReduce(&x) + fp751StrongReduce(&y) + eq := true + for i := 0; i < fp751NumWords; i++ { + eq = (x[i] == y[i]) && eq + } + + return eq +} diff --git a/p503toolbox/field.o b/p503toolbox/field.o new file mode 100644 index 0000000000000000000000000000000000000000..277d624f8d905b5611cfbc70ff15bd6957e31c92 GIT binary patch literal 39652 zcmd^o3w%`7wf8=I&P*ngkObrr5JHR*0WnS{Z$yj~>4*^#tx`l`CNqI(NMaHY6qQHh zhDQ)7BBh8FsZxp(yvnK?7bOhWYDe!uS< zCTE}hTJN>?I(yDJOe#w*b3fuOtV%8^E}c3(*)_$JotZqTEIlpVo|ch3c5qRJYl^qL zthltwTVcBqADhjVnmWuj&0A4fTvnQ#WwSA^bSuxvO0Oy_D{+@iXXZ}NB*e3*$z@Z# z=E7Q88mgS;dqQ6=hR~&8YHLGTvio)bw3% zr?XDp%EH0+>FK<4N_=r?`P8aiZ#!5o#s-2=F_h-q6$FJQC+ZXPFtKyl)T+SD+dEHl zRa6yE@!tLz;{uC=(aDTmZ)Y&3FnoS%R)!5_UC(jzZ5|KrF{G2nTPYC7Yt%T@9z$YE zCRfswUO26w(@fS&=`n<{@tysAU~VGo1rmA_&GWhWj?!spOJjB%i9t*ka~?3?HMKM~SUL-p8gw1%60_H7KLa_fB=U4kfMAeNtZNmL#PRNu4_1 zuL}bmk|kiLbHNkQ2Z4OfqsDnF$|Pt>Mf4%%#$bI9b5#{i&H#rjw#Sg@>FK0C@DV-T zA#_G(8JfVNXP_4hqHP9xv*a0ivq4n!EF~`(@(;>NJ;hb9D_6@Pm|5VimM8|rMCfI zlex8?Vdy!Jp6dj_yIhrz-Ueh|a+P#+F%xg4$AO-tR(mqgbID{<`})BRD{~Vtv|CJS zA4#@rN_mNQj7zlcRiYBE7R|1rqT*8TgweOLAyLUeP4~2H8q>0A^str_TQ-dmP3Z!o zVD*m5vPs_38TVFIfS^e;So}ASPw)Hd-=JTbO^Cx7n`mWp1dzgJNad~LRCY>XjB z=aqOA=F3k>N$Wo_7>^bz2c(hjG2cK8X@wYHa>dkAif3t_sYOMFW#u!N&q@J{`G!=@ zDEFq(yjB4-pXKQaJM-N)31Pj^Re~7i@)o5R<+xqhUPpF@y)ZAsm6e{Gnc>OHE6Q>? za$P}#GcpQ2-a>r5Zf~wV!;y!%9=F$YUveR?Z+fXnqC%$a()qh}%pgtf?C@E|& zLOP^3UC0r5EUwDOaCFl#L6Z!Gk%(qZG}PnJMbQEs<`xq*aoXG6xx9cVV{2p6f2m1TUEwUzC;Uad+-A z3yrmkeL$MbUYO7CU2r46cUfF+P}Yl-qRE-GqPo!7#ichvfi6vFZP*V$2a)bdhrzmv z3Ujk_z@nq5$nCJ_6*>yjvvOQnb~{|nz-M+|VR}whhTD@>1cSAwd-Afv3h;y$(lK zw%e7SS6Jk57v?x%dLtug!=C`c_A$;F0n!4QvagH4?#9&tC`ofottzXUQa&NaSjjS0 zLZU!cVNPbIJu4^Eo1F&-Dav$b6=r4@X60sQ+Y2*bz(%oW!-@*C-9;`qk}D@CCp|AW zhk{Es79y7$VjmHSAuZ8mzOiDIk)Z);;jbEe-35o0RTaghlbEkJCL=Oe6&DhpaLFV+ zW=|?Bca;`Htg-UC0wF}qeDI-`x+%=c^5$k{x$LfNhd0mda(S~M2)oCP)zl8&+t7{w zLIUz>g_&?yPa(uZx;Am`gt#IorHr7J@TR(8uY(1i7QF+~L@JE%@+kg_gY=bScepZL zZkHpkFe@)Fr^szjFD&wAxwAYu?ku!~3KOmzEL}vcR9(74w#B6$FXDD=Nm<#WQ_J;u z_F#JUL#4u{*31&cJ8Z;r=-fKli|N`%mulL2|;c(QUGc2`bzk+%qg zDPbw1Bm*>+l}^$#1p^ER){rcBk=Jea=Hy`=$;rveblEf9xjBWokTVEP28@9I$54 zfM+Q4jhExq=I|eN1j01F7iZrm#b3(LRthKskgR8A|MSnKnh-T4e3*U z*g^jMXat7%QHBL{p8-%xkL>tv7zwJ~>`J~xsF8B&=Q#VH=iPm0Gifd&}E*0fpt1`c7eao z|C@@WG;|?^luyQh^l>@16{Rp4lKYuT49FZ|MTv&5A~2H2#X8!@Am zy=Hy9BO}*8-kit?93aHipwiFy~VJJ=_;Az zs;F>b%b=GeOW>eg3VCHqt1i_f(P@)im6MC6mKNqAX&$6!-^lo>eF8G`5^t%FEzSFw zJ~GWP(u-fAO8i45k~C035GkYug8zte=+3{LZAkkmZ2QAiXsj@d8zVR}NQ3}td&NP? zYZ?;U(6QRm3gJn?XyU{!N=4{UVzd9iaKHy8YoG=I)<`jTpqv4v8+i;2ge1hcHixZN zu9TK=R1Qd{oCg$oB2pMavYf?y?nvZ@nKLSc^uTp#(9!^so;AmZ7jyQlxbCXp^3Y7%6|O z(4vZHkvnJwnUI+G!hGzN>9SRK(`*XaEs`Z|kU*+bNo-+oS)86wYjiK|}=5&`}oK6Jh*; zdBJzL0lDc>^PSZ#;m9>P5RGwxgK!(Vr24P?(HA-cn{%zS(&b0XvDn*0Yn9Md0H^f( zcA83o!BiqDROWCZAVwTFD1;ihsn-hexZ*--ye)v^@-!b%Tm!nmMtJ~BppE(DHK}pJLv6`4S6a`5VeTf7B!uggdlCNav-B+b$y{=;U(t#t zxz{8vX+^!oH7)%}&=#dbG)lwX`4E~=XnR{3q>|n`n`9ugHz`SyS4dP3>H?g)qp3^$S&{R z#8<_n>{bf;Ip?0SI_D2cb=Kl<;mp(oXKfblYG=;*C8^EMS{#~0b>S4|TsR!v^M|J< zIu~N_+y?9WTusP}$F06Y+2qc=7r%_c7!sR>dnAV|(qA-zQcr?d8+iO%`UQjZC`?!)kfs{{$o+Dde6Nj)q&s{9~k zrcNiCYM(jQDqS-n?{Iy?Q>Q!UOVkow0io0AFi=T4)5ei7hygK9k`mx%N9qdPkR47! z)uLAbsx2VQ1O;b+9ROh%BxF^)NE2SfAySC`AyD51ND1nzbwXybFwaFAa^TcNzaw=l zEFe6w)GLV%kN|oYX&MVfrVo|V{Ae^%ob#(w6G(ImH4B^FK!*3epyjJ^|GIpcryPs_ zr}|lZ4CAkw*&=1I%1u@)@0G`|=3V>7CU?3zwzo~anfK@Y)g*o$zm{K{V;#~P=NY`a zn*2lD@{W1z1%>~B7h=>qJj%v`OyBM|{e9B(M+;qUOm$KT(-$QOo`^chZb#}dm~q}0;cF`0$yTBOZ_UZg)DF!=pJ&XPt+gy{Z|FNh_7n17#PxWtd)y0yspe(k`+ zhzY5kY7H@*&~F5@sJC{DHmVgys#C=fvVSQ57!4Pe8)UTpAw-6sm!MXnFu-PTm!HbO zkR7SBOp(l!dIeYTS0N|qPK#0{WWNBQ&#_XU8V7Qs-#}|;rY1*H@$l4Sfkwy|xTGHZ z;GyUIh(1rfQu+l1N%(v?B8~1U=lRLh*dRZeOqKvY66QL)fvi~bqfWw))^y%&ez`OJ z=lkb8jVJ6$4 z@SFHL&L{9%CDfG;`}9?0Ik7kU%zDA)#?&=b!j)vHiMExsQ1h);BepJa2)j;zC7CT; zmby(;jK+VAwU8~w)d!DTs7G&uk!dChp*D)ap|;%;n;(jQ4y|OUNNj$)Ma%whASE|` z28wewIkVFcwcI6H5W^rjJ^Siho|N`=hjaY%ZZrfXOHGO=k7$8addwgneSXkNai*q zYdn=*hF@HjUwJDSdR}2mO(~o!X7SgH4^X>;UWRs-&(nBMlhN}3?<0Q0l@VVQQSxGi zf;q)+g$io?z5(?>wo>dcMJP+wXdpCF6No}Vaz0%RA-it1GCq^NtC1N4Imhu-ZZk(Q zHj}Yi_y8VniV9BFn*1wFfAW~wXI2Qu*_gV7N(2{K?no8g3%AG>r?r)4W-1I~M`{ht z!#IkKtt~tAzoaZkKx+v_WHj+x5$zGcs_UorOs}u0ma7vyMvyYQfhc^RmIS%@l~1G$ zvQaEiEkgl@2})`)Y@tS7OT11=RTv;aRDJa*ELyaD^=sj;b(Xct8y2O`^7I=PJcIvx z&i0rb{92_O|C6G?y3xRXrl`vm-o+eZ-|_QaBeLh7f$x+cYk`9%lg{&(dD9+=l+| zJ8O}Skk%;3X{UOc86P8g9zy=yV|2B0w=z2EZt&Znu*0T)yp!cdtarAkB=|5x8@r$9 z@Eb!N`u`Dz&$f?YtaEGR!u>N6>+CbfgmgE`D?9j>SaV>Z&1k2XyeRboona3E3v{?; z59)615+7hlLz=PF7SO0bc$S@%)iU9q8t&N*$1q0=)juHB_N_PlM0wAIX#HD-ZBla( zsjVtkujYwJS-Ppo@lY>%*kkIjP~}dmfF;|>lKYb-_m?K9nQp!`U63S~-)(@wPG5_7 zc70%+$o+AULnz=~f4M$>YeA|rx3S`&Hg0(8Xj)_t7aQyvC&&|%-WwE1)Qp6SHOp9WPDCc}+e6T4I zfVBJx0j9t6a=mlzg@l>ErPx|84fxC1Obxv?8_+f$W8bsic7YP4Qa=t?_dn zhqatWoW&|5up+sm5(|vb`0NHU0BKoT)YF4sgShT%PMWR$ElFukiu+p<0^kmnol+fL z(|NkJ3-8OLud$}!>hD@}zbKxibdKx8xoS6A_z>RHl5CDrqHPz;8!I-+!tBH40hDq2iXGN^uoI-7Hj8t$Y2Iz z{dj>Ak}>2%d@`yHCCiw7CSMrH#?)LY;V+_ubjMPC?uCcr7yW}!>oG0NCLt~b$?VcU zJl$`2{MOiButn1yfBf7z_n-N9^*nIbHGe3G-d8|j?auj`JDs(6D(HFmDTj0ZN6wx9 zyw$n$t6QA4nK90tKGPlZo1HsPnw<0hh%ltgQtSYn2 z*Iia&R$p*-!91ATtOGc=rbKm8ui`e|#gd3j|0CPIdO+f5|f)du<3Cx}wC%Ta(OBGAG`!iLfh zoVL|;&2`p3pg8CMmvipLho3^$^GD~-i%|QiTb#-foI>4)oI8)B_{=%rkIuQi*vbs# zN9PiqJHJ5VKC*^Co+4{<_WZN+nj_A!fQfCbOG;X1}n$jWWB1Yc*1R;g6G@ z&;EJpbPDXksh>SxHI6da)@+)x^6D@z{I&j( z6`?(nYNV6Sbz1SNjV&6nH4wgTfBwexPoh!2OUB>U_(U|lw?m{8ejVyFp&Y#f+H|u` z0y^weQ`AOH79?|d?iadVl~NDu`Sf^e=tpBm!_lbY5XLNPerwjNnmOy$Htd-me+~Tv zBRv>|RMz}R^{R$cuS%rB_7(EvStwFnb8*_6LfEI8fD=ffvH~|oLZ)}L5%T8!-@TcTt9=NqZXyJFRgK??G;>(a6F-=Xga#`0sKJ zONM;Y20L%0LRMfLlEIcY9f>y?g@W`uq?@h8noq$B(L*j|%SD48oMp{#T6$G8ExoGS zb1UfOL$6^7&``drp^PqO>%}R5zAWxh{Gh8^b&?-&k(b|hb-q%{1E2F0v-U#xZKC2?S-0&ihceUnEA0tbVSp4>@S7qce zJ`7`ck~SRO0Qa>3F5OO^W94RYDbefoN(8f%reZ`K{+Zh_bVR!t{f?)k6gpV)5v(+6L#_}lfF**CoP z_5S~S{bTz}2}?G4-myONJKOKKO}**XYp&~^dv@-Ik3M7U3V>ti8)w%~zi`L1%g=l< zZSOy>>h-f1j!bJheQ45(QC8;zo5!nfxt}dMy8XqMmvo8i{g2x|`D@F3x(y-KT`~Os zY59-MfBmK5Bkb1>ynXqrE=BqC;(vWn(REq&!qaQ5_T+sn?GCS=yE{jhsK z-^}i{e`vhD{>}5Q9F>3d<*fA2r}o@%vQzQyfBo^Q)p!5ql~?jd@WGpY`_Ut#4vri3 zdY}7G-+3Qnolnf^cHp+XCvEwk{qF5EyP_I4Il7Ly^NXgZx<6YtFlpN8A$LZYAQc%^RF$8oT-KqtU;5weZQ>LD@Cyo<6bTv!i*@2m23N z`Qh~i4cGrDst&KKbvvKGzTaQVJ~_4ht>s52zTWrwUq!vQKj(AB)3f`Sr9IYd_tJ_#?_Toa!wWYYP`>~3m&dH%oM-)c#`4!b zopAgs(?x{&mrTR)yQe%)8o{xV^tsqdXVi_gB9 z+x&Oe;|t#W@_a$#m`9wwnlBaI_u9By&maHj(22Klns)q$XWLgPyVw8s4z_Qq@}m0NabF!>mb3Wr zUw`{S_cz9Uvv0u0U-#>?(cbHo{zJY!n`-I#^$5m()v)}@uRmP>U!~6J_Z%6pY{0o) zpTvIC?~cDc^4ONK{nKuqe0u1{A$z}F`SS3JvU!H{b)bWS<|;4e$kYAr~TBRmu{Nj@I25o zZPS}gjWY}9|EAy8_Z-huJz2Zug&#M+Gwj%;r)%GIYpCNf*ED`q1dYhu+?oFs)1HM~81Lxbwm5-iZCj&g^S{zWTzhC%dkl zd~wDdJN|s+;}@DbCzfO#YWU+jCwjl|%&Yg0-8?*|*6!+i-#udsp1%91|AKvvMntpY z?`r(T!Sp0Q#t{HQ~!41XL?SBzhG;m!B zZJ}}vmHRb-*Y$sqezJq_LcdY=34qm_U4`-(Dgz^o*5(`gBh4)b{#)^vjK6TSv=`QQ zq2u3)yYIi5_kUMx!JZNO|CM(=-hCIFy^$Po z%4}Axrf3||VzHR5(Iz!oHCrqkN3%I9Dq6LuR@_F2hA7o)wW=00e?K4Iw?{XnZxnDu zUlre@M@&>tbGObh=FU+u(LJJ-tIVdJ@iBe+Dt%%)MaNv->8kj?ranFT_KZ=wT@@eS ztxu;OeZQ}dxMU>Z;QuxV`q1i5A4J?}|Lb(fEa;<`Hx0jYJhNJ}Wo!6jqSmF)yY;Q& zWNx+g2tOmZXJT-bh1T`%)-JXEZ!XaX`AdD%M7}jsg6vM?=S9%m_Bm_rK`_(1uO))<3j5w*I-bOgB|#58TiZ-LCq?twhqTsSVxgvSiHBeldj$ zP@5hydpp9!b1PRIi^N3uRiV87M7M1!9nss4e;z&F5tirHJu^Ep%dH2qfW8Xa%X&xH zTK+<(4uzW4zi8|gi7h3pIgV*BJMHn()|dRZ8$W)(Bf9$2NXuQ3m})D2D$Imm&J`Bz z|5Xz$xJ?5`+@OM^xc6nC6hKF`HF41CKXrt$;g9$CMq*4$Y5g{*FpIpY0W{-hd4)x3 zKNf>G!;jf%?Nw##YFhQzoZf}}^yG7ev z)%;TYH=jTZx8ZMCeFz;teQ8|+R`s8TG>5I~L0K>Lf%*DLJIi`)oWhcVLUfCt&v6rF zD~8o#SV(-h;{Ff^Y%yQA54~*SFAeI5xyJu<`;tiJ+WwW>^L>crO4%#e zbGMJ_2yAX4>zIEA1-)(5m%IU zUqT1wCu6(tibB?lB`YSjheTaqI&0f*cpw(`-|pR3al24JYV!6WW8HtA0)e*iP9~iQ zz06&H{EhZ75lX~w9&|kiCfY6MfR*Um*BLD=_?Oq!+}a*mLIry8`96KGM4%@^GON(K zU!E-1_~s{FWP17I4|cxQ9&$s$|M@HHIj*Yh7Tr4mdc@f_|BnQ+BeWhE4)Oypl+wx8 z%R!TWRNo%*L&5Jzxb2=RA^)sK4~0DvyrRcH`oWj}qN2=-{^Kv**GA{WqRFI)b}=5a z=&m{XyY-f=%w)ek5=a3>(X~pYn6DiPk~o!s1Q8O4KG&qk@fe9b0MqG1qkD=o2~JFr z-QtuBKylhgAJETaGu8gqB&LqACe+1Pk`p*ntxUDWCaAVJOPqzNac~3Ph53}O{=wbb z9-PSb+VD-aS(6#trdSisntGj;#YS#TV7)h+=-7ngTD8|2RTLLcu%t#a9WUXCIr}U$ zY*`eJ$w$O-trh1fhj2vqmS|K`o9URc-G*bo{A2jhH<4KA4;BU{tF}RQ)s_~|?T)_O zp82wgXXn0b;n{;r5)Ucoa6BAq*+s03JWI!db;QZYQ{uQ9oM84PVyO2H z!O%B20{nJjCFv}V=-*8o^nCHONeb0fb z`=0|>V?XZ1$Bln3o{xL@;{;-@#LAP^iEfQG4?AzcH&L**K=M={V2vd<5od|3i6`ed zIwm#{XNfC`nZ#4#xSg0uJVD2P6ZgZ;6N*6DWKi}9C@X>7OY>D*`DoQvahda~X_w8s zs;jlY%2UL_-qE7~(l2i+72Z-ph!J z-UsQJvV!EAa-5F+>`P)8>k=xlSqh14jzTi4Rz8m6`STQrF26?k*haz{PIllI)&hmC z#baXJDu|o|>e$ntiX$%C$ma6#!$PRZ#aa7Xc^7q&LJ!#pb6V08Op*YTbVXqB5X}Yp zN@`|;fMv=)qGp+LhKhc6kYgDIO)OWSiWN$-x>AX=^tXVTRRL=7G|WmfzFh@VD=`xW zdrB6sf-Tgx5qE2pMyMaPJfUF7)M{H6*UBvWO*R}*0= zg35mR^@zqcSi^1w5oou95VTuC4BD+UD)55ruGT?T9%r*Ml0QUz3P{1x&@l6qgJ^~3IP*y0`q%H@Pt$@2G*0L8h zc2K!UjRzHqWoW#J0`9hpoPFdhspVOu-jPRegy!qXxkfgSTkTQyP}Dl49G1mND*D-* zlW@P7U|o<3L)fhxRxbD8N0g(=^S$_|$}#1Y-YAYMf9u1?olq|K;U{6d&n1cC`6RHD zY@LXg+o?^;Dcw%iDC`^hN`oH-OlOow#PKAFsoxS4))+GQ{N+l$qUMjDWr6|5SyC;K z^I1h-zv>mX+l+5_Ar_JZ#anO-HJ&#?AkE4GGV0#-bWAx#$3w~`6+N4krDhc0D6f({ zwklhM)#uN`eJWPqwM&aRC-i$>c~2#4M+oVvUI=her?3-v?N~5+VgTs3lcFywr>IDo zM+!J2J?oHycuV-}sJNswNc;yv@Jmndh2rn7&fAKFD-dfc?qY#hYq$t&3poDc`$e1{A={|fh9i66 z_a*R|e6n>g>w-X;%NFxhvhYPOq1**G=UbO@yh9CN+b~S%ItCNE7SV)RawK9A&GSVs z(Euj(L}{czL=v}#n)4TK3Ri{QXfQ27}m&Q zhb#`u!WX?-N4tjJ41;fPLhA(DlQB+#cBX(S=v=`Y3Ex_NOrpIci9i@54A|YGzM~R* zHDOr6v8YlXOi!}8PREBQckptlM&3Oa_&m-8e%|64Pl74O$n}u8n6QD0^%Cu3jsQeZ zW=VV+S_EzhdV>ynBWLSO_$C4>n>jXtpNpDV;#bltj_ih{bx;=YH6nB!fmy*3i%Dpo zN}veB1bdlI{U#kn9cM7MQ%W33Yk>lBt>p_8Aur7mR!drU$l{1Be9@bA2zdS{0AY!Q zu%tZ@w(1b@8n8VIzOiJAOBLAS1!bustg(jBZRcwx*6lj-9r{!}nRH0OH+B@Ju2vpS z;496ml?sy4A~|@yEOt{-5A)omP2~;Taz7sQ^;vgw1P6R=BLQQRGA@DFm^LY;1VbpV zk!vt~(R=jCd%5L4b{{6=-J9DKe39C5_AU`nV|rH-u!K-Ha-n&0jQu*%2jG7Sz6pad zXC$y**lXX}R-C!caijqX7SMzDIEr zoTtUiWi(|w#}ZA2F!+Or*rLxDeMErkKdBxl6LOE$xsr>%G$fDn7!cz~)?N)q@9bDe_l1z18>t9TvFUcnDZSj{>t$VbFLex9pc z7%r>$1^wiresW1a@#!a6(B$Ab`bo8ZGEY0PsCe$I3enZnoldaW`Ao>J>Xm$Px*%}6 z$bX7=A7TKWM)#TUtQXG2&}PLy3kwEd%`#!YGI1&f;JGoqbw%KGWnjRnK&+-gtYYm|+qeBgW4G}>xo>rE4JzRBd{o5Tnk zMzG)kW391lgBVtaVVg}nnMsPbpt?0s-G=IRQB{eb&87q#x0-y`9VQNm)hn?>&@hJ= zBwz`i!zaQ_7I0caR`R7}7@Fmg*}iKM2IAxOCZ5ai-{tXLCf=QO$4NsNMs^7bw=1AQ zAy!*3vdct_G>BmJSqk7Z*4u`vw9-D2n-aJ9!|!&S=?B{ozzy9LZfQ#ly3&r)zC z0`{7CwSw1S0rtKyu=fhs`vH4D-Z-Jam*YZV?-j838DQrxAq;DQ;ed$?Z{`P0JcXqI z(V;Mi4hlqvf#|TwHcP?F4O$51pg?p;AR_XQ=rTX58_%aEK8lS3mSbVCd@8UU2bSaE zMD?k_a!g>!7os9Y>{j?$vyuYlgYB|jK+G?PiJmYaFxl7v6S8+ZUfQ;Xih2bxjtWE= z8#`j!Cj08CsKYgeDE3p|QPW|n9XBC0vBUhDWZ~l{O}Jcx*YhS5pQA8*ef(4y2Ac#q zXTac@aQbQz44yJD7|>Tf+ppl;opiiic;89kef5}QV|H@EIx%sRpU;Gy&#B&U4a3(2p!jmBSMmiKf)dX5?>f3mjseos*lf7!?@Qafy8G( zLiRaF#iGmb_3>)#tF#u?=BOmnd4N4n4dY&OR0;>xYB>>YWWdL3RE}){e0_XD7z8x} z!6G176pr{Bfnb3VfsZd%vG7yc@8e6tfL<&>F9qnO;b>nhKrb}S>726DUwpLw3MOR#w;aEpK>r`B>*x4ero{E``;)v@t8+%nf zOqF@+F)EfQxO$_anflhN7pQjFG|PlylTss#B_{N3P*+f`PQ?YD9p=1V7IoCOS=~mp zwQ4;TSOV#Yd_?6|+#HN$T|cQ}xcpngyORHZV~AYmMC7u;d+L=c2#&&ZJ-PlCbw3sP z&2-!f2~fqyx2b#>8wSYRRa}$e$_b}C!f1cH(Ehv7{<~_J@UvZLe}~ZiZbi87ZbkUO zZbd|(-7toF73np@7wJP7RP_Q?15h=DbGCYcYFDeMkVUBcsKW3?9uWq|Zh>PDaO?@k z&TfIDu@xLE1MKY89cmv$s4#qyJ%quzPvATNoCm@=&_024e=ts@nJQ1fja$elRP0&c zbze9=90toFf#nFW90@1ALjucTBbJrAq98pb-1@&wfF5UX)0NUq8>E|#Tu&Z0QY@2u zrIQ{|#WGnSR>=b4q6ejm9+57(M-e`{2c|KOiM&HBYsLk68_2b>-HNd5ajn_4j~`We z8pD6T#6MN7t*$;t1?|~Bo{g0gUy<4wo#&{)@~IdWvhE|tQiBrgm z2?P0*m~a+EoE0NlWS6G|+%sa>IHW28b|`cCCQv2r&IPz@7TkRUz~@yhbGZ=P#u$RQJS!Sdr<{;fw2dc!~ zJW&<8n`0*3RbxtxnJ2L%NO*zS)~Yqtm?@W8gZXQWx%E7AKzB7}%4HUafwRoS9?`rg zfSp9WI8Y^GmIR1dB#2oIVwUQ}EDJ--Qh{R~=C3mnv&2BmQbEkJ5MtuAtRO*)F2l9t zWQ0~u)JnHjPS-5@32po$MN1+UD_YAcy#@afg}gMx>*VFh4!vu?e$u3OozkbDRWwW& W^sZT4n_k03*X7HT%F^>nJpTtyW`J%0 literal 0 HcmV?d00001 diff --git a/p503toolbox/field_amd64.s b/p503toolbox/field_amd64.s new file mode 100644 index 0000000..4596d8f --- /dev/null +++ b/p503toolbox/field_amd64.s @@ -0,0 +1,2284 @@ +// +build amd64,!noasm + +#include "textflag.h" + +// p751 + 1 +#define P751P1_5 $0xEEB0000000000000 +#define P751P1_6 $0xE3EC968549F878A8 +#define P751P1_7 $0xDA959B1A13F7CC76 +#define P751P1_8 $0x084E9867D6EBE876 +#define P751P1_9 $0x8562B5045CB25748 +#define P751P1_10 $0x0E12909F97BADC66 +#define P751P1_11 $0x00006FE5D541F71C + +#define P751_0 $0xFFFFFFFFFFFFFFFF +#define P751_5 $0xEEAFFFFFFFFFFFFF +#define P751_6 $0xE3EC968549F878A8 +#define P751_7 $0xDA959B1A13F7CC76 +#define P751_8 $0x084E9867D6EBE876 +#define P751_9 $0x8562B5045CB25748 +#define P751_10 $0x0E12909F97BADC66 +#define P751_11 $0x00006FE5D541F71C + +#define P751X2_0 $0xFFFFFFFFFFFFFFFE +#define P751X2_1 $0xFFFFFFFFFFFFFFFF +#define P751X2_5 $0xDD5FFFFFFFFFFFFF +#define P751X2_6 $0xC7D92D0A93F0F151 +#define P751X2_7 $0xB52B363427EF98ED +#define P751X2_8 $0x109D30CFADD7D0ED +#define P751X2_9 $0x0AC56A08B964AE90 +#define P751X2_10 $0x1C25213F2F75B8CD +#define P751X2_11 $0x0000DFCBAA83EE38 + +// The MSR code uses these registers for parameter passing. Keep using +// them to avoid significant code changes. This means that when the Go +// assembler does something strange, we can diff the machine code +// against a different assembler to find out what Go did. + +#define REG_P1 DI +#define REG_P2 SI +#define REG_P3 DX + +// We can't write MOVQ $0, AX because Go's assembler incorrectly +// optimizes this to XOR AX, AX, which clobbers the carry flags. +// +// This bug was defined to be "correct" behaviour (cf. +// https://github.com/golang/go/issues/12405 ) by declaring that the MOV +// pseudo-instruction clobbers flags, although this fact is mentioned +// nowhere in the documentation for the Go assembler. +// +// Defining MOVQ to clobber flags has the effect that it is never safe +// to interleave MOVQ with ADCQ and SBBQ instructions. Since this is +// required to write a carry chain longer than registers' working set, +// all of the below code therefore relies on the unspecified and +// undocumented behaviour that MOV won't clobber flags, except in the +// case of the above-mentioned bug. +// +// However, there's also no specification of which instructions +// correspond to machine instructions, and which are +// pseudo-instructions (i.e., no specification of what the assembler +// actually does), so this doesn't seem much worse than usual. +// +// Avoid the bug by dropping the bytes for `mov eax, 0` in directly: + +#define ZERO_AX_WITHOUT_CLOBBERING_FLAGS BYTE $0xB8; BYTE $0; BYTE $0; BYTE $0; BYTE $0; + +TEXT ·fp751StrongReduce(SB), NOSPLIT, $0-8 + MOVQ x+0(FP), REG_P1 + + // Zero AX for later use: + XORQ AX, AX + + // Load p into registers: + MOVQ P751_0, R8 + // P751_{1,2,3,4} = P751_0, so reuse R8 + MOVQ P751_5, R9 + MOVQ P751_6, R10 + MOVQ P751_7, R11 + MOVQ P751_8, R12 + MOVQ P751_9, R13 + MOVQ P751_10, R14 + MOVQ P751_11, R15 + + // Set x <- x - p + SUBQ R8, (REG_P1) + SBBQ R8, (8)(REG_P1) + SBBQ R8, (16)(REG_P1) + SBBQ R8, (24)(REG_P1) + SBBQ R8, (32)(REG_P1) + SBBQ R9, (40)(REG_P1) + SBBQ R10, (48)(REG_P1) + SBBQ R11, (56)(REG_P1) + SBBQ R12, (64)(REG_P1) + SBBQ R13, (72)(REG_P1) + SBBQ R14, (80)(REG_P1) + SBBQ R15, (88)(REG_P1) + + // Save carry flag indicating x-p < 0 as a mask in AX + SBBQ $0, AX + + // Conditionally add p to x if x-p < 0 + ANDQ AX, R8 + ANDQ AX, R9 + ANDQ AX, R10 + ANDQ AX, R11 + ANDQ AX, R12 + ANDQ AX, R13 + ANDQ AX, R14 + ANDQ AX, R15 + + ADDQ R8, (REG_P1) + ADCQ R8, (8)(REG_P1) + ADCQ R8, (16)(REG_P1) + ADCQ R8, (24)(REG_P1) + ADCQ R8, (32)(REG_P1) + ADCQ R9, (40)(REG_P1) + ADCQ R10, (48)(REG_P1) + ADCQ R11, (56)(REG_P1) + ADCQ R12, (64)(REG_P1) + ADCQ R13, (72)(REG_P1) + ADCQ R14, (80)(REG_P1) + ADCQ R15, (88)(REG_P1) + + RET + +TEXT ·fp751ConditionalSwap(SB), NOSPLIT, $0-17 + + MOVQ x+0(FP), REG_P1 + MOVQ y+8(FP), REG_P2 + MOVB choice+16(FP), AL // AL = 0 or 1 + MOVBLZX AL, AX // AX = 0 or 1 + NEGQ AX // RAX = 0x00..00 or 0xff..ff + + MOVQ (0*8)(REG_P1), BX // BX = x[0] + MOVQ (0*8)(REG_P2), CX // CX = y[0] + MOVQ CX, DX // DX = y[0] + XORQ BX, DX // DX = y[0] ^ x[0] + ANDQ AX, DX // DX = (y[0] ^ x[0]) & mask + XORQ DX, BX // BX = (y[0] ^ x[0]) & mask) ^ x[0] = x[0] or y[0] + XORQ DX, CX // CX = (y[0] ^ x[0]) & mask) ^ y[0] = y[0] or x[0] + MOVQ BX, (0*8)(REG_P1) + MOVQ CX, (0*8)(REG_P2) + + MOVQ (1*8)(REG_P1), BX + MOVQ (1*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (1*8)(REG_P1) + MOVQ CX, (1*8)(REG_P2) + + MOVQ (2*8)(REG_P1), BX + MOVQ (2*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (2*8)(REG_P1) + MOVQ CX, (2*8)(REG_P2) + + MOVQ (3*8)(REG_P1), BX + MOVQ (3*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (3*8)(REG_P1) + MOVQ CX, (3*8)(REG_P2) + + MOVQ (4*8)(REG_P1), BX + MOVQ (4*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (4*8)(REG_P1) + MOVQ CX, (4*8)(REG_P2) + + MOVQ (5*8)(REG_P1), BX + MOVQ (5*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (5*8)(REG_P1) + MOVQ CX, (5*8)(REG_P2) + + MOVQ (6*8)(REG_P1), BX + MOVQ (6*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (6*8)(REG_P1) + MOVQ CX, (6*8)(REG_P2) + + MOVQ (7*8)(REG_P1), BX + MOVQ (7*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (7*8)(REG_P1) + MOVQ CX, (7*8)(REG_P2) + + MOVQ (8*8)(REG_P1), BX + MOVQ (8*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (8*8)(REG_P1) + MOVQ CX, (8*8)(REG_P2) + + MOVQ (9*8)(REG_P1), BX + MOVQ (9*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (9*8)(REG_P1) + MOVQ CX, (9*8)(REG_P2) + + MOVQ (10*8)(REG_P1), BX + MOVQ (10*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (10*8)(REG_P1) + MOVQ CX, (10*8)(REG_P2) + + MOVQ (11*8)(REG_P1), BX + MOVQ (11*8)(REG_P2), CX + MOVQ CX, DX + XORQ BX, DX + ANDQ AX, DX + XORQ DX, BX + XORQ DX, CX + MOVQ BX, (11*8)(REG_P1) + MOVQ CX, (11*8)(REG_P2) + + RET + +TEXT ·fp751AddReduced(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), CX + ADDQ (REG_P2), R8 + ADCQ (8)(REG_P2), R9 + ADCQ (16)(REG_P2), R10 + ADCQ (24)(REG_P2), R11 + ADCQ (32)(REG_P2), R12 + ADCQ (40)(REG_P2), R13 + ADCQ (48)(REG_P2), R14 + ADCQ (56)(REG_P2), R15 + ADCQ (64)(REG_P2), CX + MOVQ (72)(REG_P1), AX + ADCQ (72)(REG_P2), AX + MOVQ AX, (72)(REG_P3) + MOVQ (80)(REG_P1), AX + ADCQ (80)(REG_P2), AX + MOVQ AX, (80)(REG_P3) + MOVQ (88)(REG_P1), AX + ADCQ (88)(REG_P2), AX + MOVQ AX, (88)(REG_P3) + + MOVQ P751X2_0, AX + SUBQ AX, R8 + MOVQ P751X2_1, AX + SBBQ AX, R9 + SBBQ AX, R10 + SBBQ AX, R11 + SBBQ AX, R12 + MOVQ P751X2_5, AX + SBBQ AX, R13 + MOVQ P751X2_6, AX + SBBQ AX, R14 + MOVQ P751X2_7, AX + SBBQ AX, R15 + MOVQ P751X2_8, AX + SBBQ AX, CX + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) + MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ CX, (64)(REG_P3) + MOVQ (72)(REG_P3), R8 + MOVQ (80)(REG_P3), R9 + MOVQ (88)(REG_P3), R10 + MOVQ P751X2_9, AX + SBBQ AX, R8 + MOVQ P751X2_10, AX + SBBQ AX, R9 + MOVQ P751X2_11, AX + SBBQ AX, R10 + MOVQ R8, (72)(REG_P3) + MOVQ R9, (80)(REG_P3) + MOVQ R10, (88)(REG_P3) + ZERO_AX_WITHOUT_CLOBBERING_FLAGS + SBBQ $0, AX + + MOVQ P751X2_0, SI + ANDQ AX, SI + MOVQ P751X2_1, R8 + ANDQ AX, R8 + MOVQ P751X2_5, R9 + ANDQ AX, R9 + MOVQ P751X2_6, R10 + ANDQ AX, R10 + MOVQ P751X2_7, R11 + ANDQ AX, R11 + MOVQ P751X2_8, R12 + ANDQ AX, R12 + MOVQ P751X2_9, R13 + ANDQ AX, R13 + MOVQ P751X2_10, R14 + ANDQ AX, R14 + MOVQ P751X2_11, R15 + ANDQ AX, R15 + + MOVQ (REG_P3), AX + ADDQ SI, AX + MOVQ AX, (REG_P3) + MOVQ (8)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (8)(REG_P3) + MOVQ (16)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (16)(REG_P3) + MOVQ (24)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (24)(REG_P3) + MOVQ (32)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (32)(REG_P3) + MOVQ (40)(REG_P3), AX + ADCQ R9, AX + MOVQ AX, (40)(REG_P3) + MOVQ (48)(REG_P3), AX + ADCQ R10, AX + MOVQ AX, (48)(REG_P3) + MOVQ (56)(REG_P3), AX + ADCQ R11, AX + MOVQ AX, (56)(REG_P3) + MOVQ (64)(REG_P3), AX + ADCQ R12, AX + MOVQ AX, (64)(REG_P3) + MOVQ (72)(REG_P3), AX + ADCQ R13, AX + MOVQ AX, (72)(REG_P3) + MOVQ (80)(REG_P3), AX + ADCQ R14, AX + MOVQ AX, (80)(REG_P3) + MOVQ (88)(REG_P3), AX + ADCQ R15, AX + MOVQ AX, (88)(REG_P3) + + RET + +TEXT ·fp751SubReduced(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), CX + SUBQ (REG_P2), R8 + SBBQ (8)(REG_P2), R9 + SBBQ (16)(REG_P2), R10 + SBBQ (24)(REG_P2), R11 + SBBQ (32)(REG_P2), R12 + SBBQ (40)(REG_P2), R13 + SBBQ (48)(REG_P2), R14 + SBBQ (56)(REG_P2), R15 + SBBQ (64)(REG_P2), CX + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) + MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ CX, (64)(REG_P3) + MOVQ (72)(REG_P1), AX + SBBQ (72)(REG_P2), AX + MOVQ AX, (72)(REG_P3) + MOVQ (80)(REG_P1), AX + SBBQ (80)(REG_P2), AX + MOVQ AX, (80)(REG_P3) + MOVQ (88)(REG_P1), AX + SBBQ (88)(REG_P2), AX + MOVQ AX, (88)(REG_P3) + ZERO_AX_WITHOUT_CLOBBERING_FLAGS + SBBQ $0, AX + + MOVQ P751X2_0, SI + ANDQ AX, SI + MOVQ P751X2_1, R8 + ANDQ AX, R8 + MOVQ P751X2_5, R9 + ANDQ AX, R9 + MOVQ P751X2_6, R10 + ANDQ AX, R10 + MOVQ P751X2_7, R11 + ANDQ AX, R11 + MOVQ P751X2_8, R12 + ANDQ AX, R12 + MOVQ P751X2_9, R13 + ANDQ AX, R13 + MOVQ P751X2_10, R14 + ANDQ AX, R14 + MOVQ P751X2_11, R15 + ANDQ AX, R15 + + MOVQ (REG_P3), AX + ADDQ SI, AX + MOVQ AX, (REG_P3) + MOVQ (8)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (8)(REG_P3) + MOVQ (16)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (16)(REG_P3) + MOVQ (24)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (24)(REG_P3) + MOVQ (32)(REG_P3), AX + ADCQ R8, AX + MOVQ AX, (32)(REG_P3) + MOVQ (40)(REG_P3), AX + ADCQ R9, AX + MOVQ AX, (40)(REG_P3) + MOVQ (48)(REG_P3), AX + ADCQ R10, AX + MOVQ AX, (48)(REG_P3) + MOVQ (56)(REG_P3), AX + ADCQ R11, AX + MOVQ AX, (56)(REG_P3) + MOVQ (64)(REG_P3), AX + ADCQ R12, AX + MOVQ AX, (64)(REG_P3) + MOVQ (72)(REG_P3), AX + ADCQ R13, AX + MOVQ AX, (72)(REG_P3) + MOVQ (80)(REG_P3), AX + ADCQ R14, AX + MOVQ AX, (80)(REG_P3) + MOVQ (88)(REG_P3), AX + ADCQ R15, AX + MOVQ AX, (88)(REG_P3) + + RET + +TEXT ·fp751Mul(SB), $96-24 + + // Here we store the destination in CX instead of in REG_P3 because the + // multiplication instructions use DX as an implicit destination + // operand: MULQ $REG sets DX:AX <-- AX * $REG. + + MOVQ z+0(FP), CX + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + XORQ AX, AX + MOVQ (48)(REG_P1), R8 + MOVQ (56)(REG_P1), R9 + MOVQ (64)(REG_P1), R10 + MOVQ (72)(REG_P1), R11 + MOVQ (80)(REG_P1), R12 + MOVQ (88)(REG_P1), R13 + ADDQ (REG_P1), R8 + ADCQ (8)(REG_P1), R9 + ADCQ (16)(REG_P1), R10 + ADCQ (24)(REG_P1), R11 + ADCQ (32)(REG_P1), R12 + ADCQ (40)(REG_P1), R13 + MOVQ R8, (CX) + MOVQ R9, (8)(CX) + MOVQ R10, (16)(CX) + MOVQ R11, (24)(CX) + MOVQ R12, (32)(CX) + MOVQ R13, (40)(CX) + SBBQ $0, AX + + XORQ DX, DX + MOVQ (48)(REG_P2), R8 + MOVQ (56)(REG_P2), R9 + MOVQ (64)(REG_P2), R10 + MOVQ (72)(REG_P2), R11 + MOVQ (80)(REG_P2), R12 + MOVQ (88)(REG_P2), R13 + ADDQ (REG_P2), R8 + ADCQ (8)(REG_P2), R9 + ADCQ (16)(REG_P2), R10 + ADCQ (24)(REG_P2), R11 + ADCQ (32)(REG_P2), R12 + ADCQ (40)(REG_P2), R13 + MOVQ R8, (48)(CX) + MOVQ R9, (56)(CX) + MOVQ R10, (64)(CX) + MOVQ R11, (72)(CX) + MOVQ R12, (80)(CX) + MOVQ R13, (88)(CX) + SBBQ $0, DX + MOVQ AX, (80)(SP) + MOVQ DX, (88)(SP) + + // (SP[0-8],R10,R8,R9) <- (AH+AL)*(BH+BL) + + MOVQ (CX), R11 + MOVQ R8, AX + MULQ R11 + MOVQ AX, (SP) // c0 + MOVQ DX, R14 + + XORQ R15, R15 + MOVQ R9, AX + MULQ R11 + XORQ R9, R9 + ADDQ AX, R14 + ADCQ DX, R9 + + MOVQ (8)(CX), R12 + MOVQ R8, AX + MULQ R12 + ADDQ AX, R14 + MOVQ R14, (8)(SP) // c1 + ADCQ DX, R9 + ADCQ $0, R15 + + XORQ R8, R8 + MOVQ R10, AX + MULQ R11 + ADDQ AX, R9 + MOVQ (48)(CX), R13 + ADCQ DX, R15 + ADCQ $0, R8 + + MOVQ (16)(CX), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R15 + MOVQ (56)(CX), AX + ADCQ $0, R8 + + MULQ R12 + ADDQ AX, R9 + MOVQ R9, (16)(SP) // c2 + ADCQ DX, R15 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (72)(CX), AX + MULQ R11 + ADDQ AX, R15 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (24)(CX), AX + MULQ R13 + ADDQ AX, R15 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ R10, AX + MULQ R12 + ADDQ AX, R15 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (16)(CX), R14 + MOVQ (56)(CX), AX + MULQ R14 + ADDQ AX, R15 + MOVQ R15, (24)(SP) // c3 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (80)(CX), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (64)(CX), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (48)(CX), R15 + MOVQ (32)(CX), AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(CX), AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (24)(CX), R13 + MOVQ (56)(CX), AX + MULQ R13 + ADDQ AX, R8 + MOVQ R8, (32)(SP) // c4 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (88)(CX), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (64)(CX), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (72)(CX), AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (40)(CX), AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (80)(CX), AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (32)(CX), R15 + MOVQ (56)(CX), AX + MULQ R15 + ADDQ AX, R9 + MOVQ R9, (40)(SP) // c5 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (64)(CX), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (88)(CX), AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (80)(CX), AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (40)(CX), R11 + MOVQ (56)(CX), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (72)(CX), AX + MULQ R13 + ADDQ AX, R10 + MOVQ R10, (48)(SP) // c6 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (88)(CX), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (64)(CX), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (80)(CX), AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(CX), AX + MULQ R15 + ADDQ AX, R8 + MOVQ R8, (56)(SP) // c7 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (72)(CX), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (80)(CX), AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(CX), AX + MULQ R13 + ADDQ AX, R9 + MOVQ R9, (64)(SP) // c8 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (88)(CX), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (80)(CX), AX + MULQ R11 + ADDQ AX, R10 // c9 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (88)(CX), AX + MULQ R11 + ADDQ AX, R8 // c10 + ADCQ DX, R9 // c11 + + MOVQ (88)(SP), AX + MOVQ (CX), DX + ANDQ AX, R12 + ANDQ AX, R14 + ANDQ AX, DX + ANDQ AX, R13 + ANDQ AX, R15 + ANDQ AX, R11 + MOVQ (48)(SP), AX + ADDQ AX, DX + MOVQ (56)(SP), AX + ADCQ AX, R12 + MOVQ (64)(SP), AX + ADCQ AX, R14 + ADCQ R10, R13 + ADCQ R8, R15 + ADCQ R9, R11 + MOVQ (80)(SP), AX + MOVQ DX, (48)(SP) + MOVQ R12, (56)(SP) + MOVQ R14, (64)(SP) + MOVQ R13, (72)(SP) + MOVQ R15, (80)(SP) + MOVQ R11, (88)(SP) + + MOVQ (48)(CX), R8 + MOVQ (56)(CX), R9 + MOVQ (64)(CX), R10 + MOVQ (72)(CX), R11 + MOVQ (80)(CX), R12 + MOVQ (88)(CX), R13 + ANDQ AX, R8 + ANDQ AX, R9 + ANDQ AX, R10 + ANDQ AX, R11 + ANDQ AX, R12 + ANDQ AX, R13 + MOVQ (48)(SP), AX + ADDQ AX, R8 + MOVQ (56)(SP), AX + ADCQ AX, R9 + MOVQ (64)(SP), AX + ADCQ AX, R10 + MOVQ (72)(SP), AX + ADCQ AX, R11 + MOVQ (80)(SP), AX + ADCQ AX, R12 + MOVQ (88)(SP), AX + ADCQ AX, R13 + MOVQ R8, (48)(SP) + MOVQ R9, (56)(SP) + MOVQ R11, (72)(SP) + + // CX[0-11] <- AL*BL + MOVQ (REG_P1), R11 + MOVQ (REG_P2), AX + MULQ R11 + XORQ R9, R9 + MOVQ AX, (CX) // c0 + MOVQ R10, (64)(SP) + MOVQ DX, R8 + + MOVQ (8)(REG_P2), AX + MULQ R11 + XORQ R10, R10 + ADDQ AX, R8 + MOVQ R12, (80)(SP) + ADCQ DX, R9 + + MOVQ (8)(REG_P1), R12 + MOVQ (REG_P2), AX + MULQ R12 + ADDQ AX, R8 + MOVQ R8, (8)(CX) // c1 + ADCQ DX, R9 + MOVQ R13, (88)(SP) + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (16)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (REG_P2), R13 + MOVQ (16)(REG_P1), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (8)(REG_P2), AX + MULQ R12 + ADDQ AX, R9 + MOVQ R9, (16)(CX) // c2 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (24)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (24)(REG_P1), AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (16)(REG_P2), AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (16)(REG_P1), R14 + MOVQ (8)(REG_P2), AX + MULQ R14 + ADDQ AX, R10 + MOVQ R10, (24)(CX) // c3 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (32)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (16)(REG_P2), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (32)(REG_P1), AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (24)(REG_P2), AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (24)(REG_P1), R13 + MOVQ (8)(REG_P2), AX + MULQ R13 + ADDQ AX, R8 + MOVQ R8, (32)(CX) // c4 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (40)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (16)(REG_P2), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (24)(REG_P2), AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (40)(REG_P1), R11 + MOVQ (REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (32)(REG_P2), AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (32)(REG_P1), R15 + MOVQ (8)(REG_P2), AX + MULQ R15 + ADDQ AX, R9 + MOVQ R9, (40)(CX) //c5 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (16)(REG_P2), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (40)(REG_P2), AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (32)(REG_P2), AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (8)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (24)(REG_P2), AX + MULQ R13 + ADDQ AX, R10 + MOVQ R10, (48)(CX) // c6 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (40)(REG_P2), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (16)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (32)(REG_P2), AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (24)(REG_P2), AX + MULQ R15 + ADDQ AX, R8 + MOVQ R8, (56)(CX) // c7 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (24)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (32)(REG_P2), AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (40)(REG_P2), AX + MULQ R13 + ADDQ AX, R9 + MOVQ R9, (64)(CX) // c8 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (40)(REG_P2), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (32)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + MOVQ R10, (72)(CX) // c9 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (40)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + MOVQ R8, (80)(CX) // c10 + ADCQ DX, R9 + MOVQ R9, (88)(CX) // c11 + + // CX[12-23] <- AH*BH + MOVQ (48)(REG_P1), R11 + MOVQ (48)(REG_P2), AX + MULQ R11 + XORQ R9, R9 + MOVQ AX, (96)(CX) // c0 + MOVQ DX, R8 + + MOVQ (56)(REG_P2), AX + MULQ R11 + XORQ R10, R10 + ADDQ AX, R8 + ADCQ DX, R9 + + MOVQ (56)(REG_P1), R12 + MOVQ (48)(REG_P2), AX + MULQ R12 + ADDQ AX, R8 + MOVQ R8, (104)(CX) // c1 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (64)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (48)(REG_P2), R13 + MOVQ (64)(REG_P1), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (56)(REG_P2), AX + MULQ R12 + ADDQ AX, R9 + MOVQ R9, (112)(CX) // c2 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (72)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (72)(REG_P1), AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (64)(REG_P2), AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (64)(REG_P1), R14 + MOVQ (56)(REG_P2), AX + MULQ R14 + ADDQ AX, R10 + MOVQ R10, (120)(CX) // c3 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (80)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (64)(REG_P2), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (80)(REG_P1), R15 + MOVQ R13, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(REG_P2), AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(REG_P1), R13 + MOVQ (56)(REG_P2), AX + MULQ R13 + ADDQ AX, R8 + MOVQ R8, (128)(CX) // c4 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (88)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (64)(REG_P2), AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (72)(REG_P2), AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(REG_P1), R11 + MOVQ (48)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (80)(REG_P2), AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (56)(REG_P2), AX + MULQ R15 + ADDQ AX, R9 + MOVQ R9, (136)(CX) // c5 + ADCQ DX, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ (64)(REG_P2), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (88)(REG_P2), AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (80)(REG_P2), AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (56)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (72)(REG_P2), AX + MULQ R13 + ADDQ AX, R10 + MOVQ R10, (144)(CX) // c6 + ADCQ DX, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ (88)(REG_P2), AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (64)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (80)(REG_P2), AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (72)(REG_P2), AX + MULQ R15 + ADDQ AX, R8 + MOVQ R8, (152)(CX) // c7 + ADCQ DX, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ (72)(REG_P2), AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (80)(REG_P2), AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(REG_P2), AX + MULQ R13 + ADDQ AX, R9 + MOVQ R9, (160)(CX) // c8 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(REG_P2), AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + + MOVQ (80)(REG_P2), AX + MULQ R11 + ADDQ AX, R10 + MOVQ R10, (168)(CX) // c9 + ADCQ DX, R8 + + MOVQ (88)(REG_P2), AX + MULQ R11 + ADDQ AX, R8 + MOVQ R8, (176)(CX) // c10 + ADCQ $0, DX + MOVQ DX, (184)(CX) // c11 + + // [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL)-AL*BL + MOVQ (SP), R8 + SUBQ (CX), R8 + MOVQ (8)(SP), R9 + SBBQ (8)(CX), R9 + MOVQ (16)(SP), R10 + SBBQ (16)(CX), R10 + MOVQ (24)(SP), R11 + SBBQ (24)(CX), R11 + MOVQ (32)(SP), R12 + SBBQ (32)(CX), R12 + MOVQ (40)(SP), R13 + SBBQ (40)(CX), R13 + MOVQ (48)(SP), R14 + SBBQ (48)(CX), R14 + MOVQ (56)(SP), R15 + SBBQ (56)(CX), R15 + MOVQ (64)(SP), AX + SBBQ (64)(CX), AX + MOVQ (72)(SP), DX + SBBQ (72)(CX), DX + MOVQ (80)(SP), DI + SBBQ (80)(CX), DI + MOVQ (88)(SP), SI + SBBQ (88)(CX), SI + MOVQ SI, (SP) + + // [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH + MOVQ (96)(CX), SI + SUBQ SI, R8 + MOVQ (104)(CX), SI + SBBQ SI, R9 + MOVQ (112)(CX), SI + SBBQ SI, R10 + MOVQ (120)(CX), SI + SBBQ SI, R11 + MOVQ (128)(CX), SI + SBBQ SI, R12 + MOVQ (136)(CX), SI + SBBQ SI, R13 + MOVQ (144)(CX), SI + SBBQ SI, R14 + MOVQ (152)(CX), SI + SBBQ SI, R15 + MOVQ (160)(CX), SI + SBBQ SI, AX + MOVQ (168)(CX), SI + SBBQ SI, DX + MOVQ (176)(CX), SI + SBBQ SI, DI + MOVQ (SP), SI + SBBQ (184)(CX), SI + + // FINAL RESULT + ADDQ (48)(CX), R8 + MOVQ R8, (48)(CX) + ADCQ (56)(CX), R9 + MOVQ R9, (56)(CX) + ADCQ (64)(CX), R10 + MOVQ R10, (64)(CX) + ADCQ (72)(CX), R11 + MOVQ R11, (72)(CX) + ADCQ (80)(CX), R12 + MOVQ R12, (80)(CX) + ADCQ (88)(CX), R13 + MOVQ R13, (88)(CX) + ADCQ (96)(CX), R14 + MOVQ R14, (96)(CX) + ADCQ (104)(CX), R15 + MOVQ R15, (104)(CX) + ADCQ (112)(CX), AX + MOVQ AX, (112)(CX) + ADCQ (120)(CX), DX + MOVQ DX, (120)(CX) + ADCQ (128)(CX), DI + MOVQ DI, (128)(CX) + ADCQ (136)(CX), SI + MOVQ SI, (136)(CX) + MOVQ (144)(CX), AX + ADCQ $0, AX + MOVQ AX, (144)(CX) + MOVQ (152)(CX), AX + ADCQ $0, AX + MOVQ AX, (152)(CX) + MOVQ (160)(CX), AX + ADCQ $0, AX + MOVQ AX, (160)(CX) + MOVQ (168)(CX), AX + ADCQ $0, AX + MOVQ AX, (168)(CX) + MOVQ (176)(CX), AX + ADCQ $0, AX + MOVQ AX, (176)(CX) + MOVQ (184)(CX), AX + ADCQ $0, AX + MOVQ AX, (184)(CX) + + RET + +TEXT ·fp751MontgomeryReduce(SB), $0-16 + + MOVQ z+0(FP), REG_P2 + MOVQ x+8(FP), REG_P1 + + MOVQ (REG_P1), R11 + MOVQ P751P1_5, AX + MULQ R11 + XORQ R8, R8 + ADDQ (40)(REG_P1), AX + MOVQ AX, (40)(REG_P2) // Z5 + ADCQ DX, R8 + + XORQ R9, R9 + MOVQ P751P1_6, AX + MULQ R11 + XORQ R10, R10 + ADDQ AX, R8 + ADCQ DX, R9 + + MOVQ (8)(REG_P1), R12 + MOVQ P751P1_5, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (48)(REG_P1), R8 + MOVQ R8, (48)(REG_P2) // Z6 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_7, AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_6, AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (16)(REG_P1), R13 + MOVQ P751P1_5, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (56)(REG_P1), R9 + MOVQ R9, (56)(REG_P2) // Z7 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_8, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_7, AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_6, AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (24)(REG_P1), R14 + MOVQ P751P1_5, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (64)(REG_P1), R10 + MOVQ R10, (64)(REG_P2) // Z8 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_9, AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_8, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_7, AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_6, AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (32)(REG_P1), R15 + MOVQ P751P1_5, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (72)(REG_P1), R8 + MOVQ R8, (72)(REG_P2) // Z9 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_10, AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_9, AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_8, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_7, AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_6, AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (40)(REG_P2), CX + MOVQ P751P1_5, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (80)(REG_P1), R9 + MOVQ R9, (80)(REG_P2) // Z10 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_11, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_10, AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_9, AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_8, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_7, AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_6, AX + MULQ CX + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (48)(REG_P2), R11 + MOVQ P751P1_5, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (88)(REG_P1), R10 + MOVQ R10, (88)(REG_P2) // Z11 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_11, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_10, AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_9, AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_8, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_7, AX + MULQ CX + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_6, AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (56)(REG_P2), R12 + MOVQ P751P1_5, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (96)(REG_P1), R8 + MOVQ R8, (REG_P2) // Z0 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_11, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_10, AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_9, AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_8, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_7, AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_6, AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (64)(REG_P2), R13 + MOVQ P751P1_5, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (104)(REG_P1), R9 + MOVQ R9, (8)(REG_P2) // Z1 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_11, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_10, AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_9, AX + MULQ CX + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_8, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_7, AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_6, AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ (72)(REG_P2), R14 + MOVQ P751P1_5, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (112)(REG_P1), R10 + MOVQ R10, (16)(REG_P2) // Z2 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_11, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_10, AX + MULQ CX + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_9, AX + MULQ R11 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_8, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_7, AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_6, AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ (80)(REG_P2), R15 + MOVQ P751P1_5, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (120)(REG_P1), R8 + MOVQ R8, (24)(REG_P2) // Z3 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_11, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_10, AX + MULQ R11 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_9, AX + MULQ R12 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_8, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_7, AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_6, AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ (88)(REG_P2), CX + MOVQ P751P1_5, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (128)(REG_P1), R9 + MOVQ R9, (32)(REG_P2) // Z4 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_11, AX + MULQ R11 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_10, AX + MULQ R12 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_9, AX + MULQ R13 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_8, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_7, AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_6, AX + MULQ CX + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (136)(REG_P1), R10 + MOVQ R10, (40)(REG_P2) // Z5 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_11, AX + MULQ R12 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_10, AX + MULQ R13 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_9, AX + MULQ R14 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_8, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_7, AX + MULQ CX + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (144)(REG_P1), R8 + MOVQ R8, (48)(REG_P2) // Z6 + ADCQ $0, R9 + ADCQ $0, R10 + + XORQ R8, R8 + MOVQ P751P1_11, AX + MULQ R13 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_10, AX + MULQ R14 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_9, AX + MULQ R15 + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + + MOVQ P751P1_8, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADCQ $0, R8 + ADDQ (152)(REG_P1), R9 + MOVQ R9, (56)(REG_P2) // Z7 + ADCQ $0, R10 + ADCQ $0, R8 + + XORQ R9, R9 + MOVQ P751P1_11, AX + MULQ R14 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_10, AX + MULQ R15 + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + + MOVQ P751P1_9, AX + MULQ CX + ADDQ AX, R10 + ADCQ DX, R8 + ADCQ $0, R9 + ADDQ (160)(REG_P1), R10 + MOVQ R10, (64)(REG_P2) // Z8 + ADCQ $0, R8 + ADCQ $0, R9 + + XORQ R10, R10 + MOVQ P751P1_11, AX + MULQ R15 + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + + MOVQ P751P1_10, AX + MULQ CX + ADDQ AX, R8 + ADCQ DX, R9 + ADCQ $0, R10 + ADDQ (168)(REG_P1), R8 // Z9 + MOVQ R8, (72)(REG_P2) // Z9 + ADCQ $0, R9 + ADCQ $0, R10 + + MOVQ P751P1_11, AX + MULQ CX + ADDQ AX, R9 + ADCQ DX, R10 + ADDQ (176)(REG_P1), R9 // Z10 + MOVQ R9, (80)(REG_P2) // Z10 + ADCQ $0, R10 + ADDQ (184)(REG_P1), R10 // Z11 + MOVQ R10, (88)(REG_P2) // Z11 + + RET + +TEXT ·fp751AddLazy(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), AX + MOVQ (72)(REG_P1), BX + MOVQ (80)(REG_P1), CX + MOVQ (88)(REG_P1), DI + + ADDQ (REG_P2), R8 + ADCQ (8)(REG_P2), R9 + ADCQ (16)(REG_P2), R10 + ADCQ (24)(REG_P2), R11 + ADCQ (32)(REG_P2), R12 + ADCQ (40)(REG_P2), R13 + ADCQ (48)(REG_P2), R14 + ADCQ (56)(REG_P2), R15 + ADCQ (64)(REG_P2), AX + ADCQ (72)(REG_P2), BX + ADCQ (80)(REG_P2), CX + ADCQ (88)(REG_P2), DI + + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) + MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ AX, (64)(REG_P3) + MOVQ BX, (72)(REG_P3) + MOVQ CX, (80)(REG_P3) + MOVQ DI, (88)(REG_P3) + + RET + +TEXT ·fp751X2AddLazy(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), AX + MOVQ (72)(REG_P1), BX + MOVQ (80)(REG_P1), CX + + ADDQ (REG_P2), R8 + ADCQ (8)(REG_P2), R9 + ADCQ (16)(REG_P2), R10 + ADCQ (24)(REG_P2), R11 + ADCQ (32)(REG_P2), R12 + ADCQ (40)(REG_P2), R13 + ADCQ (48)(REG_P2), R14 + ADCQ (56)(REG_P2), R15 + ADCQ (64)(REG_P2), AX + ADCQ (72)(REG_P2), BX + ADCQ (80)(REG_P2), CX + + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) + MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ AX, (64)(REG_P3) + MOVQ BX, (72)(REG_P3) + MOVQ CX, (80)(REG_P3) + MOVQ (88)(REG_P1), AX + ADCQ (88)(REG_P2), AX + MOVQ AX, (88)(REG_P3) + + MOVQ (96)(REG_P1), R8 + MOVQ (104)(REG_P1), R9 + MOVQ (112)(REG_P1), R10 + MOVQ (120)(REG_P1), R11 + MOVQ (128)(REG_P1), R12 + MOVQ (136)(REG_P1), R13 + MOVQ (144)(REG_P1), R14 + MOVQ (152)(REG_P1), R15 + MOVQ (160)(REG_P1), AX + MOVQ (168)(REG_P1), BX + MOVQ (176)(REG_P1), CX + MOVQ (184)(REG_P1), DI + + ADCQ (96)(REG_P2), R8 + ADCQ (104)(REG_P2), R9 + ADCQ (112)(REG_P2), R10 + ADCQ (120)(REG_P2), R11 + ADCQ (128)(REG_P2), R12 + ADCQ (136)(REG_P2), R13 + ADCQ (144)(REG_P2), R14 + ADCQ (152)(REG_P2), R15 + ADCQ (160)(REG_P2), AX + ADCQ (168)(REG_P2), BX + ADCQ (176)(REG_P2), CX + ADCQ (184)(REG_P2), DI + + MOVQ R8, (96)(REG_P3) + MOVQ R9, (104)(REG_P3) + MOVQ R10, (112)(REG_P3) + MOVQ R11, (120)(REG_P3) + MOVQ R12, (128)(REG_P3) + MOVQ R13, (136)(REG_P3) + MOVQ R14, (144)(REG_P3) + MOVQ R15, (152)(REG_P3) + MOVQ AX, (160)(REG_P3) + MOVQ BX, (168)(REG_P3) + MOVQ CX, (176)(REG_P3) + MOVQ DI, (184)(REG_P3) + + RET + + +TEXT ·fp751X2SubLazy(SB), NOSPLIT, $0-24 + + MOVQ z+0(FP), REG_P3 + MOVQ x+8(FP), REG_P1 + MOVQ y+16(FP), REG_P2 + + MOVQ (REG_P1), R8 + MOVQ (8)(REG_P1), R9 + MOVQ (16)(REG_P1), R10 + MOVQ (24)(REG_P1), R11 + MOVQ (32)(REG_P1), R12 + MOVQ (40)(REG_P1), R13 + MOVQ (48)(REG_P1), R14 + MOVQ (56)(REG_P1), R15 + MOVQ (64)(REG_P1), AX + MOVQ (72)(REG_P1), BX + MOVQ (80)(REG_P1), CX + + SUBQ (REG_P2), R8 + SBBQ (8)(REG_P2), R9 + SBBQ (16)(REG_P2), R10 + SBBQ (24)(REG_P2), R11 + SBBQ (32)(REG_P2), R12 + SBBQ (40)(REG_P2), R13 + SBBQ (48)(REG_P2), R14 + SBBQ (56)(REG_P2), R15 + SBBQ (64)(REG_P2), AX + SBBQ (72)(REG_P2), BX + SBBQ (80)(REG_P2), CX + + MOVQ R8, (REG_P3) + MOVQ R9, (8)(REG_P3) + MOVQ R10, (16)(REG_P3) + MOVQ R11, (24)(REG_P3) + MOVQ R12, (32)(REG_P3) + MOVQ R13, (40)(REG_P3) + MOVQ R14, (48)(REG_P3) + MOVQ R15, (56)(REG_P3) + MOVQ AX, (64)(REG_P3) + MOVQ BX, (72)(REG_P3) + MOVQ CX, (80)(REG_P3) + MOVQ (88)(REG_P1), AX + SBBQ (88)(REG_P2), AX + MOVQ AX, (88)(REG_P3) + + MOVQ (96)(REG_P1), R8 + MOVQ (104)(REG_P1), R9 + MOVQ (112)(REG_P1), R10 + MOVQ (120)(REG_P1), R11 + MOVQ (128)(REG_P1), R12 + MOVQ (136)(REG_P1), R13 + MOVQ (144)(REG_P1), R14 + MOVQ (152)(REG_P1), R15 + MOVQ (160)(REG_P1), AX + MOVQ (168)(REG_P1), BX + MOVQ (176)(REG_P1), CX + MOVQ (184)(REG_P1), DI + + SBBQ (96)(REG_P2), R8 + SBBQ (104)(REG_P2), R9 + SBBQ (112)(REG_P2), R10 + SBBQ (120)(REG_P2), R11 + SBBQ (128)(REG_P2), R12 + SBBQ (136)(REG_P2), R13 + SBBQ (144)(REG_P2), R14 + SBBQ (152)(REG_P2), R15 + SBBQ (160)(REG_P2), AX + SBBQ (168)(REG_P2), BX + SBBQ (176)(REG_P2), CX + SBBQ (184)(REG_P2), DI + + MOVQ R8, (96)(REG_P3) + MOVQ R9, (104)(REG_P3) + MOVQ R10, (112)(REG_P3) + MOVQ R11, (120)(REG_P3) + MOVQ R12, (128)(REG_P3) + MOVQ R13, (136)(REG_P3) + MOVQ R14, (144)(REG_P3) + MOVQ R15, (152)(REG_P3) + MOVQ AX, (160)(REG_P3) + MOVQ BX, (168)(REG_P3) + MOVQ CX, (176)(REG_P3) + MOVQ DI, (184)(REG_P3) + + // Now the carry flag is 1 if x-y < 0. If so, add p*2^768. + ZERO_AX_WITHOUT_CLOBBERING_FLAGS + SBBQ $0, AX + + // Load p into registers: + MOVQ P751_0, R8 + // P751_{1,2,3,4} = P751_0, so reuse R8 + MOVQ P751_5, R9 + MOVQ P751_6, R10 + MOVQ P751_7, R11 + MOVQ P751_8, R12 + MOVQ P751_9, R13 + MOVQ P751_10, R14 + MOVQ P751_11, R15 + + ANDQ AX, R8 + ANDQ AX, R9 + ANDQ AX, R10 + ANDQ AX, R11 + ANDQ AX, R12 + ANDQ AX, R13 + ANDQ AX, R14 + ANDQ AX, R15 + + ADDQ R8, (96 )(REG_P3) + ADCQ R8, (96+ 8)(REG_P3) + ADCQ R8, (96+16)(REG_P3) + ADCQ R8, (96+24)(REG_P3) + ADCQ R8, (96+32)(REG_P3) + ADCQ R9, (96+40)(REG_P3) + ADCQ R10, (96+48)(REG_P3) + ADCQ R11, (96+56)(REG_P3) + ADCQ R12, (96+64)(REG_P3) + ADCQ R13, (96+72)(REG_P3) + ADCQ R14, (96+80)(REG_P3) + ADCQ R15, (96+88)(REG_P3) + + RET + diff --git a/p503toolbox/field_decl.go b/p503toolbox/field_decl.go new file mode 100644 index 0000000..796f14b --- /dev/null +++ b/p503toolbox/field_decl.go @@ -0,0 +1,42 @@ +// +build amd64,!noasm + +package p503toolbox + +// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x. +// If choice is neither 0 nor 1 then behaviour is undefined. +// This function executes in constant time. +//go:noescape +func fp751ConditionalSwap(x, y *Fp751Element, choice uint8) + +// Compute z = x + y (mod p). +//go:noescape +func fp751AddReduced(z, x, y *Fp751Element) + +// Compute z = x - y (mod p). +//go:noescape +func fp751SubReduced(z, x, y *Fp751Element) + +// Compute z = x + y, without reducing mod p. +//go:noescape +func fp751AddLazy(z, x, y *Fp751Element) + +// Compute z = x + y, without reducing mod p. +//go:noescape +func fp751X2AddLazy(z, x, y *fp751X2) + +// Compute z = x - y, without reducing mod p. +//go:noescape +func fp751X2SubLazy(z, x, y *fp751X2) + +// Compute z = x * y. +//go:noescape +func fp751Mul(z *fp751X2, x, y *Fp751Element) + +// Perform Montgomery reduction: set z = x R^{-1} (mod 2*p). +// Destroys the input value. +//go:noescape +func fp751MontgomeryReduce(z *Fp751Element, x *fp751X2) + +// Reduce a field element in [0, 2*p) to one in [0,p). +//go:noescape +func fp751StrongReduce(x *Fp751Element) diff --git a/p503toolbox/field_generic.go b/p503toolbox/field_generic.go new file mode 100644 index 0000000..37f1528 --- /dev/null +++ b/p503toolbox/field_generic.go @@ -0,0 +1,254 @@ +// +build noasm arm64 arm + +package p503toolbox + +// helper used for uint128 representation +type uint128 struct { + H, L uint64 +} + +// Adds 2 64bit digits in constant time. +// Returns result and carry (1 or 0) +func addc64(cin, a, b uint64) (ret, cout uint64) { + t := a + cin + ret = b + t + cout = ((a & b) | ((a | b) & (^ret))) >> 63 + return +} + +// Substracts 2 64bit digits in constant time. +// Returns result and borrow (1 or 0) +func subc64(bIn, a, b uint64) (ret, bOut uint64) { + var tmp1 = a - b + // Set bOut if bIn!=0 and tmp1==0 in constant time + bOut = bIn & (1 ^ ((tmp1 | uint64(0-tmp1)) >> 63)) + // Constant time check if x> 63 + ret = tmp1 - bIn + return +} + +// Multiplies 2 64bit digits in constant time +func mul64(a, b uint64) (res uint128) { + var al, bl, ah, bh, albl, albh, ahbl, ahbh uint64 + var res1, res2, res3 uint64 + var carry, maskL, maskH, temp uint64 + + maskL = (^maskL) >> 32 + maskH = ^maskL + + al = a & maskL + ah = a >> 32 + bl = b & maskL + bh = b >> 32 + + albl = al * bl + albh = al * bh + ahbl = ah * bl + ahbh = ah * bh + res.L = albl & maskL + + res1 = albl >> 32 + res2 = ahbl & maskL + res3 = albh & maskL + temp = res1 + res2 + res3 + carry = temp >> 32 + res.L ^= temp << 32 + + res1 = ahbl >> 32 + res2 = albh >> 32 + res3 = ahbh & maskL + temp = res1 + res2 + res3 + carry + res.H = temp & maskL + carry = temp & maskH + res.H ^= (ahbh & maskH) + carry + return +} + +// Compute z = x + y (mod p). +func fp751AddReduced(z, x, y *Fp751Element) { + var carry uint64 + + // z=x+y % p751 + for i := 0; i < fp751NumWords; i++ { + z[i], carry = addc64(carry, x[i], y[i]) + } + + // z = z - p751x2 + carry = 0 + for i := 0; i < fp751NumWords; i++ { + z[i], carry = subc64(carry, z[i], p751x2[i]) + } + + // z = z + p751x2 + mask := uint64(0 - carry) + carry = 0 + for i := 0; i < fp751NumWords; i++ { + z[i], carry = addc64(carry, z[i], p751x2[i]&mask) + } +} + +// Compute z = x - y (mod p). +func fp751SubReduced(z, x, y *Fp751Element) { + var borrow uint64 + + for i := 0; i < fp751NumWords; i++ { + z[i], borrow = subc64(borrow, x[i], y[i]) + } + + mask := uint64(0 - borrow) + borrow = 0 + + for i := 0; i < fp751NumWords; i++ { + z[i], borrow = addc64(borrow, z[i], p751x2[i]&mask) + } +} + +// Conditionally swaps bits in x and y in constant time. +// mask indicates bits to be swaped (set bits are swapped) +// For details see "Hackers Delight, 2.20" +// +// Implementation doesn't actually depend on a prime field. +func fp751ConditionalSwap(x, y *Fp751Element, mask uint8) { + var tmp, mask64 uint64 + + mask64 = 0 - uint64(mask) + for i := 0; i < len(x); i++ { + tmp = mask64 & (x[i] ^ y[i]) + x[i] = tmp ^ x[i] + y[i] = tmp ^ y[i] + } +} + +// Perform Montgomery reduction: set z = x R^{-1} (mod 2*p) +// with R=2^768. Destroys the input value. +func fp751MontgomeryReduce(z *Fp751Element, x *fp751X2) { + var carry, t, u, v uint64 + var uv uint128 + var count int + + count = 5 // number of 0 digits in the least significat part of p751 + 1 + + for i := 0; i < fp751NumWords; i++ { + for j := 0; j < i; j++ { + if j < (i - count + 1) { + uv = mul64(z[j], p751p1[i-j]) + v, carry = addc64(0, uv.L, v) + u, carry = addc64(carry, uv.H, u) + t += carry + } + } + v, carry = addc64(0, v, x[i]) + u, carry = addc64(carry, u, 0) + t += carry + + z[i] = v + v = u + u = t + t = 0 + } + + for i := fp751NumWords; i < 2*fp751NumWords-1; i++ { + if count > 0 { + count-- + } + for j := i - fp751NumWords + 1; j < fp751NumWords; j++ { + if j < (fp751NumWords - count) { + uv = mul64(z[j], p751p1[i-j]) + v, carry = addc64(0, uv.L, v) + u, carry = addc64(carry, uv.H, u) + t += carry + } + } + v, carry = addc64(0, v, x[i]) + u, carry = addc64(carry, u, 0) + + t += carry + z[i-fp751NumWords] = v + v = u + u = t + t = 0 + } + v, carry = addc64(0, v, x[2*fp751NumWords-1]) + z[fp751NumWords-1] = v +} + +// Compute z = x * y. +func fp751Mul(z *fp751X2, x, y *Fp751Element) { + var u, v, t uint64 + var carry uint64 + var uv uint128 + + for i := uint64(0); i < fp751NumWords; i++ { + for j := uint64(0); j <= i; j++ { + uv = mul64(x[j], y[i-j]) + v, carry = addc64(0, uv.L, v) + u, carry = addc64(carry, uv.H, u) + t += carry + } + z[i] = v + v = u + u = t + t = 0 + } + + for i := fp751NumWords; i < (2*fp751NumWords)-1; i++ { + for j := i - fp751NumWords + 1; j < fp751NumWords; j++ { + uv = mul64(x[j], y[i-j]) + v, carry = addc64(0, uv.L, v) + u, carry = addc64(carry, uv.H, u) + t += carry + } + z[i] = v + v = u + u = t + t = 0 + } + z[2*fp751NumWords-1] = v +} + +// Compute z = x + y, without reducing mod p. +func fp751AddLazy(z, x, y *Fp751Element) { + var carry uint64 + for i := 0; i < fp751NumWords; i++ { + z[i], carry = addc64(carry, x[i], y[i]) + } +} + +// Compute z = x + y, without reducing mod p. +func fp751X2AddLazy(z, x, y *fp751X2) { + var carry uint64 + for i := 0; i < 2*fp751NumWords; i++ { + z[i], carry = addc64(carry, x[i], y[i]) + } +} + +// Reduce a field element in [0, 2*p) to one in [0,p). +func fp751StrongReduce(x *Fp751Element) { + var borrow, mask uint64 + for i := 0; i < fp751NumWords; i++ { + x[i], borrow = subc64(borrow, x[i], p751[i]) + } + + // Sets all bits if borrow = 1 + mask = 0 - borrow + borrow = 0 + for i := 0; i < fp751NumWords; i++ { + x[i], borrow = addc64(borrow, x[i], p751[i]&mask) + } +} + +// Compute z = x - y, without reducing mod p. +func fp751X2SubLazy(z, x, y *fp751X2) { + var borrow, mask uint64 + for i := 0; i < len(z); i++ { + z[i], borrow = subc64(borrow, x[i], y[i]) + } + + // Sets all bits if borrow = 1 + mask = 0 - borrow + borrow = 0 + for i := fp751NumWords; i < len(z); i++ { + z[i], borrow = addc64(borrow, z[i], p751[i-fp751NumWords]&mask) + } +} diff --git a/p503toolbox/field_test.go b/p503toolbox/field_test.go new file mode 100644 index 0000000..2f510fb --- /dev/null +++ b/p503toolbox/field_test.go @@ -0,0 +1,419 @@ +package p503toolbox + +import ( + "math/big" + "math/rand" + "reflect" + "testing" + "testing/quick" +) + +var quickCheckScaleFactor = uint8(3) +var quickCheckConfig = &quick.Config{MaxCount: (1 << (12 + quickCheckScaleFactor))} + +var cln16prime, _ = new(big.Int).SetString("10354717741769305252977768237866805321427389645549071170116189679054678940682478846502882896561066713624553211618840202385203911976522554393044160468771151816976706840078913334358399730952774926980235086850991501872665651576831", 10) + +// Convert an Fp751Element to a big.Int for testing. Because this is only +// for testing, no big.Int to Fp751Element conversion is provided. + +func radix64ToBigInt(x []uint64) *big.Int { + radix := new(big.Int) + // 2^64 + radix.UnmarshalText(([]byte)("18446744073709551616")) + + base := new(big.Int).SetUint64(1) + val := new(big.Int).SetUint64(0) + tmp := new(big.Int) + + for _, xi := range x { + tmp.SetUint64(xi) + tmp.Mul(tmp, base) + val.Add(val, tmp) + base.Mul(base, radix) + } + + return val +} + +func VartimeEq(x,y *PrimeFieldElement) bool { + return x.A.vartimeEq(y.A) +} + +func (x *Fp751Element) toBigInt() *big.Int { + // Convert from Montgomery form + return x.toBigIntFromMontgomeryForm() +} + +func (x *Fp751Element) toBigIntFromMontgomeryForm() *big.Int { + // Convert from Montgomery form + a := Fp751Element{} + aR := fp751X2{} + copy(aR[:], x[:]) // = a*R + fp751MontgomeryReduce(&a, &aR) // = a mod p in [0,2p) + fp751StrongReduce(&a) // = a mod p in [0,p) + return radix64ToBigInt(a[:]) +} + +func TestPrimeFieldElementToBigInt(t *testing.T) { + // Chosen so that p < xR < 2p + x := PrimeFieldElement{A: Fp751Element{ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 140737488355328, + }} + // Computed using Sage: + // sage: p = 2^372 * 3^239 - 1 + // sage: R = 2^768 + // sage: from_radix_64 = lambda xs: sum((xi * (2**64)**i for i,xi in enumerate(xs))) + // sage: xR = from_radix_64([1]*11 + [2^47]) + // sage: assert(p < xR) + // sage: assert(xR < 2*p) + // sage: (xR / R) % p + xBig, _ := new(big.Int).SetString("4469946751055876387821312289373600189787971305258234719850789711074696941114031433609871105823930699680637820852699269802003300352597419024286385747737509380032982821081644521634652750355306547718505685107272222083450567982240", 10) + if xBig.Cmp(x.A.toBigInt()) != 0 { + t.Error("Expected", xBig, "found", x.A.toBigInt()) + } +} + +func generateFp751(rand *rand.Rand) Fp751Element { + // Generation strategy: low limbs taken from [0,2^64); high limb + // taken from smaller range + // + // Size hint is ignored since all elements are fixed size. + // + // Field elements taken in range [0,2p). Emulate this by capping + // the high limb by the top digit of 2*p-1: + // + // sage: (2*p-1).digits(2^64)[-1] + // 246065832128056 + // + // This still allows generating values >= 2p, but hopefully that + // excess is OK (and if it's not, we'll find out, because it's for + // testing...) + // + highLimb := rand.Uint64() % 246065832128056 + + return Fp751Element{ + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + rand.Uint64(), + highLimb, + } +} + +func (x PrimeFieldElement) Generate(rand *rand.Rand, size int) reflect.Value { + return reflect.ValueOf(PrimeFieldElement{A: generateFp751(rand)}) +} + +func (x ExtensionFieldElement) Generate(rand *rand.Rand, size int) reflect.Value { + return reflect.ValueOf(ExtensionFieldElement{A: generateFp751(rand), B: generateFp751(rand)}) +} + +//------------------------------------------------------------------------------ +// Extension Field +//------------------------------------------------------------------------------ + +func TestOneExtensionFieldToBytes(t *testing.T) { + var x ExtensionFieldElement + var xBytes [188]byte + + x.One() + x.ToBytes(xBytes[:]) + if xBytes[0] != 1 { + t.Error("Expected 1, got", xBytes[0]) + } + for i := 1; i < 188; i++ { + if xBytes[i] != 0 { + t.Error("Expected 0, got", xBytes[0]) + } + } +} + +func TestExtensionFieldElementToBytesRoundTrip(t *testing.T) { + roundTrips := func(x ExtensionFieldElement) bool { + var xBytes [188]byte + var xPrime ExtensionFieldElement + x.ToBytes(xBytes[:]) + xPrime.FromBytes(xBytes[:]) + + return x.VartimeEq(&xPrime) + } + + if err := quick.Check(roundTrips, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementMulDistributesOverAdd(t *testing.T) { + mulDistributesOverAdd := func(x, y, z ExtensionFieldElement) bool { + // Compute t1 = (x+y)*z + t1 := new(ExtensionFieldElement) + t1.Add(&x, &y) + t1.Mul(t1, &z) + + // Compute t2 = x*z + y*z + t2 := new(ExtensionFieldElement) + t3 := new(ExtensionFieldElement) + t2.Mul(&x, &z) + t3.Mul(&y, &z) + t2.Add(t2, t3) + + return t1.VartimeEq(t2) + } + + if err := quick.Check(mulDistributesOverAdd, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementMulIsAssociative(t *testing.T) { + isAssociative := func(x, y, z ExtensionFieldElement) bool { + // Compute t1 = (x*y)*z + t1 := new(ExtensionFieldElement) + t1.Mul(&x, &y) + t1.Mul(t1, &z) + + // Compute t2 = (y*z)*x + t2 := new(ExtensionFieldElement) + t2.Mul(&y, &z) + t2.Mul(t2, &x) + + return t1.VartimeEq(t2) + } + + if err := quick.Check(isAssociative, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementSquareMatchesMul(t *testing.T) { + sqrMatchesMul := func(x ExtensionFieldElement) bool { + // Compute t1 = (x*x) + t1 := new(ExtensionFieldElement) + t1.Mul(&x, &x) + + // Compute t2 = x^2 + t2 := new(ExtensionFieldElement) + t2.Square(&x) + + return t1.VartimeEq(t2) + } + + if err := quick.Check(sqrMatchesMul, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementInv(t *testing.T) { + inverseIsCorrect := func(x ExtensionFieldElement) bool { + z := new(ExtensionFieldElement) + z.Inv(&x) + + // Now z = (1/x), so (z * x) * x == x + z.Mul(z, &x) + z.Mul(z, &x) + + return z.VartimeEq(&x) + } + + // This is more expensive; run fewer tests + var quickCheckConfig = &quick.Config{MaxCount: (1 << (8 + quickCheckScaleFactor))} + if err := quick.Check(inverseIsCorrect, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestExtensionFieldElementBatch3Inv(t *testing.T) { + batchInverseIsCorrect := func(x1, x2, x3 ExtensionFieldElement) bool { + var x1Inv, x2Inv, x3Inv ExtensionFieldElement + x1Inv.Inv(&x1) + x2Inv.Inv(&x2) + x3Inv.Inv(&x3) + + var y1, y2, y3 ExtensionFieldElement + ExtensionFieldBatch3Inv(&x1, &x2, &x3, &y1, &y2, &y3) + + return (y1.VartimeEq(&x1Inv) && y2.VartimeEq(&x2Inv) && y3.VartimeEq(&x3Inv)) + } + + // This is more expensive; run fewer tests + var quickCheckConfig = &quick.Config{MaxCount: (1 << (5 + quickCheckScaleFactor))} + if err := quick.Check(batchInverseIsCorrect, quickCheckConfig); err != nil { + t.Error(err) + } +} + +//------------------------------------------------------------------------------ +// Prime Field +//------------------------------------------------------------------------------ +func TestPrimeFieldElementMulVersusBigInt(t *testing.T) { + mulMatchesBigInt := func(x, y PrimeFieldElement) bool { + z := new(PrimeFieldElement) + z.Mul(&x, &y) + + check := new(big.Int) + check.Mul(x.A.toBigInt(), y.A.toBigInt()) + check.Mod(check, cln16prime) + + return check.Cmp(z.A.toBigInt()) == 0 + } + + if err := quick.Check(mulMatchesBigInt, quickCheckConfig); err != nil { + t.Error(err) + } +} + +func TestPrimeFieldElementP34VersusBigInt(t *testing.T) { + var p34, _ = new(big.Int).SetString("2588679435442326313244442059466701330356847411387267792529047419763669735170619711625720724140266678406138302904710050596300977994130638598261040117192787954244176710019728333589599932738193731745058771712747875468166412894207", 10) + p34MatchesBigInt := func(x PrimeFieldElement) bool { + z := new(PrimeFieldElement) + z.P34(&x) + + check := x.A.toBigInt() + check.Exp(check, p34, cln16prime) + + return check.Cmp(z.A.toBigInt()) == 0 + } + + // This is more expensive; run fewer tests + var quickCheckConfig = &quick.Config{MaxCount: (1 << (8 + quickCheckScaleFactor))} + if err := quick.Check(p34MatchesBigInt, quickCheckConfig); err != nil { + t.Error(err) + } +} + +// Package-level storage for this field element is intended to deter +// compiler optimizations. +var benchmarkFp751Element Fp751Element +var benchmarkFp751X2 fp751X2 +var bench_x = Fp751Element{17026702066521327207, 5108203422050077993, 10225396685796065916, 11153620995215874678, 6531160855165088358, 15302925148404145445, 1248821577836769963, 9789766903037985294, 7493111552032041328, 10838999828319306046, 18103257655515297935, 27403304611634} +var bench_y = Fp751Element{4227467157325093378, 10699492810770426363, 13500940151395637365, 12966403950118934952, 16517692605450415877, 13647111148905630666, 14223628886152717087, 7167843152346903316, 15855377759596736571, 4300673881383687338, 6635288001920617779, 30486099554235} +var bench_z = fp751X2{1595347748594595712, 10854920567160033970, 16877102267020034574, 12435724995376660096, 3757940912203224231, 8251999420280413600, 3648859773438820227, 17622716832674727914, 11029567000887241528, 11216190007549447055, 17606662790980286987, 4720707159513626555, 12887743598335030915, 14954645239176589309, 14178817688915225254, 1191346797768989683, 12629157932334713723, 6348851952904485603, 16444232588597434895, 7809979927681678066, 14642637672942531613, 3092657597757640067, 10160361564485285723, 240071237} + +func BenchmarkExtensionFieldElementMul(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Mul(z, z) + } +} + +func BenchmarkExtensionFieldElementInv(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Inv(z) + } +} + +func BenchmarkExtensionFieldElementSquare(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Square(z) + } +} + +func BenchmarkExtensionFieldElementAdd(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Add(z, z) + } +} + +func BenchmarkExtensionFieldElementSub(b *testing.B) { + z := &ExtensionFieldElement{A: bench_x, B: bench_y} + w := new(ExtensionFieldElement) + + for n := 0; n < b.N; n++ { + w.Sub(z, z) + } +} + +func BenchmarkPrimeFieldElementMul(b *testing.B) { + z := &PrimeFieldElement{A: bench_x} + w := new(PrimeFieldElement) + + for n := 0; n < b.N; n++ { + w.Mul(z, z) + } +} + +// --- field operation functions + +func BenchmarkFp751Multiply(b *testing.B) { + for n := 0; n < b.N; n++ { + fp751Mul(&benchmarkFp751X2, &bench_x, &bench_y) + } +} + +func BenchmarkFp751MontgomeryReduce(b *testing.B) { + z := bench_z + + // This benchmark actually computes garbage, because + // fp751MontgomeryReduce mangles its input, but since it's + // constant-time that shouldn't matter for the benchmarks. + for n := 0; n < b.N; n++ { + fp751MontgomeryReduce(&benchmarkFp751Element, &z) + } +} + +func BenchmarkFp751AddReduced(b *testing.B) { + for n := 0; n < b.N; n++ { + fp751AddReduced(&benchmarkFp751Element, &bench_x, &bench_y) + } +} + +func BenchmarkFp751SubReduced(b *testing.B) { + for n := 0; n < b.N; n++ { + fp751SubReduced(&benchmarkFp751Element, &bench_x, &bench_y) + } +} + +func BenchmarkFp751ConditionalSwap(b *testing.B) { + x, y := bench_x, bench_y + for n := 0; n < b.N; n++ { + fp751ConditionalSwap(&x, &y, 1) + fp751ConditionalSwap(&x, &y, 0) + } +} + +func BenchmarkFp751StrongReduce(b *testing.B) { + x := bench_x + for n := 0; n < b.N; n++ { + fp751StrongReduce(&x) + } +} + +func BenchmarkFp751AddLazy(b *testing.B) { + var z Fp751Element + x, y := bench_x, bench_y + for n := 0; n < b.N; n++ { + fp751AddLazy(&z, &x, &y) + } +} + +func BenchmarkFp751X2AddLazy(b *testing.B) { + x, y, z := bench_z, bench_z, bench_z + for n := 0; n < b.N; n++ { + fp751X2AddLazy(&x, &y, &z) + } +} + +func BenchmarkFp751X2SubLazy(b *testing.B) { + x, y, z := bench_z, bench_z, bench_z + for n := 0; n < b.N; n++ { + fp751X2SubLazy(&x, &y, &z) + } +} diff --git a/p503toolbox/isogeny.go b/p503toolbox/isogeny.go new file mode 100644 index 0000000..462405a --- /dev/null +++ b/p503toolbox/isogeny.go @@ -0,0 +1,144 @@ +package p503toolbox + +// Interface for working with isogenies. +type Isogeny interface { + // Given a torsion point on a curve computes isogenous curve. + // Returns curve coefficients (A:C), so that E_(A/C) = E_(A/C)/

, + // where P is a provided projective point. Sets also isogeny constants + // that are needed for isogeny evaluation. + GenerateCurve(*ProjectivePoint) CurveCoefficientsEquiv + // Evaluates isogeny at caller provided point. Requires isogeny curve constants + // to be earlier computed by GenerateCurve. + EvaluatePoint(*ProjectivePoint) ProjectivePoint +} + +// Stores Isogeny 4 curve constants +type isogeny4 struct { + isogeny3 + K3 ExtensionFieldElement +} + +// Stores Isogeny 3 curve constants +type isogeny3 struct { + K1 ExtensionFieldElement + K2 ExtensionFieldElement +} + +// Constructs isogeny4 objects +func NewIsogeny4() Isogeny { + return new(isogeny4) +} + +// Constructs isogeny3 objects +func NewIsogeny3() Isogeny { + return new(isogeny3) +} + +// Given a three-torsion point p = x(PB) on the curve E_(A:C), construct the +// three-isogeny phi : E_(A:C) -> E_(A:C)/ = E_(A':C'). +// +// Input: (XP_3: ZP_3), where P_3 has exact order 3 on E_A/C +// Output: * Curve coordinates (A' + 2C', A' - 2C') corresponding to E_A'/C' = A_E/C/ +// * Isogeny phi with constants in F_p^2 +func (phi *isogeny3) GenerateCurve(p *ProjectivePoint) CurveCoefficientsEquiv { + var t0, t1, t2, t3, t4 ExtensionFieldElement + var coefEq CurveCoefficientsEquiv + var K1, K2 = &phi.K1, &phi.K2 + + K1.Sub(&p.X, &p.Z) // K1 = XP3 - ZP3 + t0.Square(K1) // t0 = K1^2 + K2.Add(&p.X, &p.Z) // K2 = XP3 + ZP3 + t1.Square(K2) // t1 = K2^2 + t2.Add(&t0, &t1) // t2 = t0 + t1 + t3.Add(K1, K2) // t3 = K1 + K2 + t3.Square(&t3) // t3 = t3^2 + t3.Sub(&t3, &t2) // t3 = t3 - t2 + t2.Add(&t1, &t3) // t2 = t1 + t3 + t3.Add(&t3, &t0) // t3 = t3 + t0 + t4.Add(&t3, &t0) // t4 = t3 + t0 + t4.Add(&t4, &t4) // t4 = t4 + t4 + t4.Add(&t1, &t4) // t4 = t1 + t4 + coefEq.C.Mul(&t2, &t4) // A24m = t2 * t4 + t4.Add(&t1, &t2) // t4 = t1 + t2 + t4.Add(&t4, &t4) // t4 = t4 + t4 + t4.Add(&t0, &t4) // t4 = t0 + t4 + t4.Mul(&t3, &t4) // t4 = t3 * t4 + t0.Sub(&t4, &coefEq.C) // t0 = t4 - A24m + coefEq.A.Add(&coefEq.C, &t0) // A24p = A24m + t0 + return coefEq +} + +// Given a 3-isogeny phi and a point pB = x(PB), compute x(QB), the x-coordinate +// of the image QB = phi(PB) of PB under phi : E_(A:C) -> E_(A':C'). +// +// The output xQ = x(Q) is then a point on the curve E_(A':C'); the curve +// parameters are returned by the GenerateCurve function used to construct phi. +func (phi *isogeny3) EvaluatePoint(p *ProjectivePoint) ProjectivePoint { + var t0, t1, t2 ExtensionFieldElement + var q ProjectivePoint + var K1, K2 = &phi.K1, &phi.K2 + var px, pz = &p.X, &p.Z + + t0.Add(px, pz) // t0 = XQ + ZQ + t1.Sub(px, pz) // t1 = XQ - ZQ + t0.Mul(K1, &t0) // t2 = K1 * t0 + t1.Mul(K2, &t1) // t1 = K2 * t1 + t2.Add(&t0, &t1) // t2 = t0 + t1 + t0.Sub(&t1, &t0) // t0 = t1 - t0 + t2.Square(&t2) // t2 = t2 ^ 2 + t0.Square(&t0) // t0 = t0 ^ 2 + q.X.Mul(px, &t2) // XQ'= XQ * t2 + q.Z.Mul(pz, &t0) // ZQ'= ZQ * t0 + return q +} + +// Given a four-torsion point p = x(PB) on the curve E_(A:C), construct the +// four-isogeny phi : E_(A:C) -> E_(A:C)/ = E_(A':C'). +// +// Input: (XP_4: ZP_4), where P_4 has exact order 4 on E_A/C +// Output: * Curve coordinates (A' + 2C', 4C') corresponding to E_A'/C' = A_E/C/ +// * Isogeny phi with constants in F_p^2 +func (phi *isogeny4) GenerateCurve(p *ProjectivePoint) CurveCoefficientsEquiv { + var coefEq CurveCoefficientsEquiv + var xp4, zp4 = &p.X, &p.Z + var K1, K2, K3 = &phi.K1, &phi.K2, &phi.K3 + + K2.Sub(xp4, zp4) + K3.Add(xp4, zp4) + K1.Square(zp4) + K1.Add(K1, K1) + coefEq.C.Square(K1) + K1.Add(K1, K1) + coefEq.A.Square(xp4) + coefEq.A.Add(&coefEq.A, &coefEq.A) + coefEq.A.Square(&coefEq.A) + return coefEq +} + +// Given a 4-isogeny phi and a point xP = x(P), compute x(Q), the x-coordinate +// of the image Q = phi(P) of P under phi : E_(A:C) -> E_(A':C'). +// +// Input: Isogeny returned by GenerateCurve and point q=(Qx,Qz) from E0_A/C +// Output: Corresponding point q from E1_A'/C', where E1 is 4-isogenous to E0 +func (phi *isogeny4) EvaluatePoint(p *ProjectivePoint) ProjectivePoint { + var t0, t1 ExtensionFieldElement + var q = *p + var xq, zq = &q.X, &q.Z + var K1, K2, K3 = &phi.K1, &phi.K2, &phi.K3 + + t0.Add(xq, zq) + t1.Sub(xq, zq) + xq.Mul(&t0, K2) + zq.Mul(&t1, K3) + t0.Mul(&t0, &t1) + t0.Mul(&t0, K1) + t1.Add(xq, zq) + zq.Sub(xq, zq) + t1.Square(&t1) + zq.Square(zq) + xq.Add(&t0, &t1) + t0.Sub(zq, &t0) + xq.Mul(xq, &t1) + zq.Mul(zq, &t0) + return q +} diff --git a/p503toolbox/isogeny_test.go b/p503toolbox/isogeny_test.go new file mode 100644 index 0000000..bc6daa2 --- /dev/null +++ b/p503toolbox/isogeny_test.go @@ -0,0 +1,114 @@ +package p503toolbox + +import ( + "testing" +) + +func TestFourIsogenyVersusSage(t *testing.T) { + var xR, xP4, resPhiXr, expPhiXr ProjectivePoint + var phi = NewIsogeny4() + + // sage: p = 2^372 * 3^239 - 1; Fp = GF(p) + // sage: R. = Fp[] + // sage: Fp2 = Fp.extension(x^2 + 1, 'i') + // sage: i = Fp2.gen() + // sage: E0Fp = EllipticCurve(Fp, [0,0,0,1,0]) + // sage: E0Fp2 = EllipticCurve(Fp2, [0,0,0,1,0]) + // sage: x_PA = 11 + // sage: y_PA = -Fp(11^3 + 11).sqrt() + // sage: x_PB = 6 + // sage: y_PB = -Fp(6^3 + 6).sqrt() + // sage: P_A = 3^239 * E0Fp((x_PA,y_PA)) + // sage: P_B = 2^372 * E0Fp((x_PB,y_PB)) + // sage: def tau(P): + // ....: return E0Fp2( (-P.xy()[0], i*P.xy()[1])) + // ....: + // sage: m_B = 3*randint(0,3^238) + // sage: m_A = 2*randint(0,2^371) + // sage: R_A = E0Fp2(P_A) + m_A*tau(P_A) + // sage: def y_recover(x, a): + // ....: return (x**3 + a*x**2 + x).sqrt() + // ....: + // sage: first_4_torsion_point = E0Fp2(1, y_recover(Fp2(1),0)) + // sage: sage_first_4_isogeny = E0Fp2.isogeny(first_4_torsion_point) + // sage: a = Fp2(0) + // sage: E1A = EllipticCurve(Fp2, [0,(2*(a+6))/(a-2),0,1,0]) + // sage: sage_isomorphism = sage_first_4_isogeny.codomain().isomorphism_to(E1A) + // sage: isogenized_R_A = sage_isomorphism(sage_first_4_isogeny(R_A)) + // sage: P_4 = (2**(372-4))*isogenized_R_A + // sage: P_4._order = 4 #otherwise falls back to generic group methods for order + // sage: X4, Z4 = P_4.xy()[0], 1 + // sage: phi4 = EllipticCurveIsogeny(E1A, P_4, None, 4) + // sage: E2A_sage = phi4.codomain() # not in monty form + // sage: Aprime, Cprime = 2*(2*X4^4 - Z4^4), Z4^4 + // sage: E2A = EllipticCurve(Fp2, [0,Aprime/Cprime,0,1,0]) + // sage: sage_iso = E2A_sage.isomorphism_to(E2A) + // sage: isogenized2_R_A = sage_iso(phi4(isogenized_R_A)) + + xP4.FromAffine(&ExtensionFieldElement{ + A: Fp751Element{0x2afd75a913f3d5e7, 0x2918fba06f88c9ab, 0xa4ac4dc7cb526f05, 0x2d19e9391a607300, 0x7a79e2b34091b54, 0x3ad809dcb42f1792, 0xd46179328bd6402a, 0x1afa73541e2c4f3f, 0xf602d73ace9bdbd8, 0xd77ac58f6bab7004, 0x4689d97f6793b3b3, 0x4f26b00e42b7}, + B: Fp751Element{0x6cdf918dafdcb890, 0x666f273cc29cfae2, 0xad00fcd31ba618e2, 0x5fbcf62bef2f6a33, 0xf408bb88318e5098, 0x84ab97849453d175, 0x501bbfcdcfb8e1ac, 0xf2370098e6b5542c, 0xc7dc73f5f0f6bd32, 0xdd76dcd86729d1cf, 0xca22c905029996e4, 0x5cf4a9373de3}}) + xR.FromAffine(&ExtensionFieldElement{ + A: Fp751Element{0xff99e76f78da1e05, 0xdaa36bd2bb8d97c4, 0xb4328cee0a409daf, 0xc28b099980c5da3f, 0xf2d7cd15cfebb852, 0x1935103dded6cdef, 0xade81528de1429c3, 0x6775b0fa90a64319, 0x25f89817ee52485d, 0x706e2d00848e697, 0xc4958ec4216d65c0, 0xc519681417f}, + B: Fp751Element{0x742fe7dde60e1fb9, 0x801a3c78466a456b, 0xa9f945b786f48c35, 0x20ce89e1b144348f, 0xf633970b7776217e, 0x4c6077a9b38976e5, 0x34a513fc766c7825, 0xacccba359b9cd65, 0xd0ca8383f0fd0125, 0x77350437196287a, 0x9fe1ad7706d4ea21, 0x4d26129ee42d}}) + expPhiXr.FromAffine(&ExtensionFieldElement{ + A: Fp751Element{0x111efd8bd0b7a01e, 0x6ab75a4f3789ca9b, 0x939dbe518564cac4, 0xf9eeaba1601d0434, 0x8d41f8ba6edac998, 0xfcd2557efe9aa170, 0xb3c3549c098b7844, 0x52874fef6f81127c, 0xb2b9ac82aa518bb3, 0xee70820230520a86, 0xd4012b7f5efb184a, 0x573e4536329b}, + B: Fp751Element{0xa99952281e932902, 0x569a89a571f2c7b1, 0x6150143846ba3f6b, 0x11fd204441e91430, 0x7f469bd55c9b07b, 0xb72db8b9de35b161, 0x455a9a37a940512a, 0xb0cff7670abaf906, 0x18c785b7583375fe, 0x603ab9ca403c9148, 0xab54ba3a6e6c62c1, 0x2726d7d57c4f}}) + + phi.GenerateCurve(&xP4) + resPhiXr = phi.EvaluatePoint(&xR) + if !expPhiXr.VartimeEq(&resPhiXr) { + t.Error("\nExpected\n", expPhiXr.ToAffine(), "\nfound\n", resPhiXr.ToAffine()) + } +} + +func TestThreeIsogenyVersusSage(t *testing.T) { + var xR, xP3, resPhiXr, expPhiXr ProjectivePoint + var phi = NewIsogeny3() + + // sage: %colors Linux + // sage: p = 2^372 * 3^239 - 1; Fp = GF(p) + // sage: R. = Fp[] + // sage: Fp2 = Fp.extension(x^2 + 1, 'i') + // sage: i = Fp2.gen() + // sage: E0Fp = EllipticCurve(Fp, [0,0,0,1,0]) + // sage: E0Fp2 = EllipticCurve(Fp2, [0,0,0,1,0]) + // sage: x_PA = 11 + // sage: y_PA = -Fp(11^3 + 11).sqrt() + // sage: x_PB = 6 + // sage: y_PB = -Fp(6^3 + 6).sqrt() + // sage: P_A = 3^239 * E0Fp((x_PA,y_PA)) + // sage: P_B = 2^372 * E0Fp((x_PB,y_PB)) + // sage: def tau(P): + // ....: return E0Fp2( (-P.xy()[0], i*P.xy()[1])) + // ....: + // sage: m_B = 3*randint(0,3^238) + // sage: R_B = E0Fp2(P_B) + m_B*tau(P_B) + // sage: P_3 = (3^238)*R_B + // sage: def three_isog(P_3, P): + // ....: X3, Z3 = P_3.xy()[0], 1 + // ....: XP, ZP = P.xy()[0], 1 + // ....: x = (XP*(X3*XP - Z3*ZP)^2)/(ZP*(Z3*XP - X3*ZP)^2) + // ....: A3, C3 = (Z3^4 + 9*X3^2*(2*Z3^2 - 3*X3^2)), 4*X3*Z3^3 + // ....: cod = EllipticCurve(Fp2, [0,A3/C3,0,1,0]) + // ....: return cod.lift_x(x) + // ....: + // sage: isogenized_R_B = three_isog(P_3, R_B) + + xR.FromAffine(&ExtensionFieldElement{ + A: Fp751Element{0xbd0737ed5cc9a3d7, 0x45ae6d476517c101, 0x6f228e9e7364fdb2, 0xbba4871225b3dbd, 0x6299ccd2e5da1a07, 0x38488fe4af5f2d0e, 0xec23cae5a86e980c, 0x26c804ba3f1edffa, 0xfbbed81932df60e5, 0x7e00e9d182ae9187, 0xc7654abb66d05f4b, 0x262d0567237b}, + B: Fp751Element{0x3a3b5b6ad0b2ac33, 0x246602b5179127d3, 0x502ae0e9ad65077d, 0x10a3a37237e1bf70, 0x4a1ab9294dd05610, 0xb0f3adac30fe1fa6, 0x341995267faf70cb, 0xa14dd94d39cf4ec1, 0xce4b7527d1bf5568, 0xe0410423ed45c7e4, 0x38011809b6425686, 0x28f52472ebed}}) + xP3.FromAffine(&ExtensionFieldElement{ + A: Fp751Element{0x7bb7a4a07b0788dc, 0xdc36a3f6607b21b0, 0x4750e18ee74cf2f0, 0x464e319d0b7ab806, 0xc25aa44c04f758ff, 0x392e8521a46e0a68, 0xfc4e76b63eff37df, 0x1f3566d892e67dd8, 0xf8d2eb0f73295e65, 0x457b13ebc470bccb, 0xfda1cc9efef5be33, 0x5dbf3d92cc02}, + B: Fp751Element{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}) + expPhiXr.FromAffine(&ExtensionFieldElement{ + A: Fp751Element{0x286db7d75913c5b1, 0xcb2049ad50189220, 0xccee90ef765fa9f4, 0x65e52ce2730e7d88, 0xa6b6b553bd0d06e7, 0xb561ecec14591590, 0x17b7a66d8c64d959, 0x77778cecbe1461e, 0x9405c9c0c41a57ce, 0x8f6b4847e8ca7d3d, 0xf625eb987b366937, 0x421b3590e345}, + B: Fp751Element{0x566b893803e7d8d6, 0xe8c71a04d527e696, 0x5a1d8f87bf5eb51, 0x42ae08ae098724f, 0x4ee3d7c7af40ca2e, 0xd9f9ab9067bb10a7, 0xecd53d69edd6328c, 0xa581e9202dea107d, 0x8bcdfb6c8ecf9257, 0xe7cbbc2e5cbcf2af, 0x5f031a8701f0e53e, 0x18312d93e3cb}}) + + phi.GenerateCurve(&xP3) + resPhiXr = phi.EvaluatePoint(&xR) + + if !expPhiXr.VartimeEq(&resPhiXr) { + t.Error("\nExpected\n", expPhiXr.ToAffine(), "\nfound\n", resPhiXr.ToAffine()) + } +} diff --git a/p503toolbox/print_test.go b/p503toolbox/print_test.go new file mode 100644 index 0000000..3f42e32 --- /dev/null +++ b/p503toolbox/print_test.go @@ -0,0 +1,19 @@ +package p503toolbox + +// Tools used for testing and debugging + +import ( + "fmt" +) + +func (primeElement PrimeFieldElement) String() string { + return fmt.Sprintf("%X", primeElement.A.toBigInt().String()) +} + +func (extElement ExtensionFieldElement) String() string { + return fmt.Sprintf("\nA: %X\nB: %X", extElement.A.toBigInt().String(), extElement.B.toBigInt().String()) +} + +func (point ProjectivePoint) String() string { + return fmt.Sprintf("X:\n%sZ:\n%s", point.X.String(), point.Z.String()) +}