diff --git a/Makefile b/Makefile index 9cad96a..d46a9ef 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ endif TARGETS= \ dh \ drbg \ + ec \ hash \ kem diff --git a/README.md b/README.md index 1da5bba..7269217 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ Crypto primitives implementation in Go. ## Implemented primitives * dh/ - SIDH +* ec/ + - x448 * hash/ - cSHAKE (sha3 coppied from "golang.org/x/crypto") - SM3 diff --git a/ec/x448/LICENSE.txt b/ec/x448/LICENSE.txt new file mode 100644 index 0000000..1184f29 --- /dev/null +++ b/ec/x448/LICENSE.txt @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2014-2015 Cryptography Research, Inc. +Copyright (c) 2015 Yawning Angel. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/ec/x448/x448.go b/ec/x448/x448.go new file mode 100644 index 0000000..790e14a --- /dev/null +++ b/ec/x448/x448.go @@ -0,0 +1,114 @@ +// The MIT License (MIT) +// +// Copyright (c) 2014-2015 Cryptography Research, Inc. 
+// Copyright (c) 2015 Yawning Angel.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// Package x448 provides an implementation of scalar multiplication on the
+// elliptic curve known as curve448.
+//
+// See https://tools.ietf.org/html/draft-irtf-cfrg-curves-11 (the draft was
+// later published as RFC 7748).
+package x448
+
+const (
+	// x448Bytes is the size in bytes of scalars and of serialized
+	// u-coordinates.
+	x448Bytes = 56
+
+	// edwardsD is the curve constant d; the ladder multiplies by -edwardsD
+	// (the "a24" constant in the step comments below).
+	edwardsD = -39081
+)
+
+// basePoint is the serialized u-coordinate (u = 5) used by ScalarBaseMult.
+var basePoint = [56]byte{
+	5, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+}
+
+// ScalarMult multiplies the u-coordinate in base by scalar using a
+// Montgomery ladder and writes the 56 byte result to out.
+//
+// The scalar is conditioned on the fly (the two low bits of the first byte
+// are cleared and the top bit of the last byte is set); the caller's array is
+// not modified.
+//
+// It returns 0 on success and -1 if the result is the all-zero value, in
+// which case the caller MUST abort.
+func ScalarMult(out, scalar, base *[56]byte) int {
+	var x1, x2, z2, x3, z3, t1, t2 gf
+	x1.deser(base)
+	x2.cpy(&one)
+	z2.cpy(&zero)
+	x3.cpy(&x1)
+	z3.cpy(&one)
+
+	// swap remembers the previous scalar bit so that each ladder step only
+	// swaps when the current bit differs from the previous one.
+	var swap limbUint
+
+	for t := int(448 - 1); t >= 0; t-- {
+		sb := scalar[t/8]
+
+		// Scalar conditioning.
+		if t/8 == 0 {
+			sb &= 0xFC
+		} else if t/8 == x448Bytes-1 {
+			sb |= 0x80
+		}
+
+		kT := (limbUint)((sb >> ((uint)(t) % 8)) & 1)
+		kT = -kT // Set to all 0s or all 1s
+
+		swap ^= kT
+		x2.condSwap(&x3, swap)
+		z2.condSwap(&z3, swap)
+		swap = kT
+
+		t1.add(&x2, &z2) // A = x2 + z2
+		t2.sub(&x2, &z2) // B = x2 - z2
+		z2.sub(&x3, &z3) // D = x3 - z3
+		x2.mul(&t1, &z2) // DA
+		z2.add(&z3, &x3) // C = x3 + z3
+		x3.mul(&t2, &z2) // CB
+		z3.sub(&x2, &x3) // DA-CB
+		z2.sqr(&z3)      // (DA-CB)^2
+		z3.mul(&x1, &z2) // z3 = x1(DA-CB)^2
+		z2.add(&x2, &x3) // (DA+CB)
+		x3.sqr(&z2)      // x3 = (DA+CB)^2
+
+		z2.sqr(&t1)      // AA = A^2
+		t1.sqr(&t2)      // BB = B^2
+		x2.mul(&z2, &t1) // x2 = AA*BB
+		t2.sub(&z2, &t1) // E = AA-BB
+
+		t1.mlw(&t2, -edwardsD) // E*-d = a24*E
+		t1.add(&t1, &z2)       // AA + a24*E
+		z2.mul(&t2, &t1)       // z2 = E(AA+a24*E)
+	}
+
+	// Finish: undo the swap left pending by the last ladder step. The two
+	// coordinates of each point must be swapped together, i.e. x2 with x3
+	// and z2 with z3.
+	x2.condSwap(&x3, swap)
+	z2.condSwap(&z3, swap)
+	z2.inv(&z2)
+	x1.mul(&x2, &z2)
+	x1.ser(out)
+
+	// As with X25519, both sides MUST check, without leaking extra
+	// information about the value of K, whether the resulting shared K is
+	// the all-zero value and abort if so.
+	//
+	// nz is the OR of all output bytes (0..255), so the arithmetic shift of
+	// nz-1 yields -1 only when every byte is zero.
+	var nz limbSint
+	for _, v := range out {
+		nz |= (limbSint)(v)
+	}
+	nz = (nz - 1) >> 8 // 0 = succ, -1 = fail
+
+	// return value: 0 = succ, -1 = fail
+	return (int)(nz)
+}
+
+// ScalarBaseMult multiplies the curve's base point (u = 5) by scalar and
+// writes the result to out. The return value is that of ScalarMult.
+func ScalarBaseMult(out, scalar *[56]byte) int {
+	return ScalarMult(out, scalar, &basePoint)
+}
diff --git a/ec/x448/x448_ref.go b/ec/x448/x448_ref.go
new file mode 100644
index 0000000..59626e1
--- /dev/null
+++ b/ec/x448/x448_ref.go
@@ -0,0 +1,778 @@
+// The MIT License (MIT)
+//
+// Copyright (c) 2014-2015 Cryptography Research, Inc.
+// Copyright (c) 2015 Yawning Angel.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+package x448
+
+// This should really use 64 bit limbs, but Go has no 128 bit integer type
+// (nothing like __(u)int128_t), so the 32 bit code it is, at a hefty
+// performance penalty. Making this fast will probably take generated
+// assembly (e.g. PeachPy).
+
+const (
+	wBits     = 32                // width of the limb storage type, in bits
+	lBits     = (wBits * 7 / 8)   // bits carried per limb (28)
+	x448Limbs = (448 / lBits)     // limbs per field element (16)
+	lMask     = (1 << lBits) - 1  // mask selecting the low lBits bits
+)
+
+type (
+	// limbUint is the unsigned limb-sized integer used for swap masks.
+	limbUint uint32
+	// limbSint is the signed limb-sized integer used for the result check.
+	limbSint int32
+
+	// gf is a field element stored as x448Limbs limbs of lBits bits each,
+	// least significant limb first.
+	gf struct {
+		limb [x448Limbs]uint32
+	}
+)
+
+var (
+	// zero and one are the field elements 0 and 1.
+	zero = gf{[x448Limbs]uint32{0}}
+	one  = gf{[x448Limbs]uint32{1}}
+
+	// p is the field prime 2^448 - 2^224 - 1: every limb is all ones except
+	// limb 8 (bit 224), which is one less.
+	p = gf{[x448Limbs]uint32{
+		lMask, lMask, lMask, lMask, lMask, lMask, lMask, lMask,
+		lMask - 1, lMask, lMask, lMask, lMask, lMask, lMask, lMask,
+	}}
+)
+
+// cpy copies x = y.
+func (x *gf) cpy(y *gf) {
+	// Arrays are values in Go, so a plain assignment copies every limb.
+	x.limb = y.limb
+}
+
+// mul multiplies c = a * b.
(PERF)
+//
+// a is copied into a scratch element before use and c is only written once
+// all products have been accumulated, so c may alias a and/or b.
+func (c *gf) mul(a, b *gf) {
+	var aa gf
+	aa.cpy(a)
+
+	//
+	// This is *by far* the most CPU intensive routine in the code.
+	//
+
+	// Reference (rolled) form of the unrolled code below:
+	//
+	// var accum [x448Limbs]uint64
+	// for i, bv := range b.limb {
+	//	for j, aav := range aa.limb {
+	//		accum[(i+j)%x448Limbs] += (uint64)(bv) * (uint64)(aav)
+	//	}
+	//	aa.limb[(x448Limbs-1-i)^(x448Limbs/2)] += aa.limb[x448Limbs-1-i]
+	// }
+	//
+	// After round i the limb of aa that wraps around in the next round is
+	// also added into the limb half a vector away; presumably this folds the
+	// wrap-around using 2^448 = 2^224 + 1 (mod p) -- NOTE(review): confirm.
+
+	// Unrolling by hand is, surprisingly, a fairly massive gain.
+	var accum0, accum1, accum2, accum3, accum4, accum5, accum6, accum7, accum8, accum9, accum10, accum11, accum12, accum13, accum14, accum15 uint64
+	var bv uint64
+
+	bv = (uint64)(b.limb[0])
+	accum0 += bv * (uint64)(aa.limb[0])
+	accum1 += bv * (uint64)(aa.limb[1])
+	accum2 += bv * (uint64)(aa.limb[2])
+	accum3 += bv * (uint64)(aa.limb[3])
+	accum4 += bv * (uint64)(aa.limb[4])
+	accum5 += bv * (uint64)(aa.limb[5])
+	accum6 += bv * (uint64)(aa.limb[6])
+	accum7 += bv * (uint64)(aa.limb[7])
+	accum8 += bv * (uint64)(aa.limb[8])
+	accum9 += bv * (uint64)(aa.limb[9])
+	accum10 += bv * (uint64)(aa.limb[10])
+	accum11 += bv * (uint64)(aa.limb[11])
+	accum12 += bv * (uint64)(aa.limb[12])
+	accum13 += bv * (uint64)(aa.limb[13])
+	accum14 += bv * (uint64)(aa.limb[14])
+	accum15 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-0)^(x448Limbs/2)] += aa.limb[x448Limbs-1-0]
+
+	bv = (uint64)(b.limb[1])
+	accum1 += bv * (uint64)(aa.limb[0])
+	accum2 += bv * (uint64)(aa.limb[1])
+	accum3 += bv * (uint64)(aa.limb[2])
+	accum4 += bv * (uint64)(aa.limb[3])
+	accum5 += bv * (uint64)(aa.limb[4])
+	accum6 += bv * (uint64)(aa.limb[5])
+	accum7 += bv * (uint64)(aa.limb[6])
+	accum8 += bv * (uint64)(aa.limb[7])
+	accum9 += bv * (uint64)(aa.limb[8])
+	accum10 += bv * (uint64)(aa.limb[9])
+	accum11 += bv * (uint64)(aa.limb[10])
+	accum12 += bv * (uint64)(aa.limb[11])
+	accum13 += bv * (uint64)(aa.limb[12])
+	accum14 += bv * (uint64)(aa.limb[13])
+	accum15 += bv * (uint64)(aa.limb[14])
+	accum0 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-1)^(x448Limbs/2)] += aa.limb[x448Limbs-1-1]
+
+	bv = (uint64)(b.limb[2])
+	accum2 += bv * (uint64)(aa.limb[0])
+	accum3 += bv * (uint64)(aa.limb[1])
+	accum4 += bv * (uint64)(aa.limb[2])
+	accum5 += bv * (uint64)(aa.limb[3])
+	accum6 += bv * (uint64)(aa.limb[4])
+	accum7 += bv * (uint64)(aa.limb[5])
+	accum8 += bv * (uint64)(aa.limb[6])
+	accum9 += bv * (uint64)(aa.limb[7])
+	accum10 += bv * (uint64)(aa.limb[8])
+	accum11 += bv * (uint64)(aa.limb[9])
+	accum12 += bv * (uint64)(aa.limb[10])
+	accum13 += bv * (uint64)(aa.limb[11])
+	accum14 += bv * (uint64)(aa.limb[12])
+	accum15 += bv * (uint64)(aa.limb[13])
+	accum0 += bv * (uint64)(aa.limb[14])
+	accum1 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-2)^(x448Limbs/2)] += aa.limb[x448Limbs-1-2]
+
+	bv = (uint64)(b.limb[3])
+	accum3 += bv * (uint64)(aa.limb[0])
+	accum4 += bv * (uint64)(aa.limb[1])
+	accum5 += bv * (uint64)(aa.limb[2])
+	accum6 += bv * (uint64)(aa.limb[3])
+	accum7 += bv * (uint64)(aa.limb[4])
+	accum8 += bv * (uint64)(aa.limb[5])
+	accum9 += bv * (uint64)(aa.limb[6])
+	accum10 += bv * (uint64)(aa.limb[7])
+	accum11 += bv * (uint64)(aa.limb[8])
+	accum12 += bv * (uint64)(aa.limb[9])
+	accum13 += bv * (uint64)(aa.limb[10])
+	accum14 += bv * (uint64)(aa.limb[11])
+	accum15 += bv * (uint64)(aa.limb[12])
+	accum0 += bv * (uint64)(aa.limb[13])
+	accum1 += bv * (uint64)(aa.limb[14])
+	accum2 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-3)^(x448Limbs/2)] += aa.limb[x448Limbs-1-3]
+
+	bv = (uint64)(b.limb[4])
+	accum4 += bv * (uint64)(aa.limb[0])
+	accum5 += bv * (uint64)(aa.limb[1])
+	accum6 += bv * (uint64)(aa.limb[2])
+	accum7 += bv * (uint64)(aa.limb[3])
+	accum8 += bv * (uint64)(aa.limb[4])
+	accum9 += bv * (uint64)(aa.limb[5])
+	accum10 += bv * (uint64)(aa.limb[6])
+	accum11 += bv * (uint64)(aa.limb[7])
+	accum12 += bv * (uint64)(aa.limb[8])
+	accum13 += bv * (uint64)(aa.limb[9])
+	accum14 += bv * (uint64)(aa.limb[10])
+	accum15 += bv * (uint64)(aa.limb[11])
+	accum0 += bv * (uint64)(aa.limb[12])
+	accum1 += bv * (uint64)(aa.limb[13])
+	accum2 += bv * (uint64)(aa.limb[14])
+	accum3 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-4)^(x448Limbs/2)] += aa.limb[x448Limbs-1-4]
+
+	bv = (uint64)(b.limb[5])
+	accum5 += bv * (uint64)(aa.limb[0])
+	accum6 += bv * (uint64)(aa.limb[1])
+	accum7 += bv * (uint64)(aa.limb[2])
+	accum8 += bv * (uint64)(aa.limb[3])
+	accum9 += bv * (uint64)(aa.limb[4])
+	accum10 += bv * (uint64)(aa.limb[5])
+	accum11 += bv * (uint64)(aa.limb[6])
+	accum12 += bv * (uint64)(aa.limb[7])
+	accum13 += bv * (uint64)(aa.limb[8])
+	accum14 += bv * (uint64)(aa.limb[9])
+	accum15 += bv * (uint64)(aa.limb[10])
+	accum0 += bv * (uint64)(aa.limb[11])
+	accum1 += bv * (uint64)(aa.limb[12])
+	accum2 += bv * (uint64)(aa.limb[13])
+	accum3 += bv * (uint64)(aa.limb[14])
+	accum4 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-5)^(x448Limbs/2)] += aa.limb[x448Limbs-1-5]
+
+	bv = (uint64)(b.limb[6])
+	accum6 += bv * (uint64)(aa.limb[0])
+	accum7 += bv * (uint64)(aa.limb[1])
+	accum8 += bv * (uint64)(aa.limb[2])
+	accum9 += bv * (uint64)(aa.limb[3])
+	accum10 += bv * (uint64)(aa.limb[4])
+	accum11 += bv * (uint64)(aa.limb[5])
+	accum12 += bv * (uint64)(aa.limb[6])
+	accum13 += bv * (uint64)(aa.limb[7])
+	accum14 += bv * (uint64)(aa.limb[8])
+	accum15 += bv * (uint64)(aa.limb[9])
+	accum0 += bv * (uint64)(aa.limb[10])
+	accum1 += bv * (uint64)(aa.limb[11])
+	accum2 += bv * (uint64)(aa.limb[12])
+	accum3 += bv * (uint64)(aa.limb[13])
+	accum4 += bv * (uint64)(aa.limb[14])
+	accum5 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-6)^(x448Limbs/2)] += aa.limb[x448Limbs-1-6]
+
+	bv = (uint64)(b.limb[7])
+	accum7 += bv * (uint64)(aa.limb[0])
+	accum8 += bv * (uint64)(aa.limb[1])
+	accum9 += bv * (uint64)(aa.limb[2])
+	accum10 += bv * (uint64)(aa.limb[3])
+	accum11 += bv * (uint64)(aa.limb[4])
+	accum12 += bv * (uint64)(aa.limb[5])
+	accum13 += bv * (uint64)(aa.limb[6])
+	accum14 += bv * (uint64)(aa.limb[7])
+	accum15 += bv * (uint64)(aa.limb[8])
+	accum0 += bv * (uint64)(aa.limb[9])
+	accum1 += bv * (uint64)(aa.limb[10])
+	accum2 += bv * (uint64)(aa.limb[11])
+	accum3 += bv * (uint64)(aa.limb[12])
+	accum4 += bv * (uint64)(aa.limb[13])
+	accum5 += bv * (uint64)(aa.limb[14])
+	accum6 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-7)^(x448Limbs/2)] += aa.limb[x448Limbs-1-7]
+
+	bv = (uint64)(b.limb[8])
+	accum8 += bv * (uint64)(aa.limb[0])
+	accum9 += bv * (uint64)(aa.limb[1])
+	accum10 += bv * (uint64)(aa.limb[2])
+	accum11 += bv * (uint64)(aa.limb[3])
+	accum12 += bv * (uint64)(aa.limb[4])
+	accum13 += bv * (uint64)(aa.limb[5])
+	accum14 += bv * (uint64)(aa.limb[6])
+	accum15 += bv * (uint64)(aa.limb[7])
+	accum0 += bv * (uint64)(aa.limb[8])
+	accum1 += bv * (uint64)(aa.limb[9])
+	accum2 += bv * (uint64)(aa.limb[10])
+	accum3 += bv * (uint64)(aa.limb[11])
+	accum4 += bv * (uint64)(aa.limb[12])
+	accum5 += bv * (uint64)(aa.limb[13])
+	accum6 += bv * (uint64)(aa.limb[14])
+	accum7 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-8)^(x448Limbs/2)] += aa.limb[x448Limbs-1-8]
+
+	bv = (uint64)(b.limb[9])
+	accum9 += bv * (uint64)(aa.limb[0])
+	accum10 += bv * (uint64)(aa.limb[1])
+	accum11 += bv * (uint64)(aa.limb[2])
+	accum12 += bv * (uint64)(aa.limb[3])
+	accum13 += bv * (uint64)(aa.limb[4])
+	accum14 += bv * (uint64)(aa.limb[5])
+	accum15 += bv * (uint64)(aa.limb[6])
+	accum0 += bv * (uint64)(aa.limb[7])
+	accum1 += bv * (uint64)(aa.limb[8])
+	accum2 += bv * (uint64)(aa.limb[9])
+	accum3 += bv * (uint64)(aa.limb[10])
+	accum4 += bv * (uint64)(aa.limb[11])
+	accum5 += bv * (uint64)(aa.limb[12])
+	accum6 += bv * (uint64)(aa.limb[13])
+	accum7 += bv * (uint64)(aa.limb[14])
+	accum8 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-9)^(x448Limbs/2)] += aa.limb[x448Limbs-1-9]
+
+	bv = (uint64)(b.limb[10])
+	accum10 += bv * (uint64)(aa.limb[0])
+	accum11 += bv * (uint64)(aa.limb[1])
+	accum12 += bv * (uint64)(aa.limb[2])
+	accum13 += bv * (uint64)(aa.limb[3])
+	accum14 += bv * (uint64)(aa.limb[4])
+	accum15 += bv * (uint64)(aa.limb[5])
+	accum0 += bv * (uint64)(aa.limb[6])
+	accum1 += bv * (uint64)(aa.limb[7])
+	accum2 += bv * (uint64)(aa.limb[8])
+	accum3 += bv * (uint64)(aa.limb[9])
+	accum4 += bv * (uint64)(aa.limb[10])
+	accum5 += bv * (uint64)(aa.limb[11])
+	accum6 += bv * (uint64)(aa.limb[12])
+	accum7 += bv * (uint64)(aa.limb[13])
+	accum8 += bv * (uint64)(aa.limb[14])
+	accum9 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-10)^(x448Limbs/2)] += aa.limb[x448Limbs-1-10]
+
+	bv = (uint64)(b.limb[11])
+	accum11 += bv * (uint64)(aa.limb[0])
+	accum12 += bv * (uint64)(aa.limb[1])
+	accum13 += bv * (uint64)(aa.limb[2])
+	accum14 += bv * (uint64)(aa.limb[3])
+	accum15 += bv * (uint64)(aa.limb[4])
+	accum0 += bv * (uint64)(aa.limb[5])
+	accum1 += bv * (uint64)(aa.limb[6])
+	accum2 += bv * (uint64)(aa.limb[7])
+	accum3 += bv * (uint64)(aa.limb[8])
+	accum4 += bv * (uint64)(aa.limb[9])
+	accum5 += bv * (uint64)(aa.limb[10])
+	accum6 += bv * (uint64)(aa.limb[11])
+	accum7 += bv * (uint64)(aa.limb[12])
+	accum8 += bv * (uint64)(aa.limb[13])
+	accum9 += bv * (uint64)(aa.limb[14])
+	accum10 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-11)^(x448Limbs/2)] += aa.limb[x448Limbs-1-11]
+
+	bv = (uint64)(b.limb[12])
+	accum12 += bv * (uint64)(aa.limb[0])
+	accum13 += bv * (uint64)(aa.limb[1])
+	accum14 += bv * (uint64)(aa.limb[2])
+	accum15 += bv * (uint64)(aa.limb[3])
+	accum0 += bv * (uint64)(aa.limb[4])
+	accum1 += bv * (uint64)(aa.limb[5])
+	accum2 += bv * (uint64)(aa.limb[6])
+	accum3 += bv * (uint64)(aa.limb[7])
+	accum4 += bv * (uint64)(aa.limb[8])
+	accum5 += bv * (uint64)(aa.limb[9])
+	accum6 += bv * (uint64)(aa.limb[10])
+	accum7 += bv * (uint64)(aa.limb[11])
+	accum8 += bv * (uint64)(aa.limb[12])
+	accum9 += bv * (uint64)(aa.limb[13])
+	accum10 += bv * (uint64)(aa.limb[14])
+	accum11 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-12)^(x448Limbs/2)] += aa.limb[x448Limbs-1-12]
+
+	bv = (uint64)(b.limb[13])
+	accum13 += bv * (uint64)(aa.limb[0])
+	accum14 += bv * (uint64)(aa.limb[1])
+	accum15 += bv * (uint64)(aa.limb[2])
+	accum0 += bv * (uint64)(aa.limb[3])
+	accum1 += bv * (uint64)(aa.limb[4])
+	accum2 += bv * (uint64)(aa.limb[5])
+	accum3 += bv * (uint64)(aa.limb[6])
+	accum4 += bv * (uint64)(aa.limb[7])
+	accum5 += bv * (uint64)(aa.limb[8])
+	accum6 += bv * (uint64)(aa.limb[9])
+	accum7 += bv * (uint64)(aa.limb[10])
+	accum8 += bv * (uint64)(aa.limb[11])
+	accum9 += bv * (uint64)(aa.limb[12])
+	accum10 += bv * (uint64)(aa.limb[13])
+	accum11 += bv * (uint64)(aa.limb[14])
+	accum12 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-13)^(x448Limbs/2)] += aa.limb[x448Limbs-1-13]
+
+	bv = (uint64)(b.limb[14])
+	accum14 += bv * (uint64)(aa.limb[0])
+	accum15 += bv * (uint64)(aa.limb[1])
+	accum0 += bv * (uint64)(aa.limb[2])
+	accum1 += bv * (uint64)(aa.limb[3])
+	accum2 += bv * (uint64)(aa.limb[4])
+	accum3 += bv * (uint64)(aa.limb[5])
+	accum4 += bv * (uint64)(aa.limb[6])
+	accum5 += bv * (uint64)(aa.limb[7])
+	accum6 += bv * (uint64)(aa.limb[8])
+	accum7 += bv * (uint64)(aa.limb[9])
+	accum8 += bv * (uint64)(aa.limb[10])
+	accum9 += bv * (uint64)(aa.limb[11])
+	accum10 += bv * (uint64)(aa.limb[12])
+	accum11 += bv * (uint64)(aa.limb[13])
+	accum12 += bv * (uint64)(aa.limb[14])
+	accum13 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-14)^(x448Limbs/2)] += aa.limb[x448Limbs-1-14]
+
+	bv = (uint64)(b.limb[15])
+	accum15 += bv * (uint64)(aa.limb[0])
+	accum0 += bv * (uint64)(aa.limb[1])
+	accum1 += bv * (uint64)(aa.limb[2])
+	accum2 += bv * (uint64)(aa.limb[3])
+	accum3 += bv * (uint64)(aa.limb[4])
+	accum4 += bv * (uint64)(aa.limb[5])
+	accum5 += bv * (uint64)(aa.limb[6])
+	accum6 += bv * (uint64)(aa.limb[7])
+	accum7 += bv * (uint64)(aa.limb[8])
+	accum8 += bv * (uint64)(aa.limb[9])
+	accum9 += bv * (uint64)(aa.limb[10])
+	accum10 += bv * (uint64)(aa.limb[11])
+	accum11 += bv * (uint64)(aa.limb[12])
+	accum12 += bv * (uint64)(aa.limb[13])
+	accum13 += bv * (uint64)(aa.limb[14])
+	accum14 += bv * (uint64)(aa.limb[15])
+	aa.limb[(x448Limbs-1-15)^(x448Limbs/2)] += aa.limb[x448Limbs-1-15]
+
+	// Push the carry of the two top accumulators forward first; the carry
+	// out of accum15 goes to accum8 here and to accum0 in the chain below.
+	//
+	// accum[x448Limbs-1] += accum[x448Limbs-2] >> lBits
+	// accum[x448Limbs-2] &= lMask
+	// accum[x448Limbs/2] += accum[x448Limbs-1] >> lBits
+	accum15 += accum14 >> lBits
+	accum14 &= lMask
+	accum8 += accum15 >> lBits
+
+	// Ripple the carries once around the ring, masking each accumulator to
+	// lBits bits after its carry has been handed to the next one.
+	//
+	// for j := uint(0); j < x448Limbs; j++ {
+	//	accum[j] += accum[(j-1)%x448Limbs] >> lBits
+	//	accum[(j-1)%x448Limbs] &= lMask
+	// }
+	accum0 += accum15 >> lBits
+	accum15 &= lMask
+	accum1 += accum0 >> lBits
+	accum0 &= lMask
+	accum2 += accum1 >> lBits
+	accum1 &= lMask
+	accum3 += accum2 >> lBits
+	accum2 &= lMask
+	accum4 += accum3 >> lBits
+	accum3 &= lMask
+	accum5 += accum4 >> lBits
+	accum4 &= lMask
+	accum6 += accum5 >> lBits
+	accum5 &= lMask
+	accum7 += accum6 >> lBits
+	accum6 &= lMask
+	accum8 += accum7 >> lBits
+	accum7 &= lMask
+	accum9 += accum8 >> lBits
+	accum8 &= lMask
+	accum10 += accum9 >> lBits
+	accum9 &= lMask
+	accum11 += accum10 >> lBits
+	accum10 &= lMask
+	accum12 += accum11 >> lBits
+	accum11 &= lMask
+	accum13 += accum12 >> lBits
+	accum12 &= lMask
+	accum14 += accum13 >> lBits
+	accum13 &= lMask
+	accum15 += accum14 >> lBits
+	accum14 &= lMask
+
+	// Store the result; c is not touched before this point.
+	//
+	// for j, accv := range accum {
+	//	c.limb[j] = (uint32)(accv)
+	// }
+	c.limb[0] = (uint32)(accum0)
+	c.limb[1] = (uint32)(accum1)
+	c.limb[2] = (uint32)(accum2)
+	c.limb[3] = (uint32)(accum3)
+	c.limb[4] = (uint32)(accum4)
+	c.limb[5] = (uint32)(accum5)
+	c.limb[6] = (uint32)(accum6)
+	c.limb[7] = (uint32)(accum7)
+	c.limb[8] = (uint32)(accum8)
+	c.limb[9] = (uint32)(accum9)
+	c.limb[10] = (uint32)(accum10)
+	c.limb[11] = (uint32)(accum11)
+	c.limb[12] = (uint32)(accum12)
+	c.limb[13] = (uint32)(accum13)
+	c.limb[14] = (uint32)(accum14)
+	c.limb[15] = (uint32)(accum15)
+}
+
+// sqr squares (c = x * x). Just calls multiply.
(PERF)
+func (c *gf) sqr(x *gf) {
+	c.mul(x, x)
+}
+
+// isqrt computes the inverse square root (y = 1/sqrt(x)) with a fixed
+// addition chain.
+func (y *gf) isqrt(x *gf) {
+	var a, b, acc gf
+
+	// step is one link of the chain: dst = src * acc, then acc is reloaded
+	// from dst and squared n times in place.
+	step := func(dst, src *gf, n int) {
+		dst.mul(src, &acc)
+		acc.cpy(dst)
+		for ; n > 0; n-- {
+			acc.sqr(&acc)
+		}
+	}
+
+	// XXX/Yawning, could unroll, but this is called only once.
+	acc.sqr(x)
+	step(&b, x, 1)
+	step(&b, x, 3)
+	step(&a, &b, 3)
+	step(&a, &b, 9)
+	step(&b, &a, 1)
+	step(&a, x, 18)
+	step(&a, &b, 37)
+	step(&b, &a, 37)
+	step(&b, &a, 111)
+	step(&a, &b, 1)
+	step(&b, x, 223)
+
+	y.mul(&a, &acc)
+}
+
+// inv inverts (y = 1/x).
+func (y *gf) inv(x *gf) {
+	var sq, r gf
+	sq.sqr(x)     // x^2
+	r.isqrt(&sq)  // +- 1/sqrt(x^2) = +- 1/x
+	sq.sqr(&r)    // 1/x^2
+	y.mul(x, &sq) // 1/x; mul copies x before writing y, so y may alias x
+}
+
+// reduce weakly reduces mod p: the carry out of the top limb is added into
+// the middle limb and the bottom limb, then carries ripple once through all
+// limbs while each limb is masked to lBits bits.
+func (x *gf) reduce() {
+	l := &x.limb
+
+	// First half of the top-limb wrap-around; the other half is the
+	// l[15] -> l[0] carry that starts the chain below.
+	l[x448Limbs/2] += l[x448Limbs-1] >> lBits
+
+	// Unrolled form of:
+	//
+	// for j := uint(0); j < x448Limbs; j++ {
+	//	x.limb[j] += x.limb[(j-1)%x448Limbs] >> lBits
+	//	x.limb[(j-1)%x448Limbs] &= lMask
+	// }
+	l[0] += l[15] >> lBits
+	l[15] &= lMask
+	l[1] += l[0] >> lBits
+	l[0] &= lMask
+	l[2] += l[1] >> lBits
+	l[1] &= lMask
+	l[3] += l[2] >> lBits
+	l[2] &= lMask
+	l[4] += l[3] >> lBits
+	l[3] &= lMask
+	l[5] += l[4] >> lBits
+	l[4] &= lMask
+	l[6] += l[5] >> lBits
+	l[5] &= lMask
+	l[7] += l[6] >> lBits
+	l[6] &= lMask
+	l[8] += l[7] >> lBits
+	l[7] &= lMask
+	l[9] += l[8] >> lBits
+	l[8] &= lMask
+	l[10] += l[9] >> lBits
+	l[9] &= lMask
+	l[11] += l[10] >> lBits
+	l[10] &= lMask
+	l[12] += l[11] >> lBits
+	l[11] &= lMask
+	l[13] += l[12] >> lBits
+	l[12] &= lMask
+	l[14] += l[13] >> lBits
+	l[13] &= lMask
+	l[15] += l[14] >> lBits
+	l[14] &= lMask
+}
+
+// add adds mod p. Conservatively always weak-reduces.
(PERF)
+func (x *gf) add(y, z *gf) {
+	// Unrolled form of:
+	//
+	// for i, yv := range y.limb {
+	//	x.limb[i] = yv + z.limb[i]
+	// }
+	x.limb[0] = y.limb[0] + z.limb[0]
+	x.limb[1] = y.limb[1] + z.limb[1]
+	x.limb[2] = y.limb[2] + z.limb[2]
+	x.limb[3] = y.limb[3] + z.limb[3]
+	x.limb[4] = y.limb[4] + z.limb[4]
+	x.limb[5] = y.limb[5] + z.limb[5]
+	x.limb[6] = y.limb[6] + z.limb[6]
+	x.limb[7] = y.limb[7] + z.limb[7]
+	x.limb[8] = y.limb[8] + z.limb[8]
+	x.limb[9] = y.limb[9] + z.limb[9]
+	x.limb[10] = y.limb[10] + z.limb[10]
+	x.limb[11] = y.limb[11] + z.limb[11]
+	x.limb[12] = y.limb[12] + z.limb[12]
+	x.limb[13] = y.limb[13] + z.limb[13]
+	x.limb[14] = y.limb[14] + z.limb[14]
+	x.limb[15] = y.limb[15] + z.limb[15]
+
+	x.reduce()
+}
+
+// sub subtracts mod p. Conservatively always weak-reduces. (PERF)
+//
+// 2*p is added limb-wise before the weak reduction; presumably this keeps
+// every limb from underflowing for weakly reduced inputs.
+func (x *gf) sub(y, z *gf) {
+	// Unrolled form of (limb 8 of p is lMask-1, every other limb is lMask):
+	//
+	// for i, yv := range y.limb {
+	//	x.limb[i] = yv - z.limb[i] + 2*p.limb[i]
+	// }
+	x.limb[0] = y.limb[0] - z.limb[0] + 2*lMask
+	x.limb[1] = y.limb[1] - z.limb[1] + 2*lMask
+	x.limb[2] = y.limb[2] - z.limb[2] + 2*lMask
+	x.limb[3] = y.limb[3] - z.limb[3] + 2*lMask
+	x.limb[4] = y.limb[4] - z.limb[4] + 2*lMask
+	x.limb[5] = y.limb[5] - z.limb[5] + 2*lMask
+	x.limb[6] = y.limb[6] - z.limb[6] + 2*lMask
+	x.limb[7] = y.limb[7] - z.limb[7] + 2*lMask
+	x.limb[8] = y.limb[8] - z.limb[8] + 2*(lMask-1)
+	x.limb[9] = y.limb[9] - z.limb[9] + 2*lMask
+	x.limb[10] = y.limb[10] - z.limb[10] + 2*lMask
+	x.limb[11] = y.limb[11] - z.limb[11] + 2*lMask
+	x.limb[12] = y.limb[12] - z.limb[12] + 2*lMask
+	x.limb[13] = y.limb[13] - z.limb[13] + 2*lMask
+	x.limb[14] = y.limb[14] - z.limb[14] + 2*lMask
+	x.limb[15] = y.limb[15] - z.limb[15] + 2*lMask
+
+	x.reduce()
+}
+
+// condSwap swaps x and y in constant time. swap must be either all 0s (leave
+// both untouched) or all 1s (exchange every limb).
+func (x *gf) condSwap(y *gf, swap limbUint) {
+	// Unrolled form of:
+	//
+	// for i, xv := range x.limb {
+	//	s := (xv ^ y.limb[i]) & (uint32)(swap) // Sort of dumb, oh well.
+	//	x.limb[i] ^= s
+	//	y.limb[i] ^= s
+	// }
+
+	var s uint32
+
+	s = (x.limb[0] ^ y.limb[0]) & (uint32)(swap)
+	x.limb[0] ^= s
+	y.limb[0] ^= s
+	s = (x.limb[1] ^ y.limb[1]) & (uint32)(swap)
+	x.limb[1] ^= s
+	y.limb[1] ^= s
+	s = (x.limb[2] ^ y.limb[2]) & (uint32)(swap)
+	x.limb[2] ^= s
+	y.limb[2] ^= s
+	s = (x.limb[3] ^ y.limb[3]) & (uint32)(swap)
+	x.limb[3] ^= s
+	y.limb[3] ^= s
+	s = (x.limb[4] ^ y.limb[4]) & (uint32)(swap)
+	x.limb[4] ^= s
+	y.limb[4] ^= s
+	s = (x.limb[5] ^ y.limb[5]) & (uint32)(swap)
+	x.limb[5] ^= s
+	y.limb[5] ^= s
+	s = (x.limb[6] ^ y.limb[6]) & (uint32)(swap)
+	x.limb[6] ^= s
+	y.limb[6] ^= s
+	s = (x.limb[7] ^ y.limb[7]) & (uint32)(swap)
+	x.limb[7] ^= s
+	y.limb[7] ^= s
+	s = (x.limb[8] ^ y.limb[8]) & (uint32)(swap)
+	x.limb[8] ^= s
+	y.limb[8] ^= s
+	s = (x.limb[9] ^ y.limb[9]) & (uint32)(swap)
+	x.limb[9] ^= s
+	y.limb[9] ^= s
+	s = (x.limb[10] ^ y.limb[10]) & (uint32)(swap)
+	x.limb[10] ^= s
+	y.limb[10] ^= s
+	s = (x.limb[11] ^ y.limb[11]) & (uint32)(swap)
+	x.limb[11] ^= s
+	y.limb[11] ^= s
+	s = (x.limb[12] ^ y.limb[12]) & (uint32)(swap)
+	x.limb[12] ^= s
+	y.limb[12] ^= s
+	s = (x.limb[13] ^ y.limb[13]) & (uint32)(swap)
+	x.limb[13] ^= s
+	y.limb[13] ^= s
+	s = (x.limb[14] ^ y.limb[14]) & (uint32)(swap)
+	x.limb[14] ^= s
+	y.limb[14] ^= s
+	s = (x.limb[15] ^ y.limb[15]) & (uint32)(swap)
+	x.limb[15] ^= s
+	y.limb[15] ^= s
+}
+
+// mlw multiplies by a small positive int (a = b * w). Just uses a full
+// multiply. (PERF)
+//
+// w must be positive: it is only ever called with w = -edwardsD, and any
+// other sign is treated as a programming error and panics.
+func (a *gf) mlw(b *gf, w int) {
+	if w <= 0 {
+		// This branch is *NEVER* taken with the current code.
+		panic("mul called with negative w")
+	}
+
+	// Lift w into a field element whose only non-zero limb is limb 0.
+	ww := gf{[x448Limbs]uint32{(uint32)(w)}}
+	a.mul(b, &ww)
+}
+
+// canon canonicalizes: after a weak reduction, p is subtracted once and then
+// added back if the subtraction borrowed.
+func (a *gf) canon() {
+	a.reduce()
+
+	// Subtract p with borrow.
+	var carry int64
+	for i, v := range a.limb {
+		carry = carry + (int64)(v) - (int64)(p.limb[i])
+		a.limb[i] = (uint32)(carry & lMask)
+		carry >>= lBits
+	}
+
+	// The final borrow (0 or -1) doubles as the mask that selects whether
+	// p is added back, which avoids a data-dependent branch.
+	addback := carry
+	carry = 0
+
+	// Add it back.
+	for i, v := range a.limb {
+		carry = carry + (int64)(v) + (int64)(p.limb[i]&(uint32)(addback))
+		a.limb[i] = uint32(carry & lMask)
+		carry >>= lBits
+	}
+}
+
+// deser deserializes into the limb representation: the input bytes are read
+// least significant first and regrouped into limbs of lBits bits.
+func (s *gf) deser(ser *[x448Bytes]byte) {
+	var buf uint64
+	bits := uint(0)
+	k := 0
+
+	for i, v := range ser {
+		buf |= (uint64)(v) << bits
+		// Emit a limb whenever enough bits are buffered; on the last input
+		// byte keep emitting until every limb has been written.
+		for bits += 8; (bits >= lBits || i == x448Bytes-1) && k < x448Limbs; bits, buf = bits-lBits, buf>>lBits {
+			s.limb[k] = (uint32)(buf & lMask)
+			k++
+		}
+	}
+}
+
+// ser serializes into byte representation. The element is canonicalized in
+// place first, so a is modified.
+func (a *gf) ser(ser *[x448Bytes]byte) {
+	a.canon()
+	k := 0
+	bits := uint(0)
+	var buf uint64
+	for i, v := range a.limb {
+		buf |= (uint64)(v) << bits
+		// Emit a byte whenever at least 8 bits are buffered; on the last
+		// limb keep emitting until every output byte has been written.
+		for bits += lBits; (bits >= 8 || i == x448Limbs-1) && k < x448Bytes; bits, buf = bits-8, buf>>8 {
+			ser[k] = (byte)(buf)
+			k++
+		}
+	}
+}
+
+// init guards the hand-unrolled loops, which hard-code 16 limbs.
+func init() {
+	if x448Limbs != 16 {
+		panic("x448Limbs != 16, unrolled loops likely broken")
+	}
+}
diff --git a/ec/x448/x448_test.go b/ec/x448/x448_test.go
new file mode 100644
index 0000000..2b0e095
--- /dev/null
+++ b/ec/x448/x448_test.go
@@ -0,0 +1,265 @@
+// The MIT License (MIT)
+//
+// Copyright (c) 2011 Stanford University.
+// Copyright (c) 2014-2015 Cryptography Research, Inc.
+// Copyright (c) 2015 Yawning Angel.
+// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package x448 + +import ( + "bytes" + "crypto/rand" + "encoding/hex" + "testing" +) + +// Cowardly refuse to run the full slow test vector case unless this is set +// at compile time, because the timeout for the test harness needs to be +// adjusted at runtime. 
+var reallyRunSlowTest = false + +func TestX448(t *testing.T) { + type KATVectors struct { + scalar [x448Bytes]byte + base [x448Bytes]byte + answer [x448Bytes]byte + } + + vectors := []KATVectors{ + { + [x448Bytes]byte{ + 0x3d, 0x26, 0x2f, 0xdd, 0xf9, 0xec, 0x8e, 0x88, + 0x49, 0x52, 0x66, 0xfe, 0xa1, 0x9a, 0x34, 0xd2, + 0x88, 0x82, 0xac, 0xef, 0x04, 0x51, 0x04, 0xd0, + 0xd1, 0xaa, 0xe1, 0x21, 0x70, 0x0a, 0x77, 0x9c, + 0x98, 0x4c, 0x24, 0xf8, 0xcd, 0xd7, 0x8f, 0xbf, + 0xf4, 0x49, 0x43, 0xeb, 0xa3, 0x68, 0xf5, 0x4b, + 0x29, 0x25, 0x9a, 0x4f, 0x1c, 0x60, 0x0a, 0xd3, + }, + [x448Bytes]byte{ + 0x06, 0xfc, 0xe6, 0x40, 0xfa, 0x34, 0x87, 0xbf, + 0xda, 0x5f, 0x6c, 0xf2, 0xd5, 0x26, 0x3f, 0x8a, + 0xad, 0x88, 0x33, 0x4c, 0xbd, 0x07, 0x43, 0x7f, + 0x02, 0x0f, 0x08, 0xf9, 0x81, 0x4d, 0xc0, 0x31, + 0xdd, 0xbd, 0xc3, 0x8c, 0x19, 0xc6, 0xda, 0x25, + 0x83, 0xfa, 0x54, 0x29, 0xdb, 0x94, 0xad, 0xa1, + 0x8a, 0xa7, 0xa7, 0xfb, 0x4e, 0xf8, 0xa0, 0x86, + }, + [x448Bytes]byte{ + 0xce, 0x3e, 0x4f, 0xf9, 0x5a, 0x60, 0xdc, 0x66, + 0x97, 0xda, 0x1d, 0xb1, 0xd8, 0x5e, 0x6a, 0xfb, + 0xdf, 0x79, 0xb5, 0x0a, 0x24, 0x12, 0xd7, 0x54, + 0x6d, 0x5f, 0x23, 0x9f, 0xe1, 0x4f, 0xba, 0xad, + 0xeb, 0x44, 0x5f, 0xc6, 0x6a, 0x01, 0xb0, 0x77, + 0x9d, 0x98, 0x22, 0x39, 0x61, 0x11, 0x1e, 0x21, + 0x76, 0x62, 0x82, 0xf7, 0x3d, 0xd9, 0x6b, 0x6f, + }, + }, + { + [x448Bytes]byte{ + 0x20, 0x3d, 0x49, 0x44, 0x28, 0xb8, 0x39, 0x93, + 0x52, 0x66, 0x5d, 0xdc, 0xa4, 0x2f, 0x9d, 0xe8, + 0xfe, 0xf6, 0x00, 0x90, 0x8e, 0x0d, 0x46, 0x1c, + 0xb0, 0x21, 0xf8, 0xc5, 0x38, 0x34, 0x5d, 0xd7, + 0x7c, 0x3e, 0x48, 0x06, 0xe2, 0x5f, 0x46, 0xd3, + 0x31, 0x5c, 0x44, 0xe0, 0xa5, 0xb4, 0x37, 0x12, + 0x82, 0xdd, 0x2c, 0x8d, 0x5b, 0xe3, 0x09, 0x5f, + }, + [x448Bytes]byte{ + 0x0f, 0xbc, 0xc2, 0xf9, 0x93, 0xcd, 0x56, 0xd3, + 0x30, 0x5b, 0x0b, 0x7d, 0x9e, 0x55, 0xd4, 0xc1, + 0xa8, 0xfb, 0x5d, 0xbb, 0x52, 0xf8, 0xe9, 0xa1, + 0xe9, 0xb6, 0x20, 0x1b, 0x16, 0x5d, 0x01, 0x58, + 0x94, 0xe5, 0x6c, 0x4d, 0x35, 0x70, 0xbe, 0xe5, + 0x2f, 0xe2, 0x05, 
0xe2, 0x8a, 0x78, 0xb9, 0x1c, + 0xdf, 0xbd, 0xe7, 0x1c, 0xe8, 0xd1, 0x57, 0xdb, + }, + [x448Bytes]byte{ + 0x88, 0x4a, 0x02, 0x57, 0x62, 0x39, 0xff, 0x7a, + 0x2f, 0x2f, 0x63, 0xb2, 0xdb, 0x6a, 0x9f, 0xf3, + 0x70, 0x47, 0xac, 0x13, 0x56, 0x8e, 0x1e, 0x30, + 0xfe, 0x63, 0xc4, 0xa7, 0xad, 0x1b, 0x3e, 0xe3, + 0xa5, 0x70, 0x0d, 0xf3, 0x43, 0x21, 0xd6, 0x20, + 0x77, 0xe6, 0x36, 0x33, 0xc5, 0x75, 0xc1, 0xc9, + 0x54, 0x51, 0x4e, 0x99, 0xda, 0x7c, 0x17, 0x9d, + }, + }, + } + + var out [x448Bytes]byte + for i, vec := range vectors { + ret := ScalarMult(&out, &vec.scalar, &vec.base) + if ret != 0 { + t.Errorf("KAT[%d]: ScalarMultiply failed", i) + } + if !bytes.Equal(out[:], vec.answer[:]) { + t.Errorf("KAT[%d]: Mismatch", i) + } + } +} + +func TestX448IETFDraft(t *testing.T) { + // Run the other test vectors from 5.2 of the IETF draft. + + // WARNING: The full version of the test will easily take longer than the + // default 10 min test timeout, even on a moderately powerful box. + // + // Unless reallyRunSlowTest is set in the source code, it will cowardly + // refuse to run the full 1 million iterations, and the `go test` + // timeout will need to be increased (`go test -timeout 30m`). 
+ + var k, u, out [x448Bytes]byte + copy(k[:], basePoint[:]) + copy(u[:], basePoint[:]) + + for i := 0; i < 1000000; i++ { + ret := ScalarMult(&out, &k, &u) + if ret != 0 { + t.Fatalf("Iterated[%d]: ScalarMultiply failed", i) + } + switch i + 1 { + case 1: + known, _ := hex.DecodeString("3f482c8a9f19b01e6c46ee9711d9dc14fd4bf67af30765c2ae2b846a4d23a8cd0db897086239492caf350b51f833868b9bc2b3bca9cf4113") + if !bytes.Equal(out[:], known) { + t.Fatalf("Iterated[%d]: Mismatch", i) + } + case 1000: + known, _ := hex.DecodeString("aa3b4749d55b9daf1e5b00288826c467274ce3ebbdd5c17b975e09d4af6c67cf10d087202db88286e2b79fceea3ec353ef54faa26e219f38") + if !bytes.Equal(out[:], known) { + t.Fatalf("Iterated[%d]: Mismatch", i) + } + if testing.Short() || !reallyRunSlowTest { + t.Skipf("Short test requested, skipping remaining, was correct at 1k") + } + } + copy(u[:], k[:]) + copy(k[:], out[:]) + } + known, _ := hex.DecodeString("077f453681caca3693198420bbe515cae0002472519b3e67661a7e89cab94695c8f4bcd66e61b9b9c946da8d524de3d69bd9d9d66b997e37") + if !bytes.Equal(k[:], known) { + t.Fatal("Final value mismatch") + } +} + +func TestCurve448(t *testing.T) { + alicePriv := [x448Bytes]byte{ + 0x9a, 0x8f, 0x49, 0x25, 0xd1, 0x51, 0x9f, 0x57, + 0x75, 0xcf, 0x46, 0xb0, 0x4b, 0x58, 0x00, 0xd4, + 0xee, 0x9e, 0xe8, 0xba, 0xe8, 0xbc, 0x55, 0x65, + 0xd4, 0x98, 0xc2, 0x8d, 0xd9, 0xc9, 0xba, 0xf5, + 0x74, 0xa9, 0x41, 0x97, 0x44, 0x89, 0x73, 0x91, + 0x00, 0x63, 0x82, 0xa6, 0xf1, 0x27, 0xab, 0x1d, + 0x9a, 0xc2, 0xd8, 0xc0, 0xa5, 0x98, 0x72, 0x6b, + } + + alicePub := [x448Bytes]byte{ + 0x9b, 0x08, 0xf7, 0xcc, 0x31, 0xb7, 0xe3, 0xe6, + 0x7d, 0x22, 0xd5, 0xae, 0xa1, 0x21, 0x07, 0x4a, + 0x27, 0x3b, 0xd2, 0xb8, 0x3d, 0xe0, 0x9c, 0x63, + 0xfa, 0xa7, 0x3d, 0x2c, 0x22, 0xc5, 0xd9, 0xbb, + 0xc8, 0x36, 0x64, 0x72, 0x41, 0xd9, 0x53, 0xd4, + 0x0c, 0x5b, 0x12, 0xda, 0x88, 0x12, 0x0d, 0x53, + 0x17, 0x7f, 0x80, 0xe5, 0x32, 0xc4, 0x1f, 0xa0, + } + + bobPriv := [x448Bytes]byte{ + 0x1c, 0x30, 0x6a, 0x7a, 0xc2, 0xa0, 0xe2, 
0xe0, + 0x99, 0x0b, 0x29, 0x44, 0x70, 0xcb, 0xa3, 0x39, + 0xe6, 0x45, 0x37, 0x72, 0xb0, 0x75, 0x81, 0x1d, + 0x8f, 0xad, 0x0d, 0x1d, 0x69, 0x27, 0xc1, 0x20, + 0xbb, 0x5e, 0xe8, 0x97, 0x2b, 0x0d, 0x3e, 0x21, + 0x37, 0x4c, 0x9c, 0x92, 0x1b, 0x09, 0xd1, 0xb0, + 0x36, 0x6f, 0x10, 0xb6, 0x51, 0x73, 0x99, 0x2d, + } + + bobPub := [x448Bytes]byte{ + 0x3e, 0xb7, 0xa8, 0x29, 0xb0, 0xcd, 0x20, 0xf5, + 0xbc, 0xfc, 0x0b, 0x59, 0x9b, 0x6f, 0xec, 0xcf, + 0x6d, 0xa4, 0x62, 0x71, 0x07, 0xbd, 0xb0, 0xd4, + 0xf3, 0x45, 0xb4, 0x30, 0x27, 0xd8, 0xb9, 0x72, + 0xfc, 0x3e, 0x34, 0xfb, 0x42, 0x32, 0xa1, 0x3c, + 0xa7, 0x06, 0xdc, 0xb5, 0x7a, 0xec, 0x3d, 0xae, + 0x07, 0xbd, 0xc1, 0xc6, 0x7b, 0xf3, 0x36, 0x09, + } + + aliceBob := [x448Bytes]byte{ + 0x07, 0xff, 0xf4, 0x18, 0x1a, 0xc6, 0xcc, 0x95, + 0xec, 0x1c, 0x16, 0xa9, 0x4a, 0x0f, 0x74, 0xd1, + 0x2d, 0xa2, 0x32, 0xce, 0x40, 0xa7, 0x75, 0x52, + 0x28, 0x1d, 0x28, 0x2b, 0xb6, 0x0c, 0x0b, 0x56, + 0xfd, 0x24, 0x64, 0xc3, 0x35, 0x54, 0x39, 0x36, + 0x52, 0x1c, 0x24, 0x40, 0x30, 0x85, 0xd5, 0x9a, + 0x44, 0x9a, 0x50, 0x37, 0x51, 0x4a, 0x87, 0x9d, + } + + var out [x448Bytes]byte + ret := ScalarBaseMult(&out, &alicePriv) + if ret != 0 { + t.Error("Alice: ScalarBaseMult failed") + } + if !bytes.Equal(out[:], alicePub[:]) { + t.Error("Alice: ScalarBaseMult Mismatch") + } + ret = ScalarBaseMult(&out, &bobPriv) + if ret != 0 { + t.Error("Bob: ScalarBaseMult failed") + } + if !bytes.Equal(out[:], bobPub[:]) { + t.Error("Bob: ScalarBaseMult Mismatch") + } + ret = ScalarMult(&out, &bobPriv, &alicePub) + if ret != 0 { + t.Error("Bob: ScalarMult failed") + } + if !bytes.Equal(out[:], aliceBob[:]) { + t.Error("Bob: ScalarMult Mismatch") + } + ret = ScalarMult(&out, &alicePriv, &bobPub) + if ret != 0 { + t.Error("Alice: ScalarMult failed") + } + if !bytes.Equal(out[:], aliceBob[:]) { + t.Error("Alice: ScalarMult Mismatch") + } +} + +func BenchmarkECDH(b *testing.B) { + var sa, sb, pa, pb, ab, ba [x448Bytes]byte + ret := 0 + + rand.Read(sa[:]) + rand.Read(sb[:]) + 
b.ResetTimer() + b.StopTimer() + for i := 0; i < b.N; i++ { + ret |= ScalarBaseMult(&pa, &sa) + ret |= ScalarBaseMult(&pb, &sb) + b.StartTimer() + ret |= ScalarMult(&ab, &sa, &pb) + b.StopTimer() + ret |= ScalarMult(&ba, &sb, &pa) + if !bytes.Equal(ab[:], ba[:]) { + b.Fatal("Alice/Bob: Mismatch") + } + copy(sa[:], pa[:]) + copy(sb[:], pb[:]) + } +}