1
0
mirror of https://github.com/henrydcase/nobs.git synced 2024-11-22 23:28:57 +00:00
nobs/dh/csidh/csidh.go
Kris Kwiatkowski 7efbbf4745 cSIDH-511: (#26)
Implementation of Commutative Supersingular Isogeny Diffie Hellman,
based on "A faster way to CSIDH" paper (2018/782).

* For fast isogeny calculation, the implementation converts the curve
  from Montgomery to Edwards form. All calculations are done on the
  Edwards curve and the result is then converted back to Montgomery form.
* As multiplication in the field Fp511 is the most expensive operation,
  the implementation contains multiple multiplication routines. The
  fastest is an assembly implementation which uses BMI2 and ADOX/ADCX
  instructions on modern CPUs. A slower implementation is also
  included, which runs on older CPUs.

* Benchmarks (Intel SkyLake):

  BenchmarkGeneratePrivate   	    6459	    172213 ns/op	       0 B/op	       0 allocs/op
  BenchmarkGenerateKeyPair   	      25	  45800356 ns/op	       0 B/op	       0 allocs/op
  BenchmarkValidate          	     297	   3915983 ns/op	       0 B/op	       0 allocs/op
  BenchmarkValidateRandom    	  184683	      6231 ns/op	       0 B/op	       0 allocs/op
  BenchmarkValidateGenerated 	      25	  48481306 ns/op	       0 B/op	       0 allocs/op
  BenchmarkDerive            	      19	  60928763 ns/op	       0 B/op	       0 allocs/op
  BenchmarkDeriveGenerated   	       8	 137342421 ns/op	       0 B/op	       0 allocs/op
  BenchmarkXMul              	    2311	    494267 ns/op	       1 B/op	       0 allocs/op
  BenchmarkXAdd              	 2396754	       501 ns/op	       0 B/op	       0 allocs/op
  BenchmarkXDbl              	 2072690	       571 ns/op	       0 B/op	       0 allocs/op
  BenchmarkIsom              	   78004	     15171 ns/op	       0 B/op	       0 allocs/op
  BenchmarkFp512Sub          	224635152	         5.33 ns/op	       0 B/op	       0 allocs/op
  BenchmarkFp512Mul          	246633255	         4.90 ns/op	       0 B/op	       0 allocs/op
  BenchmarkCSwap             	233228547	         5.10 ns/op	       0 B/op	       0 allocs/op
  BenchmarkAddRdc            	87348240	        12.6 ns/op	       0 B/op	       0 allocs/op
  BenchmarkSubRdc            	95112787	        11.7 ns/op	       0 B/op	       0 allocs/op
  BenchmarkModExpRdc         	   25436	     46878 ns/op	       0 B/op	       0 allocs/op
  BenchmarkMulBmiAsm         	19527573	        60.1 ns/op	       0 B/op	       0 allocs/op
  BenchmarkMulGeneric        	 7117650	       164 ns/op	       0 B/op	       0 allocs/op

* Go code has very similar performance when compared to C
  implementation.
  Results from sidh_torturer (4e2996e12d68364761064341cbe1d1b47efafe23)
  github.com:henrydcase/sidh-torture/csidh

  | TestName         |Go        | C        |
  |------------------|----------|----------|
  |TestSharedSecret  | 57.95774 | 57.91092 |
  |TestKeyGeneration | 62.23614 | 58.12980 |
  |TestSharedSecret  | 55.28988 | 57.23132 |
  |TestKeyGeneration | 61.68745 | 58.66396 |
  |TestSharedSecret  | 63.19408 | 58.64774 |
  |TestKeyGeneration | 62.34022 | 61.62539 |
  |TestSharedSecret  | 62.85453 | 68.74503 |
  |TestKeyGeneration | 52.58518 | 58.40115 |
  |TestSharedSecret  | 50.77081 | 61.91699 |
  |TestKeyGeneration | 59.91843 | 61.09266 |
  |TestSharedSecret  | 59.97962 | 62.98151 |
  |TestKeyGeneration | 64.57525 | 56.22863 |
  |TestSharedSecret  | 56.40521 | 55.77447 |
  |TestKeyGeneration | 67.85850 | 58.52604 |
  |TestSharedSecret  | 60.54290 | 65.14052 |
  |TestKeyGeneration | 65.45766 | 58.42823 |

  On average Go implementation is 2% faster.
2019-11-25 15:03:29 +00:00

308 lines
5.9 KiB
Go

package csidh
import (
"io"
)
// fp is a 511-bit number representing an element of the prime field GF(p).
// It is stored as an array of numWords 64-bit words.
type fp [numWords]uint64
// point is a projective point on an elliptic curve E over fp, described by
// its x and z coordinates.
type point struct {
	x, z fp
}
// coeff holds the curve coefficients as the pair (a, c).
type coeff struct {
	a, c fp
}
// fpRngGen carries the scratch buffer that randFp and GeneratePrivateKey
// fill with bytes read from the caller-supplied random source.
type fpRngGen struct {
// working buffer needed to avoid memory allocation
wbuf [64]byte
}
// PublicKey defines operations on a public key.
type PublicKey struct {
fpRngGen
// Montgomery coefficient: represents y^2 = x^3 + Ax^2 + x
a fp
}
// PrivateKey defines operations on a private key.
type PrivateKey struct {
fpRngGen
// Packed exponents: groupAction reads two signed 4-bit values out of
// each int8, one value per prime.
e [PrivateKeySize]int8
}
// randFp generates a random element of Fp and stores it in v.
//
// It repeatedly fills the working buffer from rng, packs the bytes into the
// words of v (byte i goes to word i/limbByteSize, least significant byte
// first), clears the bits above pbits in the last word, and accepts the
// candidate once it is less than p. Panics if rng cannot be read.
func (s *fpRngGen) randFp(v *fp, rng io.Reader) {
	topWordMask := uint64(1<<(pbits%limbBitSize)) - 1
	for {
		*v = fp{}
		if _, err := io.ReadFull(rng, s.wbuf[:]); err != nil {
			panic("Can't read random number")
		}
		for idx, b := range s.wbuf[:] {
			shift := 8 * uint(idx%8)
			v[idx/limbByteSize] |= uint64(b) << shift
		}
		v[len(v)-1] &= topWordMask
		if isLess(v, &p) {
			return
		}
	}
}
// cofactorMultiples is the recursive step used by Validate. It works on the
// primes with indexes in [halfL, halfR): the range is split in two, the
// point is multiplied by the product of the primes of each half, and the
// function recurses until a single prime is left. Both p and order are
// modified in place.
//
// It returns two flags (result, done). Validate treats them as follows:
// when done is true a verdict was reached and result holds it; when done
// is false no verdict was reached with this point.
func cofactorMultiples(p *point, a *coeff, halfL, halfR int, order *fp) (bool, bool) {
var Q point
var r1, d1, r2, d2 bool
// Leaf of the recursion: only primes[halfL] is left.
if (halfR - halfL) == 1 {
// A point whose z is already zero is skipped; it leaves order untouched.
if !p.z.isZero() {
var tmp = fp{primes[halfL]}
xMul512(p, p, a, &tmp)
if !p.z.isZero() {
// order does not divide p+1
return false, true
}
// z became zero only after multiplying by this prime, so the prime
// is accumulated into order.
mul512(order, order, primes[halfL])
// tmp is reused as scratch output here; only the return value of
// sub512 is inspected (presumably the borrow - confirm against sub512).
if sub512(&tmp, &fourSqrtP, order) == 1 {
// order > 4*sqrt(p) -> supersingular
return true, true
}
}
return false, false
}
// perform another recursive step
mid := halfL + ((halfR - halfL + 1) / 2)
var mulL, mulR = fp{1}, fp{1}
// mulR is the product of the primes in [halfL, mid); it is applied to the
// point Q that is handed to the recursion over [mid, halfR).
for i := halfL; i < mid; i++ {
mul512(&mulR, &mulR, primes[i])
}
// mulL is the product of the primes in [mid, halfR); it is applied to p,
// which is handed to the recursion over [halfL, mid).
for i := mid; i < halfR; i++ {
mul512(&mulL, &mulL, primes[i])
}
xMul512(&Q, p, a, &mulR)
xMul512(p, p, a, &mulL)
// Both halves are always evaluated (no short-circuit); they share the same
// order accumulator.
r1, d1 = cofactorMultiples(&Q, a, mid, halfR, order)
r2, d2 = cofactorMultiples(p, a, halfL, mid, order)
return r1 || r2, d1 || d2
}
// groupAction applies the private key prv to the curve whose coefficient is
// stored in pub.a and writes the resulting coefficient back to pub.a, so pub
// is both input and output. Random x-coordinates are sampled from rng using
// the working buffer of prv.
func groupAction(pub *PublicKey, prv *PrivateKey, rng io.Reader) {
// k[s] is the scalar applied to a freshly sampled point on side s. It
// starts at 4 and is multiplied by every prime that has no (remaining)
// exponent on that side.
var k [2]fp
// e[0] holds the positive exponents, e[1] the magnitudes of the negative ones.
var e [2][primeCount]uint8
var done = [2]bool{false, false}
var A = coeff{a: pub.a, c: one}
k[0][0] = 4
k[1][0] = 4
for i, v := range primes {
// Unpack the signed 4-bit exponent of prime i. Two exponents share one
// int8: even i uses the high nibble, odd i the low nibble. The right
// shift on int8 is arithmetic, so the value is sign-extended.
t := (prv.e[uint(i)>>1] << ((uint(i) % 2) * 4)) >> 4
if t > 0 {
e[0][i] = uint8(t)
e[1][i] = 0
mul512(&k[1], &k[1], v)
} else if t < 0 {
e[1][i] = uint8(-t)
e[0][i] = 0
mul512(&k[0], &k[0], v)
} else {
e[0][i] = 0
e[1][i] = 0
mul512(&k[0], &k[0], v)
mul512(&k[1], &k[1], v)
}
}
// Each iteration samples one point and processes the side selected by it,
// until the exponents on both sides are exhausted.
for {
var P point
var rhs fp
prv.randFp(&P.x, rng)
P.z = one
montEval(&rhs, &A.a, &P.x)
// sign picks the side (index into e, k and done) handled with this point.
// NOTE(review): presumably isNonQuadRes returns 0 or 1 - confirm.
sign := rhs.isNonQuadRes()
if done[sign] {
continue
}
xMul512(&P, &P, &A, &k[sign])
// Assume this side is finished; cleared again below if any exponent remains.
done[sign] = true
for i, v := range primes {
if e[sign][i] != 0 {
// cof is the product of the later primes that still have a
// non-zero exponent on this side; K = [cof]P.
var cof = fp{1}
var K point
for j := i + 1; j < len(primes); j++ {
if e[sign][j] != 0 {
mul512(&cof, &cof, primes[j])
}
}
xMul512(&K, &P, &A, &cof)
// When K.z is zero this prime is skipped for now and retried with
// another point in a later iteration of the outer loop.
if !K.z.isZero() {
isom(&P, &A, &K, v)
e[sign][i] = e[sign][i] - 1
// Exponent exhausted: from now on the prime is part of the scalar
// applied to new points on this side.
if e[sign][i] == 0 {
mul512(&k[sign], &k[sign], primes[i])
}
}
}
done[sign] = done[sign] && (e[sign][i] == 0)
}
// Rescale A.a by a power of A.c and set A.c back to one.
// NOTE(review): presumably this normalizes (A.a : A.c) to (A.a/A.c : 1) -
// confirm against modExpRdc512 and the value of pMin1.
modExpRdc512(&A.c, &A.c, &pMin1)
mulRdc(&A.a, &A.a, &A.c)
A.c = one
if done[0] && done[1] {
break
}
}
pub.a = A.a
}
// PrivateKey operations

// Import loads the private key from its byte encoding. key must hold at
// least len(c.e) bytes; only the first len(c.e) bytes are used and any
// trailing bytes are ignored. It returns false, leaving c unchanged, when
// key is too short, and true otherwise.
func (c *PrivateKey) Import(key []byte) bool {
	if len(key) < len(c.e) {
		return false
	}
	// Iterate over the destination rather than the input: ranging over key
	// would index past the end of c.e whenever key is longer than it.
	for i := range c.e {
		c.e[i] = int8(key[i])
	}
	return true
}
// Export writes the byte encoding of the private key to the beginning of
// out. It returns false, leaving out untouched, when out is shorter than the
// key; bytes of out beyond the key length are not modified.
func (c PrivateKey) Export(out []byte) bool {
	size := len(c.e)
	if len(out) < size {
		return false
	}
	for idx := 0; idx < size; idx++ {
		out[idx] = byte(c.e[idx])
	}
	return true
}
// GeneratePrivateKey fills key with freshly sampled exponents, one per
// entry of primes, using rejection sampling on bytes read from rng.
//
// A byte is accepted when, interpreted as int8, it lies in
// [-expMax, expMax]; its low 4 bits are then packed into key.e, two values
// per int8 (even index in the low nibble, odd index in the high nibble).
// It returns the error reported while reading from rng, if any.
func GeneratePrivateKey(key *PrivateKey, rng io.Reader) error {
	for idx := range key.e {
		key.e[idx] = 0
	}
	filled := 0
	for filled < len(primes) {
		if _, err := io.ReadFull(rng, key.wbuf[:]); err != nil {
			return err
		}
		for _, b := range key.wbuf[:] {
			candidate := int8(b)
			if candidate > expMax || candidate < -expMax {
				// out of range, try the next byte
				continue
			}
			key.e[filled>>1] |= int8((b & 0xF) << uint((filled%2)*4))
			filled++
			if filled == len(primes) {
				break
			}
		}
	}
	return nil
}
// Public key operations

// Import loads the public key from key, which must be exactly
// numWords*limbByteSize bytes long. Byte i is stored in word i/limbByteSize,
// least significant byte first. Any value previously held by c is replaced.
// It returns false, leaving c unchanged, when key has the wrong length.
//
// Assumes key is in Montgomery domain.
func (c *PublicKey) Import(key []byte) bool {
	if len(key) != numWords*limbByteSize {
		return false
	}
	// Bytes are OR-ed into the words below, so start from zero. Without this,
	// importing into a key that already holds a value mixes old and new bits.
	for i := range c.a {
		c.a[i] = 0
	}
	for i, b := range key {
		c.a[i/limbByteSize] |= uint64(b) << (8 * uint(i%limbByteSize))
	}
	return true
}
// Export writes the byte encoding of the public key to out, which must be
// exactly numWords*limbByteSize bytes long. Byte i is taken from word
// i/limbByteSize, least significant byte first. It returns false, leaving
// out untouched, when out has the wrong length.
//
// Assumes key is exported as encoded in Montgomery domain.
func (c *PublicKey) Export(out []byte) bool {
	if len(out) != numWords*limbByteSize {
		return false
	}
	for idx := range out {
		word := c.a[idx/limbByteSize]
		shift := 8 * uint64(idx%8)
		out[idx] = byte(word >> shift)
	}
	return true
}
// reset sets every word of the stored coefficient to zero.
func (c *PublicKey) reset() {
	for idx := 0; idx < len(c.a); idx++ {
		c.a[idx] = 0
	}
}
// GeneratePublicKey computes the public key matching prv and stores it in
// pub. The coefficient in pub is zeroed first and the private key is then
// applied to it with groupAction; rng is passed on to groupAction.
func GeneratePublicKey(pub *PublicKey, prv *PrivateKey, rng io.Reader) {
	// Start from the all-zero coefficient, whatever pub held before.
	for idx := range pub.a {
		pub.a[idx] = 0
	}
	groupAction(pub, prv, rng)
}
// Validate does public key validation. It returns true if 'pub' is a valid
// cSIDH public key, otherwise false. The random points needed for the check
// are sampled from rng using the working buffer of pub.
func Validate(pub *PublicKey, rng io.Reader) bool {
	// The coefficient must be in range, and |A| = 2 is invalid: the
	// j-invariant of a Montgomery curve is j = 256*(A^2-3)^3/(A^2-4),
	// which is undefined there.
	if !isLess(&pub.a, &p) || pub.a.equal(&two) || pub.a.equal(&twoNeg) {
		return false
	}
	// The sampled point must have big enough order to prove
	// supersingularity. The probability that this loop will be repeated
	// is negligible.
	for {
		curve := point{pub.a, one}
		var pt point
		pub.randFp(&pt.x, rng)
		pt.z = one
		// Double the point twice before the recursive check.
		for i := 0; i < 2; i++ {
			xDbl(&pt, &pt, &curve)
		}
		cf := coeff{curve.x, curve.z}
		order := fp{1}
		valid, decided := cofactorMultiples(&pt, &cf, 0, len(primes), &order)
		if decided {
			return valid
		}
	}
}
// DeriveSecret computes a cSIDH shared secret. If successful, returns true
// and fills 'out' with shared secret. Function returns false in case 'pub'
// is invalid; 'out' is then left untouched.
//
// The coefficient stored in 'pub' is not modified: the group action is run
// on a local copy, so the same public key can be used for further calls.
// rng is used for validation and for the group action.
func DeriveSecret(out *[64]byte, pub *PublicKey, prv *PrivateKey, rng io.Reader) bool {
	if !Validate(pub, rng) {
		return false
	}
	// groupAction overwrites the key it is given with the resulting curve.
	// Work on a copy so the caller's public key is not replaced by the
	// shared secret.
	shared := *pub
	groupAction(&shared, prv, rng)
	// Propagate the result of Export instead of silently ignoring it.
	return shared.Export(out[:])
}