1
0
mirror of https://github.com/henrydcase/nobs.git synced 2024-11-26 00:51:22 +00:00

cSIDH-511:

Implementation of Commutative Supersingular Isogeny Diffie Hellman,
based on "A faster way to CSIDH" paper (2018/782).

* For fast isogeny calculation, implementation converts a curve from
  Montgomery to Edwards. All calculations are done on Edwards curve
  and then converted back to Montgomery.
* As multiplication in the field Fp511 is the most expensive operation,
  the implementation contains multiple multiplication routines. The
  most performant one is an assembly implementation which uses BMI2 and
  ADOX/ADCX instructions available on modern CPUs. It also contains
  a slower, generic implementation which runs on older CPUs.

Benchmarks (Intel SkyLake):
----------
BenchmarkGeneratePrivate   	    6459	    172213 ns/op	       0 B/op	       0 allocs/op
BenchmarkGenerateKeyPair   	      25	  45800356 ns/op	       0 B/op	       0 allocs/op
BenchmarkValidate          	     297	   3915983 ns/op	       0 B/op	       0 allocs/op
BenchmarkValidateRandom    	  184683	      6231 ns/op	       0 B/op	       0 allocs/op
BenchmarkValidateGenerated 	      25	  48481306 ns/op	       0 B/op	       0 allocs/op
BenchmarkDerive            	      19	  60928763 ns/op	       0 B/op	       0 allocs/op
BenchmarkDeriveGenerated   	       8	 137342421 ns/op	       0 B/op	       0 allocs/op
BenchmarkXMul              	    2311	    494267 ns/op	       1 B/op	       0 allocs/op
BenchmarkXAdd              	 2396754	       501 ns/op	       0 B/op	       0 allocs/op
BenchmarkXDbl              	 2072690	       571 ns/op	       0 B/op	       0 allocs/op
BenchmarkIsom              	   78004	     15171 ns/op	       0 B/op	       0 allocs/op
BenchmarkFp512Sub          	224635152	         5.33 ns/op	       0 B/op	       0 allocs/op
BenchmarkFp512Mul          	246633255	         4.90 ns/op	       0 B/op	       0 allocs/op
BenchmarkCSwap             	233228547	         5.10 ns/op	       0 B/op	       0 allocs/op
BenchmarkAddRdc            	87348240	        12.6 ns/op	       0 B/op	       0 allocs/op
BenchmarkSubRdc            	95112787	        11.7 ns/op	       0 B/op	       0 allocs/op
BenchmarkModExpRdc         	   25436	     46878 ns/op	       0 B/op	       0 allocs/op
BenchmarkMulBmiAsm         	19527573	        60.1 ns/op	       0 B/op	       0 allocs/op
BenchmarkMulGeneric        	 7117650	       164 ns/op	       0 B/op	       0 allocs/op
This commit is contained in:
Henry Case 2019-04-09 17:15:12 +01:00 committed by Henry Case
parent 6f9706df01
commit 1e91fe8a91
16 changed files with 4980 additions and 3 deletions

View File

@ -50,6 +50,12 @@ make_dirs:
test: clean make_dirs $(addprefix prep-,$(TARGETS)) test: clean make_dirs $(addprefix prep-,$(TARGETS))
cd $(GOPATH_LOCAL); $(OPTS_ENV) GOPATH=$(GOPATH_LOCAL) go test $(OPTS) $(TEST_PATH) cd $(GOPATH_LOCAL); $(OPTS_ENV) GOPATH=$(GOPATH_LOCAL) go test $(OPTS) $(TEST_PATH)
# Runs `go test` for the cSIDH package only (github.com/henrydcase/nobs/dh/csidh)
# inside the local GOPATH, after the same clean/prepare steps as `test`.
test_csidh: clean make_dirs $(addprefix prep-,$(TARGETS))
cd $(GOPATH_LOCAL); $(OPTS_ENV) GOPATH=$(GOPATH_LOCAL) go test $(OPTS) github.com/henrydcase/nobs/dh/csidh
# Same as test_csidh, but passes `-c` to `go test`, which compiles the cSIDH
# test binary instead of running the tests.
test_csidh_bin: clean make_dirs $(addprefix prep-,$(TARGETS))
cd $(GOPATH_LOCAL); $(OPTS_ENV) GOPATH=$(GOPATH_LOCAL) go test -c $(OPTS) github.com/henrydcase/nobs/dh/csidh
cover: cover:
cd $(GOPATH_LOCAL); $(OPTS_ENV) GOPATH=$(GOPATH_LOCAL) go test \ cd $(GOPATH_LOCAL); $(OPTS_ENV) GOPATH=$(GOPATH_LOCAL) go test \
-race -coverprofile=coverage_$(NOASM).txt -covermode=atomic $(OPTS) $(TEST_PATH) -race -coverprofile=coverage_$(NOASM).txt -covermode=atomic $(OPTS) $(TEST_PATH)
@ -59,6 +65,10 @@ bench: clean $(addprefix prep-,$(TARGETS))
cd $(GOPATH_LOCAL); GOCACHE=$(GOCACHE) GOPATH=$(GOPATH_LOCAL) $(GO) test \ cd $(GOPATH_LOCAL); GOCACHE=$(GOCACHE) GOPATH=$(GOPATH_LOCAL) $(GO) test \
$(BENCH_OPTS) $(TEST_PATH) $(BENCH_OPTS) $(TEST_PATH)
# Runs `$(GO) test` with $(BENCH_OPTS) for the cSIDH package only
# (github.com/henrydcase/nobs/dh/csidh) inside the local GOPATH.
bench_csidh: clean $(addprefix prep-,$(TARGETS))
cd $(GOPATH_LOCAL); GOCACHE=$(GOCACHE) GOPATH=$(GOPATH_LOCAL) $(GO) test \
$(BENCH_OPTS) github.com/henrydcase/nobs/dh/csidh
clean: clean:
rm -rf $(GOPATH_LOCAL) rm -rf $(GOPATH_LOCAL)
rm -rf $(VENDOR_DIR) rm -rf $(VENDOR_DIR)

104
dh/csidh/consts.go Normal file
View File

@ -0,0 +1,104 @@
package csidh
// Parameters of the cSIDH-511 instantiation.
const (
// pbits is a bitsize of prime p
pbits = 511
// primeCount number of Elkies primes used for constructing p
primeCount = 74
// expMax bounds the private exponents: GeneratePrivateKey only accepts
// values in [-expMax, expMax].
// (2*5+1)^74 is roughly 2^256
expMax = int8(5)
// size of the limbs, pretty much hardcoded to 64-bit words
limbBitSize = 64
// size of the limbs in bytes
limbByteSize = limbBitSize >> 3
// Number of limbs for a field element
numWords = 8
// PrivateKeySize is a size of cSIDH/512 private key in bytes.
// The primeCount exponents are packed as 4-bit values, two per byte
// (74 * 4 bits = 37 bytes).
PrivateKeySize = 37
// PublicKeySize is a size of cSIDH/512 public key in bytes
// (numWords limbs of limbByteSize bytes each).
PublicKeySize = 64
// SharedSecretSize is a size of cSIDH/512 shared secret in bytes.
SharedSecretSize = 64
)
// Precomputed field constants. Limbs are listed least-significant first;
// limbs omitted from a literal are zero.
var (
// Elkies primes up to 374 + prime 587
// p = 4 * product(primes) - 1
primes = [primeCount]uint64{
0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, 0x0017, 0x001D, 0x001F, 0x0025,
0x0029, 0x002B, 0x002F, 0x0035, 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053,
0x0059, 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, 0x0089, 0x008B,
0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5,
0x00C7, 0x00D3, 0x00DF, 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, 0x0139, 0x013D, 0x014B,
0x0151, 0x015B, 0x015D, 0x0161, 0x0167, 0x016F, 0x0175, 0x024B}
// The 511-bit prime p itself (plain integer, not in Montgomery domain).
p = fp{
0x1B81B90533C6C87B, 0xC2721BF457ACA835,
0x516730CC1F0B4F25, 0xA7AAC6C567F35507,
0x5AFBFCC69322C9CD, 0xB42D083AEDC88C42,
0xFC8AB0D15E3E4C4A, 0x65B48E8F740F89BF,
}
/* Montgomery R = 2^512 mod p, i.e. 1 in Montgomery domain */
one = fp{
0xC8FC8DF598726F0A, 0x7B1BC81750A6AF95,
0x5D319E67C1E961B4, 0xB0AA7275301955F1,
0x4A080672D9BA6C64, 0x97A5EF8A246EE77B,
0x06EA9E5D4383676A, 0x3496E2E117E0EC80,
}
// 2 in Montgomery domain
two = fp{
0x767762E5FD1E1599, 0x33C5743A49A0B6F6,
0x68FC0C0364C77443, 0xB9AA1E24F83F56DB,
0x3914101F20520EFB, 0x7B1ED6D95B1542B4,
0x114A8BE928C8828A, 0x03793732BBB24F40,
}
// -2 in Montgomery domain
twoNeg = fp{
0xA50A561F36A8B2E2, 0x8EACA7BA0E0BF13E,
0xE86B24C8BA43DAE2, 0xEE00A8A06FB3FE2B,
0x21E7ECA772D0BAD1, 0x390E316192B3498E,
0xEB4024E83575C9C0, 0x623B575CB85D3A7F,
}
// 4 in Montgomery domain
four = fp{
0xECEEC5CBFA3C2B32, 0x678AE87493416DEC,
0xD1F81806C98EE886, 0x73543C49F07EADB6,
0x7228203E40A41DF7, 0xF63DADB2B62A8568,
0x229517D251910514, 0x06F26E6577649E80,
}
// 4 * sqrt(p). cofactorMultiples compares it against the accumulated
// point order using sub512 (presumably as a plain integer - confirm).
fourSqrtP = fp{
0x17895E71E1A20B3F, 0x38D0CD95F8636A56,
0x142B9541E59682CD, 0x856F1399D91D6592,
0x0000000000000002,
}
// -p^-1 mod 2^64. Only the lowest limb is set.
pNegInv = fp{
0x66c1301f632e294d,
}
// (p-1)/2. Used as exponent, hence not in
// montgomery domain
pMin1By2 = fp{
0x8DC0DC8299E3643D, 0xE1390DFA2BD6541A,
0xA8B398660F85A792, 0xD3D56362B3F9AA83,
0x2D7DFE63499164E6, 0x5A16841D76E44621,
0xFE455868AF1F2625, 0x32DA4747BA07C4DF,
}
// NOTE: despite its name, the value stored here is p-2, not p-1: every limb
// equals the corresponding limb of p except the lowest one, which is the
// lowest limb of p minus 2 (0x...C87B -> 0x...C879).
// Used as exponent, hence not in montgomery domain. groupAction raises A.c
// to this power and multiplies A.a by the result, which is consistent with
// a Fermat inversion (x^(p-2) = x^-1 mod p).
pMin1 = fp{
0x1B81B90533C6C879, 0xC2721BF457ACA835,
0x516730CC1F0B4F25, 0xA7AAC6C567F35507,
0x5AFBFCC69322C9CD, 0xB42D083AEDC88C42,
0xFC8AB0D15E3E4C4A, 0x65B48E8F740F89BF,
}
)

333
dh/csidh/csidh.go Normal file
View File

@ -0,0 +1,333 @@
package csidh
import (
"io"
)
// fp is a 511-bit number representing prime field element GF(p). It is
// stored as numWords 64-bit limbs, least-significant limb first.
type fp [numWords]uint64
// point represents projective point on elliptic curve E over fp, given by
// its x-only projective coordinates (x : z). A zero z coordinate is what
// the callers in this file test for via z.isZero().
type point struct {
x fp
z fp
}
// coeff holds curve coefficients as a projective pair (a : c). groupAction
// initialises it as (A : 1) from a public key.
type coeff struct {
a fp
c fp
}
// PublicKey defines operations on public key. The key is a single field
// element: the coefficient A of a Montgomery curve.
type PublicKey struct {
// Montgomery coefficient: represents y^2 = x^3 + Ax^2 + x
a fp
}
// PrivateKey defines operations on private key.
type PrivateKey struct {
// Secret exponents, packed as signed 4-bit values, two per byte
// (see GeneratePrivateKey and groupAction for the packing/unpacking).
e [PrivateKeySize]int8
// working buffer needed to avoid memory allocation; GeneratePrivateKey
// reads random bytes into it.
wbuf [64]byte
}
// working buffer needed to avoid memory allocation.
// NOTE(review): it is package-level mutable state holding only limbByteSize
// (8) bytes, so any function using it is not safe for concurrent callers and
// can obtain at most one limb of data per read.
var wbuf [limbByteSize]byte
// randFp sets v to an integer in the range [0, p) built from bytes read from
// rng. It panics if rng cannot supply enough bytes.
//
// Sampling is done by rejection: a full-width candidate is read, its top limb
// is masked down to pbits bits, and the candidate is accepted only if it is
// smaller than p. A candidate is accepted with probability p / 2^pbits, which
// is above 3/4 for this prime (the top limb of p is 0x65B4... while the masked
// bound is 0x8000...), so the loop terminates quickly with overwhelming
// probability as long as rng produces well-distributed bytes.
//
// The previous implementation read into a package-level buffer of only
// limbByteSize bytes, which left every limb but the lowest one at zero and
// made the function unsafe for concurrent use. A local buffer covering all
// limbs is used instead.
func randFp(v *fp, rng io.Reader) {
	// One byte slot for every byte of every limb of v.
	var buf [numWords * limbByteSize]byte
	// Keeps only the low (pbits mod limbBitSize) bits of the top limb.
	mask := uint64(1<<(pbits%limbBitSize)) - 1

	for {
		*v = fp{}
		if _, err := io.ReadFull(rng, buf[:]); err != nil {
			panic("Can't read random number")
		}
		// Little-endian decode: byte i lands in limb i/limbByteSize.
		for i, b := range buf[:] {
			v[i/limbByteSize] |= uint64(b) << (8 * uint(i%limbByteSize))
		}
		v[len(v)-1] &= mask
		if isLess(v, &p) {
			return
		}
	}
}
// montEval stores x^3 + A*x^2 + x in res, computed as ((x^2 + A*x) + 1) * x.
// The constant 1 is taken from the package-level value `one`.
func montEval(res, A, x *fp) {
	var ax fp

	*res = *x
	mulRdc(res, res, res) // res = x^2
	mulRdc(&ax, A, x)     // ax  = A*x
	addRdc(res, res, &ax) // res = x^2 + A*x
	addRdc(res, res, &one)
	mulRdc(res, res, x)
}
// cofactorMultiples computes [(p+1)/l]P for all l in primes[halfL:halfR]
// using divide and conquer, which is much faster than doing it naively but
// uses more memory (the function recurses).
//
// Arguments:
//   - p:     point being tested; it is overwritten.
//   - a:     curve coefficients passed as a point (x = A, z = C).
//   - halfL, halfR: half-open index range into primes; assumes halfL < halfR.
//   - order: accumulator multiplied by every prime that was found to divide
//     the order of the point; it is updated in place.
//
// Returns (result, done). When done is true the verdict is final: result is
// true if the accumulated order exceeded 4*sqrt(p) (curve is supersingular)
// and false if the order of the point does not divide p+1. When done is
// false the range was inconclusive and result is false.
//
// As soon as one half of the range yields a final verdict the other half is
// not visited, so a rejection can never be overridden by a later branch and
// no work is wasted after the answer is known.
//
// TODO: non constant time
// TODO: this needs to be rewritten - function called recursively
func cofactorMultiples(p, a *point, halfL, halfR int, order *fp) (bool, bool) {
	A := coeff{a: a.x, c: a.z}

	if (halfR - halfL) == 1 {
		// Leaf: a single prime is left for this point.
		if !p.z.isZero() {
			tmp := fp{primes[halfL]}
			xMul512(p, p, &A, &tmp)
			if !p.z.isZero() {
				// order does not divide p+1
				return false, true
			}
			mul512(order, order, primes[halfL])
			if sub512(&tmp, &fourSqrtP, order) == 1 {
				// borrow => order > 4*sqrt(p) -> supersingular
				return true, true
			}
		}
		return false, false
	}

	// perform another recursive step
	mid := halfL + ((halfR - halfL + 1) / 2)
	mulL, mulR := fp{1}, fp{1}
	// mulR: product of primes in [halfL, mid); mulL: product in [mid, halfR).
	for i := halfL; i < mid; i++ {
		mul512(&mulR, &mulR, primes[i])
	}
	for i := mid; i < halfR; i++ {
		mul512(&mulL, &mulL, primes[i])
	}

	// Q is left with the primes of the upper half, p with those of the lower.
	var Q point
	xMul512(&Q, p, &A, &mulR)
	xMul512(p, p, &A, &mulL)

	// Stop as soon as the upper half has produced a final verdict.
	if res, done := cofactorMultiples(&Q, a, mid, halfR, order); done {
		return res, true
	}
	return cofactorMultiples(p, a, halfL, mid, order)
}
// PrivateKey operations
// Import loads a private key from its byte encoding. The input must be
// exactly PrivateKeySize (len(c.e)) bytes long; any other length is rejected
// and leaves the key unchanged. Previously an input longer than the key
// caused an index-out-of-range panic.
// Returns true on success, false otherwise.
func (c *PrivateKey) Import(key []byte) bool {
	if len(key) != len(c.e) {
		return false
	}
	for i, v := range key {
		c.e[i] = int8(v)
	}
	return true
}
// Export writes the byte encoding of the private key to the beginning of
// out. It returns false, without touching out, when out is shorter than the
// key; bytes of out past the key length are left as they were.
func (c PrivateKey) Export(out []byte) bool {
	if len(out) >= len(c.e) {
		for i := range c.e {
			out[i] = byte(c.e[i])
		}
		return true
	}
	return false
}
// GeneratePrivateKey fills key with len(primes) random exponents from the
// range [-expMax, expMax], drawing bytes from rng.
//
// Random bytes are read in chunks into key.wbuf; a byte is kept only if,
// interpreted as int8, it lies within the allowed range (rejection
// sampling). The low 4 bits of every accepted byte are stored as a nibble:
// even-numbered exponents go to the low nibble of key.e[n/2], odd-numbered
// ones to the high nibble.
//
// An error from rng is returned as-is; the key is then only partially
// filled.
func GeneratePrivateKey(key *PrivateKey, rng io.Reader) error {
	key.e = [PrivateKeySize]int8{}

	filled := 0
	for filled < len(primes) {
		if _, err := io.ReadFull(rng, key.wbuf[:]); err != nil {
			return err
		}
		for _, b := range key.wbuf[:] {
			if s := int8(b); s > expMax || s < -expMax {
				continue
			}
			shift := uint((filled % 2) * 4)
			key.e[filled>>1] |= int8((b & 0xF) << shift)
			filled++
			if filled == len(primes) {
				break
			}
		}
	}
	return nil
}
// Public key operations
// Import loads a public key from its little-endian byte encoding. The input
// must be exactly numWords*limbByteSize bytes long, otherwise false is
// returned and the key is left unchanged. The value is taken as-is: it is
// assumed to already be in Montgomery domain and is NOT validated here (see
// Validate).
//
// The destination is cleared before decoding, so importing into a key that
// already holds a value replaces it instead of OR-ing the new bytes into the
// old ones.
func (c *PublicKey) Import(key []byte) bool {
	if len(key) != numWords*limbByteSize {
		return false
	}
	c.a = fp{}
	for i, b := range key {
		c.a[i/limbByteSize] |= uint64(b) << (8 * uint(i%limbByteSize))
	}
	return true
}
// Export serialises the public key into out, limb by limb, each limb in
// little-endian byte order. The value is written exactly as stored (i.e.
// still encoded in Montgomery domain). out must be exactly
// numWords*limbByteSize bytes long, otherwise nothing is written and false
// is returned.
func (c *PublicKey) Export(out []byte) bool {
	if len(out) != numWords*limbByteSize {
		return false
	}
	for i := range out {
		limb := c.a[i/limbByteSize]
		out[i] = byte(limb >> (8 * uint64(i%8)))
	}
	return true
}
// reset zeroes every limb of the public key coefficient.
func (c *PublicKey) reset() {
	c.a = fp{}
}
// groupAction applies the private key prv to the curve described by pub and
// stores the resulting curve coefficient back into pub.a. Random points are
// drawn from rng. Not constant time (it branches on the secret exponents).
//
// Working state:
//   - e[0][i] / e[1][i]: number of isogenies of degree primes[i] still to be
//     computed for positive / negative exponents respectively.
//   - k[s]: scalar used to clear the unwanted part of a sampled point for
//     sign s; starts at 4 and accumulates every prime that has no (more)
//     work to do for that sign.
//   - done[s]: true once no exponent is left for sign s.
func groupAction(pub *PublicKey, prv *PrivateKey, rng io.Reader) {
var k [2]fp
var e [2][primeCount]uint8
var done = [2]bool{false, false}
var A = coeff{a: pub.a, c: one}
k[0][0] = 4
k[1][0] = 4
for i, v := range primes {
// Unpack the i-th signed 4-bit exponent: shift the wanted nibble into
// the top of the int8, then arithmetic-shift right to sign-extend it.
t := int8((prv.e[uint(i)>>1] << ((uint(i) % 2) * 4)) >> 4)
if t > 0 {
e[0][i] = uint8(t)
e[1][i] = 0
mul512(&k[1], &k[1], v)
} else if t < 0 {
e[1][i] = uint8(-t)
e[0][i] = 0
mul512(&k[0], &k[0], v)
} else {
e[0][i] = 0
e[1][i] = 0
mul512(&k[0], &k[0], v)
mul512(&k[1], &k[1], v)
}
}
for {
var P point
var rhs fp
// Sample a random x and evaluate the curve equation's right-hand side;
// whether it is a quadratic non-residue selects which sign is served.
randFp(&P.x, rng)
P.z = one
montEval(&rhs, &A.a, &P.x)
sign := rhs.isNonQuadRes()
if done[sign] {
continue
}
xMul512(&P, &P, &A, &k[sign])
done[sign] = true
for i, v := range primes {
if e[sign][i] != 0 {
// cof = product of the later primes that still have work to do, so
// that K = [cof]P is a candidate kernel point for degree primes[i].
var cof = fp{1}
var K point
for j := i + 1; j < len(primes); j++ {
if e[sign][j] != 0 {
mul512(&cof, &cof, primes[j])
}
}
xMul512(&K, &P, &A, &cof)
if !K.z.isZero() {
// isom updates both the point P and the curve A in place.
isom(&P, &A, &K, v)
e[sign][i] = e[sign][i] - 1
if e[sign][i] == 0 {
// Nothing left for this prime: fold it into the clearing scalar.
mul512(&k[sign], &k[sign], primes[i])
}
}
}
done[sign] = done[sign] && (e[sign][i] == 0)
}
// Normalise the curve back to (A : 1): raise A.c to the power stored in
// pMin1 (whose value is p-2, i.e. a Fermat inversion) and multiply A.a by it.
modExpRdc512(&A.c, &A.c, &pMin1)
mulRdc(&A.a, &A.a, &A.c)
A.c = one
if done[0] && done[1] {
break
}
}
pub.a = A.a
}
// GeneratePublicKey computes the public key matching prv and stores it in
// pub. pub is first cleared (its coefficient set to zero) and the group
// action of prv is then applied to it, using rng as the source of random
// points.
func GeneratePublicKey(pub *PublicKey, prv *PrivateKey, rng io.Reader) {
	*pub = PublicKey{}
	groupAction(pub, prv, rng)
}
// Validate does public key validation. It returns true if
// a 'pub' is a valid cSIDH public key, otherwise false.
//
// The key is rejected if its coefficient is not smaller than p or equals
// +/-2. Otherwise random points (drawn from rng) are used to decide whether
// the curve is supersingular; see cofactorMultiples.
func Validate(pub *PublicKey, rng io.Reader) bool {
	var tmp fp

	// Check if in range: a borrow from p - A means A > p.
	if sub512(&tmp, &p, &pub.a) == 1 {
		return false
	}
	if pub.a.equal(&p) {
		return false
	}

	// j-invariant of a Montgomery curve is
	//   j = 256*(A^2-3)^3 / (A^2-4),
	// so any |A| = 2 is invalid. The comparison is done against the
	// Montgomery-domain encodings of 2 and -2.
	if pub.a.equal(&two) || pub.a.equal(&twoNeg) {
		return false
	}

	// Curve coefficients in the (A : 1) form expected by xDbl and
	// cofactorMultiples. Neither of them modifies it, so build it once.
	A := point{x: pub.a, z: one}

	// P must have big enough order to prove supersingularity. The
	// probability that this loop will be repeated is negligible.
	for {
		var P point
		randFp(&P.x, rng)
		P.z = one

		// Multiply by 4, the cofactor that is not covered by `primes`.
		xDbl(&P, &P, &A)
		xDbl(&P, &P, &A)

		order := fp{1}
		if res, done := cofactorMultiples(&P, &A, 0, len(primes), &order); done {
			return res
		}
	}
}
// DeriveSecret computes the shared secret from the peer's public key pub and
// the own private key prv, and writes it to out. rng supplies the random
// points needed for validation and for the group action.
//
// Returns true on success. Returns false when pub fails validation or when
// out does not have the size required by PublicKey.Export
// (numWords*limbByteSize bytes); previously a wrongly sized out was silently
// ignored and success was reported with out left unwritten.
//
// Side effects: pub is always overwritten. On validation failure its
// coefficient is replaced with a random field element; otherwise it holds
// the shared secret afterwards.
func DeriveSecret(out []byte, pub *PublicKey, prv *PrivateKey, rng io.Reader) bool {
	if !Validate(pub, rng) {
		randFp(&pub.a, rng)
		return false
	}
	groupAction(pub, prv, rng)
	return pub.Export(out)
}

371
dh/csidh/csidh_test.go Normal file
View File

@ -0,0 +1,371 @@
package csidh
import (
"bytes"
"encoding/hex"
"encoding/json"
"fmt"
"os"
"testing"
crand "crypto/rand"
"github.com/henrydcase/nobs/drbg"
)
// Possible values for "Status" of a test vector. The string form found in
// the test-vector file is obtained through StatusValues.
const (
Valid = iota // Indicates that shared secret must be agreed correctly
ValidPublicKey2 // Public key 2 must succeed validation
InvalidSharedSecret // Calculated shared secret must be different than test vector
InvalidPublicKey1 // Public key 1 generated from private key must be different than test vector
InvalidPublicKey2 // Public key 2 must fail validation
)
// StatusValues maps every status constant to the string that TestKAT
// compares against the "status" field of a test vector.
var StatusValues = map[int]string{
Valid: "valid",
ValidPublicKey2: "valid_public_key2",
InvalidSharedSecret: "invalid_shared_secret",
InvalidPublicKey1: "invalid_public_key1",
InvalidPublicKey2: "invalid_public_key2",
}
// TestVector is a single known-answer test case decoded from JSON.
// Pk1, Pr1, Pk2 and Ss are hex strings (TestKAT decodes them with
// hex.DecodeString); Status is compared against the values of StatusValues.
type TestVector struct {
Id int `json:"Id"`
Pk1 string `json:"Pk1"`
Pr1 string `json:"Pr1"`
Pk2 string `json:"Pk2"`
Ss string `json:"Ss"`
Status string `json:"status"`
}
// TestVectors is the top-level JSON object of the test-vector file: a list
// of test cases stored under the "Vectors" key.
type TestVectors struct {
Vectors []TestVector `json:"Vectors"`
}
// rng is the deterministic random bit generator shared by all tests and
// benchmarks in this package. It is seeded once in init.
var rng *drbg.CtrDrbg

// init seeds the package-level DRBG with 32 bytes taken from the operating
// system's CSPRNG. It panics if the seed cannot be read or if the DRBG
// refuses to initialise, because no test can run meaningfully without it.
func init() {
	var seed [32]byte

	// Do not continue with an all-zero (or partial) seed on failure.
	if _, err := crand.Read(seed[:]); err != nil {
		panic("Can't read seed for DRBG")
	}

	// Init drbg
	rng = drbg.NewCtrDrbg()
	if !rng.Init(seed[:], nil) {
		panic("Can't initialize DRBG")
	}
}
// TestCompare64 checks fp.isZero on an all-zero element, on an element whose
// lowest limb only is zero, and on an element with all bits set.
func TestCompare64(t *testing.T) {
	const s uint64 = 0xFFFFFFFFFFFFFFFF
	var zero fp // named so it does not shadow the type fp
	var val1 = fp{0, 2, 3, 4, 5, 6, 7, 8}
	var val2 = fp{s, s, s, s, s, s, s, s}

	if !zero.isZero() {
		t.Errorf("isZero returned false, where it should be true")
	}
	if val1.isZero() {
		t.Errorf("isZero returned true, where it should be false")
	}
	if val2.isZero() {
		t.Errorf("isZero returned true, where it should be false")
	}
}
// TestEphemeralKeyExchange runs a full key exchange between a party using a
// fixed private key and a party using a freshly generated one, and checks
// that both sides derive the same shared secret.
func TestEphemeralKeyExchange(t *testing.T) {
	var ss1, ss2 [64]byte
	var prv1, prv2 PrivateKey
	var pub1, pub2 PublicKey

	prvBytes1 := []byte{0xaa, 0x54, 0xe4, 0xd4, 0xd0, 0xbd, 0xee, 0xcb, 0xf4, 0xd0, 0xc2, 0xbc, 0x52, 0x44, 0x11, 0xee, 0xe1, 0x14, 0xd2, 0x24, 0xe5, 0x0, 0xcc, 0xf5, 0xc0, 0xe1, 0x1e, 0xb3, 0x43, 0x52, 0x45, 0xbe, 0xfb, 0x54, 0xc0, 0x55, 0xb2}
	// A failed import would silently leave prv1 zeroed and test the wrong key.
	if !prv1.Import(prvBytes1) {
		t.Fatal("Private key import failed")
	}
	GeneratePublicKey(&pub1, &prv1, rng)

	if err := GeneratePrivateKey(&prv2, rng); err != nil {
		t.Fatal(err)
	}
	GeneratePublicKey(&pub2, &prv2, rng)

	// Note: DeriveSecret overwrites the public key passed to it.
	if !DeriveSecret(ss1[:], &pub1, &prv2, rng) {
		t.Error("Derivation failed")
	}
	if !DeriveSecret(ss2[:], &pub2, &prv1, rng) {
		t.Error("Derivation failed")
	}

	if !bytes.Equal(ss1[:], ss2[:]) {
		fmt.Printf("%X\n", ss1)
		fmt.Printf("%X\n", ss2)
		t.Error("ss1 != ss2")
	}
}
func TestPrivateKeyExportImport(t *testing.T) {
var buf [37]byte
for i := 0; i < 100; i++ {
var prv1, prv2 PrivateKey
GeneratePrivateKey(&prv1, rng)
prv1.Export(buf[:])
prv2.Import(buf[:])
for i := 0; i < len(prv1.e); i++ {
if prv1.e[i] != prv2.e[i] {
t.Error("Error occured when public key export/import")
}
}
}
}
// TestPublicKeyExportImport generates random key pairs and checks that
// exporting a public key and importing the result yields an identical key.
func TestPublicKeyExportImport(t *testing.T) {
	var buf [64]byte

	// eq64 reports whether x and y hold the same limbs (len(y) >= len(x)).
	eq64 := func(x, y []uint64) bool {
		for i := range x {
			if x[i] != y[i] {
				return false
			}
		}
		return true
	}

	for i := 0; i < 10; i++ {
		var prv PrivateKey
		var pub1, pub2 PublicKey

		if err := GeneratePrivateKey(&prv, rng); err != nil {
			t.Fatal(err)
		}
		GeneratePublicKey(&pub1, &prv, rng)

		if !pub1.Export(buf[:]) {
			t.Fatal("Public key export failed")
		}
		if !pub2.Import(buf[:]) {
			t.Fatal("Public key import failed")
		}

		if !eq64(pub1.a[:], pub2.a[:]) {
			t.Error("Error occured when public key export/import")
		}
	}
}
// TestKAT runs the known-answer tests. Test vectors were generated by the
// reference implementation and are loaded from
// testdata/csidh_testvectors.dat (JSON). The "status" of every vector
// selects which checks are applied to it.
func TestKAT(t *testing.T) {
	var tests TestVectors

	// Helper checks if e==true and reports an error if not.
	checkExpr := func(e bool, vec *TestVector, t *testing.T, msg string) {
		t.Helper()
		if !e {
			// msg is passed as an argument so that it is never interpreted
			// as a format string.
			t.Errorf("[Test ID=%d] %s", vec.Id, msg)
		}
	}

	// checkSharedSecret implements nominal case - imports asymmetric keys for
	// both parties, derives secret key and compares it to value in test vector.
	// Comparision must succeed in case status is "Valid" in any other case
	// it must fail.
	checkSharedSecret := func(vec *TestVector, t *testing.T, status int) {
		var prv1 PrivateKey
		var pub1, pub2 PublicKey
		var ss [SharedSecretSize]byte

		prBuf, err := hex.DecodeString(vec.Pr1)
		if err != nil {
			t.Fatal(err)
		}
		checkExpr(
			prv1.Import(prBuf[:]),
			vec, t, "PrivateKey wrong")

		pkBuf, err := hex.DecodeString(vec.Pk1)
		if err != nil {
			t.Fatal(err)
		}
		checkExpr(
			pub1.Import(pkBuf[:]),
			vec, t, "PublicKey 1 wrong")

		pkBuf, err = hex.DecodeString(vec.Pk2)
		if err != nil {
			t.Fatal(err)
		}
		checkExpr(
			pub2.Import(pkBuf[:]),
			vec, t, "PublicKey 2 wrong")

		checkExpr(
			DeriveSecret(ss[:], &pub2, &prv1, rng),
			vec, t, "Error when deriving key")

		ssExp, err := hex.DecodeString(vec.Ss)
		if err != nil {
			t.Fatal(err)
		}
		checkExpr(
			bytes.Equal(ss[:], ssExp) == (status == Valid),
			vec, t, "Unexpected value of shared secret")
	}

	// checkPublicKey1 imports public and private key for one party A
	// and tries to generate public key for a private key. After that
	// it compares generated key to a key from test vector. Comparision
	// must fail.
	checkPublicKey1 := func(vec *TestVector, t *testing.T) {
		var prv PrivateKey
		var pub PublicKey
		var pubBytesGot [PublicKeySize]byte

		prBuf, err := hex.DecodeString(vec.Pr1)
		if err != nil {
			t.Fatal(err)
		}
		pubBytesExp, err := hex.DecodeString(vec.Pk1)
		if err != nil {
			t.Fatal(err)
		}

		checkExpr(
			prv.Import(prBuf[:]),
			vec, t, "PrivateKey wrong")

		// Generate public key from the imported private key. (The previous
		// code called GeneratePrivateKey here, which overwrote the imported
		// key and left pub zeroed, so the check below passed vacuously.)
		GeneratePublicKey(&pub, &prv, rng)
		checkExpr(
			pub.Export(pubBytesGot[:]),
			vec, t, "PublicKey export failed")

		// pubBytesGot must be different than pubBytesExp
		checkExpr(
			!bytes.Equal(pubBytesGot[:], pubBytesExp),
			vec, t, "Public key generated is the same as public key from the test vector")
	}

	// checkPublicKey2 the goal is to test key validation. Test imports
	// public key for B and ensures that validation succeeds in case status
	// is "Valid" or "ValidPublicKey2" and fails otherwise.
	checkPublicKey2 := func(vec *TestVector, t *testing.T, status int) {
		var pub PublicKey

		pubBytesExp, err := hex.DecodeString(vec.Pk2)
		if err != nil {
			t.Fatal(err)
		}

		// Import only decodes the bytes; the actual check is done by
		// Validate, so the result of Import is deliberately not asserted.
		pub.Import(pubBytesExp[:])
		checkExpr(
			Validate(&pub, rng) == (status == Valid || status == ValidPublicKey2),
			vec, t, "Unexpected result of public key validation")
	}

	// Load test data
	file, err := os.Open("testdata/csidh_testvectors.dat")
	if err != nil {
		t.Fatal(err.Error())
	}
	defer file.Close()

	err = json.NewDecoder(file).Decode(&tests)
	if err != nil {
		t.Fatal(err.Error())
	}

	// Loop over all test cases
	for _, test := range tests.Vectors {
		switch test.Status {
		case StatusValues[Valid]:
			checkSharedSecret(&test, t, Valid)
			checkPublicKey2(&test, t, Valid)
		case StatusValues[InvalidSharedSecret]:
			checkSharedSecret(&test, t, InvalidSharedSecret)
		case StatusValues[InvalidPublicKey1]:
			checkPublicKey1(&test, t)
		case StatusValues[InvalidPublicKey2]:
			checkPublicKey2(&test, t, InvalidPublicKey2)
		case StatusValues[ValidPublicKey2]:
			checkPublicKey2(&test, t, ValidPublicKey2)
		}
	}
}
// Private keys shared by the benchmarks below. They are package-level so
// that a benchmark iteration does not have to declare its own key.
var prv1, prv2 PrivateKey
// BenchmarkGeneratePrivate measures private key generation.
func BenchmarkGeneratePrivate(b *testing.B) {
	for left := b.N; left > 0; left-- {
		GeneratePrivateKey(&prv1, rng)
	}
}
// BenchmarkGenerateKeyPair measures generation of a full key pair: a fresh
// private key followed by the public key derived from it (group action on
// an empty key).
func BenchmarkGenerateKeyPair(b *testing.B) {
	for left := b.N; left > 0; left-- {
		var pk PublicKey
		GeneratePrivateKey(&prv1, rng)
		GeneratePublicKey(&pk, &prv1, rng)
	}
}
// BenchmarkValidate measures validation of the same (valid) public key over
// and over. The key is derived from a fixed private key before the timer is
// reset, so that key generation is not part of the measurement.
func BenchmarkValidate(b *testing.B) {
	prvBytes := []byte{0xaa, 0x54, 0xe4, 0xd4, 0xd0, 0xbd, 0xee, 0xcb, 0xf4, 0xd0, 0xc2, 0xbc, 0x52, 0x44, 0x11, 0xee, 0xe1, 0x14, 0xd2, 0x24, 0xe5, 0x0, 0xcc, 0xf5, 0xc0, 0xe1, 0x1e, 0xb3, 0x43, 0x52, 0x45, 0xbe, 0xfb, 0x54, 0xc0, 0x55, 0xb2}
	prv1.Import(prvBytes)

	var pub PublicKey
	GeneratePublicKey(&pub, &prv1, rng)

	// Exclude the (expensive) setup above from the reported time.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		Validate(&pub, rng)
	}
}
// BenchmarkValidateRandom measures validation of a random (most probably
// wrong) key. Every iteration draws 64 random bytes, imports them as a
// public key and validates it.
//
// The previous version only imported the bytes and never called Validate, so
// it did not measure validation at all. A fresh PublicKey is used per
// iteration so that the result does not depend on a previously imported
// value.
func BenchmarkValidateRandom(b *testing.B) {
	var tmp [64]byte

	for n := 0; n < b.N; n++ {
		var pub PublicKey
		if _, err := rng.Read(tmp[:]); err != nil {
			b.FailNow()
		}
		pub.Import(tmp[:])
		Validate(&pub, rng)
	}
}
// BenchmarkValidateGenerated measures validation on different keys: each
// iteration generates a new key pair and validates its public key, so key
// generation is part of the measured time.
func BenchmarkValidateGenerated(b *testing.B) {
	var pk PublicKey
	for left := b.N; left > 0; left-- {
		GeneratePrivateKey(&prv1, rng)
		GeneratePublicKey(&pk, &prv1, rng)
		Validate(&pk, rng)
	}
}
// BenchmarkDerive generates two key pairs once and then measures shared
// secret derivation only.
//
// The timer is reset after the setup so that key generation is not part of
// the measurement. DeriveSecret overwrites the public key it is given, so
// each iteration works on a copy of the peer's key; otherwise every
// iteration after the first would derive from the previous shared secret.
func BenchmarkDerive(b *testing.B) {
	var ss [64]byte
	var pub1 PublicKey
	var pub2 PublicKey

	GeneratePrivateKey(&prv1, rng)
	GeneratePublicKey(&pub1, &prv1, rng)
	GeneratePrivateKey(&prv2, rng)
	GeneratePublicKey(&pub2, &prv2, rng)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		peer := pub2
		DeriveSecret(ss[:], &peer, &prv1, rng)
	}
}
// BenchmarkDeriveGenerated measures both key generation and derivation:
// every iteration creates two fresh key pairs and derives a shared secret
// from the second public key and the first private key.
func BenchmarkDeriveGenerated(b *testing.B) {
	var (
		secret     [64]byte
		pkA, pkB   PublicKey
	)
	for left := b.N; left > 0; left-- {
		GeneratePrivateKey(&prv1, rng)
		GeneratePublicKey(&pkA, &prv1, rng)
		GeneratePrivateKey(&prv2, rng)
		GeneratePublicKey(&pkB, &prv2, rng)
		DeriveSecret(secret[:], &pkB, &prv1, rng)
	}
}

191
dh/csidh/curve.go Normal file
View File

@ -0,0 +1,191 @@
package csidh
// xAdd implements differential arithmetic in P^1 for montgomery
// curves a mapping: x(P),x(Q),x(P-Q) -> x(P+Q)
// PaQ = P + Q
// This algorithms is correctly defined only for cases when
// P!=inf, Q!=inf, P!=Q and P!=-Q
//
// Computed as:
//   u = (P.x+P.z)*(Q.x-Q.z),  v = (P.x-P.z)*(Q.x+Q.z)
//   PaQ.x = PdQ.z * (u+v)^2
//   PaQ.z = PdQ.x * (u-v)^2
// All inputs are read into temporaries before PaQ is written, except PdQ,
// whose z is read for PaQ.x before PdQ.x is read for PaQ.z.
func xAdd(PaQ, P, Q, PdQ *point) {
var t0, t1, t2, t3 fp
addRdc(&t0, &P.x, &P.z)
subRdc(&t1, &P.x, &P.z)
addRdc(&t2, &Q.x, &Q.z)
subRdc(&t3, &Q.x, &Q.z)
mulRdc(&t0, &t0, &t3)
mulRdc(&t1, &t1, &t2)
addRdc(&t2, &t0, &t1)
subRdc(&t3, &t0, &t1)
mulRdc(&t2, &t2, &t2) // sqr
mulRdc(&t3, &t3, &t3) // sqr
mulRdc(&PaQ.x, &PdQ.z, &t2)
mulRdc(&PaQ.z, &PdQ.x, &t3)
}
// xDbl computes Q = 2*P on a montgomery curve E(x): x^3 + A*x^2 + x
// It is correctly defined for all P != inf
//
// The curve is passed as a point holding the projective coefficient pair:
// A.x is the coefficient A and A.z its denominator C.
// P.x and P.z are only read before Q is first written, so Q may be the same
// point as P (Validate calls it that way).
func xDbl(Q, P, A *point) {
var t0, t1, t2 fp
addRdc(&t0, &P.x, &P.z)
mulRdc(&t0, &t0, &t0) // sqr: t0 = (x+z)^2
subRdc(&t1, &P.x, &P.z)
mulRdc(&t1, &t1, &t1) // sqr: t1 = (x-z)^2
subRdc(&t2, &t0, &t1) // t2 = (x+z)^2 - (x-z)^2
mulRdc(&t1, &four, &t1)
mulRdc(&t1, &t1, &A.z) // t1 = 4*C*(x-z)^2
mulRdc(&Q.x, &t0, &t1)
addRdc(&t0, &A.z, &A.z)
addRdc(&t0, &t0, &A.x) // t0 = A + 2C
mulRdc(&t0, &t0, &t2)
addRdc(&t0, &t0, &t1)
mulRdc(&Q.z, &t0, &t2)
}
// xDblAdd computes a doubling and a differential addition at once:
// PaP = 2*P; PaQ = P+Q
// PaP can override P and PaQ can override Q
//
// PdQ holds x(P-Q). A24 holds the precomputed pair (A+2C : 4C); see
// xMul512 for how it is derived from the curve coefficients.
// The statement order matters: the outputs are used as scratch space and
// inputs that may alias them are consumed before being overwritten.
func xDblAdd(PaP, PaQ, P, Q, PdQ *point, A24 *coeff) {
var t0, t1, t2 fp
addRdc(&t0, &P.x, &P.z)
subRdc(&t1, &P.x, &P.z)
// Last read of P: from here on PaP may safely overwrite it.
mulRdc(&PaP.x, &t0, &t0)
subRdc(&t2, &Q.x, &Q.z)
// Last read of Q: from here on PaQ may safely overwrite it.
addRdc(&PaQ.x, &Q.x, &Q.z)
mulRdc(&t0, &t0, &t2) // t0 = (P.x+P.z)*(Q.x-Q.z)
mulRdc(&PaP.z, &t1, &t1)
mulRdc(&t1, &t1, &PaQ.x) // t1 = (P.x-P.z)*(Q.x+Q.z)
subRdc(&t2, &PaP.x, &PaP.z) // t2 = (P.x+P.z)^2 - (P.x-P.z)^2
mulRdc(&PaP.z, &PaP.z, &A24.c)
mulRdc(&PaP.x, &PaP.x, &PaP.z)
mulRdc(&PaQ.x, &A24.a, &t2)
subRdc(&PaQ.z, &t0, &t1)
addRdc(&PaP.z, &PaP.z, &PaQ.x)
addRdc(&PaQ.x, &t0, &t1)
mulRdc(&PaP.z, &PaP.z, &t2)
mulRdc(&PaQ.z, &PaQ.z, &PaQ.z)
mulRdc(&PaQ.x, &PaQ.x, &PaQ.x)
mulRdc(&PaQ.z, &PaQ.z, &PdQ.x)
mulRdc(&PaQ.x, &PaQ.x, &PdQ.z)
}
// cswappoint conditionally exchanges the two points, coordinate by
// coordinate, by delegating to cswap512. 'choice' must be 1 to swap P1 with
// P2 or 0 to leave both untouched; the operation is meant to run in
// constant time.
func cswappoint(P1, P2 *point, choice uint8) {
	cswap512(&P1.x, &P2.x, choice)
	cswap512(&P1.z, &P2.z, choice)
}
// xMul512 is a uniform Montgomery ladder. co is A coefficient of
// x^3 + A*x^2 + x curve. k MUST be > 0
//
// kP = [k]P. xM=x(0 + k*P)
//
// kP may be the same point as P: P is only read, and the result is written
// to kP by the very last statement.
//
// non-constant time.
func xMul512(kP, P *point, co *coeff, k *fp) {
var A24 coeff
var Q point
var j uint
var A point = point{x: co.a, z: co.c}
var R point = *P
// Precompute A24 = (A+2C:4C) => (A24.x = A.x+2A.z; A24.z = 4*A.z)
addRdc(&A24.a, &co.c, &co.c)
addRdc(&A24.a, &A24.a, &co.a)
mulRdc(&A24.c, &co.c, &four)
// Skip initial 0 bits. After the loop j is the index of the highest set
// bit of k (or 0 if no bit above bit 0 is set).
for j = 511; j > 0; j-- {
// performance hit from making it constant-time is actually
// quite big, so... unsafe branch for now
if uint8(k[j>>6]>>(j&63)&1) != 0 {
break
}
}
// Ladder state for the top bit: R = P, Q = 2P.
xDbl(&Q, P, &A)
prevBit := uint8(1)
for i := j; i > 0; {
i--
bit := uint8(k[i>>6] >> (i & 63) & 1)
// Swap only when the current bit differs from the previous one, so the
// pending swap of the previous step and the swap of this step merge.
swap := prevBit ^ bit
prevBit = bit
cswappoint(&Q, &R, swap)
xDblAdd(&Q, &R, &Q, &R, P, &A24)
}
// Final swap driven by the lowest bit of k puts the result into Q.
cswappoint(&Q, &R, uint8(k[0]&1))
*kP = Q
}
// isom evaluates an isogeny whose kernel is generated by kern and updates
// both arguments describing the domain in place:
//   - img:   point to be pushed through the isogeny; overwritten with its image.
//   - co:    curve coefficients (a : c); overwritten with those of the image curve.
//   - kern:  kernel generator; it is only read.
//   - order: order of kern. groupAction passes an element of `primes` here
//     (presumably always an odd prime - confirm for other callers).
// Internally the curve is converted to twisted Edwards form, the
// computation is done there and the result is converted back to Montgomery
// form.
// Not constant time: the loop count depends on order.
func isom(img *point, co *coeff, kern *point, order uint64) {
var t0, t1, t2, S, D fp
var Q, prod point
var coEd coeff
// Ring buffer of the three most recent multiples of kern; M[0] = kern.
var M [3]point = [3]point{*kern}
// Compute twisted Edwards coefficients
// coEd.a = co.a + 2*co.c
// coEd.c = co.a - 2*co.c
// coEd.a*X^2 + Y^2 = 1 + coEd.c*X^2*Y^2
addRdc(&coEd.c, &co.c, &co.c)
addRdc(&coEd.a, &co.a, &coEd.c)
subRdc(&coEd.c, &co.a, &coEd.c)
// Transfer point to twisted Edwards YZ-coordinates
// (X:Z)->(Y:Z) = (X-Z : X+Z)
addRdc(&S, &img.x, &img.z)
subRdc(&D, &img.x, &img.z)
subRdc(&prod.x, &kern.x, &kern.z)
addRdc(&prod.z, &kern.x, &kern.z)
mulRdc(&t1, &prod.x, &S)
mulRdc(&t0, &prod.z, &D)
addRdc(&Q.x, &t0, &t1)
subRdc(&Q.z, &t0, &t1)
// M[1] = 2*kern
xDbl(&M[1], kern, &point{x: co.a, z: co.c})
// TODO: Not constant time.
for i := uint64(1); i < uint64(order/2); i++ {
if i >= 2 {
// Next multiple of kern from the two previous ones (differential addition).
xAdd(&M[i%3], &M[(i-1)%3], kern, &M[(i-2)%3])
}
subRdc(&t1, &M[i%3].x, &M[i%3].z)
addRdc(&t0, &M[i%3].x, &M[i%3].z)
// Accumulate the running products over the kernel multiples.
mulRdc(&prod.x, &prod.x, &t1)
mulRdc(&prod.z, &prod.z, &t0)
mulRdc(&t1, &t1, &S)
mulRdc(&t0, &t0, &D)
addRdc(&t2, &t0, &t1)
mulRdc(&Q.x, &Q.x, &t2)
subRdc(&t2, &t0, &t1)
mulRdc(&Q.z, &Q.z, &t2)
}
// Image point: img = (img.x * Q.x^2 : img.z * Q.z^2)
mulRdc(&Q.x, &Q.x, &Q.x)
mulRdc(&Q.z, &Q.z, &Q.z)
mulRdc(&img.x, &img.x, &Q.x)
mulRdc(&img.z, &img.z, &Q.z)
// coEd.a^order and coEd.c^order
modExpRdc64(&coEd.a, &coEd.a, order)
modExpRdc64(&coEd.c, &coEd.c, order)
// prod^8
mulRdc(&prod.x, &prod.x, &prod.x)
mulRdc(&prod.x, &prod.x, &prod.x)
mulRdc(&prod.x, &prod.x, &prod.x)
mulRdc(&prod.z, &prod.z, &prod.z)
mulRdc(&prod.z, &prod.z, &prod.z)
mulRdc(&prod.z, &prod.z, &prod.z)
// Compute image curve params
mulRdc(&coEd.c, &coEd.c, &prod.x)
mulRdc(&coEd.a, &coEd.a, &prod.z)
// Convert curve coefficients back to Montgomery
addRdc(&co.a, &coEd.a, &coEd.c)
subRdc(&co.c, &coEd.a, &coEd.c)
addRdc(&co.a, &co.a, &co.a)
}

371
dh/csidh/curve_test.go Normal file
View File

@ -0,0 +1,371 @@
package csidh
import (
"math/big"
"testing"
)
// TestXAdd checks xAdd against precomputed x-coordinates for two cases:
// a generic addition of two affine points, and an addition where Q is given
// as (1 : 0), in which case the expected result is P itself.
func TestXAdd(t *testing.T) {
var P, Q, PdQ point
var PaQ point
var expPaQ big.Int
// points from a Elliptic Curve defined in sage as follows:
// A = 0x6055947AAFEBF773CE912680A6A32656073233D2FD6FDF4A143BE82D25B44ECC0431DE564C0F0D6591ACC62D6876E86F5D06B68C9EAF20D0DB0A6B99ED558512
// E = EllipticCurve(GF(p), [0, A, 0, 1, 0])
// where p is CSIDH's 511-bit prime
// checkXAdd runs xAdd on the current values of P, Q and PdQ and compares
// the normalised x-coordinate of the result with expPaQ.
checkXAdd := func() {
xAdd(&PaQ, &P, &Q, &PdQ)
ret := toNormX(&PaQ)
if ret.Cmp(&expPaQ) != 0 {
t.Errorf("\nExp: %s\nGot: %s", expPaQ.Text(16), ret.Text(16))
}
}
// Case 1: generic P + Q
expPaQ.SetString("0x41C98C5D7FF118B1A3987733581FD69C0CC27D7B63BCCA525106B9945869C6DAEDAA3D5D9D2679237EF0D013BE68EF12731DBFB26E12576BAD1E824C67ABD125", 0)
P.x = toFp("0x5840FD8E0165F7F474260F99337461AF195233F791FABE735EC2634B74A95559568B4CEB23959C8A01C5C57E215D22639868ED840D74FE2BAC04830CF75047AD")
P.z = toFp("1")
Q.x = toFp("0x3C1A003C71436698B4A181CEB12BA4B4D1FF7BB14AAAF6FBDA6957C4EBA20AD8E3893DF6F64E67E81163E024C19C7E975F3EC61862F75502C3ED802370E75A3F")
Q.z = toFp("1")
PdQ.x = toFp("0x519B1928F752B0B2143C1C23EB247B370DBB5B9C29B9A3A064D7FBC1B67FAC34B6D3DDA0F3CB87C387B425B36F31B93A8E73252BA701927B767A9DE89D5A92AE")
PdQ.z = toFp("1")
checkXAdd()
// Case 2: Q = (1 : 0) and PdQ = P, expected result is P
expPaQ.SetString("0x5840FD8E0165F7F474260F99337461AF195233F791FABE735EC2634B74A95559568B4CEB23959C8A01C5C57E215D22639868ED840D74FE2BAC04830CF75047AD", 0)
P.x = toFp("0x5840FD8E0165F7F474260F99337461AF195233F791FABE735EC2634B74A95559568B4CEB23959C8A01C5C57E215D22639868ED840D74FE2BAC04830CF75047AD")
P.z = toFp("1")
Q.x = toFp("1")
Q.z = toFp("0x0")
PdQ.x = toFp(expPaQ.Text(10))
PdQ.z = toFp("1")
checkXAdd()
}
// TestXDbl doubles a fixed point with xDbl and compares the normalised
// x-coordinate of the result with a precomputed value.
func TestXDbl(t *testing.T) {
	var P, A point
	var PaP point
	var expPaP big.Int

	// points from a Elliptic Curve defined in sage as follows:
	// A = 0x599841D7D1FCD92A85759B7A3D2D5E4C56EFB17F19F86EB70E121EA16305EDE45A55868BE069313F821F7D94069EC220A4AC3B85500376710538246E9B3BC138
	// E = EllipticCurve(GF(p), [0, A, 0, 1, 0])
	// where p is CSIDH's 511-bit prime
	const (
		hexExp2P = "0x6115B5D8BB613D11BDFEA70D436D87C1515553F6A15061727B4001E0AF745AAA9F39EB9464982829D931F77DAB9D71B24FF0D1D34C347F2A51FD45821F2EA06F"
		hexPx    = "0x6C5B4D4AB0765AAB23C10F8455BE522D3A5363324D7AD641CC67C0A52FC1FFE9F3F8EDFE641478CA93D4D0016D83F21487FD4AF4E02F8A2C237CF27C5604BCC"
		hexA     = "0x599841D7D1FCD92A85759B7A3D2D5E4C56EFB17F19F86EB70E121EA16305EDE45A55868BE069313F821F7D94069EC220A4AC3B85500376710538246E9B3BC138"
	)

	expPaP.SetString(hexExp2P, 0)
	P.x = toFp(hexPx)
	P.z = toFp("1")
	A.x = toFp(hexA)
	A.z = toFp("1")

	xDbl(&PaP, &P, &A)

	got := toNormX(&PaP)
	if got.Cmp(&expPaP) != 0 {
		t.Errorf("\nExp: %s\nGot: %s", expPaP.Text(16), got.Text(16))
	}
}
// TestXDblAdd_Nominal checks xDblAdd against precomputed x-coordinates of
// 2*P and P+Q, calling it with outputs aliasing the inputs. Two cases are
// covered: generic P and Q, and Q given as (1 : 0).
func TestXDblAdd_Nominal(t *testing.T) {
var P, Q, PdQ point
var PaP, PaQ point
var expPaP, expPaQ big.Int
var A coeff
// checkXDblAdd derives A24 from the current A, runs xDblAdd on the current
// P, Q and PdQ and compares both results with expPaP and expPaQ.
checkXDblAdd := func() {
var A24 coeff
// A24.a = 2*A.z + A.a
addRdc(&A24.a, &A.c, &A.c)
addRdc(&A24.a, &A24.a, &A.a)
// A24.z = 4*A.z
mulRdc(&A24.c, &A.c, &four)
// Additionally will check if input can be same as output
PaP = P
PaQ = Q
xDblAdd(&PaP, &PaQ, &PaP, &PaQ, &PdQ, &A24)
retPaP := toNormX(&PaP)
retPaQ := toNormX(&PaQ)
if retPaP.Cmp(&expPaP) != 0 {
t.Errorf("\nExp: %s\nGot: %s", expPaP.Text(16), retPaP.Text(16))
}
if retPaQ.Cmp(&expPaQ) != 0 {
t.Errorf("\nExp: %s\nGot: %s", expPaQ.Text(16), retPaQ.Text(16))
}
}
// 2*P
expPaP.SetString("0x38F5B37271A3D8FA50107F88045D6F6B08355DD026C02E0306CE5875F47422736AD841B4122B2BD7DE6166BB6498F6A283378FF8250948E834F15CEA2D59A57B", 0)
// P+Q
expPaQ.SetString("0x53D9B44C5F61651612243CF7987F619FE6ACB5CF29538F96A63E7278E131F41A17D64388E31B028A5183EF9096AE82724BC34D8DDFD67AD68BD552A33C345B8C", 0)
P.x = toFp("0x4FE17B4CC66E85960F57033CD45996C99248DA09DF2E36F8840657B52F74ED8173E0D322FA57D7B4D0EE7F12967BBD59140B42F2626E29167D6419E851E5A4C9")
P.z = toFp("1")
Q.x = toFp("0x465047949CD6574FDBE00EA365CAF7A95DC9DEBE96A188823CA8C9DD9F527CF81290D49864F61DF0C08C1D6052139230735CA6CFDBDC1A8820610CCD71861176")
Q.z = toFp("1")
PdQ.x = toFp("0x49D3B999A0A020B34473568A8F75B5405F2D3BE5A006595015FC6DDC6BED8AB2A51A887B6DC62C64354466865FFD69E50AD37F6F4FBD74119EB65EBC9367B556")
PdQ.z = toFp("1")
A.a = toFp("0x118F955D498D902FD42E5B2926F297CC814CD7649EC5B070295622F97C4A0D9BD34058A7E0E00CB73ED32FCC237F9F6B7D2A15F5CC7C4EC61ECEF80ACBB0EFA4")
A.c = toFp("1")
checkXDblAdd()
// Case P=value, Q=(x=1, z=0). In this case PaQ==P; PaP=2*P
expPaP.SetString("0x38F5B37271A3D8FA50107F88045D6F6B08355DD026C02E0306CE5875F47422736AD841B4122B2BD7DE6166BB6498F6A283378FF8250948E834F15CEA2D59A57B", 0)
expPaQ.SetString("0x4FE17B4CC66E85960F57033CD45996C99248DA09DF2E36F8840657B52F74ED8173E0D322FA57D7B4D0EE7F12967BBD59140B42F2626E29167D6419E851E5A4C9", 0)
P.x = toFp("0x4FE17B4CC66E85960F57033CD45996C99248DA09DF2E36F8840657B52F74ED8173E0D322FA57D7B4D0EE7F12967BBD59140B42F2626E29167D6419E851E5A4C9")
P.z = toFp("1")
Q.x = toFp("1")
Q.z = toFp("0")
PdQ.x = toFp("0x4FE17B4CC66E85960F57033CD45996C99248DA09DF2E36F8840657B52F74ED8173E0D322FA57D7B4D0EE7F12967BBD59140B42F2626E29167D6419E851E5A4C9")
PdQ.z = toFp("1")
A.a = toFp("0x118F955D498D902FD42E5B2926F297CC814CD7649EC5B070295622F97C4A0D9BD34058A7E0E00CB73ED32FCC237F9F6B7D2A15F5CC7C4EC61ECEF80ACBB0EFA4")
A.c = toFp("1")
checkXDblAdd()
}
// TestXDblAdd_vs_xDbl_xAdd is a consistency test: for kNumIter rounds it
// feeds the same inputs to xDblAdd and to the pair xAdd/xDbl and requires
// both to produce equal points (as decided by ceqpoint).
func TestXDblAdd_vs_xDbl_xAdd(t *testing.T) {
var P, Q, PdQ point
var PaP1, PaQ1 point
var PaP2, PaQ2 point
var A point
var A24 coeff
P.x = toFp("0x4FE17B4CC66E85960F57033CD45996C99248DA09DF2E36F8840657B52F74ED8173E0D322FA57D7B4D0EE7F12967BBD59140B42F2626E29167D6419E851E5A4C9")
P.z = toFp("1")
Q.x = toFp("0x465047949CD6574FDBE00EA365CAF7A95DC9DEBE96A188823CA8C9DD9F527CF81290D49864F61DF0C08C1D6052139230735CA6CFDBDC1A8820610CCD71861176")
Q.z = toFp("1")
PdQ.x = toFp("0x49D3B999A0A020B34473568A8F75B5405F2D3BE5A006595015FC6DDC6BED8AB2A51A887B6DC62C64354466865FFD69E50AD37F6F4FBD74119EB65EBC9367B556")
PdQ.z = toFp("1")
A.x = toFp("0x118F955D498D902FD42E5B2926F297CC814CD7649EC5B070295622F97C4A0D9BD34058A7E0E00CB73ED32FCC237F9F6B7D2A15F5CC7C4EC61ECEF80ACBB0EFA4")
A.z = toFp("1")
// Precompute A24 for xDblAdd
// (A+2C:4C) => (A24.x = A.x+2A.z; A24.z = 4*A.z)
addRdc(&A24.a, &A.z, &A.z)
addRdc(&A24.a, &A24.a, &A.x)
mulRdc(&A24.c, &A.z, &four)
for i := 0; i < kNumIter; i++ {
xAdd(&PaQ2, &P, &Q, &PdQ)
xDbl(&PaP2, &P, &A)
xDblAdd(&PaP1, &PaQ1, &P, &Q, &PdQ, &A24)
if !ceqpoint(&PaQ1, &PaQ2) {
exp := toNormX(&PaQ1)
got := toNormX(&PaQ2)
t.Errorf("\nExp: \n\t%s\nGot from xAdd: \n\t%s", exp.Text(16), got.Text(16))
}
if !ceqpoint(&PaP1, &PaP2) {
exp := toNormX(&PaP1)
got := toNormX(&PaP2)
t.Errorf("\nExp: \n\t%s\nGot from xDbl: \n\t%s", exp.Text(16), got.Text(16))
}
// Swap values for next operation. This only produces fresh inputs for
// the next round; both implementations still receive identical arguments.
PdQ = Q
Q = P
P = PaP1
}
}
func TestXMul(t *testing.T) {
var P point
var co coeff
var expKP big.Int
var k fp
checkXMul := func() {
var kP point
xMul512(&kP, &P, &co, &k)
retKP := toNormX(&kP)
if expKP.Cmp(&retKP) != 0 {
t.Errorf("\nExp: %s\nGot: %s", expKP.Text(16), retKP.Text(16))
}
// Check if first and second argument can overlap
xMul512(&P, &P, &co, &k)
retKP = toNormX(&P)
if expKP.Cmp(&retKP) != 0 {
t.Errorf("\nExp: %s\nGot: %s", expKP.Text(16), retKP.Text(16))
}
}
// Case C=1
expKP.SetString("0x582B866603E6FBEBD21FE660FB34EF9466FDEC55FFBCE1073134CC557071147821BBAD225E30F7B2B6790B00ED9C39A29AA043F58AF995E440AFB13DA8E6D788", 0)
P.x = toFp("0x1C5CA539C1D5B52DE4750C390C24C05251E8B1D33E48971FA86F5ADDED2D06C8CD31E94887541468BB2925EBD693C9DDFF5BD9508430F25FE28EE30C0760C0FE")
P.z = toFp("1")
co.a = toFp("0x538F785D52996919C8D5C73D842A0249669B5B6BB05338B74EAE8094AE5009A3BA2D73730F527D7403E8184D9B1FA11C0C4C40E7B328A84874A6DBCE99E1DF92")
co.c = toFp("1")
k = fp{0x7A36C930A83EFBD5, 0xD0E80041ED0DDF9F, 0x5AA17134F1B8F877, 0x975711EC94168E51, 0xB3CAD962BED4BAC5, 0x3026DFDD7E4F5687, 0xE67F91AB8EC9C3AF, 0x34671D3FD8C317E7}
checkXMul()
// Check if algorithms works correctly with k=1
expKP.SetString("0x1C5CA539C1D5B52DE4750C390C24C05251E8B1D33E48971FA86F5ADDED2D06C8CD31E94887541468BB2925EBD693C9DDFF5BD9508430F25FE28EE30C0760C0FE", 0)
P.x = toFp("0x1C5CA539C1D5B52DE4750C390C24C05251E8B1D33E48971FA86F5ADDED2D06C8CD31E94887541468BB2925EBD693C9DDFF5BD9508430F25FE28EE30C0760C0FE")
P.z = toFp("1")
co.a = toFp("0x538F785D52996919C8D5C73D842A0249669B5B6BB05338B74EAE8094AE5009A3BA2D73730F527D7403E8184D9B1FA11C0C4C40E7B328A84874A6DBCE99E1DF92")
co.c = toFp("1")
k = fp{1, 0, 0, 0, 0, 0, 0, 0}
checkXMul()
// Check if algorithms works correctly with value of k for which few small and high
// order bits are 0 (test for odd number of cswaps in xMul)
expKP.SetString("0x1925EDA0928C10F427B4E642E7E1481A670D1249956DED6A2292B9BAB841F6AA86A9F41459400845ED4A5E2531A14165F64FE4E43DBD85321B429C6DAE2E8987", 0)
P.x = toFp("0x4CE8603817B9BB06515E921AA201D26B31F3CE181D1E18CD5CD704708CCAD47546CEEAB42B98EE67925A5259E0684A0489F574A999DE127F708B849ACAA12A63")
P.z = toFp("1")
co.a = toFp("0x538F785D52996919C8D5C73D842A0249669B5B6BB05338B74EAE8094AE5009A3BA2D73730F527D7403E8184D9B1FA11C0C4C40E7B328A84874A6DBCE99E1DF92")
co.c = toFp("1")
k = fp{0, 7, 0, 0, 0, 0, 0, 0}
checkXMul()
// Check if algorithms works correctly with value of k for which few small and high
// order bits are 0 (test for even number of cswaps in xMul)
expKP.SetString("0x30C02915C5967C3B6EB2196A934ADF38A183E9C7E814B54121F93048A8FC12D5036992FABF8D807581017A4C1F93D07352413F38F6A902FC76A8894FE8D94805", 0)
P.x = toFp("0x2DDD15ED7C169BE6D9EC02CFE3DC507EC4A7A4D96DE3FAAB9BFCEA1B047807EA301E89830F2FDD0E7E642A85E7ACDE16BAD76DF140F719C4A7AB85153E7D69DC")
P.z = toFp("1")
co.a = toFp("0x538F785D52996919C8D5C73D842A0249669B5B6BB05338B74EAE8094AE5009A3BA2D73730F527D7403E8184D9B1FA11C0C4C40E7B328A84874A6DBCE99E1DF92")
co.c = toFp("1")
k = fp{0, 15, 0, 0, 0, 0, 0, 0}
checkXMul()
// xMul512 does NOT work correctly for k==0. In such case function will return 2*P. But
// thanks to that fact we don't need to handle k==0 case, we get some speedup.
expKP.SetString("0x6115B5D8BB613D11BDFEA70D436D87C1515553F6A15061727B4001E0AF745AAA9F39EB9464982829D931F77DAB9D71B24FF0D1D34C347F2A51FD45821F2EA06F", 0)
P.x = toFp("0x6C5B4D4AB0765AAB23C10F8455BE522D3A5363324D7AD641CC67C0A52FC1FFE9F3F8EDFE641478CA93D4D0016D83F21487FD4AF4E02F8A2C237CF27C5604BCC")
P.z = toFp("1")
co.a = toFp("0x599841D7D1FCD92A85759B7A3D2D5E4C56EFB17F19F86EB70E121EA16305EDE45A55868BE069313F821F7D94069EC220A4AC3B85500376710538246E9B3BC138")
co.c = toFp("1")
k = fp{0, 0, 0, 0, 0, 0, 0, 0}
checkXMul()
}
// TestMappointHardcoded3 runs isom with k=3 on a hard-coded point P,
// coefficient A and kernel point K, and compares the in-place updated P and A
// with hard-coded expected values.
func TestMappointHardcoded3(t *testing.T) {
	P := point{
		x: fp{0xca1a2fdec38c669b, 0xf2fe3678ebeb978b, 0xfda3e9a6f0c719d, 0x6f7bffa41772570b, 0x3d90cdd6283dc150, 0x21b55b738eb1ded9, 0x209515d0a9f41dd6, 0x5275cf397d154a12},
		z: fp{0x1fff8309761576e, 0xef239cbeda7c2ba1, 0x6136ae2d76e95873, 0x1f8f6ac909570cec, 0x780fdf0cc7d676d8, 0x548098fe92ed04e1, 0xb39da564701ef35d, 0x5fec19626df41306},
	}
	A := coeff{
		a: fp{},
		c: fp{0xc8fc8df598726f0a, 0x7b1bc81750a6af95, 0x5d319e67c1e961b4, 0xb0aa7275301955f1, 0x4a080672d9ba6c64, 0x97a5ef8a246ee77b, 0x6ea9e5d4383676a, 0x3496e2e117e0ec80},
	}
	K := point{
		x: fp{0x597616608e291c6f, 0xd14230b008736798, 0xa63099b1ace67e6e, 0xe37c13afd768bcfa, 0xc6ef718894f08135, 0x53a4fd09091f3522, 0xc9a1f9f670645fe1, 0x628c4a8efd83e5f0},
		z: fp{0x8f18a654312ac1ad, 0xbc20a9b2472785c9, 0xdaf97c29bbf9e492, 0xf91a8c799e2f6119, 0xc8dc675cc8e528e6, 0x9a7b2c2f0df95171, 0x85629cd38cdd9fdb, 0x656d5253d3fd1a6e},
	}
	k := uint64(3)

	expA := coeff{
		a: fp{0x6fa92a66e77cfc1, 0x9efbfb7118f1832c, 0x441894cc5d1d24ae, 0x5a2f0fafa26761de, 0x8095c36d3a20a78a, 0xb22be0023612a135, 0x5eb844d06ef0f430, 0x52e53309d1c90cf8},
		c: fp{0x98173d5664a23e5c, 0xd8fe1c6306bbc11a, 0xa774fbc502648059, 0x766a0d839aa62c83, 0x4b074f9b93d1633d, 0xf306019dbf87f505, 0x77c720ca059234b0, 0x3d47ab65269c5908},
	}
	expP := point{
		x: fp{0x91aba9b39f280495, 0xfbd8ea69d2990aeb, 0xb03e1b8ed7fe3dba, 0x3d30a41499f08998, 0xb15a42630de9c606, 0xa7dd487fef16f5c8, 0x8673948afed8e968, 0x57ecc8710004cd4d},
		z: fp{0xce8819869a942526, 0xb98ca2ff79ef8969, 0xd49c9703743a1812, 0x21dbb090f9152e03, 0xbabdcac831b1adea, 0x8cee90762baa2ddd, 0xa0dd2ddcef809d96, 0x1de2a8887a32f19b},
	}

	isom(&P, &A, &K, k)

	// Both projective coordinates of the mapped point must match.
	if !(ceqFp(&P.x, &expP.x) && ceqFp(&P.z, &expP.z)) {
		normP := toNormX(&P)
		normPExp := toNormX(&expP)
		t.Errorf("P != expP [\n %s != %s\n]", normP.Text(16), normPExp.Text(16))
	}
	// Both parts of the new curve coefficient must match.
	if !(ceqFp(&A.a, &expA.a) && ceqFp(&A.c, &expA.c)) {
		t.Errorf("A != expA %X %X", A.a[0], expA.a[0])
	}
}
// TestMappointHardcoded5 runs isom with k=5 on a hard-coded point P,
// coefficient A and kernel point K, and compares the in-place updated P and A
// with hard-coded expected values.
func TestMappointHardcoded5(t *testing.T) {
	P := point{
		x: fp{0xca1a2fdec38c669b, 0xf2fe3678ebeb978b, 0xfda3e9a6f0c719d, 0x6f7bffa41772570b, 0x3d90cdd6283dc150, 0x21b55b738eb1ded9, 0x209515d0a9f41dd6, 0x5275cf397d154a12},
		z: fp{0x1fff8309761576e, 0xef239cbeda7c2ba1, 0x6136ae2d76e95873, 0x1f8f6ac909570cec, 0x780fdf0cc7d676d8, 0x548098fe92ed04e1, 0xb39da564701ef35d, 0x5fec19626df41306},
	}
	A := coeff{
		a: fp{},
		c: fp{0xc8fc8df598726f0a, 0x7b1bc81750a6af95, 0x5d319e67c1e961b4, 0xb0aa7275301955f1, 0x4a080672d9ba6c64, 0x97a5ef8a246ee77b, 0x6ea9e5d4383676a, 0x3496e2e117e0ec80},
	}
	K := point{
		x: fp{0x597616608e291c6f, 0xd14230b008736798, 0xa63099b1ace67e6e, 0xe37c13afd768bcfa, 0xc6ef718894f08135, 0x53a4fd09091f3522, 0xc9a1f9f670645fe1, 0x628c4a8efd83e5f0},
		z: fp{0x8f18a654312ac1ad, 0xbc20a9b2472785c9, 0xdaf97c29bbf9e492, 0xf91a8c799e2f6119, 0xc8dc675cc8e528e6, 0x9a7b2c2f0df95171, 0x85629cd38cdd9fdb, 0x656d5253d3fd1a6e},
	}
	k := uint64(5)

	expA := coeff{
		a: fp{0x32076f58298ed474, 0x5094a1fc8696d307, 0x82e510594157944a, 0xb60ce760f88c83a9, 0xae8a28c325186983, 0xe31d2446a4ad2f18, 0xb266c612b5f141c1, 0x64283e618db5a705},
		c: fp{0x4472b49b65272190, 0x2bd5919309778f56, 0x6132753691fe016c, 0x8f654849c09e6d34, 0xfa208dd9aea1ef12, 0xf7df0dd10071411a, 0x75afb7860500922c, 0x52fb7d34b129fb65},
	}
	expP := point{
		x: fp{0x3b75fc94b2a6df2d, 0x96d53dc9b0e867a0, 0x22e87202421d274e, 0x30a361440697ee1a, 0x8b52ee078bdbddcd, 0x64425d500e6b934d, 0xf47d1f568f6df391, 0x5d9d3607431395ab},
		z: fp{0x746e02dafa040976, 0xcd408f2cddbf3a8e, 0xf643354e0e13a93f, 0x7c39ed96ce9a5e29, 0xfcdf26f1a1a550ca, 0x2fc8aafc4ca0a559, 0x5d204a2b14cf19ba, 0xbd2c3406762f05d},
	}

	isom(&P, &A, &K, k)

	// Both projective coordinates of the mapped point must match.
	if !(ceqFp(&P.x, &expP.x) && ceqFp(&P.z, &expP.z)) {
		normP := toNormX(&P)
		normPExp := toNormX(&expP)
		t.Errorf("P != expP [\n %s != %s\n]", normP.Text(16), normPExp.Text(16))
	}
	// Both parts of the new curve coefficient must match.
	if !(ceqFp(&A.a, &expA.a) && ceqFp(&A.c, &expA.c)) {
		t.Errorf("A != expA %X %X", A.a[0], expA.a[0])
	}
}
// BenchmarkXMul measures xMul512 for a fixed point (z=1), a fixed curve
// coefficient (c=1) and a fixed full-size scalar. The result is not verified
// here; correctness is covered by TestXMul.
func BenchmarkXMul(b *testing.B) {
	var kP, P point
	var co coeff

	// Case C=1
	P.x = toFp("0x1C5CA539C1D5B52DE4750C390C24C05251E8B1D33E48971FA86F5ADDED2D06C8CD31E94887541468BB2925EBD693C9DDFF5BD9508430F25FE28EE30C0760C0FE")
	P.z = toFp("1")
	co.a = toFp("0x538F785D52996919C8D5C73D842A0249669B5B6BB05338B74EAE8094AE5009A3BA2D73730F527D7403E8184D9B1FA11C0C4C40E7B328A84874A6DBCE99E1DF92")
	co.c = toFp("1")
	k := fp{0x7A36C930A83EFBD5, 0xD0E80041ED0DDF9F, 0x5AA17134F1B8F877, 0x975711EC94168E51, 0xB3CAD962BED4BAC5, 0x3026DFDD7E4F5687, 0xE67F91AB8EC9C3AF, 0x34671D3FD8C317E7}

	for n := 0; n < b.N; n++ {
		xMul512(&kP, &P, &co, &k)
	}
}
// BenchmarkXAdd measures a single differential addition (xAdd) on fixed
// inputs P, Q and PdQ, all given with z=1.
func BenchmarkXAdd(b *testing.B) {
	P := point{
		x: toFp("0x5840FD8E0165F7F474260F99337461AF195233F791FABE735EC2634B74A95559568B4CEB23959C8A01C5C57E215D22639868ED840D74FE2BAC04830CF75047AD"),
		z: toFp("1"),
	}
	Q := point{
		x: toFp("0x3C1A003C71436698B4A181CEB12BA4B4D1FF7BB14AAAF6FBDA6957C4EBA20AD8E3893DF6F64E67E81163E024C19C7E975F3EC61862F75502C3ED802370E75A3F"),
		z: toFp("1"),
	}
	PdQ := point{
		x: toFp("0x519B1928F752B0B2143C1C23EB247B370DBB5B9C29B9A3A064D7FBC1B67FAC34B6D3DDA0F3CB87C387B425B36F31B93A8E73252BA701927B767A9DE89D5A92AE"),
		z: toFp("1"),
	}

	var sum point
	for n := 0; n < b.N; n++ {
		xAdd(&sum, &P, &Q, &PdQ)
	}
}
// BenchmarkXDbl measures a single point doubling (xDbl) on a fixed point P
// and a fixed curve coefficient A, both given with z=1.
func BenchmarkXDbl(b *testing.B) {
	P := point{
		x: toFp("0x6C5B4D4AB0765AAB23C10F8455BE522D3A5363324D7AD641CC67C0A52FC1FFE9F3F8EDFE641478CA93D4D0016D83F21487FD4AF4E02F8A2C237CF27C5604BCC"),
		z: toFp("1"),
	}
	A := point{
		x: toFp("0x599841D7D1FCD92A85759B7A3D2D5E4C56EFB17F19F86EB70E121EA16305EDE45A55868BE069313F821F7D94069EC220A4AC3B85500376710538246E9B3BC138"),
		z: toFp("1"),
	}

	var doubled point
	for n := 0; n < b.N; n++ {
		xDbl(&doubled, &P, &A)
	}
}
// BenchmarkIsom measures one isom call with k=2 on a fixed point, curve
// coefficient and kernel point (all with z=1 / c=1).
//
// isom updates the point and the coefficient it is given in place (the
// TestMappointHardcoded tests compare exactly those two values after the
// call), so every iteration works on fresh copies of the inputs. Otherwise
// only the first iteration would run on the intended data.
func BenchmarkIsom(b *testing.B) {
	var P, kern point
	var co coeff
	var k = uint64(2)

	P.x = toFp("0x5FD8D226C228FD6AA3CCDCAB931C5D3AA000A46B47041F59D9724E517594F696D38F2CB45C987ACF68BB1057D8D518F926D8F55171F337D05354E0022BC66B23")
	P.z = toFp("1")
	co.a = toFp("0x9E8DBC4914E3C4F080592642DD0B08B9564AB3ADF75EE9B58A685443BA6E39A1ACD1201B7F034077AF344123880AF9D8C77575E6E782E00186881ECE8B87CA3")
	co.c = toFp("1")
	kern.x = toFp("0x594F77A49EABBF2A12025BC00E1DBC119CDA674B9FE8A00791724B42FEB7D225C4C9940B01B09B8F00B30B0E961212FB63E42614814E38EC9E5E5B0FEBF98C58")
	kern.z = toFp("1")

	for n := 0; n < b.N; n++ {
		// Plain struct copies; the kernel is copied as well in case isom
		// touches it (not visible from this file).
		pt, cf, kr := P, co, kern
		isom(&pt, &cf, &kr, k)
	}
}

319
dh/csidh/fp511.go Normal file
View File

@ -0,0 +1,319 @@
package csidh
import (
"math/bits"
"golang.org/x/sys/cpu"
)
// CPU capabilities. These flags select between the optimized and the
// fallback code paths: hasBMI2 is read directly by the assembly code and
// hasADXandBMI2 is read by mulRdc. According to
// https://github.com/golang/go/issues/28230, variables referred from the
// assembly must be in the same package.
// We declare variables, not constants, so that tests can override them.
var (
// Signals support for BMI2 (MULX)
hasBMI2 = cpu.X86.HasBMI2
// Signals support for both ADX (ADCX/ADOX) and BMI2
hasADXandBMI2 = cpu.X86.HasBMI2 && cpu.X86.HasADX
)
// ctPick64 is a branch-free select between two 64-bit words, done bit by bit
// under control of the mask 'which':
//   which == 0xFF..FF: returns in1
//   which == 0:        returns in2
// For any other mask every result bit comes from in1 where the mask bit is
// set and from in2 where it is clear.
func ctPick64(which uint64, in1, in2 uint64) uint64 {
	// Start from in2 and flip exactly those masked bits in which in1 differs.
	return in2 ^ (which & (in1 ^ in2))
}
// ctIsNonZero64 returns 0 in case i == 0, otherwise it returns 1.
// Constant-time.
func ctIsNonZero64(i uint64) int {
	// For i != 0 at least one of i and its two's complement negation -i has
	// the most significant bit set; for i == 0 both are 0. So the MSB of
	// (i | -i) is the answer.
	return int((i | -i) >> 63)
}
// mulGeneric is the portable implementation of the multiplication used by
// mulRdc: a word-by-word multiplication of x and y interleaved with a
// reduction step modulo p. The result written to r is not fully reduced;
// mulRdc performs the final conditional subtraction of p.
//
// r may alias x and/or y: r is written only once, after the last read of the
// operands.
func mulGeneric(r, x, y *fp) {
	var s fp // running accumulator, becomes the result
	var t1, t2 [9]uint64
	var c, q uint64

	for i := 0; i < numWords; i++ {
		// q is picked so that the lowest word of s + x[i]*y + q*p is zero
		// and can therefore be shifted out below.
		q = ((x[i] * y[0]) + s[0]) * pNegInv[0]
		mul576(&t1, &p, q)
		mul576(&t2, y, x[i])

		// t1 = x[i]*y + q*p. Every round starts a fresh carry chain
		// (carry-in 0); the carry left over from the previous round must
		// not leak into this addition.
		t1[0], c = bits.Add64(t1[0], t2[0], 0)
		t1[1], c = bits.Add64(t1[1], t2[1], c)
		t1[2], c = bits.Add64(t1[2], t2[2], c)
		t1[3], c = bits.Add64(t1[3], t2[3], c)
		t1[4], c = bits.Add64(t1[4], t2[4], c)
		t1[5], c = bits.Add64(t1[5], t2[5], c)
		t1[6], c = bits.Add64(t1[6], t2[6], c)
		t1[7], c = bits.Add64(t1[7], t2[7], c)
		t1[8], _ = bits.Add64(t1[8], t2[8], c)

		// s = (s + x[i]*y + q*p) / 2^64: the lowest word of the sum is
		// dropped (only its carry is kept) and every other word moves down
		// by one position.
		_, c = bits.Add64(t1[0], s[0], 0)
		s[0], c = bits.Add64(t1[1], s[1], c)
		s[1], c = bits.Add64(t1[2], s[2], c)
		s[2], c = bits.Add64(t1[3], s[3], c)
		s[3], c = bits.Add64(t1[4], s[4], c)
		s[4], c = bits.Add64(t1[5], s[5], c)
		s[5], c = bits.Add64(t1[6], s[6], c)
		s[6], c = bits.Add64(t1[7], s[7], c)
		s[7], _ = bits.Add64(t1[8], 0, c)
	}

	*r = s
}
// isLess reports whether x < y, comparing both values as 512-bit unsigned
// integers from the most significant word down. It returns as soon as the
// first differing word is found, so its running time depends on the operands.
func isLess(x, y *fp) bool {
	for i := numWords - 1; i >= 0; i-- {
		switch {
		case x[i] > y[i]:
			return false
		case x[i] < y[i]:
			return true
		}
	}
	// All words are equal.
	return false
}
// addRdc computes r = x + y mod p. The arguments may alias each other
// (r==x==y is allowed).
//
// The sum is computed first, then p is subtracted from it and one of the two
// values is selected with a mask, without branching on the data.
func addRdc(r, x, y *fp) {
	var carry, borrow uint64
	var diff fp

	// r = x + y. The carry out of the top word is not used.
	r[0], carry = bits.Add64(x[0], y[0], 0)
	r[1], carry = bits.Add64(x[1], y[1], carry)
	r[2], carry = bits.Add64(x[2], y[2], carry)
	r[3], carry = bits.Add64(x[3], y[3], carry)
	r[4], carry = bits.Add64(x[4], y[4], carry)
	r[5], carry = bits.Add64(x[5], y[5], carry)
	r[6], carry = bits.Add64(x[6], y[6], carry)
	r[7], _ = bits.Add64(x[7], y[7], carry)

	// diff = r - p; borrow is set when r < p.
	diff[0], borrow = bits.Sub64(r[0], p[0], 0)
	diff[1], borrow = bits.Sub64(r[1], p[1], borrow)
	diff[2], borrow = bits.Sub64(r[2], p[2], borrow)
	diff[3], borrow = bits.Sub64(r[3], p[3], borrow)
	diff[4], borrow = bits.Sub64(r[4], p[4], borrow)
	diff[5], borrow = bits.Sub64(r[5], p[5], borrow)
	diff[6], borrow = bits.Sub64(r[6], p[6], borrow)
	diff[7], borrow = bits.Sub64(r[7], p[7], borrow)

	// keepSum is all-ones when r < p (keep r), zero otherwise (take r - p).
	keepSum := 0 - borrow
	r[0] = ctPick64(keepSum, r[0], diff[0])
	r[1] = ctPick64(keepSum, r[1], diff[1])
	r[2] = ctPick64(keepSum, r[2], diff[2])
	r[3] = ctPick64(keepSum, r[3], diff[3])
	r[4] = ctPick64(keepSum, r[4], diff[4])
	r[5] = ctPick64(keepSum, r[5], diff[5])
	r[6] = ctPick64(keepSum, r[6], diff[6])
	r[7] = ctPick64(keepSum, r[7], diff[7])
}
// sub512 computes r = x - y as a plain 512-bit subtraction (no modular
// reduction) and returns the final borrow: 1 when x < y, 0 otherwise.
func sub512(r, x, y *fp) (borrow uint64) {
	r[0], borrow = bits.Sub64(x[0], y[0], 0)
	r[1], borrow = bits.Sub64(x[1], y[1], borrow)
	r[2], borrow = bits.Sub64(x[2], y[2], borrow)
	r[3], borrow = bits.Sub64(x[3], y[3], borrow)
	r[4], borrow = bits.Sub64(x[4], y[4], borrow)
	r[5], borrow = bits.Sub64(x[5], y[5], borrow)
	r[6], borrow = bits.Sub64(x[6], y[6], borrow)
	r[7], borrow = bits.Sub64(x[7], y[7], borrow)
	return borrow
}
// subRdc computes r = x - y mod p: a 512-bit subtraction followed by adding
// p back when the subtraction borrowed (x < y). The correction is applied
// through a mask, without branching on the data.
func subRdc(r, x, y *fp) {
	var borrow, carry uint64

	// Same as sub512(r,x,y). Unfortunately the compiler
	// is not able to inline it.
	r[0], borrow = bits.Sub64(x[0], y[0], 0)
	r[1], borrow = bits.Sub64(x[1], y[1], borrow)
	r[2], borrow = bits.Sub64(x[2], y[2], borrow)
	r[3], borrow = bits.Sub64(x[3], y[3], borrow)
	r[4], borrow = bits.Sub64(x[4], y[4], borrow)
	r[5], borrow = bits.Sub64(x[5], y[5], borrow)
	r[6], borrow = bits.Sub64(x[6], y[6], borrow)
	r[7], borrow = bits.Sub64(x[7], y[7], borrow)

	// if x<y => r=x-y+p. addP is all-ones exactly when a borrow occurred,
	// so p&addP is either p or 0.
	addP := 0 - borrow
	r[0], carry = bits.Add64(r[0], p[0]&addP, 0)
	r[1], carry = bits.Add64(r[1], p[1]&addP, carry)
	r[2], carry = bits.Add64(r[2], p[2]&addP, carry)
	r[3], carry = bits.Add64(r[3], p[3]&addP, carry)
	r[4], carry = bits.Add64(r[4], p[4]&addP, carry)
	r[5], carry = bits.Add64(r[5], p[5]&addP, carry)
	r[6], carry = bits.Add64(r[6], p[6]&addP, carry)
	r[7], _ = bits.Add64(r[7], p[7]&addP, carry)
}
// mulRdc multiplies x by y with reduction modulo p and stores the result in
// r. The multiplication itself is done by the ADX/BMI2 assembly routine when
// hasADXandBMI2 is set and by mulGeneric otherwise; afterwards p is
// subtracted once if the intermediate result is not smaller than p.
//
// TODO(OZAPTF, note from the original author): it must say that SI size
// has 2*64
func mulRdc(r, x, y *fp) {
	if hasADXandBMI2 {
		mulBmiAsm(r, x, y)
	} else {
		mulGeneric(r, x, y)
	}

	// if p <= r < 2p then r = r-p
	var diff fp
	var borrow uint64
	diff[0], borrow = bits.Sub64(r[0], p[0], 0)
	diff[1], borrow = bits.Sub64(r[1], p[1], borrow)
	diff[2], borrow = bits.Sub64(r[2], p[2], borrow)
	diff[3], borrow = bits.Sub64(r[3], p[3], borrow)
	diff[4], borrow = bits.Sub64(r[4], p[4], borrow)
	diff[5], borrow = bits.Sub64(r[5], p[5], borrow)
	diff[6], borrow = bits.Sub64(r[6], p[6], borrow)
	diff[7], borrow = bits.Sub64(r[7], p[7], borrow)

	// keep is all-ones when r < p (keep r), zero otherwise (take r - p).
	keep := 0 - borrow
	r[0] = ctPick64(keep, r[0], diff[0])
	r[1] = ctPick64(keep, r[1], diff[1])
	r[2] = ctPick64(keep, r[2], diff[2])
	r[3] = ctPick64(keep, r[3], diff[3])
	r[4] = ctPick64(keep, r[4], diff[4])
	r[5] = ctPick64(keep, r[5], diff[5])
	r[6] = ctPick64(keep, r[6], diff[6])
	r[7] = ctPick64(keep, r[7], diff[7])
}
// modExpRdcCommon is a fixed-window modular exponentiation with a 4-bit
// window: r = b ^ e (mod p), where only the low fpBitLen bits of e are used.
// The base and the result are numbers in the Montgomery domain.
//
// The sequence of multiplications does not depend on e, but the table lookup
// is indexed by exponent bits (see the note in the main loop).
func modExpRdcCommon(r, b, e *fp, fpBitLen int) {
	// table[i] = b^i for i = 0..15 (2^4 entries for a 4-bit window).
	var table [16]fp
	table[0] = one // b^0 = 1, which is equal to R from REDC
	table[1] = *b  // b^1
	for i := 1; i < 8; i++ {
		// TODO: implement fast squaring. Interleaving fast squaring with
		// multiplication should then improve performance.
		mulRdc(&table[2*i], &table[i], &table[i]) // b^(2i) = (b^i)^2
		mulRdc(&table[2*i+1], &table[2*i], b)     // b^(2i+1) = b^(2i) * b
	}

	*r = one
	// Walk the exponent from the most significant 4-bit window down.
	for win := fpBitLen/4 - 1; win >= 0; win-- {
		// r = r^16
		mulRdc(r, r, r)
		mulRdc(r, r, r)
		mulRdc(r, r, r)
		mulRdc(r, r, r)

		// note: non resistant to cache SCA
		word, shift := win/16, uint(win%16)*4
		nibble := (e[word] >> shift) & 15
		mulRdc(r, r, &table[nibble])
	}

	// if p <= r < 2p then r = r-p
	var diff fp
	var borrow uint64
	diff[0], borrow = bits.Sub64(r[0], p[0], 0)
	diff[1], borrow = bits.Sub64(r[1], p[1], borrow)
	diff[2], borrow = bits.Sub64(r[2], p[2], borrow)
	diff[3], borrow = bits.Sub64(r[3], p[3], borrow)
	diff[4], borrow = bits.Sub64(r[4], p[4], borrow)
	diff[5], borrow = bits.Sub64(r[5], p[5], borrow)
	diff[6], borrow = bits.Sub64(r[6], p[6], borrow)
	diff[7], borrow = bits.Sub64(r[7], p[7], borrow)

	// keep is all-ones when r < p (keep r), zero otherwise (take r - p).
	keep := 0 - borrow
	r[0] = ctPick64(keep, r[0], diff[0])
	r[1] = ctPick64(keep, r[1], diff[1])
	r[2] = ctPick64(keep, r[2], diff[2])
	r[3] = ctPick64(keep, r[3], diff[3])
	r[4] = ctPick64(keep, r[4], diff[4])
	r[5] = ctPick64(keep, r[5], diff[5])
	r[6] = ctPick64(keep, r[6], diff[6])
	r[7] = ctPick64(keep, r[7], diff[7])
}
// modExpRdc512 does modular exponentiation with a 512-bit exponent:
// r = b ^ e (mod p). It forwards to modExpRdcCommon with fpBitLen=512.
// Constant-time according to the original author; see the cache
// side-channel note inside modExpRdcCommon.
func modExpRdc512(r, b, e *fp) {
modExpRdcCommon(r, b, e, 512)
}
// modExpRdc64 does modular exponentiation with a 64-bit exponent:
// r = b ^ e (mod p). The exponent is placed in the lowest word of an
// otherwise zero fp and passed to modExpRdcCommon with fpBitLen=64.
// Constant-time according to the original author; see the cache
// side-channel note inside modExpRdcCommon.
func modExpRdc64(r, b *fp, e uint64) {
	var exp fp
	exp[0] = e
	modExpRdcCommon(r, b, &exp, 64)
}
// isNonQuadRes checks whether value v is a quadratic non-residue modulo p.
// The implementation uses Euler's criterion, which follows from Fermat's
// little theorem:
//   a^(p-1) == 1, hence
//   (a^2)^((p-1)/2) == 1
// which means v is a quadratic residue iff v^((p-1)/2) == 1.
// Caller provided v must be in Montgomery domain.
// Returns 0 in case v^((p-1)/2) equals one (v is a quadratic residue),
// otherwise returns 1.
func (v *fp) isNonQuadRes() int {
	var pow fp
	modExpRdc512(&pow, v, &pMin1By2)

	// Accumulate the difference between pow and one over all words, so the
	// comparison does not stop at the first mismatch.
	var diff uint64
	for i := range pow {
		diff |= pow[i] ^ one[i]
	}
	return ctIsNonZero64(diff)
}
// isZero returns true in case every word of v is 0, otherwise false.
// All words are OR-ed together before the single final check, so the loop
// does not exit early. Constant time.
func (v *fp) isZero() bool {
	var acc uint64
	for i := 0; i < numWords; i++ {
		acc |= v[i]
	}
	nonZero := ctIsNonZero64(acc)
	return nonZero == 0
}
// equal reports whether v and in hold the same words. The word-wise
// differences are OR-ed together before the single final check, so the loop
// does not exit early. Constant time.
func (v *fp) equal(in *fp) bool {
	var diff uint64
	for i := range v {
		diff |= v[i] ^ in[i]
	}
	return ctIsNonZero64(diff) == 0
}

192
dh/csidh/fp511_amd64.s Normal file
View File

@ -0,0 +1,192 @@
// +build amd64,!noasm
#include "textflag.h"
// mul512 multiplies a 512-bit value by a 64-bit value and keeps only the
// low 512 bits of the product.
//
// Result: x = (y * z) mod 2^512
//
// Arguments, in frame order: x = pointer to the result, y = pointer to the
// 512-bit multiplicand, z = 64-bit multiplier.
//
// Two code paths are provided: one based on MULQ and, when the Go variable
// hasBMI2 is set to 1, one based on MULX (label mul512_mulx).
//
// Registers used: AX, DX, SI, DI, R10, R11
TEXT ·mul512(SB), NOSPLIT, $0-24
MOVQ x+ 0(FP), DI // result
MOVQ y+ 8(FP), SI // multiplicand
// Check whether to use the optimized implementation
CMPB ·hasBMI2(SB), $1
JE mul512_mulx
MOVQ z+16(FP), R10 // 64 bit multiplier, used by MULQ
// Each step: DX:AX = z * y[i]; the high word of the previous step (R11)
// is added to AX and the resulting carry is folded into DX.
MOVQ R10, AX; MULQ 0(SI); MOVQ DX, R11; MOVQ AX, 0(DI) //x[0]
MOVQ R10, AX; MULQ 8(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 8(DI) //x[1]
MOVQ R10, AX; MULQ 16(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 16(DI) //x[2]
MOVQ R10, AX; MULQ 24(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 24(DI) //x[3]
MOVQ R10, AX; MULQ 32(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 32(DI) //x[4]
MOVQ R10, AX; MULQ 40(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 40(DI) //x[5]
MOVQ R10, AX; MULQ 48(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 48(DI) //x[6]
// Last step: the high word of the product is dropped (result is mod 2^512).
MOVQ R10, AX; MULQ 56(SI); ADDQ R11, AX; MOVQ AX, 56(DI) //x[7]
RET
// Optimized for CPUs with BMI2
mul512_mulx:
MOVQ z+16(FP), DX // 64 bit multiplier, used by MULX
// The high words alternate between R10 and R11. The carry flag set by
// ADDQ/ADCQ is carried over to the next ADCQ (MULX and MOVQ leave the
// flags untouched).
MULXQ 0(SI), AX, R10; MOVQ AX, 0(DI) // x[0]
MULXQ 8(SI), AX, R11; ADDQ R10, AX; MOVQ AX, 8(DI) // x[1]
MULXQ 16(SI), AX, R10; ADCQ R11, AX; MOVQ AX, 16(DI) // x[2]
MULXQ 24(SI), AX, R11; ADCQ R10, AX; MOVQ AX, 24(DI) // x[3]
MULXQ 32(SI), AX, R10; ADCQ R11, AX; MOVQ AX, 32(DI) // x[4]
MULXQ 40(SI), AX, R11; ADCQ R10, AX; MOVQ AX, 40(DI) // x[5]
MULXQ 48(SI), AX, R10; ADCQ R11, AX; MOVQ AX, 48(DI) // x[6]
MULXQ 56(SI), AX, R11; ADCQ R10, AX; MOVQ AX, 56(DI) // x[7], high word in R11 is dropped
RET
// mul576 multiplies a 512-bit value by a 64-bit value and stores the full
// 576-bit (9 words) product. Uses the MULQ instruction to multiply two
// 64-bit values.
//
// Result: x = (y * z)
//
// Arguments, in frame order: x = pointer to the 9-word result, y = pointer
// to the 512-bit multiplicand, z = 64-bit multiplier.
//
// Registers used: AX, DX, SI, DI, R10, R11
TEXT ·mul576(SB), NOSPLIT, $0-24
MOVQ x+ 0(FP), DI // result
MOVQ y+ 8(FP), SI // multiplicand
MOVQ z+16(FP), R10 // 64 bit multiplier, used by MULQ
// Each step: DX:AX = z * y[i]; the high word of the previous step (R11)
// is added to AX and the resulting carry is folded into DX.
MOVQ R10, AX; MULQ 0(SI); MOVQ DX, R11; MOVQ AX, 0(DI) //x[0]
MOVQ R10, AX; MULQ 8(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 8(DI) //x[1]
MOVQ R10, AX; MULQ 16(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 16(DI) //x[2]
MOVQ R10, AX; MULQ 24(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 24(DI) //x[3]
MOVQ R10, AX; MULQ 32(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 32(DI) //x[4]
MOVQ R10, AX; MULQ 40(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 40(DI) //x[5]
MOVQ R10, AX; MULQ 48(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ DX, R11; MOVQ AX, 48(DI) //x[6]
MOVQ R10, AX; MULQ 56(SI); ADDQ R11, AX; ADCQ $0, DX; MOVQ AX, 56(DI) //x[7]
// The final high word becomes the 9th word of the result.
MOVQ DX, 64(DI) //x[8]
RET
// cswap512 conditionally exchanges the two 512-bit values pointed to by x
// and y, without branching on choice.
//
// choice == 0: both values are left unchanged.
// choice == 1: the values are swapped.
// NOTE(review): for any other choice the mask built below is not all-ones,
// so only the bits selected by that mask would be exchanged — callers are
// presumably expected to pass 0 or 1 only; confirm.
//
// Registers used: AX, SI, DI, X0, X1, X2, X15
TEXT ·cswap512(SB),NOSPLIT,$0-17
MOVQ x+0(FP), DI
MOVQ y+8(FP), SI
MOVBLZX choice+16(FP), AX // AX = choice, zero extended (expected 0 or 1)
// Turn AX into a mask: after the negation AX is 0 for choice == 0
// and has all bits set for choice == 1.
NEGQ AX
// Fill X15: the low 64 bits receive AX, the high 64 bits are zeroed.
MOVQ AX, X15
// Copy the lowest double word of X15 into all four double words.
// As AX has either all bits set or none, X15 ends up with either
// all bits set or none of them.
PSHUFD $0, X15, X15
#ifndef CSWAP_BLOCK
#define CSWAP_BLOCK(idx) \
MOVOU (idx*16)(DI), X0 \
MOVOU (idx*16)(SI), X1 \
\ // X2 = mask & (X0 ^ X1)
MOVO X1, X2 \
PXOR X0, X2 \
PAND X15, X2 \
\
PXOR X2, X0 \
PXOR X2, X1 \
\
MOVOU X0, (idx*16)(DI) \
MOVOU X1, (idx*16)(SI)
#endif
// Four 16-byte blocks cover the whole 64-byte (512-bit) values.
CSWAP_BLOCK(0)
CSWAP_BLOCK(1)
CSWAP_BLOCK(2)
CSWAP_BLOCK(3)
RET
// mulBmiAsm implements montgomery multiplication interleaved with
// montgomery reduction. It uses MULX and ADCX/ADOX instructions.
// Implementation specific to 511-bit prime 'p'
//
// Arguments, in frame order: x = pointer to the result, y and z = pointers
// to the two operands. The value stored to x is not fully reduced (see the
// note before RET); the final conditional subtraction of p is left to the
// caller.
//
// Registers used: AX, BX, CX, DX, SI, DI, R8-R15, BP
TEXT ·mulBmiAsm(SB),NOSPLIT,$32-24
MOVQ y+ 8(FP), DI // multiplicand
MOVQ z+16(FP), SI // multiplier
// R8..R15 and BP form the 9-word accumulator; clear all of it.
XORQ R8, R8
XORQ R9, R9
XORQ R10, R10
XORQ R11, R11
XORQ R12, R12
XORQ R13, R13
XORQ R14, R14
XORQ R15, R15
MOVQ BP, 24(SP) // save BP: it is used as an accumulator word below and restored before RET (original author was unsure about this save: "OZAPTF: thats maybe wrong")
XORQ BP, BP
// Uses BMI2 (MULX)
#ifdef MULS_MULX_512
#undef MULS_MULX_512
#endif
#define MULS_MULX_512(idx, r0, r1, r2, r3, r4, r5, r6, r7, r8) \
\ // Reduction step
MOVQ ( 0)(SI), DX \
MULXQ ( 8*idx)(DI), DX, CX \
ADDQ r0, DX \
MULXQ ·pNegInv(SB), DX, CX \
\
XORQ AX, AX \
MULXQ ·p+ 0(SB), AX, BX; ; ADOXQ AX, r0 \
MULXQ ·p+ 8(SB), AX, CX; ADCXQ BX, r1; ADOXQ AX, r1 \
MULXQ ·p+16(SB), AX, BX; ADCXQ CX, r2; ADOXQ AX, r2 \
MULXQ ·p+24(SB), AX, CX; ADCXQ BX, r3; ADOXQ AX, r3 \
MULXQ ·p+32(SB), AX, BX; ADCXQ CX, r4; ADOXQ AX, r4 \
MULXQ ·p+40(SB), AX, CX; ADCXQ BX, r5; ADOXQ AX, r5 \
MULXQ ·p+48(SB), AX, BX; ADCXQ CX, r6; ADOXQ AX, r6 \
MULXQ ·p+56(SB), AX, CX; ADCXQ BX, r7; ADOXQ AX, r7 \
MOVQ $0, AX ; ADCXQ CX, r8; ADOXQ AX, r8 \
\ // Multiplication step
MOVQ (8*idx)(DI), DX \
\
XORQ AX, AX \
MULXQ ( 0)(SI), AX, BX; ADOXQ AX, r0 \
MULXQ ( 8)(SI), AX, CX; ADCXQ BX, r1; ADOXQ AX, r1 \
MULXQ (16)(SI), AX, BX; ADCXQ CX, r2; ADOXQ AX, r2 \
MULXQ (24)(SI), AX, CX; ADCXQ BX, r3; ADOXQ AX, r3 \
MULXQ (32)(SI), AX, BX; ADCXQ CX, r4; ADOXQ AX, r4 \
MULXQ (40)(SI), AX, CX; ADCXQ BX, r5; ADOXQ AX, r5 \
MULXQ (48)(SI), AX, BX; ADCXQ CX, r6; ADOXQ AX, r6 \
MULXQ (56)(SI), AX, CX; ADCXQ BX, r7; ADOXQ AX, r7 \
MOVQ $0, AX ; ADCXQ CX, r8; ADOXQ AX, r8
// One macro expansion per word of the multiplicand. The register list is
// rotated by one position each time, so the lowest accumulator word of one
// round is reused as the highest word of the next one.
MULS_MULX_512(0, R8, R9, R10, R11, R12, R13, R14, R15, BP)
MULS_MULX_512(1, R9, R10, R11, R12, R13, R14, R15, BP, R8)
MULS_MULX_512(2, R10, R11, R12, R13, R14, R15, BP, R8, R9)
MULS_MULX_512(3, R11, R12, R13, R14, R15, BP, R8, R9, R10)
MULS_MULX_512(4, R12, R13, R14, R15, BP, R8, R9, R10, R11)
MULS_MULX_512(5, R13, R14, R15, BP, R8, R9, R10, R11, R12)
MULS_MULX_512(6, R14, R15, BP, R8, R9, R10, R11, R12, R13)
MULS_MULX_512(7, R15, BP, R8, R9, R10, R11, R12, R13, R14)
#undef MULS_MULX_512
// Store the 8 result words: BP holds word 0, R8..R14 hold words 1..7.
MOVQ x+0(FP), DI
MOVQ BP, ( 0)(DI)
MOVQ R8, ( 8)(DI)
MOVQ R9, (16)(DI)
MOVQ R10, (24)(DI)
MOVQ R11, (32)(DI)
MOVQ R12, (40)(DI)
MOVQ R13, (48)(DI)
MOVQ R14, (56)(DI)
MOVQ 24(SP), BP // restore the BP value saved on entry
// The value stored through DI still needs to be reduced if it is >= p;
// this routine does not do it.
RET

13
dh/csidh/fp511_decl.go Normal file
View File

@ -0,0 +1,13 @@
package csidh
// Prototypes of the routines implemented in assembly. The parameter names
// match the names used in the assembly's frame references (x+0(FP), y+8(FP),
// z+16(FP), choice+16(FP)), which is what "go vet" checks.

// mul512 sets x = (y * z) mod 2^512.
//
//go:noescape
func mul512(x, y *fp, z uint64)

// mul576 sets x to the full 576-bit (9-word) product y * z.
//
//go:noescape
func mul576(x *[9]uint64, y *fp, z uint64)

// cswap512 leaves x and y unchanged when choice is 0 and exchanges their
// values when choice is 1, without branching on choice.
//
//go:noescape
func cswap512(x, y *fp, choice uint8)

// mulBmiAsm stores the Montgomery product of y and z in x, using MULX and
// ADCX/ADOX. The stored value is not fully reduced; the caller performs the
// final conditional subtraction of p.
//
//go:noescape
func mulBmiAsm(x, y, z *fp)

85
dh/csidh/fp511_generic.go Normal file
View File

@ -0,0 +1,85 @@
package csidh
import "math/bits"
// locMul512 multiplies the 8-word value m1 by the 64-bit value m2 and stores
// the low 8 words (512 bits) of the product in r. Both slices must hold at
// least 8 words.
//
// The returned value is the carry bit produced while computing the top word
// r[7]; the high half of the last partial product is not part of it.
//
// TODO(OZAPTF, note from the original author): this should be compiled only
// when generic
func locMul512(r, m1 []uint64, m2 uint64) uint64 {
	hi, lo := bits.Mul64(m2, m1[0])
	r[0] = lo

	// acc is the amount carried into the next word: the high half of the
	// previous partial product plus the carry bit of the previous addition.
	acc := hi
	var carryBit uint64
	for i := 1; i < 8; i++ {
		hi, lo = bits.Mul64(m2, m1[i])
		r[i], carryBit = bits.Add64(lo, acc, 0)
		acc = hi + carryBit
	}
	return carryBit
}
// mul576Gen is the portable counterpart of mul576: it multiplies the 8-word
// value m1 by the 64-bit value m2 and stores the full 9-word (576-bit)
// product in r.
func mul576Gen(r *[9]uint64, m1 *fp, m2 uint64) {
	hi, lo := bits.Mul64(m2, m1[0])
	r[0] = lo

	// acc is the amount carried into the next word: the high half of the
	// previous partial product plus the carry bit of the previous addition.
	acc := hi
	var carryBit uint64
	for i := 1; i < 8; i++ {
		hi, lo = bits.Mul64(m2, m1[i])
		r[i], carryBit = bits.Add64(lo, acc, 0)
		acc = hi + carryBit
	}

	// The top word is the last high half plus the last carry bit. The high
	// half of a 64x64-bit product is at most 2^64-2, so this sum cannot
	// overflow.
	r[8] = acc
}
// cswap512Gen is the portable counterpart of cswap512. With choice == 0 both
// values are left unchanged, with choice == 1 they are exchanged; no branch
// depends on choice. For any other choice only the bits selected by the
// mask -choice are exchanged.
func cswap512Gen(x, y *fp, choice uint8) {
	mask := -uint64(choice) // 0 for choice == 0, all-ones for choice == 1
	for i := 0; i < numWords; i++ {
		diff := (x[i] ^ y[i]) & mask
		x[i] ^= diff
		y[i] ^= diff
	}
}

431
dh/csidh/fp511_test.go Normal file
View File

@ -0,0 +1,431 @@
package csidh
import (
"math/big"
"testing"
mrand "math/rand"
"golang.org/x/sys/cpu"
)
func resetCpuFeatures() {
hasBMI2 = cpu.X86.HasBMI2
hasADXandBMI2 = cpu.X86.HasBMI2 && cpu.X86.HasADX
}
// testFp512Mul3_Nominal compares mul512 against math/big for random inputs:
// the product of a random fp and a random 64-bit value, truncated to 512
// bits. The output of mul512 aliases its input.
func testFp512Mul3_Nominal(t *testing.T) {
	// modulus: 2^512
	mod := new(big.Int).Lsh(big.NewInt(1), 512)

	for i := 0; i < kNumIter; i++ {
		multiplier64 := mrand.Uint64()
		fV := randomFp()

		// Reference result.
		exp, _ := new(big.Int).SetString(fp2S(fV), 16)
		exp.Mul(exp, new(big.Int).SetUint64(multiplier64))
		// Truncate to 512 bits
		exp.Mod(exp, mod)

		// Result under test.
		mul512(&fV, &fV, multiplier64)
		res, _ := new(big.Int).SetString(fp2S(fV), 16)

		if exp.Cmp(res) != 0 {
			t.Errorf("%X != %X", exp, res)
		}
	}
}
// Check if mul512 produces result
// z = x*y mod 2^512
func TestFp512Mul3_Nominal(t *testing.T) {
hasBMI2 = false
testFp512Mul3_Nominal(t)
resetCpuFeatures()
testFp512Mul3_Nominal(t)
}
func TestAddRdc_Random(t *testing.T) {
for i := 0; i < kNumIter; i++ {
a := randomFp()
bigA, _ := new(big.Int).SetString(fp2S(a), 16)
bigA.Mod(bigA, kModulus)
copy(a[:], intGetU64(bigA))
b := randomFp()
bigB, _ := new(big.Int).SetString(fp2S(b), 16)
bigB.Mod(bigB, kModulus)
copy(b[:], intGetU64(bigB))
addRdc(&a, &a, &b)
bigRet, _ := new(big.Int).SetString(fp2S(a), 16)
bigA.Add(bigA, bigB)
bigA.Mod(bigA, kModulus)
if bigRet.Cmp(bigA) != 0 {
t.Errorf("%X != %X", bigRet, bigA)
}
}
}
// TestAddRdc_Nominal checks addRdc on a few hand-picked operands, including
// the modulus p itself.
func TestAddRdc_Nominal(t *testing.T) {
	var res fp

	// 1 + p == 1 (mod p)
	tmp := OneFp512
	addRdc(&res, &tmp, &p)
	if !ceq512(&res, &tmp) {
		t.Errorf("Wrong value\n%X", res)
	}

	// p + p: addRdc subtracts p at most once, so the expected value is p.
	addRdc(&res, &p, &p)
	if !ceq512(&res, &p) {
		t.Errorf("Wrong value\n%X", res)
	}

	// p + tmp == tmp (mod p)
	tmp = fp{1, 1, 1, 1, 1, 1, 1, 1}
	addRdc(&res, &p, &tmp)
	if !ceq512(&res, &tmp) {
		t.Errorf("Wrong value\n%X", res)
	}

	// tmp + tmp, both inputs aliased; no reduction takes place.
	exp := fp{2, 2, 2, 2, 2, 2, 2, 2}
	addRdc(&res, &tmp, &tmp)
	if !ceq512(&res, &exp) {
		t.Errorf("Wrong value\n%X", res)
	}
}
func TestFp512Sub3_Nominal(t *testing.T) {
var ret fp
var mod big.Int
// modulus: 2^512
mod.SetUint64(1).Lsh(&mod, 512)
for i := 0; i < kNumIter; i++ {
a := randomFp()
bigA, _ := new(big.Int).SetString(fp2S(a), 16)
b := randomFp()
bigB, _ := new(big.Int).SetString(fp2S(b), 16)
sub512(&ret, &a, &b)
bigRet, _ := new(big.Int).SetString(fp2S(ret), 16)
bigA.Sub(bigA, bigB)
// Truncate to 512 bits
bigA.Mod(bigA, &mod)
if bigRet.Cmp(bigA) != 0 {
t.Errorf("%X != %X", bigRet, bigA)
}
}
}
// TestFp512Sub3_DoesntReturnCarry subtracts a larger value from a smaller
// one and expects sub512 to report the borrow by returning 1.
// Note: despite its name, this is the case in which a carry (borrow) IS
// expected.
func TestFp512Sub3_DoesntReturnCarry(t *testing.T) {
	var diff fp
	smaller := fp{0xFFFFFFFFFFFFFFFF, 1}
	larger := fp{0xFFFFFFFFFFFFFFFF, 2}

	if borrow := sub512(&diff, &smaller, &larger); borrow != 1 {
		t.Error("Carry not returned")
	}
}
// TestFp512Sub3_ReturnsCarry subtracts a smaller value from a larger one and
// expects sub512 to return 0, i.e. no borrow.
// Note: despite its name, this is the case in which NO carry (borrow) is
// expected.
func TestFp512Sub3_ReturnsCarry(t *testing.T) {
	a := fp{}
	b := fp{
		0xFFFFFFFFFFFFFFFF, 2,
		0, 0,
		0, 0,
		0, 0}
	c := fp{
		0xFFFFFFFFFFFFFFFF, 1,
		0, 0,
		0, 0,
		0, 0}

	if sub512(&a, &b, &c) != 0 {
		// The check fails when a borrow is reported although b > c.
		t.Error("Unexpected carry returned")
	}
}
// TestCswap checks cswap512 for choice 0 (no change) and choice 1 (values
// exchanged), verifying both operands each time. A last call with a choice
// other than 0/1 only checks that the first operand changes, as the original
// test did.
func TestCswap(t *testing.T) {
	arg1 := randomFp()
	arg2 := randomFp()
	orig1, orig2 := arg1, arg2

	// choice == 0: neither operand may change.
	cswap512(&arg1, &arg2, 0)
	if !ceq512(&arg1, &orig1) || !ceq512(&arg2, &orig2) {
		t.Error("cswap swapped")
	}

	// choice == 1: the operands must be exchanged.
	cswap512(&arg1, &arg2, 1)
	if ceq512(&arg1, &orig1) {
		t.Error("cswap didn't swapped")
	}
	if !ceq512(&arg1, &orig2) || !ceq512(&arg2, &orig1) {
		t.Error("cswap didn't exchange both operands")
	}

	// choice other than 0 or 1: only require that the first operand changes.
	arg1cpy := arg1
	cswap512(&arg1, &arg2, 0xF2)
	if ceq512(&arg1, &arg1cpy) {
		t.Error("cswap didn't swapped")
	}
}
// TestSubRdc checks subRdc on hand-picked operands around 0, 1, p-1 and p.
func TestSubRdc(t *testing.T) {
	var res fp

	// p-1: p is odd, so decrementing the lowest word does not borrow.
	pMinusOne := p
	pMinusOne[0]--

	// 1 - 1 mod P
	unit := OneFp512
	subRdc(&res, &unit, &unit)
	if !ceq512(&res, &ZeroFp512) {
		t.Errorf("Wrong value\n%X", res)
	}

	// 0 - 1 mod P
	zero(&res)
	subRdc(&res, &ZeroFp512, &OneFp512)
	if !ceq512(&res, &pMinusOne) {
		t.Errorf("Wrong value\n%X\n%X", res, pMinusOne)
	}

	// P - (P-1)
	zero(&res)
	subRdc(&res, &p, &pMinusOne)
	if !ceq512(&res, &OneFp512) {
		t.Errorf("Wrong value\n[%X != %X]", res, OneFp512)
	}

	// P - 1
	zero(&res)
	subRdc(&res, &p, &OneFp512)
	if !ceq512(&res, &pMinusOne) {
		t.Errorf("Wrong value\n[%X != %X]", res, pMinusOne)
	}
}
// testMulRdc checks mulRdc against fixed vectors: 0*0, m1*one, m1*m2 and
// m1*m1. It is a helper so the caller can run it under different CPU
// feature settings.
func testMulRdc(t *testing.T) {
	var (
		got    fp
		nought = ZeroFp512
		m1     = fp{
			0x85E2579C786882D0, 0x4E3433657E18DA95,
			0x850AE5507965A0B3, 0xA15BC4E676475964}
		m2 = fp{
			0x85E2579C786882CF, 0x4E3433657E18DA95,
			0x850AE5507965A0B3, 0xA15BC4E676475964}
		// Expected products
		m1m1 = fp{
			0xAEBF46E92C88A4B4, 0xCFE857977B946347,
			0xD3B264FF08493901, 0x6EEB3D23746B6C7C,
			0xC0CA874A349D64B4, 0x7AD4A38B406F8504,
			0x38B6B6CEB82472FB, 0x1587015FD7DDFC7D}
		m1m2 = fp{
			0x51534771258C4624, 0x2BFEDE86504E2160,
			0xE8127D5E9329670B, 0x0C84DBD584491D75,
			0x656C73C68B16E38C, 0x01C0DA470B30B8DE,
			0x2532E3903EAA950B, 0x3F2C28EA97FE6FEC}
	)

	vectors := []struct {
		x, y, want *fp
	}{
		{&nought, &nought, &nought}, // 0*0
		{&m1, &one, &m1},            // 1*m1 == m1
		{&m1, &m2, &m1m2},           // m1*m2 < p (per the original note)
		{&m1, &m1, &m1m1},           // m1*m1 > p (per the original note)
	}
	for _, v := range vectors {
		// Start every case from a cleared result buffer.
		zero(&got)
		mulRdc(&got, v.x, v.y)
		if !ceq512(&got, v.want) {
			t.Errorf("Wrong value\n%X", got)
		}
	}
}
// TestMulRdc runs the mulRdc vectors twice: first with hasADXandBMI2 forced
// to false, then again after resetCpuFeatures() has been called.
func TestMulRdc(t *testing.T) {
	setups := []func(){
		func() { hasADXandBMI2 = false },
		func() { resetCpuFeatures() },
	}
	for _, prepare := range setups {
		prepare()
		testMulRdc(t)
	}
}
func TestModExp(t *testing.T) {
var resExp, base, exp big.Int
var baseFp, expFp, resFp, resFpExp fp
for i := 0; i < kNumIter; i++ {
// Perform modexp with reference implementation
// in Montgomery domain
base.SetString(fp2S(randomFp()), 16)
exp.SetString(fp2S(randomFp()), 16)
resExp.Exp(&base, &exp, kModulus)
toMont(&base, true)
toMont(&resExp, true)
// Convert to fp
copy(baseFp[:], intGetU64(&base))
copy(expFp[:], intGetU64(&exp))
copy(resFpExp[:], intGetU64(&resExp))
// Perform modexp with our implementation
modExpRdc512(&resFp, &baseFp, &expFp)
if !ceq512(&resFp, &resFpExp) {
t.Errorf("Wrong value\n%X!=%X", resFp, intGetU64(&resExp))
}
}
}
// TestIsNonQuadRes checks fp.isNonQuadRes for n = 1..999 against Euler's
// criterion evaluated with math/big: n^((p-1)/2) mod p is compared with 1,
// and the result of that comparison (0 or 1) must equal isNonQuadRes().
func TestIsNonQuadRes(t *testing.T) {
	var n, nMont big.Int
	var pm1o2, rawP big.Int

	// (p-1)/2
	if _, ok := pm1o2.SetString("0x32da4747ba07c4dffe455868af1f26255a16841d76e446212d7dfe63499164e6d3d56362b3f9aa83a8b398660f85a792e1390dfa2bd6541a8dc0dc8299e3643d", 0); !ok {
		t.Fatal("Can't parse (p-1)/2")
	}
	// modulus value (not in montgomery)
	if _, ok := rawP.SetString("0x65b48e8f740f89bffc8ab0d15e3e4c4ab42d083aedc88c425afbfcc69322c9cda7aac6c567f35507516730cc1f0b4f25c2721bf457aca8351b81b90533c6c87b", 0); !ok {
		t.Fatal("Can't parse modulus")
	}
	// Hoisted out of the loop: the comparison constant never changes.
	bigOne := big.NewInt(1)

	// Original author's note: there are 641 quadratic residues in this range
	for i := uint64(1); i < 1000; i++ {
		n.SetUint64(i)
		n.Exp(&n, &pm1o2, &rawP)
		// Cmp yields 0 when the power equals 1 and 1 when it is larger,
		// i.e. exp == 1 iff n is quadratic non-residue
		exp := n.Cmp(bigOne)
		if exp < 0 {
			// The power was 0; fail this test instead of aborting the
			// whole test binary with a panic.
			t.Fatal("Should never happen")
		}

		// Fresh buffer per iteration: intGetU64 may return fewer than 8
		// words and copy() would otherwise keep stale high limbs.
		var nMontFp fp
		nMont.SetUint64(i)
		toMont(&nMont, true)
		copy(nMontFp[:], intGetU64(&nMont))
		ret := nMontFp.isNonQuadRes()
		if ret != exp {
			// Convert back so the report shows the plain value.
			toMont(&nMont, false)
			t.Errorf("Test failed for value %s", nMont.Text(10))
		}
	}
}
// TestCheckSmaller verifies isLess for the pairs (p-1, p), (p, p-1) and
// (p, p).
func TestCheckSmaller(t *testing.T) {
	// p-1
	pMin1 := p
	pMin1[0]--

	checks := []struct {
		lhs, rhs *fp
		want     bool
		failMsg  string
	}{
		{&pMin1, &p, true, "pMin1>p"},  // p-1 < p => 1
		{&p, &pMin1, false, "p>pMin1"}, // p < p-1 => 0
		{&p, &p, false, "p==p"},        // p == p => 0
	}
	for _, c := range checks {
		if isLess(c.lhs, c.rhs) != c.want {
			t.Error(c.failMsg)
		}
	}
}
// BenchmarkFp512Sub times sub512 on one pair of random operands.
func BenchmarkFp512Sub(b *testing.B) {
	var diff fp
	lhs := randomFp()
	rhs := randomFp()
	for i := b.N; i > 0; i-- {
		sub512(&diff, &lhs, &rhs)
	}
}
// BenchmarkFp512Mul times mul512 with a random 64-bit word and random fp
// operands. The random draws happen in the same order as before: the word
// first, then the two fp values.
func BenchmarkFp512Mul(b *testing.B) {
	word := mrand.Uint64()
	dst := randomFp()
	src := randomFp()
	for i := b.N; i > 0; i-- {
		mul512(&dst, &src, word)
	}
}
// BenchmarkCSwap times cswap512, alternating the choice argument between
// 0 and 1 on successive iterations.
func BenchmarkCSwap(b *testing.B) {
	lhs, rhs := randomFp(), randomFp()
	for iter := 0; iter != b.N; iter++ {
		choice := uint8(iter % 2)
		cswap512(&lhs, &rhs, choice)
	}
}
// BenchmarkAddRdc times addRdc on one pair of random operands.
func BenchmarkAddRdc(b *testing.B) {
	var sum fp
	lhs, rhs := randomFp(), randomFp()
	for i := b.N; i > 0; i-- {
		addRdc(&sum, &lhs, &rhs)
	}
}
// BenchmarkSubRdc times subRdc on one pair of random operands.
func BenchmarkSubRdc(b *testing.B) {
	lhs, rhs := randomFp(), randomFp()
	var diff fp
	for i := b.N; i > 0; i-- {
		subRdc(&diff, &lhs, &rhs)
	}
}
// BenchmarkModExpRdc times modExpRdc512 with a random base and exponent.
func BenchmarkModExpRdc(b *testing.B) {
	base, exponent := randomFp(), randomFp()
	var out fp
	for i := b.N; i > 0; i-- {
		modExpRdc512(&out, &base, &exponent)
	}
}
func BenchmarkMulBmiAsm(b *testing.B) {
arg1 := randomFp()
arg2 := randomFp()
var res fp
for n := 0; n < b.N; n++ {
mulBmiAsm(&res, &arg1, &arg2)
}
}
// BenchmarkMulGeneric times mulGeneric on one pair of random operands.
func BenchmarkMulGeneric(b *testing.B) {
	lhs, rhs := randomFp(), randomFp()
	var product fp
	for i := b.N; i > 0; i-- {
		mulGeneric(&product, &lhs, &rhs)
	}
}

1197
dh/csidh/testdata/csidh_testvectors.dat vendored Normal file

File diff suppressed because it is too large Load Diff

156
dh/csidh/utils_test.go Normal file
View File

@ -0,0 +1,156 @@
package csidh
import (
"fmt"
"math/big"
mrand "math/rand"
)
// Commonly used variables shared by the tests of this package.
var (
// Number of iterations performed by the randomized tests
kNumIter = 1000
// Modulus p as a big.Int, parsed from the hex string produced by fp2S(p)
kModulus, _ = new(big.Int).SetString(fp2S(p), 16)
// Zero in fp (all limbs are 0)
ZeroFp512 = fp{}
// One in fp: limb 0 is 1, all other limbs are 0
OneFp512 = fp{1, 0, 0, 0, 0, 0, 0, 0}
)
// toMont rewrites dst in place. With toMont==true dst becomes
// dst * 2^512 mod p (conversion to the Montgomery domain); otherwise it
// becomes dst * (2^512)^-1 mod p (conversion back).
func toMont(dst *big.Int, toMont bool) {
	modulus := intSetU64(new(big.Int), p[:])
	// R = 2^512
	factor := new(big.Int).Lsh(big.NewInt(1), 512)
	if !toMont {
		// Leaving the domain multiplies by R^-1 instead of R.
		factor.ModInverse(factor, modulus)
	}
	dst.Mul(dst, factor)
	dst.Mod(dst, modulus)
}
// fp2S renders the first 8 limbs of v as one lower-case hex string of
// 8*16 digits, limb 7 first and limb 0 last.
func fp2S(v fp) string {
	hex := ""
	for limb := 7; limb >= 0; limb-- {
		hex += fmt.Sprintf("%016x", v[limb])
	}
	return hex
}
// zero overwrites every limb of *v with 0.
func zero(v *fp) {
	*v = fp{}
}
// randomFp returns an fp whose first 8 limbs are independent outputs of
// mrand.Uint64(), drawn in limb order 0..7. The value is NOT reduced
// modulo p, so it can be any 512-bit number.
func randomFp() fp {
	return fp{
		mrand.Uint64(), mrand.Uint64(),
		mrand.Uint64(), mrand.Uint64(),
		mrand.Uint64(), mrand.Uint64(),
		mrand.Uint64(), mrand.Uint64(),
	}
}
// cmp512 compares x and y limb by limb, starting from the highest index.
//
//	x<y:  -1
//	x>y:   1
//	x==y:  0
//
// Both arguments have the same array type, so their lengths always match;
// the former length-mismatch branch could never execute and was removed.
func cmp512(x, y *fp) int {
	for i := len(x) - 1; i >= 0; i-- {
		switch {
		case x[i] < y[i]:
			return -1
		case x[i] > y[i]:
			return 1
		}
	}
	return 0
}
// ceqFp reports whether every limb of l equals the corresponding limb of r.
func ceqFp(l, r *fp) bool {
	// Array equality in Go compares element by element.
	return *l == *r
}
// ceqpoint reports whether two points have identical x and identical z
// fields. The fields are compared as stored, with no normalization.
func ceqpoint(l, r *point) bool {
	if !ceqFp(&l.x, &r.x) {
		return false
	}
	return ceqFp(&l.z, &r.z)
}
// ceq512 reports whether x and y hold the same value, i.e. whether
// cmp512(x, y) returns 0.
func ceq512(x, y *fp) bool {
return cmp512(x, y) == 0
}
// Converst src to big.Int. Function assumes that src is a slice of uint64
// values encoded in little-endian byte order.
func intSetU64(dst *big.Int, src []uint64) *big.Int {
var tmp big.Int
dst.SetUint64(0)
for i, _ := range src {
tmp.SetUint64(src[i])
tmp.Lsh(&tmp, uint(i*64))
dst.Add(dst, &tmp)
}
return dst
}
// Convers src to an array of uint64 values encoded in little-endian
// byte order.
func intGetU64(src *big.Int) []uint64 {
var tmp, mod big.Int
dst := make([]uint64, (src.BitLen()/64)+1)
u64 := uint64(0)
u64--
mod.SetUint64(u64)
for i := 0; i < (src.BitLen()/64)+1; i++ {
tmp.Set(src)
tmp.Rsh(&tmp, uint(i)*64)
tmp.And(&tmp, &mod)
dst[i] = tmp.Uint64()
}
return dst
}
// toNormX returns point.x * point.z^-1 mod p, i.e. the x-coordinate of the
// projective 'point' after normalization (point.x / point.z).
func toNormX(point *point) big.Int {
	modulus := intSetU64(new(big.Int), p[:])
	numerator := intSetU64(new(big.Int), point.x[:])
	denominator := intSetU64(new(big.Int), point.z[:])

	// Division is a multiplication by the modular inverse of z.
	denominator.ModInverse(denominator, modulus)
	numerator.Mul(numerator, denominator)
	numerator.Mod(numerator, modulus)
	return *numerator
}
// toFp parses num (base chosen from its prefix, as with big.Int.SetString
// and base 0), converts the value to the Montgomery domain with toMont and
// returns it as an fp. It panics when num can't be parsed.
func toFp(num string) fp {
	parsed, ok := new(big.Int).SetString(num, 0)
	if !ok {
		panic("Can't parse a number")
	}
	toMont(parsed, true)

	var out fp
	copy(out[:], intGetU64(parsed))
	return out
}

1197
etc/csidh_testvectors.dat Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,11 @@
FROM multiarch/debian-debootstrap:arm64-buster-slim FROM multiarch/debian-debootstrap:arm64-buster
RUN apt-get upgrade -y
RUN apt-get update -qq RUN apt-get update -qq
USER root USER root
RUN apt-get install -y make golang RUN apt-get install -y make wget ca-certificates
RUN rm -rf /var/lib/apt/lists/* RUN wget https://dl.google.com/go/go1.12.5.linux-arm64.tar.gz
RUN tar -xzf go1.12.5.linux-arm64.tar.gz
RUN mv go /usr/local/
RUN ln -s /usr/local/go/bin/go /usr/bin/
RUN rm -rf /var/lib/apt/lists/* go1.12.5.linux-arm64.tar.gz

View File

@ -6,6 +6,8 @@
# Configuration # Configuration
# kWindowSize and kP34 must be specified # kWindowSize and kP34 must be specified
# #
# P434
#kP34 = [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# P503 # P503
kP34 = [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] kP34 = [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 
1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
# P751 # P751