|
|
@@ -4,91 +4,29 @@ package p751 |
|
|
|
|
|
|
|
import ( |
|
|
|
. "github.com/cloudflare/p751sidh/internal/isogeny" |
|
|
|
. "github.com/cloudflare/p751sidh/internal/arith" |
|
|
|
) |
|
|
|
|
|
|
|
// uint128 carries a 128-bit unsigned value as a pair of 64-bit words.
type uint128 struct {
	H uint64 // upper 64 bits
	L uint64 // lower 64 bits
}
|
|
|
|
|
|
|
// addc64 adds the 64-bit digits a and b plus the incoming carry cin without
// branching on any operand value.
//
// ret is the low 64 bits of a + b + cin. cout is the outgoing carry; it is
// 0 or 1 when cin is 0 or 1.
func addc64(cin, a, b uint64) (ret, cout uint64) {
	ret = a + cin + b

	// A carry leaves bit 63 when both operands have that bit set, or when at
	// least one operand has it set and it is clear in the sum.
	bothSet := a & b
	lostTopBit := (a | b) &^ ret
	cout = (bothSet | lostTopBit) >> 63
	return ret, cout
}
|
|
|
|
|
|
|
// subc64 subtracts the 64-bit digit b and the incoming borrow bIn from a
// without branching on any operand value.
//
// ret is the low 64 bits of a - b - bIn. bOut is the outgoing borrow; it is
// 0 or 1 when bIn is 0 or 1.
func subc64(bIn, a, b uint64) (ret, bOut uint64) {
	diff := a - b

	// diff|(-diff) has bit 63 set for every non-zero diff, so this is 1
	// exactly when a == b. In that case the incoming borrow propagates.
	diffIsZero := 1 ^ ((diff | (0 - diff)) >> 63)

	// Bit 63 of this expression is set exactly when a < b.
	aBelowB := (a ^ ((a ^ b) | (diff ^ b))) >> 63

	bOut = (bIn & diffIsZero) | aBelowB
	ret = diff - bIn
	return ret, bOut
}
|
|
|
|
|
|
|
// Multiplies 2 64bit digits in constant time |
|
|
|
func mul64(a, b uint64) (res uint128) { |
|
|
|
var al, bl, ah, bh, albl, albh, ahbl, ahbh uint64 |
|
|
|
var res1, res2, res3 uint64 |
|
|
|
var carry, maskL, maskH, temp uint64 |
|
|
|
|
|
|
|
maskL = (^maskL) >> 32 |
|
|
|
maskH = ^maskL |
|
|
|
|
|
|
|
al = a & maskL |
|
|
|
ah = a >> 32 |
|
|
|
bl = b & maskL |
|
|
|
bh = b >> 32 |
|
|
|
|
|
|
|
albl = al * bl |
|
|
|
albh = al * bh |
|
|
|
ahbl = ah * bl |
|
|
|
ahbh = ah * bh |
|
|
|
res.L = albl & maskL |
|
|
|
|
|
|
|
res1 = albl >> 32 |
|
|
|
res2 = ahbl & maskL |
|
|
|
res3 = albh & maskL |
|
|
|
temp = res1 + res2 + res3 |
|
|
|
carry = temp >> 32 |
|
|
|
res.L ^= temp << 32 |
|
|
|
|
|
|
|
res1 = ahbl >> 32 |
|
|
|
res2 = albh >> 32 |
|
|
|
res3 = ahbh & maskL |
|
|
|
temp = res1 + res2 + res3 + carry |
|
|
|
res.H = temp & maskL |
|
|
|
carry = temp & maskH |
|
|
|
res.H ^= (ahbh & maskH) + carry |
|
|
|
return |
|
|
|
} |
|
|
|
|
|
|
|
// Compute z = x + y (mod p). |
|
|
|
func fp751AddReduced(z, x, y *FpElement) { |
|
|
|
var carry uint64 |
|
|
|
|
|
|
|
// z=x+y % p751 |
|
|
|
for i := 0; i < NumWords; i++ { |
|
|
|
z[i], carry = addc64(carry, x[i], y[i]) |
|
|
|
z[i], carry = Addc64(carry, x[i], y[i]) |
|
|
|
} |
|
|
|
|
|
|
|
// z = z - p751x2 |
|
|
|
carry = 0 |
|
|
|
for i := 0; i < NumWords; i++ { |
|
|
|
z[i], carry = subc64(carry, z[i], p751x2[i]) |
|
|
|
z[i], carry = Subc64(carry, z[i], p751x2[i]) |
|
|
|
} |
|
|
|
|
|
|
|
// z = z + p751x2 |
|
|
|
mask := uint64(0 - carry) |
|
|
|
carry = 0 |
|
|
|
for i := 0; i < NumWords; i++ { |
|
|
|
z[i], carry = addc64(carry, z[i], p751x2[i]&mask) |
|
|
|
z[i], carry = Addc64(carry, z[i], p751x2[i]&mask) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
@@ -97,14 +35,14 @@ func fp751SubReduced(z, x, y *FpElement) { |
|
|
|
var borrow uint64 |
|
|
|
|
|
|
|
for i := 0; i < NumWords; i++ { |
|
|
|
z[i], borrow = subc64(borrow, x[i], y[i]) |
|
|
|
z[i], borrow = Subc64(borrow, x[i], y[i]) |
|
|
|
} |
|
|
|
|
|
|
|
mask := uint64(0 - borrow) |
|
|
|
borrow = 0 |
|
|
|
|
|
|
|
for i := 0; i < NumWords; i++ { |
|
|
|
z[i], borrow = addc64(borrow, z[i], p751x2[i]&mask) |
|
|
|
z[i], borrow = Addc64(borrow, z[i], p751x2[i]&mask) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
@@ -128,7 +66,7 @@ func fp751ConditionalSwap(x, y *FpElement, mask uint8) { |
|
|
|
// with R=2^768. Destroys the input value. |
|
|
|
func fp751MontgomeryReduce(z *FpElement, x *FpElementX2) { |
|
|
|
var carry, t, u, v uint64 |
|
|
|
var uv uint128 |
|
|
|
var uv Uint128 |
|
|
|
var count int |
|
|
|
|
|
|
|
count = 5 // number of 0 digits in the least significat part of p751 + 1 |
|
|
@@ -136,14 +74,14 @@ func fp751MontgomeryReduce(z *FpElement, x *FpElementX2) { |
|
|
|
for i := 0; i < NumWords; i++ { |
|
|
|
for j := 0; j < i; j++ { |
|
|
|
if j < (i - count + 1) { |
|
|
|
uv = mul64(z[j], p751p1[i-j]) |
|
|
|
v, carry = addc64(0, uv.L, v) |
|
|
|
u, carry = addc64(carry, uv.H, u) |
|
|
|
uv = Mul64(z[j], p751p1[i-j]) |
|
|
|
v, carry = Addc64(0, uv.L, v) |
|
|
|
u, carry = Addc64(carry, uv.H, u) |
|
|
|
t += carry |
|
|
|
} |
|
|
|
} |
|
|
|
v, carry = addc64(0, v, x[i]) |
|
|
|
u, carry = addc64(carry, u, 0) |
|
|
|
v, carry = Addc64(0, v, x[i]) |
|
|
|
u, carry = Addc64(carry, u, 0) |
|
|
|
t += carry |
|
|
|
|
|
|
|
z[i] = v |
|
|
@@ -158,14 +96,14 @@ func fp751MontgomeryReduce(z *FpElement, x *FpElementX2) { |
|
|
|
} |
|
|
|
for j := i - NumWords + 1; j < NumWords; j++ { |
|
|
|
if j < (NumWords - count) { |
|
|
|
uv = mul64(z[j], p751p1[i-j]) |
|
|
|
v, carry = addc64(0, uv.L, v) |
|
|
|
u, carry = addc64(carry, uv.H, u) |
|
|
|
uv = Mul64(z[j], p751p1[i-j]) |
|
|
|
v, carry = Addc64(0, uv.L, v) |
|
|
|
u, carry = Addc64(carry, uv.H, u) |
|
|
|
t += carry |
|
|
|
} |
|
|
|
} |
|
|
|
v, carry = addc64(0, v, x[i]) |
|
|
|
u, carry = addc64(carry, u, 0) |
|
|
|
v, carry = Addc64(0, v, x[i]) |
|
|
|
u, carry = Addc64(carry, u, 0) |
|
|
|
|
|
|
|
t += carry |
|
|
|
z[i-NumWords] = v |
|
|
@@ -173,7 +111,7 @@ func fp751MontgomeryReduce(z *FpElement, x *FpElementX2) { |
|
|
|
u = t |
|
|
|
t = 0 |
|
|
|
} |
|
|
|
v, carry = addc64(0, v, x[2*NumWords-1]) |
|
|
|
v, carry = Addc64(0, v, x[2*NumWords-1]) |
|
|
|
z[NumWords-1] = v |
|
|
|
} |
|
|
|
|
|
|
@@ -181,13 +119,13 @@ func fp751MontgomeryReduce(z *FpElement, x *FpElementX2) { |
|
|
|
func fp751Mul(z *FpElementX2, x, y *FpElement) { |
|
|
|
var u, v, t uint64 |
|
|
|
var carry uint64 |
|
|
|
var uv uint128 |
|
|
|
var uv Uint128 |
|
|
|
|
|
|
|
for i := uint64(0); i < NumWords; i++ { |
|
|
|
for j := uint64(0); j <= i; j++ { |
|
|
|
uv = mul64(x[j], y[i-j]) |
|
|
|
v, carry = addc64(0, uv.L, v) |
|
|
|
u, carry = addc64(carry, uv.H, u) |
|
|
|
uv = Mul64(x[j], y[i-j]) |
|
|
|
v, carry = Addc64(0, uv.L, v) |
|
|
|
u, carry = Addc64(carry, uv.H, u) |
|
|
|
t += carry |
|
|
|
} |
|
|
|
z[i] = v |
|
|
@@ -198,9 +136,9 @@ func fp751Mul(z *FpElementX2, x, y *FpElement) { |
|
|
|
|
|
|
|
for i := NumWords; i < (2*NumWords)-1; i++ { |
|
|
|
for j := i - NumWords + 1; j < NumWords; j++ { |
|
|
|
uv = mul64(x[j], y[i-j]) |
|
|
|
v, carry = addc64(0, uv.L, v) |
|
|
|
u, carry = addc64(carry, uv.H, u) |
|
|
|
uv = Mul64(x[j], y[i-j]) |
|
|
|
v, carry = Addc64(0, uv.L, v) |
|
|
|
u, carry = Addc64(carry, uv.H, u) |
|
|
|
t += carry |
|
|
|
} |
|
|
|
z[i] = v |
|
|
@@ -215,7 +153,7 @@ func fp751Mul(z *FpElementX2, x, y *FpElement) { |
|
|
|
func fp751AddLazy(z, x, y *FpElement) { |
|
|
|
var carry uint64 |
|
|
|
for i := 0; i < NumWords; i++ { |
|
|
|
z[i], carry = addc64(carry, x[i], y[i]) |
|
|
|
z[i], carry = Addc64(carry, x[i], y[i]) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
@@ -223,7 +161,7 @@ func fp751AddLazy(z, x, y *FpElement) { |
|
|
|
func fp751X2AddLazy(z, x, y *FpElementX2) { |
|
|
|
var carry uint64 |
|
|
|
for i := 0; i < 2*NumWords; i++ { |
|
|
|
z[i], carry = addc64(carry, x[i], y[i]) |
|
|
|
z[i], carry = Addc64(carry, x[i], y[i]) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
@@ -231,14 +169,14 @@ func fp751X2AddLazy(z, x, y *FpElementX2) { |
|
|
|
func fp751StrongReduce(x *FpElement) { |
|
|
|
var borrow, mask uint64 |
|
|
|
for i := 0; i < NumWords; i++ { |
|
|
|
x[i], borrow = subc64(borrow, x[i], p751[i]) |
|
|
|
x[i], borrow = Subc64(borrow, x[i], p751[i]) |
|
|
|
} |
|
|
|
|
|
|
|
// Sets all bits if borrow = 1 |
|
|
|
mask = 0 - borrow |
|
|
|
borrow = 0 |
|
|
|
for i := 0; i < NumWords; i++ { |
|
|
|
x[i], borrow = addc64(borrow, x[i], p751[i]&mask) |
|
|
|
x[i], borrow = Addc64(borrow, x[i], p751[i]&mask) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
@@ -246,13 +184,13 @@ func fp751StrongReduce(x *FpElement) { |
|
|
|
func fp751X2SubLazy(z, x, y *FpElementX2) { |
|
|
|
var borrow, mask uint64 |
|
|
|
for i := 0; i < len(z); i++ { |
|
|
|
z[i], borrow = subc64(borrow, x[i], y[i]) |
|
|
|
z[i], borrow = Subc64(borrow, x[i], y[i]) |
|
|
|
} |
|
|
|
|
|
|
|
// Sets all bits if borrow = 1 |
|
|
|
mask = 0 - borrow |
|
|
|
borrow = 0 |
|
|
|
for i := NumWords; i < len(z); i++ { |
|
|
|
z[i], borrow = addc64(borrow, z[i], p751[i-NumWords]&mask) |
|
|
|
z[i], borrow = Addc64(borrow, z[i], p751[i-NumWords]&mask) |
|
|
|
} |
|
|
|
} |