Upphovsman | SHA1 | Meddelande | Datum |
---|---|---|---|
Henry Case | a3ccc0e275 |
WIP
WIP2 WIP |
6 år sedan |
@@ -0,0 +1,17 @@ | |||
// +build amd64,!noasm | |||
package p503 | |||
import cpu "github.com/cloudflare/p751sidh/internal/utils" | |||
// There are a couple of reasons for having these variables here: | |||
// 1) to have access to them from assembly | |||
// 2) to make it easy to vendor the library | |||
// 3) to make it possible to test all functionality | |||
var useMULX bool | |||
var useADXMULX bool | |||
// init copies cpu.HasBMI2 into useMULX, and sets useADXMULX to true only | |||
// when both cpu.HasADX and cpu.HasBMI2 are true. | |||
func init() { | |||
useMULX = cpu.HasBMI2 | |||
useADXMULX = cpu.HasADX && cpu.HasBMI2 | |||
} |
@@ -690,23 +690,26 @@ TEXT ·fp503SubReduced(SB), NOSPLIT, $0-24 | |||
RET | |||
TEXT ·mulWithMULXADX(SB), NOSPLIT, $104-24 | |||
TEXT ·fp503Mul(SB), NOSPLIT, $104-24 | |||
// Actual implementation | |||
MOVQ z+ 0(FP), CX | |||
MOVQ x+ 8(FP), REG_P2 | |||
MOVQ y+16(FP), REG_P1 | |||
MUL(CX, REG_P2, REG_P1, MULS256_MULXADX) | |||
RET | |||
MOVQ x+ 8(FP), REG_P1 | |||
MOVQ y+16(FP), REG_P2 | |||
TEXT ·mulWithMULX(SB), NOSPLIT, $104-24 | |||
// Actual implementation | |||
MOVQ z+ 0(FP), CX | |||
MOVQ x+ 8(FP), REG_P2 | |||
MOVQ y+16(FP), REG_P1 | |||
MUL(CX, REG_P2, REG_P1, MULS256_MULX) | |||
// CMPB ·useMULX(SB), $0 | |||
// JE mul | |||
// CMPB ·useMULX(SB), $1 | |||
// JE mulx | |||
// MOVB ·useADXMULX(SB), AX | |||
// TESTB AX, AX | |||
// JZ mulx_and_adx | |||
MUL(CX, REG_P1, REG_P2, MULS256_MULXADX) | |||
RET | |||
TEXT ·mul(SB), $96-24 | |||
mul: | |||
RET | |||
// Uses variant of Karatsuba method. | |||
// | |||
// Here we store the destination in CX instead of in REG_P3 because the | |||
@@ -1188,11 +1191,28 @@ TEXT ·mul(SB), $96-24 | |||
ADCQ $0, SI; MOVQ SI, (120)(CX) | |||
RET | |||
// Uses an implementation optimized for CPUs supporting the carry-less | |||
// multiplier (MULX) and two independent carry chains (ADOX/ADCX) | |||
mulx_and_adx: | |||
MUL(CX, REG_P1, REG_P2, MULS256_MULXADX) | |||
RET | |||
// Uses an implementation optimized for CPUs supporting the carry-less multiplier (MULX) | |||
mulx: | |||
MUL(CX, REG_P1, REG_P2, MULS256_MULX) | |||
RET | |||
TEXT ·redc(SB), $0-16 | |||
TEXT ·fp503MontgomeryReduce(SB), $0-16 | |||
MOVQ z+0(FP), REG_P2 | |||
MOVQ x+8(FP), REG_P1 | |||
REDC(REG_P2, REG_P1, MULS_128x320_MULX) | |||
RET | |||
CMPB ·useADXMULX(SB), $1 | |||
JE redc_with_mulx_adx | |||
CMPB ·useMULX(SB), $1 | |||
JE redc_with_mulx | |||
MOVQ (REG_P1), R11 | |||
MOVQ P503P1_3, AX | |||
MULQ R11 | |||
@@ -1497,17 +1517,12 @@ TEXT ·redc(SB), $0-16 | |||
MOVQ R10, (56)(REG_P2) // Z7 | |||
RET | |||
TEXT ·redcWithMULX(SB), $0-16 | |||
MOVQ z+0(FP), DI | |||
MOVQ x+8(FP), SI | |||
REDC(DI, SI, MULS_128x320_MULX) | |||
redc_with_mulx: | |||
REDC(REG_P2, REG_P1, MULS_128x320_MULX) | |||
RET | |||
TEXT ·redcWithMULXADX(SB), $0-16 | |||
MOVQ z+0(FP), DI | |||
MOVQ x+8(FP), SI | |||
REDC(DI, SI, MULS_128x320_MULXADX) | |||
redc_with_mulx_adx: | |||
REDC(REG_P2, REG_P1, MULS_128x320_MULXADX) | |||
RET | |||
TEXT ·fp503AddLazy(SB), NOSPLIT, $0-24 | |||
@@ -2,14 +2,16 @@ | |||
package p503 | |||
/* | |||
import ( | |||
. "github.com/cloudflare/p751sidh/internal/isogeny" | |||
cpu "github.com/cloudflare/p751sidh/internal/utils" | |||
// cpu "github.com/cloudflare/p751sidh/internal/utils" | |||
"reflect" | |||
"testing" | |||
"testing/quick" | |||
) | |||
*/ | |||
/* | |||
// Utility function used for testing Mul implementations. Tests caller provided | |||
// mulFunc against mul() | |||
func testMul(t *testing.T, mulFunc func(z *FpElementX2, x, y *FpElement)) { | |||
@@ -40,7 +42,8 @@ func testRedc(t *testing.T, redcFunc func(z *FpElement, x *FpElementX2)) { | |||
t.Error(err) | |||
} | |||
} | |||
*/ | |||
/* | |||
// Ensures correctness of the implementation of the mul operation which uses MULX | |||
func TestMulWithMULX(t *testing.T) { | |||
if !cpu.HasBMI2 { | |||
@@ -72,3 +75,4 @@ func TestRedcWithMULXADX(t *testing.T) { | |||
} | |||
testRedc(t, redcWithMULXADX) | |||
} | |||
*/ |
@@ -4,7 +4,6 @@ package p503 | |||
import ( | |||
. "github.com/cloudflare/p751sidh/internal/isogeny" | |||
cpu "github.com/cloudflare/p751sidh/internal/utils" | |||
) | |||
// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x. | |||
@@ -41,24 +40,24 @@ func fp503StrongReduce(x *FpElement) | |||
// The concrete implementation depends on the capabilities of the CPU, which | |||
// are resolved at runtime. CPUs with ADCX, ADOX and MULX support | |||
// run the most optimized implementation. | |||
var fp503Mul func(z *FpElementX2, x, y *FpElement) | |||
// Mul implementation for legacy CPUs | |||
//go:noescape | |||
func mul(z *FpElementX2, x, y *FpElement) | |||
// Mul implementation for CPUs supporting carry-less MULX multiplier. | |||
//go:noescape | |||
func mulWithMULX(z *FpElementX2, x, y *FpElement) | |||
// Mul implementation for CPUs supporting two independent carry chains | |||
// (ADOX/ADCX instructions) and the carry-less MULX multiplier | |||
//go:noescape | |||
func mulWithMULXADX(z *FpElementX2, x, y *FpElement) | |||
func fp503Mul(z *FpElementX2, x, y *FpElement) | |||
// // Mul implementation for legacy CPUs | |||
// //go:noescape | |||
// func mul(z *FpElementX2, x, y *FpElement) | |||
// | |||
// // Mul implementation for CPUs supporting carry-less MULX multiplier. | |||
// //go:noescape | |||
// func mulWithMULX(z *FpElementX2, x, y *FpElement) | |||
// | |||
// // Mul implementation for CPUs supporting two independent carry chains | |||
// // (ADOX/ADCX instructions) and the carry-less MULX multiplier | |||
// //go:noescape | |||
// func mulWithMULXADX(z *FpElementX2, x, y *FpElement) | |||
// Computes the Montgomery reduction z = x R^{-1} (mod 2*p). On return, the value | |||
// of x may be changed. z = x is not allowed. | |||
var fp503MontgomeryReduce func(z *FpElement, x *FpElementX2) | |||
func fp503MontgomeryReduce(z *FpElement, x *FpElementX2) | |||
func redc(z *FpElement, x *FpElementX2) | |||
@@ -71,19 +70,21 @@ func redcWithMULX(z *FpElement, x *FpElementX2) | |||
//go:noescape | |||
func redcWithMULXADX(z *FpElement, x *FpElementX2) | |||
/* | |||
// On initialization, set the fp503Mul function pointer to the | |||
// fastest implementation depending on CPU capabilities. | |||
func init() { | |||
if cpu.HasBMI2 { | |||
if cpu.HasADX { | |||
fp503Mul = mulWithMULXADX | |||
// fp503Mul = mulWithMULXADX | |||
fp503MontgomeryReduce = redcWithMULXADX | |||
} else { | |||
fp503Mul = mulWithMULX | |||
// fp503Mul = mulWithMULX | |||
fp503MontgomeryReduce = redcWithMULX | |||
} | |||
} else { | |||
fp503Mul = mul | |||
// fp503Mul = mul | |||
fp503MontgomeryReduce = redc | |||
} | |||
} | |||
*/ |