@@ -10,8 +10,8 @@ package utils | |||||
// Signals support for MULX which is in BMI2 | // Signals support for MULX which is in BMI2 | ||||
var HasBMI2 bool | var HasBMI2 bool | ||||
// Signals support for MULX and BMI2 | |||||
var HasADX bool | |||||
// Signals support for ADX and BMI2 | |||||
var HasADXandBMI2 bool | |||||
// Performs CPUID and returns values of registers | // Performs CPUID and returns values of registers | ||||
// go:nosplit | // go:nosplit | ||||
@@ -22,7 +22,7 @@ func bitn(bits uint32, n uint8) bool { | |||||
return (bits>>n)&1 == 1 | return (bits>>n)&1 == 1 | ||||
} | } | ||||
func init() { | |||||
func RecognizeCpu() { | |||||
// CPUID returns max possible input that can be requested | // CPUID returns max possible input that can be requested | ||||
max, _, _, _ := cpuid(0, 0) | max, _, _, _ := cpuid(0, 0) | ||||
if max < 7 { | if max < 7 { | ||||
@@ -31,5 +31,9 @@ func init() { | |||||
_, ebx, _, _ := cpuid(7, 0) | _, ebx, _, _ := cpuid(7, 0) | ||||
HasBMI2 = bitn(ebx, 19) | HasBMI2 = bitn(ebx, 19) | ||||
HasADX = bitn(ebx, 7) | |||||
HasADXandBMI2 = bitn(ebx, 7) && HasBMI2 | |||||
} | |||||
func init() { | |||||
RecognizeCpu() | |||||
} | } |
@@ -1,24 +0,0 @@ | |||||
// +build amd64,!noasm | |||||
package p503 | |||||
import cpu "github.com/cloudflare/sidh/internal/utils" | |||||
// There couple of reasons for having those variables here: | |||||
// * to have an access to them from assembly | |||||
// TODO(kk): Is there a way to access variable from different package? | |||||
// If it is then probably this file could be moved to internal | |||||
// and we don't need to have many copies of that | |||||
// * make it easy to vendor the library | |||||
// * make it possible to test all functionalities | |||||
var useMULX bool | |||||
var useADXMULX bool | |||||
func recognizecpu() { | |||||
useMULX = cpu.HasBMI2 | |||||
useADXMULX = cpu.HasADX && cpu.HasBMI2 | |||||
} | |||||
func init() { | |||||
recognizecpu() | |||||
} |
@@ -697,9 +697,9 @@ TEXT ·fp503Mul(SB), NOSPLIT, $104-24 | |||||
MOVQ y+16(FP), REG_P2 | MOVQ y+16(FP), REG_P2 | ||||
// Check whether to use optimized implementation | // Check whether to use optimized implementation | ||||
CMPB ·useADXMULX(SB), $1 | |||||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 | |||||
JE mul_with_mulx_adx | JE mul_with_mulx_adx | ||||
CMPB ·useMULX(SB), $1 | |||||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 | |||||
JE mul_with_mulx | JE mul_with_mulx | ||||
// Generic x86 implementation (below) uses variant of Karatsuba method. | // Generic x86 implementation (below) uses variant of Karatsuba method. | ||||
@@ -1194,9 +1194,9 @@ TEXT ·fp503MontgomeryReduce(SB), $0-16 | |||||
MOVQ x+8(FP), REG_P1 | MOVQ x+8(FP), REG_P1 | ||||
// Check whether to use optimized implementation | // Check whether to use optimized implementation | ||||
CMPB ·useADXMULX(SB), $1 | |||||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 | |||||
JE redc_with_mulx_adx | JE redc_with_mulx_adx | ||||
CMPB ·useMULX(SB), $1 | |||||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 | |||||
JE redc_with_mulx | JE redc_with_mulx | ||||
MOVQ (REG_P1), R11 | MOVQ (REG_P1), R11 | ||||
@@ -13,26 +13,26 @@ import ( | |||||
type OptimFlag uint | type OptimFlag uint | ||||
const ( | const ( | ||||
kUse_MUL OptimFlag = 1 << 0 | |||||
kUse_MULX = 1 << 1 | |||||
kUse_MULXADX = 1 << 2 | |||||
kUse_MUL OptimFlag = 1 << 0 | |||||
kUse_MULX = 1 << 1 | |||||
kUse_BMI2andADX = 1 << 2 | |||||
) | ) | ||||
// Utility function used for testing Mul implementations. Runs fp503Mul with the | // Utility function used for testing Mul implementations. Runs fp503Mul with the | ||||
// implementation selected by f1 and again with the one selected by f2 | // implementation selected by f1 and again with the one selected by f2 | ||||
func testMul(t *testing.T, f1, f2 OptimFlag) { | func testMul(t *testing.T, f1, f2 OptimFlag) { | ||||
doMulTest := func(multiplier, multiplicant FpElement) bool { | doMulTest := func(multiplier, multiplicant FpElement) bool { | ||||
defer recognizecpu() | |||||
defer cpu.RecognizeCpu() | |||||
var resMulRef, resMulOptim FpElementX2 | var resMulRef, resMulOptim FpElementX2 | ||||
// Compute multiplier*multiplicant with first implementation | // Compute multiplier*multiplicant with first implementation | ||||
useMULX = (kUse_MULX & f1) == kUse_MULX | |||||
useADXMULX = (kUse_MULXADX & f1) == kUse_MULXADX | |||||
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX | |||||
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX | |||||
fp503Mul(&resMulOptim, &multiplier, &multiplicant) | fp503Mul(&resMulOptim, &multiplier, &multiplicant) | ||||
// Compute multiplier*multiplicant with second implementation | // Compute multiplier*multiplicant with second implementation | ||||
useMULX = (kUse_MULX & f2) == kUse_MULX | |||||
useADXMULX = (kUse_MULXADX & f2) == kUse_MULXADX | |||||
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX | |||||
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX | |||||
fp503Mul(&resMulRef, &multiplier, &multiplicant) | fp503Mul(&resMulRef, &multiplier, &multiplicant) | ||||
// Compare results | // Compare results | ||||
@@ -48,18 +48,18 @@ func testMul(t *testing.T, f1, f2 OptimFlag) { | |||||
// redcFunc against redc() | // redcFunc against redc() | ||||
func testRedc(t *testing.T, f1, f2 OptimFlag) { | func testRedc(t *testing.T, f1, f2 OptimFlag) { | ||||
doRedcTest := func(aRR FpElementX2) bool { | doRedcTest := func(aRR FpElementX2) bool { | ||||
defer recognizecpu() | |||||
defer cpu.RecognizeCpu() | |||||
var resRedcF1, resRedcF2 FpElement | var resRedcF1, resRedcF2 FpElement | ||||
var aRRcpy = aRR | var aRRcpy = aRR | ||||
// Compute redc with first implementation | // Compute redc with first implementation | ||||
useMULX = (kUse_MULX & f1) == kUse_MULX | |||||
useADXMULX = (kUse_MULXADX & f1) == kUse_MULXADX | |||||
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX | |||||
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX | |||||
fp503MontgomeryReduce(&resRedcF1, &aRR) | fp503MontgomeryReduce(&resRedcF1, &aRR) | ||||
// Compute redc with second implementation | // Compute redc with second implementation | ||||
useMULX = (kUse_MULX & f2) == kUse_MULX | |||||
useADXMULX = (kUse_MULXADX & f2) == kUse_MULXADX | |||||
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX | |||||
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX | |||||
fp503MontgomeryReduce(&resRedcF2, &aRRcpy) | fp503MontgomeryReduce(&resRedcF2, &aRRcpy) | ||||
// Compare results | // Compare results | ||||
@@ -73,7 +73,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { | |||||
// Ensures correctness of implementation of mul operation which uses MULX | // Ensures correctness of implementation of mul operation which uses MULX | ||||
func TestMulWithMULX(t *testing.T) { | func TestMulWithMULX(t *testing.T) { | ||||
defer recognizecpu() | |||||
defer cpu.RecognizeCpu() | |||||
if !cpu.HasBMI2 { | if !cpu.HasBMI2 { | ||||
t.Skip("MULX not supported by the platform") | t.Skip("MULX not supported by the platform") | ||||
} | } | ||||
@@ -82,25 +82,25 @@ func TestMulWithMULX(t *testing.T) { | |||||
// Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX | // Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX | ||||
func TestMulWithMULXADX(t *testing.T) { | func TestMulWithMULXADX(t *testing.T) { | ||||
defer recognizecpu() | |||||
if !(cpu.HasADX && cpu.HasBMI2) { | |||||
defer cpu.RecognizeCpu() | |||||
if !cpu.HasADXandBMI2 { | |||||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | t.Skip("MULX, ADCX and ADOX not supported by the platform") | ||||
} | } | ||||
testMul(t, kUse_MULXADX, kUse_MUL) | |||||
testMul(t, kUse_BMI2andADX, kUse_MUL) | |||||
} | } | ||||
// Ensures correctness of the MULX and ADOX/ADCX mul implementation by comparing it against the MULX-only one | // Ensures correctness of the MULX and ADOX/ADCX mul implementation by comparing it against the MULX-only one | ||||
func TestMulWithMULXADXAgainstMULX(t *testing.T) { | func TestMulWithMULXADXAgainstMULX(t *testing.T) { | ||||
defer recognizecpu() | |||||
if !(cpu.HasADX && cpu.HasBMI2) { | |||||
defer cpu.RecognizeCpu() | |||||
if !cpu.HasADXandBMI2 { | |||||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | t.Skip("MULX, ADCX and ADOX not supported by the platform") | ||||
} | } | ||||
testMul(t, kUse_MULX, kUse_MULXADX) | |||||
testMul(t, kUse_MULX, kUse_BMI2andADX) | |||||
} | } | ||||
// Ensures correctness of Montgomery reduction implementation which uses MULX | // Ensures correctness of Montgomery reduction implementation which uses MULX | ||||
func TestRedcWithMULX(t *testing.T) { | func TestRedcWithMULX(t *testing.T) { | ||||
defer recognizecpu() | |||||
defer cpu.RecognizeCpu() | |||||
if !cpu.HasBMI2 { | if !cpu.HasBMI2 { | ||||
t.Skip("MULX not supported by the platform") | t.Skip("MULX not supported by the platform") | ||||
} | } | ||||
@@ -110,19 +110,19 @@ func TestRedcWithMULX(t *testing.T) { | |||||
// Ensures correctness of Montgomery reduction implementation which uses MULX | // Ensures correctness of Montgomery reduction implementation which uses MULX | ||||
// and ADX | // and ADX | ||||
func TestRedcWithMULXADX(t *testing.T) { | func TestRedcWithMULXADX(t *testing.T) { | ||||
defer recognizecpu() | |||||
if !(cpu.HasADX && cpu.HasBMI2) { | |||||
defer cpu.RecognizeCpu() | |||||
if !cpu.HasADXandBMI2 { | |||||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | t.Skip("MULX, ADCX and ADOX not supported by the platform") | ||||
} | } | ||||
testRedc(t, kUse_MULXADX, kUse_MUL) | |||||
testRedc(t, kUse_BMI2andADX, kUse_MUL) | |||||
} | } | ||||
// Ensures correctness of Montgomery reduction implementation which uses MULX | // Ensures correctness of Montgomery reduction implementation which uses MULX | ||||
// and ADX. | // and ADX. | ||||
func TestRedcWithMULXADXAgainstMULX(t *testing.T) { | func TestRedcWithMULXADXAgainstMULX(t *testing.T) { | ||||
defer recognizecpu() | |||||
if !(cpu.HasADX && cpu.HasBMI2) { | |||||
defer cpu.RecognizeCpu() | |||||
if !cpu.HasADXandBMI2 { | |||||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | t.Skip("MULX, ADCX and ADOX not supported by the platform") | ||||
} | } | ||||
testRedc(t, kUse_MULXADX, kUse_MULX) | |||||
testRedc(t, kUse_BMI2andADX, kUse_MULX) | |||||
} | } |
@@ -4,6 +4,9 @@ package p503 | |||||
import ( | import ( | ||||
. "github.com/cloudflare/sidh/internal/isogeny" | . "github.com/cloudflare/sidh/internal/isogeny" | ||||
// This is imported only because arith_amd64.s needs | |||||
// some symbols from cpuid.go | |||||
_ "github.com/cloudflare/sidh/internal/utils" | |||||
) | ) | ||||
// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x. | // If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x. | ||||
@@ -1,24 +0,0 @@ | |||||
// +build amd64,!noasm | |||||
package p751 | |||||
import cpu "github.com/cloudflare/sidh/internal/utils" | |||||
// There couple of reasons for having those variables here: | |||||
// * to have an access to them from assembly | |||||
// TODO(kk): Is there a way to access variable from different package? | |||||
// If it is then probably this file could be moved to internal | |||||
// and we don't need to have many copies of that | |||||
// * make it easy to vendor the library | |||||
// * make it possible to test all functionalities | |||||
var useMULX bool | |||||
var useADXMULX bool | |||||
func recognizecpu() { | |||||
useMULX = cpu.HasBMI2 | |||||
useADXMULX = cpu.HasADX && cpu.HasBMI2 | |||||
} | |||||
func init() { | |||||
recognizecpu() | |||||
} |
@@ -1740,9 +1740,9 @@ TEXT ·fp751MontgomeryReduce(SB), $0-16 | |||||
MOVQ x+8(FP), REG_P1 | MOVQ x+8(FP), REG_P1 | ||||
// Check whether to use optimized implementation | // Check whether to use optimized implementation | ||||
CMPB ·useADXMULX(SB), $1 | |||||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 | |||||
JE redc_with_mulx_adx | JE redc_with_mulx_adx | ||||
CMPB ·useMULX(SB), $1 | |||||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 | |||||
JE redc_with_mulx | JE redc_with_mulx | ||||
MOVQ (REG_P1), R11 | MOVQ (REG_P1), R11 | ||||
@@ -14,27 +14,27 @@ import ( | |||||
type OptimFlag uint | type OptimFlag uint | ||||
const ( | const ( | ||||
kUse_MUL OptimFlag = 1 << 0 | |||||
kUse_MULX = 1 << 1 | |||||
kUse_MULXADX = 1 << 2 | |||||
kUse_MUL OptimFlag = 1 << 0 | |||||
kUse_MULX = 1 << 1 | |||||
kUse_ADXandBMI2 = 1 << 2 | |||||
) | ) | ||||
// Utility function used for testing REDC implementations. Runs fp751MontgomeryReduce | // Utility function used for testing REDC implementations. Runs fp751MontgomeryReduce | ||||
// with the implementation selected by f1 and again with the one selected by f2 | // with the implementation selected by f1 and again with the one selected by f2 | ||||
func testRedc(t *testing.T, f1, f2 OptimFlag) { | func testRedc(t *testing.T, f1, f2 OptimFlag) { | ||||
doRedcTest := func(aRR FpElementX2) bool { | doRedcTest := func(aRR FpElementX2) bool { | ||||
defer recognizecpu() | |||||
defer cpu.RecognizeCpu() | |||||
var resRedcF1, resRedcF2 FpElement | var resRedcF1, resRedcF2 FpElement | ||||
var aRRcpy = aRR | var aRRcpy = aRR | ||||
// Compute redc with first implementation | // Compute redc with first implementation | ||||
useMULX = (kUse_MULX & f1) == kUse_MULX | |||||
useADXMULX = (kUse_MULXADX & f1) == kUse_MULXADX | |||||
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX | |||||
cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f1) == kUse_ADXandBMI2 | |||||
fp751MontgomeryReduce(&resRedcF1, &aRR) | fp751MontgomeryReduce(&resRedcF1, &aRR) | ||||
// Compute redc with second implementation | // Compute redc with second implementation | ||||
useMULX = (kUse_MULX & f2) == kUse_MULX | |||||
useADXMULX = (kUse_MULXADX & f2) == kUse_MULXADX | |||||
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX | |||||
cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f2) == kUse_ADXandBMI2 | |||||
fp751MontgomeryReduce(&resRedcF2, &aRRcpy) | fp751MontgomeryReduce(&resRedcF2, &aRRcpy) | ||||
// Compare results | // Compare results | ||||
@@ -48,7 +48,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { | |||||
// Ensures correctness of Montgomery reduction implementation which uses MULX | // Ensures correctness of Montgomery reduction implementation which uses MULX | ||||
func TestRedcWithMULX(t *testing.T) { | func TestRedcWithMULX(t *testing.T) { | ||||
defer recognizecpu() | |||||
defer cpu.RecognizeCpu() | |||||
if !cpu.HasBMI2 { | if !cpu.HasBMI2 { | ||||
t.Skip("MULX not supported by the platform") | t.Skip("MULX not supported by the platform") | ||||
} | } | ||||
@@ -58,19 +58,19 @@ func TestRedcWithMULX(t *testing.T) { | |||||
// Ensures correctness of Montgomery reduction implementation which uses MULX | // Ensures correctness of Montgomery reduction implementation which uses MULX | ||||
// and ADX | // and ADX | ||||
func TestRedcWithMULXADX(t *testing.T) { | func TestRedcWithMULXADX(t *testing.T) { | ||||
defer recognizecpu() | |||||
if !(cpu.HasADX && cpu.HasBMI2) { | |||||
defer cpu.RecognizeCpu() | |||||
if !cpu.HasADXandBMI2 { | |||||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | t.Skip("MULX, ADCX and ADOX not supported by the platform") | ||||
} | } | ||||
testRedc(t, kUse_MULXADX, kUse_MUL) | |||||
testRedc(t, kUse_ADXandBMI2, kUse_MUL) | |||||
} | } | ||||
// Ensures correctness of Montgomery reduction implementation which uses MULX | // Ensures correctness of Montgomery reduction implementation which uses MULX | ||||
// and ADX. | // and ADX. | ||||
func TestRedcWithMULXADXAgainstMULX(t *testing.T) { | func TestRedcWithMULXADXAgainstMULX(t *testing.T) { | ||||
defer recognizecpu() | |||||
if !(cpu.HasADX && cpu.HasBMI2) { | |||||
defer cpu.RecognizeCpu() | |||||
if !cpu.HasADXandBMI2 { | |||||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | t.Skip("MULX, ADCX and ADOX not supported by the platform") | ||||
} | } | ||||
testRedc(t, kUse_MULXADX, kUse_MULX) | |||||
testRedc(t, kUse_ADXandBMI2, kUse_MULX) | |||||
} | } |
@@ -4,6 +4,9 @@ package p751 | |||||
import ( | import ( | ||||
. "github.com/cloudflare/sidh/internal/isogeny" | . "github.com/cloudflare/sidh/internal/isogeny" | ||||
// This is imported only because arith_amd64.s needs | |||||
// some symbols from cpuid.go | |||||
_ "github.com/cloudflare/sidh/internal/utils" | |||||
) | ) | ||||
// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x. | // If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x. | ||||