From 80d9660e4dad9fcb9f69f475adbe3b9ca09167e2 Mon Sep 17 00:00:00 2001 From: Kris Kwiatkowski Date: Mon, 8 Oct 2018 12:32:44 +0100 Subject: [PATCH] removes need for useADX and useBMI2 flags --- internal/utils/cpuid.go | 12 ++++++--- p503/arith_amd64.go | 24 ------------------ p503/arith_amd64.s | 8 +++--- p503/arith_amd64_test.go | 54 ++++++++++++++++++++-------------------- p503/arith_decl.go | 3 +++ p751/arith_amd64.go | 24 ------------------ p751/arith_amd64.s | 4 +-- p751/arith_amd64_test.go | 30 +++++++++++----------- p751/arith_decl.go | 3 +++ 9 files changed, 62 insertions(+), 100 deletions(-) delete mode 100644 p503/arith_amd64.go delete mode 100644 p751/arith_amd64.go diff --git a/internal/utils/cpuid.go b/internal/utils/cpuid.go index 9a5fa73..c4eb648 100644 --- a/internal/utils/cpuid.go +++ b/internal/utils/cpuid.go @@ -10,8 +10,8 @@ package utils // Signals support for MULX which is in BMI2 var HasBMI2 bool -// Signals support for MULX and BMI2 -var HasADX bool +// Signals support for ADX and BMI2 +var HasADXandBMI2 bool // Performs CPUID and returns values of registers // go:nosplit @@ -22,7 +22,7 @@ func bitn(bits uint32, n uint8) bool { return (bits>>n)&1 == 1 } -func init() { +func RecognizeCpu() { // CPUID returns max possible input that can be requested max, _, _, _ := cpuid(0, 0) if max < 7 { @@ -31,5 +31,9 @@ func init() { _, ebx, _, _ := cpuid(7, 0) HasBMI2 = bitn(ebx, 19) - HasADX = bitn(ebx, 7) + HasADXandBMI2 = bitn(ebx, 7) && HasBMI2 +} + +func init() { + RecognizeCpu() } diff --git a/p503/arith_amd64.go b/p503/arith_amd64.go deleted file mode 100644 index ffb3a83..0000000 --- a/p503/arith_amd64.go +++ /dev/null @@ -1,24 +0,0 @@ -// +build amd64,!noasm - -package p503 - -import cpu "github.com/cloudflare/sidh/internal/utils" - -// There couple of reasons for having those variables here: -// * to have an access to them from assembly -// TODO(kk): Is there a way to access variable from different package? -// If it is then probably this file could be moved to internal -// and we don't need to have many copies of that -// * make it easy to vendor the library -// * make it possible to test all functionalities -var useMULX bool -var useADXMULX bool - -func recognizecpu() { - useMULX = cpu.HasBMI2 - useADXMULX = cpu.HasADX && cpu.HasBMI2 -} - -func init() { - recognizecpu() -} diff --git a/p503/arith_amd64.s b/p503/arith_amd64.s index d0b4698..302db3a 100644 --- a/p503/arith_amd64.s +++ b/p503/arith_amd64.s @@ -697,9 +697,9 @@ TEXT ·fp503Mul(SB), NOSPLIT, $104-24 MOVQ y+16(FP), REG_P2 // Check wether to use optimized implementation - CMPB ·useADXMULX(SB), $1 + CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 JE mul_with_mulx_adx - CMPB ·useMULX(SB), $1 + CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 JE mul_with_mulx // Generic x86 implementation (below) uses variant of Karatsuba method. @@ -1194,9 +1194,9 @@ TEXT ·fp503MontgomeryReduce(SB), $0-16 MOVQ x+8(FP), REG_P1 // Check wether to use optimized implementation - CMPB ·useADXMULX(SB), $1 + CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 JE redc_with_mulx_adx - CMPB ·useMULX(SB), $1 + CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 JE redc_with_mulx MOVQ (REG_P1), R11 diff --git a/p503/arith_amd64_test.go b/p503/arith_amd64_test.go index 6adb6a9..8ad6bb9 100644 --- a/p503/arith_amd64_test.go +++ b/p503/arith_amd64_test.go @@ -13,26 +13,26 @@ import ( type OptimFlag uint const ( - kUse_MUL OptimFlag = 1 << 0 - kUse_MULX = 1 << 1 - kUse_MULXADX = 1 << 2 + kUse_MUL OptimFlag = 1 << 0 + kUse_MULX = 1 << 1 + kUse_BMI2andADX = 1 << 2 ) // Utility function used for testing Mul implementations. Tests caller provided // mulFunc against mul() func testMul(t *testing.T, f1, f2 OptimFlag) { doMulTest := func(multiplier, multiplicant FpElement) bool { - defer recognizecpu() + defer cpu.RecognizeCpu() var resMulRef, resMulOptim FpElementX2 // Compute multiplier*multiplicant with first implementation - useMULX = (kUse_MULX & f1) == kUse_MULX - useADXMULX = (kUse_MULXADX & f1) == kUse_MULXADX + cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX + cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX fp503Mul(&resMulOptim, &multiplier, &multiplicant) // Compute multiplier*multiplicant with second implementation - useMULX = (kUse_MULX & f2) == kUse_MULX - useADXMULX = (kUse_MULXADX & f2) == kUse_MULXADX + cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX + cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX fp503Mul(&resMulRef, &multiplier, &multiplicant) // Compare results @@ -48,18 +48,18 @@ func testMul(t *testing.T, f1, f2 OptimFlag) { // redcFunc against redc() func testRedc(t *testing.T, f1, f2 OptimFlag) { doRedcTest := func(aRR FpElementX2) bool { - defer recognizecpu() + defer cpu.RecognizeCpu() var resRedcF1, resRedcF2 FpElement var aRRcpy = aRR // Compute redc with first implementation - useMULX = (kUse_MULX & f1) == kUse_MULX - useADXMULX = (kUse_MULXADX & f1) == kUse_MULXADX + cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX + cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX fp503MontgomeryReduce(&resRedcF1, &aRR) // Compute redc with second implementation - useMULX = (kUse_MULX & f2) == kUse_MULX - useADXMULX = (kUse_MULXADX & f2) == kUse_MULXADX + cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX + cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX fp503MontgomeryReduce(&resRedcF2, &aRRcpy) // Compare results @@ -73,7 +73,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { // Ensures corretness of implementation of mul operation which uses MULX func TestMulWithMULX(t *testing.T) { - defer recognizecpu() + defer cpu.RecognizeCpu() if !cpu.HasBMI2 { t.Skip("MULX not supported by the platform") } @@ -82,25 +82,25 @@ func TestMulWithMULX(t *testing.T) { // Ensures corretness of implementation of mul operation which uses MULX and ADOX/ADCX func TestMulWithMULXADX(t *testing.T) { - defer recognizecpu() - if !(cpu.HasADX && cpu.HasBMI2) { + defer cpu.RecognizeCpu() + if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testMul(t, kUse_MULXADX, kUse_MUL) + testMul(t, kUse_BMI2andADX, kUse_MUL) } // Ensures corretness of implementation of mul operation which uses MULX and ADOX/ADCX func TestMulWithMULXADXAgainstMULX(t *testing.T) { - defer recognizecpu() - if !(cpu.HasADX && cpu.HasBMI2) { + defer cpu.RecognizeCpu() + if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testMul(t, kUse_MULX, kUse_MULXADX) + testMul(t, kUse_MULX, kUse_BMI2andADX) } // Ensures corretness of Montgomery reduction implementation which uses MULX func TestRedcWithMULX(t *testing.T) { - defer recognizecpu() + defer cpu.RecognizeCpu() if !cpu.HasBMI2 { t.Skip("MULX not supported by the platform") } @@ -110,19 +110,19 @@ func TestRedcWithMULX(t *testing.T) { // Ensures corretness of Montgomery reduction implementation which uses MULX // and ADX func TestRedcWithMULXADX(t *testing.T) { - defer recognizecpu() - if !(cpu.HasADX && cpu.HasBMI2) { + defer cpu.RecognizeCpu() + if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testRedc(t, kUse_MULXADX, kUse_MUL) + testRedc(t, kUse_BMI2andADX, kUse_MUL) } // Ensures corretness of Montgomery reduction implementation which uses MULX // and ADX. func TestRedcWithMULXADXAgainstMULX(t *testing.T) { - defer recognizecpu() - if !(cpu.HasADX && cpu.HasBMI2) { + defer cpu.RecognizeCpu() + if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testRedc(t, kUse_MULXADX, kUse_MULX) + testRedc(t, kUse_BMI2andADX, kUse_MULX) } diff --git a/p503/arith_decl.go b/p503/arith_decl.go index 5bc6884..299ba4c 100644 --- a/p503/arith_decl.go +++ b/p503/arith_decl.go @@ -4,6 +4,9 @@ package p503 import ( . "github.com/cloudflare/sidh/internal/isogeny" + // This is imported only because arith_amd64.s needs + // some symbols from cpuid.go + _ "github.com/cloudflare/sidh/internal/utils" ) // If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x. diff --git a/p751/arith_amd64.go b/p751/arith_amd64.go deleted file mode 100644 index 49d336a..0000000 --- a/p751/arith_amd64.go +++ /dev/null @@ -1,24 +0,0 @@ -// +build amd64,!noasm - -package p751 - -import cpu "github.com/cloudflare/sidh/internal/utils" - -// There couple of reasons for having those variables here: -// * to have an access to them from assembly -// TODO(kk): Is there a way to access variable from different package? -// If it is then probably this file could be moved to internal -// and we don't need to have many copies of that -// * make it easy to vendor the library -// * make it possible to test all functionalities -var useMULX bool -var useADXMULX bool - -func recognizecpu() { - useMULX = cpu.HasBMI2 - useADXMULX = cpu.HasADX && cpu.HasBMI2 -} - -func init() { - recognizecpu() -} diff --git a/p751/arith_amd64.s b/p751/arith_amd64.s index 65e7500..0a01962 100644 --- a/p751/arith_amd64.s +++ b/p751/arith_amd64.s @@ -1740,9 +1740,9 @@ TEXT ·fp751MontgomeryReduce(SB), $0-16 MOVQ x+8(FP), REG_P1 // Check wether to use optimized implementation - CMPB ·useADXMULX(SB), $1 + CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 JE redc_with_mulx_adx - CMPB ·useMULX(SB), $1 + CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 JE redc_with_mulx MOVQ (REG_P1), R11 diff --git a/p751/arith_amd64_test.go b/p751/arith_amd64_test.go index c444176..3dc5a67 100644 --- a/p751/arith_amd64_test.go +++ b/p751/arith_amd64_test.go @@ -14,27 +14,27 @@ import ( type OptimFlag uint const ( - kUse_MUL OptimFlag = 1 << 0 - kUse_MULX = 1 << 1 - kUse_MULXADX = 1 << 2 + kUse_MUL OptimFlag = 1 << 0 + kUse_MULX = 1 << 1 + kUse_ADXandBMI2 = 1 << 2 ) // Utility function used for testing REDC implementations. Tests caller provided // redcFunc against redc() func testRedc(t *testing.T, f1, f2 OptimFlag) { doRedcTest := func(aRR FpElementX2) bool { - defer recognizecpu() + defer cpu.RecognizeCpu() var resRedcF1, resRedcF2 FpElement var aRRcpy = aRR // Compute redc with first implementation - useMULX = (kUse_MULX & f1) == kUse_MULX - useADXMULX = (kUse_MULXADX & f1) == kUse_MULXADX + cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX + cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f1) == kUse_ADXandBMI2 fp751MontgomeryReduce(&resRedcF1, &aRR) // Compute redc with second implementation - useMULX = (kUse_MULX & f2) == kUse_MULX - useADXMULX = (kUse_MULXADX & f2) == kUse_MULXADX + cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX + cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f2) == kUse_ADXandBMI2 fp751MontgomeryReduce(&resRedcF2, &aRRcpy) // Compare results @@ -48,7 +48,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { // Ensures corretness of Montgomery reduction implementation which uses MULX func TestRedcWithMULX(t *testing.T) { - defer recognizecpu() + defer cpu.RecognizeCpu() if !cpu.HasBMI2 { t.Skip("MULX not supported by the platform") } @@ -58,19 +58,19 @@ func TestRedcWithMULX(t *testing.T) { // Ensures corretness of Montgomery reduction implementation which uses MULX // and ADX func TestRedcWithMULXADX(t *testing.T) { - defer recognizecpu() - if !(cpu.HasADX && cpu.HasBMI2) { + defer cpu.RecognizeCpu() + if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testRedc(t, kUse_MULXADX, kUse_MUL) + testRedc(t, kUse_ADXandBMI2, kUse_MUL) } // Ensures corretness of Montgomery reduction implementation which uses MULX // and ADX. func TestRedcWithMULXADXAgainstMULX(t *testing.T) { - defer recognizecpu() - if !(cpu.HasADX && cpu.HasBMI2) { + defer cpu.RecognizeCpu() + if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testRedc(t, kUse_MULXADX, kUse_MULX) + testRedc(t, kUse_ADXandBMI2, kUse_MULX) } diff --git a/p751/arith_decl.go b/p751/arith_decl.go index 61a0659..f81f366 100644 --- a/p751/arith_decl.go +++ b/p751/arith_decl.go @@ -4,6 +4,9 @@ package p751 import ( . "github.com/cloudflare/sidh/internal/isogeny" + // This is imported only because arith_amd64.s needs + // some symbols from cpuid.go + _ "github.com/cloudflare/sidh/internal/utils" ) // If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x.