Przeglądaj źródła

removes need for useADX and useBMI2 flags

master
Henry Case 6 lat temu
committed by Kris Kwiatkowski
rodzic
commit
80d9660e4d
9 zmienionych plików z 62 dodań i 100 usunięć
  1. +8
    -4
      internal/utils/cpuid.go
  2. +0
    -24
      p503/arith_amd64.go
  3. +4
    -4
      p503/arith_amd64.s
  4. +27
    -27
      p503/arith_amd64_test.go
  5. +3
    -0
      p503/arith_decl.go
  6. +0
    -24
      p751/arith_amd64.go
  7. +2
    -2
      p751/arith_amd64.s
  8. +15
    -15
      p751/arith_amd64_test.go
  9. +3
    -0
      p751/arith_decl.go

+ 8
- 4
internal/utils/cpuid.go Wyświetl plik

@@ -10,8 +10,8 @@ package utils
// Signals support for MULX which is in BMI2
var HasBMI2 bool

// Signals support for MULX and BMI2
var HasADX bool
// Signals support for ADX and BMI2
var HasADXandBMI2 bool

// Performs CPUID and returns values of registers
// go:nosplit
@@ -22,7 +22,7 @@ func bitn(bits uint32, n uint8) bool {
return (bits>>n)&1 == 1
}

func init() {
func RecognizeCpu() {
// CPUID returns max possible input that can be requested
max, _, _, _ := cpuid(0, 0)
if max < 7 {
@@ -31,5 +31,9 @@ func init() {

_, ebx, _, _ := cpuid(7, 0)
HasBMI2 = bitn(ebx, 19)
HasADX = bitn(ebx, 7)
HasADXandBMI2 = bitn(ebx, 7) && HasBMI2
}

// init calls RecognizeCpu when the package is initialized, so the exported
// feature flags are populated without an explicit call from the importer.
func init() {
RecognizeCpu()
}

+ 0
- 24
p503/arith_amd64.go Wyświetl plik

@@ -1,24 +0,0 @@
// +build amd64,!noasm

package p503

import cpu "github.com/cloudflare/sidh/internal/utils"

// There are a couple of reasons for having those variables here:
// * to have access to them from assembly
// TODO(kk): Is there a way to access a variable from a different package?
// If there is, then this file could probably be moved to internal
// and we wouldn't need to have many copies of it
// * make it easy to vendor the library
// * make it possible to test all functionalities
var useMULX bool
var useADXMULX bool

// recognizecpu copies the feature flags exposed by the cpu package into the
// package-local variables: useMULX mirrors cpu.HasBMI2, and useADXMULX is true
// only when both cpu.HasADX and cpu.HasBMI2 are true.
func recognizecpu() {
useMULX = cpu.HasBMI2
useADXMULX = cpu.HasADX && cpu.HasBMI2
}

// init runs recognizecpu when the package is initialized, setting useMULX and
// useADXMULX from the cpu package's flags.
func init() {
recognizecpu()
}

+ 4
- 4
p503/arith_amd64.s Wyświetl plik

@@ -697,9 +697,9 @@ TEXT ·fp503Mul(SB), NOSPLIT, $104-24
MOVQ y+16(FP), REG_P2

// Check whether to use optimized implementation
CMPB ·useADXMULX(SB), $1
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1
JE mul_with_mulx_adx
CMPB ·useMULX(SB), $1
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1
JE mul_with_mulx

// Generic x86 implementation (below) uses variant of Karatsuba method.
@@ -1194,9 +1194,9 @@ TEXT ·fp503MontgomeryReduce(SB), $0-16
MOVQ x+8(FP), REG_P1

// Check whether to use optimized implementation
CMPB ·useADXMULX(SB), $1
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1
JE redc_with_mulx_adx
CMPB ·useMULX(SB), $1
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1
JE redc_with_mulx

MOVQ (REG_P1), R11


+ 27
- 27
p503/arith_amd64_test.go Wyświetl plik

@@ -13,26 +13,26 @@ import (
type OptimFlag uint

const (
kUse_MUL OptimFlag = 1 << 0
kUse_MULX = 1 << 1
kUse_MULXADX = 1 << 2
kUse_MUL OptimFlag = 1 << 0
kUse_MULX = 1 << 1
kUse_BMI2andADX = 1 << 2
)

// Utility function used for testing Mul implementations. Tests caller provided
// mulFunc against mul()
func testMul(t *testing.T, f1, f2 OptimFlag) {
doMulTest := func(multiplier, multiplicant FpElement) bool {
defer recognizecpu()
defer cpu.RecognizeCpu()
var resMulRef, resMulOptim FpElementX2

// Compute multiplier*multiplicant with first implementation
useMULX = (kUse_MULX & f1) == kUse_MULX
useADXMULX = (kUse_MULXADX & f1) == kUse_MULXADX
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX
fp503Mul(&resMulOptim, &multiplier, &multiplicant)

// Compute multiplier*multiplicant with second implementation
useMULX = (kUse_MULX & f2) == kUse_MULX
useADXMULX = (kUse_MULXADX & f2) == kUse_MULXADX
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX
fp503Mul(&resMulRef, &multiplier, &multiplicant)

// Compare results
@@ -48,18 +48,18 @@ func testMul(t *testing.T, f1, f2 OptimFlag) {
// redcFunc against redc()
func testRedc(t *testing.T, f1, f2 OptimFlag) {
doRedcTest := func(aRR FpElementX2) bool {
defer recognizecpu()
defer cpu.RecognizeCpu()
var resRedcF1, resRedcF2 FpElement
var aRRcpy = aRR

// Compute redc with first implementation
useMULX = (kUse_MULX & f1) == kUse_MULX
useADXMULX = (kUse_MULXADX & f1) == kUse_MULXADX
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX
fp503MontgomeryReduce(&resRedcF1, &aRR)

// Compute redc with second implementation
useMULX = (kUse_MULX & f2) == kUse_MULX
useADXMULX = (kUse_MULXADX & f2) == kUse_MULXADX
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX
fp503MontgomeryReduce(&resRedcF2, &aRRcpy)

// Compare results
@@ -73,7 +73,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) {

// Ensures correctness of implementation of mul operation which uses MULX
func TestMulWithMULX(t *testing.T) {
defer recognizecpu()
defer cpu.RecognizeCpu()
if !cpu.HasBMI2 {
t.Skip("MULX not supported by the platform")
}
@@ -82,25 +82,25 @@ func TestMulWithMULX(t *testing.T) {

// Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX
func TestMulWithMULXADX(t *testing.T) {
defer recognizecpu()
if !(cpu.HasADX && cpu.HasBMI2) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform")
}
testMul(t, kUse_MULXADX, kUse_MUL)
testMul(t, kUse_BMI2andADX, kUse_MUL)
}

// Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX
func TestMulWithMULXADXAgainstMULX(t *testing.T) {
defer recognizecpu()
if !(cpu.HasADX && cpu.HasBMI2) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform")
}
testMul(t, kUse_MULX, kUse_MULXADX)
testMul(t, kUse_MULX, kUse_BMI2andADX)
}

// Ensures correctness of Montgomery reduction implementation which uses MULX
func TestRedcWithMULX(t *testing.T) {
defer recognizecpu()
defer cpu.RecognizeCpu()
if !cpu.HasBMI2 {
t.Skip("MULX not supported by the platform")
}
@@ -110,19 +110,19 @@ func TestRedcWithMULX(t *testing.T) {
// Ensures correctness of Montgomery reduction implementation which uses MULX
// and ADX
func TestRedcWithMULXADX(t *testing.T) {
defer recognizecpu()
if !(cpu.HasADX && cpu.HasBMI2) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform")
}
testRedc(t, kUse_MULXADX, kUse_MUL)
testRedc(t, kUse_BMI2andADX, kUse_MUL)
}

// Ensures correctness of Montgomery reduction implementation which uses MULX
// and ADX.
func TestRedcWithMULXADXAgainstMULX(t *testing.T) {
defer recognizecpu()
if !(cpu.HasADX && cpu.HasBMI2) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform")
}
testRedc(t, kUse_MULXADX, kUse_MULX)
testRedc(t, kUse_BMI2andADX, kUse_MULX)
}

+ 3
- 0
p503/arith_decl.go Wyświetl plik

@@ -4,6 +4,9 @@ package p503

import (
. "github.com/cloudflare/sidh/internal/isogeny"
// This is imported only because arith_amd64.s needs
// some symbols from cpuid.go
_ "github.com/cloudflare/sidh/internal/utils"
)

// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x.


+ 0
- 24
p751/arith_amd64.go Wyświetl plik

@@ -1,24 +0,0 @@
// +build amd64,!noasm

package p751

import cpu "github.com/cloudflare/sidh/internal/utils"

// There are a couple of reasons for having those variables here:
// * to have access to them from assembly
// TODO(kk): Is there a way to access a variable from a different package?
// If there is, then this file could probably be moved to internal
// and we wouldn't need to have many copies of it
// * make it easy to vendor the library
// * make it possible to test all functionalities
var useMULX bool
var useADXMULX bool

// recognizecpu copies the feature flags exposed by the cpu package into the
// package-local variables: useMULX mirrors cpu.HasBMI2, and useADXMULX is true
// only when both cpu.HasADX and cpu.HasBMI2 are true.
func recognizecpu() {
useMULX = cpu.HasBMI2
useADXMULX = cpu.HasADX && cpu.HasBMI2
}

// init runs recognizecpu when the package is initialized, setting useMULX and
// useADXMULX from the cpu package's flags.
func init() {
recognizecpu()
}

+ 2
- 2
p751/arith_amd64.s Wyświetl plik

@@ -1740,9 +1740,9 @@ TEXT ·fp751MontgomeryReduce(SB), $0-16
MOVQ x+8(FP), REG_P1

// Check whether to use optimized implementation
CMPB ·useADXMULX(SB), $1
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1
JE redc_with_mulx_adx
CMPB ·useMULX(SB), $1
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1
JE redc_with_mulx

MOVQ (REG_P1), R11


+ 15
- 15
p751/arith_amd64_test.go Wyświetl plik

@@ -14,27 +14,27 @@ import (
type OptimFlag uint

const (
kUse_MUL OptimFlag = 1 << 0
kUse_MULX = 1 << 1
kUse_MULXADX = 1 << 2
kUse_MUL OptimFlag = 1 << 0
kUse_MULX = 1 << 1
kUse_ADXandBMI2 = 1 << 2
)

// Utility function used for testing REDC implementations. Tests caller provided
// redcFunc against redc()
func testRedc(t *testing.T, f1, f2 OptimFlag) {
doRedcTest := func(aRR FpElementX2) bool {
defer recognizecpu()
defer cpu.RecognizeCpu()
var resRedcF1, resRedcF2 FpElement
var aRRcpy = aRR

// Compute redc with first implementation
useMULX = (kUse_MULX & f1) == kUse_MULX
useADXMULX = (kUse_MULXADX & f1) == kUse_MULXADX
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f1) == kUse_ADXandBMI2
fp751MontgomeryReduce(&resRedcF1, &aRR)

// Compute redc with second implementation
useMULX = (kUse_MULX & f2) == kUse_MULX
useADXMULX = (kUse_MULXADX & f2) == kUse_MULXADX
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f2) == kUse_ADXandBMI2
fp751MontgomeryReduce(&resRedcF2, &aRRcpy)

// Compare results
@@ -48,7 +48,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) {

// Ensures correctness of Montgomery reduction implementation which uses MULX
func TestRedcWithMULX(t *testing.T) {
defer recognizecpu()
defer cpu.RecognizeCpu()
if !cpu.HasBMI2 {
t.Skip("MULX not supported by the platform")
}
@@ -58,19 +58,19 @@ func TestRedcWithMULX(t *testing.T) {
// Ensures correctness of Montgomery reduction implementation which uses MULX
// and ADX
func TestRedcWithMULXADX(t *testing.T) {
defer recognizecpu()
if !(cpu.HasADX && cpu.HasBMI2) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform")
}
testRedc(t, kUse_MULXADX, kUse_MUL)
testRedc(t, kUse_ADXandBMI2, kUse_MUL)
}

// Ensures correctness of Montgomery reduction implementation which uses MULX
// and ADX.
func TestRedcWithMULXADXAgainstMULX(t *testing.T) {
defer recognizecpu()
if !(cpu.HasADX && cpu.HasBMI2) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform")
}
testRedc(t, kUse_MULXADX, kUse_MULX)
testRedc(t, kUse_ADXandBMI2, kUse_MULX)
}

+ 3
- 0
p751/arith_decl.go Wyświetl plik

@@ -4,6 +4,9 @@ package p751

import (
. "github.com/cloudflare/sidh/internal/isogeny"
// This is imported only because arith_amd64.s needs
// some symbols from cpuid.go
_ "github.com/cloudflare/sidh/internal/utils"
)

// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x.


Ładowanie…
Anuluj
Zapisz