Henry Case 6年前
コミット
a6e152eaec
10個のファイルの変更86行の追加57行の削除
  1. +2
    -4
      Makefile
  2. +3
    -3
      internal/utils/cpuid.go
  3. +0
    -8
      internal/utils/utils.go
  4. +16
    -0
      p503/arith_amd64.go
  5. +4
    -4
      p503/arith_amd64.s
  6. +27
    -22
      p503/arith_amd64_test.go
  7. +0
    -3
      p503/arith_decl.go
  8. +16
    -0
      p751/arith_amd64.go
  9. +2
    -2
      p751/arith_amd64.s
  10. +16
    -11
      p751/arith_amd64_test.go

+ 2
- 4
Makefile ファイルの表示

@@ -43,11 +43,11 @@ copy-target-%:
prep_targets: build_env $(addprefix copy-target-, $(TARGETS)) prep_targets: build_env $(addprefix copy-target-, $(TARGETS))


install-%: prep_targets install-%: prep_targets
GOPATH=$(GOPATH_LOCAL) $(GO) install $(OPTS) $(GOPATH_DIR)/$*
GOPATH=$(GOPATH_LOCAL) GOARCH=$(GOARCH) $(GO) install $(OPTS) $(GOPATH_DIR)/$*


test-%: prep_targets test-%: prep_targets
GOPATH=$(GOPATH_LOCAL) $(GO) vet $(GOPATH_DIR)/$* GOPATH=$(GOPATH_LOCAL) $(GO) vet $(GOPATH_DIR)/$*
GOPATH=$(GOPATH_LOCAL) $(GO) test $(OPTS) $(GOPATH_DIR)/$*
GOPATH=$(GOPATH_LOCAL) GOARCH=$(GOARCH) $(GO) test $(OPTS) $(GOPATH_DIR)/$*


bench-%: prep_targets bench-%: prep_targets
GOMAXPROCS=1 GOPATH=$(GOPATH_LOCAL) $(GO) test $(OPTS) $(GOPATH_DIR)/$* $(BENCH_OPTS) GOMAXPROCS=1 GOPATH=$(GOPATH_LOCAL) $(GO) test $(OPTS) $(GOPATH_DIR)/$* $(BENCH_OPTS)
@@ -70,8 +70,6 @@ vendor: clean
# This swaps all imports with github.com to github_com, so that standard library doesn't # This swaps all imports with github.com to github_com, so that standard library doesn't
# try to access external libraries. # try to access external libraries.
find $(VENDOR_DIR) -type f -iname "*.go" -print0 | xargs -0 sed -i 's/github\.com/github_com/g' find $(VENDOR_DIR) -type f -iname "*.go" -print0 | xargs -0 sed -i 's/github\.com/github_com/g'
# Similar as above, but specific to assembly files. When referencing variable from assembly code
find $(VENDOR_DIR) -type f -iname "*.s" -print0 | xargs -0 sed -i 's/github·com/vendor∕github_com/g'


bench: $(addprefix bench-, $(TARGETS)) bench: $(addprefix bench-, $(TARGETS))
cover: $(addprefix cover-, $(TARGETS)) cover: $(addprefix cover-, $(TARGETS))


+ 3
- 3
internal/utils/cpuid.go ファイルの表示

@@ -10,8 +10,8 @@ package utils
// Signals support for MULX which is in BMI2 // Signals support for MULX which is in BMI2
var HasBMI2 bool var HasBMI2 bool


// Signals support for ADX and BMI2
var HasADXandBMI2 bool
// Signals support for ADX
var HasADX bool


// Performs CPUID and returns values of registers // Performs CPUID and returns values of registers
// go:nosplit // go:nosplit
@@ -31,7 +31,7 @@ func RecognizeCpu() {


_, ebx, _, _ := cpuid(7, 0) _, ebx, _, _ := cpuid(7, 0)
HasBMI2 = bitn(ebx, 19) HasBMI2 = bitn(ebx, 19)
HasADXandBMI2 = bitn(ebx, 7) && HasBMI2
HasADX = bitn(ebx, 7)
} }


func init() { func init() {


+ 0
- 8
internal/utils/utils.go ファイルの表示

@@ -1,8 +0,0 @@
// +build !noasm

// This file is here because the Go language specification requires that a
// package has one or more source files. Now there is always a valid package.
// Otherwise all source files would be excluded by the build constraints for
// the arm64 build.
// https://golang.org/ref/spec#Packages
package utils

+ 16
- 0
p503/arith_amd64.go ファイルの表示

@@ -0,0 +1,16 @@
// +build amd64

package p503

import cpu "github.com/cloudflare/sidh/internal/utils"

// Signals support for MULX which is in BMI2
var HasBMI2 bool

// Signals support for ADX and BMI2
var HasADXandBMI2 bool

func init() {
HasBMI2 = cpu.HasBMI2
HasADXandBMI2 = cpu.HasBMI2 && cpu.HasADX
}

+ 4
- 4
p503/arith_amd64.s ファイルの表示

@@ -697,9 +697,9 @@ TEXT ·fp503Mul(SB), NOSPLIT, $104-24
MOVQ y+16(FP), REG_P2 MOVQ y+16(FP), REG_P2


// Check whether to use optimized implementation // Check whether to use optimized implementation
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1
CMPB ·HasADXandBMI2(SB), $1
JE mul_with_mulx_adcx_adox JE mul_with_mulx_adcx_adox
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1
CMPB ·HasBMI2(SB), $1
JE mul_with_mulx JE mul_with_mulx


// Generic x86 implementation (below) uses variant of Karatsuba method. // Generic x86 implementation (below) uses variant of Karatsuba method.
@@ -1194,9 +1194,9 @@ TEXT ·fp503MontgomeryReduce(SB), $0-16
MOVQ x+8(FP), REG_P1 MOVQ x+8(FP), REG_P1


// Check whether to use optimized implementation // Check whether to use optimized implementation
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1
CMPB ·HasADXandBMI2(SB), $1
JE redc_with_mulx_adcx_adox JE redc_with_mulx_adcx_adox
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1
CMPB ·HasBMI2(SB), $1
JE redc_with_mulx JE redc_with_mulx


MOVQ (REG_P1), R11 MOVQ (REG_P1), R11


+ 27
- 22
p503/arith_amd64_test.go ファイルの表示

@@ -21,21 +21,26 @@ const (
kUse_MULXandADxX = 1 << 2 kUse_MULXandADxX = 1 << 2
) )


func resetCpuFeatures() {
HasBMI2 = cpu.HasBMI2
HasADXandBMI2 = cpu.HasBMI2 && cpu.HasADX
}

// Utility function used for testing Mul implementations. Tests caller provided // Utility function used for testing Mul implementations. Tests caller provided
// mulFunc against mul() // mulFunc against mul()
func testMul(t *testing.T, f1, f2 OptimFlag) { func testMul(t *testing.T, f1, f2 OptimFlag) {
doMulTest := func(multiplier, multiplicant FpElement) bool { doMulTest := func(multiplier, multiplicant FpElement) bool {
defer cpu.RecognizeCpu()
defer resetCpuFeatures()
var resMulRef, resMulOptim FpElementX2 var resMulRef, resMulOptim FpElementX2


// Compute multiplier*multiplicant with first implementation // Compute multiplier*multiplicant with first implementation
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX
HasBMI2 = (kUse_MULX & f1) == kUse_MULX
HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX
fp503Mul(&resMulOptim, &multiplier, &multiplicant) fp503Mul(&resMulOptim, &multiplier, &multiplicant)


// Compute multiplier*multiplicant with second implementation // Compute multiplier*multiplicant with second implementation
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX
HasBMI2 = (kUse_MULX & f2) == kUse_MULX
HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX
fp503Mul(&resMulRef, &multiplier, &multiplicant) fp503Mul(&resMulRef, &multiplier, &multiplicant)


// Compare results // Compare results
@@ -51,18 +56,18 @@ func testMul(t *testing.T, f1, f2 OptimFlag) {
// redcFunc against redc() // redcFunc against redc()
func testRedc(t *testing.T, f1, f2 OptimFlag) { func testRedc(t *testing.T, f1, f2 OptimFlag) {
doRedcTest := func(aRR FpElementX2) bool { doRedcTest := func(aRR FpElementX2) bool {
defer cpu.RecognizeCpu()
defer resetCpuFeatures()
var resRedcF1, resRedcF2 FpElement var resRedcF1, resRedcF2 FpElement
var aRRcpy = aRR var aRRcpy = aRR


// Compute redc with first implementation // Compute redc with first implementation
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX
HasBMI2 = (kUse_MULX & f1) == kUse_MULX
HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX
fp503MontgomeryReduce(&resRedcF1, &aRR) fp503MontgomeryReduce(&resRedcF1, &aRR)


// Compute redc with second implementation // Compute redc with second implementation
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX
HasBMI2 = (kUse_MULX & f2) == kUse_MULX
HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX
fp503MontgomeryReduce(&resRedcF2, &aRRcpy) fp503MontgomeryReduce(&resRedcF2, &aRRcpy)


// Compare results // Compare results
@@ -76,8 +81,8 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) {


// Ensures correctness of implementation of mul operation which uses MULX // Ensures correctness of implementation of mul operation which uses MULX
func TestMulWithMULX(t *testing.T) { func TestMulWithMULX(t *testing.T) {
defer cpu.RecognizeCpu()
if !cpu.HasBMI2 {
defer resetCpuFeatures()
if !HasBMI2 {
t.Skip("MULX not supported by the platform") t.Skip("MULX not supported by the platform")
} }
testMul(t, kUse_MULX, kUse_MUL) testMul(t, kUse_MULX, kUse_MUL)
@@ -85,8 +90,8 @@ func TestMulWithMULX(t *testing.T) {


// Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX // Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX
func TestMulWithMULXADxX(t *testing.T) { func TestMulWithMULXADxX(t *testing.T) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
defer resetCpuFeatures()
if !HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform") t.Skip("MULX, ADCX and ADOX not supported by the platform")
} }
testMul(t, kUse_MULXandADxX, kUse_MUL) testMul(t, kUse_MULXandADxX, kUse_MUL)
@@ -94,8 +99,8 @@ func TestMulWithMULXADxX(t *testing.T) {


// Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX // Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX
func TestMulWithMULXADxXAgainstMULX(t *testing.T) { func TestMulWithMULXADxXAgainstMULX(t *testing.T) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
defer resetCpuFeatures()
if !HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform") t.Skip("MULX, ADCX and ADOX not supported by the platform")
} }
testMul(t, kUse_MULX, kUse_MULXandADxX) testMul(t, kUse_MULX, kUse_MULXandADxX)
@@ -103,8 +108,8 @@ func TestMulWithMULXADxXAgainstMULX(t *testing.T) {


// Ensures correctness of Montgomery reduction implementation which uses MULX // Ensures correctness of Montgomery reduction implementation which uses MULX
func TestRedcWithMULX(t *testing.T) { func TestRedcWithMULX(t *testing.T) {
defer cpu.RecognizeCpu()
if !cpu.HasBMI2 {
defer resetCpuFeatures()
if !HasBMI2 {
t.Skip("MULX not supported by the platform") t.Skip("MULX not supported by the platform")
} }
testRedc(t, kUse_MULX, kUse_MUL) testRedc(t, kUse_MULX, kUse_MUL)
@@ -113,8 +118,8 @@ func TestRedcWithMULX(t *testing.T) {
// Ensures correctness of Montgomery reduction implementation which uses MULX // Ensures correctness of Montgomery reduction implementation which uses MULX
// and ADCX/ADOX. // and ADCX/ADOX.
func TestRedcWithMULXADxX(t *testing.T) { func TestRedcWithMULXADxX(t *testing.T) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
defer resetCpuFeatures()
if !HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform") t.Skip("MULX, ADCX and ADOX not supported by the platform")
} }
testRedc(t, kUse_MULXandADxX, kUse_MUL) testRedc(t, kUse_MULXandADxX, kUse_MUL)
@@ -123,8 +128,8 @@ func TestRedcWithMULXADxX(t *testing.T) {
// Ensures correctness of Montgomery reduction implementation which uses MULX // Ensures correctness of Montgomery reduction implementation which uses MULX
// and ADCX/ADOX. // and ADCX/ADOX.
func TestRedcWithMULXADxXAgainstMULX(t *testing.T) { func TestRedcWithMULXADxXAgainstMULX(t *testing.T) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
defer resetCpuFeatures()
if !HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform") t.Skip("MULX, ADCX and ADOX not supported by the platform")
} }
testRedc(t, kUse_MULXandADxX, kUse_MULX) testRedc(t, kUse_MULXandADxX, kUse_MULX)


+ 0
- 3
p503/arith_decl.go ファイルの表示

@@ -4,9 +4,6 @@ package p503


import ( import (
. "github.com/cloudflare/sidh/internal/isogeny" . "github.com/cloudflare/sidh/internal/isogeny"
// This is imported only because arith_amd64.s needs
// some symbols from cpuid.go
_ "github.com/cloudflare/sidh/internal/utils"
) )


// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x. // If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x.


+ 16
- 0
p751/arith_amd64.go ファイルの表示

@@ -0,0 +1,16 @@
// +build amd64

package p751

import cpu "github.com/cloudflare/sidh/internal/utils"

// Signals support for MULX which is in BMI2
var HasBMI2 bool

// Signals support for ADX and BMI2
var HasADXandBMI2 bool

func init() {
HasBMI2 = cpu.HasBMI2
HasADXandBMI2 = cpu.HasBMI2 && cpu.HasADX
}

+ 2
- 2
p751/arith_amd64.s ファイルの表示

@@ -1740,9 +1740,9 @@ TEXT ·fp751MontgomeryReduce(SB), $0-16
MOVQ x+8(FP), REG_P1 MOVQ x+8(FP), REG_P1


// Check whether to use optimized implementation // Check whether to use optimized implementation
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1
CMPB ·HasADXandBMI2(SB), $1
JE redc_with_mulx_adcx_adox JE redc_with_mulx_adcx_adox
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1
CMPB ·HasBMI2(SB), $1
JE redc_with_mulx JE redc_with_mulx


MOVQ (REG_P1), R11 MOVQ (REG_P1), R11


+ 16
- 11
p751/arith_amd64_test.go ファイルの表示

@@ -22,22 +22,27 @@ const (
kUse_MULXandADxX = 1 << 2 kUse_MULXandADxX = 1 << 2
) )


func resetCpuFeatures() {
HasBMI2 = cpu.HasBMI2
HasADXandBMI2 = cpu.HasBMI2 && cpu.HasADX
}

// Utility function used for testing REDC implementations. Tests caller provided // Utility function used for testing REDC implementations. Tests caller provided
// redcFunc against redc() // redcFunc against redc()
func testRedc(t *testing.T, f1, f2 OptimFlag) { func testRedc(t *testing.T, f1, f2 OptimFlag) {
doRedcTest := func(aRR FpElementX2) bool { doRedcTest := func(aRR FpElementX2) bool {
defer cpu.RecognizeCpu()
defer resetCpuFeatures()
var resRedcF1, resRedcF2 FpElement var resRedcF1, resRedcF2 FpElement
var aRRcpy = aRR var aRRcpy = aRR


// Compute redc with first implementation // Compute redc with first implementation
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX
HasBMI2 = (kUse_MULX & f1) == kUse_MULX
HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX
fp751MontgomeryReduce(&resRedcF1, &aRR) fp751MontgomeryReduce(&resRedcF1, &aRR)


// Compute redc with second implementation // Compute redc with second implementation
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX
HasBMI2 = (kUse_MULX & f2) == kUse_MULX
HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX
fp751MontgomeryReduce(&resRedcF2, &aRRcpy) fp751MontgomeryReduce(&resRedcF2, &aRRcpy)


// Compare results // Compare results
@@ -51,8 +56,8 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) {


// Ensures correctness of Montgomery reduction implementation which uses MULX // Ensures correctness of Montgomery reduction implementation which uses MULX
func TestRedcWithMULX(t *testing.T) { func TestRedcWithMULX(t *testing.T) {
defer cpu.RecognizeCpu()
if !cpu.HasBMI2 {
defer resetCpuFeatures()
if !HasBMI2 {
t.Skip("MULX not supported by the platform") t.Skip("MULX not supported by the platform")
} }
testRedc(t, kUse_MULX, kUse_MUL) testRedc(t, kUse_MULX, kUse_MUL)
@@ -61,8 +66,8 @@ func TestRedcWithMULX(t *testing.T) {
// Ensures correctness of Montgomery reduction implementation which uses MULX // Ensures correctness of Montgomery reduction implementation which uses MULX
// and ADCX/ADOX. // and ADCX/ADOX.
func TestRedcWithMULXADxX(t *testing.T) { func TestRedcWithMULXADxX(t *testing.T) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
defer resetCpuFeatures()
if !HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform") t.Skip("MULX, ADCX and ADOX not supported by the platform")
} }
testRedc(t, kUse_MULXandADxX, kUse_MUL) testRedc(t, kUse_MULXandADxX, kUse_MUL)
@@ -71,8 +76,8 @@ func TestRedcWithMULXADxX(t *testing.T) {
// Ensures correctness of Montgomery reduction implementation which uses MULX // Ensures correctness of Montgomery reduction implementation which uses MULX
// and ADCX/ADOX. // and ADCX/ADOX.
func TestRedcWithMULXADxXAgainstMULX(t *testing.T) { func TestRedcWithMULXADxXAgainstMULX(t *testing.T) {
defer cpu.RecognizeCpu()
if !cpu.HasADXandBMI2 {
defer resetCpuFeatures()
if !HasADXandBMI2 {
t.Skip("MULX, ADCX and ADOX not supported by the platform") t.Skip("MULX, ADCX and ADOX not supported by the platform")
} }
testRedc(t, kUse_MULXandADxX, kUse_MULX) testRedc(t, kUse_MULXandADxX, kUse_MULX)


読み込み中…
キャンセル
保存