Browse Source

WIP3

trials/PERF_try2
Kris Kwiatkowski 6 years ago
parent
commit
5bce13a8cc
4 changed files with 18 additions and 13 deletions
  1. +1
    -1
      Makefile
  2. +7
    -7
      p503/arith_amd64.s
  3. +8
    -3
      p503/arith_decl.go
  4. +2
    -2
      p503/field_ops.go

+ 1
- 1
Makefile View File

@@ -22,7 +22,7 @@ endif

ifeq ($(V),1)
OPTS += -v # Be verbose
BENCH_OPTS += -gcflags=-m # Show results from inlining
BENCH_OPTS += -gcflags="-m -m" # Show results from inlining
endif

all: test


+ 7
- 7
p503/arith_amd64.s View File

@@ -698,13 +698,13 @@ TEXT ·mulWithMULXADX(SB), NOSPLIT, $104-24
MUL(CX, REG_P2, REG_P1, MULS256_MULXADX)
RET

// fp503Mul(z, x, y): assembly entry point that loads its three arguments
// from the argument frame and hands them to the MUL macro, instantiated with
// the MULS256_MULXADX multiplier. The frame spec $104-24 declares a 104-byte
// local frame and 24 bytes of arguments (three 8-byte words).
// NOTE(review): MUL and MULS256_MULXADX are defined outside this excerpt;
// which registers and stack slots they clobber cannot be confirmed from here.
TEXT ·fp503Mul(SB), NOSPLIT, $104-24
// Actual implementation
// CX <- z, the first argument (offset 0).
MOVQ z+ 0(FP), CX
// REG_P2 <- x, the second argument (offset 8).
MOVQ x+ 8(FP), REG_P2
// REG_P1 <- y, the third argument (offset 16).
MOVQ y+16(FP), REG_P1
// Expand the multiplication macro on (z, x, y).
MUL(CX, REG_P2, REG_P1, MULS256_MULXADX)
RET
// TEXT ·fp503Mul(SB), NOSPLIT, $104-24
// // Actual implementation
// MOVQ z+ 0(FP), CX
// MOVQ x+ 8(FP), REG_P2
// MOVQ y+16(FP), REG_P1
// MUL(CX, REG_P2, REG_P1, MULS256_MULXADX)
// RET

TEXT ·fp503MulXXX(SB), NOSPLIT, $104-72
// Actual implementation


+ 8
- 3
p503/arith_decl.go View File

@@ -4,7 +4,7 @@ package p503

import (
. "github.com/cloudflare/p751sidh/internal/isogeny"
// cpu "github.com/cloudflare/p751sidh/internal/utils"
cpu "github.com/cloudflare/p751sidh/internal/utils"
)

// If choice = 0, leave x,y unchanged. If choice = 1, set x,y = y,x.
@@ -54,8 +54,13 @@ func mulWithMULX(z *FpElementX2, x, y *FpElement)

// Mul implementation for CPUs supporting the two independent carry-chain
// instructions (ADOX/ADCX) and the flag-preserving MULX multiplier
//go:noescape
func fp503Mul(z *FpElementX2, x, y *FpElement)
// fp503Mul multiplies x by y into z, choosing the backend at run time:
// mul when cpu.HasBMI2 is false, mulWithMULXADX otherwise.
//
// NOTE(review): only BMI2 is tested here although the selected routine is
// named for MULX+ADX; confirm that ADX support is implied or checked elsewhere.
func fp503Mul(z *FpElementX2, x, y *FpElement) {
	// Fall back to the generic routine first so the fast path reads straight through.
	if !cpu.HasBMI2 {
		mul(z, x, y)
		return
	}
	mulWithMULXADX(z, x, y)
}

//go:noescape
func fp503MulXXX(z, x, y []uint64)


+ 2
- 2
p503/field_ops.go View File

@@ -36,8 +36,8 @@ func (fp503Ops) Mul(dest, lhs, rhs *Fp2Element) {
//
// so (a*d + b*c) = (b-a)*(c-d) + a*c + b*d.

fp503Mul1(ac[:], lhs.A[:], rhs.A[:]) // = a*c*R*R
fp503Mul(&bd, &lhs.B, &rhs.B) // = b*d*R*R
fp503Mul(&ac, &lhs.A, &rhs.A) // = a*c*R*R
fp503Mul(&bd, &lhs.B, &rhs.B) // = b*d*R*R

fp503SubReduced(&b_minus_a, &lhs.B, &lhs.A) // = (b-a)*R
fp503SubReduced(&c_minus_d, &rhs.A, &rhs.B) // = (c-d)*R


Loading…
Cancel
Save