浏览代码

cln16sidh: port p-751 multiplication from MSR assembly

trials/prep_p503_trial3
Henry de Valence 7 年前
父节点
当前提交
b017b109ce
共有 3 个文件被更改,包括 972 次插入和 0 次删除
  1. +5
    -0
      field.go
  2. +930
    -0
      field_amd64.s
  3. +37
    -0
      field_test.go

+ 5
- 0
field.go 查看文件

@@ -4,8 +4,13 @@ const Fp751NumWords = 12

type Fp751Element [Fp751NumWords]uint64

type Fp751UnreducedProduct [2 * Fp751NumWords]uint64

/// Compute z = x + y.
func Fp751Add(z, x, y *Fp751Element)

/// Compute z = x - y.
func Fp751Sub(z, x, y *Fp751Element)

/// Compute z = x * y.
func Fp751Mul(z *Fp751UnreducedProduct, x, y *Fp751Element)

+ 930
- 0
field_amd64.s 查看文件

@@ -295,3 +295,933 @@ TEXT ·Fp751Sub(SB), NOSPLIT, $0-24

RET

// func Fp751Mul(z *Fp751UnreducedProduct, x, y *Fp751Element)
//
// Writes the 24-word product of the 12-word operands x and y to z, using
// one level of Karatsuba. Each operand is split into a low half (words
// 0-5: AL, BL) and a high half (words 6-11: AH, BH). The steps are:
//
//   1. z[0-5]   <- AH+AL, carry-out kept as an all-ones/zero mask
//   2. z[6-11]  <- BH+BL, carry-out kept as an all-ones/zero mask
//   3. stack    <- low 12 words of (AH+AL)*(BH+BL)
//   4. z[0-11]  <- AL*BL
//   5. z[12-23] <- AH*BH
//   6. middle   <- (3) - AL*BL - AH*BH
//   7. z[6-17]  += middle, with the carry propagated through z[18-23]
//
// z is used as scratch in steps 1-2 while x and y are still read in steps
// 4-5, so z must not overlap x or y.
//
// Frame: $96 bytes of locals, $24 bytes of arguments (three pointers).
// 0(SP)-72(SP) hold product words of step 3; 80(SP) and 88(SP) first hold
// the two carry masks and are later reused for product words 10 and 11.
TEXT ·Fp751Mul(SB), $96-24

// Here we store the destination in CX instead of in REG_P3 because the
// multiplication instructions use DX as an implicit destination
// operand: MULQ $REG sets DX:AX <-- AX * $REG.

MOVQ z+0(FP), CX
MOVQ x+8(FP), REG_P1
MOVQ y+16(FP), REG_P2

// CX[0-5] <- AH+AL
// AX starts at zero; the trailing SBBQ turns the carry-out of the sum
// into a mask (0 if no carry, all ones if carry). The MOVQ stores in
// between do not modify the carry flag.
XORQ AX, AX
MOVQ (48)(REG_P1), R8
MOVQ (56)(REG_P1), R9
MOVQ (64)(REG_P1), R10
MOVQ (72)(REG_P1), R11
MOVQ (80)(REG_P1), R12
MOVQ (88)(REG_P1), R13
ADDQ (REG_P1), R8
ADCQ (8)(REG_P1), R9
ADCQ (16)(REG_P1), R10
ADCQ (24)(REG_P1), R11
ADCQ (32)(REG_P1), R12
ADCQ (40)(REG_P1), R13
MOVQ R8, (CX)
MOVQ R9, (8)(CX)
MOVQ R10, (16)(CX)
MOVQ R11, (24)(CX)
MOVQ R12, (32)(CX)
MOVQ R13, (40)(CX)
SBBQ $0, AX

// CX[6-11] <- BH+BL
// DX receives the carry mask of this sum in the same way. Both masks are
// then spilled: 80(SP) <- mask of AH+AL, 88(SP) <- mask of BH+BL.
XORQ DX, DX
MOVQ (48)(REG_P2), R8
MOVQ (56)(REG_P2), R9
MOVQ (64)(REG_P2), R10
MOVQ (72)(REG_P2), R11
MOVQ (80)(REG_P2), R12
MOVQ (88)(REG_P2), R13
ADDQ (REG_P2), R8
ADCQ (8)(REG_P2), R9
ADCQ (16)(REG_P2), R10
ADCQ (24)(REG_P2), R11
ADCQ (32)(REG_P2), R12
ADCQ (40)(REG_P2), R13
MOVQ R8, (48)(CX)
MOVQ R9, (56)(CX)
MOVQ R10, (64)(CX)
MOVQ R11, (72)(CX)
MOVQ R12, (80)(CX)
MOVQ R13, (88)(CX)
SBBQ $0, DX
MOVQ AX, (80)(SP)
MOVQ DX, (88)(SP)

// (SP[0-8],R10,R8,R9) <- (AH+AL)*(BH+BL)
//
// Column-by-column product of the two 6-word sums stored in CX[0-5] and
// CX[6-11]. Each column ck sums its partial products into a rotating
// three-register accumulator. On entry R8, R9, R10 still hold words 0-2
// of BH+BL from the block above.

MOVQ (CX), R11
MOVQ R8, AX
MULQ R11
MOVQ AX, (SP) // c0
MOVQ DX, R14

XORQ R15, R15
MOVQ R9, AX
MULQ R11
XORQ R9, R9
ADDQ AX, R14
ADCQ DX, R9

MOVQ (8)(CX), R12
MOVQ R8, AX
MULQ R12
ADDQ AX, R14
MOVQ R14, (8)(SP) // c1
ADCQ DX, R9
ADCQ $0, R15

XORQ R8, R8
MOVQ R10, AX
MULQ R11
ADDQ AX, R9
MOVQ (48)(CX), R13
ADCQ DX, R15
ADCQ $0, R8

MOVQ (16)(CX), AX
MULQ R13
ADDQ AX, R9
ADCQ DX, R15
MOVQ (56)(CX), AX
ADCQ $0, R8

MULQ R12
ADDQ AX, R9
MOVQ R9, (16)(SP) // c2
ADCQ DX, R15
ADCQ $0, R8

XORQ R9, R9
MOVQ (72)(CX), AX
MULQ R11
ADDQ AX, R15
ADCQ DX, R8
ADCQ $0, R9

MOVQ (24)(CX), AX
MULQ R13
ADDQ AX, R15
ADCQ DX, R8
ADCQ $0, R9

MOVQ R10, AX
MULQ R12
ADDQ AX, R15
ADCQ DX, R8
ADCQ $0, R9

MOVQ (16)(CX), R14
MOVQ (56)(CX), AX
MULQ R14
ADDQ AX, R15
MOVQ R15, (24)(SP) // c3
ADCQ DX, R8
ADCQ $0, R9

XORQ R10, R10
MOVQ (80)(CX), AX
MULQ R11
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (64)(CX), AX
MULQ R14
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (48)(CX), R15
MOVQ (32)(CX), AX
MULQ R15
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (72)(CX), AX
MULQ R12
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (24)(CX), R13
MOVQ (56)(CX), AX
MULQ R13
ADDQ AX, R8
MOVQ R8, (32)(SP) // c4
ADCQ DX, R9
ADCQ $0, R10

XORQ R8, R8
MOVQ (88)(CX), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (64)(CX), AX
MULQ R13
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (72)(CX), AX
MULQ R14
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (40)(CX), AX
MULQ R15
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (80)(CX), AX
MULQ R12
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (32)(CX), R15
MOVQ (56)(CX), AX
MULQ R15
ADDQ AX, R9
MOVQ R9, (40)(SP) // c5
ADCQ DX, R10
ADCQ $0, R8

XORQ R9, R9
MOVQ (64)(CX), AX
MULQ R15
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (88)(CX), AX
MULQ R12
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (80)(CX), AX
MULQ R14
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (40)(CX), R11
MOVQ (56)(CX), AX
MULQ R11
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (72)(CX), AX
MULQ R13
ADDQ AX, R10
MOVQ R10, (48)(SP) // c6
ADCQ DX, R8
ADCQ $0, R9

XORQ R10, R10
MOVQ (88)(CX), AX
MULQ R14
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (64)(CX), AX
MULQ R11
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (80)(CX), AX
MULQ R13
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (72)(CX), AX
MULQ R15
ADDQ AX, R8
MOVQ R8, (56)(SP) // c7
ADCQ DX, R9
ADCQ $0, R10

XORQ R8, R8
MOVQ (72)(CX), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (80)(CX), AX
MULQ R15
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (88)(CX), AX
MULQ R13
ADDQ AX, R9
MOVQ R9, (64)(SP) // c8
ADCQ DX, R10
ADCQ $0, R8

XORQ R9, R9
MOVQ (88)(CX), AX
MULQ R15
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (80)(CX), AX
MULQ R11
ADDQ AX, R10 // c9
ADCQ DX, R8
ADCQ $0, R9

MOVQ (88)(CX), AX
MULQ R11
ADDQ AX, R8 // c10
ADCQ DX, R9 // c11

// Carry correction, part 1. At this point DX (loaded below), R12, R14,
// R13, R15, R11 hold words 0-5 of AH+AL. They are ANDed with the BH+BL
// carry mask from 88(SP) and added into product words 6-11, i.e. AH+AL
// is added at word offset 6 only if BH+BL carried out.
// The AH+AL carry mask is fetched from 80(SP) into AX before 80(SP) is
// overwritten with a product word. The carry out of word 11 is not
// recorded.
MOVQ (88)(SP), AX
MOVQ (CX), DX
ANDQ AX, R12
ANDQ AX, R14
ANDQ AX, DX
ANDQ AX, R13
ANDQ AX, R15
ANDQ AX, R11
MOVQ (48)(SP), AX
ADDQ AX, DX
MOVQ (56)(SP), AX
ADCQ AX, R12
MOVQ (64)(SP), AX
ADCQ AX, R14
ADCQ R10, R13
ADCQ R8, R15
ADCQ R9, R11
MOVQ (80)(SP), AX
MOVQ DX, (48)(SP)
MOVQ R12, (56)(SP)
MOVQ R14, (64)(SP)
MOVQ R13, (72)(SP)
MOVQ R15, (80)(SP)
MOVQ R11, (88)(SP)

// Carry correction, part 2. Words 0-5 of BH+BL (CX[6-11]) are ANDed with
// the AH+AL carry mask in AX and added into product words 6-11. Only
// R8, R9 and R11 are written back here; R10, R12 and R13 are stored to
// 64(SP), 80(SP) and 88(SP) by MOVQs interleaved into the start of the
// AL*BL block below. The carry out of word 11 is again not recorded.
// NOTE(review): keeping only 12 words of this product presumably relies
// on a bound on the operands' top words; confirm the input range.
MOVQ (48)(CX), R8
MOVQ (56)(CX), R9
MOVQ (64)(CX), R10
MOVQ (72)(CX), R11
MOVQ (80)(CX), R12
MOVQ (88)(CX), R13
ANDQ AX, R8
ANDQ AX, R9
ANDQ AX, R10
ANDQ AX, R11
ANDQ AX, R12
ANDQ AX, R13
MOVQ (48)(SP), AX
ADDQ AX, R8
MOVQ (56)(SP), AX
ADCQ AX, R9
MOVQ (64)(SP), AX
ADCQ AX, R10
MOVQ (72)(SP), AX
ADCQ AX, R11
MOVQ (80)(SP), AX
ADCQ AX, R12
MOVQ (88)(SP), AX
ADCQ AX, R13
MOVQ R8, (48)(SP)
MOVQ R9, (56)(SP)
MOVQ R11, (72)(SP)

// CX[0-11] <- AL*BL
// From here on the scratch sums in CX[0-11] are overwritten; x and y are
// read directly through REG_P1 and REG_P2.
MOVQ (REG_P1), R11
MOVQ (REG_P2), AX
MULQ R11
XORQ R9, R9
MOVQ AX, (CX) // c0
MOVQ R10, (64)(SP) // deferred spill of (AH+AL)*(BH+BL) word 8
MOVQ DX, R8

MOVQ (8)(REG_P2), AX
MULQ R11
XORQ R10, R10
ADDQ AX, R8
MOVQ R12, (80)(SP) // deferred spill of (AH+AL)*(BH+BL) word 10
ADCQ DX, R9

MOVQ (8)(REG_P1), R12
MOVQ (REG_P2), AX
MULQ R12
ADDQ AX, R8
MOVQ R8, (8)(CX) // c1
ADCQ DX, R9
MOVQ R13, (88)(SP) // deferred spill of (AH+AL)*(BH+BL) word 11
ADCQ $0, R10

XORQ R8, R8
MOVQ (16)(REG_P2), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (REG_P2), R13
MOVQ (16)(REG_P1), AX
MULQ R13
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (8)(REG_P2), AX
MULQ R12
ADDQ AX, R9
MOVQ R9, (16)(CX) // c2
ADCQ DX, R10
ADCQ $0, R8

XORQ R9, R9
MOVQ (24)(REG_P2), AX
MULQ R11
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (24)(REG_P1), AX
MULQ R13
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (16)(REG_P2), AX
MULQ R12
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (16)(REG_P1), R14
MOVQ (8)(REG_P2), AX
MULQ R14
ADDQ AX, R10
MOVQ R10, (24)(CX) // c3
ADCQ DX, R8
ADCQ $0, R9

XORQ R10, R10
MOVQ (32)(REG_P2), AX
MULQ R11
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (16)(REG_P2), AX
MULQ R14
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (32)(REG_P1), AX
MULQ R13
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (24)(REG_P2), AX
MULQ R12
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (24)(REG_P1), R13
MOVQ (8)(REG_P2), AX
MULQ R13
ADDQ AX, R8
MOVQ R8, (32)(CX) // c4
ADCQ DX, R9
ADCQ $0, R10

XORQ R8, R8
MOVQ (40)(REG_P2), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (16)(REG_P2), AX
MULQ R13
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (24)(REG_P2), AX
MULQ R14
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (40)(REG_P1), R11
MOVQ (REG_P2), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (32)(REG_P2), AX
MULQ R12
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (32)(REG_P1), R15
MOVQ (8)(REG_P2), AX
MULQ R15
ADDQ AX, R9
MOVQ R9, (40)(CX) //c5
ADCQ DX, R10
ADCQ $0, R8

XORQ R9, R9
MOVQ (16)(REG_P2), AX
MULQ R15
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (40)(REG_P2), AX
MULQ R12
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (32)(REG_P2), AX
MULQ R14
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (8)(REG_P2), AX
MULQ R11
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (24)(REG_P2), AX
MULQ R13
ADDQ AX, R10
MOVQ R10, (48)(CX) // c6
ADCQ DX, R8
ADCQ $0, R9

XORQ R10, R10
MOVQ (40)(REG_P2), AX
MULQ R14
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (16)(REG_P2), AX
MULQ R11
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (32)(REG_P2), AX
MULQ R13
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (24)(REG_P2), AX
MULQ R15
ADDQ AX, R8
MOVQ R8, (56)(CX) // c7
ADCQ DX, R9
ADCQ $0, R10

XORQ R8, R8
MOVQ (24)(REG_P2), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (32)(REG_P2), AX
MULQ R15
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (40)(REG_P2), AX
MULQ R13
ADDQ AX, R9
MOVQ R9, (64)(CX) // c8
ADCQ DX, R10
ADCQ $0, R8

XORQ R9, R9
MOVQ (40)(REG_P2), AX
MULQ R15
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (32)(REG_P2), AX
MULQ R11
ADDQ AX, R10
MOVQ R10, (72)(CX) // c9
ADCQ DX, R8
ADCQ $0, R9

MOVQ (40)(REG_P2), AX
MULQ R11
ADDQ AX, R8
MOVQ R8, (80)(CX) // c10
ADCQ DX, R9
MOVQ R9, (88)(CX) // c11

// CX[12-23] <- AH*BH
// Same column-by-column scheme as AL*BL, on words 6-11 of x and y; the
// result is written at byte offsets 96-184 of z.
MOVQ (48)(REG_P1), R11
MOVQ (48)(REG_P2), AX
MULQ R11
XORQ R9, R9
MOVQ AX, (96)(CX) // c0
MOVQ DX, R8

MOVQ (56)(REG_P2), AX
MULQ R11
XORQ R10, R10
ADDQ AX, R8
ADCQ DX, R9

MOVQ (56)(REG_P1), R12
MOVQ (48)(REG_P2), AX
MULQ R12
ADDQ AX, R8
MOVQ R8, (104)(CX) // c1
ADCQ DX, R9
ADCQ $0, R10

XORQ R8, R8
MOVQ (64)(REG_P2), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (48)(REG_P2), R13
MOVQ (64)(REG_P1), AX
MULQ R13
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (56)(REG_P2), AX
MULQ R12
ADDQ AX, R9
MOVQ R9, (112)(CX) // c2
ADCQ DX, R10
ADCQ $0, R8

XORQ R9, R9
MOVQ (72)(REG_P2), AX
MULQ R11
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (72)(REG_P1), AX
MULQ R13
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (64)(REG_P2), AX
MULQ R12
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (64)(REG_P1), R14
MOVQ (56)(REG_P2), AX
MULQ R14
ADDQ AX, R10
MOVQ R10, (120)(CX) // c3
ADCQ DX, R8
ADCQ $0, R9

XORQ R10, R10
MOVQ (80)(REG_P2), AX
MULQ R11
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (64)(REG_P2), AX
MULQ R14
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (80)(REG_P1), R15
MOVQ R13, AX
MULQ R15
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (72)(REG_P2), AX
MULQ R12
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (72)(REG_P1), R13
MOVQ (56)(REG_P2), AX
MULQ R13
ADDQ AX, R8
MOVQ R8, (128)(CX) // c4
ADCQ DX, R9
ADCQ $0, R10

XORQ R8, R8
MOVQ (88)(REG_P2), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (64)(REG_P2), AX
MULQ R13
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (72)(REG_P2), AX
MULQ R14
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (88)(REG_P1), R11
MOVQ (48)(REG_P2), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (80)(REG_P2), AX
MULQ R12
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (56)(REG_P2), AX
MULQ R15
ADDQ AX, R9
MOVQ R9, (136)(CX) // c5
ADCQ DX, R10
ADCQ $0, R8

XORQ R9, R9
MOVQ (64)(REG_P2), AX
MULQ R15
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (88)(REG_P2), AX
MULQ R12
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (80)(REG_P2), AX
MULQ R14
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (56)(REG_P2), AX
MULQ R11
ADDQ AX, R10
ADCQ DX, R8
ADCQ $0, R9

MOVQ (72)(REG_P2), AX
MULQ R13
ADDQ AX, R10
MOVQ R10, (144)(CX) // c6
ADCQ DX, R8
ADCQ $0, R9

XORQ R10, R10
MOVQ (88)(REG_P2), AX
MULQ R14
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (64)(REG_P2), AX
MULQ R11
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (80)(REG_P2), AX
MULQ R13
ADDQ AX, R8
ADCQ DX, R9
ADCQ $0, R10

MOVQ (72)(REG_P2), AX
MULQ R15
ADDQ AX, R8
MOVQ R8, (152)(CX) // c7
ADCQ DX, R9
ADCQ $0, R10

XORQ R8, R8
MOVQ (72)(REG_P2), AX
MULQ R11
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (80)(REG_P2), AX
MULQ R15
ADDQ AX, R9
ADCQ DX, R10
ADCQ $0, R8

MOVQ (88)(REG_P2), AX
MULQ R13
ADDQ AX, R9
MOVQ R9, (160)(CX) // c8
ADCQ DX, R10
ADCQ $0, R8

// Unlike the AL*BL block, the last columns here use only a two-word
// accumulator: the carry out of "ADCQ DX, R8" is not tracked, and c11 is
// the high word of the final MULQ plus one carry.
// NOTE(review): this presumably relies on the top words of x and y being
// small enough that AH*BH cannot overflow here; confirm the input bounds.
MOVQ (88)(REG_P2), AX
MULQ R15
ADDQ AX, R10
ADCQ DX, R8

MOVQ (80)(REG_P2), AX
MULQ R11
ADDQ AX, R10
MOVQ R10, (168)(CX) // c9
ADCQ DX, R8

MOVQ (88)(REG_P2), AX
MULQ R11
ADDQ AX, R8
MOVQ R8, (176)(CX) // c10
ADCQ $0, DX
MOVQ DX, (184)(CX) // c11

// [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL)-AL*BL
// x and y are no longer read after this point, so DI and SI are used as
// plain scratch registers. The MOVQ loads between the SBBQs leave the
// borrow flag untouched. The twelfth word is computed in SI and spilled
// to (SP) because SI is needed as a temporary in the next block.
MOVQ (SP), R8
SUBQ (CX), R8
MOVQ (8)(SP), R9
SBBQ (8)(CX), R9
MOVQ (16)(SP), R10
SBBQ (16)(CX), R10
MOVQ (24)(SP), R11
SBBQ (24)(CX), R11
MOVQ (32)(SP), R12
SBBQ (32)(CX), R12
MOVQ (40)(SP), R13
SBBQ (40)(CX), R13
MOVQ (48)(SP), R14
SBBQ (48)(CX), R14
MOVQ (56)(SP), R15
SBBQ (56)(CX), R15
MOVQ (64)(SP), AX
SBBQ (64)(CX), AX
MOVQ (72)(SP), DX
SBBQ (72)(CX), DX
MOVQ (80)(SP), DI
SBBQ (80)(CX), DI
MOVQ (88)(SP), SI
SBBQ (88)(CX), SI
MOVQ SI, (SP)

// [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH
// SI carries each word of AH*BH in turn; for the last word the spilled
// value is reloaded from (SP) into SI, so the result ends up in
// [R8-R15,AX,DX,DI,SI].
MOVQ (96)(CX), SI
SUBQ SI, R8
MOVQ (104)(CX), SI
SBBQ SI, R9
MOVQ (112)(CX), SI
SBBQ SI, R10
MOVQ (120)(CX), SI
SBBQ SI, R11
MOVQ (128)(CX), SI
SBBQ SI, R12
MOVQ (136)(CX), SI
SBBQ SI, R13
MOVQ (144)(CX), SI
SBBQ SI, R14
MOVQ (152)(CX), SI
SBBQ SI, R15
MOVQ (160)(CX), SI
SBBQ SI, AX
MOVQ (168)(CX), SI
SBBQ SI, DX
MOVQ (176)(CX), SI
SBBQ SI, DI
MOVQ (SP), SI
SBBQ (184)(CX), SI

// FINAL RESULT
// Add the 12-word middle term into CX[6-17] (byte offsets 48-136), then
// propagate the remaining carry through CX[18-23] (byte offsets 144-184).
// AX is free to reuse as a temporary once its word has been stored at
// 112(CX).
ADDQ (48)(CX), R8
MOVQ R8, (48)(CX)
ADCQ (56)(CX), R9
MOVQ R9, (56)(CX)
ADCQ (64)(CX), R10
MOVQ R10, (64)(CX)
ADCQ (72)(CX), R11
MOVQ R11, (72)(CX)
ADCQ (80)(CX), R12
MOVQ R12, (80)(CX)
ADCQ (88)(CX), R13
MOVQ R13, (88)(CX)
ADCQ (96)(CX), R14
MOVQ R14, (96)(CX)
ADCQ (104)(CX), R15
MOVQ R15, (104)(CX)
ADCQ (112)(CX), AX
MOVQ AX, (112)(CX)
ADCQ (120)(CX), DX
MOVQ DX, (120)(CX)
ADCQ (128)(CX), DI
MOVQ DI, (128)(CX)
ADCQ (136)(CX), SI
MOVQ SI, (136)(CX)
MOVQ (144)(CX), AX
ADCQ $0, AX
MOVQ AX, (144)(CX)
MOVQ (152)(CX), AX
ADCQ $0, AX
MOVQ AX, (152)(CX)
MOVQ (160)(CX), AX
ADCQ $0, AX
MOVQ AX, (160)(CX)
MOVQ (168)(CX), AX
ADCQ $0, AX
MOVQ AX, (168)(CX)
MOVQ (176)(CX), AX
ADCQ $0, AX
MOVQ AX, (176)(CX)
MOVQ (184)(CX), AX
ADCQ $0, AX
MOVQ AX, (184)(CX)

RET

+ 37
- 0
field_test.go 查看文件

@@ -34,6 +34,10 @@ func (x *Fp751Element) toBigInt() *big.Int {
return radix64ToBigInt(x[:])
}

// toBigInt converts the unreduced product to a *big.Int by passing all of
// its 64-bit words to radix64ToBigInt.
func (x *Fp751UnreducedProduct) toBigInt() *big.Int {
	words := x[:]
	return radix64ToBigInt(words)
}

func (x Fp751Element) Generate(rand *rand.Rand, size int) reflect.Value {
// Generation strategy: low limbs taken from [0,2^64); high limb
// taken from smaller range
@@ -149,6 +153,39 @@ func TestFp751SubVersusBigInt(t *testing.T) {
}
}

// TestFp751MulVersusBigInt checks Fp751Mul against math/big: for randomly
// generated operands x and y, the value written by Fp751Mul must be
// congruent to x * y modulo the CLN16-SIDH prime p.
func TestFp751MulVersusBigInt(t *testing.T) {
	// The CLN16-SIDH prime. Parsing is checked explicitly so that a
	// corrupted literal fails the test here instead of leaving p unset.
	p, ok := new(big.Int).SetString("10354717741769305252977768237866805321427389645549071170116189679054678940682478846502882896561066713624553211618840202385203911976522554393044160468771151816976706840078913334358399730952774926980235086850991501872665651576831", 10)
	if !ok {
		t.Fatal("parsing the CLN16-SIDH prime failed")
	}

	// Returns true if computing x * y in this implementation matches
	// computing x * y using big.Int.
	assertion := func(x, y Fp751Element) bool {
		z := new(Fp751UnreducedProduct)
		// Compute z = x * y using Fp751Mul.
		Fp751Mul(z, &x, &y)

		xBig := x.toBigInt()
		yBig := y.toBigInt()
		zBig := z.toBigInt()

		// Compute x * y using big.Int.
		tmp := new(big.Int)
		tmp.Mul(xBig, yBig)

		// Reduce both mod p and check that they are equal.
		zBig.Mod(zBig, p)
		tmp.Mod(tmp, p)
		return zBig.Cmp(tmp) == 0
	}

	// Run 1M tests.
	config := &quick.Config{MaxCount: (1 << 20)}
	if err := quick.Check(assertion, config); err != nil {
		t.Error(err)
	}
}

// benchmarkFp751Element is package-level storage for a field element,
// intended to deter compiler optimizations.
// NOTE(review): the benchmarks that use it are outside this view.
var benchmarkFp751Element Fp751Element


正在加载...
取消
保存