@@ -4,8 +4,13 @@ const Fp751NumWords = 12 | |||
// Fp751Element is an array of Fp751NumWords 64-bit words.
type Fp751Element [Fp751NumWords]uint64 | |||
// Fp751UnreducedProduct is an array of 2*Fp751NumWords 64-bit words. It is
// the destination type of Fp751Mul.
type Fp751UnreducedProduct [2 * Fp751NumWords]uint64 | |||
// Fp751Add computes z = x + y.
//
// The declaration has no Go body.
func Fp751Add(z, x, y *Fp751Element) | |||
// Fp751Sub computes z = x - y.
//
// The declaration has no Go body.
func Fp751Sub(z, x, y *Fp751Element) | |||
// Fp751Mul computes z = x * y.
//
// The destination is an Fp751UnreducedProduct, which has twice as many words
// as each operand. The declaration has no Go body.
func Fp751Mul(z *Fp751UnreducedProduct, x, y *Fp751Element) |
@@ -295,3 +295,933 @@ TEXT ·Fp751Sub(SB), NOSPLIT, $0-24 | |||
RET | |||
// Fp751Mul(z *Fp751UnreducedProduct, x, y *Fp751Element)
//
// Multiplies the two 12-word operands x and y and stores a 24-word result in
// z. One level of Karatsuba is used: writing x = AL + 2^384*AH and
// y = BL + 2^384*BH with 6-word halves, the routine computes
// (AH+AL)*(BH+BL), AL*BL and AH*BH, and combines them as
//
//     AL*BL + 2^384*((AH+AL)*(BH+BL) - AL*BL - AH*BH) + 2^768*AH*BH
//
// Frame: $96-24 asks for 96 bytes of local stack and declares 24 bytes of
// arguments (the three pointers z, x and y).
//
// Scratch usage:
//   (0..40)(CX)   AH+AL, (48..88)(CX) BH+BL (low six words of each sum),
//                 until AL*BL overwrites that part of z.
//   (0..88)(SP)   the twelve words of (AH+AL)*(BH+BL). Before they are
//                 needed, (80)(SP) and (88)(SP) briefly hold the carry masks
//                 of AH+AL and BH+BL.
//
// REG_P1 and REG_P2 are register macros defined outside this block.
TEXT ·Fp751Mul(SB), $96-24 | |||
// Here we store the destination in CX instead of in REG_P3 because the | |||
// multiplication instructions use DX as an implicit destination | |||
// operand: MULQ $REG sets DX:AX <-- AX * $REG. | |||
MOVQ z+0(FP), CX | |||
MOVQ x+8(FP), REG_P1 | |||
MOVQ y+16(FP), REG_P2 | |||
// (0..40)(CX) <- AH + AL (low six words of the sum).
XORQ AX, AX | |||
MOVQ (48)(REG_P1), R8 | |||
MOVQ (56)(REG_P1), R9 | |||
MOVQ (64)(REG_P1), R10 | |||
MOVQ (72)(REG_P1), R11 | |||
MOVQ (80)(REG_P1), R12 | |||
MOVQ (88)(REG_P1), R13 | |||
ADDQ (REG_P1), R8 | |||
ADCQ (8)(REG_P1), R9 | |||
ADCQ (16)(REG_P1), R10 | |||
ADCQ (24)(REG_P1), R11 | |||
ADCQ (32)(REG_P1), R12 | |||
ADCQ (40)(REG_P1), R13 | |||
MOVQ R8, (CX) | |||
MOVQ R9, (8)(CX) | |||
MOVQ R10, (16)(CX) | |||
MOVQ R11, (24)(CX) | |||
MOVQ R12, (32)(CX) | |||
MOVQ R13, (40)(CX) | |||
// AX was zeroed above and the MOVQ stores do not touch the flags, so this
// turns the carry out of AH+AL into a mask: 0 if no carry, all ones otherwise.
SBBQ $0, AX | |||
// (48..88)(CX) <- BH + BL, with the carry mask built in DX the same way.
XORQ DX, DX | |||
MOVQ (48)(REG_P2), R8 | |||
MOVQ (56)(REG_P2), R9 | |||
MOVQ (64)(REG_P2), R10 | |||
MOVQ (72)(REG_P2), R11 | |||
MOVQ (80)(REG_P2), R12 | |||
MOVQ (88)(REG_P2), R13 | |||
ADDQ (REG_P2), R8 | |||
ADCQ (8)(REG_P2), R9 | |||
ADCQ (16)(REG_P2), R10 | |||
ADCQ (24)(REG_P2), R11 | |||
ADCQ (32)(REG_P2), R12 | |||
ADCQ (40)(REG_P2), R13 | |||
MOVQ R8, (48)(CX) | |||
MOVQ R9, (56)(CX) | |||
MOVQ R10, (64)(CX) | |||
MOVQ R11, (72)(CX) | |||
MOVQ R12, (80)(CX) | |||
MOVQ R13, (88)(CX) | |||
SBBQ $0, DX | |||
// Park both masks on the stack; AX and DX are needed by MULQ below.
MOVQ AX, (80)(SP) | |||
MOVQ DX, (88)(SP) | |||
// (SP[0-8],R10,R8,R9) <- (AH+AL)*(BH+BL) | |||
// Column-by-column product of the six-word sums held in z. R8, R9 and R10
// (plus R14/R15 for the first columns) rotate as a three-word accumulator;
// each finished column word is stored or kept as marked c0..c11.
MOVQ (CX), R11 | |||
MOVQ R8, AX | |||
MULQ R11 | |||
MOVQ AX, (SP) // c0 | |||
MOVQ DX, R14 | |||
XORQ R15, R15 | |||
MOVQ R9, AX | |||
MULQ R11 | |||
XORQ R9, R9 | |||
ADDQ AX, R14 | |||
ADCQ DX, R9 | |||
MOVQ (8)(CX), R12 | |||
MOVQ R8, AX | |||
MULQ R12 | |||
ADDQ AX, R14 | |||
MOVQ R14, (8)(SP) // c1 | |||
ADCQ DX, R9 | |||
ADCQ $0, R15 | |||
XORQ R8, R8 | |||
MOVQ R10, AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
MOVQ (48)(CX), R13 | |||
ADCQ DX, R15 | |||
ADCQ $0, R8 | |||
MOVQ (16)(CX), AX | |||
MULQ R13 | |||
ADDQ AX, R9 | |||
ADCQ DX, R15 | |||
MOVQ (56)(CX), AX | |||
ADCQ $0, R8 | |||
MULQ R12 | |||
ADDQ AX, R9 | |||
MOVQ R9, (16)(SP) // c2 | |||
ADCQ DX, R15 | |||
ADCQ $0, R8 | |||
XORQ R9, R9 | |||
MOVQ (72)(CX), AX | |||
MULQ R11 | |||
ADDQ AX, R15 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (24)(CX), AX | |||
MULQ R13 | |||
ADDQ AX, R15 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ R10, AX | |||
MULQ R12 | |||
ADDQ AX, R15 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (16)(CX), R14 | |||
MOVQ (56)(CX), AX | |||
MULQ R14 | |||
ADDQ AX, R15 | |||
MOVQ R15, (24)(SP) // c3 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
XORQ R10, R10 | |||
MOVQ (80)(CX), AX | |||
MULQ R11 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (64)(CX), AX | |||
MULQ R14 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (48)(CX), R15 | |||
MOVQ (32)(CX), AX | |||
MULQ R15 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (72)(CX), AX | |||
MULQ R12 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (24)(CX), R13 | |||
MOVQ (56)(CX), AX | |||
MULQ R13 | |||
ADDQ AX, R8 | |||
MOVQ R8, (32)(SP) // c4 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
XORQ R8, R8 | |||
MOVQ (88)(CX), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (64)(CX), AX | |||
MULQ R13 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (72)(CX), AX | |||
MULQ R14 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (40)(CX), AX | |||
MULQ R15 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (80)(CX), AX | |||
MULQ R12 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (32)(CX), R15 | |||
MOVQ (56)(CX), AX | |||
MULQ R15 | |||
ADDQ AX, R9 | |||
MOVQ R9, (40)(SP) // c5 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
XORQ R9, R9 | |||
MOVQ (64)(CX), AX | |||
MULQ R15 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (88)(CX), AX | |||
MULQ R12 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (80)(CX), AX | |||
MULQ R14 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (40)(CX), R11 | |||
MOVQ (56)(CX), AX | |||
MULQ R11 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (72)(CX), AX | |||
MULQ R13 | |||
ADDQ AX, R10 | |||
MOVQ R10, (48)(SP) // c6 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
XORQ R10, R10 | |||
MOVQ (88)(CX), AX | |||
MULQ R14 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (64)(CX), AX | |||
MULQ R11 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (80)(CX), AX | |||
MULQ R13 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (72)(CX), AX | |||
MULQ R15 | |||
ADDQ AX, R8 | |||
MOVQ R8, (56)(SP) // c7 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
XORQ R8, R8 | |||
MOVQ (72)(CX), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (80)(CX), AX | |||
MULQ R15 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (88)(CX), AX | |||
MULQ R13 | |||
ADDQ AX, R9 | |||
MOVQ R9, (64)(SP) // c8 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
XORQ R9, R9 | |||
MOVQ (88)(CX), AX | |||
MULQ R15 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (80)(CX), AX | |||
MULQ R11 | |||
ADDQ AX, R10 // c9 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (88)(CX), AX | |||
MULQ R11 | |||
ADDQ AX, R8 // c10 | |||
ADCQ DX, R9 // c11 | |||
// Correction for the carry of BH+BL: DX, R12, R14, R13, R15, R11 hold words
// 0..5 of AH+AL. They are ANDed with the BH+BL carry mask and added to
// c6..c11, i.e. AH+AL is added in only when BH+BL carried.
MOVQ (88)(SP), AX | |||
MOVQ (CX), DX | |||
ANDQ AX, R12 | |||
ANDQ AX, R14 | |||
ANDQ AX, DX | |||
ANDQ AX, R13 | |||
ANDQ AX, R15 | |||
ANDQ AX, R11 | |||
MOVQ (48)(SP), AX | |||
ADDQ AX, DX | |||
MOVQ (56)(SP), AX | |||
ADCQ AX, R12 | |||
MOVQ (64)(SP), AX | |||
ADCQ AX, R14 | |||
ADCQ R10, R13 | |||
ADCQ R8, R15 | |||
ADCQ R9, R11 | |||
// Fetch the AH+AL carry mask before its stack slot is overwritten below.
MOVQ (80)(SP), AX | |||
MOVQ DX, (48)(SP) | |||
MOVQ R12, (56)(SP) | |||
MOVQ R14, (64)(SP) | |||
MOVQ R13, (72)(SP) | |||
MOVQ R15, (80)(SP) | |||
MOVQ R11, (88)(SP) | |||
// Correction for the carry of AH+AL: the six words of BH+BL are ANDed with
// that mask and added to the same upper six product words.
MOVQ (48)(CX), R8 | |||
MOVQ (56)(CX), R9 | |||
MOVQ (64)(CX), R10 | |||
MOVQ (72)(CX), R11 | |||
MOVQ (80)(CX), R12 | |||
MOVQ (88)(CX), R13 | |||
ANDQ AX, R8 | |||
ANDQ AX, R9 | |||
ANDQ AX, R10 | |||
ANDQ AX, R11 | |||
ANDQ AX, R12 | |||
ANDQ AX, R13 | |||
MOVQ (48)(SP), AX | |||
ADDQ AX, R8 | |||
MOVQ (56)(SP), AX | |||
ADCQ AX, R9 | |||
MOVQ (64)(SP), AX | |||
ADCQ AX, R10 | |||
MOVQ (72)(SP), AX | |||
ADCQ AX, R11 | |||
MOVQ (80)(SP), AX | |||
ADCQ AX, R12 | |||
MOVQ (88)(SP), AX | |||
ADCQ AX, R13 | |||
// R10, R12 and R13 are written back to (64)(SP), (80)(SP) and (88)(SP) a few
// instructions into the next section, interleaved with its first multiplies.
MOVQ R8, (48)(SP) | |||
MOVQ R9, (56)(SP) | |||
MOVQ R11, (72)(SP) | |||
// CX[0-11] <- AL*BL | |||
// Same column-wise scheme as above, reading the low six words of x and y
// and writing the twelve result words to (0..88)(CX). This overwrites the
// AH+AL and BH+BL sums that were stored there.
MOVQ (REG_P1), R11 | |||
MOVQ (REG_P2), AX | |||
MULQ R11 | |||
XORQ R9, R9 | |||
MOVQ AX, (CX) // c0 | |||
MOVQ R10, (64)(SP) | |||
MOVQ DX, R8 | |||
MOVQ (8)(REG_P2), AX | |||
MULQ R11 | |||
XORQ R10, R10 | |||
ADDQ AX, R8 | |||
MOVQ R12, (80)(SP) | |||
ADCQ DX, R9 | |||
MOVQ (8)(REG_P1), R12 | |||
MOVQ (REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R8 | |||
MOVQ R8, (8)(CX) // c1 | |||
ADCQ DX, R9 | |||
MOVQ R13, (88)(SP) | |||
ADCQ $0, R10 | |||
XORQ R8, R8 | |||
MOVQ (16)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (REG_P2), R13 | |||
MOVQ (16)(REG_P1), AX | |||
MULQ R13 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (8)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R9 | |||
MOVQ R9, (16)(CX) // c2 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
XORQ R9, R9 | |||
MOVQ (24)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (24)(REG_P1), AX | |||
MULQ R13 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (16)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (16)(REG_P1), R14 | |||
MOVQ (8)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R10 | |||
MOVQ R10, (24)(CX) // c3 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
XORQ R10, R10 | |||
MOVQ (32)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (16)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (32)(REG_P1), AX | |||
MULQ R13 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (24)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (24)(REG_P1), R13 | |||
MOVQ (8)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R8 | |||
MOVQ R8, (32)(CX) // c4 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
XORQ R8, R8 | |||
MOVQ (40)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (16)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (24)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (40)(REG_P1), R11 | |||
MOVQ (REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (32)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (32)(REG_P1), R15 | |||
MOVQ (8)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R9 | |||
MOVQ R9, (40)(CX) //c5 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
XORQ R9, R9 | |||
MOVQ (16)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (40)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (32)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (8)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (24)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R10 | |||
MOVQ R10, (48)(CX) // c6 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
XORQ R10, R10 | |||
MOVQ (40)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (16)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (32)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (24)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R8 | |||
MOVQ R8, (56)(CX) // c7 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
XORQ R8, R8 | |||
MOVQ (24)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (32)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (40)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R9 | |||
MOVQ R9, (64)(CX) // c8 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
XORQ R9, R9 | |||
MOVQ (40)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (32)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R10 | |||
MOVQ R10, (72)(CX) // c9 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (40)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R8 | |||
MOVQ R8, (80)(CX) // c10 | |||
ADCQ DX, R9 | |||
MOVQ R9, (88)(CX) // c11 | |||
// CX[12-23] <- AH*BH | |||
// Same scheme again on the high six words of x and y (byte offsets 48..88),
// writing the twelve result words to (96..184)(CX).
MOVQ (48)(REG_P1), R11 | |||
MOVQ (48)(REG_P2), AX | |||
MULQ R11 | |||
XORQ R9, R9 | |||
MOVQ AX, (96)(CX) // c0 | |||
MOVQ DX, R8 | |||
MOVQ (56)(REG_P2), AX | |||
MULQ R11 | |||
XORQ R10, R10 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
MOVQ (56)(REG_P1), R12 | |||
MOVQ (48)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R8 | |||
MOVQ R8, (104)(CX) // c1 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
XORQ R8, R8 | |||
MOVQ (64)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (48)(REG_P2), R13 | |||
MOVQ (64)(REG_P1), AX | |||
MULQ R13 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (56)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R9 | |||
MOVQ R9, (112)(CX) // c2 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
XORQ R9, R9 | |||
MOVQ (72)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (72)(REG_P1), AX | |||
MULQ R13 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (64)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (64)(REG_P1), R14 | |||
MOVQ (56)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R10 | |||
MOVQ R10, (120)(CX) // c3 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
XORQ R10, R10 | |||
MOVQ (80)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (64)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (80)(REG_P1), R15 | |||
MOVQ R13, AX | |||
MULQ R15 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (72)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (72)(REG_P1), R13 | |||
MOVQ (56)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R8 | |||
MOVQ R8, (128)(CX) // c4 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
XORQ R8, R8 | |||
MOVQ (88)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (64)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (72)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (88)(REG_P1), R11 | |||
MOVQ (48)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (80)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (56)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R9 | |||
MOVQ R9, (136)(CX) // c5 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
XORQ R9, R9 | |||
MOVQ (64)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (88)(REG_P2), AX | |||
MULQ R12 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (80)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (56)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
MOVQ (72)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R10 | |||
MOVQ R10, (144)(CX) // c6 | |||
ADCQ DX, R8 | |||
ADCQ $0, R9 | |||
XORQ R10, R10 | |||
MOVQ (88)(REG_P2), AX | |||
MULQ R14 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (64)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (80)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R8 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
MOVQ (72)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R8 | |||
MOVQ R8, (152)(CX) // c7 | |||
ADCQ DX, R9 | |||
ADCQ $0, R10 | |||
XORQ R8, R8 | |||
MOVQ (72)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (80)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R9 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
MOVQ (88)(REG_P2), AX | |||
MULQ R13 | |||
ADDQ AX, R9 | |||
MOVQ R9, (160)(CX) // c8 | |||
ADCQ DX, R10 | |||
ADCQ $0, R8 | |||
// NOTE(review): unlike the matching columns of the two products above, c9
// to c11 here use only a two-word accumulator (R10, R8): carries out of R8
// are not collected, and c11 is taken straight from DX. Presumably this
// relies on the top words of x and y being small enough that these columns
// cannot overflow. Confirm against the range callers guarantee.
MOVQ (88)(REG_P2), AX | |||
MULQ R15 | |||
ADDQ AX, R10 | |||
ADCQ DX, R8 | |||
MOVQ (80)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R10 | |||
MOVQ R10, (168)(CX) // c9 | |||
ADCQ DX, R8 | |||
MOVQ (88)(REG_P2), AX | |||
MULQ R11 | |||
ADDQ AX, R8 | |||
MOVQ R8, (176)(CX) // c10 | |||
ADCQ $0, DX | |||
MOVQ DX, (184)(CX) // c11 | |||
// [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL)-AL*BL | |||
// Twelve-word subtraction with borrow. The top word is formed in SI and then
// spilled to (SP) so that SI can serve as a temporary in the next section.
MOVQ (SP), R8 | |||
SUBQ (CX), R8 | |||
MOVQ (8)(SP), R9 | |||
SBBQ (8)(CX), R9 | |||
MOVQ (16)(SP), R10 | |||
SBBQ (16)(CX), R10 | |||
MOVQ (24)(SP), R11 | |||
SBBQ (24)(CX), R11 | |||
MOVQ (32)(SP), R12 | |||
SBBQ (32)(CX), R12 | |||
MOVQ (40)(SP), R13 | |||
SBBQ (40)(CX), R13 | |||
MOVQ (48)(SP), R14 | |||
SBBQ (48)(CX), R14 | |||
MOVQ (56)(SP), R15 | |||
SBBQ (56)(CX), R15 | |||
MOVQ (64)(SP), AX | |||
SBBQ (64)(CX), AX | |||
MOVQ (72)(SP), DX | |||
SBBQ (72)(CX), DX | |||
MOVQ (80)(SP), DI | |||
SBBQ (80)(CX), DI | |||
MOVQ (88)(SP), SI | |||
SBBQ (88)(CX), SI | |||
MOVQ SI, (SP) | |||
// [R8-R15,AX,DX,DI,(SP)] <- (AH+AL)*(BH+BL) - AL*BL - AH*BH | |||
// Each word of AH*BH is loaded into SI and subtracted with borrow. For the
// last word the spilled top word is reloaded into SI instead, so the final
// value ends up in SI rather than back in (SP).
MOVQ (96)(CX), SI | |||
SUBQ SI, R8 | |||
MOVQ (104)(CX), SI | |||
SBBQ SI, R9 | |||
MOVQ (112)(CX), SI | |||
SBBQ SI, R10 | |||
MOVQ (120)(CX), SI | |||
SBBQ SI, R11 | |||
MOVQ (128)(CX), SI | |||
SBBQ SI, R12 | |||
MOVQ (136)(CX), SI | |||
SBBQ SI, R13 | |||
MOVQ (144)(CX), SI | |||
SBBQ SI, R14 | |||
MOVQ (152)(CX), SI | |||
SBBQ SI, R15 | |||
MOVQ (160)(CX), SI | |||
SBBQ SI, AX | |||
MOVQ (168)(CX), SI | |||
SBBQ SI, DX | |||
MOVQ (176)(CX), SI | |||
SBBQ SI, DI | |||
MOVQ (SP), SI | |||
SBBQ (184)(CX), SI | |||
// FINAL RESULT | |||
// Add the twelve-word middle term into z starting at byte offset 48 (word 6),
// i.e. shifted by 2^384, covering words 6..17. MOVQ does not alter the flags,
// so the carry chain runs unbroken through the interleaved stores.
ADDQ (48)(CX), R8 | |||
MOVQ R8, (48)(CX) | |||
ADCQ (56)(CX), R9 | |||
MOVQ R9, (56)(CX) | |||
ADCQ (64)(CX), R10 | |||
MOVQ R10, (64)(CX) | |||
ADCQ (72)(CX), R11 | |||
MOVQ R11, (72)(CX) | |||
ADCQ (80)(CX), R12 | |||
MOVQ R12, (80)(CX) | |||
ADCQ (88)(CX), R13 | |||
MOVQ R13, (88)(CX) | |||
ADCQ (96)(CX), R14 | |||
MOVQ R14, (96)(CX) | |||
ADCQ (104)(CX), R15 | |||
MOVQ R15, (104)(CX) | |||
ADCQ (112)(CX), AX | |||
MOVQ AX, (112)(CX) | |||
ADCQ (120)(CX), DX | |||
MOVQ DX, (120)(CX) | |||
ADCQ (128)(CX), DI | |||
MOVQ DI, (128)(CX) | |||
ADCQ (136)(CX), SI | |||
MOVQ SI, (136)(CX) | |||
// Ripple the remaining carry through the top six words (18..23) of z.
MOVQ (144)(CX), AX | |||
ADCQ $0, AX | |||
MOVQ AX, (144)(CX) | |||
MOVQ (152)(CX), AX | |||
ADCQ $0, AX | |||
MOVQ AX, (152)(CX) | |||
MOVQ (160)(CX), AX | |||
ADCQ $0, AX | |||
MOVQ AX, (160)(CX) | |||
MOVQ (168)(CX), AX | |||
ADCQ $0, AX | |||
MOVQ AX, (168)(CX) | |||
MOVQ (176)(CX), AX | |||
ADCQ $0, AX | |||
MOVQ AX, (176)(CX) | |||
MOVQ (184)(CX), AX | |||
ADCQ $0, AX | |||
MOVQ AX, (184)(CX) | |||
RET |
@@ -34,6 +34,10 @@ func (x *Fp751Element) toBigInt() *big.Int { | |||
return radix64ToBigInt(x[:]) | |||
} | |||
func (x *Fp751UnreducedProduct) toBigInt() *big.Int { | |||
return radix64ToBigInt(x[:]) | |||
} | |||
func (x Fp751Element) Generate(rand *rand.Rand, size int) reflect.Value { | |||
// Generation strategy: low limbs taken from [0,2^64); high limb | |||
// taken from smaller range | |||
@@ -149,6 +153,39 @@ func TestFp751SubVersusBigInt(t *testing.T) { | |||
} | |||
} | |||
func TestFp751MulVersusBigInt(t *testing.T) { | |||
// The CLN16-SIDH prime | |||
p := new(big.Int) | |||
p.UnmarshalText(([]byte)("10354717741769305252977768237866805321427389645549071170116189679054678940682478846502882896561066713624553211618840202385203911976522554393044160468771151816976706840078913334358399730952774926980235086850991501872665651576831")) | |||
// Returns true if computing x * y in this implementation matches | |||
// computing x * y using big.Int | |||
assertion := func(x, y Fp751Element) bool { | |||
z := new(Fp751UnreducedProduct) | |||
// Compute z = x * y using Fp751Mul | |||
Fp751Mul(z, &x, &y) | |||
xBig := x.toBigInt() | |||
yBig := y.toBigInt() | |||
zBig := z.toBigInt() | |||
// Compute z = x * y using big.Int | |||
tmp := new(big.Int) | |||
tmp.Mul(xBig, yBig) | |||
// Reduce both mod p and check that they are equal. | |||
zBig.Mod(zBig, p) | |||
tmp.Mod(tmp, p) | |||
return zBig.Cmp(tmp) == 0 | |||
} | |||
// Run 1M tests | |||
config := &quick.Config{MaxCount: (1 << 20)} | |||
if err := quick.Check(assertion, config); err != nil { | |||
t.Error(err) | |||
} | |||
} | |||
// Package-level storage for this field element is intended to deter | |||
// compiler optimizations. | |||
var benchmarkFp751Element Fp751Element | |||