diff --git a/p503/arith_amd64.s b/p503/arith_amd64.s index 302db3a..f3add32 100644 --- a/p503/arith_amd64.s +++ b/p503/arith_amd64.s @@ -698,7 +698,7 @@ TEXT ·fp503Mul(SB), NOSPLIT, $104-24 // Check wether to use optimized implementation CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 - JE mul_with_mulx_adx + JE mul_with_mulx_adcx_adox CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 JE mul_with_mulx @@ -1178,7 +1178,7 @@ TEXT ·fp503Mul(SB), NOSPLIT, $104-24 ADCQ $0, SI; MOVQ SI, (120)(CX) RET -mul_with_mulx_adx: +mul_with_mulx_adcx_adox: // Mul implementation for CPUs supporting two independent carry chain // (ADOX/ADCX) instructions and carry-less MULX multiplier MUL(CX, REG_P1, REG_P2, MULS256_MULXADX) @@ -1195,7 +1195,7 @@ TEXT ·fp503MontgomeryReduce(SB), $0-16 // Check wether to use optimized implementation CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 - JE redc_with_mulx_adx + JE redc_with_mulx_adcx_adox CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 JE redc_with_mulx @@ -1503,7 +1503,7 @@ TEXT ·fp503MontgomeryReduce(SB), $0-16 MOVQ R10, (56)(REG_P2) // Z7 RET -redc_with_mulx_adx: +redc_with_mulx_adcx_adox: // Implementation of the Montgomery reduction for CPUs // supporting two independent carry chain (ADOX/ADCX) // instructions and carry-less MULX multiplier diff --git a/p503/arith_amd64_test.go b/p503/arith_amd64_test.go index 8ad6bb9..4627bd8 100644 --- a/p503/arith_amd64_test.go +++ b/p503/arith_amd64_test.go @@ -13,9 +13,12 @@ import ( type OptimFlag uint const ( - kUse_MUL OptimFlag = 1 << 0 - kUse_MULX = 1 << 1 - kUse_BMI2andADX = 1 << 2 + // Indicates that optimisation which uses MUL instruction should be used + kUse_MUL OptimFlag = 1 << 0 + // Indicates that optimisation which uses MULX instruction should be used + kUse_MULX = 1 << 1 + // Indicates that optimisation which uses MULX, ADOX and ADCX instructions should be used + kUse_MULXandADxX = 1 << 2 ) // Utility function used for testing Mul implementations. Tests caller provided @@ -27,12 +30,12 @@ func testMul(t *testing.T, f1, f2 OptimFlag) { // Compute multiplier*multiplicant with first implementation cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX - cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX + cpu.HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX fp503Mul(&resMulOptim, &multiplier, &multiplicant) // Compute multiplier*multiplicant with second implementation cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX - cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX + cpu.HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX fp503Mul(&resMulRef, &multiplier, &multiplicant) // Compare results @@ -54,12 +57,12 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { // Compute redc with first implementation cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX - cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX + cpu.HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX fp503MontgomeryReduce(&resRedcF1, &aRR) // Compute redc with second implementation cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX - cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX + cpu.HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX fp503MontgomeryReduce(&resRedcF2, &aRRcpy) // Compare results @@ -71,7 +74,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { } } -// Ensures corretness of implementation of mul operation which uses MULX +// Ensures correctness of implementation of mul operation which uses MULX func TestMulWithMULX(t *testing.T) { defer cpu.RecognizeCpu() if !cpu.HasBMI2 { @@ -80,25 +83,25 @@ func TestMulWithMULX(t *testing.T) { testMul(t, kUse_MULX, kUse_MUL) } -// Ensures corretness of implementation of mul operation which uses MULX and ADOX/ADCX -func TestMulWithMULXADX(t *testing.T) { +// Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX +func TestMulWithMULXADxX(t *testing.T) { defer cpu.RecognizeCpu() if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testMul(t, kUse_BMI2andADX, kUse_MUL) + testMul(t, kUse_MULXandADxX, kUse_MUL) } -// Ensures corretness of implementation of mul operation which uses MULX and ADOX/ADCX -func TestMulWithMULXADXAgainstMULX(t *testing.T) { +// Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX +func TestMulWithMULXADxXAgainstMULX(t *testing.T) { defer cpu.RecognizeCpu() if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testMul(t, kUse_MULX, kUse_BMI2andADX) + testMul(t, kUse_MULX, kUse_MULXandADxX) } -// Ensures corretness of Montgomery reduction implementation which uses MULX +// Ensures correctness of Montgomery reduction implementation which uses MULX func TestRedcWithMULX(t *testing.T) { defer cpu.RecognizeCpu() if !cpu.HasBMI2 { @@ -107,22 +110,22 @@ func TestRedcWithMULX(t *testing.T) { testRedc(t, kUse_MULX, kUse_MUL) } -// Ensures corretness of Montgomery reduction implementation which uses MULX -// and ADX -func TestRedcWithMULXADX(t *testing.T) { +// Ensures correctness of Montgomery reduction implementation which uses MULX +// and ADCX/ADOX. +func TestRedcWithMULXADxX(t *testing.T) { defer cpu.RecognizeCpu() if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testRedc(t, kUse_BMI2andADX, kUse_MUL) + testRedc(t, kUse_MULXandADxX, kUse_MUL) } -// Ensures corretness of Montgomery reduction implementation which uses MULX -// and ADX. -func TestRedcWithMULXADXAgainstMULX(t *testing.T) { +// Ensures correctness of Montgomery reduction implementation which uses MULX +// and ADCX/ADOX. +func TestRedcWithMULXADxXAgainstMULX(t *testing.T) { defer cpu.RecognizeCpu() if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testRedc(t, kUse_BMI2andADX, kUse_MULX) + testRedc(t, kUse_MULXandADxX, kUse_MULX) } diff --git a/p751/arith_amd64.s b/p751/arith_amd64.s index 0a01962..260e23c 100644 --- a/p751/arith_amd64.s +++ b/p751/arith_amd64.s @@ -1741,7 +1741,7 @@ TEXT ·fp751MontgomeryReduce(SB), $0-16 // Check wether to use optimized implementation CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 - JE redc_with_mulx_adx + JE redc_with_mulx_adcx_adox CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 JE redc_with_mulx @@ -2347,7 +2347,7 @@ TEXT ·fp751MontgomeryReduce(SB), $0-16 MOVQ R10, (88)(REG_P2) // Z11 RET -redc_with_mulx_adx: +redc_with_mulx_adcx_adox: // This implements the Montgomery reduction algorithm described in // section 5.2.3 of https://eprint.iacr.org/2017/1015.pdf. // This assumes that the BMI2 and ADX instruction set extensions are available. diff --git a/p751/arith_amd64_test.go b/p751/arith_amd64_test.go index 3dc5a67..76458d9 100644 --- a/p751/arith_amd64_test.go +++ b/p751/arith_amd64_test.go @@ -14,9 +14,12 @@ import ( type OptimFlag uint const ( - kUse_MUL OptimFlag = 1 << 0 - kUse_MULX = 1 << 1 - kUse_ADXandBMI2 = 1 << 2 + // Indicates that optimisation which uses MUL instruction should be used + kUse_MUL OptimFlag = 1 << 0 + // Indicates that optimisation which uses MULX instruction should be used + kUse_MULX = 1 << 1 + // Indicates that optimisation which uses MULX, ADOX and ADCX instructions should be used + kUse_MULXandADxX = 1 << 2 ) // Utility function used for testing REDC implementations. Tests caller provided @@ -29,12 +32,12 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { // Compute redc with first implementation cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX - cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f1) == kUse_ADXandBMI2 + cpu.HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX fp751MontgomeryReduce(&resRedcF1, &aRR) // Compute redc with second implementation cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX - cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f2) == kUse_ADXandBMI2 + cpu.HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX fp751MontgomeryReduce(&resRedcF2, &aRRcpy) // Compare results @@ -46,7 +49,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { } } -// Ensures corretness of Montgomery reduction implementation which uses MULX +// Ensures correctness of Montgomery reduction implementation which uses MULX func TestRedcWithMULX(t *testing.T) { defer cpu.RecognizeCpu() if !cpu.HasBMI2 { @@ -55,22 +58,22 @@ func TestRedcWithMULX(t *testing.T) { testRedc(t, kUse_MULX, kUse_MUL) } -// Ensures corretness of Montgomery reduction implementation which uses MULX -// and ADX -func TestRedcWithMULXADX(t *testing.T) { +// Ensures correctness of Montgomery reduction implementation which uses MULX +// and ADCX/ADOX. +func TestRedcWithMULXADxX(t *testing.T) { defer cpu.RecognizeCpu() if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testRedc(t, kUse_ADXandBMI2, kUse_MUL) + testRedc(t, kUse_MULXandADxX, kUse_MUL) } -// Ensures corretness of Montgomery reduction implementation which uses MULX -// and ADX. -func TestRedcWithMULXADXAgainstMULX(t *testing.T) { +// Ensures correctness of Montgomery reduction implementation which uses MULX +// and ADCX/ADOX. +func TestRedcWithMULXADxXAgainstMULX(t *testing.T) { defer cpu.RecognizeCpu() if !cpu.HasADXandBMI2 { t.Skip("MULX, ADCX and ADOX not supported by the platform") } - testRedc(t, kUse_ADXandBMI2, kUse_MULX) + testRedc(t, kUse_MULXandADxX, kUse_MULX) }