@@ -698,7 +698,7 @@ TEXT ·fp503Mul(SB), NOSPLIT, $104-24 | |||
// Check whether to use optimized implementation | |||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 | |||
JE mul_with_mulx_adx | |||
JE mul_with_mulx_adcx_adox | |||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 | |||
JE mul_with_mulx | |||
@@ -1178,7 +1178,7 @@ TEXT ·fp503Mul(SB), NOSPLIT, $104-24 | |||
ADCQ $0, SI; MOVQ SI, (120)(CX) | |||
RET | |||
mul_with_mulx_adx: | |||
mul_with_mulx_adcx_adox: | |||
// Mul implementation for CPUs supporting two independent carry chain | |||
// (ADOX/ADCX) instructions and carry-less MULX multiplier | |||
MUL(CX, REG_P1, REG_P2, MULS256_MULXADX) | |||
@@ -1195,7 +1195,7 @@ TEXT ·fp503MontgomeryReduce(SB), $0-16 | |||
// Check whether to use optimized implementation | |||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 | |||
JE redc_with_mulx_adx | |||
JE redc_with_mulx_adcx_adox | |||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 | |||
JE redc_with_mulx | |||
@@ -1503,7 +1503,7 @@ TEXT ·fp503MontgomeryReduce(SB), $0-16 | |||
MOVQ R10, (56)(REG_P2) // Z7 | |||
RET | |||
redc_with_mulx_adx: | |||
redc_with_mulx_adcx_adox: | |||
// Implementation of the Montgomery reduction for CPUs | |||
// supporting two independent carry chain (ADOX/ADCX) | |||
// instructions and carry-less MULX multiplier | |||
@@ -13,9 +13,12 @@ import ( | |||
type OptimFlag uint | |||
const ( | |||
kUse_MUL OptimFlag = 1 << 0 | |||
kUse_MULX = 1 << 1 | |||
kUse_BMI2andADX = 1 << 2 | |||
// Indicates that optimisation which uses MUL instruction should be used | |||
kUse_MUL OptimFlag = 1 << 0 | |||
// Indicates that optimisation which uses MULX instruction should be used | |||
kUse_MULX = 1 << 1 | |||
// Indicates that optimisation which uses MULX, ADOX and ADCX instructions should be used | |||
kUse_MULXandADxX = 1 << 2 | |||
) | |||
// Utility function used for testing Mul implementations. Tests caller provided | |||
@@ -27,12 +30,12 @@ func testMul(t *testing.T, f1, f2 OptimFlag) { | |||
// Compute multiplier*multiplicant with first implementation | |||
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX | |||
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX | |||
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX | |||
fp503Mul(&resMulOptim, &multiplier, &multiplicant) | |||
// Compute multiplier*multiplicant with second implementation | |||
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX | |||
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX | |||
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX | |||
fp503Mul(&resMulRef, &multiplier, &multiplicant) | |||
// Compare results | |||
@@ -54,12 +57,12 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { | |||
// Compute redc with first implementation | |||
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX | |||
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f1) == kUse_BMI2andADX | |||
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX | |||
fp503MontgomeryReduce(&resRedcF1, &aRR) | |||
// Compute redc with second implementation | |||
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX | |||
cpu.HasADXandBMI2 = (kUse_BMI2andADX & f2) == kUse_BMI2andADX | |||
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX | |||
fp503MontgomeryReduce(&resRedcF2, &aRRcpy) | |||
// Compare results | |||
@@ -71,7 +74,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { | |||
} | |||
} | |||
// Ensures corretness of implementation of mul operation which uses MULX | |||
// Ensures correctness of implementation of mul operation which uses MULX | |||
func TestMulWithMULX(t *testing.T) { | |||
defer cpu.RecognizeCpu() | |||
if !cpu.HasBMI2 { | |||
@@ -80,25 +83,25 @@ func TestMulWithMULX(t *testing.T) { | |||
testMul(t, kUse_MULX, kUse_MUL) | |||
} | |||
// Ensures corretness of implementation of mul operation which uses MULX and ADOX/ADCX | |||
func TestMulWithMULXADX(t *testing.T) { | |||
// Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX | |||
func TestMulWithMULXADxX(t *testing.T) { | |||
defer cpu.RecognizeCpu() | |||
if !cpu.HasADXandBMI2 { | |||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | |||
} | |||
testMul(t, kUse_BMI2andADX, kUse_MUL) | |||
testMul(t, kUse_MULXandADxX, kUse_MUL) | |||
} | |||
// Ensures corretness of implementation of mul operation which uses MULX and ADOX/ADCX | |||
func TestMulWithMULXADXAgainstMULX(t *testing.T) { | |||
// Ensures correctness of implementation of mul operation which uses MULX and ADOX/ADCX | |||
func TestMulWithMULXADxXAgainstMULX(t *testing.T) { | |||
defer cpu.RecognizeCpu() | |||
if !cpu.HasADXandBMI2 { | |||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | |||
} | |||
testMul(t, kUse_MULX, kUse_BMI2andADX) | |||
testMul(t, kUse_MULX, kUse_MULXandADxX) | |||
} | |||
// Ensures corretness of Montgomery reduction implementation which uses MULX | |||
// Ensures correctness of Montgomery reduction implementation which uses MULX | |||
func TestRedcWithMULX(t *testing.T) { | |||
defer cpu.RecognizeCpu() | |||
if !cpu.HasBMI2 { | |||
@@ -107,22 +110,22 @@ func TestRedcWithMULX(t *testing.T) { | |||
testRedc(t, kUse_MULX, kUse_MUL) | |||
} | |||
// Ensures corretness of Montgomery reduction implementation which uses MULX | |||
// and ADX | |||
func TestRedcWithMULXADX(t *testing.T) { | |||
// Ensures correctness of Montgomery reduction implementation which uses MULX | |||
// and ADCX/ADOX. | |||
func TestRedcWithMULXADxX(t *testing.T) { | |||
defer cpu.RecognizeCpu() | |||
if !cpu.HasADXandBMI2 { | |||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | |||
} | |||
testRedc(t, kUse_BMI2andADX, kUse_MUL) | |||
testRedc(t, kUse_MULXandADxX, kUse_MUL) | |||
} | |||
// Ensures corretness of Montgomery reduction implementation which uses MULX | |||
// and ADX. | |||
func TestRedcWithMULXADXAgainstMULX(t *testing.T) { | |||
// Ensures correctness of Montgomery reduction implementation which uses MULX | |||
// and ADCX/ADOX. | |||
func TestRedcWithMULXADxXAgainstMULX(t *testing.T) { | |||
defer cpu.RecognizeCpu() | |||
if !cpu.HasADXandBMI2 { | |||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | |||
} | |||
testRedc(t, kUse_BMI2andADX, kUse_MULX) | |||
testRedc(t, kUse_MULXandADxX, kUse_MULX) | |||
} |
@@ -1741,7 +1741,7 @@ TEXT ·fp751MontgomeryReduce(SB), $0-16 | |||
// Check whether to use optimized implementation | |||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasADXandBMI2(SB), $1 | |||
JE redc_with_mulx_adx | |||
JE redc_with_mulx_adcx_adox | |||
CMPB github·com∕cloudflare∕sidh∕internal∕utils·HasBMI2(SB), $1 | |||
JE redc_with_mulx | |||
@@ -2347,7 +2347,7 @@ TEXT ·fp751MontgomeryReduce(SB), $0-16 | |||
MOVQ R10, (88)(REG_P2) // Z11 | |||
RET | |||
redc_with_mulx_adx: | |||
redc_with_mulx_adcx_adox: | |||
// This implements the Montgomery reduction algorithm described in | |||
// section 5.2.3 of https://eprint.iacr.org/2017/1015.pdf. | |||
// This assumes that the BMI2 and ADX instruction set extensions are available. | |||
@@ -14,9 +14,12 @@ import ( | |||
type OptimFlag uint | |||
const ( | |||
kUse_MUL OptimFlag = 1 << 0 | |||
kUse_MULX = 1 << 1 | |||
kUse_ADXandBMI2 = 1 << 2 | |||
// Indicates that optimisation which uses MUL instruction should be used | |||
kUse_MUL OptimFlag = 1 << 0 | |||
// Indicates that optimisation which uses MULX instruction should be used | |||
kUse_MULX = 1 << 1 | |||
// Indicates that optimisation which uses MULX, ADOX and ADCX instructions should be used | |||
kUse_MULXandADxX = 1 << 2 | |||
) | |||
// Utility function used for testing REDC implementations. Tests caller provided | |||
@@ -29,12 +32,12 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { | |||
// Compute redc with first implementation | |||
cpu.HasBMI2 = (kUse_MULX & f1) == kUse_MULX | |||
cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f1) == kUse_ADXandBMI2 | |||
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f1) == kUse_MULXandADxX | |||
fp751MontgomeryReduce(&resRedcF1, &aRR) | |||
// Compute redc with second implementation | |||
cpu.HasBMI2 = (kUse_MULX & f2) == kUse_MULX | |||
cpu.HasADXandBMI2 = (kUse_ADXandBMI2 & f2) == kUse_ADXandBMI2 | |||
cpu.HasADXandBMI2 = (kUse_MULXandADxX & f2) == kUse_MULXandADxX | |||
fp751MontgomeryReduce(&resRedcF2, &aRRcpy) | |||
// Compare results | |||
@@ -46,7 +49,7 @@ func testRedc(t *testing.T, f1, f2 OptimFlag) { | |||
} | |||
} | |||
// Ensures corretness of Montgomery reduction implementation which uses MULX | |||
// Ensures correctness of Montgomery reduction implementation which uses MULX | |||
func TestRedcWithMULX(t *testing.T) { | |||
defer cpu.RecognizeCpu() | |||
if !cpu.HasBMI2 { | |||
@@ -55,22 +58,22 @@ func TestRedcWithMULX(t *testing.T) { | |||
testRedc(t, kUse_MULX, kUse_MUL) | |||
} | |||
// Ensures corretness of Montgomery reduction implementation which uses MULX | |||
// and ADX | |||
func TestRedcWithMULXADX(t *testing.T) { | |||
// Ensures correctness of Montgomery reduction implementation which uses MULX | |||
// and ADCX/ADOX. | |||
func TestRedcWithMULXADxX(t *testing.T) { | |||
defer cpu.RecognizeCpu() | |||
if !cpu.HasADXandBMI2 { | |||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | |||
} | |||
testRedc(t, kUse_ADXandBMI2, kUse_MUL) | |||
testRedc(t, kUse_MULXandADxX, kUse_MUL) | |||
} | |||
// Ensures corretness of Montgomery reduction implementation which uses MULX | |||
// and ADX. | |||
func TestRedcWithMULXADXAgainstMULX(t *testing.T) { | |||
// Ensures correctness of Montgomery reduction implementation which uses MULX | |||
// and ADCX/ADOX. | |||
func TestRedcWithMULXADxXAgainstMULX(t *testing.T) { | |||
defer cpu.RecognizeCpu() | |||
if !cpu.HasADXandBMI2 { | |||
t.Skip("MULX, ADCX and ADOX not supported by the platform") | |||
} | |||
testRedc(t, kUse_ADXandBMI2, kUse_MULX) | |||
testRedc(t, kUse_MULXandADxX, kUse_MULX) | |||
} |