mirror of
https://github.com/henrydcase/nobs.git
synced 2024-11-26 00:51:22 +00:00
Makes fp503AddReduced 25% faster
This commit is contained in:
parent
ea2ffa2d61
commit
e621ca22b3
@ -139,78 +139,33 @@ TEXT ·fp503AddReduced(SB),NOSPLIT,$0-24
|
||||
MOVQ x+8(FP), REG_P1
|
||||
MOVQ y+16(FP), REG_P2
|
||||
|
||||
// Used later to calculate a mask
|
||||
XORQ CX, CX
|
||||
|
||||
// [R8-R15]: z = x + y
|
||||
MOVQ ( 0)(REG_P1), R8
|
||||
MOVQ ( 8)(REG_P1), R9
|
||||
MOVQ (16)(REG_P1), R10
|
||||
MOVQ (24)(REG_P1), R11
|
||||
MOVQ (32)(REG_P1), R12
|
||||
MOVQ (40)(REG_P1), R13
|
||||
MOVQ (48)(REG_P1), R14
|
||||
MOVQ (56)(REG_P1), R15
|
||||
ADDQ ( 0)(REG_P2), R8
|
||||
ADCQ ( 8)(REG_P2), R9
|
||||
ADCQ (16)(REG_P2), R10
|
||||
ADCQ (24)(REG_P2), R11
|
||||
ADCQ (32)(REG_P2), R12
|
||||
ADCQ (40)(REG_P2), R13
|
||||
ADCQ (48)(REG_P2), R14
|
||||
ADCQ (56)(REG_P2), R15
|
||||
MOVQ ( 0)(REG_P1), R8; ADDQ ( 0)(REG_P2), R8; MOVQ R8, ( 0)(REG_P3)
|
||||
MOVQ ( 8)(REG_P1), R9; ADCQ ( 8)(REG_P2), R9; MOVQ R9, ( 8)(REG_P3)
|
||||
MOVQ (16)(REG_P1), R10; ADCQ (16)(REG_P2), R10; MOVQ R10, (16)(REG_P3)
|
||||
MOVQ (24)(REG_P1), R11; ADCQ (24)(REG_P2), R11; MOVQ R11, (24)(REG_P3)
|
||||
MOVQ (32)(REG_P1), R12; ADCQ (32)(REG_P2), R12; MOVQ R12, (32)(REG_P3)
|
||||
MOVQ (40)(REG_P1), R13; ADCQ (40)(REG_P2), R13; MOVQ R13, (40)(REG_P3)
|
||||
MOVQ (48)(REG_P1), R14; ADCQ (48)(REG_P2), R14; MOVQ R14, (48)(REG_P3)
|
||||
MOVQ (56)(REG_P1), R15; ADCQ (56)(REG_P2), R15; MOVQ R15, (56)(REG_P3)
|
||||
|
||||
MOVQ P503X2_0, AX
|
||||
SUBQ AX, R8
|
||||
MOVQ P503X2_1, AX
|
||||
SBBQ AX, R9
|
||||
SBBQ AX, R10
|
||||
MOVQ P503X2_3, AX
|
||||
SBBQ AX, R11
|
||||
MOVQ P503X2_4, AX
|
||||
SBBQ AX, R12
|
||||
MOVQ P503X2_5, AX
|
||||
SBBQ AX, R13
|
||||
MOVQ P503X2_6, AX
|
||||
SBBQ AX, R14
|
||||
MOVQ P503X2_7, AX
|
||||
SBBQ AX, R15
|
||||
MOVQ ·p503x2+ 0(SB), AX; SUBQ AX, R8
|
||||
MOVQ ·p503x2+ 8(SB), AX; SBBQ AX, R9
|
||||
MOVQ ·p503x2+16(SB), AX; SBBQ AX, R10
|
||||
MOVQ ·p503x2+24(SB), AX; SBBQ AX, R11
|
||||
MOVQ ·p503x2+32(SB), AX; SBBQ AX, R12
|
||||
MOVQ ·p503x2+40(SB), AX; SBBQ AX, R13
|
||||
MOVQ ·p503x2+48(SB), AX; SBBQ AX, R14
|
||||
MOVQ ·p503x2+56(SB), AX; SBBQ AX, R15
|
||||
|
||||
SBBQ $0, CX // mask
|
||||
|
||||
// move z to REG_P3
|
||||
MOVQ R8, ( 0)(REG_P3)
|
||||
MOVQ R9, ( 8)(REG_P3)
|
||||
MOVQ R10, (16)(REG_P3)
|
||||
MOVQ R11, (24)(REG_P3)
|
||||
MOVQ R12, (32)(REG_P3)
|
||||
MOVQ R13, (40)(REG_P3)
|
||||
MOVQ R14, (48)(REG_P3)
|
||||
MOVQ R15, (56)(REG_P3)
|
||||
|
||||
// if z<0 add p503x2 back
|
||||
MOVQ P503X2_0, R8
|
||||
MOVQ P503X2_1, R9
|
||||
MOVQ P503X2_3, R10
|
||||
MOVQ P503X2_4, R11
|
||||
MOVQ P503X2_5, R12
|
||||
MOVQ P503X2_6, R13
|
||||
MOVQ P503X2_7, R14
|
||||
ANDQ CX, R8
|
||||
ANDQ CX, R9
|
||||
ANDQ CX, R10
|
||||
ANDQ CX, R11
|
||||
ANDQ CX, R12
|
||||
ANDQ CX, R13
|
||||
ANDQ CX, R14
|
||||
ADDQ R8, ( 0)(REG_P3)
|
||||
ADCQ R9, ( 8)(REG_P3)
|
||||
ADCQ R9, (16)(REG_P3)
|
||||
ADCQ R10,(24)(REG_P3)
|
||||
ADCQ R11,(32)(REG_P3)
|
||||
ADCQ R12,(40)(REG_P3)
|
||||
ADCQ R13,(48)(REG_P3)
|
||||
ADCQ R14,(56)(REG_P3)
|
||||
MOVQ ( 0)(REG_P3), AX; CMOVQCC R8, AX; MOVQ AX, ( 0)(REG_P3)
|
||||
MOVQ ( 8)(REG_P3), AX; CMOVQCC R9, AX; MOVQ AX, ( 8)(REG_P3)
|
||||
MOVQ (16)(REG_P3), AX; CMOVQCC R10, AX; MOVQ AX, (16)(REG_P3)
|
||||
MOVQ (24)(REG_P3), AX; CMOVQCC R11, AX; MOVQ AX, (24)(REG_P3)
|
||||
MOVQ (32)(REG_P3), AX; CMOVQCC R12, AX; MOVQ AX, (32)(REG_P3)
|
||||
MOVQ (40)(REG_P3), AX; CMOVQCC R13, AX; MOVQ AX, (40)(REG_P3)
|
||||
MOVQ (48)(REG_P3), AX; CMOVQCC R14, AX; MOVQ AX, (48)(REG_P3)
|
||||
MOVQ (56)(REG_P3), AX; CMOVQCC R15, AX; MOVQ AX, (56)(REG_P3)
|
||||
|
||||
RET
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user