// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build arm64,!noasm

#include "textflag.h"

// Byte-shuffle masks used together with AESE (and an all-zero round key)
// to compute the key-schedule SubWord/RotWord steps.
DATA rotInvSRows<>+0x00(SB)/8, $0x080f0205040b0e01
DATA rotInvSRows<>+0x08(SB)/8, $0x00070a0d0c030609
GLOBL rotInvSRows<>(SB), (NOPTR+RODATA), $16
DATA invSRows<>+0x00(SB)/8, $0x0b0e0104070a0d00
DATA invSRows<>+0x08(SB)/8, $0x0306090c0f020508
GLOBL invSRows<>(SB), (NOPTR+RODATA), $16

// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9
	MOVD	xk+8(FP), R10
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]

	CMP	$12, R9		// Branch on round count: 10 (AES-128), 12 (AES-192), 14 (AES-256)
	BLT	enc128
	BEQ	enc196
enc256:
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESE	V1.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V2.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc196:
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESE	V3.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V4.B16, V0.B16
	AESMC	V0.B16, V0.B16
enc128:
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESE	V5.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V6.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V7.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V8.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V9.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V10.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V11.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V12.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V13.B16, V0.B16
	AESMC	V0.B16, V0.B16
	AESE	V14.B16, V0.B16
	VEOR	V0.B16, V15.B16, V0.B16
	VST1	[V0.B16], (R11)
	RET

// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R9
	MOVD	xk+8(FP), R10
	MOVD	dst+16(FP), R11
	MOVD	src+24(FP), R12

	VLD1	(R12), [V0.B16]

	CMP	$12, R9
	BLT	dec128
	BEQ	dec196
dec256:
	VLD1.P	32(R10), [V1.B16, V2.B16]
	AESD	V1.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V2.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec196:
	VLD1.P	32(R10), [V3.B16, V4.B16]
	AESD	V3.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V4.B16, V0.B16
	AESIMC	V0.B16, V0.B16
dec128:
	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16]
	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
	AESD	V5.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V6.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V7.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V8.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V9.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V10.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V11.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V12.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V13.B16, V0.B16
	AESIMC	V0.B16, V0.B16
	AESD	V14.B16, V0.B16
	VEOR	V0.B16, V15.B16, V0.B16
	VST1	[V0.B16], (R11)
	RET

// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
// Note that round keys are stored in uint128 format, not uint32
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVD	nr+0(FP), R8
	MOVD	key+8(FP), R9
	MOVD	enc+16(FP), R10
	MOVD	dec+24(FP), R11
	LDP	rotInvSRows<>(SB), (R0, R1)
	VMOV	R0, V3.D[0]
	VMOV	R1, V3.D[1]
	VEOR	V0.B16, V0.B16, V0.B16		// All zeroes
	MOVW	$1, R13				// Initial Rcon
	TBZ	$1, R8, ks192			// nr == 12 (AES-192)
	TBNZ	$2, R8, ks256			// nr == 14 (AES-256)
	LDPW	(R9), (R4, R5)
	LDPW	8(R9), (R6, R7)
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	MOVW	$0x1b, R14
ks128Loop:
	VMOV	R7, V2.S[0]
	WORD	$0x4E030042			// TBL V3.B16, [V2.B16], V2.B16
	AESE	V0.B16, V2.B16			// Use AES to compute the SBOX
	EORW	R13, R4
	LSLW	$1, R13				// Compute next Rcon
	ANDSW	$0x100, R13, ZR
	CSELW	NE, R14, R13, R13		// Fake modulo
	SUBS	$1, R8
	VMOV	V2.S[0], R0
	EORW	R0, R4
	EORW	R4, R5
	EORW	R5, R6
	EORW	R6, R7
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	BNE	ks128Loop
	CBZ	R11, ksDone			// If dec is nil we are done
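	// dec is non-nil: also produce the decryption key schedule, stored in
	// reverse round order; the first and last round keys are copied
	// unchanged, as in the equivalent inverse cipher of FIPS-197.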
	SUB	$176, R10			// Rewind over the 11 AES-128 round keys (11*16 bytes)
	// Decryption keys are encryption keys with InverseMixColumns applied
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V14.B16
	AESIMC	V1.B16, V13.B16
	VMOV	V2.B16, V12.B16
	VST1.P	[V12.B16, V13.B16, V14.B16], 48(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
ks192:
	LDPW	(R9), (R2, R3)
	LDPW	8(R9), (R4, R5)
	LDPW	16(R9), (R6, R7)
	STPW.P	(R2, R3), 8(R10)
	STPW.P	(R4, R5), 8(R10)
	SUB	$4, R8
ks192Loop:
	STPW.P	(R6, R7), 8(R10)
	VMOV	R7, V2.S[0]
	WORD	$0x4E030042			// TBL V3.B16, [V2.B16], V2.B16
	AESE	V0.B16, V2.B16
	EORW	R13, R2
	LSLW	$1, R13
	SUBS	$1, R8
	VMOV	V2.S[0], R0
	EORW	R0, R2
	EORW	R2, R3
	EORW	R3, R4
	EORW	R4, R5
	EORW	R5, R6
	EORW	R6, R7
	STPW.P	(R2, R3), 8(R10)
	STPW.P	(R4, R5), 8(R10)
	BNE	ks192Loop
	CBZ	R11, ksDone
	SUB	$208, R10			// Rewind over the 13 AES-192 round keys (13*16 bytes)
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16]
	VST1.P	[V0.B16], 16(R11)
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
	B	ksDone
ks256:
	LDP	invSRows<>(SB), (R0, R1)
	VMOV	R0, V4.D[0]
	VMOV	R1, V4.D[1]
	LDPW	(R9), (R0, R1)
	LDPW	8(R9), (R2, R3)
	LDPW	16(R9), (R4, R5)
	LDPW	24(R9), (R6, R7)
	STPW.P	(R0, R1), 8(R10)
	STPW.P	(R2, R3), 8(R10)
	SUB	$7, R8
ks256Loop:
	STPW.P	(R4, R5), 8(R10)
	STPW.P	(R6, R7), 8(R10)
	VMOV	R7, V2.S[0]
	WORD	$0x4E030042			// TBL V3.B16, [V2.B16], V2.B16
	AESE	V0.B16, V2.B16
	EORW	R13, R0
	LSLW	$1, R13
	SUBS	$1, R8
	VMOV	V2.S[0], R9
	EORW	R9, R0
	EORW	R0, R1
	EORW	R1, R2
	EORW	R2, R3
	VMOV	R3, V2.S[0]
	WORD	$0x4E040042			// TBL V4.B16, [V2.B16], V2.B16
	AESE	V0.B16, V2.B16
	VMOV	V2.S[0], R9
	EORW	R9, R4
	EORW	R4, R5
	EORW	R5, R6
	EORW	R6, R7
	STPW.P	(R0, R1), 8(R10)
	STPW.P	(R2, R3), 8(R10)
	BNE	ks256Loop
	CBZ	R11, ksDone
	SUB	$240, R10			// Rewind over the 15 AES-256 round keys (15*16 bytes)
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	VMOV	V0.B16, V7.B16
	AESIMC	V1.B16, V6.B16
	AESIMC	V2.B16, V5.B16
	AESIMC	V3.B16, V4.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V11.B16
	AESIMC	V1.B16, V10.B16
	AESIMC	V2.B16, V9.B16
	AESIMC	V3.B16, V8.B16
	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
	AESIMC	V0.B16, V15.B16
	AESIMC	V1.B16, V14.B16
	AESIMC	V2.B16, V13.B16
	AESIMC	V3.B16, V12.B16
	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
	AESIMC	V0.B16, V18.B16
	AESIMC	V1.B16, V17.B16
	VMOV	V2.B16, V16.B16
	VST1.P	[V16.B16, V17.B16, V18.B16], 48(R11)
	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
ksDone:
	RET
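
// The Go-side stubs implemented by the TEXT symbols above would look
// roughly like the following. This is a sketch only: the signatures are
// taken from the comments above, but the file name (e.g. cipher_asm.go)
// and any build tags are assumptions about how the package wires in this
// assembly.
//
//	//go:noescape
//	func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
//	//go:noescape
//	func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
//
//	// expandKeyAsm fills dec only when it is non-nil.
//	//go:noescape
//	func expandKeyAsm(nr int, key *byte, enc, dec *uint32)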