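Make vpaes-armv8.pl compatible with XOM (execute-only memory).

The constant tables move from .text to .rodata, and each PC-relative `adr` that formed a table address is replaced by an `adrp :pg_hi21:` / `add :lo12:` pair, so the routines shown here no longer address data inside the text section.

Change-Id: I27413467e5cac4e16ecbbb8d9a238ba5a8bcb9e7
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/35284
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>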
This commit is contained in:
parent
1d1345377a
commit
35941f2923
@@ -49,7 +49,7 @@ open OUT,"| \"$^X\" $xlate $flavour $output";
|
|||||||
*STDOUT=*OUT;
|
*STDOUT=*OUT;
|
||||||
|
|
||||||
$code.=<<___;
|
$code.=<<___;
|
||||||
.text
|
.section .rodata
|
||||||
|
|
||||||
.type _vpaes_consts,%object
|
.type _vpaes_consts,%object
|
||||||
.align 7 // totally strategic alignment
|
.align 7 // totally strategic alignment
|
||||||
@@ -140,6 +140,8 @@ _vpaes_consts:
|
|||||||
.asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
|
.asciz "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
|
||||||
.size _vpaes_consts,.-_vpaes_consts
|
.size _vpaes_consts,.-_vpaes_consts
|
||||||
.align 6
|
.align 6
|
||||||
|
|
||||||
|
.text
|
||||||
___
|
___
|
||||||
|
|
||||||
{
|
{
|
||||||
@@ -159,7 +161,8 @@ $code.=<<___;
|
|||||||
.type _vpaes_encrypt_preheat,%function
|
.type _vpaes_encrypt_preheat,%function
|
||||||
.align 4
|
.align 4
|
||||||
_vpaes_encrypt_preheat:
|
_vpaes_encrypt_preheat:
|
||||||
adr x10, .Lk_inv
|
adrp x10, :pg_hi21:.Lk_inv
|
||||||
|
add x10, x10, :lo12:.Lk_inv
|
||||||
movi v17.16b, #0x0f
|
movi v17.16b, #0x0f
|
||||||
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
|
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
|
||||||
ld1 {v20.2d-v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo
|
ld1 {v20.2d-v23.2d}, [x10],#64 // .Lk_ipt, .Lk_sbo
|
||||||
@@ -187,7 +190,8 @@ _vpaes_encrypt_preheat:
|
|||||||
_vpaes_encrypt_core:
|
_vpaes_encrypt_core:
|
||||||
mov x9, $key
|
mov x9, $key
|
||||||
ldr w8, [$key,#240] // pull rounds
|
ldr w8, [$key,#240] // pull rounds
|
||||||
adr x11, .Lk_mc_forward+16
|
adrp x11, :pg_hi21:.Lk_mc_forward+16
|
||||||
|
add x11, x11, :lo12:.Lk_mc_forward+16
|
||||||
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
||||||
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
||||||
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||||
@@ -272,7 +276,8 @@ vpaes_encrypt:
|
|||||||
_vpaes_encrypt_2x:
|
_vpaes_encrypt_2x:
|
||||||
mov x9, $key
|
mov x9, $key
|
||||||
ldr w8, [$key,#240] // pull rounds
|
ldr w8, [$key,#240] // pull rounds
|
||||||
adr x11, .Lk_mc_forward+16
|
adrp x11, :pg_hi21:.Lk_mc_forward+16
|
||||||
|
add x11, x11, :lo12:.Lk_mc_forward+16
|
||||||
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
// vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo
|
||||||
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key
|
||||||
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||||
@@ -375,9 +380,11 @@ _vpaes_encrypt_2x:
|
|||||||
.type _vpaes_decrypt_preheat,%function
|
.type _vpaes_decrypt_preheat,%function
|
||||||
.align 4
|
.align 4
|
||||||
_vpaes_decrypt_preheat:
|
_vpaes_decrypt_preheat:
|
||||||
adr x10, .Lk_inv
|
adrp x10, :pg_hi21:.Lk_inv
|
||||||
|
add x10, x10, :lo12:.Lk_inv
|
||||||
movi v17.16b, #0x0f
|
movi v17.16b, #0x0f
|
||||||
adr x11, .Lk_dipt
|
adrp x11, :pg_hi21:.Lk_dipt
|
||||||
|
add x11, x11, :lo12:.Lk_dipt
|
||||||
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
|
ld1 {v18.2d-v19.2d}, [x10],#32 // .Lk_inv
|
||||||
ld1 {v20.2d-v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo
|
ld1 {v20.2d-v23.2d}, [x11],#64 // .Lk_dipt, .Lk_dsbo
|
||||||
ld1 {v24.2d-v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd
|
ld1 {v24.2d-v27.2d}, [x11],#64 // .Lk_dsb9, .Lk_dsbd
|
||||||
@@ -399,10 +406,12 @@ _vpaes_decrypt_core:
|
|||||||
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
||||||
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
|
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
|
||||||
eor x11, x11, #0x30 // xor \$0x30, %r11
|
eor x11, x11, #0x30 // xor \$0x30, %r11
|
||||||
adr x10, .Lk_sr
|
adrp x10, :pg_hi21:.Lk_sr
|
||||||
|
add x10, x10, :lo12:.Lk_sr
|
||||||
and x11, x11, #0x30 // and \$0x30, %r11
|
and x11, x11, #0x30 // and \$0x30, %r11
|
||||||
add x11, x11, x10
|
add x11, x11, x10
|
||||||
adr x10, .Lk_mc_forward+48
|
adrp x10, :pg_hi21:.Lk_mc_forward+48
|
||||||
|
add x10, x10, :lo12:.Lk_mc_forward+48
|
||||||
|
|
||||||
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
||||||
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||||
@@ -508,10 +517,12 @@ _vpaes_decrypt_2x:
|
|||||||
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
// vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo
|
||||||
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
|
lsl x11, x8, #4 // mov %rax, %r11; shl \$4, %r11
|
||||||
eor x11, x11, #0x30 // xor \$0x30, %r11
|
eor x11, x11, #0x30 // xor \$0x30, %r11
|
||||||
adr x10, .Lk_sr
|
adrp x10, :pg_hi21:.Lk_sr
|
||||||
|
add x10, x10, :lo12:.Lk_sr
|
||||||
and x11, x11, #0x30 // and \$0x30, %r11
|
and x11, x11, #0x30 // and \$0x30, %r11
|
||||||
add x11, x11, x10
|
add x11, x11, x10
|
||||||
adr x10, .Lk_mc_forward+48
|
adrp x10, :pg_hi21:.Lk_mc_forward+48
|
||||||
|
add x10, x10, :lo12:.Lk_mc_forward+48
|
||||||
|
|
||||||
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key
|
||||||
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1
|
||||||
@@ -647,14 +658,18 @@ $code.=<<___;
|
|||||||
.type _vpaes_key_preheat,%function
|
.type _vpaes_key_preheat,%function
|
||||||
.align 4
|
.align 4
|
||||||
_vpaes_key_preheat:
|
_vpaes_key_preheat:
|
||||||
adr x10, .Lk_inv
|
adrp x10, :pg_hi21:.Lk_inv
|
||||||
|
add x10, x10, :lo12:.Lk_inv
|
||||||
movi v16.16b, #0x5b // .Lk_s63
|
movi v16.16b, #0x5b // .Lk_s63
|
||||||
adr x11, .Lk_sb1
|
adrp x11, :pg_hi21:.Lk_sb1
|
||||||
|
add x11, x11, :lo12:.Lk_sb1
|
||||||
movi v17.16b, #0x0f // .Lk_s0F
|
movi v17.16b, #0x0f // .Lk_s0F
|
||||||
ld1 {v18.2d-v21.2d}, [x10] // .Lk_inv, .Lk_ipt
|
ld1 {v18.2d-v21.2d}, [x10] // .Lk_inv, .Lk_ipt
|
||||||
adr x10, .Lk_dksd
|
adrp x10, :pg_hi21:.Lk_dksd
|
||||||
|
add x10, x10, :lo12:.Lk_dksd
|
||||||
ld1 {v22.2d-v23.2d}, [x11] // .Lk_sb1
|
ld1 {v22.2d-v23.2d}, [x11] // .Lk_sb1
|
||||||
adr x11, .Lk_mc_forward
|
adrp x11, :pg_hi21:.Lk_mc_forward
|
||||||
|
add x11, x11, :lo12:.Lk_mc_forward
|
||||||
ld1 {v24.2d-v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb
|
ld1 {v24.2d-v27.2d}, [x10],#64 // .Lk_dksd, .Lk_dksb
|
||||||
ld1 {v28.2d-v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9
|
ld1 {v28.2d-v31.2d}, [x10],#64 // .Lk_dkse, .Lk_dks9
|
||||||
ld1 {v8.2d}, [x10] // .Lk_rcon
|
ld1 {v8.2d}, [x10] // .Lk_rcon
|
||||||
@@ -677,7 +692,9 @@ _vpaes_schedule_core:
|
|||||||
bl _vpaes_schedule_transform
|
bl _vpaes_schedule_transform
|
||||||
mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7
|
mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7
|
||||||
|
|
||||||
adr x10, .Lk_sr // lea .Lk_sr(%rip),%r10
|
adrp x10, :pg_hi21:.Lk_sr // lea .Lk_sr(%rip),%r10
|
||||||
|
add x10, x10, :lo12:.Lk_sr
|
||||||
|
|
||||||
add x8, x8, x10
|
add x8, x8, x10
|
||||||
cbnz $dir, .Lschedule_am_decrypting
|
cbnz $dir, .Lschedule_am_decrypting
|
||||||
|
|
||||||
@@ -803,12 +820,15 @@ _vpaes_schedule_core:
|
|||||||
.align 4
|
.align 4
|
||||||
.Lschedule_mangle_last:
|
.Lschedule_mangle_last:
|
||||||
// schedule last round key from xmm0
|
// schedule last round key from xmm0
|
||||||
adr x11, .Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
|
adrp x11, :pg_hi21:.Lk_deskew // lea .Lk_deskew(%rip),%r11 # prepare to deskew
|
||||||
|
add x11, x11, :lo12:.Lk_deskew
|
||||||
|
|
||||||
cbnz $dir, .Lschedule_mangle_last_dec
|
cbnz $dir, .Lschedule_mangle_last_dec
|
||||||
|
|
||||||
// encrypting
|
// encrypting
|
||||||
ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1
|
ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1
|
||||||
adr x11, .Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
|
adrp x11, :pg_hi21:.Lk_opt // lea .Lk_opt(%rip), %r11 # prepare to output transform
|
||||||
|
add x11, x11, :lo12:.Lk_opt
|
||||||
add $out, $out, #32 // add \$32, %rdx
|
add $out, $out, #32 // add \$32, %rdx
|
||||||
tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute
|
tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user