7c075b99e2
The symbol “rcon” should be local in order to avoid collisions and it's much easier on delocate if some of the expressions are evaluated in Perl rather than left in the resulting .S file. Also fix the perlasm style so the symbols are actually local. Change-Id: Iddfc661fc3a6504bcc5732abaa1174da89ad805e Reviewed-on: https://boringssl-review.googlesource.com/16524 Reviewed-by: David Benjamin <davidben@google.com> Commit-Queue: David Benjamin <davidben@google.com> CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
3806 lines
91 KiB
Raku
3806 lines
91 KiB
Raku
#! /usr/bin/env perl
|
|
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the OpenSSL license (the "License"). You may not use
|
|
# this file except in compliance with the License. You can obtain a copy
|
|
# in the file LICENSE in the source distribution or at
|
|
# https://www.openssl.org/source/license.html
|
|
|
|
#
|
|
# ====================================================================
|
|
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
|
|
# project. The module is, however, dual licensed under OpenSSL and
|
|
# CRYPTOGAMS licenses depending on where you obtain it. For further
|
|
# details see http://www.openssl.org/~appro/cryptogams/.
|
|
# ====================================================================
|
|
#
|
|
# This module implements support for AES instructions as per PowerISA
|
|
# specification version 2.07, first implemented by POWER8 processor.
|
|
# The module is endian-agnostic in the sense that it supports both big-
|
|
# and little-endian cases. Data alignment in parallelizable modes is
|
|
# handled with VSX loads and stores, which implies MSR.VSX flag being
|
|
# set. It should also be noted that ISA specification doesn't prohibit
|
|
# alignment exceptions for these instructions on page boundaries.
|
|
# Initially alignment was handled in pure AltiVec/VMX way [when data
|
|
# is aligned programmatically, which in turn guarantees exception-
|
|
# free execution], but it turned out to hamper performance when vcipher
|
|
# instructions are interleaved. It's reckoned that eventual
|
|
# misalignment penalties at page boundaries are on average lower
|
|
# than additional overhead in pure AltiVec approach.
|
|
#
|
|
# May 2016
|
|
#
|
|
# Add XTS subroutine, 9x on little- and 12x improvement on big-endian
|
|
# systems were measured.
|
|
#
|
|
######################################################################
|
|
# Current large-block performance in cycles per byte processed with
|
|
# 128-bit key (less is better).
|
|
#
|
|
# CBC en-/decrypt CTR XTS
|
|
# POWER8[le] 3.96/0.72 0.74 1.1
|
|
# POWER8[be] 3.75/0.65 0.66 1.0
|
|
|
|
# Command line: <flavour> <output>.  The flavour string selects the pointer
# size (it must contain "64" or "32") and, through a trailing "le", the
# byte order; the second argument is handed on to ppc-xlate.pl.
$flavour = shift;

# Pick the pointer width and the matching load/store/compare/shift
# mnemonics that the assembly templates below interpolate.
if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

# Non-zero (equal to the pointer size) for little-endian flavours, else 0.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Locate ppc-xlate.pl, first next to this script and then in the shared
# perlasm directory three levels up.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# The check must use the low-precedence "or": with "||" the test applied to
# the (always true) command string, so a failed open was never reported.
open(STDOUT,"| $^X $xlate $flavour ".shift) or die "can't call $xlate: $!";

$FRAME=8*$SIZE_T;	# base stack frame size used by the routines below
$prefix="aes_hw";	# prefix of every exported symbol

$sp="r1";		# stack pointer
$vrsave="r12";		# scratch GPR holding the caller's VRSAVE value
|
|
|
|
#########################################################################
|
|
{{{	# Key setup procedures						#
# Emits three pieces of assembly:
#
#   Lrcon / Lconsts
#	A 64-byte constant table followed by a helper that returns the
#	table's address in $ptr (r6).  The helper reads its own address via
#	"bcl 20,31,$+4" and subtracts 0x48: the 0x40 bytes of table plus the
#	two instructions that precede the second mflr.
#
#   ${prefix}_set_encrypt_key(inp=r3, bits=r4, out=r5)
#	Returns in r3: -1 if inp or out is zero, -2 if bits is below 128,
#	above 256 or has any of its low six bits set, otherwise 0.  Round
#	keys are stored through $out with unaligned destinations handled by
#	the outperm/outmask/outhead merge, and the round count (10, 12 or
#	14) is stored 240 bytes past the first round key.
#
#   ${prefix}_set_decrypt_key(inp=r3, bits=r4, out=r5)
#	Runs the encrypt key setup, then swaps the 16-byte round keys end
#	for end in place (rounds/2 iterations).  A non-zero result from the
#	encrypt setup is returned unchanged.
#
# Lines prefixed with "le?", "be?" or "?" and the "?rev"/"?asis" table
# suffixes are endian markers; keep them exactly as they are and do not
# append trailing comments to the table lines.
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));

$code.=<<___;
.machine	"any"

.text

.align	7
Lrcon:
.long	0x01000000, 0x01000000, 0x01000000, 0x01000000	?rev
.long	0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000	?rev
.long	0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c	?rev
.long	0,0,0,0						?asis
Lconsts:
	mflr	r0
	bcl	20,31,\$+4
	mflr	$ptr	 #vvvvv "distance between . and rcon
	addi	$ptr,$ptr,-0x48
	mtlr	r0
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,0,0
.asciz	"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl	.${prefix}_set_encrypt_key
.align	5
.${prefix}_set_encrypt_key:
Lset_encrypt_key:
	mflr		r11
	$PUSH		r11,$LRSAVE($sp)

	li		$ptr,-1
	${UCMP}i	$inp,0
	beq-		Lenc_key_abort		# if ($inp==0) return -1;
	${UCMP}i	$out,0
	beq-		Lenc_key_abort		# if ($out==0) return -1;
	li		$ptr,-2
	cmpwi		$bits,128
	blt-		Lenc_key_abort
	cmpwi		$bits,256
	bgt-		Lenc_key_abort
	andi.		r0,$bits,0x3f
	bne-		Lenc_key_abort

	lis		r0,0xfff0
	mfspr		$vrsave,256
	mtspr		256,r0

	bl		Lconsts
	mtlr		r11

	neg		r9,$inp
	lvx		$in0,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	lvsr		$key,0,r9		# borrow $key
	li		r8,0x20
	cmpwi		$bits,192
	lvx		$in1,0,$inp
	le?vspltisb	$mask,0x0f		# borrow $mask
	lvx		$rcon,0,$ptr
	le?vxor		$key,$key,$mask		# adjust for byte swap
	lvx		$mask,r8,$ptr
	addi		$ptr,$ptr,0x10
	vperm		$in0,$in0,$in1,$key	# align [and byte swap in LE]
	li		$cnt,8
	vxor		$zero,$zero,$zero
	mtctr		$cnt

	?lvsr		$outperm,0,$out
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$zero,$outmask,$outperm

	blt		Loop128
	addi		$inp,$inp,8
	beq		L192
	addi		$inp,$inp,8
	b		L256

.align	4
Loop128:
	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	bdnz		Loop128

	lvx		$rcon,0,$ptr		# last two round keys

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key

	vperm		$key,$in0,$in0,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out

	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,0x50

	li		$rounds,10
	b		Ldone

.align	4
L192:
	lvx		$tmp,0,$inp
	li		$cnt,4
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	vspltisb	$key,8			# borrow $key
	mtctr		$cnt
	vsububm		$mask,$mask,$key	# adjust the mask

Loop192:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vcipherlast	$key,$key,$rcon

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp

	vsldoi		$stage,$zero,$in1,8
	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vsldoi		$stage,$stage,$in0,8

	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vsldoi		$stage,$in0,$in1,8
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vperm		$outtail,$stage,$stage,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	stvx		$stage,0,$out
	addi		$out,$out,16

	vspltw		$tmp,$in0,3
	vxor		$tmp,$tmp,$in1
	vsldoi		$in1,$zero,$in1,12	# >>32
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in1,$in1,$tmp
	vxor		$in0,$in0,$key
	vxor		$in1,$in1,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdnz		Loop192

	li		$rounds,12
	addi		$out,$out,0x20
	b		Ldone

.align	4
L256:
	lvx		$tmp,0,$inp
	li		$cnt,7
	li		$rounds,14
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$out,$out,16
	vperm		$in1,$in1,$tmp,$key	# align [and byte swap in LE]
	mtctr		$cnt

Loop256:
	vperm		$key,$in1,$in1,$mask	# rotate-n-splat
	vsldoi		$tmp,$zero,$in0,12	# >>32
	vperm		$outtail,$in1,$in1,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	vcipherlast	$key,$key,$rcon
	stvx		$stage,0,$out
	addi		$out,$out,16

	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in0,$in0,$tmp
	vadduwm		$rcon,$rcon,$rcon
	vxor		$in0,$in0,$key
	vperm		$outtail,$in0,$in0,$outperm	# rotate
	vsel		$stage,$outhead,$outtail,$outmask
	vmr		$outhead,$outtail
	stvx		$stage,0,$out
	addi		$inp,$out,15		# 15 is not typo
	addi		$out,$out,16
	bdz		Ldone

	vspltw		$key,$in0,3		# just splat
	vsldoi		$tmp,$zero,$in1,12	# >>32
	vsbox		$key,$key

	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp
	vsldoi		$tmp,$zero,$tmp,12	# >>32
	vxor		$in1,$in1,$tmp

	vxor		$in1,$in1,$key
	b		Loop256

.align	4
Ldone:
	lvx		$in1,0,$inp		# redundant in aligned case
	vsel		$in1,$outhead,$in1,$outmask
	stvx		$in1,0,$inp
	li		$ptr,0
	mtspr		256,$vrsave
	stw		$rounds,0($out)

Lenc_key_abort:
	mr		r3,$ptr
	blr
	.long		0
	.byte		0,12,0x14,1,0,0,3,0
	.long		0
.size	.${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl	.${prefix}_set_decrypt_key
.align	5
.${prefix}_set_decrypt_key:
	$STU		$sp,-$FRAME($sp)
	mflr		r10
	$PUSH		r10,`$FRAME+$LRSAVE`($sp)
	bl		Lset_encrypt_key
	mtlr		r10

	cmpwi		r3,0
	bne-		Ldec_key_abort

	slwi		$cnt,$rounds,4
	subi		$inp,$out,240		# first round key
	srwi		$rounds,$rounds,1
	add		$out,$inp,$cnt		# last round key
	mtctr		$rounds

Ldeckey:
	lwz		r0, 0($inp)
	lwz		r6, 4($inp)
	lwz		r7, 8($inp)
	lwz		r8, 12($inp)
	addi		$inp,$inp,16
	lwz		r9, 0($out)
	lwz		r10,4($out)
	lwz		r11,8($out)
	lwz		r12,12($out)
	stw		r0, 0($out)
	stw		r6, 4($out)
	stw		r7, 8($out)
	stw		r8, 12($out)
	subi		$out,$out,16
	stw		r9, -16($inp)
	stw		r10,-12($inp)
	stw		r11,-8($inp)
	stw		r12,-4($inp)
	bdnz		Ldeckey

	xor		r3,r3,r3		# return value
Ldec_key_abort:
	addi		$sp,$sp,$FRAME
	blr
	.long		0
	.byte		0,12,4,1,0x80,0,3,0
	.long		0
.size	.${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
|
|
#########################################################################
|
|
{{{	# Single block en- and decrypt procedures			#
# gen_block(DIR) appends the single-block routine ${prefix}_${DIR}crypt
# to the generated assembly.  DIR is "en" or "de"; "de" switches the cipher
# instructions to their vncipher/vncipherlast forms.
#
# Generated routine: (inp=r3, out=r4, key=r5).  The round count is read
# from offset 240 of the key schedule.  Input, output and key schedule may
# be unaligned: each is accessed with lvx/stvx on the containing aligned
# quadwords and merged with permute/select masks.
#
# The sub takes one argument, so it is declared without the former empty
# prototype, which had forced every caller to use the "&name(...)" form.
sub gen_block {
my $dir = shift;
my $n   = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl	.${prefix}_${dir}crypt
.align	5
.${prefix}_${dir}crypt:
	lwz		$rounds,240($key)
	lis		r0,0xfc00
	mfspr		$vrsave,256
	li		$idx,15			# 15 is not typo
	mtspr		256,r0

	lvx		v0,0,$inp
	neg		r11,$out
	lvx		v1,$idx,$inp
	lvsl		v2,0,$inp		# inpperm
	le?vspltisb	v4,0x0f
	?lvsl		v3,0,r11		# outperm
	le?vxor		v2,v2,v4
	li		$idx,16
	vperm		v0,v0,v1,v2		# align [and byte swap in LE]
	lvx		v1,0,$key
	?lvsl		v5,0,$key		# keyperm
	srwi		$rounds,$rounds,1
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	subi		$rounds,$rounds,1
	?vperm		v1,v1,v2,v5		# align round key

	vxor		v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	mtctr		$rounds

Loop_${dir}c:
	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	addi		$idx,$idx,16
	?vperm		v1,v1,v2,v5
	v${n}cipher	v0,v0,v1
	lvx		v1,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_${dir}c

	?vperm		v2,v2,v1,v5
	v${n}cipher	v0,v0,v2
	lvx		v2,$idx,$key
	?vperm		v1,v1,v2,v5
	v${n}cipherlast	v0,v0,v1

	vspltisb	v2,-1
	vxor		v1,v1,v1
	li		$idx,15			# 15 is not typo
	?vperm		v2,v1,v2,v3		# outmask
	le?vxor		v3,v3,v4
	lvx		v1,0,$out		# outhead
	vperm		v0,v0,v0,v3		# rotate [and byte swap in LE]
	vsel		v1,v1,v0,v2
	lvx		v4,$idx,$out
	stvx		v1,0,$out
	vsel		v0,v0,v4,v2
	stvx		v0,$idx,$out

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,3,0
	.long		0
.size	.${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
gen_block("en");
gen_block("de");
}}}
|
|
#########################################################################
|
|
{{{ # CBC en- and decrypt procedures #
|
|
# ${prefix}_cbc_encrypt(inp=r3, out=r4, len=r5, key=r6, ivp=r7, enc=r8)
#
# Returns immediately when len is below 16 (unsigned compare).  A zero enc
# selects decryption, anything else encryption.  Blocks are processed one
# at a time while at least 16 bytes remain; decryption with 128 or more
# bytes left branches to _aesp8_cbc_decrypt8x instead.  Input, output, key
# schedule and IV may all be unaligned, and the final chaining value is
# written back through ivp before returning.
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)=		map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
						map("v$_",(4..10));
$code.=<<___;
.globl	.${prefix}_cbc_encrypt
.align	5
.${prefix}_cbc_encrypt:
	${UCMP}i	$len,16
	bltlr-

	cmpwi		$enc,0			# test direction
	lis		r0,0xffe0
	mfspr		$vrsave,256
	mtspr		256,r0

	li		$idx,15
	vxor		$rndkey0,$rndkey0,$rndkey0
	le?vspltisb	$tmp,0x0f

	lvx		$ivec,0,$ivp		# load [unaligned] iv
	lvsl		$inpperm,0,$ivp
	lvx		$inptail,$idx,$ivp
	le?vxor		$inpperm,$inpperm,$tmp
	vperm		$ivec,$ivec,$inptail,$inpperm

	neg		r11,$inp
	?lvsl		$keyperm,0,$key		# prepare for unaligned key
	lwz		$rounds,240($key)

	lvsr		$inpperm,0,r11		# prepare for unaligned load
	lvx		$inptail,0,$inp
	addi		$inp,$inp,15		# 15 is not typo
	le?vxor		$inpperm,$inpperm,$tmp

	?lvsr		$outperm,0,$out		# prepare for unaligned store
	vspltisb	$outmask,-1
	lvx		$outhead,0,$out
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp

	srwi		$rounds,$rounds,1
	li		$idx,16
	subi		$rounds,$rounds,1
	beq		Lcbc_dec

Lcbc_enc:
	vmr		$inout,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$inout,$inout,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	vxor		$inout,$inout,$ivec

Loop_cbc_enc:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipher		$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_enc

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vcipher		$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vcipherlast	$ivec,$inout,$rndkey0
	${UCMP}i	$len,16

	vperm		$tmp,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_enc

	b		Lcbc_done

.align	4
Lcbc_dec:
	${UCMP}i	$len,128
	bge		_aesp8_cbc_decrypt8x
	vmr		$tmp,$inptail
	lvx		$inptail,0,$inp
	addi		$inp,$inp,16
	mtctr		$rounds
	subi		$len,$len,16		# len-=16

	lvx		$rndkey0,0,$key
	vperm		$tmp,$tmp,$inptail,$inpperm
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vxor		$inout,$tmp,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16

Loop_cbc_dec:
	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	addi		$idx,$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipher	$inout,$inout,$rndkey0
	lvx		$rndkey0,$idx,$key
	addi		$idx,$idx,16
	bdnz		Loop_cbc_dec

	?vperm		$rndkey1,$rndkey1,$rndkey0,$keyperm
	vncipher	$inout,$inout,$rndkey1
	lvx		$rndkey1,$idx,$key
	li		$idx,16
	?vperm		$rndkey0,$rndkey0,$rndkey1,$keyperm
	vncipherlast	$inout,$inout,$rndkey0
	${UCMP}i	$len,16

	vxor		$inout,$inout,$ivec
	vmr		$ivec,$tmp
	vperm		$tmp,$inout,$inout,$outperm
	vsel		$inout,$outhead,$tmp,$outmask
	vmr		$outhead,$tmp
	stvx		$inout,0,$out
	addi		$out,$out,16
	bge		Lcbc_dec

Lcbc_done:
	addi		$out,$out,-1
	lvx		$inout,0,$out		# redundant in aligned case
	vsel		$inout,$outhead,$inout,$outmask
	stvx		$inout,0,$out

	neg		$enc,$ivp		# write [unaligned] iv
	li		$idx,15			# 15 is not typo
	vxor		$rndkey0,$rndkey0,$rndkey0
	vspltisb	$outmask,-1
	le?vspltisb	$tmp,0x0f
	?lvsl		$outperm,0,$enc
	?vperm		$outmask,$rndkey0,$outmask,$outperm
	le?vxor		$outperm,$outperm,$tmp
	lvx		$outhead,0,$ivp
	vperm		$ivec,$ivec,$ivec,$outperm
	vsel		$inout,$outhead,$ivec,$outmask
	lvx		$inptail,$idx,$ivp
	stvx		$inout,0,$ivp
	vsel		$inout,$ivec,$inptail,$outmask
	stvx		$inout,$idx,$ivp

	mtspr		256,$vrsave
	blr
	.long		0
	.byte		0,12,0x14,0,0,0,6,0
	.long		0
___
|
|
#########################################################################
|
|
{{ # Optimized CBC decrypt procedure #
|
|
my $key_="r11";
|
|
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
|
|
$x00=0 if ($flavour =~ /osx/);
|
|
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
|
|
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
|
|
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
|
|
# v26-v31 last 6 round keys
|
|
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
|
|
|
|
$code.=<<___;
|
|
.align 5
|
|
_aesp8_cbc_decrypt8x:
|
|
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
|
li r10,`$FRAME+8*16+15`
|
|
li r11,`$FRAME+8*16+31`
|
|
stvx v20,r10,$sp # ABI says so
|
|
addi r10,r10,32
|
|
stvx v21,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v22,r10,$sp
|
|
addi r10,r10,32
|
|
stvx v23,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v24,r10,$sp
|
|
addi r10,r10,32
|
|
stvx v25,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v26,r10,$sp
|
|
addi r10,r10,32
|
|
stvx v27,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v28,r10,$sp
|
|
addi r10,r10,32
|
|
stvx v29,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v30,r10,$sp
|
|
stvx v31,r11,$sp
|
|
li r0,-1
|
|
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
|
li $x10,0x10
|
|
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
li $x20,0x20
|
|
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
li $x30,0x30
|
|
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
li $x40,0x40
|
|
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
li $x50,0x50
|
|
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
li $x60,0x60
|
|
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
li $x70,0x70
|
|
mtspr 256,r0
|
|
|
|
subi $rounds,$rounds,3 # -4 in total
|
|
subi $len,$len,128 # bias
|
|
|
|
lvx $rndkey0,$x00,$key # load key schedule
|
|
lvx v30,$x10,$key
|
|
addi $key,$key,0x20
|
|
lvx v31,$x00,$key
|
|
?vperm $rndkey0,$rndkey0,v30,$keyperm
|
|
addi $key_,$sp,`$FRAME+15`
|
|
mtctr $rounds
|
|
|
|
Load_cbc_dec_key:
|
|
?vperm v24,v30,v31,$keyperm
|
|
lvx v30,$x10,$key
|
|
addi $key,$key,0x20
|
|
stvx v24,$x00,$key_ # off-load round[1]
|
|
?vperm v25,v31,v30,$keyperm
|
|
lvx v31,$x00,$key
|
|
stvx v25,$x10,$key_ # off-load round[2]
|
|
addi $key_,$key_,0x20
|
|
bdnz Load_cbc_dec_key
|
|
|
|
lvx v26,$x10,$key
|
|
?vperm v24,v30,v31,$keyperm
|
|
lvx v27,$x20,$key
|
|
stvx v24,$x00,$key_ # off-load round[3]
|
|
?vperm v25,v31,v26,$keyperm
|
|
lvx v28,$x30,$key
|
|
stvx v25,$x10,$key_ # off-load round[4]
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
?vperm v26,v26,v27,$keyperm
|
|
lvx v29,$x40,$key
|
|
?vperm v27,v27,v28,$keyperm
|
|
lvx v30,$x50,$key
|
|
?vperm v28,v28,v29,$keyperm
|
|
lvx v31,$x60,$key
|
|
?vperm v29,v29,v30,$keyperm
|
|
lvx $out0,$x70,$key # borrow $out0
|
|
?vperm v30,v30,v31,$keyperm
|
|
lvx v24,$x00,$key_ # pre-load round[1]
|
|
?vperm v31,v31,$out0,$keyperm
|
|
lvx v25,$x10,$key_ # pre-load round[2]
|
|
|
|
#lvx $inptail,0,$inp # "caller" already did this
|
|
#addi $inp,$inp,15 # 15 is not typo
|
|
subi $inp,$inp,15 # undo "caller"
|
|
|
|
le?li $idx,8
|
|
lvx_u $in0,$x00,$inp # load first 8 "words"
|
|
le?lvsl $inpperm,0,$idx
|
|
le?vspltisb $tmp,0x0f
|
|
lvx_u $in1,$x10,$inp
|
|
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
|
|
lvx_u $in2,$x20,$inp
|
|
le?vperm $in0,$in0,$in0,$inpperm
|
|
lvx_u $in3,$x30,$inp
|
|
le?vperm $in1,$in1,$in1,$inpperm
|
|
lvx_u $in4,$x40,$inp
|
|
le?vperm $in2,$in2,$in2,$inpperm
|
|
vxor $out0,$in0,$rndkey0
|
|
lvx_u $in5,$x50,$inp
|
|
le?vperm $in3,$in3,$in3,$inpperm
|
|
vxor $out1,$in1,$rndkey0
|
|
lvx_u $in6,$x60,$inp
|
|
le?vperm $in4,$in4,$in4,$inpperm
|
|
vxor $out2,$in2,$rndkey0
|
|
lvx_u $in7,$x70,$inp
|
|
addi $inp,$inp,0x80
|
|
le?vperm $in5,$in5,$in5,$inpperm
|
|
vxor $out3,$in3,$rndkey0
|
|
le?vperm $in6,$in6,$in6,$inpperm
|
|
vxor $out4,$in4,$rndkey0
|
|
le?vperm $in7,$in7,$in7,$inpperm
|
|
vxor $out5,$in5,$rndkey0
|
|
vxor $out6,$in6,$rndkey0
|
|
vxor $out7,$in7,$rndkey0
|
|
|
|
mtctr $rounds
|
|
b Loop_cbc_dec8x
|
|
.align 5
|
|
Loop_cbc_dec8x:
|
|
vncipher $out0,$out0,v24
|
|
vncipher $out1,$out1,v24
|
|
vncipher $out2,$out2,v24
|
|
vncipher $out3,$out3,v24
|
|
vncipher $out4,$out4,v24
|
|
vncipher $out5,$out5,v24
|
|
vncipher $out6,$out6,v24
|
|
vncipher $out7,$out7,v24
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vncipher $out0,$out0,v25
|
|
vncipher $out1,$out1,v25
|
|
vncipher $out2,$out2,v25
|
|
vncipher $out3,$out3,v25
|
|
vncipher $out4,$out4,v25
|
|
vncipher $out5,$out5,v25
|
|
vncipher $out6,$out6,v25
|
|
vncipher $out7,$out7,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz Loop_cbc_dec8x
|
|
|
|
subic $len,$len,128 # $len-=128
|
|
vncipher $out0,$out0,v24
|
|
vncipher $out1,$out1,v24
|
|
vncipher $out2,$out2,v24
|
|
vncipher $out3,$out3,v24
|
|
vncipher $out4,$out4,v24
|
|
vncipher $out5,$out5,v24
|
|
vncipher $out6,$out6,v24
|
|
vncipher $out7,$out7,v24
|
|
|
|
subfe. r0,r0,r0 # borrow?-1:0
|
|
vncipher $out0,$out0,v25
|
|
vncipher $out1,$out1,v25
|
|
vncipher $out2,$out2,v25
|
|
vncipher $out3,$out3,v25
|
|
vncipher $out4,$out4,v25
|
|
vncipher $out5,$out5,v25
|
|
vncipher $out6,$out6,v25
|
|
vncipher $out7,$out7,v25
|
|
|
|
and r0,r0,$len
|
|
vncipher $out0,$out0,v26
|
|
vncipher $out1,$out1,v26
|
|
vncipher $out2,$out2,v26
|
|
vncipher $out3,$out3,v26
|
|
vncipher $out4,$out4,v26
|
|
vncipher $out5,$out5,v26
|
|
vncipher $out6,$out6,v26
|
|
vncipher $out7,$out7,v26
|
|
|
|
add $inp,$inp,r0 # $inp is adjusted in such
|
|
# way that at exit from the
|
|
# loop inX-in7 are loaded
|
|
# with last "words"
|
|
vncipher $out0,$out0,v27
|
|
vncipher $out1,$out1,v27
|
|
vncipher $out2,$out2,v27
|
|
vncipher $out3,$out3,v27
|
|
vncipher $out4,$out4,v27
|
|
vncipher $out5,$out5,v27
|
|
vncipher $out6,$out6,v27
|
|
vncipher $out7,$out7,v27
|
|
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
vncipher $out0,$out0,v28
|
|
vncipher $out1,$out1,v28
|
|
vncipher $out2,$out2,v28
|
|
vncipher $out3,$out3,v28
|
|
vncipher $out4,$out4,v28
|
|
vncipher $out5,$out5,v28
|
|
vncipher $out6,$out6,v28
|
|
vncipher $out7,$out7,v28
|
|
lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
|
|
vncipher $out0,$out0,v29
|
|
vncipher $out1,$out1,v29
|
|
vncipher $out2,$out2,v29
|
|
vncipher $out3,$out3,v29
|
|
vncipher $out4,$out4,v29
|
|
vncipher $out5,$out5,v29
|
|
vncipher $out6,$out6,v29
|
|
vncipher $out7,$out7,v29
|
|
lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
|
|
vncipher $out0,$out0,v30
|
|
vxor $ivec,$ivec,v31 # xor with last round key
|
|
vncipher $out1,$out1,v30
|
|
vxor $in0,$in0,v31
|
|
vncipher $out2,$out2,v30
|
|
vxor $in1,$in1,v31
|
|
vncipher $out3,$out3,v30
|
|
vxor $in2,$in2,v31
|
|
vncipher $out4,$out4,v30
|
|
vxor $in3,$in3,v31
|
|
vncipher $out5,$out5,v30
|
|
vxor $in4,$in4,v31
|
|
vncipher $out6,$out6,v30
|
|
vxor $in5,$in5,v31
|
|
vncipher $out7,$out7,v30
|
|
vxor $in6,$in6,v31
|
|
|
|
vncipherlast $out0,$out0,$ivec
|
|
vncipherlast $out1,$out1,$in0
|
|
lvx_u $in0,$x00,$inp # load next input block
|
|
vncipherlast $out2,$out2,$in1
|
|
lvx_u $in1,$x10,$inp
|
|
vncipherlast $out3,$out3,$in2
|
|
le?vperm $in0,$in0,$in0,$inpperm
|
|
lvx_u $in2,$x20,$inp
|
|
vncipherlast $out4,$out4,$in3
|
|
le?vperm $in1,$in1,$in1,$inpperm
|
|
lvx_u $in3,$x30,$inp
|
|
vncipherlast $out5,$out5,$in4
|
|
le?vperm $in2,$in2,$in2,$inpperm
|
|
lvx_u $in4,$x40,$inp
|
|
vncipherlast $out6,$out6,$in5
|
|
le?vperm $in3,$in3,$in3,$inpperm
|
|
lvx_u $in5,$x50,$inp
|
|
vncipherlast $out7,$out7,$in6
|
|
le?vperm $in4,$in4,$in4,$inpperm
|
|
lvx_u $in6,$x60,$inp
|
|
vmr $ivec,$in7
|
|
le?vperm $in5,$in5,$in5,$inpperm
|
|
lvx_u $in7,$x70,$inp
|
|
addi $inp,$inp,0x80
|
|
|
|
le?vperm $out0,$out0,$out0,$inpperm
|
|
le?vperm $out1,$out1,$out1,$inpperm
|
|
stvx_u $out0,$x00,$out
|
|
le?vperm $in6,$in6,$in6,$inpperm
|
|
vxor $out0,$in0,$rndkey0
|
|
le?vperm $out2,$out2,$out2,$inpperm
|
|
stvx_u $out1,$x10,$out
|
|
le?vperm $in7,$in7,$in7,$inpperm
|
|
vxor $out1,$in1,$rndkey0
|
|
le?vperm $out3,$out3,$out3,$inpperm
|
|
stvx_u $out2,$x20,$out
|
|
vxor $out2,$in2,$rndkey0
|
|
le?vperm $out4,$out4,$out4,$inpperm
|
|
stvx_u $out3,$x30,$out
|
|
vxor $out3,$in3,$rndkey0
|
|
le?vperm $out5,$out5,$out5,$inpperm
|
|
stvx_u $out4,$x40,$out
|
|
vxor $out4,$in4,$rndkey0
|
|
le?vperm $out6,$out6,$out6,$inpperm
|
|
stvx_u $out5,$x50,$out
|
|
vxor $out5,$in5,$rndkey0
|
|
le?vperm $out7,$out7,$out7,$inpperm
|
|
stvx_u $out6,$x60,$out
|
|
vxor $out6,$in6,$rndkey0
|
|
stvx_u $out7,$x70,$out
|
|
addi $out,$out,0x80
|
|
vxor $out7,$in7,$rndkey0
|
|
|
|
mtctr $rounds
|
|
beq Loop_cbc_dec8x # did $len-=128 borrow?
|
|
|
|
addic. $len,$len,128
|
|
beq Lcbc_dec8x_done
|
|
nop
|
|
nop
|
|
|
|
Loop_cbc_dec8x_tail: # up to 7 "words" tail...
|
|
vncipher $out1,$out1,v24
|
|
vncipher $out2,$out2,v24
|
|
vncipher $out3,$out3,v24
|
|
vncipher $out4,$out4,v24
|
|
vncipher $out5,$out5,v24
|
|
vncipher $out6,$out6,v24
|
|
vncipher $out7,$out7,v24
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vncipher $out1,$out1,v25
|
|
vncipher $out2,$out2,v25
|
|
vncipher $out3,$out3,v25
|
|
vncipher $out4,$out4,v25
|
|
vncipher $out5,$out5,v25
|
|
vncipher $out6,$out6,v25
|
|
vncipher $out7,$out7,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz Loop_cbc_dec8x_tail
|
|
|
|
vncipher $out1,$out1,v24
|
|
vncipher $out2,$out2,v24
|
|
vncipher $out3,$out3,v24
|
|
vncipher $out4,$out4,v24
|
|
vncipher $out5,$out5,v24
|
|
vncipher $out6,$out6,v24
|
|
vncipher $out7,$out7,v24
|
|
|
|
vncipher $out1,$out1,v25
|
|
vncipher $out2,$out2,v25
|
|
vncipher $out3,$out3,v25
|
|
vncipher $out4,$out4,v25
|
|
vncipher $out5,$out5,v25
|
|
vncipher $out6,$out6,v25
|
|
vncipher $out7,$out7,v25
|
|
|
|
vncipher $out1,$out1,v26
|
|
vncipher $out2,$out2,v26
|
|
vncipher $out3,$out3,v26
|
|
vncipher $out4,$out4,v26
|
|
vncipher $out5,$out5,v26
|
|
vncipher $out6,$out6,v26
|
|
vncipher $out7,$out7,v26
|
|
|
|
vncipher $out1,$out1,v27
|
|
vncipher $out2,$out2,v27
|
|
vncipher $out3,$out3,v27
|
|
vncipher $out4,$out4,v27
|
|
vncipher $out5,$out5,v27
|
|
vncipher $out6,$out6,v27
|
|
vncipher $out7,$out7,v27
|
|
|
|
vncipher $out1,$out1,v28
|
|
vncipher $out2,$out2,v28
|
|
vncipher $out3,$out3,v28
|
|
vncipher $out4,$out4,v28
|
|
vncipher $out5,$out5,v28
|
|
vncipher $out6,$out6,v28
|
|
vncipher $out7,$out7,v28
|
|
|
|
vncipher $out1,$out1,v29
|
|
vncipher $out2,$out2,v29
|
|
vncipher $out3,$out3,v29
|
|
vncipher $out4,$out4,v29
|
|
vncipher $out5,$out5,v29
|
|
vncipher $out6,$out6,v29
|
|
vncipher $out7,$out7,v29
|
|
|
|
vncipher $out1,$out1,v30
|
|
vxor $ivec,$ivec,v31 # last round key
|
|
vncipher $out2,$out2,v30
|
|
vxor $in1,$in1,v31
|
|
vncipher $out3,$out3,v30
|
|
vxor $in2,$in2,v31
|
|
vncipher $out4,$out4,v30
|
|
vxor $in3,$in3,v31
|
|
vncipher $out5,$out5,v30
|
|
vxor $in4,$in4,v31
|
|
vncipher $out6,$out6,v30
|
|
vxor $in5,$in5,v31
|
|
vncipher $out7,$out7,v30
|
|
vxor $in6,$in6,v31
|
|
|
|
cmplwi $len,32 # switch($len)
|
|
blt Lcbc_dec8x_one
|
|
nop
|
|
beq Lcbc_dec8x_two
|
|
cmplwi $len,64
|
|
blt Lcbc_dec8x_three
|
|
nop
|
|
beq Lcbc_dec8x_four
|
|
cmplwi $len,96
|
|
blt Lcbc_dec8x_five
|
|
nop
|
|
beq Lcbc_dec8x_six
|
|
|
|
Lcbc_dec8x_seven:
|
|
vncipherlast $out1,$out1,$ivec
|
|
vncipherlast $out2,$out2,$in1
|
|
vncipherlast $out3,$out3,$in2
|
|
vncipherlast $out4,$out4,$in3
|
|
vncipherlast $out5,$out5,$in4
|
|
vncipherlast $out6,$out6,$in5
|
|
vncipherlast $out7,$out7,$in6
|
|
vmr $ivec,$in7
|
|
|
|
le?vperm $out1,$out1,$out1,$inpperm
|
|
le?vperm $out2,$out2,$out2,$inpperm
|
|
stvx_u $out1,$x00,$out
|
|
le?vperm $out3,$out3,$out3,$inpperm
|
|
stvx_u $out2,$x10,$out
|
|
le?vperm $out4,$out4,$out4,$inpperm
|
|
stvx_u $out3,$x20,$out
|
|
le?vperm $out5,$out5,$out5,$inpperm
|
|
stvx_u $out4,$x30,$out
|
|
le?vperm $out6,$out6,$out6,$inpperm
|
|
stvx_u $out5,$x40,$out
|
|
le?vperm $out7,$out7,$out7,$inpperm
|
|
stvx_u $out6,$x50,$out
|
|
stvx_u $out7,$x60,$out
|
|
addi $out,$out,0x70
|
|
b Lcbc_dec8x_done
|
|
|
|
.align 5
|
|
Lcbc_dec8x_six:
|
|
vncipherlast $out2,$out2,$ivec
|
|
vncipherlast $out3,$out3,$in2
|
|
vncipherlast $out4,$out4,$in3
|
|
vncipherlast $out5,$out5,$in4
|
|
vncipherlast $out6,$out6,$in5
|
|
vncipherlast $out7,$out7,$in6
|
|
vmr $ivec,$in7
|
|
|
|
le?vperm $out2,$out2,$out2,$inpperm
|
|
le?vperm $out3,$out3,$out3,$inpperm
|
|
stvx_u $out2,$x00,$out
|
|
le?vperm $out4,$out4,$out4,$inpperm
|
|
stvx_u $out3,$x10,$out
|
|
le?vperm $out5,$out5,$out5,$inpperm
|
|
stvx_u $out4,$x20,$out
|
|
le?vperm $out6,$out6,$out6,$inpperm
|
|
stvx_u $out5,$x30,$out
|
|
le?vperm $out7,$out7,$out7,$inpperm
|
|
stvx_u $out6,$x40,$out
|
|
stvx_u $out7,$x50,$out
|
|
addi $out,$out,0x60
|
|
b Lcbc_dec8x_done
|
|
|
|
.align 5
|
|
Lcbc_dec8x_five:
|
|
vncipherlast $out3,$out3,$ivec
|
|
vncipherlast $out4,$out4,$in3
|
|
vncipherlast $out5,$out5,$in4
|
|
vncipherlast $out6,$out6,$in5
|
|
vncipherlast $out7,$out7,$in6
|
|
vmr $ivec,$in7
|
|
|
|
le?vperm $out3,$out3,$out3,$inpperm
|
|
le?vperm $out4,$out4,$out4,$inpperm
|
|
stvx_u $out3,$x00,$out
|
|
le?vperm $out5,$out5,$out5,$inpperm
|
|
stvx_u $out4,$x10,$out
|
|
le?vperm $out6,$out6,$out6,$inpperm
|
|
stvx_u $out5,$x20,$out
|
|
le?vperm $out7,$out7,$out7,$inpperm
|
|
stvx_u $out6,$x30,$out
|
|
stvx_u $out7,$x40,$out
|
|
addi $out,$out,0x50
|
|
b Lcbc_dec8x_done
|
|
|
|
.align 5
|
|
Lcbc_dec8x_four:
|
|
vncipherlast $out4,$out4,$ivec
|
|
vncipherlast $out5,$out5,$in4
|
|
vncipherlast $out6,$out6,$in5
|
|
vncipherlast $out7,$out7,$in6
|
|
vmr $ivec,$in7
|
|
|
|
le?vperm $out4,$out4,$out4,$inpperm
|
|
le?vperm $out5,$out5,$out5,$inpperm
|
|
stvx_u $out4,$x00,$out
|
|
le?vperm $out6,$out6,$out6,$inpperm
|
|
stvx_u $out5,$x10,$out
|
|
le?vperm $out7,$out7,$out7,$inpperm
|
|
stvx_u $out6,$x20,$out
|
|
stvx_u $out7,$x30,$out
|
|
addi $out,$out,0x40
|
|
b Lcbc_dec8x_done
|
|
|
|
.align 5
|
|
Lcbc_dec8x_three:
|
|
vncipherlast $out5,$out5,$ivec
|
|
vncipherlast $out6,$out6,$in5
|
|
vncipherlast $out7,$out7,$in6
|
|
vmr $ivec,$in7
|
|
|
|
le?vperm $out5,$out5,$out5,$inpperm
|
|
le?vperm $out6,$out6,$out6,$inpperm
|
|
stvx_u $out5,$x00,$out
|
|
le?vperm $out7,$out7,$out7,$inpperm
|
|
stvx_u $out6,$x10,$out
|
|
stvx_u $out7,$x20,$out
|
|
addi $out,$out,0x30
|
|
b Lcbc_dec8x_done
|
|
|
|
.align 5
|
|
Lcbc_dec8x_two:
|
|
vncipherlast $out6,$out6,$ivec
|
|
vncipherlast $out7,$out7,$in6
|
|
vmr $ivec,$in7
|
|
|
|
le?vperm $out6,$out6,$out6,$inpperm
|
|
le?vperm $out7,$out7,$out7,$inpperm
|
|
stvx_u $out6,$x00,$out
|
|
stvx_u $out7,$x10,$out
|
|
addi $out,$out,0x20
|
|
b Lcbc_dec8x_done
|
|
|
|
.align 5
|
|
Lcbc_dec8x_one:
|
|
vncipherlast $out7,$out7,$ivec
|
|
vmr $ivec,$in7
|
|
|
|
le?vperm $out7,$out7,$out7,$inpperm
|
|
stvx_u $out7,0,$out
|
|
addi $out,$out,0x10
|
|
|
|
Lcbc_dec8x_done:
|
|
le?vperm $ivec,$ivec,$ivec,$inpperm
|
|
stvx_u $ivec,0,$ivp # write [unaligned] iv
|
|
|
|
li r10,`$FRAME+15`
|
|
li r11,`$FRAME+31`
|
|
stvx $inpperm,r10,$sp # wipe copies of round keys
|
|
addi r10,r10,32
|
|
stvx $inpperm,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $inpperm,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $inpperm,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $inpperm,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $inpperm,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $inpperm,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $inpperm,r11,$sp
|
|
addi r11,r11,32
|
|
|
|
mtspr 256,$vrsave
|
|
lvx v20,r10,$sp # ABI says so
|
|
addi r10,r10,32
|
|
lvx v21,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v22,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v23,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v24,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v25,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v26,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v27,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v28,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v29,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v30,r10,$sp
|
|
lvx v31,r11,$sp
|
|
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
|
|
blr
|
|
.long 0
|
|
.byte 0,12,0x04,0,0x80,6,6,0
|
|
.long 0
|
|
.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
|
|
___
|
|
}} }}}
|
|
|
|
#########################################################################
|
|
{{{ # CTR procedure[s] #
|
|
# Register allocation for the one-block-at-a-time CTR path.
# General-purpose registers r3..r10, in this order.
my ($inp, $out, $len, $key, $ivp, $x10, $rounds, $idx) =
    map { "r$_" } 3 .. 10;
# Vector registers v0..v3.
my ($rndkey0, $rndkey1, $inout, $tmp) = map { "v$_" } 0 .. 3;
# Vector registers v4..v11.
my ($ivec, $inptail, $inpperm, $outhead, $outperm, $outmask, $keyperm, $one) =
    map { "v$_" } 4 .. 11;
# $dat shares its register with $tmp.
my $dat = $tmp;
|
|
|
|
# Entry point .${prefix}_ctr32_encrypt_blocks.
# Register roles follow the "my" lists above: $inp/$out are the data
# pointers, $len counts 16-byte blocks (it is decremented by one per block
# below), $key points at the key schedule (round count read from offset 240)
# and $ivp at the counter block.
# Returns at once when $len is below 1, hands off to _aesp8_ctr32_encrypt8x
# when $len is 8 or more, and otherwise handles one block per pass of
# Loop_ctr32_enc.
$code.=<<___;
|
|
.globl .${prefix}_ctr32_encrypt_blocks
|
|
.align 5
|
|
.${prefix}_ctr32_encrypt_blocks:
|
|
${UCMP}i $len,1
|
|
bltlr-
|
|
|
|
# keep the incoming VRSAVE in $vrsave and install a new mask
lis r0,0xfff0
|
|
mfspr $vrsave,256
|
|
mtspr 256,r0
|
|
|
|
li $idx,15
|
|
# $rndkey0 is zeroed first and serves as the zero operand further down
vxor $rndkey0,$rndkey0,$rndkey0
|
|
le?vspltisb $tmp,0x0f
|
|
|
|
lvx $ivec,0,$ivp # load [unaligned] iv
|
|
lvsl $inpperm,0,$ivp
|
|
lvx $inptail,$idx,$ivp
|
|
vspltisb $one,1
|
|
le?vxor $inpperm,$inpperm,$tmp
|
|
vperm $ivec,$ivec,$inptail,$inpperm
|
|
# $one keeps a single byte of 1, the rest comes from the zeroed $rndkey0;
# it is the increment applied to the counter with vadduwm
vsldoi $one,$rndkey0,$one,1
|
|
|
|
neg r11,$inp
|
|
?lvsl $keyperm,0,$key # prepare for unaligned key
|
|
lwz $rounds,240($key)
|
|
|
|
lvsr $inpperm,0,r11 # prepare for unaligned load
|
|
lvx $inptail,0,$inp
|
|
addi $inp,$inp,15 # 15 is not typo
|
|
le?vxor $inpperm,$inpperm,$tmp
|
|
|
|
# loop count = rounds/2 - 1, since Loop_ctr32_enc applies two rounds per pass
srwi $rounds,$rounds,1
|
|
li $idx,16
|
|
subi $rounds,$rounds,1
|
|
|
|
# eight or more blocks go to the interleaved 8x path
${UCMP}i $len,8
|
|
bge _aesp8_ctr32_encrypt8x
|
|
|
|
?lvsr $outperm,0,$out # prepare for unaligned store
|
|
vspltisb $outmask,-1
|
|
lvx $outhead,0,$out
|
|
?vperm $outmask,$rndkey0,$outmask,$outperm
|
|
le?vxor $outperm,$outperm,$tmp
|
|
|
|
# first pass: counter block xored with round key 0
lvx $rndkey0,0,$key
|
|
mtctr $rounds
|
|
lvx $rndkey1,$idx,$key
|
|
addi $idx,$idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $inout,$ivec,$rndkey0
|
|
lvx $rndkey0,$idx,$key
|
|
addi $idx,$idx,16
|
|
b Loop_ctr32_enc
|
|
|
|
.align 5
|
|
Loop_ctr32_enc:
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vcipher $inout,$inout,$rndkey1
|
|
lvx $rndkey1,$idx,$key
|
|
addi $idx,$idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vcipher $inout,$inout,$rndkey0
|
|
lvx $rndkey0,$idx,$key
|
|
addi $idx,$idx,16
|
|
bdnz Loop_ctr32_enc
|
|
|
|
# bump the counter and fetch the next input vector while the rounds finish
vadduwm $ivec,$ivec,$one
|
|
vmr $dat,$inptail
|
|
lvx $inptail,0,$inp
|
|
addi $inp,$inp,16
|
|
subic. $len,$len,1 # blocks--
|
|
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vcipher $inout,$inout,$rndkey1
|
|
lvx $rndkey1,$idx,$key
|
|
vperm $dat,$dat,$inptail,$inpperm
|
|
li $idx,16
|
|
?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
|
|
lvx $rndkey0,0,$key
|
|
# the input is folded into the last round key, so the single vcipherlast
# below yields input xor keystream
vxor $dat,$dat,$rndkey1 # last round key
|
|
vcipherlast $inout,$inout,$dat
|
|
|
|
# store path: rotate for the output alignment and merge with the part held
# over in $outhead, while the next counter block is started in parallel
lvx $rndkey1,$idx,$key
|
|
addi $idx,$idx,16
|
|
vperm $inout,$inout,$inout,$outperm
|
|
vsel $dat,$outhead,$inout,$outmask
|
|
mtctr $rounds
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vmr $outhead,$inout
|
|
vxor $inout,$ivec,$rndkey0
|
|
lvx $rndkey0,$idx,$key
|
|
addi $idx,$idx,16
|
|
stvx $dat,0,$out
|
|
addi $out,$out,16
|
|
# condition comes from the subic. on $len above
bne Loop_ctr32_enc
|
|
|
|
# flush the bytes still pending in $outhead
addi $out,$out,-1
|
|
lvx $inout,0,$out # redundant in aligned case
|
|
vsel $inout,$outhead,$inout,$outmask
|
|
stvx $inout,0,$out
|
|
|
|
mtspr 256,$vrsave
|
|
blr
|
|
.long 0
|
|
.byte 0,12,0x14,0,0,0,6,0
|
|
.long 0
|
|
___
|
|
#########################################################################
|
|
{{ # Optimized CTR procedure #
|
|
# Register allocation for the eight-blocks-at-a-time CTR path.
# $key_ (r11) walks the stack copy of the key schedule.
my $key_ = 'r11';
# Index registers: $x00 is r0, $x10 is r8, $x20..$x70 are r26..r31; the
# prologue loads 0x10..0x70 into $x10..$x70.
my ($x00, $x10, $x20, $x30, $x40, $x50, $x60, $x70) =
    map { "r$_" } 0, 8, 26 .. 31;
# osx flavours get a literal 0 in place of r0.
$x00 = 0 if $flavour =~ /osx/;
# Input blocks: v0..v3, v10, v12..v14.
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7) =
    map { "v$_" } 0 .. 3, 10, 12 .. 14;
# Counter blocks / results: v15..v22.
my ($out0, $out1, $out2, $out3, $out4, $out5, $out6, $out7) =
    map { "v$_" } 15 .. 22;
# v23 carries round key 0.  v24-v25 are a rotating buffer for the first
# round keys and v26-v31 hold the last 6 round keys.
my $rndkey0 = 'v23';
# Both names resolve to the registers the enclosing ("caller") scope already
# uses for them, so this assignment is redundant.
my ($tmp, $keyperm) = ($in3, $in4);
# The constants 2, 3 and 4 reuse the output-alignment registers of the
# one-block path.
my ($two, $three, $four) = ($outhead, $outperm, $outmask);
|
|
|
|
# _aesp8_ctr32_encrypt8x: reached by branch from
# .${prefix}_ctr32_encrypt_blocks when $len is 8 or more.  This first part
# allocates the stack frame, saves v20-v31, VRSAVE and r26-r31, and loads
# the offsets 0x10..0x70 into $x10..$x70.
$code.=<<___;
|
|
.align 5
|
|
_aesp8_ctr32_encrypt8x:
|
|
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
|
# r10 and r11 leapfrog through the vector save area in 32-byte steps
li r10,`$FRAME+8*16+15`
|
|
li r11,`$FRAME+8*16+31`
|
|
stvx v20,r10,$sp # ABI says so
|
|
addi r10,r10,32
|
|
stvx v21,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v22,r10,$sp
|
|
addi r10,r10,32
|
|
stvx v23,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v24,r10,$sp
|
|
addi r10,r10,32
|
|
stvx v25,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v26,r10,$sp
|
|
addi r10,r10,32
|
|
stvx v27,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v28,r10,$sp
|
|
addi r10,r10,32
|
|
stvx v29,r11,$sp
|
|
addi r11,r11,32
|
|
stvx v30,r10,$sp
|
|
stvx v31,r11,$sp
|
|
# r0 = -1 is written to VRSAVE by the mtspr that closes this prologue
li r0,-1
|
|
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
|
# offset constants are loaded in between the r26-r31 saves
li $x10,0x10
|
|
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
li $x20,0x20
|
|
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
li $x30,0x30
|
|
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
li $x40,0x40
|
|
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
li $x50,0x50
|
|
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
li $x60,0x60
|
|
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
li $x70,0x70
|
|
mtspr 256,r0
|
|
|
|
# Key schedule set-up.  Round key 0 stays in $rndkey0.  The leading round
# keys are aligned with $keyperm and parked on the stack through $key_, two
# per pass of Load_ctr32_enc_key, to be streamed back through v24/v25.  The
# remaining keys are aligned into v26-v31 and stay in registers.
subi $rounds,$rounds,3 # -4 in total
|
|
|
|
lvx $rndkey0,$x00,$key # load key schedule
|
|
lvx v30,$x10,$key
|
|
addi $key,$key,0x20
|
|
lvx v31,$x00,$key
|
|
?vperm $rndkey0,$rndkey0,v30,$keyperm
|
|
addi $key_,$sp,`$FRAME+15`
|
|
mtctr $rounds
|
|
|
|
Load_ctr32_enc_key:
|
|
?vperm v24,v30,v31,$keyperm
|
|
lvx v30,$x10,$key
|
|
addi $key,$key,0x20
|
|
stvx v24,$x00,$key_ # off-load round[1]
|
|
?vperm v25,v31,v30,$keyperm
|
|
lvx v31,$x00,$key
|
|
stvx v25,$x10,$key_ # off-load round[2]
|
|
addi $key_,$key_,0x20
|
|
bdnz Load_ctr32_enc_key
|
|
|
|
# the last two stack-resident keys are written here, interleaved with the
# loads of the keys that stay in v26-v31
lvx v26,$x10,$key
|
|
?vperm v24,v30,v31,$keyperm
|
|
lvx v27,$x20,$key
|
|
stvx v24,$x00,$key_ # off-load round[3]
|
|
?vperm v25,v31,v26,$keyperm
|
|
lvx v28,$x30,$key
|
|
stvx v25,$x10,$key_ # off-load round[4]
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
?vperm v26,v26,v27,$keyperm
|
|
lvx v29,$x40,$key
|
|
?vperm v27,v27,v28,$keyperm
|
|
lvx v30,$x50,$key
|
|
?vperm v28,v28,v29,$keyperm
|
|
lvx v31,$x60,$key
|
|
?vperm v29,v29,v30,$keyperm
|
|
lvx $out0,$x70,$key # borrow $out0
|
|
?vperm v30,v30,v31,$keyperm
|
|
lvx v24,$x00,$key_ # pre-load round[1]
|
|
?vperm v31,v31,$out0,$keyperm
|
|
lvx v25,$x10,$key_ # pre-load round[2]
|
|
|
|
# Build the first eight counter blocks: $out0 .. $out7 receive the counter
# plus 0 .. 7, each xored with round key 0, and $ivec moves on by 8.
# $len is scaled by 16 here (shift by 4 via $SHL, presumably the shift-left
# of the flavour), matching the 128/256 byte arithmetic of the main loop.
# The lines with the le prefix rebuild $inpperm for lvx_u/stvx_u.
vadduwm $two,$one,$one
|
|
subi $inp,$inp,15 # undo "caller"
|
|
$SHL $len,$len,4
|
|
|
|
vadduwm $out1,$ivec,$one # counter values ...
|
|
vadduwm $out2,$ivec,$two
|
|
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
|
|
le?li $idx,8
|
|
vadduwm $out3,$out1,$two
|
|
vxor $out1,$out1,$rndkey0
|
|
le?lvsl $inpperm,0,$idx
|
|
vadduwm $out4,$out2,$two
|
|
vxor $out2,$out2,$rndkey0
|
|
le?vspltisb $tmp,0x0f
|
|
vadduwm $out5,$out3,$two
|
|
vxor $out3,$out3,$rndkey0
|
|
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
|
|
vadduwm $out6,$out4,$two
|
|
vxor $out4,$out4,$rndkey0
|
|
vadduwm $out7,$out5,$two
|
|
vxor $out5,$out5,$rndkey0
|
|
vadduwm $ivec,$out6,$two # next counter value
|
|
vxor $out6,$out6,$rndkey0
|
|
vxor $out7,$out7,$rndkey0
|
|
|
|
mtctr $rounds
|
|
b Loop_ctr32_enc8x
|
|
# Main loop, eight blocks per pass.  The bdnz loop streams the stack-resident
# round keys through v24/v25.  The straight-line part that follows applies
# two more stack keys and v26-v30, loads the next 128 input bytes, folds the
# last round key (v31) into that input, and then either stores eight results
# and re-enters at Loop_ctr32_enc8x_middle or leaves for Lctr32_enc8x_break.
.align 5
|
|
Loop_ctr32_enc8x:
|
|
vcipher $out0,$out0,v24
|
|
vcipher $out1,$out1,v24
|
|
vcipher $out2,$out2,v24
|
|
vcipher $out3,$out3,v24
|
|
vcipher $out4,$out4,v24
|
|
vcipher $out5,$out5,v24
|
|
vcipher $out6,$out6,v24
|
|
vcipher $out7,$out7,v24
|
|
# re-entry point for every pass after the first: the v24 round for the new
# counter blocks was already issued alongside the stores of the previous pass
Loop_ctr32_enc8x_middle:
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vcipher $out0,$out0,v25
|
|
vcipher $out1,$out1,v25
|
|
vcipher $out2,$out2,v25
|
|
vcipher $out3,$out3,v25
|
|
vcipher $out4,$out4,v25
|
|
vcipher $out5,$out5,v25
|
|
vcipher $out6,$out6,v25
|
|
vcipher $out7,$out7,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz Loop_ctr32_enc8x
|
|
|
|
# r0 ends up as $len-256 when that is negative and 0 otherwise (subic here,
# subfe and and further down)
subic r11,$len,256 # $len-256, borrow $key_
|
|
vcipher $out0,$out0,v24
|
|
vcipher $out1,$out1,v24
|
|
vcipher $out2,$out2,v24
|
|
vcipher $out3,$out3,v24
|
|
vcipher $out4,$out4,v24
|
|
vcipher $out5,$out5,v24
|
|
vcipher $out6,$out6,v24
|
|
vcipher $out7,$out7,v24
|
|
|
|
subfe r0,r0,r0 # borrow?-1:0
|
|
vcipher $out0,$out0,v25
|
|
vcipher $out1,$out1,v25
|
|
vcipher $out2,$out2,v25
|
|
vcipher $out3,$out3,v25
|
|
vcipher $out4,$out4,v25
|
|
vcipher $out5,$out5,v25
|
|
vcipher $out6,$out6,v25
|
|
vcipher $out7,$out7,v25
|
|
|
|
and r0,r0,r11
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
vcipher $out0,$out0,v26
|
|
vcipher $out1,$out1,v26
|
|
vcipher $out2,$out2,v26
|
|
vcipher $out3,$out3,v26
|
|
vcipher $out4,$out4,v26
|
|
vcipher $out5,$out5,v26
|
|
vcipher $out6,$out6,v26
|
|
vcipher $out7,$out7,v26
|
|
lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
|
|
# the carry produced by this subic is consumed by the subfe. that precedes
# the branch to Lctr32_enc8x_break
subic $len,$len,129 # $len-=129
|
|
vcipher $out0,$out0,v27
|
|
addi $len,$len,1 # $len-=128 really
|
|
vcipher $out1,$out1,v27
|
|
vcipher $out2,$out2,v27
|
|
vcipher $out3,$out3,v27
|
|
vcipher $out4,$out4,v27
|
|
vcipher $out5,$out5,v27
|
|
vcipher $out6,$out6,v27
|
|
vcipher $out7,$out7,v27
|
|
lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
|
|
vcipher $out0,$out0,v28
|
|
lvx_u $in0,$x00,$inp # load input
|
|
vcipher $out1,$out1,v28
|
|
lvx_u $in1,$x10,$inp
|
|
vcipher $out2,$out2,v28
|
|
lvx_u $in2,$x20,$inp
|
|
vcipher $out3,$out3,v28
|
|
lvx_u $in3,$x30,$inp
|
|
vcipher $out4,$out4,v28
|
|
lvx_u $in4,$x40,$inp
|
|
vcipher $out5,$out5,v28
|
|
lvx_u $in5,$x50,$inp
|
|
vcipher $out6,$out6,v28
|
|
lvx_u $in6,$x60,$inp
|
|
vcipher $out7,$out7,v28
|
|
lvx_u $in7,$x70,$inp
|
|
addi $inp,$inp,0x80
|
|
|
|
vcipher $out0,$out0,v29
|
|
le?vperm $in0,$in0,$in0,$inpperm
|
|
vcipher $out1,$out1,v29
|
|
le?vperm $in1,$in1,$in1,$inpperm
|
|
vcipher $out2,$out2,v29
|
|
le?vperm $in2,$in2,$in2,$inpperm
|
|
vcipher $out3,$out3,v29
|
|
le?vperm $in3,$in3,$in3,$inpperm
|
|
vcipher $out4,$out4,v29
|
|
le?vperm $in4,$in4,$in4,$inpperm
|
|
vcipher $out5,$out5,v29
|
|
le?vperm $in5,$in5,$in5,$inpperm
|
|
vcipher $out6,$out6,v29
|
|
le?vperm $in6,$in6,$in6,$inpperm
|
|
vcipher $out7,$out7,v29
|
|
le?vperm $in7,$in7,$in7,$inpperm
|
|
|
|
add $inp,$inp,r0 # $inp is adjusted in such
|
|
# way that at exit from the
|
|
# loop inX-in7 are loaded
|
|
# with last "words"
|
|
subfe. r0,r0,r0 # borrow?-1:0
|
|
vcipher $out0,$out0,v30
|
|
vxor $in0,$in0,v31 # xor with last round key
|
|
vcipher $out1,$out1,v30
|
|
vxor $in1,$in1,v31
|
|
vcipher $out2,$out2,v30
|
|
vxor $in2,$in2,v31
|
|
vcipher $out3,$out3,v30
|
|
vxor $in3,$in3,v31
|
|
vcipher $out4,$out4,v30
|
|
vxor $in4,$in4,v31
|
|
vcipher $out5,$out5,v30
|
|
vxor $in5,$in5,v31
|
|
vcipher $out6,$out6,v30
|
|
vxor $in6,$in6,v31
|
|
vcipher $out7,$out7,v30
|
|
vxor $in7,$in7,v31
|
|
|
|
bne Lctr32_enc8x_break # did $len-129 borrow?
|
|
|
|
# not the last pass: finish these eight blocks into the in registers while
# the next eight counter blocks are prepared in the out registers
vcipherlast $in0,$out0,$in0
|
|
vcipherlast $in1,$out1,$in1
|
|
vadduwm $out1,$ivec,$one # counter values ...
|
|
vcipherlast $in2,$out2,$in2
|
|
vadduwm $out2,$ivec,$two
|
|
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
|
|
vcipherlast $in3,$out3,$in3
|
|
vadduwm $out3,$out1,$two
|
|
vxor $out1,$out1,$rndkey0
|
|
vcipherlast $in4,$out4,$in4
|
|
vadduwm $out4,$out2,$two
|
|
vxor $out2,$out2,$rndkey0
|
|
vcipherlast $in5,$out5,$in5
|
|
vadduwm $out5,$out3,$two
|
|
vxor $out3,$out3,$rndkey0
|
|
vcipherlast $in6,$out6,$in6
|
|
vadduwm $out6,$out4,$two
|
|
vxor $out4,$out4,$rndkey0
|
|
vcipherlast $in7,$out7,$in7
|
|
vadduwm $out7,$out5,$two
|
|
vxor $out5,$out5,$rndkey0
|
|
le?vperm $in0,$in0,$in0,$inpperm
|
|
vadduwm $ivec,$out6,$two # next counter value
|
|
vxor $out6,$out6,$rndkey0
|
|
le?vperm $in1,$in1,$in1,$inpperm
|
|
vxor $out7,$out7,$rndkey0
|
|
mtctr $rounds
|
|
|
|
# stores of the finished blocks are interleaved with the v24 round of the
# next eight
vcipher $out0,$out0,v24
|
|
stvx_u $in0,$x00,$out
|
|
le?vperm $in2,$in2,$in2,$inpperm
|
|
vcipher $out1,$out1,v24
|
|
stvx_u $in1,$x10,$out
|
|
le?vperm $in3,$in3,$in3,$inpperm
|
|
vcipher $out2,$out2,v24
|
|
stvx_u $in2,$x20,$out
|
|
le?vperm $in4,$in4,$in4,$inpperm
|
|
vcipher $out3,$out3,v24
|
|
stvx_u $in3,$x30,$out
|
|
le?vperm $in5,$in5,$in5,$inpperm
|
|
vcipher $out4,$out4,v24
|
|
stvx_u $in4,$x40,$out
|
|
le?vperm $in6,$in6,$in6,$inpperm
|
|
vcipher $out5,$out5,v24
|
|
stvx_u $in5,$x50,$out
|
|
le?vperm $in7,$in7,$in7,$inpperm
|
|
vcipher $out6,$out6,v24
|
|
stvx_u $in6,$x60,$out
|
|
vcipher $out7,$out7,v24
|
|
stvx_u $in7,$x70,$out
|
|
addi $out,$out,0x80
|
|
|
|
b Loop_ctr32_enc8x_middle
|
|
|
|
# Tail dispatch, reached on the pass where the subtraction of 129 borrowed.
# $len now holds (bytes left - 128): -0x70 for a single block, rising in
# steps of 0x10 to 0 for eight blocks.  Each cmpwi splits off two cases
# (blt and beq); the eight-block case is reached by falling through.
.align 5
|
|
Lctr32_enc8x_break:
|
|
cmpwi $len,-0x60
|
|
blt Lctr32_enc8x_one
|
|
nop
|
|
beq Lctr32_enc8x_two
|
|
cmpwi $len,-0x40
|
|
blt Lctr32_enc8x_three
|
|
nop
|
|
beq Lctr32_enc8x_four
|
|
cmpwi $len,-0x20
|
|
blt Lctr32_enc8x_five
|
|
nop
|
|
beq Lctr32_enc8x_six
|
|
cmpwi $len,0x00
|
|
blt Lctr32_enc8x_seven
|
|
|
|
# Eight blocks left: every in register (input already xored with the last
# round key) completes the out register with the same number; 0x80 bytes
# are stored.
Lctr32_enc8x_eight:
|
|
vcipherlast $out0,$out0,$in0
|
|
vcipherlast $out1,$out1,$in1
|
|
vcipherlast $out2,$out2,$in2
|
|
vcipherlast $out3,$out3,$in3
|
|
vcipherlast $out4,$out4,$in4
|
|
vcipherlast $out5,$out5,$in5
|
|
vcipherlast $out6,$out6,$in6
|
|
vcipherlast $out7,$out7,$in7
|
|
|
|
le?vperm $out0,$out0,$out0,$inpperm
|
|
le?vperm $out1,$out1,$out1,$inpperm
|
|
stvx_u $out0,$x00,$out
|
|
le?vperm $out2,$out2,$out2,$inpperm
|
|
stvx_u $out1,$x10,$out
|
|
le?vperm $out3,$out3,$out3,$inpperm
|
|
stvx_u $out2,$x20,$out
|
|
le?vperm $out4,$out4,$out4,$inpperm
|
|
stvx_u $out3,$x30,$out
|
|
le?vperm $out5,$out5,$out5,$inpperm
|
|
stvx_u $out4,$x40,$out
|
|
le?vperm $out6,$out6,$out6,$inpperm
|
|
stvx_u $out5,$x50,$out
|
|
le?vperm $out7,$out7,$out7,$inpperm
|
|
stvx_u $out6,$x60,$out
|
|
stvx_u $out7,$x70,$out
|
|
addi $out,$out,0x80
|
|
b Lctr32_enc8x_done
|
|
|
|
# Seven blocks left.  The final load was pulled back so that the last blocks
# sit in the highest in registers: keystream $out0 .. $out6 pairs with
# $in1 .. $in7; 0x70 bytes are stored.
.align 5
|
|
Lctr32_enc8x_seven:
|
|
vcipherlast $out0,$out0,$in1
|
|
vcipherlast $out1,$out1,$in2
|
|
vcipherlast $out2,$out2,$in3
|
|
vcipherlast $out3,$out3,$in4
|
|
vcipherlast $out4,$out4,$in5
|
|
vcipherlast $out5,$out5,$in6
|
|
vcipherlast $out6,$out6,$in7
|
|
|
|
le?vperm $out0,$out0,$out0,$inpperm
|
|
le?vperm $out1,$out1,$out1,$inpperm
|
|
stvx_u $out0,$x00,$out
|
|
le?vperm $out2,$out2,$out2,$inpperm
|
|
stvx_u $out1,$x10,$out
|
|
le?vperm $out3,$out3,$out3,$inpperm
|
|
stvx_u $out2,$x20,$out
|
|
le?vperm $out4,$out4,$out4,$inpperm
|
|
stvx_u $out3,$x30,$out
|
|
le?vperm $out5,$out5,$out5,$inpperm
|
|
stvx_u $out4,$x40,$out
|
|
le?vperm $out6,$out6,$out6,$inpperm
|
|
stvx_u $out5,$x50,$out
|
|
stvx_u $out6,$x60,$out
|
|
addi $out,$out,0x70
|
|
b Lctr32_enc8x_done
|
|
|
|
# Six blocks left: keystream $out0 .. $out5 pairs with $in2 .. $in7;
# 0x60 bytes are stored.
.align 5
|
|
Lctr32_enc8x_six:
|
|
vcipherlast $out0,$out0,$in2
|
|
vcipherlast $out1,$out1,$in3
|
|
vcipherlast $out2,$out2,$in4
|
|
vcipherlast $out3,$out3,$in5
|
|
vcipherlast $out4,$out4,$in6
|
|
vcipherlast $out5,$out5,$in7
|
|
|
|
le?vperm $out0,$out0,$out0,$inpperm
|
|
le?vperm $out1,$out1,$out1,$inpperm
|
|
stvx_u $out0,$x00,$out
|
|
le?vperm $out2,$out2,$out2,$inpperm
|
|
stvx_u $out1,$x10,$out
|
|
le?vperm $out3,$out3,$out3,$inpperm
|
|
stvx_u $out2,$x20,$out
|
|
le?vperm $out4,$out4,$out4,$inpperm
|
|
stvx_u $out3,$x30,$out
|
|
le?vperm $out5,$out5,$out5,$inpperm
|
|
stvx_u $out4,$x40,$out
|
|
stvx_u $out5,$x50,$out
|
|
addi $out,$out,0x60
|
|
b Lctr32_enc8x_done
|
|
|
|
# Five blocks left: keystream $out0 .. $out4 pairs with $in3 .. $in7;
# 0x50 bytes are stored.
.align 5
|
|
Lctr32_enc8x_five:
|
|
vcipherlast $out0,$out0,$in3
|
|
vcipherlast $out1,$out1,$in4
|
|
vcipherlast $out2,$out2,$in5
|
|
vcipherlast $out3,$out3,$in6
|
|
vcipherlast $out4,$out4,$in7
|
|
|
|
le?vperm $out0,$out0,$out0,$inpperm
|
|
le?vperm $out1,$out1,$out1,$inpperm
|
|
stvx_u $out0,$x00,$out
|
|
le?vperm $out2,$out2,$out2,$inpperm
|
|
stvx_u $out1,$x10,$out
|
|
le?vperm $out3,$out3,$out3,$inpperm
|
|
stvx_u $out2,$x20,$out
|
|
le?vperm $out4,$out4,$out4,$inpperm
|
|
stvx_u $out3,$x30,$out
|
|
stvx_u $out4,$x40,$out
|
|
addi $out,$out,0x50
|
|
b Lctr32_enc8x_done
|
|
|
|
# Four blocks left: keystream $out0 .. $out3 pairs with $in4 .. $in7;
# 0x40 bytes are stored.
.align 5
|
|
Lctr32_enc8x_four:
|
|
vcipherlast $out0,$out0,$in4
|
|
vcipherlast $out1,$out1,$in5
|
|
vcipherlast $out2,$out2,$in6
|
|
vcipherlast $out3,$out3,$in7
|
|
|
|
le?vperm $out0,$out0,$out0,$inpperm
|
|
le?vperm $out1,$out1,$out1,$inpperm
|
|
stvx_u $out0,$x00,$out
|
|
le?vperm $out2,$out2,$out2,$inpperm
|
|
stvx_u $out1,$x10,$out
|
|
le?vperm $out3,$out3,$out3,$inpperm
|
|
stvx_u $out2,$x20,$out
|
|
stvx_u $out3,$x30,$out
|
|
addi $out,$out,0x40
|
|
b Lctr32_enc8x_done
|
|
|
|
# Three blocks left: keystream $out0, $out1, $out2 pairs with $in5, $in6,
# $in7; 0x30 bytes are stored.
# The exit must be the CTR epilogue, like every other CTR tail.  The CBC
# decrypt exit (Lcbc_dec8x_done) is not equivalent: it writes a vector back
# through the IV pointer before its epilogue.
.align 5
|
|
Lctr32_enc8x_three:
|
|
vcipherlast $out0,$out0,$in5
|
|
vcipherlast $out1,$out1,$in6
|
|
vcipherlast $out2,$out2,$in7
|
|
|
|
le?vperm $out0,$out0,$out0,$inpperm
|
|
le?vperm $out1,$out1,$out1,$inpperm
|
|
stvx_u $out0,$x00,$out
|
|
le?vperm $out2,$out2,$out2,$inpperm
|
|
stvx_u $out1,$x10,$out
|
|
stvx_u $out2,$x20,$out
|
|
addi $out,$out,0x30
|
|
b Lctr32_enc8x_done
|
|
|
|
# Two blocks left: keystream $out0, $out1 pairs with $in6, $in7; 0x20 bytes
# are stored.
# The exit must be the CTR epilogue, like every other CTR tail.  The CBC
# decrypt exit (Lcbc_dec8x_done) is not equivalent: it writes a vector back
# through the IV pointer before its epilogue.
.align 5
|
|
Lctr32_enc8x_two:
|
|
vcipherlast $out0,$out0,$in6
|
|
vcipherlast $out1,$out1,$in7
|
|
|
|
le?vperm $out0,$out0,$out0,$inpperm
|
|
le?vperm $out1,$out1,$out1,$inpperm
|
|
stvx_u $out0,$x00,$out
|
|
stvx_u $out1,$x10,$out
|
|
addi $out,$out,0x20
|
|
b Lctr32_enc8x_done
|
|
|
|
# One block left: keystream $out0 pairs with $in7; 0x10 bytes are stored.
# There is no branch at the end; execution continues into the label that
# follows this section.
.align 5
|
|
Lctr32_enc8x_one:
|
|
vcipherlast $out0,$out0,$in7
|
|
|
|
le?vperm $out0,$out0,$out0,$inpperm
|
|
stvx_u $out0,0,$out
|
|
addi $out,$out,0x10
|
|
|
|
# Common exit of the 8x path: overwrite the stack copies of the round keys,
# restore VRSAVE, v20-v31 and r26-r31, release the frame and return.
Lctr32_enc8x_done:
|
|
li r10,`$FRAME+15`
|
|
li r11,`$FRAME+31`
|
|
# eight 16-byte stores of $inpperm (not key material) cover the key copies
stvx $inpperm,r10,$sp # wipe copies of round keys
|
|
addi r10,r10,32
|
|
stvx $inpperm,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $inpperm,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $inpperm,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $inpperm,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $inpperm,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $inpperm,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $inpperm,r11,$sp
|
|
addi r11,r11,32
|
|
|
|
# r10 and r11 have advanced by 8*16 and now address the v20/v21 save slots
mtspr 256,$vrsave
|
|
lvx v20,r10,$sp # ABI says so
|
|
addi r10,r10,32
|
|
lvx v21,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v22,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v23,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v24,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v25,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v26,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v27,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v28,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v29,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v30,r10,$sp
|
|
lvx v31,r11,$sp
|
|
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
|
|
blr
|
|
.long 0
|
|
.byte 0,12,0x04,0,0x80,6,6,0
|
|
.long 0
|
|
.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
|
|
___
|
|
}} }}}
|
|
|
|
#########################################################################
|
|
{{{ # XTS procedures #
|
|
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
|
|
# const AES_KEY *key1, const AES_KEY *key2, #
|
|
# [const] unsigned char iv[16]); #
|
|
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
|
|
# input tweak value is assumed to be encrypted already, and last tweak #
|
|
# value, one suitable for consecutive call on same chunk of data, is #
|
|
# written back to original buffer. In addition, in "tweak chaining" #
|
|
# mode only complete input blocks are processed. #
|
|
|
|
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
|
|
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
|
|
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
|
|
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
|
|
my $taillen = $key2;
|
|
|
|
($inp,$idx) = ($idx,$inp); # reassign
|
|
|
|
$code.=<<___;
|
|
.globl .${prefix}_xts_encrypt
|
|
.align 5
|
|
.${prefix}_xts_encrypt:
|
|
mr $inp,r3 # reassign
|
|
li r3,-1
|
|
${UCMP}i $len,16
|
|
bltlr-
|
|
|
|
lis r0,0xfff0
|
|
mfspr r12,256 # save vrsave
|
|
li r11,0
|
|
mtspr 256,r0
|
|
|
|
vspltisb $seven,0x07 # 0x070707..07
|
|
le?lvsl $leperm,r11,r11
|
|
le?vspltisb $tmp,0x0f
|
|
le?vxor $leperm,$leperm,$seven
|
|
|
|
li $idx,15
|
|
lvx $tweak,0,$ivp # load [unaligned] iv
|
|
lvsl $inpperm,0,$ivp
|
|
lvx $inptail,$idx,$ivp
|
|
le?vxor $inpperm,$inpperm,$tmp
|
|
vperm $tweak,$tweak,$inptail,$inpperm
|
|
|
|
neg r11,$inp
|
|
lvsr $inpperm,0,r11 # prepare for unaligned load
|
|
lvx $inout,0,$inp
|
|
addi $inp,$inp,15 # 15 is not typo
|
|
le?vxor $inpperm,$inpperm,$tmp
|
|
|
|
${UCMP}i $key2,0 # key2==NULL?
|
|
beq Lxts_enc_no_key2
|
|
|
|
?lvsl $keyperm,0,$key2 # prepare for unaligned key
|
|
lwz $rounds,240($key2)
|
|
srwi $rounds,$rounds,1
|
|
subi $rounds,$rounds,1
|
|
li $idx,16
|
|
|
|
lvx $rndkey0,0,$key2
|
|
lvx $rndkey1,$idx,$key2
|
|
addi $idx,$idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $tweak,$tweak,$rndkey0
|
|
lvx $rndkey0,$idx,$key2
|
|
addi $idx,$idx,16
|
|
mtctr $rounds
|
|
|
|
Ltweak_xts_enc:
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vcipher $tweak,$tweak,$rndkey1
|
|
lvx $rndkey1,$idx,$key2
|
|
addi $idx,$idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vcipher $tweak,$tweak,$rndkey0
|
|
lvx $rndkey0,$idx,$key2
|
|
addi $idx,$idx,16
|
|
bdnz Ltweak_xts_enc
|
|
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vcipher $tweak,$tweak,$rndkey1
|
|
lvx $rndkey1,$idx,$key2
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vcipherlast $tweak,$tweak,$rndkey0
|
|
|
|
li $ivp,0 # don't chain the tweak
|
|
b Lxts_enc
|
|
|
|
Lxts_enc_no_key2:
|
|
li $idx,-16
|
|
and $len,$len,$idx # in "tweak chaining"
|
|
# mode only complete
|
|
# blocks are processed
|
|
Lxts_enc:
|
|
lvx $inptail,0,$inp
|
|
addi $inp,$inp,16
|
|
|
|
?lvsl $keyperm,0,$key1 # prepare for unaligned key
|
|
lwz $rounds,240($key1)
|
|
srwi $rounds,$rounds,1
|
|
subi $rounds,$rounds,1
|
|
li $idx,16
|
|
|
|
vslb $eighty7,$seven,$seven # 0x808080..80
|
|
vor $eighty7,$eighty7,$seven # 0x878787..87
|
|
vspltisb $tmp,1 # 0x010101..01
|
|
vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
|
|
|
|
${UCMP}i $len,96
|
|
bge _aesp8_xts_encrypt6x
|
|
|
|
andi. $taillen,$len,15
|
|
subic r0,$len,32
|
|
subi $taillen,$taillen,16
|
|
subfe r0,r0,r0
|
|
and r0,r0,$taillen
|
|
add $inp,$inp,r0
|
|
|
|
lvx $rndkey0,0,$key1
|
|
lvx $rndkey1,$idx,$key1
|
|
addi $idx,$idx,16
|
|
vperm $inout,$inout,$inptail,$inpperm
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $inout,$inout,$tweak
|
|
vxor $inout,$inout,$rndkey0
|
|
lvx $rndkey0,$idx,$key1
|
|
addi $idx,$idx,16
|
|
mtctr $rounds
|
|
b Loop_xts_enc
|
|
|
|
.align 5
|
|
Loop_xts_enc:
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vcipher $inout,$inout,$rndkey1
|
|
lvx $rndkey1,$idx,$key1
|
|
addi $idx,$idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vcipher $inout,$inout,$rndkey0
|
|
lvx $rndkey0,$idx,$key1
|
|
addi $idx,$idx,16
|
|
bdnz Loop_xts_enc
|
|
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vcipher $inout,$inout,$rndkey1
|
|
lvx $rndkey1,$idx,$key1
|
|
li $idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $rndkey0,$rndkey0,$tweak
|
|
vcipherlast $output,$inout,$rndkey0
|
|
|
|
le?vperm $tmp,$output,$output,$leperm
|
|
be?nop
|
|
le?stvx_u $tmp,0,$out
|
|
be?stvx_u $output,0,$out
|
|
addi $out,$out,16
|
|
|
|
subic. $len,$len,16
|
|
beq Lxts_enc_done
|
|
|
|
vmr $inout,$inptail
|
|
lvx $inptail,0,$inp
|
|
addi $inp,$inp,16
|
|
lvx $rndkey0,0,$key1
|
|
lvx $rndkey1,$idx,$key1
|
|
addi $idx,$idx,16
|
|
|
|
subic r0,$len,32
|
|
subfe r0,r0,r0
|
|
and r0,r0,$taillen
|
|
add $inp,$inp,r0
|
|
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
vperm $inout,$inout,$inptail,$inpperm
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $inout,$inout,$tweak
|
|
vxor $output,$output,$rndkey0 # just in case $len<16
|
|
vxor $inout,$inout,$rndkey0
|
|
lvx $rndkey0,$idx,$key1
|
|
addi $idx,$idx,16
|
|
|
|
mtctr $rounds
|
|
${UCMP}i $len,16
|
|
bge Loop_xts_enc
|
|
|
|
vxor $output,$output,$tweak
|
|
lvsr $inpperm,0,$len # $inpperm is no longer needed
|
|
vxor $inptail,$inptail,$inptail # $inptail is no longer needed
|
|
vspltisb $tmp,-1
|
|
vperm $inptail,$inptail,$tmp,$inpperm
|
|
vsel $inout,$inout,$output,$inptail
|
|
|
|
subi r11,$out,17
|
|
subi $out,$out,16
|
|
mtctr $len
|
|
li $len,16
|
|
Loop_xts_enc_steal:
|
|
lbzu r0,1(r11)
|
|
stb r0,16(r11)
|
|
bdnz Loop_xts_enc_steal
|
|
|
|
mtctr $rounds
|
|
b Loop_xts_enc # one more time...
|
|
|
|
Lxts_enc_done:
|
|
${UCMP}i $ivp,0
|
|
beq Lxts_enc_ret
|
|
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
le?vperm $tweak,$tweak,$tweak,$leperm
|
|
stvx_u $tweak,0,$ivp
|
|
|
|
Lxts_enc_ret:
|
|
mtspr 256,r12 # restore vrsave
|
|
li r3,0
|
|
blr
|
|
.long 0
|
|
.byte 0,12,0x04,0,0x80,6,6,0
|
|
.long 0
|
|
.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
|
|
|
|
.globl .${prefix}_xts_decrypt
|
|
.align 5
|
|
.${prefix}_xts_decrypt:
|
|
mr $inp,r3 # reassign
|
|
li r3,-1
|
|
${UCMP}i $len,16
|
|
bltlr-
|
|
|
|
lis r0,0xfff8
|
|
mfspr r12,256 # save vrsave
|
|
li r11,0
|
|
mtspr 256,r0
|
|
|
|
andi. r0,$len,15
|
|
neg r0,r0
|
|
andi. r0,r0,16
|
|
sub $len,$len,r0
|
|
|
|
vspltisb $seven,0x07 # 0x070707..07
|
|
le?lvsl $leperm,r11,r11
|
|
le?vspltisb $tmp,0x0f
|
|
le?vxor $leperm,$leperm,$seven
|
|
|
|
li $idx,15
|
|
lvx $tweak,0,$ivp # load [unaligned] iv
|
|
lvsl $inpperm,0,$ivp
|
|
lvx $inptail,$idx,$ivp
|
|
le?vxor $inpperm,$inpperm,$tmp
|
|
vperm $tweak,$tweak,$inptail,$inpperm
|
|
|
|
neg r11,$inp
|
|
lvsr $inpperm,0,r11 # prepare for unaligned load
|
|
lvx $inout,0,$inp
|
|
addi $inp,$inp,15 # 15 is not typo
|
|
le?vxor $inpperm,$inpperm,$tmp
|
|
|
|
${UCMP}i $key2,0 # key2==NULL?
|
|
beq Lxts_dec_no_key2
|
|
|
|
?lvsl $keyperm,0,$key2 # prepare for unaligned key
|
|
lwz $rounds,240($key2)
|
|
srwi $rounds,$rounds,1
|
|
subi $rounds,$rounds,1
|
|
li $idx,16
|
|
|
|
lvx $rndkey0,0,$key2
|
|
lvx $rndkey1,$idx,$key2
|
|
addi $idx,$idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $tweak,$tweak,$rndkey0
|
|
lvx $rndkey0,$idx,$key2
|
|
addi $idx,$idx,16
|
|
mtctr $rounds
|
|
|
|
Ltweak_xts_dec:
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vcipher $tweak,$tweak,$rndkey1
|
|
lvx $rndkey1,$idx,$key2
|
|
addi $idx,$idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vcipher $tweak,$tweak,$rndkey0
|
|
lvx $rndkey0,$idx,$key2
|
|
addi $idx,$idx,16
|
|
bdnz Ltweak_xts_dec
|
|
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vcipher $tweak,$tweak,$rndkey1
|
|
lvx $rndkey1,$idx,$key2
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vcipherlast $tweak,$tweak,$rndkey0
|
|
|
|
li $ivp,0 # don't chain the tweak
|
|
b Lxts_dec
|
|
|
|
Lxts_dec_no_key2:
|
|
neg $idx,$len
|
|
andi. $idx,$idx,15
|
|
add $len,$len,$idx # in "tweak chaining"
|
|
# mode only complete
|
|
# blocks are processed
|
|
Lxts_dec:
|
|
lvx $inptail,0,$inp
|
|
addi $inp,$inp,16
|
|
|
|
?lvsl $keyperm,0,$key1 # prepare for unaligned key
|
|
lwz $rounds,240($key1)
|
|
srwi $rounds,$rounds,1
|
|
subi $rounds,$rounds,1
|
|
li $idx,16
|
|
|
|
vslb $eighty7,$seven,$seven # 0x808080..80
|
|
vor $eighty7,$eighty7,$seven # 0x878787..87
|
|
vspltisb $tmp,1 # 0x010101..01
|
|
vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
|
|
|
|
${UCMP}i $len,96
|
|
bge _aesp8_xts_decrypt6x
|
|
|
|
lvx $rndkey0,0,$key1
|
|
lvx $rndkey1,$idx,$key1
|
|
addi $idx,$idx,16
|
|
vperm $inout,$inout,$inptail,$inpperm
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $inout,$inout,$tweak
|
|
vxor $inout,$inout,$rndkey0
|
|
lvx $rndkey0,$idx,$key1
|
|
addi $idx,$idx,16
|
|
mtctr $rounds
|
|
|
|
${UCMP}i $len,16
|
|
blt Ltail_xts_dec
|
|
be?b Loop_xts_dec
|
|
|
|
.align 5
|
|
Loop_xts_dec:
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vncipher $inout,$inout,$rndkey1
|
|
lvx $rndkey1,$idx,$key1
|
|
addi $idx,$idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vncipher $inout,$inout,$rndkey0
|
|
lvx $rndkey0,$idx,$key1
|
|
addi $idx,$idx,16
|
|
bdnz Loop_xts_dec
|
|
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vncipher $inout,$inout,$rndkey1
|
|
lvx $rndkey1,$idx,$key1
|
|
li $idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $rndkey0,$rndkey0,$tweak
|
|
vncipherlast $output,$inout,$rndkey0
|
|
|
|
le?vperm $tmp,$output,$output,$leperm
|
|
be?nop
|
|
le?stvx_u $tmp,0,$out
|
|
be?stvx_u $output,0,$out
|
|
addi $out,$out,16
|
|
|
|
subic. $len,$len,16
|
|
beq Lxts_dec_done
|
|
|
|
vmr $inout,$inptail
|
|
lvx $inptail,0,$inp
|
|
addi $inp,$inp,16
|
|
lvx $rndkey0,0,$key1
|
|
lvx $rndkey1,$idx,$key1
|
|
addi $idx,$idx,16
|
|
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
vperm $inout,$inout,$inptail,$inpperm
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $inout,$inout,$tweak
|
|
vxor $inout,$inout,$rndkey0
|
|
lvx $rndkey0,$idx,$key1
|
|
addi $idx,$idx,16
|
|
|
|
mtctr $rounds
|
|
${UCMP}i $len,16
|
|
bge Loop_xts_dec
|
|
|
|
Ltail_xts_dec:
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak1,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $tweak1,$tweak1,$tmp
|
|
|
|
subi $inp,$inp,16
|
|
add $inp,$inp,$len
|
|
|
|
vxor $inout,$inout,$tweak # :-(
|
|
vxor $inout,$inout,$tweak1 # :-)
|
|
|
|
Loop_xts_dec_short:
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vncipher $inout,$inout,$rndkey1
|
|
lvx $rndkey1,$idx,$key1
|
|
addi $idx,$idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vncipher $inout,$inout,$rndkey0
|
|
lvx $rndkey0,$idx,$key1
|
|
addi $idx,$idx,16
|
|
bdnz Loop_xts_dec_short
|
|
|
|
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
|
|
vncipher $inout,$inout,$rndkey1
|
|
lvx $rndkey1,$idx,$key1
|
|
li $idx,16
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
vxor $rndkey0,$rndkey0,$tweak1
|
|
vncipherlast $output,$inout,$rndkey0
|
|
|
|
le?vperm $tmp,$output,$output,$leperm
|
|
be?nop
|
|
le?stvx_u $tmp,0,$out
|
|
be?stvx_u $output,0,$out
|
|
|
|
vmr $inout,$inptail
|
|
lvx $inptail,0,$inp
|
|
#addi $inp,$inp,16
|
|
lvx $rndkey0,0,$key1
|
|
lvx $rndkey1,$idx,$key1
|
|
addi $idx,$idx,16
|
|
vperm $inout,$inout,$inptail,$inpperm
|
|
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
|
|
|
|
lvsr $inpperm,0,$len # $inpperm is no longer needed
|
|
vxor $inptail,$inptail,$inptail # $inptail is no longer needed
|
|
vspltisb $tmp,-1
|
|
vperm $inptail,$inptail,$tmp,$inpperm
|
|
vsel $inout,$inout,$output,$inptail
|
|
|
|
vxor $rndkey0,$rndkey0,$tweak
|
|
vxor $inout,$inout,$rndkey0
|
|
lvx $rndkey0,$idx,$key1
|
|
addi $idx,$idx,16
|
|
|
|
subi r11,$out,1
|
|
mtctr $len
|
|
li $len,16
|
|
Loop_xts_dec_steal:
|
|
lbzu r0,1(r11)
|
|
stb r0,16(r11)
|
|
bdnz Loop_xts_dec_steal
|
|
|
|
mtctr $rounds
|
|
b Loop_xts_dec # one more time...
|
|
|
|
Lxts_dec_done:
|
|
${UCMP}i $ivp,0
|
|
beq Lxts_dec_ret
|
|
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
le?vperm $tweak,$tweak,$tweak,$leperm
|
|
stvx_u $tweak,0,$ivp
|
|
|
|
Lxts_dec_ret:
|
|
mtspr 256,r12 # restore vrsave
|
|
li r3,0
|
|
blr
|
|
.long 0
|
|
.byte 0,12,0x04,0,0x80,6,6,0
|
|
.long 0
|
|
.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
|
|
___
|
|
#########################################################################
|
|
{{ # Optimized XTS procedures #
|
|
# Register assignments for the 6x-interleaved XTS routines generated below.
# $key_ reuses the register held in $key2; the routines repoint it at their
# on-stack copy of the round keys.
my $key_=$key2;
|
|
# GPRs used as index registers: the routine prologues load $x10..$x70 with
# the byte offsets 0x10..0x70 for indexed vector loads and stores.
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
|
|
# For osx flavours the zero index is emitted as a literal 0 rather than r0.
$x00=0 if ($flavour =~ /osx/);
|
|
# v0-v5: the six input blocks of one batch.
my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
|
|
# v7, v12-v16: the six cipher states / output blocks.
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
|
|
# v17-v22: per-block tweaks; the code below keeps them xor-ed with $rndkey0.
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
|
|
my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
|
|
# v26-v31 last 6 round keys
|
|
# $keyperm shares v7 with $out0, so it is only valid until $out0 is first
# written.
my ($keyperm)=($out0); # aliases with "caller", redundant assignment
|
|
# $taillen shares r31 with $x70: the 0x70 offset is consumed while loading the
# key schedule, after which the register holds len & 15.
my $taillen=$x70;
|
|
|
|
$code.=<<___;
|
|
.align 5
|
|
# _aesp8_xts_encrypt6x: XTS encryption path that processes six blocks per pass.
# NOTE(review): the body reuses vector state prepared by the calling code
# (inout, inptail, inpperm, tweak); the entry contract is defined there.
# Prologue: allocate the stack frame, save LR, v20-v31, vrsave and r26-r31,
# and load the index registers with the offsets 0x10..0x70.
_aesp8_xts_encrypt6x:
|
|
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
|
# LR is parked in r11 until the epilogue restores it with mtlr r11.
mflr r11
|
|
# r7 and r3 walk the v20-v31 save area in alternating 32-byte strides.
# The +15/+31 bias relies on stvx truncating the effective address to a
# 16-byte boundary (AltiVec semantics).
li r7,`$FRAME+8*16+15`
|
|
li r3,`$FRAME+8*16+31`
|
|
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
|
|
stvx v20,r7,$sp # ABI says so
|
|
addi r7,r7,32
|
|
stvx v21,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v22,r7,$sp
|
|
addi r7,r7,32
|
|
stvx v23,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v24,r7,$sp
|
|
addi r7,r7,32
|
|
stvx v25,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v26,r7,$sp
|
|
addi r7,r7,32
|
|
stvx v27,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v28,r7,$sp
|
|
addi r7,r7,32
|
|
stvx v29,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v30,r7,$sp
|
|
stvx v31,r3,$sp
|
|
# r0 = all ones; written to SPR 256 (vrsave) at the end of the prologue.
li r0,-1
|
|
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
|
# Saving r26-r31 is interleaved with loading them (and r3) with the index
# offsets 0x10..0x70.
li $x10,0x10
|
|
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
li $x20,0x20
|
|
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
li $x30,0x30
|
|
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
li $x40,0x40
|
|
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
li $x50,0x50
|
|
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
li $x60,0x60
|
|
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
li $x70,0x70
|
|
mtspr 256,r0
|
|
|
|
# Key schedule staging and first batch of six blocks.
# rounds becomes the trip count of the two-rounds-per-iteration loops below.
subi $rounds,$rounds,3 # -4 in total
|
|
|
|
# Load the start of the key schedule from key1; the endian-specific vperm
# realigns adjacent quadwords, leaving the first round key in rndkey0.
lvx $rndkey0,$x00,$key1 # load key schedule
|
|
lvx v30,$x10,$key1
|
|
addi $key1,$key1,0x20
|
|
lvx v31,$x00,$key1
|
|
?vperm $rndkey0,$rndkey0,v30,$keyperm
|
|
# key_ addresses the on-stack copy of the aligned round keys.
addi $key_,$sp,`$FRAME+15`
|
|
mtctr $rounds
|
|
|
|
# Each iteration aligns two more round keys and stores them to the stack copy.
Load_xts_enc_key:
|
|
?vperm v24,v30,v31,$keyperm
|
|
lvx v30,$x10,$key1
|
|
addi $key1,$key1,0x20
|
|
stvx v24,$x00,$key_ # off-load round[1]
|
|
?vperm v25,v31,v30,$keyperm
|
|
lvx v31,$x00,$key1
|
|
stvx v25,$x10,$key_ # off-load round[2]
|
|
addi $key_,$key_,0x20
|
|
bdnz Load_xts_enc_key
|
|
|
|
# Two more keys go to the stack copy; the remaining six are aligned into
# v26-v31 and stay resident. v24/v25 are then primed from the rewound copy.
lvx v26,$x10,$key1
|
|
?vperm v24,v30,v31,$keyperm
|
|
lvx v27,$x20,$key1
|
|
stvx v24,$x00,$key_ # off-load round[3]
|
|
?vperm v25,v31,v26,$keyperm
|
|
lvx v28,$x30,$key1
|
|
stvx v25,$x10,$key_ # off-load round[4]
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
?vperm v26,v26,v27,$keyperm
|
|
lvx v29,$x40,$key1
|
|
?vperm v27,v27,v28,$keyperm
|
|
lvx v30,$x50,$key1
|
|
?vperm v28,v28,v29,$keyperm
|
|
lvx v31,$x60,$key1
|
|
?vperm v29,v29,v30,$keyperm
|
|
# Last use of the x70 offset; its register is reused as taillen below.
lvx $twk5,$x70,$key1 # borrow $twk5
|
|
?vperm v30,v30,v31,$keyperm
|
|
lvx v24,$x00,$key_ # pre-load round[1]
|
|
# Last use of keyperm, which shares its register with out0.
?vperm v31,v31,$twk5,$keyperm
|
|
lvx v25,$x10,$key_ # pre-load round[2]
|
|
|
|
# First batch: in0 is assembled from vectors the caller already loaded,
# in1-in5 are read from memory. Every twkN is tweak xor rndkey0, so xor-ing a
# block with twkN applies both the tweak and the first round key.
# The vsrab/vaddubm/vsldoi/vand/vxor group advances tweak to the next block
# (presumably multiplication by x in GF(2^128); seven and eighty7 are
# constants prepared by the caller - confirm there).
vperm $in0,$inout,$inptail,$inpperm
|
|
subi $inp,$inp,31 # undo "caller"
|
|
vxor $twk0,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out0,$in0,$twk0
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in1,$x10,$inp
|
|
vxor $twk1,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in1,$in1,$in1,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out1,$in1,$twk1
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in2,$x20,$inp
|
|
# taillen = len mod 16, the size of a trailing partial block.
andi. $taillen,$len,15
|
|
vxor $twk2,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in2,$in2,$in2,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out2,$in2,$twk2
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in3,$x30,$inp
|
|
# len now counts whole blocks only.
sub $len,$len,$taillen
|
|
vxor $twk3,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in3,$in3,$in3,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out3,$in3,$twk3
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in4,$x40,$inp
|
|
# Account for the six blocks of this first batch.
subi $len,$len,0x60
|
|
vxor $twk4,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in4,$in4,$in4,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out4,$in4,$twk4
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in5,$x50,$inp
|
|
addi $inp,$inp,0x60
|
|
vxor $twk5,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in5,$in5,$in5,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out5,$in5,$twk5
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
# Fold rndkey0 into the last round key: twkN xor v31 then equals
# tweak xor last-round-key, the operand handed to vcipherlast.
vxor v31,v31,$rndkey0
|
|
mtctr $rounds
|
|
b Loop_xts_enc6x
|
|
|
|
.align 5
|
|
# Main loop: encrypts six blocks per pass.
# The inner bdnz loop applies two rounds per iteration with v24/v25, which are
# refilled from the stack copy of the key schedule. The straight-line code
# after it applies the last two stack-copy keys, the resident keys v26-v30 and
# the final round, interleaved with computing the next six tweaks, loading the
# next six input blocks and storing the six results.
Loop_xts_enc6x:
|
|
vcipher $out0,$out0,v24
|
|
vcipher $out1,$out1,v24
|
|
vcipher $out2,$out2,v24
|
|
vcipher $out3,$out3,v24
|
|
vcipher $out4,$out4,v24
|
|
vcipher $out5,$out5,v24
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vcipher $out0,$out0,v25
|
|
vcipher $out1,$out1,v25
|
|
vcipher $out2,$out2,v25
|
|
vcipher $out3,$out3,v25
|
|
vcipher $out4,$out4,v25
|
|
vcipher $out5,$out5,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz Loop_xts_enc6x
|
|
|
|
# subic leaves the borrow of len-96 in the carry bit; the subfe. below turns it
# into r0 = 0 or -1 and sets CR0 for the beq at the bottom of the loop.
subic $len,$len,96 # $len-=96
|
|
# inN temporarily holds twkN xor v31 (the vcipherlast operand) while twkN is
# recomputed for the next batch.
vxor $in0,$twk0,v31 # xor with last round key
|
|
vcipher $out0,$out0,v24
|
|
vcipher $out1,$out1,v24
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk0,$tweak,$rndkey0
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vcipher $out2,$out2,v24
|
|
vcipher $out3,$out3,v24
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vcipher $out4,$out4,v24
|
|
vcipher $out5,$out5,v24
|
|
|
|
subfe. r0,r0,r0 # borrow?-1:0
|
|
vand $tmp,$tmp,$eighty7
|
|
vcipher $out0,$out0,v25
|
|
vcipher $out1,$out1,v25
|
|
vxor $tweak,$tweak,$tmp
|
|
vcipher $out2,$out2,v25
|
|
vcipher $out3,$out3,v25
|
|
vxor $in1,$twk1,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk1,$tweak,$rndkey0
|
|
vcipher $out4,$out4,v25
|
|
vcipher $out5,$out5,v25
|
|
|
|
# r0 = len (negative) when the subtraction borrowed, otherwise 0.
and r0,r0,$len
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vcipher $out0,$out0,v26
|
|
vcipher $out1,$out1,v26
|
|
vand $tmp,$tmp,$eighty7
|
|
vcipher $out2,$out2,v26
|
|
vcipher $out3,$out3,v26
|
|
vxor $tweak,$tweak,$tmp
|
|
vcipher $out4,$out4,v26
|
|
vcipher $out5,$out5,v26
|
|
|
|
add $inp,$inp,r0 # $inp is adjusted in such
|
|
# way that at exit from the
|
|
# loop inX-in5 are loaded
|
|
# with last "words"
|
|
vxor $in2,$twk2,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk2,$tweak,$rndkey0
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vcipher $out0,$out0,v27
|
|
vcipher $out1,$out1,v27
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vcipher $out2,$out2,v27
|
|
vcipher $out3,$out3,v27
|
|
vand $tmp,$tmp,$eighty7
|
|
vcipher $out4,$out4,v27
|
|
vcipher $out5,$out5,v27
|
|
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
vxor $tweak,$tweak,$tmp
|
|
vcipher $out0,$out0,v28
|
|
vcipher $out1,$out1,v28
|
|
vxor $in3,$twk3,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk3,$tweak,$rndkey0
|
|
vcipher $out2,$out2,v28
|
|
vcipher $out3,$out3,v28
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vcipher $out4,$out4,v28
|
|
vcipher $out5,$out5,v28
|
|
lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
vand $tmp,$tmp,$eighty7
|
|
|
|
vcipher $out0,$out0,v29
|
|
vcipher $out1,$out1,v29
|
|
vxor $tweak,$tweak,$tmp
|
|
vcipher $out2,$out2,v29
|
|
vcipher $out3,$out3,v29
|
|
vxor $in4,$twk4,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk4,$tweak,$rndkey0
|
|
vcipher $out4,$out4,v29
|
|
vcipher $out5,$out5,v29
|
|
lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
|
|
vcipher $out0,$out0,v30
|
|
vcipher $out1,$out1,v30
|
|
vand $tmp,$tmp,$eighty7
|
|
vcipher $out2,$out2,v30
|
|
vcipher $out3,$out3,v30
|
|
vxor $tweak,$tweak,$tmp
|
|
vcipher $out4,$out4,v30
|
|
vcipher $out5,$out5,v30
|
|
vxor $in5,$twk5,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk5,$tweak,$rndkey0
|
|
|
|
# Final round for each lane; as soon as inN has been consumed it is reloaded
# with the corresponding block of the next batch.
vcipherlast $out0,$out0,$in0
|
|
lvx_u $in0,$x00,$inp # load next input block
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vcipherlast $out1,$out1,$in1
|
|
lvx_u $in1,$x10,$inp
|
|
vcipherlast $out2,$out2,$in2
|
|
le?vperm $in0,$in0,$in0,$leperm
|
|
lvx_u $in2,$x20,$inp
|
|
vand $tmp,$tmp,$eighty7
|
|
vcipherlast $out3,$out3,$in3
|
|
le?vperm $in1,$in1,$in1,$leperm
|
|
lvx_u $in3,$x30,$inp
|
|
vcipherlast $out4,$out4,$in4
|
|
le?vperm $in2,$in2,$in2,$leperm
|
|
lvx_u $in4,$x40,$inp
|
|
vxor $tweak,$tweak,$tmp
|
|
vcipherlast $tmp,$out5,$in5 # last block might be needed
|
|
# in stealing mode
|
|
le?vperm $in3,$in3,$in3,$leperm
|
|
lvx_u $in5,$x50,$inp
|
|
addi $inp,$inp,0x60
|
|
le?vperm $in4,$in4,$in4,$leperm
|
|
le?vperm $in5,$in5,$in5,$leperm
|
|
|
|
# Store the six results while xor-ing the freshly loaded inputs with the new
# tweaks. The sixth result stays in tmp without the little-endian permute;
# only its stored copy is permuted.
le?vperm $out0,$out0,$out0,$leperm
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
vxor $out0,$in0,$twk0
|
|
le?vperm $out2,$out2,$out2,$leperm
|
|
stvx_u $out1,$x10,$out
|
|
vxor $out1,$in1,$twk1
|
|
le?vperm $out3,$out3,$out3,$leperm
|
|
stvx_u $out2,$x20,$out
|
|
vxor $out2,$in2,$twk2
|
|
le?vperm $out4,$out4,$out4,$leperm
|
|
stvx_u $out3,$x30,$out
|
|
vxor $out3,$in3,$twk3
|
|
le?vperm $out5,$tmp,$tmp,$leperm
|
|
stvx_u $out4,$x40,$out
|
|
vxor $out4,$in4,$twk4
|
|
le?stvx_u $out5,$x50,$out
|
|
be?stvx_u $tmp, $x50,$out
|
|
vxor $out5,$in5,$twk5
|
|
addi $out,$out,0x60
|
|
|
|
# CR0 still holds the subfe. result: eq means the subtraction did not borrow,
# so another full batch of six blocks follows.
mtctr $rounds
|
|
beq Loop_xts_enc6x # did $len-=96 borrow?
|
|
|
|
# Tail dispatch: add the 0x60 back to obtain the remaining whole-block byte
# count and branch to the handler for that many blocks. len is a multiple of
# 16 here (taillen was subtracted earlier), so below 0x20 means one block and
# between 0x20 and 0x40 means three; anything above 0x40 falls into the
# five-block handler.
addic. $len,$len,0x60
|
|
beq Lxts_enc6x_zero
|
|
cmpwi $len,0x20
|
|
blt Lxts_enc6x_one
|
|
nop
|
|
beq Lxts_enc6x_two
|
|
cmpwi $len,0x40
|
|
blt Lxts_enc6x_three
|
|
nop
|
|
beq Lxts_enc6x_four
|
|
|
|
# Five remaining blocks. Because inp was pulled back when the last subtraction
# borrowed, they sit in in1-in5; they are paired with tweaks twk0-twk4.
Lxts_enc6x_five:
|
|
vxor $out0,$in1,$twk0
|
|
vxor $out1,$in2,$twk1
|
|
vxor $out2,$in3,$twk2
|
|
vxor $out3,$in4,$twk3
|
|
vxor $out4,$in5,$twk4
|
|
|
|
bl _aesp8_xts_enc5x
|
|
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
# twk5 was not consumed; it becomes twk0 for the stealing and done paths.
vmr $twk0,$twk5 # unused tweak
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
le?vperm $out2,$out2,$out2,$leperm
|
|
stvx_u $out1,$x10,$out
|
|
le?vperm $out3,$out3,$out3,$leperm
|
|
stvx_u $out2,$x20,$out
|
|
vxor $tmp,$out4,$twk5 # last block prep for stealing
|
|
le?vperm $out4,$out4,$out4,$leperm
|
|
stvx_u $out3,$x30,$out
|
|
stvx_u $out4,$x40,$out
|
|
addi $out,$out,0x50
|
|
# CR0 was set by the compare of taillen with 0 inside _aesp8_xts_enc5x.
bne Lxts_enc6x_steal
|
|
b Lxts_enc6x_done
|
|
|
|
.align 4
|
|
# Four remaining blocks, held in in2-in5 and paired with twk0-twk3.
# The fifth lane handed to _aesp8_xts_enc5x is zeroed and its result is not
# stored.
Lxts_enc6x_four:
|
|
vxor $out0,$in2,$twk0
|
|
vxor $out1,$in3,$twk1
|
|
vxor $out2,$in4,$twk2
|
|
vxor $out3,$in5,$twk3
|
|
vxor $out4,$out4,$out4
|
|
|
|
bl _aesp8_xts_enc5x
|
|
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
# twk4 was not consumed; it becomes twk0 for the stealing and done paths.
vmr $twk0,$twk4 # unused tweak
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
le?vperm $out2,$out2,$out2,$leperm
|
|
stvx_u $out1,$x10,$out
|
|
vxor $tmp,$out3,$twk4 # last block prep for stealing
|
|
le?vperm $out3,$out3,$out3,$leperm
|
|
stvx_u $out2,$x20,$out
|
|
stvx_u $out3,$x30,$out
|
|
addi $out,$out,0x40
|
|
# CR0 was set by the compare of taillen with 0 inside _aesp8_xts_enc5x.
bne Lxts_enc6x_steal
|
|
b Lxts_enc6x_done
|
|
|
|
.align 4
|
|
# Three remaining blocks, held in in3-in5 and paired with twk0-twk2.
# The two spare lanes handed to _aesp8_xts_enc5x are zeroed and their results
# are not stored.
Lxts_enc6x_three:
|
|
vxor $out0,$in3,$twk0
|
|
vxor $out1,$in4,$twk1
|
|
vxor $out2,$in5,$twk2
|
|
vxor $out3,$out3,$out3
|
|
vxor $out4,$out4,$out4
|
|
|
|
bl _aesp8_xts_enc5x
|
|
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
# twk3 was not consumed; it becomes twk0 for the stealing and done paths.
vmr $twk0,$twk3 # unused tweak
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
vxor $tmp,$out2,$twk3 # last block prep for stealing
|
|
le?vperm $out2,$out2,$out2,$leperm
|
|
stvx_u $out1,$x10,$out
|
|
stvx_u $out2,$x20,$out
|
|
addi $out,$out,0x30
|
|
# CR0 was set by the compare of taillen with 0 inside _aesp8_xts_enc5x.
bne Lxts_enc6x_steal
|
|
b Lxts_enc6x_done
|
|
|
|
.align 4
|
|
# Two remaining blocks, held in in4-in5 and paired with twk0-twk1.
# The three spare lanes handed to _aesp8_xts_enc5x are zeroed and their
# results are not stored.
Lxts_enc6x_two:
|
|
vxor $out0,$in4,$twk0
|
|
vxor $out1,$in5,$twk1
|
|
vxor $out2,$out2,$out2
|
|
vxor $out3,$out3,$out3
|
|
vxor $out4,$out4,$out4
|
|
|
|
bl _aesp8_xts_enc5x
|
|
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
# twk2 was not consumed; it becomes twk0 for the stealing and done paths.
vmr $twk0,$twk2 # unused tweak
|
|
vxor $tmp,$out1,$twk2 # last block prep for stealing
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
stvx_u $out1,$x10,$out
|
|
addi $out,$out,0x20
|
|
# CR0 was set by the compare of taillen with 0 inside _aesp8_xts_enc5x.
bne Lxts_enc6x_steal
|
|
b Lxts_enc6x_done
|
|
|
|
.align 4
|
|
# One remaining block, held in in5 and paired with twk0.
# Loop_xts_enc1x is also re-entered from Lxts_enc6x_steal to encrypt the block
# rebuilt for ciphertext stealing; on that second pass taillen is 0, so the
# bne at the end is not taken.
Lxts_enc6x_one:
|
|
vxor $out0,$in5,$twk0
|
|
nop
|
|
# Two rounds per iteration, round keys refilled from the stack copy.
Loop_xts_enc1x:
|
|
vcipher $out0,$out0,v24
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vcipher $out0,$out0,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz Loop_xts_enc1x
|
|
|
|
# Remaining rounds, interleaved with the stealing preparation:
# inp += taillen - 16, in0 = 16 bytes loaded from there and rotated by the
# lvsr-derived permute. The cmpwi sets CR0 for the bne at the end.
add $inp,$inp,$taillen
|
|
cmpwi $taillen,0
|
|
vcipher $out0,$out0,v24
|
|
|
|
subi $inp,$inp,16
|
|
vcipher $out0,$out0,v25
|
|
|
|
lvsr $inpperm,0,$taillen
|
|
vcipher $out0,$out0,v26
|
|
|
|
lvx_u $in0,0,$inp
|
|
vcipher $out0,$out0,v27
|
|
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
vcipher $out0,$out0,v28
|
|
lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
|
|
vcipher $out0,$out0,v29
|
|
lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
# twk0 xor v31 is the vcipherlast operand (v31 has rndkey0 folded in).
vxor $twk0,$twk0,v31
|
|
|
|
le?vperm $in0,$in0,$in0,$leperm
|
|
vcipher $out0,$out0,v30
|
|
|
|
vperm $in0,$in0,$in0,$inpperm
|
|
vcipherlast $out0,$out0,$twk0
|
|
|
|
vmr $twk0,$twk1 # unused tweak
|
|
vxor $tmp,$out0,$twk1 # last block prep for stealing
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
addi $out,$out,0x10
|
|
bne Lxts_enc6x_steal
|
|
b Lxts_enc6x_done
|
|
|
|
.align 4
|
|
# No whole blocks remain. Without a partial block there is nothing left to do.
# Otherwise load 16 bytes from inp + taillen - 16, rotate them with the
# lvsr-derived permute and fall through into the stealing code. tmp still
# holds the last ciphertext block produced by the main loop; it is xor-ed with
# twk0 the same way the other tail handlers prepare it.
Lxts_enc6x_zero:
|
|
cmpwi $taillen,0
|
|
beq Lxts_enc6x_done
|
|
|
|
add $inp,$inp,$taillen
|
|
subi $inp,$inp,16
|
|
lvx_u $in0,0,$inp
|
|
lvsr $inpperm,0,$taillen # $in5 is no more
|
|
le?vperm $in0,$in0,$in0,$leperm
|
|
vperm $in0,$in0,$in0,$inpperm
|
|
vxor $tmp,$tmp,$twk0
|
|
# Ciphertext stealing. The input of the final block is built in out0: vsel
# picks between in0 (xor twk0) and tmp under a byte mask made by permuting
# zero and all-ones vectors with inpperm.
# Afterwards taillen bytes of the previous output block are copied 16 bytes
# forward, out steps back by one block and the single-block path runs again.
Lxts_enc6x_steal:
|
|
vxor $in0,$in0,$twk0
|
|
vxor $out0,$out0,$out0
|
|
vspltisb $out1,-1
|
|
vperm $out0,$out0,$out1,$inpperm
|
|
vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
|
|
|
|
# r30 (also the x60 offset register) serves as the byte cursor; it starts one
# byte before the previous output block because lbzu pre-increments.
subi r30,$out,17
|
|
subi $out,$out,16
|
|
mtctr $taillen
|
|
Loop_xts_enc6x_steal:
|
|
lbzu r0,1(r30)
|
|
stb r0,16(r30)
|
|
bdnz Loop_xts_enc6x_steal
|
|
|
|
# taillen = 0 keeps the second pass from branching back here.
li $taillen,0
|
|
mtctr $rounds
|
|
b Loop_xts_enc1x # one more time...
|
|
|
|
.align 4
|
|
# Optionally write the next tweak back through ivp; skipped when ivp is 0.
# twk0 carries tweak xor rndkey0, so xor-ing with rndkey0 again recovers the
# plain tweak before it is stored.
Lxts_enc6x_done:
|
|
${UCMP}i $ivp,0
|
|
beq Lxts_enc6x_ret
|
|
|
|
vxor $tweak,$twk0,$rndkey0
|
|
le?vperm $tweak,$tweak,$tweak,$leperm
|
|
stvx_u $tweak,0,$ivp
|
|
|
|
# Epilogue: restore LR from r11, overwrite the on-stack round-key copies,
# restore vrsave, v20-v31 and r26-r31, release the frame and return.
# NOTE(review): r3 is not set before blr here, whereas Lxts_dec_ret executes
# li r3,0 first; r3 doubles as the x10 offset register in this routine.
# Confirm that callers do not consume a return value.
Lxts_enc6x_ret:
|
|
mtlr r11
|
|
li r10,`$FRAME+15`
|
|
li r11,`$FRAME+31`
|
|
# Eight 16-byte slots are overwritten with the seven constant vector.
stvx $seven,r10,$sp # wipe copies of round keys
|
|
addi r10,r10,32
|
|
stvx $seven,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $seven,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $seven,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $seven,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $seven,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $seven,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $seven,r11,$sp
|
|
addi r11,r11,32
|
|
|
|
# r10/r11 now equal FRAME+8*16+15 and FRAME+8*16+31, the offsets at which the
# prologue saved v20 and v21. vrsave is restored from its register, not from
# the stack slot written by the prologue.
mtspr 256,$vrsave
|
|
lvx v20,r10,$sp # ABI says so
|
|
addi r10,r10,32
|
|
lvx v21,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v22,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v23,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v24,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v25,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v26,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v27,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v28,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v29,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v30,r10,$sp
|
|
lvx v31,r11,$sp
|
|
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
|
|
blr
|
|
# Data words following the return; presumably a traceback table (field
# values not decoded here).
.long 0
|
|
.byte 0,12,0x04,1,0x80,6,6,0
|
|
.long 0
|
|
|
|
.align 5
|
|
# _aesp8_xts_enc5x: run the cipher rounds on up to five blocks held in
# out0-out4. Reached with bl from the tail handlers of _aesp8_xts_encrypt6x.
# Expects CTR, v24/v25 and key_ to be primed by the caller; spare lanes are
# passed in as zero vectors.
# Effects visible to the caller besides out0-out4:
#   inp += taillen - 16, and in0 = 16 bytes loaded from there, rotated by the
#   lvsr-derived permute (input for ciphertext stealing),
#   CR0 = result of comparing taillen with 0,
#   twk0 is xor-ed with v31 in place, in1-in4 are overwritten,
#   key_ is rewound and v24/v25 are re-primed.
# The bdnz targets the entry label, so the first two rounds form the loop body.
_aesp8_xts_enc5x:
|
|
vcipher $out0,$out0,v24
|
|
vcipher $out1,$out1,v24
|
|
vcipher $out2,$out2,v24
|
|
vcipher $out3,$out3,v24
|
|
vcipher $out4,$out4,v24
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vcipher $out0,$out0,v25
|
|
vcipher $out1,$out1,v25
|
|
vcipher $out2,$out2,v25
|
|
vcipher $out3,$out3,v25
|
|
vcipher $out4,$out4,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz _aesp8_xts_enc5x
|
|
|
|
add $inp,$inp,$taillen
|
|
# Sets CR0 for the bne that follows the bl in each caller.
cmpwi $taillen,0
|
|
vcipher $out0,$out0,v24
|
|
vcipher $out1,$out1,v24
|
|
vcipher $out2,$out2,v24
|
|
vcipher $out3,$out3,v24
|
|
vcipher $out4,$out4,v24
|
|
|
|
subi $inp,$inp,16
|
|
vcipher $out0,$out0,v25
|
|
vcipher $out1,$out1,v25
|
|
vcipher $out2,$out2,v25
|
|
vcipher $out3,$out3,v25
|
|
vcipher $out4,$out4,v25
|
|
# twkN xor v31 is the vcipherlast operand (v31 has rndkey0 folded in).
vxor $twk0,$twk0,v31
|
|
|
|
vcipher $out0,$out0,v26
|
|
lvsr $inpperm,0,$taillen # $in5 is no more
|
|
vcipher $out1,$out1,v26
|
|
vcipher $out2,$out2,v26
|
|
vcipher $out3,$out3,v26
|
|
vcipher $out4,$out4,v26
|
|
vxor $in1,$twk1,v31
|
|
|
|
vcipher $out0,$out0,v27
|
|
lvx_u $in0,0,$inp
|
|
vcipher $out1,$out1,v27
|
|
vcipher $out2,$out2,v27
|
|
vcipher $out3,$out3,v27
|
|
vcipher $out4,$out4,v27
|
|
vxor $in2,$twk2,v31
|
|
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
vcipher $out0,$out0,v28
|
|
vcipher $out1,$out1,v28
|
|
vcipher $out2,$out2,v28
|
|
vcipher $out3,$out3,v28
|
|
vcipher $out4,$out4,v28
|
|
lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
vxor $in3,$twk3,v31
|
|
|
|
vcipher $out0,$out0,v29
|
|
le?vperm $in0,$in0,$in0,$leperm
|
|
vcipher $out1,$out1,v29
|
|
vcipher $out2,$out2,v29
|
|
vcipher $out3,$out3,v29
|
|
vcipher $out4,$out4,v29
|
|
lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
vxor $in4,$twk4,v31
|
|
|
|
vcipher $out0,$out0,v30
|
|
vperm $in0,$in0,$in0,$inpperm
|
|
vcipher $out1,$out1,v30
|
|
vcipher $out2,$out2,v30
|
|
vcipher $out3,$out3,v30
|
|
vcipher $out4,$out4,v30
|
|
|
|
vcipherlast $out0,$out0,$twk0
|
|
vcipherlast $out1,$out1,$in1
|
|
vcipherlast $out2,$out2,$in2
|
|
vcipherlast $out3,$out3,$in3
|
|
vcipherlast $out4,$out4,$in4
|
|
blr
|
|
# Data words following the return; presumably a traceback table (field
# values not decoded here).
.long 0
|
|
.byte 0,12,0x14,0,0,0,0,0
|
|
|
|
.align 5
|
|
_aesp8_xts_decrypt6x:
|
|
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
|
|
mflr r11
|
|
li r7,`$FRAME+8*16+15`
|
|
li r3,`$FRAME+8*16+31`
|
|
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
|
|
stvx v20,r7,$sp # ABI says so
|
|
addi r7,r7,32
|
|
stvx v21,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v22,r7,$sp
|
|
addi r7,r7,32
|
|
stvx v23,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v24,r7,$sp
|
|
addi r7,r7,32
|
|
stvx v25,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v26,r7,$sp
|
|
addi r7,r7,32
|
|
stvx v27,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v28,r7,$sp
|
|
addi r7,r7,32
|
|
stvx v29,r3,$sp
|
|
addi r3,r3,32
|
|
stvx v30,r7,$sp
|
|
stvx v31,r3,$sp
|
|
li r0,-1
|
|
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
|
|
li $x10,0x10
|
|
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
li $x20,0x20
|
|
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
li $x30,0x30
|
|
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
li $x40,0x40
|
|
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
li $x50,0x50
|
|
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
li $x60,0x60
|
|
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
li $x70,0x70
|
|
mtspr 256,r0
|
|
|
|
subi $rounds,$rounds,3 # -4 in total
|
|
|
|
lvx $rndkey0,$x00,$key1 # load key schedule
|
|
lvx v30,$x10,$key1
|
|
addi $key1,$key1,0x20
|
|
lvx v31,$x00,$key1
|
|
?vperm $rndkey0,$rndkey0,v30,$keyperm
|
|
addi $key_,$sp,`$FRAME+15`
|
|
mtctr $rounds
|
|
|
|
Load_xts_dec_key:
|
|
?vperm v24,v30,v31,$keyperm
|
|
lvx v30,$x10,$key1
|
|
addi $key1,$key1,0x20
|
|
stvx v24,$x00,$key_ # off-load round[1]
|
|
?vperm v25,v31,v30,$keyperm
|
|
lvx v31,$x00,$key1
|
|
stvx v25,$x10,$key_ # off-load round[2]
|
|
addi $key_,$key_,0x20
|
|
bdnz Load_xts_dec_key
|
|
|
|
lvx v26,$x10,$key1
|
|
?vperm v24,v30,v31,$keyperm
|
|
lvx v27,$x20,$key1
|
|
stvx v24,$x00,$key_ # off-load round[3]
|
|
?vperm v25,v31,v26,$keyperm
|
|
lvx v28,$x30,$key1
|
|
stvx v25,$x10,$key_ # off-load round[4]
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
?vperm v26,v26,v27,$keyperm
|
|
lvx v29,$x40,$key1
|
|
?vperm v27,v27,v28,$keyperm
|
|
lvx v30,$x50,$key1
|
|
?vperm v28,v28,v29,$keyperm
|
|
lvx v31,$x60,$key1
|
|
?vperm v29,v29,v30,$keyperm
|
|
lvx $twk5,$x70,$key1 # borrow $twk5
|
|
?vperm v30,v30,v31,$keyperm
|
|
lvx v24,$x00,$key_ # pre-load round[1]
|
|
?vperm v31,v31,$twk5,$keyperm
|
|
lvx v25,$x10,$key_ # pre-load round[2]
|
|
|
|
vperm $in0,$inout,$inptail,$inpperm
|
|
subi $inp,$inp,31 # undo "caller"
|
|
vxor $twk0,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out0,$in0,$twk0
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in1,$x10,$inp
|
|
vxor $twk1,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in1,$in1,$in1,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out1,$in1,$twk1
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in2,$x20,$inp
|
|
andi. $taillen,$len,15
|
|
vxor $twk2,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in2,$in2,$in2,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out2,$in2,$twk2
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in3,$x30,$inp
|
|
sub $len,$len,$taillen
|
|
vxor $twk3,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in3,$in3,$in3,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out3,$in3,$twk3
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in4,$x40,$inp
|
|
subi $len,$len,0x60
|
|
vxor $twk4,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in4,$in4,$in4,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out4,$in4,$twk4
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
lvx_u $in5,$x50,$inp
|
|
addi $inp,$inp,0x60
|
|
vxor $twk5,$tweak,$rndkey0
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
le?vperm $in5,$in5,$in5,$leperm
|
|
vand $tmp,$tmp,$eighty7
|
|
vxor $out5,$in5,$twk5
|
|
vxor $tweak,$tweak,$tmp
|
|
|
|
vxor v31,v31,$rndkey0
|
|
mtctr $rounds
|
|
b Loop_xts_dec6x
|
|
|
|
.align 5
|
|
Loop_xts_dec6x:
|
|
vncipher $out0,$out0,v24
|
|
vncipher $out1,$out1,v24
|
|
vncipher $out2,$out2,v24
|
|
vncipher $out3,$out3,v24
|
|
vncipher $out4,$out4,v24
|
|
vncipher $out5,$out5,v24
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vncipher $out0,$out0,v25
|
|
vncipher $out1,$out1,v25
|
|
vncipher $out2,$out2,v25
|
|
vncipher $out3,$out3,v25
|
|
vncipher $out4,$out4,v25
|
|
vncipher $out5,$out5,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz Loop_xts_dec6x
|
|
|
|
subic $len,$len,96 # $len-=96
|
|
vxor $in0,$twk0,v31 # xor with last round key
|
|
vncipher $out0,$out0,v24
|
|
vncipher $out1,$out1,v24
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk0,$tweak,$rndkey0
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vncipher $out2,$out2,v24
|
|
vncipher $out3,$out3,v24
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vncipher $out4,$out4,v24
|
|
vncipher $out5,$out5,v24
|
|
|
|
subfe. r0,r0,r0 # borrow?-1:0
|
|
vand $tmp,$tmp,$eighty7
|
|
vncipher $out0,$out0,v25
|
|
vncipher $out1,$out1,v25
|
|
vxor $tweak,$tweak,$tmp
|
|
vncipher $out2,$out2,v25
|
|
vncipher $out3,$out3,v25
|
|
vxor $in1,$twk1,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk1,$tweak,$rndkey0
|
|
vncipher $out4,$out4,v25
|
|
vncipher $out5,$out5,v25
|
|
|
|
and r0,r0,$len
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vncipher $out0,$out0,v26
|
|
vncipher $out1,$out1,v26
|
|
vand $tmp,$tmp,$eighty7
|
|
vncipher $out2,$out2,v26
|
|
vncipher $out3,$out3,v26
|
|
vxor $tweak,$tweak,$tmp
|
|
vncipher $out4,$out4,v26
|
|
vncipher $out5,$out5,v26
|
|
|
|
add $inp,$inp,r0 # $inp is adjusted in such
|
|
# way that at exit from the
|
|
# loop inX-in5 are loaded
|
|
# with last "words"
|
|
vxor $in2,$twk2,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk2,$tweak,$rndkey0
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vncipher $out0,$out0,v27
|
|
vncipher $out1,$out1,v27
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vncipher $out2,$out2,v27
|
|
vncipher $out3,$out3,v27
|
|
vand $tmp,$tmp,$eighty7
|
|
vncipher $out4,$out4,v27
|
|
vncipher $out5,$out5,v27
|
|
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
vxor $tweak,$tweak,$tmp
|
|
vncipher $out0,$out0,v28
|
|
vncipher $out1,$out1,v28
|
|
vxor $in3,$twk3,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk3,$tweak,$rndkey0
|
|
vncipher $out2,$out2,v28
|
|
vncipher $out3,$out3,v28
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vncipher $out4,$out4,v28
|
|
vncipher $out5,$out5,v28
|
|
lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
vand $tmp,$tmp,$eighty7
|
|
|
|
vncipher $out0,$out0,v29
|
|
vncipher $out1,$out1,v29
|
|
vxor $tweak,$tweak,$tmp
|
|
vncipher $out2,$out2,v29
|
|
vncipher $out3,$out3,v29
|
|
vxor $in4,$twk4,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk4,$tweak,$rndkey0
|
|
vncipher $out4,$out4,v29
|
|
vncipher $out5,$out5,v29
|
|
lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
|
|
vncipher $out0,$out0,v30
|
|
vncipher $out1,$out1,v30
|
|
vand $tmp,$tmp,$eighty7
|
|
vncipher $out2,$out2,v30
|
|
vncipher $out3,$out3,v30
|
|
vxor $tweak,$tweak,$tmp
|
|
vncipher $out4,$out4,v30
|
|
vncipher $out5,$out5,v30
|
|
vxor $in5,$twk5,v31
|
|
vsrab $tmp,$tweak,$seven # next tweak value
|
|
vxor $twk5,$tweak,$rndkey0
|
|
|
|
vncipherlast $out0,$out0,$in0
|
|
lvx_u $in0,$x00,$inp # load next input block
|
|
vaddubm $tweak,$tweak,$tweak
|
|
vsldoi $tmp,$tmp,$tmp,15
|
|
vncipherlast $out1,$out1,$in1
|
|
lvx_u $in1,$x10,$inp
|
|
vncipherlast $out2,$out2,$in2
|
|
le?vperm $in0,$in0,$in0,$leperm
|
|
lvx_u $in2,$x20,$inp
|
|
vand $tmp,$tmp,$eighty7
|
|
vncipherlast $out3,$out3,$in3
|
|
le?vperm $in1,$in1,$in1,$leperm
|
|
lvx_u $in3,$x30,$inp
|
|
vncipherlast $out4,$out4,$in4
|
|
le?vperm $in2,$in2,$in2,$leperm
|
|
lvx_u $in4,$x40,$inp
|
|
vxor $tweak,$tweak,$tmp
|
|
vncipherlast $out5,$out5,$in5
|
|
le?vperm $in3,$in3,$in3,$leperm
|
|
lvx_u $in5,$x50,$inp
|
|
addi $inp,$inp,0x60
|
|
le?vperm $in4,$in4,$in4,$leperm
|
|
le?vperm $in5,$in5,$in5,$leperm
|
|
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
vxor $out0,$in0,$twk0
|
|
le?vperm $out2,$out2,$out2,$leperm
|
|
stvx_u $out1,$x10,$out
|
|
vxor $out1,$in1,$twk1
|
|
le?vperm $out3,$out3,$out3,$leperm
|
|
stvx_u $out2,$x20,$out
|
|
vxor $out2,$in2,$twk2
|
|
le?vperm $out4,$out4,$out4,$leperm
|
|
stvx_u $out3,$x30,$out
|
|
vxor $out3,$in3,$twk3
|
|
le?vperm $out5,$out5,$out5,$leperm
|
|
stvx_u $out4,$x40,$out
|
|
vxor $out4,$in4,$twk4
|
|
stvx_u $out5,$x50,$out
|
|
vxor $out5,$in5,$twk5
|
|
addi $out,$out,0x60
|
|
|
|
mtctr $rounds
|
|
beq Loop_xts_dec6x # did $len-=96 borrow?
|
|
|
|
addic. $len,$len,0x60
|
|
beq Lxts_dec6x_zero
|
|
cmpwi $len,0x20
|
|
blt Lxts_dec6x_one
|
|
nop
|
|
beq Lxts_dec6x_two
|
|
cmpwi $len,0x40
|
|
blt Lxts_dec6x_three
|
|
nop
|
|
beq Lxts_dec6x_four
|
|
|
|
Lxts_dec6x_five:
|
|
vxor $out0,$in1,$twk0
|
|
vxor $out1,$in2,$twk1
|
|
vxor $out2,$in3,$twk2
|
|
vxor $out3,$in4,$twk3
|
|
vxor $out4,$in5,$twk4
|
|
|
|
bl _aesp8_xts_dec5x
|
|
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
vmr $twk0,$twk5 # unused tweak
|
|
vxor $twk1,$tweak,$rndkey0
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
vxor $out0,$in0,$twk1
|
|
le?vperm $out2,$out2,$out2,$leperm
|
|
stvx_u $out1,$x10,$out
|
|
le?vperm $out3,$out3,$out3,$leperm
|
|
stvx_u $out2,$x20,$out
|
|
le?vperm $out4,$out4,$out4,$leperm
|
|
stvx_u $out3,$x30,$out
|
|
stvx_u $out4,$x40,$out
|
|
addi $out,$out,0x50
|
|
bne Lxts_dec6x_steal
|
|
b Lxts_dec6x_done
|
|
|
|
.align 4
|
|
Lxts_dec6x_four:
|
|
vxor $out0,$in2,$twk0
|
|
vxor $out1,$in3,$twk1
|
|
vxor $out2,$in4,$twk2
|
|
vxor $out3,$in5,$twk3
|
|
vxor $out4,$out4,$out4
|
|
|
|
bl _aesp8_xts_dec5x
|
|
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
vmr $twk0,$twk4 # unused tweak
|
|
vmr $twk1,$twk5
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
vxor $out0,$in0,$twk5
|
|
le?vperm $out2,$out2,$out2,$leperm
|
|
stvx_u $out1,$x10,$out
|
|
le?vperm $out3,$out3,$out3,$leperm
|
|
stvx_u $out2,$x20,$out
|
|
stvx_u $out3,$x30,$out
|
|
addi $out,$out,0x40
|
|
bne Lxts_dec6x_steal
|
|
b Lxts_dec6x_done
|
|
|
|
.align 4
|
|
Lxts_dec6x_three:
|
|
vxor $out0,$in3,$twk0
|
|
vxor $out1,$in4,$twk1
|
|
vxor $out2,$in5,$twk2
|
|
vxor $out3,$out3,$out3
|
|
vxor $out4,$out4,$out4
|
|
|
|
bl _aesp8_xts_dec5x
|
|
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
vmr $twk0,$twk3 # unused tweak
|
|
vmr $twk1,$twk4
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
vxor $out0,$in0,$twk4
|
|
le?vperm $out2,$out2,$out2,$leperm
|
|
stvx_u $out1,$x10,$out
|
|
stvx_u $out2,$x20,$out
|
|
addi $out,$out,0x30
|
|
bne Lxts_dec6x_steal
|
|
b Lxts_dec6x_done
|
|
|
|
.align 4
|
|
Lxts_dec6x_two:
|
|
vxor $out0,$in4,$twk0
|
|
vxor $out1,$in5,$twk1
|
|
vxor $out2,$out2,$out2
|
|
vxor $out3,$out3,$out3
|
|
vxor $out4,$out4,$out4
|
|
|
|
bl _aesp8_xts_dec5x
|
|
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
vmr $twk0,$twk2 # unused tweak
|
|
vmr $twk1,$twk3
|
|
le?vperm $out1,$out1,$out1,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
vxor $out0,$in0,$twk3
|
|
stvx_u $out1,$x10,$out
|
|
addi $out,$out,0x20
|
|
bne Lxts_dec6x_steal
|
|
b Lxts_dec6x_done
|
|
|
|
.align 4
|
|
Lxts_dec6x_one:
|
|
vxor $out0,$in5,$twk0
|
|
nop
|
|
Loop_xts_dec1x:
|
|
vncipher $out0,$out0,v24
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vncipher $out0,$out0,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz Loop_xts_dec1x
|
|
|
|
subi r0,$taillen,1
|
|
vncipher $out0,$out0,v24
|
|
|
|
andi. r0,r0,16
|
|
cmpwi $taillen,0
|
|
vncipher $out0,$out0,v25
|
|
|
|
sub $inp,$inp,r0
|
|
vncipher $out0,$out0,v26
|
|
|
|
lvx_u $in0,0,$inp
|
|
vncipher $out0,$out0,v27
|
|
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
vncipher $out0,$out0,v28
|
|
lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
|
|
vncipher $out0,$out0,v29
|
|
lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
vxor $twk0,$twk0,v31
|
|
|
|
le?vperm $in0,$in0,$in0,$leperm
|
|
vncipher $out0,$out0,v30
|
|
|
|
mtctr $rounds
|
|
vncipherlast $out0,$out0,$twk0
|
|
|
|
vmr $twk0,$twk1 # unused tweak
|
|
vmr $twk1,$twk2
|
|
le?vperm $out0,$out0,$out0,$leperm
|
|
stvx_u $out0,$x00,$out # store output
|
|
addi $out,$out,0x10
|
|
vxor $out0,$in0,$twk2
|
|
bne Lxts_dec6x_steal
|
|
b Lxts_dec6x_done
|
|
|
|
.align 4
|
|
Lxts_dec6x_zero:
|
|
cmpwi $taillen,0
|
|
beq Lxts_dec6x_done
|
|
|
|
lvx_u $in0,0,$inp
|
|
le?vperm $in0,$in0,$in0,$leperm
|
|
vxor $out0,$in0,$twk1
|
|
Lxts_dec6x_steal:
|
|
vncipher $out0,$out0,v24
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vncipher $out0,$out0,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz Lxts_dec6x_steal
|
|
|
|
add $inp,$inp,$taillen
|
|
vncipher $out0,$out0,v24
|
|
|
|
cmpwi $taillen,0
|
|
vncipher $out0,$out0,v25
|
|
|
|
lvx_u $in0,0,$inp
|
|
vncipher $out0,$out0,v26
|
|
|
|
lvsr $inpperm,0,$taillen # $in5 is no more
|
|
vncipher $out0,$out0,v27
|
|
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
vncipher $out0,$out0,v28
|
|
lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
|
|
vncipher $out0,$out0,v29
|
|
lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
vxor $twk1,$twk1,v31
|
|
|
|
le?vperm $in0,$in0,$in0,$leperm
|
|
vncipher $out0,$out0,v30
|
|
|
|
vperm $in0,$in0,$in0,$inpperm
|
|
vncipherlast $tmp,$out0,$twk1
|
|
|
|
le?vperm $out0,$tmp,$tmp,$leperm
|
|
le?stvx_u $out0,0,$out
|
|
be?stvx_u $tmp,0,$out
|
|
|
|
vxor $out0,$out0,$out0
|
|
vspltisb $out1,-1
|
|
vperm $out0,$out0,$out1,$inpperm
|
|
vsel $out0,$in0,$tmp,$out0
|
|
vxor $out0,$out0,$twk0
|
|
|
|
subi r30,$out,1
|
|
mtctr $taillen
|
|
Loop_xts_dec6x_steal:
|
|
lbzu r0,1(r30)
|
|
stb r0,16(r30)
|
|
bdnz Loop_xts_dec6x_steal
|
|
|
|
li $taillen,0
|
|
mtctr $rounds
|
|
b Loop_xts_dec1x # one more time...
|
|
|
|
.align 4
|
|
Lxts_dec6x_done:
|
|
${UCMP}i $ivp,0
|
|
beq Lxts_dec6x_ret
|
|
|
|
vxor $tweak,$twk0,$rndkey0
|
|
le?vperm $tweak,$tweak,$tweak,$leperm
|
|
stvx_u $tweak,0,$ivp
|
|
|
|
Lxts_dec6x_ret:
|
|
mtlr r11
|
|
li r10,`$FRAME+15`
|
|
li r11,`$FRAME+31`
|
|
stvx $seven,r10,$sp # wipe copies of round keys
|
|
addi r10,r10,32
|
|
stvx $seven,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $seven,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $seven,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $seven,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $seven,r11,$sp
|
|
addi r11,r11,32
|
|
stvx $seven,r10,$sp
|
|
addi r10,r10,32
|
|
stvx $seven,r11,$sp
|
|
addi r11,r11,32
|
|
|
|
mtspr 256,$vrsave
|
|
lvx v20,r10,$sp # ABI says so
|
|
addi r10,r10,32
|
|
lvx v21,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v22,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v23,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v24,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v25,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v26,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v27,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v28,r10,$sp
|
|
addi r10,r10,32
|
|
lvx v29,r11,$sp
|
|
addi r11,r11,32
|
|
lvx v30,r10,$sp
|
|
lvx v31,r11,$sp
|
|
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
|
|
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
|
|
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
|
|
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
|
|
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
|
|
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
|
|
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
|
|
blr
|
|
.long 0
|
|
.byte 0,12,0x04,1,0x80,6,6,0
|
|
.long 0
|
|
|
|
# _aesp8_xts_dec5x: run five blocks ($out0-$out4) through the AES
# decryption rounds in parallel.
#
# The loop at the entry label applies the round keys held in v24 and v25
# to all five blocks, reloads v24/v25 from $key_ (advanced by 0x20 per
# pass) and repeats while CTR is non-zero (bdnz). The tail then applies
# v24-v30 and finishes with vncipherlast, whose last operand is the
# per-block tweak xor-ed with v31 ($twk0 is updated in place, $in1-$in4
# receive the xor-ed copies of $twk1-$twk4).
#
# Interleaved with the rounds, the routine also
#   - sets r0 to ($taillen-1)&16, subtracts it from $inp and loads $in0
#     from the resulting address,
#   - compares $taillen with zero (the callers visible in this file
#     branch with bne after the call, presumably on this result),
#   - rewinds $key_ to sp + FRAME+15 and re-pre-loads v24/v25,
#   - reloads CTR from $rounds before returning.
.align 5
|
|
_aesp8_xts_dec5x:
|
|
vncipher $out0,$out0,v24
|
|
vncipher $out1,$out1,v24
|
|
vncipher $out2,$out2,v24
|
|
vncipher $out3,$out3,v24
|
|
vncipher $out4,$out4,v24
|
|
lvx v24,$x20,$key_ # round[3]
|
|
addi $key_,$key_,0x20
|
|
|
|
vncipher $out0,$out0,v25
|
|
vncipher $out1,$out1,v25
|
|
vncipher $out2,$out2,v25
|
|
vncipher $out3,$out3,v25
|
|
vncipher $out4,$out4,v25
|
|
lvx v25,$x10,$key_ # round[4]
|
|
bdnz _aesp8_xts_dec5x
|
|
|
|
# loop finished: remaining rounds, with the bookkeeping mixed in
subi r0,$taillen,1
|
|
vncipher $out0,$out0,v24
|
|
vncipher $out1,$out1,v24
|
|
vncipher $out2,$out2,v24
|
|
vncipher $out3,$out3,v24
|
|
vncipher $out4,$out4,v24
|
|
|
|
andi. r0,r0,16
|
|
cmpwi $taillen,0
|
|
vncipher $out0,$out0,v25
|
|
vncipher $out1,$out1,v25
|
|
vncipher $out2,$out2,v25
|
|
vncipher $out3,$out3,v25
|
|
vncipher $out4,$out4,v25
|
|
vxor $twk0,$twk0,v31
|
|
|
|
sub $inp,$inp,r0
|
|
vncipher $out0,$out0,v26
|
|
vncipher $out1,$out1,v26
|
|
vncipher $out2,$out2,v26
|
|
vncipher $out3,$out3,v26
|
|
vncipher $out4,$out4,v26
|
|
vxor $in1,$twk1,v31
|
|
|
|
vncipher $out0,$out0,v27
|
|
lvx_u $in0,0,$inp
|
|
vncipher $out1,$out1,v27
|
|
vncipher $out2,$out2,v27
|
|
vncipher $out3,$out3,v27
|
|
vncipher $out4,$out4,v27
|
|
vxor $in2,$twk2,v31
|
|
|
|
addi $key_,$sp,`$FRAME+15` # rewind $key_
|
|
vncipher $out0,$out0,v28
|
|
vncipher $out1,$out1,v28
|
|
vncipher $out2,$out2,v28
|
|
vncipher $out3,$out3,v28
|
|
vncipher $out4,$out4,v28
|
|
lvx v24,$x00,$key_ # re-pre-load round[1]
|
|
vxor $in3,$twk3,v31
|
|
|
|
vncipher $out0,$out0,v29
|
|
le?vperm $in0,$in0,$in0,$leperm
|
|
vncipher $out1,$out1,v29
|
|
vncipher $out2,$out2,v29
|
|
vncipher $out3,$out3,v29
|
|
vncipher $out4,$out4,v29
|
|
lvx v25,$x10,$key_ # re-pre-load round[2]
|
|
vxor $in4,$twk4,v31
|
|
|
|
vncipher $out0,$out0,v30
|
|
vncipher $out1,$out1,v30
|
|
vncipher $out2,$out2,v30
|
|
vncipher $out3,$out3,v30
|
|
vncipher $out4,$out4,v30
|
|
|
|
# final round: each block is combined with its own tweak xor v31
vncipherlast $out0,$out0,$twk0
|
|
vncipherlast $out1,$out1,$in1
|
|
vncipherlast $out2,$out2,$in2
|
|
vncipherlast $out3,$out3,$in3
|
|
vncipherlast $out4,$out4,$in4
|
|
mtctr $rounds
|
|
blr
|
|
.long 0
|
|
.byte 0,12,0x14,0,0,0,0,0
|
|
___
|
|
}} }}}
|
|
|
|
# Post-process the perlasm text accumulated in $code, one line at a time,
# and write the result to STDOUT:
#   1. every `...` span is evaluated as a Perl expression and replaced by
#      its value;
#   2. while still inside the constants table, .long/.byte directives that
#      end in a ?tag are re-emitted as .byte lists, adjusted for the
#      target endianness;
#   3. the le?/be?/? instruction prefixes are resolved for the target
#      selected by $flavour (a name ending in "le" means little-endian).
my $consts=1;                   # true until the "Lconsts:" label is seen
my $little_endian = ($flavour =~ /le$/o) ? 1 : 0;

# $_ is deliberately kept as the loop topic: the substitutions below all
# operate on it implicitly.
foreach (split("\n",$code)) {
    s/\`([^\`]*)\`/eval($1)/geo;

    # constants table endian-specific conversion
    if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
        # Copy the captures before any later match can clobber them.
        my ($directive, $operands, $conv) = ($1, $2, $3);
        my @bytes;

        # convert to endian-agnostic format: a flat list of byte values,
        # most significant byte of every .long first
        foreach my $item (split(/,\s*/, $operands)) {
            # a leading 0 selects hex/octal notation, which oct() parses
            my $value = ($item =~ /^0/) ? oct($item) : int($item);
            if ($directive eq "long") {
                push @bytes, ($value>>24)&0xff, ($value>>16)&0xff,
                             ($value>>8)&0xff,  $value&0xff;
            } else {
                push @bytes, $value;
            }
        }

        # little-endian conversion
        if ($little_endian) {
            if ($conv =~ /\?inv/) {
                @bytes = map { $_ ^ 0xf } @bytes;
            } elsif ($conv =~ /\?rev/) {
                @bytes = reverse(@bytes);
            }
        }

        # emit
        print ".byte\t", join(',', map { sprintf("0x%02x", $_) } @bytes), "\n";
        next;
    }
    $consts=0 if (m/Lconsts:/o);        # end of table

    # instructions prefixed with '?' are endian-specific and need
    # to be adjusted accordingly; only the first matching rule applies
    if ($little_endian) {               # little-endian
        s/le\?//o               or
        s/be\?/#be#/o           or
        s/\?lvsr/lvsl/o         or
        s/\?lvsl/lvsr/o         or
        s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
        s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
        s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
    } else {                            # big-endian
        s/le\?/#le#/o           or
        s/be\?//o               or
        s/\?([a-z]+)/$1/o;
    }

    print $_,"\n";
}

# Output is buffered, so a failed write (full disk, closed pipe) may only
# surface here.  Fail loudly instead of leaving a truncated assembly file
# behind with a zero exit status.
close STDOUT or die "error closing STDOUT: $!";
|