2 # SPDX-License-Identifier: GPL-2.0
4 # This code is taken from CRYPTOGAMs[1] and is included here using the option
5 # in the license to distribute the code under the GPL. Therefore this program
6 # is free software; you can redistribute it and/or modify it under the terms of
7 # the GNU General Public License version 2 as published by the Free Software
10 # [1] https://www.openssl.org/~appro/cryptogams/
12 # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13 # All rights reserved.
15 # Redistribution and use in source and binary forms, with or without
16 # modification, are permitted provided that the following conditions
19 # * Redistributions of source code must retain copyright notices,
20 # this list of conditions and the following disclaimer.
22 # * Redistributions in binary form must reproduce the above
23 # copyright notice, this list of conditions and the following
24 # disclaimer in the documentation and/or other materials
25 # provided with the distribution.
27 # * Neither the name of the CRYPTOGAMS nor the names of its
28 # copyright holder and contributors may be used to endorse or
29 # promote products derived from this software without specific
30 # prior written permission.
32 # ALTERNATIVELY, provided that this notice is retained in full, this
33 # product may be distributed under the terms of the GNU General Public
34 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
37 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
49 # ====================================================================
50 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51 # project. The module is, however, dual licensed under OpenSSL and
52 # CRYPTOGAMS licenses depending on where you obtain it. For further
53 # details see http://www.openssl.org/~appro/cryptogams/.
54 # ====================================================================
56 # This module implements support for AES instructions as per PowerISA
57 # specification version 2.07, first implemented by POWER8 processor.
58 # The module is endian-agnostic in the sense that it supports both big-
59 # and little-endian cases. Data alignment in parallelizable modes is
60 # handled with VSX loads and stores, which implies MSR.VSX flag being
61 # set. It should also be noted that ISA specification doesn't prohibit
62 # alignment exceptions for these instructions on page boundaries.
63 # Initially alignment was handled in a pure AltiVec/VMX way [when data
64 # is aligned programmatically, which in turn guarantees exception-
65 # free execution], but it turned out to hamper performance when vcipher
66 # instructions are interleaved. It's reckoned that eventual
67 # misalignment penalties at page boundaries are on average lower
68 # than the additional overhead of the pure AltiVec approach.
72 # Add XTS subroutine; a 9x improvement on little-endian and a 12x
73 # improvement on big-endian systems was measured.
75 ######################################################################
76 # Current large-block performance in cycles per byte processed with
77 # 128-bit key (less is better).
79 # CBC en-/decrypt CTR XTS
80 # POWER8[le] 3.96/0.72 0.74 1.1
81 # POWER8[be] 3.75/0.65 0.66 1.0
85 if ($flavour =~ /64/) {
93 } elsif ($flavour =~ /32/) {
101 } else { die "nonsense $flavour"; }
103 $LITTLE_ENDIAN = ($flavour=~/le$/) ?
$SIZE_T : 0;
105 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
106 ( $xlate="${dir}ppc-xlate.pl" and -f
$xlate ) or
107 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f
$xlate) or
108 die "can't locate ppc-xlate.pl";
# Redirect STDOUT into the perlasm translator found above, so that all
# generated code is post-processed by ppc-xlate.pl.  The translator receives
# the target flavour followed by the next command-line argument.
#
# The low-precedence "or" (with open's arguments parenthesised) matters:
# with "||" the die() was attached to the command string, which is always
# true, so a failure to start the translator was silently ignored.
open(STDOUT, "| $^X $xlate $flavour " . shift) or die "can't call $xlate: $!";
118 #########################################################################
119 {{{ # Key setup procedures #
# Symbolic register names for the key setup procedures.  Each name expands
# to a literal register and is interpolated into the assembly text below.
#
# General-purpose registers r3..r8, in this order.
120 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector registers v0..v6: zero constant, input key material, the working
# round key, the round constant, a permute mask and a scratch register.
121 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# Vector registers v7..v11: used below with vperm/vsel to rotate and merge
# each round key before it is stored.
# NOTE(review): presumably this is what handles an unaligned $out -- confirm.
122 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
131 .long
0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
132 .long
0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
133 .long
0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
138 mflr
$ptr #vvvvv "distance between . and rcon
143 .byte
0,12,0x14,0,0,0,0,0
144 .asciz
"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
146 .globl
.${prefix
}_set_encrypt_key
149 $PUSH r11
,$LRSAVE($sp)
153 beq
- Lenc_key_abort
# if ($inp==0) return -1;
155 beq
- Lenc_key_abort
# if ($out==0) return -1;
173 addi
$inp,$inp,15 # 15 is not typo
174 lvsr
$key,0,r9
# borrow $key
178 le?vspltisb
$mask,0x0f # borrow $mask
180 le?vxor
$key,$key,$mask # adjust for byte swap
183 vperm
$in0,$in0,$in1,$key # align [and byte swap in LE]
185 vxor
$zero,$zero,$zero
188 ?lvsr
$outperm,0,$out
191 ?vperm
$outmask,$zero,$outmask,$outperm
201 vperm
$key,$in0,$in0,$mask # rotate-n-splat
202 vsldoi
$tmp,$zero,$in0,12 # >>32
203 vperm
$outtail,$in0,$in0,$outperm # rotate
204 vsel
$stage,$outhead,$outtail,$outmask
205 vmr
$outhead,$outtail
206 vcipherlast
$key,$key,$rcon
211 vsldoi
$tmp,$zero,$tmp,12 # >>32
213 vsldoi
$tmp,$zero,$tmp,12 # >>32
215 vadduwm
$rcon,$rcon,$rcon
219 lvx
$rcon,0,$ptr # last two round keys
221 vperm
$key,$in0,$in0,$mask # rotate-n-splat
222 vsldoi
$tmp,$zero,$in0,12 # >>32
223 vperm
$outtail,$in0,$in0,$outperm # rotate
224 vsel
$stage,$outhead,$outtail,$outmask
225 vmr
$outhead,$outtail
226 vcipherlast
$key,$key,$rcon
231 vsldoi
$tmp,$zero,$tmp,12 # >>32
233 vsldoi
$tmp,$zero,$tmp,12 # >>32
235 vadduwm
$rcon,$rcon,$rcon
238 vperm
$key,$in0,$in0,$mask # rotate-n-splat
239 vsldoi
$tmp,$zero,$in0,12 # >>32
240 vperm
$outtail,$in0,$in0,$outperm # rotate
241 vsel
$stage,$outhead,$outtail,$outmask
242 vmr
$outhead,$outtail
243 vcipherlast
$key,$key,$rcon
248 vsldoi
$tmp,$zero,$tmp,12 # >>32
250 vsldoi
$tmp,$zero,$tmp,12 # >>32
253 vperm
$outtail,$in0,$in0,$outperm # rotate
254 vsel
$stage,$outhead,$outtail,$outmask
255 vmr
$outhead,$outtail
258 addi
$inp,$out,15 # 15 is not typo
268 vperm
$outtail,$in0,$in0,$outperm # rotate
269 vsel
$stage,$outhead,$outtail,$outmask
270 vmr
$outhead,$outtail
273 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
274 vspltisb
$key,8 # borrow $key
276 vsububm
$mask,$mask,$key # adjust the mask
279 vperm
$key,$in1,$in1,$mask # roate-n-splat
280 vsldoi
$tmp,$zero,$in0,12 # >>32
281 vcipherlast
$key,$key,$rcon
284 vsldoi
$tmp,$zero,$tmp,12 # >>32
286 vsldoi
$tmp,$zero,$tmp,12 # >>32
289 vsldoi
$stage,$zero,$in1,8
292 vsldoi
$in1,$zero,$in1,12 # >>32
293 vadduwm
$rcon,$rcon,$rcon
297 vsldoi
$stage,$stage,$in0,8
299 vperm
$key,$in1,$in1,$mask # rotate-n-splat
300 vsldoi
$tmp,$zero,$in0,12 # >>32
301 vperm
$outtail,$stage,$stage,$outperm # rotate
302 vsel
$stage,$outhead,$outtail,$outmask
303 vmr
$outhead,$outtail
304 vcipherlast
$key,$key,$rcon
308 vsldoi
$stage,$in0,$in1,8
310 vsldoi
$tmp,$zero,$tmp,12 # >>32
311 vperm
$outtail,$stage,$stage,$outperm # rotate
312 vsel
$stage,$outhead,$outtail,$outmask
313 vmr
$outhead,$outtail
315 vsldoi
$tmp,$zero,$tmp,12 # >>32
322 vsldoi
$in1,$zero,$in1,12 # >>32
323 vadduwm
$rcon,$rcon,$rcon
327 vperm
$outtail,$in0,$in0,$outperm # rotate
328 vsel
$stage,$outhead,$outtail,$outmask
329 vmr
$outhead,$outtail
331 addi
$inp,$out,15 # 15 is not typo
344 vperm
$outtail,$in0,$in0,$outperm # rotate
345 vsel
$stage,$outhead,$outtail,$outmask
346 vmr
$outhead,$outtail
349 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
353 vperm
$key,$in1,$in1,$mask # rotate-n-splat
354 vsldoi
$tmp,$zero,$in0,12 # >>32
355 vperm
$outtail,$in1,$in1,$outperm # rotate
356 vsel
$stage,$outhead,$outtail,$outmask
357 vmr
$outhead,$outtail
358 vcipherlast
$key,$key,$rcon
363 vsldoi
$tmp,$zero,$tmp,12 # >>32
365 vsldoi
$tmp,$zero,$tmp,12 # >>32
367 vadduwm
$rcon,$rcon,$rcon
369 vperm
$outtail,$in0,$in0,$outperm # rotate
370 vsel
$stage,$outhead,$outtail,$outmask
371 vmr
$outhead,$outtail
373 addi
$inp,$out,15 # 15 is not typo
377 vspltw
$key,$in0,3 # just splat
378 vsldoi
$tmp,$zero,$in1,12 # >>32
382 vsldoi
$tmp,$zero,$tmp,12 # >>32
384 vsldoi
$tmp,$zero,$tmp,12 # >>32
392 lvx
$in1,0,$inp # redundant in aligned case
393 vsel
$in1,$outhead,$in1,$outmask
403 .byte
0,12,0x14,1,0,0,3,0
405 .size
.${prefix
}_set_encrypt_key
,.-.${prefix
}_set_encrypt_key
407 .globl
.${prefix
}_set_decrypt_key
408 $STU $sp,-$FRAME($sp)
410 $PUSH r10
,$FRAME+$LRSAVE($sp)
418 subi
$inp,$out,240 # first round key
419 srwi
$rounds,$rounds,1
420 add
$out,$inp,$cnt # last round key
444 xor r3
,r3
,r3
# return value
449 .byte
0,12,4,1,0x80,0,3,0
451 .size
.${prefix
}_set_decrypt_key
,.-.${prefix
}_set_decrypt_key
454 #########################################################################
455 {{{ # Single block en- and decrypt procedures #
458 my $n = $dir eq "de" ?
"n" : "";
459 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
462 .globl
.${prefix
}_
${dir
}crypt
463 lwz
$rounds,240($key)
466 li
$idx,15 # 15 is not typo
472 lvsl v2
,0,$inp # inpperm
474 ?lvsl v3
,0,r11
# outperm
477 vperm v0
,v0
,v1
,v2
# align [and byte swap in LE]
479 ?lvsl v5
,0,$key # keyperm
480 srwi
$rounds,$rounds,1
483 subi
$rounds,$rounds,1
484 ?vperm v1
,v1
,v2
,v5
# align round key
506 v
${n
}cipherlast v0
,v0
,v1
510 li
$idx,15 # 15 is not typo
511 ?vperm v2
,v1
,v2
,v3
# outmask
513 lvx v1
,0,$out # outhead
514 vperm v0
,v0
,v0
,v3
# rotate [and byte swap in LE]
524 .byte
0,12,0x14,0,0,0,3,0
526 .size
.${prefix
}_
${dir
}crypt,.-.${prefix
}_
${dir
}crypt
532 #########################################################################
533 {{{ # CBC en- and decrypt procedures #
534 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
535 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
536 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
539 .globl
.${prefix
}_cbc_encrypt
543 cmpwi
$enc,0 # test direction
549 vxor
$rndkey0,$rndkey0,$rndkey0
550 le?vspltisb
$tmp,0x0f
552 lvx
$ivec,0,$ivp # load [unaligned] iv
554 lvx
$inptail,$idx,$ivp
555 le?vxor
$inpperm,$inpperm,$tmp
556 vperm
$ivec,$ivec,$inptail,$inpperm
559 ?lvsl
$keyperm,0,$key # prepare for unaligned key
560 lwz
$rounds,240($key)
562 lvsr
$inpperm,0,r11
# prepare for unaligned load
564 addi
$inp,$inp,15 # 15 is not typo
565 le?vxor
$inpperm,$inpperm,$tmp
567 ?lvsr
$outperm,0,$out # prepare for unaligned store
570 ?vperm
$outmask,$rndkey0,$outmask,$outperm
571 le?vxor
$outperm,$outperm,$tmp
573 srwi
$rounds,$rounds,1
575 subi
$rounds,$rounds,1
583 subi
$len,$len,16 # len-=16
586 vperm
$inout,$inout,$inptail,$inpperm
587 lvx
$rndkey1,$idx,$key
589 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
590 vxor
$inout,$inout,$rndkey0
591 lvx
$rndkey0,$idx,$key
593 vxor
$inout,$inout,$ivec
596 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
597 vcipher
$inout,$inout,$rndkey1
598 lvx
$rndkey1,$idx,$key
600 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
601 vcipher
$inout,$inout,$rndkey0
602 lvx
$rndkey0,$idx,$key
606 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
607 vcipher
$inout,$inout,$rndkey1
608 lvx
$rndkey1,$idx,$key
610 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
611 vcipherlast
$ivec,$inout,$rndkey0
614 vperm
$tmp,$ivec,$ivec,$outperm
615 vsel
$inout,$outhead,$tmp,$outmask
626 bge _aesp8_cbc_decrypt8x
631 subi
$len,$len,16 # len-=16
634 vperm
$tmp,$tmp,$inptail,$inpperm
635 lvx
$rndkey1,$idx,$key
637 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
638 vxor
$inout,$tmp,$rndkey0
639 lvx
$rndkey0,$idx,$key
643 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
644 vncipher
$inout,$inout,$rndkey1
645 lvx
$rndkey1,$idx,$key
647 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
648 vncipher
$inout,$inout,$rndkey0
649 lvx
$rndkey0,$idx,$key
653 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
654 vncipher
$inout,$inout,$rndkey1
655 lvx
$rndkey1,$idx,$key
657 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
658 vncipherlast
$inout,$inout,$rndkey0
661 vxor
$inout,$inout,$ivec
663 vperm
$tmp,$inout,$inout,$outperm
664 vsel
$inout,$outhead,$tmp,$outmask
672 lvx
$inout,0,$out # redundant in aligned case
673 vsel
$inout,$outhead,$inout,$outmask
676 neg
$enc,$ivp # write [unaligned] iv
677 li
$idx,15 # 15 is not typo
678 vxor
$rndkey0,$rndkey0,$rndkey0
680 le?vspltisb
$tmp,0x0f
681 ?lvsl
$outperm,0,$enc
682 ?vperm
$outmask,$rndkey0,$outmask,$outperm
683 le?vxor
$outperm,$outperm,$tmp
685 vperm
$ivec,$ivec,$ivec,$outperm
686 vsel
$inout,$outhead,$ivec,$outmask
687 lvx
$inptail,$idx,$ivp
689 vsel
$inout,$ivec,$inptail,$outmask
690 stvx
$inout,$idx,$ivp
695 .byte
0,12,0x14,0,0,0,6,0
698 #########################################################################
699 {{ # Optimized CBC decrypt procedure #
# Symbolic register names for the 8x-interleaved CBC decrypt code path.
#
# r0, r8 and r26..r31 serve as index operands for lvx/lvx_u/stvx/stvx_u.
# NOTE(review): the names suggest they hold byte offsets 0x00..0x70; the
# initialisation is not visible in this part of the file -- confirm.
701 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
# Eight input blocks live in v0..v3 and v10..v13.
702 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
# Eight blocks being processed in parallel live in v14..v21.
703 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
704 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
705 # v26-v31 last 6 round keys
# $tmp and $keyperm are bound to the same registers as $in3 and $in4.
706 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
710 _aesp8_cbc_decrypt8x
:
711 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
712 li r10
,`$FRAME+8*16+15`
713 li r11
,`$FRAME+8*16+31`
714 stvx v20
,r10
,$sp # ABI says so
737 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
739 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
741 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
743 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
745 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
747 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
749 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
753 subi
$rounds,$rounds,3 # -4 in total
754 subi
$len,$len,128 # bias
756 lvx
$rndkey0,$x00,$key # load key schedule
760 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
761 addi
$key_,$sp,$FRAME+15
765 ?vperm v24
,v30
,v31
,$keyperm
768 stvx v24
,$x00,$key_ # off-load round[1]
769 ?vperm v25
,v31
,v30
,$keyperm
771 stvx v25
,$x10,$key_ # off-load round[2]
772 addi
$key_,$key_,0x20
773 bdnz Load_cbc_dec_key
776 ?vperm v24
,v30
,v31
,$keyperm
778 stvx v24
,$x00,$key_ # off-load round[3]
779 ?vperm v25
,v31
,v26
,$keyperm
781 stvx v25
,$x10,$key_ # off-load round[4]
782 addi
$key_,$sp,$FRAME+15 # rewind $key_
783 ?vperm v26
,v26
,v27
,$keyperm
785 ?vperm v27
,v27
,v28
,$keyperm
787 ?vperm v28
,v28
,v29
,$keyperm
789 ?vperm v29
,v29
,v30
,$keyperm
790 lvx
$out0,$x70,$key # borrow $out0
791 ?vperm v30
,v30
,v31
,$keyperm
792 lvx v24
,$x00,$key_ # pre-load round[1]
793 ?vperm v31
,v31
,$out0,$keyperm
794 lvx v25
,$x10,$key_ # pre-load round[2]
796 #lvx $inptail,0,$inp # "caller" already did this
797 #addi $inp,$inp,15 # 15 is not typo
798 subi
$inp,$inp,15 # undo "caller"
801 lvx_u
$in0,$x00,$inp # load first 8 "words"
802 le?lvsl
$inpperm,0,$idx
803 le?vspltisb
$tmp,0x0f
805 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
807 le?vperm
$in0,$in0,$in0,$inpperm
809 le?vperm
$in1,$in1,$in1,$inpperm
811 le?vperm
$in2,$in2,$in2,$inpperm
812 vxor
$out0,$in0,$rndkey0
814 le?vperm
$in3,$in3,$in3,$inpperm
815 vxor
$out1,$in1,$rndkey0
817 le?vperm
$in4,$in4,$in4,$inpperm
818 vxor
$out2,$in2,$rndkey0
821 le?vperm
$in5,$in5,$in5,$inpperm
822 vxor
$out3,$in3,$rndkey0
823 le?vperm
$in6,$in6,$in6,$inpperm
824 vxor
$out4,$in4,$rndkey0
825 le?vperm
$in7,$in7,$in7,$inpperm
826 vxor
$out5,$in5,$rndkey0
827 vxor
$out6,$in6,$rndkey0
828 vxor
$out7,$in7,$rndkey0
834 vncipher
$out0,$out0,v24
835 vncipher
$out1,$out1,v24
836 vncipher
$out2,$out2,v24
837 vncipher
$out3,$out3,v24
838 vncipher
$out4,$out4,v24
839 vncipher
$out5,$out5,v24
840 vncipher
$out6,$out6,v24
841 vncipher
$out7,$out7,v24
842 lvx v24
,$x20,$key_ # round[3]
843 addi
$key_,$key_,0x20
845 vncipher
$out0,$out0,v25
846 vncipher
$out1,$out1,v25
847 vncipher
$out2,$out2,v25
848 vncipher
$out3,$out3,v25
849 vncipher
$out4,$out4,v25
850 vncipher
$out5,$out5,v25
851 vncipher
$out6,$out6,v25
852 vncipher
$out7,$out7,v25
853 lvx v25
,$x10,$key_ # round[4]
856 subic
$len,$len,128 # $len-=128
857 vncipher
$out0,$out0,v24
858 vncipher
$out1,$out1,v24
859 vncipher
$out2,$out2,v24
860 vncipher
$out3,$out3,v24
861 vncipher
$out4,$out4,v24
862 vncipher
$out5,$out5,v24
863 vncipher
$out6,$out6,v24
864 vncipher
$out7,$out7,v24
866 subfe
. r0
,r0
,r0
# borrow?-1:0
867 vncipher
$out0,$out0,v25
868 vncipher
$out1,$out1,v25
869 vncipher
$out2,$out2,v25
870 vncipher
$out3,$out3,v25
871 vncipher
$out4,$out4,v25
872 vncipher
$out5,$out5,v25
873 vncipher
$out6,$out6,v25
874 vncipher
$out7,$out7,v25
877 vncipher
$out0,$out0,v26
878 vncipher
$out1,$out1,v26
879 vncipher
$out2,$out2,v26
880 vncipher
$out3,$out3,v26
881 vncipher
$out4,$out4,v26
882 vncipher
$out5,$out5,v26
883 vncipher
$out6,$out6,v26
884 vncipher
$out7,$out7,v26
886 add
$inp,$inp,r0
# $inp is adjusted in such
887 # way that at exit from the
888 # loop inX-in7 are loaded
890 vncipher
$out0,$out0,v27
891 vncipher
$out1,$out1,v27
892 vncipher
$out2,$out2,v27
893 vncipher
$out3,$out3,v27
894 vncipher
$out4,$out4,v27
895 vncipher
$out5,$out5,v27
896 vncipher
$out6,$out6,v27
897 vncipher
$out7,$out7,v27
899 addi
$key_,$sp,$FRAME+15 # rewind $key_
900 vncipher
$out0,$out0,v28
901 vncipher
$out1,$out1,v28
902 vncipher
$out2,$out2,v28
903 vncipher
$out3,$out3,v28
904 vncipher
$out4,$out4,v28
905 vncipher
$out5,$out5,v28
906 vncipher
$out6,$out6,v28
907 vncipher
$out7,$out7,v28
908 lvx v24
,$x00,$key_ # re-pre-load round[1]
910 vncipher
$out0,$out0,v29
911 vncipher
$out1,$out1,v29
912 vncipher
$out2,$out2,v29
913 vncipher
$out3,$out3,v29
914 vncipher
$out4,$out4,v29
915 vncipher
$out5,$out5,v29
916 vncipher
$out6,$out6,v29
917 vncipher
$out7,$out7,v29
918 lvx v25
,$x10,$key_ # re-pre-load round[2]
920 vncipher
$out0,$out0,v30
921 vxor
$ivec,$ivec,v31
# xor with last round key
922 vncipher
$out1,$out1,v30
924 vncipher
$out2,$out2,v30
926 vncipher
$out3,$out3,v30
928 vncipher
$out4,$out4,v30
930 vncipher
$out5,$out5,v30
932 vncipher
$out6,$out6,v30
934 vncipher
$out7,$out7,v30
937 vncipherlast
$out0,$out0,$ivec
938 vncipherlast
$out1,$out1,$in0
939 lvx_u
$in0,$x00,$inp # load next input block
940 vncipherlast
$out2,$out2,$in1
942 vncipherlast
$out3,$out3,$in2
943 le?vperm
$in0,$in0,$in0,$inpperm
945 vncipherlast
$out4,$out4,$in3
946 le?vperm
$in1,$in1,$in1,$inpperm
948 vncipherlast
$out5,$out5,$in4
949 le?vperm
$in2,$in2,$in2,$inpperm
951 vncipherlast
$out6,$out6,$in5
952 le?vperm
$in3,$in3,$in3,$inpperm
954 vncipherlast
$out7,$out7,$in6
955 le?vperm
$in4,$in4,$in4,$inpperm
958 le?vperm
$in5,$in5,$in5,$inpperm
962 le?vperm
$out0,$out0,$out0,$inpperm
963 le?vperm
$out1,$out1,$out1,$inpperm
964 stvx_u
$out0,$x00,$out
965 le?vperm
$in6,$in6,$in6,$inpperm
966 vxor
$out0,$in0,$rndkey0
967 le?vperm
$out2,$out2,$out2,$inpperm
968 stvx_u
$out1,$x10,$out
969 le?vperm
$in7,$in7,$in7,$inpperm
970 vxor
$out1,$in1,$rndkey0
971 le?vperm
$out3,$out3,$out3,$inpperm
972 stvx_u
$out2,$x20,$out
973 vxor
$out2,$in2,$rndkey0
974 le?vperm
$out4,$out4,$out4,$inpperm
975 stvx_u
$out3,$x30,$out
976 vxor
$out3,$in3,$rndkey0
977 le?vperm
$out5,$out5,$out5,$inpperm
978 stvx_u
$out4,$x40,$out
979 vxor
$out4,$in4,$rndkey0
980 le?vperm
$out6,$out6,$out6,$inpperm
981 stvx_u
$out5,$x50,$out
982 vxor
$out5,$in5,$rndkey0
983 le?vperm
$out7,$out7,$out7,$inpperm
984 stvx_u
$out6,$x60,$out
985 vxor
$out6,$in6,$rndkey0
986 stvx_u
$out7,$x70,$out
988 vxor
$out7,$in7,$rndkey0
991 beq Loop_cbc_dec8x
# did $len-=128 borrow?
998 Loop_cbc_dec8x_tail
: # up to 7 "words" tail...
999 vncipher
$out1,$out1,v24
1000 vncipher
$out2,$out2,v24
1001 vncipher
$out3,$out3,v24
1002 vncipher
$out4,$out4,v24
1003 vncipher
$out5,$out5,v24
1004 vncipher
$out6,$out6,v24
1005 vncipher
$out7,$out7,v24
1006 lvx v24
,$x20,$key_ # round[3]
1007 addi
$key_,$key_,0x20
1009 vncipher
$out1,$out1,v25
1010 vncipher
$out2,$out2,v25
1011 vncipher
$out3,$out3,v25
1012 vncipher
$out4,$out4,v25
1013 vncipher
$out5,$out5,v25
1014 vncipher
$out6,$out6,v25
1015 vncipher
$out7,$out7,v25
1016 lvx v25
,$x10,$key_ # round[4]
1017 bdnz Loop_cbc_dec8x_tail
1019 vncipher
$out1,$out1,v24
1020 vncipher
$out2,$out2,v24
1021 vncipher
$out3,$out3,v24
1022 vncipher
$out4,$out4,v24
1023 vncipher
$out5,$out5,v24
1024 vncipher
$out6,$out6,v24
1025 vncipher
$out7,$out7,v24
1027 vncipher
$out1,$out1,v25
1028 vncipher
$out2,$out2,v25
1029 vncipher
$out3,$out3,v25
1030 vncipher
$out4,$out4,v25
1031 vncipher
$out5,$out5,v25
1032 vncipher
$out6,$out6,v25
1033 vncipher
$out7,$out7,v25
1035 vncipher
$out1,$out1,v26
1036 vncipher
$out2,$out2,v26
1037 vncipher
$out3,$out3,v26
1038 vncipher
$out4,$out4,v26
1039 vncipher
$out5,$out5,v26
1040 vncipher
$out6,$out6,v26
1041 vncipher
$out7,$out7,v26
1043 vncipher
$out1,$out1,v27
1044 vncipher
$out2,$out2,v27
1045 vncipher
$out3,$out3,v27
1046 vncipher
$out4,$out4,v27
1047 vncipher
$out5,$out5,v27
1048 vncipher
$out6,$out6,v27
1049 vncipher
$out7,$out7,v27
1051 vncipher
$out1,$out1,v28
1052 vncipher
$out2,$out2,v28
1053 vncipher
$out3,$out3,v28
1054 vncipher
$out4,$out4,v28
1055 vncipher
$out5,$out5,v28
1056 vncipher
$out6,$out6,v28
1057 vncipher
$out7,$out7,v28
1059 vncipher
$out1,$out1,v29
1060 vncipher
$out2,$out2,v29
1061 vncipher
$out3,$out3,v29
1062 vncipher
$out4,$out4,v29
1063 vncipher
$out5,$out5,v29
1064 vncipher
$out6,$out6,v29
1065 vncipher
$out7,$out7,v29
1067 vncipher
$out1,$out1,v30
1068 vxor
$ivec,$ivec,v31
# last round key
1069 vncipher
$out2,$out2,v30
1071 vncipher
$out3,$out3,v30
1073 vncipher
$out4,$out4,v30
1075 vncipher
$out5,$out5,v30
1077 vncipher
$out6,$out6,v30
1079 vncipher
$out7,$out7,v30
1082 cmplwi
$len,32 # switch($len)
1087 blt Lcbc_dec8x_three
1096 vncipherlast
$out1,$out1,$ivec
1097 vncipherlast
$out2,$out2,$in1
1098 vncipherlast
$out3,$out3,$in2
1099 vncipherlast
$out4,$out4,$in3
1100 vncipherlast
$out5,$out5,$in4
1101 vncipherlast
$out6,$out6,$in5
1102 vncipherlast
$out7,$out7,$in6
1105 le?vperm
$out1,$out1,$out1,$inpperm
1106 le?vperm
$out2,$out2,$out2,$inpperm
1107 stvx_u
$out1,$x00,$out
1108 le?vperm
$out3,$out3,$out3,$inpperm
1109 stvx_u
$out2,$x10,$out
1110 le?vperm
$out4,$out4,$out4,$inpperm
1111 stvx_u
$out3,$x20,$out
1112 le?vperm
$out5,$out5,$out5,$inpperm
1113 stvx_u
$out4,$x30,$out
1114 le?vperm
$out6,$out6,$out6,$inpperm
1115 stvx_u
$out5,$x40,$out
1116 le?vperm
$out7,$out7,$out7,$inpperm
1117 stvx_u
$out6,$x50,$out
1118 stvx_u
$out7,$x60,$out
1124 vncipherlast
$out2,$out2,$ivec
1125 vncipherlast
$out3,$out3,$in2
1126 vncipherlast
$out4,$out4,$in3
1127 vncipherlast
$out5,$out5,$in4
1128 vncipherlast
$out6,$out6,$in5
1129 vncipherlast
$out7,$out7,$in6
1132 le?vperm
$out2,$out2,$out2,$inpperm
1133 le?vperm
$out3,$out3,$out3,$inpperm
1134 stvx_u
$out2,$x00,$out
1135 le?vperm
$out4,$out4,$out4,$inpperm
1136 stvx_u
$out3,$x10,$out
1137 le?vperm
$out5,$out5,$out5,$inpperm
1138 stvx_u
$out4,$x20,$out
1139 le?vperm
$out6,$out6,$out6,$inpperm
1140 stvx_u
$out5,$x30,$out
1141 le?vperm
$out7,$out7,$out7,$inpperm
1142 stvx_u
$out6,$x40,$out
1143 stvx_u
$out7,$x50,$out
1149 vncipherlast
$out3,$out3,$ivec
1150 vncipherlast
$out4,$out4,$in3
1151 vncipherlast
$out5,$out5,$in4
1152 vncipherlast
$out6,$out6,$in5
1153 vncipherlast
$out7,$out7,$in6
1156 le?vperm
$out3,$out3,$out3,$inpperm
1157 le?vperm
$out4,$out4,$out4,$inpperm
1158 stvx_u
$out3,$x00,$out
1159 le?vperm
$out5,$out5,$out5,$inpperm
1160 stvx_u
$out4,$x10,$out
1161 le?vperm
$out6,$out6,$out6,$inpperm
1162 stvx_u
$out5,$x20,$out
1163 le?vperm
$out7,$out7,$out7,$inpperm
1164 stvx_u
$out6,$x30,$out
1165 stvx_u
$out7,$x40,$out
1171 vncipherlast
$out4,$out4,$ivec
1172 vncipherlast
$out5,$out5,$in4
1173 vncipherlast
$out6,$out6,$in5
1174 vncipherlast
$out7,$out7,$in6
1177 le?vperm
$out4,$out4,$out4,$inpperm
1178 le?vperm
$out5,$out5,$out5,$inpperm
1179 stvx_u
$out4,$x00,$out
1180 le?vperm
$out6,$out6,$out6,$inpperm
1181 stvx_u
$out5,$x10,$out
1182 le?vperm
$out7,$out7,$out7,$inpperm
1183 stvx_u
$out6,$x20,$out
1184 stvx_u
$out7,$x30,$out
1190 vncipherlast
$out5,$out5,$ivec
1191 vncipherlast
$out6,$out6,$in5
1192 vncipherlast
$out7,$out7,$in6
1195 le?vperm
$out5,$out5,$out5,$inpperm
1196 le?vperm
$out6,$out6,$out6,$inpperm
1197 stvx_u
$out5,$x00,$out
1198 le?vperm
$out7,$out7,$out7,$inpperm
1199 stvx_u
$out6,$x10,$out
1200 stvx_u
$out7,$x20,$out
1206 vncipherlast
$out6,$out6,$ivec
1207 vncipherlast
$out7,$out7,$in6
1210 le?vperm
$out6,$out6,$out6,$inpperm
1211 le?vperm
$out7,$out7,$out7,$inpperm
1212 stvx_u
$out6,$x00,$out
1213 stvx_u
$out7,$x10,$out
1219 vncipherlast
$out7,$out7,$ivec
1222 le?vperm
$out7,$out7,$out7,$inpperm
1227 le?vperm
$ivec,$ivec,$ivec,$inpperm
1228 stvx_u
$ivec,0,$ivp # write [unaligned] iv
1232 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1234 stvx
$inpperm,r11
,$sp
1236 stvx
$inpperm,r10
,$sp
1238 stvx
$inpperm,r11
,$sp
1240 stvx
$inpperm,r10
,$sp
1242 stvx
$inpperm,r11
,$sp
1244 stvx
$inpperm,r10
,$sp
1246 stvx
$inpperm,r11
,$sp
1250 lvx v20
,r10
,$sp # ABI says so
1272 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1273 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1274 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1275 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1276 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1277 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1278 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1281 .byte
0,12,0x14,0,0x80,6,6,0
1283 .size
.${prefix
}_cbc_encrypt
,.-.${prefix
}_cbc_encrypt
1287 #########################################################################
1288 {{{ # CTR procedure[s] #
1289 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1290 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1291 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1296 .globl
.${prefix
}_ctr32_encrypt_blocks
1305 vxor
$rndkey0,$rndkey0,$rndkey0
1306 le?vspltisb
$tmp,0x0f
1308 lvx
$ivec,0,$ivp # load [unaligned] iv
1309 lvsl
$inpperm,0,$ivp
1310 lvx
$inptail,$idx,$ivp
1312 le?vxor
$inpperm,$inpperm,$tmp
1313 vperm
$ivec,$ivec,$inptail,$inpperm
1314 vsldoi
$one,$rndkey0,$one,1
1317 ?lvsl
$keyperm,0,$key # prepare for unaligned key
1318 lwz
$rounds,240($key)
1320 lvsr
$inpperm,0,r11
# prepare for unaligned load
1322 addi
$inp,$inp,15 # 15 is not typo
1323 le?vxor
$inpperm,$inpperm,$tmp
1325 srwi
$rounds,$rounds,1
1327 subi
$rounds,$rounds,1
1330 bge _aesp8_ctr32_encrypt8x
1332 ?lvsr
$outperm,0,$out # prepare for unaligned store
1333 vspltisb
$outmask,-1
1335 ?vperm
$outmask,$rndkey0,$outmask,$outperm
1336 le?vxor
$outperm,$outperm,$tmp
1340 lvx
$rndkey1,$idx,$key
1342 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1343 vxor
$inout,$ivec,$rndkey0
1344 lvx
$rndkey0,$idx,$key
1350 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1351 vcipher
$inout,$inout,$rndkey1
1352 lvx
$rndkey1,$idx,$key
1354 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1355 vcipher
$inout,$inout,$rndkey0
1356 lvx
$rndkey0,$idx,$key
1360 vadduqm
$ivec,$ivec,$one
1364 subic
. $len,$len,1 # blocks--
1366 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1367 vcipher
$inout,$inout,$rndkey1
1368 lvx
$rndkey1,$idx,$key
1369 vperm
$dat,$dat,$inptail,$inpperm
1371 ?vperm
$rndkey1,$rndkey0,$rndkey1,$keyperm
1373 vxor
$dat,$dat,$rndkey1 # last round key
1374 vcipherlast
$inout,$inout,$dat
1376 lvx
$rndkey1,$idx,$key
1378 vperm
$inout,$inout,$inout,$outperm
1379 vsel
$dat,$outhead,$inout,$outmask
1381 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1383 vxor
$inout,$ivec,$rndkey0
1384 lvx
$rndkey0,$idx,$key
1391 lvx
$inout,0,$out # redundant in aligned case
1392 vsel
$inout,$outhead,$inout,$outmask
1398 .byte
0,12,0x14,0,0,0,6,0
1401 #########################################################################
1402 {{ # Optimized CTR procedure #
# Symbolic register names for the 8x-interleaved CTR code path.
#
# r0, r8 and r26..r31 serve as index operands for lvx/lvx_u/stvx.
# NOTE(review): the names suggest they hold byte offsets 0x00..0x70; the
# initialisation is not visible in this part of the file -- confirm.
1404 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
# Eight input blocks live in v0..v3, v10 and v12..v14 (v11 is skipped).
1405 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
# Eight counter blocks being encrypted in parallel live in v15..v22.
1406 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1407 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
1408 # v26-v31 last 6 round keys
# $tmp and $keyperm are bound to the same registers as $in3 and $in4.
1409 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
# Counter increments reuse the registers that the enclosing CTR block
# names $outhead, $outperm and $outmask; $two is computed below as $one+$one.
1410 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1414 _aesp8_ctr32_encrypt8x
:
1415 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1416 li r10
,`$FRAME+8*16+15`
1417 li r11
,`$FRAME+8*16+31`
1418 stvx v20
,r10
,$sp # ABI says so
1441 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1443 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1445 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1447 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1449 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1451 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1453 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1457 subi
$rounds,$rounds,3 # -4 in total
1459 lvx
$rndkey0,$x00,$key # load key schedule
1463 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
1464 addi
$key_,$sp,$FRAME+15
1468 ?vperm v24
,v30
,v31
,$keyperm
1471 stvx v24
,$x00,$key_ # off-load round[1]
1472 ?vperm v25
,v31
,v30
,$keyperm
1474 stvx v25
,$x10,$key_ # off-load round[2]
1475 addi
$key_,$key_,0x20
1476 bdnz Load_ctr32_enc_key
1479 ?vperm v24
,v30
,v31
,$keyperm
1481 stvx v24
,$x00,$key_ # off-load round[3]
1482 ?vperm v25
,v31
,v26
,$keyperm
1484 stvx v25
,$x10,$key_ # off-load round[4]
1485 addi
$key_,$sp,$FRAME+15 # rewind $key_
1486 ?vperm v26
,v26
,v27
,$keyperm
1488 ?vperm v27
,v27
,v28
,$keyperm
1490 ?vperm v28
,v28
,v29
,$keyperm
1492 ?vperm v29
,v29
,v30
,$keyperm
1493 lvx
$out0,$x70,$key # borrow $out0
1494 ?vperm v30
,v30
,v31
,$keyperm
1495 lvx v24
,$x00,$key_ # pre-load round[1]
1496 ?vperm v31
,v31
,$out0,$keyperm
1497 lvx v25
,$x10,$key_ # pre-load round[2]
1499 vadduqm
$two,$one,$one
1500 subi
$inp,$inp,15 # undo "caller"
1503 vadduqm
$out1,$ivec,$one # counter values ...
1504 vadduqm
$out2,$ivec,$two
1505 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1507 vadduqm
$out3,$out1,$two
1508 vxor
$out1,$out1,$rndkey0
1509 le?lvsl
$inpperm,0,$idx
1510 vadduqm
$out4,$out2,$two
1511 vxor
$out2,$out2,$rndkey0
1512 le?vspltisb
$tmp,0x0f
1513 vadduqm
$out5,$out3,$two
1514 vxor
$out3,$out3,$rndkey0
1515 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1516 vadduqm
$out6,$out4,$two
1517 vxor
$out4,$out4,$rndkey0
1518 vadduqm
$out7,$out5,$two
1519 vxor
$out5,$out5,$rndkey0
1520 vadduqm
$ivec,$out6,$two # next counter value
1521 vxor
$out6,$out6,$rndkey0
1522 vxor
$out7,$out7,$rndkey0
1528 vcipher
$out0,$out0,v24
1529 vcipher
$out1,$out1,v24
1530 vcipher
$out2,$out2,v24
1531 vcipher
$out3,$out3,v24
1532 vcipher
$out4,$out4,v24
1533 vcipher
$out5,$out5,v24
1534 vcipher
$out6,$out6,v24
1535 vcipher
$out7,$out7,v24
1536 Loop_ctr32_enc8x_middle
:
1537 lvx v24
,$x20,$key_ # round[3]
1538 addi
$key_,$key_,0x20
1540 vcipher
$out0,$out0,v25
1541 vcipher
$out1,$out1,v25
1542 vcipher
$out2,$out2,v25
1543 vcipher
$out3,$out3,v25
1544 vcipher
$out4,$out4,v25
1545 vcipher
$out5,$out5,v25
1546 vcipher
$out6,$out6,v25
1547 vcipher
$out7,$out7,v25
1548 lvx v25
,$x10,$key_ # round[4]
1549 bdnz Loop_ctr32_enc8x
1551 subic r11
,$len,256 # $len-256, borrow $key_
1552 vcipher
$out0,$out0,v24
1553 vcipher
$out1,$out1,v24
1554 vcipher
$out2,$out2,v24
1555 vcipher
$out3,$out3,v24
1556 vcipher
$out4,$out4,v24
1557 vcipher
$out5,$out5,v24
1558 vcipher
$out6,$out6,v24
1559 vcipher
$out7,$out7,v24
1561 subfe r0
,r0
,r0
# borrow?-1:0
1562 vcipher
$out0,$out0,v25
1563 vcipher
$out1,$out1,v25
1564 vcipher
$out2,$out2,v25
1565 vcipher
$out3,$out3,v25
1566 vcipher
$out4,$out4,v25
1567 vcipher
$out5,$out5,v25
1568 vcipher
$out6,$out6,v25
1569 vcipher
$out7,$out7,v25
1572 addi
$key_,$sp,$FRAME+15 # rewind $key_
1573 vcipher
$out0,$out0,v26
1574 vcipher
$out1,$out1,v26
1575 vcipher
$out2,$out2,v26
1576 vcipher
$out3,$out3,v26
1577 vcipher
$out4,$out4,v26
1578 vcipher
$out5,$out5,v26
1579 vcipher
$out6,$out6,v26
1580 vcipher
$out7,$out7,v26
1581 lvx v24
,$x00,$key_ # re-pre-load round[1]
1583 subic
$len,$len,129 # $len-=129
1584 vcipher
$out0,$out0,v27
1585 addi
$len,$len,1 # $len-=128 really
1586 vcipher
$out1,$out1,v27
1587 vcipher
$out2,$out2,v27
1588 vcipher
$out3,$out3,v27
1589 vcipher
$out4,$out4,v27
1590 vcipher
$out5,$out5,v27
1591 vcipher
$out6,$out6,v27
1592 vcipher
$out7,$out7,v27
1593 lvx v25
,$x10,$key_ # re-pre-load round[2]
1595 vcipher
$out0,$out0,v28
1596 lvx_u
$in0,$x00,$inp # load input
1597 vcipher
$out1,$out1,v28
1598 lvx_u
$in1,$x10,$inp
1599 vcipher
$out2,$out2,v28
1600 lvx_u
$in2,$x20,$inp
1601 vcipher
$out3,$out3,v28
1602 lvx_u
$in3,$x30,$inp
1603 vcipher
$out4,$out4,v28
1604 lvx_u
$in4,$x40,$inp
1605 vcipher
$out5,$out5,v28
1606 lvx_u
$in5,$x50,$inp
1607 vcipher
$out6,$out6,v28
1608 lvx_u
$in6,$x60,$inp
1609 vcipher
$out7,$out7,v28
1610 lvx_u
$in7,$x70,$inp
1613 vcipher
$out0,$out0,v29
1614 le?vperm
$in0,$in0,$in0,$inpperm
1615 vcipher
$out1,$out1,v29
1616 le?vperm
$in1,$in1,$in1,$inpperm
1617 vcipher
$out2,$out2,v29
1618 le?vperm
$in2,$in2,$in2,$inpperm
1619 vcipher
$out3,$out3,v29
1620 le?vperm
$in3,$in3,$in3,$inpperm
1621 vcipher
$out4,$out4,v29
1622 le?vperm
$in4,$in4,$in4,$inpperm
1623 vcipher
$out5,$out5,v29
1624 le?vperm
$in5,$in5,$in5,$inpperm
1625 vcipher
$out6,$out6,v29
1626 le?vperm
$in6,$in6,$in6,$inpperm
1627 vcipher
$out7,$out7,v29
1628 le?vperm
$in7,$in7,$in7,$inpperm
1630 add
$inp,$inp,r0
# $inp is adjusted in such
1631 # way that at exit from the
1632 # loop inX-in7 are loaded
1634 subfe
. r0
,r0
,r0
# borrow?-1:0
1635 vcipher
$out0,$out0,v30
1636 vxor
$in0,$in0,v31
# xor with last round key
1637 vcipher
$out1,$out1,v30
1639 vcipher
$out2,$out2,v30
1641 vcipher
$out3,$out3,v30
1643 vcipher
$out4,$out4,v30
1645 vcipher
$out5,$out5,v30
1647 vcipher
$out6,$out6,v30
1649 vcipher
$out7,$out7,v30
1652 bne Lctr32_enc8x_break
# did $len-129 borrow?
1654 vcipherlast
$in0,$out0,$in0
1655 vcipherlast
$in1,$out1,$in1
1656 vadduqm
$out1,$ivec,$one # counter values ...
1657 vcipherlast
$in2,$out2,$in2
1658 vadduqm
$out2,$ivec,$two
1659 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1660 vcipherlast
$in3,$out3,$in3
1661 vadduqm
$out3,$out1,$two
1662 vxor
$out1,$out1,$rndkey0
1663 vcipherlast
$in4,$out4,$in4
1664 vadduqm
$out4,$out2,$two
1665 vxor
$out2,$out2,$rndkey0
1666 vcipherlast
$in5,$out5,$in5
1667 vadduqm
$out5,$out3,$two
1668 vxor
$out3,$out3,$rndkey0
1669 vcipherlast
$in6,$out6,$in6
1670 vadduqm
$out6,$out4,$two
1671 vxor
$out4,$out4,$rndkey0
1672 vcipherlast
$in7,$out7,$in7
1673 vadduqm
$out7,$out5,$two
1674 vxor
$out5,$out5,$rndkey0
1675 le?vperm
$in0,$in0,$in0,$inpperm
1676 vadduqm
$ivec,$out6,$two # next counter value
1677 vxor
$out6,$out6,$rndkey0
1678 le?vperm
$in1,$in1,$in1,$inpperm
1679 vxor
$out7,$out7,$rndkey0
1682 vcipher
$out0,$out0,v24
1683 stvx_u
$in0,$x00,$out
1684 le?vperm
$in2,$in2,$in2,$inpperm
1685 vcipher
$out1,$out1,v24
1686 stvx_u
$in1,$x10,$out
1687 le?vperm
$in3,$in3,$in3,$inpperm
1688 vcipher
$out2,$out2,v24
1689 stvx_u
$in2,$x20,$out
1690 le?vperm
$in4,$in4,$in4,$inpperm
1691 vcipher
$out3,$out3,v24
1692 stvx_u
$in3,$x30,$out
1693 le?vperm
$in5,$in5,$in5,$inpperm
1694 vcipher
$out4,$out4,v24
1695 stvx_u
$in4,$x40,$out
1696 le?vperm
$in6,$in6,$in6,$inpperm
1697 vcipher
$out5,$out5,v24
1698 stvx_u
$in5,$x50,$out
1699 le?vperm
$in7,$in7,$in7,$inpperm
1700 vcipher
$out6,$out6,v24
1701 stvx_u
$in6,$x60,$out
1702 vcipher
$out7,$out7,v24
1703 stvx_u
$in7,$x70,$out
1706 b Loop_ctr32_enc8x_middle
1711 blt Lctr32_enc8x_one
1713 beq Lctr32_enc8x_two
1715 blt Lctr32_enc8x_three
1717 beq Lctr32_enc8x_four
1719 blt Lctr32_enc8x_five
1721 beq Lctr32_enc8x_six
1723 blt Lctr32_enc8x_seven
1726 vcipherlast
$out0,$out0,$in0
1727 vcipherlast
$out1,$out1,$in1
1728 vcipherlast
$out2,$out2,$in2
1729 vcipherlast
$out3,$out3,$in3
1730 vcipherlast
$out4,$out4,$in4
1731 vcipherlast
$out5,$out5,$in5
1732 vcipherlast
$out6,$out6,$in6
1733 vcipherlast
$out7,$out7,$in7
1735 le?vperm
$out0,$out0,$out0,$inpperm
1736 le?vperm
$out1,$out1,$out1,$inpperm
1737 stvx_u
$out0,$x00,$out
1738 le?vperm
$out2,$out2,$out2,$inpperm
1739 stvx_u
$out1,$x10,$out
1740 le?vperm
$out3,$out3,$out3,$inpperm
1741 stvx_u
$out2,$x20,$out
1742 le?vperm
$out4,$out4,$out4,$inpperm
1743 stvx_u
$out3,$x30,$out
1744 le?vperm
$out5,$out5,$out5,$inpperm
1745 stvx_u
$out4,$x40,$out
1746 le?vperm
$out6,$out6,$out6,$inpperm
1747 stvx_u
$out5,$x50,$out
1748 le?vperm
$out7,$out7,$out7,$inpperm
1749 stvx_u
$out6,$x60,$out
1750 stvx_u
$out7,$x70,$out
1756 vcipherlast
$out0,$out0,$in1
1757 vcipherlast
$out1,$out1,$in2
1758 vcipherlast
$out2,$out2,$in3
1759 vcipherlast
$out3,$out3,$in4
1760 vcipherlast
$out4,$out4,$in5
1761 vcipherlast
$out5,$out5,$in6
1762 vcipherlast
$out6,$out6,$in7
1764 le?vperm
$out0,$out0,$out0,$inpperm
1765 le?vperm
$out1,$out1,$out1,$inpperm
1766 stvx_u
$out0,$x00,$out
1767 le?vperm
$out2,$out2,$out2,$inpperm
1768 stvx_u
$out1,$x10,$out
1769 le?vperm
$out3,$out3,$out3,$inpperm
1770 stvx_u
$out2,$x20,$out
1771 le?vperm
$out4,$out4,$out4,$inpperm
1772 stvx_u
$out3,$x30,$out
1773 le?vperm
$out5,$out5,$out5,$inpperm
1774 stvx_u
$out4,$x40,$out
1775 le?vperm
$out6,$out6,$out6,$inpperm
1776 stvx_u
$out5,$x50,$out
1777 stvx_u
$out6,$x60,$out
1783 vcipherlast
$out0,$out0,$in2
1784 vcipherlast
$out1,$out1,$in3
1785 vcipherlast
$out2,$out2,$in4
1786 vcipherlast
$out3,$out3,$in5
1787 vcipherlast
$out4,$out4,$in6
1788 vcipherlast
$out5,$out5,$in7
1790 le?vperm
$out0,$out0,$out0,$inpperm
1791 le?vperm
$out1,$out1,$out1,$inpperm
1792 stvx_u
$out0,$x00,$out
1793 le?vperm
$out2,$out2,$out2,$inpperm
1794 stvx_u
$out1,$x10,$out
1795 le?vperm
$out3,$out3,$out3,$inpperm
1796 stvx_u
$out2,$x20,$out
1797 le?vperm
$out4,$out4,$out4,$inpperm
1798 stvx_u
$out3,$x30,$out
1799 le?vperm
$out5,$out5,$out5,$inpperm
1800 stvx_u
$out4,$x40,$out
1801 stvx_u
$out5,$x50,$out
1807 vcipherlast
$out0,$out0,$in3
1808 vcipherlast
$out1,$out1,$in4
1809 vcipherlast
$out2,$out2,$in5
1810 vcipherlast
$out3,$out3,$in6
1811 vcipherlast
$out4,$out4,$in7
1813 le?vperm
$out0,$out0,$out0,$inpperm
1814 le?vperm
$out1,$out1,$out1,$inpperm
1815 stvx_u
$out0,$x00,$out
1816 le?vperm
$out2,$out2,$out2,$inpperm
1817 stvx_u
$out1,$x10,$out
1818 le?vperm
$out3,$out3,$out3,$inpperm
1819 stvx_u
$out2,$x20,$out
1820 le?vperm
$out4,$out4,$out4,$inpperm
1821 stvx_u
$out3,$x30,$out
1822 stvx_u
$out4,$x40,$out
1828 vcipherlast
$out0,$out0,$in4
1829 vcipherlast
$out1,$out1,$in5
1830 vcipherlast
$out2,$out2,$in6
1831 vcipherlast
$out3,$out3,$in7
1833 le?vperm
$out0,$out0,$out0,$inpperm
1834 le?vperm
$out1,$out1,$out1,$inpperm
1835 stvx_u
$out0,$x00,$out
1836 le?vperm
$out2,$out2,$out2,$inpperm
1837 stvx_u
$out1,$x10,$out
1838 le?vperm
$out3,$out3,$out3,$inpperm
1839 stvx_u
$out2,$x20,$out
1840 stvx_u
$out3,$x30,$out
1846 vcipherlast
$out0,$out0,$in5
1847 vcipherlast
$out1,$out1,$in6
1848 vcipherlast
$out2,$out2,$in7
1850 le?vperm
$out0,$out0,$out0,$inpperm
1851 le?vperm
$out1,$out1,$out1,$inpperm
1852 stvx_u
$out0,$x00,$out
1853 le?vperm
$out2,$out2,$out2,$inpperm
1854 stvx_u
$out1,$x10,$out
1855 stvx_u
$out2,$x20,$out
1861 vcipherlast
$out0,$out0,$in6
1862 vcipherlast
$out1,$out1,$in7
1864 le?vperm
$out0,$out0,$out0,$inpperm
1865 le?vperm
$out1,$out1,$out1,$inpperm
1866 stvx_u
$out0,$x00,$out
1867 stvx_u
$out1,$x10,$out
1873 vcipherlast
$out0,$out0,$in7
1875 le?vperm
$out0,$out0,$out0,$inpperm
1882 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1884 stvx
$inpperm,r11
,$sp
1886 stvx
$inpperm,r10
,$sp
1888 stvx
$inpperm,r11
,$sp
1890 stvx
$inpperm,r10
,$sp
1892 stvx
$inpperm,r11
,$sp
1894 stvx
$inpperm,r10
,$sp
1896 stvx
$inpperm,r11
,$sp
1900 lvx v20
,r10
,$sp # ABI says so
1922 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1923 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1924 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1925 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1926 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1927 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1928 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1931 .byte
0,12,0x14,0,0x80,6,6,0
1933 .size
.${prefix
}_ctr32_encrypt_blocks
,.-.${prefix
}_ctr32_encrypt_blocks
1937 #########################################################################
1938 {{{ # XTS procedures #
1939 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1940 # const AES_KEY *key1, const AES_KEY *key2, #
1941 # [const] unsigned char iv[16]); #
1942 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1943 # input tweak value is assumed to be encrypted already, and last tweak #
1944 # value, one suitable for consecutive call on same chunk of data, is #
1945 # written back to original buffer. In addition, in "tweak chaining" #
1946 # mode only complete input blocks are processed. #
# Register map for the XTS entry points: the eight scalar names are bound to
# r3..r10 in order, the vector names to v0..v12 in order.
1948 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1949 my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1950 my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1951 my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
# The tail length shares its register with key2.
1952 my $taillen = $key2;
# After this swap inp is r10 and idx is r3; the routines copy the incoming
# r3 into inp as their first instruction.
1954 ($inp,$idx) = ($idx,$inp); # reassign
# XTS encrypt entry point (see the prototype in the comment box above).
# Sets up byte constants, loads the possibly unaligned iv into the tweak
# register and tests key2 for NULL.
# Lines prefixed le?/be? are the endian-specific variants resolved by the
# perlasm translator.
1957 .globl
.${prefix
}_xts_encrypt
1958 mr
$inp,r3
# reassign
1964 mfspr r12
,256 # save vrsave
1968 vspltisb
$seven,0x07 # 0x070707..07
1969 le?lvsl
$leperm,r11
,r11
1970 le?vspltisb
$tmp,0x0f
1971 le?vxor
$leperm,$leperm,$seven
# Two aligned loads around the iv address are merged by vperm, using the
# control vector produced by lvsl.
1974 lvx
$tweak,0,$ivp # load [unaligned] iv
1975 lvsl
$inpperm,0,$ivp
1976 lvx
$inptail,$idx,$ivp
1977 le?vxor
$inpperm,$inpperm,$tmp
1978 vperm
$tweak,$tweak,$inptail,$inpperm
1981 lvsr
$inpperm,0,r11
# prepare for unaligned load
1983 addi
$inp,$inp,15 # 15 is not typo
1984 le?vxor
$inpperm,$inpperm,$tmp
1986 ${UCMP
}i
$key2,0 # key2==NULL?
1987 beq Lxts_enc_no_key2
# key2 was supplied: encrypt the iv under key2 to obtain the initial tweak.
# The loop count comes from the word at offset 240 of the key structure,
# halved and decremented. Round keys are fetched alternately into
# rndkey0/rndkey1 and aligned with ?vperm before each vcipher.
1989 ?lvsl
$keyperm,0,$key2 # prepare for unaligned key
1990 lwz
$rounds,240($key2)
1991 srwi
$rounds,$rounds,1
1992 subi
$rounds,$rounds,1
1995 lvx
$rndkey0,0,$key2
1996 lvx
$rndkey1,$idx,$key2
1998 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1999 vxor
$tweak,$tweak,$rndkey0
2000 lvx
$rndkey0,$idx,$key2
2005 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2006 vcipher
$tweak,$tweak,$rndkey1
2007 lvx
$rndkey1,$idx,$key2
2009 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2010 vcipher
$tweak,$tweak,$rndkey0
2011 lvx
$rndkey0,$idx,$key2
2015 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2016 vcipher
$tweak,$tweak,$rndkey1
2017 lvx
$rndkey1,$idx,$key2
2018 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2019 vcipherlast
$tweak,$tweak,$rndkey0
# Clearing ivp marks that no tweak is to be written back at the end.
2021 li
$ivp,0 # don't chain the tweak
2026 and $len,$len,$idx # in "tweak chaining"
2027 # mode only complete
2028 # blocks are processed
# Prepare for the data key (key1): alignment control for the schedule and
# the loop count derived from the word at offset 240.
2033 ?lvsl
$keyperm,0,$key1 # prepare for unaligned key
2034 lwz
$rounds,240($key1)
2035 srwi
$rounds,$rounds,1
2036 subi
$rounds,$rounds,1
# Build the mask 0x870101..01 that is and-ed into the carry bytes whenever
# the tweak is advanced.
2039 vslb
$eighty7,$seven,$seven # 0x808080..80
2040 vor
$eighty7,$eighty7,$seven # 0x878787..87
2041 vspltisb
$tmp,1 # 0x010101..01
2042 vsldoi
$eighty7,$eighty7,$tmp,15 # 0x870101..01
# Taken on greater-or-equal: hand over to the interleaved 6-block routine.
2045 bge _aesp8_xts_encrypt6x
# taillen = (len mod 16) - 16
2047 andi
. $taillen,$len,15
2049 subi
$taillen,$taillen,16
# Encrypt one block: merge the unaligned input, xor it with the tweak and
# with round key 0, then apply the rounds with rndkey0/rndkey1 used
# alternately (each aligned by ?vperm just before use).
2054 lvx
$rndkey0,0,$key1
2055 lvx
$rndkey1,$idx,$key1
2057 vperm
$inout,$inout,$inptail,$inpperm
2058 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2059 vxor
$inout,$inout,$tweak
2060 vxor
$inout,$inout,$rndkey0
2061 lvx
$rndkey0,$idx,$key1
2068 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2069 vcipher
$inout,$inout,$rndkey1
2070 lvx
$rndkey1,$idx,$key1
2072 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2073 vcipher
$inout,$inout,$rndkey0
2074 lvx
$rndkey0,$idx,$key1
2078 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2079 vcipher
$inout,$inout,$rndkey1
2080 lvx
$rndkey1,$idx,$key1
2082 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
# The tweak is folded into the last round key, so vcipherlast directly
# produces the tweaked ciphertext.
2083 vxor
$rndkey0,$rndkey0,$tweak
2084 vcipherlast
$output,$inout,$rndkey0
# Store the block; the little-endian variant byte-swaps through leperm
# into tmp first, leaving output itself untouched.
2086 le?vperm
$tmp,$output,$output,$leperm
2088 le?stvx_u
$tmp,0,$out
2089 be?stvx_u
$output,0,$out
# Prepare the next block: reload the first two round keys, advance the
# tweak and pre-whiten the next input.
2098 lvx
$rndkey0,0,$key1
2099 lvx
$rndkey1,$idx,$key1
# Tweak update (multiplication by x in GF(2^128)): vsrab spreads each
# byte's top bit over the byte, vaddubm doubles every byte, vsldoi rotates
# the carry bytes by one position, and the 0x870101..01 mask turns them
# into the bits to xor in.
2107 vsrab
$tmp,$tweak,$seven # next tweak value
2108 vaddubm
$tweak,$tweak,$tweak
2109 vsldoi
$tmp,$tmp,$tmp,15
2110 vand
$tmp,$tmp,$eighty7
2111 vxor
$tweak,$tweak,$tmp
2113 vperm
$inout,$inout,$inptail,$inpperm
2114 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2115 vxor
$inout,$inout,$tweak
2116 vxor
$output,$output,$rndkey0 # just in case $len<16
2117 vxor
$inout,$inout,$rndkey0
2118 lvx
$rndkey0,$idx,$key1
# Ciphertext stealing for a trailing partial block: a byte-select mask is
# derived from len with lvsr/vperm and vsel combines bytes of the new input
# with bytes of the previous output block; control then returns to the
# encryption loop for one more pass.
2125 vxor
$output,$output,$tweak
2126 lvsr
$inpperm,0,$len # $inpperm is no longer needed
2127 vxor
$inptail,$inptail,$inptail # $inptail is no longer needed
2129 vperm
$inptail,$inptail,$tmp,$inpperm
2130 vsel
$inout,$inout,$output,$inptail
2139 bdnz Loop_xts_enc_steal
2142 b Loop_xts_enc
# one more time...
# Tweak chaining write-back: advance the tweak once more and store it at
# ivp (byte-swapped first on little-endian), then restore vrsave.
2148 vsrab
$tmp,$tweak,$seven # next tweak value
2149 vaddubm
$tweak,$tweak,$tweak
2150 vsldoi
$tmp,$tmp,$tmp,15
2151 vand
$tmp,$tmp,$eighty7
2152 vxor
$tweak,$tweak,$tmp
2154 le?vperm
$tweak,$tweak,$tweak,$leperm
2155 stvx_u
$tweak,0,$ivp
2158 mtspr
256,r12
# restore vrsave
2162 .byte
0,12,0x04,0,0x80,6,6,0
2164 .size
.${prefix
}_xts_encrypt
,.-.${prefix
}_xts_encrypt
# XTS decrypt entry point (see the prototype in the comment box above).
# Sets up byte constants, loads the possibly unaligned iv into the tweak
# register and tests key2 for NULL.
# Lines prefixed le?/be? are the endian-specific variants resolved by the
# perlasm translator.
2166 .globl
.${prefix
}_xts_decrypt
2167 mr
$inp,r3
# reassign
2173 mfspr r12
,256 # save vrsave
2182 vspltisb
$seven,0x07 # 0x070707..07
2183 le?lvsl
$leperm,r11
,r11
2184 le?vspltisb
$tmp,0x0f
2185 le?vxor
$leperm,$leperm,$seven
# Two aligned loads around the iv address are merged by vperm, using the
# control vector produced by lvsl.
2188 lvx
$tweak,0,$ivp # load [unaligned] iv
2189 lvsl
$inpperm,0,$ivp
2190 lvx
$inptail,$idx,$ivp
2191 le?vxor
$inpperm,$inpperm,$tmp
2192 vperm
$tweak,$tweak,$inptail,$inpperm
2195 lvsr
$inpperm,0,r11
# prepare for unaligned load
2197 addi
$inp,$inp,15 # 15 is not typo
2198 le?vxor
$inpperm,$inpperm,$tmp
2200 ${UCMP
}i
$key2,0 # key2==NULL?
2201 beq Lxts_dec_no_key2
# key2 was supplied: derive the initial tweak by encrypting the iv under
# key2. Note that the forward cipher (vcipher/vcipherlast) is used here
# even though this is the decrypt entry point.
2203 ?lvsl
$keyperm,0,$key2 # prepare for unaligned key
2204 lwz
$rounds,240($key2)
2205 srwi
$rounds,$rounds,1
2206 subi
$rounds,$rounds,1
2209 lvx
$rndkey0,0,$key2
2210 lvx
$rndkey1,$idx,$key2
2212 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2213 vxor
$tweak,$tweak,$rndkey0
2214 lvx
$rndkey0,$idx,$key2
2219 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2220 vcipher
$tweak,$tweak,$rndkey1
2221 lvx
$rndkey1,$idx,$key2
2223 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2224 vcipher
$tweak,$tweak,$rndkey0
2225 lvx
$rndkey0,$idx,$key2
2229 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2230 vcipher
$tweak,$tweak,$rndkey1
2231 lvx
$rndkey1,$idx,$key2
2232 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2233 vcipherlast
$tweak,$tweak,$rndkey0
# Clearing ivp marks that no tweak is to be written back at the end.
2235 li
$ivp,0 # don't chain the tweak
2241 add
$len,$len,$idx # in "tweak chaining"
2242 # mode only complete
2243 # blocks are processed
# Prepare for the data key (key1): alignment control for the schedule and
# the loop count derived from the word at offset 240.
2248 ?lvsl
$keyperm,0,$key1 # prepare for unaligned key
2249 lwz
$rounds,240($key1)
2250 srwi
$rounds,$rounds,1
2251 subi
$rounds,$rounds,1
# Build the mask 0x870101..01 that is and-ed into the carry bytes whenever
# the tweak is advanced.
2254 vslb
$eighty7,$seven,$seven # 0x808080..80
2255 vor
$eighty7,$eighty7,$seven # 0x878787..87
2256 vspltisb
$tmp,1 # 0x010101..01
2257 vsldoi
$eighty7,$eighty7,$tmp,15 # 0x870101..01
# Taken on greater-or-equal: hand over to the interleaved 6-block routine.
2260 bge _aesp8_xts_decrypt6x
# Decrypt one block: merge the unaligned input, xor it with the tweak and
# with round key 0, then apply the inverse rounds (vncipher) with
# rndkey0/rndkey1 used alternately.
2262 lvx
$rndkey0,0,$key1
2263 lvx
$rndkey1,$idx,$key1
2265 vperm
$inout,$inout,$inptail,$inpperm
2266 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2267 vxor
$inout,$inout,$tweak
2268 vxor
$inout,$inout,$rndkey0
2269 lvx
$rndkey0,$idx,$key1
2279 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2280 vncipher
$inout,$inout,$rndkey1
2281 lvx
$rndkey1,$idx,$key1
2283 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2284 vncipher
$inout,$inout,$rndkey0
2285 lvx
$rndkey0,$idx,$key1
2289 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2290 vncipher
$inout,$inout,$rndkey1
2291 lvx
$rndkey1,$idx,$key1
2293 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
# The tweak is folded into the last round key, so vncipherlast directly
# produces the un-tweaked plaintext.
2294 vxor
$rndkey0,$rndkey0,$tweak
2295 vncipherlast
$output,$inout,$rndkey0
# Store the block; the little-endian variant byte-swaps through leperm
# into tmp first, leaving output itself untouched.
2297 le?vperm
$tmp,$output,$output,$leperm
2299 le?stvx_u
$tmp,0,$out
2300 be?stvx_u
$output,0,$out
# Prepare the next block: reload the first two round keys, advance the
# tweak (byte-wise doubling plus masked carry, i.e. multiplication by x in
# GF(2^128)) and pre-whiten the next input.
2309 lvx
$rndkey0,0,$key1
2310 lvx
$rndkey1,$idx,$key1
2313 vsrab
$tmp,$tweak,$seven # next tweak value
2314 vaddubm
$tweak,$tweak,$tweak
2315 vsldoi
$tmp,$tmp,$tmp,15
2316 vand
$tmp,$tmp,$eighty7
2317 vxor
$tweak,$tweak,$tmp
2319 vperm
$inout,$inout,$inptail,$inpperm
2320 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2321 vxor
$inout,$inout,$tweak
2322 vxor
$inout,$inout,$rndkey0
2323 lvx
$rndkey0,$idx,$key1
# Tail handling when a partial block follows: this block is decrypted with
# the following tweak value. tweak1 receives the advanced tweak while tweak
# itself is kept for later use.
2331 vsrab
$tmp,$tweak,$seven # next tweak value
2332 vaddubm
$tweak1,$tweak,$tweak
2333 vsldoi
$tmp,$tmp,$tmp,15
2334 vand
$tmp,$tmp,$eighty7
2335 vxor
$tweak1,$tweak1,$tmp
# Undo the xor with tweak that was already applied to inout, then apply
# tweak1 in its place.
2340 vxor
$inout,$inout,$tweak # :-(
2341 vxor
$inout,$inout,$tweak1 # :-)
2344 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2345 vncipher
$inout,$inout,$rndkey1
2346 lvx
$rndkey1,$idx,$key1
2348 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2349 vncipher
$inout,$inout,$rndkey0
2350 lvx
$rndkey0,$idx,$key1
2352 bdnz Loop_xts_dec_short
2354 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2355 vncipher
$inout,$inout,$rndkey1
2356 lvx
$rndkey1,$idx,$key1
2358 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
# Last round key is combined with tweak1 for this block.
2359 vxor
$rndkey0,$rndkey0,$tweak1
2360 vncipherlast
$output,$inout,$rndkey0
2362 le?vperm
$tmp,$output,$output,$leperm
2364 le?stvx_u
$tmp,0,$out
2365 be?stvx_u
$output,0,$out
# Ciphertext stealing: the trailing input bytes are merged with bytes of
# the block just produced (mask built from len with lvsr/vperm, applied by
# vsel). The merged block is whitened with round key 0 combined with the
# kept tweak value and sent through the decryption loop once more.
2370 lvx
$rndkey0,0,$key1
2371 lvx
$rndkey1,$idx,$key1
2373 vperm
$inout,$inout,$inptail,$inpperm
2374 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2376 lvsr
$inpperm,0,$len # $inpperm is no longer needed
2377 vxor
$inptail,$inptail,$inptail # $inptail is no longer needed
2379 vperm
$inptail,$inptail,$tmp,$inpperm
2380 vsel
$inout,$inout,$output,$inptail
2382 vxor
$rndkey0,$rndkey0,$tweak
2383 vxor
$inout,$inout,$rndkey0
2384 lvx
$rndkey0,$idx,$key1
2393 bdnz Loop_xts_dec_steal
2396 b Loop_xts_dec
# one more time...
# Tweak chaining write-back: advance the tweak once more and store it at
# ivp (byte-swapped first on little-endian), then restore vrsave.
2402 vsrab
$tmp,$tweak,$seven # next tweak value
2403 vaddubm
$tweak,$tweak,$tweak
2404 vsldoi
$tmp,$tmp,$tmp,15
2405 vand
$tmp,$tmp,$eighty7
2406 vxor
$tweak,$tweak,$tmp
2408 le?vperm
$tweak,$tweak,$tweak,$leperm
2409 stvx_u
$tweak,0,$ivp
2412 mtspr
256,r12
# restore vrsave
2416 .byte
0,12,0x04,0,0x80,6,6,0
2418 .size
.${prefix
}_xts_decrypt
,.-.${prefix
}_xts_decrypt
2420 #########################################################################
2421 {{ # Optimized XTS procedures #
# Register map for the 6-block XTS routines.
# x00..x70 are index registers (r0, r3, r26..r31) used as the offset operand
# of the vector loads/stores; presumably they hold the byte offsets their
# names suggest -- the instructions that load them are not in this block.
2423 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
# On osx flavours the zero offset is written as a literal 0 instead of r0.
2424 $x00=0 if ($flavour =~ /osx/);
# Six input blocks in v0..v5, six working/output blocks in v7 and v12..v16,
# six tweak values in v17..v22.
2425 my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2426 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2427 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2428 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
2429 # v26-v31 last 6 round keys
2430 my ($keyperm)=($out0); # aliases with "caller", redundant assignment
# 6-block interleaved XTS encryption, entered from the one-block entry
# point. Prologue: allocate the stack frame, store r11 in the LRSAVE slot,
# and save v20, vrsave and r26-r31 into the frame.
2435 _aesp8_xts_encrypt6x
:
2436 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
# r7/r3 are offsets into the frame's vector save area.
2438 li r7
,`$FRAME+8*16+15`
2439 li r3
,`$FRAME+8*16+31`
2440 $PUSH r11
,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2441 stvx v20
,r7
,$sp # ABI says so
2464 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2466 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
2468 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
2470 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
2472 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
2474 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
2476 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
# Load the key1 schedule. Round key 0 stays in rndkey0. The following
# round keys are aligned with ?vperm and off-loaded to an aligned copy on
# the stack (starting at FRAME+15 off sp) in the Load_xts_enc_key loop and
# the two stores after it. The last six round keys are aligned in place in
# v26-v31. Finally v24/v25 are pre-loaded with rounds 1 and 2 from the
# stack copy.
2480 subi
$rounds,$rounds,3 # -4 in total
2482 lvx
$rndkey0,$x00,$key1 # load key schedule
2484 addi
$key1,$key1,0x20
2486 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
2487 addi
$key_,$sp,$FRAME+15
2491 ?vperm v24
,v30
,v31
,$keyperm
2493 addi
$key1,$key1,0x20
2494 stvx v24
,$x00,$key_ # off-load round[1]
2495 ?vperm v25
,v31
,v30
,$keyperm
2497 stvx v25
,$x10,$key_ # off-load round[2]
2498 addi
$key_,$key_,0x20
2499 bdnz Load_xts_enc_key
2502 ?vperm v24
,v30
,v31
,$keyperm
2504 stvx v24
,$x00,$key_ # off-load round[3]
2505 ?vperm v25
,v31
,v26
,$keyperm
2507 stvx v25
,$x10,$key_ # off-load round[4]
2508 addi
$key_,$sp,$FRAME+15 # rewind $key_
2509 ?vperm v26
,v26
,v27
,$keyperm
2511 ?vperm v27
,v27
,v28
,$keyperm
2513 ?vperm v28
,v28
,v29
,$keyperm
2515 ?vperm v29
,v29
,v30
,$keyperm
# One extra quadword is fetched into twk5 only to align v31.
2516 lvx
$twk5,$x70,$key1 # borrow $twk5
2517 ?vperm v30
,v30
,v31
,$keyperm
2518 lvx v24
,$x00,$key_ # pre-load round[1]
2519 ?vperm v31
,v31
,$twk5,$keyperm
2520 lvx v25
,$x10,$key_ # pre-load round[2]
# Assemble the first six inputs and six consecutive tweaks.
# in0 is completed from the load the entry code already started; in1..in5
# are loaded with lvx_u (byte-swapped on little-endian).
# Each twkN is the tweak xored with round key 0, so that "input xor twkN"
# applies the XTS whitening and the initial AddRoundKey in one step.
# Between tweaks the tweak register is advanced with the usual
# vsrab/vaddubm/vsldoi/vand/vxor sequence.
2522 vperm
$in0,$inout,$inptail,$inpperm
2523 subi
$inp,$inp,31 # undo "caller"
2524 vxor
$twk0,$tweak,$rndkey0
2525 vsrab
$tmp,$tweak,$seven # next tweak value
2526 vaddubm
$tweak,$tweak,$tweak
2527 vsldoi
$tmp,$tmp,$tmp,15
2528 vand
$tmp,$tmp,$eighty7
2529 vxor
$out0,$in0,$twk0
2530 vxor
$tweak,$tweak,$tmp
2532 lvx_u
$in1,$x10,$inp
2533 vxor
$twk1,$tweak,$rndkey0
2534 vsrab
$tmp,$tweak,$seven # next tweak value
2535 vaddubm
$tweak,$tweak,$tweak
2536 vsldoi
$tmp,$tmp,$tmp,15
2537 le?vperm
$in1,$in1,$in1,$leperm
2538 vand
$tmp,$tmp,$eighty7
2539 vxor
$out1,$in1,$twk1
2540 vxor
$tweak,$tweak,$tmp
2542 lvx_u
$in2,$x20,$inp
# taillen = len mod 16 (record form, so the condition register is updated)
2543 andi
. $taillen,$len,15
2544 vxor
$twk2,$tweak,$rndkey0
2545 vsrab
$tmp,$tweak,$seven # next tweak value
2546 vaddubm
$tweak,$tweak,$tweak
2547 vsldoi
$tmp,$tmp,$tmp,15
2548 le?vperm
$in2,$in2,$in2,$leperm
2549 vand
$tmp,$tmp,$eighty7
2550 vxor
$out2,$in2,$twk2
2551 vxor
$tweak,$tweak,$tmp
2553 lvx_u
$in3,$x30,$inp
# len now counts whole blocks only
2554 sub $len,$len,$taillen
2555 vxor
$twk3,$tweak,$rndkey0
2556 vsrab
$tmp,$tweak,$seven # next tweak value
2557 vaddubm
$tweak,$tweak,$tweak
2558 vsldoi
$tmp,$tmp,$tmp,15
2559 le?vperm
$in3,$in3,$in3,$leperm
2560 vand
$tmp,$tmp,$eighty7
2561 vxor
$out3,$in3,$twk3
2562 vxor
$tweak,$tweak,$tmp
2564 lvx_u
$in4,$x40,$inp
2566 vxor
$twk4,$tweak,$rndkey0
2567 vsrab
$tmp,$tweak,$seven # next tweak value
2568 vaddubm
$tweak,$tweak,$tweak
2569 vsldoi
$tmp,$tmp,$tmp,15
2570 le?vperm
$in4,$in4,$in4,$leperm
2571 vand
$tmp,$tmp,$eighty7
2572 vxor
$out4,$in4,$twk4
2573 vxor
$tweak,$tweak,$tmp
2575 lvx_u
$in5,$x50,$inp
2577 vxor
$twk5,$tweak,$rndkey0
2578 vsrab
$tmp,$tweak,$seven # next tweak value
2579 vaddubm
$tweak,$tweak,$tweak
2580 vsldoi
$tmp,$tmp,$tmp,15
2581 le?vperm
$in5,$in5,$in5,$leperm
2582 vand
$tmp,$tmp,$eighty7
2583 vxor
$out5,$in5,$twk5
2584 vxor
$tweak,$tweak,$tmp
# v31 becomes last round key xor round key 0, so that a later
# "twkN xor v31" cancels round key 0 and leaves tweak xor last round key.
2586 vxor v31
,v31
,$rndkey0
# Two rounds on all six blocks: first with v24, then with v25. After each
# use the register is reloaded with a later round key from the stack copy,
# and the key_ pointer advances by 0x20.
2592 vcipher
$out0,$out0,v24
2593 vcipher
$out1,$out1,v24
2594 vcipher
$out2,$out2,v24
2595 vcipher
$out3,$out3,v24
2596 vcipher
$out4,$out4,v24
2597 vcipher
$out5,$out5,v24
2598 lvx v24
,$x20,$key_ # round[3]
2599 addi
$key_,$key_,0x20
2601 vcipher
$out0,$out0,v25
2602 vcipher
$out1,$out1,v25
2603 vcipher
$out2,$out2,v25
2604 vcipher
$out3,$out3,v25
2605 vcipher
$out4,$out4,v25
2606 vcipher
$out5,$out5,v25
2607 lvx v25
,$x10,$key_ # round[4]
# Remaining middle rounds (v24, v25, then v26..v30) on all six blocks,
# interleaved with independent work to hide latency:
#   - len -= 96, with the borrow captured in r0 (-1 on borrow, else 0);
#   - in0 = twk0 xor v31, the operand for the final round;
#   - generation of the next group of tweaks (each saved as tweak xor
#     round key 0 in twk0..twk4);
#   - rewinding key_ and re-loading v24/v25 with rounds 1 and 2.
2610 subic
$len,$len,96 # $len-=96
2611 vxor
$in0,$twk0,v31
# xor with last round key
2612 vcipher
$out0,$out0,v24
2613 vcipher
$out1,$out1,v24
2614 vsrab
$tmp,$tweak,$seven # next tweak value
2615 vxor
$twk0,$tweak,$rndkey0
2616 vaddubm
$tweak,$tweak,$tweak
2617 vcipher
$out2,$out2,v24
2618 vcipher
$out3,$out3,v24
2619 vsldoi
$tmp,$tmp,$tmp,15
2620 vcipher
$out4,$out4,v24
2621 vcipher
$out5,$out5,v24
2623 subfe
. r0
,r0
,r0
# borrow?-1:0
2624 vand
$tmp,$tmp,$eighty7
2625 vcipher
$out0,$out0,v25
2626 vcipher
$out1,$out1,v25
2627 vxor
$tweak,$tweak,$tmp
2628 vcipher
$out2,$out2,v25
2629 vcipher
$out3,$out3,v25
2631 vsrab
$tmp,$tweak,$seven # next tweak value
2632 vxor
$twk1,$tweak,$rndkey0
2633 vcipher
$out4,$out4,v25
2634 vcipher
$out5,$out5,v25
2637 vaddubm
$tweak,$tweak,$tweak
2638 vsldoi
$tmp,$tmp,$tmp,15
2639 vcipher
$out0,$out0,v26
2640 vcipher
$out1,$out1,v26
2641 vand
$tmp,$tmp,$eighty7
2642 vcipher
$out2,$out2,v26
2643 vcipher
$out3,$out3,v26
2644 vxor
$tweak,$tweak,$tmp
2645 vcipher
$out4,$out4,v26
2646 vcipher
$out5,$out5,v26
2648 add
$inp,$inp,r0
# $inp is adjusted in such
2649 # way that at exit from the
2650 # loop inX-in5 are loaded
2653 vsrab
$tmp,$tweak,$seven # next tweak value
2654 vxor
$twk2,$tweak,$rndkey0
2655 vaddubm
$tweak,$tweak,$tweak
2656 vcipher
$out0,$out0,v27
2657 vcipher
$out1,$out1,v27
2658 vsldoi
$tmp,$tmp,$tmp,15
2659 vcipher
$out2,$out2,v27
2660 vcipher
$out3,$out3,v27
2661 vand
$tmp,$tmp,$eighty7
2662 vcipher
$out4,$out4,v27
2663 vcipher
$out5,$out5,v27
2665 addi
$key_,$sp,$FRAME+15 # rewind $key_
2666 vxor
$tweak,$tweak,$tmp
2667 vcipher
$out0,$out0,v28
2668 vcipher
$out1,$out1,v28
2670 vsrab
$tmp,$tweak,$seven # next tweak value
2671 vxor
$twk3,$tweak,$rndkey0
2672 vcipher
$out2,$out2,v28
2673 vcipher
$out3,$out3,v28
2674 vaddubm
$tweak,$tweak,$tweak
2675 vsldoi
$tmp,$tmp,$tmp,15
2676 vcipher
$out4,$out4,v28
2677 vcipher
$out5,$out5,v28
2678 lvx v24
,$x00,$key_ # re-pre-load round[1]
2679 vand
$tmp,$tmp,$eighty7
2681 vcipher
$out0,$out0,v29
2682 vcipher
$out1,$out1,v29
2683 vxor
$tweak,$tweak,$tmp
2684 vcipher
$out2,$out2,v29
2685 vcipher
$out3,$out3,v29
2687 vsrab
$tmp,$tweak,$seven # next tweak value
2688 vxor
$twk4,$tweak,$rndkey0
2689 vcipher
$out4,$out4,v29
2690 vcipher
$out5,$out5,v29
2691 lvx v25
,$x10,$key_ # re-pre-load round[2]
2692 vaddubm
$tweak,$tweak,$tweak
2693 vsldoi
$tmp,$tmp,$tmp,15
2695 vcipher
$out0,$out0,v30
2696 vcipher
$out1,$out1,v30
2697 vand
$tmp,$tmp,$eighty7
2698 vcipher
$out2,$out2,v30
2699 vcipher
$out3,$out3,v30
2700 vxor
$tweak,$tweak,$tmp
2701 vcipher
$out4,$out4,v30
2702 vcipher
$out5,$out5,v30
# Finish the group of six: the sixth new tweak (twk5) is produced, every
# block gets its final round (vcipherlast with the matching inN operand),
# the next six input blocks are loaded, the six results are stored, and the
# new inputs are pre-xored with the fresh tweaks for the next iteration.
# The sixth result is produced in tmp rather than out5 because it may be
# needed later for stealing.
2704 vsrab
$tmp,$tweak,$seven # next tweak value
2705 vxor
$twk5,$tweak,$rndkey0
2707 vcipherlast
$out0,$out0,$in0
2708 lvx_u
$in0,$x00,$inp # load next input block
2709 vaddubm
$tweak,$tweak,$tweak
2710 vsldoi
$tmp,$tmp,$tmp,15
2711 vcipherlast
$out1,$out1,$in1
2712 lvx_u
$in1,$x10,$inp
2713 vcipherlast
$out2,$out2,$in2
2714 le?vperm
$in0,$in0,$in0,$leperm
2715 lvx_u
$in2,$x20,$inp
2716 vand
$tmp,$tmp,$eighty7
2717 vcipherlast
$out3,$out3,$in3
2718 le?vperm
$in1,$in1,$in1,$leperm
2719 lvx_u
$in3,$x30,$inp
2720 vcipherlast
$out4,$out4,$in4
2721 le?vperm
$in2,$in2,$in2,$leperm
2722 lvx_u
$in4,$x40,$inp
2723 vxor
$tweak,$tweak,$tmp
2724 vcipherlast
$tmp,$out5,$in5 # last block might be needed
2726 le?vperm
$in3,$in3,$in3,$leperm
2727 lvx_u
$in5,$x50,$inp
2729 le?vperm
$in4,$in4,$in4,$leperm
2730 le?vperm
$in5,$in5,$in5,$leperm
2732 le?vperm
$out0,$out0,$out0,$leperm
2733 le?vperm
$out1,$out1,$out1,$leperm
2734 stvx_u
$out0,$x00,$out # store output
2735 vxor
$out0,$in0,$twk0
2736 le?vperm
$out2,$out2,$out2,$leperm
2737 stvx_u
$out1,$x10,$out
2738 vxor
$out1,$in1,$twk1
2739 le?vperm
$out3,$out3,$out3,$leperm
2740 stvx_u
$out2,$x20,$out
2741 vxor
$out2,$in2,$twk2
2742 le?vperm
$out4,$out4,$out4,$leperm
2743 stvx_u
$out3,$x30,$out
2744 vxor
$out3,$in3,$twk3
2745 le?vperm
$out5,$tmp,$tmp,$leperm
2746 stvx_u
$out4,$x40,$out
2747 vxor
$out4,$in4,$twk4
# The sixth block is stored from the byte-swapped copy on little-endian
# and straight from tmp on big-endian.
2748 le?stvx_u
$out5,$x50,$out
2749 be?stvx_u
$tmp, $x50,$out
2750 vxor
$out5,$in5,$twk5
2754 beq Loop_xts_enc6x
# did $len-=96 borrow?
# Fewer than six whole blocks remain. addic. adds the 0x60 back to len and
# the branches select the case for the number of blocks left.
2756 addic
. $len,$len,0x60
2763 blt Lxts_enc6x_three
# Five blocks left: they sit in in1..in5 and are paired with twk0..twk4.
2768 vxor
$out0,$in1,$twk0
2769 vxor
$out1,$in2,$twk1
2770 vxor
$out2,$in3,$twk2
2771 vxor
$out3,$in4,$twk3
2772 vxor
$out4,$in5,$twk4
# Store the five results. twk5 was not consumed, so it is moved into twk0
# as the next tweak, and tmp is prepared from the last result in case
# stealing follows.
# NOTE(review): the condition tested by the bne below is set outside the
# lines of this case.
2776 le?vperm
$out0,$out0,$out0,$leperm
2777 vmr
$twk0,$twk5 # unused tweak
2778 le?vperm
$out1,$out1,$out1,$leperm
2779 stvx_u
$out0,$x00,$out # store output
2780 le?vperm
$out2,$out2,$out2,$leperm
2781 stvx_u
$out1,$x10,$out
2782 le?vperm
$out3,$out3,$out3,$leperm
2783 stvx_u
$out2,$x20,$out
2784 vxor
$tmp,$out4,$twk5 # last block prep for stealing
2785 le?vperm
$out4,$out4,$out4,$leperm
2786 stvx_u
$out3,$x30,$out
2787 stvx_u
$out4,$x40,$out
2789 bne Lxts_enc6x_steal
# Four blocks left: in2..in5 with twk0..twk3; the unused lane out4 is
# zeroed.
2794 vxor
$out0,$in2,$twk0
2795 vxor
$out1,$in3,$twk1
2796 vxor
$out2,$in4,$twk2
2797 vxor
$out3,$in5,$twk3
2798 vxor
$out4,$out4,$out4
2802 le?vperm
$out0,$out0,$out0,$leperm
2803 vmr
$twk0,$twk4 # unused tweak
2804 le?vperm
$out1,$out1,$out1,$leperm
2805 stvx_u
$out0,$x00,$out # store output
2806 le?vperm
$out2,$out2,$out2,$leperm
2807 stvx_u
$out1,$x10,$out
2808 vxor
$tmp,$out3,$twk4 # last block prep for stealing
2809 le?vperm
$out3,$out3,$out3,$leperm
2810 stvx_u
$out2,$x20,$out
2811 stvx_u
$out3,$x30,$out
2813 bne Lxts_enc6x_steal
# Three blocks left: in3..in5 with twk0..twk2; unused lanes out3/out4 are
# zeroed. twk3 becomes the next tweak and tmp is prepared from the last
# result in case stealing follows.
2818 vxor
$out0,$in3,$twk0
2819 vxor
$out1,$in4,$twk1
2820 vxor
$out2,$in5,$twk2
2821 vxor
$out3,$out3,$out3
2822 vxor
$out4,$out4,$out4
2826 le?vperm
$out0,$out0,$out0,$leperm
2827 vmr
$twk0,$twk3 # unused tweak
2828 le?vperm
$out1,$out1,$out1,$leperm
2829 stvx_u
$out0,$x00,$out # store output
2830 vxor
$tmp,$out2,$twk3 # last block prep for stealing
2831 le?vperm
$out2,$out2,$out2,$leperm
2832 stvx_u
$out1,$x10,$out
2833 stvx_u
$out2,$x20,$out
2835 bne Lxts_enc6x_steal
# Two blocks left: in4/in5 with twk0/twk1; unused lanes out2..out4 are
# zeroed and twk2 becomes the next tweak.
2840 vxor
$out0,$in4,$twk0
2841 vxor
$out1,$in5,$twk1
2842 vxor
$out2,$out2,$out2
2843 vxor
$out3,$out3,$out3
2844 vxor
$out4,$out4,$out4
2848 le?vperm
$out0,$out0,$out0,$leperm
2849 vmr
$twk0,$twk2 # unused tweak
2850 vxor
$tmp,$out1,$twk2 # last block prep for stealing
2851 le?vperm
$out1,$out1,$out1,$leperm
2852 stvx_u
$out0,$x00,$out # store output
2853 stvx_u
$out1,$x10,$out
2855 bne Lxts_enc6x_steal
# One block left (in5 with twk0). It is run alone through the rounds:
# v24/v25 rotating from the stack copy, then v26..v30, and finally
# vcipherlast against twk0 xor v31.
# In parallel, inp is advanced by taillen, a permute control is derived
# from taillen, and in0 is permuted with it for a possible stealing step.
2860 vxor
$out0,$in5,$twk0
2863 vcipher
$out0,$out0,v24
2864 lvx v24
,$x20,$key_ # round[3]
2865 addi
$key_,$key_,0x20
2867 vcipher
$out0,$out0,v25
2868 lvx v25
,$x10,$key_ # round[4]
2871 add
$inp,$inp,$taillen
2873 vcipher
$out0,$out0,v24
2876 vcipher
$out0,$out0,v25
2878 lvsr
$inpperm,0,$taillen
2879 vcipher
$out0,$out0,v26
2882 vcipher
$out0,$out0,v27
2884 addi
$key_,$sp,$FRAME+15 # rewind $key_
2885 vcipher
$out0,$out0,v28
2886 lvx v24
,$x00,$key_ # re-pre-load round[1]
2888 vcipher
$out0,$out0,v29
2889 lvx v25
,$x10,$key_ # re-pre-load round[2]
# twk0 xor v31 cancels round key 0 and leaves tweak xor last round key.
2890 vxor
$twk0,$twk0,v31
2892 le?vperm
$in0,$in0,$in0,$leperm
2893 vcipher
$out0,$out0,v30
2895 vperm
$in0,$in0,$in0,$inpperm
2896 vcipherlast
$out0,$out0,$twk0
2898 vmr
$twk0,$twk1 # unused tweak
2899 vxor
$tmp,$out0,$twk1 # last block prep for stealing
2900 le?vperm
$out0,$out0,$out0,$leperm
2901 stvx_u
$out0,$x00,$out # store output
2903 bne Lxts_enc6x_steal
# Ciphertext stealing. The trailing input (in0, permuted by a control
# derived from taillen) is xored with twk0. A byte mask is built in out0
# with vperm, and vsel merges the new input with tmp, which holds the
# prepared last block. The merged block then goes through the single-block
# path once more.
2911 add
$inp,$inp,$taillen
2914 lvsr
$inpperm,0,$taillen # $in5 is no more
2915 le?vperm
$in0,$in0,$in0,$leperm
2916 vperm
$in0,$in0,$in0,$inpperm
2917 vxor
$tmp,$tmp,$twk0
2919 vxor
$in0,$in0,$twk0
2920 vxor
$out0,$out0,$out0
2922 vperm
$out0,$out0,$out1,$inpperm
2923 vsel
$out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2928 Loop_xts_enc6x_steal
:
2931 bdnz Loop_xts_enc6x_steal
2935 b Loop_xts_enc1x
# one more time...
# Tweak write-back and epilogue. twk0 holds tweak xor round key 0, so
# xoring with rndkey0 recovers the plain tweak, which is stored at ivp
# (byte-swapped first on little-endian). The stack copies of the round keys
# are then overwritten, saved registers are restored and the frame is
# released.
2942 vxor
$tweak,$twk0,$rndkey0
2943 le?vperm
$tweak,$tweak,$tweak,$leperm
2944 stvx_u
$tweak,0,$ivp
2950 stvx
$seven,r10
,$sp # wipe copies of round keys
2968 lvx v20
,r10
,$sp # ABI says so
2990 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
2991 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
2992 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
2993 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
2994 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
2995 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
2996 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2999 .byte
0,12,0x04,1,0x80,6,6,0
# Five-block helper: runs out0..out4 through the cipher rounds.
# The first part is a loop (bdnz back to _aesp8_xts_enc5x) that applies
# v24/v25 while reloading them from the stack copy of the key schedule.
# The remaining rounds use v26..v30. The final round takes twk0 (xored with
# v31) for out0 and in1..in4 for out1..out4.
# Along the way inp is advanced by taillen and in0 is permuted by
# leperm/inpperm in preparation for ciphertext stealing.
3004 vcipher
$out0,$out0,v24
3005 vcipher
$out1,$out1,v24
3006 vcipher
$out2,$out2,v24
3007 vcipher
$out3,$out3,v24
3008 vcipher
$out4,$out4,v24
3009 lvx v24
,$x20,$key_ # round[3]
3010 addi
$key_,$key_,0x20
3012 vcipher
$out0,$out0,v25
3013 vcipher
$out1,$out1,v25
3014 vcipher
$out2,$out2,v25
3015 vcipher
$out3,$out3,v25
3016 vcipher
$out4,$out4,v25
3017 lvx v25
,$x10,$key_ # round[4]
3018 bdnz _aesp8_xts_enc5x
3020 add
$inp,$inp,$taillen
3022 vcipher
$out0,$out0,v24
3023 vcipher
$out1,$out1,v24
3024 vcipher
$out2,$out2,v24
3025 vcipher
$out3,$out3,v24
3026 vcipher
$out4,$out4,v24
3029 vcipher
$out0,$out0,v25
3030 vcipher
$out1,$out1,v25
3031 vcipher
$out2,$out2,v25
3032 vcipher
$out3,$out3,v25
3033 vcipher
$out4,$out4,v25
# twk0 xor v31 cancels round key 0 and leaves tweak xor last round key.
3034 vxor
$twk0,$twk0,v31
3036 vcipher
$out0,$out0,v26
# The RA operand of lvsr is written as literal 0 (in that position it
# denotes the value zero, not the contents of r0), matching the other
# lvsr sites that take taillen.
3037 lvsr
$inpperm,0,$taillen # $in5 is no more
3038 vcipher
$out1,$out1,v26
3039 vcipher
$out2,$out2,v26
3040 vcipher
$out3,$out3,v26
3041 vcipher
$out4,$out4,v26
3044 vcipher
$out0,$out0,v27
3046 vcipher
$out1,$out1,v27
3047 vcipher
$out2,$out2,v27
3048 vcipher
$out3,$out3,v27
3049 vcipher
$out4,$out4,v27
3052 addi
$key_,$sp,$FRAME+15 # rewind $key_
3053 vcipher
$out0,$out0,v28
3054 vcipher
$out1,$out1,v28
3055 vcipher
$out2,$out2,v28
3056 vcipher
$out3,$out3,v28
3057 vcipher
$out4,$out4,v28
3058 lvx v24
,$x00,$key_ # re-pre-load round[1]
3061 vcipher
$out0,$out0,v29
3062 le?vperm
$in0,$in0,$in0,$leperm
3063 vcipher
$out1,$out1,v29
3064 vcipher
$out2,$out2,v29
3065 vcipher
$out3,$out3,v29
3066 vcipher
$out4,$out4,v29
3067 lvx v25
,$x10,$key_ # re-pre-load round[2]
3070 vcipher
$out0,$out0,v30
3071 vperm
$in0,$in0,$in0,$inpperm
3072 vcipher
$out1,$out1,v30
3073 vcipher
$out2,$out2,v30
3074 vcipher
$out3,$out3,v30
3075 vcipher
$out4,$out4,v30
3077 vcipherlast
$out0,$out0,$twk0
3078 vcipherlast
$out1,$out1,$in1
3079 vcipherlast
$out2,$out2,$in2
3080 vcipherlast
$out3,$out3,$in3
3081 vcipherlast
$out4,$out4,$in4
3084 .byte
0,12,0x14,0,0,0,0,0
# _aesp8_xts_decrypt6x - entry and setup of the six-blocks-at-a-time
# decryption routine.
# Visible steps: create a stack frame of $FRAME+21*16+6*$SIZE_T bytes,
# save v20, vrsave and r26-r31 into it, off-load permuted round keys to
# the stack area at $sp+$FRAME+15, then load six input blocks and XOR
# each with its own tweak value.
# NOTE(review): several instructions of the original sequence are missing
# from this excerpt (for example the remaining vector-register saves), so
# the comments below describe only what is shown.
3087 _aesp8_xts_decrypt6x
:
3088 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3090 li r7
,`$FRAME+8*16+15`
3091 li r3
,`$FRAME+8*16+31`
3092 $PUSH r11
,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3093 stvx v20
,r7
,$sp # ABI says so
3116 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
# r26-r31 are saved in six consecutive $SIZE_T slots starting at
# $FRAME+21*16; the matching $POP sequence is at the end of the routine.
3118 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
3120 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
3122 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
3124 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
3126 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
3128 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
3132 subi
$rounds,$rounds,3 # -4 in total
# Key schedule: keys are read from $key1, aligned with the endian-specific
# vperm through $keyperm, and stored pairwise through $key_. The bdnz
# below repeats the off-load for the loop target Load_xts_dec_key.
3134 lvx
$rndkey0,$x00,$key1 # load key schedule
3136 addi
$key1,$key1,0x20
3138 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
3139 addi
$key_,$sp,$FRAME+15
3143 ?vperm v24
,v30
,v31
,$keyperm
3145 addi
$key1,$key1,0x20
3146 stvx v24
,$x00,$key_ # off-load round[1]
3147 ?vperm v25
,v31
,v30
,$keyperm
3149 stvx v25
,$x10,$key_ # off-load round[2]
3150 addi
$key_,$key_,0x20
3151 bdnz Load_xts_dec_key
3154 ?vperm v24
,v30
,v31
,$keyperm
3156 stvx v24
,$x00,$key_ # off-load round[3]
3157 ?vperm v25
,v31
,v26
,$keyperm
3159 stvx v25
,$x10,$key_ # off-load round[4]
3160 addi
$key_,$sp,$FRAME+15 # rewind $key_
# The remaining round keys stay resident in v26-v31; each is aligned by
# permuting it with its successor. $twk5 is borrowed as a scratch register
# for the key that follows v31.
3161 ?vperm v26
,v26
,v27
,$keyperm
3163 ?vperm v27
,v27
,v28
,$keyperm
3165 ?vperm v28
,v28
,v29
,$keyperm
3167 ?vperm v29
,v29
,v30
,$keyperm
3168 lvx
$twk5,$x70,$key1 # borrow $twk5
3169 ?vperm v30
,v30
,v31
,$keyperm
3170 lvx v24
,$x00,$key_ # pre-load round[1]
3171 ?vperm v31
,v31
,$twk5,$keyperm
3172 lvx v25
,$x10,$key_ # pre-load round[2]
# First six blocks. For N = 0..5 the same pattern is used:
#   $twkN = $tweak xor $rndkey0, $outN = $inN xor $twkN,
# followed by the vsrab/vaddubm/vsldoi/vand/vxor sequence that advances
# $tweak ("next tweak value").
# NOTE(review): that sequence is presumably the XTS multiply-by-x step
# with $eighty7 as the reduction mask - confirm where $seven and $eighty7
# are initialised.
3174 vperm
$in0,$inout,$inptail,$inpperm
3175 subi
$inp,$inp,31 # undo "caller"
3176 vxor
$twk0,$tweak,$rndkey0
3177 vsrab
$tmp,$tweak,$seven # next tweak value
3178 vaddubm
$tweak,$tweak,$tweak
3179 vsldoi
$tmp,$tmp,$tmp,15
3180 vand
$tmp,$tmp,$eighty7
3181 vxor
$out0,$in0,$twk0
3182 vxor
$tweak,$tweak,$tmp
3184 lvx_u
$in1,$x10,$inp
3185 vxor
$twk1,$tweak,$rndkey0
3186 vsrab
$tmp,$tweak,$seven # next tweak value
3187 vaddubm
$tweak,$tweak,$tweak
3188 vsldoi
$tmp,$tmp,$tmp,15
3189 le?vperm
$in1,$in1,$in1,$leperm
3190 vand
$tmp,$tmp,$eighty7
3191 vxor
$out1,$in1,$twk1
3192 vxor
$tweak,$tweak,$tmp
3194 lvx_u
$in2,$x20,$inp
# $taillen = $len mod 16 (the bytes beyond the last whole block); it is
# subtracted from $len a few lines further down.
3195 andi
. $taillen,$len,15
3196 vxor
$twk2,$tweak,$rndkey0
3197 vsrab
$tmp,$tweak,$seven # next tweak value
3198 vaddubm
$tweak,$tweak,$tweak
3199 vsldoi
$tmp,$tmp,$tmp,15
3200 le?vperm
$in2,$in2,$in2,$leperm
3201 vand
$tmp,$tmp,$eighty7
3202 vxor
$out2,$in2,$twk2
3203 vxor
$tweak,$tweak,$tmp
3205 lvx_u
$in3,$x30,$inp
3206 sub $len,$len,$taillen
3207 vxor
$twk3,$tweak,$rndkey0
3208 vsrab
$tmp,$tweak,$seven # next tweak value
3209 vaddubm
$tweak,$tweak,$tweak
3210 vsldoi
$tmp,$tmp,$tmp,15
3211 le?vperm
$in3,$in3,$in3,$leperm
3212 vand
$tmp,$tmp,$eighty7
3213 vxor
$out3,$in3,$twk3
3214 vxor
$tweak,$tweak,$tmp
3216 lvx_u
$in4,$x40,$inp
3218 vxor
$twk4,$tweak,$rndkey0
3219 vsrab
$tmp,$tweak,$seven # next tweak value
3220 vaddubm
$tweak,$tweak,$tweak
3221 vsldoi
$tmp,$tmp,$tmp,15
3222 le?vperm
$in4,$in4,$in4,$leperm
3223 vand
$tmp,$tmp,$eighty7
3224 vxor
$out4,$in4,$twk4
3225 vxor
$tweak,$tweak,$tmp
3227 lvx_u
$in5,$x50,$inp
3229 vxor
$twk5,$tweak,$rndkey0
3230 vsrab
$tmp,$tweak,$seven # next tweak value
3231 vaddubm
$tweak,$tweak,$tweak
3232 vsldoi
$tmp,$tmp,$tmp,15
3233 le?vperm
$in5,$in5,$in5,$leperm
3234 vand
$tmp,$tmp,$eighty7
3235 vxor
$out5,$in5,$twk5
3236 vxor
$tweak,$tweak,$tmp
# v31 becomes v31 xor $rndkey0. Every $twkN already contains $rndkey0, so
# a later "$twkN xor v31" cancels $rndkey0 and leaves tweak xor v31.
3238 vxor v31
,v31
,$rndkey0
# Six-block decryption loop; the branch at the end of this section names
# it Loop_xts_dec6x (the label line is not visible in this excerpt).
# $out0-$out5 are run through vncipher with v24-v30 while the next six
# tweak values $twk0-$twk5 are computed in between the cipher
# instructions. vncipherlast then finishes each block, the results are
# stored through $out, and the next six inputs are loaded from $inp and
# XORed with the new tweaks.
# The instruction order is hand-interleaved; do not reorder.
3244 vncipher
$out0,$out0,v24
3245 vncipher
$out1,$out1,v24
3246 vncipher
$out2,$out2,v24
3247 vncipher
$out3,$out3,v24
3248 vncipher
$out4,$out4,v24
3249 vncipher
$out5,$out5,v24
3250 lvx v24
,$x20,$key_ # round[3]
3251 addi
$key_,$key_,0x20
3253 vncipher
$out0,$out0,v25
3254 vncipher
$out1,$out1,v25
3255 vncipher
$out2,$out2,v25
3256 vncipher
$out3,$out3,v25
3257 vncipher
$out4,$out4,v25
3258 vncipher
$out5,$out5,v25
3259 lvx v25
,$x10,$key_ # round[4]
# Account for the 96 bytes (six blocks) being processed. The carry left
# by subic is turned into r0 = -1 or 0 by the subfe further down.
3262 subic
$len,$len,96 # $len-=96
# $in0 is reused to hold $twk0 xor v31, the final operand for block 0,
# before $twk0 is overwritten with the next tweak.
3263 vxor
$in0,$twk0,v31
# xor with last round key
3264 vncipher
$out0,$out0,v24
3265 vncipher
$out1,$out1,v24
3266 vsrab
$tmp,$tweak,$seven # next tweak value
3267 vxor
$twk0,$tweak,$rndkey0
3268 vaddubm
$tweak,$tweak,$tweak
3269 vncipher
$out2,$out2,v24
3270 vncipher
$out3,$out3,v24
3271 vsldoi
$tmp,$tmp,$tmp,15
3272 vncipher
$out4,$out4,v24
3273 vncipher
$out5,$out5,v24
3275 subfe
. r0
,r0
,r0
# borrow?-1:0
3276 vand
$tmp,$tmp,$eighty7
3277 vncipher
$out0,$out0,v25
3278 vncipher
$out1,$out1,v25
3279 vxor
$tweak,$tweak,$tmp
3280 vncipher
$out2,$out2,v25
3281 vncipher
$out3,$out3,v25
3283 vsrab
$tmp,$tweak,$seven # next tweak value
3284 vxor
$twk1,$tweak,$rndkey0
3285 vncipher
$out4,$out4,v25
3286 vncipher
$out5,$out5,v25
3289 vaddubm
$tweak,$tweak,$tweak
3290 vsldoi
$tmp,$tmp,$tmp,15
3291 vncipher
$out0,$out0,v26
3292 vncipher
$out1,$out1,v26
3293 vand
$tmp,$tmp,$eighty7
3294 vncipher
$out2,$out2,v26
3295 vncipher
$out3,$out3,v26
3296 vxor
$tweak,$tweak,$tmp
3297 vncipher
$out4,$out4,v26
3298 vncipher
$out5,$out5,v26
3300 add
$inp,$inp,r0
# $inp is adjusted in such
3301 # way that at exit from the
3302 # loop inX-in5 are loaded
3305 vsrab
$tmp,$tweak,$seven # next tweak value
3306 vxor
$twk2,$tweak,$rndkey0
3307 vaddubm
$tweak,$tweak,$tweak
3308 vncipher
$out0,$out0,v27
3309 vncipher
$out1,$out1,v27
3310 vsldoi
$tmp,$tmp,$tmp,15
3311 vncipher
$out2,$out2,v27
3312 vncipher
$out3,$out3,v27
3313 vand
$tmp,$tmp,$eighty7
3314 vncipher
$out4,$out4,v27
3315 vncipher
$out5,$out5,v27
3317 addi
$key_,$sp,$FRAME+15 # rewind $key_
3318 vxor
$tweak,$tweak,$tmp
3319 vncipher
$out0,$out0,v28
3320 vncipher
$out1,$out1,v28
3322 vsrab
$tmp,$tweak,$seven # next tweak value
3323 vxor
$twk3,$tweak,$rndkey0
3324 vncipher
$out2,$out2,v28
3325 vncipher
$out3,$out3,v28
3326 vaddubm
$tweak,$tweak,$tweak
3327 vsldoi
$tmp,$tmp,$tmp,15
3328 vncipher
$out4,$out4,v28
3329 vncipher
$out5,$out5,v28
3330 lvx v24
,$x00,$key_ # re-pre-load round[1]
3331 vand
$tmp,$tmp,$eighty7
3333 vncipher
$out0,$out0,v29
3334 vncipher
$out1,$out1,v29
3335 vxor
$tweak,$tweak,$tmp
3336 vncipher
$out2,$out2,v29
3337 vncipher
$out3,$out3,v29
3339 vsrab
$tmp,$tweak,$seven # next tweak value
3340 vxor
$twk4,$tweak,$rndkey0
3341 vncipher
$out4,$out4,v29
3342 vncipher
$out5,$out5,v29
3343 lvx v25
,$x10,$key_ # re-pre-load round[2]
3344 vaddubm
$tweak,$tweak,$tweak
3345 vsldoi
$tmp,$tmp,$tmp,15
3347 vncipher
$out0,$out0,v30
3348 vncipher
$out1,$out1,v30
3349 vand
$tmp,$tmp,$eighty7
3350 vncipher
$out2,$out2,v30
3351 vncipher
$out3,$out3,v30
3352 vxor
$tweak,$tweak,$tmp
3353 vncipher
$out4,$out4,v30
3354 vncipher
$out5,$out5,v30
3356 vsrab
$tmp,$tweak,$seven # next tweak value
3357 vxor
$twk5,$tweak,$rndkey0
# Final round for all six blocks, using $in0-$in5 as the last operands.
# Each $inN is reloaded with the next input block right after it has been
# consumed.
# NOTE(review): only the preparation of $in0 (twk0 xor v31) is visible in
# this excerpt; the corresponding lines for $in1-$in5 are missing here.
3359 vncipherlast
$out0,$out0,$in0
3360 lvx_u
$in0,$x00,$inp # load next input block
3361 vaddubm
$tweak,$tweak,$tweak
3362 vsldoi
$tmp,$tmp,$tmp,15
3363 vncipherlast
$out1,$out1,$in1
3364 lvx_u
$in1,$x10,$inp
3365 vncipherlast
$out2,$out2,$in2
3366 le?vperm
$in0,$in0,$in0,$leperm
3367 lvx_u
$in2,$x20,$inp
3368 vand
$tmp,$tmp,$eighty7
3369 vncipherlast
$out3,$out3,$in3
3370 le?vperm
$in1,$in1,$in1,$leperm
3371 lvx_u
$in3,$x30,$inp
3372 vncipherlast
$out4,$out4,$in4
3373 le?vperm
$in2,$in2,$in2,$leperm
3374 lvx_u
$in4,$x40,$inp
3375 vxor
$tweak,$tweak,$tmp
3376 vncipherlast
$out5,$out5,$in5
3377 le?vperm
$in3,$in3,$in3,$leperm
3378 lvx_u
$in5,$x50,$inp
3380 le?vperm
$in4,$in4,$in4,$leperm
3381 le?vperm
$in5,$in5,$in5,$leperm
# Store the six results through $out and, in the same breath, start the
# next iteration by XORing the freshly loaded inputs with the new tweaks.
3383 le?vperm
$out0,$out0,$out0,$leperm
3384 le?vperm
$out1,$out1,$out1,$leperm
3385 stvx_u
$out0,$x00,$out # store output
3386 vxor
$out0,$in0,$twk0
3387 le?vperm
$out2,$out2,$out2,$leperm
3388 stvx_u
$out1,$x10,$out
3389 vxor
$out1,$in1,$twk1
3390 le?vperm
$out3,$out3,$out3,$leperm
3391 stvx_u
$out2,$x20,$out
3392 vxor
$out2,$in2,$twk2
3393 le?vperm
$out4,$out4,$out4,$leperm
3394 stvx_u
$out3,$x30,$out
3395 vxor
$out3,$in3,$twk3
3396 le?vperm
$out5,$out5,$out5,$leperm
3397 stvx_u
$out4,$x40,$out
3398 vxor
$out4,$in4,$twk4
3399 stvx_u
$out5,$x50,$out
3400 vxor
$out5,$in5,$twk5
# Loop again while the condition set by the record-form subfe above is
# "equal" (r0 == 0, i.e. the $len-=96 step did not borrow).
3404 beq Loop_xts_dec6x
# did $len-=96 borrow?
# Tail handling after the six-block loop. $len gets the 0x60 added back
# and the leftover whole blocks are handled by one of the sections below
# (five, four, three, two or one block).
# NOTE(review): most compare/branch instructions and all section labels
# of the original dispatch are missing from this excerpt; only the branch
# to Lxts_dec6x_three is shown. Each section ends with a bne to
# Lxts_dec6x_steal, but the instruction that sets the tested condition is
# not visible here.
3406 addic
. $len,$len,0x60
3413 blt Lxts_dec6x_three
# Five leftover blocks: the inputs sit in $in1-$in5 and pair with
# $twk0-$twk4.
3418 vxor
$out0,$in1,$twk0
3419 vxor
$out1,$in2,$twk1
3420 vxor
$out2,$in3,$twk2
3421 vxor
$out3,$in4,$twk3
3422 vxor
$out4,$in5,$twk4
# Store five results. $twk5 was not consumed, so it is copied to $twk0,
# and $twk1 is rebuilt from the current $tweak; $out0 is then primed with
# $in0 xor $twk1.
3426 le?vperm
$out0,$out0,$out0,$leperm
3427 vmr
$twk0,$twk5 # unused tweak
3428 vxor
$twk1,$tweak,$rndkey0
3429 le?vperm
$out1,$out1,$out1,$leperm
3430 stvx_u
$out0,$x00,$out # store output
3431 vxor
$out0,$in0,$twk1
3432 le?vperm
$out2,$out2,$out2,$leperm
3433 stvx_u
$out1,$x10,$out
3434 le?vperm
$out3,$out3,$out3,$leperm
3435 stvx_u
$out2,$x20,$out
3436 le?vperm
$out4,$out4,$out4,$leperm
3437 stvx_u
$out3,$x30,$out
3438 stvx_u
$out4,$x40,$out
3440 bne Lxts_dec6x_steal
# Four leftover blocks: inputs in $in2-$in5 pair with $twk0-$twk3; the
# unused state $out4 is cleared.
3445 vxor
$out0,$in2,$twk0
3446 vxor
$out1,$in3,$twk1
3447 vxor
$out2,$in4,$twk2
3448 vxor
$out3,$in5,$twk3
3449 vxor
$out4,$out4,$out4
# Store four results; the first unused tweak ($twk4) moves to $twk0 and
# $out0 is primed with $in0 xor $twk5.
3453 le?vperm
$out0,$out0,$out0,$leperm
3454 vmr
$twk0,$twk4 # unused tweak
3456 le?vperm
$out1,$out1,$out1,$leperm
3457 stvx_u
$out0,$x00,$out # store output
3458 vxor
$out0,$in0,$twk5
3459 le?vperm
$out2,$out2,$out2,$leperm
3460 stvx_u
$out1,$x10,$out
3461 le?vperm
$out3,$out3,$out3,$leperm
3462 stvx_u
$out2,$x20,$out
3463 stvx_u
$out3,$x30,$out
3465 bne Lxts_dec6x_steal
# Three leftover blocks: inputs in $in3-$in5 pair with $twk0-$twk2;
# $out3 and $out4 are cleared.
3470 vxor
$out0,$in3,$twk0
3471 vxor
$out1,$in4,$twk1
3472 vxor
$out2,$in5,$twk2
3473 vxor
$out3,$out3,$out3
3474 vxor
$out4,$out4,$out4
# Store three results; $twk3 moves to $twk0 and $out0 is primed with
# $in0 xor $twk4.
3478 le?vperm
$out0,$out0,$out0,$leperm
3479 vmr
$twk0,$twk3 # unused tweak
3481 le?vperm
$out1,$out1,$out1,$leperm
3482 stvx_u
$out0,$x00,$out # store output
3483 vxor
$out0,$in0,$twk4
3484 le?vperm
$out2,$out2,$out2,$leperm
3485 stvx_u
$out1,$x10,$out
3486 stvx_u
$out2,$x20,$out
3488 bne Lxts_dec6x_steal
# Two leftover blocks: inputs in $in4 and $in5 pair with $twk0 and $twk1;
# $out2-$out4 are cleared.
3493 vxor
$out0,$in4,$twk0
3494 vxor
$out1,$in5,$twk1
3495 vxor
$out2,$out2,$out2
3496 vxor
$out3,$out3,$out3
3497 vxor
$out4,$out4,$out4
# Store two results; $twk2 moves to $twk0 and $out0 is primed with
# $in0 xor $twk3.
3501 le?vperm
$out0,$out0,$out0,$leperm
3502 vmr
$twk0,$twk2 # unused tweak
3504 le?vperm
$out1,$out1,$out1,$leperm
3505 stvx_u
$out0,$x00,$out # store output
3506 vxor
$out0,$in0,$twk3
3507 stvx_u
$out1,$x10,$out
3509 bne Lxts_dec6x_steal
# One leftover block: $in5 with $twk0. It is decrypted inline (no helper
# call is shown) with the same v24/v25 reload scheme as the main loop.
3514 vxor
$out0,$in5,$twk0
3517 vncipher
$out0,$out0,v24
3518 lvx v24
,$x20,$key_ # round[3]
3519 addi
$key_,$key_,0x20
3521 vncipher
$out0,$out0,v25
3522 lvx v25
,$x10,$key_ # round[4]
3526 vncipher
$out0,$out0,v24
3530 vncipher
$out0,$out0,v25
3533 vncipher
$out0,$out0,v26
3536 vncipher
$out0,$out0,v27
3538 addi
$key_,$sp,$FRAME+15 # rewind $key_
3539 vncipher
$out0,$out0,v28
3540 lvx v24
,$x00,$key_ # re-pre-load round[1]
3542 vncipher
$out0,$out0,v29
3543 lvx v25
,$x10,$key_ # re-pre-load round[2]
# $twk0 xor v31 is the final operand for the vncipherlast below.
3544 vxor
$twk0,$twk0,v31
3546 le?vperm
$in0,$in0,$in0,$leperm
3547 vncipher
$out0,$out0,v30
3550 vncipherlast
$out0,$out0,$twk0
# Store the single result; $twk1 moves to $twk0 and $out0 is primed with
# $in0 xor $twk2.
3552 vmr
$twk0,$twk1 # unused tweak
3554 le?vperm
$out0,$out0,$out0,$leperm
3555 stvx_u
$out0,$x00,$out # store output
3557 vxor
$out0,$in0,$twk2
3558 bne Lxts_dec6x_steal
# "Steal" path and function exit of the six-block decryption routine.
# NOTE(review): judging by the label names this is presumably the XTS
# ciphertext-stealing step for a trailing partial block of $taillen
# bytes - confirm against the full file. Several lines (labels, loads,
# the byte-copy loop body) are missing from this excerpt.
#
# Prime $out0 with $in0 xor $twk1 before entering the round loop.
3567 le?vperm
$in0,$in0,$in0,$leperm
3568 vxor
$out0,$in0,$twk1
# Round loop for the single block in $out0 (branch target
# Lxts_dec6x_steal), with v24/v25 reloaded through $key_ each pass.
3570 vncipher
$out0,$out0,v24
3571 lvx v24
,$x20,$key_ # round[3]
3572 addi
$key_,$key_,0x20
3574 vncipher
$out0,$out0,v25
3575 lvx v25
,$x10,$key_ # round[4]
3576 bdnz Lxts_dec6x_steal
# Advance $inp by $taillen, then run the remaining rounds.
3578 add
$inp,$inp,$taillen
3579 vncipher
$out0,$out0,v24
3582 vncipher
$out0,$out0,v25
3585 vncipher
$out0,$out0,v26
# $inpperm is rebuilt from $taillen for the vperm and mask construction
# further down.
3587 lvsr
$inpperm,0,$taillen # $in5 is no more
3588 vncipher
$out0,$out0,v27
3590 addi
$key_,$sp,$FRAME+15 # rewind $key_
3591 vncipher
$out0,$out0,v28
3592 lvx v24
,$x00,$key_ # re-pre-load round[1]
3594 vncipher
$out0,$out0,v29
3595 lvx v25
,$x10,$key_ # re-pre-load round[2]
3596 vxor
$twk1,$twk1,v31
3598 le?vperm
$in0,$in0,$in0,$leperm
3599 vncipher
$out0,$out0,v30
3601 vperm
$in0,$in0,$in0,$inpperm
# The result is kept in $tmp; the store uses the byte-swapped copy on
# little-endian builds and $tmp itself on big-endian builds.
3602 vncipherlast
$tmp,$out0,$twk1
3604 le?vperm
$out0,$tmp,$tmp,$leperm
3605 le?stvx_u
$out0,0,$out
3606 be?stvx_u
$tmp,0,$out
# Build a byte-select mask in $out0 (zero vector permuted with $out1
# through $inpperm), merge $in0 and $tmp with it, and XOR with $twk0.
# NOTE(review): the instruction that initialises $out1 for this mask is
# not visible in this excerpt.
3608 vxor
$out0,$out0,$out0
3610 vperm
$out0,$out0,$out1,$inpperm
3611 vsel
$out0,$in0,$tmp,$out0
3612 vxor
$out0,$out0,$twk0
# Counted loop Loop_xts_dec6x_steal; its body is missing from this
# excerpt. Afterwards control goes back to Loop_xts_dec1x for one more
# single-block pass.
3616 Loop_xts_dec6x_steal
:
3619 bdnz Loop_xts_dec6x_steal
3623 b Loop_xts_dec1x
# one more time...
# Write the tweak back through $ivp. $twk0 carries $rndkey0, so it is
# XORed out first; little-endian builds also byte-swap the value.
3630 vxor
$tweak,$twk0,$rndkey0
3631 le?vperm
$tweak,$tweak,$tweak,$leperm
3632 stvx_u
$tweak,0,$ivp
# Overwrite the on-stack round-key copies with the contents of $seven.
3638 stvx
$seven,r10
,$sp # wipe copies of round keys
# Restore v20 and r26-r31 from the frame, then release the frame by
# adding its size back to $sp.
3656 lvx v20
,r10
,$sp # ABI says so
3678 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
3679 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
3680 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
3681 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
3682 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
3683 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
3684 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3687 .byte
0,12,0x04,1,0x80,6,6,0
# Five-block AES decryption helper. The loop branch below names it
# _aesp8_xts_dec5x; the label line itself is not visible in this excerpt.
# The five states $out0-$out4 are run through vncipher with the round
# keys held in v24-v30 and completed with vncipherlast.
#
# Round loop: two rounds per pass (v24, then v25); after each use the
# register is reloaded from the key copy addressed through $key_.
3692 vncipher
$out0,$out0,v24
3693 vncipher
$out1,$out1,v24
3694 vncipher
$out2,$out2,v24
3695 vncipher
$out3,$out3,v24
3696 vncipher
$out4,$out4,v24
3697 lvx v24
,$x20,$key_ # round[3]
3698 addi
$key_,$key_,0x20
3700 vncipher
$out0,$out0,v25
3701 vncipher
$out1,$out1,v25
3702 vncipher
$out2,$out2,v25
3703 vncipher
$out3,$out3,v25
3704 vncipher
$out4,$out4,v25
3705 lvx v25
,$x10,$key_ # round[4]
3706 bdnz _aesp8_xts_dec5x
# Loop finished: apply the remaining rounds (v24, v25, then the resident
# keys v26-v30).
3709 vncipher
$out0,$out0,v24
3710 vncipher
$out1,$out1,v24
3711 vncipher
$out2,$out2,v24
3712 vncipher
$out3,$out3,v24
3713 vncipher
$out4,$out4,v24
3717 vncipher
$out0,$out0,v25
3718 vncipher
$out1,$out1,v25
3719 vncipher
$out2,$out2,v25
3720 vncipher
$out3,$out3,v25
3721 vncipher
$out4,$out4,v25
# $twk0 is combined with v31 here; it is the final operand of the
# vncipherlast for $out0 at the end of this routine.
3722 vxor
$twk0,$twk0,v31
3725 vncipher
$out0,$out0,v26
3726 vncipher
$out1,$out1,v26
3727 vncipher
$out2,$out2,v26
3728 vncipher
$out3,$out3,v26
3729 vncipher
$out4,$out4,v26
3732 vncipher
$out0,$out0,v27
3734 vncipher
$out1,$out1,v27
3735 vncipher
$out2,$out2,v27
3736 vncipher
$out3,$out3,v27
3737 vncipher
$out4,$out4,v27
# $key_ is pointed back at $sp+$FRAME+15 so that v24/v25 can be reloaded
# with the first two stored round keys for the next use.
3740 addi
$key_,$sp,$FRAME+15 # rewind $key_
3741 vncipher
$out0,$out0,v28
3742 vncipher
$out1,$out1,v28
3743 vncipher
$out2,$out2,v28
3744 vncipher
$out3,$out3,v28
3745 vncipher
$out4,$out4,v28
3746 lvx v24
,$x00,$key_ # re-pre-load round[1]
3749 vncipher
$out0,$out0,v29
3750 le?vperm
$in0,$in0,$in0,$leperm
3751 vncipher
$out1,$out1,v29
3752 vncipher
$out2,$out2,v29
3753 vncipher
$out3,$out3,v29
3754 vncipher
$out4,$out4,v29
3755 lvx v25
,$x10,$key_ # re-pre-load round[2]
3758 vncipher
$out0,$out0,v30
3759 vncipher
$out1,$out1,v30
3760 vncipher
$out2,$out2,v30
3761 vncipher
$out3,$out3,v30
3762 vncipher
$out4,$out4,v30
# Final round: block 0 is finished with $twk0, blocks 1-4 with $in1-$in4.
# NOTE(review): the instructions that prepare $in1-$in4 for this purpose
# are not visible in this excerpt - confirm against the full file.
3764 vncipherlast
$out0,$out0,$twk0
3765 vncipherlast
$out1,$out1,$in1
3766 vncipherlast
$out2,$out2,$in2
3767 vncipherlast
$out3,$out3,$in3
3768 vncipherlast
$out4,$out4,$in4
3772 .byte
0,12,0x14,0,0,0,0,0
3777 foreach(split("\n",$code)) {
3778 s/\`([^\`]*)\`/eval($1)/geo;
3780 # constants table endian-specific conversion
3781 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3785 # convert to endian-agnostic format
3787 foreach (split(/,\s*/,$2)) {
3788 my $l = /^0/?
oct:int;
3789 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3792 @bytes = map(/^0/?
oct:int,split(/,\s*/,$2));
3795 # little-endian conversion
3796 if ($flavour =~ /le$/o) {
3797 SWITCH
: for($conv) {
3798 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3799 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3804 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3807 $consts=0 if (m/Lconsts:/o); # end of table
3809 # instructions prefixed with '?' are endian-specific and need
3810 # to be adjusted accordingly...
3811 if ($flavour =~ /le$/o) { # little-endian
3816 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3817 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3818 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3819 } else { # big-endian