2 # SPDX-License-Identifier: GPL-2.0
4 # This code is taken from CRYPTOGAMs[1] and is included here using the option
5 # in the license to distribute the code under the GPL. Therefore this program
6 # is free software; you can redistribute it and/or modify it under the terms of
7 # the GNU General Public License version 2 as published by the Free Software
10 # [1] https://www.openssl.org/~appro/cryptogams/
12 # Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
13 # All rights reserved.
15 # Redistribution and use in source and binary forms, with or without
16 # modification, are permitted provided that the following conditions
19 # * Redistributions of source code must retain copyright notices,
20 # this list of conditions and the following disclaimer.
22 # * Redistributions in binary form must reproduce the above
23 # copyright notice, this list of conditions and the following
24 # disclaimer in the documentation and/or other materials
25 # provided with the distribution.
27 # * Neither the name of the CRYPTOGAMS nor the names of its
28 # copyright holder and contributors may be used to endorse or
29 # promote products derived from this software without specific
30 # prior written permission.
32 # ALTERNATIVELY, provided that this notice is retained in full, this
33 # product may be distributed under the terms of the GNU General Public
34 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
37 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
38 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
40 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
41 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
44 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
45 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
46 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
47 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
49 # ====================================================================
50 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
51 # project. The module is, however, dual licensed under OpenSSL and
52 # CRYPTOGAMS licenses depending on where you obtain it. For further
53 # details see http://www.openssl.org/~appro/cryptogams/.
54 # ====================================================================
56 # This module implements support for AES instructions as per PowerISA
57 # specification version 2.07, first implemented by POWER8 processor.
58 # The module is endian-agnostic in the sense that it supports both big-
59 # and little-endian cases. Data alignment in parallelizable modes is
60 # handled with VSX loads and stores, which implies MSR.VSX flag being
61 # set. It should also be noted that ISA specification doesn't prohibit
62 # alignment exceptions for these instructions on page boundaries.
63 # Initially alignment was handled in pure AltiVec/VMX way [when data
64 # is aligned programmatically, which in turn guarantees exception-
65 # free execution], but it turned out to hamper performance when vcipher
66 # instructions are interleaved. It's reckoned that eventual
67 # misalignment penalties at page boundaries are on average lower
68 # than additional overhead in pure AltiVec approach.
72 # Add XTS subroutine; improvements of 9x on little-endian and 12x on
73 # big-endian systems were measured.
75 ######################################################################
76 # Current large-block performance in cycles per byte processed with
77 # 128-bit key (less is better).
79 # CBC en-/decrypt CTR XTS
80 # POWER8[le] 3.96/0.72 0.74 1.1
81 # POWER8[be] 3.75/0.65 0.66 1.0
85 if ($flavour =~ /64/) {
93 } elsif ($flavour =~ /32/) {
101 } else { die "nonsense $flavour"; }
# Endianness flag derived from the flavour string: $SIZE_T when the flavour
# name ends in "le", otherwise 0.
# NOTE(review): $SIZE_T is assigned in the flavour branches above, which are
# not visible here; presumably it is non-zero so this acts as a boolean - confirm.
103 $LITTLE_ENDIAN = ($flavour=~/le$/) ?
$SIZE_T : 0;
105 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
106 ( $xlate="${dir}ppc-xlate.pl" and -f
$xlate ) or
107 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f
$xlate) or
108 die "can't locate ppc-xlate.pl";
# Pipe everything this script prints through the ppc-xlate.pl translator,
# passing it the flavour and the next command-line argument (if any).
#
# The call is parenthesised and checked with low-precedence `or`. The former
# `... ".shift || die` attached `||` to the command string, which is always
# true, so the die could never run and a failed open() went unnoticed.
open(STDOUT, "| $^X $xlate $flavour " . shift) or die "can't call $xlate: $!";
118 #########################################################################
119 {{{ # Key setup procedures #
# Register names for the key setup procedures. Each list is bound, in order,
# to consecutive registers produced by map().
# General-purpose registers r3..r8.
120 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector registers v0..v6.
121 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# Vector registers v7..v11; these feed the vperm/vsel "rotate" sequences
# that appear repeatedly in the assembly below.
122 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
131 .long
0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
132 .long
0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
133 .long
0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
138 mflr
$ptr #vvvvv "distance between . and rcon
143 .byte
0,12,0x14,0,0,0,0,0
144 .asciz
"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
146 .globl
.${prefix
}_set_encrypt_key
149 $PUSH r11
,$LRSAVE($sp)
153 beq
- Lenc_key_abort
# if ($inp==0) return -1;
155 beq
- Lenc_key_abort
# if ($out==0) return -1;
173 addi
$inp,$inp,15 # 15 is not typo
174 lvsr
$key,0,r9
# borrow $key
178 le?vspltisb
$mask,0x0f # borrow $mask
180 le?vxor
$key,$key,$mask # adjust for byte swap
183 vperm
$in0,$in0,$in1,$key # align [and byte swap in LE]
185 vxor
$zero,$zero,$zero
188 ?lvsr
$outperm,0,$out
191 ?vperm
$outmask,$zero,$outmask,$outperm
201 vperm
$key,$in0,$in0,$mask # rotate-n-splat
202 vsldoi
$tmp,$zero,$in0,12 # >>32
203 vperm
$outtail,$in0,$in0,$outperm # rotate
204 vsel
$stage,$outhead,$outtail,$outmask
205 vmr
$outhead,$outtail
206 vcipherlast
$key,$key,$rcon
211 vsldoi
$tmp,$zero,$tmp,12 # >>32
213 vsldoi
$tmp,$zero,$tmp,12 # >>32
215 vadduwm
$rcon,$rcon,$rcon
219 lvx
$rcon,0,$ptr # last two round keys
221 vperm
$key,$in0,$in0,$mask # rotate-n-splat
222 vsldoi
$tmp,$zero,$in0,12 # >>32
223 vperm
$outtail,$in0,$in0,$outperm # rotate
224 vsel
$stage,$outhead,$outtail,$outmask
225 vmr
$outhead,$outtail
226 vcipherlast
$key,$key,$rcon
231 vsldoi
$tmp,$zero,$tmp,12 # >>32
233 vsldoi
$tmp,$zero,$tmp,12 # >>32
235 vadduwm
$rcon,$rcon,$rcon
238 vperm
$key,$in0,$in0,$mask # rotate-n-splat
239 vsldoi
$tmp,$zero,$in0,12 # >>32
240 vperm
$outtail,$in0,$in0,$outperm # rotate
241 vsel
$stage,$outhead,$outtail,$outmask
242 vmr
$outhead,$outtail
243 vcipherlast
$key,$key,$rcon
248 vsldoi
$tmp,$zero,$tmp,12 # >>32
250 vsldoi
$tmp,$zero,$tmp,12 # >>32
253 vperm
$outtail,$in0,$in0,$outperm # rotate
254 vsel
$stage,$outhead,$outtail,$outmask
255 vmr
$outhead,$outtail
258 addi
$inp,$out,15 # 15 is not typo
268 vperm
$outtail,$in0,$in0,$outperm # rotate
269 vsel
$stage,$outhead,$outtail,$outmask
270 vmr
$outhead,$outtail
273 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
274 vspltisb
$key,8 # borrow $key
276 vsububm
$mask,$mask,$key # adjust the mask
279 vperm
$key,$in1,$in1,$mask # roate-n-splat
280 vsldoi
$tmp,$zero,$in0,12 # >>32
281 vcipherlast
$key,$key,$rcon
284 vsldoi
$tmp,$zero,$tmp,12 # >>32
286 vsldoi
$tmp,$zero,$tmp,12 # >>32
289 vsldoi
$stage,$zero,$in1,8
292 vsldoi
$in1,$zero,$in1,12 # >>32
293 vadduwm
$rcon,$rcon,$rcon
297 vsldoi
$stage,$stage,$in0,8
299 vperm
$key,$in1,$in1,$mask # rotate-n-splat
300 vsldoi
$tmp,$zero,$in0,12 # >>32
301 vperm
$outtail,$stage,$stage,$outperm # rotate
302 vsel
$stage,$outhead,$outtail,$outmask
303 vmr
$outhead,$outtail
304 vcipherlast
$key,$key,$rcon
308 vsldoi
$stage,$in0,$in1,8
310 vsldoi
$tmp,$zero,$tmp,12 # >>32
311 vperm
$outtail,$stage,$stage,$outperm # rotate
312 vsel
$stage,$outhead,$outtail,$outmask
313 vmr
$outhead,$outtail
315 vsldoi
$tmp,$zero,$tmp,12 # >>32
322 vsldoi
$in1,$zero,$in1,12 # >>32
323 vadduwm
$rcon,$rcon,$rcon
327 vperm
$outtail,$in0,$in0,$outperm # rotate
328 vsel
$stage,$outhead,$outtail,$outmask
329 vmr
$outhead,$outtail
331 addi
$inp,$out,15 # 15 is not typo
344 vperm
$outtail,$in0,$in0,$outperm # rotate
345 vsel
$stage,$outhead,$outtail,$outmask
346 vmr
$outhead,$outtail
349 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
353 vperm
$key,$in1,$in1,$mask # rotate-n-splat
354 vsldoi
$tmp,$zero,$in0,12 # >>32
355 vperm
$outtail,$in1,$in1,$outperm # rotate
356 vsel
$stage,$outhead,$outtail,$outmask
357 vmr
$outhead,$outtail
358 vcipherlast
$key,$key,$rcon
363 vsldoi
$tmp,$zero,$tmp,12 # >>32
365 vsldoi
$tmp,$zero,$tmp,12 # >>32
367 vadduwm
$rcon,$rcon,$rcon
369 vperm
$outtail,$in0,$in0,$outperm # rotate
370 vsel
$stage,$outhead,$outtail,$outmask
371 vmr
$outhead,$outtail
373 addi
$inp,$out,15 # 15 is not typo
377 vspltw
$key,$in0,3 # just splat
378 vsldoi
$tmp,$zero,$in1,12 # >>32
382 vsldoi
$tmp,$zero,$tmp,12 # >>32
384 vsldoi
$tmp,$zero,$tmp,12 # >>32
392 lvx
$in1,0,$inp # redundant in aligned case
393 vsel
$in1,$outhead,$in1,$outmask
403 .byte
0,12,0x14,1,0,0,3,0
405 .size
.${prefix
}_set_encrypt_key
,.-.${prefix
}_set_encrypt_key
407 .globl
.${prefix
}_set_decrypt_key
408 $STU $sp,-$FRAME($sp)
410 $PUSH r10
,$FRAME+$LRSAVE($sp)
418 subi
$inp,$out,240 # first round key
419 srwi
$rounds,$rounds,1
420 add
$out,$inp,$cnt # last round key
444 xor r3
,r3
,r3
# return value
449 .byte
0,12,4,1,0x80,0,3,0
451 .size
.${prefix
}_set_decrypt_key
,.-.${prefix
}_set_decrypt_key
454 #########################################################################
455 {{{ # Single block en- and decrypt procedures #
# $n is "n" when $dir is "de" and empty otherwise. It is spliced into the
# instruction mnemonic below (v${n}cipherlast), so the "de" variant emits
# vncipherlast and the other variant emits vcipherlast.
# NOTE(review): $dir is presumably the loop variable of an enclosing
# "en"/"de" loop that is not visible here - confirm.
458 my $n = $dir eq "de" ?
"n" : "";
# General-purpose registers r3..r7, in order.
459 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
462 .globl
.${prefix
}_
${dir
}crypt
463 lwz
$rounds,240($key)
466 li
$idx,15 # 15 is not typo
472 lvsl v2
,0,$inp # inpperm
474 ?lvsl v3
,0,r11
# outperm
477 vperm v0
,v0
,v1
,v2
# align [and byte swap in LE]
479 ?lvsl v5
,0,$key # keyperm
480 srwi
$rounds,$rounds,1
483 subi
$rounds,$rounds,1
484 ?vperm v1
,v1
,v2
,v5
# align round key
506 v
${n
}cipherlast v0
,v0
,v1
510 li
$idx,15 # 15 is not typo
511 ?vperm v2
,v1
,v2
,v3
# outmask
513 lvx v1
,0,$out # outhead
514 vperm v0
,v0
,v0
,v3
# rotate [and byte swap in LE]
524 .byte
0,12,0x14,0,0,0,3,0
526 .size
.${prefix
}_
${dir
}crypt,.-.${prefix
}_
${dir
}crypt
532 #########################################################################
533 {{{ # CBC en- and decrypt procedures #
# Register names for the CBC procedures.
# General-purpose registers r3..r10, in order. $enc is compared against 0 in
# the assembly below to select the direction.
534 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
# Vector registers v0..v3, in order.
535 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
536 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
539 .globl
.${prefix
}_cbc_encrypt
543 cmpwi
$enc,0 # test direction
549 vxor
$rndkey0,$rndkey0,$rndkey0
550 le?vspltisb
$tmp,0x0f
552 lvx
$ivec,0,$ivp # load [unaligned] iv
554 lvx
$inptail,$idx,$ivp
555 le?vxor
$inpperm,$inpperm,$tmp
556 vperm
$ivec,$ivec,$inptail,$inpperm
559 ?lvsl
$keyperm,0,$key # prepare for unaligned key
560 lwz
$rounds,240($key)
562 lvsr
$inpperm,0,r11
# prepare for unaligned load
564 addi
$inp,$inp,15 # 15 is not typo
565 le?vxor
$inpperm,$inpperm,$tmp
567 ?lvsr
$outperm,0,$out # prepare for unaligned store
570 ?vperm
$outmask,$rndkey0,$outmask,$outperm
571 le?vxor
$outperm,$outperm,$tmp
573 srwi
$rounds,$rounds,1
575 subi
$rounds,$rounds,1
583 subi
$len,$len,16 # len-=16
586 vperm
$inout,$inout,$inptail,$inpperm
587 lvx
$rndkey1,$idx,$key
589 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
590 vxor
$inout,$inout,$rndkey0
591 lvx
$rndkey0,$idx,$key
593 vxor
$inout,$inout,$ivec
596 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
597 vcipher
$inout,$inout,$rndkey1
598 lvx
$rndkey1,$idx,$key
600 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
601 vcipher
$inout,$inout,$rndkey0
602 lvx
$rndkey0,$idx,$key
606 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
607 vcipher
$inout,$inout,$rndkey1
608 lvx
$rndkey1,$idx,$key
610 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
611 vcipherlast
$ivec,$inout,$rndkey0
614 vperm
$tmp,$ivec,$ivec,$outperm
615 vsel
$inout,$outhead,$tmp,$outmask
626 bge _aesp8_cbc_decrypt8x
631 subi
$len,$len,16 # len-=16
634 vperm
$tmp,$tmp,$inptail,$inpperm
635 lvx
$rndkey1,$idx,$key
637 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
638 vxor
$inout,$tmp,$rndkey0
639 lvx
$rndkey0,$idx,$key
643 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
644 vncipher
$inout,$inout,$rndkey1
645 lvx
$rndkey1,$idx,$key
647 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
648 vncipher
$inout,$inout,$rndkey0
649 lvx
$rndkey0,$idx,$key
653 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
654 vncipher
$inout,$inout,$rndkey1
655 lvx
$rndkey1,$idx,$key
657 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
658 vncipherlast
$inout,$inout,$rndkey0
661 vxor
$inout,$inout,$ivec
663 vperm
$tmp,$inout,$inout,$outperm
664 vsel
$inout,$outhead,$tmp,$outmask
672 lvx
$inout,0,$out # redundant in aligned case
673 vsel
$inout,$outhead,$inout,$outmask
676 neg
$enc,$ivp # write [unaligned] iv
677 li
$idx,15 # 15 is not typo
678 vxor
$rndkey0,$rndkey0,$rndkey0
680 le?vspltisb
$tmp,0x0f
681 ?lvsl
$outperm,0,$enc
682 ?vperm
$outmask,$rndkey0,$outmask,$outperm
683 le?vxor
$outperm,$outperm,$tmp
685 vperm
$ivec,$ivec,$ivec,$outperm
686 vsel
$inout,$outhead,$ivec,$outmask
687 lvx
$inptail,$idx,$ivp
689 vsel
$inout,$ivec,$inptail,$outmask
690 stvx
$inout,$idx,$ivp
695 .byte
0,12,0x14,0,0,0,6,0
698 #########################################################################
699 {{ # Optimized CBC decrypt procedure #
# Register names for the 8-blocks-at-a-time CBC decrypt procedure.
# $x00..$x70 are r0, r8 and r26..r31; they are used as index registers in the
# lvx/lvx_u/stvx_u forms below. r26..r31 are pushed to the stack frame on entry
# and popped before return.
# NOTE(review): presumably they hold the byte offsets 0x00..0x70 - the
# instructions that load them are not visible here; confirm.
701 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
# Eight input blocks in v0..v3 and v10..v13.
702 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
# Eight working/output blocks in v14..v21.
703 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
704 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
705 # v26-v31 last 6 round keys
# $tmp and $keyperm reuse the registers of $in3 and $in4 (v3 and v10).
706 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
710 _aesp8_cbc_decrypt8x
:
711 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
712 li r10
,`$FRAME+8*16+15`
713 li r11
,`$FRAME+8*16+31`
714 stvx v20
,r10
,$sp # ABI says so
737 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
739 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
741 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
743 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
745 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
747 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
749 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
753 subi
$rounds,$rounds,3 # -4 in total
754 subi
$len,$len,128 # bias
756 lvx
$rndkey0,$x00,$key # load key schedule
760 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
761 addi
$key_,$sp,$FRAME+15
765 ?vperm v24
,v30
,v31
,$keyperm
768 stvx v24
,$x00,$key_ # off-load round[1]
769 ?vperm v25
,v31
,v30
,$keyperm
771 stvx v25
,$x10,$key_ # off-load round[2]
772 addi
$key_,$key_,0x20
773 bdnz Load_cbc_dec_key
776 ?vperm v24
,v30
,v31
,$keyperm
778 stvx v24
,$x00,$key_ # off-load round[3]
779 ?vperm v25
,v31
,v26
,$keyperm
781 stvx v25
,$x10,$key_ # off-load round[4]
782 addi
$key_,$sp,$FRAME+15 # rewind $key_
783 ?vperm v26
,v26
,v27
,$keyperm
785 ?vperm v27
,v27
,v28
,$keyperm
787 ?vperm v28
,v28
,v29
,$keyperm
789 ?vperm v29
,v29
,v30
,$keyperm
790 lvx
$out0,$x70,$key # borrow $out0
791 ?vperm v30
,v30
,v31
,$keyperm
792 lvx v24
,$x00,$key_ # pre-load round[1]
793 ?vperm v31
,v31
,$out0,$keyperm
794 lvx v25
,$x10,$key_ # pre-load round[2]
796 #lvx $inptail,0,$inp # "caller" already did this
797 #addi $inp,$inp,15 # 15 is not typo
798 subi
$inp,$inp,15 # undo "caller"
801 lvx_u
$in0,$x00,$inp # load first 8 "words"
802 le?lvsl
$inpperm,0,$idx
803 le?vspltisb
$tmp,0x0f
805 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
807 le?vperm
$in0,$in0,$in0,$inpperm
809 le?vperm
$in1,$in1,$in1,$inpperm
811 le?vperm
$in2,$in2,$in2,$inpperm
812 vxor
$out0,$in0,$rndkey0
814 le?vperm
$in3,$in3,$in3,$inpperm
815 vxor
$out1,$in1,$rndkey0
817 le?vperm
$in4,$in4,$in4,$inpperm
818 vxor
$out2,$in2,$rndkey0
821 le?vperm
$in5,$in5,$in5,$inpperm
822 vxor
$out3,$in3,$rndkey0
823 le?vperm
$in6,$in6,$in6,$inpperm
824 vxor
$out4,$in4,$rndkey0
825 le?vperm
$in7,$in7,$in7,$inpperm
826 vxor
$out5,$in5,$rndkey0
827 vxor
$out6,$in6,$rndkey0
828 vxor
$out7,$in7,$rndkey0
834 vncipher
$out0,$out0,v24
835 vncipher
$out1,$out1,v24
836 vncipher
$out2,$out2,v24
837 vncipher
$out3,$out3,v24
838 vncipher
$out4,$out4,v24
839 vncipher
$out5,$out5,v24
840 vncipher
$out6,$out6,v24
841 vncipher
$out7,$out7,v24
842 lvx v24
,$x20,$key_ # round[3]
843 addi
$key_,$key_,0x20
845 vncipher
$out0,$out0,v25
846 vncipher
$out1,$out1,v25
847 vncipher
$out2,$out2,v25
848 vncipher
$out3,$out3,v25
849 vncipher
$out4,$out4,v25
850 vncipher
$out5,$out5,v25
851 vncipher
$out6,$out6,v25
852 vncipher
$out7,$out7,v25
853 lvx v25
,$x10,$key_ # round[4]
856 subic
$len,$len,128 # $len-=128
857 vncipher
$out0,$out0,v24
858 vncipher
$out1,$out1,v24
859 vncipher
$out2,$out2,v24
860 vncipher
$out3,$out3,v24
861 vncipher
$out4,$out4,v24
862 vncipher
$out5,$out5,v24
863 vncipher
$out6,$out6,v24
864 vncipher
$out7,$out7,v24
866 subfe
. r0
,r0
,r0
# borrow?-1:0
867 vncipher
$out0,$out0,v25
868 vncipher
$out1,$out1,v25
869 vncipher
$out2,$out2,v25
870 vncipher
$out3,$out3,v25
871 vncipher
$out4,$out4,v25
872 vncipher
$out5,$out5,v25
873 vncipher
$out6,$out6,v25
874 vncipher
$out7,$out7,v25
877 vncipher
$out0,$out0,v26
878 vncipher
$out1,$out1,v26
879 vncipher
$out2,$out2,v26
880 vncipher
$out3,$out3,v26
881 vncipher
$out4,$out4,v26
882 vncipher
$out5,$out5,v26
883 vncipher
$out6,$out6,v26
884 vncipher
$out7,$out7,v26
886 add
$inp,$inp,r0
# $inp is adjusted in such
887 # way that at exit from the
888 # loop inX-in7 are loaded
890 vncipher
$out0,$out0,v27
891 vncipher
$out1,$out1,v27
892 vncipher
$out2,$out2,v27
893 vncipher
$out3,$out3,v27
894 vncipher
$out4,$out4,v27
895 vncipher
$out5,$out5,v27
896 vncipher
$out6,$out6,v27
897 vncipher
$out7,$out7,v27
899 addi
$key_,$sp,$FRAME+15 # rewind $key_
900 vncipher
$out0,$out0,v28
901 vncipher
$out1,$out1,v28
902 vncipher
$out2,$out2,v28
903 vncipher
$out3,$out3,v28
904 vncipher
$out4,$out4,v28
905 vncipher
$out5,$out5,v28
906 vncipher
$out6,$out6,v28
907 vncipher
$out7,$out7,v28
908 lvx v24
,$x00,$key_ # re-pre-load round[1]
910 vncipher
$out0,$out0,v29
911 vncipher
$out1,$out1,v29
912 vncipher
$out2,$out2,v29
913 vncipher
$out3,$out3,v29
914 vncipher
$out4,$out4,v29
915 vncipher
$out5,$out5,v29
916 vncipher
$out6,$out6,v29
917 vncipher
$out7,$out7,v29
918 lvx v25
,$x10,$key_ # re-pre-load round[2]
920 vncipher
$out0,$out0,v30
921 vxor
$ivec,$ivec,v31
# xor with last round key
922 vncipher
$out1,$out1,v30
924 vncipher
$out2,$out2,v30
926 vncipher
$out3,$out3,v30
928 vncipher
$out4,$out4,v30
930 vncipher
$out5,$out5,v30
932 vncipher
$out6,$out6,v30
934 vncipher
$out7,$out7,v30
937 vncipherlast
$out0,$out0,$ivec
938 vncipherlast
$out1,$out1,$in0
939 lvx_u
$in0,$x00,$inp # load next input block
940 vncipherlast
$out2,$out2,$in1
942 vncipherlast
$out3,$out3,$in2
943 le?vperm
$in0,$in0,$in0,$inpperm
945 vncipherlast
$out4,$out4,$in3
946 le?vperm
$in1,$in1,$in1,$inpperm
948 vncipherlast
$out5,$out5,$in4
949 le?vperm
$in2,$in2,$in2,$inpperm
951 vncipherlast
$out6,$out6,$in5
952 le?vperm
$in3,$in3,$in3,$inpperm
954 vncipherlast
$out7,$out7,$in6
955 le?vperm
$in4,$in4,$in4,$inpperm
958 le?vperm
$in5,$in5,$in5,$inpperm
962 le?vperm
$out0,$out0,$out0,$inpperm
963 le?vperm
$out1,$out1,$out1,$inpperm
964 stvx_u
$out0,$x00,$out
965 le?vperm
$in6,$in6,$in6,$inpperm
966 vxor
$out0,$in0,$rndkey0
967 le?vperm
$out2,$out2,$out2,$inpperm
968 stvx_u
$out1,$x10,$out
969 le?vperm
$in7,$in7,$in7,$inpperm
970 vxor
$out1,$in1,$rndkey0
971 le?vperm
$out3,$out3,$out3,$inpperm
972 stvx_u
$out2,$x20,$out
973 vxor
$out2,$in2,$rndkey0
974 le?vperm
$out4,$out4,$out4,$inpperm
975 stvx_u
$out3,$x30,$out
976 vxor
$out3,$in3,$rndkey0
977 le?vperm
$out5,$out5,$out5,$inpperm
978 stvx_u
$out4,$x40,$out
979 vxor
$out4,$in4,$rndkey0
980 le?vperm
$out6,$out6,$out6,$inpperm
981 stvx_u
$out5,$x50,$out
982 vxor
$out5,$in5,$rndkey0
983 le?vperm
$out7,$out7,$out7,$inpperm
984 stvx_u
$out6,$x60,$out
985 vxor
$out6,$in6,$rndkey0
986 stvx_u
$out7,$x70,$out
988 vxor
$out7,$in7,$rndkey0
991 beq Loop_cbc_dec8x
# did $len-=128 borrow?
998 Loop_cbc_dec8x_tail
: # up to 7 "words" tail...
999 vncipher
$out1,$out1,v24
1000 vncipher
$out2,$out2,v24
1001 vncipher
$out3,$out3,v24
1002 vncipher
$out4,$out4,v24
1003 vncipher
$out5,$out5,v24
1004 vncipher
$out6,$out6,v24
1005 vncipher
$out7,$out7,v24
1006 lvx v24
,$x20,$key_ # round[3]
1007 addi
$key_,$key_,0x20
1009 vncipher
$out1,$out1,v25
1010 vncipher
$out2,$out2,v25
1011 vncipher
$out3,$out3,v25
1012 vncipher
$out4,$out4,v25
1013 vncipher
$out5,$out5,v25
1014 vncipher
$out6,$out6,v25
1015 vncipher
$out7,$out7,v25
1016 lvx v25
,$x10,$key_ # round[4]
1017 bdnz Loop_cbc_dec8x_tail
1019 vncipher
$out1,$out1,v24
1020 vncipher
$out2,$out2,v24
1021 vncipher
$out3,$out3,v24
1022 vncipher
$out4,$out4,v24
1023 vncipher
$out5,$out5,v24
1024 vncipher
$out6,$out6,v24
1025 vncipher
$out7,$out7,v24
1027 vncipher
$out1,$out1,v25
1028 vncipher
$out2,$out2,v25
1029 vncipher
$out3,$out3,v25
1030 vncipher
$out4,$out4,v25
1031 vncipher
$out5,$out5,v25
1032 vncipher
$out6,$out6,v25
1033 vncipher
$out7,$out7,v25
1035 vncipher
$out1,$out1,v26
1036 vncipher
$out2,$out2,v26
1037 vncipher
$out3,$out3,v26
1038 vncipher
$out4,$out4,v26
1039 vncipher
$out5,$out5,v26
1040 vncipher
$out6,$out6,v26
1041 vncipher
$out7,$out7,v26
1043 vncipher
$out1,$out1,v27
1044 vncipher
$out2,$out2,v27
1045 vncipher
$out3,$out3,v27
1046 vncipher
$out4,$out4,v27
1047 vncipher
$out5,$out5,v27
1048 vncipher
$out6,$out6,v27
1049 vncipher
$out7,$out7,v27
1051 vncipher
$out1,$out1,v28
1052 vncipher
$out2,$out2,v28
1053 vncipher
$out3,$out3,v28
1054 vncipher
$out4,$out4,v28
1055 vncipher
$out5,$out5,v28
1056 vncipher
$out6,$out6,v28
1057 vncipher
$out7,$out7,v28
1059 vncipher
$out1,$out1,v29
1060 vncipher
$out2,$out2,v29
1061 vncipher
$out3,$out3,v29
1062 vncipher
$out4,$out4,v29
1063 vncipher
$out5,$out5,v29
1064 vncipher
$out6,$out6,v29
1065 vncipher
$out7,$out7,v29
1067 vncipher
$out1,$out1,v30
1068 vxor
$ivec,$ivec,v31
# last round key
1069 vncipher
$out2,$out2,v30
1071 vncipher
$out3,$out3,v30
1073 vncipher
$out4,$out4,v30
1075 vncipher
$out5,$out5,v30
1077 vncipher
$out6,$out6,v30
1079 vncipher
$out7,$out7,v30
1082 cmplwi
$len,32 # switch($len)
1087 blt Lcbc_dec8x_three
1096 vncipherlast
$out1,$out1,$ivec
1097 vncipherlast
$out2,$out2,$in1
1098 vncipherlast
$out3,$out3,$in2
1099 vncipherlast
$out4,$out4,$in3
1100 vncipherlast
$out5,$out5,$in4
1101 vncipherlast
$out6,$out6,$in5
1102 vncipherlast
$out7,$out7,$in6
1105 le?vperm
$out1,$out1,$out1,$inpperm
1106 le?vperm
$out2,$out2,$out2,$inpperm
1107 stvx_u
$out1,$x00,$out
1108 le?vperm
$out3,$out3,$out3,$inpperm
1109 stvx_u
$out2,$x10,$out
1110 le?vperm
$out4,$out4,$out4,$inpperm
1111 stvx_u
$out3,$x20,$out
1112 le?vperm
$out5,$out5,$out5,$inpperm
1113 stvx_u
$out4,$x30,$out
1114 le?vperm
$out6,$out6,$out6,$inpperm
1115 stvx_u
$out5,$x40,$out
1116 le?vperm
$out7,$out7,$out7,$inpperm
1117 stvx_u
$out6,$x50,$out
1118 stvx_u
$out7,$x60,$out
1124 vncipherlast
$out2,$out2,$ivec
1125 vncipherlast
$out3,$out3,$in2
1126 vncipherlast
$out4,$out4,$in3
1127 vncipherlast
$out5,$out5,$in4
1128 vncipherlast
$out6,$out6,$in5
1129 vncipherlast
$out7,$out7,$in6
1132 le?vperm
$out2,$out2,$out2,$inpperm
1133 le?vperm
$out3,$out3,$out3,$inpperm
1134 stvx_u
$out2,$x00,$out
1135 le?vperm
$out4,$out4,$out4,$inpperm
1136 stvx_u
$out3,$x10,$out
1137 le?vperm
$out5,$out5,$out5,$inpperm
1138 stvx_u
$out4,$x20,$out
1139 le?vperm
$out6,$out6,$out6,$inpperm
1140 stvx_u
$out5,$x30,$out
1141 le?vperm
$out7,$out7,$out7,$inpperm
1142 stvx_u
$out6,$x40,$out
1143 stvx_u
$out7,$x50,$out
1149 vncipherlast
$out3,$out3,$ivec
1150 vncipherlast
$out4,$out4,$in3
1151 vncipherlast
$out5,$out5,$in4
1152 vncipherlast
$out6,$out6,$in5
1153 vncipherlast
$out7,$out7,$in6
1156 le?vperm
$out3,$out3,$out3,$inpperm
1157 le?vperm
$out4,$out4,$out4,$inpperm
1158 stvx_u
$out3,$x00,$out
1159 le?vperm
$out5,$out5,$out5,$inpperm
1160 stvx_u
$out4,$x10,$out
1161 le?vperm
$out6,$out6,$out6,$inpperm
1162 stvx_u
$out5,$x20,$out
1163 le?vperm
$out7,$out7,$out7,$inpperm
1164 stvx_u
$out6,$x30,$out
1165 stvx_u
$out7,$x40,$out
1171 vncipherlast
$out4,$out4,$ivec
1172 vncipherlast
$out5,$out5,$in4
1173 vncipherlast
$out6,$out6,$in5
1174 vncipherlast
$out7,$out7,$in6
1177 le?vperm
$out4,$out4,$out4,$inpperm
1178 le?vperm
$out5,$out5,$out5,$inpperm
1179 stvx_u
$out4,$x00,$out
1180 le?vperm
$out6,$out6,$out6,$inpperm
1181 stvx_u
$out5,$x10,$out
1182 le?vperm
$out7,$out7,$out7,$inpperm
1183 stvx_u
$out6,$x20,$out
1184 stvx_u
$out7,$x30,$out
1190 vncipherlast
$out5,$out5,$ivec
1191 vncipherlast
$out6,$out6,$in5
1192 vncipherlast
$out7,$out7,$in6
1195 le?vperm
$out5,$out5,$out5,$inpperm
1196 le?vperm
$out6,$out6,$out6,$inpperm
1197 stvx_u
$out5,$x00,$out
1198 le?vperm
$out7,$out7,$out7,$inpperm
1199 stvx_u
$out6,$x10,$out
1200 stvx_u
$out7,$x20,$out
1206 vncipherlast
$out6,$out6,$ivec
1207 vncipherlast
$out7,$out7,$in6
1210 le?vperm
$out6,$out6,$out6,$inpperm
1211 le?vperm
$out7,$out7,$out7,$inpperm
1212 stvx_u
$out6,$x00,$out
1213 stvx_u
$out7,$x10,$out
1219 vncipherlast
$out7,$out7,$ivec
1222 le?vperm
$out7,$out7,$out7,$inpperm
1227 le?vperm
$ivec,$ivec,$ivec,$inpperm
1228 stvx_u
$ivec,0,$ivp # write [unaligned] iv
1232 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1234 stvx
$inpperm,r11
,$sp
1236 stvx
$inpperm,r10
,$sp
1238 stvx
$inpperm,r11
,$sp
1240 stvx
$inpperm,r10
,$sp
1242 stvx
$inpperm,r11
,$sp
1244 stvx
$inpperm,r10
,$sp
1246 stvx
$inpperm,r11
,$sp
1250 lvx v20
,r10
,$sp # ABI says so
1272 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1273 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1274 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1275 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1276 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1277 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1278 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1281 .byte
0,12,0x14,0,0x80,6,6,0
1283 .size
.${prefix
}_cbc_encrypt
,.-.${prefix
}_cbc_encrypt
1287 #########################################################################
1288 {{{ # CTR procedure[s] #
1290 ####################### WARNING: Here be dragons! #######################
1292 # This code is written as 'ctr32', based on a 32-bit counter used
1293 # upstream. The kernel does *not* use a 32-bit counter. The kernel uses
1294 # a 128-bit counter.
1296 # This leads to subtle changes from the upstream code: the counter
1297 # is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in
1298 # both the bulk (8 blocks at a time) path, and in the individual block
1299 # path. Be aware of this when doing updates.
1302 # 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug")
1303 # 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword")
1304 # https://github.com/openssl/openssl/pull/8942
1306 #########################################################################
# Register names for the CTR procedures.
# General-purpose registers r3..r10, in order. $x10 takes the r8 slot, the
# same register that the optimized CTR procedure's $x10 is bound to.
1307 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
# Vector registers v0..v3, in order.
1308 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1309 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1314 .globl
.${prefix
}_ctr32_encrypt_blocks
1323 vxor
$rndkey0,$rndkey0,$rndkey0
1324 le?vspltisb
$tmp,0x0f
1326 lvx
$ivec,0,$ivp # load [unaligned] iv
1327 lvsl
$inpperm,0,$ivp
1328 lvx
$inptail,$idx,$ivp
1330 le?vxor
$inpperm,$inpperm,$tmp
1331 vperm
$ivec,$ivec,$inptail,$inpperm
1332 vsldoi
$one,$rndkey0,$one,1
1335 ?lvsl
$keyperm,0,$key # prepare for unaligned key
1336 lwz
$rounds,240($key)
1338 lvsr
$inpperm,0,r11
# prepare for unaligned load
1340 addi
$inp,$inp,15 # 15 is not typo
1341 le?vxor
$inpperm,$inpperm,$tmp
1343 srwi
$rounds,$rounds,1
1345 subi
$rounds,$rounds,1
1348 bge _aesp8_ctr32_encrypt8x
1350 ?lvsr
$outperm,0,$out # prepare for unaligned store
1351 vspltisb
$outmask,-1
1353 ?vperm
$outmask,$rndkey0,$outmask,$outperm
1354 le?vxor
$outperm,$outperm,$tmp
1358 lvx
$rndkey1,$idx,$key
1360 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1361 vxor
$inout,$ivec,$rndkey0
1362 lvx
$rndkey0,$idx,$key
1368 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1369 vcipher
$inout,$inout,$rndkey1
1370 lvx
$rndkey1,$idx,$key
1372 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1373 vcipher
$inout,$inout,$rndkey0
1374 lvx
$rndkey0,$idx,$key
1378 vadduqm
$ivec,$ivec,$one # Kernel change for 128-bit
1382 subic
. $len,$len,1 # blocks--
1384 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1385 vcipher
$inout,$inout,$rndkey1
1386 lvx
$rndkey1,$idx,$key
1387 vperm
$dat,$dat,$inptail,$inpperm
1389 ?vperm
$rndkey1,$rndkey0,$rndkey1,$keyperm
1391 vxor
$dat,$dat,$rndkey1 # last round key
1392 vcipherlast
$inout,$inout,$dat
1394 lvx
$rndkey1,$idx,$key
1396 vperm
$inout,$inout,$inout,$outperm
1397 vsel
$dat,$outhead,$inout,$outmask
1399 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1401 vxor
$inout,$ivec,$rndkey0
1402 lvx
$rndkey0,$idx,$key
1409 lvx
$inout,0,$out # redundant in aligned case
1410 vsel
$inout,$outhead,$inout,$outmask
1416 .byte
0,12,0x14,0,0,0,6,0
1419 #########################################################################
1420 {{ # Optimized CTR procedure #
# Register names for the 8-blocks-at-a-time CTR procedure.
# $x00..$x70 are r0, r8 and r26..r31; they are used as index registers in the
# lvx/lvx_u forms below. r26..r31 are pushed to the stack frame on entry.
# NOTE(review): presumably they hold the byte offsets 0x00..0x70 - the
# instructions that load them are not visible here; confirm.
1422 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
# Eight input blocks in v0..v3, v10 and v12..v14 (v11 is skipped).
# NOTE(review): v11 is presumably held by a value from the enclosing scope
# (e.g. $one) - confirm against the declaration above.
1423 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
# Eight counter/working/output blocks in v15..v22.
1424 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1425 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
1426 # v26-v31 last 6 round keys
# $tmp and $keyperm reuse the registers of $in3 and $in4 (v3 and v10).
1427 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
# $two, $three and $four reuse the registers bound to $outhead, $outperm and
# $outmask in the enclosing scope; $two is computed below as $one + $one.
1428 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1432 _aesp8_ctr32_encrypt8x
:
1433 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1434 li r10
,`$FRAME+8*16+15`
1435 li r11
,`$FRAME+8*16+31`
1436 stvx v20
,r10
,$sp # ABI says so
1459 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1461 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1463 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1465 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1467 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1469 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1471 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1475 subi
$rounds,$rounds,3 # -4 in total
1477 lvx
$rndkey0,$x00,$key # load key schedule
1481 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
1482 addi
$key_,$sp,$FRAME+15
1486 ?vperm v24
,v30
,v31
,$keyperm
1489 stvx v24
,$x00,$key_ # off-load round[1]
1490 ?vperm v25
,v31
,v30
,$keyperm
1492 stvx v25
,$x10,$key_ # off-load round[2]
1493 addi
$key_,$key_,0x20
1494 bdnz Load_ctr32_enc_key
1497 ?vperm v24
,v30
,v31
,$keyperm
1499 stvx v24
,$x00,$key_ # off-load round[3]
1500 ?vperm v25
,v31
,v26
,$keyperm
1502 stvx v25
,$x10,$key_ # off-load round[4]
1503 addi
$key_,$sp,$FRAME+15 # rewind $key_
1504 ?vperm v26
,v26
,v27
,$keyperm
1506 ?vperm v27
,v27
,v28
,$keyperm
1508 ?vperm v28
,v28
,v29
,$keyperm
1510 ?vperm v29
,v29
,v30
,$keyperm
1511 lvx
$out0,$x70,$key # borrow $out0
1512 ?vperm v30
,v30
,v31
,$keyperm
1513 lvx v24
,$x00,$key_ # pre-load round[1]
1514 ?vperm v31
,v31
,$out0,$keyperm
1515 lvx v25
,$x10,$key_ # pre-load round[2]
1517 vadduqm
$two,$one,$one
1518 subi
$inp,$inp,15 # undo "caller"
1521 vadduqm
$out1,$ivec,$one # counter values ...
1522 vadduqm
$out2,$ivec,$two # (do all ctr adds as 128-bit)
1523 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1525 vadduqm
$out3,$out1,$two
1526 vxor
$out1,$out1,$rndkey0
1527 le?lvsl
$inpperm,0,$idx
1528 vadduqm
$out4,$out2,$two
1529 vxor
$out2,$out2,$rndkey0
1530 le?vspltisb
$tmp,0x0f
1531 vadduqm
$out5,$out3,$two
1532 vxor
$out3,$out3,$rndkey0
1533 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1534 vadduqm
$out6,$out4,$two
1535 vxor
$out4,$out4,$rndkey0
1536 vadduqm
$out7,$out5,$two
1537 vxor
$out5,$out5,$rndkey0
1538 vadduqm
$ivec,$out6,$two # next counter value
1539 vxor
$out6,$out6,$rndkey0
1540 vxor
$out7,$out7,$rndkey0
1546 vcipher
$out0,$out0,v24
1547 vcipher
$out1,$out1,v24
1548 vcipher
$out2,$out2,v24
1549 vcipher
$out3,$out3,v24
1550 vcipher
$out4,$out4,v24
1551 vcipher
$out5,$out5,v24
1552 vcipher
$out6,$out6,v24
1553 vcipher
$out7,$out7,v24
1554 Loop_ctr32_enc8x_middle
:
1555 lvx v24
,$x20,$key_ # round[3]
1556 addi
$key_,$key_,0x20
1558 vcipher
$out0,$out0,v25
1559 vcipher
$out1,$out1,v25
1560 vcipher
$out2,$out2,v25
1561 vcipher
$out3,$out3,v25
1562 vcipher
$out4,$out4,v25
1563 vcipher
$out5,$out5,v25
1564 vcipher
$out6,$out6,v25
1565 vcipher
$out7,$out7,v25
1566 lvx v25
,$x10,$key_ # round[4]
1567 bdnz Loop_ctr32_enc8x
1569 subic r11
,$len,256 # $len-256, borrow $key_
1570 vcipher
$out0,$out0,v24
1571 vcipher
$out1,$out1,v24
1572 vcipher
$out2,$out2,v24
1573 vcipher
$out3,$out3,v24
1574 vcipher
$out4,$out4,v24
1575 vcipher
$out5,$out5,v24
1576 vcipher
$out6,$out6,v24
1577 vcipher
$out7,$out7,v24
1579 subfe r0
,r0
,r0
# borrow?-1:0
1580 vcipher
$out0,$out0,v25
1581 vcipher
$out1,$out1,v25
1582 vcipher
$out2,$out2,v25
1583 vcipher
$out3,$out3,v25
1584 vcipher
$out4,$out4,v25
1585 vcipher
$out5,$out5,v25
1586 vcipher
$out6,$out6,v25
1587 vcipher
$out7,$out7,v25
1590 addi
$key_,$sp,$FRAME+15 # rewind $key_
1591 vcipher
$out0,$out0,v26
1592 vcipher
$out1,$out1,v26
1593 vcipher
$out2,$out2,v26
1594 vcipher
$out3,$out3,v26
1595 vcipher
$out4,$out4,v26
1596 vcipher
$out5,$out5,v26
1597 vcipher
$out6,$out6,v26
1598 vcipher
$out7,$out7,v26
1599 lvx v24
,$x00,$key_ # re-pre-load round[1]
1601 subic
$len,$len,129 # $len-=129
1602 vcipher
$out0,$out0,v27
1603 addi
$len,$len,1 # $len-=128 really
1604 vcipher
$out1,$out1,v27
1605 vcipher
$out2,$out2,v27
1606 vcipher
$out3,$out3,v27
1607 vcipher
$out4,$out4,v27
1608 vcipher
$out5,$out5,v27
1609 vcipher
$out6,$out6,v27
1610 vcipher
$out7,$out7,v27
1611 lvx v25
,$x10,$key_ # re-pre-load round[2]
1613 vcipher
$out0,$out0,v28
1614 lvx_u
$in0,$x00,$inp # load input
1615 vcipher
$out1,$out1,v28
1616 lvx_u
$in1,$x10,$inp
1617 vcipher
$out2,$out2,v28
1618 lvx_u
$in2,$x20,$inp
1619 vcipher
$out3,$out3,v28
1620 lvx_u
$in3,$x30,$inp
1621 vcipher
$out4,$out4,v28
1622 lvx_u
$in4,$x40,$inp
1623 vcipher
$out5,$out5,v28
1624 lvx_u
$in5,$x50,$inp
1625 vcipher
$out6,$out6,v28
1626 lvx_u
$in6,$x60,$inp
1627 vcipher
$out7,$out7,v28
1628 lvx_u
$in7,$x70,$inp
1631 vcipher
$out0,$out0,v29
1632 le?vperm
$in0,$in0,$in0,$inpperm
1633 vcipher
$out1,$out1,v29
1634 le?vperm
$in1,$in1,$in1,$inpperm
1635 vcipher
$out2,$out2,v29
1636 le?vperm
$in2,$in2,$in2,$inpperm
1637 vcipher
$out3,$out3,v29
1638 le?vperm
$in3,$in3,$in3,$inpperm
1639 vcipher
$out4,$out4,v29
1640 le?vperm
$in4,$in4,$in4,$inpperm
1641 vcipher
$out5,$out5,v29
1642 le?vperm
$in5,$in5,$in5,$inpperm
1643 vcipher
$out6,$out6,v29
1644 le?vperm
$in6,$in6,$in6,$inpperm
1645 vcipher
$out7,$out7,v29
1646 le?vperm
$in7,$in7,$in7,$inpperm
1648 add
$inp,$inp,r0
# $inp is adjusted in such
1649 # way that at exit from the
1650 # loop inX-in7 are loaded
1652 subfe
. r0
,r0
,r0
# borrow?-1:0
1653 vcipher
$out0,$out0,v30
1654 vxor
$in0,$in0,v31
# xor with last round key
1655 vcipher
$out1,$out1,v30
1657 vcipher
$out2,$out2,v30
1659 vcipher
$out3,$out3,v30
1661 vcipher
$out4,$out4,v30
1663 vcipher
$out5,$out5,v30
1665 vcipher
$out6,$out6,v30
1667 vcipher
$out7,$out7,v30
1670 bne Lctr32_enc8x_break
# did $len-129 borrow?
1672 vcipherlast
$in0,$out0,$in0
1673 vcipherlast
$in1,$out1,$in1
1674 vadduqm
$out1,$ivec,$one # counter values ...
1675 vcipherlast
$in2,$out2,$in2
1676 vadduqm
$out2,$ivec,$two
1677 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1678 vcipherlast
$in3,$out3,$in3
1679 vadduqm
$out3,$out1,$two
1680 vxor
$out1,$out1,$rndkey0
1681 vcipherlast
$in4,$out4,$in4
1682 vadduqm
$out4,$out2,$two
1683 vxor
$out2,$out2,$rndkey0
1684 vcipherlast
$in5,$out5,$in5
1685 vadduqm
$out5,$out3,$two
1686 vxor
$out3,$out3,$rndkey0
1687 vcipherlast
$in6,$out6,$in6
1688 vadduqm
$out6,$out4,$two
1689 vxor
$out4,$out4,$rndkey0
1690 vcipherlast
$in7,$out7,$in7
1691 vadduqm
$out7,$out5,$two
1692 vxor
$out5,$out5,$rndkey0
1693 le?vperm
$in0,$in0,$in0,$inpperm
1694 vadduqm
$ivec,$out6,$two # next counter value
1695 vxor
$out6,$out6,$rndkey0
1696 le?vperm
$in1,$in1,$in1,$inpperm
1697 vxor
$out7,$out7,$rndkey0
1700 vcipher
$out0,$out0,v24
1701 stvx_u
$in0,$x00,$out
1702 le?vperm
$in2,$in2,$in2,$inpperm
1703 vcipher
$out1,$out1,v24
1704 stvx_u
$in1,$x10,$out
1705 le?vperm
$in3,$in3,$in3,$inpperm
1706 vcipher
$out2,$out2,v24
1707 stvx_u
$in2,$x20,$out
1708 le?vperm
$in4,$in4,$in4,$inpperm
1709 vcipher
$out3,$out3,v24
1710 stvx_u
$in3,$x30,$out
1711 le?vperm
$in5,$in5,$in5,$inpperm
1712 vcipher
$out4,$out4,v24
1713 stvx_u
$in4,$x40,$out
1714 le?vperm
$in6,$in6,$in6,$inpperm
1715 vcipher
$out5,$out5,v24
1716 stvx_u
$in5,$x50,$out
1717 le?vperm
$in7,$in7,$in7,$inpperm
1718 vcipher
$out6,$out6,v24
1719 stvx_u
$in6,$x60,$out
1720 vcipher
$out7,$out7,v24
1721 stvx_u
$in7,$x70,$out
1724 b Loop_ctr32_enc8x_middle
1729 blt Lctr32_enc8x_one
1731 beq Lctr32_enc8x_two
1733 blt Lctr32_enc8x_three
1735 beq Lctr32_enc8x_four
1737 blt Lctr32_enc8x_five
1739 beq Lctr32_enc8x_six
1741 blt Lctr32_enc8x_seven
1744 vcipherlast
$out0,$out0,$in0
1745 vcipherlast
$out1,$out1,$in1
1746 vcipherlast
$out2,$out2,$in2
1747 vcipherlast
$out3,$out3,$in3
1748 vcipherlast
$out4,$out4,$in4
1749 vcipherlast
$out5,$out5,$in5
1750 vcipherlast
$out6,$out6,$in6
1751 vcipherlast
$out7,$out7,$in7
1753 le?vperm
$out0,$out0,$out0,$inpperm
1754 le?vperm
$out1,$out1,$out1,$inpperm
1755 stvx_u
$out0,$x00,$out
1756 le?vperm
$out2,$out2,$out2,$inpperm
1757 stvx_u
$out1,$x10,$out
1758 le?vperm
$out3,$out3,$out3,$inpperm
1759 stvx_u
$out2,$x20,$out
1760 le?vperm
$out4,$out4,$out4,$inpperm
1761 stvx_u
$out3,$x30,$out
1762 le?vperm
$out5,$out5,$out5,$inpperm
1763 stvx_u
$out4,$x40,$out
1764 le?vperm
$out6,$out6,$out6,$inpperm
1765 stvx_u
$out5,$x50,$out
1766 le?vperm
$out7,$out7,$out7,$inpperm
1767 stvx_u
$out6,$x60,$out
1768 stvx_u
$out7,$x70,$out
1774 vcipherlast
$out0,$out0,$in1
1775 vcipherlast
$out1,$out1,$in2
1776 vcipherlast
$out2,$out2,$in3
1777 vcipherlast
$out3,$out3,$in4
1778 vcipherlast
$out4,$out4,$in5
1779 vcipherlast
$out5,$out5,$in6
1780 vcipherlast
$out6,$out6,$in7
1782 le?vperm
$out0,$out0,$out0,$inpperm
1783 le?vperm
$out1,$out1,$out1,$inpperm
1784 stvx_u
$out0,$x00,$out
1785 le?vperm
$out2,$out2,$out2,$inpperm
1786 stvx_u
$out1,$x10,$out
1787 le?vperm
$out3,$out3,$out3,$inpperm
1788 stvx_u
$out2,$x20,$out
1789 le?vperm
$out4,$out4,$out4,$inpperm
1790 stvx_u
$out3,$x30,$out
1791 le?vperm
$out5,$out5,$out5,$inpperm
1792 stvx_u
$out4,$x40,$out
1793 le?vperm
$out6,$out6,$out6,$inpperm
1794 stvx_u
$out5,$x50,$out
1795 stvx_u
$out6,$x60,$out
1801 vcipherlast
$out0,$out0,$in2
1802 vcipherlast
$out1,$out1,$in3
1803 vcipherlast
$out2,$out2,$in4
1804 vcipherlast
$out3,$out3,$in5
1805 vcipherlast
$out4,$out4,$in6
1806 vcipherlast
$out5,$out5,$in7
1808 le?vperm
$out0,$out0,$out0,$inpperm
1809 le?vperm
$out1,$out1,$out1,$inpperm
1810 stvx_u
$out0,$x00,$out
1811 le?vperm
$out2,$out2,$out2,$inpperm
1812 stvx_u
$out1,$x10,$out
1813 le?vperm
$out3,$out3,$out3,$inpperm
1814 stvx_u
$out2,$x20,$out
1815 le?vperm
$out4,$out4,$out4,$inpperm
1816 stvx_u
$out3,$x30,$out
1817 le?vperm
$out5,$out5,$out5,$inpperm
1818 stvx_u
$out4,$x40,$out
1819 stvx_u
$out5,$x50,$out
1825 vcipherlast
$out0,$out0,$in3
1826 vcipherlast
$out1,$out1,$in4
1827 vcipherlast
$out2,$out2,$in5
1828 vcipherlast
$out3,$out3,$in6
1829 vcipherlast
$out4,$out4,$in7
1831 le?vperm
$out0,$out0,$out0,$inpperm
1832 le?vperm
$out1,$out1,$out1,$inpperm
1833 stvx_u
$out0,$x00,$out
1834 le?vperm
$out2,$out2,$out2,$inpperm
1835 stvx_u
$out1,$x10,$out
1836 le?vperm
$out3,$out3,$out3,$inpperm
1837 stvx_u
$out2,$x20,$out
1838 le?vperm
$out4,$out4,$out4,$inpperm
1839 stvx_u
$out3,$x30,$out
1840 stvx_u
$out4,$x40,$out
1846 vcipherlast
$out0,$out0,$in4
1847 vcipherlast
$out1,$out1,$in5
1848 vcipherlast
$out2,$out2,$in6
1849 vcipherlast
$out3,$out3,$in7
1851 le?vperm
$out0,$out0,$out0,$inpperm
1852 le?vperm
$out1,$out1,$out1,$inpperm
1853 stvx_u
$out0,$x00,$out
1854 le?vperm
$out2,$out2,$out2,$inpperm
1855 stvx_u
$out1,$x10,$out
1856 le?vperm
$out3,$out3,$out3,$inpperm
1857 stvx_u
$out2,$x20,$out
1858 stvx_u
$out3,$x30,$out
1864 vcipherlast
$out0,$out0,$in5
1865 vcipherlast
$out1,$out1,$in6
1866 vcipherlast
$out2,$out2,$in7
1868 le?vperm
$out0,$out0,$out0,$inpperm
1869 le?vperm
$out1,$out1,$out1,$inpperm
1870 stvx_u
$out0,$x00,$out
1871 le?vperm
$out2,$out2,$out2,$inpperm
1872 stvx_u
$out1,$x10,$out
1873 stvx_u
$out2,$x20,$out
1879 vcipherlast
$out0,$out0,$in6
1880 vcipherlast
$out1,$out1,$in7
1882 le?vperm
$out0,$out0,$out0,$inpperm
1883 le?vperm
$out1,$out1,$out1,$inpperm
1884 stvx_u
$out0,$x00,$out
1885 stvx_u
$out1,$x10,$out
1891 vcipherlast
$out0,$out0,$in7
1893 le?vperm
$out0,$out0,$out0,$inpperm
1900 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1902 stvx
$inpperm,r11
,$sp
1904 stvx
$inpperm,r10
,$sp
1906 stvx
$inpperm,r11
,$sp
1908 stvx
$inpperm,r10
,$sp
1910 stvx
$inpperm,r11
,$sp
1912 stvx
$inpperm,r10
,$sp
1914 stvx
$inpperm,r11
,$sp
1918 lvx v20
,r10
,$sp # ABI says so
1940 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1941 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1942 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1943 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1944 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1945 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1946 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1949 .byte
0,12,0x14,0,0x80,6,6,0
1951 .size
.${prefix
}_ctr32_encrypt_blocks
,.-.${prefix
}_ctr32_encrypt_blocks
1955 #########################################################################
1956 {{{ # XTS procedures #
1957 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1958 # const AES_KEY *key1, const AES_KEY *key2, #
1959 # [const] unsigned char iv[16]); #
1960 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1961 # the input tweak value is assumed to be encrypted already, and the #
1962 # last tweak value, one suitable for a consecutive call on the same #
1963 # chunk of data, is written back to the original buffer. In addition, #
1964 # in "tweak chaining" mode only complete input blocks are processed. #
# Symbolic register names used by the XTS routines that follow.
# General-purpose registers r3..r10: the first six names follow the argument
# order of the prototype in the banner comment (inp, out, len, key1, key2,
# iv); $rounds and $idx take the remaining two.
1966 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
# Vector registers v0..v12.
1967 my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1968 my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1969 my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
# $taillen is a second name for the register that carries $key2.
1970 my $taillen = $key2;
# Swap the two names: from here on $inp stands for r10 and $idx for r3.
# The routines copy r3 into $inp as their first visible instruction.
1972 ($inp,$idx) = ($idx,$inp); # reassign
# XTS encrypt entry point.
# NOTE(review): several lines of this routine (entry label, loop labels, some
# compares and branches) are not visible in this excerpt; the comments below
# cover only the instructions that are shown.
1975 .globl
.${prefix
}_xts_encrypt
1976 mr
$inp,r3
# reassign
1982 mfspr r12
,256 # save vrsave
# Splat 7 into every byte; on little-endian builds also derive leperm, the
# permutation applied to each result right before it is stored.
1986 vspltisb
$seven,0x07 # 0x070707..07
1987 le?lvsl
$leperm,r11
,r11
1988 le?vspltisb
$tmp,0x0f
1989 le?vxor
$leperm,$leperm,$seven
# Fetch the 16-byte iv from a possibly unaligned address: two aligned loads
# merged by vperm under the lvsl-derived control vector.
1992 lvx
$tweak,0,$ivp # load [unaligned] iv
1993 lvsl
$inpperm,0,$ivp
1994 lvx
$inptail,$idx,$ivp
1995 le?vxor
$inpperm,$inpperm,$tmp
1996 vperm
$tweak,$tweak,$inptail,$inpperm
1999 lvsr
$inpperm,0,r11
# prepare for unaligned load
2001 addi
$inp,$inp,15 # 15 is not typo
2002 le?vxor
$inpperm,$inpperm,$tmp
# With a non-NULL key2 the iv is encrypted under key2 to form the initial
# tweak; a NULL key2 branches to the tweak-chaining path instead.
2004 ${UCMP
}i
$key2,0 # key2==NULL?
2005 beq Lxts_enc_no_key2
2007 ?lvsl
$keyperm,0,$key2 # prepare for unaligned key
# The round count is read from byte offset 240 of the key structure and
# turned into rounds/2-1 (the cipher rounds below come in pairs).
2008 lwz
$rounds,240($key2)
2009 srwi
$rounds,$rounds,1
2010 subi
$rounds,$rounds,1
2013 lvx
$rndkey0,0,$key2
2014 lvx
$rndkey1,$idx,$key2
2016 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2017 vxor
$tweak,$tweak,$rndkey0
2018 lvx
$rndkey0,$idx,$key2
# Two cipher rounds per pass, alternating between rndkey1 and rndkey0; every
# round key is realigned through keyperm before use.
2023 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2024 vcipher
$tweak,$tweak,$rndkey1
2025 lvx
$rndkey1,$idx,$key2
2027 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2028 vcipher
$tweak,$tweak,$rndkey0
2029 lvx
$rndkey0,$idx,$key2
2033 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2034 vcipher
$tweak,$tweak,$rndkey1
2035 lvx
$rndkey1,$idx,$key2
2036 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2037 vcipherlast
$tweak,$tweak,$rndkey0
2039 li
$ivp,0 # don't chain the tweak
# Tweak-chaining path (key2 is NULL): len is masked with idx.
# NOTE(review): the instruction that sets idx for this mask is not visible
# here; presumably it clears the low four bits of len - confirm.
2044 and $len,$len,$idx # in "tweak chaining"
2045 # mode only complete
2046 # blocks are processed
# Same round-count preparation as above, this time for key1 (the data key).
2051 ?lvsl
$keyperm,0,$key1 # prepare for unaligned key
2052 lwz
$rounds,240($key1)
2053 srwi
$rounds,$rounds,1
2054 subi
$rounds,$rounds,1
# Build the 0x870101..01 mask used by every tweak update below.
2057 vslb
$eighty7,$seven,$seven # 0x808080..80
2058 vor
$eighty7,$eighty7,$seven # 0x878787..87
2059 vspltisb
$tmp,1 # 0x010101..01
2060 vsldoi
$eighty7,$eighty7,$tmp,15 # 0x870101..01
# Hand over to the six-block routine when the (not visible here) compare
# says so; otherwise continue with the one-block-at-a-time path.
2063 bge _aesp8_xts_encrypt6x
# taillen = (len mod 16) - 16
2065 andi
. $taillen,$len,15
2067 subi
$taillen,$taillen,16
# One block: input xor tweak xor round-key-0, then the cipher rounds.
2072 lvx
$rndkey0,0,$key1
2073 lvx
$rndkey1,$idx,$key1
2075 vperm
$inout,$inout,$inptail,$inpperm
2076 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2077 vxor
$inout,$inout,$tweak
2078 vxor
$inout,$inout,$rndkey0
2079 lvx
$rndkey0,$idx,$key1
2086 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2087 vcipher
$inout,$inout,$rndkey1
2088 lvx
$rndkey1,$idx,$key1
2090 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2091 vcipher
$inout,$inout,$rndkey0
2092 lvx
$rndkey0,$idx,$key1
2096 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2097 vcipher
$inout,$inout,$rndkey1
2098 lvx
$rndkey1,$idx,$key1
# The tweak is xored into the last round key first, so vcipherlast delivers
# the block with the output-side tweak xor already applied.
2100 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2101 vxor
$rndkey0,$rndkey0,$tweak
2102 vcipherlast
$output,$inout,$rndkey0
# Store the result; little-endian builds store the leperm-permuted copy.
2104 le?vperm
$tmp,$output,$output,$leperm
2106 le?stvx_u
$tmp,0,$out
2107 be?stvx_u
$output,0,$out
2116 lvx
$rndkey0,0,$key1
2117 lvx
$rndkey1,$idx,$key1
# Tweak update: every byte is doubled (vaddubm), and the per-byte sign mask
# (vsrab by 7), rotated by one byte and masked with 0x870101..01, is xored
# back in as carry/reduction.
# NOTE(review): this looks like the XTS multiply-by-x in GF(2^128) - confirm
# against the XTS specification.
2125 vsrab
$tmp,$tweak,$seven # next tweak value
2126 vaddubm
$tweak,$tweak,$tweak
2127 vsldoi
$tmp,$tmp,$tmp,15
2128 vand
$tmp,$tmp,$eighty7
2129 vxor
$tweak,$tweak,$tmp
2131 vperm
$inout,$inout,$inptail,$inpperm
2132 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2133 vxor
$inout,$inout,$tweak
2134 vxor
$output,$output,$rndkey0 # just in case $len<16
2135 vxor
$inout,$inout,$rndkey0
2136 lvx
$rndkey0,$idx,$key1
# Stealing preparation: inptail is rebuilt as a byte-select mask (from lvsr
# on len) and vsel merges the next input with the previous output under it.
2143 vxor
$output,$output,$tweak
2144 lvsr
$inpperm,0,$len # $inpperm is no longer needed
2145 vxor
$inptail,$inptail,$inptail # $inptail is no longer needed
2147 vperm
$inptail,$inptail,$tmp,$inpperm
2148 vsel
$inout,$inout,$output,$inptail
2157 bdnz Loop_xts_enc_steal
2160 b Loop_xts_enc
# one more time...
# Advance the tweak once more and store it through ivp (permuted with leperm
# on little-endian builds).
# NOTE(review): the guard that skips this store when ivp was zeroed above is
# not visible in this excerpt.
2166 vsrab
$tmp,$tweak,$seven # next tweak value
2167 vaddubm
$tweak,$tweak,$tweak
2168 vsldoi
$tmp,$tmp,$tmp,15
2169 vand
$tmp,$tmp,$eighty7
2170 vxor
$tweak,$tweak,$tmp
2172 le?vperm
$tweak,$tweak,$tweak,$leperm
2173 stvx_u
$tweak,0,$ivp
2176 mtspr
256,r12
# restore vrsave
2180 .byte
0,12,0x04,0,0x80,6,6,0
2182 .size
.${prefix
}_xts_encrypt
,.-.${prefix
}_xts_encrypt
# XTS decrypt entry point.
# NOTE(review): several lines of this routine (entry label, loop labels, some
# compares and branches) are not visible in this excerpt; the comments below
# cover only the instructions that are shown.
2184 .globl
.${prefix
}_xts_decrypt
2185 mr
$inp,r3
# reassign
2191 mfspr r12
,256 # save vrsave
# Splat 7 into every byte; on little-endian builds also derive leperm, the
# permutation applied to each result right before it is stored.
2200 vspltisb
$seven,0x07 # 0x070707..07
2201 le?lvsl
$leperm,r11
,r11
2202 le?vspltisb
$tmp,0x0f
2203 le?vxor
$leperm,$leperm,$seven
# Fetch the 16-byte iv from a possibly unaligned address: two aligned loads
# merged by vperm under the lvsl-derived control vector.
2206 lvx
$tweak,0,$ivp # load [unaligned] iv
2207 lvsl
$inpperm,0,$ivp
2208 lvx
$inptail,$idx,$ivp
2209 le?vxor
$inpperm,$inpperm,$tmp
2210 vperm
$tweak,$tweak,$inptail,$inpperm
2213 lvsr
$inpperm,0,r11
# prepare for unaligned load
2215 addi
$inp,$inp,15 # 15 is not typo
2216 le?vxor
$inpperm,$inpperm,$tmp
# With a non-NULL key2 the iv is run through the forward cipher (vcipher,
# not vncipher) under key2 to form the initial tweak; a NULL key2 branches
# to the tweak-chaining path instead.
2218 ${UCMP
}i
$key2,0 # key2==NULL?
2219 beq Lxts_dec_no_key2
2221 ?lvsl
$keyperm,0,$key2 # prepare for unaligned key
# The round count is read from byte offset 240 of the key structure and
# turned into rounds/2-1 (the cipher rounds below come in pairs).
2222 lwz
$rounds,240($key2)
2223 srwi
$rounds,$rounds,1
2224 subi
$rounds,$rounds,1
2227 lvx
$rndkey0,0,$key2
2228 lvx
$rndkey1,$idx,$key2
2230 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2231 vxor
$tweak,$tweak,$rndkey0
2232 lvx
$rndkey0,$idx,$key2
2237 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2238 vcipher
$tweak,$tweak,$rndkey1
2239 lvx
$rndkey1,$idx,$key2
2241 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2242 vcipher
$tweak,$tweak,$rndkey0
2243 lvx
$rndkey0,$idx,$key2
2247 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2248 vcipher
$tweak,$tweak,$rndkey1
2249 lvx
$rndkey1,$idx,$key2
2250 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2251 vcipherlast
$tweak,$tweak,$rndkey0
2253 li
$ivp,0 # don't chain the tweak
# Tweak-chaining path (key2 is NULL): idx is added to len here, whereas the
# encrypt routine masks len with idx.
# NOTE(review): the instructions that compute idx for this add are not
# visible in this excerpt.
2259 add
$len,$len,$idx # in "tweak chaining"
2260 # mode only complete
2261 # blocks are processed
# Same round-count preparation as above, this time for key1 (the data key).
2266 ?lvsl
$keyperm,0,$key1 # prepare for unaligned key
2267 lwz
$rounds,240($key1)
2268 srwi
$rounds,$rounds,1
2269 subi
$rounds,$rounds,1
# Build the 0x870101..01 mask used by every tweak update below.
2272 vslb
$eighty7,$seven,$seven # 0x808080..80
2273 vor
$eighty7,$eighty7,$seven # 0x878787..87
2274 vspltisb
$tmp,1 # 0x010101..01
2275 vsldoi
$eighty7,$eighty7,$tmp,15 # 0x870101..01
# Hand over to the six-block routine when the (not visible here) compare
# says so; otherwise continue with the one-block-at-a-time path.
2278 bge _aesp8_xts_decrypt6x
# One block: input xor tweak xor round-key-0, then the inverse cipher rounds.
2280 lvx
$rndkey0,0,$key1
2281 lvx
$rndkey1,$idx,$key1
2283 vperm
$inout,$inout,$inptail,$inpperm
2284 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2285 vxor
$inout,$inout,$tweak
2286 vxor
$inout,$inout,$rndkey0
2287 lvx
$rndkey0,$idx,$key1
2297 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2298 vncipher
$inout,$inout,$rndkey1
2299 lvx
$rndkey1,$idx,$key1
2301 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2302 vncipher
$inout,$inout,$rndkey0
2303 lvx
$rndkey0,$idx,$key1
2307 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2308 vncipher
$inout,$inout,$rndkey1
2309 lvx
$rndkey1,$idx,$key1
# The tweak is xored into the last round key first, so vncipherlast delivers
# the block with the output-side tweak xor already applied.
2311 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2312 vxor
$rndkey0,$rndkey0,$tweak
2313 vncipherlast
$output,$inout,$rndkey0
# Store the result; little-endian builds store the leperm-permuted copy.
2315 le?vperm
$tmp,$output,$output,$leperm
2317 le?stvx_u
$tmp,0,$out
2318 be?stvx_u
$output,0,$out
2327 lvx
$rndkey0,0,$key1
2328 lvx
$rndkey1,$idx,$key1
# Tweak update: every byte is doubled (vaddubm), and the per-byte sign mask
# (vsrab by 7), rotated by one byte and masked with 0x870101..01, is xored
# back in as carry/reduction.
# NOTE(review): this looks like the XTS multiply-by-x in GF(2^128) - confirm
# against the XTS specification.
2331 vsrab
$tmp,$tweak,$seven # next tweak value
2332 vaddubm
$tweak,$tweak,$tweak
2333 vsldoi
$tmp,$tmp,$tmp,15
2334 vand
$tmp,$tmp,$eighty7
2335 vxor
$tweak,$tweak,$tmp
2337 vperm
$inout,$inout,$inptail,$inpperm
2338 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2339 vxor
$inout,$inout,$tweak
2340 vxor
$inout,$inout,$rndkey0
2341 lvx
$rndkey0,$idx,$key1
# tweak1 is the tweak that follows the current one; tweak itself is left
# untouched here.
2349 vsrab
$tmp,$tweak,$seven # next tweak value
2350 vaddubm
$tweak1,$tweak,$tweak
2351 vsldoi
$tmp,$tmp,$tmp,15
2352 vand
$tmp,$tmp,$eighty7
2353 vxor
$tweak1,$tweak1,$tmp
# The block already carries tweak from the xor above: take it out again and
# apply tweak1 in its place.
2358 vxor
$inout,$inout,$tweak # :-(
2359 vxor
$inout,$inout,$tweak1 # :-)
2362 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2363 vncipher
$inout,$inout,$rndkey1
2364 lvx
$rndkey1,$idx,$key1
2366 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2367 vncipher
$inout,$inout,$rndkey0
2368 lvx
$rndkey0,$idx,$key1
2370 bdnz Loop_xts_dec_short
2372 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2373 vncipher
$inout,$inout,$rndkey1
2374 lvx
$rndkey1,$idx,$key1
# This block finishes under tweak1 rather than tweak.
2376 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2377 vxor
$rndkey0,$rndkey0,$tweak1
2378 vncipherlast
$output,$inout,$rndkey0
2380 le?vperm
$tmp,$output,$output,$leperm
2382 le?stvx_u
$tmp,0,$out
2383 be?stvx_u
$output,0,$out
2388 lvx
$rndkey0,0,$key1
2389 lvx
$rndkey1,$idx,$key1
2391 vperm
$inout,$inout,$inptail,$inpperm
2392 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
# Stealing preparation: inptail is rebuilt as a byte-select mask (from lvsr
# on len) and vsel merges the next input with the previous output under it;
# the merged block is then xored with tweak and round-key-0.
2394 lvsr
$inpperm,0,$len # $inpperm is no longer needed
2395 vxor
$inptail,$inptail,$inptail # $inptail is no longer needed
2397 vperm
$inptail,$inptail,$tmp,$inpperm
2398 vsel
$inout,$inout,$output,$inptail
2400 vxor
$rndkey0,$rndkey0,$tweak
2401 vxor
$inout,$inout,$rndkey0
2402 lvx
$rndkey0,$idx,$key1
2411 bdnz Loop_xts_dec_steal
2414 b Loop_xts_dec
# one more time...
# Advance the tweak once more and store it through ivp (permuted with leperm
# on little-endian builds).
# NOTE(review): the guard that skips this store when ivp was zeroed above is
# not visible in this excerpt.
2420 vsrab
$tmp,$tweak,$seven # next tweak value
2421 vaddubm
$tweak,$tweak,$tweak
2422 vsldoi
$tmp,$tmp,$tmp,15
2423 vand
$tmp,$tmp,$eighty7
2424 vxor
$tweak,$tweak,$tmp
2426 le?vperm
$tweak,$tweak,$tweak,$leperm
2427 stvx_u
$tweak,0,$ivp
2430 mtspr
256,r12
# restore vrsave
2434 .byte
0,12,0x04,0,0x80,6,6,0
2436 .size
.${prefix
}_xts_decrypt
,.-.${prefix
}_xts_decrypt
2438 #########################################################################
2439 {{ # Optimized XTS procedures #
# Register names for the six-blocks-at-a-time XTS routines.
# $x00..$x70 name r0, r3 and r26..r31; the routines use them as the index
# operand of indexed vector loads and stores (presumably holding byte offsets
# 0x00..0x70 - the instructions that set them are outside this block).
2441 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
# On "osx" flavours the literal 0 is used in place of the r0 name.
2442 $x00=0 if ($flavour =~ /osx/);
# Six input blocks (v0..v5), six working/output blocks (v7, v12..v16) and
# six per-block tweaks (v17..v22).
2443 my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2444 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2445 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2446 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
2447 # v26-v31 last 6 round keys
# $out0 is v7, the same register the enclosing scope already calls $keyperm.
2448 my ($keyperm)=($out0); # aliases with "caller", redundant assignment
# Six-blocks-at-a-time XTS encryption; the xts_encrypt routine branches here.
# NOTE(review): many lines of this routine (most labels, compares, calls and
# part of the register save/restore) are not visible in this excerpt; the
# comments below cover only the instructions that are shown.
2453 _aesp8_xts_encrypt6x
:
# Prologue: allocate the frame, store r11 in the LR save slot, then save the
# vector registers (only v20 is visible here), vrsave and r26-r31.
2454 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2456 li r7
,`$FRAME+8*16+15`
2457 li r3
,`$FRAME+8*16+31`
2458 $PUSH r11
,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2459 stvx v20
,r7
,$sp # ABI says so
2482 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2484 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
2486 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
2488 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
2490 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
2492 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
2494 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
# Key schedule: every round key is realigned through keyperm. The early
# rounds are written to the stack area at sp+FRAME+15 and later cycled
# through v24/v25; the final rounds stay resident in v26-v31.
2498 subi
$rounds,$rounds,3 # -4 in total
2500 lvx
$rndkey0,$x00,$key1 # load key schedule
2502 addi
$key1,$key1,0x20
2504 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
2505 addi
$key_,$sp,$FRAME+15
2509 ?vperm v24
,v30
,v31
,$keyperm
2511 addi
$key1,$key1,0x20
2512 stvx v24
,$x00,$key_ # off-load round[1]
2513 ?vperm v25
,v31
,v30
,$keyperm
2515 stvx v25
,$x10,$key_ # off-load round[2]
2516 addi
$key_,$key_,0x20
2517 bdnz Load_xts_enc_key
2520 ?vperm v24
,v30
,v31
,$keyperm
2522 stvx v24
,$x00,$key_ # off-load round[3]
2523 ?vperm v25
,v31
,v26
,$keyperm
2525 stvx v25
,$x10,$key_ # off-load round[4]
2526 addi
$key_,$sp,$FRAME+15 # rewind $key_
2527 ?vperm v26
,v26
,v27
,$keyperm
2529 ?vperm v27
,v27
,v28
,$keyperm
2531 ?vperm v28
,v28
,v29
,$keyperm
2533 ?vperm v29
,v29
,v30
,$keyperm
2534 lvx
$twk5,$x70,$key1 # borrow $twk5
2535 ?vperm v30
,v30
,v31
,$keyperm
2536 lvx v24
,$x00,$key_ # pre-load round[1]
2537 ?vperm v31
,v31
,$twk5,$keyperm
2538 lvx v25
,$x10,$key_ # pre-load round[2]
# First six blocks. twkN holds tweak xor round-key-0, so xoring an input
# with twkN applies the tweak and the initial round key in one step. in0 is
# assembled from the caller's inout/inptail pair; in1..in5 are loaded here.
# Each vsrab/vaddubm/vsldoi/vand/vxor group advances the tweak by one block.
2540 vperm
$in0,$inout,$inptail,$inpperm
2541 subi
$inp,$inp,31 # undo "caller"
2542 vxor
$twk0,$tweak,$rndkey0
2543 vsrab
$tmp,$tweak,$seven # next tweak value
2544 vaddubm
$tweak,$tweak,$tweak
2545 vsldoi
$tmp,$tmp,$tmp,15
2546 vand
$tmp,$tmp,$eighty7
2547 vxor
$out0,$in0,$twk0
2548 vxor
$tweak,$tweak,$tmp
2550 lvx_u
$in1,$x10,$inp
2551 vxor
$twk1,$tweak,$rndkey0
2552 vsrab
$tmp,$tweak,$seven # next tweak value
2553 vaddubm
$tweak,$tweak,$tweak
2554 vsldoi
$tmp,$tmp,$tmp,15
2555 le?vperm
$in1,$in1,$in1,$leperm
2556 vand
$tmp,$tmp,$eighty7
2557 vxor
$out1,$in1,$twk1
2558 vxor
$tweak,$tweak,$tmp
2560 lvx_u
$in2,$x20,$inp
# taillen = len mod 16; it is removed from len two groups further down.
2561 andi
. $taillen,$len,15
2562 vxor
$twk2,$tweak,$rndkey0
2563 vsrab
$tmp,$tweak,$seven # next tweak value
2564 vaddubm
$tweak,$tweak,$tweak
2565 vsldoi
$tmp,$tmp,$tmp,15
2566 le?vperm
$in2,$in2,$in2,$leperm
2567 vand
$tmp,$tmp,$eighty7
2568 vxor
$out2,$in2,$twk2
2569 vxor
$tweak,$tweak,$tmp
2571 lvx_u
$in3,$x30,$inp
2572 sub $len,$len,$taillen
2573 vxor
$twk3,$tweak,$rndkey0
2574 vsrab
$tmp,$tweak,$seven # next tweak value
2575 vaddubm
$tweak,$tweak,$tweak
2576 vsldoi
$tmp,$tmp,$tmp,15
2577 le?vperm
$in3,$in3,$in3,$leperm
2578 vand
$tmp,$tmp,$eighty7
2579 vxor
$out3,$in3,$twk3
2580 vxor
$tweak,$tweak,$tmp
2582 lvx_u
$in4,$x40,$inp
2584 vxor
$twk4,$tweak,$rndkey0
2585 vsrab
$tmp,$tweak,$seven # next tweak value
2586 vaddubm
$tweak,$tweak,$tweak
2587 vsldoi
$tmp,$tmp,$tmp,15
2588 le?vperm
$in4,$in4,$in4,$leperm
2589 vand
$tmp,$tmp,$eighty7
2590 vxor
$out4,$in4,$twk4
2591 vxor
$tweak,$tweak,$tmp
2593 lvx_u
$in5,$x50,$inp
2595 vxor
$twk5,$tweak,$rndkey0
2596 vsrab
$tmp,$tweak,$seven # next tweak value
2597 vaddubm
$tweak,$tweak,$tweak
2598 vsldoi
$tmp,$tmp,$tmp,15
2599 le?vperm
$in5,$in5,$in5,$leperm
2600 vand
$tmp,$tmp,$eighty7
2601 vxor
$out5,$in5,$twk5
2602 vxor
$tweak,$tweak,$tmp
# Fold round-key-0 into the last round key: twkN xor v31 then equals
# tweak xor last-round-key, which is the operand handed to vcipherlast.
2604 vxor v31
,v31
,$rndkey0
# Main loop: rounds fed from the v24/v25 pair, which is reloaded from the
# stack copy of the key schedule as the rounds progress.
2610 vcipher
$out0,$out0,v24
2611 vcipher
$out1,$out1,v24
2612 vcipher
$out2,$out2,v24
2613 vcipher
$out3,$out3,v24
2614 vcipher
$out4,$out4,v24
2615 vcipher
$out5,$out5,v24
2616 lvx v24
,$x20,$key_ # round[3]
2617 addi
$key_,$key_,0x20
2619 vcipher
$out0,$out0,v25
2620 vcipher
$out1,$out1,v25
2621 vcipher
$out2,$out2,v25
2622 vcipher
$out3,$out3,v25
2623 vcipher
$out4,$out4,v25
2624 vcipher
$out5,$out5,v25
2625 lvx v25
,$x10,$key_ # round[4]
# Remaining rounds, interleaved with the tweaks for the next six blocks.
# subic/subfe turn "len-96 borrowed" into a 0/-1 value in r0, which is added
# to inp further down and tested by the loop-closing branch.
2628 subic
$len,$len,96 # $len-=96
2629 vxor
$in0,$twk0,v31
# xor with last round key
2630 vcipher
$out0,$out0,v24
2631 vcipher
$out1,$out1,v24
2632 vsrab
$tmp,$tweak,$seven # next tweak value
2633 vxor
$twk0,$tweak,$rndkey0
2634 vaddubm
$tweak,$tweak,$tweak
2635 vcipher
$out2,$out2,v24
2636 vcipher
$out3,$out3,v24
2637 vsldoi
$tmp,$tmp,$tmp,15
2638 vcipher
$out4,$out4,v24
2639 vcipher
$out5,$out5,v24
2641 subfe
. r0
,r0
,r0
# borrow?-1:0
2642 vand
$tmp,$tmp,$eighty7
2643 vcipher
$out0,$out0,v25
2644 vcipher
$out1,$out1,v25
2645 vxor
$tweak,$tweak,$tmp
2646 vcipher
$out2,$out2,v25
2647 vcipher
$out3,$out3,v25
2649 vsrab
$tmp,$tweak,$seven # next tweak value
2650 vxor
$twk1,$tweak,$rndkey0
2651 vcipher
$out4,$out4,v25
2652 vcipher
$out5,$out5,v25
2655 vaddubm
$tweak,$tweak,$tweak
2656 vsldoi
$tmp,$tmp,$tmp,15
2657 vcipher
$out0,$out0,v26
2658 vcipher
$out1,$out1,v26
2659 vand
$tmp,$tmp,$eighty7
2660 vcipher
$out2,$out2,v26
2661 vcipher
$out3,$out3,v26
2662 vxor
$tweak,$tweak,$tmp
2663 vcipher
$out4,$out4,v26
2664 vcipher
$out5,$out5,v26
2666 add
$inp,$inp,r0
# $inp is adjusted in such
2667 # way that at exit from the
2668 # loop inX-in5 are loaded
2671 vsrab
$tmp,$tweak,$seven # next tweak value
2672 vxor
$twk2,$tweak,$rndkey0
2673 vaddubm
$tweak,$tweak,$tweak
2674 vcipher
$out0,$out0,v27
2675 vcipher
$out1,$out1,v27
2676 vsldoi
$tmp,$tmp,$tmp,15
2677 vcipher
$out2,$out2,v27
2678 vcipher
$out3,$out3,v27
2679 vand
$tmp,$tmp,$eighty7
2680 vcipher
$out4,$out4,v27
2681 vcipher
$out5,$out5,v27
2683 addi
$key_,$sp,$FRAME+15 # rewind $key_
2684 vxor
$tweak,$tweak,$tmp
2685 vcipher
$out0,$out0,v28
2686 vcipher
$out1,$out1,v28
2688 vsrab
$tmp,$tweak,$seven # next tweak value
2689 vxor
$twk3,$tweak,$rndkey0
2690 vcipher
$out2,$out2,v28
2691 vcipher
$out3,$out3,v28
2692 vaddubm
$tweak,$tweak,$tweak
2693 vsldoi
$tmp,$tmp,$tmp,15
2694 vcipher
$out4,$out4,v28
2695 vcipher
$out5,$out5,v28
2696 lvx v24
,$x00,$key_ # re-pre-load round[1]
2697 vand
$tmp,$tmp,$eighty7
2699 vcipher
$out0,$out0,v29
2700 vcipher
$out1,$out1,v29
2701 vxor
$tweak,$tweak,$tmp
2702 vcipher
$out2,$out2,v29
2703 vcipher
$out3,$out3,v29
2705 vsrab
$tmp,$tweak,$seven # next tweak value
2706 vxor
$twk4,$tweak,$rndkey0
2707 vcipher
$out4,$out4,v29
2708 vcipher
$out5,$out5,v29
2709 lvx v25
,$x10,$key_ # re-pre-load round[2]
2710 vaddubm
$tweak,$tweak,$tweak
2711 vsldoi
$tmp,$tmp,$tmp,15
2713 vcipher
$out0,$out0,v30
2714 vcipher
$out1,$out1,v30
2715 vand
$tmp,$tmp,$eighty7
2716 vcipher
$out2,$out2,v30
2717 vcipher
$out3,$out3,v30
2718 vxor
$tweak,$tweak,$tmp
2719 vcipher
$out4,$out4,v30
2720 vcipher
$out5,$out5,v30
2722 vsrab
$tmp,$tweak,$seven # next tweak value
2723 vxor
$twk5,$tweak,$rndkey0
# Final round for all six blocks, overlapped with loading the next six
# inputs. The sixth result goes to tmp so it survives for stealing.
2725 vcipherlast
$out0,$out0,$in0
2726 lvx_u
$in0,$x00,$inp # load next input block
2727 vaddubm
$tweak,$tweak,$tweak
2728 vsldoi
$tmp,$tmp,$tmp,15
2729 vcipherlast
$out1,$out1,$in1
2730 lvx_u
$in1,$x10,$inp
2731 vcipherlast
$out2,$out2,$in2
2732 le?vperm
$in0,$in0,$in0,$leperm
2733 lvx_u
$in2,$x20,$inp
2734 vand
$tmp,$tmp,$eighty7
2735 vcipherlast
$out3,$out3,$in3
2736 le?vperm
$in1,$in1,$in1,$leperm
2737 lvx_u
$in3,$x30,$inp
2738 vcipherlast
$out4,$out4,$in4
2739 le?vperm
$in2,$in2,$in2,$leperm
2740 lvx_u
$in4,$x40,$inp
2741 vxor
$tweak,$tweak,$tmp
2742 vcipherlast
$tmp,$out5,$in5 # last block might be needed
2744 le?vperm
$in3,$in3,$in3,$leperm
2745 lvx_u
$in5,$x50,$inp
2747 le?vperm
$in4,$in4,$in4,$leperm
2748 le?vperm
$in5,$in5,$in5,$leperm
# Store the six results and start the next six blocks (input xor twkN).
2750 le?vperm
$out0,$out0,$out0,$leperm
2751 le?vperm
$out1,$out1,$out1,$leperm
2752 stvx_u
$out0,$x00,$out # store output
2753 vxor
$out0,$in0,$twk0
2754 le?vperm
$out2,$out2,$out2,$leperm
2755 stvx_u
$out1,$x10,$out
2756 vxor
$out1,$in1,$twk1
2757 le?vperm
$out3,$out3,$out3,$leperm
2758 stvx_u
$out2,$x20,$out
2759 vxor
$out2,$in2,$twk2
2760 le?vperm
$out4,$out4,$out4,$leperm
2761 stvx_u
$out3,$x30,$out
2762 vxor
$out3,$in3,$twk3
2763 le?vperm
$out5,$tmp,$tmp,$leperm
2764 stvx_u
$out4,$x40,$out
2765 vxor
$out4,$in4,$twk4
2766 le?stvx_u
$out5,$x50,$out
2767 be?stvx_u
$tmp, $x50,$out
2768 vxor
$out5,$in5,$twk5
2772 beq Loop_xts_enc6x
# did $len-=96 borrow?
# Tail: add the 96 back to len and dispatch on what is left.
# NOTE(review): only one of the dispatch branches is visible in this excerpt.
2774 addic
. $len,$len,0x60
2781 blt Lxts_enc6x_three
# Five blocks left: in1..in5 paired with twk0..twk4.
# NOTE(review): presumably the five-block helper is called between these
# xors and the stores below - the call itself is not visible; confirm.
2786 vxor
$out0,$in1,$twk0
2787 vxor
$out1,$in2,$twk1
2788 vxor
$out2,$in3,$twk2
2789 vxor
$out3,$in4,$twk3
2790 vxor
$out4,$in5,$twk4
# twk0 keeps the first tweak that was not consumed, and tmp keeps the last
# result xored with that tweak, ready for the stealing code.
2794 le?vperm
$out0,$out0,$out0,$leperm
2795 vmr
$twk0,$twk5 # unused tweak
2796 le?vperm
$out1,$out1,$out1,$leperm
2797 stvx_u
$out0,$x00,$out # store output
2798 le?vperm
$out2,$out2,$out2,$leperm
2799 stvx_u
$out1,$x10,$out
2800 le?vperm
$out3,$out3,$out3,$leperm
2801 stvx_u
$out2,$x20,$out
2802 vxor
$tmp,$out4,$twk5 # last block prep for stealing
2803 le?vperm
$out4,$out4,$out4,$leperm
2804 stvx_u
$out3,$x30,$out
2805 stvx_u
$out4,$x40,$out
2807 bne Lxts_enc6x_steal
# Four blocks left: in2..in5 with twk0..twk3; the unused lane out4 is zeroed.
2812 vxor
$out0,$in2,$twk0
2813 vxor
$out1,$in3,$twk1
2814 vxor
$out2,$in4,$twk2
2815 vxor
$out3,$in5,$twk3
2816 vxor
$out4,$out4,$out4
2820 le?vperm
$out0,$out0,$out0,$leperm
2821 vmr
$twk0,$twk4 # unused tweak
2822 le?vperm
$out1,$out1,$out1,$leperm
2823 stvx_u
$out0,$x00,$out # store output
2824 le?vperm
$out2,$out2,$out2,$leperm
2825 stvx_u
$out1,$x10,$out
2826 vxor
$tmp,$out3,$twk4 # last block prep for stealing
2827 le?vperm
$out3,$out3,$out3,$leperm
2828 stvx_u
$out2,$x20,$out
2829 stvx_u
$out3,$x30,$out
2831 bne Lxts_enc6x_steal
# Three blocks left: in3..in5 with twk0..twk2; out3 and out4 are zeroed.
2836 vxor
$out0,$in3,$twk0
2837 vxor
$out1,$in4,$twk1
2838 vxor
$out2,$in5,$twk2
2839 vxor
$out3,$out3,$out3
2840 vxor
$out4,$out4,$out4
2844 le?vperm
$out0,$out0,$out0,$leperm
2845 vmr
$twk0,$twk3 # unused tweak
2846 le?vperm
$out1,$out1,$out1,$leperm
2847 stvx_u
$out0,$x00,$out # store output
2848 vxor
$tmp,$out2,$twk3 # last block prep for stealing
2849 le?vperm
$out2,$out2,$out2,$leperm
2850 stvx_u
$out1,$x10,$out
2851 stvx_u
$out2,$x20,$out
2853 bne Lxts_enc6x_steal
# Two blocks left: in4 and in5 with twk0 and twk1; out2..out4 are zeroed.
2858 vxor
$out0,$in4,$twk0
2859 vxor
$out1,$in5,$twk1
2860 vxor
$out2,$out2,$out2
2861 vxor
$out3,$out3,$out3
2862 vxor
$out4,$out4,$out4
2866 le?vperm
$out0,$out0,$out0,$leperm
2867 vmr
$twk0,$twk2 # unused tweak
2868 vxor
$tmp,$out1,$twk2 # last block prep for stealing
2869 le?vperm
$out1,$out1,$out1,$leperm
2870 stvx_u
$out0,$x00,$out # store output
2871 stvx_u
$out1,$x10,$out
2873 bne Lxts_enc6x_steal
# One block left: in5 with twk0, encrypted inline with a single-lane loop.
2878 vxor
$out0,$in5,$twk0
2881 vcipher
$out0,$out0,v24
2882 lvx v24
,$x20,$key_ # round[3]
2883 addi
$key_,$key_,0x20
2885 vcipher
$out0,$out0,v25
2886 lvx v25
,$x10,$key_ # round[4]
2889 add
$inp,$inp,$taillen
2891 vcipher
$out0,$out0,v24
2894 vcipher
$out0,$out0,v25
2896 lvsr
$inpperm,0,$taillen
2897 vcipher
$out0,$out0,v26
2900 vcipher
$out0,$out0,v27
2902 addi
$key_,$sp,$FRAME+15 # rewind $key_
2903 vcipher
$out0,$out0,v28
2904 lvx v24
,$x00,$key_ # re-pre-load round[1]
2906 vcipher
$out0,$out0,v29
2907 lvx v25
,$x10,$key_ # re-pre-load round[2]
# twk0 xor v31 gives tweak xor last-round-key for the vcipherlast below.
2908 vxor
$twk0,$twk0,v31
2910 le?vperm
$in0,$in0,$in0,$leperm
2911 vcipher
$out0,$out0,v30
2913 vperm
$in0,$in0,$in0,$inpperm
2914 vcipherlast
$out0,$out0,$twk0
2916 vmr
$twk0,$twk1 # unused tweak
2917 vxor
$tmp,$out0,$twk1 # last block prep for stealing
2918 le?vperm
$out0,$out0,$out0,$leperm
2919 stvx_u
$out0,$x00,$out # store output
2921 bne Lxts_enc6x_steal
# No full block left: line up the trailing partial input in in0 (shifted by
# the lvsr control derived from taillen) for the stealing step.
# NOTE(review): the load that fetches in0 is not visible in this excerpt.
2929 add
$inp,$inp,$taillen
2932 lvsr
$inpperm,0,$taillen # $in5 is no more
2933 le?vperm
$in0,$in0,$in0,$leperm
2934 vperm
$in0,$in0,$in0,$inpperm
2935 vxor
$tmp,$tmp,$twk0
# Stealing: build a byte-select mask in out0 and merge the partial input
# (in0 xor twk0) with the previous block kept in tmp.
2937 vxor
$in0,$in0,$twk0
2938 vxor
$out0,$out0,$out0
2940 vperm
$out0,$out0,$out1,$inpperm
2941 vsel
$out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2946 Loop_xts_enc6x_steal
:
2949 bdnz Loop_xts_enc6x_steal
2953 b Loop_xts_enc1x
# one more time...
# twk0 still has round-key-0 xored in; take it out to recover the plain
# tweak and store it through ivp.
# NOTE(review): the guard around this store is not visible in this excerpt.
2960 vxor
$tweak,$twk0,$rndkey0
2961 le?vperm
$tweak,$tweak,$tweak,$leperm
2962 stvx_u
$tweak,0,$ivp
# Epilogue: overwrite the on-stack round-key copies, restore the saved
# vector registers (only v20 is visible here) and r26-r31, release the frame.
2968 stvx
$seven,r10
,$sp # wipe copies of round keys
2986 lvx v20
,r10
,$sp # ABI says so
3008 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
3009 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
3010 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
3011 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
3012 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
3013 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
3014 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3017 .byte
0,12,0x04,1,0x80,6,6,0
# Five-lane cipher pipeline (out0..out4); the loop below closes on the label
# _aesp8_xts_enc5x.
# NOTE(review): the label line, the return and a few interleaved instructions
# are not visible in this excerpt; the comments cover only what is shown.
# Round pairs fed from v24/v25, which are refilled from the stack copy of
# the key schedule on every pass.
3022 vcipher
$out0,$out0,v24
3023 vcipher
$out1,$out1,v24
3024 vcipher
$out2,$out2,v24
3025 vcipher
$out3,$out3,v24
3026 vcipher
$out4,$out4,v24
3027 lvx v24
,$x20,$key_ # round[3]
3028 addi
$key_,$key_,0x20
3030 vcipher
$out0,$out0,v25
3031 vcipher
$out1,$out1,v25
3032 vcipher
$out2,$out2,v25
3033 vcipher
$out3,$out3,v25
3034 vcipher
$out4,$out4,v25
3035 lvx v25
,$x10,$key_ # round[4]
3036 bdnz _aesp8_xts_enc5x
# Remaining rounds. In parallel, inp is advanced by taillen and in0 is
# prepared (permuted through the lvsr control derived from taillen) for the
# stealing step that follows in the caller.
3038 add
$inp,$inp,$taillen
3040 vcipher
$out0,$out0,v24
3041 vcipher
$out1,$out1,v24
3042 vcipher
$out2,$out2,v24
3043 vcipher
$out3,$out3,v24
3044 vcipher
$out4,$out4,v24
3047 vcipher
$out0,$out0,v25
3048 vcipher
$out1,$out1,v25
3049 vcipher
$out2,$out2,v25
3050 vcipher
$out3,$out3,v25
3051 vcipher
$out4,$out4,v25
# twk0 xor v31 becomes the vcipherlast operand for lane 0.
3052 vxor
$twk0,$twk0,v31
3054 vcipher
$out0,$out0,v26
3055 lvsr
$inpperm,r0
,$taillen # $in5 is no more
3056 vcipher
$out1,$out1,v26
3057 vcipher
$out2,$out2,v26
3058 vcipher
$out3,$out3,v26
3059 vcipher
$out4,$out4,v26
3062 vcipher
$out0,$out0,v27
3064 vcipher
$out1,$out1,v27
3065 vcipher
$out2,$out2,v27
3066 vcipher
$out3,$out3,v27
3067 vcipher
$out4,$out4,v27
3070 addi
$key_,$sp,$FRAME+15 # rewind $key_
3071 vcipher
$out0,$out0,v28
3072 vcipher
$out1,$out1,v28
3073 vcipher
$out2,$out2,v28
3074 vcipher
$out3,$out3,v28
3075 vcipher
$out4,$out4,v28
3076 lvx v24
,$x00,$key_ # re-pre-load round[1]
3079 vcipher
$out0,$out0,v29
3080 le?vperm
$in0,$in0,$in0,$leperm
3081 vcipher
$out1,$out1,v29
3082 vcipher
$out2,$out2,v29
3083 vcipher
$out3,$out3,v29
3084 vcipher
$out4,$out4,v29
3085 lvx v25
,$x10,$key_ # re-pre-load round[2]
3088 vcipher
$out0,$out0,v30
3089 vperm
$in0,$in0,$in0,$inpperm
3090 vcipher
$out1,$out1,v30
3091 vcipher
$out2,$out2,v30
3092 vcipher
$out3,$out3,v30
3093 vcipher
$out4,$out4,v30
# Final round: lane 0 uses twk0, lanes 1..4 use in1..in4.
# NOTE(review): in1..in4 presumably hold the matching twkN xor v31 values at
# this point - the instructions that set them are not visible; confirm.
3095 vcipherlast
$out0,$out0,$twk0
3096 vcipherlast
$out1,$out1,$in1
3097 vcipherlast
$out2,$out2,$in2
3098 vcipherlast
$out3,$out3,$in3
3099 vcipherlast
$out4,$out4,$in4
3102 .byte
0,12,0x14,0,0,0,0,0
# Entry of the six-block XTS decryption routine. The prologue allocates the
# stack frame and saves state that must be preserved: a vector register
# (v20, "ABI says so"), vrsave and r26..r31.
3105 _aesp8_xts_decrypt6x
:
3106 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
# r7 is the frame offset used as index for the v20 store below; r3 is set 16
# bytes further on (its use is not visible in this excerpt).
3108 li r7
,`$FRAME+8*16+15`
3109 li r3
,`$FRAME+8*16+31`
# NOTE(review): r11 goes to the LR save slot; presumably it holds the link
# register, but the instruction that sets it is not shown here - confirm.
3110 $PUSH r11
,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3111 stvx v20
,r7
,$sp # ABI says so
3134 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3136 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
3138 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
3140 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
3142 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
3144 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
3146 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
# Key-schedule setup. Round keys are read through key1 and aligned with the
# '?'-prefixed vperm (an endian-specific form that the post-processing pass
# at the end of this file rewrites for little-endian flavours). The early
# rounds are copied pairwise to the stack area addressed by key_, looping
# over Load_xts_dec_key; the later keys stay resident in v26..v31.
3150 subi
$rounds,$rounds,3 # -4 in total
3152 lvx
$rndkey0,$x00,$key1 # load key schedule
3154 addi
$key1,$key1,0x20
3156 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
3157 addi
$key_,$sp,$FRAME+15
3161 ?vperm v24
,v30
,v31
,$keyperm
3163 addi
$key1,$key1,0x20
3164 stvx v24
,$x00,$key_ # off-load round[1]
3165 ?vperm v25
,v31
,v30
,$keyperm
3167 stvx v25
,$x10,$key_ # off-load round[2]
3168 addi
$key_,$key_,0x20
3169 bdnz Load_xts_dec_key
3172 ?vperm v24
,v30
,v31
,$keyperm
3174 stvx v24
,$x00,$key_ # off-load round[3]
3175 ?vperm v25
,v31
,v26
,$keyperm
3177 stvx v25
,$x10,$key_ # off-load round[4]
3178 addi
$key_,$sp,$FRAME+15 # rewind $key_
# Align the remaining keys in place; each register is combined with its
# successor. twk5 is borrowed as scratch for the very last key load.
3179 ?vperm v26
,v26
,v27
,$keyperm
3181 ?vperm v27
,v27
,v28
,$keyperm
3183 ?vperm v28
,v28
,v29
,$keyperm
3185 ?vperm v29
,v29
,v30
,$keyperm
3186 lvx
$twk5,$x70,$key1 # borrow $twk5
3187 ?vperm v30
,v30
,v31
,$keyperm
3188 lvx v24
,$x00,$key_ # pre-load round[1]
3189 ?vperm v31
,v31
,$twk5,$keyperm
3190 lvx v25
,$x10,$key_ # pre-load round[2]
# Initial tweak set for six blocks. For block N: twkN = tweak xor rndkey0
# (so the round-0 key is already folded in), outN = inN xor twkN, and tweak
# is then stepped to its next value. The step doubles every byte (vaddubm),
# takes per-byte sign masks (vsrab by seven), rotates them byte-wise
# (vsldoi 15), masks with eighty7 and xors the result back into tweak.
# NOTE(review): seven and eighty7 are initialised outside this excerpt;
# presumably this is the usual XTS multiply-by-x - confirm.
3192 vperm
$in0,$inout,$inptail,$inpperm
3193 subi
$inp,$inp,31 # undo "caller"
3194 vxor
$twk0,$tweak,$rndkey0
3195 vsrab
$tmp,$tweak,$seven # next tweak value
3196 vaddubm
$tweak,$tweak,$tweak
3197 vsldoi
$tmp,$tmp,$tmp,15
3198 vand
$tmp,$tmp,$eighty7
3199 vxor
$out0,$in0,$twk0
3200 vxor
$tweak,$tweak,$tmp
3202 lvx_u
$in1,$x10,$inp
3203 vxor
$twk1,$tweak,$rndkey0
3204 vsrab
$tmp,$tweak,$seven # next tweak value
3205 vaddubm
$tweak,$tweak,$tweak
3206 vsldoi
$tmp,$tmp,$tmp,15
3207 le?vperm
$in1,$in1,$in1,$leperm
3208 vand
$tmp,$tmp,$eighty7
3209 vxor
$out1,$in1,$twk1
3210 vxor
$tweak,$tweak,$tmp
3212 lvx_u
$in2,$x20,$inp
# taillen = low four bits of len, i.e. the size of a trailing partial block.
3213 andi
. $taillen,$len,15
3214 vxor
$twk2,$tweak,$rndkey0
3215 vsrab
$tmp,$tweak,$seven # next tweak value
3216 vaddubm
$tweak,$tweak,$tweak
3217 vsldoi
$tmp,$tmp,$tmp,15
3218 le?vperm
$in2,$in2,$in2,$leperm
3219 vand
$tmp,$tmp,$eighty7
3220 vxor
$out2,$in2,$twk2
3221 vxor
$tweak,$tweak,$tmp
3223 lvx_u
$in3,$x30,$inp
# Trim len down to a whole number of 16-byte blocks.
3224 sub $len,$len,$taillen
3225 vxor
$twk3,$tweak,$rndkey0
3226 vsrab
$tmp,$tweak,$seven # next tweak value
3227 vaddubm
$tweak,$tweak,$tweak
3228 vsldoi
$tmp,$tmp,$tmp,15
3229 le?vperm
$in3,$in3,$in3,$leperm
3230 vand
$tmp,$tmp,$eighty7
3231 vxor
$out3,$in3,$twk3
3232 vxor
$tweak,$tweak,$tmp
3234 lvx_u
$in4,$x40,$inp
3236 vxor
$twk4,$tweak,$rndkey0
3237 vsrab
$tmp,$tweak,$seven # next tweak value
3238 vaddubm
$tweak,$tweak,$tweak
3239 vsldoi
$tmp,$tmp,$tmp,15
3240 le?vperm
$in4,$in4,$in4,$leperm
3241 vand
$tmp,$tmp,$eighty7
3242 vxor
$out4,$in4,$twk4
3243 vxor
$tweak,$tweak,$tmp
3245 lvx_u
$in5,$x50,$inp
3247 vxor
$twk5,$tweak,$rndkey0
3248 vsrab
$tmp,$tweak,$seven # next tweak value
3249 vaddubm
$tweak,$tweak,$tweak
3250 vsldoi
$tmp,$tmp,$tmp,15
3251 le?vperm
$in5,$in5,$in5,$leperm
3252 vand
$tmp,$tmp,$eighty7
3253 vxor
$out5,$in5,$twk5
3254 vxor
$tweak,$tweak,$tmp
# Fold rndkey0 into v31 too: xoring a twkN (which already contains rndkey0)
# with v31 later cancels rndkey0 and leaves the plain tweak combined with the
# original v31 value.
3256 vxor v31
,v31
,$rndkey0
# Main six-block loop (the beq at the end branches back to Loop_xts_dec6x).
# The vncipher rounds over out0..out5 are interleaved with computing the
# next six tweaks and, near the end, with loading the next six input blocks.
#
# First two rounds: keyed by v24 and v25, each followed by a reload of that
# key register through key_.
3262 vncipher
$out0,$out0,v24
3263 vncipher
$out1,$out1,v24
3264 vncipher
$out2,$out2,v24
3265 vncipher
$out3,$out3,v24
3266 vncipher
$out4,$out4,v24
3267 vncipher
$out5,$out5,v24
3268 lvx v24
,$x20,$key_ # round[3]
3269 addi
$key_,$key_,0x20
3271 vncipher
$out0,$out0,v25
3272 vncipher
$out1,$out1,v25
3273 vncipher
$out2,$out2,v25
3274 vncipher
$out3,$out3,v25
3275 vncipher
$out4,$out4,v25
3276 vncipher
$out5,$out5,v25
3277 lvx v25
,$x10,$key_ # round[4]
# Account for the six blocks in flight; the carry produced by subic is turned
# into a 0 / -1 value in r0 by the subfe. further down.
3280 subic
$len,$len,96 # $len-=96
# in0 has already been consumed into out0, so it is reused to hold the
# final-round operand for out0 (current twk0 combined with v31).
3281 vxor
$in0,$twk0,v31
# xor with last round key
3282 vncipher
$out0,$out0,v24
3283 vncipher
$out1,$out1,v24
# From here on the tweak-update sequence is interleaved with the rounds; each
# twkN is refreshed for the next iteration before tweak is stepped again.
3284 vsrab
$tmp,$tweak,$seven # next tweak value
3285 vxor
$twk0,$tweak,$rndkey0
3286 vaddubm
$tweak,$tweak,$tweak
3287 vncipher
$out2,$out2,v24
3288 vncipher
$out3,$out3,v24
3289 vsldoi
$tmp,$tmp,$tmp,15
3290 vncipher
$out4,$out4,v24
3291 vncipher
$out5,$out5,v24
3293 subfe
. r0
,r0
,r0
# borrow?-1:0
3294 vand
$tmp,$tmp,$eighty7
3295 vncipher
$out0,$out0,v25
3296 vncipher
$out1,$out1,v25
3297 vxor
$tweak,$tweak,$tmp
3298 vncipher
$out2,$out2,v25
3299 vncipher
$out3,$out3,v25
3301 vsrab
$tmp,$tweak,$seven # next tweak value
3302 vxor
$twk1,$tweak,$rndkey0
3303 vncipher
$out4,$out4,v25
3304 vncipher
$out5,$out5,v25
3307 vaddubm
$tweak,$tweak,$tweak
3308 vsldoi
$tmp,$tmp,$tmp,15
3309 vncipher
$out0,$out0,v26
3310 vncipher
$out1,$out1,v26
3311 vand
$tmp,$tmp,$eighty7
3312 vncipher
$out2,$out2,v26
3313 vncipher
$out3,$out3,v26
3314 vxor
$tweak,$tweak,$tmp
3315 vncipher
$out4,$out4,v26
3316 vncipher
$out5,$out5,v26
3318 add
$inp,$inp,r0
# $inp is adjusted in such
3319 # way that at exit from the
3320 # loop inX-in5 are loaded
3323 vsrab
$tmp,$tweak,$seven # next tweak value
3324 vxor
$twk2,$tweak,$rndkey0
3325 vaddubm
$tweak,$tweak,$tweak
3326 vncipher
$out0,$out0,v27
3327 vncipher
$out1,$out1,v27
3328 vsldoi
$tmp,$tmp,$tmp,15
3329 vncipher
$out2,$out2,v27
3330 vncipher
$out3,$out3,v27
3331 vand
$tmp,$tmp,$eighty7
3332 vncipher
$out4,$out4,v27
3333 vncipher
$out5,$out5,v27
3335 addi
$key_,$sp,$FRAME+15 # rewind $key_
3336 vxor
$tweak,$tweak,$tmp
3337 vncipher
$out0,$out0,v28
3338 vncipher
$out1,$out1,v28
3340 vsrab
$tmp,$tweak,$seven # next tweak value
3341 vxor
$twk3,$tweak,$rndkey0
3342 vncipher
$out2,$out2,v28
3343 vncipher
$out3,$out3,v28
3344 vaddubm
$tweak,$tweak,$tweak
3345 vsldoi
$tmp,$tmp,$tmp,15
3346 vncipher
$out4,$out4,v28
3347 vncipher
$out5,$out5,v28
3348 lvx v24
,$x00,$key_ # re-pre-load round[1]
3349 vand
$tmp,$tmp,$eighty7
3351 vncipher
$out0,$out0,v29
3352 vncipher
$out1,$out1,v29
3353 vxor
$tweak,$tweak,$tmp
3354 vncipher
$out2,$out2,v29
3355 vncipher
$out3,$out3,v29
3357 vsrab
$tmp,$tweak,$seven # next tweak value
3358 vxor
$twk4,$tweak,$rndkey0
3359 vncipher
$out4,$out4,v29
3360 vncipher
$out5,$out5,v29
3361 lvx v25
,$x10,$key_ # re-pre-load round[2]
3362 vaddubm
$tweak,$tweak,$tweak
3363 vsldoi
$tmp,$tmp,$tmp,15
3365 vncipher
$out0,$out0,v30
3366 vncipher
$out1,$out1,v30
3367 vand
$tmp,$tmp,$eighty7
3368 vncipher
$out2,$out2,v30
3369 vncipher
$out3,$out3,v30
3370 vxor
$tweak,$tweak,$tmp
3371 vncipher
$out4,$out4,v30
3372 vncipher
$out5,$out5,v30
3374 vsrab
$tmp,$tweak,$seven # next tweak value
3375 vxor
$twk5,$tweak,$rndkey0
# Final round for each block, interleaved with loading the next six inputs.
# Every inN is used as the final-round operand first and only then reloaded.
# NOTE(review): the instructions that prepare in1..in5 as operands are not
# all visible in this excerpt - confirm against the full file.
3377 vncipherlast
$out0,$out0,$in0
3378 lvx_u
$in0,$x00,$inp # load next input block
3379 vaddubm
$tweak,$tweak,$tweak
3380 vsldoi
$tmp,$tmp,$tmp,15
3381 vncipherlast
$out1,$out1,$in1
3382 lvx_u
$in1,$x10,$inp
3383 vncipherlast
$out2,$out2,$in2
3384 le?vperm
$in0,$in0,$in0,$leperm
3385 lvx_u
$in2,$x20,$inp
3386 vand
$tmp,$tmp,$eighty7
3387 vncipherlast
$out3,$out3,$in3
3388 le?vperm
$in1,$in1,$in1,$leperm
3389 lvx_u
$in3,$x30,$inp
3390 vncipherlast
$out4,$out4,$in4
3391 le?vperm
$in2,$in2,$in2,$leperm
3392 lvx_u
$in4,$x40,$inp
3393 vxor
$tweak,$tweak,$tmp
3394 vncipherlast
$out5,$out5,$in5
3395 le?vperm
$in3,$in3,$in3,$leperm
3396 lvx_u
$in5,$x50,$inp
3398 le?vperm
$in4,$in4,$in4,$leperm
3399 le?vperm
$in5,$in5,$in5,$leperm
# Store the six finished blocks and immediately start the next iteration by
# xoring each freshly loaded input with its refreshed tweak.
3401 le?vperm
$out0,$out0,$out0,$leperm
3402 le?vperm
$out1,$out1,$out1,$leperm
3403 stvx_u
$out0,$x00,$out # store output
3404 vxor
$out0,$in0,$twk0
3405 le?vperm
$out2,$out2,$out2,$leperm
3406 stvx_u
$out1,$x10,$out
3407 vxor
$out1,$in1,$twk1
3408 le?vperm
$out3,$out3,$out3,$leperm
3409 stvx_u
$out2,$x20,$out
3410 vxor
$out2,$in2,$twk2
3411 le?vperm
$out4,$out4,$out4,$leperm
3412 stvx_u
$out3,$x30,$out
3413 vxor
$out3,$in3,$twk3
3414 le?vperm
$out5,$out5,$out5,$leperm
3415 stvx_u
$out4,$x40,$out
3416 vxor
$out4,$in4,$twk4
3417 stvx_u
$out5,$x50,$out
3418 vxor
$out5,$in5,$twk5
3422 beq Loop_xts_dec6x
# did $len-=96 borrow?
# Tail handling after the six-block loop. The 96 subtracted inside the loop
# is added back to len, and the code then handles five, four, three, two or
# one leftover block(s). The leftover inputs always sit in the highest-
# numbered inN registers and are paired with twk0 upwards. After each case
# the first unused tweak is moved into twk0, and out0 is pre-xored with the
# tweak after that one before branching to Lxts_dec6x_steal.
# NOTE(review): compares, labels and the calls into the five-block helper
# are not visible in this excerpt.
3424 addic
. $len,$len,0x60
3431 blt Lxts_dec6x_three
# Five blocks: in1..in5 with twk0..twk4.
3436 vxor
$out0,$in1,$twk0
3437 vxor
$out1,$in2,$twk1
3438 vxor
$out2,$in3,$twk2
3439 vxor
$out3,$in4,$twk3
3440 vxor
$out4,$in5,$twk4
3444 le?vperm
$out0,$out0,$out0,$leperm
3445 vmr
$twk0,$twk5 # unused tweak
3446 vxor
$twk1,$tweak,$rndkey0
3447 le?vperm
$out1,$out1,$out1,$leperm
3448 stvx_u
$out0,$x00,$out # store output
3449 vxor
$out0,$in0,$twk1
3450 le?vperm
$out2,$out2,$out2,$leperm
3451 stvx_u
$out1,$x10,$out
3452 le?vperm
$out3,$out3,$out3,$leperm
3453 stvx_u
$out2,$x20,$out
3454 le?vperm
$out4,$out4,$out4,$leperm
3455 stvx_u
$out3,$x30,$out
3456 stvx_u
$out4,$x40,$out
3458 bne Lxts_dec6x_steal
# Four blocks: in2..in5 with twk0..twk3; the unused out4 lane is zeroed.
3463 vxor
$out0,$in2,$twk0
3464 vxor
$out1,$in3,$twk1
3465 vxor
$out2,$in4,$twk2
3466 vxor
$out3,$in5,$twk3
3467 vxor
$out4,$out4,$out4
3471 le?vperm
$out0,$out0,$out0,$leperm
3472 vmr
$twk0,$twk4 # unused tweak
3474 le?vperm
$out1,$out1,$out1,$leperm
3475 stvx_u
$out0,$x00,$out # store output
3476 vxor
$out0,$in0,$twk5
3477 le?vperm
$out2,$out2,$out2,$leperm
3478 stvx_u
$out1,$x10,$out
3479 le?vperm
$out3,$out3,$out3,$leperm
3480 stvx_u
$out2,$x20,$out
3481 stvx_u
$out3,$x30,$out
3483 bne Lxts_dec6x_steal
# Three blocks: in3..in5 with twk0..twk2; out3 and out4 are zeroed.
3488 vxor
$out0,$in3,$twk0
3489 vxor
$out1,$in4,$twk1
3490 vxor
$out2,$in5,$twk2
3491 vxor
$out3,$out3,$out3
3492 vxor
$out4,$out4,$out4
3496 le?vperm
$out0,$out0,$out0,$leperm
3497 vmr
$twk0,$twk3 # unused tweak
3499 le?vperm
$out1,$out1,$out1,$leperm
3500 stvx_u
$out0,$x00,$out # store output
3501 vxor
$out0,$in0,$twk4
3502 le?vperm
$out2,$out2,$out2,$leperm
3503 stvx_u
$out1,$x10,$out
3504 stvx_u
$out2,$x20,$out
3506 bne Lxts_dec6x_steal
# Two blocks: in4 and in5 with twk0 and twk1; out2..out4 are zeroed.
3511 vxor
$out0,$in4,$twk0
3512 vxor
$out1,$in5,$twk1
3513 vxor
$out2,$out2,$out2
3514 vxor
$out3,$out3,$out3
3515 vxor
$out4,$out4,$out4
3519 le?vperm
$out0,$out0,$out0,$leperm
3520 vmr
$twk0,$twk2 # unused tweak
3522 le?vperm
$out1,$out1,$out1,$leperm
3523 stvx_u
$out0,$x00,$out # store output
3524 vxor
$out0,$in0,$twk3
3525 stvx_u
$out1,$x10,$out
3527 bne Lxts_dec6x_steal
# One block: in5 with twk0, decrypted inline with a single-block round
# sequence instead of the five-block helper.
3532 vxor
$out0,$in5,$twk0
3535 vncipher
$out0,$out0,v24
3536 lvx v24
,$x20,$key_ # round[3]
3537 addi
$key_,$key_,0x20
3539 vncipher
$out0,$out0,v25
3540 lvx v25
,$x10,$key_ # round[4]
3544 vncipher
$out0,$out0,v24
3548 vncipher
$out0,$out0,v25
3551 vncipher
$out0,$out0,v26
3554 vncipher
$out0,$out0,v27
3556 addi
$key_,$sp,$FRAME+15 # rewind $key_
3557 vncipher
$out0,$out0,v28
3558 lvx v24
,$x00,$key_ # re-pre-load round[1]
3560 vncipher
$out0,$out0,v29
3561 lvx v25
,$x10,$key_ # re-pre-load round[2]
# Fold v31 into twk0 so it can be used as the final-round operand.
3562 vxor
$twk0,$twk0,v31
3564 le?vperm
$in0,$in0,$in0,$leperm
3565 vncipher
$out0,$out0,v30
3568 vncipherlast
$out0,$out0,$twk0
3570 vmr
$twk0,$twk1 # unused tweak
3572 le?vperm
$out0,$out0,$out0,$leperm
3573 stvx_u
$out0,$x00,$out # store output
3575 vxor
$out0,$in0,$twk2
3576 bne Lxts_dec6x_steal
# Ciphertext-stealing path for a trailing partial block. The first two
# statements belong to the entry that arrives with no whole blocks pending:
# in0 is xored with twk1. The round loop that follows branches back to
# Lxts_dec6x_steal and runs a single block (out0) through the vncipher rounds.
# NOTE(review): the load of in0 and the labels are not visible in this
# excerpt.
3585 le?vperm
$in0,$in0,$in0,$leperm
3586 vxor
$out0,$in0,$twk1
3588 vncipher
$out0,$out0,v24
3589 lvx v24
,$x20,$key_ # round[3]
3590 addi
$key_,$key_,0x20
3592 vncipher
$out0,$out0,v25
3593 lvx v25
,$x10,$key_ # round[4]
3594 bdnz Lxts_dec6x_steal
# Advance inp by the partial-block length, then finish the remaining rounds.
3596 add
$inp,$inp,$taillen
3597 vncipher
$out0,$out0,v24
3600 vncipher
$out0,$out0,v25
3603 vncipher
$out0,$out0,v26
# Permute control derived from taillen, used on in0 and for the mask below.
3605 lvsr
$inpperm,0,$taillen # $in5 is no more
3606 vncipher
$out0,$out0,v27
3608 addi
$key_,$sp,$FRAME+15 # rewind $key_
3609 vncipher
$out0,$out0,v28
3610 lvx v24
,$x00,$key_ # re-pre-load round[1]
3612 vncipher
$out0,$out0,v29
3613 lvx v25
,$x10,$key_ # re-pre-load round[2]
# Fold v31 into twk1 so it can be used as the final-round operand.
3614 vxor
$twk1,$twk1,v31
3616 le?vperm
$in0,$in0,$in0,$leperm
3617 vncipher
$out0,$out0,v30
3619 vperm
$in0,$in0,$in0,$inpperm
# The decrypted block is kept in tmp and also written to out. The two
# endian-tagged stores are alternatives: little-endian stores the byte-
# swapped copy held in out0, big-endian stores tmp directly.
3620 vncipherlast
$tmp,$out0,$twk1
3622 le?vperm
$out0,$tmp,$tmp,$leperm
3623 le?stvx_u
$out0,0,$out
3624 be?stvx_u
$tmp,0,$out
# Build a byte-select mask in out0 (zero vector and out1 permuted through
# inpperm); vsel then takes each byte from in0 or from tmp under that mask.
# The merged block is xored with twk0 for one more decryption pass.
# NOTE(review): the instruction initialising out1 for the mask is not
# visible in this excerpt.
3626 vxor
$out0,$out0,$out0
3628 vperm
$out0,$out0,$out1,$inpperm
3629 vsel
$out0,$in0,$tmp,$out0
3630 vxor
$out0,$out0,$twk0
# Counted loop labelled Loop_xts_dec6x_steal (body not visible here),
# followed by a jump into the single-block round loop for the merged block.
3634 Loop_xts_dec6x_steal
:
3637 bdnz Loop_xts_dec6x_steal
3641 b Loop_xts_dec1x
# one more time...
# Epilogue of the six-block decrypt routine. twk0 carries rndkey0 folded in,
# so xoring with rndkey0 again recovers the plain tweak, which is written
# back through ivp.
3648 vxor
$tweak,$twk0,$rndkey0
3649 le?vperm
$tweak,$tweak,$tweak,$leperm
3650 stvx_u
$tweak,0,$ivp
# Overwrite the stack copy of the round keys before returning.
3656 stvx
$seven,r10
,$sp # wipe copies of round keys
# Restore the saved vector and general-purpose registers and release the
# stack frame (only part of the restore sequence is visible in this excerpt).
3674 lvx v20
,r10
,$sp # ABI says so
3696 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
3697 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
3698 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
3699 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
3700 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
3701 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
3702 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3705 .byte
0,12,0x04,1,0x80,6,6,0
# Round loop of the five-block XTS decrypt helper (the bdnz below branches
# back to _aesp8_xts_dec5x). Each pass applies two vncipher rounds, keyed by
# v24 and v25, to out0..out4 and reloads both key registers through key_.
3710 vncipher
$out0,$out0,v24
3711 vncipher
$out1,$out1,v24
3712 vncipher
$out2,$out2,v24
3713 vncipher
$out3,$out3,v24
3714 vncipher
$out4,$out4,v24
3715 lvx v24
,$x20,$key_ # round[3]
3716 addi
$key_,$key_,0x20
3718 vncipher
$out0,$out0,v25
3719 vncipher
$out1,$out1,v25
3720 vncipher
$out2,$out2,v25
3721 vncipher
$out3,$out3,v25
3722 vncipher
$out4,$out4,v25
3723 lvx v25
,$x10,$key_ # round[4]
3724 bdnz _aesp8_xts_dec5x
# Loop done: finish the remaining rounds straight-line with v24..v30.
3727 vncipher
$out0,$out0,v24
3728 vncipher
$out1,$out1,v24
3729 vncipher
$out2,$out2,v24
3730 vncipher
$out3,$out3,v24
3731 vncipher
$out4,$out4,v24
3735 vncipher
$out0,$out0,v25
3736 vncipher
$out1,$out1,v25
3737 vncipher
$out2,$out2,v25
3738 vncipher
$out3,$out3,v25
3739 vncipher
$out4,$out4,v25
# Fold v31 into twk0 so that twk0 can serve as the vncipherlast operand for
# out0 further down.
3740 vxor
$twk0,$twk0,v31
3743 vncipher
$out0,$out0,v26
3744 vncipher
$out1,$out1,v26
3745 vncipher
$out2,$out2,v26
3746 vncipher
$out3,$out3,v26
3747 vncipher
$out4,$out4,v26
3750 vncipher
$out0,$out0,v27
3752 vncipher
$out1,$out1,v27
3753 vncipher
$out2,$out2,v27
3754 vncipher
$out3,$out3,v27
3755 vncipher
$out4,$out4,v27
# Point key_ back at the start of the stack key copy and re-load v24/v25 so
# the next call starts from round[1] again.
3758 addi
$key_,$sp,$FRAME+15 # rewind $key_
3759 vncipher
$out0,$out0,v28
3760 vncipher
$out1,$out1,v28
3761 vncipher
$out2,$out2,v28
3762 vncipher
$out3,$out3,v28
3763 vncipher
$out4,$out4,v28
3764 lvx v24
,$x00,$key_ # re-pre-load round[1]
3767 vncipher
$out0,$out0,v29
3768 le?vperm
$in0,$in0,$in0,$leperm
3769 vncipher
$out1,$out1,v29
3770 vncipher
$out2,$out2,v29
3771 vncipher
$out3,$out3,v29
3772 vncipher
$out4,$out4,v29
3773 lvx v25
,$x10,$key_ # re-pre-load round[2]
3776 vncipher
$out0,$out0,v30
3777 vncipher
$out1,$out1,v30
3778 vncipher
$out2,$out2,v30
3779 vncipher
$out3,$out3,v30
3780 vncipher
$out4,$out4,v30
# Final round. out0 uses twk0 (with v31 folded in above); out1..out4 use
# in1..in4 as their operand.
# NOTE(review): in1..in4 are presumably prepared by the caller in the same
# way as twk0 - confirm against the call sites, which are not visible here.
3782 vncipherlast
$out0,$out0,$twk0
3783 vncipherlast
$out1,$out1,$in1
3784 vncipherlast
$out2,$out2,$in2
3785 vncipherlast
$out3,$out3,$in3
3786 vncipherlast
$out4,$out4,$in4
3790 .byte
0,12,0x14,0,0,0,0,0
3795 foreach(split("\n",$code)) {
3796 s/\`([^\`]*)\`/eval($1)/geo;
3798 # constants table endian-specific conversion
3799 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3803 # convert to endian-agnostic format
3805 foreach (split(/,\s*/,$2)) {
3806 my $l = /^0/?
oct:int;
3807 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3810 @bytes = map(/^0/?
oct:int,split(/,\s*/,$2));
3813 # little-endian conversion
3814 if ($flavour =~ /le$/o) {
3815 SWITCH
: for($conv) {
3816 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3817 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3822 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3825 $consts=0 if (m/Lconsts:/o); # end of table
3827 # instructions prefixed with '?' are endian-specific and need
3828 # to be adjusted accordingly...
3829 if ($flavour =~ /le$/o) { # little-endian
3834 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3835 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3836 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3837 } else { # big-endian