2 # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # This module implements support for AES instructions as per PowerISA
18 # specification version 2.07, first implemented by POWER8 processor.
19 # The module is endian-agnostic in the sense that it supports both big-
20 # and little-endian cases. Data alignment in parallelizable modes is
21 # handled with VSX loads and stores, which implies MSR.VSX flag being
22 # set. It should also be noted that ISA specification doesn't prohibit
23 # alignment exceptions for these instructions on page boundaries.
24 # Initially alignment was handled in pure AltiVec/VMX way [when data
25 # is aligned programmatically, which in turn guarantees exception-
26 # free execution], but it turned to hamper performance when vcipher
27 # instructions are interleaved. It's reckoned that eventual
28 # misalignment penalties at page boundaries are in average lower
29 # than additional overhead in pure AltiVec approach.
33 # Add XTS subroutine, 9x on little- and 12x improvement on big-endian
34 # systems were measured.
36 ######################################################################
37 # Current large-block performance in cycles per byte processed with
38 # 128-bit key (less is better).
40 # CBC en-/decrypt CTR XTS
41 # POWER8[le] 3.96/0.72 0.74 1.1
42 # POWER8[be] 3.75/0.65 0.66 1.0
46 if ($flavour =~ /64/) {
54 } elsif ($flavour =~ /32/) {
62 } else { die "nonsense $flavour"; }
64 $LITTLE_ENDIAN = ($flavour=~/le$/) ?
$SIZE_T : 0;
66 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
67 ( $xlate="${dir}ppc-xlate.pl" and -f
$xlate ) or
68 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f
$xlate) or
69 die "can't locate ppc-xlate.pl";
71 open STDOUT
,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
79 #########################################################################
80 {{{ # Key setup procedures #
81 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
82 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
83 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
92 .long
0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
93 .long
0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
94 .long
0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
99 mflr
$ptr #vvvvv "distance between . and rcon
104 .byte
0,12,0x14,0,0,0,0,0
105 .asciz
"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
107 .globl
.${prefix
}_set_encrypt_key
110 $PUSH r11
,$LRSAVE($sp)
114 beq
- Lenc_key_abort
# if ($inp==0) return -1;
116 beq
- Lenc_key_abort
# if ($out==0) return -1;
134 addi
$inp,$inp,15 # 15 is not typo
135 lvsr
$key,0,r9
# borrow $key
139 le?vspltisb
$mask,0x0f # borrow $mask
141 le?vxor
$key,$key,$mask # adjust for byte swap
144 vperm
$in0,$in0,$in1,$key # align [and byte swap in LE]
146 vxor
$zero,$zero,$zero
149 ?lvsr
$outperm,0,$out
152 ?vperm
$outmask,$zero,$outmask,$outperm
162 vperm
$key,$in0,$in0,$mask # rotate-n-splat
163 vsldoi
$tmp,$zero,$in0,12 # >>32
164 vperm
$outtail,$in0,$in0,$outperm # rotate
165 vsel
$stage,$outhead,$outtail,$outmask
166 vmr
$outhead,$outtail
167 vcipherlast
$key,$key,$rcon
172 vsldoi
$tmp,$zero,$tmp,12 # >>32
174 vsldoi
$tmp,$zero,$tmp,12 # >>32
176 vadduwm
$rcon,$rcon,$rcon
180 lvx
$rcon,0,$ptr # last two round keys
182 vperm
$key,$in0,$in0,$mask # rotate-n-splat
183 vsldoi
$tmp,$zero,$in0,12 # >>32
184 vperm
$outtail,$in0,$in0,$outperm # rotate
185 vsel
$stage,$outhead,$outtail,$outmask
186 vmr
$outhead,$outtail
187 vcipherlast
$key,$key,$rcon
192 vsldoi
$tmp,$zero,$tmp,12 # >>32
194 vsldoi
$tmp,$zero,$tmp,12 # >>32
196 vadduwm
$rcon,$rcon,$rcon
199 vperm
$key,$in0,$in0,$mask # rotate-n-splat
200 vsldoi
$tmp,$zero,$in0,12 # >>32
201 vperm
$outtail,$in0,$in0,$outperm # rotate
202 vsel
$stage,$outhead,$outtail,$outmask
203 vmr
$outhead,$outtail
204 vcipherlast
$key,$key,$rcon
209 vsldoi
$tmp,$zero,$tmp,12 # >>32
211 vsldoi
$tmp,$zero,$tmp,12 # >>32
214 vperm
$outtail,$in0,$in0,$outperm # rotate
215 vsel
$stage,$outhead,$outtail,$outmask
216 vmr
$outhead,$outtail
219 addi
$inp,$out,15 # 15 is not typo
229 vperm
$outtail,$in0,$in0,$outperm # rotate
230 vsel
$stage,$outhead,$outtail,$outmask
231 vmr
$outhead,$outtail
234 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
235 vspltisb
$key,8 # borrow $key
237 vsububm
$mask,$mask,$key # adjust the mask
240 vperm
$key,$in1,$in1,$mask # rotate-n-splat
241 vsldoi
$tmp,$zero,$in0,12 # >>32
242 vcipherlast
$key,$key,$rcon
245 vsldoi
$tmp,$zero,$tmp,12 # >>32
247 vsldoi
$tmp,$zero,$tmp,12 # >>32
250 vsldoi
$stage,$zero,$in1,8
253 vsldoi
$in1,$zero,$in1,12 # >>32
254 vadduwm
$rcon,$rcon,$rcon
258 vsldoi
$stage,$stage,$in0,8
260 vperm
$key,$in1,$in1,$mask # rotate-n-splat
261 vsldoi
$tmp,$zero,$in0,12 # >>32
262 vperm
$outtail,$stage,$stage,$outperm # rotate
263 vsel
$stage,$outhead,$outtail,$outmask
264 vmr
$outhead,$outtail
265 vcipherlast
$key,$key,$rcon
269 vsldoi
$stage,$in0,$in1,8
271 vsldoi
$tmp,$zero,$tmp,12 # >>32
272 vperm
$outtail,$stage,$stage,$outperm # rotate
273 vsel
$stage,$outhead,$outtail,$outmask
274 vmr
$outhead,$outtail
276 vsldoi
$tmp,$zero,$tmp,12 # >>32
283 vsldoi
$in1,$zero,$in1,12 # >>32
284 vadduwm
$rcon,$rcon,$rcon
288 vperm
$outtail,$in0,$in0,$outperm # rotate
289 vsel
$stage,$outhead,$outtail,$outmask
290 vmr
$outhead,$outtail
292 addi
$inp,$out,15 # 15 is not typo
305 vperm
$outtail,$in0,$in0,$outperm # rotate
306 vsel
$stage,$outhead,$outtail,$outmask
307 vmr
$outhead,$outtail
310 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
314 vperm
$key,$in1,$in1,$mask # rotate-n-splat
315 vsldoi
$tmp,$zero,$in0,12 # >>32
316 vperm
$outtail,$in1,$in1,$outperm # rotate
317 vsel
$stage,$outhead,$outtail,$outmask
318 vmr
$outhead,$outtail
319 vcipherlast
$key,$key,$rcon
324 vsldoi
$tmp,$zero,$tmp,12 # >>32
326 vsldoi
$tmp,$zero,$tmp,12 # >>32
328 vadduwm
$rcon,$rcon,$rcon
330 vperm
$outtail,$in0,$in0,$outperm # rotate
331 vsel
$stage,$outhead,$outtail,$outmask
332 vmr
$outhead,$outtail
334 addi
$inp,$out,15 # 15 is not typo
338 vspltw
$key,$in0,3 # just splat
339 vsldoi
$tmp,$zero,$in1,12 # >>32
343 vsldoi
$tmp,$zero,$tmp,12 # >>32
345 vsldoi
$tmp,$zero,$tmp,12 # >>32
353 lvx
$in1,0,$inp # redundant in aligned case
354 vsel
$in1,$outhead,$in1,$outmask
364 .byte
0,12,0x14,1,0,0,3,0
366 .size
.${prefix
}_set_encrypt_key
,.-.${prefix
}_set_encrypt_key
368 .globl
.${prefix
}_set_decrypt_key
369 $STU $sp,-$FRAME($sp)
371 $PUSH r10
,$FRAME+$LRSAVE($sp)
379 subi
$inp,$out,240 # first round key
380 srwi
$rounds,$rounds,1
381 add
$out,$inp,$cnt # last round key
405 xor r3
,r3
,r3
# return value
410 .byte
0,12,4,1,0x80,0,3,0
412 .size
.${prefix
}_set_decrypt_key
,.-.${prefix
}_set_decrypt_key
415 #########################################################################
416 {{{ # Single block en- and decrypt procedures #
419 my $n = $dir eq "de" ?
"n" : "";
420 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
423 .globl
.${prefix
}_
${dir
}crypt
424 lwz
$rounds,240($key)
427 li
$idx,15 # 15 is not typo
433 lvsl v2
,0,$inp # inpperm
435 ?lvsl v3
,0,r11
# outperm
438 vperm v0
,v0
,v1
,v2
# align [and byte swap in LE]
440 ?lvsl v5
,0,$key # keyperm
441 srwi
$rounds,$rounds,1
444 subi
$rounds,$rounds,1
445 ?vperm v1
,v1
,v2
,v5
# align round key
467 v
${n
}cipherlast v0
,v0
,v1
471 li
$idx,15 # 15 is not typo
472 ?vperm v2
,v1
,v2
,v3
# outmask
474 lvx v1
,0,$out # outhead
475 vperm v0
,v0
,v0
,v3
# rotate [and byte swap in LE]
485 .byte
0,12,0x14,0,0,0,3,0
487 .size
.${prefix
}_
${dir
}crypt,.-.${prefix
}_
${dir
}crypt
493 #########################################################################
494 {{{ # CBC en- and decrypt procedures #
495 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
496 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
497 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
500 .globl
.${prefix
}_cbc_encrypt
504 cmpwi
$enc,0 # test direction
510 vxor
$rndkey0,$rndkey0,$rndkey0
511 le?vspltisb
$tmp,0x0f
513 lvx
$ivec,0,$ivp # load [unaligned] iv
515 lvx
$inptail,$idx,$ivp
516 le?vxor
$inpperm,$inpperm,$tmp
517 vperm
$ivec,$ivec,$inptail,$inpperm
520 ?lvsl
$keyperm,0,$key # prepare for unaligned key
521 lwz
$rounds,240($key)
523 lvsr
$inpperm,0,r11
# prepare for unaligned load
525 addi
$inp,$inp,15 # 15 is not typo
526 le?vxor
$inpperm,$inpperm,$tmp
528 ?lvsr
$outperm,0,$out # prepare for unaligned store
531 ?vperm
$outmask,$rndkey0,$outmask,$outperm
532 le?vxor
$outperm,$outperm,$tmp
534 srwi
$rounds,$rounds,1
536 subi
$rounds,$rounds,1
544 subi
$len,$len,16 # len-=16
547 vperm
$inout,$inout,$inptail,$inpperm
548 lvx
$rndkey1,$idx,$key
550 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
551 vxor
$inout,$inout,$rndkey0
552 lvx
$rndkey0,$idx,$key
554 vxor
$inout,$inout,$ivec
557 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
558 vcipher
$inout,$inout,$rndkey1
559 lvx
$rndkey1,$idx,$key
561 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
562 vcipher
$inout,$inout,$rndkey0
563 lvx
$rndkey0,$idx,$key
567 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
568 vcipher
$inout,$inout,$rndkey1
569 lvx
$rndkey1,$idx,$key
571 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
572 vcipherlast
$ivec,$inout,$rndkey0
575 vperm
$tmp,$ivec,$ivec,$outperm
576 vsel
$inout,$outhead,$tmp,$outmask
587 bge _aesp8_cbc_decrypt8x
592 subi
$len,$len,16 # len-=16
595 vperm
$tmp,$tmp,$inptail,$inpperm
596 lvx
$rndkey1,$idx,$key
598 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
599 vxor
$inout,$tmp,$rndkey0
600 lvx
$rndkey0,$idx,$key
604 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
605 vncipher
$inout,$inout,$rndkey1
606 lvx
$rndkey1,$idx,$key
608 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
609 vncipher
$inout,$inout,$rndkey0
610 lvx
$rndkey0,$idx,$key
614 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
615 vncipher
$inout,$inout,$rndkey1
616 lvx
$rndkey1,$idx,$key
618 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
619 vncipherlast
$inout,$inout,$rndkey0
622 vxor
$inout,$inout,$ivec
624 vperm
$tmp,$inout,$inout,$outperm
625 vsel
$inout,$outhead,$tmp,$outmask
633 lvx
$inout,0,$out # redundant in aligned case
634 vsel
$inout,$outhead,$inout,$outmask
637 neg
$enc,$ivp # write [unaligned] iv
638 li
$idx,15 # 15 is not typo
639 vxor
$rndkey0,$rndkey0,$rndkey0
641 le?vspltisb
$tmp,0x0f
642 ?lvsl
$outperm,0,$enc
643 ?vperm
$outmask,$rndkey0,$outmask,$outperm
644 le?vxor
$outperm,$outperm,$tmp
646 vperm
$ivec,$ivec,$ivec,$outperm
647 vsel
$inout,$outhead,$ivec,$outmask
648 lvx
$inptail,$idx,$ivp
650 vsel
$inout,$ivec,$inptail,$outmask
651 stvx
$inout,$idx,$ivp
656 .byte
0,12,0x14,0,0,0,6,0
659 #########################################################################
660 {{ # Optimized CBC decrypt procedure #
662 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
663 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
664 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
665 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
666 # v26-v31 last 6 round keys
667 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
671 _aesp8_cbc_decrypt8x
:
672 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
673 li r10
,`$FRAME+8*16+15`
674 li r11
,`$FRAME+8*16+31`
675 stvx v20
,r10
,$sp # ABI says so
698 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
700 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
702 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
704 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
706 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
708 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
710 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
714 subi
$rounds,$rounds,3 # -4 in total
715 subi
$len,$len,128 # bias
717 lvx
$rndkey0,$x00,$key # load key schedule
721 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
722 addi
$key_,$sp,$FRAME+15
726 ?vperm v24
,v30
,v31
,$keyperm
729 stvx v24
,$x00,$key_ # off-load round[1]
730 ?vperm v25
,v31
,v30
,$keyperm
732 stvx v25
,$x10,$key_ # off-load round[2]
733 addi
$key_,$key_,0x20
734 bdnz Load_cbc_dec_key
737 ?vperm v24
,v30
,v31
,$keyperm
739 stvx v24
,$x00,$key_ # off-load round[3]
740 ?vperm v25
,v31
,v26
,$keyperm
742 stvx v25
,$x10,$key_ # off-load round[4]
743 addi
$key_,$sp,$FRAME+15 # rewind $key_
744 ?vperm v26
,v26
,v27
,$keyperm
746 ?vperm v27
,v27
,v28
,$keyperm
748 ?vperm v28
,v28
,v29
,$keyperm
750 ?vperm v29
,v29
,v30
,$keyperm
751 lvx
$out0,$x70,$key # borrow $out0
752 ?vperm v30
,v30
,v31
,$keyperm
753 lvx v24
,$x00,$key_ # pre-load round[1]
754 ?vperm v31
,v31
,$out0,$keyperm
755 lvx v25
,$x10,$key_ # pre-load round[2]
757 #lvx $inptail,0,$inp # "caller" already did this
758 #addi $inp,$inp,15 # 15 is not typo
759 subi
$inp,$inp,15 # undo "caller"
762 lvx_u
$in0,$x00,$inp # load first 8 "words"
763 le?lvsl
$inpperm,0,$idx
764 le?vspltisb
$tmp,0x0f
766 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
768 le?vperm
$in0,$in0,$in0,$inpperm
770 le?vperm
$in1,$in1,$in1,$inpperm
772 le?vperm
$in2,$in2,$in2,$inpperm
773 vxor
$out0,$in0,$rndkey0
775 le?vperm
$in3,$in3,$in3,$inpperm
776 vxor
$out1,$in1,$rndkey0
778 le?vperm
$in4,$in4,$in4,$inpperm
779 vxor
$out2,$in2,$rndkey0
782 le?vperm
$in5,$in5,$in5,$inpperm
783 vxor
$out3,$in3,$rndkey0
784 le?vperm
$in6,$in6,$in6,$inpperm
785 vxor
$out4,$in4,$rndkey0
786 le?vperm
$in7,$in7,$in7,$inpperm
787 vxor
$out5,$in5,$rndkey0
788 vxor
$out6,$in6,$rndkey0
789 vxor
$out7,$in7,$rndkey0
795 vncipher
$out0,$out0,v24
796 vncipher
$out1,$out1,v24
797 vncipher
$out2,$out2,v24
798 vncipher
$out3,$out3,v24
799 vncipher
$out4,$out4,v24
800 vncipher
$out5,$out5,v24
801 vncipher
$out6,$out6,v24
802 vncipher
$out7,$out7,v24
803 lvx v24
,$x20,$key_ # round[3]
804 addi
$key_,$key_,0x20
806 vncipher
$out0,$out0,v25
807 vncipher
$out1,$out1,v25
808 vncipher
$out2,$out2,v25
809 vncipher
$out3,$out3,v25
810 vncipher
$out4,$out4,v25
811 vncipher
$out5,$out5,v25
812 vncipher
$out6,$out6,v25
813 vncipher
$out7,$out7,v25
814 lvx v25
,$x10,$key_ # round[4]
817 subic
$len,$len,128 # $len-=128
818 vncipher
$out0,$out0,v24
819 vncipher
$out1,$out1,v24
820 vncipher
$out2,$out2,v24
821 vncipher
$out3,$out3,v24
822 vncipher
$out4,$out4,v24
823 vncipher
$out5,$out5,v24
824 vncipher
$out6,$out6,v24
825 vncipher
$out7,$out7,v24
827 subfe
. r0
,r0
,r0
# borrow?-1:0
828 vncipher
$out0,$out0,v25
829 vncipher
$out1,$out1,v25
830 vncipher
$out2,$out2,v25
831 vncipher
$out3,$out3,v25
832 vncipher
$out4,$out4,v25
833 vncipher
$out5,$out5,v25
834 vncipher
$out6,$out6,v25
835 vncipher
$out7,$out7,v25
838 vncipher
$out0,$out0,v26
839 vncipher
$out1,$out1,v26
840 vncipher
$out2,$out2,v26
841 vncipher
$out3,$out3,v26
842 vncipher
$out4,$out4,v26
843 vncipher
$out5,$out5,v26
844 vncipher
$out6,$out6,v26
845 vncipher
$out7,$out7,v26
847 add
$inp,$inp,r0
# $inp is adjusted in such
848 # way that at exit from the
849 # loop inX-in7 are loaded
851 vncipher
$out0,$out0,v27
852 vncipher
$out1,$out1,v27
853 vncipher
$out2,$out2,v27
854 vncipher
$out3,$out3,v27
855 vncipher
$out4,$out4,v27
856 vncipher
$out5,$out5,v27
857 vncipher
$out6,$out6,v27
858 vncipher
$out7,$out7,v27
860 addi
$key_,$sp,$FRAME+15 # rewind $key_
861 vncipher
$out0,$out0,v28
862 vncipher
$out1,$out1,v28
863 vncipher
$out2,$out2,v28
864 vncipher
$out3,$out3,v28
865 vncipher
$out4,$out4,v28
866 vncipher
$out5,$out5,v28
867 vncipher
$out6,$out6,v28
868 vncipher
$out7,$out7,v28
869 lvx v24
,$x00,$key_ # re-pre-load round[1]
871 vncipher
$out0,$out0,v29
872 vncipher
$out1,$out1,v29
873 vncipher
$out2,$out2,v29
874 vncipher
$out3,$out3,v29
875 vncipher
$out4,$out4,v29
876 vncipher
$out5,$out5,v29
877 vncipher
$out6,$out6,v29
878 vncipher
$out7,$out7,v29
879 lvx v25
,$x10,$key_ # re-pre-load round[2]
881 vncipher
$out0,$out0,v30
882 vxor
$ivec,$ivec,v31
# xor with last round key
883 vncipher
$out1,$out1,v30
885 vncipher
$out2,$out2,v30
887 vncipher
$out3,$out3,v30
889 vncipher
$out4,$out4,v30
891 vncipher
$out5,$out5,v30
893 vncipher
$out6,$out6,v30
895 vncipher
$out7,$out7,v30
898 vncipherlast
$out0,$out0,$ivec
899 vncipherlast
$out1,$out1,$in0
900 lvx_u
$in0,$x00,$inp # load next input block
901 vncipherlast
$out2,$out2,$in1
903 vncipherlast
$out3,$out3,$in2
904 le?vperm
$in0,$in0,$in0,$inpperm
906 vncipherlast
$out4,$out4,$in3
907 le?vperm
$in1,$in1,$in1,$inpperm
909 vncipherlast
$out5,$out5,$in4
910 le?vperm
$in2,$in2,$in2,$inpperm
912 vncipherlast
$out6,$out6,$in5
913 le?vperm
$in3,$in3,$in3,$inpperm
915 vncipherlast
$out7,$out7,$in6
916 le?vperm
$in4,$in4,$in4,$inpperm
919 le?vperm
$in5,$in5,$in5,$inpperm
923 le?vperm
$out0,$out0,$out0,$inpperm
924 le?vperm
$out1,$out1,$out1,$inpperm
925 stvx_u
$out0,$x00,$out
926 le?vperm
$in6,$in6,$in6,$inpperm
927 vxor
$out0,$in0,$rndkey0
928 le?vperm
$out2,$out2,$out2,$inpperm
929 stvx_u
$out1,$x10,$out
930 le?vperm
$in7,$in7,$in7,$inpperm
931 vxor
$out1,$in1,$rndkey0
932 le?vperm
$out3,$out3,$out3,$inpperm
933 stvx_u
$out2,$x20,$out
934 vxor
$out2,$in2,$rndkey0
935 le?vperm
$out4,$out4,$out4,$inpperm
936 stvx_u
$out3,$x30,$out
937 vxor
$out3,$in3,$rndkey0
938 le?vperm
$out5,$out5,$out5,$inpperm
939 stvx_u
$out4,$x40,$out
940 vxor
$out4,$in4,$rndkey0
941 le?vperm
$out6,$out6,$out6,$inpperm
942 stvx_u
$out5,$x50,$out
943 vxor
$out5,$in5,$rndkey0
944 le?vperm
$out7,$out7,$out7,$inpperm
945 stvx_u
$out6,$x60,$out
946 vxor
$out6,$in6,$rndkey0
947 stvx_u
$out7,$x70,$out
949 vxor
$out7,$in7,$rndkey0
952 beq Loop_cbc_dec8x
# did $len-=128 borrow?
959 Loop_cbc_dec8x_tail
: # up to 7 "words" tail...
960 vncipher
$out1,$out1,v24
961 vncipher
$out2,$out2,v24
962 vncipher
$out3,$out3,v24
963 vncipher
$out4,$out4,v24
964 vncipher
$out5,$out5,v24
965 vncipher
$out6,$out6,v24
966 vncipher
$out7,$out7,v24
967 lvx v24
,$x20,$key_ # round[3]
968 addi
$key_,$key_,0x20
970 vncipher
$out1,$out1,v25
971 vncipher
$out2,$out2,v25
972 vncipher
$out3,$out3,v25
973 vncipher
$out4,$out4,v25
974 vncipher
$out5,$out5,v25
975 vncipher
$out6,$out6,v25
976 vncipher
$out7,$out7,v25
977 lvx v25
,$x10,$key_ # round[4]
978 bdnz Loop_cbc_dec8x_tail
980 vncipher
$out1,$out1,v24
981 vncipher
$out2,$out2,v24
982 vncipher
$out3,$out3,v24
983 vncipher
$out4,$out4,v24
984 vncipher
$out5,$out5,v24
985 vncipher
$out6,$out6,v24
986 vncipher
$out7,$out7,v24
988 vncipher
$out1,$out1,v25
989 vncipher
$out2,$out2,v25
990 vncipher
$out3,$out3,v25
991 vncipher
$out4,$out4,v25
992 vncipher
$out5,$out5,v25
993 vncipher
$out6,$out6,v25
994 vncipher
$out7,$out7,v25
996 vncipher
$out1,$out1,v26
997 vncipher
$out2,$out2,v26
998 vncipher
$out3,$out3,v26
999 vncipher
$out4,$out4,v26
1000 vncipher
$out5,$out5,v26
1001 vncipher
$out6,$out6,v26
1002 vncipher
$out7,$out7,v26
1004 vncipher
$out1,$out1,v27
1005 vncipher
$out2,$out2,v27
1006 vncipher
$out3,$out3,v27
1007 vncipher
$out4,$out4,v27
1008 vncipher
$out5,$out5,v27
1009 vncipher
$out6,$out6,v27
1010 vncipher
$out7,$out7,v27
1012 vncipher
$out1,$out1,v28
1013 vncipher
$out2,$out2,v28
1014 vncipher
$out3,$out3,v28
1015 vncipher
$out4,$out4,v28
1016 vncipher
$out5,$out5,v28
1017 vncipher
$out6,$out6,v28
1018 vncipher
$out7,$out7,v28
1020 vncipher
$out1,$out1,v29
1021 vncipher
$out2,$out2,v29
1022 vncipher
$out3,$out3,v29
1023 vncipher
$out4,$out4,v29
1024 vncipher
$out5,$out5,v29
1025 vncipher
$out6,$out6,v29
1026 vncipher
$out7,$out7,v29
1028 vncipher
$out1,$out1,v30
1029 vxor
$ivec,$ivec,v31
# last round key
1030 vncipher
$out2,$out2,v30
1032 vncipher
$out3,$out3,v30
1034 vncipher
$out4,$out4,v30
1036 vncipher
$out5,$out5,v30
1038 vncipher
$out6,$out6,v30
1040 vncipher
$out7,$out7,v30
1043 cmplwi
$len,32 # switch($len)
1048 blt Lcbc_dec8x_three
1057 vncipherlast
$out1,$out1,$ivec
1058 vncipherlast
$out2,$out2,$in1
1059 vncipherlast
$out3,$out3,$in2
1060 vncipherlast
$out4,$out4,$in3
1061 vncipherlast
$out5,$out5,$in4
1062 vncipherlast
$out6,$out6,$in5
1063 vncipherlast
$out7,$out7,$in6
1066 le?vperm
$out1,$out1,$out1,$inpperm
1067 le?vperm
$out2,$out2,$out2,$inpperm
1068 stvx_u
$out1,$x00,$out
1069 le?vperm
$out3,$out3,$out3,$inpperm
1070 stvx_u
$out2,$x10,$out
1071 le?vperm
$out4,$out4,$out4,$inpperm
1072 stvx_u
$out3,$x20,$out
1073 le?vperm
$out5,$out5,$out5,$inpperm
1074 stvx_u
$out4,$x30,$out
1075 le?vperm
$out6,$out6,$out6,$inpperm
1076 stvx_u
$out5,$x40,$out
1077 le?vperm
$out7,$out7,$out7,$inpperm
1078 stvx_u
$out6,$x50,$out
1079 stvx_u
$out7,$x60,$out
1085 vncipherlast
$out2,$out2,$ivec
1086 vncipherlast
$out3,$out3,$in2
1087 vncipherlast
$out4,$out4,$in3
1088 vncipherlast
$out5,$out5,$in4
1089 vncipherlast
$out6,$out6,$in5
1090 vncipherlast
$out7,$out7,$in6
1093 le?vperm
$out2,$out2,$out2,$inpperm
1094 le?vperm
$out3,$out3,$out3,$inpperm
1095 stvx_u
$out2,$x00,$out
1096 le?vperm
$out4,$out4,$out4,$inpperm
1097 stvx_u
$out3,$x10,$out
1098 le?vperm
$out5,$out5,$out5,$inpperm
1099 stvx_u
$out4,$x20,$out
1100 le?vperm
$out6,$out6,$out6,$inpperm
1101 stvx_u
$out5,$x30,$out
1102 le?vperm
$out7,$out7,$out7,$inpperm
1103 stvx_u
$out6,$x40,$out
1104 stvx_u
$out7,$x50,$out
1110 vncipherlast
$out3,$out3,$ivec
1111 vncipherlast
$out4,$out4,$in3
1112 vncipherlast
$out5,$out5,$in4
1113 vncipherlast
$out6,$out6,$in5
1114 vncipherlast
$out7,$out7,$in6
1117 le?vperm
$out3,$out3,$out3,$inpperm
1118 le?vperm
$out4,$out4,$out4,$inpperm
1119 stvx_u
$out3,$x00,$out
1120 le?vperm
$out5,$out5,$out5,$inpperm
1121 stvx_u
$out4,$x10,$out
1122 le?vperm
$out6,$out6,$out6,$inpperm
1123 stvx_u
$out5,$x20,$out
1124 le?vperm
$out7,$out7,$out7,$inpperm
1125 stvx_u
$out6,$x30,$out
1126 stvx_u
$out7,$x40,$out
1132 vncipherlast
$out4,$out4,$ivec
1133 vncipherlast
$out5,$out5,$in4
1134 vncipherlast
$out6,$out6,$in5
1135 vncipherlast
$out7,$out7,$in6
1138 le?vperm
$out4,$out4,$out4,$inpperm
1139 le?vperm
$out5,$out5,$out5,$inpperm
1140 stvx_u
$out4,$x00,$out
1141 le?vperm
$out6,$out6,$out6,$inpperm
1142 stvx_u
$out5,$x10,$out
1143 le?vperm
$out7,$out7,$out7,$inpperm
1144 stvx_u
$out6,$x20,$out
1145 stvx_u
$out7,$x30,$out
1151 vncipherlast
$out5,$out5,$ivec
1152 vncipherlast
$out6,$out6,$in5
1153 vncipherlast
$out7,$out7,$in6
1156 le?vperm
$out5,$out5,$out5,$inpperm
1157 le?vperm
$out6,$out6,$out6,$inpperm
1158 stvx_u
$out5,$x00,$out
1159 le?vperm
$out7,$out7,$out7,$inpperm
1160 stvx_u
$out6,$x10,$out
1161 stvx_u
$out7,$x20,$out
1167 vncipherlast
$out6,$out6,$ivec
1168 vncipherlast
$out7,$out7,$in6
1171 le?vperm
$out6,$out6,$out6,$inpperm
1172 le?vperm
$out7,$out7,$out7,$inpperm
1173 stvx_u
$out6,$x00,$out
1174 stvx_u
$out7,$x10,$out
1180 vncipherlast
$out7,$out7,$ivec
1183 le?vperm
$out7,$out7,$out7,$inpperm
1188 le?vperm
$ivec,$ivec,$ivec,$inpperm
1189 stvx_u
$ivec,0,$ivp # write [unaligned] iv
1193 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1195 stvx
$inpperm,r11
,$sp
1197 stvx
$inpperm,r10
,$sp
1199 stvx
$inpperm,r11
,$sp
1201 stvx
$inpperm,r10
,$sp
1203 stvx
$inpperm,r11
,$sp
1205 stvx
$inpperm,r10
,$sp
1207 stvx
$inpperm,r11
,$sp
1211 lvx v20
,r10
,$sp # ABI says so
1233 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1234 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1235 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1236 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1237 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1238 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1239 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1242 .byte
0,12,0x14,0,0x80,6,6,0
1244 .size
.${prefix
}_cbc_encrypt
,.-.${prefix
}_cbc_encrypt
1248 #########################################################################
1249 {{{ # CTR procedure[s] #
1250 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1251 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1252 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1257 .globl
.${prefix
}_ctr32_encrypt_blocks
1266 vxor
$rndkey0,$rndkey0,$rndkey0
1267 le?vspltisb
$tmp,0x0f
1269 lvx
$ivec,0,$ivp # load [unaligned] iv
1270 lvsl
$inpperm,0,$ivp
1271 lvx
$inptail,$idx,$ivp
1273 le?vxor
$inpperm,$inpperm,$tmp
1274 vperm
$ivec,$ivec,$inptail,$inpperm
1275 vsldoi
$one,$rndkey0,$one,1
1278 ?lvsl
$keyperm,0,$key # prepare for unaligned key
1279 lwz
$rounds,240($key)
1281 lvsr
$inpperm,0,r11
# prepare for unaligned load
1283 addi
$inp,$inp,15 # 15 is not typo
1284 le?vxor
$inpperm,$inpperm,$tmp
1286 srwi
$rounds,$rounds,1
1288 subi
$rounds,$rounds,1
1291 bge _aesp8_ctr32_encrypt8x
1293 ?lvsr
$outperm,0,$out # prepare for unaligned store
1294 vspltisb
$outmask,-1
1296 ?vperm
$outmask,$rndkey0,$outmask,$outperm
1297 le?vxor
$outperm,$outperm,$tmp
1301 lvx
$rndkey1,$idx,$key
1303 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1304 vxor
$inout,$ivec,$rndkey0
1305 lvx
$rndkey0,$idx,$key
1311 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1312 vcipher
$inout,$inout,$rndkey1
1313 lvx
$rndkey1,$idx,$key
1315 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1316 vcipher
$inout,$inout,$rndkey0
1317 lvx
$rndkey0,$idx,$key
1321 vadduwm
$ivec,$ivec,$one
1325 subic
. $len,$len,1 # blocks--
1327 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1328 vcipher
$inout,$inout,$rndkey1
1329 lvx
$rndkey1,$idx,$key
1330 vperm
$dat,$dat,$inptail,$inpperm
1332 ?vperm
$rndkey1,$rndkey0,$rndkey1,$keyperm
1334 vxor
$dat,$dat,$rndkey1 # last round key
1335 vcipherlast
$inout,$inout,$dat
1337 lvx
$rndkey1,$idx,$key
1339 vperm
$inout,$inout,$inout,$outperm
1340 vsel
$dat,$outhead,$inout,$outmask
1342 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1344 vxor
$inout,$ivec,$rndkey0
1345 lvx
$rndkey0,$idx,$key
1352 lvx
$inout,0,$out # redundant in aligned case
1353 vsel
$inout,$outhead,$inout,$outmask
1359 .byte
0,12,0x14,0,0,0,6,0
1362 #########################################################################
1363 {{ # Optimized CTR procedure #
1365 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1366 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1367 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1368 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1369 # v26-v31 last 6 round keys
1370 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1371 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1375 _aesp8_ctr32_encrypt8x
:
1376 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1377 li r10
,`$FRAME+8*16+15`
1378 li r11
,`$FRAME+8*16+31`
1379 stvx v20
,r10
,$sp # ABI says so
1402 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1404 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1406 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1408 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1410 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1412 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1414 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1418 subi
$rounds,$rounds,3 # -4 in total
1420 lvx
$rndkey0,$x00,$key # load key schedule
1424 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
1425 addi
$key_,$sp,$FRAME+15
1429 ?vperm v24
,v30
,v31
,$keyperm
1432 stvx v24
,$x00,$key_ # off-load round[1]
1433 ?vperm v25
,v31
,v30
,$keyperm
1435 stvx v25
,$x10,$key_ # off-load round[2]
1436 addi
$key_,$key_,0x20
1437 bdnz Load_ctr32_enc_key
1440 ?vperm v24
,v30
,v31
,$keyperm
1442 stvx v24
,$x00,$key_ # off-load round[3]
1443 ?vperm v25
,v31
,v26
,$keyperm
1445 stvx v25
,$x10,$key_ # off-load round[4]
1446 addi
$key_,$sp,$FRAME+15 # rewind $key_
1447 ?vperm v26
,v26
,v27
,$keyperm
1449 ?vperm v27
,v27
,v28
,$keyperm
1451 ?vperm v28
,v28
,v29
,$keyperm
1453 ?vperm v29
,v29
,v30
,$keyperm
1454 lvx
$out0,$x70,$key # borrow $out0
1455 ?vperm v30
,v30
,v31
,$keyperm
1456 lvx v24
,$x00,$key_ # pre-load round[1]
1457 ?vperm v31
,v31
,$out0,$keyperm
1458 lvx v25
,$x10,$key_ # pre-load round[2]
1460 vadduqm
$two,$one,$one
1461 subi
$inp,$inp,15 # undo "caller"
1464 vadduqm
$out1,$ivec,$one # counter values ...
1465 vadduqm
$out2,$ivec,$two
1466 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1468 vadduqm
$out3,$out1,$two
1469 vxor
$out1,$out1,$rndkey0
1470 le?lvsl
$inpperm,0,$idx
1471 vadduqm
$out4,$out2,$two
1472 vxor
$out2,$out2,$rndkey0
1473 le?vspltisb
$tmp,0x0f
1474 vadduqm
$out5,$out3,$two
1475 vxor
$out3,$out3,$rndkey0
1476 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1477 vadduqm
$out6,$out4,$two
1478 vxor
$out4,$out4,$rndkey0
1479 vadduqm
$out7,$out5,$two
1480 vxor
$out5,$out5,$rndkey0
1481 vadduqm
$ivec,$out6,$two # next counter value
1482 vxor
$out6,$out6,$rndkey0
1483 vxor
$out7,$out7,$rndkey0
1489 vcipher
$out0,$out0,v24
1490 vcipher
$out1,$out1,v24
1491 vcipher
$out2,$out2,v24
1492 vcipher
$out3,$out3,v24
1493 vcipher
$out4,$out4,v24
1494 vcipher
$out5,$out5,v24
1495 vcipher
$out6,$out6,v24
1496 vcipher
$out7,$out7,v24
1497 Loop_ctr32_enc8x_middle
:
1498 lvx v24
,$x20,$key_ # round[3]
1499 addi
$key_,$key_,0x20
1501 vcipher
$out0,$out0,v25
1502 vcipher
$out1,$out1,v25
1503 vcipher
$out2,$out2,v25
1504 vcipher
$out3,$out3,v25
1505 vcipher
$out4,$out4,v25
1506 vcipher
$out5,$out5,v25
1507 vcipher
$out6,$out6,v25
1508 vcipher
$out7,$out7,v25
1509 lvx v25
,$x10,$key_ # round[4]
1510 bdnz Loop_ctr32_enc8x
1512 subic r11
,$len,256 # $len-256, borrow $key_
1513 vcipher
$out0,$out0,v24
1514 vcipher
$out1,$out1,v24
1515 vcipher
$out2,$out2,v24
1516 vcipher
$out3,$out3,v24
1517 vcipher
$out4,$out4,v24
1518 vcipher
$out5,$out5,v24
1519 vcipher
$out6,$out6,v24
1520 vcipher
$out7,$out7,v24
1522 subfe r0
,r0
,r0
# borrow?-1:0
1523 vcipher
$out0,$out0,v25
1524 vcipher
$out1,$out1,v25
1525 vcipher
$out2,$out2,v25
1526 vcipher
$out3,$out3,v25
1527 vcipher
$out4,$out4,v25
1528 vcipher
$out5,$out5,v25
1529 vcipher
$out6,$out6,v25
1530 vcipher
$out7,$out7,v25
1533 addi
$key_,$sp,$FRAME+15 # rewind $key_
1534 vcipher
$out0,$out0,v26
1535 vcipher
$out1,$out1,v26
1536 vcipher
$out2,$out2,v26
1537 vcipher
$out3,$out3,v26
1538 vcipher
$out4,$out4,v26
1539 vcipher
$out5,$out5,v26
1540 vcipher
$out6,$out6,v26
1541 vcipher
$out7,$out7,v26
1542 lvx v24
,$x00,$key_ # re-pre-load round[1]
1544 subic
$len,$len,129 # $len-=129
1545 vcipher
$out0,$out0,v27
1546 addi
$len,$len,1 # $len-=128 really
1547 vcipher
$out1,$out1,v27
1548 vcipher
$out2,$out2,v27
1549 vcipher
$out3,$out3,v27
1550 vcipher
$out4,$out4,v27
1551 vcipher
$out5,$out5,v27
1552 vcipher
$out6,$out6,v27
1553 vcipher
$out7,$out7,v27
1554 lvx v25
,$x10,$key_ # re-pre-load round[2]
1556 vcipher
$out0,$out0,v28
1557 lvx_u
$in0,$x00,$inp # load input
1558 vcipher
$out1,$out1,v28
1559 lvx_u
$in1,$x10,$inp
1560 vcipher
$out2,$out2,v28
1561 lvx_u
$in2,$x20,$inp
1562 vcipher
$out3,$out3,v28
1563 lvx_u
$in3,$x30,$inp
1564 vcipher
$out4,$out4,v28
1565 lvx_u
$in4,$x40,$inp
1566 vcipher
$out5,$out5,v28
1567 lvx_u
$in5,$x50,$inp
1568 vcipher
$out6,$out6,v28
1569 lvx_u
$in6,$x60,$inp
1570 vcipher
$out7,$out7,v28
1571 lvx_u
$in7,$x70,$inp
1574 vcipher
$out0,$out0,v29
1575 le?vperm
$in0,$in0,$in0,$inpperm
1576 vcipher
$out1,$out1,v29
1577 le?vperm
$in1,$in1,$in1,$inpperm
1578 vcipher
$out2,$out2,v29
1579 le?vperm
$in2,$in2,$in2,$inpperm
1580 vcipher
$out3,$out3,v29
1581 le?vperm
$in3,$in3,$in3,$inpperm
1582 vcipher
$out4,$out4,v29
1583 le?vperm
$in4,$in4,$in4,$inpperm
1584 vcipher
$out5,$out5,v29
1585 le?vperm
$in5,$in5,$in5,$inpperm
1586 vcipher
$out6,$out6,v29
1587 le?vperm
$in6,$in6,$in6,$inpperm
1588 vcipher
$out7,$out7,v29
1589 le?vperm
$in7,$in7,$in7,$inpperm
1591 add
$inp,$inp,r0
# $inp is adjusted in such
1592 # way that at exit from the
1593 # loop inX-in7 are loaded
1595 subfe
. r0
,r0
,r0
# borrow?-1:0
1596 vcipher
$out0,$out0,v30
1597 vxor
$in0,$in0,v31
# xor with last round key
1598 vcipher
$out1,$out1,v30
1600 vcipher
$out2,$out2,v30
1602 vcipher
$out3,$out3,v30
1604 vcipher
$out4,$out4,v30
1606 vcipher
$out5,$out5,v30
1608 vcipher
$out6,$out6,v30
1610 vcipher
$out7,$out7,v30
1613 bne Lctr32_enc8x_break
# did $len-129 borrow?
1615 vcipherlast
$in0,$out0,$in0
1616 vcipherlast
$in1,$out1,$in1
1617 vadduqm
$out1,$ivec,$one # counter values ...
1618 vcipherlast
$in2,$out2,$in2
1619 vadduqm
$out2,$ivec,$two
1620 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1621 vcipherlast
$in3,$out3,$in3
1622 vadduqm
$out3,$out1,$two
1623 vxor
$out1,$out1,$rndkey0
1624 vcipherlast
$in4,$out4,$in4
1625 vadduqm
$out4,$out2,$two
1626 vxor
$out2,$out2,$rndkey0
1627 vcipherlast
$in5,$out5,$in5
1628 vadduqm
$out5,$out3,$two
1629 vxor
$out3,$out3,$rndkey0
1630 vcipherlast
$in6,$out6,$in6
1631 vadduqm
$out6,$out4,$two
1632 vxor
$out4,$out4,$rndkey0
1633 vcipherlast
$in7,$out7,$in7
1634 vadduqm
$out7,$out5,$two
1635 vxor
$out5,$out5,$rndkey0
1636 le?vperm
$in0,$in0,$in0,$inpperm
1637 vadduqm
$ivec,$out6,$two # next counter value
1638 vxor
$out6,$out6,$rndkey0
1639 le?vperm
$in1,$in1,$in1,$inpperm
1640 vxor
$out7,$out7,$rndkey0
1643 vcipher
$out0,$out0,v24
1644 stvx_u
$in0,$x00,$out
1645 le?vperm
$in2,$in2,$in2,$inpperm
1646 vcipher
$out1,$out1,v24
1647 stvx_u
$in1,$x10,$out
1648 le?vperm
$in3,$in3,$in3,$inpperm
1649 vcipher
$out2,$out2,v24
1650 stvx_u
$in2,$x20,$out
1651 le?vperm
$in4,$in4,$in4,$inpperm
1652 vcipher
$out3,$out3,v24
1653 stvx_u
$in3,$x30,$out
1654 le?vperm
$in5,$in5,$in5,$inpperm
1655 vcipher
$out4,$out4,v24
1656 stvx_u
$in4,$x40,$out
1657 le?vperm
$in6,$in6,$in6,$inpperm
1658 vcipher
$out5,$out5,v24
1659 stvx_u
$in5,$x50,$out
1660 le?vperm
$in7,$in7,$in7,$inpperm
1661 vcipher
$out6,$out6,v24
1662 stvx_u
$in6,$x60,$out
1663 vcipher
$out7,$out7,v24
1664 stvx_u
$in7,$x70,$out
1667 b Loop_ctr32_enc8x_middle
1672 blt Lctr32_enc8x_one
1674 beq Lctr32_enc8x_two
1676 blt Lctr32_enc8x_three
1678 beq Lctr32_enc8x_four
1680 blt Lctr32_enc8x_five
1682 beq Lctr32_enc8x_six
1684 blt Lctr32_enc8x_seven
1687 vcipherlast
$out0,$out0,$in0
1688 vcipherlast
$out1,$out1,$in1
1689 vcipherlast
$out2,$out2,$in2
1690 vcipherlast
$out3,$out3,$in3
1691 vcipherlast
$out4,$out4,$in4
1692 vcipherlast
$out5,$out5,$in5
1693 vcipherlast
$out6,$out6,$in6
1694 vcipherlast
$out7,$out7,$in7
1696 le?vperm
$out0,$out0,$out0,$inpperm
1697 le?vperm
$out1,$out1,$out1,$inpperm
1698 stvx_u
$out0,$x00,$out
1699 le?vperm
$out2,$out2,$out2,$inpperm
1700 stvx_u
$out1,$x10,$out
1701 le?vperm
$out3,$out3,$out3,$inpperm
1702 stvx_u
$out2,$x20,$out
1703 le?vperm
$out4,$out4,$out4,$inpperm
1704 stvx_u
$out3,$x30,$out
1705 le?vperm
$out5,$out5,$out5,$inpperm
1706 stvx_u
$out4,$x40,$out
1707 le?vperm
$out6,$out6,$out6,$inpperm
1708 stvx_u
$out5,$x50,$out
1709 le?vperm
$out7,$out7,$out7,$inpperm
1710 stvx_u
$out6,$x60,$out
1711 stvx_u
$out7,$x70,$out
1717 vcipherlast
$out0,$out0,$in1
1718 vcipherlast
$out1,$out1,$in2
1719 vcipherlast
$out2,$out2,$in3
1720 vcipherlast
$out3,$out3,$in4
1721 vcipherlast
$out4,$out4,$in5
1722 vcipherlast
$out5,$out5,$in6
1723 vcipherlast
$out6,$out6,$in7
1725 le?vperm
$out0,$out0,$out0,$inpperm
1726 le?vperm
$out1,$out1,$out1,$inpperm
1727 stvx_u
$out0,$x00,$out
1728 le?vperm
$out2,$out2,$out2,$inpperm
1729 stvx_u
$out1,$x10,$out
1730 le?vperm
$out3,$out3,$out3,$inpperm
1731 stvx_u
$out2,$x20,$out
1732 le?vperm
$out4,$out4,$out4,$inpperm
1733 stvx_u
$out3,$x30,$out
1734 le?vperm
$out5,$out5,$out5,$inpperm
1735 stvx_u
$out4,$x40,$out
1736 le?vperm
$out6,$out6,$out6,$inpperm
1737 stvx_u
$out5,$x50,$out
1738 stvx_u
$out6,$x60,$out
1744 vcipherlast
$out0,$out0,$in2
1745 vcipherlast
$out1,$out1,$in3
1746 vcipherlast
$out2,$out2,$in4
1747 vcipherlast
$out3,$out3,$in5
1748 vcipherlast
$out4,$out4,$in6
1749 vcipherlast
$out5,$out5,$in7
1751 le?vperm
$out0,$out0,$out0,$inpperm
1752 le?vperm
$out1,$out1,$out1,$inpperm
1753 stvx_u
$out0,$x00,$out
1754 le?vperm
$out2,$out2,$out2,$inpperm
1755 stvx_u
$out1,$x10,$out
1756 le?vperm
$out3,$out3,$out3,$inpperm
1757 stvx_u
$out2,$x20,$out
1758 le?vperm
$out4,$out4,$out4,$inpperm
1759 stvx_u
$out3,$x30,$out
1760 le?vperm
$out5,$out5,$out5,$inpperm
1761 stvx_u
$out4,$x40,$out
1762 stvx_u
$out5,$x50,$out
1768 vcipherlast
$out0,$out0,$in3
1769 vcipherlast
$out1,$out1,$in4
1770 vcipherlast
$out2,$out2,$in5
1771 vcipherlast
$out3,$out3,$in6
1772 vcipherlast
$out4,$out4,$in7
1774 le?vperm
$out0,$out0,$out0,$inpperm
1775 le?vperm
$out1,$out1,$out1,$inpperm
1776 stvx_u
$out0,$x00,$out
1777 le?vperm
$out2,$out2,$out2,$inpperm
1778 stvx_u
$out1,$x10,$out
1779 le?vperm
$out3,$out3,$out3,$inpperm
1780 stvx_u
$out2,$x20,$out
1781 le?vperm
$out4,$out4,$out4,$inpperm
1782 stvx_u
$out3,$x30,$out
1783 stvx_u
$out4,$x40,$out
1789 vcipherlast
$out0,$out0,$in4
1790 vcipherlast
$out1,$out1,$in5
1791 vcipherlast
$out2,$out2,$in6
1792 vcipherlast
$out3,$out3,$in7
1794 le?vperm
$out0,$out0,$out0,$inpperm
1795 le?vperm
$out1,$out1,$out1,$inpperm
1796 stvx_u
$out0,$x00,$out
1797 le?vperm
$out2,$out2,$out2,$inpperm
1798 stvx_u
$out1,$x10,$out
1799 le?vperm
$out3,$out3,$out3,$inpperm
1800 stvx_u
$out2,$x20,$out
1801 stvx_u
$out3,$x30,$out
1807 vcipherlast
$out0,$out0,$in5
1808 vcipherlast
$out1,$out1,$in6
1809 vcipherlast
$out2,$out2,$in7
1811 le?vperm
$out0,$out0,$out0,$inpperm
1812 le?vperm
$out1,$out1,$out1,$inpperm
1813 stvx_u
$out0,$x00,$out
1814 le?vperm
$out2,$out2,$out2,$inpperm
1815 stvx_u
$out1,$x10,$out
1816 stvx_u
$out2,$x20,$out
1822 vcipherlast
$out0,$out0,$in6
1823 vcipherlast
$out1,$out1,$in7
1825 le?vperm
$out0,$out0,$out0,$inpperm
1826 le?vperm
$out1,$out1,$out1,$inpperm
1827 stvx_u
$out0,$x00,$out
1828 stvx_u
$out1,$x10,$out
1834 vcipherlast
$out0,$out0,$in7
1836 le?vperm
$out0,$out0,$out0,$inpperm
1843 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1845 stvx
$inpperm,r11
,$sp
1847 stvx
$inpperm,r10
,$sp
1849 stvx
$inpperm,r11
,$sp
1851 stvx
$inpperm,r10
,$sp
1853 stvx
$inpperm,r11
,$sp
1855 stvx
$inpperm,r10
,$sp
1857 stvx
$inpperm,r11
,$sp
1861 lvx v20
,r10
,$sp # ABI says so
1883 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1884 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1885 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1886 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1887 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1888 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1889 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1892 .byte
0,12,0x14,0,0x80,6,6,0
1894 .size
.${prefix
}_ctr32_encrypt_blocks
,.-.${prefix
}_ctr32_encrypt_blocks
1898 #########################################################################
1899 {{{ # XTS procedures #
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,	#
#                             const AES_KEY *key1, const AES_KEY *key2,	#
#                             [const] unsigned char iv[16]);		#
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which	#
# input tweak value is assumed to be encrypted already, and last tweak	#
# value, one suitable for consecutive call on same chunk of data, is	#
# written back to original buffer. In addition, in "tweak chaining"	#
# mode only complete input blocks are processed.			#
# Register allocation for the scalar XTS procedures: GPRs r3..r10 carry
# the C-level arguments, VRs v0..v12 hold working state.
my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout)                     = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm)   = map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1)          = map("v$_",(8..12));
my $taillen = $key2;			# $key2's register is recycled for tail length

   ($inp,$idx) = ($idx,$inp);		# reassign: $inp ends up in r10, $idx in r3
1918 .globl
.${prefix
}_xts_encrypt
1919 mr
$inp,r3
# reassign
1925 mfspr r12
,256 # save vrsave
1929 vspltisb
$seven,0x07 # 0x070707..07
1930 le?lvsl
$leperm,r11
,r11
1931 le?vspltisb
$tmp,0x0f
1932 le?vxor
$leperm,$leperm,$seven
1935 lvx
$tweak,0,$ivp # load [unaligned] iv
1936 lvsl
$inpperm,0,$ivp
1937 lvx
$inptail,$idx,$ivp
1938 le?vxor
$inpperm,$inpperm,$tmp
1939 vperm
$tweak,$tweak,$inptail,$inpperm
1942 lvsr
$inpperm,0,r11
# prepare for unaligned load
1944 addi
$inp,$inp,15 # 15 is not typo
1945 le?vxor
$inpperm,$inpperm,$tmp
1947 ${UCMP
}i
$key2,0 # key2==NULL?
1948 beq Lxts_enc_no_key2
1950 ?lvsl
$keyperm,0,$key2 # prepare for unaligned key
1951 lwz
$rounds,240($key2)
1952 srwi
$rounds,$rounds,1
1953 subi
$rounds,$rounds,1
1956 lvx
$rndkey0,0,$key2
1957 lvx
$rndkey1,$idx,$key2
1959 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1960 vxor
$tweak,$tweak,$rndkey0
1961 lvx
$rndkey0,$idx,$key2
1966 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1967 vcipher
$tweak,$tweak,$rndkey1
1968 lvx
$rndkey1,$idx,$key2
1970 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1971 vcipher
$tweak,$tweak,$rndkey0
1972 lvx
$rndkey0,$idx,$key2
1976 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1977 vcipher
$tweak,$tweak,$rndkey1
1978 lvx
$rndkey1,$idx,$key2
1979 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1980 vcipherlast
$tweak,$tweak,$rndkey0
1982 li
$ivp,0 # don't chain the tweak
1987 and $len,$len,$idx # in "tweak chaining"
1988 # mode only complete
1989 # blocks are processed
1994 ?lvsl
$keyperm,0,$key1 # prepare for unaligned key
1995 lwz
$rounds,240($key1)
1996 srwi
$rounds,$rounds,1
1997 subi
$rounds,$rounds,1
2000 vslb
$eighty7,$seven,$seven # 0x808080..80
2001 vor
$eighty7,$eighty7,$seven # 0x878787..87
2002 vspltisb
$tmp,1 # 0x010101..01
2003 vsldoi
$eighty7,$eighty7,$tmp,15 # 0x870101..01
2006 bge _aesp8_xts_encrypt6x
2008 andi
. $taillen,$len,15
2010 subi
$taillen,$taillen,16
2015 lvx
$rndkey0,0,$key1
2016 lvx
$rndkey1,$idx,$key1
2018 vperm
$inout,$inout,$inptail,$inpperm
2019 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2020 vxor
$inout,$inout,$tweak
2021 vxor
$inout,$inout,$rndkey0
2022 lvx
$rndkey0,$idx,$key1
2029 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2030 vcipher
$inout,$inout,$rndkey1
2031 lvx
$rndkey1,$idx,$key1
2033 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2034 vcipher
$inout,$inout,$rndkey0
2035 lvx
$rndkey0,$idx,$key1
2039 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2040 vcipher
$inout,$inout,$rndkey1
2041 lvx
$rndkey1,$idx,$key1
2043 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2044 vxor
$rndkey0,$rndkey0,$tweak
2045 vcipherlast
$output,$inout,$rndkey0
2047 le?vperm
$tmp,$output,$output,$leperm
2049 le?stvx_u
$tmp,0,$out
2050 be?stvx_u
$output,0,$out
2059 lvx
$rndkey0,0,$key1
2060 lvx
$rndkey1,$idx,$key1
2068 vsrab
$tmp,$tweak,$seven # next tweak value
2069 vaddubm
$tweak,$tweak,$tweak
2070 vsldoi
$tmp,$tmp,$tmp,15
2071 vand
$tmp,$tmp,$eighty7
2072 vxor
$tweak,$tweak,$tmp
2074 vperm
$inout,$inout,$inptail,$inpperm
2075 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2076 vxor
$inout,$inout,$tweak
2077 vxor
$output,$output,$rndkey0 # just in case $len<16
2078 vxor
$inout,$inout,$rndkey0
2079 lvx
$rndkey0,$idx,$key1
2086 vxor
$output,$output,$tweak
2087 lvsr
$inpperm,0,$len # $inpperm is no longer needed
2088 vxor
$inptail,$inptail,$inptail # $inptail is no longer needed
2090 vperm
$inptail,$inptail,$tmp,$inpperm
2091 vsel
$inout,$inout,$output,$inptail
2100 bdnz Loop_xts_enc_steal
2103 b Loop_xts_enc
# one more time...
2109 vsrab
$tmp,$tweak,$seven # next tweak value
2110 vaddubm
$tweak,$tweak,$tweak
2111 vsldoi
$tmp,$tmp,$tmp,15
2112 vand
$tmp,$tmp,$eighty7
2113 vxor
$tweak,$tweak,$tmp
2115 le?vperm
$tweak,$tweak,$tweak,$leperm
2116 stvx_u
$tweak,0,$ivp
2119 mtspr
256,r12
# restore vrsave
2123 .byte
0,12,0x04,0,0x80,6,6,0
2125 .size
.${prefix
}_xts_encrypt
,.-.${prefix
}_xts_encrypt
2127 .globl
.${prefix
}_xts_decrypt
2128 mr
$inp,r3
# reassign
2134 mfspr r12
,256 # save vrsave
2143 vspltisb
$seven,0x07 # 0x070707..07
2144 le?lvsl
$leperm,r11
,r11
2145 le?vspltisb
$tmp,0x0f
2146 le?vxor
$leperm,$leperm,$seven
2149 lvx
$tweak,0,$ivp # load [unaligned] iv
2150 lvsl
$inpperm,0,$ivp
2151 lvx
$inptail,$idx,$ivp
2152 le?vxor
$inpperm,$inpperm,$tmp
2153 vperm
$tweak,$tweak,$inptail,$inpperm
2156 lvsr
$inpperm,0,r11
# prepare for unaligned load
2158 addi
$inp,$inp,15 # 15 is not typo
2159 le?vxor
$inpperm,$inpperm,$tmp
2161 ${UCMP
}i
$key2,0 # key2==NULL?
2162 beq Lxts_dec_no_key2
2164 ?lvsl
$keyperm,0,$key2 # prepare for unaligned key
2165 lwz
$rounds,240($key2)
2166 srwi
$rounds,$rounds,1
2167 subi
$rounds,$rounds,1
2170 lvx
$rndkey0,0,$key2
2171 lvx
$rndkey1,$idx,$key2
2173 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2174 vxor
$tweak,$tweak,$rndkey0
2175 lvx
$rndkey0,$idx,$key2
2180 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2181 vcipher
$tweak,$tweak,$rndkey1
2182 lvx
$rndkey1,$idx,$key2
2184 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2185 vcipher
$tweak,$tweak,$rndkey0
2186 lvx
$rndkey0,$idx,$key2
2190 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2191 vcipher
$tweak,$tweak,$rndkey1
2192 lvx
$rndkey1,$idx,$key2
2193 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2194 vcipherlast
$tweak,$tweak,$rndkey0
2196 li
$ivp,0 # don't chain the tweak
2202 add
$len,$len,$idx # in "tweak chaining"
2203 # mode only complete
2204 # blocks are processed
2209 ?lvsl
$keyperm,0,$key1 # prepare for unaligned key
2210 lwz
$rounds,240($key1)
2211 srwi
$rounds,$rounds,1
2212 subi
$rounds,$rounds,1
2215 vslb
$eighty7,$seven,$seven # 0x808080..80
2216 vor
$eighty7,$eighty7,$seven # 0x878787..87
2217 vspltisb
$tmp,1 # 0x010101..01
2218 vsldoi
$eighty7,$eighty7,$tmp,15 # 0x870101..01
2221 bge _aesp8_xts_decrypt6x
2223 lvx
$rndkey0,0,$key1
2224 lvx
$rndkey1,$idx,$key1
2226 vperm
$inout,$inout,$inptail,$inpperm
2227 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2228 vxor
$inout,$inout,$tweak
2229 vxor
$inout,$inout,$rndkey0
2230 lvx
$rndkey0,$idx,$key1
2240 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2241 vncipher
$inout,$inout,$rndkey1
2242 lvx
$rndkey1,$idx,$key1
2244 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2245 vncipher
$inout,$inout,$rndkey0
2246 lvx
$rndkey0,$idx,$key1
2250 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2251 vncipher
$inout,$inout,$rndkey1
2252 lvx
$rndkey1,$idx,$key1
2254 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2255 vxor
$rndkey0,$rndkey0,$tweak
2256 vncipherlast
$output,$inout,$rndkey0
2258 le?vperm
$tmp,$output,$output,$leperm
2260 le?stvx_u
$tmp,0,$out
2261 be?stvx_u
$output,0,$out
2270 lvx
$rndkey0,0,$key1
2271 lvx
$rndkey1,$idx,$key1
2274 vsrab
$tmp,$tweak,$seven # next tweak value
2275 vaddubm
$tweak,$tweak,$tweak
2276 vsldoi
$tmp,$tmp,$tmp,15
2277 vand
$tmp,$tmp,$eighty7
2278 vxor
$tweak,$tweak,$tmp
2280 vperm
$inout,$inout,$inptail,$inpperm
2281 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2282 vxor
$inout,$inout,$tweak
2283 vxor
$inout,$inout,$rndkey0
2284 lvx
$rndkey0,$idx,$key1
2292 vsrab
$tmp,$tweak,$seven # next tweak value
2293 vaddubm
$tweak1,$tweak,$tweak
2294 vsldoi
$tmp,$tmp,$tmp,15
2295 vand
$tmp,$tmp,$eighty7
2296 vxor
$tweak1,$tweak1,$tmp
2301 vxor
$inout,$inout,$tweak # :-(
2302 vxor
$inout,$inout,$tweak1 # :-)
2305 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2306 vncipher
$inout,$inout,$rndkey1
2307 lvx
$rndkey1,$idx,$key1
2309 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2310 vncipher
$inout,$inout,$rndkey0
2311 lvx
$rndkey0,$idx,$key1
2313 bdnz Loop_xts_dec_short
2315 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
2316 vncipher
$inout,$inout,$rndkey1
2317 lvx
$rndkey1,$idx,$key1
2319 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2320 vxor
$rndkey0,$rndkey0,$tweak1
2321 vncipherlast
$output,$inout,$rndkey0
2323 le?vperm
$tmp,$output,$output,$leperm
2325 le?stvx_u
$tmp,0,$out
2326 be?stvx_u
$output,0,$out
2331 lvx
$rndkey0,0,$key1
2332 lvx
$rndkey1,$idx,$key1
2334 vperm
$inout,$inout,$inptail,$inpperm
2335 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
2337 lvsr
$inpperm,0,$len # $inpperm is no longer needed
2338 vxor
$inptail,$inptail,$inptail # $inptail is no longer needed
2340 vperm
$inptail,$inptail,$tmp,$inpperm
2341 vsel
$inout,$inout,$output,$inptail
2343 vxor
$rndkey0,$rndkey0,$tweak
2344 vxor
$inout,$inout,$rndkey0
2345 lvx
$rndkey0,$idx,$key1
2354 bdnz Loop_xts_dec_steal
2357 b Loop_xts_dec
# one more time...
2363 vsrab
$tmp,$tweak,$seven # next tweak value
2364 vaddubm
$tweak,$tweak,$tweak
2365 vsldoi
$tmp,$tmp,$tmp,15
2366 vand
$tmp,$tmp,$eighty7
2367 vxor
$tweak,$tweak,$tmp
2369 le?vperm
$tweak,$tweak,$tweak,$leperm
2370 stvx_u
$tweak,0,$ivp
2373 mtspr
256,r12
# restore vrsave
2377 .byte
0,12,0x04,0,0x80,6,6,0
2379 .size
.${prefix
}_xts_decrypt
,.-.${prefix
}_xts_decrypt
2381 #########################################################################
2382 {{ # Optimized XTS procedures #
# Register allocation for the optimized (6x interleaved) XTS procedures:
# $x00..$x70 are 16-byte block offsets held in GPRs.
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70) = map("r$_",(0,3,26..31));
    $x00=0 if ($flavour =~ /osx/);	# r0 reads as literal 0 in RA slot; use immediate 0 on osx
my ($in0,  $in1,  $in2,  $in3,  $in4,  $in5 ) = map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5) = map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5) = map("v$_",(17..22));
my $rndkey0 = "v23";	# v24-v25 rotating buffer for first found keys
			# v26-v31 last 6 round keys
my ($keyperm) = ($out0);	# aliases with "caller", redundant assignment
2396 _aesp8_xts_encrypt6x
:
2397 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2399 li r7
,`$FRAME+8*16+15`
2400 li r3
,`$FRAME+8*16+31`
2401 $PUSH r11
,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2402 stvx v20
,r7
,$sp # ABI says so
2425 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2427 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
2429 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
2431 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
2433 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
2435 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
2437 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
2441 subi
$rounds,$rounds,3 # -4 in total
2443 lvx
$rndkey0,$x00,$key1 # load key schedule
2445 addi
$key1,$key1,0x20
2447 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
2448 addi
$key_,$sp,$FRAME+15
2452 ?vperm v24
,v30
,v31
,$keyperm
2454 addi
$key1,$key1,0x20
2455 stvx v24
,$x00,$key_ # off-load round[1]
2456 ?vperm v25
,v31
,v30
,$keyperm
2458 stvx v25
,$x10,$key_ # off-load round[2]
2459 addi
$key_,$key_,0x20
2460 bdnz Load_xts_enc_key
2463 ?vperm v24
,v30
,v31
,$keyperm
2465 stvx v24
,$x00,$key_ # off-load round[3]
2466 ?vperm v25
,v31
,v26
,$keyperm
2468 stvx v25
,$x10,$key_ # off-load round[4]
2469 addi
$key_,$sp,$FRAME+15 # rewind $key_
2470 ?vperm v26
,v26
,v27
,$keyperm
2472 ?vperm v27
,v27
,v28
,$keyperm
2474 ?vperm v28
,v28
,v29
,$keyperm
2476 ?vperm v29
,v29
,v30
,$keyperm
2477 lvx
$twk5,$x70,$key1 # borrow $twk5
2478 ?vperm v30
,v30
,v31
,$keyperm
2479 lvx v24
,$x00,$key_ # pre-load round[1]
2480 ?vperm v31
,v31
,$twk5,$keyperm
2481 lvx v25
,$x10,$key_ # pre-load round[2]
2483 vperm
$in0,$inout,$inptail,$inpperm
2484 subi
$inp,$inp,31 # undo "caller"
2485 vxor
$twk0,$tweak,$rndkey0
2486 vsrab
$tmp,$tweak,$seven # next tweak value
2487 vaddubm
$tweak,$tweak,$tweak
2488 vsldoi
$tmp,$tmp,$tmp,15
2489 vand
$tmp,$tmp,$eighty7
2490 vxor
$out0,$in0,$twk0
2491 vxor
$tweak,$tweak,$tmp
2493 lvx_u
$in1,$x10,$inp
2494 vxor
$twk1,$tweak,$rndkey0
2495 vsrab
$tmp,$tweak,$seven # next tweak value
2496 vaddubm
$tweak,$tweak,$tweak
2497 vsldoi
$tmp,$tmp,$tmp,15
2498 le?vperm
$in1,$in1,$in1,$leperm
2499 vand
$tmp,$tmp,$eighty7
2500 vxor
$out1,$in1,$twk1
2501 vxor
$tweak,$tweak,$tmp
2503 lvx_u
$in2,$x20,$inp
2504 andi
. $taillen,$len,15
2505 vxor
$twk2,$tweak,$rndkey0
2506 vsrab
$tmp,$tweak,$seven # next tweak value
2507 vaddubm
$tweak,$tweak,$tweak
2508 vsldoi
$tmp,$tmp,$tmp,15
2509 le?vperm
$in2,$in2,$in2,$leperm
2510 vand
$tmp,$tmp,$eighty7
2511 vxor
$out2,$in2,$twk2
2512 vxor
$tweak,$tweak,$tmp
2514 lvx_u
$in3,$x30,$inp
2515 sub $len,$len,$taillen
2516 vxor
$twk3,$tweak,$rndkey0
2517 vsrab
$tmp,$tweak,$seven # next tweak value
2518 vaddubm
$tweak,$tweak,$tweak
2519 vsldoi
$tmp,$tmp,$tmp,15
2520 le?vperm
$in3,$in3,$in3,$leperm
2521 vand
$tmp,$tmp,$eighty7
2522 vxor
$out3,$in3,$twk3
2523 vxor
$tweak,$tweak,$tmp
2525 lvx_u
$in4,$x40,$inp
2527 vxor
$twk4,$tweak,$rndkey0
2528 vsrab
$tmp,$tweak,$seven # next tweak value
2529 vaddubm
$tweak,$tweak,$tweak
2530 vsldoi
$tmp,$tmp,$tmp,15
2531 le?vperm
$in4,$in4,$in4,$leperm
2532 vand
$tmp,$tmp,$eighty7
2533 vxor
$out4,$in4,$twk4
2534 vxor
$tweak,$tweak,$tmp
2536 lvx_u
$in5,$x50,$inp
2538 vxor
$twk5,$tweak,$rndkey0
2539 vsrab
$tmp,$tweak,$seven # next tweak value
2540 vaddubm
$tweak,$tweak,$tweak
2541 vsldoi
$tmp,$tmp,$tmp,15
2542 le?vperm
$in5,$in5,$in5,$leperm
2543 vand
$tmp,$tmp,$eighty7
2544 vxor
$out5,$in5,$twk5
2545 vxor
$tweak,$tweak,$tmp
2547 vxor v31
,v31
,$rndkey0
2553 vcipher
$out0,$out0,v24
2554 vcipher
$out1,$out1,v24
2555 vcipher
$out2,$out2,v24
2556 vcipher
$out3,$out3,v24
2557 vcipher
$out4,$out4,v24
2558 vcipher
$out5,$out5,v24
2559 lvx v24
,$x20,$key_ # round[3]
2560 addi
$key_,$key_,0x20
2562 vcipher
$out0,$out0,v25
2563 vcipher
$out1,$out1,v25
2564 vcipher
$out2,$out2,v25
2565 vcipher
$out3,$out3,v25
2566 vcipher
$out4,$out4,v25
2567 vcipher
$out5,$out5,v25
2568 lvx v25
,$x10,$key_ # round[4]
2571 subic
$len,$len,96 # $len-=96
2572 vxor
$in0,$twk0,v31
# xor with last round key
2573 vcipher
$out0,$out0,v24
2574 vcipher
$out1,$out1,v24
2575 vsrab
$tmp,$tweak,$seven # next tweak value
2576 vxor
$twk0,$tweak,$rndkey0
2577 vaddubm
$tweak,$tweak,$tweak
2578 vcipher
$out2,$out2,v24
2579 vcipher
$out3,$out3,v24
2580 vsldoi
$tmp,$tmp,$tmp,15
2581 vcipher
$out4,$out4,v24
2582 vcipher
$out5,$out5,v24
2584 subfe
. r0
,r0
,r0
# borrow?-1:0
2585 vand
$tmp,$tmp,$eighty7
2586 vcipher
$out0,$out0,v25
2587 vcipher
$out1,$out1,v25
2588 vxor
$tweak,$tweak,$tmp
2589 vcipher
$out2,$out2,v25
2590 vcipher
$out3,$out3,v25
2592 vsrab
$tmp,$tweak,$seven # next tweak value
2593 vxor
$twk1,$tweak,$rndkey0
2594 vcipher
$out4,$out4,v25
2595 vcipher
$out5,$out5,v25
2598 vaddubm
$tweak,$tweak,$tweak
2599 vsldoi
$tmp,$tmp,$tmp,15
2600 vcipher
$out0,$out0,v26
2601 vcipher
$out1,$out1,v26
2602 vand
$tmp,$tmp,$eighty7
2603 vcipher
$out2,$out2,v26
2604 vcipher
$out3,$out3,v26
2605 vxor
$tweak,$tweak,$tmp
2606 vcipher
$out4,$out4,v26
2607 vcipher
$out5,$out5,v26
2609 add
$inp,$inp,r0
# $inp is adjusted in such
2610 # way that at exit from the
2611 # loop inX-in5 are loaded
2614 vsrab
$tmp,$tweak,$seven # next tweak value
2615 vxor
$twk2,$tweak,$rndkey0
2616 vaddubm
$tweak,$tweak,$tweak
2617 vcipher
$out0,$out0,v27
2618 vcipher
$out1,$out1,v27
2619 vsldoi
$tmp,$tmp,$tmp,15
2620 vcipher
$out2,$out2,v27
2621 vcipher
$out3,$out3,v27
2622 vand
$tmp,$tmp,$eighty7
2623 vcipher
$out4,$out4,v27
2624 vcipher
$out5,$out5,v27
2626 addi
$key_,$sp,$FRAME+15 # rewind $key_
2627 vxor
$tweak,$tweak,$tmp
2628 vcipher
$out0,$out0,v28
2629 vcipher
$out1,$out1,v28
2631 vsrab
$tmp,$tweak,$seven # next tweak value
2632 vxor
$twk3,$tweak,$rndkey0
2633 vcipher
$out2,$out2,v28
2634 vcipher
$out3,$out3,v28
2635 vaddubm
$tweak,$tweak,$tweak
2636 vsldoi
$tmp,$tmp,$tmp,15
2637 vcipher
$out4,$out4,v28
2638 vcipher
$out5,$out5,v28
2639 lvx v24
,$x00,$key_ # re-pre-load round[1]
2640 vand
$tmp,$tmp,$eighty7
2642 vcipher
$out0,$out0,v29
2643 vcipher
$out1,$out1,v29
2644 vxor
$tweak,$tweak,$tmp
2645 vcipher
$out2,$out2,v29
2646 vcipher
$out3,$out3,v29
2648 vsrab
$tmp,$tweak,$seven # next tweak value
2649 vxor
$twk4,$tweak,$rndkey0
2650 vcipher
$out4,$out4,v29
2651 vcipher
$out5,$out5,v29
2652 lvx v25
,$x10,$key_ # re-pre-load round[2]
2653 vaddubm
$tweak,$tweak,$tweak
2654 vsldoi
$tmp,$tmp,$tmp,15
2656 vcipher
$out0,$out0,v30
2657 vcipher
$out1,$out1,v30
2658 vand
$tmp,$tmp,$eighty7
2659 vcipher
$out2,$out2,v30
2660 vcipher
$out3,$out3,v30
2661 vxor
$tweak,$tweak,$tmp
2662 vcipher
$out4,$out4,v30
2663 vcipher
$out5,$out5,v30
2665 vsrab
$tmp,$tweak,$seven # next tweak value
2666 vxor
$twk5,$tweak,$rndkey0
2668 vcipherlast
$out0,$out0,$in0
2669 lvx_u
$in0,$x00,$inp # load next input block
2670 vaddubm
$tweak,$tweak,$tweak
2671 vsldoi
$tmp,$tmp,$tmp,15
2672 vcipherlast
$out1,$out1,$in1
2673 lvx_u
$in1,$x10,$inp
2674 vcipherlast
$out2,$out2,$in2
2675 le?vperm
$in0,$in0,$in0,$leperm
2676 lvx_u
$in2,$x20,$inp
2677 vand
$tmp,$tmp,$eighty7
2678 vcipherlast
$out3,$out3,$in3
2679 le?vperm
$in1,$in1,$in1,$leperm
2680 lvx_u
$in3,$x30,$inp
2681 vcipherlast
$out4,$out4,$in4
2682 le?vperm
$in2,$in2,$in2,$leperm
2683 lvx_u
$in4,$x40,$inp
2684 vxor
$tweak,$tweak,$tmp
2685 vcipherlast
$tmp,$out5,$in5 # last block might be needed
2687 le?vperm
$in3,$in3,$in3,$leperm
2688 lvx_u
$in5,$x50,$inp
2690 le?vperm
$in4,$in4,$in4,$leperm
2691 le?vperm
$in5,$in5,$in5,$leperm
2693 le?vperm
$out0,$out0,$out0,$leperm
2694 le?vperm
$out1,$out1,$out1,$leperm
2695 stvx_u
$out0,$x00,$out # store output
2696 vxor
$out0,$in0,$twk0
2697 le?vperm
$out2,$out2,$out2,$leperm
2698 stvx_u
$out1,$x10,$out
2699 vxor
$out1,$in1,$twk1
2700 le?vperm
$out3,$out3,$out3,$leperm
2701 stvx_u
$out2,$x20,$out
2702 vxor
$out2,$in2,$twk2
2703 le?vperm
$out4,$out4,$out4,$leperm
2704 stvx_u
$out3,$x30,$out
2705 vxor
$out3,$in3,$twk3
2706 le?vperm
$out5,$tmp,$tmp,$leperm
2707 stvx_u
$out4,$x40,$out
2708 vxor
$out4,$in4,$twk4
2709 le?stvx_u
$out5,$x50,$out
2710 be?stvx_u
$tmp, $x50,$out
2711 vxor
$out5,$in5,$twk5
2715 beq Loop_xts_enc6x
# did $len-=96 borrow?
2717 addic
. $len,$len,0x60
2724 blt Lxts_enc6x_three
2729 vxor
$out0,$in1,$twk0
2730 vxor
$out1,$in2,$twk1
2731 vxor
$out2,$in3,$twk2
2732 vxor
$out3,$in4,$twk3
2733 vxor
$out4,$in5,$twk4
2737 le?vperm
$out0,$out0,$out0,$leperm
2738 vmr
$twk0,$twk5 # unused tweak
2739 le?vperm
$out1,$out1,$out1,$leperm
2740 stvx_u
$out0,$x00,$out # store output
2741 le?vperm
$out2,$out2,$out2,$leperm
2742 stvx_u
$out1,$x10,$out
2743 le?vperm
$out3,$out3,$out3,$leperm
2744 stvx_u
$out2,$x20,$out
2745 vxor
$tmp,$out4,$twk5 # last block prep for stealing
2746 le?vperm
$out4,$out4,$out4,$leperm
2747 stvx_u
$out3,$x30,$out
2748 stvx_u
$out4,$x40,$out
2750 bne Lxts_enc6x_steal
2755 vxor
$out0,$in2,$twk0
2756 vxor
$out1,$in3,$twk1
2757 vxor
$out2,$in4,$twk2
2758 vxor
$out3,$in5,$twk3
2759 vxor
$out4,$out4,$out4
2763 le?vperm
$out0,$out0,$out0,$leperm
2764 vmr
$twk0,$twk4 # unused tweak
2765 le?vperm
$out1,$out1,$out1,$leperm
2766 stvx_u
$out0,$x00,$out # store output
2767 le?vperm
$out2,$out2,$out2,$leperm
2768 stvx_u
$out1,$x10,$out
2769 vxor
$tmp,$out3,$twk4 # last block prep for stealing
2770 le?vperm
$out3,$out3,$out3,$leperm
2771 stvx_u
$out2,$x20,$out
2772 stvx_u
$out3,$x30,$out
2774 bne Lxts_enc6x_steal
2779 vxor
$out0,$in3,$twk0
2780 vxor
$out1,$in4,$twk1
2781 vxor
$out2,$in5,$twk2
2782 vxor
$out3,$out3,$out3
2783 vxor
$out4,$out4,$out4
2787 le?vperm
$out0,$out0,$out0,$leperm
2788 vmr
$twk0,$twk3 # unused tweak
2789 le?vperm
$out1,$out1,$out1,$leperm
2790 stvx_u
$out0,$x00,$out # store output
2791 vxor
$tmp,$out2,$twk3 # last block prep for stealing
2792 le?vperm
$out2,$out2,$out2,$leperm
2793 stvx_u
$out1,$x10,$out
2794 stvx_u
$out2,$x20,$out
2796 bne Lxts_enc6x_steal
2801 vxor
$out0,$in4,$twk0
2802 vxor
$out1,$in5,$twk1
2803 vxor
$out2,$out2,$out2
2804 vxor
$out3,$out3,$out3
2805 vxor
$out4,$out4,$out4
2809 le?vperm
$out0,$out0,$out0,$leperm
2810 vmr
$twk0,$twk2 # unused tweak
2811 vxor
$tmp,$out1,$twk2 # last block prep for stealing
2812 le?vperm
$out1,$out1,$out1,$leperm
2813 stvx_u
$out0,$x00,$out # store output
2814 stvx_u
$out1,$x10,$out
2816 bne Lxts_enc6x_steal
2821 vxor
$out0,$in5,$twk0
2824 vcipher
$out0,$out0,v24
2825 lvx v24
,$x20,$key_ # round[3]
2826 addi
$key_,$key_,0x20
2828 vcipher
$out0,$out0,v25
2829 lvx v25
,$x10,$key_ # round[4]
2832 add
$inp,$inp,$taillen
2834 vcipher
$out0,$out0,v24
2837 vcipher
$out0,$out0,v25
2839 lvsr
$inpperm,0,$taillen
2840 vcipher
$out0,$out0,v26
2843 vcipher
$out0,$out0,v27
2845 addi
$key_,$sp,$FRAME+15 # rewind $key_
2846 vcipher
$out0,$out0,v28
2847 lvx v24
,$x00,$key_ # re-pre-load round[1]
2849 vcipher
$out0,$out0,v29
2850 lvx v25
,$x10,$key_ # re-pre-load round[2]
2851 vxor
$twk0,$twk0,v31
2853 le?vperm
$in0,$in0,$in0,$leperm
2854 vcipher
$out0,$out0,v30
2856 vperm
$in0,$in0,$in0,$inpperm
2857 vcipherlast
$out0,$out0,$twk0
2859 vmr
$twk0,$twk1 # unused tweak
2860 vxor
$tmp,$out0,$twk1 # last block prep for stealing
2861 le?vperm
$out0,$out0,$out0,$leperm
2862 stvx_u
$out0,$x00,$out # store output
2864 bne Lxts_enc6x_steal
2872 add
$inp,$inp,$taillen
2875 lvsr
$inpperm,0,$taillen # $in5 is no more
2876 le?vperm
$in0,$in0,$in0,$leperm
2877 vperm
$in0,$in0,$in0,$inpperm
2878 vxor
$tmp,$tmp,$twk0
2880 vxor
$in0,$in0,$twk0
2881 vxor
$out0,$out0,$out0
2883 vperm
$out0,$out0,$out1,$inpperm
2884 vsel
$out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2889 Loop_xts_enc6x_steal
:
2892 bdnz Loop_xts_enc6x_steal
2896 b Loop_xts_enc1x
# one more time...
2903 vxor
$tweak,$twk0,$rndkey0
2904 le?vperm
$tweak,$tweak,$tweak,$leperm
2905 stvx_u
$tweak,0,$ivp
2911 stvx
$seven,r10
,$sp # wipe copies of round keys
2929 lvx v20
,r10
,$sp # ABI says so
2951 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
2952 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
2953 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
2954 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
2955 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
2956 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
2957 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2960 .byte
0,12,0x04,1,0x80,6,6,0
2965 vcipher
$out0,$out0,v24
2966 vcipher
$out1,$out1,v24
2967 vcipher
$out2,$out2,v24
2968 vcipher
$out3,$out3,v24
2969 vcipher
$out4,$out4,v24
2970 lvx v24
,$x20,$key_ # round[3]
2971 addi
$key_,$key_,0x20
2973 vcipher
$out0,$out0,v25
2974 vcipher
$out1,$out1,v25
2975 vcipher
$out2,$out2,v25
2976 vcipher
$out3,$out3,v25
2977 vcipher
$out4,$out4,v25
2978 lvx v25
,$x10,$key_ # round[4]
2979 bdnz _aesp8_xts_enc5x
2981 add
$inp,$inp,$taillen
2983 vcipher
$out0,$out0,v24
2984 vcipher
$out1,$out1,v24
2985 vcipher
$out2,$out2,v24
2986 vcipher
$out3,$out3,v24
2987 vcipher
$out4,$out4,v24
2990 vcipher
$out0,$out0,v25
2991 vcipher
$out1,$out1,v25
2992 vcipher
$out2,$out2,v25
2993 vcipher
$out3,$out3,v25
2994 vcipher
$out4,$out4,v25
2995 vxor
$twk0,$twk0,v31
2997 vcipher
$out0,$out0,v26
2998 lvsr
$inpperm,r0
,$taillen # $in5 is no more
2999 vcipher
$out1,$out1,v26
3000 vcipher
$out2,$out2,v26
3001 vcipher
$out3,$out3,v26
3002 vcipher
$out4,$out4,v26
3005 vcipher
$out0,$out0,v27
3007 vcipher
$out1,$out1,v27
3008 vcipher
$out2,$out2,v27
3009 vcipher
$out3,$out3,v27
3010 vcipher
$out4,$out4,v27
3013 addi
$key_,$sp,$FRAME+15 # rewind $key_
3014 vcipher
$out0,$out0,v28
3015 vcipher
$out1,$out1,v28
3016 vcipher
$out2,$out2,v28
3017 vcipher
$out3,$out3,v28
3018 vcipher
$out4,$out4,v28
3019 lvx v24
,$x00,$key_ # re-pre-load round[1]
3022 vcipher
$out0,$out0,v29
3023 le?vperm
$in0,$in0,$in0,$leperm
3024 vcipher
$out1,$out1,v29
3025 vcipher
$out2,$out2,v29
3026 vcipher
$out3,$out3,v29
3027 vcipher
$out4,$out4,v29
3028 lvx v25
,$x10,$key_ # re-pre-load round[2]
3031 vcipher
$out0,$out0,v30
3032 vperm
$in0,$in0,$in0,$inpperm
3033 vcipher
$out1,$out1,v30
3034 vcipher
$out2,$out2,v30
3035 vcipher
$out3,$out3,v30
3036 vcipher
$out4,$out4,v30
3038 vcipherlast
$out0,$out0,$twk0
3039 vcipherlast
$out1,$out1,$in1
3040 vcipherlast
$out2,$out2,$in2
3041 vcipherlast
$out3,$out3,$in3
3042 vcipherlast
$out4,$out4,$in4
3045 .byte
0,12,0x14,0,0,0,0,0
3048 _aesp8_xts_decrypt6x
:
3049 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3051 li r7
,`$FRAME+8*16+15`
3052 li r3
,`$FRAME+8*16+31`
3053 $PUSH r11
,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3054 stvx v20
,r7
,$sp # ABI says so
3077 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3079 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
3081 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
3083 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
3085 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
3087 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
3089 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
3093 subi
$rounds,$rounds,3 # -4 in total
3095 lvx
$rndkey0,$x00,$key1 # load key schedule
3097 addi
$key1,$key1,0x20
3099 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
3100 addi
$key_,$sp,$FRAME+15
3104 ?vperm v24
,v30
,v31
,$keyperm
3106 addi
$key1,$key1,0x20
3107 stvx v24
,$x00,$key_ # off-load round[1]
3108 ?vperm v25
,v31
,v30
,$keyperm
3110 stvx v25
,$x10,$key_ # off-load round[2]
3111 addi
$key_,$key_,0x20
3112 bdnz Load_xts_dec_key
3115 ?vperm v24
,v30
,v31
,$keyperm
3117 stvx v24
,$x00,$key_ # off-load round[3]
3118 ?vperm v25
,v31
,v26
,$keyperm
3120 stvx v25
,$x10,$key_ # off-load round[4]
3121 addi
$key_,$sp,$FRAME+15 # rewind $key_
3122 ?vperm v26
,v26
,v27
,$keyperm
3124 ?vperm v27
,v27
,v28
,$keyperm
3126 ?vperm v28
,v28
,v29
,$keyperm
3128 ?vperm v29
,v29
,v30
,$keyperm
3129 lvx
$twk5,$x70,$key1 # borrow $twk5
3130 ?vperm v30
,v30
,v31
,$keyperm
3131 lvx v24
,$x00,$key_ # pre-load round[1]
3132 ?vperm v31
,v31
,$twk5,$keyperm
3133 lvx v25
,$x10,$key_ # pre-load round[2]
# Initial tweak chain: derive twk0..twk5 and whiten the first six input
# blocks. Each step computes the next tweak by multiplying the current one
# by x in GF(2^128): vsrab/vaddubm produce the shift plus carry mask,
# vsldoi rotates the carry into place, and vand with $eighty7 applies the
# reduction constant (presumably 0x87 preloaded elsewhere -- the constant
# setup is outside this excerpt; verify against Lconsts).
# Each tweak is pre-xored with $rndkey0 (round-0 key) so the whitening
# vxor below folds tweak^key0 into the block in one operation.
3135 vperm
$in0,$inout,$inptail,$inpperm
3136 subi
$inp,$inp,31 # undo "caller"
3137 vxor
$twk0,$tweak,$rndkey0
3138 vsrab
$tmp,$tweak,$seven # next tweak value
3139 vaddubm
$tweak,$tweak,$tweak
3140 vsldoi
$tmp,$tmp,$tmp,15
3141 vand
$tmp,$tmp,$eighty7
3142 vxor
$out0,$in0,$twk0
3143 vxor
$tweak,$tweak,$tmp
# Block 1: load, endian-correct on LE, whiten with twk1.
3145 lvx_u
$in1,$x10,$inp
3146 vxor
$twk1,$tweak,$rndkey0
3147 vsrab
$tmp,$tweak,$seven # next tweak value
3148 vaddubm
$tweak,$tweak,$tweak
3149 vsldoi
$tmp,$tmp,$tmp,15
3150 le?vperm
$in1,$in1,$in1,$leperm
3151 vand
$tmp,$tmp,$eighty7
3152 vxor
$out1,$in1,$twk1
3153 vxor
$tweak,$tweak,$tmp
# Block 2; also split $len into full blocks and the partial tail length.
3155 lvx_u
$in2,$x20,$inp
3156 andi
. $taillen,$len,15
3157 vxor
$twk2,$tweak,$rndkey0
3158 vsrab
$tmp,$tweak,$seven # next tweak value
3159 vaddubm
$tweak,$tweak,$tweak
3160 vsldoi
$tmp,$tmp,$tmp,15
3161 le?vperm
$in2,$in2,$in2,$leperm
3162 vand
$tmp,$tmp,$eighty7
3163 vxor
$out2,$in2,$twk2
3164 vxor
$tweak,$tweak,$tmp
# Block 3.
3166 lvx_u
$in3,$x30,$inp
3167 sub $len,$len,$taillen
3168 vxor
$twk3,$tweak,$rndkey0
3169 vsrab
$tmp,$tweak,$seven # next tweak value
3170 vaddubm
$tweak,$tweak,$tweak
3171 vsldoi
$tmp,$tmp,$tmp,15
3172 le?vperm
$in3,$in3,$in3,$leperm
3173 vand
$tmp,$tmp,$eighty7
3174 vxor
$out3,$in3,$twk3
3175 vxor
$tweak,$tweak,$tmp
# Block 4.
3177 lvx_u
$in4,$x40,$inp
3179 vxor
$twk4,$tweak,$rndkey0
3180 vsrab
$tmp,$tweak,$seven # next tweak value
3181 vaddubm
$tweak,$tweak,$tweak
3182 vsldoi
$tmp,$tmp,$tmp,15
3183 le?vperm
$in4,$in4,$in4,$leperm
3184 vand
$tmp,$tmp,$eighty7
3185 vxor
$out4,$in4,$twk4
3186 vxor
$tweak,$tweak,$tmp
# Block 5.
3188 lvx_u
$in5,$x50,$inp
3190 vxor
$twk5,$tweak,$rndkey0
3191 vsrab
$tmp,$tweak,$seven # next tweak value
3192 vaddubm
$tweak,$tweak,$tweak
3193 vsldoi
$tmp,$tmp,$tmp,15
3194 le?vperm
$in5,$in5,$in5,$leperm
3195 vand
$tmp,$tmp,$eighty7
3196 vxor
$out5,$in5,$twk5
3197 vxor
$tweak,$tweak,$tmp
# Main 6x loop (Loop_xts_dec6x -- label line missing from this excerpt).
# Software-pipelined: vncipher rounds on out0..out5 are interleaved with
# next-tweak computation, loop-count bookkeeping and next-block loads so
# the vector crypto pipeline stays full. v31 is folded with $rndkey0 so
# that twkN ^ v31 yields the per-block value fed to vncipherlast.
3199 vxor v31
,v31
,$rndkey0
3205 vncipher
$out0,$out0,v24
3206 vncipher
$out1,$out1,v24
3207 vncipher
$out2,$out2,v24
3208 vncipher
$out3,$out3,v24
3209 vncipher
$out4,$out4,v24
3210 vncipher
$out5,$out5,v24
3211 lvx v24
,$x20,$key_ # round[3]
3212 addi
$key_,$key_,0x20
3214 vncipher
$out0,$out0,v25
3215 vncipher
$out1,$out1,v25
3216 vncipher
$out2,$out2,v25
3217 vncipher
$out3,$out3,v25
3218 vncipher
$out4,$out4,v25
3219 vncipher
$out5,$out5,v25
3220 lvx v25
,$x10,$key_ # round[4]
# subic sets CA when $len-96 borrows; subfe below turns that into
# r0 = 0 or -1, used to freeze $inp on the final iteration.
3223 subic
$len,$len,96 # $len-=96
3224 vxor
$in0,$twk0,v31
# xor with last round key
3225 vncipher
$out0,$out0,v24
3226 vncipher
$out1,$out1,v24
3227 vsrab
$tmp,$tweak,$seven # next tweak value
3228 vxor
$twk0,$tweak,$rndkey0
3229 vaddubm
$tweak,$tweak,$tweak
3230 vncipher
$out2,$out2,v24
3231 vncipher
$out3,$out3,v24
3232 vsldoi
$tmp,$tmp,$tmp,15
3233 vncipher
$out4,$out4,v24
3234 vncipher
$out5,$out5,v24
3236 subfe
. r0
,r0
,r0
# borrow?-1:0
3237 vand
$tmp,$tmp,$eighty7
3238 vncipher
$out0,$out0,v25
3239 vncipher
$out1,$out1,v25
3240 vxor
$tweak,$tweak,$tmp
3241 vncipher
$out2,$out2,v25
3242 vncipher
$out3,$out3,v25
3244 vsrab
$tmp,$tweak,$seven # next tweak value
3245 vxor
$twk1,$tweak,$rndkey0
3246 vncipher
$out4,$out4,v25
3247 vncipher
$out5,$out5,v25
3250 vaddubm
$tweak,$tweak,$tweak
3251 vsldoi
$tmp,$tmp,$tmp,15
3252 vncipher
$out0,$out0,v26
3253 vncipher
$out1,$out1,v26
3254 vand
$tmp,$tmp,$eighty7
3255 vncipher
$out2,$out2,v26
3256 vncipher
$out3,$out3,v26
3257 vxor
$tweak,$tweak,$tmp
3258 vncipher
$out4,$out4,v26
3259 vncipher
$out5,$out5,v26
3261 add
$inp,$inp,r0
# $inp is adjusted in such
3262 # way that at exit from the
3263 # loop inX-in5 are loaded
3266 vsrab
$tmp,$tweak,$seven # next tweak value
3267 vxor
$twk2,$tweak,$rndkey0
3268 vaddubm
$tweak,$tweak,$tweak
3269 vncipher
$out0,$out0,v27
3270 vncipher
$out1,$out1,v27
3271 vsldoi
$tmp,$tmp,$tmp,15
3272 vncipher
$out2,$out2,v27
3273 vncipher
$out3,$out3,v27
3274 vand
$tmp,$tmp,$eighty7
3275 vncipher
$out4,$out4,v27
3276 vncipher
$out5,$out5,v27
3278 addi
$key_,$sp,$FRAME+15 # rewind $key_
3279 vxor
$tweak,$tweak,$tmp
3280 vncipher
$out0,$out0,v28
3281 vncipher
$out1,$out1,v28
3283 vsrab
$tmp,$tweak,$seven # next tweak value
3284 vxor
$twk3,$tweak,$rndkey0
3285 vncipher
$out2,$out2,v28
3286 vncipher
$out3,$out3,v28
3287 vaddubm
$tweak,$tweak,$tweak
3288 vsldoi
$tmp,$tmp,$tmp,15
3289 vncipher
$out4,$out4,v28
3290 vncipher
$out5,$out5,v28
3291 lvx v24
,$x00,$key_ # re-pre-load round[1]
3292 vand
$tmp,$tmp,$eighty7
3294 vncipher
$out0,$out0,v29
3295 vncipher
$out1,$out1,v29
3296 vxor
$tweak,$tweak,$tmp
3297 vncipher
$out2,$out2,v29
3298 vncipher
$out3,$out3,v29
3300 vsrab
$tmp,$tweak,$seven # next tweak value
3301 vxor
$twk4,$tweak,$rndkey0
3302 vncipher
$out4,$out4,v29
3303 vncipher
$out5,$out5,v29
3304 lvx v25
,$x10,$key_ # re-pre-load round[2]
3305 vaddubm
$tweak,$tweak,$tweak
3306 vsldoi
$tmp,$tmp,$tmp,15
3308 vncipher
$out0,$out0,v30
3309 vncipher
$out1,$out1,v30
3310 vand
$tmp,$tmp,$eighty7
3311 vncipher
$out2,$out2,v30
3312 vncipher
$out3,$out3,v30
3313 vxor
$tweak,$tweak,$tmp
3314 vncipher
$out4,$out4,v30
3315 vncipher
$out5,$out5,v30
# Final rounds: vncipherlast xors in the per-block whitening value
# (twkN ^ last round key, staged in $in0..$in5), overlapped with loading
# and endian-correcting the next six input blocks.
3317 vsrab
$tmp,$tweak,$seven # next tweak value
3318 vxor
$twk5,$tweak,$rndkey0
3320 vncipherlast
$out0,$out0,$in0
3321 lvx_u
$in0,$x00,$inp # load next input block
3322 vaddubm
$tweak,$tweak,$tweak
3323 vsldoi
$tmp,$tmp,$tmp,15
3324 vncipherlast
$out1,$out1,$in1
3325 lvx_u
$in1,$x10,$inp
3326 vncipherlast
$out2,$out2,$in2
3327 le?vperm
$in0,$in0,$in0,$leperm
3328 lvx_u
$in2,$x20,$inp
3329 vand
$tmp,$tmp,$eighty7
3330 vncipherlast
$out3,$out3,$in3
3331 le?vperm
$in1,$in1,$in1,$leperm
3332 lvx_u
$in3,$x30,$inp
3333 vncipherlast
$out4,$out4,$in4
3334 le?vperm
$in2,$in2,$in2,$leperm
3335 lvx_u
$in4,$x40,$inp
3336 vxor
$tweak,$tweak,$tmp
3337 vncipherlast
$out5,$out5,$in5
3338 le?vperm
$in3,$in3,$in3,$leperm
3339 lvx_u
$in5,$x50,$inp
3341 le?vperm
$in4,$in4,$in4,$leperm
3342 le?vperm
$in5,$in5,$in5,$leperm
# Store six plaintext blocks while whitening the next batch for the
# following iteration.
3344 le?vperm
$out0,$out0,$out0,$leperm
3345 le?vperm
$out1,$out1,$out1,$leperm
3346 stvx_u
$out0,$x00,$out # store output
3347 vxor
$out0,$in0,$twk0
3348 le?vperm
$out2,$out2,$out2,$leperm
3349 stvx_u
$out1,$x10,$out
3350 vxor
$out1,$in1,$twk1
3351 le?vperm
$out3,$out3,$out3,$leperm
3352 stvx_u
$out2,$x20,$out
3353 vxor
$out2,$in2,$twk2
3354 le?vperm
$out4,$out4,$out4,$leperm
3355 stvx_u
$out3,$x30,$out
3356 vxor
$out3,$in3,$twk3
3357 le?vperm
$out5,$out5,$out5,$leperm
3358 stvx_u
$out4,$x40,$out
3359 vxor
$out4,$in4,$twk4
3360 stvx_u
$out5,$x50,$out
3361 vxor
$out5,$in5,$twk5
3365 beq Loop_xts_dec6x
# did $len-=96 borrow?
# Tail dispatch: restore the 0x60 subtracted in the loop, then fall into
# the 5/4/3/2-block epilogues. Each epilogue whitens the leftover blocks
# (shifted by one slot: $in1 pairs with $twk0, etc.), branches into the
# shared _aesp8_xts_dec5x round engine (calls dropped by extraction),
# endian-corrects and stores the results, and parks an unused tweak in
# $twk0 for the ciphertext-stealing path ("bne Lxts_dec6x_steal" fires
# when $taillen != 0).
3367 addic
. $len,$len,0x60
3374 blt Lxts_dec6x_three
# Five-block tail.
3379 vxor
$out0,$in1,$twk0
3380 vxor
$out1,$in2,$twk1
3381 vxor
$out2,$in3,$twk2
3382 vxor
$out3,$in4,$twk3
3383 vxor
$out4,$in5,$twk4
3387 le?vperm
$out0,$out0,$out0,$leperm
3388 vmr
$twk0,$twk5 # unused tweak
3389 vxor
$twk1,$tweak,$rndkey0
3390 le?vperm
$out1,$out1,$out1,$leperm
3391 stvx_u
$out0,$x00,$out # store output
3392 vxor
$out0,$in0,$twk1
3393 le?vperm
$out2,$out2,$out2,$leperm
3394 stvx_u
$out1,$x10,$out
3395 le?vperm
$out3,$out3,$out3,$leperm
3396 stvx_u
$out2,$x20,$out
3397 le?vperm
$out4,$out4,$out4,$leperm
3398 stvx_u
$out3,$x30,$out
3399 stvx_u
$out4,$x40,$out
3401 bne Lxts_dec6x_steal
# Four-block tail (unused lanes zeroed with vxor x,x,x).
3406 vxor
$out0,$in2,$twk0
3407 vxor
$out1,$in3,$twk1
3408 vxor
$out2,$in4,$twk2
3409 vxor
$out3,$in5,$twk3
3410 vxor
$out4,$out4,$out4
3414 le?vperm
$out0,$out0,$out0,$leperm
3415 vmr
$twk0,$twk4 # unused tweak
3417 le?vperm
$out1,$out1,$out1,$leperm
3418 stvx_u
$out0,$x00,$out # store output
3419 vxor
$out0,$in0,$twk5
3420 le?vperm
$out2,$out2,$out2,$leperm
3421 stvx_u
$out1,$x10,$out
3422 le?vperm
$out3,$out3,$out3,$leperm
3423 stvx_u
$out2,$x20,$out
3424 stvx_u
$out3,$x30,$out
3426 bne Lxts_dec6x_steal
# Three-block tail.
3431 vxor
$out0,$in3,$twk0
3432 vxor
$out1,$in4,$twk1
3433 vxor
$out2,$in5,$twk2
3434 vxor
$out3,$out3,$out3
3435 vxor
$out4,$out4,$out4
3439 le?vperm
$out0,$out0,$out0,$leperm
3440 vmr
$twk0,$twk3 # unused tweak
3442 le?vperm
$out1,$out1,$out1,$leperm
3443 stvx_u
$out0,$x00,$out # store output
3444 vxor
$out0,$in0,$twk4
3445 le?vperm
$out2,$out2,$out2,$leperm
3446 stvx_u
$out1,$x10,$out
3447 stvx_u
$out2,$x20,$out
3449 bne Lxts_dec6x_steal
# Two-block tail.
3454 vxor
$out0,$in4,$twk0
3455 vxor
$out1,$in5,$twk1
3456 vxor
$out2,$out2,$out2
3457 vxor
$out3,$out3,$out3
3458 vxor
$out4,$out4,$out4
3462 le?vperm
$out0,$out0,$out0,$leperm
3463 vmr
$twk0,$twk2 # unused tweak
3465 le?vperm
$out1,$out1,$out1,$leperm
3466 stvx_u
$out0,$x00,$out # store output
3467 vxor
$out0,$in0,$twk3
3468 stvx_u
$out1,$x10,$out
3470 bne Lxts_dec6x_steal
# One-block tail: whiten the final full block and run the full round
# sequence inline (the Loop_xts_dec1x label and its bdnz are missing from
# this excerpt), finishing with vncipherlast against twk0 ^ v31.
3475 vxor
$out0,$in5,$twk0
3478 vncipher
$out0,$out0,v24
3479 lvx v24
,$x20,$key_ # round[3]
3480 addi
$key_,$key_,0x20
3482 vncipher
$out0,$out0,v25
3483 lvx v25
,$x10,$key_ # round[4]
3487 vncipher
$out0,$out0,v24
3491 vncipher
$out0,$out0,v25
3494 vncipher
$out0,$out0,v26
3497 vncipher
$out0,$out0,v27
3499 addi
$key_,$sp,$FRAME+15 # rewind $key_
3500 vncipher
$out0,$out0,v28
3501 lvx v24
,$x00,$key_ # re-pre-load round[1]
3503 vncipher
$out0,$out0,v29
3504 lvx v25
,$x10,$key_ # re-pre-load round[2]
3505 vxor
$twk0,$twk0,v31
3507 le?vperm
$in0,$in0,$in0,$leperm
3508 vncipher
$out0,$out0,v30
3511 vncipherlast
$out0,$out0,$twk0
3513 vmr
$twk0,$twk1 # unused tweak
3515 le?vperm
$out0,$out0,$out0,$leperm
3516 stvx_u
$out0,$x00,$out # store output
3518 vxor
$out0,$in0,$twk2
3519 bne Lxts_dec6x_steal
# Ciphertext stealing: decrypt the last full block under $twk1, build the
# tail rotation with lvsr on $taillen, and vsel-merge the stolen bytes.
3528 le?vperm
$in0,$in0,$in0,$leperm
3529 vxor
$out0,$in0,$twk1
3531 vncipher
$out0,$out0,v24
3532 lvx v24
,$x20,$key_ # round[3]
3533 addi
$key_,$key_,0x20
3535 vncipher
$out0,$out0,v25
3536 lvx v25
,$x10,$key_ # round[4]
3537 bdnz Lxts_dec6x_steal
3539 add
$inp,$inp,$taillen
3540 vncipher
$out0,$out0,v24
3543 vncipher
$out0,$out0,v25
3546 vncipher
$out0,$out0,v26
3548 lvsr
$inpperm,0,$taillen # $in5 is no more
3549 vncipher
$out0,$out0,v27
3551 addi
$key_,$sp,$FRAME+15 # rewind $key_
3552 vncipher
$out0,$out0,v28
3553 lvx v24
,$x00,$key_ # re-pre-load round[1]
3555 vncipher
$out0,$out0,v29
3556 lvx v25
,$x10,$key_ # re-pre-load round[2]
3557 vxor
$twk1,$twk1,v31
3559 le?vperm
$in0,$in0,$in0,$leperm
3560 vncipher
$out0,$out0,v30
3562 vperm
$in0,$in0,$in0,$inpperm
3563 vncipherlast
$tmp,$out0,$twk1
# Store the penultimate (full) output block in the flavour-appropriate
# byte order, then assemble the final partial block via vsel and whiten
# it with the parked $twk0 for one more pass.
3565 le?vperm
$out0,$tmp,$tmp,$leperm
3566 le?stvx_u
$out0,0,$out
3567 be?stvx_u
$tmp,0,$out
3569 vxor
$out0,$out0,$out0
3571 vperm
$out0,$out0,$out1,$inpperm
3572 vsel
$out0,$in0,$tmp,$out0
3573 vxor
$out0,$out0,$twk0
# Ciphertext-stealing byte-copy loop (body lines dropped by extraction:
# presumably a CTR-driven lbzu/stbu copy of the stolen tail bytes), then
# one more single-block pass via Loop_xts_dec1x. Afterwards, write the
# updated tweak/IV back to the caller's $ivp in on-disk byte order.
3577 Loop_xts_dec6x_steal
:
3580 bdnz Loop_xts_dec6x_steal
3584 b Loop_xts_dec1x
# one more time...
3591 vxor
$tweak,$twk0,$rndkey0
3592 le?vperm
$tweak,$tweak,$tweak,$leperm
3593 stvx_u
$tweak,0,$ivp
# Epilogue: scrub the on-stack round-key copies (storing $seven over them
# -- the remaining wipe/restore stvx/lvx lines are missing from this
# excerpt), restore non-volatile VRs and GPRs r26..r31, pop the frame.
# The trailing .byte sequence is the ELF traceback table, not code.
3599 stvx
$seven,r10
,$sp # wipe copies of round keys
3617 lvx v20
,r10
,$sp # ABI says so
3639 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
3640 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
3641 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
3642 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
3643 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
3644 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
3645 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3648 .byte
0,12,0x04,1,0x80,6,6,0
# _aesp8_xts_dec5x (label line missing from this excerpt; the bdnz below
# targets it): shared round engine for the 2..5-block tails. The CTR-
# driven loop streams two round keys per iteration from the on-stack
# copy at $key_, then the post-loop section runs the remaining fixed
# rounds v24..v30 and finishes with vncipherlast against the whitening
# values ($twk0 ^ v31 for lane 0, $in1..$in4 pre-staged for the rest).
3653 vncipher
$out0,$out0,v24
3654 vncipher
$out1,$out1,v24
3655 vncipher
$out2,$out2,v24
3656 vncipher
$out3,$out3,v24
3657 vncipher
$out4,$out4,v24
3658 lvx v24
,$x20,$key_ # round[3]
3659 addi
$key_,$key_,0x20
3661 vncipher
$out0,$out0,v25
3662 vncipher
$out1,$out1,v25
3663 vncipher
$out2,$out2,v25
3664 vncipher
$out3,$out3,v25
3665 vncipher
$out4,$out4,v25
3666 lvx v25
,$x10,$key_ # round[4]
3667 bdnz _aesp8_xts_dec5x
# Post-loop fixed rounds.
3670 vncipher
$out0,$out0,v24
3671 vncipher
$out1,$out1,v24
3672 vncipher
$out2,$out2,v24
3673 vncipher
$out3,$out3,v24
3674 vncipher
$out4,$out4,v24
3678 vncipher
$out0,$out0,v25
3679 vncipher
$out1,$out1,v25
3680 vncipher
$out2,$out2,v25
3681 vncipher
$out3,$out3,v25
3682 vncipher
$out4,$out4,v25
3683 vxor
$twk0,$twk0,v31
3686 vncipher
$out0,$out0,v26
3687 vncipher
$out1,$out1,v26
3688 vncipher
$out2,$out2,v26
3689 vncipher
$out3,$out3,v26
3690 vncipher
$out4,$out4,v26
3693 vncipher
$out0,$out0,v27
3695 vncipher
$out1,$out1,v27
3696 vncipher
$out2,$out2,v27
3697 vncipher
$out3,$out3,v27
3698 vncipher
$out4,$out4,v27
3701 addi
$key_,$sp,$FRAME+15 # rewind $key_
3702 vncipher
$out0,$out0,v28
3703 vncipher
$out1,$out1,v28
3704 vncipher
$out2,$out2,v28
3705 vncipher
$out3,$out3,v28
3706 vncipher
$out4,$out4,v28
3707 lvx v24
,$x00,$key_ # re-pre-load round[1]
3710 vncipher
$out0,$out0,v29
3711 le?vperm
$in0,$in0,$in0,$leperm
3712 vncipher
$out1,$out1,v29
3713 vncipher
$out2,$out2,v29
3714 vncipher
$out3,$out3,v29
3715 vncipher
$out4,$out4,v29
3716 lvx v25
,$x10,$key_ # re-pre-load round[2]
3719 vncipher
$out0,$out0,v30
3720 vncipher
$out1,$out1,v30
3721 vncipher
$out2,$out2,v30
3722 vncipher
$out3,$out3,v30
3723 vncipher
$out4,$out4,v30
3725 vncipherlast
$out0,$out0,$twk0
3726 vncipherlast
$out1,$out1,$in1
3727 vncipherlast
$out2,$out2,$in2
3728 vncipherlast
$out3,$out3,$in3
3729 vncipherlast
$out4,$out4,$in4
# Traceback table entry (data, not code).
3733 .byte
0,12,0x14,0,0,0,0,0
3738 foreach(split("\n",$code)) {
3739 s/\`([^\`]*)\`/eval($1)/geo;
3741 # constants table endian-specific conversion
3742 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3746 # convert to endian-agnostic format
3748 foreach (split(/,\s*/,$2)) {
3749 my $l = /^0/?
oct:int;
3750 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3753 @bytes = map(/^0/?
oct:int,split(/,\s*/,$2));
3756 # little-endian conversion
3757 if ($flavour =~ /le$/o) {
3758 SWITCH
: for($conv) {
3759 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3760 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3765 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3768 $consts=0 if (m/Lconsts:/o); # end of table
3770 # instructions prefixed with '?' are endian-specific and need
3771 # to be adjusted accordingly...
3772 if ($flavour =~ /le$/o) { # little-endian
3777 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3778 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3779 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3780 } else { # big-endian