3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # This module implements support for AES instructions as per PowerISA
11 # specification version 2.07, first implemented by POWER8 processor.
12 # The module is endian-agnostic in sense that it supports both big-
13 # and little-endian cases. Data alignment in parallelizable modes is
14 # handled with VSX loads and stores, which implies MSR.VSX flag being
15 # set. It should also be noted that ISA specification doesn't prohibit
16 # alignment exceptions for these instructions on page boundaries.
17 # Initially alignment was handled in pure AltiVec/VMX way [when data
18 # is aligned programmatically, which in turn guarantees exception-
19 # free execution], but it turned out to hamper performance when vcipher
20 # instructions are interleaved. It's reckoned that eventual
21 # misalignment penalties at page boundaries are in average lower
22 # than additional overhead in pure AltiVec approach.
26 if ($flavour =~ /64/) {
34 } elsif ($flavour =~ /32/) {
42 } else { die "nonsense $flavour"; }
44 $LITTLE_ENDIAN = ($flavour=~/le$/) ?
$SIZE_T : 0;
46 $0 =~ m/(.*[\/\\])[^\
/\\]+$/; $dir=$1;
47 ( $xlate="${dir}ppc-xlate.pl" and -f
$xlate ) or
48 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f
$xlate) or
49 die "can't locate ppc-xlate.pl";
51 open STDOUT
,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
59 #########################################################################
60 {{{ # Key setup procedures #
61 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
62 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
63 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
72 .long
0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
73 .long
0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
74 .long
0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
79 mflr
$ptr #vvvvv "distance between . and rcon
84 .byte
0,12,0x14,0,0,0,0,0
85 .asciz
"AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
87 .globl
.${prefix
}_set_encrypt_key
90 $PUSH r11
,$LRSAVE($sp)
94 beq
- Lenc_key_abort
# if ($inp==0) return -1;
96 beq
- Lenc_key_abort
# if ($out==0) return -1;
114 addi
$inp,$inp,15 # 15 is not typo
115 lvsr
$key,0,r9
# borrow $key
119 le?vspltisb
$mask,0x0f # borrow $mask
121 le?vxor
$key,$key,$mask # adjust for byte swap
124 vperm
$in0,$in0,$in1,$key # align [and byte swap in LE]
126 vxor
$zero,$zero,$zero
129 ?lvsr
$outperm,0,$out
132 ?vperm
$outmask,$zero,$outmask,$outperm
142 vperm
$key,$in0,$in0,$mask # rotate-n-splat
143 vsldoi
$tmp,$zero,$in0,12 # >>32
144 vperm
$outtail,$in0,$in0,$outperm # rotate
145 vsel
$stage,$outhead,$outtail,$outmask
146 vmr
$outhead,$outtail
147 vcipherlast
$key,$key,$rcon
152 vsldoi
$tmp,$zero,$tmp,12 # >>32
154 vsldoi
$tmp,$zero,$tmp,12 # >>32
156 vadduwm
$rcon,$rcon,$rcon
160 lvx
$rcon,0,$ptr # last two round keys
162 vperm
$key,$in0,$in0,$mask # rotate-n-splat
163 vsldoi
$tmp,$zero,$in0,12 # >>32
164 vperm
$outtail,$in0,$in0,$outperm # rotate
165 vsel
$stage,$outhead,$outtail,$outmask
166 vmr
$outhead,$outtail
167 vcipherlast
$key,$key,$rcon
172 vsldoi
$tmp,$zero,$tmp,12 # >>32
174 vsldoi
$tmp,$zero,$tmp,12 # >>32
176 vadduwm
$rcon,$rcon,$rcon
179 vperm
$key,$in0,$in0,$mask # rotate-n-splat
180 vsldoi
$tmp,$zero,$in0,12 # >>32
181 vperm
$outtail,$in0,$in0,$outperm # rotate
182 vsel
$stage,$outhead,$outtail,$outmask
183 vmr
$outhead,$outtail
184 vcipherlast
$key,$key,$rcon
189 vsldoi
$tmp,$zero,$tmp,12 # >>32
191 vsldoi
$tmp,$zero,$tmp,12 # >>32
194 vperm
$outtail,$in0,$in0,$outperm # rotate
195 vsel
$stage,$outhead,$outtail,$outmask
196 vmr
$outhead,$outtail
199 addi
$inp,$out,15 # 15 is not typo
209 vperm
$outtail,$in0,$in0,$outperm # rotate
210 vsel
$stage,$outhead,$outtail,$outmask
211 vmr
$outhead,$outtail
214 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
215 vspltisb
$key,8 # borrow $key
217 vsububm
$mask,$mask,$key # adjust the mask
220 vperm
$key,$in1,$in1,$mask # rotate-n-splat
221 vsldoi
$tmp,$zero,$in0,12 # >>32
222 vcipherlast
$key,$key,$rcon
225 vsldoi
$tmp,$zero,$tmp,12 # >>32
227 vsldoi
$tmp,$zero,$tmp,12 # >>32
230 vsldoi
$stage,$zero,$in1,8
233 vsldoi
$in1,$zero,$in1,12 # >>32
234 vadduwm
$rcon,$rcon,$rcon
238 vsldoi
$stage,$stage,$in0,8
240 vperm
$key,$in1,$in1,$mask # rotate-n-splat
241 vsldoi
$tmp,$zero,$in0,12 # >>32
242 vperm
$outtail,$stage,$stage,$outperm # rotate
243 vsel
$stage,$outhead,$outtail,$outmask
244 vmr
$outhead,$outtail
245 vcipherlast
$key,$key,$rcon
249 vsldoi
$stage,$in0,$in1,8
251 vsldoi
$tmp,$zero,$tmp,12 # >>32
252 vperm
$outtail,$stage,$stage,$outperm # rotate
253 vsel
$stage,$outhead,$outtail,$outmask
254 vmr
$outhead,$outtail
256 vsldoi
$tmp,$zero,$tmp,12 # >>32
263 vsldoi
$in1,$zero,$in1,12 # >>32
264 vadduwm
$rcon,$rcon,$rcon
268 vperm
$outtail,$in0,$in0,$outperm # rotate
269 vsel
$stage,$outhead,$outtail,$outmask
270 vmr
$outhead,$outtail
272 addi
$inp,$out,15 # 15 is not typo
285 vperm
$outtail,$in0,$in0,$outperm # rotate
286 vsel
$stage,$outhead,$outtail,$outmask
287 vmr
$outhead,$outtail
290 vperm
$in1,$in1,$tmp,$key # align [and byte swap in LE]
294 vperm
$key,$in1,$in1,$mask # rotate-n-splat
295 vsldoi
$tmp,$zero,$in0,12 # >>32
296 vperm
$outtail,$in1,$in1,$outperm # rotate
297 vsel
$stage,$outhead,$outtail,$outmask
298 vmr
$outhead,$outtail
299 vcipherlast
$key,$key,$rcon
304 vsldoi
$tmp,$zero,$tmp,12 # >>32
306 vsldoi
$tmp,$zero,$tmp,12 # >>32
308 vadduwm
$rcon,$rcon,$rcon
310 vperm
$outtail,$in0,$in0,$outperm # rotate
311 vsel
$stage,$outhead,$outtail,$outmask
312 vmr
$outhead,$outtail
314 addi
$inp,$out,15 # 15 is not typo
318 vspltw
$key,$in0,3 # just splat
319 vsldoi
$tmp,$zero,$in1,12 # >>32
323 vsldoi
$tmp,$zero,$tmp,12 # >>32
325 vsldoi
$tmp,$zero,$tmp,12 # >>32
333 lvx
$in1,0,$inp # redundant in aligned case
334 vsel
$in1,$outhead,$in1,$outmask
344 .byte
0,12,0x14,1,0,0,3,0
346 .size
.${prefix
}_set_encrypt_key
,.-.${prefix
}_set_encrypt_key
348 .globl
.${prefix
}_set_decrypt_key
349 $STU $sp,-$FRAME($sp)
351 $PUSH r10
,$FRAME+$LRSAVE($sp)
359 subi
$inp,$out,240 # first round key
360 srwi
$rounds,$rounds,1
361 add
$out,$inp,$cnt # last round key
385 xor r3
,r3
,r3
# return value
390 .byte
0,12,4,1,0x80,0,3,0
392 .size
.${prefix
}_set_decrypt_key
,.-.${prefix
}_set_decrypt_key
395 #########################################################################
396 {{{ # Single block en- and decrypt procedures #
399 my $n = $dir eq "de" ?
"n" : "";
400 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
403 .globl
.${prefix
}_
${dir
}crypt
404 lwz
$rounds,240($key)
407 li
$idx,15 # 15 is not typo
413 lvsl v2
,0,$inp # inpperm
415 ?lvsl v3
,0,r11
# outperm
418 vperm v0
,v0
,v1
,v2
# align [and byte swap in LE]
420 ?lvsl v5
,0,$key # keyperm
421 srwi
$rounds,$rounds,1
424 subi
$rounds,$rounds,1
425 ?vperm v1
,v1
,v2
,v5
# align round key
447 v
${n
}cipherlast v0
,v0
,v1
451 li
$idx,15 # 15 is not typo
452 ?vperm v2
,v1
,v2
,v3
# outmask
454 lvx v1
,0,$out # outhead
455 vperm v0
,v0
,v0
,v3
# rotate [and byte swap in LE]
465 .byte
0,12,0x14,0,0,0,3,0
467 .size
.${prefix
}_
${dir
}crypt,.-.${prefix
}_
${dir
}crypt
473 #########################################################################
474 {{{ # CBC en- and decrypt procedures #
475 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
476 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
477 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
480 .globl
.${prefix
}_cbc_encrypt
484 cmpwi
$enc,0 # test direction
490 vxor
$rndkey0,$rndkey0,$rndkey0
491 le?vspltisb
$tmp,0x0f
493 lvx
$ivec,0,$ivp # load [unaligned] iv
495 lvx
$inptail,$idx,$ivp
496 le?vxor
$inpperm,$inpperm,$tmp
497 vperm
$ivec,$ivec,$inptail,$inpperm
500 ?lvsl
$keyperm,0,$key # prepare for unaligned key
501 lwz
$rounds,240($key)
503 lvsr
$inpperm,0,r11
# prepare for unaligned load
505 addi
$inp,$inp,15 # 15 is not typo
506 le?vxor
$inpperm,$inpperm,$tmp
508 ?lvsr
$outperm,0,$out # prepare for unaligned store
511 ?vperm
$outmask,$rndkey0,$outmask,$outperm
512 le?vxor
$outperm,$outperm,$tmp
514 srwi
$rounds,$rounds,1
516 subi
$rounds,$rounds,1
524 subi
$len,$len,16 # len-=16
527 vperm
$inout,$inout,$inptail,$inpperm
528 lvx
$rndkey1,$idx,$key
530 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
531 vxor
$inout,$inout,$rndkey0
532 lvx
$rndkey0,$idx,$key
534 vxor
$inout,$inout,$ivec
537 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
538 vcipher
$inout,$inout,$rndkey1
539 lvx
$rndkey1,$idx,$key
541 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
542 vcipher
$inout,$inout,$rndkey0
543 lvx
$rndkey0,$idx,$key
547 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
548 vcipher
$inout,$inout,$rndkey1
549 lvx
$rndkey1,$idx,$key
551 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
552 vcipherlast
$ivec,$inout,$rndkey0
555 vperm
$tmp,$ivec,$ivec,$outperm
556 vsel
$inout,$outhead,$tmp,$outmask
567 bge _aesp8_cbc_decrypt8x
572 subi
$len,$len,16 # len-=16
575 vperm
$tmp,$tmp,$inptail,$inpperm
576 lvx
$rndkey1,$idx,$key
578 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
579 vxor
$inout,$tmp,$rndkey0
580 lvx
$rndkey0,$idx,$key
584 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
585 vncipher
$inout,$inout,$rndkey1
586 lvx
$rndkey1,$idx,$key
588 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
589 vncipher
$inout,$inout,$rndkey0
590 lvx
$rndkey0,$idx,$key
594 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
595 vncipher
$inout,$inout,$rndkey1
596 lvx
$rndkey1,$idx,$key
598 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
599 vncipherlast
$inout,$inout,$rndkey0
602 vxor
$inout,$inout,$ivec
604 vperm
$tmp,$inout,$inout,$outperm
605 vsel
$inout,$outhead,$tmp,$outmask
613 lvx
$inout,0,$out # redundant in aligned case
614 vsel
$inout,$outhead,$inout,$outmask
617 neg
$enc,$ivp # write [unaligned] iv
618 li
$idx,15 # 15 is not typo
619 vxor
$rndkey0,$rndkey0,$rndkey0
621 le?vspltisb
$tmp,0x0f
622 ?lvsl
$outperm,0,$enc
623 ?vperm
$outmask,$rndkey0,$outmask,$outperm
624 le?vxor
$outperm,$outperm,$tmp
626 vperm
$ivec,$ivec,$ivec,$outperm
627 vsel
$inout,$outhead,$ivec,$outmask
628 lvx
$inptail,$idx,$ivp
630 vsel
$inout,$ivec,$inptail,$outmask
631 stvx
$inout,$idx,$ivp
636 .byte
0,12,0x14,0,0,0,6,0
639 #########################################################################
640 {{ # Optimized CBC decrypt procedure #
642 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
643 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
644 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
645 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
646 # v26-v31 last 6 round keys
647 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
651 _aesp8_cbc_decrypt8x
:
652 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
653 li r10
,`$FRAME+8*16+15`
654 li r11
,`$FRAME+8*16+31`
655 stvx v20
,r10
,$sp # ABI says so
678 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
680 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
682 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
684 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
686 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
688 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
690 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
694 subi
$rounds,$rounds,3 # -4 in total
695 subi
$len,$len,128 # bias
697 lvx
$rndkey0,$x00,$key # load key schedule
701 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
702 addi
$key_,$sp,$FRAME+15
706 ?vperm v24
,v30
,v31
,$keyperm
709 stvx v24
,$x00,$key_ # off-load round[1]
710 ?vperm v25
,v31
,v30
,$keyperm
712 stvx v25
,$x10,$key_ # off-load round[2]
713 addi
$key_,$key_,0x20
714 bdnz Load_cbc_dec_key
717 ?vperm v24
,v30
,v31
,$keyperm
719 stvx v24
,$x00,$key_ # off-load round[3]
720 ?vperm v25
,v31
,v26
,$keyperm
722 stvx v25
,$x10,$key_ # off-load round[4]
723 addi
$key_,$sp,$FRAME+15 # rewind $key_
724 ?vperm v26
,v26
,v27
,$keyperm
726 ?vperm v27
,v27
,v28
,$keyperm
728 ?vperm v28
,v28
,v29
,$keyperm
730 ?vperm v29
,v29
,v30
,$keyperm
731 lvx
$out0,$x70,$key # borrow $out0
732 ?vperm v30
,v30
,v31
,$keyperm
733 lvx v24
,$x00,$key_ # pre-load round[1]
734 ?vperm v31
,v31
,$out0,$keyperm
735 lvx v25
,$x10,$key_ # pre-load round[2]
737 #lvx $inptail,0,$inp # "caller" already did this
738 #addi $inp,$inp,15 # 15 is not typo
739 subi
$inp,$inp,15 # undo "caller"
742 lvx_u
$in0,$x00,$inp # load first 8 "words"
743 le?lvsl
$inpperm,0,$idx
744 le?vspltisb
$tmp,0x0f
746 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
748 le?vperm
$in0,$in0,$in0,$inpperm
750 le?vperm
$in1,$in1,$in1,$inpperm
752 le?vperm
$in2,$in2,$in2,$inpperm
753 vxor
$out0,$in0,$rndkey0
755 le?vperm
$in3,$in3,$in3,$inpperm
756 vxor
$out1,$in1,$rndkey0
758 le?vperm
$in4,$in4,$in4,$inpperm
759 vxor
$out2,$in2,$rndkey0
762 le?vperm
$in5,$in5,$in5,$inpperm
763 vxor
$out3,$in3,$rndkey0
764 le?vperm
$in6,$in6,$in6,$inpperm
765 vxor
$out4,$in4,$rndkey0
766 le?vperm
$in7,$in7,$in7,$inpperm
767 vxor
$out5,$in5,$rndkey0
768 vxor
$out6,$in6,$rndkey0
769 vxor
$out7,$in7,$rndkey0
775 vncipher
$out0,$out0,v24
776 vncipher
$out1,$out1,v24
777 vncipher
$out2,$out2,v24
778 vncipher
$out3,$out3,v24
779 vncipher
$out4,$out4,v24
780 vncipher
$out5,$out5,v24
781 vncipher
$out6,$out6,v24
782 vncipher
$out7,$out7,v24
783 lvx v24
,$x20,$key_ # round[3]
784 addi
$key_,$key_,0x20
786 vncipher
$out0,$out0,v25
787 vncipher
$out1,$out1,v25
788 vncipher
$out2,$out2,v25
789 vncipher
$out3,$out3,v25
790 vncipher
$out4,$out4,v25
791 vncipher
$out5,$out5,v25
792 vncipher
$out6,$out6,v25
793 vncipher
$out7,$out7,v25
794 lvx v25
,$x10,$key_ # round[4]
797 subic
$len,$len,128 # $len-=128
798 vncipher
$out0,$out0,v24
799 vncipher
$out1,$out1,v24
800 vncipher
$out2,$out2,v24
801 vncipher
$out3,$out3,v24
802 vncipher
$out4,$out4,v24
803 vncipher
$out5,$out5,v24
804 vncipher
$out6,$out6,v24
805 vncipher
$out7,$out7,v24
807 subfe
. r0
,r0
,r0
# borrow?-1:0
808 vncipher
$out0,$out0,v25
809 vncipher
$out1,$out1,v25
810 vncipher
$out2,$out2,v25
811 vncipher
$out3,$out3,v25
812 vncipher
$out4,$out4,v25
813 vncipher
$out5,$out5,v25
814 vncipher
$out6,$out6,v25
815 vncipher
$out7,$out7,v25
818 vncipher
$out0,$out0,v26
819 vncipher
$out1,$out1,v26
820 vncipher
$out2,$out2,v26
821 vncipher
$out3,$out3,v26
822 vncipher
$out4,$out4,v26
823 vncipher
$out5,$out5,v26
824 vncipher
$out6,$out6,v26
825 vncipher
$out7,$out7,v26
827 add
$inp,$inp,r0
# $inp is adjusted in such
828 # way that at exit from the
829 # loop inX-in7 are loaded
831 vncipher
$out0,$out0,v27
832 vncipher
$out1,$out1,v27
833 vncipher
$out2,$out2,v27
834 vncipher
$out3,$out3,v27
835 vncipher
$out4,$out4,v27
836 vncipher
$out5,$out5,v27
837 vncipher
$out6,$out6,v27
838 vncipher
$out7,$out7,v27
840 addi
$key_,$sp,$FRAME+15 # rewind $key_
841 vncipher
$out0,$out0,v28
842 vncipher
$out1,$out1,v28
843 vncipher
$out2,$out2,v28
844 vncipher
$out3,$out3,v28
845 vncipher
$out4,$out4,v28
846 vncipher
$out5,$out5,v28
847 vncipher
$out6,$out6,v28
848 vncipher
$out7,$out7,v28
849 lvx v24
,$x00,$key_ # re-pre-load round[1]
851 vncipher
$out0,$out0,v29
852 vncipher
$out1,$out1,v29
853 vncipher
$out2,$out2,v29
854 vncipher
$out3,$out3,v29
855 vncipher
$out4,$out4,v29
856 vncipher
$out5,$out5,v29
857 vncipher
$out6,$out6,v29
858 vncipher
$out7,$out7,v29
859 lvx v25
,$x10,$key_ # re-pre-load round[2]
861 vncipher
$out0,$out0,v30
862 vxor
$ivec,$ivec,v31
# xor with last round key
863 vncipher
$out1,$out1,v30
865 vncipher
$out2,$out2,v30
867 vncipher
$out3,$out3,v30
869 vncipher
$out4,$out4,v30
871 vncipher
$out5,$out5,v30
873 vncipher
$out6,$out6,v30
875 vncipher
$out7,$out7,v30
878 vncipherlast
$out0,$out0,$ivec
879 vncipherlast
$out1,$out1,$in0
880 lvx_u
$in0,$x00,$inp # load next input block
881 vncipherlast
$out2,$out2,$in1
883 vncipherlast
$out3,$out3,$in2
884 le?vperm
$in0,$in0,$in0,$inpperm
886 vncipherlast
$out4,$out4,$in3
887 le?vperm
$in1,$in1,$in1,$inpperm
889 vncipherlast
$out5,$out5,$in4
890 le?vperm
$in2,$in2,$in2,$inpperm
892 vncipherlast
$out6,$out6,$in5
893 le?vperm
$in3,$in3,$in3,$inpperm
895 vncipherlast
$out7,$out7,$in6
896 le?vperm
$in4,$in4,$in4,$inpperm
899 le?vperm
$in5,$in5,$in5,$inpperm
903 le?vperm
$out0,$out0,$out0,$inpperm
904 le?vperm
$out1,$out1,$out1,$inpperm
905 stvx_u
$out0,$x00,$out
906 le?vperm
$in6,$in6,$in6,$inpperm
907 vxor
$out0,$in0,$rndkey0
908 le?vperm
$out2,$out2,$out2,$inpperm
909 stvx_u
$out1,$x10,$out
910 le?vperm
$in7,$in7,$in7,$inpperm
911 vxor
$out1,$in1,$rndkey0
912 le?vperm
$out3,$out3,$out3,$inpperm
913 stvx_u
$out2,$x20,$out
914 vxor
$out2,$in2,$rndkey0
915 le?vperm
$out4,$out4,$out4,$inpperm
916 stvx_u
$out3,$x30,$out
917 vxor
$out3,$in3,$rndkey0
918 le?vperm
$out5,$out5,$out5,$inpperm
919 stvx_u
$out4,$x40,$out
920 vxor
$out4,$in4,$rndkey0
921 le?vperm
$out6,$out6,$out6,$inpperm
922 stvx_u
$out5,$x50,$out
923 vxor
$out5,$in5,$rndkey0
924 le?vperm
$out7,$out7,$out7,$inpperm
925 stvx_u
$out6,$x60,$out
926 vxor
$out6,$in6,$rndkey0
927 stvx_u
$out7,$x70,$out
929 vxor
$out7,$in7,$rndkey0
932 beq Loop_cbc_dec8x
# did $len-=128 borrow?
939 Loop_cbc_dec8x_tail
: # up to 7 "words" tail...
940 vncipher
$out1,$out1,v24
941 vncipher
$out2,$out2,v24
942 vncipher
$out3,$out3,v24
943 vncipher
$out4,$out4,v24
944 vncipher
$out5,$out5,v24
945 vncipher
$out6,$out6,v24
946 vncipher
$out7,$out7,v24
947 lvx v24
,$x20,$key_ # round[3]
948 addi
$key_,$key_,0x20
950 vncipher
$out1,$out1,v25
951 vncipher
$out2,$out2,v25
952 vncipher
$out3,$out3,v25
953 vncipher
$out4,$out4,v25
954 vncipher
$out5,$out5,v25
955 vncipher
$out6,$out6,v25
956 vncipher
$out7,$out7,v25
957 lvx v25
,$x10,$key_ # round[4]
958 bdnz Loop_cbc_dec8x_tail
960 vncipher
$out1,$out1,v24
961 vncipher
$out2,$out2,v24
962 vncipher
$out3,$out3,v24
963 vncipher
$out4,$out4,v24
964 vncipher
$out5,$out5,v24
965 vncipher
$out6,$out6,v24
966 vncipher
$out7,$out7,v24
968 vncipher
$out1,$out1,v25
969 vncipher
$out2,$out2,v25
970 vncipher
$out3,$out3,v25
971 vncipher
$out4,$out4,v25
972 vncipher
$out5,$out5,v25
973 vncipher
$out6,$out6,v25
974 vncipher
$out7,$out7,v25
976 vncipher
$out1,$out1,v26
977 vncipher
$out2,$out2,v26
978 vncipher
$out3,$out3,v26
979 vncipher
$out4,$out4,v26
980 vncipher
$out5,$out5,v26
981 vncipher
$out6,$out6,v26
982 vncipher
$out7,$out7,v26
984 vncipher
$out1,$out1,v27
985 vncipher
$out2,$out2,v27
986 vncipher
$out3,$out3,v27
987 vncipher
$out4,$out4,v27
988 vncipher
$out5,$out5,v27
989 vncipher
$out6,$out6,v27
990 vncipher
$out7,$out7,v27
992 vncipher
$out1,$out1,v28
993 vncipher
$out2,$out2,v28
994 vncipher
$out3,$out3,v28
995 vncipher
$out4,$out4,v28
996 vncipher
$out5,$out5,v28
997 vncipher
$out6,$out6,v28
998 vncipher
$out7,$out7,v28
1000 vncipher
$out1,$out1,v29
1001 vncipher
$out2,$out2,v29
1002 vncipher
$out3,$out3,v29
1003 vncipher
$out4,$out4,v29
1004 vncipher
$out5,$out5,v29
1005 vncipher
$out6,$out6,v29
1006 vncipher
$out7,$out7,v29
1008 vncipher
$out1,$out1,v30
1009 vxor
$ivec,$ivec,v31
# last round key
1010 vncipher
$out2,$out2,v30
1012 vncipher
$out3,$out3,v30
1014 vncipher
$out4,$out4,v30
1016 vncipher
$out5,$out5,v30
1018 vncipher
$out6,$out6,v30
1020 vncipher
$out7,$out7,v30
1023 cmplwi
$len,32 # switch($len)
1028 blt Lcbc_dec8x_three
1037 vncipherlast
$out1,$out1,$ivec
1038 vncipherlast
$out2,$out2,$in1
1039 vncipherlast
$out3,$out3,$in2
1040 vncipherlast
$out4,$out4,$in3
1041 vncipherlast
$out5,$out5,$in4
1042 vncipherlast
$out6,$out6,$in5
1043 vncipherlast
$out7,$out7,$in6
1046 le?vperm
$out1,$out1,$out1,$inpperm
1047 le?vperm
$out2,$out2,$out2,$inpperm
1048 stvx_u
$out1,$x00,$out
1049 le?vperm
$out3,$out3,$out3,$inpperm
1050 stvx_u
$out2,$x10,$out
1051 le?vperm
$out4,$out4,$out4,$inpperm
1052 stvx_u
$out3,$x20,$out
1053 le?vperm
$out5,$out5,$out5,$inpperm
1054 stvx_u
$out4,$x30,$out
1055 le?vperm
$out6,$out6,$out6,$inpperm
1056 stvx_u
$out5,$x40,$out
1057 le?vperm
$out7,$out7,$out7,$inpperm
1058 stvx_u
$out6,$x50,$out
1059 stvx_u
$out7,$x60,$out
1065 vncipherlast
$out2,$out2,$ivec
1066 vncipherlast
$out3,$out3,$in2
1067 vncipherlast
$out4,$out4,$in3
1068 vncipherlast
$out5,$out5,$in4
1069 vncipherlast
$out6,$out6,$in5
1070 vncipherlast
$out7,$out7,$in6
1073 le?vperm
$out2,$out2,$out2,$inpperm
1074 le?vperm
$out3,$out3,$out3,$inpperm
1075 stvx_u
$out2,$x00,$out
1076 le?vperm
$out4,$out4,$out4,$inpperm
1077 stvx_u
$out3,$x10,$out
1078 le?vperm
$out5,$out5,$out5,$inpperm
1079 stvx_u
$out4,$x20,$out
1080 le?vperm
$out6,$out6,$out6,$inpperm
1081 stvx_u
$out5,$x30,$out
1082 le?vperm
$out7,$out7,$out7,$inpperm
1083 stvx_u
$out6,$x40,$out
1084 stvx_u
$out7,$x50,$out
1090 vncipherlast
$out3,$out3,$ivec
1091 vncipherlast
$out4,$out4,$in3
1092 vncipherlast
$out5,$out5,$in4
1093 vncipherlast
$out6,$out6,$in5
1094 vncipherlast
$out7,$out7,$in6
1097 le?vperm
$out3,$out3,$out3,$inpperm
1098 le?vperm
$out4,$out4,$out4,$inpperm
1099 stvx_u
$out3,$x00,$out
1100 le?vperm
$out5,$out5,$out5,$inpperm
1101 stvx_u
$out4,$x10,$out
1102 le?vperm
$out6,$out6,$out6,$inpperm
1103 stvx_u
$out5,$x20,$out
1104 le?vperm
$out7,$out7,$out7,$inpperm
1105 stvx_u
$out6,$x30,$out
1106 stvx_u
$out7,$x40,$out
1112 vncipherlast
$out4,$out4,$ivec
1113 vncipherlast
$out5,$out5,$in4
1114 vncipherlast
$out6,$out6,$in5
1115 vncipherlast
$out7,$out7,$in6
1118 le?vperm
$out4,$out4,$out4,$inpperm
1119 le?vperm
$out5,$out5,$out5,$inpperm
1120 stvx_u
$out4,$x00,$out
1121 le?vperm
$out6,$out6,$out6,$inpperm
1122 stvx_u
$out5,$x10,$out
1123 le?vperm
$out7,$out7,$out7,$inpperm
1124 stvx_u
$out6,$x20,$out
1125 stvx_u
$out7,$x30,$out
1131 vncipherlast
$out5,$out5,$ivec
1132 vncipherlast
$out6,$out6,$in5
1133 vncipherlast
$out7,$out7,$in6
1136 le?vperm
$out5,$out5,$out5,$inpperm
1137 le?vperm
$out6,$out6,$out6,$inpperm
1138 stvx_u
$out5,$x00,$out
1139 le?vperm
$out7,$out7,$out7,$inpperm
1140 stvx_u
$out6,$x10,$out
1141 stvx_u
$out7,$x20,$out
1147 vncipherlast
$out6,$out6,$ivec
1148 vncipherlast
$out7,$out7,$in6
1151 le?vperm
$out6,$out6,$out6,$inpperm
1152 le?vperm
$out7,$out7,$out7,$inpperm
1153 stvx_u
$out6,$x00,$out
1154 stvx_u
$out7,$x10,$out
1160 vncipherlast
$out7,$out7,$ivec
1163 le?vperm
$out7,$out7,$out7,$inpperm
1168 le?vperm
$ivec,$ivec,$ivec,$inpperm
1169 stvx_u
$ivec,0,$ivp # write [unaligned] iv
1173 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1175 stvx
$inpperm,r11
,$sp
1177 stvx
$inpperm,r10
,$sp
1179 stvx
$inpperm,r11
,$sp
1181 stvx
$inpperm,r10
,$sp
1183 stvx
$inpperm,r11
,$sp
1185 stvx
$inpperm,r10
,$sp
1187 stvx
$inpperm,r11
,$sp
1191 lvx v20
,r10
,$sp # ABI says so
1213 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1214 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1215 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1216 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1217 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1218 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1219 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1222 .byte
0,12,0x14,0,0x80,6,6,0
1224 .size
.${prefix
}_cbc_encrypt
,.-.${prefix
}_cbc_encrypt
1228 #########################################################################
1229 {{{ # CTR procedure[s] #
1230 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1231 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1232 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1237 .globl
.${prefix
}_ctr32_encrypt_blocks
1246 vxor
$rndkey0,$rndkey0,$rndkey0
1247 le?vspltisb
$tmp,0x0f
1249 lvx
$ivec,0,$ivp # load [unaligned] iv
1250 lvsl
$inpperm,0,$ivp
1251 lvx
$inptail,$idx,$ivp
1253 le?vxor
$inpperm,$inpperm,$tmp
1254 vperm
$ivec,$ivec,$inptail,$inpperm
1255 vsldoi
$one,$rndkey0,$one,1
1258 ?lvsl
$keyperm,0,$key # prepare for unaligned key
1259 lwz
$rounds,240($key)
1261 lvsr
$inpperm,0,r11
# prepare for unaligned load
1263 addi
$inp,$inp,15 # 15 is not typo
1264 le?vxor
$inpperm,$inpperm,$tmp
1266 srwi
$rounds,$rounds,1
1268 subi
$rounds,$rounds,1
1271 bge _aesp8_ctr32_encrypt8x
1273 ?lvsr
$outperm,0,$out # prepare for unaligned store
1274 vspltisb
$outmask,-1
1276 ?vperm
$outmask,$rndkey0,$outmask,$outperm
1277 le?vxor
$outperm,$outperm,$tmp
1281 lvx
$rndkey1,$idx,$key
1283 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1284 vxor
$inout,$ivec,$rndkey0
1285 lvx
$rndkey0,$idx,$key
1291 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1292 vcipher
$inout,$inout,$rndkey1
1293 lvx
$rndkey1,$idx,$key
1295 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1296 vcipher
$inout,$inout,$rndkey0
1297 lvx
$rndkey0,$idx,$key
1301 vadduwm
$ivec,$ivec,$one
1305 subic
. $len,$len,1 # blocks--
1307 ?vperm
$rndkey1,$rndkey1,$rndkey0,$keyperm
1308 vcipher
$inout,$inout,$rndkey1
1309 lvx
$rndkey1,$idx,$key
1310 vperm
$dat,$dat,$inptail,$inpperm
1312 ?vperm
$rndkey1,$rndkey0,$rndkey1,$keyperm
1314 vxor
$dat,$dat,$rndkey1 # last round key
1315 vcipherlast
$inout,$inout,$dat
1317 lvx
$rndkey1,$idx,$key
1319 vperm
$inout,$inout,$inout,$outperm
1320 vsel
$dat,$outhead,$inout,$outmask
1322 ?vperm
$rndkey0,$rndkey0,$rndkey1,$keyperm
1324 vxor
$inout,$ivec,$rndkey0
1325 lvx
$rndkey0,$idx,$key
1332 lvx
$inout,0,$out # redundant in aligned case
1333 vsel
$inout,$outhead,$inout,$outmask
1339 .byte
0,12,0x14,0,0,0,6,0
1342 #########################################################################
1343 {{ # Optimized CTR procedure #
1345 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1346 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1347 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1348 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1349 # v26-v31 last 6 round keys
1350 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1351 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1355 _aesp8_ctr32_encrypt8x
:
1356 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1357 li r10
,`$FRAME+8*16+15`
1358 li r11
,`$FRAME+8*16+31`
1359 stvx v20
,r10
,$sp # ABI says so
1382 stw
$vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1384 $PUSH r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1386 $PUSH r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1388 $PUSH r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1390 $PUSH r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1392 $PUSH r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1394 $PUSH r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1398 subi
$rounds,$rounds,3 # -4 in total
1400 lvx
$rndkey0,$x00,$key # load key schedule
1404 ?vperm
$rndkey0,$rndkey0,v30
,$keyperm
1405 addi
$key_,$sp,$FRAME+15
1409 ?vperm v24
,v30
,v31
,$keyperm
1412 stvx v24
,$x00,$key_ # off-load round[1]
1413 ?vperm v25
,v31
,v30
,$keyperm
1415 stvx v25
,$x10,$key_ # off-load round[2]
1416 addi
$key_,$key_,0x20
1417 bdnz Load_ctr32_enc_key
1420 ?vperm v24
,v30
,v31
,$keyperm
1422 stvx v24
,$x00,$key_ # off-load round[3]
1423 ?vperm v25
,v31
,v26
,$keyperm
1425 stvx v25
,$x10,$key_ # off-load round[4]
1426 addi
$key_,$sp,$FRAME+15 # rewind $key_
1427 ?vperm v26
,v26
,v27
,$keyperm
1429 ?vperm v27
,v27
,v28
,$keyperm
1431 ?vperm v28
,v28
,v29
,$keyperm
1433 ?vperm v29
,v29
,v30
,$keyperm
1434 lvx
$out0,$x70,$key # borrow $out0
1435 ?vperm v30
,v30
,v31
,$keyperm
1436 lvx v24
,$x00,$key_ # pre-load round[1]
1437 ?vperm v31
,v31
,$out0,$keyperm
1438 lvx v25
,$x10,$key_ # pre-load round[2]
1440 vadduqm
$two,$one,$one
1441 subi
$inp,$inp,15 # undo "caller"
1444 vadduqm
$out1,$ivec,$one # counter values ...
1445 vadduqm
$out2,$ivec,$two
1446 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1448 vadduqm
$out3,$out1,$two
1449 vxor
$out1,$out1,$rndkey0
1450 le?lvsl
$inpperm,0,$idx
1451 vadduqm
$out4,$out2,$two
1452 vxor
$out2,$out2,$rndkey0
1453 le?vspltisb
$tmp,0x0f
1454 vadduqm
$out5,$out3,$two
1455 vxor
$out3,$out3,$rndkey0
1456 le?vxor
$inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1457 vadduqm
$out6,$out4,$two
1458 vxor
$out4,$out4,$rndkey0
1459 vadduqm
$out7,$out5,$two
1460 vxor
$out5,$out5,$rndkey0
1461 vadduqm
$ivec,$out6,$two # next counter value
1462 vxor
$out6,$out6,$rndkey0
1463 vxor
$out7,$out7,$rndkey0
1469 vcipher
$out0,$out0,v24
1470 vcipher
$out1,$out1,v24
1471 vcipher
$out2,$out2,v24
1472 vcipher
$out3,$out3,v24
1473 vcipher
$out4,$out4,v24
1474 vcipher
$out5,$out5,v24
1475 vcipher
$out6,$out6,v24
1476 vcipher
$out7,$out7,v24
1477 Loop_ctr32_enc8x_middle
:
1478 lvx v24
,$x20,$key_ # round[3]
1479 addi
$key_,$key_,0x20
1481 vcipher
$out0,$out0,v25
1482 vcipher
$out1,$out1,v25
1483 vcipher
$out2,$out2,v25
1484 vcipher
$out3,$out3,v25
1485 vcipher
$out4,$out4,v25
1486 vcipher
$out5,$out5,v25
1487 vcipher
$out6,$out6,v25
1488 vcipher
$out7,$out7,v25
1489 lvx v25
,$x10,$key_ # round[4]
1490 bdnz Loop_ctr32_enc8x
1492 subic r11
,$len,256 # $len-256, borrow $key_
1493 vcipher
$out0,$out0,v24
1494 vcipher
$out1,$out1,v24
1495 vcipher
$out2,$out2,v24
1496 vcipher
$out3,$out3,v24
1497 vcipher
$out4,$out4,v24
1498 vcipher
$out5,$out5,v24
1499 vcipher
$out6,$out6,v24
1500 vcipher
$out7,$out7,v24
1502 subfe r0
,r0
,r0
# borrow?-1:0
1503 vcipher
$out0,$out0,v25
1504 vcipher
$out1,$out1,v25
1505 vcipher
$out2,$out2,v25
1506 vcipher
$out3,$out3,v25
1507 vcipher
$out4,$out4,v25
1508 vcipher
$out5,$out5,v25
1509 vcipher
$out6,$out6,v25
1510 vcipher
$out7,$out7,v25
1513 addi
$key_,$sp,$FRAME+15 # rewind $key_
1514 vcipher
$out0,$out0,v26
1515 vcipher
$out1,$out1,v26
1516 vcipher
$out2,$out2,v26
1517 vcipher
$out3,$out3,v26
1518 vcipher
$out4,$out4,v26
1519 vcipher
$out5,$out5,v26
1520 vcipher
$out6,$out6,v26
1521 vcipher
$out7,$out7,v26
1522 lvx v24
,$x00,$key_ # re-pre-load round[1]
1524 subic
$len,$len,129 # $len-=129
1525 vcipher
$out0,$out0,v27
1526 addi
$len,$len,1 # $len-=128 really
1527 vcipher
$out1,$out1,v27
1528 vcipher
$out2,$out2,v27
1529 vcipher
$out3,$out3,v27
1530 vcipher
$out4,$out4,v27
1531 vcipher
$out5,$out5,v27
1532 vcipher
$out6,$out6,v27
1533 vcipher
$out7,$out7,v27
1534 lvx v25
,$x10,$key_ # re-pre-load round[2]
1536 vcipher
$out0,$out0,v28
1537 lvx_u
$in0,$x00,$inp # load input
1538 vcipher
$out1,$out1,v28
1539 lvx_u
$in1,$x10,$inp
1540 vcipher
$out2,$out2,v28
1541 lvx_u
$in2,$x20,$inp
1542 vcipher
$out3,$out3,v28
1543 lvx_u
$in3,$x30,$inp
1544 vcipher
$out4,$out4,v28
1545 lvx_u
$in4,$x40,$inp
1546 vcipher
$out5,$out5,v28
1547 lvx_u
$in5,$x50,$inp
1548 vcipher
$out6,$out6,v28
1549 lvx_u
$in6,$x60,$inp
1550 vcipher
$out7,$out7,v28
1551 lvx_u
$in7,$x70,$inp
1554 vcipher
$out0,$out0,v29
1555 le?vperm
$in0,$in0,$in0,$inpperm
1556 vcipher
$out1,$out1,v29
1557 le?vperm
$in1,$in1,$in1,$inpperm
1558 vcipher
$out2,$out2,v29
1559 le?vperm
$in2,$in2,$in2,$inpperm
1560 vcipher
$out3,$out3,v29
1561 le?vperm
$in3,$in3,$in3,$inpperm
1562 vcipher
$out4,$out4,v29
1563 le?vperm
$in4,$in4,$in4,$inpperm
1564 vcipher
$out5,$out5,v29
1565 le?vperm
$in5,$in5,$in5,$inpperm
1566 vcipher
$out6,$out6,v29
1567 le?vperm
$in6,$in6,$in6,$inpperm
1568 vcipher
$out7,$out7,v29
1569 le?vperm
$in7,$in7,$in7,$inpperm
1571 add
$inp,$inp,r0
# $inp is adjusted in such
1572 # way that at exit from the
1573 # loop inX-in7 are loaded
1575 subfe
. r0
,r0
,r0
# borrow?-1:0
1576 vcipher
$out0,$out0,v30
1577 vxor
$in0,$in0,v31
# xor with last round key
1578 vcipher
$out1,$out1,v30
1580 vcipher
$out2,$out2,v30
1582 vcipher
$out3,$out3,v30
1584 vcipher
$out4,$out4,v30
1586 vcipher
$out5,$out5,v30
1588 vcipher
$out6,$out6,v30
1590 vcipher
$out7,$out7,v30
1593 bne Lctr32_enc8x_break
# did $len-129 borrow?
1595 vcipherlast
$in0,$out0,$in0
1596 vcipherlast
$in1,$out1,$in1
1597 vadduqm
$out1,$ivec,$one # counter values ...
1598 vcipherlast
$in2,$out2,$in2
1599 vadduqm
$out2,$ivec,$two
1600 vxor
$out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1601 vcipherlast
$in3,$out3,$in3
1602 vadduqm
$out3,$out1,$two
1603 vxor
$out1,$out1,$rndkey0
1604 vcipherlast
$in4,$out4,$in4
1605 vadduqm
$out4,$out2,$two
1606 vxor
$out2,$out2,$rndkey0
1607 vcipherlast
$in5,$out5,$in5
1608 vadduqm
$out5,$out3,$two
1609 vxor
$out3,$out3,$rndkey0
1610 vcipherlast
$in6,$out6,$in6
1611 vadduqm
$out6,$out4,$two
1612 vxor
$out4,$out4,$rndkey0
1613 vcipherlast
$in7,$out7,$in7
1614 vadduqm
$out7,$out5,$two
1615 vxor
$out5,$out5,$rndkey0
1616 le?vperm
$in0,$in0,$in0,$inpperm
1617 vadduqm
$ivec,$out6,$two # next counter value
1618 vxor
$out6,$out6,$rndkey0
1619 le?vperm
$in1,$in1,$in1,$inpperm
1620 vxor
$out7,$out7,$rndkey0
1623 vcipher
$out0,$out0,v24
1624 stvx_u
$in0,$x00,$out
1625 le?vperm
$in2,$in2,$in2,$inpperm
1626 vcipher
$out1,$out1,v24
1627 stvx_u
$in1,$x10,$out
1628 le?vperm
$in3,$in3,$in3,$inpperm
1629 vcipher
$out2,$out2,v24
1630 stvx_u
$in2,$x20,$out
1631 le?vperm
$in4,$in4,$in4,$inpperm
1632 vcipher
$out3,$out3,v24
1633 stvx_u
$in3,$x30,$out
1634 le?vperm
$in5,$in5,$in5,$inpperm
1635 vcipher
$out4,$out4,v24
1636 stvx_u
$in4,$x40,$out
1637 le?vperm
$in6,$in6,$in6,$inpperm
1638 vcipher
$out5,$out5,v24
1639 stvx_u
$in5,$x50,$out
1640 le?vperm
$in7,$in7,$in7,$inpperm
1641 vcipher
$out6,$out6,v24
1642 stvx_u
$in6,$x60,$out
1643 vcipher
$out7,$out7,v24
1644 stvx_u
$in7,$x70,$out
1647 b Loop_ctr32_enc8x_middle
1652 blt Lctr32_enc8x_one
1654 beq Lctr32_enc8x_two
1656 blt Lctr32_enc8x_three
1658 beq Lctr32_enc8x_four
1660 blt Lctr32_enc8x_five
1662 beq Lctr32_enc8x_six
1664 blt Lctr32_enc8x_seven
1667 vcipherlast
$out0,$out0,$in0
1668 vcipherlast
$out1,$out1,$in1
1669 vcipherlast
$out2,$out2,$in2
1670 vcipherlast
$out3,$out3,$in3
1671 vcipherlast
$out4,$out4,$in4
1672 vcipherlast
$out5,$out5,$in5
1673 vcipherlast
$out6,$out6,$in6
1674 vcipherlast
$out7,$out7,$in7
1676 le?vperm
$out0,$out0,$out0,$inpperm
1677 le?vperm
$out1,$out1,$out1,$inpperm
1678 stvx_u
$out0,$x00,$out
1679 le?vperm
$out2,$out2,$out2,$inpperm
1680 stvx_u
$out1,$x10,$out
1681 le?vperm
$out3,$out3,$out3,$inpperm
1682 stvx_u
$out2,$x20,$out
1683 le?vperm
$out4,$out4,$out4,$inpperm
1684 stvx_u
$out3,$x30,$out
1685 le?vperm
$out5,$out5,$out5,$inpperm
1686 stvx_u
$out4,$x40,$out
1687 le?vperm
$out6,$out6,$out6,$inpperm
1688 stvx_u
$out5,$x50,$out
1689 le?vperm
$out7,$out7,$out7,$inpperm
1690 stvx_u
$out6,$x60,$out
1691 stvx_u
$out7,$x70,$out
1697 vcipherlast
$out0,$out0,$in1
1698 vcipherlast
$out1,$out1,$in2
1699 vcipherlast
$out2,$out2,$in3
1700 vcipherlast
$out3,$out3,$in4
1701 vcipherlast
$out4,$out4,$in5
1702 vcipherlast
$out5,$out5,$in6
1703 vcipherlast
$out6,$out6,$in7
1705 le?vperm
$out0,$out0,$out0,$inpperm
1706 le?vperm
$out1,$out1,$out1,$inpperm
1707 stvx_u
$out0,$x00,$out
1708 le?vperm
$out2,$out2,$out2,$inpperm
1709 stvx_u
$out1,$x10,$out
1710 le?vperm
$out3,$out3,$out3,$inpperm
1711 stvx_u
$out2,$x20,$out
1712 le?vperm
$out4,$out4,$out4,$inpperm
1713 stvx_u
$out3,$x30,$out
1714 le?vperm
$out5,$out5,$out5,$inpperm
1715 stvx_u
$out4,$x40,$out
1716 le?vperm
$out6,$out6,$out6,$inpperm
1717 stvx_u
$out5,$x50,$out
1718 stvx_u
$out6,$x60,$out
1724 vcipherlast
$out0,$out0,$in2
1725 vcipherlast
$out1,$out1,$in3
1726 vcipherlast
$out2,$out2,$in4
1727 vcipherlast
$out3,$out3,$in5
1728 vcipherlast
$out4,$out4,$in6
1729 vcipherlast
$out5,$out5,$in7
1731 le?vperm
$out0,$out0,$out0,$inpperm
1732 le?vperm
$out1,$out1,$out1,$inpperm
1733 stvx_u
$out0,$x00,$out
1734 le?vperm
$out2,$out2,$out2,$inpperm
1735 stvx_u
$out1,$x10,$out
1736 le?vperm
$out3,$out3,$out3,$inpperm
1737 stvx_u
$out2,$x20,$out
1738 le?vperm
$out4,$out4,$out4,$inpperm
1739 stvx_u
$out3,$x30,$out
1740 le?vperm
$out5,$out5,$out5,$inpperm
1741 stvx_u
$out4,$x40,$out
1742 stvx_u
$out5,$x50,$out
1748 vcipherlast
$out0,$out0,$in3
1749 vcipherlast
$out1,$out1,$in4
1750 vcipherlast
$out2,$out2,$in5
1751 vcipherlast
$out3,$out3,$in6
1752 vcipherlast
$out4,$out4,$in7
1754 le?vperm
$out0,$out0,$out0,$inpperm
1755 le?vperm
$out1,$out1,$out1,$inpperm
1756 stvx_u
$out0,$x00,$out
1757 le?vperm
$out2,$out2,$out2,$inpperm
1758 stvx_u
$out1,$x10,$out
1759 le?vperm
$out3,$out3,$out3,$inpperm
1760 stvx_u
$out2,$x20,$out
1761 le?vperm
$out4,$out4,$out4,$inpperm
1762 stvx_u
$out3,$x30,$out
1763 stvx_u
$out4,$x40,$out
1769 vcipherlast
$out0,$out0,$in4
1770 vcipherlast
$out1,$out1,$in5
1771 vcipherlast
$out2,$out2,$in6
1772 vcipherlast
$out3,$out3,$in7
1774 le?vperm
$out0,$out0,$out0,$inpperm
1775 le?vperm
$out1,$out1,$out1,$inpperm
1776 stvx_u
$out0,$x00,$out
1777 le?vperm
$out2,$out2,$out2,$inpperm
1778 stvx_u
$out1,$x10,$out
1779 le?vperm
$out3,$out3,$out3,$inpperm
1780 stvx_u
$out2,$x20,$out
1781 stvx_u
$out3,$x30,$out
1787 vcipherlast
$out0,$out0,$in5
1788 vcipherlast
$out1,$out1,$in6
1789 vcipherlast
$out2,$out2,$in7
1791 le?vperm
$out0,$out0,$out0,$inpperm
1792 le?vperm
$out1,$out1,$out1,$inpperm
1793 stvx_u
$out0,$x00,$out
1794 le?vperm
$out2,$out2,$out2,$inpperm
1795 stvx_u
$out1,$x10,$out
1796 stvx_u
$out2,$x20,$out
1802 vcipherlast
$out0,$out0,$in6
1803 vcipherlast
$out1,$out1,$in7
1805 le?vperm
$out0,$out0,$out0,$inpperm
1806 le?vperm
$out1,$out1,$out1,$inpperm
1807 stvx_u
$out0,$x00,$out
1808 stvx_u
$out1,$x10,$out
1814 vcipherlast
$out0,$out0,$in7
1816 le?vperm
$out0,$out0,$out0,$inpperm
1823 stvx
$inpperm,r10
,$sp # wipe copies of round keys
1825 stvx
$inpperm,r11
,$sp
1827 stvx
$inpperm,r10
,$sp
1829 stvx
$inpperm,r11
,$sp
1831 stvx
$inpperm,r10
,$sp
1833 stvx
$inpperm,r11
,$sp
1835 stvx
$inpperm,r10
,$sp
1837 stvx
$inpperm,r11
,$sp
1841 lvx v20
,r10
,$sp # ABI says so
1863 $POP r26
,`$FRAME+21*16+0*$SIZE_T`($sp)
1864 $POP r27
,`$FRAME+21*16+1*$SIZE_T`($sp)
1865 $POP r28
,`$FRAME+21*16+2*$SIZE_T`($sp)
1866 $POP r29
,`$FRAME+21*16+3*$SIZE_T`($sp)
1867 $POP r30
,`$FRAME+21*16+4*$SIZE_T`($sp)
1868 $POP r31
,`$FRAME+21*16+5*$SIZE_T`($sp)
1869 addi
$sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1872 .byte
0,12,0x14,0,0x80,6,6,0
1874 .size
.${prefix
}_ctr32_encrypt_blocks
,.-.${prefix
}_ctr32_encrypt_blocks
1879 foreach(split("\n",$code)) {
1880 s/\`([^\`]*)\`/eval($1)/geo;
1882 # constants table endian-specific conversion
1883 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1887 # convert to endian-agnostic format
1889 foreach (split(/,\s*/,$2)) {
1890 my $l = /^0/?
oct:int;
1891 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1894 @bytes = map(/^0/?
oct:int,split(/,\s*/,$2));
1897 # little-endian conversion
1898 if ($flavour =~ /le$/o) {
1899 SWITCH
: for($conv) {
1900 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1901 /\?rev/ && do { @bytes=reverse(@bytes); last; };
1906 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1909 $consts=0 if (m/Lconsts:/o); # end of table
1911 # instructions prefixed with '?' are endian-specific and need
1912 # to be adjusted accordingly...
1913 if ($flavour =~ /le$/o) { # little-endian
1918 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1919 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1920 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1921 } else { # big-endian