#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from CRYPTOGAMS[1] and is included here using the option
# in the license to distribute the code under the GPL. Therefore this program
# is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License version 2 as published by the Free Software
# Foundation.
#
# [1] https://www.openssl.org/~appro/cryptogams/
#
# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain copyright notices,
# this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# * Neither the name of the CRYPTOGAMS nor the names of its
# copyright holder and contributors may be used to endorse or
# promote products derived from this software without specific
# prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# This module implements support for AES instructions as per PowerISA
# specification version 2.07, first implemented by POWER8 processor.
# The module is endian-agnostic in the sense that it supports both big-
# and little-endian cases. Data alignment in parallelizable modes is
# handled with VSX loads and stores, which implies MSR.VSX flag being
# set. It should also be noted that the ISA specification doesn't
# prohibit alignment exceptions for these instructions on page
# boundaries. Initially alignment was handled in pure AltiVec/VMX way
# [when data is aligned programmatically, which in turn guarantees
# exception-free execution], but it turned out to hamper performance
# when vcipher instructions are interleaved. It's reckoned that
# eventual misalignment penalties at page boundaries are on average
# lower than the additional overhead of the pure AltiVec approach.
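#
# A C-level sketch of the entry points this module emits (inferred from
# the code below; a hedged sketch, not an authoritative header, and the
# exact parameter types are an assumption):
#
#   int  aes_p8_set_encrypt_key(const u8 *inp, int bits, void *key);
#   int  aes_p8_set_decrypt_key(const u8 *inp, int bits, void *key);
#   void aes_p8_encrypt(const u8 *inp, u8 *out, const void *key);
#   void aes_p8_decrypt(const u8 *inp, u8 *out, const void *key);
#   void aes_p8_cbc_encrypt(const u8 *inp, u8 *out, size_t len,
#                           const void *key, u8 *ivp, int enc);
#   void aes_p8_ctr32_encrypt_blocks(const u8 *inp, u8 *out, size_t len,
#                           const void *key, const u8 *ivp);
#
# The XTS prototype is spelled out in the XTS section further down.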
#
# May 2016
#
# Added XTS subroutine; 9x improvement on little- and 12x on big-endian
# systems was measured.
#
######################################################################
# Current large-block performance in cycles per byte processed with
# 128-bit key (less is better).
#
#              CBC en-/decrypt   CTR    XTS
# POWER8[le]   3.96/0.72         0.74   1.1
# POWER8[be]   3.75/0.65         0.66   1.0

$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
	$UCMP	="cmpld";
	$SHL	="sldi";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
	$UCMP	="cmplw";
	$SHL	="slwi";
} else { die "nonsense $flavour"; }

$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
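
# The first command-line argument selects the flavour (32- vs 64-bit,
# big- vs little-endian) and the second names the output file; a typical
# invocation, with an assumed flavour name, would be:
#
#   perl aesp8-ppc.pl linux-ppc64le aesp8-ppc.s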
$FRAME=8*$SIZE_T;
$prefix="aes_p8";

$sp="r1";
$vrsave="r12";

#########################################################################
{{{ # Key setup procedures #
my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
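
# Notation reminder: "le?"/"be?" prefixes mark instructions kept only on
# little-/big-endian flavours, "?"-prefixed permute setup (?lvsl, ?lvsr,
# ?vperm, ?vsldoi) has its operands adjusted on little-endian, and
# constants tagged "?rev" are byte-reversed for LE while "?asis" data is
# emitted verbatim (all resolved by the script's post-processing pass;
# summarized here for the reader's convenience).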
$code.=<<___;
.machine "any"

.text

.align 7
rcon:
.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
.long 0,0,0,0 ?asis
Lconsts:
mflr r0
bcl 20,31,\$+4
mflr $ptr # the -0x48 below is the distance between . and rcon
addi $ptr,$ptr,-0x48
mtlr r0
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0
.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"

.globl .${prefix}_set_encrypt_key
Lset_encrypt_key:
mflr r11
$PUSH r11,$LRSAVE($sp)

li $ptr,-1
${UCMP}i $inp,0
beq- Lenc_key_abort # if ($inp==0) return -1;
${UCMP}i $out,0
beq- Lenc_key_abort # if ($out==0) return -1;
li $ptr,-2
cmpwi $bits,128
blt- Lenc_key_abort
cmpwi $bits,256
bgt- Lenc_key_abort
andi. r0,$bits,0x3f
bne- Lenc_key_abort

lis r0,0xfff0
mfspr $vrsave,256
mtspr 256,r0

bl Lconsts
mtlr r11

neg r9,$inp
lvx $in0,0,$inp
addi $inp,$inp,15 # 15 is not typo
lvsr $key,0,r9 # borrow $key
li r8,0x20
cmpwi $bits,192
lvx $in1,0,$inp
le?vspltisb $mask,0x0f # borrow $mask
lvx $rcon,0,$ptr
le?vxor $key,$key,$mask # adjust for byte swap
lvx $mask,r8,$ptr
addi $ptr,$ptr,0x10
vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
li $cnt,8
vxor $zero,$zero,$zero
mtctr $cnt

?lvsr $outperm,0,$out
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$zero,$outmask,$outperm
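
# The three vectors prepared above implement streaming stores to a
# possibly unaligned $out: each result is rotated into store alignment
# ($outperm), merged with the previously rotated vector ($outhead)
# under $outmask via vsel, and written with an aligned stvx; the last
# partial vector is patched in at Ldone below.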

blt Loop128
addi $inp,$inp,8
beq L192
addi $inp,$inp,8
b L256

.align 4
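# Each pass below derives one round key. The $mask vperm rotate-n-splats
# the last word of the previous round key into all four words; with the
# words identical, the ShiftRows step inside vcipherlast degenerates to
# a no-op, so vcipherlast reduces to the textbook
# SubWord(RotWord(w)) ^ rcon, replicated across all lanes.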
Loop128:
vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key
bdnz Loop128

lvx $rcon,0,$ptr # last two round keys

vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key

vperm $key,$in0,$in0,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vxor $in0,$in0,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out

addi $inp,$out,15 # 15 is not typo
addi $out,$out,0x50

li $rounds,10
b Ldone

.align 4
L192:
lvx $tmp,0,$inp
li $cnt,4
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $out,$out,16
vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
vspltisb $key,8 # borrow $key
mtctr $cnt
vsububm $mask,$mask,$key # adjust the mask

Loop192:
vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vcipherlast $key,$key,$rcon

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp

vsldoi $stage,$zero,$in1,8
vspltw $tmp,$in0,3
vxor $tmp,$tmp,$in1
vsldoi $in1,$zero,$in1,12 # >>32
vadduwm $rcon,$rcon,$rcon
vxor $in1,$in1,$tmp
vxor $in0,$in0,$key
vxor $in1,$in1,$key
vsldoi $stage,$stage,$in0,8

vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$stage,$stage,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vsldoi $stage,$in0,$in1,8
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vperm $outtail,$stage,$stage,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
stvx $stage,0,$out
addi $out,$out,16

vspltw $tmp,$in0,3
vxor $tmp,$tmp,$in1
vsldoi $in1,$zero,$in1,12 # >>32
vadduwm $rcon,$rcon,$rcon
vxor $in1,$in1,$tmp
vxor $in0,$in0,$key
vxor $in1,$in1,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $inp,$out,15 # 15 is not typo
addi $out,$out,16
bdnz Loop192

li $rounds,12
addi $out,$out,0x20
b Ldone

.align 4
L256:
lvx $tmp,0,$inp
li $cnt,7
li $rounds,14
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $out,$out,16
vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
mtctr $cnt

Loop256:
vperm $key,$in1,$in1,$mask # rotate-n-splat
vsldoi $tmp,$zero,$in0,12 # >>32
vperm $outtail,$in1,$in1,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
vcipherlast $key,$key,$rcon
stvx $stage,0,$out
addi $out,$out,16

vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in0,$in0,$tmp
vadduwm $rcon,$rcon,$rcon
vxor $in0,$in0,$key
vperm $outtail,$in0,$in0,$outperm # rotate
vsel $stage,$outhead,$outtail,$outmask
vmr $outhead,$outtail
stvx $stage,0,$out
addi $inp,$out,15 # 15 is not typo
addi $out,$out,16
bdz Ldone

vspltw $key,$in0,3 # just splat
vsldoi $tmp,$zero,$in1,12 # >>32
vsbox $key,$key

vxor $in1,$in1,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in1,$in1,$tmp
vsldoi $tmp,$zero,$tmp,12 # >>32
vxor $in1,$in1,$tmp

vxor $in1,$in1,$key
b Loop256

.align 4
Ldone:
lvx $in1,0,$inp # redundant in aligned case
vsel $in1,$outhead,$in1,$outmask
stvx $in1,0,$inp
li $ptr,0
mtspr 256,$vrsave
stw $rounds,0($out)

Lenc_key_abort:
mr r3,$ptr
blr
.long 0
.byte 0,12,0x14,1,0,0,3,0
.long 0
.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key

.globl .${prefix}_set_decrypt_key
$STU $sp,-$FRAME($sp)
mflr r10
$PUSH r10,$FRAME+$LRSAVE($sp)
bl Lset_encrypt_key
mtlr r10

cmpwi r3,0
bne- Ldec_key_abort

slwi $cnt,$rounds,4
subi $inp,$out,240 # first round key
srwi $rounds,$rounds,1
add $out,$inp,$cnt # last round key
mtctr $rounds
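
# The loop below converts the schedule in place: 16-byte round keys are
# swapped pairwise from both ends, so the decrypt path can walk the same
# keys in reverse order without keeping a second copy.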
Ldeckey:
lwz r0, 0($inp)
lwz r6, 4($inp)
lwz r7, 8($inp)
lwz r8, 12($inp)
addi $inp,$inp,16
lwz r9, 0($out)
lwz r10,4($out)
lwz r11,8($out)
lwz r12,12($out)
stw r0, 0($out)
stw r6, 4($out)
stw r7, 8($out)
stw r8, 12($out)
subi $out,$out,16
stw r9, -16($inp)
stw r10,-12($inp)
stw r11,-8($inp)
stw r12,-4($inp)
bdnz Ldeckey

xor r3,r3,r3 # return value
Ldec_key_abort:
addi $sp,$sp,$FRAME
blr
.long 0
.byte 0,12,4,1,0x80,0,3,0
.long 0
.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
___
}}}
#########################################################################
{{{ # Single block en- and decrypt procedures #
sub gen_block () {
my $dir = shift;
my $n = $dir eq "de" ? "n" : "";
my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));

$code.=<<___;
.globl .${prefix}_${dir}crypt
lwz $rounds,240($key)
lis r0,0xfc00
mfspr $vrsave,256
li $idx,15 # 15 is not typo
mtspr 256,r0

lvx v0,0,$inp
neg r11,$out
lvx v1,$idx,$inp
lvsl v2,0,$inp # inpperm
le?vspltisb v4,0x0f
?lvsl v3,0,r11 # outperm
le?vxor v2,v2,v4
li $idx,16
vperm v0,v0,v1,v2 # align [and byte swap in LE]
lvx v1,0,$key
?lvsl v5,0,$key # keyperm
srwi $rounds,$rounds,1
lvx v2,$idx,$key
addi $idx,$idx,16
subi $rounds,$rounds,1
?vperm v1,v1,v2,v5 # align round key

vxor v0,v0,v1
lvx v1,$idx,$key
addi $idx,$idx,16
mtctr $rounds

Loop_${dir}c:
?vperm v2,v2,v1,v5
v${n}cipher v0,v0,v2
lvx v2,$idx,$key
addi $idx,$idx,16
?vperm v1,v1,v2,v5
v${n}cipher v0,v0,v1
lvx v1,$idx,$key
addi $idx,$idx,16
bdnz Loop_${dir}c

?vperm v2,v2,v1,v5
v${n}cipher v0,v0,v2
lvx v2,$idx,$key
?vperm v1,v1,v2,v5
v${n}cipherlast v0,v0,v1

vspltisb v2,-1
vxor v1,v1,v1
li $idx,15 # 15 is not typo
?vperm v2,v1,v2,v3 # outmask
le?vxor v3,v3,v4
lvx v1,0,$out # outhead
vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
vsel v1,v1,v0,v2
lvx v4,$idx,$out
stvx v1,0,$out
vsel v0,v0,v4,v2
stvx v0,$idx,$out

mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
___
}
&gen_block("en");
&gen_block("de");
}}}
#########################################################################
{{{ # CBC en- and decrypt procedures #
my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
map("v$_",(4..10));
$code.=<<___;
.globl .${prefix}_cbc_encrypt
${UCMP}i $len,16
bltlr-

cmpwi $enc,0 # test direction
lis r0,0xffe0
mfspr $vrsave,256
mtspr 256,r0

li $idx,15
vxor $rndkey0,$rndkey0,$rndkey0
le?vspltisb $tmp,0x0f

lvx $ivec,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
le?vxor $inpperm,$inpperm,$tmp
vperm $ivec,$ivec,$inptail,$inpperm

neg r11,$inp
?lvsl $keyperm,0,$key # prepare for unaligned key
lwz $rounds,240($key)

lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inptail,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp

?lvsr $outperm,0,$out # prepare for unaligned store
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp

srwi $rounds,$rounds,1
li $idx,16
subi $rounds,$rounds,1
beq Lcbc_dec

Lcbc_enc:
vmr $inout,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
mtctr $rounds
subi $len,$len,16 # len-=16

lvx $rndkey0,0,$key
vperm $inout,$inout,$inptail,$inpperm
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
vxor $inout,$inout,$ivec

Loop_cbc_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_cbc_enc

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $ivec,$inout,$rndkey0
${UCMP}i $len,16

vperm $tmp,$ivec,$ivec,$outperm
vsel $inout,$outhead,$tmp,$outmask
vmr $outhead,$tmp
stvx $inout,0,$out
addi $out,$out,16
bge Lcbc_enc

b Lcbc_done

.align 4
Lcbc_dec:
${UCMP}i $len,128
bge _aesp8_cbc_decrypt8x
vmr $tmp,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
mtctr $rounds
subi $len,$len,16 # len-=16

lvx $rndkey0,0,$key
vperm $tmp,$tmp,$inptail,$inpperm
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$tmp,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16

Loop_cbc_dec:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_cbc_dec

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipherlast $inout,$inout,$rndkey0
${UCMP}i $len,16

vxor $inout,$inout,$ivec
vmr $ivec,$tmp
vperm $tmp,$inout,$inout,$outperm
vsel $inout,$outhead,$tmp,$outmask
vmr $outhead,$tmp
stvx $inout,0,$out
addi $out,$out,16
bge Lcbc_dec

Lcbc_done:
addi $out,$out,-1
lvx $inout,0,$out # redundant in aligned case
vsel $inout,$outhead,$inout,$outmask
stvx $inout,0,$out

neg $enc,$ivp # write [unaligned] iv
li $idx,15 # 15 is not typo
vxor $rndkey0,$rndkey0,$rndkey0
vspltisb $outmask,-1
le?vspltisb $tmp,0x0f
?lvsl $outperm,0,$enc
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp
lvx $outhead,0,$ivp
vperm $ivec,$ivec,$ivec,$outperm
vsel $inout,$outhead,$ivec,$outmask
lvx $inptail,$idx,$ivp
stvx $inout,0,$ivp
vsel $inout,$ivec,$inptail,$outmask
stvx $inout,$idx,$ivp

mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,6,0
.long 0
___
#########################################################################
{{ # Optimized CBC decrypt procedure #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
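
# Not enough vector registers remain to keep the whole key schedule
# resident next to eight data blocks, so rounds past the last six are
# staged on the stack at $FRAME+15 and streamed back in through the
# v24/v25 rotating pair inside the main loop below.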

$code.=<<___;
.align 5
_aesp8_cbc_decrypt8x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
li r10,`$FRAME+8*16+15`
li r11,`$FRAME+8*16+31`
stvx v20,r10,$sp # ABI says so
addi r10,r10,32
stvx v21,r11,$sp
addi r11,r11,32
stvx v22,r10,$sp
addi r10,r10,32
stvx v23,r11,$sp
addi r11,r11,32
stvx v24,r10,$sp
addi r10,r10,32
stvx v25,r11,$sp
addi r11,r11,32
stvx v26,r10,$sp
addi r10,r10,32
stvx v27,r11,$sp
addi r11,r11,32
stvx v28,r10,$sp
addi r10,r10,32
stvx v29,r11,$sp
addi r11,r11,32
stvx v30,r10,$sp
stvx v31,r11,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0

subi $rounds,$rounds,3 # -4 in total
subi $len,$len,128 # bias

lvx $rndkey0,$x00,$key # load key schedule
lvx v30,$x10,$key
addi $key,$key,0x20
lvx v31,$x00,$key
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds

Load_cbc_dec_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key
addi $key,$key,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_cbc_dec_key

lvx v26,$x10,$key
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key
?vperm v29,v29,v30,$keyperm
lvx $out0,$x70,$key # borrow $out0
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$out0,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]

#lvx $inptail,0,$inp # "caller" already did this
#addi $inp,$inp,15 # 15 is not typo
subi $inp,$inp,15 # undo "caller"

le?li $idx,8
lvx_u $in0,$x00,$inp # load first 8 "words"
le?lvsl $inpperm,0,$idx
le?vspltisb $tmp,0x0f
lvx_u $in1,$x10,$inp
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
lvx_u $in2,$x20,$inp
le?vperm $in0,$in0,$in0,$inpperm
lvx_u $in3,$x30,$inp
le?vperm $in1,$in1,$in1,$inpperm
lvx_u $in4,$x40,$inp
le?vperm $in2,$in2,$in2,$inpperm
vxor $out0,$in0,$rndkey0
lvx_u $in5,$x50,$inp
le?vperm $in3,$in3,$in3,$inpperm
vxor $out1,$in1,$rndkey0
lvx_u $in6,$x60,$inp
le?vperm $in4,$in4,$in4,$inpperm
vxor $out2,$in2,$rndkey0
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80
le?vperm $in5,$in5,$in5,$inpperm
vxor $out3,$in3,$rndkey0
le?vperm $in6,$in6,$in6,$inpperm
vxor $out4,$in4,$rndkey0
le?vperm $in7,$in7,$in7,$inpperm
vxor $out5,$in5,$rndkey0
vxor $out6,$in6,$rndkey0
vxor $out7,$in7,$rndkey0

mtctr $rounds
b Loop_cbc_dec8x
.align 5
Loop_cbc_dec8x:
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_cbc_dec8x

subic $len,$len,128 # $len-=128
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24

subfe. r0,r0,r0 # borrow?-1:0
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25

and r0,r0,$len
vncipher $out0,$out0,v26
vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
vncipher $out6,$out6,v26
vncipher $out7,$out7,v26

add $inp,$inp,r0 # $inp is adjusted in such
# way that at exit from the
# loop inX-in7 are loaded
# with last "words"
vncipher $out0,$out0,v27
vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27
vncipher $out6,$out6,v27
vncipher $out7,$out7,v27

addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
vncipher $out6,$out6,v28
vncipher $out7,$out7,v28
lvx v24,$x00,$key_ # re-pre-load round[1]

vncipher $out0,$out0,v29
vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
vncipher $out6,$out6,v29
vncipher $out7,$out7,v29
lvx v25,$x10,$key_ # re-pre-load round[2]

vncipher $out0,$out0,v30
vxor $ivec,$ivec,v31 # xor with last round key
vncipher $out1,$out1,v30
vxor $in0,$in0,v31
vncipher $out2,$out2,v30
vxor $in1,$in1,v31
vncipher $out3,$out3,v30
vxor $in2,$in2,v31
vncipher $out4,$out4,v30
vxor $in3,$in3,v31
vncipher $out5,$out5,v30
vxor $in4,$in4,v31
vncipher $out6,$out6,v30
vxor $in5,$in5,v31
vncipher $out7,$out7,v30
vxor $in6,$in6,v31

vncipherlast $out0,$out0,$ivec
vncipherlast $out1,$out1,$in0
lvx_u $in0,$x00,$inp # load next input block
vncipherlast $out2,$out2,$in1
lvx_u $in1,$x10,$inp
vncipherlast $out3,$out3,$in2
le?vperm $in0,$in0,$in0,$inpperm
lvx_u $in2,$x20,$inp
vncipherlast $out4,$out4,$in3
le?vperm $in1,$in1,$in1,$inpperm
lvx_u $in3,$x30,$inp
vncipherlast $out5,$out5,$in4
le?vperm $in2,$in2,$in2,$inpperm
lvx_u $in4,$x40,$inp
vncipherlast $out6,$out6,$in5
le?vperm $in3,$in3,$in3,$inpperm
lvx_u $in5,$x50,$inp
vncipherlast $out7,$out7,$in6
le?vperm $in4,$in4,$in4,$inpperm
lvx_u $in6,$x60,$inp
vmr $ivec,$in7
le?vperm $in5,$in5,$in5,$inpperm
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $in6,$in6,$in6,$inpperm
vxor $out0,$in0,$rndkey0
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $in7,$in7,$in7,$inpperm
vxor $out1,$in1,$rndkey0
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
vxor $out2,$in2,$rndkey0
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
vxor $out3,$in3,$rndkey0
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
vxor $out4,$in4,$rndkey0
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
vxor $out5,$in5,$rndkey0
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x60,$out
vxor $out6,$in6,$rndkey0
stvx_u $out7,$x70,$out
addi $out,$out,0x80
vxor $out7,$in7,$rndkey0

mtctr $rounds
beq Loop_cbc_dec8x # did $len-=128 borrow?

addic. $len,$len,128
beq Lcbc_dec8x_done
nop
nop

Loop_cbc_dec8x_tail: # up to 7 "words" tail...
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_cbc_dec8x_tail

vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
vncipher $out6,$out6,v24
vncipher $out7,$out7,v24

vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
vncipher $out6,$out6,v25
vncipher $out7,$out7,v25

vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
vncipher $out6,$out6,v26
vncipher $out7,$out7,v26

vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27
vncipher $out6,$out6,v27
vncipher $out7,$out7,v27

vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
vncipher $out6,$out6,v28
vncipher $out7,$out7,v28

vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
vncipher $out6,$out6,v29
vncipher $out7,$out7,v29

vncipher $out1,$out1,v30
vxor $ivec,$ivec,v31 # last round key
vncipher $out2,$out2,v30
vxor $in1,$in1,v31
vncipher $out3,$out3,v30
vxor $in2,$in2,v31
vncipher $out4,$out4,v30
vxor $in3,$in3,v31
vncipher $out5,$out5,v30
vxor $in4,$in4,v31
vncipher $out6,$out6,v30
vxor $in5,$in5,v31
vncipher $out7,$out7,v30
vxor $in6,$in6,v31

cmplwi $len,32 # switch($len)
blt Lcbc_dec8x_one
nop
beq Lcbc_dec8x_two
cmplwi $len,64
blt Lcbc_dec8x_three
nop
beq Lcbc_dec8x_four
cmplwi $len,96
blt Lcbc_dec8x_five
nop
beq Lcbc_dec8x_six

Lcbc_dec8x_seven:
vncipherlast $out1,$out1,$ivec
vncipherlast $out2,$out2,$in1
vncipherlast $out3,$out3,$in2
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out1,$out1,$out1,$inpperm
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x00,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x10,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x20,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x30,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x40,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x50,$out
stvx_u $out7,$x60,$out
addi $out,$out,0x70
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_six:
vncipherlast $out2,$out2,$ivec
vncipherlast $out3,$out3,$in2
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out2,$out2,$out2,$inpperm
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x00,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x10,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x20,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x30,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x40,$out
stvx_u $out7,$x50,$out
addi $out,$out,0x60
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_five:
vncipherlast $out3,$out3,$ivec
vncipherlast $out4,$out4,$in3
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out3,$out3,$out3,$inpperm
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x00,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x10,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x20,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x30,$out
stvx_u $out7,$x40,$out
addi $out,$out,0x50
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_four:
vncipherlast $out4,$out4,$ivec
vncipherlast $out5,$out5,$in4
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out4,$out4,$out4,$inpperm
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x00,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x10,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x20,$out
stvx_u $out7,$x30,$out
addi $out,$out,0x40
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_three:
vncipherlast $out5,$out5,$ivec
vncipherlast $out6,$out6,$in5
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out5,$out5,$out5,$inpperm
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x00,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x10,$out
stvx_u $out7,$x20,$out
addi $out,$out,0x30
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_two:
vncipherlast $out6,$out6,$ivec
vncipherlast $out7,$out7,$in6
vmr $ivec,$in7

le?vperm $out6,$out6,$out6,$inpperm
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x00,$out
stvx_u $out7,$x10,$out
addi $out,$out,0x20
b Lcbc_dec8x_done

.align 5
Lcbc_dec8x_one:
vncipherlast $out7,$out7,$ivec
vmr $ivec,$in7

le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out7,0,$out
addi $out,$out,0x10

Lcbc_dec8x_done:
le?vperm $ivec,$ivec,$ivec,$inpperm
stvx_u $ivec,0,$ivp # write [unaligned] iv

li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $inpperm,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32

mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x14,0,0x80,6,6,0
.long 0
.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
___
}} }}}

#########################################################################
{{{ # CTR procedure[s] #
my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
map("v$_",(4..11));
my $dat=$tmp;

$code.=<<___;
.globl .${prefix}_ctr32_encrypt_blocks
${UCMP}i $len,1
bltlr-

lis r0,0xfff0
mfspr $vrsave,256
mtspr 256,r0

li $idx,15
vxor $rndkey0,$rndkey0,$rndkey0
le?vspltisb $tmp,0x0f

lvx $ivec,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
vspltisb $one,1
le?vxor $inpperm,$inpperm,$tmp
vperm $ivec,$ivec,$inptail,$inpperm
vsldoi $one,$rndkey0,$one,1

neg r11,$inp
?lvsl $keyperm,0,$key # prepare for unaligned key
lwz $rounds,240($key)

lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inptail,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp

srwi $rounds,$rounds,1
li $idx,16
subi $rounds,$rounds,1

${UCMP}i $len,8
bge _aesp8_ctr32_encrypt8x

?lvsr $outperm,0,$out # prepare for unaligned store
vspltisb $outmask,-1
lvx $outhead,0,$out
?vperm $outmask,$rndkey0,$outmask,$outperm
le?vxor $outperm,$outperm,$tmp

lvx $rndkey0,0,$key
mtctr $rounds
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$ivec,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
b Loop_ctr32_enc

.align 5
Loop_ctr32_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
bdnz Loop_ctr32_enc

vadduqm $ivec,$ivec,$one
vmr $dat,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
subic. $len,$len,1 # blocks--

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key
vperm $dat,$dat,$inptail,$inpperm
li $idx,16
?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
lvx $rndkey0,0,$key
vxor $dat,$dat,$rndkey1 # last round key
vcipherlast $inout,$inout,$dat

lvx $rndkey1,$idx,$key
addi $idx,$idx,16
vperm $inout,$inout,$inout,$outperm
vsel $dat,$outhead,$inout,$outmask
mtctr $rounds
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vmr $outhead,$inout
vxor $inout,$ivec,$rndkey0
lvx $rndkey0,$idx,$key
addi $idx,$idx,16
stvx $dat,0,$out
addi $out,$out,16
bne Loop_ctr32_enc

addi $out,$out,-1
lvx $inout,0,$out # redundant in aligned case
vsel $inout,$outhead,$inout,$outmask
stvx $inout,0,$out

mtspr 256,$vrsave
blr
.long 0
.byte 0,12,0x14,0,0,0,6,0
.long 0
___
#########################################################################
{{ # Optimized CTR procedure #
my $key_="r11";
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
# v26-v31 last 6 round keys
my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
my ($two,$three,$four)=($outhead,$outperm,$outmask);
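
# Eight counter values are materialized per batch: $out1..$out7 are set
# to $ivec+1 .. $ivec+7 with vadduqm and the $one/$two increments, each
# is immediately xored with round key 0, and $ivec itself advances by 8
# to seed the next batch.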

$code.=<<___;
.align 5
_aesp8_ctr32_encrypt8x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
li r10,`$FRAME+8*16+15`
li r11,`$FRAME+8*16+31`
stvx v20,r10,$sp # ABI says so
addi r10,r10,32
stvx v21,r11,$sp
addi r11,r11,32
stvx v22,r10,$sp
addi r10,r10,32
stvx v23,r11,$sp
addi r11,r11,32
stvx v24,r10,$sp
addi r10,r10,32
stvx v25,r11,$sp
addi r11,r11,32
stvx v26,r10,$sp
addi r10,r10,32
stvx v27,r11,$sp
addi r11,r11,32
stvx v28,r10,$sp
addi r10,r10,32
stvx v29,r11,$sp
addi r11,r11,32
stvx v30,r10,$sp
stvx v31,r11,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0

subi $rounds,$rounds,3 # -4 in total

lvx $rndkey0,$x00,$key # load key schedule
lvx v30,$x10,$key
addi $key,$key,0x20
lvx v31,$x00,$key
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds

Load_ctr32_enc_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key
addi $key,$key,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_ctr32_enc_key

lvx v26,$x10,$key
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key
?vperm v29,v29,v30,$keyperm
lvx $out0,$x70,$key # borrow $out0
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$out0,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]

vadduqm $two,$one,$one
subi $inp,$inp,15 # undo "caller"
$SHL $len,$len,4

vadduqm $out1,$ivec,$one # counter values ...
vadduqm $out2,$ivec,$two
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
le?li $idx,8
vadduqm $out3,$out1,$two
vxor $out1,$out1,$rndkey0
le?lvsl $inpperm,0,$idx
vadduqm $out4,$out2,$two
vxor $out2,$out2,$rndkey0
le?vspltisb $tmp,0x0f
vadduqm $out5,$out3,$two
vxor $out3,$out3,$rndkey0
le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
vadduqm $out6,$out4,$two
vxor $out4,$out4,$rndkey0
vadduqm $out7,$out5,$two
vxor $out5,$out5,$rndkey0
vadduqm $ivec,$out6,$two # next counter value
vxor $out6,$out6,$rndkey0
vxor $out7,$out7,$rndkey0

mtctr $rounds
b Loop_ctr32_enc8x
.align 5
Loop_ctr32_enc8x:
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
vcipher $out6,$out6,v24
vcipher $out7,$out7,v24
Loop_ctr32_enc8x_middle:
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
vcipher $out6,$out6,v25
vcipher $out7,$out7,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_ctr32_enc8x

subic r11,$len,256 # $len-256, borrow $key_
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
vcipher $out6,$out6,v24
vcipher $out7,$out7,v24

subfe r0,r0,r0 # borrow?-1:0
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
vcipher $out6,$out6,v25
vcipher $out7,$out7,v25

and r0,r0,r11
addi $key_,$sp,$FRAME+15 # rewind $key_
vcipher $out0,$out0,v26
vcipher $out1,$out1,v26
vcipher $out2,$out2,v26
vcipher $out3,$out3,v26
vcipher $out4,$out4,v26
vcipher $out5,$out5,v26
vcipher $out6,$out6,v26
vcipher $out7,$out7,v26
lvx v24,$x00,$key_ # re-pre-load round[1]

subic $len,$len,129 # $len-=129
vcipher $out0,$out0,v27
addi $len,$len,1 # $len-=128 really
vcipher $out1,$out1,v27
vcipher $out2,$out2,v27
vcipher $out3,$out3,v27
vcipher $out4,$out4,v27
vcipher $out5,$out5,v27
vcipher $out6,$out6,v27
vcipher $out7,$out7,v27
lvx v25,$x10,$key_ # re-pre-load round[2]

vcipher $out0,$out0,v28
lvx_u $in0,$x00,$inp # load input
vcipher $out1,$out1,v28
lvx_u $in1,$x10,$inp
vcipher $out2,$out2,v28
lvx_u $in2,$x20,$inp
vcipher $out3,$out3,v28
lvx_u $in3,$x30,$inp
vcipher $out4,$out4,v28
lvx_u $in4,$x40,$inp
vcipher $out5,$out5,v28
lvx_u $in5,$x50,$inp
vcipher $out6,$out6,v28
lvx_u $in6,$x60,$inp
vcipher $out7,$out7,v28
lvx_u $in7,$x70,$inp
addi $inp,$inp,0x80

vcipher $out0,$out0,v29
le?vperm $in0,$in0,$in0,$inpperm
vcipher $out1,$out1,v29
le?vperm $in1,$in1,$in1,$inpperm
vcipher $out2,$out2,v29
le?vperm $in2,$in2,$in2,$inpperm
vcipher $out3,$out3,v29
le?vperm $in3,$in3,$in3,$inpperm
vcipher $out4,$out4,v29
le?vperm $in4,$in4,$in4,$inpperm
vcipher $out5,$out5,v29
le?vperm $in5,$in5,$in5,$inpperm
vcipher $out6,$out6,v29
le?vperm $in6,$in6,$in6,$inpperm
vcipher $out7,$out7,v29
le?vperm $in7,$in7,$in7,$inpperm

add $inp,$inp,r0 # $inp is adjusted in such
# way that at exit from the
# loop inX-in7 are loaded
# with last "words"
subfe. r0,r0,r0 # borrow?-1:0
vcipher $out0,$out0,v30
vxor $in0,$in0,v31 # xor with last round key
vcipher $out1,$out1,v30
vxor $in1,$in1,v31
vcipher $out2,$out2,v30
vxor $in2,$in2,v31
vcipher $out3,$out3,v30
vxor $in3,$in3,v31
vcipher $out4,$out4,v30
vxor $in4,$in4,v31
vcipher $out5,$out5,v30
vxor $in5,$in5,v31
vcipher $out6,$out6,v30
vxor $in6,$in6,v31
vcipher $out7,$out7,v30
vxor $in7,$in7,v31

bne Lctr32_enc8x_break # did $len-129 borrow?

vcipherlast $in0,$out0,$in0
vcipherlast $in1,$out1,$in1
vadduqm $out1,$ivec,$one # counter values ...
vcipherlast $in2,$out2,$in2
vadduqm $out2,$ivec,$two
vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
vcipherlast $in3,$out3,$in3
vadduqm $out3,$out1,$two
vxor $out1,$out1,$rndkey0
vcipherlast $in4,$out4,$in4
vadduqm $out4,$out2,$two
vxor $out2,$out2,$rndkey0
vcipherlast $in5,$out5,$in5
vadduqm $out5,$out3,$two
vxor $out3,$out3,$rndkey0
vcipherlast $in6,$out6,$in6
vadduqm $out6,$out4,$two
vxor $out4,$out4,$rndkey0
vcipherlast $in7,$out7,$in7
vadduqm $out7,$out5,$two
vxor $out5,$out5,$rndkey0
le?vperm $in0,$in0,$in0,$inpperm
vadduqm $ivec,$out6,$two # next counter value
vxor $out6,$out6,$rndkey0
le?vperm $in1,$in1,$in1,$inpperm
vxor $out7,$out7,$rndkey0
mtctr $rounds

vcipher $out0,$out0,v24
stvx_u $in0,$x00,$out
le?vperm $in2,$in2,$in2,$inpperm
vcipher $out1,$out1,v24
stvx_u $in1,$x10,$out
le?vperm $in3,$in3,$in3,$inpperm
vcipher $out2,$out2,v24
stvx_u $in2,$x20,$out
le?vperm $in4,$in4,$in4,$inpperm
vcipher $out3,$out3,v24
stvx_u $in3,$x30,$out
le?vperm $in5,$in5,$in5,$inpperm
vcipher $out4,$out4,v24
stvx_u $in4,$x40,$out
le?vperm $in6,$in6,$in6,$inpperm
vcipher $out5,$out5,v24
stvx_u $in5,$x50,$out
le?vperm $in7,$in7,$in7,$inpperm
vcipher $out6,$out6,v24
stvx_u $in6,$x60,$out
vcipher $out7,$out7,v24
stvx_u $in7,$x70,$out
addi $out,$out,0x80

b Loop_ctr32_enc8x_middle

.align 5
Lctr32_enc8x_break:
cmpwi $len,-0x60
blt Lctr32_enc8x_one
nop
beq Lctr32_enc8x_two
cmpwi $len,-0x40
blt Lctr32_enc8x_three
nop
beq Lctr32_enc8x_four
cmpwi $len,-0x20
blt Lctr32_enc8x_five
nop
beq Lctr32_enc8x_six
cmpwi $len,0x00
blt Lctr32_enc8x_seven

Lctr32_enc8x_eight:
vcipherlast $out0,$out0,$in0
vcipherlast $out1,$out1,$in1
vcipherlast $out2,$out2,$in2
vcipherlast $out3,$out3,$in3
vcipherlast $out4,$out4,$in4
vcipherlast $out5,$out5,$in5
vcipherlast $out6,$out6,$in6
vcipherlast $out7,$out7,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
le?vperm $out7,$out7,$out7,$inpperm
stvx_u $out6,$x60,$out
stvx_u $out7,$x70,$out
addi $out,$out,0x80
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_seven:
vcipherlast $out0,$out0,$in1
vcipherlast $out1,$out1,$in2
vcipherlast $out2,$out2,$in3
vcipherlast $out3,$out3,$in4
vcipherlast $out4,$out4,$in5
vcipherlast $out5,$out5,$in6
vcipherlast $out6,$out6,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
le?vperm $out6,$out6,$out6,$inpperm
stvx_u $out5,$x50,$out
stvx_u $out6,$x60,$out
addi $out,$out,0x70
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_six:
vcipherlast $out0,$out0,$in2
vcipherlast $out1,$out1,$in3
vcipherlast $out2,$out2,$in4
vcipherlast $out3,$out3,$in5
vcipherlast $out4,$out4,$in6
vcipherlast $out5,$out5,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
le?vperm $out5,$out5,$out5,$inpperm
stvx_u $out4,$x40,$out
stvx_u $out5,$x50,$out
addi $out,$out,0x60
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_five:
vcipherlast $out0,$out0,$in3
vcipherlast $out1,$out1,$in4
vcipherlast $out2,$out2,$in5
vcipherlast $out3,$out3,$in6
vcipherlast $out4,$out4,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$inpperm
stvx_u $out3,$x30,$out
stvx_u $out4,$x40,$out
addi $out,$out,0x50
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_four:
vcipherlast $out0,$out0,$in4
vcipherlast $out1,$out1,$in5
vcipherlast $out2,$out2,$in6
vcipherlast $out3,$out3,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$inpperm
stvx_u $out2,$x20,$out
stvx_u $out3,$x30,$out
addi $out,$out,0x40
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_three:
vcipherlast $out0,$out0,$in5
vcipherlast $out1,$out1,$in6
vcipherlast $out2,$out2,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
le?vperm $out2,$out2,$out2,$inpperm
stvx_u $out1,$x10,$out
stvx_u $out2,$x20,$out
addi $out,$out,0x30
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_two:
vcipherlast $out0,$out0,$in6
vcipherlast $out1,$out1,$in7

le?vperm $out0,$out0,$out0,$inpperm
le?vperm $out1,$out1,$out1,$inpperm
stvx_u $out0,$x00,$out
stvx_u $out1,$x10,$out
addi $out,$out,0x20
b Lctr32_enc8x_done

.align 5
Lctr32_enc8x_one:
vcipherlast $out0,$out0,$in7

le?vperm $out0,$out0,$out0,$inpperm
stvx_u $out0,0,$out
addi $out,$out,0x10

Lctr32_enc8x_done:
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $inpperm,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32
stvx $inpperm,r10,$sp
addi r10,r10,32
stvx $inpperm,r11,$sp
addi r11,r11,32

mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x14,0,0x80,6,6,0
.long 0
.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
___
}} }}}

#########################################################################
{{{ # XTS procedures #
# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len,  #
#                             const AES_KEY *key1, const AES_KEY *key2,#
#                             [const] unsigned char iv[16]);           #
# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which  #
# the input tweak value is assumed to be encrypted already, and the    #
# last tweak value, one suitable for a consecutive call on the same    #
# chunk of data, is written back to the original buffer. In addition,  #
# in "tweak chaining" mode only complete input blocks are processed.   #
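
# Usage sketch of the two conventions above (illustrative C, following
# the prototype in the comment; not a verbatim header):
#
#   /* ordinary XTS: iv holds the plaintext tweak, encrypted with key2 */
#   aes_p8_xts_encrypt(inp, out, len, key1, key2, iv);
#
#   /* "tweak chaining": iv already holds an encrypted tweak, is updated
#    * in place for the next call, and only whole 16-byte blocks of
#    * input are processed */
#   aes_p8_xts_encrypt(inp, out, len, key1, NULL, iv);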

my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
my $taillen = $key2;

($inp,$idx) = ($idx,$inp); # reassign

$code.=<<___;
.globl .${prefix}_xts_encrypt
mr $inp,r3 # reassign
li r3,-1
${UCMP}i $len,16
bltlr-

lis r0,0xfff0
mfspr r12,256 # save vrsave
li r11,0
mtspr 256,r0

vspltisb $seven,0x07 # 0x070707..07
le?lvsl $leperm,r11,r11
le?vspltisb $tmp,0x0f
le?vxor $leperm,$leperm,$seven

li $idx,15
lvx $tweak,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
le?vxor $inpperm,$inpperm,$tmp
vperm $tweak,$tweak,$inptail,$inpperm

neg r11,$inp
lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inout,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp

${UCMP}i $key2,0 # key2==NULL?
beq Lxts_enc_no_key2

?lvsl $keyperm,0,$key2 # prepare for unaligned key
lwz $rounds,240($key2)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16

lvx $rndkey0,0,$key2
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
mtctr $rounds

Ltweak_xts_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
bdnz Ltweak_xts_enc

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $tweak,$tweak,$rndkey0

li $ivp,0 # don't chain the tweak
b Lxts_enc

Lxts_enc_no_key2:
li $idx,-16
and $len,$len,$idx # in "tweak chaining"
# mode only complete
# blocks are processed
Lxts_enc:
lvx $inptail,0,$inp
addi $inp,$inp,16

?lvsl $keyperm,0,$key1 # prepare for unaligned key
lwz $rounds,240($key1)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16

vslb $eighty7,$seven,$seven # 0x808080..80
vor $eighty7,$eighty7,$seven # 0x878787..87
vspltisb $tmp,1 # 0x010101..01
vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
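
# The 0x870101..01 constant drives the per-block tweak update used
# below ("next tweak value"): vaddubm doubles every byte but drops the
# inter-byte carries, and the vsrab/vsldoi/vand/vxor sequence re-injects
# each dropped carry into the neighbouring byte (the 0x01 lanes) while
# the carry out of the top bit is folded back in as the GF(2^128)
# reduction constant 0x87, i.e. tweak = tweak * x.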

${UCMP}i $len,96
bge _aesp8_xts_encrypt6x

andi. $taillen,$len,15
subic r0,$len,32
subi $taillen,$taillen,16
subfe r0,r0,r0
and r0,r0,$taillen
add $inp,$inp,r0
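
# Branchless tail set-up: subic/subfe leave r0 = -1 only when $len is
# below 32 (carry clear), and $taillen was just biased to (len&15)-16,
# so the and/add pair pulls $inp back by 16-(len&15) bytes exactly when
# fewer than two blocks remain - the final short load then stays inside
# the input buffer.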

lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$tweak
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
mtctr $rounds
b Loop_xts_enc

.align 5
Loop_xts_enc:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
bdnz Loop_xts_enc

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $rndkey0,$rndkey0,$tweak
vcipherlast $output,$inout,$rndkey0

le?vperm $tmp,$output,$output,$leperm
be?nop
le?stvx_u $tmp,0,$out
be?stvx_u $output,0,$out
addi $out,$out,16

subic. $len,$len,16
beq Lxts_enc_done

vmr $inout,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16

subic r0,$len,32
subfe r0,r0,r0
and r0,r0,$taillen
add $inp,$inp,r0

vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak,$tweak,$tmp

vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$tweak
vxor $output,$output,$rndkey0 # just in case $len<16
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16

mtctr $rounds
${UCMP}i $len,16
bge Loop_xts_enc

vxor $output,$output,$tweak
lvsr $inpperm,0,$len # $inpperm is no longer needed
vxor $inptail,$inptail,$inptail # $inptail is no longer needed
vspltisb $tmp,-1
vperm $inptail,$inptail,$tmp,$inpperm
vsel $inout,$inout,$output,$inptail

subi r11,$out,17
subi $out,$out,16
mtctr $len
li $len,16
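# Ciphertext stealing, one byte at a time: the loop below copies the
# leading $taillen bytes of the ciphertext block just stored to the
# position 16 bytes further along the output, where they become the
# final short block; $out was rewound a block, so one more pass through
# Loop_xts_enc then overwrites the donor block in place.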
Loop_xts_enc_steal:
lbzu r0,1(r11)
stb r0,16(r11)
bdnz Loop_xts_enc_steal

mtctr $rounds
b Loop_xts_enc # one more time...

Lxts_enc_done:
${UCMP}i $ivp,0
beq Lxts_enc_ret

vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak,$tweak,$tmp

le?vperm $tweak,$tweak,$tweak,$leperm
stvx_u $tweak,0,$ivp

Lxts_enc_ret:
mtspr 256,r12 # restore vrsave
li r3,0
blr
.long 0
.byte 0,12,0x04,0,0x80,6,6,0
.long 0
.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt

.globl .${prefix}_xts_decrypt
mr $inp,r3 # reassign
li r3,-1
${UCMP}i $len,16
bltlr-

lis r0,0xfff8
mfspr r12,256 # save vrsave
li r11,0
mtspr 256,r0

andi. r0,$len,15
neg r0,r0
andi. r0,r0,16
sub $len,$len,r0
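
# Decryption reserves room for stealing up front: when $len is not a
# multiple of 16, r0 ends up 16 (bit 4 of -(len&15)) and $len is
# rounded down an extra block, so the last two blocks are handled by
# the Ltail_xts_dec path with their tweaks applied in reverse order.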

vspltisb $seven,0x07 # 0x070707..07
le?lvsl $leperm,r11,r11
le?vspltisb $tmp,0x0f
le?vxor $leperm,$leperm,$seven

li $idx,15
lvx $tweak,0,$ivp # load [unaligned] iv
lvsl $inpperm,0,$ivp
lvx $inptail,$idx,$ivp
le?vxor $inpperm,$inpperm,$tmp
vperm $tweak,$tweak,$inptail,$inpperm

neg r11,$inp
lvsr $inpperm,0,r11 # prepare for unaligned load
lvx $inout,0,$inp
addi $inp,$inp,15 # 15 is not typo
le?vxor $inpperm,$inpperm,$tmp

${UCMP}i $key2,0 # key2==NULL?
beq Lxts_dec_no_key2

?lvsl $keyperm,0,$key2 # prepare for unaligned key
lwz $rounds,240($key2)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16

lvx $rndkey0,0,$key2
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
mtctr $rounds

Ltweak_xts_dec:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipher $tweak,$tweak,$rndkey0
lvx $rndkey0,$idx,$key2
addi $idx,$idx,16
bdnz Ltweak_xts_dec

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vcipher $tweak,$tweak,$rndkey1
lvx $rndkey1,$idx,$key2
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vcipherlast $tweak,$tweak,$rndkey0

li $ivp,0 # don't chain the tweak
b Lxts_dec

Lxts_dec_no_key2:
neg $idx,$len
andi. $idx,$idx,15
add $len,$len,$idx # in "tweak chaining"
# mode only complete
# blocks are processed
Lxts_dec:
lvx $inptail,0,$inp
addi $inp,$inp,16

?lvsl $keyperm,0,$key1 # prepare for unaligned key
lwz $rounds,240($key1)
srwi $rounds,$rounds,1
subi $rounds,$rounds,1
li $idx,16

vslb $eighty7,$seven,$seven # 0x808080..80
vor $eighty7,$eighty7,$seven # 0x878787..87
vspltisb $tmp,1 # 0x010101..01
vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01

${UCMP}i $len,96
bge _aesp8_xts_decrypt6x

lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$tweak
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
mtctr $rounds

${UCMP}i $len,16
blt Ltail_xts_dec
be?b Loop_xts_dec

.align 5
Loop_xts_dec:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
bdnz Loop_xts_dec

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $rndkey0,$rndkey0,$tweak
vncipherlast $output,$inout,$rndkey0

le?vperm $tmp,$output,$output,$leperm
be?nop
le?stvx_u $tmp,0,$out
be?stvx_u $output,0,$out
addi $out,$out,16

subic. $len,$len,16
beq Lxts_dec_done

vmr $inout,$inptail
lvx $inptail,0,$inp
addi $inp,$inp,16
lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16

vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak,$tweak,$tmp

vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $inout,$inout,$tweak
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16

mtctr $rounds
${UCMP}i $len,16
bge Loop_xts_dec

Ltail_xts_dec:
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak1,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak1,$tweak1,$tmp

subi $inp,$inp,16
add $inp,$inp,$len

vxor $inout,$inout,$tweak # :-(
vxor $inout,$inout,$tweak1 # :-)
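
# Tweak swap for decrypt-side stealing: the block was already whitened
# with $tweak above, but the second-to-last ciphertext block must be
# decrypted under the *next* tweak ($tweak1); the xor pair above
# replaces one with the other - hence the smileys.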

Loop_xts_dec_short:
?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vncipher $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16
bdnz Loop_xts_dec_short

?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
vncipher $inout,$inout,$rndkey1
lvx $rndkey1,$idx,$key1
li $idx,16
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
vxor $rndkey0,$rndkey0,$tweak1
vncipherlast $output,$inout,$rndkey0

le?vperm $tmp,$output,$output,$leperm
be?nop
le?stvx_u $tmp,0,$out
be?stvx_u $output,0,$out

vmr $inout,$inptail
lvx $inptail,0,$inp
#addi $inp,$inp,16
lvx $rndkey0,0,$key1
lvx $rndkey1,$idx,$key1
addi $idx,$idx,16
vperm $inout,$inout,$inptail,$inpperm
?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm

lvsr $inpperm,0,$len # $inpperm is no longer needed
vxor $inptail,$inptail,$inptail # $inptail is no longer needed
vspltisb $tmp,-1
vperm $inptail,$inptail,$tmp,$inpperm
vsel $inout,$inout,$output,$inptail

vxor $rndkey0,$rndkey0,$tweak
vxor $inout,$inout,$rndkey0
lvx $rndkey0,$idx,$key1
addi $idx,$idx,16

subi r11,$out,1
mtctr $len
li $len,16
Loop_xts_dec_steal:
lbzu r0,1(r11)
stb r0,16(r11)
bdnz Loop_xts_dec_steal

mtctr $rounds
b Loop_xts_dec # one more time...

Lxts_dec_done:
${UCMP}i $ivp,0
beq Lxts_dec_ret

vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $tweak,$tweak,$tmp

le?vperm $tweak,$tweak,$tweak,$leperm
stvx_u $tweak,0,$ivp

Lxts_dec_ret:
mtspr 256,r12 # restore vrsave
li r3,0
blr
.long 0
.byte 0,12,0x04,0,0x80,6,6,0
.long 0
.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt

#########################################################################
{{ # Optimized XTS procedures #
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
$x00=0 if ($flavour =~ /osx/);
my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23"; # v24-v25 rotating buffer for the first round keys
# v26-v31 last 6 round keys
my ($keyperm)=($out0); # aliases with "caller", redundant assignment
my $taillen=$x70;

$code.=<<___;
.align 5
_aesp8_xts_encrypt6x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
mflr r11
li r7,`$FRAME+8*16+15`
li r3,`$FRAME+8*16+31`
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
stvx v20,r7,$sp # ABI says so
addi r7,r7,32
stvx v21,r3,$sp
addi r3,r3,32
stvx v22,r7,$sp
addi r7,r7,32
stvx v23,r3,$sp
addi r3,r3,32
stvx v24,r7,$sp
addi r7,r7,32
stvx v25,r3,$sp
addi r3,r3,32
stvx v26,r7,$sp
addi r7,r7,32
stvx v27,r3,$sp
addi r3,r3,32
stvx v28,r7,$sp
addi r7,r7,32
stvx v29,r3,$sp
addi r3,r3,32
stvx v30,r7,$sp
stvx v31,r3,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0

subi $rounds,$rounds,3 # -4 in total
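# $rounds arrived here as rounds/2-1 (set in the caller above); taking
# off 3 more leaves rounds/2-4 passes through the key-schedule copy
# loop, which handles two round keys per pass - the remaining keys are
# picked up by the unrolled loads into v26-v31 below.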

lvx $rndkey0,$x00,$key1 # load key schedule
lvx v30,$x10,$key1
addi $key1,$key1,0x20
lvx v31,$x00,$key1
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds

Load_xts_enc_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key1
addi $key1,$key1,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key1
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_xts_enc_key

lvx v26,$x10,$key1
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key1
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key1
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key1
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key1
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key1
?vperm v29,v29,v30,$keyperm
lvx $twk5,$x70,$key1 # borrow $twk5
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$twk5,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]

vperm $in0,$inout,$inptail,$inpperm
subi $inp,$inp,31 # undo "caller"
vxor $twk0,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $out0,$in0,$twk0
vxor $tweak,$tweak,$tmp

lvx_u $in1,$x10,$inp
vxor $twk1,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in1,$in1,$in1,$leperm
vand $tmp,$tmp,$eighty7
vxor $out1,$in1,$twk1
vxor $tweak,$tweak,$tmp

lvx_u $in2,$x20,$inp
andi. $taillen,$len,15
vxor $twk2,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in2,$in2,$in2,$leperm
vand $tmp,$tmp,$eighty7
vxor $out2,$in2,$twk2
vxor $tweak,$tweak,$tmp

lvx_u $in3,$x30,$inp
sub $len,$len,$taillen
vxor $twk3,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in3,$in3,$in3,$leperm
vand $tmp,$tmp,$eighty7
vxor $out3,$in3,$twk3
vxor $tweak,$tweak,$tmp

lvx_u $in4,$x40,$inp
subi $len,$len,0x60
vxor $twk4,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in4,$in4,$in4,$leperm
vand $tmp,$tmp,$eighty7
vxor $out4,$in4,$twk4
vxor $tweak,$tweak,$tmp

lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
vxor $twk5,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in5,$in5,$in5,$leperm
vand $tmp,$tmp,$eighty7
vxor $out5,$in5,$twk5
vxor $tweak,$tweak,$tmp

vxor v31,v31,$rndkey0
mtctr $rounds
b Loop_xts_enc6x

.align 5
Loop_xts_enc6x:
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_enc6x

subic $len,$len,96 # $len-=96
vxor $in0,$twk0,v31 # xor with last round key
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk0,$tweak,$rndkey0
vaddubm $tweak,$tweak,$tweak
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vsldoi $tmp,$tmp,$tmp,15
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24

subfe. r0,r0,r0 # borrow?-1:0
vand $tmp,$tmp,$eighty7
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vxor $tweak,$tweak,$tmp
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vxor $in1,$twk1,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk1,$tweak,$rndkey0
vcipher $out4,$out4,v25
vcipher $out5,$out5,v25

and r0,r0,$len
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vcipher $out0,$out0,v26
vcipher $out1,$out1,v26
vand $tmp,$tmp,$eighty7
vcipher $out2,$out2,v26
vcipher $out3,$out3,v26
vxor $tweak,$tweak,$tmp
vcipher $out4,$out4,v26
vcipher $out5,$out5,v26

add $inp,$inp,r0 # $inp is adjusted in such
# way that at exit from the
# loop inX-in5 are loaded
# with last "words"
vxor $in2,$twk2,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk2,$tweak,$rndkey0
vaddubm $tweak,$tweak,$tweak
vcipher $out0,$out0,v27
vcipher $out1,$out1,v27
vsldoi $tmp,$tmp,$tmp,15
vcipher $out2,$out2,v27
vcipher $out3,$out3,v27
vand $tmp,$tmp,$eighty7
vcipher $out4,$out4,v27
vcipher $out5,$out5,v27

addi $key_,$sp,$FRAME+15 # rewind $key_
vxor $tweak,$tweak,$tmp
vcipher $out0,$out0,v28
vcipher $out1,$out1,v28
vxor $in3,$twk3,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk3,$tweak,$rndkey0
vcipher $out2,$out2,v28
vcipher $out3,$out3,v28
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vcipher $out4,$out4,v28
vcipher $out5,$out5,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vand $tmp,$tmp,$eighty7

vcipher $out0,$out0,v29
vcipher $out1,$out1,v29
vxor $tweak,$tweak,$tmp
vcipher $out2,$out2,v29
vcipher $out3,$out3,v29
vxor $in4,$twk4,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk4,$tweak,$rndkey0
vcipher $out4,$out4,v29
vcipher $out5,$out5,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15

vcipher $out0,$out0,v30
vcipher $out1,$out1,v30
vand $tmp,$tmp,$eighty7
vcipher $out2,$out2,v30
vcipher $out3,$out3,v30
vxor $tweak,$tweak,$tmp
vcipher $out4,$out4,v30
vcipher $out5,$out5,v30
vxor $in5,$twk5,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk5,$tweak,$rndkey0

vcipherlast $out0,$out0,$in0
lvx_u $in0,$x00,$inp # load next input block
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vcipherlast $out1,$out1,$in1
lvx_u $in1,$x10,$inp
vcipherlast $out2,$out2,$in2
le?vperm $in0,$in0,$in0,$leperm
lvx_u $in2,$x20,$inp
vand $tmp,$tmp,$eighty7
vcipherlast $out3,$out3,$in3
le?vperm $in1,$in1,$in1,$leperm
lvx_u $in3,$x30,$inp
vcipherlast $out4,$out4,$in4
le?vperm $in2,$in2,$in2,$leperm
lvx_u $in4,$x40,$inp
vxor $tweak,$tweak,$tmp
vcipherlast $tmp,$out5,$in5 # last block might be needed
# in stealing mode
le?vperm $in3,$in3,$in3,$leperm
lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
le?vperm $in4,$in4,$in4,$leperm
le?vperm $in5,$in5,$in5,$leperm

le?vperm $out0,$out0,$out0,$leperm
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk0
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
vxor $out1,$in1,$twk1
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
vxor $out2,$in2,$twk2
le?vperm $out4,$out4,$out4,$leperm
stvx_u $out3,$x30,$out
vxor $out3,$in3,$twk3
le?vperm $out5,$tmp,$tmp,$leperm
stvx_u $out4,$x40,$out
vxor $out4,$in4,$twk4
le?stvx_u $out5,$x50,$out
be?stvx_u $tmp, $x50,$out
vxor $out5,$in5,$twk5
addi $out,$out,0x60

mtctr $rounds
beq Loop_xts_enc6x # did $len-=96 borrow?
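
# Fewer than 96 bytes left: undo the over-subtraction and dispatch on
# the number of whole blocks remaining (0x10-0x50 bytes, i.e. 1-5);
# $taillen still flags a dangling partial block, which is steered to
# the stealing code once the bulk blocks are flushed.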

addic. $len,$len,0x60
beq Lxts_enc6x_zero
cmpwi $len,0x20
blt Lxts_enc6x_one

beq Lxts_enc6x_two
cmpwi $len,0x40
blt Lxts_enc6x_three

beq Lxts_enc6x_four

Lxts_enc6x_five:
vxor $out0,$in1,$twk0
vxor $out1,$in2,$twk1
vxor $out2,$in3,$twk2
vxor $out3,$in4,$twk3
vxor $out4,$in5,$twk4

bl _aesp8_xts_enc5x

le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk5 # unused tweak
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
vxor $tmp,$out4,$twk5 # last block prep for stealing
le?vperm $out4,$out4,$out4,$leperm
stvx_u $out3,$x30,$out
stvx_u $out4,$x40,$out
addi $out,$out,0x50
bne Lxts_enc6x_steal
b Lxts_enc6x_done

.align 4
Lxts_enc6x_four:
vxor $out0,$in2,$twk0
vxor $out1,$in3,$twk1
vxor $out2,$in4,$twk2
vxor $out3,$in5,$twk3
vxor $out4,$out4,$out4

bl _aesp8_xts_enc5x

le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk4 # unused tweak
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
vxor $tmp,$out3,$twk4 # last block prep for stealing
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
stvx_u $out3,$x30,$out
addi $out,$out,0x40
bne Lxts_enc6x_steal
b Lxts_enc6x_done

.align 4
Lxts_enc6x_three:
vxor $out0,$in3,$twk0
vxor $out1,$in4,$twk1
vxor $out2,$in5,$twk2
vxor $out3,$out3,$out3
vxor $out4,$out4,$out4

bl _aesp8_xts_enc5x

le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk3 # unused tweak
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $tmp,$out2,$twk3 # last block prep for stealing
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
stvx_u $out2,$x20,$out
addi $out,$out,0x30
bne Lxts_enc6x_steal
b Lxts_enc6x_done

.align 4
Lxts_enc6x_two:
vxor $out0,$in4,$twk0
vxor $out1,$in5,$twk1
vxor $out2,$out2,$out2
vxor $out3,$out3,$out3
vxor $out4,$out4,$out4

bl _aesp8_xts_enc5x

le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk2 # unused tweak
vxor $tmp,$out1,$twk2 # last block prep for stealing
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
stvx_u $out1,$x10,$out
addi $out,$out,0x20
bne Lxts_enc6x_steal
b Lxts_enc6x_done

.align 4
Lxts_enc6x_one:
vxor $out0,$in5,$twk0

Loop_xts_enc1x:
vcipher $out0,$out0,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vcipher $out0,$out0,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_enc1x

add $inp,$inp,$taillen
cmpwi $taillen,0
vcipher $out0,$out0,v24

subi $inp,$inp,16
vcipher $out0,$out0,v25

lvsr $inpperm,0,$taillen
vcipher $out0,$out0,v26

lvx_u $in0,0,$inp
vcipher $out0,$out0,v27

addi $key_,$sp,$FRAME+15 # rewind $key_
vcipher $out0,$out0,v28
lvx v24,$x00,$key_ # re-pre-load round[1]

vcipher $out0,$out0,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $twk0,$twk0,v31

le?vperm $in0,$in0,$in0,$leperm
vcipher $out0,$out0,v30

vperm $in0,$in0,$in0,$inpperm
vcipherlast $out0,$out0,$twk0

vmr $twk0,$twk1 # unused tweak
vxor $tmp,$out0,$twk1 # last block prep for stealing
le?vperm $out0,$out0,$out0,$leperm
stvx_u $out0,$x00,$out # store output
addi $out,$out,0x10
bne Lxts_enc6x_steal
b Lxts_enc6x_done

.align 4
Lxts_enc6x_zero:
cmpwi $taillen,0
beq Lxts_enc6x_done

add $inp,$inp,$taillen
subi $inp,$inp,16
lvx_u $in0,0,$inp
lvsr $inpperm,0,$taillen # $in5 is no more
le?vperm $in0,$in0,$in0,$leperm
vperm $in0,$in0,$in0,$inpperm
vxor $tmp,$tmp,$twk0
Lxts_enc6x_steal:
vxor $in0,$in0,$twk0
vxor $out0,$out0,$out0
vspltisb $out1,-1
vperm $out0,$out0,$out1,$inpperm
vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?

subi r30,$out,17
subi $out,$out,16
mtctr $taillen
Loop_xts_enc6x_steal:
lbzu r0,1(r30)
stb r0,16(r30)
bdnz Loop_xts_enc6x_steal

li $taillen,0
mtctr $rounds
b Loop_xts_enc1x # one more time...

.align 4
Lxts_enc6x_done:
${UCMP}i $ivp,0
beq Lxts_enc6x_ret

vxor $tweak,$twk0,$rndkey0
le?vperm $tweak,$tweak,$tweak,$leperm
stvx_u $tweak,0,$ivp

Lxts_enc6x_ret:
mtlr r11
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $seven,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32

mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x04,1,0x80,6,6,0
.long 0

.align 5
_aesp8_xts_enc5x:
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
lvx v25,$x10,$key_ # round[4]
bdnz _aesp8_xts_enc5x

add $inp,$inp,$taillen
cmpwi $taillen,0
vcipher $out0,$out0,v24
vcipher $out1,$out1,v24
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
vcipher $out4,$out4,v24

subi $inp,$inp,16
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vcipher $out4,$out4,v25
vxor $twk0,$twk0,v31

vcipher $out0,$out0,v26
lvsr $inpperm,r0,$taillen # $in5 is no more
vcipher $out1,$out1,v26
vcipher $out2,$out2,v26
vcipher $out3,$out3,v26
vcipher $out4,$out4,v26
vxor $in1,$twk1,v31

vcipher $out0,$out0,v27
lvx_u $in0,0,$inp
vcipher $out1,$out1,v27
vcipher $out2,$out2,v27
vcipher $out3,$out3,v27
vcipher $out4,$out4,v27
vxor $in2,$twk2,v31

addi $key_,$sp,$FRAME+15 # rewind $key_
vcipher $out0,$out0,v28
vcipher $out1,$out1,v28
vcipher $out2,$out2,v28
vcipher $out3,$out3,v28
vcipher $out4,$out4,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vxor $in3,$twk3,v31

vcipher $out0,$out0,v29
le?vperm $in0,$in0,$in0,$leperm
vcipher $out1,$out1,v29
vcipher $out2,$out2,v29
vcipher $out3,$out3,v29
vcipher $out4,$out4,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $in4,$twk4,v31

vcipher $out0,$out0,v30
vperm $in0,$in0,$in0,$inpperm
vcipher $out1,$out1,v30
vcipher $out2,$out2,v30
vcipher $out3,$out3,v30
vcipher $out4,$out4,v30

vcipherlast $out0,$out0,$twk0
vcipherlast $out1,$out1,$in1
vcipherlast $out2,$out2,$in2
vcipherlast $out3,$out3,$in3
vcipherlast $out4,$out4,$in4
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0

.align 5
_aesp8_xts_decrypt6x:
$STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
mflr r11
li r7,`$FRAME+8*16+15`
li r3,`$FRAME+8*16+31`
$PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
stvx v20,r7,$sp # ABI says so
addi r7,r7,32
stvx v21,r3,$sp
addi r3,r3,32
stvx v22,r7,$sp
addi r7,r7,32
stvx v23,r3,$sp
addi r3,r3,32
stvx v24,r7,$sp
addi r7,r7,32
stvx v25,r3,$sp
addi r3,r3,32
stvx v26,r7,$sp
addi r7,r7,32
stvx v27,r3,$sp
addi r3,r3,32
stvx v28,r7,$sp
addi r7,r7,32
stvx v29,r3,$sp
addi r3,r3,32
stvx v30,r7,$sp
stvx v31,r3,$sp
li r0,-1
stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
li $x10,0x10
$PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
li $x20,0x20
$PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
li $x30,0x30
$PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
li $x40,0x40
$PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
li $x50,0x50
$PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
li $x60,0x60
$PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
li $x70,0x70
mtspr 256,r0

subi $rounds,$rounds,3 # -4 in total

lvx $rndkey0,$x00,$key1 # load key schedule
lvx v30,$x10,$key1
addi $key1,$key1,0x20
lvx v31,$x00,$key1
?vperm $rndkey0,$rndkey0,v30,$keyperm
addi $key_,$sp,$FRAME+15
mtctr $rounds

Load_xts_dec_key:
?vperm v24,v30,v31,$keyperm
lvx v30,$x10,$key1
addi $key1,$key1,0x20
stvx v24,$x00,$key_ # off-load round[1]
?vperm v25,v31,v30,$keyperm
lvx v31,$x00,$key1
stvx v25,$x10,$key_ # off-load round[2]
addi $key_,$key_,0x20
bdnz Load_xts_dec_key

lvx v26,$x10,$key1
?vperm v24,v30,v31,$keyperm
lvx v27,$x20,$key1
stvx v24,$x00,$key_ # off-load round[3]
?vperm v25,v31,v26,$keyperm
lvx v28,$x30,$key1
stvx v25,$x10,$key_ # off-load round[4]
addi $key_,$sp,$FRAME+15 # rewind $key_
?vperm v26,v26,v27,$keyperm
lvx v29,$x40,$key1
?vperm v27,v27,v28,$keyperm
lvx v30,$x50,$key1
?vperm v28,v28,v29,$keyperm
lvx v31,$x60,$key1
?vperm v29,v29,v30,$keyperm
lvx $twk5,$x70,$key1 # borrow $twk5
?vperm v30,v30,v31,$keyperm
lvx v24,$x00,$key_ # pre-load round[1]
?vperm v31,v31,$twk5,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]

vperm $in0,$inout,$inptail,$inpperm
subi $inp,$inp,31 # undo "caller"
vxor $twk0,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $out0,$in0,$twk0
vxor $tweak,$tweak,$tmp

lvx_u $in1,$x10,$inp
vxor $twk1,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in1,$in1,$in1,$leperm
vand $tmp,$tmp,$eighty7
vxor $out1,$in1,$twk1
vxor $tweak,$tweak,$tmp

lvx_u $in2,$x20,$inp
andi. $taillen,$len,15
vxor $twk2,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in2,$in2,$in2,$leperm
vand $tmp,$tmp,$eighty7
vxor $out2,$in2,$twk2
vxor $tweak,$tweak,$tmp

lvx_u $in3,$x30,$inp
sub $len,$len,$taillen
vxor $twk3,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in3,$in3,$in3,$leperm
vand $tmp,$tmp,$eighty7
vxor $out3,$in3,$twk3
vxor $tweak,$tweak,$tmp

lvx_u $in4,$x40,$inp
subi $len,$len,0x60
vxor $twk4,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in4,$in4,$in4,$leperm
vand $tmp,$tmp,$eighty7
vxor $out4,$in4,$twk4
vxor $tweak,$tweak,$tmp

lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
vxor $twk5,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
le?vperm $in5,$in5,$in5,$leperm
vand $tmp,$tmp,$eighty7
vxor $out5,$in5,$twk5
vxor $tweak,$tweak,$tmp

vxor v31,v31,$rndkey0
mtctr $rounds
b Loop_xts_dec6x

.align 5
Loop_xts_dec6x:
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_dec6x

subic $len,$len,96 # $len-=96
vxor $in0,$twk0,v31 # xor with last round key
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk0,$tweak,$rndkey0
vaddubm $tweak,$tweak,$tweak
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vsldoi $tmp,$tmp,$tmp,15
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24

subfe. r0,r0,r0 # borrow?-1:0
vand $tmp,$tmp,$eighty7
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vxor $tweak,$tweak,$tmp
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vxor $in1,$twk1,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk1,$tweak,$rndkey0
vncipher $out4,$out4,v25
vncipher $out5,$out5,v25

and r0,r0,$len
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vncipher $out0,$out0,v26
vncipher $out1,$out1,v26
vand $tmp,$tmp,$eighty7
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vxor $tweak,$tweak,$tmp
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26

add $inp,$inp,r0 # $inp is adjusted in such
# way that at exit from the
# loop inX-in5 are loaded
# with last "words"
vxor $in2,$twk2,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk2,$tweak,$rndkey0
vaddubm $tweak,$tweak,$tweak
vncipher $out0,$out0,v27
vncipher $out1,$out1,v27
vsldoi $tmp,$tmp,$tmp,15
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vand $tmp,$tmp,$eighty7
vncipher $out4,$out4,v27
vncipher $out5,$out5,v27

addi $key_,$sp,$FRAME+15 # rewind $key_
vxor $tweak,$tweak,$tmp
vncipher $out0,$out0,v28
vncipher $out1,$out1,v28
vxor $in3,$twk3,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk3,$tweak,$rndkey0
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vand $tmp,$tmp,$eighty7

vncipher $out0,$out0,v29
vncipher $out1,$out1,v29
vxor $tweak,$tweak,$tmp
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vxor $in4,$twk4,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk4,$tweak,$rndkey0
vncipher $out4,$out4,v29
vncipher $out5,$out5,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15

vncipher $out0,$out0,v30
vncipher $out1,$out1,v30
vand $tmp,$tmp,$eighty7
vncipher $out2,$out2,v30
vncipher $out3,$out3,v30
vxor $tweak,$tweak,$tmp
vncipher $out4,$out4,v30
vncipher $out5,$out5,v30
vxor $in5,$twk5,v31
vsrab $tmp,$tweak,$seven # next tweak value
vxor $twk5,$tweak,$rndkey0

vncipherlast $out0,$out0,$in0
lvx_u $in0,$x00,$inp # load next input block
vaddubm $tweak,$tweak,$tweak
vsldoi $tmp,$tmp,$tmp,15
vncipherlast $out1,$out1,$in1
lvx_u $in1,$x10,$inp
vncipherlast $out2,$out2,$in2
le?vperm $in0,$in0,$in0,$leperm
lvx_u $in2,$x20,$inp
vand $tmp,$tmp,$eighty7
vncipherlast $out3,$out3,$in3
le?vperm $in1,$in1,$in1,$leperm
lvx_u $in3,$x30,$inp
vncipherlast $out4,$out4,$in4
le?vperm $in2,$in2,$in2,$leperm
lvx_u $in4,$x40,$inp
vxor $tweak,$tweak,$tmp
vncipherlast $out5,$out5,$in5
le?vperm $in3,$in3,$in3,$leperm
lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
le?vperm $in4,$in4,$in4,$leperm
le?vperm $in5,$in5,$in5,$leperm

le?vperm $out0,$out0,$out0,$leperm
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk0
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
vxor $out1,$in1,$twk1
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
vxor $out2,$in2,$twk2
le?vperm $out4,$out4,$out4,$leperm
stvx_u $out3,$x30,$out
vxor $out3,$in3,$twk3
le?vperm $out5,$out5,$out5,$leperm
stvx_u $out4,$x40,$out
vxor $out4,$in4,$twk4
stvx_u $out5,$x50,$out
vxor $out5,$in5,$twk5
addi $out,$out,0x60

mtctr $rounds
beq Loop_xts_dec6x # did $len-=96 borrow?
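
# Tail dispatch below mirrors the encrypt path: 0-5 whole blocks may
# remain, plus a possible partial block that is steered to the
# decrypt-side stealing code.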

addic. $len,$len,0x60
beq Lxts_dec6x_zero
cmpwi $len,0x20
blt Lxts_dec6x_one

beq Lxts_dec6x_two
cmpwi $len,0x40
blt Lxts_dec6x_three

beq Lxts_dec6x_four

Lxts_dec6x_five:
vxor $out0,$in1,$twk0
vxor $out1,$in2,$twk1
vxor $out2,$in3,$twk2
vxor $out3,$in4,$twk3
vxor $out4,$in5,$twk4

bl _aesp8_xts_dec5x

le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk5 # unused tweak
vxor $twk1,$tweak,$rndkey0
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk1
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
le?vperm $out4,$out4,$out4,$leperm
stvx_u $out3,$x30,$out
stvx_u $out4,$x40,$out
addi $out,$out,0x50
bne Lxts_dec6x_steal
b Lxts_dec6x_done

.align 4
Lxts_dec6x_four:
vxor $out0,$in2,$twk0
vxor $out1,$in3,$twk1
vxor $out2,$in4,$twk2
vxor $out3,$in5,$twk3
vxor $out4,$out4,$out4

bl _aesp8_xts_dec5x

le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk4 # unused tweak
vmr $twk1,$twk5
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk5
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
le?vperm $out3,$out3,$out3,$leperm
stvx_u $out2,$x20,$out
stvx_u $out3,$x30,$out
addi $out,$out,0x40
bne Lxts_dec6x_steal
b Lxts_dec6x_done

.align 4
Lxts_dec6x_three:
vxor $out0,$in3,$twk0
vxor $out1,$in4,$twk1
vxor $out2,$in5,$twk2
vxor $out3,$out3,$out3
vxor $out4,$out4,$out4

bl _aesp8_xts_dec5x

le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk3 # unused tweak
vmr $twk1,$twk4
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk4
le?vperm $out2,$out2,$out2,$leperm
stvx_u $out1,$x10,$out
stvx_u $out2,$x20,$out
addi $out,$out,0x30
bne Lxts_dec6x_steal
b Lxts_dec6x_done

.align 4
Lxts_dec6x_two:
vxor $out0,$in4,$twk0
vxor $out1,$in5,$twk1
vxor $out2,$out2,$out2
vxor $out3,$out3,$out3
vxor $out4,$out4,$out4

bl _aesp8_xts_dec5x

le?vperm $out0,$out0,$out0,$leperm
vmr $twk0,$twk2 # unused tweak
vmr $twk1,$twk3
le?vperm $out1,$out1,$out1,$leperm
stvx_u $out0,$x00,$out # store output
vxor $out0,$in0,$twk3
stvx_u $out1,$x10,$out
addi $out,$out,0x20
bne Lxts_dec6x_steal
b Lxts_dec6x_done

.align 4
Lxts_dec6x_one:
vxor $out0,$in5,$twk0

Loop_xts_dec1x:
vncipher $out0,$out0,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out0,$out0,v25
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_dec1x

subi r0,$taillen,1
vncipher $out0,$out0,v24

andi. r0,r0,16
cmpwi $taillen,0
vncipher $out0,$out0,v25

sub $inp,$inp,r0
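# r0 is 16 exactly when $taillen is 0 (bit 4 of $taillen-1), so with no
# partial tail the lookahead load of $in0 below is pulled back a block
# and stays inside the input buffer; the loaded value is only consumed
# on the stealing path.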
vncipher $out0,$out0,v26

lvx_u $in0,0,$inp
vncipher $out0,$out0,v27

addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
lvx v24,$x00,$key_ # re-pre-load round[1]

vncipher $out0,$out0,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $twk0,$twk0,v31

le?vperm $in0,$in0,$in0,$leperm
vncipher $out0,$out0,v30

mtctr $rounds
vncipherlast $out0,$out0,$twk0

vmr $twk0,$twk1 # unused tweak
vmr $twk1,$twk2
le?vperm $out0,$out0,$out0,$leperm
stvx_u $out0,$x00,$out # store output
addi $out,$out,0x10
vxor $out0,$in0,$twk2
bne Lxts_dec6x_steal
b Lxts_dec6x_done

.align 4
Lxts_dec6x_zero:
cmpwi $taillen,0
beq Lxts_dec6x_done

lvx_u $in0,0,$inp
le?vperm $in0,$in0,$in0,$leperm
vxor $out0,$in0,$twk1
Lxts_dec6x_steal:
vncipher $out0,$out0,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out0,$out0,v25
lvx v25,$x10,$key_ # round[4]
bdnz Lxts_dec6x_steal

add $inp,$inp,$taillen
vncipher $out0,$out0,v24

cmpwi $taillen,0
vncipher $out0,$out0,v25

lvx_u $in0,0,$inp
vncipher $out0,$out0,v26

lvsr $inpperm,0,$taillen # $in5 is no more
vncipher $out0,$out0,v27

addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
lvx v24,$x00,$key_ # re-pre-load round[1]

vncipher $out0,$out0,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $twk1,$twk1,v31

le?vperm $in0,$in0,$in0,$leperm
vncipher $out0,$out0,v30

vperm $in0,$in0,$in0,$inpperm
vncipherlast $tmp,$out0,$twk1

le?vperm $out0,$tmp,$tmp,$leperm
le?stvx_u $out0,0,$out
be?stvx_u $tmp,0,$out

vxor $out0,$out0,$out0
vspltisb $out1,-1
vperm $out0,$out0,$out1,$inpperm
vsel $out0,$in0,$tmp,$out0
vxor $out0,$out0,$twk0

subi r30,$out,1
mtctr $taillen
Loop_xts_dec6x_steal:
lbzu r0,1(r30)
stb r0,16(r30)
bdnz Loop_xts_dec6x_steal

li $taillen,0
mtctr $rounds
b Loop_xts_dec1x # one more time...

.align 4
Lxts_dec6x_done:
${UCMP}i $ivp,0
beq Lxts_dec6x_ret

vxor $tweak,$twk0,$rndkey0
le?vperm $tweak,$tweak,$tweak,$leperm
stvx_u $tweak,0,$ivp

Lxts_dec6x_ret:
mtlr r11
li r10,`$FRAME+15`
li r11,`$FRAME+31`
stvx $seven,r10,$sp # wipe copies of round keys
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32
stvx $seven,r10,$sp
addi r10,r10,32
stvx $seven,r11,$sp
addi r11,r11,32

mtspr 256,$vrsave
lvx v20,r10,$sp # ABI says so
addi r10,r10,32
lvx v21,r11,$sp
addi r11,r11,32
lvx v22,r10,$sp
addi r10,r10,32
lvx v23,r11,$sp
addi r11,r11,32
lvx v24,r10,$sp
addi r10,r10,32
lvx v25,r11,$sp
addi r11,r11,32
lvx v26,r10,$sp
addi r10,r10,32
lvx v27,r11,$sp
addi r11,r11,32
lvx v28,r10,$sp
addi r10,r10,32
lvx v29,r11,$sp
addi r11,r11,32
lvx v30,r10,$sp
lvx v31,r11,$sp
$POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
$POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
$POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
$POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
$POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
blr
.long 0
.byte 0,12,0x04,1,0x80,6,6,0
.long 0

.align 5
_aesp8_xts_dec5x:
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24
lvx v24,$x20,$key_ # round[3]
addi $key_,$key_,0x20

vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
lvx v25,$x10,$key_ # round[4]
bdnz _aesp8_xts_dec5x

subi r0,$taillen,1
vncipher $out0,$out0,v24
vncipher $out1,$out1,v24
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
vncipher $out4,$out4,v24

andi. r0,r0,16
cmpwi $taillen,0
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vncipher $out4,$out4,v25
vxor $twk0,$twk0,v31

sub $inp,$inp,r0
vncipher $out0,$out0,v26
vncipher $out1,$out1,v26
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
vncipher $out4,$out4,v26
vxor $in1,$twk1,v31

vncipher $out0,$out0,v27
lvx_u $in0,0,$inp
vncipher $out1,$out1,v27
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vncipher $out4,$out4,v27
vxor $in2,$twk2,v31

addi $key_,$sp,$FRAME+15 # rewind $key_
vncipher $out0,$out0,v28
vncipher $out1,$out1,v28
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vncipher $out4,$out4,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
vxor $in3,$twk3,v31

vncipher $out0,$out0,v29
le?vperm $in0,$in0,$in0,$leperm
vncipher $out1,$out1,v29
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vncipher $out4,$out4,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vxor $in4,$twk4,v31

vncipher $out0,$out0,v30
vncipher $out1,$out1,v30
vncipher $out2,$out2,v30
vncipher $out3,$out3,v30
vncipher $out4,$out4,v30

vncipherlast $out0,$out0,$twk0
vncipherlast $out1,$out1,$in1
vncipherlast $out2,$out2,$in2
vncipherlast $out3,$out3,$in3
vncipherlast $out4,$out4,$in4
mtctr $rounds
blr
.long 0
.byte 0,12,0x14,0,0,0,0,0

}} }}}

my $consts=1;
foreach(split("\n",$code)) {
        s/\`([^\`]*)\`/eval($1)/geo;

        # constants table endian-specific conversion
        if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
            my $conv=$3;
            my @bytes=();

            # convert to endian-agnostic format
            if ($1 eq "long") {
                foreach (split(/,\s*/,$2)) {
                    my $l = /^0/?oct:int;
                    push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
                }
            } else {
                @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
            }

            # little-endian conversion
            if ($flavour =~ /le$/o) {
                SWITCH: for($conv) {
                    /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
                    /\?rev/ && do { @bytes=reverse(@bytes); last; };
                }
            }

            # emit
            print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
            next;
        }
        $consts=0 if (m/Lconsts:/o); # end of table

        # instructions prefixed with '?' are endian-specific and need
        # to be adjusted accordingly...
        if ($flavour =~ /le$/o) { # little-endian
            s/le\?//o or
            s/be\?/#be#/o or
            s/\?lvsr/lvsl/o or
            s/\?lvsl/lvsr/o or
            s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
            s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
            s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
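            # e.g. "?vperm v0,v1,v2,v3" is emitted as "vperm v0,v2,v1,v3"
            # on little-endian targets: swapping the two source operands
            # compensates for the lvsl/lvsr swap above, and "?vsldoi"
            # shift counts are mirrored as 16-n for the same reason.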
        } else { # big-endian
            s/le\?/#le#/o or
            s/be\?//o or
            s/\?([a-z]+)/$1/o;
        }

        print $_,"\n";
}

close STDOUT;