irqchip/zevio: Use irq_data_get_chip_type() helper
[linux/fpc-iii.git] / drivers / crypto / vmx / aesp8-ppc.pl
blob228053921b3f024f468dbcf720c31bcb3cc58437
1 #!/usr/bin/env perl
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
10 # This module implements support for AES instructions as per PowerISA
11 # specification version 2.07, first implemented by POWER8 processor.
12 # The module is endian-agnostic in the sense that it supports both big-
13 # and little-endian cases. Data alignment in parallelizable modes is
14 # handled with VSX loads and stores, which implies MSR.VSX flag being
15 # set. It should also be noted that ISA specification doesn't prohibit
16 # alignment exceptions for these instructions on page boundaries.
17 # Initially alignment was handled in pure AltiVec/VMX way [when data
18 # is aligned programmatically, which in turn guarantees exception-
19 # free execution], but it turned out to hamper performance when vcipher
20 # instructions are interleaved. It's reckoned that eventual
21 # misalignment penalties at page boundaries are on average lower
22 # than the additional overhead of the pure AltiVec approach.
# The first command-line argument names the target flavour. It must contain
# "64" or "32"; anything else aborts. A trailing "le" marks little-endian.
24 $flavour = shift;
# Select the word size and the word-sized mnemonics that the assembly
# templates below interpolate ($PUSH, $POP, $STU, ${UCMP}i, ...).
26 if ($flavour =~ /64/) {
27 $SIZE_T =8; # register/pointer width in bytes
28 $LRSAVE =2*$SIZE_T; # offset used as $LRSAVE($sp) when saving the link register
29 $STU ="stdu"; # store with update (used to allocate a stack frame)
30 $POP ="ld"; # word-sized load
31 $PUSH ="std"; # word-sized store
32 $UCMP ="cmpld"; # word-sized unsigned compare ("i" is appended for immediates)
33 $SHL ="sldi"; # word-sized shift left immediate
34 } elsif ($flavour =~ /32/) {
35 $SIZE_T =4; # register/pointer width in bytes
36 $LRSAVE =$SIZE_T; # offset used as $LRSAVE($sp) when saving the link register
37 $STU ="stwu"; # store with update (used to allocate a stack frame)
38 $POP ="lwz"; # word-sized load
39 $PUSH ="stw"; # word-sized store
40 $UCMP ="cmplw"; # word-sized unsigned compare ("i" is appended for immediates)
41 $SHL ="slwi"; # word-sized shift left immediate
42 } else { die "nonsense $flavour"; }
# Non-zero (equal to $SIZE_T) when the flavour name ends in "le", 0 otherwise.
44 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
# Locate the ppc-xlate.pl translator: first next to this script, then in
# ../../perlasm/ relative to it. $dir is the directory part of $0 including
# the trailing separator; it stays undefined (interpolating as "") when $0
# has no directory component.
46 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
47 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
48 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
49 die "can't locate ppc-xlate.pl";
# Redirect STDOUT into the translator, passing the flavour and the next
# command-line argument. The failure check must use low-precedence "or":
# with "||" the test applied to the (always true) command string rather
# than to open()'s return value, so a failed open was silently ignored.
51 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
# Values shared by all assembly templates below.
53 $FRAME=8*$SIZE_T; # base stack frame size for routines that allocate one
54 $prefix="aes_p8"; # prepended to every exported symbol name
56 $sp="r1"; # register used as the stack pointer in the templates
57 $vrsave="r12"; # GPR holding the saved SPR 256 value (mfspr/mtspr 256) while a routine runs
59 #########################################################################
# Register allocation for the set_encrypt_key/set_decrypt_key templates.
# The names are interpolated into the assembly heredoc that follows.
60 {{{ # Key setup procedures #
# General-purpose registers r3..r8: input key pointer, key length in bits,
# output key-schedule pointer, constants pointer / return code, loop count,
# number of rounds.
61 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
# Vector registers v0..v6 used while expanding the key.
62 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
# Vector registers v7..v11 used to write round keys to a possibly unaligned $out.
63 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
65 $code.=<<___;
66 .machine "any"
68 .text
70 .align 7
71 rcon:
72 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
73 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
74 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
75 .long 0,0,0,0 ?asis
76 Lconsts:
77 mflr r0
78 bcl 20,31,\$+4
79 mflr $ptr #vvvvv "distance between . and rcon
80 addi $ptr,$ptr,-0x48
81 mtlr r0
82 blr
83 .long 0
84 .byte 0,12,0x14,0,0,0,0,0
85 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
87 .globl .${prefix}_set_encrypt_key
88 Lset_encrypt_key:
89 mflr r11
90 $PUSH r11,$LRSAVE($sp)
92 li $ptr,-1
93 ${UCMP}i $inp,0
94 beq- Lenc_key_abort # if ($inp==0) return -1;
95 ${UCMP}i $out,0
96 beq- Lenc_key_abort # if ($out==0) return -1;
97 li $ptr,-2
98 cmpwi $bits,128
99 blt- Lenc_key_abort
100 cmpwi $bits,256
101 bgt- Lenc_key_abort
102 andi. r0,$bits,0x3f
103 bne- Lenc_key_abort
105 lis r0,0xfff0
106 mfspr $vrsave,256
107 mtspr 256,r0
109 bl Lconsts
110 mtlr r11
112 neg r9,$inp
113 lvx $in0,0,$inp
114 addi $inp,$inp,15 # 15 is not typo
115 lvsr $key,0,r9 # borrow $key
116 li r8,0x20
117 cmpwi $bits,192
118 lvx $in1,0,$inp
119 le?vspltisb $mask,0x0f # borrow $mask
120 lvx $rcon,0,$ptr
121 le?vxor $key,$key,$mask # adjust for byte swap
122 lvx $mask,r8,$ptr
123 addi $ptr,$ptr,0x10
124 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
125 li $cnt,8
126 vxor $zero,$zero,$zero
127 mtctr $cnt
129 ?lvsr $outperm,0,$out
130 vspltisb $outmask,-1
131 lvx $outhead,0,$out
132 ?vperm $outmask,$zero,$outmask,$outperm
134 blt Loop128
135 addi $inp,$inp,8
136 beq L192
137 addi $inp,$inp,8
138 b L256
140 .align 4
141 Loop128:
142 vperm $key,$in0,$in0,$mask # rotate-n-splat
143 vsldoi $tmp,$zero,$in0,12 # >>32
144 vperm $outtail,$in0,$in0,$outperm # rotate
145 vsel $stage,$outhead,$outtail,$outmask
146 vmr $outhead,$outtail
147 vcipherlast $key,$key,$rcon
148 stvx $stage,0,$out
149 addi $out,$out,16
151 vxor $in0,$in0,$tmp
152 vsldoi $tmp,$zero,$tmp,12 # >>32
153 vxor $in0,$in0,$tmp
154 vsldoi $tmp,$zero,$tmp,12 # >>32
155 vxor $in0,$in0,$tmp
156 vadduwm $rcon,$rcon,$rcon
157 vxor $in0,$in0,$key
158 bdnz Loop128
160 lvx $rcon,0,$ptr # last two round keys
162 vperm $key,$in0,$in0,$mask # rotate-n-splat
163 vsldoi $tmp,$zero,$in0,12 # >>32
164 vperm $outtail,$in0,$in0,$outperm # rotate
165 vsel $stage,$outhead,$outtail,$outmask
166 vmr $outhead,$outtail
167 vcipherlast $key,$key,$rcon
168 stvx $stage,0,$out
169 addi $out,$out,16
171 vxor $in0,$in0,$tmp
172 vsldoi $tmp,$zero,$tmp,12 # >>32
173 vxor $in0,$in0,$tmp
174 vsldoi $tmp,$zero,$tmp,12 # >>32
175 vxor $in0,$in0,$tmp
176 vadduwm $rcon,$rcon,$rcon
177 vxor $in0,$in0,$key
179 vperm $key,$in0,$in0,$mask # rotate-n-splat
180 vsldoi $tmp,$zero,$in0,12 # >>32
181 vperm $outtail,$in0,$in0,$outperm # rotate
182 vsel $stage,$outhead,$outtail,$outmask
183 vmr $outhead,$outtail
184 vcipherlast $key,$key,$rcon
185 stvx $stage,0,$out
186 addi $out,$out,16
188 vxor $in0,$in0,$tmp
189 vsldoi $tmp,$zero,$tmp,12 # >>32
190 vxor $in0,$in0,$tmp
191 vsldoi $tmp,$zero,$tmp,12 # >>32
192 vxor $in0,$in0,$tmp
193 vxor $in0,$in0,$key
194 vperm $outtail,$in0,$in0,$outperm # rotate
195 vsel $stage,$outhead,$outtail,$outmask
196 vmr $outhead,$outtail
197 stvx $stage,0,$out
199 addi $inp,$out,15 # 15 is not typo
200 addi $out,$out,0x50
202 li $rounds,10
203 b Ldone
205 .align 4
206 L192:
207 lvx $tmp,0,$inp
208 li $cnt,4
209 vperm $outtail,$in0,$in0,$outperm # rotate
210 vsel $stage,$outhead,$outtail,$outmask
211 vmr $outhead,$outtail
212 stvx $stage,0,$out
213 addi $out,$out,16
214 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
215 vspltisb $key,8 # borrow $key
216 mtctr $cnt
217 vsububm $mask,$mask,$key # adjust the mask
219 Loop192:
220 vperm $key,$in1,$in1,$mask # rotate-n-splat
221 vsldoi $tmp,$zero,$in0,12 # >>32
222 vcipherlast $key,$key,$rcon
224 vxor $in0,$in0,$tmp
225 vsldoi $tmp,$zero,$tmp,12 # >>32
226 vxor $in0,$in0,$tmp
227 vsldoi $tmp,$zero,$tmp,12 # >>32
228 vxor $in0,$in0,$tmp
230 vsldoi $stage,$zero,$in1,8
231 vspltw $tmp,$in0,3
232 vxor $tmp,$tmp,$in1
233 vsldoi $in1,$zero,$in1,12 # >>32
234 vadduwm $rcon,$rcon,$rcon
235 vxor $in1,$in1,$tmp
236 vxor $in0,$in0,$key
237 vxor $in1,$in1,$key
238 vsldoi $stage,$stage,$in0,8
240 vperm $key,$in1,$in1,$mask # rotate-n-splat
241 vsldoi $tmp,$zero,$in0,12 # >>32
242 vperm $outtail,$stage,$stage,$outperm # rotate
243 vsel $stage,$outhead,$outtail,$outmask
244 vmr $outhead,$outtail
245 vcipherlast $key,$key,$rcon
246 stvx $stage,0,$out
247 addi $out,$out,16
249 vsldoi $stage,$in0,$in1,8
250 vxor $in0,$in0,$tmp
251 vsldoi $tmp,$zero,$tmp,12 # >>32
252 vperm $outtail,$stage,$stage,$outperm # rotate
253 vsel $stage,$outhead,$outtail,$outmask
254 vmr $outhead,$outtail
255 vxor $in0,$in0,$tmp
256 vsldoi $tmp,$zero,$tmp,12 # >>32
257 vxor $in0,$in0,$tmp
258 stvx $stage,0,$out
259 addi $out,$out,16
261 vspltw $tmp,$in0,3
262 vxor $tmp,$tmp,$in1
263 vsldoi $in1,$zero,$in1,12 # >>32
264 vadduwm $rcon,$rcon,$rcon
265 vxor $in1,$in1,$tmp
266 vxor $in0,$in0,$key
267 vxor $in1,$in1,$key
268 vperm $outtail,$in0,$in0,$outperm # rotate
269 vsel $stage,$outhead,$outtail,$outmask
270 vmr $outhead,$outtail
271 stvx $stage,0,$out
272 addi $inp,$out,15 # 15 is not typo
273 addi $out,$out,16
274 bdnz Loop192
276 li $rounds,12
277 addi $out,$out,0x20
278 b Ldone
280 .align 4
281 L256:
282 lvx $tmp,0,$inp
283 li $cnt,7
284 li $rounds,14
285 vperm $outtail,$in0,$in0,$outperm # rotate
286 vsel $stage,$outhead,$outtail,$outmask
287 vmr $outhead,$outtail
288 stvx $stage,0,$out
289 addi $out,$out,16
290 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
291 mtctr $cnt
293 Loop256:
294 vperm $key,$in1,$in1,$mask # rotate-n-splat
295 vsldoi $tmp,$zero,$in0,12 # >>32
296 vperm $outtail,$in1,$in1,$outperm # rotate
297 vsel $stage,$outhead,$outtail,$outmask
298 vmr $outhead,$outtail
299 vcipherlast $key,$key,$rcon
300 stvx $stage,0,$out
301 addi $out,$out,16
303 vxor $in0,$in0,$tmp
304 vsldoi $tmp,$zero,$tmp,12 # >>32
305 vxor $in0,$in0,$tmp
306 vsldoi $tmp,$zero,$tmp,12 # >>32
307 vxor $in0,$in0,$tmp
308 vadduwm $rcon,$rcon,$rcon
309 vxor $in0,$in0,$key
310 vperm $outtail,$in0,$in0,$outperm # rotate
311 vsel $stage,$outhead,$outtail,$outmask
312 vmr $outhead,$outtail
313 stvx $stage,0,$out
314 addi $inp,$out,15 # 15 is not typo
315 addi $out,$out,16
316 bdz Ldone
318 vspltw $key,$in0,3 # just splat
319 vsldoi $tmp,$zero,$in1,12 # >>32
320 vsbox $key,$key
322 vxor $in1,$in1,$tmp
323 vsldoi $tmp,$zero,$tmp,12 # >>32
324 vxor $in1,$in1,$tmp
325 vsldoi $tmp,$zero,$tmp,12 # >>32
326 vxor $in1,$in1,$tmp
328 vxor $in1,$in1,$key
329 b Loop256
331 .align 4
332 Ldone:
333 lvx $in1,0,$inp # redundant in aligned case
334 vsel $in1,$outhead,$in1,$outmask
335 stvx $in1,0,$inp
336 li $ptr,0
337 mtspr 256,$vrsave
338 stw $rounds,0($out)
340 Lenc_key_abort:
341 mr r3,$ptr
343 .long 0
344 .byte 0,12,0x14,1,0,0,3,0
345 .long 0
346 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
348 .globl .${prefix}_set_decrypt_key
349 $STU $sp,-$FRAME($sp)
350 mflr r10
351 $PUSH r10,$FRAME+$LRSAVE($sp)
352 bl Lset_encrypt_key
353 mtlr r10
355 cmpwi r3,0
356 bne- Ldec_key_abort
358 slwi $cnt,$rounds,4
359 subi $inp,$out,240 # first round key
360 srwi $rounds,$rounds,1
361 add $out,$inp,$cnt # last round key
362 mtctr $rounds
364 Ldeckey:
365 lwz r0, 0($inp)
366 lwz r6, 4($inp)
367 lwz r7, 8($inp)
368 lwz r8, 12($inp)
369 addi $inp,$inp,16
370 lwz r9, 0($out)
371 lwz r10,4($out)
372 lwz r11,8($out)
373 lwz r12,12($out)
374 stw r0, 0($out)
375 stw r6, 4($out)
376 stw r7, 8($out)
377 stw r8, 12($out)
378 subi $out,$out,16
379 stw r9, -16($inp)
380 stw r10,-12($inp)
381 stw r11,-8($inp)
382 stw r12,-4($inp)
383 bdnz Ldeckey
385 xor r3,r3,r3 # return value
386 Ldec_key_abort:
387 addi $sp,$sp,$FRAME
389 .long 0
390 .byte 0,12,4,1,0x80,0,3,0
391 .long 0
392 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
395 #########################################################################
396 {{{ # Single block en- and decrypt procedures #
# gen_block($dir)
#
# Appends to $code the assembly template for one single-block routine,
# exported as .${prefix}_${dir}crypt.
#
#   $dir - "en" or "de". "de" switches the cipher mnemonics to their
#          vncipher/vncipherlast forms through the $n infix; any other
#          value yields vcipher/vcipherlast.
#
# The template uses r3..r7 as input pointer, output pointer, key pointer,
# round counter and index, and reads the round count from offset 240 of
# the key.
#
# The sub takes one argument, so it is declared without a prototype; the
# former empty "()" prototype only worked because callers use the "&"
# call form, which bypasses prototypes.
397 sub gen_block {
398 my $dir = shift;
399 my $n = $dir eq "de" ? "n" : "";
400 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
402 $code.=<<___;
403 .globl .${prefix}_${dir}crypt
404 lwz $rounds,240($key)
405 lis r0,0xfc00
406 mfspr $vrsave,256
407 li $idx,15 # 15 is not typo
408 mtspr 256,r0
410 lvx v0,0,$inp
411 neg r11,$out
412 lvx v1,$idx,$inp
413 lvsl v2,0,$inp # inpperm
414 le?vspltisb v4,0x0f
415 ?lvsl v3,0,r11 # outperm
416 le?vxor v2,v2,v4
417 li $idx,16
418 vperm v0,v0,v1,v2 # align [and byte swap in LE]
419 lvx v1,0,$key
420 ?lvsl v5,0,$key # keyperm
421 srwi $rounds,$rounds,1
422 lvx v2,$idx,$key
423 addi $idx,$idx,16
424 subi $rounds,$rounds,1
425 ?vperm v1,v1,v2,v5 # align round key
427 vxor v0,v0,v1
428 lvx v1,$idx,$key
429 addi $idx,$idx,16
430 mtctr $rounds
432 Loop_${dir}c:
433 ?vperm v2,v2,v1,v5
434 v${n}cipher v0,v0,v2
435 lvx v2,$idx,$key
436 addi $idx,$idx,16
437 ?vperm v1,v1,v2,v5
438 v${n}cipher v0,v0,v1
439 lvx v1,$idx,$key
440 addi $idx,$idx,16
441 bdnz Loop_${dir}c
443 ?vperm v2,v2,v1,v5
444 v${n}cipher v0,v0,v2
445 lvx v2,$idx,$key
446 ?vperm v1,v1,v2,v5
447 v${n}cipherlast v0,v0,v1
449 vspltisb v2,-1
450 vxor v1,v1,v1
451 li $idx,15 # 15 is not typo
452 ?vperm v2,v1,v2,v3 # outmask
453 le?vxor v3,v3,v4
454 lvx v1,0,$out # outhead
455 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
456 vsel v1,v1,v0,v2
457 lvx v4,$idx,$out
458 stvx v1,0,$out
459 vsel v0,v0,v4,v2
460 stvx v0,$idx,$out
462 mtspr 256,$vrsave
464 .long 0
465 .byte 0,12,0x14,0,0,0,3,0
466 .long 0
467 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
# Emit the single-block encrypt and decrypt routines. The "&" call form
# makes Perl ignore any prototype declared on gen_block.
470 &gen_block("en");
471 &gen_block("de");
473 #########################################################################
# Register allocation for the cbc_encrypt template that follows.
474 {{{ # CBC en- and decrypt procedures #
# General-purpose registers r3..r10: input pointer, output pointer, byte
# length, key pointer, IV pointer, direction flag (0 takes the decrypt
# path), then the round counter (loaded from offset 240 of the key) and a
# running index.
475 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
# Vector registers v0..v3: two alternating round-key registers, the block
# being processed, and a temporary.
476 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
# Vector registers v4..v10: the chaining value plus the permute vectors and
# masks used for unaligned input, output and key access.
477 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
478 map("v$_",(4..10));
479 $code.=<<___;
480 .globl .${prefix}_cbc_encrypt
481 ${UCMP}i $len,16
482 bltlr-
484 cmpwi $enc,0 # test direction
485 lis r0,0xffe0
486 mfspr $vrsave,256
487 mtspr 256,r0
489 li $idx,15
490 vxor $rndkey0,$rndkey0,$rndkey0
491 le?vspltisb $tmp,0x0f
493 lvx $ivec,0,$ivp # load [unaligned] iv
494 lvsl $inpperm,0,$ivp
495 lvx $inptail,$idx,$ivp
496 le?vxor $inpperm,$inpperm,$tmp
497 vperm $ivec,$ivec,$inptail,$inpperm
499 neg r11,$inp
500 ?lvsl $keyperm,0,$key # prepare for unaligned key
501 lwz $rounds,240($key)
503 lvsr $inpperm,0,r11 # prepare for unaligned load
504 lvx $inptail,0,$inp
505 addi $inp,$inp,15 # 15 is not typo
506 le?vxor $inpperm,$inpperm,$tmp
508 ?lvsr $outperm,0,$out # prepare for unaligned store
509 vspltisb $outmask,-1
510 lvx $outhead,0,$out
511 ?vperm $outmask,$rndkey0,$outmask,$outperm
512 le?vxor $outperm,$outperm,$tmp
514 srwi $rounds,$rounds,1
515 li $idx,16
516 subi $rounds,$rounds,1
517 beq Lcbc_dec
519 Lcbc_enc:
520 vmr $inout,$inptail
521 lvx $inptail,0,$inp
522 addi $inp,$inp,16
523 mtctr $rounds
524 subi $len,$len,16 # len-=16
526 lvx $rndkey0,0,$key
527 vperm $inout,$inout,$inptail,$inpperm
528 lvx $rndkey1,$idx,$key
529 addi $idx,$idx,16
530 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
531 vxor $inout,$inout,$rndkey0
532 lvx $rndkey0,$idx,$key
533 addi $idx,$idx,16
534 vxor $inout,$inout,$ivec
536 Loop_cbc_enc:
537 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
538 vcipher $inout,$inout,$rndkey1
539 lvx $rndkey1,$idx,$key
540 addi $idx,$idx,16
541 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
542 vcipher $inout,$inout,$rndkey0
543 lvx $rndkey0,$idx,$key
544 addi $idx,$idx,16
545 bdnz Loop_cbc_enc
547 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
548 vcipher $inout,$inout,$rndkey1
549 lvx $rndkey1,$idx,$key
550 li $idx,16
551 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
552 vcipherlast $ivec,$inout,$rndkey0
553 ${UCMP}i $len,16
555 vperm $tmp,$ivec,$ivec,$outperm
556 vsel $inout,$outhead,$tmp,$outmask
557 vmr $outhead,$tmp
558 stvx $inout,0,$out
559 addi $out,$out,16
560 bge Lcbc_enc
562 b Lcbc_done
564 .align 4
565 Lcbc_dec:
566 ${UCMP}i $len,128
567 bge _aesp8_cbc_decrypt8x
568 vmr $tmp,$inptail
569 lvx $inptail,0,$inp
570 addi $inp,$inp,16
571 mtctr $rounds
572 subi $len,$len,16 # len-=16
574 lvx $rndkey0,0,$key
575 vperm $tmp,$tmp,$inptail,$inpperm
576 lvx $rndkey1,$idx,$key
577 addi $idx,$idx,16
578 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
579 vxor $inout,$tmp,$rndkey0
580 lvx $rndkey0,$idx,$key
581 addi $idx,$idx,16
583 Loop_cbc_dec:
584 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
585 vncipher $inout,$inout,$rndkey1
586 lvx $rndkey1,$idx,$key
587 addi $idx,$idx,16
588 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
589 vncipher $inout,$inout,$rndkey0
590 lvx $rndkey0,$idx,$key
591 addi $idx,$idx,16
592 bdnz Loop_cbc_dec
594 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
595 vncipher $inout,$inout,$rndkey1
596 lvx $rndkey1,$idx,$key
597 li $idx,16
598 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
599 vncipherlast $inout,$inout,$rndkey0
600 ${UCMP}i $len,16
602 vxor $inout,$inout,$ivec
603 vmr $ivec,$tmp
604 vperm $tmp,$inout,$inout,$outperm
605 vsel $inout,$outhead,$tmp,$outmask
606 vmr $outhead,$tmp
607 stvx $inout,0,$out
608 addi $out,$out,16
609 bge Lcbc_dec
611 Lcbc_done:
612 addi $out,$out,-1
613 lvx $inout,0,$out # redundant in aligned case
614 vsel $inout,$outhead,$inout,$outmask
615 stvx $inout,0,$out
617 neg $enc,$ivp # write [unaligned] iv
618 li $idx,15 # 15 is not typo
619 vxor $rndkey0,$rndkey0,$rndkey0
620 vspltisb $outmask,-1
621 le?vspltisb $tmp,0x0f
622 ?lvsl $outperm,0,$enc
623 ?vperm $outmask,$rndkey0,$outmask,$outperm
624 le?vxor $outperm,$outperm,$tmp
625 lvx $outhead,0,$ivp
626 vperm $ivec,$ivec,$ivec,$outperm
627 vsel $inout,$outhead,$ivec,$outmask
628 lvx $inptail,$idx,$ivp
629 stvx $inout,0,$ivp
630 vsel $inout,$ivec,$inptail,$outmask
631 stvx $inout,$idx,$ivp
633 mtspr 256,$vrsave
635 .long 0
636 .byte 0,12,0x14,0,0,0,6,0
637 .long 0
639 #########################################################################
# Register allocation for the 8-blocks-at-a-time CBC decrypt path
# (_aesp8_cbc_decrypt8x), which is branched to from the CBC template above
# and therefore shares that scope's register assignments.
640 {{ # Optimized CBC decrypt procedure #
# Points at the copies of the round keys off-loaded to the stack frame.
641 my $key_="r11";
# Index registers: $x10..$x70 are loaded with 0x10..0x70 by the template.
# $x00 is r0, used in the index position of lvx/stvx-style accesses
# (presumably read as zero there by the ISA -- confirm against the Power ISA).
642 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
# Eight input blocks (v0..v3, v10..v13) and eight working/output blocks (v14..v21).
643 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
644 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
645 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
646 # v26-v31 last 6 round keys
# $tmp and $keyperm resolve to v3 and v10, the same registers the enclosing
# CBC scope already assigns to those names.
647 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
649 $code.=<<___;
650 .align 5
651 _aesp8_cbc_decrypt8x:
652 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
653 li r10,`$FRAME+8*16+15`
654 li r11,`$FRAME+8*16+31`
655 stvx v20,r10,$sp # ABI says so
656 addi r10,r10,32
657 stvx v21,r11,$sp
658 addi r11,r11,32
659 stvx v22,r10,$sp
660 addi r10,r10,32
661 stvx v23,r11,$sp
662 addi r11,r11,32
663 stvx v24,r10,$sp
664 addi r10,r10,32
665 stvx v25,r11,$sp
666 addi r11,r11,32
667 stvx v26,r10,$sp
668 addi r10,r10,32
669 stvx v27,r11,$sp
670 addi r11,r11,32
671 stvx v28,r10,$sp
672 addi r10,r10,32
673 stvx v29,r11,$sp
674 addi r11,r11,32
675 stvx v30,r10,$sp
676 stvx v31,r11,$sp
677 li r0,-1
678 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
679 li $x10,0x10
680 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
681 li $x20,0x20
682 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
683 li $x30,0x30
684 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
685 li $x40,0x40
686 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
687 li $x50,0x50
688 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
689 li $x60,0x60
690 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
691 li $x70,0x70
692 mtspr 256,r0
694 subi $rounds,$rounds,3 # -4 in total
695 subi $len,$len,128 # bias
697 lvx $rndkey0,$x00,$key # load key schedule
698 lvx v30,$x10,$key
699 addi $key,$key,0x20
700 lvx v31,$x00,$key
701 ?vperm $rndkey0,$rndkey0,v30,$keyperm
702 addi $key_,$sp,$FRAME+15
703 mtctr $rounds
705 Load_cbc_dec_key:
706 ?vperm v24,v30,v31,$keyperm
707 lvx v30,$x10,$key
708 addi $key,$key,0x20
709 stvx v24,$x00,$key_ # off-load round[1]
710 ?vperm v25,v31,v30,$keyperm
711 lvx v31,$x00,$key
712 stvx v25,$x10,$key_ # off-load round[2]
713 addi $key_,$key_,0x20
714 bdnz Load_cbc_dec_key
716 lvx v26,$x10,$key
717 ?vperm v24,v30,v31,$keyperm
718 lvx v27,$x20,$key
719 stvx v24,$x00,$key_ # off-load round[3]
720 ?vperm v25,v31,v26,$keyperm
721 lvx v28,$x30,$key
722 stvx v25,$x10,$key_ # off-load round[4]
723 addi $key_,$sp,$FRAME+15 # rewind $key_
724 ?vperm v26,v26,v27,$keyperm
725 lvx v29,$x40,$key
726 ?vperm v27,v27,v28,$keyperm
727 lvx v30,$x50,$key
728 ?vperm v28,v28,v29,$keyperm
729 lvx v31,$x60,$key
730 ?vperm v29,v29,v30,$keyperm
731 lvx $out0,$x70,$key # borrow $out0
732 ?vperm v30,v30,v31,$keyperm
733 lvx v24,$x00,$key_ # pre-load round[1]
734 ?vperm v31,v31,$out0,$keyperm
735 lvx v25,$x10,$key_ # pre-load round[2]
737 #lvx $inptail,0,$inp # "caller" already did this
738 #addi $inp,$inp,15 # 15 is not typo
739 subi $inp,$inp,15 # undo "caller"
741 le?li $idx,8
742 lvx_u $in0,$x00,$inp # load first 8 "words"
743 le?lvsl $inpperm,0,$idx
744 le?vspltisb $tmp,0x0f
745 lvx_u $in1,$x10,$inp
746 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
747 lvx_u $in2,$x20,$inp
748 le?vperm $in0,$in0,$in0,$inpperm
749 lvx_u $in3,$x30,$inp
750 le?vperm $in1,$in1,$in1,$inpperm
751 lvx_u $in4,$x40,$inp
752 le?vperm $in2,$in2,$in2,$inpperm
753 vxor $out0,$in0,$rndkey0
754 lvx_u $in5,$x50,$inp
755 le?vperm $in3,$in3,$in3,$inpperm
756 vxor $out1,$in1,$rndkey0
757 lvx_u $in6,$x60,$inp
758 le?vperm $in4,$in4,$in4,$inpperm
759 vxor $out2,$in2,$rndkey0
760 lvx_u $in7,$x70,$inp
761 addi $inp,$inp,0x80
762 le?vperm $in5,$in5,$in5,$inpperm
763 vxor $out3,$in3,$rndkey0
764 le?vperm $in6,$in6,$in6,$inpperm
765 vxor $out4,$in4,$rndkey0
766 le?vperm $in7,$in7,$in7,$inpperm
767 vxor $out5,$in5,$rndkey0
768 vxor $out6,$in6,$rndkey0
769 vxor $out7,$in7,$rndkey0
771 mtctr $rounds
772 b Loop_cbc_dec8x
773 .align 5
774 Loop_cbc_dec8x:
775 vncipher $out0,$out0,v24
776 vncipher $out1,$out1,v24
777 vncipher $out2,$out2,v24
778 vncipher $out3,$out3,v24
779 vncipher $out4,$out4,v24
780 vncipher $out5,$out5,v24
781 vncipher $out6,$out6,v24
782 vncipher $out7,$out7,v24
783 lvx v24,$x20,$key_ # round[3]
784 addi $key_,$key_,0x20
786 vncipher $out0,$out0,v25
787 vncipher $out1,$out1,v25
788 vncipher $out2,$out2,v25
789 vncipher $out3,$out3,v25
790 vncipher $out4,$out4,v25
791 vncipher $out5,$out5,v25
792 vncipher $out6,$out6,v25
793 vncipher $out7,$out7,v25
794 lvx v25,$x10,$key_ # round[4]
795 bdnz Loop_cbc_dec8x
797 subic $len,$len,128 # $len-=128
798 vncipher $out0,$out0,v24
799 vncipher $out1,$out1,v24
800 vncipher $out2,$out2,v24
801 vncipher $out3,$out3,v24
802 vncipher $out4,$out4,v24
803 vncipher $out5,$out5,v24
804 vncipher $out6,$out6,v24
805 vncipher $out7,$out7,v24
807 subfe. r0,r0,r0 # borrow?-1:0
808 vncipher $out0,$out0,v25
809 vncipher $out1,$out1,v25
810 vncipher $out2,$out2,v25
811 vncipher $out3,$out3,v25
812 vncipher $out4,$out4,v25
813 vncipher $out5,$out5,v25
814 vncipher $out6,$out6,v25
815 vncipher $out7,$out7,v25
817 and r0,r0,$len
818 vncipher $out0,$out0,v26
819 vncipher $out1,$out1,v26
820 vncipher $out2,$out2,v26
821 vncipher $out3,$out3,v26
822 vncipher $out4,$out4,v26
823 vncipher $out5,$out5,v26
824 vncipher $out6,$out6,v26
825 vncipher $out7,$out7,v26
827 add $inp,$inp,r0 # $inp is adjusted in such
828 # way that at exit from the
829 # loop inX-in7 are loaded
830 # with last "words"
831 vncipher $out0,$out0,v27
832 vncipher $out1,$out1,v27
833 vncipher $out2,$out2,v27
834 vncipher $out3,$out3,v27
835 vncipher $out4,$out4,v27
836 vncipher $out5,$out5,v27
837 vncipher $out6,$out6,v27
838 vncipher $out7,$out7,v27
840 addi $key_,$sp,$FRAME+15 # rewind $key_
841 vncipher $out0,$out0,v28
842 vncipher $out1,$out1,v28
843 vncipher $out2,$out2,v28
844 vncipher $out3,$out3,v28
845 vncipher $out4,$out4,v28
846 vncipher $out5,$out5,v28
847 vncipher $out6,$out6,v28
848 vncipher $out7,$out7,v28
849 lvx v24,$x00,$key_ # re-pre-load round[1]
851 vncipher $out0,$out0,v29
852 vncipher $out1,$out1,v29
853 vncipher $out2,$out2,v29
854 vncipher $out3,$out3,v29
855 vncipher $out4,$out4,v29
856 vncipher $out5,$out5,v29
857 vncipher $out6,$out6,v29
858 vncipher $out7,$out7,v29
859 lvx v25,$x10,$key_ # re-pre-load round[2]
861 vncipher $out0,$out0,v30
862 vxor $ivec,$ivec,v31 # xor with last round key
863 vncipher $out1,$out1,v30
864 vxor $in0,$in0,v31
865 vncipher $out2,$out2,v30
866 vxor $in1,$in1,v31
867 vncipher $out3,$out3,v30
868 vxor $in2,$in2,v31
869 vncipher $out4,$out4,v30
870 vxor $in3,$in3,v31
871 vncipher $out5,$out5,v30
872 vxor $in4,$in4,v31
873 vncipher $out6,$out6,v30
874 vxor $in5,$in5,v31
875 vncipher $out7,$out7,v30
876 vxor $in6,$in6,v31
878 vncipherlast $out0,$out0,$ivec
879 vncipherlast $out1,$out1,$in0
880 lvx_u $in0,$x00,$inp # load next input block
881 vncipherlast $out2,$out2,$in1
882 lvx_u $in1,$x10,$inp
883 vncipherlast $out3,$out3,$in2
884 le?vperm $in0,$in0,$in0,$inpperm
885 lvx_u $in2,$x20,$inp
886 vncipherlast $out4,$out4,$in3
887 le?vperm $in1,$in1,$in1,$inpperm
888 lvx_u $in3,$x30,$inp
889 vncipherlast $out5,$out5,$in4
890 le?vperm $in2,$in2,$in2,$inpperm
891 lvx_u $in4,$x40,$inp
892 vncipherlast $out6,$out6,$in5
893 le?vperm $in3,$in3,$in3,$inpperm
894 lvx_u $in5,$x50,$inp
895 vncipherlast $out7,$out7,$in6
896 le?vperm $in4,$in4,$in4,$inpperm
897 lvx_u $in6,$x60,$inp
898 vmr $ivec,$in7
899 le?vperm $in5,$in5,$in5,$inpperm
900 lvx_u $in7,$x70,$inp
901 addi $inp,$inp,0x80
903 le?vperm $out0,$out0,$out0,$inpperm
904 le?vperm $out1,$out1,$out1,$inpperm
905 stvx_u $out0,$x00,$out
906 le?vperm $in6,$in6,$in6,$inpperm
907 vxor $out0,$in0,$rndkey0
908 le?vperm $out2,$out2,$out2,$inpperm
909 stvx_u $out1,$x10,$out
910 le?vperm $in7,$in7,$in7,$inpperm
911 vxor $out1,$in1,$rndkey0
912 le?vperm $out3,$out3,$out3,$inpperm
913 stvx_u $out2,$x20,$out
914 vxor $out2,$in2,$rndkey0
915 le?vperm $out4,$out4,$out4,$inpperm
916 stvx_u $out3,$x30,$out
917 vxor $out3,$in3,$rndkey0
918 le?vperm $out5,$out5,$out5,$inpperm
919 stvx_u $out4,$x40,$out
920 vxor $out4,$in4,$rndkey0
921 le?vperm $out6,$out6,$out6,$inpperm
922 stvx_u $out5,$x50,$out
923 vxor $out5,$in5,$rndkey0
924 le?vperm $out7,$out7,$out7,$inpperm
925 stvx_u $out6,$x60,$out
926 vxor $out6,$in6,$rndkey0
927 stvx_u $out7,$x70,$out
928 addi $out,$out,0x80
929 vxor $out7,$in7,$rndkey0
931 mtctr $rounds
932 beq Loop_cbc_dec8x # did $len-=128 borrow?
934 addic. $len,$len,128
935 beq Lcbc_dec8x_done
939 Loop_cbc_dec8x_tail: # up to 7 "words" tail...
940 vncipher $out1,$out1,v24
941 vncipher $out2,$out2,v24
942 vncipher $out3,$out3,v24
943 vncipher $out4,$out4,v24
944 vncipher $out5,$out5,v24
945 vncipher $out6,$out6,v24
946 vncipher $out7,$out7,v24
947 lvx v24,$x20,$key_ # round[3]
948 addi $key_,$key_,0x20
950 vncipher $out1,$out1,v25
951 vncipher $out2,$out2,v25
952 vncipher $out3,$out3,v25
953 vncipher $out4,$out4,v25
954 vncipher $out5,$out5,v25
955 vncipher $out6,$out6,v25
956 vncipher $out7,$out7,v25
957 lvx v25,$x10,$key_ # round[4]
958 bdnz Loop_cbc_dec8x_tail
960 vncipher $out1,$out1,v24
961 vncipher $out2,$out2,v24
962 vncipher $out3,$out3,v24
963 vncipher $out4,$out4,v24
964 vncipher $out5,$out5,v24
965 vncipher $out6,$out6,v24
966 vncipher $out7,$out7,v24
968 vncipher $out1,$out1,v25
969 vncipher $out2,$out2,v25
970 vncipher $out3,$out3,v25
971 vncipher $out4,$out4,v25
972 vncipher $out5,$out5,v25
973 vncipher $out6,$out6,v25
974 vncipher $out7,$out7,v25
976 vncipher $out1,$out1,v26
977 vncipher $out2,$out2,v26
978 vncipher $out3,$out3,v26
979 vncipher $out4,$out4,v26
980 vncipher $out5,$out5,v26
981 vncipher $out6,$out6,v26
982 vncipher $out7,$out7,v26
984 vncipher $out1,$out1,v27
985 vncipher $out2,$out2,v27
986 vncipher $out3,$out3,v27
987 vncipher $out4,$out4,v27
988 vncipher $out5,$out5,v27
989 vncipher $out6,$out6,v27
990 vncipher $out7,$out7,v27
992 vncipher $out1,$out1,v28
993 vncipher $out2,$out2,v28
994 vncipher $out3,$out3,v28
995 vncipher $out4,$out4,v28
996 vncipher $out5,$out5,v28
997 vncipher $out6,$out6,v28
998 vncipher $out7,$out7,v28
1000 vncipher $out1,$out1,v29
1001 vncipher $out2,$out2,v29
1002 vncipher $out3,$out3,v29
1003 vncipher $out4,$out4,v29
1004 vncipher $out5,$out5,v29
1005 vncipher $out6,$out6,v29
1006 vncipher $out7,$out7,v29
1008 vncipher $out1,$out1,v30
1009 vxor $ivec,$ivec,v31 # last round key
1010 vncipher $out2,$out2,v30
1011 vxor $in1,$in1,v31
1012 vncipher $out3,$out3,v30
1013 vxor $in2,$in2,v31
1014 vncipher $out4,$out4,v30
1015 vxor $in3,$in3,v31
1016 vncipher $out5,$out5,v30
1017 vxor $in4,$in4,v31
1018 vncipher $out6,$out6,v30
1019 vxor $in5,$in5,v31
1020 vncipher $out7,$out7,v30
1021 vxor $in6,$in6,v31
1023 cmplwi $len,32 # switch($len)
1024 blt Lcbc_dec8x_one
1026 beq Lcbc_dec8x_two
1027 cmplwi $len,64
1028 blt Lcbc_dec8x_three
1030 beq Lcbc_dec8x_four
1031 cmplwi $len,96
1032 blt Lcbc_dec8x_five
1034 beq Lcbc_dec8x_six
1036 Lcbc_dec8x_seven:
1037 vncipherlast $out1,$out1,$ivec
1038 vncipherlast $out2,$out2,$in1
1039 vncipherlast $out3,$out3,$in2
1040 vncipherlast $out4,$out4,$in3
1041 vncipherlast $out5,$out5,$in4
1042 vncipherlast $out6,$out6,$in5
1043 vncipherlast $out7,$out7,$in6
1044 vmr $ivec,$in7
1046 le?vperm $out1,$out1,$out1,$inpperm
1047 le?vperm $out2,$out2,$out2,$inpperm
1048 stvx_u $out1,$x00,$out
1049 le?vperm $out3,$out3,$out3,$inpperm
1050 stvx_u $out2,$x10,$out
1051 le?vperm $out4,$out4,$out4,$inpperm
1052 stvx_u $out3,$x20,$out
1053 le?vperm $out5,$out5,$out5,$inpperm
1054 stvx_u $out4,$x30,$out
1055 le?vperm $out6,$out6,$out6,$inpperm
1056 stvx_u $out5,$x40,$out
1057 le?vperm $out7,$out7,$out7,$inpperm
1058 stvx_u $out6,$x50,$out
1059 stvx_u $out7,$x60,$out
1060 addi $out,$out,0x70
1061 b Lcbc_dec8x_done
1063 .align 5
1064 Lcbc_dec8x_six:
1065 vncipherlast $out2,$out2,$ivec
1066 vncipherlast $out3,$out3,$in2
1067 vncipherlast $out4,$out4,$in3
1068 vncipherlast $out5,$out5,$in4
1069 vncipherlast $out6,$out6,$in5
1070 vncipherlast $out7,$out7,$in6
1071 vmr $ivec,$in7
1073 le?vperm $out2,$out2,$out2,$inpperm
1074 le?vperm $out3,$out3,$out3,$inpperm
1075 stvx_u $out2,$x00,$out
1076 le?vperm $out4,$out4,$out4,$inpperm
1077 stvx_u $out3,$x10,$out
1078 le?vperm $out5,$out5,$out5,$inpperm
1079 stvx_u $out4,$x20,$out
1080 le?vperm $out6,$out6,$out6,$inpperm
1081 stvx_u $out5,$x30,$out
1082 le?vperm $out7,$out7,$out7,$inpperm
1083 stvx_u $out6,$x40,$out
1084 stvx_u $out7,$x50,$out
1085 addi $out,$out,0x60
1086 b Lcbc_dec8x_done
1088 .align 5
1089 Lcbc_dec8x_five:
1090 vncipherlast $out3,$out3,$ivec
1091 vncipherlast $out4,$out4,$in3
1092 vncipherlast $out5,$out5,$in4
1093 vncipherlast $out6,$out6,$in5
1094 vncipherlast $out7,$out7,$in6
1095 vmr $ivec,$in7
1097 le?vperm $out3,$out3,$out3,$inpperm
1098 le?vperm $out4,$out4,$out4,$inpperm
1099 stvx_u $out3,$x00,$out
1100 le?vperm $out5,$out5,$out5,$inpperm
1101 stvx_u $out4,$x10,$out
1102 le?vperm $out6,$out6,$out6,$inpperm
1103 stvx_u $out5,$x20,$out
1104 le?vperm $out7,$out7,$out7,$inpperm
1105 stvx_u $out6,$x30,$out
1106 stvx_u $out7,$x40,$out
1107 addi $out,$out,0x50
1108 b Lcbc_dec8x_done
1110 .align 5
1111 Lcbc_dec8x_four:
1112 vncipherlast $out4,$out4,$ivec
1113 vncipherlast $out5,$out5,$in4
1114 vncipherlast $out6,$out6,$in5
1115 vncipherlast $out7,$out7,$in6
1116 vmr $ivec,$in7
1118 le?vperm $out4,$out4,$out4,$inpperm
1119 le?vperm $out5,$out5,$out5,$inpperm
1120 stvx_u $out4,$x00,$out
1121 le?vperm $out6,$out6,$out6,$inpperm
1122 stvx_u $out5,$x10,$out
1123 le?vperm $out7,$out7,$out7,$inpperm
1124 stvx_u $out6,$x20,$out
1125 stvx_u $out7,$x30,$out
1126 addi $out,$out,0x40
1127 b Lcbc_dec8x_done
1129 .align 5
1130 Lcbc_dec8x_three:
1131 vncipherlast $out5,$out5,$ivec
1132 vncipherlast $out6,$out6,$in5
1133 vncipherlast $out7,$out7,$in6
1134 vmr $ivec,$in7
1136 le?vperm $out5,$out5,$out5,$inpperm
1137 le?vperm $out6,$out6,$out6,$inpperm
1138 stvx_u $out5,$x00,$out
1139 le?vperm $out7,$out7,$out7,$inpperm
1140 stvx_u $out6,$x10,$out
1141 stvx_u $out7,$x20,$out
1142 addi $out,$out,0x30
1143 b Lcbc_dec8x_done
1145 .align 5
1146 Lcbc_dec8x_two:
1147 vncipherlast $out6,$out6,$ivec
1148 vncipherlast $out7,$out7,$in6
1149 vmr $ivec,$in7
1151 le?vperm $out6,$out6,$out6,$inpperm
1152 le?vperm $out7,$out7,$out7,$inpperm
1153 stvx_u $out6,$x00,$out
1154 stvx_u $out7,$x10,$out
1155 addi $out,$out,0x20
1156 b Lcbc_dec8x_done
1158 .align 5
1159 Lcbc_dec8x_one:
1160 vncipherlast $out7,$out7,$ivec
1161 vmr $ivec,$in7
1163 le?vperm $out7,$out7,$out7,$inpperm
1164 stvx_u $out7,0,$out
1165 addi $out,$out,0x10
1167 Lcbc_dec8x_done:
1168 le?vperm $ivec,$ivec,$ivec,$inpperm
1169 stvx_u $ivec,0,$ivp # write [unaligned] iv
1171 li r10,`$FRAME+15`
1172 li r11,`$FRAME+31`
1173 stvx $inpperm,r10,$sp # wipe copies of round keys
1174 addi r10,r10,32
1175 stvx $inpperm,r11,$sp
1176 addi r11,r11,32
1177 stvx $inpperm,r10,$sp
1178 addi r10,r10,32
1179 stvx $inpperm,r11,$sp
1180 addi r11,r11,32
1181 stvx $inpperm,r10,$sp
1182 addi r10,r10,32
1183 stvx $inpperm,r11,$sp
1184 addi r11,r11,32
1185 stvx $inpperm,r10,$sp
1186 addi r10,r10,32
1187 stvx $inpperm,r11,$sp
1188 addi r11,r11,32
1190 mtspr 256,$vrsave
1191 lvx v20,r10,$sp # ABI says so
1192 addi r10,r10,32
1193 lvx v21,r11,$sp
1194 addi r11,r11,32
1195 lvx v22,r10,$sp
1196 addi r10,r10,32
1197 lvx v23,r11,$sp
1198 addi r11,r11,32
1199 lvx v24,r10,$sp
1200 addi r10,r10,32
1201 lvx v25,r11,$sp
1202 addi r11,r11,32
1203 lvx v26,r10,$sp
1204 addi r10,r10,32
1205 lvx v27,r11,$sp
1206 addi r11,r11,32
1207 lvx v28,r10,$sp
1208 addi r10,r10,32
1209 lvx v29,r11,$sp
1210 addi r11,r11,32
1211 lvx v30,r10,$sp
1212 lvx v31,r11,$sp
1213 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1214 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1215 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1216 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1217 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1218 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1219 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1221 .long 0
1222 .byte 0,12,0x14,0,0x80,6,6,0
1223 .long 0
1224 .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1226 }} }}}
1228 #########################################################################
1229 {{{ # CTR procedure[s] #
# CTR-mode entry point and set-up for the one-block-at-a-time loop.
# Requests of eight or more blocks are handed to _aesp8_ctr32_encrypt8x.
# r3..r10, in order: input, output, block count, key schedule, IV pointer,
# $x10, round counter and key-schedule byte index.
1230 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
# v0..v3: two alternating round-key registers, cipher state, scratch.
1231 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
# v4..v11: counter block, unaligned load/store helpers, key permute
# vector and the constant one.
1232 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1233 map("v$_",(4..11));
1234 my $dat=$tmp; # the input block shares v3 with $tmp
1236 $code.=<<___;
1237 .globl .${prefix}_ctr32_encrypt_blocks
1238 ${UCMP}i $len,1
1239 bltlr- # zero blocks requested, return at once
1241 lis r0,0xfff0
1242 mfspr $vrsave,256 # keep the caller's value of SPR 256
1243 mtspr 256,r0
1245 li $idx,15
1246 vxor $rndkey0,$rndkey0,$rndkey0 # all-zero vector
1247 le?vspltisb $tmp,0x0f
1249 lvx $ivec,0,$ivp # load [unaligned] iv
1250 lvsl $inpperm,0,$ivp
1251 lvx $inptail,$idx,$ivp
1252 vspltisb $one,1
1253 le?vxor $inpperm,$inpperm,$tmp
1254 vperm $ivec,$ivec,$inptail,$inpperm
1255 vsldoi $one,$rndkey0,$one,1 # 15 zero bytes followed by 0x01
1257 neg r11,$inp
1258 ?lvsl $keyperm,0,$key # prepare for unaligned key
1259 lwz $rounds,240($key) # round count is read from byte offset 240 of the key
1261 lvsr $inpperm,0,r11 # prepare for unaligned load
1262 lvx $inptail,0,$inp
1263 addi $inp,$inp,15 # 15 is not typo
1264 le?vxor $inpperm,$inpperm,$tmp
1266 srwi $rounds,$rounds,1
1267 li $idx,16
1268 subi $rounds,$rounds,1 # loop count: two rounds per pass, last two rounds follow the loop
1270 ${UCMP}i $len,8
1271 bge _aesp8_ctr32_encrypt8x # eight or more blocks take the 8x code
1273 ?lvsr $outperm,0,$out # prepare for unaligned store
1274 vspltisb $outmask,-1
1275 lvx $outhead,0,$out
1276 ?vperm $outmask,$rndkey0,$outmask,$outperm
1277 le?vxor $outperm,$outperm,$tmp
1279 lvx $rndkey0,0,$key
1280 mtctr $rounds
1281 lvx $rndkey1,$idx,$key
1282 addi $idx,$idx,16
1283 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1284 vxor $inout,$ivec,$rndkey0 # counter block xored with round key 0
1285 lvx $rndkey0,$idx,$key
1286 addi $idx,$idx,16
1287 b Loop_ctr32_enc
# Generic CTR loop: encrypts one counter block per outer pass, two AES
# rounds per inner pass. The counter is advanced with a 128-bit add so
# that short requests and the 8x code agree on carry propagation.
1289 .align 5
1290 Loop_ctr32_enc:
1291 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1292 vcipher $inout,$inout,$rndkey1
1293 lvx $rndkey1,$idx,$key
1294 addi $idx,$idx,16
1295 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1296 vcipher $inout,$inout,$rndkey0
1297 lvx $rndkey0,$idx,$key
1298 addi $idx,$idx,16
1299 bdnz Loop_ctr32_enc
1301 vadduqm $ivec,$ivec,$one # full 128-bit increment, same as the 8x code
1302 vmr $dat,$inptail
1303 lvx $inptail,0,$inp
1304 addi $inp,$inp,16
1305 subic. $len,$len,1 # blocks--
1307 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1308 vcipher $inout,$inout,$rndkey1
1309 lvx $rndkey1,$idx,$key
1310 vperm $dat,$dat,$inptail,$inpperm
1311 li $idx,16
1312 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1313 lvx $rndkey0,0,$key
1314 vxor $dat,$dat,$rndkey1 # last round key
1315 vcipherlast $inout,$inout,$dat # final round also xors in the input block
1317 lvx $rndkey1,$idx,$key
1318 addi $idx,$idx,16
1319 vperm $inout,$inout,$inout,$outperm
1320 vsel $dat,$outhead,$inout,$outmask
1321 mtctr $rounds
1322 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1323 vmr $outhead,$inout
1324 vxor $inout,$ivec,$rndkey0 # next counter block xored with round key 0
1325 lvx $rndkey0,$idx,$key
1326 addi $idx,$idx,16
1327 stvx $dat,0,$out
1328 addi $out,$out,16
1329 bne Loop_ctr32_enc
1331 addi $out,$out,-1
1332 lvx $inout,0,$out # redundant in aligned case
1333 vsel $inout,$outhead,$inout,$outmask
1334 stvx $inout,0,$out
1336 mtspr 256,$vrsave
1338 .long 0
1339 .byte 0,12,0x14,0,0,0,6,0
1340 .long 0
1342 #########################################################################
1343 {{ # Optimized CTR procedure #
# Eight-blocks-per-pass CTR code, entered from the generic entry point
# when at least eight blocks are requested. This part declares the
# register map and emits the prologue: it allocates the stack frame,
# saves v20-v31, the caller's SPR 256 value and r26-r31, and loads the
# constant offsets 0x10..0x70 into $x10..$x70.
1344 my $key_="r11"; # cursor over the on-stack round-key copies
1345 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1346 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1347 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1348 my $rndkey0="v23"; # v24-v25 rotating buffer for first round keys
1349 # v26-v31 last 6 round keys
1350 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1351 my ($two,$three,$four)=($outhead,$outperm,$outmask); # reuse v7-v9; only $two appears in the code below
1353 $code.=<<___;
1354 .align 5
1355 _aesp8_ctr32_encrypt8x:
1356 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1357 li r10,`$FRAME+8*16+15`
1358 li r11,`$FRAME+8*16+31`
1359 stvx v20,r10,$sp # ABI says so
1360 addi r10,r10,32
1361 stvx v21,r11,$sp
1362 addi r11,r11,32
1363 stvx v22,r10,$sp
1364 addi r10,r10,32
1365 stvx v23,r11,$sp
1366 addi r11,r11,32
1367 stvx v24,r10,$sp
1368 addi r10,r10,32
1369 stvx v25,r11,$sp
1370 addi r11,r11,32
1371 stvx v26,r10,$sp
1372 addi r10,r10,32
1373 stvx v27,r11,$sp
1374 addi r11,r11,32
1375 stvx v28,r10,$sp
1376 addi r10,r10,32
1377 stvx v29,r11,$sp
1378 addi r11,r11,32
1379 stvx v30,r10,$sp
1380 stvx v31,r11,$sp
1381 li r0,-1 # all-ones value written to SPR 256 below
1382 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1383 li $x10,0x10
1384 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1385 li $x20,0x20
1386 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1387 li $x30,0x30
1388 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1389 li $x40,0x40
1390 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1391 li $x50,0x50
1392 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1393 li $x60,0x60
1394 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1395 li $x70,0x70
1396 mtspr 256,r0
1398 subi $rounds,$rounds,3 # -4 in total
1400 lvx $rndkey0,$x00,$key # load key schedule
1401 lvx v30,$x10,$key
1402 addi $key,$key,0x20
1403 lvx v31,$x00,$key
1404 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1405 addi $key_,$sp,$FRAME+15
1406 mtctr $rounds
# Realign the key schedule. The leading round keys are written to the
# stack frame, two per pass, and streamed back through v24/v25 during
# encryption. The last six round keys stay resident in v26-v31.
1408 Load_ctr32_enc_key:
1409 ?vperm v24,v30,v31,$keyperm
1410 lvx v30,$x10,$key
1411 addi $key,$key,0x20
1412 stvx v24,$x00,$key_ # off-load round[1]
1413 ?vperm v25,v31,v30,$keyperm
1414 lvx v31,$x00,$key
1415 stvx v25,$x10,$key_ # off-load round[2]
1416 addi $key_,$key_,0x20
1417 bdnz Load_ctr32_enc_key
1419 lvx v26,$x10,$key
1420 ?vperm v24,v30,v31,$keyperm
1421 lvx v27,$x20,$key
1422 stvx v24,$x00,$key_ # off-load round[3]
1423 ?vperm v25,v31,v26,$keyperm
1424 lvx v28,$x30,$key
1425 stvx v25,$x10,$key_ # off-load round[4]
1426 addi $key_,$sp,$FRAME+15 # rewind $key_
1427 ?vperm v26,v26,v27,$keyperm
1428 lvx v29,$x40,$key
1429 ?vperm v27,v27,v28,$keyperm
1430 lvx v30,$x50,$key
1431 ?vperm v28,v28,v29,$keyperm
1432 lvx v31,$x60,$key
1433 ?vperm v29,v29,v30,$keyperm
1434 lvx $out0,$x70,$key # borrow $out0
1435 ?vperm v30,v30,v31,$keyperm
1436 lvx v24,$x00,$key_ # pre-load round[1]
1437 ?vperm v31,v31,$out0,$keyperm
1438 lvx v25,$x10,$key_ # pre-load round[2]
# Build eight consecutive counter blocks (counter+0 .. counter+7), each
# already xored with round key 0, and leave counter+8 in the counter
# register for the next batch. The length is converted from blocks to
# bytes here.
1440 vadduqm $two,$one,$one # constant two
1441 subi $inp,$inp,15 # undo "caller"
1442 $SHL $len,$len,4 # blocks to bytes
1444 vadduqm $out1,$ivec,$one # counter values ...
1445 vadduqm $out2,$ivec,$two
1446 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1447 le?li $idx,8
1448 vadduqm $out3,$out1,$two
1449 vxor $out1,$out1,$rndkey0
1450 le?lvsl $inpperm,0,$idx
1451 vadduqm $out4,$out2,$two
1452 vxor $out2,$out2,$rndkey0
1453 le?vspltisb $tmp,0x0f
1454 vadduqm $out5,$out3,$two
1455 vxor $out3,$out3,$rndkey0
1456 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1457 vadduqm $out6,$out4,$two
1458 vxor $out4,$out4,$rndkey0
1459 vadduqm $out7,$out5,$two
1460 vxor $out5,$out5,$rndkey0
1461 vadduqm $ivec,$out6,$two # next counter value
1462 vxor $out6,$out6,$rndkey0
1463 vxor $out7,$out7,$rndkey0
1465 mtctr $rounds
1466 b Loop_ctr32_enc8x
# Main 8x loop. The counted part runs two rounds per pass on all eight
# blocks with round keys streamed from the stack copy through v24/v25.
# The remaining rounds use v26-v30 and are interleaved with loading the
# next 128 bytes of input and with the length bookkeeping. When more
# input remains, the final round is merged with preparing the next eight
# counter blocks and control re-enters at Loop_ctr32_enc8x_middle.
1467 .align 5
1468 Loop_ctr32_enc8x:
1469 vcipher $out0,$out0,v24
1470 vcipher $out1,$out1,v24
1471 vcipher $out2,$out2,v24
1472 vcipher $out3,$out3,v24
1473 vcipher $out4,$out4,v24
1474 vcipher $out5,$out5,v24
1475 vcipher $out6,$out6,v24
1476 vcipher $out7,$out7,v24
1477 Loop_ctr32_enc8x_middle:
1478 lvx v24,$x20,$key_ # round[3]
1479 addi $key_,$key_,0x20
1481 vcipher $out0,$out0,v25
1482 vcipher $out1,$out1,v25
1483 vcipher $out2,$out2,v25
1484 vcipher $out3,$out3,v25
1485 vcipher $out4,$out4,v25
1486 vcipher $out5,$out5,v25
1487 vcipher $out6,$out6,v25
1488 vcipher $out7,$out7,v25
1489 lvx v25,$x10,$key_ # round[4]
1490 bdnz Loop_ctr32_enc8x
1492 subic r11,$len,256 # $len-256, borrow $key_
1493 vcipher $out0,$out0,v24
1494 vcipher $out1,$out1,v24
1495 vcipher $out2,$out2,v24
1496 vcipher $out3,$out3,v24
1497 vcipher $out4,$out4,v24
1498 vcipher $out5,$out5,v24
1499 vcipher $out6,$out6,v24
1500 vcipher $out7,$out7,v24
1502 subfe r0,r0,r0 # borrow?-1:0
1503 vcipher $out0,$out0,v25
1504 vcipher $out1,$out1,v25
1505 vcipher $out2,$out2,v25
1506 vcipher $out3,$out3,v25
1507 vcipher $out4,$out4,v25
1508 vcipher $out5,$out5,v25
1509 vcipher $out6,$out6,v25
1510 vcipher $out7,$out7,v25
1512 and r0,r0,r11 # r0 = $len-256 if that borrowed, else 0
1513 addi $key_,$sp,$FRAME+15 # rewind $key_
1514 vcipher $out0,$out0,v26
1515 vcipher $out1,$out1,v26
1516 vcipher $out2,$out2,v26
1517 vcipher $out3,$out3,v26
1518 vcipher $out4,$out4,v26
1519 vcipher $out5,$out5,v26
1520 vcipher $out6,$out6,v26
1521 vcipher $out7,$out7,v26
1522 lvx v24,$x00,$key_ # re-pre-load round[1]
1524 subic $len,$len,129 # $len-=129
1525 vcipher $out0,$out0,v27
1526 addi $len,$len,1 # $len-=128 really
1527 vcipher $out1,$out1,v27
1528 vcipher $out2,$out2,v27
1529 vcipher $out3,$out3,v27
1530 vcipher $out4,$out4,v27
1531 vcipher $out5,$out5,v27
1532 vcipher $out6,$out6,v27
1533 vcipher $out7,$out7,v27
1534 lvx v25,$x10,$key_ # re-pre-load round[2]
1536 vcipher $out0,$out0,v28
1537 lvx_u $in0,$x00,$inp # load input
1538 vcipher $out1,$out1,v28
1539 lvx_u $in1,$x10,$inp
1540 vcipher $out2,$out2,v28
1541 lvx_u $in2,$x20,$inp
1542 vcipher $out3,$out3,v28
1543 lvx_u $in3,$x30,$inp
1544 vcipher $out4,$out4,v28
1545 lvx_u $in4,$x40,$inp
1546 vcipher $out5,$out5,v28
1547 lvx_u $in5,$x50,$inp
1548 vcipher $out6,$out6,v28
1549 lvx_u $in6,$x60,$inp
1550 vcipher $out7,$out7,v28
1551 lvx_u $in7,$x70,$inp
1552 addi $inp,$inp,0x80
1554 vcipher $out0,$out0,v29
1555 le?vperm $in0,$in0,$in0,$inpperm
1556 vcipher $out1,$out1,v29
1557 le?vperm $in1,$in1,$in1,$inpperm
1558 vcipher $out2,$out2,v29
1559 le?vperm $in2,$in2,$in2,$inpperm
1560 vcipher $out3,$out3,v29
1561 le?vperm $in3,$in3,$in3,$inpperm
1562 vcipher $out4,$out4,v29
1563 le?vperm $in4,$in4,$in4,$inpperm
1564 vcipher $out5,$out5,v29
1565 le?vperm $in5,$in5,$in5,$inpperm
1566 vcipher $out6,$out6,v29
1567 le?vperm $in6,$in6,$in6,$inpperm
1568 vcipher $out7,$out7,v29
1569 le?vperm $in7,$in7,$in7,$inpperm
1571 add $inp,$inp,r0 # $inp is adjusted in such
1572 # way that at exit from the
1573 # loop inX-in7 are loaded
1574 # with last "words"
1575 subfe. r0,r0,r0 # borrow?-1:0
1576 vcipher $out0,$out0,v30
1577 vxor $in0,$in0,v31 # xor with last round key
1578 vcipher $out1,$out1,v30
1579 vxor $in1,$in1,v31
1580 vcipher $out2,$out2,v30
1581 vxor $in2,$in2,v31
1582 vcipher $out3,$out3,v30
1583 vxor $in3,$in3,v31
1584 vcipher $out4,$out4,v30
1585 vxor $in4,$in4,v31
1586 vcipher $out5,$out5,v30
1587 vxor $in5,$in5,v31
1588 vcipher $out6,$out6,v30
1589 vxor $in6,$in6,v31
1590 vcipher $out7,$out7,v30
1591 vxor $in7,$in7,v31
1593 bne Lctr32_enc8x_break # did $len-129 borrow?
1595 vcipherlast $in0,$out0,$in0 # final round; result is the output block
1596 vcipherlast $in1,$out1,$in1
1597 vadduqm $out1,$ivec,$one # counter values ...
1598 vcipherlast $in2,$out2,$in2
1599 vadduqm $out2,$ivec,$two
1600 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1601 vcipherlast $in3,$out3,$in3
1602 vadduqm $out3,$out1,$two
1603 vxor $out1,$out1,$rndkey0
1604 vcipherlast $in4,$out4,$in4
1605 vadduqm $out4,$out2,$two
1606 vxor $out2,$out2,$rndkey0
1607 vcipherlast $in5,$out5,$in5
1608 vadduqm $out5,$out3,$two
1609 vxor $out3,$out3,$rndkey0
1610 vcipherlast $in6,$out6,$in6
1611 vadduqm $out6,$out4,$two
1612 vxor $out4,$out4,$rndkey0
1613 vcipherlast $in7,$out7,$in7
1614 vadduqm $out7,$out5,$two
1615 vxor $out5,$out5,$rndkey0
1616 le?vperm $in0,$in0,$in0,$inpperm
1617 vadduqm $ivec,$out6,$two # next counter value
1618 vxor $out6,$out6,$rndkey0
1619 le?vperm $in1,$in1,$in1,$inpperm
1620 vxor $out7,$out7,$rndkey0
1621 mtctr $rounds
1623 vcipher $out0,$out0,v24 # first round of the next batch, stores interleaved
1624 stvx_u $in0,$x00,$out
1625 le?vperm $in2,$in2,$in2,$inpperm
1626 vcipher $out1,$out1,v24
1627 stvx_u $in1,$x10,$out
1628 le?vperm $in3,$in3,$in3,$inpperm
1629 vcipher $out2,$out2,v24
1630 stvx_u $in2,$x20,$out
1631 le?vperm $in4,$in4,$in4,$inpperm
1632 vcipher $out3,$out3,v24
1633 stvx_u $in3,$x30,$out
1634 le?vperm $in5,$in5,$in5,$inpperm
1635 vcipher $out4,$out4,v24
1636 stvx_u $in4,$x40,$out
1637 le?vperm $in6,$in6,$in6,$inpperm
1638 vcipher $out5,$out5,v24
1639 stvx_u $in5,$x50,$out
1640 le?vperm $in7,$in7,$in7,$inpperm
1641 vcipher $out6,$out6,v24
1642 stvx_u $in6,$x60,$out
1643 vcipher $out7,$out7,v24
1644 stvx_u $in7,$x70,$out
1645 addi $out,$out,0x80
1647 b Loop_ctr32_enc8x_middle
# Tail dispatch. On entry $len is the byte count left after the last
# 128-byte step, a multiple of 16 between -0x70 and 0. The number of
# blocks still to be written is ($len+0x80)/16, so -0x70 selects the
# one-block tail and 0 falls through to the eight-block tail.
1649 .align 5
1650 Lctr32_enc8x_break:
1651 cmpwi $len,-0x60
1652 blt Lctr32_enc8x_one
1654 beq Lctr32_enc8x_two
1655 cmpwi $len,-0x40
1656 blt Lctr32_enc8x_three
1658 beq Lctr32_enc8x_four
1659 cmpwi $len,-0x20
1660 blt Lctr32_enc8x_five
1662 beq Lctr32_enc8x_six
1663 cmpwi $len,0x00
1664 blt Lctr32_enc8x_seven
# Eight blocks left: finish all eight states against in0-in7, which
# already hold input xored with the last round key, and store 0x80 bytes.
1666 Lctr32_enc8x_eight:
1667 vcipherlast $out0,$out0,$in0
1668 vcipherlast $out1,$out1,$in1
1669 vcipherlast $out2,$out2,$in2
1670 vcipherlast $out3,$out3,$in3
1671 vcipherlast $out4,$out4,$in4
1672 vcipherlast $out5,$out5,$in5
1673 vcipherlast $out6,$out6,$in6
1674 vcipherlast $out7,$out7,$in7
1676 le?vperm $out0,$out0,$out0,$inpperm
1677 le?vperm $out1,$out1,$out1,$inpperm
1678 stvx_u $out0,$x00,$out
1679 le?vperm $out2,$out2,$out2,$inpperm
1680 stvx_u $out1,$x10,$out
1681 le?vperm $out3,$out3,$out3,$inpperm
1682 stvx_u $out2,$x20,$out
1683 le?vperm $out4,$out4,$out4,$inpperm
1684 stvx_u $out3,$x30,$out
1685 le?vperm $out5,$out5,$out5,$inpperm
1686 stvx_u $out4,$x40,$out
1687 le?vperm $out6,$out6,$out6,$inpperm
1688 stvx_u $out5,$x50,$out
1689 le?vperm $out7,$out7,$out7,$inpperm
1690 stvx_u $out6,$x60,$out
1691 stvx_u $out7,$x70,$out
1692 addi $out,$out,0x80
1693 b Lctr32_enc8x_done
# Seven blocks left. The final load was rewound so that the remaining
# input sits in in1-in7; the first seven counter states are paired with
# them and 0x70 bytes are stored.
1695 .align 5
1696 Lctr32_enc8x_seven:
1697 vcipherlast $out0,$out0,$in1
1698 vcipherlast $out1,$out1,$in2
1699 vcipherlast $out2,$out2,$in3
1700 vcipherlast $out3,$out3,$in4
1701 vcipherlast $out4,$out4,$in5
1702 vcipherlast $out5,$out5,$in6
1703 vcipherlast $out6,$out6,$in7
1705 le?vperm $out0,$out0,$out0,$inpperm
1706 le?vperm $out1,$out1,$out1,$inpperm
1707 stvx_u $out0,$x00,$out
1708 le?vperm $out2,$out2,$out2,$inpperm
1709 stvx_u $out1,$x10,$out
1710 le?vperm $out3,$out3,$out3,$inpperm
1711 stvx_u $out2,$x20,$out
1712 le?vperm $out4,$out4,$out4,$inpperm
1713 stvx_u $out3,$x30,$out
1714 le?vperm $out5,$out5,$out5,$inpperm
1715 stvx_u $out4,$x40,$out
1716 le?vperm $out6,$out6,$out6,$inpperm
1717 stvx_u $out5,$x50,$out
1718 stvx_u $out6,$x60,$out
1719 addi $out,$out,0x70
1720 b Lctr32_enc8x_done
# Six blocks left: remaining input sits in in2-in7; the first six
# counter states are paired with them and 0x60 bytes are stored.
1722 .align 5
1723 Lctr32_enc8x_six:
1724 vcipherlast $out0,$out0,$in2
1725 vcipherlast $out1,$out1,$in3
1726 vcipherlast $out2,$out2,$in4
1727 vcipherlast $out3,$out3,$in5
1728 vcipherlast $out4,$out4,$in6
1729 vcipherlast $out5,$out5,$in7
1731 le?vperm $out0,$out0,$out0,$inpperm
1732 le?vperm $out1,$out1,$out1,$inpperm
1733 stvx_u $out0,$x00,$out
1734 le?vperm $out2,$out2,$out2,$inpperm
1735 stvx_u $out1,$x10,$out
1736 le?vperm $out3,$out3,$out3,$inpperm
1737 stvx_u $out2,$x20,$out
1738 le?vperm $out4,$out4,$out4,$inpperm
1739 stvx_u $out3,$x30,$out
1740 le?vperm $out5,$out5,$out5,$inpperm
1741 stvx_u $out4,$x40,$out
1742 stvx_u $out5,$x50,$out
1743 addi $out,$out,0x60
1744 b Lctr32_enc8x_done
# Five blocks left: remaining input sits in in3-in7; the first five
# counter states are paired with them and 0x50 bytes are stored.
1746 .align 5
1747 Lctr32_enc8x_five:
1748 vcipherlast $out0,$out0,$in3
1749 vcipherlast $out1,$out1,$in4
1750 vcipherlast $out2,$out2,$in5
1751 vcipherlast $out3,$out3,$in6
1752 vcipherlast $out4,$out4,$in7
1754 le?vperm $out0,$out0,$out0,$inpperm
1755 le?vperm $out1,$out1,$out1,$inpperm
1756 stvx_u $out0,$x00,$out
1757 le?vperm $out2,$out2,$out2,$inpperm
1758 stvx_u $out1,$x10,$out
1759 le?vperm $out3,$out3,$out3,$inpperm
1760 stvx_u $out2,$x20,$out
1761 le?vperm $out4,$out4,$out4,$inpperm
1762 stvx_u $out3,$x30,$out
1763 stvx_u $out4,$x40,$out
1764 addi $out,$out,0x50
1765 b Lctr32_enc8x_done
# Four blocks left: remaining input sits in in4-in7; the first four
# counter states are paired with them and 0x40 bytes are stored.
1767 .align 5
1768 Lctr32_enc8x_four:
1769 vcipherlast $out0,$out0,$in4
1770 vcipherlast $out1,$out1,$in5
1771 vcipherlast $out2,$out2,$in6
1772 vcipherlast $out3,$out3,$in7
1774 le?vperm $out0,$out0,$out0,$inpperm
1775 le?vperm $out1,$out1,$out1,$inpperm
1776 stvx_u $out0,$x00,$out
1777 le?vperm $out2,$out2,$out2,$inpperm
1778 stvx_u $out1,$x10,$out
1779 le?vperm $out3,$out3,$out3,$inpperm
1780 stvx_u $out2,$x20,$out
1781 stvx_u $out3,$x30,$out
1782 addi $out,$out,0x40
1783 b Lctr32_enc8x_done
# Three blocks left: remaining input sits in in5-in7; the first three
# counter states are paired with them and 0x30 bytes are stored.
# The exit must be the CTR epilogue. The CBC epilogue additionally
# stores the working counter register through the IV pointer, which
# CTR mode must not do.
1785 .align 5
1786 Lctr32_enc8x_three:
1787 vcipherlast $out0,$out0,$in5
1788 vcipherlast $out1,$out1,$in6
1789 vcipherlast $out2,$out2,$in7
1791 le?vperm $out0,$out0,$out0,$inpperm
1792 le?vperm $out1,$out1,$out1,$inpperm
1793 stvx_u $out0,$x00,$out
1794 le?vperm $out2,$out2,$out2,$inpperm
1795 stvx_u $out1,$x10,$out
1796 stvx_u $out2,$x20,$out
1797 addi $out,$out,0x30
1798 b Lctr32_enc8x_done # CTR exit, not the CBC one
# Two blocks left: remaining input sits in in6-in7; the first two
# counter states are paired with them and 0x20 bytes are stored.
# The exit must be the CTR epilogue. The CBC epilogue additionally
# stores the working counter register through the IV pointer, which
# CTR mode must not do.
1800 .align 5
1801 Lctr32_enc8x_two:
1802 vcipherlast $out0,$out0,$in6
1803 vcipherlast $out1,$out1,$in7
1805 le?vperm $out0,$out0,$out0,$inpperm
1806 le?vperm $out1,$out1,$out1,$inpperm
1807 stvx_u $out0,$x00,$out
1808 stvx_u $out1,$x10,$out
1809 addi $out,$out,0x20
1810 b Lctr32_enc8x_done # CTR exit, not the CBC one
# One block left: its input sits in in7. Store 0x10 bytes and fall
# through to the epilogue.
1812 .align 5
1813 Lctr32_enc8x_one:
1814 vcipherlast $out0,$out0,$in7
1816 le?vperm $out0,$out0,$out0,$inpperm
1817 stvx_u $out0,0,$out
1818 addi $out,$out,0x10
# Common exit of the 8x CTR code: overwrite the eight 16-byte stack
# slots that held round-key copies, restore SPR 256, v20-v31 and
# r26-r31, then release the stack frame.
1820 Lctr32_enc8x_done:
1821 li r10,`$FRAME+15`
1822 li r11,`$FRAME+31`
1823 stvx $inpperm,r10,$sp # wipe copies of round keys
1824 addi r10,r10,32
1825 stvx $inpperm,r11,$sp
1826 addi r11,r11,32
1827 stvx $inpperm,r10,$sp
1828 addi r10,r10,32
1829 stvx $inpperm,r11,$sp
1830 addi r11,r11,32
1831 stvx $inpperm,r10,$sp
1832 addi r10,r10,32
1833 stvx $inpperm,r11,$sp
1834 addi r11,r11,32
1835 stvx $inpperm,r10,$sp
1836 addi r10,r10,32
1837 stvx $inpperm,r11,$sp
1838 addi r11,r11,32 # r10/r11 now address the v20/v21 save slots
1840 mtspr 256,$vrsave
1841 lvx v20,r10,$sp # ABI says so
1842 addi r10,r10,32
1843 lvx v21,r11,$sp
1844 addi r11,r11,32
1845 lvx v22,r10,$sp
1846 addi r10,r10,32
1847 lvx v23,r11,$sp
1848 addi r11,r11,32
1849 lvx v24,r10,$sp
1850 addi r10,r10,32
1851 lvx v25,r11,$sp
1852 addi r11,r11,32
1853 lvx v26,r10,$sp
1854 addi r10,r10,32
1855 lvx v27,r11,$sp
1856 addi r11,r11,32
1857 lvx v28,r10,$sp
1858 addi r10,r10,32
1859 lvx v29,r11,$sp
1860 addi r11,r11,32
1861 lvx v30,r10,$sp
1862 lvx v31,r11,$sp
1863 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1864 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1865 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1866 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1867 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1868 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1869 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` # release the frame
1871 .long 0
1872 .byte 0,12,0x14,0,0x80,6,6,0
1873 .long 0
1874 .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1876 }} }}}
# Post-process the accumulated assembly text line by line and print it.
# Three things happen per line: backquoted Perl expressions are replaced
# by their value, tagged entries of the constants table are re-emitted as
# .byte lists in the byte order of the target flavour, and endian-specific
# instruction prefixes are resolved.
1878 my $consts=1; # true until the line containing Lconsts: has been seen
1879 foreach(split("\n",$code)) {
1880 s/\`([^\`]*)\`/eval($1)/geo; # evaluate backquoted expressions in place
1882 # constants table endian-specific conversion
1883 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
1884 my $conv=$3; # trailing conversion tag of the table entry
1885 my @bytes=();
1887 # convert to endian-agnostic format
1888 if ($1 eq "long") {
1889 foreach (split(/,\s*/,$2)) {
1890 my $l = /^0/?oct:int; # values with a leading 0 go through oct()
1891 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; # most significant byte first
1893 } else {
1894 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
1897 # little-endian conversion
1898 if ($flavour =~ /le$/o) {
1899 SWITCH: for($conv) {
1900 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; # xor every byte with 0xf
1901 /\?rev/ && do { @bytes=reverse(@bytes); last; }; # reverse the byte order
1905 #emit
1906 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1907 next;
1909 $consts=0 if (m/Lconsts:/o); # end of table
1911 # instructions prefixed with '?' are endian-specific and need
1912 # to be adjusted accordingly...
1913 if ($flavour =~ /le$/o) { # little-endian
# Keep le? lines, comment out be? lines, swap lvsr and lvsl, swap the
# two source operands of vperm, and mirror the vsldoi shift (16-n)
# and the vspltw word index (3-n).
1914 s/le\?//o or
1915 s/be\?/#be#/o or
1916 s/\?lvsr/lvsl/o or
1917 s/\?lvsl/lvsr/o or
1918 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1919 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1920 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1921 } else { # big-endian
# Comment out le? lines, keep be? lines, and drop the '?' marker.
1922 s/le\?/#le#/o or
1923 s/be\?//o or
1924 s/\?([a-z]+)/$1/o;
1927 print $_,"\n";
# STDOUT feeds the translator; report a failed flush or a failing
# translator instead of silently leaving truncated output behind.
1930 close STDOUT or die "error closing STDOUT: $!";