WIP FPC-III support
[linux/fpc-iii.git] / arch / powerpc / crypto / aes-spe-modes.S
blob3f92a6a85785776977dd351b61e7dbfc943e1626
1 /* SPDX-License-Identifier: GPL-2.0-or-later */
2 /*
3  * AES modes (ECB/CBC/CTR/XTS) for PPC AES implementation
4  *
5  * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
6  */
8 #include <asm/ppc_asm.h>
9 #include "aes-spe-regs.h"
/*
 * Endian dependent helpers for reading input (via rSP), writing output
 * (via rDP) and reading/writing the IV buffer (via rIP).
 *
 * CBC_DEC and CTR_DEC are the amounts subtracted from the pointers in
 * ppc_decrypt_cbc and ppc_crypt_ctr; their values differ per endianness
 * because only the little endian accessors move the pointers themselves.
 */
11 #ifdef __BIG_ENDIAN__                   /* Macros for big endian builds */
/*
 * Big endian: plain lwz/stw with displacement `off`. The accessors leave
 * the pointers untouched; NEXT_BLOCK advances rSP and rDP by 16 once per
 * block and rIP is never changed, so START_IV is empty.
 */
13 #define LOAD_DATA(reg, off) \
14         lwz             reg,off(rSP);   /* load with offset             */
15 #define SAVE_DATA(reg, off) \
16         stw             reg,off(rDP);   /* save with offset             */
17 #define NEXT_BLOCK \
18         addi            rSP,rSP,16;     /* increment pointers per block */ \
19         addi            rDP,rDP,16;
20 #define LOAD_IV(reg, off) \
21         lwz             reg,off(rIP);   /* IV loading with offset       */
22 #define SAVE_IV(reg, off) \
23         stw             reg,off(rIP);   /* IV saving with offset        */
24 #define START_IV                        /* nothing to reset             */
25 #define CBC_DEC 16                      /* CBC decrement per block      */
26 #define CTR_DEC 1                       /* CTR decrement one byte       */
28 #else                                   /* Macros for little endian     */
/*
 * Little endian: byte reversed lwbrx/stwbrx. The `off` argument is not
 * used; instead every access bumps its pointer by 4, so four accesses
 * advance it by a whole block. NEXT_BLOCK is therefore empty and START_IV
 * rewinds rIP by 16 after four IV accesses.
 */
30 #define LOAD_DATA(reg, off) \
31         lwbrx           reg,0,rSP;      /* load reversed                */ \
32         addi            rSP,rSP,4;      /* and increment pointer        */
33 #define SAVE_DATA(reg, off) \
34         stwbrx          reg,0,rDP;      /* save reversed                */ \
35         addi            rDP,rDP,4;      /* and increment pointer        */
36 #define NEXT_BLOCK                      /* nothing to do                */
37 #define LOAD_IV(reg, off) \
38         lwbrx           reg,0,rIP;      /* load reversed                */ \
39         addi            rIP,rIP,4;      /* and increment pointer        */
40 #define SAVE_IV(reg, off) \
41         stwbrx          reg,0,rIP;      /* save reversed                */ \
42         addi            rIP,rIP,4;      /* and increment pointer        */
43 #define START_IV \
44         subi            rIP,rIP,16;     /* must reset pointer           */
45 #define CBC_DEC 32                      /* 2 blocks because of incs     */
46 #define CTR_DEC 17                      /* 1 block because of incs      */
48 #endif
/*
 * SAVE_<n>_REGS / LOAD_<n>_REGS spill and reload <n> registers as 32 bit
 * words in the stack frame, starting at 96(r1). They are selected by token
 * pasting in INITIALIZE_CRYPT / FINALIZE_CRYPT:
 *   0: nothing
 *   4: rI0-rI3 at  96..108(r1)
 *   8: rI0-rI3 as above plus rG0-rG3 at 112..124(r1)
 */
50 #define SAVE_0_REGS
51 #define LOAD_0_REGS
53 #define SAVE_4_REGS \
54         stw             rI0,96(r1);     /* save 32 bit registers        */ \
55         stw             rI1,100(r1);                                       \
56         stw             rI2,104(r1);                                       \
57         stw             rI3,108(r1);
59 #define LOAD_4_REGS \
60         lwz             rI0,96(r1);     /* restore 32 bit registers     */ \
61         lwz             rI1,100(r1);                                       \
62         lwz             rI2,104(r1);                                       \
63         lwz             rI3,108(r1);
65 #define SAVE_8_REGS \
66         SAVE_4_REGS                                                        \
67         stw             rG0,112(r1);    /* save 32 bit registers        */ \
68         stw             rG1,116(r1);                                       \
69         stw             rG2,120(r1);                                       \
70         stw             rG3,124(r1);
72 #define LOAD_8_REGS \
73         LOAD_4_REGS                                                        \
74         lwz             rG0,112(r1);    /* restore 32 bit registers     */ \
75         lwz             rG1,116(r1);                                       \
76         lwz             rG2,120(r1);                                       \
77         lwz             rG3,124(r1);
/*
 * INITIALIZE_CRYPT(tab, nr32bitregs) - common function prologue.
 *
 *   tab          symbol whose address is loaded into rT0 (lis/ori pair)
 *   nr32bitregs  0, 4 or 8; pasted into SAVE_<n>_REGS to pick how many
 *                additional 32 bit registers are spilled
 *
 * Creates a 160 byte stack frame, stores the link register at 8(r1) of the
 * new frame, copies rKP to rKS (the loops restore rKP from rKS before every
 * block) and stores r14-r23 with evstdw into 8 byte slots at 16..88(r1).
 * The register saves are interleaved with the other set-up instructions.
 * Must be paired with FINALIZE_CRYPT using the same nr32bitregs.
 */
79 #define INITIALIZE_CRYPT(tab,nr32bitregs) \
80         mflr            r0;                                                \
81         stwu            r1,-160(r1);    /* create stack frame           */ \
82         lis             rT0,tab@h;      /* en-/decryption table pointer */ \
83         stw             r0,8(r1);       /* save link register           */ \
84         ori             rT0,rT0,tab@l;                                     \
85         evstdw          r14,16(r1);                                        \
86         mr              rKS,rKP;                                           \
87         evstdw          r15,24(r1);     /* We must save non volatile    */ \
88         evstdw          r16,32(r1);     /* registers. Take the chance   */ \
89         evstdw          r17,40(r1);     /* and save the SPE part too    */ \
90         evstdw          r18,48(r1);                                        \
91         evstdw          r19,56(r1);                                        \
92         evstdw          r20,64(r1);                                        \
93         evstdw          r21,72(r1);                                        \
94         evstdw          r22,80(r1);                                        \
95         evstdw          r23,88(r1);                                        \
96         SAVE_##nr32bitregs##_REGS
/*
 * FINALIZE_CRYPT(nr32bitregs) - common function epilogue, counterpart of
 * INITIALIZE_CRYPT.
 *
 * Reloads the saved link register value into r0, restores r14-r23 with
 * evldw from 16..88(r1), reloads the additional registers through
 * LOAD_<n>_REGS and moves r0 back to LR. Afterwards r0 is zeroed and one
 * 32 bit word is overwritten at the start of each of the ten 8 byte save
 * slots before the 160 byte frame is released. The caller still has to
 * issue the blr.
 *
 * NOTE(review): only the first word of every slot is wiped and the
 * 96..124(r1) area is not wiped at all - presumably the first word is the
 * SPE upper half that can carry data of another context; confirm.
 */
98 #define FINALIZE_CRYPT(nr32bitregs) \
99         lwz             r0,8(r1);                                          \
100         evldw           r14,16(r1);     /* restore SPE registers        */ \
101         evldw           r15,24(r1);                                        \
102         evldw           r16,32(r1);                                        \
103         evldw           r17,40(r1);                                        \
104         evldw           r18,48(r1);                                        \
105         evldw           r19,56(r1);                                        \
106         evldw           r20,64(r1);                                        \
107         evldw           r21,72(r1);                                        \
108         evldw           r22,80(r1);                                        \
109         evldw           r23,88(r1);                                        \
110         LOAD_##nr32bitregs##_REGS                                          \
111         mtlr            r0;             /* restore link register        */ \
112         xor             r0,r0,r0;                                          \
113         stw             r0,16(r1);      /* delete sensitive data        */ \
114         stw             r0,24(r1);      /* that we might have pushed    */ \
115         stw             r0,32(r1);      /* from other context that runs */ \
116         stw             r0,40(r1);      /* the same code                */ \
117         stw             r0,48(r1);                                         \
118         stw             r0,56(r1);                                         \
119         stw             r0,64(r1);                                         \
120         stw             r0,72(r1);                                         \
121         stw             r0,80(r1);                                         \
122         stw             r0,88(r1);                                         \
123         addi            r1,r1,160;      /* cleanup stack frame          */
/*
 * ENDIAN_SWAP(t0, t1, s0, s1) - byte reverse two 32 bit registers:
 * t0 = bswap32(s0), t1 = bswap32(s1).
 *
 * For a source with bytes A B C D the rotate right by 8 yields D A B C;
 * the two rlwimi then insert bytes 1 and 3 of the source rotated left by 8
 * (B C D A), giving D C B A. The sources are read again after the targets
 * have been written, so t0/t1 must be different registers than s0/s1.
 * The two swaps are interleaved instruction by instruction.
 */
125 #define ENDIAN_SWAP(t0, t1, s0, s1) \
126         rotrwi          t0,s0,8;        /* swap endianness for 2 GPRs   */ \
127         rotrwi          t1,s1,8;                                           \
128         rlwimi          t0,s0,8,8,15;                                      \
129         rlwimi          t1,s1,8,8,15;                                      \
130         rlwimi          t0,s0,8,24,31;                                     \
131         rlwimi          t1,s1,8,24,31;
/*
 * GF128_MUL(d0, d1, d2, d3, t0) - shift the 128 bit value d3:d2:d1:d0
 * (d0 = least significant word) left by one bit and, if the bit shifted
 * out of d3 was set, XOR 0x87 into d0.
 *
 * t0 is preset to 0x87 and replaced by 0 when d3 > -1 as a signed value,
 * i.e. when the top bit of d3 is clear. Each rlwimi/rotlwi pair copies the
 * top bit of the next lower word into the top bit position and rotates it
 * round into bit position 0, which equals (word << 1) | carry-in.
 * Clobbers t0 and cr0 (cmpwi / iselgt).
 */
133 #define GF128_MUL(d0, d1, d2, d3, t0) \
134         li              t0,0x87;        /* multiplication in GF128      */ \
135         cmpwi           d3,-1;                                             \
136         iselgt          t0,0,t0;                                           \
137         rlwimi          d3,d2,0,0,0;    /* propagate "carry" bits       */ \
138         rotlwi          d3,d3,1;                                           \
139         rlwimi          d2,d1,0,0,0;                                       \
140         rotlwi          d2,d2,1;                                           \
141         rlwimi          d1,d0,0,0,0;                                       \
142         slwi            d0,d0,1;        /* shift left 128 bit           */ \
143         rotlwi          d1,d1,1;                                           \
144         xor             d0,d0,t0;
/*
 * START_KEY(d0, d1, d2, d3) - prepare a block for the round function.
 *
 * Loads the four 32 bit words at 0..12(rKP) into rW0-rW3, copies rRR into
 * the count register and writes d0-d3 XORed with those words to rD0-rD3.
 * The inputs may be rD0-rD3 themselves (in place) or other registers such
 * as rI0-rI3, which are then left unmodified.
 */
146 #define START_KEY(d0, d1, d2, d3) \
147         lwz             rW0,0(rKP);                                        \
148         mtctr           rRR;                                               \
149         lwz             rW1,4(rKP);                                        \
150         lwz             rW2,8(rKP);                                        \
151         lwz             rW3,12(rKP);                                       \
152         xor             rD0,d0,rW0;                                        \
153         xor             rD1,d1,rW1;                                        \
154         xor             rD2,d2,rW2;                                        \
155         xor             rD3,d3,rW3;
158 /* ppc_encrypt_aes(u8 *out, const u8 *in, u32 *key_enc,
159  *                 u32 rounds)
161  * Called from the glue layer to encrypt a single 16 byte block.
162  * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
164  */
165 _GLOBAL(ppc_encrypt_aes)
/* Prologue: stack frame, rT0 = PPC_AES_4K_ENCTAB, no extra registers. */
166         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
/* Read the 16 byte input block through rSP into rD0-rD3. */
167         LOAD_DATA(rD0, 0)
168         LOAD_DATA(rD1, 4)
169         LOAD_DATA(rD2, 8)
170         LOAD_DATA(rD3, 12)
/* XOR the first four key words into rD0-rD3 and load CTR with rRR. */
171         START_KEY(rD0, rD1, rD2, rD3)
/*
 * The round function is defined outside this file.
 * NOTE(review): the XORs below rely on it leaving the final key words in
 * rW0-rW3 - confirm in ppc_encrypt_block.
 */
172         bl              ppc_encrypt_block
/* Final XOR with rW0-rW3, interleaved with the stores through rDP. */
173         xor             rD0,rD0,rW0
174         SAVE_DATA(rD0, 0)
175         xor             rD1,rD1,rW1
176         SAVE_DATA(rD1, 4)
177         xor             rD2,rD2,rW2
178         SAVE_DATA(rD2, 8)
179         xor             rD3,rD3,rW3
180         SAVE_DATA(rD3, 12)
/* Epilogue: restore registers, wipe save slots, release the frame. */
181         FINALIZE_CRYPT(0)
182         blr
185 /* ppc_decrypt_aes(u8 *out, const u8 *in, u32 *key_dec,
186  *                 u32 rounds)
188  * Called from the glue layer to decrypt a single 16 byte block.
189  * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
191  */
192 _GLOBAL(ppc_decrypt_aes)
/* Prologue: stack frame, rT0 = PPC_AES_4K_DECTAB, no extra registers. */
193         INITIALIZE_CRYPT(PPC_AES_4K_DECTAB,0)
/* Read the 16 byte input block through rSP into rD0-rD3. */
194         LOAD_DATA(rD0, 0)
/*
 * rT1 = rT0 + 4096, set up between the loads.
 * NOTE(review): presumably a second lookup table used by
 * ppc_decrypt_block - confirm there.
 */
195         addi            rT1,rT0,4096
196         LOAD_DATA(rD1, 4)
197         LOAD_DATA(rD2, 8)
198         LOAD_DATA(rD3, 12)
/* XOR the first four key words into rD0-rD3 and load CTR with rRR. */
199         START_KEY(rD0, rD1, rD2, rD3)
/* Round function is defined outside this file. */
200         bl              ppc_decrypt_block
/* Final XOR with rW0-rW3, interleaved with the stores through rDP. */
201         xor             rD0,rD0,rW0
202         SAVE_DATA(rD0, 0)
203         xor             rD1,rD1,rW1
204         SAVE_DATA(rD1, 4)
205         xor             rD2,rD2,rW2
206         SAVE_DATA(rD2, 8)
207         xor             rD3,rD3,rW3
208         SAVE_DATA(rD3, 12)
/* Epilogue: restore registers, wipe save slots, release the frame. */
209         FINALIZE_CRYPT(0)
210         blr
213 /* ppc_encrypt_ecb(u8 *out, const u8 *in, u32 *key_enc,
214  *                 u32 rounds, u32 bytes);
216  * Called from the glue layer to encrypt multiple blocks via ECB.
217  * Bytes must be greater than or equal to 16 and only whole blocks
218  * are processed. Round values are AES128 = 4, AES192 = 5 and
219  * AES256 = 6.
221  */
222 _GLOBAL(ppc_encrypt_ecb)
/* Prologue: stack frame, rT0 = PPC_AES_4K_ENCTAB, no extra registers. */
223         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 0)
/* One iteration per 16 byte block. */
224 ppc_encrypt_ecb_loop:
225         LOAD_DATA(rD0, 0)
/* Restart from the key pointer saved in rKS by the prologue. */
226         mr              rKP,rKS
227         LOAD_DATA(rD1, 4)
/* Account for this block ... */
228         subi            rLN,rLN,16
229         LOAD_DATA(rD2, 8)
/*
 * ... and decide already here whether another whole block remains.
 * The result in cr0 is consumed by the bt at the bottom of the loop, so
 * nothing in between (including ppc_encrypt_block) may change cr0.
 */
230         cmpwi           rLN,15
231         LOAD_DATA(rD3, 12)
232         START_KEY(rD0, rD1, rD2, rD3)
233         bl              ppc_encrypt_block
/* Final XOR with rW0-rW3, interleaved with the stores through rDP. */
234         xor             rD0,rD0,rW0
235         SAVE_DATA(rD0, 0)
236         xor             rD1,rD1,rW1
237         SAVE_DATA(rD1, 4)
238         xor             rD2,rD2,rW2
239         SAVE_DATA(rD2, 8)
240         xor             rD3,rD3,rW3
241         SAVE_DATA(rD3, 12)
/* Advance rSP/rDP (big endian only; little endian already did). */
242         NEXT_BLOCK
/* Loop while more than 15 bytes were left after this block. */
243         bt              gt,ppc_encrypt_ecb_loop
244         FINALIZE_CRYPT(0)
245         blr
248 /* ppc_decrypt_ecb(u8 *out, const u8 *in, u32 *key_dec,
249  *                 u32 rounds, u32 bytes);
251  * Called from the glue layer to decrypt multiple blocks via ECB.
252  * Bytes must be greater than or equal to 16 and only whole blocks
253  * are processed. Round values are AES128 = 4, AES192 = 5 and
254  * AES256 = 6.
256  */
257 _GLOBAL(ppc_decrypt_ecb)
/* Prologue: stack frame, rT0 = PPC_AES_4K_DECTAB, no extra registers. */
258         INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 0)
/* rT1 = rT0 + 4096; set once, outside the loop. */
259         addi            rT1,rT0,4096
/* One iteration per 16 byte block. */
260 ppc_decrypt_ecb_loop:
261         LOAD_DATA(rD0, 0)
/* Restart from the key pointer saved in rKS by the prologue. */
262         mr              rKP,rKS
263         LOAD_DATA(rD1, 4)
/* Account for this block ... */
264         subi            rLN,rLN,16
265         LOAD_DATA(rD2, 8)
/*
 * ... and decide already here whether another whole block remains.
 * The result in cr0 is consumed by the bt at the bottom of the loop, so
 * nothing in between (including ppc_decrypt_block) may change cr0.
 */
266         cmpwi           rLN,15
267         LOAD_DATA(rD3, 12)
268         START_KEY(rD0, rD1, rD2, rD3)
269         bl              ppc_decrypt_block
/* Final XOR with rW0-rW3, interleaved with the stores through rDP. */
270         xor             rD0,rD0,rW0
271         SAVE_DATA(rD0, 0)
272         xor             rD1,rD1,rW1
273         SAVE_DATA(rD1, 4)
274         xor             rD2,rD2,rW2
275         SAVE_DATA(rD2, 8)
276         xor             rD3,rD3,rW3
277         SAVE_DATA(rD3, 12)
/* Advance rSP/rDP (big endian only; little endian already did). */
278         NEXT_BLOCK
/* Loop while more than 15 bytes were left after this block. */
279         bt              gt,ppc_decrypt_ecb_loop
280         FINALIZE_CRYPT(0)
281         blr
284 /* ppc_encrypt_cbc(u8 *out, const u8 *in, u32 *key_enc,
285  *                 u32 rounds, u32 bytes, u8 *iv);
287  * Called from the glue layer to encrypt multiple blocks via CBC.
288  * Bytes must be greater than or equal to 16 and only whole blocks
289  * are processed. Round values are AES128 = 4, AES192 = 5 and
290  * AES256 = 6.
292  */
293 _GLOBAL(ppc_encrypt_cbc)
/* Prologue; additionally spills rI0-rI3, which hold the chaining value. */
294         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
/* Load the IV from the buffer at rIP into rI0-rI3. */
295         LOAD_IV(rI0, 0)
296         LOAD_IV(rI1, 4)
297         LOAD_IV(rI2, 8)
298         LOAD_IV(rI3, 12)
/* One iteration per 16 byte block. */
299 ppc_encrypt_cbc_loop:
300         LOAD_DATA(rD0, 0)
/* Restart from the key pointer saved in rKS by the prologue. */
301         mr              rKP,rKS
302         LOAD_DATA(rD1, 4)
303         subi            rLN,rLN,16
304         LOAD_DATA(rD2, 8)
/* cr0 is consumed by the bt at the bottom of the loop. */
305         cmpwi           rLN,15
306         LOAD_DATA(rD3, 12)
/* XOR the input block with the chaining value (IV or previous output). */
307         xor             rD0,rD0,rI0
308         xor             rD1,rD1,rI1
309         xor             rD2,rD2,rI2
310         xor             rD3,rD3,rI3
311         START_KEY(rD0, rD1, rD2, rD3)
312         bl              ppc_encrypt_block
/*
 * The final XOR writes the output block to rI0-rI3, so it is stored and
 * at the same time becomes the chaining value for the next block.
 */
313         xor             rI0,rD0,rW0
314         SAVE_DATA(rI0, 0)
315         xor             rI1,rD1,rW1
316         SAVE_DATA(rI1, 4)
317         xor             rI2,rD2,rW2
318         SAVE_DATA(rI2, 8)
319         xor             rI3,rD3,rW3
320         SAVE_DATA(rI3, 12)
321         NEXT_BLOCK
322         bt              gt,ppc_encrypt_cbc_loop
/* Rewind rIP (little endian) and write the last output block back as IV. */
323         START_IV
324         SAVE_IV(rI0, 0)
325         SAVE_IV(rI1, 4)
326         SAVE_IV(rI2, 8)
327         SAVE_IV(rI3, 12)
328         FINALIZE_CRYPT(4)
329         blr
332 /* ppc_decrypt_cbc(u8 *out, const u8 *in, u32 *key_dec,
333  *                 u32 rounds, u32 bytes, u8 *iv);
335  * Called from the glue layer to decrypt multiple blocks via CBC.
336  * Round values are AES128 = 4, AES192 = 5, AES256 = 6.
338  */
339 _GLOBAL(ppc_decrypt_cbc)
/*
 * Blocks are processed from the last one to the first one. The last input
 * block is written to the IV buffer right away, the original IV is kept
 * in rI0-rI3 and only applied to the first block at the very end.
 */
340         INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 4)
/* rT1 is used as a temporary mask here; it is re-initialised below. */
341         li              rT1,15
342         LOAD_IV(rI0, 0)
/* Round the length down to a multiple of 16 ... */
343         andc            rLN,rLN,rT1
344         LOAD_IV(rI1, 4)
/*
 * ... and turn it into the byte offset of the last block.
 * NOTE(review): for bytes < 16 this yields -16 and the 16 bytes before
 * `in` are read below; callers presumably pass at least one block.
 */
345         subi            rLN,rLN,16
346         LOAD_IV(rI2, 8)
347         add             rSP,rSP,rLN     /* reverse processing           */
348         LOAD_IV(rI3, 12)
349         add             rDP,rDP,rLN
/* Load the last input block. */
350         LOAD_DATA(rD0, 0)
/* rT1 = rT0 + 4096 from here on. */
351         addi            rT1,rT0,4096
352         LOAD_DATA(rD1, 4)
353         LOAD_DATA(rD2, 8)
354         LOAD_DATA(rD3, 12)
/* Store the last input block into the IV buffer. */
355         START_IV
356         SAVE_IV(rD0, 0)
357         SAVE_IV(rD1, 4)
358         SAVE_IV(rD2, 8)
/* Offset of the last block below 16 means there is only one block. */
359         cmpwi           rLN,16
360         SAVE_IV(rD3, 12)
361         bt              lt,ppc_decrypt_cbc_end
/* Handles every block except the first one, walking backwards. */
362 ppc_decrypt_cbc_loop:
363         mr              rKP,rKS
364         START_KEY(rD0, rD1, rD2, rD3)
365         bl              ppc_decrypt_block
366         subi            rLN,rLN,16
/* Step rSP back to the preceding block (CBC_DEC is endian dependent). */
367         subi            rSP,rSP,CBC_DEC
/*
 * Interleaved: finish the block just decrypted (rW = rD ^ rW) while
 * loading the preceding input block into rD0-rD3.
 */
368         xor             rW0,rD0,rW0
369         LOAD_DATA(rD0, 0)
370         xor             rW1,rD1,rW1
371         LOAD_DATA(rD1, 4)
372         xor             rW2,rD2,rW2
373         LOAD_DATA(rD2, 8)
374         xor             rW3,rD3,rW3
375         LOAD_DATA(rD3, 12)
/* XOR with the preceding input block (CBC chaining) and store. */
376         xor             rW0,rW0,rD0
377         SAVE_DATA(rW0, 0)
378         xor             rW1,rW1,rD1
379         SAVE_DATA(rW1, 4)
380         xor             rW2,rW2,rD2
381         SAVE_DATA(rW2, 8)
382         xor             rW3,rW3,rD3
383         SAVE_DATA(rW3, 12)
384         cmpwi           rLN,15
/* Step rDP back as well; subi does not affect cr0. */
385         subi            rDP,rDP,CBC_DEC
386         bt              gt,ppc_decrypt_cbc_loop
/* First block of the buffer: chained with the original IV in rI0-rI3. */
387 ppc_decrypt_cbc_end:
388         mr              rKP,rKS
389         START_KEY(rD0, rD1, rD2, rD3)
390         bl              ppc_decrypt_block
391         xor             rW0,rW0,rD0
392         xor             rW1,rW1,rD1
393         xor             rW2,rW2,rD2
394         xor             rW3,rW3,rD3
395         xor             rW0,rW0,rI0     /* decrypt with initial IV      */
396         SAVE_DATA(rW0, 0)
397         xor             rW1,rW1,rI1
398         SAVE_DATA(rW1, 4)
399         xor             rW2,rW2,rI2
400         SAVE_DATA(rW2, 8)
401         xor             rW3,rW3,rI3
402         SAVE_DATA(rW3, 12)
403         FINALIZE_CRYPT(4)
404         blr
407 /* ppc_crypt_ctr(u8 *out, const u8 *in, u32 *key_enc,
408  *               u32 rounds, u32 bytes, u8 *iv);
410  * Called from the glue layer to encrypt/decrypt multiple blocks
411  * via CTR. The number of bytes does not need to be a multiple of
412  * 16. Round values are AES128 = 4, AES192 = 5, AES256 = 6.
414  */
415 _GLOBAL(ppc_crypt_ctr)
/* Prologue; additionally spills rI0-rI3, which hold the counter block. */
416         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 4)
/* Load the counter block from the IV buffer into rI0-rI3. */
417         LOAD_IV(rI0, 0)
418         LOAD_IV(rI1, 4)
419         LOAD_IV(rI2, 8)
/* Less than one whole block? The macros below leave cr0 untouched. */
420         cmpwi           rLN,16
421         LOAD_IV(rI3, 12)
/* Rewind rIP (little endian) so it points at the IV buffer start again. */
422         START_IV
423         bt              lt,ppc_crypt_ctr_partial
/* One iteration per whole 16 byte block. */
424 ppc_crypt_ctr_loop:
425         mr              rKP,rKS
/* Encrypt the counter block; rI0-rI3 themselves stay unchanged. */
426         START_KEY(rI0, rI1, rI2, rI3)
427         bl              ppc_encrypt_block
/* rW0-rW3 = encrypted counter block (key stream). */
428         xor             rW0,rD0,rW0
429         xor             rW1,rD1,rW1
430         xor             rW2,rD2,rW2
431         xor             rW3,rD3,rW3
/* XOR the input block with the key stream and store it. */
432         LOAD_DATA(rD0, 0)
433         subi            rLN,rLN,16
434         LOAD_DATA(rD1, 4)
435         LOAD_DATA(rD2, 8)
436         LOAD_DATA(rD3, 12)
437         xor             rD0,rD0,rW0
438         SAVE_DATA(rD0, 0)
439         xor             rD1,rD1,rW1
440         SAVE_DATA(rD1, 4)
441         xor             rD2,rD2,rW2
442         SAVE_DATA(rD2, 8)
443         xor             rD3,rD3,rW3
444         SAVE_DATA(rD3, 12)
/* 128 bit increment: rI3 is the lowest word, carry ripples up to rI0. */
445         addic           rI3,rI3,1       /* increase counter                     */
446         addze           rI2,rI2
447         addze           rI1,rI1
448         addze           rI0,rI0
449         NEXT_BLOCK
450         cmpwi           rLN,15
451         bt              gt,ppc_crypt_ctr_loop
/* Trailing 1..15 bytes, or nothing left at all. */
452 ppc_crypt_ctr_partial:
453         cmpwi           rLN,0
454         bt              eq,ppc_crypt_ctr_end
455         mr              rKP,rKS
456         START_KEY(rI0, rI1, rI2, rI3)
457         bl              ppc_encrypt_block
/*
 * Park the key stream block in the IV buffer so that it can be read back
 * byte by byte; the buffer is overwritten with the counter at the end.
 */
458         xor             rW0,rD0,rW0
459         SAVE_IV(rW0, 0)
460         xor             rW1,rD1,rW1
461         SAVE_IV(rW1, 4)
462         xor             rW2,rD2,rW2
463         SAVE_IV(rW2, 8)
464         xor             rW3,rD3,rW3
465         SAVE_IV(rW3, 12)
/* CTR = number of remaining bytes. */
466         mtctr           rLN
/*
 * Bias all three pointers by -1 for the pre-incrementing lbzu/stbu below;
 * CTR_DEC also undoes the SAVE_IV increments on little endian.
 */
467         subi            rIP,rIP,CTR_DEC
468         subi            rSP,rSP,1
469         subi            rDP,rDP,1
470 ppc_crypt_ctr_xorbyte:
471         lbzu            rW4,1(rIP)      /* bytewise xor for partial block       */
472         lbzu            rW5,1(rSP)
473         xor             rW4,rW4,rW5
474         stbu            rW4,1(rDP)
475         bdnz            ppc_crypt_ctr_xorbyte
/* Restore rIP to the start of the IV buffer: rIP = rIP - rLN + 1. */
476         subf            rIP,rLN,rIP
477         addi            rIP,rIP,1
/* The partial block consumed a counter value as well. */
478         addic           rI3,rI3,1
479         addze           rI2,rI2
480         addze           rI1,rI1
481         addze           rI0,rI0
/* Write the updated counter block back to the IV buffer. */
482 ppc_crypt_ctr_end:
483         SAVE_IV(rI0, 0)
484         SAVE_IV(rI1, 4)
485         SAVE_IV(rI2, 8)
486         SAVE_IV(rI3, 12)
487         FINALIZE_CRYPT(4)
488         blr
491 /* ppc_encrypt_xts(u8 *out, const u8 *in, u32 *key_enc,
492  *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
494  * Called from the glue layer to encrypt multiple blocks via XTS.
495  * If key_twk is given, the IV is first encrypted with it to form
496  * the initial tweak. Round values are AES128 = 4, AES192 = 5,
497  * AES256 = 6.
499  */
500 _GLOBAL(ppc_encrypt_xts)
/*
 * Prologue; spills rI0-rI3 (tweak as applied to the data) and rG0-rG3
 * (byte swapped copy of the tweak used for the GF128 multiplication).
 */
501         INITIALIZE_CRYPT(PPC_AES_4K_ENCTAB, 8)
/* Load the IV / current tweak from the buffer at rIP. */
502         LOAD_IV(rI0, 0)
503         LOAD_IV(rI1, 4)
504         LOAD_IV(rI2, 8)
/* No tweak key pointer: the IV is used as tweak without encryption. */
505         cmpwi           rKT,0
506         LOAD_IV(rI3, 12)
507         bt              eq,ppc_encrypt_xts_notweak
/* Encrypt the IV with the tweak key (same round count in rRR). */
508         mr              rKP,rKT
509         START_KEY(rI0, rI1, rI2, rI3)
510         bl              ppc_encrypt_block
511         xor             rI0,rD0,rW0
512         xor             rI1,rD1,rW1
513         xor             rI2,rD2,rW2
514         xor             rI3,rD3,rW3
515 ppc_encrypt_xts_notweak:
/* rG0-rG3 = byte swapped tweak. */
516         ENDIAN_SWAP(rG0, rG1, rI0, rI1)
517         ENDIAN_SWAP(rG2, rG3, rI2, rI3)
/* One iteration per 16 byte block. */
518 ppc_encrypt_xts_loop:
519         LOAD_DATA(rD0, 0)
/* Back to the data key saved in rKS by the prologue. */
520         mr              rKP,rKS
521         LOAD_DATA(rD1, 4)
522         subi            rLN,rLN,16
523         LOAD_DATA(rD2, 8)
524         LOAD_DATA(rD3, 12)
/* XOR the tweak into the input block before encryption ... */
525         xor             rD0,rD0,rI0
526         xor             rD1,rD1,rI1
527         xor             rD2,rD2,rI2
528         xor             rD3,rD3,rI3
529         START_KEY(rD0, rD1, rD2, rD3)
530         bl              ppc_encrypt_block
531         xor             rD0,rD0,rW0
532         xor             rD1,rD1,rW1
533         xor             rD2,rD2,rW2
534         xor             rD3,rD3,rW3
/* ... and again into the result before it is stored. */
535         xor             rD0,rD0,rI0
536         SAVE_DATA(rD0, 0)
537         xor             rD1,rD1,rI1
538         SAVE_DATA(rD1, 4)
539         xor             rD2,rD2,rI2
540         SAVE_DATA(rD2, 8)
541         xor             rD3,rD3,rI3
542         SAVE_DATA(rD3, 12)
/* Next tweak: multiply in GF128 (clobbers rW0 and cr0), swap back to rI. */
543         GF128_MUL(rG0, rG1, rG2, rG3, rW0)
544         ENDIAN_SWAP(rI0, rI1, rG0, rG1)
545         ENDIAN_SWAP(rI2, rI3, rG2, rG3)
/* Compare only after GF128_MUL, which overwrites cr0. */
546         cmpwi           rLN,0
547         NEXT_BLOCK
548         bt              gt,ppc_encrypt_xts_loop
/* Rewind rIP (little endian) and store the next tweak in the IV buffer. */
549         START_IV
550         SAVE_IV(rI0, 0)
551         SAVE_IV(rI1, 4)
552         SAVE_IV(rI2, 8)
553         SAVE_IV(rI3, 12)
554         FINALIZE_CRYPT(8)
555         blr
558 /* ppc_decrypt_xts(u8 *out, const u8 *in, u32 *key_dec,
559  *                 u32 rounds, u32 bytes, u8 *iv, u32 *key_twk);
561  * Called from the glue layer to decrypt multiple blocks via XTS.
562  * If key_twk is given, the IV is first encrypted with it to form
563  * the initial tweak. Round values are AES128 = 4, AES192 = 5,
564  * AES256 = 6.
566  */
567 _GLOBAL(ppc_decrypt_xts)
/*
 * Prologue; spills rI0-rI3 (tweak as applied to the data) and rG0-rG3
 * (byte swapped copy of the tweak used for the GF128 multiplication).
 */
568         INITIALIZE_CRYPT(PPC_AES_4K_DECTAB, 8)
/* Load the IV / current tweak from the buffer at rIP. */
569         LOAD_IV(rI0, 0)
/* rT1 = rT0 + 4096. */
570         addi            rT1,rT0,4096
571         LOAD_IV(rI1, 4)
572         LOAD_IV(rI2, 8)
/* No tweak key pointer: the IV is used as tweak without encryption. */
573         cmpwi           rKT,0
574         LOAD_IV(rI3, 12)
575         bt              eq,ppc_decrypt_xts_notweak
/*
 * The tweak is always produced by encryption, so rT0 is moved back by
 * 4096 for the call and restored afterwards.
 * NOTE(review): this assumes the encryption table is located 4096 bytes
 * before PPC_AES_4K_DECTAB - confirm against the table definitions.
 */
576         subi            rT0,rT0,4096
577         mr              rKP,rKT
578         START_KEY(rI0, rI1, rI2, rI3)
579         bl              ppc_encrypt_block
580         xor             rI0,rD0,rW0
581         xor             rI1,rD1,rW1
582         xor             rI2,rD2,rW2
583         xor             rI3,rD3,rW3
584         addi            rT0,rT0,4096
585 ppc_decrypt_xts_notweak:
/* rG0-rG3 = byte swapped tweak. */
586         ENDIAN_SWAP(rG0, rG1, rI0, rI1)
587         ENDIAN_SWAP(rG2, rG3, rI2, rI3)
/* One iteration per 16 byte block. */
588 ppc_decrypt_xts_loop:
589         LOAD_DATA(rD0, 0)
/* Back to the data key saved in rKS by the prologue. */
590         mr              rKP,rKS
591         LOAD_DATA(rD1, 4)
592         subi            rLN,rLN,16
593         LOAD_DATA(rD2, 8)
594         LOAD_DATA(rD3, 12)
/* XOR the tweak into the input block before decryption ... */
595         xor             rD0,rD0,rI0
596         xor             rD1,rD1,rI1
597         xor             rD2,rD2,rI2
598         xor             rD3,rD3,rI3
599         START_KEY(rD0, rD1, rD2, rD3)
600         bl              ppc_decrypt_block
601         xor             rD0,rD0,rW0
602         xor             rD1,rD1,rW1
603         xor             rD2,rD2,rW2
604         xor             rD3,rD3,rW3
/* ... and again into the result before it is stored. */
605         xor             rD0,rD0,rI0
606         SAVE_DATA(rD0, 0)
607         xor             rD1,rD1,rI1
608         SAVE_DATA(rD1, 4)
609         xor             rD2,rD2,rI2
610         SAVE_DATA(rD2, 8)
611         xor             rD3,rD3,rI3
612         SAVE_DATA(rD3, 12)
/* Next tweak: multiply in GF128 (clobbers rW0 and cr0), swap back to rI. */
613         GF128_MUL(rG0, rG1, rG2, rG3, rW0)
614         ENDIAN_SWAP(rI0, rI1, rG0, rG1)
615         ENDIAN_SWAP(rI2, rI3, rG2, rG3)
/* Compare only after GF128_MUL, which overwrites cr0. */
616         cmpwi           rLN,0
617         NEXT_BLOCK
618         bt              gt,ppc_decrypt_xts_loop
/* Rewind rIP (little endian) and store the next tweak in the IV buffer. */
619         START_IV
620         SAVE_IV(rI0, 0)
621         SAVE_IV(rI1, 4)
622         SAVE_IV(rI2, 8)
623         SAVE_IV(rI3, 12)
624         FINALIZE_CRYPT(8)
625         blr