staging: rtl8192u: remove redundant assignment to pointer crypt
[linux/fpc-iii.git] / arch / arm64 / crypto / aes-modes.S
blob324039b720942f1a29b2b874eb10a1ccf7a4c1d6
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4  *
5  * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
6  */
8 /* included by aes-ce.S and aes-neon.S */
/* Everything that follows is emitted into the .text section. */
10         .text
11         .align          4
/*
 * MAX_STRIDE is the number of blocks the interleaved (Nx) loops below
 * consume per iteration. It defaults to 4 unless it has already been
 * defined before this point (this file is included by other sources).
 */
13 #ifndef MAX_STRIDE
14 #define MAX_STRIDE      4
15 #endif
/*
 * ST4(x) expands to its argument only when MAX_STRIDE == 4; ST5(x) expands
 * to its argument for any other MAX_STRIDE value and guards the 5-way code
 * paths. The inactive one of the two expands to nothing.
 */
17 #if MAX_STRIDE == 4
18 #define ST4(x...) x
19 #define ST5(x...)
20 #else
21 #define ST4(x...)
22 #define ST5(x...) x
23 #endif
/*
 * Out-of-line wrapper around the encrypt_block4x macro so that the mode
 * routines below can reach it with bl. It operates on v0-v3. The macro is
 * not defined in this file; w3/x2 are the rounds/round-key registers used
 * throughout this file, and x8/w7 are presumably scratch operands - confirm
 * against the macro definition in the including file.
 */
25 aes_encrypt_block4x:
26         encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
27         ret
28 ENDPROC(aes_encrypt_block4x)
/*
 * Out-of-line wrapper around the decrypt_block4x macro, reached with bl
 * from the mode routines below. It operates on v0-v3 and takes the same
 * operand list as aes_encrypt_block4x; the macro itself is defined outside
 * this file.
 */
30 aes_decrypt_block4x:
31         decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
32         ret
33 ENDPROC(aes_decrypt_block4x)
/*
 * 5-way counterparts of the 4x wrappers above, assembled only when
 * MAX_STRIDE == 5. They operate on v0-v4 and forward to the
 * encrypt_block5x / decrypt_block5x macros, which are defined outside this
 * file.
 */
35 #if MAX_STRIDE == 5
36 aes_encrypt_block5x:
37         encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
38         ret
39 ENDPROC(aes_encrypt_block5x)
41 aes_decrypt_block5x:
42         decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
43         ret
44 ENDPROC(aes_decrypt_block5x)
45 #endif
47         /*
48          * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
49          *                 int blocks)
50          * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
51          *                 int blocks)
52          */
/*
 * ECB encryption of w4 whole blocks read from [x1] and written to [x0].
 * Register use follows the prototype order: x0 = out, x1 = in, x2 = rk,
 * w3 = rounds, w4 = blocks. Blocks are processed MAX_STRIDE at a time while
 * at least that many remain, then one at a time.
 */
54 AES_ENTRY(aes_ecb_encrypt)
        /* Set up a frame record: the bl in the Nx loop overwrites x30. */
55         stp             x29, x30, [sp, #-16]!
56         mov             x29, sp
58         enc_prepare     w3, x2, x5
60 .LecbencloopNx:
        /* Negative result: fewer than MAX_STRIDE blocks are left. */
61         subs            w4, w4, #MAX_STRIDE
62         bmi             .Lecbenc1x
63         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
64 ST4(    bl              aes_encrypt_block4x             )
65 ST5(    ld1             {v4.16b}, [x1], #16             )
66 ST5(    bl              aes_encrypt_block5x             )
67         st1             {v0.16b-v3.16b}, [x0], #64
68 ST5(    st1             {v4.16b}, [x0], #16             )
69         b               .LecbencloopNx
70 .Lecbenc1x:
        /* Undo the subtraction above; zero means no blocks remain. */
71         adds            w4, w4, #MAX_STRIDE
72         beq             .Lecbencout
73 .Lecbencloop:
74         ld1             {v0.16b}, [x1], #16             /* get next pt block */
75         encrypt_block   v0, w3, x2, x5, w6
76         st1             {v0.16b}, [x0], #16
77         subs            w4, w4, #1
78         bne             .Lecbencloop
79 .Lecbencout:
80         ldp             x29, x30, [sp], #16
81         ret
82 AES_ENDPROC(aes_ecb_encrypt)
/*
 * ECB decryption of w4 whole blocks read from [x1] and written to [x0].
 * Register use follows the prototype order: x0 = out, x1 = in, x2 = rk,
 * w3 = rounds, w4 = blocks. The structure mirrors aes_ecb_encrypt: an
 * MAX_STRIDE-way loop followed by a single-block loop.
 */
85 AES_ENTRY(aes_ecb_decrypt)
        /* Set up a frame record: the bl in the Nx loop overwrites x30. */
86         stp             x29, x30, [sp, #-16]!
87         mov             x29, sp
89         dec_prepare     w3, x2, x5
91 .LecbdecloopNx:
        /* Negative result: fewer than MAX_STRIDE blocks are left. */
92         subs            w4, w4, #MAX_STRIDE
93         bmi             .Lecbdec1x
94         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
95 ST4(    bl              aes_decrypt_block4x             )
96 ST5(    ld1             {v4.16b}, [x1], #16             )
97 ST5(    bl              aes_decrypt_block5x             )
98         st1             {v0.16b-v3.16b}, [x0], #64
99 ST5(    st1             {v4.16b}, [x0], #16             )
100         b               .LecbdecloopNx
101 .Lecbdec1x:
        /* Undo the subtraction above; zero means no blocks remain. */
102         adds            w4, w4, #MAX_STRIDE
103         beq             .Lecbdecout
104 .Lecbdecloop:
105         ld1             {v0.16b}, [x1], #16             /* get next ct block */
106         decrypt_block   v0, w3, x2, x5, w6
107         st1             {v0.16b}, [x0], #16
108         subs            w4, w4, #1
109         bne             .Lecbdecloop
110 .Lecbdecout:
111         ldp             x29, x30, [sp], #16
112         ret
113 AES_ENDPROC(aes_ecb_decrypt)
116         /*
117          * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
118          *                 int blocks, u8 iv[])
119          * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
120          *                 int blocks, u8 iv[])
121          */
/*
 * CBC encryption of w4 blocks read from [x1] and written to [x0]; x5 points
 * to the IV. v4 carries the chaining value: it is loaded from [x5] on entry
 * and stored back to [x5] on exit. This routine issues no bl itself and
 * sets up no frame record.
 */
123 AES_ENTRY(aes_cbc_encrypt)
124         ld1             {v4.16b}, [x5]                  /* get iv */
125         enc_prepare     w3, x2, x6
127 .Lcbcencloop4x:
        /*
         * Four blocks per iteration, but encrypted strictly one after the
         * other: each block is xored with the previous ciphertext first.
         */
128         subs            w4, w4, #4
129         bmi             .Lcbcenc1x
130         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
131         eor             v0.16b, v0.16b, v4.16b          /* ..and xor with iv */
132         encrypt_block   v0, w3, x2, x6, w7
133         eor             v1.16b, v1.16b, v0.16b
134         encrypt_block   v1, w3, x2, x6, w7
135         eor             v2.16b, v2.16b, v1.16b
136         encrypt_block   v2, w3, x2, x6, w7
137         eor             v3.16b, v3.16b, v2.16b
138         encrypt_block   v3, w3, x2, x6, w7
139         st1             {v0.16b-v3.16b}, [x0], #64
        /* The last ciphertext block chains into the next iteration. */
140         mov             v4.16b, v3.16b
141         b               .Lcbcencloop4x
142 .Lcbcenc1x:
        /* Undo the subtraction above; zero means no blocks remain. */
143         adds            w4, w4, #4
144         beq             .Lcbcencout
145 .Lcbcencloop:
146         ld1             {v0.16b}, [x1], #16             /* get next pt block */
147         eor             v4.16b, v4.16b, v0.16b          /* ..and xor with iv */
148         encrypt_block   v4, w3, x2, x6, w7
149         st1             {v4.16b}, [x0], #16
150         subs            w4, w4, #1
151         bne             .Lcbcencloop
152 .Lcbcencout:
153         st1             {v4.16b}, [x5]                  /* return iv */
154         ret
155 AES_ENDPROC(aes_cbc_encrypt)
/*
 * CBC decryption of w4 blocks read from [x1] and written to [x0]; x5 points
 * to the IV. The chaining value lives in cbciv, a register name that is not
 * defined in this file (presumably an alias set up by the including file).
 * It is loaded from [x5] on entry and stored back on exit. Unlike CBC
 * encryption, the blocks of one stride are decrypted together and the xor
 * with the preceding ciphertext is applied afterwards.
 */
158 AES_ENTRY(aes_cbc_decrypt)
        /* Set up a frame record: the bl in the Nx loop overwrites x30. */
159         stp             x29, x30, [sp, #-16]!
160         mov             x29, sp
162         ld1             {cbciv.16b}, [x5]               /* get iv */
163         dec_prepare     w3, x2, x6
165 .LcbcdecloopNx:
166         subs            w4, w4, #MAX_STRIDE
167         bmi             .Lcbcdec1x
168         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
169 #if MAX_STRIDE == 5
170         ld1             {v4.16b}, [x1], #16             /* get 1 ct block */
        /* Keep copies of ct blocks 0-2; v0-v4 are decrypted in place. */
171         mov             v5.16b, v0.16b
172         mov             v6.16b, v1.16b
173         mov             v7.16b, v2.16b
174         bl              aes_decrypt_block5x
        /*
         * ct blocks 3 and 4 were not copied, so step x1 back by two blocks
         * and re-read them: block 3 is xored into output 4, and block 4
         * becomes the chaining value for the next iteration. After the two
         * reloads x1 is again just past the five blocks.
         */
175         sub             x1, x1, #32
176         eor             v0.16b, v0.16b, cbciv.16b
177         eor             v1.16b, v1.16b, v5.16b
178         ld1             {v5.16b}, [x1], #16             /* reload 1 ct block */
179         ld1             {cbciv.16b}, [x1], #16          /* reload 1 ct block */
180         eor             v2.16b, v2.16b, v6.16b
181         eor             v3.16b, v3.16b, v7.16b
182         eor             v4.16b, v4.16b, v5.16b
183 #else
        /* Keep copies of ct blocks 0-2; v0-v3 are decrypted in place. */
184         mov             v4.16b, v0.16b
185         mov             v5.16b, v1.16b
186         mov             v6.16b, v2.16b
187         bl              aes_decrypt_block4x
        /*
         * ct block 3 was not copied: step x1 back one block and re-read it
         * into cbciv as the chaining value for the next iteration.
         */
188         sub             x1, x1, #16
189         eor             v0.16b, v0.16b, cbciv.16b
190         eor             v1.16b, v1.16b, v4.16b
191         ld1             {cbciv.16b}, [x1], #16          /* reload 1 ct block */
192         eor             v2.16b, v2.16b, v5.16b
193         eor             v3.16b, v3.16b, v6.16b
194 #endif
195         st1             {v0.16b-v3.16b}, [x0], #64
196 ST5(    st1             {v4.16b}, [x0], #16             )
197         b               .LcbcdecloopNx
198 .Lcbcdec1x:
        /* Undo the subtraction above; zero means no blocks remain. */
199         adds            w4, w4, #MAX_STRIDE
200         beq             .Lcbcdecout
201 .Lcbcdecloop:
202         ld1             {v1.16b}, [x1], #16             /* get next ct block */
203         mov             v0.16b, v1.16b                  /* ...and copy to v0 */
204         decrypt_block   v0, w3, x2, x6, w7
205         eor             v0.16b, v0.16b, cbciv.16b       /* xor with iv => pt */
206         mov             cbciv.16b, v1.16b               /* ct is next iv */
207         st1             {v0.16b}, [x0], #16
208         subs            w4, w4, #1
209         bne             .Lcbcdecloop
210 .Lcbcdecout:
211         st1             {cbciv.16b}, [x5]               /* return iv */
212         ldp             x29, x30, [sp], #16
213         ret
214 AES_ENDPROC(aes_cbc_decrypt)
217         /*
218          * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
219          *                     int rounds, int bytes, u8 const iv[])
220          * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
221          *                     int rounds, int bytes, u8 const iv[])
222          */
/*
 * Encrypts the last x4 ("bytes") bytes of a CBC-CTS message: one full
 * block followed by a tail of (bytes - 16) bytes. x0 = out, x1 = in,
 * x2 = rk, w3 = rounds, x5 = iv (read only; it is not written back).
 * NOTE(review): the overlapping loads/stores presumably require
 * 16 < bytes <= 32; nothing here checks that - confirm against the caller.
 */
224 AES_ENTRY(aes_cbc_cts_encrypt)
225         adr_l           x8, .Lcts_permute_table
        /* x4 = tail length (bytes - 16); it selects the two table windows. */
226         sub             x4, x4, #16
227         add             x9, x8, #32
228         add             x8, x8, x4
229         sub             x9, x9, x4
        /*
         * v3 (table + tail): moves the first `tail` bytes of a vector to
         * its end. v4 (table + 32 - tail): moves the last `tail` bytes of a
         * vector to its front. Every other index is 0xff.
         */
230         ld1             {v3.16b}, [x8]
231         ld1             {v4.16b}, [x9]
        /* v0 = first 16 input bytes, v1 = last 16 input bytes. */
233         ld1             {v0.16b}, [x1], x4              /* overlapping loads */
234         ld1             {v1.16b}, [x1]
236         ld1             {v5.16b}, [x5]                  /* get iv */
237         enc_prepare     w3, x2, x6
239         eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
        /* tbl writes 0 for the 0xff indices: v1 = tail bytes, zero padded. */
240         tbl             v1.16b, {v1.16b}, v4.16b
241         encrypt_block   v0, w3, x2, x6, w7
243         eor             v1.16b, v1.16b, v0.16b
        /* Move the leading `tail` bytes of the first ciphertext to the end. */
244         tbl             v0.16b, {v0.16b}, v3.16b
245         encrypt_block   v1, w3, x2, x6, w7
        /*
         * v0 is stored first at out + tail, then v1 at out, so where the
         * two stores overlap the bytes of v1 are the ones that remain.
         */
247         add             x4, x0, x4
248         st1             {v0.16b}, [x4]                  /* overlapping stores */
249         st1             {v1.16b}, [x0]
250         ret
251 AES_ENDPROC(aes_cbc_cts_encrypt)
/*
 * Decrypts the last x4 ("bytes") bytes of a CBC-CTS message: one full
 * block followed by a tail of (bytes - 16) bytes. x0 = out, x1 = in,
 * x2 = rk, w3 = rounds, x5 = iv (read only; it is not written back).
 * NOTE(review): the overlapping loads/stores presumably require
 * 16 < bytes <= 32; nothing here checks that - confirm against the caller.
 */
253 AES_ENTRY(aes_cbc_cts_decrypt)
254         adr_l           x8, .Lcts_permute_table
        /* x4 = tail length (bytes - 16); it selects the two table windows. */
255         sub             x4, x4, #16
256         add             x9, x8, #32
257         add             x8, x8, x4
258         sub             x9, x9, x4
        /*
         * v3 (table + tail): moves the first `tail` bytes of a vector to
         * its end. v4 (table + 32 - tail): moves the last `tail` bytes of a
         * vector to its front. Every other index is 0xff.
         */
259         ld1             {v3.16b}, [x8]
260         ld1             {v4.16b}, [x9]
        /* v0 = first 16 input bytes, v1 = last 16 input bytes. */
262         ld1             {v0.16b}, [x1], x4              /* overlapping loads */
263         ld1             {v1.16b}, [x1]
265         ld1             {v5.16b}, [x5]                  /* get iv */
266         dec_prepare     w3, x2, x6
        /* tbl writes 0 for the 0xff indices: v2 = tail bytes, zero padded. */
268         tbl             v2.16b, {v1.16b}, v4.16b
269         decrypt_block   v0, w3, x2, x6, w7
        /* The first `tail` bytes of v2 are now the plaintext tail. */
270         eor             v2.16b, v2.16b, v0.16b
        /*
         * tbx keeps the destination byte for the 0xff indices: v0 becomes
         * the tail bytes followed by the remaining bytes of the block that
         * was just decrypted, i.e. a full 16-byte block to decrypt next.
         */
272         tbx             v0.16b, {v1.16b}, v4.16b
        /* Move the plaintext tail to the end of v2 for the store below. */
273         tbl             v2.16b, {v2.16b}, v3.16b
274         decrypt_block   v0, w3, x2, x6, w7
275         eor             v0.16b, v0.16b, v5.16b          /* xor with iv */
        /*
         * v2 is stored first at out + tail, then v0 at out, so where the
         * two stores overlap the bytes of v0 are the ones that remain.
         */
277         add             x4, x0, x4
278         st1             {v2.16b}, [x4]                  /* overlapping stores */
279         st1             {v0.16b}, [x0]
280         ret
281 AES_ENDPROC(aes_cbc_cts_decrypt)
/*
 * 48-byte index table for the tbl/tbx instructions in the CBC-CTS routines
 * above: 16 bytes of 0xff, the identity indices 0x0-0xf, then 16 more bytes
 * of 0xff. The routines load 16-byte windows at offsets (bytes - 16) and
 * 32 - (bytes - 16), which slides the identity run within the vector. The
 * table lives in .rodata; .previous switches back to the section that was
 * active before.
 */
283         .section        ".rodata", "a"
284         .align          6
285 .Lcts_permute_table:
286         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
287         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
288         .byte            0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
289         .byte            0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
290         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
291         .byte           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
292         .previous
295         /*
296          * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
297          *                 int blocks, u8 ctr[])
298          */
/*
 * CTR mode: x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = ctr.
 * The counter block is kept in vctr, a register name that is not defined in
 * this file (presumably an alias set up by the including file); it is
 * loaded from [x5] on entry and the next counter value is stored back on
 * exit. x6 holds a byte-reversed copy of vctr.d[1] so the counter can be
 * incremented with ordinary integer adds.
 */
300 AES_ENTRY(aes_ctr_encrypt)
        /* Set up a frame record: the bl in the Nx loop overwrites x30. */
301         stp             x29, x30, [sp, #-16]!
302         mov             x29, sp
304         enc_prepare     w3, x2, x6
305         ld1             {vctr.16b}, [x5]
307         umov            x6, vctr.d[1]           /* keep swabbed ctr in reg */
308         rev             x6, x6
        /*
         * The Nx loop only patches the last 32-bit lane of each counter
         * copy. If adding the block count to the low 32 bits of the counter
         * carries, use the one-block loop, which propagates carries.
         */
309         cmn             w6, w4                  /* 32 bit overflow? */
310         bcs             .Lctrloop
311 .LctrloopNx:
312         subs            w4, w4, #MAX_STRIDE
313         bmi             .Lctr1x
        /*
         * Build counter blocks ctr+0 .. ctr+(MAX_STRIDE-1) in v0..v3 (and
         * v4 for the 5-way build): copy vctr, then overwrite lane s[3] with
         * the byte-reversed incremented low word.
         */
314         add             w7, w6, #1
315         mov             v0.16b, vctr.16b
316         add             w8, w6, #2
317         mov             v1.16b, vctr.16b
318         add             w9, w6, #3
319         mov             v2.16b, vctr.16b
321         rev             w7, w7
322         mov             v3.16b, vctr.16b
323         rev             w8, w8
324 ST5(    mov             v4.16b, vctr.16b                )
325         mov             v1.s[3], w7
326         rev             w9, w9
327 ST5(    add             w10, w6, #4                     )
328         mov             v2.s[3], w8
329 ST5(    rev             w10, w10                        )
330         mov             v3.s[3], w9
331 ST5(    mov             v4.s[3], w10                    )
332         ld1             {v5.16b-v7.16b}, [x1], #48      /* get 3 input blocks */
333 ST4(    bl              aes_encrypt_block4x             )
334 ST5(    bl              aes_encrypt_block5x             )
        /*
         * xor the keystream with the input; the remaining input block(s)
         * are loaded into v5 (and v6) once their previous contents have
         * been consumed.
         */
335         eor             v0.16b, v5.16b, v0.16b
336 ST4(    ld1             {v5.16b}, [x1], #16             )
337         eor             v1.16b, v6.16b, v1.16b
338 ST5(    ld1             {v5.16b-v6.16b}, [x1], #32      )
339         eor             v2.16b, v7.16b, v2.16b
340         eor             v3.16b, v5.16b, v3.16b
341 ST5(    eor             v4.16b, v6.16b, v4.16b          )
342         st1             {v0.16b-v3.16b}, [x0], #64
343 ST5(    st1             {v4.16b}, [x0], #16             )
        /* Advance the counter by one stride and refresh vctr.d[1]. */
344         add             x6, x6, #MAX_STRIDE
345         rev             x7, x6
346         ins             vctr.d[1], x7
347         cbz             w4, .Lctrout
348         b               .LctrloopNx
349 .Lctr1x:
        /* Undo the subtraction above; zero means no blocks remain. */
350         adds            w4, w4, #MAX_STRIDE
351         beq             .Lctrout
352 .Lctrloop:
353         mov             v0.16b, vctr.16b
354         encrypt_block   v0, w3, x2, x8, w7
356         adds            x6, x6, #1              /* increment BE ctr */
357         rev             x7, x6
358         ins             vctr.d[1], x7
359         bcs             .Lctrcarry              /* overflow? */
361 .Lctrcarrydone:
362         subs            w4, w4, #1
363         bmi             .Lctrtailblock          /* blocks <0 means tail block */
364         ld1             {v3.16b}, [x1], #16
365         eor             v3.16b, v0.16b, v3.16b
366         st1             {v3.16b}, [x0], #16
        /* Flags are still those of the subs above. */
367         bne             .Lctrloop
369 .Lctrout:
370         st1             {vctr.16b}, [x5]        /* return next CTR value */
371         ldp             x29, x30, [sp], #16
372         ret
374 .Lctrtailblock:
        /*
         * Tail block: the raw keystream block is written to out and no
         * input is read; the caller presumably does the final xor itself.
         */
375         st1             {v0.16b}, [x0]
376         b               .Lctrout
378 .Lctrcarry:
379         umov            x7, vctr.d[0]           /* load upper word of ctr  */
380         rev             x7, x7                  /* ... to handle the carry */
381         add             x7, x7, #1
382         rev             x7, x7
383         ins             vctr.d[0], x7
384         b               .Lctrcarrydone
385 AES_ENDPROC(aes_ctr_encrypt)
388         /*
389          * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
390          *                 int blocks, u8 const rk2[], u8 iv[], int first)
391          * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
392          *                 int blocks, u8 const rk2[], u8 iv[], int first)
393          */
/*
 * next_tweak out, in, tmp
 * Derives the next tweak: \out = \in doubled as a 128-bit value, with the
 * carries supplied by xtsmask. xtsmask is a register name that is not
 * defined in this file; xts_load_mask fills it with 64-bit lanes {1, 0x87}.
 * \tmp is clobbered; \out may be the same register as \in.
 */
395         .macro          next_tweak, out, in, tmp
        /* Per 64-bit lane: all ones if the lane's top bit is set, else 0. */
396         sshr            \tmp\().2d,  \in\().2d,   #63
397         and             \tmp\().16b, \tmp\().16b, xtsmask.16b
        /* in + in shifts each 64-bit lane left by one bit. */
398         add             \out\().2d,  \in\().2d,   \in\().2d
        /* Swap the two halves so each lane's carry lands in the other. */
399         ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
400         eor             \out\().16b, \out\().16b, \tmp\().16b
401         .endm
/*
 * xts_load_mask tmp
 * Builds the constant used by next_tweak in xtsmask (a register name that
 * is not defined in this file). \tmp is clobbered.
 */
403         .macro          xts_load_mask, tmp
404         movi            xtsmask.2s, #0x1
405         movi            \tmp\().2s, #0x87
        /* Even 32-bit lanes of both sources: xtsmask.4s = {1, 0, 0x87, 0}. */
406         uzp1            xtsmask.4s, xtsmask.4s, \tmp\().4s
407         .endm
/*
 * XTS encryption of w4 blocks read from [x1] and written to [x0] with the
 * key at x2 (rk1); w3 = rounds. x6 points to the tweak, which is loaded
 * into v4 on entry and stored back on exit. If w7 ("first") is non-zero,
 * the loaded value is first encrypted with the key at x5 (rk2) to produce
 * the initial tweak; otherwise the loaded tweak is advanced once before
 * use. The value stored on exit is the tweak of the last block processed.
 * The interleaved loop here is always 4-way, independent of MAX_STRIDE.
 */
409 AES_ENTRY(aes_xts_encrypt)
        /* Set up a frame record: the bl in the 4x loop overwrites x30. */
410         stp             x29, x30, [sp, #-16]!
411         mov             x29, sp
413         ld1             {v4.16b}, [x6]
414         xts_load_mask   v8
415         cbz             w7, .Lxtsencnotfirst
        /* w7 has been tested above and is reused as a macro operand below. */
417         enc_prepare     w3, x5, x8
418         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
419         enc_switch_key  w3, x2, x8
420         b               .LxtsencNx
422 .Lxtsencnotfirst:
423         enc_prepare     w3, x2, x8
424 .LxtsencloopNx:
425         next_tweak      v4, v4, v8
426 .LxtsencNx:
427         subs            w4, w4, #4
428         bmi             .Lxtsenc1x
        /*
         * v4-v7 hold the tweaks for the four blocks; each block is xored
         * with its tweak both before and after the block cipher.
         */
429         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
430         next_tweak      v5, v4, v8
431         eor             v0.16b, v0.16b, v4.16b
432         next_tweak      v6, v5, v8
433         eor             v1.16b, v1.16b, v5.16b
434         eor             v2.16b, v2.16b, v6.16b
435         next_tweak      v7, v6, v8
436         eor             v3.16b, v3.16b, v7.16b
437         bl              aes_encrypt_block4x
438         eor             v3.16b, v3.16b, v7.16b
439         eor             v0.16b, v0.16b, v4.16b
440         eor             v1.16b, v1.16b, v5.16b
441         eor             v2.16b, v2.16b, v6.16b
442         st1             {v0.16b-v3.16b}, [x0], #64
        /* Keep the last tweak used; it is advanced at .LxtsencloopNx. */
443         mov             v4.16b, v7.16b
444         cbz             w4, .Lxtsencout
        /*
         * xts_reload_mask is defined outside this file; presumably it
         * restores xtsmask where the 4x helper may have clobbered it.
         */
445         xts_reload_mask v8
446         b               .LxtsencloopNx
447 .Lxtsenc1x:
        /* Undo the subtraction above; zero means no blocks remain. */
448         adds            w4, w4, #4
449         beq             .Lxtsencout
450 .Lxtsencloop:
451         ld1             {v1.16b}, [x1], #16
452         eor             v0.16b, v1.16b, v4.16b
453         encrypt_block   v0, w3, x2, x8, w7
454         eor             v0.16b, v0.16b, v4.16b
455         st1             {v0.16b}, [x0], #16
456         subs            w4, w4, #1
457         beq             .Lxtsencout
458         next_tweak      v4, v4, v8
459         b               .Lxtsencloop
460 .Lxtsencout:
461         st1             {v4.16b}, [x6]
462         ldp             x29, x30, [sp], #16
463         ret
464 AES_ENDPROC(aes_xts_encrypt)
/*
 * XTS decryption of w4 blocks read from [x1] and written to [x0] with the
 * key at x2 (rk1); w3 = rounds. x6 points to the tweak, which is loaded
 * into v4 on entry and stored back on exit. If w7 ("first") is non-zero,
 * the loaded value is first *encrypted* with the key at x5 (rk2) to produce
 * the initial tweak; only the data blocks go through the decrypt path. The
 * value stored on exit is the tweak of the last block processed. The
 * interleaved loop here is always 4-way, independent of MAX_STRIDE.
 */
467 AES_ENTRY(aes_xts_decrypt)
        /* Set up a frame record: the bl in the 4x loop overwrites x30. */
468         stp             x29, x30, [sp, #-16]!
469         mov             x29, sp
471         ld1             {v4.16b}, [x6]
472         xts_load_mask   v8
473         cbz             w7, .Lxtsdecnotfirst
        /* w7 has been tested above and is reused as a macro operand below. */
475         enc_prepare     w3, x5, x8
476         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
477         dec_prepare     w3, x2, x8
478         b               .LxtsdecNx
480 .Lxtsdecnotfirst:
481         dec_prepare     w3, x2, x8
482 .LxtsdecloopNx:
483         next_tweak      v4, v4, v8
484 .LxtsdecNx:
485         subs            w4, w4, #4
486         bmi             .Lxtsdec1x
        /*
         * v4-v7 hold the tweaks for the four blocks; each block is xored
         * with its tweak both before and after the block cipher.
         */
487         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
488         next_tweak      v5, v4, v8
489         eor             v0.16b, v0.16b, v4.16b
490         next_tweak      v6, v5, v8
491         eor             v1.16b, v1.16b, v5.16b
492         eor             v2.16b, v2.16b, v6.16b
493         next_tweak      v7, v6, v8
494         eor             v3.16b, v3.16b, v7.16b
495         bl              aes_decrypt_block4x
496         eor             v3.16b, v3.16b, v7.16b
497         eor             v0.16b, v0.16b, v4.16b
498         eor             v1.16b, v1.16b, v5.16b
499         eor             v2.16b, v2.16b, v6.16b
500         st1             {v0.16b-v3.16b}, [x0], #64
        /* Keep the last tweak used; it is advanced at .LxtsdecloopNx. */
501         mov             v4.16b, v7.16b
502         cbz             w4, .Lxtsdecout
        /*
         * xts_reload_mask is defined outside this file; presumably it
         * restores xtsmask where the 4x helper may have clobbered it.
         */
503         xts_reload_mask v8
504         b               .LxtsdecloopNx
505 .Lxtsdec1x:
        /* Undo the subtraction above; zero means no blocks remain. */
506         adds            w4, w4, #4
507         beq             .Lxtsdecout
508 .Lxtsdecloop:
509         ld1             {v1.16b}, [x1], #16
510         eor             v0.16b, v1.16b, v4.16b
511         decrypt_block   v0, w3, x2, x8, w7
512         eor             v0.16b, v0.16b, v4.16b
513         st1             {v0.16b}, [x0], #16
514         subs            w4, w4, #1
515         beq             .Lxtsdecout
516         next_tweak      v4, v4, v8
517         b               .Lxtsdecloop
518 .Lxtsdecout:
519         st1             {v4.16b}, [x6]
520         ldp             x29, x30, [sp], #16
521         ret
522 AES_ENDPROC(aes_xts_decrypt)
524         /*
525          * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
526          *                int blocks, u8 dg[], int enc_before, int enc_after)
527          */
/*
 * CBC-MAC style update: folds w3 blocks read from [x0] into the 16-byte
 * digest at [x4]. Per the prototype: x0 = in, x1 = rk, w2 = rounds,
 * w3 = blocks, x4 = dg, w5 = enc_before, w6 = enc_after. The digest is
 * encrypted once up front when enc_before is non-zero. After the final
 * block has been xored in, it is encrypted only when enc_after is non-zero.
 */
528 AES_ENTRY(aes_mac_update)
        /*
         * frame_push is defined outside this file; presumably it saves
         * x19-x24, which hold the arguments copied below.
         */
529         frame_push      6
531         mov             x19, x0
532         mov             x20, x1
533         mov             x21, x2
534         mov             x22, x3
535         mov             x23, x4
536         mov             x24, x6
538         ld1             {v0.16b}, [x23]                 /* get dg */
539         enc_prepare     w2, x1, x7
540         cbz             w5, .Lmacloop4x
542         encrypt_block   v0, w2, x1, x7, w8
544 .Lmacloop4x:
545         subs            w22, w22, #4
546         bmi             .Lmac1x
547         ld1             {v1.16b-v4.16b}, [x19], #64     /* get next 4 pt blocks */
548         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
549         encrypt_block   v0, w21, x20, x7, w8
550         eor             v0.16b, v0.16b, v2.16b
551         encrypt_block   v0, w21, x20, x7, w8
552         eor             v0.16b, v0.16b, v3.16b
553         encrypt_block   v0, w21, x20, x7, w8
554         eor             v0.16b, v0.16b, v4.16b
        /*
         * x5 = enc_after if this was the last block, all ones otherwise.
         * So the encryption below is skipped only for the final block, and
         * only when enc_after is zero.
         */
555         cmp             w22, wzr
556         csinv           x5, x24, xzr, eq
557         cbz             w5, .Lmacout
558         encrypt_block   v0, w21, x20, x7, w8
559         st1             {v0.16b}, [x23]                 /* return dg */
        /*
         * cond_yield_neon is defined outside this file; presumably it may
         * give up the NEON unit and resume at .Lmacrestart, which reloads
         * the digest stored just above and re-prepares the key.
         */
560         cond_yield_neon .Lmacrestart
561         b               .Lmacloop4x
562 .Lmac1x:
        /* Undo the subtraction above to recover the remaining count. */
563         add             w22, w22, #4
564 .Lmacloop:
565         cbz             w22, .Lmacout
566         ld1             {v1.16b}, [x19], #16            /* get next pt block */
567         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
        /* Same last-block / enc_after selection as in the 4x loop. */
569         subs            w22, w22, #1
570         csinv           x5, x24, xzr, eq
571         cbz             w5, .Lmacout
573 .Lmacenc:
574         encrypt_block   v0, w21, x20, x7, w8
575         b               .Lmacloop
577 .Lmacout:
578         st1             {v0.16b}, [x23]                 /* return dg */
579         frame_pop
580         ret
582 .Lmacrestart:
583         ld1             {v0.16b}, [x23]                 /* get dg */
584         enc_prepare     w21, x20, x0
585         b               .Lmacloop4x
586 AES_ENDPROC(aes_mac_update)