Linux 4.18.10
[linux/fpc-iii.git] / arch / arm64 / crypto / aes-modes.S
blob483a7130cf0e118de591837a067c4a489ca12a5e
1 /*
2  * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
3  *
4  * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
11 /* included by aes-ce.S and aes-neon.S */
13         .text
14         .align          4
16 aes_encrypt_block4x:
17         encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
18         ret
19 ENDPROC(aes_encrypt_block4x)
21 aes_decrypt_block4x:
22         decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
23         ret
24 ENDPROC(aes_decrypt_block4x)
26         /*
27          * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
28          *                 int blocks)
29          * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
30          *                 int blocks)
31          */
33 AES_ENTRY(aes_ecb_encrypt)
34         frame_push      5
36         mov             x19, x0
37         mov             x20, x1
38         mov             x21, x2
39         mov             x22, x3
40         mov             x23, x4
42 .Lecbencrestart:
43         enc_prepare     w22, x21, x5
45 .LecbencloopNx:
46         subs            w23, w23, #4
47         bmi             .Lecbenc1x
48         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 pt blocks */
49         bl              aes_encrypt_block4x
50         st1             {v0.16b-v3.16b}, [x19], #64
51         cond_yield_neon .Lecbencrestart
52         b               .LecbencloopNx
53 .Lecbenc1x:
54         adds            w23, w23, #4
55         beq             .Lecbencout
56 .Lecbencloop:
57         ld1             {v0.16b}, [x20], #16            /* get next pt block */
58         encrypt_block   v0, w22, x21, x5, w6
59         st1             {v0.16b}, [x19], #16
60         subs            w23, w23, #1
61         bne             .Lecbencloop
62 .Lecbencout:
63         frame_pop
64         ret
65 AES_ENDPROC(aes_ecb_encrypt)
68 AES_ENTRY(aes_ecb_decrypt)
69         frame_push      5
71         mov             x19, x0
72         mov             x20, x1
73         mov             x21, x2
74         mov             x22, x3
75         mov             x23, x4
77 .Lecbdecrestart:
78         dec_prepare     w22, x21, x5
80 .LecbdecloopNx:
81         subs            w23, w23, #4
82         bmi             .Lecbdec1x
83         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 ct blocks */
84         bl              aes_decrypt_block4x
85         st1             {v0.16b-v3.16b}, [x19], #64
86         cond_yield_neon .Lecbdecrestart
87         b               .LecbdecloopNx
88 .Lecbdec1x:
89         adds            w23, w23, #4
90         beq             .Lecbdecout
91 .Lecbdecloop:
92         ld1             {v0.16b}, [x20], #16            /* get next ct block */
93         decrypt_block   v0, w22, x21, x5, w6
94         st1             {v0.16b}, [x19], #16
95         subs            w23, w23, #1
96         bne             .Lecbdecloop
97 .Lecbdecout:
98         frame_pop
99         ret
100 AES_ENDPROC(aes_ecb_decrypt)
103         /*
104          * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
105          *                 int blocks, u8 iv[])
106          * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
107          *                 int blocks, u8 iv[])
108          */
110 AES_ENTRY(aes_cbc_encrypt)
111         frame_push      6
113         mov             x19, x0
114         mov             x20, x1
115         mov             x21, x2
116         mov             x22, x3
117         mov             x23, x4
118         mov             x24, x5
120 .Lcbcencrestart:
121         ld1             {v4.16b}, [x24]                 /* get iv */
122         enc_prepare     w22, x21, x6
124 .Lcbcencloop4x:
125         subs            w23, w23, #4
126         bmi             .Lcbcenc1x
127         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 pt blocks */
128         eor             v0.16b, v0.16b, v4.16b          /* ..and xor with iv */
129         encrypt_block   v0, w22, x21, x6, w7
130         eor             v1.16b, v1.16b, v0.16b
131         encrypt_block   v1, w22, x21, x6, w7
132         eor             v2.16b, v2.16b, v1.16b
133         encrypt_block   v2, w22, x21, x6, w7
134         eor             v3.16b, v3.16b, v2.16b
135         encrypt_block   v3, w22, x21, x6, w7
136         st1             {v0.16b-v3.16b}, [x19], #64
137         mov             v4.16b, v3.16b
138         st1             {v4.16b}, [x24]                 /* return iv */
139         cond_yield_neon .Lcbcencrestart
140         b               .Lcbcencloop4x
141 .Lcbcenc1x:
142         adds            w23, w23, #4
143         beq             .Lcbcencout
144 .Lcbcencloop:
145         ld1             {v0.16b}, [x20], #16            /* get next pt block */
146         eor             v4.16b, v4.16b, v0.16b          /* ..and xor with iv */
147         encrypt_block   v4, w22, x21, x6, w7
148         st1             {v4.16b}, [x19], #16
149         subs            w23, w23, #1
150         bne             .Lcbcencloop
151 .Lcbcencout:
152         st1             {v4.16b}, [x24]                 /* return iv */
153         frame_pop
154         ret
155 AES_ENDPROC(aes_cbc_encrypt)
158 AES_ENTRY(aes_cbc_decrypt)
159         frame_push      6
161         mov             x19, x0
162         mov             x20, x1
163         mov             x21, x2
164         mov             x22, x3
165         mov             x23, x4
166         mov             x24, x5
168 .Lcbcdecrestart:
169         ld1             {v7.16b}, [x24]                 /* get iv */
170         dec_prepare     w22, x21, x6
172 .LcbcdecloopNx:
173         subs            w23, w23, #4
174         bmi             .Lcbcdec1x
175         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 ct blocks */
176         mov             v4.16b, v0.16b
177         mov             v5.16b, v1.16b
178         mov             v6.16b, v2.16b
179         bl              aes_decrypt_block4x
180         sub             x20, x20, #16
181         eor             v0.16b, v0.16b, v7.16b
182         eor             v1.16b, v1.16b, v4.16b
183         ld1             {v7.16b}, [x20], #16            /* reload 1 ct block */
184         eor             v2.16b, v2.16b, v5.16b
185         eor             v3.16b, v3.16b, v6.16b
186         st1             {v0.16b-v3.16b}, [x19], #64
187         st1             {v7.16b}, [x24]                 /* return iv */
188         cond_yield_neon .Lcbcdecrestart
189         b               .LcbcdecloopNx
190 .Lcbcdec1x:
191         adds            w23, w23, #4
192         beq             .Lcbcdecout
193 .Lcbcdecloop:
194         ld1             {v1.16b}, [x20], #16            /* get next ct block */
195         mov             v0.16b, v1.16b                  /* ...and copy to v0 */
196         decrypt_block   v0, w22, x21, x6, w7
197         eor             v0.16b, v0.16b, v7.16b          /* xor with iv => pt */
198         mov             v7.16b, v1.16b                  /* ct is next iv */
199         st1             {v0.16b}, [x19], #16
200         subs            w23, w23, #1
201         bne             .Lcbcdecloop
202 .Lcbcdecout:
203         st1             {v7.16b}, [x24]                 /* return iv */
204         frame_pop
205         ret
206 AES_ENDPROC(aes_cbc_decrypt)
209         /*
210          * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
211          *                 int blocks, u8 ctr[])
212          */
214 AES_ENTRY(aes_ctr_encrypt)
215         frame_push      6
217         mov             x19, x0
218         mov             x20, x1
219         mov             x21, x2
220         mov             x22, x3
221         mov             x23, x4
222         mov             x24, x5
224 .Lctrrestart:
225         enc_prepare     w22, x21, x6
226         ld1             {v4.16b}, [x24]
228         umov            x6, v4.d[1]             /* keep swabbed ctr in reg */
229         rev             x6, x6
230 .LctrloopNx:
231         subs            w23, w23, #4
232         bmi             .Lctr1x
233         cmn             w6, #4                  /* 32 bit overflow? */
234         bcs             .Lctr1x
235         ldr             q8, =0x30000000200000001        /* addends 1,2,3[,0] */
236         dup             v7.4s, w6
237         mov             v0.16b, v4.16b
238         add             v7.4s, v7.4s, v8.4s
239         mov             v1.16b, v4.16b
240         rev32           v8.16b, v7.16b
241         mov             v2.16b, v4.16b
242         mov             v3.16b, v4.16b
243         mov             v1.s[3], v8.s[0]
244         mov             v2.s[3], v8.s[1]
245         mov             v3.s[3], v8.s[2]
246         ld1             {v5.16b-v7.16b}, [x20], #48     /* get 3 input blocks */
247         bl              aes_encrypt_block4x
248         eor             v0.16b, v5.16b, v0.16b
249         ld1             {v5.16b}, [x20], #16            /* get 1 input block  */
250         eor             v1.16b, v6.16b, v1.16b
251         eor             v2.16b, v7.16b, v2.16b
252         eor             v3.16b, v5.16b, v3.16b
253         st1             {v0.16b-v3.16b}, [x19], #64
254         add             x6, x6, #4
255         rev             x7, x6
256         ins             v4.d[1], x7
257         cbz             w23, .Lctrout
258         st1             {v4.16b}, [x24]         /* return next CTR value */
259         cond_yield_neon .Lctrrestart
260         b               .LctrloopNx
261 .Lctr1x:
262         adds            w23, w23, #4
263         beq             .Lctrout
264 .Lctrloop:
265         mov             v0.16b, v4.16b
266         encrypt_block   v0, w22, x21, x8, w7
268         adds            x6, x6, #1              /* increment BE ctr */
269         rev             x7, x6
270         ins             v4.d[1], x7
271         bcs             .Lctrcarry              /* overflow? */
273 .Lctrcarrydone:
274         subs            w23, w23, #1
275         bmi             .Lctrtailblock          /* blocks <0 means tail block */
276         ld1             {v3.16b}, [x20], #16
277         eor             v3.16b, v0.16b, v3.16b
278         st1             {v3.16b}, [x19], #16
279         bne             .Lctrloop
281 .Lctrout:
282         st1             {v4.16b}, [x24]         /* return next CTR value */
283 .Lctrret:
284         frame_pop
285         ret
287 .Lctrtailblock:
288         st1             {v0.16b}, [x19]
289         b               .Lctrret
291 .Lctrcarry:
292         umov            x7, v4.d[0]             /* load upper word of ctr  */
293         rev             x7, x7                  /* ... to handle the carry */
294         add             x7, x7, #1
295         rev             x7, x7
296         ins             v4.d[0], x7
297         b               .Lctrcarrydone
298 AES_ENDPROC(aes_ctr_encrypt)
299         .ltorg
302         /*
303          * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
304          *                 int blocks, u8 const rk2[], u8 iv[], int first)
305          * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
306          *                 int blocks, u8 const rk2[], u8 iv[], int first)
307          */
309         .macro          next_tweak, out, in, const, tmp
310         sshr            \tmp\().2d,  \in\().2d,   #63
311         and             \tmp\().16b, \tmp\().16b, \const\().16b
312         add             \out\().2d,  \in\().2d,   \in\().2d
313         ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
314         eor             \out\().16b, \out\().16b, \tmp\().16b
315         .endm
317 .Lxts_mul_x:
318 CPU_LE( .quad           1, 0x87         )
319 CPU_BE( .quad           0x87, 1         )
321 AES_ENTRY(aes_xts_encrypt)
322         frame_push      6
324         mov             x19, x0
325         mov             x20, x1
326         mov             x21, x2
327         mov             x22, x3
328         mov             x23, x4
329         mov             x24, x6
331         ld1             {v4.16b}, [x24]
332         cbz             w7, .Lxtsencnotfirst
334         enc_prepare     w3, x5, x8
335         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
336         enc_switch_key  w3, x2, x8
337         ldr             q7, .Lxts_mul_x
338         b               .LxtsencNx
340 .Lxtsencrestart:
341         ld1             {v4.16b}, [x24]
342 .Lxtsencnotfirst:
343         enc_prepare     w22, x21, x8
344 .LxtsencloopNx:
345         ldr             q7, .Lxts_mul_x
346         next_tweak      v4, v4, v7, v8
347 .LxtsencNx:
348         subs            w23, w23, #4
349         bmi             .Lxtsenc1x
350         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 pt blocks */
351         next_tweak      v5, v4, v7, v8
352         eor             v0.16b, v0.16b, v4.16b
353         next_tweak      v6, v5, v7, v8
354         eor             v1.16b, v1.16b, v5.16b
355         eor             v2.16b, v2.16b, v6.16b
356         next_tweak      v7, v6, v7, v8
357         eor             v3.16b, v3.16b, v7.16b
358         bl              aes_encrypt_block4x
359         eor             v3.16b, v3.16b, v7.16b
360         eor             v0.16b, v0.16b, v4.16b
361         eor             v1.16b, v1.16b, v5.16b
362         eor             v2.16b, v2.16b, v6.16b
363         st1             {v0.16b-v3.16b}, [x19], #64
364         mov             v4.16b, v7.16b
365         cbz             w23, .Lxtsencout
366         st1             {v4.16b}, [x24]
367         cond_yield_neon .Lxtsencrestart
368         b               .LxtsencloopNx
369 .Lxtsenc1x:
370         adds            w23, w23, #4
371         beq             .Lxtsencout
372 .Lxtsencloop:
373         ld1             {v1.16b}, [x20], #16
374         eor             v0.16b, v1.16b, v4.16b
375         encrypt_block   v0, w22, x21, x8, w7
376         eor             v0.16b, v0.16b, v4.16b
377         st1             {v0.16b}, [x19], #16
378         subs            w23, w23, #1
379         beq             .Lxtsencout
380         next_tweak      v4, v4, v7, v8
381         b               .Lxtsencloop
382 .Lxtsencout:
383         st1             {v4.16b}, [x24]
384         frame_pop
385         ret
386 AES_ENDPROC(aes_xts_encrypt)
389 AES_ENTRY(aes_xts_decrypt)
390         frame_push      6
392         mov             x19, x0
393         mov             x20, x1
394         mov             x21, x2
395         mov             x22, x3
396         mov             x23, x4
397         mov             x24, x6
399         ld1             {v4.16b}, [x24]
400         cbz             w7, .Lxtsdecnotfirst
402         enc_prepare     w3, x5, x8
403         encrypt_block   v4, w3, x5, x8, w7              /* first tweak */
404         dec_prepare     w3, x2, x8
405         ldr             q7, .Lxts_mul_x
406         b               .LxtsdecNx
408 .Lxtsdecrestart:
409         ld1             {v4.16b}, [x24]
410 .Lxtsdecnotfirst:
411         dec_prepare     w22, x21, x8
412 .LxtsdecloopNx:
413         ldr             q7, .Lxts_mul_x
414         next_tweak      v4, v4, v7, v8
415 .LxtsdecNx:
416         subs            w23, w23, #4
417         bmi             .Lxtsdec1x
418         ld1             {v0.16b-v3.16b}, [x20], #64     /* get 4 ct blocks */
419         next_tweak      v5, v4, v7, v8
420         eor             v0.16b, v0.16b, v4.16b
421         next_tweak      v6, v5, v7, v8
422         eor             v1.16b, v1.16b, v5.16b
423         eor             v2.16b, v2.16b, v6.16b
424         next_tweak      v7, v6, v7, v8
425         eor             v3.16b, v3.16b, v7.16b
426         bl              aes_decrypt_block4x
427         eor             v3.16b, v3.16b, v7.16b
428         eor             v0.16b, v0.16b, v4.16b
429         eor             v1.16b, v1.16b, v5.16b
430         eor             v2.16b, v2.16b, v6.16b
431         st1             {v0.16b-v3.16b}, [x19], #64
432         mov             v4.16b, v7.16b
433         cbz             w23, .Lxtsdecout
434         st1             {v4.16b}, [x24]
435         cond_yield_neon .Lxtsdecrestart
436         b               .LxtsdecloopNx
437 .Lxtsdec1x:
438         adds            w23, w23, #4
439         beq             .Lxtsdecout
440 .Lxtsdecloop:
441         ld1             {v1.16b}, [x20], #16
442         eor             v0.16b, v1.16b, v4.16b
443         decrypt_block   v0, w22, x21, x8, w7
444         eor             v0.16b, v0.16b, v4.16b
445         st1             {v0.16b}, [x19], #16
446         subs            w23, w23, #1
447         beq             .Lxtsdecout
448         next_tweak      v4, v4, v7, v8
449         b               .Lxtsdecloop
450 .Lxtsdecout:
451         st1             {v4.16b}, [x24]
452         frame_pop
453         ret
454 AES_ENDPROC(aes_xts_decrypt)
456         /*
457          * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
458          *                int blocks, u8 dg[], int enc_before, int enc_after)
459          */
460 AES_ENTRY(aes_mac_update)
461         frame_push      6
463         mov             x19, x0
464         mov             x20, x1
465         mov             x21, x2
466         mov             x22, x3
467         mov             x23, x4
468         mov             x24, x6
470         ld1             {v0.16b}, [x23]                 /* get dg */
471         enc_prepare     w2, x1, x7
472         cbz             w5, .Lmacloop4x
474         encrypt_block   v0, w2, x1, x7, w8
476 .Lmacloop4x:
477         subs            w22, w22, #4
478         bmi             .Lmac1x
479         ld1             {v1.16b-v4.16b}, [x19], #64     /* get next pt block */
480         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
481         encrypt_block   v0, w21, x20, x7, w8
482         eor             v0.16b, v0.16b, v2.16b
483         encrypt_block   v0, w21, x20, x7, w8
484         eor             v0.16b, v0.16b, v3.16b
485         encrypt_block   v0, w21, x20, x7, w8
486         eor             v0.16b, v0.16b, v4.16b
487         cmp             w22, wzr
488         csinv           x5, x24, xzr, eq
489         cbz             w5, .Lmacout
490         encrypt_block   v0, w21, x20, x7, w8
491         st1             {v0.16b}, [x23]                 /* return dg */
492         cond_yield_neon .Lmacrestart
493         b               .Lmacloop4x
494 .Lmac1x:
495         add             w22, w22, #4
496 .Lmacloop:
497         cbz             w22, .Lmacout
498         ld1             {v1.16b}, [x19], #16            /* get next pt block */
499         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with dg */
501         subs            w22, w22, #1
502         csinv           x5, x24, xzr, eq
503         cbz             w5, .Lmacout
505 .Lmacenc:
506         encrypt_block   v0, w21, x20, x7, w8
507         b               .Lmacloop
509 .Lmacout:
510         st1             {v0.16b}, [x23]                 /* return dg */
511         frame_pop
512         ret
514 .Lmacrestart:
515         ld1             {v0.16b}, [x23]                 /* get dg */
516         enc_prepare     w21, x20, x0
517         b               .Lmacloop4x
518 AES_ENDPROC(aes_mac_update)