mm: hugetlb: fix hugepage memory leak caused by wrong reserve count
[linux/fpc-iii.git] / arch / arm64 / crypto / aes-modes.S
blobf6e372c528eb438b6517a236315afeb1694a8002
1 /*
2  * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
3  *
4  * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
11 /* included by aes-ce.S and aes-neon.S */
13         .text
14         .align          4
16 /*
17  * There are several ways to instantiate this code:
18  * - no interleave, all inline
19  * - 2-way interleave, 2x calls out of line (-DINTERLEAVE=2)
20  * - 2-way interleave, all inline (-DINTERLEAVE=2 -DINTERLEAVE_INLINE)
21  * - 4-way interleave, 4x calls out of line (-DINTERLEAVE=4)
22  * - 4-way interleave, all inline (-DINTERLEAVE=4 -DINTERLEAVE_INLINE)
23  *
24  * Macros imported by this code:
25  * - enc_prepare        - setup NEON registers for encryption
26  * - dec_prepare        - setup NEON registers for decryption
27  * - enc_switch_key     - change to new key after having prepared for encryption
28  * - encrypt_block      - encrypt a single block
29  * - decrypt_block      - decrypt a single block
30  * - encrypt_block2x    - encrypt 2 blocks in parallel (if INTERLEAVE == 2)
31  * - decrypt_block2x    - decrypt 2 blocks in parallel (if INTERLEAVE == 2)
32  * - encrypt_block4x    - encrypt 4 blocks in parallel (if INTERLEAVE == 4)
33  * - decrypt_block4x    - decrypt 4 blocks in parallel (if INTERLEAVE == 4)
34  */
36 #if defined(INTERLEAVE) && !defined(INTERLEAVE_INLINE)
/*
 * Out-of-line interleave variant: the mode routines reach the N-way block
 * helpers with bl, which overwrites the link register x30. FRAME_PUSH
 * stores x29 and x30 in a 16 byte stack frame and points x29 at it,
 * FRAME_POP reloads both and releases the frame ahead of ret.
 */
37 #define FRAME_PUSH      stp x29, x30, [sp,#-16]! ; mov x29, sp
38 #define FRAME_POP       ldp x29, x30, [sp],#16
40 #if INTERLEAVE == 2
/*
 * Shared out-of-line body for 2-way interleaved encryption. Expands the
 * imported encrypt_block2x macro once on v0 and v1 and returns, so the
 * mode routines can reach it with bl instead of expanding it at each use.
 * w3 and x2 are the registers the callers also hand to encrypt_block.
 * x6 and w7 are presumably scratch for the macro - confirm in the
 * including file.
 */
42 aes_encrypt_block2x:
43         encrypt_block2x v0, v1, w3, x2, x6, w7
44         ret
45 ENDPROC(aes_encrypt_block2x)
/*
 * Shared out-of-line body for 2-way interleaved decryption. Expands the
 * imported decrypt_block2x macro once on v0 and v1 and returns.
 * x6 and w7 are presumably scratch for the macro - confirm in the
 * including file.
 */
47 aes_decrypt_block2x:
48         decrypt_block2x v0, v1, w3, x2, x6, w7
49         ret
50 ENDPROC(aes_decrypt_block2x)
52 #elif INTERLEAVE == 4
/*
 * Shared out-of-line body for 4-way interleaved encryption. Expands the
 * imported encrypt_block4x macro once on v0-v3 and returns.
 * x6 and w7 are presumably scratch for the macro - confirm in the
 * including file.
 */
54 aes_encrypt_block4x:
55         encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
56         ret
57 ENDPROC(aes_encrypt_block4x)
/*
 * Shared out-of-line body for 4-way interleaved decryption. Expands the
 * imported decrypt_block4x macro once on v0-v3 and returns.
 * x6 and w7 are presumably scratch for the macro - confirm in the
 * including file.
 */
59 aes_decrypt_block4x:
60         decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
61         ret
62 ENDPROC(aes_decrypt_block4x)
64 #else
65 #error INTERLEAVE should equal 2 or 4
66 #endif
/* Out-of-line build: encrypt v0-v1 by calling the shared helper (bl overwrites x30). */
68         .macro          do_encrypt_block2x
69         bl              aes_encrypt_block2x
70         .endm
/* Out-of-line build: decrypt v0-v1 by calling the shared helper (bl overwrites x30). */
72         .macro          do_decrypt_block2x
73         bl              aes_decrypt_block2x
74         .endm
/* Out-of-line build: encrypt v0-v3 by calling the shared helper (bl overwrites x30). */
76         .macro          do_encrypt_block4x
77         bl              aes_encrypt_block4x
78         .endm
/* Out-of-line build: decrypt v0-v3 by calling the shared helper (bl overwrites x30). */
80         .macro          do_decrypt_block4x
81         bl              aes_decrypt_block4x
82         .endm
84 #else
/*
 * No interleave, or interleave expanded inline: the do_*_block macros of
 * this branch issue no bl, so x30 is left alone and both frame macros
 * expand to nothing.
 */
85 #define FRAME_PUSH
86 #define FRAME_POP
/* Inline build: expand the 2-way encryption of v0-v1 at the point of use. */
88         .macro          do_encrypt_block2x
89         encrypt_block2x v0, v1, w3, x2, x6, w7
90         .endm
/* Inline build: expand the 2-way decryption of v0-v1 at the point of use. */
92         .macro          do_decrypt_block2x
93         decrypt_block2x v0, v1, w3, x2, x6, w7
94         .endm
/* Inline build: expand the 4-way encryption of v0-v3 at the point of use. */
96         .macro          do_encrypt_block4x
97         encrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
98         .endm
/* Inline build: expand the 4-way decryption of v0-v3 at the point of use. */
100         .macro          do_decrypt_block4x
101         decrypt_block4x v0, v1, v2, v3, w3, x2, x6, w7
102         .endm
104 #endif
106         /*
107          * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
108          *                 int blocks, int first)
109          * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
110          *                 int blocks, int first)
111          */
/*
 * ECB encryption. Following the prototype comment that precedes this
 * routine and the AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, w5 = first
 *
 * Blocks are handled INTERLEAVE at a time while a full batch remains, then
 * one at a time. x0 and x1 are post-incremented past the data they touch.
 * NOTE(review): when INTERLEAVE is not defined nothing tests for zero
 * ahead of the single-block loop, so blocks is presumably never 0 on
 * entry - confirm in the callers.
 */
113 AES_ENTRY(aes_ecb_encrypt)
114         FRAME_PUSH
115         cbz             w5, .LecbencloopNx      /* first == 0: skip enc_prepare */
117         enc_prepare     w3, x2, x5
119 .LecbencloopNx:
120 #if INTERLEAVE >= 2
121         subs            w4, w4, #INTERLEAVE     /* full batch left? */
122         bmi             .Lecbenc1x              /* no: go handle the leftovers */
123 #if INTERLEAVE == 2
124         ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 pt blocks */
125         do_encrypt_block2x
126         st1             {v0.16b-v1.16b}, [x0], #32
127 #else
128         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
129         do_encrypt_block4x
130         st1             {v0.16b-v3.16b}, [x0], #64
131 #endif
132         b               .LecbencloopNx
133 .Lecbenc1x:
134         adds            w4, w4, #INTERLEAVE     /* undo the subtraction: w4 = leftover count */
135         beq             .Lecbencout             /* nothing left over */
136 #endif
137 .Lecbencloop:
138         ld1             {v0.16b}, [x1], #16             /* get next pt block */
139         encrypt_block   v0, w3, x2, x5, w6
140         st1             {v0.16b}, [x0], #16
141         subs            w4, w4, #1
142         bne             .Lecbencloop
143 .Lecbencout:
144         FRAME_POP
145         ret
146 AES_ENDPROC(aes_ecb_encrypt)
/*
 * ECB decryption. Following the prototype comment ahead of
 * aes_ecb_encrypt and the AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, w5 = first
 *
 * Same loop structure as the encryption routine: INTERLEAVE blocks at a
 * time while a full batch remains, then single blocks.
 * NOTE(review): when INTERLEAVE is not defined nothing tests for zero
 * ahead of the single-block loop, so blocks is presumably never 0 on
 * entry - confirm in the callers.
 */
149 AES_ENTRY(aes_ecb_decrypt)
150         FRAME_PUSH
151         cbz             w5, .LecbdecloopNx      /* first == 0: skip dec_prepare */
153         dec_prepare     w3, x2, x5
155 .LecbdecloopNx:
156 #if INTERLEAVE >= 2
157         subs            w4, w4, #INTERLEAVE     /* full batch left? */
158         bmi             .Lecbdec1x              /* no: go handle the leftovers */
159 #if INTERLEAVE == 2
160         ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 ct blocks */
161         do_decrypt_block2x
162         st1             {v0.16b-v1.16b}, [x0], #32
163 #else
164         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
165         do_decrypt_block4x
166         st1             {v0.16b-v3.16b}, [x0], #64
167 #endif
168         b               .LecbdecloopNx
169 .Lecbdec1x:
170         adds            w4, w4, #INTERLEAVE     /* undo the subtraction: w4 = leftover count */
171         beq             .Lecbdecout             /* nothing left over */
172 #endif
173 .Lecbdecloop:
174         ld1             {v0.16b}, [x1], #16             /* get next ct block */
175         decrypt_block   v0, w3, x2, x5, w6
176         st1             {v0.16b}, [x0], #16
177         subs            w4, w4, #1
178         bne             .Lecbdecloop
179 .Lecbdecout:
180         FRAME_POP
181         ret
182 AES_ENDPROC(aes_ecb_decrypt)
185         /*
186          * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
187          *                 int blocks, u8 iv[], int first)
188          * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
189          *                 int blocks, u8 iv[], int first)
190          */
/*
 * CBC encryption. Following the prototype comment that precedes this
 * routine and the AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv,
 *   w6 = first
 *
 * v0 carries the chaining value: the iv on a first call, afterwards the
 * ciphertext block just produced. Each block depends on the previous one,
 * so there is no interleaved path and, with no bl issued, no stack frame.
 * NOTE(review): with first == 0 the iv is not reloaded and v0 is used as
 * found, so it is presumably still live from the previous call - confirm
 * in the callers. The loop has no zero test, so blocks is presumably
 * never 0 on entry.
 */
192 AES_ENTRY(aes_cbc_encrypt)
193         cbz             w6, .Lcbcencloop        /* first == 0: keep v0 and the prepared key */
195         ld1             {v0.16b}, [x5]                  /* get iv */
196         enc_prepare     w3, x2, x5
198 .Lcbcencloop:
199         ld1             {v1.16b}, [x1], #16             /* get next pt block */
200         eor             v0.16b, v0.16b, v1.16b          /* ..and xor with iv */
201         encrypt_block   v0, w3, x2, x5, w6
202         st1             {v0.16b}, [x0], #16     /* v0 stays live as the next chaining value */
203         subs            w4, w4, #1
204         bne             .Lcbcencloop
205         ret
206 AES_ENDPROC(aes_cbc_encrypt)
/*
 * CBC decryption. Following the prototype comment ahead of
 * aes_cbc_encrypt and the AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv,
 *   w6 = first
 *
 * v7 carries the chaining value: the iv on a first call, afterwards the
 * last ciphertext block consumed. Every path keeps a copy of the
 * ciphertext before decrypting, because block n of the plaintext is the
 * decryption of ct[n] xored with ct[n-1].
 * NOTE(review): with first == 0 the iv is not reloaded and v7 is used as
 * found, so it is presumably still live from the previous call - confirm
 * in the callers.
 */
209 AES_ENTRY(aes_cbc_decrypt)
210         FRAME_PUSH
211         cbz             w6, .LcbcdecloopNx      /* first == 0: keep v7 and the prepared key */
213         ld1             {v7.16b}, [x5]                  /* get iv */
214         dec_prepare     w3, x2, x5
216 .LcbcdecloopNx:
217 #if INTERLEAVE >= 2
218         subs            w4, w4, #INTERLEAVE     /* full batch left? */
219         bmi             .Lcbcdec1x              /* no: go handle the leftovers */
220 #if INTERLEAVE == 2
221         ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 ct blocks */
222         mov             v2.16b, v0.16b          /* keep both ct blocks for chaining */
223         mov             v3.16b, v1.16b
224         do_decrypt_block2x
225         eor             v0.16b, v0.16b, v7.16b  /* block 0 chains from the previous value */
226         eor             v1.16b, v1.16b, v2.16b  /* block 1 chains from ct block 0 */
227         mov             v7.16b, v3.16b          /* ct block 1 is the next chaining value */
228         st1             {v0.16b-v1.16b}, [x0], #32
229 #else
230         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
231         mov             v4.16b, v0.16b          /* keep ct blocks 0-2 for chaining */
232         mov             v5.16b, v1.16b
233         mov             v6.16b, v2.16b
234         do_decrypt_block4x
235         sub             x1, x1, #16             /* step back to ct block 3 */
236         eor             v0.16b, v0.16b, v7.16b
237         eor             v1.16b, v1.16b, v4.16b
        /*
         * ct block 3 was not copied to a register, so it is fetched again
         * as the next chaining value. The reload sits ahead of the st1
         * below, presumably so that it still reads ciphertext when out
         * and in overlap.
         */
238         ld1             {v7.16b}, [x1], #16             /* reload 1 ct block */
239         eor             v2.16b, v2.16b, v5.16b
240         eor             v3.16b, v3.16b, v6.16b
241         st1             {v0.16b-v3.16b}, [x0], #64
242 #endif
243         b               .LcbcdecloopNx
244 .Lcbcdec1x:
245         adds            w4, w4, #INTERLEAVE     /* undo the subtraction: w4 = leftover count */
246         beq             .Lcbcdecout             /* nothing left over */
247 #endif
248 .Lcbcdecloop:
249         ld1             {v1.16b}, [x1], #16             /* get next ct block */
250         mov             v0.16b, v1.16b                  /* ...and copy to v0 */
251         decrypt_block   v0, w3, x2, x5, w6
252         eor             v0.16b, v0.16b, v7.16b          /* xor with iv => pt */
253         mov             v7.16b, v1.16b                  /* ct is next iv */
254         st1             {v0.16b}, [x0], #16
255         subs            w4, w4, #1
256         bne             .Lcbcdecloop
257 .Lcbcdecout:
258         FRAME_POP
259         ret
260 AES_ENDPROC(aes_cbc_decrypt)
263         /*
264          * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
265          *                 int blocks, u8 ctr[], int first)
266          */
/*
 * CTR mode. Following the prototype comment that precedes this routine
 * and the AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = ctr,
 *   w6 = first
 *
 * Register roles inside the routine:
 *   v4 = counter block in memory byte order
 *   x5 = second 64 bits of the counter block (v4.d[1]), byte-reversed so
 *        that a plain add increments it. The ctr pointer is consumed
 *        before x5 is reused this way.
 *
 * The N-way path only updates x5, or just its low 32 bits in the 4-way
 * case, so it is used only when cmn shows that the low 32 bits of the
 * counter plus the block count do not wrap. Otherwise every block goes
 * through .Lctrloop, whose increment at .Lctrinc carries into v4.d[0].
 *
 * Every full-block exit leaves v4 holding the last counter value used.
 * That is why a call with first == 0 increments before producing any
 * keystream.
 * NOTE(review): with first == 0 the counter is taken from v4 rather than
 * from memory, so v4 is presumably still live from the previous call -
 * confirm in the callers.
 *
 * A negative count after the decrement in .Lctrloop selects
 * .Lctrhalfblock, which xors and stores only 8 bytes.
 */
268 AES_ENTRY(aes_ctr_encrypt)
269         FRAME_PUSH
270         cbnz            w6, .Lctrfirst          /* 1st time around? */
271         umov            x5, v4.d[1]             /* keep swabbed ctr in reg */
272         rev             x5, x5
273 #if INTERLEAVE >= 2
274         cmn             w5, w4                  /* 32 bit overflow? */
275         bcs             .Lctrinc                /* yes: one block at a time, with carry */
276         add             x5, x5, #1              /* increment BE ctr */
277         b               .LctrincNx              /* write it to v4, then enter the N-way loop */
278 #else
279         b               .Lctrinc
280 #endif
281 .Lctrfirst:
282         enc_prepare     w3, x2, x6
283         ld1             {v4.16b}, [x5]          /* counter block from the ctr argument */
284         umov            x5, v4.d[1]             /* keep swabbed ctr in reg */
285         rev             x5, x5
286 #if INTERLEAVE >= 2
287         cmn             w5, w4                  /* 32 bit overflow? */
288         bcs             .Lctrloop               /* yes: one block at a time, with carry */
289 .LctrloopNx:
290         subs            w4, w4, #INTERLEAVE     /* full batch left? */
291         bmi             .Lctr1x                 /* no: go handle the leftovers */
292 #if INTERLEAVE == 2
        /*
         * Build two counter blocks: the first 64 bits come from v4, the
         * second 64 bits are x5 and x5 + 1 reversed back to memory order.
         * x5 ends up two past its starting value.
         */
293         mov             v0.8b, v4.8b
294         mov             v1.8b, v4.8b
295         rev             x7, x5
296         add             x5, x5, #1
297         ins             v0.d[1], x7
298         rev             x7, x5
299         add             x5, x5, #1
300         ins             v1.d[1], x7
301         ld1             {v2.16b-v3.16b}, [x1], #32      /* get 2 input blocks */
302         do_encrypt_block2x
303         eor             v0.16b, v0.16b, v2.16b
304         eor             v1.16b, v1.16b, v3.16b
305         st1             {v0.16b-v1.16b}, [x0], #32
306 #else
        /*
         * Build four counter blocks: v0 is v4 unchanged, v1-v3 are copies
         * of v4 whose last 32-bit lane is replaced by the low counter word
         * plus 1, 2 and 3, byte-reversed back to memory order by rev32.
         */
307         ldr             q8, =0x30000000200000001        /* addends 1,2,3[,0] */
308         dup             v7.4s, w5
309         mov             v0.16b, v4.16b
310         add             v7.4s, v7.4s, v8.4s
311         mov             v1.16b, v4.16b
312         rev32           v8.16b, v7.16b
313         mov             v2.16b, v4.16b
314         mov             v3.16b, v4.16b
315         mov             v1.s[3], v8.s[0]
316         mov             v2.s[3], v8.s[1]
317         mov             v3.s[3], v8.s[2]
318         ld1             {v5.16b-v7.16b}, [x1], #48      /* get 3 input blocks */
319         do_encrypt_block4x
320         eor             v0.16b, v5.16b, v0.16b
321         ld1             {v5.16b}, [x1], #16             /* get 1 input block  */
322         eor             v1.16b, v6.16b, v1.16b
323         eor             v2.16b, v7.16b, v2.16b
324         eor             v3.16b, v5.16b, v3.16b
325         st1             {v0.16b-v3.16b}, [x0], #64
326         add             x5, x5, #INTERLEAVE     /* x5 = first counter of the next batch */
327 #endif
328         cbz             w4, .LctroutNx          /* that batch was the last of the data */
329 .LctrincNx:
330         rev             x7, x5                  /* publish the next counter value in v4 */
331         ins             v4.d[1], x7
332         b               .LctrloopNx
333 .LctroutNx:
334         sub             x5, x5, #1              /* x5 is one past the last counter used */
335         rev             x7, x5
336         ins             v4.d[1], x7             /* leave the last counter used in v4 */
337         b               .Lctrout
338 .Lctr1x:
339         adds            w4, w4, #INTERLEAVE     /* undo the subtraction: w4 = leftover count */
340         beq             .Lctrout                /* nothing left over */
341 #endif
342 .Lctrloop:
343         mov             v0.16b, v4.16b
344         encrypt_block   v0, w3, x2, x6, w7      /* v0 = keystream block */
345         subs            w4, w4, #1
346         bmi             .Lctrhalfblock          /* blocks < 0 means 1/2 block */
347         ld1             {v3.16b}, [x1], #16
348         eor             v3.16b, v0.16b, v3.16b
349         st1             {v3.16b}, [x0], #16
350         beq             .Lctrout                /* flags still from the subs above */
351 .Lctrinc:
352         adds            x5, x5, #1              /* increment BE ctr */
353         rev             x7, x5                  /* rev and ins leave the carry flag alone */
354         ins             v4.d[1], x7
355         bcc             .Lctrloop               /* no overflow? */
356         umov            x7, v4.d[0]             /* load upper word of ctr  */
357         rev             x7, x7                  /* ... to handle the carry */
358         add             x7, x7, #1
359         rev             x7, x7
360         ins             v4.d[0], x7
361         b               .Lctrloop
362 .Lctrhalfblock:
363         ld1             {v3.8b}, [x1]           /* partial block: 8 bytes, no pointer update */
364         eor             v3.8b, v0.8b, v3.8b
365         st1             {v3.8b}, [x0]
366 .Lctrout:
367         FRAME_POP
368         ret
369 AES_ENDPROC(aes_ctr_encrypt)
        /* literal pool: holds the constant for the ldr q8 literal load above */
370         .ltorg
373         /*
374          * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
375          *                 int blocks, u8 const rk2[], u8 iv[], int first)
376          * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
377          *                 int blocks, u8 const rk2[], u8 iv[], int first)
378          */
/*
 * next_tweak out, in, const, tmp
 *
 * Treats \in as a 128-bit value made of two 64-bit lanes and writes to
 * \out that value shifted left by one bit, with the bit shifted out of
 * the top folded back into the low lane as \const.d[1]. Step by step:
 *   - sshr turns each lane of \in into all ones if its top bit is set,
 *     else zero, and the and keeps the matching lane of \const
 *   - add doubles each lane, which shifts each lane left by one
 *   - ext swaps the two lanes of \tmp, so the low lane's top bit carries
 *     into the high lane and the high lane's top bit selects the
 *     reduction term for the low lane
 *   - eor applies both corrections
 * With \const loaded from .Lxts_mul_x this is presumably the XTS tweak
 * update, a multiplication by x in GF(2^128) - confirm against the XTS
 * specification.
 *
 * \tmp is clobbered. \out may be the same register as \in, and also the
 * same as \const, because \const is read for the last time by the and,
 * ahead of the first write to \out.
 */
380         .macro          next_tweak, out, in, const, tmp
381         sshr            \tmp\().2d,  \in\().2d,   #63
382         and             \tmp\().16b, \tmp\().16b, \const\().16b
383         add             \out\().2d,  \in\().2d,   \in\().2d
384         ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
385         eor             \out\().16b, \out\().16b, \tmp\().16b
386         .endm
/*
 * Mask pair loaded into v7 as the const operand of next_tweak. Read as
 * two 64-bit lanes on a little-endian build, the low lane is 1 (the carry
 * bit into the high half) and the high lane is 0x87 (the reduction term).
 */
388 .Lxts_mul_x:
389         .word           1, 0, 0x87, 0
/*
 * XTS encryption. Following the prototype comment that precedes this
 * routine and the AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2,
 *   x6 = iv, w7 = first
 *
 * Register roles inside the routine:
 *   v4 = tweak of the current block (v5, v6, v7 hold the following ones
 *        in the N-way paths)
 *   v7 = .Lxts_mul_x constant for next_tweak, v8 = next_tweak scratch
 *
 * On a first call the iv is encrypted with rk2 to form the first tweak
 * and the key is then switched to rk1 for the data. Every block is xored
 * with its tweak ahead of and after the block encryption.
 *
 * Every exit leaves v4 holding the last tweak used. That is why a call
 * with first == 0 advances the tweak before using it.
 * NOTE(review): with first == 0 the iv is not read and v4 is used as
 * found, so it is presumably still live from the previous call - confirm
 * in the callers.
 */
391 AES_ENTRY(aes_xts_encrypt)
392         FRAME_PUSH
393         cbz             w7, .LxtsencloopNx      /* first == 0: continue from the tweak in v4 */
395         ld1             {v4.16b}, [x6]          /* load iv */
396         enc_prepare     w3, x5, x6              /* set up with the tweak key rk2 */
397         encrypt_block   v4, w3, x5, x6, w7              /* first tweak */
398         enc_switch_key  w3, x2, x6              /* data is encrypted with rk1 */
399         ldr             q7, .Lxts_mul_x
400         b               .LxtsencNx
402 .LxtsencloopNx:
403         ldr             q7, .Lxts_mul_x         /* v7 may have been reused as a tweak */
404         next_tweak      v4, v4, v7, v8          /* step past the last tweak used */
405 .LxtsencNx:
406 #if INTERLEAVE >= 2
407         subs            w4, w4, #INTERLEAVE     /* full batch left? */
408         bmi             .Lxtsenc1x              /* no: go handle the leftovers */
409 #if INTERLEAVE == 2
410         ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 pt blocks */
411         next_tweak      v5, v4, v7, v8          /* v5 = tweak of the second block */
412         eor             v0.16b, v0.16b, v4.16b
413         eor             v1.16b, v1.16b, v5.16b
414         do_encrypt_block2x
415         eor             v0.16b, v0.16b, v4.16b
416         eor             v1.16b, v1.16b, v5.16b
417         st1             {v0.16b-v1.16b}, [x0], #32
418         cbz             w4, .LxtsencoutNx       /* that batch was the last of the data */
419         next_tweak      v4, v5, v7, v8          /* v4 = tweak of the next block */
420         b               .LxtsencNx
421 .LxtsencoutNx:
422         mov             v4.16b, v5.16b          /* leave the last tweak used in v4 */
423         b               .Lxtsencout
424 #else
425         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 pt blocks */
426         next_tweak      v5, v4, v7, v8
427         eor             v0.16b, v0.16b, v4.16b
428         next_tweak      v6, v5, v7, v8
429         eor             v1.16b, v1.16b, v5.16b
430         eor             v2.16b, v2.16b, v6.16b
431         next_tweak      v7, v6, v7, v8          /* fourth tweak overwrites the constant */
432         eor             v3.16b, v3.16b, v7.16b
433         do_encrypt_block4x
434         eor             v3.16b, v3.16b, v7.16b
435         eor             v0.16b, v0.16b, v4.16b
436         eor             v1.16b, v1.16b, v5.16b
437         eor             v2.16b, v2.16b, v6.16b
438         st1             {v0.16b-v3.16b}, [x0], #64
439         mov             v4.16b, v7.16b          /* leave the last tweak used in v4 */
440         cbz             w4, .Lxtsencout
441         b               .LxtsencloopNx          /* reloads v7 and advances v4 */
442 #endif
443 .Lxtsenc1x:
444         adds            w4, w4, #INTERLEAVE     /* undo the subtraction: w4 = leftover count */
445         beq             .Lxtsencout             /* nothing left over */
446 #endif
447 .Lxtsencloop:
448         ld1             {v1.16b}, [x1], #16
449         eor             v0.16b, v1.16b, v4.16b
450         encrypt_block   v0, w3, x2, x6, w7
451         eor             v0.16b, v0.16b, v4.16b
452         st1             {v0.16b}, [x0], #16
453         subs            w4, w4, #1
454         beq             .Lxtsencout             /* done: v4 stays at the last tweak used */
455         next_tweak      v4, v4, v7, v8
456         b               .Lxtsencloop
457 .Lxtsencout:
458         FRAME_POP
459         ret
460 AES_ENDPROC(aes_xts_encrypt)
/*
 * XTS decryption. Following the prototype comment ahead of
 * aes_xts_encrypt and the AAPCS64 argument order:
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2,
 *   x6 = iv, w7 = first
 *
 * Register roles inside the routine:
 *   v4 = tweak of the current block (v5, v6, v7 hold the following ones
 *        in the N-way paths)
 *   v7 = .Lxts_mul_x constant for next_tweak, v8 = next_tweak scratch
 *
 * On a first call the iv is encrypted (not decrypted) with rk2 to form
 * the first tweak, after which dec_prepare sets up rk1 for the data.
 * Every block is xored with its tweak ahead of and after the block
 * decryption.
 *
 * Every exit leaves v4 holding the last tweak used. That is why a call
 * with first == 0 advances the tweak before using it.
 * NOTE(review): with first == 0 the iv is not read and v4 is used as
 * found, so it is presumably still live from the previous call - confirm
 * in the callers.
 */
463 AES_ENTRY(aes_xts_decrypt)
464         FRAME_PUSH
465         cbz             w7, .LxtsdecloopNx      /* first == 0: continue from the tweak in v4 */
467         ld1             {v4.16b}, [x6]          /* load iv */
468         enc_prepare     w3, x5, x6              /* set up with the tweak key rk2 */
469         encrypt_block   v4, w3, x5, x6, w7              /* first tweak */
470         dec_prepare     w3, x2, x6              /* data is decrypted with rk1 */
471         ldr             q7, .Lxts_mul_x
472         b               .LxtsdecNx
474 .LxtsdecloopNx:
475         ldr             q7, .Lxts_mul_x         /* v7 may have been reused as a tweak */
476         next_tweak      v4, v4, v7, v8          /* step past the last tweak used */
477 .LxtsdecNx:
478 #if INTERLEAVE >= 2
479         subs            w4, w4, #INTERLEAVE     /* full batch left? */
480         bmi             .Lxtsdec1x              /* no: go handle the leftovers */
481 #if INTERLEAVE == 2
482         ld1             {v0.16b-v1.16b}, [x1], #32      /* get 2 ct blocks */
483         next_tweak      v5, v4, v7, v8          /* v5 = tweak of the second block */
484         eor             v0.16b, v0.16b, v4.16b
485         eor             v1.16b, v1.16b, v5.16b
486         do_decrypt_block2x
487         eor             v0.16b, v0.16b, v4.16b
488         eor             v1.16b, v1.16b, v5.16b
489         st1             {v0.16b-v1.16b}, [x0], #32
490         cbz             w4, .LxtsdecoutNx       /* that batch was the last of the data */
491         next_tweak      v4, v5, v7, v8          /* v4 = tweak of the next block */
492         b               .LxtsdecNx
493 .LxtsdecoutNx:
494         mov             v4.16b, v5.16b          /* leave the last tweak used in v4 */
495         b               .Lxtsdecout
496 #else
497         ld1             {v0.16b-v3.16b}, [x1], #64      /* get 4 ct blocks */
498         next_tweak      v5, v4, v7, v8
499         eor             v0.16b, v0.16b, v4.16b
500         next_tweak      v6, v5, v7, v8
501         eor             v1.16b, v1.16b, v5.16b
502         eor             v2.16b, v2.16b, v6.16b
503         next_tweak      v7, v6, v7, v8          /* fourth tweak overwrites the constant */
504         eor             v3.16b, v3.16b, v7.16b
505         do_decrypt_block4x
506         eor             v3.16b, v3.16b, v7.16b
507         eor             v0.16b, v0.16b, v4.16b
508         eor             v1.16b, v1.16b, v5.16b
509         eor             v2.16b, v2.16b, v6.16b
510         st1             {v0.16b-v3.16b}, [x0], #64
511         mov             v4.16b, v7.16b          /* leave the last tweak used in v4 */
512         cbz             w4, .Lxtsdecout
513         b               .LxtsdecloopNx          /* reloads v7 and advances v4 */
514 #endif
515 .Lxtsdec1x:
516         adds            w4, w4, #INTERLEAVE     /* undo the subtraction: w4 = leftover count */
517         beq             .Lxtsdecout             /* nothing left over */
518 #endif
519 .Lxtsdecloop:
520         ld1             {v1.16b}, [x1], #16
521         eor             v0.16b, v1.16b, v4.16b
522         decrypt_block   v0, w3, x2, x6, w7
523         eor             v0.16b, v0.16b, v4.16b
524         st1             {v0.16b}, [x0], #16
525         subs            w4, w4, #1
526         beq             .Lxtsdecout             /* done: v4 stays at the last tweak used */
527         next_tweak      v4, v4, v7, v8
528         b               .Lxtsdecloop
529 .Lxtsdecout:
530         FRAME_POP
531         ret
532 AES_ENDPROC(aes_xts_decrypt)