2 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 /* included by aes-ce.S and aes-neon.S */
17 encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
19 ENDPROC(aes_encrypt_block4x)
22 decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
24 ENDPROC(aes_decrypt_block4x)
27 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
29 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
33 AES_ENTRY(aes_ecb_encrypt)
34 stp x29, x30, [sp, #-16]!
37 enc_prepare w3, x2, x5
42 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
43 bl aes_encrypt_block4x
44 st1 {v0.16b-v3.16b}, [x0], #64
50 ld1 {v0.16b}, [x1], #16 /* get next pt block */
51 encrypt_block v0, w3, x2, x5, w6
52 st1 {v0.16b}, [x0], #16
56 ldp x29, x30, [sp], #16
58 AES_ENDPROC(aes_ecb_encrypt)
61 AES_ENTRY(aes_ecb_decrypt)
62 stp x29, x30, [sp, #-16]!
65 dec_prepare w3, x2, x5
70 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
71 bl aes_decrypt_block4x
72 st1 {v0.16b-v3.16b}, [x0], #64
78 ld1 {v0.16b}, [x1], #16 /* get next ct block */
79 decrypt_block v0, w3, x2, x5, w6
80 st1 {v0.16b}, [x0], #16
84 ldp x29, x30, [sp], #16
86 AES_ENDPROC(aes_ecb_decrypt)
90 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
91 * int blocks, u8 iv[])
92 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
93 * int blocks, u8 iv[])
96 AES_ENTRY(aes_cbc_encrypt)
97 ld1 {v4.16b}, [x5] /* get iv */
98 enc_prepare w3, x2, x6
103 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
104 eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
105 encrypt_block v0, w3, x2, x6, w7
106 eor v1.16b, v1.16b, v0.16b
107 encrypt_block v1, w3, x2, x6, w7
108 eor v2.16b, v2.16b, v1.16b
109 encrypt_block v2, w3, x2, x6, w7
110 eor v3.16b, v3.16b, v2.16b
111 encrypt_block v3, w3, x2, x6, w7
112 st1 {v0.16b-v3.16b}, [x0], #64
119 ld1 {v0.16b}, [x1], #16 /* get next pt block */
120 eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
121 encrypt_block v4, w3, x2, x6, w7
122 st1 {v4.16b}, [x0], #16
126 st1 {v4.16b}, [x5] /* return iv */
128 AES_ENDPROC(aes_cbc_encrypt)
131 AES_ENTRY(aes_cbc_decrypt)
132 stp x29, x30, [sp, #-16]!
135 ld1 {v7.16b}, [x5] /* get iv */
136 dec_prepare w3, x2, x6
141 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
145 bl aes_decrypt_block4x
147 eor v0.16b, v0.16b, v7.16b
148 eor v1.16b, v1.16b, v4.16b
149 ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
150 eor v2.16b, v2.16b, v5.16b
151 eor v3.16b, v3.16b, v6.16b
152 st1 {v0.16b-v3.16b}, [x0], #64
158 ld1 {v1.16b}, [x1], #16 /* get next ct block */
159 mov v0.16b, v1.16b /* ...and copy to v0 */
160 decrypt_block v0, w3, x2, x6, w7
161 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
162 mov v7.16b, v1.16b /* ct is next iv */
163 st1 {v0.16b}, [x0], #16
167 st1 {v7.16b}, [x5] /* return iv */
168 ldp x29, x30, [sp], #16
170 AES_ENDPROC(aes_cbc_decrypt)
174 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
175 * int rounds, int bytes, u8 const iv[])
176 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
177 * int rounds, int bytes, u8 const iv[])
180 AES_ENTRY(aes_cbc_cts_encrypt)
181 adr_l x8, .Lcts_permute_table
189 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
192 ld1 {v5.16b}, [x5] /* get iv */
193 enc_prepare w3, x2, x6
195 eor v0.16b, v0.16b, v5.16b /* xor with iv */
196 tbl v1.16b, {v1.16b}, v4.16b
197 encrypt_block v0, w3, x2, x6, w7
199 eor v1.16b, v1.16b, v0.16b
200 tbl v0.16b, {v0.16b}, v3.16b
201 encrypt_block v1, w3, x2, x6, w7
204 st1 {v0.16b}, [x4] /* overlapping stores */
207 AES_ENDPROC(aes_cbc_cts_encrypt)
209 AES_ENTRY(aes_cbc_cts_decrypt)
210 adr_l x8, .Lcts_permute_table
218 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
221 ld1 {v5.16b}, [x5] /* get iv */
222 dec_prepare w3, x2, x6
224 tbl v2.16b, {v1.16b}, v4.16b
225 decrypt_block v0, w3, x2, x6, w7
226 eor v2.16b, v2.16b, v0.16b
228 tbx v0.16b, {v1.16b}, v4.16b
229 tbl v2.16b, {v2.16b}, v3.16b
230 decrypt_block v0, w3, x2, x6, w7
231 eor v0.16b, v0.16b, v5.16b /* xor with iv */
234 st1 {v2.16b}, [x4] /* overlapping stores */
237 AES_ENDPROC(aes_cbc_cts_decrypt)
239 .section ".rodata", "a"
242 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
243 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
244 .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
245 .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
246 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
247 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
252 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
253 * int blocks, u8 ctr[])
256 AES_ENTRY(aes_ctr_encrypt)
257 stp x29, x30, [sp, #-16]!
260 enc_prepare w3, x2, x6
263 umov x6, v4.d[1] /* keep swabbed ctr in reg */
265 cmn w6, w4 /* 32 bit overflow? */
283 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
284 bl aes_encrypt_block4x
285 eor v0.16b, v5.16b, v0.16b
286 ld1 {v5.16b}, [x1], #16 /* get 1 input block */
287 eor v1.16b, v6.16b, v1.16b
288 eor v2.16b, v7.16b, v2.16b
289 eor v3.16b, v5.16b, v3.16b
290 st1 {v0.16b-v3.16b}, [x0], #64
301 encrypt_block v0, w3, x2, x8, w7
303 adds x6, x6, #1 /* increment BE ctr */
306 bcs .Lctrcarry /* overflow? */
310 bmi .Lctrtailblock /* blocks <0 means tail block */
311 ld1 {v3.16b}, [x1], #16
312 eor v3.16b, v0.16b, v3.16b
313 st1 {v3.16b}, [x0], #16
317 st1 {v4.16b}, [x5] /* return next CTR value */
318 ldp x29, x30, [sp], #16
326 umov x7, v4.d[0] /* load upper word of ctr */
327 rev x7, x7 /* ... to handle the carry */
332 AES_ENDPROC(aes_ctr_encrypt)
336 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
337 * int blocks, u8 const rk2[], u8 iv[], int first)
338 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
339 * int blocks, u8 const rk2[], u8 iv[], int first)
342 .macro next_tweak, out, in, tmp
343 sshr \tmp\().2d, \in\().2d, #63
344 and \tmp\().16b, \tmp\().16b, xtsmask.16b
345 add \out\().2d, \in\().2d, \in\().2d
346 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
347 eor \out\().16b, \out\().16b, \tmp\().16b
350 .macro xts_load_mask, tmp
351 movi xtsmask.2s, #0x1
352 movi \tmp\().2s, #0x87
353 uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
356 AES_ENTRY(aes_xts_encrypt)
357 stp x29, x30, [sp, #-16]!
362 cbz w7, .Lxtsencnotfirst
364 enc_prepare w3, x5, x8
365 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
366 enc_switch_key w3, x2, x8
370 enc_prepare w3, x2, x8
372 next_tweak v4, v4, v8
376 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
377 next_tweak v5, v4, v8
378 eor v0.16b, v0.16b, v4.16b
379 next_tweak v6, v5, v8
380 eor v1.16b, v1.16b, v5.16b
381 eor v2.16b, v2.16b, v6.16b
382 next_tweak v7, v6, v8
383 eor v3.16b, v3.16b, v7.16b
384 bl aes_encrypt_block4x
385 eor v3.16b, v3.16b, v7.16b
386 eor v0.16b, v0.16b, v4.16b
387 eor v1.16b, v1.16b, v5.16b
388 eor v2.16b, v2.16b, v6.16b
389 st1 {v0.16b-v3.16b}, [x0], #64
398 ld1 {v1.16b}, [x1], #16
399 eor v0.16b, v1.16b, v4.16b
400 encrypt_block v0, w3, x2, x8, w7
401 eor v0.16b, v0.16b, v4.16b
402 st1 {v0.16b}, [x0], #16
405 next_tweak v4, v4, v8
409 ldp x29, x30, [sp], #16
411 AES_ENDPROC(aes_xts_encrypt)
414 AES_ENTRY(aes_xts_decrypt)
415 stp x29, x30, [sp, #-16]!
420 cbz w7, .Lxtsdecnotfirst
422 enc_prepare w3, x5, x8
423 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
424 dec_prepare w3, x2, x8
428 dec_prepare w3, x2, x8
430 next_tweak v4, v4, v8
434 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
435 next_tweak v5, v4, v8
436 eor v0.16b, v0.16b, v4.16b
437 next_tweak v6, v5, v8
438 eor v1.16b, v1.16b, v5.16b
439 eor v2.16b, v2.16b, v6.16b
440 next_tweak v7, v6, v8
441 eor v3.16b, v3.16b, v7.16b
442 bl aes_decrypt_block4x
443 eor v3.16b, v3.16b, v7.16b
444 eor v0.16b, v0.16b, v4.16b
445 eor v1.16b, v1.16b, v5.16b
446 eor v2.16b, v2.16b, v6.16b
447 st1 {v0.16b-v3.16b}, [x0], #64
456 ld1 {v1.16b}, [x1], #16
457 eor v0.16b, v1.16b, v4.16b
458 decrypt_block v0, w3, x2, x8, w7
459 eor v0.16b, v0.16b, v4.16b
460 st1 {v0.16b}, [x0], #16
463 next_tweak v4, v4, v8
467 ldp x29, x30, [sp], #16
469 AES_ENDPROC(aes_xts_decrypt)
472 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
473 * int blocks, u8 dg[], int enc_before, int enc_after)
475 AES_ENTRY(aes_mac_update)
485 ld1 {v0.16b}, [x23] /* get dg */
486 enc_prepare w2, x1, x7
489 encrypt_block v0, w2, x1, x7, w8
494 ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
495 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
496 encrypt_block v0, w21, x20, x7, w8
497 eor v0.16b, v0.16b, v2.16b
498 encrypt_block v0, w21, x20, x7, w8
499 eor v0.16b, v0.16b, v3.16b
500 encrypt_block v0, w21, x20, x7, w8
501 eor v0.16b, v0.16b, v4.16b
503 csinv x5, x24, xzr, eq
505 encrypt_block v0, w21, x20, x7, w8
506 st1 {v0.16b}, [x23] /* return dg */
507 cond_yield_neon .Lmacrestart
513 ld1 {v1.16b}, [x19], #16 /* get next pt block */
514 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
517 csinv x5, x24, xzr, eq
521 encrypt_block v0, w21, x20, x7, w8
525 st1 {v0.16b}, [x23] /* return dg */
530 ld1 {v0.16b}, [x23] /* get dg */
531 enc_prepare w21, x20, x0
533 AES_ENDPROC(aes_mac_update)