/*
2 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
*/
11 /* included by aes-ce.S and aes-neon.S */
/*
 * Internal helper: encrypt four AES blocks held in v0-v3 in parallel.
 * w22 = round count, x21 = key schedule, x8/w7 = scratch (per the
 * encrypt_block4x macro operands below).
 * NOTE(review): this excerpt is sampled; the entry label and the ret
 * preceding ENDPROC are on lines not visible here.
 */
17 encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
19 ENDPROC(aes_encrypt_block4x)
/*
 * Internal helper: decrypt four AES blocks held in v0-v3 in parallel.
 * w22 = round count, x21 = key schedule, x8/w7 = scratch.
 * NOTE(review): entry label and ret are on sampled-out lines.
 */
22 decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
24 ENDPROC(aes_decrypt_block4x)
27 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
29 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
/*
 * ECB encryption: each block is encrypted independently, so four
 * blocks at a time go through aes_encrypt_block4x, then a one-block
 * tail loop handles the remainder.  Per the code below: x19 = out,
 * x20 = in, x21 = key schedule, w22 = rounds (presumably copies of
 * the C arguments made by prologue lines not visible in this sampled
 * excerpt, so they survive the cond_yield_neon preemption point).
 */
33 AES_ENTRY(aes_ecb_encrypt)
43 enc_prepare w22, x21, x5
/* 4-way loop: load 4 plaintext blocks, encrypt, store */
48 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
49 bl aes_encrypt_block4x
50 st1 {v0.16b-v3.16b}, [x19], #64
/* may yield the NEON unit; restarts at .Lecbencrestart (label not visible here) */
51 cond_yield_neon .Lecbencrestart
/* 1-way tail loop for the remaining (<4) blocks */
57 ld1 {v0.16b}, [x20], #16 /* get next pt block */
58 encrypt_block v0, w22, x21, x5, w6
59 st1 {v0.16b}, [x19], #16
65 AES_ENDPROC(aes_ecb_encrypt)
/*
 * ECB decryption: mirror image of aes_ecb_encrypt above — four
 * independent blocks per iteration via aes_decrypt_block4x, then a
 * one-block tail loop.  x19 = out, x20 = in, x21 = key schedule,
 * w22 = rounds (loop labels/counters are on sampled-out lines).
 */
68 AES_ENTRY(aes_ecb_decrypt)
78 dec_prepare w22, x21, x5
/* 4-way loop */
83 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
84 bl aes_decrypt_block4x
85 st1 {v0.16b-v3.16b}, [x19], #64
86 cond_yield_neon .Lecbdecrestart
/* 1-way tail loop */
92 ld1 {v0.16b}, [x20], #16 /* get next ct block */
93 decrypt_block v0, w22, x21, x5, w6
94 st1 {v0.16b}, [x19], #16
100 AES_ENDPROC(aes_ecb_decrypt)
104 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
105 * int blocks, u8 iv[])
106 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
107 * int blocks, u8 iv[])
/*
 * CBC encryption.  Inherently serial: each ciphertext block becomes
 * the IV for the next, so even the "4 block" path below encrypts one
 * block at a time, only batching the loads/stores.  v4 carries the
 * running IV; it is read from and written back to [x24].
 * NOTE(review): the instruction that copies the last ciphertext block
 * (v3) into v4 before the "return iv" store is on a sampled-out line.
 */
110 AES_ENTRY(aes_cbc_encrypt)
121 ld1 {v4.16b}, [x24] /* get iv */
122 enc_prepare w22, x21, x6
/* 4-block batch: chain xor-then-encrypt through v0..v3 */
127 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
128 eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
129 encrypt_block v0, w22, x21, x6, w7
130 eor v1.16b, v1.16b, v0.16b
131 encrypt_block v1, w22, x21, x6, w7
132 eor v2.16b, v2.16b, v1.16b
133 encrypt_block v2, w22, x21, x6, w7
134 eor v3.16b, v3.16b, v2.16b
135 encrypt_block v3, w22, x21, x6, w7
136 st1 {v0.16b-v3.16b}, [x19], #64
138 st1 {v4.16b}, [x24] /* return iv */
139 cond_yield_neon .Lcbcencrestart
/* one-block tail loop */
145 ld1 {v0.16b}, [x20], #16 /* get next pt block */
146 eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
147 encrypt_block v4, w22, x21, x6, w7
148 st1 {v4.16b}, [x19], #16
152 st1 {v4.16b}, [x24] /* return iv */
155 AES_ENDPROC(aes_cbc_encrypt)
/*
 * CBC decryption.  Unlike encryption this parallelizes: four blocks
 * are decrypted at once, then each plaintext is xored with the
 * previous ciphertext (v7 = IV / previous ct, v4-v6 = saved copies
 * of this batch's ciphertext).
 * NOTE(review): the moves that save the ciphertext copies into v4-v6
 * before aes_decrypt_block4x, and the input-pointer rewind matching
 * the "reload 1 ct block" below, are on sampled-out lines.
 */
158 AES_ENTRY(aes_cbc_decrypt)
169 ld1 {v7.16b}, [x24] /* get iv */
170 dec_prepare w22, x21, x6
/* 4-block batch */
175 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
179 bl aes_decrypt_block4x
181 eor v0.16b, v0.16b, v7.16b
182 eor v1.16b, v1.16b, v4.16b
183 ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
184 eor v2.16b, v2.16b, v5.16b
185 eor v3.16b, v3.16b, v6.16b
186 st1 {v0.16b-v3.16b}, [x19], #64
187 st1 {v7.16b}, [x24] /* return iv */
188 cond_yield_neon .Lcbcdecrestart
/* one-block tail loop: keep ct in v1 so it can become the next iv */
194 ld1 {v1.16b}, [x20], #16 /* get next ct block */
195 mov v0.16b, v1.16b /* ...and copy to v0 */
196 decrypt_block v0, w22, x21, x6, w7
197 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
198 mov v7.16b, v1.16b /* ct is next iv */
199 st1 {v0.16b}, [x19], #16
203 st1 {v7.16b}, [x24] /* return iv */
206 AES_ENDPROC(aes_cbc_decrypt)
210 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
211 * int blocks, u8 ctr[])
/*
 * CTR encryption.  The big-endian counter lives in v4; its low 64
 * bits are also kept byte-swabbed in x6 for cheap incrementing.  The
 * 4-way path materializes counter+1/2/3 via the vector addend
 * constant in q8; the scalar path increments x6 and branches to a
 * carry handler on 64-bit overflow.
 * NOTE(review): heavily sampled — the counter-rev/dup setup, the
 * branch structure between the 4-way and 1-way paths, and the
 * .Lctrtailblock/.Lctrcarry bodies are mostly on missing lines, so
 * the comments below describe only what is visible.
 */
214 AES_ENTRY(aes_ctr_encrypt)
225 enc_prepare w22, x21, x6
228 umov x6, v4.d[1] /* keep swabbed ctr in reg */
/* 4-way path: check whether ctr+4 would wrap the low 32 bits */
233 cmn w6, #4 /* 32 bit overflow? */
235 ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
238 add v7.4s, v7.4s, v8.4s
/* keystream blocks are in v0-v3; xor with 3+1 loaded input blocks */
246 ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
247 bl aes_encrypt_block4x
248 eor v0.16b, v5.16b, v0.16b
249 ld1 {v5.16b}, [x20], #16 /* get 1 input block */
250 eor v1.16b, v6.16b, v1.16b
251 eor v2.16b, v7.16b, v2.16b
252 eor v3.16b, v5.16b, v3.16b
253 st1 {v0.16b-v3.16b}, [x19], #64
258 st1 {v4.16b}, [x24] /* return next CTR value */
259 cond_yield_neon .Lctrrestart
/* 1-way path: encrypt the counter in v0, bump the swabbed copy */
266 encrypt_block v0, w22, x21, x8, w7
268 adds x6, x6, #1 /* increment BE ctr */
271 bcs .Lctrcarry /* overflow? */
275 bmi .Lctrtailblock /* blocks <0 means tail block */
276 ld1 {v3.16b}, [x20], #16
277 eor v3.16b, v0.16b, v3.16b
278 st1 {v3.16b}, [x19], #16
282 st1 {v4.16b}, [x24] /* return next CTR value */
/* carry path: propagate the increment into the counter's upper half */
292 umov x7, v4.d[0] /* load upper word of ctr */
293 rev x7, x7 /* ... to handle the carry */
298 AES_ENDPROC(aes_ctr_encrypt)
303 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
304 * int blocks, u8 const rk2[], u8 iv[], int first)
305 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
306 * int blocks, u8 const rk2[], u8 iv[], int first)
/*
 * next_tweak: advance the XTS tweak — \out = \in * x in GF(2^128).
 * The add doubles both 64-bit lanes (shift left by one); sshr #63
 * broadcasts each lane's carried-out top bit; the ext rotates those
 * carry masks so the low lane's carry feeds the high lane and the
 * high lane's carry, masked with \const (the 0x87 reduction
 * polynomial constant defined below), folds back into the low lane.
 * \tmp is scratch.  NOTE(review): the closing .endm is on a
 * sampled-out line of this excerpt.
 */
309 .macro next_tweak, out, in, const, tmp
310 sshr \tmp\().2d, \in\().2d, #63
311 and \tmp\().16b, \tmp\().16b, \const\().16b
312 add \out\().2d, \in\().2d, \in\().2d
313 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
314 eor \out\().16b, \out\().16b, \tmp\().16b
/*
 * Constant for next_tweak: { carry-into-high = 1, reduction = 0x87 },
 * stored in whichever quadword order matches the CPU endianness.
 * NOTE(review): the label naming this constant is on a sampled-out
 * line of this excerpt.
 */
318 CPU_LE( .quad 1, 0x87 )
319 CPU_BE( .quad 0x87, 1 )
/*
 * XTS encryption.  On the first call (w7 != 0) the IV is encrypted
 * with the tweak key (rk2) to produce the initial tweak, then the
 * data key (rk1) is loaded; later calls re-prepare the data key and
 * advance the tweak.  The 4-way loop keeps four consecutive tweaks
 * in v4-v7 (v8 scratch), xors them in before and after
 * aes_encrypt_block4x.  Note v7 doubles as the tweak constant
 * outside the batch and as the 4th tweak inside it — statement order
 * here is load-bearing.
 * NOTE(review): sampled excerpt — loop labels, block counters, and
 * the tweak-constant load into v7 are on missing lines.
 */
321 AES_ENTRY(aes_xts_encrypt)
332 cbz w7, .Lxtsencnotfirst
/* first call: derive the initial tweak from the IV using rk2 */
334 enc_prepare w3, x5, x8
335 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
336 enc_switch_key w3, x2, x8
/* subsequent calls: reload the data key and step the tweak */
343 enc_prepare w22, x21, x8
346 next_tweak v4, v4, v7, v8
/* 4-way loop: tweaks v4..v7, xor-encrypt-xor */
350 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
351 next_tweak v5, v4, v7, v8
352 eor v0.16b, v0.16b, v4.16b
353 next_tweak v6, v5, v7, v8
354 eor v1.16b, v1.16b, v5.16b
355 eor v2.16b, v2.16b, v6.16b
356 next_tweak v7, v6, v7, v8
357 eor v3.16b, v3.16b, v7.16b
358 bl aes_encrypt_block4x
359 eor v3.16b, v3.16b, v7.16b
360 eor v0.16b, v0.16b, v4.16b
361 eor v1.16b, v1.16b, v5.16b
362 eor v2.16b, v2.16b, v6.16b
363 st1 {v0.16b-v3.16b}, [x19], #64
367 cond_yield_neon .Lxtsencrestart
/* one-block tail loop with tweak in v4 */
373 ld1 {v1.16b}, [x20], #16
374 eor v0.16b, v1.16b, v4.16b
375 encrypt_block v0, w22, x21, x8, w7
376 eor v0.16b, v0.16b, v4.16b
377 st1 {v0.16b}, [x19], #16
380 next_tweak v4, v4, v7, v8
386 AES_ENDPROC(aes_xts_encrypt)
/*
 * XTS decryption: same tweak schedule as aes_xts_encrypt above (the
 * tweak is always produced by ENcrypting the IV with rk2, even for
 * decryption — see the encrypt_block on the first-call path), but the
 * data path uses the decryption key schedule and
 * aes_decrypt_block4x/decrypt_block.
 * NOTE(review): sampled excerpt — loop labels, block counters, and
 * the tweak-constant load into v7 are on missing lines.
 */
389 AES_ENTRY(aes_xts_decrypt)
400 cbz w7, .Lxtsdecnotfirst
/* first call: derive the initial tweak from the IV using rk2 */
402 enc_prepare w3, x5, x8
403 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
404 dec_prepare w3, x2, x8
/* subsequent calls: reload the data key and step the tweak */
411 dec_prepare w22, x21, x8
414 next_tweak v4, v4, v7, v8
/* 4-way loop: tweaks v4..v7, xor-decrypt-xor */
418 ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
419 next_tweak v5, v4, v7, v8
420 eor v0.16b, v0.16b, v4.16b
421 next_tweak v6, v5, v7, v8
422 eor v1.16b, v1.16b, v5.16b
423 eor v2.16b, v2.16b, v6.16b
424 next_tweak v7, v6, v7, v8
425 eor v3.16b, v3.16b, v7.16b
426 bl aes_decrypt_block4x
427 eor v3.16b, v3.16b, v7.16b
428 eor v0.16b, v0.16b, v4.16b
429 eor v1.16b, v1.16b, v5.16b
430 eor v2.16b, v2.16b, v6.16b
431 st1 {v0.16b-v3.16b}, [x19], #64
435 cond_yield_neon .Lxtsdecrestart
/* one-block tail loop with tweak in v4 */
441 ld1 {v1.16b}, [x20], #16
442 eor v0.16b, v1.16b, v4.16b
443 decrypt_block v0, w22, x21, x8, w7
444 eor v0.16b, v0.16b, v4.16b
445 st1 {v0.16b}, [x19], #16
448 next_tweak v4, v4, v7, v8
454 AES_ENDPROC(aes_xts_decrypt)
457 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
458 * int blocks, u8 dg[], int enc_before, int enc_after)
/*
 * CBC-MAC style digest update: dg is repeatedly xored with the next
 * input block and (conditionally) encrypted, then written back.  The
 * digest lives in v0, read from/written to [x23].  The early
 * encrypt_block using w2/x1 is the enc_before pass; in the loops the
 * key is addressed via its preserved copies w21/x20.  The csinv
 * presumably folds enc_after/last-block state into a flag register —
 * TODO confirm, the branches consuming x5 are on missing lines.
 * NOTE(review): heavily sampled — the prologue register copies, loop
 * labels, block counters, and branch structure are not visible here.
 */
460 AES_ENTRY(aes_mac_update)
470 ld1 {v0.16b}, [x23] /* get dg */
471 enc_prepare w2, x1, x7
/* enc_before: encrypt the digest once before absorbing input */
474 encrypt_block v0, w2, x1, x7, w8
/* 4-block batch: absorb v1..v4, encrypting between absorptions */
479 ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
480 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
481 encrypt_block v0, w21, x20, x7, w8
482 eor v0.16b, v0.16b, v2.16b
483 encrypt_block v0, w21, x20, x7, w8
484 eor v0.16b, v0.16b, v3.16b
485 encrypt_block v0, w21, x20, x7, w8
486 eor v0.16b, v0.16b, v4.16b
488 csinv x5, x24, xzr, eq
490 encrypt_block v0, w21, x20, x7, w8
491 st1 {v0.16b}, [x23] /* return dg */
492 cond_yield_neon .Lmacrestart
/* one-block tail loop */
498 ld1 {v1.16b}, [x19], #16 /* get next pt block */
499 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
502 csinv x5, x24, xzr, eq
506 encrypt_block v0, w21, x20, x7, w8
510 st1 {v0.16b}, [x23] /* return dg */
/* restart path after a yield: reload digest and key schedule */
515 ld1 {v0.16b}, [x23] /* get dg */
516 enc_prepare w21, x20, x0
518 AES_ENDPROC(aes_mac_update)