/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */

	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)

aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)

#if MAX_STRIDE == 5
aes_encrypt_block5x:
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block5x)

aes_decrypt_block5x:
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block5x)
#endif
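
	/*
	 * The 4-way and 5-way helpers above keep several independent AES
	 * blocks in flight so their rounds can overlap in the pipeline.
	 * MAX_STRIDE defaults to 4 and may be overridden by the including
	 * file; the ST4()/ST5() wrappers emit only the code path that
	 * matches the configured stride.
	 */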
	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
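
	/*
	 * Both ECB routines follow the same shape: process MAX_STRIDE
	 * blocks at a time through the interleaved helpers, then handle
	 * any remainder one block at a time.
	 */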
AES_ENTRY(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbencout
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lecbdecout
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)
	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */
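
	/*
	 * CBC encryption is inherently serial: each plaintext block is
	 * xor'ed with the previous ciphertext block before being encrypted,
	 * so aes_cbc_encrypt below cannot use the interleaved helpers.
	 * Decryption carries no such dependency, so aes_cbc_decrypt
	 * processes MAX_STRIDE blocks at a time.
	 */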
AES_ENTRY(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b			/* last ct is next iv */
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)
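
	/*
	 * In the decryption path, cbciv carries the ciphertext block that
	 * serves as the IV for the next block: the interleaved loop reloads
	 * it straight from the input stream, and the single-block loop
	 * copies each ciphertext block into it before moving on.
	 */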
AES_ENTRY(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)
	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */
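
	/*
	 * These routines implement the ciphertext stealing tail of CBC for
	 * a final span of 16 < bytes <= 32: the last full block and the
	 * partial block are handled together, using overlapping loads and
	 * stores plus the .Lcts_permute_table masks so that no memory
	 * outside the message is touched.
	 */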
AES_ENTRY(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_encrypt)
AES_ENTRY(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16
	add		x9, x8, #32
	add		x8, x8, x4
	sub		x9, x9, x4
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	tbl		v2.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v0.16b

	tbx		v0.16b, {v1.16b}, v4.16b
	tbl		v2.16b, {v2.16b}, v3.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_decrypt)
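
	/*
	 * The permute table below is read at an offset derived from the
	 * message length and used with tbl/tbx: the 0x0-0xf entries in the
	 * middle select source bytes, while the surrounding 0xff entries
	 * yield zero lanes (tbl) or leave the destination lanes untouched
	 * (tbx), producing exactly the shift/splice masks the CTS code
	 * needs.
	 */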
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
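
	/*
	 * The byte-swapped low 64 bits of the counter are kept in x6, so
	 * the interleaved path can derive successive counter values with
	 * cheap scalar arithmetic on the low 32 bits.  If those 32 bits
	 * could overflow within this call, the code falls back to the
	 * single-block loop, which propagates the carry across the full
	 * 128-bit counter via .Lctrcarry.
	 */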
AES_ENTRY(aes_ctr_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	enc_prepare	w3, x2, x6
	ld1		{vctr.16b}, [x5]

	umov		x6, vctr.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
	cmn		w6, w4			/* 32 bit overflow? */
	bcs		.Lctrloop
.LctrloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lctr1x
	add		w7, w6, #1
	mov		v0.16b, vctr.16b
	add		w8, w6, #2
	mov		v1.16b, vctr.16b
	add		w9, w6, #3
	mov		v2.16b, vctr.16b
	rev		w7, w7
	mov		v3.16b, vctr.16b
	rev		w8, w8
ST5(	mov		v4.16b, vctr.16b	)
	mov		v1.s[3], w7
	rev		w9, w9
ST5(	add		w10, w6, #4		)
	mov		v2.s[3], w8
ST5(	rev		w10, w10		)
	mov		v3.s[3], w9
ST5(	mov		v4.s[3], w10		)
	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [x1], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b	)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16	)
	add		x6, x6, #MAX_STRIDE
	rev		x7, x6
	ins		vctr.d[1], x7
	cbz		w4, .Lctrout
	b		.LctrloopNx
.Lctr1x:
	adds		w4, w4, #MAX_STRIDE
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, vctr.16b
	encrypt_block	v0, w3, x2, x8, w7

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6
	ins		vctr.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w4, w4, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x1], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x0], #16
	bne		.Lctrloop

.Lctrout:
	st1		{vctr.16b}, [x5]	/* return next CTR value */
	ldp		x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x0]
	b		.Lctrout

.Lctrcarry:
	umov		x7, vctr.d[0]		/* load upper word of ctr */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		vctr.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm
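
	/*
	 * next_tweak multiplies the 128-bit tweak by x in GF(2^128): the
	 * add doubles each 64-bit half while dropping its top bit, and
	 * sshr turns each of those top bits into an all-ones mask.  After
	 * and'ing with xtsmask (0x1 in one 64-bit lane, 0x87 in the other,
	 * as set up by xts_load_mask) and swapping halves with ext, the
	 * final eor feeds the carry out of the low half into bit 64 and
	 * folds the carry out of bit 127 back in as 0x87, the reduction
	 * constant of the XTS polynomial x^128 + x^7 + x^2 + x + 1.
	 */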
AES_ENTRY(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #4
	bmi		.Lxtsenc1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsencout
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #4
	beq		.Lxtsencout
.Lxtsencloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsencout
	next_tweak	v4, v4, v8
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_encrypt)
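
	/*
	 * XTS decryption differs from encryption only in the direction of
	 * the block cipher: the tweak chain is always generated with the
	 * encryption round keys derived from rk2, which is why the first
	 * tweak below is still computed with enc_prepare/encrypt_block
	 * before switching to the decryption keys.
	 */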
AES_ENTRY(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #4
	bmi		.Lxtsdec1x
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #4
	beq		.Lxtsdecout
.Lxtsdecloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsdecout
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_decrypt)
	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
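
	/*
	 * CBC-MAC update: the running digest dg is xor'ed with each input
	 * block and re-encrypted.  enc_before requests an extra encryption
	 * of dg before the first block is processed, and enc_after one
	 * after the last, letting the caller defer the final encryption
	 * until it knows whether more data will follow.
	 */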
AES_ENTRY(aes_mac_update)
	frame_push	6

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4
	mov		x24, x6

	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w22, w22, #4
	bmi		.Lmac1x
	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmacout
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart		/* yield NEON if a reschedule is due */
	b		.Lmacloop4x
.Lmac1x:
	add		w22, w22, #4
.Lmacloop:
	cbz		w22, .Lmacout
	ld1		{v1.16b}, [x19], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq
	cbz		w5, .Lmacout

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

.Lmacrestart:
	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b		.Lmacloop4x
AES_ENDPROC(aes_mac_update)