/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.fpu		crypto-neon-fp-armv8
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm
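	@ Note: aese.8 performs AddRoundKey + SubBytes + ShiftRows and aesmc.8
	@ the MixColumns step (aesd.8/aesimc.8 are the inverse-cipher
	@ counterparts).  Keeping each aese/aesmc and aesd/aesimc pair adjacent
	@ lets implementations that fuse these instructions execute a full
	@ round in one go.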
	.macro		enc_dround, key1, key2
	.macro		dec_dround, key1, key2
	.macro		enc_fround, key1, key2, key3
	.macro		dec_fround, key1, key2, key3
	.macro		enc_dround_3x, key1, key2
	.macro		dec_dround_3x, key1, key2
	.macro		enc_fround_3x, key1, key2, key3
	.macro		dec_fround_3x, key1, key2, key3
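	@ Naming: a "dround" runs two consecutive rounds with \key1 and \key2;
	@ an "fround" runs the final two rounds, where the last aese/aesd is
	@ not followed by MixColumns and the last round key (\key3) is applied
	@ with a plain veor.  The _3x variants interleave the same sequence
	@ over q0-q2 to keep three blocks in flight and hide instruction
	@ latency.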
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.8		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm
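	@ do_block consumes the key schedule linearly: q8/q9 hold round keys 0
	@ and 1 (preloaded by prepare_key), the remaining keys are streamed in
	@ pairs from [ip], and q14 holds the last round key.  r3 selects 10, 12
	@ or 14 rounds for AES-128/192/256 via the cmp/blo/beq above.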
	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These should preserve all registers except q0 - q2 and ip
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_3x version only)
	 *   q2        : third in/output block (_3x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

aes_encrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)

aes_decrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.8		{q14}, [ip]		@ load last round key
	.endm
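	@ With \rounds rounds there are \rounds + 1 round keys of 16 bytes
	@ each, so \rk + (\rounds << 4) addresses the last one.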
	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
ENTRY(ce_aes_ecb_encrypt)
	vld1.8		{q0-q1}, [r1]!
	vst1.8		{q0-q1}, [r0]!
ENDPROC(ce_aes_ecb_encrypt)
ENTRY(ce_aes_ecb_decrypt)
	vld1.8		{q0-q1}, [r1]!
	vst1.8		{q0-q1}, [r0]!
ENDPROC(ce_aes_ecb_decrypt)
	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 */
ENTRY(ce_aes_cbc_encrypt)
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
ENDPROC(ce_aes_cbc_encrypt)
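	@ CBC encryption is inherently sequential - every block is xor'ed with
	@ the previous ciphertext before being encrypted - so only the single
	@ block path is used here.  CBC decryption below can use the 3x path.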
ENTRY(ce_aes_cbc_decrypt)
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ keep iv in q6
	vld1.8		{q0-q1}, [r1]!
	vst1.8		{q0-q1}, [r0]!
	vmov		q15, q14		@ preserve last round key
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vst1.8		{q6}, [r5]		@ return iv
ENDPROC(ce_aes_cbc_decrypt)
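	@ In the single block loop above, the xor with the previous ciphertext
	@ is folded into the last round key (q14 = last key ^ prev ct), so the
	@ final AddRoundKey inside aes_decrypt also undoes the CBC chaining.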
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
ENTRY(ce_aes_ctr_encrypt)
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ load ctr
	vmov		r6, s27			@ keep swabbed ctr in r6
	cmn		r6, r4			@ 32 bit overflow?
	vld1.8		{q3-q4}, [r1]!
	vst1.8		{q0-q1}, [r0]!
	bmi		.Lctrtailblock		@ blocks < 0 means tail block
	adds		r6, r6, #1		@ increment BE ctr
	vst1.8		{q0}, [r0, :64]		@ return just the key stream
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
ENDPROC(ce_aes_ctr_encrypt)
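	@ The low 32 bits of the big-endian counter are kept byte swapped in
	@ r6; 'cmn r6, r4' checks whether processing r4 blocks would wrap that
	@ word, in which case the carry is propagated into the higher counter
	@ words (s26, s25, s24) by the .irp sequence above.  For a trailing
	@ partial block only the key stream is written out; xor'ing it into the
	@ remaining input bytes is presumably left to the C glue code.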
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
	 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm
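	@ next_tweak multiplies the 128-bit tweak by x in GF(2^128) modulo
	@ x^128 + x^7 + x^2 + x + 1: vadd.u64 doubles each 64-bit half, while
	@ the vshr/vand/vext sequence re-injects the carry out of the low half
	@ as bit 0 of the high half and reduces the carry out of bit 127 by
	@ xor'ing 0x87 into the low half (q7 holds the { 1, 0x87 } constant).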
	.align		3
.Lxts_mul_x:
	.quad		1, 0x87

ce_aes_xts_init:
	vldr		d14, .Lxts_mul_x
	vldr		d15, .Lxts_mul_x + 8

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]		@ load 'first' arg
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)
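	@ ce_aes_xts_init is the prologue shared by both XTS entry points: it
	@ loads the GF(2^128) constant into q7 and the iv into q0, and, only
	@ when the 'first' argument is 1, turns the iv into the initial tweak
	@ by encrypting it with the second key (tail call into aes_encrypt
	@ with ip already pointing at key 2's 3rd round key).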
ENTRY(ce_aes_xts_encrypt)
	bl		ce_aes_xts_init		@ run shared prologue
	teq		r6, #0			@ start of a block?
	next_tweak	q3, q3, q7, q6
	vld1.8		{q0-q1}, [r1]!		@ get 3 pt blocks
	next_tweak	q4, q3, q7, q6
	next_tweak	q5, q4, q7, q6
	vst1.8		{q0-q1}, [r0]!		@ write 3 ct blocks
	next_tweak	q3, q3, q7, q6
ENDPROC(ce_aes_xts_encrypt)
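	@ The 3-block XTS loop keeps three consecutive tweaks live in q3-q5
	@ (each derived from the previous one with next_tweak); each block is
	@ xor'ed with its tweak before and after the encryption call, and the
	@ tweak for the next iteration is carried forward in q3.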
ENTRY(ce_aes_xts_decrypt)
	bl		ce_aes_xts_init		@ run shared prologue
	teq		r6, #0			@ start of a block?
	next_tweak	q3, q3, q7, q6
	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
	next_tweak	q4, q3, q7, q6
	next_tweak	q5, q4, q7, q6
	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
	add		ip, r2, #32		@ 3rd round key
	next_tweak	q3, q3, q7, q6
ENDPROC(ce_aes_xts_decrypt)
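	@ Decryption mirrors the encrypt path but uses the inverse-cipher
	@ routines; the tweak schedule is identical, since XTS always derives
	@ the tweaks by encrypting the iv with the second key.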
	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *                             AES sbox substitution on each byte in
	 *                             'input'
	 */

	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *                                        operation on round key *src
	 */
ENDPROC(ce_aes_invert)
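	@ ce_aes_sub and ce_aes_invert are key schedule helpers: the former
	@ exposes the aese byte substitution (SubWord) to C code, and the
	@ latter applies Inverse MixColumns to a round key, as needed when
	@ deriving the decryption key schedule; both are presumably called
	@ from the key expansion code in the C glue.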