/*
 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.fpu		crypto-neon-fp-armv8
	.align		3
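	@ The .fpu override above is what makes the v8 Crypto Extensions
	@ mnemonics used below (aese/aesmc/aesd/aesimc) acceptable to the
	@ assembler; an ordinary NEON FPU setting would reject them.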
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm

	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm
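	/*
	 * One aese.8/aesmc.8 (or aesd.8/aesimc.8) pair performs a full AES
	 * (inverse) round: aese does AddRoundKey + ShiftRows + SubBytes, and
	 * aesmc does MixColumns. Keeping each pair adjacent and operating on
	 * the same register matters: many ARMv8 implementations fuse such a
	 * pair into a single operation, so the pairing in these macros is a
	 * performance feature as well as a notational one.
	 */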
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm

	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm

	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3
	.endm

	.macro		enc_dround_3x, key1, key2
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	enc_round	q0, \key2
	enc_round	q1, \key2
	enc_round	q2, \key2
	.endm

	.macro		dec_dround_3x, key1, key2
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	dec_round	q0, \key2
	dec_round	q1, \key2
	dec_round	q2, \key2
	.endm

	.macro		enc_fround_3x, key1, key2, key3
	enc_round	q0, \key1
	enc_round	q1, \key1
	enc_round	q2, \key1
	aese.8		q0, \key2
	aese.8		q1, \key2
	aese.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm

	.macro		dec_fround_3x, key1, key2, key3
	dec_round	q0, \key1
	dec_round	q1, \key1
	dec_round	q2, \key1
	aesd.8		q0, \key2
	aesd.8		q1, \key2
	aesd.8		q2, \key2
	veor		q0, q0, \key3
	veor		q1, q1, \key3
	veor		q2, q2, \key3
	.endm
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		0f			@ AES-128: 10 rounds
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		1f			@ AES-192: 12 rounds
	vld1.8		{q12-q13}, [ip]
	\dround		q10, q11
0:	\fround		q12, q13, q14
	bx		lr

1:	\fround		q10, q11, q14
	bx		lr
	.endm
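	/*
	 * Key size dispatch, spelled out: 'cmp r3, #12' compares the round
	 * count with 12. AES-128 (rounds == 10) takes 'blo' after four double
	 * rounds and finishes at 0:; AES-192 (rounds == 12) takes 'beq' after
	 * five and finishes at 1:; AES-256 (rounds == 14) falls through and
	 * consumes all six key pairs. \fround supplies the last two rounds,
	 * ending with the bare AddRoundKey (veor with q14, no MixColumns).
	 * ip walks the key schedule; q8/q9 come preloaded via prepare_key.
	 */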
	/*
	 * Internal, non-AAPCS-compliant functions that implement the core AES
	 * transforms. These should preserve all registers except q0 - q2 and ip.
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_3x version only)
	 *   q2        : third in/output block (_3x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)

	.align		6
aes_decrypt:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)

	.align		6
aes_encrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)

	.align		6
aes_decrypt_3x:
	add		ip, r2, #32		@ 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4
	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
	vld1.8		{q14}, [ip]		@ load last round key
	.endm
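	/*
	 * prepare_key computes ip = rk + 16 * rounds. At 16 bytes per round
	 * key, that is exactly the offset of the last of the rounds + 1 round
	 * keys: e.g. for AES-128 (rounds == 10), ip = rk + 160 points at the
	 * 11th and final round key, which is loaded into q14 above.
	 */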
	/*
	 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks)
	 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks)
	 */
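	/*
	 * Per AAPCS, the first four arguments of these prototypes arrive in
	 * r0 (out), r1 (in), r2 (rk) and r3 (rounds); any further arguments,
	 * such as 'blocks' here, are passed on the stack and fetched with
	 * ldr/ldrd relative to sp in the prologues below.
	 */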
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbencloop3x:
	subs		r4, r4, #3
	bmi		.Lecbenc1x
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lecbencloop3x
.Lecbenc1x:
	adds		r4, r4, #3
	beq		.Lecbencout
.Lecbencloop:
	vld1.8		{q0}, [r1, :64]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lecbencloop
.Lecbencout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)
ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]
	prepare_key	r2, r3
.Lecbdecloop3x:
	subs		r4, r4, #3
	bmi		.Lecbdec1x
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lecbdecloop3x
.Lecbdec1x:
	adds		r4, r4, #3
	beq		.Lecbdecout
.Lecbdecloop:
	vld1.8		{q0}, [r1, :64]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)
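	/*
	 * The 3-block inner loops above (and in the CBC decrypt, CTR and XTS
	 * routines below) exist to hide instruction latency: the rounds of a
	 * single block form one long dependency chain, so interleaving three
	 * independent blocks keeps the crypto unit busy on typical
	 * implementations. The 1x tail loops mop up the remainder.
	 */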
	/*
	 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q0}, [r5]
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1, :64]!	@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)
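	/*
	 * Note that CBC encryption has no 3x variant: each plaintext block is
	 * xor'ed with the previous ciphertext block before being encrypted,
	 * so the blocks form a serial dependency chain and cannot be
	 * interleaved. Decryption, below, has no such dependency.
	 */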
ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ keep iv in q6
	prepare_key	r2, r3
.Lcbcdecloop3x:
	subs		r4, r4, #3
	bmi		.Lcbcdec1x
	vld1.8		{q0-q1}, [r1, :64]!
	vld1.8		{q2}, [r1, :64]!
	vmov		q3, q0
	vmov		q4, q1
	vmov		q5, q2
	bl		aes_decrypt_3x
	veor		q0, q0, q6
	veor		q1, q1, q3
	veor		q2, q2, q4
	vmov		q6, q5
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	b		.Lcbcdecloop3x
.Lcbcdec1x:
	adds		r4, r4, #3
	beq		.Lcbcdecout
	vmov		q15, q14		@ preserve last round key
.Lcbcdecloop:
	vld1.8		{q0}, [r1, :64]!	@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key
	vmov		q6, q0
	bl		aes_decrypt
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	vst1.8		{q6}, [r5]		@ write back iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
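	/*
	 * The q14/q15 shuffle in the 1x loop is worth spelling out: the final
	 * decryption round ends with 'veor q0, q0, q14', i.e. an xor with the
	 * last round key. CBC decryption must also xor the result with the
	 * previous ciphertext block. Saving the real last round key in q15
	 * and loading q14 with (q15 ^ previous ciphertext) before each call
	 * folds the CBC xor into the final AES round at no extra cost.
	 */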
	/*
	 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 ctr[])
	 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]
	vld1.8		{q6}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s27			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctrloop
.Lctrloop3x:
	subs		r4, r4, #3
	bmi		.Lctr1x
	add		r6, r6, #1
	vmov		q0, q6
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip
	vld1.8		{q3-q4}, [r1, :64]!
	vld1.8		{q5}, [r1, :64]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6
	vst1.8		{q0-q1}, [r0, :64]!
	vst1.8		{q2}, [r0, :64]!
	vmov		s27, ip
	b		.Lctrloop3x
.Lctr1x:
	adds		r4, r4, #3
	beq		.Lctrout
.Lctrloop:
	vmov		q0, q6
	bl		aes_encrypt
	subs		r4, r4, #1
	bmi		.Lctrhalfblock		@ blocks < 0 means 1/2 block
	vld1.8		{q3}, [r1, :64]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0, :64]!
	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6
	vmov		s27, ip
	bcs		.Lctrcarry
	teq		r4, #0
	bne		.Lctrloop
.Lctrout:
	vst1.8		{q6}, [r5]		@ return next CTR value
	pop		{r4-r6, pc}

.Lctrhalfblock:
	vld1.8		{d1}, [r1, :64]
	veor		d0, d0, d1
	vst1.8		{d0}, [r0, :64]
	b		.Lctrout

.Lctrcarry:
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		0f
	.endr
0:	b		.Lctrloop
ENDPROC(ce_aes_ctr_encrypt)
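	/*
	 * Carry handling: r6 caches the low 32-bit word of the big-endian
	 * counter (s27), byte-swapped into CPU order. The 'cmn r6, r4' in the
	 * prologue adds the block count to that word; a carry means the word
	 * will wrap during this call, so the fast 3x path (which increments
	 * only the low word) is skipped. When an increment in the 1x loop
	 * wraps, .Lctrcarry propagates the carry into the higher counter
	 * words s26, s25, s24 in turn: each is byte-swapped, incremented and
	 * swapped back, and the unrolled .irp sequence stops early (bcc 0f)
	 * as soon as an increment does not wrap.
	 */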
	/*
	 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		      int blocks, u8 iv[], u8 const rk2[], int first)
	 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		      int blocks, u8 iv[], u8 const rk2[], int first)
	 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in
	vext.8		\tmp, \tmp, \tmp, #8
	veor		\out, \out, \tmp
	.endm
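	/*
	 * next_tweak multiplies the 128-bit tweak by x in GF(2^128) with the
	 * XTS reduction polynomial x^128 + x^7 + x^2 + x + 1. Worked through:
	 * vadd.u64 doubles each 64-bit half but drops each half's msb;
	 * vshr.s64 #63 turns each msb into an all-ones mask; vand reduces the
	 * masks to the per-half constants {1, 0x87} loaded from .Lxts_mul_x;
	 * vext #8 swaps the halves, so the final veor adds 1 into the high
	 * half (the carry out of the low half) and 0x87 into the low half
	 * (the reduction of the bit shifted out of the top).
	 */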
	.align		3
.Lxts_mul_x:
	.quad		1, 0x87

ce_aes_xts_init:
	vldr		d14, .Lxts_mul_x
	vldr		d15, .Lxts_mul_x + 8

	ldrd		r4, r5, [sp, #16]	@ load args
	ldr		r6, [sp, #28]
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)
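	/*
	 * The tail call above reuses the body of aes_encrypt: prepare_key has
	 * loaded q8/q9/q14 from the key 2 schedule and ip points at its 3rd
	 * round key, so branching to .Laes_encrypt_tweak (just past the
	 * 'add ip, r2, #32' in aes_encrypt) runs the cipher on the IV in q0,
	 * producing the initial XTS tweak. The 'bx lr' at the end of do_block
	 * then returns straight to whichever entry point bl'ed this prologue.
	 */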
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsenc3x

.Lxtsencloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsenc3x:
	subs		r4, r4, #3
	bmi		.Lxtsenc1x
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 pt blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 ct blocks
	vst1.8		{q2}, [r0, :64]!
	vmov		q3, q5
	teq		r4, #0
	beq		.Lxtsencout
	b		.Lxtsencloop3x
.Lxtsenc1x:
	adds		r4, r4, #3
	beq		.Lxtsencout
.Lxtsencloop:
	vld1.8		{q0}, [r1, :64]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	beq		.Lxtsencout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsencloop
.Lxtsencout:
	vst1.8		{q3}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3
	vmov		q3, q0

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec3x

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1, :64]!	@ get 3 ct blocks
	vld1.8		{q2}, [r1, :64]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0, :64]!	@ write 3 pt blocks
	vst1.8		{q2}, [r0, :64]!
	vmov		q3, q5
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:
	adds		r4, r4, #3
	beq		.Lxtsdecout
.Lxtsdecloop:
	vld1.8		{q0}, [r1, :64]!
	veor		q0, q0, q3
	add		ip, r2, #32		@ 3rd round key
	bl		aes_decrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0, :64]!
	subs		r4, r4, #1
	beq		.Lxtsdecout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
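	/*
	 * Both XTS directions implement the XEX construction per block:
	 * out = C(in ^ T) ^ T, where C is the core cipher keyed by rk1 and
	 * the tweak T advances via next_tweak between consecutive blocks.
	 * Only the core transform (aes_encrypt_3x/aes_encrypt versus
	 * aes_decrypt_3x/aes_decrypt) differs between the two entry points.
	 */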
	/*
	 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
	 *			       AES sbox substitution on each byte in
	 *			       'input'
	 */
ENTRY(ce_aes_sub)
	vdup.32		q1, r0
	veor		q0, q0, q0
	aese.8		q0, q1
	vmov		r0, s0
	bx		lr
ENDPROC(ce_aes_sub)
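	/*
	 * Why this works: aese computes SubBytes(ShiftRows(state ^ key)), so
	 * with a zeroed state the result is SubBytes(ShiftRows(input)). The
	 * vdup.32 replicates the input word into all four columns, which
	 * makes ShiftRows a no-op on the replicated state, and s0 therefore
	 * holds the plain sbox substitution of each input byte, as needed by
	 * the key expansion in the C glue code.
	 */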
	/*
	 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
	 *					   operation on round key *src
	 */
ENTRY(ce_aes_invert)
	vld1.8		{q0}, [r1]
	aesimc.8	q0, q0
	vst1.8		{q0}, [r0]
	bx		lr
ENDPROC(ce_aes_invert)
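	/*
	 * This is the key conversion step of the AES "equivalent inverse
	 * cipher": decryption with aesd/aesimc consumes the encryption round
	 * keys in reverse order, with InvMixColumns applied to every round
	 * key except the first and the last; ce_aes_invert performs that one
	 * aesimc per affected round key at key setup time.
	 */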