/* SPDX-License-Identifier: GPL-2.0-only */
 * Bit sliced AES using NEON instructions
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
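 *
 * In outline: eight AES blocks are processed in parallel in NEON registers
 * v0-v7. After the 'bitslice' transposition below, each of the eight
 * registers holds one bit position of every byte of all eight blocks, so
 * the S-box and the other round operations can be evaluated as a fixed
 * sequence of bitwise instructions, with no data-dependent table lookups.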
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>
        .macro in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7

        .macro out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7

        .macro inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5

        .macro inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
        .macro mul_gf4, x0, x1, y0, y1, t0, t1

        .macro mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1

        .macro mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
                           y0, y1, y2, y3, t0, t1, t2, t3
        mul_gf4 \x0, \x1, \y0, \y1, \t2, \t3
        mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
        mul_gf4_n_gf4 \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
        mul_gf4 \x4, \x5, \y0, \y1, \t2, \t3

        .macro inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
                          t0, t1, t2, t3, s0, s1, s2, s3
        mul_gf16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
                   \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
        .macro sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
                     t0, t1, t2, t3, s0, s1, s2, s3
        in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
                 \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
        inv_gf256 \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
                  \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
                  \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
                  \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
        out_bs_ch \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
                  \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b

        .macro inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
                         t0, t1, t2, t3, s0, s1, s2, s3
        inv_in_bs_ch \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
                     \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
        inv_gf256 \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
                  \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
                  \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
                  \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
        inv_out_bs_ch \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
                      \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
        ldp q16, q17, [bskey], #128
        ldp q18, q19, [bskey, #-96]
        ldp q20, q21, [bskey, #-64]
        ldp q22, q23, [bskey, #-32]

        ldp q16, q17, [bskey, #-128]!
        ldp q18, q19, [bskey, #32]
        ldp q20, q21, [bskey, #64]
        ldp q22, q23, [bskey, #96]
        .macro add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
        eor \x0\().16b, \x0\().16b, v16.16b
        eor \x1\().16b, \x1\().16b, v17.16b
        eor \x2\().16b, \x2\().16b, v18.16b
        eor \x3\().16b, \x3\().16b, v19.16b
        eor \x4\().16b, \x4\().16b, v20.16b
        eor \x5\().16b, \x5\().16b, v21.16b
        eor \x6\().16b, \x6\().16b, v22.16b
        eor \x7\().16b, \x7\().16b, v23.16b
        .macro shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
        tbl \x0\().16b, {\x0\().16b}, \mask\().16b
        tbl \x1\().16b, {\x1\().16b}, \mask\().16b
        tbl \x2\().16b, {\x2\().16b}, \mask\().16b
        tbl \x3\().16b, {\x3\().16b}, \mask\().16b
        tbl \x4\().16b, {\x4\().16b}, \mask\().16b
        tbl \x5\().16b, {\x5\().16b}, \mask\().16b
        tbl \x6\().16b, {\x6\().16b}, \mask\().16b
        tbl \x7\().16b, {\x7\().16b}, \mask\().16b
        .macro mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
                         t0, t1, t2, t3, t4, t5, t6, t7, inv
        ext \t0\().16b, \x0\().16b, \x0\().16b, #12
        ext \t1\().16b, \x1\().16b, \x1\().16b, #12
        eor \x0\().16b, \x0\().16b, \t0\().16b
        ext \t2\().16b, \x2\().16b, \x2\().16b, #12
        eor \x1\().16b, \x1\().16b, \t1\().16b
        ext \t3\().16b, \x3\().16b, \x3\().16b, #12
        eor \x2\().16b, \x2\().16b, \t2\().16b
        ext \t4\().16b, \x4\().16b, \x4\().16b, #12
        eor \x3\().16b, \x3\().16b, \t3\().16b
        ext \t5\().16b, \x5\().16b, \x5\().16b, #12
        eor \x4\().16b, \x4\().16b, \t4\().16b
        ext \t6\().16b, \x6\().16b, \x6\().16b, #12
        eor \x5\().16b, \x5\().16b, \t5\().16b
        ext \t7\().16b, \x7\().16b, \x7\().16b, #12
        eor \x6\().16b, \x6\().16b, \t6\().16b
        eor \t1\().16b, \t1\().16b, \x0\().16b
        eor \x7\().16b, \x7\().16b, \t7\().16b
        ext \x0\().16b, \x0\().16b, \x0\().16b, #8
        eor \t2\().16b, \t2\().16b, \x1\().16b
        eor \t0\().16b, \t0\().16b, \x7\().16b
        eor \t1\().16b, \t1\().16b, \x7\().16b
        ext \x1\().16b, \x1\().16b, \x1\().16b, #8
        eor \t5\().16b, \t5\().16b, \x4\().16b
        eor \x0\().16b, \x0\().16b, \t0\().16b
        eor \t6\().16b, \t6\().16b, \x5\().16b
        eor \x1\().16b, \x1\().16b, \t1\().16b
        ext \t0\().16b, \x4\().16b, \x4\().16b, #8
        eor \t4\().16b, \t4\().16b, \x3\().16b
        ext \t1\().16b, \x5\().16b, \x5\().16b, #8
        eor \t7\().16b, \t7\().16b, \x6\().16b
        ext \x4\().16b, \x3\().16b, \x3\().16b, #8
        eor \t3\().16b, \t3\().16b, \x2\().16b
        ext \x5\().16b, \x7\().16b, \x7\().16b, #8
        eor \t4\().16b, \t4\().16b, \x7\().16b
        ext \x3\().16b, \x6\().16b, \x6\().16b, #8
        eor \t3\().16b, \t3\().16b, \x7\().16b
        ext \x6\().16b, \x2\().16b, \x2\().16b, #8
        eor \x7\().16b, \t1\().16b, \t5\().16b
        eor \x2\().16b, \t0\().16b, \t4\().16b
        eor \x4\().16b, \x4\().16b, \t3\().16b
        eor \x5\().16b, \x5\().16b, \t7\().16b
        eor \x3\().16b, \x3\().16b, \t6\().16b
        eor \x6\().16b, \x6\().16b, \t2\().16b
        eor \t3\().16b, \t3\().16b, \x4\().16b
        eor \x5\().16b, \x5\().16b, \t7\().16b
        eor \x2\().16b, \x3\().16b, \t6\().16b
        eor \x3\().16b, \t0\().16b, \t4\().16b
        eor \x4\().16b, \x6\().16b, \t2\().16b
        mov \x6\().16b, \t3\().16b
        .macro inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
                             t0, t1, t2, t3, t4, t5, t6, t7
        ext \t0\().16b, \x0\().16b, \x0\().16b, #8
        ext \t6\().16b, \x6\().16b, \x6\().16b, #8
        ext \t7\().16b, \x7\().16b, \x7\().16b, #8
        eor \t0\().16b, \t0\().16b, \x0\().16b
        ext \t1\().16b, \x1\().16b, \x1\().16b, #8
        eor \t6\().16b, \t6\().16b, \x6\().16b
        ext \t2\().16b, \x2\().16b, \x2\().16b, #8
        eor \t7\().16b, \t7\().16b, \x7\().16b
        ext \t3\().16b, \x3\().16b, \x3\().16b, #8
        eor \t1\().16b, \t1\().16b, \x1\().16b
        ext \t4\().16b, \x4\().16b, \x4\().16b, #8
        eor \t2\().16b, \t2\().16b, \x2\().16b
        ext \t5\().16b, \x5\().16b, \x5\().16b, #8
        eor \t3\().16b, \t3\().16b, \x3\().16b
        eor \t4\().16b, \t4\().16b, \x4\().16b
        eor \t5\().16b, \t5\().16b, \x5\().16b
        eor \x0\().16b, \x0\().16b, \t6\().16b
        eor \x1\().16b, \x1\().16b, \t6\().16b
        eor \x2\().16b, \x2\().16b, \t0\().16b
        eor \x4\().16b, \x4\().16b, \t2\().16b
        eor \x3\().16b, \x3\().16b, \t1\().16b
        eor \x1\().16b, \x1\().16b, \t7\().16b
        eor \x2\().16b, \x2\().16b, \t7\().16b
        eor \x4\().16b, \x4\().16b, \t6\().16b
        eor \x5\().16b, \x5\().16b, \t3\().16b
        eor \x3\().16b, \x3\().16b, \t6\().16b
        eor \x6\().16b, \x6\().16b, \t4\().16b
        eor \x4\().16b, \x4\().16b, \t7\().16b
        eor \x5\().16b, \x5\().16b, \t7\().16b
        eor \x7\().16b, \x7\().16b, \t5\().16b
        mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
                 \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
        .macro swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
        ushr \t0\().2d, \b0\().2d, #\n
        ushr \t1\().2d, \b1\().2d, #\n
        eor \t0\().16b, \t0\().16b, \a0\().16b
        eor \t1\().16b, \t1\().16b, \a1\().16b
        and \t0\().16b, \t0\().16b, \mask\().16b
        and \t1\().16b, \t1\().16b, \mask\().16b
        eor \a0\().16b, \a0\().16b, \t0\().16b
        shl \t0\().2d, \t0\().2d, #\n
        eor \a1\().16b, \a1\().16b, \t1\().16b
        shl \t1\().2d, \t1\().2d, #\n
        eor \b0\().16b, \b0\().16b, \t0\().16b
        eor \b1\().16b, \b1\().16b, \t1\().16b
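
/*
 * Each swapmove step above corresponds to the following C sketch, applied
 * independently to every 64-bit lane ('n' and 'mask' as passed to the macro):
 *
 *	u64 t = ((b >> n) ^ a) & mask;
 *	a ^= t;
 *	b ^= t << n;
 *
 * i.e. it exchanges the bit groups selected by 'mask' between 'a' and 'b',
 * the classic building block for transposing a bit matrix.
 */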
        .macro bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
        movi \t0\().16b, #0x55
        movi \t1\().16b, #0x33
        swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
        swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
        movi \t0\().16b, #0x0f
        swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
        swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
        swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
        swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
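
/*
 * In effect, the three swapmove_2x passes above (masks 0x55, 0x33, 0x0f with
 * shifts 1, 2, 4) perform an 8x8 bit-matrix transpose across the eight data
 * registers, converting between the normal byte-oriented representation and
 * the bit-sliced one (and back, since the transpose is its own inverse).
 */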
M0:     .octa   0x0004080c0105090d02060a0e03070b0f

M0SR:   .octa   0x0004080c05090d010a0e02060f03070b
SR:     .octa   0x0f0e0d0c0a09080b0504070600030201
SRM0:   .octa   0x01060b0c0207080d0304090e00050a0f

M0ISR:  .octa   0x0004080c0d0105090a0e0206070b0f03
ISR:    .octa   0x0f0e0d0c080b0a090504070602010003
ISRM0:  .octa   0x0306090c00070a0d01040b0e0205080f
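
/*
 * The .octa constants above are byte permutation vectors for 'tbl': M0
 * reorders the input bytes into the layout used by the bit-sliced rounds,
 * SR/ISR correspond to ShiftRows/InvShiftRows, and the fused variants
 * (M0SR, SRM0, M0ISR, ISRM0) appear to combine the two permutations so a
 * single tbl per block suffices on entry and exit.
 */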
 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
SYM_FUNC_START(aesbs_convert_key)
        ld1 {v7.4s}, [x1], #16          // load round 0 key
        ld1 {v17.4s}, [x1], #16         // load round 1 key
        movi v8.16b, #0x01              // bit masks
        str q7, [x0], #16               // save round 0 key
        tbl v7.16b, {v17.16b}, v16.16b
        ld1 {v17.4s}, [x1], #16         // load next round key
        cmtst v0.16b, v7.16b, v8.16b
        cmtst v1.16b, v7.16b, v9.16b
        cmtst v2.16b, v7.16b, v10.16b
        cmtst v3.16b, v7.16b, v11.16b
        cmtst v4.16b, v7.16b, v12.16b
        cmtst v5.16b, v7.16b, v13.16b
        cmtst v6.16b, v7.16b, v14.16b
        cmtst v7.16b, v7.16b, v15.16b
        stp q0, q1, [x0], #128
        stp q2, q3, [x0, #-96]
        stp q4, q5, [x0, #-64]
        stp q6, q7, [x0, #-32]
        movi v7.16b, #0x63              // compose .L63
        eor v17.16b, v17.16b, v7.16b
SYM_FUNC_END(aesbs_convert_key)
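
/*
 * Typical use from the C glue code (sketch only; the surrounding variables
 * and the destination buffer name are illustrative, not defined by this
 * file; aes_expandkey() and kernel_neon_begin()/kernel_neon_end() are the
 * standard kernel helpers):
 *
 *	struct crypto_aes_ctx rk;
 *	int rounds, err;
 *
 *	err = aes_expandkey(&rk, in_key, key_len);	// normal AES schedule
 *	if (err)
 *		return err;
 *	rounds = 6 + key_len / 4;
 *
 *	kernel_neon_begin();
 *	aesbs_convert_key(bs_rk, rk.key_enc, rounds);	// bs_rk: caller's buffer
 *	kernel_neon_end();
 */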
SYM_FUNC_START_LOCAL(aesbs_encrypt8)
        ldr q9, [bskey], #16            // round 0 key
        eor v10.16b, v0.16b, v9.16b     // xor with round0 key
        eor v11.16b, v1.16b, v9.16b
        tbl v0.16b, {v10.16b}, v8.16b
        eor v12.16b, v2.16b, v9.16b
        tbl v1.16b, {v11.16b}, v8.16b
        eor v13.16b, v3.16b, v9.16b
        tbl v2.16b, {v12.16b}, v8.16b
        eor v14.16b, v4.16b, v9.16b
        tbl v3.16b, {v13.16b}, v8.16b
        eor v15.16b, v5.16b, v9.16b
        tbl v4.16b, {v14.16b}, v8.16b
        eor v10.16b, v6.16b, v9.16b
        tbl v5.16b, {v15.16b}, v8.16b
        eor v11.16b, v7.16b, v9.16b
        tbl v6.16b, {v10.16b}, v8.16b
        tbl v7.16b, {v11.16b}, v8.16b

        bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

        sub rounds, rounds, #1

        shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
        sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
        subs rounds, rounds, #1
        mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
        add_round_key v0, v1, v2, v3, v4, v5, v6, v7

        ldr q12, [bskey]                // last round key

        bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

        eor v0.16b, v0.16b, v12.16b
        eor v1.16b, v1.16b, v12.16b
        eor v4.16b, v4.16b, v12.16b
        eor v6.16b, v6.16b, v12.16b
        eor v3.16b, v3.16b, v12.16b
        eor v7.16b, v7.16b, v12.16b
        eor v2.16b, v2.16b, v12.16b
        eor v5.16b, v5.16b, v12.16b
SYM_FUNC_END(aesbs_encrypt8)
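
/*
 * Note: aesbs_encrypt8 leaves its eight output blocks in the register order
 * v0, v1, v4, v6, v3, v7, v2, v5, and aesbs_decrypt8 in the order
 * v0, v1, v6, v4, v2, v7, v3, v5, which is why the mode macros below are
 * instantiated with those register lists.
 */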
SYM_FUNC_START_LOCAL(aesbs_decrypt8)
        ldr q9, [bskey, #-112]!         // round 0 key
        eor v10.16b, v0.16b, v9.16b     // xor with round0 key
        eor v11.16b, v1.16b, v9.16b
        tbl v0.16b, {v10.16b}, v8.16b
        eor v12.16b, v2.16b, v9.16b
        tbl v1.16b, {v11.16b}, v8.16b
        eor v13.16b, v3.16b, v9.16b
        tbl v2.16b, {v12.16b}, v8.16b
        eor v14.16b, v4.16b, v9.16b
        tbl v3.16b, {v13.16b}, v8.16b
        eor v15.16b, v5.16b, v9.16b
        tbl v4.16b, {v14.16b}, v8.16b
        eor v10.16b, v6.16b, v9.16b
        tbl v5.16b, {v15.16b}, v8.16b
        eor v11.16b, v7.16b, v9.16b
        tbl v6.16b, {v10.16b}, v8.16b
        tbl v7.16b, {v11.16b}, v8.16b

        bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

        sub rounds, rounds, #1

        shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
        inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
        subs rounds, rounds, #1
        add_round_key v0, v1, v6, v4, v2, v7, v3, v5
        inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \

        ldr q12, [bskey, #-16]          // last round key

        bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

        eor v0.16b, v0.16b, v12.16b
        eor v1.16b, v1.16b, v12.16b
        eor v6.16b, v6.16b, v12.16b
        eor v4.16b, v4.16b, v12.16b
        eor v2.16b, v2.16b, v12.16b
        eor v7.16b, v7.16b, v12.16b
        eor v3.16b, v3.16b, v12.16b
        eor v5.16b, v5.16b, v12.16b
SYM_FUNC_END(aesbs_decrypt8)
 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
        .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
        csel x23, x23, xzr, pl
        ld1 {v0.16b}, [x20], #16
        ld1 {v1.16b}, [x20], #16
        ld1 {v2.16b}, [x20], #16
        ld1 {v3.16b}, [x20], #16
        ld1 {v4.16b}, [x20], #16
        ld1 {v5.16b}, [x20], #16
        ld1 {v6.16b}, [x20], #16
        ld1 {v7.16b}, [x20], #16
        st1 {\o0\().16b}, [x19], #16
        st1 {\o1\().16b}, [x19], #16
        st1 {\o2\().16b}, [x19], #16
        st1 {\o3\().16b}, [x19], #16
        st1 {\o4\().16b}, [x19], #16
        st1 {\o5\().16b}, [x19], #16
        st1 {\o6\().16b}, [x19], #16
        st1 {\o7\().16b}, [x19], #16

SYM_TYPED_FUNC_START(aesbs_ecb_encrypt)
        __ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_ecb_encrypt)

SYM_TYPED_FUNC_START(aesbs_ecb_decrypt)
        __ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_ecb_decrypt)
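
/*
 * Call sketch from C (illustrative; 'bs_rk' is the bit-sliced key produced
 * by aesbs_convert_key, 'blocks' the number of 16-byte blocks to process):
 *
 *	kernel_neon_begin();
 *	aesbs_ecb_encrypt(dst, src, bs_rk, rounds, blocks);
 *	kernel_neon_end();
 */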
 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks, u8 iv[])
SYM_FUNC_START(aesbs_cbc_decrypt)
        csel x23, x23, xzr, pl
        ld1 {v0.16b}, [x20], #16
        ld1 {v1.16b}, [x20], #16
        ld1 {v2.16b}, [x20], #16
        ld1 {v3.16b}, [x20], #16
        ld1 {v4.16b}, [x20], #16
        ld1 {v5.16b}, [x20], #16
        ld1 {v6.16b}, [x20], #16
        ld1 {v24.16b}, [x24]            // load IV
        eor v1.16b, v1.16b, v25.16b
        eor v6.16b, v6.16b, v26.16b
        eor v4.16b, v4.16b, v27.16b
        eor v2.16b, v2.16b, v28.16b
        eor v7.16b, v7.16b, v29.16b
        eor v0.16b, v0.16b, v24.16b
        eor v3.16b, v3.16b, v30.16b
        eor v5.16b, v5.16b, v31.16b
        st1 {v0.16b}, [x19], #16
        st1 {v1.16b}, [x19], #16
        st1 {v6.16b}, [x19], #16
        st1 {v4.16b}, [x19], #16
        st1 {v2.16b}, [x19], #16
        st1 {v7.16b}, [x19], #16
        st1 {v3.16b}, [x19], #16
        ld1 {v24.16b}, [x20], #16
        st1 {v5.16b}, [x19], #16
1:      st1 {v24.16b}, [x24]            // store IV
SYM_FUNC_END(aesbs_cbc_decrypt)
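
/*
 * CBC decryption computes P[i] = D(C[i]) ^ C[i-1], with C[-1] = IV: the
 * routine above decrypts up to eight ciphertext blocks at a time, XORs each
 * result with the preceding ciphertext block (or the IV for the first one),
 * and writes the last ciphertext block back to iv[] so the next call can
 * continue the chain.
 */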
        .macro next_tweak, out, in, const, tmp
        sshr \tmp\().2d, \in\().2d, #63
        and \tmp\().16b, \tmp\().16b, \const\().16b
        add \out\().2d, \in\().2d, \in\().2d
        ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
        eor \out\().16b, \out\().16b, \tmp\().16b
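
/*
 * next_tweak multiplies the 128-bit XTS tweak by x in GF(2^128), modulo
 * x^128 + x^7 + x^2 + x + 1. In C, per 64-bit half of the tweak, roughly:
 *
 *	u64 carry = (hi >> 63) ? 0x87 : 0;
 *	out_hi = (hi << 1) | (lo >> 63);
 *	out_lo = (lo << 1) ^ carry;
 *
 * assuming \const holds the { 1, 0x87 } lane pair set up by the caller.
 */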
 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks, u8 iv[])
 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks, u8 iv[])
SYM_FUNC_START_LOCAL(__xts_crypt8)
        uzp1 v18.4s, v18.4s, v19.4s
        ld1 {v0.16b-v3.16b}, [x1], #64
        ld1 {v4.16b-v7.16b}, [x1], #64
        next_tweak v26, v25, v18, v19
        next_tweak v27, v26, v18, v19
        next_tweak v28, v27, v18, v19
        next_tweak v29, v28, v18, v19
        next_tweak v30, v29, v18, v19
        next_tweak v31, v30, v18, v19
        next_tweak v16, v31, v18, v19
        next_tweak v17, v16, v18, v19
        eor v0.16b, v0.16b, v25.16b
        eor v1.16b, v1.16b, v26.16b
        eor v2.16b, v2.16b, v27.16b
        eor v3.16b, v3.16b, v28.16b
        eor v4.16b, v4.16b, v29.16b
        eor v5.16b, v5.16b, v30.16b
        eor v6.16b, v6.16b, v31.16b
        eor v7.16b, v7.16b, v16.16b
SYM_FUNC_END(__xts_crypt8)
        .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
        add x6, sp, #.Lframe_local_offset
        eor v16.16b, \o0\().16b, v25.16b
        eor v17.16b, \o1\().16b, v26.16b
        eor v18.16b, \o2\().16b, v27.16b
        eor v19.16b, \o3\().16b, v28.16b
        eor v20.16b, \o4\().16b, v29.16b
        eor v21.16b, \o5\().16b, v30.16b
        eor v22.16b, \o6\().16b, v31.16b
        eor v23.16b, \o7\().16b, v24.16b
        st1 {v16.16b-v19.16b}, [x0], #64
        st1 {v20.16b-v23.16b}, [x0], #64

SYM_TYPED_FUNC_START(aesbs_xts_encrypt)
        __xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_xts_encrypt)

SYM_TYPED_FUNC_START(aesbs_xts_decrypt)
        __xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_xts_decrypt)
        rev64 \v\().16b, \v\().16b

 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 *                   int rounds, int blocks, u8 iv[])
SYM_FUNC_START(aesbs_ctr_encrypt)
        ld1 { v8.16b-v11.16b}, [x1], #64
        ld1 {v12.16b-v15.16b}, [x1], #64
        eor v8.16b, v0.16b, v8.16b
        eor v9.16b, v1.16b, v9.16b
        eor v10.16b, v4.16b, v10.16b
        eor v11.16b, v6.16b, v11.16b
        eor v12.16b, v3.16b, v12.16b
        eor v13.16b, v7.16b, v13.16b
        eor v14.16b, v2.16b, v14.16b
        eor v15.16b, v5.16b, v15.16b
        st1 { v8.16b-v11.16b}, [x0], #64
        st1 {v12.16b-v15.16b}, [x0], #64
SYM_FUNC_END(aesbs_ctr_encrypt)