/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
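
/*
 * Overview: the code below processes eight AES blocks at a time in
 * bit-sliced form, i.e. after the 'bitslice' transform the eight NEON
 * registers holding the state each contain one bit position of every
 * byte of all eight blocks, so the S-box can be evaluated with plain
 * logic instructions in constant time.  The in_bs_ch/out_bs_ch and
 * inv_in_bs_ch/inv_out_bs_ch macros implement the linear input and
 * output transformations of the S-box (resp. inverse S-box) around
 * the shared GF(2^8) inversion.
 */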

	.macro	in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7

	.macro	out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7

	.macro	inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5

	.macro	inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2

	.macro	mul_gf4, x0, x1, y0, y1, t0, t1

	.macro	mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1

	.macro	mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
		y0, y1, y2, y3, t0, t1, t2, t3
	mul_gf4		\x0, \x1, \y0, \y1, \t2, \t3
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
	mul_gf4		\x4, \x5, \y0, \y1, \t2, \t3
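
/*
 * inv_gf256 below computes the multiplicative inverse in GF(2^8) of
 * every bit-sliced byte at once, using the tower-field decomposition
 * GF(2^2) -> GF(2^4) -> GF(2^8) described in the Kaesper/Schwabe
 * paper; the mul_gf4, mul_gf4_n_gf4 and mul_gf16_2 macros above are
 * the corresponding subfield multipliers.
 */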

	.macro	inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
		t0, t1, t2, t3, s0, s1, s2, s3
	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3

	.macro	sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
		t0, t1, t2, t3, s0, s1, s2, s3
	in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
			\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	out_bs_ch	\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b

	.macro	inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
		t0, t1, t2, t3, s0, s1, s2, s3
	inv_in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
			\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	inv_out_bs_ch	\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
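
/*
 * The sbox and inv_sbox macros apply the AES S-box (resp. its inverse)
 * to the bit-sliced state: a linear basis change on input, the shared
 * GF(2^8) inversion, and a linear basis change on output.  The result
 * comes back with the bit slices in a permuted register order, which
 * is why the code below refers to the state as v0, v1, v4, v6, v3,
 * v7, v2, v5 after encryption and as v0, v1, v6, v4, v2, v7, v3, v5
 * after decryption rather than plain v0-v7.
 */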

	ldp	q16, q17, [bskey], #128
	ldp	q18, q19, [bskey, #-96]
	ldp	q20, q21, [bskey, #-64]
	ldp	q22, q23, [bskey, #-32]

	ldp	q16, q17, [bskey, #-128]!
	ldp	q18, q19, [bskey, #32]
	ldp	q20, q21, [bskey, #64]
	ldp	q22, q23, [bskey, #96]
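
/*
 * The two load sequences above fetch the next 128 bytes of bit-sliced
 * round key into v16-v23, stepping forwards through the key schedule
 * for encryption and backwards for decryption.
 */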

	.macro	add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
	eor	\x0\().16b, \x0\().16b, v16.16b
	eor	\x1\().16b, \x1\().16b, v17.16b
	eor	\x2\().16b, \x2\().16b, v18.16b
	eor	\x3\().16b, \x3\().16b, v19.16b
	eor	\x4\().16b, \x4\().16b, v20.16b
	eor	\x5\().16b, \x5\().16b, v21.16b
	eor	\x6\().16b, \x6\().16b, v22.16b
	eor	\x7\().16b, \x7\().16b, v23.16b

	.macro	shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
	tbl	\x0\().16b, {\x0\().16b}, \mask\().16b
	tbl	\x1\().16b, {\x1\().16b}, \mask\().16b
	tbl	\x2\().16b, {\x2\().16b}, \mask\().16b
	tbl	\x3\().16b, {\x3\().16b}, \mask\().16b
	tbl	\x4\().16b, {\x4\().16b}, \mask\().16b
	tbl	\x5\().16b, {\x5\().16b}, \mask\().16b
	tbl	\x6\().16b, {\x6\().16b}, \mask\().16b
	tbl	\x7\().16b, {\x7\().16b}, \mask\().16b

	.macro	mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
		t0, t1, t2, t3, t4, t5, t6, t7, inv
	ext	\t0\().16b, \x0\().16b, \x0\().16b, #12
	ext	\t1\().16b, \x1\().16b, \x1\().16b, #12
	eor	\x0\().16b, \x0\().16b, \t0\().16b
	ext	\t2\().16b, \x2\().16b, \x2\().16b, #12
	eor	\x1\().16b, \x1\().16b, \t1\().16b
	ext	\t3\().16b, \x3\().16b, \x3\().16b, #12
	eor	\x2\().16b, \x2\().16b, \t2\().16b
	ext	\t4\().16b, \x4\().16b, \x4\().16b, #12
	eor	\x3\().16b, \x3\().16b, \t3\().16b
	ext	\t5\().16b, \x5\().16b, \x5\().16b, #12
	eor	\x4\().16b, \x4\().16b, \t4\().16b
	ext	\t6\().16b, \x6\().16b, \x6\().16b, #12
	eor	\x5\().16b, \x5\().16b, \t5\().16b
	ext	\t7\().16b, \x7\().16b, \x7\().16b, #12
	eor	\x6\().16b, \x6\().16b, \t6\().16b
	eor	\t1\().16b, \t1\().16b, \x0\().16b
	eor	\x7\().16b, \x7\().16b, \t7\().16b
	ext	\x0\().16b, \x0\().16b, \x0\().16b, #8
	eor	\t2\().16b, \t2\().16b, \x1\().16b
	eor	\t0\().16b, \t0\().16b, \x7\().16b
	eor	\t1\().16b, \t1\().16b, \x7\().16b
	ext	\x1\().16b, \x1\().16b, \x1\().16b, #8
	eor	\t5\().16b, \t5\().16b, \x4\().16b
	eor	\x0\().16b, \x0\().16b, \t0\().16b
	eor	\t6\().16b, \t6\().16b, \x5\().16b
	eor	\x1\().16b, \x1\().16b, \t1\().16b
	ext	\t0\().16b, \x4\().16b, \x4\().16b, #8
	eor	\t4\().16b, \t4\().16b, \x3\().16b
	ext	\t1\().16b, \x5\().16b, \x5\().16b, #8
	eor	\t7\().16b, \t7\().16b, \x6\().16b
	ext	\x4\().16b, \x3\().16b, \x3\().16b, #8
	eor	\t3\().16b, \t3\().16b, \x2\().16b
	ext	\x5\().16b, \x7\().16b, \x7\().16b, #8
	eor	\t4\().16b, \t4\().16b, \x7\().16b
	ext	\x3\().16b, \x6\().16b, \x6\().16b, #8
	eor	\t3\().16b, \t3\().16b, \x7\().16b
	ext	\x6\().16b, \x2\().16b, \x2\().16b, #8
	eor	\x7\().16b, \t1\().16b, \t5\().16b
	.ifb	\inv
	eor	\x2\().16b, \t0\().16b, \t4\().16b
	eor	\x4\().16b, \x4\().16b, \t3\().16b
	eor	\x5\().16b, \x5\().16b, \t7\().16b
	eor	\x3\().16b, \x3\().16b, \t6\().16b
	eor	\x6\().16b, \x6\().16b, \t2\().16b
	.else
	eor	\t3\().16b, \t3\().16b, \x4\().16b
	eor	\x5\().16b, \x5\().16b, \t7\().16b
	eor	\x2\().16b, \x3\().16b, \t6\().16b
	eor	\x3\().16b, \t0\().16b, \t4\().16b
	eor	\x4\().16b, \x6\().16b, \t2\().16b
	mov	\x6\().16b, \t3\().16b

	.macro	inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
		t0, t1, t2, t3, t4, t5, t6, t7
	ext	\t0\().16b, \x0\().16b, \x0\().16b, #8
	ext	\t6\().16b, \x6\().16b, \x6\().16b, #8
	ext	\t7\().16b, \x7\().16b, \x7\().16b, #8
	eor	\t0\().16b, \t0\().16b, \x0\().16b
	ext	\t1\().16b, \x1\().16b, \x1\().16b, #8
	eor	\t6\().16b, \t6\().16b, \x6\().16b
	ext	\t2\().16b, \x2\().16b, \x2\().16b, #8
	eor	\t7\().16b, \t7\().16b, \x7\().16b
	ext	\t3\().16b, \x3\().16b, \x3\().16b, #8
	eor	\t1\().16b, \t1\().16b, \x1\().16b
	ext	\t4\().16b, \x4\().16b, \x4\().16b, #8
	eor	\t2\().16b, \t2\().16b, \x2\().16b
	ext	\t5\().16b, \x5\().16b, \x5\().16b, #8
	eor	\t3\().16b, \t3\().16b, \x3\().16b
	eor	\t4\().16b, \t4\().16b, \x4\().16b
	eor	\t5\().16b, \t5\().16b, \x5\().16b
	eor	\x0\().16b, \x0\().16b, \t6\().16b
	eor	\x1\().16b, \x1\().16b, \t6\().16b
	eor	\x2\().16b, \x2\().16b, \t0\().16b
	eor	\x4\().16b, \x4\().16b, \t2\().16b
	eor	\x3\().16b, \x3\().16b, \t1\().16b
	eor	\x1\().16b, \x1\().16b, \t7\().16b
	eor	\x2\().16b, \x2\().16b, \t7\().16b
	eor	\x4\().16b, \x4\().16b, \t6\().16b
	eor	\x5\().16b, \x5\().16b, \t3\().16b
	eor	\x3\().16b, \x3\().16b, \t6\().16b
	eor	\x6\().16b, \x6\().16b, \t4\().16b
	eor	\x4\().16b, \x4\().16b, \t7\().16b
	eor	\x5\().16b, \x5\().16b, \t7\().16b
	eor	\x7\().16b, \x7\().16b, \t5\().16b
	mix_cols \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
		 \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1

	.macro	swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
	ushr	\t0\().2d, \b0\().2d, #\n
	ushr	\t1\().2d, \b1\().2d, #\n
	eor	\t0\().16b, \t0\().16b, \a0\().16b
	eor	\t1\().16b, \t1\().16b, \a1\().16b
	and	\t0\().16b, \t0\().16b, \mask\().16b
	and	\t1\().16b, \t1\().16b, \mask\().16b
	eor	\a0\().16b, \a0\().16b, \t0\().16b
	shl	\t0\().2d, \t0\().2d, #\n
	eor	\a1\().16b, \a1\().16b, \t1\().16b
	shl	\t1\().2d, \t1\().2d, #\n
	eor	\b0\().16b, \b0\().16b, \t0\().16b
	eor	\b1\().16b, \b1\().16b, \t1\().16b

	.macro	bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
	movi	\t0\().16b, #0x55
	movi	\t1\().16b, #0x33
	swapmove_2x \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
	swapmove_2x \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
	movi	\t0\().16b, #0x0f
	swapmove_2x \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
	swapmove_2x \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
	swapmove_2x \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
	swapmove_2x \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
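
/*
 * For reference, the swapmove step above corresponds to the following
 * C model operating on 64-bit words (a sketch only; the function name
 * is illustrative):
 *
 *	static inline void swapmove(u64 *a, u64 *b, int n, u64 mask)
 *	{
 *		u64 t = ((*b >> n) ^ *a) & mask;
 *
 *		*a ^= t;
 *		*b ^= t << n;
 *	}
 *
 * Three passes with the shift/mask pairs (1, 0x55...), (2, 0x33...)
 * and (4, 0x0f...), as issued by the bitslice macro, transpose an 8x8
 * bit matrix, i.e. regroup the data so that each register ends up
 * holding a single bit position of every byte.
 */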

M0:	.octa	0x0004080c0105090d02060a0e03070b0f

M0SR:	.octa	0x0004080c05090d010a0e02060f03070b
SR:	.octa	0x0f0e0d0c0a09080b0504070600030201
SRM0:	.octa	0x01060b0c0207080d0304090e00050a0f

M0ISR:	.octa	0x0004080c0d0105090a0e0206070b0f03
ISR:	.octa	0x0f0e0d0c080b0a090504070602010003
ISRM0:	.octa	0x0306090c00070a0d01040b0e0205080f
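
/*
 * The .octa constants above are tbl permutation vectors: SR and ISR
 * encode ShiftRows and InvShiftRows as byte shuffles, while M0 and
 * the composed variants M0SR/SRM0/M0ISR/ISRM0 appear to combine the
 * byte reordering used when (un)bit-slicing blocks with the (inverse)
 * ShiftRows step, so that a single tbl per block covers both.
 */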

/*
 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
 */
ENTRY(aesbs_convert_key)
	ld1	{v7.4s}, [x1], #16		// load round 0 key
	ld1	{v17.4s}, [x1], #16		// load round 1 key

	movi	v8.16b, #0x01			// bit masks

	str	q7, [x0], #16			// save round 0 key

	tbl	v7.16b, {v17.16b}, v16.16b
	ld1	{v17.4s}, [x1], #16		// load next round key

	cmtst	v0.16b, v7.16b, v8.16b
	cmtst	v1.16b, v7.16b, v9.16b
	cmtst	v2.16b, v7.16b, v10.16b
	cmtst	v3.16b, v7.16b, v11.16b
	cmtst	v4.16b, v7.16b, v12.16b
	cmtst	v5.16b, v7.16b, v13.16b
	cmtst	v6.16b, v7.16b, v14.16b
	cmtst	v7.16b, v7.16b, v15.16b

	stp	q0, q1, [x0], #128
	stp	q2, q3, [x0, #-96]
	stp	q4, q5, [x0, #-64]
	stp	q6, q7, [x0, #-32]

	movi	v7.16b, #0x63			// compose .L63
	eor	v17.16b, v17.16b, v7.16b
ENDPROC(aesbs_convert_key)
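
/*
 * The cmtst sequence above spreads each bit of every (permuted) round
 * key byte into a full 0x00/0xff byte, one vector per bit position,
 * which is the form the bit-sliced add_round_key expects; v8-v15 hold
 * one single-bit test mask each (v8 = 0x01).  As a rough C model
 * (illustrative only, not a kernel interface):
 *
 *	static void spread_bits(u8 out[8][16], const u8 rk[16])
 *	{
 *		int bit, i;
 *
 *		for (bit = 0; bit < 8; bit++)
 *			for (i = 0; i < 16; i++)
 *				out[bit][i] = (rk[i] & (1 << bit)) ? 0xff : 0;
 *	}
 *
 * The 0x63 value XORed in at the end is the affine constant of the
 * AES S-box, folded into the stored key schedule so that the
 * bit-sliced S-box does not have to add it separately.
 */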

	ldr	q9, [bskey], #16		// round 0 key

	eor	v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor	v11.16b, v1.16b, v9.16b
	tbl	v0.16b, {v10.16b}, v8.16b
	eor	v12.16b, v2.16b, v9.16b
	tbl	v1.16b, {v11.16b}, v8.16b
	eor	v13.16b, v3.16b, v9.16b
	tbl	v2.16b, {v12.16b}, v8.16b
	eor	v14.16b, v4.16b, v9.16b
	tbl	v3.16b, {v13.16b}, v8.16b
	eor	v15.16b, v5.16b, v9.16b
	tbl	v4.16b, {v14.16b}, v8.16b
	eor	v10.16b, v6.16b, v9.16b
	tbl	v5.16b, {v15.16b}, v8.16b
	eor	v11.16b, v7.16b, v9.16b
	tbl	v6.16b, {v10.16b}, v8.16b
	tbl	v7.16b, {v11.16b}, v8.16b

	bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub	rounds, rounds, #1

	shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
	sbox	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs	rounds, rounds, #1

	mix_cols v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	add_round_key v0, v1, v2, v3, v4, v5, v6, v7

	ldr	q12, [bskey]			// last round key

	bitslice v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

	eor	v0.16b, v0.16b, v12.16b
	eor	v1.16b, v1.16b, v12.16b
	eor	v4.16b, v4.16b, v12.16b
	eor	v6.16b, v6.16b, v12.16b
	eor	v3.16b, v3.16b, v12.16b
	eor	v7.16b, v7.16b, v12.16b
	eor	v2.16b, v2.16b, v12.16b
	eor	v5.16b, v5.16b, v12.16b
ENDPROC(aesbs_encrypt8)

	ldr	q9, [bskey, #-112]!		// round 0 key

	eor	v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor	v11.16b, v1.16b, v9.16b
	tbl	v0.16b, {v10.16b}, v8.16b
	eor	v12.16b, v2.16b, v9.16b
	tbl	v1.16b, {v11.16b}, v8.16b
	eor	v13.16b, v3.16b, v9.16b
	tbl	v2.16b, {v12.16b}, v8.16b
	eor	v14.16b, v4.16b, v9.16b
	tbl	v3.16b, {v13.16b}, v8.16b
	eor	v15.16b, v5.16b, v9.16b
	tbl	v4.16b, {v14.16b}, v8.16b
	eor	v10.16b, v6.16b, v9.16b
	tbl	v5.16b, {v15.16b}, v8.16b
	eor	v11.16b, v7.16b, v9.16b
	tbl	v6.16b, {v10.16b}, v8.16b
	tbl	v7.16b, {v11.16b}, v8.16b

	bitslice v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub	rounds, rounds, #1

	shift_rows v0, v1, v2, v3, v4, v5, v6, v7, v24
	inv_sbox v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs	rounds, rounds, #1

	add_round_key v0, v1, v6, v4, v2, v7, v3, v5

	inv_mix_cols v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	ldr	q12, [bskey, #-16]		// last round key

	bitslice v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

	eor	v0.16b, v0.16b, v12.16b
	eor	v1.16b, v1.16b, v12.16b
	eor	v6.16b, v6.16b, v12.16b
	eor	v4.16b, v4.16b, v12.16b
	eor	v2.16b, v2.16b, v12.16b
	eor	v7.16b, v7.16b, v12.16b
	eor	v3.16b, v3.16b, v12.16b
	eor	v5.16b, v5.16b, v12.16b
ENDPROC(aesbs_decrypt8)

/*
 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks)
 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks)
 */
	.macro	__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7

	csel	x23, x23, xzr, pl

	ld1	{v0.16b}, [x20], #16
	ld1	{v1.16b}, [x20], #16
	ld1	{v2.16b}, [x20], #16
	ld1	{v3.16b}, [x20], #16
	ld1	{v4.16b}, [x20], #16
	ld1	{v5.16b}, [x20], #16
	ld1	{v6.16b}, [x20], #16
	ld1	{v7.16b}, [x20], #16

	st1	{\o0\().16b}, [x19], #16
	st1	{\o1\().16b}, [x19], #16
	st1	{\o2\().16b}, [x19], #16
	st1	{\o3\().16b}, [x19], #16
	st1	{\o4\().16b}, [x19], #16
	st1	{\o5\().16b}, [x19], #16
	st1	{\o6\().16b}, [x19], #16
	st1	{\o7\().16b}, [x19], #16

ENTRY(aesbs_ecb_encrypt)
	__ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_ecb_encrypt)

ENTRY(aesbs_ecb_decrypt)
	__ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_ecb_decrypt)
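
/*
 * For illustration, a possible call sequence from C glue code (a
 * sketch only: BSKEY_SIZE and the buffer names are made up, and the
 * caller must have the NEON unit available, e.g. between
 * kernel_neon_begin() and kernel_neon_end()):
 *
 *	u8 bsrk[BSKEY_SIZE];
 *	struct crypto_aes_ctx ctx;
 *	int rounds;
 *
 *	aes_expandkey(&ctx, user_key, key_len);
 *	rounds = 6 + ctx.key_length / 4;
 *	aesbs_convert_key(bsrk, ctx.key_enc, rounds);
 *
 *	kernel_neon_begin();
 *	aesbs_ecb_encrypt(dst, src, bsrk, rounds, nblocks);
 *	kernel_neon_end();
 */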

/*
 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks, u8 iv[])
 */
ENTRY(aesbs_cbc_decrypt)

	csel	x23, x23, xzr, pl

	ld1	{v0.16b}, [x20], #16
	ld1	{v1.16b}, [x20], #16
	ld1	{v2.16b}, [x20], #16
	ld1	{v3.16b}, [x20], #16
	ld1	{v4.16b}, [x20], #16
	ld1	{v5.16b}, [x20], #16
	ld1	{v6.16b}, [x20], #16

	ld1	{v24.16b}, [x24]		// load IV
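
	/*
	 * CBC decryption: each decrypted block is XORed with the previous
	 * ciphertext block (or with the IV for the first block), i.e.
	 * P[i] = D(C[i]) ^ C[i-1].  The previous ciphertext blocks are
	 * held in v24 (the IV) and v25-v31, and the last ciphertext block
	 * is written back as the IV for the next call.
	 */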
	eor	v1.16b, v1.16b, v25.16b
	eor	v6.16b, v6.16b, v26.16b
	eor	v4.16b, v4.16b, v27.16b
	eor	v2.16b, v2.16b, v28.16b
	eor	v7.16b, v7.16b, v29.16b
	eor	v0.16b, v0.16b, v24.16b
	eor	v3.16b, v3.16b, v30.16b
	eor	v5.16b, v5.16b, v31.16b

	st1	{v0.16b}, [x19], #16
	st1	{v1.16b}, [x19], #16
	st1	{v6.16b}, [x19], #16
	st1	{v4.16b}, [x19], #16
	st1	{v2.16b}, [x19], #16
	st1	{v7.16b}, [x19], #16
	st1	{v3.16b}, [x19], #16
	ld1	{v24.16b}, [x20], #16
	st1	{v5.16b}, [x19], #16
1:	st1	{v24.16b}, [x24]		// store IV
ENDPROC(aesbs_cbc_decrypt)

	.macro	next_tweak, out, in, const, tmp
	sshr	\tmp\().2d, \in\().2d, #63
	and	\tmp\().16b, \tmp\().16b, \const\().16b
	add	\out\().2d, \in\().2d, \in\().2d
	ext	\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor	\out\().16b, \out\().16b, \tmp\().16b

.Lxts_mul_x:
CPU_LE(	.quad	1, 0x87		)
CPU_BE(	.quad	0x87, 1		)
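
/*
 * In C terms, next_tweak multiplies the 128-bit tweak by x in
 * GF(2^128) modulo x^128 + x^7 + x^2 + x + 1, roughly as follows
 * (a sketch only, for the little-endian representation used here):
 *
 *	static void next_tweak(__le64 out[2], const __le64 in[2])
 *	{
 *		u64 lo = le64_to_cpu(in[0]);
 *		u64 hi = le64_to_cpu(in[1]);
 *		u64 carry = (u64)((s64)hi >> 63) & 0x87;
 *
 *		out[1] = cpu_to_le64((hi << 1) | (lo >> 63));
 *		out[0] = cpu_to_le64((lo << 1) ^ carry);
 *	}
 *
 * which is what the sshr/and/add/ext/eor sequence above computes with
 * the { 1, 0x87 } constant.
 */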

/*
 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks, u8 iv[])
 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks, u8 iv[])
 */

	csel	x23, x23, xzr, pl

	ld1	{v0.16b}, [x20], #16
	next_tweak v26, v25, v30, v31
	eor	v0.16b, v0.16b, v25.16b

	ld1	{v1.16b}, [x20], #16
	next_tweak v27, v26, v30, v31
	eor	v1.16b, v1.16b, v26.16b

	ld1	{v2.16b}, [x20], #16
	next_tweak v28, v27, v30, v31
	eor	v2.16b, v2.16b, v27.16b

	ld1	{v3.16b}, [x20], #16
	next_tweak v29, v28, v30, v31
	eor	v3.16b, v3.16b, v28.16b

	ld1	{v4.16b}, [x20], #16
	str	q29, [sp, #.Lframe_local_offset]
	eor	v4.16b, v4.16b, v29.16b
	next_tweak v29, v29, v30, v31

	ld1	{v5.16b}, [x20], #16
	str	q29, [sp, #.Lframe_local_offset + 16]
	eor	v5.16b, v5.16b, v29.16b
	next_tweak v29, v29, v30, v31

	ld1	{v6.16b}, [x20], #16
	str	q29, [sp, #.Lframe_local_offset + 32]
	eor	v6.16b, v6.16b, v29.16b
	next_tweak v29, v29, v30, v31

	ld1	{v7.16b}, [x20], #16
	str	q29, [sp, #.Lframe_local_offset + 48]
	eor	v7.16b, v7.16b, v29.16b
	next_tweak v29, v29, v30, v31

ENDPROC(__xts_crypt8)
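
/*
 * Each of the eight blocks handled by __xts_crypt8 gets its own tweak:
 * the first four stay in v25-v28, while the next four are computed
 * into v29 one after the other and spilled to the stack frame, to be
 * reloaded as v16-v19 by __xts_crypt below.  Every block is XORed with
 * its tweak both before and after the core cipher, and each tweak is
 * derived from the previous one with next_tweak, i.e. by
 * multiplication by x in GF(2^128).
 */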

	.macro	__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7

0:	ldr	q30, .Lxts_mul_x

	ldp	q16, q17, [sp, #.Lframe_local_offset]
	ldp	q18, q19, [sp, #.Lframe_local_offset + 32]

	eor	\o0\().16b, \o0\().16b, v25.16b
	eor	\o1\().16b, \o1\().16b, v26.16b
	eor	\o2\().16b, \o2\().16b, v27.16b
	eor	\o3\().16b, \o3\().16b, v28.16b

	st1	{\o0\().16b}, [x19], #16
	st1	{\o1\().16b}, [x19], #16
	st1	{\o2\().16b}, [x19], #16
	st1	{\o3\().16b}, [x19], #16

	eor	\o4\().16b, \o4\().16b, v16.16b
	eor	\o5\().16b, \o5\().16b, v17.16b
	eor	\o6\().16b, \o6\().16b, v18.16b
	eor	\o7\().16b, \o7\().16b, v19.16b

	st1	{\o4\().16b}, [x19], #16
	st1	{\o5\().16b}, [x19], #16
	st1	{\o6\().16b}, [x19], #16
	st1	{\o7\().16b}, [x19], #16

1:	st1	{v25.16b}, [x24]

ENTRY(aesbs_xts_encrypt)
	__xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_xts_encrypt)

ENTRY(aesbs_xts_decrypt)
	__xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_xts_decrypt)

	rev64	\v\().16b, \v\().16b

/*
 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 *		     int rounds, int blocks, u8 iv[], u8 final[])
 */
ENTRY(aesbs_ctr_encrypt)

	add	x23, x23, x26		// do one extra block if final

98:	ldp	x7, x8, [x24]

	csel	x23, x23, xzr, pl

	lsr	x9, x9, x26		// disregard the extra block
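
	/*
	 * The encrypted counter blocks (the keystream) come back from the
	 * core cipher in the usual permuted order v0, v1, v4, v6, v3, v7,
	 * v2, v5 and are XORed into the input below; blocks beyond the
	 * requested count are skipped.
	 */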
	ld1	{v8.16b}, [x20], #16
	eor	v0.16b, v0.16b, v8.16b
	st1	{v0.16b}, [x19], #16

	ld1	{v9.16b}, [x20], #16
	eor	v1.16b, v1.16b, v9.16b
	st1	{v1.16b}, [x19], #16

	ld1	{v10.16b}, [x20], #16
	eor	v4.16b, v4.16b, v10.16b
	st1	{v4.16b}, [x19], #16

	ld1	{v11.16b}, [x20], #16
	eor	v6.16b, v6.16b, v11.16b
	st1	{v6.16b}, [x19], #16

	ld1	{v12.16b}, [x20], #16
	eor	v3.16b, v3.16b, v12.16b
	st1	{v3.16b}, [x19], #16

	ld1	{v13.16b}, [x20], #16
	eor	v7.16b, v7.16b, v13.16b
	st1	{v7.16b}, [x19], #16

	ld1	{v14.16b}, [x20], #16
	eor	v2.16b, v2.16b, v14.16b
	st1	{v2.16b}, [x19], #16

	ld1	{v15.16b}, [x20], #16
	eor	v5.16b, v5.16b, v15.16b
	st1	{v5.16b}, [x19], #16

	/*
	 * If we are handling the tail of the input (x6 != NULL), return the
	 * final keystream block back to the caller.
	 */

ENDPROC(aesbs_ctr_encrypt)