 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
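 *
 * Note on the data layout: eight AES blocks are processed in parallel in
 * bitsliced form, i.e. the 128 bytes of state are transposed so that
 * register #i of the v0-v7 group holds bit i of every state byte. SubBytes
 * then becomes a fixed sequence of Boolean vector operations, which is what
 * makes this implementation constant time.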
#include <linux/linkage.h>
#include <asm/assembler.h>

        .macro          in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
        .macro          out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
        .macro          inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
        .macro          inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
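        // in_bs_ch/out_bs_ch and their inv_ counterparts are the linear
        // basis changes applied to the bitsliced state before and after
        // the shared GF(2^8) inversion; together with inv_gf256 below they
        // make up the forward and inverse AES S-box.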
        .macro          mul_gf4, x0, x1, y0, y1, t0, t1
        .macro          mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
        .macro          mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
                                    y0, y1, y2, y3, t0, t1, t2, t3
        mul_gf4         \x0, \x1, \y0, \y1, \t2, \t3
        mul_gf4_n_gf4   \t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
        mul_gf4_n_gf4   \t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
        mul_gf4         \x4, \x5, \y0, \y1, \t2, \t3

        .macro          inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
                                   t0, t1, t2, t3, s0, s1, s2, s3
        mul_gf16_2      \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
                        \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
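        // inv_gf256 computes the multiplicative inverse in GF(2^8) using a
        // tower field construction (GF(2^2) -> GF(2^4) -> GF(2^8)): the
        // mul_gf4/mul_gf4_n_gf4 helpers implement the GF(2^4) arithmetic
        // and mul_gf16_2 combines them, as described in the paper.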
        .macro          sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
                              t0, t1, t2, t3, s0, s1, s2, s3
        in_bs_ch        \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
                        \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
        inv_gf256       \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
                        \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
                        \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
                        \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
        out_bs_ch       \b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
                        \b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b

        .macro          inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
                                  t0, t1, t2, t3, s0, s1, s2, s3
        inv_in_bs_ch    \b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
                        \b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
        inv_gf256       \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
                        \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
                        \t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
                        \s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
        inv_out_bs_ch   \b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
                        \b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
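        // sbox/inv_sbox implement (Inv)SubBytes on the bitsliced state:
        // change into the inverter's basis, perform the shared GF(2^8)
        // inversion, and change back out, with the slice registers passed
        // in a permuted order between the two basis changes.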
        ldp             q16, q17, [bskey], #128
        ldp             q18, q19, [bskey, #-96]
        ldp             q20, q21, [bskey, #-64]
        ldp             q22, q23, [bskey, #-32]

        ldp             q16, q17, [bskey, #-128]!
        ldp             q18, q19, [bskey, #32]
        ldp             q20, q21, [bskey, #64]
        ldp             q22, q23, [bskey, #96]
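        // Both load sequences above fetch one 128 byte bitsliced round key
        // into v16-v23: the first advances bskey forwards through the key
        // schedule (encryption), the second walks it backwards with a
        // pre-decrement (decryption).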
        .macro          add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
        eor             \x0\().16b, \x0\().16b, v16.16b
        eor             \x1\().16b, \x1\().16b, v17.16b
        eor             \x2\().16b, \x2\().16b, v18.16b
        eor             \x3\().16b, \x3\().16b, v19.16b
        eor             \x4\().16b, \x4\().16b, v20.16b
        eor             \x5\().16b, \x5\().16b, v21.16b
        eor             \x6\().16b, \x6\().16b, v22.16b
        eor             \x7\().16b, \x7\().16b, v23.16b
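        // AddRoundKey is a plain XOR of the bitsliced state with the round
        // key slices loaded into v16-v23.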
        .macro          shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
        tbl             \x0\().16b, {\x0\().16b}, \mask\().16b
        tbl             \x1\().16b, {\x1\().16b}, \mask\().16b
        tbl             \x2\().16b, {\x2\().16b}, \mask\().16b
        tbl             \x3\().16b, {\x3\().16b}, \mask\().16b
        tbl             \x4\().16b, {\x4\().16b}, \mask\().16b
        tbl             \x5\().16b, {\x5\().16b}, \mask\().16b
        tbl             \x6\().16b, {\x6\().16b}, \mask\().16b
        tbl             \x7\().16b, {\x7\().16b}, \mask\().16b
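        // ShiftRows only moves whole bytes around, so on the bitsliced
        // state it reduces to the same tbl byte permutation applied to all
        // eight slices, using one of the mask constants defined further
        // down.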
        .macro          mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
                                  t0, t1, t2, t3, t4, t5, t6, t7, inv
        ext             \t0\().16b, \x0\().16b, \x0\().16b, #12
        ext             \t1\().16b, \x1\().16b, \x1\().16b, #12
        eor             \x0\().16b, \x0\().16b, \t0\().16b
        ext             \t2\().16b, \x2\().16b, \x2\().16b, #12
        eor             \x1\().16b, \x1\().16b, \t1\().16b
        ext             \t3\().16b, \x3\().16b, \x3\().16b, #12
        eor             \x2\().16b, \x2\().16b, \t2\().16b
        ext             \t4\().16b, \x4\().16b, \x4\().16b, #12
        eor             \x3\().16b, \x3\().16b, \t3\().16b
        ext             \t5\().16b, \x5\().16b, \x5\().16b, #12
        eor             \x4\().16b, \x4\().16b, \t4\().16b
        ext             \t6\().16b, \x6\().16b, \x6\().16b, #12
        eor             \x5\().16b, \x5\().16b, \t5\().16b
        ext             \t7\().16b, \x7\().16b, \x7\().16b, #12
        eor             \x6\().16b, \x6\().16b, \t6\().16b
        eor             \t1\().16b, \t1\().16b, \x0\().16b
        eor             \x7\().16b, \x7\().16b, \t7\().16b
        ext             \x0\().16b, \x0\().16b, \x0\().16b, #8
        eor             \t2\().16b, \t2\().16b, \x1\().16b
        eor             \t0\().16b, \t0\().16b, \x7\().16b
        eor             \t1\().16b, \t1\().16b, \x7\().16b
        ext             \x1\().16b, \x1\().16b, \x1\().16b, #8
        eor             \t5\().16b, \t5\().16b, \x4\().16b
        eor             \x0\().16b, \x0\().16b, \t0\().16b
        eor             \t6\().16b, \t6\().16b, \x5\().16b
        eor             \x1\().16b, \x1\().16b, \t1\().16b
        ext             \t0\().16b, \x4\().16b, \x4\().16b, #8
        eor             \t4\().16b, \t4\().16b, \x3\().16b
        ext             \t1\().16b, \x5\().16b, \x5\().16b, #8
        eor             \t7\().16b, \t7\().16b, \x6\().16b
        ext             \x4\().16b, \x3\().16b, \x3\().16b, #8
        eor             \t3\().16b, \t3\().16b, \x2\().16b
        ext             \x5\().16b, \x7\().16b, \x7\().16b, #8
        eor             \t4\().16b, \t4\().16b, \x7\().16b
        ext             \x3\().16b, \x6\().16b, \x6\().16b, #8
        eor             \t3\().16b, \t3\().16b, \x7\().16b
        ext             \x6\().16b, \x2\().16b, \x2\().16b, #8
        eor             \x7\().16b, \t1\().16b, \t5\().16b

        eor             \x2\().16b, \t0\().16b, \t4\().16b
        eor             \x4\().16b, \x4\().16b, \t3\().16b
        eor             \x5\().16b, \x5\().16b, \t7\().16b
        eor             \x3\().16b, \x3\().16b, \t6\().16b
        eor             \x6\().16b, \x6\().16b, \t2\().16b

        eor             \t3\().16b, \t3\().16b, \x4\().16b
        eor             \x5\().16b, \x5\().16b, \t7\().16b
        eor             \x2\().16b, \x3\().16b, \t6\().16b
        eor             \x3\().16b, \t0\().16b, \t4\().16b
        eor             \x4\().16b, \x6\().16b, \t2\().16b
        mov             \x6\().16b, \t3\().16b
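        // MixColumns mixes the bytes within each column and multiplies by
        // x in GF(2^8): the byte moves become ext rotations of each 128-bit
        // slice (#12 and #8), while the multiplication by x becomes XORs
        // across neighbouring bit slices with the reduction folded in.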
        .macro          inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
                                      t0, t1, t2, t3, t4, t5, t6, t7
        ext             \t0\().16b, \x0\().16b, \x0\().16b, #8
        ext             \t6\().16b, \x6\().16b, \x6\().16b, #8
        ext             \t7\().16b, \x7\().16b, \x7\().16b, #8
        eor             \t0\().16b, \t0\().16b, \x0\().16b
        ext             \t1\().16b, \x1\().16b, \x1\().16b, #8
        eor             \t6\().16b, \t6\().16b, \x6\().16b
        ext             \t2\().16b, \x2\().16b, \x2\().16b, #8
        eor             \t7\().16b, \t7\().16b, \x7\().16b
        ext             \t3\().16b, \x3\().16b, \x3\().16b, #8
        eor             \t1\().16b, \t1\().16b, \x1\().16b
        ext             \t4\().16b, \x4\().16b, \x4\().16b, #8
        eor             \t2\().16b, \t2\().16b, \x2\().16b
        ext             \t5\().16b, \x5\().16b, \x5\().16b, #8
        eor             \t3\().16b, \t3\().16b, \x3\().16b
        eor             \t4\().16b, \t4\().16b, \x4\().16b
        eor             \t5\().16b, \t5\().16b, \x5\().16b
        eor             \x0\().16b, \x0\().16b, \t6\().16b
        eor             \x1\().16b, \x1\().16b, \t6\().16b
        eor             \x2\().16b, \x2\().16b, \t0\().16b
        eor             \x4\().16b, \x4\().16b, \t2\().16b
        eor             \x3\().16b, \x3\().16b, \t1\().16b
        eor             \x1\().16b, \x1\().16b, \t7\().16b
        eor             \x2\().16b, \x2\().16b, \t7\().16b
        eor             \x4\().16b, \x4\().16b, \t6\().16b
        eor             \x5\().16b, \x5\().16b, \t3\().16b
        eor             \x3\().16b, \x3\().16b, \t6\().16b
        eor             \x6\().16b, \x6\().16b, \t4\().16b
        eor             \x4\().16b, \x4\().16b, \t7\().16b
        eor             \x5\().16b, \x5\().16b, \t7\().16b
        eor             \x7\().16b, \x7\().16b, \t5\().16b
        mix_cols        \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
                        \t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
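        // InvMixColumns is implemented as a preprocessing pass over the
        // slices (the ext #8 / XOR sequence above) followed by the forward
        // mix_cols transform, which is cheaper than implementing the
        // inverse matrix directly.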
        .macro          swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
        ushr            \t0\().2d, \b0\().2d, #\n
        ushr            \t1\().2d, \b1\().2d, #\n
        eor             \t0\().16b, \t0\().16b, \a0\().16b
        eor             \t1\().16b, \t1\().16b, \a1\().16b
        and             \t0\().16b, \t0\().16b, \mask\().16b
        and             \t1\().16b, \t1\().16b, \mask\().16b
        eor             \a0\().16b, \a0\().16b, \t0\().16b
        shl             \t0\().2d, \t0\().2d, #\n
        eor             \a1\().16b, \a1\().16b, \t1\().16b
        shl             \t1\().2d, \t1\().2d, #\n
        eor             \b0\().16b, \b0\().16b, \t0\().16b
        eor             \b1\().16b, \b1\().16b, \t1\().16b
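        // swapmove_2x performs the classic bit-matrix transposition step on
        // two register pairs at once. Per pair, the scalar equivalent is:
        //
        //      t = ((b >> n) ^ a) & mask;
        //      a ^= t;
        //      b ^= (t << n);
        //
        // i.e. it swaps the bit groups selected by mask between a and b at
        // distance n.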
        .macro          bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
        movi            \t0\().16b, #0x55
        movi            \t1\().16b, #0x33
        swapmove_2x     \x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
        swapmove_2x     \x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
        movi            \t0\().16b, #0x0f
        swapmove_2x     \x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
        swapmove_2x     \x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
        swapmove_2x     \x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
        swapmove_2x     \x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
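        // bitslice transposes an 8x8 bit matrix across the eight input
        // registers: swapmove with mask 0x55 exchanges single bits at
        // distance 1, mask 0x33 bit pairs at distance 2, and mask 0x0f
        // nibbles at distance 4. The transpose is an involution, so running
        // the same sequence again restores the original layout.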
M0:     .octa           0x0004080c0105090d02060a0e03070b0f

M0SR:   .octa           0x0004080c05090d010a0e02060f03070b
SR:     .octa           0x0f0e0d0c0a09080b0504070600030201
SRM0:   .octa           0x01060b0c0207080d0304090e00050a0f

M0ISR:  .octa           0x0004080c0d0105090a0e0206070b0f03
ISR:    .octa           0x0f0e0d0c080b0a090504070602010003
ISRM0:  .octa           0x0306090c00070a0d01040b0e0205080f
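        // tbl permutation vectors: M0 reorders bytes into the layout used
        // by the bitsliced representation, SR/ISR are (Inv)ShiftRows, and
        // the combined M0SR/SRM0/M0ISR/ISRM0 tables fold the reordering and
        // the row shift into a single tbl lookup.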
 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)

ENTRY(aesbs_convert_key)
        ld1             {v7.4s}, [x1], #16              // load round 0 key
        ld1             {v17.4s}, [x1], #16             // load round 1 key

        movi            v8.16b, #0x01                   // bit masks

        str             q7, [x0], #16                   // save round 0 key

        tbl             v7.16b, {v17.16b}, v16.16b
        ld1             {v17.4s}, [x1], #16             // load next round key

        cmtst           v0.16b, v7.16b, v8.16b
        cmtst           v1.16b, v7.16b, v9.16b
        cmtst           v2.16b, v7.16b, v10.16b
        cmtst           v3.16b, v7.16b, v11.16b
        cmtst           v4.16b, v7.16b, v12.16b
        cmtst           v5.16b, v7.16b, v13.16b
        cmtst           v6.16b, v7.16b, v14.16b
        cmtst           v7.16b, v7.16b, v15.16b

        stp             q0, q1, [x0], #128
        stp             q2, q3, [x0, #-96]
        stp             q4, q5, [x0, #-64]
        stp             q6, q7, [x0, #-32]

        movi            v7.16b, #0x63                   // compose .L63
        eor             v17.16b, v17.16b, v7.16b
ENDPROC(aesbs_convert_key)
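        // aesbs_convert_key (above) turns a regular AES key schedule into
        // bitsliced form: each round key is byte-permuted with tbl, then
        // cmtst against the single-bit masks in v8-v15 expands every key
        // bit into a full 0x00/0xff byte before the eight 16-byte slices
        // are stored. The round 0 key is kept as-is, and the final round
        // key has the S-box constant 0x63 folded in.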
        ldr             q9, [bskey], #16                // round 0 key

        eor             v10.16b, v0.16b, v9.16b         // xor with round0 key
        eor             v11.16b, v1.16b, v9.16b
        tbl             v0.16b, {v10.16b}, v8.16b
        eor             v12.16b, v2.16b, v9.16b
        tbl             v1.16b, {v11.16b}, v8.16b
        eor             v13.16b, v3.16b, v9.16b
        tbl             v2.16b, {v12.16b}, v8.16b
        eor             v14.16b, v4.16b, v9.16b
        tbl             v3.16b, {v13.16b}, v8.16b
        eor             v15.16b, v5.16b, v9.16b
        tbl             v4.16b, {v14.16b}, v8.16b
        eor             v10.16b, v6.16b, v9.16b
        tbl             v5.16b, {v15.16b}, v8.16b
        eor             v11.16b, v7.16b, v9.16b
        tbl             v6.16b, {v10.16b}, v8.16b
        tbl             v7.16b, {v11.16b}, v8.16b

        bitslice        v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

        sub             rounds, rounds, #1

        shift_rows      v0, v1, v2, v3, v4, v5, v6, v7, v24
        sbox            v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
        subs            rounds, rounds, #1

        mix_cols        v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \

        add_round_key   v0, v1, v2, v3, v4, v5, v6, v7

        ldr             q12, [bskey]                    // last round key

        bitslice        v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

        eor             v0.16b, v0.16b, v12.16b
        eor             v1.16b, v1.16b, v12.16b
        eor             v4.16b, v4.16b, v12.16b
        eor             v6.16b, v6.16b, v12.16b
        eor             v3.16b, v3.16b, v12.16b
        eor             v7.16b, v7.16b, v12.16b
        eor             v2.16b, v2.16b, v12.16b
        eor             v5.16b, v5.16b, v12.16b
ENDPROC(aesbs_encrypt8)
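        // aesbs_encrypt8 (above) encrypts eight blocks held in v0-v7. The
        // bitsliced rounds leave the result in the permuted register order
        // v0, v1, v4, v6, v3, v7, v2, v5, which is the order the callers
        // below pass to __ecb_crypt and friends.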
        ldr             q9, [bskey, #-112]!             // round 0 key

        eor             v10.16b, v0.16b, v9.16b         // xor with round0 key
        eor             v11.16b, v1.16b, v9.16b
        tbl             v0.16b, {v10.16b}, v8.16b
        eor             v12.16b, v2.16b, v9.16b
        tbl             v1.16b, {v11.16b}, v8.16b
        eor             v13.16b, v3.16b, v9.16b
        tbl             v2.16b, {v12.16b}, v8.16b
        eor             v14.16b, v4.16b, v9.16b
        tbl             v3.16b, {v13.16b}, v8.16b
        eor             v15.16b, v5.16b, v9.16b
        tbl             v4.16b, {v14.16b}, v8.16b
        eor             v10.16b, v6.16b, v9.16b
        tbl             v5.16b, {v15.16b}, v8.16b
        eor             v11.16b, v7.16b, v9.16b
        tbl             v6.16b, {v10.16b}, v8.16b
        tbl             v7.16b, {v11.16b}, v8.16b

        bitslice        v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

        sub             rounds, rounds, #1

        shift_rows      v0, v1, v2, v3, v4, v5, v6, v7, v24
        inv_sbox        v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
        subs            rounds, rounds, #1

        add_round_key   v0, v1, v6, v4, v2, v7, v3, v5

        inv_mix_cols    v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \

        ldr             q12, [bskey, #-16]              // last round key

        bitslice        v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

        eor             v0.16b, v0.16b, v12.16b
        eor             v1.16b, v1.16b, v12.16b
        eor             v6.16b, v6.16b, v12.16b
        eor             v4.16b, v4.16b, v12.16b
        eor             v2.16b, v2.16b, v12.16b
        eor             v7.16b, v7.16b, v12.16b
        eor             v3.16b, v3.16b, v12.16b
        eor             v5.16b, v5.16b, v12.16b
ENDPROC(aesbs_decrypt8)
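        // aesbs_decrypt8 is the mirror image: the key schedule is walked
        // backwards (note the pre-decrement addressing of bskey), inv_sbox
        // and inv_mix_cols replace their forward counterparts, and the
        // output ends up in the order v0, v1, v6, v4, v2, v7, v3, v5.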
 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,

        .macro          __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
        stp             x29, x30, [sp, #-16]!

        ld1             {v0.16b}, [x1], #16
        ld1             {v1.16b}, [x1], #16
        ld1             {v2.16b}, [x1], #16
        ld1             {v3.16b}, [x1], #16
        ld1             {v4.16b}, [x1], #16
        ld1             {v5.16b}, [x1], #16
        ld1             {v6.16b}, [x1], #16
        ld1             {v7.16b}, [x1], #16

        st1             {\o0\().16b}, [x0], #16
        st1             {\o1\().16b}, [x0], #16
        st1             {\o2\().16b}, [x0], #16
        st1             {\o3\().16b}, [x0], #16
        st1             {\o4\().16b}, [x0], #16
        st1             {\o5\().16b}, [x0], #16
        st1             {\o6\().16b}, [x0], #16
        st1             {\o7\().16b}, [x0], #16

1:      ldp             x29, x30, [sp], #16

ENTRY(aesbs_ecb_encrypt)
        __ecb_crypt     aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_ecb_encrypt)

ENTRY(aesbs_ecb_decrypt)
        __ecb_crypt     aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_ecb_decrypt)
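        // __ecb_crypt shares one body between ECB encryption and
        // decryption: it loads up to eight blocks per iteration, runs them
        // through \do8 (aesbs_encrypt8 or aesbs_decrypt8) and stores the
        // results in that primitive's permuted output register order.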
 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks, u8 iv[])

ENTRY(aesbs_cbc_decrypt)
        stp             x29, x30, [sp, #-16]!

        ld1             {v0.16b}, [x1], #16
        ld1             {v1.16b}, [x1], #16
        ld1             {v2.16b}, [x1], #16
        ld1             {v3.16b}, [x1], #16
        ld1             {v4.16b}, [x1], #16
        ld1             {v5.16b}, [x1], #16
        ld1             {v6.16b}, [x1], #16

        ld1             {v24.16b}, [x5]                 // load IV

        eor             v1.16b, v1.16b, v25.16b
        eor             v6.16b, v6.16b, v26.16b
        eor             v4.16b, v4.16b, v27.16b
        eor             v2.16b, v2.16b, v28.16b
        eor             v7.16b, v7.16b, v29.16b
        eor             v0.16b, v0.16b, v24.16b
        eor             v3.16b, v3.16b, v30.16b
        eor             v5.16b, v5.16b, v31.16b

        st1             {v0.16b}, [x0], #16
        st1             {v1.16b}, [x0], #16
        st1             {v6.16b}, [x0], #16
        st1             {v4.16b}, [x0], #16
        st1             {v2.16b}, [x0], #16
        st1             {v7.16b}, [x0], #16
        st1             {v3.16b}, [x0], #16
        ld1             {v24.16b}, [x1], #16
        st1             {v5.16b}, [x0], #16
1:      st1             {v24.16b}, [x5]                 // store IV

        ldp             x29, x30, [sp], #16
ENDPROC(aesbs_cbc_decrypt)
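        // CBC decryption: up to eight ciphertext blocks are decrypted in
        // bulk, then each result is XORed with the preceding ciphertext
        // block (kept in v25-v31), the first one with the IV in v24. The
        // last ciphertext block is written back through x5 as the IV for
        // the next call.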
        .macro          next_tweak, out, in, const, tmp
        sshr            \tmp\().2d, \in\().2d, #63
        and             \tmp\().16b, \tmp\().16b, \const\().16b
        add             \out\().2d, \in\().2d, \in\().2d
        ext             \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
        eor             \out\().16b, \out\().16b, \tmp\().16b

CPU_LE( .quad           1, 0x87 )
CPU_BE( .quad           0x87, 1 )
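        // next_tweak multiplies the 128-bit XTS tweak by x in GF(2^128):
        // the add doubles both 64-bit halves, while sshr/and/ext carry the
        // top bit of the low half into the high half and reduce the carry
        // out of the top bit with the constant 0x87 (for the polynomial
        // x^128 + x^7 + x^2 + x + 1). The CPU_LE/CPU_BE pair above defines
        // that constant with the correct lane order for either endianness.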
 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks, u8 iv[])
 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                   int blocks, u8 iv[])

        ld1             {v0.16b}, [x1], #16
        next_tweak      v26, v25, v30, v31
        eor             v0.16b, v0.16b, v25.16b

        ld1             {v1.16b}, [x1], #16
        next_tweak      v27, v26, v30, v31
        eor             v1.16b, v1.16b, v26.16b

        ld1             {v2.16b}, [x1], #16
        next_tweak      v28, v27, v30, v31
        eor             v2.16b, v2.16b, v27.16b

        ld1             {v3.16b}, [x1], #16
        next_tweak      v29, v28, v30, v31
        eor             v3.16b, v3.16b, v28.16b

        ld1             {v4.16b}, [x1], #16
        eor             v4.16b, v4.16b, v29.16b
        next_tweak      v29, v29, v30, v31

        ld1             {v5.16b}, [x1], #16
        eor             v5.16b, v5.16b, v29.16b
        next_tweak      v29, v29, v30, v31

        ld1             {v6.16b}, [x1], #16
        eor             v6.16b, v6.16b, v29.16b
        next_tweak      v29, v29, v30, v31

        ld1             {v7.16b}, [x1], #16
        eor             v7.16b, v7.16b, v29.16b
        next_tweak      v29, v29, v30, v31

ENDPROC(__xts_crypt8)
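        // __xts_crypt8 (above) loads up to eight blocks and XORs each with
        // its tweak before the block cipher runs: blocks 0-3 use the tweaks
        // in v25-v28 (computing the next one into v26-v29 as it goes),
        // while blocks 4-7 reuse v29, advancing it with next_tweak after
        // each block.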
        .macro          __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
        stp             x29, x30, [sp, #-80]!

        ldp             q16, q17, [sp, #16]
        ldp             q18, q19, [sp, #48]

        eor             \o0\().16b, \o0\().16b, v25.16b
        eor             \o1\().16b, \o1\().16b, v26.16b
        eor             \o2\().16b, \o2\().16b, v27.16b
        eor             \o3\().16b, \o3\().16b, v28.16b

        st1             {\o0\().16b}, [x0], #16
        st1             {\o1\().16b}, [x0], #16
        st1             {\o2\().16b}, [x0], #16
        st1             {\o3\().16b}, [x0], #16

        eor             \o4\().16b, \o4\().16b, v16.16b
        eor             \o5\().16b, \o5\().16b, v17.16b
        eor             \o6\().16b, \o6\().16b, v18.16b
        eor             \o7\().16b, \o7\().16b, v19.16b

        st1             {\o4\().16b}, [x0], #16
        st1             {\o5\().16b}, [x0], #16
        st1             {\o6\().16b}, [x0], #16
        st1             {\o7\().16b}, [x0], #16

1:      st1             {v25.16b}, [x5]
        ldp             x29, x30, [sp], #80

ENTRY(aesbs_xts_encrypt)
        __xts_crypt     aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
ENDPROC(aesbs_xts_encrypt)

ENTRY(aesbs_xts_decrypt)
        __xts_crypt     aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
ENDPROC(aesbs_xts_decrypt)
        rev64           \v\().16b, \v\().16b

 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 *                   int rounds, int blocks, u8 iv[], u8 final[])

ENTRY(aesbs_ctr_encrypt)
        stp             x29, x30, [sp, #-16]!

        add             x4, x4, x10                     // do one extra block if final

        lsr             x9, x9, x10                     // disregard the extra block

        ld1             {v8.16b}, [x1], #16
        eor             v0.16b, v0.16b, v8.16b
        st1             {v0.16b}, [x0], #16

        ld1             {v9.16b}, [x1], #16
        eor             v1.16b, v1.16b, v9.16b
        st1             {v1.16b}, [x0], #16

        ld1             {v10.16b}, [x1], #16
        eor             v4.16b, v4.16b, v10.16b
        st1             {v4.16b}, [x0], #16

        ld1             {v11.16b}, [x1], #16
        eor             v6.16b, v6.16b, v11.16b
        st1             {v6.16b}, [x0], #16

        ld1             {v12.16b}, [x1], #16
        eor             v3.16b, v3.16b, v12.16b
        st1             {v3.16b}, [x0], #16

        ld1             {v13.16b}, [x1], #16
        eor             v7.16b, v7.16b, v13.16b
        st1             {v7.16b}, [x0], #16

        ld1             {v14.16b}, [x1], #16
        eor             v2.16b, v2.16b, v14.16b
        st1             {v2.16b}, [x0], #16

        ld1             {v15.16b}, [x1], #16
        eor             v5.16b, v5.16b, v15.16b
        st1             {v5.16b}, [x0], #16
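        // Each encrypted counter block (the keystream) is XORed with one
        // block of input and stored; the blocks come out of aesbs_encrypt8
        // in its usual permuted order v0, v1, v4, v6, v3, v7, v2, v5, which
        // is why the eor/st1 pairs above follow that sequence.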
0:      st1             {v0.16b}, [x5]
        ldp             x29, x30, [sp], #16

 * If we are handling the tail of the input (x6 != NULL), return the
 * final keystream block back to the caller.

ENDPROC(aesbs_ctr_encrypt)