/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

/* included by aes-ce.S and aes-neon.S */
/*
 * Local helpers, called via bl: encrypt/decrypt 4 (or 5) blocks in parallel
 * using the round count in w3 and the round keys at x2.
 */
SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block4x)

SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block4x)

SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_encrypt_block5x)

SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
SYM_FUNC_END(aes_decrypt_block5x)

	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
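	/*
	 * Reference sketch (editorial addition, not taken from the kernel
	 * sources): ECB applies the block cipher to each 16-byte block
	 * independently, with no chaining.  aes_enc_block()/aes_dec_block()
	 * are hypothetical single-block helpers standing in for the
	 * encrypt_block/decrypt_block macros used below:
	 *
	 *	for (int i = 0; i < blocks; i++)
	 *		aes_enc_block(rk, rounds, out + 16 * i, in + 16 * i);
	 *
	 * Decryption is identical with aes_dec_block() substituted.
	 */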
AES_FUNC_START(aes_ecb_encrypt)
	enc_prepare	w3, x2, x5

	subs		w4, w4, #MAX_STRIDE
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)

	adds		w4, w4, #MAX_STRIDE
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
AES_FUNC_END(aes_ecb_encrypt)

AES_FUNC_START(aes_ecb_decrypt)
	dec_prepare	w3, x2, x5

	subs		w4, w4, #MAX_STRIDE
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)

	adds		w4, w4, #MAX_STRIDE
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
AES_FUNC_END(aes_ecb_decrypt)

	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[])
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[])
	 */
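	/*
	 * Reference sketch (editorial addition): CBC encryption XORs each
	 * plaintext block with the previous ciphertext block (the IV for the
	 * first block) before encrypting it, and the last ciphertext block is
	 * written back through iv[] so the caller can continue the chain.
	 * aes_enc_block() and xor16() are hypothetical helpers (xor16(a, b)
	 * XORs 16 bytes of b into a):
	 *
	 *	u8 chain[16];
	 *	memcpy(chain, iv, 16);
	 *	for (int i = 0; i < blocks; i++) {
	 *		xor16(chain, in + 16 * i);
	 *		aes_enc_block(rk, rounds, chain, chain);
	 *		memcpy(out + 16 * i, chain, 16);
	 *	}
	 *	memcpy(iv, chain, 16);			// return iv
	 *
	 * The ESSIV variants differ only in how the starting IV is obtained:
	 * the caller-provided IV is first encrypted with the second key
	 * schedule (rk2, always AES-256) before being used as the CBC IV.
	 */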
AES_FUNC_START(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9
	enc_switch_key	w3, x2, x6

AES_FUNC_START(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64

	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16

	st1		{v4.16b}, [x5]			/* return iv */
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)

AES_FUNC_START(aes_essiv_cbc_decrypt)
	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9

AES_FUNC_START(aes_cbc_decrypt)
	ld1		{cbciv.16b}, [x5]		/* get iv */

	dec_prepare	w3, x2, x6

	subs		w4, w4, #MAX_STRIDE
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	bl		aes_decrypt_block5x
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b

	bl		aes_decrypt_block4x
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b

	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)

	adds		w4, w4, #MAX_STRIDE
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16

	st1		{cbciv.16b}, [x5]		/* return iv */
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)

	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */
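	/*
	 * Reference sketch (editorial addition): these routines apply
	 * ciphertext stealing to the final portion of a CBC message so that
	 * the output has the same length as a non-block-aligned input.  For a
	 * final segment of 'bytes' bytes (roughly 16 < bytes <= 32),
	 * encryption behaves like the C below; aes_enc_block() and xor16()
	 * are hypothetical helpers:
	 *
	 *	int tail = bytes - 16;			// 1..16 bytes
	 *	u8 c1[16], p2[16] = { 0 };
	 *
	 *	memcpy(c1, in, 16);
	 *	xor16(c1, iv);
	 *	aes_enc_block(rk, rounds, c1, c1);	// CBC on block n-1
	 *
	 *	memcpy(p2, in + 16, tail);		// zero-padded final block
	 *	xor16(p2, c1);
	 *	aes_enc_block(rk, rounds, out, p2);	// full block goes first
	 *	memcpy(out + 16, c1, tail);		// "stolen" tail of c1
	 *
	 * Decryption inverts this, recovering block n-1 last.  The assembly
	 * below gets the same effect without branches by using overlapping
	 * loads/stores and the .Lcts_permute_table lookups.
	 */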
AES_FUNC_START(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b
	tbl		v0.16b, {v0.16b}, v3.16b
	encrypt_block	v1, w3, x2, x6, w7

	st1		{v0.16b}, [x4]			/* overlapping stores */
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b

	tbx		v0.16b, {v1.16b}, v4.16b
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	st1		{v2.16b}, [x4]			/* overlapping stores */
AES_FUNC_END(aes_cbc_cts_decrypt)

	.section	".rodata", "a"
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous

	/*
	 * This macro generates the code for CTR and XCTR mode.
	 */
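	/*
	 * Reference sketch (editorial addition): both modes turn AES into a
	 * stream cipher by encrypting a per-block counter value and XORing
	 * the result into the data.  Roughly, with a hypothetical
	 * aes_enc_block() helper and a 16-byte block[] scratch buffer:
	 *
	 *	for (u64 i = 0; i * 16 < bytes; i++) {
	 *		if (!xctr) {
	 *			// CTR: encrypt the IV, then do a 128-bit
	 *			// big-endian increment for the next block
	 *			memcpy(block, ctr, 16);
	 *			be128_inc(ctr);
	 *		} else {
	 *			// XCTR: XOR a little-endian block counter
	 *			// (starting at 1) into the low 64 bits of
	 *			// the fixed IV
	 *			u64 le = cpu_to_le64(byte_ctr / 16 + i + 1);
	 *			memcpy(block, iv, 16);
	 *			xor_bytes(block, &le, 8);
	 *		}
	 *		aes_enc_block(rk, rounds, block, block);
	 *		for (int j = 0; j < 16 && i * 16 + j < bytes; j++)
	 *			out[i * 16 + j] = in[i * 16 + j] ^ block[j];
	 *	}
	 *
	 * The generated code interleaves MAX_STRIDE such blocks per loop
	 * iteration and keeps 64 bits of the counter in a GP register; the
	 * helpers above (be128_inc(), xor_bytes()) are illustrative only.
	 */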
	.macro		ctr_encrypt xctr
	BYTE_CTR_W	.req w6		// XCTR only
	// Intermediate values
	CTR_W		.req w11	// XCTR only
	CTR		.req x11	// XCTR only
	enc_prepare	ROUNDS_W, KEY, IV_PART

	/*
	 * Keep 64 bits of the IV in a register.  For CTR mode this lets us
	 * easily increment the IV.  For XCTR mode this lets us efficiently XOR
	 * the 64-bit counter with the IV.
	 */
	umov		IV_PART, vctr.d[0]	// XCTR
	lsr		CTR_W, BYTE_CTR_W, #4
	umov		IV_PART, vctr.d[1]	// CTR

	add		BLOCKS_W, BYTES_W, #15
	sub		BYTES_W, BYTES_W, #MAX_STRIDE << 4
	lsr		BLOCKS_W, BLOCKS_W, #4
	csel		BLOCKS_W, BLOCKS_W, w8, lt
	/*
	 * Set up the counter values in v0-v{MAX_STRIDE-1}.
	 *
	 * If we are encrypting less than MAX_STRIDE blocks, the tail block
	 * handling code expects the last keystream block to be in
	 * v{MAX_STRIDE-1}.  For example: if encrypting two blocks with
	 * MAX_STRIDE=5, then v3 and v4 should have the next two counter blocks.
	 */
	adds		IV_PART, IV_PART, BLOCKS
ST5(	mov		v4.16b, vctr.16b	)

	sub		x6, CTR, #MAX_STRIDE - 1
	sub		x7, CTR, #MAX_STRIDE - 2
	sub		x8, CTR, #MAX_STRIDE - 3
	sub		x9, CTR, #MAX_STRIDE - 4
ST5(	sub		x10, CTR, #MAX_STRIDE - 5	)
ST5(	eor		x10, x10, IV_PART	)
ST5(	mov		v4.d[0], x10	)
	/*
	 * This subsection handles carries.
	 *
	 * Conditional branching here is allowed with respect to time
	 * invariance since the branches are dependent on the IV instead
	 * of the plaintext or key.  This code is rarely executed in
	 * practice anyway.
	 */

	/* Apply carry to outgoing counter. */
0:	umov		x8, vctr.d[0]

	/*
	 * Apply carry to counter blocks if needed.
	 *
	 * Since the carry flag was set, we know 0 <= IV_PART <
	 * MAX_STRIDE.  Using the value of IV_PART we can determine how
	 * many counter blocks need to be updated.
	 */
	sub		x16, x16, IV_PART, lsl #3
	mov		v0.d[0], vctr.d[0]
	mov		v1.d[0], vctr.d[0]
	mov		v2.d[0], vctr.d[0]
	mov		v3.d[0], vctr.d[0]
ST5(	mov		v4.d[0], vctr.d[0]	)

	sub		x7, IV_PART, #MAX_STRIDE - 1
	sub		x8, IV_PART, #MAX_STRIDE - 2
	sub		x9, IV_PART, #MAX_STRIDE - 3
ST5(	sub		x10, IV_PART, #MAX_STRIDE - 4	)
ST5(	mov		v4.d[1], x10	)

	/*
	 * If there are at least MAX_STRIDE blocks left, XOR the data with
	 * keystream and store.  Otherwise jump to tail handling.
	 */
	tbnz		BYTES_W, #31, .Lctrtail\xctr
	ld1		{v5.16b-v7.16b}, [IN], #48
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [IN], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [IN], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [OUT], #64
ST5(	st1		{v4.16b}, [OUT], #16		)
	cbz		BYTES_W, .Lctrout\xctr

	st1		{vctr.16b}, [IV]		/* return next CTR value */

	/*
	 * Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
	 *
	 * This code expects the last keystream block to be in v{MAX_STRIDE-1}.
	 * For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
	 * v4 should have the next two counter blocks.
	 *
	 * This allows us to store the ciphertext by writing to overlapping
	 * regions of memory.  Any invalid ciphertext blocks get overwritten by
	 * correctly computed blocks.  This approach greatly simplifies the
	 * logic for storing the ciphertext.
	 */
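	/*
	 * Worked example (editorial addition): with MAX_STRIDE=4 and 21 bytes
	 * left, only two keystream blocks are needed, and they end up in v2
	 * and v3.  The last 16 input bytes, in[5..20], are loaded as a single
	 * block, XORed with a permuted copy of the last keystream block and
	 * stored to out[5..20]; a full 16-byte store of the first block's
	 * result then covers out[0..15], overwriting any bytes of the
	 * overlapping store that were not validly masked (the "invalid blocks
	 * get overwritten" case described above).  The result is 21 correct
	 * output bytes using only 16-byte loads and stores.
	 */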
	ands		w7, BYTES_W, #0xf
	csel		x13, x7, x16, ne

ST5(	cmp		BYTES_W, #64 - (MAX_STRIDE << 4))
ST5(	csel		x14, x16, xzr, gt		)
	cmp		BYTES_W, #48 - (MAX_STRIDE << 4)
	csel		x15, x16, xzr, gt
	cmp		BYTES_W, #32 - (MAX_STRIDE << 4)
	csel		x16, x16, xzr, gt
	cmp		BYTES_W, #16 - (MAX_STRIDE << 4)

	adr_l		x9, .Lcts_permute_table

ST5(	ld1		{v5.16b}, [IN], x14		)
	ld1		{v6.16b}, [IN], x15
	ld1		{v7.16b}, [IN], x16

ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)

	ld1		{v8.16b}, [IN], x13

ST4(	eor		v6.16b, v6.16b, v0.16b		)
ST4(	eor		v7.16b, v7.16b, v1.16b		)
ST4(	tbl		v3.16b, {v3.16b}, v10.16b	)
ST4(	eor		v8.16b, v8.16b, v2.16b		)
ST4(	eor		v9.16b, v9.16b, v3.16b		)

ST5(	eor		v5.16b, v5.16b, v0.16b		)
ST5(	eor		v6.16b, v6.16b, v1.16b		)
ST5(	tbl		v4.16b, {v4.16b}, v10.16b	)
ST5(	eor		v7.16b, v7.16b, v2.16b		)
ST5(	eor		v8.16b, v8.16b, v3.16b		)
ST5(	eor		v9.16b, v9.16b, v4.16b		)

ST5(	st1		{v5.16b}, [OUT], x14		)
	st1		{v6.16b}, [OUT], x15
	st1		{v7.16b}, [OUT], x16
	st1		{v9.16b}, [x13]			// overlapping stores

	/*
	 * Handle <= 16 bytes of plaintext
	 *
	 * This code always reads and writes 16 bytes.  To avoid out of bounds
	 * accesses, XCTR and CTR modes must use a temporary buffer when
	 * encrypting/decrypting less than 16 bytes.
	 *
	 * This code is unusual in that it loads the input and stores the output
	 * relative to the end of the buffers rather than relative to the start.
	 * This causes unusual behaviour when encrypting/decrypting less than 16
	 * bytes; the end of the data is expected to be at the end of the
	 * temporary buffer rather than the start of the data being at the start
	 * of the temporary buffer.
	 */
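	/*
	 * Caller-side sketch (editorial addition): because this path always
	 * loads and stores 16 bytes relative to the *end* of the buffers, a
	 * caller that has fewer than 16 bytes is expected to bounce the data
	 * through a right-aligned 16-byte buffer, roughly:
	 *
	 *	u8 buf[16];
	 *	memcpy(buf + 16 - nbytes, src, nbytes);	// data at end of buf
	 *	aes_ctr_encrypt(buf, buf, rk, rounds, nbytes, ctr);
	 *	memcpy(dst, buf + 16 - nbytes, nbytes);
	 *
	 * (and likewise for aes_xctr_encrypt).
	 */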
ST5(	mov		v3.16b, v4.16b			)
	encrypt_block	v3, ROUNDS_W, KEY, x8, w7
	ld1		{v10.16b-v11.16b}, [x9]
	tbl		v3.16b, {v3.16b}, v10.16b
	sshr		v11.16b, v11.16b, #7
	eor		v5.16b, v5.16b, v3.16b
	bif		v5.16b, v6.16b, v11.16b

	.unreq		BYTE_CTR_W	// XCTR only
	// Intermediate values
	.unreq		CTR_W		// XCTR only
	.unreq		CTR		// XCTR only
	.endm

	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int bytes, u8 ctr[])
	 *
	 * The input and output buffers must always be at least 16 bytes even if
	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
	 * accesses will occur.  The data to be encrypted/decrypted is expected
	 * to be at the end of this 16-byte temporary buffer rather than the
	 * start.
	 */
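	/*
	 * Usage sketch (editorial addition): ctr[] is the 16-byte big-endian
	 * counter block and is both an input and an output, so a long message
	 * can be processed in pieces, e.g. (sub-16-byte handling omitted, n1 a
	 * multiple of 16):
	 *
	 *	aes_ctr_encrypt(out, in, rk, rounds, n1, ctr);
	 *	aes_ctr_encrypt(out + n1, in + n1, rk, rounds, n2, ctr);
	 *
	 * The callee advances ctr[] past the blocks it consumed.
	 */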
AES_FUNC_START(aes_ctr_encrypt)
	ctr_encrypt	xctr=0
AES_FUNC_END(aes_ctr_encrypt)

	/*
	 * aes_xctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		    int bytes, u8 const iv[], int byte_ctr)
	 *
	 * The input and output buffers must always be at least 16 bytes even if
	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
	 * accesses will occur.  The data to be encrypted/decrypted is expected
	 * to be at the end of this 16-byte temporary buffer rather than the
	 * start.
	 */
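	/*
	 * Usage sketch (editorial addition): unlike CTR mode, the IV itself is
	 * not advanced.  byte_ctr is the byte offset of this chunk within the
	 * overall message (a multiple of 16 for all but the final call), and
	 * message block i is keyed by XORing the little-endian counter value
	 * i + 1 into the low 64 bits of the IV.  Chained calls therefore look
	 * roughly like:
	 *
	 *	aes_xctr_encrypt(out, in, rk, rounds, n1, iv, 0);
	 *	aes_xctr_encrypt(out + n1, in + n1, rk, rounds, n2, iv, n1);
	 */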
AES_FUNC_START(aes_xctr_encrypt)
	ctr_encrypt	xctr=1
AES_FUNC_END(aes_xctr_encrypt)

	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
	 */
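	/*
	 * Reference sketch (editorial addition): XTS encrypts the IV with the
	 * second key schedule (rk2) on the first call to obtain the initial
	 * tweak, then whitens every block with the running tweak on both sides
	 * of the cipher, multiplying the tweak by x in GF(2^128) between
	 * blocks.  With hypothetical helpers aes_enc_block(), xor16() and
	 * gf128_mul_x(), encryption is roughly:
	 *
	 *	u8 tmp[16];
	 *	if (first)
	 *		aes_enc_block(rk2, rounds, iv, iv);	// T = E_K2(IV)
	 *	for (int i = 0; i < bytes / 16; i++) {
	 *		memcpy(tmp, in + 16 * i, 16);
	 *		xor16(tmp, iv);
	 *		aes_enc_block(rk1, rounds, tmp, tmp);	// decrypt for XTS dec
	 *		xor16(tmp, iv);
	 *		memcpy(out + 16 * i, tmp, 16);
	 *		gf128_mul_x(iv);			// see next_tweak below
	 *	}
	 *
	 * A trailing partial block (bytes not a multiple of 16) is handled
	 * with ciphertext stealing, reusing .Lcts_permute_table.
	 */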
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d,   #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d,   \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm

	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm

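	/*
	 * Reference sketch (editorial addition): next_tweak multiplies the
	 * 128-bit tweak by x in GF(2^128) using the XTS convention (little-
	 * endian halves, reduction by the 0x87 constant that xts_load_mask
	 * places in the mask register).  In C terms, with lo/hi being the
	 * first/second 64-bit halves of the tweak:
	 *
	 *	u64 carry = (u64)((s64)hi >> 63) & 0x87;	// 0x87 if MSB set
	 *	hi = (hi << 1) | (lo >> 63);
	 *	lo = (lo << 1) ^ carry;
	 */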
AES_FUNC_START(aes_xts_encrypt)
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	xts_cts_skip_tw	w7, .LxtsencNx
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8

	enc_prepare	w3, x2, x8

	next_tweak	v4, v4, v8

	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64

	ld1		{v0.16b}, [x1], #16
	eor		v0.16b, v0.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b

	next_tweak	v4, v4, v8

	st1		{v0.16b}, [x0], #16

	adr_l		x8, .Lcts_permute_table
	add		x1, x1, w4, sxtw		/* rewind input pointer */
	add		w4, w4, #16			/* # bytes in final block */
	add		x4, x0, x4			/* output address of final block */

	ld1		{v1.16b}, [x1]			/* load final block */

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b
	st1		{v2.16b}, [x4]			/* overlapping stores */
AES_FUNC_END(aes_xts_encrypt)

AES_FUNC_START(aes_xts_decrypt)
	/* subtract 16 bytes if we are doing CTS */

	xts_cts_skip_tw	w7, .Lxtsdecskiptw
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */

	dec_prepare	w3, x2, x8

	dec_prepare	w3, x2, x8

	next_tweak	v4, v4, v8

	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64

	ld1		{v0.16b}, [x1], #16
	eor		v0.16b, v0.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16

	next_tweak	v4, v4, v8

	adr_l		x8, .Lcts_permute_table
	add		x1, x1, w4, sxtw		/* rewind input pointer */
	add		w4, w4, #16			/* # bytes in final block */
	add		x4, x0, x4			/* output address of final block */

	next_tweak	v5, v4, v8

	ld1		{v1.16b}, [x1]			/* load final block */

	eor		v0.16b, v0.16b, v5.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v5.16b

	tbl		v2.16b, {v0.16b}, v2.16b
	tbx		v0.16b, {v1.16b}, v3.16b

	st1		{v2.16b}, [x4]			/* overlapping stores */
AES_FUNC_END(aes_xts_decrypt)

	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
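	/*
	 * Reference sketch (editorial addition): this is a CBC-MAC core; dg[]
	 * is the running MAC state, and enc_before/enc_after control whether
	 * the state is passed through the cipher before the first data block
	 * and after the last one, which lets the caller defer the final
	 * encryption until it has applied its own last-block tweak (as the
	 * cmac/xcbc code does).  With hypothetical helpers:
	 *
	 *	if (enc_before)
	 *		aes_enc_block(rk, rounds, dg, dg);
	 *	for (int i = 0; i < blocks; i++) {
	 *		xor16(dg, in + 16 * i);
	 *		if (i < blocks - 1 || enc_after)
	 *			aes_enc_block(rk, rounds, dg, dg);
	 *	}
	 */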
AES_FUNC_START(aes_mac_update)
	ld1		{v0.16b}, [x4]			/* get dg */
	enc_prepare	w2, x1, x7

	encrypt_block	v0, w2, x1, x7, w8

	ld1		{v1.16b-v4.16b}, [x0], #64	/* get next pt blocks */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w2, x1, x7, w8
	eor		v0.16b, v0.16b, v4.16b

	csinv		x5, x6, xzr, eq
	encrypt_block	v0, w2, x1, x7, w8
	st1		{v0.16b}, [x4]			/* return dg */
	cond_yield	.Lmacout, x7, x8

	ld1		{v1.16b}, [x0], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	csinv		x5, x6, xzr, eq

	encrypt_block	v0, w2, x1, x7, w8

	st1		{v0.16b}, [x4]			/* return dg */
AES_FUNC_END(aes_mac_update)