1 #if defined(__aarch64__)
2 #include <openssl/arm_arch.h>
4 #if __ARM_MAX_ARCH__>=7
6 #if !defined(__clang__)
// Constant data: three rows of four identical 32-bit words (3 x 16 bytes).
// NOTE(review): the label and alignment directives for this table are not
// visible in this excerpt; it is presumably the data that
// aes_v8_set_encrypt_key loads into v1/v2 -- confirm.
11 .long 0x01,0x01,0x01,0x01 // row 0: 0x01 in every word; presumably the first AES round constant -- confirm
// Row 1: used as a tbl index vector on a little-endian target, each word
// selects source bytes 13,14,15,12, i.e. the top 32-bit word rotated by one
// byte and copied into every lane.
12 .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
13 .long 0x1b,0x1b,0x1b,0x1b // row 2: 0x1b in every word; presumably the round constant used after 0x80 -- confirm
// aes_v8_set_encrypt_key
// Per its name, expands an AES encryption key schedule using vector registers.
// NOTE(review): the embedded source line numbers skip, so this excerpt omits
// instructions (no branch, local label, store or return instruction is
// visible). The comments below describe only the instructions shown and do
// not assume register state carried across an omitted line.
// Registers as used by the visible code:
//   v0     - zeroed once, then used as the low source of every ext
//   v1,v2  - two vectors loaded from [x3]; v2 is the tbl index vector and v1
//            is XORed into the tbl result (presumably the rotate-n-splat mask
//            and the round constant respectively -- confirm)
//   v3,v4  - key words being updated in place
//   v5     - scratch holding lane-shifted copies of v3/v4
//   v6     - scratch holding the tbl/dup result
//   x0     - receives x3 at the end (existing comment: return value)
15 .globl aes_v8_set_encrypt_key
16 .type aes_v8_set_encrypt_key,%function
18 aes_v8_set_encrypt_key:
20 stp x29,x30,[sp,#-16]! // push frame pointer and link register, sp -= 16
38 eor v0.16b,v0.16b,v0.16b // v0 = 0
41 ld1 {v1.4s,v2.4s},[x3],#32 // load two 16-byte vectors from [x3], then x3 += 32
// Expansion step: permute the current key vector through the v2 mask, then
// XOR v3 with copies of itself shifted up by one, two and three 32-bit lanes.
// If nothing on the omitted lines touches v3/v5, each lane of v3 ends up as
// the XOR of itself and all lower lanes before the v6 term is folded in.
49 tbl v6.16b,{v3.16b},v2.16b // v6 = bytes of v3 picked by index vector v2
50 ext v5.16b,v0.16b,v3.16b,#12 // v5 = top 4 bytes of v0 followed by low 12 bytes of v3 (v3 moved up one lane)
55 eor v3.16b,v3.16b,v5.16b // v3 ^= v5
56 ext v5.16b,v0.16b,v5.16b,#12 // move v5 up one more lane
57 eor v3.16b,v3.16b,v5.16b // v3 ^= v5
58 ext v5.16b,v0.16b,v5.16b,#12 // move v5 up one more lane
59 eor v6.16b,v6.16b,v1.16b // v6 ^= v1
60 eor v3.16b,v3.16b,v5.16b // v3 ^= v5
62 eor v3.16b,v3.16b,v6.16b // v3 ^= v6
// Same instruction sequence as the step above.
67 tbl v6.16b,{v3.16b},v2.16b
68 ext v5.16b,v0.16b,v3.16b,#12
72 eor v3.16b,v3.16b,v5.16b
73 ext v5.16b,v0.16b,v5.16b,#12
74 eor v3.16b,v3.16b,v5.16b
75 ext v5.16b,v0.16b,v5.16b,#12
76 eor v6.16b,v6.16b,v1.16b
77 eor v3.16b,v3.16b,v5.16b
79 eor v3.16b,v3.16b,v6.16b
// Same instruction sequence once more.
81 tbl v6.16b,{v3.16b},v2.16b
82 ext v5.16b,v0.16b,v3.16b,#12
86 eor v3.16b,v3.16b,v5.16b
87 ext v5.16b,v0.16b,v5.16b,#12
88 eor v3.16b,v3.16b,v5.16b
89 ext v5.16b,v0.16b,v5.16b,#12
90 eor v6.16b,v6.16b,v1.16b
91 eor v3.16b,v3.16b,v5.16b
92 eor v3.16b,v3.16b,v6.16b
// Subtract 8 from every byte of the tbl index vector, so each index selects
// the source byte 8 positions lower than before.
102 movi v6.16b,#8 // borrow v6.16b
104 sub v2.16b,v2.16b,v6.16b // adjust the mask
// Two-vector variant: the permutation source is v4, and both v3 and v4 are
// updated. NOTE(review): the line numbers show an instruction omitted between
// 117 and 120, so v5 at line 120 is not necessarily the shifted copy left by
// line 116.
107 tbl v6.16b,{v4.16b},v2.16b // v6 = bytes of v4 picked by the adjusted mask
108 ext v5.16b,v0.16b,v3.16b,#12 // v5 = v3 moved up one lane
113 eor v3.16b,v3.16b,v5.16b
114 ext v5.16b,v0.16b,v5.16b,#12
115 eor v3.16b,v3.16b,v5.16b
116 ext v5.16b,v0.16b,v5.16b,#12
117 eor v3.16b,v3.16b,v5.16b
120 eor v5.16b,v5.16b,v4.16b // v5 ^= v4
121 eor v6.16b,v6.16b,v1.16b // v6 ^= v1
122 ext v4.16b,v0.16b,v4.16b,#12 // v4 moved up one lane
124 eor v4.16b,v4.16b,v5.16b // v4 ^= v5
125 eor v3.16b,v3.16b,v6.16b // v3 ^= v6
126 eor v4.16b,v4.16b,v6.16b // v4 ^= v6
// Step that permutes v4 but updates v3 only.
142 tbl v6.16b,{v4.16b},v2.16b
143 ext v5.16b,v0.16b,v3.16b,#12
148 eor v3.16b,v3.16b,v5.16b
149 ext v5.16b,v0.16b,v5.16b,#12
150 eor v3.16b,v3.16b,v5.16b
151 ext v5.16b,v0.16b,v5.16b,#12
152 eor v6.16b,v6.16b,v1.16b
153 eor v3.16b,v3.16b,v5.16b
155 eor v3.16b,v3.16b,v6.16b
// Companion step for v4: v6 is lane 3 of v3 copied to all four lanes, with no
// tbl permutation and no XOR with v1 among the visible instructions.
159 dup v6.4s,v3.s[3] // just splat
160 ext v5.16b,v0.16b,v4.16b,#12 // v5 = v4 moved up one lane
163 eor v4.16b,v4.16b,v5.16b
164 ext v5.16b,v0.16b,v5.16b,#12
165 eor v4.16b,v4.16b,v5.16b
166 ext v5.16b,v0.16b,v5.16b,#12
167 eor v4.16b,v4.16b,v5.16b
169 eor v4.16b,v4.16b,v6.16b // v4 ^= v6
177 mov x0,x3 // return value
180 .size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
// aes_v8_set_decrypt_key
// Per its name, prepares the AES decryption key schedule.
// NOTE(review): most of the body (any call, loop, loads/stores and the return
// sequence) is on lines omitted from this excerpt; only the pointer
// arithmetic and the result value are visible.
182 .globl aes_v8_set_decrypt_key
183 .type aes_v8_set_decrypt_key,%function
185 aes_v8_set_decrypt_key:
186 stp x29,x30,[sp,#-16]! // push frame pointer and link register, sp -= 16
// x2 is moved back by 240 bytes; presumably earlier, non-visible code advanced
// it by that amount -- confirm.
193 sub x2,x2,#240 // restore original x2
// x0 = x2 + x12*16. NOTE(review): where x12 is set is not visible here.
195 add x0,x2,x12,lsl#4 // end of key schedule
// x0 ^ x0 is always 0, so x0 is zeroed here.
216 eor x0,x0,x0 // return value
220 .size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
// aes_v8_encrypt
// NOTE(review): the entry label, loads, round instructions, store and return
// are on lines omitted from this excerpt; only one XOR is visible.
221 .globl aes_v8_encrypt
222 .type aes_v8_encrypt,%function
245 eor v2.16b,v2.16b,v0.16b // v2 ^= v0; presumably the final round-key addition -- confirm
249 .size aes_v8_encrypt,.-aes_v8_encrypt
// aes_v8_decrypt
// NOTE(review): the entry label, loads, round instructions, store and return
// are on lines omitted from this excerpt; only one XOR is visible.
250 .globl aes_v8_decrypt
251 .type aes_v8_decrypt,%function
274 eor v2.16b,v2.16b,v0.16b // v2 ^= v0; presumably the final round-key addition -- confirm
278 .size aes_v8_decrypt,.-aes_v8_decrypt
// aes_v8_cbc_encrypt
// Per its name, AES in CBC mode; the compare on w5 selects the direction.
// NOTE(review): the embedded line numbers skip, so the entry label, most
// branches and labels, the aese/aesd instructions and the return sequence are
// not visible. Comments describe only the instructions shown and do not
// assume register state carried across an omitted line.
// Registers as used by the visible code:
//   x0      - read pointer; 16-byte blocks are loaded from [x0] with post-increment
//   x1      - write pointer; 16-byte blocks are stored to [x1], x1 += 16 each time
//   x2      - counter, biased by -32 on the path that uses aesimc
//   x3      - base of the key schedule
//   w5/x5   - compared with 0 to pick the direction, and also used as a
//             16-byte-scaled index into the key schedule (presumably reloaded
//             in between on an omitted line -- confirm)
//   x6, x8  - amounts added to x0 (x6 by add, x8 as a register post-index)
//   x7      - cursor into the key schedule
//   v7      - XORed into several values; presumably the last round key -- confirm
279 .globl aes_v8_cbc_encrypt
280 .type aes_v8_cbc_encrypt,%function
283 stp x29,x30,[sp,#-16]! // push frame pointer and link register, sp -= 16
290 cmp w5,#0 // en- or decrypting?
296 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
298 add x7,x3,x5,lsl#4 // pointer to last 7 round keys
300 ld1 {v18.4s,v19.4s},[x7],#32 // three paired loads fetch six round keys into v18-v23
301 ld1 {v20.4s,v21.4s},[x7],#32
302 ld1 {v22.4s,v23.4s},[x7],#32
310 eor v0.16b,v0.16b,v6.16b // v0 ^= v6
311 eor v5.16b,v16.16b,v7.16b // v5 = v16 ^ v7
314 ld1 {v2.4s,v3.4s},[x7] // load two round keys at x7, no pointer update
328 st1 {v6.16b},[x1],#16 // store one block, x1 += 16
360 ld1 {v16.16b},[x0],x8 // load one block, then x0 += x8
363 eor v16.16b,v16.16b,v5.16b // v16 ^= v5
366 ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
370 eor v6.16b,v0.16b,v7.16b // v6 = v0 ^ v7
373 st1 {v6.16b},[x1],#16
378 ld1 {v2.4s,v3.4s},[x7]
385 st1 {v6.16b},[x1],#16
399 ld1 {v16.16b},[x0],x8 // load one block, then x0 += x8
406 eor v16.16b,v16.16b,v5.16b
408 eor v6.16b,v0.16b,v7.16b
// Branch taken when the carry flag is set (unsigned higher-or-same); the
// flag-setting instruction and the target label are on omitted lines.
409 b.hs .Loop_cbc_enc128
411 st1 {v6.16b},[x1],#16
// From here on the visible round instructions are aesimc (inverse
// MixColumns), so this is presumably the decrypt path -- confirm.
415 ld1 {v18.16b},[x0],#16 // load one block, x0 += 16
416 subs x2,x2,#32 // bias
418 orr v3.16b,v0.16b,v0.16b // orr of a register with itself is a vector move: v3 = v0
419 orr v1.16b,v0.16b,v0.16b // v1 = v0
420 orr v19.16b,v18.16b,v18.16b // v19 = v18
423 orr v1.16b,v18.16b,v18.16b // v1 = v18
424 ld1 {v18.16b},[x0],#16 // load the next block
425 orr v2.16b,v0.16b,v0.16b // v2 = v0
426 orr v3.16b,v1.16b,v1.16b // v3 = v1
427 orr v19.16b,v18.16b,v18.16b // v19 = v18
435 aesimc v18.16b,v18.16b // inverse MixColumns on v18, in place
436 ld1 {v16.4s},[x7],#16 // load one round key, x7 += 16
443 aesimc v18.16b,v18.16b
444 ld1 {v17.4s},[x7],#16
452 aesimc v18.16b,v18.16b
453 eor v4.16b,v6.16b,v7.16b // v4 = v6 ^ v7
455 eor v5.16b,v2.16b,v7.16b // v5 = v2 ^ v7
// x6 = x2 when the carry flag is clear (unsigned lower), otherwise unchanged.
456 csel x6,x2,x6,lo // x6, w6, is zero at this point
462 aesimc v18.16b,v18.16b
463 eor v17.16b,v3.16b,v7.16b // v17 = v3 ^ v7
464 add x0,x0,x6 // x0 is adjusted in such way that
465 // at exit from the loop v1.16b-v18.16b
466 // are loaded with last "words"
467 orr v6.16b,v19.16b,v19.16b // v6 = v19
474 aesimc v18.16b,v18.16b
475 ld1 {v2.16b},[x0],#16 // load the next three blocks into v2, v3, v19
481 aesimc v18.16b,v18.16b
482 ld1 {v3.16b},[x0],#16
488 aesimc v18.16b,v18.16b
489 ld1 {v19.16b},[x0],#16
493 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
495 eor v4.16b,v4.16b,v0.16b // v4 ^= v0
496 eor v5.16b,v5.16b,v1.16b // v5 ^= v1
497 eor v18.16b,v18.16b,v17.16b // v18 ^= v17
498 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
499 st1 {v4.16b},[x1],#16 // stores of v4, v5, v18 interleaved with moves of the newly loaded blocks
500 orr v0.16b,v2.16b,v2.16b // v0 = v2
501 st1 {v5.16b},[x1],#16
502 orr v1.16b,v3.16b,v3.16b // v1 = v3
503 st1 {v18.16b},[x1],#16
504 orr v18.16b,v19.16b,v19.16b // v18 = v19
// NOTE(review): the lines below presumably belong to a tail that handles the
// final one or two blocks; its labels and branches are not visible -- confirm.
515 aesimc v18.16b,v18.16b
516 ld1 {v16.4s},[x7],#16
521 aesimc v18.16b,v18.16b
522 ld1 {v17.4s},[x7],#16
528 aesimc v18.16b,v18.16b
532 aesimc v18.16b,v18.16b
536 aesimc v18.16b,v18.16b
541 aesimc v18.16b,v18.16b
542 eor v5.16b,v6.16b,v7.16b // v5 = v6 ^ v7
546 aesimc v18.16b,v18.16b
547 eor v17.16b,v3.16b,v7.16b // v17 = v3 ^ v7
551 eor v5.16b,v5.16b,v1.16b // v5 ^= v1
552 eor v17.16b,v17.16b,v18.16b // v17 ^= v18
553 orr v6.16b,v19.16b,v19.16b // v6 = v19
554 st1 {v5.16b},[x1],#16 // store two blocks
555 st1 {v17.16b},[x1],#16
559 eor v5.16b,v5.16b,v18.16b // v5 ^= v18
560 orr v6.16b,v19.16b,v19.16b // v6 = v19
561 st1 {v5.16b},[x1],#16 // store one block
568 .size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
// aes_v8_ctr32_encrypt_blocks
// Per its name, AES in counter mode with a 32-bit counter, processing whole
// blocks.
// NOTE(review): the embedded line numbers skip, so counter handling, the aese
// instructions, labels, branches and the return sequence are not visible.
// Comments describe only the instructions shown and do not assume register
// state carried across an omitted line.
// Registers as used by the visible code:
//   x0  - read pointer; 16-byte blocks are loaded from [x0] with post-increment
//   x1  - write pointer; 16-byte blocks are stored to [x1], x1 += 16 each time
//   x3  - base of the key schedule
//   x5  - 16-byte-scaled index into the key schedule
//   x7  - cursor into the key schedule
//   x12 - register post-index applied to x0 after one load
//   v7  - XORed into each loaded block; presumably the last round key -- confirm
569 .globl aes_v8_ctr32_encrypt_blocks
570 .type aes_v8_ctr32_encrypt_blocks,%function
572 aes_v8_ctr32_encrypt_blocks:
573 stp x29,x30,[sp,#-16]! // push frame pointer and link register, sp -= 16
580 ld1 {v16.4s,v17.4s},[x3] // load key schedule...
584 add x7,x3,x5,lsl#4 // pointer to last 5 round keys
586 ld1 {v20.4s,v21.4s},[x7],#32 // two paired loads fetch four round keys into v20-v23
587 ld1 {v22.4s,v23.4s},[x7],#32
595 orr v1.16b,v0.16b,v0.16b // orr of a register with itself is a vector move: v1 = v0
597 orr v18.16b,v0.16b,v0.16b // v18 = v0
599 orr v6.16b,v0.16b,v0.16b // v6 = v0
615 aesmc v18.16b,v18.16b // MixColumns on v18, in place
616 ld1 {v16.4s},[x7],#16 // load one round key, x7 += 16
623 aesmc v18.16b,v18.16b
624 ld1 {v17.4s},[x7],#16
631 ld1 {v2.16b},[x0],#16 // load three blocks into v2, v3, v19
632 orr v0.16b,v6.16b,v6.16b // v0 = v6
634 aesmc v18.16b,v18.16b
635 ld1 {v3.16b},[x0],#16
636 orr v1.16b,v6.16b,v6.16b // v1 = v6
641 ld1 {v19.16b},[x0],#16
644 aesmc v17.16b,v18.16b // v17 = MixColumns(v18); the result moves to v17 here
645 orr v18.16b,v6.16b,v6.16b // v18 = v6
651 eor v2.16b,v2.16b,v7.16b // each loaded block is XORed with v7
654 aesmc v17.16b,v17.16b
655 eor v3.16b,v3.16b,v7.16b
661 eor v19.16b,v19.16b,v7.16b
664 aesmc v17.16b,v17.16b
674 aesmc v17.16b,v17.16b
681 eor v2.16b,v2.16b,v4.16b // v2 ^= v4
682 ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
683 st1 {v2.16b},[x1],#16 // store three result blocks: v2, v3, v19
684 eor v3.16b,v3.16b,v5.16b // v3 ^= v5
686 st1 {v3.16b},[x1],#16
687 eor v19.16b,v19.16b,v17.16b // v19 ^= v17
688 ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
689 st1 {v19.16b},[x1],#16
// NOTE(review): the lines below presumably belong to a tail that handles the
// final one or two blocks; its labels and branches are not visible -- confirm.
703 ld1 {v16.4s},[x7],#16
709 ld1 {v17.4s},[x7],#16
720 ld1 {v2.16b},[x0],x12 // load one block, then x0 += x12
730 eor v2.16b,v2.16b,v7.16b // v2 ^= v7
735 eor v3.16b,v3.16b,v7.16b // v3 ^= v7
740 eor v2.16b,v2.16b,v0.16b // v2 ^= v0
741 eor v3.16b,v3.16b,v1.16b // v3 ^= v1
742 st1 {v2.16b},[x1],#16 // store one block
749 .size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks