Disable draw_properties benchmark on Android.
[chromium-blink-merge.git] / third_party / boringssl / linux-arm / crypto / aes / aesv8-armx.S
blobfede6edd18b31920d65df2b9cf4ef38460751131
@ ARMv8 Crypto Extension AES routines, 32-bit ARM encoding.
@ Everything from the arch guard below to the closing endif at the end
@ of the file is assembled only when __ARM_MAX_ARCH__ is at least 7.
1 #include "arm_arch.h"
3 #if __ARM_MAX_ARCH__>=7
4 .text
5 .arch   armv7-a
6 .fpu    neon
7 .code   32
8 .align  5
@ rcon: constants for the key expansion in aes_v8_set_encrypt_key, which
@ takes this address PC-relative with adr. Three rows of four 32-bit words:
@   row 0  0x01 in every word; loaded into q1 as the starting round constant
@   row 1  vtbl byte-index mask; loaded into q2
@   row 2  0x1b in every word; loaded into q1 after .Loop128 has finished
9 rcon:
10 .long   0x01,0x01,0x01,0x01
11 .long   0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d     @ rotate-n-splat
12 .long   0x1b,0x1b,0x1b,0x1b
@ ----------------------------------------------------------------------
@ aes_v8_set_encrypt_key: expand a user key into an encryption schedule.
@ In:    r0 = pointer to the user key (rejected if NULL)
@        r1 = key length in bits; 128, 192 and 256 are accepted
@        r2 = pointer to the output key schedule (rejected if NULL)
@ Out:   r0 =  0 on success
@             -1 if r0 or r2 was NULL
@             -2 if r1 is below 128, above 256 or not a multiple of 64
@        On success the round keys are stored upward from the original
@        r2 and the round count (10, 12 or 14) is stored at offset 240.
@ Clobb: r0-r3, r12, q0-q3, q8-q10, flags.
@ The local label .Lenc_key is also entered with bl from
@ aes_v8_set_decrypt_key. On the success path r2 is left pointing at
@ schedule + 240 and r12 holds the round count; that caller uses both.
@ The aese instruction is emitted as raw bytes; the trailing comment on
@ each .byte line gives the intended mnemonic.
@ ----------------------------------------------------------------------
14 .globl  aes_v8_set_encrypt_key
15 .type   aes_v8_set_encrypt_key,%function
16 .align  5
17 aes_v8_set_encrypt_key:
18 .Lenc_key:
@ Argument validation. r3 carries the value that will be returned.
19         mov     r3,#-1
20         cmp     r0,#0
21         beq     .Lenc_key_abort
22         cmp     r2,#0
23         beq     .Lenc_key_abort
24         mov     r3,#-2
25         cmp     r1,#128
26         blt     .Lenc_key_abort
27         cmp     r1,#256
28         bgt     .Lenc_key_abort
29         tst     r1,#0x3f
30         bne     .Lenc_key_abort
@ Common set-up: q0 = 0, q3 = first 16 key bytes, q1 = rcon row 0,
@ q2 = vtbl mask, r1 = loop count. None of the instructions between the
@ cmp against 192 and the blt/beq below write the flags, so the
@ three-way dispatch still reflects the key size.
32         adr     r3,rcon
33         cmp     r1,#192
35         veor    q0,q0,q0
36         vld1.8  {q3},[r0]!
37         mov     r1,#8           @ reuse r1
38         vld1.32 {q1,q2},[r3]!
40         blt     .Loop128
41         beq     .L192
42         b       .L256
@ 128-bit key. Each pass stores the current round key from q3 and derives
@ the next one: vtbl selects bytes of q3 through the mask in q2, aese with
@ the all-zero q0 transforms them, the round constant in q1 is XORed in,
@ and q3 is XORed with copies of itself shifted up one word at a time
@ (vext against the zero register q0). q1 is doubled every pass.
44 .align  4
45 .Loop128:
46         vtbl.8  d20,{q3},d4
47         vtbl.8  d21,{q3},d5
48         vext.8  q9,q0,q3,#12
49         vst1.32 {q3},[r2]!
50         .byte   0x00,0x43,0xf0,0xf3     @ aese q10,q0
51         subs    r1,r1,#1
53         veor    q3,q3,q9
54         vext.8  q9,q0,q9,#12
55         veor    q3,q3,q9
56         vext.8  q9,q0,q9,#12
57          veor   q10,q10,q1
58         veor    q3,q3,q9
59         vshl.u8 q1,q1,#1
60         veor    q3,q3,q10
61         bne     .Loop128
@ Two more expansion steps, unrolled, using round constant 0x1b (rcon
@ row 2) and then its double.
63         vld1.32 {q1},[r3]
65         vtbl.8  d20,{q3},d4
66         vtbl.8  d21,{q3},d5
67         vext.8  q9,q0,q3,#12
68         vst1.32 {q3},[r2]!
69         .byte   0x00,0x43,0xf0,0xf3     @ aese q10,q0
71         veor    q3,q3,q9
72         vext.8  q9,q0,q9,#12
73         veor    q3,q3,q9
74         vext.8  q9,q0,q9,#12
75          veor   q10,q10,q1
76         veor    q3,q3,q9
77         vshl.u8 q1,q1,#1
78         veor    q3,q3,q10
80         vtbl.8  d20,{q3},d4
81         vtbl.8  d21,{q3},d5
82         vext.8  q9,q0,q3,#12
83         vst1.32 {q3},[r2]!
84         .byte   0x00,0x43,0xf0,0xf3     @ aese q10,q0
86         veor    q3,q3,q9
87         vext.8  q9,q0,q9,#12
88         veor    q3,q3,q9
89         vext.8  q9,q0,q9,#12
90          veor   q10,q10,q1
91         veor    q3,q3,q9
92         veor    q3,q3,q10
@ The last round key is stored without write-back; r2 is at schedule + 160
@ here, so adding 0x50 leaves it at schedule + 240 for .Ldone.
93         vst1.32 {q3},[r2]
94         add     r2,r2,#0x50
96         mov     r12,#10
97         b       .Ldone
@ 192-bit key. The remaining 8 key bytes are loaded into d16 and the vtbl
@ mask is lowered by 8 in every byte. Each of the 8 passes stores 8 bytes
@ from d16 and then 16 bytes from q3.
99 .align  4
100 .L192:
101         vld1.8  {d16},[r0]!
102         vmov.i8 q10,#8                  @ borrow q10
103         vst1.32 {q3},[r2]!
104         vsub.i8 q2,q2,q10       @ adjust the mask
106 .Loop192:
107         vtbl.8  d20,{q8},d4
108         vtbl.8  d21,{q8},d5
109         vext.8  q9,q0,q3,#12
110         vst1.32 {d16},[r2]!
111         .byte   0x00,0x43,0xf0,0xf3     @ aese q10,q0
112         subs    r1,r1,#1
114         veor    q3,q3,q9
115         vext.8  q9,q0,q9,#12
116         veor    q3,q3,q9
117         vext.8  q9,q0,q9,#12
118         veor    q3,q3,q9
120         vdup.32 q9,d7[1]
121         veor    q9,q9,q8
122          veor   q10,q10,q1
123         vext.8  q8,q0,q8,#12
124         vshl.u8 q1,q1,#1
125         veor    q8,q8,q9
126         veor    q3,q3,q10
127         veor    q8,q8,q10
128         vst1.32 {q3},[r2]!
129         bne     .Loop192
@ r2 is at schedule + 208 here; adding 0x20 leaves it at schedule + 240.
131         mov     r12,#12
132         add     r2,r2,#0x20
133         b       .Ldone
@ 256-bit key. The second 16 key bytes are loaded into q8. The loop is
@ left through the beq to .Ldone on the 7th pass, at which point r2 has
@ advanced by exactly 240 bytes.
135 .align  4
136 .L256:
137         vld1.8  {q8},[r0]
138         mov     r1,#7
139         mov     r12,#14
140         vst1.32 {q3},[r2]!
142 .Loop256:
143         vtbl.8  d20,{q8},d4
144         vtbl.8  d21,{q8},d5
145         vext.8  q9,q0,q3,#12
146         vst1.32 {q8},[r2]!
147         .byte   0x00,0x43,0xf0,0xf3     @ aese q10,q0
148         subs    r1,r1,#1
150         veor    q3,q3,q9
151         vext.8  q9,q0,q9,#12
152         veor    q3,q3,q9
153         vext.8  q9,q0,q9,#12
154          veor   q10,q10,q1
155         veor    q3,q3,q9
156         vshl.u8 q1,q1,#1
157         veor    q3,q3,q10
158         vst1.32 {q3},[r2]!
159         beq     .Ldone
@ Second half of the pass updates q8. The word splatted from d7[1] goes
@ through aese with the zero key but without the vtbl byte selection and
@ without a round constant.
161         vdup.32 q10,d7[1]
162         vext.8  q9,q0,q8,#12
163         .byte   0x00,0x43,0xf0,0xf3     @ aese q10,q0
165         veor    q8,q8,q9
166         vext.8  q9,q0,q9,#12
167         veor    q8,q8,q9
168         vext.8  q9,q0,q9,#12
169         veor    q8,q8,q9
171         veor    q8,q8,q10
172         b       .Loop256
@ Success: r2 = schedule + 240, so this stores the round count there.
174 .Ldone:
175         str     r12,[r2]
176         mov     r3,#0
178 .Lenc_key_abort:
179         mov     r0,r3                   @ return value
180         
181         bx      lr
182 .size   aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
@ ----------------------------------------------------------------------
@ aes_v8_set_decrypt_key: build a decryption key schedule.
@ In:    r0, r1, r2 as for aes_v8_set_encrypt_key.
@ Out:   r0 = 0 on success, otherwise the non-zero code produced by the
@        encryption key expansion (the schedule is then left as is).
@ Runs the encryption key expansion through .Lenc_key, then reverses the
@ order of the round keys in place. aesimc is applied to every round key
@ except the first and the last.
@ Clobb: everything .Lenc_key clobbers; r4 and lr are saved and restored.
@ ----------------------------------------------------------------------
184 .globl  aes_v8_set_decrypt_key
185 .type   aes_v8_set_decrypt_key,%function
186 .align  5
187 aes_v8_set_decrypt_key:
188         stmdb   sp!,{r4,lr}
189         bl      .Lenc_key
191         cmp     r0,#0
192         bne     .Ldec_key_abort
@ r2 = start of schedule, r0 = address of the last round key
@ (start + 16 * round count), r4 = -16 used as post-index step so that r0
@ walks downward while r2 walks upward.
194         sub     r2,r2,#240              @ restore original r2
195         mov     r4,#-16
196         add     r0,r2,r12,lsl#4 @ end of key schedule
@ Swap the first and last round keys without transforming them.
198         vld1.32 {q0},[r2]
199         vld1.32 {q1},[r0]
200         vst1.32 {q0},[r0],r4
201         vst1.32 {q1},[r2]!
@ Swap the remaining pairs from both ends, applying aesimc to each,
@ until the two pointers meet.
203 .Loop_imc:
204         vld1.32 {q0},[r2]
205         vld1.32 {q1},[r0]
206         .byte   0xc0,0x03,0xb0,0xf3     @ aesimc q0,q0
207         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
208         vst1.32 {q0},[r0],r4
209         vst1.32 {q1},[r2]!
210         cmp     r0,r2
211         bhi     .Loop_imc
@ The middle round key stays in place and only gets aesimc applied.
213         vld1.32 {q0},[r2]
214         .byte   0xc0,0x03,0xb0,0xf3     @ aesimc q0,q0
215         vst1.32 {q0},[r0]
217         eor     r0,r0,r0                @ return value
218 .Ldec_key_abort:
219         ldmia   sp!,{r4,pc}
220 .size   aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
@ ----------------------------------------------------------------------
@ aes_v8_encrypt: encrypt one 16-byte block.
@ In:    r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule; round count read at offset 240
@ Out:   16 bytes stored at [r1]; no value is placed in r0.
@ Clobb: r2, r3, q0-q2, flags.
@ The loop consumes two round keys per pass, loading the next pair while
@ the current pair is in use. The last two rounds are peeled: the final
@ aese is not followed by aesmc, and the last round key is applied with
@ a plain veor.
@ ----------------------------------------------------------------------
221 .globl  aes_v8_encrypt
222 .type   aes_v8_encrypt,%function
223 .align  5
224 aes_v8_encrypt:
225         ldr     r3,[r2,#240]
226         vld1.32 {q0},[r2]!
227         vld1.8  {q2},[r0]
228         sub     r3,r3,#2
229         vld1.32 {q1},[r2]!
231 .Loop_enc:
232         .byte   0x00,0x43,0xb0,0xf3     @ aese q2,q0
233         vld1.32 {q0},[r2]!
234         .byte   0x84,0x43,0xb0,0xf3     @ aesmc q2,q2
235         subs    r3,r3,#2
236         .byte   0x02,0x43,0xb0,0xf3     @ aese q2,q1
237         vld1.32 {q1},[r2]!
238         .byte   0x84,0x43,0xb0,0xf3     @ aesmc q2,q2
239         bgt     .Loop_enc
241         .byte   0x00,0x43,0xb0,0xf3     @ aese q2,q0
242         vld1.32 {q0},[r2]
243         .byte   0x84,0x43,0xb0,0xf3     @ aesmc q2,q2
244         .byte   0x02,0x43,0xb0,0xf3     @ aese q2,q1
245         veor    q2,q2,q0
247         vst1.8  {q2},[r1]
248         bx      lr
249 .size   aes_v8_encrypt,.-aes_v8_encrypt
@ ----------------------------------------------------------------------
@ aes_v8_decrypt: decrypt one 16-byte block.
@ In:    r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule; round count read at offset 240
@            (presumably one produced by aes_v8_set_decrypt_key)
@ Out:   16 bytes stored at [r1]; no value is placed in r0.
@ Clobb: r2, r3, q0-q2, flags.
@ Same structure as aes_v8_encrypt with aesd/aesimc in place of
@ aese/aesmc: two round keys per loop pass, last two rounds peeled, and
@ the last round key applied with a plain veor.
@ ----------------------------------------------------------------------
250 .globl  aes_v8_decrypt
251 .type   aes_v8_decrypt,%function
252 .align  5
253 aes_v8_decrypt:
254         ldr     r3,[r2,#240]
255         vld1.32 {q0},[r2]!
256         vld1.8  {q2},[r0]
257         sub     r3,r3,#2
258         vld1.32 {q1},[r2]!
260 .Loop_dec:
261         .byte   0x40,0x43,0xb0,0xf3     @ aesd q2,q0
262         vld1.32 {q0},[r2]!
263         .byte   0xc4,0x43,0xb0,0xf3     @ aesimc q2,q2
264         subs    r3,r3,#2
265         .byte   0x42,0x43,0xb0,0xf3     @ aesd q2,q1
266         vld1.32 {q1},[r2]!
267         .byte   0xc4,0x43,0xb0,0xf3     @ aesimc q2,q2
268         bgt     .Loop_dec
270         .byte   0x40,0x43,0xb0,0xf3     @ aesd q2,q0
271         vld1.32 {q0},[r2]
272         .byte   0xc4,0x43,0xb0,0xf3     @ aesimc q2,q2
273         .byte   0x42,0x43,0xb0,0xf3     @ aesd q2,q1
274         veor    q2,q2,q0
276         vst1.8  {q2},[r1]
277         bx      lr
278 .size   aes_v8_decrypt,.-aes_v8_decrypt
@ ----------------------------------------------------------------------
@ aes_v8_cbc_encrypt: CBC-mode encryption or decryption.
@ In:    r0 = input pointer
@        r1 = output pointer
@        r2 = length in bytes
@        r3 = pointer to the key schedule; round count read at offset 240
@        [sp]   = pointer to the 16-byte IV
@        [sp+4] = direction: zero selects decryption, non-zero encryption
@ Out:   output written at [r1]; the IV buffer is overwritten with the
@        chaining value for a following call.
@ A length below 16 returns at once without touching the output or the
@ IV. Otherwise the length is rounded down to a multiple of 16.
@ Saves and restores r4-r8, lr and d8-d15.
@ Register roles after set-up:
@   q6       current chaining value (IV)
@   q8, q9   round keys being streamed through r7
@   q10-q15  six of the last seven round keys, q7 the very last one
@   r5       round count minus 8; r6 is the working copy used as counter
@   r8       input post-increment, 16 or 0 (0 when no further block
@            follows, so the look-ahead load stays inside the input)
@ ----------------------------------------------------------------------
279 .globl  aes_v8_cbc_encrypt
280 .type   aes_v8_cbc_encrypt,%function
281 .align  5
282 aes_v8_cbc_encrypt:
283         mov     ip,sp
284         stmdb   sp!,{r4-r8,lr}
285         vstmdb  sp!,{d8-d15}            @ ABI specification says so
286         ldmia   ip,{r4-r5}              @ load remaining args
287         subs    r2,r2,#16
288         mov     r8,#16
289         blo     .Lcbc_abort
290         moveq   r8,#0
@ The flags set by this cmp are consumed by the beq to .Lcbc_dec further
@ down; nothing in between writes the flags.
292         cmp     r5,#0                   @ en- or decrypting?
293         ldr     r5,[r3,#240]
294         and     r2,r2,#-16
295         vld1.8  {q6},[r4]
296         vld1.8  {q0},[r0],r8
298         vld1.32 {q8-q9},[r3]            @ load key schedule...
299         sub     r5,r5,#6
300         add     r7,r3,r5,lsl#4  @ pointer to last 7 round keys
301         sub     r5,r5,#2
302         vld1.32 {q10-q11},[r7]!
303         vld1.32 {q12-q13},[r7]!
304         vld1.32 {q14-q15},[r7]!
305         vld1.32 {q7},[r7]
307         add     r7,r3,#32
308         mov     r6,r5
309         beq     .Lcbc_dec
@ Encryption. r5 equals 2 exactly when the round count is 10, which takes
@ the dedicated .Lcbc_enc128 path. q5 = rndkey[0] ^ last round key is
@ XORed into each following plaintext block, so that the aese at the top
@ of the next pass applies both the chaining XOR and the first round key
@ to the not-yet-finalised state left in q0.
311         cmp     r5,#2
312         veor    q0,q0,q6
313         veor    q5,q8,q7
314         beq     .Lcbc_enc128
316 .Loop_cbc_enc:
317         .byte   0x20,0x03,0xb0,0xf3     @ aese q0,q8
318         vld1.32 {q8},[r7]!
319         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
320         subs    r6,r6,#2
321         .byte   0x22,0x03,0xb0,0xf3     @ aese q0,q9
322         vld1.32 {q9},[r7]!
323         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
324         bgt     .Loop_cbc_enc
@ Remaining rounds with the preloaded keys. The scalar and load
@ instructions carrying an extra leading space are bookkeeping for the
@ next block, interleaved with the AES rounds. The flags from the
@ subs on r2 are still intact at the bhs that closes the loop.
326         .byte   0x20,0x03,0xb0,0xf3     @ aese q0,q8
327         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
328          subs   r2,r2,#16
329         .byte   0x22,0x03,0xb0,0xf3     @ aese q0,q9
330         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
331          moveq  r8,#0
332         .byte   0x24,0x03,0xb0,0xf3     @ aese q0,q10
333         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
334          add    r7,r3,#16
335         .byte   0x26,0x03,0xb0,0xf3     @ aese q0,q11
336         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
337          vld1.8 {q8},[r0],r8
338         .byte   0x28,0x03,0xb0,0xf3     @ aese q0,q12
339         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
340          veor   q8,q8,q5
341         .byte   0x2a,0x03,0xb0,0xf3     @ aese q0,q13
342         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
343          vld1.32 {q9},[r7]!     @ re-pre-load rndkey[1]
344         .byte   0x2c,0x03,0xb0,0xf3     @ aese q0,q14
345         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
346         .byte   0x2e,0x03,0xb0,0xf3     @ aese q0,q15
348          mov    r6,r5
349         veor    q6,q0,q7
350         vst1.8  {q6},[r1]!
351         bhs     .Loop_cbc_enc
353         b       .Lcbc_done
@ Encryption with a 10-round schedule: rndkey[2] and rndkey[3] are kept
@ in q2 and q3, so no round key is reloaded inside the loop. The store of
@ each ciphertext block is deferred to the start of the next pass.
355 .align  5
356 .Lcbc_enc128:
357         vld1.32 {q2-q3},[r7]
358         .byte   0x20,0x03,0xb0,0xf3     @ aese q0,q8
359         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
360         b       .Lenter_cbc_enc128
361 .Loop_cbc_enc128:
362         .byte   0x20,0x03,0xb0,0xf3     @ aese q0,q8
363         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
364          vst1.8 {q6},[r1]!
365 .Lenter_cbc_enc128:
366         .byte   0x22,0x03,0xb0,0xf3     @ aese q0,q9
367         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
368          subs   r2,r2,#16
369         .byte   0x04,0x03,0xb0,0xf3     @ aese q0,q2
370         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
371          moveq  r8,#0
372         .byte   0x06,0x03,0xb0,0xf3     @ aese q0,q3
373         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
374         .byte   0x24,0x03,0xb0,0xf3     @ aese q0,q10
375         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
376         .byte   0x26,0x03,0xb0,0xf3     @ aese q0,q11
377         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
378          vld1.8 {q8},[r0],r8
379         .byte   0x28,0x03,0xb0,0xf3     @ aese q0,q12
380         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
381         .byte   0x2a,0x03,0xb0,0xf3     @ aese q0,q13
382         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
383         .byte   0x2c,0x03,0xb0,0xf3     @ aese q0,q14
384         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
385          veor   q8,q8,q5
386         .byte   0x2e,0x03,0xb0,0xf3     @ aese q0,q15
387         veor    q6,q0,q7
388         bhs     .Loop_cbc_enc128
390         vst1.8  {q6},[r1]!
391         b       .Lcbc_done
@ Decryption. Up to three blocks are processed per pass in q0, q1 and q10,
@ with copies of the ciphertext kept in q2, q3 and q11 for the chaining
@ XOR. r2 is biased by a further 32 so that the subs by 0x30 inside the
@ loop leaves the carry clear once fewer than three blocks remain.
392 .align  5
393 .Lcbc_dec:
394         vld1.8  {q10},[r0]!
395         subs    r2,r2,#32               @ bias
396         add     r6,r5,#2
397         vorr    q3,q0,q0
398         vorr    q1,q0,q0
399         vorr    q11,q10,q10
400         blo     .Lcbc_dec_tail
402         vorr    q1,q10,q10
403         vld1.8  {q10},[r0]!
404         vorr    q2,q0,q0
405         vorr    q3,q1,q1
406         vorr    q11,q10,q10
408 .Loop3x_cbc_dec:
409         .byte   0x60,0x03,0xb0,0xf3     @ aesd q0,q8
410         .byte   0x60,0x23,0xb0,0xf3     @ aesd q1,q8
411         .byte   0x60,0x43,0xf0,0xf3     @ aesd q10,q8
412         vld1.32 {q8},[r7]!
413         .byte   0xc0,0x03,0xb0,0xf3     @ aesimc q0,q0
414         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
415         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
416         subs    r6,r6,#2
417         .byte   0x62,0x03,0xb0,0xf3     @ aesd q0,q9
418         .byte   0x62,0x23,0xb0,0xf3     @ aesd q1,q9
419         .byte   0x62,0x43,0xf0,0xf3     @ aesd q10,q9
420         vld1.32 {q9},[r7]!
421         .byte   0xc0,0x03,0xb0,0xf3     @ aesimc q0,q0
422         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
423         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
424         bgt     .Loop3x_cbc_dec
@ Remaining rounds for all three blocks. q4, q5 and q9 are prepared as
@ (previous ciphertext ^ last round key) so that one veor per block at
@ the end applies both the final round key and the CBC chaining.
426         .byte   0x60,0x03,0xb0,0xf3     @ aesd q0,q8
427         .byte   0x60,0x23,0xb0,0xf3     @ aesd q1,q8
428         .byte   0x60,0x43,0xf0,0xf3     @ aesd q10,q8
429          veor   q4,q6,q7
430         .byte   0xc0,0x03,0xb0,0xf3     @ aesimc q0,q0
431         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
432         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
433          veor   q5,q2,q7
434         .byte   0x62,0x03,0xb0,0xf3     @ aesd q0,q9
435         .byte   0x62,0x23,0xb0,0xf3     @ aesd q1,q9
436         .byte   0x62,0x43,0xf0,0xf3     @ aesd q10,q9
437          veor   q9,q3,q7
438          subs   r2,r2,#0x30
439         .byte   0xc0,0x03,0xb0,0xf3     @ aesimc q0,q0
440         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
441         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
442          vorr   q6,q11,q11
443          movlo  r6,r2                   @ r6 is zero at this point
444         .byte   0x68,0x03,0xb0,0xf3     @ aesd q0,q12
445         .byte   0x68,0x23,0xb0,0xf3     @ aesd q1,q12
446         .byte   0x68,0x43,0xf0,0xf3     @ aesd q10,q12
447          add    r0,r0,r6                @ r0 is adjusted in such way that
448                                         @ at exit from the loop q1-q10
449                                         @ are loaded with last "words"
450         .byte   0xc0,0x03,0xb0,0xf3     @ aesimc q0,q0
451         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
452         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
453          mov    r7,r3
454         .byte   0x6a,0x03,0xb0,0xf3     @ aesd q0,q13
455         .byte   0x6a,0x23,0xb0,0xf3     @ aesd q1,q13
456         .byte   0x6a,0x43,0xf0,0xf3     @ aesd q10,q13
457          vld1.8 {q2},[r0]!
458         .byte   0xc0,0x03,0xb0,0xf3     @ aesimc q0,q0
459         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
460         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
461          vld1.8 {q3},[r0]!
462         .byte   0x6c,0x03,0xb0,0xf3     @ aesd q0,q14
463         .byte   0x6c,0x23,0xb0,0xf3     @ aesd q1,q14
464         .byte   0x6c,0x43,0xf0,0xf3     @ aesd q10,q14
465          vld1.8 {q11},[r0]!
466         .byte   0xc0,0x03,0xb0,0xf3     @ aesimc q0,q0
467         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
468         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
469          vld1.32 {q8},[r7]!     @ re-pre-load rndkey[0]
470         .byte   0x6e,0x03,0xb0,0xf3     @ aesd q0,q15
471         .byte   0x6e,0x23,0xb0,0xf3     @ aesd q1,q15
472         .byte   0x6e,0x43,0xf0,0xf3     @ aesd q10,q15
474          add    r6,r5,#2
475         veor    q4,q4,q0
476         veor    q5,q5,q1
477         veor    q10,q10,q9
478          vld1.32 {q9},[r7]!     @ re-pre-load rndkey[1]
479          vorr   q0,q2,q2
480         vst1.8  {q4},[r1]!
481          vorr   q1,q3,q3
482         vst1.8  {q5},[r1]!
483         vst1.8  {q10},[r1]!
484          vorr   q10,q11,q11
485         bhs     .Loop3x_cbc_dec
@ r2 equals -0x30 here when the length was a multiple of three blocks,
@ in which case nothing is left for the tail.
487         cmn     r2,#0x30
488         beq     .Lcbc_done
489         nop
@ Tail: one or two blocks left, carried in q1 and q10.
491 .Lcbc_dec_tail:
492         .byte   0x60,0x23,0xb0,0xf3     @ aesd q1,q8
493         .byte   0x60,0x43,0xf0,0xf3     @ aesd q10,q8
494         vld1.32 {q8},[r7]!
495         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
496         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
497         subs    r6,r6,#2
498         .byte   0x62,0x23,0xb0,0xf3     @ aesd q1,q9
499         .byte   0x62,0x43,0xf0,0xf3     @ aesd q10,q9
500         vld1.32 {q9},[r7]!
501         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
502         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
503         bgt     .Lcbc_dec_tail
505         .byte   0x60,0x23,0xb0,0xf3     @ aesd q1,q8
506         .byte   0x60,0x43,0xf0,0xf3     @ aesd q10,q8
507         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
508         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
509         .byte   0x62,0x23,0xb0,0xf3     @ aesd q1,q9
510         .byte   0x62,0x43,0xf0,0xf3     @ aesd q10,q9
511         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
512         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
513         .byte   0x68,0x23,0xb0,0xf3     @ aesd q1,q12
514         .byte   0x68,0x43,0xf0,0xf3     @ aesd q10,q12
515         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
516         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
@ Z is set when exactly one block remains (r2 equals -0x20). The flags
@ stay untouched until the beq to .Lcbc_dec_one below.
517          cmn    r2,#0x20
518         .byte   0x6a,0x23,0xb0,0xf3     @ aesd q1,q13
519         .byte   0x6a,0x43,0xf0,0xf3     @ aesd q10,q13
520         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
521         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
522          veor   q5,q6,q7
523         .byte   0x6c,0x23,0xb0,0xf3     @ aesd q1,q14
524         .byte   0x6c,0x43,0xf0,0xf3     @ aesd q10,q14
525         .byte   0xc2,0x23,0xb0,0xf3     @ aesimc q1,q1
526         .byte   0xe4,0x43,0xf0,0xf3     @ aesimc q10,q10
527          veor   q9,q3,q7
528         .byte   0x6e,0x23,0xb0,0xf3     @ aesd q1,q15
529         .byte   0x6e,0x43,0xf0,0xf3     @ aesd q10,q15
530         beq     .Lcbc_dec_one
@ Two blocks: q1 and q10 both hold results.
531         veor    q5,q5,q1
532         veor    q9,q9,q10
533          vorr   q6,q11,q11
534         vst1.8  {q5},[r1]!
535         vst1.8  {q9},[r1]!
536         b       .Lcbc_done
@ One block: only q10 holds a wanted result.
538 .Lcbc_dec_one:
539         veor    q5,q5,q10
540          vorr   q6,q11,q11
541         vst1.8  {q5},[r1]!
@ Write the chaining value back to the IV buffer.
543 .Lcbc_done:
544         vst1.8  {q6},[r4]
545 .Lcbc_abort:
546         vldmia  sp!,{d8-d15}
547         ldmia   sp!,{r4-r8,pc}
548 .size   aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
@ ----------------------------------------------------------------------
@ aes_v8_ctr32_encrypt_blocks: CTR mode with a 32-bit counter.
@ In:    r0 = input pointer
@        r1 = output pointer
@        r2 = number of 16-byte blocks
@        r3 = pointer to the key schedule; round count read at offset 240
@        [sp] = pointer to the 16-byte counter block
@ Out:   output written at [r1].
@ The last 32-bit word of the counter block (offset 12) is the counter.
@ It is byte-swapped with rev unless __ARMEB__ is defined, incremented in
@ r8 and inserted into the top lane of each working block; there is no
@ carry into the other twelve bytes. The counter block at [r4] is only
@ read: no store to it appears in this routine.
@ Saves and restores r4-r10, lr and d8-d15.
@ Register roles after set-up:
@   q6       pristine copy of the counter block
@   q0, q1, q10  working counter blocks
@   q8, q9   round keys being streamed through r7
@   q12-q15  four of the last five round keys, q7 the very last one
@   r5       round count minus 6; r6 is the working copy used as counter
@   r12      input post-increment for the tail (16 or 0); inside the
@            three-block loop it is reused as scratch for a counter value
@ NOTE(review): a block count of 0 is not special-cased; it takes the
@ .Lctr32_tail path and stores two blocks. Callers presumably never pass
@ 0 - confirm against the call sites.
@ ----------------------------------------------------------------------
549 .globl  aes_v8_ctr32_encrypt_blocks
550 .type   aes_v8_ctr32_encrypt_blocks,%function
551 .align  5
552 aes_v8_ctr32_encrypt_blocks:
553         mov             ip,sp
554         stmdb           sp!,{r4-r10,lr}
555         vstmdb          sp!,{d8-d15}            @ ABI specification says so
556         ldr             r4, [ip]                @ load remaining arg
557         ldr             r5,[r3,#240]
559         ldr             r8, [r4, #12]
560         vld1.32         {q0},[r4]
562         vld1.32         {q8-q9},[r3]            @ load key schedule...
563         sub             r5,r5,#4
564         mov             r12,#16
@ The flags from this cmp select the stride below (movlo) and the
@ branch to the tail (bls); nothing in between writes the flags.
565         cmp             r2,#2
566         add             r7,r3,r5,lsl#4  @ pointer to last 5 round keys
567         sub             r5,r5,#2
568         vld1.32         {q12-q13},[r7]!
569         vld1.32         {q14-q15},[r7]!
570         vld1.32         {q7},[r7]
571         add             r7,r3,#32
572         mov             r6,r5
573         movlo   r12,#0
574 #ifndef __ARMEB__
575         rev             r8, r8
576 #endif
@ q1 gets counter + 1 here; q10 gets counter + 2 further down, on the
@ three-block path only.
577         vorr            q1,q0,q0
578         add             r10, r8, #1
579         vorr            q10,q0,q0
580         add             r8, r8, #2
581         vorr            q6,q0,q0
582         rev             r10, r10
583         vmov.32 d3[1],r10
584         bls             .Lctr32_tail
585         rev             r12, r8
586         sub             r2,r2,#3                @ bias
587         vmov.32 d21[1],r12
588         b               .Loop3x_ctr32
@ Three blocks per pass. The first rounds stream keys through q8/q9.
590 .align  4
591 .Loop3x_ctr32:
592         .byte   0x20,0x03,0xb0,0xf3     @ aese q0,q8
593         .byte   0x20,0x23,0xb0,0xf3     @ aese q1,q8
594         .byte   0x20,0x43,0xf0,0xf3     @ aese q10,q8
595         vld1.32         {q8},[r7]!
596         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
597         .byte   0x82,0x23,0xb0,0xf3     @ aesmc q1,q1
598         .byte   0xa4,0x43,0xf0,0xf3     @ aesmc q10,q10
599         subs            r6,r6,#2
600         .byte   0x22,0x03,0xb0,0xf3     @ aese q0,q9
601         .byte   0x22,0x23,0xb0,0xf3     @ aese q1,q9
602         .byte   0x22,0x43,0xf0,0xf3     @ aese q10,q9
603         vld1.32         {q9},[r7]!
604         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
605         .byte   0x82,0x23,0xb0,0xf3     @ aesmc q1,q1
606         .byte   0xa4,0x43,0xf0,0xf3     @ aesmc q10,q10
607         bgt             .Loop3x_ctr32
@ Remaining rounds. The in-flight states move to q4, q5 and q9 so that
@ q0, q1 and q10 can be refilled from q6 and given the next three counter
@ values while the rounds complete. The input blocks in q2, q3 and q11
@ are pre-XORed with the last round key q7, so the closing veor yields
@ the output directly.
609         .byte   0x20,0x03,0xb0,0xf3     @ aese q0,q8
610         .byte   0x20,0x23,0xb0,0xf3     @ aese q1,q8
611         .byte   0x20,0x43,0xf0,0xf3     @ aese q10,q8
612          mov            r7,r3
613         .byte   0x80,0x83,0xb0,0xf3     @ aesmc q4,q0
614          vld1.8         {q2},[r0]!
615         .byte   0x82,0xa3,0xb0,0xf3     @ aesmc q5,q1
616         .byte   0xa4,0x43,0xf0,0xf3     @ aesmc q10,q10
617          vorr           q0,q6,q6
618         .byte   0x22,0x83,0xb0,0xf3     @ aese q4,q9
619          vld1.8         {q3},[r0]!
620         .byte   0x22,0xa3,0xb0,0xf3     @ aese q5,q9
621         .byte   0x22,0x43,0xf0,0xf3     @ aese q10,q9
622          vorr           q1,q6,q6
623         .byte   0x88,0x83,0xb0,0xf3     @ aesmc q4,q4
624          vld1.8         {q11},[r0]!
625         .byte   0x8a,0xa3,0xb0,0xf3     @ aesmc q5,q5
626         .byte   0xa4,0x23,0xf0,0xf3     @ aesmc q9,q10
627          vorr           q10,q6,q6
628          add            r9,r8,#1
629         .byte   0x28,0x83,0xb0,0xf3     @ aese q4,q12
630         .byte   0x28,0xa3,0xb0,0xf3     @ aese q5,q12
631         .byte   0x28,0x23,0xf0,0xf3     @ aese q9,q12
632          veor           q2,q2,q7
633          add            r10,r8,#2
634         .byte   0x88,0x83,0xb0,0xf3     @ aesmc q4,q4
635         .byte   0x8a,0xa3,0xb0,0xf3     @ aesmc q5,q5
636         .byte   0xa2,0x23,0xf0,0xf3     @ aesmc q9,q9
637          veor           q3,q3,q7
638          add            r8,r8,#3
639         .byte   0x2a,0x83,0xb0,0xf3     @ aese q4,q13
640         .byte   0x2a,0xa3,0xb0,0xf3     @ aese q5,q13
641         .byte   0x2a,0x23,0xf0,0xf3     @ aese q9,q13
642          veor           q11,q11,q7
643          rev            r9,r9
644         .byte   0x88,0x83,0xb0,0xf3     @ aesmc q4,q4
645          vld1.32         {q8},[r7]!     @ re-pre-load rndkey[0]
646         .byte   0x8a,0xa3,0xb0,0xf3     @ aesmc q5,q5
647         .byte   0xa2,0x23,0xf0,0xf3     @ aesmc q9,q9
648          vmov.32        d1[1], r9
649          rev            r10,r10
650         .byte   0x2c,0x83,0xb0,0xf3     @ aese q4,q14
651         .byte   0x2c,0xa3,0xb0,0xf3     @ aese q5,q14
652         .byte   0x2c,0x23,0xf0,0xf3     @ aese q9,q14
653          vmov.32        d3[1], r10
654          rev            r12,r8
655         .byte   0x88,0x83,0xb0,0xf3     @ aesmc q4,q4
656         .byte   0x8a,0xa3,0xb0,0xf3     @ aesmc q5,q5
657         .byte   0xa2,0x23,0xf0,0xf3     @ aesmc q9,q9
658          vmov.32        d21[1], r12
659          subs           r2,r2,#3
660         .byte   0x2e,0x83,0xb0,0xf3     @ aese q4,q15
661         .byte   0x2e,0xa3,0xb0,0xf3     @ aese q5,q15
662         .byte   0x2e,0x23,0xf0,0xf3     @ aese q9,q15
664          mov            r6,r5
665         veor            q2,q2,q4
666         veor            q3,q3,q5
667         veor            q11,q11,q9
668          vld1.32         {q9},[r7]!     @ re-pre-load rndkey[1]
669         vst1.8          {q2},[r1]!
670         vst1.8          {q3},[r1]!
671         vst1.8          {q11},[r1]!
672         bhs             .Loop3x_ctr32
@ Undo the bias: r2 is now the number of blocks left (0, 1 or 2). The
@ tail stride r12 becomes 0 when a single block is left, so the second
@ input load in the tail re-reads the same block.
674         adds            r2,r2,#3
675         beq             .Lctr32_done
676         cmp             r2,#1
677         mov             r12,#16
678         moveq   r12,#0
@ Tail: two counter blocks are always run through q0 and q1; the second
@ result is stored only when r2 is not 1.
680 .Lctr32_tail:
681         .byte   0x20,0x03,0xb0,0xf3     @ aese q0,q8
682         .byte   0x20,0x23,0xb0,0xf3     @ aese q1,q8
683         vld1.32         {q8},[r7]!
684         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
685         .byte   0x82,0x23,0xb0,0xf3     @ aesmc q1,q1
686         subs            r6,r6,#2
687         .byte   0x22,0x03,0xb0,0xf3     @ aese q0,q9
688         .byte   0x22,0x23,0xb0,0xf3     @ aese q1,q9
689         vld1.32         {q9},[r7]!
690         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
691         .byte   0x82,0x23,0xb0,0xf3     @ aesmc q1,q1
692         bgt             .Lctr32_tail
694         .byte   0x20,0x03,0xb0,0xf3     @ aese q0,q8
695         .byte   0x20,0x23,0xb0,0xf3     @ aese q1,q8
696         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
697         .byte   0x82,0x23,0xb0,0xf3     @ aesmc q1,q1
698         .byte   0x22,0x03,0xb0,0xf3     @ aese q0,q9
699         .byte   0x22,0x23,0xb0,0xf3     @ aese q1,q9
700         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
701         .byte   0x82,0x23,0xb0,0xf3     @ aesmc q1,q1
702          vld1.8         {q2},[r0],r12
703         .byte   0x28,0x03,0xb0,0xf3     @ aese q0,q12
704         .byte   0x28,0x23,0xb0,0xf3     @ aese q1,q12
705          vld1.8         {q3},[r0]
706         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
707         .byte   0x82,0x23,0xb0,0xf3     @ aesmc q1,q1
708         .byte   0x2a,0x03,0xb0,0xf3     @ aese q0,q13
709         .byte   0x2a,0x23,0xb0,0xf3     @ aese q1,q13
710         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
711         .byte   0x82,0x23,0xb0,0xf3     @ aesmc q1,q1
712         .byte   0x2c,0x03,0xb0,0xf3     @ aese q0,q14
713         .byte   0x2c,0x23,0xb0,0xf3     @ aese q1,q14
714          veor           q2,q2,q7
715         .byte   0x80,0x03,0xb0,0xf3     @ aesmc q0,q0
716         .byte   0x82,0x23,0xb0,0xf3     @ aesmc q1,q1
717          veor           q3,q3,q7
718         .byte   0x2e,0x03,0xb0,0xf3     @ aese q0,q15
719         .byte   0x2e,0x23,0xb0,0xf3     @ aese q1,q15
721         cmp             r2,#1
722         veor            q2,q2,q0
723         veor            q3,q3,q1
724         vst1.8          {q2},[r1]!
725         beq             .Lctr32_done
726         vst1.8          {q3},[r1]
728 .Lctr32_done:
729         vldmia          sp!,{d8-d15}
730         ldmia           sp!,{r4-r10,pc}
731 .size   aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
732 #endif