1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple armv8---eabi -mattr=+aes,+fix-cortex-a57-aes-1742098 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-FIX-NOSCHED
4 ; These CPUs should have the fix enabled by default. They use different
5 ; FileCheck prefixes because some instructions are scheduled differently.
7 ; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a57 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
8 ; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a72 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
10 ; This checks that adding `+fix-cortex-a57-aes-1742098` causes `vorr` to be
11 ; inserted wherever the compiler cannot prove that either input to the first aes
12 ; instruction in a fused aes pair was set by 64-bit Neon register writes or
13 ; 128-bit Neon register writes. All other register writes are unsafe, and
14 ; require a `vorr` to protect the AES input.
; Arm Neon AES intrinsics declared for the tests in this file.
16 declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
17 declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)
18 declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>)
19 declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>)
; External functions returning a <16 x i8>. The aese_via_call* tests pass their
; results to the aese intrinsic.
21 declare arm_aapcs_vfpcc <16 x i8> @get_input() local_unnamed_addr
22 declare arm_aapcs_vfpcc <16 x i8> @get_inputf16(half) local_unnamed_addr
23 declare arm_aapcs_vfpcc <16 x i8> @get_inputf32(float) local_unnamed_addr
; aese/aesmc pair whose first aese operand is zeroinitializer and whose second
; operand is loaded from %0; the aesmc result is stored back to %0.
; The expected output contains no vorr: the two aese inputs are produced by
; vmov.i32 (q9) and vld1.64 (q8).
27 define arm_aapcs_vfpcc void @aese_zero(ptr %0) nounwind {
28 ; CHECK-FIX-LABEL: aese_zero:
30 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
31 ; CHECK-FIX-NEXT: vmov.i32 q9, #0x0
32 ; CHECK-FIX-NEXT: aese.8 q9, q8
33 ; CHECK-FIX-NEXT: aesmc.8 q8, q9
34 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
35 ; CHECK-FIX-NEXT: bx lr
36 %2 = load <16 x i8>, ptr %0, align 8
37 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> zeroinitializer, <16 x i8> %2)
38 %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
39 store <16 x i8> %4, ptr %0, align 8
; One aese input is the value returned by get_input(); the other is loaded from
; %0. The aesmc result is stored back to %0.
; The expected output places `vorr q0, q0, q0` between the call and the aese,
; protecting the call result held in q0.
43 define arm_aapcs_vfpcc void @aese_via_call1(ptr %0) nounwind {
44 ; CHECK-FIX-LABEL: aese_via_call1:
46 ; CHECK-FIX-NEXT: .save {r4, lr}
47 ; CHECK-FIX-NEXT: push {r4, lr}
48 ; CHECK-FIX-NEXT: mov r4, r0
49 ; CHECK-FIX-NEXT: bl get_input
50 ; CHECK-FIX-NEXT: vorr q0, q0, q0
51 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
52 ; CHECK-FIX-NEXT: aese.8 q8, q0
53 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
54 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
55 ; CHECK-FIX-NEXT: pop {r4, pc}
56 %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
57 %3 = load <16 x i8>, ptr %0, align 8
58 %4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %2, <16 x i8> %3)
59 %5 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %4)
60 store <16 x i8> %5, ptr %0, align 8
; Same shape as aese_via_call1, but the callee is get_inputf16 and takes the
; half argument %0; the other aese input is loaded from %1 and the result is
; stored back to %1.
; The expected output still places `vorr q0, q0, q0` right after the call.
64 define arm_aapcs_vfpcc void @aese_via_call2(half %0, ptr %1) nounwind {
65 ; CHECK-FIX-LABEL: aese_via_call2:
67 ; CHECK-FIX-NEXT: .save {r4, lr}
68 ; CHECK-FIX-NEXT: push {r4, lr}
69 ; CHECK-FIX-NEXT: mov r4, r0
70 ; CHECK-FIX-NEXT: bl get_inputf16
71 ; CHECK-FIX-NEXT: vorr q0, q0, q0
72 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
73 ; CHECK-FIX-NEXT: aese.8 q8, q0
74 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
75 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
76 ; CHECK-FIX-NEXT: pop {r4, pc}
77 %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
78 %4 = load <16 x i8>, ptr %1, align 8
79 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
80 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
81 store <16 x i8> %6, ptr %1, align 8
; Same shape as aese_via_call1, but the callee is get_inputf32 and takes the
; float argument %0; the other aese input is loaded from %1 and the result is
; stored back to %1.
; The expected output still places `vorr q0, q0, q0` right after the call.
85 define arm_aapcs_vfpcc void @aese_via_call3(float %0, ptr %1) nounwind {
86 ; CHECK-FIX-LABEL: aese_via_call3:
88 ; CHECK-FIX-NEXT: .save {r4, lr}
89 ; CHECK-FIX-NEXT: push {r4, lr}
90 ; CHECK-FIX-NEXT: mov r4, r0
91 ; CHECK-FIX-NEXT: bl get_inputf32
92 ; CHECK-FIX-NEXT: vorr q0, q0, q0
93 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
94 ; CHECK-FIX-NEXT: aese.8 q8, q0
95 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
96 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
97 ; CHECK-FIX-NEXT: pop {r4, pc}
98 %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
99 %4 = load <16 x i8>, ptr %1, align 8
100 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
101 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
102 store <16 x i8> %6, ptr %1, align 8
; Single aese/aesmc pair with both aese inputs loaded from memory (%1 and %0);
; the result is stored to %1.
; The expected output contains no vorr: both inputs come from vld1.64.
106 define arm_aapcs_vfpcc void @aese_once_via_ptr(ptr %0, ptr %1) nounwind {
107 ; CHECK-FIX-LABEL: aese_once_via_ptr:
108 ; CHECK-FIX: @ %bb.0:
109 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
110 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
111 ; CHECK-FIX-NEXT: aese.8 q9, q8
112 ; CHECK-FIX-NEXT: aesmc.8 q8, q9
113 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
114 ; CHECK-FIX-NEXT: bx lr
115 %3 = load <16 x i8>, ptr %1, align 8
116 %4 = load <16 x i8>, ptr %0, align 8
117 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
118 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
119 store <16 x i8> %6, ptr %1, align 8
; Single aese/aesmc pair whose two aese inputs are the by-value vector
; arguments %1 and %0.
; The expected output applies a vorr to each of q0 and q1 before the aese.
123 define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
124 ; CHECK-FIX-LABEL: aese_once_via_val:
125 ; CHECK-FIX: @ %bb.0:
126 ; CHECK-FIX-NEXT: vorr q0, q0, q0
127 ; CHECK-FIX-NEXT: vorr q1, q1, q1
128 ; CHECK-FIX-NEXT: aese.8 q0, q1
129 ; CHECK-FIX-NEXT: aesmc.8 q0, q0
130 ; CHECK-FIX-NEXT: bx lr
131 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
132 %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
; Two chained aese/aesmc pairs. The first pair takes both inputs from memory
; (%1 and %0); the second pair takes the first pair's aesmc result (%6) and a
; fresh load from %0. Each aesmc result is stored to %1.
; The expected output contains no vorr.
136 define arm_aapcs_vfpcc void @aese_twice_via_ptr(ptr %0, ptr %1) nounwind {
137 ; CHECK-FIX-LABEL: aese_twice_via_ptr:
138 ; CHECK-FIX: @ %bb.0:
139 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
140 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
141 ; CHECK-FIX-NEXT: aese.8 q9, q8
142 ; CHECK-FIX-NEXT: aesmc.8 q8, q9
143 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
144 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
145 ; CHECK-FIX-NEXT: aese.8 q9, q8
146 ; CHECK-FIX-NEXT: aesmc.8 q8, q9
147 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
148 ; CHECK-FIX-NEXT: bx lr
149 %3 = load <16 x i8>, ptr %1, align 8
150 %4 = load <16 x i8>, ptr %0, align 8
151 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
152 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
153 store <16 x i8> %6, ptr %1, align 8
154 %7 = load <16 x i8>, ptr %0, align 8
155 %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
156 %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
157 store <16 x i8> %9, ptr %1, align 8
; Two chained aese/aesmc pairs on by-value vector arguments: the first aese
; uses %1 and %0, the second uses the first aesmc result (%4) and %0 again.
; The expected output begins with one vorr on q1 and two vorrs on q0, all
; ahead of the first aese.
161 define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
162 ; CHECK-FIX-LABEL: aese_twice_via_val:
163 ; CHECK-FIX: @ %bb.0:
164 ; CHECK-FIX-NEXT: vorr q1, q1, q1
165 ; CHECK-FIX-NEXT: vorr q0, q0, q0
166 ; CHECK-FIX-NEXT: vorr q0, q0, q0
167 ; CHECK-FIX-NEXT: aese.8 q1, q0
168 ; CHECK-FIX-NEXT: aesmc.8 q8, q1
169 ; CHECK-FIX-NEXT: aese.8 q8, q0
170 ; CHECK-FIX-NEXT: aesmc.8 q0, q8
171 ; CHECK-FIX-NEXT: bx lr
172 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
173 %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
174 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %4, <16 x i8> %0)
175 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
; aese/aesmc pair inside a counted loop (trip count %0). Both aese inputs are
; loaded from memory (%2 and %1) inside the loop, and the aesmc result is
; stored to %2 inside the loop.
; Neither expected listing contains a vorr. The two listings differ only in
; where `subs` is placed relative to the second vld1.64.
179 define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind {
180 ; CHECK-FIX-NOSCHED-LABEL: aese_loop_via_ptr:
181 ; CHECK-FIX-NOSCHED: @ %bb.0:
182 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
183 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
184 ; CHECK-FIX-NOSCHED-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
185 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
186 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
187 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r2]
188 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q9, q8
189 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q9
190 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
191 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB8_1
192 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.2:
193 ; CHECK-FIX-NOSCHED-NEXT: bx lr
195 ; CHECK-CORTEX-FIX-LABEL: aese_loop_via_ptr:
196 ; CHECK-CORTEX-FIX: @ %bb.0:
197 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
198 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
199 ; CHECK-CORTEX-FIX-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
200 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
201 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r2]
202 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
203 ; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
204 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
205 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
206 ; CHECK-CORTEX-FIX-NEXT: bne .LBB8_1
207 ; CHECK-CORTEX-FIX-NEXT: @ %bb.2:
208 ; CHECK-CORTEX-FIX-NEXT: bx lr
209 %4 = icmp eq i32 %0, 0
210 br i1 %4, label %5, label %6
216 %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
217 %8 = load <16 x i8>, ptr %2, align 8
218 %9 = load <16 x i8>, ptr %1, align 8
219 %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %9)
220 %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
221 store <16 x i8> %11, ptr %2, align 8
222 %12 = add nuw i32 %7, 1
223 %13 = icmp eq i32 %12, %0
224 br i1 %13, label %5, label %6
; aese/aesmc pair inside a counted loop (trip count %0) operating on by-value
; vector arguments: %1 is used unchanged on every iteration, and the other
; input is a phi of %2 and the previous aesmc result.
; The expected output applies vorr to q1 and q0 once, before the loop; the
; loop body itself contains no vorr.
227 define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
228 ; CHECK-FIX-LABEL: aese_loop_via_val:
229 ; CHECK-FIX: @ %bb.0:
230 ; CHECK-FIX-NEXT: vorr q1, q1, q1
231 ; CHECK-FIX-NEXT: vorr q0, q0, q0
232 ; CHECK-FIX-NEXT: cmp r0, #0
233 ; CHECK-FIX-NEXT: beq .LBB9_2
234 ; CHECK-FIX-NEXT: .LBB9_1: @ =>This Inner Loop Header: Depth=1
235 ; CHECK-FIX-NEXT: aese.8 q1, q0
236 ; CHECK-FIX-NEXT: subs r0, r0, #1
237 ; CHECK-FIX-NEXT: aesmc.8 q1, q1
238 ; CHECK-FIX-NEXT: bne .LBB9_1
239 ; CHECK-FIX-NEXT: .LBB9_2:
240 ; CHECK-FIX-NEXT: vorr q0, q1, q1
241 ; CHECK-FIX-NEXT: bx lr
242 %4 = icmp eq i32 %0, 0
243 br i1 %4, label %5, label %7
246 %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
250 %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
251 %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
252 %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %1)
253 %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
254 %12 = add nuw i32 %8, 1
255 %13 = icmp eq i32 %12, %0
256 br i1 %13, label %5, label %7
; An i8 loaded from %0 is inserted into lane 0 of both aese inputs: the vector
; loaded from %2 and the by-value vector argument %1. The aesmc result is
; stored to %2.
; Both expected listings apply vorr to q0 at entry and perform the lane inserts
; with vmov.8. They differ only in the order of the ldrb and the vld1.64.
259 define arm_aapcs_vfpcc void @aese_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
260 ; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr:
261 ; CHECK-FIX-NOSCHED: @ %bb.0:
262 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
263 ; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
264 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
265 ; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
266 ; CHECK-FIX-NOSCHED-NEXT: vmov.8 d16[0], r0
267 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
268 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
269 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
270 ; CHECK-FIX-NOSCHED-NEXT: bx lr
272 ; CHECK-CORTEX-FIX-LABEL: aese_set8_via_ptr:
273 ; CHECK-CORTEX-FIX: @ %bb.0:
274 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
275 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
276 ; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
277 ; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
278 ; CHECK-CORTEX-FIX-NEXT: vmov.8 d16[0], r0
279 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
280 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
281 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
282 ; CHECK-CORTEX-FIX-NEXT: bx lr
283 %4 = load i8, ptr %0, align 1
284 %5 = load <16 x i8>, ptr %2, align 8
285 %6 = insertelement <16 x i8> %5, i8 %4, i64 0
286 %7 = insertelement <16 x i8> %1, i8 %4, i64 0
287 %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
288 %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
289 store <16 x i8> %9, ptr %2, align 8
; The i8 argument %0 is inserted into lane 0 of both aese inputs: the vector
; loaded from %2 and the by-value vector argument %1. The aesmc result is
; stored to %2.
; The expected output applies vorr to q0 at entry and performs the lane inserts
; with vmov.8 from r0.
293 define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
294 ; CHECK-FIX-LABEL: aese_set8_via_val:
295 ; CHECK-FIX: @ %bb.0:
296 ; CHECK-FIX-NEXT: vorr q0, q0, q0
297 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
298 ; CHECK-FIX-NEXT: vmov.8 d0[0], r0
299 ; CHECK-FIX-NEXT: vmov.8 d16[0], r0
300 ; CHECK-FIX-NEXT: aese.8 q8, q0
301 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
302 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
303 ; CHECK-FIX-NEXT: bx lr
304 %4 = load <16 x i8>, ptr %2, align 8
305 %5 = insertelement <16 x i8> %4, i8 %0, i64 0
306 %6 = insertelement <16 x i8> %1, i8 %0, i64 0
307 %7 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %5, <16 x i8> %6)
308 %8 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %7)
309 store <16 x i8> %8, ptr %2, align 8
; Conditional lane-0 insert expressed with branches on the i1 argument %0.
; When taken, an i8 loaded from %1 is inserted into lane 0 of the vector loaded
; from %3 and, separately, into lane 0 of the by-value vector argument %2. The
; two phis (%12, %17) merge the modified and unmodified vectors and feed the
; aese; the aesmc result is stored to %3.
; The expected output applies vorr to q0 at entry and performs the conditional
; lane inserts with vld1.8 lane loads.
313 define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
314 ; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
315 ; CHECK-FIX: @ %bb.0:
316 ; CHECK-FIX-NEXT: vorr q0, q0, q0
317 ; CHECK-FIX-NEXT: cmp r0, #0
318 ; CHECK-FIX-NEXT: beq .LBB12_2
319 ; CHECK-FIX-NEXT: @ %bb.1:
320 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
321 ; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
322 ; CHECK-FIX-NEXT: cmp r0, #0
323 ; CHECK-FIX-NEXT: bne .LBB12_3
324 ; CHECK-FIX-NEXT: b .LBB12_4
325 ; CHECK-FIX-NEXT: .LBB12_2:
326 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
327 ; CHECK-FIX-NEXT: cmp r0, #0
328 ; CHECK-FIX-NEXT: beq .LBB12_4
329 ; CHECK-FIX-NEXT: .LBB12_3:
330 ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1]
331 ; CHECK-FIX-NEXT: .LBB12_4:
332 ; CHECK-FIX-NEXT: aese.8 q8, q0
333 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
334 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
335 ; CHECK-FIX-NEXT: bx lr
336 br i1 %0, label %5, label %9
339 %6 = load i8, ptr %1, align 1
340 %7 = load <16 x i8>, ptr %3, align 8
341 %8 = insertelement <16 x i8> %7, i8 %6, i64 0
345 %10 = load <16 x i8>, ptr %3, align 8
349 %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
350 br i1 %0, label %13, label %16
353 %14 = load i8, ptr %1, align 1
354 %15 = insertelement <16 x i8> %2, i8 %14, i64 0
358 %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
359 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %17)
360 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
361 store <16 x i8> %19, ptr %3, align 8
; Conditional lane-0 insert expressed with selects on the i1 argument %0.
; The i8 argument %1 is inserted into lane 0 of the vector loaded from %3 and
; of the by-value vector argument %2; each select picks the modified or the
; unmodified vector. The aesmc result is stored to %3.
; The expected output applies vorr to q0 at entry and implements each select
; as a conditional branch around a vmov.8.
365 define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
366 ; CHECK-FIX-LABEL: aese_set8_cond_via_val:
367 ; CHECK-FIX: @ %bb.0:
368 ; CHECK-FIX-NEXT: vorr q0, q0, q0
369 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
370 ; CHECK-FIX-NEXT: cmp r0, #0
371 ; CHECK-FIX-NEXT: beq .LBB13_2
372 ; CHECK-FIX-NEXT: @ %bb.1:
373 ; CHECK-FIX-NEXT: vmov.8 d16[0], r1
374 ; CHECK-FIX-NEXT: .LBB13_2: @ %select.end
375 ; CHECK-FIX-NEXT: cmp r0, #0
376 ; CHECK-FIX-NEXT: beq .LBB13_4
377 ; CHECK-FIX-NEXT: @ %bb.3:
378 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
379 ; CHECK-FIX-NEXT: .LBB13_4: @ %select.end2
380 ; CHECK-FIX-NEXT: aese.8 q8, q0
381 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
382 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
383 ; CHECK-FIX-NEXT: bx lr
384 %5 = load <16 x i8>, ptr %3, align 8
385 %6 = insertelement <16 x i8> %5, i8 %1, i64 0
386 %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
387 %8 = insertelement <16 x i8> %2, i8 %1, i64 0
388 %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
389 %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %9)
390 %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
391 store <16 x i8> %11, ptr %3, align 8
; An i8 loaded from %1 is inserted into lane 0 of the by-value vector argument
; %2 and is also stored to the first byte of %3. A counted loop (trip count %0)
; then repeats the aese/aesmc pair on a loop-carried value, initially loaded
; from %3, using the modified %2 as the other aese input on every iteration.
; The expected output applies vorr to q0 at entry and does the vmov.8 lane
; insert before the loop; the loop body contains no vorr.
395 define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
396 ; CHECK-FIX-LABEL: aese_set8_loop_via_ptr:
397 ; CHECK-FIX: @ %bb.0:
398 ; CHECK-FIX-NEXT: vorr q0, q0, q0
399 ; CHECK-FIX-NEXT: ldrb r1, [r1]
400 ; CHECK-FIX-NEXT: cmp r0, #0
401 ; CHECK-FIX-NEXT: strb r1, [r2]
402 ; CHECK-FIX-NEXT: bxeq lr
403 ; CHECK-FIX-NEXT: .LBB14_1:
404 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
405 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
406 ; CHECK-FIX-NEXT: .LBB14_2: @ =>This Inner Loop Header: Depth=1
407 ; CHECK-FIX-NEXT: aese.8 q8, q0
408 ; CHECK-FIX-NEXT: subs r0, r0, #1
409 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
410 ; CHECK-FIX-NEXT: bne .LBB14_2
411 ; CHECK-FIX-NEXT: @ %bb.3:
412 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
413 ; CHECK-FIX-NEXT: bx lr
414 %5 = load i8, ptr %1, align 1
415 %6 = insertelement <16 x i8> %2, i8 %5, i64 0
416 %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0
417 store i8 %5, ptr %7, align 8
418 %8 = icmp eq i32 %0, 0
419 br i1 %8, label %12, label %9
422 %10 = load <16 x i8>, ptr %3, align 8
426 store <16 x i8> %17, ptr %3, align 8
433 %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
434 %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
435 %16 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %6)
436 %17 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %16)
437 %18 = add nuw i32 %15, 1
438 %19 = icmp eq i32 %18, %0
439 br i1 %19, label %11, label %13
; Counted loop (trip count %0) in which the i8 argument %1 is inserted into
; lane 0 of the loop-carried vector on every iteration before the aese. The
; other aese input is the by-value vector argument %2 with %1 inserted into
; lane 0 once, outside the loop.
; The expected output applies vorr to q0 at entry, inserts into d0[0] before
; the loop, and inserts into d16[0] with vmov.8 inside the loop.
442 define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
443 ; CHECK-FIX-LABEL: aese_set8_loop_via_val:
444 ; CHECK-FIX: @ %bb.0:
445 ; CHECK-FIX-NEXT: vorr q0, q0, q0
446 ; CHECK-FIX-NEXT: cmp r0, #0
447 ; CHECK-FIX-NEXT: bxeq lr
448 ; CHECK-FIX-NEXT: .LBB15_1:
449 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
450 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
451 ; CHECK-FIX-NEXT: .LBB15_2: @ =>This Inner Loop Header: Depth=1
452 ; CHECK-FIX-NEXT: vmov.8 d16[0], r1
453 ; CHECK-FIX-NEXT: subs r0, r0, #1
454 ; CHECK-FIX-NEXT: aese.8 q8, q0
455 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
456 ; CHECK-FIX-NEXT: bne .LBB15_2
457 ; CHECK-FIX-NEXT: @ %bb.3:
458 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
459 ; CHECK-FIX-NEXT: bx lr
460 %5 = icmp eq i32 %0, 0
461 br i1 %5, label %10, label %6
464 %7 = insertelement <16 x i8> %2, i8 %1, i64 0
465 %8 = load <16 x i8>, ptr %3, align 8
469 store <16 x i8> %16, ptr %3, align 8
476 %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
477 %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
478 %14 = insertelement <16 x i8> %12, i8 %1, i64 0
479 %15 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %7)
480 %16 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %15)
481 %17 = add nuw i32 %13, 1
482 %18 = icmp eq i32 %17, %0
483 br i1 %18, label %9, label %11
; 16-bit counterpart of aese_set8_via_ptr: an i16 loaded from %0 is inserted
; into lane 0 (as <8 x i16>) of both aese inputs, the vector loaded from %2 and
; the by-value vector argument %1. The aesmc result is stored to %2.
; Both expected listings apply vorr to q0 at entry and perform the lane inserts
; with vmov.16. They differ only in the order of the ldrh and the vld1.64.
486 define arm_aapcs_vfpcc void @aese_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
487 ; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr:
488 ; CHECK-FIX-NOSCHED: @ %bb.0:
489 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
490 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
491 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
492 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
493 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
494 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
495 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
496 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
497 ; CHECK-FIX-NOSCHED-NEXT: bx lr
499 ; CHECK-CORTEX-FIX-LABEL: aese_set16_via_ptr:
500 ; CHECK-CORTEX-FIX: @ %bb.0:
501 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
502 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
503 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
504 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
505 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
506 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
507 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
508 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
509 ; CHECK-CORTEX-FIX-NEXT: bx lr
510 %4 = load i16, ptr %0, align 2
511 %5 = bitcast ptr %2 to ptr
512 %6 = load <8 x i16>, ptr %5, align 8
513 %7 = insertelement <8 x i16> %6, i16 %4, i64 0
514 %8 = bitcast <8 x i16> %7 to <16 x i8>
515 %9 = bitcast <16 x i8> %1 to <8 x i16>
516 %10 = insertelement <8 x i16> %9, i16 %4, i64 0
517 %11 = bitcast <8 x i16> %10 to <16 x i8>
518 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
519 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
520 store <16 x i8> %13, ptr %2, align 8
; 16-bit counterpart of aese_set8_via_val: the i16 argument %0 is inserted into
; lane 0 (as <8 x i16>) of both aese inputs, the vector loaded from %2 and the
; by-value vector argument %1. The aesmc result is stored to %2.
; The expected output applies vorr to q0 at entry and performs the lane inserts
; with vmov.16 from r0.
524 define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
525 ; CHECK-FIX-LABEL: aese_set16_via_val:
526 ; CHECK-FIX: @ %bb.0:
527 ; CHECK-FIX-NEXT: vorr q0, q0, q0
528 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
529 ; CHECK-FIX-NEXT: vmov.16 d0[0], r0
530 ; CHECK-FIX-NEXT: vmov.16 d16[0], r0
531 ; CHECK-FIX-NEXT: aese.8 q8, q0
532 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
533 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
534 ; CHECK-FIX-NEXT: bx lr
535 %4 = bitcast ptr %2 to ptr
536 %5 = load <8 x i16>, ptr %4, align 8
537 %6 = insertelement <8 x i16> %5, i16 %0, i64 0
538 %7 = bitcast <8 x i16> %6 to <16 x i8>
539 %8 = bitcast <16 x i8> %1 to <8 x i16>
540 %9 = insertelement <8 x i16> %8, i16 %0, i64 0
541 %10 = bitcast <8 x i16> %9 to <16 x i8>
542 %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
543 %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
544 store <16 x i8> %12, ptr %2, align 8
; 16-bit counterpart of aese_set8_cond_via_ptr: conditional lane-0 insert
; expressed with branches on the i1 argument %0. When taken, an i16 loaded from
; %1 is inserted into lane 0 of the <8 x i16> loaded from %3 and, separately,
; into lane 0 of the by-value vector argument %2 (bitcast to <8 x i16>). The
; phis %14 and %22 merge the two variants and are bitcast back to <16 x i8>
; for the aese; the aesmc result is stored to %3.
; The expected output applies vorr to q0 at entry and performs the conditional
; lane inserts with vld1.16 lane loads.
548 define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
549 ; CHECK-FIX-LABEL: aese_set16_cond_via_ptr:
550 ; CHECK-FIX: @ %bb.0:
551 ; CHECK-FIX-NEXT: vorr q0, q0, q0
552 ; CHECK-FIX-NEXT: cmp r0, #0
553 ; CHECK-FIX-NEXT: beq .LBB18_2
554 ; CHECK-FIX-NEXT: @ %bb.1:
555 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
556 ; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
557 ; CHECK-FIX-NEXT: cmp r0, #0
558 ; CHECK-FIX-NEXT: bne .LBB18_3
559 ; CHECK-FIX-NEXT: b .LBB18_4
560 ; CHECK-FIX-NEXT: .LBB18_2:
561 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
562 ; CHECK-FIX-NEXT: cmp r0, #0
563 ; CHECK-FIX-NEXT: beq .LBB18_4
564 ; CHECK-FIX-NEXT: .LBB18_3:
565 ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16]
566 ; CHECK-FIX-NEXT: .LBB18_4:
567 ; CHECK-FIX-NEXT: aese.8 q8, q0
568 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
569 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
570 ; CHECK-FIX-NEXT: bx lr
571 br i1 %0, label %5, label %10
574 %6 = load i16, ptr %1, align 2
575 %7 = bitcast ptr %3 to ptr
576 %8 = load <8 x i16>, ptr %7, align 8
577 %9 = insertelement <8 x i16> %8, i16 %6, i64 0
581 %11 = bitcast ptr %3 to ptr
582 %12 = load <8 x i16>, ptr %11, align 8
586 %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
587 br i1 %0, label %15, label %19
590 %16 = load i16, ptr %1, align 2
591 %17 = bitcast <16 x i8> %2 to <8 x i16>
592 %18 = insertelement <8 x i16> %17, i16 %16, i64 0
596 %20 = bitcast <16 x i8> %2 to <8 x i16>
600 %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
601 %23 = bitcast <8 x i16> %14 to <16 x i8>
602 %24 = bitcast <8 x i16> %22 to <16 x i8>
603 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
604 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
605 store <16 x i8> %26, ptr %3, align 8
; 16-bit counterpart of aese_set8_cond_via_val: conditional lane-0 insert
; expressed with selects on the i1 argument %0. The i16 argument %1 is inserted
; into lane 0 of the <8 x i16> loaded from %3 and of the by-value vector
; argument %2 (bitcast to <8 x i16>); each select picks the modified or the
; unmodified vector. The aesmc result is stored to %3.
; The expected output applies vorr to q0 at entry and implements each select
; as a conditional branch around a vmov.16.
609 define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
610 ; CHECK-FIX-LABEL: aese_set16_cond_via_val:
611 ; CHECK-FIX: @ %bb.0:
612 ; CHECK-FIX-NEXT: vorr q0, q0, q0
613 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
614 ; CHECK-FIX-NEXT: cmp r0, #0
615 ; CHECK-FIX-NEXT: beq .LBB19_2
616 ; CHECK-FIX-NEXT: @ %bb.1:
617 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
618 ; CHECK-FIX-NEXT: .LBB19_2: @ %select.end
619 ; CHECK-FIX-NEXT: cmp r0, #0
620 ; CHECK-FIX-NEXT: beq .LBB19_4
621 ; CHECK-FIX-NEXT: @ %bb.3:
622 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
623 ; CHECK-FIX-NEXT: .LBB19_4: @ %select.end2
624 ; CHECK-FIX-NEXT: aese.8 q8, q0
625 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
626 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
627 ; CHECK-FIX-NEXT: bx lr
628 %5 = bitcast ptr %3 to ptr
629 %6 = load <8 x i16>, ptr %5, align 8
630 %7 = insertelement <8 x i16> %6, i16 %1, i64 0
631 %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
632 %9 = bitcast <16 x i8> %2 to <8 x i16>
633 %10 = insertelement <8 x i16> %9, i16 %1, i64 0
634 %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
635 %12 = bitcast <8 x i16> %8 to <16 x i8>
636 %13 = bitcast <8 x i16> %11 to <16 x i8>
637 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
638 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
639 store <16 x i8> %15, ptr %3, align 8
; 16-bit counterpart of aese_set8_loop_via_ptr: an i16 loaded from %1 is
; inserted into lane 0 of the by-value vector argument %2 (as <8 x i16>) and is
; also stored to the start of %3. A counted loop (trip count %0) then repeats
; the aese/aesmc pair on a loop-carried value, initially loaded from %3, using
; the modified %2 as the other aese input on every iteration.
; The expected output applies vorr to q0 at entry and does the vmov.16 lane
; insert before the loop; the loop body contains no vorr.
643 define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
644 ; CHECK-FIX-LABEL: aese_set16_loop_via_ptr:
645 ; CHECK-FIX: @ %bb.0:
646 ; CHECK-FIX-NEXT: vorr q0, q0, q0
647 ; CHECK-FIX-NEXT: ldrh r1, [r1]
648 ; CHECK-FIX-NEXT: cmp r0, #0
649 ; CHECK-FIX-NEXT: strh r1, [r2]
650 ; CHECK-FIX-NEXT: bxeq lr
651 ; CHECK-FIX-NEXT: .LBB20_1:
652 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
653 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
654 ; CHECK-FIX-NEXT: .LBB20_2: @ =>This Inner Loop Header: Depth=1
655 ; CHECK-FIX-NEXT: aese.8 q8, q0
656 ; CHECK-FIX-NEXT: subs r0, r0, #1
657 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
658 ; CHECK-FIX-NEXT: bne .LBB20_2
659 ; CHECK-FIX-NEXT: @ %bb.3:
660 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
661 ; CHECK-FIX-NEXT: bx lr
662 %5 = load i16, ptr %1, align 2
663 %6 = bitcast <16 x i8> %2 to <8 x i16>
664 %7 = insertelement <8 x i16> %6, i16 %5, i64 0
665 %8 = bitcast <8 x i16> %7 to <16 x i8>
666 %9 = bitcast ptr %3 to ptr
667 store i16 %5, ptr %9, align 8
668 %10 = icmp eq i32 %0, 0
669 br i1 %10, label %14, label %11
672 %12 = load <16 x i8>, ptr %3, align 8
676 store <16 x i8> %19, ptr %3, align 8
683 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
684 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
685 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
686 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
687 %20 = add nuw i32 %17, 1
688 %21 = icmp eq i32 %20, %0
689 br i1 %21, label %13, label %15
; Counted loop (trip count %0) that, on every iteration, loads an <8 x i16>
; from %3, inserts the i16 argument %1 into lane 0, stores %1 as an i16 to %3,
; runs the aese/aesmc pair and stores the result back to %3. The other aese
; input is the by-value vector argument %2 with %1 inserted into lane 0 once,
; outside the loop.
; The expected output applies vorr to q0 at entry and inserts into d0[0] before
; the loop; the vld1.64, the vmov.16 into d16[0] and the vst1.64 all stay
; inside the loop.
692 define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
693 ; CHECK-FIX-LABEL: aese_set16_loop_via_val:
694 ; CHECK-FIX: @ %bb.0:
695 ; CHECK-FIX-NEXT: vorr q0, q0, q0
696 ; CHECK-FIX-NEXT: cmp r0, #0
697 ; CHECK-FIX-NEXT: bxeq lr
698 ; CHECK-FIX-NEXT: .LBB21_1:
699 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
700 ; CHECK-FIX-NEXT: .LBB21_2: @ =>This Inner Loop Header: Depth=1
701 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
702 ; CHECK-FIX-NEXT: subs r0, r0, #1
703 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
704 ; CHECK-FIX-NEXT: aese.8 q8, q0
705 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
706 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
707 ; CHECK-FIX-NEXT: bne .LBB21_2
708 ; CHECK-FIX-NEXT: @ %bb.3:
709 ; CHECK-FIX-NEXT: bx lr
710 %5 = icmp eq i32 %0, 0
711 br i1 %5, label %12, label %6
714 %7 = bitcast <16 x i8> %2 to <8 x i16>
715 %8 = insertelement <8 x i16> %7, i16 %1, i64 0
716 %9 = bitcast <8 x i16> %8 to <16 x i8>
717 %10 = bitcast ptr %3 to ptr
718 %11 = bitcast ptr %3 to ptr
725 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
726 %15 = load <8 x i16>, ptr %10, align 8
727 %16 = insertelement <8 x i16> %15, i16 %1, i64 0
728 %17 = bitcast <8 x i16> %16 to <16 x i8>
729 store i16 %1, ptr %11, align 8
730 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
731 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
732 store <16 x i8> %19, ptr %3, align 8
733 %20 = add nuw i32 %14, 1
734 %21 = icmp eq i32 %20, %0
735 br i1 %21, label %12, label %13
; 32-bit counterpart of aese_set8_via_ptr: an i32 loaded from %0 is inserted
; into lane 0 (as <4 x i32>) of both aese inputs, the vector loaded from %2 and
; the by-value vector argument %1. The aesmc result is stored to %2.
; Both expected listings apply vorr to q0 at entry and perform the lane inserts
; with vmov.32. They differ only in the order of the ldr and the vld1.64.
738 define arm_aapcs_vfpcc void @aese_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
739 ; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr:
740 ; CHECK-FIX-NOSCHED: @ %bb.0:
741 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
742 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
743 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
744 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
745 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
746 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
747 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
748 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
749 ; CHECK-FIX-NOSCHED-NEXT: bx lr
751 ; CHECK-CORTEX-FIX-LABEL: aese_set32_via_ptr:
752 ; CHECK-CORTEX-FIX: @ %bb.0:
753 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
754 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
755 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
756 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
757 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0
758 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
759 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
760 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
761 ; CHECK-CORTEX-FIX-NEXT: bx lr
762 %4 = load i32, ptr %0, align 4
763 %5 = bitcast ptr %2 to ptr
764 %6 = load <4 x i32>, ptr %5, align 8
765 %7 = insertelement <4 x i32> %6, i32 %4, i64 0
766 %8 = bitcast <4 x i32> %7 to <16 x i8>
767 %9 = bitcast <16 x i8> %1 to <4 x i32>
768 %10 = insertelement <4 x i32> %9, i32 %4, i64 0
769 %11 = bitcast <4 x i32> %10 to <16 x i8>
770 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
771 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
772 store <16 x i8> %13, ptr %2, align 8
; 32-bit counterpart of aese_set8_via_val: the i32 argument %0 is inserted into
; lane 0 (as <4 x i32>) of both aese inputs, the vector loaded from %2 and the
; by-value vector argument %1. The aesmc result is stored to %2.
; The expected output applies vorr to q0 at entry and performs the lane inserts
; with vmov.32 from r0.
776 define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind {
777 ; CHECK-FIX-LABEL: aese_set32_via_val:
778 ; CHECK-FIX: @ %bb.0:
779 ; CHECK-FIX-NEXT: vorr q0, q0, q0
780 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
781 ; CHECK-FIX-NEXT: vmov.32 d0[0], r0
782 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0
783 ; CHECK-FIX-NEXT: aese.8 q8, q0
784 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
785 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
786 ; CHECK-FIX-NEXT: bx lr
787 %4 = bitcast ptr %2 to ptr
788 %5 = load <4 x i32>, ptr %4, align 8
789 %6 = insertelement <4 x i32> %5, i32 %0, i64 0
790 %7 = bitcast <4 x i32> %6 to <16 x i8>
791 %8 = bitcast <16 x i8> %1 to <4 x i32>
792 %9 = insertelement <4 x i32> %8, i32 %0, i64 0
793 %10 = bitcast <4 x i32> %9 to <16 x i8>
794 %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
795 %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
796 store <16 x i8> %12, ptr %2, align 8
; 32-bit counterpart of aese_set8_cond_via_ptr: conditional lane-0 insert
; expressed with branches on the i1 argument %0. When taken, an i32 loaded from
; %1 is inserted into lane 0 of the <4 x i32> loaded from %3 and, separately,
; into lane 0 of the by-value vector argument %2 (bitcast to <4 x i32>). The
; phis %14 and %22 merge the two variants and are bitcast back to <16 x i8>
; for the aese; the aesmc result is stored to %3.
; The expected output applies vorr to q0 at entry and performs the conditional
; lane inserts with vld1.32 lane loads.
800 define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
801 ; CHECK-FIX-LABEL: aese_set32_cond_via_ptr:
802 ; CHECK-FIX: @ %bb.0:
803 ; CHECK-FIX-NEXT: vorr q0, q0, q0
804 ; CHECK-FIX-NEXT: cmp r0, #0
805 ; CHECK-FIX-NEXT: beq .LBB24_2
806 ; CHECK-FIX-NEXT: @ %bb.1:
807 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
808 ; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
809 ; CHECK-FIX-NEXT: cmp r0, #0
810 ; CHECK-FIX-NEXT: bne .LBB24_3
811 ; CHECK-FIX-NEXT: b .LBB24_4
812 ; CHECK-FIX-NEXT: .LBB24_2:
813 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
814 ; CHECK-FIX-NEXT: cmp r0, #0
815 ; CHECK-FIX-NEXT: beq .LBB24_4
816 ; CHECK-FIX-NEXT: .LBB24_3:
817 ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
818 ; CHECK-FIX-NEXT: .LBB24_4:
819 ; CHECK-FIX-NEXT: aese.8 q8, q0
820 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
821 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
822 ; CHECK-FIX-NEXT: bx lr
823 br i1 %0, label %5, label %10
826 %6 = load i32, ptr %1, align 4
827 %7 = bitcast ptr %3 to ptr
828 %8 = load <4 x i32>, ptr %7, align 8
829 %9 = insertelement <4 x i32> %8, i32 %6, i64 0
833 %11 = bitcast ptr %3 to ptr
834 %12 = load <4 x i32>, ptr %11, align 8
838 %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
839 br i1 %0, label %15, label %19
842 %16 = load i32, ptr %1, align 4
843 %17 = bitcast <16 x i8> %2 to <4 x i32>
844 %18 = insertelement <4 x i32> %17, i32 %16, i64 0
848 %20 = bitcast <16 x i8> %2 to <4 x i32>
852 %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
853 %23 = bitcast <4 x i32> %14 to <16 x i8>
854 %24 = bitcast <4 x i32> %22 to <16 x i8>
855 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
856 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
857 store <16 x i8> %26, ptr %3, align 8
; 32-bit counterpart of aese_set8_cond_via_val: conditional lane-0 insert
; expressed with selects on the i1 argument %0. The i32 argument %1 is inserted
; into lane 0 of the <4 x i32> loaded from %3 and of the by-value vector
; argument %2 (bitcast to <4 x i32>); each select picks the modified or the
; unmodified vector. The aesmc result is stored to %3.
; The expected output applies vorr to q0 at entry and implements each select
; as a conditional branch around a vmov.32.
861 define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
862 ; CHECK-FIX-LABEL: aese_set32_cond_via_val:
863 ; CHECK-FIX: @ %bb.0:
864 ; CHECK-FIX-NEXT: vorr q0, q0, q0
865 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
866 ; CHECK-FIX-NEXT: cmp r0, #0
867 ; CHECK-FIX-NEXT: beq .LBB25_2
868 ; CHECK-FIX-NEXT: @ %bb.1:
869 ; CHECK-FIX-NEXT: vmov.32 d16[0], r1
870 ; CHECK-FIX-NEXT: .LBB25_2: @ %select.end
871 ; CHECK-FIX-NEXT: cmp r0, #0
872 ; CHECK-FIX-NEXT: beq .LBB25_4
873 ; CHECK-FIX-NEXT: @ %bb.3:
874 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
875 ; CHECK-FIX-NEXT: .LBB25_4: @ %select.end2
876 ; CHECK-FIX-NEXT: aese.8 q8, q0
877 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
878 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
879 ; CHECK-FIX-NEXT: bx lr
880 %5 = bitcast ptr %3 to ptr
881 %6 = load <4 x i32>, ptr %5, align 8
882 %7 = insertelement <4 x i32> %6, i32 %1, i64 0
883 %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
884 %9 = bitcast <16 x i8> %2 to <4 x i32>
885 %10 = insertelement <4 x i32> %9, i32 %1, i64 0
886 %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
887 %12 = bitcast <4 x i32> %8 to <16 x i8>
888 %13 = bitcast <4 x i32> %11 to <16 x i8>
889 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
890 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
891 store <16 x i8> %15, ptr %3, align 8
; aese_set32_loop_via_ptr: 32-bit lane insert feeding an aese/aesmc loop, scalar
; loaded through a pointer.
; An i32 is loaded from %1, inserted into lane 0 of the vector argument %2 and
; also stored to %3. If the trip count %0 is zero the function branches away
; before the loop; otherwise the state is loaded from %3, run through
; aese+aesmc until the counter %20 equals %0, and stored to %3.
; In the expected assembly the `vorr q0, q0, q0` is at function entry and the
; loop body contains only the aese/aesmc pair plus the counter update.
895 define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
896 ; CHECK-FIX-LABEL: aese_set32_loop_via_ptr:
897 ; CHECK-FIX: @ %bb.0:
898 ; CHECK-FIX-NEXT: vorr q0, q0, q0
899 ; CHECK-FIX-NEXT: ldr r1, [r1]
900 ; CHECK-FIX-NEXT: cmp r0, #0
901 ; CHECK-FIX-NEXT: str r1, [r2]
902 ; CHECK-FIX-NEXT: bxeq lr
903 ; CHECK-FIX-NEXT: .LBB26_1:
904 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
905 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
906 ; CHECK-FIX-NEXT: .LBB26_2: @ =>This Inner Loop Header: Depth=1
907 ; CHECK-FIX-NEXT: aese.8 q8, q0
908 ; CHECK-FIX-NEXT: subs r0, r0, #1
909 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
910 ; CHECK-FIX-NEXT: bne .LBB26_2
911 ; CHECK-FIX-NEXT: @ %bb.3:
912 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
913 ; CHECK-FIX-NEXT: bx lr
; Loop-invariant operand %8: argument vector with the loaded i32 in lane 0.
914 %5 = load i32, ptr %1, align 4
915 %6 = bitcast <16 x i8> %2 to <4 x i32>
916 %7 = insertelement <4 x i32> %6, i32 %5, i64 0
917 %8 = bitcast <4 x i32> %7 to <16 x i8>
; The same i32 is written to %3 before the initial state is loaded from it.
918 %9 = bitcast ptr %3 to ptr
919 store i32 %5, ptr %9, align 8
920 %10 = icmp eq i32 %0, 0
921 br i1 %10, label %14, label %11
924 %12 = load <16 x i8>, ptr %3, align 8
928 store <16 x i8> %19, ptr %3, align 8
; %16 carries the AES state across iterations; %17 counts iterations from 0.
935 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
936 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
937 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
938 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
939 %20 = add nuw i32 %17, 1
940 %21 = icmp eq i32 %20, %0
941 br i1 %21, label %13, label %15
; aese_set32_loop_via_val: 32-bit lane insert inside an aese/aesmc loop, scalar
; passed by value.
; The loop-invariant operand is the vector argument %2 with the i32 %1 in
; lane 0. On every iteration the other operand is reloaded from %3, has %1
; inserted into lane 0, and the aesmc result is stored back to %3. The loop
; runs until the counter %20 equals %0 and is bypassed when %0 is zero.
; The expected assembly has one `vorr q0, q0, q0` at function entry and a
; `vmov.32 d16[0], r1` inside the loop directly before the aese.
944 define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
945 ; CHECK-FIX-LABEL: aese_set32_loop_via_val:
946 ; CHECK-FIX: @ %bb.0:
947 ; CHECK-FIX-NEXT: vorr q0, q0, q0
948 ; CHECK-FIX-NEXT: cmp r0, #0
949 ; CHECK-FIX-NEXT: bxeq lr
950 ; CHECK-FIX-NEXT: .LBB27_1:
951 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
952 ; CHECK-FIX-NEXT: .LBB27_2: @ =>This Inner Loop Header: Depth=1
953 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
954 ; CHECK-FIX-NEXT: subs r0, r0, #1
955 ; CHECK-FIX-NEXT: vmov.32 d16[0], r1
956 ; CHECK-FIX-NEXT: aese.8 q8, q0
957 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
958 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
959 ; CHECK-FIX-NEXT: bne .LBB27_2
960 ; CHECK-FIX-NEXT: @ %bb.3:
961 ; CHECK-FIX-NEXT: bx lr
962 %5 = icmp eq i32 %0, 0
963 br i1 %5, label %12, label %6
; Loop-invariant operand %9: argument vector with %1 in lane 0.
966 %7 = bitcast <16 x i8> %2 to <4 x i32>
967 %8 = insertelement <4 x i32> %7, i32 %1, i64 0
968 %9 = bitcast <4 x i32> %8 to <16 x i8>
969 %10 = bitcast ptr %3 to ptr
970 %11 = bitcast ptr %3 to ptr
; Loop body: reload state from %3, insert %1 into lane 0, encrypt, store back.
977 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
978 %15 = load <4 x i32>, ptr %10, align 8
979 %16 = insertelement <4 x i32> %15, i32 %1, i64 0
980 %17 = bitcast <4 x i32> %16 to <16 x i8>
981 store i32 %1, ptr %11, align 8
982 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
983 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
984 store <16 x i8> %19, ptr %3, align 8
985 %20 = add nuw i32 %14, 1
986 %21 = icmp eq i32 %20, %0
987 br i1 %21, label %12, label %13
; aese_set64_via_ptr: unconditional 64-bit lane insert, scalar loaded through a
; pointer.
; An i64 loaded from %0 is inserted into lane 0 of both AES operands: the
; vector loaded from %2 and the vector argument %1. The aesmc result is
; stored to %2.
; The two expected-assembly groups differ only in the order of the
; `vldr d0, [r0]` and `vld1.64 {d16, d17}, [r1]` loads.
990 define arm_aapcs_vfpcc void @aese_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
991 ; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr:
992 ; CHECK-FIX-NOSCHED: @ %bb.0:
993 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
994 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
995 ; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0]
996 ; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0
997 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
998 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
999 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
1000 ; CHECK-FIX-NOSCHED-NEXT: bx lr
1002 ; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr:
1003 ; CHECK-CORTEX-FIX: @ %bb.0:
1004 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
1005 ; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0]
1006 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1007 ; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0
1008 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
1009 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
1010 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1011 ; CHECK-CORTEX-FIX-NEXT: bx lr
; %4 is the 64-bit scalar that lands in lane 0 of both operands.
1012 %4 = load i64, ptr %0, align 8
1013 %5 = bitcast ptr %2 to ptr
1014 %6 = load <2 x i64>, ptr %5, align 8
1015 %7 = insertelement <2 x i64> %6, i64 %4, i64 0
1016 %8 = bitcast <2 x i64> %7 to <16 x i8>
1017 %9 = bitcast <16 x i8> %1 to <2 x i64>
1018 %10 = insertelement <2 x i64> %9, i64 %4, i64 0
1019 %11 = bitcast <2 x i64> %10 to <16 x i8>
1020 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
1021 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
1022 store <16 x i8> %13, ptr %2, align 8
; aese_set64_via_val: unconditional 64-bit lane insert, scalar passed by value.
; The i64 argument %0 is inserted into lane 0 of both AES operands (the vector
; loaded from %2 and the vector argument %1); the aesmc result is stored to %2.
; In the expected assembly the i64 arrives in r0/r1 and is written with two
; `vmov.32` lane moves per operand, after a `vorr q0, q0, q0` at entry.
1026 define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind {
1027 ; CHECK-FIX-LABEL: aese_set64_via_val:
1028 ; CHECK-FIX: @ %bb.0:
1029 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1030 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1031 ; CHECK-FIX-NEXT: vmov.32 d0[0], r0
1032 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0
1033 ; CHECK-FIX-NEXT: vmov.32 d0[1], r1
1034 ; CHECK-FIX-NEXT: vmov.32 d16[1], r1
1035 ; CHECK-FIX-NEXT: aese.8 q8, q0
1036 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1037 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1038 ; CHECK-FIX-NEXT: bx lr
; First operand: vector loaded from %2 with %0 in lane 0.
1039 %4 = bitcast ptr %2 to ptr
1040 %5 = load <2 x i64>, ptr %4, align 8
1041 %6 = insertelement <2 x i64> %5, i64 %0, i64 0
1042 %7 = bitcast <2 x i64> %6 to <16 x i8>
; Second operand: vector argument %1 with %0 in lane 0.
1043 %8 = bitcast <16 x i8> %1 to <2 x i64>
1044 %9 = insertelement <2 x i64> %8, i64 %0, i64 0
1045 %10 = bitcast <2 x i64> %9 to <16 x i8>
1046 %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
1047 %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
1048 store <16 x i8> %12, ptr %2, align 8
; aese_set64_cond_via_ptr: conditional 64-bit lane insert, scalar loaded through
; a pointer.
; Two consecutive branches on the i1 argument %0 build the AES operands via
; phi nodes: when taken, an i64 is loaded from %1 and inserted into lane 0
; (of the vector loaded from %3 for the first operand, of the vector argument
; %2 for the second); otherwise the vector is used unchanged. The aesmc result
; is stored to %3.
; In both expected-assembly groups the `vorr q0, q0, q0` sits directly before
; the aese, after the predicated `vldrne d0, [r1]`. The groups differ in how
; the first operand's lane 0 is filled on the taken path.
1052 define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1053 ; CHECK-FIX-NOSCHED-LABEL: aese_set64_cond_via_ptr:
1054 ; CHECK-FIX-NOSCHED: @ %bb.0:
1055 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1056 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB30_2
1057 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
1058 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
1059 ; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1]
1060 ; CHECK-FIX-NOSCHED-NEXT: b .LBB30_3
1061 ; CHECK-FIX-NOSCHED-NEXT: .LBB30_2:
1062 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
1063 ; CHECK-FIX-NOSCHED-NEXT: .LBB30_3:
1064 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1065 ; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
1066 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
1067 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
1068 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1069 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
1070 ; CHECK-FIX-NOSCHED-NEXT: bx lr
1072 ; CHECK-CORTEX-FIX-LABEL: aese_set64_cond_via_ptr:
1073 ; CHECK-CORTEX-FIX: @ %bb.0:
1074 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1075 ; CHECK-CORTEX-FIX-NEXT: beq .LBB30_2
1076 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
1077 ; CHECK-CORTEX-FIX-NEXT: vldr d18, [r1]
1078 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1079 ; CHECK-CORTEX-FIX-NEXT: vorr d16, d18, d18
1080 ; CHECK-CORTEX-FIX-NEXT: b .LBB30_3
1081 ; CHECK-CORTEX-FIX-NEXT: .LBB30_2:
1082 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1083 ; CHECK-CORTEX-FIX-NEXT: .LBB30_3:
1084 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1085 ; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1]
1086 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
1087 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
1088 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
1089 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1090 ; CHECK-CORTEX-FIX-NEXT: bx lr
; First diamond: builds %14, the operand that originates from memory at %3.
1091 br i1 %0, label %5, label %10
1094 %6 = load i64, ptr %1, align 8
1095 %7 = bitcast ptr %3 to ptr
1096 %8 = load <2 x i64>, ptr %7, align 8
1097 %9 = insertelement <2 x i64> %8, i64 %6, i64 0
1101 %11 = bitcast ptr %3 to ptr
1102 %12 = load <2 x i64>, ptr %11, align 8
1106 %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
; Second diamond: builds %22, the operand that originates from argument %2.
1107 br i1 %0, label %15, label %19
1110 %16 = load i64, ptr %1, align 8
1111 %17 = bitcast <16 x i8> %2 to <2 x i64>
1112 %18 = insertelement <2 x i64> %17, i64 %16, i64 0
1116 %20 = bitcast <16 x i8> %2 to <2 x i64>
1120 %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
1121 %23 = bitcast <2 x i64> %14 to <16 x i8>
1122 %24 = bitcast <2 x i64> %22 to <16 x i8>
1123 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
1124 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
1125 store <16 x i8> %26, ptr %3, align 8
; aese_set64_cond_via_val: conditional 64-bit lane insert, scalar passed by
; value.
; Lane 0 of each AES operand is replaced by the i64 argument %1 only when the
; i1 argument %0 is set (expressed with `select`). One operand is loaded from
; %3, the other is the vector argument %2; the aesmc result is stored to %3.
; In the expected assembly the i64 is in r2/r3, the pointer is fetched with
; `ldr r1, [sp]`, and there is a single `vorr q0, q0, q0` at function entry.
1129 define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
1130 ; CHECK-FIX-LABEL: aese_set64_cond_via_val:
1131 ; CHECK-FIX: @ %bb.0:
1132 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1133 ; CHECK-FIX-NEXT: ldr r1, [sp]
1134 ; CHECK-FIX-NEXT: cmp r0, #0
1135 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1136 ; CHECK-FIX-NEXT: beq .LBB31_2
1137 ; CHECK-FIX-NEXT: @ %bb.1:
1138 ; CHECK-FIX-NEXT: vmov.32 d16[0], r2
1139 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3
1140 ; CHECK-FIX-NEXT: .LBB31_2: @ %select.end
1141 ; CHECK-FIX-NEXT: cmp r0, #0
1142 ; CHECK-FIX-NEXT: beq .LBB31_4
1143 ; CHECK-FIX-NEXT: @ %bb.3:
1144 ; CHECK-FIX-NEXT: vmov.32 d0[0], r2
1145 ; CHECK-FIX-NEXT: vmov.32 d0[1], r3
1146 ; CHECK-FIX-NEXT: .LBB31_4: @ %select.end2
1147 ; CHECK-FIX-NEXT: aese.8 q8, q0
1148 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1149 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1150 ; CHECK-FIX-NEXT: bx lr
; First operand: vector loaded from %3, lane 0 conditionally overwritten by %1.
1151 %5 = bitcast ptr %3 to ptr
1152 %6 = load <2 x i64>, ptr %5, align 8
1153 %7 = insertelement <2 x i64> %6, i64 %1, i64 0
1154 %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
; Second operand: vector argument %2, lane 0 conditionally overwritten by %1.
1155 %9 = bitcast <16 x i8> %2 to <2 x i64>
1156 %10 = insertelement <2 x i64> %9, i64 %1, i64 0
1157 %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
1158 %12 = bitcast <2 x i64> %8 to <16 x i8>
1159 %13 = bitcast <2 x i64> %11 to <16 x i8>
1160 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
1161 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
1162 store <16 x i8> %15, ptr %3, align 8
; aese_set64_loop_via_ptr: 64-bit lane insert feeding an aese/aesmc loop, scalar
; loaded through a pointer.
; An i64 is loaded from %1, inserted into lane 0 of the vector argument %2 and
; also stored to %3. If the trip count %0 is zero the function branches away
; before the loop; otherwise the state is loaded from %3, run through
; aese+aesmc until the counter %20 equals %0, and stored to %3.
; Both expected-assembly groups place `vorr q0, q0, q0` at function entry and
; fill d0 with `vmov d0, r4, r5` ahead of the loop. They differ in the
; zero-trip exit: one branches to a shared `pop`, the other uses `popeq`.
1166 define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1167 ; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr:
1168 ; CHECK-FIX-NOSCHED: @ %bb.0:
1169 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
1170 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr}
1171 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr}
1172 ; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1]
1173 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1174 ; CHECK-FIX-NOSCHED-NEXT: strd r4, r5, [r2]
1175 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB32_4
1176 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
1177 ; CHECK-FIX-NOSCHED-NEXT: vmov d0, r4, r5
1178 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
1179 ; CHECK-FIX-NOSCHED-NEXT: .LBB32_2: @ =>This Inner Loop Header: Depth=1
1180 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
1181 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
1182 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1183 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB32_2
1184 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
1185 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
1186 ; CHECK-FIX-NOSCHED-NEXT: .LBB32_4:
1187 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r11, pc}
1189 ; CHECK-CORTEX-FIX-LABEL: aese_set64_loop_via_ptr:
1190 ; CHECK-CORTEX-FIX: @ %bb.0:
1191 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
1192 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr}
1193 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr}
1194 ; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1]
1195 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1196 ; CHECK-CORTEX-FIX-NEXT: strd r4, r5, [r2]
1197 ; CHECK-CORTEX-FIX-NEXT: popeq {r4, r5, r11, pc}
1198 ; CHECK-CORTEX-FIX-NEXT: .LBB32_1:
1199 ; CHECK-CORTEX-FIX-NEXT: vmov d0, r4, r5
1200 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1201 ; CHECK-CORTEX-FIX-NEXT: .LBB32_2: @ =>This Inner Loop Header: Depth=1
1202 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
1203 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
1204 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
1205 ; CHECK-CORTEX-FIX-NEXT: bne .LBB32_2
1206 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
1207 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1208 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc}
; Loop-invariant operand %8: argument vector with the loaded i64 in lane 0.
1209 %5 = load i64, ptr %1, align 8
1210 %6 = bitcast <16 x i8> %2 to <2 x i64>
1211 %7 = insertelement <2 x i64> %6, i64 %5, i64 0
1212 %8 = bitcast <2 x i64> %7 to <16 x i8>
; The same i64 is written to %3 before the initial state is loaded from it.
1213 %9 = bitcast ptr %3 to ptr
1214 store i64 %5, ptr %9, align 8
1215 %10 = icmp eq i32 %0, 0
1216 br i1 %10, label %14, label %11
1219 %12 = load <16 x i8>, ptr %3, align 8
1223 store <16 x i8> %19, ptr %3, align 8
; %16 carries the AES state across iterations; %17 counts iterations from 0.
1230 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
1231 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
1232 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
1233 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
1234 %20 = add nuw i32 %17, 1
1235 %21 = icmp eq i32 %20, %0
1236 br i1 %21, label %13, label %15
; aese_set64_loop_via_val: 64-bit lane insert inside an aese/aesmc loop, scalar
; passed by value.
; The loop-invariant operand is the vector argument %2 with the i64 %1 in
; lane 0. On every iteration the other operand is reloaded from %3, has %1
; inserted into lane 0, and the aesmc result is stored back to %3. The loop
; runs until the counter %20 equals %0 and is bypassed when %0 is zero.
; The expected assembly has one `vorr q0, q0, q0` at function entry and two
; `vmov.32` writes to d16 inside the loop directly before the aese.
1239 define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
1240 ; CHECK-FIX-LABEL: aese_set64_loop_via_val:
1241 ; CHECK-FIX: @ %bb.0:
1242 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1243 ; CHECK-FIX-NEXT: cmp r0, #0
1244 ; CHECK-FIX-NEXT: bxeq lr
1245 ; CHECK-FIX-NEXT: .LBB33_1:
1246 ; CHECK-FIX-NEXT: vmov.32 d0[0], r2
1247 ; CHECK-FIX-NEXT: ldr r1, [sp]
1248 ; CHECK-FIX-NEXT: vmov.32 d0[1], r3
1249 ; CHECK-FIX-NEXT: .LBB33_2: @ =>This Inner Loop Header: Depth=1
1250 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1251 ; CHECK-FIX-NEXT: subs r0, r0, #1
1252 ; CHECK-FIX-NEXT: vmov.32 d16[0], r2
1253 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3
1254 ; CHECK-FIX-NEXT: aese.8 q8, q0
1255 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1256 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1257 ; CHECK-FIX-NEXT: bne .LBB33_2
1258 ; CHECK-FIX-NEXT: @ %bb.3:
1259 ; CHECK-FIX-NEXT: bx lr
1260 %5 = icmp eq i32 %0, 0
1261 br i1 %5, label %12, label %6
; Loop-invariant operand %9: argument vector with %1 in lane 0.
1264 %7 = bitcast <16 x i8> %2 to <2 x i64>
1265 %8 = insertelement <2 x i64> %7, i64 %1, i64 0
1266 %9 = bitcast <2 x i64> %8 to <16 x i8>
1267 %10 = bitcast ptr %3 to ptr
1268 %11 = bitcast ptr %3 to ptr
; Loop body: reload state from %3, insert %1 into lane 0, encrypt, store back.
1275 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
1276 %15 = load <2 x i64>, ptr %10, align 8
1277 %16 = insertelement <2 x i64> %15, i64 %1, i64 0
1278 %17 = bitcast <2 x i64> %16 to <16 x i8>
1279 store i64 %1, ptr %11, align 8
1280 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
1281 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
1282 store <16 x i8> %19, ptr %3, align 8
1283 %20 = add nuw i32 %14, 1
1284 %21 = icmp eq i32 %20, %0
1285 br i1 %21, label %12, label %13
; aese_setf16_via_ptr: unconditional 16-bit lane insert, scalar loaded through a
; pointer.
; An i16 loaded from %0 is inserted into lane 0 of both AES operands: the
; vector loaded from %2 and the vector argument %1. The aesmc result is
; stored to %2.
; The two expected-assembly groups differ only in the order of the
; `ldrh r0, [r0]` and `vld1.64 {d16, d17}, [r1]` loads.
1288 define arm_aapcs_vfpcc void @aese_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
1289 ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr:
1290 ; CHECK-FIX-NOSCHED: @ %bb.0:
1291 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
1292 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
1293 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
1294 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
1295 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
1296 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
1297 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1298 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
1299 ; CHECK-FIX-NOSCHED-NEXT: bx lr
1301 ; CHECK-CORTEX-FIX-LABEL: aese_setf16_via_ptr:
1302 ; CHECK-CORTEX-FIX: @ %bb.0:
1303 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
1304 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1305 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
1306 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
1307 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
1308 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
1309 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
1310 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1311 ; CHECK-CORTEX-FIX-NEXT: bx lr
; %5 is the 16-bit scalar that lands in lane 0 of both operands.
1312 %4 = bitcast ptr %0 to ptr
1313 %5 = load i16, ptr %4, align 2
1314 %6 = bitcast ptr %2 to ptr
1315 %7 = load <8 x i16>, ptr %6, align 8
1316 %8 = insertelement <8 x i16> %7, i16 %5, i64 0
1317 %9 = bitcast <8 x i16> %8 to <16 x i8>
1318 %10 = bitcast <16 x i8> %1 to <8 x i16>
1319 %11 = insertelement <8 x i16> %10, i16 %5, i64 0
1320 %12 = bitcast <8 x i16> %11 to <16 x i8>
1321 %13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %12)
1322 %14 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %13)
1323 store <16 x i8> %14, ptr %2, align 8
; aese_setf16_via_val: unconditional 16-bit lane insert, scalar passed by value
; as a `half`.
; The half argument %0 is bitcast to i16 and inserted into lane 0 of both AES
; operands (the vector loaded from %2 and the vector argument %1); the aesmc
; result is stored to %2.
; In the expected assembly the half is read from s0 and the vector argument is
; in q1, so the entry `vorr` is `vorr q1, q1, q1`.
1327 define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind {
1328 ; CHECK-FIX-LABEL: aese_setf16_via_val:
1329 ; CHECK-FIX: @ %bb.0:
1330 ; CHECK-FIX-NEXT: vorr q1, q1, q1
1331 ; CHECK-FIX-NEXT: vmov r1, s0
1332 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
1333 ; CHECK-FIX-NEXT: vmov.16 d2[0], r1
1334 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
1335 ; CHECK-FIX-NEXT: aese.8 q8, q1
1336 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1337 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
1338 ; CHECK-FIX-NEXT: bx lr
1339 %4 = bitcast ptr %2 to ptr
1340 %5 = load <8 x i16>, ptr %4, align 8
; Reinterpret the half as its 16-bit pattern so it can be inserted as an i16.
1341 %6 = bitcast half %0 to i16
1342 %7 = insertelement <8 x i16> %5, i16 %6, i64 0
1343 %8 = bitcast <8 x i16> %7 to <16 x i8>
1344 %9 = bitcast <16 x i8> %1 to <8 x i16>
1345 %10 = insertelement <8 x i16> %9, i16 %6, i64 0
1346 %11 = bitcast <8 x i16> %10 to <16 x i8>
1347 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
1348 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
1349 store <16 x i8> %13, ptr %2, align 8
; aese_setf16_cond_via_ptr: conditional 16-bit lane insert, scalar loaded
; through a pointer, with the operands merged as <8 x half> phi nodes.
; Two consecutive branches on the i1 argument %0 build the AES operands: when
; taken, an i16 is loaded from %1 and inserted into lane 0 (of the vector
; loaded from %3 for the first operand, of the vector argument %2 for the
; second); otherwise the vector is used unchanged. The aesmc result is stored
; to %3.
; Neither expected-assembly group contains a `vorr`. Both rebuild the two AES
; operands from general-purpose registers: 16-bit lanes are extracted with
; `vmov.u16`, paired with `pkhbt`, and written back with `vmov.32` lane moves
; before the aese. The groups differ in register allocation, spill layout
; (12 vs 24 bytes of stack padding) and block structure.
1353 define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1354 ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr:
1355 ; CHECK-FIX-NOSCHED: @ %bb.0:
1356 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1357 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1358 ; CHECK-FIX-NOSCHED-NEXT: .pad #12
1359 ; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
1360 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1361 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_3
1362 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
1363 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
1364 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
1365 ; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1]
1366 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
1367 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
1368 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
1369 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
1370 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
1371 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
1372 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
1373 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
1374 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
1375 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1376 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB36_4
1377 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_2:
1378 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
1379 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
1380 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
1381 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
1382 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
1383 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
1384 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
1385 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r1, d0[0]
1386 ; CHECK-FIX-NOSCHED-NEXT: b .LBB36_5
1387 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_3:
1388 ; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #8
1389 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r2:32]
1390 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r3:32]
1391 ; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #4
1392 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r3:32]
1393 ; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #12
1394 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r3:32]
1395 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
1396 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
1397 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
1398 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
1399 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d16[0]
1400 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
1401 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
1402 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
1403 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
1404 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
1405 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
1406 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1407 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_2
1408 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_4:
1409 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
1410 ; CHECK-FIX-NOSCHED-NEXT: ldrh r1, [r1]
1411 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
1412 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
1413 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
1414 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
1415 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
1416 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
1417 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_5:
1418 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r8, lsl #16
1419 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r3, r7, r3, lsl #16
1420 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r4, r0, lsl #16
1421 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r1
1422 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, lr, r12, lsl #16
1423 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r3
1424 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1
1425 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r11, r10, lsl #16
1426 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r1
1427 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r6, r5, lsl #16
1428 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r1
1429 ; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp] @ 4-byte Reload
1430 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r9, r1, lsl #16
1431 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
1432 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
1433 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r1
1434 ; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
1435 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16
1436 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
1437 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
1438 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1439 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
1440 ; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
1441 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1443 ; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr:
1444 ; CHECK-CORTEX-FIX: @ %bb.0:
1445 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1446 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1447 ; CHECK-CORTEX-FIX-NEXT: .pad #24
1448 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24
1449 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1450 ; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2
1451 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
1452 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1453 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
1454 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[0]
1455 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
1456 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
1457 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
1458 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
1459 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
1460 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
1461 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
1462 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
1463 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
1464 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
1465 ; CHECK-CORTEX-FIX-NEXT: mov r3, r6
1466 ; CHECK-CORTEX-FIX-NEXT: b .LBB36_3
1467 ; CHECK-CORTEX-FIX-NEXT: .LBB36_2:
1468 ; CHECK-CORTEX-FIX-NEXT: add r3, r2, #8
1469 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r2:32]
1470 ; CHECK-CORTEX-FIX-NEXT: add r7, r2, #4
1471 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r3:32]
1472 ; CHECK-CORTEX-FIX-NEXT: add r3, r2, #12
1473 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r7:32]
1474 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r3:32]
1475 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[0]
1476 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
1477 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
1478 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[1]
1479 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
1480 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
1481 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
1482 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
1483 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
1484 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
1485 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
1486 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
1487 ; CHECK-CORTEX-FIX-NEXT: .LBB36_3:
1488 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d17[3]
1489 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1490 ; CHECK-CORTEX-FIX-NEXT: beq .LBB36_5
1491 ; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
1492 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r1]
1493 ; CHECK-CORTEX-FIX-NEXT: b .LBB36_6
1494 ; CHECK-CORTEX-FIX-NEXT: .LBB36_5:
1495 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r0, d0[0]
1496 ; CHECK-CORTEX-FIX-NEXT: .LBB36_6:
1497 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
1498 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
1499 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
1500 ; CHECK-CORTEX-FIX-NEXT: pkhbt r9, r7, r4, lsl #16
1501 ; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
1502 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d0[1]
1503 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d0[2]
1504 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d0[3]
1505 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d1[0]
1506 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d1[1]
1507 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d1[2]
1508 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d1[3]
1509 ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16
1510 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
1511 ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
1512 ; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
1513 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r11, r5, lsl #16
1514 ; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r3, r1, lsl #16
1515 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
1516 ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16
1517 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload
1518 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4
1519 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1
1520 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
1521 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r9
1522 ; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r10, lsl #16
1523 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3
1524 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5
1525 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6
1526 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
1527 ; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
1528 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
1529 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1530 ; CHECK-CORTEX-FIX-NEXT: add sp, sp, #24
1531 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; First diamond: builds %16, the operand that originates from memory at %3.
1532 br i1 %0, label %5, label %12
1535 %6 = bitcast ptr %1 to ptr
1536 %7 = load i16, ptr %6, align 2
1537 %8 = bitcast ptr %3 to ptr
1538 %9 = load <8 x i16>, ptr %8, align 8
1539 %10 = insertelement <8 x i16> %9, i16 %7, i64 0
1540 %11 = bitcast <8 x i16> %10 to <8 x half>
1544 %13 = bitcast ptr %3 to ptr
1545 %14 = load <8 x half>, ptr %13, align 8
1549 %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
; Second diamond: builds %26, the operand that originates from argument %2.
1550 br i1 %0, label %17, label %23
1553 %18 = bitcast ptr %1 to ptr
1554 %19 = load i16, ptr %18, align 2
1555 %20 = bitcast <16 x i8> %2 to <8 x i16>
1556 %21 = insertelement <8 x i16> %20, i16 %19, i64 0
1557 %22 = bitcast <8 x i16> %21 to <8 x half>
1561 %24 = bitcast <16 x i8> %2 to <8 x half>
1565 %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
1566 %27 = bitcast <8 x half> %16 to <16 x i8>
1567 %28 = bitcast <8 x half> %26 to <16 x i8>
1568 %29 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %27, <16 x i8> %28)
1569 %30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %29)
1570 store <16 x i8> %30, ptr %3, align 8
1574 define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind {
1575 ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val:
1576 ; CHECK-FIX-NOSCHED: @ %bb.0:
1577 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1578 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1579 ; CHECK-FIX-NOSCHED-NEXT: .pad #12
1580 ; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
1581 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1582 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_2
1583 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
1584 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
1585 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s2, s0
1586 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
1587 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
1588 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
1589 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
1590 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
1591 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
1592 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
1593 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
1594 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
1595 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
1596 ; CHECK-FIX-NOSCHED-NEXT: b .LBB37_3
1597 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_2:
1598 ; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #8
1599 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r1:32]
1600 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r2:32]
1601 ; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #4
1602 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r2:32]
1603 ; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #12
1604 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r2:32]
1605 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
1606 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
1607 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
1608 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
1609 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
1610 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
1611 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
1612 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
1613 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
1614 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
1615 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[0]
1616 ; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2
1617 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_3:
1618 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d3[3]
1619 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1620 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d3[2]
1621 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d3[1]
1622 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d3[0]
1623 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d2[3]
1624 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d2[2]
1625 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_5
1626 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.4:
1627 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
1628 ; CHECK-FIX-NOSCHED-NEXT: b .LBB37_6
1629 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_5:
1630 ; CHECK-FIX-NOSCHED-NEXT: mov r0, lr
1631 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d2[0]
1632 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
1633 ; CHECK-FIX-NOSCHED-NEXT: vmov s0, lr
1634 ; CHECK-FIX-NOSCHED-NEXT: mov lr, r0
1635 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_6:
1636 ; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0
1637 ; CHECK-FIX-NOSCHED-NEXT: vmov r6, s2
1638 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r12, lsl #16
1639 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r6, r6, r8, lsl #16
1640 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
1641 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r4, lsl #16
1642 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r6
1643 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
1644 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
1645 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r0, lsl #16
1646 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
1647 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r2, lsl #16
1648 ; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
1649 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
1650 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
1651 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
1652 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
1653 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r9, lsl #16
1654 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
1655 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, lr, r7, lsl #16
1656 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
1657 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
1658 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1659 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
1660 ; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
1661 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1663 ; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val:
1664 ; CHECK-CORTEX-FIX: @ %bb.0:
1665 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1666 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1667 ; CHECK-CORTEX-FIX-NEXT: .pad #12
1668 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #12
1669 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1670 ; CHECK-CORTEX-FIX-NEXT: beq .LBB37_3
1671 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
1672 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1673 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s2, s0
1674 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[1]
1675 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
1676 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
1677 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
1678 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
1679 ; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #8] @ 4-byte Spill
1680 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[0]
1681 ; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #4] @ 4-byte Spill
1682 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[1]
1683 ; CHECK-CORTEX-FIX-NEXT: str r2, [sp] @ 4-byte Spill
1684 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1685 ; CHECK-CORTEX-FIX-NEXT: bne .LBB37_4
1686 ; CHECK-CORTEX-FIX-NEXT: .LBB37_2:
1687 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d2[0]
1688 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
1689 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
1690 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
1691 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
1692 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
1693 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
1694 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
1695 ; CHECK-CORTEX-FIX-NEXT: vmov s0, lr
1696 ; CHECK-CORTEX-FIX-NEXT: b .LBB37_5
1697 ; CHECK-CORTEX-FIX-NEXT: .LBB37_3:
1698 ; CHECK-CORTEX-FIX-NEXT: add r2, r1, #8
1699 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
1700 ; CHECK-CORTEX-FIX-NEXT: add r3, r1, #4
1701 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r2:32]
1702 ; CHECK-CORTEX-FIX-NEXT: add r2, r1, #12
1703 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r3:32]
1704 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r2:32]
1705 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
1706 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[0]
1707 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
1708 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
1709 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
1710 ; CHECK-CORTEX-FIX-NEXT: vmov s2, r2
1711 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
1712 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
1713 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
1714 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
1715 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
1716 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp] @ 4-byte Spill
1717 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1718 ; CHECK-CORTEX-FIX-NEXT: beq .LBB37_2
1719 ; CHECK-CORTEX-FIX-NEXT: .LBB37_4:
1720 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
1721 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
1722 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
1723 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
1724 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
1725 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
1726 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
1727 ; CHECK-CORTEX-FIX-NEXT: .LBB37_5:
1728 ; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r11, r6, lsl #16
1729 ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r7, r10, lsl #16
1730 ; CHECK-CORTEX-FIX-NEXT: ldm sp, {r6, r7} @ 8-byte Folded Reload
1731 ; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r4, lsl #16
1732 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16
1733 ; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r9, r2, lsl #16
1734 ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r7, r6, lsl #16
1735 ; CHECK-CORTEX-FIX-NEXT: vmov r7, s2
1736 ; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
1737 ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16
1738 ; CHECK-CORTEX-FIX-NEXT: vmov r6, s0
1739 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r7
1740 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r4
1741 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r0
1742 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], lr
1743 ; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r8, lsl #16
1744 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6
1745 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r2
1746 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r3
1747 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r5
1748 ; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
1749 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
1750 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1751 ; CHECK-CORTEX-FIX-NEXT: add sp, sp, #12
1752 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1753 br i1 %0, label %5, label %11
1756 %6 = bitcast ptr %3 to ptr
1757 %7 = load <8 x i16>, ptr %6, align 8
1758 %8 = bitcast half %1 to i16
1759 %9 = insertelement <8 x i16> %7, i16 %8, i64 0
1760 %10 = bitcast <8 x i16> %9 to <8 x half>
1764 %12 = bitcast ptr %3 to ptr
1765 %13 = load <8 x half>, ptr %12, align 8
1769 %15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
1770 br i1 %0, label %16, label %21
1773 %17 = bitcast <16 x i8> %2 to <8 x i16>
1774 %18 = bitcast half %1 to i16
1775 %19 = insertelement <8 x i16> %17, i16 %18, i64 0
1776 %20 = bitcast <8 x i16> %19 to <8 x half>
1780 %22 = bitcast <16 x i8> %2 to <8 x half>
1784 %24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
1785 %25 = bitcast <8 x half> %15 to <16 x i8>
1786 %26 = bitcast <8 x half> %24 to <16 x i8>
1787 %27 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %25, <16 x i8> %26)
1788 %28 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %27)
1789 store <16 x i8> %28, ptr %3, align 8
; Loop case, 16-bit element supplied through a pointer: the i16 loaded from %1
; is inserted into lane 0 of the vector argument %2, and that vector is the
; second operand of every aese in the loop. The expected output has a single
; `vorr q0, q0, q0` at function entry and none inside the loop.
1793 define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1794 ; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr:
1795 ; CHECK-FIX: @ %bb.0:
1796 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1797 ; CHECK-FIX-NEXT: ldrh r1, [r1]
1798 ; CHECK-FIX-NEXT: cmp r0, #0
1799 ; CHECK-FIX-NEXT: strh r1, [r2]
1800 ; CHECK-FIX-NEXT: bxeq lr
1801 ; CHECK-FIX-NEXT: .LBB38_1:
1802 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
1803 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1804 ; CHECK-FIX-NEXT: .LBB38_2: @ =>This Inner Loop Header: Depth=1
1805 ; CHECK-FIX-NEXT: aese.8 q8, q0
1806 ; CHECK-FIX-NEXT: subs r0, r0, #1
1807 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1808 ; CHECK-FIX-NEXT: bne .LBB38_2
1809 ; CHECK-FIX-NEXT: @ %bb.3:
1810 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1811 ; CHECK-FIX-NEXT: bx lr
; The loaded scalar is inserted into lane 0 of %2 and also stored through %3
; before the trip count %0 is tested against zero.
1812 %5 = bitcast ptr %1 to ptr
1813 %6 = load i16, ptr %5, align 2
1814 %7 = bitcast <16 x i8> %2 to <8 x i16>
1815 %8 = insertelement <8 x i16> %7, i16 %6, i64 0
1816 %9 = bitcast <8 x i16> %8 to <16 x i8>
1817 %10 = bitcast ptr %3 to ptr
1818 store i16 %6, ptr %10, align 8
1819 %11 = icmp eq i32 %0, 0
1820 br i1 %11, label %15, label %12
1823 %13 = load <16 x i8>, ptr %3, align 8
1827 store <16 x i8> %20, ptr %3, align 8
; Loop body: the aesmc result %20 is fed back as the first aese operand, and
; the loop is left once the counter %21 equals %0.
1834 %17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
1835 %18 = phi i32 [ 0, %12 ], [ %21, %16 ]
1836 %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
1837 %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
1838 %21 = add nuw i32 %18, 1
1839 %22 = icmp eq i32 %21, %0
1840 br i1 %22, label %14, label %16
; Loop case, 16-bit element passed by value: the half argument %1 is bitcast to
; i16 and inserted into lane 0 of both the vector argument %2 and the vector
; reloaded from %3 on every iteration. The expected output has a single
; `vorr q1, q1, q1` at function entry and none inside the loop.
1843 define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind {
1844 ; CHECK-FIX-LABEL: aese_setf16_loop_via_val:
1845 ; CHECK-FIX: @ %bb.0:
1846 ; CHECK-FIX-NEXT: vorr q1, q1, q1
1847 ; CHECK-FIX-NEXT: cmp r0, #0
1848 ; CHECK-FIX-NEXT: bxeq lr
1849 ; CHECK-FIX-NEXT: .LBB39_1:
1850 ; CHECK-FIX-NEXT: vmov r2, s0
1851 ; CHECK-FIX-NEXT: vmov.16 d2[0], r2
1852 ; CHECK-FIX-NEXT: .LBB39_2: @ =>This Inner Loop Header: Depth=1
1853 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1854 ; CHECK-FIX-NEXT: subs r0, r0, #1
1855 ; CHECK-FIX-NEXT: vmov.16 d16[0], r2
1856 ; CHECK-FIX-NEXT: aese.8 q8, q1
1857 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1858 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1859 ; CHECK-FIX-NEXT: bne .LBB39_2
1860 ; CHECK-FIX-NEXT: @ %bb.3:
1861 ; CHECK-FIX-NEXT: bx lr
1862 %5 = icmp eq i32 %0, 0
1863 br i1 %5, label %13, label %6
; Loop-invariant setup: %10 is %2 with lane 0 replaced by the scalar.
1866 %7 = bitcast <16 x i8> %2 to <8 x i16>
1867 %8 = bitcast half %1 to i16
1868 %9 = insertelement <8 x i16> %7, i16 %8, i64 0
1869 %10 = bitcast <8 x i16> %9 to <16 x i8>
1870 %11 = bitcast ptr %3 to ptr
1871 %12 = bitcast ptr %3 to ptr
; Loop body: reload the vector from %3, overwrite lane 0, run aese/aesmc and
; store the result back; the loop is left once the counter %21 equals %0.
1878 %15 = phi i32 [ 0, %6 ], [ %21, %14 ]
1879 %16 = load <8 x i16>, ptr %11, align 8
1880 %17 = insertelement <8 x i16> %16, i16 %8, i64 0
1881 %18 = bitcast <8 x i16> %17 to <16 x i8>
1882 store half %1, ptr %12, align 8
1883 %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %18, <16 x i8> %10)
1884 %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
1885 store <16 x i8> %20, ptr %3, align 8
1886 %21 = add nuw i32 %15, 1
1887 %22 = icmp eq i32 %21, %0
1888 br i1 %22, label %13, label %14
; Straight-line case, 32-bit element supplied through a pointer: the float
; loaded from %0 is inserted into lane 0 of both aese operands (the vector
; loaded from %2 and the vector argument %1). The expected output writes the
; lane with `vldr`/`vmov.f32` and then has a `vorr` on both q1 and q0 directly
; before the aese.
1891 define arm_aapcs_vfpcc void @aese_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
1892 ; CHECK-FIX-LABEL: aese_setf32_via_ptr:
1893 ; CHECK-FIX: @ %bb.0:
1894 ; CHECK-FIX-NEXT: vldr s0, [r0]
1895 ; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
1896 ; CHECK-FIX-NEXT: vmov.f32 s4, s0
1897 ; CHECK-FIX-NEXT: vorr q1, q1, q1
1898 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1899 ; CHECK-FIX-NEXT: aese.8 q1, q0
1900 ; CHECK-FIX-NEXT: aesmc.8 q8, q1
1901 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1902 ; CHECK-FIX-NEXT: bx lr
1903 %4 = load float, ptr %0, align 4
1904 %5 = bitcast ptr %2 to ptr
1905 %6 = load <4 x float>, ptr %5, align 8
1906 %7 = insertelement <4 x float> %6, float %4, i64 0
1907 %8 = bitcast <4 x float> %7 to <16 x i8>
1908 %9 = bitcast <16 x i8> %1 to <4 x float>
1909 %10 = insertelement <4 x float> %9, float %4, i64 0
1910 %11 = bitcast <4 x float> %10 to <16 x i8>
1911 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
1912 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
1913 store <16 x i8> %13, ptr %2, align 8
; Straight-line case, 32-bit element passed by value: the float argument %0 is
; inserted into lane 0 of both aese operands (the vector loaded from %2 and the
; vector argument %1). The expected output writes both lanes with `vmov.f32`
; and then has a `vorr` on both q0 and q1 directly before the aese.
1917 define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind {
1918 ; CHECK-FIX-LABEL: aese_setf32_via_val:
1919 ; CHECK-FIX: @ %bb.0:
1920 ; CHECK-FIX-NEXT: vmov.f32 s4, s0
1921 ; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
1922 ; CHECK-FIX-NEXT: vmov.f32 s0, s4
1923 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1924 ; CHECK-FIX-NEXT: vorr q1, q1, q1
1925 ; CHECK-FIX-NEXT: aese.8 q0, q1
1926 ; CHECK-FIX-NEXT: aesmc.8 q8, q0
1927 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
1928 ; CHECK-FIX-NEXT: bx lr
1929 %4 = bitcast ptr %2 to ptr
1930 %5 = load <4 x float>, ptr %4, align 8
1931 %6 = insertelement <4 x float> %5, float %0, i64 0
1932 %7 = bitcast <4 x float> %6 to <16 x i8>
1933 %8 = bitcast <16 x i8> %1 to <4 x float>
1934 %9 = insertelement <4 x float> %8, float %0, i64 0
1935 %10 = bitcast <4 x float> %9 to <16 x i8>
1936 %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
1937 %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
1938 store <16 x i8> %12, ptr %2, align 8
; Conditional case, 32-bit element supplied through a pointer: depending on the
; i1 argument %0, each aese operand either has lane 0 replaced by the float
; loaded from %1 or is used unchanged; the two variants are merged with phis.
; The expected output has one `vorr q0, q0, q0` at function entry and performs
; the lane writes with `vld1.32` lane loads, with no further `vorr`.
1942 define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1943 ; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr:
1944 ; CHECK-FIX: @ %bb.0:
1945 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1946 ; CHECK-FIX-NEXT: cmp r0, #0
1947 ; CHECK-FIX-NEXT: beq .LBB42_2
1948 ; CHECK-FIX-NEXT: @ %bb.1:
1949 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1950 ; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
1951 ; CHECK-FIX-NEXT: cmp r0, #0
1952 ; CHECK-FIX-NEXT: bne .LBB42_3
1953 ; CHECK-FIX-NEXT: b .LBB42_4
1954 ; CHECK-FIX-NEXT: .LBB42_2:
1955 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1956 ; CHECK-FIX-NEXT: cmp r0, #0
1957 ; CHECK-FIX-NEXT: beq .LBB42_4
1958 ; CHECK-FIX-NEXT: .LBB42_3:
1959 ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
1960 ; CHECK-FIX-NEXT: .LBB42_4:
1961 ; CHECK-FIX-NEXT: aese.8 q8, q0
1962 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1963 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1964 ; CHECK-FIX-NEXT: bx lr
; First diamond: builds the first aese operand %14 from the vector at %3.
1965 br i1 %0, label %5, label %10
1968 %6 = load float, ptr %1, align 4
1969 %7 = bitcast ptr %3 to ptr
1970 %8 = load <4 x float>, ptr %7, align 8
1971 %9 = insertelement <4 x float> %8, float %6, i64 0
1975 %11 = bitcast ptr %3 to ptr
1976 %12 = load <4 x float>, ptr %11, align 8
1980 %14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
; Second diamond: builds the second aese operand %22 from the argument %2.
1981 br i1 %0, label %15, label %19
1984 %16 = load float, ptr %1, align 4
1985 %17 = bitcast <16 x i8> %2 to <4 x float>
1986 %18 = insertelement <4 x float> %17, float %16, i64 0
1990 %20 = bitcast <16 x i8> %2 to <4 x float>
1994 %22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
1995 %23 = bitcast <4 x float> %14 to <16 x i8>
1996 %24 = bitcast <4 x float> %22 to <16 x i8>
1997 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
1998 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
1999 store <16 x i8> %26, ptr %3, align 8
; Conditional case, 32-bit element passed by value: each aese operand is a
; select on %0 between the vector with lane 0 replaced by the float argument %1
; and the unmodified vector. The expected output performs each conditional lane
; write with `vmovne.f32` and follows each with a `vorr` (q2, then q1) before
; the aese. The two prefixes differ only in the order of the first `cmp` and
; `vld1.64`.
2003 define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind {
2004 ; CHECK-FIX-NOSCHED-LABEL: aese_setf32_cond_via_val:
2005 ; CHECK-FIX-NOSCHED: @ %bb.0:
2006 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
2007 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2008 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
2009 ; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
2010 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2011 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
2012 ; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
2013 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
2014 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
2015 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2016 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2018 ; CHECK-CORTEX-FIX-LABEL: aese_setf32_cond_via_val:
2019 ; CHECK-CORTEX-FIX: @ %bb.0:
2020 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2021 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
2022 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
2023 ; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
2024 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2025 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
2026 ; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
2027 ; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
2028 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
2029 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2030 ; CHECK-CORTEX-FIX-NEXT: bx lr
2031 %5 = bitcast ptr %3 to ptr
2032 %6 = load <4 x float>, ptr %5, align 8
2033 %7 = insertelement <4 x float> %6, float %1, i64 0
2034 %8 = select i1 %0, <4 x float> %7, <4 x float> %6
2035 %9 = bitcast <16 x i8> %2 to <4 x float>
2036 %10 = insertelement <4 x float> %9, float %1, i64 0
2037 %11 = select i1 %0, <4 x float> %10, <4 x float> %9
2038 %12 = bitcast <4 x float> %8 to <16 x i8>
2039 %13 = bitcast <4 x float> %11 to <16 x i8>
2040 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
2041 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
2042 store <16 x i8> %15, ptr %3, align 8
; Loop case, 32-bit element supplied through a pointer: the float loaded from
; %1 is inserted into lane 0 of the vector argument %2, and that vector is the
; second operand of every aese in the loop. In the expected output the lane is
; written with `vmov.f32 s0, s4` before the loop and the `vorr q0, q0, q0` sits
; inside the loop, directly before the aese. The two prefixes differ only in
; the order of `vmov.f32` and `vld1.64` in the preheader.
2046 define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2047 ; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_ptr:
2048 ; CHECK-FIX-NOSCHED: @ %bb.0:
2049 ; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
2050 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2051 ; CHECK-FIX-NOSCHED-NEXT: vstr s4, [r2]
2052 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
2053 ; CHECK-FIX-NOSCHED-NEXT: .LBB44_1:
2054 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
2055 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
2056 ; CHECK-FIX-NOSCHED-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
2057 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
2058 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
2059 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
2060 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
2061 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB44_2
2062 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
2063 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
2064 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2066 ; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_ptr:
2067 ; CHECK-CORTEX-FIX: @ %bb.0:
2068 ; CHECK-CORTEX-FIX-NEXT: vldr s4, [r1]
2069 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2070 ; CHECK-CORTEX-FIX-NEXT: vstr s4, [r2]
2071 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
2072 ; CHECK-CORTEX-FIX-NEXT: .LBB44_1:
2073 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2074 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
2075 ; CHECK-CORTEX-FIX-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
2076 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
2077 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
2078 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
2079 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
2080 ; CHECK-CORTEX-FIX-NEXT: bne .LBB44_2
2081 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
2082 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2083 ; CHECK-CORTEX-FIX-NEXT: bx lr
; The loaded scalar is inserted into lane 0 of %2 and also stored through %3
; before the trip count %0 is tested against zero.
2084 %5 = load float, ptr %1, align 4
2085 %6 = bitcast <16 x i8> %2 to <4 x float>
2086 %7 = insertelement <4 x float> %6, float %5, i64 0
2087 %8 = bitcast <4 x float> %7 to <16 x i8>
2088 %9 = bitcast ptr %3 to ptr
2089 store float %5, ptr %9, align 8
2090 %10 = icmp eq i32 %0, 0
2091 br i1 %10, label %14, label %11
2094 %12 = load <16 x i8>, ptr %3, align 8
2098 store <16 x i8> %19, ptr %3, align 8
; Loop body: the aesmc result %19 is fed back as the first aese operand, and
; the loop is left once the counter %20 equals %0.
2105 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
2106 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
2107 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
2108 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
2109 %20 = add nuw i32 %17, 1
2110 %21 = icmp eq i32 %20, %0
2111 br i1 %21, label %13, label %15
; Loop case, 32-bit element passed by value: the float argument %1 is inserted
; into lane 0 of both the vector argument %2 and the vector reloaded from %3 on
; every iteration. In the expected output both `vorr` instructions (q2 and q1)
; sit inside the loop before the aese. The two prefixes differ only in where
; `subs` is placed within the loop body.
2114 define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind {
2115 ; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_val:
2116 ; CHECK-FIX-NOSCHED: @ %bb.0:
2117 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2118 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
2119 ; CHECK-FIX-NOSCHED-NEXT: .LBB45_1:
2120 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s4, s0
2121 ; CHECK-FIX-NOSCHED-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1
2122 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
2123 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
2124 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0
2125 ; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
2126 ; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
2127 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
2128 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
2129 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2130 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB45_2
2131 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
2132 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2134 ; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_val:
2135 ; CHECK-CORTEX-FIX: @ %bb.0:
2136 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2137 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
2138 ; CHECK-CORTEX-FIX-NEXT: .LBB45_1:
2139 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s4, s0
2140 ; CHECK-CORTEX-FIX-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1
2141 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
2142 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0
2143 ; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
2144 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
2145 ; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
2146 ; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
2147 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
2148 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2149 ; CHECK-CORTEX-FIX-NEXT: bne .LBB45_2
2150 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
2151 ; CHECK-CORTEX-FIX-NEXT: bx lr
2152 %5 = icmp eq i32 %0, 0
2153 br i1 %5, label %12, label %6
; Loop-invariant setup: %9 is %2 with lane 0 replaced by the scalar.
2156 %7 = bitcast <16 x i8> %2 to <4 x float>
2157 %8 = insertelement <4 x float> %7, float %1, i64 0
2158 %9 = bitcast <4 x float> %8 to <16 x i8>
2159 %10 = bitcast ptr %3 to ptr
2160 %11 = bitcast ptr %3 to ptr
; Loop body: reload the vector from %3, overwrite lane 0, run aese/aesmc and
; store the result back; the loop is left once the counter %20 equals %0.
2167 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
2168 %15 = load <4 x float>, ptr %10, align 8
2169 %16 = insertelement <4 x float> %15, float %1, i64 0
2170 %17 = bitcast <4 x float> %16 to <16 x i8>
2171 store float %1, ptr %11, align 8
2172 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
2173 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
2174 store <16 x i8> %19, ptr %3, align 8
2175 %20 = add nuw i32 %14, 1
2176 %21 = icmp eq i32 %20, %0
2177 br i1 %21, label %12, label %13
; aesd/aesimc pair whose first operand is the constant zero vector and whose
; second operand is loaded from %0. In the expected output the operands are
; produced by `vmov.i32 q9, #0x0` and `vld1.64`, and no `vorr` is emitted.
2180 define arm_aapcs_vfpcc void @aesd_zero(ptr %0) nounwind {
2181 ; CHECK-FIX-LABEL: aesd_zero:
2182 ; CHECK-FIX: @ %bb.0:
2183 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
2184 ; CHECK-FIX-NEXT: vmov.i32 q9, #0x0
2185 ; CHECK-FIX-NEXT: aesd.8 q9, q8
2186 ; CHECK-FIX-NEXT: aesimc.8 q8, q9
2187 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
2188 ; CHECK-FIX-NEXT: bx lr
2189 %2 = load <16 x i8>, ptr %0, align 8
2190 %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> zeroinitializer, <16 x i8> %2)
2191 %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
2192 store <16 x i8> %4, ptr %0, align 8
; aesd/aesimc pair where one operand is the return value of a call to the
; external function @get_input and the other is loaded from %0. The expected
; output has a `vorr q0, q0, q0` immediately after the `bl`, protecting the
; call result; the loaded operand gets none.
2196 define arm_aapcs_vfpcc void @aesd_via_call1(ptr %0) nounwind {
2197 ; CHECK-FIX-LABEL: aesd_via_call1:
2198 ; CHECK-FIX: @ %bb.0:
2199 ; CHECK-FIX-NEXT: .save {r4, lr}
2200 ; CHECK-FIX-NEXT: push {r4, lr}
2201 ; CHECK-FIX-NEXT: mov r4, r0
2202 ; CHECK-FIX-NEXT: bl get_input
2203 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2204 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
2205 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2206 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2207 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
2208 ; CHECK-FIX-NEXT: pop {r4, pc}
2209 %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
2210 %3 = load <16 x i8>, ptr %0, align 8
2211 %4 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %2, <16 x i8> %3)
2212 %5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %4)
2213 store <16 x i8> %5, ptr %0, align 8
; Same shape as the plain call test, but the callee @get_inputf16 takes the
; half argument %0, which this function forwards unchanged. The expected output
; has a `vorr q0, q0, q0` immediately after the `bl`; the operand loaded from
; %1 gets none.
2217 define arm_aapcs_vfpcc void @aesd_via_call2(half %0, ptr %1) nounwind {
2218 ; CHECK-FIX-LABEL: aesd_via_call2:
2219 ; CHECK-FIX: @ %bb.0:
2220 ; CHECK-FIX-NEXT: .save {r4, lr}
2221 ; CHECK-FIX-NEXT: push {r4, lr}
2222 ; CHECK-FIX-NEXT: mov r4, r0
2223 ; CHECK-FIX-NEXT: bl get_inputf16
2224 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2225 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
2226 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2227 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2228 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
2229 ; CHECK-FIX-NEXT: pop {r4, pc}
2230 %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
2231 %4 = load <16 x i8>, ptr %1, align 8
2232 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2233 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2234 store <16 x i8> %6, ptr %1, align 8
; Same shape as the plain call test, but the callee @get_inputf32 takes the
; float argument %0, which this function forwards unchanged. The expected
; output has a `vorr q0, q0, q0` immediately after the `bl`; the operand loaded
; from %1 gets none.
2238 define arm_aapcs_vfpcc void @aesd_via_call3(float %0, ptr %1) nounwind {
2239 ; CHECK-FIX-LABEL: aesd_via_call3:
2240 ; CHECK-FIX: @ %bb.0:
2241 ; CHECK-FIX-NEXT: .save {r4, lr}
2242 ; CHECK-FIX-NEXT: push {r4, lr}
2243 ; CHECK-FIX-NEXT: mov r4, r0
2244 ; CHECK-FIX-NEXT: bl get_inputf32
2245 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2246 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
2247 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2248 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2249 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
2250 ; CHECK-FIX-NEXT: pop {r4, pc}
2251 %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
2252 %4 = load <16 x i8>, ptr %1, align 8
2253 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2254 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2255 store <16 x i8> %6, ptr %1, align 8
; Single aesd/aesimc pair with both operands loaded from memory (%1 and %0).
; In the expected output both inputs come from `vld1.64` and no `vorr` is
; emitted.
2259 define arm_aapcs_vfpcc void @aesd_once_via_ptr(ptr %0, ptr %1) nounwind {
2260 ; CHECK-FIX-LABEL: aesd_once_via_ptr:
2261 ; CHECK-FIX: @ %bb.0:
2262 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
2263 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
2264 ; CHECK-FIX-NEXT: aesd.8 q9, q8
2265 ; CHECK-FIX-NEXT: aesimc.8 q8, q9
2266 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2267 ; CHECK-FIX-NEXT: bx lr
2268 %3 = load <16 x i8>, ptr %1, align 8
2269 %4 = load <16 x i8>, ptr %0, align 8
2270 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2271 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2272 store <16 x i8> %6, ptr %1, align 8
; Single aesd/aesimc pair with both operands taken directly from the vector
; arguments (note the intrinsic receives them as %1, %0). The expected output
; has a `vorr` on both q0 and q1 before the aesd.
2276 define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
2277 ; CHECK-FIX-LABEL: aesd_once_via_val:
2278 ; CHECK-FIX: @ %bb.0:
2279 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2280 ; CHECK-FIX-NEXT: vorr q1, q1, q1
2281 ; CHECK-FIX-NEXT: aesd.8 q0, q1
2282 ; CHECK-FIX-NEXT: aesimc.8 q0, q0
2283 ; CHECK-FIX-NEXT: bx lr
2284 %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
2285 %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
; Two back-to-back aesd/aesimc pairs operating on memory: the first pair's
; result is stored to %1 and also used as the first operand of the second pair,
; whose other operand is reloaded from %0. In the expected output every aesd
; input comes from `vld1.64` or a preceding `aesimc`, and no `vorr` is emitted.
2289 define arm_aapcs_vfpcc void @aesd_twice_via_ptr(ptr %0, ptr %1) nounwind {
2290 ; CHECK-FIX-LABEL: aesd_twice_via_ptr:
2291 ; CHECK-FIX: @ %bb.0:
2292 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
2293 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
2294 ; CHECK-FIX-NEXT: aesd.8 q9, q8
2295 ; CHECK-FIX-NEXT: aesimc.8 q8, q9
2296 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2297 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
2298 ; CHECK-FIX-NEXT: aesd.8 q9, q8
2299 ; CHECK-FIX-NEXT: aesimc.8 q8, q9
2300 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2301 ; CHECK-FIX-NEXT: bx lr
2302 %3 = load <16 x i8>, ptr %1, align 8
2303 %4 = load <16 x i8>, ptr %0, align 8
2304 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2305 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2306 store <16 x i8> %6, ptr %1, align 8
2307 %7 = load <16 x i8>, ptr %0, align 8
2308 %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
2309 %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
2310 store <16 x i8> %9, ptr %1, align 8
; Two back-to-back aesd/aesimc pairs on vector arguments: %0 is the second
; operand of both aesd calls, and the first pair's aesimc result feeds the
; second pair. In the expected output all `vorr` instructions (q1 once, q0
; twice) precede the first aesd and none sits between the two pairs.
2314 define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
2315 ; CHECK-FIX-LABEL: aesd_twice_via_val:
2316 ; CHECK-FIX: @ %bb.0:
2317 ; CHECK-FIX-NEXT: vorr q1, q1, q1
2318 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2319 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2320 ; CHECK-FIX-NEXT: aesd.8 q1, q0
2321 ; CHECK-FIX-NEXT: aesimc.8 q8, q1
2322 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2323 ; CHECK-FIX-NEXT: aesimc.8 q0, q8
2324 ; CHECK-FIX-NEXT: bx lr
2325 %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
2326 %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
2327 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %4, <16 x i8> %0)
2328 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
; Loop case operating on memory: every iteration loads both aesd operands from
; %2 and %1 and stores the aesimc result back to %2. In the expected output
; both inputs come from `vld1.64` inside the loop and no `vorr` is emitted.
; The two prefixes differ only in where `subs` is placed relative to the loads.
2332 define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind {
2333 ; CHECK-FIX-NOSCHED-LABEL: aesd_loop_via_ptr:
2334 ; CHECK-FIX-NOSCHED: @ %bb.0:
2335 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2336 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
2337 ; CHECK-FIX-NOSCHED-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
2338 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
2339 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
2340 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r2]
2341 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q9, q8
2342 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q9
2343 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
2344 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB54_1
2345 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.2:
2346 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2348 ; CHECK-CORTEX-FIX-LABEL: aesd_loop_via_ptr:
2349 ; CHECK-CORTEX-FIX: @ %bb.0:
2350 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2351 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
2352 ; CHECK-CORTEX-FIX-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
2353 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2354 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r2]
2355 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
2356 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
2357 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
2358 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2359 ; CHECK-CORTEX-FIX-NEXT: bne .LBB54_1
2360 ; CHECK-CORTEX-FIX-NEXT: @ %bb.2:
2361 ; CHECK-CORTEX-FIX-NEXT: bx lr
2362 %4 = icmp eq i32 %0, 0
2363 br i1 %4, label %5, label %6
; Loop body: skipped when %0 is zero, left once the counter %12 equals %0.
2369 %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
2370 %8 = load <16 x i8>, ptr %2, align 8
2371 %9 = load <16 x i8>, ptr %1, align 8
2372 %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %9)
2373 %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
2374 store <16 x i8> %11, ptr %2, align 8
2375 %12 = add nuw i32 %7, 1
2376 %13 = icmp eq i32 %12, %0
2377 br i1 %13, label %5, label %6
; Loop case operating on vector arguments: %2 seeds the loop-carried value and
; %1 is the second aesd operand on every iteration. The expected output has a
; `vorr` on both q1 and q0 at function entry and none inside the loop. The
; trailing `vorr q0, q1, q1` has distinct source and destination registers;
; presumably it is the copy of the result into the return register rather than
; an erratum workaround (confirm against the backend).
2380 define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
2381 ; CHECK-FIX-LABEL: aesd_loop_via_val:
2382 ; CHECK-FIX: @ %bb.0:
2383 ; CHECK-FIX-NEXT: vorr q1, q1, q1
2384 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2385 ; CHECK-FIX-NEXT: cmp r0, #0
2386 ; CHECK-FIX-NEXT: beq .LBB55_2
2387 ; CHECK-FIX-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1
2388 ; CHECK-FIX-NEXT: aesd.8 q1, q0
2389 ; CHECK-FIX-NEXT: subs r0, r0, #1
2390 ; CHECK-FIX-NEXT: aesimc.8 q1, q1
2391 ; CHECK-FIX-NEXT: bne .LBB55_1
2392 ; CHECK-FIX-NEXT: .LBB55_2:
2393 ; CHECK-FIX-NEXT: vorr q0, q1, q1
2394 ; CHECK-FIX-NEXT: bx lr
2395 %4 = icmp eq i32 %0, 0
2396 br i1 %4, label %5, label %7
; %6 merges the untouched %2 (loop skipped) with the final loop result %11.
2399 %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
; Loop body: the aesimc result %11 is fed back as the first aesd operand.
2403 %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
2404 %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
2405 %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %1)
2406 %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
2407 %12 = add nuw i32 %8, 1
2408 %13 = icmp eq i32 %12, %0
2409 br i1 %13, label %5, label %7
; Straight-line case, 8-bit element supplied through a pointer: the i8 loaded
; from %0 is inserted into lane 0 of both aesd operands (the vector loaded from
; %2 and the vector argument %1). The expected output has a single
; `vorr q0, q0, q0` at function entry, followed by `vmov.8` lane writes with no
; further `vorr`. The two prefixes differ only in the order of `ldrb` and
; `vld1.64`.
2412 define arm_aapcs_vfpcc void @aesd_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
2413 ; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr:
2414 ; CHECK-FIX-NOSCHED: @ %bb.0:
2415 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
2416 ; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
2417 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
2418 ; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
2419 ; CHECK-FIX-NOSCHED-NEXT: vmov.8 d16[0], r0
2420 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
2421 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
2422 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2423 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2425 ; CHECK-CORTEX-FIX-LABEL: aesd_set8_via_ptr:
2426 ; CHECK-CORTEX-FIX: @ %bb.0:
2427 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
2428 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2429 ; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
2430 ; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
2431 ; CHECK-CORTEX-FIX-NEXT: vmov.8 d16[0], r0
2432 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
2433 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
2434 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2435 ; CHECK-CORTEX-FIX-NEXT: bx lr
2436 %4 = load i8, ptr %0, align 1
2437 %5 = load <16 x i8>, ptr %2, align 8
2438 %6 = insertelement <16 x i8> %5, i8 %4, i64 0
2439 %7 = insertelement <16 x i8> %1, i8 %4, i64 0
2440 %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
2441 %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
2442 store <16 x i8> %9, ptr %2, align 8
; Straight-line case, 8-bit element passed by value: the i8 argument %0 is
; inserted into lane 0 of both aesd operands (the vector loaded from %2 and the
; vector argument %1). The expected output has a single `vorr q0, q0, q0` at
; function entry, followed by `vmov.8` lane writes with no further `vorr`.
2446 define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
2447 ; CHECK-FIX-LABEL: aesd_set8_via_val:
2448 ; CHECK-FIX: @ %bb.0:
2449 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2450 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2451 ; CHECK-FIX-NEXT: vmov.8 d0[0], r0
2452 ; CHECK-FIX-NEXT: vmov.8 d16[0], r0
2453 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2454 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2455 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2456 ; CHECK-FIX-NEXT: bx lr
2457 %4 = load <16 x i8>, ptr %2, align 8
2458 %5 = insertelement <16 x i8> %4, i8 %0, i64 0
2459 %6 = insertelement <16 x i8> %1, i8 %0, i64 0
2460 %7 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %5, <16 x i8> %6)
2461 %8 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %7)
2462 store <16 x i8> %8, ptr %2, align 8
; Conditional case, 8-bit element supplied through a pointer: depending on the
; i1 argument %0, each aesd operand either has lane 0 replaced by the i8 loaded
; from %1 or is used unchanged; the two variants are merged with phis. The
; expected output has one `vorr q0, q0, q0` at function entry and performs the
; lane writes with `vld1.8` lane loads, with no further `vorr`.
2466 define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2467 ; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr:
2468 ; CHECK-FIX: @ %bb.0:
2469 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2470 ; CHECK-FIX-NEXT: cmp r0, #0
2471 ; CHECK-FIX-NEXT: beq .LBB58_2
2472 ; CHECK-FIX-NEXT: @ %bb.1:
2473 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2474 ; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
2475 ; CHECK-FIX-NEXT: cmp r0, #0
2476 ; CHECK-FIX-NEXT: bne .LBB58_3
2477 ; CHECK-FIX-NEXT: b .LBB58_4
2478 ; CHECK-FIX-NEXT: .LBB58_2:
2479 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2480 ; CHECK-FIX-NEXT: cmp r0, #0
2481 ; CHECK-FIX-NEXT: beq .LBB58_4
2482 ; CHECK-FIX-NEXT: .LBB58_3:
2483 ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1]
2484 ; CHECK-FIX-NEXT: .LBB58_4:
2485 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2486 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2487 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2488 ; CHECK-FIX-NEXT: bx lr
; First diamond: builds the first aesd operand %12 from the vector at %3.
2489 br i1 %0, label %5, label %9
2492 %6 = load i8, ptr %1, align 1
2493 %7 = load <16 x i8>, ptr %3, align 8
2494 %8 = insertelement <16 x i8> %7, i8 %6, i64 0
2498 %10 = load <16 x i8>, ptr %3, align 8
2502 %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
; Second step: builds the second aesd operand %17 from the argument %2; the
; unmodified %2 arrives directly from the block holding the first phi.
2503 br i1 %0, label %13, label %16
2506 %14 = load i8, ptr %1, align 1
2507 %15 = insertelement <16 x i8> %2, i8 %14, i64 0
2511 %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
2512 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %17)
2513 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
2514 store <16 x i8> %19, ptr %3, align 8
; aesd with conditional i8 lane inserts, scalar passed by value: each aesd
; operand is a `select` on %0 between the vector with %1 inserted into lane 0
; and the unmodified vector (loaded from %3, or the argument %2). The result
; of aesd + aesimc is stored to %3. In the expected code the selects appear
; as compare-and-branch sequences (the `%select.end` labels) and a single
; `vorr q0, q0, q0` is emitted at function entry.
2518 define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
2519 ; CHECK-FIX-LABEL: aesd_set8_cond_via_val:
2520 ; CHECK-FIX: @ %bb.0:
2521 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2522 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2523 ; CHECK-FIX-NEXT: cmp r0, #0
2524 ; CHECK-FIX-NEXT: beq .LBB59_2
2525 ; CHECK-FIX-NEXT: @ %bb.1:
2526 ; CHECK-FIX-NEXT: vmov.8 d16[0], r1
2527 ; CHECK-FIX-NEXT: .LBB59_2: @ %select.end
2528 ; CHECK-FIX-NEXT: cmp r0, #0
2529 ; CHECK-FIX-NEXT: beq .LBB59_4
2530 ; CHECK-FIX-NEXT: @ %bb.3:
2531 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
2532 ; CHECK-FIX-NEXT: .LBB59_4: @ %select.end2
2533 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2534 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2535 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2536 ; CHECK-FIX-NEXT: bx lr
2537 %5 = load <16 x i8>, ptr %3, align 8
2538 %6 = insertelement <16 x i8> %5, i8 %1, i64 0
2539 %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
2540 %8 = insertelement <16 x i8> %2, i8 %1, i64 0
2541 %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
2542 %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %9)
2543 %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
2544 store <16 x i8> %11, ptr %3, align 8
; aesd in a loop, i8 scalar loaded through %1: the byte is inserted into
; lane 0 of the vector argument %2 once, before the loop (%6), and is also
; stored to the first byte of %3. When %0 is non-zero the vector at %3 is
; loaded and run through aesd + aesimc until the counter %18 equals %0, then
; the final value is stored back to %3. The expected code has one
; `vorr q0, q0, q0` at function entry and none inside the loop body.
2548 define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2549 ; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr:
2550 ; CHECK-FIX: @ %bb.0:
2551 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2552 ; CHECK-FIX-NEXT: ldrb r1, [r1]
2553 ; CHECK-FIX-NEXT: cmp r0, #0
2554 ; CHECK-FIX-NEXT: strb r1, [r2]
2555 ; CHECK-FIX-NEXT: bxeq lr
2556 ; CHECK-FIX-NEXT: .LBB60_1:
2557 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
2558 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2559 ; CHECK-FIX-NEXT: .LBB60_2: @ =>This Inner Loop Header: Depth=1
2560 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2561 ; CHECK-FIX-NEXT: subs r0, r0, #1
2562 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2563 ; CHECK-FIX-NEXT: bne .LBB60_2
2564 ; CHECK-FIX-NEXT: @ %bb.3:
2565 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2566 ; CHECK-FIX-NEXT: bx lr
2567 %5 = load i8, ptr %1, align 1
2568 %6 = insertelement <16 x i8> %2, i8 %5, i64 0
2569 %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0
2570 store i8 %5, ptr %7, align 8
2571 %8 = icmp eq i32 %0, 0
2572 br i1 %8, label %12, label %9
2575 %10 = load <16 x i8>, ptr %3, align 8
2579 store <16 x i8> %17, ptr %3, align 8
2586 %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
2587 %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
2588 %16 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %6)
2589 %17 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %16)
2590 %18 = add nuw i32 %15, 1
2591 %19 = icmp eq i32 %18, %0
2592 br i1 %19, label %11, label %13
; aesd in a loop, i8 scalar passed by value: when %0 is non-zero, %1 is
; inserted into lane 0 of the vector argument %2 once (%7) and the vector at
; %3 is loaded; every iteration then re-inserts %1 into lane 0 of the running
; value (%14) before aesd + aesimc. The final value is stored to %3 after the
; loop. The expected code has one `vorr q0, q0, q0` at function entry; the
; per-iteration `vmov.8 d16[0], r1` inside the loop is not accompanied by a
; further vorr.
2595 define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
2596 ; CHECK-FIX-LABEL: aesd_set8_loop_via_val:
2597 ; CHECK-FIX: @ %bb.0:
2598 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2599 ; CHECK-FIX-NEXT: cmp r0, #0
2600 ; CHECK-FIX-NEXT: bxeq lr
2601 ; CHECK-FIX-NEXT: .LBB61_1:
2602 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
2603 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2604 ; CHECK-FIX-NEXT: .LBB61_2: @ =>This Inner Loop Header: Depth=1
2605 ; CHECK-FIX-NEXT: vmov.8 d16[0], r1
2606 ; CHECK-FIX-NEXT: subs r0, r0, #1
2607 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2608 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2609 ; CHECK-FIX-NEXT: bne .LBB61_2
2610 ; CHECK-FIX-NEXT: @ %bb.3:
2611 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2612 ; CHECK-FIX-NEXT: bx lr
2613 %5 = icmp eq i32 %0, 0
2614 br i1 %5, label %10, label %6
2617 %7 = insertelement <16 x i8> %2, i8 %1, i64 0
2618 %8 = load <16 x i8>, ptr %3, align 8
2622 store <16 x i8> %16, ptr %3, align 8
2629 %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
2630 %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
2631 %14 = insertelement <16 x i8> %12, i8 %1, i64 0
2632 %15 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %7)
2633 %16 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %15)
2634 %17 = add nuw i32 %13, 1
2635 %18 = icmp eq i32 %17, %0
2636 br i1 %18, label %9, label %11
; aesd with an i16 lane insert, scalar loaded through %0: the halfword is
; written into lane 0 of both the <8 x i16> vector loaded from %2 and the
; vector argument %1 (viewed as <8 x i16>); both are bitcast back to
; <16 x i8> for aesd + aesimc and the result is stored to %2. The
; `bitcast ptr %2 to ptr` is a no-op cast. The two CHECK prefix groups differ
; only in the order of the `ldrh` and `vld1.64` instructions; both expect
; `vorr q0, q0, q0` first.
2639 define arm_aapcs_vfpcc void @aesd_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
2640 ; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr:
2641 ; CHECK-FIX-NOSCHED: @ %bb.0:
2642 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
2643 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
2644 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
2645 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
2646 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
2647 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
2648 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
2649 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2650 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2652 ; CHECK-CORTEX-FIX-LABEL: aesd_set16_via_ptr:
2653 ; CHECK-CORTEX-FIX: @ %bb.0:
2654 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
2655 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2656 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
2657 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
2658 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
2659 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
2660 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
2661 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2662 ; CHECK-CORTEX-FIX-NEXT: bx lr
2663 %4 = load i16, ptr %0, align 2
2664 %5 = bitcast ptr %2 to ptr
2665 %6 = load <8 x i16>, ptr %5, align 8
2666 %7 = insertelement <8 x i16> %6, i16 %4, i64 0
2667 %8 = bitcast <8 x i16> %7 to <16 x i8>
2668 %9 = bitcast <16 x i8> %1 to <8 x i16>
2669 %10 = insertelement <8 x i16> %9, i16 %4, i64 0
2670 %11 = bitcast <8 x i16> %10 to <16 x i8>
2671 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
2672 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
2673 store <16 x i8> %13, ptr %2, align 8
; aesd with an i16 lane insert, scalar passed by value: the i16 argument %0
; is written into lane 0 of both the <8 x i16> vector loaded from %2 and the
; vector argument %1 (viewed as <8 x i16>); both are bitcast back to
; <16 x i8> for aesd + aesimc and the result is stored to %2. The
; `bitcast ptr %2 to ptr` is a no-op cast. The expected code begins with
; `vorr q0, q0, q0`, ahead of the `vmov.16` lane writes.
2677 define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
2678 ; CHECK-FIX-LABEL: aesd_set16_via_val:
2679 ; CHECK-FIX: @ %bb.0:
2680 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2681 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2682 ; CHECK-FIX-NEXT: vmov.16 d0[0], r0
2683 ; CHECK-FIX-NEXT: vmov.16 d16[0], r0
2684 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2685 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2686 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2687 ; CHECK-FIX-NEXT: bx lr
2688 %4 = bitcast ptr %2 to ptr
2689 %5 = load <8 x i16>, ptr %4, align 8
2690 %6 = insertelement <8 x i16> %5, i16 %0, i64 0
2691 %7 = bitcast <8 x i16> %6 to <16 x i8>
2692 %8 = bitcast <16 x i8> %1 to <8 x i16>
2693 %9 = insertelement <8 x i16> %8, i16 %0, i64 0
2694 %10 = bitcast <8 x i16> %9 to <16 x i8>
2695 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
2696 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
2697 store <16 x i8> %12, ptr %2, align 8
; aesd with conditional i16 lane inserts, scalar loaded through %1: two
; successive branches on %0 each either insert the loaded halfword into
; lane 0 (of the <8 x i16> vector loaded from %3, then of the vector argument
; %2 viewed as <8 x i16>) or leave the vector untouched; phis %14 and %22
; merge the paths, which are bitcast to <16 x i8> for aesd + aesimc, and the
; result is stored to %3. The `bitcast ptr %3 to ptr` instructions are no-op
; casts. The expected code has a single `vorr q0, q0, q0` at function entry
; and uses `vld1.16` lane loads on the taken paths.
2701 define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2702 ; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr:
2703 ; CHECK-FIX: @ %bb.0:
2704 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2705 ; CHECK-FIX-NEXT: cmp r0, #0
2706 ; CHECK-FIX-NEXT: beq .LBB64_2
2707 ; CHECK-FIX-NEXT: @ %bb.1:
2708 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2709 ; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
2710 ; CHECK-FIX-NEXT: cmp r0, #0
2711 ; CHECK-FIX-NEXT: bne .LBB64_3
2712 ; CHECK-FIX-NEXT: b .LBB64_4
2713 ; CHECK-FIX-NEXT: .LBB64_2:
2714 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2715 ; CHECK-FIX-NEXT: cmp r0, #0
2716 ; CHECK-FIX-NEXT: beq .LBB64_4
2717 ; CHECK-FIX-NEXT: .LBB64_3:
2718 ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16]
2719 ; CHECK-FIX-NEXT: .LBB64_4:
2720 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2721 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2722 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2723 ; CHECK-FIX-NEXT: bx lr
2724 br i1 %0, label %5, label %10
2727 %6 = load i16, ptr %1, align 2
2728 %7 = bitcast ptr %3 to ptr
2729 %8 = load <8 x i16>, ptr %7, align 8
2730 %9 = insertelement <8 x i16> %8, i16 %6, i64 0
2734 %11 = bitcast ptr %3 to ptr
2735 %12 = load <8 x i16>, ptr %11, align 8
2739 %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
2740 br i1 %0, label %15, label %19
2743 %16 = load i16, ptr %1, align 2
2744 %17 = bitcast <16 x i8> %2 to <8 x i16>
2745 %18 = insertelement <8 x i16> %17, i16 %16, i64 0
2749 %20 = bitcast <16 x i8> %2 to <8 x i16>
2753 %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
2754 %23 = bitcast <8 x i16> %14 to <16 x i8>
2755 %24 = bitcast <8 x i16> %22 to <16 x i8>
2756 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
2757 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
2758 store <16 x i8> %26, ptr %3, align 8
; aesd with conditional i16 lane inserts, scalar passed by value: each aesd
; operand is a `select` on %0 between the <8 x i16> vector with %1 inserted
; into lane 0 and the unmodified vector (loaded from %3, or the argument %2).
; The selected values are bitcast to <16 x i8> for aesd + aesimc and the
; result is stored to %3. The `bitcast ptr %3 to ptr` is a no-op cast. In the
; expected code the selects appear as compare-and-branch sequences (the
; `%select.end` labels) and a single `vorr q0, q0, q0` is emitted at entry.
2762 define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
2763 ; CHECK-FIX-LABEL: aesd_set16_cond_via_val:
2764 ; CHECK-FIX: @ %bb.0:
2765 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2766 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2767 ; CHECK-FIX-NEXT: cmp r0, #0
2768 ; CHECK-FIX-NEXT: beq .LBB65_2
2769 ; CHECK-FIX-NEXT: @ %bb.1:
2770 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
2771 ; CHECK-FIX-NEXT: .LBB65_2: @ %select.end
2772 ; CHECK-FIX-NEXT: cmp r0, #0
2773 ; CHECK-FIX-NEXT: beq .LBB65_4
2774 ; CHECK-FIX-NEXT: @ %bb.3:
2775 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
2776 ; CHECK-FIX-NEXT: .LBB65_4: @ %select.end2
2777 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2778 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2779 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2780 ; CHECK-FIX-NEXT: bx lr
2781 %5 = bitcast ptr %3 to ptr
2782 %6 = load <8 x i16>, ptr %5, align 8
2783 %7 = insertelement <8 x i16> %6, i16 %1, i64 0
2784 %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
2785 %9 = bitcast <16 x i8> %2 to <8 x i16>
2786 %10 = insertelement <8 x i16> %9, i16 %1, i64 0
2787 %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
2788 %12 = bitcast <8 x i16> %8 to <16 x i8>
2789 %13 = bitcast <8 x i16> %11 to <16 x i8>
2790 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
2791 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
2792 store <16 x i8> %15, ptr %3, align 8
; aesd in a loop, i16 scalar loaded through %1: the halfword is inserted into
; lane 0 of the vector argument %2 once, before the loop (%8), and is also
; stored to %3. When %0 is non-zero the vector at %3 is loaded and run
; through aesd + aesimc until the counter %20 equals %0, then the final value
; is stored back to %3. The `bitcast ptr %3 to ptr` is a no-op cast. The
; expected code has one `vorr q0, q0, q0` at function entry and none inside
; the loop body.
2796 define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2797 ; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr:
2798 ; CHECK-FIX: @ %bb.0:
2799 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2800 ; CHECK-FIX-NEXT: ldrh r1, [r1]
2801 ; CHECK-FIX-NEXT: cmp r0, #0
2802 ; CHECK-FIX-NEXT: strh r1, [r2]
2803 ; CHECK-FIX-NEXT: bxeq lr
2804 ; CHECK-FIX-NEXT: .LBB66_1:
2805 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
2806 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2807 ; CHECK-FIX-NEXT: .LBB66_2: @ =>This Inner Loop Header: Depth=1
2808 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2809 ; CHECK-FIX-NEXT: subs r0, r0, #1
2810 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2811 ; CHECK-FIX-NEXT: bne .LBB66_2
2812 ; CHECK-FIX-NEXT: @ %bb.3:
2813 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2814 ; CHECK-FIX-NEXT: bx lr
2815 %5 = load i16, ptr %1, align 2
2816 %6 = bitcast <16 x i8> %2 to <8 x i16>
2817 %7 = insertelement <8 x i16> %6, i16 %5, i64 0
2818 %8 = bitcast <8 x i16> %7 to <16 x i8>
2819 %9 = bitcast ptr %3 to ptr
2820 store i16 %5, ptr %9, align 8
2821 %10 = icmp eq i32 %0, 0
2822 br i1 %10, label %14, label %11
2825 %12 = load <16 x i8>, ptr %3, align 8
2829 store <16 x i8> %19, ptr %3, align 8
2836 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
2837 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
2838 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
2839 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
2840 %20 = add nuw i32 %17, 1
2841 %21 = icmp eq i32 %20, %0
2842 br i1 %21, label %13, label %15
; aesd in a loop, i16 scalar passed by value: when %0 is non-zero, %1 is
; inserted into lane 0 of the vector argument %2 once (%9). Each iteration
; then loads the <8 x i16> vector from %3, inserts %1 into lane 0, stores the
; i16 %1 to %3, runs aesd + aesimc and stores the result to %3. The two
; `bitcast ptr %3 to ptr` instructions are no-op casts. The expected code has
; one `vorr q0, q0, q0` at function entry; the loop body contains the vector
; load, the `vmov.16` lane write and the vector store, with no separate
; halfword store and no further vorr.
2845 define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
2846 ; CHECK-FIX-LABEL: aesd_set16_loop_via_val:
2847 ; CHECK-FIX: @ %bb.0:
2848 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2849 ; CHECK-FIX-NEXT: cmp r0, #0
2850 ; CHECK-FIX-NEXT: bxeq lr
2851 ; CHECK-FIX-NEXT: .LBB67_1:
2852 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
2853 ; CHECK-FIX-NEXT: .LBB67_2: @ =>This Inner Loop Header: Depth=1
2854 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2855 ; CHECK-FIX-NEXT: subs r0, r0, #1
2856 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
2857 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2858 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2859 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2860 ; CHECK-FIX-NEXT: bne .LBB67_2
2861 ; CHECK-FIX-NEXT: @ %bb.3:
2862 ; CHECK-FIX-NEXT: bx lr
2863 %5 = icmp eq i32 %0, 0
2864 br i1 %5, label %12, label %6
2867 %7 = bitcast <16 x i8> %2 to <8 x i16>
2868 %8 = insertelement <8 x i16> %7, i16 %1, i64 0
2869 %9 = bitcast <8 x i16> %8 to <16 x i8>
2870 %10 = bitcast ptr %3 to ptr
2871 %11 = bitcast ptr %3 to ptr
2878 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
2879 %15 = load <8 x i16>, ptr %10, align 8
2880 %16 = insertelement <8 x i16> %15, i16 %1, i64 0
2881 %17 = bitcast <8 x i16> %16 to <16 x i8>
2882 store i16 %1, ptr %11, align 8
2883 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
2884 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
2885 store <16 x i8> %19, ptr %3, align 8
2886 %20 = add nuw i32 %14, 1
2887 %21 = icmp eq i32 %20, %0
2888 br i1 %21, label %12, label %13
; aesd with an i32 lane insert, scalar loaded through %0: the word is written
; into lane 0 of both the <4 x i32> vector loaded from %2 and the vector
; argument %1 (viewed as <4 x i32>); both are bitcast back to <16 x i8> for
; aesd + aesimc and the result is stored to %2. The `bitcast ptr %2 to ptr`
; is a no-op cast. The two CHECK prefix groups differ only in the order of
; the `ldr` and `vld1.64` instructions; both expect `vorr q0, q0, q0` first.
2891 define arm_aapcs_vfpcc void @aesd_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
2892 ; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr:
2893 ; CHECK-FIX-NOSCHED: @ %bb.0:
2894 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
2895 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
2896 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
2897 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
2898 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
2899 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
2900 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
2901 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2902 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2904 ; CHECK-CORTEX-FIX-LABEL: aesd_set32_via_ptr:
2905 ; CHECK-CORTEX-FIX: @ %bb.0:
2906 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
2907 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2908 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
2909 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
2910 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0
2911 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
2912 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
2913 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2914 ; CHECK-CORTEX-FIX-NEXT: bx lr
2915 %4 = load i32, ptr %0, align 4
2916 %5 = bitcast ptr %2 to ptr
2917 %6 = load <4 x i32>, ptr %5, align 8
2918 %7 = insertelement <4 x i32> %6, i32 %4, i64 0
2919 %8 = bitcast <4 x i32> %7 to <16 x i8>
2920 %9 = bitcast <16 x i8> %1 to <4 x i32>
2921 %10 = insertelement <4 x i32> %9, i32 %4, i64 0
2922 %11 = bitcast <4 x i32> %10 to <16 x i8>
2923 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
2924 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
2925 store <16 x i8> %13, ptr %2, align 8
; aesd with an i32 lane insert, scalar passed by value: the i32 argument %0
; is written into lane 0 of both the <4 x i32> vector loaded from %2 and the
; vector argument %1 (viewed as <4 x i32>); both are bitcast back to
; <16 x i8> for aesd + aesimc and the result is stored to %2. The
; `bitcast ptr %2 to ptr` is a no-op cast. The expected code begins with
; `vorr q0, q0, q0`, ahead of the `vmov.32` lane writes.
2929 define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind {
2930 ; CHECK-FIX-LABEL: aesd_set32_via_val:
2931 ; CHECK-FIX: @ %bb.0:
2932 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2933 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2934 ; CHECK-FIX-NEXT: vmov.32 d0[0], r0
2935 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0
2936 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2937 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2938 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2939 ; CHECK-FIX-NEXT: bx lr
2940 %4 = bitcast ptr %2 to ptr
2941 %5 = load <4 x i32>, ptr %4, align 8
2942 %6 = insertelement <4 x i32> %5, i32 %0, i64 0
2943 %7 = bitcast <4 x i32> %6 to <16 x i8>
2944 %8 = bitcast <16 x i8> %1 to <4 x i32>
2945 %9 = insertelement <4 x i32> %8, i32 %0, i64 0
2946 %10 = bitcast <4 x i32> %9 to <16 x i8>
2947 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
2948 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
2949 store <16 x i8> %12, ptr %2, align 8
; aesd with conditional i32 lane inserts, scalar loaded through %1: two
; successive branches on %0 each either insert the loaded word into lane 0
; (of the <4 x i32> vector loaded from %3, then of the vector argument %2
; viewed as <4 x i32>) or leave the vector untouched; phis %14 and %22 merge
; the paths, which are bitcast to <16 x i8> for aesd + aesimc, and the result
; is stored to %3. The `bitcast ptr %3 to ptr` instructions are no-op casts.
; The expected code has a single `vorr q0, q0, q0` at function entry and uses
; `vld1.32` lane loads on the taken paths.
2953 define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2954 ; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr:
2955 ; CHECK-FIX: @ %bb.0:
2956 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2957 ; CHECK-FIX-NEXT: cmp r0, #0
2958 ; CHECK-FIX-NEXT: beq .LBB70_2
2959 ; CHECK-FIX-NEXT: @ %bb.1:
2960 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2961 ; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
2962 ; CHECK-FIX-NEXT: cmp r0, #0
2963 ; CHECK-FIX-NEXT: bne .LBB70_3
2964 ; CHECK-FIX-NEXT: b .LBB70_4
2965 ; CHECK-FIX-NEXT: .LBB70_2:
2966 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2967 ; CHECK-FIX-NEXT: cmp r0, #0
2968 ; CHECK-FIX-NEXT: beq .LBB70_4
2969 ; CHECK-FIX-NEXT: .LBB70_3:
2970 ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
2971 ; CHECK-FIX-NEXT: .LBB70_4:
2972 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2973 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2974 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2975 ; CHECK-FIX-NEXT: bx lr
2976 br i1 %0, label %5, label %10
2979 %6 = load i32, ptr %1, align 4
2980 %7 = bitcast ptr %3 to ptr
2981 %8 = load <4 x i32>, ptr %7, align 8
2982 %9 = insertelement <4 x i32> %8, i32 %6, i64 0
2986 %11 = bitcast ptr %3 to ptr
2987 %12 = load <4 x i32>, ptr %11, align 8
2991 %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
2992 br i1 %0, label %15, label %19
2995 %16 = load i32, ptr %1, align 4
2996 %17 = bitcast <16 x i8> %2 to <4 x i32>
2997 %18 = insertelement <4 x i32> %17, i32 %16, i64 0
3001 %20 = bitcast <16 x i8> %2 to <4 x i32>
3005 %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
3006 %23 = bitcast <4 x i32> %14 to <16 x i8>
3007 %24 = bitcast <4 x i32> %22 to <16 x i8>
3008 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
3009 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
3010 store <16 x i8> %26, ptr %3, align 8
; aesd with conditional i32 lane inserts, scalar passed by value: each aesd
; operand is a `select` on %0 between the <4 x i32> vector with %1 inserted
; into lane 0 and the unmodified vector (loaded from %3, or the argument %2).
; The selected values are bitcast to <16 x i8> for aesd + aesimc and the
; result is stored to %3. The `bitcast ptr %3 to ptr` is a no-op cast. In the
; expected code the selects appear as compare-and-branch sequences (the
; `%select.end` labels) and a single `vorr q0, q0, q0` is emitted at entry.
3014 define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
3015 ; CHECK-FIX-LABEL: aesd_set32_cond_via_val:
3016 ; CHECK-FIX: @ %bb.0:
3017 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3018 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3019 ; CHECK-FIX-NEXT: cmp r0, #0
3020 ; CHECK-FIX-NEXT: beq .LBB71_2
3021 ; CHECK-FIX-NEXT: @ %bb.1:
3022 ; CHECK-FIX-NEXT: vmov.32 d16[0], r1
3023 ; CHECK-FIX-NEXT: .LBB71_2: @ %select.end
3024 ; CHECK-FIX-NEXT: cmp r0, #0
3025 ; CHECK-FIX-NEXT: beq .LBB71_4
3026 ; CHECK-FIX-NEXT: @ %bb.3:
3027 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
3028 ; CHECK-FIX-NEXT: .LBB71_4: @ %select.end2
3029 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3030 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3031 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3032 ; CHECK-FIX-NEXT: bx lr
3033 %5 = bitcast ptr %3 to ptr
3034 %6 = load <4 x i32>, ptr %5, align 8
3035 %7 = insertelement <4 x i32> %6, i32 %1, i64 0
3036 %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
3037 %9 = bitcast <16 x i8> %2 to <4 x i32>
3038 %10 = insertelement <4 x i32> %9, i32 %1, i64 0
3039 %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
3040 %12 = bitcast <4 x i32> %8 to <16 x i8>
3041 %13 = bitcast <4 x i32> %11 to <16 x i8>
3042 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
3043 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
3044 store <16 x i8> %15, ptr %3, align 8
; aesd in a loop, i32 scalar loaded through %1: the word is inserted into
; lane 0 of the vector argument %2 once, before the loop (%8), and is also
; stored to %3. When %0 is non-zero the vector at %3 is loaded and run
; through aesd + aesimc until the counter %20 equals %0, then the final value
; is stored back to %3. The `bitcast ptr %3 to ptr` is a no-op cast. The
; expected code has one `vorr q0, q0, q0` at function entry and none inside
; the loop body.
3048 define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3049 ; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr:
3050 ; CHECK-FIX: @ %bb.0:
3051 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3052 ; CHECK-FIX-NEXT: ldr r1, [r1]
3053 ; CHECK-FIX-NEXT: cmp r0, #0
3054 ; CHECK-FIX-NEXT: str r1, [r2]
3055 ; CHECK-FIX-NEXT: bxeq lr
3056 ; CHECK-FIX-NEXT: .LBB72_1:
3057 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
3058 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3059 ; CHECK-FIX-NEXT: .LBB72_2: @ =>This Inner Loop Header: Depth=1
3060 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3061 ; CHECK-FIX-NEXT: subs r0, r0, #1
3062 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3063 ; CHECK-FIX-NEXT: bne .LBB72_2
3064 ; CHECK-FIX-NEXT: @ %bb.3:
3065 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3066 ; CHECK-FIX-NEXT: bx lr
3067 %5 = load i32, ptr %1, align 4
3068 %6 = bitcast <16 x i8> %2 to <4 x i32>
3069 %7 = insertelement <4 x i32> %6, i32 %5, i64 0
3070 %8 = bitcast <4 x i32> %7 to <16 x i8>
3071 %9 = bitcast ptr %3 to ptr
3072 store i32 %5, ptr %9, align 8
3073 %10 = icmp eq i32 %0, 0
3074 br i1 %10, label %14, label %11
3077 %12 = load <16 x i8>, ptr %3, align 8
3081 store <16 x i8> %19, ptr %3, align 8
3088 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
3089 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
3090 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
3091 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3092 %20 = add nuw i32 %17, 1
3093 %21 = icmp eq i32 %20, %0
3094 br i1 %21, label %13, label %15
; aesd in a loop, i32 scalar passed by value: when %0 is non-zero, %1 is
; inserted into lane 0 of the vector argument %2 once (%9). Each iteration
; then loads the <4 x i32> vector from %3, inserts %1 into lane 0, stores the
; i32 %1 to %3, runs aesd + aesimc and stores the result to %3. The two
; `bitcast ptr %3 to ptr` instructions are no-op casts. The expected code has
; one `vorr q0, q0, q0` at function entry; the loop body contains the vector
; load, the `vmov.32` lane write and the vector store, with no separate word
; store and no further vorr.
3097 define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
3098 ; CHECK-FIX-LABEL: aesd_set32_loop_via_val:
3099 ; CHECK-FIX: @ %bb.0:
3100 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3101 ; CHECK-FIX-NEXT: cmp r0, #0
3102 ; CHECK-FIX-NEXT: bxeq lr
3103 ; CHECK-FIX-NEXT: .LBB73_1:
3104 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
3105 ; CHECK-FIX-NEXT: .LBB73_2: @ =>This Inner Loop Header: Depth=1
3106 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3107 ; CHECK-FIX-NEXT: subs r0, r0, #1
3108 ; CHECK-FIX-NEXT: vmov.32 d16[0], r1
3109 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3110 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3111 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3112 ; CHECK-FIX-NEXT: bne .LBB73_2
3113 ; CHECK-FIX-NEXT: @ %bb.3:
3114 ; CHECK-FIX-NEXT: bx lr
3115 %5 = icmp eq i32 %0, 0
3116 br i1 %5, label %12, label %6
3119 %7 = bitcast <16 x i8> %2 to <4 x i32>
3120 %8 = insertelement <4 x i32> %7, i32 %1, i64 0
3121 %9 = bitcast <4 x i32> %8 to <16 x i8>
3122 %10 = bitcast ptr %3 to ptr
3123 %11 = bitcast ptr %3 to ptr
3130 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
3131 %15 = load <4 x i32>, ptr %10, align 8
3132 %16 = insertelement <4 x i32> %15, i32 %1, i64 0
3133 %17 = bitcast <4 x i32> %16 to <16 x i8>
3134 store i32 %1, ptr %11, align 8
3135 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
3136 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3137 store <16 x i8> %19, ptr %3, align 8
3138 %20 = add nuw i32 %14, 1
3139 %21 = icmp eq i32 %20, %0
3140 br i1 %21, label %12, label %13
; aesd with an i64 lane insert, scalar loaded through %0: the doubleword is
; written into lane 0 of both the <2 x i64> vector loaded from %2 and the
; vector argument %1 (viewed as <2 x i64>); both are bitcast back to
; <16 x i8> for aesd + aesimc and the result is stored to %2. The
; `bitcast ptr %2 to ptr` is a no-op cast. The expected code loads the scalar
; with `vldr d0, [r0]` and copies it into d16 with `vorr d16, d0, d0`; the
; two CHECK prefix groups differ only in the order of `vldr` and `vld1.64`,
; and both expect `vorr q0, q0, q0` first.
3143 define arm_aapcs_vfpcc void @aesd_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
3144 ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr:
3145 ; CHECK-FIX-NOSCHED: @ %bb.0:
3146 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
3147 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
3148 ; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0]
3149 ; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0
3150 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
3151 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3152 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
3153 ; CHECK-FIX-NOSCHED-NEXT: bx lr
3155 ; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr:
3156 ; CHECK-CORTEX-FIX: @ %bb.0:
3157 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
3158 ; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0]
3159 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3160 ; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0
3161 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
3162 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
3163 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
3164 ; CHECK-CORTEX-FIX-NEXT: bx lr
3165 %4 = load i64, ptr %0, align 8
3166 %5 = bitcast ptr %2 to ptr
3167 %6 = load <2 x i64>, ptr %5, align 8
3168 %7 = insertelement <2 x i64> %6, i64 %4, i64 0
3169 %8 = bitcast <2 x i64> %7 to <16 x i8>
3170 %9 = bitcast <16 x i8> %1 to <2 x i64>
3171 %10 = insertelement <2 x i64> %9, i64 %4, i64 0
3172 %11 = bitcast <2 x i64> %10 to <16 x i8>
3173 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
3174 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
3175 store <16 x i8> %13, ptr %2, align 8
; aesd with an i64 lane insert, scalar passed by value: the i64 argument %0
; is written into lane 0 of both the <2 x i64> vector loaded from %2 and the
; vector argument %1 (viewed as <2 x i64>); both are bitcast back to
; <16 x i8> for aesd + aesimc and the result is stored to %2. The
; `bitcast ptr %2 to ptr` is a no-op cast. The expected code builds each
; 64-bit lane from r0 and r1 with pairs of `vmov.32` lane writes, after an
; initial `vorr q0, q0, q0`.
3179 define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind {
3180 ; CHECK-FIX-LABEL: aesd_set64_via_val:
3181 ; CHECK-FIX: @ %bb.0:
3182 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3183 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3184 ; CHECK-FIX-NEXT: vmov.32 d0[0], r0
3185 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0
3186 ; CHECK-FIX-NEXT: vmov.32 d0[1], r1
3187 ; CHECK-FIX-NEXT: vmov.32 d16[1], r1
3188 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3189 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3190 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3191 ; CHECK-FIX-NEXT: bx lr
3192 %4 = bitcast ptr %2 to ptr
3193 %5 = load <2 x i64>, ptr %4, align 8
3194 %6 = insertelement <2 x i64> %5, i64 %0, i64 0
3195 %7 = bitcast <2 x i64> %6 to <16 x i8>
3196 %8 = bitcast <16 x i8> %1 to <2 x i64>
3197 %9 = insertelement <2 x i64> %8, i64 %0, i64 0
3198 %10 = bitcast <2 x i64> %9 to <16 x i8>
3199 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
3200 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
3201 store <16 x i8> %12, ptr %2, align 8
; aesd with conditional i64 lane inserts, scalar loaded through %1: two
; successive branches on %0 each either insert the loaded doubleword into
; lane 0 (of the <2 x i64> vector loaded from %3, then of the vector argument
; %2 viewed as <2 x i64>) or leave the vector untouched; phis %14 and %22
; merge the paths, which are bitcast to <16 x i8> for aesd + aesimc, and the
; result is stored to %3. The `bitcast ptr %3 to ptr` instructions are no-op
; casts. Unlike the narrower cond_via_ptr variants, the expected
; `vorr q0, q0, q0` is not at function entry: it follows the conditional
; `vldrne d0, [r1]` and sits immediately before `aesd.8`. The two CHECK
; prefix groups differ in how the first operand's low half is filled
; (`vldr d16, [r1]` directly versus `vldr d18` followed by `vorr d16, d18, d18`).
3205 define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3206 ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr:
3207 ; CHECK-FIX-NOSCHED: @ %bb.0:
3208 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3209 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB76_2
3210 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
3211 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3212 ; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1]
3213 ; CHECK-FIX-NOSCHED-NEXT: b .LBB76_3
3214 ; CHECK-FIX-NOSCHED-NEXT: .LBB76_2:
3215 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3216 ; CHECK-FIX-NOSCHED-NEXT: .LBB76_3:
3217 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3218 ; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
3219 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
3220 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
3221 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3222 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
3223 ; CHECK-FIX-NOSCHED-NEXT: bx lr
3225 ; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr:
3226 ; CHECK-CORTEX-FIX: @ %bb.0:
3227 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3228 ; CHECK-CORTEX-FIX-NEXT: beq .LBB76_2
3229 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
3230 ; CHECK-CORTEX-FIX-NEXT: vldr d18, [r1]
3231 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3232 ; CHECK-CORTEX-FIX-NEXT: vorr d16, d18, d18
3233 ; CHECK-CORTEX-FIX-NEXT: b .LBB76_3
3234 ; CHECK-CORTEX-FIX-NEXT: .LBB76_2:
3235 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3236 ; CHECK-CORTEX-FIX-NEXT: .LBB76_3:
3237 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3238 ; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1]
3239 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
3240 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
3241 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
3242 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3243 ; CHECK-CORTEX-FIX-NEXT: bx lr
3244 br i1 %0, label %5, label %10
3247 %6 = load i64, ptr %1, align 8
3248 %7 = bitcast ptr %3 to ptr
3249 %8 = load <2 x i64>, ptr %7, align 8
3250 %9 = insertelement <2 x i64> %8, i64 %6, i64 0
3254 %11 = bitcast ptr %3 to ptr
3255 %12 = load <2 x i64>, ptr %11, align 8
3259 %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
3260 br i1 %0, label %15, label %19
3263 %16 = load i64, ptr %1, align 8
3264 %17 = bitcast <16 x i8> %2 to <2 x i64>
3265 %18 = insertelement <2 x i64> %17, i64 %16, i64 0
3269 %20 = bitcast <16 x i8> %2 to <2 x i64>
3273 %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
3274 %23 = bitcast <2 x i64> %14 to <16 x i8>
3275 %24 = bitcast <2 x i64> %22 to <16 x i8>
3276 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
3277 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
3278 store <16 x i8> %26, ptr %3, align 8
; aesd with conditional i64 lane inserts, scalar passed by value: each aesd
; operand is a `select` on %0 between the <2 x i64> vector with %1 inserted
; into lane 0 and the unmodified vector (loaded from %3, or the argument %2).
; The selected values are bitcast to <16 x i8> for aesd + aesimc and the
; result is stored to %3. The `bitcast ptr %3 to ptr` is a no-op cast. In the
; expected code the vector address is first loaded from the stack
; (`ldr r1, [sp]`), the 64-bit lane is built from r2 and r3 with pairs of
; `vmov.32` lane writes, the selects appear as compare-and-branch sequences,
; and a single `vorr q0, q0, q0` is emitted at function entry.
3282 define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
3283 ; CHECK-FIX-LABEL: aesd_set64_cond_via_val:
3284 ; CHECK-FIX: @ %bb.0:
3285 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3286 ; CHECK-FIX-NEXT: ldr r1, [sp]
3287 ; CHECK-FIX-NEXT: cmp r0, #0
3288 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3289 ; CHECK-FIX-NEXT: beq .LBB77_2
3290 ; CHECK-FIX-NEXT: @ %bb.1:
3291 ; CHECK-FIX-NEXT: vmov.32 d16[0], r2
3292 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3
3293 ; CHECK-FIX-NEXT: .LBB77_2: @ %select.end
3294 ; CHECK-FIX-NEXT: cmp r0, #0
3295 ; CHECK-FIX-NEXT: beq .LBB77_4
3296 ; CHECK-FIX-NEXT: @ %bb.3:
3297 ; CHECK-FIX-NEXT: vmov.32 d0[0], r2
3298 ; CHECK-FIX-NEXT: vmov.32 d0[1], r3
3299 ; CHECK-FIX-NEXT: .LBB77_4: @ %select.end2
3300 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3301 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3302 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
3303 ; CHECK-FIX-NEXT: bx lr
3304 %5 = bitcast ptr %3 to ptr
3305 %6 = load <2 x i64>, ptr %5, align 8
3306 %7 = insertelement <2 x i64> %6, i64 %1, i64 0
3307 %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
3308 %9 = bitcast <16 x i8> %2 to <2 x i64>
3309 %10 = insertelement <2 x i64> %9, i64 %1, i64 0
3310 %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
3311 %12 = bitcast <2 x i64> %8 to <16 x i8>
3312 %13 = bitcast <2 x i64> %11 to <16 x i8>
3313 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
3314 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
3315 store <16 x i8> %15, ptr %3, align 8
; aesd in a loop, i64 scalar loaded through %1: the doubleword is inserted
; into lane 0 of the vector argument %2 once, before the loop (%8), and is
; also stored to %3. When %0 is non-zero the vector at %3 is loaded and run
; through aesd + aesimc until the counter %20 equals %0, then the final value
; is stored back to %3. The `bitcast ptr %3 to ptr` is a no-op cast. The
; expected code moves the scalar through r4/r5 (`ldrd`/`strd`, then
; `vmov d0, r4, r5`), so the function saves and restores r4, r5, r11 and lr.
; Both CHECK prefix groups expect `vorr q0, q0, q0` as the first instruction
; and none inside the loop; they differ in the early exit (branch to a shared
; `pop` versus a conditional `popeq`).
3319 define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3320 ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr:
3321 ; CHECK-FIX-NOSCHED: @ %bb.0:
3322 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
3323 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr}
3324 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr}
3325 ; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1]
3326 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3327 ; CHECK-FIX-NOSCHED-NEXT: strd r4, r5, [r2]
3328 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB78_4
3329 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
3330 ; CHECK-FIX-NOSCHED-NEXT: vmov d0, r4, r5
3331 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3332 ; CHECK-FIX-NOSCHED-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1
3333 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
3334 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
3335 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3336 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB78_2
3337 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
3338 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
3339 ; CHECK-FIX-NOSCHED-NEXT: .LBB78_4:
3340 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r11, pc}
3342 ; CHECK-CORTEX-FIX-LABEL: aesd_set64_loop_via_ptr:
3343 ; CHECK-CORTEX-FIX: @ %bb.0:
3344 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
3345 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr}
3346 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr}
3347 ; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1]
3348 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3349 ; CHECK-CORTEX-FIX-NEXT: strd r4, r5, [r2]
3350 ; CHECK-CORTEX-FIX-NEXT: popeq {r4, r5, r11, pc}
3351 ; CHECK-CORTEX-FIX-NEXT: .LBB78_1:
3352 ; CHECK-CORTEX-FIX-NEXT: vmov d0, r4, r5
3353 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3354 ; CHECK-CORTEX-FIX-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1
3355 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
3356 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
3357 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
3358 ; CHECK-CORTEX-FIX-NEXT: bne .LBB78_2
3359 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
3360 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3361 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc}
3362 %5 = load i64, ptr %1, align 8
3363 %6 = bitcast <16 x i8> %2 to <2 x i64>
3364 %7 = insertelement <2 x i64> %6, i64 %5, i64 0
3365 %8 = bitcast <2 x i64> %7 to <16 x i8>
3366 %9 = bitcast ptr %3 to ptr
3367 store i64 %5, ptr %9, align 8
3368 %10 = icmp eq i32 %0, 0
3369 br i1 %10, label %14, label %11
3372 %12 = load <16 x i8>, ptr %3, align 8
3376 store <16 x i8> %19, ptr %3, align 8
3383 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
3384 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
3385 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
3386 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3387 %20 = add nuw i32 %17, 1
3388 %21 = icmp eq i32 %20, %0
3389 br i1 %21, label %13, label %15
; Loop test with an i64 passed by value. %1 is inserted into lane 0 of the
; <2 x i64> view of %2 once, ahead of the loop, and into lane 0 of the vector
; that is re-loaded through %3 on every iteration. Each iteration runs aesd
; followed by aesimc on those two vectors and stores the result through %3.
; The expected code starts with `vorr q0, q0, q0` and performs the lane writes
; with `vmov.32`, both before the loop and inside it.
3392 define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
3393 ; CHECK-FIX-LABEL: aesd_set64_loop_via_val:
3394 ; CHECK-FIX: @ %bb.0:
3395 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3396 ; CHECK-FIX-NEXT: cmp r0, #0
3397 ; CHECK-FIX-NEXT: bxeq lr
3398 ; CHECK-FIX-NEXT: .LBB79_1:
3399 ; CHECK-FIX-NEXT: vmov.32 d0[0], r2
3400 ; CHECK-FIX-NEXT: ldr r1, [sp]
3401 ; CHECK-FIX-NEXT: vmov.32 d0[1], r3
3402 ; CHECK-FIX-NEXT: .LBB79_2: @ =>This Inner Loop Header: Depth=1
3403 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3404 ; CHECK-FIX-NEXT: subs r0, r0, #1
3405 ; CHECK-FIX-NEXT: vmov.32 d16[0], r2
3406 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3
3407 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3408 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3409 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
3410 ; CHECK-FIX-NEXT: bne .LBB79_2
3411 ; CHECK-FIX-NEXT: @ %bb.3:
3412 ; CHECK-FIX-NEXT: bx lr
; A zero trip count %0 branches straight to %12, which is also the loop exit.
3413 %5 = icmp eq i32 %0, 0
3414 br i1 %5, label %12, label %6
; Loop-invariant setup: the second aesd operand is %2 with lane 0 replaced by %1.
3417 %7 = bitcast <16 x i8> %2 to <2 x i64>
3418 %8 = insertelement <2 x i64> %7, i64 %1, i64 0
3419 %9 = bitcast <2 x i64> %8 to <16 x i8>
; %10 and %11 are ptr-to-ptr casts of %3, i.e. the same address.
3420 %10 = bitcast ptr %3 to ptr
3421 %11 = bitcast ptr %3 to ptr
; Loop body: %14 counts iterations from 0 up to %0.
3428 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
; The first aesd operand is reloaded each time and gets %1 in lane 0.
3429 %15 = load <2 x i64>, ptr %10, align 8
3430 %16 = insertelement <2 x i64> %15, i64 %1, i64 0
3431 %17 = bitcast <2 x i64> %16 to <16 x i8>
3432 store i64 %1, ptr %11, align 8
3433 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
3434 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3435 store <16 x i8> %19, ptr %3, align 8
3436 %20 = add nuw i32 %14, 1
3437 %21 = icmp eq i32 %20, %0
3438 br i1 %21, label %12, label %13
; Straight-line test with a 16-bit element read through a pointer. The i16
; loaded via %0 is inserted into lane 0 of the <8 x i16> loaded via %2 and into
; lane 0 of the <8 x i16> view of %1; both results feed aesd, then aesimc, and
; the output is stored through %2.
; The two expected sequences differ only in the order of the `ldrh` and
; `vld1.64`; both begin with `vorr q0, q0, q0` and write the lanes with
; `vmov.16`.
3441 define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
3442 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr:
3443 ; CHECK-FIX-NOSCHED: @ %bb.0:
3444 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
3445 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
3446 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
3447 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
3448 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
3449 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
3450 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3451 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
3452 ; CHECK-FIX-NOSCHED-NEXT: bx lr
3454 ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_via_ptr:
3455 ; CHECK-CORTEX-FIX: @ %bb.0:
3456 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
3457 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3458 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
3459 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
3460 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
3461 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
3462 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
3463 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
3464 ; CHECK-CORTEX-FIX-NEXT: bx lr
; The 16-bit value is handled as i16 throughout; no half-typed value appears.
3465 %4 = bitcast ptr %0 to ptr
3466 %5 = load i16, ptr %4, align 2
; First aesd operand: vector loaded via %2 with lane 0 replaced.
3467 %6 = bitcast ptr %2 to ptr
3468 %7 = load <8 x i16>, ptr %6, align 8
3469 %8 = insertelement <8 x i16> %7, i16 %5, i64 0
3470 %9 = bitcast <8 x i16> %8 to <16 x i8>
; Second aesd operand: argument %1 with lane 0 replaced by the same value.
3471 %10 = bitcast <16 x i8> %1 to <8 x i16>
3472 %11 = insertelement <8 x i16> %10, i16 %5, i64 0
3473 %12 = bitcast <8 x i16> %11 to <16 x i8>
3474 %13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %12)
3475 %14 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %13)
3476 store <16 x i8> %14, ptr %2, align 8
; Straight-line test with a half passed by value. %0 is reinterpreted as i16
; and inserted into lane 0 of the <8 x i16> loaded via %2 and into lane 0 of
; the <8 x i16> view of %1; both results feed aesd, then aesimc, and the output
; is stored through %2.
; The expected code begins with `vorr q1, q1, q1`, moves s0 to a core register
; and writes the lanes with `vmov.16`.
3480 define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind {
3481 ; CHECK-FIX-LABEL: aesd_setf16_via_val:
3482 ; CHECK-FIX: @ %bb.0:
3483 ; CHECK-FIX-NEXT: vorr q1, q1, q1
3484 ; CHECK-FIX-NEXT: vmov r1, s0
3485 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
3486 ; CHECK-FIX-NEXT: vmov.16 d2[0], r1
3487 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
3488 ; CHECK-FIX-NEXT: aesd.8 q8, q1
3489 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3490 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
3491 ; CHECK-FIX-NEXT: bx lr
; First aesd operand: vector loaded via %2 with lane 0 set to the bits of %0.
3492 %4 = bitcast ptr %2 to ptr
3493 %5 = load <8 x i16>, ptr %4, align 8
3494 %6 = bitcast half %0 to i16
3495 %7 = insertelement <8 x i16> %5, i16 %6, i64 0
3496 %8 = bitcast <8 x i16> %7 to <16 x i8>
; Second aesd operand: argument %1 with lane 0 set to the same bits.
3497 %9 = bitcast <16 x i8> %1 to <8 x i16>
3498 %10 = insertelement <8 x i16> %9, i16 %6, i64 0
3499 %11 = bitcast <8 x i16> %10 to <16 x i8>
3500 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
3501 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
3502 store <16 x i8> %13, ptr %2, align 8
; Conditional test with a 16-bit element read through a pointer. The flag %0
; is branched on twice: first to decide whether lane 0 of the vector loaded via
; %3 is replaced by the i16 loaded via %1, then to decide whether lane 0 of %2
; is replaced by that i16. The two merged <8 x half> values feed aesd, then
; aesimc, and the output is stored through %3.
; Both expected sequences below extract the 16-bit lanes of the two operands
; to core registers with `vmov.u16`, repack them with `pkhbt` and rebuild the
; vectors with `vmov.32` lane writes before the aesd/aesimc pair; neither
; sequence contains a `vorr`.
3506 define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3507 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr:
3508 ; CHECK-FIX-NOSCHED: @ %bb.0:
3509 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3510 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3511 ; CHECK-FIX-NOSCHED-NEXT: .pad #12
3512 ; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
3513 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3514 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_3
3515 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
3516 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3517 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
3518 ; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1]
3519 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
3520 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
3521 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
3522 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
3523 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
3524 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
3525 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
3526 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
3527 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
3528 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3529 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB82_4
3530 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_2:
3531 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
3532 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
3533 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
3534 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
3535 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
3536 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
3537 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
3538 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r1, d0[0]
3539 ; CHECK-FIX-NOSCHED-NEXT: b .LBB82_5
3540 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_3:
3541 ; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #8
3542 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r2:32]
3543 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r3:32]
3544 ; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #4
3545 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r3:32]
3546 ; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #12
3547 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r3:32]
3548 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3]
3549 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0]
3550 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3]
3551 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
3552 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d16[0]
3553 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
3554 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2]
3555 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
3556 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1]
3557 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
3558 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1]
3559 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3560 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_2
3561 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_4:
3562 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3]
3563 ; CHECK-FIX-NOSCHED-NEXT: ldrh r1, [r1]
3564 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2]
3565 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1]
3566 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0]
3567 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3]
3568 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2]
3569 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1]
3570 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_5:
3571 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r8, lsl #16
3572 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r3, r7, r3, lsl #16
3573 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r4, r0, lsl #16
3574 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r1
3575 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, lr, r12, lsl #16
3576 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r3
3577 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1
3578 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r11, r10, lsl #16
3579 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r1
3580 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r6, r5, lsl #16
3581 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r1
3582 ; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp] @ 4-byte Reload
3583 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r9, r1, lsl #16
3584 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
3585 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
3586 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r1
3587 ; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
3588 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16
3589 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
3590 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
3591 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3592 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
3593 ; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
3594 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
3596 ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr:
3597 ; CHECK-CORTEX-FIX: @ %bb.0:
3598 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3599 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3600 ; CHECK-CORTEX-FIX-NEXT: .pad #24
3601 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24
3602 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3603 ; CHECK-CORTEX-FIX-NEXT: beq .LBB82_2
3604 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
3605 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3606 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
3607 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[0]
3608 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
3609 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
3610 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
3611 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
3612 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
3613 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
3614 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
3615 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
3616 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
3617 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
3618 ; CHECK-CORTEX-FIX-NEXT: mov r3, r6
3619 ; CHECK-CORTEX-FIX-NEXT: b .LBB82_3
3620 ; CHECK-CORTEX-FIX-NEXT: .LBB82_2:
3621 ; CHECK-CORTEX-FIX-NEXT: add r3, r2, #8
3622 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r2:32]
3623 ; CHECK-CORTEX-FIX-NEXT: add r7, r2, #4
3624 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r3:32]
3625 ; CHECK-CORTEX-FIX-NEXT: add r3, r2, #12
3626 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r7:32]
3627 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r3:32]
3628 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[0]
3629 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
3630 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
3631 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[1]
3632 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
3633 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2]
3634 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
3635 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2]
3636 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
3637 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3]
3638 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
3639 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
3640 ; CHECK-CORTEX-FIX-NEXT: .LBB82_3:
3641 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d17[3]
3642 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3643 ; CHECK-CORTEX-FIX-NEXT: beq .LBB82_5
3644 ; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
3645 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r1]
3646 ; CHECK-CORTEX-FIX-NEXT: b .LBB82_6
3647 ; CHECK-CORTEX-FIX-NEXT: .LBB82_5:
3648 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r0, d0[0]
3649 ; CHECK-CORTEX-FIX-NEXT: .LBB82_6:
3650 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
3651 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
3652 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
3653 ; CHECK-CORTEX-FIX-NEXT: pkhbt r9, r7, r4, lsl #16
3654 ; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
3655 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d0[1]
3656 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d0[2]
3657 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d0[3]
3658 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d1[0]
3659 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d1[1]
3660 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d1[2]
3661 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d1[3]
3662 ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16
3663 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
3664 ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
3665 ; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
3666 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r11, r5, lsl #16
3667 ; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r3, r1, lsl #16
3668 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #16] @ 4-byte Reload
3669 ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16
3670 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload
3671 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4
3672 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1
3673 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
3674 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r9
3675 ; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r10, lsl #16
3676 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3
3677 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5
3678 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6
3679 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
3680 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
3681 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
3682 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3683 ; CHECK-CORTEX-FIX-NEXT: add sp, sp, #24
3684 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; First decision: pick how the first aesd operand is produced.
3685 br i1 %0, label %5, label %12
; %0 true: load an i16 via %1 and place it in lane 0 of the vector loaded via %3.
3688 %6 = bitcast ptr %1 to ptr
3689 %7 = load i16, ptr %6, align 2
3690 %8 = bitcast ptr %3 to ptr
3691 %9 = load <8 x i16>, ptr %8, align 8
3692 %10 = insertelement <8 x i16> %9, i16 %7, i64 0
3693 %11 = bitcast <8 x i16> %10 to <8 x half>
; %0 false: use the vector loaded via %3 unchanged.
3697 %13 = bitcast ptr %3 to ptr
3698 %14 = load <8 x half>, ptr %13, align 8
; Merge of the two paths, then the same flag is tested again for the second operand.
3702 %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
3703 br i1 %0, label %17, label %23
; %0 true: reload the i16 via %1 and place it in lane 0 of %2.
3706 %18 = bitcast ptr %1 to ptr
3707 %19 = load i16, ptr %18, align 2
3708 %20 = bitcast <16 x i8> %2 to <8 x i16>
3709 %21 = insertelement <8 x i16> %20, i16 %19, i64 0
3710 %22 = bitcast <8 x i16> %21 to <8 x half>
; %0 false: use %2 unchanged.
3714 %24 = bitcast <16 x i8> %2 to <8 x half>
; Merge, convert both operands back to <16 x i8> and run the AES pair.
3718 %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
3719 %27 = bitcast <8 x half> %16 to <16 x i8>
3720 %28 = bitcast <8 x half> %26 to <16 x i8>
3721 %29 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %27, <16 x i8> %28)
3722 %30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %29)
3723 store <16 x i8> %30, ptr %3, align 8
; Conditional test with a half passed by value. The flag %0 is branched on
; twice: first to decide whether lane 0 of the vector loaded via %3 is replaced
; by the bits of %1, then to decide whether lane 0 of %2 is replaced by them.
; The two merged <8 x half> values feed aesd, then aesimc, and the output is
; stored through %3.
; Both expected sequences below extract 16-bit lanes with `vmov.u16`, repack
; them with `pkhbt` and rebuild the two operands with `vmov.32` lane writes
; before the aesd/aesimc pair; neither sequence contains a `vorr`.
3727 define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind {
3728 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val:
3729 ; CHECK-FIX-NOSCHED: @ %bb.0:
3730 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3731 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3732 ; CHECK-FIX-NOSCHED-NEXT: .pad #12
3733 ; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
3734 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3735 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_2
3736 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
3737 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
3738 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s2, s0
3739 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
3740 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
3741 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
3742 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
3743 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
3744 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
3745 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
3746 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
3747 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
3748 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
3749 ; CHECK-FIX-NOSCHED-NEXT: b .LBB83_3
3750 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_2:
3751 ; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #8
3752 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r1:32]
3753 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r2:32]
3754 ; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #4
3755 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r2:32]
3756 ; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #12
3757 ; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r2:32]
3758 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
3759 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
3760 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
3761 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
3762 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
3763 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
3764 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
3765 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
3766 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
3767 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
3768 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[0]
3769 ; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2
3770 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_3:
3771 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d3[3]
3772 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3773 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d3[2]
3774 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d3[1]
3775 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d3[0]
3776 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d2[3]
3777 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d2[2]
3778 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_5
3779 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.4:
3780 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
3781 ; CHECK-FIX-NOSCHED-NEXT: b .LBB83_6
3782 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_5:
3783 ; CHECK-FIX-NOSCHED-NEXT: mov r0, lr
3784 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d2[0]
3785 ; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
3786 ; CHECK-FIX-NOSCHED-NEXT: vmov s0, lr
3787 ; CHECK-FIX-NOSCHED-NEXT: mov lr, r0
3788 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_6:
3789 ; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0
3790 ; CHECK-FIX-NOSCHED-NEXT: vmov r6, s2
3791 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r12, lsl #16
3792 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r6, r6, r8, lsl #16
3793 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
3794 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r4, lsl #16
3795 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r6
3796 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
3797 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
3798 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r0, lsl #16
3799 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
3800 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r2, lsl #16
3801 ; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
3802 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
3803 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
3804 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
3805 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
3806 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r9, lsl #16
3807 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
3808 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, lr, r7, lsl #16
3809 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
3810 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
3811 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3812 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
3813 ; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
3814 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
3816 ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val:
3817 ; CHECK-CORTEX-FIX: @ %bb.0:
3818 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3819 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3820 ; CHECK-CORTEX-FIX-NEXT: .pad #12
3821 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #12
3822 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3823 ; CHECK-CORTEX-FIX-NEXT: beq .LBB83_3
3824 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
3825 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3826 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s2, s0
3827 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[1]
3828 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
3829 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
3830 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
3831 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
3832 ; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #8] @ 4-byte Spill
3833 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[0]
3834 ; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #4] @ 4-byte Spill
3835 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[1]
3836 ; CHECK-CORTEX-FIX-NEXT: str r2, [sp] @ 4-byte Spill
3837 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3838 ; CHECK-CORTEX-FIX-NEXT: bne .LBB83_4
3839 ; CHECK-CORTEX-FIX-NEXT: .LBB83_2:
3840 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d2[0]
3841 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
3842 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
3843 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
3844 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
3845 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
3846 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
3847 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
3848 ; CHECK-CORTEX-FIX-NEXT: vmov s0, lr
3849 ; CHECK-CORTEX-FIX-NEXT: b .LBB83_5
3850 ; CHECK-CORTEX-FIX-NEXT: .LBB83_3:
3851 ; CHECK-CORTEX-FIX-NEXT: add r2, r1, #8
3852 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
3853 ; CHECK-CORTEX-FIX-NEXT: add r3, r1, #4
3854 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r2:32]
3855 ; CHECK-CORTEX-FIX-NEXT: add r2, r1, #12
3856 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r3:32]
3857 ; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r2:32]
3858 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
3859 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[0]
3860 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
3861 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
3862 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
3863 ; CHECK-CORTEX-FIX-NEXT: vmov s2, r2
3864 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
3865 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
3866 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
3867 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
3868 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
3869 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp] @ 4-byte Spill
3870 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3871 ; CHECK-CORTEX-FIX-NEXT: beq .LBB83_2
3872 ; CHECK-CORTEX-FIX-NEXT: .LBB83_4:
3873 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
3874 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
3875 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
3876 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
3877 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
3878 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
3879 ; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
3880 ; CHECK-CORTEX-FIX-NEXT: .LBB83_5:
3881 ; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r11, r6, lsl #16
3882 ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r7, r10, lsl #16
3883 ; CHECK-CORTEX-FIX-NEXT: ldm sp, {r6, r7} @ 8-byte Folded Reload
3884 ; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r4, lsl #16
3885 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16
3886 ; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r9, r2, lsl #16
3887 ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r7, r6, lsl #16
3888 ; CHECK-CORTEX-FIX-NEXT: vmov r7, s2
3889 ; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
3890 ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16
3891 ; CHECK-CORTEX-FIX-NEXT: vmov r6, s0
3892 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r7
3893 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r4
3894 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r0
3895 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], lr
3896 ; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r8, lsl #16
3897 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6
3898 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r2
3899 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r3
3900 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r5
3901 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
3902 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
3903 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
3904 ; CHECK-CORTEX-FIX-NEXT: add sp, sp, #12
3905 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
; First decision: pick how the first aesd operand is produced.
3906 br i1 %0, label %5, label %11
; %0 true: place the bits of %1 in lane 0 of the vector loaded via %3.
3909 %6 = bitcast ptr %3 to ptr
3910 %7 = load <8 x i16>, ptr %6, align 8
3911 %8 = bitcast half %1 to i16
3912 %9 = insertelement <8 x i16> %7, i16 %8, i64 0
3913 %10 = bitcast <8 x i16> %9 to <8 x half>
; %0 false: use the vector loaded via %3 unchanged.
3917 %12 = bitcast ptr %3 to ptr
3918 %13 = load <8 x half>, ptr %12, align 8
; Merge of the two paths, then the same flag is tested again for the second operand.
3922 %15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
3923 br i1 %0, label %16, label %21
; %0 true: place the bits of %1 in lane 0 of %2.
3926 %17 = bitcast <16 x i8> %2 to <8 x i16>
3927 %18 = bitcast half %1 to i16
3928 %19 = insertelement <8 x i16> %17, i16 %18, i64 0
3929 %20 = bitcast <8 x i16> %19 to <8 x half>
; %0 false: use %2 unchanged.
3933 %22 = bitcast <16 x i8> %2 to <8 x half>
; Merge, convert both operands back to <16 x i8> and run the AES pair.
3937 %24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
3938 %25 = bitcast <8 x half> %15 to <16 x i8>
3939 %26 = bitcast <8 x half> %24 to <16 x i8>
3940 %27 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %25, <16 x i8> %26)
3941 %28 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %27)
3942 store <16 x i8> %28, ptr %3, align 8
; Loop test with a 16-bit element read through a pointer. The i16 loaded via
; %1 is inserted into lane 0 of the <8 x i16> view of %2 and also stored to the
; start of the buffer at %3. When the trip count %0 is non-zero, the vector at
; %3 is loaded once and then passed through aesd/aesimc repeatedly, with the
; modified %2 as the second aesd operand, until the counter reaches %0.
; The expected code begins with `vorr q0, q0, q0`, performs the lane write
; with `vmov.16` ahead of the loop, and keeps only the AES pair and the
; counter update inside the loop.
3946 define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3947 ; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr:
3948 ; CHECK-FIX: @ %bb.0:
3949 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3950 ; CHECK-FIX-NEXT: ldrh r1, [r1]
3951 ; CHECK-FIX-NEXT: cmp r0, #0
3952 ; CHECK-FIX-NEXT: strh r1, [r2]
3953 ; CHECK-FIX-NEXT: bxeq lr
3954 ; CHECK-FIX-NEXT: .LBB84_1:
3955 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
3956 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3957 ; CHECK-FIX-NEXT: .LBB84_2: @ =>This Inner Loop Header: Depth=1
3958 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3959 ; CHECK-FIX-NEXT: subs r0, r0, #1
3960 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3961 ; CHECK-FIX-NEXT: bne .LBB84_2
3962 ; CHECK-FIX-NEXT: @ %bb.3:
3963 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3964 ; CHECK-FIX-NEXT: bx lr
; Entry: build the second aesd operand (%2 with lane 0 replaced) and write
; the same 16-bit value to the first two bytes at %3.
3965 %5 = bitcast ptr %1 to ptr
3966 %6 = load i16, ptr %5, align 2
3967 %7 = bitcast <16 x i8> %2 to <8 x i16>
3968 %8 = insertelement <8 x i16> %7, i16 %6, i64 0
3969 %9 = bitcast <8 x i16> %8 to <16 x i8>
3970 %10 = bitcast ptr %3 to ptr
3971 store i16 %6, ptr %10, align 8
3972 %11 = icmp eq i32 %0, 0
3973 br i1 %11, label %15, label %12
; Initial value of the vector that is carried around the loop.
3976 %13 = load <16 x i8>, ptr %3, align 8
; Store of the loop result %20 through %3 (the enclosing block label is not
; visible here; presumably the loop-exit block %14).
3980 store <16 x i8> %20, ptr %3, align 8
; Loop body: %17 is the carried vector, %18 the iteration counter.
3987 %17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
3988 %18 = phi i32 [ 0, %12 ], [ %21, %16 ]
3989 %19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
3990 %20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
3991 %21 = add nuw i32 %18, 1
3992 %22 = icmp eq i32 %21, %0
3993 br i1 %22, label %14, label %16
; Loop test with a half passed by value. The bits of %1 are inserted into lane
; 0 of the <8 x i16> view of %2 once, ahead of the loop, and into lane 0 of the
; vector that is re-loaded through %3 on every iteration. Each iteration runs
; aesd followed by aesimc on those two vectors and stores the result through
; %3.
; The expected code begins with `vorr q1, q1, q1` and performs the lane writes
; with `vmov.16`, one before the loop and one inside it.
3996 define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind {
3997 ; CHECK-FIX-LABEL: aesd_setf16_loop_via_val:
3998 ; CHECK-FIX: @ %bb.0:
3999 ; CHECK-FIX-NEXT: vorr q1, q1, q1
4000 ; CHECK-FIX-NEXT: cmp r0, #0
4001 ; CHECK-FIX-NEXT: bxeq lr
4002 ; CHECK-FIX-NEXT: .LBB85_1:
4003 ; CHECK-FIX-NEXT: vmov r2, s0
4004 ; CHECK-FIX-NEXT: vmov.16 d2[0], r2
4005 ; CHECK-FIX-NEXT: .LBB85_2: @ =>This Inner Loop Header: Depth=1
4006 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
4007 ; CHECK-FIX-NEXT: subs r0, r0, #1
4008 ; CHECK-FIX-NEXT: vmov.16 d16[0], r2
4009 ; CHECK-FIX-NEXT: aesd.8 q8, q1
4010 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
4011 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
4012 ; CHECK-FIX-NEXT: bne .LBB85_2
4013 ; CHECK-FIX-NEXT: @ %bb.3:
4014 ; CHECK-FIX-NEXT: bx lr
; A zero trip count %0 branches straight to %13, which is also the loop exit.
4015 %5 = icmp eq i32 %0, 0
4016 br i1 %5, label %13, label %6
; Loop-invariant setup: the second aesd operand is %2 with lane 0 replaced.
4019 %7 = bitcast <16 x i8> %2 to <8 x i16>
4020 %8 = bitcast half %1 to i16
4021 %9 = insertelement <8 x i16> %7, i16 %8, i64 0
4022 %10 = bitcast <8 x i16> %9 to <16 x i8>
; %11 and %12 are ptr-to-ptr casts of %3, i.e. the same address.
4023 %11 = bitcast ptr %3 to ptr
4024 %12 = bitcast ptr %3 to ptr
; Loop body: %15 counts iterations from 0 up to %0.
4031 %15 = phi i32 [ 0, %6 ], [ %21, %14 ]
; The first aesd operand is reloaded each time and gets the same bits in lane 0.
4032 %16 = load <8 x i16>, ptr %11, align 8
4033 %17 = insertelement <8 x i16> %16, i16 %8, i64 0
4034 %18 = bitcast <8 x i16> %17 to <16 x i8>
4035 store half %1, ptr %12, align 8
4036 %19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %18, <16 x i8> %10)
4037 %20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
4038 store <16 x i8> %20, ptr %3, align 8
4039 %21 = add nuw i32 %15, 1
4040 %22 = icmp eq i32 %21, %0
4041 br i1 %22, label %13, label %14
; Straight-line test with a float read through a pointer. The float loaded via
; %0 is inserted into lane 0 of the <4 x float> loaded via %2 and into lane 0
; of the <4 x float> view of %1; both results feed aesd, then aesimc, and the
; output is stored through %2.
; In the expected code the lane writes are done with `vldr s0` and
; `vmov.f32 s4, s0`, and a `vorr` of each operand register (q1, then q0) sits
; directly in front of the aesd.
4044 define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
4045 ; CHECK-FIX-LABEL: aesd_setf32_via_ptr:
4046 ; CHECK-FIX: @ %bb.0:
4047 ; CHECK-FIX-NEXT: vldr s0, [r0]
4048 ; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
4049 ; CHECK-FIX-NEXT: vmov.f32 s4, s0
4050 ; CHECK-FIX-NEXT: vorr q1, q1, q1
4051 ; CHECK-FIX-NEXT: vorr q0, q0, q0
4052 ; CHECK-FIX-NEXT: aesd.8 q1, q0
4053 ; CHECK-FIX-NEXT: aesimc.8 q8, q1
4054 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
4055 ; CHECK-FIX-NEXT: bx lr
4056 %4 = load float, ptr %0, align 4
; First aesd operand: vector loaded via %2 with lane 0 replaced.
4057 %5 = bitcast ptr %2 to ptr
4058 %6 = load <4 x float>, ptr %5, align 8
4059 %7 = insertelement <4 x float> %6, float %4, i64 0
4060 %8 = bitcast <4 x float> %7 to <16 x i8>
; Second aesd operand: argument %1 with lane 0 replaced by the same float.
4061 %9 = bitcast <16 x i8> %1 to <4 x float>
4062 %10 = insertelement <4 x float> %9, float %4, i64 0
4063 %11 = bitcast <4 x float> %10 to <16 x i8>
4064 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
4065 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
4066 store <16 x i8> %13, ptr %2, align 8
; Straight-line test with a float passed by value. %0 is inserted into lane 0
; of the <4 x float> loaded via %2 and into lane 0 of the <4 x float> view of
; %1; both results feed aesd, then aesimc, and the output is stored through
; %2.
; In the expected code the lane writes are `vmov.f32` copies between S
; registers, and a `vorr` of each operand register (q0, then q1) sits directly
; in front of the aesd.
4070 define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind {
4071 ; CHECK-FIX-LABEL: aesd_setf32_via_val:
4072 ; CHECK-FIX: @ %bb.0:
4073 ; CHECK-FIX-NEXT: vmov.f32 s4, s0
4074 ; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
4075 ; CHECK-FIX-NEXT: vmov.f32 s0, s4
4076 ; CHECK-FIX-NEXT: vorr q0, q0, q0
4077 ; CHECK-FIX-NEXT: vorr q1, q1, q1
4078 ; CHECK-FIX-NEXT: aesd.8 q0, q1
4079 ; CHECK-FIX-NEXT: aesimc.8 q8, q0
4080 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
4081 ; CHECK-FIX-NEXT: bx lr
; First aesd operand: vector loaded via %2 with lane 0 replaced by %0.
4082 %4 = bitcast ptr %2 to ptr
4083 %5 = load <4 x float>, ptr %4, align 8
4084 %6 = insertelement <4 x float> %5, float %0, i64 0
4085 %7 = bitcast <4 x float> %6 to <16 x i8>
; Second aesd operand: argument %1 with lane 0 replaced by %0.
4086 %8 = bitcast <16 x i8> %1 to <4 x float>
4087 %9 = insertelement <4 x float> %8, float %0, i64 0
4088 %10 = bitcast <4 x float> %9 to <16 x i8>
4089 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
4090 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
4091 store <16 x i8> %12, ptr %2, align 8
; Conditional test with a float read through a pointer. The flag %0 is
; branched on twice: first to decide whether lane 0 of the vector loaded via %3
; is replaced by the float loaded via %1, then to decide whether lane 0 of %2
; is replaced by that float. The two merged vectors feed aesd, then aesimc,
; and the output is stored through %3.
; The expected code begins with `vorr q0, q0, q0` and performs the conditional
; lane writes with single-lane `vld1.32` loads from r1.
4095 define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
4096 ; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
4097 ; CHECK-FIX: @ %bb.0:
4098 ; CHECK-FIX-NEXT: vorr q0, q0, q0
4099 ; CHECK-FIX-NEXT: cmp r0, #0
4100 ; CHECK-FIX-NEXT: beq .LBB88_2
4101 ; CHECK-FIX-NEXT: @ %bb.1:
4102 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
4103 ; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
4104 ; CHECK-FIX-NEXT: cmp r0, #0
4105 ; CHECK-FIX-NEXT: bne .LBB88_3
4106 ; CHECK-FIX-NEXT: b .LBB88_4
4107 ; CHECK-FIX-NEXT: .LBB88_2:
4108 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
4109 ; CHECK-FIX-NEXT: cmp r0, #0
4110 ; CHECK-FIX-NEXT: beq .LBB88_4
4111 ; CHECK-FIX-NEXT: .LBB88_3:
4112 ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
4113 ; CHECK-FIX-NEXT: .LBB88_4:
4114 ; CHECK-FIX-NEXT: aesd.8 q8, q0
4115 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
4116 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
4117 ; CHECK-FIX-NEXT: bx lr
; First decision: pick how the first aesd operand is produced.
4118 br i1 %0, label %5, label %10
; %0 true: load a float via %1 and place it in lane 0 of the vector loaded via %3.
4121 %6 = load float, ptr %1, align 4
4122 %7 = bitcast ptr %3 to ptr
4123 %8 = load <4 x float>, ptr %7, align 8
4124 %9 = insertelement <4 x float> %8, float %6, i64 0
; %0 false: use the vector loaded via %3 unchanged.
4128 %11 = bitcast ptr %3 to ptr
4129 %12 = load <4 x float>, ptr %11, align 8
; Merge of the two paths, then the same flag is tested again for the second operand.
4133 %14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
4134 br i1 %0, label %15, label %19
; %0 true: reload the float via %1 and place it in lane 0 of %2.
4137 %16 = load float, ptr %1, align 4
4138 %17 = bitcast <16 x i8> %2 to <4 x float>
4139 %18 = insertelement <4 x float> %17, float %16, i64 0
; %0 false: use %2 unchanged.
4143 %20 = bitcast <16 x i8> %2 to <4 x float>
; Merge, convert both operands back to <16 x i8> and run the AES pair.
4147 %22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
4148 %23 = bitcast <4 x float> %14 to <16 x i8>
4149 %24 = bitcast <4 x float> %22 to <16 x i8>
4150 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
4151 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
4152 store <16 x i8> %26, ptr %3, align 8
; Conditional test with a float passed by value, written with `select` rather
; than branches. For each aesd operand the IR builds a copy with %1 in lane 0
; and selects between that copy and the unmodified vector based on %0. The
; selected vectors feed aesd, then aesimc, and the output is stored through
; %3.
; The two expected sequences differ only in the order of the first `cmp` and
; `vld1.64`. In both, each conditional lane write is a `vmovne.f32` that is
; immediately followed by a `vorr` of the containing Q register.
4156 define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind {
4157 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_cond_via_val:
4158 ; CHECK-FIX-NOSCHED: @ %bb.0:
4159 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
4160 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
4161 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
4162 ; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
4163 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
4164 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
4165 ; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
4166 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1
4167 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2
4168 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
4169 ; CHECK-FIX-NOSCHED-NEXT: bx lr
4171 ; CHECK-CORTEX-FIX-LABEL: aesd_setf32_cond_via_val:
4172 ; CHECK-CORTEX-FIX: @ %bb.0:
4173 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
4174 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
4175 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
4176 ; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
4177 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
4178 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
4179 ; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
4180 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1
4181 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
4182 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
4183 ; CHECK-CORTEX-FIX-NEXT: bx lr
; First aesd operand: vector loaded via %3, with lane 0 replaced only if %0 is set.
4184 %5 = bitcast ptr %3 to ptr
4185 %6 = load <4 x float>, ptr %5, align 8
4186 %7 = insertelement <4 x float> %6, float %1, i64 0
4187 %8 = select i1 %0, <4 x float> %7, <4 x float> %6
; Second aesd operand: argument %2, with lane 0 replaced only if %0 is set.
4188 %9 = bitcast <16 x i8> %2 to <4 x float>
4189 %10 = insertelement <4 x float> %9, float %1, i64 0
4190 %11 = select i1 %0, <4 x float> %10, <4 x float> %9
4191 %12 = bitcast <4 x float> %8 to <16 x i8>
4192 %13 = bitcast <4 x float> %11 to <16 x i8>
4193 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
4194 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
4195 store <16 x i8> %15, ptr %3, align 8
; Test case: aesd + aesimc executed in a loop that runs until the counter
; equals %0, with the second aesd operand prepared once, before the loop.
;   - A float is loaded through %1 and inserted into lane 0 of argument %2;
;     the resulting vector (%8) is the second aesd operand on every iteration.
;   - The same float is also stored through %3 before the trip-count test.
;   - If %0 is zero the loop is bypassed; otherwise the initial first operand
;     is loaded from %3 and then carried around the loop by the phi %16, which
;     takes the previous aesimc result.
;   - The final aesimc result is stored through %3 after the loop.
; In the expected code below the `vorr q0, q0, q0` sits at the loop header,
; directly before the aesd, so it is executed on every iteration. The two
; groups of assertions differ only in the order of `vmov.f32 s0, s4` and
; `vld1.64` in the block preceding the loop.
4199 define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
4200 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_ptr:
4201 ; CHECK-FIX-NOSCHED: @ %bb.0:
4202 ; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
4203 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
4204 ; CHECK-FIX-NOSCHED-NEXT: vstr s4, [r2]
4205 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
4206 ; CHECK-FIX-NOSCHED-NEXT: .LBB90_1:
4207 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
4208 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
4209 ; CHECK-FIX-NOSCHED-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
4210 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
4211 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
4212 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
4213 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
4214 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB90_2
4215 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
4216 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
4217 ; CHECK-FIX-NOSCHED-NEXT: bx lr
4219 ; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_ptr:
4220 ; CHECK-CORTEX-FIX: @ %bb.0:
4221 ; CHECK-CORTEX-FIX-NEXT: vldr s4, [r1]
4222 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
4223 ; CHECK-CORTEX-FIX-NEXT: vstr s4, [r2]
4224 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
4225 ; CHECK-CORTEX-FIX-NEXT: .LBB90_1:
4226 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
4227 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
4228 ; CHECK-CORTEX-FIX-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
4229 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
4230 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
4231 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
4232 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
4233 ; CHECK-CORTEX-FIX-NEXT: bne .LBB90_2
4234 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
4235 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
4236 ; CHECK-CORTEX-FIX-NEXT: bx lr
; Loop-invariant second aesd operand: %2 with lane 0 set to the loaded float.
4237 %5 = load float, ptr %1, align 4
4238 %6 = bitcast <16 x i8> %2 to <4 x float>
4239 %7 = insertelement <4 x float> %6, float %5, i64 0
4240 %8 = bitcast <4 x float> %7 to <16 x i8>
4241 %9 = bitcast ptr %3 to ptr
4242 store float %5, ptr %9, align 8
; A zero trip count bypasses the loop.
4243 %10 = icmp eq i32 %0, 0
4244 br i1 %10, label %14, label %11
4247 %12 = load <16 x i8>, ptr %3, align 8
4251 store <16 x i8> %19, ptr %3, align 8
; Loop body: %16 carries the running aesd/aesimc state, %17 counts iterations.
4258 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
4259 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
4260 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
4261 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
4262 %20 = add nuw i32 %17, 1
4263 %21 = icmp eq i32 %20, %0
4264 br i1 %21, label %13, label %15
; Test case: aesd + aesimc executed in a loop that runs until the counter
; equals %0, with the float passed by value (%1) and the first aesd operand
; rebuilt from memory on every iteration.
;   - Before the loop (skipped entirely when %0 is zero): argument %2 gets %1
;     inserted into lane 0, giving the second aesd operand %9.
;   - Each iteration: load a <4 x float> from %3, insert %1 into lane 0 to
;     form the first aesd operand, store %1 as a float through %3, run
;     aesd -> aesimc, and store the 16-byte result back through %3.
; In the expected code below both `vorr q2, q2, q2` and `vorr q1, q1, q1` sit
; inside the loop ahead of the aesd. The two groups of assertions differ only
; in where `subs r0, r0, #1` is placed relative to the vmov/vorr sequence.
4267 define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind {
4268 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_val:
4269 ; CHECK-FIX-NOSCHED: @ %bb.0:
4270 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
4271 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
4272 ; CHECK-FIX-NOSCHED-NEXT: .LBB91_1:
4273 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s4, s0
4274 ; CHECK-FIX-NOSCHED-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1
4275 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
4276 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
4277 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0
4278 ; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
4279 ; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
4280 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1
4281 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2
4282 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
4283 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB91_2
4284 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
4285 ; CHECK-FIX-NOSCHED-NEXT: bx lr
4287 ; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_val:
4288 ; CHECK-CORTEX-FIX: @ %bb.0:
4289 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
4290 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
4291 ; CHECK-CORTEX-FIX-NEXT: .LBB91_1:
4292 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s4, s0
4293 ; CHECK-CORTEX-FIX-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1
4294 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
4295 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0
4296 ; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
4297 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
4298 ; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
4299 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1
4300 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
4301 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
4302 ; CHECK-CORTEX-FIX-NEXT: bne .LBB91_2
4303 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
4304 ; CHECK-CORTEX-FIX-NEXT: bx lr
; A zero trip count bypasses the loop.
4305 %5 = icmp eq i32 %0, 0
4306 br i1 %5, label %12, label %6
; Second aesd operand, built once: %2 with lane 0 set to %1.
4309 %7 = bitcast <16 x i8> %2 to <4 x float>
4310 %8 = insertelement <4 x float> %7, float %1, i64 0
4311 %9 = bitcast <4 x float> %8 to <16 x i8>
4312 %10 = bitcast ptr %3 to ptr
4313 %11 = bitcast ptr %3 to ptr
; Loop body: the first aesd operand is reloaded and patched each iteration.
4320 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
4321 %15 = load <4 x float>, ptr %10, align 8
4322 %16 = insertelement <4 x float> %15, float %1, i64 0
4323 %17 = bitcast <4 x float> %16 to <16 x i8>
4324 store float %1, ptr %11, align 8
4325 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
4326 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
4327 store <16 x i8> %19, ptr %3, align 8
4328 %20 = add nuw i32 %14, 1
4329 %21 = icmp eq i32 %20, %0
4330 br i1 %21, label %12, label %13
; Test case: aese + aesmc where the first aese operand is a compile-time
; constant vector <0, 1, ..., 15> and the second operand is loaded from %0.
; The aesmc result is stored back through %0.
; In the expected code below the constant is fetched from a constant-pool
; entry (.LCPI92_0) placed after the function body, using `adr` followed by a
; 128-bit-aligned `vld1.64`, and no vorr appears before the aese. The
; assertions also pin all 16 bytes of the pool entry. The two groups of
; assertions differ only in the order of `adr` and the first `vld1.64`.
4333 define arm_aapcs_vfpcc void @aese_constantisland(ptr %0) nounwind {
4334 ; CHECK-FIX-NOSCHED-LABEL: aese_constantisland:
4335 ; CHECK-FIX-NOSCHED: @ %bb.0:
4336 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r0]
4337 ; CHECK-FIX-NOSCHED-NEXT: adr r1, .LCPI92_0
4338 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r1:128]
4339 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q9, q8
4340 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q9
4341 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r0]
4342 ; CHECK-FIX-NOSCHED-NEXT: bx lr
4343 ; CHECK-FIX-NOSCHED-NEXT: .p2align 4
4344 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
4345 ; CHECK-FIX-NOSCHED-NEXT: .LCPI92_0:
4346 ; CHECK-FIX-NOSCHED-NEXT: .byte 0 @ 0x0
4347 ; CHECK-FIX-NOSCHED-NEXT: .byte 1 @ 0x1
4348 ; CHECK-FIX-NOSCHED-NEXT: .byte 2 @ 0x2
4349 ; CHECK-FIX-NOSCHED-NEXT: .byte 3 @ 0x3
4350 ; CHECK-FIX-NOSCHED-NEXT: .byte 4 @ 0x4
4351 ; CHECK-FIX-NOSCHED-NEXT: .byte 5 @ 0x5
4352 ; CHECK-FIX-NOSCHED-NEXT: .byte 6 @ 0x6
4353 ; CHECK-FIX-NOSCHED-NEXT: .byte 7 @ 0x7
4354 ; CHECK-FIX-NOSCHED-NEXT: .byte 8 @ 0x8
4355 ; CHECK-FIX-NOSCHED-NEXT: .byte 9 @ 0x9
4356 ; CHECK-FIX-NOSCHED-NEXT: .byte 10 @ 0xa
4357 ; CHECK-FIX-NOSCHED-NEXT: .byte 11 @ 0xb
4358 ; CHECK-FIX-NOSCHED-NEXT: .byte 12 @ 0xc
4359 ; CHECK-FIX-NOSCHED-NEXT: .byte 13 @ 0xd
4360 ; CHECK-FIX-NOSCHED-NEXT: .byte 14 @ 0xe
4361 ; CHECK-FIX-NOSCHED-NEXT: .byte 15 @ 0xf
4363 ; CHECK-CORTEX-FIX-LABEL: aese_constantisland:
4364 ; CHECK-CORTEX-FIX: @ %bb.0:
4365 ; CHECK-CORTEX-FIX-NEXT: adr r1, .LCPI92_0
4366 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r0]
4367 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r1:128]
4368 ; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
4369 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
4370 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r0]
4371 ; CHECK-CORTEX-FIX-NEXT: bx lr
4372 ; CHECK-CORTEX-FIX-NEXT: .p2align 4
4373 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
4374 ; CHECK-CORTEX-FIX-NEXT: .LCPI92_0:
4375 ; CHECK-CORTEX-FIX-NEXT: .byte 0 @ 0x0
4376 ; CHECK-CORTEX-FIX-NEXT: .byte 1 @ 0x1
4377 ; CHECK-CORTEX-FIX-NEXT: .byte 2 @ 0x2
4378 ; CHECK-CORTEX-FIX-NEXT: .byte 3 @ 0x3
4379 ; CHECK-CORTEX-FIX-NEXT: .byte 4 @ 0x4
4380 ; CHECK-CORTEX-FIX-NEXT: .byte 5 @ 0x5
4381 ; CHECK-CORTEX-FIX-NEXT: .byte 6 @ 0x6
4382 ; CHECK-CORTEX-FIX-NEXT: .byte 7 @ 0x7
4383 ; CHECK-CORTEX-FIX-NEXT: .byte 8 @ 0x8
4384 ; CHECK-CORTEX-FIX-NEXT: .byte 9 @ 0x9
4385 ; CHECK-CORTEX-FIX-NEXT: .byte 10 @ 0xa
4386 ; CHECK-CORTEX-FIX-NEXT: .byte 11 @ 0xb
4387 ; CHECK-CORTEX-FIX-NEXT: .byte 12 @ 0xc
4388 ; CHECK-CORTEX-FIX-NEXT: .byte 13 @ 0xd
4389 ; CHECK-CORTEX-FIX-NEXT: .byte 14 @ 0xe
4390 ; CHECK-CORTEX-FIX-NEXT: .byte 15 @ 0xf
; The constant vector is the first aese operand; the loaded data is the second.
4391 %2 = load <16 x i8>, ptr %0, align 8
4392 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %2)
4393 %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
4394 store <16 x i8> %4, ptr %0, align 8