1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple armv8---eabi -mattr=+aes,+fix-cortex-a57-aes-1742098 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-FIX-NOSCHED
4 ; These CPUs should have the fix enabled by default. They use different
5 ; FileCheck prefixes because some instructions are scheduled differently.
7 ; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a57 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
8 ; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a72 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
10 ; This checks that adding `+fix-cortex-a57-aes-1742098` causes `vorr` to be
11 ; inserted wherever the compiler cannot prove that either input to the first aes
12 ; instruction in a fused aes pair was set by 64-bit Neon register writes or
13 ; 128-bit Neon register writes. All other register writes are unsafe, and
14 ; require a `vorr` to protect the AES input.
16 declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
17 declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)
18 declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>)
19 declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>)
21 declare arm_aapcs_vfpcc <16 x i8> @get_input() local_unnamed_addr
22 declare arm_aapcs_vfpcc <16 x i8> @get_inputf16(half) local_unnamed_addr
23 declare arm_aapcs_vfpcc <16 x i8> @get_inputf32(float) local_unnamed_addr
27 define arm_aapcs_vfpcc void @aese_zero(<16 x i8>* %0) nounwind {
28 ; CHECK-FIX-LABEL: aese_zero:
30 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
31 ; CHECK-FIX-NEXT: vmov.i32 q9, #0x0
32 ; CHECK-FIX-NEXT: aese.8 q9, q8
33 ; CHECK-FIX-NEXT: aesmc.8 q8, q9
34 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
35 ; CHECK-FIX-NEXT: bx lr
36 %2 = load <16 x i8>, <16 x i8>* %0, align 8
37 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> zeroinitializer, <16 x i8> %2)
38 %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
39 store <16 x i8> %4, <16 x i8>* %0, align 8
43 define arm_aapcs_vfpcc void @aese_via_call1(<16 x i8>* %0) nounwind {
44 ; CHECK-FIX-LABEL: aese_via_call1:
46 ; CHECK-FIX-NEXT: .save {r4, lr}
47 ; CHECK-FIX-NEXT: push {r4, lr}
48 ; CHECK-FIX-NEXT: mov r4, r0
49 ; CHECK-FIX-NEXT: bl get_input
50 ; CHECK-FIX-NEXT: vorr q0, q0, q0
51 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
52 ; CHECK-FIX-NEXT: aese.8 q0, q8
53 ; CHECK-FIX-NEXT: aesmc.8 q8, q0
54 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
55 ; CHECK-FIX-NEXT: pop {r4, pc}
56 %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
57 %3 = load <16 x i8>, <16 x i8>* %0, align 8
58 %4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %2, <16 x i8> %3)
59 %5 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %4)
60 store <16 x i8> %5, <16 x i8>* %0, align 8
64 define arm_aapcs_vfpcc void @aese_via_call2(half %0, <16 x i8>* %1) nounwind {
65 ; CHECK-FIX-LABEL: aese_via_call2:
67 ; CHECK-FIX-NEXT: .save {r4, lr}
68 ; CHECK-FIX-NEXT: push {r4, lr}
69 ; CHECK-FIX-NEXT: mov r4, r0
70 ; CHECK-FIX-NEXT: bl get_inputf16
71 ; CHECK-FIX-NEXT: vorr q0, q0, q0
72 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
73 ; CHECK-FIX-NEXT: aese.8 q0, q8
74 ; CHECK-FIX-NEXT: aesmc.8 q8, q0
75 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
76 ; CHECK-FIX-NEXT: pop {r4, pc}
77 %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
78 %4 = load <16 x i8>, <16 x i8>* %1, align 8
79 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
80 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
81 store <16 x i8> %6, <16 x i8>* %1, align 8
85 define arm_aapcs_vfpcc void @aese_via_call3(float %0, <16 x i8>* %1) nounwind {
86 ; CHECK-FIX-LABEL: aese_via_call3:
88 ; CHECK-FIX-NEXT: .save {r4, lr}
89 ; CHECK-FIX-NEXT: push {r4, lr}
90 ; CHECK-FIX-NEXT: mov r4, r0
91 ; CHECK-FIX-NEXT: bl get_inputf32
92 ; CHECK-FIX-NEXT: vorr q0, q0, q0
93 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
94 ; CHECK-FIX-NEXT: aese.8 q0, q8
95 ; CHECK-FIX-NEXT: aesmc.8 q8, q0
96 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
97 ; CHECK-FIX-NEXT: pop {r4, pc}
98 %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
99 %4 = load <16 x i8>, <16 x i8>* %1, align 8
100 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
101 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
102 store <16 x i8> %6, <16 x i8>* %1, align 8
106 define arm_aapcs_vfpcc void @aese_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
107 ; CHECK-FIX-LABEL: aese_once_via_ptr:
108 ; CHECK-FIX: @ %bb.0:
109 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
110 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
111 ; CHECK-FIX-NEXT: aese.8 q9, q8
112 ; CHECK-FIX-NEXT: aesmc.8 q8, q9
113 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
114 ; CHECK-FIX-NEXT: bx lr
115 %3 = load <16 x i8>, <16 x i8>* %1, align 8
116 %4 = load <16 x i8>, <16 x i8>* %0, align 8
117 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
118 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
119 store <16 x i8> %6, <16 x i8>* %1, align 8
123 define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
124 ; CHECK-FIX-LABEL: aese_once_via_val:
125 ; CHECK-FIX: @ %bb.0:
126 ; CHECK-FIX-NEXT: vorr q1, q1, q1
127 ; CHECK-FIX-NEXT: vorr q0, q0, q0
128 ; CHECK-FIX-NEXT: aese.8 q1, q0
129 ; CHECK-FIX-NEXT: aesmc.8 q0, q1
130 ; CHECK-FIX-NEXT: bx lr
131 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
132 %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
136 define arm_aapcs_vfpcc void @aese_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
137 ; CHECK-FIX-LABEL: aese_twice_via_ptr:
138 ; CHECK-FIX: @ %bb.0:
139 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
140 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
141 ; CHECK-FIX-NEXT: aese.8 q9, q8
142 ; CHECK-FIX-NEXT: aesmc.8 q8, q9
143 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
144 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
145 ; CHECK-FIX-NEXT: aese.8 q8, q9
146 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
147 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
148 ; CHECK-FIX-NEXT: bx lr
149 %3 = load <16 x i8>, <16 x i8>* %1, align 8
150 %4 = load <16 x i8>, <16 x i8>* %0, align 8
151 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
152 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
153 store <16 x i8> %6, <16 x i8>* %1, align 8
154 %7 = load <16 x i8>, <16 x i8>* %0, align 8
155 %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
156 %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
157 store <16 x i8> %9, <16 x i8>* %1, align 8
161 define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
162 ; CHECK-FIX-LABEL: aese_twice_via_val:
163 ; CHECK-FIX: @ %bb.0:
164 ; CHECK-FIX-NEXT: vorr q1, q1, q1
165 ; CHECK-FIX-NEXT: vorr q0, q0, q0
166 ; CHECK-FIX-NEXT: vorr q0, q0, q0
167 ; CHECK-FIX-NEXT: aese.8 q1, q0
168 ; CHECK-FIX-NEXT: aesmc.8 q8, q1
169 ; CHECK-FIX-NEXT: aese.8 q8, q0
170 ; CHECK-FIX-NEXT: aesmc.8 q0, q8
171 ; CHECK-FIX-NEXT: bx lr
172 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
173 %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
174 %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %4, <16 x i8> %0)
175 %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
179 define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
180 ; CHECK-FIX-NOSCHED-LABEL: aese_loop_via_ptr:
181 ; CHECK-FIX-NOSCHED: @ %bb.0:
182 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
183 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
184 ; CHECK-FIX-NOSCHED-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
185 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
186 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
187 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r2]
188 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q9, q8
189 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q9
190 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
191 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB8_1
192 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.2:
193 ; CHECK-FIX-NOSCHED-NEXT: bx lr
195 ; CHECK-CORTEX-FIX-LABEL: aese_loop_via_ptr:
196 ; CHECK-CORTEX-FIX: @ %bb.0:
197 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
198 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
199 ; CHECK-CORTEX-FIX-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
200 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
201 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r2]
202 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
203 ; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
204 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
205 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
206 ; CHECK-CORTEX-FIX-NEXT: bne .LBB8_1
207 ; CHECK-CORTEX-FIX-NEXT: @ %bb.2:
208 ; CHECK-CORTEX-FIX-NEXT: bx lr
209 %4 = icmp eq i32 %0, 0
210 br i1 %4, label %5, label %6
216 %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
217 %8 = load <16 x i8>, <16 x i8>* %2, align 8
218 %9 = load <16 x i8>, <16 x i8>* %1, align 8
219 %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %9)
220 %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
221 store <16 x i8> %11, <16 x i8>* %2, align 8
222 %12 = add nuw i32 %7, 1
223 %13 = icmp eq i32 %12, %0
224 br i1 %13, label %5, label %6
227 define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
228 ; CHECK-FIX-LABEL: aese_loop_via_val:
229 ; CHECK-FIX: @ %bb.0:
230 ; CHECK-FIX-NEXT: vorr q1, q1, q1
231 ; CHECK-FIX-NEXT: vorr q0, q0, q0
232 ; CHECK-FIX-NEXT: cmp r0, #0
233 ; CHECK-FIX-NEXT: beq .LBB9_2
234 ; CHECK-FIX-NEXT: .LBB9_1: @ =>This Inner Loop Header: Depth=1
235 ; CHECK-FIX-NEXT: aese.8 q1, q0
236 ; CHECK-FIX-NEXT: subs r0, r0, #1
237 ; CHECK-FIX-NEXT: aesmc.8 q1, q1
238 ; CHECK-FIX-NEXT: bne .LBB9_1
239 ; CHECK-FIX-NEXT: .LBB9_2:
240 ; CHECK-FIX-NEXT: vorr q0, q1, q1
241 ; CHECK-FIX-NEXT: bx lr
242 %4 = icmp eq i32 %0, 0
243 br i1 %4, label %5, label %7
246 %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
250 %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
251 %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
252 %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %1)
253 %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
254 %12 = add nuw i32 %8, 1
255 %13 = icmp eq i32 %12, %0
256 br i1 %13, label %5, label %7
259 define arm_aapcs_vfpcc void @aese_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
260 ; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr:
261 ; CHECK-FIX-NOSCHED: @ %bb.0:
262 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
263 ; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
264 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
265 ; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
266 ; CHECK-FIX-NOSCHED-NEXT: vmov.8 d16[0], r0
267 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
268 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
269 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
270 ; CHECK-FIX-NOSCHED-NEXT: bx lr
272 ; CHECK-CORTEX-FIX-LABEL: aese_set8_via_ptr:
273 ; CHECK-CORTEX-FIX: @ %bb.0:
274 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
275 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
276 ; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
277 ; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
278 ; CHECK-CORTEX-FIX-NEXT: vmov.8 d16[0], r0
279 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
280 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
281 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
282 ; CHECK-CORTEX-FIX-NEXT: bx lr
283 %4 = load i8, i8* %0, align 1
284 %5 = load <16 x i8>, <16 x i8>* %2, align 8
285 %6 = insertelement <16 x i8> %5, i8 %4, i64 0
286 %7 = insertelement <16 x i8> %1, i8 %4, i64 0
287 %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
288 %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
289 store <16 x i8> %9, <16 x i8>* %2, align 8
293 define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
294 ; CHECK-FIX-LABEL: aese_set8_via_val:
295 ; CHECK-FIX: @ %bb.0:
296 ; CHECK-FIX-NEXT: vorr q0, q0, q0
297 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
298 ; CHECK-FIX-NEXT: vmov.8 d0[0], r0
299 ; CHECK-FIX-NEXT: vmov.8 d16[0], r0
300 ; CHECK-FIX-NEXT: aese.8 q8, q0
301 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
302 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
303 ; CHECK-FIX-NEXT: bx lr
304 %4 = load <16 x i8>, <16 x i8>* %2, align 8
305 %5 = insertelement <16 x i8> %4, i8 %0, i64 0
306 %6 = insertelement <16 x i8> %1, i8 %0, i64 0
307 %7 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %5, <16 x i8> %6)
308 %8 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %7)
309 store <16 x i8> %8, <16 x i8>* %2, align 8
313 define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
314 ; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
315 ; CHECK-FIX: @ %bb.0:
316 ; CHECK-FIX-NEXT: vorr q0, q0, q0
317 ; CHECK-FIX-NEXT: cmp r0, #0
318 ; CHECK-FIX-NEXT: beq .LBB12_2
319 ; CHECK-FIX-NEXT: @ %bb.1:
320 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
321 ; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
322 ; CHECK-FIX-NEXT: cmp r0, #0
323 ; CHECK-FIX-NEXT: bne .LBB12_3
324 ; CHECK-FIX-NEXT: b .LBB12_4
325 ; CHECK-FIX-NEXT: .LBB12_2:
326 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
327 ; CHECK-FIX-NEXT: cmp r0, #0
328 ; CHECK-FIX-NEXT: beq .LBB12_4
329 ; CHECK-FIX-NEXT: .LBB12_3:
330 ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1]
331 ; CHECK-FIX-NEXT: .LBB12_4:
332 ; CHECK-FIX-NEXT: aese.8 q8, q0
333 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
334 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
335 ; CHECK-FIX-NEXT: bx lr
336 br i1 %0, label %5, label %9
339 %6 = load i8, i8* %1, align 1
340 %7 = load <16 x i8>, <16 x i8>* %3, align 8
341 %8 = insertelement <16 x i8> %7, i8 %6, i64 0
345 %10 = load <16 x i8>, <16 x i8>* %3, align 8
349 %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
350 br i1 %0, label %13, label %16
353 %14 = load i8, i8* %1, align 1
354 %15 = insertelement <16 x i8> %2, i8 %14, i64 0
358 %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
359 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %17)
360 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
361 store <16 x i8> %19, <16 x i8>* %3, align 8
365 define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
366 ; CHECK-FIX-LABEL: aese_set8_cond_via_val:
367 ; CHECK-FIX: @ %bb.0:
368 ; CHECK-FIX-NEXT: vorr q0, q0, q0
369 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
370 ; CHECK-FIX-NEXT: cmp r0, #0
371 ; CHECK-FIX-NEXT: beq .LBB13_2
372 ; CHECK-FIX-NEXT: @ %bb.1:
373 ; CHECK-FIX-NEXT: vmov.8 d16[0], r1
374 ; CHECK-FIX-NEXT: .LBB13_2: @ %select.end
375 ; CHECK-FIX-NEXT: cmp r0, #0
376 ; CHECK-FIX-NEXT: beq .LBB13_4
377 ; CHECK-FIX-NEXT: @ %bb.3:
378 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
379 ; CHECK-FIX-NEXT: .LBB13_4: @ %select.end2
380 ; CHECK-FIX-NEXT: aese.8 q8, q0
381 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
382 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
383 ; CHECK-FIX-NEXT: bx lr
384 %5 = load <16 x i8>, <16 x i8>* %3, align 8
385 %6 = insertelement <16 x i8> %5, i8 %1, i64 0
386 %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
387 %8 = insertelement <16 x i8> %2, i8 %1, i64 0
388 %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
389 %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %9)
390 %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
391 store <16 x i8> %11, <16 x i8>* %3, align 8
395 define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
396 ; CHECK-FIX-LABEL: aese_set8_loop_via_ptr:
397 ; CHECK-FIX: @ %bb.0:
398 ; CHECK-FIX-NEXT: vorr q0, q0, q0
399 ; CHECK-FIX-NEXT: ldrb r1, [r1]
400 ; CHECK-FIX-NEXT: cmp r0, #0
401 ; CHECK-FIX-NEXT: strb r1, [r2]
402 ; CHECK-FIX-NEXT: bxeq lr
403 ; CHECK-FIX-NEXT: .LBB14_1:
404 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
405 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
406 ; CHECK-FIX-NEXT: .LBB14_2: @ =>This Inner Loop Header: Depth=1
407 ; CHECK-FIX-NEXT: aese.8 q8, q0
408 ; CHECK-FIX-NEXT: subs r0, r0, #1
409 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
410 ; CHECK-FIX-NEXT: bne .LBB14_2
411 ; CHECK-FIX-NEXT: @ %bb.3:
412 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
413 ; CHECK-FIX-NEXT: bx lr
414 %5 = load i8, i8* %1, align 1
415 %6 = insertelement <16 x i8> %2, i8 %5, i64 0
416 %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
417 store i8 %5, i8* %7, align 8
418 %8 = icmp eq i32 %0, 0
419 br i1 %8, label %12, label %9
422 %10 = load <16 x i8>, <16 x i8>* %3, align 8
426 store <16 x i8> %17, <16 x i8>* %3, align 8
433 %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
434 %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
435 %16 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %6)
436 %17 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %16)
437 %18 = add nuw i32 %15, 1
438 %19 = icmp eq i32 %18, %0
439 br i1 %19, label %11, label %13
442 define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
443 ; CHECK-FIX-LABEL: aese_set8_loop_via_val:
444 ; CHECK-FIX: @ %bb.0:
445 ; CHECK-FIX-NEXT: vorr q0, q0, q0
446 ; CHECK-FIX-NEXT: cmp r0, #0
447 ; CHECK-FIX-NEXT: bxeq lr
448 ; CHECK-FIX-NEXT: .LBB15_1:
449 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
450 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
451 ; CHECK-FIX-NEXT: .LBB15_2: @ =>This Inner Loop Header: Depth=1
452 ; CHECK-FIX-NEXT: vmov.8 d16[0], r1
453 ; CHECK-FIX-NEXT: subs r0, r0, #1
454 ; CHECK-FIX-NEXT: aese.8 q8, q0
455 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
456 ; CHECK-FIX-NEXT: bne .LBB15_2
457 ; CHECK-FIX-NEXT: @ %bb.3:
458 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
459 ; CHECK-FIX-NEXT: bx lr
460 %5 = icmp eq i32 %0, 0
461 br i1 %5, label %10, label %6
464 %7 = insertelement <16 x i8> %2, i8 %1, i64 0
465 %8 = load <16 x i8>, <16 x i8>* %3, align 8
469 store <16 x i8> %16, <16 x i8>* %3, align 8
476 %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
477 %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
478 %14 = insertelement <16 x i8> %12, i8 %1, i64 0
479 %15 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %7)
480 %16 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %15)
481 %17 = add nuw i32 %13, 1
482 %18 = icmp eq i32 %17, %0
483 br i1 %18, label %9, label %11
486 define arm_aapcs_vfpcc void @aese_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
487 ; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr:
488 ; CHECK-FIX-NOSCHED: @ %bb.0:
489 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
490 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
491 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
492 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
493 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
494 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
495 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
496 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
497 ; CHECK-FIX-NOSCHED-NEXT: bx lr
499 ; CHECK-CORTEX-FIX-LABEL: aese_set16_via_ptr:
500 ; CHECK-CORTEX-FIX: @ %bb.0:
501 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
502 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
503 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
504 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
505 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
506 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
507 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
508 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
509 ; CHECK-CORTEX-FIX-NEXT: bx lr
510 %4 = load i16, i16* %0, align 2
511 %5 = bitcast <16 x i8>* %2 to <8 x i16>*
512 %6 = load <8 x i16>, <8 x i16>* %5, align 8
513 %7 = insertelement <8 x i16> %6, i16 %4, i64 0
514 %8 = bitcast <8 x i16> %7 to <16 x i8>
515 %9 = bitcast <16 x i8> %1 to <8 x i16>
516 %10 = insertelement <8 x i16> %9, i16 %4, i64 0
517 %11 = bitcast <8 x i16> %10 to <16 x i8>
518 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
519 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
520 store <16 x i8> %13, <16 x i8>* %2, align 8
524 define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
525 ; CHECK-FIX-LABEL: aese_set16_via_val:
526 ; CHECK-FIX: @ %bb.0:
527 ; CHECK-FIX-NEXT: vorr q0, q0, q0
528 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
529 ; CHECK-FIX-NEXT: vmov.16 d0[0], r0
530 ; CHECK-FIX-NEXT: vmov.16 d16[0], r0
531 ; CHECK-FIX-NEXT: aese.8 q8, q0
532 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
533 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
534 ; CHECK-FIX-NEXT: bx lr
535 %4 = bitcast <16 x i8>* %2 to <8 x i16>*
536 %5 = load <8 x i16>, <8 x i16>* %4, align 8
537 %6 = insertelement <8 x i16> %5, i16 %0, i64 0
538 %7 = bitcast <8 x i16> %6 to <16 x i8>
539 %8 = bitcast <16 x i8> %1 to <8 x i16>
540 %9 = insertelement <8 x i16> %8, i16 %0, i64 0
541 %10 = bitcast <8 x i16> %9 to <16 x i8>
542 %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
543 %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
544 store <16 x i8> %12, <16 x i8>* %2, align 8
548 define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
549 ; CHECK-FIX-LABEL: aese_set16_cond_via_ptr:
550 ; CHECK-FIX: @ %bb.0:
551 ; CHECK-FIX-NEXT: vorr q0, q0, q0
552 ; CHECK-FIX-NEXT: cmp r0, #0
553 ; CHECK-FIX-NEXT: beq .LBB18_2
554 ; CHECK-FIX-NEXT: @ %bb.1:
555 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
556 ; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
557 ; CHECK-FIX-NEXT: cmp r0, #0
558 ; CHECK-FIX-NEXT: bne .LBB18_3
559 ; CHECK-FIX-NEXT: b .LBB18_4
560 ; CHECK-FIX-NEXT: .LBB18_2:
561 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
562 ; CHECK-FIX-NEXT: cmp r0, #0
563 ; CHECK-FIX-NEXT: beq .LBB18_4
564 ; CHECK-FIX-NEXT: .LBB18_3:
565 ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16]
566 ; CHECK-FIX-NEXT: .LBB18_4:
567 ; CHECK-FIX-NEXT: aese.8 q8, q0
568 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
569 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
570 ; CHECK-FIX-NEXT: bx lr
571 br i1 %0, label %5, label %10
574 %6 = load i16, i16* %1, align 2
575 %7 = bitcast <16 x i8>* %3 to <8 x i16>*
576 %8 = load <8 x i16>, <8 x i16>* %7, align 8
577 %9 = insertelement <8 x i16> %8, i16 %6, i64 0
581 %11 = bitcast <16 x i8>* %3 to <8 x i16>*
582 %12 = load <8 x i16>, <8 x i16>* %11, align 8
586 %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
587 br i1 %0, label %15, label %19
590 %16 = load i16, i16* %1, align 2
591 %17 = bitcast <16 x i8> %2 to <8 x i16>
592 %18 = insertelement <8 x i16> %17, i16 %16, i64 0
596 %20 = bitcast <16 x i8> %2 to <8 x i16>
600 %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
601 %23 = bitcast <8 x i16> %14 to <16 x i8>
602 %24 = bitcast <8 x i16> %22 to <16 x i8>
603 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
604 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
605 store <16 x i8> %26, <16 x i8>* %3, align 8
609 define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
610 ; CHECK-FIX-LABEL: aese_set16_cond_via_val:
611 ; CHECK-FIX: @ %bb.0:
612 ; CHECK-FIX-NEXT: vorr q0, q0, q0
613 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
614 ; CHECK-FIX-NEXT: cmp r0, #0
615 ; CHECK-FIX-NEXT: beq .LBB19_2
616 ; CHECK-FIX-NEXT: @ %bb.1:
617 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
618 ; CHECK-FIX-NEXT: .LBB19_2: @ %select.end
619 ; CHECK-FIX-NEXT: cmp r0, #0
620 ; CHECK-FIX-NEXT: beq .LBB19_4
621 ; CHECK-FIX-NEXT: @ %bb.3:
622 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
623 ; CHECK-FIX-NEXT: .LBB19_4: @ %select.end2
624 ; CHECK-FIX-NEXT: aese.8 q8, q0
625 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
626 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
627 ; CHECK-FIX-NEXT: bx lr
628 %5 = bitcast <16 x i8>* %3 to <8 x i16>*
629 %6 = load <8 x i16>, <8 x i16>* %5, align 8
630 %7 = insertelement <8 x i16> %6, i16 %1, i64 0
631 %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
632 %9 = bitcast <16 x i8> %2 to <8 x i16>
633 %10 = insertelement <8 x i16> %9, i16 %1, i64 0
634 %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
635 %12 = bitcast <8 x i16> %8 to <16 x i8>
636 %13 = bitcast <8 x i16> %11 to <16 x i8>
637 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
638 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
639 store <16 x i8> %15, <16 x i8>* %3, align 8
643 define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
644 ; CHECK-FIX-LABEL: aese_set16_loop_via_ptr:
645 ; CHECK-FIX: @ %bb.0:
646 ; CHECK-FIX-NEXT: vorr q0, q0, q0
647 ; CHECK-FIX-NEXT: ldrh r1, [r1]
648 ; CHECK-FIX-NEXT: cmp r0, #0
649 ; CHECK-FIX-NEXT: strh r1, [r2]
650 ; CHECK-FIX-NEXT: bxeq lr
651 ; CHECK-FIX-NEXT: .LBB20_1:
652 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
653 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
654 ; CHECK-FIX-NEXT: .LBB20_2: @ =>This Inner Loop Header: Depth=1
655 ; CHECK-FIX-NEXT: aese.8 q8, q0
656 ; CHECK-FIX-NEXT: subs r0, r0, #1
657 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
658 ; CHECK-FIX-NEXT: bne .LBB20_2
659 ; CHECK-FIX-NEXT: @ %bb.3:
660 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
661 ; CHECK-FIX-NEXT: bx lr
662 %5 = load i16, i16* %1, align 2
663 %6 = bitcast <16 x i8> %2 to <8 x i16>
664 %7 = insertelement <8 x i16> %6, i16 %5, i64 0
665 %8 = bitcast <8 x i16> %7 to <16 x i8>
666 %9 = bitcast <16 x i8>* %3 to i16*
667 store i16 %5, i16* %9, align 8
668 %10 = icmp eq i32 %0, 0
669 br i1 %10, label %14, label %11
672 %12 = load <16 x i8>, <16 x i8>* %3, align 8
676 store <16 x i8> %19, <16 x i8>* %3, align 8
683 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
684 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
685 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
686 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
687 %20 = add nuw i32 %17, 1
688 %21 = icmp eq i32 %20, %0
689 br i1 %21, label %13, label %15
692 define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
693 ; CHECK-FIX-LABEL: aese_set16_loop_via_val:
694 ; CHECK-FIX: @ %bb.0:
695 ; CHECK-FIX-NEXT: vorr q0, q0, q0
696 ; CHECK-FIX-NEXT: cmp r0, #0
697 ; CHECK-FIX-NEXT: bxeq lr
698 ; CHECK-FIX-NEXT: .LBB21_1:
699 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
700 ; CHECK-FIX-NEXT: .LBB21_2: @ =>This Inner Loop Header: Depth=1
701 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
702 ; CHECK-FIX-NEXT: subs r0, r0, #1
703 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
704 ; CHECK-FIX-NEXT: aese.8 q8, q0
705 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
706 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
707 ; CHECK-FIX-NEXT: bne .LBB21_2
708 ; CHECK-FIX-NEXT: @ %bb.3:
709 ; CHECK-FIX-NEXT: bx lr
710 %5 = icmp eq i32 %0, 0
711 br i1 %5, label %12, label %6
714 %7 = bitcast <16 x i8> %2 to <8 x i16>
715 %8 = insertelement <8 x i16> %7, i16 %1, i64 0
716 %9 = bitcast <8 x i16> %8 to <16 x i8>
717 %10 = bitcast <16 x i8>* %3 to <8 x i16>*
718 %11 = bitcast <16 x i8>* %3 to i16*
725 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
726 %15 = load <8 x i16>, <8 x i16>* %10, align 8
727 %16 = insertelement <8 x i16> %15, i16 %1, i64 0
728 %17 = bitcast <8 x i16> %16 to <16 x i8>
729 store i16 %1, i16* %11, align 8
730 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
731 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
732 store <16 x i8> %19, <16 x i8>* %3, align 8
733 %20 = add nuw i32 %14, 1
734 %21 = icmp eq i32 %20, %0
735 br i1 %21, label %12, label %13
738 define arm_aapcs_vfpcc void @aese_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
739 ; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr:
740 ; CHECK-FIX-NOSCHED: @ %bb.0:
741 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
742 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
743 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
744 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
745 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
746 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
747 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
748 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
749 ; CHECK-FIX-NOSCHED-NEXT: bx lr
751 ; CHECK-CORTEX-FIX-LABEL: aese_set32_via_ptr:
752 ; CHECK-CORTEX-FIX: @ %bb.0:
753 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
754 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
755 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
756 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
757 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0
758 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
759 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
760 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
761 ; CHECK-CORTEX-FIX-NEXT: bx lr
762 %4 = load i32, i32* %0, align 4
763 %5 = bitcast <16 x i8>* %2 to <4 x i32>*
764 %6 = load <4 x i32>, <4 x i32>* %5, align 8
765 %7 = insertelement <4 x i32> %6, i32 %4, i64 0
766 %8 = bitcast <4 x i32> %7 to <16 x i8>
767 %9 = bitcast <16 x i8> %1 to <4 x i32>
768 %10 = insertelement <4 x i32> %9, i32 %4, i64 0
769 %11 = bitcast <4 x i32> %10 to <16 x i8>
770 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
771 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
772 store <16 x i8> %13, <16 x i8>* %2, align 8
776 define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
777 ; CHECK-FIX-LABEL: aese_set32_via_val:
778 ; CHECK-FIX: @ %bb.0:
779 ; CHECK-FIX-NEXT: vorr q0, q0, q0
780 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
781 ; CHECK-FIX-NEXT: vmov.32 d0[0], r0
782 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0
783 ; CHECK-FIX-NEXT: aese.8 q8, q0
784 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
785 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
786 ; CHECK-FIX-NEXT: bx lr
787 %4 = bitcast <16 x i8>* %2 to <4 x i32>*
788 %5 = load <4 x i32>, <4 x i32>* %4, align 8
789 %6 = insertelement <4 x i32> %5, i32 %0, i64 0
790 %7 = bitcast <4 x i32> %6 to <16 x i8>
791 %8 = bitcast <16 x i8> %1 to <4 x i32>
792 %9 = insertelement <4 x i32> %8, i32 %0, i64 0
793 %10 = bitcast <4 x i32> %9 to <16 x i8>
794 %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
795 %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
796 store <16 x i8> %12, <16 x i8>* %2, align 8
800 define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
801 ; CHECK-FIX-LABEL: aese_set32_cond_via_ptr:
802 ; CHECK-FIX: @ %bb.0:
803 ; CHECK-FIX-NEXT: vorr q0, q0, q0
804 ; CHECK-FIX-NEXT: cmp r0, #0
805 ; CHECK-FIX-NEXT: beq .LBB24_2
806 ; CHECK-FIX-NEXT: @ %bb.1:
807 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
808 ; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
809 ; CHECK-FIX-NEXT: cmp r0, #0
810 ; CHECK-FIX-NEXT: bne .LBB24_3
811 ; CHECK-FIX-NEXT: b .LBB24_4
812 ; CHECK-FIX-NEXT: .LBB24_2:
813 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
814 ; CHECK-FIX-NEXT: cmp r0, #0
815 ; CHECK-FIX-NEXT: beq .LBB24_4
816 ; CHECK-FIX-NEXT: .LBB24_3:
817 ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
818 ; CHECK-FIX-NEXT: .LBB24_4:
819 ; CHECK-FIX-NEXT: aese.8 q8, q0
820 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
821 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
822 ; CHECK-FIX-NEXT: bx lr
823 br i1 %0, label %5, label %10
826 %6 = load i32, i32* %1, align 4
827 %7 = bitcast <16 x i8>* %3 to <4 x i32>*
828 %8 = load <4 x i32>, <4 x i32>* %7, align 8
829 %9 = insertelement <4 x i32> %8, i32 %6, i64 0
833 %11 = bitcast <16 x i8>* %3 to <4 x i32>*
834 %12 = load <4 x i32>, <4 x i32>* %11, align 8
838 %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
839 br i1 %0, label %15, label %19
842 %16 = load i32, i32* %1, align 4
843 %17 = bitcast <16 x i8> %2 to <4 x i32>
844 %18 = insertelement <4 x i32> %17, i32 %16, i64 0
848 %20 = bitcast <16 x i8> %2 to <4 x i32>
852 %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
853 %23 = bitcast <4 x i32> %14 to <16 x i8>
854 %24 = bitcast <4 x i32> %22 to <16 x i8>
855 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
856 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
857 store <16 x i8> %26, <16 x i8>* %3, align 8
861 define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
862 ; CHECK-FIX-LABEL: aese_set32_cond_via_val:
863 ; CHECK-FIX: @ %bb.0:
864 ; CHECK-FIX-NEXT: vorr q0, q0, q0
865 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
866 ; CHECK-FIX-NEXT: cmp r0, #0
867 ; CHECK-FIX-NEXT: beq .LBB25_2
868 ; CHECK-FIX-NEXT: @ %bb.1:
869 ; CHECK-FIX-NEXT: vmov.32 d16[0], r1
870 ; CHECK-FIX-NEXT: .LBB25_2: @ %select.end
871 ; CHECK-FIX-NEXT: cmp r0, #0
872 ; CHECK-FIX-NEXT: beq .LBB25_4
873 ; CHECK-FIX-NEXT: @ %bb.3:
874 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
875 ; CHECK-FIX-NEXT: .LBB25_4: @ %select.end2
876 ; CHECK-FIX-NEXT: aese.8 q8, q0
877 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
878 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
879 ; CHECK-FIX-NEXT: bx lr
880 %5 = bitcast <16 x i8>* %3 to <4 x i32>*
881 %6 = load <4 x i32>, <4 x i32>* %5, align 8
882 %7 = insertelement <4 x i32> %6, i32 %1, i64 0
883 %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
884 %9 = bitcast <16 x i8> %2 to <4 x i32>
885 %10 = insertelement <4 x i32> %9, i32 %1, i64 0
886 %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
887 %12 = bitcast <4 x i32> %8 to <16 x i8>
888 %13 = bitcast <4 x i32> %11 to <16 x i8>
889 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
890 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
891 store <16 x i8> %15, <16 x i8>* %3, align 8
895 define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
896 ; CHECK-FIX-LABEL: aese_set32_loop_via_ptr:
897 ; CHECK-FIX: @ %bb.0:
898 ; CHECK-FIX-NEXT: vorr q0, q0, q0
899 ; CHECK-FIX-NEXT: ldr r1, [r1]
900 ; CHECK-FIX-NEXT: cmp r0, #0
901 ; CHECK-FIX-NEXT: str r1, [r2]
902 ; CHECK-FIX-NEXT: bxeq lr
903 ; CHECK-FIX-NEXT: .LBB26_1:
904 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
905 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
906 ; CHECK-FIX-NEXT: .LBB26_2: @ =>This Inner Loop Header: Depth=1
907 ; CHECK-FIX-NEXT: aese.8 q8, q0
908 ; CHECK-FIX-NEXT: subs r0, r0, #1
909 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
910 ; CHECK-FIX-NEXT: bne .LBB26_2
911 ; CHECK-FIX-NEXT: @ %bb.3:
912 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
913 ; CHECK-FIX-NEXT: bx lr
914 %5 = load i32, i32* %1, align 4
915 %6 = bitcast <16 x i8> %2 to <4 x i32>
916 %7 = insertelement <4 x i32> %6, i32 %5, i64 0
917 %8 = bitcast <4 x i32> %7 to <16 x i8>
918 %9 = bitcast <16 x i8>* %3 to i32*
919 store i32 %5, i32* %9, align 8
920 %10 = icmp eq i32 %0, 0
921 br i1 %10, label %14, label %11
924 %12 = load <16 x i8>, <16 x i8>* %3, align 8
928 store <16 x i8> %19, <16 x i8>* %3, align 8
935 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
936 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
937 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
938 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
939 %20 = add nuw i32 %17, 1
940 %21 = icmp eq i32 %20, %0
941 br i1 %21, label %13, label %15
944 define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
945 ; CHECK-FIX-LABEL: aese_set32_loop_via_val:
946 ; CHECK-FIX: @ %bb.0:
947 ; CHECK-FIX-NEXT: vorr q0, q0, q0
948 ; CHECK-FIX-NEXT: cmp r0, #0
949 ; CHECK-FIX-NEXT: bxeq lr
950 ; CHECK-FIX-NEXT: .LBB27_1:
951 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
952 ; CHECK-FIX-NEXT: .LBB27_2: @ =>This Inner Loop Header: Depth=1
953 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
954 ; CHECK-FIX-NEXT: subs r0, r0, #1
955 ; CHECK-FIX-NEXT: vmov.32 d16[0], r1
956 ; CHECK-FIX-NEXT: aese.8 q8, q0
957 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
958 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
959 ; CHECK-FIX-NEXT: bne .LBB27_2
960 ; CHECK-FIX-NEXT: @ %bb.3:
961 ; CHECK-FIX-NEXT: bx lr
962 %5 = icmp eq i32 %0, 0
963 br i1 %5, label %12, label %6
966 %7 = bitcast <16 x i8> %2 to <4 x i32>
967 %8 = insertelement <4 x i32> %7, i32 %1, i64 0
968 %9 = bitcast <4 x i32> %8 to <16 x i8>
969 %10 = bitcast <16 x i8>* %3 to <4 x i32>*
970 %11 = bitcast <16 x i8>* %3 to i32*
977 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
978 %15 = load <4 x i32>, <4 x i32>* %10, align 8
979 %16 = insertelement <4 x i32> %15, i32 %1, i64 0
980 %17 = bitcast <4 x i32> %16 to <16 x i8>
981 store i32 %1, i32* %11, align 8
982 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
983 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
984 store <16 x i8> %19, <16 x i8>* %3, align 8
985 %20 = add nuw i32 %14, 1
986 %21 = icmp eq i32 %20, %0
987 br i1 %21, label %12, label %13
990 define arm_aapcs_vfpcc void @aese_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
991 ; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr:
992 ; CHECK-FIX-NOSCHED: @ %bb.0:
993 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
994 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
995 ; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0]
996 ; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0
997 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
998 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
999 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
1000 ; CHECK-FIX-NOSCHED-NEXT: bx lr
1002 ; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr:
1003 ; CHECK-CORTEX-FIX: @ %bb.0:
1004 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
1005 ; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0]
1006 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1007 ; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0
1008 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
1009 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
1010 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1011 ; CHECK-CORTEX-FIX-NEXT: bx lr
1012 %4 = load i64, i64* %0, align 8
1013 %5 = bitcast <16 x i8>* %2 to <2 x i64>*
1014 %6 = load <2 x i64>, <2 x i64>* %5, align 8
1015 %7 = insertelement <2 x i64> %6, i64 %4, i64 0
1016 %8 = bitcast <2 x i64> %7 to <16 x i8>
1017 %9 = bitcast <16 x i8> %1 to <2 x i64>
1018 %10 = insertelement <2 x i64> %9, i64 %4, i64 0
1019 %11 = bitcast <2 x i64> %10 to <16 x i8>
1020 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
1021 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
1022 store <16 x i8> %13, <16 x i8>* %2, align 8
1026 define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
1027 ; CHECK-FIX-LABEL: aese_set64_via_val:
1028 ; CHECK-FIX: @ %bb.0:
1029 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1030 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1031 ; CHECK-FIX-NEXT: vmov.32 d0[0], r0
1032 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0
1033 ; CHECK-FIX-NEXT: vmov.32 d0[1], r1
1034 ; CHECK-FIX-NEXT: vmov.32 d16[1], r1
1035 ; CHECK-FIX-NEXT: aese.8 q8, q0
1036 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1037 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1038 ; CHECK-FIX-NEXT: bx lr
1039 %4 = bitcast <16 x i8>* %2 to <2 x i64>*
1040 %5 = load <2 x i64>, <2 x i64>* %4, align 8
1041 %6 = insertelement <2 x i64> %5, i64 %0, i64 0
1042 %7 = bitcast <2 x i64> %6 to <16 x i8>
1043 %8 = bitcast <16 x i8> %1 to <2 x i64>
1044 %9 = insertelement <2 x i64> %8, i64 %0, i64 0
1045 %10 = bitcast <2 x i64> %9 to <16 x i8>
1046 %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
1047 %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
1048 store <16 x i8> %12, <16 x i8>* %2, align 8
1052 define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
1053 ; CHECK-FIX-NOSCHED-LABEL: aese_set64_cond_via_ptr:
1054 ; CHECK-FIX-NOSCHED: @ %bb.0:
1055 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1056 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB30_2
1057 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
1058 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
1059 ; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1]
1060 ; CHECK-FIX-NOSCHED-NEXT: b .LBB30_3
1061 ; CHECK-FIX-NOSCHED-NEXT: .LBB30_2:
1062 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
1063 ; CHECK-FIX-NOSCHED-NEXT: .LBB30_3:
1064 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1065 ; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
1066 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
1067 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
1068 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1069 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
1070 ; CHECK-FIX-NOSCHED-NEXT: bx lr
1072 ; CHECK-CORTEX-FIX-LABEL: aese_set64_cond_via_ptr:
1073 ; CHECK-CORTEX-FIX: @ %bb.0:
1074 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1075 ; CHECK-CORTEX-FIX-NEXT: beq .LBB30_2
1076 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
1077 ; CHECK-CORTEX-FIX-NEXT: vldr d18, [r1]
1078 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1079 ; CHECK-CORTEX-FIX-NEXT: vorr d16, d18, d18
1080 ; CHECK-CORTEX-FIX-NEXT: b .LBB30_3
1081 ; CHECK-CORTEX-FIX-NEXT: .LBB30_2:
1082 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1083 ; CHECK-CORTEX-FIX-NEXT: .LBB30_3:
1084 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1085 ; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1]
1086 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
1087 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
1088 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
1089 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1090 ; CHECK-CORTEX-FIX-NEXT: bx lr
1091 br i1 %0, label %5, label %10
1094 %6 = load i64, i64* %1, align 8
1095 %7 = bitcast <16 x i8>* %3 to <2 x i64>*
1096 %8 = load <2 x i64>, <2 x i64>* %7, align 8
1097 %9 = insertelement <2 x i64> %8, i64 %6, i64 0
1101 %11 = bitcast <16 x i8>* %3 to <2 x i64>*
1102 %12 = load <2 x i64>, <2 x i64>* %11, align 8
1106 %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
1107 br i1 %0, label %15, label %19
1110 %16 = load i64, i64* %1, align 8
1111 %17 = bitcast <16 x i8> %2 to <2 x i64>
1112 %18 = insertelement <2 x i64> %17, i64 %16, i64 0
1116 %20 = bitcast <16 x i8> %2 to <2 x i64>
1120 %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
1121 %23 = bitcast <2 x i64> %14 to <16 x i8>
1122 %24 = bitcast <2 x i64> %22 to <16 x i8>
1123 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
1124 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
1125 store <16 x i8> %26, <16 x i8>* %3, align 8
1129 define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
1130 ; CHECK-FIX-LABEL: aese_set64_cond_via_val:
1131 ; CHECK-FIX: @ %bb.0:
1132 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1133 ; CHECK-FIX-NEXT: ldr r1, [sp]
1134 ; CHECK-FIX-NEXT: cmp r0, #0
1135 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1136 ; CHECK-FIX-NEXT: beq .LBB31_2
1137 ; CHECK-FIX-NEXT: @ %bb.1:
1138 ; CHECK-FIX-NEXT: vmov.32 d16[0], r2
1139 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3
1140 ; CHECK-FIX-NEXT: .LBB31_2: @ %select.end
1141 ; CHECK-FIX-NEXT: cmp r0, #0
1142 ; CHECK-FIX-NEXT: beq .LBB31_4
1143 ; CHECK-FIX-NEXT: @ %bb.3:
1144 ; CHECK-FIX-NEXT: vmov.32 d0[0], r2
1145 ; CHECK-FIX-NEXT: vmov.32 d0[1], r3
1146 ; CHECK-FIX-NEXT: .LBB31_4: @ %select.end2
1147 ; CHECK-FIX-NEXT: aese.8 q8, q0
1148 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1149 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1150 ; CHECK-FIX-NEXT: bx lr
1151 %5 = bitcast <16 x i8>* %3 to <2 x i64>*
1152 %6 = load <2 x i64>, <2 x i64>* %5, align 8
1153 %7 = insertelement <2 x i64> %6, i64 %1, i64 0
1154 %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
1155 %9 = bitcast <16 x i8> %2 to <2 x i64>
1156 %10 = insertelement <2 x i64> %9, i64 %1, i64 0
1157 %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
1158 %12 = bitcast <2 x i64> %8 to <16 x i8>
1159 %13 = bitcast <2 x i64> %11 to <16 x i8>
1160 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
1161 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
1162 store <16 x i8> %15, <16 x i8>* %3, align 8
1166 define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
1167 ; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr:
1168 ; CHECK-FIX-NOSCHED: @ %bb.0:
1169 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
1170 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr}
1171 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr}
1172 ; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1]
1173 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1174 ; CHECK-FIX-NOSCHED-NEXT: strd r4, r5, [r2]
1175 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB32_4
1176 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
1177 ; CHECK-FIX-NOSCHED-NEXT: vmov d0, r4, r5
1178 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
1179 ; CHECK-FIX-NOSCHED-NEXT: .LBB32_2: @ =>This Inner Loop Header: Depth=1
1180 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
1181 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
1182 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1183 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB32_2
1184 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
1185 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
1186 ; CHECK-FIX-NOSCHED-NEXT: .LBB32_4:
1187 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r11, pc}
1189 ; CHECK-CORTEX-FIX-LABEL: aese_set64_loop_via_ptr:
1190 ; CHECK-CORTEX-FIX: @ %bb.0:
1191 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
1192 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr}
1193 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr}
1194 ; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1]
1195 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1196 ; CHECK-CORTEX-FIX-NEXT: strd r4, r5, [r2]
1197 ; CHECK-CORTEX-FIX-NEXT: popeq {r4, r5, r11, pc}
1198 ; CHECK-CORTEX-FIX-NEXT: .LBB32_1:
1199 ; CHECK-CORTEX-FIX-NEXT: vmov d0, r4, r5
1200 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1201 ; CHECK-CORTEX-FIX-NEXT: .LBB32_2: @ =>This Inner Loop Header: Depth=1
1202 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
1203 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
1204 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
1205 ; CHECK-CORTEX-FIX-NEXT: bne .LBB32_2
1206 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
1207 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1208 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc}
1209 %5 = load i64, i64* %1, align 8
1210 %6 = bitcast <16 x i8> %2 to <2 x i64>
1211 %7 = insertelement <2 x i64> %6, i64 %5, i64 0
1212 %8 = bitcast <2 x i64> %7 to <16 x i8>
1213 %9 = bitcast <16 x i8>* %3 to i64*
1214 store i64 %5, i64* %9, align 8
1215 %10 = icmp eq i32 %0, 0
1216 br i1 %10, label %14, label %11
1219 %12 = load <16 x i8>, <16 x i8>* %3, align 8
1223 store <16 x i8> %19, <16 x i8>* %3, align 8
1230 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
1231 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
1232 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
1233 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
1234 %20 = add nuw i32 %17, 1
1235 %21 = icmp eq i32 %20, %0
1236 br i1 %21, label %13, label %15
1239 define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
1240 ; CHECK-FIX-LABEL: aese_set64_loop_via_val:
1241 ; CHECK-FIX: @ %bb.0:
1242 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1243 ; CHECK-FIX-NEXT: cmp r0, #0
1244 ; CHECK-FIX-NEXT: bxeq lr
1245 ; CHECK-FIX-NEXT: .LBB33_1:
1246 ; CHECK-FIX-NEXT: vmov.32 d0[0], r2
1247 ; CHECK-FIX-NEXT: ldr r1, [sp]
1248 ; CHECK-FIX-NEXT: vmov.32 d0[1], r3
1249 ; CHECK-FIX-NEXT: .LBB33_2: @ =>This Inner Loop Header: Depth=1
1250 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1251 ; CHECK-FIX-NEXT: subs r0, r0, #1
1252 ; CHECK-FIX-NEXT: vmov.32 d16[0], r2
1253 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3
1254 ; CHECK-FIX-NEXT: aese.8 q8, q0
1255 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1256 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1257 ; CHECK-FIX-NEXT: bne .LBB33_2
1258 ; CHECK-FIX-NEXT: @ %bb.3:
1259 ; CHECK-FIX-NEXT: bx lr
1260 %5 = icmp eq i32 %0, 0
1261 br i1 %5, label %12, label %6
1264 %7 = bitcast <16 x i8> %2 to <2 x i64>
1265 %8 = insertelement <2 x i64> %7, i64 %1, i64 0
1266 %9 = bitcast <2 x i64> %8 to <16 x i8>
1267 %10 = bitcast <16 x i8>* %3 to <2 x i64>*
1268 %11 = bitcast <16 x i8>* %3 to i64*
1275 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
1276 %15 = load <2 x i64>, <2 x i64>* %10, align 8
1277 %16 = insertelement <2 x i64> %15, i64 %1, i64 0
1278 %17 = bitcast <2 x i64> %16 to <16 x i8>
1279 store i64 %1, i64* %11, align 8
1280 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
1281 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
1282 store <16 x i8> %19, <16 x i8>* %3, align 8
1283 %20 = add nuw i32 %14, 1
1284 %21 = icmp eq i32 %20, %0
1285 br i1 %21, label %12, label %13
1288 define arm_aapcs_vfpcc void @aese_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
1289 ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr:
1290 ; CHECK-FIX-NOSCHED: @ %bb.0:
1291 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
1292 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
1293 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
1294 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
1295 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
1296 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
1297 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1298 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
1299 ; CHECK-FIX-NOSCHED-NEXT: bx lr
1301 ; CHECK-CORTEX-FIX-LABEL: aese_setf16_via_ptr:
1302 ; CHECK-CORTEX-FIX: @ %bb.0:
1303 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
1304 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1305 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
1306 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
1307 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
1308 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
1309 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
1310 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1311 ; CHECK-CORTEX-FIX-NEXT: bx lr
1312 %4 = bitcast half* %0 to i16*
1313 %5 = load i16, i16* %4, align 2
1314 %6 = bitcast <16 x i8>* %2 to <8 x i16>*
1315 %7 = load <8 x i16>, <8 x i16>* %6, align 8
1316 %8 = insertelement <8 x i16> %7, i16 %5, i64 0
1317 %9 = bitcast <8 x i16> %8 to <16 x i8>
1318 %10 = bitcast <16 x i8> %1 to <8 x i16>
1319 %11 = insertelement <8 x i16> %10, i16 %5, i64 0
1320 %12 = bitcast <8 x i16> %11 to <16 x i8>
1321 %13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %12)
1322 %14 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %13)
1323 store <16 x i8> %14, <16 x i8>* %2, align 8
1327 define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
1328 ; CHECK-FIX-LABEL: aese_setf16_via_val:
1329 ; CHECK-FIX: @ %bb.0:
1330 ; CHECK-FIX-NEXT: vorr q1, q1, q1
1331 ; CHECK-FIX-NEXT: vmov r1, s0
1332 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
1333 ; CHECK-FIX-NEXT: vmov.16 d2[0], r1
1334 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
1335 ; CHECK-FIX-NEXT: aese.8 q8, q1
1336 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1337 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
1338 ; CHECK-FIX-NEXT: bx lr
1339 %4 = bitcast <16 x i8>* %2 to <8 x i16>*
1340 %5 = load <8 x i16>, <8 x i16>* %4, align 8
1341 %6 = bitcast half %0 to i16
1342 %7 = insertelement <8 x i16> %5, i16 %6, i64 0
1343 %8 = bitcast <8 x i16> %7 to <16 x i8>
1344 %9 = bitcast <16 x i8> %1 to <8 x i16>
1345 %10 = insertelement <8 x i16> %9, i16 %6, i64 0
1346 %11 = bitcast <8 x i16> %10 to <16 x i8>
1347 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
1348 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
1349 store <16 x i8> %13, <16 x i8>* %2, align 8
1353 define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
1354 ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr:
1355 ; CHECK-FIX-NOSCHED: @ %bb.0:
1356 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1357 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1358 ; CHECK-FIX-NOSCHED-NEXT: .pad #24
1359 ; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
1360 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1361 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_3
1362 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
1363 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
1364 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
1365 ; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
1366 ; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16]
1367 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
1368 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r3
1369 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
1370 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #8] @ 4-byte Spill
1371 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
1372 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
1373 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
1374 ; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
1375 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
1376 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
1377 ; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
1378 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
1379 ; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
1380 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
1381 ; CHECK-FIX-NOSCHED-NEXT: uxth r10, r5
1382 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1383 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB36_4
1384 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_2:
1385 ; CHECK-FIX-NOSCHED-NEXT: vmov r4, r6, d1
1386 ; CHECK-FIX-NOSCHED-NEXT: vmov r0, r3, d0
1387 ; CHECK-FIX-NOSCHED-NEXT: lsr r5, r4, #16
1388 ; CHECK-FIX-NOSCHED-NEXT: lsr r1, r6, #16
1389 ; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
1390 ; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
1391 ; CHECK-FIX-NOSCHED-NEXT: lsr r12, r3, #16
1392 ; CHECK-FIX-NOSCHED-NEXT: uxth r9, r4
1393 ; CHECK-FIX-NOSCHED-NEXT: uxth r6, r3
1394 ; CHECK-FIX-NOSCHED-NEXT: b .LBB36_5
1395 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_3:
1396 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #14]
1397 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
1398 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12]
1399 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
1400 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #8]
1401 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
1402 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #6]
1403 ; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #10]
1404 ; CHECK-FIX-NOSCHED-NEXT: ldrh r10, [r2]
1405 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
1406 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #4]
1407 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
1408 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #2]
1409 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
1410 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1411 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_2
1412 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_4:
1413 ; CHECK-FIX-NOSCHED-NEXT: vmov r5, r3, d1
1414 ; CHECK-FIX-NOSCHED-NEXT: mov r4, r7
1415 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d0[1]
1416 ; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d0[0]}, [r1:16]
1417 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d0[0]
1418 ; CHECK-FIX-NOSCHED-NEXT: uxth r9, r5
1419 ; CHECK-FIX-NOSCHED-NEXT: uxth r11, r3
1420 ; CHECK-FIX-NOSCHED-NEXT: uxth r6, r7
1421 ; CHECK-FIX-NOSCHED-NEXT: lsr r12, r7, #16
1422 ; CHECK-FIX-NOSCHED-NEXT: lsr r1, r3, #16
1423 ; CHECK-FIX-NOSCHED-NEXT: lsr r5, r5, #16
1424 ; CHECK-FIX-NOSCHED-NEXT: mov r7, r4
1425 ; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
1426 ; CHECK-FIX-NOSCHED-NEXT: .LBB36_5:
1427 ; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
1428 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
1429 ; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
1430 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r0, lsl #16
1431 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
1432 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
1433 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
1434 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r6, r12, lsl #16
1435 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
1436 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
1437 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r3, lsl #16
1438 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
1439 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r9, r5, lsl #16
1440 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
1441 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
1442 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
1443 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
1444 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r1, lsl #16
1445 ; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
1446 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
1447 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
1448 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16
1449 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
1450 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
1451 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1452 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
1453 ; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
1454 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1456 ; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr:
1457 ; CHECK-CORTEX-FIX: @ %bb.0:
1458 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1459 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1460 ; CHECK-CORTEX-FIX-NEXT: .pad #24
1461 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24
1462 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1463 ; CHECK-CORTEX-FIX-NEXT: beq .LBB36_3
1464 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
1465 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1466 ; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8
1467 ; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16]
1468 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d18[0]
1469 ; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
1470 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
1471 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
1472 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
1473 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
1474 ; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
1475 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
1476 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
1477 ; CHECK-CORTEX-FIX-NEXT: vmov r3, r6, d17
1478 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
1479 ; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
1480 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
1481 ; CHECK-CORTEX-FIX-NEXT: uxth r11, r6
1482 ; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
1483 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
1484 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1485 ; CHECK-CORTEX-FIX-NEXT: bne .LBB36_4
1486 ; CHECK-CORTEX-FIX-NEXT: .LBB36_2:
1487 ; CHECK-CORTEX-FIX-NEXT: vmov r1, r7, d0
1488 ; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
1489 ; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
1490 ; CHECK-CORTEX-FIX-NEXT: lsr r12, r7, #16
1491 ; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
1492 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
1493 ; CHECK-CORTEX-FIX-NEXT: mov r0, r3
1494 ; CHECK-CORTEX-FIX-NEXT: vmov r7, r3, d1
1495 ; CHECK-CORTEX-FIX-NEXT: uxth r10, r7
1496 ; CHECK-CORTEX-FIX-NEXT: lsr r5, r7, #16
1497 ; CHECK-CORTEX-FIX-NEXT: uxth lr, r3
1498 ; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16
1499 ; CHECK-CORTEX-FIX-NEXT: mov r3, r0
1500 ; CHECK-CORTEX-FIX-NEXT: b .LBB36_5
1501 ; CHECK-CORTEX-FIX-NEXT: .LBB36_3:
1502 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2]
1503 ; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r2, #12]
1504 ; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #14]
1505 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
1506 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #2]
1507 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
1508 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #4]
1509 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
1510 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #6]
1511 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
1512 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #8]
1513 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
1514 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10]
1515 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1516 ; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2
1517 ; CHECK-CORTEX-FIX-NEXT: .LBB36_4:
1518 ; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0
1519 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r5, d0[1]
1520 ; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
1521 ; CHECK-CORTEX-FIX-NEXT: uxth r6, r5
1522 ; CHECK-CORTEX-FIX-NEXT: lsr r12, r5, #16
1523 ; CHECK-CORTEX-FIX-NEXT: vmov r5, r7, d1
1524 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r1, d16[0]
1525 ; CHECK-CORTEX-FIX-NEXT: uxth r10, r5
1526 ; CHECK-CORTEX-FIX-NEXT: lsr r5, r5, #16
1527 ; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
1528 ; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
1529 ; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
1530 ; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
1531 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
1532 ; CHECK-CORTEX-FIX-NEXT: .LBB36_5:
1533 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
1534 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
1535 ; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r4, lsl #16
1536 ; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
1537 ; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
1538 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r10, r5, lsl #16
1539 ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16
1540 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
1541 ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
1542 ; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r1, r3, lsl #16
1543 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
1544 ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16
1545 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload
1546 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4
1547 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1
1548 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
1549 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
1550 ; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r9, lsl #16
1551 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3
1552 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5
1553 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6
1554 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
1555 ; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
1556 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
1557 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1558 ; CHECK-CORTEX-FIX-NEXT: add sp, sp, #24
1559 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1560 br i1 %0, label %5, label %12
1563 %6 = bitcast half* %1 to i16*
1564 %7 = load i16, i16* %6, align 2
1565 %8 = bitcast <16 x i8>* %3 to <8 x i16>*
1566 %9 = load <8 x i16>, <8 x i16>* %8, align 8
1567 %10 = insertelement <8 x i16> %9, i16 %7, i64 0
1568 %11 = bitcast <8 x i16> %10 to <8 x half>
1572 %13 = bitcast <16 x i8>* %3 to <8 x half>*
1573 %14 = load <8 x half>, <8 x half>* %13, align 8
1577 %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
1578 br i1 %0, label %17, label %23
1581 %18 = bitcast half* %1 to i16*
1582 %19 = load i16, i16* %18, align 2
1583 %20 = bitcast <16 x i8> %2 to <8 x i16>
1584 %21 = insertelement <8 x i16> %20, i16 %19, i64 0
1585 %22 = bitcast <8 x i16> %21 to <8 x half>
1589 %24 = bitcast <16 x i8> %2 to <8 x half>
1593 %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
1594 %27 = bitcast <8 x half> %16 to <16 x i8>
1595 %28 = bitcast <8 x half> %26 to <16 x i8>
1596 %29 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %27, <16 x i8> %28)
1597 %30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %29)
1598 store <16 x i8> %30, <16 x i8>* %3, align 8
1602 define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
1603 ; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val:
1604 ; CHECK-FIX-NOSCHED: @ %bb.0:
1605 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1606 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1607 ; CHECK-FIX-NOSCHED-NEXT: .pad #24
1608 ; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
1609 ; CHECK-FIX-NOSCHED-NEXT: vmov r12, s0
1610 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1611 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_2
1612 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
1613 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
1614 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
1615 ; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
1616 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r12
1617 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
1618 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
1619 ; CHECK-FIX-NOSCHED-NEXT: uxth r2, r3
1620 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
1621 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
1622 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
1623 ; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
1624 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
1625 ; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
1626 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
1627 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
1628 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
1629 ; CHECK-FIX-NOSCHED-NEXT: uxth r3, r5
1630 ; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
1631 ; CHECK-FIX-NOSCHED-NEXT: b .LBB37_3
1632 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_2:
1633 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #14]
1634 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
1635 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #12]
1636 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
1637 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #8]
1638 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
1639 ; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #6]
1640 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #2]
1641 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
1642 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
1643 ; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #10]
1644 ; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #4]
1645 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1]
1646 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_3:
1647 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
1648 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
1649 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_5
1650 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.4:
1651 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r6, d2[1]
1652 ; CHECK-FIX-NOSCHED-NEXT: mov r3, r2
1653 ; CHECK-FIX-NOSCHED-NEXT: mov r2, r7
1654 ; CHECK-FIX-NOSCHED-NEXT: vmov r4, r7, d3
1655 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r12
1656 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d2[0]
1657 ; CHECK-FIX-NOSCHED-NEXT: uxth r5, r6
1658 ; CHECK-FIX-NOSCHED-NEXT: lsr r12, r6, #16
1659 ; CHECK-FIX-NOSCHED-NEXT: uxth r10, r4
1660 ; CHECK-FIX-NOSCHED-NEXT: uxth r11, r7
1661 ; CHECK-FIX-NOSCHED-NEXT: lsr r9, r7, #16
1662 ; CHECK-FIX-NOSCHED-NEXT: mov r7, r2
1663 ; CHECK-FIX-NOSCHED-NEXT: mov r2, r3
1664 ; CHECK-FIX-NOSCHED-NEXT: lsr r4, r4, #16
1665 ; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
1666 ; CHECK-FIX-NOSCHED-NEXT: b .LBB37_6
1667 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_5:
1668 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, r6, d3
1669 ; CHECK-FIX-NOSCHED-NEXT: vmov r0, r5, d2
1670 ; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16
1671 ; CHECK-FIX-NOSCHED-NEXT: lsr r9, r6, #16
1672 ; CHECK-FIX-NOSCHED-NEXT: lsr r12, r5, #16
1673 ; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
1674 ; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
1675 ; CHECK-FIX-NOSCHED-NEXT: uxth r10, r3
1676 ; CHECK-FIX-NOSCHED-NEXT: uxth r5, r5
1677 ; CHECK-FIX-NOSCHED-NEXT: .LBB37_6:
1678 ; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
1679 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
1680 ; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp] @ 4-byte Reload
1681 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r0, lsl #16
1682 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
1683 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
1684 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
1685 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r12, lsl #16
1686 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
1687 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
1688 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
1689 ; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
1690 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
1691 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r4, lsl #16
1692 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
1693 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
1694 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
1695 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
1696 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r9, lsl #16
1697 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
1698 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
1699 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16
1700 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
1701 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
1702 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
1703 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
1704 ; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
1705 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1707 ; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val:
1708 ; CHECK-CORTEX-FIX: @ %bb.0:
1709 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1710 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1711 ; CHECK-CORTEX-FIX-NEXT: .pad #28
1712 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #28
1713 ; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
1714 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1715 ; CHECK-CORTEX-FIX-NEXT: beq .LBB37_2
1716 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
1717 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1718 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
1719 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2
1720 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r7, d16[0]
1721 ; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
1722 ; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
1723 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
1724 ; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
1725 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
1726 ; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #24] @ 4-byte Spill
1727 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
1728 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
1729 ; CHECK-CORTEX-FIX-NEXT: vmov r3, r7, d17
1730 ; CHECK-CORTEX-FIX-NEXT: uxth r6, r3
1731 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
1732 ; CHECK-CORTEX-FIX-NEXT: uxth r11, r7
1733 ; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
1734 ; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #16] @ 4-byte Spill
1735 ; CHECK-CORTEX-FIX-NEXT: b .LBB37_3
1736 ; CHECK-CORTEX-FIX-NEXT: .LBB37_2:
1737 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
1738 ; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r1, #12]
1739 ; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #14]
1740 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #24] @ 4-byte Spill
1741 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #2]
1742 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
1743 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #4]
1744 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
1745 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #6]
1746 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
1747 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #8]
1748 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
1749 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #10]
1750 ; CHECK-CORTEX-FIX-NEXT: .LBB37_3:
1751 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
1752 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
1753 ; CHECK-CORTEX-FIX-NEXT: beq .LBB37_5
1754 ; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
1755 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d2[1]
1756 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r2
1757 ; CHECK-CORTEX-FIX-NEXT: vmov r4, r6, d3
1758 ; CHECK-CORTEX-FIX-NEXT: uxth r10, r4
1759 ; CHECK-CORTEX-FIX-NEXT: lsr r4, r4, #16
1760 ; CHECK-CORTEX-FIX-NEXT: uxth lr, r6
1761 ; CHECK-CORTEX-FIX-NEXT: lsr r8, r6, #16
1762 ; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
1763 ; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
1764 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[0]
1765 ; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
1766 ; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
1767 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
1768 ; CHECK-CORTEX-FIX-NEXT: b .LBB37_6
1769 ; CHECK-CORTEX-FIX-NEXT: .LBB37_5:
1770 ; CHECK-CORTEX-FIX-NEXT: vmov r2, r3, d2
1771 ; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
1772 ; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
1773 ; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
1774 ; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
1775 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
1776 ; CHECK-CORTEX-FIX-NEXT: mov r0, r7
1777 ; CHECK-CORTEX-FIX-NEXT: vmov r6, r7, d3
1778 ; CHECK-CORTEX-FIX-NEXT: uxth r10, r6
1779 ; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
1780 ; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
1781 ; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
1782 ; CHECK-CORTEX-FIX-NEXT: mov r7, r0
1783 ; CHECK-CORTEX-FIX-NEXT: .LBB37_6:
1784 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
1785 ; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
1786 ; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r7, lsl #16
1787 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
1788 ; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
1789 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16
1790 ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r10, r4, lsl #16
1791 ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r2, lsl #16
1792 ; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
1793 ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
1794 ; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r2, r3, lsl #16
1795 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
1796 ; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r6, lsl #16
1797 ; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp] @ 4-byte Reload
1798 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r3
1799 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r2
1800 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
1801 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
1802 ; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r9, lsl #16
1803 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6
1804 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r4
1805 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r5
1806 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
1807 ; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
1808 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
1809 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1810 ; CHECK-CORTEX-FIX-NEXT: add sp, sp, #28
1811 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1812 br i1 %0, label %5, label %11
1815 %6 = bitcast <16 x i8>* %3 to <8 x i16>*
1816 %7 = load <8 x i16>, <8 x i16>* %6, align 8
1817 %8 = bitcast half %1 to i16
1818 %9 = insertelement <8 x i16> %7, i16 %8, i64 0
1819 %10 = bitcast <8 x i16> %9 to <8 x half>
1823 %12 = bitcast <16 x i8>* %3 to <8 x half>*
1824 %13 = load <8 x half>, <8 x half>* %12, align 8
1828 %15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
1829 br i1 %0, label %16, label %21
1832 %17 = bitcast <16 x i8> %2 to <8 x i16>
1833 %18 = bitcast half %1 to i16
1834 %19 = insertelement <8 x i16> %17, i16 %18, i64 0
1835 %20 = bitcast <8 x i16> %19 to <8 x half>
1839 %22 = bitcast <16 x i8> %2 to <8 x half>
1843 %24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
1844 %25 = bitcast <8 x half> %15 to <16 x i8>
1845 %26 = bitcast <8 x half> %24 to <16 x i8>
1846 %27 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %25, <16 x i8> %26)
1847 %28 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %27)
1848 store <16 x i8> %28, <16 x i8>* %3, align 8
1852 define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
1853 ; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr:
1854 ; CHECK-FIX: @ %bb.0:
1855 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1856 ; CHECK-FIX-NEXT: ldrh r1, [r1]
1857 ; CHECK-FIX-NEXT: cmp r0, #0
1858 ; CHECK-FIX-NEXT: strh r1, [r2]
1859 ; CHECK-FIX-NEXT: bxeq lr
1860 ; CHECK-FIX-NEXT: .LBB38_1:
1861 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
1862 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
1863 ; CHECK-FIX-NEXT: .LBB38_2: @ =>This Inner Loop Header: Depth=1
1864 ; CHECK-FIX-NEXT: aese.8 q8, q0
1865 ; CHECK-FIX-NEXT: subs r0, r0, #1
1866 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1867 ; CHECK-FIX-NEXT: bne .LBB38_2
1868 ; CHECK-FIX-NEXT: @ %bb.3:
1869 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
1870 ; CHECK-FIX-NEXT: bx lr
1871 %5 = bitcast half* %1 to i16*
1872 %6 = load i16, i16* %5, align 2
1873 %7 = bitcast <16 x i8> %2 to <8 x i16>
1874 %8 = insertelement <8 x i16> %7, i16 %6, i64 0
1875 %9 = bitcast <8 x i16> %8 to <16 x i8>
1876 %10 = bitcast <16 x i8>* %3 to i16*
1877 store i16 %6, i16* %10, align 8
1878 %11 = icmp eq i32 %0, 0
1879 br i1 %11, label %15, label %12
1882 %13 = load <16 x i8>, <16 x i8>* %3, align 8
1886 store <16 x i8> %20, <16 x i8>* %3, align 8
1893 %17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
1894 %18 = phi i32 [ 0, %12 ], [ %21, %16 ]
1895 %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
1896 %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
1897 %21 = add nuw i32 %18, 1
1898 %22 = icmp eq i32 %21, %0
1899 br i1 %22, label %14, label %16
1902 define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
1903 ; CHECK-FIX-LABEL: aese_setf16_loop_via_val:
1904 ; CHECK-FIX: @ %bb.0:
1905 ; CHECK-FIX-NEXT: vorr q1, q1, q1
1906 ; CHECK-FIX-NEXT: cmp r0, #0
1907 ; CHECK-FIX-NEXT: bxeq lr
1908 ; CHECK-FIX-NEXT: .LBB39_1:
1909 ; CHECK-FIX-NEXT: vmov r2, s0
1910 ; CHECK-FIX-NEXT: vmov.16 d2[0], r2
1911 ; CHECK-FIX-NEXT: .LBB39_2: @ =>This Inner Loop Header: Depth=1
1912 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
1913 ; CHECK-FIX-NEXT: subs r0, r0, #1
1914 ; CHECK-FIX-NEXT: vmov.16 d16[0], r2
1915 ; CHECK-FIX-NEXT: aese.8 q8, q1
1916 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
1917 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1918 ; CHECK-FIX-NEXT: bne .LBB39_2
1919 ; CHECK-FIX-NEXT: @ %bb.3:
1920 ; CHECK-FIX-NEXT: bx lr
1921 %5 = icmp eq i32 %0, 0
1922 br i1 %5, label %13, label %6
1925 %7 = bitcast <16 x i8> %2 to <8 x i16>
1926 %8 = bitcast half %1 to i16
1927 %9 = insertelement <8 x i16> %7, i16 %8, i64 0
1928 %10 = bitcast <8 x i16> %9 to <16 x i8>
1929 %11 = bitcast <16 x i8>* %3 to <8 x i16>*
1930 %12 = bitcast <16 x i8>* %3 to half*
1937 %15 = phi i32 [ 0, %6 ], [ %21, %14 ]
1938 %16 = load <8 x i16>, <8 x i16>* %11, align 8
1939 %17 = insertelement <8 x i16> %16, i16 %8, i64 0
1940 %18 = bitcast <8 x i16> %17 to <16 x i8>
1941 store half %1, half* %12, align 8
1942 %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %18, <16 x i8> %10)
1943 %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
1944 store <16 x i8> %20, <16 x i8>* %3, align 8
1945 %21 = add nuw i32 %15, 1
1946 %22 = icmp eq i32 %21, %0
1947 br i1 %22, label %13, label %14
1950 define arm_aapcs_vfpcc void @aese_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
1951 ; CHECK-FIX-LABEL: aese_setf32_via_ptr:
1952 ; CHECK-FIX: @ %bb.0:
1953 ; CHECK-FIX-NEXT: vldr s0, [r0]
1954 ; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
1955 ; CHECK-FIX-NEXT: vmov.f32 s4, s0
1956 ; CHECK-FIX-NEXT: vorr q1, q1, q1
1957 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1958 ; CHECK-FIX-NEXT: aese.8 q1, q0
1959 ; CHECK-FIX-NEXT: aesmc.8 q8, q1
1960 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
1961 ; CHECK-FIX-NEXT: bx lr
1962 %4 = load float, float* %0, align 4
1963 %5 = bitcast <16 x i8>* %2 to <4 x float>*
1964 %6 = load <4 x float>, <4 x float>* %5, align 8
1965 %7 = insertelement <4 x float> %6, float %4, i64 0
1966 %8 = bitcast <4 x float> %7 to <16 x i8>
1967 %9 = bitcast <16 x i8> %1 to <4 x float>
1968 %10 = insertelement <4 x float> %9, float %4, i64 0
1969 %11 = bitcast <4 x float> %10 to <16 x i8>
1970 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
1971 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
1972 store <16 x i8> %13, <16 x i8>* %2, align 8
1976 define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
1977 ; CHECK-FIX-LABEL: aese_setf32_via_val:
1978 ; CHECK-FIX: @ %bb.0:
1979 ; CHECK-FIX-NEXT: vmov.f32 s4, s0
1980 ; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
1981 ; CHECK-FIX-NEXT: vmov.f32 s0, s4
1982 ; CHECK-FIX-NEXT: vorr q0, q0, q0
1983 ; CHECK-FIX-NEXT: vorr q1, q1, q1
1984 ; CHECK-FIX-NEXT: aese.8 q0, q1
1985 ; CHECK-FIX-NEXT: aesmc.8 q8, q0
1986 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
1987 ; CHECK-FIX-NEXT: bx lr
1988 %4 = bitcast <16 x i8>* %2 to <4 x float>*
1989 %5 = load <4 x float>, <4 x float>* %4, align 8
1990 %6 = insertelement <4 x float> %5, float %0, i64 0
1991 %7 = bitcast <4 x float> %6 to <16 x i8>
1992 %8 = bitcast <16 x i8> %1 to <4 x float>
1993 %9 = insertelement <4 x float> %8, float %0, i64 0
1994 %10 = bitcast <4 x float> %9 to <16 x i8>
1995 %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
1996 %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
1997 store <16 x i8> %12, <16 x i8>* %2, align 8
2001 define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2002 ; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr:
2003 ; CHECK-FIX: @ %bb.0:
2004 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2005 ; CHECK-FIX-NEXT: cmp r0, #0
2006 ; CHECK-FIX-NEXT: beq .LBB42_2
2007 ; CHECK-FIX-NEXT: @ %bb.1:
2008 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2009 ; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
2010 ; CHECK-FIX-NEXT: cmp r0, #0
2011 ; CHECK-FIX-NEXT: bne .LBB42_3
2012 ; CHECK-FIX-NEXT: b .LBB42_4
2013 ; CHECK-FIX-NEXT: .LBB42_2:
2014 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2015 ; CHECK-FIX-NEXT: cmp r0, #0
2016 ; CHECK-FIX-NEXT: beq .LBB42_4
2017 ; CHECK-FIX-NEXT: .LBB42_3:
2018 ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
2019 ; CHECK-FIX-NEXT: .LBB42_4:
2020 ; CHECK-FIX-NEXT: aese.8 q8, q0
2021 ; CHECK-FIX-NEXT: aesmc.8 q8, q8
2022 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2023 ; CHECK-FIX-NEXT: bx lr
2024 br i1 %0, label %5, label %10
2027 %6 = load float, float* %1, align 4
2028 %7 = bitcast <16 x i8>* %3 to <4 x float>*
2029 %8 = load <4 x float>, <4 x float>* %7, align 8
2030 %9 = insertelement <4 x float> %8, float %6, i64 0
2034 %11 = bitcast <16 x i8>* %3 to <4 x float>*
2035 %12 = load <4 x float>, <4 x float>* %11, align 8
2039 %14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
2040 br i1 %0, label %15, label %19
2043 %16 = load float, float* %1, align 4
2044 %17 = bitcast <16 x i8> %2 to <4 x float>
2045 %18 = insertelement <4 x float> %17, float %16, i64 0
2049 %20 = bitcast <16 x i8> %2 to <4 x float>
2053 %22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
2054 %23 = bitcast <4 x float> %14 to <16 x i8>
2055 %24 = bitcast <4 x float> %22 to <16 x i8>
2056 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
2057 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
2058 store <16 x i8> %26, <16 x i8>* %3, align 8
2062 define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2063 ; CHECK-FIX-NOSCHED-LABEL: aese_setf32_cond_via_val:
2064 ; CHECK-FIX-NOSCHED: @ %bb.0:
2065 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
2066 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2067 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
2068 ; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
2069 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2070 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
2071 ; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
2072 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
2073 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
2074 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2075 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2077 ; CHECK-CORTEX-FIX-LABEL: aese_setf32_cond_via_val:
2078 ; CHECK-CORTEX-FIX: @ %bb.0:
2079 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2080 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
2081 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
2082 ; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
2083 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2084 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
2085 ; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
2086 ; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
2087 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
2088 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2089 ; CHECK-CORTEX-FIX-NEXT: bx lr
2090 %5 = bitcast <16 x i8>* %3 to <4 x float>*
2091 %6 = load <4 x float>, <4 x float>* %5, align 8
2092 %7 = insertelement <4 x float> %6, float %1, i64 0
2093 %8 = select i1 %0, <4 x float> %7, <4 x float> %6
2094 %9 = bitcast <16 x i8> %2 to <4 x float>
2095 %10 = insertelement <4 x float> %9, float %1, i64 0
2096 %11 = select i1 %0, <4 x float> %10, <4 x float> %9
2097 %12 = bitcast <4 x float> %8 to <16 x i8>
2098 %13 = bitcast <4 x float> %11 to <16 x i8>
2099 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
2100 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
2101 store <16 x i8> %15, <16 x i8>* %3, align 8
2105 define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2106 ; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_ptr:
2107 ; CHECK-FIX-NOSCHED: @ %bb.0:
2108 ; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
2109 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2110 ; CHECK-FIX-NOSCHED-NEXT: vstr s4, [r2]
2111 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
2112 ; CHECK-FIX-NOSCHED-NEXT: .LBB44_1:
2113 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
2114 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
2115 ; CHECK-FIX-NOSCHED-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
2116 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
2117 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
2118 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
2119 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
2120 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB44_2
2121 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
2122 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
2123 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2125 ; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_ptr:
2126 ; CHECK-CORTEX-FIX: @ %bb.0:
2127 ; CHECK-CORTEX-FIX-NEXT: vldr s4, [r1]
2128 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2129 ; CHECK-CORTEX-FIX-NEXT: vstr s4, [r2]
2130 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
2131 ; CHECK-CORTEX-FIX-NEXT: .LBB44_1:
2132 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2133 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
2134 ; CHECK-CORTEX-FIX-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
2135 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
2136 ; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
2137 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
2138 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
2139 ; CHECK-CORTEX-FIX-NEXT: bne .LBB44_2
2140 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
2141 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2142 ; CHECK-CORTEX-FIX-NEXT: bx lr
2143 %5 = load float, float* %1, align 4
2144 %6 = bitcast <16 x i8> %2 to <4 x float>
2145 %7 = insertelement <4 x float> %6, float %5, i64 0
2146 %8 = bitcast <4 x float> %7 to <16 x i8>
2147 %9 = bitcast <16 x i8>* %3 to float*
2148 store float %5, float* %9, align 8
2149 %10 = icmp eq i32 %0, 0
2150 br i1 %10, label %14, label %11
2153 %12 = load <16 x i8>, <16 x i8>* %3, align 8
2157 store <16 x i8> %19, <16 x i8>* %3, align 8
2164 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
2165 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
2166 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
2167 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
2168 %20 = add nuw i32 %17, 1
2169 %21 = icmp eq i32 %20, %0
2170 br i1 %21, label %13, label %15
2173 define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2174 ; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_val:
2175 ; CHECK-FIX-NOSCHED: @ %bb.0:
2176 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2177 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
2178 ; CHECK-FIX-NOSCHED-NEXT: .LBB45_1:
2179 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s4, s0
2180 ; CHECK-FIX-NOSCHED-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1
2181 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
2182 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
2183 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0
2184 ; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
2185 ; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
2186 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
2187 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
2188 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2189 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB45_2
2190 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
2191 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2193 ; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_val:
2194 ; CHECK-CORTEX-FIX: @ %bb.0:
2195 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2196 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
2197 ; CHECK-CORTEX-FIX-NEXT: .LBB45_1:
2198 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s4, s0
2199 ; CHECK-CORTEX-FIX-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1
2200 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
2201 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0
2202 ; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
2203 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
2204 ; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
2205 ; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
2206 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
2207 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2208 ; CHECK-CORTEX-FIX-NEXT: bne .LBB45_2
2209 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
2210 ; CHECK-CORTEX-FIX-NEXT: bx lr
2211 %5 = icmp eq i32 %0, 0
2212 br i1 %5, label %12, label %6
2215 %7 = bitcast <16 x i8> %2 to <4 x float>
2216 %8 = insertelement <4 x float> %7, float %1, i64 0
2217 %9 = bitcast <4 x float> %8 to <16 x i8>
2218 %10 = bitcast <16 x i8>* %3 to <4 x float>*
2219 %11 = bitcast <16 x i8>* %3 to float*
2226 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
2227 %15 = load <4 x float>, <4 x float>* %10, align 8
2228 %16 = insertelement <4 x float> %15, float %1, i64 0
2229 %17 = bitcast <4 x float> %16 to <16 x i8>
2230 store float %1, float* %11, align 8
2231 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
2232 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
2233 store <16 x i8> %19, <16 x i8>* %3, align 8
2234 %20 = add nuw i32 %14, 1
2235 %21 = icmp eq i32 %20, %0
2236 br i1 %21, label %12, label %13
2239 define arm_aapcs_vfpcc void @aesd_zero(<16 x i8>* %0) nounwind {
2240 ; CHECK-FIX-LABEL: aesd_zero:
2241 ; CHECK-FIX: @ %bb.0:
2242 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
2243 ; CHECK-FIX-NEXT: vmov.i32 q9, #0x0
2244 ; CHECK-FIX-NEXT: aesd.8 q9, q8
2245 ; CHECK-FIX-NEXT: aesimc.8 q8, q9
2246 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
2247 ; CHECK-FIX-NEXT: bx lr
2248 %2 = load <16 x i8>, <16 x i8>* %0, align 8
2249 %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> zeroinitializer, <16 x i8> %2)
2250 %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
2251 store <16 x i8> %4, <16 x i8>* %0, align 8
2255 define arm_aapcs_vfpcc void @aesd_via_call1(<16 x i8>* %0) nounwind {
2256 ; CHECK-FIX-LABEL: aesd_via_call1:
2257 ; CHECK-FIX: @ %bb.0:
2258 ; CHECK-FIX-NEXT: .save {r4, lr}
2259 ; CHECK-FIX-NEXT: push {r4, lr}
2260 ; CHECK-FIX-NEXT: mov r4, r0
2261 ; CHECK-FIX-NEXT: bl get_input
2262 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2263 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
2264 ; CHECK-FIX-NEXT: aesd.8 q0, q8
2265 ; CHECK-FIX-NEXT: aesimc.8 q8, q0
2266 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
2267 ; CHECK-FIX-NEXT: pop {r4, pc}
2268 %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
2269 %3 = load <16 x i8>, <16 x i8>* %0, align 8
2270 %4 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %2, <16 x i8> %3)
2271 %5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %4)
2272 store <16 x i8> %5, <16 x i8>* %0, align 8
2276 define arm_aapcs_vfpcc void @aesd_via_call2(half %0, <16 x i8>* %1) nounwind {
2277 ; CHECK-FIX-LABEL: aesd_via_call2:
2278 ; CHECK-FIX: @ %bb.0:
2279 ; CHECK-FIX-NEXT: .save {r4, lr}
2280 ; CHECK-FIX-NEXT: push {r4, lr}
2281 ; CHECK-FIX-NEXT: mov r4, r0
2282 ; CHECK-FIX-NEXT: bl get_inputf16
2283 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2284 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
2285 ; CHECK-FIX-NEXT: aesd.8 q0, q8
2286 ; CHECK-FIX-NEXT: aesimc.8 q8, q0
2287 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
2288 ; CHECK-FIX-NEXT: pop {r4, pc}
2289 %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
2290 %4 = load <16 x i8>, <16 x i8>* %1, align 8
2291 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2292 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2293 store <16 x i8> %6, <16 x i8>* %1, align 8
2297 define arm_aapcs_vfpcc void @aesd_via_call3(float %0, <16 x i8>* %1) nounwind {
2298 ; CHECK-FIX-LABEL: aesd_via_call3:
2299 ; CHECK-FIX: @ %bb.0:
2300 ; CHECK-FIX-NEXT: .save {r4, lr}
2301 ; CHECK-FIX-NEXT: push {r4, lr}
2302 ; CHECK-FIX-NEXT: mov r4, r0
2303 ; CHECK-FIX-NEXT: bl get_inputf32
2304 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2305 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
2306 ; CHECK-FIX-NEXT: aesd.8 q0, q8
2307 ; CHECK-FIX-NEXT: aesimc.8 q8, q0
2308 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
2309 ; CHECK-FIX-NEXT: pop {r4, pc}
2310 %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
2311 %4 = load <16 x i8>, <16 x i8>* %1, align 8
2312 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2313 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2314 store <16 x i8> %6, <16 x i8>* %1, align 8
2318 define arm_aapcs_vfpcc void @aesd_once_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
2319 ; CHECK-FIX-LABEL: aesd_once_via_ptr:
2320 ; CHECK-FIX: @ %bb.0:
2321 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
2322 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
2323 ; CHECK-FIX-NEXT: aesd.8 q9, q8
2324 ; CHECK-FIX-NEXT: aesimc.8 q8, q9
2325 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2326 ; CHECK-FIX-NEXT: bx lr
2327 %3 = load <16 x i8>, <16 x i8>* %1, align 8
2328 %4 = load <16 x i8>, <16 x i8>* %0, align 8
2329 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2330 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2331 store <16 x i8> %6, <16 x i8>* %1, align 8
2335 define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
2336 ; CHECK-FIX-LABEL: aesd_once_via_val:
2337 ; CHECK-FIX: @ %bb.0:
2338 ; CHECK-FIX-NEXT: vorr q1, q1, q1
2339 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2340 ; CHECK-FIX-NEXT: aesd.8 q1, q0
2341 ; CHECK-FIX-NEXT: aesimc.8 q0, q1
2342 ; CHECK-FIX-NEXT: bx lr
2343 %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
2344 %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
2348 define arm_aapcs_vfpcc void @aesd_twice_via_ptr(<16 x i8>* %0, <16 x i8>* %1) nounwind {
2349 ; CHECK-FIX-LABEL: aesd_twice_via_ptr:
2350 ; CHECK-FIX: @ %bb.0:
2351 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
2352 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
2353 ; CHECK-FIX-NEXT: aesd.8 q9, q8
2354 ; CHECK-FIX-NEXT: aesimc.8 q8, q9
2355 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2356 ; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
2357 ; CHECK-FIX-NEXT: aesd.8 q8, q9
2358 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2359 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2360 ; CHECK-FIX-NEXT: bx lr
2361 %3 = load <16 x i8>, <16 x i8>* %1, align 8
2362 %4 = load <16 x i8>, <16 x i8>* %0, align 8
2363 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2364 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2365 store <16 x i8> %6, <16 x i8>* %1, align 8
2366 %7 = load <16 x i8>, <16 x i8>* %0, align 8
2367 %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
2368 %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
2369 store <16 x i8> %9, <16 x i8>* %1, align 8
2373 define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
2374 ; CHECK-FIX-LABEL: aesd_twice_via_val:
2375 ; CHECK-FIX: @ %bb.0:
2376 ; CHECK-FIX-NEXT: vorr q1, q1, q1
2377 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2378 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2379 ; CHECK-FIX-NEXT: aesd.8 q1, q0
2380 ; CHECK-FIX-NEXT: aesimc.8 q8, q1
2381 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2382 ; CHECK-FIX-NEXT: aesimc.8 q0, q8
2383 ; CHECK-FIX-NEXT: bx lr
2384 %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
2385 %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
2386 %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %4, <16 x i8> %0)
2387 %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2391 define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, <16 x i8>* %1, <16 x i8>* %2) nounwind {
2392 ; CHECK-FIX-NOSCHED-LABEL: aesd_loop_via_ptr:
2393 ; CHECK-FIX-NOSCHED: @ %bb.0:
2394 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
2395 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
2396 ; CHECK-FIX-NOSCHED-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
2397 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
2398 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
2399 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r2]
2400 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q9, q8
2401 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q9
2402 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
2403 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB54_1
2404 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.2:
2405 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2407 ; CHECK-CORTEX-FIX-LABEL: aesd_loop_via_ptr:
2408 ; CHECK-CORTEX-FIX: @ %bb.0:
2409 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
2410 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
2411 ; CHECK-CORTEX-FIX-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
2412 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2413 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r2]
2414 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
2415 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
2416 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
2417 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2418 ; CHECK-CORTEX-FIX-NEXT: bne .LBB54_1
2419 ; CHECK-CORTEX-FIX-NEXT: @ %bb.2:
2420 ; CHECK-CORTEX-FIX-NEXT: bx lr
2421 %4 = icmp eq i32 %0, 0
2422 br i1 %4, label %5, label %6
2428 %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
2429 %8 = load <16 x i8>, <16 x i8>* %2, align 8
2430 %9 = load <16 x i8>, <16 x i8>* %1, align 8
2431 %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %9)
2432 %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
2433 store <16 x i8> %11, <16 x i8>* %2, align 8
2434 %12 = add nuw i32 %7, 1
2435 %13 = icmp eq i32 %12, %0
2436 br i1 %13, label %5, label %6
2439 define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
2440 ; CHECK-FIX-LABEL: aesd_loop_via_val:
2441 ; CHECK-FIX: @ %bb.0:
2442 ; CHECK-FIX-NEXT: vorr q1, q1, q1
2443 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2444 ; CHECK-FIX-NEXT: cmp r0, #0
2445 ; CHECK-FIX-NEXT: beq .LBB55_2
2446 ; CHECK-FIX-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1
2447 ; CHECK-FIX-NEXT: aesd.8 q1, q0
2448 ; CHECK-FIX-NEXT: subs r0, r0, #1
2449 ; CHECK-FIX-NEXT: aesimc.8 q1, q1
2450 ; CHECK-FIX-NEXT: bne .LBB55_1
2451 ; CHECK-FIX-NEXT: .LBB55_2:
2452 ; CHECK-FIX-NEXT: vorr q0, q1, q1
2453 ; CHECK-FIX-NEXT: bx lr
2454 %4 = icmp eq i32 %0, 0
2455 br i1 %4, label %5, label %7
2458 %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
2462 %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
2463 %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
2464 %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %1)
2465 %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
2466 %12 = add nuw i32 %8, 1
2467 %13 = icmp eq i32 %12, %0
2468 br i1 %13, label %5, label %7
2471 define arm_aapcs_vfpcc void @aesd_set8_via_ptr(i8* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
2472 ; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr:
2473 ; CHECK-FIX-NOSCHED: @ %bb.0:
2474 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
2475 ; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
2476 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
2477 ; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
2478 ; CHECK-FIX-NOSCHED-NEXT: vmov.8 d16[0], r0
2479 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
2480 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
2481 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2482 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2484 ; CHECK-CORTEX-FIX-LABEL: aesd_set8_via_ptr:
2485 ; CHECK-CORTEX-FIX: @ %bb.0:
2486 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
2487 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2488 ; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
2489 ; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
2490 ; CHECK-CORTEX-FIX-NEXT: vmov.8 d16[0], r0
2491 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
2492 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
2493 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2494 ; CHECK-CORTEX-FIX-NEXT: bx lr
2495 %4 = load i8, i8* %0, align 1
2496 %5 = load <16 x i8>, <16 x i8>* %2, align 8
2497 %6 = insertelement <16 x i8> %5, i8 %4, i64 0
2498 %7 = insertelement <16 x i8> %1, i8 %4, i64 0
2499 %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
2500 %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
2501 store <16 x i8> %9, <16 x i8>* %2, align 8
2505 define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
2506 ; CHECK-FIX-LABEL: aesd_set8_via_val:
2507 ; CHECK-FIX: @ %bb.0:
2508 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2509 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2510 ; CHECK-FIX-NEXT: vmov.8 d0[0], r0
2511 ; CHECK-FIX-NEXT: vmov.8 d16[0], r0
2512 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2513 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2514 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2515 ; CHECK-FIX-NEXT: bx lr
2516 %4 = load <16 x i8>, <16 x i8>* %2, align 8
2517 %5 = insertelement <16 x i8> %4, i8 %0, i64 0
2518 %6 = insertelement <16 x i8> %1, i8 %0, i64 0
2519 %7 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %5, <16 x i8> %6)
2520 %8 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %7)
2521 store <16 x i8> %8, <16 x i8>* %2, align 8
2525 define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2526 ; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr:
2527 ; CHECK-FIX: @ %bb.0:
2528 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2529 ; CHECK-FIX-NEXT: cmp r0, #0
2530 ; CHECK-FIX-NEXT: beq .LBB58_2
2531 ; CHECK-FIX-NEXT: @ %bb.1:
2532 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2533 ; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
2534 ; CHECK-FIX-NEXT: cmp r0, #0
2535 ; CHECK-FIX-NEXT: bne .LBB58_3
2536 ; CHECK-FIX-NEXT: b .LBB58_4
2537 ; CHECK-FIX-NEXT: .LBB58_2:
2538 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2539 ; CHECK-FIX-NEXT: cmp r0, #0
2540 ; CHECK-FIX-NEXT: beq .LBB58_4
2541 ; CHECK-FIX-NEXT: .LBB58_3:
2542 ; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1]
2543 ; CHECK-FIX-NEXT: .LBB58_4:
2544 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2545 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2546 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2547 ; CHECK-FIX-NEXT: bx lr
2548 br i1 %0, label %5, label %9
2551 %6 = load i8, i8* %1, align 1
2552 %7 = load <16 x i8>, <16 x i8>* %3, align 8
2553 %8 = insertelement <16 x i8> %7, i8 %6, i64 0
2557 %10 = load <16 x i8>, <16 x i8>* %3, align 8
2561 %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
2562 br i1 %0, label %13, label %16
2565 %14 = load i8, i8* %1, align 1
2566 %15 = insertelement <16 x i8> %2, i8 %14, i64 0
2570 %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
2571 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %17)
2572 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
2573 store <16 x i8> %19, <16 x i8>* %3, align 8
2577 define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2578 ; CHECK-FIX-LABEL: aesd_set8_cond_via_val:
2579 ; CHECK-FIX: @ %bb.0:
2580 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2581 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2582 ; CHECK-FIX-NEXT: cmp r0, #0
2583 ; CHECK-FIX-NEXT: beq .LBB59_2
2584 ; CHECK-FIX-NEXT: @ %bb.1:
2585 ; CHECK-FIX-NEXT: vmov.8 d16[0], r1
2586 ; CHECK-FIX-NEXT: .LBB59_2: @ %select.end
2587 ; CHECK-FIX-NEXT: cmp r0, #0
2588 ; CHECK-FIX-NEXT: beq .LBB59_4
2589 ; CHECK-FIX-NEXT: @ %bb.3:
2590 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
2591 ; CHECK-FIX-NEXT: .LBB59_4: @ %select.end2
2592 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2593 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2594 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2595 ; CHECK-FIX-NEXT: bx lr
2596 %5 = load <16 x i8>, <16 x i8>* %3, align 8
2597 %6 = insertelement <16 x i8> %5, i8 %1, i64 0
2598 %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
2599 %8 = insertelement <16 x i8> %2, i8 %1, i64 0
2600 %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
2601 %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %9)
2602 %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
2603 store <16 x i8> %11, <16 x i8>* %3, align 8
2607 define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, i8* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2608 ; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr:
2609 ; CHECK-FIX: @ %bb.0:
2610 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2611 ; CHECK-FIX-NEXT: ldrb r1, [r1]
2612 ; CHECK-FIX-NEXT: cmp r0, #0
2613 ; CHECK-FIX-NEXT: strb r1, [r2]
2614 ; CHECK-FIX-NEXT: bxeq lr
2615 ; CHECK-FIX-NEXT: .LBB60_1:
2616 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
2617 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2618 ; CHECK-FIX-NEXT: .LBB60_2: @ =>This Inner Loop Header: Depth=1
2619 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2620 ; CHECK-FIX-NEXT: subs r0, r0, #1
2621 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2622 ; CHECK-FIX-NEXT: bne .LBB60_2
2623 ; CHECK-FIX-NEXT: @ %bb.3:
2624 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2625 ; CHECK-FIX-NEXT: bx lr
2626 %5 = load i8, i8* %1, align 1
2627 %6 = insertelement <16 x i8> %2, i8 %5, i64 0
2628 %7 = getelementptr inbounds <16 x i8>, <16 x i8>* %3, i32 0, i32 0
2629 store i8 %5, i8* %7, align 8
2630 %8 = icmp eq i32 %0, 0
2631 br i1 %8, label %12, label %9
2634 %10 = load <16 x i8>, <16 x i8>* %3, align 8
2638 store <16 x i8> %17, <16 x i8>* %3, align 8
2645 %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
2646 %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
2647 %16 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %6)
2648 %17 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %16)
2649 %18 = add nuw i32 %15, 1
2650 %19 = icmp eq i32 %18, %0
2651 br i1 %19, label %11, label %13
2654 define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2655 ; CHECK-FIX-LABEL: aesd_set8_loop_via_val:
2656 ; CHECK-FIX: @ %bb.0:
2657 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2658 ; CHECK-FIX-NEXT: cmp r0, #0
2659 ; CHECK-FIX-NEXT: bxeq lr
2660 ; CHECK-FIX-NEXT: .LBB61_1:
2661 ; CHECK-FIX-NEXT: vmov.8 d0[0], r1
2662 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2663 ; CHECK-FIX-NEXT: .LBB61_2: @ =>This Inner Loop Header: Depth=1
2664 ; CHECK-FIX-NEXT: vmov.8 d16[0], r1
2665 ; CHECK-FIX-NEXT: subs r0, r0, #1
2666 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2667 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2668 ; CHECK-FIX-NEXT: bne .LBB61_2
2669 ; CHECK-FIX-NEXT: @ %bb.3:
2670 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2671 ; CHECK-FIX-NEXT: bx lr
2672 %5 = icmp eq i32 %0, 0
2673 br i1 %5, label %10, label %6
2676 %7 = insertelement <16 x i8> %2, i8 %1, i64 0
2677 %8 = load <16 x i8>, <16 x i8>* %3, align 8
2681 store <16 x i8> %16, <16 x i8>* %3, align 8
2688 %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
2689 %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
2690 %14 = insertelement <16 x i8> %12, i8 %1, i64 0
2691 %15 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %7)
2692 %16 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %15)
2693 %17 = add nuw i32 %13, 1
2694 %18 = icmp eq i32 %17, %0
2695 br i1 %18, label %9, label %11
2698 define arm_aapcs_vfpcc void @aesd_set16_via_ptr(i16* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
2699 ; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr:
2700 ; CHECK-FIX-NOSCHED: @ %bb.0:
2701 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
2702 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
2703 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
2704 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
2705 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
2706 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
2707 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
2708 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2709 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2711 ; CHECK-CORTEX-FIX-LABEL: aesd_set16_via_ptr:
2712 ; CHECK-CORTEX-FIX: @ %bb.0:
2713 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
2714 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2715 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
2716 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
2717 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
2718 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
2719 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
2720 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2721 ; CHECK-CORTEX-FIX-NEXT: bx lr
2722 %4 = load i16, i16* %0, align 2
2723 %5 = bitcast <16 x i8>* %2 to <8 x i16>*
2724 %6 = load <8 x i16>, <8 x i16>* %5, align 8
2725 %7 = insertelement <8 x i16> %6, i16 %4, i64 0
2726 %8 = bitcast <8 x i16> %7 to <16 x i8>
2727 %9 = bitcast <16 x i8> %1 to <8 x i16>
2728 %10 = insertelement <8 x i16> %9, i16 %4, i64 0
2729 %11 = bitcast <8 x i16> %10 to <16 x i8>
2730 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
2731 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
2732 store <16 x i8> %13, <16 x i8>* %2, align 8
2736 define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
2737 ; CHECK-FIX-LABEL: aesd_set16_via_val:
2738 ; CHECK-FIX: @ %bb.0:
2739 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2740 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2741 ; CHECK-FIX-NEXT: vmov.16 d0[0], r0
2742 ; CHECK-FIX-NEXT: vmov.16 d16[0], r0
2743 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2744 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2745 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2746 ; CHECK-FIX-NEXT: bx lr
2747 %4 = bitcast <16 x i8>* %2 to <8 x i16>*
2748 %5 = load <8 x i16>, <8 x i16>* %4, align 8
2749 %6 = insertelement <8 x i16> %5, i16 %0, i64 0
2750 %7 = bitcast <8 x i16> %6 to <16 x i8>
2751 %8 = bitcast <16 x i8> %1 to <8 x i16>
2752 %9 = insertelement <8 x i16> %8, i16 %0, i64 0
2753 %10 = bitcast <8 x i16> %9 to <16 x i8>
2754 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
2755 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
2756 store <16 x i8> %12, <16 x i8>* %2, align 8
2760 define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2761 ; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr:
2762 ; CHECK-FIX: @ %bb.0:
2763 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2764 ; CHECK-FIX-NEXT: cmp r0, #0
2765 ; CHECK-FIX-NEXT: beq .LBB64_2
2766 ; CHECK-FIX-NEXT: @ %bb.1:
2767 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2768 ; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
2769 ; CHECK-FIX-NEXT: cmp r0, #0
2770 ; CHECK-FIX-NEXT: bne .LBB64_3
2771 ; CHECK-FIX-NEXT: b .LBB64_4
2772 ; CHECK-FIX-NEXT: .LBB64_2:
2773 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2774 ; CHECK-FIX-NEXT: cmp r0, #0
2775 ; CHECK-FIX-NEXT: beq .LBB64_4
2776 ; CHECK-FIX-NEXT: .LBB64_3:
2777 ; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16]
2778 ; CHECK-FIX-NEXT: .LBB64_4:
2779 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2780 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2781 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2782 ; CHECK-FIX-NEXT: bx lr
2783 br i1 %0, label %5, label %10
2786 %6 = load i16, i16* %1, align 2
2787 %7 = bitcast <16 x i8>* %3 to <8 x i16>*
2788 %8 = load <8 x i16>, <8 x i16>* %7, align 8
2789 %9 = insertelement <8 x i16> %8, i16 %6, i64 0
2793 %11 = bitcast <16 x i8>* %3 to <8 x i16>*
2794 %12 = load <8 x i16>, <8 x i16>* %11, align 8
2798 %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
2799 br i1 %0, label %15, label %19
2802 %16 = load i16, i16* %1, align 2
2803 %17 = bitcast <16 x i8> %2 to <8 x i16>
2804 %18 = insertelement <8 x i16> %17, i16 %16, i64 0
2808 %20 = bitcast <16 x i8> %2 to <8 x i16>
2812 %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
2813 %23 = bitcast <8 x i16> %14 to <16 x i8>
2814 %24 = bitcast <8 x i16> %22 to <16 x i8>
2815 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
2816 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
2817 store <16 x i8> %26, <16 x i8>* %3, align 8
2821 define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2822 ; CHECK-FIX-LABEL: aesd_set16_cond_via_val:
2823 ; CHECK-FIX: @ %bb.0:
2824 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2825 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2826 ; CHECK-FIX-NEXT: cmp r0, #0
2827 ; CHECK-FIX-NEXT: beq .LBB65_2
2828 ; CHECK-FIX-NEXT: @ %bb.1:
2829 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
2830 ; CHECK-FIX-NEXT: .LBB65_2: @ %select.end
2831 ; CHECK-FIX-NEXT: cmp r0, #0
2832 ; CHECK-FIX-NEXT: beq .LBB65_4
2833 ; CHECK-FIX-NEXT: @ %bb.3:
2834 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
2835 ; CHECK-FIX-NEXT: .LBB65_4: @ %select.end2
2836 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2837 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2838 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2839 ; CHECK-FIX-NEXT: bx lr
2840 %5 = bitcast <16 x i8>* %3 to <8 x i16>*
2841 %6 = load <8 x i16>, <8 x i16>* %5, align 8
2842 %7 = insertelement <8 x i16> %6, i16 %1, i64 0
2843 %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
2844 %9 = bitcast <16 x i8> %2 to <8 x i16>
2845 %10 = insertelement <8 x i16> %9, i16 %1, i64 0
2846 %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
2847 %12 = bitcast <8 x i16> %8 to <16 x i8>
2848 %13 = bitcast <8 x i16> %11 to <16 x i8>
2849 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
2850 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
2851 store <16 x i8> %15, <16 x i8>* %3, align 8
2855 define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, i16* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2856 ; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr:
2857 ; CHECK-FIX: @ %bb.0:
2858 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2859 ; CHECK-FIX-NEXT: ldrh r1, [r1]
2860 ; CHECK-FIX-NEXT: cmp r0, #0
2861 ; CHECK-FIX-NEXT: strh r1, [r2]
2862 ; CHECK-FIX-NEXT: bxeq lr
2863 ; CHECK-FIX-NEXT: .LBB66_1:
2864 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
2865 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2866 ; CHECK-FIX-NEXT: .LBB66_2: @ =>This Inner Loop Header: Depth=1
2867 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2868 ; CHECK-FIX-NEXT: subs r0, r0, #1
2869 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2870 ; CHECK-FIX-NEXT: bne .LBB66_2
2871 ; CHECK-FIX-NEXT: @ %bb.3:
2872 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2873 ; CHECK-FIX-NEXT: bx lr
2874 %5 = load i16, i16* %1, align 2
2875 %6 = bitcast <16 x i8> %2 to <8 x i16>
2876 %7 = insertelement <8 x i16> %6, i16 %5, i64 0
2877 %8 = bitcast <8 x i16> %7 to <16 x i8>
2878 %9 = bitcast <16 x i8>* %3 to i16*
2879 store i16 %5, i16* %9, align 8
2880 %10 = icmp eq i32 %0, 0
2881 br i1 %10, label %14, label %11
2884 %12 = load <16 x i8>, <16 x i8>* %3, align 8
2888 store <16 x i8> %19, <16 x i8>* %3, align 8
2895 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
2896 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
2897 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
2898 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
2899 %20 = add nuw i32 %17, 1
2900 %21 = icmp eq i32 %20, %0
2901 br i1 %21, label %13, label %15
2904 define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
2905 ; CHECK-FIX-LABEL: aesd_set16_loop_via_val:
2906 ; CHECK-FIX: @ %bb.0:
2907 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2908 ; CHECK-FIX-NEXT: cmp r0, #0
2909 ; CHECK-FIX-NEXT: bxeq lr
2910 ; CHECK-FIX-NEXT: .LBB67_1:
2911 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
2912 ; CHECK-FIX-NEXT: .LBB67_2: @ =>This Inner Loop Header: Depth=1
2913 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
2914 ; CHECK-FIX-NEXT: subs r0, r0, #1
2915 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
2916 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2917 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2918 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
2919 ; CHECK-FIX-NEXT: bne .LBB67_2
2920 ; CHECK-FIX-NEXT: @ %bb.3:
2921 ; CHECK-FIX-NEXT: bx lr
2922 %5 = icmp eq i32 %0, 0
2923 br i1 %5, label %12, label %6
2926 %7 = bitcast <16 x i8> %2 to <8 x i16>
2927 %8 = insertelement <8 x i16> %7, i16 %1, i64 0
2928 %9 = bitcast <8 x i16> %8 to <16 x i8>
2929 %10 = bitcast <16 x i8>* %3 to <8 x i16>*
2930 %11 = bitcast <16 x i8>* %3 to i16*
2937 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
2938 %15 = load <8 x i16>, <8 x i16>* %10, align 8
2939 %16 = insertelement <8 x i16> %15, i16 %1, i64 0
2940 %17 = bitcast <8 x i16> %16 to <16 x i8>
2941 store i16 %1, i16* %11, align 8
2942 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
2943 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
2944 store <16 x i8> %19, <16 x i8>* %3, align 8
2945 %20 = add nuw i32 %14, 1
2946 %21 = icmp eq i32 %20, %0
2947 br i1 %21, label %12, label %13
2950 define arm_aapcs_vfpcc void @aesd_set32_via_ptr(i32* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
2951 ; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr:
2952 ; CHECK-FIX-NOSCHED: @ %bb.0:
2953 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
2954 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
2955 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
2956 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
2957 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
2958 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
2959 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
2960 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
2961 ; CHECK-FIX-NOSCHED-NEXT: bx lr
2963 ; CHECK-CORTEX-FIX-LABEL: aesd_set32_via_ptr:
2964 ; CHECK-CORTEX-FIX: @ %bb.0:
2965 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
2966 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2967 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
2968 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
2969 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0
2970 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
2971 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
2972 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2973 ; CHECK-CORTEX-FIX-NEXT: bx lr
2974 %4 = load i32, i32* %0, align 4
2975 %5 = bitcast <16 x i8>* %2 to <4 x i32>*
2976 %6 = load <4 x i32>, <4 x i32>* %5, align 8
2977 %7 = insertelement <4 x i32> %6, i32 %4, i64 0
2978 %8 = bitcast <4 x i32> %7 to <16 x i8>
2979 %9 = bitcast <16 x i8> %1 to <4 x i32>
2980 %10 = insertelement <4 x i32> %9, i32 %4, i64 0
2981 %11 = bitcast <4 x i32> %10 to <16 x i8>
2982 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
2983 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
2984 store <16 x i8> %13, <16 x i8>* %2, align 8
2988 define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
2989 ; CHECK-FIX-LABEL: aesd_set32_via_val:
2990 ; CHECK-FIX: @ %bb.0:
2991 ; CHECK-FIX-NEXT: vorr q0, q0, q0
2992 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
2993 ; CHECK-FIX-NEXT: vmov.32 d0[0], r0
2994 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0
2995 ; CHECK-FIX-NEXT: aesd.8 q8, q0
2996 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
2997 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
2998 ; CHECK-FIX-NEXT: bx lr
2999 %4 = bitcast <16 x i8>* %2 to <4 x i32>*
3000 %5 = load <4 x i32>, <4 x i32>* %4, align 8
3001 %6 = insertelement <4 x i32> %5, i32 %0, i64 0
3002 %7 = bitcast <4 x i32> %6 to <16 x i8>
3003 %8 = bitcast <16 x i8> %1 to <4 x i32>
3004 %9 = insertelement <4 x i32> %8, i32 %0, i64 0
3005 %10 = bitcast <4 x i32> %9 to <16 x i8>
3006 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
3007 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
3008 store <16 x i8> %12, <16 x i8>* %2, align 8
3012 define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3013 ; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr:
3014 ; CHECK-FIX: @ %bb.0:
3015 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3016 ; CHECK-FIX-NEXT: cmp r0, #0
3017 ; CHECK-FIX-NEXT: beq .LBB70_2
3018 ; CHECK-FIX-NEXT: @ %bb.1:
3019 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3020 ; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
3021 ; CHECK-FIX-NEXT: cmp r0, #0
3022 ; CHECK-FIX-NEXT: bne .LBB70_3
3023 ; CHECK-FIX-NEXT: b .LBB70_4
3024 ; CHECK-FIX-NEXT: .LBB70_2:
3025 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3026 ; CHECK-FIX-NEXT: cmp r0, #0
3027 ; CHECK-FIX-NEXT: beq .LBB70_4
3028 ; CHECK-FIX-NEXT: .LBB70_3:
3029 ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
3030 ; CHECK-FIX-NEXT: .LBB70_4:
3031 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3032 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3033 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3034 ; CHECK-FIX-NEXT: bx lr
3035 br i1 %0, label %5, label %10
3038 %6 = load i32, i32* %1, align 4
3039 %7 = bitcast <16 x i8>* %3 to <4 x i32>*
3040 %8 = load <4 x i32>, <4 x i32>* %7, align 8
3041 %9 = insertelement <4 x i32> %8, i32 %6, i64 0
3045 %11 = bitcast <16 x i8>* %3 to <4 x i32>*
3046 %12 = load <4 x i32>, <4 x i32>* %11, align 8
3050 %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
3051 br i1 %0, label %15, label %19
3054 %16 = load i32, i32* %1, align 4
3055 %17 = bitcast <16 x i8> %2 to <4 x i32>
3056 %18 = insertelement <4 x i32> %17, i32 %16, i64 0
3060 %20 = bitcast <16 x i8> %2 to <4 x i32>
3064 %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
3065 %23 = bitcast <4 x i32> %14 to <16 x i8>
3066 %24 = bitcast <4 x i32> %22 to <16 x i8>
3067 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
3068 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
3069 store <16 x i8> %26, <16 x i8>* %3, align 8
3073 define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3074 ; CHECK-FIX-LABEL: aesd_set32_cond_via_val:
3075 ; CHECK-FIX: @ %bb.0:
3076 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3077 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3078 ; CHECK-FIX-NEXT: cmp r0, #0
3079 ; CHECK-FIX-NEXT: beq .LBB71_2
3080 ; CHECK-FIX-NEXT: @ %bb.1:
3081 ; CHECK-FIX-NEXT: vmov.32 d16[0], r1
3082 ; CHECK-FIX-NEXT: .LBB71_2: @ %select.end
3083 ; CHECK-FIX-NEXT: cmp r0, #0
3084 ; CHECK-FIX-NEXT: beq .LBB71_4
3085 ; CHECK-FIX-NEXT: @ %bb.3:
3086 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
3087 ; CHECK-FIX-NEXT: .LBB71_4: @ %select.end2
3088 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3089 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3090 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3091 ; CHECK-FIX-NEXT: bx lr
3092 %5 = bitcast <16 x i8>* %3 to <4 x i32>*
3093 %6 = load <4 x i32>, <4 x i32>* %5, align 8
3094 %7 = insertelement <4 x i32> %6, i32 %1, i64 0
3095 %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
3096 %9 = bitcast <16 x i8> %2 to <4 x i32>
3097 %10 = insertelement <4 x i32> %9, i32 %1, i64 0
3098 %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
3099 %12 = bitcast <4 x i32> %8 to <16 x i8>
3100 %13 = bitcast <4 x i32> %11 to <16 x i8>
3101 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
3102 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
3103 store <16 x i8> %15, <16 x i8>* %3, align 8
3107 define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, i32* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3108 ; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr:
3109 ; CHECK-FIX: @ %bb.0:
3110 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3111 ; CHECK-FIX-NEXT: ldr r1, [r1]
3112 ; CHECK-FIX-NEXT: cmp r0, #0
3113 ; CHECK-FIX-NEXT: str r1, [r2]
3114 ; CHECK-FIX-NEXT: bxeq lr
3115 ; CHECK-FIX-NEXT: .LBB72_1:
3116 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
3117 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3118 ; CHECK-FIX-NEXT: .LBB72_2: @ =>This Inner Loop Header: Depth=1
3119 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3120 ; CHECK-FIX-NEXT: subs r0, r0, #1
3121 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3122 ; CHECK-FIX-NEXT: bne .LBB72_2
3123 ; CHECK-FIX-NEXT: @ %bb.3:
3124 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3125 ; CHECK-FIX-NEXT: bx lr
3126 %5 = load i32, i32* %1, align 4
3127 %6 = bitcast <16 x i8> %2 to <4 x i32>
3128 %7 = insertelement <4 x i32> %6, i32 %5, i64 0
3129 %8 = bitcast <4 x i32> %7 to <16 x i8>
3130 %9 = bitcast <16 x i8>* %3 to i32*
3131 store i32 %5, i32* %9, align 8
3132 %10 = icmp eq i32 %0, 0
3133 br i1 %10, label %14, label %11
3136 %12 = load <16 x i8>, <16 x i8>* %3, align 8
3140 store <16 x i8> %19, <16 x i8>* %3, align 8
3147 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
3148 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
3149 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
3150 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3151 %20 = add nuw i32 %17, 1
3152 %21 = icmp eq i32 %20, %0
3153 br i1 %21, label %13, label %15
3156 define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3157 ; CHECK-FIX-LABEL: aesd_set32_loop_via_val:
3158 ; CHECK-FIX: @ %bb.0:
3159 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3160 ; CHECK-FIX-NEXT: cmp r0, #0
3161 ; CHECK-FIX-NEXT: bxeq lr
3162 ; CHECK-FIX-NEXT: .LBB73_1:
3163 ; CHECK-FIX-NEXT: vmov.32 d0[0], r1
3164 ; CHECK-FIX-NEXT: .LBB73_2: @ =>This Inner Loop Header: Depth=1
3165 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3166 ; CHECK-FIX-NEXT: subs r0, r0, #1
3167 ; CHECK-FIX-NEXT: vmov.32 d16[0], r1
3168 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3169 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3170 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3171 ; CHECK-FIX-NEXT: bne .LBB73_2
3172 ; CHECK-FIX-NEXT: @ %bb.3:
3173 ; CHECK-FIX-NEXT: bx lr
3174 %5 = icmp eq i32 %0, 0
3175 br i1 %5, label %12, label %6
3178 %7 = bitcast <16 x i8> %2 to <4 x i32>
3179 %8 = insertelement <4 x i32> %7, i32 %1, i64 0
3180 %9 = bitcast <4 x i32> %8 to <16 x i8>
3181 %10 = bitcast <16 x i8>* %3 to <4 x i32>*
3182 %11 = bitcast <16 x i8>* %3 to i32*
3189 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
3190 %15 = load <4 x i32>, <4 x i32>* %10, align 8
3191 %16 = insertelement <4 x i32> %15, i32 %1, i64 0
3192 %17 = bitcast <4 x i32> %16 to <16 x i8>
3193 store i32 %1, i32* %11, align 8
3194 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
3195 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3196 store <16 x i8> %19, <16 x i8>* %3, align 8
3197 %20 = add nuw i32 %14, 1
3198 %21 = icmp eq i32 %20, %0
3199 br i1 %21, label %12, label %13
3202 define arm_aapcs_vfpcc void @aesd_set64_via_ptr(i64* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
3203 ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr:
3204 ; CHECK-FIX-NOSCHED: @ %bb.0:
3205 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
3206 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
3207 ; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0]
3208 ; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0
3209 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
3210 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3211 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
3212 ; CHECK-FIX-NOSCHED-NEXT: bx lr
3214 ; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr:
3215 ; CHECK-CORTEX-FIX: @ %bb.0:
3216 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
3217 ; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0]
3218 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3219 ; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0
3220 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
3221 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
3222 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
3223 ; CHECK-CORTEX-FIX-NEXT: bx lr
3224 %4 = load i64, i64* %0, align 8
3225 %5 = bitcast <16 x i8>* %2 to <2 x i64>*
3226 %6 = load <2 x i64>, <2 x i64>* %5, align 8
3227 %7 = insertelement <2 x i64> %6, i64 %4, i64 0
3228 %8 = bitcast <2 x i64> %7 to <16 x i8>
3229 %9 = bitcast <16 x i8> %1 to <2 x i64>
3230 %10 = insertelement <2 x i64> %9, i64 %4, i64 0
3231 %11 = bitcast <2 x i64> %10 to <16 x i8>
3232 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
3233 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
3234 store <16 x i8> %13, <16 x i8>* %2, align 8
3238 define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
3239 ; CHECK-FIX-LABEL: aesd_set64_via_val:
3240 ; CHECK-FIX: @ %bb.0:
3241 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3242 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3243 ; CHECK-FIX-NEXT: vmov.32 d0[0], r0
3244 ; CHECK-FIX-NEXT: vmov.32 d16[0], r0
3245 ; CHECK-FIX-NEXT: vmov.32 d0[1], r1
3246 ; CHECK-FIX-NEXT: vmov.32 d16[1], r1
3247 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3248 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3249 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3250 ; CHECK-FIX-NEXT: bx lr
3251 %4 = bitcast <16 x i8>* %2 to <2 x i64>*
3252 %5 = load <2 x i64>, <2 x i64>* %4, align 8
3253 %6 = insertelement <2 x i64> %5, i64 %0, i64 0
3254 %7 = bitcast <2 x i64> %6 to <16 x i8>
3255 %8 = bitcast <16 x i8> %1 to <2 x i64>
3256 %9 = insertelement <2 x i64> %8, i64 %0, i64 0
3257 %10 = bitcast <2 x i64> %9 to <16 x i8>
3258 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
3259 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
3260 store <16 x i8> %12, <16 x i8>* %2, align 8
3264 define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3265 ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr:
3266 ; CHECK-FIX-NOSCHED: @ %bb.0:
3267 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3268 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB76_2
3269 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
3270 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3271 ; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1]
3272 ; CHECK-FIX-NOSCHED-NEXT: b .LBB76_3
3273 ; CHECK-FIX-NOSCHED-NEXT: .LBB76_2:
3274 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3275 ; CHECK-FIX-NOSCHED-NEXT: .LBB76_3:
3276 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3277 ; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1]
3278 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
3279 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
3280 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3281 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
3282 ; CHECK-FIX-NOSCHED-NEXT: bx lr
3284 ; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr:
3285 ; CHECK-CORTEX-FIX: @ %bb.0:
3286 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3287 ; CHECK-CORTEX-FIX-NEXT: beq .LBB76_2
3288 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
3289 ; CHECK-CORTEX-FIX-NEXT: vldr d18, [r1]
3290 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3291 ; CHECK-CORTEX-FIX-NEXT: vorr d16, d18, d18
3292 ; CHECK-CORTEX-FIX-NEXT: b .LBB76_3
3293 ; CHECK-CORTEX-FIX-NEXT: .LBB76_2:
3294 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3295 ; CHECK-CORTEX-FIX-NEXT: .LBB76_3:
3296 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3297 ; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1]
3298 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
3299 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
3300 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
3301 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3302 ; CHECK-CORTEX-FIX-NEXT: bx lr
3303 br i1 %0, label %5, label %10
3306 %6 = load i64, i64* %1, align 8
3307 %7 = bitcast <16 x i8>* %3 to <2 x i64>*
3308 %8 = load <2 x i64>, <2 x i64>* %7, align 8
3309 %9 = insertelement <2 x i64> %8, i64 %6, i64 0
3313 %11 = bitcast <16 x i8>* %3 to <2 x i64>*
3314 %12 = load <2 x i64>, <2 x i64>* %11, align 8
3318 %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
3319 br i1 %0, label %15, label %19
3322 %16 = load i64, i64* %1, align 8
3323 %17 = bitcast <16 x i8> %2 to <2 x i64>
3324 %18 = insertelement <2 x i64> %17, i64 %16, i64 0
3328 %20 = bitcast <16 x i8> %2 to <2 x i64>
3332 %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
3333 %23 = bitcast <2 x i64> %14 to <16 x i8>
3334 %24 = bitcast <2 x i64> %22 to <16 x i8>
3335 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
3336 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
3337 store <16 x i8> %26, <16 x i8>* %3, align 8
3341 define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3342 ; CHECK-FIX-LABEL: aesd_set64_cond_via_val:
3343 ; CHECK-FIX: @ %bb.0:
3344 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3345 ; CHECK-FIX-NEXT: ldr r1, [sp]
3346 ; CHECK-FIX-NEXT: cmp r0, #0
3347 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3348 ; CHECK-FIX-NEXT: beq .LBB77_2
3349 ; CHECK-FIX-NEXT: @ %bb.1:
3350 ; CHECK-FIX-NEXT: vmov.32 d16[0], r2
3351 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3
3352 ; CHECK-FIX-NEXT: .LBB77_2: @ %select.end
3353 ; CHECK-FIX-NEXT: cmp r0, #0
3354 ; CHECK-FIX-NEXT: beq .LBB77_4
3355 ; CHECK-FIX-NEXT: @ %bb.3:
3356 ; CHECK-FIX-NEXT: vmov.32 d0[0], r2
3357 ; CHECK-FIX-NEXT: vmov.32 d0[1], r3
3358 ; CHECK-FIX-NEXT: .LBB77_4: @ %select.end2
3359 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3360 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3361 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
3362 ; CHECK-FIX-NEXT: bx lr
3363 %5 = bitcast <16 x i8>* %3 to <2 x i64>*
3364 %6 = load <2 x i64>, <2 x i64>* %5, align 8
3365 %7 = insertelement <2 x i64> %6, i64 %1, i64 0
3366 %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
3367 %9 = bitcast <16 x i8> %2 to <2 x i64>
3368 %10 = insertelement <2 x i64> %9, i64 %1, i64 0
3369 %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
3370 %12 = bitcast <2 x i64> %8 to <16 x i8>
3371 %13 = bitcast <2 x i64> %11 to <16 x i8>
3372 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
3373 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
3374 store <16 x i8> %15, <16 x i8>* %3, align 8
3378 define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, i64* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3379 ; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr:
3380 ; CHECK-FIX-NOSCHED: @ %bb.0:
3381 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
3382 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr}
3383 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr}
3384 ; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1]
3385 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3386 ; CHECK-FIX-NOSCHED-NEXT: strd r4, r5, [r2]
3387 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB78_4
3388 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
3389 ; CHECK-FIX-NOSCHED-NEXT: vmov d0, r4, r5
3390 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3391 ; CHECK-FIX-NOSCHED-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1
3392 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
3393 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
3394 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3395 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB78_2
3396 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
3397 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
3398 ; CHECK-FIX-NOSCHED-NEXT: .LBB78_4:
3399 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r11, pc}
3401 ; CHECK-CORTEX-FIX-LABEL: aesd_set64_loop_via_ptr:
3402 ; CHECK-CORTEX-FIX: @ %bb.0:
3403 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
3404 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr}
3405 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr}
3406 ; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1]
3407 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3408 ; CHECK-CORTEX-FIX-NEXT: strd r4, r5, [r2]
3409 ; CHECK-CORTEX-FIX-NEXT: popeq {r4, r5, r11, pc}
3410 ; CHECK-CORTEX-FIX-NEXT: .LBB78_1:
3411 ; CHECK-CORTEX-FIX-NEXT: vmov d0, r4, r5
3412 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3413 ; CHECK-CORTEX-FIX-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1
3414 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
3415 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
3416 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
3417 ; CHECK-CORTEX-FIX-NEXT: bne .LBB78_2
3418 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
3419 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3420 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc}
3421 %5 = load i64, i64* %1, align 8
3422 %6 = bitcast <16 x i8> %2 to <2 x i64>
3423 %7 = insertelement <2 x i64> %6, i64 %5, i64 0
3424 %8 = bitcast <2 x i64> %7 to <16 x i8>
3425 %9 = bitcast <16 x i8>* %3 to i64*
3426 store i64 %5, i64* %9, align 8
3427 %10 = icmp eq i32 %0, 0
3428 br i1 %10, label %14, label %11
3431 %12 = load <16 x i8>, <16 x i8>* %3, align 8
3435 store <16 x i8> %19, <16 x i8>* %3, align 8
3442 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
3443 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
3444 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
3445 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3446 %20 = add nuw i32 %17, 1
3447 %21 = icmp eq i32 %20, %0
3448 br i1 %21, label %13, label %15
3451 define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3452 ; CHECK-FIX-LABEL: aesd_set64_loop_via_val:
3453 ; CHECK-FIX: @ %bb.0:
3454 ; CHECK-FIX-NEXT: vorr q0, q0, q0
3455 ; CHECK-FIX-NEXT: cmp r0, #0
3456 ; CHECK-FIX-NEXT: bxeq lr
3457 ; CHECK-FIX-NEXT: .LBB79_1:
3458 ; CHECK-FIX-NEXT: vmov.32 d0[0], r2
3459 ; CHECK-FIX-NEXT: ldr r1, [sp]
3460 ; CHECK-FIX-NEXT: vmov.32 d0[1], r3
3461 ; CHECK-FIX-NEXT: .LBB79_2: @ =>This Inner Loop Header: Depth=1
3462 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3463 ; CHECK-FIX-NEXT: subs r0, r0, #1
3464 ; CHECK-FIX-NEXT: vmov.32 d16[0], r2
3465 ; CHECK-FIX-NEXT: vmov.32 d16[1], r3
3466 ; CHECK-FIX-NEXT: aesd.8 q8, q0
3467 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3468 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
3469 ; CHECK-FIX-NEXT: bne .LBB79_2
3470 ; CHECK-FIX-NEXT: @ %bb.3:
3471 ; CHECK-FIX-NEXT: bx lr
3472 %5 = icmp eq i32 %0, 0
3473 br i1 %5, label %12, label %6
3476 %7 = bitcast <16 x i8> %2 to <2 x i64>
3477 %8 = insertelement <2 x i64> %7, i64 %1, i64 0
3478 %9 = bitcast <2 x i64> %8 to <16 x i8>
3479 %10 = bitcast <16 x i8>* %3 to <2 x i64>*
3480 %11 = bitcast <16 x i8>* %3 to i64*
3487 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
3488 %15 = load <2 x i64>, <2 x i64>* %10, align 8
3489 %16 = insertelement <2 x i64> %15, i64 %1, i64 0
3490 %17 = bitcast <2 x i64> %16 to <16 x i8>
3491 store i64 %1, i64* %11, align 8
3492 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
3493 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3494 store <16 x i8> %19, <16 x i8>* %3, align 8
3495 %20 = add nuw i32 %14, 1
3496 %21 = icmp eq i32 %20, %0
3497 br i1 %21, label %12, label %13
3500 define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(half* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
3501 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr:
3502 ; CHECK-FIX-NOSCHED: @ %bb.0:
3503 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
3504 ; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
3505 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
3506 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
3507 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
3508 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
3509 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3510 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
3511 ; CHECK-FIX-NOSCHED-NEXT: bx lr
3513 ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_via_ptr:
3514 ; CHECK-CORTEX-FIX: @ %bb.0:
3515 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
3516 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3517 ; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
3518 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
3519 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
3520 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
3521 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
3522 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
3523 ; CHECK-CORTEX-FIX-NEXT: bx lr
3524 %4 = bitcast half* %0 to i16*
3525 %5 = load i16, i16* %4, align 2
3526 %6 = bitcast <16 x i8>* %2 to <8 x i16>*
3527 %7 = load <8 x i16>, <8 x i16>* %6, align 8
3528 %8 = insertelement <8 x i16> %7, i16 %5, i64 0
3529 %9 = bitcast <8 x i16> %8 to <16 x i8>
3530 %10 = bitcast <16 x i8> %1 to <8 x i16>
3531 %11 = insertelement <8 x i16> %10, i16 %5, i64 0
3532 %12 = bitcast <8 x i16> %11 to <16 x i8>
3533 %13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %12)
3534 %14 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %13)
3535 store <16 x i8> %14, <16 x i8>* %2, align 8
3539 define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
3540 ; CHECK-FIX-LABEL: aesd_setf16_via_val:
3541 ; CHECK-FIX: @ %bb.0:
3542 ; CHECK-FIX-NEXT: vorr q1, q1, q1
3543 ; CHECK-FIX-NEXT: vmov r1, s0
3544 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
3545 ; CHECK-FIX-NEXT: vmov.16 d2[0], r1
3546 ; CHECK-FIX-NEXT: vmov.16 d16[0], r1
3547 ; CHECK-FIX-NEXT: aesd.8 q8, q1
3548 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
3549 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
3550 ; CHECK-FIX-NEXT: bx lr
3551 %4 = bitcast <16 x i8>* %2 to <8 x i16>*
3552 %5 = load <8 x i16>, <8 x i16>* %4, align 8
3553 %6 = bitcast half %0 to i16
3554 %7 = insertelement <8 x i16> %5, i16 %6, i64 0
3555 %8 = bitcast <8 x i16> %7 to <16 x i8>
3556 %9 = bitcast <16 x i8> %1 to <8 x i16>
3557 %10 = insertelement <8 x i16> %9, i16 %6, i64 0
3558 %11 = bitcast <8 x i16> %10 to <16 x i8>
3559 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
3560 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
3561 store <16 x i8> %13, <16 x i8>* %2, align 8
3565 define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3566 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr:
3567 ; CHECK-FIX-NOSCHED: @ %bb.0:
3568 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3569 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3570 ; CHECK-FIX-NOSCHED-NEXT: .pad #24
3571 ; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
3572 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3573 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_3
3574 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
3575 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
3576 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
3577 ; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
3578 ; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d16[0]}, [r1:16]
3579 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
3580 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r3
3581 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
3582 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #8] @ 4-byte Spill
3583 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
3584 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
3585 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
3586 ; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
3587 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
3588 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
3589 ; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
3590 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
3591 ; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
3592 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
3593 ; CHECK-FIX-NOSCHED-NEXT: uxth r10, r5
3594 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3595 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB82_4
3596 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_2:
3597 ; CHECK-FIX-NOSCHED-NEXT: vmov r4, r6, d1
3598 ; CHECK-FIX-NOSCHED-NEXT: vmov r0, r3, d0
3599 ; CHECK-FIX-NOSCHED-NEXT: lsr r5, r4, #16
3600 ; CHECK-FIX-NOSCHED-NEXT: lsr r1, r6, #16
3601 ; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
3602 ; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
3603 ; CHECK-FIX-NOSCHED-NEXT: lsr r12, r3, #16
3604 ; CHECK-FIX-NOSCHED-NEXT: uxth r9, r4
3605 ; CHECK-FIX-NOSCHED-NEXT: uxth r6, r3
3606 ; CHECK-FIX-NOSCHED-NEXT: b .LBB82_5
3607 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_3:
3608 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #14]
3609 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
3610 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #12]
3611 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
3612 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #8]
3613 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
3614 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #6]
3615 ; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r2, #10]
3616 ; CHECK-FIX-NOSCHED-NEXT: ldrh r10, [r2]
3617 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
3618 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #4]
3619 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
3620 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r2, #2]
3621 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
3622 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3623 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_2
3624 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_4:
3625 ; CHECK-FIX-NOSCHED-NEXT: vmov r5, r3, d1
3626 ; CHECK-FIX-NOSCHED-NEXT: mov r4, r7
3627 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r7, d0[1]
3628 ; CHECK-FIX-NOSCHED-NEXT: vld1.16 {d0[0]}, [r1:16]
3629 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d0[0]
3630 ; CHECK-FIX-NOSCHED-NEXT: uxth r9, r5
3631 ; CHECK-FIX-NOSCHED-NEXT: uxth r11, r3
3632 ; CHECK-FIX-NOSCHED-NEXT: uxth r6, r7
3633 ; CHECK-FIX-NOSCHED-NEXT: lsr r12, r7, #16
3634 ; CHECK-FIX-NOSCHED-NEXT: lsr r1, r3, #16
3635 ; CHECK-FIX-NOSCHED-NEXT: lsr r5, r5, #16
3636 ; CHECK-FIX-NOSCHED-NEXT: mov r7, r4
3637 ; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
3638 ; CHECK-FIX-NOSCHED-NEXT: .LBB82_5:
3639 ; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
3640 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
3641 ; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp, #4] @ 4-byte Reload
3642 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r0, lsl #16
3643 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
3644 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
3645 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
3646 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r6, r12, lsl #16
3647 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
3648 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
3649 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r3, lsl #16
3650 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
3651 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r9, r5, lsl #16
3652 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
3653 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
3654 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
3655 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
3656 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r1, lsl #16
3657 ; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
3658 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
3659 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
3660 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r1, lsl #16
3661 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
3662 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
3663 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3664 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
3665 ; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
3666 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
3668 ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr:
3669 ; CHECK-CORTEX-FIX: @ %bb.0:
3670 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3671 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3672 ; CHECK-CORTEX-FIX-NEXT: .pad #24
3673 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24
3674 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3675 ; CHECK-CORTEX-FIX-NEXT: beq .LBB82_3
3676 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
3677 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
3678 ; CHECK-CORTEX-FIX-NEXT: vorr q9, q8, q8
3679 ; CHECK-CORTEX-FIX-NEXT: vld1.16 {d18[0]}, [r1:16]
3680 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d18[0]
3681 ; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
3682 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
3683 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
3684 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
3685 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
3686 ; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
3687 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
3688 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
3689 ; CHECK-CORTEX-FIX-NEXT: vmov r3, r6, d17
3690 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
3691 ; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
3692 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
3693 ; CHECK-CORTEX-FIX-NEXT: uxth r11, r6
3694 ; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
3695 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill
3696 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3697 ; CHECK-CORTEX-FIX-NEXT: bne .LBB82_4
3698 ; CHECK-CORTEX-FIX-NEXT: .LBB82_2:
3699 ; CHECK-CORTEX-FIX-NEXT: vmov r1, r7, d0
3700 ; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
3701 ; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
3702 ; CHECK-CORTEX-FIX-NEXT: lsr r12, r7, #16
3703 ; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
3704 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
3705 ; CHECK-CORTEX-FIX-NEXT: mov r0, r3
3706 ; CHECK-CORTEX-FIX-NEXT: vmov r7, r3, d1
3707 ; CHECK-CORTEX-FIX-NEXT: uxth r10, r7
3708 ; CHECK-CORTEX-FIX-NEXT: lsr r5, r7, #16
3709 ; CHECK-CORTEX-FIX-NEXT: uxth lr, r3
3710 ; CHECK-CORTEX-FIX-NEXT: lsr r8, r3, #16
3711 ; CHECK-CORTEX-FIX-NEXT: mov r3, r0
3712 ; CHECK-CORTEX-FIX-NEXT: b .LBB82_5
3713 ; CHECK-CORTEX-FIX-NEXT: .LBB82_3:
3714 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2]
3715 ; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r2, #12]
3716 ; CHECK-CORTEX-FIX-NEXT: ldrh r4, [r2, #14]
3717 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
3718 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #2]
3719 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
3720 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #4]
3721 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
3722 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #6]
3723 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
3724 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #8]
3725 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
3726 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r2, #10]
3727 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3728 ; CHECK-CORTEX-FIX-NEXT: beq .LBB82_2
3729 ; CHECK-CORTEX-FIX-NEXT: .LBB82_4:
3730 ; CHECK-CORTEX-FIX-NEXT: vorr q8, q0, q0
3731 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r5, d0[1]
3732 ; CHECK-CORTEX-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
3733 ; CHECK-CORTEX-FIX-NEXT: uxth r6, r5
3734 ; CHECK-CORTEX-FIX-NEXT: lsr r12, r5, #16
3735 ; CHECK-CORTEX-FIX-NEXT: vmov r5, r7, d1
3736 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r1, d16[0]
3737 ; CHECK-CORTEX-FIX-NEXT: uxth r10, r5
3738 ; CHECK-CORTEX-FIX-NEXT: lsr r5, r5, #16
3739 ; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
3740 ; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
3741 ; CHECK-CORTEX-FIX-NEXT: uxth r0, r1
3742 ; CHECK-CORTEX-FIX-NEXT: lsr r9, r1, #16
3743 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
3744 ; CHECK-CORTEX-FIX-NEXT: .LBB82_5:
3745 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
3746 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
3747 ; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r4, lsl #16
3748 ; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #16] @ 4-byte Reload
3749 ; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16
3750 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r10, r5, lsl #16
3751 ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16
3752 ; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
3753 ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
3754 ; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r1, r3, lsl #16
3755 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #20] @ 4-byte Reload
3756 ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16
3757 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload
3758 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4
3759 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1
3760 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
3761 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
3762 ; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r9, lsl #16
3763 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3
3764 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5
3765 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6
3766 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
3767 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
3768 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
3769 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
3770 ; CHECK-CORTEX-FIX-NEXT: add sp, sp, #24
3771 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
3772 br i1 %0, label %5, label %12
3775 %6 = bitcast half* %1 to i16*
3776 %7 = load i16, i16* %6, align 2
3777 %8 = bitcast <16 x i8>* %3 to <8 x i16>*
3778 %9 = load <8 x i16>, <8 x i16>* %8, align 8
3779 %10 = insertelement <8 x i16> %9, i16 %7, i64 0
3780 %11 = bitcast <8 x i16> %10 to <8 x half>
3784 %13 = bitcast <16 x i8>* %3 to <8 x half>*
3785 %14 = load <8 x half>, <8 x half>* %13, align 8
3789 %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
3790 br i1 %0, label %17, label %23
3793 %18 = bitcast half* %1 to i16*
3794 %19 = load i16, i16* %18, align 2
3795 %20 = bitcast <16 x i8> %2 to <8 x i16>
3796 %21 = insertelement <8 x i16> %20, i16 %19, i64 0
3797 %22 = bitcast <8 x i16> %21 to <8 x half>
3801 %24 = bitcast <16 x i8> %2 to <8 x half>
3805 %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
3806 %27 = bitcast <8 x half> %16 to <16 x i8>
3807 %28 = bitcast <8 x half> %26 to <16 x i8>
3808 %29 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %27, <16 x i8> %28)
3809 %30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %29)
3810 store <16 x i8> %30, <16 x i8>* %3, align 8
3814 define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
3815 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val:
3816 ; CHECK-FIX-NOSCHED: @ %bb.0:
3817 ; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3818 ; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3819 ; CHECK-FIX-NOSCHED-NEXT: .pad #24
3820 ; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #24
3821 ; CHECK-FIX-NOSCHED-NEXT: vmov r12, s0
3822 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3823 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_2
3824 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
3825 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
3826 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r3, d16[1]
3827 ; CHECK-FIX-NOSCHED-NEXT: vmov r7, r6, d17
3828 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r12
3829 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r5, d16[0]
3830 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r7
3831 ; CHECK-FIX-NOSCHED-NEXT: uxth r2, r3
3832 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r3, #16
3833 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #12] @ 4-byte Spill
3834 ; CHECK-FIX-NOSCHED-NEXT: uxth r4, r6
3835 ; CHECK-FIX-NOSCHED-NEXT: lsr r6, r6, #16
3836 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill
3837 ; CHECK-FIX-NOSCHED-NEXT: lsr r7, r7, #16
3838 ; CHECK-FIX-NOSCHED-NEXT: lsr r3, r5, #16
3839 ; CHECK-FIX-NOSCHED-NEXT: str r4, [sp, #20] @ 4-byte Spill
3840 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
3841 ; CHECK-FIX-NOSCHED-NEXT: uxth r3, r5
3842 ; CHECK-FIX-NOSCHED-NEXT: str r6, [sp, #16] @ 4-byte Spill
3843 ; CHECK-FIX-NOSCHED-NEXT: b .LBB83_3
3844 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_2:
3845 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #14]
3846 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #16] @ 4-byte Spill
3847 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #12]
3848 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #20] @ 4-byte Spill
3849 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #8]
3850 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #12] @ 4-byte Spill
3851 ; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #6]
3852 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1, #2]
3853 ; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
3854 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill
3855 ; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1, #10]
3856 ; CHECK-FIX-NOSCHED-NEXT: ldrh r2, [r1, #4]
3857 ; CHECK-FIX-NOSCHED-NEXT: ldrh r3, [r1]
3858 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_3:
3859 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
3860 ; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill
3861 ; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_5
3862 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.4:
3863 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r6, d2[1]
3864 ; CHECK-FIX-NOSCHED-NEXT: mov r3, r2
3865 ; CHECK-FIX-NOSCHED-NEXT: mov r2, r7
3866 ; CHECK-FIX-NOSCHED-NEXT: vmov r4, r7, d3
3867 ; CHECK-FIX-NOSCHED-NEXT: vmov.16 d2[0], r12
3868 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 r0, d2[0]
3869 ; CHECK-FIX-NOSCHED-NEXT: uxth r5, r6
3870 ; CHECK-FIX-NOSCHED-NEXT: lsr r12, r6, #16
3871 ; CHECK-FIX-NOSCHED-NEXT: uxth r10, r4
3872 ; CHECK-FIX-NOSCHED-NEXT: uxth r11, r7
3873 ; CHECK-FIX-NOSCHED-NEXT: lsr r9, r7, #16
3874 ; CHECK-FIX-NOSCHED-NEXT: mov r7, r2
3875 ; CHECK-FIX-NOSCHED-NEXT: mov r2, r3
3876 ; CHECK-FIX-NOSCHED-NEXT: lsr r4, r4, #16
3877 ; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
3878 ; CHECK-FIX-NOSCHED-NEXT: b .LBB83_6
3879 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_5:
3880 ; CHECK-FIX-NOSCHED-NEXT: vmov r3, r6, d3
3881 ; CHECK-FIX-NOSCHED-NEXT: vmov r0, r5, d2
3882 ; CHECK-FIX-NOSCHED-NEXT: lsr r4, r3, #16
3883 ; CHECK-FIX-NOSCHED-NEXT: lsr r9, r6, #16
3884 ; CHECK-FIX-NOSCHED-NEXT: lsr r12, r5, #16
3885 ; CHECK-FIX-NOSCHED-NEXT: lsr lr, r0, #16
3886 ; CHECK-FIX-NOSCHED-NEXT: uxth r11, r6
3887 ; CHECK-FIX-NOSCHED-NEXT: uxth r10, r3
3888 ; CHECK-FIX-NOSCHED-NEXT: uxth r5, r5
3889 ; CHECK-FIX-NOSCHED-NEXT: .LBB83_6:
3890 ; CHECK-FIX-NOSCHED-NEXT: uxth r8, r0
3891 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
3892 ; CHECK-FIX-NOSCHED-NEXT: ldr r3, [sp] @ 4-byte Reload
3893 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r0, lsl #16
3894 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
3895 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r8, lr, lsl #16
3896 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
3897 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r12, lsl #16
3898 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
3899 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
3900 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
3901 ; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
3902 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
3903 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r4, lsl #16
3904 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
3905 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
3906 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r7, lsl #16
3907 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
3908 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r9, lsl #16
3909 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
3910 ; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #20] @ 4-byte Reload
3911 ; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r2, lsl #16
3912 ; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
3913 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9
3914 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
3915 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
3916 ; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #24
3917 ; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
3919 ; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val:
3920 ; CHECK-CORTEX-FIX: @ %bb.0:
3921 ; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3922 ; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3923 ; CHECK-CORTEX-FIX-NEXT: .pad #28
3924 ; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #28
3925 ; CHECK-CORTEX-FIX-NEXT: vmov r2, s0
3926 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3927 ; CHECK-CORTEX-FIX-NEXT: beq .LBB83_2
3928 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
3929 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
3930 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d16[1]
3931 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r2
3932 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r7, d16[0]
3933 ; CHECK-CORTEX-FIX-NEXT: uxth r6, r7
3934 ; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
3935 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #20] @ 4-byte Spill
3936 ; CHECK-CORTEX-FIX-NEXT: uxth r7, r3
3937 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
3938 ; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #24] @ 4-byte Spill
3939 ; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #8] @ 4-byte Spill
3940 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
3941 ; CHECK-CORTEX-FIX-NEXT: vmov r3, r7, d17
3942 ; CHECK-CORTEX-FIX-NEXT: uxth r6, r3
3943 ; CHECK-CORTEX-FIX-NEXT: lsr r3, r3, #16
3944 ; CHECK-CORTEX-FIX-NEXT: uxth r11, r7
3945 ; CHECK-CORTEX-FIX-NEXT: lsr r7, r7, #16
3946 ; CHECK-CORTEX-FIX-NEXT: str r6, [sp, #16] @ 4-byte Spill
3947 ; CHECK-CORTEX-FIX-NEXT: b .LBB83_3
3948 ; CHECK-CORTEX-FIX-NEXT: .LBB83_2:
3949 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1]
3950 ; CHECK-CORTEX-FIX-NEXT: ldrh r11, [r1, #12]
3951 ; CHECK-CORTEX-FIX-NEXT: ldrh r7, [r1, #14]
3952 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #24] @ 4-byte Spill
3953 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #2]
3954 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill
3955 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #4]
3956 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
3957 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #6]
3958 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
3959 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #8]
3960 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill
3961 ; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1, #10]
3962 ; CHECK-CORTEX-FIX-NEXT: .LBB83_3:
3963 ; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill
3964 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
3965 ; CHECK-CORTEX-FIX-NEXT: beq .LBB83_5
3966 ; CHECK-CORTEX-FIX-NEXT: @ %bb.4:
3967 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r3, d2[1]
3968 ; CHECK-CORTEX-FIX-NEXT: vmov.16 d2[0], r2
3969 ; CHECK-CORTEX-FIX-NEXT: vmov r4, r6, d3
3970 ; CHECK-CORTEX-FIX-NEXT: uxth r10, r4
3971 ; CHECK-CORTEX-FIX-NEXT: lsr r4, r4, #16
3972 ; CHECK-CORTEX-FIX-NEXT: uxth lr, r6
3973 ; CHECK-CORTEX-FIX-NEXT: lsr r8, r6, #16
3974 ; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
3975 ; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
3976 ; CHECK-CORTEX-FIX-NEXT: vmov.32 r2, d2[0]
3977 ; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
3978 ; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
3979 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
3980 ; CHECK-CORTEX-FIX-NEXT: b .LBB83_6
3981 ; CHECK-CORTEX-FIX-NEXT: .LBB83_5:
3982 ; CHECK-CORTEX-FIX-NEXT: vmov r2, r3, d2
3983 ; CHECK-CORTEX-FIX-NEXT: uxth r0, r2
3984 ; CHECK-CORTEX-FIX-NEXT: lsr r9, r2, #16
3985 ; CHECK-CORTEX-FIX-NEXT: uxth r5, r3
3986 ; CHECK-CORTEX-FIX-NEXT: lsr r12, r3, #16
3987 ; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill
3988 ; CHECK-CORTEX-FIX-NEXT: mov r0, r7
3989 ; CHECK-CORTEX-FIX-NEXT: vmov r6, r7, d3
3990 ; CHECK-CORTEX-FIX-NEXT: uxth r10, r6
3991 ; CHECK-CORTEX-FIX-NEXT: lsr r4, r6, #16
3992 ; CHECK-CORTEX-FIX-NEXT: uxth lr, r7
3993 ; CHECK-CORTEX-FIX-NEXT: lsr r8, r7, #16
3994 ; CHECK-CORTEX-FIX-NEXT: mov r7, r0
3995 ; CHECK-CORTEX-FIX-NEXT: .LBB83_6:
3996 ; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
3997 ; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
3998 ; CHECK-CORTEX-FIX-NEXT: pkhbt r11, r11, r7, lsl #16
3999 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #12] @ 4-byte Reload
4000 ; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
4001 ; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16
4002 ; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r10, r4, lsl #16
4003 ; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r2, lsl #16
4004 ; CHECK-CORTEX-FIX-NEXT: ldr r2, [sp, #16] @ 4-byte Reload
4005 ; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16
4006 ; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r2, r3, lsl #16
4007 ; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #24] @ 4-byte Reload
4008 ; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r6, lsl #16
4009 ; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp] @ 4-byte Reload
4010 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r3
4011 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r2
4012 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7
4013 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r11
4014 ; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r9, lsl #16
4015 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6
4016 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r4
4017 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r5
4018 ; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0
4019 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
4020 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
4021 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
4022 ; CHECK-CORTEX-FIX-NEXT: add sp, sp, #28
4023 ; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
4024 br i1 %0, label %5, label %11
4027 %6 = bitcast <16 x i8>* %3 to <8 x i16>*
4028 %7 = load <8 x i16>, <8 x i16>* %6, align 8
4029 %8 = bitcast half %1 to i16
4030 %9 = insertelement <8 x i16> %7, i16 %8, i64 0
4031 %10 = bitcast <8 x i16> %9 to <8 x half>
4035 %12 = bitcast <16 x i8>* %3 to <8 x half>*
4036 %13 = load <8 x half>, <8 x half>* %12, align 8
4040 %15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
4041 br i1 %0, label %16, label %21
4044 %17 = bitcast <16 x i8> %2 to <8 x i16>
4045 %18 = bitcast half %1 to i16
4046 %19 = insertelement <8 x i16> %17, i16 %18, i64 0
4047 %20 = bitcast <8 x i16> %19 to <8 x half>
4051 %22 = bitcast <16 x i8> %2 to <8 x half>
4055 %24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
4056 %25 = bitcast <8 x half> %15 to <16 x i8>
4057 %26 = bitcast <8 x half> %24 to <16 x i8>
4058 %27 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %25, <16 x i8> %26)
4059 %28 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %27)
4060 store <16 x i8> %28, <16 x i8>* %3, align 8
4064 define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, half* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
4065 ; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr:
4066 ; CHECK-FIX: @ %bb.0:
4067 ; CHECK-FIX-NEXT: vorr q0, q0, q0
4068 ; CHECK-FIX-NEXT: ldrh r1, [r1]
4069 ; CHECK-FIX-NEXT: cmp r0, #0
4070 ; CHECK-FIX-NEXT: strh r1, [r2]
4071 ; CHECK-FIX-NEXT: bxeq lr
4072 ; CHECK-FIX-NEXT: .LBB84_1:
4073 ; CHECK-FIX-NEXT: vmov.16 d0[0], r1
4074 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
4075 ; CHECK-FIX-NEXT: .LBB84_2: @ =>This Inner Loop Header: Depth=1
4076 ; CHECK-FIX-NEXT: aesd.8 q8, q0
4077 ; CHECK-FIX-NEXT: subs r0, r0, #1
4078 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
4079 ; CHECK-FIX-NEXT: bne .LBB84_2
4080 ; CHECK-FIX-NEXT: @ %bb.3:
4081 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
4082 ; CHECK-FIX-NEXT: bx lr
4083 %5 = bitcast half* %1 to i16*
4084 %6 = load i16, i16* %5, align 2
4085 %7 = bitcast <16 x i8> %2 to <8 x i16>
4086 %8 = insertelement <8 x i16> %7, i16 %6, i64 0
4087 %9 = bitcast <8 x i16> %8 to <16 x i8>
4088 %10 = bitcast <16 x i8>* %3 to i16*
4089 store i16 %6, i16* %10, align 8
4090 %11 = icmp eq i32 %0, 0
4091 br i1 %11, label %15, label %12
4094 %13 = load <16 x i8>, <16 x i8>* %3, align 8
4098 store <16 x i8> %20, <16 x i8>* %3, align 8
4105 %17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
4106 %18 = phi i32 [ 0, %12 ], [ %21, %16 ]
4107 %19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
4108 %20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
4109 %21 = add nuw i32 %18, 1
4110 %22 = icmp eq i32 %21, %0
4111 br i1 %22, label %14, label %16
4114 define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
4115 ; CHECK-FIX-LABEL: aesd_setf16_loop_via_val:
4116 ; CHECK-FIX: @ %bb.0:
4117 ; CHECK-FIX-NEXT: vorr q1, q1, q1
4118 ; CHECK-FIX-NEXT: cmp r0, #0
4119 ; CHECK-FIX-NEXT: bxeq lr
4120 ; CHECK-FIX-NEXT: .LBB85_1:
4121 ; CHECK-FIX-NEXT: vmov r2, s0
4122 ; CHECK-FIX-NEXT: vmov.16 d2[0], r2
4123 ; CHECK-FIX-NEXT: .LBB85_2: @ =>This Inner Loop Header: Depth=1
4124 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
4125 ; CHECK-FIX-NEXT: subs r0, r0, #1
4126 ; CHECK-FIX-NEXT: vmov.16 d16[0], r2
4127 ; CHECK-FIX-NEXT: aesd.8 q8, q1
4128 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
4129 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
4130 ; CHECK-FIX-NEXT: bne .LBB85_2
4131 ; CHECK-FIX-NEXT: @ %bb.3:
4132 ; CHECK-FIX-NEXT: bx lr
4133 %5 = icmp eq i32 %0, 0
4134 br i1 %5, label %13, label %6
4137 %7 = bitcast <16 x i8> %2 to <8 x i16>
4138 %8 = bitcast half %1 to i16
4139 %9 = insertelement <8 x i16> %7, i16 %8, i64 0
4140 %10 = bitcast <8 x i16> %9 to <16 x i8>
4141 %11 = bitcast <16 x i8>* %3 to <8 x i16>*
4142 %12 = bitcast <16 x i8>* %3 to half*
4149 %15 = phi i32 [ 0, %6 ], [ %21, %14 ]
4150 %16 = load <8 x i16>, <8 x i16>* %11, align 8
4151 %17 = insertelement <8 x i16> %16, i16 %8, i64 0
4152 %18 = bitcast <8 x i16> %17 to <16 x i8>
4153 store half %1, half* %12, align 8
4154 %19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %18, <16 x i8> %10)
4155 %20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
4156 store <16 x i8> %20, <16 x i8>* %3, align 8
4157 %21 = add nuw i32 %15, 1
4158 %22 = icmp eq i32 %21, %0
4159 br i1 %22, label %13, label %14
4162 define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(float* %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
4163 ; CHECK-FIX-LABEL: aesd_setf32_via_ptr:
4164 ; CHECK-FIX: @ %bb.0:
4165 ; CHECK-FIX-NEXT: vldr s0, [r0]
4166 ; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
4167 ; CHECK-FIX-NEXT: vmov.f32 s4, s0
4168 ; CHECK-FIX-NEXT: vorr q1, q1, q1
4169 ; CHECK-FIX-NEXT: vorr q0, q0, q0
4170 ; CHECK-FIX-NEXT: aesd.8 q1, q0
4171 ; CHECK-FIX-NEXT: aesimc.8 q8, q1
4172 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
4173 ; CHECK-FIX-NEXT: bx lr
4174 %4 = load float, float* %0, align 4
4175 %5 = bitcast <16 x i8>* %2 to <4 x float>*
4176 %6 = load <4 x float>, <4 x float>* %5, align 8
4177 %7 = insertelement <4 x float> %6, float %4, i64 0
4178 %8 = bitcast <4 x float> %7 to <16 x i8>
4179 %9 = bitcast <16 x i8> %1 to <4 x float>
4180 %10 = insertelement <4 x float> %9, float %4, i64 0
4181 %11 = bitcast <4 x float> %10 to <16 x i8>
4182 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
4183 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
4184 store <16 x i8> %13, <16 x i8>* %2, align 8
4188 define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, <16 x i8>* %2) nounwind {
4189 ; CHECK-FIX-LABEL: aesd_setf32_via_val:
4190 ; CHECK-FIX: @ %bb.0:
4191 ; CHECK-FIX-NEXT: vmov.f32 s4, s0
4192 ; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
4193 ; CHECK-FIX-NEXT: vmov.f32 s0, s4
4194 ; CHECK-FIX-NEXT: vorr q0, q0, q0
4195 ; CHECK-FIX-NEXT: vorr q1, q1, q1
4196 ; CHECK-FIX-NEXT: aesd.8 q0, q1
4197 ; CHECK-FIX-NEXT: aesimc.8 q8, q0
4198 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
4199 ; CHECK-FIX-NEXT: bx lr
4200 %4 = bitcast <16 x i8>* %2 to <4 x float>*
4201 %5 = load <4 x float>, <4 x float>* %4, align 8
4202 %6 = insertelement <4 x float> %5, float %0, i64 0
4203 %7 = bitcast <4 x float> %6 to <16 x i8>
4204 %8 = bitcast <16 x i8> %1 to <4 x float>
4205 %9 = insertelement <4 x float> %8, float %0, i64 0
4206 %10 = bitcast <4 x float> %9 to <16 x i8>
4207 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
4208 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
4209 store <16 x i8> %12, <16 x i8>* %2, align 8
4213 define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
4214 ; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
4215 ; CHECK-FIX: @ %bb.0:
4216 ; CHECK-FIX-NEXT: vorr q0, q0, q0
4217 ; CHECK-FIX-NEXT: cmp r0, #0
4218 ; CHECK-FIX-NEXT: beq .LBB88_2
4219 ; CHECK-FIX-NEXT: @ %bb.1:
4220 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
4221 ; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
4222 ; CHECK-FIX-NEXT: cmp r0, #0
4223 ; CHECK-FIX-NEXT: bne .LBB88_3
4224 ; CHECK-FIX-NEXT: b .LBB88_4
4225 ; CHECK-FIX-NEXT: .LBB88_2:
4226 ; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
4227 ; CHECK-FIX-NEXT: cmp r0, #0
4228 ; CHECK-FIX-NEXT: beq .LBB88_4
4229 ; CHECK-FIX-NEXT: .LBB88_3:
4230 ; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
4231 ; CHECK-FIX-NEXT: .LBB88_4:
4232 ; CHECK-FIX-NEXT: aesd.8 q8, q0
4233 ; CHECK-FIX-NEXT: aesimc.8 q8, q8
4234 ; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
4235 ; CHECK-FIX-NEXT: bx lr
4236 br i1 %0, label %5, label %10
4239 %6 = load float, float* %1, align 4
4240 %7 = bitcast <16 x i8>* %3 to <4 x float>*
4241 %8 = load <4 x float>, <4 x float>* %7, align 8
4242 %9 = insertelement <4 x float> %8, float %6, i64 0
4246 %11 = bitcast <16 x i8>* %3 to <4 x float>*
4247 %12 = load <4 x float>, <4 x float>* %11, align 8
4251 %14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
4252 br i1 %0, label %15, label %19
4255 %16 = load float, float* %1, align 4
4256 %17 = bitcast <16 x i8> %2 to <4 x float>
4257 %18 = insertelement <4 x float> %17, float %16, i64 0
4261 %20 = bitcast <16 x i8> %2 to <4 x float>
4265 %22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
4266 %23 = bitcast <4 x float> %14 to <16 x i8>
4267 %24 = bitcast <4 x float> %22 to <16 x i8>
4268 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
4269 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
4270 store <16 x i8> %26, <16 x i8>* %3, align 8
4274 define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
4275 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_cond_via_val:
4276 ; CHECK-FIX-NOSCHED: @ %bb.0:
4277 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
4278 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
4279 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
4280 ; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
4281 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
4282 ; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
4283 ; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
4284 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1
4285 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2
4286 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
4287 ; CHECK-FIX-NOSCHED-NEXT: bx lr
4289 ; CHECK-CORTEX-FIX-LABEL: aesd_setf32_cond_via_val:
4290 ; CHECK-CORTEX-FIX: @ %bb.0:
4291 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
4292 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
4293 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
4294 ; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
4295 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
4296 ; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
4297 ; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
4298 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1
4299 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
4300 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
4301 ; CHECK-CORTEX-FIX-NEXT: bx lr
4302 %5 = bitcast <16 x i8>* %3 to <4 x float>*
4303 %6 = load <4 x float>, <4 x float>* %5, align 8
4304 %7 = insertelement <4 x float> %6, float %1, i64 0
4305 %8 = select i1 %0, <4 x float> %7, <4 x float> %6
4306 %9 = bitcast <16 x i8> %2 to <4 x float>
4307 %10 = insertelement <4 x float> %9, float %1, i64 0
4308 %11 = select i1 %0, <4 x float> %10, <4 x float> %9
4309 %12 = bitcast <4 x float> %8 to <16 x i8>
4310 %13 = bitcast <4 x float> %11 to <16 x i8>
4311 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
4312 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
4313 store <16 x i8> %15, <16 x i8>* %3, align 8
4317 define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, float* %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
4318 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_ptr:
4319 ; CHECK-FIX-NOSCHED: @ %bb.0:
4320 ; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
4321 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
4322 ; CHECK-FIX-NOSCHED-NEXT: vstr s4, [r2]
4323 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
4324 ; CHECK-FIX-NOSCHED-NEXT: .LBB90_1:
4325 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
4326 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
4327 ; CHECK-FIX-NOSCHED-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
4328 ; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
4329 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
4330 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
4331 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
4332 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB90_2
4333 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
4334 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
4335 ; CHECK-FIX-NOSCHED-NEXT: bx lr
4337 ; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_ptr:
4338 ; CHECK-CORTEX-FIX: @ %bb.0:
4339 ; CHECK-CORTEX-FIX-NEXT: vldr s4, [r1]
4340 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
4341 ; CHECK-CORTEX-FIX-NEXT: vstr s4, [r2]
4342 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
4343 ; CHECK-CORTEX-FIX-NEXT: .LBB90_1:
4344 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
4345 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
4346 ; CHECK-CORTEX-FIX-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1
4347 ; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
4348 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
4349 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
4350 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
4351 ; CHECK-CORTEX-FIX-NEXT: bne .LBB90_2
4352 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
4353 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
4354 ; CHECK-CORTEX-FIX-NEXT: bx lr
4355 %5 = load float, float* %1, align 4
4356 %6 = bitcast <16 x i8> %2 to <4 x float>
4357 %7 = insertelement <4 x float> %6, float %5, i64 0
4358 %8 = bitcast <4 x float> %7 to <16 x i8>
4359 %9 = bitcast <16 x i8>* %3 to float*
4360 store float %5, float* %9, align 8
4361 %10 = icmp eq i32 %0, 0
4362 br i1 %10, label %14, label %11
4365 %12 = load <16 x i8>, <16 x i8>* %3, align 8
4369 store <16 x i8> %19, <16 x i8>* %3, align 8
4376 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
4377 %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
4378 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
4379 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
4380 %20 = add nuw i32 %17, 1
4381 %21 = icmp eq i32 %20, %0
4382 br i1 %21, label %13, label %15
4385 define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, <16 x i8>* %3) nounwind {
4386 ; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_val:
4387 ; CHECK-FIX-NOSCHED: @ %bb.0:
4388 ; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
4389 ; CHECK-FIX-NOSCHED-NEXT: bxeq lr
4390 ; CHECK-FIX-NOSCHED-NEXT: .LBB91_1:
4391 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s4, s0
4392 ; CHECK-FIX-NOSCHED-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1
4393 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
4394 ; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
4395 ; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0
4396 ; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
4397 ; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
4398 ; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1
4399 ; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2
4400 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
4401 ; CHECK-FIX-NOSCHED-NEXT: bne .LBB91_2
4402 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
4403 ; CHECK-FIX-NOSCHED-NEXT: bx lr
4405 ; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_val:
4406 ; CHECK-CORTEX-FIX: @ %bb.0:
4407 ; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
4408 ; CHECK-CORTEX-FIX-NEXT: bxeq lr
4409 ; CHECK-CORTEX-FIX-NEXT: .LBB91_1:
4410 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s4, s0
4411 ; CHECK-CORTEX-FIX-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1
4412 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
4413 ; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0
4414 ; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
4415 ; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
4416 ; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
4417 ; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1
4418 ; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2
4419 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
4420 ; CHECK-CORTEX-FIX-NEXT: bne .LBB91_2
4421 ; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
4422 ; CHECK-CORTEX-FIX-NEXT: bx lr
4423 %5 = icmp eq i32 %0, 0
4424 br i1 %5, label %12, label %6
4427 %7 = bitcast <16 x i8> %2 to <4 x float>
4428 %8 = insertelement <4 x float> %7, float %1, i64 0
4429 %9 = bitcast <4 x float> %8 to <16 x i8>
4430 %10 = bitcast <16 x i8>* %3 to <4 x float>*
4431 %11 = bitcast <16 x i8>* %3 to float*
4438 %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
4439 %15 = load <4 x float>, <4 x float>* %10, align 8
4440 %16 = insertelement <4 x float> %15, float %1, i64 0
4441 %17 = bitcast <4 x float> %16 to <16 x i8>
4442 store float %1, float* %11, align 8
4443 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
4444 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
4445 store <16 x i8> %19, <16 x i8>* %3, align 8
4446 %20 = add nuw i32 %14, 1
4447 %21 = icmp eq i32 %20, %0
4448 br i1 %21, label %12, label %13
4451 define arm_aapcs_vfpcc void @aese_constantisland(<16 x i8>* %0) nounwind {
4452 ; CHECK-FIX-NOSCHED-LABEL: aese_constantisland:
4453 ; CHECK-FIX-NOSCHED: @ %bb.0:
4454 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r0]
4455 ; CHECK-FIX-NOSCHED-NEXT: adr r1, .LCPI92_0
4456 ; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r1:128]
4457 ; CHECK-FIX-NOSCHED-NEXT: aese.8 q9, q8
4458 ; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q9
4459 ; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r0]
4460 ; CHECK-FIX-NOSCHED-NEXT: bx lr
4461 ; CHECK-FIX-NOSCHED-NEXT: .p2align 4
4462 ; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
4463 ; CHECK-FIX-NOSCHED-NEXT: .LCPI92_0:
4464 ; CHECK-FIX-NOSCHED-NEXT: .byte 0 @ 0x0
4465 ; CHECK-FIX-NOSCHED-NEXT: .byte 1 @ 0x1
4466 ; CHECK-FIX-NOSCHED-NEXT: .byte 2 @ 0x2
4467 ; CHECK-FIX-NOSCHED-NEXT: .byte 3 @ 0x3
4468 ; CHECK-FIX-NOSCHED-NEXT: .byte 4 @ 0x4
4469 ; CHECK-FIX-NOSCHED-NEXT: .byte 5 @ 0x5
4470 ; CHECK-FIX-NOSCHED-NEXT: .byte 6 @ 0x6
4471 ; CHECK-FIX-NOSCHED-NEXT: .byte 7 @ 0x7
4472 ; CHECK-FIX-NOSCHED-NEXT: .byte 8 @ 0x8
4473 ; CHECK-FIX-NOSCHED-NEXT: .byte 9 @ 0x9
4474 ; CHECK-FIX-NOSCHED-NEXT: .byte 10 @ 0xa
4475 ; CHECK-FIX-NOSCHED-NEXT: .byte 11 @ 0xb
4476 ; CHECK-FIX-NOSCHED-NEXT: .byte 12 @ 0xc
4477 ; CHECK-FIX-NOSCHED-NEXT: .byte 13 @ 0xd
4478 ; CHECK-FIX-NOSCHED-NEXT: .byte 14 @ 0xe
4479 ; CHECK-FIX-NOSCHED-NEXT: .byte 15 @ 0xf
4481 ; CHECK-CORTEX-FIX-LABEL: aese_constantisland:
4482 ; CHECK-CORTEX-FIX: @ %bb.0:
4483 ; CHECK-CORTEX-FIX-NEXT: adr r1, .LCPI92_0
4484 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r0]
4485 ; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r1:128]
4486 ; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
4487 ; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
4488 ; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r0]
4489 ; CHECK-CORTEX-FIX-NEXT: bx lr
4490 ; CHECK-CORTEX-FIX-NEXT: .p2align 4
4491 ; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
4492 ; CHECK-CORTEX-FIX-NEXT: .LCPI92_0:
4493 ; CHECK-CORTEX-FIX-NEXT: .byte 0 @ 0x0
4494 ; CHECK-CORTEX-FIX-NEXT: .byte 1 @ 0x1
4495 ; CHECK-CORTEX-FIX-NEXT: .byte 2 @ 0x2
4496 ; CHECK-CORTEX-FIX-NEXT: .byte 3 @ 0x3
4497 ; CHECK-CORTEX-FIX-NEXT: .byte 4 @ 0x4
4498 ; CHECK-CORTEX-FIX-NEXT: .byte 5 @ 0x5
4499 ; CHECK-CORTEX-FIX-NEXT: .byte 6 @ 0x6
4500 ; CHECK-CORTEX-FIX-NEXT: .byte 7 @ 0x7
4501 ; CHECK-CORTEX-FIX-NEXT: .byte 8 @ 0x8
4502 ; CHECK-CORTEX-FIX-NEXT: .byte 9 @ 0x9
4503 ; CHECK-CORTEX-FIX-NEXT: .byte 10 @ 0xa
4504 ; CHECK-CORTEX-FIX-NEXT: .byte 11 @ 0xb
4505 ; CHECK-CORTEX-FIX-NEXT: .byte 12 @ 0xc
4506 ; CHECK-CORTEX-FIX-NEXT: .byte 13 @ 0xd
4507 ; CHECK-CORTEX-FIX-NEXT: .byte 14 @ 0xe
4508 ; CHECK-CORTEX-FIX-NEXT: .byte 15 @ 0xf
4509 %2 = load <16 x i8>, <16 x i8>* %0, align 8
4510 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %2)
4511 %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
4512 store <16 x i8> %4, <16 x i8>* %0, align 8