; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-arm-none-eabi -mattr=+mve,+fullfp16 -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-BE
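
; Masked 128-bit vector loads for each MVE element type, covering zero, undef
; and live-vector passthru operands, under-aligned accesses, and pre- and
; post-incremented addressing, for both little- and big-endian targets.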
define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_zero(<4 x i32> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_align4_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32_align4_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrwt.u32 q1, [r0]
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 4, <4 x i1> %c, <4 x i32> zeroinitializer)
  ret <4 x i32> %l
}

define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_undef(<4 x i32> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_align4_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32_align4_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrwt.u32 q1, [r0]
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 4, <4 x i1> %c, <4 x i32> undef)
  ret <4 x i32> %l
}

define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align1_undef(<4 x i32> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_align1_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32_align1_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev32.8 q1, q0
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 1, <4 x i1> %c, <4 x i32> undef)
  ret <4 x i32> %l
}

define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_other(<4 x i32> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_align4_other:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q1, [r0]
; CHECK-LE-NEXT:    vpsel q0, q1, q0
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32_align4_other:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %dest, i32 4, <4 x i1> %c, <4 x i32> %a)
  ret <4 x i32> %l
}

define arm_aapcs_vfpcc i8* @masked_v4i32_preinc(i8* %x, i8* %y, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_preinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0, #4]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32_preinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0, #4]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x i32>*
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

define arm_aapcs_vfpcc i8* @masked_v4i32_postinc(i8* %x, i8* %y, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_postinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32_postinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <4 x i32>*
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %c, <4 x i32> undef)
  %2 = bitcast i8* %y to <4 x i32>*
  store <4 x i32> %1, <4 x i32>* %2, align 4
  ret i8* %z
}

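; <8 x i16> cases: loads that are at least halfword-aligned use VLDRHT.U16.
; With a zero passthru the select is done with an explicit VMOV/VPSEL pair,
; and big-endian targets add VREV fixups around the predicated operations.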
define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_zero(<8 x i16> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_align4_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov.i32 q1, #0x0
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-LE-NEXT:    vpsel q0, q0, q1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16_align4_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:    vrev64.16 q2, q0
; CHECK-BE-NEXT:    vrev32.16 q1, q1
; CHECK-BE-NEXT:    vpt.s16 gt, q2, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q1
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 2, <8 x i1> %c, <8 x i16> zeroinitializer)
  ret <8 x i16> %l
}

define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_undef(<8 x i16> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_align4_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16_align4_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q1, [r0]
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 2, <8 x i1> %c, <8 x i16> undef)
  ret <8 x i16> %l
}

define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align1_undef(<8 x i16> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_align1_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16_align1_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev16.8 q1, q0
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 1, <8 x i1> %c, <8 x i16> undef)
  ret <8 x i16> %l
}

define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_other(<8 x i16> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_align4_other:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q1, [r0]
; CHECK-LE-NEXT:    vpsel q0, q1, q0
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16_align4_other:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q1
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %dest, i32 2, <8 x i1> %c, <8 x i16> %a)
  ret <8 x i16> %l
}

define i8* @masked_v8i16_preinc(i8* %x, i8* %y, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_preinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0, #4]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16_preinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0, #4]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x i16>*
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 4, <8 x i1> %c, <8 x i16> undef)
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 4
  ret i8* %z
}

define arm_aapcs_vfpcc i8* @masked_v8i16_postinc(i8* %x, i8* %y, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_postinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16_postinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <8 x i16>*
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %1 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 4, <8 x i1> %c, <8 x i16> undef)
  %2 = bitcast i8* %y to <8 x i16>*
  store <8 x i16> %1, <8 x i16>* %2, align 4
  ret i8* %z
}

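; <16 x i8> cases: byte loads have no alignment requirement, so every variant
; lowers to VLDRBT.U8.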
define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_zero(<16 x i8> *%dest, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8_align4_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov.i32 q1, #0x0
; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-LE-NEXT:    vpsel q0, q0, q1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v16i8_align4_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:    vrev64.8 q2, q0
; CHECK-BE-NEXT:    vrev32.8 q1, q1
; CHECK-BE-NEXT:    vpt.s8 gt, q2, zr
; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q1
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <16 x i8> %a, zeroinitializer
  %l = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %dest, i32 1, <16 x i1> %c, <16 x i8> zeroinitializer)
  ret <16 x i8> %l
}

define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_undef(<16 x i8> *%dest, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8_align4_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v16i8_align4_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.8 q1, q0
; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u8 q1, [r0]
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <16 x i8> %a, zeroinitializer
  %l = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %dest, i32 1, <16 x i1> %c, <16 x i8> undef)
  ret <16 x i8> %l
}

define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_other(<16 x i8> *%dest, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8_align4_other:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u8 q1, [r0]
; CHECK-LE-NEXT:    vpsel q0, q1, q0
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v16i8_align4_other:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.8 q1, q0
; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q1
; CHECK-BE-NEXT:    vrev64.8 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <16 x i8> %a, zeroinitializer
  %l = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %dest, i32 1, <16 x i1> %c, <16 x i8> %a)
  ret <16 x i8> %l
}

define arm_aapcs_vfpcc i8* @masked_v16i8_preinc(i8* %x, i8* %y, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8_preinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0, #4]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v16i8_preinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.8 q1, q0
; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0, #4]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrb.8 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <16 x i8>*
  %c = icmp sgt <16 x i8> %a, zeroinitializer
  %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 4, <16 x i1> %c, <16 x i8> undef)
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 4
  ret i8* %z
}

define arm_aapcs_vfpcc i8* @masked_v16i8_postinc(i8* %x, i8* %y, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8_postinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v16i8_postinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.8 q1, q0
; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrb.8 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <16 x i8>*
  %c = icmp sgt <16 x i8> %a, zeroinitializer
  %1 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 4, <16 x i1> %c, <16 x i8> undef)
  %2 = bitcast i8* %y to <16 x i8>*
  store <16 x i8> %1, <16 x i8>* %2, align 4
  ret i8* %z
}

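; <4 x float> cases mirror the <4 x i32> ones: the i32 comparison drives a
; VLDRWT.U32, and the align-1 variant falls back to VLDRBT.U8 (with an extra
; VREV32.8 on big-endian).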
define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_zero(<4 x float> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4f32_align4_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov.i32 q1, #0x0
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-LE-NEXT:    vpsel q0, q0, q1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32_align4_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:    vrev64.32 q2, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q2, zr
; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q1
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 4, <4 x i1> %c, <4 x float> zeroinitializer)
  ret <4 x float> %l
}

define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_undef(<4 x float> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4f32_align4_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32_align4_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrwt.u32 q1, [r0]
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 4, <4 x i1> %c, <4 x float> undef)
  ret <4 x float> %l
}

define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align1_undef(<4 x float> *%dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4f32_align1_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32_align1_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev32.8 q1, q0
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 1, <4 x i1> %c, <4 x float> undef)
  ret <4 x float> %l
}

define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_other(<4 x float> *%dest, <4 x i32> %a, <4 x float> %b) {
; CHECK-LE-LABEL: masked_v4f32_align4_other:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-LE-NEXT:    vpsel q0, q0, q1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32_align4_other:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q2, q1
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q2
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %dest, i32 4, <4 x i1> %c, <4 x float> %b)
  ret <4 x float> %l
}

define arm_aapcs_vfpcc i8* @masked_v4f32_preinc(i8* %x, i8* %y, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4f32_preinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0, #4]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32_preinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0, #4]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <4 x float>*
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef)
  %2 = bitcast i8* %y to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %z
}

define arm_aapcs_vfpcc i8* @masked_v4f32_postinc(i8* %x, i8* %y, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4f32_postinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32_postinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrw.32 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <4 x float>*
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %c, <4 x float> undef)
  %2 = bitcast i8* %y to <4 x float>*
  store <4 x float> %1, <4 x float>* %2, align 4
  ret i8* %z
}

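; <8 x half> cases mirror the <8 x i16> ones, using VLDRHT.U16 where the
; alignment allows it and VLDRBT.U8 otherwise.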
define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_zero(<8 x half> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_align4_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmov.i32 q1, #0x0
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-LE-NEXT:    vpsel q0, q0, q1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_align4_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:    vrev64.16 q2, q0
; CHECK-BE-NEXT:    vrev32.16 q1, q1
; CHECK-BE-NEXT:    vpt.s16 gt, q2, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q1
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 2, <8 x i1> %c, <8 x half> zeroinitializer)
  ret <8 x half> %l
}

define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_undef(<8 x half> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_align4_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_align4_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q1, [r0]
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 2, <8 x i1> %c, <8 x half> undef)
  ret <8 x half> %l
}

define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align1_undef(<8 x half> *%dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_align1_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_align1_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-BE-NEXT:    vrev16.8 q1, q0
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 1, <8 x i1> %c, <8 x half> undef)
  ret <8 x half> %l
}

define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_other(<8 x half> *%dest, <8 x i16> %a, <8 x half> %b) {
; CHECK-LE-LABEL: masked_v8f16_align4_other:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-LE-NEXT:    vpsel q0, q0, q1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_align4_other:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q2, q1
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-BE-NEXT:    vpsel q1, q0, q2
; CHECK-BE-NEXT:    vrev64.16 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %l = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %dest, i32 2, <8 x i1> %c, <8 x half> %b)
  ret <8 x half> %l
}

define arm_aapcs_vfpcc i8* @masked_v8f16_preinc(i8* %x, i8* %y, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_preinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0, #4]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_preinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0, #4]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %z to <8 x half>*
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 4, <8 x i1> %c, <8 x half> undef)
  %2 = bitcast i8* %y to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 4
  ret i8* %z
}

define arm_aapcs_vfpcc i8* @masked_v8f16_postinc(i8* %x, i8* %y, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_postinc:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-LE-NEXT:    adds r0, #4
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_postinc:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u16 q0, [r0]
; CHECK-BE-NEXT:    adds r0, #4
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, i8* %x, i32 4
  %0 = bitcast i8* %x to <8 x half>*
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %1 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 4, <8 x i1> %c, <8 x half> undef)
  %2 = bitcast i8* %y to <8 x half>*
  store <8 x half> %1, <8 x half>* %2, align 4
  ret i8* %z
}

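; The 64-bit element cases are not lowered to a single predicated load.
; Instead the <2 x i1> mask is materialized in GPRs and each lane is loaded
; through a conditional branch or a predicated VLDR.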
define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(<2 x i64> *%dest, <2 x i64> %a) {
; CHECK-LE-LABEL: masked_v2i64_align4_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vmov r3, s0
; CHECK-LE-NEXT:    movs r2, #0
; CHECK-LE-NEXT:    vmov r1, s1
; CHECK-LE-NEXT:    vmov r12, s3
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    vmov r3, s2
; CHECK-LE-NEXT:    sbcs.w r1, r2, r1
; CHECK-LE-NEXT:    mov.w r1, #0
; CHECK-LE-NEXT:    it lt
; CHECK-LE-NEXT:    movlt r1, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    sbcs.w r3, r2, r12
; CHECK-LE-NEXT:    it lt
; CHECK-LE-NEXT:    movlt r2, #1
; CHECK-LE-NEXT:    cmp r2, #0
; CHECK-LE-NEXT:    it ne
; CHECK-LE-NEXT:    mvnne r2, #1
; CHECK-LE-NEXT:    bfi r2, r1, #0, #1
; CHECK-LE-NEXT:    and r1, r2, #3
; CHECK-LE-NEXT:    lsls r2, r2, #31
; CHECK-LE-NEXT:    beq .LBB29_2
; CHECK-LE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-LE-NEXT:    vldr d1, .LCPI29_0
; CHECK-LE-NEXT:    vldr d0, [r0]
; CHECK-LE-NEXT:    b .LBB29_3
; CHECK-LE-NEXT:  .LBB29_2:
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:  .LBB29_3: @ %else
; CHECK-LE-NEXT:    lsls r1, r1, #30
; CHECK-LE-NEXT:    it mi
; CHECK-LE-NEXT:    vldrmi d1, [r0, #8]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
; CHECK-LE-NEXT:    .p2align 3
; CHECK-LE-NEXT:  @ %bb.4:
; CHECK-LE-NEXT:  .LCPI29_0:
; CHECK-LE-NEXT:    .long 0 @ double 0
; CHECK-LE-NEXT:    .long 0
;
; CHECK-BE-LABEL: masked_v2i64_align4_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    movs r2, #0
; CHECK-BE-NEXT:    vmov r3, s7
; CHECK-BE-NEXT:    vmov r1, s6
; CHECK-BE-NEXT:    vmov r12, s4
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    vmov r3, s5
; CHECK-BE-NEXT:    sbcs.w r1, r2, r1
; CHECK-BE-NEXT:    mov.w r1, #0
; CHECK-BE-NEXT:    it lt
; CHECK-BE-NEXT:    movlt r1, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    sbcs.w r3, r2, r12
; CHECK-BE-NEXT:    it lt
; CHECK-BE-NEXT:    movlt r2, #1
; CHECK-BE-NEXT:    cmp r2, #0
; CHECK-BE-NEXT:    it ne
; CHECK-BE-NEXT:    mvnne r2, #1
; CHECK-BE-NEXT:    bfi r2, r1, #0, #1
; CHECK-BE-NEXT:    and r1, r2, #3
; CHECK-BE-NEXT:    lsls r2, r2, #31
; CHECK-BE-NEXT:    beq .LBB29_2
; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-BE-NEXT:    vldr d1, .LCPI29_0
; CHECK-BE-NEXT:    vldr d0, [r0]
; CHECK-BE-NEXT:    b .LBB29_3
; CHECK-BE-NEXT:  .LBB29_2:
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:  .LBB29_3: @ %else
; CHECK-BE-NEXT:    lsls r1, r1, #30
; CHECK-BE-NEXT:    it mi
; CHECK-BE-NEXT:    vldrmi d1, [r0, #8]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
; CHECK-BE-NEXT:    .p2align 3
; CHECK-BE-NEXT:  @ %bb.4:
; CHECK-BE-NEXT:  .LCPI29_0:
; CHECK-BE-NEXT:    .long 0 @ double 0
; CHECK-BE-NEXT:    .long 0
entry:
  %c = icmp sgt <2 x i64> %a, zeroinitializer
  %l = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %dest, i32 8, <2 x i1> %c, <2 x i64> zeroinitializer)
  ret <2 x i64> %l
}

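; Same expansion as the <2 x i64> case above; only the mask source differs,
; coming from the separate <2 x i64> %b operand.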
define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(<2 x double> *%dest, <2 x double> %a, <2 x i64> %b) {
; CHECK-LE-LABEL: masked_v2f64_align4_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vmov r3, s4
; CHECK-LE-NEXT:    movs r2, #0
; CHECK-LE-NEXT:    vmov r1, s5
; CHECK-LE-NEXT:    vmov r12, s7
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    vmov r3, s6
; CHECK-LE-NEXT:    sbcs.w r1, r2, r1
; CHECK-LE-NEXT:    mov.w r1, #0
; CHECK-LE-NEXT:    it lt
; CHECK-LE-NEXT:    movlt r1, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    sbcs.w r3, r2, r12
; CHECK-LE-NEXT:    it lt
; CHECK-LE-NEXT:    movlt r2, #1
; CHECK-LE-NEXT:    cmp r2, #0
; CHECK-LE-NEXT:    it ne
; CHECK-LE-NEXT:    mvnne r2, #1
; CHECK-LE-NEXT:    bfi r2, r1, #0, #1
; CHECK-LE-NEXT:    and r1, r2, #3
; CHECK-LE-NEXT:    lsls r2, r2, #31
; CHECK-LE-NEXT:    beq .LBB30_2
; CHECK-LE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-LE-NEXT:    vldr d1, .LCPI30_0
; CHECK-LE-NEXT:    vldr d0, [r0]
; CHECK-LE-NEXT:    b .LBB30_3
; CHECK-LE-NEXT:  .LBB30_2:
; CHECK-LE-NEXT:    vmov.i32 q0, #0x0
; CHECK-LE-NEXT:  .LBB30_3: @ %else
; CHECK-LE-NEXT:    lsls r1, r1, #30
; CHECK-LE-NEXT:    it mi
; CHECK-LE-NEXT:    vldrmi d1, [r0, #8]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
; CHECK-LE-NEXT:    .p2align 3
; CHECK-LE-NEXT:  @ %bb.4:
; CHECK-LE-NEXT:  .LCPI30_0:
; CHECK-LE-NEXT:    .long 0 @ double 0
; CHECK-LE-NEXT:    .long 0
;
; CHECK-BE-LABEL: masked_v2f64_align4_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    movs r2, #0
; CHECK-BE-NEXT:    vmov r3, s3
; CHECK-BE-NEXT:    vmov r1, s2
; CHECK-BE-NEXT:    vmov r12, s0
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    vmov r3, s1
; CHECK-BE-NEXT:    sbcs.w r1, r2, r1
; CHECK-BE-NEXT:    mov.w r1, #0
; CHECK-BE-NEXT:    it lt
; CHECK-BE-NEXT:    movlt r1, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    sbcs.w r3, r2, r12
; CHECK-BE-NEXT:    it lt
; CHECK-BE-NEXT:    movlt r2, #1
; CHECK-BE-NEXT:    cmp r2, #0
; CHECK-BE-NEXT:    it ne
; CHECK-BE-NEXT:    mvnne r2, #1
; CHECK-BE-NEXT:    bfi r2, r1, #0, #1
; CHECK-BE-NEXT:    and r1, r2, #3
; CHECK-BE-NEXT:    lsls r2, r2, #31
; CHECK-BE-NEXT:    beq .LBB30_2
; CHECK-BE-NEXT:  @ %bb.1: @ %cond.load
; CHECK-BE-NEXT:    vldr d1, .LCPI30_0
; CHECK-BE-NEXT:    vldr d0, [r0]
; CHECK-BE-NEXT:    b .LBB30_3
; CHECK-BE-NEXT:  .LBB30_2:
; CHECK-BE-NEXT:    vmov.i32 q1, #0x0
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:  .LBB30_3: @ %else
; CHECK-BE-NEXT:    lsls r1, r1, #30
; CHECK-BE-NEXT:    it mi
; CHECK-BE-NEXT:    vldrmi d1, [r0, #8]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
; CHECK-BE-NEXT:    .p2align 3
; CHECK-BE-NEXT:  @ %bb.4:
; CHECK-BE-NEXT:  .LCPI30_0:
; CHECK-BE-NEXT:    .long 0 @ double 0
; CHECK-BE-NEXT:    .long 0
entry:
  %c = icmp sgt <2 x i64> %b, zeroinitializer
  %l = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %dest, i32 8, <2 x i1> %c, <2 x double> zeroinitializer)
  ret <2 x double> %l
}

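; Declarations of the masked load intrinsics exercised above.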
declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)
declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32, <8 x i1>, <8 x i16>)
declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32, <16 x i1>, <16 x i8>)
declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)
declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32, <8 x i1>, <8 x half>)
declare <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>*, i32, <2 x i1>, <2 x i64>)
declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>*, i32, <2 x i1>, <2 x double>)