1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE
3 ; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE
; load_v4i1: load a <4 x i1> mask from %src and use it to select between %a
; and zero. Both configurations expect a byte load (ldrb) moved into p0, the
; predicate expanded through a byte vector and rebuilt with vcmp.i32, then a
; final vpsel. The big-endian checks additionally expect the loaded byte to be
; bit-reversed (rbit, lsrs #28) before the vmsr, and vrev64.32 around the
; vector argument and result.
5 define arm_aapcs_vfpcc <4 x i32> @load_v4i1(ptr %src, <4 x i32> %a) {
6 ; CHECK-LE-LABEL: load_v4i1:
7 ; CHECK-LE: @ %bb.0: @ %entry
8 ; CHECK-LE-NEXT: ldrb r0, [r0]
9 ; CHECK-LE-NEXT: vmov.i8 q1, #0x0
10 ; CHECK-LE-NEXT: vmov.i8 q2, #0xff
11 ; CHECK-LE-NEXT: vmsr p0, r0
12 ; CHECK-LE-NEXT: vpsel q1, q2, q1
13 ; CHECK-LE-NEXT: vmov.u8 r0, q1[2]
14 ; CHECK-LE-NEXT: vmov.u8 r1, q1[0]
15 ; CHECK-LE-NEXT: vmov q2[2], q2[0], r1, r0
16 ; CHECK-LE-NEXT: vmov.u8 r0, q1[3]
17 ; CHECK-LE-NEXT: vmov.u8 r1, q1[1]
18 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
19 ; CHECK-LE-NEXT: vmov q2[3], q2[1], r1, r0
20 ; CHECK-LE-NEXT: vcmp.i32 ne, q2, zr
21 ; CHECK-LE-NEXT: vpsel q0, q0, q1
22 ; CHECK-LE-NEXT: bx lr
24 ; CHECK-BE-LABEL: load_v4i1:
25 ; CHECK-BE: @ %bb.0: @ %entry
26 ; CHECK-BE-NEXT: ldrb r0, [r0]
27 ; CHECK-BE-NEXT: vmov.i8 q1, #0x0
28 ; CHECK-BE-NEXT: vmov.i8 q2, #0xff
29 ; CHECK-BE-NEXT: rbit r0, r0
30 ; CHECK-BE-NEXT: lsrs r0, r0, #28
31 ; CHECK-BE-NEXT: vmsr p0, r0
32 ; CHECK-BE-NEXT: vpsel q1, q2, q1
33 ; CHECK-BE-NEXT: vmov.u8 r0, q1[2]
34 ; CHECK-BE-NEXT: vmov.u8 r1, q1[0]
35 ; CHECK-BE-NEXT: vmov q2[2], q2[0], r1, r0
36 ; CHECK-BE-NEXT: vmov.u8 r0, q1[3]
37 ; CHECK-BE-NEXT: vmov.u8 r1, q1[1]
38 ; CHECK-BE-NEXT: vrev64.32 q1, q0
39 ; CHECK-BE-NEXT: vmov q2[3], q2[1], r1, r0
40 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
41 ; CHECK-BE-NEXT: vcmp.i32 ne, q2, zr
42 ; CHECK-BE-NEXT: vpsel q1, q1, q0
43 ; CHECK-BE-NEXT: vrev64.32 q0, q1
44 ; CHECK-BE-NEXT: bx lr
46 %c = load <4 x i1>, ptr %src
47 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; load_v8i1: load a <8 x i1> mask from %src and use it to select between %a
; and zero. Both configurations expect a byte load moved into p0, the
; predicate expanded lane by lane into a 16-bit vector and rebuilt with
; vcmp.i16 before the final vpsel. The big-endian checks additionally expect
; the loaded byte to be bit-reversed (rbit, lsrs #24) before the vmsr.
51 define arm_aapcs_vfpcc <8 x i16> @load_v8i1(ptr %src, <8 x i16> %a) {
52 ; CHECK-LE-LABEL: load_v8i1:
53 ; CHECK-LE: @ %bb.0: @ %entry
54 ; CHECK-LE-NEXT: ldrb r0, [r0]
55 ; CHECK-LE-NEXT: vmov.i8 q1, #0x0
56 ; CHECK-LE-NEXT: vmov.i8 q2, #0xff
57 ; CHECK-LE-NEXT: vmsr p0, r0
58 ; CHECK-LE-NEXT: vpsel q2, q2, q1
59 ; CHECK-LE-NEXT: vmov.u8 r0, q2[0]
60 ; CHECK-LE-NEXT: vmov.16 q1[0], r0
61 ; CHECK-LE-NEXT: vmov.u8 r0, q2[1]
62 ; CHECK-LE-NEXT: vmov.16 q1[1], r0
63 ; CHECK-LE-NEXT: vmov.u8 r0, q2[2]
64 ; CHECK-LE-NEXT: vmov.16 q1[2], r0
65 ; CHECK-LE-NEXT: vmov.u8 r0, q2[3]
66 ; CHECK-LE-NEXT: vmov.16 q1[3], r0
67 ; CHECK-LE-NEXT: vmov.u8 r0, q2[4]
68 ; CHECK-LE-NEXT: vmov.16 q1[4], r0
69 ; CHECK-LE-NEXT: vmov.u8 r0, q2[5]
70 ; CHECK-LE-NEXT: vmov.16 q1[5], r0
71 ; CHECK-LE-NEXT: vmov.u8 r0, q2[6]
72 ; CHECK-LE-NEXT: vmov.16 q1[6], r0
73 ; CHECK-LE-NEXT: vmov.u8 r0, q2[7]
74 ; CHECK-LE-NEXT: vmov.16 q1[7], r0
75 ; CHECK-LE-NEXT: vcmp.i16 ne, q1, zr
76 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
77 ; CHECK-LE-NEXT: vpsel q0, q0, q1
78 ; CHECK-LE-NEXT: bx lr
80 ; CHECK-BE-LABEL: load_v8i1:
81 ; CHECK-BE: @ %bb.0: @ %entry
82 ; CHECK-BE-NEXT: ldrb r0, [r0]
83 ; CHECK-BE-NEXT: vmov.i8 q1, #0x0
84 ; CHECK-BE-NEXT: vmov.i8 q2, #0xff
85 ; CHECK-BE-NEXT: rbit r0, r0
86 ; CHECK-BE-NEXT: lsrs r0, r0, #24
87 ; CHECK-BE-NEXT: vmsr p0, r0
88 ; CHECK-BE-NEXT: vpsel q2, q2, q1
89 ; CHECK-BE-NEXT: vmov.u8 r0, q2[0]
90 ; CHECK-BE-NEXT: vmov.16 q1[0], r0
91 ; CHECK-BE-NEXT: vmov.u8 r0, q2[1]
92 ; CHECK-BE-NEXT: vmov.16 q1[1], r0
93 ; CHECK-BE-NEXT: vmov.u8 r0, q2[2]
94 ; CHECK-BE-NEXT: vmov.16 q1[2], r0
95 ; CHECK-BE-NEXT: vmov.u8 r0, q2[3]
96 ; CHECK-BE-NEXT: vmov.16 q1[3], r0
97 ; CHECK-BE-NEXT: vmov.u8 r0, q2[4]
98 ; CHECK-BE-NEXT: vmov.16 q1[4], r0
99 ; CHECK-BE-NEXT: vmov.u8 r0, q2[5]
100 ; CHECK-BE-NEXT: vmov.16 q1[5], r0
101 ; CHECK-BE-NEXT: vmov.u8 r0, q2[6]
102 ; CHECK-BE-NEXT: vmov.16 q1[6], r0
103 ; CHECK-BE-NEXT: vmov.u8 r0, q2[7]
104 ; CHECK-BE-NEXT: vmov.16 q1[7], r0
105 ; CHECK-BE-NEXT: vcmp.i16 ne, q1, zr
106 ; CHECK-BE-NEXT: vrev64.16 q1, q0
107 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
108 ; CHECK-BE-NEXT: vrev32.16 q0, q0
109 ; CHECK-BE-NEXT: vpsel q1, q1, q0
110 ; CHECK-BE-NEXT: vrev64.16 q0, q1
111 ; CHECK-BE-NEXT: bx lr
113 %c = load <8 x i1>, ptr %src
114 %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
; load_v16i1: load a <16 x i1> mask from %src and use it to select between %a
; and zero. The little-endian checks expect the halfword load to be moved
; straight into p0 (ldrh, vmsr, vpsel). The big-endian checks expect the
; loaded halfword to be bit-reversed first (rbit, lsrs #16).
118 define arm_aapcs_vfpcc <16 x i8> @load_v16i1(ptr %src, <16 x i8> %a) {
119 ; CHECK-LE-LABEL: load_v16i1:
120 ; CHECK-LE: @ %bb.0: @ %entry
121 ; CHECK-LE-NEXT: ldrh r0, [r0]
122 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
123 ; CHECK-LE-NEXT: vmsr p0, r0
124 ; CHECK-LE-NEXT: vpsel q0, q0, q1
125 ; CHECK-LE-NEXT: bx lr
127 ; CHECK-BE-LABEL: load_v16i1:
128 ; CHECK-BE: @ %bb.0: @ %entry
129 ; CHECK-BE-NEXT: ldrh r0, [r0]
130 ; CHECK-BE-NEXT: vrev64.8 q1, q0
131 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
132 ; CHECK-BE-NEXT: rbit r0, r0
133 ; CHECK-BE-NEXT: vrev32.8 q0, q0
134 ; CHECK-BE-NEXT: lsrs r0, r0, #16
135 ; CHECK-BE-NEXT: vmsr p0, r0
136 ; CHECK-BE-NEXT: vpsel q1, q1, q0
137 ; CHECK-BE-NEXT: vrev64.8 q0, q1
138 ; CHECK-BE-NEXT: bx lr
140 %c = load <16 x i1>, ptr %src
141 %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
; load_v2i1: load a <2 x i1> mask from %src and use it to select between %a
; and zero. Both configurations expect a byte load moved into p0, two mask
; bytes extracted and duplicated into 32-bit lane pairs, then vcmp.i32 and
; vpsel. The big-endian checks additionally expect the loaded byte to be
; bit-reversed (rbit, lsrs #30) before the vmsr.
145 define arm_aapcs_vfpcc <2 x i64> @load_v2i1(ptr %src, <2 x i64> %a) {
146 ; CHECK-LE-LABEL: load_v2i1:
147 ; CHECK-LE: @ %bb.0: @ %entry
148 ; CHECK-LE-NEXT: ldrb r0, [r0]
149 ; CHECK-LE-NEXT: vmov.i8 q1, #0x0
150 ; CHECK-LE-NEXT: vmov.i8 q2, #0xff
151 ; CHECK-LE-NEXT: vmsr p0, r0
152 ; CHECK-LE-NEXT: vpsel q1, q2, q1
153 ; CHECK-LE-NEXT: vmov.u8 r0, q1[1]
154 ; CHECK-LE-NEXT: vmov.u8 r1, q1[0]
155 ; CHECK-LE-NEXT: vmov q1[2], q1[0], r1, r0
156 ; CHECK-LE-NEXT: vmov q1[3], q1[1], r1, r0
157 ; CHECK-LE-NEXT: vcmp.i32 ne, q1, zr
158 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
159 ; CHECK-LE-NEXT: vpsel q0, q0, q1
160 ; CHECK-LE-NEXT: bx lr
162 ; CHECK-BE-LABEL: load_v2i1:
163 ; CHECK-BE: @ %bb.0: @ %entry
164 ; CHECK-BE-NEXT: ldrb r0, [r0]
165 ; CHECK-BE-NEXT: vmov.i8 q1, #0x0
166 ; CHECK-BE-NEXT: vmov.i8 q2, #0xff
167 ; CHECK-BE-NEXT: rbit r0, r0
168 ; CHECK-BE-NEXT: lsrs r0, r0, #30
169 ; CHECK-BE-NEXT: vmsr p0, r0
170 ; CHECK-BE-NEXT: vpsel q1, q2, q1
171 ; CHECK-BE-NEXT: vmov.u8 r0, q1[1]
172 ; CHECK-BE-NEXT: vmov.u8 r1, q1[0]
173 ; CHECK-BE-NEXT: vmov q1[2], q1[0], r1, r0
174 ; CHECK-BE-NEXT: vmov q1[3], q1[1], r1, r0
175 ; CHECK-BE-NEXT: vcmp.i32 ne, q1, zr
176 ; CHECK-BE-NEXT: vmov.i32 q1, #0x0
177 ; CHECK-BE-NEXT: vpsel q0, q0, q1
178 ; CHECK-BE-NEXT: bx lr
180 %c = load <2 x i1>, ptr %src
181 %s = select <2 x i1> %c, <2 x i64> %a, <2 x i64> zeroinitializer
; store_v4i1: compare %a against zero and store the resulting <4 x i1> to
; %dst. The checks expect the predicate to be read with vmrs and one bit per
; lane packed into the low four bits of a byte via ubfx/bfi. The little-endian
; checks take p0 bits 0, 4, 8, 12 in that order; the big-endian checks take
; them in the reverse order (12, 8, 4, 0).
186 define arm_aapcs_vfpcc void @store_v4i1(ptr %dst, <4 x i32> %a) {
187 ; CHECK-LE-LABEL: store_v4i1:
188 ; CHECK-LE: @ %bb.0: @ %entry
189 ; CHECK-LE-NEXT: vcmp.i32 eq, q0, zr
190 ; CHECK-LE-NEXT: movs r3, #0
191 ; CHECK-LE-NEXT: vmrs r1, p0
192 ; CHECK-LE-NEXT: and r2, r1, #1
193 ; CHECK-LE-NEXT: rsbs r2, r2, #0
194 ; CHECK-LE-NEXT: bfi r3, r2, #0, #1
195 ; CHECK-LE-NEXT: ubfx r2, r1, #4, #1
196 ; CHECK-LE-NEXT: rsbs r2, r2, #0
197 ; CHECK-LE-NEXT: bfi r3, r2, #1, #1
198 ; CHECK-LE-NEXT: ubfx r2, r1, #8, #1
199 ; CHECK-LE-NEXT: ubfx r1, r1, #12, #1
200 ; CHECK-LE-NEXT: rsbs r2, r2, #0
201 ; CHECK-LE-NEXT: bfi r3, r2, #2, #1
202 ; CHECK-LE-NEXT: rsbs r1, r1, #0
203 ; CHECK-LE-NEXT: bfi r3, r1, #3, #1
204 ; CHECK-LE-NEXT: strb r3, [r0]
205 ; CHECK-LE-NEXT: bx lr
207 ; CHECK-BE-LABEL: store_v4i1:
208 ; CHECK-BE: @ %bb.0: @ %entry
209 ; CHECK-BE-NEXT: vrev64.32 q1, q0
210 ; CHECK-BE-NEXT: movs r3, #0
211 ; CHECK-BE-NEXT: vcmp.i32 eq, q1, zr
212 ; CHECK-BE-NEXT: vmrs r1, p0
213 ; CHECK-BE-NEXT: ubfx r2, r1, #12, #1
214 ; CHECK-BE-NEXT: rsbs r2, r2, #0
215 ; CHECK-BE-NEXT: bfi r3, r2, #0, #1
216 ; CHECK-BE-NEXT: ubfx r2, r1, #8, #1
217 ; CHECK-BE-NEXT: rsbs r2, r2, #0
218 ; CHECK-BE-NEXT: bfi r3, r2, #1, #1
219 ; CHECK-BE-NEXT: ubfx r2, r1, #4, #1
220 ; CHECK-BE-NEXT: and r1, r1, #1
221 ; CHECK-BE-NEXT: rsbs r2, r2, #0
222 ; CHECK-BE-NEXT: bfi r3, r2, #2, #1
223 ; CHECK-BE-NEXT: rsbs r1, r1, #0
224 ; CHECK-BE-NEXT: bfi r3, r1, #3, #1
225 ; CHECK-BE-NEXT: strb r3, [r0]
226 ; CHECK-BE-NEXT: bx lr
228 %c = icmp eq <4 x i32> %a, zeroinitializer
229 store <4 x i1> %c, ptr %dst
; store_v8i1: compare %a against zero and store the resulting <8 x i1> to
; %dst. The checks expect the predicate to be read with vmrs and one bit per
; lane packed into a byte via ubfx/bfi. The little-endian checks take p0 bits
; 0, 2, ..., 14 in ascending order; the big-endian checks take them in
; descending order (14, 12, ..., 0).
233 define arm_aapcs_vfpcc void @store_v8i1(ptr %dst, <8 x i16> %a) {
234 ; CHECK-LE-LABEL: store_v8i1:
235 ; CHECK-LE: @ %bb.0: @ %entry
236 ; CHECK-LE-NEXT: vcmp.i16 eq, q0, zr
237 ; CHECK-LE-NEXT: vmrs r2, p0
238 ; CHECK-LE-NEXT: and r1, r2, #1
239 ; CHECK-LE-NEXT: rsbs r3, r1, #0
240 ; CHECK-LE-NEXT: movs r1, #0
241 ; CHECK-LE-NEXT: bfi r1, r3, #0, #1
242 ; CHECK-LE-NEXT: ubfx r3, r2, #2, #1
243 ; CHECK-LE-NEXT: rsbs r3, r3, #0
244 ; CHECK-LE-NEXT: bfi r1, r3, #1, #1
245 ; CHECK-LE-NEXT: ubfx r3, r2, #4, #1
246 ; CHECK-LE-NEXT: rsbs r3, r3, #0
247 ; CHECK-LE-NEXT: bfi r1, r3, #2, #1
248 ; CHECK-LE-NEXT: ubfx r3, r2, #6, #1
249 ; CHECK-LE-NEXT: rsbs r3, r3, #0
250 ; CHECK-LE-NEXT: bfi r1, r3, #3, #1
251 ; CHECK-LE-NEXT: ubfx r3, r2, #8, #1
252 ; CHECK-LE-NEXT: rsbs r3, r3, #0
253 ; CHECK-LE-NEXT: bfi r1, r3, #4, #1
254 ; CHECK-LE-NEXT: ubfx r3, r2, #10, #1
255 ; CHECK-LE-NEXT: rsbs r3, r3, #0
256 ; CHECK-LE-NEXT: bfi r1, r3, #5, #1
257 ; CHECK-LE-NEXT: ubfx r3, r2, #12, #1
258 ; CHECK-LE-NEXT: ubfx r2, r2, #14, #1
259 ; CHECK-LE-NEXT: rsbs r3, r3, #0
260 ; CHECK-LE-NEXT: bfi r1, r3, #6, #1
261 ; CHECK-LE-NEXT: rsbs r2, r2, #0
262 ; CHECK-LE-NEXT: bfi r1, r2, #7, #1
263 ; CHECK-LE-NEXT: strb r1, [r0]
264 ; CHECK-LE-NEXT: bx lr
266 ; CHECK-BE-LABEL: store_v8i1:
267 ; CHECK-BE: @ %bb.0: @ %entry
268 ; CHECK-BE-NEXT: vrev64.16 q1, q0
269 ; CHECK-BE-NEXT: vcmp.i16 eq, q1, zr
270 ; CHECK-BE-NEXT: vmrs r2, p0
271 ; CHECK-BE-NEXT: ubfx r1, r2, #14, #1
272 ; CHECK-BE-NEXT: rsbs r3, r1, #0
273 ; CHECK-BE-NEXT: movs r1, #0
274 ; CHECK-BE-NEXT: bfi r1, r3, #0, #1
275 ; CHECK-BE-NEXT: ubfx r3, r2, #12, #1
276 ; CHECK-BE-NEXT: rsbs r3, r3, #0
277 ; CHECK-BE-NEXT: bfi r1, r3, #1, #1
278 ; CHECK-BE-NEXT: ubfx r3, r2, #10, #1
279 ; CHECK-BE-NEXT: rsbs r3, r3, #0
280 ; CHECK-BE-NEXT: bfi r1, r3, #2, #1
281 ; CHECK-BE-NEXT: ubfx r3, r2, #8, #1
282 ; CHECK-BE-NEXT: rsbs r3, r3, #0
283 ; CHECK-BE-NEXT: bfi r1, r3, #3, #1
284 ; CHECK-BE-NEXT: ubfx r3, r2, #6, #1
285 ; CHECK-BE-NEXT: rsbs r3, r3, #0
286 ; CHECK-BE-NEXT: bfi r1, r3, #4, #1
287 ; CHECK-BE-NEXT: ubfx r3, r2, #4, #1
288 ; CHECK-BE-NEXT: rsbs r3, r3, #0
289 ; CHECK-BE-NEXT: bfi r1, r3, #5, #1
290 ; CHECK-BE-NEXT: ubfx r3, r2, #2, #1
291 ; CHECK-BE-NEXT: and r2, r2, #1
292 ; CHECK-BE-NEXT: rsbs r3, r3, #0
293 ; CHECK-BE-NEXT: bfi r1, r3, #6, #1
294 ; CHECK-BE-NEXT: rsbs r2, r2, #0
295 ; CHECK-BE-NEXT: bfi r1, r2, #7, #1
296 ; CHECK-BE-NEXT: strb r1, [r0]
297 ; CHECK-BE-NEXT: bx lr
299 %c = icmp eq <8 x i16> %a, zeroinitializer
300 store <8 x i1> %c, ptr %dst
; store_v16i1: compare %a against zero and store the resulting <16 x i1> to
; %dst. The little-endian checks expect p0 to be read with vmrs and stored
; directly as a halfword. The big-endian checks expect the value to be
; bit-reversed (rbit, lsrs #16) before the strh.
304 define arm_aapcs_vfpcc void @store_v16i1(ptr %dst, <16 x i8> %a) {
305 ; CHECK-LE-LABEL: store_v16i1:
306 ; CHECK-LE: @ %bb.0: @ %entry
307 ; CHECK-LE-NEXT: vcmp.i8 eq, q0, zr
308 ; CHECK-LE-NEXT: vmrs r1, p0
309 ; CHECK-LE-NEXT: strh r1, [r0]
310 ; CHECK-LE-NEXT: bx lr
312 ; CHECK-BE-LABEL: store_v16i1:
313 ; CHECK-BE: @ %bb.0: @ %entry
314 ; CHECK-BE-NEXT: vrev64.8 q1, q0
315 ; CHECK-BE-NEXT: vcmp.i8 eq, q1, zr
316 ; CHECK-BE-NEXT: vmrs r1, p0
317 ; CHECK-BE-NEXT: rbit r1, r1
318 ; CHECK-BE-NEXT: lsrs r1, r1, #16
319 ; CHECK-BE-NEXT: strh r1, [r0]
320 ; CHECK-BE-NEXT: bx lr
322 %c = icmp eq <16 x i8> %a, zeroinitializer
323 store <16 x i1> %c, ptr %dst
; store_v2i1: compare %a against zero and store the resulting <2 x i1> to
; %dst. The checks expect each 64-bit lane to be tested in core registers
; (vmov of a d-register pair, orrs, csetm eq) and the two result bits packed
; with bfi before a byte store. The big-endian checks expect a vrev64.32 first
; and read the halves in the order d3, d2 rather than d0, d1.
327 define arm_aapcs_vfpcc void @store_v2i1(ptr %dst, <2 x i64> %a) {
328 ; CHECK-LE-LABEL: store_v2i1:
329 ; CHECK-LE: @ %bb.0: @ %entry
330 ; CHECK-LE-NEXT: vmov r1, r2, d0
331 ; CHECK-LE-NEXT: orrs r1, r2
332 ; CHECK-LE-NEXT: mov.w r2, #0
333 ; CHECK-LE-NEXT: csetm r1, eq
334 ; CHECK-LE-NEXT: bfi r2, r1, #0, #1
335 ; CHECK-LE-NEXT: vmov r1, r3, d1
336 ; CHECK-LE-NEXT: orrs r1, r3
337 ; CHECK-LE-NEXT: csetm r1, eq
338 ; CHECK-LE-NEXT: bfi r2, r1, #1, #1
339 ; CHECK-LE-NEXT: strb r2, [r0]
340 ; CHECK-LE-NEXT: bx lr
342 ; CHECK-BE-LABEL: store_v2i1:
343 ; CHECK-BE: @ %bb.0: @ %entry
344 ; CHECK-BE-NEXT: vrev64.32 q1, q0
345 ; CHECK-BE-NEXT: vmov r1, r2, d3
346 ; CHECK-BE-NEXT: orrs r1, r2
347 ; CHECK-BE-NEXT: mov.w r2, #0
348 ; CHECK-BE-NEXT: csetm r1, eq
349 ; CHECK-BE-NEXT: bfi r2, r1, #0, #1
350 ; CHECK-BE-NEXT: vmov r1, r3, d2
351 ; CHECK-BE-NEXT: orrs r1, r3
352 ; CHECK-BE-NEXT: csetm r1, eq
353 ; CHECK-BE-NEXT: bfi r2, r1, #1, #1
354 ; CHECK-BE-NEXT: strb r2, [r0]
355 ; CHECK-BE-NEXT: bx lr
357 %c = icmp eq <2 x i64> %a, zeroinitializer
358 store <2 x i1> %c, ptr %dst
; load_predcastzext: load an i16, zero-extend it to i32 and convert it to a
; <4 x i1> predicate with the pred.i2v intrinsic, then select between %a and
; zero. Both configurations expect a plain ldrh followed by vmsr p0, with no
; bit reversal of the loaded value.
362 define arm_aapcs_vfpcc <4 x i32> @load_predcastzext(ptr %i, <4 x i32> %a) {
363 ; CHECK-LE-LABEL: load_predcastzext:
365 ; CHECK-LE-NEXT: ldrh r0, [r0]
366 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
367 ; CHECK-LE-NEXT: vmsr p0, r0
368 ; CHECK-LE-NEXT: vpsel q0, q0, q1
369 ; CHECK-LE-NEXT: bx lr
371 ; CHECK-BE-LABEL: load_predcastzext:
373 ; CHECK-BE-NEXT: ldrh r0, [r0]
374 ; CHECK-BE-NEXT: vrev64.32 q1, q0
375 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
376 ; CHECK-BE-NEXT: vmsr p0, r0
377 ; CHECK-BE-NEXT: vpsel q1, q1, q0
378 ; CHECK-BE-NEXT: vrev64.32 q0, q1
379 ; CHECK-BE-NEXT: bx lr
380 %l = load i16, ptr %i, align 4
381 %lz = zext i16 %l to i32
382 %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %lz)
383 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; load_bc4: load a 4-byte-aligned i32 and convert it to a <4 x i1> predicate
; with the pred.i2v intrinsic, then select between %a and zero. Both
; configurations expect the load to be emitted directly as vldr p0, [r0].
387 define arm_aapcs_vfpcc <4 x i32> @load_bc4(ptr %i, <4 x i32> %a) {
388 ; CHECK-LE-LABEL: load_bc4:
390 ; CHECK-LE-NEXT: vldr p0, [r0]
391 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
392 ; CHECK-LE-NEXT: vpsel q0, q0, q1
393 ; CHECK-LE-NEXT: bx lr
395 ; CHECK-BE-LABEL: load_bc4:
397 ; CHECK-BE-NEXT: vldr p0, [r0]
398 ; CHECK-BE-NEXT: vrev64.32 q1, q0
399 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
400 ; CHECK-BE-NEXT: vpsel q1, q1, q0
401 ; CHECK-BE-NEXT: vrev64.32 q0, q1
402 ; CHECK-BE-NEXT: bx lr
403 %l = load i32, ptr %i, align 4
404 %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l)
405 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; load_predcast8: load a 4-byte-aligned i32 and convert it to a <8 x i1>
; predicate with the pred.i2v intrinsic, then select between %a and zero.
; Both configurations expect the load to be emitted as vldr p0, [r0].
409 define arm_aapcs_vfpcc <8 x i16> @load_predcast8(ptr %i, <8 x i16> %a) {
410 ; CHECK-LE-LABEL: load_predcast8:
412 ; CHECK-LE-NEXT: vldr p0, [r0]
413 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
414 ; CHECK-LE-NEXT: vpsel q0, q0, q1
415 ; CHECK-LE-NEXT: bx lr
417 ; CHECK-BE-LABEL: load_predcast8:
419 ; CHECK-BE-NEXT: vrev64.16 q1, q0
420 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
421 ; CHECK-BE-NEXT: vldr p0, [r0]
422 ; CHECK-BE-NEXT: vrev32.16 q0, q0
423 ; CHECK-BE-NEXT: vpsel q1, q1, q0
424 ; CHECK-BE-NEXT: vrev64.16 q0, q1
425 ; CHECK-BE-NEXT: bx lr
426 %l = load i32, ptr %i, align 4
427 %c = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %l)
428 %s = select <8 x i1> %c, <8 x i16> %a, <8 x i16> zeroinitializer
; load_predcast16: load a 4-byte-aligned i32 and convert it to a <16 x i1>
; predicate with the pred.i2v intrinsic, then select between %a and zero.
; Both configurations expect the load to be emitted as vldr p0, [r0].
432 define arm_aapcs_vfpcc <16 x i8> @load_predcast16(ptr %i, <16 x i8> %a) {
433 ; CHECK-LE-LABEL: load_predcast16:
435 ; CHECK-LE-NEXT: vldr p0, [r0]
436 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
437 ; CHECK-LE-NEXT: vpsel q0, q0, q1
438 ; CHECK-LE-NEXT: bx lr
440 ; CHECK-BE-LABEL: load_predcast16:
442 ; CHECK-BE-NEXT: vrev64.8 q1, q0
443 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
444 ; CHECK-BE-NEXT: vldr p0, [r0]
445 ; CHECK-BE-NEXT: vrev32.8 q0, q0
446 ; CHECK-BE-NEXT: vpsel q1, q1, q0
447 ; CHECK-BE-NEXT: vrev64.8 q0, q1
448 ; CHECK-BE-NEXT: bx lr
449 %l = load i32, ptr %i, align 4
450 %c = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %l)
451 %s = select <16 x i1> %c, <16 x i8> %a, <16 x i8> zeroinitializer
; load_bc4_align2: same as the aligned i32 predicate-load case, but the load
; is only 2-byte aligned. Both configurations expect a scalar ldr followed by
; vmsr p0 instead of a vldr p0.
455 define arm_aapcs_vfpcc <4 x i32> @load_bc4_align2(ptr %i, <4 x i32> %a) {
456 ; CHECK-LE-LABEL: load_bc4_align2:
458 ; CHECK-LE-NEXT: ldr r0, [r0]
459 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
460 ; CHECK-LE-NEXT: vmsr p0, r0
461 ; CHECK-LE-NEXT: vpsel q0, q0, q1
462 ; CHECK-LE-NEXT: bx lr
464 ; CHECK-BE-LABEL: load_bc4_align2:
466 ; CHECK-BE-NEXT: ldr r0, [r0]
467 ; CHECK-BE-NEXT: vrev64.32 q1, q0
468 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
469 ; CHECK-BE-NEXT: vmsr p0, r0
470 ; CHECK-BE-NEXT: vpsel q1, q1, q0
471 ; CHECK-BE-NEXT: vrev64.32 q0, q1
472 ; CHECK-BE-NEXT: bx lr
473 %l = load i32, ptr %i, align 2
474 %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l)
475 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; load_bc4_offset: predicate load from %i plus three i16 elements (6 bytes).
; Both configurations expect the offset to be applied with a separate
; adds r0, #6 and the load emitted as vldr p0, [r0] without an immediate.
; NOTE(review): presumably 6 is not encodable as a vldr p0 immediate because
; it is not a multiple of 4 - confirm against the instruction encoding.
479 define arm_aapcs_vfpcc <4 x i32> @load_bc4_offset(ptr %i, <4 x i32> %a) {
480 ; CHECK-LE-LABEL: load_bc4_offset:
482 ; CHECK-LE-NEXT: adds r0, #6
483 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
484 ; CHECK-LE-NEXT: vldr p0, [r0]
485 ; CHECK-LE-NEXT: vpsel q0, q0, q1
486 ; CHECK-LE-NEXT: bx lr
488 ; CHECK-BE-LABEL: load_bc4_offset:
490 ; CHECK-BE-NEXT: adds r0, #6
491 ; CHECK-BE-NEXT: vrev64.32 q1, q0
492 ; CHECK-BE-NEXT: vldr p0, [r0]
493 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
494 ; CHECK-BE-NEXT: vpsel q1, q1, q0
495 ; CHECK-BE-NEXT: vrev64.32 q0, q1
496 ; CHECK-BE-NEXT: bx lr
497 %g = getelementptr inbounds i16, ptr %i, i32 3
498 %l = load i32, ptr %g, align 4
499 %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l)
500 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; load_bc4_range4: predicate load from %i plus one i32 element (4 bytes).
; Both configurations expect the offset to be folded into the load as
; vldr p0, [r0, #4].
504 define arm_aapcs_vfpcc <4 x i32> @load_bc4_range4(ptr %i, <4 x i32> %a) {
505 ; CHECK-LE-LABEL: load_bc4_range4:
507 ; CHECK-LE-NEXT: vldr p0, [r0, #4]
508 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
509 ; CHECK-LE-NEXT: vpsel q0, q0, q1
510 ; CHECK-LE-NEXT: bx lr
512 ; CHECK-BE-LABEL: load_bc4_range4:
514 ; CHECK-BE-NEXT: vldr p0, [r0, #4]
515 ; CHECK-BE-NEXT: vrev64.32 q1, q0
516 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
517 ; CHECK-BE-NEXT: vpsel q1, q1, q0
518 ; CHECK-BE-NEXT: vrev64.32 q0, q1
519 ; CHECK-BE-NEXT: bx lr
520 %g = getelementptr inbounds i32, ptr %i, i32 1
521 %l = load i32, ptr %g, align 4
522 %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l)
523 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; load_bc4_range: predicate load from %i plus 127 i32 elements (508 bytes).
; Both configurations expect the offset to be folded into the load as
; vldr p0, [r0, #508].
527 define arm_aapcs_vfpcc <4 x i32> @load_bc4_range(ptr %i, <4 x i32> %a) {
528 ; CHECK-LE-LABEL: load_bc4_range:
530 ; CHECK-LE-NEXT: vldr p0, [r0, #508]
531 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
532 ; CHECK-LE-NEXT: vpsel q0, q0, q1
533 ; CHECK-LE-NEXT: bx lr
535 ; CHECK-BE-LABEL: load_bc4_range:
537 ; CHECK-BE-NEXT: vldr p0, [r0, #508]
538 ; CHECK-BE-NEXT: vrev64.32 q1, q0
539 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
540 ; CHECK-BE-NEXT: vpsel q1, q1, q0
541 ; CHECK-BE-NEXT: vrev64.32 q0, q1
542 ; CHECK-BE-NEXT: bx lr
543 %g = getelementptr inbounds i32, ptr %i, i32 127
544 %l = load i32, ptr %g, align 4
545 %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l)
546 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; load_bc4_range2: predicate load from %i minus 127 i32 elements (-508
; bytes). Both configurations expect the negative offset to be folded into
; the load as vldr p0, [r0, #-508].
550 define arm_aapcs_vfpcc <4 x i32> @load_bc4_range2(ptr %i, <4 x i32> %a) {
551 ; CHECK-LE-LABEL: load_bc4_range2:
553 ; CHECK-LE-NEXT: vldr p0, [r0, #-508]
554 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
555 ; CHECK-LE-NEXT: vpsel q0, q0, q1
556 ; CHECK-LE-NEXT: bx lr
558 ; CHECK-BE-LABEL: load_bc4_range2:
560 ; CHECK-BE-NEXT: vldr p0, [r0, #-508]
561 ; CHECK-BE-NEXT: vrev64.32 q1, q0
562 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
563 ; CHECK-BE-NEXT: vpsel q1, q1, q0
564 ; CHECK-BE-NEXT: vrev64.32 q0, q1
565 ; CHECK-BE-NEXT: bx lr
566 %g = getelementptr inbounds i32, ptr %i, i32 -127
567 %l = load i32, ptr %g, align 4
568 %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l)
569 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; load_bc4_range3: predicate load from %i plus 128 i32 elements (512 bytes).
; Unlike the 508-byte case, both configurations expect the offset to be
; applied with a separate add.w r0, r0, #512 and the load emitted as
; vldr p0, [r0] without an immediate.
573 define arm_aapcs_vfpcc <4 x i32> @load_bc4_range3(ptr %i, <4 x i32> %a) {
574 ; CHECK-LE-LABEL: load_bc4_range3:
576 ; CHECK-LE-NEXT: add.w r0, r0, #512
577 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
578 ; CHECK-LE-NEXT: vldr p0, [r0]
579 ; CHECK-LE-NEXT: vpsel q0, q0, q1
580 ; CHECK-LE-NEXT: bx lr
582 ; CHECK-BE-LABEL: load_bc4_range3:
584 ; CHECK-BE-NEXT: add.w r0, r0, #512
585 ; CHECK-BE-NEXT: vrev64.32 q1, q0
586 ; CHECK-BE-NEXT: vldr p0, [r0]
587 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
588 ; CHECK-BE-NEXT: vpsel q1, q1, q0
589 ; CHECK-BE-NEXT: vrev64.32 q0, q1
590 ; CHECK-BE-NEXT: bx lr
591 %g = getelementptr inbounds i32, ptr %i, i32 128
592 %l = load i32, ptr %g, align 4
593 %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l)
594 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; load_bc4_range5: predicate load from %i minus 128 i32 elements (-512
; bytes). Unlike the -508-byte case, both configurations expect the offset to
; be applied with a separate sub.w r0, r0, #512 and the load emitted as
; vldr p0, [r0] without an immediate.
598 define arm_aapcs_vfpcc <4 x i32> @load_bc4_range5(ptr %i, <4 x i32> %a) {
599 ; CHECK-LE-LABEL: load_bc4_range5:
601 ; CHECK-LE-NEXT: sub.w r0, r0, #512
602 ; CHECK-LE-NEXT: vmov.i32 q1, #0x0
603 ; CHECK-LE-NEXT: vldr p0, [r0]
604 ; CHECK-LE-NEXT: vpsel q0, q0, q1
605 ; CHECK-LE-NEXT: bx lr
607 ; CHECK-BE-LABEL: load_bc4_range5:
609 ; CHECK-BE-NEXT: sub.w r0, r0, #512
610 ; CHECK-BE-NEXT: vrev64.32 q1, q0
611 ; CHECK-BE-NEXT: vldr p0, [r0]
612 ; CHECK-BE-NEXT: vmov.i32 q0, #0x0
613 ; CHECK-BE-NEXT: vpsel q1, q1, q0
614 ; CHECK-BE-NEXT: vrev64.32 q0, q1
615 ; CHECK-BE-NEXT: bx lr
616 %g = getelementptr inbounds i32, ptr %i, i32 -128
617 %l = load i32, ptr %g, align 4
618 %c = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %l)
619 %s = select <4 x i1> %c, <4 x i32> %a, <4 x i32> zeroinitializer
; Declarations of the MVE pred.i2v intrinsics called by the tests in this
; file; each takes an i32 and returns a predicate vector of 4, 8 or 16 x i1.
623 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
624 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
625 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)