; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
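
; Checks that the MVE vld1q/vst1q family of intrinsics (including the
; width-specific vldrb/vldrh/vldrw and vstrb/vstrh/vstrw forms) lowers to
; single contiguous vector load/store instructions, and that the predicated
; _z/_p variants lower to VPT-predicated (vpst + suffixed-t) forms.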
define arm_aapcs_vfpcc <8 x half> @test_vld1q_f16(half* %base) {
; CHECK-LABEL: test_vld1q_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  ret <8 x half> %1
}

define arm_aapcs_vfpcc <4 x float> @test_vld1q_f32(float* %base) {
; CHECK-LABEL: test_vld1q_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  ret <4 x float> %1
}

define arm_aapcs_vfpcc <16 x i8> @test_vld1q_s8(i8* %base) {
; CHECK-LABEL: test_vld1q_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  ret <16 x i8> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vld1q_s16(i16* %base) {
; CHECK-LABEL: test_vld1q_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vld1q_s32(i32* %base) {
; CHECK-LABEL: test_vld1q_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <16 x i8> @test_vld1q_u8(i8* %base) {
; CHECK-LABEL: test_vld1q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  ret <16 x i8> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vld1q_u16(i16* %base) {
; CHECK-LABEL: test_vld1q_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vld1q_u32(i32* %base) {
; CHECK-LABEL: test_vld1q_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  ret <4 x i32> %1
}

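; Predicated (zeroing) loads: the predicate in %p is moved to P0 with vmsr and
; the load is issued as a predicated instruction inside a vpst block.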
define arm_aapcs_vfpcc <8 x half> @test_vld1q_z_f16(half* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %2, <8 x half> zeroinitializer)
  ret <8 x half> %3
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)

declare <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>*, i32 immarg, <8 x i1>, <8 x half>)

define arm_aapcs_vfpcc <4 x float> @test_vld1q_z_f32(float* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %2, <4 x float> zeroinitializer)
  ret <4 x float> %3
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>)

define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_s8(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
  ret <16 x i8> %3
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

declare <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>*, i32 immarg, <16 x i1>, <16 x i8>)

define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_s16(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
  ret <8 x i16> %3
}

declare <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>*, i32 immarg, <8 x i1>, <8 x i16>)

define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_s32(i32* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
  ret <4 x i32> %3
}

declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32 immarg, <4 x i1>, <4 x i32>)

define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_u8(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
  ret <16 x i8> %3
}

define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_u16(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
  ret <8 x i16> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_u32(i32* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vld1q_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
  ret <4 x i32> %3
}

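; vldrbq: byte loads, widening to 16-bit or 32-bit lanes via sign/zero extension.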
define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_s8(i8* %base) {
; CHECK-LABEL: test_vldrbq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  ret <16 x i8> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_s16(i8* %base) {
; CHECK-LABEL: test_vldrbq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = sext <8 x i8> %1 to <8 x i16>
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_s32(i8* %base) {
; CHECK-LABEL: test_vldrbq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = sext <4 x i8> %1 to <4 x i32>
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_u8(i8* %base) {
; CHECK-LABEL: test_vldrbq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = load <16 x i8>, <16 x i8>* %0, align 1
  ret <16 x i8> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_u16(i8* %base) {
; CHECK-LABEL: test_vldrbq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 1
  %2 = zext <8 x i8> %1 to <8 x i16>
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_u32(i8* %base) {
; CHECK-LABEL: test_vldrbq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 1
  %2 = zext <4 x i8> %1 to <4 x i32>
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_s8(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
  ret <16 x i8> %3
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_s16(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <8 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %2, <8 x i8> zeroinitializer)
  %4 = sext <8 x i8> %3 to <8 x i16>
  ret <8 x i16> %4
}

declare <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>*, i32 immarg, <8 x i1>, <8 x i8>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_s32(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <4 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %2, <4 x i8> zeroinitializer)
  %4 = sext <4 x i8> %3 to <4 x i32>
  ret <4 x i32> %4
}

declare <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>*, i32 immarg, <4 x i1>, <4 x i8>)

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_u8(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  %3 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* %0, i32 1, <16 x i1> %2, <16 x i8> zeroinitializer)
  ret <16 x i8> %3
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_u16(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <8 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* %0, i32 1, <8 x i1> %2, <8 x i8> zeroinitializer)
  %4 = zext <8 x i8> %3 to <8 x i16>
  ret <8 x i16> %4
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_u32(i8* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <4 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i8> @llvm.masked.load.v4i8.p0v4i8(<4 x i8>* %0, i32 1, <4 x i1> %2, <4 x i8> zeroinitializer)
  %4 = zext <4 x i8> %3 to <4 x i32>
  ret <4 x i32> %4
}

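; vldrhq: halfword loads, including the forms that widen to 32-bit lanes.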
define arm_aapcs_vfpcc <8 x half> @test_vldrhq_f16(half* %base) {
; CHECK-LABEL: test_vldrhq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = load <8 x half>, <8 x half>* %0, align 2
  ret <8 x half> %1
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_s16(i16* %base) {
; CHECK-LABEL: test_vldrhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_s32(i16* %base) {
; CHECK-LABEL: test_vldrhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = sext <4 x i16> %1 to <4 x i32>
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_u16(i16* %base) {
; CHECK-LABEL: test_vldrhq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = load <8 x i16>, <8 x i16>* %0, align 2
  ret <8 x i16> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_u32(i16* %base) {
; CHECK-LABEL: test_vldrhq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <4 x i16>*
  %1 = load <4 x i16>, <4 x i16>* %0, align 2
  %2 = zext <4 x i16> %1 to <4 x i32>
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_z_f16(half* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x half> @llvm.masked.load.v8f16.p0v8f16(<8 x half>* %0, i32 2, <8 x i1> %2, <8 x half> zeroinitializer)
  ret <8 x half> %3
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_s16(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
  ret <8 x i16> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_s32(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <4 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %2, <4 x i16> zeroinitializer)
  %4 = sext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

declare <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>*, i32 immarg, <4 x i1>, <4 x i16>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_u16(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  %3 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* %0, i32 2, <8 x i1> %2, <8 x i16> zeroinitializer)
  ret <8 x i16> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_u32(i16* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <4 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* %0, i32 2, <4 x i1> %2, <4 x i16> zeroinitializer)
  %4 = zext <4 x i16> %3 to <4 x i32>
  ret <4 x i32> %4
}

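; vldrwq: full-width word loads.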
define arm_aapcs_vfpcc <4 x float> @test_vldrwq_f32(float* %base) {
; CHECK-LABEL: test_vldrwq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = load <4 x float>, <4 x float>* %0, align 4
  ret <4 x float> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_s32(i32* %base) {
; CHECK-LABEL: test_vldrwq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_u32(i32* %base) {
; CHECK-LABEL: test_vldrwq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = load <4 x i32>, <4 x i32>* %0, align 4
  ret <4 x i32> %1
}

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_z_f32(float* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %0, i32 4, <4 x i1> %2, <4 x float> zeroinitializer)
  ret <4 x float> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_s32(i32* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
  ret <4 x i32> %3
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_u32(i32* %base, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %0, i32 4, <4 x i1> %2, <4 x i32> zeroinitializer)
  ret <4 x i32> %3
}

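; Stores: vst1q/vstrbq/vstrhq/vstrwq, including the truncating narrow stores
; and the predicated _p variants.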
define arm_aapcs_vfpcc void @test_vst1q_f16(half* %base, <8 x half> %value) {
; CHECK-LABEL: test_vst1q_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  store <8 x half> %value, <8 x half>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_f32(float* %base, <4 x float> %value) {
; CHECK-LABEL: test_vst1q_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  store <4 x float> %value, <4 x float>* %0, align 4
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_s8(i8* %base, <16 x i8> %value) {
; CHECK-LABEL: test_vst1q_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  store <16 x i8> %value, <16 x i8>* %0, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_s16(i16* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vst1q_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  store <8 x i16> %value, <8 x i16>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_s32(i32* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vst1q_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  store <4 x i32> %value, <4 x i32>* %0, align 4
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_u8(i8* %base, <16 x i8> %value) {
; CHECK-LABEL: test_vst1q_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  store <16 x i8> %value, <16 x i8>* %0, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_u16(i16* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vst1q_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  store <8 x i16> %value, <8 x i16>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_u32(i32* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vst1q_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  store <4 x i32> %value, <4 x i32>* %0, align 4
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_p_f16(half* %base, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %value, <8 x half>* %0, i32 2, <8 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v8f16.p0v8f16(<8 x half>, <8 x half>*, i32 immarg, <8 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_f32(float* %base, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %value, <4 x float>* %0, i32 4, <4 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_s8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v16i8.p0v16i8(<16 x i8>, <16 x i8>*, i32 immarg, <16 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_s16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v8i16.p0v8i16(<8 x i16>, <8 x i16>*, i32 immarg, <8 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_s32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32 immarg, <4 x i1>)

define arm_aapcs_vfpcc void @test_vst1q_p_u8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_p_u16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vst1q_p_u32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vst1q_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_s8(i8* %base, <16 x i8> %value) {
; CHECK-LABEL: test_vstrbq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  store <16 x i8> %value, <16 x i8>* %0, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_s16(i8* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrbq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %value to <8 x i8>
  %1 = bitcast i8* %base to <8 x i8>*
  store <8 x i8> %0, <8 x i8>* %1, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_s32(i8* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrbq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i8>
  %1 = bitcast i8* %base to <4 x i8>*
  store <4 x i8> %0, <4 x i8>* %1, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_u8(i8* %base, <16 x i8> %value) {
; CHECK-LABEL: test_vstrbq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  store <16 x i8> %value, <16 x i8>* %0, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_u16(i8* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrbq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %value to <8 x i8>
  %1 = bitcast i8* %base to <8 x i8>*
  store <8 x i8> %0, <8 x i8>* %1, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_u32(i8* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrbq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i8>
  %1 = bitcast i8* %base to <4 x i8>*
  store <4 x i8> %0, <4 x i8>* %1, align 1
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_p_s8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_p_s16(i8* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %value to <8 x i8>
  %1 = bitcast i8* %base to <8 x i8>*
  %2 = zext i16 %p to i32
  %3 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %0, <8 x i8>* %1, i32 1, <8 x i1> %3)
  ret void
}

declare void @llvm.masked.store.v8i8.p0v8i8(<8 x i8>, <8 x i8>*, i32 immarg, <8 x i1>)

define arm_aapcs_vfpcc void @test_vstrbq_p_s32(i8* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i8>
  %1 = bitcast i8* %base to <4 x i8>*
  %2 = zext i16 %p to i32
  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %0, <4 x i8>* %1, i32 1, <4 x i1> %3)
  ret void
}

declare void @llvm.masked.store.v4i8.p0v4i8(<4 x i8>, <4 x i8>*, i32 immarg, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrbq_p_u8(i8* %base, <16 x i8> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.8 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i8* %base to <16 x i8>*
  %1 = zext i16 %p to i32
  %2 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
  call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> %value, <16 x i8>* %0, i32 1, <16 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_p_u16(i8* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %value to <8 x i8>
  %1 = bitcast i8* %base to <8 x i8>*
  %2 = zext i16 %p to i32
  %3 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
  call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> %0, <8 x i8>* %1, i32 1, <8 x i1> %3)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrbq_p_u32(i8* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrbq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrbt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i8>
  %1 = bitcast i8* %base to <4 x i8>*
  %2 = zext i16 %p to i32
  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  call void @llvm.masked.store.v4i8.p0v4i8(<4 x i8> %0, <4 x i8>* %1, i32 1, <4 x i1> %3)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_f16(half* %base, <8 x half> %value) {
; CHECK-LABEL: test_vstrhq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  store <8 x half> %value, <8 x half>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_s16(i16* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  store <8 x i16> %value, <8 x i16>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_s32(i16* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = bitcast i16* %base to <4 x i16>*
  store <4 x i16> %0, <4 x i16>* %1, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_u16(i16* %base, <8 x i16> %value) {
; CHECK-LABEL: test_vstrhq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  store <8 x i16> %value, <8 x i16>* %0, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_u32(i16* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrhq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = bitcast i16* %base to <4 x i16>*
  store <4 x i16> %0, <4 x i16>* %1, align 2
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_p_f16(half* %base, <8 x half> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast half* %base to <8 x half>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void @llvm.masked.store.v8f16.p0v8f16(<8 x half> %value, <8 x half>* %0, i32 2, <8 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_p_s16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_p_s32(i16* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = bitcast i16* %base to <4 x i16>*
  %2 = zext i16 %p to i32
  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %0, <4 x i16>* %1, i32 2, <4 x i1> %3)
  ret void
}

declare void @llvm.masked.store.v4i16.p0v4i16(<4 x i16>, <4 x i16>*, i32 immarg, <4 x i1>)

define arm_aapcs_vfpcc void @test_vstrhq_p_u16(i16* %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i16* %base to <8 x i16>*
  %1 = zext i16 %p to i32
  %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
  call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> %value, <8 x i16>* %0, i32 2, <8 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrhq_p_u32(i16* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = bitcast i16* %base to <4 x i16>*
  %2 = zext i16 %p to i32
  %3 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> %0, <4 x i16>* %1, i32 2, <4 x i1> %3)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_f32(float* %base, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  store <4 x float> %value, <4 x float>* %0, align 4
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_s32(i32* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  store <4 x i32> %value, <4 x i32>* %0, align 4
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_u32(i32* %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  store <4 x i32> %value, <4 x i32>* %0, align 4
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_p_f32(float* %base, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast float* %base to <4 x float>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %value, <4 x float>* %0, i32 4, <4 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_p_s32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrwq_p_u32(i32* %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = bitcast i32* %base to <4 x i32>*
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %value, <4 x i32>* %0, i32 4, <4 x i1> %2)
  ret void
}