1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; Unpredicated gathering byte loads with vector offsets, sign/zero-extended
; to the destination element width (vldrb.[su]{8,16,32}).

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(i8* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
  ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8*, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(i8* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8*, <4 x i32>, i32, i32, i32)

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(i8* %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0)
  ret <16 x i8> %0
}

declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8*, <16 x i8>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(i8* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i8.v8i16(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(i8* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i8.v4i32(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(i8* %base, <16 x i8> %offset) {
; CHECK-LABEL: test_vldrbq_gather_offset_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0i8.v16i8(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1)
  ret <16 x i8> %0
}
; Predicated (zeroing) gathering byte loads: the scalar predicate is moved
; into p0 and the load is executed under a VPST block (t-suffixed mnemonic).

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8*, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8*, <4 x i32>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1)
  ret <16 x i8> %2
}

declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)

declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8*, <16 x i8>, i32, i32, i32, <16 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(i8* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i8.v8i16.v8i1(i8* %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(i8* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i8.v4i32.v4i1(i8* %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(i8* %base, <16 x i8> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrbq_gather_offset_z_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrbt.u8 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1)
  ret <16 x i8> %2
}
; 64-bit gathers from a vector of base addresses plus an immediate offset.

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #616]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
  ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [q0, #-336]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 -336)
  ret <2 x i64> %0
}
; Write-back variants: the intrinsic returns {loaded data, updated bases};
; the updated base vector is stored back through %addr.

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(<2 x i64>* %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #576]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, <2 x i64>* %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}

declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(<2 x i64>* %addr) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrd.u64 q0, [q1, #-328]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -328)
  %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
  store <2 x i64> %2, <2 x i64>* %addr, align 8
  %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
  ret <2 x i64> %3
}
; Predicated write-back 64-bit gathers (note the 64-bit ops use a v4i1 mask).

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(<2 x i64>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #664]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, <2 x i64>* %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(<2 x i64>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q0, [q1, #656]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <2 x i64>, <2 x i64>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 656, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, <2 x i64>* %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}
; Predicated (non-write-back) 64-bit base gathers.

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #888]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1)
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_base_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [q0, #-1000]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 -1000, <4 x i1> %1)
  ret <2 x i64> %2
}
; 64-bit gathers: scalar base register plus a vector of byte offsets.

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0)
  ret <2 x i64> %0
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64*, <2 x i64>, i32, i32, i32)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1)
  ret <2 x i64> %0
}
; Predicated 64-bit base+offset gathers.

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64*, <2 x i64>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <4 x i1> %1)
  ret <2 x i64> %2
}
; Shifted-offset variants: element offsets scaled by 8 bytes (uxtw #3).

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0)
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(i64* %base, <2 x i64> %offset) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrd.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0i64.v2i64(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1)
  ret <2 x i64> %0
}
; Predicated shifted-offset 64-bit gathers.

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(i64* %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrdt.u64 q1, [r0, q0, uxtw #3]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <4 x i1> %1)
  ret <2 x i64> %2
}
; Halfword gathers (f16/s16/s32/u16/u32) with unscaled vector offsets.

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(half* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x half> %0
}

declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half*, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
  ret <8 x i16> %0
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16*, <8 x i16>, i32, i32, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16*, <4 x i32>, i32, i32, i32)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1)
  ret <4 x i32> %0
}
; Predicated halfword gathers with unscaled vector offsets.

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}

declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half*, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16*, <8 x i16>, i32, i32, i32, <8 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16*, <4 x i32>, i32, i32, i32, <4 x i1>)

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}
; Halfword gathers with offsets scaled by 2 bytes (uxtw #1).

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(half* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0f16.v8i16(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0)
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(i16* %base, <8 x i16> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0i16.v8i16(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(i16* %base, <4 x i32> %offset) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i16.v4i32(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1)
  ret <4 x i32> %0
}
; Predicated halfword gathers with scaled (uxtw #1) offsets.

define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(half* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0f16.v8i16.v8i1(half* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.s32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1)
  ret <4 x i32> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(i16* %base, <8 x i16> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u16 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(i16* %base, <4 x i32> %offset, i16 zeroext %p) {
; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrht.u32 q1, [r0, q0, uxtw #1]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i16.v4i32.v4i1(i16* %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1)
  ret <4 x i32> %2
}
; Word gathers from a vector of base addresses plus an immediate offset.

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #12]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12)
  ret <4 x float> %0
}

declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #400]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400)
  ret <4 x i32> %0
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [q0, #284]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284)
  ret <4 x i32> %0
}
; Write-back word gathers: the updated base vector is stored back via %addr.

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #-64]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 -64)
  %2 = extractvalue { <4 x float>, <4 x i32> } %1, 1
  store <4 x i32> %2, <4 x i32>* %addr, align 8
  %3 = extractvalue { <4 x float>, <4 x i32> } %1, 0
  ret <4 x float> %3
}

declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #80]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, <4 x i32>* %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}

declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(<4 x i32>* %addr) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [q1, #480]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 480)
  %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
  store <4 x i32> %2, <4 x i32>* %addr, align 8
  %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
  ret <4 x i32> %3
}
; Predicated write-back word gathers.

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(<4 x i32>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #-352]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2)
  %4 = extractvalue { <4 x float>, <4 x i32> } %3, 1
  store <4 x i32> %4, <4 x i32>* %addr, align 8
  %5 = extractvalue { <4 x float>, <4 x i32> } %3, 0
  ret <4 x float> %5
}

declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(<4 x i32>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #276]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 276, <4 x i1> %2)
  %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
  store <4 x i32> %4, <4 x i32>* %addr, align 8
  %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
  ret <4 x i32> %5
}

declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(<4 x i32>* %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q0, [q1, #88]!
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = load <4 x i32>, <4 x i32>* %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 88, <4 x i1> %2)
  %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
  store <4 x i32> %4, <4 x i32>* %addr, align 8
  %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
  ret <4 x i32> %5
}
; Predicated (non-write-back) word base gathers.

define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #-300]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1)
  ret <4 x float> %2
}

declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #440]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %1)
  ret <4 x i32> %2
}

declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)

define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16 zeroext %p) {
; CHECK-LABEL: test_vldrwq_gather_base_z_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vldrwt.u32 q1, [q0, #300]
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1)
  ret <4 x i32> %2
}
892 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(float* %base, <4 x i32> %offset) {
893 ; CHECK-LABEL: test_vldrwq_gather_offset_f32:
894 ; CHECK: @ %bb.0: @ %entry
895 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
896 ; CHECK-NEXT: vmov q0, q1
899 %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
903 declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float*, <4 x i32>, i32, i32, i32)
905 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(i32* %base, <4 x i32> %offset) {
906 ; CHECK-LABEL: test_vldrwq_gather_offset_s32:
907 ; CHECK: @ %bb.0: @ %entry
908 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
909 ; CHECK-NEXT: vmov q0, q1
912 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
916 declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32*, <4 x i32>, i32, i32, i32)
918 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(i32* %base, <4 x i32> %offset) {
919 ; CHECK-LABEL: test_vldrwq_gather_offset_u32:
920 ; CHECK: @ %bb.0: @ %entry
921 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
922 ; CHECK-NEXT: vmov q0, q1
925 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1)
929 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) {
930 ; CHECK-LABEL: test_vldrwq_gather_offset_z_f32:
931 ; CHECK: @ %bb.0: @ %entry
932 ; CHECK-NEXT: vmsr p0, r1
934 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
935 ; CHECK-NEXT: vmov q0, q1
938 %0 = zext i16 %p to i32
939 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
940 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
944 declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float*, <4 x i32>, i32, i32, i32, <4 x i1>)
946 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
947 ; CHECK-LABEL: test_vldrwq_gather_offset_z_s32:
948 ; CHECK: @ %bb.0: @ %entry
949 ; CHECK-NEXT: vmsr p0, r1
951 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
952 ; CHECK-NEXT: vmov q0, q1
955 %0 = zext i16 %p to i32
956 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
957 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
961 declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32*, <4 x i32>, i32, i32, i32, <4 x i1>)
963 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
964 ; CHECK-LABEL: test_vldrwq_gather_offset_z_u32:
965 ; CHECK: @ %bb.0: @ %entry
966 ; CHECK-NEXT: vmsr p0, r1
968 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
969 ; CHECK-NEXT: vmov q0, q1
972 %0 = zext i16 %p to i32
973 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
974 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %1)
978 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(float* %base, <4 x i32> %offset) {
979 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32:
980 ; CHECK: @ %bb.0: @ %entry
981 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
982 ; CHECK-NEXT: vmov q0, q1
985 %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0f32.v4i32(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
989 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(i32* %base, <4 x i32> %offset) {
990 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32:
991 ; CHECK: @ %bb.0: @ %entry
992 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
993 ; CHECK-NEXT: vmov q0, q1
996 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
1000 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(i32* %base, <4 x i32> %offset) {
1001 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32:
1002 ; CHECK: @ %bb.0: @ %entry
1003 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
1004 ; CHECK-NEXT: vmov q0, q1
1007 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0i32.v4i32(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1)
1011 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(float* %base, <4 x i32> %offset, i16 zeroext %p) {
1012 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32:
1013 ; CHECK: @ %bb.0: @ %entry
1014 ; CHECK-NEXT: vmsr p0, r1
1016 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
1017 ; CHECK-NEXT: vmov q0, q1
1020 %0 = zext i16 %p to i32
1021 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1022 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0f32.v4i32.v4i1(float* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
1026 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_s32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
1027 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32:
1028 ; CHECK: @ %bb.0: @ %entry
1029 ; CHECK-NEXT: vmsr p0, r1
1031 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
1032 ; CHECK-NEXT: vmov q0, q1
1035 %0 = zext i16 %p to i32
1036 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1037 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
1041 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(i32* %base, <4 x i32> %offset, i16 zeroext %p) {
1042 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32:
1043 ; CHECK: @ %bb.0: @ %entry
1044 ; CHECK-NEXT: vmsr p0, r1
1046 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
1047 ; CHECK-NEXT: vmov q0, q1
1050 %0 = zext i16 %p to i32
1051 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1052 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %1)
1056 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1057 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16:
1058 ; CHECK: @ %bb.0: @ %entry
1059 ; CHECK-NEXT: vmsr p0, r1
1061 ; CHECK-NEXT: vstrbt.16 q1, [r0, q0]
1064 %0 = zext i16 %p to i32
1065 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1066 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
1070 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)
1072 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1073 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32:
1074 ; CHECK: @ %bb.0: @ %entry
1075 ; CHECK-NEXT: vmsr p0, r1
1077 ; CHECK-NEXT: vstrbt.32 q1, [r0, q0]
1080 %0 = zext i16 %p to i32
1081 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1082 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
1086 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)
1088 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
1089 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8:
1090 ; CHECK: @ %bb.0: @ %entry
1091 ; CHECK-NEXT: vmsr p0, r1
1093 ; CHECK-NEXT: vstrbt.8 q1, [r0, q0]
1096 %0 = zext i16 %p to i32
1097 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1098 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
1102 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8*, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>)
1104 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1105 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16:
1106 ; CHECK: @ %bb.0: @ %entry
1107 ; CHECK-NEXT: vmsr p0, r1
1109 ; CHECK-NEXT: vstrbt.16 q1, [r0, q0]
1112 %0 = zext i16 %p to i32
1113 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1114 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v8i16.v8i16.v8i1(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
1118 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1119 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32:
1120 ; CHECK: @ %bb.0: @ %entry
1121 ; CHECK-NEXT: vmsr p0, r1
1123 ; CHECK-NEXT: vstrbt.32 q1, [r0, q0]
1126 %0 = zext i16 %p to i32
1127 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1128 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v4i32.v4i32.v4i1(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
1132 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
1133 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8:
1134 ; CHECK: @ %bb.0: @ %entry
1135 ; CHECK-NEXT: vmsr p0, r1
1137 ; CHECK-NEXT: vstrbt.8 q1, [r0, q0]
1140 %0 = zext i16 %p to i32
1141 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1142 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i8.v16i8.v16i8.v16i1(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
1146 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(i8* %base, <8 x i16> %offset, <8 x i16> %value) {
1147 ; CHECK-LABEL: test_vstrbq_scatter_offset_s16:
1148 ; CHECK: @ %bb.0: @ %entry
1149 ; CHECK-NEXT: vstrb.16 q1, [r0, q0]
1152 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
1156 declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8*, <8 x i16>, <8 x i16>, i32, i32)
1158 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(i8* %base, <4 x i32> %offset, <4 x i32> %value) {
1159 ; CHECK-LABEL: test_vstrbq_scatter_offset_s32:
1160 ; CHECK: @ %bb.0: @ %entry
1161 ; CHECK-NEXT: vstrb.32 q1, [r0, q0]
1164 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
1168 declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8*, <4 x i32>, <4 x i32>, i32, i32)
1170 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(i8* %base, <16 x i8> %offset, <16 x i8> %value) {
1171 ; CHECK-LABEL: test_vstrbq_scatter_offset_s8:
1172 ; CHECK: @ %bb.0: @ %entry
1173 ; CHECK-NEXT: vstrb.8 q1, [r0, q0]
1176 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
1180 declare void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8*, <16 x i8>, <16 x i8>, i32, i32)
1182 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(i8* %base, <8 x i16> %offset, <8 x i16> %value) {
1183 ; CHECK-LABEL: test_vstrbq_scatter_offset_u16:
1184 ; CHECK: @ %bb.0: @ %entry
1185 ; CHECK-NEXT: vstrb.16 q1, [r0, q0]
1188 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v8i16.v8i16(i8* %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
1192 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(i8* %base, <4 x i32> %offset, <4 x i32> %value) {
1193 ; CHECK-LABEL: test_vstrbq_scatter_offset_u32:
1194 ; CHECK: @ %bb.0: @ %entry
1195 ; CHECK-NEXT: vstrb.32 q1, [r0, q0]
1198 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v4i32.v4i32(i8* %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
1202 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(i8* %base, <16 x i8> %offset, <16 x i8> %value) {
1203 ; CHECK-LABEL: test_vstrbq_scatter_offset_u8:
1204 ; CHECK: @ %bb.0: @ %entry
1205 ; CHECK-NEXT: vstrb.8 q1, [r0, q0]
1208 call void @llvm.arm.mve.vstr.scatter.offset.p0i8.v16i8.v16i8(i8* %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
1212 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
1213 ; CHECK-LABEL: test_vstrdq_scatter_base_p_s64:
1214 ; CHECK: @ %bb.0: @ %entry
1215 ; CHECK-NEXT: vmsr p0, r0
1217 ; CHECK-NEXT: vstrdt.64 q1, [q0, #888]
1220 %0 = zext i16 %p to i32
1221 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1222 call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1)
1226 declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
1228 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
1229 ; CHECK-LABEL: test_vstrdq_scatter_base_p_u64:
1230 ; CHECK: @ %bb.0: @ %entry
1231 ; CHECK-NEXT: vmsr p0, r0
1233 ; CHECK-NEXT: vstrdt.64 q1, [q0, #264]
1236 %0 = zext i16 %p to i32
1237 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1238 call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <4 x i1> %1)
1242 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x i64> %value) {
1243 ; CHECK-LABEL: test_vstrdq_scatter_base_s64:
1244 ; CHECK: @ %bb.0: @ %entry
1245 ; CHECK-NEXT: vstrd.64 q1, [q0, #408]
1248 call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value)
1252 declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
1254 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) {
1255 ; CHECK-LABEL: test_vstrdq_scatter_base_u64:
1256 ; CHECK: @ %bb.0: @ %entry
1257 ; CHECK-NEXT: vstrd.64 q1, [q0, #-472]
1260 call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value)
1264 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) {
1265 ; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64:
1266 ; CHECK: @ %bb.0: @ %entry
1267 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1268 ; CHECK-NEXT: vmsr p0, r1
1270 ; CHECK-NEXT: vstrdt.64 q0, [q1, #248]!
1271 ; CHECK-NEXT: vstrw.32 q1, [r0]
1274 %0 = load <2 x i64>, <2 x i64>* %addr, align 8
1275 %1 = zext i16 %p to i32
1276 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1277 %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2)
1278 store <2 x i64> %3, <2 x i64>* %addr, align 8
1282 declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
1284 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(<2 x i64>* %addr, <2 x i64> %value, i16 zeroext %p) {
1285 ; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64:
1286 ; CHECK: @ %bb.0: @ %entry
1287 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1288 ; CHECK-NEXT: vmsr p0, r1
1290 ; CHECK-NEXT: vstrdt.64 q0, [q1, #136]!
1291 ; CHECK-NEXT: vstrw.32 q1, [r0]
1294 %0 = load <2 x i64>, <2 x i64>* %addr, align 8
1295 %1 = zext i16 %p to i32
1296 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1297 %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 136, <2 x i64> %value, <4 x i1> %2)
1298 store <2 x i64> %3, <2 x i64>* %addr, align 8
1302 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(<2 x i64>* %addr, <2 x i64> %value) {
1303 ; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64:
1304 ; CHECK: @ %bb.0: @ %entry
1305 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1306 ; CHECK-NEXT: vstrd.64 q0, [q1, #208]!
1307 ; CHECK-NEXT: vstrw.32 q1, [r0]
1310 %0 = load <2 x i64>, <2 x i64>* %addr, align 8
1311 %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value)
1312 store <2 x i64> %1, <2 x i64>* %addr, align 8
1316 declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
1318 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(<2 x i64>* %addr, <2 x i64> %value) {
1319 ; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64:
1320 ; CHECK: @ %bb.0: @ %entry
1321 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1322 ; CHECK-NEXT: vstrd.64 q0, [q1, #-168]!
1323 ; CHECK-NEXT: vstrw.32 q1, [r0]
1326 %0 = load <2 x i64>, <2 x i64>* %addr, align 8
1327 %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -168, <2 x i64> %value)
1328 store <2 x i64> %1, <2 x i64>* %addr, align 8
1332 define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
1333 ; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64:
1334 ; CHECK: @ %bb.0: @ %entry
1335 ; CHECK-NEXT: vmsr p0, r1
1337 ; CHECK-NEXT: vstrdt.64 q1, [r0, q0]
1340 %0 = zext i16 %p to i32
1341 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1342 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
1346 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64*, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)
1348 define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
1349 ; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64:
1350 ; CHECK: @ %bb.0: @ %entry
1351 ; CHECK-NEXT: vmsr p0, r1
1353 ; CHECK-NEXT: vstrdt.64 q1, [r0, q0]
1356 %0 = zext i16 %p to i32
1357 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1358 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
1362 define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
1363 ; CHECK-LABEL: test_vstrdq_scatter_offset_s64:
1364 ; CHECK: @ %bb.0: @ %entry
1365 ; CHECK-NEXT: vstrd.64 q1, [r0, q0]
1368 call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
1372 declare void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64*, <2 x i64>, <2 x i64>, i32, i32)
1374 define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
1375 ; CHECK-LABEL: test_vstrdq_scatter_offset_u64:
1376 ; CHECK: @ %bb.0: @ %entry
1377 ; CHECK-NEXT: vstrd.64 q1, [r0, q0]
1380 call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
1384 define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
1385 ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64:
1386 ; CHECK: @ %bb.0: @ %entry
1387 ; CHECK-NEXT: vmsr p0, r1
1389 ; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3]
1392 %0 = zext i16 %p to i32
1393 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1394 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1)
1398 define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
1399 ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64:
1400 ; CHECK: @ %bb.0: @ %entry
1401 ; CHECK-NEXT: vmsr p0, r1
1403 ; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3]
1406 %0 = zext i16 %p to i32
1407 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1408 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <4 x i1> %1)
1412 define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
1413 ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64:
1414 ; CHECK: @ %bb.0: @ %entry
1415 ; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3]
1418 call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
1422 define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(i64* %base, <2 x i64> %offset, <2 x i64> %value) {
1423 ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64:
1424 ; CHECK: @ %bb.0: @ %entry
1425 ; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3]
1428 call void @llvm.arm.mve.vstr.scatter.offset.p0i64.v2i64.v2i64(i64* %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
1432 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) {
1433 ; CHECK-LABEL: test_vstrhq_scatter_offset_f16:
1434 ; CHECK: @ %bb.0: @ %entry
1435 ; CHECK-NEXT: vstrh.16 q1, [r0, q0]
1438 call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0)
1442 declare void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half*, <8 x i16>, <8 x half>, i32, i32)
1444 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
1445 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16:
1446 ; CHECK: @ %bb.0: @ %entry
1447 ; CHECK-NEXT: vmsr p0, r1
1449 ; CHECK-NEXT: vstrht.16 q1, [r0, q0]
1452 %0 = zext i16 %p to i32
1453 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1454 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %1)
1458 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half*, <8 x i16>, <8 x half>, i32, i32, <8 x i1>)
1460 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1461 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16:
1462 ; CHECK: @ %bb.0: @ %entry
1463 ; CHECK-NEXT: vmsr p0, r1
1465 ; CHECK-NEXT: vstrht.16 q1, [r0, q0]
1468 %0 = zext i16 %p to i32
1469 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1470 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
1474 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16*, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)
1476 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1477 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32:
1478 ; CHECK: @ %bb.0: @ %entry
1479 ; CHECK-NEXT: vmsr p0, r1
1481 ; CHECK-NEXT: vstrht.32 q1, [r0, q0]
1484 %0 = zext i16 %p to i32
1485 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1486 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
1490 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)
1492 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1493 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16:
1494 ; CHECK: @ %bb.0: @ %entry
1495 ; CHECK-NEXT: vmsr p0, r1
1497 ; CHECK-NEXT: vstrht.16 q1, [r0, q0]
1500 %0 = zext i16 %p to i32
1501 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1502 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
1506 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1507 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32:
1508 ; CHECK: @ %bb.0: @ %entry
1509 ; CHECK-NEXT: vmsr p0, r1
1511 ; CHECK-NEXT: vstrht.32 q1, [r0, q0]
1514 %0 = zext i16 %p to i32
1515 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1516 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
1520 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
1521 ; CHECK-LABEL: test_vstrhq_scatter_offset_s16:
1522 ; CHECK: @ %bb.0: @ %entry
1523 ; CHECK-NEXT: vstrh.16 q1, [r0, q0]
1526 call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
1530 declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16*, <8 x i16>, <8 x i16>, i32, i32)
1532 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
1533 ; CHECK-LABEL: test_vstrhq_scatter_offset_s32:
1534 ; CHECK: @ %bb.0: @ %entry
1535 ; CHECK-NEXT: vstrh.32 q1, [r0, q0]
1538 call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
1542 declare void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16*, <4 x i32>, <4 x i32>, i32, i32)
1544 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
1545 ; CHECK-LABEL: test_vstrhq_scatter_offset_u16:
1546 ; CHECK: @ %bb.0: @ %entry
1547 ; CHECK-NEXT: vstrh.16 q1, [r0, q0]
1550 call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
1554 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
1555 ; CHECK-LABEL: test_vstrhq_scatter_offset_u32:
1556 ; CHECK: @ %bb.0: @ %entry
1557 ; CHECK-NEXT: vstrh.32 q1, [r0, q0]
1560 call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
1564 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(half* %base, <8 x i16> %offset, <8 x half> %value) {
1565 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16:
1566 ; CHECK: @ %bb.0: @ %entry
1567 ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
1570 call void @llvm.arm.mve.vstr.scatter.offset.p0f16.v8i16.v8f16(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1)
1574 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(half* %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
1575 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16:
1576 ; CHECK: @ %bb.0: @ %entry
1577 ; CHECK-NEXT: vmsr p0, r1
1579 ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
1582 %0 = zext i16 %p to i32
1583 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1584 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f16.v8i16.v8f16.v8i1(half* %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1)
1588 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1589 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16:
1590 ; CHECK: @ %bb.0: @ %entry
1591 ; CHECK-NEXT: vmsr p0, r1
1593 ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
1596 %0 = zext i16 %p to i32
1597 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1598 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
1602 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1603 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32:
1604 ; CHECK: @ %bb.0: @ %entry
1605 ; CHECK-NEXT: vmsr p0, r1
1607 ; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
1610 %0 = zext i16 %p to i32
1611 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1612 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
1616 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1617 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16:
1618 ; CHECK: @ %bb.0: @ %entry
1619 ; CHECK-NEXT: vmsr p0, r1
1621 ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
1624 %0 = zext i16 %p to i32
1625 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1626 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v8i16.v8i16.v8i1(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
1630 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1631 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32:
1632 ; CHECK: @ %bb.0: @ %entry
1633 ; CHECK-NEXT: vmsr p0, r1
1635 ; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
1638 %0 = zext i16 %p to i32
1639 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1640 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i16.v4i32.v4i32.v4i1(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
1644 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
1645 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16:
1646 ; CHECK: @ %bb.0: @ %entry
1647 ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
1650 call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
1654 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
1655 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32:
1656 ; CHECK: @ %bb.0: @ %entry
1657 ; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
1660 call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
1664 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(i16* %base, <8 x i16> %offset, <8 x i16> %value) {
1665 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16:
1666 ; CHECK: @ %bb.0: @ %entry
1667 ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
1670 call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v8i16.v8i16(i16* %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
1674 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(i16* %base, <4 x i32> %offset, <4 x i32> %value) {
1675 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32:
1676 ; CHECK: @ %bb.0: @ %entry
1677 ; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
1680 call void @llvm.arm.mve.vstr.scatter.offset.p0i16.v4i32.v4i32(i16* %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
; vstrwq_scatter_base, f32 payload: vector-of-addresses scatter with an
; immediate offset (380) must fold into "vstrw.32 q1, [q0, #380]".
1684 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) {
1685 ; CHECK-LABEL: test_vstrwq_scatter_base_f32:
1686 ; CHECK: @ %bb.0: @ %entry
1687 ; CHECK-NEXT: vstrw.32 q1, [q0, #380]
1690 call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value)
1694 declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>)
; Predicated vstrwq_scatter_base, f32: predicate arrives as i16, is zero-extended
; and converted via pred.i2v; expects "vmsr p0" followed by the predicated
; "vstrwt.32" with a negative immediate (-400).
1696 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) {
1697 ; CHECK-LABEL: test_vstrwq_scatter_base_p_f32:
1698 ; CHECK: @ %bb.0: @ %entry
1699 ; CHECK-NEXT: vmsr p0, r0
1701 ; CHECK-NEXT: vstrwt.32 q1, [q0, #-400]
1704 %0 = zext i16 %p to i32
1705 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1706 call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 -400, <4 x float> %value, <4 x i1> %1)
1710 declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)
; Predicated vstrwq_scatter_base, s32: same vmsr + "vstrwt.32" pattern with a
; positive immediate (#48).
1712 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
1713 ; CHECK-LABEL: test_vstrwq_scatter_base_p_s32:
1714 ; CHECK: @ %bb.0: @ %entry
1715 ; CHECK-NEXT: vmsr p0, r0
1717 ; CHECK-NEXT: vstrwt.32 q1, [q0, #48]
1720 %0 = zext i16 %p to i32
1721 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1722 call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1)
1726 declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)
; Predicated vstrwq_scatter_base, u32: same intrinsic as the s32 variant
; (stores are sign-agnostic), immediate #-376.
1728 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
1729 ; CHECK-LABEL: test_vstrwq_scatter_base_p_u32:
1730 ; CHECK: @ %bb.0: @ %entry
1731 ; CHECK-NEXT: vmsr p0, r0
1733 ; CHECK-NEXT: vstrwt.32 q1, [q0, #-376]
1736 %0 = zext i16 %p to i32
1737 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1738 call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 -376, <4 x i32> %value, <4 x i1> %1)
; vstrwq_scatter_base, s32 payload: expects "vstrw.32 q1, [q0, #156]".
1742 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) {
1743 ; CHECK-LABEL: test_vstrwq_scatter_base_s32:
1744 ; CHECK: @ %bb.0: @ %entry
1745 ; CHECK-NEXT: vstrw.32 q1, [q0, #156]
1748 call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value)
1752 declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)
; vstrwq_scatter_base, u32 payload: same intrinsic as s32, immediate #212.
1754 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) {
1755 ; CHECK-LABEL: test_vstrwq_scatter_base_u32:
1756 ; CHECK: @ %bb.0: @ %entry
1757 ; CHECK-NEXT: vstrw.32 q1, [q0, #212]
1760 call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value)
; vstrwq_scatter_base writeback, f32: the wb intrinsic returns the updated
; address vector, which is stored back to %addr. The load/scatter/store triple
; must lower to the "!"-writeback form: vldrw, "vstrw.32 q0, [q1, #-412]!", vstrw.
1764 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(<4 x i32>* %addr, <4 x float> %value) {
1765 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32:
1766 ; CHECK: @ %bb.0: @ %entry
1767 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1768 ; CHECK-NEXT: vstrw.32 q0, [q1, #-412]!
1769 ; CHECK-NEXT: vstrw.32 q1, [r0]
1772 %0 = load <4 x i32>, <4 x i32>* %addr, align 8
1773 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 -412, <4 x float> %value)
1774 store <4 x i32> %1, <4 x i32>* %addr, align 8
1778 declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>)
; Predicated writeback vstrwq_scatter_base, f32: combines the pred.i2v
; predicate conversion (vmsr p0) with the "!"-writeback scatter ("vstrwt.32 ... !").
1780 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(<4 x i32>* %addr, <4 x float> %value, i16 zeroext %p) {
1781 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32:
1782 ; CHECK: @ %bb.0: @ %entry
1783 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1784 ; CHECK-NEXT: vmsr p0, r1
1786 ; CHECK-NEXT: vstrwt.32 q0, [q1, #236]!
1787 ; CHECK-NEXT: vstrw.32 q1, [r0]
1790 %0 = load <4 x i32>, <4 x i32>* %addr, align 8
1791 %1 = zext i16 %p to i32
1792 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1793 %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %0, i32 236, <4 x float> %value, <4 x i1> %2)
1794 store <4 x i32> %3, <4 x i32>* %addr, align 8
1798 declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)
; Predicated writeback vstrwq_scatter_base, s32: same pattern as the f32
; variant with immediate #328.
1800 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
1801 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32:
1802 ; CHECK: @ %bb.0: @ %entry
1803 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1804 ; CHECK-NEXT: vmsr p0, r1
1806 ; CHECK-NEXT: vstrwt.32 q0, [q1, #328]!
1807 ; CHECK-NEXT: vstrw.32 q1, [r0]
1810 %0 = load <4 x i32>, <4 x i32>* %addr, align 8
1811 %1 = zext i16 %p to i32
1812 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1813 %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 328, <4 x i32> %value, <4 x i1> %2)
1814 store <4 x i32> %3, <4 x i32>* %addr, align 8
1818 declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)
; Predicated writeback vstrwq_scatter_base, u32: same intrinsic as s32
; (sign-agnostic store), immediate #412.
1820 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(<4 x i32>* %addr, <4 x i32> %value, i16 zeroext %p) {
1821 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32:
1822 ; CHECK: @ %bb.0: @ %entry
1823 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1824 ; CHECK-NEXT: vmsr p0, r1
1826 ; CHECK-NEXT: vstrwt.32 q0, [q1, #412]!
1827 ; CHECK-NEXT: vstrw.32 q1, [r0]
1830 %0 = load <4 x i32>, <4 x i32>* %addr, align 8
1831 %1 = zext i16 %p to i32
1832 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1833 %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 412, <4 x i32> %value, <4 x i1> %2)
1834 store <4 x i32> %3, <4 x i32>* %addr, align 8
; Writeback vstrwq_scatter_base, s32: unpredicated "!"-form with immediate #-152.
1838 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(<4 x i32>* %addr, <4 x i32> %value) {
1839 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32:
1840 ; CHECK: @ %bb.0: @ %entry
1841 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1842 ; CHECK-NEXT: vstrw.32 q0, [q1, #-152]!
1843 ; CHECK-NEXT: vstrw.32 q1, [r0]
1846 %0 = load <4 x i32>, <4 x i32>* %addr, align 8
1847 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 -152, <4 x i32> %value)
1848 store <4 x i32> %1, <4 x i32>* %addr, align 8
1852 declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)
; Writeback vstrwq_scatter_base, u32: same intrinsic as s32, immediate #64.
1854 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(<4 x i32>* %addr, <4 x i32> %value) {
1855 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32:
1856 ; CHECK: @ %bb.0: @ %entry
1857 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1858 ; CHECK-NEXT: vstrw.32 q0, [q1, #64]!
1859 ; CHECK-NEXT: vstrw.32 q1, [r0]
1862 %0 = load <4 x i32>, <4 x i32>* %addr, align 8
1863 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 64, <4 x i32> %value)
1864 store <4 x i32> %1, <4 x i32>* %addr, align 8
; vstrwq_scatter_offset, f32: unshifted offsets (last arg 0), so the plain
; "vstrw.32 q1, [r0, q0]" form (no uxtw) is expected.
1868 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
1869 ; CHECK-LABEL: test_vstrwq_scatter_offset_f32:
1870 ; CHECK: @ %bb.0: @ %entry
1871 ; CHECK-NEXT: vstrw.32 q1, [r0, q0]
1874 call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0)
1878 declare void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float*, <4 x i32>, <4 x float>, i32, i32)
; Predicated vstrwq_scatter_offset, f32: vmsr p0 then "vstrwt.32 q1, [r0, q0]".
1880 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
1881 ; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32:
1882 ; CHECK: @ %bb.0: @ %entry
1883 ; CHECK-NEXT: vmsr p0, r1
1885 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
1888 %0 = zext i16 %p to i32
1889 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1890 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %1)
1894 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float*, <4 x i32>, <4 x float>, i32, i32, <4 x i1>)
; Predicated vstrwq_scatter_offset, s32: same lowering pattern as the f32
; variant, integer payload.
1896 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1897 ; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32:
1898 ; CHECK: @ %bb.0: @ %entry
1899 ; CHECK-NEXT: vmsr p0, r1
1901 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
1904 %0 = zext i16 %p to i32
1905 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1906 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
1910 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32*, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)
; Predicated vstrwq_scatter_offset, u32: identical call to the s32 variant
; (stores are sign-agnostic).
1912 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1913 ; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32:
1914 ; CHECK: @ %bb.0: @ %entry
1915 ; CHECK-NEXT: vmsr p0, r1
1917 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
1920 %0 = zext i16 %p to i32
1921 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1922 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
; vstrwq_scatter_offset, s32: unshifted, unpredicated "vstrw.32 q1, [r0, q0]".
1926 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
1927 ; CHECK-LABEL: test_vstrwq_scatter_offset_s32:
1928 ; CHECK: @ %bb.0: @ %entry
1929 ; CHECK-NEXT: vstrw.32 q1, [r0, q0]
1932 call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
1936 declare void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32*, <4 x i32>, <4 x i32>, i32, i32)
; vstrwq_scatter_offset, u32: identical call/lowering to the s32 variant.
1938 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
1939 ; CHECK-LABEL: test_vstrwq_scatter_offset_u32:
1940 ; CHECK: @ %bb.0: @ %entry
1941 ; CHECK-NEXT: vstrw.32 q1, [r0, q0]
1944 call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
; vstrwq_scatter_shifted_offset, f32: shift argument 2 (word scaling) must
; produce "vstrw.32 ... uxtw #2".
1948 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(float* %base, <4 x i32> %offset, <4 x float> %value) {
1949 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32:
1950 ; CHECK: @ %bb.0: @ %entry
1951 ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
1954 call void @llvm.arm.mve.vstr.scatter.offset.p0f32.v4i32.v4f32(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2)
; Predicated vstrwq_scatter_shifted_offset, f32: vmsr p0 then
; "vstrwt.32 ... uxtw #2".
1958 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(float* %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
1959 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32:
1960 ; CHECK: @ %bb.0: @ %entry
1961 ; CHECK-NEXT: vmsr p0, r1
1963 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
1966 %0 = zext i16 %p to i32
1967 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1968 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0f32.v4i32.v4f32.v4i1(float* %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %1)
; Predicated vstrwq_scatter_shifted_offset, s32: integer payload, same
; "vstrwt.32 ... uxtw #2" expectation.
1972 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1973 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32:
1974 ; CHECK: @ %bb.0: @ %entry
1975 ; CHECK-NEXT: vmsr p0, r1
1977 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
1980 %0 = zext i16 %p to i32
1981 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1982 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
; Predicated vstrwq_scatter_shifted_offset, u32: identical call to the s32
; variant (stores are sign-agnostic).
1986 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1987 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32:
1988 ; CHECK: @ %bb.0: @ %entry
1989 ; CHECK-NEXT: vmsr p0, r1
1991 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
1994 %0 = zext i16 %p to i32
1995 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1996 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0i32.v4i32.v4i32.v4i1(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
; vstrwq_scatter_shifted_offset, s32: unpredicated "vstrw.32 ... uxtw #2".
2000 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
2001 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32:
2002 ; CHECK: @ %bb.0: @ %entry
2003 ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
2006 call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
; vstrwq_scatter_shifted_offset, u32: identical call/lowering to the s32 variant.
2010 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(i32* %base, <4 x i32> %offset, <4 x i32> %value) {
2011 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32:
2012 ; CHECK: @ %bb.0: @ %entry
2013 ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
2016 call void @llvm.arm.mve.vstr.scatter.offset.p0i32.v4i32.v4i32(i32* %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)