1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; Unpredicated gather.offset call returning <8 x i16> with trailing i32
; arguments (8, 0, 0). The assertions expect `vldrb.s16 q1, [r0, q0]`
; followed by a vmov of q1 into q0.
4 define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_s16(ptr %base, <8 x i16> %offset) {
5 ; CHECK-LABEL: test_vldrbq_gather_offset_s16:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vldrb.s16 q1, [r0, q0]
8 ; CHECK-NEXT: vmov q0, q1
11 %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 0)
15 declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr, <8 x i16>, i32, i32, i32)
; Unpredicated gather.offset call returning <4 x i32> with trailing i32
; arguments (8, 0, 0). The assertions expect `vldrb.s32 q1, [r0, q0]`
; followed by a vmov of q1 into q0.
17 define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_s32(ptr %base, <4 x i32> %offset) {
18 ; CHECK-LABEL: test_vldrbq_gather_offset_s32:
19 ; CHECK: @ %bb.0: @ %entry
20 ; CHECK-NEXT: vldrb.s32 q1, [r0, q0]
21 ; CHECK-NEXT: vmov q0, q1
24 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 0)
28 declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr, <4 x i32>, i32, i32, i32)
; Unpredicated gather.offset call returning <16 x i8> with trailing i32
; arguments (8, 0, 0). Although the final argument is 0, the expected
; mnemonic here is `vldrb.u8 q1, [r0, q0]`, followed by a vmov of q1 into q0.
30 define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_s8(ptr %base, <16 x i8> %offset) {
31 ; CHECK-LABEL: test_vldrbq_gather_offset_s8:
32 ; CHECK: @ %bb.0: @ %entry
33 ; CHECK-NEXT: vldrb.u8 q1, [r0, q0]
34 ; CHECK-NEXT: vmov q0, q1
37 %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0.v16i8(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 0)
41 declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0.v16i8(ptr, <16 x i8>, i32, i32, i32)
; Unpredicated gather.offset call returning <8 x i16> with trailing i32
; arguments (8, 0, 1). With the final argument set to 1 the assertions
; expect `vldrb.u16 q1, [r0, q0]`, followed by a vmov of q1 into q0.
43 define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_u16(ptr %base, <8 x i16> %offset) {
44 ; CHECK-LABEL: test_vldrbq_gather_offset_u16:
45 ; CHECK: @ %bb.0: @ %entry
46 ; CHECK-NEXT: vldrb.u16 q1, [r0, q0]
47 ; CHECK-NEXT: vmov q0, q1
50 %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 1)
; Unpredicated gather.offset call returning <4 x i32> with trailing i32
; arguments (8, 0, 1). With the final argument set to 1 the assertions
; expect `vldrb.u32 q1, [r0, q0]`, followed by a vmov of q1 into q0.
54 define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_u32(ptr %base, <4 x i32> %offset) {
55 ; CHECK-LABEL: test_vldrbq_gather_offset_u32:
56 ; CHECK: @ %bb.0: @ %entry
57 ; CHECK-NEXT: vldrb.u32 q1, [r0, q0]
58 ; CHECK-NEXT: vmov q0, q1
61 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 1)
; Unpredicated gather.offset call returning <16 x i8> with trailing i32
; arguments (8, 0, 1). The assertions expect `vldrb.u8 q1, [r0, q0]`
; followed by a vmov of q1 into q0.
65 define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_u8(ptr %base, <16 x i8> %offset) {
66 ; CHECK-LABEL: test_vldrbq_gather_offset_u8:
67 ; CHECK: @ %bb.0: @ %entry
68 ; CHECK-NEXT: vldrb.u8 q1, [r0, q0]
69 ; CHECK-NEXT: vmov q0, q1
72 %0 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.v16i8.p0.v16i8(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 1)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 8, 0, 0). The assertions expect `vmsr p0, r1` and
; then `vldrbt.s16 q1, [r0, q0]`, with the result moved from q1 into q0.
76 define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_s16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
77 ; CHECK-LABEL: test_vldrbq_gather_offset_z_s16:
78 ; CHECK: @ %bb.0: @ %entry
79 ; CHECK-NEXT: vmsr p0, r1
81 ; CHECK-NEXT: vldrbt.s16 q1, [r0, q0]
82 ; CHECK-NEXT: vmov q0, q1
85 %0 = zext i16 %p to i32
86 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
87 %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 0, <8 x i1> %1)
91 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
93 declare <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr, <8 x i16>, i32, i32, i32, <8 x i1>)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 8, 0, 0). The assertions expect `vmsr p0, r1` and
; then `vldrbt.s32 q1, [r0, q0]`, with the result moved from q1 into q0.
95 define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
96 ; CHECK-LABEL: test_vldrbq_gather_offset_z_s32:
97 ; CHECK: @ %bb.0: @ %entry
98 ; CHECK-NEXT: vmsr p0, r1
100 ; CHECK-NEXT: vldrbt.s32 q1, [r0, q0]
101 ; CHECK-NEXT: vmov q0, q1
104 %0 = zext i16 %p to i32
105 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
106 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 0, <4 x i1> %1)
110 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
112 declare <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr, <4 x i32>, i32, i32, i32, <4 x i1>)
; Predicated variant: %p is zero-extended to i32, converted to <16 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 8, 0, 0). The assertions expect `vmsr p0, r1` and
; then `vldrbt.u8 q1, [r0, q0]` (the .u8 form, despite the final 0 argument).
114 define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_s8(ptr %base, <16 x i8> %offset, i16 zeroext %p) {
115 ; CHECK-LABEL: test_vldrbq_gather_offset_z_s8:
116 ; CHECK: @ %bb.0: @ %entry
117 ; CHECK-NEXT: vmsr p0, r1
119 ; CHECK-NEXT: vldrbt.u8 q1, [r0, q0]
120 ; CHECK-NEXT: vmov q0, q1
123 %0 = zext i16 %p to i32
124 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
125 %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0.v16i8.v16i1(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 0, <16 x i1> %1)
129 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
131 declare <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0.v16i8.v16i1(ptr, <16 x i8>, i32, i32, i32, <16 x i1>)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 8, 0, 1). The assertions expect `vmsr p0, r1` and
; then `vldrbt.u16 q1, [r0, q0]`, with the result moved from q1 into q0.
133 define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_gather_offset_z_u16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
134 ; CHECK-LABEL: test_vldrbq_gather_offset_z_u16:
135 ; CHECK: @ %bb.0: @ %entry
136 ; CHECK-NEXT: vmsr p0, r1
138 ; CHECK-NEXT: vldrbt.u16 q1, [r0, q0]
139 ; CHECK-NEXT: vmov q0, q1
142 %0 = zext i16 %p to i32
143 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
144 %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 8, i32 0, i32 1, <8 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 8, 0, 1). The assertions expect `vmsr p0, r1` and
; then `vldrbt.u32 q1, [r0, q0]`, with the result moved from q1 into q0.
148 define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_gather_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
149 ; CHECK-LABEL: test_vldrbq_gather_offset_z_u32:
150 ; CHECK: @ %bb.0: @ %entry
151 ; CHECK-NEXT: vmsr p0, r1
153 ; CHECK-NEXT: vldrbt.u32 q1, [r0, q0]
154 ; CHECK-NEXT: vmov q0, q1
157 %0 = zext i16 %p to i32
158 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
159 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 8, i32 0, i32 1, <4 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <16 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 8, 0, 1). The assertions expect `vmsr p0, r1` and
; then `vldrbt.u8 q1, [r0, q0]`, with the result moved from q1 into q0.
163 define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_gather_offset_z_u8(ptr %base, <16 x i8> %offset, i16 zeroext %p) {
164 ; CHECK-LABEL: test_vldrbq_gather_offset_z_u8:
165 ; CHECK: @ %bb.0: @ %entry
166 ; CHECK-NEXT: vmsr p0, r1
168 ; CHECK-NEXT: vldrbt.u8 q1, [r0, q0]
169 ; CHECK-NEXT: vmov q0, q1
172 %0 = zext i16 %p to i32
173 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
174 %2 = call <16 x i8> @llvm.arm.mve.vldr.gather.offset.predicated.v16i8.p0.v16i8.v16i1(ptr %base, <16 x i8> %offset, i32 8, i32 0, i32 1, <16 x i1> %1)
; gather.base call on the <2 x i64> address vector %addr with immediate
; offset 616. The assertions expect `vldrd.u64 q1, [q0, #616]` followed by a
; vmov of q1 into q0.
178 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_s64(<2 x i64> %addr) {
179 ; CHECK-LABEL: test_vldrdq_gather_base_s64:
180 ; CHECK: @ %bb.0: @ %entry
181 ; CHECK-NEXT: vldrd.u64 q1, [q0, #616]
182 ; CHECK-NEXT: vmov q0, q1
185 %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 616)
189 declare <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64>, i32)
; gather.base call on the <2 x i64> address vector %addr with a negative
; immediate offset, -336. The assertions expect `vldrd.u64 q1, [q0, #-336]`
; followed by a vmov of q1 into q0.
191 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_u64(<2 x i64> %addr) {
192 ; CHECK-LABEL: test_vldrdq_gather_base_u64:
193 ; CHECK: @ %bb.0: @ %entry
194 ; CHECK-NEXT: vldrd.u64 q1, [q0, #-336]
195 ; CHECK-NEXT: vmov q0, q1
198 %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> %addr, i32 -336)
; gather.base.wb test: the <2 x i64> address vector is loaded from %addr and
; passed to the intrinsic with immediate 576. Element 1 of the returned pair
; is stored back to %addr and element 0 is extracted afterwards. The
; assertions expect vldrw.u32, then `vldrd.u64 q0, [q1, #576]!`, then
; vstrw.32 of q1 back to [r0].
202 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_s64(ptr %addr) {
203 ; CHECK-LABEL: test_vldrdq_gather_base_wb_s64:
204 ; CHECK: @ %bb.0: @ %entry
205 ; CHECK-NEXT: vldrw.u32 q1, [r0]
206 ; CHECK-NEXT: vldrd.u64 q0, [q1, #576]!
207 ; CHECK-NEXT: vstrw.32 q1, [r0]
210 %0 = load <2 x i64>, ptr %addr, align 8
211 %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 576)
212 %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
213 store <2 x i64> %2, ptr %addr, align 8
214 %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
218 declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64>, i32)
; gather.base.wb test with a negative immediate: the <2 x i64> address vector
; is loaded from %addr and passed to the intrinsic with -328. Element 1 of
; the returned pair is stored back to %addr and element 0 is extracted
; afterwards. The assertions expect vldrw.u32, then
; `vldrd.u64 q0, [q1, #-328]!`, then vstrw.32 of q1 back to [r0].
220 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_u64(ptr %addr) {
221 ; CHECK-LABEL: test_vldrdq_gather_base_wb_u64:
222 ; CHECK: @ %bb.0: @ %entry
223 ; CHECK-NEXT: vldrw.u32 q1, [r0]
224 ; CHECK-NEXT: vldrd.u64 q0, [q1, #-328]!
225 ; CHECK-NEXT: vstrw.32 q1, [r0]
228 %0 = load <2 x i64>, ptr %addr, align 8
229 %1 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -328)
230 %2 = extractvalue { <2 x i64>, <2 x i64> } %1, 1
231 store <2 x i64> %2, ptr %addr, align 8
232 %3 = extractvalue { <2 x i64>, <2 x i64> } %1, 0
; Predicated gather.base.wb test: the address vector is loaded from %addr,
; %p is zero-extended and converted to <2 x i1> with pred.i2v, and both are
; passed to the intrinsic with immediate 664. Element 1 of the returned pair
; is stored back to %addr; element 0 is extracted afterwards. The assertions
; expect `vmsr p0, r1`, vldrw.u32, `vldrdt.u64 q0, [q1, #664]!`, and vstrw.32.
236 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(ptr %addr, i16 zeroext %p) {
237 ; CHECK-LABEL: test_vldrdq_gather_base_wb_z_s64:
238 ; CHECK: @ %bb.0: @ %entry
239 ; CHECK-NEXT: vmsr p0, r1
240 ; CHECK-NEXT: vldrw.u32 q1, [r0]
242 ; CHECK-NEXT: vldrdt.u64 q0, [q1, #664]!
243 ; CHECK-NEXT: vstrw.32 q1, [r0]
246 %0 = load <2 x i64>, ptr %addr, align 8
247 %1 = zext i16 %p to i32
248 %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
249 %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 664, <2 x i1> %2)
250 %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
251 store <2 x i64> %4, ptr %addr, align 8
252 %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
256 declare <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32)
257 declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i1>)
; Predicated gather.base.wb test: the address vector is loaded from %addr,
; %p is zero-extended and converted to <2 x i1> with pred.i2v, and both are
; passed to the intrinsic with immediate 656. Element 1 of the returned pair
; is stored back to %addr; element 0 is extracted afterwards. The assertions
; expect `vmsr p0, r1`, vldrw.u32, `vldrdt.u64 q0, [q1, #656]!`, and vstrw.32.
259 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_u64(ptr %addr, i16 zeroext %p) {
260 ; CHECK-LABEL: test_vldrdq_gather_base_wb_z_u64:
261 ; CHECK: @ %bb.0: @ %entry
262 ; CHECK-NEXT: vmsr p0, r1
263 ; CHECK-NEXT: vldrw.u32 q1, [r0]
265 ; CHECK-NEXT: vldrdt.u64 q0, [q1, #656]!
266 ; CHECK-NEXT: vstrw.32 q1, [r0]
269 %0 = load <2 x i64>, ptr %addr, align 8
270 %1 = zext i16 %p to i32
271 %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
272 %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 656, <2 x i1> %2)
273 %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
274 store <2 x i64> %4, ptr %addr, align 8
275 %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
; Predicated gather.base test: %p is zero-extended, converted to <2 x i1>
; with pred.i2v, and passed with the address vector %addr and immediate 888.
; The assertions expect `vmsr p0, r0` (the predicate arrives in r0 here) and
; then `vldrdt.u64 q1, [q0, #888]`, with the result moved from q1 into q0.
279 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
280 ; CHECK-LABEL: test_vldrdq_gather_base_z_s64:
281 ; CHECK: @ %bb.0: @ %entry
282 ; CHECK-NEXT: vmsr p0, r0
284 ; CHECK-NEXT: vldrdt.u64 q1, [q0, #888]
285 ; CHECK-NEXT: vmov q0, q1
288 %0 = zext i16 %p to i32
289 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
290 %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 888, <2 x i1> %1)
294 declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i1>)
; Predicated gather.base test with a negative immediate: %p is zero-extended,
; converted to <2 x i1> with pred.i2v, and passed with the address vector
; %addr and immediate -1000. The assertions expect `vmsr p0, r0` and then
; `vldrdt.u64 q1, [q0, #-1000]`, with the result moved from q1 into q0.
296 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_u64(<2 x i64> %addr, i16 zeroext %p) {
297 ; CHECK-LABEL: test_vldrdq_gather_base_z_u64:
298 ; CHECK: @ %bb.0: @ %entry
299 ; CHECK-NEXT: vmsr p0, r0
301 ; CHECK-NEXT: vldrdt.u64 q1, [q0, #-1000]
302 ; CHECK-NEXT: vmov q0, q1
305 %0 = zext i16 %p to i32
306 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
307 %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 -1000, <2 x i1> %1)
; Unpredicated gather.offset call returning <2 x i64> with trailing i32
; arguments (64, 0, 0). The assertions expect `vldrd.u64 q1, [r0, q0]`
; followed by a vmov of q1 into q0.
311 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_s64(ptr %base, <2 x i64> %offset) {
312 ; CHECK-LABEL: test_vldrdq_gather_offset_s64:
313 ; CHECK: @ %bb.0: @ %entry
314 ; CHECK-NEXT: vldrd.u64 q1, [r0, q0]
315 ; CHECK-NEXT: vmov q0, q1
318 %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0)
322 declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr, <2 x i64>, i32, i32, i32)
; Unpredicated gather.offset call returning <2 x i64> with trailing i32
; arguments (64, 0, 1). The expected instruction is the same
; `vldrd.u64 q1, [r0, q0]` as in the s64 test, followed by a vmov into q0.
324 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_u64(ptr %base, <2 x i64> %offset) {
325 ; CHECK-LABEL: test_vldrdq_gather_offset_u64:
326 ; CHECK: @ %bb.0: @ %entry
327 ; CHECK-NEXT: vldrd.u64 q1, [r0, q0]
328 ; CHECK-NEXT: vmov q0, q1
331 %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 1)
; Predicated variant: %p is zero-extended to i32, converted to <2 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 64, 0, 0). The assertions expect `vmsr p0, r1` and
; then `vldrdt.u64 q1, [r0, q0]`, with the result moved from q1 into q0.
335 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
336 ; CHECK-LABEL: test_vldrdq_gather_offset_z_s64:
337 ; CHECK: @ %bb.0: @ %entry
338 ; CHECK-NEXT: vmsr p0, r1
340 ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0]
341 ; CHECK-NEXT: vmov q0, q1
344 %0 = zext i16 %p to i32
345 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
346 %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <2 x i1> %1)
350 declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr, <2 x i64>, i32, i32, i32, <2 x i1>)
; Predicated variant: %p is zero-extended to i32, converted to <2 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 64, 0, 1). The assertions expect `vmsr p0, r1` and
; then `vldrdt.u64 q1, [r0, q0]`, with the result moved from q1 into q0.
352 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_u64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
353 ; CHECK-LABEL: test_vldrdq_gather_offset_z_u64:
354 ; CHECK: @ %bb.0: @ %entry
355 ; CHECK-NEXT: vmsr p0, r1
357 ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0]
358 ; CHECK-NEXT: vmov q0, q1
361 %0 = zext i16 %p to i32
362 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
363 %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 1, <2 x i1> %1)
; Shifted-offset gather: gather.offset returning <2 x i64> with trailing i32
; arguments (64, 3, 0). The second argument, 3, shows up in the expected
; addressing mode: `vldrd.u64 q1, [r0, q0, uxtw #3]`, then a vmov into q0.
367 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_s64(ptr %base, <2 x i64> %offset) {
368 ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_s64:
369 ; CHECK: @ %bb.0: @ %entry
370 ; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3]
371 ; CHECK-NEXT: vmov q0, q1
374 %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 0)
; Shifted-offset gather: gather.offset returning <2 x i64> with trailing i32
; arguments (64, 3, 1). The second argument, 3, shows up in the expected
; addressing mode: `vldrd.u64 q1, [r0, q0, uxtw #3]`, then a vmov into q0.
378 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_u64(ptr %base, <2 x i64> %offset) {
379 ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_u64:
380 ; CHECK: @ %bb.0: @ %entry
381 ; CHECK-NEXT: vldrd.u64 q1, [r0, q0, uxtw #3]
382 ; CHECK-NEXT: vmov q0, q1
385 %0 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.v2i64.p0.v2i64(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 1)
; Predicated shifted-offset gather: %p is zero-extended, converted to
; <2 x i1> with pred.i2v, and passed to gather.offset.predicated with trailing
; i32 arguments (64, 3, 0). The assertions expect `vmsr p0, r1` and then
; `vldrdt.u64 q1, [r0, q0, uxtw #3]`, with the result moved into q0.
389 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
390 ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_s64:
391 ; CHECK: @ %bb.0: @ %entry
392 ; CHECK-NEXT: vmsr p0, r1
394 ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3]
395 ; CHECK-NEXT: vmov q0, q1
398 %0 = zext i16 %p to i32
399 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
400 %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 0, <2 x i1> %1)
; Predicated shifted-offset gather: %p is zero-extended, converted to
; <2 x i1> with pred.i2v, and passed to gather.offset.predicated with trailing
; i32 arguments (64, 3, 1). The assertions expect `vmsr p0, r1` and then
; `vldrdt.u64 q1, [r0, q0, uxtw #3]`, with the result moved into q0.
404 define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_shifted_offset_z_u64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
405 ; CHECK-LABEL: test_vldrdq_gather_shifted_offset_z_u64:
406 ; CHECK: @ %bb.0: @ %entry
407 ; CHECK-NEXT: vmsr p0, r1
409 ; CHECK-NEXT: vldrdt.u64 q1, [r0, q0, uxtw #3]
410 ; CHECK-NEXT: vmov q0, q1
413 %0 = zext i16 %p to i32
414 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
415 %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr %base, <2 x i64> %offset, i32 64, i32 3, i32 1, <2 x i1> %1)
; Unpredicated gather.offset call returning <8 x half> from <8 x i16> offsets
; with trailing i32 arguments (16, 0, 0). The assertions expect the integer
; mnemonic `vldrh.u16 q1, [r0, q0]` followed by a vmov of q1 into q0.
419 define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_f16(ptr %base, <8 x i16> %offset) {
420 ; CHECK-LABEL: test_vldrhq_gather_offset_f16:
421 ; CHECK: @ %bb.0: @ %entry
422 ; CHECK-NEXT: vldrh.u16 q1, [r0, q0]
423 ; CHECK-NEXT: vmov q0, q1
426 %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
430 declare <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0.v8i16(ptr, <8 x i16>, i32, i32, i32)
; Unpredicated gather.offset call returning <8 x i16> with trailing i32
; arguments (16, 0, 0). Although the final argument is 0, the expected
; mnemonic is `vldrh.u16 q1, [r0, q0]`, followed by a vmov of q1 into q0.
432 define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_s16(ptr %base, <8 x i16> %offset) {
433 ; CHECK-LABEL: test_vldrhq_gather_offset_s16:
434 ; CHECK: @ %bb.0: @ %entry
435 ; CHECK-NEXT: vldrh.u16 q1, [r0, q0]
436 ; CHECK-NEXT: vmov q0, q1
439 %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0)
; Unpredicated gather.offset call returning <4 x i32> with trailing i32
; arguments (16, 0, 0). The assertions expect `vldrh.s32 q1, [r0, q0]`
; followed by a vmov of q1 into q0.
444 define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_s32(ptr %base, <4 x i32> %offset) {
445 ; CHECK-LABEL: test_vldrhq_gather_offset_s32:
446 ; CHECK: @ %bb.0: @ %entry
447 ; CHECK-NEXT: vldrh.s32 q1, [r0, q0]
448 ; CHECK-NEXT: vmov q0, q1
451 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 0)
; Unpredicated gather.offset call returning <8 x i16> with trailing i32
; arguments (16, 0, 1). The assertions expect `vldrh.u16 q1, [r0, q0]`
; followed by a vmov of q1 into q0.
456 define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_u16(ptr %base, <8 x i16> %offset) {
457 ; CHECK-LABEL: test_vldrhq_gather_offset_u16:
458 ; CHECK: @ %bb.0: @ %entry
459 ; CHECK-NEXT: vldrh.u16 q1, [r0, q0]
460 ; CHECK-NEXT: vmov q0, q1
463 %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 1)
; Unpredicated gather.offset call returning <4 x i32> with trailing i32
; arguments (16, 0, 1). With the final argument set to 1 the assertions
; expect `vldrh.u32 q1, [r0, q0]`, followed by a vmov of q1 into q0.
467 define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_u32(ptr %base, <4 x i32> %offset) {
468 ; CHECK-LABEL: test_vldrhq_gather_offset_u32:
469 ; CHECK: @ %bb.0: @ %entry
470 ; CHECK-NEXT: vldrh.u32 q1, [r0, q0]
471 ; CHECK-NEXT: vmov q0, q1
474 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 1)
; Predicated variant returning <8 x half>: %p is zero-extended to i32,
; converted to <8 x i1> with pred.i2v, and passed as the last operand of
; gather.offset.predicated (trailing i32 arguments 16, 0, 0). The assertions
; expect `vmsr p0, r1` and then `vldrht.u16 q1, [r0, q0]`.
478 define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_offset_z_f16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
479 ; CHECK-LABEL: test_vldrhq_gather_offset_z_f16:
480 ; CHECK: @ %bb.0: @ %entry
481 ; CHECK-NEXT: vmsr p0, r1
483 ; CHECK-NEXT: vldrht.u16 q1, [r0, q0]
484 ; CHECK-NEXT: vmov q0, q1
487 %0 = zext i16 %p to i32
488 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
489 %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
493 declare <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0.v8i16.v8i1(ptr, <8 x i16>, i32, i32, i32, <8 x i1>)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 16, 0, 0). The assertions expect `vmsr p0, r1` and
; then `vldrht.u16 q1, [r0, q0]`, with the result moved from q1 into q0.
495 define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_s16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
496 ; CHECK-LABEL: test_vldrhq_gather_offset_z_s16:
497 ; CHECK: @ %bb.0: @ %entry
498 ; CHECK-NEXT: vmsr p0, r1
500 ; CHECK-NEXT: vldrht.u16 q1, [r0, q0]
501 ; CHECK-NEXT: vmov q0, q1
504 %0 = zext i16 %p to i32
505 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
506 %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 0, <8 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 16, 0, 0). The assertions expect `vmsr p0, r1` and
; then `vldrht.s32 q1, [r0, q0]`, with the result moved from q1 into q0.
511 define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
512 ; CHECK-LABEL: test_vldrhq_gather_offset_z_s32:
513 ; CHECK: @ %bb.0: @ %entry
514 ; CHECK-NEXT: vmsr p0, r1
516 ; CHECK-NEXT: vldrht.s32 q1, [r0, q0]
517 ; CHECK-NEXT: vmov q0, q1
520 %0 = zext i16 %p to i32
521 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
522 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 0, <4 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <8 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 16, 0, 1). The assertions expect `vmsr p0, r1` and
; then `vldrht.u16 q1, [r0, q0]`, with the result moved from q1 into q0.
527 define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_offset_z_u16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
528 ; CHECK-LABEL: test_vldrhq_gather_offset_z_u16:
529 ; CHECK: @ %bb.0: @ %entry
530 ; CHECK-NEXT: vmsr p0, r1
532 ; CHECK-NEXT: vldrht.u16 q1, [r0, q0]
533 ; CHECK-NEXT: vmov q0, q1
536 %0 = zext i16 %p to i32
537 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
538 %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 0, i32 1, <8 x i1> %1)
; Predicated variant: %p is zero-extended to i32, converted to <4 x i1> with
; pred.i2v, and passed as the last operand of gather.offset.predicated
; (trailing i32 arguments 16, 0, 1). The assertions expect `vmsr p0, r1` and
; then `vldrht.u32 q1, [r0, q0]`, with the result moved from q1 into q0.
542 define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
543 ; CHECK-LABEL: test_vldrhq_gather_offset_z_u32:
544 ; CHECK: @ %bb.0: @ %entry
545 ; CHECK-NEXT: vmsr p0, r1
547 ; CHECK-NEXT: vldrht.u32 q1, [r0, q0]
548 ; CHECK-NEXT: vmov q0, q1
551 %0 = zext i16 %p to i32
552 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
553 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 0, i32 1, <4 x i1> %1)
; Shifted-offset gather returning <8 x half>: gather.offset with trailing i32
; arguments (16, 1, 0). The second argument, 1, shows up in the expected
; addressing mode: `vldrh.u16 q1, [r0, q0, uxtw #1]`, then a vmov into q0.
557 define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_f16(ptr %base, <8 x i16> %offset) {
558 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_f16:
559 ; CHECK: @ %bb.0: @ %entry
560 ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
561 ; CHECK-NEXT: vmov q0, q1
564 %0 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.v8f16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
; Shifted-offset gather returning <8 x i16>: gather.offset with trailing i32
; arguments (16, 1, 0). The assertions expect
; `vldrh.u16 q1, [r0, q0, uxtw #1]` followed by a vmov of q1 into q0.
568 define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_s16(ptr %base, <8 x i16> %offset) {
569 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s16:
570 ; CHECK: @ %bb.0: @ %entry
571 ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
572 ; CHECK-NEXT: vmov q0, q1
575 %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0)
; Shifted-offset gather returning <4 x i32>: gather.offset with trailing i32
; arguments (16, 1, 0). The assertions expect
; `vldrh.s32 q1, [r0, q0, uxtw #1]` followed by a vmov of q1 into q0.
579 define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_s32(ptr %base, <4 x i32> %offset) {
580 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_s32:
581 ; CHECK: @ %bb.0: @ %entry
582 ; CHECK-NEXT: vldrh.s32 q1, [r0, q0, uxtw #1]
583 ; CHECK-NEXT: vmov q0, q1
586 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 0)
; Shifted-offset gather returning <8 x i16>: gather.offset with trailing i32
; arguments (16, 1, 1). The assertions expect
; `vldrh.u16 q1, [r0, q0, uxtw #1]` followed by a vmov of q1 into q0.
590 define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_u16(ptr %base, <8 x i16> %offset) {
591 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u16:
592 ; CHECK: @ %bb.0: @ %entry
593 ; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
594 ; CHECK-NEXT: vmov q0, q1
597 %0 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.v8i16.p0.v8i16(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 1)
; Shifted-offset gather returning <4 x i32>: gather.offset with trailing i32
; arguments (16, 1, 1). The assertions expect
; `vldrh.u32 q1, [r0, q0, uxtw #1]` followed by a vmov of q1 into q0.
601 define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_u32(ptr %base, <4 x i32> %offset) {
602 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_u32:
603 ; CHECK: @ %bb.0: @ %entry
604 ; CHECK-NEXT: vldrh.u32 q1, [r0, q0, uxtw #1]
605 ; CHECK-NEXT: vmov q0, q1
608 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 1)
; Predicated shifted-offset gather returning <8 x half>: %p is zero-extended,
; converted to <8 x i1> with pred.i2v, and passed to gather.offset.predicated
; with trailing i32 arguments (16, 1, 0). The assertions expect
; `vmsr p0, r1` and then `vldrht.u16 q1, [r0, q0, uxtw #1]`.
612 define arm_aapcs_vfpcc <8 x half> @test_vldrhq_gather_shifted_offset_z_f16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
613 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_f16:
614 ; CHECK: @ %bb.0: @ %entry
615 ; CHECK-NEXT: vmsr p0, r1
617 ; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1]
618 ; CHECK-NEXT: vmov q0, q1
621 %0 = zext i16 %p to i32
622 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
623 %2 = call <8 x half> @llvm.arm.mve.vldr.gather.offset.predicated.v8f16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
; Predicated shifted-offset gather returning <8 x i16>: %p is zero-extended,
; converted to <8 x i1> with pred.i2v, and passed to gather.offset.predicated
; with trailing i32 arguments (16, 1, 0). The assertions expect
; `vmsr p0, r1` and then `vldrht.u16 q1, [r0, q0, uxtw #1]`.
627 define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_s16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
628 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s16:
629 ; CHECK: @ %bb.0: @ %entry
630 ; CHECK-NEXT: vmsr p0, r1
632 ; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1]
633 ; CHECK-NEXT: vmov q0, q1
636 %0 = zext i16 %p to i32
637 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
638 %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 0, <8 x i1> %1)
; Predicated shifted-offset gather returning <4 x i32>: %p is zero-extended,
; converted to <4 x i1> with pred.i2v, and passed to gather.offset.predicated
; with trailing i32 arguments (16, 1, 0). The assertions expect
; `vmsr p0, r1` and then `vldrht.s32 q1, [r0, q0, uxtw #1]`.
642 define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
643 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_s32:
644 ; CHECK: @ %bb.0: @ %entry
645 ; CHECK-NEXT: vmsr p0, r1
647 ; CHECK-NEXT: vldrht.s32 q1, [r0, q0, uxtw #1]
648 ; CHECK-NEXT: vmov q0, q1
651 %0 = zext i16 %p to i32
652 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
653 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 0, <4 x i1> %1)
; Predicated shifted-offset gather returning <8 x i16>: %p is zero-extended,
; converted to <8 x i1> with pred.i2v, and passed to gather.offset.predicated
; with trailing i32 arguments (16, 1, 1). The assertions expect
; `vmsr p0, r1` and then `vldrht.u16 q1, [r0, q0, uxtw #1]`.
657 define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_gather_shifted_offset_z_u16(ptr %base, <8 x i16> %offset, i16 zeroext %p) {
658 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u16:
659 ; CHECK: @ %bb.0: @ %entry
660 ; CHECK-NEXT: vmsr p0, r1
662 ; CHECK-NEXT: vldrht.u16 q1, [r0, q0, uxtw #1]
663 ; CHECK-NEXT: vmov q0, q1
666 %0 = zext i16 %p to i32
667 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
668 %2 = call <8 x i16> @llvm.arm.mve.vldr.gather.offset.predicated.v8i16.p0.v8i16.v8i1(ptr %base, <8 x i16> %offset, i32 16, i32 1, i32 1, <8 x i1> %1)
; Predicated shifted-offset gather returning <4 x i32>: %p is zero-extended,
; converted to <4 x i1> with pred.i2v, and passed to gather.offset.predicated
; with trailing i32 arguments (16, 1, 1). The assertions expect
; `vmsr p0, r1` and then `vldrht.u32 q1, [r0, q0, uxtw #1]`.
672 define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_gather_shifted_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
673 ; CHECK-LABEL: test_vldrhq_gather_shifted_offset_z_u32:
674 ; CHECK: @ %bb.0: @ %entry
675 ; CHECK-NEXT: vmsr p0, r1
677 ; CHECK-NEXT: vldrht.u32 q1, [r0, q0, uxtw #1]
678 ; CHECK-NEXT: vmov q0, q1
681 %0 = zext i16 %p to i32
682 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
683 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 16, i32 1, i32 1, <4 x i1> %1)
; gather.base call returning <4 x float> from the <4 x i32> address vector
; %addr with immediate offset 12. The assertions expect
; `vldrw.u32 q1, [q0, #12]` followed by a vmov of q1 into q0.
687 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_f32(<4 x i32> %addr) {
688 ; CHECK-LABEL: test_vldrwq_gather_base_f32:
689 ; CHECK: @ %bb.0: @ %entry
690 ; CHECK-NEXT: vldrw.u32 q1, [q0, #12]
691 ; CHECK-NEXT: vmov q0, q1
694 %0 = call <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32> %addr, i32 12)
698 declare <4 x float> @llvm.arm.mve.vldr.gather.base.v4f32.v4i32(<4 x i32>, i32)
; gather.base call returning <4 x i32> from the <4 x i32> address vector
; %addr with immediate offset 400. The assertions expect
; `vldrw.u32 q1, [q0, #400]` followed by a vmov of q1 into q0.
700 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_s32(<4 x i32> %addr) {
701 ; CHECK-LABEL: test_vldrwq_gather_base_s32:
702 ; CHECK: @ %bb.0: @ %entry
703 ; CHECK-NEXT: vldrw.u32 q1, [q0, #400]
704 ; CHECK-NEXT: vmov q0, q1
707 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 400)
711 declare <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32>, i32)
; gather.base call returning <4 x i32> from the <4 x i32> address vector
; %addr with immediate offset 284. The assertions expect
; `vldrw.u32 q1, [q0, #284]` followed by a vmov of q1 into q0.
713 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_u32(<4 x i32> %addr) {
714 ; CHECK-LABEL: test_vldrwq_gather_base_u32:
715 ; CHECK: @ %bb.0: @ %entry
716 ; CHECK-NEXT: vldrw.u32 q1, [q0, #284]
717 ; CHECK-NEXT: vmov q0, q1
720 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.v4i32.v4i32(<4 x i32> %addr, i32 284)
; gather.base.wb test returning <4 x float>: the <4 x i32> address vector is
; loaded from %addr and passed to the intrinsic with immediate -64. Element 1
; of the returned pair (a <4 x i32>) is stored back to %addr; element 0 is
; extracted afterwards. The assertions expect vldrw.u32, then
; `vldrw.u32 q0, [q1, #-64]!`, then vstrw.32 of q1 back to [r0].
724 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_f32(ptr %addr) {
725 ; CHECK-LABEL: test_vldrwq_gather_base_wb_f32:
726 ; CHECK: @ %bb.0: @ %entry
727 ; CHECK-NEXT: vldrw.u32 q1, [r0]
728 ; CHECK-NEXT: vldrw.u32 q0, [q1, #-64]!
729 ; CHECK-NEXT: vstrw.32 q1, [r0]
732 %0 = load <4 x i32>, ptr %addr, align 8
733 %1 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32> %0, i32 -64)
734 %2 = extractvalue { <4 x float>, <4 x i32> } %1, 1
735 store <4 x i32> %2, ptr %addr, align 8
736 %3 = extractvalue { <4 x float>, <4 x i32> } %1, 0
740 declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4f32.v4i32(<4 x i32>, i32)
; gather.base.wb test returning <4 x i32>: the address vector is loaded from
; %addr and passed to the intrinsic with immediate 80. Element 1 of the
; returned pair is stored back to %addr; element 0 is extracted afterwards.
; The assertions expect vldrw.u32, then `vldrw.u32 q0, [q1, #80]!`, then
; vstrw.32 of q1 back to [r0].
742 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_s32(ptr %addr) {
743 ; CHECK-LABEL: test_vldrwq_gather_base_wb_s32:
744 ; CHECK: @ %bb.0: @ %entry
745 ; CHECK-NEXT: vldrw.u32 q1, [r0]
746 ; CHECK-NEXT: vldrw.u32 q0, [q1, #80]!
747 ; CHECK-NEXT: vstrw.32 q1, [r0]
750 %0 = load <4 x i32>, ptr %addr, align 8
751 %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 80)
752 %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
753 store <4 x i32> %2, ptr %addr, align 8
754 %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
758 declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32>, i32)
; gather.base.wb test returning <4 x i32>: the address vector is loaded from
; %addr and passed to the intrinsic with immediate 480. Element 1 of the
; returned pair is stored back to %addr; element 0 is extracted afterwards.
; The assertions expect vldrw.u32, then `vldrw.u32 q0, [q1, #480]!`, then
; vstrw.32 of q1 back to [r0].
760 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_u32(ptr %addr) {
761 ; CHECK-LABEL: test_vldrwq_gather_base_wb_u32:
762 ; CHECK: @ %bb.0: @ %entry
763 ; CHECK-NEXT: vldrw.u32 q1, [r0]
764 ; CHECK-NEXT: vldrw.u32 q0, [q1, #480]!
765 ; CHECK-NEXT: vstrw.32 q1, [r0]
768 %0 = load <4 x i32>, ptr %addr, align 8
769 %1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.v4i32.v4i32(<4 x i32> %0, i32 480)
770 %2 = extractvalue { <4 x i32>, <4 x i32> } %1, 1
771 store <4 x i32> %2, ptr %addr, align 8
772 %3 = extractvalue { <4 x i32>, <4 x i32> } %1, 0
; Predicated gather.base.wb test returning <4 x float>: the address vector is
; loaded from %addr, %p is zero-extended and converted to <4 x i1> with
; pred.i2v, and both are passed to the intrinsic with immediate -352.
; Element 1 of the returned pair is stored back to %addr; element 0 is
; extracted afterwards. The assertions expect `vmsr p0, r1`, vldrw.u32,
; `vldrwt.u32 q0, [q1, #-352]!`, and vstrw.32.
776 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_wb_z_f32(ptr %addr, i16 zeroext %p) {
777 ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_f32:
778 ; CHECK: @ %bb.0: @ %entry
779 ; CHECK-NEXT: vmsr p0, r1
780 ; CHECK-NEXT: vldrw.u32 q1, [r0]
782 ; CHECK-NEXT: vldrwt.u32 q0, [q1, #-352]!
783 ; CHECK-NEXT: vstrw.32 q1, [r0]
786 %0 = load <4 x i32>, ptr %addr, align 8
787 %1 = zext i16 %p to i32
788 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
789 %3 = call { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32> %0, i32 -352, <4 x i1> %2)
790 %4 = extractvalue { <4 x float>, <4 x i32> } %3, 1
791 store <4 x i32> %4, ptr %addr, align 8
792 %5 = extractvalue { <4 x float>, <4 x i32> } %3, 0
796 declare { <4 x float>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
; Predicated gather.base.wb test returning <4 x i32>: the address vector is
; loaded from %addr, %p is zero-extended and converted to <4 x i1> with
; pred.i2v, and both are passed to the intrinsic with immediate 276.
; Element 1 of the returned pair is stored back to %addr; element 0 is
; extracted afterwards. The assertions expect `vmsr p0, r1`, vldrw.u32,
; `vldrwt.u32 q0, [q1, #276]!`, and vstrw.32.
798 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_s32(ptr %addr, i16 zeroext %p) {
799 ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_s32:
800 ; CHECK: @ %bb.0: @ %entry
801 ; CHECK-NEXT: vmsr p0, r1
802 ; CHECK-NEXT: vldrw.u32 q1, [r0]
804 ; CHECK-NEXT: vldrwt.u32 q0, [q1, #276]!
805 ; CHECK-NEXT: vstrw.32 q1, [r0]
808 %0 = load <4 x i32>, ptr %addr, align 8
809 %1 = zext i16 %p to i32
810 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
811 %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 276, <4 x i1> %2)
812 %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
813 store <4 x i32> %4, ptr %addr, align 8
814 %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
818 declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
; Predicated gather.base.wb test returning <4 x i32>: the address vector is
; loaded from %addr, %p is zero-extended and converted to <4 x i1> with
; pred.i2v, and both are passed to the intrinsic with immediate 88.
; Element 1 of the returned pair is stored back to %addr; element 0 is
; extracted afterwards. The assertions expect `vmsr p0, r1`, vldrw.u32,
; `vldrwt.u32 q0, [q1, #88]!`, and vstrw.32.
820 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_wb_z_u32(ptr %addr, i16 zeroext %p) {
821 ; CHECK-LABEL: test_vldrwq_gather_base_wb_z_u32:
822 ; CHECK: @ %bb.0: @ %entry
823 ; CHECK-NEXT: vmsr p0, r1
824 ; CHECK-NEXT: vldrw.u32 q1, [r0]
826 ; CHECK-NEXT: vldrwt.u32 q0, [q1, #88]!
827 ; CHECK-NEXT: vstrw.32 q1, [r0]
830 %0 = load <4 x i32>, ptr %addr, align 8
831 %1 = zext i16 %p to i32
832 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
833 %3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 88, <4 x i1> %2)
834 %4 = extractvalue { <4 x i32>, <4 x i32> } %3, 1
835 store <4 x i32> %4, ptr %addr, align 8
836 %5 = extractvalue { <4 x i32>, <4 x i32> } %3, 0
; Predicated gather.base test returning <4 x float>: %p is zero-extended,
; converted to <4 x i1> with pred.i2v, and passed with the address vector
; %addr and immediate -300. The assertions expect `vmsr p0, r0` and then
; `vldrwt.u32 q1, [q0, #-300]`, with the result moved from q1 into q0.
840 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_base_z_f32(<4 x i32> %addr, i16 zeroext %p) {
841 ; CHECK-LABEL: test_vldrwq_gather_base_z_f32:
842 ; CHECK: @ %bb.0: @ %entry
843 ; CHECK-NEXT: vmsr p0, r0
845 ; CHECK-NEXT: vldrwt.u32 q1, [q0, #-300]
846 ; CHECK-NEXT: vmov q0, q1
849 %0 = zext i16 %p to i32
850 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
851 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32> %addr, i32 -300, <4 x i1> %1)
855 declare <4 x float> @llvm.arm.mve.vldr.gather.base.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
; Predicated gather.base test returning <4 x i32>: %p is zero-extended,
; converted to <4 x i1> with pred.i2v, and passed with the address vector
; %addr and immediate 440. The assertions expect `vmsr p0, r0` and then
; `vldrwt.u32 q1, [q0, #440]`, with the result moved from q1 into q0.
857 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_s32(<4 x i32> %addr, i16 zeroext %p) {
858 ; CHECK-LABEL: test_vldrwq_gather_base_z_s32:
859 ; CHECK: @ %bb.0: @ %entry
860 ; CHECK-NEXT: vmsr p0, r0
862 ; CHECK-NEXT: vldrwt.u32 q1, [q0, #440]
863 ; CHECK-NEXT: vmov q0, q1
866 %0 = zext i16 %p to i32
867 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
868 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 440, <4 x i1> %1)
872 declare <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
; Predicated gather.base test returning <4 x i32>: %p is zero-extended,
; converted to <4 x i1> with pred.i2v, and passed with the address vector
; %addr and immediate 300. The assertions expect `vmsr p0, r0` and then
; `vldrwt.u32 q1, [q0, #300]`, with the result moved from q1 into q0.
874 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_base_z_u32(<4 x i32> %addr, i16 zeroext %p) {
875 ; CHECK-LABEL: test_vldrwq_gather_base_z_u32:
876 ; CHECK: @ %bb.0: @ %entry
877 ; CHECK-NEXT: vmsr p0, r0
879 ; CHECK-NEXT: vldrwt.u32 q1, [q0, #300]
880 ; CHECK-NEXT: vmov q0, q1
883 %0 = zext i16 %p to i32
884 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
885 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 300, <4 x i1> %1)
; Unpredicated word gather from scalar base %base plus vector %offset,
; <4 x float> result. The intrinsic is called with trailing constants
; (32, 0, 0); the expected instruction uses the plain `[r0, q0]` form.
889 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_f32(ptr %base, <4 x i32> %offset) {
890 ; CHECK-LABEL: test_vldrwq_gather_offset_f32:
891 ; CHECK: @ %bb.0: @ %entry
892 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
893 ; CHECK-NEXT: vmov q0, q1
896 %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
900 declare <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0.v4i32(ptr, <4 x i32>, i32, i32, i32)
; Unpredicated word gather from %base plus vector %offset, <4 x i32> result.
; Trailing intrinsic constants are (32, 0, 0); expected addressing is `[r0, q0]`.
902 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_s32(ptr %base, <4 x i32> %offset) {
903 ; CHECK-LABEL: test_vldrwq_gather_offset_s32:
904 ; CHECK: @ %bb.0: @ %entry
905 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
906 ; CHECK-NEXT: vmov q0, q1
909 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0)
; Unpredicated word gather from %base plus vector %offset, <4 x i32> result.
; Differs from test_vldrwq_gather_offset_s32 only in the last intrinsic
; constant (1 instead of 0); the expected instruction is the same vldrw.u32.
914 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_u32(ptr %base, <4 x i32> %offset) {
915 ; CHECK-LABEL: test_vldrwq_gather_offset_u32:
916 ; CHECK: @ %bb.0: @ %entry
917 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0]
918 ; CHECK-NEXT: vmov q0, q1
921 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 1)
; Predicated word gather from %base plus vector %offset, <4 x float> result.
; %p (arriving in r1 per the expected vmsr) is zero-extended and converted to
; a <4 x i1> mask, which is the final operand of the predicated intrinsic.
925 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_offset_z_f32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
926 ; CHECK-LABEL: test_vldrwq_gather_offset_z_f32:
927 ; CHECK: @ %bb.0: @ %entry
928 ; CHECK-NEXT: vmsr p0, r1
930 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
931 ; CHECK-NEXT: vmov q0, q1
934 %0 = zext i16 %p to i32
935 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
936 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
940 declare <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0.v4i32.v4i1(ptr, <4 x i32>, i32, i32, i32, <4 x i1>)
; Predicated word gather from %base plus vector %offset, <4 x i32> result.
; The mask is derived from %p via zext + pred.i2v; intrinsic constants are
; (32, 0, 0) and the expected instruction is the predicated vldrwt.u32.
942 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
943 ; CHECK-LABEL: test_vldrwq_gather_offset_z_s32:
944 ; CHECK: @ %bb.0: @ %entry
945 ; CHECK-NEXT: vmsr p0, r1
947 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
948 ; CHECK-NEXT: vmov q0, q1
951 %0 = zext i16 %p to i32
952 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
953 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 0, <4 x i1> %1)
; Predicated word gather from %base plus vector %offset, <4 x i32> result.
; Same as test_vldrwq_gather_offset_z_s32 except the third integer constant
; is 1 rather than 0; the expected assembly is unchanged.
958 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
959 ; CHECK-LABEL: test_vldrwq_gather_offset_z_u32:
960 ; CHECK: @ %bb.0: @ %entry
961 ; CHECK-NEXT: vmsr p0, r1
963 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0]
964 ; CHECK-NEXT: vmov q0, q1
967 %0 = zext i16 %p to i32
968 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
969 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 0, i32 1, <4 x i1> %1)
; Unpredicated word gather with shifted offsets, <4 x float> result.
; The second integer constant passed to the intrinsic is 2, and the expected
; addressing mode carries the matching `uxtw #2`.
973 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_f32(ptr %base, <4 x i32> %offset) {
974 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_f32:
975 ; CHECK: @ %bb.0: @ %entry
976 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
977 ; CHECK-NEXT: vmov q0, q1
980 %0 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.v4f32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
; Unpredicated word gather with shifted offsets, <4 x i32> result.
; Intrinsic constants are (32, 2, 0); the expected operand is `[r0, q0, uxtw #2]`.
984 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_s32(ptr %base, <4 x i32> %offset) {
985 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_s32:
986 ; CHECK: @ %bb.0: @ %entry
987 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
988 ; CHECK-NEXT: vmov q0, q1
991 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0)
; Unpredicated word gather with shifted offsets, <4 x i32> result.
; Intrinsic constants are (32, 2, 1); only the last one differs from the
; _s32 variant, and the expected instruction is identical.
995 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_u32(ptr %base, <4 x i32> %offset) {
996 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_u32:
997 ; CHECK: @ %bb.0: @ %entry
998 ; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
999 ; CHECK-NEXT: vmov q0, q1
1002 %0 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.v4i32.p0.v4i32(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 1)
; Predicated word gather with shifted offsets, <4 x float> result.
; The mask comes from %p via zext + pred.i2v; constants (32, 2, 0) are
; passed and the expected output is vldrwt.u32 with `uxtw #2`.
1006 define arm_aapcs_vfpcc <4 x float> @test_vldrwq_gather_shifted_offset_z_f32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
1007 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_f32:
1008 ; CHECK: @ %bb.0: @ %entry
1009 ; CHECK-NEXT: vmsr p0, r1
1011 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
1012 ; CHECK-NEXT: vmov q0, q1
1015 %0 = zext i16 %p to i32
1016 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1017 %2 = call <4 x float> @llvm.arm.mve.vldr.gather.offset.predicated.v4f32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
; Predicated word gather with shifted offsets, <4 x i32> result.
; Mask from %p (zext + pred.i2v); intrinsic constants (32, 2, 0); expected
; instruction is vldrwt.u32 with `[r0, q0, uxtw #2]`.
1021 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_s32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
1022 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_s32:
1023 ; CHECK: @ %bb.0: @ %entry
1024 ; CHECK-NEXT: vmsr p0, r1
1026 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
1027 ; CHECK-NEXT: vmov q0, q1
1030 %0 = zext i16 %p to i32
1031 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1032 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 0, <4 x i1> %1)
; Predicated word gather with shifted offsets, <4 x i32> result.
; Intrinsic constants are (32, 2, 1) — the last differs from the _z_s32
; variant — while the expected assembly is the same.
1036 define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_gather_shifted_offset_z_u32(ptr %base, <4 x i32> %offset, i16 zeroext %p) {
1037 ; CHECK-LABEL: test_vldrwq_gather_shifted_offset_z_u32:
1038 ; CHECK: @ %bb.0: @ %entry
1039 ; CHECK-NEXT: vmsr p0, r1
1041 ; CHECK-NEXT: vldrwt.u32 q1, [r0, q0, uxtw #2]
1042 ; CHECK-NEXT: vmov q0, q1
1045 %0 = zext i16 %p to i32
1046 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1047 %2 = call <4 x i32> @llvm.arm.mve.vldr.gather.offset.predicated.v4i32.p0.v4i32.v4i1(ptr %base, <4 x i32> %offset, i32 32, i32 2, i32 1, <4 x i1> %1)
; Predicated byte scatter of <8 x i16> %value to %base plus vector %offset.
; %p becomes an <8 x i1> mask via zext + pred.i2v; intrinsic constants are
; (8, 0) and the expected instruction is vstrbt.16.
1051 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1052 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s16:
1053 ; CHECK: @ %bb.0: @ %entry
1054 ; CHECK-NEXT: vmsr p0, r1
1056 ; CHECK-NEXT: vstrbt.16 q1, [r0, q0]
1059 %0 = zext i16 %p to i32
1060 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1061 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
1065 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr, <8 x i16>, <8 x i16>, i32, i32, <8 x i1>)
; Predicated byte scatter of <4 x i32> %value to %base plus vector %offset.
; Mask is a <4 x i1> built from %p; intrinsic constants are (8, 0) and the
; expected instruction is vstrbt.32.
1067 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1068 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s32:
1069 ; CHECK: @ %bb.0: @ %entry
1070 ; CHECK-NEXT: vmsr p0, r1
1072 ; CHECK-NEXT: vstrbt.32 q1, [r0, q0]
1075 %0 = zext i16 %p to i32
1076 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1077 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
1081 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr, <4 x i32>, <4 x i32>, i32, i32, <4 x i1>)
; Predicated byte scatter of <16 x i8> %value to %base plus vector %offset.
; Mask is a <16 x i1> built from %p; intrinsic constants are (8, 0) and the
; expected instruction is vstrbt.8.
1083 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_s8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
1084 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_s8:
1085 ; CHECK: @ %bb.0: @ %entry
1086 ; CHECK-NEXT: vmsr p0, r1
1088 ; CHECK-NEXT: vstrbt.8 q1, [r0, q0]
1091 %0 = zext i16 %p to i32
1092 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1093 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
1097 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr, <16 x i8>, <16 x i8>, i32, i32, <16 x i1>)
; Predicated byte scatter of <8 x i16> %value to %base plus vector %offset.
; The intrinsic call and expected assembly are the same as in
; test_vstrbq_scatter_offset_p_s16.
1099 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1100 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u16:
1101 ; CHECK: @ %bb.0: @ %entry
1102 ; CHECK-NEXT: vmsr p0, r1
1104 ; CHECK-NEXT: vstrbt.16 q1, [r0, q0]
1107 %0 = zext i16 %p to i32
1108 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1109 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0, <8 x i1> %1)
; Predicated byte scatter of <4 x i32> %value to %base plus vector %offset.
; The intrinsic call and expected assembly are the same as in
; test_vstrbq_scatter_offset_p_s32.
1113 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1114 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u32:
1115 ; CHECK: @ %bb.0: @ %entry
1116 ; CHECK-NEXT: vmsr p0, r1
1118 ; CHECK-NEXT: vstrbt.32 q1, [r0, q0]
1121 %0 = zext i16 %p to i32
1122 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1123 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0, <4 x i1> %1)
; Predicated byte scatter of <16 x i8> %value to %base plus vector %offset.
; The intrinsic call and expected assembly are the same as in
; test_vstrbq_scatter_offset_p_s8.
1127 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_p_u8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i16 zeroext %p) {
1128 ; CHECK-LABEL: test_vstrbq_scatter_offset_p_u8:
1129 ; CHECK: @ %bb.0: @ %entry
1130 ; CHECK-NEXT: vmsr p0, r1
1132 ; CHECK-NEXT: vstrbt.8 q1, [r0, q0]
1135 %0 = zext i16 %p to i32
1136 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
1137 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v16i8.v16i8.v16i1(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0, <16 x i1> %1)
; Unpredicated byte scatter of <8 x i16> %value to %base plus vector %offset.
; Intrinsic constants are (8, 0); the expected instruction is vstrb.16.
1141 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
1142 ; CHECK-LABEL: test_vstrbq_scatter_offset_s16:
1143 ; CHECK: @ %bb.0: @ %entry
1144 ; CHECK-NEXT: vstrb.16 q1, [r0, q0]
1147 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
1151 declare void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr, <8 x i16>, <8 x i16>, i32, i32)
; Unpredicated byte scatter of <4 x i32> %value to %base plus vector %offset.
; Intrinsic constants are (8, 0); the expected instruction is vstrb.32.
1153 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
1154 ; CHECK-LABEL: test_vstrbq_scatter_offset_s32:
1155 ; CHECK: @ %bb.0: @ %entry
1156 ; CHECK-NEXT: vstrb.32 q1, [r0, q0]
1159 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
1163 declare void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr, <4 x i32>, <4 x i32>, i32, i32)
; Unpredicated byte scatter of <16 x i8> %value to %base plus vector %offset.
; Intrinsic constants are (8, 0); the expected instruction is vstrb.8.
1165 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_s8(ptr %base, <16 x i8> %offset, <16 x i8> %value) {
1166 ; CHECK-LABEL: test_vstrbq_scatter_offset_s8:
1167 ; CHECK: @ %bb.0: @ %entry
1168 ; CHECK-NEXT: vstrb.8 q1, [r0, q0]
1171 call void @llvm.arm.mve.vstr.scatter.offset.p0.v16i8.v16i8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
1175 declare void @llvm.arm.mve.vstr.scatter.offset.p0.v16i8.v16i8(ptr, <16 x i8>, <16 x i8>, i32, i32)
; Unpredicated byte scatter of <8 x i16> %value to %base plus vector %offset.
; Same intrinsic call and expected assembly as test_vstrbq_scatter_offset_s16.
1177 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
1178 ; CHECK-LABEL: test_vstrbq_scatter_offset_u16:
1179 ; CHECK: @ %bb.0: @ %entry
1180 ; CHECK-NEXT: vstrb.16 q1, [r0, q0]
1183 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 8, i32 0)
; Unpredicated byte scatter of <4 x i32> %value to %base plus vector %offset.
; Same intrinsic call and expected assembly as test_vstrbq_scatter_offset_s32.
1187 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
1188 ; CHECK-LABEL: test_vstrbq_scatter_offset_u32:
1189 ; CHECK: @ %bb.0: @ %entry
1190 ; CHECK-NEXT: vstrb.32 q1, [r0, q0]
1193 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 8, i32 0)
; Unpredicated byte scatter of <16 x i8> %value to %base plus vector %offset.
; Same intrinsic call and expected assembly as test_vstrbq_scatter_offset_s8.
1197 define arm_aapcs_vfpcc void @test_vstrbq_scatter_offset_u8(ptr %base, <16 x i8> %offset, <16 x i8> %value) {
1198 ; CHECK-LABEL: test_vstrbq_scatter_offset_u8:
1199 ; CHECK: @ %bb.0: @ %entry
1200 ; CHECK-NEXT: vstrb.8 q1, [r0, q0]
1203 call void @llvm.arm.mve.vstr.scatter.offset.p0.v16i8.v16i8(ptr %base, <16 x i8> %offset, <16 x i8> %value, i32 8, i32 0)
; Predicated doubleword scatter of <2 x i64> %value to the addresses in %addr.
; %p becomes a <2 x i1> mask via zext + pred.i2v; the immediate 888 is
; expected as `[q0, #888]` on the vstrdt.64.
1207 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
1208 ; CHECK-LABEL: test_vstrdq_scatter_base_p_s64:
1209 ; CHECK: @ %bb.0: @ %entry
1210 ; CHECK-NEXT: vmsr p0, r0
1212 ; CHECK-NEXT: vstrdt.64 q1, [q0, #888]
1215 %0 = zext i16 %p to i32
1216 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
1217 call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <2 x i1> %1)
1221 declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>)
; Predicated doubleword scatter of <2 x i64> %value to the addresses in %addr.
; Same intrinsic as the _p_s64 test, here with immediate 264, which is
; expected as `[q0, #264]`.
1223 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_u64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
1224 ; CHECK-LABEL: test_vstrdq_scatter_base_p_u64:
1225 ; CHECK: @ %bb.0: @ %entry
1226 ; CHECK-NEXT: vmsr p0, r0
1228 ; CHECK-NEXT: vstrdt.64 q1, [q0, #264]
1231 %0 = zext i16 %p to i32
1232 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
1233 call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> %addr, i32 264, <2 x i64> %value, <2 x i1> %1)
; Unpredicated doubleword scatter of <2 x i64> %value to the addresses in
; %addr with immediate 408, expected as `vstrd.64 q1, [q0, #408]`.
1237 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_s64(<2 x i64> %addr, <2 x i64> %value) {
1238 ; CHECK-LABEL: test_vstrdq_scatter_base_s64:
1239 ; CHECK: @ %bb.0: @ %entry
1240 ; CHECK-NEXT: vstrd.64 q1, [q0, #408]
1243 call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 408, <2 x i64> %value)
1247 declare void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
; Unpredicated doubleword scatter of <2 x i64> %value to the addresses in
; %addr with a negative immediate (-472), expected as `[q0, #-472]`.
1249 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_u64(<2 x i64> %addr, <2 x i64> %value) {
1250 ; CHECK-LABEL: test_vstrdq_scatter_base_u64:
1251 ; CHECK: @ %bb.0: @ %entry
1252 ; CHECK-NEXT: vstrd.64 q1, [q0, #-472]
1255 call void @llvm.arm.mve.vstr.scatter.base.v2i64.v2i64(<2 x i64> %addr, i32 -472, <2 x i64> %value)
; Predicated doubleword scatter with write-back. The address vector is loaded
; from %addr, passed to the wb.predicated intrinsic with immediate 248 and a
; <2 x i1> mask built from %p, and the returned vector is stored back to %addr.
; Expected output: load into q1, `vstrdt.64 q0, [q1, #248]!`, store of q1.
1259 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
1260 ; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_s64:
1261 ; CHECK: @ %bb.0: @ %entry
1262 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1263 ; CHECK-NEXT: vmsr p0, r1
1265 ; CHECK-NEXT: vstrdt.64 q0, [q1, #248]!
1266 ; CHECK-NEXT: vstrw.32 q1, [r0]
1269 %0 = load <2 x i64>, ptr %addr, align 8
1270 %1 = zext i16 %p to i32
1271 %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
1272 %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 248, <2 x i64> %value, <2 x i1> %2)
1273 store <2 x i64> %3, ptr %addr, align 8
1277 declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64>, i32, <2 x i64>, <2 x i1>)
; Predicated doubleword scatter with write-back, immediate 136. The address
; vector is loaded from %addr, updated by the wb.predicated intrinsic under a
; mask built from %p, and stored back to %addr.
; Expected output: load into q1, `vstrdt.64 q0, [q1, #136]!`, store of q1.
1279 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_u64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
1280 ; CHECK-LABEL: test_vstrdq_scatter_base_wb_p_u64:
1281 ; CHECK: @ %bb.0: @ %entry
1282 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1283 ; CHECK-NEXT: vmsr p0, r1
1285 ; CHECK-NEXT: vstrdt.64 q0, [q1, #136]!
1286 ; CHECK-NEXT: vstrw.32 q1, [r0]
1289 %0 = load <2 x i64>, ptr %addr, align 8
1290 %1 = zext i16 %p to i32
1291 %2 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %1)
1292 %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> %0, i32 136, <2 x i64> %value, <2 x i1> %2)
1293 store <2 x i64> %3, ptr %addr, align 8
; Unpredicated doubleword scatter with write-back, immediate 208. The address
; vector is loaded from %addr, passed to the wb intrinsic, and the returned
; vector is stored back to %addr.
; Expected output: load into q1, `vstrd.64 q0, [q1, #208]!`, store of q1.
1297 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_s64(ptr %addr, <2 x i64> %value) {
1298 ; CHECK-LABEL: test_vstrdq_scatter_base_wb_s64:
1299 ; CHECK: @ %bb.0: @ %entry
1300 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1301 ; CHECK-NEXT: vstrd.64 q0, [q1, #208]!
1302 ; CHECK-NEXT: vstrw.32 q1, [r0]
1305 %0 = load <2 x i64>, ptr %addr, align 8
1306 %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 208, <2 x i64> %value)
1307 store <2 x i64> %1, ptr %addr, align 8
1311 declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64>, i32, <2 x i64>)
; Unpredicated doubleword scatter with write-back and a negative immediate
; (-168). The address vector is loaded from %addr, updated by the wb
; intrinsic, and stored back to %addr.
; Expected output: load into q1, `vstrd.64 q0, [q1, #-168]!`, store of q1.
1313 define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_u64(ptr %addr, <2 x i64> %value) {
1314 ; CHECK-LABEL: test_vstrdq_scatter_base_wb_u64:
1315 ; CHECK: @ %bb.0: @ %entry
1316 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1317 ; CHECK-NEXT: vstrd.64 q0, [q1, #-168]!
1318 ; CHECK-NEXT: vstrw.32 q1, [r0]
1321 %0 = load <2 x i64>, ptr %addr, align 8
1322 %1 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.v2i64.v2i64(<2 x i64> %0, i32 -168, <2 x i64> %value)
1323 store <2 x i64> %1, ptr %addr, align 8
; Predicated doubleword scatter of <2 x i64> %value to %base plus vector
; %offset. Mask is a <2 x i1> built from %p; intrinsic constants are (64, 0)
; and the expected instruction is `vstrdt.64 q1, [r0, q0]`.
1327 define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
1328 ; CHECK-LABEL: test_vstrdq_scatter_offset_p_s64:
1329 ; CHECK: @ %bb.0: @ %entry
1330 ; CHECK-NEXT: vmsr p0, r1
1332 ; CHECK-NEXT: vstrdt.64 q1, [r0, q0]
1335 %0 = zext i16 %p to i32
1336 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
1337 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <2 x i1> %1)
1341 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr, <2 x i64>, <2 x i64>, i32, i32, <2 x i1>)
; Predicated doubleword scatter of <2 x i64> %value to %base plus vector
; %offset. Same intrinsic call and expected assembly as
; test_vstrdq_scatter_offset_p_s64.
1343 define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
1344 ; CHECK-LABEL: test_vstrdq_scatter_offset_p_u64:
1345 ; CHECK: @ %bb.0: @ %entry
1346 ; CHECK-NEXT: vmsr p0, r1
1348 ; CHECK-NEXT: vstrdt.64 q1, [r0, q0]
1351 %0 = zext i16 %p to i32
1352 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
1353 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <2 x i1> %1)
; Unpredicated doubleword scatter of <2 x i64> %value to %base plus vector
; %offset. Intrinsic constants are (64, 0); expected `vstrd.64 q1, [r0, q0]`.
1357 define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
1358 ; CHECK-LABEL: test_vstrdq_scatter_offset_s64:
1359 ; CHECK: @ %bb.0: @ %entry
1360 ; CHECK-NEXT: vstrd.64 q1, [r0, q0]
1363 call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
1367 declare void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr, <2 x i64>, <2 x i64>, i32, i32)
; Unpredicated doubleword scatter of <2 x i64> %value to %base plus vector
; %offset. Same intrinsic call and expected assembly as the _s64 variant.
1369 define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
1370 ; CHECK-LABEL: test_vstrdq_scatter_offset_u64:
1371 ; CHECK: @ %bb.0: @ %entry
1372 ; CHECK-NEXT: vstrd.64 q1, [r0, q0]
1375 call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0)
; Predicated doubleword scatter with shifted offsets. Mask is a <2 x i1> built
; from %p; intrinsic constants are (64, 3) and the expected addressing mode
; carries the matching `uxtw #3`.
1379 define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
1380 ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_s64:
1381 ; CHECK: @ %bb.0: @ %entry
1382 ; CHECK-NEXT: vmsr p0, r1
1384 ; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3]
1387 %0 = zext i16 %p to i32
1388 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
1389 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <2 x i1> %1)
; Predicated doubleword scatter with shifted offsets. Same intrinsic call and
; expected assembly as test_vstrdq_scatter_shifted_offset_p_s64.
1393 define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_p_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
1394 ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_p_u64:
1395 ; CHECK: @ %bb.0: @ %entry
1396 ; CHECK-NEXT: vmsr p0, r1
1398 ; CHECK-NEXT: vstrdt.64 q1, [r0, q0, uxtw #3]
1401 %0 = zext i16 %p to i32
1402 %1 = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 %0)
1403 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3, <2 x i1> %1)
; Unpredicated doubleword scatter with shifted offsets. Intrinsic constants
; are (64, 3); expected `vstrd.64 q1, [r0, q0, uxtw #3]`.
1407 define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
1408 ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_s64:
1409 ; CHECK: @ %bb.0: @ %entry
1410 ; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3]
1413 call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
; Unpredicated doubleword scatter with shifted offsets. Same intrinsic call
; and expected assembly as the _s64 variant.
1417 define arm_aapcs_vfpcc void @test_vstrdq_scatter_shifted_offset_u64(ptr %base, <2 x i64> %offset, <2 x i64> %value) {
1418 ; CHECK-LABEL: test_vstrdq_scatter_shifted_offset_u64:
1419 ; CHECK: @ %bb.0: @ %entry
1420 ; CHECK-NEXT: vstrd.64 q1, [r0, q0, uxtw #3]
1423 call void @llvm.arm.mve.vstr.scatter.offset.p0.v2i64.v2i64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 3)
; Unpredicated halfword scatter of <8 x half> %value to %base plus vector
; %offset. Intrinsic constants are (16, 0); expected `vstrh.16 q1, [r0, q0]`.
1427 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_f16(ptr %base, <8 x i16> %offset, <8 x half> %value) {
1428 ; CHECK-LABEL: test_vstrhq_scatter_offset_f16:
1429 ; CHECK: @ %bb.0: @ %entry
1430 ; CHECK-NEXT: vstrh.16 q1, [r0, q0]
1433 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0)
1437 declare void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8f16(ptr, <8 x i16>, <8 x half>, i32, i32)
; Predicated halfword scatter of <8 x half> %value to %base plus vector
; %offset. Mask is an <8 x i1> built from %p; intrinsic constants are (16, 0)
; and the expected instruction is vstrht.16.
1439 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
1440 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_f16:
1441 ; CHECK: @ %bb.0: @ %entry
1442 ; CHECK-NEXT: vmsr p0, r1
1444 ; CHECK-NEXT: vstrht.16 q1, [r0, q0]
1447 %0 = zext i16 %p to i32
1448 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1449 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8f16.v8i1(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 0, <8 x i1> %1)
1453 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8f16.v8i1(ptr, <8 x i16>, <8 x half>, i32, i32, <8 x i1>)
; Predicated halfword scatter of <8 x i16> %value to %base plus vector
; %offset. Mask is an <8 x i1> built from %p; intrinsic constants are (16, 0)
; and the expected instruction is vstrht.16.
1455 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1456 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_s16:
1457 ; CHECK: @ %bb.0: @ %entry
1458 ; CHECK-NEXT: vmsr p0, r1
1460 ; CHECK-NEXT: vstrht.16 q1, [r0, q0]
1463 %0 = zext i16 %p to i32
1464 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1465 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
; Predicated halfword scatter of <4 x i32> %value to %base plus vector
; %offset. Mask is a <4 x i1> built from %p; intrinsic constants are (16, 0)
; and the expected instruction is vstrht.32.
1470 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1471 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_s32:
1472 ; CHECK: @ %bb.0: @ %entry
1473 ; CHECK-NEXT: vmsr p0, r1
1475 ; CHECK-NEXT: vstrht.32 q1, [r0, q0]
1478 %0 = zext i16 %p to i32
1479 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1480 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
; Predicated halfword scatter of <8 x i16> %value to %base plus vector
; %offset. Same intrinsic call and expected assembly as
; test_vstrhq_scatter_offset_p_s16.
1485 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1486 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_u16:
1487 ; CHECK: @ %bb.0: @ %entry
1488 ; CHECK-NEXT: vmsr p0, r1
1490 ; CHECK-NEXT: vstrht.16 q1, [r0, q0]
1493 %0 = zext i16 %p to i32
1494 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1495 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0, <8 x i1> %1)
; Predicated halfword scatter of <4 x i32> %value to %base plus vector
; %offset. Same intrinsic call and expected assembly as
; test_vstrhq_scatter_offset_p_s32.
1499 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1500 ; CHECK-LABEL: test_vstrhq_scatter_offset_p_u32:
1501 ; CHECK: @ %bb.0: @ %entry
1502 ; CHECK-NEXT: vmsr p0, r1
1504 ; CHECK-NEXT: vstrht.32 q1, [r0, q0]
1507 %0 = zext i16 %p to i32
1508 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1509 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0, <4 x i1> %1)
; Unpredicated halfword scatter of <8 x i16> %value to %base plus vector
; %offset. Intrinsic constants are (16, 0); expected `vstrh.16 q1, [r0, q0]`.
1513 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
1514 ; CHECK-LABEL: test_vstrhq_scatter_offset_s16:
1515 ; CHECK: @ %bb.0: @ %entry
1516 ; CHECK-NEXT: vstrh.16 q1, [r0, q0]
1519 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
; Unpredicated halfword scatter of <4 x i32> %value to %base plus vector
; %offset. Intrinsic constants are (16, 0); expected `vstrh.32 q1, [r0, q0]`.
1524 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
1525 ; CHECK-LABEL: test_vstrhq_scatter_offset_s32:
1526 ; CHECK: @ %bb.0: @ %entry
1527 ; CHECK-NEXT: vstrh.32 q1, [r0, q0]
1530 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
; Unpredicated halfword scatter of <8 x i16> %value to %base plus vector
; %offset. Same intrinsic call and expected assembly as the _s16 variant.
1535 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
1536 ; CHECK-LABEL: test_vstrhq_scatter_offset_u16:
1537 ; CHECK: @ %bb.0: @ %entry
1538 ; CHECK-NEXT: vstrh.16 q1, [r0, q0]
1541 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 0)
; Unpredicated halfword scatter of <4 x i32> %value to %base plus vector
; %offset. Same intrinsic call and expected assembly as the _s32 variant.
1545 define arm_aapcs_vfpcc void @test_vstrhq_scatter_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
1546 ; CHECK-LABEL: test_vstrhq_scatter_offset_u32:
1547 ; CHECK: @ %bb.0: @ %entry
1548 ; CHECK-NEXT: vstrh.32 q1, [r0, q0]
1551 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 0)
; Unpredicated halfword scatter of <8 x half> %value with shifted offsets.
; Intrinsic constants are (16, 1); the expected addressing mode carries the
; matching `uxtw #1`.
1555 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_f16(ptr %base, <8 x i16> %offset, <8 x half> %value) {
1556 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_f16:
1557 ; CHECK: @ %bb.0: @ %entry
1558 ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
1561 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1)
; Predicated halfword scatter of <8 x half> %value with shifted offsets.
; Mask is an <8 x i1> built from %p; intrinsic constants are (16, 1) and the
; expected instruction is vstrht.16 with `uxtw #1`.
1565 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_f16(ptr %base, <8 x i16> %offset, <8 x half> %value, i16 zeroext %p) {
1566 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_f16:
1567 ; CHECK: @ %bb.0: @ %entry
1568 ; CHECK-NEXT: vmsr p0, r1
1570 ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
1573 %0 = zext i16 %p to i32
1574 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1575 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8f16.v8i1(ptr %base, <8 x i16> %offset, <8 x half> %value, i32 16, i32 1, <8 x i1> %1)
; Predicated halfword scatter of <8 x i16> %value with shifted offsets.
; Mask is an <8 x i1> built from %p; intrinsic constants are (16, 1) and the
; expected instruction is vstrht.16 with `uxtw #1`.
1579 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1580 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s16:
1581 ; CHECK: @ %bb.0: @ %entry
1582 ; CHECK-NEXT: vmsr p0, r1
1584 ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
1587 %0 = zext i16 %p to i32
1588 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1589 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
; Predicated halfword scatter of <4 x i32> %value with shifted offsets.
; Mask is a <4 x i1> built from %p; intrinsic constants are (16, 1) and the
; expected instruction is vstrht.32 with `uxtw #1`.
1593 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1594 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_s32:
1595 ; CHECK: @ %bb.0: @ %entry
1596 ; CHECK-NEXT: vmsr p0, r1
1598 ; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
1601 %0 = zext i16 %p to i32
1602 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1603 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
; Predicated halfword scatter of <8 x i16> %value with shifted offsets.
; Same intrinsic call and expected assembly as
; test_vstrhq_scatter_shifted_offset_p_s16.
1607 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i16 zeroext %p) {
1608 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u16:
1609 ; CHECK: @ %bb.0: @ %entry
1610 ; CHECK-NEXT: vmsr p0, r1
1612 ; CHECK-NEXT: vstrht.16 q1, [r0, q0, uxtw #1]
1615 %0 = zext i16 %p to i32
1616 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
1617 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v8i16.v8i16.v8i1(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1, <8 x i1> %1)
; Predicated halfword scatter of <4 x i32> %value with shifted offsets.
; Same intrinsic call and expected assembly as
; test_vstrhq_scatter_shifted_offset_p_s32.
1621 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1622 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_p_u32:
1623 ; CHECK: @ %bb.0: @ %entry
1624 ; CHECK-NEXT: vmsr p0, r1
1626 ; CHECK-NEXT: vstrht.32 q1, [r0, q0, uxtw #1]
1629 %0 = zext i16 %p to i32
1630 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1631 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1, <4 x i1> %1)
; Unpredicated halfword scatter of <8 x i16> %value with shifted offsets.
; Intrinsic constants are (16, 1); expected `vstrh.16 q1, [r0, q0, uxtw #1]`.
1635 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
1636 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s16:
1637 ; CHECK: @ %bb.0: @ %entry
1638 ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
1641 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
; Unpredicated halfword scatter of <4 x i32> %value with shifted offsets.
; Intrinsic constants are (16, 1); expected `vstrh.32 q1, [r0, q0, uxtw #1]`.
1645 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
1646 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_s32:
1647 ; CHECK: @ %bb.0: @ %entry
1648 ; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
1651 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
; Unpredicated halfword scatter of <8 x i16> %value with shifted offsets.
; Same intrinsic call and expected assembly as the _s16 variant.
1655 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u16(ptr %base, <8 x i16> %offset, <8 x i16> %value) {
1656 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u16:
1657 ; CHECK: @ %bb.0: @ %entry
1658 ; CHECK-NEXT: vstrh.16 q1, [r0, q0, uxtw #1]
1661 call void @llvm.arm.mve.vstr.scatter.offset.p0.v8i16.v8i16(ptr %base, <8 x i16> %offset, <8 x i16> %value, i32 16, i32 1)
; Unpredicated halfword scatter of <4 x i32> %value with shifted offsets.
; Same intrinsic call and expected assembly as the _s32 variant.
1665 define arm_aapcs_vfpcc void @test_vstrhq_scatter_shifted_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
1666 ; CHECK-LABEL: test_vstrhq_scatter_shifted_offset_u32:
1667 ; CHECK: @ %bb.0: @ %entry
1668 ; CHECK-NEXT: vstrh.32 q1, [r0, q0, uxtw #1]
1671 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 16, i32 1)
; Unpredicated word scatter of <4 x float> %value to the addresses in %addr
; with immediate 380, expected as `vstrw.32 q1, [q0, #380]`.
1675 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_f32(<4 x i32> %addr, <4 x float> %value) {
1676 ; CHECK-LABEL: test_vstrwq_scatter_base_f32:
1677 ; CHECK: @ %bb.0: @ %entry
1678 ; CHECK-NEXT: vstrw.32 q1, [q0, #380]
1681 call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32> %addr, i32 380, <4 x float> %value)
1685 declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4f32(<4 x i32>, i32, <4 x float>)
; Predicated word scatter of <4 x float> %value to the addresses in %addr.
; Mask is a <4 x i1> built from %p; the negative immediate -400 is expected
; as `[q0, #-400]` on the vstrwt.32.
1687 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_f32(<4 x i32> %addr, <4 x float> %value, i16 zeroext %p) {
1688 ; CHECK-LABEL: test_vstrwq_scatter_base_p_f32:
1689 ; CHECK: @ %bb.0: @ %entry
1690 ; CHECK-NEXT: vmsr p0, r0
1692 ; CHECK-NEXT: vstrwt.32 q1, [q0, #-400]
1695 %0 = zext i16 %p to i32
1696 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1697 call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32> %addr, i32 -400, <4 x float> %value, <4 x i1> %1)
1701 declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)
; Predicated word scatter of <4 x i32> %value to the addresses in %addr.
; Mask is a <4 x i1> built from %p; the immediate 48 is expected as
; `[q0, #48]` on the vstrwt.32.
1703 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_s32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
1704 ; CHECK-LABEL: test_vstrwq_scatter_base_p_s32:
1705 ; CHECK: @ %bb.0: @ %entry
1706 ; CHECK-NEXT: vmsr p0, r0
1708 ; CHECK-NEXT: vstrwt.32 q1, [q0, #48]
1711 %0 = zext i16 %p to i32
1712 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1713 call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 48, <4 x i32> %value, <4 x i1> %1)
1717 declare void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)
; Predicated word scatter of <4 x i32> %value to the addresses in %addr.
; Same intrinsic as the _p_s32 test, here with negative immediate -376,
; which is expected as `[q0, #-376]`.
1719 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_p_u32(<4 x i32> %addr, <4 x i32> %value, i16 zeroext %p) {
1720 ; CHECK-LABEL: test_vstrwq_scatter_base_p_u32:
1721 ; CHECK: @ %bb.0: @ %entry
1722 ; CHECK-NEXT: vmsr p0, r0
1724 ; CHECK-NEXT: vstrwt.32 q1, [q0, #-376]
1727 %0 = zext i16 %p to i32
1728 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1729 call void @llvm.arm.mve.vstr.scatter.base.predicated.v4i32.v4i32.v4i1(<4 x i32> %addr, i32 -376, <4 x i32> %value, <4 x i1> %1)
; Unpredicated scatter-base store of <4 x i32> data.
; %addr, the immediate 156 and %value go straight to
; llvm.arm.mve.vstr.scatter.base (declared right after this function); the
; expected output is a single `vstrw.32` with offset #156.
1733 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_s32(<4 x i32> %addr, <4 x i32> %value) {
1734 ; CHECK-LABEL: test_vstrwq_scatter_base_s32:
1735 ; CHECK: @ %bb.0: @ %entry
1736 ; CHECK-NEXT: vstrw.32 q1, [q0, #156]
1739 call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 156, <4 x i32> %value)
1743 declare void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)
; Unpredicated scatter-base store of <4 x i32> data.
; Calls the same intrinsic as test_vstrwq_scatter_base_s32 with a different
; immediate (212); the expected output is a single `vstrw.32` with offset #212.
1745 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_u32(<4 x i32> %addr, <4 x i32> %value) {
1746 ; CHECK-LABEL: test_vstrwq_scatter_base_u32:
1747 ; CHECK: @ %bb.0: @ %entry
1748 ; CHECK-NEXT: vstrw.32 q1, [q0, #212]
1751 call void @llvm.arm.mve.vstr.scatter.base.v4i32.v4i32(<4 x i32> %addr, i32 212, <4 x i32> %value)
; Scatter-base store with write-back, float data, negative immediate.
; The address vector is loaded from %addr, passed to the .wb intrinsic with
; immediate -412, and the <4 x i32> the intrinsic returns is stored back to
; %addr. The expected output is a vector load from [r0], a `vstrw.32` using
; the `!` address form with #-412, and a store of the base register q1 back
; to [r0].
1755 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_f32(ptr %addr, <4 x float> %value) {
1756 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_f32:
1757 ; CHECK: @ %bb.0: @ %entry
1758 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1759 ; CHECK-NEXT: vstrw.32 q0, [q1, #-412]!
1760 ; CHECK-NEXT: vstrw.32 q1, [r0]
1763 %0 = load <4 x i32>, ptr %addr, align 8
1764 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32> %0, i32 -412, <4 x float> %value)
1765 store <4 x i32> %1, ptr %addr, align 8
1769 declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4f32(<4 x i32>, i32, <4 x float>)
; Predicated scatter-base store with write-back, float data.
; The address vector is loaded from %addr; %p is zero-extended and converted
; to a <4 x i1> mask by llvm.arm.mve.pred.i2v. Both go to the .wb.predicated
; intrinsic with immediate 236, and the returned <4 x i32> is stored back to
; %addr. The expected output loads q1 from [r0], moves r1 into p0, stores with
; `vstrwt.32 ... #236]!`, then writes q1 back to [r0].
1771 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_f32(ptr %addr, <4 x float> %value, i16 zeroext %p) {
1772 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_f32:
1773 ; CHECK: @ %bb.0: @ %entry
1774 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1775 ; CHECK-NEXT: vmsr p0, r1
1777 ; CHECK-NEXT: vstrwt.32 q0, [q1, #236]!
1778 ; CHECK-NEXT: vstrw.32 q1, [r0]
1781 %0 = load <4 x i32>, ptr %addr, align 8
1782 %1 = zext i16 %p to i32
1783 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1784 %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32> %0, i32 236, <4 x float> %value, <4 x i1> %2)
1785 store <4 x i32> %3, ptr %addr, align 8
1789 declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4f32.v4i1(<4 x i32>, i32, <4 x float>, <4 x i1>)
; Predicated scatter-base store with write-back, <4 x i32> data.
; Loads the address vector from %addr, builds the <4 x i1> mask from %p
; (zext, then llvm.arm.mve.pred.i2v), calls the .wb.predicated intrinsic with
; immediate 328 and stores the returned vector back to %addr. The expected
; output is: load q1 from [r0], `vmsr p0, r1`, `vstrwt.32 ... #328]!`, store
; q1 to [r0]. The declare that follows is also the one used by
; test_vstrwq_scatter_base_wb_p_u32.
1791 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_s32(ptr %addr, <4 x i32> %value, i16 zeroext %p) {
1792 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_s32:
1793 ; CHECK: @ %bb.0: @ %entry
1794 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1795 ; CHECK-NEXT: vmsr p0, r1
1797 ; CHECK-NEXT: vstrwt.32 q0, [q1, #328]!
1798 ; CHECK-NEXT: vstrw.32 q1, [r0]
1801 %0 = load <4 x i32>, ptr %addr, align 8
1802 %1 = zext i16 %p to i32
1803 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1804 %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 328, <4 x i32> %value, <4 x i1> %2)
1805 store <4 x i32> %3, ptr %addr, align 8
1809 declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32>, i32, <4 x i32>, <4 x i1>)
; Predicated scatter-base store with write-back, <4 x i32> data.
; The IR has the same shape as test_vstrwq_scatter_base_wb_p_s32 and calls the
; same intrinsic; only the immediate differs (412 here). The expected output
; is: load q1 from [r0], `vmsr p0, r1`, `vstrwt.32 ... #412]!`, store q1 to
; [r0].
1811 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_p_u32(ptr %addr, <4 x i32> %value, i16 zeroext %p) {
1812 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_p_u32:
1813 ; CHECK: @ %bb.0: @ %entry
1814 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1815 ; CHECK-NEXT: vmsr p0, r1
1817 ; CHECK-NEXT: vstrwt.32 q0, [q1, #412]!
1818 ; CHECK-NEXT: vstrw.32 q1, [r0]
1821 %0 = load <4 x i32>, ptr %addr, align 8
1822 %1 = zext i16 %p to i32
1823 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
1824 %3 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v4i32.v4i32.v4i1(<4 x i32> %0, i32 412, <4 x i32> %value, <4 x i1> %2)
1825 store <4 x i32> %3, ptr %addr, align 8
; Scatter-base store with write-back, <4 x i32> data, negative immediate.
; Loads the address vector from %addr, calls the .wb intrinsic with immediate
; -152 and stores the returned <4 x i32> back to %addr. The expected output
; is: load q1 from [r0], `vstrw.32 ... #-152]!`, store q1 to [r0]. The
; declare that follows is also the one used by
; test_vstrwq_scatter_base_wb_u32.
1829 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_s32(ptr %addr, <4 x i32> %value) {
1830 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_s32:
1831 ; CHECK: @ %bb.0: @ %entry
1832 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1833 ; CHECK-NEXT: vstrw.32 q0, [q1, #-152]!
1834 ; CHECK-NEXT: vstrw.32 q1, [r0]
1837 %0 = load <4 x i32>, ptr %addr, align 8
1838 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 -152, <4 x i32> %value)
1839 store <4 x i32> %1, ptr %addr, align 8
1843 declare <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32>, i32, <4 x i32>)
; Scatter-base store with write-back, <4 x i32> data, positive immediate.
; Same IR shape and intrinsic as test_vstrwq_scatter_base_wb_s32 with the
; immediate 64. The expected output is: load q1 from [r0],
; `vstrw.32 ... #64]!`, store q1 to [r0].
1845 define arm_aapcs_vfpcc void @test_vstrwq_scatter_base_wb_u32(ptr %addr, <4 x i32> %value) {
1846 ; CHECK-LABEL: test_vstrwq_scatter_base_wb_u32:
1847 ; CHECK: @ %bb.0: @ %entry
1848 ; CHECK-NEXT: vldrw.u32 q1, [r0]
1849 ; CHECK-NEXT: vstrw.32 q0, [q1, #64]!
1850 ; CHECK-NEXT: vstrw.32 q1, [r0]
1853 %0 = load <4 x i32>, ptr %addr, align 8
1854 %1 = call <4 x i32> @llvm.arm.mve.vstr.scatter.base.wb.v4i32.v4i32(<4 x i32> %0, i32 64, <4 x i32> %value)
1855 store <4 x i32> %1, ptr %addr, align 8
; Unpredicated scatter store addressed by scalar %base plus a vector of
; offsets, float data.
; The intrinsic (declared right after this function) receives %base, %offset,
; %value and two trailing constants, i32 32 and i32 0. With the last constant
; at 0 the expected address operand is the plain `[r0, q0]` form.
; NOTE(review): 32 is presumably the memory element width in bits and the last
; constant the offset shift amount - confirm against the intrinsic definition.
1859 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_f32(ptr %base, <4 x i32> %offset, <4 x float> %value) {
1860 ; CHECK-LABEL: test_vstrwq_scatter_offset_f32:
1861 ; CHECK: @ %bb.0: @ %entry
1862 ; CHECK-NEXT: vstrw.32 q1, [r0, q0]
1865 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0)
1869 declare void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4f32(ptr, <4 x i32>, <4 x float>, i32, i32)
; Predicated scatter store addressed by %base plus vector offsets, float data.
; %p is zero-extended to i32 and converted to a <4 x i1> mask by
; llvm.arm.mve.pred.i2v; the mask is the final operand of the .predicated
; intrinsic (declared right after this function), after the constants i32 32
; and i32 0. The expected output is `vmsr p0, r1` followed by `vstrwt.32` with
; the unshifted `[r0, q0]` address form.
1871 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
1872 ; CHECK-LABEL: test_vstrwq_scatter_offset_p_f32:
1873 ; CHECK: @ %bb.0: @ %entry
1874 ; CHECK-NEXT: vmsr p0, r1
1876 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
1879 %0 = zext i16 %p to i32
1880 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1881 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4f32.v4i1(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 0, <4 x i1> %1)
1885 declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4f32.v4i1(ptr, <4 x i32>, <4 x float>, i32, i32, <4 x i1>)
; Predicated scatter store addressed by %base plus vector offsets, <4 x i32>
; data.
; The <4 x i1> mask is derived from %p (zext, then llvm.arm.mve.pred.i2v) and
; passed as the last operand after the constants i32 32 and i32 0. The
; expected output is `vmsr p0, r1` followed by `vstrwt.32` with the unshifted
; `[r0, q0]` address form.
1887 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1888 ; CHECK-LABEL: test_vstrwq_scatter_offset_p_s32:
1889 ; CHECK: @ %bb.0: @ %entry
1890 ; CHECK-NEXT: vmsr p0, r1
1892 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
1895 %0 = zext i16 %p to i32
1896 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1897 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
; Predicated scatter store addressed by %base plus vector offsets, <4 x i32>
; data.
; The IR body and the expected output are the same as in
; test_vstrwq_scatter_offset_p_s32: same intrinsic, same constants (32, 0),
; `vmsr p0, r1` followed by `vstrwt.32 q1, [r0, q0]`.
1902 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1903 ; CHECK-LABEL: test_vstrwq_scatter_offset_p_u32:
1904 ; CHECK: @ %bb.0: @ %entry
1905 ; CHECK-NEXT: vmsr p0, r1
1907 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0]
1910 %0 = zext i16 %p to i32
1911 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1912 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0, <4 x i1> %1)
; Unpredicated scatter store addressed by %base plus vector offsets, <4 x i32>
; data.
; Passes %base, %offset, %value and the constants i32 32 and i32 0 to the
; scatter.offset intrinsic; the expected output is a single `vstrw.32` with
; the unshifted `[r0, q0]` address form.
1916 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
1917 ; CHECK-LABEL: test_vstrwq_scatter_offset_s32:
1918 ; CHECK: @ %bb.0: @ %entry
1919 ; CHECK-NEXT: vstrw.32 q1, [r0, q0]
1922 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
; Unpredicated scatter store addressed by %base plus vector offsets, <4 x i32>
; data.
; The intrinsic call and the expected `vstrw.32 q1, [r0, q0]` output are the
; same as in test_vstrwq_scatter_offset_s32.
1927 define arm_aapcs_vfpcc void @test_vstrwq_scatter_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
1928 ; CHECK-LABEL: test_vstrwq_scatter_offset_u32:
1929 ; CHECK: @ %bb.0: @ %entry
1930 ; CHECK-NEXT: vstrw.32 q1, [r0, q0]
1933 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 0)
; Unpredicated scatter store with shifted vector offsets, float data.
; Calls the same intrinsic as test_vstrwq_scatter_offset_f32 but with the last
; constant set to 2 instead of 0; the expected address operand correspondingly
; becomes `[r0, q0, uxtw #2]`.
1937 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_f32(ptr %base, <4 x i32> %offset, <4 x float> %value) {
1938 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_f32:
1939 ; CHECK: @ %bb.0: @ %entry
1940 ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
1943 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2)
; Predicated scatter store with shifted vector offsets, float data.
; %p is zero-extended and converted to a <4 x i1> mask by
; llvm.arm.mve.pred.i2v; the .predicated intrinsic is called with the
; constants i32 32 and i32 2 followed by that mask. The expected output is
; `vmsr p0, r1` followed by `vstrwt.32` with the `[r0, q0, uxtw #2]` address
; form.
1947 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_f32(ptr %base, <4 x i32> %offset, <4 x float> %value, i16 zeroext %p) {
1948 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_f32:
1949 ; CHECK: @ %bb.0: @ %entry
1950 ; CHECK-NEXT: vmsr p0, r1
1952 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
1955 %0 = zext i16 %p to i32
1956 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1957 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4f32.v4i1(ptr %base, <4 x i32> %offset, <4 x float> %value, i32 32, i32 2, <4 x i1> %1)
; Predicated scatter store with shifted vector offsets, <4 x i32> data.
; The mask is built from %p (zext, then llvm.arm.mve.pred.i2v) and passed
; after the constants i32 32 and i32 2. The expected output is `vmsr p0, r1`
; followed by `vstrwt.32` with the `[r0, q0, uxtw #2]` address form.
1961 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1962 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_s32:
1963 ; CHECK: @ %bb.0: @ %entry
1964 ; CHECK-NEXT: vmsr p0, r1
1966 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
1969 %0 = zext i16 %p to i32
1970 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1971 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
; Predicated scatter store with shifted vector offsets, <4 x i32> data.
; The IR body and the expected output are the same as in
; test_vstrwq_scatter_shifted_offset_p_s32: same intrinsic, same constants
; (32, 2), `vmsr p0, r1` followed by `vstrwt.32 q1, [r0, q0, uxtw #2]`.
1975 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_p_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i16 zeroext %p) {
1976 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_p_u32:
1977 ; CHECK: @ %bb.0: @ %entry
1978 ; CHECK-NEXT: vmsr p0, r1
1980 ; CHECK-NEXT: vstrwt.32 q1, [r0, q0, uxtw #2]
1983 %0 = zext i16 %p to i32
1984 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
1985 call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v4i32.v4i32.v4i1(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2, <4 x i1> %1)
; Unpredicated scatter store with shifted vector offsets, <4 x i32> data.
; Calls the scatter.offset intrinsic with the constants i32 32 and i32 2; the
; expected output is a single `vstrw.32` with the `[r0, q0, uxtw #2]` address
; form.
1989 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_s32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
1990 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_s32:
1991 ; CHECK: @ %bb.0: @ %entry
1992 ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
1995 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)
; Unpredicated scatter store with shifted vector offsets, <4 x i32> data.
; The intrinsic call and the expected `vstrw.32 q1, [r0, q0, uxtw #2]` output
; are the same as in test_vstrwq_scatter_shifted_offset_s32.
1999 define arm_aapcs_vfpcc void @test_vstrwq_scatter_shifted_offset_u32(ptr %base, <4 x i32> %offset, <4 x i32> %value) {
2000 ; CHECK-LABEL: test_vstrwq_scatter_shifted_offset_u32:
2001 ; CHECK: @ %bb.0: @ %entry
2002 ; CHECK-NEXT: vstrw.32 q1, [r0, q0, uxtw #2]
2005 call void @llvm.arm.mve.vstr.scatter.offset.p0.v4i32.v4i32(ptr %base, <4 x i32> %offset, <4 x i32> %value, i32 32, i32 2)