; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
define arm_aapcs_vfpcc i32 @test_vaddvq_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vaddvq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %a, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vaddvq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %a, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddvq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %a, i32 0)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vaddvq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %a, i32 1)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vaddvq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %a, i32 1)
  ret i32 %0
}

define arm_aapcs_vfpcc i32 @test_vaddvq_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddvq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddv.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %a, i32 1)
  ret i32 %0
}
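; The vaddvaq_* tests below also add the incoming scalar %a to the reduction
; result; that add should fold into the accumulating VADDVA instruction.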
define arm_aapcs_vfpcc i32 @test_vaddvaq_s8(i32 %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddvaq_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %b, i32 0)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_s16(i32 %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddvaq_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %b, i32 0)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_s32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddvaq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %b, i32 0)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_u8(i32 %a, <16 x i8> %b) {
; CHECK-LABEL: test_vaddvaq_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> %b, i32 1)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_u16(i32 %a, <8 x i16> %b) {
; CHECK-LABEL: test_vaddvaq_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> %b, i32 1)
  %1 = add i32 %0, %a
  ret i32 %1
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_u32(i32 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddvaq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddva.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %b, i32 1)
  %1 = add i32 %0, %a
  ret i32 %1
}
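; Predicated variants: the i16 predicate %p is moved into P0 with vmsr and the
; reduction executes inside a VPT block (vpst + vaddvt).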
define arm_aapcs_vfpcc i32 @test_vaddvq_p_s8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %a, i32 0, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %a, i32 0, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_u8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %a, i32 1, <8 x i1> %1)
  ret i32 %2
}

define arm_aapcs_vfpcc i32 @test_vaddvq_p_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvt.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1)
  ret i32 %2
}
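; Predicated accumulating variants combine both patterns above and should
; select VADDVAT inside the VPT block.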
define arm_aapcs_vfpcc i32 @test_vaddvaq_p_s8(i32 %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.s8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %b, i32 0, <16 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_s16(i32 %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.s16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %b, i32 0, <8 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_s32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.s32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_u8(i32 %a, <16 x i8> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.u8 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8> %b, i32 1, <16 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_u16(i32 %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.u16 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16> %b, i32 1, <8 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}

define arm_aapcs_vfpcc i32 @test_vaddvaq_p_u32(i32 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddvaq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddvat.u32 r0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32> %b, i32 1, <4 x i1> %1)
  %3 = add i32 %2, %a
  ret i32 %3
}
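; 64-bit reductions: vaddlv/vaddlva return their result in the r0/r1 pair.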
define arm_aapcs_vfpcc i64 @test_vaddlvq_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddlvq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlv.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %a, i32 0)
  ret i64 %0
}

define arm_aapcs_vfpcc i64 @test_vaddlvq_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vaddlvq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlv.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %a, i32 1)
  ret i64 %0
}
define arm_aapcs_vfpcc i64 @test_vaddlvaq_s32(i64 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddlvaq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlva.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %b, i32 0)
  %1 = add i64 %0, %a
  ret i64 %1
}

define arm_aapcs_vfpcc i64 @test_vaddlvaq_u32(i64 %a, <4 x i32> %b) {
; CHECK-LABEL: test_vaddlvaq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vaddlva.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = tail call i64 @llvm.arm.mve.addlv.v4i32(<4 x i32> %b, i32 1)
  %1 = add i64 %0, %a
  ret i64 %1
}
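; Predicated 64-bit reductions follow the same vmsr/vpst pattern as above.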
define arm_aapcs_vfpcc i64 @test_vaddlvq_p_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvt.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1)
  ret i64 %2
}

define arm_aapcs_vfpcc i64 @test_vaddlvq_p_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvt.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1)
  ret i64 %2
}
define arm_aapcs_vfpcc i64 @test_vaddlvaq_p_s32(i64 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvaq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvat.s32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %b, i32 0, <4 x i1> %1)
  %3 = add i64 %2, %a
  ret i64 %3
}

define arm_aapcs_vfpcc i64 @test_vaddlvaq_p_u32(i64 %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vaddlvaq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddlvat.u32 r0, r1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32> %b, i32 1, <4 x i1> %1)
  %3 = add i64 %2, %a
  ret i64 %3
}
declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare i32 @llvm.arm.mve.addv.v16i8(<16 x i8>, i32)
declare i32 @llvm.arm.mve.addv.v8i16(<8 x i16>, i32)
declare i32 @llvm.arm.mve.addv.v4i32(<4 x i32>, i32)
declare i64 @llvm.arm.mve.addlv.v4i32(<4 x i32>, i32)

declare i32 @llvm.arm.mve.addv.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>)
declare i32 @llvm.arm.mve.addv.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>)
declare i32 @llvm.arm.mve.addv.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)
declare i64 @llvm.arm.mve.addlv.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>)