1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; Predicate conversion intrinsics: each takes an i32 and returns an <N x i1> vector.
4 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
5 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
6 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
; Unpredicated vcaddq intrinsics: two i32 operands followed by two vector operands.
8 declare <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32, i32, <16 x i8>, <16 x i8>)
9 declare <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32, i32, <4 x i32>, <4 x i32>)
10 declare <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32, i32, <8 x i16>, <8 x i16>)
11 declare <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32, i32, <8 x half>, <8 x half>)
12 declare <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32, i32, <4 x float>, <4 x float>)
; Predicated vcaddq intrinsics: two i32 operands, three vector operands, and an <N x i1> predicate.
14 declare <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32, i32, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i1>)
15 declare <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32, i32, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i1>)
16 declare <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32, i32, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i1>)
17 declare <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32, i32, <8 x half>, <8 x half>, <8 x half>, <8 x i1>)
18 declare <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32, i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>)
; Unpredicated vcaddq intrinsic on <16 x i8> with immediates (i32 1, i32 0).
; Expected assembly: one vcadd.i8 with rotation #90 that writes q0 directly.
20 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_u8(<16 x i8> %a, <16 x i8> %b) {
21 ; CHECK-LABEL: test_vcaddq_rot90_u8:
22 ; CHECK: @ %bb.0: @ %entry
23 ; CHECK-NEXT: vcadd.i8 q0, q0, q1, #90
26 %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 0, <16 x i8> %a, <16 x i8> %b)
; Unpredicated vcaddq intrinsic on <8 x i16> with immediates (i32 1, i32 0).
; Expected assembly: one vcadd.i16 with rotation #90 that writes q0 directly.
30 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_u16(<8 x i16> %a, <8 x i16> %b) {
31 ; CHECK-LABEL: test_vcaddq_rot90_u16:
32 ; CHECK: @ %bb.0: @ %entry
33 ; CHECK-NEXT: vcadd.i16 q0, q0, q1, #90
36 %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 0, <8 x i16> %a, <8 x i16> %b)
; Unpredicated vcaddq intrinsic on <4 x i32> with immediates (i32 1, i32 0).
; Expected assembly: vcadd.i32 #90 into q2, then a vmov copying q2 to q0.
; NOTE(review): presumably the 32-bit form may not overlap its destination with a source - confirm against the ISA.
40 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_u32(<4 x i32> %a, <4 x i32> %b) {
41 ; CHECK-LABEL: test_vcaddq_rot90_u32:
42 ; CHECK: @ %bb.0: @ %entry
43 ; CHECK-NEXT: vcadd.i32 q2, q0, q1, #90
44 ; CHECK-NEXT: vmov q0, q2
47 %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 0, <4 x i32> %a, <4 x i32> %b)
; Unpredicated vcaddq intrinsic on <16 x i8> with immediates (i32 1, i32 0).
; Expected assembly: one vcadd.i8 with rotation #90 that writes q0 directly.
51 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_s8(<16 x i8> %a, <16 x i8> %b) {
52 ; CHECK-LABEL: test_vcaddq_rot90_s8:
53 ; CHECK: @ %bb.0: @ %entry
54 ; CHECK-NEXT: vcadd.i8 q0, q0, q1, #90
57 %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 0, <16 x i8> %a, <16 x i8> %b)
; Unpredicated vcaddq intrinsic on <8 x i16> with immediates (i32 1, i32 0).
; Expected assembly: one vcadd.i16 with rotation #90 that writes q0 directly.
61 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_s16(<8 x i16> %a, <8 x i16> %b) {
62 ; CHECK-LABEL: test_vcaddq_rot90_s16:
63 ; CHECK: @ %bb.0: @ %entry
64 ; CHECK-NEXT: vcadd.i16 q0, q0, q1, #90
67 %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 0, <8 x i16> %a, <8 x i16> %b)
; Unpredicated vcaddq intrinsic on <4 x i32> with immediates (i32 1, i32 0).
; Expected assembly: vcadd.i32 #90 into q2, then a vmov copying q2 to q0.
71 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_s32(<4 x i32> %a, <4 x i32> %b) {
72 ; CHECK-LABEL: test_vcaddq_rot90_s32:
73 ; CHECK: @ %bb.0: @ %entry
74 ; CHECK-NEXT: vcadd.i32 q2, q0, q1, #90
75 ; CHECK-NEXT: vmov q0, q2
78 %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 0, <4 x i32> %a, <4 x i32> %b)
; Unpredicated vcaddq intrinsic on <8 x half> with immediates (i32 1, i32 0).
; Expected assembly: one vcadd.f16 with rotation #90 that writes q0 directly.
82 define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot90_f16(<8 x half> %a, <8 x half> %b) {
83 ; CHECK-LABEL: test_vcaddq_rot90_f16:
84 ; CHECK: @ %bb.0: @ %entry
85 ; CHECK-NEXT: vcadd.f16 q0, q0, q1, #90
88 %0 = call <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32 1, i32 0, <8 x half> %a, <8 x half> %b)
; Unpredicated vcaddq intrinsic on <4 x float> with immediates (i32 1, i32 0).
; Expected assembly: vcadd.f32 #90 into q2, then a vmov copying q2 to q0.
92 define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot90_f32(<4 x float> %a, <4 x float> %b) {
93 ; CHECK-LABEL: test_vcaddq_rot90_f32:
94 ; CHECK: @ %bb.0: @ %entry
95 ; CHECK-NEXT: vcadd.f32 q2, q0, q1, #90
96 ; CHECK-NEXT: vmov q0, q2
99 %0 = call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 0, <4 x float> %a, <4 x float> %b)
; Unpredicated vcaddq intrinsic on <16 x i8> with immediates (i32 1, i32 1).
; Expected assembly: one vcadd.i8 with rotation #270 that writes q0 directly.
103 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_u8(<16 x i8> %a, <16 x i8> %b) {
104 ; CHECK-LABEL: test_vcaddq_rot270_u8:
105 ; CHECK: @ %bb.0: @ %entry
106 ; CHECK-NEXT: vcadd.i8 q0, q0, q1, #270
109 %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 1, <16 x i8> %a, <16 x i8> %b)
; Unpredicated vcaddq intrinsic on <8 x i16> with immediates (i32 1, i32 1).
; Expected assembly: one vcadd.i16 with rotation #270 that writes q0 directly.
113 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_u16(<8 x i16> %a, <8 x i16> %b) {
114 ; CHECK-LABEL: test_vcaddq_rot270_u16:
115 ; CHECK: @ %bb.0: @ %entry
116 ; CHECK-NEXT: vcadd.i16 q0, q0, q1, #270
119 %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 1, <8 x i16> %a, <8 x i16> %b)
; Unpredicated vcaddq intrinsic on <4 x i32> with immediates (i32 1, i32 1).
; Expected assembly: vcadd.i32 #270 into q2, then a vmov copying q2 to q0.
123 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_u32(<4 x i32> %a, <4 x i32> %b) {
124 ; CHECK-LABEL: test_vcaddq_rot270_u32:
125 ; CHECK: @ %bb.0: @ %entry
126 ; CHECK-NEXT: vcadd.i32 q2, q0, q1, #270
127 ; CHECK-NEXT: vmov q0, q2
130 %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 1, <4 x i32> %a, <4 x i32> %b)
; Unpredicated vcaddq intrinsic on <16 x i8> with immediates (i32 1, i32 1).
; Expected assembly: one vcadd.i8 with rotation #270 that writes q0 directly.
134 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_s8(<16 x i8> %a, <16 x i8> %b) {
135 ; CHECK-LABEL: test_vcaddq_rot270_s8:
136 ; CHECK: @ %bb.0: @ %entry
137 ; CHECK-NEXT: vcadd.i8 q0, q0, q1, #270
140 %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 1, i32 1, <16 x i8> %a, <16 x i8> %b)
; Unpredicated vcaddq intrinsic on <8 x i16> with immediates (i32 1, i32 1).
; Expected assembly: one vcadd.i16 with rotation #270 that writes q0 directly.
144 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_s16(<8 x i16> %a, <8 x i16> %b) {
145 ; CHECK-LABEL: test_vcaddq_rot270_s16:
146 ; CHECK: @ %bb.0: @ %entry
147 ; CHECK-NEXT: vcadd.i16 q0, q0, q1, #270
150 %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 1, i32 1, <8 x i16> %a, <8 x i16> %b)
; Unpredicated vcaddq intrinsic on <4 x i32> with immediates (i32 1, i32 1).
; Expected assembly: vcadd.i32 #270 into q2, then a vmov copying q2 to q0.
154 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_s32(<4 x i32> %a, <4 x i32> %b) {
155 ; CHECK-LABEL: test_vcaddq_rot270_s32:
156 ; CHECK: @ %bb.0: @ %entry
157 ; CHECK-NEXT: vcadd.i32 q2, q0, q1, #270
158 ; CHECK-NEXT: vmov q0, q2
161 %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 1, i32 1, <4 x i32> %a, <4 x i32> %b)
; Unpredicated vcaddq intrinsic on <8 x half> with immediates (i32 1, i32 1).
; Expected assembly: one vcadd.f16 with rotation #270 that writes q0 directly.
165 define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot270_f16(<8 x half> %a, <8 x half> %b) {
166 ; CHECK-LABEL: test_vcaddq_rot270_f16:
167 ; CHECK: @ %bb.0: @ %entry
168 ; CHECK-NEXT: vcadd.f16 q0, q0, q1, #270
171 %0 = call <8 x half> @llvm.arm.mve.vcaddq.v8f16(i32 1, i32 1, <8 x half> %a, <8 x half> %b)
; Unpredicated vcaddq intrinsic on <4 x float> with immediates (i32 1, i32 1).
; Expected assembly: vcadd.f32 #270 into q2, then a vmov copying q2 to q0.
175 define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot270_f32(<4 x float> %a, <4 x float> %b) {
176 ; CHECK-LABEL: test_vcaddq_rot270_f32:
177 ; CHECK: @ %bb.0: @ %entry
178 ; CHECK-NEXT: vcadd.f32 q2, q0, q1, #270
179 ; CHECK-NEXT: vmov q0, q2
182 %0 = call <4 x float> @llvm.arm.mve.vcaddq.v4f32(i32 1, i32 1, <4 x float> %a, <4 x float> %b)
; Predicated vcaddq on <16 x i8>, immediates (i32 1, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i8 q0, q1, q2, #90.
186 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
187 ; CHECK-LABEL: test_vcaddq_rot90_m_u8:
188 ; CHECK: @ %bb.0: @ %entry
189 ; CHECK-NEXT: vmsr p0, r0
191 ; CHECK-NEXT: vcaddt.i8 q0, q1, q2, #90
194 %0 = zext i16 %p to i32
195 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
196 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 1, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i16 q0, q1, q2, #90.
200 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
201 ; CHECK-LABEL: test_vcaddq_rot90_m_u16:
202 ; CHECK: @ %bb.0: @ %entry
203 ; CHECK-NEXT: vmsr p0, r0
205 ; CHECK-NEXT: vcaddt.i16 q0, q1, q2, #90
208 %0 = zext i16 %p to i32
209 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
210 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 1, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i32 q0, q1, q2, #90.
214 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
215 ; CHECK-LABEL: test_vcaddq_rot90_m_u32:
216 ; CHECK: @ %bb.0: @ %entry
217 ; CHECK-NEXT: vmsr p0, r0
219 ; CHECK-NEXT: vcaddt.i32 q0, q1, q2, #90
222 %0 = zext i16 %p to i32
223 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
224 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 1, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i8 q0, q1, q2, #90.
228 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
229 ; CHECK-LABEL: test_vcaddq_rot90_m_s8:
230 ; CHECK: @ %bb.0: @ %entry
231 ; CHECK-NEXT: vmsr p0, r0
233 ; CHECK-NEXT: vcaddt.i8 q0, q1, q2, #90
236 %0 = zext i16 %p to i32
237 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
238 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 1, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i16 q0, q1, q2, #90.
242 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
243 ; CHECK-LABEL: test_vcaddq_rot90_m_s16:
244 ; CHECK: @ %bb.0: @ %entry
245 ; CHECK-NEXT: vmsr p0, r0
247 ; CHECK-NEXT: vcaddt.i16 q0, q1, q2, #90
250 %0 = zext i16 %p to i32
251 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
252 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 1, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i32 q0, q1, q2, #90.
256 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
257 ; CHECK-LABEL: test_vcaddq_rot90_m_s32:
258 ; CHECK: @ %bb.0: @ %entry
259 ; CHECK-NEXT: vmsr p0, r0
261 ; CHECK-NEXT: vcaddt.i32 q0, q1, q2, #90
264 %0 = zext i16 %p to i32
265 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
266 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <8 x half>, immediates (i32 1, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.f16 q0, q1, q2, #90.
270 define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot90_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
271 ; CHECK-LABEL: test_vcaddq_rot90_m_f16:
272 ; CHECK: @ %bb.0: @ %entry
273 ; CHECK-NEXT: vmsr p0, r0
275 ; CHECK-NEXT: vcaddt.f16 q0, q1, q2, #90
278 %0 = zext i16 %p to i32
279 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
280 %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 0, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x float>, immediates (i32 1, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.f32 q0, q1, q2, #90.
284 define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot90_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
285 ; CHECK-LABEL: test_vcaddq_rot90_m_f32:
286 ; CHECK: @ %bb.0: @ %entry
287 ; CHECK-NEXT: vmsr p0, r0
289 ; CHECK-NEXT: vcaddt.f32 q0, q1, q2, #90
292 %0 = zext i16 %p to i32
293 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
294 %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 0, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 1, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i8 q0, q1, q2, #270.
298 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_m_u8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
299 ; CHECK-LABEL: test_vcaddq_rot270_m_u8:
300 ; CHECK: @ %bb.0: @ %entry
301 ; CHECK-NEXT: vmsr p0, r0
303 ; CHECK-NEXT: vcaddt.i8 q0, q1, q2, #270
306 %0 = zext i16 %p to i32
307 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
308 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 1, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i16 q0, q1, q2, #270.
312 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_m_u16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
313 ; CHECK-LABEL: test_vcaddq_rot270_m_u16:
314 ; CHECK: @ %bb.0: @ %entry
315 ; CHECK-NEXT: vmsr p0, r0
317 ; CHECK-NEXT: vcaddt.i16 q0, q1, q2, #270
320 %0 = zext i16 %p to i32
321 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
322 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 1, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i32 q0, q1, q2, #270.
326 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_m_u32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
327 ; CHECK-LABEL: test_vcaddq_rot270_m_u32:
328 ; CHECK: @ %bb.0: @ %entry
329 ; CHECK-NEXT: vmsr p0, r0
331 ; CHECK-NEXT: vcaddt.i32 q0, q1, q2, #270
334 %0 = zext i16 %p to i32
335 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
336 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 1, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i8 q0, q1, q2, #270.
340 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
341 ; CHECK-LABEL: test_vcaddq_rot270_m_s8:
342 ; CHECK: @ %bb.0: @ %entry
343 ; CHECK-NEXT: vmsr p0, r0
345 ; CHECK-NEXT: vcaddt.i8 q0, q1, q2, #270
348 %0 = zext i16 %p to i32
349 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
350 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 1, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i16 q0, q1, q2, #270.
354 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
355 ; CHECK-LABEL: test_vcaddq_rot270_m_s16:
356 ; CHECK: @ %bb.0: @ %entry
357 ; CHECK-NEXT: vmsr p0, r0
359 ; CHECK-NEXT: vcaddt.i16 q0, q1, q2, #270
362 %0 = zext i16 %p to i32
363 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
364 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 1, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i32 q0, q1, q2, #270.
368 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
369 ; CHECK-LABEL: test_vcaddq_rot270_m_s32:
370 ; CHECK: @ %bb.0: @ %entry
371 ; CHECK-NEXT: vmsr p0, r0
373 ; CHECK-NEXT: vcaddt.i32 q0, q1, q2, #270
376 %0 = zext i16 %p to i32
377 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
378 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <8 x half>, immediates (i32 1, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.f16 q0, q1, q2, #270.
382 define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot270_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
383 ; CHECK-LABEL: test_vcaddq_rot270_m_f16:
384 ; CHECK: @ %bb.0: @ %entry
385 ; CHECK-NEXT: vmsr p0, r0
387 ; CHECK-NEXT: vcaddt.f16 q0, q1, q2, #270
390 %0 = zext i16 %p to i32
391 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
392 %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 1, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x float>, immediates (i32 1, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.f32 q0, q1, q2, #270.
396 define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot270_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
397 ; CHECK-LABEL: test_vcaddq_rot270_m_f32:
398 ; CHECK: @ %bb.0: @ %entry
399 ; CHECK-NEXT: vmsr p0, r0
401 ; CHECK-NEXT: vcaddt.f32 q0, q1, q2, #270
404 %0 = zext i16 %p to i32
405 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
406 %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 1, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 1, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i8 q0, q0, q1, #90.
410 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
411 ; CHECK-LABEL: test_vcaddq_rot90_x_u8:
412 ; CHECK: @ %bb.0: @ %entry
413 ; CHECK-NEXT: vmsr p0, r0
415 ; CHECK-NEXT: vcaddt.i8 q0, q0, q1, #90
418 %0 = zext i16 %p to i32
419 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
420 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 1, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i16 q0, q0, q1, #90.
424 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
425 ; CHECK-LABEL: test_vcaddq_rot90_x_u16:
426 ; CHECK: @ %bb.0: @ %entry
427 ; CHECK-NEXT: vmsr p0, r0
429 ; CHECK-NEXT: vcaddt.i16 q0, q0, q1, #90
432 %0 = zext i16 %p to i32
433 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
434 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 1, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0, vcaddt.i32 #90 into q2, and vmov q0, q2.
438 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
439 ; CHECK-LABEL: test_vcaddq_rot90_x_u32:
440 ; CHECK: @ %bb.0: @ %entry
441 ; CHECK-NEXT: vmsr p0, r0
443 ; CHECK-NEXT: vcaddt.i32 q2, q0, q1, #90
444 ; CHECK-NEXT: vmov q0, q2
447 %0 = zext i16 %p to i32
448 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
449 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 1, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i8 q0, q0, q1, #90.
453 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
454 ; CHECK-LABEL: test_vcaddq_rot90_x_s8:
455 ; CHECK: @ %bb.0: @ %entry
456 ; CHECK-NEXT: vmsr p0, r0
458 ; CHECK-NEXT: vcaddt.i8 q0, q0, q1, #90
461 %0 = zext i16 %p to i32
462 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
463 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 0, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 1, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i16 q0, q0, q1, #90.
467 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot90_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
468 ; CHECK-LABEL: test_vcaddq_rot90_x_s16:
469 ; CHECK: @ %bb.0: @ %entry
470 ; CHECK-NEXT: vmsr p0, r0
472 ; CHECK-NEXT: vcaddt.i16 q0, q0, q1, #90
475 %0 = zext i16 %p to i32
476 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
477 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 0, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 1, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0, vcaddt.i32 #90 into q2, and vmov q0, q2.
481 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot90_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
482 ; CHECK-LABEL: test_vcaddq_rot90_x_s32:
483 ; CHECK: @ %bb.0: @ %entry
484 ; CHECK-NEXT: vmsr p0, r0
486 ; CHECK-NEXT: vcaddt.i32 q2, q0, q1, #90
487 ; CHECK-NEXT: vmov q0, q2
490 %0 = zext i16 %p to i32
491 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
492 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 0, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <8 x half>, immediates (i32 1, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.f16 q0, q0, q1, #90.
496 define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot90_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
497 ; CHECK-LABEL: test_vcaddq_rot90_x_f16:
498 ; CHECK: @ %bb.0: @ %entry
499 ; CHECK-NEXT: vmsr p0, r0
501 ; CHECK-NEXT: vcaddt.f16 q0, q0, q1, #90
504 %0 = zext i16 %p to i32
505 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
506 %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 0, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x float>, immediates (i32 1, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0, vcaddt.f32 #90 into q2, and vmov q0, q2.
510 define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot90_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
511 ; CHECK-LABEL: test_vcaddq_rot90_x_f32:
512 ; CHECK: @ %bb.0: @ %entry
513 ; CHECK-NEXT: vmsr p0, r0
515 ; CHECK-NEXT: vcaddt.f32 q2, q0, q1, #90
516 ; CHECK-NEXT: vmov q0, q2
519 %0 = zext i16 %p to i32
520 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
521 %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 0, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 1, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i8 q0, q0, q1, #270.
525 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_x_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
526 ; CHECK-LABEL: test_vcaddq_rot270_x_u8:
527 ; CHECK: @ %bb.0: @ %entry
528 ; CHECK-NEXT: vmsr p0, r0
530 ; CHECK-NEXT: vcaddt.i8 q0, q0, q1, #270
533 %0 = zext i16 %p to i32
534 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
535 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 1, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i16 q0, q0, q1, #270.
539 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_x_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
540 ; CHECK-LABEL: test_vcaddq_rot270_x_u16:
541 ; CHECK: @ %bb.0: @ %entry
542 ; CHECK-NEXT: vmsr p0, r0
544 ; CHECK-NEXT: vcaddt.i16 q0, q0, q1, #270
547 %0 = zext i16 %p to i32
548 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
549 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 1, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0, vcaddt.i32 #270 into q2, and vmov q0, q2.
553 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_x_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
554 ; CHECK-LABEL: test_vcaddq_rot270_x_u32:
555 ; CHECK: @ %bb.0: @ %entry
556 ; CHECK-NEXT: vmsr p0, r0
558 ; CHECK-NEXT: vcaddt.i32 q2, q0, q1, #270
559 ; CHECK-NEXT: vmov q0, q2
562 %0 = zext i16 %p to i32
563 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
564 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 1, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i8 q0, q0, q1, #270.
568 define arm_aapcs_vfpcc <16 x i8> @test_vcaddq_rot270_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
569 ; CHECK-LABEL: test_vcaddq_rot270_x_s8:
570 ; CHECK: @ %bb.0: @ %entry
571 ; CHECK-NEXT: vmsr p0, r0
573 ; CHECK-NEXT: vcaddt.i8 q0, q0, q1, #270
576 %0 = zext i16 %p to i32
577 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
578 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 1, i32 1, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 1, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.i16 q0, q0, q1, #270.
582 define arm_aapcs_vfpcc <8 x i16> @test_vcaddq_rot270_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
583 ; CHECK-LABEL: test_vcaddq_rot270_x_s16:
584 ; CHECK: @ %bb.0: @ %entry
585 ; CHECK-NEXT: vmsr p0, r0
587 ; CHECK-NEXT: vcaddt.i16 q0, q0, q1, #270
590 %0 = zext i16 %p to i32
591 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
592 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 1, i32 1, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 1, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0, vcaddt.i32 #270 into q2, and vmov q0, q2.
596 define arm_aapcs_vfpcc <4 x i32> @test_vcaddq_rot270_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
597 ; CHECK-LABEL: test_vcaddq_rot270_x_s32:
598 ; CHECK: @ %bb.0: @ %entry
599 ; CHECK-NEXT: vmsr p0, r0
601 ; CHECK-NEXT: vcaddt.i32 q2, q0, q1, #270
602 ; CHECK-NEXT: vmov q0, q2
605 %0 = zext i16 %p to i32
606 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
607 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 1, i32 1, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <8 x half>, immediates (i32 1, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vcaddt.f16 q0, q0, q1, #270.
611 define arm_aapcs_vfpcc <8 x half> @test_vcaddq_rot270_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
612 ; CHECK-LABEL: test_vcaddq_rot270_x_f16:
613 ; CHECK: @ %bb.0: @ %entry
614 ; CHECK-NEXT: vmsr p0, r0
616 ; CHECK-NEXT: vcaddt.f16 q0, q0, q1, #270
619 %0 = zext i16 %p to i32
620 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
621 %2 = call <8 x half> @llvm.arm.mve.vcaddq.predicated.v8f16.v8i1(i32 1, i32 1, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x float>, immediates (i32 1, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0, vcaddt.f32 #270 into q2, and vmov q0, q2.
625 define arm_aapcs_vfpcc <4 x float> @test_vcaddq_rot270_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
626 ; CHECK-LABEL: test_vcaddq_rot270_x_f32:
627 ; CHECK: @ %bb.0: @ %entry
628 ; CHECK-NEXT: vmsr p0, r0
630 ; CHECK-NEXT: vcaddt.f32 q2, q0, q1, #270
631 ; CHECK-NEXT: vmov q0, q2
634 %0 = zext i16 %p to i32
635 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
636 %2 = call <4 x float> @llvm.arm.mve.vcaddq.predicated.v4f32.v4i1(i32 1, i32 1, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
; Unpredicated vcaddq intrinsic on <16 x i8> with immediates (i32 0, i32 0).
; Expected assembly: one vhcadd.s8 with rotation #90 that writes q0 directly.
640 define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_s8(<16 x i8> %a, <16 x i8> %b) {
641 ; CHECK-LABEL: test_vhcaddq_rot90_s8:
642 ; CHECK: @ %bb.0: @ %entry
643 ; CHECK-NEXT: vhcadd.s8 q0, q0, q1, #90
646 %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 0, i32 0, <16 x i8> %a, <16 x i8> %b)
; Unpredicated vcaddq intrinsic on <8 x i16> with immediates (i32 0, i32 0).
; Expected assembly: one vhcadd.s16 with rotation #90 that writes q0 directly.
650 define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot90_s16(<8 x i16> %a, <8 x i16> %b) {
651 ; CHECK-LABEL: test_vhcaddq_rot90_s16:
652 ; CHECK: @ %bb.0: @ %entry
653 ; CHECK-NEXT: vhcadd.s16 q0, q0, q1, #90
656 %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 0, i32 0, <8 x i16> %a, <8 x i16> %b)
; Unpredicated vcaddq intrinsic on <4 x i32> with immediates (i32 0, i32 0).
; Expected assembly: vhcadd.s32 #90 into q2, then a vmov copying q2 to q0.
660 define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot90_s32(<4 x i32> %a, <4 x i32> %b) {
661 ; CHECK-LABEL: test_vhcaddq_rot90_s32:
662 ; CHECK: @ %bb.0: @ %entry
663 ; CHECK-NEXT: vhcadd.s32 q2, q0, q1, #90
664 ; CHECK-NEXT: vmov q0, q2
667 %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 0, <4 x i32> %a, <4 x i32> %b)
; Unpredicated vcaddq intrinsic on <16 x i8> with immediates (i32 0, i32 1).
; Expected assembly: one vhcadd.s8 with rotation #270 that writes q0 directly.
671 define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot270_s8(<16 x i8> %a, <16 x i8> %b) {
672 ; CHECK-LABEL: test_vhcaddq_rot270_s8:
673 ; CHECK: @ %bb.0: @ %entry
674 ; CHECK-NEXT: vhcadd.s8 q0, q0, q1, #270
677 %0 = call <16 x i8> @llvm.arm.mve.vcaddq.v16i8(i32 0, i32 1, <16 x i8> %a, <16 x i8> %b)
; Unpredicated vcaddq intrinsic on <8 x i16> with immediates (i32 0, i32 1).
; Expected assembly: one vhcadd.s16 with rotation #270 that writes q0 directly.
681 define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot270_s16(<8 x i16> %a, <8 x i16> %b) {
682 ; CHECK-LABEL: test_vhcaddq_rot270_s16:
683 ; CHECK: @ %bb.0: @ %entry
684 ; CHECK-NEXT: vhcadd.s16 q0, q0, q1, #270
687 %0 = call <8 x i16> @llvm.arm.mve.vcaddq.v8i16(i32 0, i32 1, <8 x i16> %a, <8 x i16> %b)
; Unpredicated vcaddq intrinsic on <4 x i32> with immediates (i32 0, i32 1).
; Expected assembly: vhcadd.s32 #270 into q2, then a vmov copying q2 to q0.
691 define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32(<4 x i32> %a, <4 x i32> %b) {
692 ; CHECK-LABEL: test_vhcaddq_rot270_s32:
693 ; CHECK: @ %bb.0: @ %entry
694 ; CHECK-NEXT: vhcadd.s32 q2, q0, q1, #270
695 ; CHECK-NEXT: vmov q0, q2
698 %0 = call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> %a, <4 x i32> %b)
; Calls the v4i32 vcaddq intrinsic (immediates i32 0, i32 1) with both vector operands undef.
; The patterns below accept a vhcadd.s32 #270 on any q registers, and carry a negative pattern
; for the form whose destination register number equals that of its last source register.
; NOTE(review): presumably this guards the 32-bit destination/source overlap restriction - confirm against the ISA.
702 define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32_undef() {
703 ; CHECK-LABEL: test_vhcaddq_rot270_s32_undef:
704 ; CHECK: @ %bb.0: @ %entry
705 ; CHECK-NEXT: vhcadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
706 ; CHECK-NOT: vhcadd.s32 q[[REG:[0-9]+]], q{{[0-9]+}}, q[[REG]], #270
709 %0 = tail call <4 x i32> @llvm.arm.mve.vcaddq.v4i32(i32 0, i32 1, <4 x i32> undef, <4 x i32> undef)
; Emits vhcadd.s32 #270 through side-effecting inline asm with constraints "=&w,w,w,~{memory}"
; and both inputs undef. The patterns below accept the instruction on any q registers, carry a
; negative pattern for destination == last source register, and then expect the @NO_APP marker.
; NOTE(review): "=&w" reads as an early-clobber output per LLVM inline-asm constraint syntax - confirm.
713 define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_s32_undef_inline_asm() {
714 ; CHECK-LABEL: test_vhcaddq_rot270_s32_undef_inline_asm:
715 ; CHECK: @ %bb.0: @ %entry
717 ; CHECK-NEXT: vhcadd.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}, #270
718 ; CHECK-NOT: vhcadd.s32 q[[REG:[0-9]+]], q{{[0-9]+}}, q[[REG]], #270
719 ; CHECK-NEXT: @NO_APP
722 %0 = call <4 x i32> asm sideeffect "vhcadd.s32 ${0}, ${1}, ${2}, #270", "=&w,w,w,~{memory}"(<4 x i32> undef, <4 x i32> undef)
; Predicated vcaddq on <16 x i8>, immediates (i32 0, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s8 q0, q0, q1, #90.
726 define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
727 ; CHECK-LABEL: test_vhcaddq_rot90_x_s8:
728 ; CHECK: @ %bb.0: @ %entry
729 ; CHECK-NEXT: vmsr p0, r0
731 ; CHECK-NEXT: vhcaddt.s8 q0, q0, q1, #90
734 %0 = zext i16 %p to i32
735 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
736 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 0, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 0, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s16 q0, q0, q1, #90.
740 define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot90_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
741 ; CHECK-LABEL: test_vhcaddq_rot90_x_s16:
742 ; CHECK: @ %bb.0: @ %entry
743 ; CHECK-NEXT: vmsr p0, r0
745 ; CHECK-NEXT: vhcaddt.s16 q0, q0, q1, #90
748 %0 = zext i16 %p to i32
749 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
750 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 0, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 0, i32 0), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0, vhcaddt.s32 #90 into q2, and vmov q0, q2.
754 define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot90_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
755 ; CHECK-LABEL: test_vhcaddq_rot90_x_s32:
756 ; CHECK: @ %bb.0: @ %entry
757 ; CHECK-NEXT: vmsr p0, r0
759 ; CHECK-NEXT: vhcaddt.s32 q2, q0, q1, #90
760 ; CHECK-NEXT: vmov q0, q2
763 %0 = zext i16 %p to i32
764 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
765 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 0, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 0, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s8 q0, q0, q1, #270.
769 define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot270_x_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
770 ; CHECK-LABEL: test_vhcaddq_rot270_x_s8:
771 ; CHECK: @ %bb.0: @ %entry
772 ; CHECK-NEXT: vmsr p0, r0
774 ; CHECK-NEXT: vhcaddt.s8 q0, q0, q1, #270
777 %0 = zext i16 %p to i32
778 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
779 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 1, <16 x i8> undef, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 0, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s16 q0, q0, q1, #270.
783 define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot270_x_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
784 ; CHECK-LABEL: test_vhcaddq_rot270_x_s16:
785 ; CHECK: @ %bb.0: @ %entry
786 ; CHECK-NEXT: vmsr p0, r0
788 ; CHECK-NEXT: vhcaddt.s16 q0, q0, q1, #270
791 %0 = zext i16 %p to i32
792 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
793 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 1, <8 x i16> undef, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 0, i32 1), with undef as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0, vhcaddt.s32 #270 into q2, and vmov q0, q2.
797 define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_x_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
798 ; CHECK-LABEL: test_vhcaddq_rot270_x_s32:
799 ; CHECK: @ %bb.0: @ %entry
800 ; CHECK-NEXT: vmsr p0, r0
802 ; CHECK-NEXT: vhcaddt.s32 q2, q0, q1, #270
803 ; CHECK-NEXT: vmov q0, q2
806 %0 = zext i16 %p to i32
807 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
808 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 1, <4 x i32> undef, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 0, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s8 q0, q1, q2, #90.
812 define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot90_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
813 ; CHECK-LABEL: test_vhcaddq_rot90_m_s8:
814 ; CHECK: @ %bb.0: @ %entry
815 ; CHECK-NEXT: vmsr p0, r0
817 ; CHECK-NEXT: vhcaddt.s8 q0, q1, q2, #90
820 %0 = zext i16 %p to i32
821 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
822 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 0, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 0, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s16 q0, q1, q2, #90.
826 define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot90_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
827 ; CHECK-LABEL: test_vhcaddq_rot90_m_s16:
828 ; CHECK: @ %bb.0: @ %entry
829 ; CHECK-NEXT: vmsr p0, r0
831 ; CHECK-NEXT: vhcaddt.s16 q0, q1, q2, #90
834 %0 = zext i16 %p to i32
835 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
836 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 0, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 0, i32 0), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s32 q0, q1, q2, #90.
840 define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot90_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
841 ; CHECK-LABEL: test_vhcaddq_rot90_m_s32:
842 ; CHECK: @ %bb.0: @ %entry
843 ; CHECK-NEXT: vmsr p0, r0
845 ; CHECK-NEXT: vhcaddt.s32 q0, q1, q2, #90
848 %0 = zext i16 %p to i32
849 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
850 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 0, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)
; Predicated vcaddq on <16 x i8>, immediates (i32 0, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v16i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s8 q0, q1, q2, #270.
854 define arm_aapcs_vfpcc <16 x i8> @test_vhcaddq_rot270_m_s8(<16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, i16 zeroext %p) {
855 ; CHECK-LABEL: test_vhcaddq_rot270_m_s8:
856 ; CHECK: @ %bb.0: @ %entry
857 ; CHECK-NEXT: vmsr p0, r0
859 ; CHECK-NEXT: vhcaddt.s8 q0, q1, q2, #270
862 %0 = zext i16 %p to i32
863 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
864 %2 = call <16 x i8> @llvm.arm.mve.vcaddq.predicated.v16i8.v16i1(i32 0, i32 1, <16 x i8> %inactive, <16 x i8> %a, <16 x i8> %b, <16 x i1> %1)
; Predicated vcaddq on <8 x i16>, immediates (i32 0, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v8i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s16 q0, q1, q2, #270.
868 define arm_aapcs_vfpcc <8 x i16> @test_vhcaddq_rot270_m_s16(<8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, i16 zeroext %p) {
869 ; CHECK-LABEL: test_vhcaddq_rot270_m_s16:
870 ; CHECK: @ %bb.0: @ %entry
871 ; CHECK-NEXT: vmsr p0, r0
873 ; CHECK-NEXT: vhcaddt.s16 q0, q1, q2, #270
876 %0 = zext i16 %p to i32
877 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
878 %2 = call <8 x i16> @llvm.arm.mve.vcaddq.predicated.v8i16.v8i1(i32 0, i32 1, <8 x i16> %inactive, <8 x i16> %a, <8 x i16> %b, <8 x i1> %1)
; Predicated vcaddq on <4 x i32>, immediates (i32 0, i32 1), with %inactive as the
; third operand and the predicate built from %p via zext + pred.i2v.v4i1.
; Expected assembly includes vmsr p0, r0 and vhcaddt.s32 q0, q1, q2, #270.
882 define arm_aapcs_vfpcc <4 x i32> @test_vhcaddq_rot270_m_s32(<4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
883 ; CHECK-LABEL: test_vhcaddq_rot270_m_s32:
884 ; CHECK: @ %bb.0: @ %entry
885 ; CHECK-NEXT: vmsr p0, r0
887 ; CHECK-NEXT: vhcaddt.s32 q0, q1, q2, #270
890 %0 = zext i16 %p to i32
891 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
892 %2 = call <4 x i32> @llvm.arm.mve.vcaddq.predicated.v4i32.v4i1(i32 0, i32 1, <4 x i32> %inactive, <4 x i32> %a, <4 x i32> %b, <4 x i1> %1)