; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -o - %s | FileCheck %s
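
; This file checks that the legacy <4 x i1>-predicated forms of the 64-bit
; MVE and CDE intrinsics are automatically upgraded when the IR is loaded:
; each intrinsic's .v4i1 suffix becomes .v2i1, and the predicate operand is
; converted from <4 x i1> to <2 x i1> by a round trip through i32 using the
; llvm.arm.mve.pred.v2i / llvm.arm.mve.pred.i2v intrinsics.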

declare <4 x i1> @llvm.arm.mve.vctp64(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
declare i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1>)
declare <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, i32, <4 x i1>, <2 x i64>)
declare <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>, <2 x i64>)

declare <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
declare { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr, <2 x i64>, i32, i32, i32, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
declare <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64>, i32, <2 x i64>, <4 x i1>)
declare void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr, <2 x i64>, <2 x i64>, i32, i32, <4 x i1>)

declare <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)
declare <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 immarg, <2 x i64>, <16 x i8>, <16 x i8>, i32 immarg, <4 x i1>)
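
; vctp64 now produces a <2 x i1> predicate; code that expects the old
; <4 x i1> result gets it rebuilt via pred.v2i.v2i1 / pred.i2v.v4i1.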

define arm_aapcs_vfpcc zeroext i16 @test_vctp64q(i32 %a) {
; CHECK-LABEL: @test_vctp64q(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i16
; CHECK-NEXT: ret i16 [[TMP4]]
;
entry:
  %0 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %1 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %0)
  %2 = trunc i32 %1 to i16
  ret i16 %2
}

define arm_aapcs_vfpcc zeroext i16 @test_vctp64q_m(i32 %a, i16 zeroext %p) {
; CHECK-LABEL: @test_vctp64q_m(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i1> @llvm.arm.mve.vctp64(i32 [[A:%.*]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = and <4 x i1> [[TMP1]], [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = trunc i32 [[TMP6]] to i16
; CHECK-NEXT: ret i16 [[TMP7]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x i1> @llvm.arm.mve.vctp64(i32 %a)
  %3 = and <4 x i1> %1, %2
  %4 = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> %3)
  %5 = trunc i32 %4 to i16
  ret i16 %5
}

define arm_aapcs_vfpcc <2 x i64> @test_vmullbq_int_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: @test_vmullbq_int_m_s32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]])
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.mve.mull.int.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vqdmullbq_m_s32(<2 x i64> %inactive, <4 x i32> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: @test_vqdmullbq_m_s32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v2i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <2 x i1> [[TMP3]], <2 x i64> [[INACTIVE:%.*]])
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 0, <4 x i1> %1, <2 x i64> %inactive)
  ret <2 x i64> %2
}
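
; The 64-bit gather/scatter intrinsics take the same legacy <4 x i1>
; predicate operand; it is narrowed to <2 x i1> in the same way.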

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_z_s64(<2 x i64> %addr, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_base_z_s64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i1> [[TMP3]])
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_base_wb_z_s64(ptr %addr, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 664, <2 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 1
; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr [[ADDR]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP5]], 0
; CHECK-NEXT: ret <2 x i64> [[TMP7]]
;
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 664, <4 x i1> %2)
  %4 = extractvalue { <2 x i64>, <2 x i64> } %3, 1
  store <2 x i64> %4, ptr %addr, align 8
  %5 = extractvalue { <2 x i64>, <2 x i64> } %3, 0
  ret <2 x i64> %5
}

define arm_aapcs_vfpcc <2 x i64> @test_vldrdq_gather_offset_z_s64(ptr %base, <2 x i64> %offset, i16 zeroext %p) {
; CHECK-LABEL: @test_vldrdq_gather_offset_z_s64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v2i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], i32 64, i32 0, i32 0, <2 x i1> [[TMP3]])
; CHECK-NEXT: ret <2 x i64> [[TMP4]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <2 x i64> @llvm.arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1(ptr %base, <2 x i64> %offset, i32 64, i32 0, i32 0, <4 x i1> %1)
  ret <2 x i64> %2
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_p_s64(<2 x i64> %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_base_p_s64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v2i1(<2 x i64> [[ADDR:%.*]], i32 888, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP3]])
; CHECK-NEXT: ret void
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1(<2 x i64> %addr, i32 888, <2 x i64> %value, <4 x i1> %1)
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_base_wb_p_s64(ptr %addr, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_base_wb_p_s64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[ADDR:%.*]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v2i1(<2 x i64> [[TMP0]], i32 248, <2 x i64> [[VALUE:%.*]], <2 x i1> [[TMP4]])
; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[ADDR]], align 8
; CHECK-NEXT: ret void
;
entry:
  %0 = load <2 x i64>, ptr %addr, align 8
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = call <2 x i64> @llvm.arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1(<2 x i64> %0, i32 248, <2 x i64> %value, <4 x i1> %2)
  store <2 x i64> %3, ptr %addr, align 8
  ret void
}

define arm_aapcs_vfpcc void @test_vstrdq_scatter_offset_p_s64(ptr %base, <2 x i64> %offset, <2 x i64> %value, i16 zeroext %p) {
; CHECK-LABEL: @test_vstrdq_scatter_offset_p_s64(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT: call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v2i1(ptr [[BASE:%.*]], <2 x i64> [[OFFSET:%.*]], <2 x i64> [[VALUE:%.*]], i32 64, i32 0, <2 x i1> [[TMP3]])
; CHECK-NEXT: ret void
;
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1(ptr %base, <2 x i64> %offset, <2 x i64> %value, i32 64, i32 0, <4 x i1> %1)
  ret void
}
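
; The CDE vcx1q/vcx2q/vcx3q predicated intrinsics (and their accumulating
; *qa variants) operating on <2 x i64> are upgraded in the same way.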

define <8 x i16> @test_vcx1q_m(<2 x i64> %inactive, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx1q_m(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], i32 1111, <2 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <8 x i16>
; CHECK-NEXT: ret <8 x i16> [[TMP5]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.cde.vcx1q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, i32 1111, <4 x i1> %1)
  %3 = bitcast <2 x i64> %2 to <8 x i16>
  ret <8 x i16> %3
}

define <16 x i8> @test_vcx1qa_m(<2 x i64> %acc, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx1qa_m(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], i32 1112, <2 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <16 x i8>
; CHECK-NEXT: ret <16 x i8> [[TMP5]]
;
entry:
  %0 = zext i16 %p to i32
  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = tail call <2 x i64> @llvm.arm.cde.vcx1qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, i32 1112, <4 x i1> %1)
  %3 = bitcast <2 x i64> %2 to <16 x i8>
  ret <16 x i8> %3
}

define <4 x i32> @test_vcx2q_m(<2 x i64> %inactive, <4 x float> %n, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx2q_m(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], i32 111, <2 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
;
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx2q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, i32 111, <4 x i1> %2)
  %4 = bitcast <2 x i64> %3 to <4 x i32>
  ret <4 x i32> %4
}

define <4 x float> @test_vcx2qa_m(<2 x i64> %acc, <8 x half> %n, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx2qa_m(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[ACC:%.*]], <16 x i8> [[TMP0]], i32 112, <2 x i1> [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <4 x float>
; CHECK-NEXT: ret <4 x float> [[TMP6]]
;
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx2qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %acc, <16 x i8> %0, i32 112, <4 x i1> %2)
  %4 = bitcast <2 x i64> %3 to <4 x float>
  ret <4 x float> %4
}

define <2 x i64> @test_vcx3q_m(<2 x i64> %inactive, <4 x float> %n, <16 x i8> %m, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx3q_m(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[N:%.*]] to <16 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[M:%.*]], i32 11, <2 x i1> [[TMP4]])
; CHECK-NEXT: ret <2 x i64> [[TMP5]]
;
entry:
  %0 = bitcast <4 x float> %n to <16 x i8>
  %1 = zext i16 %p to i32
  %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  %3 = tail call <2 x i64> @llvm.arm.cde.vcx3q.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %m, i32 11, <4 x i1> %2)
  ret <2 x i64> %3
}

define <8 x half> @test_vcx3qa_m(<2 x i64> %inactive, <8 x half> %n, <4 x i32> %m, i16 zeroext %p) {
; CHECK-LABEL: @test_vcx3qa_m(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[N:%.*]] to <16 x i8>
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[M:%.*]] to <16 x i8>
; CHECK-NEXT: [[TMP2:%.*]] = zext i16 [[P:%.*]] to i32
; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP2]])
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.arm.mve.pred.v2i.v4i1(<4 x i1> [[TMP3]])
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i1> @llvm.arm.mve.pred.i2v.v2i1(i32 [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v2i1(i32 0, <2 x i64> [[INACTIVE:%.*]], <16 x i8> [[TMP0]], <16 x i8> [[TMP1]], i32 12, <2 x i1> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP6]] to <8 x half>
; CHECK-NEXT: ret <8 x half> [[TMP7]]
;
entry:
  %0 = bitcast <8 x half> %n to <16 x i8>
  %1 = bitcast <4 x i32> %m to <16 x i8>
  %2 = zext i16 %p to i32
  %3 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %2)
  %4 = tail call <2 x i64> @llvm.arm.cde.vcx3qa.predicated.v2i64.v4i1(i32 0, <2 x i64> %inactive, <16 x i8> %0, <16 x i8> %1, i32 12, <4 x i1> %3)
  %5 = bitcast <2 x i64> %4 to <8 x half>
  ret <8 x half> %5
}