; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
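
; Codegen tests for the MVE VCMUL intrinsic family: the plain form, the
; predicated (_m) form and the don't-care (_x) form, each at rotations
; #0, #90, #180 and #270, for both f16 and f32 vectors.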

declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)

declare <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32, <8 x half>, <8 x half>)
declare <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32, <4 x float>, <4 x float>)

declare <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32, <8 x half>, <8 x half>, <8 x half>, <8 x i1>)
declare <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32, <4 x float>, <4 x float>, <4 x float>, <4 x i1>)

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 0, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 0, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_rot90_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 1, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_rot90_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 1, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_rot180_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 2, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_rot180_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #180
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 2, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcmulq_rot270_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x half> @llvm.arm.mve.vcmulq.v8f16(i32 3, <8 x half> %a, <8 x half> %b)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_vcmulq_rot270_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <4 x float> @llvm.arm.mve.vcmulq.v4f32(i32 3, <4 x float> %a, <4 x float> %b)
  ret <4 x float> %0
}
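
; Predicated (_m) forms: the predicate built from %p selects the active lanes;
; inactive lanes of the result are taken from the %inactive operand.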
define arm_aapcs_vfpcc <8 x half> @test_vcmulq_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_m_f16(<8 x half> %inactive, <8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_m_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> %inactive, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_m_f32(<4 x float> %inactive, <4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_m_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q0, q1, q2, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> %inactive, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}
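
; Don't-care (_x) forms: same predication, but the passthrough operand is
; undef, so inactive lanes of the result are unspecified.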
define arm_aapcs_vfpcc <8 x half> @test_vcmulq_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 0, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #0
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 0, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot90_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #90
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 1, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot90_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot90_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #90
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 1, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot180_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #180
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 2, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot180_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot180_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #180
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 2, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}

define arm_aapcs_vfpcc <8 x half> @test_vcmulq_rot270_x_f16(<8 x half> %a, <8 x half> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_x_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f16 q0, q0, q1, #270
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <8 x half> @llvm.arm.mve.vcmulq.predicated.v8f16.v8i1(i32 3, <8 x half> undef, <8 x half> %a, <8 x half> %b, <8 x i1> %1)
  ret <8 x half> %2
}

define arm_aapcs_vfpcc <4 x float> @test_vcmulq_rot270_x_f32(<4 x float> %a, <4 x float> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vcmulq_rot270_x_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vcmult.f32 q2, q0, q1, #270
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <4 x float> @llvm.arm.mve.vcmulq.predicated.v4f32.v4i1(i32 3, <4 x float> undef, <4 x float> %a, <4 x float> %b, <4 x i1> %1)
  ret <4 x float> %2
}