1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
; vfmaq_f16: llvm.fma.v8f16 is called with (%b, %c, %a), i.e. %b * %c + %a.
; The checks expect the vector-by-vector instruction vfma.f16 q0, q1, q2.
4 define arm_aapcs_vfpcc <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
5 ; CHECK-LABEL: test_vfmaq_f16:
6 ; CHECK: @ %bb.0: @ %entry
7 ; CHECK-NEXT: vfma.f16 q0, q1, q2
10 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
; vfmaq_f32: llvm.fma.v4f32 is called with (%b, %c, %a), i.e. %b * %c + %a.
; The checks expect the vector-by-vector instruction vfma.f32 q0, q1, q2.
14 define arm_aapcs_vfpcc <4 x float> @test_vfmaq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
15 ; CHECK-LABEL: test_vfmaq_f32:
16 ; CHECK: @ %bb.0: @ %entry
17 ; CHECK-NEXT: vfma.f32 q0, q1, q2
20 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a)
; vfmaq_n_f16: the half scalar is passed inside float %c.coerce; it is recovered
; by bitcast to i32, trunc to i16 and bitcast to half, then splatted to <8 x half>.
; The splat is the second multiplicand of llvm.fma.v8f16(%b, splat, %a).
; The checks expect vmov r0, s8 followed by the scalar-operand form
; vfma.f16 q0, q1, r0.
24 define arm_aapcs_vfpcc <8 x half> @test_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, float %c.coerce) {
25 ; CHECK-LABEL: test_vfmaq_n_f16:
26 ; CHECK: @ %bb.0: @ %entry
27 ; CHECK-NEXT: vmov r0, s8
28 ; CHECK-NEXT: vfma.f16 q0, q1, r0
31 %0 = bitcast float %c.coerce to i32
32 %tmp.0.extract.trunc = trunc i32 %0 to i16
33 %1 = bitcast i16 %tmp.0.extract.trunc to half
34 %.splatinsert = insertelement <8 x half> undef, half %1, i32 0
35 %.splat = shufflevector <8 x half> %.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer
36 %2 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %.splat, <8 x half> %a)
; vfmaq_n_f32: float %c is splatted to <4 x float> (insertelement + zero-mask
; shufflevector) and used as the second multiplicand of
; llvm.fma.v4f32(%b, splat, %a).
; The checks expect vmov r0, s8 followed by vfma.f32 q0, q1, r0.
40 define arm_aapcs_vfpcc <4 x float> @test_vfmaq_n_f32(<4 x float> %a, <4 x float> %b, float %c) {
41 ; CHECK-LABEL: test_vfmaq_n_f32:
42 ; CHECK: @ %bb.0: @ %entry
43 ; CHECK-NEXT: vmov r0, s8
44 ; CHECK-NEXT: vfma.f32 q0, q1, r0
47 %.splatinsert = insertelement <4 x float> undef, float %c, i32 0
48 %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
49 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %.splat, <4 x float> %a)
; vfmasq_n_f16: the half scalar is recovered from float %c.coerce (bitcast to
; i32, trunc to i16, bitcast to half) and splatted to <8 x half>.
; Here the splat is the addend: llvm.fma.v8f16(%a, %b, splat) = %a * %b + splat.
; The checks expect vmov r0, s8 followed by vfmas.f16 q0, q1, r0.
53 define arm_aapcs_vfpcc <8 x half> @test_vfmasq_n_f16(<8 x half> %a, <8 x half> %b, float %c.coerce) {
54 ; CHECK-LABEL: test_vfmasq_n_f16:
55 ; CHECK: @ %bb.0: @ %entry
56 ; CHECK-NEXT: vmov r0, s8
57 ; CHECK-NEXT: vfmas.f16 q0, q1, r0
60 %0 = bitcast float %c.coerce to i32
61 %tmp.0.extract.trunc = trunc i32 %0 to i16
62 %1 = bitcast i16 %tmp.0.extract.trunc to half
63 %.splatinsert = insertelement <8 x half> undef, half %1, i32 0
64 %.splat = shufflevector <8 x half> %.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer
65 %2 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %.splat)
; vfmasq_n_f32: float %c is splatted to <4 x float> and used as the addend of
; llvm.fma.v4f32(%a, %b, splat), i.e. %a * %b + splat.
; The checks expect vmov r0, s8 followed by vfmas.f32 q0, q1, r0.
69 define arm_aapcs_vfpcc <4 x float> @test_vfmasq_n_f32(<4 x float> %a, <4 x float> %b, float %c) {
70 ; CHECK-LABEL: test_vfmasq_n_f32:
71 ; CHECK: @ %bb.0: @ %entry
72 ; CHECK-NEXT: vmov r0, s8
73 ; CHECK-NEXT: vfmas.f32 q0, q1, r0
76 %.splatinsert = insertelement <4 x float> undef, float %c, i32 0
77 %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
78 %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %.splat)
; vfmsq_f16: %c is negated with fneg and the result is fed to
; llvm.fma.v8f16(%b, -%c, %a), i.e. %a - %b * %c.
; The checks expect vfms.f16 q0, q2, q1 (note the q2, q1 operand order).
82 define arm_aapcs_vfpcc <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
83 ; CHECK-LABEL: test_vfmsq_f16:
84 ; CHECK: @ %bb.0: @ %entry
85 ; CHECK-NEXT: vfms.f16 q0, q2, q1
88 %0 = fneg <8 x half> %c
89 %1 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %0, <8 x half> %a)
; vfmsq_f32: %c is negated with fneg and the result is fed to
; llvm.fma.v4f32(%b, -%c, %a), i.e. %a - %b * %c.
; The checks expect vfms.f32 q0, q2, q1 (note the q2, q1 operand order).
93 define arm_aapcs_vfpcc <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
94 ; CHECK-LABEL: test_vfmsq_f32:
95 ; CHECK: @ %bb.0: @ %entry
96 ; CHECK-NEXT: vfms.f32 q0, q2, q1
99 %0 = fneg <4 x float> %c
100 %1 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %0, <4 x float> %a)
; vmlaq_n_s8: %c (i8 signext) is splatted to <16 x i8>, multiplied by %b with a
; plain mul, and the product is added to %a with a plain add (no intrinsic).
; The checks expect the scalar-operand instruction vmla.i8 q0, q1, r0.
104 define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
105 ; CHECK-LABEL: test_vmlaq_n_s8:
106 ; CHECK: @ %bb.0: @ %entry
107 ; CHECK-NEXT: vmla.i8 q0, q1, r0
110 %.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
111 %.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
112 %0 = mul <16 x i8> %.splat, %b
113 %1 = add <16 x i8> %0, %a
; vmlaq_n_s16: %c (i16 signext) is splatted to <8 x i16>, multiplied by %b with
; a plain mul, and the product is added to %a with a plain add (no intrinsic).
; The checks expect the scalar-operand instruction vmla.i16 q0, q1, r0.
117 define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) {
118 ; CHECK-LABEL: test_vmlaq_n_s16:
119 ; CHECK: @ %bb.0: @ %entry
120 ; CHECK-NEXT: vmla.i16 q0, q1, r0
123 %.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
124 %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
125 %0 = mul <8 x i16> %.splat, %b
126 %1 = add <8 x i16> %0, %a
; vmlaq_n_s32: i32 %c is splatted to <4 x i32>, multiplied by %b with a plain
; mul, and the product is added to %a with a plain add (no intrinsic).
; The checks expect the scalar-operand instruction vmla.i32 q0, q1, r0.
130 define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
131 ; CHECK-LABEL: test_vmlaq_n_s32:
132 ; CHECK: @ %bb.0: @ %entry
133 ; CHECK-NEXT: vmla.i32 q0, q1, r0
136 %.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
137 %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
138 %0 = mul <4 x i32> %.splat, %b
139 %1 = add <4 x i32> %0, %a
; vmlaq_n_u8: %c (i8 zeroext) is splatted to <16 x i8>, multiplied by %b with a
; plain mul, and the product is added to %a with a plain add (no intrinsic).
; The checks expect the sign-agnostic instruction vmla.i8 q0, q1, r0.
143 define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c) {
144 ; CHECK-LABEL: test_vmlaq_n_u8:
145 ; CHECK: @ %bb.0: @ %entry
146 ; CHECK-NEXT: vmla.i8 q0, q1, r0
149 %.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
150 %.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
151 %0 = mul <16 x i8> %.splat, %b
152 %1 = add <16 x i8> %0, %a
; vmlaq_n_u16: %c (i16 zeroext) is splatted to <8 x i16>, multiplied by %b with
; a plain mul, and the product is added to %a with a plain add (no intrinsic).
; The checks expect the sign-agnostic instruction vmla.i16 q0, q1, r0.
156 define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) {
157 ; CHECK-LABEL: test_vmlaq_n_u16:
158 ; CHECK: @ %bb.0: @ %entry
159 ; CHECK-NEXT: vmla.i16 q0, q1, r0
162 %.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
163 %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
164 %0 = mul <8 x i16> %.splat, %b
165 %1 = add <8 x i16> %0, %a
; vmlaq_n_u32: i32 %c is splatted to <4 x i32>, multiplied by %b with a plain
; mul, and the product is added to %a with a plain add (no intrinsic).
; The checks expect the sign-agnostic instruction vmla.i32 q0, q1, r0.
169 define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
170 ; CHECK-LABEL: test_vmlaq_n_u32:
171 ; CHECK: @ %bb.0: @ %entry
172 ; CHECK-NEXT: vmla.i32 q0, q1, r0
175 %.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
176 %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
177 %0 = mul <4 x i32> %.splat, %b
178 %1 = add <4 x i32> %0, %a
; vmlasq_n_s8: %b * %a is computed with a plain mul and the splat of %c
; (i8 signext) is added to the product with a plain add (no intrinsic).
; The checks expect vmlas.i8 q1, q0, r0 followed by vmov q0, q1.
182 define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
183 ; CHECK-LABEL: test_vmlasq_n_s8:
184 ; CHECK: @ %bb.0: @ %entry
185 ; CHECK-NEXT: vmlas.i8 q1, q0, r0
186 ; CHECK-NEXT: vmov q0, q1
189 %0 = mul <16 x i8> %b, %a
190 %.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
191 %.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
192 %1 = add <16 x i8> %.splat, %0
; vmlasq_n_s16: %b * %a is computed with a plain mul and the splat of %c
; (i16 signext) is added to the product with a plain add (no intrinsic).
; The checks expect vmlas.i16 q1, q0, r0 followed by vmov q0, q1.
196 define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) {
197 ; CHECK-LABEL: test_vmlasq_n_s16:
198 ; CHECK: @ %bb.0: @ %entry
199 ; CHECK-NEXT: vmlas.i16 q1, q0, r0
200 ; CHECK-NEXT: vmov q0, q1
203 %0 = mul <8 x i16> %b, %a
204 %.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
205 %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
206 %1 = add <8 x i16> %.splat, %0
; vmlasq_n_s32: %b * %a is computed with a plain mul and the splat of i32 %c is
; added to the product with a plain add (no intrinsic).
; The checks expect vmlas.i32 q1, q0, r0 followed by vmov q0, q1.
210 define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
211 ; CHECK-LABEL: test_vmlasq_n_s32:
212 ; CHECK: @ %bb.0: @ %entry
213 ; CHECK-NEXT: vmlas.i32 q1, q0, r0
214 ; CHECK-NEXT: vmov q0, q1
217 %0 = mul <4 x i32> %b, %a
218 %.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
219 %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
220 %1 = add <4 x i32> %.splat, %0
; vmlasq_n_u8: %b * %a is computed with a plain mul and the splat of %c
; (i8 zeroext) is added to the product with a plain add (no intrinsic).
; The checks expect vmlas.i8 q1, q0, r0 followed by vmov q0, q1.
224 define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c) {
225 ; CHECK-LABEL: test_vmlasq_n_u8:
226 ; CHECK: @ %bb.0: @ %entry
227 ; CHECK-NEXT: vmlas.i8 q1, q0, r0
228 ; CHECK-NEXT: vmov q0, q1
231 %0 = mul <16 x i8> %b, %a
232 %.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0
233 %.splat = shufflevector <16 x i8> %.splatinsert, <16 x i8> undef, <16 x i32> zeroinitializer
234 %1 = add <16 x i8> %.splat, %0
; vmlasq_n_u16: %b * %a is computed with a plain mul and the splat of %c
; (i16 zeroext) is added to the product with a plain add (no intrinsic).
; The checks expect vmlas.i16 q1, q0, r0 followed by vmov q0, q1.
238 define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) {
239 ; CHECK-LABEL: test_vmlasq_n_u16:
240 ; CHECK: @ %bb.0: @ %entry
241 ; CHECK-NEXT: vmlas.i16 q1, q0, r0
242 ; CHECK-NEXT: vmov q0, q1
245 %0 = mul <8 x i16> %b, %a
246 %.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0
247 %.splat = shufflevector <8 x i16> %.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
248 %1 = add <8 x i16> %.splat, %0
; vmlasq_n_u32: %b * %a is computed with a plain mul and the splat of i32 %c is
; added to the product with a plain add (no intrinsic).
; The checks expect vmlas.i32 q1, q0, r0 followed by vmov q0, q1.
252 define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
253 ; CHECK-LABEL: test_vmlasq_n_u32:
254 ; CHECK: @ %bb.0: @ %entry
255 ; CHECK-NEXT: vmlas.i32 q1, q0, r0
256 ; CHECK-NEXT: vmov q0, q1
259 %0 = mul <4 x i32> %b, %a
260 %.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0
261 %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
262 %1 = add <4 x i32> %.splat, %0
; vqdmlahq_n_s8: calls llvm.arm.mve.vqdmlah.v16i8(%a, %b, scalar).
; The i8 signext scalar %c is widened to i32 with zext before the call.
; The checks expect vqdmlah.s8 q0, q1, r0.
266 define arm_aapcs_vfpcc <16 x i8> @test_vqdmlahq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
267 ; CHECK-LABEL: test_vqdmlahq_n_s8:
268 ; CHECK: @ %bb.0: @ %entry
269 ; CHECK-NEXT: vqdmlah.s8 q0, q1, r0
272 %0 = zext i8 %c to i32
273 %1 = tail call <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8> %a, <16 x i8> %b, i32 %0)
; vqdmlahq_n_s16: calls llvm.arm.mve.vqdmlah.v8i16(%a, %b, scalar).
; The i16 signext scalar %c is widened to i32 with zext before the call.
; The checks expect vqdmlah.s16 q0, q1, r0.
277 define arm_aapcs_vfpcc <8 x i16> @test_vqdmlahq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) {
278 ; CHECK-LABEL: test_vqdmlahq_n_s16:
279 ; CHECK: @ %bb.0: @ %entry
280 ; CHECK-NEXT: vqdmlah.s16 q0, q1, r0
283 %0 = zext i16 %c to i32
284 %1 = tail call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16> %a, <8 x i16> %b, i32 %0)
; vqdmlahq_n_s32: calls llvm.arm.mve.vqdmlah.v4i32(%a, %b, %c) with the i32
; scalar passed through unchanged.
; The checks expect vqdmlah.s32 q0, q1, r0.
288 define arm_aapcs_vfpcc <4 x i32> @test_vqdmlahq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
289 ; CHECK-LABEL: test_vqdmlahq_n_s32:
290 ; CHECK: @ %bb.0: @ %entry
291 ; CHECK-NEXT: vqdmlah.s32 q0, q1, r0
294 %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %c)
; vqdmlashq_n_s8: calls llvm.arm.mve.vqdmlash.v16i8(%m1, %m2, scalar).
; The i8 signext scalar %add is widened to i32 with zext before the call.
; The checks expect vqdmlash.s8 q0, q1, r0.
298 define arm_aapcs_vfpcc <16 x i8> @test_vqdmlashq_n_s8(<16 x i8> %m1, <16 x i8> %m2, i8 signext %add) {
299 ; CHECK-LABEL: test_vqdmlashq_n_s8:
300 ; CHECK: @ %bb.0: @ %entry
301 ; CHECK-NEXT: vqdmlash.s8 q0, q1, r0
304 %0 = zext i8 %add to i32
305 %1 = tail call <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8> %m1, <16 x i8> %m2, i32 %0)
; vqdmlashq_n_s16: calls llvm.arm.mve.vqdmlash.v8i16(%m1, %m2, scalar).
; The i16 signext scalar %add is widened to i32 with zext before the call.
; The checks expect vqdmlash.s16 q0, q1, r0.
309 define arm_aapcs_vfpcc <8 x i16> @test_vqdmlashq_n_s16(<8 x i16> %m1, <8 x i16> %m2, i16 signext %add) {
310 ; CHECK-LABEL: test_vqdmlashq_n_s16:
311 ; CHECK: @ %bb.0: @ %entry
312 ; CHECK-NEXT: vqdmlash.s16 q0, q1, r0
315 %0 = zext i16 %add to i32
316 %1 = tail call <8 x i16> @llvm.arm.mve.vqdmlash.v8i16(<8 x i16> %m1, <8 x i16> %m2, i32 %0)
; vqdmlashq_n_s32: calls llvm.arm.mve.vqdmlash.v4i32(%m1, %m2, %add) with the
; i32 scalar passed through unchanged.
; The checks expect vqdmlash.s32 q0, q1, r0.
320 define arm_aapcs_vfpcc <4 x i32> @test_vqdmlashq_n_s32(<4 x i32> %m1, <4 x i32> %m2, i32 %add) {
321 ; CHECK-LABEL: test_vqdmlashq_n_s32:
322 ; CHECK: @ %bb.0: @ %entry
323 ; CHECK-NEXT: vqdmlash.s32 q0, q1, r0
326 %0 = tail call <4 x i32> @llvm.arm.mve.vqdmlash.v4i32(<4 x i32> %m1, <4 x i32> %m2, i32 %add)
; vqrdmlahq_n_s8: calls llvm.arm.mve.vqrdmlah.v16i8(%a, %b, scalar).
; The i8 signext scalar %c is widened to i32 with zext before the call.
; The checks expect vqrdmlah.s8 q0, q1, r0.
330 define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlahq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
331 ; CHECK-LABEL: test_vqrdmlahq_n_s8:
332 ; CHECK: @ %bb.0: @ %entry
333 ; CHECK-NEXT: vqrdmlah.s8 q0, q1, r0
336 %0 = zext i8 %c to i32
337 %1 = tail call <16 x i8> @llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> %a, <16 x i8> %b, i32 %0)
; vqrdmlahq_n_s16: calls llvm.arm.mve.vqrdmlah.v8i16(%a, %b, scalar).
; The i16 signext scalar %c is widened to i32 with zext before the call.
; The checks expect vqrdmlah.s16 q0, q1, r0.
341 define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlahq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) {
342 ; CHECK-LABEL: test_vqrdmlahq_n_s16:
343 ; CHECK: @ %bb.0: @ %entry
344 ; CHECK-NEXT: vqrdmlah.s16 q0, q1, r0
347 %0 = zext i16 %c to i32
348 %1 = tail call <8 x i16> @llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> %a, <8 x i16> %b, i32 %0)
; vqrdmlahq_n_s32: calls llvm.arm.mve.vqrdmlah.v4i32(%a, %b, %c) with the i32
; scalar passed through unchanged.
; The checks expect vqrdmlah.s32 q0, q1, r0.
352 define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlahq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
353 ; CHECK-LABEL: test_vqrdmlahq_n_s32:
354 ; CHECK: @ %bb.0: @ %entry
355 ; CHECK-NEXT: vqrdmlah.s32 q0, q1, r0
358 %0 = tail call <4 x i32> @llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %c)
; vqrdmlashq_n_s8: calls llvm.arm.mve.vqrdmlash.v16i8(%a, %b, scalar).
; The i8 signext scalar %c is widened to i32 with zext before the call.
; The checks expect vqrdmlash.s8 q0, q1, r0.
362 define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlashq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) {
363 ; CHECK-LABEL: test_vqrdmlashq_n_s8:
364 ; CHECK: @ %bb.0: @ %entry
365 ; CHECK-NEXT: vqrdmlash.s8 q0, q1, r0
368 %0 = zext i8 %c to i32
369 %1 = tail call <16 x i8> @llvm.arm.mve.vqrdmlash.v16i8(<16 x i8> %a, <16 x i8> %b, i32 %0)
; vqrdmlashq_n_s16: calls llvm.arm.mve.vqrdmlash.v8i16(%a, %b, scalar).
; The i16 signext scalar %c is widened to i32 with zext before the call.
; The checks expect vqrdmlash.s16 q0, q1, r0.
373 define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlashq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) {
374 ; CHECK-LABEL: test_vqrdmlashq_n_s16:
375 ; CHECK: @ %bb.0: @ %entry
376 ; CHECK-NEXT: vqrdmlash.s16 q0, q1, r0
379 %0 = zext i16 %c to i32
380 %1 = tail call <8 x i16> @llvm.arm.mve.vqrdmlash.v8i16(<8 x i16> %a, <8 x i16> %b, i32 %0)
; vqrdmlashq_n_s32: calls llvm.arm.mve.vqrdmlash.v4i32(%a, %b, %c) with the i32
; scalar passed through unchanged.
; The checks expect vqrdmlash.s32 q0, q1, r0.
384 define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlashq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) {
385 ; CHECK-LABEL: test_vqrdmlashq_n_s32:
386 ; CHECK: @ %bb.0: @ %entry
387 ; CHECK-NEXT: vqrdmlash.s32 q0, q1, r0
390 %0 = tail call <4 x i32> @llvm.arm.mve.vqrdmlash.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %c)
; vfmaq_m_f16: %p is zero-extended to i32 and converted to an <8 x i1> predicate
; with llvm.arm.mve.pred.i2v.v8i1; the predicate is the last operand of
; llvm.arm.mve.fma.predicated.v8f16.v8i1(%b, %c, %a, pred).
; The checks expect vmsr p0, r0 and vfmat.f16 q0, q1, q2.
394 define arm_aapcs_vfpcc <8 x half> @test_vfmaq_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
395 ; CHECK-LABEL: test_vfmaq_m_f16:
396 ; CHECK: @ %bb.0: @ %entry
397 ; CHECK-NEXT: vmsr p0, r0
399 ; CHECK-NEXT: vfmat.f16 q0, q1, q2
402 %0 = zext i16 %p to i32
403 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
404 %2 = tail call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> %b, <8 x half> %c, <8 x half> %a, <8 x i1> %1)
; vfmaq_m_f32: %p is zero-extended to i32 and converted to a <4 x i1> predicate
; with llvm.arm.mve.pred.i2v.v4i1; the predicate is the last operand of
; llvm.arm.mve.fma.predicated.v4f32.v4i1(%b, %c, %a, pred).
; The checks expect vmsr p0, r0 and vfmat.f32 q0, q1, q2.
408 define arm_aapcs_vfpcc <4 x float> @test_vfmaq_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
409 ; CHECK-LABEL: test_vfmaq_m_f32:
410 ; CHECK: @ %bb.0: @ %entry
411 ; CHECK-NEXT: vmsr p0, r0
413 ; CHECK-NEXT: vfmat.f32 q0, q1, q2
416 %0 = zext i16 %p to i32
417 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
418 %2 = tail call <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %b, <4 x float> %c, <4 x float> %a, <4 x i1> %1)
; vfmaq_m_n_f16: the half scalar is recovered from float %c.coerce (bitcast to
; i32, trunc to i16, bitcast to half) and splatted to <8 x half>; %p is
; zero-extended and converted to an <8 x i1> predicate.
; The splat is the second multiplicand of
; llvm.arm.mve.fma.predicated.v8f16.v8i1(%b, splat, %a, pred).
; The checks expect vmov r1, s8, vmsr p0, r0 and vfmat.f16 q0, q1, r1.
422 define arm_aapcs_vfpcc <8 x half> @test_vfmaq_m_n_f16(<8 x half> %a, <8 x half> %b, float %c.coerce, i16 zeroext %p) {
423 ; CHECK-LABEL: test_vfmaq_m_n_f16:
424 ; CHECK: @ %bb.0: @ %entry
425 ; CHECK-NEXT: vmov r1, s8
426 ; CHECK-NEXT: vmsr p0, r0
428 ; CHECK-NEXT: vfmat.f16 q0, q1, r1
431 %0 = bitcast float %c.coerce to i32
432 %tmp.0.extract.trunc = trunc i32 %0 to i16
433 %1 = bitcast i16 %tmp.0.extract.trunc to half
434 %.splatinsert = insertelement <8 x half> undef, half %1, i32 0
435 %.splat = shufflevector <8 x half> %.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer
436 %2 = zext i16 %p to i32
437 %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
438 %4 = tail call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> %b, <8 x half> %.splat, <8 x half> %a, <8 x i1> %3)
; vfmaq_m_n_f32: float %c is splatted to <4 x float>; %p is zero-extended and
; converted to a <4 x i1> predicate. The splat is the second multiplicand of
; llvm.arm.mve.fma.predicated.v4f32.v4i1(%b, splat, %a, pred).
; The checks expect vmov r1, s8, vmsr p0, r0 and vfmat.f32 q0, q1, r1.
442 define arm_aapcs_vfpcc <4 x float> @test_vfmaq_m_n_f32(<4 x float> %a, <4 x float> %b, float %c, i16 zeroext %p) {
443 ; CHECK-LABEL: test_vfmaq_m_n_f32:
444 ; CHECK: @ %bb.0: @ %entry
445 ; CHECK-NEXT: vmov r1, s8
446 ; CHECK-NEXT: vmsr p0, r0
448 ; CHECK-NEXT: vfmat.f32 q0, q1, r1
451 %.splatinsert = insertelement <4 x float> undef, float %c, i32 0
452 %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
453 %0 = zext i16 %p to i32
454 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
455 %2 = tail call <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %b, <4 x float> %.splat, <4 x float> %a, <4 x i1> %1)
; vfmasq_m_n_f16: the half scalar is recovered from float %c.coerce and splatted
; to <8 x half>; the splat is the addend (third operand) of
; llvm.arm.mve.fma.predicated.v8f16.v8i1(%a, %b, splat, pred).
; The expected output materialises the splat with vdup.16 q2, r1, uses the
; vector-by-vector vfmat.f16 q2, q0, q1 and then copies the result back with
; vmov q0, q2; no scalar-operand instruction is expected here.
459 define arm_aapcs_vfpcc <8 x half> @test_vfmasq_m_n_f16(<8 x half> %a, <8 x half> %b, float %c.coerce, i16 zeroext %p) {
460 ; CHECK-LABEL: test_vfmasq_m_n_f16:
461 ; CHECK: @ %bb.0: @ %entry
462 ; CHECK-NEXT: vmov r1, s8
463 ; CHECK-NEXT: vmsr p0, r0
464 ; CHECK-NEXT: vdup.16 q2, r1
466 ; CHECK-NEXT: vfmat.f16 q2, q0, q1
467 ; CHECK-NEXT: vmov q0, q2
470 %0 = bitcast float %c.coerce to i32
471 %tmp.0.extract.trunc = trunc i32 %0 to i16
472 %1 = bitcast i16 %tmp.0.extract.trunc to half
473 %.splatinsert = insertelement <8 x half> undef, half %1, i32 0
474 %.splat = shufflevector <8 x half> %.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer
475 %2 = zext i16 %p to i32
476 %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
477 %4 = tail call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> %a, <8 x half> %b, <8 x half> %.splat, <8 x i1> %3)
; vfmasq_m_n_f16_select: same inputs as the predicated vfmas test, but expressed
; as an unpredicated llvm.fma.v8f16(%a, %b, splat) followed by a select on the
; <8 x i1> predicate that falls back to %a for inactive lanes.
; The checks expect vmov r1, s8, vmsr p0, r0 and the scalar-operand
; vfmast.f16 q0, q1, r1.
481 define arm_aapcs_vfpcc <8 x half> @test_vfmasq_m_n_f16_select(<8 x half> %a, <8 x half> %b, float %c.coerce, i16 zeroext %p) {
482 ; CHECK-LABEL: test_vfmasq_m_n_f16_select:
483 ; CHECK: @ %bb.0: @ %entry
484 ; CHECK-NEXT: vmov r1, s8
485 ; CHECK-NEXT: vmsr p0, r0
487 ; CHECK-NEXT: vfmast.f16 q0, q1, r1
490 %0 = bitcast float %c.coerce to i32
491 %tmp.0.extract.trunc = trunc i32 %0 to i16
492 %1 = bitcast i16 %tmp.0.extract.trunc to half
493 %.splatinsert = insertelement <8 x half> undef, half %1, i32 0
494 %.splat = shufflevector <8 x half> %.splatinsert, <8 x half> undef, <8 x i32> zeroinitializer
495 %2 = zext i16 %p to i32
496 %3 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %2)
497 %4 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %.splat)
498 %5 = select <8 x i1> %3, <8 x half> %4, <8 x half> %a
; vfmasq_m_n_f32: float %c is splatted to <4 x float> and used as the addend
; (third operand) of llvm.arm.mve.fma.predicated.v4f32.v4i1(%a, %b, splat, pred).
; The expected output materialises the splat with vdup.32 q2, r1, uses the
; vector-by-vector vfmat.f32 q2, q0, q1 and then copies the result back with
; vmov q0, q2; no scalar-operand instruction is expected here.
502 define arm_aapcs_vfpcc <4 x float> @test_vfmasq_m_n_f32(<4 x float> %a, <4 x float> %b, float %c, i16 zeroext %p) {
503 ; CHECK-LABEL: test_vfmasq_m_n_f32:
504 ; CHECK: @ %bb.0: @ %entry
505 ; CHECK-NEXT: vmov r1, s8
506 ; CHECK-NEXT: vmsr p0, r0
507 ; CHECK-NEXT: vdup.32 q2, r1
509 ; CHECK-NEXT: vfmat.f32 q2, q0, q1
510 ; CHECK-NEXT: vmov q0, q2
513 %.splatinsert = insertelement <4 x float> undef, float %c, i32 0
514 %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
515 %0 = zext i16 %p to i32
516 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
517 %2 = tail call <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %a, <4 x float> %b, <4 x float> %.splat, <4 x i1> %1)
; vfmasq_m_n_f32_select: an unpredicated llvm.fma.v4f32(%a, %b, splat of %c)
; followed by a select on the <4 x i1> predicate that falls back to %a for
; inactive lanes.
; The checks expect vmov r1, s8, vmsr p0, r0 and the scalar-operand
; vfmast.f32 q0, q1, r1.
521 define arm_aapcs_vfpcc <4 x float> @test_vfmasq_m_n_f32_select(<4 x float> %a, <4 x float> %b, float %c, i16 zeroext %p) {
522 ; CHECK-LABEL: test_vfmasq_m_n_f32_select:
523 ; CHECK: @ %bb.0: @ %entry
524 ; CHECK-NEXT: vmov r1, s8
525 ; CHECK-NEXT: vmsr p0, r0
527 ; CHECK-NEXT: vfmast.f32 q0, q1, r1
530 %.splatinsert = insertelement <4 x float> undef, float %c, i32 0
531 %.splat = shufflevector <4 x float> %.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
532 %0 = zext i16 %p to i32
533 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
534 %2 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %.splat)
535 %3 = select <4 x i1> %1, <4 x float> %2, <4 x float> %a
; vfmsq_m_f16: %c is negated with fneg and passed as the second multiplicand of
; llvm.arm.mve.fma.predicated.v8f16.v8i1(%b, -%c, %a, pred); the predicate comes
; from %p via zext and llvm.arm.mve.pred.i2v.v8i1.
; The checks expect vmsr p0, r0 and vfmst.f16 q0, q1, q2.
539 define arm_aapcs_vfpcc <8 x half> @test_vfmsq_m_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i16 zeroext %p) {
540 ; CHECK-LABEL: test_vfmsq_m_f16:
541 ; CHECK: @ %bb.0: @ %entry
542 ; CHECK-NEXT: vmsr p0, r0
544 ; CHECK-NEXT: vfmst.f16 q0, q1, q2
547 %0 = fneg <8 x half> %c
548 %1 = zext i16 %p to i32
549 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
550 %3 = tail call <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half> %b, <8 x half> %0, <8 x half> %a, <8 x i1> %2)
; vfmsq_m_f32: %c is negated with fneg and passed as the second multiplicand of
; llvm.arm.mve.fma.predicated.v4f32.v4i1(%b, -%c, %a, pred); the predicate comes
; from %p via zext and llvm.arm.mve.pred.i2v.v4i1.
; The checks expect vmsr p0, r0 and vfmst.f32 q0, q1, q2.
554 define arm_aapcs_vfpcc <4 x float> @test_vfmsq_m_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c, i16 zeroext %p) {
555 ; CHECK-LABEL: test_vfmsq_m_f32:
556 ; CHECK: @ %bb.0: @ %entry
557 ; CHECK-NEXT: vmsr p0, r0
559 ; CHECK-NEXT: vfmst.f32 q0, q1, q2
562 %0 = fneg <4 x float> %c
563 %1 = zext i16 %p to i32
564 %2 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
565 %3 = tail call <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %b, <4 x float> %0, <4 x float> %a, <4 x i1> %2)
; vmlaq_m_n_s8: the i8 signext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to a <16 x i1> predicate. Both feed
; llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vmlat.i8 q0, q1, r0.
569 define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c, i16 zeroext %p) {
570 ; CHECK-LABEL: test_vmlaq_m_n_s8:
571 ; CHECK: @ %bb.0: @ %entry
572 ; CHECK-NEXT: vmsr p0, r1
574 ; CHECK-NEXT: vmlat.i8 q0, q1, r0
577 %0 = zext i8 %c to i32
578 %1 = zext i16 %p to i32
579 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
580 %3 = tail call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
; vmlaq_m_n_s16: the i16 signext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to an <8 x i1> predicate. Both feed
; llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vmlat.i16 q0, q1, r0.
584 define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c, i16 zeroext %p) {
585 ; CHECK-LABEL: test_vmlaq_m_n_s16:
586 ; CHECK: @ %bb.0: @ %entry
587 ; CHECK-NEXT: vmsr p0, r1
589 ; CHECK-NEXT: vmlat.i16 q0, q1, r0
592 %0 = zext i16 %c to i32
593 %1 = zext i16 %p to i32
594 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
595 %3 = tail call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
; vmlaq_m_n_s32: %p is zero-extended and converted to a <4 x i1> predicate; the
; i32 scalar %c is passed unchanged to
; llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(%a, %b, %c, pred).
; The checks expect vmsr p0, r1 and vmlat.i32 q0, q1, r0.
599 define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
600 ; CHECK-LABEL: test_vmlaq_m_n_s32:
601 ; CHECK: @ %bb.0: @ %entry
602 ; CHECK-NEXT: vmsr p0, r1
604 ; CHECK-NEXT: vmlat.i32 q0, q1, r0
607 %0 = zext i16 %p to i32
608 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
609 %2 = tail call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
; vmlaq_m_n_u8: the i8 zeroext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to a <16 x i1> predicate. Both feed
; llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vmlat.i8 q0, q1, r0.
613 define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c, i16 zeroext %p) {
614 ; CHECK-LABEL: test_vmlaq_m_n_u8:
615 ; CHECK: @ %bb.0: @ %entry
616 ; CHECK-NEXT: vmsr p0, r1
618 ; CHECK-NEXT: vmlat.i8 q0, q1, r0
621 %0 = zext i8 %c to i32
622 %1 = zext i16 %p to i32
623 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
624 %3 = tail call <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
; vmlaq_m_n_u16: the i16 zeroext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to an <8 x i1> predicate. Both feed
; llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vmlat.i16 q0, q1, r0.
628 define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c, i16 zeroext %p) {
629 ; CHECK-LABEL: test_vmlaq_m_n_u16:
630 ; CHECK: @ %bb.0: @ %entry
631 ; CHECK-NEXT: vmsr p0, r1
633 ; CHECK-NEXT: vmlat.i16 q0, q1, r0
636 %0 = zext i16 %c to i32
637 %1 = zext i16 %p to i32
638 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
639 %3 = tail call <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
; vmlaq_m_n_u32: %p is zero-extended and converted to a <4 x i1> predicate; the
; i32 scalar %c is passed unchanged to
; llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(%a, %b, %c, pred).
; The checks expect vmsr p0, r1 and vmlat.i32 q0, q1, r0.
643 define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
644 ; CHECK-LABEL: test_vmlaq_m_n_u32:
645 ; CHECK: @ %bb.0: @ %entry
646 ; CHECK-NEXT: vmsr p0, r1
648 ; CHECK-NEXT: vmlat.i32 q0, q1, r0
651 %0 = zext i16 %p to i32
652 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
653 %2 = tail call <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
; vmlasq_m_n_s8: the i8 signext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to a <16 x i1> predicate. Both feed
; llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vmlast.i8 q0, q1, r0.
657 define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c, i16 zeroext %p) {
658 ; CHECK-LABEL: test_vmlasq_m_n_s8:
659 ; CHECK: @ %bb.0: @ %entry
660 ; CHECK-NEXT: vmsr p0, r1
662 ; CHECK-NEXT: vmlast.i8 q0, q1, r0
665 %0 = zext i8 %c to i32
666 %1 = zext i16 %p to i32
667 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
668 %3 = tail call <16 x i8> @llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
; vmlasq_m_n_s16: the i16 signext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to an <8 x i1> predicate. Both feed
; llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vmlast.i16 q0, q1, r0.
672 define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c, i16 zeroext %p) {
673 ; CHECK-LABEL: test_vmlasq_m_n_s16:
674 ; CHECK: @ %bb.0: @ %entry
675 ; CHECK-NEXT: vmsr p0, r1
677 ; CHECK-NEXT: vmlast.i16 q0, q1, r0
680 %0 = zext i16 %c to i32
681 %1 = zext i16 %p to i32
682 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
683 %3 = tail call <8 x i16> @llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
; vmlasq_m_n_s32: %p is zero-extended and converted to a <4 x i1> predicate; the
; i32 scalar %c is passed unchanged to
; llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(%a, %b, %c, pred).
; The checks expect vmsr p0, r1 and vmlast.i32 q0, q1, r0.
687 define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
688 ; CHECK-LABEL: test_vmlasq_m_n_s32:
689 ; CHECK: @ %bb.0: @ %entry
690 ; CHECK-NEXT: vmsr p0, r1
692 ; CHECK-NEXT: vmlast.i32 q0, q1, r0
695 %0 = zext i16 %p to i32
696 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
697 %2 = tail call <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
; vmlasq_m_n_u8: the i8 zeroext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to a <16 x i1> predicate. Both feed
; llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vmlast.i8 q0, q1, r0.
701 define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c, i16 zeroext %p) {
702 ; CHECK-LABEL: test_vmlasq_m_n_u8:
703 ; CHECK: @ %bb.0: @ %entry
704 ; CHECK-NEXT: vmsr p0, r1
706 ; CHECK-NEXT: vmlast.i8 q0, q1, r0
709 %0 = zext i8 %c to i32
710 %1 = zext i16 %p to i32
711 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
712 %3 = tail call <16 x i8> @llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
; vmlasq_m_n_u16: the i16 zeroext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to an <8 x i1> predicate. Both feed
; llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vmlast.i16 q0, q1, r0.
716 define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c, i16 zeroext %p) {
717 ; CHECK-LABEL: test_vmlasq_m_n_u16:
718 ; CHECK: @ %bb.0: @ %entry
719 ; CHECK-NEXT: vmsr p0, r1
721 ; CHECK-NEXT: vmlast.i16 q0, q1, r0
724 %0 = zext i16 %c to i32
725 %1 = zext i16 %p to i32
726 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
727 %3 = tail call <8 x i16> @llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
; vmlasq_m_n_u32: %p is zero-extended and converted to a <4 x i1> predicate; the
; i32 scalar %c is passed unchanged to
; llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(%a, %b, %c, pred).
; The checks expect vmsr p0, r1 and vmlast.i32 q0, q1, r0.
731 define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
732 ; CHECK-LABEL: test_vmlasq_m_n_u32:
733 ; CHECK: @ %bb.0: @ %entry
734 ; CHECK-NEXT: vmsr p0, r1
736 ; CHECK-NEXT: vmlast.i32 q0, q1, r0
739 %0 = zext i16 %p to i32
740 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
741 %2 = tail call <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
; vqdmlahq_m_n_s8: the i8 signext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to a <16 x i1> predicate. Both feed
; llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vqdmlaht.s8 q0, q1, r0.
745 define arm_aapcs_vfpcc <16 x i8> @test_vqdmlahq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c, i16 zeroext %p) {
746 ; CHECK-LABEL: test_vqdmlahq_m_n_s8:
747 ; CHECK: @ %bb.0: @ %entry
748 ; CHECK-NEXT: vmsr p0, r1
750 ; CHECK-NEXT: vqdmlaht.s8 q0, q1, r0
753 %0 = zext i8 %c to i32
754 %1 = zext i16 %p to i32
755 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
756 %3 = tail call <16 x i8> @llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
; vqdmlahq_m_n_s16: the i16 signext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to an <8 x i1> predicate. Both feed
; llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vqdmlaht.s16 q0, q1, r0.
760 define arm_aapcs_vfpcc <8 x i16> @test_vqdmlahq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c, i16 zeroext %p) {
761 ; CHECK-LABEL: test_vqdmlahq_m_n_s16:
762 ; CHECK: @ %bb.0: @ %entry
763 ; CHECK-NEXT: vmsr p0, r1
765 ; CHECK-NEXT: vqdmlaht.s16 q0, q1, r0
768 %0 = zext i16 %c to i32
769 %1 = zext i16 %p to i32
770 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
771 %3 = tail call <8 x i16> @llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
; vqdmlahq_m_n_s32: %p is zero-extended and converted to a <4 x i1> predicate;
; the i32 scalar %c is passed unchanged to
; llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(%a, %b, %c, pred).
; The checks expect vmsr p0, r1 and vqdmlaht.s32 q0, q1, r0.
775 define arm_aapcs_vfpcc <4 x i32> @test_vqdmlahq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
776 ; CHECK-LABEL: test_vqdmlahq_m_n_s32:
777 ; CHECK: @ %bb.0: @ %entry
778 ; CHECK-NEXT: vmsr p0, r1
780 ; CHECK-NEXT: vqdmlaht.s32 q0, q1, r0
783 %0 = zext i16 %p to i32
784 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
785 %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
; vqdmlashq_m_n_s8: the i8 signext scalar %add is widened to i32 with zext; %p
; is zero-extended and converted to a <16 x i1> predicate. Both feed
; llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(%m1, %m2, scalar, pred).
; The checks expect vmsr p0, r1 and vqdmlasht.s8 q0, q1, r0.
789 define arm_aapcs_vfpcc <16 x i8> @test_vqdmlashq_m_n_s8(<16 x i8> %m1, <16 x i8> %m2, i8 signext %add, i16 zeroext %p) {
790 ; CHECK-LABEL: test_vqdmlashq_m_n_s8:
791 ; CHECK: @ %bb.0: @ %entry
792 ; CHECK-NEXT: vmsr p0, r1
794 ; CHECK-NEXT: vqdmlasht.s8 q0, q1, r0
797 %0 = zext i8 %add to i32
798 %1 = zext i16 %p to i32
799 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
800 %3 = tail call <16 x i8> @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8> %m1, <16 x i8> %m2, i32 %0, <16 x i1> %2)
; vqdmlashq_m_n_s16: the i16 signext scalar %add is widened to i32 with zext; %p
; is zero-extended and converted to an <8 x i1> predicate. Both feed
; llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(%m1, %m2, scalar, pred).
; The checks expect vmsr p0, r1 and vqdmlasht.s16 q0, q1, r0.
804 define arm_aapcs_vfpcc <8 x i16> @test_vqdmlashq_m_n_s16(<8 x i16> %m1, <8 x i16> %m2, i16 signext %add, i16 zeroext %p) {
805 ; CHECK-LABEL: test_vqdmlashq_m_n_s16:
806 ; CHECK: @ %bb.0: @ %entry
807 ; CHECK-NEXT: vmsr p0, r1
809 ; CHECK-NEXT: vqdmlasht.s16 q0, q1, r0
812 %0 = zext i16 %add to i32
813 %1 = zext i16 %p to i32
814 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
815 %3 = tail call <8 x i16> @llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16> %m1, <8 x i16> %m2, i32 %0, <8 x i1> %2)
; vqdmlashq_m_n_s32: %p is zero-extended and converted to a <4 x i1> predicate;
; the i32 scalar %add is passed unchanged to
; llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(%m1, %m2, %add, pred).
; The checks expect vmsr p0, r1 and vqdmlasht.s32 q0, q1, r0.
819 define arm_aapcs_vfpcc <4 x i32> @test_vqdmlashq_m_n_s32(<4 x i32> %m1, <4 x i32> %m2, i32 %add, i16 zeroext %p) {
820 ; CHECK-LABEL: test_vqdmlashq_m_n_s32:
821 ; CHECK: @ %bb.0: @ %entry
822 ; CHECK-NEXT: vmsr p0, r1
824 ; CHECK-NEXT: vqdmlasht.s32 q0, q1, r0
827 %0 = zext i16 %p to i32
828 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
829 %2 = tail call <4 x i32> @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32> %m1, <4 x i32> %m2, i32 %add, <4 x i1> %1)
; vqrdmlahq_m_n_s8: the i8 signext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to a <16 x i1> predicate. Both feed
; llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vqrdmlaht.s8 q0, q1, r0.
833 define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlahq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c, i16 zeroext %p) {
834 ; CHECK-LABEL: test_vqrdmlahq_m_n_s8:
835 ; CHECK: @ %bb.0: @ %entry
836 ; CHECK-NEXT: vmsr p0, r1
838 ; CHECK-NEXT: vqrdmlaht.s8 q0, q1, r0
841 %0 = zext i8 %c to i32
842 %1 = zext i16 %p to i32
843 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
844 %3 = tail call <16 x i8> @llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
; vqrdmlahq_m_n_s16: the i16 signext scalar %c is widened to i32 with zext; %p
; is zero-extended and converted to an <8 x i1> predicate. Both feed
; llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vqrdmlaht.s16 q0, q1, r0.
848 define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlahq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c, i16 zeroext %p) {
849 ; CHECK-LABEL: test_vqrdmlahq_m_n_s16:
850 ; CHECK: @ %bb.0: @ %entry
851 ; CHECK-NEXT: vmsr p0, r1
853 ; CHECK-NEXT: vqrdmlaht.s16 q0, q1, r0
856 %0 = zext i16 %c to i32
857 %1 = zext i16 %p to i32
858 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
859 %3 = tail call <8 x i16> @llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
; vqrdmlahq_m_n_s32: %p is zero-extended and converted to a <4 x i1> predicate;
; the i32 scalar %c is passed unchanged to
; llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(%a, %b, %c, pred).
; The checks expect vmsr p0, r1 and vqrdmlaht.s32 q0, q1, r0.
863 define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlahq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
864 ; CHECK-LABEL: test_vqrdmlahq_m_n_s32:
865 ; CHECK: @ %bb.0: @ %entry
866 ; CHECK-NEXT: vmsr p0, r1
868 ; CHECK-NEXT: vqrdmlaht.s32 q0, q1, r0
871 %0 = zext i16 %p to i32
872 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
873 %2 = tail call <4 x i32> @llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
; vqrdmlashq_m_n_s8: the i8 signext scalar %c is widened to i32 with zext; %p is
; zero-extended and converted to a <16 x i1> predicate. Both feed
; llvm.arm.mve.vqrdmlash.predicated.v16i8.v16i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vqrdmlasht.s8 q0, q1, r0.
877 define arm_aapcs_vfpcc <16 x i8> @test_vqrdmlashq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c, i16 zeroext %p) {
878 ; CHECK-LABEL: test_vqrdmlashq_m_n_s8:
879 ; CHECK: @ %bb.0: @ %entry
880 ; CHECK-NEXT: vmsr p0, r1
882 ; CHECK-NEXT: vqrdmlasht.s8 q0, q1, r0
885 %0 = zext i8 %c to i32
886 %1 = zext i16 %p to i32
887 %2 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %1)
888 %3 = tail call <16 x i8> @llvm.arm.mve.vqrdmlash.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 %0, <16 x i1> %2)
; vqrdmlashq_m_n_s16: the i16 signext scalar %c is widened to i32 with zext; %p
; is zero-extended and converted to an <8 x i1> predicate. Both feed
; llvm.arm.mve.vqrdmlash.predicated.v8i16.v8i1(%a, %b, scalar, pred).
; The checks expect vmsr p0, r1 and vqrdmlasht.s16 q0, q1, r0.
892 define arm_aapcs_vfpcc <8 x i16> @test_vqrdmlashq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c, i16 zeroext %p) {
893 ; CHECK-LABEL: test_vqrdmlashq_m_n_s16:
894 ; CHECK: @ %bb.0: @ %entry
895 ; CHECK-NEXT: vmsr p0, r1
897 ; CHECK-NEXT: vqrdmlasht.s16 q0, q1, r0
900 %0 = zext i16 %c to i32
901 %1 = zext i16 %p to i32
902 %2 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1)
903 %3 = tail call <8 x i16> @llvm.arm.mve.vqrdmlash.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 %0, <8 x i1> %2)
; vqrdmlashq_m_n_s32: %p is zero-extended and converted to a <4 x i1> predicate;
; the i32 scalar %c is passed unchanged to
; llvm.arm.mve.vqrdmlash.predicated.v4i32.v4i1(%a, %b, %c, pred).
; The checks expect vmsr p0, r1 and vqrdmlasht.s32 q0, q1, r0.
907 define arm_aapcs_vfpcc <4 x i32> @test_vqrdmlashq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c, i16 zeroext %p) {
908 ; CHECK-LABEL: test_vqrdmlashq_m_n_s32:
909 ; CHECK: @ %bb.0: @ %entry
910 ; CHECK-NEXT: vmsr p0, r1
912 ; CHECK-NEXT: vqrdmlasht.s32 q0, q1, r0
915 %0 = zext i16 %p to i32
916 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
917 %2 = tail call <4 x i32> @llvm.arm.mve.vqrdmlash.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c, <4 x i1> %1)
921 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
922 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
923 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
925 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
926 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
927 declare <8 x half> @llvm.arm.mve.fma.predicated.v8f16.v8i1(<8 x half>, <8 x half>, <8 x half>, <8 x i1>)
928 declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>)
929 declare <16 x i8> @llvm.arm.mve.vmla.n.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
930 declare <8 x i16> @llvm.arm.mve.vmla.n.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
931 declare <4 x i32> @llvm.arm.mve.vmla.n.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
932 declare <16 x i8> @llvm.arm.mve.vmlas.n.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
933 declare <8 x i16> @llvm.arm.mve.vmlas.n.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
934 declare <4 x i32> @llvm.arm.mve.vmlas.n.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
935 declare <16 x i8> @llvm.arm.mve.vqdmlah.v16i8(<16 x i8>, <16 x i8>, i32)
936 declare <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16>, <8 x i16>, i32)
937 declare <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32>, <4 x i32>, i32)
938 declare <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8>, <16 x i8>, i32)
939 declare <8 x i16> @llvm.arm.mve.vqdmlash.v8i16(<8 x i16>, <8 x i16>, i32)
940 declare <4 x i32> @llvm.arm.mve.vqdmlash.v4i32(<4 x i32>, <4 x i32>, i32)
941 declare <16 x i8> @llvm.arm.mve.vqrdmlah.v16i8(<16 x i8>, <16 x i8>, i32)
942 declare <8 x i16> @llvm.arm.mve.vqrdmlah.v8i16(<8 x i16>, <8 x i16>, i32)
943 declare <4 x i32> @llvm.arm.mve.vqrdmlah.v4i32(<4 x i32>, <4 x i32>, i32)
944 declare <16 x i8> @llvm.arm.mve.vqrdmlash.v16i8(<16 x i8>, <16 x i8>, i32)
945 declare <8 x i16> @llvm.arm.mve.vqrdmlash.v8i16(<8 x i16>, <8 x i16>, i32)
946 declare <4 x i32> @llvm.arm.mve.vqrdmlash.v4i32(<4 x i32>, <4 x i32>, i32)
947 declare <16 x i8> @llvm.arm.mve.vqdmlah.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
948 declare <8 x i16> @llvm.arm.mve.vqdmlah.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
949 declare <4 x i32> @llvm.arm.mve.vqdmlah.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
950 declare <16 x i8> @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
951 declare <8 x i16> @llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
952 declare <4 x i32> @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
953 declare <16 x i8> @llvm.arm.mve.vqrdmlah.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
954 declare <8 x i16> @llvm.arm.mve.vqrdmlah.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
955 declare <4 x i32> @llvm.arm.mve.vqrdmlah.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)
956 declare <16 x i8> @llvm.arm.mve.vqrdmlash.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>)
957 declare <8 x i16> @llvm.arm.mve.vqrdmlash.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>)
958 declare <4 x i32> @llvm.arm.mve.vqrdmlash.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>)