1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-MVE-FP
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -fp-contract=fast -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-MVE-VMLA
4 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-MVE
; vfma16_v1: computes %src1 + (%src2 * %src3) on <8 x half>, written as a
; separate fmul followed by an fadd with the product as the second operand.
; Expected code per configuration: +mve.fp keeps vmul.f16 + vadd.f16; adding
; -fp-contract=fast fuses the pair into a single vfma.f16; +mve,+fullfp16
; produces per-lane scalar vmla.f16 surrounded by vmovx.f16/vins.f16.
6 define arm_aapcs_vfpcc <8 x half> @vfma16_v1(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
7 ; CHECK-MVE-FP-LABEL: vfma16_v1:
8 ; CHECK-MVE-FP: @ %bb.0: @ %entry
9 ; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, q2
10 ; CHECK-MVE-FP-NEXT: vadd.f16 q0, q0, q1
11 ; CHECK-MVE-FP-NEXT: bx lr
13 ; CHECK-MVE-VMLA-LABEL: vfma16_v1:
14 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
15 ; CHECK-MVE-VMLA-NEXT: vfma.f16 q0, q1, q2
16 ; CHECK-MVE-VMLA-NEXT: bx lr
18 ; CHECK-MVE-LABEL: vfma16_v1:
19 ; CHECK-MVE: @ %bb.0: @ %entry
20 ; CHECK-MVE-NEXT: vmovx.f16 s13, s0
21 ; CHECK-MVE-NEXT: vmovx.f16 s12, s8
22 ; CHECK-MVE-NEXT: vmovx.f16 s14, s4
23 ; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8
24 ; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12
25 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1
26 ; CHECK-MVE-NEXT: vmovx.f16 s4, s9
27 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
28 ; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4
29 ; CHECK-MVE-NEXT: vmla.f16 s1, s5, s9
30 ; CHECK-MVE-NEXT: vins.f16 s1, s12
31 ; CHECK-MVE-NEXT: vmovx.f16 s12, s2
32 ; CHECK-MVE-NEXT: vmovx.f16 s4, s10
33 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
34 ; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4
35 ; CHECK-MVE-NEXT: vmla.f16 s2, s6, s10
36 ; CHECK-MVE-NEXT: vmovx.f16 s8, s3
37 ; CHECK-MVE-NEXT: vmovx.f16 s4, s11
38 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
39 ; CHECK-MVE-NEXT: vmla.f16 s3, s7, s11
40 ; CHECK-MVE-NEXT: vmla.f16 s8, s6, s4
41 ; CHECK-MVE-NEXT: vins.f16 s0, s13
42 ; CHECK-MVE-NEXT: vins.f16 s2, s12
43 ; CHECK-MVE-NEXT: vins.f16 s3, s8
44 ; CHECK-MVE-NEXT: bx lr
46 %0 = fmul <8 x half> %src2, %src3
47 %1 = fadd <8 x half> %src1, %0
; vfma16_v2: same multiply-accumulate as vfma16_v1 on <8 x half>, but with the
; fadd operands commuted (product first, %src1 second).
; With -fp-contract=fast the checks still expect one vfma.f16 q0, q1, q2; the
; plain +mve.fp run keeps vmul.f16 + vadd.f16 (operands swapped accordingly), and
; +mve,+fullfp16 expects the same per-lane scalar vmla.f16 sequence.
51 define arm_aapcs_vfpcc <8 x half> @vfma16_v2(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
52 ; CHECK-MVE-FP-LABEL: vfma16_v2:
53 ; CHECK-MVE-FP: @ %bb.0: @ %entry
54 ; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, q2
55 ; CHECK-MVE-FP-NEXT: vadd.f16 q0, q1, q0
56 ; CHECK-MVE-FP-NEXT: bx lr
58 ; CHECK-MVE-VMLA-LABEL: vfma16_v2:
59 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
60 ; CHECK-MVE-VMLA-NEXT: vfma.f16 q0, q1, q2
61 ; CHECK-MVE-VMLA-NEXT: bx lr
63 ; CHECK-MVE-LABEL: vfma16_v2:
64 ; CHECK-MVE: @ %bb.0: @ %entry
65 ; CHECK-MVE-NEXT: vmovx.f16 s13, s0
66 ; CHECK-MVE-NEXT: vmovx.f16 s12, s8
67 ; CHECK-MVE-NEXT: vmovx.f16 s14, s4
68 ; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8
69 ; CHECK-MVE-NEXT: vmla.f16 s13, s14, s12
70 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1
71 ; CHECK-MVE-NEXT: vmovx.f16 s4, s9
72 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
73 ; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4
74 ; CHECK-MVE-NEXT: vmla.f16 s1, s5, s9
75 ; CHECK-MVE-NEXT: vins.f16 s1, s12
76 ; CHECK-MVE-NEXT: vmovx.f16 s12, s2
77 ; CHECK-MVE-NEXT: vmovx.f16 s4, s10
78 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
79 ; CHECK-MVE-NEXT: vmla.f16 s12, s8, s4
80 ; CHECK-MVE-NEXT: vmla.f16 s2, s6, s10
81 ; CHECK-MVE-NEXT: vmovx.f16 s8, s3
82 ; CHECK-MVE-NEXT: vmovx.f16 s4, s11
83 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
84 ; CHECK-MVE-NEXT: vmla.f16 s3, s7, s11
85 ; CHECK-MVE-NEXT: vmla.f16 s8, s6, s4
86 ; CHECK-MVE-NEXT: vins.f16 s0, s13
87 ; CHECK-MVE-NEXT: vins.f16 s2, s12
88 ; CHECK-MVE-NEXT: vins.f16 s3, s8
89 ; CHECK-MVE-NEXT: bx lr
91 %0 = fmul <8 x half> %src2, %src3
92 %1 = fadd <8 x half> %0, %src1
; vfms16: computes %src1 - (%src2 * %src3) on <8 x half> via fmul + fsub.
; Expected code per configuration: +mve.fp keeps vmul.f16 + vsub.f16; adding
; -fp-contract=fast fuses into a single vfms.f16; +mve,+fullfp16 produces
; per-lane scalar vmls.f16 surrounded by vmovx.f16/vins.f16.
96 define arm_aapcs_vfpcc <8 x half> @vfms16(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
97 ; CHECK-MVE-FP-LABEL: vfms16:
98 ; CHECK-MVE-FP: @ %bb.0: @ %entry
99 ; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, q2
100 ; CHECK-MVE-FP-NEXT: vsub.f16 q0, q0, q1
101 ; CHECK-MVE-FP-NEXT: bx lr
103 ; CHECK-MVE-VMLA-LABEL: vfms16:
104 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
105 ; CHECK-MVE-VMLA-NEXT: vfms.f16 q0, q1, q2
106 ; CHECK-MVE-VMLA-NEXT: bx lr
108 ; CHECK-MVE-LABEL: vfms16:
109 ; CHECK-MVE: @ %bb.0: @ %entry
110 ; CHECK-MVE-NEXT: vmovx.f16 s13, s0
111 ; CHECK-MVE-NEXT: vmovx.f16 s12, s8
112 ; CHECK-MVE-NEXT: vmovx.f16 s14, s4
113 ; CHECK-MVE-NEXT: vmls.f16 s0, s4, s8
114 ; CHECK-MVE-NEXT: vmls.f16 s13, s14, s12
115 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1
116 ; CHECK-MVE-NEXT: vmovx.f16 s4, s9
117 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
118 ; CHECK-MVE-NEXT: vmls.f16 s12, s8, s4
119 ; CHECK-MVE-NEXT: vmls.f16 s1, s5, s9
120 ; CHECK-MVE-NEXT: vins.f16 s1, s12
121 ; CHECK-MVE-NEXT: vmovx.f16 s12, s2
122 ; CHECK-MVE-NEXT: vmovx.f16 s4, s10
123 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
124 ; CHECK-MVE-NEXT: vmls.f16 s12, s8, s4
125 ; CHECK-MVE-NEXT: vmls.f16 s2, s6, s10
126 ; CHECK-MVE-NEXT: vmovx.f16 s8, s3
127 ; CHECK-MVE-NEXT: vmovx.f16 s4, s11
128 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
129 ; CHECK-MVE-NEXT: vmls.f16 s3, s7, s11
130 ; CHECK-MVE-NEXT: vmls.f16 s8, s6, s4
131 ; CHECK-MVE-NEXT: vins.f16 s0, s13
132 ; CHECK-MVE-NEXT: vins.f16 s2, s12
133 ; CHECK-MVE-NEXT: vins.f16 s3, s8
134 ; CHECK-MVE-NEXT: bx lr
136 %0 = fmul <8 x half> %src2, %src3
137 %1 = fsub <8 x half> %src1, %0
; vfmar16: the float argument %src3o is truncated to half and splatted across
; all 8 lanes; the splat is the MULTIPLIER: %src1 + (%src2 * splat).
; With -fp-contract=fast the checks expect the vector-by-scalar form
; vfma.f16 q0, q1, r0; plain +mve.fp expects vmul.f16 by r0 followed by vadd.f16;
; +mve,+fullfp16 expects per-lane scalar vmla.f16, all using s8 as the multiplier.
141 define arm_aapcs_vfpcc <8 x half> @vfmar16(<8 x half> %src1, <8 x half> %src2, float %src3o) {
142 ; CHECK-MVE-FP-LABEL: vfmar16:
143 ; CHECK-MVE-FP: @ %bb.0: @ %entry
144 ; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8
145 ; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8
146 ; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, r0
147 ; CHECK-MVE-FP-NEXT: vadd.f16 q0, q0, q1
148 ; CHECK-MVE-FP-NEXT: bx lr
150 ; CHECK-MVE-VMLA-LABEL: vfmar16:
151 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
152 ; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
153 ; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
154 ; CHECK-MVE-VMLA-NEXT: vfma.f16 q0, q1, r0
155 ; CHECK-MVE-VMLA-NEXT: bx lr
157 ; CHECK-MVE-LABEL: vfmar16:
158 ; CHECK-MVE: @ %bb.0: @ %entry
159 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s8, s8
160 ; CHECK-MVE-NEXT: vmovx.f16 s12, s0
161 ; CHECK-MVE-NEXT: vmovx.f16 s10, s4
162 ; CHECK-MVE-NEXT: vmla.f16 s0, s4, s8
163 ; CHECK-MVE-NEXT: vmla.f16 s12, s10, s8
164 ; CHECK-MVE-NEXT: vmovx.f16 s10, s1
165 ; CHECK-MVE-NEXT: vmovx.f16 s4, s5
166 ; CHECK-MVE-NEXT: vmla.f16 s1, s5, s8
167 ; CHECK-MVE-NEXT: vmla.f16 s10, s4, s8
168 ; CHECK-MVE-NEXT: vmovx.f16 s4, s6
169 ; CHECK-MVE-NEXT: vins.f16 s1, s10
170 ; CHECK-MVE-NEXT: vmovx.f16 s10, s2
171 ; CHECK-MVE-NEXT: vmla.f16 s10, s4, s8
172 ; CHECK-MVE-NEXT: vmla.f16 s2, s6, s8
173 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3
174 ; CHECK-MVE-NEXT: vmovx.f16 s4, s7
175 ; CHECK-MVE-NEXT: vmla.f16 s6, s4, s8
176 ; CHECK-MVE-NEXT: vmla.f16 s3, s7, s8
177 ; CHECK-MVE-NEXT: vins.f16 s0, s12
178 ; CHECK-MVE-NEXT: vins.f16 s2, s10
179 ; CHECK-MVE-NEXT: vins.f16 s3, s6
180 ; CHECK-MVE-NEXT: bx lr
182 %src3 = fptrunc float %src3o to half
183 %i = insertelement <8 x half> undef, half %src3, i32 0
184 %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
185 %0 = fmul <8 x half> %src2, %sp
186 %1 = fadd <8 x half> %src1, %0
; vfma16: the float argument %src3o is truncated to half and splatted across
; all 8 lanes; here the splat is the ADDEND: splat + (%src1 * %src2).
; With -fp-contract=fast the checks expect vfmas.f16 q0, q1, r0; plain +mve.fp
; expects vmul.f16 followed by vadd.f16 with r0; +mve,+fullfp16 expects per-lane
; scalar vmla.f16 into copies of the converted scalar.
; NOTE(review): the 32-bit counterpart of this test is named vfmas32, so this
; one would more consistently be called vfmas16 - confirm before renaming, since
; the autogenerated label checks depend on the function name.
190 define arm_aapcs_vfpcc <8 x half> @vfma16(<8 x half> %src1, <8 x half> %src2, float %src3o) {
191 ; CHECK-MVE-FP-LABEL: vfma16:
192 ; CHECK-MVE-FP: @ %bb.0: @ %entry
193 ; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8
194 ; CHECK-MVE-FP-NEXT: vmul.f16 q0, q0, q1
195 ; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8
196 ; CHECK-MVE-FP-NEXT: vadd.f16 q0, q0, r0
197 ; CHECK-MVE-FP-NEXT: bx lr
199 ; CHECK-MVE-VMLA-LABEL: vfma16:
200 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
201 ; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
202 ; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
203 ; CHECK-MVE-VMLA-NEXT: vfmas.f16 q0, q1, r0
204 ; CHECK-MVE-VMLA-NEXT: bx lr
206 ; CHECK-MVE-LABEL: vfma16:
207 ; CHECK-MVE: @ %bb.0: @ %entry
208 ; CHECK-MVE-NEXT: vmov q3, q0
209 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s3, s8
210 ; CHECK-MVE-NEXT: vmovx.f16 s0, s4
211 ; CHECK-MVE-NEXT: vmovx.f16 s2, s12
212 ; CHECK-MVE-NEXT: vmov.f32 s8, s3
213 ; CHECK-MVE-NEXT: vmla.f16 s8, s2, s0
214 ; CHECK-MVE-NEXT: vmov.f32 s0, s3
215 ; CHECK-MVE-NEXT: vmla.f16 s0, s12, s4
216 ; CHECK-MVE-NEXT: vmov.f32 s1, s3
217 ; CHECK-MVE-NEXT: vins.f16 s0, s8
218 ; CHECK-MVE-NEXT: vmovx.f16 s2, s5
219 ; CHECK-MVE-NEXT: vmovx.f16 s4, s13
220 ; CHECK-MVE-NEXT: vmov.f32 s8, s3
221 ; CHECK-MVE-NEXT: vmla.f16 s8, s4, s2
222 ; CHECK-MVE-NEXT: vmla.f16 s1, s13, s5
223 ; CHECK-MVE-NEXT: vins.f16 s1, s8
224 ; CHECK-MVE-NEXT: vmovx.f16 s2, s6
225 ; CHECK-MVE-NEXT: vmovx.f16 s4, s14
226 ; CHECK-MVE-NEXT: vmov.f32 s8, s3
227 ; CHECK-MVE-NEXT: vmla.f16 s8, s4, s2
228 ; CHECK-MVE-NEXT: vmov.f32 s2, s3
229 ; CHECK-MVE-NEXT: vmla.f16 s2, s14, s6
230 ; CHECK-MVE-NEXT: vmovx.f16 s4, s7
231 ; CHECK-MVE-NEXT: vins.f16 s2, s8
232 ; CHECK-MVE-NEXT: vmov.f32 s8, s3
233 ; CHECK-MVE-NEXT: vmovx.f16 s6, s15
234 ; CHECK-MVE-NEXT: vmla.f16 s3, s15, s7
235 ; CHECK-MVE-NEXT: vmla.f16 s8, s6, s4
236 ; CHECK-MVE-NEXT: vins.f16 s3, s8
237 ; CHECK-MVE-NEXT: bx lr
239 %src3 = fptrunc float %src3o to half
240 %i = insertelement <8 x half> undef, half %src3, i32 0
241 %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
242 %0 = fmul <8 x half> %src1, %src2
243 %1 = fadd <8 x half> %sp, %0
; vfma32_v1: computes %src1 + (%src2 * %src3) on <4 x float> via fmul + fadd
; with the product as the second fadd operand.
; Expected code per configuration: +mve.fp keeps vmul.f32 + vadd.f32; adding
; -fp-contract=fast fuses into a single vfma.f32; +mve,+fullfp16 produces four
; scalar vmla.f32 instructions, one per lane.
247 define arm_aapcs_vfpcc <4 x float> @vfma32_v1(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
248 ; CHECK-MVE-FP-LABEL: vfma32_v1:
249 ; CHECK-MVE-FP: @ %bb.0: @ %entry
250 ; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, q2
251 ; CHECK-MVE-FP-NEXT: vadd.f32 q0, q0, q1
252 ; CHECK-MVE-FP-NEXT: bx lr
254 ; CHECK-MVE-VMLA-LABEL: vfma32_v1:
255 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
256 ; CHECK-MVE-VMLA-NEXT: vfma.f32 q0, q1, q2
257 ; CHECK-MVE-VMLA-NEXT: bx lr
259 ; CHECK-MVE-LABEL: vfma32_v1:
260 ; CHECK-MVE: @ %bb.0: @ %entry
261 ; CHECK-MVE-NEXT: vmla.f32 s3, s7, s11
262 ; CHECK-MVE-NEXT: vmla.f32 s2, s6, s10
263 ; CHECK-MVE-NEXT: vmla.f32 s1, s5, s9
264 ; CHECK-MVE-NEXT: vmla.f32 s0, s4, s8
265 ; CHECK-MVE-NEXT: bx lr
267 %0 = fmul <4 x float> %src2, %src3
268 %1 = fadd <4 x float> %src1, %0
; vfma32_v2: same multiply-accumulate as vfma32_v1 on <4 x float>, but with the
; fadd operands commuted (product first, %src1 second).
; With -fp-contract=fast the checks still expect one vfma.f32 q0, q1, q2; the
; plain +mve.fp run keeps vmul.f32 + vadd.f32 (operands swapped accordingly), and
; +mve,+fullfp16 expects four scalar vmla.f32 instructions.
272 define arm_aapcs_vfpcc <4 x float> @vfma32_v2(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
273 ; CHECK-MVE-FP-LABEL: vfma32_v2:
274 ; CHECK-MVE-FP: @ %bb.0: @ %entry
275 ; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, q2
276 ; CHECK-MVE-FP-NEXT: vadd.f32 q0, q1, q0
277 ; CHECK-MVE-FP-NEXT: bx lr
279 ; CHECK-MVE-VMLA-LABEL: vfma32_v2:
280 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
281 ; CHECK-MVE-VMLA-NEXT: vfma.f32 q0, q1, q2
282 ; CHECK-MVE-VMLA-NEXT: bx lr
284 ; CHECK-MVE-LABEL: vfma32_v2:
285 ; CHECK-MVE: @ %bb.0: @ %entry
286 ; CHECK-MVE-NEXT: vmla.f32 s3, s7, s11
287 ; CHECK-MVE-NEXT: vmla.f32 s2, s6, s10
288 ; CHECK-MVE-NEXT: vmla.f32 s1, s5, s9
289 ; CHECK-MVE-NEXT: vmla.f32 s0, s4, s8
290 ; CHECK-MVE-NEXT: bx lr
292 %0 = fmul <4 x float> %src2, %src3
293 %1 = fadd <4 x float> %0, %src1
; vfms32: computes %src1 - (%src2 * %src3) on <4 x float> via fmul + fsub.
; Expected code per configuration: +mve.fp keeps vmul.f32 + vsub.f32; adding
; -fp-contract=fast fuses into a single vfms.f32; +mve,+fullfp16 produces four
; scalar vmls.f32 instructions, one per lane.
297 define arm_aapcs_vfpcc <4 x float> @vfms32(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
298 ; CHECK-MVE-FP-LABEL: vfms32:
299 ; CHECK-MVE-FP: @ %bb.0: @ %entry
300 ; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, q2
301 ; CHECK-MVE-FP-NEXT: vsub.f32 q0, q0, q1
302 ; CHECK-MVE-FP-NEXT: bx lr
304 ; CHECK-MVE-VMLA-LABEL: vfms32:
305 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
306 ; CHECK-MVE-VMLA-NEXT: vfms.f32 q0, q1, q2
307 ; CHECK-MVE-VMLA-NEXT: bx lr
309 ; CHECK-MVE-LABEL: vfms32:
310 ; CHECK-MVE: @ %bb.0: @ %entry
311 ; CHECK-MVE-NEXT: vmls.f32 s3, s7, s11
312 ; CHECK-MVE-NEXT: vmls.f32 s2, s6, s10
313 ; CHECK-MVE-NEXT: vmls.f32 s1, s5, s9
314 ; CHECK-MVE-NEXT: vmls.f32 s0, s4, s8
315 ; CHECK-MVE-NEXT: bx lr
317 %0 = fmul <4 x float> %src2, %src3
318 %1 = fsub <4 x float> %src1, %0
; vfmar32: the scalar float %src3 is splatted across all 4 lanes and used as
; the MULTIPLIER: %src1 + (%src2 * splat).
; With -fp-contract=fast the checks expect the vector-by-scalar form
; vfma.f32 q0, q1, r0; plain +mve.fp expects vmul.f32 by r0 followed by vadd.f32;
; +mve,+fullfp16 expects four scalar vmla.f32, all using s8 as the multiplier.
322 define arm_aapcs_vfpcc <4 x float> @vfmar32(<4 x float> %src1, <4 x float> %src2, float %src3) {
323 ; CHECK-MVE-FP-LABEL: vfmar32:
324 ; CHECK-MVE-FP: @ %bb.0: @ %entry
325 ; CHECK-MVE-FP-NEXT: vmov r0, s8
326 ; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, r0
327 ; CHECK-MVE-FP-NEXT: vadd.f32 q0, q0, q1
328 ; CHECK-MVE-FP-NEXT: bx lr
330 ; CHECK-MVE-VMLA-LABEL: vfmar32:
331 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
332 ; CHECK-MVE-VMLA-NEXT: vmov r0, s8
333 ; CHECK-MVE-VMLA-NEXT: vfma.f32 q0, q1, r0
334 ; CHECK-MVE-VMLA-NEXT: bx lr
336 ; CHECK-MVE-LABEL: vfmar32:
337 ; CHECK-MVE: @ %bb.0: @ %entry
338 ; CHECK-MVE-NEXT: vmla.f32 s3, s7, s8
339 ; CHECK-MVE-NEXT: vmla.f32 s2, s6, s8
340 ; CHECK-MVE-NEXT: vmla.f32 s1, s5, s8
341 ; CHECK-MVE-NEXT: vmla.f32 s0, s4, s8
342 ; CHECK-MVE-NEXT: bx lr
344 %i = insertelement <4 x float> undef, float %src3, i32 0
345 %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
346 %0 = fmul <4 x float> %src2, %sp
347 %1 = fadd <4 x float> %src1, %0
; vfmas32: the scalar float %src3 is splatted across all 4 lanes and used as
; the ADDEND: splat + (%src1 * %src2).
; With -fp-contract=fast the checks expect vfmas.f32 q0, q1, r0; plain +mve.fp
; expects vmul.f32 followed by vadd.f32 with r0; +mve,+fullfp16 expects s8 to be
; copied into s9-s11, four scalar vmla.f32 into those copies, then a move to q0.
351 define arm_aapcs_vfpcc <4 x float> @vfmas32(<4 x float> %src1, <4 x float> %src2, float %src3) {
352 ; CHECK-MVE-FP-LABEL: vfmas32:
353 ; CHECK-MVE-FP: @ %bb.0: @ %entry
354 ; CHECK-MVE-FP-NEXT: vmov r0, s8
355 ; CHECK-MVE-FP-NEXT: vmul.f32 q0, q0, q1
356 ; CHECK-MVE-FP-NEXT: vadd.f32 q0, q0, r0
357 ; CHECK-MVE-FP-NEXT: bx lr
359 ; CHECK-MVE-VMLA-LABEL: vfmas32:
360 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
361 ; CHECK-MVE-VMLA-NEXT: vmov r0, s8
362 ; CHECK-MVE-VMLA-NEXT: vfmas.f32 q0, q1, r0
363 ; CHECK-MVE-VMLA-NEXT: bx lr
365 ; CHECK-MVE-LABEL: vfmas32:
366 ; CHECK-MVE: @ %bb.0: @ %entry
367 ; CHECK-MVE-NEXT: vmov.f32 s11, s8
368 ; CHECK-MVE-NEXT: vmov.f32 s10, s8
369 ; CHECK-MVE-NEXT: vmov.f32 s9, s8
370 ; CHECK-MVE-NEXT: vmla.f32 s8, s0, s4
371 ; CHECK-MVE-NEXT: vmla.f32 s11, s3, s7
372 ; CHECK-MVE-NEXT: vmla.f32 s10, s2, s6
373 ; CHECK-MVE-NEXT: vmla.f32 s9, s1, s5
374 ; CHECK-MVE-NEXT: vmov q0, q2
375 ; CHECK-MVE-NEXT: bx lr
377 %i = insertelement <4 x float> undef, float %src3, i32 0
378 %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
379 %0 = fmul <4 x float> %src1, %src2
380 %1 = fadd <4 x float> %sp, %0
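385 ; Predicated versions of the same tests: each result lane is selected by `fcmp olt %src2, 0`; lanes where the compare is false keep %src1.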
; vfma16_v1_pred: computes %src1 + (%src2 * %src3) on <8 x half>, then selects
; per lane between that sum (where %src2 is ordered-less-than 0) and %src1.
; With -fp-contract=fast the checks expect the select to fold into a predicated
; vfmat.f16 inside a vpt.f16 lt, q1, zr block; plain +mve.fp expects an
; unpredicated vmul.f16 followed by a predicated vaddt.f16; +mve,+fullfp16
; expects per-lane scalar vcmp.f16 / vmla.f16 / vseleq.f16 sequences.
387 define arm_aapcs_vfpcc <8 x half> @vfma16_v1_pred(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
388 ; CHECK-MVE-FP-LABEL: vfma16_v1_pred:
389 ; CHECK-MVE-FP: @ %bb.0: @ %entry
390 ; CHECK-MVE-FP-NEXT: vmul.f16 q2, q1, q2
391 ; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr
392 ; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q0, q2
393 ; CHECK-MVE-FP-NEXT: bx lr
395 ; CHECK-MVE-VMLA-LABEL: vfma16_v1_pred:
396 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
397 ; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
398 ; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, q2
399 ; CHECK-MVE-VMLA-NEXT: bx lr
401 ; CHECK-MVE-LABEL: vfma16_v1_pred:
402 ; CHECK-MVE: @ %bb.0: @ %entry
403 ; CHECK-MVE-NEXT: vmovx.f16 s14, s4
404 ; CHECK-MVE-NEXT: vmovx.f16 s13, s0
405 ; CHECK-MVE-NEXT: vcmp.f16 s14, #0
406 ; CHECK-MVE-NEXT: vmovx.f16 s12, s8
407 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
408 ; CHECK-MVE-NEXT: vmov.f32 s15, s13
409 ; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12
410 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
411 ; CHECK-MVE-NEXT: vmov.f32 s14, s0
412 ; CHECK-MVE-NEXT: vmla.f16 s14, s4, s8
413 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
414 ; CHECK-MVE-NEXT: vmovx.f16 s4, s9
415 ; CHECK-MVE-NEXT: cset r0, mi
416 ; CHECK-MVE-NEXT: cmp r0, #0
417 ; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15
418 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
419 ; CHECK-MVE-NEXT: vcmp.f16 s8, #0
420 ; CHECK-MVE-NEXT: cset r0, mi
421 ; CHECK-MVE-NEXT: cmp r0, #0
422 ; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s14
423 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
424 ; CHECK-MVE-NEXT: vins.f16 s0, s12
425 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1
426 ; CHECK-MVE-NEXT: vmov.f32 s14, s12
427 ; CHECK-MVE-NEXT: vcmp.f16 s5, #0
428 ; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4
429 ; CHECK-MVE-NEXT: vmov.f32 s8, s1
430 ; CHECK-MVE-NEXT: vmla.f16 s8, s5, s9
431 ; CHECK-MVE-NEXT: cset r0, mi
432 ; CHECK-MVE-NEXT: cmp r0, #0
433 ; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
434 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
435 ; CHECK-MVE-NEXT: vmovx.f16 s12, s2
436 ; CHECK-MVE-NEXT: vmov.f32 s14, s12
437 ; CHECK-MVE-NEXT: cset r0, mi
438 ; CHECK-MVE-NEXT: cmp r0, #0
439 ; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s8
440 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
441 ; CHECK-MVE-NEXT: vcmp.f16 s8, #0
442 ; CHECK-MVE-NEXT: vins.f16 s1, s4
443 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
444 ; CHECK-MVE-NEXT: vmovx.f16 s4, s10
445 ; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4
446 ; CHECK-MVE-NEXT: vcmp.f16 s6, #0
447 ; CHECK-MVE-NEXT: vmov.f32 s8, s2
448 ; CHECK-MVE-NEXT: vmla.f16 s8, s6, s10
449 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
450 ; CHECK-MVE-NEXT: cset r0, mi
451 ; CHECK-MVE-NEXT: cmp r0, #0
452 ; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
453 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
454 ; CHECK-MVE-NEXT: vcmp.f16 s6, #0
455 ; CHECK-MVE-NEXT: cset r0, mi
456 ; CHECK-MVE-NEXT: cmp r0, #0
457 ; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s8
458 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
459 ; CHECK-MVE-NEXT: vmovx.f16 s8, s3
460 ; CHECK-MVE-NEXT: vins.f16 s2, s4
461 ; CHECK-MVE-NEXT: vmovx.f16 s4, s11
462 ; CHECK-MVE-NEXT: vmov.f32 s10, s8
463 ; CHECK-MVE-NEXT: vmla.f16 s10, s6, s4
464 ; CHECK-MVE-NEXT: vcmp.f16 s7, #0
465 ; CHECK-MVE-NEXT: vmov.f32 s6, s3
466 ; CHECK-MVE-NEXT: vmla.f16 s6, s7, s11
467 ; CHECK-MVE-NEXT: cset r0, mi
468 ; CHECK-MVE-NEXT: cmp r0, #0
469 ; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s10
470 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
471 ; CHECK-MVE-NEXT: cset r0, mi
472 ; CHECK-MVE-NEXT: cmp r0, #0
473 ; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6
474 ; CHECK-MVE-NEXT: vins.f16 s3, s4
475 ; CHECK-MVE-NEXT: bx lr
477 %0 = fmul <8 x half> %src2, %src3
478 %1 = fadd <8 x half> %src1, %0
479 %c = fcmp olt <8 x half> %src2, zeroinitializer
480 %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
; vfma16_v2_pred: like vfma16_v1_pred but with the fadd operands commuted
; (product first, %src1 second); the result is still selected per lane against
; %src1 by `fcmp olt %src2, 0`.
; With -fp-contract=fast the checks expect vpt.f16 lt, q1, zr + vfmat.f16; plain
; +mve.fp expects vmul.f16 followed by a predicated vaddt.f16; +mve,+fullfp16
; expects per-lane scalar vcmp.f16 / vmla.f16 / vseleq.f16 sequences.
484 define arm_aapcs_vfpcc <8 x half> @vfma16_v2_pred(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
485 ; CHECK-MVE-FP-LABEL: vfma16_v2_pred:
486 ; CHECK-MVE-FP: @ %bb.0: @ %entry
487 ; CHECK-MVE-FP-NEXT: vmul.f16 q2, q1, q2
488 ; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr
489 ; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q2, q0
490 ; CHECK-MVE-FP-NEXT: bx lr
492 ; CHECK-MVE-VMLA-LABEL: vfma16_v2_pred:
493 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
494 ; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
495 ; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, q2
496 ; CHECK-MVE-VMLA-NEXT: bx lr
498 ; CHECK-MVE-LABEL: vfma16_v2_pred:
499 ; CHECK-MVE: @ %bb.0: @ %entry
500 ; CHECK-MVE-NEXT: vmovx.f16 s14, s4
501 ; CHECK-MVE-NEXT: vmovx.f16 s13, s0
502 ; CHECK-MVE-NEXT: vcmp.f16 s14, #0
503 ; CHECK-MVE-NEXT: vmovx.f16 s12, s8
504 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
505 ; CHECK-MVE-NEXT: vmov.f32 s15, s13
506 ; CHECK-MVE-NEXT: vmla.f16 s15, s14, s12
507 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
508 ; CHECK-MVE-NEXT: vmov.f32 s14, s0
509 ; CHECK-MVE-NEXT: vmla.f16 s14, s4, s8
510 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
511 ; CHECK-MVE-NEXT: vmovx.f16 s4, s9
512 ; CHECK-MVE-NEXT: cset r0, mi
513 ; CHECK-MVE-NEXT: cmp r0, #0
514 ; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15
515 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
516 ; CHECK-MVE-NEXT: vcmp.f16 s8, #0
517 ; CHECK-MVE-NEXT: cset r0, mi
518 ; CHECK-MVE-NEXT: cmp r0, #0
519 ; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s14
520 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
521 ; CHECK-MVE-NEXT: vins.f16 s0, s12
522 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1
523 ; CHECK-MVE-NEXT: vmov.f32 s14, s12
524 ; CHECK-MVE-NEXT: vcmp.f16 s5, #0
525 ; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4
526 ; CHECK-MVE-NEXT: vmov.f32 s8, s1
527 ; CHECK-MVE-NEXT: vmla.f16 s8, s5, s9
528 ; CHECK-MVE-NEXT: cset r0, mi
529 ; CHECK-MVE-NEXT: cmp r0, #0
530 ; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
531 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
532 ; CHECK-MVE-NEXT: vmovx.f16 s12, s2
533 ; CHECK-MVE-NEXT: vmov.f32 s14, s12
534 ; CHECK-MVE-NEXT: cset r0, mi
535 ; CHECK-MVE-NEXT: cmp r0, #0
536 ; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s8
537 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
538 ; CHECK-MVE-NEXT: vcmp.f16 s8, #0
539 ; CHECK-MVE-NEXT: vins.f16 s1, s4
540 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
541 ; CHECK-MVE-NEXT: vmovx.f16 s4, s10
542 ; CHECK-MVE-NEXT: vmla.f16 s14, s8, s4
543 ; CHECK-MVE-NEXT: vcmp.f16 s6, #0
544 ; CHECK-MVE-NEXT: vmov.f32 s8, s2
545 ; CHECK-MVE-NEXT: vmla.f16 s8, s6, s10
546 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
547 ; CHECK-MVE-NEXT: cset r0, mi
548 ; CHECK-MVE-NEXT: cmp r0, #0
549 ; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
550 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
551 ; CHECK-MVE-NEXT: vcmp.f16 s6, #0
552 ; CHECK-MVE-NEXT: cset r0, mi
553 ; CHECK-MVE-NEXT: cmp r0, #0
554 ; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s8
555 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
556 ; CHECK-MVE-NEXT: vmovx.f16 s8, s3
557 ; CHECK-MVE-NEXT: vins.f16 s2, s4
558 ; CHECK-MVE-NEXT: vmovx.f16 s4, s11
559 ; CHECK-MVE-NEXT: vmov.f32 s10, s8
560 ; CHECK-MVE-NEXT: vmla.f16 s10, s6, s4
561 ; CHECK-MVE-NEXT: vcmp.f16 s7, #0
562 ; CHECK-MVE-NEXT: vmov.f32 s6, s3
563 ; CHECK-MVE-NEXT: vmla.f16 s6, s7, s11
564 ; CHECK-MVE-NEXT: cset r0, mi
565 ; CHECK-MVE-NEXT: cmp r0, #0
566 ; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s10
567 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
568 ; CHECK-MVE-NEXT: cset r0, mi
569 ; CHECK-MVE-NEXT: cmp r0, #0
570 ; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6
571 ; CHECK-MVE-NEXT: vins.f16 s3, s4
572 ; CHECK-MVE-NEXT: bx lr
574 %0 = fmul <8 x half> %src2, %src3
575 %1 = fadd <8 x half> %0, %src1
576 %c = fcmp olt <8 x half> %src2, zeroinitializer
577 %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
; vfms16_pred: computes %src1 - (%src2 * %src3) on <8 x half>, then selects per
; lane between that difference (where %src2 is ordered-less-than 0) and %src1.
; With -fp-contract=fast the checks expect vpt.f16 lt, q1, zr + vfmst.f16; plain
; +mve.fp expects vmul.f16 followed by a predicated vsubt.f16; +mve,+fullfp16
; expects per-lane scalar vcmp.f16 / vmls.f16 / vseleq.f16 sequences.
581 define arm_aapcs_vfpcc <8 x half> @vfms16_pred(<8 x half> %src1, <8 x half> %src2, <8 x half> %src3) {
582 ; CHECK-MVE-FP-LABEL: vfms16_pred:
583 ; CHECK-MVE-FP: @ %bb.0: @ %entry
584 ; CHECK-MVE-FP-NEXT: vmul.f16 q2, q1, q2
585 ; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr
586 ; CHECK-MVE-FP-NEXT: vsubt.f16 q0, q0, q2
587 ; CHECK-MVE-FP-NEXT: bx lr
589 ; CHECK-MVE-VMLA-LABEL: vfms16_pred:
590 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
591 ; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
592 ; CHECK-MVE-VMLA-NEXT: vfmst.f16 q0, q1, q2
593 ; CHECK-MVE-VMLA-NEXT: bx lr
595 ; CHECK-MVE-LABEL: vfms16_pred:
596 ; CHECK-MVE: @ %bb.0: @ %entry
597 ; CHECK-MVE-NEXT: vmovx.f16 s14, s4
598 ; CHECK-MVE-NEXT: vmovx.f16 s13, s0
599 ; CHECK-MVE-NEXT: vcmp.f16 s14, #0
600 ; CHECK-MVE-NEXT: vmovx.f16 s12, s8
601 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
602 ; CHECK-MVE-NEXT: vmov.f32 s15, s13
603 ; CHECK-MVE-NEXT: vmls.f16 s15, s14, s12
604 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
605 ; CHECK-MVE-NEXT: vmov.f32 s14, s0
606 ; CHECK-MVE-NEXT: vmls.f16 s14, s4, s8
607 ; CHECK-MVE-NEXT: vmovx.f16 s8, s5
608 ; CHECK-MVE-NEXT: vmovx.f16 s4, s9
609 ; CHECK-MVE-NEXT: cset r0, mi
610 ; CHECK-MVE-NEXT: cmp r0, #0
611 ; CHECK-MVE-NEXT: vseleq.f16 s12, s13, s15
612 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
613 ; CHECK-MVE-NEXT: vcmp.f16 s8, #0
614 ; CHECK-MVE-NEXT: cset r0, mi
615 ; CHECK-MVE-NEXT: cmp r0, #0
616 ; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s14
617 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
618 ; CHECK-MVE-NEXT: vins.f16 s0, s12
619 ; CHECK-MVE-NEXT: vmovx.f16 s12, s1
620 ; CHECK-MVE-NEXT: vmov.f32 s14, s12
621 ; CHECK-MVE-NEXT: vcmp.f16 s5, #0
622 ; CHECK-MVE-NEXT: vmls.f16 s14, s8, s4
623 ; CHECK-MVE-NEXT: vmov.f32 s8, s1
624 ; CHECK-MVE-NEXT: vmls.f16 s8, s5, s9
625 ; CHECK-MVE-NEXT: cset r0, mi
626 ; CHECK-MVE-NEXT: cmp r0, #0
627 ; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
628 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
629 ; CHECK-MVE-NEXT: vmovx.f16 s12, s2
630 ; CHECK-MVE-NEXT: vmov.f32 s14, s12
631 ; CHECK-MVE-NEXT: cset r0, mi
632 ; CHECK-MVE-NEXT: cmp r0, #0
633 ; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s8
634 ; CHECK-MVE-NEXT: vmovx.f16 s8, s6
635 ; CHECK-MVE-NEXT: vcmp.f16 s8, #0
636 ; CHECK-MVE-NEXT: vins.f16 s1, s4
637 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
638 ; CHECK-MVE-NEXT: vmovx.f16 s4, s10
639 ; CHECK-MVE-NEXT: vmls.f16 s14, s8, s4
640 ; CHECK-MVE-NEXT: vcmp.f16 s6, #0
641 ; CHECK-MVE-NEXT: vmov.f32 s8, s2
642 ; CHECK-MVE-NEXT: vmls.f16 s8, s6, s10
643 ; CHECK-MVE-NEXT: vmovx.f16 s6, s7
644 ; CHECK-MVE-NEXT: cset r0, mi
645 ; CHECK-MVE-NEXT: cmp r0, #0
646 ; CHECK-MVE-NEXT: vseleq.f16 s4, s12, s14
647 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
648 ; CHECK-MVE-NEXT: vcmp.f16 s6, #0
649 ; CHECK-MVE-NEXT: cset r0, mi
650 ; CHECK-MVE-NEXT: cmp r0, #0
651 ; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s8
652 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
653 ; CHECK-MVE-NEXT: vmovx.f16 s8, s3
654 ; CHECK-MVE-NEXT: vins.f16 s2, s4
655 ; CHECK-MVE-NEXT: vmovx.f16 s4, s11
656 ; CHECK-MVE-NEXT: vmov.f32 s10, s8
657 ; CHECK-MVE-NEXT: vmls.f16 s10, s6, s4
658 ; CHECK-MVE-NEXT: vcmp.f16 s7, #0
659 ; CHECK-MVE-NEXT: vmov.f32 s6, s3
660 ; CHECK-MVE-NEXT: vmls.f16 s6, s7, s11
661 ; CHECK-MVE-NEXT: cset r0, mi
662 ; CHECK-MVE-NEXT: cmp r0, #0
663 ; CHECK-MVE-NEXT: vseleq.f16 s4, s8, s10
664 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
665 ; CHECK-MVE-NEXT: cset r0, mi
666 ; CHECK-MVE-NEXT: cmp r0, #0
667 ; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6
668 ; CHECK-MVE-NEXT: vins.f16 s3, s4
669 ; CHECK-MVE-NEXT: bx lr
671 %0 = fmul <8 x half> %src2, %src3
672 %1 = fsub <8 x half> %src1, %0
673 %c = fcmp olt <8 x half> %src2, zeroinitializer
674 %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
; vfmar16_pred: the float %src3o is truncated to half, splatted, and used as the
; MULTIPLIER: %src1 + (%src2 * splat); the result is then selected per lane
; against %src1 by `fcmp olt %src2, 0`.
; With -fp-contract=fast the checks expect vpt.f16 lt, q1, zr + vfmat.f16 with
; r0; plain +mve.fp expects a vcmp.f16 before the vmul.f16 and a vpst-predicated
; vaddt.f16 afterwards; +mve,+fullfp16 expects per-lane scalar
; vcmp.f16 / vmla.f16 / vseleq.f16 sequences.
678 define arm_aapcs_vfpcc <8 x half> @vfmar16_pred(<8 x half> %src1, <8 x half> %src2, float %src3o) {
679 ; CHECK-MVE-FP-LABEL: vfmar16_pred:
680 ; CHECK-MVE-FP: @ %bb.0: @ %entry
681 ; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8
682 ; CHECK-MVE-FP-NEXT: vcmp.f16 lt, q1, zr
683 ; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8
684 ; CHECK-MVE-FP-NEXT: vmul.f16 q1, q1, r0
685 ; CHECK-MVE-FP-NEXT: vpst
686 ; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q0, q1
687 ; CHECK-MVE-FP-NEXT: bx lr
689 ; CHECK-MVE-VMLA-LABEL: vfmar16_pred:
690 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
691 ; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
692 ; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
693 ; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
694 ; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, r0
695 ; CHECK-MVE-VMLA-NEXT: bx lr
697 ; CHECK-MVE-LABEL: vfmar16_pred:
698 ; CHECK-MVE: @ %bb.0: @ %entry
699 ; CHECK-MVE-NEXT: vmovx.f16 s10, s4
700 ; CHECK-MVE-NEXT: vmovx.f16 s12, s0
701 ; CHECK-MVE-NEXT: vcmp.f16 s10, #0
702 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s8, s8
703 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
704 ; CHECK-MVE-NEXT: vmov.f32 s14, s12
705 ; CHECK-MVE-NEXT: vmla.f16 s14, s10, s8
706 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
707 ; CHECK-MVE-NEXT: cset r0, mi
708 ; CHECK-MVE-NEXT: cmp r0, #0
709 ; CHECK-MVE-NEXT: vseleq.f16 s10, s12, s14
710 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
711 ; CHECK-MVE-NEXT: vmov.f32 s12, s0
712 ; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8
713 ; CHECK-MVE-NEXT: vmovx.f16 s4, s5
714 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
715 ; CHECK-MVE-NEXT: cset r0, mi
716 ; CHECK-MVE-NEXT: cmp r0, #0
717 ; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s12
718 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
719 ; CHECK-MVE-NEXT: vins.f16 s0, s10
720 ; CHECK-MVE-NEXT: vmovx.f16 s10, s1
721 ; CHECK-MVE-NEXT: vmov.f32 s12, s10
722 ; CHECK-MVE-NEXT: vcmp.f16 s5, #0
723 ; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8
724 ; CHECK-MVE-NEXT: cset r0, mi
725 ; CHECK-MVE-NEXT: cmp r0, #0
726 ; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12
727 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
728 ; CHECK-MVE-NEXT: vmov.f32 s10, s1
729 ; CHECK-MVE-NEXT: vmla.f16 s10, s5, s8
730 ; CHECK-MVE-NEXT: cset r0, mi
731 ; CHECK-MVE-NEXT: cmp r0, #0
732 ; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s10
733 ; CHECK-MVE-NEXT: vmovx.f16 s10, s2
734 ; CHECK-MVE-NEXT: vins.f16 s1, s4
735 ; CHECK-MVE-NEXT: vmovx.f16 s4, s6
736 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
737 ; CHECK-MVE-NEXT: vmov.f32 s12, s10
738 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
739 ; CHECK-MVE-NEXT: vmla.f16 s12, s4, s8
740 ; CHECK-MVE-NEXT: vcmp.f16 s6, #0
741 ; CHECK-MVE-NEXT: cset r0, mi
742 ; CHECK-MVE-NEXT: cmp r0, #0
743 ; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12
744 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
745 ; CHECK-MVE-NEXT: vmov.f32 s10, s2
746 ; CHECK-MVE-NEXT: vmla.f16 s10, s6, s8
747 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3
748 ; CHECK-MVE-NEXT: cset r0, mi
749 ; CHECK-MVE-NEXT: cmp r0, #0
750 ; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s10
751 ; CHECK-MVE-NEXT: vmov.f32 s10, s6
752 ; CHECK-MVE-NEXT: vins.f16 s2, s4
753 ; CHECK-MVE-NEXT: vmovx.f16 s4, s7
754 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
755 ; CHECK-MVE-NEXT: vmla.f16 s10, s4, s8
756 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
757 ; CHECK-MVE-NEXT: vcmp.f16 s7, #0
758 ; CHECK-MVE-NEXT: cset r0, mi
759 ; CHECK-MVE-NEXT: cmp r0, #0
760 ; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s10
761 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
762 ; CHECK-MVE-NEXT: vmov.f32 s6, s3
763 ; CHECK-MVE-NEXT: vmla.f16 s6, s7, s8
764 ; CHECK-MVE-NEXT: cset r0, mi
765 ; CHECK-MVE-NEXT: cmp r0, #0
766 ; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s6
767 ; CHECK-MVE-NEXT: vins.f16 s3, s4
768 ; CHECK-MVE-NEXT: bx lr
770 %src3 = fptrunc float %src3o to half
771 %i = insertelement <8 x half> undef, half %src3, i32 0
772 %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
773 %0 = fmul <8 x half> %src2, %sp
774 %1 = fadd <8 x half> %src1, %0
775 %c = fcmp olt <8 x half> %src2, zeroinitializer
776 %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
; vfma16_pred: the float %src3o is truncated to half, splatted, and used as the
; ADDEND: splat + (%src1 * %src2); the result is then selected per lane against
; %src1 by `fcmp olt %src2, 0`.
; With -fp-contract=fast the checks expect vpt.f16 lt, q1, zr + vfmast.f16 with
; r0; plain +mve.fp expects vmul.f16 followed by a predicated vaddt.f16 with r0;
; +mve,+fullfp16 expects per-lane scalar vcmp.f16 / vmla.f16 / vseleq.f16
; sequences.
780 define arm_aapcs_vfpcc <8 x half> @vfma16_pred(<8 x half> %src1, <8 x half> %src2, float %src3o) {
781 ; CHECK-MVE-FP-LABEL: vfma16_pred:
782 ; CHECK-MVE-FP: @ %bb.0: @ %entry
783 ; CHECK-MVE-FP-NEXT: vcvtb.f16.f32 s8, s8
784 ; CHECK-MVE-FP-NEXT: vmov.f16 r0, s8
785 ; CHECK-MVE-FP-NEXT: vmul.f16 q2, q0, q1
786 ; CHECK-MVE-FP-NEXT: vpt.f16 lt, q1, zr
787 ; CHECK-MVE-FP-NEXT: vaddt.f16 q0, q2, r0
788 ; CHECK-MVE-FP-NEXT: bx lr
790 ; CHECK-MVE-VMLA-LABEL: vfma16_pred:
791 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
792 ; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
793 ; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
794 ; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
795 ; CHECK-MVE-VMLA-NEXT: vfmast.f16 q0, q1, r0
796 ; CHECK-MVE-VMLA-NEXT: bx lr
798 ; CHECK-MVE-LABEL: vfma16_pred:
799 ; CHECK-MVE: @ %bb.0: @ %entry
800 ; CHECK-MVE-NEXT: vmovx.f16 s10, s4
801 ; CHECK-MVE-NEXT: vcvtb.f16.f32 s8, s8
802 ; CHECK-MVE-NEXT: vcmp.f16 s10, #0
803 ; CHECK-MVE-NEXT: vmovx.f16 s12, s0
804 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
805 ; CHECK-MVE-NEXT: vmov.f32 s14, s8
806 ; CHECK-MVE-NEXT: vmla.f16 s14, s12, s10
807 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
808 ; CHECK-MVE-NEXT: cset r0, mi
809 ; CHECK-MVE-NEXT: cmp r0, #0
810 ; CHECK-MVE-NEXT: vseleq.f16 s10, s12, s14
811 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
812 ; CHECK-MVE-NEXT: vmov.f32 s12, s8
813 ; CHECK-MVE-NEXT: vmla.f16 s12, s0, s4
814 ; CHECK-MVE-NEXT: vmovx.f16 s4, s5
815 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
816 ; CHECK-MVE-NEXT: cset r0, mi
817 ; CHECK-MVE-NEXT: cmp r0, #0
818 ; CHECK-MVE-NEXT: vseleq.f16 s0, s0, s12
819 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
820 ; CHECK-MVE-NEXT: vins.f16 s0, s10
821 ; CHECK-MVE-NEXT: vmovx.f16 s10, s1
822 ; CHECK-MVE-NEXT: vmov.f32 s12, s8
823 ; CHECK-MVE-NEXT: vcmp.f16 s5, #0
824 ; CHECK-MVE-NEXT: vmla.f16 s12, s10, s4
825 ; CHECK-MVE-NEXT: cset r0, mi
826 ; CHECK-MVE-NEXT: cmp r0, #0
827 ; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12
828 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
829 ; CHECK-MVE-NEXT: vmov.f32 s10, s8
830 ; CHECK-MVE-NEXT: vmla.f16 s10, s1, s5
831 ; CHECK-MVE-NEXT: vmov.f32 s12, s8
832 ; CHECK-MVE-NEXT: cset r0, mi
833 ; CHECK-MVE-NEXT: cmp r0, #0
834 ; CHECK-MVE-NEXT: vseleq.f16 s1, s1, s10
835 ; CHECK-MVE-NEXT: vmovx.f16 s10, s2
836 ; CHECK-MVE-NEXT: vins.f16 s1, s4
837 ; CHECK-MVE-NEXT: vmovx.f16 s4, s6
838 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
839 ; CHECK-MVE-NEXT: vmla.f16 s12, s10, s4
840 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
841 ; CHECK-MVE-NEXT: vcmp.f16 s6, #0
842 ; CHECK-MVE-NEXT: cset r0, mi
843 ; CHECK-MVE-NEXT: cmp r0, #0
844 ; CHECK-MVE-NEXT: vseleq.f16 s4, s10, s12
845 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
846 ; CHECK-MVE-NEXT: vmov.f32 s10, s8
847 ; CHECK-MVE-NEXT: vmla.f16 s10, s2, s6
848 ; CHECK-MVE-NEXT: vmovx.f16 s6, s3
849 ; CHECK-MVE-NEXT: cset r0, mi
850 ; CHECK-MVE-NEXT: cmp r0, #0
851 ; CHECK-MVE-NEXT: vseleq.f16 s2, s2, s10
852 ; CHECK-MVE-NEXT: vmov.f32 s10, s8
853 ; CHECK-MVE-NEXT: vins.f16 s2, s4
854 ; CHECK-MVE-NEXT: vmovx.f16 s4, s7
855 ; CHECK-MVE-NEXT: vcmp.f16 s4, #0
856 ; CHECK-MVE-NEXT: vmla.f16 s10, s6, s4
857 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
858 ; CHECK-MVE-NEXT: vcmp.f16 s7, #0
859 ; CHECK-MVE-NEXT: vmla.f16 s8, s3, s7
860 ; CHECK-MVE-NEXT: cset r0, mi
861 ; CHECK-MVE-NEXT: cmp r0, #0
862 ; CHECK-MVE-NEXT: vseleq.f16 s4, s6, s10
863 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
864 ; CHECK-MVE-NEXT: cset r0, mi
865 ; CHECK-MVE-NEXT: cmp r0, #0
866 ; CHECK-MVE-NEXT: vseleq.f16 s3, s3, s8
867 ; CHECK-MVE-NEXT: vins.f16 s3, s4
868 ; CHECK-MVE-NEXT: bx lr
870 %src3 = fptrunc float %src3o to half
871 %i = insertelement <8 x half> undef, half %src3, i32 0
872 %sp = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer
873 %0 = fmul <8 x half> %src1, %src2
874 %1 = fadd <8 x half> %sp, %0
875 %c = fcmp olt <8 x half> %src2, zeroinitializer
876 %s = select <8 x i1> %c, <8 x half> %1, <8 x half> %src1
; vfma32_v1_pred: predicated multiply-accumulate on <4 x float>.
; The IR computes %src1 + (%src2 * %src3) and keeps that value only in lanes
; where %src2 is ordered-less-than zero; every other lane returns %src1.
; Expected output for the three RUN configurations at the top of the file:
;   * +mve.fp: a separate vmul.f32, then a VPT block with a predicated vaddt.f32.
;   * +mve.fp with -fp-contract=fast: a VPT block with a single predicated
;     vfmat.f32.
;   * +mve,+fullfp16: per-lane scalar code (vmla.f32, vcmp.f32 against #0,
;     cset mi, vseleq.f32).
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
880 define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
881 ; CHECK-MVE-FP-LABEL: vfma32_v1_pred:
882 ; CHECK-MVE-FP: @ %bb.0: @ %entry
883 ; CHECK-MVE-FP-NEXT: vmul.f32 q2, q1, q2
884 ; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
885 ; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q0, q2
886 ; CHECK-MVE-FP-NEXT: bx lr
888 ; CHECK-MVE-VMLA-LABEL: vfma32_v1_pred:
889 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
890 ; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
891 ; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, q2
892 ; CHECK-MVE-VMLA-NEXT: bx lr
894 ; CHECK-MVE-LABEL: vfma32_v1_pred:
895 ; CHECK-MVE: @ %bb.0: @ %entry
896 ; CHECK-MVE-NEXT: vcmp.f32 s5, #0
897 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
898 ; CHECK-MVE-NEXT: vcmp.f32 s4, #0
899 ; CHECK-MVE-NEXT: vmov.f32 s14, s0
900 ; CHECK-MVE-NEXT: vmov.f32 s12, s1
901 ; CHECK-MVE-NEXT: vmla.f32 s14, s4, s8
902 ; CHECK-MVE-NEXT: vmov.f32 s4, s3
903 ; CHECK-MVE-NEXT: vmov.f32 s8, s2
904 ; CHECK-MVE-NEXT: vmla.f32 s12, s5, s9
905 ; CHECK-MVE-NEXT: vmla.f32 s4, s7, s11
906 ; CHECK-MVE-NEXT: vmla.f32 s8, s6, s10
907 ; CHECK-MVE-NEXT: cset r0, mi
908 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
909 ; CHECK-MVE-NEXT: vcmp.f32 s7, #0
910 ; CHECK-MVE-NEXT: cset r1, mi
911 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
912 ; CHECK-MVE-NEXT: vcmp.f32 s6, #0
913 ; CHECK-MVE-NEXT: cset r2, mi
914 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
915 ; CHECK-MVE-NEXT: cset r3, mi
916 ; CHECK-MVE-NEXT: cmp r2, #0
917 ; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4
918 ; CHECK-MVE-NEXT: cmp r3, #0
919 ; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s8
920 ; CHECK-MVE-NEXT: cmp r0, #0
921 ; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12
922 ; CHECK-MVE-NEXT: cmp r1, #0
923 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s14
924 ; CHECK-MVE-NEXT: bx lr
; IR under test: product of %src2 and %src3, accumulator %src1 as the first
; fadd operand, then a per-lane select on (%src2 olt 0.0).
926 %0 = fmul <4 x float> %src2, %src3
927 %1 = fadd <4 x float> %src1, %0
928 %c = fcmp olt <4 x float> %src2, zeroinitializer
929 %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
; vfma32_v2_pred: predicated multiply-accumulate on <4 x float> with the fadd
; operands commuted. The IR computes (%src2 * %src3) + %src1 and keeps that
; value only in lanes where %src2 is ordered-less-than zero; every other lane
; returns %src1.
; Expected output for the three RUN configurations at the top of the file:
;   * +mve.fp: vmul.f32, then a VPT block with a predicated vaddt.f32 whose
;     source operands appear as q2, q0.
;   * +mve.fp with -fp-contract=fast: a VPT block with a single predicated
;     vfmat.f32.
;   * +mve,+fullfp16: per-lane scalar code (vmla.f32, vcmp.f32 against #0,
;     cset mi, vseleq.f32).
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
933 define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
934 ; CHECK-MVE-FP-LABEL: vfma32_v2_pred:
935 ; CHECK-MVE-FP: @ %bb.0: @ %entry
936 ; CHECK-MVE-FP-NEXT: vmul.f32 q2, q1, q2
937 ; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
938 ; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q2, q0
939 ; CHECK-MVE-FP-NEXT: bx lr
941 ; CHECK-MVE-VMLA-LABEL: vfma32_v2_pred:
942 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
943 ; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
944 ; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, q2
945 ; CHECK-MVE-VMLA-NEXT: bx lr
947 ; CHECK-MVE-LABEL: vfma32_v2_pred:
948 ; CHECK-MVE: @ %bb.0: @ %entry
949 ; CHECK-MVE-NEXT: vcmp.f32 s5, #0
950 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
951 ; CHECK-MVE-NEXT: vcmp.f32 s4, #0
952 ; CHECK-MVE-NEXT: vmov.f32 s14, s0
953 ; CHECK-MVE-NEXT: vmov.f32 s12, s1
954 ; CHECK-MVE-NEXT: vmla.f32 s14, s4, s8
955 ; CHECK-MVE-NEXT: vmov.f32 s4, s3
956 ; CHECK-MVE-NEXT: vmov.f32 s8, s2
957 ; CHECK-MVE-NEXT: vmla.f32 s12, s5, s9
958 ; CHECK-MVE-NEXT: vmla.f32 s4, s7, s11
959 ; CHECK-MVE-NEXT: vmla.f32 s8, s6, s10
960 ; CHECK-MVE-NEXT: cset r0, mi
961 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
962 ; CHECK-MVE-NEXT: vcmp.f32 s7, #0
963 ; CHECK-MVE-NEXT: cset r1, mi
964 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
965 ; CHECK-MVE-NEXT: vcmp.f32 s6, #0
966 ; CHECK-MVE-NEXT: cset r2, mi
967 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
968 ; CHECK-MVE-NEXT: cset r3, mi
969 ; CHECK-MVE-NEXT: cmp r2, #0
970 ; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4
971 ; CHECK-MVE-NEXT: cmp r3, #0
972 ; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s8
973 ; CHECK-MVE-NEXT: cmp r0, #0
974 ; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12
975 ; CHECK-MVE-NEXT: cmp r1, #0
976 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s14
977 ; CHECK-MVE-NEXT: bx lr
; IR under test: product of %src2 and %src3 as the first fadd operand and the
; accumulator %src1 as the second, then a per-lane select on (%src2 olt 0.0).
979 %0 = fmul <4 x float> %src2, %src3
980 %1 = fadd <4 x float> %0, %src1
981 %c = fcmp olt <4 x float> %src2, zeroinitializer
982 %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
; vfms32_pred: predicated multiply-subtract on <4 x float>.
; The IR computes %src1 - (%src2 * %src3) and keeps that value only in lanes
; where %src2 is ordered-less-than zero; every other lane returns %src1.
; Expected output for the three RUN configurations at the top of the file:
;   * +mve.fp: vmul.f32, then a VPT block with a predicated vsubt.f32.
;   * +mve.fp with -fp-contract=fast: a VPT block with a single predicated
;     vfmst.f32.
;   * +mve,+fullfp16: per-lane scalar code (vmls.f32, vcmp.f32 against #0,
;     cset mi, vseleq.f32).
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
986 define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> %src2, <4 x float> %src3) {
987 ; CHECK-MVE-FP-LABEL: vfms32_pred:
988 ; CHECK-MVE-FP: @ %bb.0: @ %entry
989 ; CHECK-MVE-FP-NEXT: vmul.f32 q2, q1, q2
990 ; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
991 ; CHECK-MVE-FP-NEXT: vsubt.f32 q0, q0, q2
992 ; CHECK-MVE-FP-NEXT: bx lr
994 ; CHECK-MVE-VMLA-LABEL: vfms32_pred:
995 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
996 ; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
997 ; CHECK-MVE-VMLA-NEXT: vfmst.f32 q0, q1, q2
998 ; CHECK-MVE-VMLA-NEXT: bx lr
1000 ; CHECK-MVE-LABEL: vfms32_pred:
1001 ; CHECK-MVE: @ %bb.0: @ %entry
1002 ; CHECK-MVE-NEXT: vcmp.f32 s5, #0
1003 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1004 ; CHECK-MVE-NEXT: vcmp.f32 s4, #0
1005 ; CHECK-MVE-NEXT: vmov.f32 s14, s0
1006 ; CHECK-MVE-NEXT: vmov.f32 s12, s1
1007 ; CHECK-MVE-NEXT: vmls.f32 s14, s4, s8
1008 ; CHECK-MVE-NEXT: vmov.f32 s4, s3
1009 ; CHECK-MVE-NEXT: vmov.f32 s8, s2
1010 ; CHECK-MVE-NEXT: vmls.f32 s12, s5, s9
1011 ; CHECK-MVE-NEXT: vmls.f32 s4, s7, s11
1012 ; CHECK-MVE-NEXT: vmls.f32 s8, s6, s10
1013 ; CHECK-MVE-NEXT: cset r0, mi
1014 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1015 ; CHECK-MVE-NEXT: vcmp.f32 s7, #0
1016 ; CHECK-MVE-NEXT: cset r1, mi
1017 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1018 ; CHECK-MVE-NEXT: vcmp.f32 s6, #0
1019 ; CHECK-MVE-NEXT: cset r2, mi
1020 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1021 ; CHECK-MVE-NEXT: cset r3, mi
1022 ; CHECK-MVE-NEXT: cmp r2, #0
1023 ; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4
1024 ; CHECK-MVE-NEXT: cmp r3, #0
1025 ; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s8
1026 ; CHECK-MVE-NEXT: cmp r0, #0
1027 ; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s12
1028 ; CHECK-MVE-NEXT: cmp r1, #0
1029 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s14
1030 ; CHECK-MVE-NEXT: bx lr
; IR under test: product of %src2 and %src3 subtracted from the accumulator
; %src1, then a per-lane select on (%src2 olt 0.0).
1032 %0 = fmul <4 x float> %src2, %src3
1033 %1 = fsub <4 x float> %src1, %0
1034 %c = fcmp olt <4 x float> %src2, zeroinitializer
1035 %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
; vfmar32_pred: predicated multiply-accumulate where the multiplier is a
; scalar. The float %src3 is splatted to all four lanes, multiplied by %src2
; and added to %src1; the sum is kept only in lanes where %src2 is
; ordered-less-than zero, and every other lane returns %src1.
; Expected output for the three RUN configurations at the top of the file:
;   * +mve.fp: the scalar is moved to r0, then vcmp.f32 / vmul.f32 by r0 /
;     vpst / predicated vaddt.f32.
;   * +mve.fp with -fp-contract=fast: the scalar is moved to r0, then a VPT
;     block with a predicated vfmat.f32 taking r0 as its last operand.
;   * +mve,+fullfp16: per-lane scalar code (vmla.f32 with s8 as the common
;     multiplier, vcmp.f32 against #0, cset mi, vseleq.f32).
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
1039 define arm_aapcs_vfpcc <4 x float> @vfmar32_pred(<4 x float> %src1, <4 x float> %src2, float %src3) {
1040 ; CHECK-MVE-FP-LABEL: vfmar32_pred:
1041 ; CHECK-MVE-FP: @ %bb.0: @ %entry
1042 ; CHECK-MVE-FP-NEXT: vmov r0, s8
1043 ; CHECK-MVE-FP-NEXT: vcmp.f32 lt, q1, zr
1044 ; CHECK-MVE-FP-NEXT: vmul.f32 q1, q1, r0
1045 ; CHECK-MVE-FP-NEXT: vpst
1046 ; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q0, q1
1047 ; CHECK-MVE-FP-NEXT: bx lr
1049 ; CHECK-MVE-VMLA-LABEL: vfmar32_pred:
1050 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
1051 ; CHECK-MVE-VMLA-NEXT: vmov r0, s8
1052 ; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
1053 ; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, r0
1054 ; CHECK-MVE-VMLA-NEXT: bx lr
1056 ; CHECK-MVE-LABEL: vfmar32_pred:
1057 ; CHECK-MVE: @ %bb.0: @ %entry
1058 ; CHECK-MVE-NEXT: vcmp.f32 s5, #0
1059 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1060 ; CHECK-MVE-NEXT: vcmp.f32 s4, #0
1061 ; CHECK-MVE-NEXT: vmov.f32 s12, s0
1062 ; CHECK-MVE-NEXT: vmov.f32 s14, s2
1063 ; CHECK-MVE-NEXT: vmov.f32 s10, s1
1064 ; CHECK-MVE-NEXT: vmla.f32 s12, s4, s8
1065 ; CHECK-MVE-NEXT: vmov.f32 s4, s3
1066 ; CHECK-MVE-NEXT: vmla.f32 s14, s6, s8
1067 ; CHECK-MVE-NEXT: vmla.f32 s10, s5, s8
1068 ; CHECK-MVE-NEXT: vmla.f32 s4, s7, s8
1069 ; CHECK-MVE-NEXT: cset r0, mi
1070 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1071 ; CHECK-MVE-NEXT: vcmp.f32 s7, #0
1072 ; CHECK-MVE-NEXT: cset r1, mi
1073 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1074 ; CHECK-MVE-NEXT: vcmp.f32 s6, #0
1075 ; CHECK-MVE-NEXT: cset r2, mi
1076 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1077 ; CHECK-MVE-NEXT: cset r3, mi
1078 ; CHECK-MVE-NEXT: cmp r2, #0
1079 ; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4
1080 ; CHECK-MVE-NEXT: cmp r3, #0
1081 ; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s14
1082 ; CHECK-MVE-NEXT: cmp r0, #0
1083 ; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
1084 ; CHECK-MVE-NEXT: cmp r1, #0
1085 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s12
1086 ; CHECK-MVE-NEXT: bx lr
; IR under test: insertelement + zero-mask shufflevector broadcast %src3 to
; every lane (%sp); %sp is the multiplier, %src1 the accumulator, and the
; select is per lane on (%src2 olt 0.0).
1088 %i = insertelement <4 x float> undef, float %src3, i32 0
1089 %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1090 %0 = fmul <4 x float> %src2, %sp
1091 %1 = fadd <4 x float> %src1, %0
1092 %c = fcmp olt <4 x float> %src2, zeroinitializer
1093 %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1
; vfmas32_pred: predicated multiply-accumulate where the addend is a scalar.
; The float %src3 is splatted to all four lanes and added to the product
; %src1 * %src2; the sum is kept only in lanes where %src2 is
; ordered-less-than zero, and every other lane returns %src1.
; Expected output for the three RUN configurations at the top of the file:
;   * +mve.fp: the scalar is moved to r0, then vmul.f32 and a VPT block with a
;     predicated vaddt.f32 taking r0 as its last operand.
;   * +mve.fp with -fp-contract=fast: the scalar is moved to r0, then a VPT
;     block with a predicated vfmast.f32 taking r0 as its last operand.
;   * +mve,+fullfp16: per-lane scalar code (s8 copied as the accumulator for
;     each vmla.f32, vcmp.f32 against #0, cset mi, vseleq.f32).
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
1097 define arm_aapcs_vfpcc <4 x float> @vfmas32_pred(<4 x float> %src1, <4 x float> %src2, float %src3) {
1098 ; CHECK-MVE-FP-LABEL: vfmas32_pred:
1099 ; CHECK-MVE-FP: @ %bb.0: @ %entry
1100 ; CHECK-MVE-FP-NEXT: vmov r0, s8
1101 ; CHECK-MVE-FP-NEXT: vmul.f32 q2, q0, q1
1102 ; CHECK-MVE-FP-NEXT: vpt.f32 lt, q1, zr
1103 ; CHECK-MVE-FP-NEXT: vaddt.f32 q0, q2, r0
1104 ; CHECK-MVE-FP-NEXT: bx lr
1106 ; CHECK-MVE-VMLA-LABEL: vfmas32_pred:
1107 ; CHECK-MVE-VMLA: @ %bb.0: @ %entry
1108 ; CHECK-MVE-VMLA-NEXT: vmov r0, s8
1109 ; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
1110 ; CHECK-MVE-VMLA-NEXT: vfmast.f32 q0, q1, r0
1111 ; CHECK-MVE-VMLA-NEXT: bx lr
1113 ; CHECK-MVE-LABEL: vfmas32_pred:
1114 ; CHECK-MVE: @ %bb.0: @ %entry
1115 ; CHECK-MVE-NEXT: vcmp.f32 s5, #0
1116 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1117 ; CHECK-MVE-NEXT: vcmp.f32 s4, #0
1118 ; CHECK-MVE-NEXT: vmov.f32 s12, s8
1119 ; CHECK-MVE-NEXT: vmov.f32 s10, s8
1120 ; CHECK-MVE-NEXT: vmla.f32 s12, s0, s4
1121 ; CHECK-MVE-NEXT: vmov.f32 s4, s8
1122 ; CHECK-MVE-NEXT: vmla.f32 s8, s2, s6
1123 ; CHECK-MVE-NEXT: vmla.f32 s10, s1, s5
1124 ; CHECK-MVE-NEXT: vmla.f32 s4, s3, s7
1125 ; CHECK-MVE-NEXT: cset r0, mi
1126 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1127 ; CHECK-MVE-NEXT: vcmp.f32 s7, #0
1128 ; CHECK-MVE-NEXT: cset r1, mi
1129 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1130 ; CHECK-MVE-NEXT: vcmp.f32 s6, #0
1131 ; CHECK-MVE-NEXT: cset r2, mi
1132 ; CHECK-MVE-NEXT: vmrs APSR_nzcv, fpscr
1133 ; CHECK-MVE-NEXT: cset r3, mi
1134 ; CHECK-MVE-NEXT: cmp r2, #0
1135 ; CHECK-MVE-NEXT: vseleq.f32 s3, s3, s4
1136 ; CHECK-MVE-NEXT: cmp r3, #0
1137 ; CHECK-MVE-NEXT: vseleq.f32 s2, s2, s8
1138 ; CHECK-MVE-NEXT: cmp r0, #0
1139 ; CHECK-MVE-NEXT: vseleq.f32 s1, s1, s10
1140 ; CHECK-MVE-NEXT: cmp r1, #0
1141 ; CHECK-MVE-NEXT: vseleq.f32 s0, s0, s12
1142 ; CHECK-MVE-NEXT: bx lr
; IR under test: insertelement + zero-mask shufflevector broadcast %src3 to
; every lane (%sp); %sp is the addend of the product %src1 * %src2, and the
; select is per lane on (%src2 olt 0.0).
1144 %i = insertelement <4 x float> undef, float %src3, i32 0
1145 %sp = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer
1146 %0 = fmul <4 x float> %src1, %src2
1147 %1 = fadd <4 x float> %sp, %0
1148 %c = fcmp olt <4 x float> %src2, zeroinitializer
1149 %s = select <4 x i1> %c, <4 x float> %1, <4 x float> %src1