1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP
; fmin_v2f32: fast-math llvm.vector.reduce.fmin over <2 x float>.
; Both RUN lines pass the shared CHECK prefix, so the same expectation applies
; to each: a single scalar vminnm.f32 of s0 and s1.
5 define arm_aapcs_vfpcc float @fmin_v2f32(<2 x float> %x) {
6 ; CHECK-LABEL: fmin_v2f32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
11 %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
; fmin_v4f32: fast-math llvm.vector.reduce.fmin over <4 x float>.
; CHECK-FP (the +mve.fp RUN line) expects a pairwise tree of scalar vminnm.f32;
; CHECK-NOFP (the +mve,+fullfp16 RUN line) expects a linear chain that folds
; s1, s2 and s3 into s0 in turn.
15 define arm_aapcs_vfpcc float @fmin_v4f32(<4 x float> %x) {
16 ; CHECK-FP-LABEL: fmin_v4f32:
17 ; CHECK-FP: @ %bb.0: @ %entry
18 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
19 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
20 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
21 ; CHECK-FP-NEXT: bx lr
23 ; CHECK-NOFP-LABEL: fmin_v4f32:
24 ; CHECK-NOFP: @ %bb.0: @ %entry
25 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1
26 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
27 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s3
28 ; CHECK-NOFP-NEXT: bx lr
30 %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
34 ; FIXME: Should the vector fminnum be lowered to scalar fminnum operations here?
; fmin_v8f32: fast-math llvm.vector.reduce.fmin over <8 x float>.
; CHECK-FP expects one vector vminnm.f32 combining q0 and q1, followed by the
; scalar vminnm.f32 tree; CHECK-NOFP expects scalar vminnm.f32 only.
35 define arm_aapcs_vfpcc float @fmin_v8f32(<8 x float> %x) {
36 ; CHECK-FP-LABEL: fmin_v8f32:
37 ; CHECK-FP: @ %bb.0: @ %entry
38 ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1
39 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
40 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
41 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
42 ; CHECK-FP-NEXT: bx lr
44 ; CHECK-NOFP-LABEL: fmin_v8f32:
45 ; CHECK-NOFP: @ %bb.0: @ %entry
46 ; CHECK-NOFP-NEXT: vminnm.f32 s8, s1, s5
47 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
48 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s8
49 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s2, s6
50 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
51 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s3, s7
52 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
53 ; CHECK-NOFP-NEXT: bx lr
55 %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
; fmin_v4f16: fast-math llvm.vector.reduce.fmin over <4 x half>.
; Both prefixes expect vmovx.f16 moves feeding scalar vminnm.f16; CHECK-FP
; pairs the operands as a tree, CHECK-NOFP accumulates into s0 one at a time.
59 define arm_aapcs_vfpcc half @fmin_v4f16(<4 x half> %x) {
60 ; CHECK-FP-LABEL: fmin_v4f16:
61 ; CHECK-FP: @ %bb.0: @ %entry
62 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
63 ; CHECK-FP-NEXT: vmovx.f16 s4, s0
64 ; CHECK-FP-NEXT: vminnm.f16 s2, s1, s2
65 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4
66 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
67 ; CHECK-FP-NEXT: bx lr
69 ; CHECK-NOFP-LABEL: fmin_v4f16:
70 ; CHECK-NOFP: @ %bb.0: @ %entry
71 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
72 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
73 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
74 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
75 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
76 ; CHECK-NOFP-NEXT: bx lr
78 %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
; fmin_v8f16: fast-math llvm.vector.reduce.fmin over <8 x half>.
; CHECK-FP expects vrev32.16 plus a vector vminnm.f16 before the scalar
; vminnm.f16 tree; CHECK-NOFP expects only vmovx.f16 and scalar vminnm.f16.
82 define arm_aapcs_vfpcc half @fmin_v8f16(<8 x half> %x) {
83 ; CHECK-FP-LABEL: fmin_v8f16:
84 ; CHECK-FP: @ %bb.0: @ %entry
85 ; CHECK-FP-NEXT: vrev32.16 q1, q0
86 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
87 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
88 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
89 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
90 ; CHECK-FP-NEXT: bx lr
92 ; CHECK-NOFP-LABEL: fmin_v8f16:
93 ; CHECK-NOFP: @ %bb.0: @ %entry
94 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s0
95 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
96 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s1
97 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
98 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
99 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
100 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
101 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
102 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
103 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s3
104 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
105 ; CHECK-NOFP-NEXT: bx lr
107 %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
; fmin_v16f16: fast-math llvm.vector.reduce.fmin over <16 x half>.
; CHECK-FP expects q0 and q1 to be combined with a vector vminnm.f16 first,
; then the same vrev32.16 / vminnm.f16 sequence as the 8-element case.
; CHECK-NOFP expects only vmovx.f16 and scalar vminnm.f16.
111 define arm_aapcs_vfpcc half @fmin_v16f16(<16 x half> %x) {
112 ; CHECK-FP-LABEL: fmin_v16f16:
113 ; CHECK-FP: @ %bb.0: @ %entry
114 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
115 ; CHECK-FP-NEXT: vrev32.16 q1, q0
116 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
117 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
118 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
119 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
120 ; CHECK-FP-NEXT: bx lr
122 ; CHECK-NOFP-LABEL: fmin_v16f16:
123 ; CHECK-NOFP: @ %bb.0: @ %entry
124 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
125 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
126 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
127 ; CHECK-NOFP-NEXT: vminnm.f16 s8, s10, s8
128 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s8
129 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s1, s5
130 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
131 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
132 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
133 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s8, s4
134 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
135 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s2, s6
136 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
137 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
138 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
139 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s2, s4
140 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
141 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
142 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s3, s7
143 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
144 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
145 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s4, s2
146 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
147 ; CHECK-NOFP-NEXT: bx lr
149 %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
; fmin_v1f64: fast-math llvm.vector.reduce.fmin over the single-element
; vector <1 x double>. Checked under the shared CHECK prefix.
153 define arm_aapcs_vfpcc double @fmin_v1f64(<1 x double> %x) {
154 ; CHECK-LABEL: fmin_v1f64:
155 ; CHECK: @ %bb.0: @ %entry
158 %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
; fmin_v2f64: fast-math llvm.vector.reduce.fmin over <2 x double>.
; Shared CHECK prefix: a single scalar vminnm.f64 of d0 and d1 is expected.
162 define arm_aapcs_vfpcc double @fmin_v2f64(<2 x double> %x) {
163 ; CHECK-LABEL: fmin_v2f64:
164 ; CHECK: @ %bb.0: @ %entry
165 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
168 %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
; fmin_v4f64: fast-math llvm.vector.reduce.fmin over <4 x double>.
; Shared CHECK prefix: the expected code combines d1/d3 and d0/d2 with
; vcmp.f64 + vmrs + vselgt.f64, then finishes with one vminnm.f64.
; Compare fmin_v4f64_nofast, where all three steps are vminnm.f64.
172 define arm_aapcs_vfpcc double @fmin_v4f64(<4 x double> %x) {
173 ; CHECK-LABEL: fmin_v4f64:
174 ; CHECK: @ %bb.0: @ %entry
175 ; CHECK-NEXT: vcmp.f64 d3, d1
176 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
177 ; CHECK-NEXT: vcmp.f64 d2, d0
178 ; CHECK-NEXT: vselgt.f64 d1, d1, d3
179 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
180 ; CHECK-NEXT: vselgt.f64 d0, d0, d2
181 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
184 %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
; fmin_v2f32_nofast: llvm.vector.reduce.fmin over <2 x float> with no
; fast-math flags on the call. Shared CHECK prefix: one scalar vminnm.f32.
188 define arm_aapcs_vfpcc float @fmin_v2f32_nofast(<2 x float> %x) {
189 ; CHECK-LABEL: fmin_v2f32_nofast:
190 ; CHECK: @ %bb.0: @ %entry
191 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
194 %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
; fmin_v4f32_nofast: llvm.vector.reduce.fmin over <4 x float> with no
; fast-math flags. The expected sequences match the fast variant fmin_v4f32:
; a scalar vminnm.f32 tree for CHECK-FP and a linear chain for CHECK-NOFP.
198 define arm_aapcs_vfpcc float @fmin_v4f32_nofast(<4 x float> %x) {
199 ; CHECK-FP-LABEL: fmin_v4f32_nofast:
200 ; CHECK-FP: @ %bb.0: @ %entry
201 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
202 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
203 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
204 ; CHECK-FP-NEXT: bx lr
206 ; CHECK-NOFP-LABEL: fmin_v4f32_nofast:
207 ; CHECK-NOFP: @ %bb.0: @ %entry
208 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1
209 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
210 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s3
211 ; CHECK-NOFP-NEXT: bx lr
213 %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
; fmin_v8f32_nofast: llvm.vector.reduce.fmin over <8 x float> with no
; fast-math flags. CHECK-FP expects a vector vminnm.f32 of q0/q1 followed by
; scalar vminnm.f32; CHECK-NOFP expects scalar vminnm.f32 only.
217 define arm_aapcs_vfpcc float @fmin_v8f32_nofast(<8 x float> %x) {
218 ; CHECK-FP-LABEL: fmin_v8f32_nofast:
219 ; CHECK-FP: @ %bb.0: @ %entry
220 ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1
221 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
222 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
223 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
224 ; CHECK-FP-NEXT: bx lr
226 ; CHECK-NOFP-LABEL: fmin_v8f32_nofast:
227 ; CHECK-NOFP: @ %bb.0: @ %entry
228 ; CHECK-NOFP-NEXT: vminnm.f32 s8, s1, s5
229 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
230 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s8
231 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s2, s6
232 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
233 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s3, s7
234 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
235 ; CHECK-NOFP-NEXT: bx lr
237 %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
; fmin_v4f16_nofast: llvm.vector.reduce.fmin over <4 x half> with no
; fast-math flags. Both prefixes expect vmovx.f16 moves and scalar
; vminnm.f16, in the same order as the fast variant fmin_v4f16.
241 define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) {
242 ; CHECK-FP-LABEL: fmin_v4f16_nofast:
243 ; CHECK-FP: @ %bb.0: @ %entry
244 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
245 ; CHECK-FP-NEXT: vmovx.f16 s4, s0
246 ; CHECK-FP-NEXT: vminnm.f16 s2, s1, s2
247 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4
248 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
249 ; CHECK-FP-NEXT: bx lr
251 ; CHECK-NOFP-LABEL: fmin_v4f16_nofast:
252 ; CHECK-NOFP: @ %bb.0: @ %entry
253 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
254 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
255 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
256 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
257 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
258 ; CHECK-NOFP-NEXT: bx lr
260 %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
; fmin_v8f16_nofast: llvm.vector.reduce.fmin over <8 x half> with no
; fast-math flags. CHECK-FP expects vrev32.16 plus a vector vminnm.f16 ahead
; of the scalar steps; CHECK-NOFP expects vmovx.f16 and scalar vminnm.f16 only.
264 define arm_aapcs_vfpcc half @fmin_v8f16_nofast(<8 x half> %x) {
265 ; CHECK-FP-LABEL: fmin_v8f16_nofast:
266 ; CHECK-FP: @ %bb.0: @ %entry
267 ; CHECK-FP-NEXT: vrev32.16 q1, q0
268 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
269 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
270 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
271 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
272 ; CHECK-FP-NEXT: bx lr
274 ; CHECK-NOFP-LABEL: fmin_v8f16_nofast:
275 ; CHECK-NOFP: @ %bb.0: @ %entry
276 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s0
277 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
278 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s1
279 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
280 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
281 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
282 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
283 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
284 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
285 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s3
286 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
287 ; CHECK-NOFP-NEXT: bx lr
289 %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
; fmin_v16f16_nofast: llvm.vector.reduce.fmin over <16 x half> with no
; fast-math flags. CHECK-FP expects two vector vminnm.f16 steps (q0/q1, then
; against the vrev32.16 result) before the scalar tail; CHECK-NOFP expects
; vmovx.f16 and scalar vminnm.f16 only.
293 define arm_aapcs_vfpcc half @fmin_v16f16_nofast(<16 x half> %x) {
294 ; CHECK-FP-LABEL: fmin_v16f16_nofast:
295 ; CHECK-FP: @ %bb.0: @ %entry
296 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
297 ; CHECK-FP-NEXT: vrev32.16 q1, q0
298 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
299 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
300 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
301 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
302 ; CHECK-FP-NEXT: bx lr
304 ; CHECK-NOFP-LABEL: fmin_v16f16_nofast:
305 ; CHECK-NOFP: @ %bb.0: @ %entry
306 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
307 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
308 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
309 ; CHECK-NOFP-NEXT: vminnm.f16 s8, s10, s8
310 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s8
311 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s1, s5
312 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
313 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
314 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
315 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s8, s4
316 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
317 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s2, s6
318 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
319 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
320 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
321 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s2, s4
322 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
323 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
324 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s3, s7
325 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
326 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
327 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s4, s2
328 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
329 ; CHECK-NOFP-NEXT: bx lr
331 %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
; fmin_v1f64_nofast: llvm.vector.reduce.fmin over the single-element vector
; <1 x double>, with no fast-math flags. Checked under the shared CHECK prefix.
335 define arm_aapcs_vfpcc double @fmin_v1f64_nofast(<1 x double> %x) {
336 ; CHECK-LABEL: fmin_v1f64_nofast:
337 ; CHECK: @ %bb.0: @ %entry
340 %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
; fmin_v2f64_nofast: llvm.vector.reduce.fmin over <2 x double> with no
; fast-math flags. Shared CHECK prefix: one scalar vminnm.f64 of d0 and d1.
344 define arm_aapcs_vfpcc double @fmin_v2f64_nofast(<2 x double> %x) {
345 ; CHECK-LABEL: fmin_v2f64_nofast:
346 ; CHECK: @ %bb.0: @ %entry
347 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
350 %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
; fmin_v4f64_nofast: llvm.vector.reduce.fmin over <4 x double> with no
; fast-math flags. Shared CHECK prefix: three scalar vminnm.f64 are expected
; (d1/d3, d0/d2, then the two partial results).
354 define arm_aapcs_vfpcc double @fmin_v4f64_nofast(<4 x double> %x) {
355 ; CHECK-LABEL: fmin_v4f64_nofast:
356 ; CHECK: @ %bb.0: @ %entry
357 ; CHECK-NEXT: vminnm.f64 d1, d1, d3
358 ; CHECK-NEXT: vminnm.f64 d0, d0, d2
359 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
362 %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
; fmin_v2f32_acc: fast-math fmin reduction over <2 x float>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; Shared CHECK prefix: the compare/select is expected to become a second
; vminnm.f32 taking s4 as an operand.
366 define arm_aapcs_vfpcc float @fmin_v2f32_acc(<2 x float> %x, float %y) {
367 ; CHECK-LABEL: fmin_v2f32_acc:
368 ; CHECK: @ %bb.0: @ %entry
369 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
370 ; CHECK-NEXT: vminnm.f32 s0, s4, s0
373 %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
374 %c = fcmp fast olt float %y, %z
375 %r = select i1 %c, float %y, float %z
; fmin_v4f32_acc: fast-math fmin reduction over <4 x float>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; Both prefixes expect the reduction sequence of fmin_v4f32 followed by one
; extra vminnm.f32 with s4.
379 define arm_aapcs_vfpcc float @fmin_v4f32_acc(<4 x float> %x, float %y) {
380 ; CHECK-FP-LABEL: fmin_v4f32_acc:
381 ; CHECK-FP: @ %bb.0: @ %entry
382 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
383 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
384 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
385 ; CHECK-FP-NEXT: vminnm.f32 s0, s4, s0
386 ; CHECK-FP-NEXT: bx lr
388 ; CHECK-NOFP-LABEL: fmin_v4f32_acc:
389 ; CHECK-NOFP: @ %bb.0: @ %entry
390 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1
391 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
392 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s3
393 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s4, s0
394 ; CHECK-NOFP-NEXT: bx lr
396 %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
397 %c = fcmp fast olt float %y, %z
398 %r = select i1 %c, float %y, float %z
; fmin_v8f32_acc: fast-math fmin reduction over <8 x float>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; Both prefixes expect the reduction to end with an extra vminnm.f32 with s8.
402 define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
403 ; CHECK-FP-LABEL: fmin_v8f32_acc:
404 ; CHECK-FP: @ %bb.0: @ %entry
405 ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1
406 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
407 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
408 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
409 ; CHECK-FP-NEXT: vminnm.f32 s0, s8, s0
410 ; CHECK-FP-NEXT: bx lr
412 ; CHECK-NOFP-LABEL: fmin_v8f32_acc:
413 ; CHECK-NOFP: @ %bb.0: @ %entry
414 ; CHECK-NOFP-NEXT: vminnm.f32 s10, s1, s5
415 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
416 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s10
417 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s2, s6
418 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
419 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s3, s7
420 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
421 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s8, s0
422 ; CHECK-NOFP-NEXT: bx lr
424 %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
425 %c = fcmp fast olt float %y, %z
426 %r = select i1 %c, float %y, float %z
; fmin_v4f16_acc: fast-math fmin reduction over <4 x half>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; Both prefixes expect the reduction to end with an extra vminnm.f16 with s4.
430 define arm_aapcs_vfpcc half @fmin_v4f16_acc(<4 x half> %x, half %y) {
431 ; CHECK-FP-LABEL: fmin_v4f16_acc:
432 ; CHECK-FP: @ %bb.0: @ %entry
433 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
434 ; CHECK-FP-NEXT: vmovx.f16 s6, s0
435 ; CHECK-FP-NEXT: vminnm.f16 s2, s1, s2
436 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s6
437 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
438 ; CHECK-FP-NEXT: vminnm.f16 s0, s4, s0
439 ; CHECK-FP-NEXT: bx lr
441 ; CHECK-NOFP-LABEL: fmin_v4f16_acc:
442 ; CHECK-NOFP: @ %bb.0: @ %entry
443 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
444 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
445 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
446 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
447 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
448 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
449 ; CHECK-NOFP-NEXT: bx lr
451 %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
452 %c = fcmp fast olt half %y, %z
453 %r = select i1 %c, half %y, half %z
; fmin_v2f16_acc: fast-math fmin reduction over <2 x half>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; Shared CHECK prefix: vmovx.f16, one vminnm.f16 for the reduction, and a
; second vminnm.f16 with s4.
457 define arm_aapcs_vfpcc half @fmin_v2f16_acc(<2 x half> %x, half %y) {
458 ; CHECK-LABEL: fmin_v2f16_acc:
459 ; CHECK: @ %bb.0: @ %entry
460 ; CHECK-NEXT: vmovx.f16 s2, s0
461 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
462 ; CHECK-NEXT: vminnm.f16 s0, s4, s0
465 %z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
466 %c = fcmp fast olt half %y, %z
467 %r = select i1 %c, half %y, half %z
; fmin_v8f16_acc: fast-math fmin reduction over <8 x half>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; CHECK-FP expects vrev32.16 into q2 and a vector vminnm.f16 before the scalar
; steps; both prefixes end with an extra vminnm.f16 with s4.
471 define arm_aapcs_vfpcc half @fmin_v8f16_acc(<8 x half> %x, half %y) {
472 ; CHECK-FP-LABEL: fmin_v8f16_acc:
473 ; CHECK-FP: @ %bb.0: @ %entry
474 ; CHECK-FP-NEXT: vrev32.16 q2, q0
475 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q2
476 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
477 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
478 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
479 ; CHECK-FP-NEXT: vminnm.f16 s0, s4, s0
480 ; CHECK-FP-NEXT: bx lr
482 ; CHECK-NOFP-LABEL: fmin_v8f16_acc:
483 ; CHECK-NOFP: @ %bb.0: @ %entry
484 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s0
485 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s6
486 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s1
487 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
488 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s6
489 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
490 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
491 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
492 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
493 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s3
494 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
495 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
496 ; CHECK-NOFP-NEXT: bx lr
498 %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
499 %c = fcmp fast olt half %y, %z
500 %r = select i1 %c, half %y, half %z
; fmin_v16f16_acc: fast-math fmin reduction over <16 x half>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; Both prefixes expect the reduction to end with an extra vminnm.f16 with s8.
504 define arm_aapcs_vfpcc half @fmin_v16f16_acc(<16 x half> %x, half %y) {
505 ; CHECK-FP-LABEL: fmin_v16f16_acc:
506 ; CHECK-FP: @ %bb.0: @ %entry
507 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
508 ; CHECK-FP-NEXT: vrev32.16 q1, q0
509 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
510 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
511 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
512 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
513 ; CHECK-FP-NEXT: vminnm.f16 s0, s8, s0
514 ; CHECK-FP-NEXT: bx lr
516 ; CHECK-NOFP-LABEL: fmin_v16f16_acc:
517 ; CHECK-NOFP: @ %bb.0: @ %entry
518 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
519 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
520 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
521 ; CHECK-NOFP-NEXT: vminnm.f16 s10, s12, s10
522 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s10
523 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s1, s5
524 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
525 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
526 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
527 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s10, s4
528 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
529 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s2, s6
530 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
531 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
532 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
533 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s2, s4
534 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
535 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
536 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s3, s7
537 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
538 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
539 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s4, s2
540 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
541 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0
542 ; CHECK-NOFP-NEXT: bx lr
544 %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
545 %c = fcmp fast olt half %y, %z
546 %r = select i1 %c, half %y, half %z
; fmin_v1f64_acc: fast-math fmin reduction over <1 x double>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; Shared CHECK prefix: a single vminnm.f64 of d1 and d0 is expected.
550 define arm_aapcs_vfpcc double @fmin_v1f64_acc(<1 x double> %x, double %y) {
551 ; CHECK-LABEL: fmin_v1f64_acc:
552 ; CHECK: @ %bb.0: @ %entry
553 ; CHECK-NEXT: vminnm.f64 d0, d1, d0
556 %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
557 %c = fcmp fast olt double %y, %z
558 %r = select i1 %c, double %y, double %z
; fmin_v2f64_acc: fast-math fmin reduction over <2 x double>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; Shared CHECK prefix: one vminnm.f64 for the reduction and one with d2.
562 define arm_aapcs_vfpcc double @fmin_v2f64_acc(<2 x double> %x, double %y) {
563 ; CHECK-LABEL: fmin_v2f64_acc:
564 ; CHECK: @ %bb.0: @ %entry
565 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
566 ; CHECK-NEXT: vminnm.f64 d0, d2, d0
569 %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
570 %c = fcmp fast olt double %y, %z
571 %r = select i1 %c, double %y, double %z
; fmin_v4f64_acc: fast-math fmin reduction over <4 x double>, then combined
; with the scalar %y through "fcmp fast olt" + select.
; Shared CHECK prefix: the vcmp.f64/vmrs/vselgt.f64 sequence of fmin_v4f64,
; a vminnm.f64 of the partial results, and a final vminnm.f64 with d4.
575 define arm_aapcs_vfpcc double @fmin_v4f64_acc(<4 x double> %x, double %y) {
576 ; CHECK-LABEL: fmin_v4f64_acc:
577 ; CHECK: @ %bb.0: @ %entry
578 ; CHECK-NEXT: vcmp.f64 d3, d1
579 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
580 ; CHECK-NEXT: vcmp.f64 d2, d0
581 ; CHECK-NEXT: vselgt.f64 d1, d1, d3
582 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
583 ; CHECK-NEXT: vselgt.f64 d0, d0, d2
584 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
585 ; CHECK-NEXT: vminnm.f64 d0, d4, d0
588 %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
589 %c = fcmp fast olt double %y, %z
590 %r = select i1 %c, double %y, double %z
; fmin_v2f32_acc_nofast: fmin reduction over <2 x float> with no fast-math
; flags, then combined with %y through a plain "fcmp olt" + select.
; Shared CHECK prefix: unlike the fast _acc variant, the compare/select is
; expected to stay as vcmp.f32 + vmrs + vselgt.f32 instead of a vminnm.f32.
594 define arm_aapcs_vfpcc float @fmin_v2f32_acc_nofast(<2 x float> %x, float %y) {
595 ; CHECK-LABEL: fmin_v2f32_acc_nofast:
596 ; CHECK: @ %bb.0: @ %entry
597 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
598 ; CHECK-NEXT: vcmp.f32 s0, s4
599 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
600 ; CHECK-NEXT: vselgt.f32 s0, s4, s0
603 %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
604 %c = fcmp olt float %y, %z
605 %r = select i1 %c, float %y, float %z
; fmin_v4f32_acc_nofast: fmin reduction over <4 x float> with no fast-math
; flags, then combined with %y through a plain "fcmp olt" + select.
; Both prefixes expect the reduction to be followed by vcmp.f32 + vmrs +
; vselgt.f32 against s4.
609 define arm_aapcs_vfpcc float @fmin_v4f32_acc_nofast(<4 x float> %x, float %y) {
610 ; CHECK-FP-LABEL: fmin_v4f32_acc_nofast:
611 ; CHECK-FP: @ %bb.0: @ %entry
612 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
613 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
614 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
615 ; CHECK-FP-NEXT: vcmp.f32 s0, s4
616 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
617 ; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0
618 ; CHECK-FP-NEXT: bx lr
620 ; CHECK-NOFP-LABEL: fmin_v4f32_acc_nofast:
621 ; CHECK-NOFP: @ %bb.0: @ %entry
622 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1
623 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
624 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s3
625 ; CHECK-NOFP-NEXT: vcmp.f32 s0, s4
626 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
627 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0
628 ; CHECK-NOFP-NEXT: bx lr
630 %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
631 %c = fcmp olt float %y, %z
632 %r = select i1 %c, float %y, float %z
; fmin_v8f32_acc_nofast: fmin reduction over <8 x float> with no fast-math
; flags, then combined with %y through a plain "fcmp olt" + select.
; Both prefixes expect the reduction to be followed by vcmp.f32 + vmrs +
; vselgt.f32 against s8.
636 define arm_aapcs_vfpcc float @fmin_v8f32_acc_nofast(<8 x float> %x, float %y) {
637 ; CHECK-FP-LABEL: fmin_v8f32_acc_nofast:
638 ; CHECK-FP: @ %bb.0: @ %entry
639 ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1
640 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
641 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
642 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
643 ; CHECK-FP-NEXT: vcmp.f32 s0, s8
644 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
645 ; CHECK-FP-NEXT: vselgt.f32 s0, s8, s0
646 ; CHECK-FP-NEXT: bx lr
648 ; CHECK-NOFP-LABEL: fmin_v8f32_acc_nofast:
649 ; CHECK-NOFP: @ %bb.0: @ %entry
650 ; CHECK-NOFP-NEXT: vminnm.f32 s10, s1, s5
651 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
652 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s10
653 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s2, s6
654 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
655 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s3, s7
656 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
657 ; CHECK-NOFP-NEXT: vcmp.f32 s0, s8
658 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
659 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0
660 ; CHECK-NOFP-NEXT: bx lr
662 %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
663 %c = fcmp olt float %y, %z
664 %r = select i1 %c, float %y, float %z
; fmin_v4f16_acc_nofast: fmin reduction over <4 x half> with no fast-math
; flags, then combined with %y through a plain "fcmp olt" + select.
; Both prefixes expect the reduction to be followed by vcmp.f16 + vmrs +
; vselgt.f16 against s4.
668 define arm_aapcs_vfpcc half @fmin_v4f16_acc_nofast(<4 x half> %x, half %y) {
669 ; CHECK-FP-LABEL: fmin_v4f16_acc_nofast:
670 ; CHECK-FP: @ %bb.0: @ %entry
671 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
672 ; CHECK-FP-NEXT: vmovx.f16 s6, s0
673 ; CHECK-FP-NEXT: vminnm.f16 s2, s1, s2
674 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s6
675 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
676 ; CHECK-FP-NEXT: vcmp.f16 s0, s4
677 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
678 ; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
679 ; CHECK-FP-NEXT: bx lr
681 ; CHECK-NOFP-LABEL: fmin_v4f16_acc_nofast:
682 ; CHECK-NOFP: @ %bb.0: @ %entry
683 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
684 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
685 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
686 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
687 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
688 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
689 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
690 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
691 ; CHECK-NOFP-NEXT: bx lr
693 %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
694 %c = fcmp olt half %y, %z
695 %r = select i1 %c, half %y, half %z
; fmin_v8f16_acc_nofast: fmin reduction over <8 x half> with no fast-math
; flags, then combined with %y through a plain "fcmp olt" + select.
; Both prefixes expect the reduction to be followed by vcmp.f16 + vmrs +
; vselgt.f16 against s4.
699 define arm_aapcs_vfpcc half @fmin_v8f16_acc_nofast(<8 x half> %x, half %y) {
700 ; CHECK-FP-LABEL: fmin_v8f16_acc_nofast:
701 ; CHECK-FP: @ %bb.0: @ %entry
702 ; CHECK-FP-NEXT: vrev32.16 q2, q0
703 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q2
704 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
705 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
706 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
707 ; CHECK-FP-NEXT: vcmp.f16 s0, s4
708 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
709 ; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
710 ; CHECK-FP-NEXT: bx lr
712 ; CHECK-NOFP-LABEL: fmin_v8f16_acc_nofast:
713 ; CHECK-NOFP: @ %bb.0: @ %entry
714 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s0
715 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s6
716 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s1
717 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
718 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s6
719 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
720 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
721 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
722 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
723 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s3
724 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
725 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
726 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
727 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
728 ; CHECK-NOFP-NEXT: bx lr
730 %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
731 %c = fcmp olt half %y, %z
732 %r = select i1 %c, half %y, half %z
; fmin_v16f16_acc_nofast: fmin reduction over <16 x half> with no fast-math
; flags, then combined with %y through a plain "fcmp olt" + select.
; Both prefixes expect the reduction to be followed by vcmp.f16 + vmrs +
; vselgt.f16 against s8.
736 define arm_aapcs_vfpcc half @fmin_v16f16_acc_nofast(<16 x half> %x, half %y) {
737 ; CHECK-FP-LABEL: fmin_v16f16_acc_nofast:
738 ; CHECK-FP: @ %bb.0: @ %entry
739 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
740 ; CHECK-FP-NEXT: vrev32.16 q1, q0
741 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
742 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
743 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
744 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
745 ; CHECK-FP-NEXT: vcmp.f16 s0, s8
746 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
747 ; CHECK-FP-NEXT: vselgt.f16 s0, s8, s0
748 ; CHECK-FP-NEXT: bx lr
750 ; CHECK-NOFP-LABEL: fmin_v16f16_acc_nofast:
751 ; CHECK-NOFP: @ %bb.0: @ %entry
752 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
753 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
754 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
755 ; CHECK-NOFP-NEXT: vminnm.f16 s10, s12, s10
756 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s10
757 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s1, s5
758 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
759 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
760 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
761 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s10, s4
762 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
763 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s2, s6
764 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
765 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
766 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
767 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s2, s4
768 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
769 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
770 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s3, s7
771 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
772 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
773 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s4, s2
774 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
775 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s8
776 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
777 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s8, s0
778 ; CHECK-NOFP-NEXT: bx lr
780 %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
781 %c = fcmp olt half %y, %z
782 %r = select i1 %c, half %y, half %z
; fmin_v1f64_acc_nofast: fmin reduction over <1 x double> with no fast-math
; flags, then combined with %y through a plain "fcmp olt" + select.
; Shared CHECK prefix: only vcmp.f64 + vmrs + vselgt.f64 on d0/d1 is expected.
786 define arm_aapcs_vfpcc double @fmin_v1f64_acc_nofast(<1 x double> %x, double %y) {
787 ; CHECK-LABEL: fmin_v1f64_acc_nofast:
788 ; CHECK: @ %bb.0: @ %entry
789 ; CHECK-NEXT: vcmp.f64 d0, d1
790 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
791 ; CHECK-NEXT: vselgt.f64 d0, d1, d0
794 %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
795 %c = fcmp olt double %y, %z
796 %r = select i1 %c, double %y, double %z
; fmin_v2f64_acc_nofast: fmin reduction over <2 x double> with no fast-math
; flags, then combined with %y through a plain "fcmp olt" + select.
; Shared CHECK prefix: one vminnm.f64 for the reduction, then vcmp.f64 +
; vmrs + vselgt.f64 against d2.
800 define arm_aapcs_vfpcc double @fmin_v2f64_acc_nofast(<2 x double> %x, double %y) {
801 ; CHECK-LABEL: fmin_v2f64_acc_nofast:
802 ; CHECK: @ %bb.0: @ %entry
803 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
804 ; CHECK-NEXT: vcmp.f64 d0, d2
805 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
806 ; CHECK-NEXT: vselgt.f64 d0, d2, d0
809 %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
810 %c = fcmp olt double %y, %z
811 %r = select i1 %c, double %y, double %z
; fmin_v4f64_acc_nofast: fmin reduction over <4 x double> with no fast-math
; flags, then combined with %y through a plain "fcmp olt" + select.
; Shared CHECK prefix: three vminnm.f64 for the reduction, then vcmp.f64 +
; vmrs + vselgt.f64 against d4.
815 define arm_aapcs_vfpcc double @fmin_v4f64_acc_nofast(<4 x double> %x, double %y) {
816 ; CHECK-LABEL: fmin_v4f64_acc_nofast:
817 ; CHECK: @ %bb.0: @ %entry
818 ; CHECK-NEXT: vminnm.f64 d1, d1, d3
819 ; CHECK-NEXT: vminnm.f64 d0, d0, d2
820 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
821 ; CHECK-NEXT: vcmp.f64 d0, d4
822 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
823 ; CHECK-NEXT: vselgt.f64 d0, d4, d0
826 %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
827 %c = fcmp olt double %y, %z
828 %r = select i1 %c, double %y, double %z
; fmax_v2f32: fast-math llvm.vector.reduce.fmax over <2 x float>.
; Shared CHECK prefix: a single scalar vmaxnm.f32 of s0 and s1 is expected.
832 define arm_aapcs_vfpcc float @fmax_v2f32(<2 x float> %x) {
833 ; CHECK-LABEL: fmax_v2f32:
834 ; CHECK: @ %bb.0: @ %entry
835 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
838 %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
; fmax_v4f32: fast-math llvm.vector.reduce.fmax over <4 x float>.
; CHECK-FP (the +mve.fp RUN line) expects a pairwise tree of scalar vmaxnm.f32;
; CHECK-NOFP (the +mve,+fullfp16 RUN line) expects a linear chain into s0.
842 define arm_aapcs_vfpcc float @fmax_v4f32(<4 x float> %x) {
843 ; CHECK-FP-LABEL: fmax_v4f32:
844 ; CHECK-FP: @ %bb.0: @ %entry
845 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
846 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
847 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
848 ; CHECK-FP-NEXT: bx lr
850 ; CHECK-NOFP-LABEL: fmax_v4f32:
851 ; CHECK-NOFP: @ %bb.0: @ %entry
852 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1
853 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
854 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s3
855 ; CHECK-NOFP-NEXT: bx lr
857 %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
; fmax_v8f32: fast-math llvm.vector.reduce.fmax over <8 x float>.
; CHECK-FP expects one vector vmaxnm.f32 combining q0 and q1, followed by the
; scalar vmaxnm.f32 tree; CHECK-NOFP expects scalar vmaxnm.f32 only.
861 define arm_aapcs_vfpcc float @fmax_v8f32(<8 x float> %x) {
862 ; CHECK-FP-LABEL: fmax_v8f32:
863 ; CHECK-FP: @ %bb.0: @ %entry
864 ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1
865 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
866 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
867 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
868 ; CHECK-FP-NEXT: bx lr
870 ; CHECK-NOFP-LABEL: fmax_v8f32:
871 ; CHECK-NOFP: @ %bb.0: @ %entry
872 ; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s1, s5
873 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
874 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s8
875 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s2, s6
876 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
877 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s3, s7
878 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
879 ; CHECK-NOFP-NEXT: bx lr
881 %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
; fmax_v4f16: fast-math llvm.vector.reduce.fmax over <4 x half>.
; Both prefixes expect vmovx.f16 moves feeding scalar vmaxnm.f16; CHECK-FP
; pairs the operands as a tree, CHECK-NOFP accumulates into s0 one at a time.
885 define arm_aapcs_vfpcc half @fmax_v4f16(<4 x half> %x) {
886 ; CHECK-FP-LABEL: fmax_v4f16:
887 ; CHECK-FP: @ %bb.0: @ %entry
888 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
889 ; CHECK-FP-NEXT: vmovx.f16 s4, s0
890 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s1, s2
891 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4
892 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
893 ; CHECK-FP-NEXT: bx lr
895 ; CHECK-NOFP-LABEL: fmax_v4f16:
896 ; CHECK-NOFP: @ %bb.0: @ %entry
897 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
898 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
899 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
900 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
901 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
902 ; CHECK-NOFP-NEXT: bx lr
904 %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
; fmax_v8f16: fast-math llvm.vector.reduce.fmax over <8 x half>.
; CHECK-FP expects vrev32.16 plus a vector vmaxnm.f16 before the scalar
; vmaxnm.f16 tree; CHECK-NOFP expects only vmovx.f16 and scalar vmaxnm.f16.
908 define arm_aapcs_vfpcc half @fmax_v8f16(<8 x half> %x) {
909 ; CHECK-FP-LABEL: fmax_v8f16:
910 ; CHECK-FP: @ %bb.0: @ %entry
911 ; CHECK-FP-NEXT: vrev32.16 q1, q0
912 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
913 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
914 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
915 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
916 ; CHECK-FP-NEXT: bx lr
918 ; CHECK-NOFP-LABEL: fmax_v8f16:
919 ; CHECK-NOFP: @ %bb.0: @ %entry
920 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s0
921 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
922 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s1
923 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
924 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
925 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
926 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
927 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
928 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
929 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s3
930 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
931 ; CHECK-NOFP-NEXT: bx lr
933 %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
; fmax_v16f16: fast-math llvm.vector.reduce.fmax over <16 x half>.
; CHECK-FP expects q0 and q1 to be combined with a vector vmaxnm.f16 first,
; then the same vrev32.16 / vmaxnm.f16 sequence as the 8-element case.
; CHECK-NOFP expects only vmovx.f16 and scalar vmaxnm.f16.
937 define arm_aapcs_vfpcc half @fmax_v16f16(<16 x half> %x) {
938 ; CHECK-FP-LABEL: fmax_v16f16:
939 ; CHECK-FP: @ %bb.0: @ %entry
940 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
941 ; CHECK-FP-NEXT: vrev32.16 q1, q0
942 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
943 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
944 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
945 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
946 ; CHECK-FP-NEXT: bx lr
948 ; CHECK-NOFP-LABEL: fmax_v16f16:
949 ; CHECK-NOFP: @ %bb.0: @ %entry
950 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
951 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
952 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
953 ; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8
954 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s8
955 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s1, s5
956 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
957 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
958 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
959 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s8, s4
960 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
961 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s2, s6
962 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
963 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
964 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
965 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s2, s4
966 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
967 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
968 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s3, s7
969 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
970 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
971 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s4, s2
972 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
973 ; CHECK-NOFP-NEXT: bx lr
975 %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
; fmax_v1f64: fast-math llvm.vector.reduce.fmax over a single-element
; <1 x double>. Both llc configurations share one set of assertions, and no
; vmaxnm.f64 is listed among them.
979 define arm_aapcs_vfpcc double @fmax_v1f64(<1 x double> %x) {
980 ; CHECK-LABEL: fmax_v1f64:
981 ; CHECK: @ %bb.0: @ %entry
984 %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
; fmax_v2f64: fast-math llvm.vector.reduce.fmax over <2 x double>.
; Both llc configurations expect a single scalar vmaxnm.f64 of d0 and d1.
988 define arm_aapcs_vfpcc double @fmax_v2f64(<2 x double> %x) {
989 ; CHECK-LABEL: fmax_v2f64:
990 ; CHECK: @ %bb.0: @ %entry
991 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
994 %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
; fmax_v4f64: fast-math llvm.vector.reduce.fmax over <4 x double>.
; Both llc configurations expect the first, element-wise step (d1 vs d3 and
; d0 vs d2) as vcmp.f64 / vmrs / vselgt.f64, followed by one vmaxnm.f64.
; NOTE(review): the non-fast variant fmax_v4f64_nofast expects three plain
; vmaxnm.f64 instead, so the fast-math form currently gets the longer sequence.
998 define arm_aapcs_vfpcc double @fmax_v4f64(<4 x double> %x) {
999 ; CHECK-LABEL: fmax_v4f64:
1000 ; CHECK: @ %bb.0: @ %entry
1001 ; CHECK-NEXT: vcmp.f64 d1, d3
1002 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1003 ; CHECK-NEXT: vcmp.f64 d0, d2
1004 ; CHECK-NEXT: vselgt.f64 d1, d1, d3
1005 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1006 ; CHECK-NEXT: vselgt.f64 d0, d0, d2
1007 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1010 %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
; fmax_v2f32_nofast: llvm.vector.reduce.fmax over <2 x float>, called without
; fast-math flags. Both llc configurations expect one vmaxnm.f32 of s0 and s1.
1014 define arm_aapcs_vfpcc float @fmax_v2f32_nofast(<2 x float> %x) {
1015 ; CHECK-LABEL: fmax_v2f32_nofast:
1016 ; CHECK: @ %bb.0: @ %entry
1017 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
1020 %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
; fmax_v4f32_nofast: llvm.vector.reduce.fmax over <4 x float>, called without
; fast-math flags.
; With +mve.fp the expected order is pairwise: (s2,s3) and (s0,s1) first, then
; the two partial results. With integer-only MVE s1, s2 and s3 are folded into
; s0 in sequence.
1024 define arm_aapcs_vfpcc float @fmax_v4f32_nofast(<4 x float> %x) {
1025 ; CHECK-FP-LABEL: fmax_v4f32_nofast:
1026 ; CHECK-FP: @ %bb.0: @ %entry
1027 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1028 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1029 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1030 ; CHECK-FP-NEXT: bx lr
1032 ; CHECK-NOFP-LABEL: fmax_v4f32_nofast:
1033 ; CHECK-NOFP: @ %bb.0: @ %entry
1034 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1
1035 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1036 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s3
1037 ; CHECK-NOFP-NEXT: bx lr
1039 %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
; fmax_v8f32_nofast: llvm.vector.reduce.fmax over <8 x float> (q0 and q1),
; called without fast-math flags.
; With +mve.fp the expected code merges q0 and q1 with one vector vmaxnm.f32
; and then reduces the remaining four lanes pairwise with scalar vmaxnm.f32.
; With integer-only MVE the reduction is seven scalar vmaxnm.f32 and no
; vector instruction.
1043 define arm_aapcs_vfpcc float @fmax_v8f32_nofast(<8 x float> %x) {
1044 ; CHECK-FP-LABEL: fmax_v8f32_nofast:
1045 ; CHECK-FP: @ %bb.0: @ %entry
1046 ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1
1047 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1048 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1049 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1050 ; CHECK-FP-NEXT: bx lr
1052 ; CHECK-NOFP-LABEL: fmax_v8f32_nofast:
1053 ; CHECK-NOFP: @ %bb.0: @ %entry
1054 ; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s1, s5
1055 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
1056 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s8
1057 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s2, s6
1058 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1059 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s3, s7
1060 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1061 ; CHECK-NOFP-NEXT: bx lr
1063 %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
; fmax_v4f16_nofast: llvm.vector.reduce.fmax over <4 x half>, called without
; fast-math flags.
; With +mve.fp the upper halves of s0 and s1 are extracted with vmovx.f16,
; each pair is reduced, and the two partial results are combined.
; With integer-only MVE the lanes are folded into s0 one after another.
1067 define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) {
1068 ; CHECK-FP-LABEL: fmax_v4f16_nofast:
1069 ; CHECK-FP: @ %bb.0: @ %entry
1070 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
1071 ; CHECK-FP-NEXT: vmovx.f16 s4, s0
1072 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s1, s2
1073 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4
1074 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1075 ; CHECK-FP-NEXT: bx lr
1077 ; CHECK-NOFP-LABEL: fmax_v4f16_nofast:
1078 ; CHECK-NOFP: @ %bb.0: @ %entry
1079 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
1080 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1081 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
1082 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1083 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1084 ; CHECK-NOFP-NEXT: bx lr
1086 %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
; fmax_v8f16_nofast: llvm.vector.reduce.fmax over <8 x half>, called without
; fast-math flags.
; With +mve.fp the expected code does one vector vmaxnm.f16 of q0 against a
; vrev32.16 copy of itself, then three scalar vmaxnm.f16.
; With integer-only MVE the reduction is a scalar vmovx.f16/vmaxnm.f16 chain
; accumulating into s0.
1090 define arm_aapcs_vfpcc half @fmax_v8f16_nofast(<8 x half> %x) {
1091 ; CHECK-FP-LABEL: fmax_v8f16_nofast:
1092 ; CHECK-FP: @ %bb.0: @ %entry
1093 ; CHECK-FP-NEXT: vrev32.16 q1, q0
1094 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1095 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1096 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1097 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1098 ; CHECK-FP-NEXT: bx lr
1100 ; CHECK-NOFP-LABEL: fmax_v8f16_nofast:
1101 ; CHECK-NOFP: @ %bb.0: @ %entry
1102 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s0
1103 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1104 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s1
1105 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1106 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1107 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1108 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1109 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1110 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
1111 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s3
1112 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1113 ; CHECK-NOFP-NEXT: bx lr
1115 %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
; fmax_v16f16_nofast: llvm.vector.reduce.fmax over <16 x half> (q0 and q1),
; called without fast-math flags.
; With +mve.fp the two q registers are merged with a vector vmaxnm.f16, then
; reduced with vrev32.16 + vector max + three scalar max.
; With integer-only MVE only scalar vmovx.f16/vmaxnm.f16 are expected, with the
; running result kept in s0.
1119 define arm_aapcs_vfpcc half @fmax_v16f16_nofast(<16 x half> %x) {
1120 ; CHECK-FP-LABEL: fmax_v16f16_nofast:
1121 ; CHECK-FP: @ %bb.0: @ %entry
1122 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1123 ; CHECK-FP-NEXT: vrev32.16 q1, q0
1124 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1125 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1126 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1127 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1128 ; CHECK-FP-NEXT: bx lr
1130 ; CHECK-NOFP-LABEL: fmax_v16f16_nofast:
1131 ; CHECK-NOFP: @ %bb.0: @ %entry
1132 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
1133 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
1134 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1135 ; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8
1136 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s8
1137 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s1, s5
1138 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1139 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
1140 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
1141 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s8, s4
1142 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1143 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s2, s6
1144 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1145 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
1146 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1147 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s2, s4
1148 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
1149 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1150 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s3, s7
1151 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1152 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
1153 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s4, s2
1154 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1155 ; CHECK-NOFP-NEXT: bx lr
1157 %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
; fmax_v1f64_nofast: llvm.vector.reduce.fmax over a single-element
; <1 x double>, called without fast-math flags. Both llc configurations share
; one set of assertions, and no vmaxnm.f64 is listed among them.
1161 define arm_aapcs_vfpcc double @fmax_v1f64_nofast(<1 x double> %x) {
1162 ; CHECK-LABEL: fmax_v1f64_nofast:
1163 ; CHECK: @ %bb.0: @ %entry
1166 %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
; fmax_v2f64_nofast: llvm.vector.reduce.fmax over <2 x double>, called without
; fast-math flags. Both llc configurations expect one vmaxnm.f64 of d0 and d1.
1170 define arm_aapcs_vfpcc double @fmax_v2f64_nofast(<2 x double> %x) {
1171 ; CHECK-LABEL: fmax_v2f64_nofast:
1172 ; CHECK: @ %bb.0: @ %entry
1173 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1176 %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
; fmax_v4f64_nofast: llvm.vector.reduce.fmax over <4 x double>, called without
; fast-math flags. Both llc configurations expect three scalar vmaxnm.f64:
; (d1,d3), (d0,d2), then the two partial results.
1180 define arm_aapcs_vfpcc double @fmax_v4f64_nofast(<4 x double> %x) {
1181 ; CHECK-LABEL: fmax_v4f64_nofast:
1182 ; CHECK: @ %bb.0: @ %entry
1183 ; CHECK-NEXT: vmaxnm.f64 d1, d1, d3
1184 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d2
1185 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1188 %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
; fmax_v2f32_acc: fast-math fmax reduction of <2 x float> combined with a
; scalar accumulator %y through `fcmp fast ogt` + `select`.
; The compare/select pair is expected to become a second vmaxnm.f32 whose
; first source is s4 (presumably the register %y arrives in).
1192 define arm_aapcs_vfpcc float @fmax_v2f32_acc(<2 x float> %x, float %y) {
1193 ; CHECK-LABEL: fmax_v2f32_acc:
1194 ; CHECK: @ %bb.0: @ %entry
1195 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
1196 ; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
1199 %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
1200 %c = fcmp fast ogt float %y, %z
1201 %r = select i1 %c, float %y, float %z
; fmax_v4f32_acc: fast-math fmax reduction of <4 x float> combined with a
; scalar accumulator %y through `fcmp fast ogt` + `select`.
; Both llc configurations expect the same reduction sequence as the plain
; 4 x float case (pairwise with +mve.fp, sequential with integer-only MVE),
; followed by one extra vmaxnm.f32 with s4 that replaces the compare/select.
1205 define arm_aapcs_vfpcc float @fmax_v4f32_acc(<4 x float> %x, float %y) {
1206 ; CHECK-FP-LABEL: fmax_v4f32_acc:
1207 ; CHECK-FP: @ %bb.0: @ %entry
1208 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1209 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1210 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1211 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s4, s0
1212 ; CHECK-FP-NEXT: bx lr
1214 ; CHECK-NOFP-LABEL: fmax_v4f32_acc:
1215 ; CHECK-NOFP: @ %bb.0: @ %entry
1216 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1
1217 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1218 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s3
1219 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s4, s0
1220 ; CHECK-NOFP-NEXT: bx lr
1222 %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
1223 %c = fcmp fast ogt float %y, %z
1224 %r = select i1 %c, float %y, float %z
; fmax_v8f32_acc: fast-math fmax reduction of <8 x float> (q0 and q1) combined
; with a scalar accumulator %y through `fcmp fast ogt` + `select`.
; With +mve.fp: one vector vmaxnm.f32, three scalar ones, then a final
; vmaxnm.f32 with s8. With integer-only MVE: seven scalar vmaxnm.f32 (s10 is
; the temporary here), then the same final vmaxnm.f32 with s8.
1228 define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) {
1229 ; CHECK-FP-LABEL: fmax_v8f32_acc:
1230 ; CHECK-FP: @ %bb.0: @ %entry
1231 ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1
1232 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1233 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1234 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1235 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s8, s0
1236 ; CHECK-FP-NEXT: bx lr
1238 ; CHECK-NOFP-LABEL: fmax_v8f32_acc:
1239 ; CHECK-NOFP: @ %bb.0: @ %entry
1240 ; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s1, s5
1241 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
1242 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s10
1243 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s2, s6
1244 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1245 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s3, s7
1246 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1247 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s8, s0
1248 ; CHECK-NOFP-NEXT: bx lr
1250 %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
1251 %c = fcmp fast ogt float %y, %z
1252 %r = select i1 %c, float %y, float %z
; fmax_v2f16_acc: fast-math fmax reduction of <2 x half> combined with a
; scalar accumulator %y through `fcmp fast ogt` + `select`.
; Both llc configurations expect: vmovx.f16 to get the upper half of s0, one
; vmaxnm.f16 for the two lanes, and a second vmaxnm.f16 with s4 in place of
; the compare/select.
1256 define arm_aapcs_vfpcc half @fmax_v2f16_acc(<2 x half> %x, half %y) {
1257 ; CHECK-LABEL: fmax_v2f16_acc:
1258 ; CHECK: @ %bb.0: @ %entry
1259 ; CHECK-NEXT: vmovx.f16 s2, s0
1260 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
1261 ; CHECK-NEXT: vmaxnm.f16 s0, s4, s0
1264 %z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x)
1265 %c = fcmp fast ogt half %y, %z
1266 %r = select i1 %c, half %y, half %z
; fmax_v4f16_acc: fast-math fmax reduction of <4 x half> combined with a
; scalar accumulator %y through `fcmp fast ogt` + `select`.
; The reduction part follows the plain 4 x half shapes (pairwise with +mve.fp,
; sequential with integer-only MVE); the compare/select is expected to become
; a trailing vmaxnm.f16 with s4.
1270 define arm_aapcs_vfpcc half @fmax_v4f16_acc(<4 x half> %x, half %y) {
1271 ; CHECK-FP-LABEL: fmax_v4f16_acc:
1272 ; CHECK-FP: @ %bb.0: @ %entry
1273 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
1274 ; CHECK-FP-NEXT: vmovx.f16 s6, s0
1275 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s1, s2
1276 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6
1277 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1278 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s4, s0
1279 ; CHECK-FP-NEXT: bx lr
1281 ; CHECK-NOFP-LABEL: fmax_v4f16_acc:
1282 ; CHECK-NOFP: @ %bb.0: @ %entry
1283 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
1284 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1285 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
1286 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1287 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1288 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
1289 ; CHECK-NOFP-NEXT: bx lr
1291 %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
1292 %c = fcmp fast ogt half %y, %z
1293 %r = select i1 %c, half %y, half %z
; fmax_v8f16_acc: fast-math fmax reduction of <8 x half> combined with a
; scalar accumulator %y through `fcmp fast ogt` + `select`.
; With +mve.fp: vrev32.16 into q2, vector vmaxnm.f16, three scalar vmaxnm.f16,
; then a final vmaxnm.f16 with s4. With integer-only MVE: scalar
; vmovx.f16/vmaxnm.f16 chain (s6 as temporary) and the same final vmaxnm.f16.
1297 define arm_aapcs_vfpcc half @fmax_v8f16_acc(<8 x half> %x, half %y) {
1298 ; CHECK-FP-LABEL: fmax_v8f16_acc:
1299 ; CHECK-FP: @ %bb.0: @ %entry
1300 ; CHECK-FP-NEXT: vrev32.16 q2, q0
1301 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q2
1302 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1303 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1304 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1305 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s4, s0
1306 ; CHECK-FP-NEXT: bx lr
1308 ; CHECK-NOFP-LABEL: fmax_v8f16_acc:
1309 ; CHECK-NOFP: @ %bb.0: @ %entry
1310 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s0
1311 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s6
1312 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s1
1313 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1314 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s6
1315 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1316 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1317 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1318 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
1319 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s3
1320 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1321 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
1322 ; CHECK-NOFP-NEXT: bx lr
1324 %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
1325 %c = fcmp fast ogt half %y, %z
1326 %r = select i1 %c, half %y, half %z
; fmax_v16f16_acc: fast-math fmax reduction of <16 x half> (q0 and q1)
; combined with a scalar accumulator %y through `fcmp fast ogt` + `select`.
; With +mve.fp: vector vmaxnm.f16 of q0/q1, vrev32.16 + vector max, three
; scalar max, then a final vmaxnm.f16 with s8. With integer-only MVE: scalar
; vmovx.f16/vmaxnm.f16 only (s10/s12 as temporaries), ending in the same final
; vmaxnm.f16 with s8.
1330 define arm_aapcs_vfpcc half @fmax_v16f16_acc(<16 x half> %x, half %y) {
1331 ; CHECK-FP-LABEL: fmax_v16f16_acc:
1332 ; CHECK-FP: @ %bb.0: @ %entry
1333 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1334 ; CHECK-FP-NEXT: vrev32.16 q1, q0
1335 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1336 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1337 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1338 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1339 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s8, s0
1340 ; CHECK-FP-NEXT: bx lr
1342 ; CHECK-NOFP-LABEL: fmax_v16f16_acc:
1343 ; CHECK-NOFP: @ %bb.0: @ %entry
1344 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
1345 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
1346 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1347 ; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10
1348 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s10
1349 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s1, s5
1350 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1351 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
1352 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
1353 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s10, s4
1354 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1355 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s2, s6
1356 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1357 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
1358 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1359 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s2, s4
1360 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
1361 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1362 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s3, s7
1363 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1364 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
1365 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s4, s2
1366 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1367 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0
1368 ; CHECK-NOFP-NEXT: bx lr
1370 %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
1371 %c = fcmp fast ogt half %y, %z
1372 %r = select i1 %c, half %y, half %z
; fmax_v1f64_acc: fast-math fmax reduction of a single-element <1 x double>
; combined with a scalar accumulator %y through `fcmp fast ogt` + `select`.
; Both llc configurations expect the whole function body to reduce to one
; vmaxnm.f64 of d1 and d0.
1376 define arm_aapcs_vfpcc double @fmax_v1f64_acc(<1 x double> %x, double %y) {
1377 ; CHECK-LABEL: fmax_v1f64_acc:
1378 ; CHECK: @ %bb.0: @ %entry
1379 ; CHECK-NEXT: vmaxnm.f64 d0, d1, d0
1382 %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
1383 %c = fcmp fast ogt double %y, %z
1384 %r = select i1 %c, double %y, double %z
; fmax_v2f64_acc: fast-math fmax reduction of <2 x double> combined with a
; scalar accumulator %y through `fcmp fast ogt` + `select`.
; Both llc configurations expect two vmaxnm.f64: one for d0/d1 and one with d2
; that replaces the compare/select.
1388 define arm_aapcs_vfpcc double @fmax_v2f64_acc(<2 x double> %x, double %y) {
1389 ; CHECK-LABEL: fmax_v2f64_acc:
1390 ; CHECK: @ %bb.0: @ %entry
1391 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1392 ; CHECK-NEXT: vmaxnm.f64 d0, d2, d0
1395 %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
1396 %c = fcmp fast ogt double %y, %z
1397 %r = select i1 %c, double %y, double %z
; fmax_v4f64_acc: fast-math fmax reduction of <4 x double> combined with a
; scalar accumulator %y through `fcmp fast ogt` + `select`.
; Both llc configurations expect the element-wise step as vcmp.f64 / vmrs /
; vselgt.f64 (d1 vs d3, d0 vs d2), then vmaxnm.f64 for the partial results and
; a final vmaxnm.f64 with d4 for the accumulator.
1401 define arm_aapcs_vfpcc double @fmax_v4f64_acc(<4 x double> %x, double %y) {
1402 ; CHECK-LABEL: fmax_v4f64_acc:
1403 ; CHECK: @ %bb.0: @ %entry
1404 ; CHECK-NEXT: vcmp.f64 d1, d3
1405 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1406 ; CHECK-NEXT: vcmp.f64 d0, d2
1407 ; CHECK-NEXT: vselgt.f64 d1, d1, d3
1408 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1409 ; CHECK-NEXT: vselgt.f64 d0, d0, d2
1410 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1411 ; CHECK-NEXT: vmaxnm.f64 d0, d4, d0
1414 %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
1415 %c = fcmp fast ogt double %y, %z
1416 %r = select i1 %c, double %y, double %z
; fmax_v2f32_acc_nofast: fmax reduction of <2 x float> combined with a scalar
; accumulator %y through `fcmp ogt` + `select`, with no fast-math flags on
; any of the three instructions.
; The reduction is still one vmaxnm.f32, but the compare/select is expected to
; stay as vcmp.f32 / vmrs / vselgt.f32 rather than folding into a second max.
1420 define arm_aapcs_vfpcc float @fmax_v2f32_acc_nofast(<2 x float> %x, float %y) {
1421 ; CHECK-LABEL: fmax_v2f32_acc_nofast:
1422 ; CHECK: @ %bb.0: @ %entry
1423 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
1424 ; CHECK-NEXT: vcmp.f32 s4, s0
1425 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1426 ; CHECK-NEXT: vselgt.f32 s0, s4, s0
1429 %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
1430 %c = fcmp ogt float %y, %z
1431 %r = select i1 %c, float %y, float %z
; fmax_v4f32_acc_nofast: fmax reduction of <4 x float> combined with a scalar
; accumulator %y through `fcmp ogt` + `select`, all without fast-math flags.
; The reduction is three vmaxnm.f32 (pairwise with +mve.fp, sequential with
; integer-only MVE); the accumulator step is expected as vcmp.f32 / vmrs /
; vselgt.f32 against s4.
1435 define arm_aapcs_vfpcc float @fmax_v4f32_acc_nofast(<4 x float> %x, float %y) {
1436 ; CHECK-FP-LABEL: fmax_v4f32_acc_nofast:
1437 ; CHECK-FP: @ %bb.0: @ %entry
1438 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1439 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1440 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1441 ; CHECK-FP-NEXT: vcmp.f32 s4, s0
1442 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1443 ; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0
1444 ; CHECK-FP-NEXT: bx lr
1446 ; CHECK-NOFP-LABEL: fmax_v4f32_acc_nofast:
1447 ; CHECK-NOFP: @ %bb.0: @ %entry
1448 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1
1449 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1450 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s3
1451 ; CHECK-NOFP-NEXT: vcmp.f32 s4, s0
1452 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1453 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0
1454 ; CHECK-NOFP-NEXT: bx lr
1456 %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
1457 %c = fcmp ogt float %y, %z
1458 %r = select i1 %c, float %y, float %z
; fmax_v8f32_acc_nofast: fmax reduction of <8 x float> (q0 and q1) combined
; with a scalar accumulator %y through `fcmp ogt` + `select`, all without
; fast-math flags.
; With +mve.fp: one vector vmaxnm.f32 plus three scalar ones; with
; integer-only MVE: seven scalar vmaxnm.f32. In both cases the accumulator
; step is expected as vcmp.f32 / vmrs / vselgt.f32 against s8.
1462 define arm_aapcs_vfpcc float @fmax_v8f32_acc_nofast(<8 x float> %x, float %y) {
1463 ; CHECK-FP-LABEL: fmax_v8f32_acc_nofast:
1464 ; CHECK-FP: @ %bb.0: @ %entry
1465 ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1
1466 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1467 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1468 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1469 ; CHECK-FP-NEXT: vcmp.f32 s8, s0
1470 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1471 ; CHECK-FP-NEXT: vselgt.f32 s0, s8, s0
1472 ; CHECK-FP-NEXT: bx lr
1474 ; CHECK-NOFP-LABEL: fmax_v8f32_acc_nofast:
1475 ; CHECK-NOFP: @ %bb.0: @ %entry
1476 ; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s1, s5
1477 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
1478 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s10
1479 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s2, s6
1480 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1481 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s3, s7
1482 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1483 ; CHECK-NOFP-NEXT: vcmp.f32 s8, s0
1484 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1485 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0
1486 ; CHECK-NOFP-NEXT: bx lr
1488 %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
1489 %c = fcmp ogt float %y, %z
1490 %r = select i1 %c, float %y, float %z
; fmax_v4f16_acc_nofast: fmax reduction of <4 x half> combined with a scalar
; accumulator %y through `fcmp ogt` + `select`, all without fast-math flags.
; The reduction uses vmovx.f16 + three vmaxnm.f16 (pairwise with +mve.fp,
; sequential with integer-only MVE); the accumulator step is expected as
; vcmp.f16 / vmrs / vselgt.f16 against s4.
1494 define arm_aapcs_vfpcc half @fmax_v4f16_acc_nofast(<4 x half> %x, half %y) {
1495 ; CHECK-FP-LABEL: fmax_v4f16_acc_nofast:
1496 ; CHECK-FP: @ %bb.0: @ %entry
1497 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
1498 ; CHECK-FP-NEXT: vmovx.f16 s6, s0
1499 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s1, s2
1500 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6
1501 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1502 ; CHECK-FP-NEXT: vcmp.f16 s4, s0
1503 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1504 ; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
1505 ; CHECK-FP-NEXT: bx lr
1507 ; CHECK-NOFP-LABEL: fmax_v4f16_acc_nofast:
1508 ; CHECK-NOFP: @ %bb.0: @ %entry
1509 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
1510 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1511 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
1512 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1513 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1514 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
1515 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1516 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
1517 ; CHECK-NOFP-NEXT: bx lr
1519 %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
1520 %c = fcmp ogt half %y, %z
1521 %r = select i1 %c, half %y, half %z
; fmax_v8f16_acc_nofast: fmax reduction of <8 x half> combined with a scalar
; accumulator %y through `fcmp ogt` + `select`, all without fast-math flags.
; With +mve.fp: vrev32.16 into q2, vector vmaxnm.f16, three scalar max.
; With integer-only MVE: scalar vmovx.f16/vmaxnm.f16 chain into s0.
; In both cases the accumulator step is expected as vcmp.f16 / vmrs /
; vselgt.f16 against s4.
1525 define arm_aapcs_vfpcc half @fmax_v8f16_acc_nofast(<8 x half> %x, half %y) {
1526 ; CHECK-FP-LABEL: fmax_v8f16_acc_nofast:
1527 ; CHECK-FP: @ %bb.0: @ %entry
1528 ; CHECK-FP-NEXT: vrev32.16 q2, q0
1529 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q2
1530 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1531 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1532 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1533 ; CHECK-FP-NEXT: vcmp.f16 s4, s0
1534 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1535 ; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
1536 ; CHECK-FP-NEXT: bx lr
1538 ; CHECK-NOFP-LABEL: fmax_v8f16_acc_nofast:
1539 ; CHECK-NOFP: @ %bb.0: @ %entry
1540 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s0
1541 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s6
1542 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s1
1543 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1544 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s6
1545 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1546 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1547 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1548 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
1549 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s3
1550 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1551 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
1552 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1553 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
1554 ; CHECK-NOFP-NEXT: bx lr
1556 %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
1557 %c = fcmp ogt half %y, %z
1558 %r = select i1 %c, half %y, half %z
; fmax_v16f16_acc_nofast: fmax reduction of <16 x half> (q0 and q1) combined
; with a scalar accumulator %y through `fcmp ogt` + `select`, all without
; fast-math flags.
; With +mve.fp: vector vmaxnm.f16 of q0/q1, vrev32.16 + vector max, three
; scalar max. With integer-only MVE: scalar vmovx.f16/vmaxnm.f16 only.
; In both cases the accumulator step is expected as vcmp.f16 / vmrs /
; vselgt.f16 against s8.
1562 define arm_aapcs_vfpcc half @fmax_v16f16_acc_nofast(<16 x half> %x, half %y) {
1563 ; CHECK-FP-LABEL: fmax_v16f16_acc_nofast:
1564 ; CHECK-FP: @ %bb.0: @ %entry
1565 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1566 ; CHECK-FP-NEXT: vrev32.16 q1, q0
1567 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1568 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1569 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1570 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1571 ; CHECK-FP-NEXT: vcmp.f16 s8, s0
1572 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1573 ; CHECK-FP-NEXT: vselgt.f16 s0, s8, s0
1574 ; CHECK-FP-NEXT: bx lr
1576 ; CHECK-NOFP-LABEL: fmax_v16f16_acc_nofast:
1577 ; CHECK-NOFP: @ %bb.0: @ %entry
1578 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
1579 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
1580 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1581 ; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10
1582 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s10
1583 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s1, s5
1584 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1585 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
1586 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
1587 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s10, s4
1588 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1589 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s2, s6
1590 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1591 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
1592 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1593 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s2, s4
1594 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
1595 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1596 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s3, s7
1597 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1598 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
1599 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s4, s2
1600 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1601 ; CHECK-NOFP-NEXT: vcmp.f16 s8, s0
1602 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1603 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s8, s0
1604 ; CHECK-NOFP-NEXT: bx lr
1606 %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
1607 %c = fcmp ogt half %y, %z
1608 %r = select i1 %c, half %y, half %z
; fmax_v1f64_acc_nofast: fmax reduction of a single-element <1 x double>
; combined with a scalar accumulator %y through `fcmp ogt` + `select`, all
; without fast-math flags.
; Both llc configurations expect only the compare/select: vcmp.f64 d1, d0 /
; vmrs / vselgt.f64, with no vmaxnm.f64.
1612 define arm_aapcs_vfpcc double @fmax_v1f64_acc_nofast(<1 x double> %x, double %y) {
1613 ; CHECK-LABEL: fmax_v1f64_acc_nofast:
1614 ; CHECK: @ %bb.0: @ %entry
1615 ; CHECK-NEXT: vcmp.f64 d1, d0
1616 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1617 ; CHECK-NEXT: vselgt.f64 d0, d1, d0
1620 %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
1621 %c = fcmp ogt double %y, %z
1622 %r = select i1 %c, double %y, double %z
; fmax_v2f64_acc_nofast: fmax reduction of <2 x double> combined with a scalar
; accumulator %y through `fcmp ogt` + `select`, all without fast-math flags.
; Both llc configurations expect one vmaxnm.f64 for d0/d1, then vcmp.f64 /
; vmrs / vselgt.f64 against d2 for the accumulator.
1626 define arm_aapcs_vfpcc double @fmax_v2f64_acc_nofast(<2 x double> %x, double %y) {
1627 ; CHECK-LABEL: fmax_v2f64_acc_nofast:
1628 ; CHECK: @ %bb.0: @ %entry
1629 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1630 ; CHECK-NEXT: vcmp.f64 d2, d0
1631 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1632 ; CHECK-NEXT: vselgt.f64 d0, d2, d0
1635 %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
1636 %c = fcmp ogt double %y, %z
1637 %r = select i1 %c, double %y, double %z
; fmax_v4f64_acc_nofast: fmax reduction of <4 x double> combined with a scalar
; accumulator %y through `fcmp ogt` + `select`, all without fast-math flags.
; Both llc configurations expect three vmaxnm.f64 for the reduction, then
; vcmp.f64 / vmrs / vselgt.f64 against d4 for the accumulator.
1641 define arm_aapcs_vfpcc double @fmax_v4f64_acc_nofast(<4 x double> %x, double %y) {
1642 ; CHECK-LABEL: fmax_v4f64_acc_nofast:
1643 ; CHECK: @ %bb.0: @ %entry
1644 ; CHECK-NEXT: vmaxnm.f64 d1, d1, d3
1645 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d2
1646 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1647 ; CHECK-NEXT: vcmp.f64 d4, d0
1648 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1649 ; CHECK-NEXT: vselgt.f64 d0, d4, d0
1652 %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
1653 %c = fcmp ogt double %y, %z
1654 %r = select i1 %c, double %y, double %z
1658 declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
1659 declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
1660 declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
1661 declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
1662 declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
1663 declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
1664 declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
1665 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
1666 declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
1667 declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
1668 declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
1669 declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
1670 declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
1671 declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
1672 declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
1673 declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
1674 declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
1675 declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
1676 declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
1677 declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)