1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
3 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16,+fp64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP
; Fast-math fmin reduction of a <2 x float> vector.
; Checked under the common CHECK prefix (used by both RUN lines): a single
; scalar vminnm.f32 of s0 and s1 is expected.
5 define arm_aapcs_vfpcc float @fmin_v2f32(<2 x float> %x) {
6 ; CHECK-LABEL: fmin_v2f32:
7 ; CHECK: @ %bb.0: @ %entry
8 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
11 %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
; Fast-math fmin reduction of a <4 x float> vector.
; The CHECK-FP lines expect a pairwise tree (s2/s3, s0/s1, then the two results);
; the CHECK-NOFP lines expect a sequential chain folding s1, s2 and s3 into s0.
15 define arm_aapcs_vfpcc float @fmin_v4f32(<4 x float> %x) {
16 ; CHECK-FP-LABEL: fmin_v4f32:
17 ; CHECK-FP: @ %bb.0: @ %entry
18 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
19 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
20 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
21 ; CHECK-FP-NEXT: bx lr
23 ; CHECK-NOFP-LABEL: fmin_v4f32:
24 ; CHECK-NOFP: @ %bb.0: @ %entry
25 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1
26 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
27 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s3
28 ; CHECK-NOFP-NEXT: bx lr
30 %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
34 ; FIXME fminnum (vector) -> fminnum (scalar) ?
; Fast-math fmin reduction of an <8 x float> vector.
; The CHECK-FP lines expect one vector vminnm.f32 of q0 and q1 followed by the
; three-instruction scalar tree. The CHECK-NOFP lines expect the lanes of the
; two halves to be combined with vcmp.f32/vmrs/vselgt.f32 sequences, with the
; results folded into s0 by scalar vminnm.f32.
35 define arm_aapcs_vfpcc float @fmin_v8f32(<8 x float> %x) {
36 ; CHECK-FP-LABEL: fmin_v8f32:
37 ; CHECK-FP: @ %bb.0: @ %entry
38 ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1
39 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
40 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
41 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
42 ; CHECK-FP-NEXT: bx lr
44 ; CHECK-NOFP-LABEL: fmin_v8f32:
45 ; CHECK-NOFP: @ %bb.0: @ %entry
46 ; CHECK-NOFP-NEXT: vcmp.f32 s5, s1
47 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
48 ; CHECK-NOFP-NEXT: vcmp.f32 s4, s0
49 ; CHECK-NOFP-NEXT: vselgt.f32 s8, s1, s5
50 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
51 ; CHECK-NOFP-NEXT: vcmp.f32 s6, s2
52 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4
53 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
54 ; CHECK-NOFP-NEXT: vcmp.f32 s7, s3
55 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s8
56 ; CHECK-NOFP-NEXT: vselgt.f32 s2, s2, s6
57 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
58 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
59 ; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
60 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
61 ; CHECK-NOFP-NEXT: bx lr
63 %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
; Fast-math fmin reduction of a <4 x half> vector.
; Both configurations are expected to use vmovx.f16 plus scalar vminnm.f16;
; the CHECK-FP lines expect a pairwise tree, the CHECK-NOFP lines a sequential
; chain accumulating into s0.
67 define arm_aapcs_vfpcc half @fmin_v4f16(<4 x half> %x) {
68 ; CHECK-FP-LABEL: fmin_v4f16:
69 ; CHECK-FP: @ %bb.0: @ %entry
70 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
71 ; CHECK-FP-NEXT: vmovx.f16 s4, s0
72 ; CHECK-FP-NEXT: vminnm.f16 s2, s1, s2
73 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4
74 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
75 ; CHECK-FP-NEXT: bx lr
77 ; CHECK-NOFP-LABEL: fmin_v4f16:
78 ; CHECK-NOFP: @ %bb.0: @ %entry
79 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
80 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
81 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
82 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
83 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
84 ; CHECK-NOFP-NEXT: bx lr
86 %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
; Fast-math fmin reduction of an <8 x half> vector.
; The CHECK-FP lines expect a vrev32.16 plus one vector vminnm.f16, then a
; three-instruction scalar tree. The CHECK-NOFP lines expect a scalar-only
; chain of vmovx.f16 and vminnm.f16 accumulating into s0.
90 define arm_aapcs_vfpcc half @fmin_v8f16(<8 x half> %x) {
91 ; CHECK-FP-LABEL: fmin_v8f16:
92 ; CHECK-FP: @ %bb.0: @ %entry
93 ; CHECK-FP-NEXT: vrev32.16 q1, q0
94 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
95 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
96 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
97 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
98 ; CHECK-FP-NEXT: bx lr
100 ; CHECK-NOFP-LABEL: fmin_v8f16:
101 ; CHECK-NOFP: @ %bb.0: @ %entry
102 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s0
103 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
104 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s1
105 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
106 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
107 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
108 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
109 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
110 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
111 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s3
112 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
113 ; CHECK-NOFP-NEXT: bx lr
115 %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
; Fast-math fmin reduction of a <16 x half> vector.
; The CHECK-FP lines expect a vector vminnm.f16 of q0 and q1, then the same
; vrev32.16 + vector vminnm.f16 + scalar tree as the 8-lane case. The
; CHECK-NOFP lines expect vcmp.f16/vmrs/vselgt.f16 sequences interleaved with
; vmovx.f16, with the results folded into s0 by scalar vminnm.f16.
119 define arm_aapcs_vfpcc half @fmin_v16f16(<16 x half> %x) {
120 ; CHECK-FP-LABEL: fmin_v16f16:
121 ; CHECK-FP: @ %bb.0: @ %entry
122 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
123 ; CHECK-FP-NEXT: vrev32.16 q1, q0
124 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
125 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
126 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
127 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
128 ; CHECK-FP-NEXT: bx lr
130 ; CHECK-NOFP-LABEL: fmin_v16f16:
131 ; CHECK-NOFP: @ %bb.0: @ %entry
132 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
133 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
134 ; CHECK-NOFP-NEXT: vcmp.f16 s8, s10
135 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
136 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
137 ; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8
138 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
139 ; CHECK-NOFP-NEXT: vcmp.f16 s5, s1
140 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
141 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
142 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s8
143 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
144 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s1, s5
145 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
146 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
147 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s8
148 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
149 ; CHECK-NOFP-NEXT: vcmp.f16 s6, s2
150 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s8, s4
151 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
152 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
153 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s2, s6
154 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
155 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
156 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
157 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s2
158 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
159 ; CHECK-NOFP-NEXT: vcmp.f16 s7, s3
160 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s2, s4
161 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
162 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
163 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
164 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s3, s7
165 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
166 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
167 ; CHECK-NOFP-NEXT: vcmp.f16 s2, s4
168 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
169 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
170 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
171 ; CHECK-NOFP-NEXT: bx lr
173 %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
; Fast-math fmin reduction of a single-element <1 x double> vector.
; Only the label and the entry-block marker are checked in the lines visible
; here; no min instruction is expected by these checks.
177 define arm_aapcs_vfpcc double @fmin_v1f64(<1 x double> %x) {
178 ; CHECK-LABEL: fmin_v1f64:
179 ; CHECK: @ %bb.0: @ %entry
182 %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
; Fast-math fmin reduction of a <2 x double> vector.
; Checked under the common CHECK prefix: a single scalar vminnm.f64 of d0 and d1.
186 define arm_aapcs_vfpcc double @fmin_v2f64(<2 x double> %x) {
187 ; CHECK-LABEL: fmin_v2f64:
188 ; CHECK: @ %bb.0: @ %entry
189 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
192 %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
; Fast-math fmin reduction of a <4 x double> vector.
; Checked under the common CHECK prefix: two vcmp.f64/vmrs/vselgt.f64 sequences
; (d3 vs d1, d2 vs d0) followed by one scalar vminnm.f64 of d0 and d1.
196 define arm_aapcs_vfpcc double @fmin_v4f64(<4 x double> %x) {
197 ; CHECK-LABEL: fmin_v4f64:
198 ; CHECK: @ %bb.0: @ %entry
199 ; CHECK-NEXT: vcmp.f64 d3, d1
200 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
201 ; CHECK-NEXT: vcmp.f64 d2, d0
202 ; CHECK-NEXT: vselgt.f64 d1, d1, d3
203 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
204 ; CHECK-NEXT: vselgt.f64 d0, d0, d2
205 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
208 %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
; fmin reduction of a <2 x float> vector; the call carries no `fast` flag.
; Checked under the common CHECK prefix: a single scalar vminnm.f32 of s0 and s1.
212 define arm_aapcs_vfpcc float @fmin_v2f32_nofast(<2 x float> %x) {
213 ; CHECK-LABEL: fmin_v2f32_nofast:
214 ; CHECK: @ %bb.0: @ %entry
215 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
218 %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
; fmin reduction of a <4 x float> vector; the call carries no `fast` flag.
; The CHECK-FP lines expect a pairwise scalar tree; the CHECK-NOFP lines expect
; a sequential chain folding s1, s2 and s3 into s0.
222 define arm_aapcs_vfpcc float @fmin_v4f32_nofast(<4 x float> %x) {
223 ; CHECK-FP-LABEL: fmin_v4f32_nofast:
224 ; CHECK-FP: @ %bb.0: @ %entry
225 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
226 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
227 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
228 ; CHECK-FP-NEXT: bx lr
230 ; CHECK-NOFP-LABEL: fmin_v4f32_nofast:
231 ; CHECK-NOFP: @ %bb.0: @ %entry
232 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1
233 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
234 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s3
235 ; CHECK-NOFP-NEXT: bx lr
237 %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
; fmin reduction of an <8 x float> vector; the call carries no `fast` flag.
; The CHECK-FP lines expect one vector vminnm.f32 of q0 and q1 plus the scalar
; tree. The CHECK-NOFP lines expect only scalar vminnm.f32 instructions (seven
; in total), with no vcmp/vselgt sequences.
241 define arm_aapcs_vfpcc float @fmin_v8f32_nofast(<8 x float> %x) {
242 ; CHECK-FP-LABEL: fmin_v8f32_nofast:
243 ; CHECK-FP: @ %bb.0: @ %entry
244 ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1
245 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
246 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
247 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
248 ; CHECK-FP-NEXT: bx lr
250 ; CHECK-NOFP-LABEL: fmin_v8f32_nofast:
251 ; CHECK-NOFP: @ %bb.0: @ %entry
252 ; CHECK-NOFP-NEXT: vminnm.f32 s8, s1, s5
253 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
254 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s8
255 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s2, s6
256 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
257 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s3, s7
258 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
259 ; CHECK-NOFP-NEXT: bx lr
261 %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
; fmin reduction of a <4 x half> vector; the call carries no `fast` flag.
; Both configurations are expected to use vmovx.f16 plus scalar vminnm.f16;
; the CHECK-FP lines expect a pairwise tree, the CHECK-NOFP lines a sequential
; chain accumulating into s0.
265 define arm_aapcs_vfpcc half @fmin_v4f16_nofast(<4 x half> %x) {
266 ; CHECK-FP-LABEL: fmin_v4f16_nofast:
267 ; CHECK-FP: @ %bb.0: @ %entry
268 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
269 ; CHECK-FP-NEXT: vmovx.f16 s4, s0
270 ; CHECK-FP-NEXT: vminnm.f16 s2, s1, s2
271 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s4
272 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
273 ; CHECK-FP-NEXT: bx lr
275 ; CHECK-NOFP-LABEL: fmin_v4f16_nofast:
276 ; CHECK-NOFP: @ %bb.0: @ %entry
277 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
278 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
279 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
280 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
281 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
282 ; CHECK-NOFP-NEXT: bx lr
284 %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
; fmin reduction of an <8 x half> vector; the call carries no `fast` flag.
; The CHECK-FP lines expect a vrev32.16 plus one vector vminnm.f16, then a
; three-instruction scalar tree. The CHECK-NOFP lines expect a scalar-only
; chain of vmovx.f16 and vminnm.f16 accumulating into s0.
288 define arm_aapcs_vfpcc half @fmin_v8f16_nofast(<8 x half> %x) {
289 ; CHECK-FP-LABEL: fmin_v8f16_nofast:
290 ; CHECK-FP: @ %bb.0: @ %entry
291 ; CHECK-FP-NEXT: vrev32.16 q1, q0
292 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
293 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
294 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
295 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
296 ; CHECK-FP-NEXT: bx lr
298 ; CHECK-NOFP-LABEL: fmin_v8f16_nofast:
299 ; CHECK-NOFP: @ %bb.0: @ %entry
300 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s0
301 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
302 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s1
303 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
304 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
305 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
306 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
307 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
308 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
309 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s3
310 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
311 ; CHECK-NOFP-NEXT: bx lr
313 %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
; fmin reduction of a <16 x half> vector; the call carries no `fast` flag.
; The CHECK-FP lines expect a vector vminnm.f16 of q0 and q1, a vrev32.16 plus
; a second vector vminnm.f16, then the scalar tree. The CHECK-NOFP lines expect
; only vmovx.f16 and scalar vminnm.f16 instructions, with no vcmp/vselgt
; sequences.
317 define arm_aapcs_vfpcc half @fmin_v16f16_nofast(<16 x half> %x) {
318 ; CHECK-FP-LABEL: fmin_v16f16_nofast:
319 ; CHECK-FP: @ %bb.0: @ %entry
320 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
321 ; CHECK-FP-NEXT: vrev32.16 q1, q0
322 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
323 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
324 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
325 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
326 ; CHECK-FP-NEXT: bx lr
328 ; CHECK-NOFP-LABEL: fmin_v16f16_nofast:
329 ; CHECK-NOFP: @ %bb.0: @ %entry
330 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
331 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
332 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
333 ; CHECK-NOFP-NEXT: vminnm.f16 s8, s10, s8
334 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s8
335 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s1, s5
336 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
337 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
338 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
339 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s8, s4
340 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
341 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s2, s6
342 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
343 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
344 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
345 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s2, s4
346 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
347 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
348 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s3, s7
349 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
350 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
351 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s4, s2
352 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
353 ; CHECK-NOFP-NEXT: bx lr
355 %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
; fmin reduction of a single-element <1 x double> vector; no `fast` flag.
; Only the label and the entry-block marker are checked in the lines visible
; here; no min instruction is expected by these checks.
359 define arm_aapcs_vfpcc double @fmin_v1f64_nofast(<1 x double> %x) {
360 ; CHECK-LABEL: fmin_v1f64_nofast:
361 ; CHECK: @ %bb.0: @ %entry
364 %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
; fmin reduction of a <2 x double> vector; the call carries no `fast` flag.
; Checked under the common CHECK prefix: a single scalar vminnm.f64 of d0 and d1.
368 define arm_aapcs_vfpcc double @fmin_v2f64_nofast(<2 x double> %x) {
369 ; CHECK-LABEL: fmin_v2f64_nofast:
370 ; CHECK: @ %bb.0: @ %entry
371 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
374 %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
; fmin reduction of a <4 x double> vector; the call carries no `fast` flag.
; Checked under the common CHECK prefix: three scalar vminnm.f64 instructions
; (d1/d3, d0/d2, then the two results), with no vcmp/vselgt sequences.
378 define arm_aapcs_vfpcc double @fmin_v4f64_nofast(<4 x double> %x) {
379 ; CHECK-LABEL: fmin_v4f64_nofast:
380 ; CHECK: @ %bb.0: @ %entry
381 ; CHECK-NEXT: vminnm.f64 d1, d1, d3
382 ; CHECK-NEXT: vminnm.f64 d0, d0, d2
383 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
386 %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
; Fast-math fmin reduction of a <2 x float> vector, then combined with the
; scalar %y through a fast `fcmp olt` + `select`.
; The checks expect the reduction vminnm.f32 followed by one more vminnm.f32
; taking s4 as its first source operand.
390 define arm_aapcs_vfpcc float @fmin_v2f32_acc(<2 x float> %x, float %y) {
391 ; CHECK-LABEL: fmin_v2f32_acc:
392 ; CHECK: @ %bb.0: @ %entry
393 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
394 ; CHECK-NEXT: vminnm.f32 s0, s4, s0
397 %z = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
398 %c = fcmp fast olt float %y, %z
399 %r = select i1 %c, float %y, float %z
; Fast-math fmin reduction of a <4 x float> vector, then combined with the
; scalar %y through a fast `fcmp olt` + `select`.
; Both configurations expect their usual reduction sequence (pairwise tree for
; CHECK-FP, sequential chain for CHECK-NOFP) followed by a final vminnm.f32
; taking s4 as its first source operand.
403 define arm_aapcs_vfpcc float @fmin_v4f32_acc(<4 x float> %x, float %y) {
404 ; CHECK-FP-LABEL: fmin_v4f32_acc:
405 ; CHECK-FP: @ %bb.0: @ %entry
406 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
407 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
408 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
409 ; CHECK-FP-NEXT: vminnm.f32 s0, s4, s0
410 ; CHECK-FP-NEXT: bx lr
412 ; CHECK-NOFP-LABEL: fmin_v4f32_acc:
413 ; CHECK-NOFP: @ %bb.0: @ %entry
414 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1
415 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
416 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s3
417 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s4, s0
418 ; CHECK-NOFP-NEXT: bx lr
420 %z = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
421 %c = fcmp fast olt float %y, %z
422 %r = select i1 %c, float %y, float %z
; Fast-math fmin reduction of an <8 x float> vector, then combined with the
; scalar %y through a fast `fcmp olt` + `select`.
; The CHECK-FP lines expect the vector vminnm.f32 plus scalar tree; the
; CHECK-NOFP lines expect vcmp.f32/vmrs/vselgt.f32 sequences folded with scalar
; vminnm.f32. Both end with a vminnm.f32 taking s8 as its first source operand.
426 define arm_aapcs_vfpcc float @fmin_v8f32_acc(<8 x float> %x, float %y) {
427 ; CHECK-FP-LABEL: fmin_v8f32_acc:
428 ; CHECK-FP: @ %bb.0: @ %entry
429 ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1
430 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
431 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
432 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
433 ; CHECK-FP-NEXT: vminnm.f32 s0, s8, s0
434 ; CHECK-FP-NEXT: bx lr
436 ; CHECK-NOFP-LABEL: fmin_v8f32_acc:
437 ; CHECK-NOFP: @ %bb.0: @ %entry
438 ; CHECK-NOFP-NEXT: vcmp.f32 s5, s1
439 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
440 ; CHECK-NOFP-NEXT: vcmp.f32 s4, s0
441 ; CHECK-NOFP-NEXT: vselgt.f32 s10, s1, s5
442 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
443 ; CHECK-NOFP-NEXT: vcmp.f32 s6, s2
444 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4
445 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
446 ; CHECK-NOFP-NEXT: vcmp.f32 s7, s3
447 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s10
448 ; CHECK-NOFP-NEXT: vselgt.f32 s2, s2, s6
449 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
450 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
451 ; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
452 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
453 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s8, s0
454 ; CHECK-NOFP-NEXT: bx lr
456 %z = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
457 %c = fcmp fast olt float %y, %z
458 %r = select i1 %c, float %y, float %z
; Fast-math fmin reduction of a <4 x half> vector, then combined with the
; scalar %y through a fast `fcmp olt` + `select`.
; Both configurations expect vmovx.f16 plus scalar vminnm.f16 for the
; reduction, followed by a final vminnm.f16 taking s4 as its first source
; operand.
462 define arm_aapcs_vfpcc half @fmin_v4f16_acc(<4 x half> %x, half %y) {
463 ; CHECK-FP-LABEL: fmin_v4f16_acc:
464 ; CHECK-FP: @ %bb.0: @ %entry
465 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
466 ; CHECK-FP-NEXT: vmovx.f16 s6, s0
467 ; CHECK-FP-NEXT: vminnm.f16 s2, s1, s2
468 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s6
469 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
470 ; CHECK-FP-NEXT: vminnm.f16 s0, s4, s0
471 ; CHECK-FP-NEXT: bx lr
473 ; CHECK-NOFP-LABEL: fmin_v4f16_acc:
474 ; CHECK-NOFP: @ %bb.0: @ %entry
475 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
476 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
477 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
478 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
479 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
480 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
481 ; CHECK-NOFP-NEXT: bx lr
483 %z = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
484 %c = fcmp fast olt half %y, %z
485 %r = select i1 %c, half %y, half %z
; Fast-math fmin reduction of a <2 x half> vector, then combined with the
; scalar %y through a fast `fcmp olt` + `select`.
; Checked under the common CHECK prefix: one vmovx.f16, one vminnm.f16 for the
; reduction, and a final vminnm.f16 taking s4 as its first source operand.
489 define arm_aapcs_vfpcc half @fmin_v2f16_acc(<2 x half> %x, half %y) {
490 ; CHECK-LABEL: fmin_v2f16_acc:
491 ; CHECK: @ %bb.0: @ %entry
492 ; CHECK-NEXT: vmovx.f16 s2, s0
493 ; CHECK-NEXT: vminnm.f16 s0, s0, s2
494 ; CHECK-NEXT: vminnm.f16 s0, s4, s0
497 %z = call fast half @llvm.vector.reduce.fmin.v2f16(<2 x half> %x)
498 %c = fcmp fast olt half %y, %z
499 %r = select i1 %c, half %y, half %z
; Fast-math fmin reduction of an <8 x half> vector, then combined with the
; scalar %y through a fast `fcmp olt` + `select`.
; The CHECK-FP lines expect vrev32.16 (into q2) plus a vector vminnm.f16 and
; the scalar tree; the CHECK-NOFP lines expect a scalar vmovx.f16/vminnm.f16
; chain. Both end with a vminnm.f16 taking s4 as its first source operand.
503 define arm_aapcs_vfpcc half @fmin_v8f16_acc(<8 x half> %x, half %y) {
504 ; CHECK-FP-LABEL: fmin_v8f16_acc:
505 ; CHECK-FP: @ %bb.0: @ %entry
506 ; CHECK-FP-NEXT: vrev32.16 q2, q0
507 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q2
508 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
509 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
510 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
511 ; CHECK-FP-NEXT: vminnm.f16 s0, s4, s0
512 ; CHECK-FP-NEXT: bx lr
514 ; CHECK-NOFP-LABEL: fmin_v8f16_acc:
515 ; CHECK-NOFP: @ %bb.0: @ %entry
516 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s0
517 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s6
518 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s1
519 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
520 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s6
521 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
522 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
523 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
524 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
525 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s3
526 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
527 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s4, s0
528 ; CHECK-NOFP-NEXT: bx lr
530 %z = call fast half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
531 %c = fcmp fast olt half %y, %z
532 %r = select i1 %c, half %y, half %z
; Fast-math fmin reduction of a <16 x half> vector, then combined with the
; scalar %y through a fast `fcmp olt` + `select`.
; The CHECK-FP lines expect two vector vminnm.f16 (with a vrev32.16 between)
; and the scalar tree; the CHECK-NOFP lines expect vcmp.f16/vmrs/vselgt.f16
; sequences interleaved with vmovx.f16 and scalar vminnm.f16. Both end with a
; vminnm.f16 taking s8 as its first source operand.
536 define arm_aapcs_vfpcc half @fmin_v16f16_acc(<16 x half> %x, half %y) {
537 ; CHECK-FP-LABEL: fmin_v16f16_acc:
538 ; CHECK-FP: @ %bb.0: @ %entry
539 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
540 ; CHECK-FP-NEXT: vrev32.16 q1, q0
541 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
542 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
543 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
544 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
545 ; CHECK-FP-NEXT: vminnm.f16 s0, s8, s0
546 ; CHECK-FP-NEXT: bx lr
548 ; CHECK-NOFP-LABEL: fmin_v16f16_acc:
549 ; CHECK-NOFP: @ %bb.0: @ %entry
550 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
551 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
552 ; CHECK-NOFP-NEXT: vcmp.f16 s10, s12
553 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
554 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
555 ; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10
556 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
557 ; CHECK-NOFP-NEXT: vcmp.f16 s5, s1
558 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
559 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
560 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s10
561 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
562 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s1, s5
563 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
564 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
565 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s10
566 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
567 ; CHECK-NOFP-NEXT: vcmp.f16 s6, s2
568 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s10, s4
569 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
570 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
571 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s2, s6
572 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
573 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
574 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
575 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s2
576 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
577 ; CHECK-NOFP-NEXT: vcmp.f16 s7, s3
578 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s2, s4
579 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
580 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
581 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
582 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s3, s7
583 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
584 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
585 ; CHECK-NOFP-NEXT: vcmp.f16 s2, s4
586 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
587 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
588 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
589 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s8, s0
590 ; CHECK-NOFP-NEXT: bx lr
592 %z = call fast half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
593 %c = fcmp fast olt half %y, %z
594 %r = select i1 %c, half %y, half %z
; Fast-math fmin reduction of a single-element <1 x double> vector, then
; combined with the scalar %y through a fast `fcmp olt` + `select`.
; Checked under the common CHECK prefix: just one vminnm.f64 of d1 and d0.
598 define arm_aapcs_vfpcc double @fmin_v1f64_acc(<1 x double> %x, double %y) {
599 ; CHECK-LABEL: fmin_v1f64_acc:
600 ; CHECK: @ %bb.0: @ %entry
601 ; CHECK-NEXT: vminnm.f64 d0, d1, d0
604 %z = call fast double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
605 %c = fcmp fast olt double %y, %z
606 %r = select i1 %c, double %y, double %z
; Fast-math fmin reduction of a <2 x double> vector, then combined with the
; scalar %y through a fast `fcmp olt` + `select`.
; Checked under the common CHECK prefix: the reduction vminnm.f64 followed by
; one more vminnm.f64 taking d2 as its first source operand.
610 define arm_aapcs_vfpcc double @fmin_v2f64_acc(<2 x double> %x, double %y) {
611 ; CHECK-LABEL: fmin_v2f64_acc:
612 ; CHECK: @ %bb.0: @ %entry
613 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
614 ; CHECK-NEXT: vminnm.f64 d0, d2, d0
617 %z = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
618 %c = fcmp fast olt double %y, %z
619 %r = select i1 %c, double %y, double %z
; Fast-math fmin reduction of a <4 x double> vector, then combined with the
; scalar %y through a fast `fcmp olt` + `select`.
; Checked under the common CHECK prefix: two vcmp.f64/vmrs/vselgt.f64
; sequences, a vminnm.f64 of d0 and d1, and a final vminnm.f64 taking d4 as its
; first source operand.
623 define arm_aapcs_vfpcc double @fmin_v4f64_acc(<4 x double> %x, double %y) {
624 ; CHECK-LABEL: fmin_v4f64_acc:
625 ; CHECK: @ %bb.0: @ %entry
626 ; CHECK-NEXT: vcmp.f64 d3, d1
627 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
628 ; CHECK-NEXT: vcmp.f64 d2, d0
629 ; CHECK-NEXT: vselgt.f64 d1, d1, d3
630 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
631 ; CHECK-NEXT: vselgt.f64 d0, d0, d2
632 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
633 ; CHECK-NEXT: vminnm.f64 d0, d4, d0
636 %z = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
637 %c = fcmp fast olt double %y, %z
638 %r = select i1 %c, double %y, double %z
; fmin reduction of a <2 x float> vector combined with the scalar %y through
; `fcmp olt` + `select`; neither the call nor the compare carries `fast`.
; Checked under the common CHECK prefix: the reduction vminnm.f32, then the
; compare/select is expected as vcmp.f32/vmrs/vselgt.f32 against s4 instead of
; a further vminnm.
642 define arm_aapcs_vfpcc float @fmin_v2f32_acc_nofast(<2 x float> %x, float %y) {
643 ; CHECK-LABEL: fmin_v2f32_acc_nofast:
644 ; CHECK: @ %bb.0: @ %entry
645 ; CHECK-NEXT: vminnm.f32 s0, s0, s1
646 ; CHECK-NEXT: vcmp.f32 s0, s4
647 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
648 ; CHECK-NEXT: vselgt.f32 s0, s4, s0
651 %z = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %x)
652 %c = fcmp olt float %y, %z
653 %r = select i1 %c, float %y, float %z
; fmin reduction of a <4 x float> vector combined with the scalar %y through
; `fcmp olt` + `select`; neither the call nor the compare carries `fast`.
; Both configurations expect their usual vminnm.f32 reduction (pairwise for
; CHECK-FP, sequential for CHECK-NOFP), then vcmp.f32/vmrs/vselgt.f32 against s4.
657 define arm_aapcs_vfpcc float @fmin_v4f32_acc_nofast(<4 x float> %x, float %y) {
658 ; CHECK-FP-LABEL: fmin_v4f32_acc_nofast:
659 ; CHECK-FP: @ %bb.0: @ %entry
660 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
661 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
662 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
663 ; CHECK-FP-NEXT: vcmp.f32 s0, s4
664 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
665 ; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0
666 ; CHECK-FP-NEXT: bx lr
668 ; CHECK-NOFP-LABEL: fmin_v4f32_acc_nofast:
669 ; CHECK-NOFP: @ %bb.0: @ %entry
670 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s1
671 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
672 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s3
673 ; CHECK-NOFP-NEXT: vcmp.f32 s0, s4
674 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
675 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0
676 ; CHECK-NOFP-NEXT: bx lr
678 %z = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %x)
679 %c = fcmp olt float %y, %z
680 %r = select i1 %c, float %y, float %z
; fmin reduction of an <8 x float> vector combined with the scalar %y through
; `fcmp olt` + `select`; neither the call nor the compare carries `fast`.
; The CHECK-FP lines expect a vector vminnm.f32 plus scalar tree; the
; CHECK-NOFP lines expect a scalar-only vminnm.f32 sequence. Both end with
; vcmp.f32/vmrs/vselgt.f32 against s8.
684 define arm_aapcs_vfpcc float @fmin_v8f32_acc_nofast(<8 x float> %x, float %y) {
685 ; CHECK-FP-LABEL: fmin_v8f32_acc_nofast:
686 ; CHECK-FP: @ %bb.0: @ %entry
687 ; CHECK-FP-NEXT: vminnm.f32 q0, q0, q1
688 ; CHECK-FP-NEXT: vminnm.f32 s2, s2, s3
689 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s1
690 ; CHECK-FP-NEXT: vminnm.f32 s0, s0, s2
691 ; CHECK-FP-NEXT: vcmp.f32 s0, s8
692 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
693 ; CHECK-FP-NEXT: vselgt.f32 s0, s8, s0
694 ; CHECK-FP-NEXT: bx lr
696 ; CHECK-NOFP-LABEL: fmin_v8f32_acc_nofast:
697 ; CHECK-NOFP: @ %bb.0: @ %entry
698 ; CHECK-NOFP-NEXT: vminnm.f32 s10, s1, s5
699 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s4
700 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s10
701 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s2, s6
702 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
703 ; CHECK-NOFP-NEXT: vminnm.f32 s2, s3, s7
704 ; CHECK-NOFP-NEXT: vminnm.f32 s0, s0, s2
705 ; CHECK-NOFP-NEXT: vcmp.f32 s0, s8
706 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
707 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0
708 ; CHECK-NOFP-NEXT: bx lr
710 %z = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %x)
711 %c = fcmp olt float %y, %z
712 %r = select i1 %c, float %y, float %z
; fmin reduction of a <4 x half> vector combined with the scalar %y through
; `fcmp olt` + `select`; neither the call nor the compare carries `fast`.
; Both configurations expect vmovx.f16 plus scalar vminnm.f16 for the
; reduction, then vcmp.f16/vmrs/vselgt.f16 against s4.
716 define arm_aapcs_vfpcc half @fmin_v4f16_acc_nofast(<4 x half> %x, half %y) {
717 ; CHECK-FP-LABEL: fmin_v4f16_acc_nofast:
718 ; CHECK-FP: @ %bb.0: @ %entry
719 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
720 ; CHECK-FP-NEXT: vmovx.f16 s6, s0
721 ; CHECK-FP-NEXT: vminnm.f16 s2, s1, s2
722 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s6
723 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
724 ; CHECK-FP-NEXT: vcmp.f16 s0, s4
725 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
726 ; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
727 ; CHECK-FP-NEXT: bx lr
729 ; CHECK-NOFP-LABEL: fmin_v4f16_acc_nofast:
730 ; CHECK-NOFP: @ %bb.0: @ %entry
731 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
732 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
733 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
734 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
735 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
736 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
737 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
738 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
739 ; CHECK-NOFP-NEXT: bx lr
741 %z = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %x)
742 %c = fcmp olt half %y, %z
743 %r = select i1 %c, half %y, half %z
; fmin reduction of an <8 x half> vector combined with the scalar %y through
; `fcmp olt` + `select`; neither the call nor the compare carries `fast`.
; The CHECK-FP lines expect vrev32.16 (into q2) plus a vector vminnm.f16 and
; the scalar tree; the CHECK-NOFP lines expect a scalar vmovx.f16/vminnm.f16
; chain. Both end with vcmp.f16/vmrs/vselgt.f16 against s4.
747 define arm_aapcs_vfpcc half @fmin_v8f16_acc_nofast(<8 x half> %x, half %y) {
748 ; CHECK-FP-LABEL: fmin_v8f16_acc_nofast:
749 ; CHECK-FP: @ %bb.0: @ %entry
750 ; CHECK-FP-NEXT: vrev32.16 q2, q0
751 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q2
752 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
753 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
754 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
755 ; CHECK-FP-NEXT: vcmp.f16 s0, s4
756 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
757 ; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
758 ; CHECK-FP-NEXT: bx lr
760 ; CHECK-NOFP-LABEL: fmin_v8f16_acc_nofast:
761 ; CHECK-NOFP: @ %bb.0: @ %entry
762 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s0
763 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s6
764 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s1
765 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s1
766 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s6
767 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
768 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
769 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
770 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
771 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s3
772 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
773 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
774 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
775 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
776 ; CHECK-NOFP-NEXT: bx lr
778 %z = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %x)
779 %c = fcmp olt half %y, %z
780 %r = select i1 %c, half %y, half %z
; fmin reduction of a <16 x half> vector combined with the scalar %y through
; `fcmp olt` + `select`; neither the call nor the compare carries `fast`.
; The CHECK-FP lines expect two vector vminnm.f16 (with a vrev32.16 between)
; and the scalar tree; the CHECK-NOFP lines expect only vmovx.f16 and scalar
; vminnm.f16 for the reduction. Both end with vcmp.f16/vmrs/vselgt.f16
; against s8.
784 define arm_aapcs_vfpcc half @fmin_v16f16_acc_nofast(<16 x half> %x, half %y) {
785 ; CHECK-FP-LABEL: fmin_v16f16_acc_nofast:
786 ; CHECK-FP: @ %bb.0: @ %entry
787 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
788 ; CHECK-FP-NEXT: vrev32.16 q1, q0
789 ; CHECK-FP-NEXT: vminnm.f16 q0, q0, q1
790 ; CHECK-FP-NEXT: vminnm.f16 s2, s2, s3
791 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s1
792 ; CHECK-FP-NEXT: vminnm.f16 s0, s0, s2
793 ; CHECK-FP-NEXT: vcmp.f16 s0, s8
794 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
795 ; CHECK-FP-NEXT: vselgt.f16 s0, s8, s0
796 ; CHECK-FP-NEXT: bx lr
798 ; CHECK-NOFP-LABEL: fmin_v16f16_acc_nofast:
799 ; CHECK-NOFP: @ %bb.0: @ %entry
800 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
801 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
802 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
803 ; CHECK-NOFP-NEXT: vminnm.f16 s10, s12, s10
804 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s10
805 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s1, s5
806 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
807 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
808 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
809 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s10, s4
810 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
811 ; CHECK-NOFP-NEXT: vminnm.f16 s4, s2, s6
812 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s4
813 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
814 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
815 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s2, s4
816 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
817 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
818 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s3, s7
819 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
820 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
821 ; CHECK-NOFP-NEXT: vminnm.f16 s2, s4, s2
822 ; CHECK-NOFP-NEXT: vminnm.f16 s0, s0, s2
823 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s8
824 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
825 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s8, s0
826 ; CHECK-NOFP-NEXT: bx lr
828 %z = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %x)
829 %c = fcmp olt half %y, %z
830 %r = select i1 %c, half %y, half %z
; fmin reduction of a single-element <1 x double> vector combined with the
; scalar %y through `fcmp olt` + `select`; no `fast` flags anywhere.
; Checked under the common CHECK prefix: only vcmp.f64/vmrs/vselgt.f64 between
; d0 and d1, with no vminnm.
834 define arm_aapcs_vfpcc double @fmin_v1f64_acc_nofast(<1 x double> %x, double %y) {
835 ; CHECK-LABEL: fmin_v1f64_acc_nofast:
836 ; CHECK: @ %bb.0: @ %entry
837 ; CHECK-NEXT: vcmp.f64 d0, d1
838 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
839 ; CHECK-NEXT: vselgt.f64 d0, d1, d0
842 %z = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %x)
843 %c = fcmp olt double %y, %z
844 %r = select i1 %c, double %y, double %z
; fmin reduction of a <2 x double> vector combined with the scalar %y through
; `fcmp olt` + `select`; no `fast` flags anywhere.
; Checked under the common CHECK prefix: the reduction vminnm.f64, then
; vcmp.f64/vmrs/vselgt.f64 against d2.
848 define arm_aapcs_vfpcc double @fmin_v2f64_acc_nofast(<2 x double> %x, double %y) {
849 ; CHECK-LABEL: fmin_v2f64_acc_nofast:
850 ; CHECK: @ %bb.0: @ %entry
851 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
852 ; CHECK-NEXT: vcmp.f64 d0, d2
853 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
854 ; CHECK-NEXT: vselgt.f64 d0, d2, d0
857 %z = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %x)
858 %c = fcmp olt double %y, %z
859 %r = select i1 %c, double %y, double %z
; fmin reduction of a <4 x double> vector combined with the scalar %y through
; `fcmp olt` + `select`; no `fast` flags anywhere.
; Checked under the common CHECK prefix: three scalar vminnm.f64 for the
; reduction, then vcmp.f64/vmrs/vselgt.f64 against d4.
863 define arm_aapcs_vfpcc double @fmin_v4f64_acc_nofast(<4 x double> %x, double %y) {
864 ; CHECK-LABEL: fmin_v4f64_acc_nofast:
865 ; CHECK: @ %bb.0: @ %entry
866 ; CHECK-NEXT: vminnm.f64 d1, d1, d3
867 ; CHECK-NEXT: vminnm.f64 d0, d0, d2
868 ; CHECK-NEXT: vminnm.f64 d0, d0, d1
869 ; CHECK-NEXT: vcmp.f64 d0, d4
870 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
871 ; CHECK-NEXT: vselgt.f64 d0, d4, d0
874 %z = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x)
875 %c = fcmp olt double %y, %z
876 %r = select i1 %c, double %y, double %z
; Fast-math fmax reduction of a <2 x float> vector.
; Checked under the common CHECK prefix (used by both RUN lines): a single
; scalar vmaxnm.f32 of s0 and s1 is expected.
880 define arm_aapcs_vfpcc float @fmax_v2f32(<2 x float> %x) {
881 ; CHECK-LABEL: fmax_v2f32:
882 ; CHECK: @ %bb.0: @ %entry
883 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
886 %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
; Fast-math fmax reduction of a <4 x float> vector.
; The CHECK-FP lines expect a pairwise tree (s2/s3, s0/s1, then the two results);
; the CHECK-NOFP lines expect a sequential chain folding s1, s2 and s3 into s0.
890 define arm_aapcs_vfpcc float @fmax_v4f32(<4 x float> %x) {
891 ; CHECK-FP-LABEL: fmax_v4f32:
892 ; CHECK-FP: @ %bb.0: @ %entry
893 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
894 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
895 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
896 ; CHECK-FP-NEXT: bx lr
898 ; CHECK-NOFP-LABEL: fmax_v4f32:
899 ; CHECK-NOFP: @ %bb.0: @ %entry
900 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1
901 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
902 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s3
903 ; CHECK-NOFP-NEXT: bx lr
905 %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
; Fast-math fmax reduction of an <8 x float> vector.
; The CHECK-FP lines expect one vector vmaxnm.f32 of q0 and q1 followed by the
; three-instruction scalar tree. The CHECK-NOFP lines expect the lanes of the
; two halves to be combined with vcmp.f32/vmrs/vselgt.f32 sequences (compare
; operands in the opposite order to the fmin test), with the results folded
; into s0 by scalar vmaxnm.f32.
909 define arm_aapcs_vfpcc float @fmax_v8f32(<8 x float> %x) {
910 ; CHECK-FP-LABEL: fmax_v8f32:
911 ; CHECK-FP: @ %bb.0: @ %entry
912 ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1
913 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
914 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
915 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
916 ; CHECK-FP-NEXT: bx lr
918 ; CHECK-NOFP-LABEL: fmax_v8f32:
919 ; CHECK-NOFP: @ %bb.0: @ %entry
920 ; CHECK-NOFP-NEXT: vcmp.f32 s1, s5
921 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
922 ; CHECK-NOFP-NEXT: vcmp.f32 s0, s4
923 ; CHECK-NOFP-NEXT: vselgt.f32 s8, s1, s5
924 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
925 ; CHECK-NOFP-NEXT: vcmp.f32 s2, s6
926 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4
927 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
928 ; CHECK-NOFP-NEXT: vcmp.f32 s3, s7
929 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s8
930 ; CHECK-NOFP-NEXT: vselgt.f32 s2, s2, s6
931 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
932 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
933 ; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
934 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
935 ; CHECK-NOFP-NEXT: bx lr
937 %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
; Fast-math fmax reduction over <4 x half>. Both runs expect two vmovx.f16 and
; three scalar vmaxnm.f16; the +mve.fp run issues both vmovx.f16 first and
; combines the values pairwise, while the +mve,+fullfp16 run folds every value
; into s0 sequentially.
941 define arm_aapcs_vfpcc half @fmax_v4f16(<4 x half> %x) {
942 ; CHECK-FP-LABEL: fmax_v4f16:
943 ; CHECK-FP: @ %bb.0: @ %entry
944 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
945 ; CHECK-FP-NEXT: vmovx.f16 s4, s0
946 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s1, s2
947 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4
948 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
949 ; CHECK-FP-NEXT: bx lr
951 ; CHECK-NOFP-LABEL: fmax_v4f16:
952 ; CHECK-NOFP: @ %bb.0: @ %entry
953 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
954 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
955 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
956 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
957 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
958 ; CHECK-NOFP-NEXT: bx lr
960 %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
; Fast-math fmax reduction over <8 x half>. The +mve.fp run expects a
; vrev32.16 plus one vector vmaxnm.f16, then three scalar vmaxnm.f16. The
; +mve,+fullfp16 run expects a fully scalar chain: vmovx.f16 extracts
; interleaved with vmaxnm.f16 accumulating into s0.
964 define arm_aapcs_vfpcc half @fmax_v8f16(<8 x half> %x) {
965 ; CHECK-FP-LABEL: fmax_v8f16:
966 ; CHECK-FP: @ %bb.0: @ %entry
967 ; CHECK-FP-NEXT: vrev32.16 q1, q0
968 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
969 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
970 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
971 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
972 ; CHECK-FP-NEXT: bx lr
974 ; CHECK-NOFP-LABEL: fmax_v8f16:
975 ; CHECK-NOFP: @ %bb.0: @ %entry
976 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s0
977 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
978 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s1
979 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
980 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
981 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
982 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
983 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
984 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
985 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s3
986 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
987 ; CHECK-NOFP-NEXT: bx lr
989 %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
; Fast-math fmax reduction over <16 x half> (two q registers). The +mve.fp run
; expects a vector vmaxnm.f16 of q0 and q1, a vrev32.16 plus second vector
; vmaxnm.f16, then three scalar vmaxnm.f16. The +mve,+fullfp16 run expects each
; q0/q1 element pair to be merged with vcmp.f16 / vmrs / vselgt.f16 (using
; vmovx.f16 to reach the other half of each s register) and the merged values
; accumulated into s0 with scalar vmaxnm.f16.
993 define arm_aapcs_vfpcc half @fmax_v16f16(<16 x half> %x) {
994 ; CHECK-FP-LABEL: fmax_v16f16:
995 ; CHECK-FP: @ %bb.0: @ %entry
996 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
997 ; CHECK-FP-NEXT: vrev32.16 q1, q0
998 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
999 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1000 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1001 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1002 ; CHECK-FP-NEXT: bx lr
1004 ; CHECK-NOFP-LABEL: fmax_v16f16:
1005 ; CHECK-NOFP: @ %bb.0: @ %entry
1006 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
1007 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
1008 ; CHECK-NOFP-NEXT: vcmp.f16 s10, s8
1009 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1010 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
1011 ; CHECK-NOFP-NEXT: vselgt.f16 s8, s10, s8
1012 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1013 ; CHECK-NOFP-NEXT: vcmp.f16 s1, s5
1014 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
1015 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1016 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s8
1017 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
1018 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s1, s5
1019 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1020 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
1021 ; CHECK-NOFP-NEXT: vcmp.f16 s8, s4
1022 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1023 ; CHECK-NOFP-NEXT: vcmp.f16 s2, s6
1024 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s8, s4
1025 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1026 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1027 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s2, s6
1028 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1029 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1030 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
1031 ; CHECK-NOFP-NEXT: vcmp.f16 s2, s4
1032 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1033 ; CHECK-NOFP-NEXT: vcmp.f16 s3, s7
1034 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s2, s4
1035 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1036 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1037 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
1038 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s3, s7
1039 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1040 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
1041 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s2
1042 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1043 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
1044 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1045 ; CHECK-NOFP-NEXT: bx lr
1047 %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
; Fast-math fmax reduction over a single-element <1 x double>. The assertions
; list no max or compare instruction after the entry label for either run.
1051 define arm_aapcs_vfpcc double @fmax_v1f64(<1 x double> %x) {
1052 ; CHECK-LABEL: fmax_v1f64:
1053 ; CHECK: @ %bb.0: @ %entry
1056 %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
; Fast-math fmax reduction over <2 x double>. Both runs expect a single scalar
; vmaxnm.f64 combining d0 and d1.
1060 define arm_aapcs_vfpcc double @fmax_v2f64(<2 x double> %x) {
1061 ; CHECK-LABEL: fmax_v2f64:
1062 ; CHECK: @ %bb.0: @ %entry
1063 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1066 %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
; Fast-math fmax reduction over <4 x double>. Both runs expect the d1/d3 and
; d0/d2 pairs to be merged with vcmp.f64 / vmrs / vselgt.f64, followed by one
; vmaxnm.f64 combining the two results.
1070 define arm_aapcs_vfpcc double @fmax_v4f64(<4 x double> %x) {
1071 ; CHECK-LABEL: fmax_v4f64:
1072 ; CHECK: @ %bb.0: @ %entry
1073 ; CHECK-NEXT: vcmp.f64 d1, d3
1074 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1075 ; CHECK-NEXT: vcmp.f64 d0, d2
1076 ; CHECK-NEXT: vselgt.f64 d1, d1, d3
1077 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1078 ; CHECK-NEXT: vselgt.f64 d0, d0, d2
1079 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1082 %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
; fmax reduction over <2 x float> without the `fast` flag on the intrinsic
; call. Both runs expect a single scalar vmaxnm.f32 combining s0 and s1.
1086 define arm_aapcs_vfpcc float @fmax_v2f32_nofast(<2 x float> %x) {
1087 ; CHECK-LABEL: fmax_v2f32_nofast:
1088 ; CHECK: @ %bb.0: @ %entry
1089 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
1092 %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
; fmax reduction over <4 x float> without the `fast` flag. Both runs expect
; three scalar vmaxnm.f32: pairwise (s2/s3, s0/s1, combine) for the +mve.fp
; run, sequential accumulation into s0 for the +mve,+fullfp16 run.
1096 define arm_aapcs_vfpcc float @fmax_v4f32_nofast(<4 x float> %x) {
1097 ; CHECK-FP-LABEL: fmax_v4f32_nofast:
1098 ; CHECK-FP: @ %bb.0: @ %entry
1099 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1100 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1101 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1102 ; CHECK-FP-NEXT: bx lr
1104 ; CHECK-NOFP-LABEL: fmax_v4f32_nofast:
1105 ; CHECK-NOFP: @ %bb.0: @ %entry
1106 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1
1107 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1108 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s3
1109 ; CHECK-NOFP-NEXT: bx lr
1111 %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
; fmax reduction over <8 x float> without the `fast` flag. The +mve.fp run
; expects one vector vmaxnm.f32 of q0 and q1 followed by three scalar
; vmaxnm.f32. The +mve,+fullfp16 run expects seven scalar vmaxnm.f32 and no
; compare/select sequences.
1115 define arm_aapcs_vfpcc float @fmax_v8f32_nofast(<8 x float> %x) {
1116 ; CHECK-FP-LABEL: fmax_v8f32_nofast:
1117 ; CHECK-FP: @ %bb.0: @ %entry
1118 ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1
1119 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1120 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1121 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1122 ; CHECK-FP-NEXT: bx lr
1124 ; CHECK-NOFP-LABEL: fmax_v8f32_nofast:
1125 ; CHECK-NOFP: @ %bb.0: @ %entry
1126 ; CHECK-NOFP-NEXT: vmaxnm.f32 s8, s1, s5
1127 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
1128 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s8
1129 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s2, s6
1130 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1131 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s3, s7
1132 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1133 ; CHECK-NOFP-NEXT: bx lr
1135 %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
; fmax reduction over <4 x half> without the `fast` flag. Both runs expect two
; vmovx.f16 and three scalar vmaxnm.f16; the +mve.fp run combines pairwise, the
; +mve,+fullfp16 run accumulates sequentially into s0.
1139 define arm_aapcs_vfpcc half @fmax_v4f16_nofast(<4 x half> %x) {
1140 ; CHECK-FP-LABEL: fmax_v4f16_nofast:
1141 ; CHECK-FP: @ %bb.0: @ %entry
1142 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
1143 ; CHECK-FP-NEXT: vmovx.f16 s4, s0
1144 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s1, s2
1145 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s4
1146 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1147 ; CHECK-FP-NEXT: bx lr
1149 ; CHECK-NOFP-LABEL: fmax_v4f16_nofast:
1150 ; CHECK-NOFP: @ %bb.0: @ %entry
1151 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
1152 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1153 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
1154 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1155 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1156 ; CHECK-NOFP-NEXT: bx lr
1158 %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
; fmax reduction over <8 x half> without the `fast` flag. The +mve.fp run
; expects vrev32.16 plus a vector vmaxnm.f16, then three scalar vmaxnm.f16.
; The +mve,+fullfp16 run expects a scalar chain of vmovx.f16 and vmaxnm.f16
; accumulating into s0.
1162 define arm_aapcs_vfpcc half @fmax_v8f16_nofast(<8 x half> %x) {
1163 ; CHECK-FP-LABEL: fmax_v8f16_nofast:
1164 ; CHECK-FP: @ %bb.0: @ %entry
1165 ; CHECK-FP-NEXT: vrev32.16 q1, q0
1166 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1167 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1168 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1169 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1170 ; CHECK-FP-NEXT: bx lr
1172 ; CHECK-NOFP-LABEL: fmax_v8f16_nofast:
1173 ; CHECK-NOFP: @ %bb.0: @ %entry
1174 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s0
1175 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1176 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s1
1177 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1178 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1179 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1180 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1181 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1182 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
1183 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s3
1184 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1185 ; CHECK-NOFP-NEXT: bx lr
1187 %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
; fmax reduction over <16 x half> without the `fast` flag. The +mve.fp run
; expects two vector vmaxnm.f16 (with a vrev32.16 between them) and three
; scalar vmaxnm.f16. The +mve,+fullfp16 run expects only vmovx.f16 and scalar
; vmaxnm.f16, with no compare/select sequences.
1191 define arm_aapcs_vfpcc half @fmax_v16f16_nofast(<16 x half> %x) {
1192 ; CHECK-FP-LABEL: fmax_v16f16_nofast:
1193 ; CHECK-FP: @ %bb.0: @ %entry
1194 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1195 ; CHECK-FP-NEXT: vrev32.16 q1, q0
1196 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1197 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1198 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1199 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1200 ; CHECK-FP-NEXT: bx lr
1202 ; CHECK-NOFP-LABEL: fmax_v16f16_nofast:
1203 ; CHECK-NOFP: @ %bb.0: @ %entry
1204 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s0
1205 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s4
1206 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1207 ; CHECK-NOFP-NEXT: vmaxnm.f16 s8, s10, s8
1208 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s8
1209 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s1, s5
1210 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1211 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
1212 ; CHECK-NOFP-NEXT: vmovx.f16 s8, s1
1213 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s8, s4
1214 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1215 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s2, s6
1216 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1217 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
1218 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1219 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s2, s4
1220 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
1221 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1222 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s3, s7
1223 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1224 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
1225 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s4, s2
1226 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1227 ; CHECK-NOFP-NEXT: bx lr
1229 %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
; fmax reduction over a single-element <1 x double> without the `fast` flag.
; The assertions list no max or compare instruction after the entry label.
1233 define arm_aapcs_vfpcc double @fmax_v1f64_nofast(<1 x double> %x) {
1234 ; CHECK-LABEL: fmax_v1f64_nofast:
1235 ; CHECK: @ %bb.0: @ %entry
1238 %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
; fmax reduction over <2 x double> without the `fast` flag. Both runs expect a
; single scalar vmaxnm.f64 combining d0 and d1.
1242 define arm_aapcs_vfpcc double @fmax_v2f64_nofast(<2 x double> %x) {
1243 ; CHECK-LABEL: fmax_v2f64_nofast:
1244 ; CHECK: @ %bb.0: @ %entry
1245 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1248 %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
; fmax reduction over <4 x double> without the `fast` flag. Both runs expect
; three scalar vmaxnm.f64 (d1/d3, d0/d2, then the two results) and no
; compare/select sequences.
1252 define arm_aapcs_vfpcc double @fmax_v4f64_nofast(<4 x double> %x) {
1253 ; CHECK-LABEL: fmax_v4f64_nofast:
1254 ; CHECK: @ %bb.0: @ %entry
1255 ; CHECK-NEXT: vmaxnm.f64 d1, d1, d3
1256 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d2
1257 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1260 %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
; Fast-math fmax reduction over <2 x float> whose result is then combined with
; the scalar %y via `fcmp fast ogt` + `select`. Both runs expect the
; compare/select to become a second vmaxnm.f32 (operand s4 is presumably %y).
1264 define arm_aapcs_vfpcc float @fmax_v2f32_acc(<2 x float> %x, float %y) {
1265 ; CHECK-LABEL: fmax_v2f32_acc:
1266 ; CHECK: @ %bb.0: @ %entry
1267 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
1268 ; CHECK-NEXT: vmaxnm.f32 s0, s4, s0
1271 %z = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
1272 %c = fcmp fast ogt float %y, %z
1273 %r = select i1 %c, float %y, float %z
; Fast-math fmax reduction over <4 x float> combined with the scalar %y via
; `fcmp fast ogt` + `select`. Both runs expect three scalar vmaxnm.f32 for the
; reduction plus a final vmaxnm.f32 against s4 (presumably %y).
1277 define arm_aapcs_vfpcc float @fmax_v4f32_acc(<4 x float> %x, float %y) {
1278 ; CHECK-FP-LABEL: fmax_v4f32_acc:
1279 ; CHECK-FP: @ %bb.0: @ %entry
1280 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1281 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1282 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1283 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s4, s0
1284 ; CHECK-FP-NEXT: bx lr
1286 ; CHECK-NOFP-LABEL: fmax_v4f32_acc:
1287 ; CHECK-NOFP: @ %bb.0: @ %entry
1288 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1
1289 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1290 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s3
1291 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s4, s0
1292 ; CHECK-NOFP-NEXT: bx lr
1294 %z = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
1295 %c = fcmp fast ogt float %y, %z
1296 %r = select i1 %c, float %y, float %z
; Fast-math fmax reduction over <8 x float> combined with the scalar %y via
; `fcmp fast ogt` + `select`. The +mve.fp run expects a vector vmaxnm.f32 and
; three scalar ones; the +mve,+fullfp16 run expects vcmp.f32 / vmrs /
; vselgt.f32 lane merges accumulated by scalar vmaxnm.f32. Both finish with a
; vmaxnm.f32 against s8 (presumably %y).
1300 define arm_aapcs_vfpcc float @fmax_v8f32_acc(<8 x float> %x, float %y) {
1301 ; CHECK-FP-LABEL: fmax_v8f32_acc:
1302 ; CHECK-FP: @ %bb.0: @ %entry
1303 ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1
1304 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1305 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1306 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1307 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s8, s0
1308 ; CHECK-FP-NEXT: bx lr
1310 ; CHECK-NOFP-LABEL: fmax_v8f32_acc:
1311 ; CHECK-NOFP: @ %bb.0: @ %entry
1312 ; CHECK-NOFP-NEXT: vcmp.f32 s1, s5
1313 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1314 ; CHECK-NOFP-NEXT: vcmp.f32 s0, s4
1315 ; CHECK-NOFP-NEXT: vselgt.f32 s10, s1, s5
1316 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1317 ; CHECK-NOFP-NEXT: vcmp.f32 s2, s6
1318 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s0, s4
1319 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1320 ; CHECK-NOFP-NEXT: vcmp.f32 s3, s7
1321 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s10
1322 ; CHECK-NOFP-NEXT: vselgt.f32 s2, s2, s6
1323 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1324 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1325 ; CHECK-NOFP-NEXT: vselgt.f32 s4, s3, s7
1326 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
1327 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s8, s0
1328 ; CHECK-NOFP-NEXT: bx lr
1330 %z = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
1331 %c = fcmp fast ogt float %y, %z
1332 %r = select i1 %c, float %y, float %z
; Fast-math fmax reduction over <2 x half> combined with the scalar %y via
; `fcmp fast ogt` + `select`. Both runs expect one vmovx.f16, one vmaxnm.f16
; for the reduction and a final vmaxnm.f16 against s4 (presumably %y).
1336 define arm_aapcs_vfpcc half @fmax_v2f16_acc(<2 x half> %x, half %y) {
1337 ; CHECK-LABEL: fmax_v2f16_acc:
1338 ; CHECK: @ %bb.0: @ %entry
1339 ; CHECK-NEXT: vmovx.f16 s2, s0
1340 ; CHECK-NEXT: vmaxnm.f16 s0, s0, s2
1341 ; CHECK-NEXT: vmaxnm.f16 s0, s4, s0
1344 %z = call fast half @llvm.vector.reduce.fmax.v2f16(<2 x half> %x)
1345 %c = fcmp fast ogt half %y, %z
1346 %r = select i1 %c, half %y, half %z
; Fast-math fmax reduction over <4 x half> combined with the scalar %y via
; `fcmp fast ogt` + `select`. Both runs expect two vmovx.f16 and three
; vmaxnm.f16 for the reduction (pairwise vs. sequential order), then a final
; vmaxnm.f16 against s4 (presumably %y).
1350 define arm_aapcs_vfpcc half @fmax_v4f16_acc(<4 x half> %x, half %y) {
1351 ; CHECK-FP-LABEL: fmax_v4f16_acc:
1352 ; CHECK-FP: @ %bb.0: @ %entry
1353 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
1354 ; CHECK-FP-NEXT: vmovx.f16 s6, s0
1355 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s1, s2
1356 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6
1357 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1358 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s4, s0
1359 ; CHECK-FP-NEXT: bx lr
1361 ; CHECK-NOFP-LABEL: fmax_v4f16_acc:
1362 ; CHECK-NOFP: @ %bb.0: @ %entry
1363 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
1364 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1365 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
1366 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1367 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1368 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
1369 ; CHECK-NOFP-NEXT: bx lr
1371 %z = call fast half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
1372 %c = fcmp fast ogt half %y, %z
1373 %r = select i1 %c, half %y, half %z
; Fast-math fmax reduction over <8 x half> combined with the scalar %y via
; `fcmp fast ogt` + `select`. The +mve.fp run expects vrev32.16 (into q2) plus
; a vector vmaxnm.f16 and three scalar ones; the +mve,+fullfp16 run expects a
; scalar vmovx.f16 / vmaxnm.f16 chain. Both finish with a vmaxnm.f16 against
; s4 (presumably %y).
1377 define arm_aapcs_vfpcc half @fmax_v8f16_acc(<8 x half> %x, half %y) {
1378 ; CHECK-FP-LABEL: fmax_v8f16_acc:
1379 ; CHECK-FP: @ %bb.0: @ %entry
1380 ; CHECK-FP-NEXT: vrev32.16 q2, q0
1381 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q2
1382 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1383 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1384 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1385 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s4, s0
1386 ; CHECK-FP-NEXT: bx lr
1388 ; CHECK-NOFP-LABEL: fmax_v8f16_acc:
1389 ; CHECK-NOFP: @ %bb.0: @ %entry
1390 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s0
1391 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s6
1392 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s1
1393 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1394 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s6
1395 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1396 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1397 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1398 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
1399 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s3
1400 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1401 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s4, s0
1402 ; CHECK-NOFP-NEXT: bx lr
1404 %z = call fast half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
1405 %c = fcmp fast ogt half %y, %z
1406 %r = select i1 %c, half %y, half %z
; Fast-math fmax reduction over <16 x half> combined with the scalar %y via
; `fcmp fast ogt` + `select`. The +mve.fp run expects two vector vmaxnm.f16
; (with vrev32.16 between) and three scalar ones; the +mve,+fullfp16 run
; expects vcmp.f16 / vmrs / vselgt.f16 merges of q0/q1 elements accumulated by
; scalar vmaxnm.f16. Both finish with a vmaxnm.f16 against s8 (presumably %y).
1410 define arm_aapcs_vfpcc half @fmax_v16f16_acc(<16 x half> %x, half %y) {
1411 ; CHECK-FP-LABEL: fmax_v16f16_acc:
1412 ; CHECK-FP: @ %bb.0: @ %entry
1413 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1414 ; CHECK-FP-NEXT: vrev32.16 q1, q0
1415 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1416 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1417 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1418 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1419 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s8, s0
1420 ; CHECK-FP-NEXT: bx lr
1422 ; CHECK-NOFP-LABEL: fmax_v16f16_acc:
1423 ; CHECK-NOFP: @ %bb.0: @ %entry
1424 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
1425 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
1426 ; CHECK-NOFP-NEXT: vcmp.f16 s12, s10
1427 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1428 ; CHECK-NOFP-NEXT: vcmp.f16 s0, s4
1429 ; CHECK-NOFP-NEXT: vselgt.f16 s10, s12, s10
1430 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1431 ; CHECK-NOFP-NEXT: vcmp.f16 s1, s5
1432 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s0, s4
1433 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1434 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s10
1435 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
1436 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s1, s5
1437 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1438 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
1439 ; CHECK-NOFP-NEXT: vcmp.f16 s10, s4
1440 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1441 ; CHECK-NOFP-NEXT: vcmp.f16 s2, s6
1442 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s10, s4
1443 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1444 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1445 ; CHECK-NOFP-NEXT: vselgt.f16 s4, s2, s6
1446 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1447 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1448 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
1449 ; CHECK-NOFP-NEXT: vcmp.f16 s2, s4
1450 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1451 ; CHECK-NOFP-NEXT: vcmp.f16 s3, s7
1452 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s2, s4
1453 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1454 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1455 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
1456 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s3, s7
1457 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1458 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
1459 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s2
1460 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1461 ; CHECK-NOFP-NEXT: vselgt.f16 s2, s4, s2
1462 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1463 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s8, s0
1464 ; CHECK-NOFP-NEXT: bx lr
1466 %z = call fast half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
1467 %c = fcmp fast ogt half %y, %z
1468 %r = select i1 %c, half %y, half %z
; Fast-math fmax reduction over <1 x double> combined with the scalar %y via
; `fcmp fast ogt` + `select`. Both runs expect the whole function to reduce to
; a single vmaxnm.f64 of d1 and d0.
1472 define arm_aapcs_vfpcc double @fmax_v1f64_acc(<1 x double> %x, double %y) {
1473 ; CHECK-LABEL: fmax_v1f64_acc:
1474 ; CHECK: @ %bb.0: @ %entry
1475 ; CHECK-NEXT: vmaxnm.f64 d0, d1, d0
1478 %z = call fast double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
1479 %c = fcmp fast ogt double %y, %z
1480 %r = select i1 %c, double %y, double %z
; Fast-math fmax reduction over <2 x double> combined with the scalar %y via
; `fcmp fast ogt` + `select`. Both runs expect one vmaxnm.f64 for the reduction
; and a second against d2 (presumably %y).
1484 define arm_aapcs_vfpcc double @fmax_v2f64_acc(<2 x double> %x, double %y) {
1485 ; CHECK-LABEL: fmax_v2f64_acc:
1486 ; CHECK: @ %bb.0: @ %entry
1487 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1488 ; CHECK-NEXT: vmaxnm.f64 d0, d2, d0
1491 %z = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
1492 %c = fcmp fast ogt double %y, %z
1493 %r = select i1 %c, double %y, double %z
; Fast-math fmax reduction over <4 x double> combined with the scalar %y via
; `fcmp fast ogt` + `select`. Both runs expect vcmp.f64 / vmrs / vselgt.f64 for
; the d1/d3 and d0/d2 merges, a vmaxnm.f64 of the two results, and a final
; vmaxnm.f64 against d4 (presumably %y).
1497 define arm_aapcs_vfpcc double @fmax_v4f64_acc(<4 x double> %x, double %y) {
1498 ; CHECK-LABEL: fmax_v4f64_acc:
1499 ; CHECK: @ %bb.0: @ %entry
1500 ; CHECK-NEXT: vcmp.f64 d1, d3
1501 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1502 ; CHECK-NEXT: vcmp.f64 d0, d2
1503 ; CHECK-NEXT: vselgt.f64 d1, d1, d3
1504 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1505 ; CHECK-NEXT: vselgt.f64 d0, d0, d2
1506 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1507 ; CHECK-NEXT: vmaxnm.f64 d0, d4, d0
1510 %z = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
1511 %c = fcmp fast ogt double %y, %z
1512 %r = select i1 %c, double %y, double %z
; fmax reduction over <2 x float> without fast-math flags, combined with %y via
; plain `fcmp ogt` + `select`. Both runs expect one vmaxnm.f32 for the
; reduction, then the compare/select kept as vcmp.f32 / vmrs / vselgt.f32
; against s4 (presumably %y) rather than a second vmaxnm.
1516 define arm_aapcs_vfpcc float @fmax_v2f32_acc_nofast(<2 x float> %x, float %y) {
1517 ; CHECK-LABEL: fmax_v2f32_acc_nofast:
1518 ; CHECK: @ %bb.0: @ %entry
1519 ; CHECK-NEXT: vmaxnm.f32 s0, s0, s1
1520 ; CHECK-NEXT: vcmp.f32 s4, s0
1521 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1522 ; CHECK-NEXT: vselgt.f32 s0, s4, s0
1525 %z = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %x)
1526 %c = fcmp ogt float %y, %z
1527 %r = select i1 %c, float %y, float %z
; fmax reduction over <4 x float> without fast-math flags, combined with %y via
; plain `fcmp ogt` + `select`. Both runs expect three scalar vmaxnm.f32 for the
; reduction, then vcmp.f32 / vmrs / vselgt.f32 against s4 (presumably %y).
1531 define arm_aapcs_vfpcc float @fmax_v4f32_acc_nofast(<4 x float> %x, float %y) {
1532 ; CHECK-FP-LABEL: fmax_v4f32_acc_nofast:
1533 ; CHECK-FP: @ %bb.0: @ %entry
1534 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1535 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1536 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1537 ; CHECK-FP-NEXT: vcmp.f32 s4, s0
1538 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1539 ; CHECK-FP-NEXT: vselgt.f32 s0, s4, s0
1540 ; CHECK-FP-NEXT: bx lr
1542 ; CHECK-NOFP-LABEL: fmax_v4f32_acc_nofast:
1543 ; CHECK-NOFP: @ %bb.0: @ %entry
1544 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s1
1545 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1546 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s3
1547 ; CHECK-NOFP-NEXT: vcmp.f32 s4, s0
1548 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1549 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s4, s0
1550 ; CHECK-NOFP-NEXT: bx lr
1552 %z = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %x)
1553 %c = fcmp ogt float %y, %z
1554 %r = select i1 %c, float %y, float %z
; fmax reduction over <8 x float> without fast-math flags, combined with %y via
; plain `fcmp ogt` + `select`. The +mve.fp run expects a vector vmaxnm.f32 plus
; three scalar ones; the +mve,+fullfp16 run expects seven scalar vmaxnm.f32.
; Both finish with vcmp.f32 / vmrs / vselgt.f32 against s8 (presumably %y).
1558 define arm_aapcs_vfpcc float @fmax_v8f32_acc_nofast(<8 x float> %x, float %y) {
1559 ; CHECK-FP-LABEL: fmax_v8f32_acc_nofast:
1560 ; CHECK-FP: @ %bb.0: @ %entry
1561 ; CHECK-FP-NEXT: vmaxnm.f32 q0, q0, q1
1562 ; CHECK-FP-NEXT: vmaxnm.f32 s2, s2, s3
1563 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s1
1564 ; CHECK-FP-NEXT: vmaxnm.f32 s0, s0, s2
1565 ; CHECK-FP-NEXT: vcmp.f32 s8, s0
1566 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1567 ; CHECK-FP-NEXT: vselgt.f32 s0, s8, s0
1568 ; CHECK-FP-NEXT: bx lr
1570 ; CHECK-NOFP-LABEL: fmax_v8f32_acc_nofast:
1571 ; CHECK-NOFP: @ %bb.0: @ %entry
1572 ; CHECK-NOFP-NEXT: vmaxnm.f32 s10, s1, s5
1573 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s4
1574 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s10
1575 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s2, s6
1576 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1577 ; CHECK-NOFP-NEXT: vmaxnm.f32 s2, s3, s7
1578 ; CHECK-NOFP-NEXT: vmaxnm.f32 s0, s0, s2
1579 ; CHECK-NOFP-NEXT: vcmp.f32 s8, s0
1580 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1581 ; CHECK-NOFP-NEXT: vselgt.f32 s0, s8, s0
1582 ; CHECK-NOFP-NEXT: bx lr
1584 %z = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %x)
1585 %c = fcmp ogt float %y, %z
1586 %r = select i1 %c, float %y, float %z
; fmax reduction over <4 x half> without fast-math flags, combined with %y via
; plain `fcmp ogt` + `select`. Both runs expect two vmovx.f16 and three
; vmaxnm.f16 for the reduction, then vcmp.f16 / vmrs / vselgt.f16 against s4
; (presumably %y).
1590 define arm_aapcs_vfpcc half @fmax_v4f16_acc_nofast(<4 x half> %x, half %y) {
1591 ; CHECK-FP-LABEL: fmax_v4f16_acc_nofast:
1592 ; CHECK-FP: @ %bb.0: @ %entry
1593 ; CHECK-FP-NEXT: vmovx.f16 s2, s1
1594 ; CHECK-FP-NEXT: vmovx.f16 s6, s0
1595 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s1, s2
1596 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s6
1597 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1598 ; CHECK-FP-NEXT: vcmp.f16 s4, s0
1599 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1600 ; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
1601 ; CHECK-FP-NEXT: bx lr
1603 ; CHECK-NOFP-LABEL: fmax_v4f16_acc_nofast:
1604 ; CHECK-NOFP: @ %bb.0: @ %entry
1605 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s0
1606 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1607 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s1
1608 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1609 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1610 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
1611 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1612 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
1613 ; CHECK-NOFP-NEXT: bx lr
1615 %z = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %x)
1616 %c = fcmp ogt half %y, %z
1617 %r = select i1 %c, half %y, half %z
; fmax reduction over <8 x half> without fast-math flags, combined with %y via
; plain `fcmp ogt` + `select`. The +mve.fp run expects vrev32.16 (into q2), a
; vector vmaxnm.f16 and three scalar ones; the +mve,+fullfp16 run expects a
; scalar vmovx.f16 / vmaxnm.f16 chain. Both finish with vcmp.f16 / vmrs /
; vselgt.f16 against s4 (presumably %y).
1621 define arm_aapcs_vfpcc half @fmax_v8f16_acc_nofast(<8 x half> %x, half %y) {
1622 ; CHECK-FP-LABEL: fmax_v8f16_acc_nofast:
1623 ; CHECK-FP: @ %bb.0: @ %entry
1624 ; CHECK-FP-NEXT: vrev32.16 q2, q0
1625 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q2
1626 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1627 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1628 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1629 ; CHECK-FP-NEXT: vcmp.f16 s4, s0
1630 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1631 ; CHECK-FP-NEXT: vselgt.f16 s0, s4, s0
1632 ; CHECK-FP-NEXT: bx lr
1634 ; CHECK-NOFP-LABEL: fmax_v8f16_acc_nofast:
1635 ; CHECK-NOFP: @ %bb.0: @ %entry
1636 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s0
1637 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s6
1638 ; CHECK-NOFP-NEXT: vmovx.f16 s6, s1
1639 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s1
1640 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s6
1641 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1642 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1643 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1644 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s3
1645 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s3
1646 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1647 ; CHECK-NOFP-NEXT: vcmp.f16 s4, s0
1648 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1649 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s4, s0
1650 ; CHECK-NOFP-NEXT: bx lr
1652 %z = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %x)
1653 %c = fcmp ogt half %y, %z
1654 %r = select i1 %c, half %y, half %z
; fmax reduction over <16 x half> without fast-math flags, combined with %y via
; plain `fcmp ogt` + `select`. The +mve.fp run expects two vector vmaxnm.f16
; (with vrev32.16 between) and three scalar ones; the +mve,+fullfp16 run
; expects only vmovx.f16 and scalar vmaxnm.f16 for the reduction. Both finish
; with vcmp.f16 / vmrs / vselgt.f16 against s8 (presumably %y).
1658 define arm_aapcs_vfpcc half @fmax_v16f16_acc_nofast(<16 x half> %x, half %y) {
1659 ; CHECK-FP-LABEL: fmax_v16f16_acc_nofast:
1660 ; CHECK-FP: @ %bb.0: @ %entry
1661 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1662 ; CHECK-FP-NEXT: vrev32.16 q1, q0
1663 ; CHECK-FP-NEXT: vmaxnm.f16 q0, q0, q1
1664 ; CHECK-FP-NEXT: vmaxnm.f16 s2, s2, s3
1665 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s1
1666 ; CHECK-FP-NEXT: vmaxnm.f16 s0, s0, s2
1667 ; CHECK-FP-NEXT: vcmp.f16 s8, s0
1668 ; CHECK-FP-NEXT: vmrs APSR_nzcv, fpscr
1669 ; CHECK-FP-NEXT: vselgt.f16 s0, s8, s0
1670 ; CHECK-FP-NEXT: bx lr
1672 ; CHECK-NOFP-LABEL: fmax_v16f16_acc_nofast:
1673 ; CHECK-NOFP: @ %bb.0: @ %entry
1674 ; CHECK-NOFP-NEXT: vmovx.f16 s12, s0
1675 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s4
1676 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1677 ; CHECK-NOFP-NEXT: vmaxnm.f16 s10, s12, s10
1678 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s10
1679 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s1, s5
1680 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1681 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s5
1682 ; CHECK-NOFP-NEXT: vmovx.f16 s10, s1
1683 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s10, s4
1684 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1685 ; CHECK-NOFP-NEXT: vmaxnm.f16 s4, s2, s6
1686 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s4
1687 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s6
1688 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s2
1689 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s2, s4
1690 ; CHECK-NOFP-NEXT: vmovx.f16 s4, s3
1691 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1692 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s3, s7
1693 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1694 ; CHECK-NOFP-NEXT: vmovx.f16 s2, s7
1695 ; CHECK-NOFP-NEXT: vmaxnm.f16 s2, s4, s2
1696 ; CHECK-NOFP-NEXT: vmaxnm.f16 s0, s0, s2
1697 ; CHECK-NOFP-NEXT: vcmp.f16 s8, s0
1698 ; CHECK-NOFP-NEXT: vmrs APSR_nzcv, fpscr
1699 ; CHECK-NOFP-NEXT: vselgt.f16 s0, s8, s0
1700 ; CHECK-NOFP-NEXT: bx lr
1702 %z = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %x)
1703 %c = fcmp ogt half %y, %z
1704 %r = select i1 %c, half %y, half %z
; fmax reduction over <1 x double> without fast-math flags, combined with %y
; via plain `fcmp ogt` + `select`. Both runs expect only the compare/select:
; vcmp.f64 / vmrs / vselgt.f64 on d1 and d0, with no vmaxnm.
1708 define arm_aapcs_vfpcc double @fmax_v1f64_acc_nofast(<1 x double> %x, double %y) {
1709 ; CHECK-LABEL: fmax_v1f64_acc_nofast:
1710 ; CHECK: @ %bb.0: @ %entry
1711 ; CHECK-NEXT: vcmp.f64 d1, d0
1712 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1713 ; CHECK-NEXT: vselgt.f64 d0, d1, d0
1716 %z = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %x)
1717 %c = fcmp ogt double %y, %z
1718 %r = select i1 %c, double %y, double %z
; fmax reduction over <2 x double> without fast-math flags, combined with %y
; via plain `fcmp ogt` + `select`. Both runs expect one vmaxnm.f64 for the
; reduction, then vcmp.f64 / vmrs / vselgt.f64 against d2 (presumably %y).
1722 define arm_aapcs_vfpcc double @fmax_v2f64_acc_nofast(<2 x double> %x, double %y) {
1723 ; CHECK-LABEL: fmax_v2f64_acc_nofast:
1724 ; CHECK: @ %bb.0: @ %entry
1725 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1726 ; CHECK-NEXT: vcmp.f64 d2, d0
1727 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1728 ; CHECK-NEXT: vselgt.f64 d0, d2, d0
1731 %z = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %x)
1732 %c = fcmp ogt double %y, %z
1733 %r = select i1 %c, double %y, double %z
; fmax reduction over <4 x double> without fast-math flags, combined with %y
; via plain `fcmp ogt` + `select`. Both runs expect three vmaxnm.f64 for the
; reduction, then vcmp.f64 / vmrs / vselgt.f64 against d4 (presumably %y).
1737 define arm_aapcs_vfpcc double @fmax_v4f64_acc_nofast(<4 x double> %x, double %y) {
1738 ; CHECK-LABEL: fmax_v4f64_acc_nofast:
1739 ; CHECK: @ %bb.0: @ %entry
1740 ; CHECK-NEXT: vmaxnm.f64 d1, d1, d3
1741 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d2
1742 ; CHECK-NEXT: vmaxnm.f64 d0, d0, d1
1743 ; CHECK-NEXT: vcmp.f64 d4, d0
1744 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr
1745 ; CHECK-NEXT: vselgt.f64 d0, d4, d0
1748 %z = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x)
1749 %c = fcmp ogt double %y, %z
1750 %r = select i1 %c, double %y, double %z
; Declarations of the llvm.vector.reduce.fmax / fmin intrinsics called by the
; tests in this file, grouped by element type (double, float, half) and listed
; per vector width.
1754 declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
1755 declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
1756 declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
1757 declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
1758 declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
1759 declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
1760 declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
1761 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
1762 declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
1763 declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
1764 declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
1765 declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
1766 declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
1767 declare half @llvm.vector.reduce.fmax.v2f16(<2 x half>)
1768 declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
1769 declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
1770 declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
1771 declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)
1772 declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
1773 declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)