1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-SD
3 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-SD
4 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOFP --check-prefix=CHECK-NOFP-GI
5 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP --check-prefix=CHECK-FP-GI
7 ; CHECK-NOFP-GI: warning: Instruction selection used fallback path for test_v11f16
8 ; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
9 ; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
10 ; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
11 ; CHECK-NOFP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
13 ; CHECK-FP-GI: warning: Instruction selection used fallback path for test_v11f16
14 ; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v11f16_ninf
15 ; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32
16 ; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v3f32_ninf
17 ; CHECK-FP-GI-NEXT: warning: Instruction selection used fallback path for test_v2f128
; Declarations of the llvm.vector.reduce.fmax overloads called by the test
; functions below, grouped as: one-element vectors, power-of-two element
; counts, and the 11 x half / 3 x float / 2 x fp128 shapes.
19 declare half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
20 declare float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
21 declare double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
22 declare fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
24 declare half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
25 declare half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
26 declare half @llvm.vector.reduce.fmax.v16f16(<16 x half> %a)
27 declare float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
28 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
29 declare float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a)
30 declare float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
31 declare double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
32 declare double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a)
34 declare half @llvm.vector.reduce.fmax.v11f16(<11 x half> %a)
35 declare float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
36 declare fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)
; Reduces a one-element half vector with the nnan-flagged fmax reduction
; intrinsic. The same label assertion is shared by all four llc runs.
38 define half @test_v1f16(<1 x half> %a) nounwind {
39 ; CHECK-LABEL: test_v1f16:
42 %b = call nnan half @llvm.vector.reduce.fmax.v1f16(<1 x half> %a)
; Reduces a one-element float vector with the nnan-flagged fmax reduction
; intrinsic. In all four runs the expected body holds only register "kill"
; annotations followed by ret, i.e. no arithmetic instruction is expected.
; The runs without -global-isel expect two kill annotations (d0 -> q0, then
; s0 out of q0); the -global-isel runs expect a single one (s0 out of d0).
46 define float @test_v1f32(<1 x float> %a) nounwind {
47 ; CHECK-NOFP-SD-LABEL: test_v1f32:
48 ; CHECK-NOFP-SD: // %bb.0:
49 ; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0
50 ; CHECK-NOFP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
51 ; CHECK-NOFP-SD-NEXT: ret
53 ; CHECK-FP-SD-LABEL: test_v1f32:
54 ; CHECK-FP-SD: // %bb.0:
55 ; CHECK-FP-SD-NEXT: // kill: def $d0 killed $d0 def $q0
56 ; CHECK-FP-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
57 ; CHECK-FP-SD-NEXT: ret
59 ; CHECK-NOFP-GI-LABEL: test_v1f32:
60 ; CHECK-NOFP-GI: // %bb.0:
61 ; CHECK-NOFP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0
62 ; CHECK-NOFP-GI-NEXT: ret
64 ; CHECK-FP-GI-LABEL: test_v1f32:
65 ; CHECK-FP-GI: // %bb.0:
66 ; CHECK-FP-GI-NEXT: // kill: def $s0 killed $s0 killed $d0
67 ; CHECK-FP-GI-NEXT: ret
68 %b = call nnan float @llvm.vector.reduce.fmax.v1f32(<1 x float> %a)
; Reduces a one-element double vector with the nnan-flagged fmax reduction
; intrinsic. The same label assertion is shared by all four llc runs.
72 define double @test_v1f64(<1 x double> %a) nounwind {
73 ; CHECK-LABEL: test_v1f64:
76 %b = call nnan double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
; Reduces a one-element fp128 vector with the nnan-flagged fmax reduction
; intrinsic. The same label assertion is shared by all four llc runs.
80 define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
81 ; CHECK-LABEL: test_v1f128:
84 %b = call nnan fp128 @llvm.vector.reduce.fmax.v1f128(<1 x fp128> %a)
; nnan fmax reduction of a 4 x half vector. Expected code per configuration:
;  * no +fullfp16, no -global-isel: lanes 1..3 are extracted with mov, every
;    operand is widened to single precision with fcvt, combined with scalar
;    fmaxnm, and the running value is rounded back to half after each step.
;  * +fullfp16 (either instruction selector): one fmaxnmv on v0.4h.
;  * no +fullfp16, -global-isel: one fcvtl to 4 x float, fmaxnmv on v0.4s,
;    then a single fcvt back to half.
88 define half @test_v4f16(<4 x half> %a) nounwind {
89 ; CHECK-NOFP-SD-LABEL: test_v4f16:
90 ; CHECK-NOFP-SD: // %bb.0:
91 ; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0
92 ; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1]
93 ; CHECK-NOFP-SD-NEXT: fcvt s2, h0
94 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
95 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s2, s1
96 ; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2]
97 ; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3]
98 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
99 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
100 ; CHECK-NOFP-SD-NEXT: fcvt s0, h0
101 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
102 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2
103 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
104 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
105 ; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0
106 ; CHECK-NOFP-SD-NEXT: fcvt h0, s0
107 ; CHECK-NOFP-SD-NEXT: ret
109 ; CHECK-FP-LABEL: test_v4f16:
110 ; CHECK-FP: // %bb.0:
111 ; CHECK-FP-NEXT: fmaxnmv h0, v0.4h
114 ; CHECK-NOFP-GI-LABEL: test_v4f16:
115 ; CHECK-NOFP-GI: // %bb.0:
116 ; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h
117 ; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s
118 ; CHECK-NOFP-GI-NEXT: fcvt h0, s0
119 ; CHECK-NOFP-GI-NEXT: ret
120 %b = call nnan half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
; Same reduction as test_v4f16 but the call additionally carries the ninf
; fast-math flag. The expected instruction sequences listed here are the same
; as the ones listed for test_v4f16: scalar fcvt/fmaxnm chain without
; +fullfp16 and without -global-isel, a single fmaxnmv on v0.4h with
; +fullfp16, and fcvtl + fmaxnmv + fcvt without +fullfp16 under -global-isel.
124 define half @test_v4f16_ninf(<4 x half> %a) nounwind {
125 ; CHECK-NOFP-SD-LABEL: test_v4f16_ninf:
126 ; CHECK-NOFP-SD: // %bb.0:
127 ; CHECK-NOFP-SD-NEXT: // kill: def $d0 killed $d0 def $q0
128 ; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1]
129 ; CHECK-NOFP-SD-NEXT: fcvt s2, h0
130 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
131 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s2, s1
132 ; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2]
133 ; CHECK-NOFP-SD-NEXT: mov h0, v0.h[3]
134 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
135 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
136 ; CHECK-NOFP-SD-NEXT: fcvt s0, h0
137 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
138 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2
139 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
140 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
141 ; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0
142 ; CHECK-NOFP-SD-NEXT: fcvt h0, s0
143 ; CHECK-NOFP-SD-NEXT: ret
145 ; CHECK-FP-LABEL: test_v4f16_ninf:
146 ; CHECK-FP: // %bb.0:
147 ; CHECK-FP-NEXT: fmaxnmv h0, v0.4h
150 ; CHECK-NOFP-GI-LABEL: test_v4f16_ninf:
151 ; CHECK-NOFP-GI: // %bb.0:
152 ; CHECK-NOFP-GI-NEXT: fcvtl v0.4s, v0.4h
153 ; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s
154 ; CHECK-NOFP-GI-NEXT: fcvt h0, s0
155 ; CHECK-NOFP-GI-NEXT: ret
156 %b = call nnan ninf half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
; nnan fmax reduction of an 8 x half vector. Expected code per configuration:
;  * no +fullfp16, no -global-isel: a serial chain over lanes 1..7; each lane
;    is extracted with mov, widened with fcvt, folded into the running value
;    with scalar fmaxnm, and the running value is rounded to half and widened
;    again between steps.
;  * +fullfp16 (either instruction selector): one fmaxnmv on v0.8h.
;  * no +fullfp16, -global-isel: fcvtl/fcvtl2 widen the two halves to
;    4 x float, one vector fmaxnm combines them, fmaxnmv reduces, and a single
;    fcvt narrows the result.
160 define half @test_v8f16(<8 x half> %a) nounwind {
161 ; CHECK-NOFP-SD-LABEL: test_v8f16:
162 ; CHECK-NOFP-SD: // %bb.0:
163 ; CHECK-NOFP-SD-NEXT: mov h1, v0.h[1]
164 ; CHECK-NOFP-SD-NEXT: fcvt s2, h0
165 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
166 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s2, s1
167 ; CHECK-NOFP-SD-NEXT: mov h2, v0.h[2]
168 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
169 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
170 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
171 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2
172 ; CHECK-NOFP-SD-NEXT: mov h2, v0.h[3]
173 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
174 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
175 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
176 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2
177 ; CHECK-NOFP-SD-NEXT: mov h2, v0.h[4]
178 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
179 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
180 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
181 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2
182 ; CHECK-NOFP-SD-NEXT: mov h2, v0.h[5]
183 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
184 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
185 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
186 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2
187 ; CHECK-NOFP-SD-NEXT: mov h2, v0.h[6]
188 ; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
189 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
190 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
191 ; CHECK-NOFP-SD-NEXT: fcvt s0, h0
192 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
193 ; CHECK-NOFP-SD-NEXT: fmaxnm s1, s1, s2
194 ; CHECK-NOFP-SD-NEXT: fcvt h1, s1
195 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
196 ; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0
197 ; CHECK-NOFP-SD-NEXT: fcvt h0, s0
198 ; CHECK-NOFP-SD-NEXT: ret
200 ; CHECK-FP-LABEL: test_v8f16:
201 ; CHECK-FP: // %bb.0:
202 ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h
205 ; CHECK-NOFP-GI-LABEL: test_v8f16:
206 ; CHECK-NOFP-GI: // %bb.0:
207 ; CHECK-NOFP-GI-NEXT: fcvtl v1.4s, v0.4h
208 ; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h
209 ; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v1.4s, v0.4s
210 ; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s
211 ; CHECK-NOFP-GI-NEXT: fcvt h0, s0
212 ; CHECK-NOFP-GI-NEXT: ret
213 %b = call nnan half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
; nnan fmax reduction of a 16 x half vector held in v0 and v1. Expected code
; per configuration:
;  * no +fullfp16, no -global-isel: for each lane index the v0 and v1 lanes
;    are widened with fcvt and the larger one is picked with fcmp + fcsel gt;
;    the per-lane winners are then folded together with scalar fmaxnm, with a
;    round trip through half precision between steps.
;  * +fullfp16 (either instruction selector): one vector fmaxnm of v0.8h and
;    v1.8h followed by fmaxnmv on v0.8h.
;  * no +fullfp16, -global-isel: both registers are widened with fcvtl/fcvtl2
;    into four 4 x float vectors, combined with three vector fmaxnm, reduced
;    with fmaxnmv and narrowed with a single fcvt.
217 define half @test_v16f16(<16 x half> %a) nounwind {
218 ; CHECK-NOFP-SD-LABEL: test_v16f16:
219 ; CHECK-NOFP-SD: // %bb.0:
220 ; CHECK-NOFP-SD-NEXT: mov h2, v1.h[1]
221 ; CHECK-NOFP-SD-NEXT: mov h3, v0.h[1]
222 ; CHECK-NOFP-SD-NEXT: fcvt s4, h1
223 ; CHECK-NOFP-SD-NEXT: fcvt s5, h0
224 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
225 ; CHECK-NOFP-SD-NEXT: fcvt s3, h3
226 ; CHECK-NOFP-SD-NEXT: fcmp s3, s2
227 ; CHECK-NOFP-SD-NEXT: fcsel s2, s3, s2, gt
228 ; CHECK-NOFP-SD-NEXT: fcmp s5, s4
229 ; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
230 ; CHECK-NOFP-SD-NEXT: mov h4, v1.h[2]
231 ; CHECK-NOFP-SD-NEXT: mov h5, v0.h[2]
232 ; CHECK-NOFP-SD-NEXT: fcvt h2, s2
233 ; CHECK-NOFP-SD-NEXT: fcvt h3, s3
234 ; CHECK-NOFP-SD-NEXT: fcvt s4, h4
235 ; CHECK-NOFP-SD-NEXT: fcvt s5, h5
236 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
237 ; CHECK-NOFP-SD-NEXT: fcvt s3, h3
238 ; CHECK-NOFP-SD-NEXT: fcmp s5, s4
239 ; CHECK-NOFP-SD-NEXT: fmaxnm s2, s3, s2
240 ; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
241 ; CHECK-NOFP-SD-NEXT: mov h4, v1.h[3]
242 ; CHECK-NOFP-SD-NEXT: mov h5, v0.h[3]
243 ; CHECK-NOFP-SD-NEXT: fcvt h3, s3
244 ; CHECK-NOFP-SD-NEXT: fcvt h2, s2
245 ; CHECK-NOFP-SD-NEXT: fcvt s4, h4
246 ; CHECK-NOFP-SD-NEXT: fcvt s5, h5
247 ; CHECK-NOFP-SD-NEXT: fcvt s3, h3
248 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
249 ; CHECK-NOFP-SD-NEXT: fcmp s5, s4
250 ; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
251 ; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
252 ; CHECK-NOFP-SD-NEXT: mov h4, v1.h[4]
253 ; CHECK-NOFP-SD-NEXT: mov h5, v0.h[4]
254 ; CHECK-NOFP-SD-NEXT: fcvt h3, s3
255 ; CHECK-NOFP-SD-NEXT: fcvt h2, s2
256 ; CHECK-NOFP-SD-NEXT: fcvt s4, h4
257 ; CHECK-NOFP-SD-NEXT: fcvt s5, h5
258 ; CHECK-NOFP-SD-NEXT: fcvt s3, h3
259 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
260 ; CHECK-NOFP-SD-NEXT: fcmp s5, s4
261 ; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
262 ; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
263 ; CHECK-NOFP-SD-NEXT: mov h4, v1.h[5]
264 ; CHECK-NOFP-SD-NEXT: mov h5, v0.h[5]
265 ; CHECK-NOFP-SD-NEXT: fcvt h3, s3
266 ; CHECK-NOFP-SD-NEXT: fcvt h2, s2
267 ; CHECK-NOFP-SD-NEXT: fcvt s4, h4
268 ; CHECK-NOFP-SD-NEXT: fcvt s5, h5
269 ; CHECK-NOFP-SD-NEXT: fcvt s3, h3
270 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
271 ; CHECK-NOFP-SD-NEXT: fcmp s5, s4
272 ; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
273 ; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
274 ; CHECK-NOFP-SD-NEXT: mov h4, v1.h[6]
275 ; CHECK-NOFP-SD-NEXT: mov h5, v0.h[6]
276 ; CHECK-NOFP-SD-NEXT: mov h1, v1.h[7]
277 ; CHECK-NOFP-SD-NEXT: mov h0, v0.h[7]
278 ; CHECK-NOFP-SD-NEXT: fcvt h3, s3
279 ; CHECK-NOFP-SD-NEXT: fcvt h2, s2
280 ; CHECK-NOFP-SD-NEXT: fcvt s4, h4
281 ; CHECK-NOFP-SD-NEXT: fcvt s5, h5
282 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
283 ; CHECK-NOFP-SD-NEXT: fcvt s0, h0
284 ; CHECK-NOFP-SD-NEXT: fcvt s3, h3
285 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
286 ; CHECK-NOFP-SD-NEXT: fcmp s5, s4
287 ; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
288 ; CHECK-NOFP-SD-NEXT: fcsel s3, s5, s4, gt
289 ; CHECK-NOFP-SD-NEXT: fcmp s0, s1
290 ; CHECK-NOFP-SD-NEXT: fcvt h3, s3
291 ; CHECK-NOFP-SD-NEXT: fcsel s0, s0, s1, gt
292 ; CHECK-NOFP-SD-NEXT: fcvt h2, s2
293 ; CHECK-NOFP-SD-NEXT: fcvt h0, s0
294 ; CHECK-NOFP-SD-NEXT: fcvt s3, h3
295 ; CHECK-NOFP-SD-NEXT: fcvt s2, h2
296 ; CHECK-NOFP-SD-NEXT: fcvt s0, h0
297 ; CHECK-NOFP-SD-NEXT: fmaxnm s2, s2, s3
298 ; CHECK-NOFP-SD-NEXT: fcvt h1, s2
299 ; CHECK-NOFP-SD-NEXT: fcvt s1, h1
300 ; CHECK-NOFP-SD-NEXT: fmaxnm s0, s1, s0
301 ; CHECK-NOFP-SD-NEXT: fcvt h0, s0
302 ; CHECK-NOFP-SD-NEXT: ret
304 ; CHECK-FP-LABEL: test_v16f16:
305 ; CHECK-FP: // %bb.0:
306 ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
307 ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h
310 ; CHECK-NOFP-GI-LABEL: test_v16f16:
311 ; CHECK-NOFP-GI: // %bb.0:
312 ; CHECK-NOFP-GI-NEXT: fcvtl v2.4s, v0.4h
313 ; CHECK-NOFP-GI-NEXT: fcvtl2 v0.4s, v0.8h
314 ; CHECK-NOFP-GI-NEXT: fcvtl v3.4s, v1.4h
315 ; CHECK-NOFP-GI-NEXT: fcvtl2 v1.4s, v1.8h
316 ; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v2.4s, v0.4s
317 ; CHECK-NOFP-GI-NEXT: fmaxnm v1.4s, v3.4s, v1.4s
318 ; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
319 ; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s
320 ; CHECK-NOFP-GI-NEXT: fcvt h0, s0
321 ; CHECK-NOFP-GI-NEXT: ret
322 %b = call nnan half @llvm.vector.reduce.fmax.v16f16(<16 x half> %a)
; nnan fmax reduction of a 2 x float vector. All four llc runs share one
; expectation: a single pairwise fmaxnmp on v0.2s producing s0.
326 define float @test_v2f32(<2 x float> %a) nounwind {
327 ; CHECK-LABEL: test_v2f32:
329 ; CHECK-NEXT: fmaxnmp s0, v0.2s
331 %b = call nnan float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
; nnan fmax reduction of a 4 x float vector. All four llc runs share one
; expectation: a single across-lanes fmaxnmv on v0.4s producing s0.
335 define float @test_v4f32(<4 x float> %a) nounwind {
336 ; CHECK-LABEL: test_v4f32:
338 ; CHECK-NEXT: fmaxnmv s0, v0.4s
340 %b = call nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
; nnan fmax reduction of an 8 x float vector held in v0 and v1. All four llc
; runs share one expectation: a vector fmaxnm combining v0.4s with v1.4s,
; followed by fmaxnmv on the combined register.
344 define float @test_v8f32(<8 x float> %a) nounwind {
345 ; CHECK-LABEL: test_v8f32:
347 ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
348 ; CHECK-NEXT: fmaxnmv s0, v0.4s
350 %b = call nnan float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a)
; nnan fmax reduction of a 16 x float vector held in v0..v3. Every run expects
; three vector fmaxnm instructions followed by fmaxnmv on v0.4s; +fullfp16
; makes no difference to the expected code. The instruction selectors differ
; only in how the registers are paired first: the runs without -global-isel
; pair (v1, v3) and (v0, v2), the -global-isel runs pair (v0, v1) and (v2, v3).
354 define float @test_v16f32(<16 x float> %a) nounwind {
355 ; CHECK-NOFP-SD-LABEL: test_v16f32:
356 ; CHECK-NOFP-SD: // %bb.0:
357 ; CHECK-NOFP-SD-NEXT: fmaxnm v1.4s, v1.4s, v3.4s
358 ; CHECK-NOFP-SD-NEXT: fmaxnm v0.4s, v0.4s, v2.4s
359 ; CHECK-NOFP-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
360 ; CHECK-NOFP-SD-NEXT: fmaxnmv s0, v0.4s
361 ; CHECK-NOFP-SD-NEXT: ret
363 ; CHECK-FP-SD-LABEL: test_v16f32:
364 ; CHECK-FP-SD: // %bb.0:
365 ; CHECK-FP-SD-NEXT: fmaxnm v1.4s, v1.4s, v3.4s
366 ; CHECK-FP-SD-NEXT: fmaxnm v0.4s, v0.4s, v2.4s
367 ; CHECK-FP-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
368 ; CHECK-FP-SD-NEXT: fmaxnmv s0, v0.4s
369 ; CHECK-FP-SD-NEXT: ret
371 ; CHECK-NOFP-GI-LABEL: test_v16f32:
372 ; CHECK-NOFP-GI: // %bb.0:
373 ; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
374 ; CHECK-NOFP-GI-NEXT: fmaxnm v1.4s, v2.4s, v3.4s
375 ; CHECK-NOFP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
376 ; CHECK-NOFP-GI-NEXT: fmaxnmv s0, v0.4s
377 ; CHECK-NOFP-GI-NEXT: ret
379 ; CHECK-FP-GI-LABEL: test_v16f32:
380 ; CHECK-FP-GI: // %bb.0:
381 ; CHECK-FP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
382 ; CHECK-FP-GI-NEXT: fmaxnm v1.4s, v2.4s, v3.4s
383 ; CHECK-FP-GI-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
384 ; CHECK-FP-GI-NEXT: fmaxnmv s0, v0.4s
385 ; CHECK-FP-GI-NEXT: ret
386 %b = call nnan float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
; fmax reduction of a 2 x double vector. All four llc runs share one
; expectation: a single pairwise fmaxnmp on v0.2d producing d0.
; NOTE(review): unlike the other reduction calls in this file, this call has
; no nnan fast-math flag - confirm whether that is intentional.
390 define double @test_v2f64(<2 x double> %a) nounwind {
391 ; CHECK-LABEL: test_v2f64:
393 ; CHECK-NEXT: fmaxnmp d0, v0.2d
395 %b = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
; fmax reduction of a 4 x double vector held in v0 and v1. All four llc runs
; share one expectation: a vector fmaxnm combining v0.2d with v1.2d, followed
; by a pairwise fmaxnmp producing d0.
; NOTE(review): unlike the other reduction calls in this file, this call has
; no nnan fast-math flag - confirm whether that is intentional.
399 define double @test_v4f64(<4 x double> %a) nounwind {
400 ; CHECK-LABEL: test_v4f64:
402 ; CHECK-NEXT: fmaxnm v0.2d, v0.2d, v1.2d
403 ; CHECK-NEXT: fmaxnmp d0, v0.2d
405 %b = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a)
; nnan fmax reduction of an 11 x half vector (not a power-of-two length).
; The warning assertions at the top of the file expect the -global-isel runs
; to report a fallback for this function, and the expectations here are
; shared between both instruction-selector settings.
;  * no +fullfp16: the code reads h0..h7 plus three half values loaded from
;    [sp], [sp, #8] and [sp, #16]. The first three registers are paired with
;    the stack values via fcmp + fcsel gt; h3..h7 are compared against a half
;    constant loaded from the constant pool, with 0xff800000 (materialised in
;    w8) as the fcsel alternative. Partial results are folded with scalar
;    fmaxnm through half-precision round trips.
;  * +fullfp16: h0..h7 are inserted into the lanes of v0; v1 starts as
;    movi #252, lsl #8 (0xfc00 in every halfword) and has lanes 0..2 loaded
;    from the stack; the two vectors are combined with fmaxnm and reduced
;    with fmaxnmv on v0.8h.
409 define half @test_v11f16(<11 x half> %a) nounwind {
410 ; CHECK-NOFP-LABEL: test_v11f16:
411 ; CHECK-NOFP: // %bb.0:
412 ; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
413 ; CHECK-NOFP-NEXT: fcvt s1, h1
414 ; CHECK-NOFP-NEXT: ldr h17, [sp]
415 ; CHECK-NOFP-NEXT: fcvt s0, h0
416 ; CHECK-NOFP-NEXT: fcvt s2, h2
417 ; CHECK-NOFP-NEXT: adrp x8, .LCPI14_0
418 ; CHECK-NOFP-NEXT: fcvt s16, h16
419 ; CHECK-NOFP-NEXT: fcvt s17, h17
420 ; CHECK-NOFP-NEXT: fcvt s3, h3
421 ; CHECK-NOFP-NEXT: fcvt s4, h4
422 ; CHECK-NOFP-NEXT: fcmp s1, s16
423 ; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
424 ; CHECK-NOFP-NEXT: fcmp s0, s17
425 ; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
426 ; CHECK-NOFP-NEXT: fcvt s16, h16
427 ; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
428 ; CHECK-NOFP-NEXT: fcvt h1, s1
429 ; CHECK-NOFP-NEXT: fcvt h0, s0
430 ; CHECK-NOFP-NEXT: fcmp s2, s16
431 ; CHECK-NOFP-NEXT: fcvt s1, h1
432 ; CHECK-NOFP-NEXT: fcvt s0, h0
433 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
434 ; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt
435 ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI14_0]
436 ; CHECK-NOFP-NEXT: mov w8, #-8388608 // =0xff800000
437 ; CHECK-NOFP-NEXT: fcvt s2, h2
438 ; CHECK-NOFP-NEXT: fcvt h1, s1
439 ; CHECK-NOFP-NEXT: fcvt h0, s0
440 ; CHECK-NOFP-NEXT: fcmp s3, s2
441 ; CHECK-NOFP-NEXT: fcvt s1, h1
442 ; CHECK-NOFP-NEXT: fcvt s0, h0
443 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
444 ; CHECK-NOFP-NEXT: fmov s1, w8
445 ; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
446 ; CHECK-NOFP-NEXT: fcmp s4, s2
447 ; CHECK-NOFP-NEXT: fcvt h0, s0
448 ; CHECK-NOFP-NEXT: fcvt h3, s3
449 ; CHECK-NOFP-NEXT: fcvt s0, h0
450 ; CHECK-NOFP-NEXT: fcvt s3, h3
451 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
452 ; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
453 ; CHECK-NOFP-NEXT: fcvt s4, h5
454 ; CHECK-NOFP-NEXT: fcvt h3, s3
455 ; CHECK-NOFP-NEXT: fcvt h0, s0
456 ; CHECK-NOFP-NEXT: fcmp s4, s2
457 ; CHECK-NOFP-NEXT: fcvt s3, h3
458 ; CHECK-NOFP-NEXT: fcvt s0, h0
459 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
460 ; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
461 ; CHECK-NOFP-NEXT: fcvt s4, h6
462 ; CHECK-NOFP-NEXT: fcvt h3, s3
463 ; CHECK-NOFP-NEXT: fcvt h0, s0
464 ; CHECK-NOFP-NEXT: fcmp s4, s2
465 ; CHECK-NOFP-NEXT: fcvt s3, h3
466 ; CHECK-NOFP-NEXT: fcvt s0, h0
467 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
468 ; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
469 ; CHECK-NOFP-NEXT: fcvt s4, h7
470 ; CHECK-NOFP-NEXT: fcvt h3, s3
471 ; CHECK-NOFP-NEXT: fcvt h0, s0
472 ; CHECK-NOFP-NEXT: fcmp s4, s2
473 ; CHECK-NOFP-NEXT: fcvt s3, h3
474 ; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
475 ; CHECK-NOFP-NEXT: fcvt s0, h0
476 ; CHECK-NOFP-NEXT: fcvt h1, s1
477 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
478 ; CHECK-NOFP-NEXT: fcvt s1, h1
479 ; CHECK-NOFP-NEXT: fcvt h0, s0
480 ; CHECK-NOFP-NEXT: fcvt s0, h0
481 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
482 ; CHECK-NOFP-NEXT: fcvt h0, s0
483 ; CHECK-NOFP-NEXT: ret
485 ; CHECK-FP-LABEL: test_v11f16:
486 ; CHECK-FP: // %bb.0:
487 ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0
488 ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1
489 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
490 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
491 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
492 ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
493 ; CHECK-FP-NEXT: mov x8, sp
494 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
495 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
496 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
497 ; CHECK-FP-NEXT: movi v1.8h, #252, lsl #8
498 ; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
499 ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
500 ; CHECK-FP-NEXT: add x8, sp, #8
501 ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
502 ; CHECK-FP-NEXT: add x8, sp, #16
503 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
504 ; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
505 ; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
506 ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
507 ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
508 ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
509 ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
510 ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h
512 %b = call nnan half @llvm.vector.reduce.fmax.v11f16(<11 x half> %a)
; Same 11 x half reduction as test_v11f16 but the call additionally carries
; the ninf fast-math flag. The warning assertions at the top of the file
; expect the -global-isel runs to report a fallback for this function.
; The expected code has the same shape as for test_v11f16; what changes is
; the filler constant:
;  * no +fullfp16: the fcsel alternative is built with mov #57344 plus
;    movk #51071, lsl #16 (0xc77fe000) instead of 0xff800000.
;  * +fullfp16: v1 is initialised with mvni #4, lsl #8 (0xfbff in every
;    halfword) instead of movi #252, lsl #8.
516 define half @test_v11f16_ninf(<11 x half> %a) nounwind {
517 ; CHECK-NOFP-LABEL: test_v11f16_ninf:
518 ; CHECK-NOFP: // %bb.0:
519 ; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
520 ; CHECK-NOFP-NEXT: fcvt s1, h1
521 ; CHECK-NOFP-NEXT: ldr h17, [sp]
522 ; CHECK-NOFP-NEXT: fcvt s0, h0
523 ; CHECK-NOFP-NEXT: fcvt s2, h2
524 ; CHECK-NOFP-NEXT: adrp x8, .LCPI15_0
525 ; CHECK-NOFP-NEXT: fcvt s16, h16
526 ; CHECK-NOFP-NEXT: fcvt s17, h17
527 ; CHECK-NOFP-NEXT: fcvt s3, h3
528 ; CHECK-NOFP-NEXT: fcvt s4, h4
529 ; CHECK-NOFP-NEXT: fcmp s1, s16
530 ; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
531 ; CHECK-NOFP-NEXT: fcmp s0, s17
532 ; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
533 ; CHECK-NOFP-NEXT: fcvt s16, h16
534 ; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
535 ; CHECK-NOFP-NEXT: fcvt h1, s1
536 ; CHECK-NOFP-NEXT: fcvt h0, s0
537 ; CHECK-NOFP-NEXT: fcmp s2, s16
538 ; CHECK-NOFP-NEXT: fcvt s1, h1
539 ; CHECK-NOFP-NEXT: fcvt s0, h0
540 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
541 ; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt
542 ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI15_0]
543 ; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
544 ; CHECK-NOFP-NEXT: fcvt s2, h2
545 ; CHECK-NOFP-NEXT: movk w8, #51071, lsl #16
546 ; CHECK-NOFP-NEXT: fcvt h1, s1
547 ; CHECK-NOFP-NEXT: fcvt h0, s0
548 ; CHECK-NOFP-NEXT: fcmp s3, s2
549 ; CHECK-NOFP-NEXT: fcvt s1, h1
550 ; CHECK-NOFP-NEXT: fcvt s0, h0
551 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
552 ; CHECK-NOFP-NEXT: fmov s1, w8
553 ; CHECK-NOFP-NEXT: fcsel s3, s3, s1, gt
554 ; CHECK-NOFP-NEXT: fcmp s4, s2
555 ; CHECK-NOFP-NEXT: fcvt h0, s0
556 ; CHECK-NOFP-NEXT: fcvt h3, s3
557 ; CHECK-NOFP-NEXT: fcvt s0, h0
558 ; CHECK-NOFP-NEXT: fcvt s3, h3
559 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
560 ; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
561 ; CHECK-NOFP-NEXT: fcvt s4, h5
562 ; CHECK-NOFP-NEXT: fcvt h3, s3
563 ; CHECK-NOFP-NEXT: fcvt h0, s0
564 ; CHECK-NOFP-NEXT: fcmp s4, s2
565 ; CHECK-NOFP-NEXT: fcvt s3, h3
566 ; CHECK-NOFP-NEXT: fcvt s0, h0
567 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
568 ; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
569 ; CHECK-NOFP-NEXT: fcvt s4, h6
570 ; CHECK-NOFP-NEXT: fcvt h3, s3
571 ; CHECK-NOFP-NEXT: fcvt h0, s0
572 ; CHECK-NOFP-NEXT: fcmp s4, s2
573 ; CHECK-NOFP-NEXT: fcvt s3, h3
574 ; CHECK-NOFP-NEXT: fcvt s0, h0
575 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
576 ; CHECK-NOFP-NEXT: fcsel s3, s4, s1, gt
577 ; CHECK-NOFP-NEXT: fcvt s4, h7
578 ; CHECK-NOFP-NEXT: fcvt h3, s3
579 ; CHECK-NOFP-NEXT: fcvt h0, s0
580 ; CHECK-NOFP-NEXT: fcmp s4, s2
581 ; CHECK-NOFP-NEXT: fcvt s3, h3
582 ; CHECK-NOFP-NEXT: fcsel s1, s4, s1, gt
583 ; CHECK-NOFP-NEXT: fcvt s0, h0
584 ; CHECK-NOFP-NEXT: fcvt h1, s1
585 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s3
586 ; CHECK-NOFP-NEXT: fcvt s1, h1
587 ; CHECK-NOFP-NEXT: fcvt h0, s0
588 ; CHECK-NOFP-NEXT: fcvt s0, h0
589 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
590 ; CHECK-NOFP-NEXT: fcvt h0, s0
591 ; CHECK-NOFP-NEXT: ret
593 ; CHECK-FP-LABEL: test_v11f16_ninf:
594 ; CHECK-FP: // %bb.0:
595 ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0
596 ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1
597 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
598 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
599 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
600 ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
601 ; CHECK-FP-NEXT: mov x8, sp
602 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
603 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
604 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
605 ; CHECK-FP-NEXT: mvni v1.8h, #4, lsl #8
606 ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
607 ; CHECK-FP-NEXT: add x8, sp, #8
608 ; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
609 ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
610 ; CHECK-FP-NEXT: add x8, sp, #16
611 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
612 ; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
613 ; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
614 ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
615 ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
616 ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
617 ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h
618 ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h
620 %b = call nnan ninf half @llvm.vector.reduce.fmax.v11f16(<11 x half> %a)
; nnan fmax reduction of a 3 x float vector. The warning assertions at the
; top of the file expect the -global-isel runs to report a fallback for this
; function, so all four runs share one expectation: lane 3 of v0 is
; overwritten with the bit pattern 0xff800000 (moved through w8 and s1) and
; the resulting 4 x float register is reduced with fmaxnmv.
624 define float @test_v3f32(<3 x float> %a) nounwind {
625 ; CHECK-LABEL: test_v3f32:
627 ; CHECK-NEXT: mov w8, #-8388608 // =0xff800000
628 ; CHECK-NEXT: fmov s1, w8
629 ; CHECK-NEXT: mov v0.s[3], v1.s[0]
630 ; CHECK-NEXT: fmaxnmv s0, v0.4s
632 %b = call nnan float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
; Same 3 x float reduction as test_v3f32 but the call additionally carries
; the ninf fast-math flag. The warning assertions at the top of the file
; expect the -global-isel runs to report a fallback for this function. The
; expected code differs from test_v3f32 only in the value written to lane 3:
; 0xff7fffff instead of 0xff800000.
636 define float @test_v3f32_ninf(<3 x float> %a) nounwind {
637 ; CHECK-LABEL: test_v3f32_ninf:
639 ; CHECK-NEXT: mov w8, #-8388609 // =0xff7fffff
640 ; CHECK-NEXT: fmov s1, w8
641 ; CHECK-NEXT: mov v0.s[3], v1.s[0]
642 ; CHECK-NEXT: fmaxnmv s0, v0.4s
644 %b = call nnan ninf float @llvm.vector.reduce.fmax.v3f32(<3 x float> %a)
; nnan fmax reduction of a 2 x fp128 vector held in q0 and q1. The warning
; assertions at the top of the file expect the -global-isel runs to report a
; fallback for this function, so all four runs share one expectation:
; q0/q1 and x30 are spilled to a 48-byte stack frame, the comparison is done
; by calling __gttf2, and the result is chosen by a branch: the second element
; is reloaded into q0 first and is replaced by the first element only when the
; value returned in w0 is greater than zero (b.le skips the second reload).
648 define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
649 ; CHECK-LABEL: test_v2f128:
651 ; CHECK-NEXT: sub sp, sp, #48
652 ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
653 ; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
654 ; CHECK-NEXT: bl __gttf2
655 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
656 ; CHECK-NEXT: cmp w0, #0
657 ; CHECK-NEXT: b.le .LBB18_2
658 ; CHECK-NEXT: // %bb.1:
659 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
660 ; CHECK-NEXT: .LBB18_2:
661 ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
662 ; CHECK-NEXT: add sp, sp, #48
664 %b = call nnan fp128 @llvm.vector.reduce.fmax.v2f128(<2 x fp128> %a)