1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
3 ; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5 target triple = "aarch64-unknown-linux-gnu"
; fadda_v4f16: fadd reduction of a <4 x half> argument with scalar start value %start.
; The reduce call below carries no fast-math flags, so lanes are accumulated one after another.
; Expected with +sve: lanes 1..3 are moved out of z1 by index and summed with scalar half fadd.
; Expected without SVE: the vector is stored to the stack, each half element is reloaded,
; widened to single precision with fcvt, added, and the running sum converted back to half.
11 define half @fadda_v4f16(half %start, <4 x half> %a) {
12 ; CHECK-LABEL: fadda_v4f16:
14 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
15 ; CHECK-NEXT: fadd h0, h0, h1
16 ; CHECK-NEXT: mov z2.h, z1.h[1]
17 ; CHECK-NEXT: fadd h0, h0, h2
18 ; CHECK-NEXT: mov z2.h, z1.h[2]
19 ; CHECK-NEXT: mov z1.h, z1.h[3]
20 ; CHECK-NEXT: fadd h0, h0, h2
21 ; CHECK-NEXT: fadd h0, h0, h1
24 ; NONEON-NOSVE-LABEL: fadda_v4f16:
25 ; NONEON-NOSVE: // %bb.0:
26 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
27 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
28 ; NONEON-NOSVE-NEXT: str d1, [sp, #8]
29 ; NONEON-NOSVE-NEXT: fcvt s0, h0
30 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
31 ; NONEON-NOSVE-NEXT: fcvt s1, h1
32 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
33 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
34 ; NONEON-NOSVE-NEXT: fcvt s1, h1
35 ; NONEON-NOSVE-NEXT: fcvt h0, s0
36 ; NONEON-NOSVE-NEXT: fcvt s0, h0
37 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
38 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
39 ; NONEON-NOSVE-NEXT: fcvt s1, h1
40 ; NONEON-NOSVE-NEXT: fcvt h0, s0
41 ; NONEON-NOSVE-NEXT: fcvt s0, h0
42 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
43 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
44 ; NONEON-NOSVE-NEXT: fcvt s1, h1
45 ; NONEON-NOSVE-NEXT: fcvt h0, s0
46 ; NONEON-NOSVE-NEXT: fcvt s0, h0
47 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
48 ; NONEON-NOSVE-NEXT: fcvt h0, s0
49 ; NONEON-NOSVE-NEXT: add sp, sp, #16
50 ; NONEON-NOSVE-NEXT: ret
; No 'fast' flag here: this is the ordered form of the reduction.
51 %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
; fadda_v8f16: fadd reduction of an <8 x half> argument with scalar start value %start.
; The reduce call below carries no fast-math flags, so lanes are accumulated one after another.
; Expected with +sve: lanes 1..7 are moved out of z1 by index and summed with scalar half fadd.
; Expected without SVE: the q register is pushed to the stack and the eight half elements
; (offsets 0..14) are reloaded, widened with fcvt, added in single precision and narrowed again.
55 define half @fadda_v8f16(half %start, <8 x half> %a) {
56 ; CHECK-LABEL: fadda_v8f16:
58 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
59 ; CHECK-NEXT: fadd h0, h0, h1
60 ; CHECK-NEXT: mov z2.h, z1.h[1]
61 ; CHECK-NEXT: fadd h0, h0, h2
62 ; CHECK-NEXT: mov z2.h, z1.h[2]
63 ; CHECK-NEXT: fadd h0, h0, h2
64 ; CHECK-NEXT: mov z2.h, z1.h[3]
65 ; CHECK-NEXT: fadd h0, h0, h2
66 ; CHECK-NEXT: mov z2.h, z1.h[4]
67 ; CHECK-NEXT: fadd h0, h0, h2
68 ; CHECK-NEXT: mov z2.h, z1.h[5]
69 ; CHECK-NEXT: fadd h0, h0, h2
70 ; CHECK-NEXT: mov z2.h, z1.h[6]
71 ; CHECK-NEXT: mov z1.h, z1.h[7]
72 ; CHECK-NEXT: fadd h0, h0, h2
73 ; CHECK-NEXT: fadd h0, h0, h1
76 ; NONEON-NOSVE-LABEL: fadda_v8f16:
77 ; NONEON-NOSVE: // %bb.0:
78 ; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
79 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
80 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
81 ; NONEON-NOSVE-NEXT: fcvt s0, h0
82 ; NONEON-NOSVE-NEXT: fcvt s1, h1
83 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
84 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
85 ; NONEON-NOSVE-NEXT: fcvt s1, h1
86 ; NONEON-NOSVE-NEXT: fcvt h0, s0
87 ; NONEON-NOSVE-NEXT: fcvt s0, h0
88 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
89 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
90 ; NONEON-NOSVE-NEXT: fcvt s1, h1
91 ; NONEON-NOSVE-NEXT: fcvt h0, s0
92 ; NONEON-NOSVE-NEXT: fcvt s0, h0
93 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
94 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
95 ; NONEON-NOSVE-NEXT: fcvt s1, h1
96 ; NONEON-NOSVE-NEXT: fcvt h0, s0
97 ; NONEON-NOSVE-NEXT: fcvt s0, h0
98 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
99 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
100 ; NONEON-NOSVE-NEXT: fcvt s1, h1
101 ; NONEON-NOSVE-NEXT: fcvt h0, s0
102 ; NONEON-NOSVE-NEXT: fcvt s0, h0
103 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
104 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
105 ; NONEON-NOSVE-NEXT: fcvt s1, h1
106 ; NONEON-NOSVE-NEXT: fcvt h0, s0
107 ; NONEON-NOSVE-NEXT: fcvt s0, h0
108 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
109 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
110 ; NONEON-NOSVE-NEXT: fcvt s1, h1
111 ; NONEON-NOSVE-NEXT: fcvt h0, s0
112 ; NONEON-NOSVE-NEXT: fcvt s0, h0
113 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
114 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
115 ; NONEON-NOSVE-NEXT: fcvt s1, h1
116 ; NONEON-NOSVE-NEXT: fcvt h0, s0
117 ; NONEON-NOSVE-NEXT: fcvt s0, h0
118 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
119 ; NONEON-NOSVE-NEXT: fcvt h0, s0
120 ; NONEON-NOSVE-NEXT: add sp, sp, #16
121 ; NONEON-NOSVE-NEXT: ret
; No 'fast' flag here: this is the ordered form of the reduction.
122 %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
; fadda_v16f16: fadd reduction of a <16 x half> vector loaded from %a, with scalar start %start.
; The reduce call below carries no fast-math flags, so lanes are accumulated one after another.
; Expected with +sve: the 32 bytes are consumed as two q-register loads ([x0] then [x0, #16]);
; within each, lanes are extracted by index and summed with scalar half fadd.
; Expected without SVE: both halves are copied to a 32-byte stack area and the sixteen
; elements (offsets 0..30) are added in order via fcvt to single precision and back.
126 define half @fadda_v16f16(half %start, ptr %a) {
127 ; CHECK-LABEL: fadda_v16f16:
129 ; CHECK-NEXT: ldr q1, [x0]
130 ; CHECK-NEXT: fadd h0, h0, h1
131 ; CHECK-NEXT: mov z2.h, z1.h[1]
132 ; CHECK-NEXT: fadd h0, h0, h2
133 ; CHECK-NEXT: mov z2.h, z1.h[2]
134 ; CHECK-NEXT: fadd h0, h0, h2
135 ; CHECK-NEXT: mov z2.h, z1.h[3]
136 ; CHECK-NEXT: fadd h0, h0, h2
137 ; CHECK-NEXT: mov z2.h, z1.h[4]
138 ; CHECK-NEXT: fadd h0, h0, h2
139 ; CHECK-NEXT: mov z2.h, z1.h[5]
140 ; CHECK-NEXT: fadd h0, h0, h2
141 ; CHECK-NEXT: mov z2.h, z1.h[6]
142 ; CHECK-NEXT: mov z1.h, z1.h[7]
143 ; CHECK-NEXT: fadd h0, h0, h2
144 ; CHECK-NEXT: fadd h0, h0, h1
145 ; CHECK-NEXT: ldr q1, [x0, #16]
146 ; CHECK-NEXT: mov z2.h, z1.h[1]
147 ; CHECK-NEXT: fadd h0, h0, h1
148 ; CHECK-NEXT: fadd h0, h0, h2
149 ; CHECK-NEXT: mov z2.h, z1.h[2]
150 ; CHECK-NEXT: fadd h0, h0, h2
151 ; CHECK-NEXT: mov z2.h, z1.h[3]
152 ; CHECK-NEXT: fadd h0, h0, h2
153 ; CHECK-NEXT: mov z2.h, z1.h[4]
154 ; CHECK-NEXT: fadd h0, h0, h2
155 ; CHECK-NEXT: mov z2.h, z1.h[5]
156 ; CHECK-NEXT: fadd h0, h0, h2
157 ; CHECK-NEXT: mov z2.h, z1.h[6]
158 ; CHECK-NEXT: mov z1.h, z1.h[7]
159 ; CHECK-NEXT: fadd h0, h0, h2
160 ; CHECK-NEXT: fadd h0, h0, h1
163 ; NONEON-NOSVE-LABEL: fadda_v16f16:
164 ; NONEON-NOSVE: // %bb.0:
165 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
166 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
167 ; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
168 ; NONEON-NOSVE-NEXT: fcvt s0, h0
169 ; NONEON-NOSVE-NEXT: str q1, [sp, #16]
170 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
171 ; NONEON-NOSVE-NEXT: str q1, [sp]
172 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
173 ; NONEON-NOSVE-NEXT: fcvt s1, h1
174 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
175 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
176 ; NONEON-NOSVE-NEXT: fcvt s1, h1
177 ; NONEON-NOSVE-NEXT: fcvt h0, s0
178 ; NONEON-NOSVE-NEXT: fcvt s0, h0
179 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
180 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
181 ; NONEON-NOSVE-NEXT: fcvt s1, h1
182 ; NONEON-NOSVE-NEXT: fcvt h0, s0
183 ; NONEON-NOSVE-NEXT: fcvt s0, h0
184 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
185 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
186 ; NONEON-NOSVE-NEXT: fcvt s1, h1
187 ; NONEON-NOSVE-NEXT: fcvt h0, s0
188 ; NONEON-NOSVE-NEXT: fcvt s0, h0
189 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
190 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
191 ; NONEON-NOSVE-NEXT: fcvt s1, h1
192 ; NONEON-NOSVE-NEXT: fcvt h0, s0
193 ; NONEON-NOSVE-NEXT: fcvt s0, h0
194 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
195 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
196 ; NONEON-NOSVE-NEXT: fcvt s1, h1
197 ; NONEON-NOSVE-NEXT: fcvt h0, s0
198 ; NONEON-NOSVE-NEXT: fcvt s0, h0
199 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
200 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
201 ; NONEON-NOSVE-NEXT: fcvt s1, h1
202 ; NONEON-NOSVE-NEXT: fcvt h0, s0
203 ; NONEON-NOSVE-NEXT: fcvt s0, h0
204 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
205 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
206 ; NONEON-NOSVE-NEXT: fcvt s1, h1
207 ; NONEON-NOSVE-NEXT: fcvt h0, s0
208 ; NONEON-NOSVE-NEXT: fcvt s0, h0
209 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
210 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #16]
211 ; NONEON-NOSVE-NEXT: fcvt s1, h1
212 ; NONEON-NOSVE-NEXT: fcvt h0, s0
213 ; NONEON-NOSVE-NEXT: fcvt s0, h0
214 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
215 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #18]
216 ; NONEON-NOSVE-NEXT: fcvt s1, h1
217 ; NONEON-NOSVE-NEXT: fcvt h0, s0
218 ; NONEON-NOSVE-NEXT: fcvt s0, h0
219 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
220 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #20]
221 ; NONEON-NOSVE-NEXT: fcvt s1, h1
222 ; NONEON-NOSVE-NEXT: fcvt h0, s0
223 ; NONEON-NOSVE-NEXT: fcvt s0, h0
224 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
225 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #22]
226 ; NONEON-NOSVE-NEXT: fcvt s1, h1
227 ; NONEON-NOSVE-NEXT: fcvt h0, s0
228 ; NONEON-NOSVE-NEXT: fcvt s0, h0
229 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
230 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #24]
231 ; NONEON-NOSVE-NEXT: fcvt s1, h1
232 ; NONEON-NOSVE-NEXT: fcvt h0, s0
233 ; NONEON-NOSVE-NEXT: fcvt s0, h0
234 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
235 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #26]
236 ; NONEON-NOSVE-NEXT: fcvt s1, h1
237 ; NONEON-NOSVE-NEXT: fcvt h0, s0
238 ; NONEON-NOSVE-NEXT: fcvt s0, h0
239 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
240 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #28]
241 ; NONEON-NOSVE-NEXT: fcvt s1, h1
242 ; NONEON-NOSVE-NEXT: fcvt h0, s0
243 ; NONEON-NOSVE-NEXT: fcvt s0, h0
244 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
245 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #30]
246 ; NONEON-NOSVE-NEXT: fcvt s1, h1
247 ; NONEON-NOSVE-NEXT: fcvt h0, s0
248 ; NONEON-NOSVE-NEXT: fcvt s0, h0
249 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
250 ; NONEON-NOSVE-NEXT: fcvt h0, s0
251 ; NONEON-NOSVE-NEXT: add sp, sp, #32
252 ; NONEON-NOSVE-NEXT: ret
; The operand is loaded from memory rather than passed as a vector argument.
253 %op = load <16 x half>, ptr %a
; No 'fast' flag here: this is the ordered form of the reduction.
254 %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
; fadda_v2f32: fadd reduction of a <2 x float> argument with scalar start value %start.
; The reduce call below carries no fast-math flags, so lanes are accumulated one after another.
; Expected with +sve: lane 1 is moved out of z1 by index; two scalar single-precision fadds.
; Expected without SVE: the d register is stored to the stack, both floats are reloaded
; with one ldp, and each is added to s0 in turn.
258 define float @fadda_v2f32(float %start, <2 x float> %a) {
259 ; CHECK-LABEL: fadda_v2f32:
261 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
262 ; CHECK-NEXT: fadd s0, s0, s1
263 ; CHECK-NEXT: mov z1.s, z1.s[1]
264 ; CHECK-NEXT: fadd s0, s0, s1
267 ; NONEON-NOSVE-LABEL: fadda_v2f32:
268 ; NONEON-NOSVE: // %bb.0:
269 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
270 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
271 ; NONEON-NOSVE-NEXT: str d1, [sp, #8]
272 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
273 ; NONEON-NOSVE-NEXT: fadd s0, s0, s2
274 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
275 ; NONEON-NOSVE-NEXT: add sp, sp, #16
276 ; NONEON-NOSVE-NEXT: ret
; No 'fast' flag here: this is the ordered form of the reduction.
277 %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
; fadda_v4f32: fadd reduction of a <4 x float> argument with scalar start value %start.
; The reduce call below carries no fast-math flags, so lanes are accumulated one after another.
; Expected with +sve: lanes 1..3 are moved out of z1 by index and summed with scalar fadd.
; Expected without SVE: the q register is pushed to the stack and the four floats are
; reloaded in pairs with ldp, each added to s0 in element order.
281 define float @fadda_v4f32(float %start, <4 x float> %a) {
282 ; CHECK-LABEL: fadda_v4f32:
284 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
285 ; CHECK-NEXT: fadd s0, s0, s1
286 ; CHECK-NEXT: mov z2.s, z1.s[1]
287 ; CHECK-NEXT: fadd s0, s0, s2
288 ; CHECK-NEXT: mov z2.s, z1.s[2]
289 ; CHECK-NEXT: mov z1.s, z1.s[3]
290 ; CHECK-NEXT: fadd s0, s0, s2
291 ; CHECK-NEXT: fadd s0, s0, s1
294 ; NONEON-NOSVE-LABEL: fadda_v4f32:
295 ; NONEON-NOSVE: // %bb.0:
296 ; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
297 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
298 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp]
299 ; NONEON-NOSVE-NEXT: fadd s0, s0, s2
300 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
301 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
302 ; NONEON-NOSVE-NEXT: fadd s0, s0, s2
303 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
304 ; NONEON-NOSVE-NEXT: add sp, sp, #16
305 ; NONEON-NOSVE-NEXT: ret
; No 'fast' flag here: this is the ordered form of the reduction.
306 %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
; fadda_v8f32: fadd reduction of an <8 x float> vector loaded from %a, with scalar start %start.
; The reduce call below carries no fast-math flags, so lanes are accumulated one after another.
; Expected with +sve: the 32 bytes are consumed as two q-register loads ([x0] then [x0, #16]);
; within each, lanes are extracted by index and summed with scalar fadd.
; Expected without SVE: both halves are copied to a 32-byte stack area and the eight floats
; are reloaded in pairs with ldp and added to s0 in element order.
310 define float @fadda_v8f32(float %start, ptr %a) {
311 ; CHECK-LABEL: fadda_v8f32:
313 ; CHECK-NEXT: ldr q1, [x0]
314 ; CHECK-NEXT: fadd s0, s0, s1
315 ; CHECK-NEXT: mov z2.s, z1.s[1]
316 ; CHECK-NEXT: fadd s0, s0, s2
317 ; CHECK-NEXT: mov z2.s, z1.s[2]
318 ; CHECK-NEXT: mov z1.s, z1.s[3]
319 ; CHECK-NEXT: fadd s0, s0, s2
320 ; CHECK-NEXT: fadd s0, s0, s1
321 ; CHECK-NEXT: ldr q1, [x0, #16]
322 ; CHECK-NEXT: mov z2.s, z1.s[1]
323 ; CHECK-NEXT: fadd s0, s0, s1
324 ; CHECK-NEXT: fadd s0, s0, s2
325 ; CHECK-NEXT: mov z2.s, z1.s[2]
326 ; CHECK-NEXT: mov z1.s, z1.s[3]
327 ; CHECK-NEXT: fadd s0, s0, s2
328 ; CHECK-NEXT: fadd s0, s0, s1
331 ; NONEON-NOSVE-LABEL: fadda_v8f32:
332 ; NONEON-NOSVE: // %bb.0:
333 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
334 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
335 ; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
336 ; NONEON-NOSVE-NEXT: str q1, [sp, #16]
337 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
338 ; NONEON-NOSVE-NEXT: str q1, [sp]
339 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp]
340 ; NONEON-NOSVE-NEXT: fadd s0, s0, s2
341 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
342 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
343 ; NONEON-NOSVE-NEXT: fadd s0, s0, s2
344 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
345 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #16]
346 ; NONEON-NOSVE-NEXT: fadd s0, s0, s2
347 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
348 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #24]
349 ; NONEON-NOSVE-NEXT: fadd s0, s0, s2
350 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
351 ; NONEON-NOSVE-NEXT: add sp, sp, #32
352 ; NONEON-NOSVE-NEXT: ret
; The operand is loaded from memory rather than passed as a vector argument.
353 %op = load <8 x float>, ptr %a
; No 'fast' flag here: this is the ordered form of the reduction.
354 %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
; fadda_v1f64: fadd reduction of a single-element <1 x double> with scalar start %start.
; Both run configurations expect this to collapse to one scalar 'fadd d0, d0, d1'.
358 define double @fadda_v1f64(double %start, <1 x double> %a) {
359 ; CHECK-LABEL: fadda_v1f64:
361 ; CHECK-NEXT: fadd d0, d0, d1
364 ; NONEON-NOSVE-LABEL: fadda_v1f64:
365 ; NONEON-NOSVE: // %bb.0:
366 ; NONEON-NOSVE-NEXT: fadd d0, d0, d1
367 ; NONEON-NOSVE-NEXT: ret
; No 'fast' flag here: this is the ordered form of the reduction.
368 %res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
; fadda_v2f64: fadd reduction of a <2 x double> argument with scalar start value %start.
; The reduce call below carries no fast-math flags, so lanes are accumulated one after another.
; Expected with +sve: lane 1 is moved out of z1 by index; two scalar double fadds.
; Expected without SVE: the q register is pushed to the stack, both doubles are popped with
; a post-indexed ldp (which also restores sp), and each is added to d0 in turn.
372 define double @fadda_v2f64(double %start, <2 x double> %a) {
373 ; CHECK-LABEL: fadda_v2f64:
375 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
376 ; CHECK-NEXT: fadd d0, d0, d1
377 ; CHECK-NEXT: mov z1.d, z1.d[1]
378 ; CHECK-NEXT: fadd d0, d0, d1
381 ; NONEON-NOSVE-LABEL: fadda_v2f64:
382 ; NONEON-NOSVE: // %bb.0:
383 ; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
384 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
385 ; NONEON-NOSVE-NEXT: ldp d2, d1, [sp], #16
386 ; NONEON-NOSVE-NEXT: fadd d0, d0, d2
387 ; NONEON-NOSVE-NEXT: fadd d0, d0, d1
388 ; NONEON-NOSVE-NEXT: ret
; No 'fast' flag here: this is the ordered form of the reduction.
389 %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
; fadda_v4f64: fadd reduction of a <4 x double> vector loaded from %a, with scalar start %start.
; The reduce call below carries no fast-math flags, so lanes are accumulated one after another.
; Expected with +sve: the 32 bytes are consumed as two q-register loads ([x0] then [x0, #16]);
; for each, lane 0 is added, lane 1 is moved out by index and added.
; Expected without SVE: both halves are copied to a 32-byte stack area and the four doubles
; are reloaded in pairs with ldp and added to d0 in element order.
393 define double @fadda_v4f64(double %start, ptr %a) {
394 ; CHECK-LABEL: fadda_v4f64:
396 ; CHECK-NEXT: ldr q1, [x0]
397 ; CHECK-NEXT: fadd d0, d0, d1
398 ; CHECK-NEXT: mov z1.d, z1.d[1]
399 ; CHECK-NEXT: fadd d0, d0, d1
400 ; CHECK-NEXT: ldr q1, [x0, #16]
401 ; CHECK-NEXT: fadd d0, d0, d1
402 ; CHECK-NEXT: mov z1.d, z1.d[1]
403 ; CHECK-NEXT: fadd d0, d0, d1
406 ; NONEON-NOSVE-LABEL: fadda_v4f64:
407 ; NONEON-NOSVE: // %bb.0:
408 ; NONEON-NOSVE-NEXT: sub sp, sp, #32
409 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
410 ; NONEON-NOSVE-NEXT: ldr q1, [x0, #16]
411 ; NONEON-NOSVE-NEXT: str q1, [sp, #16]
412 ; NONEON-NOSVE-NEXT: ldr q1, [x0]
413 ; NONEON-NOSVE-NEXT: str q1, [sp]
414 ; NONEON-NOSVE-NEXT: ldp d2, d1, [sp]
415 ; NONEON-NOSVE-NEXT: fadd d0, d0, d2
416 ; NONEON-NOSVE-NEXT: fadd d0, d0, d1
417 ; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #16]
418 ; NONEON-NOSVE-NEXT: fadd d0, d0, d2
419 ; NONEON-NOSVE-NEXT: fadd d0, d0, d1
420 ; NONEON-NOSVE-NEXT: add sp, sp, #32
421 ; NONEON-NOSVE-NEXT: ret
; The operand is loaded from memory rather than passed as a vector argument.
422 %op = load <4 x double>, ptr %a
; No 'fast' flag here: this is the ordered form of the reduction.
423 %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
; faddv_v4f16: fadd reduction of a <4 x half> argument where the reduce call is marked 'fast'.
; Expected with +sve: a vl4 half predicate, one SVE faddv over z1, then a scalar fadd that
; folds in the start value held in h0.
; Expected without SVE: the vector is stored to the stack; the four elements are summed
; among themselves first (via fcvt to single precision and back) and the start value is added last.
431 define half @faddv_v4f16(half %start, <4 x half> %a) {
432 ; CHECK-LABEL: faddv_v4f16:
434 ; CHECK-NEXT: ptrue p0.h, vl4
435 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
436 ; CHECK-NEXT: faddv h1, p0, z1.h
437 ; CHECK-NEXT: fadd h0, h0, h1
440 ; NONEON-NOSVE-LABEL: faddv_v4f16:
441 ; NONEON-NOSVE: // %bb.0:
442 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
443 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
444 ; NONEON-NOSVE-NEXT: str d1, [sp, #8]
445 ; NONEON-NOSVE-NEXT: fcvt s0, h0
446 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
447 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #10]
448 ; NONEON-NOSVE-NEXT: fcvt s2, h2
449 ; NONEON-NOSVE-NEXT: fcvt s1, h1
450 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
451 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #12]
452 ; NONEON-NOSVE-NEXT: fcvt s2, h2
453 ; NONEON-NOSVE-NEXT: fcvt h1, s1
454 ; NONEON-NOSVE-NEXT: fcvt s1, h1
455 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
456 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #14]
457 ; NONEON-NOSVE-NEXT: fcvt s2, h2
458 ; NONEON-NOSVE-NEXT: fcvt h1, s1
459 ; NONEON-NOSVE-NEXT: fcvt s1, h1
460 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
461 ; NONEON-NOSVE-NEXT: fcvt h1, s1
462 ; NONEON-NOSVE-NEXT: fcvt s1, h1
463 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
464 ; NONEON-NOSVE-NEXT: fcvt h0, s0
465 ; NONEON-NOSVE-NEXT: add sp, sp, #16
466 ; NONEON-NOSVE-NEXT: ret
; The 'fast' flag is what distinguishes this test from the fadda_* variant of the same type.
467 %res = call fast half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
; faddv_v8f16: fadd reduction of an <8 x half> argument where the reduce call is marked 'fast'.
; Expected with +sve: a vl8 half predicate, one SVE faddv over z1, then a scalar fadd that
; folds in the start value held in h0.
; Expected without SVE: the q register is pushed to the stack; the eight elements are summed
; among themselves first (via fcvt to single precision and back) and the start value is added last.
471 define half @faddv_v8f16(half %start, <8 x half> %a) {
472 ; CHECK-LABEL: faddv_v8f16:
474 ; CHECK-NEXT: ptrue p0.h, vl8
475 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
476 ; CHECK-NEXT: faddv h1, p0, z1.h
477 ; CHECK-NEXT: fadd h0, h0, h1
480 ; NONEON-NOSVE-LABEL: faddv_v8f16:
481 ; NONEON-NOSVE: // %bb.0:
482 ; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
483 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
484 ; NONEON-NOSVE-NEXT: ldr h1, [sp]
485 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
486 ; NONEON-NOSVE-NEXT: fcvt s0, h0
487 ; NONEON-NOSVE-NEXT: fcvt s2, h2
488 ; NONEON-NOSVE-NEXT: fcvt s1, h1
489 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
490 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #4]
491 ; NONEON-NOSVE-NEXT: fcvt s2, h2
492 ; NONEON-NOSVE-NEXT: fcvt h1, s1
493 ; NONEON-NOSVE-NEXT: fcvt s1, h1
494 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
495 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #6]
496 ; NONEON-NOSVE-NEXT: fcvt s2, h2
497 ; NONEON-NOSVE-NEXT: fcvt h1, s1
498 ; NONEON-NOSVE-NEXT: fcvt s1, h1
499 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
500 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #8]
501 ; NONEON-NOSVE-NEXT: fcvt s2, h2
502 ; NONEON-NOSVE-NEXT: fcvt h1, s1
503 ; NONEON-NOSVE-NEXT: fcvt s1, h1
504 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
505 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #10]
506 ; NONEON-NOSVE-NEXT: fcvt s2, h2
507 ; NONEON-NOSVE-NEXT: fcvt h1, s1
508 ; NONEON-NOSVE-NEXT: fcvt s1, h1
509 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
510 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #12]
511 ; NONEON-NOSVE-NEXT: fcvt s2, h2
512 ; NONEON-NOSVE-NEXT: fcvt h1, s1
513 ; NONEON-NOSVE-NEXT: fcvt s1, h1
514 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
515 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #14]
516 ; NONEON-NOSVE-NEXT: fcvt s2, h2
517 ; NONEON-NOSVE-NEXT: fcvt h1, s1
518 ; NONEON-NOSVE-NEXT: fcvt s1, h1
519 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
520 ; NONEON-NOSVE-NEXT: fcvt h1, s1
521 ; NONEON-NOSVE-NEXT: fcvt s1, h1
522 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
523 ; NONEON-NOSVE-NEXT: fcvt h0, s0
524 ; NONEON-NOSVE-NEXT: add sp, sp, #16
525 ; NONEON-NOSVE-NEXT: ret
; The 'fast' flag is what distinguishes this test from the fadda_* variant of the same type.
526 %res = call fast half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
; faddv_v16f16: fadd reduction of a <16 x half> vector loaded from %a; the reduce call is 'fast'.
; Expected with +sve: both 16-byte halves are loaded with one ldp, added together with a
; predicated vector fadd, reduced with a single faddv, then the start value in h0 is added.
; Expected without SVE: the halves are stored to a 32-byte stack area; elements at byte
; offsets k and k+16 are added pairwise, the partial sums are folded together (each step via
; fcvt to single precision and back), and the start value is added last.
530 define half @faddv_v16f16(half %start, ptr %a) {
531 ; CHECK-LABEL: faddv_v16f16:
533 ; CHECK-NEXT: ldp q2, q1, [x0]
534 ; CHECK-NEXT: ptrue p0.h, vl8
535 ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z2.h
536 ; CHECK-NEXT: faddv h1, p0, z1.h
537 ; CHECK-NEXT: fadd h0, h0, h1
540 ; NONEON-NOSVE-LABEL: faddv_v16f16:
541 ; NONEON-NOSVE: // %bb.0:
542 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
543 ; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #-32]!
544 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
545 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #18]
546 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #2]
547 ; NONEON-NOSVE-NEXT: fcvt s0, h0
548 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #16]
549 ; NONEON-NOSVE-NEXT: ldr h4, [sp]
550 ; NONEON-NOSVE-NEXT: fcvt s1, h1
551 ; NONEON-NOSVE-NEXT: fcvt s2, h2
552 ; NONEON-NOSVE-NEXT: fcvt s3, h3
553 ; NONEON-NOSVE-NEXT: fcvt s4, h4
554 ; NONEON-NOSVE-NEXT: fadd s1, s2, s1
555 ; NONEON-NOSVE-NEXT: fadd s2, s4, s3
556 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #20]
557 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #4]
558 ; NONEON-NOSVE-NEXT: fcvt s3, h3
559 ; NONEON-NOSVE-NEXT: fcvt s4, h4
560 ; NONEON-NOSVE-NEXT: fcvt h1, s1
561 ; NONEON-NOSVE-NEXT: fcvt h2, s2
562 ; NONEON-NOSVE-NEXT: fadd s3, s4, s3
563 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #6]
564 ; NONEON-NOSVE-NEXT: fcvt s1, h1
565 ; NONEON-NOSVE-NEXT: fcvt s2, h2
566 ; NONEON-NOSVE-NEXT: fcvt s4, h4
567 ; NONEON-NOSVE-NEXT: fadd s1, s2, s1
568 ; NONEON-NOSVE-NEXT: fcvt h2, s3
569 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #22]
570 ; NONEON-NOSVE-NEXT: fcvt s3, h3
571 ; NONEON-NOSVE-NEXT: fcvt h1, s1
572 ; NONEON-NOSVE-NEXT: fcvt s2, h2
573 ; NONEON-NOSVE-NEXT: fadd s3, s4, s3
574 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #8]
575 ; NONEON-NOSVE-NEXT: fcvt s1, h1
576 ; NONEON-NOSVE-NEXT: fcvt s4, h4
577 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
578 ; NONEON-NOSVE-NEXT: fcvt h2, s3
579 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #24]
580 ; NONEON-NOSVE-NEXT: fcvt s3, h3
581 ; NONEON-NOSVE-NEXT: fcvt h1, s1
582 ; NONEON-NOSVE-NEXT: fcvt s2, h2
583 ; NONEON-NOSVE-NEXT: fadd s3, s4, s3
584 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #10]
585 ; NONEON-NOSVE-NEXT: fcvt s1, h1
586 ; NONEON-NOSVE-NEXT: fcvt s4, h4
587 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
588 ; NONEON-NOSVE-NEXT: fcvt h2, s3
589 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #26]
590 ; NONEON-NOSVE-NEXT: fcvt s3, h3
591 ; NONEON-NOSVE-NEXT: fcvt h1, s1
592 ; NONEON-NOSVE-NEXT: fcvt s2, h2
593 ; NONEON-NOSVE-NEXT: fadd s3, s4, s3
594 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #12]
595 ; NONEON-NOSVE-NEXT: fcvt s1, h1
596 ; NONEON-NOSVE-NEXT: fcvt s4, h4
597 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
598 ; NONEON-NOSVE-NEXT: fcvt h2, s3
599 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #28]
600 ; NONEON-NOSVE-NEXT: fcvt s3, h3
601 ; NONEON-NOSVE-NEXT: fcvt h1, s1
602 ; NONEON-NOSVE-NEXT: fcvt s2, h2
603 ; NONEON-NOSVE-NEXT: fcvt s1, h1
604 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
605 ; NONEON-NOSVE-NEXT: fadd s2, s4, s3
606 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #30]
607 ; NONEON-NOSVE-NEXT: ldr h4, [sp, #14]
608 ; NONEON-NOSVE-NEXT: fcvt s3, h3
609 ; NONEON-NOSVE-NEXT: fcvt s4, h4
610 ; NONEON-NOSVE-NEXT: fcvt h1, s1
611 ; NONEON-NOSVE-NEXT: fcvt h2, s2
612 ; NONEON-NOSVE-NEXT: fcvt s1, h1
613 ; NONEON-NOSVE-NEXT: fcvt s2, h2
614 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
615 ; NONEON-NOSVE-NEXT: fadd s2, s4, s3
616 ; NONEON-NOSVE-NEXT: fcvt h1, s1
617 ; NONEON-NOSVE-NEXT: fcvt h2, s2
618 ; NONEON-NOSVE-NEXT: fcvt s1, h1
619 ; NONEON-NOSVE-NEXT: fcvt s2, h2
620 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
621 ; NONEON-NOSVE-NEXT: fcvt h1, s1
622 ; NONEON-NOSVE-NEXT: fcvt s1, h1
623 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
624 ; NONEON-NOSVE-NEXT: fcvt h0, s0
625 ; NONEON-NOSVE-NEXT: add sp, sp, #32
626 ; NONEON-NOSVE-NEXT: ret
; The operand is loaded from memory rather than passed as a vector argument.
627 %op = load <16 x half>, ptr %a
; The 'fast' flag is what distinguishes this test from the fadda_* variant of the same type.
628 %res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
; faddv_v2f32: fadd reduction of a <2 x float> argument where the reduce call is marked 'fast'.
; Expected with +sve: a vl2 single-precision predicate, one SVE faddv over z1, then a scalar
; fadd that folds in the start value held in s0.
; Expected without SVE: the two floats are reloaded from the stack, added to each other,
; and the start value is added last.
632 define float @faddv_v2f32(float %start, <2 x float> %a) {
633 ; CHECK-LABEL: faddv_v2f32:
635 ; CHECK-NEXT: ptrue p0.s, vl2
636 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
637 ; CHECK-NEXT: faddv s1, p0, z1.s
638 ; CHECK-NEXT: fadd s0, s0, s1
641 ; NONEON-NOSVE-LABEL: faddv_v2f32:
642 ; NONEON-NOSVE: // %bb.0:
643 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
644 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
645 ; NONEON-NOSVE-NEXT: str d1, [sp, #8]
646 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
647 ; NONEON-NOSVE-NEXT: fadd s1, s2, s1
648 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
649 ; NONEON-NOSVE-NEXT: add sp, sp, #16
650 ; NONEON-NOSVE-NEXT: ret
; The 'fast' flag is what distinguishes this test from the fadda_* variant of the same type.
651 %res = call fast float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
; faddv_v4f32: fadd reduction of a <4 x float> argument where the reduce call is marked 'fast'.
; Expected with +sve: a vl4 single-precision predicate, one SVE faddv over z1, then a scalar
; fadd that folds in the start value held in s0.
; Expected without SVE: the four floats are reloaded from the stack as two pairs, each pair
; is summed, the two partial sums are added, and the start value is added last.
655 define float @faddv_v4f32(float %start, <4 x float> %a) {
656 ; CHECK-LABEL: faddv_v4f32:
658 ; CHECK-NEXT: ptrue p0.s, vl4
659 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
660 ; CHECK-NEXT: faddv s1, p0, z1.s
661 ; CHECK-NEXT: fadd s0, s0, s1
664 ; NONEON-NOSVE-LABEL: faddv_v4f32:
665 ; NONEON-NOSVE: // %bb.0:
666 ; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
667 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
668 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
669 ; NONEON-NOSVE-NEXT: ldp s4, s3, [sp], #16
670 ; NONEON-NOSVE-NEXT: fadd s3, s4, s3
671 ; NONEON-NOSVE-NEXT: fadd s1, s2, s1
672 ; NONEON-NOSVE-NEXT: fadd s1, s3, s1
673 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
674 ; NONEON-NOSVE-NEXT: ret
; The 'fast' flag is what distinguishes this test from the fadda_* variant of the same type.
675 %res = call fast float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
; faddv_v8f32: fadd reduction of an <8 x float> vector loaded from %a; the reduce call is 'fast'.
; Expected with +sve: both 16-byte halves are loaded with one ldp, added together with a
; predicated vector fadd, reduced with a single faddv, then the start value in s0 is added.
; Expected without SVE: the halves are stored to a 32-byte stack area; floats at byte offsets
; k and k+16 are added pairwise, the four partial sums are folded in a tree, and the start
; value is added last.
679 define float @faddv_v8f32(float %start, ptr %a) {
680 ; CHECK-LABEL: faddv_v8f32:
682 ; CHECK-NEXT: ldp q2, q1, [x0]
683 ; CHECK-NEXT: ptrue p0.s, vl4
684 ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z2.s
685 ; CHECK-NEXT: faddv s1, p0, z1.s
686 ; CHECK-NEXT: fadd s0, s0, s1
689 ; NONEON-NOSVE-LABEL: faddv_v8f32:
690 ; NONEON-NOSVE: // %bb.0:
691 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
692 ; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #-32]!
693 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
694 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #16]
695 ; NONEON-NOSVE-NEXT: ldp s4, s3, [sp]
696 ; NONEON-NOSVE-NEXT: ldp s5, s6, [sp, #24]
697 ; NONEON-NOSVE-NEXT: ldp s7, s16, [sp, #8]
698 ; NONEON-NOSVE-NEXT: fadd s1, s3, s1
699 ; NONEON-NOSVE-NEXT: fadd s2, s4, s2
700 ; NONEON-NOSVE-NEXT: fadd s3, s7, s5
701 ; NONEON-NOSVE-NEXT: fadd s4, s16, s6
702 ; NONEON-NOSVE-NEXT: fadd s1, s2, s1
703 ; NONEON-NOSVE-NEXT: fadd s2, s3, s4
704 ; NONEON-NOSVE-NEXT: fadd s1, s1, s2
705 ; NONEON-NOSVE-NEXT: fadd s0, s0, s1
706 ; NONEON-NOSVE-NEXT: add sp, sp, #32
707 ; NONEON-NOSVE-NEXT: ret
; The operand is loaded from memory rather than passed as a vector argument.
708 %op = load <8 x float>, ptr %a
; The 'fast' flag is what distinguishes this test from the fadda_* variant of the same type.
709 %res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
; faddv_v1f64: 'fast' fadd reduction of a single-element <1 x double> with scalar start %start.
; Both run configurations expect this to collapse to one scalar 'fadd d0, d0, d1'.
713 define double @faddv_v1f64(double %start, <1 x double> %a) {
714 ; CHECK-LABEL: faddv_v1f64:
716 ; CHECK-NEXT: fadd d0, d0, d1
719 ; NONEON-NOSVE-LABEL: faddv_v1f64:
720 ; NONEON-NOSVE: // %bb.0:
721 ; NONEON-NOSVE-NEXT: fadd d0, d0, d1
722 ; NONEON-NOSVE-NEXT: ret
; The 'fast' flag is what distinguishes this test from the fadda_* variant of the same type.
723 %res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
; faddv_v2f64: fadd reduction of a <2 x double> argument where the reduce call is marked 'fast'.
; Expected with +sve: a vl2 double predicate, one SVE faddv over z1, then a scalar fadd that
; folds in the start value held in d0.
; Expected without SVE: the two doubles are popped from the stack with a post-indexed ldp,
; added to each other, and the start value is added last.
727 define double @faddv_v2f64(double %start, <2 x double> %a) {
728 ; CHECK-LABEL: faddv_v2f64:
730 ; CHECK-NEXT: ptrue p0.d, vl2
731 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
732 ; CHECK-NEXT: faddv d1, p0, z1.d
733 ; CHECK-NEXT: fadd d0, d0, d1
736 ; NONEON-NOSVE-LABEL: faddv_v2f64:
737 ; NONEON-NOSVE: // %bb.0:
738 ; NONEON-NOSVE-NEXT: str q1, [sp, #-16]!
739 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
740 ; NONEON-NOSVE-NEXT: ldp d2, d1, [sp], #16
741 ; NONEON-NOSVE-NEXT: fadd d1, d2, d1
742 ; NONEON-NOSVE-NEXT: fadd d0, d0, d1
743 ; NONEON-NOSVE-NEXT: ret
; The 'fast' flag is what distinguishes this test from the fadda_* variant of the same type.
744 %res = call fast double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
; faddv_v4f64: fadd reduction of a <4 x double> vector loaded from %a; the reduce call is 'fast'.
; Expected with +sve: both 16-byte halves are loaded with one ldp, added together with a
; predicated vector fadd, reduced with a single faddv, then the start value in d0 is added.
; Expected without SVE: the halves are stored to a 32-byte stack area; doubles at byte offsets
; k and k+16 are added pairwise, the two partial sums are added, and the start value is added last.
748 define double @faddv_v4f64(double %start, ptr %a) {
749 ; CHECK-LABEL: faddv_v4f64:
751 ; CHECK-NEXT: ldp q2, q1, [x0]
752 ; CHECK-NEXT: ptrue p0.d, vl2
753 ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z2.d
754 ; CHECK-NEXT: faddv d1, p0, z1.d
755 ; CHECK-NEXT: fadd d0, d0, d1
758 ; NONEON-NOSVE-LABEL: faddv_v4f64:
759 ; NONEON-NOSVE: // %bb.0:
760 ; NONEON-NOSVE-NEXT: ldp q2, q1, [x0]
761 ; NONEON-NOSVE-NEXT: stp q2, q1, [sp, #-32]!
762 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
763 ; NONEON-NOSVE-NEXT: ldp d2, d1, [sp, #16]
764 ; NONEON-NOSVE-NEXT: ldp d4, d3, [sp], #32
765 ; NONEON-NOSVE-NEXT: fadd d1, d3, d1
766 ; NONEON-NOSVE-NEXT: fadd d2, d4, d2
767 ; NONEON-NOSVE-NEXT: fadd d1, d2, d1
768 ; NONEON-NOSVE-NEXT: fadd d0, d0, d1
769 ; NONEON-NOSVE-NEXT: ret
; The operand is loaded from memory rather than passed as a vector argument.
770 %op = load <4 x double>, ptr %a
; The 'fast' flag is what distinguishes this test from the fadda_* variant of the same type.
771 %res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
; fmaxv_v4f16: fmax reduction of a <4 x half> argument (no start value operand).
; Expected with +sve: a vl4 half predicate and a single SVE fmaxnmv over z0.
; Expected without SVE: the vector is stored to the stack and the four elements are combined
; one at a time with scalar fmaxnm, each step going via fcvt to single precision and back.
779 define half @fmaxv_v4f16(<4 x half> %a) {
780 ; CHECK-LABEL: fmaxv_v4f16:
782 ; CHECK-NEXT: ptrue p0.h, vl4
783 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
784 ; CHECK-NEXT: fmaxnmv h0, p0, z0.h
785 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
788 ; NONEON-NOSVE-LABEL: fmaxv_v4f16:
789 ; NONEON-NOSVE: // %bb.0:
790 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
791 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
792 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
793 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
794 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
795 ; NONEON-NOSVE-NEXT: fcvt s1, h1
796 ; NONEON-NOSVE-NEXT: fcvt s0, h0
797 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
798 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
799 ; NONEON-NOSVE-NEXT: fcvt s1, h1
800 ; NONEON-NOSVE-NEXT: fcvt h0, s0
801 ; NONEON-NOSVE-NEXT: fcvt s0, h0
802 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
803 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
804 ; NONEON-NOSVE-NEXT: fcvt s1, h1
805 ; NONEON-NOSVE-NEXT: fcvt h0, s0
806 ; NONEON-NOSVE-NEXT: fcvt s0, h0
807 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
808 ; NONEON-NOSVE-NEXT: fcvt h0, s0
809 ; NONEON-NOSVE-NEXT: add sp, sp, #16
810 ; NONEON-NOSVE-NEXT: ret
; Unlike the fadd reductions in this file, reduce.fmax takes only the vector operand.
811 %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
; fmaxv_v8f16: fmax reduction of an <8 x half> argument (no start value operand).
; Expected with +sve: a vl8 half predicate and a single SVE fmaxnmv over z0.
; Expected without SVE: the q register is pushed to the stack and the eight elements are
; combined one at a time with scalar fmaxnm, each step going via fcvt to single precision and back.
815 define half @fmaxv_v8f16(<8 x half> %a) {
816 ; CHECK-LABEL: fmaxv_v8f16:
818 ; CHECK-NEXT: ptrue p0.h, vl8
819 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
820 ; CHECK-NEXT: fmaxnmv h0, p0, z0.h
821 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
824 ; NONEON-NOSVE-LABEL: fmaxv_v8f16:
825 ; NONEON-NOSVE: // %bb.0:
826 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
827 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
828 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
829 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
830 ; NONEON-NOSVE-NEXT: fcvt s1, h1
831 ; NONEON-NOSVE-NEXT: fcvt s0, h0
832 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
833 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
834 ; NONEON-NOSVE-NEXT: fcvt s1, h1
835 ; NONEON-NOSVE-NEXT: fcvt h0, s0
836 ; NONEON-NOSVE-NEXT: fcvt s0, h0
837 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
838 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
839 ; NONEON-NOSVE-NEXT: fcvt s1, h1
840 ; NONEON-NOSVE-NEXT: fcvt h0, s0
841 ; NONEON-NOSVE-NEXT: fcvt s0, h0
842 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
843 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
844 ; NONEON-NOSVE-NEXT: fcvt s1, h1
845 ; NONEON-NOSVE-NEXT: fcvt h0, s0
846 ; NONEON-NOSVE-NEXT: fcvt s0, h0
847 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
848 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
849 ; NONEON-NOSVE-NEXT: fcvt s1, h1
850 ; NONEON-NOSVE-NEXT: fcvt h0, s0
851 ; NONEON-NOSVE-NEXT: fcvt s0, h0
852 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
853 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
854 ; NONEON-NOSVE-NEXT: fcvt s1, h1
855 ; NONEON-NOSVE-NEXT: fcvt h0, s0
856 ; NONEON-NOSVE-NEXT: fcvt s0, h0
857 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
858 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
859 ; NONEON-NOSVE-NEXT: fcvt s1, h1
860 ; NONEON-NOSVE-NEXT: fcvt h0, s0
861 ; NONEON-NOSVE-NEXT: fcvt s0, h0
862 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
863 ; NONEON-NOSVE-NEXT: fcvt h0, s0
864 ; NONEON-NOSVE-NEXT: add sp, sp, #16
865 ; NONEON-NOSVE-NEXT: ret
; Unlike the fadd reductions in this file, reduce.fmax takes only the vector operand.
866 %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
; fmaxv_v16f16: fmax reduction of a <16 x half> vector loaded from %a (no start value operand).
; Expected with +sve: both 16-byte halves are loaded with one ldp, combined with a predicated
; vector fmaxnm, then reduced with a single fmaxnmv.
; Expected without SVE: the halves are stored to a 32-byte stack area; elements at byte
; offsets k and k+16 are combined pairwise with scalar fmaxnm and the partial results are
; folded together, each step going via fcvt to single precision and back.
870 define half @fmaxv_v16f16(ptr %a) {
871 ; CHECK-LABEL: fmaxv_v16f16:
873 ; CHECK-NEXT: ldp q1, q0, [x0]
874 ; CHECK-NEXT: ptrue p0.h, vl8
875 ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
876 ; CHECK-NEXT: fmaxnmv h0, p0, z0.h
877 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
880 ; NONEON-NOSVE-LABEL: fmaxv_v16f16:
881 ; NONEON-NOSVE: // %bb.0:
882 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
883 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
884 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
885 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
886 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
887 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #16]
888 ; NONEON-NOSVE-NEXT: ldr h3, [sp]
889 ; NONEON-NOSVE-NEXT: fcvt s0, h0
890 ; NONEON-NOSVE-NEXT: fcvt s1, h1
891 ; NONEON-NOSVE-NEXT: fcvt s2, h2
892 ; NONEON-NOSVE-NEXT: fcvt s3, h3
893 ; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0
894 ; NONEON-NOSVE-NEXT: fmaxnm s1, s3, s2
895 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #20]
896 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #4]
897 ; NONEON-NOSVE-NEXT: fcvt s2, h2
898 ; NONEON-NOSVE-NEXT: fcvt s3, h3
899 ; NONEON-NOSVE-NEXT: fcvt h0, s0
900 ; NONEON-NOSVE-NEXT: fcvt h1, s1
901 ; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2
902 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #6]
903 ; NONEON-NOSVE-NEXT: fcvt s0, h0
904 ; NONEON-NOSVE-NEXT: fcvt s1, h1
905 ; NONEON-NOSVE-NEXT: fcvt s3, h3
906 ; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0
907 ; NONEON-NOSVE-NEXT: fcvt h1, s2
908 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #22]
909 ; NONEON-NOSVE-NEXT: fcvt s2, h2
910 ; NONEON-NOSVE-NEXT: fcvt h0, s0
911 ; NONEON-NOSVE-NEXT: fcvt s1, h1
912 ; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2
913 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #8]
914 ; NONEON-NOSVE-NEXT: fcvt s0, h0
915 ; NONEON-NOSVE-NEXT: fcvt s3, h3
916 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
917 ; NONEON-NOSVE-NEXT: fcvt h1, s2
918 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #24]
919 ; NONEON-NOSVE-NEXT: fcvt s2, h2
920 ; NONEON-NOSVE-NEXT: fcvt h0, s0
921 ; NONEON-NOSVE-NEXT: fcvt s1, h1
922 ; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2
923 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #10]
924 ; NONEON-NOSVE-NEXT: fcvt s0, h0
925 ; NONEON-NOSVE-NEXT: fcvt s3, h3
926 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
927 ; NONEON-NOSVE-NEXT: fcvt h1, s2
928 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #26]
929 ; NONEON-NOSVE-NEXT: fcvt s2, h2
930 ; NONEON-NOSVE-NEXT: fcvt h0, s0
931 ; NONEON-NOSVE-NEXT: fcvt s1, h1
932 ; NONEON-NOSVE-NEXT: fmaxnm s2, s3, s2
933 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #12]
934 ; NONEON-NOSVE-NEXT: fcvt s0, h0
935 ; NONEON-NOSVE-NEXT: fcvt s3, h3
936 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
937 ; NONEON-NOSVE-NEXT: fcvt h1, s2
938 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #28]
939 ; NONEON-NOSVE-NEXT: fcvt s2, h2
940 ; NONEON-NOSVE-NEXT: fcvt h0, s0
941 ; NONEON-NOSVE-NEXT: fcvt s1, h1
942 ; NONEON-NOSVE-NEXT: fcvt s0, h0
943 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
944 ; NONEON-NOSVE-NEXT: fmaxnm s1, s3, s2
945 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #30]
946 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #14]
947 ; NONEON-NOSVE-NEXT: fcvt s2, h2
948 ; NONEON-NOSVE-NEXT: fcvt s3, h3
949 ; NONEON-NOSVE-NEXT: fcvt h0, s0
950 ; NONEON-NOSVE-NEXT: fcvt h1, s1
951 ; NONEON-NOSVE-NEXT: fcvt s0, h0
952 ; NONEON-NOSVE-NEXT: fcvt s1, h1
953 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
954 ; NONEON-NOSVE-NEXT: fmaxnm s1, s3, s2
955 ; NONEON-NOSVE-NEXT: fcvt h0, s0
956 ; NONEON-NOSVE-NEXT: fcvt h1, s1
957 ; NONEON-NOSVE-NEXT: fcvt s0, h0
958 ; NONEON-NOSVE-NEXT: fcvt s1, h1
959 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
960 ; NONEON-NOSVE-NEXT: fcvt h0, s0
961 ; NONEON-NOSVE-NEXT: add sp, sp, #32
962 ; NONEON-NOSVE-NEXT: ret
; The operand is loaded from memory rather than passed as a vector argument.
963 %op = load <16 x half>, ptr %a
; Unlike the fadd reductions in this file, reduce.fmax takes only the vector operand.
964 %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
; llvm.vector.reduce.fmax over a <2 x float> passed by value.
; With +sve the expected code is a single fmaxnmv under a vl2 predicate.
; Without +sve the vector is stored to the stack, both lanes are reloaded
; with one ldp, and they are combined with a scalar fmaxnm.
968 define float @fmaxv_v2f32(<2 x float> %a) {
969 ; CHECK-LABEL: fmaxv_v2f32:
971 ; CHECK-NEXT: ptrue p0.s, vl2
972 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
973 ; CHECK-NEXT: fmaxnmv s0, p0, z0.s
974 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
977 ; NONEON-NOSVE-LABEL: fmaxv_v2f32:
978 ; NONEON-NOSVE: // %bb.0:
979 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
980 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
981 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
982 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8]
983 ; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0
984 ; NONEON-NOSVE-NEXT: add sp, sp, #16
985 ; NONEON-NOSVE-NEXT: ret
986 %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
; llvm.vector.reduce.fmax over a <4 x float> passed by value.
; With +sve the expected code is a single fmaxnmv under a vl4 predicate.
; Without +sve the q register is stored to the stack and the four lanes are
; folded together with three scalar fmaxnm instructions.
990 define float @fmaxv_v4f32(<4 x float> %a) {
991 ; CHECK-LABEL: fmaxv_v4f32:
993 ; CHECK-NEXT: ptrue p0.s, vl4
994 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
995 ; CHECK-NEXT: fmaxnmv s0, p0, z0.s
996 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
999 ; NONEON-NOSVE-LABEL: fmaxv_v4f32:
1000 ; NONEON-NOSVE: // %bb.0:
1001 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1002 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1003 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp]
1004 ; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0
1005 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
1006 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s2
1007 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
1008 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1009 ; NONEON-NOSVE-NEXT: ret
1010 %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
; llvm.vector.reduce.fmax over a <8 x float> loaded from memory.
; With +sve the two 128-bit halves are loaded with ldp, merged lane-wise with
; a predicated fmaxnm (vl4), and the result is reduced with fmaxnmv.
; Without +sve both halves are copied to the stack and the eight lanes are
; combined using scalar fmaxnm instructions only.
1014 define float @fmaxv_v8f32(ptr %a) {
1015 ; CHECK-LABEL: fmaxv_v8f32:
1017 ; CHECK-NEXT: ldp q1, q0, [x0]
1018 ; CHECK-NEXT: ptrue p0.s, vl4
1019 ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
1020 ; CHECK-NEXT: fmaxnmv s0, p0, z0.s
1021 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
1024 ; NONEON-NOSVE-LABEL: fmaxv_v8f32:
1025 ; NONEON-NOSVE: // %bb.0:
1026 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
1027 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
1028 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1029 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16]
1030 ; NONEON-NOSVE-NEXT: ldp s3, s2, [sp]
1031 ; NONEON-NOSVE-NEXT: fmaxnm s0, s2, s0
1032 ; NONEON-NOSVE-NEXT: fmaxnm s1, s3, s1
1033 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
1034 ; NONEON-NOSVE-NEXT: fmaxnm s0, s1, s0
1035 ; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24]
1036 ; NONEON-NOSVE-NEXT: fmaxnm s2, s2, s3
1037 ; NONEON-NOSVE-NEXT: fmaxnm s1, s4, s1
1038 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s2
1039 ; NONEON-NOSVE-NEXT: fmaxnm s0, s0, s1
1040 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1041 ; NONEON-NOSVE-NEXT: ret
1042 %op = load <8 x float>, ptr %a
1043 %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
; llvm.vector.reduce.fmax over a single-element <1 x double>.
; The expectation for the run without +sve is a bare ret: no max instruction
; is emitted for a one-lane reduction.
1047 define double @fmaxv_v1f64(<1 x double> %a) {
1048 ; CHECK-LABEL: fmaxv_v1f64:
1052 ; NONEON-NOSVE-LABEL: fmaxv_v1f64:
1053 ; NONEON-NOSVE: // %bb.0:
1054 ; NONEON-NOSVE-NEXT: ret
1055 %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
; llvm.vector.reduce.fmax over a <2 x double> passed by value.
; With +sve the expected code is a single fmaxnmv under a vl2 predicate.
; Without +sve the q register is pushed to the stack, both lanes are reloaded
; by a post-indexed ldp that also restores sp, and one scalar fmaxnm follows.
1059 define double @fmaxv_v2f64(<2 x double> %a) {
1060 ; CHECK-LABEL: fmaxv_v2f64:
1062 ; CHECK-NEXT: ptrue p0.d, vl2
1063 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1064 ; CHECK-NEXT: fmaxnmv d0, p0, z0.d
1065 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1068 ; NONEON-NOSVE-LABEL: fmaxv_v2f64:
1069 ; NONEON-NOSVE: // %bb.0:
1070 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1071 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1072 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16
1073 ; NONEON-NOSVE-NEXT: fmaxnm d0, d1, d0
1074 ; NONEON-NOSVE-NEXT: ret
1075 %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
; llvm.vector.reduce.fmax over a <4 x double> loaded from memory.
; With +sve the two 128-bit halves are merged lane-wise with a predicated
; fmaxnm (vl2) and then reduced with fmaxnmv.
; Without +sve both halves are copied to the stack and the four lanes are
; combined with three scalar fmaxnm instructions.
1079 define double @fmaxv_v4f64(ptr %a) {
1080 ; CHECK-LABEL: fmaxv_v4f64:
1082 ; CHECK-NEXT: ldp q1, q0, [x0]
1083 ; CHECK-NEXT: ptrue p0.d, vl2
1084 ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
1085 ; CHECK-NEXT: fmaxnmv d0, p0, z0.d
1086 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1089 ; NONEON-NOSVE-LABEL: fmaxv_v4f64:
1090 ; NONEON-NOSVE: // %bb.0:
1091 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
1092 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
1093 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1094 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
1095 ; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32
1096 ; NONEON-NOSVE-NEXT: fmaxnm d0, d2, d0
1097 ; NONEON-NOSVE-NEXT: fmaxnm d1, d3, d1
1098 ; NONEON-NOSVE-NEXT: fmaxnm d0, d1, d0
1099 ; NONEON-NOSVE-NEXT: ret
1100 %op = load <4 x double>, ptr %a
1101 %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
; llvm.vector.reduce.fmin over a <4 x half> passed by value.
; With +sve the expected code is a single fminnmv under a vl4 predicate.
; Without +sve the vector is stored to the stack and each half lane is
; reloaded, widened to single precision with fcvt and folded in with a scalar
; fminnm; the running result is narrowed to half and widened again between
; steps.
1109 define half @fminv_v4f16(<4 x half> %a) {
1110 ; CHECK-LABEL: fminv_v4f16:
1112 ; CHECK-NEXT: ptrue p0.h, vl4
1113 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1114 ; CHECK-NEXT: fminnmv h0, p0, z0.h
1115 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
1118 ; NONEON-NOSVE-LABEL: fminv_v4f16:
1119 ; NONEON-NOSVE: // %bb.0:
1120 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
1121 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1122 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
1123 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
1124 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1125 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1126 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1127 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1128 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1129 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1130 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1131 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1132 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1133 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1134 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1135 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1136 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1137 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1138 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1139 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1140 ; NONEON-NOSVE-NEXT: ret
1141 %res = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
; llvm.vector.reduce.fmin over a <8 x half> passed by value.
; With +sve the expected code is a single fminnmv under a vl8 predicate.
; Without +sve the q register is stored to the stack and the eight half lanes
; are folded one at a time: each is widened with fcvt and combined with a
; scalar single-precision fminnm, with the running result narrowed to half
; and widened again between steps.
1145 define half @fminv_v8f16(<8 x half> %a) {
1146 ; CHECK-LABEL: fminv_v8f16:
1148 ; CHECK-NEXT: ptrue p0.h, vl8
1149 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1150 ; CHECK-NEXT: fminnmv h0, p0, z0.h
1151 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
1154 ; NONEON-NOSVE-LABEL: fminv_v8f16:
1155 ; NONEON-NOSVE: // %bb.0:
1156 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1157 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1158 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
1159 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
1160 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1161 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1162 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1163 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
1164 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1165 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1166 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1167 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1168 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
1169 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1170 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1171 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1172 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1173 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1174 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1175 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1176 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1177 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1178 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1179 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1180 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1181 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1182 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1183 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1184 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1185 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1186 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1187 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1188 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1189 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1190 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1191 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1192 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1193 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1194 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1195 ; NONEON-NOSVE-NEXT: ret
1196 %res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a)
; llvm.vector.reduce.fmin over a <16 x half> loaded from memory.
; With +sve the two 128-bit halves are merged lane-wise with a predicated
; fminnm (vl8) and the result is reduced with fminnmv.
; Without +sve the expected code is fully scalar: both halves are copied to a
; 32-byte stack area, half lanes are reloaded and widened with fcvt, combined
; with single-precision fminnm, and intermediate results are narrowed back to
; half between steps.
1200 define half @fminv_v16f16(ptr %a) {
1201 ; CHECK-LABEL: fminv_v16f16:
1203 ; CHECK-NEXT: ldp q1, q0, [x0]
1204 ; CHECK-NEXT: ptrue p0.h, vl8
1205 ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
1206 ; CHECK-NEXT: fminnmv h0, p0, z0.h
1207 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
1210 ; NONEON-NOSVE-LABEL: fminv_v16f16:
1211 ; NONEON-NOSVE: // %bb.0:
1212 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
1213 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
1214 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1215 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1216 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
1217 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #16]
1218 ; NONEON-NOSVE-NEXT: ldr h3, [sp]
1219 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1220 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1221 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1222 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1223 ; NONEON-NOSVE-NEXT: fminnm s0, s1, s0
1224 ; NONEON-NOSVE-NEXT: fminnm s1, s3, s2
1225 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #20]
1226 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #4]
1227 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1228 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1229 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1230 ; NONEON-NOSVE-NEXT: fcvt h1, s1
1231 ; NONEON-NOSVE-NEXT: fminnm s2, s3, s2
1232 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #6]
1233 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1234 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1235 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1236 ; NONEON-NOSVE-NEXT: fminnm s0, s1, s0
1237 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1238 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #22]
1239 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1240 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1241 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1242 ; NONEON-NOSVE-NEXT: fminnm s2, s3, s2
1243 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #8]
1244 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1245 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1246 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1247 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1248 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #24]
1249 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1250 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1251 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1252 ; NONEON-NOSVE-NEXT: fminnm s2, s3, s2
1253 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #10]
1254 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1255 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1256 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1257 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1258 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #26]
1259 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1260 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1261 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1262 ; NONEON-NOSVE-NEXT: fminnm s2, s3, s2
1263 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #12]
1264 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1265 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1266 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1267 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1268 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #28]
1269 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1270 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1271 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1272 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1273 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1274 ; NONEON-NOSVE-NEXT: fminnm s1, s3, s2
1275 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #30]
1276 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #14]
1277 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1278 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1279 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1280 ; NONEON-NOSVE-NEXT: fcvt h1, s1
1281 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1282 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1283 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1284 ; NONEON-NOSVE-NEXT: fminnm s1, s3, s2
1285 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1286 ; NONEON-NOSVE-NEXT: fcvt h1, s1
1287 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1288 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1289 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1290 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1291 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1292 ; NONEON-NOSVE-NEXT: ret
1293 %op = load <16 x half>, ptr %a
1294 %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op)
; llvm.vector.reduce.fmin over a <2 x float> passed by value.
; With +sve the expected code is a single fminnmv under a vl2 predicate.
; Without +sve the vector is stored to the stack, both lanes are reloaded
; with one ldp, and they are combined with a scalar fminnm.
1298 define float @fminv_v2f32(<2 x float> %a) {
1299 ; CHECK-LABEL: fminv_v2f32:
1301 ; CHECK-NEXT: ptrue p0.s, vl2
1302 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1303 ; CHECK-NEXT: fminnmv s0, p0, z0.s
1304 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
1307 ; NONEON-NOSVE-LABEL: fminv_v2f32:
1308 ; NONEON-NOSVE: // %bb.0:
1309 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
1310 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1311 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
1312 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8]
1313 ; NONEON-NOSVE-NEXT: fminnm s0, s1, s0
1314 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1315 ; NONEON-NOSVE-NEXT: ret
1316 %res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
; llvm.vector.reduce.fmin over a <4 x float> passed by value.
; With +sve the expected code is a single fminnmv under a vl4 predicate.
; Without +sve the q register is stored to the stack and the four lanes are
; folded together with three scalar fminnm instructions.
1320 define float @fminv_v4f32(<4 x float> %a) {
1321 ; CHECK-LABEL: fminv_v4f32:
1323 ; CHECK-NEXT: ptrue p0.s, vl4
1324 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1325 ; CHECK-NEXT: fminnmv s0, p0, z0.s
1326 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
1329 ; NONEON-NOSVE-LABEL: fminv_v4f32:
1330 ; NONEON-NOSVE: // %bb.0:
1331 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1332 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1333 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp]
1334 ; NONEON-NOSVE-NEXT: fminnm s0, s1, s0
1335 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
1336 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s2
1337 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1338 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1339 ; NONEON-NOSVE-NEXT: ret
1340 %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
; llvm.vector.reduce.fmin over a <8 x float> loaded from memory.
; With +sve the two 128-bit halves are loaded with ldp, merged lane-wise with
; a predicated fminnm (vl4), and the result is reduced with fminnmv.
; Without +sve both halves are copied to the stack and the eight lanes are
; combined using scalar fminnm instructions only.
1344 define float @fminv_v8f32(ptr %a) {
1345 ; CHECK-LABEL: fminv_v8f32:
1347 ; CHECK-NEXT: ldp q1, q0, [x0]
1348 ; CHECK-NEXT: ptrue p0.s, vl4
1349 ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
1350 ; CHECK-NEXT: fminnmv s0, p0, z0.s
1351 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
1354 ; NONEON-NOSVE-LABEL: fminv_v8f32:
1355 ; NONEON-NOSVE: // %bb.0:
1356 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
1357 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
1358 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1359 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16]
1360 ; NONEON-NOSVE-NEXT: ldp s3, s2, [sp]
1361 ; NONEON-NOSVE-NEXT: fminnm s0, s2, s0
1362 ; NONEON-NOSVE-NEXT: fminnm s1, s3, s1
1363 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
1364 ; NONEON-NOSVE-NEXT: fminnm s0, s1, s0
1365 ; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24]
1366 ; NONEON-NOSVE-NEXT: fminnm s2, s2, s3
1367 ; NONEON-NOSVE-NEXT: fminnm s1, s4, s1
1368 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s2
1369 ; NONEON-NOSVE-NEXT: fminnm s0, s0, s1
1370 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1371 ; NONEON-NOSVE-NEXT: ret
1372 %op = load <8 x float>, ptr %a
1373 %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op)
; llvm.vector.reduce.fmin over a single-element <1 x double>.
; The expectation for the run without +sve is a bare ret: no min instruction
; is emitted for a one-lane reduction.
1377 define double @fminv_v1f64(<1 x double> %a) {
1378 ; CHECK-LABEL: fminv_v1f64:
1382 ; NONEON-NOSVE-LABEL: fminv_v1f64:
1383 ; NONEON-NOSVE: // %bb.0:
1384 ; NONEON-NOSVE-NEXT: ret
1385 %res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
; llvm.vector.reduce.fmin over a <2 x double> passed by value.
; With +sve the expected code is a single fminnmv under a vl2 predicate.
; Without +sve the q register is pushed to the stack, both lanes are reloaded
; by a post-indexed ldp that also restores sp, and one scalar fminnm follows.
1389 define double @fminv_v2f64(<2 x double> %a) {
1390 ; CHECK-LABEL: fminv_v2f64:
1392 ; CHECK-NEXT: ptrue p0.d, vl2
1393 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1394 ; CHECK-NEXT: fminnmv d0, p0, z0.d
1395 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1398 ; NONEON-NOSVE-LABEL: fminv_v2f64:
1399 ; NONEON-NOSVE: // %bb.0:
1400 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1401 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1402 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16
1403 ; NONEON-NOSVE-NEXT: fminnm d0, d1, d0
1404 ; NONEON-NOSVE-NEXT: ret
1405 %res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
; llvm.vector.reduce.fmin over a <4 x double> loaded from memory.
; With +sve the two 128-bit halves are merged lane-wise with a predicated
; fminnm (vl2) and then reduced with fminnmv.
; Without +sve both halves are copied to the stack and the four lanes are
; combined with three scalar fminnm instructions.
1409 define double @fminv_v4f64(ptr %a) {
1410 ; CHECK-LABEL: fminv_v4f64:
1412 ; CHECK-NEXT: ldp q1, q0, [x0]
1413 ; CHECK-NEXT: ptrue p0.d, vl2
1414 ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
1415 ; CHECK-NEXT: fminnmv d0, p0, z0.d
1416 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1419 ; NONEON-NOSVE-LABEL: fminv_v4f64:
1420 ; NONEON-NOSVE: // %bb.0:
1421 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
1422 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
1423 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1424 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
1425 ; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32
1426 ; NONEON-NOSVE-NEXT: fminnm d0, d2, d0
1427 ; NONEON-NOSVE-NEXT: fminnm d1, d3, d1
1428 ; NONEON-NOSVE-NEXT: fminnm d0, d1, d0
1429 ; NONEON-NOSVE-NEXT: ret
1430 %op = load <4 x double>, ptr %a
1431 %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op)
; llvm.vector.reduce.fmaximum over a <4 x half> passed by value.
; With +sve the expected code is a single fmaxv under a vl4 predicate.
; Without +sve the vector is stored to the stack and each half lane is
; reloaded, widened to single precision with fcvt and folded in with a scalar
; fmax; the running result is narrowed to half and widened again between
; steps.
1439 define half @fmaximumv_v4f16(<4 x half> %a) {
1440 ; CHECK-LABEL: fmaximumv_v4f16:
1442 ; CHECK-NEXT: ptrue p0.h, vl4
1443 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1444 ; CHECK-NEXT: fmaxv h0, p0, z0.h
1445 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
1448 ; NONEON-NOSVE-LABEL: fmaximumv_v4f16:
1449 ; NONEON-NOSVE: // %bb.0:
1450 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
1451 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1452 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
1453 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
1454 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1455 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1456 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1457 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1458 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1459 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1460 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1461 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1462 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1463 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1464 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1465 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1466 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1467 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1468 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1469 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1470 ; NONEON-NOSVE-NEXT: ret
1471 %res = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
; llvm.vector.reduce.fmaximum over a <8 x half> passed by value.
; With +sve the expected code is a single fmaxv under a vl8 predicate.
; Without +sve the q register is stored to the stack and the eight half lanes
; are folded one at a time: each is widened with fcvt and combined with a
; scalar single-precision fmax, with the running result narrowed to half and
; widened again between steps.
1475 define half @fmaximumv_v8f16(<8 x half> %a) {
1476 ; CHECK-LABEL: fmaximumv_v8f16:
1478 ; CHECK-NEXT: ptrue p0.h, vl8
1479 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1480 ; CHECK-NEXT: fmaxv h0, p0, z0.h
1481 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
1484 ; NONEON-NOSVE-LABEL: fmaximumv_v8f16:
1485 ; NONEON-NOSVE: // %bb.0:
1486 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1487 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1488 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
1489 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
1490 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1491 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1492 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1493 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
1494 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1495 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1496 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1497 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1498 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
1499 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1500 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1501 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1502 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1503 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1504 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1505 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1506 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1507 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1508 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1509 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1510 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1511 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1512 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1513 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1514 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1515 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1516 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1517 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1518 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1519 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1520 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1521 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1522 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1523 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1524 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1525 ; NONEON-NOSVE-NEXT: ret
1526 %res = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a)
; llvm.vector.reduce.fmaximum over a <16 x half> loaded from memory.
; With +sve the two 128-bit halves are merged lane-wise with a predicated
; fmax (vl8) and the result is reduced with fmaxv.
; Without +sve the expected code is fully scalar: both halves are copied to a
; 32-byte stack area, half lanes are reloaded and widened with fcvt, combined
; with single-precision fmax, and intermediate results are narrowed back to
; half between steps.
1530 define half @fmaximumv_v16f16(ptr %a) {
1531 ; CHECK-LABEL: fmaximumv_v16f16:
1533 ; CHECK-NEXT: ldp q1, q0, [x0]
1534 ; CHECK-NEXT: ptrue p0.h, vl8
1535 ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
1536 ; CHECK-NEXT: fmaxv h0, p0, z0.h
1537 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
1540 ; NONEON-NOSVE-LABEL: fmaximumv_v16f16:
1541 ; NONEON-NOSVE: // %bb.0:
1542 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
1543 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
1544 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1545 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1546 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
1547 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #16]
1548 ; NONEON-NOSVE-NEXT: ldr h3, [sp]
1549 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1550 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1551 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1552 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1553 ; NONEON-NOSVE-NEXT: fmax s0, s1, s0
1554 ; NONEON-NOSVE-NEXT: fmax s1, s3, s2
1555 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #20]
1556 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #4]
1557 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1558 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1559 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1560 ; NONEON-NOSVE-NEXT: fcvt h1, s1
1561 ; NONEON-NOSVE-NEXT: fmax s2, s3, s2
1562 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #6]
1563 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1564 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1565 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1566 ; NONEON-NOSVE-NEXT: fmax s0, s1, s0
1567 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1568 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #22]
1569 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1570 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1571 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1572 ; NONEON-NOSVE-NEXT: fmax s2, s3, s2
1573 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #8]
1574 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1575 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1576 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1577 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1578 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #24]
1579 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1580 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1581 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1582 ; NONEON-NOSVE-NEXT: fmax s2, s3, s2
1583 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #10]
1584 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1585 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1586 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1587 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1588 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #26]
1589 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1590 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1591 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1592 ; NONEON-NOSVE-NEXT: fmax s2, s3, s2
1593 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #12]
1594 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1595 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1596 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1597 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1598 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #28]
1599 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1600 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1601 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1602 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1603 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1604 ; NONEON-NOSVE-NEXT: fmax s1, s3, s2
1605 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #30]
1606 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #14]
1607 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1608 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1609 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1610 ; NONEON-NOSVE-NEXT: fcvt h1, s1
1611 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1612 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1613 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1614 ; NONEON-NOSVE-NEXT: fmax s1, s3, s2
1615 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1616 ; NONEON-NOSVE-NEXT: fcvt h1, s1
1617 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1618 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1619 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1620 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1621 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1622 ; NONEON-NOSVE-NEXT: ret
1623 %op = load <16 x half>, ptr %a
1624 %res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op)
; llvm.vector.reduce.fmaximum over a <2 x float> passed by value.
; With +sve the expected code is a single fmaxv under a vl2 predicate.
; Without +sve the vector is stored to the stack, both lanes are reloaded
; with one ldp, and they are combined with a scalar fmax.
1628 define float @fmaximumv_v2f32(<2 x float> %a) {
1629 ; CHECK-LABEL: fmaximumv_v2f32:
1631 ; CHECK-NEXT: ptrue p0.s, vl2
1632 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1633 ; CHECK-NEXT: fmaxv s0, p0, z0.s
1634 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
1637 ; NONEON-NOSVE-LABEL: fmaximumv_v2f32:
1638 ; NONEON-NOSVE: // %bb.0:
1639 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
1640 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1641 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
1642 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8]
1643 ; NONEON-NOSVE-NEXT: fmax s0, s1, s0
1644 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1645 ; NONEON-NOSVE-NEXT: ret
1646 %res = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %a)
; llvm.vector.reduce.fmaximum over a <4 x float> passed by value.
; With +sve the expected code is a single fmaxv under a vl4 predicate.
; Without +sve the q register is stored to the stack and the four lanes are
; folded together with three scalar fmax instructions.
1650 define float @fmaximumv_v4f32(<4 x float> %a) {
1651 ; CHECK-LABEL: fmaximumv_v4f32:
1653 ; CHECK-NEXT: ptrue p0.s, vl4
1654 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1655 ; CHECK-NEXT: fmaxv s0, p0, z0.s
1656 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
1659 ; NONEON-NOSVE-LABEL: fmaximumv_v4f32:
1660 ; NONEON-NOSVE: // %bb.0:
1661 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1662 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1663 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp]
1664 ; NONEON-NOSVE-NEXT: fmax s0, s1, s0
1665 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
1666 ; NONEON-NOSVE-NEXT: fmax s0, s0, s2
1667 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1668 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1669 ; NONEON-NOSVE-NEXT: ret
1670 %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a)
; llvm.vector.reduce.fmaximum over a <8 x float> loaded from memory.
; With +sve the two 128-bit halves are loaded with ldp, merged lane-wise with
; a predicated fmax (vl4), and the result is reduced with fmaxv.
; Without +sve both halves are copied to the stack and the eight lanes are
; combined using scalar fmax instructions only.
1674 define float @fmaximumv_v8f32(ptr %a) {
1675 ; CHECK-LABEL: fmaximumv_v8f32:
1677 ; CHECK-NEXT: ldp q1, q0, [x0]
1678 ; CHECK-NEXT: ptrue p0.s, vl4
1679 ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
1680 ; CHECK-NEXT: fmaxv s0, p0, z0.s
1681 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
1684 ; NONEON-NOSVE-LABEL: fmaximumv_v8f32:
1685 ; NONEON-NOSVE: // %bb.0:
1686 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
1687 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
1688 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1689 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16]
1690 ; NONEON-NOSVE-NEXT: ldp s3, s2, [sp]
1691 ; NONEON-NOSVE-NEXT: fmax s0, s2, s0
1692 ; NONEON-NOSVE-NEXT: fmax s1, s3, s1
1693 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
1694 ; NONEON-NOSVE-NEXT: fmax s0, s1, s0
1695 ; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24]
1696 ; NONEON-NOSVE-NEXT: fmax s2, s2, s3
1697 ; NONEON-NOSVE-NEXT: fmax s1, s4, s1
1698 ; NONEON-NOSVE-NEXT: fmax s0, s0, s2
1699 ; NONEON-NOSVE-NEXT: fmax s0, s0, s1
1700 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1701 ; NONEON-NOSVE-NEXT: ret
1702 %op = load <8 x float>, ptr %a
1703 %res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op)
; llvm.vector.reduce.fmaximum over a single-element <1 x double>.
; The expectation for the run without +sve is a bare ret: no max instruction
; is emitted for a one-lane reduction.
1707 define double @fmaximumv_v1f64(<1 x double> %a) {
1708 ; CHECK-LABEL: fmaximumv_v1f64:
1712 ; NONEON-NOSVE-LABEL: fmaximumv_v1f64:
1713 ; NONEON-NOSVE: // %bb.0:
1714 ; NONEON-NOSVE-NEXT: ret
1715 %res = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
; llvm.vector.reduce.fmaximum over a <2 x double> passed by value.
; With +sve the expected code is a single fmaxv under a vl2 predicate.
; Without +sve the q register is pushed to the stack, both lanes are reloaded
; by a post-indexed ldp that also restores sp, and one scalar fmax follows.
1719 define double @fmaximumv_v2f64(<2 x double> %a) {
1720 ; CHECK-LABEL: fmaximumv_v2f64:
1722 ; CHECK-NEXT: ptrue p0.d, vl2
1723 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1724 ; CHECK-NEXT: fmaxv d0, p0, z0.d
1725 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1728 ; NONEON-NOSVE-LABEL: fmaximumv_v2f64:
1729 ; NONEON-NOSVE: // %bb.0:
1730 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1731 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1732 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16
1733 ; NONEON-NOSVE-NEXT: fmax d0, d1, d0
1734 ; NONEON-NOSVE-NEXT: ret
1735 %res = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a)
; llvm.vector.reduce.fmaximum over a <4 x double> loaded from memory.
; With +sve the two 128-bit halves are merged lane-wise with a predicated
; fmax (vl2) and then reduced with fmaxv.
; Without +sve both halves are copied to the stack and the four lanes are
; combined with three scalar fmax instructions.
1739 define double @fmaximumv_v4f64(ptr %a) {
1740 ; CHECK-LABEL: fmaximumv_v4f64:
1742 ; CHECK-NEXT: ldp q1, q0, [x0]
1743 ; CHECK-NEXT: ptrue p0.d, vl2
1744 ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
1745 ; CHECK-NEXT: fmaxv d0, p0, z0.d
1746 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1749 ; NONEON-NOSVE-LABEL: fmaximumv_v4f64:
1750 ; NONEON-NOSVE: // %bb.0:
1751 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
1752 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
1753 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1754 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
1755 ; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32
1756 ; NONEON-NOSVE-NEXT: fmax d0, d2, d0
1757 ; NONEON-NOSVE-NEXT: fmax d1, d3, d1
1758 ; NONEON-NOSVE-NEXT: fmax d0, d1, d0
1759 ; NONEON-NOSVE-NEXT: ret
1760 %op = load <4 x double>, ptr %a
1761 %res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op)
; llvm.vector.reduce.fminimum over a <4 x half> passed by value.
; With +sve the expected code is a single fminv under a vl4 predicate.
; Without +sve the vector is stored to the stack and each half lane is
; reloaded, widened to single precision with fcvt and folded in with a scalar
; fmin; the running result is narrowed to half and widened again between
; steps.
1769 define half @fminimumv_v4f16(<4 x half> %a) {
1770 ; CHECK-LABEL: fminimumv_v4f16:
1772 ; CHECK-NEXT: ptrue p0.h, vl4
1773 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1774 ; CHECK-NEXT: fminv h0, p0, z0.h
1775 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
1778 ; NONEON-NOSVE-LABEL: fminimumv_v4f16:
1779 ; NONEON-NOSVE: // %bb.0:
1780 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
1781 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1782 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
1783 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #8]
1784 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1785 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1786 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1787 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1788 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1789 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1790 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1791 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1792 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1793 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1794 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1795 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1796 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1797 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1798 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1799 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1800 ; NONEON-NOSVE-NEXT: ret
1801 %res = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a)
; llvm.vector.reduce.fminimum over a <8 x half> passed by value.
; With +sve the expected code is a single fminv under a vl8 predicate.
; Without +sve the q register is stored to the stack and the eight half lanes
; are folded one at a time: each is widened with fcvt and combined with a
; scalar single-precision fmin, with the running result narrowed to half and
; widened again between steps.
1805 define half @fminimumv_v8f16(<8 x half> %a) {
1806 ; CHECK-LABEL: fminimumv_v8f16:
1808 ; CHECK-NEXT: ptrue p0.h, vl8
1809 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1810 ; CHECK-NEXT: fminv h0, p0, z0.h
1811 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
1814 ; NONEON-NOSVE-LABEL: fminimumv_v8f16:
1815 ; NONEON-NOSVE: // %bb.0:
1816 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1817 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1818 ; NONEON-NOSVE-NEXT: ldr h0, [sp]
1819 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
1820 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1821 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1822 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1823 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #4]
1824 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1825 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1826 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1827 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1828 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #6]
1829 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1830 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1831 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1832 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1833 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #8]
1834 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1835 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1836 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1837 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1838 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #10]
1839 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1840 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1841 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1842 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1843 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #12]
1844 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1845 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1846 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1847 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1848 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #14]
1849 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1850 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1851 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1852 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1853 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1854 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1855 ; NONEON-NOSVE-NEXT: ret
1856 %res = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a)
; fminimum reduction of a <16 x half> vector loaded from %a.
; With +sve (CHECK prefix) the two 128-bit halves are loaded with ldp, combined
; by a predicated fmin, and the result is reduced by fminv on vl8.
; Without SVE (NONEON-NOSVE prefix) both halves are copied to the stack; lane i
; at [sp, #2*i] is paired with lane i of the other half at [sp, #16 + 2*i] using
; scalar fmin, and the pairwise minima are then folded together. All arithmetic
; is done in single precision with fcvt conversions around each fmin.
1860 define half @fminimumv_v16f16(ptr %a) {
1861 ; CHECK-LABEL: fminimumv_v16f16:
1863 ; CHECK-NEXT: ldp q1, q0, [x0]
1864 ; CHECK-NEXT: ptrue p0.h, vl8
1865 ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
1866 ; CHECK-NEXT: fminv h0, p0, z0.h
1867 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
1870 ; NONEON-NOSVE-LABEL: fminimumv_v16f16:
1871 ; NONEON-NOSVE: // %bb.0:
1872 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
1873 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
1874 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
1875 ; NONEON-NOSVE-NEXT: ldr h0, [sp, #18]
1876 ; NONEON-NOSVE-NEXT: ldr h1, [sp, #2]
1877 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #16]
1878 ; NONEON-NOSVE-NEXT: ldr h3, [sp]
1879 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1880 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1881 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1882 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1883 ; NONEON-NOSVE-NEXT: fmin s0, s1, s0
1884 ; NONEON-NOSVE-NEXT: fmin s1, s3, s2
1885 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #20]
1886 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #4]
1887 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1888 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1889 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1890 ; NONEON-NOSVE-NEXT: fcvt h1, s1
1891 ; NONEON-NOSVE-NEXT: fmin s2, s3, s2
1892 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #6]
1893 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1894 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1895 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1896 ; NONEON-NOSVE-NEXT: fmin s0, s1, s0
1897 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1898 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #22]
1899 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1900 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1901 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1902 ; NONEON-NOSVE-NEXT: fmin s2, s3, s2
1903 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #8]
1904 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1905 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1906 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1907 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1908 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #24]
1909 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1910 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1911 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1912 ; NONEON-NOSVE-NEXT: fmin s2, s3, s2
1913 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #10]
1914 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1915 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1916 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1917 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1918 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #26]
1919 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1920 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1921 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1922 ; NONEON-NOSVE-NEXT: fmin s2, s3, s2
1923 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #12]
1924 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1925 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1926 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1927 ; NONEON-NOSVE-NEXT: fcvt h1, s2
1928 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #28]
1929 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1930 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1931 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1932 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1933 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1934 ; NONEON-NOSVE-NEXT: fmin s1, s3, s2
1935 ; NONEON-NOSVE-NEXT: ldr h2, [sp, #30]
1936 ; NONEON-NOSVE-NEXT: ldr h3, [sp, #14]
1937 ; NONEON-NOSVE-NEXT: fcvt s2, h2
1938 ; NONEON-NOSVE-NEXT: fcvt s3, h3
1939 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1940 ; NONEON-NOSVE-NEXT: fcvt h1, s1
1941 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1942 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1943 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1944 ; NONEON-NOSVE-NEXT: fmin s1, s3, s2
1945 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1946 ; NONEON-NOSVE-NEXT: fcvt h1, s1
1947 ; NONEON-NOSVE-NEXT: fcvt s0, h0
1948 ; NONEON-NOSVE-NEXT: fcvt s1, h1
1949 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1950 ; NONEON-NOSVE-NEXT: fcvt h0, s0
1951 ; NONEON-NOSVE-NEXT: add sp, sp, #32
1952 ; NONEON-NOSVE-NEXT: ret
1953 %op = load <16 x half>, ptr %a
1954 %res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op)
; fminimum reduction of a <2 x float> argument.
; With +sve (CHECK prefix) the expected code is a single fminv predicated on vl2.
; Without SVE (NONEON-NOSVE prefix) d0 is stored to the stack, both lanes are
; reloaded with one ldp, and a single scalar fmin produces the result.
1958 define float @fminimumv_v2f32(<2 x float> %a) {
1959 ; CHECK-LABEL: fminimumv_v2f32:
1961 ; CHECK-NEXT: ptrue p0.s, vl2
1962 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
1963 ; CHECK-NEXT: fminv s0, p0, z0.s
1964 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
1967 ; NONEON-NOSVE-LABEL: fminimumv_v2f32:
1968 ; NONEON-NOSVE: // %bb.0:
1969 ; NONEON-NOSVE-NEXT: sub sp, sp, #16
1970 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1971 ; NONEON-NOSVE-NEXT: str d0, [sp, #8]
1972 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #8]
1973 ; NONEON-NOSVE-NEXT: fmin s0, s1, s0
1974 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1975 ; NONEON-NOSVE-NEXT: ret
1976 %res = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %a)
; fminimum reduction of a <4 x float> argument.
; With +sve (CHECK prefix) the expected code is a single fminv predicated on vl4.
; Without SVE (NONEON-NOSVE prefix) q0 is pushed to the stack and the four lanes
; are folded in order with three scalar fmin instructions.
1980 define float @fminimumv_v4f32(<4 x float> %a) {
1981 ; CHECK-LABEL: fminimumv_v4f32:
1983 ; CHECK-NEXT: ptrue p0.s, vl4
1984 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
1985 ; CHECK-NEXT: fminv s0, p0, z0.s
1986 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
1989 ; NONEON-NOSVE-LABEL: fminimumv_v4f32:
1990 ; NONEON-NOSVE: // %bb.0:
1991 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
1992 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
1993 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp]
1994 ; NONEON-NOSVE-NEXT: fmin s0, s1, s0
1995 ; NONEON-NOSVE-NEXT: ldp s2, s1, [sp, #8]
1996 ; NONEON-NOSVE-NEXT: fmin s0, s0, s2
1997 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
1998 ; NONEON-NOSVE-NEXT: add sp, sp, #16
1999 ; NONEON-NOSVE-NEXT: ret
2000 %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a)
; fminimum reduction of an <8 x float> vector loaded from %a.
; With +sve (CHECK prefix) the two 128-bit halves are combined by a predicated
; fmin and the result is reduced by fminv on vl4.
; Without SVE (NONEON-NOSVE prefix) both halves are copied to the stack and the
; eight lanes are reduced with seven scalar fmin instructions; matching lanes of
; the two halves are paired first.
2004 define float @fminimumv_v8f32(ptr %a) {
2005 ; CHECK-LABEL: fminimumv_v8f32:
2007 ; CHECK-NEXT: ldp q1, q0, [x0]
2008 ; CHECK-NEXT: ptrue p0.s, vl4
2009 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
2010 ; CHECK-NEXT: fminv s0, p0, z0.s
2011 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
2014 ; NONEON-NOSVE-LABEL: fminimumv_v8f32:
2015 ; NONEON-NOSVE: // %bb.0:
2016 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
2017 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
2018 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
2019 ; NONEON-NOSVE-NEXT: ldp s1, s0, [sp, #16]
2020 ; NONEON-NOSVE-NEXT: ldp s3, s2, [sp]
2021 ; NONEON-NOSVE-NEXT: fmin s0, s2, s0
2022 ; NONEON-NOSVE-NEXT: fmin s1, s3, s1
2023 ; NONEON-NOSVE-NEXT: ldp s2, s4, [sp, #8]
2024 ; NONEON-NOSVE-NEXT: fmin s0, s1, s0
2025 ; NONEON-NOSVE-NEXT: ldp s3, s1, [sp, #24]
2026 ; NONEON-NOSVE-NEXT: fmin s2, s2, s3
2027 ; NONEON-NOSVE-NEXT: fmin s1, s4, s1
2028 ; NONEON-NOSVE-NEXT: fmin s0, s0, s2
2029 ; NONEON-NOSVE-NEXT: fmin s0, s0, s1
2030 ; NONEON-NOSVE-NEXT: add sp, sp, #32
2031 ; NONEON-NOSVE-NEXT: ret
2032 %op = load <8 x float>, ptr %a
2033 %res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op)
; fminimum reduction of a single-element <1 x double> argument.
; The NONEON-NOSVE expectation is a bare ret, i.e. no instructions are emitted
; for the reduction itself.
2037 define double @fminimumv_v1f64(<1 x double> %a) {
2038 ; CHECK-LABEL: fminimumv_v1f64:
2042 ; NONEON-NOSVE-LABEL: fminimumv_v1f64:
2043 ; NONEON-NOSVE: // %bb.0:
2044 ; NONEON-NOSVE-NEXT: ret
2045 %res = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a)
; fminimum reduction of a <2 x double> argument.
; With +sve (CHECK prefix) the expected code is a single fminv predicated on vl2.
; Without SVE (NONEON-NOSVE prefix) q0 is pushed to the stack, both lanes are
; reloaded by a post-indexed ldp that also restores sp, and one scalar fmin
; produces the result.
2049 define double @fminimumv_v2f64(<2 x double> %a) {
2050 ; CHECK-LABEL: fminimumv_v2f64:
2052 ; CHECK-NEXT: ptrue p0.d, vl2
2053 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
2054 ; CHECK-NEXT: fminv d0, p0, z0.d
2055 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2058 ; NONEON-NOSVE-LABEL: fminimumv_v2f64:
2059 ; NONEON-NOSVE: // %bb.0:
2060 ; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
2061 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
2062 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp], #16
2063 ; NONEON-NOSVE-NEXT: fmin d0, d1, d0
2064 ; NONEON-NOSVE-NEXT: ret
2065 %res = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a)
; fminimum reduction of a <4 x double> vector loaded from %a.
; With +sve (CHECK prefix) the two 128-bit halves are combined by a predicated
; fmin and the result is reduced by fminv on vl2.
; Without SVE (NONEON-NOSVE prefix) both halves are copied to the stack, matching
; lanes of the two halves are paired with scalar fmin, and a third fmin joins
; the two partial results.
2069 define double @fminimumv_v4f64(ptr %a) {
2070 ; CHECK-LABEL: fminimumv_v4f64:
2072 ; CHECK-NEXT: ldp q1, q0, [x0]
2073 ; CHECK-NEXT: ptrue p0.d, vl2
2074 ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
2075 ; CHECK-NEXT: fminv d0, p0, z0.d
2076 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
2079 ; NONEON-NOSVE-LABEL: fminimumv_v4f64:
2080 ; NONEON-NOSVE: // %bb.0:
2081 ; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
2082 ; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
2083 ; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
2084 ; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16]
2085 ; NONEON-NOSVE-NEXT: ldp d3, d2, [sp], #32
2086 ; NONEON-NOSVE-NEXT: fmin d0, d2, d0
2087 ; NONEON-NOSVE-NEXT: fmin d1, d3, d1
2088 ; NONEON-NOSVE-NEXT: fmin d0, d1, d0
2089 ; NONEON-NOSVE-NEXT: ret
2090 %op = load <4 x double>, ptr %a
2091 %res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op)
; Declarations of the floating-point vector reduction intrinsics, grouped by
; reduction kind and then by element type (half, float, double).

; fadd: takes a scalar start value in addition to the vector operand.
2095 declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
2096 declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
2097 declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
2099 declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
2100 declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
2101 declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
2103 declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)
2104 declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
2105 declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
; fmax: vector operand only.
2107 declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
2108 declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
2109 declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
2111 declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
2112 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
2113 declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
2115 declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
2116 declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
2117 declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
; fmin: vector operand only.
2119 declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
2120 declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
2121 declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
2123 declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
2124 declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
2125 declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
2127 declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
2128 declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
2129 declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
; fmaximum: vector operand only.
2131 declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half>)
2132 declare half @llvm.vector.reduce.fmaximum.v8f16(<8 x half>)
2133 declare half @llvm.vector.reduce.fmaximum.v16f16(<16 x half>)
2135 declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
2136 declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
2137 declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)
2139 declare double @llvm.vector.reduce.fmaximum.v1f64(<1 x double>)
2140 declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
2141 declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)
; fminimum: vector operand only.
2143 declare half @llvm.vector.reduce.fminimum.v4f16(<4 x half>)
2144 declare half @llvm.vector.reduce.fminimum.v8f16(<8 x half>)
2145 declare half @llvm.vector.reduce.fminimum.v16f16(<16 x half>)
2147 declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
2148 declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
2149 declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)
2151 declare double @llvm.vector.reduce.fminimum.v1f64(<1 x double>)
2152 declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
2153 declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)