1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
; Exercises AArch64 code generation for fixed-length floating-point vector
; reductions (fadd, fmax, fmin, fmaximum, fminimum) with SVE enabled and
; streaming-compatible SVE code generation forced on the llc command line.
4 target triple = "aarch64-unknown-linux-gnu"
; In-order fadd reduction of <4 x half> %a seeded with %start (the call carries
; no fast-math flags). The expected output adds lane 0 straight from h1 and
; extracts lanes 1-3 with indexed mov instructions, one scalar fadd per lane.
10 define half @fadda_v4f16(half %start, <4 x half> %a) {
11 ; CHECK-LABEL: fadda_v4f16:
13 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
14 ; CHECK-NEXT: fadd h0, h0, h1
15 ; CHECK-NEXT: mov z2.h, z1.h[1]
16 ; CHECK-NEXT: fadd h0, h0, h2
17 ; CHECK-NEXT: mov z2.h, z1.h[2]
18 ; CHECK-NEXT: mov z1.h, z1.h[3]
19 ; CHECK-NEXT: fadd h0, h0, h2
20 ; CHECK-NEXT: fadd h0, h0, h1
22 %res = call half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
; In-order fadd reduction of <8 x half> %a seeded with %start (no fast-math
; flags on the call). The expected output accumulates into h0 one lane at a
; time: lane 0 directly from h1, lanes 1-7 via indexed mov followed by fadd.
26 define half @fadda_v8f16(half %start, <8 x half> %a) {
27 ; CHECK-LABEL: fadda_v8f16:
29 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
30 ; CHECK-NEXT: fadd h0, h0, h1
31 ; CHECK-NEXT: mov z2.h, z1.h[1]
32 ; CHECK-NEXT: fadd h0, h0, h2
33 ; CHECK-NEXT: mov z2.h, z1.h[2]
34 ; CHECK-NEXT: fadd h0, h0, h2
35 ; CHECK-NEXT: mov z2.h, z1.h[3]
36 ; CHECK-NEXT: fadd h0, h0, h2
37 ; CHECK-NEXT: mov z2.h, z1.h[4]
38 ; CHECK-NEXT: fadd h0, h0, h2
39 ; CHECK-NEXT: mov z2.h, z1.h[5]
40 ; CHECK-NEXT: fadd h0, h0, h2
41 ; CHECK-NEXT: mov z2.h, z1.h[6]
42 ; CHECK-NEXT: mov z1.h, z1.h[7]
43 ; CHECK-NEXT: fadd h0, h0, h2
44 ; CHECK-NEXT: fadd h0, h0, h1
46 %res = call half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
; In-order fadd reduction of a <16 x half> vector loaded from %a, seeded with
; %start (no fast-math flags on the call). The expected output processes the
; data as two 16-byte halves (ldr from [x0], then from [x0, #16]) and
; accumulates the 8 lanes of each half into h0 with scalar fadd instructions.
50 define half @fadda_v16f16(half %start, ptr %a) {
51 ; CHECK-LABEL: fadda_v16f16:
53 ; CHECK-NEXT: ldr q1, [x0]
54 ; CHECK-NEXT: fadd h0, h0, h1
55 ; CHECK-NEXT: mov z2.h, z1.h[1]
56 ; CHECK-NEXT: fadd h0, h0, h2
57 ; CHECK-NEXT: mov z2.h, z1.h[2]
58 ; CHECK-NEXT: fadd h0, h0, h2
59 ; CHECK-NEXT: mov z2.h, z1.h[3]
60 ; CHECK-NEXT: fadd h0, h0, h2
61 ; CHECK-NEXT: mov z2.h, z1.h[4]
62 ; CHECK-NEXT: fadd h0, h0, h2
63 ; CHECK-NEXT: mov z2.h, z1.h[5]
64 ; CHECK-NEXT: fadd h0, h0, h2
65 ; CHECK-NEXT: mov z2.h, z1.h[6]
66 ; CHECK-NEXT: mov z1.h, z1.h[7]
67 ; CHECK-NEXT: fadd h0, h0, h2
68 ; CHECK-NEXT: fadd h0, h0, h1
69 ; CHECK-NEXT: ldr q1, [x0, #16]
70 ; CHECK-NEXT: mov z2.h, z1.h[1]
71 ; CHECK-NEXT: fadd h0, h0, h1
72 ; CHECK-NEXT: fadd h0, h0, h2
73 ; CHECK-NEXT: mov z2.h, z1.h[2]
74 ; CHECK-NEXT: fadd h0, h0, h2
75 ; CHECK-NEXT: mov z2.h, z1.h[3]
76 ; CHECK-NEXT: fadd h0, h0, h2
77 ; CHECK-NEXT: mov z2.h, z1.h[4]
78 ; CHECK-NEXT: fadd h0, h0, h2
79 ; CHECK-NEXT: mov z2.h, z1.h[5]
80 ; CHECK-NEXT: fadd h0, h0, h2
81 ; CHECK-NEXT: mov z2.h, z1.h[6]
82 ; CHECK-NEXT: mov z1.h, z1.h[7]
83 ; CHECK-NEXT: fadd h0, h0, h2
84 ; CHECK-NEXT: fadd h0, h0, h1
86 %op = load <16 x half>, ptr %a
87 %res = call half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
; In-order fadd reduction of <2 x float> %a seeded with %start (no fast-math
; flags on the call). The expected output adds lane 0 from s1, moves lane 1
; down with an indexed mov, and adds it with a second scalar fadd.
91 define float @fadda_v2f32(float %start, <2 x float> %a) {
92 ; CHECK-LABEL: fadda_v2f32:
94 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
95 ; CHECK-NEXT: fadd s0, s0, s1
96 ; CHECK-NEXT: mov z1.s, z1.s[1]
97 ; CHECK-NEXT: fadd s0, s0, s1
99 %res = call float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
; In-order fadd reduction of <4 x float> %a seeded with %start (no fast-math
; flags on the call). The expected output adds lane 0 from s1 and extracts
; lanes 1-3 with indexed mov instructions, one scalar fadd per lane.
103 define float @fadda_v4f32(float %start, <4 x float> %a) {
104 ; CHECK-LABEL: fadda_v4f32:
106 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
107 ; CHECK-NEXT: fadd s0, s0, s1
108 ; CHECK-NEXT: mov z2.s, z1.s[1]
109 ; CHECK-NEXT: fadd s0, s0, s2
110 ; CHECK-NEXT: mov z2.s, z1.s[2]
111 ; CHECK-NEXT: mov z1.s, z1.s[3]
112 ; CHECK-NEXT: fadd s0, s0, s2
113 ; CHECK-NEXT: fadd s0, s0, s1
115 %res = call float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
; In-order fadd reduction of an <8 x float> vector loaded from %a, seeded with
; %start (no fast-math flags on the call). The expected output loads two
; 16-byte halves (from [x0] and [x0, #16]) and accumulates the 4 lanes of
; each half into s0 with scalar fadd instructions.
119 define float @fadda_v8f32(float %start, ptr %a) {
120 ; CHECK-LABEL: fadda_v8f32:
122 ; CHECK-NEXT: ldr q1, [x0]
123 ; CHECK-NEXT: fadd s0, s0, s1
124 ; CHECK-NEXT: mov z2.s, z1.s[1]
125 ; CHECK-NEXT: fadd s0, s0, s2
126 ; CHECK-NEXT: mov z2.s, z1.s[2]
127 ; CHECK-NEXT: mov z1.s, z1.s[3]
128 ; CHECK-NEXT: fadd s0, s0, s2
129 ; CHECK-NEXT: fadd s0, s0, s1
130 ; CHECK-NEXT: ldr q1, [x0, #16]
131 ; CHECK-NEXT: mov z2.s, z1.s[1]
132 ; CHECK-NEXT: fadd s0, s0, s1
133 ; CHECK-NEXT: fadd s0, s0, s2
134 ; CHECK-NEXT: mov z2.s, z1.s[2]
135 ; CHECK-NEXT: mov z1.s, z1.s[3]
136 ; CHECK-NEXT: fadd s0, s0, s2
137 ; CHECK-NEXT: fadd s0, s0, s1
139 %op = load <8 x float>, ptr %a
140 %res = call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
; fadd reduction of a single-element <1 x double> vector seeded with %start
; (no fast-math flags on the call). The expected output is one scalar fadd of
; d0 and d1.
144 define double @fadda_v1f64(double %start, <1 x double> %a) {
145 ; CHECK-LABEL: fadda_v1f64:
147 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
148 ; CHECK-NEXT: fadd d0, d0, d1
150 %res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
; In-order fadd reduction of <2 x double> %a seeded with %start (no fast-math
; flags on the call). The expected output adds lane 0 from d1, moves lane 1
; down with an indexed mov, and adds it with a second scalar fadd.
154 define double @fadda_v2f64(double %start, <2 x double> %a) {
155 ; CHECK-LABEL: fadda_v2f64:
157 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
158 ; CHECK-NEXT: fadd d0, d0, d1
159 ; CHECK-NEXT: mov z1.d, z1.d[1]
160 ; CHECK-NEXT: fadd d0, d0, d1
162 %res = call double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
; In-order fadd reduction of a <4 x double> vector loaded from %a, seeded with
; %start (no fast-math flags on the call). The expected output loads two
; 16-byte halves (from [x0] and [x0, #16]) and accumulates the 2 lanes of
; each half into d0 with scalar fadd instructions.
166 define double @fadda_v4f64(double %start, ptr %a) {
167 ; CHECK-LABEL: fadda_v4f64:
169 ; CHECK-NEXT: ldr q1, [x0]
170 ; CHECK-NEXT: fadd d0, d0, d1
171 ; CHECK-NEXT: mov z1.d, z1.d[1]
172 ; CHECK-NEXT: fadd d0, d0, d1
173 ; CHECK-NEXT: ldr q1, [x0, #16]
174 ; CHECK-NEXT: fadd d0, d0, d1
175 ; CHECK-NEXT: mov z1.d, z1.d[1]
176 ; CHECK-NEXT: fadd d0, d0, d1
178 %op = load <4 x double>, ptr %a
179 %res = call double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
; Fast-math (`call fast`) fadd reduction of <4 x half> %a. The expected output
; reduces the 4 lanes with one predicated faddv (predicate built by
; ptrue p0.h, vl4) and then adds the result to the start value in h0.
187 define half @faddv_v4f16(half %start, <4 x half> %a) {
188 ; CHECK-LABEL: faddv_v4f16:
190 ; CHECK-NEXT: ptrue p0.h, vl4
191 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
192 ; CHECK-NEXT: faddv h1, p0, z1.h
193 ; CHECK-NEXT: fadd h0, h0, h1
195 %res = call fast half @llvm.vector.reduce.fadd.v4f16(half %start, <4 x half> %a)
; Fast-math (`call fast`) fadd reduction of <8 x half> %a. The expected output
; reduces the 8 lanes with one predicated faddv (predicate built by
; ptrue p0.h, vl8) and then adds the result to the start value in h0.
199 define half @faddv_v8f16(half %start, <8 x half> %a) {
200 ; CHECK-LABEL: faddv_v8f16:
202 ; CHECK-NEXT: ptrue p0.h, vl8
203 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
204 ; CHECK-NEXT: faddv h1, p0, z1.h
205 ; CHECK-NEXT: fadd h0, h0, h1
207 %res = call fast half @llvm.vector.reduce.fadd.v8f16(half %start, <8 x half> %a)
; Fast-math (`call fast`) fadd reduction of a <16 x half> vector loaded from
; %a. The expected output loads both 16-byte halves with one ldp, combines
; them with a predicated vector fadd, reduces the 8 resulting lanes with
; faddv, and finally adds the start value with a scalar fadd.
211 define half @faddv_v16f16(half %start, ptr %a) {
212 ; CHECK-LABEL: faddv_v16f16:
214 ; CHECK-NEXT: ptrue p0.h, vl8
215 ; CHECK-NEXT: ldp q2, q1, [x0]
216 ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z2.h
217 ; CHECK-NEXT: faddv h1, p0, z1.h
218 ; CHECK-NEXT: fadd h0, h0, h1
220 %op = load <16 x half>, ptr %a
221 %res = call fast half @llvm.vector.reduce.fadd.v16f16(half %start, <16 x half> %op)
; Fast-math (`call fast`) fadd reduction of <2 x float> %a. The expected output
; reduces the 2 lanes with one predicated faddv (predicate built by
; ptrue p0.s, vl2) and then adds the result to the start value in s0.
225 define float @faddv_v2f32(float %start, <2 x float> %a) {
226 ; CHECK-LABEL: faddv_v2f32:
228 ; CHECK-NEXT: ptrue p0.s, vl2
229 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
230 ; CHECK-NEXT: faddv s1, p0, z1.s
231 ; CHECK-NEXT: fadd s0, s0, s1
233 %res = call fast float @llvm.vector.reduce.fadd.v2f32(float %start, <2 x float> %a)
; Fast-math (`call fast`) fadd reduction of <4 x float> %a. The expected output
; reduces the 4 lanes with one predicated faddv (predicate built by
; ptrue p0.s, vl4) and then adds the result to the start value in s0.
237 define float @faddv_v4f32(float %start, <4 x float> %a) {
238 ; CHECK-LABEL: faddv_v4f32:
240 ; CHECK-NEXT: ptrue p0.s, vl4
241 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
242 ; CHECK-NEXT: faddv s1, p0, z1.s
243 ; CHECK-NEXT: fadd s0, s0, s1
245 %res = call fast float @llvm.vector.reduce.fadd.v4f32(float %start, <4 x float> %a)
; Fast-math (`call fast`) fadd reduction of an <8 x float> vector loaded from
; %a. The expected output loads both 16-byte halves with one ldp, combines
; them with a predicated vector fadd, reduces the 4 resulting lanes with
; faddv, and finally adds the start value with a scalar fadd.
249 define float @faddv_v8f32(float %start, ptr %a) {
250 ; CHECK-LABEL: faddv_v8f32:
252 ; CHECK-NEXT: ptrue p0.s, vl4
253 ; CHECK-NEXT: ldp q2, q1, [x0]
254 ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z2.s
255 ; CHECK-NEXT: faddv s1, p0, z1.s
256 ; CHECK-NEXT: fadd s0, s0, s1
258 %op = load <8 x float>, ptr %a
259 %res = call fast float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %op)
; Fast-math (`call fast`) fadd reduction of a single-element <1 x double>
; vector. The expected output is one scalar fadd of d0 and d1, with no
; predicate setup or faddv.
263 define double @faddv_v1f64(double %start, <1 x double> %a) {
264 ; CHECK-LABEL: faddv_v1f64:
266 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
267 ; CHECK-NEXT: fadd d0, d0, d1
269 %res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
; Fast-math (`call fast`) fadd reduction of <2 x double> %a. The expected
; output reduces the 2 lanes with one predicated faddv (predicate built by
; ptrue p0.d, vl2) and then adds the result to the start value in d0.
273 define double @faddv_v2f64(double %start, <2 x double> %a) {
274 ; CHECK-LABEL: faddv_v2f64:
276 ; CHECK-NEXT: ptrue p0.d, vl2
277 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
278 ; CHECK-NEXT: faddv d1, p0, z1.d
279 ; CHECK-NEXT: fadd d0, d0, d1
281 %res = call fast double @llvm.vector.reduce.fadd.v2f64(double %start, <2 x double> %a)
; Fast-math (`call fast`) fadd reduction of a <4 x double> vector loaded from
; %a. The expected output loads both 16-byte halves with one ldp, combines
; them with a predicated vector fadd, reduces the 2 resulting lanes with
; faddv, and finally adds the start value with a scalar fadd.
285 define double @faddv_v4f64(double %start, ptr %a) {
286 ; CHECK-LABEL: faddv_v4f64:
288 ; CHECK-NEXT: ptrue p0.d, vl2
289 ; CHECK-NEXT: ldp q2, q1, [x0]
290 ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z2.d
291 ; CHECK-NEXT: faddv d1, p0, z1.d
292 ; CHECK-NEXT: fadd d0, d0, d1
294 %op = load <4 x double>, ptr %a
295 %res = call fast double @llvm.vector.reduce.fadd.v4f64(double %start, <4 x double> %op)
; llvm.vector.reduce.fmax over <4 x half> %a. The expected output is a single
; predicated fmaxnmv across 4 lanes (predicate built by ptrue p0.h, vl4).
303 define half @fmaxv_v4f16(<4 x half> %a) {
304 ; CHECK-LABEL: fmaxv_v4f16:
306 ; CHECK-NEXT: ptrue p0.h, vl4
307 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
308 ; CHECK-NEXT: fmaxnmv h0, p0, z0.h
309 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
311 %res = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> %a)
; llvm.vector.reduce.fmax over <8 x half> %a. The expected output is a single
; predicated fmaxnmv across 8 lanes (predicate built by ptrue p0.h, vl8).
315 define half @fmaxv_v8f16(<8 x half> %a) {
316 ; CHECK-LABEL: fmaxv_v8f16:
318 ; CHECK-NEXT: ptrue p0.h, vl8
319 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
320 ; CHECK-NEXT: fmaxnmv h0, p0, z0.h
321 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
323 %res = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> %a)
; llvm.vector.reduce.fmax over a <16 x half> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fmaxnm, and reduces the 8 resulting lanes with fmaxnmv.
327 define half @fmaxv_v16f16(ptr %a) {
328 ; CHECK-LABEL: fmaxv_v16f16:
330 ; CHECK-NEXT: ptrue p0.h, vl8
331 ; CHECK-NEXT: ldp q1, q0, [x0]
332 ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h
333 ; CHECK-NEXT: fmaxnmv h0, p0, z0.h
334 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
336 %op = load <16 x half>, ptr %a
337 %res = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> %op)
; llvm.vector.reduce.fmax over <2 x float> %a. The expected output is a single
; predicated fmaxnmv across 2 lanes (predicate built by ptrue p0.s, vl2).
341 define float @fmaxv_v2f32(<2 x float> %a) {
342 ; CHECK-LABEL: fmaxv_v2f32:
344 ; CHECK-NEXT: ptrue p0.s, vl2
345 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
346 ; CHECK-NEXT: fmaxnmv s0, p0, z0.s
347 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
349 %res = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
; llvm.vector.reduce.fmax over <4 x float> %a. The expected output is a single
; predicated fmaxnmv across 4 lanes (predicate built by ptrue p0.s, vl4).
353 define float @fmaxv_v4f32(<4 x float> %a) {
354 ; CHECK-LABEL: fmaxv_v4f32:
356 ; CHECK-NEXT: ptrue p0.s, vl4
357 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
358 ; CHECK-NEXT: fmaxnmv s0, p0, z0.s
359 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
361 %res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
; llvm.vector.reduce.fmax over an <8 x float> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fmaxnm, and reduces the 4 resulting lanes with fmaxnmv.
365 define float @fmaxv_v8f32(ptr %a) {
366 ; CHECK-LABEL: fmaxv_v8f32:
368 ; CHECK-NEXT: ptrue p0.s, vl4
369 ; CHECK-NEXT: ldp q1, q0, [x0]
370 ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s
371 ; CHECK-NEXT: fmaxnmv s0, p0, z0.s
372 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
374 %op = load <8 x float>, ptr %a
375 %res = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %op)
; llvm.vector.reduce.fmax over a single-element <1 x double> vector. The
; visible expected output contains only register kill annotations; no
; reduction instruction is expected.
379 define double @fmaxv_v1f64(<1 x double> %a) {
380 ; CHECK-LABEL: fmaxv_v1f64:
382 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
383 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
385 %res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
; llvm.vector.reduce.fmax over <2 x double> %a. The expected output is a
; single predicated fmaxnmv across 2 lanes (predicate built by
; ptrue p0.d, vl2).
389 define double @fmaxv_v2f64(<2 x double> %a) {
390 ; CHECK-LABEL: fmaxv_v2f64:
392 ; CHECK-NEXT: ptrue p0.d, vl2
393 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
394 ; CHECK-NEXT: fmaxnmv d0, p0, z0.d
395 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
397 %res = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
; llvm.vector.reduce.fmax over a <4 x double> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fmaxnm, and reduces the 2 resulting lanes with fmaxnmv.
401 define double @fmaxv_v4f64(ptr %a) {
402 ; CHECK-LABEL: fmaxv_v4f64:
404 ; CHECK-NEXT: ptrue p0.d, vl2
405 ; CHECK-NEXT: ldp q1, q0, [x0]
406 ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d
407 ; CHECK-NEXT: fmaxnmv d0, p0, z0.d
408 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
410 %op = load <4 x double>, ptr %a
411 %res = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %op)
; llvm.vector.reduce.fmin over <4 x half> %a. The expected output is a single
; predicated fminnmv across 4 lanes (predicate built by ptrue p0.h, vl4).
419 define half @fminv_v4f16(<4 x half> %a) {
420 ; CHECK-LABEL: fminv_v4f16:
422 ; CHECK-NEXT: ptrue p0.h, vl4
423 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
424 ; CHECK-NEXT: fminnmv h0, p0, z0.h
425 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
427 %res = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
; llvm.vector.reduce.fmin over <8 x half> %a. The expected output is a single
; predicated fminnmv across 8 lanes (predicate built by ptrue p0.h, vl8).
431 define half @fminv_v8f16(<8 x half> %a) {
432 ; CHECK-LABEL: fminv_v8f16:
434 ; CHECK-NEXT: ptrue p0.h, vl8
435 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
436 ; CHECK-NEXT: fminnmv h0, p0, z0.h
437 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
439 %res = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> %a)
; llvm.vector.reduce.fmin over a <16 x half> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fminnm, and reduces the 8 resulting lanes with fminnmv.
443 define half @fminv_v16f16(ptr %a) {
444 ; CHECK-LABEL: fminv_v16f16:
446 ; CHECK-NEXT: ptrue p0.h, vl8
447 ; CHECK-NEXT: ldp q1, q0, [x0]
448 ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h
449 ; CHECK-NEXT: fminnmv h0, p0, z0.h
450 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
452 %op = load <16 x half>, ptr %a
453 %res = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> %op)
; llvm.vector.reduce.fmin over <2 x float> %a. The expected output is a single
; predicated fminnmv across 2 lanes (predicate built by ptrue p0.s, vl2).
457 define float @fminv_v2f32(<2 x float> %a) {
458 ; CHECK-LABEL: fminv_v2f32:
460 ; CHECK-NEXT: ptrue p0.s, vl2
461 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
462 ; CHECK-NEXT: fminnmv s0, p0, z0.s
463 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
465 %res = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
; llvm.vector.reduce.fmin over <4 x float> %a. The expected output is a single
; predicated fminnmv across 4 lanes (predicate built by ptrue p0.s, vl4).
469 define float @fminv_v4f32(<4 x float> %a) {
470 ; CHECK-LABEL: fminv_v4f32:
472 ; CHECK-NEXT: ptrue p0.s, vl4
473 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
474 ; CHECK-NEXT: fminnmv s0, p0, z0.s
475 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
477 %res = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
; llvm.vector.reduce.fmin over an <8 x float> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fminnm, and reduces the 4 resulting lanes with fminnmv.
481 define float @fminv_v8f32(ptr %a) {
482 ; CHECK-LABEL: fminv_v8f32:
484 ; CHECK-NEXT: ptrue p0.s, vl4
485 ; CHECK-NEXT: ldp q1, q0, [x0]
486 ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s
487 ; CHECK-NEXT: fminnmv s0, p0, z0.s
488 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
490 %op = load <8 x float>, ptr %a
491 %res = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %op)
; llvm.vector.reduce.fmin over a single-element <1 x double> vector. The
; visible expected output contains only register kill annotations; no
; reduction instruction is expected.
495 define double @fminv_v1f64(<1 x double> %a) {
496 ; CHECK-LABEL: fminv_v1f64:
498 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
499 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
501 %res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
; llvm.vector.reduce.fmin over <2 x double> %a. The expected output is a
; single predicated fminnmv across 2 lanes (predicate built by
; ptrue p0.d, vl2).
505 define double @fminv_v2f64(<2 x double> %a) {
506 ; CHECK-LABEL: fminv_v2f64:
508 ; CHECK-NEXT: ptrue p0.d, vl2
509 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
510 ; CHECK-NEXT: fminnmv d0, p0, z0.d
511 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
513 %res = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
; llvm.vector.reduce.fmin over a <4 x double> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fminnm, and reduces the 2 resulting lanes with fminnmv.
517 define double @fminv_v4f64(ptr %a) {
518 ; CHECK-LABEL: fminv_v4f64:
520 ; CHECK-NEXT: ptrue p0.d, vl2
521 ; CHECK-NEXT: ldp q1, q0, [x0]
522 ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d
523 ; CHECK-NEXT: fminnmv d0, p0, z0.d
524 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
526 %op = load <4 x double>, ptr %a
527 %res = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %op)
; llvm.vector.reduce.fmaximum over <4 x half> %a. The expected output is a
; single predicated fmaxv across 4 lanes (predicate built by
; ptrue p0.h, vl4).
535 define half @fmaximumv_v4f16(<4 x half> %a) {
536 ; CHECK-LABEL: fmaximumv_v4f16:
538 ; CHECK-NEXT: ptrue p0.h, vl4
539 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
540 ; CHECK-NEXT: fmaxv h0, p0, z0.h
541 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
543 %res = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> %a)
; llvm.vector.reduce.fmaximum over <8 x half> %a. The expected output is a
; single predicated fmaxv across 8 lanes (predicate built by
; ptrue p0.h, vl8).
547 define half @fmaximumv_v8f16(<8 x half> %a) {
548 ; CHECK-LABEL: fmaximumv_v8f16:
550 ; CHECK-NEXT: ptrue p0.h, vl8
551 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
552 ; CHECK-NEXT: fmaxv h0, p0, z0.h
553 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
555 %res = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> %a)
; llvm.vector.reduce.fmaximum over a <16 x half> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fmax, and reduces the 8 resulting lanes with fmaxv.
559 define half @fmaximumv_v16f16(ptr %a) {
560 ; CHECK-LABEL: fmaximumv_v16f16:
562 ; CHECK-NEXT: ptrue p0.h, vl8
563 ; CHECK-NEXT: ldp q1, q0, [x0]
564 ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h
565 ; CHECK-NEXT: fmaxv h0, p0, z0.h
566 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
568 %op = load <16 x half>, ptr %a
569 %res = call half @llvm.vector.reduce.fmaximum.v16f16(<16 x half> %op)
; llvm.vector.reduce.fmaximum over <2 x float> %a. The expected output is a
; single predicated fmaxv across 2 lanes (predicate built by
; ptrue p0.s, vl2).
573 define float @fmaximumv_v2f32(<2 x float> %a) {
574 ; CHECK-LABEL: fmaximumv_v2f32:
576 ; CHECK-NEXT: ptrue p0.s, vl2
577 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
578 ; CHECK-NEXT: fmaxv s0, p0, z0.s
579 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
581 %res = call float @llvm.vector.reduce.fmaximum.v2f32(<2 x float> %a)
; llvm.vector.reduce.fmaximum over <4 x float> %a. The expected output is a
; single predicated fmaxv across 4 lanes (predicate built by
; ptrue p0.s, vl4).
585 define float @fmaximumv_v4f32(<4 x float> %a) {
586 ; CHECK-LABEL: fmaximumv_v4f32:
588 ; CHECK-NEXT: ptrue p0.s, vl4
589 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
590 ; CHECK-NEXT: fmaxv s0, p0, z0.s
591 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
593 %res = call float @llvm.vector.reduce.fmaximum.v4f32(<4 x float> %a)
; llvm.vector.reduce.fmaximum over an <8 x float> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fmax, and reduces the 4 resulting lanes with fmaxv.
597 define float @fmaximumv_v8f32(ptr %a) {
598 ; CHECK-LABEL: fmaximumv_v8f32:
600 ; CHECK-NEXT: ptrue p0.s, vl4
601 ; CHECK-NEXT: ldp q1, q0, [x0]
602 ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
603 ; CHECK-NEXT: fmaxv s0, p0, z0.s
604 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
606 %op = load <8 x float>, ptr %a
607 %res = call float @llvm.vector.reduce.fmaximum.v8f32(<8 x float> %op)
; llvm.vector.reduce.fmaximum over a single-element <1 x double> vector. The
; visible expected output contains only register kill annotations; no
; reduction instruction is expected.
611 define double @fmaximumv_v1f64(<1 x double> %a) {
612 ; CHECK-LABEL: fmaximumv_v1f64:
614 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
615 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
617 %res = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
; llvm.vector.reduce.fmaximum over <2 x double> %a. The expected output is a
; single predicated fmaxv across 2 lanes (predicate built by
; ptrue p0.d, vl2).
621 define double @fmaximumv_v2f64(<2 x double> %a) {
622 ; CHECK-LABEL: fmaximumv_v2f64:
624 ; CHECK-NEXT: ptrue p0.d, vl2
625 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
626 ; CHECK-NEXT: fmaxv d0, p0, z0.d
627 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
629 %res = call double @llvm.vector.reduce.fmaximum.v2f64(<2 x double> %a)
; llvm.vector.reduce.fmaximum over a <4 x double> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fmax, and reduces the 2 resulting lanes with fmaxv.
633 define double @fmaximumv_v4f64(ptr %a) {
634 ; CHECK-LABEL: fmaximumv_v4f64:
636 ; CHECK-NEXT: ptrue p0.d, vl2
637 ; CHECK-NEXT: ldp q1, q0, [x0]
638 ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d
639 ; CHECK-NEXT: fmaxv d0, p0, z0.d
640 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
642 %op = load <4 x double>, ptr %a
643 %res = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %op)
; llvm.vector.reduce.fminimum over <4 x half> %a. The expected output is a
; single predicated fminv across 4 lanes (predicate built by
; ptrue p0.h, vl4).
651 define half @fminimumv_v4f16(<4 x half> %a) {
652 ; CHECK-LABEL: fminimumv_v4f16:
654 ; CHECK-NEXT: ptrue p0.h, vl4
655 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
656 ; CHECK-NEXT: fminv h0, p0, z0.h
657 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
659 %res = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> %a)
; llvm.vector.reduce.fminimum over <8 x half> %a. The expected output is a
; single predicated fminv across 8 lanes (predicate built by
; ptrue p0.h, vl8).
663 define half @fminimumv_v8f16(<8 x half> %a) {
664 ; CHECK-LABEL: fminimumv_v8f16:
666 ; CHECK-NEXT: ptrue p0.h, vl8
667 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
668 ; CHECK-NEXT: fminv h0, p0, z0.h
669 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
671 %res = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> %a)
; llvm.vector.reduce.fminimum over a <16 x half> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fmin, and reduces the 8 resulting lanes with fminv.
675 define half @fminimumv_v16f16(ptr %a) {
676 ; CHECK-LABEL: fminimumv_v16f16:
678 ; CHECK-NEXT: ptrue p0.h, vl8
679 ; CHECK-NEXT: ldp q1, q0, [x0]
680 ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h
681 ; CHECK-NEXT: fminv h0, p0, z0.h
682 ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
684 %op = load <16 x half>, ptr %a
685 %res = call half @llvm.vector.reduce.fminimum.v16f16(<16 x half> %op)
; llvm.vector.reduce.fminimum over <2 x float> %a. The expected output is a
; single predicated fminv across 2 lanes (predicate built by
; ptrue p0.s, vl2).
689 define float @fminimumv_v2f32(<2 x float> %a) {
690 ; CHECK-LABEL: fminimumv_v2f32:
692 ; CHECK-NEXT: ptrue p0.s, vl2
693 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
694 ; CHECK-NEXT: fminv s0, p0, z0.s
695 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
697 %res = call float @llvm.vector.reduce.fminimum.v2f32(<2 x float> %a)
; llvm.vector.reduce.fminimum over <4 x float> %a. The expected output is a
; single predicated fminv across 4 lanes (predicate built by
; ptrue p0.s, vl4).
701 define float @fminimumv_v4f32(<4 x float> %a) {
702 ; CHECK-LABEL: fminimumv_v4f32:
704 ; CHECK-NEXT: ptrue p0.s, vl4
705 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
706 ; CHECK-NEXT: fminv s0, p0, z0.s
707 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
709 %res = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %a)
; llvm.vector.reduce.fminimum over an <8 x float> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fmin, and reduces the 4 resulting lanes with fminv.
713 define float @fminimumv_v8f32(ptr %a) {
714 ; CHECK-LABEL: fminimumv_v8f32:
716 ; CHECK-NEXT: ptrue p0.s, vl4
717 ; CHECK-NEXT: ldp q1, q0, [x0]
718 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
719 ; CHECK-NEXT: fminv s0, p0, z0.s
720 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
722 %op = load <8 x float>, ptr %a
723 %res = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %op)
; llvm.vector.reduce.fminimum over a single-element <1 x double> vector. The
; visible expected output contains only register kill annotations; no
; reduction instruction is expected.
727 define double @fminimumv_v1f64(<1 x double> %a) {
728 ; CHECK-LABEL: fminimumv_v1f64:
730 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
731 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
733 %res = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a)
; llvm.vector.reduce.fminimum over <2 x double> %a. The expected output is a
; single predicated fminv across 2 lanes (predicate built by
; ptrue p0.d, vl2).
737 define double @fminimumv_v2f64(<2 x double> %a) {
738 ; CHECK-LABEL: fminimumv_v2f64:
740 ; CHECK-NEXT: ptrue p0.d, vl2
741 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
742 ; CHECK-NEXT: fminv d0, p0, z0.d
743 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
745 %res = call double @llvm.vector.reduce.fminimum.v2f64(<2 x double> %a)
; llvm.vector.reduce.fminimum over a <4 x double> vector loaded from %a. The
; expected output loads both 16-byte halves with one ldp, combines them with a
; predicated vector fmin, and reduces the 2 resulting lanes with fminv.
749 define double @fminimumv_v4f64(ptr %a) {
750 ; CHECK-LABEL: fminimumv_v4f64:
752 ; CHECK-NEXT: ptrue p0.d, vl2
753 ; CHECK-NEXT: ldp q1, q0, [x0]
754 ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d
755 ; CHECK-NEXT: fminv d0, p0, z0.d
756 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
758 %op = load <4 x double>, ptr %a
759 %res = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %op)
; Declarations of the llvm.vector.reduce.* intrinsics called by the test
; functions in this file. The fadd variants take a scalar start value followed
; by the vector operand; the fmax, fmin, fmaximum and fminimum variants take
; only the vector operand. Each family is declared for half, float and double
; element types at the vector widths exercised by the tests.
763 declare half @llvm.vector.reduce.fadd.v4f16(half, <4 x half>)
764 declare half @llvm.vector.reduce.fadd.v8f16(half, <8 x half>)
765 declare half @llvm.vector.reduce.fadd.v16f16(half, <16 x half>)
767 declare float @llvm.vector.reduce.fadd.v2f32(float, <2 x float>)
768 declare float @llvm.vector.reduce.fadd.v4f32(float, <4 x float>)
769 declare float @llvm.vector.reduce.fadd.v8f32(float, <8 x float>)
771 declare double @llvm.vector.reduce.fadd.v1f64(double, <1 x double>)
772 declare double @llvm.vector.reduce.fadd.v2f64(double, <2 x double>)
773 declare double @llvm.vector.reduce.fadd.v4f64(double, <4 x double>)
775 declare half @llvm.vector.reduce.fmax.v4f16(<4 x half>)
776 declare half @llvm.vector.reduce.fmax.v8f16(<8 x half>)
777 declare half @llvm.vector.reduce.fmax.v16f16(<16 x half>)
779 declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>)
780 declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>)
781 declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>)
783 declare double @llvm.vector.reduce.fmax.v1f64(<1 x double>)
784 declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>)
785 declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>)
787 declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
788 declare half @llvm.vector.reduce.fmin.v8f16(<8 x half>)
789 declare half @llvm.vector.reduce.fmin.v16f16(<16 x half>)
791 declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
792 declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
793 declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
795 declare double @llvm.vector.reduce.fmin.v1f64(<1 x double>)
796 declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
797 declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
799 declare half @llvm.vector.reduce.fmaximum.v4f16(<4 x half>)
800 declare half @llvm.vector.reduce.fmaximum.v8f16(<8 x half>)
801 declare half @llvm.vector.reduce.fmaximum.v16f16(<16 x half>)
803 declare float @llvm.vector.reduce.fmaximum.v2f32(<2 x float>)
804 declare float @llvm.vector.reduce.fmaximum.v4f32(<4 x float>)
805 declare float @llvm.vector.reduce.fmaximum.v8f32(<8 x float>)
807 declare double @llvm.vector.reduce.fmaximum.v1f64(<1 x double>)
808 declare double @llvm.vector.reduce.fmaximum.v2f64(<2 x double>)
809 declare double @llvm.vector.reduce.fmaximum.v4f64(<4 x double>)
811 declare half @llvm.vector.reduce.fminimum.v4f16(<4 x half>)
812 declare half @llvm.vector.reduce.fminimum.v8f16(<8 x half>)
813 declare half @llvm.vector.reduce.fminimum.v16f16(<16 x half>)
815 declare float @llvm.vector.reduce.fminimum.v2f32(<2 x float>)
816 declare float @llvm.vector.reduce.fminimum.v4f32(<4 x float>)
817 declare float @llvm.vector.reduce.fminimum.v8f32(<8 x float>)
819 declare double @llvm.vector.reduce.fminimum.v1f64(<1 x double>)
820 declare double @llvm.vector.reduce.fminimum.v2f64(<2 x double>)
821 declare double @llvm.vector.reduce.fminimum.v4f64(<4 x double>)