1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3 ; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
4 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5 ; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
6 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7 ; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \
8 ; RUN: FileCheck %s --check-prefix=PWR10LE
9 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
10 ; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \
11 ; RUN: FileCheck %s --check-prefix=PWR10BE
; fmin reduction of <2 x float> with no fast-math flags on the call.
; Every configuration expects two xscvspdpn conversions feeding a single
; scalar xsmindp. The little-endian and big-endian sequences differ in the
; shuffles they use and in the operand order of the xsmindp.
16 define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 {
17 ; PWR9LE-LABEL: v2f32:
18 ; PWR9LE: # %bb.0: # %entry
19 ; PWR9LE-NEXT: xxswapd vs0, v2
20 ; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 3
21 ; PWR9LE-NEXT: xscvspdpn f0, vs0
22 ; PWR9LE-NEXT: xscvspdpn f1, vs1
23 ; PWR9LE-NEXT: xsmindp f1, f1, f0
26 ; PWR9BE-LABEL: v2f32:
27 ; PWR9BE: # %bb.0: # %entry
28 ; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
29 ; PWR9BE-NEXT: xscvspdpn f0, v2
30 ; PWR9BE-NEXT: xscvspdpn f1, vs1
31 ; PWR9BE-NEXT: xsmindp f1, f0, f1
34 ; PWR10LE-LABEL: v2f32:
35 ; PWR10LE: # %bb.0: # %entry
36 ; PWR10LE-NEXT: xxswapd vs0, v2
37 ; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 3
38 ; PWR10LE-NEXT: xscvspdpn f0, vs0
39 ; PWR10LE-NEXT: xscvspdpn f1, vs1
40 ; PWR10LE-NEXT: xsmindp f1, f1, f0
43 ; PWR10BE-LABEL: v2f32:
44 ; PWR10BE: # %bb.0: # %entry
45 ; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
46 ; PWR10BE-NEXT: xscvspdpn f0, v2
47 ; PWR10BE-NEXT: xscvspdpn f1, vs1
48 ; PWR10BE-NEXT: xsmindp f1, f0, f1
51 %0 = call float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
; fmin reduction of <2 x float> where the call carries the `fast` flag.
; The checks expect a splat (xxspltw) and one vector xvminsp, with no scalar
; xsmindp. The little-endian sequences add an xxsldwi before the final
; xscvspdpn; the big-endian sequences convert the xvminsp result directly.
55 define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 {
56 ; PWR9LE-LABEL: v2f32_fast:
57 ; PWR9LE: # %bb.0: # %entry
58 ; PWR9LE-NEXT: xxspltw vs0, v2, 2
59 ; PWR9LE-NEXT: xvminsp vs0, v2, vs0
60 ; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
61 ; PWR9LE-NEXT: xscvspdpn f1, vs0
64 ; PWR9BE-LABEL: v2f32_fast:
65 ; PWR9BE: # %bb.0: # %entry
66 ; PWR9BE-NEXT: xxspltw vs0, v2, 1
67 ; PWR9BE-NEXT: xvminsp vs0, v2, vs0
68 ; PWR9BE-NEXT: xscvspdpn f1, vs0
71 ; PWR10LE-LABEL: v2f32_fast:
72 ; PWR10LE: # %bb.0: # %entry
73 ; PWR10LE-NEXT: xxspltw vs0, v2, 2
74 ; PWR10LE-NEXT: xvminsp vs0, v2, vs0
75 ; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
76 ; PWR10LE-NEXT: xscvspdpn f1, vs0
79 ; PWR10BE-LABEL: v2f32_fast:
80 ; PWR10BE: # %bb.0: # %entry
81 ; PWR10BE-NEXT: xxspltw vs0, v2, 1
82 ; PWR10BE-NEXT: xvminsp vs0, v2, vs0
83 ; PWR10BE-NEXT: xscvspdpn f1, vs0
86 %0 = call fast float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a)
; fmin reduction of <4 x float> with no fast-math flags on the call.
; The checks expect four xscvspdpn conversions combined by a chain of three
; scalar xsmindp instructions. The pwr9 and pwr10 expectations contain the
; same instructions but schedule the shuffles/conversions in a different order.
90 define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 {
91 ; PWR9LE-LABEL: v4f32:
92 ; PWR9LE: # %bb.0: # %entry
93 ; PWR9LE-NEXT: xxsldwi vs2, v2, v2, 3
94 ; PWR9LE-NEXT: xxswapd vs3, v2
95 ; PWR9LE-NEXT: xscvspdpn f0, v2
96 ; PWR9LE-NEXT: xscvspdpn f2, vs2
97 ; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
98 ; PWR9LE-NEXT: xscvspdpn f3, vs3
99 ; PWR9LE-NEXT: xscvspdpn f1, vs1
100 ; PWR9LE-NEXT: xsmindp f2, f2, f3
101 ; PWR9LE-NEXT: xsmindp f1, f2, f1
102 ; PWR9LE-NEXT: xsmindp f1, f1, f0
105 ; PWR9BE-LABEL: v4f32:
106 ; PWR9BE: # %bb.0: # %entry
107 ; PWR9BE-NEXT: xxsldwi vs2, v2, v2, 1
108 ; PWR9BE-NEXT: xxswapd vs1, v2
109 ; PWR9BE-NEXT: xscvspdpn f3, v2
110 ; PWR9BE-NEXT: xscvspdpn f2, vs2
111 ; PWR9BE-NEXT: xxsldwi vs0, v2, v2, 3
112 ; PWR9BE-NEXT: xscvspdpn f1, vs1
113 ; PWR9BE-NEXT: xscvspdpn f0, vs0
114 ; PWR9BE-NEXT: xsmindp f2, f3, f2
115 ; PWR9BE-NEXT: xsmindp f1, f2, f1
116 ; PWR9BE-NEXT: xsmindp f1, f1, f0
119 ; PWR10LE-LABEL: v4f32:
120 ; PWR10LE: # %bb.0: # %entry
121 ; PWR10LE-NEXT: xxsldwi vs2, v2, v2, 3
122 ; PWR10LE-NEXT: xxswapd vs3, v2
123 ; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
124 ; PWR10LE-NEXT: xscvspdpn f0, v2
125 ; PWR10LE-NEXT: xscvspdpn f2, vs2
126 ; PWR10LE-NEXT: xscvspdpn f3, vs3
127 ; PWR10LE-NEXT: xscvspdpn f1, vs1
128 ; PWR10LE-NEXT: xsmindp f2, f2, f3
129 ; PWR10LE-NEXT: xsmindp f1, f2, f1
130 ; PWR10LE-NEXT: xsmindp f1, f1, f0
133 ; PWR10BE-LABEL: v4f32:
134 ; PWR10BE: # %bb.0: # %entry
135 ; PWR10BE-NEXT: xxsldwi vs2, v2, v2, 1
136 ; PWR10BE-NEXT: xxswapd vs1, v2
137 ; PWR10BE-NEXT: xscvspdpn f3, v2
138 ; PWR10BE-NEXT: xxsldwi vs0, v2, v2, 3
139 ; PWR10BE-NEXT: xscvspdpn f2, vs2
140 ; PWR10BE-NEXT: xscvspdpn f1, vs1
141 ; PWR10BE-NEXT: xscvspdpn f0, vs0
142 ; PWR10BE-NEXT: xsmindp f2, f3, f2
143 ; PWR10BE-NEXT: xsmindp f1, f2, f1
144 ; PWR10BE-NEXT: xsmindp f1, f1, f0
147 %0 = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
; fmin reduction of <4 x float> where the call carries the `fast` flag.
; The checks expect two vector xvminsp steps (one after an xxswapd, one after
; an xxspltw) and no scalar xsmindp. The little-endian sequences add an
; xxsldwi before the final xscvspdpn.
152 define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 {
153 ; PWR9LE-LABEL: v4f32_fast:
154 ; PWR9LE: # %bb.0: # %entry
155 ; PWR9LE-NEXT: xxswapd v3, v2
156 ; PWR9LE-NEXT: xvminsp vs0, v2, v3
157 ; PWR9LE-NEXT: xxspltw vs1, vs0, 2
158 ; PWR9LE-NEXT: xvminsp vs0, vs0, vs1
159 ; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
160 ; PWR9LE-NEXT: xscvspdpn f1, vs0
163 ; PWR9BE-LABEL: v4f32_fast:
164 ; PWR9BE: # %bb.0: # %entry
165 ; PWR9BE-NEXT: xxswapd v3, v2
166 ; PWR9BE-NEXT: xvminsp vs0, v2, v3
167 ; PWR9BE-NEXT: xxspltw vs1, vs0, 1
168 ; PWR9BE-NEXT: xvminsp vs0, vs0, vs1
169 ; PWR9BE-NEXT: xscvspdpn f1, vs0
172 ; PWR10LE-LABEL: v4f32_fast:
173 ; PWR10LE: # %bb.0: # %entry
174 ; PWR10LE-NEXT: xxswapd v3, v2
175 ; PWR10LE-NEXT: xvminsp vs0, v2, v3
176 ; PWR10LE-NEXT: xxspltw vs1, vs0, 2
177 ; PWR10LE-NEXT: xvminsp vs0, vs0, vs1
178 ; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
179 ; PWR10LE-NEXT: xscvspdpn f1, vs0
182 ; PWR10BE-LABEL: v4f32_fast:
183 ; PWR10BE: # %bb.0: # %entry
184 ; PWR10BE-NEXT: xxswapd v3, v2
185 ; PWR10BE-NEXT: xvminsp vs0, v2, v3
186 ; PWR10BE-NEXT: xxspltw vs1, vs0, 1
187 ; PWR10BE-NEXT: xvminsp vs0, vs0, vs1
188 ; PWR10BE-NEXT: xscvspdpn f1, vs0
191 %0 = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
; fmin reduction of <8 x float> with no fast-math flags on the call.
; The checks expect v2 and v3 to be combined first with one vector xvminsp;
; that result is then reduced with three scalar xsmindp instructions, each
; fed by xscvspdpn conversions.
194 define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 {
195 ; PWR9LE-LABEL: v8f32:
196 ; PWR9LE: # %bb.0: # %entry
197 ; PWR9LE-NEXT: xvminsp vs0, v2, v3
198 ; PWR9LE-NEXT: xxswapd vs1, vs0
199 ; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 3
200 ; PWR9LE-NEXT: xscvspdpn f1, vs1
201 ; PWR9LE-NEXT: xscvspdpn f2, vs2
202 ; PWR9LE-NEXT: xsmindp f1, f2, f1
203 ; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 1
204 ; PWR9LE-NEXT: xscvspdpn f0, vs0
205 ; PWR9LE-NEXT: xscvspdpn f2, vs2
206 ; PWR9LE-NEXT: xsmindp f1, f1, f2
207 ; PWR9LE-NEXT: xsmindp f1, f1, f0
210 ; PWR9BE-LABEL: v8f32:
211 ; PWR9BE: # %bb.0: # %entry
212 ; PWR9BE-NEXT: xvminsp vs0, v2, v3
213 ; PWR9BE-NEXT: xxsldwi vs2, vs0, vs0, 1
214 ; PWR9BE-NEXT: xscvspdpn f1, vs0
215 ; PWR9BE-NEXT: xscvspdpn f2, vs2
216 ; PWR9BE-NEXT: xsmindp f1, f1, f2
217 ; PWR9BE-NEXT: xxswapd vs2, vs0
218 ; PWR9BE-NEXT: xxsldwi vs0, vs0, vs0, 3
219 ; PWR9BE-NEXT: xscvspdpn f2, vs2
220 ; PWR9BE-NEXT: xscvspdpn f0, vs0
221 ; PWR9BE-NEXT: xsmindp f1, f1, f2
222 ; PWR9BE-NEXT: xsmindp f1, f1, f0
225 ; PWR10LE-LABEL: v8f32:
226 ; PWR10LE: # %bb.0: # %entry
227 ; PWR10LE-NEXT: xvminsp vs0, v2, v3
228 ; PWR10LE-NEXT: xxswapd vs1, vs0
229 ; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 3
230 ; PWR10LE-NEXT: xscvspdpn f1, vs1
231 ; PWR10LE-NEXT: xscvspdpn f2, vs2
232 ; PWR10LE-NEXT: xsmindp f1, f2, f1
233 ; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 1
234 ; PWR10LE-NEXT: xscvspdpn f0, vs0
235 ; PWR10LE-NEXT: xscvspdpn f2, vs2
236 ; PWR10LE-NEXT: xsmindp f1, f1, f2
237 ; PWR10LE-NEXT: xsmindp f1, f1, f0
240 ; PWR10BE-LABEL: v8f32:
241 ; PWR10BE: # %bb.0: # %entry
242 ; PWR10BE-NEXT: xvminsp vs0, v2, v3
243 ; PWR10BE-NEXT: xxsldwi vs2, vs0, vs0, 1
244 ; PWR10BE-NEXT: xscvspdpn f1, vs0
245 ; PWR10BE-NEXT: xscvspdpn f2, vs2
246 ; PWR10BE-NEXT: xsmindp f1, f1, f2
247 ; PWR10BE-NEXT: xxswapd vs2, vs0
248 ; PWR10BE-NEXT: xxsldwi vs0, vs0, vs0, 3
249 ; PWR10BE-NEXT: xscvspdpn f2, vs2
250 ; PWR10BE-NEXT: xscvspdpn f0, vs0
251 ; PWR10BE-NEXT: xsmindp f1, f1, f2
252 ; PWR10BE-NEXT: xsmindp f1, f1, f0
255 %0 = call float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a)
; fmin reduction of <8 x float> where the call carries the `fast` flag.
; The checks expect three vector xvminsp steps (v2 with v3, then after an
; xxswapd, then after an xxspltw) and no scalar xsmindp. The little-endian
; sequences add an xxsldwi before the final xscvspdpn.
259 define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 {
260 ; PWR9LE-LABEL: v8f32_fast:
261 ; PWR9LE: # %bb.0: # %entry
262 ; PWR9LE-NEXT: xvminsp vs0, v2, v3
263 ; PWR9LE-NEXT: xxswapd v2, vs0
264 ; PWR9LE-NEXT: xvminsp vs0, vs0, v2
265 ; PWR9LE-NEXT: xxspltw vs1, vs0, 2
266 ; PWR9LE-NEXT: xvminsp vs0, vs0, vs1
267 ; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
268 ; PWR9LE-NEXT: xscvspdpn f1, vs0
271 ; PWR9BE-LABEL: v8f32_fast:
272 ; PWR9BE: # %bb.0: # %entry
273 ; PWR9BE-NEXT: xvminsp vs0, v2, v3
274 ; PWR9BE-NEXT: xxswapd v2, vs0
275 ; PWR9BE-NEXT: xvminsp vs0, vs0, v2
276 ; PWR9BE-NEXT: xxspltw vs1, vs0, 1
277 ; PWR9BE-NEXT: xvminsp vs0, vs0, vs1
278 ; PWR9BE-NEXT: xscvspdpn f1, vs0
281 ; PWR10LE-LABEL: v8f32_fast:
282 ; PWR10LE: # %bb.0: # %entry
283 ; PWR10LE-NEXT: xvminsp vs0, v2, v3
284 ; PWR10LE-NEXT: xxswapd v2, vs0
285 ; PWR10LE-NEXT: xvminsp vs0, vs0, v2
286 ; PWR10LE-NEXT: xxspltw vs1, vs0, 2
287 ; PWR10LE-NEXT: xvminsp vs0, vs0, vs1
288 ; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
289 ; PWR10LE-NEXT: xscvspdpn f1, vs0
292 ; PWR10BE-LABEL: v8f32_fast:
293 ; PWR10BE: # %bb.0: # %entry
294 ; PWR10BE-NEXT: xvminsp vs0, v2, v3
295 ; PWR10BE-NEXT: xxswapd v2, vs0
296 ; PWR10BE-NEXT: xvminsp vs0, vs0, v2
297 ; PWR10BE-NEXT: xxspltw vs1, vs0, 1
298 ; PWR10BE-NEXT: xvminsp vs0, vs0, vs1
299 ; PWR10BE-NEXT: xscvspdpn f1, vs0
302 %0 = call fast float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a)
; fmin reduction of <16 x float> with no fast-math flags on the call.
; The checks expect v2..v5 to be combined into one register with three
; vector xvminsp instructions; the tail is then the same shuffle/xscvspdpn/
; scalar-xsmindp sequence expected for v8f32.
306 define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 {
307 ; PWR9LE-LABEL: v16f32:
308 ; PWR9LE: # %bb.0: # %entry
309 ; PWR9LE-NEXT: xvminsp vs0, v3, v5
310 ; PWR9LE-NEXT: xvminsp vs1, v2, v4
311 ; PWR9LE-NEXT: xvminsp vs0, vs1, vs0
312 ; PWR9LE-NEXT: xxswapd vs1, vs0
313 ; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 3
314 ; PWR9LE-NEXT: xscvspdpn f1, vs1
315 ; PWR9LE-NEXT: xscvspdpn f2, vs2
316 ; PWR9LE-NEXT: xsmindp f1, f2, f1
317 ; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 1
318 ; PWR9LE-NEXT: xscvspdpn f0, vs0
319 ; PWR9LE-NEXT: xscvspdpn f2, vs2
320 ; PWR9LE-NEXT: xsmindp f1, f1, f2
321 ; PWR9LE-NEXT: xsmindp f1, f1, f0
324 ; PWR9BE-LABEL: v16f32:
325 ; PWR9BE: # %bb.0: # %entry
326 ; PWR9BE-NEXT: xvminsp vs0, v3, v5
327 ; PWR9BE-NEXT: xvminsp vs1, v2, v4
328 ; PWR9BE-NEXT: xvminsp vs0, vs1, vs0
329 ; PWR9BE-NEXT: xxsldwi vs2, vs0, vs0, 1
330 ; PWR9BE-NEXT: xscvspdpn f1, vs0
331 ; PWR9BE-NEXT: xscvspdpn f2, vs2
332 ; PWR9BE-NEXT: xsmindp f1, f1, f2
333 ; PWR9BE-NEXT: xxswapd vs2, vs0
334 ; PWR9BE-NEXT: xxsldwi vs0, vs0, vs0, 3
335 ; PWR9BE-NEXT: xscvspdpn f2, vs2
336 ; PWR9BE-NEXT: xscvspdpn f0, vs0
337 ; PWR9BE-NEXT: xsmindp f1, f1, f2
338 ; PWR9BE-NEXT: xsmindp f1, f1, f0
341 ; PWR10LE-LABEL: v16f32:
342 ; PWR10LE: # %bb.0: # %entry
343 ; PWR10LE-NEXT: xvminsp vs0, v3, v5
344 ; PWR10LE-NEXT: xvminsp vs1, v2, v4
345 ; PWR10LE-NEXT: xvminsp vs0, vs1, vs0
346 ; PWR10LE-NEXT: xxswapd vs1, vs0
347 ; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 3
348 ; PWR10LE-NEXT: xscvspdpn f1, vs1
349 ; PWR10LE-NEXT: xscvspdpn f2, vs2
350 ; PWR10LE-NEXT: xsmindp f1, f2, f1
351 ; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 1
352 ; PWR10LE-NEXT: xscvspdpn f0, vs0
353 ; PWR10LE-NEXT: xscvspdpn f2, vs2
354 ; PWR10LE-NEXT: xsmindp f1, f1, f2
355 ; PWR10LE-NEXT: xsmindp f1, f1, f0
358 ; PWR10BE-LABEL: v16f32:
359 ; PWR10BE: # %bb.0: # %entry
360 ; PWR10BE-NEXT: xvminsp vs0, v3, v5
361 ; PWR10BE-NEXT: xvminsp vs1, v2, v4
362 ; PWR10BE-NEXT: xvminsp vs0, vs1, vs0
363 ; PWR10BE-NEXT: xxsldwi vs2, vs0, vs0, 1
364 ; PWR10BE-NEXT: xscvspdpn f1, vs0
365 ; PWR10BE-NEXT: xscvspdpn f2, vs2
366 ; PWR10BE-NEXT: xsmindp f1, f1, f2
367 ; PWR10BE-NEXT: xxswapd vs2, vs0
368 ; PWR10BE-NEXT: xxsldwi vs0, vs0, vs0, 3
369 ; PWR10BE-NEXT: xscvspdpn f2, vs2
370 ; PWR10BE-NEXT: xscvspdpn f0, vs0
371 ; PWR10BE-NEXT: xsmindp f1, f1, f2
372 ; PWR10BE-NEXT: xsmindp f1, f1, f0
375 %0 = call float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
; fmin reduction of <16 x float> where the call carries the `fast` flag.
; The checks expect only vector xvminsp steps (combining v2..v5, then after an
; xxswapd, then after an xxspltw) and no scalar xsmindp. The little-endian
; sequences add an xxsldwi before the final xscvspdpn.
379 define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
380 ; PWR9LE-LABEL: v16f32_fast:
381 ; PWR9LE: # %bb.0: # %entry
382 ; PWR9LE-NEXT: xvminsp vs0, v3, v5
383 ; PWR9LE-NEXT: xvminsp vs1, v2, v4
384 ; PWR9LE-NEXT: xvminsp vs0, vs1, vs0
385 ; PWR9LE-NEXT: xxswapd v2, vs0
386 ; PWR9LE-NEXT: xvminsp vs0, vs0, v2
387 ; PWR9LE-NEXT: xxspltw vs1, vs0, 2
388 ; PWR9LE-NEXT: xvminsp vs0, vs0, vs1
389 ; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
390 ; PWR9LE-NEXT: xscvspdpn f1, vs0
393 ; PWR9BE-LABEL: v16f32_fast:
394 ; PWR9BE: # %bb.0: # %entry
395 ; PWR9BE-NEXT: xvminsp vs0, v3, v5
396 ; PWR9BE-NEXT: xvminsp vs1, v2, v4
397 ; PWR9BE-NEXT: xvminsp vs0, vs1, vs0
398 ; PWR9BE-NEXT: xxswapd v2, vs0
399 ; PWR9BE-NEXT: xvminsp vs0, vs0, v2
400 ; PWR9BE-NEXT: xxspltw vs1, vs0, 1
401 ; PWR9BE-NEXT: xvminsp vs0, vs0, vs1
402 ; PWR9BE-NEXT: xscvspdpn f1, vs0
405 ; PWR10LE-LABEL: v16f32_fast:
406 ; PWR10LE: # %bb.0: # %entry
407 ; PWR10LE-NEXT: xvminsp vs0, v3, v5
408 ; PWR10LE-NEXT: xvminsp vs1, v2, v4
409 ; PWR10LE-NEXT: xvminsp vs0, vs1, vs0
410 ; PWR10LE-NEXT: xxswapd v2, vs0
411 ; PWR10LE-NEXT: xvminsp vs0, vs0, v2
412 ; PWR10LE-NEXT: xxspltw vs1, vs0, 2
413 ; PWR10LE-NEXT: xvminsp vs0, vs0, vs1
414 ; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
415 ; PWR10LE-NEXT: xscvspdpn f1, vs0
418 ; PWR10BE-LABEL: v16f32_fast:
419 ; PWR10BE: # %bb.0: # %entry
420 ; PWR10BE-NEXT: xvminsp vs0, v3, v5
421 ; PWR10BE-NEXT: xvminsp vs1, v2, v4
422 ; PWR10BE-NEXT: xvminsp vs0, vs1, vs0
423 ; PWR10BE-NEXT: xxswapd v2, vs0
424 ; PWR10BE-NEXT: xvminsp vs0, vs0, v2
425 ; PWR10BE-NEXT: xxspltw vs1, vs0, 1
426 ; PWR10BE-NEXT: xvminsp vs0, vs0, vs1
427 ; PWR10BE-NEXT: xscvspdpn f1, vs0
430 %0 = call fast float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a)
; fmin reduction of <32 x float> with no fast-math flags on the call.
; The checks expect v2..v9 to be combined into one register with vector
; xvminsp instructions; the tail is then the same shuffle/xscvspdpn/
; scalar-xsmindp sequence expected for v8f32 and v16f32.
434 define dso_local float @v32f32(<32 x float> %a) local_unnamed_addr #0 {
435 ; PWR9LE-LABEL: v32f32:
436 ; PWR9LE: # %bb.0: # %entry
437 ; PWR9LE-NEXT: xvminsp vs0, v5, v9
438 ; PWR9LE-NEXT: xvminsp vs1, v3, v7
439 ; PWR9LE-NEXT: xvminsp vs2, v2, v6
440 ; PWR9LE-NEXT: xvminsp vs0, vs1, vs0
441 ; PWR9LE-NEXT: xvminsp vs1, v4, v8
442 ; PWR9LE-NEXT: xvminsp vs1, vs2, vs1
443 ; PWR9LE-NEXT: xvminsp vs0, vs1, vs0
444 ; PWR9LE-NEXT: xxswapd vs1, vs0
445 ; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 3
446 ; PWR9LE-NEXT: xscvspdpn f1, vs1
447 ; PWR9LE-NEXT: xscvspdpn f2, vs2
448 ; PWR9LE-NEXT: xsmindp f1, f2, f1
449 ; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 1
450 ; PWR9LE-NEXT: xscvspdpn f0, vs0
451 ; PWR9LE-NEXT: xscvspdpn f2, vs2
452 ; PWR9LE-NEXT: xsmindp f1, f1, f2
453 ; PWR9LE-NEXT: xsmindp f1, f1, f0
456 ; PWR9BE-LABEL: v32f32:
457 ; PWR9BE: # %bb.0: # %entry
458 ; PWR9BE-NEXT: xvminsp vs0, v5, v9
459 ; PWR9BE-NEXT: xvminsp vs1, v3, v7
460 ; PWR9BE-NEXT: xvminsp vs2, v2, v6
461 ; PWR9BE-NEXT: xvminsp vs0, vs1, vs0
462 ; PWR9BE-NEXT: xvminsp vs1, v4, v8
463 ; PWR9BE-NEXT: xvminsp vs1, vs2, vs1
464 ; PWR9BE-NEXT: xvminsp vs0, vs1, vs0
465 ; PWR9BE-NEXT: xxsldwi vs2, vs0, vs0, 1
466 ; PWR9BE-NEXT: xscvspdpn f1, vs0
467 ; PWR9BE-NEXT: xscvspdpn f2, vs2
468 ; PWR9BE-NEXT: xsmindp f1, f1, f2
469 ; PWR9BE-NEXT: xxswapd vs2, vs0
470 ; PWR9BE-NEXT: xxsldwi vs0, vs0, vs0, 3
471 ; PWR9BE-NEXT: xscvspdpn f2, vs2
472 ; PWR9BE-NEXT: xscvspdpn f0, vs0
473 ; PWR9BE-NEXT: xsmindp f1, f1, f2
474 ; PWR9BE-NEXT: xsmindp f1, f1, f0
477 ; PWR10LE-LABEL: v32f32:
478 ; PWR10LE: # %bb.0: # %entry
479 ; PWR10LE-NEXT: xvminsp vs0, v5, v9
480 ; PWR10LE-NEXT: xvminsp vs1, v3, v7
481 ; PWR10LE-NEXT: xvminsp vs2, v2, v6
482 ; PWR10LE-NEXT: xvminsp vs0, vs1, vs0
483 ; PWR10LE-NEXT: xvminsp vs1, v4, v8
484 ; PWR10LE-NEXT: xvminsp vs1, vs2, vs1
485 ; PWR10LE-NEXT: xvminsp vs0, vs1, vs0
486 ; PWR10LE-NEXT: xxswapd vs1, vs0
487 ; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 3
488 ; PWR10LE-NEXT: xscvspdpn f1, vs1
489 ; PWR10LE-NEXT: xscvspdpn f2, vs2
490 ; PWR10LE-NEXT: xsmindp f1, f2, f1
491 ; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 1
492 ; PWR10LE-NEXT: xscvspdpn f0, vs0
493 ; PWR10LE-NEXT: xscvspdpn f2, vs2
494 ; PWR10LE-NEXT: xsmindp f1, f1, f2
495 ; PWR10LE-NEXT: xsmindp f1, f1, f0
498 ; PWR10BE-LABEL: v32f32:
499 ; PWR10BE: # %bb.0: # %entry
500 ; PWR10BE-NEXT: xvminsp vs0, v5, v9
501 ; PWR10BE-NEXT: xvminsp vs1, v3, v7
502 ; PWR10BE-NEXT: xvminsp vs2, v2, v6
503 ; PWR10BE-NEXT: xvminsp vs0, vs1, vs0
504 ; PWR10BE-NEXT: xvminsp vs1, v4, v8
505 ; PWR10BE-NEXT: xvminsp vs1, vs2, vs1
506 ; PWR10BE-NEXT: xvminsp vs0, vs1, vs0
507 ; PWR10BE-NEXT: xxsldwi vs2, vs0, vs0, 1
508 ; PWR10BE-NEXT: xscvspdpn f1, vs0
509 ; PWR10BE-NEXT: xscvspdpn f2, vs2
510 ; PWR10BE-NEXT: xsmindp f1, f1, f2
511 ; PWR10BE-NEXT: xxswapd vs2, vs0
512 ; PWR10BE-NEXT: xxsldwi vs0, vs0, vs0, 3
513 ; PWR10BE-NEXT: xscvspdpn f2, vs2
514 ; PWR10BE-NEXT: xscvspdpn f0, vs0
515 ; PWR10BE-NEXT: xsmindp f1, f1, f2
516 ; PWR10BE-NEXT: xsmindp f1, f1, f0
519 %0 = call float @llvm.vector.reduce.fmin.v32f32(<32 x float> %a)
; fmin reduction of <32 x float> where the call carries the `fast` flag.
; The checks expect only vector xvminsp steps (combining v2..v9, then after an
; xxswapd, then after an xxspltw) and no scalar xsmindp. The little-endian
; sequences add an xxsldwi before the final xscvspdpn.
523 define dso_local float @v32f32_fast(<32 x float> %a) local_unnamed_addr #0 {
524 ; PWR9LE-LABEL: v32f32_fast:
525 ; PWR9LE: # %bb.0: # %entry
526 ; PWR9LE-NEXT: xvminsp vs0, v4, v8
527 ; PWR9LE-NEXT: xvminsp vs1, v2, v6
528 ; PWR9LE-NEXT: xvminsp vs2, v5, v9
529 ; PWR9LE-NEXT: xvminsp vs3, v3, v7
530 ; PWR9LE-NEXT: xvminsp vs2, vs3, vs2
531 ; PWR9LE-NEXT: xvminsp vs0, vs1, vs0
532 ; PWR9LE-NEXT: xvminsp vs0, vs0, vs2
533 ; PWR9LE-NEXT: xxswapd v2, vs0
534 ; PWR9LE-NEXT: xvminsp vs0, vs0, v2
535 ; PWR9LE-NEXT: xxspltw vs1, vs0, 2
536 ; PWR9LE-NEXT: xvminsp vs0, vs0, vs1
537 ; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
538 ; PWR9LE-NEXT: xscvspdpn f1, vs0
541 ; PWR9BE-LABEL: v32f32_fast:
542 ; PWR9BE: # %bb.0: # %entry
543 ; PWR9BE-NEXT: xvminsp vs0, v4, v8
544 ; PWR9BE-NEXT: xvminsp vs1, v2, v6
545 ; PWR9BE-NEXT: xvminsp vs2, v5, v9
546 ; PWR9BE-NEXT: xvminsp vs3, v3, v7
547 ; PWR9BE-NEXT: xvminsp vs2, vs3, vs2
548 ; PWR9BE-NEXT: xvminsp vs0, vs1, vs0
549 ; PWR9BE-NEXT: xvminsp vs0, vs0, vs2
550 ; PWR9BE-NEXT: xxswapd v2, vs0
551 ; PWR9BE-NEXT: xvminsp vs0, vs0, v2
552 ; PWR9BE-NEXT: xxspltw vs1, vs0, 1
553 ; PWR9BE-NEXT: xvminsp vs0, vs0, vs1
554 ; PWR9BE-NEXT: xscvspdpn f1, vs0
557 ; PWR10LE-LABEL: v32f32_fast:
558 ; PWR10LE: # %bb.0: # %entry
559 ; PWR10LE-NEXT: xvminsp vs0, v4, v8
560 ; PWR10LE-NEXT: xvminsp vs1, v2, v6
561 ; PWR10LE-NEXT: xvminsp vs2, v5, v9
562 ; PWR10LE-NEXT: xvminsp vs3, v3, v7
563 ; PWR10LE-NEXT: xvminsp vs2, vs3, vs2
564 ; PWR10LE-NEXT: xvminsp vs0, vs1, vs0
565 ; PWR10LE-NEXT: xvminsp vs0, vs0, vs2
566 ; PWR10LE-NEXT: xxswapd v2, vs0
567 ; PWR10LE-NEXT: xvminsp vs0, vs0, v2
568 ; PWR10LE-NEXT: xxspltw vs1, vs0, 2
569 ; PWR10LE-NEXT: xvminsp vs0, vs0, vs1
570 ; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
571 ; PWR10LE-NEXT: xscvspdpn f1, vs0
574 ; PWR10BE-LABEL: v32f32_fast:
575 ; PWR10BE: # %bb.0: # %entry
576 ; PWR10BE-NEXT: xvminsp vs0, v4, v8
577 ; PWR10BE-NEXT: xvminsp vs1, v2, v6
578 ; PWR10BE-NEXT: xvminsp vs2, v5, v9
579 ; PWR10BE-NEXT: xvminsp vs3, v3, v7
580 ; PWR10BE-NEXT: xvminsp vs2, vs3, vs2
581 ; PWR10BE-NEXT: xvminsp vs0, vs1, vs0
582 ; PWR10BE-NEXT: xvminsp vs0, vs0, vs2
583 ; PWR10BE-NEXT: xxswapd v2, vs0
584 ; PWR10BE-NEXT: xvminsp vs0, vs0, v2
585 ; PWR10BE-NEXT: xxspltw vs1, vs0, 1
586 ; PWR10BE-NEXT: xvminsp vs0, vs0, vs1
587 ; PWR10BE-NEXT: xscvspdpn f1, vs0
590 %0 = call fast float @llvm.vector.reduce.fmin.v32f32(<32 x float> %a)
; Declarations of the float fmin reduction intrinsics called by the f32 tests
; above, one per tested vector width (2, 4, 8, 16 and 32 elements).
594 declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>) #0
595 declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>) #0
596 declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>) #0
597 declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>) #0
598 declare float @llvm.vector.reduce.fmin.v32f32(<32 x float>) #0
; fmin reduction of <2 x double> with no fast-math flags on the call.
; The checks expect an xxswapd of v2 followed by a single scalar xsmindp.
; The xsmindp operand order differs between the little-endian (f0, v2) and
; big-endian (v2, f0) expectations.
603 define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
604 ; PWR9LE-LABEL: v2f64:
605 ; PWR9LE: # %bb.0: # %entry
606 ; PWR9LE-NEXT: xxswapd vs0, v2
607 ; PWR9LE-NEXT: xsmindp f1, f0, v2
610 ; PWR9BE-LABEL: v2f64:
611 ; PWR9BE: # %bb.0: # %entry
612 ; PWR9BE-NEXT: xxswapd vs0, v2
613 ; PWR9BE-NEXT: xsmindp f1, v2, f0
616 ; PWR10LE-LABEL: v2f64:
617 ; PWR10LE: # %bb.0: # %entry
618 ; PWR10LE-NEXT: xxswapd vs0, v2
619 ; PWR10LE-NEXT: xsmindp f1, f0, v2
622 ; PWR10BE-LABEL: v2f64:
623 ; PWR10BE: # %bb.0: # %entry
624 ; PWR10BE-NEXT: xxswapd vs0, v2
625 ; PWR10BE-NEXT: xsmindp f1, v2, f0
628 %0 = call double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
; fmin reduction of <2 x double> where the call carries the `fast` flag.
; The checks expect an xxswapd and one vector xvmindp instead of a scalar
; xsmindp. The little-endian sequences end with an extra xxswapd into vs1;
; the big-endian sequences write the xvmindp result to vs1 directly.
632 define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
633 ; PWR9LE-LABEL: v2f64_fast:
634 ; PWR9LE: # %bb.0: # %entry
635 ; PWR9LE-NEXT: xxswapd vs0, v2
636 ; PWR9LE-NEXT: xvmindp vs0, v2, vs0
637 ; PWR9LE-NEXT: xxswapd vs1, vs0
640 ; PWR9BE-LABEL: v2f64_fast:
641 ; PWR9BE: # %bb.0: # %entry
642 ; PWR9BE-NEXT: xxswapd vs0, v2
643 ; PWR9BE-NEXT: xvmindp vs1, v2, vs0
646 ; PWR10LE-LABEL: v2f64_fast:
647 ; PWR10LE: # %bb.0: # %entry
648 ; PWR10LE-NEXT: xxswapd vs0, v2
649 ; PWR10LE-NEXT: xvmindp vs0, v2, vs0
650 ; PWR10LE-NEXT: xxswapd vs1, vs0
653 ; PWR10BE-LABEL: v2f64_fast:
654 ; PWR10BE: # %bb.0: # %entry
655 ; PWR10BE-NEXT: xxswapd vs0, v2
656 ; PWR10BE-NEXT: xvmindp vs1, v2, vs0
659 %0 = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
; fmin reduction of <4 x double> with no fast-math flags on the call.
; The checks expect v2 and v3 to be combined with one vector xvmindp, then an
; xxswapd and a single scalar xsmindp. The xsmindp operand order differs
; between the little-endian and big-endian expectations.
663 define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 {
664 ; PWR9LE-LABEL: v4f64:
665 ; PWR9LE: # %bb.0: # %entry
666 ; PWR9LE-NEXT: xvmindp vs0, v2, v3
667 ; PWR9LE-NEXT: xxswapd vs1, vs0
668 ; PWR9LE-NEXT: xsmindp f1, f1, f0
671 ; PWR9BE-LABEL: v4f64:
672 ; PWR9BE: # %bb.0: # %entry
673 ; PWR9BE-NEXT: xvmindp vs0, v2, v3
674 ; PWR9BE-NEXT: xxswapd vs1, vs0
675 ; PWR9BE-NEXT: xsmindp f1, f0, f1
678 ; PWR10LE-LABEL: v4f64:
679 ; PWR10LE: # %bb.0: # %entry
680 ; PWR10LE-NEXT: xvmindp vs0, v2, v3
681 ; PWR10LE-NEXT: xxswapd vs1, vs0
682 ; PWR10LE-NEXT: xsmindp f1, f1, f0
685 ; PWR10BE-LABEL: v4f64:
686 ; PWR10BE: # %bb.0: # %entry
687 ; PWR10BE-NEXT: xvmindp vs0, v2, v3
688 ; PWR10BE-NEXT: xxswapd vs1, vs0
689 ; PWR10BE-NEXT: xsmindp f1, f0, f1
692 %0 = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a)
; fmin reduction of <4 x double> where the call carries the `fast` flag.
; The checks expect two vector xvmindp steps (v2 with v3, then after an
; xxswapd) and no scalar xsmindp. The little-endian sequences end with an
; extra xxswapd into vs1; the big-endian ones write the last xvmindp to vs1.
696 define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
697 ; PWR9LE-LABEL: v4f64_fast:
698 ; PWR9LE: # %bb.0: # %entry
699 ; PWR9LE-NEXT: xvmindp vs0, v2, v3
700 ; PWR9LE-NEXT: xxswapd vs1, vs0
701 ; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
702 ; PWR9LE-NEXT: xxswapd vs1, vs0
705 ; PWR9BE-LABEL: v4f64_fast:
706 ; PWR9BE: # %bb.0: # %entry
707 ; PWR9BE-NEXT: xvmindp vs0, v2, v3
708 ; PWR9BE-NEXT: xxswapd vs1, vs0
709 ; PWR9BE-NEXT: xvmindp vs1, vs0, vs1
712 ; PWR10LE-LABEL: v4f64_fast:
713 ; PWR10LE: # %bb.0: # %entry
714 ; PWR10LE-NEXT: xvmindp vs0, v2, v3
715 ; PWR10LE-NEXT: xxswapd vs1, vs0
716 ; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
717 ; PWR10LE-NEXT: xxswapd vs1, vs0
720 ; PWR10BE-LABEL: v4f64_fast:
721 ; PWR10BE: # %bb.0: # %entry
722 ; PWR10BE-NEXT: xvmindp vs0, v2, v3
723 ; PWR10BE-NEXT: xxswapd vs1, vs0
724 ; PWR10BE-NEXT: xvmindp vs1, vs0, vs1
727 %0 = call fast double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a)
; fmin reduction of <8 x double> with no fast-math flags on the call.
; The checks expect v2..v5 to be combined into one register with three vector
; xvmindp instructions, then an xxswapd and a single scalar xsmindp whose
; operand order differs between little-endian and big-endian.
731 define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 {
732 ; PWR9LE-LABEL: v8f64:
733 ; PWR9LE: # %bb.0: # %entry
734 ; PWR9LE-NEXT: xvmindp vs0, v3, v5
735 ; PWR9LE-NEXT: xvmindp vs1, v2, v4
736 ; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
737 ; PWR9LE-NEXT: xxswapd vs1, vs0
738 ; PWR9LE-NEXT: xsmindp f1, f1, f0
741 ; PWR9BE-LABEL: v8f64:
742 ; PWR9BE: # %bb.0: # %entry
743 ; PWR9BE-NEXT: xvmindp vs0, v3, v5
744 ; PWR9BE-NEXT: xvmindp vs1, v2, v4
745 ; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
746 ; PWR9BE-NEXT: xxswapd vs1, vs0
747 ; PWR9BE-NEXT: xsmindp f1, f0, f1
750 ; PWR10LE-LABEL: v8f64:
751 ; PWR10LE: # %bb.0: # %entry
752 ; PWR10LE-NEXT: xvmindp vs0, v3, v5
753 ; PWR10LE-NEXT: xvmindp vs1, v2, v4
754 ; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
755 ; PWR10LE-NEXT: xxswapd vs1, vs0
756 ; PWR10LE-NEXT: xsmindp f1, f1, f0
759 ; PWR10BE-LABEL: v8f64:
760 ; PWR10BE: # %bb.0: # %entry
761 ; PWR10BE-NEXT: xvmindp vs0, v3, v5
762 ; PWR10BE-NEXT: xvmindp vs1, v2, v4
763 ; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
764 ; PWR10BE-NEXT: xxswapd vs1, vs0
765 ; PWR10BE-NEXT: xsmindp f1, f0, f1
768 %0 = call double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a)
; fmin reduction of <8 x double> where the call carries the `fast` flag.
; The checks expect only vector xvmindp steps (combining v2..v5, then one more
; after an xxswapd) and no scalar xsmindp. The little-endian sequences end
; with an extra xxswapd into vs1.
772 define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
773 ; PWR9LE-LABEL: v8f64_fast:
774 ; PWR9LE: # %bb.0: # %entry
775 ; PWR9LE-NEXT: xvmindp vs0, v3, v5
776 ; PWR9LE-NEXT: xvmindp vs1, v2, v4
777 ; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
778 ; PWR9LE-NEXT: xxswapd vs1, vs0
779 ; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
780 ; PWR9LE-NEXT: xxswapd vs1, vs0
783 ; PWR9BE-LABEL: v8f64_fast:
784 ; PWR9BE: # %bb.0: # %entry
785 ; PWR9BE-NEXT: xvmindp vs0, v3, v5
786 ; PWR9BE-NEXT: xvmindp vs1, v2, v4
787 ; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
788 ; PWR9BE-NEXT: xxswapd vs1, vs0
789 ; PWR9BE-NEXT: xvmindp vs1, vs0, vs1
792 ; PWR10LE-LABEL: v8f64_fast:
793 ; PWR10LE: # %bb.0: # %entry
794 ; PWR10LE-NEXT: xvmindp vs0, v3, v5
795 ; PWR10LE-NEXT: xvmindp vs1, v2, v4
796 ; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
797 ; PWR10LE-NEXT: xxswapd vs1, vs0
798 ; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
799 ; PWR10LE-NEXT: xxswapd vs1, vs0
802 ; PWR10BE-LABEL: v8f64_fast:
803 ; PWR10BE: # %bb.0: # %entry
804 ; PWR10BE-NEXT: xvmindp vs0, v3, v5
805 ; PWR10BE-NEXT: xvmindp vs1, v2, v4
806 ; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
807 ; PWR10BE-NEXT: xxswapd vs1, vs0
808 ; PWR10BE-NEXT: xvmindp vs1, vs0, vs1
811 %0 = call fast double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a)
; fmin reduction of <16 x double> with no fast-math flags on the call.
; The checks expect v2..v9 to be combined into one register with vector
; xvmindp instructions, then an xxswapd and a single scalar xsmindp whose
; operand order differs between little-endian and big-endian.
815 define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 {
816 ; PWR9LE-LABEL: v16f64:
817 ; PWR9LE: # %bb.0: # %entry
818 ; PWR9LE-NEXT: xvmindp vs0, v5, v9
819 ; PWR9LE-NEXT: xvmindp vs1, v3, v7
820 ; PWR9LE-NEXT: xvmindp vs2, v2, v6
821 ; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
822 ; PWR9LE-NEXT: xvmindp vs1, v4, v8
823 ; PWR9LE-NEXT: xvmindp vs1, vs2, vs1
824 ; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
825 ; PWR9LE-NEXT: xxswapd vs1, vs0
826 ; PWR9LE-NEXT: xsmindp f1, f1, f0
829 ; PWR9BE-LABEL: v16f64:
830 ; PWR9BE: # %bb.0: # %entry
831 ; PWR9BE-NEXT: xvmindp vs0, v5, v9
832 ; PWR9BE-NEXT: xvmindp vs1, v3, v7
833 ; PWR9BE-NEXT: xvmindp vs2, v2, v6
834 ; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
835 ; PWR9BE-NEXT: xvmindp vs1, v4, v8
836 ; PWR9BE-NEXT: xvmindp vs1, vs2, vs1
837 ; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
838 ; PWR9BE-NEXT: xxswapd vs1, vs0
839 ; PWR9BE-NEXT: xsmindp f1, f0, f1
842 ; PWR10LE-LABEL: v16f64:
843 ; PWR10LE: # %bb.0: # %entry
844 ; PWR10LE-NEXT: xvmindp vs0, v5, v9
845 ; PWR10LE-NEXT: xvmindp vs1, v3, v7
846 ; PWR10LE-NEXT: xvmindp vs2, v2, v6
847 ; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
848 ; PWR10LE-NEXT: xvmindp vs1, v4, v8
849 ; PWR10LE-NEXT: xvmindp vs1, vs2, vs1
850 ; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
851 ; PWR10LE-NEXT: xxswapd vs1, vs0
852 ; PWR10LE-NEXT: xsmindp f1, f1, f0
855 ; PWR10BE-LABEL: v16f64:
856 ; PWR10BE: # %bb.0: # %entry
857 ; PWR10BE-NEXT: xvmindp vs0, v5, v9
858 ; PWR10BE-NEXT: xvmindp vs1, v3, v7
859 ; PWR10BE-NEXT: xvmindp vs2, v2, v6
860 ; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
861 ; PWR10BE-NEXT: xvmindp vs1, v4, v8
862 ; PWR10BE-NEXT: xvmindp vs1, vs2, vs1
863 ; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
864 ; PWR10BE-NEXT: xxswapd vs1, vs0
865 ; PWR10BE-NEXT: xsmindp f1, f0, f1
868 %0 = call double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a)
; fmin reduction of <16 x double> where the call carries the `fast` flag.
; The checks expect only vector xvmindp steps (combining v2..v9, then one more
; after an xxswapd) and no scalar xsmindp. The little-endian sequences end
; with an extra xxswapd into vs1.
872 define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
873 ; PWR9LE-LABEL: v16f64_fast:
874 ; PWR9LE: # %bb.0: # %entry
875 ; PWR9LE-NEXT: xvmindp vs0, v4, v8
876 ; PWR9LE-NEXT: xvmindp vs1, v2, v6
877 ; PWR9LE-NEXT: xvmindp vs2, v5, v9
878 ; PWR9LE-NEXT: xvmindp vs3, v3, v7
879 ; PWR9LE-NEXT: xvmindp vs2, vs3, vs2
880 ; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
881 ; PWR9LE-NEXT: xvmindp vs0, vs0, vs2
882 ; PWR9LE-NEXT: xxswapd vs1, vs0
883 ; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
884 ; PWR9LE-NEXT: xxswapd vs1, vs0
887 ; PWR9BE-LABEL: v16f64_fast:
888 ; PWR9BE: # %bb.0: # %entry
889 ; PWR9BE-NEXT: xvmindp vs0, v4, v8
890 ; PWR9BE-NEXT: xvmindp vs1, v2, v6
891 ; PWR9BE-NEXT: xvmindp vs2, v5, v9
892 ; PWR9BE-NEXT: xvmindp vs3, v3, v7
893 ; PWR9BE-NEXT: xvmindp vs2, vs3, vs2
894 ; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
895 ; PWR9BE-NEXT: xvmindp vs0, vs0, vs2
896 ; PWR9BE-NEXT: xxswapd vs1, vs0
897 ; PWR9BE-NEXT: xvmindp vs1, vs0, vs1
900 ; PWR10LE-LABEL: v16f64_fast:
901 ; PWR10LE: # %bb.0: # %entry
902 ; PWR10LE-NEXT: xvmindp vs0, v4, v8
903 ; PWR10LE-NEXT: xvmindp vs1, v2, v6
904 ; PWR10LE-NEXT: xvmindp vs2, v5, v9
905 ; PWR10LE-NEXT: xvmindp vs3, v3, v7
906 ; PWR10LE-NEXT: xvmindp vs2, vs3, vs2
907 ; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
908 ; PWR10LE-NEXT: xvmindp vs0, vs0, vs2
909 ; PWR10LE-NEXT: xxswapd vs1, vs0
910 ; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
911 ; PWR10LE-NEXT: xxswapd vs1, vs0
914 ; PWR10BE-LABEL: v16f64_fast:
915 ; PWR10BE: # %bb.0: # %entry
916 ; PWR10BE-NEXT: xvmindp vs0, v4, v8
917 ; PWR10BE-NEXT: xvmindp vs1, v2, v6
918 ; PWR10BE-NEXT: xvmindp vs2, v5, v9
919 ; PWR10BE-NEXT: xvmindp vs3, v3, v7
920 ; PWR10BE-NEXT: xvmindp vs2, vs3, vs2
921 ; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
922 ; PWR10BE-NEXT: xvmindp vs0, vs0, vs2
923 ; PWR10BE-NEXT: xxswapd vs1, vs0
924 ; PWR10BE-NEXT: xvmindp vs1, vs0, vs1
927 %0 = call fast double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a)
; fmin reduction of <32 x double> with no fast-math flags on the call.
; Besides registers v2..v13, the checks expect four lxv loads at offsets from
; r1 (224..272 for little-endian, 288..240 range for big-endian, i.e. each
; big-endian offset is 16 higher). All operands are combined with vector
; xvmindp, then an xxswapd and a single scalar xsmindp finish the reduction.
; The pwr9 and pwr10 expectations differ only in how the lxv loads are
; interleaved with the first xvmindp instructions.
931 define dso_local double @v32f64(<32 x double> %a) local_unnamed_addr #0 {
932 ; PWR9LE-LABEL: v32f64:
933 ; PWR9LE: # %bb.0: # %entry
934 ; PWR9LE-NEXT: lxv vs3, 272(r1)
935 ; PWR9LE-NEXT: lxv vs2, 240(r1)
936 ; PWR9LE-NEXT: xvmindp vs4, v5, v13
937 ; PWR9LE-NEXT: lxv vs1, 256(r1)
938 ; PWR9LE-NEXT: lxv vs0, 224(r1)
939 ; PWR9LE-NEXT: xvmindp vs3, v9, vs3
940 ; PWR9LE-NEXT: xvmindp vs2, v7, vs2
941 ; PWR9LE-NEXT: xvmindp vs1, v8, vs1
942 ; PWR9LE-NEXT: xvmindp vs0, v6, vs0
943 ; PWR9LE-NEXT: xvmindp vs3, vs4, vs3
944 ; PWR9LE-NEXT: xvmindp vs4, v3, v11
945 ; PWR9LE-NEXT: xvmindp vs2, vs4, vs2
946 ; PWR9LE-NEXT: xvmindp vs2, vs2, vs3
947 ; PWR9LE-NEXT: xvmindp vs3, v4, v12
948 ; PWR9LE-NEXT: xvmindp vs1, vs3, vs1
949 ; PWR9LE-NEXT: xvmindp vs3, v2, v10
950 ; PWR9LE-NEXT: xvmindp vs0, vs3, vs0
951 ; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
952 ; PWR9LE-NEXT: xvmindp vs0, vs0, vs2
953 ; PWR9LE-NEXT: xxswapd vs1, vs0
954 ; PWR9LE-NEXT: xsmindp f1, f1, f0
957 ; PWR9BE-LABEL: v32f64:
958 ; PWR9BE: # %bb.0: # %entry
959 ; PWR9BE-NEXT: lxv vs3, 288(r1)
960 ; PWR9BE-NEXT: lxv vs2, 256(r1)
961 ; PWR9BE-NEXT: xvmindp vs4, v5, v13
962 ; PWR9BE-NEXT: lxv vs1, 272(r1)
963 ; PWR9BE-NEXT: lxv vs0, 240(r1)
964 ; PWR9BE-NEXT: xvmindp vs3, v9, vs3
965 ; PWR9BE-NEXT: xvmindp vs2, v7, vs2
966 ; PWR9BE-NEXT: xvmindp vs1, v8, vs1
967 ; PWR9BE-NEXT: xvmindp vs0, v6, vs0
968 ; PWR9BE-NEXT: xvmindp vs3, vs4, vs3
969 ; PWR9BE-NEXT: xvmindp vs4, v3, v11
970 ; PWR9BE-NEXT: xvmindp vs2, vs4, vs2
971 ; PWR9BE-NEXT: xvmindp vs2, vs2, vs3
972 ; PWR9BE-NEXT: xvmindp vs3, v4, v12
973 ; PWR9BE-NEXT: xvmindp vs1, vs3, vs1
974 ; PWR9BE-NEXT: xvmindp vs3, v2, v10
975 ; PWR9BE-NEXT: xvmindp vs0, vs3, vs0
976 ; PWR9BE-NEXT: xvmindp vs0, vs0, vs1
977 ; PWR9BE-NEXT: xvmindp vs0, vs0, vs2
978 ; PWR9BE-NEXT: xxswapd vs1, vs0
979 ; PWR9BE-NEXT: xsmindp f1, f0, f1
982 ; PWR10LE-LABEL: v32f64:
983 ; PWR10LE: # %bb.0: # %entry
984 ; PWR10LE-NEXT: lxv vs3, 272(r1)
985 ; PWR10LE-NEXT: lxv vs2, 240(r1)
986 ; PWR10LE-NEXT: xvmindp vs4, v5, v13
987 ; PWR10LE-NEXT: xvmindp vs3, v9, vs3
988 ; PWR10LE-NEXT: lxv vs1, 256(r1)
989 ; PWR10LE-NEXT: xvmindp vs2, v7, vs2
990 ; PWR10LE-NEXT: lxv vs0, 224(r1)
991 ; PWR10LE-NEXT: xvmindp vs1, v8, vs1
992 ; PWR10LE-NEXT: xvmindp vs0, v6, vs0
993 ; PWR10LE-NEXT: xvmindp vs3, vs4, vs3
994 ; PWR10LE-NEXT: xvmindp vs4, v3, v11
995 ; PWR10LE-NEXT: xvmindp vs2, vs4, vs2
996 ; PWR10LE-NEXT: xvmindp vs2, vs2, vs3
997 ; PWR10LE-NEXT: xvmindp vs3, v4, v12
998 ; PWR10LE-NEXT: xvmindp vs1, vs3, vs1
999 ; PWR10LE-NEXT: xvmindp vs3, v2, v10
1000 ; PWR10LE-NEXT: xvmindp vs0, vs3, vs0
1001 ; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
1002 ; PWR10LE-NEXT: xvmindp vs0, vs0, vs2
1003 ; PWR10LE-NEXT: xxswapd vs1, vs0
1004 ; PWR10LE-NEXT: xsmindp f1, f1, f0
1007 ; PWR10BE-LABEL: v32f64:
1008 ; PWR10BE: # %bb.0: # %entry
1009 ; PWR10BE-NEXT: lxv vs3, 288(r1)
1010 ; PWR10BE-NEXT: lxv vs2, 256(r1)
1011 ; PWR10BE-NEXT: xvmindp vs4, v5, v13
1012 ; PWR10BE-NEXT: xvmindp vs3, v9, vs3
1013 ; PWR10BE-NEXT: lxv vs1, 272(r1)
1014 ; PWR10BE-NEXT: xvmindp vs2, v7, vs2
1015 ; PWR10BE-NEXT: lxv vs0, 240(r1)
1016 ; PWR10BE-NEXT: xvmindp vs1, v8, vs1
1017 ; PWR10BE-NEXT: xvmindp vs0, v6, vs0
1018 ; PWR10BE-NEXT: xvmindp vs3, vs4, vs3
1019 ; PWR10BE-NEXT: xvmindp vs4, v3, v11
1020 ; PWR10BE-NEXT: xvmindp vs2, vs4, vs2
1021 ; PWR10BE-NEXT: xvmindp vs2, vs2, vs3
1022 ; PWR10BE-NEXT: xvmindp vs3, v4, v12
1023 ; PWR10BE-NEXT: xvmindp vs1, vs3, vs1
1024 ; PWR10BE-NEXT: xvmindp vs3, v2, v10
1025 ; PWR10BE-NEXT: xvmindp vs0, vs3, vs0
1026 ; PWR10BE-NEXT: xvmindp vs0, vs0, vs1
1027 ; PWR10BE-NEXT: xvmindp vs0, vs0, vs2
1028 ; PWR10BE-NEXT: xxswapd vs1, vs0
1029 ; PWR10BE-NEXT: xsmindp f1, f0, f1
1032 %0 = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %a)
; Fast-math variant of the v32f64 fmin reduction test. The body calls
; llvm.vector.reduce.fmin.v32f64 on the <32 x double> argument with the `fast`
; flag set on the call.
;
; The check lines are grouped by FileCheck prefix (PWR9LE, PWR9BE, PWR10LE,
; PWR10BE), matching the four llc invocations at the top of the file
; (pwr9 / pwr10, each on powerpc64le and powerpc64).
; They are autogenerated by utils/update_llc_test_checks.py, so regenerate them
; with that script instead of editing them by hand.
;
; Every group expects four lxv loads at offsets from r1, a tree of xvmindp
; instructions combining those with v2-v13, and a closing xxswapd / xvmindp
; pair. In the lines shown here the LE groups end with one more xxswapd, while
; the BE groups write the last xvmindp directly into vs1.
; NOTE(review): the lxv loads presumably fetch the vector arguments that did
; not fit in v2-v13 - confirm against the ABI.
1036 define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
1037 ; PWR9LE-LABEL: v32f64_fast:
1038 ; PWR9LE: # %bb.0: # %entry
1039 ; PWR9LE-NEXT: lxv vs0, 256(r1)
1040 ; PWR9LE-NEXT: lxv vs1, 224(r1)
1041 ; PWR9LE-NEXT: lxv vs2, 272(r1)
1042 ; PWR9LE-NEXT: lxv vs3, 240(r1)
1043 ; PWR9LE-NEXT: xvmindp vs4, v3, v11
1044 ; PWR9LE-NEXT: xvmindp vs5, v5, v13
1045 ; PWR9LE-NEXT: xvmindp vs6, v2, v10
1046 ; PWR9LE-NEXT: xvmindp vs7, v4, v12
1047 ; PWR9LE-NEXT: xvmindp vs3, v7, vs3
1048 ; PWR9LE-NEXT: xvmindp vs2, v9, vs2
1049 ; PWR9LE-NEXT: xvmindp vs1, v6, vs1
1050 ; PWR9LE-NEXT: xvmindp vs0, v8, vs0
1051 ; PWR9LE-NEXT: xvmindp vs0, vs7, vs0
1052 ; PWR9LE-NEXT: xvmindp vs1, vs6, vs1
1053 ; PWR9LE-NEXT: xvmindp vs2, vs5, vs2
1054 ; PWR9LE-NEXT: xvmindp vs3, vs4, vs3
1055 ; PWR9LE-NEXT: xvmindp vs2, vs3, vs2
1056 ; PWR9LE-NEXT: xvmindp vs0, vs1, vs0
1057 ; PWR9LE-NEXT: xvmindp vs0, vs0, vs2
1058 ; PWR9LE-NEXT: xxswapd vs1, vs0
1059 ; PWR9LE-NEXT: xvmindp vs0, vs0, vs1
1060 ; PWR9LE-NEXT: xxswapd vs1, vs0
1063 ; PWR9BE-LABEL: v32f64_fast:
1064 ; PWR9BE: # %bb.0: # %entry
1065 ; PWR9BE-NEXT: lxv vs0, 272(r1)
1066 ; PWR9BE-NEXT: lxv vs1, 240(r1)
1067 ; PWR9BE-NEXT: lxv vs2, 288(r1)
1068 ; PWR9BE-NEXT: lxv vs3, 256(r1)
1069 ; PWR9BE-NEXT: xvmindp vs4, v3, v11
1070 ; PWR9BE-NEXT: xvmindp vs5, v5, v13
1071 ; PWR9BE-NEXT: xvmindp vs6, v2, v10
1072 ; PWR9BE-NEXT: xvmindp vs7, v4, v12
1073 ; PWR9BE-NEXT: xvmindp vs3, v7, vs3
1074 ; PWR9BE-NEXT: xvmindp vs2, v9, vs2
1075 ; PWR9BE-NEXT: xvmindp vs1, v6, vs1
1076 ; PWR9BE-NEXT: xvmindp vs0, v8, vs0
1077 ; PWR9BE-NEXT: xvmindp vs0, vs7, vs0
1078 ; PWR9BE-NEXT: xvmindp vs1, vs6, vs1
1079 ; PWR9BE-NEXT: xvmindp vs2, vs5, vs2
1080 ; PWR9BE-NEXT: xvmindp vs3, vs4, vs3
1081 ; PWR9BE-NEXT: xvmindp vs2, vs3, vs2
1082 ; PWR9BE-NEXT: xvmindp vs0, vs1, vs0
1083 ; PWR9BE-NEXT: xvmindp vs0, vs0, vs2
1084 ; PWR9BE-NEXT: xxswapd vs1, vs0
1085 ; PWR9BE-NEXT: xvmindp vs1, vs0, vs1
1088 ; PWR10LE-LABEL: v32f64_fast:
1089 ; PWR10LE: # %bb.0: # %entry
1090 ; PWR10LE-NEXT: lxv vs0, 256(r1)
1091 ; PWR10LE-NEXT: lxv vs1, 224(r1)
1092 ; PWR10LE-NEXT: xvmindp vs4, v3, v11
1093 ; PWR10LE-NEXT: xvmindp vs5, v5, v13
1094 ; PWR10LE-NEXT: xvmindp vs6, v2, v10
1095 ; PWR10LE-NEXT: xvmindp vs7, v4, v12
1096 ; PWR10LE-NEXT: xvmindp vs1, v6, vs1
1097 ; PWR10LE-NEXT: lxv vs2, 272(r1)
1098 ; PWR10LE-NEXT: lxv vs3, 240(r1)
1099 ; PWR10LE-NEXT: xvmindp vs3, v7, vs3
1100 ; PWR10LE-NEXT: xvmindp vs2, v9, vs2
1101 ; PWR10LE-NEXT: xvmindp vs0, v8, vs0
1102 ; PWR10LE-NEXT: xvmindp vs0, vs7, vs0
1103 ; PWR10LE-NEXT: xvmindp vs1, vs6, vs1
1104 ; PWR10LE-NEXT: xvmindp vs2, vs5, vs2
1105 ; PWR10LE-NEXT: xvmindp vs3, vs4, vs3
1106 ; PWR10LE-NEXT: xvmindp vs2, vs3, vs2
1107 ; PWR10LE-NEXT: xvmindp vs0, vs1, vs0
1108 ; PWR10LE-NEXT: xvmindp vs0, vs0, vs2
1109 ; PWR10LE-NEXT: xxswapd vs1, vs0
1110 ; PWR10LE-NEXT: xvmindp vs0, vs0, vs1
1111 ; PWR10LE-NEXT: xxswapd vs1, vs0
1114 ; PWR10BE-LABEL: v32f64_fast:
1115 ; PWR10BE: # %bb.0: # %entry
1116 ; PWR10BE-NEXT: lxv vs0, 272(r1)
1117 ; PWR10BE-NEXT: lxv vs1, 240(r1)
1118 ; PWR10BE-NEXT: xvmindp vs4, v3, v11
1119 ; PWR10BE-NEXT: xvmindp vs5, v5, v13
1120 ; PWR10BE-NEXT: xvmindp vs6, v2, v10
1121 ; PWR10BE-NEXT: xvmindp vs7, v4, v12
1122 ; PWR10BE-NEXT: xvmindp vs1, v6, vs1
1123 ; PWR10BE-NEXT: lxv vs2, 288(r1)
1124 ; PWR10BE-NEXT: lxv vs3, 256(r1)
1125 ; PWR10BE-NEXT: xvmindp vs3, v7, vs3
1126 ; PWR10BE-NEXT: xvmindp vs2, v9, vs2
1127 ; PWR10BE-NEXT: xvmindp vs0, v8, vs0
1128 ; PWR10BE-NEXT: xvmindp vs0, vs7, vs0
1129 ; PWR10BE-NEXT: xvmindp vs1, vs6, vs1
1130 ; PWR10BE-NEXT: xvmindp vs2, vs5, vs2
1131 ; PWR10BE-NEXT: xvmindp vs3, vs4, vs3
1132 ; PWR10BE-NEXT: xvmindp vs2, vs3, vs2
1133 ; PWR10BE-NEXT: xvmindp vs0, vs1, vs0
1134 ; PWR10BE-NEXT: xvmindp vs0, vs0, vs2
1135 ; PWR10BE-NEXT: xxswapd vs1, vs0
1136 ; PWR10BE-NEXT: xvmindp vs1, vs0, vs1
; The `fast` flag is the only difference between this call and the one in the
; non-fast v32f64 test.
1139 %0 = call fast double @llvm.vector.reduce.fmin.v32f64(<32 x double> %a)
; Declarations of the double-precision fmin reduction intrinsics, one per
; vector width from 2 to 32 elements. The v32f64 form is the one called by the
; v32f64 and v32f64_fast tests.
; NOTE(review): the narrower forms are presumably called by tests earlier in
; the file - confirm.
1143 declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>) #0
1144 declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>) #0
1145 declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>) #0
1146 declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>) #0
1147 declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>) #0
; Attribute group #0 is attached to the declarations above and to the test
; function definitions. It only adds nounwind.
1149 attributes #0 = { nounwind }