1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3 ; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
4 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5 ; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
6 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7 ; RUN: -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
8 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
9 ; RUN: -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE
12 ;; Vectors of type i32
; Test: XOR reduction of a <2 x i32> argument through
; llvm.vector.reduce.xor.v2i32.
; Per the checks below, every run configuration expects the same shape:
; xxspltw of the input, an xxlxor of the input with that splat, and a final
; vextuw*x writing r3 (with r3 preloaded to 0 by `li`).
; Little-endian runs use splat index 2 and vextuwrx; big-endian runs use
; splat index 1 and vextuwlx. The pwr9 and pwr10 expectations are identical.
14 define dso_local i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
15 ; PWR9LE-LABEL: v2i32:
16 ; PWR9LE: # %bb.0: # %entry
17 ; PWR9LE-NEXT: xxspltw vs0, v2, 2
18 ; PWR9LE-NEXT: li r3, 0
19 ; PWR9LE-NEXT: xxlxor v2, v2, vs0
20 ; PWR9LE-NEXT: vextuwrx r3, r3, v2
23 ; PWR9BE-LABEL: v2i32:
24 ; PWR9BE: # %bb.0: # %entry
25 ; PWR9BE-NEXT: xxspltw vs0, v2, 1
26 ; PWR9BE-NEXT: li r3, 0
27 ; PWR9BE-NEXT: xxlxor v2, v2, vs0
28 ; PWR9BE-NEXT: vextuwlx r3, r3, v2
31 ; PWR10LE-LABEL: v2i32:
32 ; PWR10LE: # %bb.0: # %entry
33 ; PWR10LE-NEXT: xxspltw vs0, v2, 2
34 ; PWR10LE-NEXT: li r3, 0
35 ; PWR10LE-NEXT: xxlxor v2, v2, vs0
36 ; PWR10LE-NEXT: vextuwrx r3, r3, v2
39 ; PWR10BE-LABEL: v2i32:
40 ; PWR10BE: # %bb.0: # %entry
41 ; PWR10BE-NEXT: xxspltw vs0, v2, 1
42 ; PWR10BE-NEXT: li r3, 0
43 ; PWR10BE-NEXT: xxlxor v2, v2, vs0
44 ; PWR10BE-NEXT: vextuwlx r3, r3, v2
47 %0 = call i32 @llvm.vector.reduce.xor.v2i32(<2 x i32> %a)
; Test: XOR reduction of a <4 x i32> argument through
; llvm.vector.reduce.xor.v4i32.
; Expected sequence in all runs: xxswapd of the input, an xxlxor of the input
; with the swapped copy, an xxspltw of that partial result, one more combining
; step, and a vextuw*x writing r3.
; The pwr9 runs do the last combining step with xxlxor; the pwr10 runs expect
; a single three-operand xxeval with immediate 105 instead.
; NOTE(review): 105 is presumably the three-input XOR selector - confirm
; against the Power ISA 3.1 xxeval description.
; LE uses splat index 2 / vextuwrx, BE uses splat index 1 / vextuwlx.
51 define dso_local i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
52 ; PWR9LE-LABEL: v4i32:
53 ; PWR9LE: # %bb.0: # %entry
54 ; PWR9LE-NEXT: xxswapd v3, v2
55 ; PWR9LE-NEXT: li r3, 0
56 ; PWR9LE-NEXT: xxlxor vs0, v2, v3
57 ; PWR9LE-NEXT: xxspltw vs1, vs0, 2
58 ; PWR9LE-NEXT: xxlxor v2, vs0, vs1
59 ; PWR9LE-NEXT: vextuwrx r3, r3, v2
62 ; PWR9BE-LABEL: v4i32:
63 ; PWR9BE: # %bb.0: # %entry
64 ; PWR9BE-NEXT: xxswapd v3, v2
65 ; PWR9BE-NEXT: li r3, 0
66 ; PWR9BE-NEXT: xxlxor vs0, v2, v3
67 ; PWR9BE-NEXT: xxspltw vs1, vs0, 1
68 ; PWR9BE-NEXT: xxlxor v2, vs0, vs1
69 ; PWR9BE-NEXT: vextuwlx r3, r3, v2
72 ; PWR10LE-LABEL: v4i32:
73 ; PWR10LE: # %bb.0: # %entry
74 ; PWR10LE-NEXT: xxswapd v3, v2
75 ; PWR10LE-NEXT: li r3, 0
76 ; PWR10LE-NEXT: xxlxor vs0, v2, v3
77 ; PWR10LE-NEXT: xxspltw vs0, vs0, 2
78 ; PWR10LE-NEXT: xxeval v2, v2, v3, vs0, 105
79 ; PWR10LE-NEXT: vextuwrx r3, r3, v2
82 ; PWR10BE-LABEL: v4i32:
83 ; PWR10BE: # %bb.0: # %entry
84 ; PWR10BE-NEXT: xxswapd v3, v2
85 ; PWR10BE-NEXT: li r3, 0
86 ; PWR10BE-NEXT: xxlxor vs0, v2, v3
87 ; PWR10BE-NEXT: xxspltw vs0, vs0, 1
88 ; PWR10BE-NEXT: xxeval v2, v2, v3, vs0, 105
89 ; PWR10BE-NEXT: vextuwlx r3, r3, v2
92 %0 = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> %a)
; Test: XOR reduction of an <8 x i32> argument through
; llvm.vector.reduce.xor.v8i32.
; The checks show the argument arriving in two vector registers (v2, v3),
; which are XORed together first. The result then goes through an
; xxswapd + XOR step and an xxspltw + XOR step before vextuw*x writes r3.
; pwr9 runs use xxlxor for every combining step; pwr10 runs expect xxeval
; with immediate 105 for the steps after the initial xxlxor.
; LE uses splat index 2 / vextuwrx, BE uses splat index 1 / vextuwlx.
96 define dso_local i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
97 ; PWR9LE-LABEL: v8i32:
98 ; PWR9LE: # %bb.0: # %entry
99 ; PWR9LE-NEXT: xxlxor vs0, v2, v3
100 ; PWR9LE-NEXT: li r3, 0
101 ; PWR9LE-NEXT: xxswapd v2, vs0
102 ; PWR9LE-NEXT: xxlxor vs0, vs0, v2
103 ; PWR9LE-NEXT: xxspltw vs1, vs0, 2
104 ; PWR9LE-NEXT: xxlxor v2, vs0, vs1
105 ; PWR9LE-NEXT: vextuwrx r3, r3, v2
108 ; PWR9BE-LABEL: v8i32:
109 ; PWR9BE: # %bb.0: # %entry
110 ; PWR9BE-NEXT: xxlxor vs0, v2, v3
111 ; PWR9BE-NEXT: li r3, 0
112 ; PWR9BE-NEXT: xxswapd v2, vs0
113 ; PWR9BE-NEXT: xxlxor vs0, vs0, v2
114 ; PWR9BE-NEXT: xxspltw vs1, vs0, 1
115 ; PWR9BE-NEXT: xxlxor v2, vs0, vs1
116 ; PWR9BE-NEXT: vextuwlx r3, r3, v2
119 ; PWR10LE-LABEL: v8i32:
120 ; PWR10LE: # %bb.0: # %entry
121 ; PWR10LE-NEXT: xxlxor vs0, v2, v3
122 ; PWR10LE-NEXT: li r3, 0
123 ; PWR10LE-NEXT: xxswapd v4, vs0
124 ; PWR10LE-NEXT: xxeval vs1, v2, v3, v4, 105
125 ; PWR10LE-NEXT: xxspltw vs1, vs1, 2
126 ; PWR10LE-NEXT: xxeval v2, vs0, v4, vs1, 105
127 ; PWR10LE-NEXT: vextuwrx r3, r3, v2
130 ; PWR10BE-LABEL: v8i32:
131 ; PWR10BE: # %bb.0: # %entry
132 ; PWR10BE-NEXT: xxlxor vs0, v2, v3
133 ; PWR10BE-NEXT: li r3, 0
134 ; PWR10BE-NEXT: xxswapd v4, vs0
135 ; PWR10BE-NEXT: xxeval vs1, v2, v3, v4, 105
136 ; PWR10BE-NEXT: xxspltw vs1, vs1, 1
137 ; PWR10BE-NEXT: xxeval v2, vs0, v4, vs1, 105
138 ; PWR10BE-NEXT: vextuwlx r3, r3, v2
141 %0 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %a)
; Test: XOR reduction of a <16 x i32> argument through
; llvm.vector.reduce.xor.v16i32.
; The checks show the argument arriving in four vector registers (v2-v5).
; pwr9 runs combine them pairwise with three xxlxor instructions, then apply
; an xxswapd + xxlxor step and an xxspltw + xxlxor step before vextuw*x
; writes r3.
; pwr10 runs keep two initial xxlxor instructions and expect xxeval with
; immediate 105 for the remaining combining steps.
; LE uses splat index 2 / vextuwrx, BE uses splat index 1 / vextuwlx.
145 define dso_local i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
146 ; PWR9LE-LABEL: v16i32:
147 ; PWR9LE: # %bb.0: # %entry
148 ; PWR9LE-NEXT: xxlxor vs0, v3, v5
149 ; PWR9LE-NEXT: xxlxor vs1, v2, v4
150 ; PWR9LE-NEXT: li r3, 0
151 ; PWR9LE-NEXT: xxlxor vs0, vs1, vs0
152 ; PWR9LE-NEXT: xxswapd v2, vs0
153 ; PWR9LE-NEXT: xxlxor vs0, vs0, v2
154 ; PWR9LE-NEXT: xxspltw vs1, vs0, 2
155 ; PWR9LE-NEXT: xxlxor v2, vs0, vs1
156 ; PWR9LE-NEXT: vextuwrx r3, r3, v2
159 ; PWR9BE-LABEL: v16i32:
160 ; PWR9BE: # %bb.0: # %entry
161 ; PWR9BE-NEXT: xxlxor vs0, v3, v5
162 ; PWR9BE-NEXT: xxlxor vs1, v2, v4
163 ; PWR9BE-NEXT: li r3, 0
164 ; PWR9BE-NEXT: xxlxor vs0, vs1, vs0
165 ; PWR9BE-NEXT: xxswapd v2, vs0
166 ; PWR9BE-NEXT: xxlxor vs0, vs0, v2
167 ; PWR9BE-NEXT: xxspltw vs1, vs0, 1
168 ; PWR9BE-NEXT: xxlxor v2, vs0, vs1
169 ; PWR9BE-NEXT: vextuwlx r3, r3, v2
172 ; PWR10LE-LABEL: v16i32:
173 ; PWR10LE: # %bb.0: # %entry
174 ; PWR10LE-NEXT: xxlxor vs1, v2, v4
175 ; PWR10LE-NEXT: xxlxor vs0, v3, v5
176 ; PWR10LE-NEXT: li r3, 0
177 ; PWR10LE-NEXT: xxeval vs2, vs1, v3, v5, 105
178 ; PWR10LE-NEXT: xxswapd v2, vs2
179 ; PWR10LE-NEXT: xxeval vs0, vs1, vs0, v2, 105
180 ; PWR10LE-NEXT: xxspltw vs0, vs0, 2
181 ; PWR10LE-NEXT: xxeval v2, vs2, v2, vs0, 105
182 ; PWR10LE-NEXT: vextuwrx r3, r3, v2
185 ; PWR10BE-LABEL: v16i32:
186 ; PWR10BE: # %bb.0: # %entry
187 ; PWR10BE-NEXT: xxlxor vs1, v2, v4
188 ; PWR10BE-NEXT: xxlxor vs0, v3, v5
189 ; PWR10BE-NEXT: li r3, 0
190 ; PWR10BE-NEXT: xxeval vs2, vs1, v3, v5, 105
191 ; PWR10BE-NEXT: xxswapd v2, vs2
192 ; PWR10BE-NEXT: xxeval vs0, vs1, vs0, v2, 105
193 ; PWR10BE-NEXT: xxspltw vs0, vs0, 1
194 ; PWR10BE-NEXT: xxeval v2, vs2, v2, vs0, 105
195 ; PWR10BE-NEXT: vextuwlx r3, r3, v2
198 %0 = call i32 @llvm.vector.reduce.xor.v16i32(<16 x i32> %a)
; Declarations of the i32 XOR-reduction intrinsics called by the v2i32,
; v4i32, v8i32 and v16i32 test functions; each takes one vector operand and
; yields an i32, and carries attribute group #0.
202 declare i32 @llvm.vector.reduce.xor.v2i32(<2 x i32>) #0
203 declare i32 @llvm.vector.reduce.xor.v4i32(<4 x i32>) #0
204 declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>) #0
205 declare i32 @llvm.vector.reduce.xor.v16i32(<16 x i32>) #0
208 ;; Vectors of type i64
; Test: XOR reduction of a <2 x i64> argument through
; llvm.vector.reduce.xor.v2i64.
; Every run expects xxswapd of the input followed by an xxlxor of the input
; with the swapped copy, then a move of the result into r3: mfvsrld on the
; little-endian runs and mffprd (from f0) on the big-endian runs.
; The pwr9 and pwr10 expectations are identical for this function.
210 define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
211 ; PWR9LE-LABEL: v2i64:
212 ; PWR9LE: # %bb.0: # %entry
213 ; PWR9LE-NEXT: xxswapd v3, v2
214 ; PWR9LE-NEXT: xxlxor vs0, v2, v3
215 ; PWR9LE-NEXT: mfvsrld r3, vs0
218 ; PWR9BE-LABEL: v2i64:
219 ; PWR9BE: # %bb.0: # %entry
220 ; PWR9BE-NEXT: xxswapd v3, v2
221 ; PWR9BE-NEXT: xxlxor vs0, v2, v3
222 ; PWR9BE-NEXT: mffprd r3, f0
225 ; PWR10LE-LABEL: v2i64:
226 ; PWR10LE: # %bb.0: # %entry
227 ; PWR10LE-NEXT: xxswapd v3, v2
228 ; PWR10LE-NEXT: xxlxor vs0, v2, v3
229 ; PWR10LE-NEXT: mfvsrld r3, vs0
232 ; PWR10BE-LABEL: v2i64:
233 ; PWR10BE: # %bb.0: # %entry
234 ; PWR10BE-NEXT: xxswapd v3, v2
235 ; PWR10BE-NEXT: xxlxor vs0, v2, v3
236 ; PWR10BE-NEXT: mffprd r3, f0
239 %0 = call i64 @llvm.vector.reduce.xor.v2i64(<2 x i64> %a)
; Test: XOR reduction of a <4 x i64> argument through
; llvm.vector.reduce.xor.v4i64.
; The checks show the argument arriving in two vector registers (v2, v3),
; which are XORed together first; the result is then combined with its
; xxswapd copy and moved into r3 (mfvsrld on LE, mffprd from f0 on BE).
; pwr9 runs do the second combine with xxlxor; pwr10 runs expect a
; three-operand xxeval with immediate 105 over v2, v3 and the swapped value.
243 define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
244 ; PWR9LE-LABEL: v4i64:
245 ; PWR9LE: # %bb.0: # %entry
246 ; PWR9LE-NEXT: xxlxor vs0, v2, v3
247 ; PWR9LE-NEXT: xxswapd v2, vs0
248 ; PWR9LE-NEXT: xxlxor vs0, vs0, v2
249 ; PWR9LE-NEXT: mfvsrld r3, vs0
252 ; PWR9BE-LABEL: v4i64:
253 ; PWR9BE: # %bb.0: # %entry
254 ; PWR9BE-NEXT: xxlxor vs0, v2, v3
255 ; PWR9BE-NEXT: xxswapd v2, vs0
256 ; PWR9BE-NEXT: xxlxor vs0, vs0, v2
257 ; PWR9BE-NEXT: mffprd r3, f0
260 ; PWR10LE-LABEL: v4i64:
261 ; PWR10LE: # %bb.0: # %entry
262 ; PWR10LE-NEXT: xxlxor vs0, v2, v3
263 ; PWR10LE-NEXT: xxswapd v4, vs0
264 ; PWR10LE-NEXT: xxeval vs0, v2, v3, v4, 105
265 ; PWR10LE-NEXT: mfvsrld r3, vs0
268 ; PWR10BE-LABEL: v4i64:
269 ; PWR10BE: # %bb.0: # %entry
270 ; PWR10BE-NEXT: xxlxor vs0, v2, v3
271 ; PWR10BE-NEXT: xxswapd v4, vs0
272 ; PWR10BE-NEXT: xxeval vs0, v2, v3, v4, 105
273 ; PWR10BE-NEXT: mffprd r3, f0
276 %0 = call i64 @llvm.vector.reduce.xor.v4i64(<4 x i64> %a)
; Test: XOR reduction of an <8 x i64> argument through
; llvm.vector.reduce.xor.v8i64.
; The checks show the argument arriving in four vector registers (v2-v5).
; pwr9 runs combine them with three xxlxor instructions, then XOR the result
; with its xxswapd copy and move it into r3 (mfvsrld on LE, mffprd on BE).
; pwr10 runs keep two initial xxlxor instructions and expect xxeval with
; immediate 105 for the remaining combining steps.
280 define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
281 ; PWR9LE-LABEL: v8i64:
282 ; PWR9LE: # %bb.0: # %entry
283 ; PWR9LE-NEXT: xxlxor vs0, v3, v5
284 ; PWR9LE-NEXT: xxlxor vs1, v2, v4
285 ; PWR9LE-NEXT: xxlxor vs0, vs1, vs0
286 ; PWR9LE-NEXT: xxswapd v2, vs0
287 ; PWR9LE-NEXT: xxlxor vs0, vs0, v2
288 ; PWR9LE-NEXT: mfvsrld r3, vs0
291 ; PWR9BE-LABEL: v8i64:
292 ; PWR9BE: # %bb.0: # %entry
293 ; PWR9BE-NEXT: xxlxor vs0, v3, v5
294 ; PWR9BE-NEXT: xxlxor vs1, v2, v4
295 ; PWR9BE-NEXT: xxlxor vs0, vs1, vs0
296 ; PWR9BE-NEXT: xxswapd v2, vs0
297 ; PWR9BE-NEXT: xxlxor vs0, vs0, v2
298 ; PWR9BE-NEXT: mffprd r3, f0
301 ; PWR10LE-LABEL: v8i64:
302 ; PWR10LE: # %bb.0: # %entry
303 ; PWR10LE-NEXT: xxlxor vs1, v2, v4
304 ; PWR10LE-NEXT: xxlxor vs0, v3, v5
305 ; PWR10LE-NEXT: xxeval vs2, vs1, v3, v5, 105
306 ; PWR10LE-NEXT: xxswapd v2, vs2
307 ; PWR10LE-NEXT: xxeval vs0, vs1, vs0, v2, 105
308 ; PWR10LE-NEXT: mfvsrld r3, vs0
311 ; PWR10BE-LABEL: v8i64:
312 ; PWR10BE: # %bb.0: # %entry
313 ; PWR10BE-NEXT: xxlxor vs1, v2, v4
314 ; PWR10BE-NEXT: xxlxor vs0, v3, v5
315 ; PWR10BE-NEXT: xxeval vs2, vs1, v3, v5, 105
316 ; PWR10BE-NEXT: xxswapd v2, vs2
317 ; PWR10BE-NEXT: xxeval vs0, vs1, vs0, v2, 105
318 ; PWR10BE-NEXT: mffprd r3, f0
321 %0 = call i64 @llvm.vector.reduce.xor.v8i64(<8 x i64> %a)
; Test: XOR reduction of a <16 x i64> argument through
; llvm.vector.reduce.xor.v16i64.
; The checks show the argument arriving in eight vector registers (v2-v9).
; pwr9 runs combine them with a tree of seven xxlxor instructions, then XOR
; the result with its xxswapd copy and move it into r3 (mfvsrld on LE,
; mffprd from f0 on BE).
; pwr10 runs expect three initial xxlxor instructions with the remaining
; combining steps expressed as xxeval with immediate 105.
325 define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
326 ; PWR9LE-LABEL: v16i64:
327 ; PWR9LE: # %bb.0: # %entry
328 ; PWR9LE-NEXT: xxlxor vs0, v4, v8
329 ; PWR9LE-NEXT: xxlxor vs1, v2, v6
330 ; PWR9LE-NEXT: xxlxor vs2, v5, v9
331 ; PWR9LE-NEXT: xxlxor vs3, v3, v7
332 ; PWR9LE-NEXT: xxlxor vs2, vs3, vs2
333 ; PWR9LE-NEXT: xxlxor vs0, vs1, vs0
334 ; PWR9LE-NEXT: xxlxor vs0, vs0, vs2
335 ; PWR9LE-NEXT: xxswapd v2, vs0
336 ; PWR9LE-NEXT: xxlxor vs0, vs0, v2
337 ; PWR9LE-NEXT: mfvsrld r3, vs0
340 ; PWR9BE-LABEL: v16i64:
341 ; PWR9BE: # %bb.0: # %entry
342 ; PWR9BE-NEXT: xxlxor vs0, v4, v8
343 ; PWR9BE-NEXT: xxlxor vs1, v2, v6
344 ; PWR9BE-NEXT: xxlxor vs2, v5, v9
345 ; PWR9BE-NEXT: xxlxor vs3, v3, v7
346 ; PWR9BE-NEXT: xxlxor vs2, vs3, vs2
347 ; PWR9BE-NEXT: xxlxor vs0, vs1, vs0
348 ; PWR9BE-NEXT: xxlxor vs0, vs0, vs2
349 ; PWR9BE-NEXT: xxswapd v2, vs0
350 ; PWR9BE-NEXT: xxlxor vs0, vs0, v2
351 ; PWR9BE-NEXT: mffprd r3, f0
354 ; PWR10LE-LABEL: v16i64:
355 ; PWR10LE: # %bb.0: # %entry
356 ; PWR10LE-NEXT: xxlxor vs1, v2, v6
357 ; PWR10LE-NEXT: xxlxor vs0, v5, v9
358 ; PWR10LE-NEXT: xxlxor vs2, v3, v7
359 ; PWR10LE-NEXT: xxeval vs1, vs1, v4, v8, 105
360 ; PWR10LE-NEXT: xxeval vs3, vs2, v5, v9, 105
361 ; PWR10LE-NEXT: xxeval vs0, vs1, vs2, vs0, 105
362 ; PWR10LE-NEXT: xxswapd v2, vs0
363 ; PWR10LE-NEXT: xxeval vs0, vs1, vs3, v2, 105
364 ; PWR10LE-NEXT: mfvsrld r3, vs0
367 ; PWR10BE-LABEL: v16i64:
368 ; PWR10BE: # %bb.0: # %entry
369 ; PWR10BE-NEXT: xxlxor vs1, v2, v6
370 ; PWR10BE-NEXT: xxlxor vs0, v5, v9
371 ; PWR10BE-NEXT: xxlxor vs2, v3, v7
372 ; PWR10BE-NEXT: xxeval vs1, vs1, v4, v8, 105
373 ; PWR10BE-NEXT: xxeval vs3, vs2, v5, v9, 105
374 ; PWR10BE-NEXT: xxeval vs0, vs1, vs2, vs0, 105
375 ; PWR10BE-NEXT: xxswapd v2, vs0
376 ; PWR10BE-NEXT: xxeval vs0, vs1, vs3, v2, 105
377 ; PWR10BE-NEXT: mffprd r3, f0
380 %0 = call i64 @llvm.vector.reduce.xor.v16i64(<16 x i64> %a)
; Declarations of the i64 XOR-reduction intrinsics called by the v2i64,
; v4i64, v8i64 and v16i64 test functions; each takes one vector operand and
; yields an i64, and carries attribute group #0.
384 declare i64 @llvm.vector.reduce.xor.v2i64(<2 x i64>) #0
385 declare i64 @llvm.vector.reduce.xor.v4i64(<4 x i64>) #0
386 declare i64 @llvm.vector.reduce.xor.v8i64(<8 x i64>) #0
387 declare i64 @llvm.vector.reduce.xor.v16i64(<16 x i64>) #0
; Attribute group #0: applied to the test functions and intrinsic
; declarations in this file; it contains only `nounwind`.
390 attributes #0 = { nounwind }