1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
3 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8
5 ; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
6 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9
8 ; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
9 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8-BE
11 ; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
12 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9-BE
; load_swap00: load two <2 x i64> vectors and reverse the elements of the first.
; Visible expectations: the little-endian pwr8/pwr9 runs list a single lxvd2x
; through r3; the big-endian runs list a load (lxvd2x or lxv) followed by xxswapd.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
14 define <2 x i64> @load_swap00(ptr %vp1, ptr %vp2) {
15 ; CHECK-P8-LABEL: load_swap00:
17 ; CHECK-P8-NEXT: lxvd2x v2, 0, r3
20 ; CHECK-P9-LABEL: load_swap00:
22 ; CHECK-P9-NEXT: lxvd2x v2, 0, r3
25 ; CHECK-P8-BE-LABEL: load_swap00:
26 ; CHECK-P8-BE: # %bb.0:
27 ; CHECK-P8-BE-NEXT: lxvd2x v2, 0, r3
28 ; CHECK-P8-BE-NEXT: xxswapd v2, v2
29 ; CHECK-P8-BE-NEXT: blr
31 ; CHECK-P9-BE-LABEL: load_swap00:
32 ; CHECK-P9-BE: # %bb.0:
33 ; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
34 ; CHECK-P9-BE-NEXT: xxswapd v2, v2
35 ; CHECK-P9-BE-NEXT: blr
36 %v1 = load <2 x i64>, ptr %vp1
37 %v2 = load <2 x i64>, ptr %vp2
; Mask <1, 0> takes both elements from %v1 in reverse order; %v2 is not
; referenced by the mask.
38 %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
; load_swap01: load two <2 x i64> vectors and reverse the elements of the second.
; Visible expectations: the little-endian pwr8/pwr9 runs list a single lxvd2x
; through r4; the big-endian runs list a load (lxvd2x or lxv) followed by xxswapd.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
42 define <2 x i64> @load_swap01(ptr %vp1, ptr %vp2) {
43 ; CHECK-P8-LABEL: load_swap01:
45 ; CHECK-P8-NEXT: lxvd2x v2, 0, r4
48 ; CHECK-P9-LABEL: load_swap01:
50 ; CHECK-P9-NEXT: lxvd2x v2, 0, r4
53 ; CHECK-P8-BE-LABEL: load_swap01:
54 ; CHECK-P8-BE: # %bb.0:
55 ; CHECK-P8-BE-NEXT: lxvd2x v2, 0, r4
56 ; CHECK-P8-BE-NEXT: xxswapd v2, v2
57 ; CHECK-P8-BE-NEXT: blr
59 ; CHECK-P9-BE-LABEL: load_swap01:
60 ; CHECK-P9-BE: # %bb.0:
61 ; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
62 ; CHECK-P9-BE-NEXT: xxswapd v2, v2
63 ; CHECK-P9-BE-NEXT: blr
64 %v1 = load <2 x i64>, ptr %vp1
65 %v2 = load <2 x i64>, ptr %vp2
; Mask indices 2 and 3 address the second operand, so <3, 2> is %v2 reversed;
; %v1 is not referenced by the mask.
66 %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
; load_swap10: load two <4 x i32> vectors and reverse the elements of the first.
; Visible expectations: little-endian pwr8 lists lxvd2x + xxswapd for the data,
; a second lxvd2x + xxswapd from the constant-pool entry .LCPI2_0, and a vperm;
; little-endian pwr9 lists only an lxvw4x through r3. The big-endian runs list
; a data load, a constant-pool load and a permute (vperm on pwr8, xxperm on pwr9).
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
70 define <4 x i32> @load_swap10(ptr %vp1, ptr %vp2) {
71 ; CHECK-P8-LABEL: load_swap10:
73 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
74 ; CHECK-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
75 ; CHECK-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
76 ; CHECK-P8-NEXT: xxswapd v2, vs0
77 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
78 ; CHECK-P8-NEXT: xxswapd v3, vs0
79 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
82 ; CHECK-P9-LABEL: load_swap10:
84 ; CHECK-P9-NEXT: lxvw4x v2, 0, r3
87 ; CHECK-P8-BE-LABEL: load_swap10:
88 ; CHECK-P8-BE: # %bb.0:
89 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
90 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
91 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
92 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
93 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
94 ; CHECK-P8-BE-NEXT: blr
96 ; CHECK-P9-BE-LABEL: load_swap10:
97 ; CHECK-P9-BE: # %bb.0:
98 ; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
99 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
100 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
101 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
102 ; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0
103 ; CHECK-P9-BE-NEXT: blr
104 %v1 = load <4 x i32>, ptr %vp1
105 %v2 = load <4 x i32>, ptr %vp2
; Mask <3, 2, 1, 0> takes all four elements from %v1 in reverse order; %v2 is
; not referenced by the mask.
106 %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; load_swap11: load two <4 x i32> vectors and reverse the elements of the second.
; Visible expectations: little-endian pwr8 lists lxvd2x + xxswapd for the data
; (through r4), a second lxvd2x + xxswapd from the constant-pool entry .LCPI3_0,
; and a vperm; little-endian pwr9 lists only an lxvw4x through r4. The
; big-endian runs list a data load, a constant-pool load and a permute.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
110 define <4 x i32> @load_swap11(ptr %vp1, ptr %vp2) {
111 ; CHECK-P8-LABEL: load_swap11:
113 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
114 ; CHECK-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
115 ; CHECK-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
116 ; CHECK-P8-NEXT: xxswapd v2, vs0
117 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
118 ; CHECK-P8-NEXT: xxswapd v3, vs0
119 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
122 ; CHECK-P9-LABEL: load_swap11:
124 ; CHECK-P9-NEXT: lxvw4x v2, 0, r4
127 ; CHECK-P8-BE-LABEL: load_swap11:
128 ; CHECK-P8-BE: # %bb.0:
129 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
130 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
131 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
132 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
133 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
134 ; CHECK-P8-BE-NEXT: blr
136 ; CHECK-P9-BE-LABEL: load_swap11:
137 ; CHECK-P9-BE: # %bb.0:
138 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
139 ; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
140 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
141 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
142 ; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0
143 ; CHECK-P9-BE-NEXT: blr
144 %v1 = load <4 x i32>, ptr %vp1
145 %v2 = load <4 x i32>, ptr %vp2
; Mask indices 4 through 7 address the second operand, so <7, 6, 5, 4> is %v2
; reversed; %v1 is not referenced by the mask.
146 %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
; load_swap20: load two <8 x i16> vectors and reverse the elements of the first.
; Visible expectations: little-endian pwr8 lists lxvd2x + xxswapd for the data,
; a second lxvd2x + xxswapd from the constant-pool entry .LCPI4_0, and a vperm;
; little-endian pwr9 lists only an lxvh8x through r3. The big-endian runs list
; a data load, a constant-pool load and a permute (vperm on pwr8, xxperm on pwr9).
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
150 define <8 x i16> @load_swap20(ptr %vp1, ptr %vp2){
151 ; CHECK-P8-LABEL: load_swap20:
153 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
154 ; CHECK-P8-NEXT: addis r3, r2, .LCPI4_0@toc@ha
155 ; CHECK-P8-NEXT: addi r3, r3, .LCPI4_0@toc@l
156 ; CHECK-P8-NEXT: xxswapd v2, vs0
157 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
158 ; CHECK-P8-NEXT: xxswapd v3, vs0
159 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
162 ; CHECK-P9-LABEL: load_swap20:
164 ; CHECK-P9-NEXT: lxvh8x v2, 0, r3
167 ; CHECK-P8-BE-LABEL: load_swap20:
168 ; CHECK-P8-BE: # %bb.0:
169 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
170 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
171 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
172 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
173 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
174 ; CHECK-P8-BE-NEXT: blr
176 ; CHECK-P9-BE-LABEL: load_swap20:
177 ; CHECK-P9-BE: # %bb.0:
178 ; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
179 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
180 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
181 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
182 ; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0
183 ; CHECK-P9-BE-NEXT: blr
184 %v1 = load <8 x i16>, ptr %vp1
185 %v2 = load <8 x i16>, ptr %vp2
; Mask <7 .. 0> takes all eight elements from %v1 in reverse order; %v2 is not
; referenced by the mask.
186 %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; load_swap21: load two <8 x i16> vectors and reverse the elements of the second.
; Visible expectations: little-endian pwr8 lists lxvd2x + xxswapd for the data
; (through r4), a second lxvd2x + xxswapd from the constant-pool entry .LCPI5_0,
; and a vperm; little-endian pwr9 lists only an lxvh8x through r4. The
; big-endian runs list a data load, a constant-pool load and a permute.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
190 define <8 x i16> @load_swap21(ptr %vp1, ptr %vp2){
191 ; CHECK-P8-LABEL: load_swap21:
193 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
194 ; CHECK-P8-NEXT: addis r3, r2, .LCPI5_0@toc@ha
195 ; CHECK-P8-NEXT: addi r3, r3, .LCPI5_0@toc@l
196 ; CHECK-P8-NEXT: xxswapd v2, vs0
197 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
198 ; CHECK-P8-NEXT: xxswapd v3, vs0
199 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
202 ; CHECK-P9-LABEL: load_swap21:
204 ; CHECK-P9-NEXT: lxvh8x v2, 0, r4
207 ; CHECK-P8-BE-LABEL: load_swap21:
208 ; CHECK-P8-BE: # %bb.0:
209 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha
210 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
211 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l
212 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
213 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
214 ; CHECK-P8-BE-NEXT: blr
216 ; CHECK-P9-BE-LABEL: load_swap21:
217 ; CHECK-P9-BE: # %bb.0:
218 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha
219 ; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
220 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l
221 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
222 ; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0
223 ; CHECK-P9-BE-NEXT: blr
224 %v1 = load <8 x i16>, ptr %vp1
225 %v2 = load <8 x i16>, ptr %vp2
; Mask indices 8 through 15 address the second operand, so <15 .. 8> is %v2
; reversed; %v1 is not referenced by the mask.
226 %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
; load_swap30: load two <16 x i8> vectors and reverse the bytes of the first.
; Visible expectations: little-endian pwr8 lists lxvd2x + xxswapd for the data,
; a second lxvd2x + xxswapd from the constant-pool entry .LCPI6_0, and a vperm;
; little-endian pwr9 lists only an lxvb16x through r3. Big-endian pwr8 lists a
; data load, a constant-pool load and a vperm; big-endian pwr9 lists lxv + xxbrq.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
230 define <16 x i8> @load_swap30(ptr %vp1, ptr %vp2){
231 ; CHECK-P8-LABEL: load_swap30:
233 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
234 ; CHECK-P8-NEXT: addis r3, r2, .LCPI6_0@toc@ha
235 ; CHECK-P8-NEXT: addi r3, r3, .LCPI6_0@toc@l
236 ; CHECK-P8-NEXT: xxswapd v2, vs0
237 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
238 ; CHECK-P8-NEXT: xxswapd v3, vs0
239 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
242 ; CHECK-P9-LABEL: load_swap30:
244 ; CHECK-P9-NEXT: lxvb16x v2, 0, r3
247 ; CHECK-P8-BE-LABEL: load_swap30:
248 ; CHECK-P8-BE: # %bb.0:
249 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
250 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI6_0@toc@ha
251 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI6_0@toc@l
252 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
253 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
254 ; CHECK-P8-BE-NEXT: blr
256 ; CHECK-P9-BE-LABEL: load_swap30:
257 ; CHECK-P9-BE: # %bb.0:
258 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
259 ; CHECK-P9-BE-NEXT: xxbrq v2, vs0
260 ; CHECK-P9-BE-NEXT: blr
261 %v1 = load <16 x i8>, ptr %vp1
262 %v2 = load <16 x i8>, ptr %vp2
; Mask <15 .. 0> takes all sixteen bytes from %v1 in reverse order; %v2 is not
; referenced by the mask.
263 %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; load_swap31: load two <16 x i8> vectors and reverse the bytes of the second.
; Visible expectations: little-endian pwr8 lists lxvd2x + xxswapd for the data
; (through r4), a second lxvd2x + xxswapd from the constant-pool entry .LCPI7_0,
; and a vperm; little-endian pwr9 lists only an lxvb16x through r4. Big-endian
; pwr8 lists a data load, a constant-pool load and a vperm; big-endian pwr9
; lists lxv + xxbrq.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
267 define <16 x i8> @load_swap31(ptr %vp1, ptr %vp2){
268 ; CHECK-P8-LABEL: load_swap31:
270 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
271 ; CHECK-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha
272 ; CHECK-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l
273 ; CHECK-P8-NEXT: xxswapd v2, vs0
274 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
275 ; CHECK-P8-NEXT: xxswapd v3, vs0
276 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
279 ; CHECK-P9-LABEL: load_swap31:
281 ; CHECK-P9-NEXT: lxvb16x v2, 0, r4
284 ; CHECK-P8-BE-LABEL: load_swap31:
285 ; CHECK-P8-BE: # %bb.0:
286 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha
287 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
288 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l
289 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
290 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
291 ; CHECK-P8-BE-NEXT: blr
293 ; CHECK-P9-BE-LABEL: load_swap31:
294 ; CHECK-P9-BE: # %bb.0:
295 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r4)
296 ; CHECK-P9-BE-NEXT: xxbrq v2, vs0
297 ; CHECK-P9-BE-NEXT: blr
298 %v1 = load <16 x i8>, ptr %vp1
299 %v2 = load <16 x i8>, ptr %vp2
; Mask indices 16 through 31 address the second operand, so <31 .. 16> is %v2
; reversed; %v1 is not referenced by the mask.
300 %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
; load_swap40: load two <2 x double> vectors and reverse the elements of the
; second.
; Visible expectations: the little-endian pwr8/pwr9 runs list a single lxvd2x
; through r4; the big-endian runs list a load (lxvd2x or lxv) into vs0 followed
; by xxswapd into v2.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
304 define <2 x double> @load_swap40(ptr %vp1, ptr %vp2) {
305 ; CHECK-P8-LABEL: load_swap40:
307 ; CHECK-P8-NEXT: lxvd2x v2, 0, r4
310 ; CHECK-P9-LABEL: load_swap40:
312 ; CHECK-P9-NEXT: lxvd2x v2, 0, r4
315 ; CHECK-P8-BE-LABEL: load_swap40:
316 ; CHECK-P8-BE: # %bb.0:
317 ; CHECK-P8-BE-NEXT: lxvd2x vs0, 0, r4
318 ; CHECK-P8-BE-NEXT: xxswapd v2, vs0
319 ; CHECK-P8-BE-NEXT: blr
321 ; CHECK-P9-BE-LABEL: load_swap40:
322 ; CHECK-P9-BE: # %bb.0:
323 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r4)
324 ; CHECK-P9-BE-NEXT: xxswapd v2, vs0
325 ; CHECK-P9-BE-NEXT: blr
326 %v1 = load <2 x double>, ptr %vp1
327 %v2 = load <2 x double>, ptr %vp2
; Mask indices 2 and 3 address the second operand, so <3, 2> is %v2 reversed;
; %v1 is not referenced by the mask.
328 %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
; load_swap50: load two <4 x float> vectors and reverse the elements of the first.
; Visible expectations: little-endian pwr8 lists lxvd2x + xxswapd for the data,
; a second lxvd2x + xxswapd from the constant-pool entry .LCPI9_0, and a vperm;
; little-endian pwr9 lists only an lxvw4x through r3. The big-endian runs list
; a data load, a constant-pool load and a permute (vperm on pwr8, xxperm on pwr9).
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
332 define <4 x float> @load_swap50(ptr %vp1, ptr %vp2) {
333 ; CHECK-P8-LABEL: load_swap50:
335 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
336 ; CHECK-P8-NEXT: addis r3, r2, .LCPI9_0@toc@ha
337 ; CHECK-P8-NEXT: addi r3, r3, .LCPI9_0@toc@l
338 ; CHECK-P8-NEXT: xxswapd v2, vs0
339 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
340 ; CHECK-P8-NEXT: xxswapd v3, vs0
341 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
344 ; CHECK-P9-LABEL: load_swap50:
346 ; CHECK-P9-NEXT: lxvw4x v2, 0, r3
349 ; CHECK-P8-BE-LABEL: load_swap50:
350 ; CHECK-P8-BE: # %bb.0:
351 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
352 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI9_0@toc@ha
353 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI9_0@toc@l
354 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
355 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
356 ; CHECK-P8-BE-NEXT: blr
358 ; CHECK-P9-BE-LABEL: load_swap50:
359 ; CHECK-P9-BE: # %bb.0:
360 ; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
361 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI9_0@toc@ha
362 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI9_0@toc@l
363 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
364 ; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0
365 ; CHECK-P9-BE-NEXT: blr
366 %v1 = load <4 x float>, ptr %vp1
367 %v2 = load <4 x float>, ptr %vp2
; Mask <3, 2, 1, 0> takes all four elements from %v1 in reverse order; %v2 is
; not referenced by the mask.
368 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; load_swap51: load two <4 x float> vectors and reverse the elements of the
; second.
; Visible expectations: little-endian pwr8 lists lxvd2x + xxswapd for the data
; (through r4), a second lxvd2x + xxswapd from the constant-pool entry
; .LCPI10_0, and a vperm; little-endian pwr9 lists only an lxvw4x through r4.
; The big-endian runs list a data load, a constant-pool load and a permute.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt; %v3 is presumably the return value - confirm against the full file.
372 define <4 x float> @load_swap51(ptr %vp1, ptr %vp2) {
373 ; CHECK-P8-LABEL: load_swap51:
375 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
376 ; CHECK-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha
377 ; CHECK-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l
378 ; CHECK-P8-NEXT: xxswapd v2, vs0
379 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
380 ; CHECK-P8-NEXT: xxswapd v3, vs0
381 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
384 ; CHECK-P9-LABEL: load_swap51:
386 ; CHECK-P9-NEXT: lxvw4x v2, 0, r4
389 ; CHECK-P8-BE-LABEL: load_swap51:
390 ; CHECK-P8-BE: # %bb.0:
391 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI10_0@toc@ha
392 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
393 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI10_0@toc@l
394 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
395 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
396 ; CHECK-P8-BE-NEXT: blr
398 ; CHECK-P9-BE-LABEL: load_swap51:
399 ; CHECK-P9-BE: # %bb.0:
400 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI10_0@toc@ha
401 ; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
402 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI10_0@toc@l
403 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
404 ; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0
405 ; CHECK-P9-BE-NEXT: blr
406 %v1 = load <4 x float>, ptr %vp1
407 %v2 = load <4 x float>, ptr %vp2
; Mask indices 4 through 7 address the second operand, so <7, 6, 5, 4> is %v2
; reversed; %v1 is not referenced by the mask.
408 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
; swap_store00: reverse the elements of the <2 x i64> argument %v1 and store the
; result to %vp.
; Visible expectations: the little-endian pwr8/pwr9 runs list a single stxvd2x
; of v2 through r7; the big-endian runs list xxswapd into vs0 followed by a
; store (stxvd2x or stxv).
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
412 define void @swap_store00(<2 x i64> %v1, <2 x i64> %v2, ptr %vp) {
413 ; CHECK-P8-LABEL: swap_store00:
415 ; CHECK-P8-NEXT: stxvd2x v2, 0, r7
418 ; CHECK-P9-LABEL: swap_store00:
420 ; CHECK-P9-NEXT: stxvd2x v2, 0, r7
423 ; CHECK-P8-BE-LABEL: swap_store00:
424 ; CHECK-P8-BE: # %bb.0:
425 ; CHECK-P8-BE-NEXT: xxswapd vs0, v2
426 ; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
427 ; CHECK-P8-BE-NEXT: blr
429 ; CHECK-P9-BE-LABEL: swap_store00:
430 ; CHECK-P9-BE: # %bb.0:
431 ; CHECK-P9-BE-NEXT: xxswapd vs0, v2
432 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
433 ; CHECK-P9-BE-NEXT: blr
; Mask <1, 0> takes both elements from %v1 in reverse order; %v2 is not
; referenced by the mask.
434 %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
435 store <2 x i64> %v3, ptr %vp
; swap_store01: reverse the elements of the <2 x i64> argument %v2 and store the
; result to %vp.
; Visible expectations: the little-endian pwr8/pwr9 runs list a single stxvd2x
; of v3 through r7; the big-endian runs list xxswapd of v3 into vs0 followed by
; a store (stxvd2x or stxv).
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
439 define void @swap_store01(<2 x i64> %v1, <2 x i64> %v2, ptr %vp) {
440 ; CHECK-P8-LABEL: swap_store01:
442 ; CHECK-P8-NEXT: stxvd2x v3, 0, r7
445 ; CHECK-P9-LABEL: swap_store01:
447 ; CHECK-P9-NEXT: stxvd2x v3, 0, r7
450 ; CHECK-P8-BE-LABEL: swap_store01:
451 ; CHECK-P8-BE: # %bb.0:
452 ; CHECK-P8-BE-NEXT: xxswapd vs0, v3
453 ; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
454 ; CHECK-P8-BE-NEXT: blr
456 ; CHECK-P9-BE-LABEL: swap_store01:
457 ; CHECK-P9-BE: # %bb.0:
458 ; CHECK-P9-BE-NEXT: xxswapd vs0, v3
459 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
460 ; CHECK-P9-BE-NEXT: blr
; Mask indices 2 and 3 address the second operand, so <3, 2> is %v2 reversed;
; %v1 is not referenced by the mask.
461 %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
462 store <2 x i64> %v3, ptr %vp
; swap_store10: reverse the elements of the <4 x i32> argument %v1 and store the
; result to %vp.
; Visible expectations: little-endian pwr8 lists a constant-pool load
; (.LCPI13_0) with xxswapd, a vperm, then xxswapd + stxvd2x; little-endian pwr9
; lists only an stxvw4x of v2 through r7. The big-endian runs list a
; constant-pool load, a permute (vperm or xxperm) and a store.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
466 define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, ptr %vp) {
467 ; CHECK-P8-LABEL: swap_store10:
469 ; CHECK-P8-NEXT: addis r3, r2, .LCPI13_0@toc@ha
470 ; CHECK-P8-NEXT: addi r3, r3, .LCPI13_0@toc@l
471 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
472 ; CHECK-P8-NEXT: xxswapd v3, vs0
473 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
474 ; CHECK-P8-NEXT: xxswapd vs0, v2
475 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
478 ; CHECK-P9-LABEL: swap_store10:
480 ; CHECK-P9-NEXT: stxvw4x v2, 0, r7
483 ; CHECK-P8-BE-LABEL: swap_store10:
484 ; CHECK-P8-BE: # %bb.0:
485 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI13_0@toc@ha
486 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI13_0@toc@l
487 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
488 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
489 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
490 ; CHECK-P8-BE-NEXT: blr
492 ; CHECK-P9-BE-LABEL: swap_store10:
493 ; CHECK-P9-BE: # %bb.0:
494 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI13_0@toc@ha
495 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI13_0@toc@l
496 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
497 ; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0
498 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
499 ; CHECK-P9-BE-NEXT: blr
; Mask <3, 2, 1, 0> takes all four elements from %v1 in reverse order; %v2 is
; not referenced by the mask.
500 %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
501 store <4 x i32> %v3, ptr %vp
; swap_store11: reverse the elements of the <4 x i32> argument %v2 and store the
; result to %vp.
; Visible expectations: little-endian pwr8 lists a constant-pool load
; (.LCPI14_0) with xxswapd, a vperm of v3, then xxswapd + stxvd2x; little-endian
; pwr9 lists only an stxvw4x of v3 through r7. The big-endian runs list a
; constant-pool load, a permute (vperm or xxperm) and a store.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
505 define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, ptr %vp) {
506 ; CHECK-P8-LABEL: swap_store11:
508 ; CHECK-P8-NEXT: addis r3, r2, .LCPI14_0@toc@ha
509 ; CHECK-P8-NEXT: addi r3, r3, .LCPI14_0@toc@l
510 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
511 ; CHECK-P8-NEXT: xxswapd v2, vs0
512 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
513 ; CHECK-P8-NEXT: xxswapd vs0, v2
514 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
517 ; CHECK-P9-LABEL: swap_store11:
519 ; CHECK-P9-NEXT: stxvw4x v3, 0, r7
522 ; CHECK-P8-BE-LABEL: swap_store11:
523 ; CHECK-P8-BE: # %bb.0:
524 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI14_0@toc@ha
525 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI14_0@toc@l
526 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
527 ; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
528 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
529 ; CHECK-P8-BE-NEXT: blr
531 ; CHECK-P9-BE-LABEL: swap_store11:
532 ; CHECK-P9-BE: # %bb.0:
533 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI14_0@toc@ha
534 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI14_0@toc@l
535 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
536 ; CHECK-P9-BE-NEXT: xxperm v3, v3, vs0
537 ; CHECK-P9-BE-NEXT: stxv v3, 0(r7)
538 ; CHECK-P9-BE-NEXT: blr
; Mask indices 4 through 7 address the second operand, so <7, 6, 5, 4> is %v2
; reversed; %v1 is not referenced by the mask.
539 %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
540 store <4 x i32> %v3, ptr %vp
; swap_store20: reverse the elements of the <8 x i16> argument %v1 and store the
; result to %vp.
; Visible expectations: little-endian pwr8 lists a constant-pool load
; (.LCPI15_0) with xxswapd, a vperm, then xxswapd + stxvd2x; little-endian pwr9
; lists only an stxvh8x of v2 through r7. The big-endian runs list a
; constant-pool load, a permute (vperm or xxperm) and a store.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
544 define void @swap_store20(<8 x i16> %v1, <8 x i16> %v2, ptr %vp) {
545 ; CHECK-P8-LABEL: swap_store20:
547 ; CHECK-P8-NEXT: addis r3, r2, .LCPI15_0@toc@ha
548 ; CHECK-P8-NEXT: addi r3, r3, .LCPI15_0@toc@l
549 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
550 ; CHECK-P8-NEXT: xxswapd v3, vs0
551 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
552 ; CHECK-P8-NEXT: xxswapd vs0, v2
553 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
556 ; CHECK-P9-LABEL: swap_store20:
558 ; CHECK-P9-NEXT: stxvh8x v2, 0, r7
561 ; CHECK-P8-BE-LABEL: swap_store20:
562 ; CHECK-P8-BE: # %bb.0:
563 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI15_0@toc@ha
564 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI15_0@toc@l
565 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
566 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
567 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
568 ; CHECK-P8-BE-NEXT: blr
570 ; CHECK-P9-BE-LABEL: swap_store20:
571 ; CHECK-P9-BE: # %bb.0:
572 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI15_0@toc@ha
573 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI15_0@toc@l
574 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
575 ; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0
576 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
577 ; CHECK-P9-BE-NEXT: blr
; Mask <7 .. 0> takes all eight elements from %v1 in reverse order; %v2 is not
; referenced by the mask.
578 %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
579 store <8 x i16> %v3, ptr %vp
; swap_store21: reverse the elements of the <8 x i16> argument %v2 and store the
; result to %vp.
; Visible expectations: little-endian pwr8 lists a constant-pool load
; (.LCPI16_0) with xxswapd, a vperm of v3, then xxswapd + stxvd2x; little-endian
; pwr9 lists only an stxvh8x of v3 through r7. The big-endian runs list a
; constant-pool load, a permute (vperm or xxperm) and a store.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
583 define void @swap_store21(<8 x i16> %v1, <8 x i16> %v2, ptr %vp) {
584 ; CHECK-P8-LABEL: swap_store21:
586 ; CHECK-P8-NEXT: addis r3, r2, .LCPI16_0@toc@ha
587 ; CHECK-P8-NEXT: addi r3, r3, .LCPI16_0@toc@l
588 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
589 ; CHECK-P8-NEXT: xxswapd v2, vs0
590 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
591 ; CHECK-P8-NEXT: xxswapd vs0, v2
592 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
595 ; CHECK-P9-LABEL: swap_store21:
597 ; CHECK-P9-NEXT: stxvh8x v3, 0, r7
600 ; CHECK-P8-BE-LABEL: swap_store21:
601 ; CHECK-P8-BE: # %bb.0:
602 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha
603 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l
604 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
605 ; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
606 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
607 ; CHECK-P8-BE-NEXT: blr
609 ; CHECK-P9-BE-LABEL: swap_store21:
610 ; CHECK-P9-BE: # %bb.0:
611 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha
612 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l
613 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
614 ; CHECK-P9-BE-NEXT: xxperm v3, v3, vs0
615 ; CHECK-P9-BE-NEXT: stxv v3, 0(r7)
616 ; CHECK-P9-BE-NEXT: blr
; Mask indices 8 through 15 address the second operand, so <15 .. 8> is %v2
; reversed; %v1 is not referenced by the mask.
617 %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
618 store <8 x i16> %v3, ptr %vp
; swap_store30: reverse the bytes of the <16 x i8> argument %v1 and store the
; result to %vp.
; Visible expectations: little-endian pwr8 lists a constant-pool load
; (.LCPI17_0) with xxswapd, a vperm, then xxswapd + stxvd2x; little-endian pwr9
; lists only an stxvb16x of v2 through r7. Big-endian pwr8 lists a
; constant-pool load, a vperm and stxvw4x; big-endian pwr9 lists xxbrq + stxv.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
622 define void @swap_store30(<16 x i8> %v1, <16 x i8> %v2, ptr %vp) {
623 ; CHECK-P8-LABEL: swap_store30:
625 ; CHECK-P8-NEXT: addis r3, r2, .LCPI17_0@toc@ha
626 ; CHECK-P8-NEXT: addi r3, r3, .LCPI17_0@toc@l
627 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
628 ; CHECK-P8-NEXT: xxswapd v3, vs0
629 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
630 ; CHECK-P8-NEXT: xxswapd vs0, v2
631 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
634 ; CHECK-P9-LABEL: swap_store30:
636 ; CHECK-P9-NEXT: stxvb16x v2, 0, r7
639 ; CHECK-P8-BE-LABEL: swap_store30:
640 ; CHECK-P8-BE: # %bb.0:
641 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha
642 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI17_0@toc@l
643 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
644 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
645 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
646 ; CHECK-P8-BE-NEXT: blr
648 ; CHECK-P9-BE-LABEL: swap_store30:
649 ; CHECK-P9-BE: # %bb.0:
650 ; CHECK-P9-BE-NEXT: xxbrq vs0, v2
651 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
652 ; CHECK-P9-BE-NEXT: blr
; Mask <15 .. 0> takes all sixteen bytes from %v1 in reverse order; %v2 is not
; referenced by the mask.
653 %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
654 store <16 x i8> %v3, ptr %vp
; swap_store31: reverse the bytes of the <16 x i8> argument %v2 and store the
; result to %vp.
; Visible expectations: little-endian pwr8 lists a constant-pool load
; (.LCPI18_0) with xxswapd, a vperm of v3, then xxswapd + stxvd2x; little-endian
; pwr9 lists only an stxvb16x of v3 through r7. Big-endian pwr8 lists a
; constant-pool load, a vperm and stxvw4x; big-endian pwr9 lists xxbrq + stxv.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
658 define void @swap_store31(<16 x i8> %v1, <16 x i8> %v2, ptr %vp) {
659 ; CHECK-P8-LABEL: swap_store31:
661 ; CHECK-P8-NEXT: addis r3, r2, .LCPI18_0@toc@ha
662 ; CHECK-P8-NEXT: addi r3, r3, .LCPI18_0@toc@l
663 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
664 ; CHECK-P8-NEXT: xxswapd v2, vs0
665 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
666 ; CHECK-P8-NEXT: xxswapd vs0, v2
667 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
670 ; CHECK-P9-LABEL: swap_store31:
672 ; CHECK-P9-NEXT: stxvb16x v3, 0, r7
675 ; CHECK-P8-BE-LABEL: swap_store31:
676 ; CHECK-P8-BE: # %bb.0:
677 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI18_0@toc@ha
678 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI18_0@toc@l
679 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
680 ; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
681 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
682 ; CHECK-P8-BE-NEXT: blr
684 ; CHECK-P9-BE-LABEL: swap_store31:
685 ; CHECK-P9-BE: # %bb.0:
686 ; CHECK-P9-BE-NEXT: xxbrq vs0, v3
687 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
688 ; CHECK-P9-BE-NEXT: blr
; Mask indices 16 through 31 address the second operand, so <31 .. 16> is %v2
; reversed; %v1 is not referenced by the mask.
689 %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
690 store <16 x i8> %v3, ptr %vp
; swap_store40: reverse the elements of the <2 x double> argument %v1 and store
; the result to %vp.
; Visible expectations: the little-endian pwr8/pwr9 runs list a single stxvd2x
; of v2 through r7; the big-endian runs list xxswapd into vs0 followed by a
; store (stxvd2x or stxv).
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
694 define void @swap_store40(<2 x double> %v1, <2 x double> %v2, ptr %vp) {
695 ; CHECK-P8-LABEL: swap_store40:
697 ; CHECK-P8-NEXT: stxvd2x v2, 0, r7
700 ; CHECK-P9-LABEL: swap_store40:
702 ; CHECK-P9-NEXT: stxvd2x v2, 0, r7
705 ; CHECK-P8-BE-LABEL: swap_store40:
706 ; CHECK-P8-BE: # %bb.0:
707 ; CHECK-P8-BE-NEXT: xxswapd vs0, v2
708 ; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
709 ; CHECK-P8-BE-NEXT: blr
711 ; CHECK-P9-BE-LABEL: swap_store40:
712 ; CHECK-P9-BE: # %bb.0:
713 ; CHECK-P9-BE-NEXT: xxswapd vs0, v2
714 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
715 ; CHECK-P9-BE-NEXT: blr
; Mask <1, 0> takes both elements from %v1 in reverse order; %v2 is not
; referenced by the mask.
716 %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 1, i32 0>
717 store <2 x double> %v3, ptr %vp
; swap_store41: reverse the elements of the <2 x double> argument %v2 and store
; the result to %vp.
; Visible expectations: the little-endian pwr8/pwr9 runs list a single stxvd2x
; of v3 through r7; the big-endian runs list xxswapd of v3 into vs0 followed by
; a store (stxvd2x or stxv).
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
721 define void @swap_store41(<2 x double> %v1, <2 x double> %v2, ptr %vp) {
722 ; CHECK-P8-LABEL: swap_store41:
724 ; CHECK-P8-NEXT: stxvd2x v3, 0, r7
727 ; CHECK-P9-LABEL: swap_store41:
729 ; CHECK-P9-NEXT: stxvd2x v3, 0, r7
732 ; CHECK-P8-BE-LABEL: swap_store41:
733 ; CHECK-P8-BE: # %bb.0:
734 ; CHECK-P8-BE-NEXT: xxswapd vs0, v3
735 ; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
736 ; CHECK-P8-BE-NEXT: blr
738 ; CHECK-P9-BE-LABEL: swap_store41:
739 ; CHECK-P9-BE: # %bb.0:
740 ; CHECK-P9-BE-NEXT: xxswapd vs0, v3
741 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
742 ; CHECK-P9-BE-NEXT: blr
; Mask indices 2 and 3 address the second operand, so <3, 2> is %v2 reversed;
; %v1 is not referenced by the mask.
743 %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
744 store <2 x double> %v3, ptr %vp
; swap_store50: reverse the elements of the <4 x float> argument %v1 and store
; the result to %vp.
; Visible expectations: little-endian pwr8 lists a constant-pool load
; (.LCPI21_0) with xxswapd, a vperm, then xxswapd + stxvd2x; little-endian pwr9
; lists only an stxvw4x of v2 through r7. The big-endian runs list a
; constant-pool load, a permute (vperm or xxperm) and a store.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
748 define void @swap_store50(<4 x float> %v1, <4 x float> %v2, ptr %vp) {
749 ; CHECK-P8-LABEL: swap_store50:
751 ; CHECK-P8-NEXT: addis r3, r2, .LCPI21_0@toc@ha
752 ; CHECK-P8-NEXT: addi r3, r3, .LCPI21_0@toc@l
753 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
754 ; CHECK-P8-NEXT: xxswapd v3, vs0
755 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
756 ; CHECK-P8-NEXT: xxswapd vs0, v2
757 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
760 ; CHECK-P9-LABEL: swap_store50:
762 ; CHECK-P9-NEXT: stxvw4x v2, 0, r7
765 ; CHECK-P8-BE-LABEL: swap_store50:
766 ; CHECK-P8-BE: # %bb.0:
767 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI21_0@toc@ha
768 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI21_0@toc@l
769 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
770 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
771 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
772 ; CHECK-P8-BE-NEXT: blr
774 ; CHECK-P9-BE-LABEL: swap_store50:
775 ; CHECK-P9-BE: # %bb.0:
776 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI21_0@toc@ha
777 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI21_0@toc@l
778 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
779 ; CHECK-P9-BE-NEXT: xxperm v2, v2, vs0
780 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
781 ; CHECK-P9-BE-NEXT: blr
; Mask <3, 2, 1, 0> takes all four elements from %v1 in reverse order; %v2 is
; not referenced by the mask.
782 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
783 store <4 x float> %v3, ptr %vp
; swap_store51: reverse the elements of the <4 x float> argument %v2 and store
; the result to %vp.
; Visible expectations: little-endian pwr8 lists a constant-pool load
; (.LCPI22_0) with xxswapd, a vperm of v3, then xxswapd + stxvd2x; little-endian
; pwr9 lists only an stxvw4x of v3 through r7. The big-endian runs list a
; constant-pool load, a permute (vperm or xxperm) and a store.
; NOTE(review): no ret or closing brace is visible for this function in this
; excerpt - confirm against the full file.
787 define void @swap_store51(<4 x float> %v1, <4 x float> %v2, ptr %vp) {
788 ; CHECK-P8-LABEL: swap_store51:
790 ; CHECK-P8-NEXT: addis r3, r2, .LCPI22_0@toc@ha
791 ; CHECK-P8-NEXT: addi r3, r3, .LCPI22_0@toc@l
792 ; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
793 ; CHECK-P8-NEXT: xxswapd v2, vs0
794 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
795 ; CHECK-P8-NEXT: xxswapd vs0, v2
796 ; CHECK-P8-NEXT: stxvd2x vs0, 0, r7
799 ; CHECK-P9-LABEL: swap_store51:
801 ; CHECK-P9-NEXT: stxvw4x v3, 0, r7
804 ; CHECK-P8-BE-LABEL: swap_store51:
805 ; CHECK-P8-BE: # %bb.0:
806 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI22_0@toc@ha
807 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI22_0@toc@l
808 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
809 ; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
810 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
811 ; CHECK-P8-BE-NEXT: blr
813 ; CHECK-P9-BE-LABEL: swap_store51:
814 ; CHECK-P9-BE: # %bb.0:
815 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI22_0@toc@ha
816 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI22_0@toc@l
817 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
818 ; CHECK-P9-BE-NEXT: xxperm v3, v3, vs0
819 ; CHECK-P9-BE-NEXT: stxv v3, 0(r7)
820 ; CHECK-P9-BE-NEXT: blr
; Mask indices 4 through 7 address the second operand, so <7, 6, 5, 4> is %v2
; reversed; %v1 is not referenced by the mask.
821 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
822 store <4 x float> %v3, ptr %vp