1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
3 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8
5 ; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
6 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9
8 ; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
9 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8-BE
11 ; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
12 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9-BE
; load_swap00: element-reversing shuffle (mask <1, 0>) of the <2 x i64> loaded
; from %vp1; %v2 is loaded too, but no mask index selects it.
; Visible checks: little-endian P8/P9 expect lxvd2x from r3; big-endian P8/P9
; expect a load (lxvd2x / lxv) from r3 followed by xxswapd.
14 define <2 x i64> @load_swap00(<2 x i64>* %vp1, <2 x i64>* %vp2) {
15 ; CHECK-P8-LABEL: load_swap00:
17 ; CHECK-P8-NEXT: lxvd2x v2, 0, r3
20 ; CHECK-P9-LABEL: load_swap00:
22 ; CHECK-P9-NEXT: lxvd2x v2, 0, r3
25 ; CHECK-P8-BE-LABEL: load_swap00:
26 ; CHECK-P8-BE: # %bb.0:
27 ; CHECK-P8-BE-NEXT: lxvd2x v2, 0, r3
28 ; CHECK-P8-BE-NEXT: xxswapd v2, v2
29 ; CHECK-P8-BE-NEXT: blr
31 ; CHECK-P9-BE-LABEL: load_swap00:
32 ; CHECK-P9-BE: # %bb.0:
33 ; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
34 ; CHECK-P9-BE-NEXT: xxswapd v2, v2
35 ; CHECK-P9-BE-NEXT: blr
36 %v1 = load <2 x i64>, <2 x i64>* %vp1
37 %v2 = load <2 x i64>, <2 x i64>* %vp2
38 %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
; load_swap01: element-reversing shuffle (mask <3, 2>) that selects only from
; the second operand, i.e. the <2 x i64> loaded from %vp2.
; Visible checks: little-endian P8/P9 expect lxvd2x from r4; big-endian P8/P9
; expect a load (lxvd2x / lxv) from r4 followed by xxswapd.
42 define <2 x i64> @load_swap01(<2 x i64>* %vp1, <2 x i64>* %vp2) {
43 ; CHECK-P8-LABEL: load_swap01:
45 ; CHECK-P8-NEXT: lxvd2x v2, 0, r4
48 ; CHECK-P9-LABEL: load_swap01:
50 ; CHECK-P9-NEXT: lxvd2x v2, 0, r4
53 ; CHECK-P8-BE-LABEL: load_swap01:
54 ; CHECK-P8-BE: # %bb.0:
55 ; CHECK-P8-BE-NEXT: lxvd2x v2, 0, r4
56 ; CHECK-P8-BE-NEXT: xxswapd v2, v2
57 ; CHECK-P8-BE-NEXT: blr
59 ; CHECK-P9-BE-LABEL: load_swap01:
60 ; CHECK-P9-BE: # %bb.0:
61 ; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
62 ; CHECK-P9-BE-NEXT: xxswapd v2, v2
63 ; CHECK-P9-BE-NEXT: blr
64 %v1 = load <2 x i64>, <2 x i64>* %vp1
65 %v2 = load <2 x i64>, <2 x i64>* %vp2
66 %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
; load_swap10: element-reversing shuffle (mask <3, 2, 1, 0>) of the <4 x i32>
; loaded from %vp1; %v2 is loaded too, but no mask index selects it.
; Visible checks: little-endian P9 expects lxvw4x from r3. Little-endian P8 and
; both big-endian runs expect the data load, a load of constant-pool entry
; .LCPI2_0, and a vperm that takes that constant as its last operand.
70 define <4 x i32> @load_swap10(<4 x i32>* %vp1, <4 x i32>* %vp2) {
71 ; CHECK-P8-LABEL: load_swap10:
73 ; CHECK-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
74 ; CHECK-P8-NEXT: lvx v3, 0, r3
75 ; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
76 ; CHECK-P8-NEXT: lvx v2, 0, r4
77 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
80 ; CHECK-P9-LABEL: load_swap10:
82 ; CHECK-P9-NEXT: lxvw4x v2, 0, r3
85 ; CHECK-P8-BE-LABEL: load_swap10:
86 ; CHECK-P8-BE: # %bb.0:
87 ; CHECK-P8-BE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
88 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
89 ; CHECK-P8-BE-NEXT: addi r4, r4, .LCPI2_0@toc@l
90 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r4
91 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
92 ; CHECK-P8-BE-NEXT: blr
94 ; CHECK-P9-BE-LABEL: load_swap10:
95 ; CHECK-P9-BE: # %bb.0:
96 ; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
97 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI2_0@toc@ha
98 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI2_0@toc@l
99 ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
100 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
101 ; CHECK-P9-BE-NEXT: blr
102 %v1 = load <4 x i32>, <4 x i32>* %vp1
103 %v2 = load <4 x i32>, <4 x i32>* %vp2
104 %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; load_swap11: element-reversing shuffle (mask <7, 6, 5, 4>) that selects only
; from the second operand, i.e. the <4 x i32> loaded from %vp2.
; Visible checks: little-endian P9 expects lxvw4x from r4. Little-endian P8 and
; both big-endian runs expect the data load from r4, a load of constant-pool
; entry .LCPI3_0, and a vperm that takes that constant as its last operand.
108 define <4 x i32> @load_swap11(<4 x i32>* %vp1, <4 x i32>* %vp2) {
109 ; CHECK-P8-LABEL: load_swap11:
111 ; CHECK-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
112 ; CHECK-P8-NEXT: lvx v3, 0, r4
113 ; CHECK-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
114 ; CHECK-P8-NEXT: lvx v2, 0, r3
115 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
118 ; CHECK-P9-LABEL: load_swap11:
120 ; CHECK-P9-NEXT: lxvw4x v2, 0, r4
123 ; CHECK-P8-BE-LABEL: load_swap11:
124 ; CHECK-P8-BE: # %bb.0:
125 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
126 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
127 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
128 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
129 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
130 ; CHECK-P8-BE-NEXT: blr
132 ; CHECK-P9-BE-LABEL: load_swap11:
133 ; CHECK-P9-BE: # %bb.0:
134 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha
135 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI3_0@toc@l
136 ; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
137 ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
138 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
139 ; CHECK-P9-BE-NEXT: blr
140 %v1 = load <4 x i32>, <4 x i32>* %vp1
141 %v2 = load <4 x i32>, <4 x i32>* %vp2
142 %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
; load_swap20: element-reversing shuffle (mask <7, ..., 0>) of the <8 x i16>
; loaded from %vp1; %v2 is loaded too, but no mask index selects it.
; Visible checks: little-endian P9 expects lxvh8x from r3. Little-endian P8 and
; both big-endian runs expect the data load, a load of constant-pool entry
; .LCPI4_0, and a vperm that takes that constant as its last operand.
146 define <8 x i16> @load_swap20(<8 x i16>* %vp1, <8 x i16>* %vp2){
147 ; CHECK-P8-LABEL: load_swap20:
149 ; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha
150 ; CHECK-P8-NEXT: lvx v3, 0, r3
151 ; CHECK-P8-NEXT: addi r4, r4, .LCPI4_0@toc@l
152 ; CHECK-P8-NEXT: lvx v2, 0, r4
153 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
156 ; CHECK-P9-LABEL: load_swap20:
158 ; CHECK-P9-NEXT: lxvh8x v2, 0, r3
161 ; CHECK-P8-BE-LABEL: load_swap20:
162 ; CHECK-P8-BE: # %bb.0:
163 ; CHECK-P8-BE-NEXT: addis r4, r2, .LCPI4_0@toc@ha
164 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
165 ; CHECK-P8-BE-NEXT: addi r4, r4, .LCPI4_0@toc@l
166 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r4
167 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
168 ; CHECK-P8-BE-NEXT: blr
170 ; CHECK-P9-BE-LABEL: load_swap20:
171 ; CHECK-P9-BE: # %bb.0:
172 ; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
173 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
174 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI4_0@toc@l
175 ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
176 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
177 ; CHECK-P9-BE-NEXT: blr
178 %v1 = load <8 x i16>, <8 x i16>* %vp1
179 %v2 = load <8 x i16>, <8 x i16>* %vp2
180 %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; load_swap21: element-reversing shuffle (mask <15, ..., 8>) that selects only
; from the second operand, i.e. the <8 x i16> loaded from %vp2.
; Visible checks: little-endian P9 expects lxvh8x from r4. Little-endian P8 and
; both big-endian runs expect the data load from r4, a load of constant-pool
; entry .LCPI5_0, and a vperm that takes that constant as its last operand.
184 define <8 x i16> @load_swap21(<8 x i16>* %vp1, <8 x i16>* %vp2){
185 ; CHECK-P8-LABEL: load_swap21:
187 ; CHECK-P8-NEXT: addis r3, r2, .LCPI5_0@toc@ha
188 ; CHECK-P8-NEXT: lvx v3, 0, r4
189 ; CHECK-P8-NEXT: addi r3, r3, .LCPI5_0@toc@l
190 ; CHECK-P8-NEXT: lvx v2, 0, r3
191 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
194 ; CHECK-P9-LABEL: load_swap21:
196 ; CHECK-P9-NEXT: lxvh8x v2, 0, r4
199 ; CHECK-P8-BE-LABEL: load_swap21:
200 ; CHECK-P8-BE: # %bb.0:
201 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha
202 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
203 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l
204 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
205 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
206 ; CHECK-P8-BE-NEXT: blr
208 ; CHECK-P9-BE-LABEL: load_swap21:
209 ; CHECK-P9-BE: # %bb.0:
210 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI5_0@toc@ha
211 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI5_0@toc@l
212 ; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
213 ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
214 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
215 ; CHECK-P9-BE-NEXT: blr
216 %v1 = load <8 x i16>, <8 x i16>* %vp1
217 %v2 = load <8 x i16>, <8 x i16>* %vp2
218 %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
; load_swap30: element-reversing shuffle (mask <15, ..., 0>) of the <16 x i8>
; loaded from %vp1; %v2 is loaded too, but no mask index selects it.
; Visible checks: little-endian P9 expects lxvb16x from r3; big-endian P9
; expects lxv followed by xxbrq. Both P8 runs expect the data load, a load of
; constant-pool entry .LCPI6_0, and a vperm that takes that constant as its
; last operand.
222 define <16 x i8> @load_swap30(<16 x i8>* %vp1, <16 x i8>* %vp2){
223 ; CHECK-P8-LABEL: load_swap30:
225 ; CHECK-P8-NEXT: addis r4, r2, .LCPI6_0@toc@ha
226 ; CHECK-P8-NEXT: lvx v3, 0, r3
227 ; CHECK-P8-NEXT: addi r4, r4, .LCPI6_0@toc@l
228 ; CHECK-P8-NEXT: lvx v2, 0, r4
229 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
232 ; CHECK-P9-LABEL: load_swap30:
234 ; CHECK-P9-NEXT: lxvb16x v2, 0, r3
237 ; CHECK-P8-BE-LABEL: load_swap30:
238 ; CHECK-P8-BE: # %bb.0:
239 ; CHECK-P8-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha
240 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
241 ; CHECK-P8-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l
242 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r4
243 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
244 ; CHECK-P8-BE-NEXT: blr
246 ; CHECK-P9-BE-LABEL: load_swap30:
247 ; CHECK-P9-BE: # %bb.0:
248 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r3)
249 ; CHECK-P9-BE-NEXT: xxbrq v2, vs0
250 ; CHECK-P9-BE-NEXT: blr
251 %v1 = load <16 x i8>, <16 x i8>* %vp1
252 %v2 = load <16 x i8>, <16 x i8>* %vp2
253 %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; load_swap31: element-reversing shuffle (mask <31, ..., 16>) that selects only
; from the second operand, i.e. the <16 x i8> loaded from %vp2.
; Visible checks: little-endian P9 expects lxvb16x from r4; big-endian P9
; expects lxv from r4 followed by xxbrq. Both P8 runs expect the data load, a
; load of constant-pool entry .LCPI7_0, and a vperm that takes that constant as
; its last operand.
257 define <16 x i8> @load_swap31(<16 x i8>* %vp1, <16 x i8>* %vp2){
258 ; CHECK-P8-LABEL: load_swap31:
260 ; CHECK-P8-NEXT: addis r3, r2, .LCPI7_0@toc@ha
261 ; CHECK-P8-NEXT: lvx v3, 0, r4
262 ; CHECK-P8-NEXT: addi r3, r3, .LCPI7_0@toc@l
263 ; CHECK-P8-NEXT: lvx v2, 0, r3
264 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
267 ; CHECK-P9-LABEL: load_swap31:
269 ; CHECK-P9-NEXT: lxvb16x v2, 0, r4
272 ; CHECK-P8-BE-LABEL: load_swap31:
273 ; CHECK-P8-BE: # %bb.0:
274 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI7_0@toc@ha
275 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
276 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI7_0@toc@l
277 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
278 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
279 ; CHECK-P8-BE-NEXT: blr
281 ; CHECK-P9-BE-LABEL: load_swap31:
282 ; CHECK-P9-BE: # %bb.0:
283 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r4)
284 ; CHECK-P9-BE-NEXT: xxbrq v2, vs0
285 ; CHECK-P9-BE-NEXT: blr
286 %v1 = load <16 x i8>, <16 x i8>* %vp1
287 %v2 = load <16 x i8>, <16 x i8>* %vp2
288 %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
; load_swap40: element-reversing shuffle (mask <3, 2>) that selects only from
; the second operand, i.e. the <2 x double> loaded from %vp2.
; Visible checks: little-endian P8/P9 expect lxvd2x from r4; big-endian P8/P9
; expect a load (lxvd2x / lxv) into vs0 followed by xxswapd into v2.
; NOTE(review): despite the "40" suffix this tests the second operand, like the
; "x1" variants elsewhere in this file - confirm the naming is intentional.
292 define <2 x double> @load_swap40(<2 x double>* %vp1, <2 x double>* %vp2) {
293 ; CHECK-P8-LABEL: load_swap40:
295 ; CHECK-P8-NEXT: lxvd2x v2, 0, r4
298 ; CHECK-P9-LABEL: load_swap40:
300 ; CHECK-P9-NEXT: lxvd2x v2, 0, r4
303 ; CHECK-P8-BE-LABEL: load_swap40:
304 ; CHECK-P8-BE: # %bb.0:
305 ; CHECK-P8-BE-NEXT: lxvd2x vs0, 0, r4
306 ; CHECK-P8-BE-NEXT: xxswapd v2, vs0
307 ; CHECK-P8-BE-NEXT: blr
309 ; CHECK-P9-BE-LABEL: load_swap40:
310 ; CHECK-P9-BE: # %bb.0:
311 ; CHECK-P9-BE-NEXT: lxv vs0, 0(r4)
312 ; CHECK-P9-BE-NEXT: xxswapd v2, vs0
313 ; CHECK-P9-BE-NEXT: blr
314 %v1 = load <2 x double>, <2 x double>* %vp1
315 %v2 = load <2 x double>, <2 x double>* %vp2
316 %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
; load_swap50: element-reversing shuffle (mask <3, 2, 1, 0>) of the
; <4 x float> loaded from %vp1; %v2 is loaded too, but no mask index selects it.
; Visible checks: little-endian P9 expects lxvw4x from r3. Little-endian P8 and
; both big-endian runs expect the data load, a load of constant-pool entry
; .LCPI9_0, and a vperm that takes that constant as its last operand.
320 define <4 x float> @load_swap50(<4 x float>* %vp1, <4 x float>* %vp2) {
321 ; CHECK-P8-LABEL: load_swap50:
323 ; CHECK-P8-NEXT: addis r4, r2, .LCPI9_0@toc@ha
324 ; CHECK-P8-NEXT: lvx v3, 0, r3
325 ; CHECK-P8-NEXT: addi r4, r4, .LCPI9_0@toc@l
326 ; CHECK-P8-NEXT: lvx v2, 0, r4
327 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
330 ; CHECK-P9-LABEL: load_swap50:
332 ; CHECK-P9-NEXT: lxvw4x v2, 0, r3
335 ; CHECK-P8-BE-LABEL: load_swap50:
336 ; CHECK-P8-BE: # %bb.0:
337 ; CHECK-P8-BE-NEXT: addis r4, r2, .LCPI9_0@toc@ha
338 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
339 ; CHECK-P8-BE-NEXT: addi r4, r4, .LCPI9_0@toc@l
340 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r4
341 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
342 ; CHECK-P8-BE-NEXT: blr
344 ; CHECK-P9-BE-LABEL: load_swap50:
345 ; CHECK-P9-BE: # %bb.0:
346 ; CHECK-P9-BE-NEXT: lxv v2, 0(r3)
347 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI9_0@toc@ha
348 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI9_0@toc@l
349 ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
350 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
351 ; CHECK-P9-BE-NEXT: blr
352 %v1 = load <4 x float>, <4 x float>* %vp1
353 %v2 = load <4 x float>, <4 x float>* %vp2
354 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; load_swap51: element-reversing shuffle (mask <7, 6, 5, 4>) that selects only
; from the second operand, i.e. the <4 x float> loaded from %vp2.
; Visible checks: little-endian P9 expects lxvw4x from r4. Little-endian P8 and
; both big-endian runs expect the data load from r4, a load of constant-pool
; entry .LCPI10_0, and a vperm that takes that constant as its last operand.
358 define <4 x float> @load_swap51(<4 x float>* %vp1, <4 x float>* %vp2) {
359 ; CHECK-P8-LABEL: load_swap51:
361 ; CHECK-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha
362 ; CHECK-P8-NEXT: lvx v3, 0, r4
363 ; CHECK-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l
364 ; CHECK-P8-NEXT: lvx v2, 0, r3
365 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
368 ; CHECK-P9-LABEL: load_swap51:
370 ; CHECK-P9-NEXT: lxvw4x v2, 0, r4
373 ; CHECK-P8-BE-LABEL: load_swap51:
374 ; CHECK-P8-BE: # %bb.0:
375 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI10_0@toc@ha
376 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r4
377 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI10_0@toc@l
378 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
379 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
380 ; CHECK-P8-BE-NEXT: blr
382 ; CHECK-P9-BE-LABEL: load_swap51:
383 ; CHECK-P9-BE: # %bb.0:
384 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI10_0@toc@ha
385 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI10_0@toc@l
386 ; CHECK-P9-BE-NEXT: lxv v2, 0(r4)
387 ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
388 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
389 ; CHECK-P9-BE-NEXT: blr
390 %v1 = load <4 x float>, <4 x float>* %vp1
391 %v2 = load <4 x float>, <4 x float>* %vp2
392 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
; swap_store00: element-reversing shuffle (mask <1, 0>) of the <2 x i64>
; argument %v1, stored to %vp; %v2 is not selected by the mask.
; Visible checks: little-endian P8/P9 expect stxvd2x of v2 to r7; big-endian
; P8/P9 expect xxswapd into vs0 followed by a store (stxvd2x / stxv).
396 define void @swap_store00(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) {
397 ; CHECK-P8-LABEL: swap_store00:
399 ; CHECK-P8-NEXT: stxvd2x v2, 0, r7
402 ; CHECK-P9-LABEL: swap_store00:
404 ; CHECK-P9-NEXT: stxvd2x v2, 0, r7
407 ; CHECK-P8-BE-LABEL: swap_store00:
408 ; CHECK-P8-BE: # %bb.0:
409 ; CHECK-P8-BE-NEXT: xxswapd vs0, v2
410 ; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
411 ; CHECK-P8-BE-NEXT: blr
413 ; CHECK-P9-BE-LABEL: swap_store00:
414 ; CHECK-P9-BE: # %bb.0:
415 ; CHECK-P9-BE-NEXT: xxswapd vs0, v2
416 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
417 ; CHECK-P9-BE-NEXT: blr
418 %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
419 store <2 x i64> %v3, <2 x i64>* %vp
; swap_store01: element-reversing shuffle (mask <3, 2>) that selects only from
; the second operand %v2 (<2 x i64>), stored to %vp.
; Visible checks: little-endian P8/P9 expect stxvd2x of v3 to r7; big-endian
; P8/P9 expect xxswapd of v3 into vs0 followed by a store (stxvd2x / stxv).
423 define void @swap_store01(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) {
424 ; CHECK-P8-LABEL: swap_store01:
426 ; CHECK-P8-NEXT: stxvd2x v3, 0, r7
429 ; CHECK-P9-LABEL: swap_store01:
431 ; CHECK-P9-NEXT: stxvd2x v3, 0, r7
434 ; CHECK-P8-BE-LABEL: swap_store01:
435 ; CHECK-P8-BE: # %bb.0:
436 ; CHECK-P8-BE-NEXT: xxswapd vs0, v3
437 ; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
438 ; CHECK-P8-BE-NEXT: blr
440 ; CHECK-P9-BE-LABEL: swap_store01:
441 ; CHECK-P9-BE: # %bb.0:
442 ; CHECK-P9-BE-NEXT: xxswapd vs0, v3
443 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
444 ; CHECK-P9-BE-NEXT: blr
445 %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
446 store <2 x i64> %v3, <2 x i64>* %vp
; swap_store10: element-reversing shuffle (mask <3, 2, 1, 0>) of the <4 x i32>
; argument %v1, stored to %vp; %v2 is not selected by the mask.
; Visible checks: little-endian P9 expects stxvw4x of v2 to r7. Little-endian
; P8 and both big-endian runs expect a load of constant-pool entry .LCPI13_0,
; a vperm of v2 with that constant as its last operand, and a store to r7.
450 define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
451 ; CHECK-P8-LABEL: swap_store10:
453 ; CHECK-P8-NEXT: addis r3, r2, .LCPI13_0@toc@ha
454 ; CHECK-P8-NEXT: addi r3, r3, .LCPI13_0@toc@l
455 ; CHECK-P8-NEXT: lvx v3, 0, r3
456 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
457 ; CHECK-P8-NEXT: stvx v2, 0, r7
460 ; CHECK-P9-LABEL: swap_store10:
462 ; CHECK-P9-NEXT: stxvw4x v2, 0, r7
465 ; CHECK-P8-BE-LABEL: swap_store10:
466 ; CHECK-P8-BE: # %bb.0:
467 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI13_0@toc@ha
468 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI13_0@toc@l
469 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
470 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
471 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
472 ; CHECK-P8-BE-NEXT: blr
474 ; CHECK-P9-BE-LABEL: swap_store10:
475 ; CHECK-P9-BE: # %bb.0:
476 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI13_0@toc@ha
477 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI13_0@toc@l
478 ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
479 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
480 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
481 ; CHECK-P9-BE-NEXT: blr
482 %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
483 store <4 x i32> %v3, <4 x i32>* %vp
; swap_store11: element-reversing shuffle (mask <7, 6, 5, 4>) that selects
; only from the second operand %v2 (<4 x i32>), stored to %vp.
; Visible checks: little-endian P9 expects stxvw4x of v3 to r7. Little-endian
; P8 and both big-endian runs expect a load of constant-pool entry .LCPI14_0,
; a vperm of v3 with that constant as its last operand, and a store to r7.
487 define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) {
488 ; CHECK-P8-LABEL: swap_store11:
490 ; CHECK-P8-NEXT: addis r3, r2, .LCPI14_0@toc@ha
491 ; CHECK-P8-NEXT: addi r3, r3, .LCPI14_0@toc@l
492 ; CHECK-P8-NEXT: lvx v2, 0, r3
493 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
494 ; CHECK-P8-NEXT: stvx v2, 0, r7
497 ; CHECK-P9-LABEL: swap_store11:
499 ; CHECK-P9-NEXT: stxvw4x v3, 0, r7
502 ; CHECK-P8-BE-LABEL: swap_store11:
503 ; CHECK-P8-BE: # %bb.0:
504 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI14_0@toc@ha
505 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI14_0@toc@l
506 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
507 ; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
508 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
509 ; CHECK-P8-BE-NEXT: blr
511 ; CHECK-P9-BE-LABEL: swap_store11:
512 ; CHECK-P9-BE: # %bb.0:
513 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI14_0@toc@ha
514 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI14_0@toc@l
515 ; CHECK-P9-BE-NEXT: lxvx v2, 0, r3
516 ; CHECK-P9-BE-NEXT: vperm v2, v3, v3, v2
517 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
518 ; CHECK-P9-BE-NEXT: blr
519 %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
520 store <4 x i32> %v3, <4 x i32>* %vp
; swap_store20: element-reversing shuffle (mask <7, ..., 0>) of the <8 x i16>
; argument %v1, stored to %vp; %v2 is not selected by the mask.
; Visible checks: little-endian P9 expects stxvh8x of v2 to r7. Little-endian
; P8 and both big-endian runs expect a load of constant-pool entry .LCPI15_0,
; a vperm of v2 with that constant as its last operand, and a store to r7.
524 define void @swap_store20(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
525 ; CHECK-P8-LABEL: swap_store20:
527 ; CHECK-P8-NEXT: addis r3, r2, .LCPI15_0@toc@ha
528 ; CHECK-P8-NEXT: addi r3, r3, .LCPI15_0@toc@l
529 ; CHECK-P8-NEXT: lvx v3, 0, r3
530 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
531 ; CHECK-P8-NEXT: stvx v2, 0, r7
534 ; CHECK-P9-LABEL: swap_store20:
536 ; CHECK-P9-NEXT: stxvh8x v2, 0, r7
539 ; CHECK-P8-BE-LABEL: swap_store20:
540 ; CHECK-P8-BE: # %bb.0:
541 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI15_0@toc@ha
542 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI15_0@toc@l
543 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
544 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
545 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
546 ; CHECK-P8-BE-NEXT: blr
548 ; CHECK-P9-BE-LABEL: swap_store20:
549 ; CHECK-P9-BE: # %bb.0:
550 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI15_0@toc@ha
551 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI15_0@toc@l
552 ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
553 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
554 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
555 ; CHECK-P9-BE-NEXT: blr
556 %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
557 store <8 x i16> %v3, <8 x i16>* %vp
; swap_store21: element-reversing shuffle (mask <15, ..., 8>) that selects
; only from the second operand %v2 (<8 x i16>), stored to %vp.
; Visible checks: little-endian P9 expects stxvh8x of v3 to r7. Little-endian
; P8 and both big-endian runs expect a load of constant-pool entry .LCPI16_0,
; a vperm of v3 with that constant as its last operand, and a store to r7.
561 define void @swap_store21(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) {
562 ; CHECK-P8-LABEL: swap_store21:
564 ; CHECK-P8-NEXT: addis r3, r2, .LCPI16_0@toc@ha
565 ; CHECK-P8-NEXT: addi r3, r3, .LCPI16_0@toc@l
566 ; CHECK-P8-NEXT: lvx v2, 0, r3
567 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
568 ; CHECK-P8-NEXT: stvx v2, 0, r7
571 ; CHECK-P9-LABEL: swap_store21:
573 ; CHECK-P9-NEXT: stxvh8x v3, 0, r7
576 ; CHECK-P8-BE-LABEL: swap_store21:
577 ; CHECK-P8-BE: # %bb.0:
578 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha
579 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l
580 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
581 ; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
582 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
583 ; CHECK-P8-BE-NEXT: blr
585 ; CHECK-P9-BE-LABEL: swap_store21:
586 ; CHECK-P9-BE: # %bb.0:
587 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI16_0@toc@ha
588 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI16_0@toc@l
589 ; CHECK-P9-BE-NEXT: lxvx v2, 0, r3
590 ; CHECK-P9-BE-NEXT: vperm v2, v3, v3, v2
591 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
592 ; CHECK-P9-BE-NEXT: blr
593 %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
594 store <8 x i16> %v3, <8 x i16>* %vp
; swap_store30: element-reversing shuffle (mask <15, ..., 0>) of the <16 x i8>
; argument %v1, stored to %vp; %v2 is not selected by the mask.
; Visible checks: little-endian P9 expects stxvb16x of v2 to r7; big-endian P9
; expects xxbrq into vs0 followed by stxv. Both P8 runs expect a load of
; constant-pool entry .LCPI17_0, a vperm of v2 with that constant as its last
; operand, and a store to r7.
598 define void @swap_store30(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
599 ; CHECK-P8-LABEL: swap_store30:
601 ; CHECK-P8-NEXT: addis r3, r2, .LCPI17_0@toc@ha
602 ; CHECK-P8-NEXT: addi r3, r3, .LCPI17_0@toc@l
603 ; CHECK-P8-NEXT: lvx v3, 0, r3
604 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
605 ; CHECK-P8-NEXT: stvx v2, 0, r7
608 ; CHECK-P9-LABEL: swap_store30:
610 ; CHECK-P9-NEXT: stxvb16x v2, 0, r7
613 ; CHECK-P8-BE-LABEL: swap_store30:
614 ; CHECK-P8-BE: # %bb.0:
615 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI17_0@toc@ha
616 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI17_0@toc@l
617 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
618 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
619 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
620 ; CHECK-P8-BE-NEXT: blr
622 ; CHECK-P9-BE-LABEL: swap_store30:
623 ; CHECK-P9-BE: # %bb.0:
624 ; CHECK-P9-BE-NEXT: xxbrq vs0, v2
625 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
626 ; CHECK-P9-BE-NEXT: blr
627 %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
628 store <16 x i8> %v3, <16 x i8>* %vp
; swap_store31: element-reversing shuffle (mask <31, ..., 16>) that selects
; only from the second operand %v2 (<16 x i8>), stored to %vp.
; Visible checks: little-endian P9 expects stxvb16x of v3 to r7; big-endian P9
; expects xxbrq of v3 into vs0 followed by stxv. Both P8 runs expect a load of
; constant-pool entry .LCPI18_0, a vperm of v3 with that constant as its last
; operand, and a store to r7.
632 define void @swap_store31(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) {
633 ; CHECK-P8-LABEL: swap_store31:
635 ; CHECK-P8-NEXT: addis r3, r2, .LCPI18_0@toc@ha
636 ; CHECK-P8-NEXT: addi r3, r3, .LCPI18_0@toc@l
637 ; CHECK-P8-NEXT: lvx v2, 0, r3
638 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
639 ; CHECK-P8-NEXT: stvx v2, 0, r7
642 ; CHECK-P9-LABEL: swap_store31:
644 ; CHECK-P9-NEXT: stxvb16x v3, 0, r7
647 ; CHECK-P8-BE-LABEL: swap_store31:
648 ; CHECK-P8-BE: # %bb.0:
649 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI18_0@toc@ha
650 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI18_0@toc@l
651 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
652 ; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
653 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
654 ; CHECK-P8-BE-NEXT: blr
656 ; CHECK-P9-BE-LABEL: swap_store31:
657 ; CHECK-P9-BE: # %bb.0:
658 ; CHECK-P9-BE-NEXT: xxbrq vs0, v3
659 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
660 ; CHECK-P9-BE-NEXT: blr
661 %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
662 store <16 x i8> %v3, <16 x i8>* %vp
; swap_store40: element-reversing shuffle (mask <1, 0>) of the <2 x double>
; argument %v1, stored to %vp; %v2 is not selected by the mask.
; Visible checks: little-endian P8/P9 expect stxvd2x of v2 to r7; big-endian
; P8/P9 expect xxswapd into vs0 followed by a store (stxvd2x / stxv).
666 define void @swap_store40(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) {
667 ; CHECK-P8-LABEL: swap_store40:
669 ; CHECK-P8-NEXT: stxvd2x v2, 0, r7
672 ; CHECK-P9-LABEL: swap_store40:
674 ; CHECK-P9-NEXT: stxvd2x v2, 0, r7
677 ; CHECK-P8-BE-LABEL: swap_store40:
678 ; CHECK-P8-BE: # %bb.0:
679 ; CHECK-P8-BE-NEXT: xxswapd vs0, v2
680 ; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
681 ; CHECK-P8-BE-NEXT: blr
683 ; CHECK-P9-BE-LABEL: swap_store40:
684 ; CHECK-P9-BE: # %bb.0:
685 ; CHECK-P9-BE-NEXT: xxswapd vs0, v2
686 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
687 ; CHECK-P9-BE-NEXT: blr
688 %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 1, i32 0>
689 store <2 x double> %v3, <2 x double>* %vp
; swap_store41: element-reversing shuffle (mask <3, 2>) that selects only from
; the second operand %v2 (<2 x double>), stored to %vp.
; Visible checks: little-endian P8/P9 expect stxvd2x of v3 to r7; big-endian
; P8/P9 expect xxswapd of v3 into vs0 followed by a store (stxvd2x / stxv).
693 define void @swap_store41(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) {
694 ; CHECK-P8-LABEL: swap_store41:
696 ; CHECK-P8-NEXT: stxvd2x v3, 0, r7
699 ; CHECK-P9-LABEL: swap_store41:
701 ; CHECK-P9-NEXT: stxvd2x v3, 0, r7
704 ; CHECK-P8-BE-LABEL: swap_store41:
705 ; CHECK-P8-BE: # %bb.0:
706 ; CHECK-P8-BE-NEXT: xxswapd vs0, v3
707 ; CHECK-P8-BE-NEXT: stxvd2x vs0, 0, r7
708 ; CHECK-P8-BE-NEXT: blr
710 ; CHECK-P9-BE-LABEL: swap_store41:
711 ; CHECK-P9-BE: # %bb.0:
712 ; CHECK-P9-BE-NEXT: xxswapd vs0, v3
713 ; CHECK-P9-BE-NEXT: stxv vs0, 0(r7)
714 ; CHECK-P9-BE-NEXT: blr
715 %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
716 store <2 x double> %v3, <2 x double>* %vp
; swap_store50: element-reversing shuffle (mask <3, 2, 1, 0>) of the
; <4 x float> argument %v1, stored to %vp; %v2 is not selected by the mask.
; Visible checks: little-endian P9 expects stxvw4x of v2 to r7. Little-endian
; P8 and both big-endian runs expect a load of constant-pool entry .LCPI21_0,
; a vperm of v2 with that constant as its last operand, and a store to r7.
720 define void @swap_store50(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
721 ; CHECK-P8-LABEL: swap_store50:
723 ; CHECK-P8-NEXT: addis r3, r2, .LCPI21_0@toc@ha
724 ; CHECK-P8-NEXT: addi r3, r3, .LCPI21_0@toc@l
725 ; CHECK-P8-NEXT: lvx v3, 0, r3
726 ; CHECK-P8-NEXT: vperm v2, v2, v2, v3
727 ; CHECK-P8-NEXT: stvx v2, 0, r7
730 ; CHECK-P9-LABEL: swap_store50:
732 ; CHECK-P9-NEXT: stxvw4x v2, 0, r7
735 ; CHECK-P8-BE-LABEL: swap_store50:
736 ; CHECK-P8-BE: # %bb.0:
737 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI21_0@toc@ha
738 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI21_0@toc@l
739 ; CHECK-P8-BE-NEXT: lxvw4x v3, 0, r3
740 ; CHECK-P8-BE-NEXT: vperm v2, v2, v2, v3
741 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
742 ; CHECK-P8-BE-NEXT: blr
744 ; CHECK-P9-BE-LABEL: swap_store50:
745 ; CHECK-P9-BE: # %bb.0:
746 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI21_0@toc@ha
747 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI21_0@toc@l
748 ; CHECK-P9-BE-NEXT: lxvx v3, 0, r3
749 ; CHECK-P9-BE-NEXT: vperm v2, v2, v2, v3
750 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
751 ; CHECK-P9-BE-NEXT: blr
752 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
753 store <4 x float> %v3, <4 x float>* %vp
; swap_store51: element-reversing shuffle (mask <7, 6, 5, 4>) that selects
; only from the second operand %v2 (<4 x float>), stored to %vp.
; Visible checks: little-endian P9 expects stxvw4x of v3 to r7. Little-endian
; P8 and both big-endian runs expect a load of constant-pool entry .LCPI22_0,
; a vperm of v3 with that constant as its last operand, and a store to r7.
757 define void @swap_store51(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) {
758 ; CHECK-P8-LABEL: swap_store51:
760 ; CHECK-P8-NEXT: addis r3, r2, .LCPI22_0@toc@ha
761 ; CHECK-P8-NEXT: addi r3, r3, .LCPI22_0@toc@l
762 ; CHECK-P8-NEXT: lvx v2, 0, r3
763 ; CHECK-P8-NEXT: vperm v2, v3, v3, v2
764 ; CHECK-P8-NEXT: stvx v2, 0, r7
767 ; CHECK-P9-LABEL: swap_store51:
769 ; CHECK-P9-NEXT: stxvw4x v3, 0, r7
772 ; CHECK-P8-BE-LABEL: swap_store51:
773 ; CHECK-P8-BE: # %bb.0:
774 ; CHECK-P8-BE-NEXT: addis r3, r2, .LCPI22_0@toc@ha
775 ; CHECK-P8-BE-NEXT: addi r3, r3, .LCPI22_0@toc@l
776 ; CHECK-P8-BE-NEXT: lxvw4x v2, 0, r3
777 ; CHECK-P8-BE-NEXT: vperm v2, v3, v3, v2
778 ; CHECK-P8-BE-NEXT: stxvw4x v2, 0, r7
779 ; CHECK-P8-BE-NEXT: blr
781 ; CHECK-P9-BE-LABEL: swap_store51:
782 ; CHECK-P9-BE: # %bb.0:
783 ; CHECK-P9-BE-NEXT: addis r3, r2, .LCPI22_0@toc@ha
784 ; CHECK-P9-BE-NEXT: addi r3, r3, .LCPI22_0@toc@l
785 ; CHECK-P9-BE-NEXT: lxvx v2, 0, r3
786 ; CHECK-P9-BE-NEXT: vperm v2, v3, v3, v2
787 ; CHECK-P9-BE-NEXT: stxv v2, 0(r7)
788 ; CHECK-P9-BE-NEXT: blr
789 %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
790 store <4 x float> %v3, <4 x float>* %vp