1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
4 ; RUN: -check-prefix=P9
5 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
6 ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
7 ; RUN: -check-prefix=P8
8 ; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
9 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
10 ; RUN: -check-prefix=P7
; test: load the double at element index 3 of %a, splat it into both lanes of
; a <2 x double> (insertelement into lane 0, then shufflevector with an
; all-zero mask) and store the vector to %c.
; Expected codegen: pwr9, pwr8 and pwr7 all advance the address by 24 and use a
; single lxvdsx; only the store differs (stxv on pwr9, stxvd2x on pwr8/pwr7).
13 define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
15 ; P9: # %bb.0: # %entry
16 ; P9-NEXT: addi r4, r4, 24
17 ; P9-NEXT: lxvdsx vs0, 0, r4
18 ; P9-NEXT: stxv vs0, 0(r3)
22 ; P8: # %bb.0: # %entry
23 ; P8-NEXT: addi r4, r4, 24
24 ; P8-NEXT: lxvdsx vs0, 0, r4
25 ; P8-NEXT: stxvd2x vs0, 0, r3
29 ; P7: # %bb.0: # %entry
30 ; P7-NEXT: addi r4, r4, 24
31 ; P7-NEXT: lxvdsx vs0, 0, r4
32 ; P7-NEXT: stxvd2x vs0, 0, r3
35 %arrayidx = getelementptr inbounds double, double* %a, i64 3
36 %0 = load double, double* %arrayidx, align 8
37 %splat.splatinsert.i = insertelement <2 x double> undef, double %0, i32 0
38 %splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> undef, <2 x i32> zeroinitializer
39 store <2 x double> %splat.splat.i, <2 x double>* %c, align 16
; test2: load the float at element index 3 of %a, splat it into all four lanes
; of a <4 x float> and store the vector to %c.
; Expected codegen:
;   pwr9 - addi by 12, then a single lxvwsx.
;   pwr8 - addi by 12, lfiwzx, then xxspltw of word 1, stored with stvx.
;   pwr7 - lwz from 12(r4), stw to -16(r1), reload with lxvw4x, then xxspltw
;          of word 0.
44 define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
46 ; P9: # %bb.0: # %entry
47 ; P9-NEXT: addi r4, r4, 12
48 ; P9-NEXT: lxvwsx vs0, 0, r4
49 ; P9-NEXT: stxv vs0, 0(r3)
53 ; P8: # %bb.0: # %entry
54 ; P8-NEXT: addi r4, r4, 12
55 ; P8-NEXT: lfiwzx f0, 0, r4
56 ; P8-NEXT: xxspltw v2, vs0, 1
57 ; P8-NEXT: stvx v2, 0, r3
61 ; P7: # %bb.0: # %entry
62 ; P7-NEXT: lwz r4, 12(r4)
63 ; P7-NEXT: addi r5, r1, -16
64 ; P7-NEXT: stw r4, -16(r1)
65 ; P7-NEXT: lxvw4x vs0, 0, r5
66 ; P7-NEXT: xxspltw vs0, vs0, 0
67 ; P7-NEXT: stxvw4x vs0, 0, r3
70 %arrayidx = getelementptr inbounds float, float* %a, i64 3
71 %0 = load float, float* %arrayidx, align 4
72 %splat.splatinsert.i = insertelement <4 x float> undef, float %0, i32 0
73 %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> undef, <4 x i32> zeroinitializer
74 store <4 x float> %splat.splat.i, <4 x float>* %c, align 16
; test3: load the i32 at element index 3 of %a, splat it into all four lanes
; of a <4 x i32> and store the vector to %c.
; Expected codegen:
;   pwr9 - addi by 12, then a single lxvwsx.
;   pwr8 - addi by 12, lfiwzx, then xxspltw of word 1, stored with stvx.
;   pwr7 - lwz from 12(r4), stw to -16(r1), reload with lxvw4x, then xxspltw
;          of word 0.
79 define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
81 ; P9: # %bb.0: # %entry
82 ; P9-NEXT: addi r4, r4, 12
83 ; P9-NEXT: lxvwsx vs0, 0, r4
84 ; P9-NEXT: stxv vs0, 0(r3)
88 ; P8: # %bb.0: # %entry
89 ; P8-NEXT: addi r4, r4, 12
90 ; P8-NEXT: lfiwzx f0, 0, r4
91 ; P8-NEXT: xxspltw v2, vs0, 1
92 ; P8-NEXT: stvx v2, 0, r3
96 ; P7: # %bb.0: # %entry
97 ; P7-NEXT: lwz r4, 12(r4)
98 ; P7-NEXT: addi r5, r1, -16
99 ; P7-NEXT: stw r4, -16(r1)
100 ; P7-NEXT: lxvw4x vs0, 0, r5
101 ; P7-NEXT: xxspltw vs0, vs0, 0
102 ; P7-NEXT: stxvw4x vs0, 0, r3
105 %arrayidx = getelementptr inbounds i32, i32* %a, i64 3
106 %0 = load i32, i32* %arrayidx, align 4
107 %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %0, i32 0
108 %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
109 store <4 x i32> %splat.splat.i, <4 x i32>* %c, align 16
; test4: load the i64 at element index 3 of %a, splat it into both lanes of a
; <2 x i64> and store the vector to %c.
; Expected codegen: pwr9, pwr8 and pwr7 all advance the address by 24 and use a
; single lxvdsx; only the store differs (stxv on pwr9, stxvd2x on pwr8/pwr7).
114 define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
116 ; P9: # %bb.0: # %entry
117 ; P9-NEXT: addi r4, r4, 24
118 ; P9-NEXT: lxvdsx vs0, 0, r4
119 ; P9-NEXT: stxv vs0, 0(r3)
123 ; P8: # %bb.0: # %entry
124 ; P8-NEXT: addi r4, r4, 24
125 ; P8-NEXT: lxvdsx vs0, 0, r4
126 ; P8-NEXT: stxvd2x vs0, 0, r3
130 ; P7: # %bb.0: # %entry
131 ; P7-NEXT: addi r4, r4, 24
132 ; P7-NEXT: lxvdsx vs0, 0, r4
133 ; P7-NEXT: stxvd2x vs0, 0, r3
136 %arrayidx = getelementptr inbounds i64, i64* %a, i64 3
137 %0 = load i64, i64* %arrayidx, align 8
138 %splat.splatinsert.i = insertelement <2 x i64> undef, i64 %0, i32 0
139 %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
140 store <2 x i64> %splat.splat.i, <2 x i64>* %c, align 16
; test5: load an i32 from %in, sign-extend it to i64, splat the result into
; both lanes of a <2 x i64> and store the vector to %a.
; Expected codegen:
;   pwr9/pwr8 - lfiwax (the load form matching the sext) followed by xxspltd.
;   pwr7      - lwa, two std stores to -8(r1) and -16(r1), then lxvd2x to
;               reload both doublewords as a vector.
145 define void @test5(<2 x i64>* %a, i32* %in) {
147 ; P9: # %bb.0: # %entry
148 ; P9-NEXT: lfiwax f0, 0, r4
149 ; P9-NEXT: xxspltd vs0, vs0, 0
150 ; P9-NEXT: stxv vs0, 0(r3)
154 ; P8: # %bb.0: # %entry
155 ; P8-NEXT: lfiwax f0, 0, r4
156 ; P8-NEXT: xxspltd vs0, vs0, 0
157 ; P8-NEXT: stxvd2x vs0, 0, r3
161 ; P7: # %bb.0: # %entry
162 ; P7-NEXT: lwa r4, 0(r4)
163 ; P7-NEXT: addi r5, r1, -16
164 ; P7-NEXT: std r4, -8(r1)
165 ; P7-NEXT: std r4, -16(r1)
166 ; P7-NEXT: lxvd2x vs0, 0, r5
167 ; P7-NEXT: stxvd2x vs0, 0, r3
170 %0 = load i32, i32* %in, align 4
171 %conv = sext i32 %0 to i64
172 %splat.splatinsert.i = insertelement <2 x i64> poison, i64 %conv, i32 0
173 %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> poison, <2 x i32> zeroinitializer
174 store <2 x i64> %splat.splat.i, <2 x i64>* %a, align 16
; test6: load an i32 from %in, zero-extend it to i64, splat the result into
; both lanes of a <2 x i64> and store the vector to %a.
; Expected codegen:
;   pwr9/pwr8 - lfiwzx (the load form matching the zext) followed by xxspltd.
;   pwr7      - lwz, two std stores to -8(r1) and -16(r1), then lxvd2x to
;               reload both doublewords as a vector.
179 define void @test6(<2 x i64>* %a, i32* %in) {
181 ; P9: # %bb.0: # %entry
182 ; P9-NEXT: lfiwzx f0, 0, r4
183 ; P9-NEXT: xxspltd vs0, vs0, 0
184 ; P9-NEXT: stxv vs0, 0(r3)
188 ; P8: # %bb.0: # %entry
189 ; P8-NEXT: lfiwzx f0, 0, r4
190 ; P8-NEXT: xxspltd vs0, vs0, 0
191 ; P8-NEXT: stxvd2x vs0, 0, r3
195 ; P7: # %bb.0: # %entry
196 ; P7-NEXT: lwz r4, 0(r4)
197 ; P7-NEXT: addi r5, r1, -16
198 ; P7-NEXT: std r4, -8(r1)
199 ; P7-NEXT: std r4, -16(r1)
200 ; P7-NEXT: lxvd2x vs0, 0, r5
201 ; P7-NEXT: stxvd2x vs0, 0, r3
204 %0 = load i32, i32* %in, align 4
205 %conv = zext i32 %0 to i64
206 %splat.splatinsert.i = insertelement <2 x i64> poison, i64 %conv, i32 0
207 %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> poison, <2 x i32> zeroinitializer
208 store <2 x i64> %splat.splat.i, <2 x i64>* %a, align 16
; test7: load an i16 from %in, splat it into all eight lanes of an <8 x i16>
; and store the vector to %a.
; Expected codegen:
;   pwr9 - lxsihzx followed by vsplth of halfword 3.
;   pwr8 - lhz, mtvsrd into v2, then vsplth of halfword 3, stored with stvx.
;   pwr7 - lhz, sth to -16(r1), reload with lxvw4x, then vsplth of halfword 0.
213 define void @test7(<8 x i16>* %a, i16* %in) {
215 ; P9: # %bb.0: # %entry
216 ; P9-NEXT: lxsihzx v2, 0, r4
217 ; P9-NEXT: vsplth v2, v2, 3
218 ; P9-NEXT: stxv v2, 0(r3)
222 ; P8: # %bb.0: # %entry
223 ; P8-NEXT: lhz r4, 0(r4)
224 ; P8-NEXT: mtvsrd v2, r4
225 ; P8-NEXT: vsplth v2, v2, 3
226 ; P8-NEXT: stvx v2, 0, r3
230 ; P7: # %bb.0: # %entry
231 ; P7-NEXT: lhz r4, 0(r4)
232 ; P7-NEXT: addi r5, r1, -16
233 ; P7-NEXT: sth r4, -16(r1)
234 ; P7-NEXT: lxvw4x v2, 0, r5
235 ; P7-NEXT: vsplth v2, v2, 0
236 ; P7-NEXT: stxvw4x v2, 0, r3
239 %0 = load i16, i16* %in, align 2
240 %splat.splatinsert.i = insertelement <8 x i16> poison, i16 %0, i32 0
241 %splat.splat.i = shufflevector <8 x i16> %splat.splatinsert.i, <8 x i16> poison, <8 x i32> zeroinitializer
242 store <8 x i16> %splat.splat.i, <8 x i16>* %a, align 16
; test8: load an i8 from %in, splat it into all sixteen lanes of a <16 x i8>
; and store the vector to %a.
; Expected codegen:
;   pwr9 - lxsibzx followed by vspltb of byte 7.
;   pwr8 - lbz, mtvsrd into v2, then vspltb of byte 7, stored with stvx.
;   pwr7 - lbz, stb to -16(r1), reload with lxvw4x, then vspltb of byte 0.
247 define void @test8(<16 x i8>* %a, i8* %in) {
249 ; P9: # %bb.0: # %entry
250 ; P9-NEXT: lxsibzx v2, 0, r4
251 ; P9-NEXT: vspltb v2, v2, 7
252 ; P9-NEXT: stxv v2, 0(r3)
256 ; P8: # %bb.0: # %entry
257 ; P8-NEXT: lbz r4, 0(r4)
258 ; P8-NEXT: mtvsrd v2, r4
259 ; P8-NEXT: vspltb v2, v2, 7
260 ; P8-NEXT: stvx v2, 0, r3
264 ; P7: # %bb.0: # %entry
265 ; P7-NEXT: lbz r4, 0(r4)
266 ; P7-NEXT: addi r5, r1, -16
267 ; P7-NEXT: stb r4, -16(r1)
268 ; P7-NEXT: lxvw4x v2, 0, r5
269 ; P7-NEXT: vspltb v2, v2, 0
270 ; P7-NEXT: stxvw4x v2, 0, r3
273 %0 = load i8, i8* %in, align 1
274 %splat.splatinsert.i = insertelement <16 x i8> poison, i8 %0, i32 0
275 %splat.splat.i = shufflevector <16 x i8> %splat.splatinsert.i, <16 x i8> poison, <16 x i32> zeroinitializer
276 store <16 x i8> %splat.splat.i, <16 x i8>* %a, align 16
; unadjusted_lxvwsx: reinterpret %s as a pointer to <4 x i8>, load the four
; bytes, and widen to <16 x i8> with a shuffle mask that repeats bytes 0..3
; four times (i.e. a splat of the first 32-bit word).
; Expected codegen:
;   pwr9 - lxvwsx straight from r3, with no address adjustment.
;   pwr8 - lfiwzx followed by xxspltw of word 1.
;   pwr7 - lwz, stw to -16(r1), reload with lxvw4x, then xxspltw of word 0.
280 define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
281 ; P9-LABEL: unadjusted_lxvwsx:
282 ; P9: # %bb.0: # %entry
283 ; P9-NEXT: lxvwsx v2, 0, r3
286 ; P8-LABEL: unadjusted_lxvwsx:
287 ; P8: # %bb.0: # %entry
288 ; P8-NEXT: lfiwzx f0, 0, r3
289 ; P8-NEXT: xxspltw v2, vs0, 1
292 ; P7-LABEL: unadjusted_lxvwsx:
293 ; P7: # %bb.0: # %entry
294 ; P7-NEXT: lwz r3, 0(r3)
295 ; P7-NEXT: addi r4, r1, -16
296 ; P7-NEXT: stw r3, -16(r1)
297 ; P7-NEXT: lxvw4x vs0, 0, r4
298 ; P7-NEXT: xxspltw v2, vs0, 0
301 %0 = bitcast i32* %s to <4 x i8>*
302 %1 = load <4 x i8>, <4 x i8>* %0, align 4
303 %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; adjusted_lxvwsx: reinterpret %s as a pointer to <8 x i8>, load the eight
; bytes, and widen to <16 x i8> with a shuffle mask that repeats bytes 4..7
; four times (i.e. a splat of the 32-bit word at byte offset 4).
; Expected codegen:
;   pwr9 - the address is advanced by 4 (addi) before lxvwsx.
;   pwr8 - lfdx followed by xxspltw of word 0.
;   pwr7 - ld, std to -16(r1), reload with lxvw4x, then xxspltw of word 1.
307 define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
308 ; P9-LABEL: adjusted_lxvwsx:
309 ; P9: # %bb.0: # %entry
310 ; P9-NEXT: addi r3, r3, 4
311 ; P9-NEXT: lxvwsx v2, 0, r3
314 ; P8-LABEL: adjusted_lxvwsx:
315 ; P8: # %bb.0: # %entry
316 ; P8-NEXT: lfdx f0, 0, r3
317 ; P8-NEXT: xxspltw v2, vs0, 0
320 ; P7-LABEL: adjusted_lxvwsx:
321 ; P7: # %bb.0: # %entry
322 ; P7-NEXT: ld r3, 0(r3)
323 ; P7-NEXT: addi r4, r1, -16
324 ; P7-NEXT: std r3, -16(r1)
325 ; P7-NEXT: lxvw4x vs0, 0, r4
326 ; P7-NEXT: xxspltw v2, vs0, 1
329 %0 = bitcast i64* %s to <8 x i8>*
330 %1 = load <8 x i8>, <8 x i8>* %0, align 8
331 %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
; unadjusted_lxvwsx_v16i8: load a full <16 x i8> from %s and shuffle it with a
; mask that repeats bytes 0..3 four times (a splat of the first 32-bit word).
; Expected codegen:
;   pwr9 - lxvwsx straight from r3, with no address adjustment.
;   pwr8 - lvx of the whole vector, then xxspltw of word 3.
;   pwr7 - lxvw4x of the whole vector, then xxspltw of word 0.
335 define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
336 ; P9-LABEL: unadjusted_lxvwsx_v16i8:
337 ; P9: # %bb.0: # %entry
338 ; P9-NEXT: lxvwsx v2, 0, r3
341 ; P8-LABEL: unadjusted_lxvwsx_v16i8:
342 ; P8: # %bb.0: # %entry
343 ; P8-NEXT: lvx v2, 0, r3
344 ; P8-NEXT: xxspltw v2, v2, 3
347 ; P7-LABEL: unadjusted_lxvwsx_v16i8:
348 ; P7: # %bb.0: # %entry
349 ; P7-NEXT: lxvw4x vs0, 0, r3
350 ; P7-NEXT: xxspltw v2, vs0, 0
353 %0 = load <16 x i8>, <16 x i8>* %s, align 16
354 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; adjusted_lxvwsx_v16i8: load a full <16 x i8> from %s and shuffle it with a
; mask that repeats bytes 4..7 four times (a splat of the 32-bit word at byte
; offset 4).
; Expected codegen:
;   pwr9 - the address is advanced by 4 (addi) before lxvwsx.
;   pwr8 - lvx of the whole vector, then xxspltw of word 2.
;   pwr7 - lxvw4x of the whole vector, then xxspltw of word 1.
358 define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
359 ; P9-LABEL: adjusted_lxvwsx_v16i8:
360 ; P9: # %bb.0: # %entry
361 ; P9-NEXT: addi r3, r3, 4
362 ; P9-NEXT: lxvwsx v2, 0, r3
365 ; P8-LABEL: adjusted_lxvwsx_v16i8:
366 ; P8: # %bb.0: # %entry
367 ; P8-NEXT: lvx v2, 0, r3
368 ; P8-NEXT: xxspltw v2, v2, 2
371 ; P7-LABEL: adjusted_lxvwsx_v16i8:
372 ; P7: # %bb.0: # %entry
373 ; P7-NEXT: lxvw4x vs0, 0, r3
374 ; P7-NEXT: xxspltw v2, vs0, 1
377 %0 = load <16 x i8>, <16 x i8>* %s, align 16
378 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
; adjusted_lxvwsx_v16i8_2: load a full <16 x i8> from %s and shuffle it with a
; mask that repeats bytes 8..11 four times (a splat of the 32-bit word at byte
; offset 8).
; Expected codegen:
;   pwr9 - the address is advanced by 8 (addi) before lxvwsx.
;   pwr8 - lvx of the whole vector, then xxspltw of word 1.
;   pwr7 - lxvw4x of the whole vector, then xxspltw of word 2.
382 define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
383 ; P9-LABEL: adjusted_lxvwsx_v16i8_2:
384 ; P9: # %bb.0: # %entry
385 ; P9-NEXT: addi r3, r3, 8
386 ; P9-NEXT: lxvwsx v2, 0, r3
389 ; P8-LABEL: adjusted_lxvwsx_v16i8_2:
390 ; P8: # %bb.0: # %entry
391 ; P8-NEXT: lvx v2, 0, r3
392 ; P8-NEXT: xxspltw v2, v2, 1
395 ; P7-LABEL: adjusted_lxvwsx_v16i8_2:
396 ; P7: # %bb.0: # %entry
397 ; P7-NEXT: lxvw4x vs0, 0, r3
398 ; P7-NEXT: xxspltw v2, vs0, 2
401 %0 = load <16 x i8>, <16 x i8>* %s, align 16
402 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
; adjusted_lxvwsx_v16i8_3: load a full <16 x i8> from %s and shuffle it with a
; mask that repeats bytes 12..15 four times (a splat of the 32-bit word at
; byte offset 12).
; Expected codegen:
;   pwr9 - the address is advanced by 12 (addi) before lxvwsx.
;   pwr8 - lvx of the whole vector, then xxspltw of word 0.
;   pwr7 - lxvw4x of the whole vector, then xxspltw of word 3.
406 define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
407 ; P9-LABEL: adjusted_lxvwsx_v16i8_3:
408 ; P9: # %bb.0: # %entry
409 ; P9-NEXT: addi r3, r3, 12
410 ; P9-NEXT: lxvwsx v2, 0, r3
413 ; P8-LABEL: adjusted_lxvwsx_v16i8_3:
414 ; P8: # %bb.0: # %entry
415 ; P8-NEXT: lvx v2, 0, r3
416 ; P8-NEXT: xxspltw v2, v2, 0
419 ; P7-LABEL: adjusted_lxvwsx_v16i8_3:
420 ; P7: # %bb.0: # %entry
421 ; P7-NEXT: lxvw4x vs0, 0, r3
422 ; P7-NEXT: xxspltw v2, vs0, 3
425 %0 = load <16 x i8>, <16 x i8>* %s, align 16
426 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
; unadjusted_lxvdsx: reinterpret %s as a pointer to <8 x i8>, load the eight
; bytes, and widen to <16 x i8> with a shuffle mask that repeats bytes 0..7
; twice (i.e. a splat of the loaded 64-bit doubleword).
; Expected codegen: pwr9, pwr8 and pwr7 all use a single lxvdsx straight from
; r3, with no address adjustment.
430 define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
431 ; P9-LABEL: unadjusted_lxvdsx:
432 ; P9: # %bb.0: # %entry
433 ; P9-NEXT: lxvdsx v2, 0, r3
436 ; P8-LABEL: unadjusted_lxvdsx:
437 ; P8: # %bb.0: # %entry
438 ; P8-NEXT: lxvdsx v2, 0, r3
441 ; P7-LABEL: unadjusted_lxvdsx:
442 ; P7: # %bb.0: # %entry
443 ; P7-NEXT: lxvdsx v2, 0, r3
446 %0 = bitcast i64* %s to <8 x i8>*
447 %1 = load <8 x i8>, <8 x i8>* %0, align 8
448 %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; unadjusted_lxvdsx_v16i8: load a full <16 x i8> from %s and shuffle it with a
; mask that repeats bytes 0..7 twice (a splat of the first 64-bit doubleword).
; Expected codegen: pwr9, pwr8 and pwr7 all use a single lxvdsx straight from
; r3, with no address adjustment.
452 define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
453 ; P9-LABEL: unadjusted_lxvdsx_v16i8:
454 ; P9: # %bb.0: # %entry
455 ; P9-NEXT: lxvdsx v2, 0, r3
458 ; P8-LABEL: unadjusted_lxvdsx_v16i8:
459 ; P8: # %bb.0: # %entry
460 ; P8-NEXT: lxvdsx v2, 0, r3
463 ; P7-LABEL: unadjusted_lxvdsx_v16i8:
464 ; P7: # %bb.0: # %entry
465 ; P7-NEXT: lxvdsx v2, 0, r3
468 %0 = load <16 x i8>, <16 x i8>* %s, align 16
469 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; adjusted_lxvdsx_v16i8: load a full <16 x i8> from %s and shuffle it with a
; mask that repeats bytes 8..15 twice (a splat of the 64-bit doubleword at
; byte offset 8).
; Expected codegen: pwr9, pwr8 and pwr7 all advance the address by 8 (addi)
; and then use a single lxvdsx.
473 define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
474 ; P9-LABEL: adjusted_lxvdsx_v16i8:
475 ; P9: # %bb.0: # %entry
476 ; P9-NEXT: addi r3, r3, 8
477 ; P9-NEXT: lxvdsx v2, 0, r3
480 ; P8-LABEL: adjusted_lxvdsx_v16i8:
481 ; P8: # %bb.0: # %entry
482 ; P8-NEXT: addi r3, r3, 8
483 ; P8-NEXT: lxvdsx v2, 0, r3
486 ; P7-LABEL: adjusted_lxvdsx_v16i8:
487 ; P7: # %bb.0: # %entry
488 ; P7-NEXT: addi r3, r3, 8
489 ; P7-NEXT: lxvdsx v2, 0, r3
492 %0 = load <16 x i8>, <16 x i8>* %s, align 16
493 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>