1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
4 ; RUN: -check-prefix=P9
5 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
6 ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
7 ; RUN: -check-prefix=P8
; Splat of a loaded double: reads element 3 of %a, broadcasts it into both
; lanes of a <2 x double> (insertelement into lane 0, then shufflevector with
; an all-zero mask) and stores the vector to %c.
; Expected code for both run configurations: bump the pointer by 24 and do a
; single lxvdsx. Only the store differs (stxv on the pwr9 run, stxvd2x on the
; pwr8 run).
8 define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
10 ; P9: # %bb.0: # %entry
11 ; P9-NEXT: addi r4, r4, 24
12 ; P9-NEXT: lxvdsx vs0, 0, r4
13 ; P9-NEXT: stxv vs0, 0(r3)
17 ; P8: # %bb.0: # %entry
18 ; P8-NEXT: addi r4, r4, 24
19 ; P8-NEXT: lxvdsx vs0, 0, r4
20 ; P8-NEXT: stxvd2x vs0, 0, r3
; Address of a[3]; this is the +24 seen in the addi above.
23 %arrayidx = getelementptr inbounds double, double* %a, i64 3
24 %0 = load double, double* %arrayidx, align 8
; Splat idiom: put the scalar in lane 0, then select lane 0 for every lane.
25 %splat.splatinsert.i = insertelement <2 x double> undef, double %0, i32 0
26 %splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> undef, <2 x i32> zeroinitializer
27 store <2 x double> %splat.splat.i, <2 x double>* %c, align 16
; Splat of a loaded float: reads element 3 of %a, broadcasts it into all four
; lanes of a <4 x float> and stores the vector to %c.
; The pwr9 run is expected to adjust the pointer by 12 and use lxvwsx followed
; by stxv. The pwr8 run is expected to use lfiwzx + xxpermdi + xxspltw and
; store with stvx.
31 define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
33 ; P9: # %bb.0: # %entry
34 ; P9-NEXT: addi r4, r4, 12
35 ; P9-NEXT: lxvwsx vs0, 0, r4
36 ; P9-NEXT: stxv vs0, 0(r3)
40 ; P8: # %bb.0: # %entry
41 ; P8-NEXT: addi r4, r4, 12
42 ; P8-NEXT: lfiwzx f0, 0, r4
43 ; P8-NEXT: xxpermdi vs0, f0, f0, 2
44 ; P8-NEXT: xxspltw v2, vs0, 3
45 ; P8-NEXT: stvx v2, 0, r3
; Address of a[3]; this is the +12 seen in the addi above.
48 %arrayidx = getelementptr inbounds float, float* %a, i64 3
49 %0 = load float, float* %arrayidx, align 4
; Splat idiom: put the scalar in lane 0, then select lane 0 for every lane.
50 %splat.splatinsert.i = insertelement <4 x float> undef, float %0, i32 0
51 %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> undef, <4 x i32> zeroinitializer
52 store <4 x float> %splat.splat.i, <4 x float>* %c, align 16
; Integer counterpart of the float splat: reads element 3 of the i32 array %a,
; broadcasts it into all four lanes of a <4 x i32> and stores the vector to %c.
; The pwr9 run is expected to adjust the pointer by 12 and use lxvwsx followed
; by stxv. The pwr8 run is expected to use lfiwzx + xxpermdi + xxspltw and
; store with stvx.
56 define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
58 ; P9: # %bb.0: # %entry
59 ; P9-NEXT: addi r4, r4, 12
60 ; P9-NEXT: lxvwsx vs0, 0, r4
61 ; P9-NEXT: stxv vs0, 0(r3)
65 ; P8: # %bb.0: # %entry
66 ; P8-NEXT: addi r4, r4, 12
67 ; P8-NEXT: lfiwzx f0, 0, r4
68 ; P8-NEXT: xxpermdi vs0, f0, f0, 2
69 ; P8-NEXT: xxspltw v2, vs0, 3
70 ; P8-NEXT: stvx v2, 0, r3
; Address of a[3]; this is the +12 seen in the addi above.
73 %arrayidx = getelementptr inbounds i32, i32* %a, i64 3
74 %0 = load i32, i32* %arrayidx, align 4
; Splat idiom: put the scalar in lane 0, then select lane 0 for every lane.
75 %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %0, i32 0
76 %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
77 store <4 x i32> %splat.splat.i, <4 x i32>* %c, align 16
; Integer counterpart of the double splat: reads element 3 of the i64 array
; %a, broadcasts it into both lanes of a <2 x i64> and stores the vector to %c.
; Expected code for both run configurations: bump the pointer by 24 and do a
; single lxvdsx. Only the store differs (stxv on the pwr9 run, stxvd2x on the
; pwr8 run).
81 define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
83 ; P9: # %bb.0: # %entry
84 ; P9-NEXT: addi r4, r4, 24
85 ; P9-NEXT: lxvdsx vs0, 0, r4
86 ; P9-NEXT: stxv vs0, 0(r3)
90 ; P8: # %bb.0: # %entry
91 ; P8-NEXT: addi r4, r4, 24
92 ; P8-NEXT: lxvdsx vs0, 0, r4
93 ; P8-NEXT: stxvd2x vs0, 0, r3
; Address of a[3]; this is the +24 seen in the addi above.
96 %arrayidx = getelementptr inbounds i64, i64* %a, i64 3
97 %0 = load i64, i64* %arrayidx, align 8
; Splat idiom: put the scalar in lane 0, then select lane 0 for every lane.
98 %splat.splatinsert.i = insertelement <2 x i64> undef, i64 %0, i32 0
99 %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
100 store <2 x i64> %splat.splat.i, <2 x i64>* %c, align 16
; Word splat expressed on bytes: loads 4 bytes through %s (viewed as
; <4 x i8>*) and repeats bytes 0..3 four times to form a <16 x i8>.
; The pwr9 run is expected to fold this into one lxvwsx on the incoming
; pointer with no address adjustment. The pwr8 run is expected to use
; lfiwzx + xxpermdi + xxspltw.
104 define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
105 ; P9-LABEL: unadjusted_lxvwsx:
106 ; P9: # %bb.0: # %entry
107 ; P9-NEXT: lxvwsx v2, 0, r3
110 ; P8-LABEL: unadjusted_lxvwsx:
111 ; P8: # %bb.0: # %entry
112 ; P8-NEXT: lfiwzx f0, 0, r3
113 ; P8-NEXT: xxpermdi vs0, f0, f0, 2
114 ; P8-NEXT: xxspltw v2, vs0, 3
117 %0 = bitcast i32* %s to <4 x i8>*
118 %1 = load <4 x i8>, <4 x i8>* %0, align 4
; Mask 0,1,2,3 repeated four times: the loaded word fills every word slot.
119 %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Word splat of the second word of an 8-byte load: loads 8 bytes through %s
; (viewed as <8 x i8>*) and repeats bytes 4..7 four times to form a <16 x i8>.
; The pwr9 run is expected to add 4 to the pointer and then use lxvwsx. The
; pwr8 run is expected to load the doubleword with ld, move it to a vector
; register (mtvsrd + xxswapd) and splat word 2 with xxspltw.
123 define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
124 ; P9-LABEL: adjusted_lxvwsx:
125 ; P9: # %bb.0: # %entry
126 ; P9-NEXT: addi r3, r3, 4
127 ; P9-NEXT: lxvwsx v2, 0, r3
130 ; P8-LABEL: adjusted_lxvwsx:
131 ; P8: # %bb.0: # %entry
132 ; P8-NEXT: ld r3, 0(r3)
133 ; P8-NEXT: mtvsrd f0, r3
134 ; P8-NEXT: xxswapd v2, vs0
135 ; P8-NEXT: xxspltw v2, v2, 2
138 %0 = bitcast i64* %s to <8 x i8>*
139 %1 = load <8 x i8>, <8 x i8>* %0, align 8
; Mask 4,5,6,7 repeated four times: the word at byte offset 4 is splatted.
140 %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
; Word splat from a full vector load: loads a <16 x i8> from %s and repeats
; bytes 0..3 four times.
; The pwr9 run is expected to use one lxvwsx on the incoming pointer with no
; address adjustment. The pwr8 run is expected to load the whole vector with
; lvx and splat word 3 with xxspltw.
144 define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
145 ; P9-LABEL: unadjusted_lxvwsx_v16i8:
146 ; P9: # %bb.0: # %entry
147 ; P9-NEXT: lxvwsx v2, 0, r3
150 ; P8-LABEL: unadjusted_lxvwsx_v16i8:
151 ; P8: # %bb.0: # %entry
152 ; P8-NEXT: lvx v2, 0, r3
153 ; P8-NEXT: xxspltw v2, v2, 3
156 %0 = load <16 x i8>, <16 x i8>* %s, align 16
; Mask 0,1,2,3 repeated four times: the word at byte offset 0 is splatted.
157 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Word splat from a full vector load, second word: loads a <16 x i8> from %s
; and repeats bytes 4..7 four times.
; The pwr9 run is expected to add 4 to the pointer and then use lxvwsx. The
; pwr8 run is expected to load the whole vector with lvx and splat word 2
; with xxspltw.
161 define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
162 ; P9-LABEL: adjusted_lxvwsx_v16i8:
163 ; P9: # %bb.0: # %entry
164 ; P9-NEXT: addi r3, r3, 4
165 ; P9-NEXT: lxvwsx v2, 0, r3
168 ; P8-LABEL: adjusted_lxvwsx_v16i8:
169 ; P8: # %bb.0: # %entry
170 ; P8-NEXT: lvx v2, 0, r3
171 ; P8-NEXT: xxspltw v2, v2, 2
174 %0 = load <16 x i8>, <16 x i8>* %s, align 16
; Mask 4,5,6,7 repeated four times: the word at byte offset 4 is splatted.
175 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
; Word splat from a full vector load, third word: loads a <16 x i8> from %s
; and repeats bytes 8..11 four times.
; The pwr9 run is expected to add 8 to the pointer and then use lxvwsx. The
; pwr8 run is expected to load the whole vector with lvx and splat word 1
; with xxspltw.
179 define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
180 ; P9-LABEL: adjusted_lxvwsx_v16i8_2:
181 ; P9: # %bb.0: # %entry
182 ; P9-NEXT: addi r3, r3, 8
183 ; P9-NEXT: lxvwsx v2, 0, r3
186 ; P8-LABEL: adjusted_lxvwsx_v16i8_2:
187 ; P8: # %bb.0: # %entry
188 ; P8-NEXT: lvx v2, 0, r3
189 ; P8-NEXT: xxspltw v2, v2, 1
192 %0 = load <16 x i8>, <16 x i8>* %s, align 16
; Mask 8,9,10,11 repeated four times: the word at byte offset 8 is splatted.
193 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
; Word splat from a full vector load, fourth word: loads a <16 x i8> from %s
; and repeats bytes 12..15 four times.
; The pwr9 run is expected to add 12 to the pointer and then use lxvwsx. The
; pwr8 run is expected to load the whole vector with lvx and splat word 0
; with xxspltw.
197 define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
198 ; P9-LABEL: adjusted_lxvwsx_v16i8_3:
199 ; P9: # %bb.0: # %entry
200 ; P9-NEXT: addi r3, r3, 12
201 ; P9-NEXT: lxvwsx v2, 0, r3
204 ; P8-LABEL: adjusted_lxvwsx_v16i8_3:
205 ; P8: # %bb.0: # %entry
206 ; P8-NEXT: lvx v2, 0, r3
207 ; P8-NEXT: xxspltw v2, v2, 0
210 %0 = load <16 x i8>, <16 x i8>* %s, align 16
; Mask 12,13,14,15 repeated four times: the word at byte offset 12 is splatted.
211 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
; Doubleword splat expressed on bytes: loads 8 bytes through %s (viewed as
; <8 x i8>*) and repeats bytes 0..7 twice to form a <16 x i8>.
; Both run configurations are expected to fold this into one lxvdsx on the
; incoming pointer with no address adjustment.
215 define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
216 ; P9-LABEL: unadjusted_lxvdsx:
217 ; P9: # %bb.0: # %entry
218 ; P9-NEXT: lxvdsx v2, 0, r3
221 ; P8-LABEL: unadjusted_lxvdsx:
222 ; P8: # %bb.0: # %entry
223 ; P8-NEXT: lxvdsx v2, 0, r3
226 %0 = bitcast i64* %s to <8 x i8>*
227 %1 = load <8 x i8>, <8 x i8>* %0, align 8
; Mask 0..7 repeated twice: the loaded doubleword fills both halves.
228 %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Doubleword splat from a full vector load: loads a <16 x i8> from %s and
; repeats bytes 0..7 twice.
; Both run configurations are expected to use one lxvdsx on the incoming
; pointer with no address adjustment.
232 define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
233 ; P9-LABEL: unadjusted_lxvdsx_v16i8:
234 ; P9: # %bb.0: # %entry
235 ; P9-NEXT: lxvdsx v2, 0, r3
238 ; P8-LABEL: unadjusted_lxvdsx_v16i8:
239 ; P8: # %bb.0: # %entry
240 ; P8-NEXT: lxvdsx v2, 0, r3
243 %0 = load <16 x i8>, <16 x i8>* %s, align 16
; Mask 0..7 repeated twice: the doubleword at byte offset 0 is splatted.
244 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Doubleword splat from a full vector load, second doubleword: loads a
; <16 x i8> from %s and repeats bytes 8..15 twice.
; Both run configurations are expected to add 8 to the pointer and then use
; lxvdsx.
248 define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
249 ; P9-LABEL: adjusted_lxvdsx_v16i8:
250 ; P9: # %bb.0: # %entry
251 ; P9-NEXT: addi r3, r3, 8
252 ; P9-NEXT: lxvdsx v2, 0, r3
255 ; P8-LABEL: adjusted_lxvdsx_v16i8:
256 ; P8: # %bb.0: # %entry
257 ; P8-NEXT: addi r3, r3, 8
258 ; P8-NEXT: lxvdsx v2, 0, r3
261 %0 = load <16 x i8>, <16 x i8>* %s, align 16
; Mask 8..15 repeated twice: the doubleword at byte offset 8 is splatted.
262 %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>