1 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
2 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
3 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
4 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
5 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
6 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
7 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
8 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
; s2v_test1: loads an i32 straight from %int32 and inserts it into element 0
; of the <4 x i32> argument %vec. The check lines visible for this function
; use only the two pwr8 prefixes (little- and big-endian).
10 ; Function Attrs: norecurse nounwind readonly
11 define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
12 ; P8LE-LABEL: s2v_test1:
13 ; P8LE: # %bb.0: # %entry
14 ; P8LE-NEXT: lfiwzx f0, 0, r3
15 ; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
16 ; P8LE-NEXT: addi r3, r4, .LCPI0_0@toc@l
17 ; P8LE-NEXT: lvx v4, 0, r3
18 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
19 ; P8LE-NEXT: vperm v2, v3, v2, v4
; The big-endian checks below mix plain and strict next-line directives, so
; unmatched output lines are tolerated before the plain ones.
22 ; P8BE-LABEL: s2v_test1:
23 ; P8BE: # %bb.0: # %entry
24 ; P8BE: lfiwzx f0, 0, r3
25 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
26 ; P8BE: xxsldwi vs0, v2, vs0, 1
27 ; P8BE: xxsldwi v2, vs0, vs0, 3
30 %0 = load i32, i32* %int32, align 4
31 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
; s2v_test2: loads the i32 located one element past %int32 (constant GEP
; index 1) and inserts it into element 0 of %vec. Both sets of visible checks
; expect the address to be formed with an explicit `addi r3, r3, 4`.
35 ; Function Attrs: norecurse nounwind readonly
36 define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
37 ; P8LE-LABEL: s2v_test2:
38 ; P8LE: # %bb.0: # %entry
39 ; P8LE-NEXT: addi r3, r3, 4
40 ; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
41 ; P8LE-NEXT: lfiwzx f0, 0, r3
42 ; P8LE-NEXT: addi r3, r4, .LCPI1_0@toc@l
43 ; P8LE-NEXT: lvx v4, 0, r3
44 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
45 ; P8LE-NEXT: vperm v2, v3, v2, v4
; The big-endian checks below mix plain and strict next-line directives, so
; unmatched output lines are tolerated before the plain ones.
48 ; P8BE-LABEL: s2v_test2:
49 ; P8BE: # %bb.0: # %entry
50 ; P8BE: addi r3, r3, 4
51 ; P8BE: lfiwzx f0, 0, r3
52 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
53 ; P8BE: xxsldwi vs0, v2, vs0, 1
54 ; P8BE: xxsldwi v2, vs0, vs0, 3
; Address of the second i32 relative to %int32.
57 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
58 %0 = load i32, i32* %arrayidx, align 4
59 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
; s2v_test3: loads the i32 at a run-time index (%Idx, sign-extended to i64)
; from %int32 and inserts it into element 0 of %vec. The visible checks expect
; the index in r7 to be scaled by `sldi ..., r7, 2` and then used as the
; second address operand of lfiwzx.
64 ; Function Attrs: norecurse nounwind readonly
65 define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) {
66 ; P8LE-LABEL: s2v_test3:
67 ; P8LE: # %bb.0: # %entry
68 ; P8LE-NEXT: sldi r5, r7, 2
69 ; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
70 ; P8LE-NEXT: lfiwzx f0, r3, r5
71 ; P8LE-NEXT: addi r3, r4, .LCPI2_0@toc@l
72 ; P8LE-NEXT: lvx v4, 0, r3
73 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
74 ; P8LE-NEXT: vperm v2, v3, v2, v4
; The big-endian checks below mix plain and strict next-line directives, so
; unmatched output lines are tolerated before the plain ones.
77 ; P8BE-LABEL: s2v_test3:
78 ; P8BE: # %bb.0: # %entry
79 ; P8BE: sldi r4, r7, 2
80 ; P8BE: lfiwzx f0, r3, r4
81 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
82 ; P8BE: xxsldwi vs0, v2, vs0, 1
83 ; P8BE: xxsldwi v2, vs0, vs0, 3
; Widen the signed 32-bit index before using it in the GEP.
86 %idxprom = sext i32 %Idx to i64
87 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
88 %0 = load i32, i32* %arrayidx, align 4
89 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
; s2v_test4: loads the i32 one element past %int32 (constant GEP index 1),
; inserts it into element 0 of %vec and returns the resulting vector.
; NOTE(review): the IR visible here is identical to s2v_test2's, and the
; expected instructions match too - confirm whether the duplication is
; intentional.
93 ; Function Attrs: norecurse nounwind readonly
94 define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
95 ; P8LE-LABEL: s2v_test4:
96 ; P8LE: # %bb.0: # %entry
97 ; P8LE-NEXT: addi r3, r3, 4
98 ; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
99 ; P8LE-NEXT: lfiwzx f0, 0, r3
100 ; P8LE-NEXT: addi r3, r4, .LCPI3_0@toc@l
101 ; P8LE-NEXT: lvx v4, 0, r3
102 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
103 ; P8LE-NEXT: vperm v2, v3, v2, v4
; The big-endian checks below mix plain and strict next-line directives, so
; unmatched output lines are tolerated before the plain ones.
106 ; P8BE-LABEL: s2v_test4:
107 ; P8BE: # %bb.0: # %entry
108 ; P8BE: addi r3, r3, 4
109 ; P8BE: lfiwzx f0, 0, r3
110 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
111 ; P8BE: xxsldwi vs0, v2, vs0, 1
112 ; P8BE: xxsldwi v2, vs0, vs0, 3
; Address of the second i32 relative to %int32.
115 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
116 %0 = load i32, i32* %arrayidx, align 4
117 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
118 ret <4 x i32> %vecins
; s2v_test5: same insertion as the pointer-first variants, but with the
; argument order swapped - the vector comes first and the pointer (%ptr1)
; second. Loads an i32 from %ptr1, inserts it into element 0 of %vec and
; returns the result. The visible checks expect the load address in r5.
121 ; Function Attrs: norecurse nounwind readonly
122 define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
123 ; P8LE-LABEL: s2v_test5:
124 ; P8LE: # %bb.0: # %entry
125 ; P8LE-NEXT: lfiwzx f0, 0, r5
126 ; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
127 ; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l
128 ; P8LE-NEXT: lvx v4, 0, r3
129 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
130 ; P8LE-NEXT: vperm v2, v3, v2, v4
; The big-endian checks below mix plain and strict next-line directives, so
; unmatched output lines are tolerated before the plain ones.
133 ; P8BE-LABEL: s2v_test5:
134 ; P8BE: # %bb.0: # %entry
135 ; P8BE: lfiwzx f0, 0, r5
136 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
137 ; P8BE: xxsldwi vs0, v2, vs0, 1
138 ; P8BE: xxsldwi v2, vs0, vs0, 3
141 %0 = load i32, i32* %ptr1, align 4
142 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
143 ret <4 x i32> %vecins
; s2v_test_f1: floating-point counterpart of the i32 tests. Loads a float
; from the pointer argument (named %f64, although its type is float*),
; inserts it into element 0 of the <4 x float> argument %vec and returns the
; result. Only the two pwr8 prefixes have visible checks for this function.
146 ; Function Attrs: norecurse nounwind readonly
147 define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) {
148 ; P8LE-LABEL: s2v_test_f1:
149 ; P8LE: # %bb.0: # %entry
150 ; P8LE-NEXT: lfiwzx f0, 0, r3
151 ; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
152 ; P8LE-NEXT: addi r3, r4, .LCPI5_0@toc@l
153 ; P8LE-NEXT: lvx v4, 0, r3
154 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
155 ; P8LE-NEXT: vperm v2, v3, v2, v4
; The big-endian checks below mix plain and strict next-line directives, so
; unmatched output lines are tolerated before the plain ones.
158 ; P8BE-LABEL: s2v_test_f1:
159 ; P8BE: # %bb.0: # %entry
160 ; P8BE: lfiwzx f0, 0, r3
161 ; P8BE-NEXT: xxsldwi vs0, f0, f0, 1
162 ; P8BE: xxsldwi vs0, v2, vs0, 1
163 ; P8BE: xxsldwi v2, vs0, vs0, 3
166 %0 = load float, float* %f64, align 4
167 %vecins = insertelement <4 x float> %vec, float %0, i32 0
168 ret <4 x float> %vecins
; s2v_test_f2: loads the float one element past %f64 (constant GEP index 1),
; inserts it into element 0 of the <2 x float> argument %vec and returns the
; result. Checks are present for all four configurations (pwr9 and pwr8,
; little- and big-endian).
171 ; Function Attrs: norecurse nounwind readonly
172 define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) {
; pwr9 checks: the splat and the load are matched with order-independent
; directives, so either may be emitted first.
173 ; P9LE-LABEL: s2v_test_f2:
174 ; P9LE: # %bb.0: # %entry
175 ; P9LE-NEXT: addi r3, r3, 4
176 ; P9LE-DAG: xxspltw v2, v2, 2
177 ; P9LE-DAG: lfiwzx f0, 0, r3
178 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
179 ; P9LE-NEXT: vmrglw v2, v2, v3
182 ; P9BE-LABEL: s2v_test_f2:
183 ; P9BE: # %bb.0: # %entry
184 ; P9BE: addi r3, r3, 4
185 ; P9BE-DAG: xxspltw v2, v2, 1
186 ; P9BE-DAG: lfiwzx f0, 0, r3
187 ; P9BE-NEXT: xxsldwi v3, f0, f0, 1
188 ; P9BE: vmrghw v2, v3, v2
; pwr8 checks: every instruction is pinned to a fixed order.
191 ; P8LE-LABEL: s2v_test_f2:
192 ; P8LE: # %bb.0: # %entry
193 ; P8LE-NEXT: addi r3, r3, 4
194 ; P8LE-NEXT: xxspltw v2, v2, 2
195 ; P8LE-NEXT: lfiwzx f0, 0, r3
196 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
197 ; P8LE-NEXT: vmrglw v2, v2, v3
200 ; P8BE-LABEL: s2v_test_f2:
201 ; P8BE: # %bb.0: # %entry
202 ; P8BE-NEXT: addi r3, r3, 4
203 ; P8BE-NEXT: xxspltw v2, v2, 1
204 ; P8BE-NEXT: lfiwzx f0, 0, r3
205 ; P8BE-NEXT: xxsldwi v3, f0, f0, 1
206 ; P8BE-NEXT: vmrghw v2, v3, v2
; Address of the second float relative to %f64.
209 %arrayidx = getelementptr inbounds float, float* %f64, i64 1
; NOTE(review): the load states 8-byte alignment for a float at element
; index 1 - presumably carried over from the original source; confirm.
210 %0 = load float, float* %arrayidx, align 8
211 %vecins = insertelement <2 x float> %vec, float %0, i32 0
212 ret <2 x float> %vecins
; s2v_test_f3: loads the float at a run-time index (%Idx, sign-extended to
; i64) from %f64, inserts it into element 0 of the <2 x float> argument %vec
; and returns the result. All four configurations expect the index in r7 to
; be scaled with `sldi r4, r7, 2` and used as the indexed operand of lfiwzx.
215 ; Function Attrs: norecurse nounwind readonly
216 define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) {
; pwr9 checks: the splat and the permute/shift of the loaded value are
; matched with order-independent directives.
217 ; P9LE-LABEL: s2v_test_f3:
218 ; P9LE: # %bb.0: # %entry
219 ; P9LE-NEXT: sldi r4, r7, 2
220 ; P9LE-NEXT: lfiwzx f0, r3, r4
221 ; P9LE-DAG: xxspltw v2, v2, 2
222 ; P9LE-DAG: xxpermdi v3, f0, f0, 2
223 ; P9LE-NEXT: vmrglw v2, v2, v3
226 ; P9BE-LABEL: s2v_test_f3:
227 ; P9BE: # %bb.0: # %entry
228 ; P9BE: sldi r4, r7, 2
229 ; P9BE: lfiwzx f0, r3, r4
230 ; P9BE-DAG: xxspltw v2, v2, 1
231 ; P9BE-DAG: xxsldwi v3, f0, f0, 1
232 ; P9BE: vmrghw v2, v3, v2
; pwr8 checks: every instruction is pinned to a fixed order.
235 ; P8LE-LABEL: s2v_test_f3:
236 ; P8LE: # %bb.0: # %entry
237 ; P8LE-NEXT: sldi r4, r7, 2
238 ; P8LE-NEXT: xxspltw v2, v2, 2
239 ; P8LE-NEXT: lfiwzx f0, r3, r4
240 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
241 ; P8LE-NEXT: vmrglw v2, v2, v3
244 ; P8BE-LABEL: s2v_test_f3:
245 ; P8BE: # %bb.0: # %entry
246 ; P8BE-NEXT: sldi r4, r7, 2
247 ; P8BE-NEXT: xxspltw v2, v2, 1
248 ; P8BE-NEXT: lfiwzx f0, r3, r4
249 ; P8BE-NEXT: xxsldwi v3, f0, f0, 1
250 ; P8BE-NEXT: vmrghw v2, v3, v2
; Widen the signed 32-bit index before using it in the GEP.
253 %idxprom = sext i32 %Idx to i64
254 %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
; NOTE(review): the load states 8-byte alignment for a float at an arbitrary
; element index - presumably carried over from the original source; confirm.
255 %0 = load float, float* %arrayidx, align 8
256 %vecins = insertelement <2 x float> %vec, float %0, i32 0
257 ret <2 x float> %vecins
; s2v_test_f4: loads the float one element past %f64 (constant GEP index 1),
; inserts it into element 0 of the <2 x float> argument %vec and returns the
; result.
; NOTE(review): the IR visible here is identical to s2v_test_f2's; only the
; placement of the order-independent pwr9 directives differs - confirm
; whether the duplication is intentional.
260 ; Function Attrs: norecurse nounwind readonly
261 define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) {
; pwr9 checks: the splat and the permute/shift of the loaded value are
; matched with order-independent directives.
262 ; P9LE-LABEL: s2v_test_f4:
263 ; P9LE: # %bb.0: # %entry
264 ; P9LE-NEXT: addi r3, r3, 4
265 ; P9LE-NEXT: lfiwzx f0, 0, r3
266 ; P9LE-DAG: xxspltw v2, v2, 2
267 ; P9LE-DAG: xxpermdi v3, f0, f0, 2
268 ; P9LE-NEXT: vmrglw v2, v2, v3
271 ; P9BE-LABEL: s2v_test_f4:
272 ; P9BE: # %bb.0: # %entry
273 ; P9BE: addi r3, r3, 4
274 ; P9BE: lfiwzx f0, 0, r3
275 ; P9BE-DAG: xxspltw v2, v2, 1
276 ; P9BE-DAG: xxsldwi v3, f0, f0, 1
277 ; P9BE: vmrghw v2, v3, v2
; pwr8 checks: every instruction is pinned to a fixed order.
280 ; P8LE-LABEL: s2v_test_f4:
281 ; P8LE: # %bb.0: # %entry
282 ; P8LE-NEXT: addi r3, r3, 4
283 ; P8LE-NEXT: xxspltw v2, v2, 2
284 ; P8LE-NEXT: lfiwzx f0, 0, r3
285 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
286 ; P8LE-NEXT: vmrglw v2, v2, v3
289 ; P8BE-LABEL: s2v_test_f4:
290 ; P8BE: # %bb.0: # %entry
291 ; P8BE-NEXT: addi r3, r3, 4
292 ; P8BE-NEXT: xxspltw v2, v2, 1
293 ; P8BE-NEXT: lfiwzx f0, 0, r3
294 ; P8BE-NEXT: xxsldwi v3, f0, f0, 1
295 ; P8BE-NEXT: vmrghw v2, v3, v2
; Address of the second float relative to %f64.
298 %arrayidx = getelementptr inbounds float, float* %f64, i64 1
; NOTE(review): the load states 8-byte alignment for a float at element
; index 1 - presumably carried over from the original source; confirm.
299 %0 = load float, float* %arrayidx, align 8
300 %vecins = insertelement <2 x float> %vec, float %0, i32 0
301 ret <2 x float> %vecins
; s2v_test_f5: argument order swapped relative to the pointer-first float
; tests - the <2 x float> vector comes first and the pointer (%ptr1) second.
; Loads a float from %ptr1, inserts it into element 0 of %vec and returns the
; result. All four configurations expect the load address in r5.
304 ; Function Attrs: norecurse nounwind readonly
305 define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) {
306 ; P9LE-LABEL: s2v_test_f5:
307 ; P9LE: # %bb.0: # %entry
308 ; P9LE-NEXT: lfiwzx f0, 0, r5
309 ; P9LE-NEXT: xxspltw v2, v2, 2
310 ; P9LE-NEXT: xxpermdi v3, f0, f0, 2
311 ; P9LE-NEXT: vmrglw v2, v2, v3
; The pwr9 big-endian checks below mix plain and strict next-line directives,
; so unmatched output lines are tolerated before the plain ones.
314 ; P9BE-LABEL: s2v_test_f5:
315 ; P9BE: # %bb.0: # %entry
316 ; P9BE: lfiwzx f0, 0, r5
317 ; P9BE: xxspltw v2, v2, 1
318 ; P9BE-NEXT: xxsldwi v3, f0, f0, 1
319 ; P9BE: vmrghw v2, v3, v2
; pwr8 checks: every instruction is pinned to a fixed order.
322 ; P8LE-LABEL: s2v_test_f5:
323 ; P8LE: # %bb.0: # %entry
324 ; P8LE-NEXT: lfiwzx f0, 0, r5
325 ; P8LE-NEXT: xxspltw v2, v2, 2
326 ; P8LE-NEXT: xxpermdi v3, f0, f0, 2
327 ; P8LE-NEXT: vmrglw v2, v2, v3
330 ; P8BE-LABEL: s2v_test_f5:
331 ; P8BE: # %bb.0: # %entry
332 ; P8BE-NEXT: lfiwzx f0, 0, r5
333 ; P8BE-NEXT: xxspltw v2, v2, 1
334 ; P8BE-NEXT: xxsldwi v3, f0, f0, 1
335 ; P8BE-NEXT: vmrghw v2, v3, v2
; NOTE(review): the load states 8-byte alignment for a 4-byte float -
; presumably carried over from the original source; confirm.
338 %0 = load float, float* %ptr1, align 8
339 %vecins = insertelement <2 x float> %vec, float %0, i32 0
340 ret <2 x float> %vecins