1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
4 ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
6 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7 ; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
8 ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
9 ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
; s2v_test1: load one i32 through %int32 (align 4), insert it into element 0
; of %vec and return the updated vector.
; Expected code per run line:
;   pwr9: lwz into r3, mtfprwz into f0, then xxinsertw into v2 with
;         immediate 12 for the powerpc64le triple and 0 for powerpc64.
;   pwr8, powerpc64le: lxsiwzx of the scalar, a mask loaded with lvx from
;         .LCPI0_0, then a single vperm.
;   pwr8, powerpc64: lxsiwzx, vmrghw and two xxsldwi.
11 ; Function Attrs: norecurse nounwind readonly
12 define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec) {
13 ; P9LE-LABEL: s2v_test1:
14 ; P9LE: # %bb.0: # %entry
15 ; P9LE-NEXT: lwz r3, 0(r3)
16 ; P9LE-NEXT: mtfprwz f0, r3
17 ; P9LE-NEXT: xxinsertw v2, vs0, 12
20 ; P9BE-LABEL: s2v_test1:
21 ; P9BE: # %bb.0: # %entry
22 ; P9BE-NEXT: lwz r3, 0(r3)
23 ; P9BE-NEXT: mtfprwz f0, r3
24 ; P9BE-NEXT: xxinsertw v2, vs0, 0
27 ; P8LE-LABEL: s2v_test1:
28 ; P8LE: # %bb.0: # %entry
29 ; P8LE-NEXT: addis r4, r2, .LCPI0_0@toc@ha
30 ; P8LE-NEXT: lxsiwzx v4, 0, r3
31 ; P8LE-NEXT: addi r4, r4, .LCPI0_0@toc@l
32 ; P8LE-NEXT: lvx v3, 0, r4
33 ; P8LE-NEXT: vperm v2, v2, v4, v3
36 ; P8BE-LABEL: s2v_test1:
37 ; P8BE: # %bb.0: # %entry
38 ; P8BE-NEXT: lxsiwzx v3, 0, r3
39 ; P8BE-NEXT: vmrghw v4, v2, v3
40 ; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
41 ; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
44 %0 = load i32, i32* %int32, align 4
45 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
; Terminator restored: the block previously ended on the insertelement,
; unlike the other <4 x i32> tests in this file, which all return %vecins.
46 ret <4 x i32> %vecins
; s2v_test2: load the i32 at index 1 of %int32 (getelementptr by 1 element,
; align 4), insert it into element 0 of %vec and return the updated vector.
; Expected code per run line:
;   pwr9: lwz with displacement 4, mtfprwz into f0, then xxinsertw into v2
;         with immediate 12 for the powerpc64le triple and 0 for powerpc64.
;   pwr8: the address is first advanced with addi r3, r3, 4 and the scalar
;         is loaded with lxsiwzx; powerpc64le then uses a mask from
;         .LCPI1_0 and vperm, powerpc64 uses vmrghw and two xxsldwi.
49 ; Function Attrs: norecurse nounwind readonly
50 define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec) {
51 ; P9LE-LABEL: s2v_test2:
52 ; P9LE: # %bb.0: # %entry
53 ; P9LE-NEXT: lwz r3, 4(r3)
54 ; P9LE-NEXT: mtfprwz f0, r3
55 ; P9LE-NEXT: xxinsertw v2, vs0, 12
58 ; P9BE-LABEL: s2v_test2:
59 ; P9BE: # %bb.0: # %entry
60 ; P9BE-NEXT: lwz r3, 4(r3)
61 ; P9BE-NEXT: mtfprwz f0, r3
62 ; P9BE-NEXT: xxinsertw v2, vs0, 0
65 ; P8LE-LABEL: s2v_test2:
66 ; P8LE: # %bb.0: # %entry
67 ; P8LE-NEXT: addis r4, r2, .LCPI1_0@toc@ha
68 ; P8LE-NEXT: addi r3, r3, 4
69 ; P8LE-NEXT: addi r4, r4, .LCPI1_0@toc@l
70 ; P8LE-NEXT: lxsiwzx v4, 0, r3
71 ; P8LE-NEXT: lvx v3, 0, r4
72 ; P8LE-NEXT: vperm v2, v2, v4, v3
75 ; P8BE-LABEL: s2v_test2:
76 ; P8BE: # %bb.0: # %entry
77 ; P8BE-NEXT: addi r3, r3, 4
78 ; P8BE-NEXT: lxsiwzx v3, 0, r3
79 ; P8BE-NEXT: vmrghw v4, v2, v3
80 ; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
81 ; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
84 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
85 %0 = load i32, i32* %arrayidx, align 4
86 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
; Terminator restored: the block previously ended on the insertelement,
; unlike the other <4 x i32> tests in this file, which all return %vecins.
87 ret <4 x i32> %vecins
; s2v_test3: load the i32 at a run-time index of %int32 and insert it into
; element 0 of %vec. %Idx is a signext i32 that is sign-extended to i64
; before it is used as the getelementptr index.
; Expected code per run line:
;   all: the index is taken from r7 and scaled with sldi by 2 (x4 bytes).
;   pwr9: indexed lwzx, mtfprwz into f0, then xxinsertw into v2 with
;         immediate 12 for the powerpc64le triple and 0 for powerpc64.
;   pwr8: indexed lxsiwzx; powerpc64le then uses a mask from .LCPI2_0 and
;         vperm, powerpc64 uses vmrghw and two xxsldwi.
90 ; Function Attrs: norecurse nounwind readonly
91 define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx) {
92 ; P9LE-LABEL: s2v_test3:
93 ; P9LE: # %bb.0: # %entry
94 ; P9LE-NEXT: sldi r4, r7, 2
95 ; P9LE-NEXT: lwzx r3, r3, r4
96 ; P9LE-NEXT: mtfprwz f0, r3
97 ; P9LE-NEXT: xxinsertw v2, vs0, 12
100 ; P9BE-LABEL: s2v_test3:
101 ; P9BE: # %bb.0: # %entry
102 ; P9BE-NEXT: sldi r4, r7, 2
103 ; P9BE-NEXT: lwzx r3, r3, r4
104 ; P9BE-NEXT: mtfprwz f0, r3
105 ; P9BE-NEXT: xxinsertw v2, vs0, 0
108 ; P8LE-LABEL: s2v_test3:
109 ; P8LE: # %bb.0: # %entry
110 ; P8LE-NEXT: addis r4, r2, .LCPI2_0@toc@ha
111 ; P8LE-NEXT: sldi r5, r7, 2
112 ; P8LE-NEXT: addi r4, r4, .LCPI2_0@toc@l
113 ; P8LE-NEXT: lxsiwzx v3, r3, r5
114 ; P8LE-NEXT: lvx v4, 0, r4
115 ; P8LE-NEXT: vperm v2, v2, v3, v4
118 ; P8BE-LABEL: s2v_test3:
119 ; P8BE: # %bb.0: # %entry
120 ; P8BE-NEXT: sldi r4, r7, 2
121 ; P8BE-NEXT: lxsiwzx v3, r3, r4
122 ; P8BE-NEXT: vmrghw v4, v2, v3
123 ; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
124 ; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
127 %idxprom = sext i32 %Idx to i64
128 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
129 %0 = load i32, i32* %arrayidx, align 4
130 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
131 ret <4 x i32> %vecins
; s2v_test4: load the i32 at index 1 of %int32 (align 4), insert it into
; element 0 of %vec and return the result. The visible IR uses the same
; getelementptr/load/insertelement sequence as s2v_test2.
; Expected code per run line:
;   pwr9: lwz with displacement 4, mtfprwz into f0, then xxinsertw into v2
;         with immediate 12 for the powerpc64le triple and 0 for powerpc64.
;   pwr8: addi r3, r3, 4 followed by lxsiwzx; powerpc64le then uses a mask
;         from .LCPI3_0 and vperm, powerpc64 uses vmrghw and two xxsldwi.
134 ; Function Attrs: norecurse nounwind readonly
135 define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec) {
136 ; P9LE-LABEL: s2v_test4:
137 ; P9LE: # %bb.0: # %entry
138 ; P9LE-NEXT: lwz r3, 4(r3)
139 ; P9LE-NEXT: mtfprwz f0, r3
140 ; P9LE-NEXT: xxinsertw v2, vs0, 12
143 ; P9BE-LABEL: s2v_test4:
144 ; P9BE: # %bb.0: # %entry
145 ; P9BE-NEXT: lwz r3, 4(r3)
146 ; P9BE-NEXT: mtfprwz f0, r3
147 ; P9BE-NEXT: xxinsertw v2, vs0, 0
150 ; P8LE-LABEL: s2v_test4:
151 ; P8LE: # %bb.0: # %entry
152 ; P8LE-NEXT: addis r4, r2, .LCPI3_0@toc@ha
153 ; P8LE-NEXT: addi r3, r3, 4
154 ; P8LE-NEXT: addi r4, r4, .LCPI3_0@toc@l
155 ; P8LE-NEXT: lxsiwzx v4, 0, r3
156 ; P8LE-NEXT: lvx v3, 0, r4
157 ; P8LE-NEXT: vperm v2, v2, v4, v3
160 ; P8BE-LABEL: s2v_test4:
161 ; P8BE: # %bb.0: # %entry
162 ; P8BE-NEXT: addi r3, r3, 4
163 ; P8BE-NEXT: lxsiwzx v3, 0, r3
164 ; P8BE-NEXT: vmrghw v4, v2, v3
165 ; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
166 ; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
169 %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
170 %0 = load i32, i32* %arrayidx, align 4
171 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
172 ret <4 x i32> %vecins
; s2v_test5: load one i32 through %ptr1 (align 4), insert it into element 0
; of %vec and return the result. Unlike the earlier tests, the vector is the
; first parameter and the pointer the second; the expected code reads the
; pointer from r5 instead of r3.
; Expected code per run line:
;   pwr9: lwz from 0(r5), mtfprwz into f0, then xxinsertw into v2 with
;         immediate 12 for the powerpc64le triple and 0 for powerpc64.
;   pwr8: lxsiwzx from r5; powerpc64le then uses a mask from .LCPI4_0 and
;         vperm, powerpc64 uses vmrghw and two xxsldwi.
175 ; Function Attrs: norecurse nounwind readonly
176 define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1) {
177 ; P9LE-LABEL: s2v_test5:
178 ; P9LE: # %bb.0: # %entry
179 ; P9LE-NEXT: lwz r3, 0(r5)
180 ; P9LE-NEXT: mtfprwz f0, r3
181 ; P9LE-NEXT: xxinsertw v2, vs0, 12
184 ; P9BE-LABEL: s2v_test5:
185 ; P9BE: # %bb.0: # %entry
186 ; P9BE-NEXT: lwz r3, 0(r5)
187 ; P9BE-NEXT: mtfprwz f0, r3
188 ; P9BE-NEXT: xxinsertw v2, vs0, 0
191 ; P8LE-LABEL: s2v_test5:
192 ; P8LE: # %bb.0: # %entry
193 ; P8LE-NEXT: addis r3, r2, .LCPI4_0@toc@ha
194 ; P8LE-NEXT: lxsiwzx v4, 0, r5
195 ; P8LE-NEXT: addi r3, r3, .LCPI4_0@toc@l
196 ; P8LE-NEXT: lvx v3, 0, r3
197 ; P8LE-NEXT: vperm v2, v2, v4, v3
200 ; P8BE-LABEL: s2v_test5:
201 ; P8BE: # %bb.0: # %entry
202 ; P8BE-NEXT: lxsiwzx v3, 0, r5
203 ; P8BE-NEXT: vmrghw v4, v2, v3
204 ; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
205 ; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
208 %0 = load i32, i32* %ptr1, align 4
209 %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
210 ret <4 x i32> %vecins
; s2v_test_f1: load one float through %f64 (align 4), insert it into
; element 0 of the <4 x float> %vec and return the result. Despite its name,
; the %f64 parameter is declared as float*, not double*.
; Expected code per run line:
;   pwr9: lfs into f0, xscvdpspn on vs0, then xxinsertw into v2 with
;         immediate 12 for the powerpc64le triple and 0 for powerpc64.
;   pwr8: lxsiwzx of the scalar; powerpc64le then uses a mask from .LCPI5_0
;         and vperm, powerpc64 uses vmrghw and two xxsldwi.
213 ; Function Attrs: norecurse nounwind readonly
214 define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec) {
215 ; P9LE-LABEL: s2v_test_f1:
216 ; P9LE: # %bb.0: # %entry
217 ; P9LE-NEXT: lfs f0, 0(r3)
218 ; P9LE-NEXT: xscvdpspn vs0, f0
219 ; P9LE-NEXT: xxinsertw v2, vs0, 12
222 ; P9BE-LABEL: s2v_test_f1:
223 ; P9BE: # %bb.0: # %entry
224 ; P9BE-NEXT: lfs f0, 0(r3)
225 ; P9BE-NEXT: xscvdpspn vs0, f0
226 ; P9BE-NEXT: xxinsertw v2, vs0, 0
229 ; P8LE-LABEL: s2v_test_f1:
230 ; P8LE: # %bb.0: # %entry
231 ; P8LE-NEXT: addis r4, r2, .LCPI5_0@toc@ha
232 ; P8LE-NEXT: lxsiwzx v4, 0, r3
233 ; P8LE-NEXT: addi r4, r4, .LCPI5_0@toc@l
234 ; P8LE-NEXT: lvx v3, 0, r4
235 ; P8LE-NEXT: vperm v2, v2, v4, v3
238 ; P8BE-LABEL: s2v_test_f1:
239 ; P8BE: # %bb.0: # %entry
240 ; P8BE-NEXT: lxsiwzx v3, 0, r3
241 ; P8BE-NEXT: vmrghw v4, v2, v3
242 ; P8BE-NEXT: xxsldwi vs0, v2, v3, 1
243 ; P8BE-NEXT: xxsldwi v2, v4, vs0, 3
246 %0 = load float, float* %f64, align 4
247 %vecins = insertelement <4 x float> %vec, float %0, i32 0
248 ret <4 x float> %vecins
; s2v_test_f2: load the float at index 1 of %f64, insert it into element 0
; of the <2 x float> %vec and return the result. %f64 is declared as float*.
; Expected code per run line (same shape for pwr8 and pwr9):
;   powerpc64le: addi r3, r3, 4; vmrglw v2, v2, v2; lxsiwzx into v3; then
;                vmrghw v2, v2, v3.
;   powerpc64:   addi r3, r3, 4; lxsiwzx into v3; then vmrgow v2, v3, v2.
; NOTE(review): the load is marked align 8 although the address is %f64 plus
; one float (4 bytes) -- confirm that this alignment is intentional.
251 ; Function Attrs: norecurse nounwind readonly
252 define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec) {
253 ; P9LE-LABEL: s2v_test_f2:
254 ; P9LE: # %bb.0: # %entry
255 ; P9LE-NEXT: addi r3, r3, 4
256 ; P9LE-NEXT: vmrglw v2, v2, v2
257 ; P9LE-NEXT: lxsiwzx v3, 0, r3
258 ; P9LE-NEXT: vmrghw v2, v2, v3
261 ; P9BE-LABEL: s2v_test_f2:
262 ; P9BE: # %bb.0: # %entry
263 ; P9BE-NEXT: addi r3, r3, 4
264 ; P9BE-NEXT: lxsiwzx v3, 0, r3
265 ; P9BE-NEXT: vmrgow v2, v3, v2
268 ; P8LE-LABEL: s2v_test_f2:
269 ; P8LE: # %bb.0: # %entry
270 ; P8LE-NEXT: vmrglw v2, v2, v2
271 ; P8LE-NEXT: addi r3, r3, 4
272 ; P8LE-NEXT: lxsiwzx v3, 0, r3
273 ; P8LE-NEXT: vmrghw v2, v2, v3
276 ; P8BE-LABEL: s2v_test_f2:
277 ; P8BE: # %bb.0: # %entry
278 ; P8BE-NEXT: addi r3, r3, 4
279 ; P8BE-NEXT: lxsiwzx v3, 0, r3
280 ; P8BE-NEXT: vmrgow v2, v3, v2
283 %arrayidx = getelementptr inbounds float, float* %f64, i64 1
284 %0 = load float, float* %arrayidx, align 8
285 %vecins = insertelement <2 x float> %vec, float %0, i32 0
286 ret <2 x float> %vecins
; s2v_test_f3: load the float at a run-time index of %f64, insert it into
; element 0 of the <2 x float> %vec and return the result. %Idx is a signext
; i32 that is sign-extended to i64 before it is used as the getelementptr
; index. %f64 is declared as float*.
; Expected code per run line (same shape for pwr8 and pwr9):
;   all:         the index is taken from r7 and scaled with sldi by 2.
;   powerpc64le: vmrglw v2, v2, v2; indexed lxsiwzx into v3; then
;                vmrghw v2, v2, v3.
;   powerpc64:   indexed lxsiwzx into v3; then vmrgow v2, v3, v2.
; NOTE(review): the load is marked align 8 even though the element index is
; only known at run time -- confirm that this alignment is intentional.
289 ; Function Attrs: norecurse nounwind readonly
290 define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx) {
291 ; P9LE-LABEL: s2v_test_f3:
292 ; P9LE: # %bb.0: # %entry
293 ; P9LE-NEXT: sldi r4, r7, 2
294 ; P9LE-NEXT: vmrglw v2, v2, v2
295 ; P9LE-NEXT: lxsiwzx v3, r3, r4
296 ; P9LE-NEXT: vmrghw v2, v2, v3
299 ; P9BE-LABEL: s2v_test_f3:
300 ; P9BE: # %bb.0: # %entry
301 ; P9BE-NEXT: sldi r4, r7, 2
302 ; P9BE-NEXT: lxsiwzx v3, r3, r4
303 ; P9BE-NEXT: vmrgow v2, v3, v2
306 ; P8LE-LABEL: s2v_test_f3:
307 ; P8LE: # %bb.0: # %entry
308 ; P8LE-NEXT: vmrglw v2, v2, v2
309 ; P8LE-NEXT: sldi r4, r7, 2
310 ; P8LE-NEXT: lxsiwzx v3, r3, r4
311 ; P8LE-NEXT: vmrghw v2, v2, v3
314 ; P8BE-LABEL: s2v_test_f3:
315 ; P8BE: # %bb.0: # %entry
316 ; P8BE-NEXT: sldi r4, r7, 2
317 ; P8BE-NEXT: lxsiwzx v3, r3, r4
318 ; P8BE-NEXT: vmrgow v2, v3, v2
321 %idxprom = sext i32 %Idx to i64
322 %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
323 %0 = load float, float* %arrayidx, align 8
324 %vecins = insertelement <2 x float> %vec, float %0, i32 0
325 ret <2 x float> %vecins
; s2v_test_f4: load the float at index 1 of %f64, insert it into element 0
; of the <2 x float> %vec and return the result. The IR body matches
; s2v_test_f2 instruction for instruction. %f64 is declared as float*.
; Expected code per run line (same shape for pwr8 and pwr9):
;   powerpc64le: addi r3, r3, 4; vmrglw v2, v2, v2; lxsiwzx into v3; then
;                vmrghw v2, v2, v3.
;   powerpc64:   addi r3, r3, 4; lxsiwzx into v3; then vmrgow v2, v3, v2.
; NOTE(review): the load is marked align 8 although the address is %f64 plus
; one float (4 bytes) -- confirm that this alignment is intentional.
328 ; Function Attrs: norecurse nounwind readonly
329 define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec) {
330 ; P9LE-LABEL: s2v_test_f4:
331 ; P9LE: # %bb.0: # %entry
332 ; P9LE-NEXT: addi r3, r3, 4
333 ; P9LE-NEXT: vmrglw v2, v2, v2
334 ; P9LE-NEXT: lxsiwzx v3, 0, r3
335 ; P9LE-NEXT: vmrghw v2, v2, v3
338 ; P9BE-LABEL: s2v_test_f4:
339 ; P9BE: # %bb.0: # %entry
340 ; P9BE-NEXT: addi r3, r3, 4
341 ; P9BE-NEXT: lxsiwzx v3, 0, r3
342 ; P9BE-NEXT: vmrgow v2, v3, v2
345 ; P8LE-LABEL: s2v_test_f4:
346 ; P8LE: # %bb.0: # %entry
347 ; P8LE-NEXT: vmrglw v2, v2, v2
348 ; P8LE-NEXT: addi r3, r3, 4
349 ; P8LE-NEXT: lxsiwzx v3, 0, r3
350 ; P8LE-NEXT: vmrghw v2, v2, v3
353 ; P8BE-LABEL: s2v_test_f4:
354 ; P8BE: # %bb.0: # %entry
355 ; P8BE-NEXT: addi r3, r3, 4
356 ; P8BE-NEXT: lxsiwzx v3, 0, r3
357 ; P8BE-NEXT: vmrgow v2, v3, v2
360 %arrayidx = getelementptr inbounds float, float* %f64, i64 1
361 %0 = load float, float* %arrayidx, align 8
362 %vecins = insertelement <2 x float> %vec, float %0, i32 0
363 ret <2 x float> %vecins
; s2v_test_f5: load one float through %ptr1, insert it into element 0 of the
; <2 x float> %vec and return the result. The vector is the first parameter
; and the pointer the second; the expected code reads the pointer from r5.
; Expected code per run line (same shape for pwr8 and pwr9):
;   powerpc64le: lxsiwzx from r5 into v3 and vmrglw v2, v2, v2 (in either
;                order depending on the CPU), then vmrghw v2, v2, v3.
;   powerpc64:   lxsiwzx from r5 into v3, then vmrgow v2, v3, v2.
; NOTE(review): the float load is marked align 8 rather than 4 -- confirm
; that this alignment is intentional.
366 ; Function Attrs: norecurse nounwind readonly
367 define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) {
368 ; P9LE-LABEL: s2v_test_f5:
369 ; P9LE: # %bb.0: # %entry
370 ; P9LE-NEXT: lxsiwzx v3, 0, r5
371 ; P9LE-NEXT: vmrglw v2, v2, v2
372 ; P9LE-NEXT: vmrghw v2, v2, v3
375 ; P9BE-LABEL: s2v_test_f5:
376 ; P9BE: # %bb.0: # %entry
377 ; P9BE-NEXT: lxsiwzx v3, 0, r5
378 ; P9BE-NEXT: vmrgow v2, v3, v2
381 ; P8LE-LABEL: s2v_test_f5:
382 ; P8LE: # %bb.0: # %entry
383 ; P8LE-NEXT: vmrglw v2, v2, v2
384 ; P8LE-NEXT: lxsiwzx v3, 0, r5
385 ; P8LE-NEXT: vmrghw v2, v2, v3
388 ; P8BE-LABEL: s2v_test_f5:
389 ; P8BE: # %bb.0: # %entry
390 ; P8BE-NEXT: lxsiwzx v3, 0, r5
391 ; P8BE-NEXT: vmrgow v2, v3, v2
394 %0 = load float, float* %ptr1, align 8
395 %vecins = insertelement <2 x float> %vec, float %0, i32 0
396 ret <2 x float> %vecins