1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3 ; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4 ; RUN: FileCheck %s --check-prefix=CHECK-P8
5 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
6 ; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
7 ; RUN: FileCheck %s --check-prefix=CHECK-P9
8 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
9 ; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
10 ; RUN: FileCheck %s --check-prefix=CHECK-BE
; test2elt: unsigned conversion of two floats packed in an i64.
; The i64 argument %a.coerce is reinterpreted as <2 x float>, converted with
; fptoui to <2 x i32>, and the result is reinterpreted as an i64 (%2).
; The CHECK-P8 / CHECK-P9 / CHECK-BE lines are the expected llc output for the
; three RUN configurations at the top of the file. All three expect the
; conversion to be done by one xvcvspuxws; the two powerpc64le configurations
; additionally expect an xxswapd before it, and differ in how the result is
; moved back to r3 (xxswapd + mfvsrd on P8, mfvsrld on P9).
; These assertions are autogenerated (see the NOTE on the first line); do not
; edit them by hand.
12 define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
13 ; CHECK-P8-LABEL: test2elt:
14 ; CHECK-P8: # %bb.0: # %entry
15 ; CHECK-P8-NEXT: mtvsrd f0, r3
16 ; CHECK-P8-NEXT: xxswapd v2, vs0
17 ; CHECK-P8-NEXT: xvcvspuxws vs0, v2
18 ; CHECK-P8-NEXT: xxswapd vs0, vs0
19 ; CHECK-P8-NEXT: mfvsrd r3, f0
22 ; CHECK-P9-LABEL: test2elt:
23 ; CHECK-P9: # %bb.0: # %entry
24 ; CHECK-P9-NEXT: mtvsrd f0, r3
25 ; CHECK-P9-NEXT: xxswapd v2, vs0
26 ; CHECK-P9-NEXT: xvcvspuxws vs0, v2
27 ; CHECK-P9-NEXT: mfvsrld r3, vs0
30 ; CHECK-BE-LABEL: test2elt:
31 ; CHECK-BE: # %bb.0: # %entry
32 ; CHECK-BE-NEXT: mtvsrd f0, r3
33 ; CHECK-BE-NEXT: xvcvspuxws vs0, vs0
34 ; CHECK-BE-NEXT: mfvsrd r3, f0
; IR under test: i64 -> <2 x float> -> (fptoui) <2 x i32> -> i64.
37 %0 = bitcast i64 %a.coerce to <2 x float>
38 %1 = fptoui <2 x float> %0 to <2 x i32>
39 %2 = bitcast <2 x i32> %1 to i64
; test4elt: unsigned conversion of a full <4 x float> vector argument.
; %a is converted with fptoui to <4 x i32>. For every RUN configuration the
; checks expect the same instruction: xvcvspuxws operating in place on v2.
; These assertions are autogenerated (see the NOTE on the first line); do not
; edit them by hand.
43 define <4 x i32> @test4elt(<4 x float> %a) local_unnamed_addr #1 {
44 ; CHECK-P8-LABEL: test4elt:
45 ; CHECK-P8: # %bb.0: # %entry
46 ; CHECK-P8-NEXT: xvcvspuxws v2, v2
49 ; CHECK-P9-LABEL: test4elt:
50 ; CHECK-P9: # %bb.0: # %entry
51 ; CHECK-P9-NEXT: xvcvspuxws v2, v2
54 ; CHECK-BE-LABEL: test4elt:
55 ; CHECK-BE: # %bb.0: # %entry
56 ; CHECK-BE-NEXT: xvcvspuxws v2, v2
; IR under test: a single fptoui on the vector argument.
59 %0 = fptoui <4 x float> %a to <4 x i32>
; test8elt: unsigned conversion of an <8 x float> vector passed through memory.
; The second parameter is unnamed, so the IR refers to it as %0. The function
; loads <8 x float> from %0 (align 32), converts it with fptoui to <8 x i32>,
; and stores the result through the sret pointer %agg.result (align 32).
; The checks expect two xvcvspuxws instructions in every configuration.
; CHECK-P8 expects lvx/stvx with the offset 16 materialized in r5 by li;
; CHECK-P9 and CHECK-BE expect lxv/stxv with displacements 0 and 16.
; These assertions are autogenerated (see the NOTE on the first line); do not
; edit them by hand.
63 define void @test8elt(<8 x i32>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 {
64 ; CHECK-P8-LABEL: test8elt:
65 ; CHECK-P8: # %bb.0: # %entry
66 ; CHECK-P8-NEXT: li r5, 16
67 ; CHECK-P8-NEXT: lvx v3, 0, r4
68 ; CHECK-P8-NEXT: lvx v2, r4, r5
69 ; CHECK-P8-NEXT: xvcvspuxws v3, v3
70 ; CHECK-P8-NEXT: xvcvspuxws v2, v2
71 ; CHECK-P8-NEXT: stvx v3, 0, r3
72 ; CHECK-P8-NEXT: stvx v2, r3, r5
75 ; CHECK-P9-LABEL: test8elt:
76 ; CHECK-P9: # %bb.0: # %entry
77 ; CHECK-P9-NEXT: lxv vs0, 16(r4)
78 ; CHECK-P9-NEXT: lxv vs1, 0(r4)
79 ; CHECK-P9-NEXT: xvcvspuxws vs1, vs1
80 ; CHECK-P9-NEXT: xvcvspuxws vs0, vs0
81 ; CHECK-P9-NEXT: stxv vs0, 16(r3)
82 ; CHECK-P9-NEXT: stxv vs1, 0(r3)
85 ; CHECK-BE-LABEL: test8elt:
86 ; CHECK-BE: # %bb.0: # %entry
87 ; CHECK-BE-NEXT: lxv vs0, 16(r4)
88 ; CHECK-BE-NEXT: lxv vs1, 0(r4)
89 ; CHECK-BE-NEXT: xvcvspuxws vs1, vs1
90 ; CHECK-BE-NEXT: xvcvspuxws vs0, vs0
91 ; CHECK-BE-NEXT: stxv vs0, 16(r3)
92 ; CHECK-BE-NEXT: stxv vs1, 0(r3)
; IR under test: load from the unnamed pointer %0, fptoui, store to %agg.result.
95 %a = load <8 x float>, <8 x float>* %0, align 32
96 %1 = fptoui <8 x float> %a to <8 x i32>
97 store <8 x i32> %1, <8 x i32>* %agg.result, align 32
; test16elt: unsigned conversion of a <16 x float> vector passed through memory.
; The second parameter is unnamed, so the IR refers to it as %0. The function
; loads <16 x float> from %0 (align 64), converts it with fptoui to
; <16 x i32>, and stores the result through the sret pointer %agg.result
; (align 64).
; The checks expect four xvcvspuxws instructions in every configuration.
; CHECK-P8 expects lvx/stvx with the offsets 16, 32 and 48 materialized in
; r5-r7 by li; CHECK-P9 and CHECK-BE expect lxv/stxv with displacements
; 0, 16, 32 and 48.
; These assertions are autogenerated (see the NOTE on the first line); do not
; edit them by hand.
101 define void @test16elt(<16 x i32>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
102 ; CHECK-P8-LABEL: test16elt:
103 ; CHECK-P8: # %bb.0: # %entry
104 ; CHECK-P8-NEXT: li r5, 16
105 ; CHECK-P8-NEXT: li r6, 32
106 ; CHECK-P8-NEXT: li r7, 48
107 ; CHECK-P8-NEXT: lvx v5, 0, r4
108 ; CHECK-P8-NEXT: lvx v2, r4, r5
109 ; CHECK-P8-NEXT: lvx v3, r4, r6
110 ; CHECK-P8-NEXT: lvx v4, r4, r7
111 ; CHECK-P8-NEXT: xvcvspuxws v5, v5
112 ; CHECK-P8-NEXT: xvcvspuxws v2, v2
113 ; CHECK-P8-NEXT: xvcvspuxws v3, v3
114 ; CHECK-P8-NEXT: xvcvspuxws v4, v4
115 ; CHECK-P8-NEXT: stvx v5, 0, r3
116 ; CHECK-P8-NEXT: stvx v2, r3, r5
117 ; CHECK-P8-NEXT: stvx v3, r3, r6
118 ; CHECK-P8-NEXT: stvx v4, r3, r7
121 ; CHECK-P9-LABEL: test16elt:
122 ; CHECK-P9: # %bb.0: # %entry
123 ; CHECK-P9-NEXT: lxv vs0, 48(r4)
124 ; CHECK-P9-NEXT: lxv vs1, 32(r4)
125 ; CHECK-P9-NEXT: lxv vs2, 16(r4)
126 ; CHECK-P9-NEXT: lxv vs3, 0(r4)
127 ; CHECK-P9-NEXT: xvcvspuxws vs3, vs3
128 ; CHECK-P9-NEXT: xvcvspuxws vs2, vs2
129 ; CHECK-P9-NEXT: xvcvspuxws vs1, vs1
130 ; CHECK-P9-NEXT: xvcvspuxws vs0, vs0
131 ; CHECK-P9-NEXT: stxv vs0, 48(r3)
132 ; CHECK-P9-NEXT: stxv vs1, 32(r3)
133 ; CHECK-P9-NEXT: stxv vs2, 16(r3)
134 ; CHECK-P9-NEXT: stxv vs3, 0(r3)
137 ; CHECK-BE-LABEL: test16elt:
138 ; CHECK-BE: # %bb.0: # %entry
139 ; CHECK-BE-NEXT: lxv vs0, 48(r4)
140 ; CHECK-BE-NEXT: lxv vs1, 32(r4)
141 ; CHECK-BE-NEXT: lxv vs2, 16(r4)
142 ; CHECK-BE-NEXT: lxv vs3, 0(r4)
143 ; CHECK-BE-NEXT: xvcvspuxws vs3, vs3
144 ; CHECK-BE-NEXT: xvcvspuxws vs2, vs2
145 ; CHECK-BE-NEXT: xvcvspuxws vs1, vs1
146 ; CHECK-BE-NEXT: xvcvspuxws vs0, vs0
147 ; CHECK-BE-NEXT: stxv vs0, 48(r3)
148 ; CHECK-BE-NEXT: stxv vs1, 32(r3)
149 ; CHECK-BE-NEXT: stxv vs2, 16(r3)
150 ; CHECK-BE-NEXT: stxv vs3, 0(r3)
; IR under test: load from the unnamed pointer %0, fptoui, store to %agg.result.
153 %a = load <16 x float>, <16 x float>* %0, align 64
154 %1 = fptoui <16 x float> %a to <16 x i32>
155 store <16 x i32> %1, <16 x i32>* %agg.result, align 64
; test2elt_signed: signed conversion of two floats packed in an i64.
; The i64 argument %a.coerce is reinterpreted as <2 x float>, converted with
; fptosi to <2 x i32>, and the result is reinterpreted as an i64 (%2).
; The expected instruction sequences have the same shape as the ones checked
; for @test2elt, with xvcvspsxws as the conversion instruction: the two
; powerpc64le configurations expect an xxswapd before the conversion, and
; differ in how the result is moved back to r3 (xxswapd + mfvsrd on P8,
; mfvsrld on P9).
; These assertions are autogenerated (see the NOTE on the first line); do not
; edit them by hand.
159 define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
160 ; CHECK-P8-LABEL: test2elt_signed:
161 ; CHECK-P8: # %bb.0: # %entry
162 ; CHECK-P8-NEXT: mtvsrd f0, r3
163 ; CHECK-P8-NEXT: xxswapd v2, vs0
164 ; CHECK-P8-NEXT: xvcvspsxws vs0, v2
165 ; CHECK-P8-NEXT: xxswapd vs0, vs0
166 ; CHECK-P8-NEXT: mfvsrd r3, f0
169 ; CHECK-P9-LABEL: test2elt_signed:
170 ; CHECK-P9: # %bb.0: # %entry
171 ; CHECK-P9-NEXT: mtvsrd f0, r3
172 ; CHECK-P9-NEXT: xxswapd v2, vs0
173 ; CHECK-P9-NEXT: xvcvspsxws vs0, v2
174 ; CHECK-P9-NEXT: mfvsrld r3, vs0
177 ; CHECK-BE-LABEL: test2elt_signed:
178 ; CHECK-BE: # %bb.0: # %entry
179 ; CHECK-BE-NEXT: mtvsrd f0, r3
180 ; CHECK-BE-NEXT: xvcvspsxws vs0, vs0
181 ; CHECK-BE-NEXT: mfvsrd r3, f0
; IR under test: i64 -> <2 x float> -> (fptosi) <2 x i32> -> i64.
184 %0 = bitcast i64 %a.coerce to <2 x float>
185 %1 = fptosi <2 x float> %0 to <2 x i32>
186 %2 = bitcast <2 x i32> %1 to i64
; test4elt_signed: signed conversion of a full <4 x float> vector argument.
; %a is converted with fptosi to <4 x i32>. For every RUN configuration the
; checks expect the same instruction: xvcvspsxws operating in place on v2.
; These assertions are autogenerated (see the NOTE on the first line); do not
; edit them by hand.
190 define <4 x i32> @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
191 ; CHECK-P8-LABEL: test4elt_signed:
192 ; CHECK-P8: # %bb.0: # %entry
193 ; CHECK-P8-NEXT: xvcvspsxws v2, v2
196 ; CHECK-P9-LABEL: test4elt_signed:
197 ; CHECK-P9: # %bb.0: # %entry
198 ; CHECK-P9-NEXT: xvcvspsxws v2, v2
201 ; CHECK-BE-LABEL: test4elt_signed:
202 ; CHECK-BE: # %bb.0: # %entry
203 ; CHECK-BE-NEXT: xvcvspsxws v2, v2
; IR under test: a single fptosi on the vector argument.
206 %0 = fptosi <4 x float> %a to <4 x i32>
; test8elt_signed: signed conversion of an <8 x float> vector passed through
; memory.
; The second parameter is unnamed, so the IR refers to it as %0. The function
; loads <8 x float> from %0 (align 32), converts it with fptosi to <8 x i32>,
; and stores the result through the sret pointer %agg.result (align 32).
; The checks expect two xvcvspsxws instructions in every configuration.
; CHECK-P8 expects lvx/stvx with the offset 16 materialized in r5 by li;
; CHECK-P9 and CHECK-BE expect lxv/stxv with displacements 0 and 16.
; These assertions are autogenerated (see the NOTE on the first line); do not
; edit them by hand.
210 define void @test8elt_signed(<8 x i32>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 {
211 ; CHECK-P8-LABEL: test8elt_signed:
212 ; CHECK-P8: # %bb.0: # %entry
213 ; CHECK-P8-NEXT: li r5, 16
214 ; CHECK-P8-NEXT: lvx v3, 0, r4
215 ; CHECK-P8-NEXT: lvx v2, r4, r5
216 ; CHECK-P8-NEXT: xvcvspsxws v3, v3
217 ; CHECK-P8-NEXT: xvcvspsxws v2, v2
218 ; CHECK-P8-NEXT: stvx v3, 0, r3
219 ; CHECK-P8-NEXT: stvx v2, r3, r5
222 ; CHECK-P9-LABEL: test8elt_signed:
223 ; CHECK-P9: # %bb.0: # %entry
224 ; CHECK-P9-NEXT: lxv vs0, 16(r4)
225 ; CHECK-P9-NEXT: lxv vs1, 0(r4)
226 ; CHECK-P9-NEXT: xvcvspsxws vs1, vs1
227 ; CHECK-P9-NEXT: xvcvspsxws vs0, vs0
228 ; CHECK-P9-NEXT: stxv vs0, 16(r3)
229 ; CHECK-P9-NEXT: stxv vs1, 0(r3)
232 ; CHECK-BE-LABEL: test8elt_signed:
233 ; CHECK-BE: # %bb.0: # %entry
234 ; CHECK-BE-NEXT: lxv vs0, 16(r4)
235 ; CHECK-BE-NEXT: lxv vs1, 0(r4)
236 ; CHECK-BE-NEXT: xvcvspsxws vs1, vs1
237 ; CHECK-BE-NEXT: xvcvspsxws vs0, vs0
238 ; CHECK-BE-NEXT: stxv vs0, 16(r3)
239 ; CHECK-BE-NEXT: stxv vs1, 0(r3)
; IR under test: load from the unnamed pointer %0, fptosi, store to %agg.result.
242 %a = load <8 x float>, <8 x float>* %0, align 32
243 %1 = fptosi <8 x float> %a to <8 x i32>
244 store <8 x i32> %1, <8 x i32>* %agg.result, align 32
; test16elt_signed: signed conversion of a <16 x float> vector passed through
; memory.
; The second parameter is unnamed, so the IR refers to it as %0. The function
; loads <16 x float> from %0 (align 64), converts it with fptosi to
; <16 x i32>, and stores the result through the sret pointer %agg.result
; (align 64).
; The checks expect four xvcvspsxws instructions in every configuration.
; CHECK-P8 expects lvx/stvx with the offsets 16, 32 and 48 materialized in
; r5-r7 by li; CHECK-P9 and CHECK-BE expect lxv/stxv with displacements
; 0, 16, 32 and 48.
; These assertions are autogenerated (see the NOTE on the first line); do not
; edit them by hand.
248 define void @test16elt_signed(<16 x i32>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
249 ; CHECK-P8-LABEL: test16elt_signed:
250 ; CHECK-P8: # %bb.0: # %entry
251 ; CHECK-P8-NEXT: li r5, 16
252 ; CHECK-P8-NEXT: li r6, 32
253 ; CHECK-P8-NEXT: li r7, 48
254 ; CHECK-P8-NEXT: lvx v5, 0, r4
255 ; CHECK-P8-NEXT: lvx v2, r4, r5
256 ; CHECK-P8-NEXT: lvx v3, r4, r6
257 ; CHECK-P8-NEXT: lvx v4, r4, r7
258 ; CHECK-P8-NEXT: xvcvspsxws v5, v5
259 ; CHECK-P8-NEXT: xvcvspsxws v2, v2
260 ; CHECK-P8-NEXT: xvcvspsxws v3, v3
261 ; CHECK-P8-NEXT: xvcvspsxws v4, v4
262 ; CHECK-P8-NEXT: stvx v5, 0, r3
263 ; CHECK-P8-NEXT: stvx v2, r3, r5
264 ; CHECK-P8-NEXT: stvx v3, r3, r6
265 ; CHECK-P8-NEXT: stvx v4, r3, r7
268 ; CHECK-P9-LABEL: test16elt_signed:
269 ; CHECK-P9: # %bb.0: # %entry
270 ; CHECK-P9-NEXT: lxv vs0, 48(r4)
271 ; CHECK-P9-NEXT: lxv vs1, 32(r4)
272 ; CHECK-P9-NEXT: lxv vs2, 16(r4)
273 ; CHECK-P9-NEXT: lxv vs3, 0(r4)
274 ; CHECK-P9-NEXT: xvcvspsxws vs3, vs3
275 ; CHECK-P9-NEXT: xvcvspsxws vs2, vs2
276 ; CHECK-P9-NEXT: xvcvspsxws vs1, vs1
277 ; CHECK-P9-NEXT: xvcvspsxws vs0, vs0
278 ; CHECK-P9-NEXT: stxv vs0, 48(r3)
279 ; CHECK-P9-NEXT: stxv vs1, 32(r3)
280 ; CHECK-P9-NEXT: stxv vs2, 16(r3)
281 ; CHECK-P9-NEXT: stxv vs3, 0(r3)
284 ; CHECK-BE-LABEL: test16elt_signed:
285 ; CHECK-BE: # %bb.0: # %entry
286 ; CHECK-BE-NEXT: lxv vs0, 48(r4)
287 ; CHECK-BE-NEXT: lxv vs1, 32(r4)
288 ; CHECK-BE-NEXT: lxv vs2, 16(r4)
289 ; CHECK-BE-NEXT: lxv vs3, 0(r4)
290 ; CHECK-BE-NEXT: xvcvspsxws vs3, vs3
291 ; CHECK-BE-NEXT: xvcvspsxws vs2, vs2
292 ; CHECK-BE-NEXT: xvcvspsxws vs1, vs1
293 ; CHECK-BE-NEXT: xvcvspsxws vs0, vs0
294 ; CHECK-BE-NEXT: stxv vs0, 48(r3)
295 ; CHECK-BE-NEXT: stxv vs1, 32(r3)
296 ; CHECK-BE-NEXT: stxv vs2, 16(r3)
297 ; CHECK-BE-NEXT: stxv vs3, 0(r3)
; IR under test: load from the unnamed pointer %0, fptosi, store to %agg.result.
300 %a = load <16 x float>, <16 x float>* %0, align 64
301 %1 = fptosi <16 x float> %a to <16 x i32>
302 store <16 x i32> %1, <16 x i32>* %agg.result, align 64