1 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
2 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
3 ; RUN: -check-prefix=P9BE -implicit-check-not frsp
4 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5 ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
6 ; RUN: -check-prefix=P9LE -implicit-check-not frsp
7 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
8 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
9 ; RUN: -check-prefix=P8BE -implicit-check-not frsp
10 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
11 ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
12 ; RUN: -check-prefix=P8LE -implicit-check-not frsp
14 ; This test case comes from the following C test case (included as it may be
15 ; slightly more readable than the LLVM IR).
17 ;/* This test case provides various ways of building vectors to ensure we
18 ; produce optimal code for all cases. The cases are (for each type):
21 ; - Splat of a constant
22 ; - From different values already in registers
23 ; - From different constants
24 ; - From different values in memory
25 ; - Splat of a value in register
26 ; - Splat of a value in memory
27 ; - Inserting element into existing vector
28 ; - Inserting element from existing vector into existing vector
30 ; With conversions (float <-> int)
31 ; - Splat of a constant
32 ; - From different values already in registers
33 ; - From different constants
34 ; - From different values in memory
35 ; - Splat of a value in register
36 ; - Splat of a value in memory
37 ; - Inserting element into existing vector
38 ; - Inserting element from existing vector into existing vector
41 ;/*=================================== int ===================================*/
44 ;vector int allZeroi() { //
45 ; return (vector int)0; //
47 ;// P8: vspltisb -1 //
48 ;// P9: xxspltib 255 //
49 ;vector int allOnei() { //
50 ; return (vector int)-1; //
54 ;vector int spltConst1i() { //
55 ; return (vector int)1; //
57 ;// P8: vspltisw -15; vsrw //
58 ;// P9: vspltisw -15; vsrw //
59 ;vector int spltConst16ki() { //
60 ; return (vector int)((1<<15) - 1); //
62 ;// P8: vspltisw -16; vsrw //
63 ;// P9: vspltisw -16; vsrw //
64 ;vector int spltConst32ki() { //
65 ; return (vector int)((1<<16) - 1); //
67 ;// P8: 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
68 ;// P9: 2 x mtvsrdd, vmrgow //
69 ;vector int fromRegsi(int a, int b, int c, int d) { //
70 ; return (vector int){ a, b, c, d }; //
72 ;// P8: lxvd2x, xxswapd //
73 ;// P9: lxvx (or even lxv) //
74 ;vector int fromDiffConstsi() { //
75 ; return (vector int) { 242, -113, 889, 19 }; //
77 ;// P8: lxvd2x, xxswapd //
79 ;vector int fromDiffMemConsAi(int *arr) { //
80 ; return (vector int) { arr[0], arr[1], arr[2], arr[3] }; //
82 ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm //
83 ;// P9: 2 x lxvx, vperm //
84 ;vector int fromDiffMemConsDi(int *arr) { //
85 ; return (vector int) { arr[3], arr[2], arr[1], arr[0] }; //
87 ;// P8: sldi 2, lxvd2x, xxswapd //
88 ;// P9: sldi 2, lxvx //
89 ;vector int fromDiffMemVarAi(int *arr, int elem) { //
90 ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
92 ;// P8: sldi 2, 2 x lxvd2x, 2 x xxswapd, vperm //
93 ;// P9: sldi 2, 2 x lxvx, vperm //
94 ;vector int fromDiffMemVarDi(int *arr, int elem) { //
95 ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; //
97 ;// P8: 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
98 ;// P9: 4 x lwz, 2 x mtvsrdd, vmrgow //
99 ;vector int fromRandMemConsi(int *arr) { //
100 ; return (vector int) { arr[4], arr[18], arr[2], arr[88] }; //
102 ;// P8: sldi 2, 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
103 ;// P9: sldi 2, add, 4 x lwz, 2 x mtvsrdd, vmrgow //
104 ;vector int fromRandMemVari(int *arr, int elem) { //
105 ; return (vector int) { arr[elem+4], arr[elem+1], arr[elem+2], arr[elem+8] };//
107 ;// P8: mtvsrwz, xxspltw //
109 ;vector int spltRegVali(int val) { //
110 ; return (vector int) val; //
112 ;// P8: lxsiwax, xxspltw //
114 ;vector int spltMemVali(int *ptr) { //
115 ; return (vector int)*ptr; //
119 ;vector int spltCnstConvftoi() { //
120 ; return (vector int) 4.74f; //
122 ;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
123 ;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
124 ;vector int fromRegsConvftoi(float a, float b, float c, float d) { //
125 ; return (vector int) { a, b, c, d }; //
127 ;// P8: lxvd2x, xxswapd //
128 ;// P9: lxvx (even lxv) //
129 ;vector int fromDiffConstsConvftoi() { //
130 ; return (vector int) { 24.46f, 234.f, 988.19f, 422.39f }; //
132 ;// P8: lxvd2x, xxswapd, xvcvspsxws //
133 ;// P9: lxvx, xvcvspsxws //
134 ;vector int fromDiffMemConsAConvftoi(float *ptr) { //
135 ; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
137 ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm, xvcvspsxws //
138 ;// P9: 2 x lxvx, vperm, xvcvspsxws //
139 ;vector int fromDiffMemConsDConvftoi(float *ptr) { //
140 ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
142 ;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
143 ;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
144 ;// Note: if the consecutive loads learn to handle pre-inc, this can be: //
145 ;// sldi 2, load, xvcvspsxws //
146 ;vector int fromDiffMemVarAConvftoi(float *arr, int elem) { //
147 ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
149 ;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
150 ;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
151 ;// Note: if the consecutive loads learn to handle pre-inc, this can be: //
152 ;// sldi 2, 2 x load, vperm, xvcvspsxws //
153 ;vector int fromDiffMemVarDConvftoi(float *arr, int elem) { //
154 ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; //
156 ;// P8: xscvdpsxws, xxspltw //
157 ;// P9: xscvdpsxws, xxspltw //
158 ;vector int spltRegValConvftoi(float val) { //
159 ; return (vector int) val; //
161 ;// P8: lxsspx, xscvdpsxws, xxspltw //
162 ;// P9: lxvwsx, xvcvspsxws //
163 ;vector int spltMemValConvftoi(float *ptr) { //
164 ; return (vector int)*ptr; //
168 ;vector int spltCnstConvdtoi() { //
169 ; return (vector int) 4.74; //
171 ;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
172 ;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
173 ;vector int fromRegsConvdtoi(double a, double b, double c, double d) { //
174 ; return (vector int) { a, b, c, d }; //
176 ;// P8: lxvd2x, xxswapd //
177 ;// P9: lxvx (even lxv) //
178 ;vector int fromDiffConstsConvdtoi() { //
179 ; return (vector int) { 24.46, 234., 988.19, 422.39 }; //
181 ;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
183 ;// P9: 2 x lxvx, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
185 ;vector int fromDiffMemConsAConvdtoi(double *ptr) { //
186 ; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
188 ;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
189 ;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
190 ;vector int fromDiffMemConsDConvdtoi(double *ptr) { //
191 ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
193 ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
194 ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
195 ;vector int fromDiffMemVarAConvdtoi(double *arr, int elem) { //
196 ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
198 ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
199 ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspsxws //
200 ;vector int fromDiffMemVarDConvdtoi(double *arr, int elem) { //
201 ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; //
203 ;// P8: xscvdpsxws, xxspltw //
204 ;// P9: xscvdpsxws, xxspltw //
205 ;vector int spltRegValConvdtoi(double val) { //
206 ; return (vector int) val; //
208 ;// P8: lxsdx, xscvdpsxws, xxspltw //
209 ;// P9: lfd, xscvdpsxws, xxspltw //
210 ;vector int spltMemValConvdtoi(double *ptr) { //
211 ; return (vector int)*ptr; //
213 ;/*=================================== int ===================================*/
214 ;/*=============================== unsigned int ==============================*/
217 ;vector unsigned int allZeroui() { //
218 ; return (vector unsigned int)0; //
220 ;// P8: vspltisb -1 //
221 ;// P9: xxspltib 255 //
222 ;vector unsigned int allOneui() { //
223 ; return (vector unsigned int)-1; //
225 ;// P8: vspltisw 1 //
226 ;// P9: vspltisw 1 //
227 ;vector unsigned int spltConst1ui() { //
228 ; return (vector unsigned int)1; //
230 ;// P8: vspltisw -15; vsrw //
231 ;// P9: vspltisw -15; vsrw //
232 ;vector unsigned int spltConst16kui() { //
233 ; return (vector unsigned int)((1<<15) - 1); //
235 ;// P8: vspltisw -16; vsrw //
236 ;// P9: vspltisw -16; vsrw //
237 ;vector unsigned int spltConst32kui() { //
238 ; return (vector unsigned int)((1<<16) - 1); //
240 ;// P8: 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
241 ;// P9: 2 x mtvsrdd, vmrgow //
242 ;vector unsigned int fromRegsui(unsigned int a, unsigned int b, //
243 ; unsigned int c, unsigned int d) { //
244 ; return (vector unsigned int){ a, b, c, d }; //
246 ;// P8: lxvd2x, xxswapd //
247 ;// P9: lxvx (or even lxv) //
248 ;vector unsigned int fromDiffConstsui() { //
249 ; return (vector unsigned int) { 242, -113, 889, 19 }; //
251 ;// P8: lxvd2x, xxswapd //
253 ;vector unsigned int fromDiffMemConsAui(unsigned int *arr) { //
254 ; return (vector unsigned int) { arr[0], arr[1], arr[2], arr[3] }; //
256 ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm //
257 ;// P9: 2 x lxvx, vperm //
258 ;vector unsigned int fromDiffMemConsDui(unsigned int *arr) { //
259 ; return (vector unsigned int) { arr[3], arr[2], arr[1], arr[0] }; //
261 ;// P8: sldi 2, lxvd2x, xxswapd //
262 ;// P9: sldi 2, lxvx //
263 ;vector unsigned int fromDiffMemVarAui(unsigned int *arr, int elem) { //
264 ; return (vector unsigned int) { arr[elem], arr[elem+1], //
265 ; arr[elem+2], arr[elem+3] }; //
267 ;// P8: sldi 2, 2 x lxvd2x, 2 x xxswapd, vperm //
268 ;// P9: sldi 2, 2 x lxvx, vperm //
269 ;vector unsigned int fromDiffMemVarDui(unsigned int *arr, int elem) { //
270 ; return (vector unsigned int) { arr[elem], arr[elem-1], //
271 ; arr[elem-2], arr[elem-3] }; //
273 ;// P8: 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
274 ;// P9: 4 x lwz, 2 x mtvsrdd, vmrgow //
275 ;vector unsigned int fromRandMemConsui(unsigned int *arr) { //
276 ; return (vector unsigned int) { arr[4], arr[18], arr[2], arr[88] }; //
278 ;// P8: sldi 2, 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
279 ;// P9: sldi 2, add, 4 x lwz, 2 x mtvsrdd, vmrgow //
280 ;vector unsigned int fromRandMemVarui(unsigned int *arr, int elem) { //
281 ; return (vector unsigned int) { arr[elem+4], arr[elem+1], //
282 ; arr[elem+2], arr[elem+8] }; //
284 ;// P8: mtvsrwz, xxspltw //
286 ;vector unsigned int spltRegValui(unsigned int val) { //
287 ; return (vector unsigned int) val; //
289 ;// P8: lxsiwax, xxspltw //
291 ;vector unsigned int spltMemValui(unsigned int *ptr) { //
292 ; return (vector unsigned int)*ptr; //
296 ;vector unsigned int spltCnstConvftoui() { //
297 ; return (vector unsigned int) 4.74f; //
299 ;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
300 ;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
301 ;vector unsigned int fromRegsConvftoui(float a, float b, float c, float d) { //
302 ; return (vector unsigned int) { a, b, c, d }; //
304 ;// P8: lxvd2x, xxswapd //
305 ;// P9: lxvx (even lxv) //
306 ;vector unsigned int fromDiffConstsConvftoui() { //
307 ; return (vector unsigned int) { 24.46f, 234.f, 988.19f, 422.39f }; //
309 ;// P8: lxvd2x, xxswapd, xvcvspuxws //
310 ;// P9: lxvx, xvcvspuxws //
311 ;vector unsigned int fromDiffMemConsAConvftoui(float *ptr) { //
312 ; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
314 ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm, xvcvspuxws //
315 ;// P9: 2 x lxvx, vperm, xvcvspuxws //
316 ;vector unsigned int fromDiffMemConsDConvftoui(float *ptr) { //
317 ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
319 ;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
320 ;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
321 ;// Note: if the consecutive loads learn to handle pre-inc, this can be: //
322 ;// sldi 2, load, xvcvspuxws //
323 ;vector unsigned int fromDiffMemVarAConvftoui(float *arr, int elem) { //
324 ; return (vector unsigned int) { arr[elem], arr[elem+1], //
325 ; arr[elem+2], arr[elem+3] }; //
327 ;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
328 ;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
329 ;// Note: if the consecutive loads learn to handle pre-inc, this can be: //
330 ;// sldi 2, 2 x load, vperm, xvcvspuxws //
331 ;vector unsigned int fromDiffMemVarDConvftoui(float *arr, int elem) { //
332 ; return (vector unsigned int) { arr[elem], arr[elem-1], //
333 ; arr[elem-2], arr[elem-3] }; //
335 ;// P8: xscvdpuxws, xxspltw //
336 ;// P9: xscvdpuxws, xxspltw //
337 ;vector unsigned int spltRegValConvftoui(float val) { //
338 ; return (vector unsigned int) val; //
340 ;// P8: lxsspx, xscvdpuxws, xxspltw //
341 ;// P9: lxvwsx, xvcvspuxws //
342 ;vector unsigned int spltMemValConvftoui(float *ptr) { //
343 ; return (vector unsigned int)*ptr; //
347 ;vector unsigned int spltCnstConvdtoui() { //
348 ; return (vector unsigned int) 4.74; //
350 ;// P8: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
351 ;// P9: 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
352 ;vector unsigned int fromRegsConvdtoui(double a, double b, //
353 ; double c, double d) { //
354 ; return (vector unsigned int) { a, b, c, d }; //
356 ;// P8: lxvd2x, xxswapd //
357 ;// P9: lxvx (even lxv) //
358 ;vector unsigned int fromDiffConstsConvdtoui() { //
359 ; return (vector unsigned int) { 24.46, 234., 988.19, 422.39 }; //
361 ;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
363 ;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
364 ;vector unsigned int fromDiffMemConsAConvdtoui(double *ptr) { //
365 ; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
367 ;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
368 ;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
369 ;vector unsigned int fromDiffMemConsDConvdtoui(double *ptr) { //
370 ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
372 ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
373 ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
374 ;vector unsigned int fromDiffMemVarAConvdtoui(double *arr, int elem) { //
375 ; return (vector unsigned int) { arr[elem], arr[elem+1], //
376 ; arr[elem+2], arr[elem+3] }; //
378 ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
379 ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvdpsp, vmrgew, xvcvspuxws //
380 ;vector unsigned int fromDiffMemVarDConvdtoui(double *arr, int elem) { //
381 ; return (vector unsigned int) { arr[elem], arr[elem-1], //
382 ; arr[elem-2], arr[elem-3] }; //
384 ;// P8: xscvdpuxws, xxspltw //
385 ;// P9: xscvdpuxws, xxspltw //
386 ;vector unsigned int spltRegValConvdtoui(double val) { //
387 ; return (vector unsigned int) val; //
389 ;// P8: lxsdx, xscvdpuxws, xxspltw //
390 ;// P9: lfd, xscvdpuxws, xxspltw //
391 ;vector unsigned int spltMemValConvdtoui(double *ptr) { //
392 ; return (vector unsigned int)*ptr; //
394 ;/*=============================== unsigned int ==============================*/
395 ;/*=============================== long long =================================*/
398 ;vector long long allZeroll() { //
399 ; return (vector long long)0; //
401 ;// P8: vspltisb -1 //
402 ;// P9: xxspltib 255 //
403 ;vector long long allOnell() { //
404 ; return (vector long long)-1; //
406 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
407 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
408 ;vector long long spltConst1ll() { //
409 ; return (vector long long)1; //
411 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
412 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
413 ;vector long long spltConst16kll() { //
414 ; return (vector long long)((1<<15) - 1); //
416 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
417 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
418 ;vector long long spltConst32kll() { //
419 ; return (vector long long)((1<<16) - 1); //
421 ;// P8: 2 x mtvsrd, xxmrghd //
423 ;vector long long fromRegsll(long long a, long long b) { //
424 ; return (vector long long){ a, b }; //
426 ;// P8: lxvd2x, xxswapd //
427 ;// P9: lxvx (or even lxv) //
428 ;vector long long fromDiffConstsll() { //
429 ; return (vector long long) { 242, -113 }; //
431 ;// P8: lxvd2x, xxswapd //
433 ;vector long long fromDiffMemConsAll(long long *arr) { //
434 ; return (vector long long) { arr[0], arr[1] }; //
437 ;// P9: lxvx, xxswapd (maybe just use lxvd2x) //
438 ;vector long long fromDiffMemConsDll(long long *arr) { //
439 ; return (vector long long) { arr[3], arr[2] }; //
441 ;// P8: sldi 3, lxvd2x, xxswapd //
442 ;// P9: sldi 3, lxvx //
443 ;vector long long fromDiffMemVarAll(long long *arr, int elem) { //
444 ; return (vector long long) { arr[elem], arr[elem+1] }; //
446 ;// P8: sldi 3, lxvd2x //
447 ;// P9: sldi 3, lxvx, xxswapd (maybe just use lxvd2x) //
448 ;vector long long fromDiffMemVarDll(long long *arr, int elem) { //
449 ; return (vector long long) { arr[elem], arr[elem-1] }; //
451 ;// P8: 2 x ld, 2 x mtvsrd, xxmrghd //
452 ;// P9: 2 x ld, mtvsrdd //
453 ;vector long long fromRandMemConsll(long long *arr) { //
454 ; return (vector long long) { arr[4], arr[18] }; //
456 ;// P8: sldi 3, add, 2 x ld, 2 x mtvsrd, xxmrghd //
457 ;// P9: sldi 3, add, 2 x ld, mtvsrdd //
458 ;vector long long fromRandMemVarll(long long *arr, int elem) { //
459 ; return (vector long long) { arr[elem+4], arr[elem+1] }; //
461 ;// P8: mtvsrd, xxspltd //
463 ;vector long long spltRegValll(long long val) { //
464 ; return (vector long long) val; //
468 ;vector long long spltMemValll(long long *ptr) { //
469 ; return (vector long long)*ptr; //
471 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
472 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
473 ;vector long long spltCnstConvftoll() { //
474 ; return (vector long long) 4.74f; //
476 ;// P8: xxmrghd, xvcvdpsxds //
477 ;// P9: xxmrghd, xvcvdpsxds //
478 ;vector long long fromRegsConvftoll(float a, float b) { //
479 ; return (vector long long) { a, b }; //
481 ;// P8: lxvd2x, xxswapd //
482 ;// P9: lxvx (even lxv) //
483 ;vector long long fromDiffConstsConvftoll() { //
484 ; return (vector long long) { 24.46f, 234.f }; //
486 ;// P8: 2 x lxsspx, xxmrghd, xvcvdpsxds //
487 ;// P9: 2 x lxssp, xxmrghd, xvcvdpsxds //
488 ;vector long long fromDiffMemConsAConvftoll(float *ptr) { //
489 ; return (vector long long) { ptr[0], ptr[1] }; //
491 ;// P8: 2 x lxsspx, xxmrghd, xvcvdpsxds //
492 ;// P9: 2 x lxssp, xxmrghd, xvcvdpsxds //
493 ;vector long long fromDiffMemConsDConvftoll(float *ptr) { //
494 ; return (vector long long) { ptr[3], ptr[2] }; //
496 ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpsxds //
497 ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpsxds //
498 ;vector long long fromDiffMemVarAConvftoll(float *arr, int elem) { //
499 ; return (vector long long) { arr[elem], arr[elem+1] }; //
501 ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpsxds //
502 ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpsxds //
503 ;vector long long fromDiffMemVarDConvftoll(float *arr, int elem) { //
504 ; return (vector long long) { arr[elem], arr[elem-1] }; //
506 ;// P8: xscvdpsxds, xxspltd //
507 ;// P9: xscvdpsxds, xxspltd //
508 ;vector long long spltRegValConvftoll(float val) { //
509 ; return (vector long long) val; //
511 ;// P8: lxsspx, xscvdpsxds, xxspltd //
512 ;// P9: lfs, xscvdpsxds, xxspltd //
513 ;vector long long spltMemValConvftoll(float *ptr) { //
514 ; return (vector long long)*ptr; //
516 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
517 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
518 ;vector long long spltCnstConvdtoll() { //
519 ; return (vector long long) 4.74; //
521 ;// P8: xxmrghd, xvcvdpsxds //
522 ;// P9: xxmrghd, xvcvdpsxds //
523 ;vector long long fromRegsConvdtoll(double a, double b) { //
524 ; return (vector long long) { a, b }; //
526 ;// P8: lxvd2x, xxswapd //
527 ;// P9: lxvx (even lxv) //
528 ;vector long long fromDiffConstsConvdtoll() { //
529 ; return (vector long long) { 24.46, 234. }; //
531 ;// P8: lxvd2x, xxswapd, xvcvdpsxds //
532 ;// P9: lxvx, xvcvdpsxds //
533 ;vector long long fromDiffMemConsAConvdtoll(double *ptr) { //
534 ; return (vector long long) { ptr[0], ptr[1] }; //
536 ;// P8: lxvd2x, xvcvdpsxds //
537 ;// P9: lxvx, xxswapd, xvcvdpsxds //
538 ;vector long long fromDiffMemConsDConvdtoll(double *ptr) { //
539 ; return (vector long long) { ptr[3], ptr[2] }; //
541 ;// P8: sldi 3, lxvd2x, xxswapd, xvcvdpsxds //
542 ;// P9: sldi 3, lxvx, xvcvdpsxds //
543 ;vector long long fromDiffMemVarAConvdtoll(double *arr, int elem) { //
544 ; return (vector long long) { arr[elem], arr[elem+1] }; //
546 ;// P8: sldi 3, lxvd2x, xvcvdpsxds //
547 ;// P9: sldi 3, lxvx, xxswapd, xvcvdpsxds //
548 ;vector long long fromDiffMemVarDConvdtoll(double *arr, int elem) { //
549 ; return (vector long long) { arr[elem], arr[elem-1] }; //
551 ;// P8: xscvdpsxds, xxspltd //
552 ;// P9: xscvdpsxds, xxspltd //
553 ;vector long long spltRegValConvdtoll(double val) { //
554 ; return (vector long long) val; //
556 ;// P8: lxvdsx, xvcvdpsxds //
557 ;// P9: lxvdsx, xvcvdpsxds //
558 ;vector long long spltMemValConvdtoll(double *ptr) { //
559 ; return (vector long long)*ptr; //
561 ;/*=============================== long long =================================*/
562 ;/*========================== unsigned long long =============================*/
565 ;vector unsigned long long allZeroull() { //
566 ; return (vector unsigned long long)0; //
568 ;// P8: vspltisb -1 //
569 ;// P9: xxspltib 255 //
570 ;vector unsigned long long allOneull() { //
571 ; return (vector unsigned long long)-1; //
573 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
574 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
575 ;vector unsigned long long spltConst1ull() { //
576 ; return (vector unsigned long long)1; //
578 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
579 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
580 ;vector unsigned long long spltConst16kull() { //
581 ; return (vector unsigned long long)((1<<15) - 1); //
583 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
584 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
585 ;vector unsigned long long spltConst32kull() { //
586 ; return (vector unsigned long long)((1<<16) - 1); //
588 ;// P8: 2 x mtvsrd, xxmrghd //
590 ;vector unsigned long long fromRegsull(unsigned long long a, //
591 ; unsigned long long b) { //
592 ; return (vector unsigned long long){ a, b }; //
594 ;// P8: lxvd2x, xxswapd //
595 ;// P9: lxvx (or even lxv) //
596 ;vector unsigned long long fromDiffConstsull() { //
597 ; return (vector unsigned long long) { 242, -113 }; //
599 ;// P8: lxvd2x, xxswapd //
601 ;vector unsigned long long fromDiffMemConsAull(unsigned long long *arr) { //
602 ; return (vector unsigned long long) { arr[0], arr[1] }; //
605 ;// P9: lxvx, xxswapd (maybe just use lxvd2x) //
606 ;vector unsigned long long fromDiffMemConsDull(unsigned long long *arr) { //
607 ; return (vector unsigned long long) { arr[3], arr[2] }; //
609 ;// P8: sldi 3, lxvd2x, xxswapd //
610 ;// P9: sldi 3, lxvx //
611 ;vector unsigned long long fromDiffMemVarAull(unsigned long long *arr, int elem) { //
613 ; return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
615 ;// P8: sldi 3, lxvd2x //
616 ;// P9: sldi 3, lxvx, xxswapd (maybe just use lxvd2x) //
617 ;vector unsigned long long fromDiffMemVarDull(unsigned long long *arr, int elem) { //
619 ; return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
621 ;// P8: 2 x ld, 2 x mtvsrd, xxmrghd //
622 ;// P9: 2 x ld, mtvsrdd //
623 ;vector unsigned long long fromRandMemConsull(unsigned long long *arr) { //
624 ; return (vector unsigned long long) { arr[4], arr[18] }; //
626 ;// P8: sldi 3, add, 2 x ld, 2 x mtvsrd, xxmrghd //
627 ;// P9: sldi 3, add, 2 x ld, mtvsrdd //
628 ;vector unsigned long long fromRandMemVarull(unsigned long long *arr, int elem) { //
630 ; return (vector unsigned long long) { arr[elem+4], arr[elem+1] }; //
632 ;// P8: mtvsrd, xxspltd //
634 ;vector unsigned long long spltRegValull(unsigned long long val) { //
635 ; return (vector unsigned long long) val; //
639 ;vector unsigned long long spltMemValull(unsigned long long *ptr) { //
640 ; return (vector unsigned long long)*ptr; //
642 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
643 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
644 ;vector unsigned long long spltCnstConvftoull() { //
645 ; return (vector unsigned long long) 4.74f; //
647 ;// P8: xxmrghd, xvcvdpuxds //
648 ;// P9: xxmrghd, xvcvdpuxds //
649 ;vector unsigned long long fromRegsConvftoull(float a, float b) { //
650 ; return (vector unsigned long long) { a, b }; //
652 ;// P8: lxvd2x, xxswapd //
653 ;// P9: lxvx (even lxv) //
654 ;vector unsigned long long fromDiffConstsConvftoull() { //
655 ; return (vector unsigned long long) { 24.46f, 234.f }; //
657 ;// P8: 2 x lxsspx, xxmrghd, xvcvdpuxds //
658 ;// P9: 2 x lxssp, xxmrghd, xvcvdpuxds //
659 ;vector unsigned long long fromDiffMemConsAConvftoull(float *ptr) { //
660 ; return (vector unsigned long long) { ptr[0], ptr[1] }; //
662 ;// P8: 2 x lxsspx, xxmrghd, xvcvdpuxds //
663 ;// P9: 2 x lxssp, xxmrghd, xvcvdpuxds //
664 ;vector unsigned long long fromDiffMemConsDConvftoull(float *ptr) { //
665 ; return (vector unsigned long long) { ptr[3], ptr[2] }; //
667 ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpuxds //
668 ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpuxds //
669 ;vector unsigned long long fromDiffMemVarAConvftoull(float *arr, int elem) { //
670 ; return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
672 ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpuxds //
673 ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpuxds //
674 ;vector unsigned long long fromDiffMemVarDConvftoull(float *arr, int elem) { //
675 ; return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
677 ;// P8: xscvdpuxds, xxspltd //
678 ;// P9: xscvdpuxds, xxspltd //
679 ;vector unsigned long long spltRegValConvftoull(float val) { //
680 ; return (vector unsigned long long) val; //
682 ;// P8: lxsspx, xscvdpuxds, xxspltd //
683 ;// P9: lfs, xscvdpuxds, xxspltd //
684 ;vector unsigned long long spltMemValConvftoull(float *ptr) { //
685 ; return (vector unsigned long long)*ptr; //
687 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
688 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
689 ;vector unsigned long long spltCnstConvdtoull() { //
690 ; return (vector unsigned long long) 4.74; //
692 ;// P8: xxmrghd, xvcvdpuxds //
693 ;// P9: xxmrghd, xvcvdpuxds //
694 ;vector unsigned long long fromRegsConvdtoull(double a, double b) { //
695 ; return (vector unsigned long long) { a, b }; //
697 ;// P8: lxvd2x, xxswapd //
698 ;// P9: lxvx (even lxv) //
699 ;vector unsigned long long fromDiffConstsConvdtoull() { //
700 ; return (vector unsigned long long) { 24.46, 234. }; //
702 ;// P8: lxvd2x, xxswapd, xvcvdpuxds //
703 ;// P9: lxvx, xvcvdpuxds //
704 ;vector unsigned long long fromDiffMemConsAConvdtoull(double *ptr) { //
705 ; return (vector unsigned long long) { ptr[0], ptr[1] }; //
707 ;// P8: lxvd2x, xvcvdpuxds //
708 ;// P9: lxvx, xxswapd, xvcvdpuxds //
709 ;vector unsigned long long fromDiffMemConsDConvdtoull(double *ptr) { //
710 ; return (vector unsigned long long) { ptr[3], ptr[2] }; //
712 ;// P8: sldi 3, lxvd2x, xxswapd, xvcvdpuxds //
713 ;// P9: sldi 3, lxvx, xvcvdpuxds //
714 ;vector unsigned long long fromDiffMemVarAConvdtoull(double *arr, int elem) { //
715 ; return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
717 ;// P8: sldi 3, lxvd2x, xvcvdpuxds //
718 ;// P9: sldi 3, lxvx, xxswapd, xvcvdpuxds //
719 ;vector unsigned long long fromDiffMemVarDConvdtoull(double *arr, int elem) { //
720 ; return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
722 ;// P8: xscvdpuxds, xxspltd //
723 ;// P9: xscvdpuxds, xxspltd //
724 ;vector unsigned long long spltRegValConvdtoull(double val) { //
725 ; return (vector unsigned long long) val; //
727 ;// P8: lxvdsx, xvcvdpuxds //
728 ;// P9: lxvdsx, xvcvdpuxds //
729 ;vector unsigned long long spltMemValConvdtoull(double *ptr) { //
730 ; return (vector unsigned long long)*ptr; //
732 ;/*========================== unsigned long long ==============================*/
734 ; Function Attrs: norecurse nounwind readnone
; Returns the all-zero <4 x i32> vector. Every run configuration (pwr9 and
; pwr8, big- and little-endian) expects the result to be produced in v2 by a
; single xxlxor of v2 with itself.
735 define <4 x i32> @allZeroi() {
737 ret <4 x i32> zeroinitializer
738 ; P9BE-LABEL: allZeroi
739 ; P9LE-LABEL: allZeroi
740 ; P8BE-LABEL: allZeroi
741 ; P8LE-LABEL: allZeroi
742 ; P9BE: xxlxor v2, v2, v2
744 ; P9LE: xxlxor v2, v2, v2
746 ; P8BE: xxlxor v2, v2, v2
748 ; P8LE: xxlxor v2, v2, v2
752 ; Function Attrs: norecurse nounwind readnone
; Returns a <4 x i32> whose four elements are all -1. The expected splat
; instruction differs by CPU: the pwr9 runs expect xxspltib v2, 255 while the
; pwr8 runs expect vspltisb v2, -1. Endianness does not change the expectation.
753 define <4 x i32> @allOnei() {
755 ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
756 ; P9BE-LABEL: allOnei
757 ; P9LE-LABEL: allOnei
758 ; P8BE-LABEL: allOnei
759 ; P8LE-LABEL: allOnei
760 ; P9BE: xxspltib v2, 255
762 ; P9LE: xxspltib v2, 255
764 ; P8BE: vspltisb v2, -1
766 ; P8LE: vspltisb v2, -1
770 ; Function Attrs: norecurse nounwind readnone
; Returns a <4 x i32> splat of the constant 1. All four run configurations
; expect the same single instruction: vspltisw v2, 1.
771 define <4 x i32> @spltConst1i() {
773 ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
774 ; P9BE-LABEL: spltConst1i
775 ; P9LE-LABEL: spltConst1i
776 ; P8BE-LABEL: spltConst1i
777 ; P8LE-LABEL: spltConst1i
778 ; P9BE: vspltisw v2, 1
780 ; P9LE: vspltisw v2, 1
782 ; P8BE: vspltisw v2, 1
784 ; P8LE: vspltisw v2, 1
788 ; Function Attrs: norecurse nounwind readnone
; Returns a <4 x i32> splat of 32767 ((1 << 15) - 1). All four run
; configurations expect a two-instruction sequence: vspltisw v2, -15 followed
; by vsrw v2, v2, v2.
; NOTE(review): presumably this works because vsrw shifts each word right by
; the low 5 bits of the matching word of the shift operand (0xFFFFFFF1 -> 17),
; leaving 0x7FFF -- confirm against the Power ISA.
789 define <4 x i32> @spltConst16ki() {
791 ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
792 ; P9BE-LABEL: spltConst16ki
793 ; P9LE-LABEL: spltConst16ki
794 ; P8BE-LABEL: spltConst16ki
795 ; P8LE-LABEL: spltConst16ki
796 ; P9BE: vspltisw v2, -15
797 ; P9BE: vsrw v2, v2, v2
799 ; P9LE: vspltisw v2, -15
800 ; P9LE: vsrw v2, v2, v2
802 ; P8BE: vspltisw v2, -15
803 ; P8BE: vsrw v2, v2, v2
805 ; P8LE: vspltisw v2, -15
806 ; P8LE: vsrw v2, v2, v2
810 ; Function Attrs: norecurse nounwind readnone
; Returns a <4 x i32> splat of 65535 ((1 << 16) - 1). All four run
; configurations expect a two-instruction sequence: vspltisw v2, -16 followed
; by vsrw v2, v2, v2.
; NOTE(review): presumably this works because vsrw shifts each word right by
; the low 5 bits of the matching word of the shift operand (0xFFFFFFF0 -> 16),
; leaving 0xFFFF -- confirm against the Power ISA.
811 define <4 x i32> @spltConst32ki() {
813 ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
814 ; P9BE-LABEL: spltConst32ki
815 ; P9LE-LABEL: spltConst32ki
816 ; P8BE-LABEL: spltConst32ki
817 ; P8LE-LABEL: spltConst32ki
818 ; P9BE: vspltisw v2, -16
819 ; P9BE: vsrw v2, v2, v2
821 ; P9LE: vspltisw v2, -16
822 ; P9LE: vsrw v2, v2, v2
824 ; P8BE: vspltisw v2, -16
825 ; P8BE: vsrw v2, v2, v2
827 ; P8LE: vspltisw v2, -16
828 ; P8LE: vsrw v2, v2, v2
832 ; Function Attrs: norecurse nounwind readnone
; Builds a <4 x i32> from four sign-extended i32 arguments by inserting %a,
; %b, %c and %d into elements 0..3 of an undef vector, in that order.
; Expected code:
;  - pwr9: two mtvsrdd instructions pairing r3 with r5 and r4 with r6 (the
;    GPR operand order is swapped on little-endian), then a vmrgow into v2
;    (merge operand order is also swapped on little-endian).
;  - pwr8: four mtvsrwz moves from r3..r6, two xxmrghd merges, then a vmrgow
;    into v2, again with operand order swapped on little-endian.
; The pwr8 little-endian xxmrghd checks are ordered (not -DAG), unlike the
; pwr8 big-endian ones.
833 define <4 x i32> @fromRegsi(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) {
835 %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
836 %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
837 %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2
838 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3
839 ret <4 x i32> %vecinit3
840 ; P9BE-LABEL: fromRegsi
841 ; P9LE-LABEL: fromRegsi
842 ; P8BE-LABEL: fromRegsi
843 ; P8LE-LABEL: fromRegsi
844 ; P9BE-DAG: mtvsrdd [[REG1:v[0-9]+]], r3, r5
845 ; P9BE-DAG: mtvsrdd [[REG2:v[0-9]+]], r4, r6
846 ; P9BE: vmrgow v2, [[REG1]], [[REG2]]
848 ; P9LE-DAG: mtvsrdd [[REG1:v[0-9]+]], r5, r3
849 ; P9LE-DAG: mtvsrdd [[REG2:v[0-9]+]], r6, r4
850 ; P9LE: vmrgow v2, [[REG2]], [[REG1]]
852 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
853 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
854 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
855 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
856 ; P8BE-DAG: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG1]], {{[v][s]*}}[[REG3]]
857 ; P8BE-DAG: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG2]], {{[v][s]*}}[[REG4]]
858 ; P8BE: vmrgow v2, [[REG5]], [[REG6]]
859 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
860 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
861 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
862 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
863 ; P8LE: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG3]], {{[v][s]*}}[[REG1]]
864 ; P8LE: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG4]], {{[v][s]*}}[[REG2]]
865 ; P8LE: vmrgow v2, [[REG6]], [[REG5]]
; fromDiffConstsi: returns the constant vector <242, -113, 889, 19>, i.e. four
; different lane values, so no splat is possible.
; Only the per-configuration label checks are visible for this function here.
868 ; Function Attrs: norecurse nounwind readnone
869 define <4 x i32> @fromDiffConstsi() {
871 ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
872 ; P9BE-LABEL: fromDiffConstsi
873 ; P9LE-LABEL: fromDiffConstsi
874 ; P8BE-LABEL: fromDiffConstsi
875 ; P8LE-LABEL: fromDiffConstsi
; fromDiffMemConsAi: builds a <4 x i32> from four scalar loads at constant,
; ascending offsets: arr[0], arr[1], arr[2], arr[3] go into lanes 0..3.
; Only the per-configuration label checks are visible for this function here.
887 ; Function Attrs: norecurse nounwind readonly
888 define <4 x i32> @fromDiffMemConsAi(i32* nocapture readonly %arr) {
890 %0 = load i32, i32* %arr, align 4
891 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
892 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 1
893 %1 = load i32, i32* %arrayidx1, align 4
894 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
895 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2
896 %2 = load i32, i32* %arrayidx3, align 4
897 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
898 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 3
899 %3 = load i32, i32* %arrayidx5, align 4
900 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
901 ret <4 x i32> %vecinit6
902 ; P9BE-LABEL: fromDiffMemConsAi
903 ; P9LE-LABEL: fromDiffMemConsAi
904 ; P8BE-LABEL: fromDiffMemConsAi
905 ; P8LE-LABEL: fromDiffMemConsAi
; fromDiffMemConsDi: builds a <4 x i32> from four scalar loads at constant,
; descending offsets: arr[3], arr[2], arr[1], arr[0] go into lanes 0..3, so the
; memory order is the reverse of the lane order.
; Only the per-configuration label checks are visible for this function here.
917 ; Function Attrs: norecurse nounwind readonly
918 define <4 x i32> @fromDiffMemConsDi(i32* nocapture readonly %arr) {
920 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
921 %0 = load i32, i32* %arrayidx, align 4
922 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
923 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
924 %1 = load i32, i32* %arrayidx1, align 4
925 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
926 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 1
927 %2 = load i32, i32* %arrayidx3, align 4
928 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
929 %3 = load i32, i32* %arr, align 4
930 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
931 ret <4 x i32> %vecinit6
932 ; P9BE-LABEL: fromDiffMemConsDi
933 ; P9LE-LABEL: fromDiffMemConsDi
934 ; P8BE-LABEL: fromDiffMemConsDi
935 ; P8LE-LABEL: fromDiffMemConsDi
; fromDiffMemVarAi: builds a <4 x i32> from four scalar loads at a variable
; base index, ascending: arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] go
; into lanes 0..3.
; Every run configuration expects the index to be scaled to a byte offset
; (`sldi r4, r4, 2`) and then used by an indexed vector load from r3 + r4:
; lxvx on POWER9, lxvw4x on big-endian POWER8, lxvd2x on little-endian POWER8.
955 ; Function Attrs: norecurse nounwind readonly
956 define <4 x i32> @fromDiffMemVarAi(i32* nocapture readonly %arr, i32 signext %elem) {
958 %idxprom = sext i32 %elem to i64
959 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
960 %0 = load i32, i32* %arrayidx, align 4
961 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
962 %add = add nsw i32 %elem, 1
963 %idxprom1 = sext i32 %add to i64
964 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1
965 %1 = load i32, i32* %arrayidx2, align 4
966 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
967 %add4 = add nsw i32 %elem, 2
968 %idxprom5 = sext i32 %add4 to i64
969 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5
970 %2 = load i32, i32* %arrayidx6, align 4
971 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2
972 %add8 = add nsw i32 %elem, 3
973 %idxprom9 = sext i32 %add8 to i64
974 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9
975 %3 = load i32, i32* %arrayidx10, align 4
976 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
977 ret <4 x i32> %vecinit11
978 ; P9BE-LABEL: fromDiffMemVarAi
979 ; P9LE-LABEL: fromDiffMemVarAi
980 ; P8BE-LABEL: fromDiffMemVarAi
981 ; P8LE-LABEL: fromDiffMemVarAi
982 ; P9BE: sldi r4, r4, 2
983 ; P9BE: lxvx v2, r3, r4
985 ; P9LE: sldi r4, r4, 2
986 ; P9LE: lxvx v2, r3, r4
988 ; P8BE: sldi r4, r4, 2
989 ; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4
991 ; P8LE: sldi r4, r4, 2
992 ; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4
; fromDiffMemVarDi: builds a <4 x i32> from four scalar loads at a variable
; base index, descending: arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] go
; into lanes 0..3 (the negative steps are written as `add nsw` of -1/-2/-3).
; The visible checks expect the index in r4 to be scaled by 4 with sldi in
; every configuration, plus a vector load (lxvx on POWER9, lxvw4x on
; big-endian POWER8). For little-endian POWER8 only the sldi check is visible
; here.
997 ; Function Attrs: norecurse nounwind readonly
998 define <4 x i32> @fromDiffMemVarDi(i32* nocapture readonly %arr, i32 signext %elem) {
1000 %idxprom = sext i32 %elem to i64
1001 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
1002 %0 = load i32, i32* %arrayidx, align 4
1003 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
1004 %sub = add nsw i32 %elem, -1
1005 %idxprom1 = sext i32 %sub to i64
1006 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1
1007 %1 = load i32, i32* %arrayidx2, align 4
1008 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
1009 %sub4 = add nsw i32 %elem, -2
1010 %idxprom5 = sext i32 %sub4 to i64
1011 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5
1012 %2 = load i32, i32* %arrayidx6, align 4
1013 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2
1014 %sub8 = add nsw i32 %elem, -3
1015 %idxprom9 = sext i32 %sub8 to i64
1016 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9
1017 %3 = load i32, i32* %arrayidx10, align 4
1018 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
1019 ret <4 x i32> %vecinit11
1020 ; P9BE-LABEL: fromDiffMemVarDi
1021 ; P9LE-LABEL: fromDiffMemVarDi
1022 ; P8BE-LABEL: fromDiffMemVarDi
1023 ; P8LE-LABEL: fromDiffMemVarDi
1024 ; P9BE: sldi {{r[0-9]+}}, r4, 2
1025 ; P9BE-DAG: lxvx {{v[0-9]+}}
1029 ; P9LE: sldi {{r[0-9]+}}, r4, 2
1030 ; P9LE-DAG: lxvx {{v[0-9]+}}
1034 ; P8BE: sldi {{r[0-9]+}}, r4, 2
1035 ; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3
1039 ; P8LE: sldi {{r[0-9]+}}, r4, 2
; fromRandMemConsi: builds a <4 x i32> from four scalar loads at constant but
; non-consecutive offsets: arr[4], arr[18], arr[2], arr[88] go into lanes 0..3.
; Only the per-configuration label checks are visible for this function here.
1047 ; Function Attrs: norecurse nounwind readonly
1048 define <4 x i32> @fromRandMemConsi(i32* nocapture readonly %arr) {
1050 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4
1051 %0 = load i32, i32* %arrayidx, align 4
1052 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
1053 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 18
1054 %1 = load i32, i32* %arrayidx1, align 4
1055 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
1056 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2
1057 %2 = load i32, i32* %arrayidx3, align 4
1058 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
1059 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 88
1060 %3 = load i32, i32* %arrayidx5, align 4
1061 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
1062 ret <4 x i32> %vecinit6
1063 ; P9BE-LABEL: fromRandMemConsi
1064 ; P9LE-LABEL: fromRandMemConsi
1065 ; P8BE-LABEL: fromRandMemConsi
1066 ; P8LE-LABEL: fromRandMemConsi
; fromRandMemVari: builds a <4 x i32> from four scalar loads at a variable
; base index with non-consecutive offsets: arr[elem+4], arr[elem+1],
; arr[elem+2], arr[elem+8] go into lanes 0..3.
; The visible checks only require the index to be scaled to a byte offset
; (`sldi r4, r4, 2`) in every configuration.
1105 ; Function Attrs: norecurse nounwind readonly
1106 define <4 x i32> @fromRandMemVari(i32* nocapture readonly %arr, i32 signext %elem) {
1108 %add = add nsw i32 %elem, 4
1109 %idxprom = sext i32 %add to i64
1110 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
1111 %0 = load i32, i32* %arrayidx, align 4
1112 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
1113 %add1 = add nsw i32 %elem, 1
1114 %idxprom2 = sext i32 %add1 to i64
1115 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 %idxprom2
1116 %1 = load i32, i32* %arrayidx3, align 4
1117 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
1118 %add5 = add nsw i32 %elem, 2
1119 %idxprom6 = sext i32 %add5 to i64
1120 %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 %idxprom6
1121 %2 = load i32, i32* %arrayidx7, align 4
1122 %vecinit8 = insertelement <4 x i32> %vecinit4, i32 %2, i32 2
1123 %add9 = add nsw i32 %elem, 8
1124 %idxprom10 = sext i32 %add9 to i64
1125 %arrayidx11 = getelementptr inbounds i32, i32* %arr, i64 %idxprom10
1126 %3 = load i32, i32* %arrayidx11, align 4
1127 %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3
1128 ret <4 x i32> %vecinit12
1129 ; P9BE-LABEL: fromRandMemVari
1130 ; P9LE-LABEL: fromRandMemVari
1131 ; P8BE-LABEL: fromRandMemVari
1132 ; P8LE-LABEL: fromRandMemVari
1133 ; P9BE: sldi r4, r4, 2
1141 ; P9LE: sldi r4, r4, 2
1149 ; P8BE: sldi r4, r4, 2
1161 ; P8LE: sldi r4, r4, 2
; spltRegVali: splats the i32 argument %val into all four lanes (insertelement
; into lane 0 followed by a shufflevector with an all-zero mask).
; The POWER9 runs expect a single move-and-splat, `mtvsrws v2, r3`. The POWER8
; runs expect a GPR-to-VSR move (mtvsrwz) followed by `xxspltw ..., 1`.
1175 ; Function Attrs: norecurse nounwind readnone
1176 define <4 x i32> @spltRegVali(i32 signext %val) {
1178 %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0
1179 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1180 ret <4 x i32> %splat.splat
1181 ; P9BE-LABEL: spltRegVali
1182 ; P9LE-LABEL: spltRegVali
1183 ; P8BE-LABEL: spltRegVali
1184 ; P8LE-LABEL: spltRegVali
1185 ; P9BE: mtvsrws v2, r3
1187 ; P9LE: mtvsrws v2, r3
1189 ; P8BE: mtvsrwz {{[vsf0-9]+}}, r3
1190 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
1192 ; P8LE: mtvsrwz {{[vsf0-9]+}}, r3
1193 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
; spltMemVali: loads one i32 from %ptr and splats it into all four lanes.
; The POWER9 runs expect a single load-and-splat, `lxvwsx v2, 0, r3`. The
; POWER8 runs expect a scalar word load (lxsiwax) followed by
; `xxspltw ..., 1`.
1197 ; Function Attrs: norecurse nounwind readonly
1198 define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {
1200 %0 = load i32, i32* %ptr, align 4
1201 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
1202 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1203 ret <4 x i32> %splat.splat
1204 ; P9BE-LABEL: spltMemVali
1205 ; P9LE-LABEL: spltMemVali
1206 ; P8BE-LABEL: spltMemVali
1207 ; P8LE-LABEL: spltMemVali
1208 ; P9BE: lxvwsx v2, 0, r3
1210 ; P9LE: lxvwsx v2, 0, r3
1212 ; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
1213 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
1215 ; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
1216 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
; spltCnstConvftoi: returns a <4 x i32> constant whose four lanes are all 4.
; No conversion instruction appears in the IR; the function name suggests the
; float-to-int conversion was folded before this IR was produced (TODO confirm
; against the C source in the file header).
; Every run configuration expects `vspltisw v2, 4`.
1220 ; Function Attrs: norecurse nounwind readnone
1221 define <4 x i32> @spltCnstConvftoi() {
1223 ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
1224 ; P9BE-LABEL: spltCnstConvftoi
1225 ; P9LE-LABEL: spltCnstConvftoi
1226 ; P8BE-LABEL: spltCnstConvftoi
1227 ; P8LE-LABEL: spltCnstConvftoi
1228 ; P9BE: vspltisw v2, 4
1230 ; P9LE: vspltisw v2, 4
1232 ; P8BE: vspltisw v2, 4
1234 ; P8LE: vspltisw v2, 4
; fromRegsConvftoi: converts four float arguments to i32 with fptosi and
; inserts the results into lanes 0..3 in argument order.
; Every run configuration expects the conversion to be done on vectors rather
; than per scalar: two xxmrghd merges pair the incoming registers (vs1 with
; vs3, vs2 with vs4), each pair is narrowed with xvcvdpsp, the halves are
; interleaved with vmrgew, and a single xvcvspsxws converts the result in v2.
; The little-endian runs use the same instructions with operands reversed.
1238 ; Function Attrs: norecurse nounwind readnone
1239 define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
1241 %conv = fptosi float %a to i32
1242 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1243 %conv1 = fptosi float %b to i32
1244 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
1245 %conv3 = fptosi float %c to i32
1246 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
1247 %conv5 = fptosi float %d to i32
1248 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
1249 ret <4 x i32> %vecinit6
1250 ; P9BE-LABEL: fromRegsConvftoi
1251 ; P9LE-LABEL: fromRegsConvftoi
1252 ; P8BE-LABEL: fromRegsConvftoi
1253 ; P8LE-LABEL: fromRegsConvftoi
1254 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
1255 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
1256 ; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1257 ; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1258 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
1259 ; P9BE: xvcvspsxws v2, v2
1260 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
1261 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
1262 ; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1263 ; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1264 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
1265 ; P9LE: xvcvspsxws v2, v2
1266 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
1267 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
1268 ; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1269 ; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1270 ; P8BE: vmrgew v2, [[REG3]], [[REG4]]
1271 ; P8BE: xvcvspsxws v2, v2
1272 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
1273 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
1274 ; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1275 ; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1276 ; P8LE: vmrgew v2, [[REG4]], [[REG3]]
1277 ; P8LE: xvcvspsxws v2, v2
; fromDiffConstsConvftoi: returns the constant vector <24, 234, 988, 422>.
; No conversion instruction appears in the IR; the name suggests float
; constants were folded to integers beforehand (TODO confirm against the C
; source in the file header).
; Only the per-configuration label checks are visible for this function here.
1280 ; Function Attrs: norecurse nounwind readnone
1281 define <4 x i32> @fromDiffConstsConvftoi() {
1283 ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
1284 ; P9BE-LABEL: fromDiffConstsConvftoi
1285 ; P9LE-LABEL: fromDiffConstsConvftoi
1286 ; P8BE-LABEL: fromDiffConstsConvftoi
1287 ; P8LE-LABEL: fromDiffConstsConvftoi
; fromDiffMemConsAConvftoi: reinterprets %ptr as a pointer to <4 x float>,
; loads the whole vector with 4-byte alignment and converts it to <4 x i32>
; with one vector fptosi.
; Every run configuration expects one vector load from r3 (lxv on POWER9,
; lxvw4x on big-endian POWER8, lxvd2x on little-endian POWER8) followed by
; xvcvspsxws into v2.
; NOTE(review): no `ret` instruction is visible for this function in this copy
; of the file - confirm against upstream that the converted value is returned.
1299 ; Function Attrs: norecurse nounwind readonly
1300 define <4 x i32> @fromDiffMemConsAConvftoi(float* nocapture readonly %ptr) {
1302 %0 = bitcast float* %ptr to <4 x float>*
1303 %1 = load <4 x float>, <4 x float>* %0, align 4
1304 %2 = fptosi <4 x float> %1 to <4 x i32>
1306 ; P9BE-LABEL: fromDiffMemConsAConvftoi
1307 ; P9LE-LABEL: fromDiffMemConsAConvftoi
1308 ; P8BE-LABEL: fromDiffMemConsAConvftoi
1309 ; P8LE-LABEL: fromDiffMemConsAConvftoi
1310 ; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
1311 ; P9BE: xvcvspsxws v2, [[REG1]]
1313 ; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
1314 ; P9LE: xvcvspsxws v2, [[REG1]]
1316 ; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3
1317 ; P8BE: xvcvspsxws v2, [[REG1]]
1319 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
1321 ; P8LE: xvcvspsxws v2, v2
; fromDiffMemConsDConvftoi: loads four floats at constant, descending offsets
; (ptr[3], ptr[2], ptr[1], ptr[0]), converts each to i32 with fptosi and
; inserts the results into lanes 0..3.
; Only the per-configuration label checks are visible for this function here.
1325 ; Function Attrs: norecurse nounwind readonly
1326 define <4 x i32> @fromDiffMemConsDConvftoi(float* nocapture readonly %ptr) {
1328 %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
1329 %0 = load float, float* %arrayidx, align 4
1330 %conv = fptosi float %0 to i32
1331 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1332 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2
1333 %1 = load float, float* %arrayidx1, align 4
1334 %conv2 = fptosi float %1 to i32
1335 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
1336 %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 1
1337 %2 = load float, float* %arrayidx4, align 4
1338 %conv5 = fptosi float %2 to i32
1339 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
1340 %3 = load float, float* %ptr, align 4
1341 %conv8 = fptosi float %3 to i32
1342 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
1343 ret <4 x i32> %vecinit9
1344 ; P9BE-LABEL: fromDiffMemConsDConvftoi
1345 ; P9LE-LABEL: fromDiffMemConsDConvftoi
1346 ; P8BE-LABEL: fromDiffMemConsDConvftoi
1347 ; P8LE-LABEL: fromDiffMemConsDConvftoi
; fromDiffMemVarAConvftoi: loads four floats at a variable base index,
; ascending (arr[elem] .. arr[elem+3]), converts each to i32 with fptosi and
; inserts the results into lanes 0..3.
; Only label checks are visible here; the trailing FIXME records that the
; consecutive-load pattern is not yet recognized for this addressing form.
1371 ; Function Attrs: norecurse nounwind readonly
1372 define <4 x i32> @fromDiffMemVarAConvftoi(float* nocapture readonly %arr, i32 signext %elem) {
1374 %idxprom = sext i32 %elem to i64
1375 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
1376 %0 = load float, float* %arrayidx, align 4
1377 %conv = fptosi float %0 to i32
1378 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1379 %add = add nsw i32 %elem, 1
1380 %idxprom1 = sext i32 %add to i64
1381 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
1382 %1 = load float, float* %arrayidx2, align 4
1383 %conv3 = fptosi float %1 to i32
1384 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
1385 %add5 = add nsw i32 %elem, 2
1386 %idxprom6 = sext i32 %add5 to i64
1387 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6
1388 %2 = load float, float* %arrayidx7, align 4
1389 %conv8 = fptosi float %2 to i32
1390 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
1391 %add10 = add nsw i32 %elem, 3
1392 %idxprom11 = sext i32 %add10 to i64
1393 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11
1394 %3 = load float, float* %arrayidx12, align 4
1395 %conv13 = fptosi float %3 to i32
1396 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
1397 ret <4 x i32> %vecinit14
1398 ; P9BE-LABEL: fromDiffMemVarAConvftoi
1399 ; P9LE-LABEL: fromDiffMemVarAConvftoi
1400 ; P8BE-LABEL: fromDiffMemVarAConvftoi
1401 ; P8LE-LABEL: fromDiffMemVarAConvftoi
1402 ; FIXME: implement finding consecutive loads with pre-inc
; fromDiffMemVarDConvftoi: loads four floats at a variable base index,
; descending (arr[elem], arr[elem-1], arr[elem-2], arr[elem-3]), converts each
; to i32 with fptosi and inserts the results into lanes 0..3.
; Only label checks are visible here; the trailing FIXME records that the
; consecutive-load pattern is not yet recognized for this addressing form.
1409 ; Function Attrs: norecurse nounwind readonly
1410 define <4 x i32> @fromDiffMemVarDConvftoi(float* nocapture readonly %arr, i32 signext %elem) {
1412 %idxprom = sext i32 %elem to i64
1413 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
1414 %0 = load float, float* %arrayidx, align 4
1415 %conv = fptosi float %0 to i32
1416 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1417 %sub = add nsw i32 %elem, -1
1418 %idxprom1 = sext i32 %sub to i64
1419 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
1420 %1 = load float, float* %arrayidx2, align 4
1421 %conv3 = fptosi float %1 to i32
1422 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
1423 %sub5 = add nsw i32 %elem, -2
1424 %idxprom6 = sext i32 %sub5 to i64
1425 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6
1426 %2 = load float, float* %arrayidx7, align 4
1427 %conv8 = fptosi float %2 to i32
1428 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
1429 %sub10 = add nsw i32 %elem, -3
1430 %idxprom11 = sext i32 %sub10 to i64
1431 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11
1432 %3 = load float, float* %arrayidx12, align 4
1433 %conv13 = fptosi float %3 to i32
1434 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
1435 ret <4 x i32> %vecinit14
1436 ; P9BE-LABEL: fromDiffMemVarDConvftoi
1437 ; P9LE-LABEL: fromDiffMemVarDConvftoi
1438 ; P8BE-LABEL: fromDiffMemVarDConvftoi
1439 ; P8LE-LABEL: fromDiffMemVarDConvftoi
1440 ; FIXME: implement finding consecutive loads with pre-inc
; spltRegValConvftoi: converts the float argument %val to i32 with fptosi and
; splats the result into all four lanes.
; Every run configuration expects a scalar convert of f1 (xscvdpsxws) whose
; result register is then splatted into v2 with `xxspltw ..., 1`. The REG1
; capture ties the convert's destination to the splat's source.
1447 ; Function Attrs: norecurse nounwind readnone
1448 define <4 x i32> @spltRegValConvftoi(float %val) {
1450 %conv = fptosi float %val to i32
1451 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
1452 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1453 ret <4 x i32> %splat.splat
1454 ; P9BE-LABEL: spltRegValConvftoi
1455 ; P9LE-LABEL: spltRegValConvftoi
1456 ; P8BE-LABEL: spltRegValConvftoi
1457 ; P8LE-LABEL: spltRegValConvftoi
1458 ; P9BE: xscvdpsxws f[[REG1:[0-9]+]], f1
1459 ; P9BE: xxspltw v2, vs[[REG1]], 1
1461 ; P9LE: xscvdpsxws f[[REG1:[0-9]+]], f1
1462 ; P9LE: xxspltw v2, vs[[REG1]], 1
1464 ; P8BE: xscvdpsxws f[[REG1:[0-9]+]], f1
1465 ; P8BE: xxspltw v2, vs[[REG1]], 1
1467 ; P8LE: xscvdpsxws f[[REG1:[0-9]+]], f1
1468 ; P8LE: xxspltw v2, vs[[REG1]], 1
; spltMemValConvftoi: loads one float from %ptr, converts it to i32 with
; fptosi and splats the result into all four lanes.
; The POWER9 runs expect a load-and-splat (lxvwsx from r3) whose result feeds
; a vector convert (xvcvspsxws) into v2. The POWER8 runs expect a scalar load
; (lfsx), a scalar convert (xscvdpsxws) and a splat with `xxspltw ..., 1`.
; In each sequence the REG captures tie one instruction's destination to the
; next instruction's source.
1472 ; Function Attrs: norecurse nounwind readonly
1473 define <4 x i32> @spltMemValConvftoi(float* nocapture readonly %ptr) {
1475 %0 = load float, float* %ptr, align 4
1476 %conv = fptosi float %0 to i32
1477 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
1478 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1479 ret <4 x i32> %splat.splat
1480 ; P9BE-LABEL: spltMemValConvftoi
1481 ; P9LE-LABEL: spltMemValConvftoi
1482 ; P8BE-LABEL: spltMemValConvftoi
1483 ; P8LE-LABEL: spltMemValConvftoi
1484 ; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
1485 ; P9BE: xvcvspsxws v2, [[REG1]]
1486 ; P9LE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
1487 ; P9LE: xvcvspsxws v2, [[REG1]]
1488 ; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
1489 ; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]]
1490 ; P8BE: xxspltw v2, vs[[REG2]], 1
1491 ; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
1492 ; P8LE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]]
1493 ; P8LE: xxspltw v2, vs[[REG2]], 1
; spltCnstConvdtoi: returns a <4 x i32> constant whose four lanes are all 4.
; No conversion instruction appears in the IR; the function name suggests a
; double-to-int conversion was folded before this IR was produced (TODO
; confirm against the C source in the file header).
; Every run configuration expects `vspltisw v2, 4`.
1496 ; Function Attrs: norecurse nounwind readnone
1497 define <4 x i32> @spltCnstConvdtoi() {
1499 ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
1500 ; P9BE-LABEL: spltCnstConvdtoi
1501 ; P9LE-LABEL: spltCnstConvdtoi
1502 ; P8BE-LABEL: spltCnstConvdtoi
1503 ; P8LE-LABEL: spltCnstConvdtoi
1504 ; P9BE: vspltisw v2, 4
1506 ; P9LE: vspltisw v2, 4
1508 ; P8BE: vspltisw v2, 4
1510 ; P8LE: vspltisw v2, 4
; fromRegsConvdtoi: converts four double arguments to i32 with fptosi and
; inserts the results into lanes 0..3 in argument order.
; Every run configuration expects the same vectorized sequence: two xxmrghd
; merges pair the incoming registers (vs1 with vs3, vs2 with vs4), each pair
; is narrowed with xvcvdpsp, the halves are interleaved with vmrgew, and a
; single xvcvspsxws converts the result in v2. The little-endian runs use the
; same instructions with operands reversed.
1514 ; Function Attrs: norecurse nounwind readnone
1515 define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
1517 %conv = fptosi double %a to i32
1518 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1519 %conv1 = fptosi double %b to i32
1520 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
1521 %conv3 = fptosi double %c to i32
1522 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
1523 %conv5 = fptosi double %d to i32
1524 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
1525 ret <4 x i32> %vecinit6
1526 ; P9BE-LABEL: fromRegsConvdtoi
1527 ; P9LE-LABEL: fromRegsConvdtoi
1528 ; P8BE-LABEL: fromRegsConvdtoi
1529 ; P8LE-LABEL: fromRegsConvdtoi
1530 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
1531 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
1532 ; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1533 ; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1534 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
1535 ; P9BE: xvcvspsxws v2, v2
1536 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
1537 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
1538 ; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1539 ; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1540 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
1541 ; P9LE: xvcvspsxws v2, v2
1542 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
1543 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
1544 ; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1545 ; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1546 ; P8BE: vmrgew v2, [[REG3]], [[REG4]]
1547 ; P8BE: xvcvspsxws v2, v2
1548 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
1549 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
1550 ; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1551 ; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1552 ; P8LE: vmrgew v2, [[REG4]], [[REG3]]
1553 ; P8LE: xvcvspsxws v2, v2
; fromDiffConstsConvdtoi: returns the constant vector <24, 234, 988, 422>.
; No conversion instruction appears in the IR; the name suggests double
; constants were folded to integers beforehand (TODO confirm against the C
; source in the file header).
; Only the per-configuration label checks are visible for this function here.
1556 ; Function Attrs: norecurse nounwind readnone
1557 define <4 x i32> @fromDiffConstsConvdtoi() {
1559 ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
1560 ; P9BE-LABEL: fromDiffConstsConvdtoi
1561 ; P9LE-LABEL: fromDiffConstsConvdtoi
1562 ; P8BE-LABEL: fromDiffConstsConvdtoi
1563 ; P8LE-LABEL: fromDiffConstsConvdtoi
; fromDiffMemConsAConvdtoi: loads two <2 x double> vectors, one at %ptr and
; one two doubles further on, converts each to <2 x i32> with fptosi and
; concatenates them into a <4 x i32> (shuffle mask <0, 1, 2, 3>).
; Expected code: two vector loads (lxv at offsets 0 and 16 on POWER9, lxvd2x
; on POWER8), xxmrgld/xxmrghd to regroup the doubles, xvcvdpsp on each group,
; vmrgew to interleave, and a final xvcvspsxws on v2. The little-endian POWER8
; run additionally expects an xxswapd after each lxvd2x.
1575 ; Function Attrs: norecurse nounwind readonly
1576 define <4 x i32> @fromDiffMemConsAConvdtoi(double* nocapture readonly %ptr) {
1578 %0 = bitcast double* %ptr to <2 x double>*
1579 %1 = load <2 x double>, <2 x double>* %0, align 8
1580 %2 = fptosi <2 x double> %1 to <2 x i32>
1581 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 2
1582 %3 = bitcast double* %arrayidx4 to <2 x double>*
1583 %4 = load <2 x double>, <2 x double>* %3, align 8
1584 %5 = fptosi <2 x double> %4 to <2 x i32>
1585 %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1586 ret <4 x i32> %vecinit9
1587 ; P9BE-LABEL: fromDiffMemConsAConvdtoi
1588 ; P9LE-LABEL: fromDiffMemConsAConvdtoi
1589 ; P8BE-LABEL: fromDiffMemConsAConvdtoi
1590 ; P8LE-LABEL: fromDiffMemConsAConvdtoi
1591 ; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
1592 ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
1593 ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
1594 ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
1595 ; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
1596 ; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
1597 ; P9BE: vmrgew v2, [[REG6]], [[REG5]]
1598 ; P9BE: xvcvspsxws v2, v2
1599 ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
1600 ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
1601 ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
1602 ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
1603 ; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
1604 ; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
1605 ; P9LE: vmrgew v2, [[REG6]], [[REG5]]
1606 ; P9LE: xvcvspsxws v2, v2
1607 ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
1608 ; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
1609 ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
1610 ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
1611 ; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
1612 ; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
1613 ; P8BE: vmrgew v2, [[REG6]], [[REG5]]
1614 ; P8BE: xvcvspsxws v2, v2
1615 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
1616 ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
1617 ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
1618 ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
1619 ; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
1620 ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
1621 ; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]]
1622 ; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]]
1623 ; P8LE: vmrgew v2, [[REG8]], [[REG7]]
1624 ; P8LE: xvcvspsxws v2, v2
; fromDiffMemConsDConvdtoi: loads four doubles at constant, descending offsets
; (ptr[3], ptr[2], ptr[1], ptr[0]), converts each to i32 with fptosi and
; inserts the results into lanes 0..3.
; The visible checks only require that every run configuration ends with a
; vector convert, xvcvspsxws, writing v2.
1627 ; Function Attrs: norecurse nounwind readonly
1628 define <4 x i32> @fromDiffMemConsDConvdtoi(double* nocapture readonly %ptr) {
1630 %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
1631 %0 = load double, double* %arrayidx, align 8
1632 %conv = fptosi double %0 to i32
1633 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1634 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
1635 %1 = load double, double* %arrayidx1, align 8
1636 %conv2 = fptosi double %1 to i32
1637 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
1638 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 1
1639 %2 = load double, double* %arrayidx4, align 8
1640 %conv5 = fptosi double %2 to i32
1641 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
1642 %3 = load double, double* %ptr, align 8
1643 %conv8 = fptosi double %3 to i32
1644 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
1645 ret <4 x i32> %vecinit9
1646 ; P9BE-LABEL: fromDiffMemConsDConvdtoi
1647 ; P9LE-LABEL: fromDiffMemConsDConvdtoi
1648 ; P8BE-LABEL: fromDiffMemConsDConvdtoi
1649 ; P8LE-LABEL: fromDiffMemConsDConvdtoi
1659 ; P9BE: xvcvspsxws v2
1669 ; P9LE: xvcvspsxws v2
1679 ; P8BE: xvcvspsxws v2
1689 ; P8LE: xvcvspsxws v2
; fromDiffMemVarAConvdtoi: loads four doubles at a variable base index,
; ascending (arr[elem] .. arr[elem+3]), converts each to i32 with fptosi and
; inserts the results into lanes 0..3.
; The visible checks only require that every run configuration ends with a
; vector convert, xvcvspsxws, writing v2.
1692 ; Function Attrs: norecurse nounwind readonly
1693 define <4 x i32> @fromDiffMemVarAConvdtoi(double* nocapture readonly %arr, i32 signext %elem) {
1695 %idxprom = sext i32 %elem to i64
1696 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
1697 %0 = load double, double* %arrayidx, align 8
1698 %conv = fptosi double %0 to i32
1699 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1700 %add = add nsw i32 %elem, 1
1701 %idxprom1 = sext i32 %add to i64
1702 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
1703 %1 = load double, double* %arrayidx2, align 8
1704 %conv3 = fptosi double %1 to i32
1705 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
1706 %add5 = add nsw i32 %elem, 2
1707 %idxprom6 = sext i32 %add5 to i64
1708 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6
1709 %2 = load double, double* %arrayidx7, align 8
1710 %conv8 = fptosi double %2 to i32
1711 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
1712 %add10 = add nsw i32 %elem, 3
1713 %idxprom11 = sext i32 %add10 to i64
1714 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11
1715 %3 = load double, double* %arrayidx12, align 8
1716 %conv13 = fptosi double %3 to i32
1717 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
1718 ret <4 x i32> %vecinit14
1719 ; P9BE-LABEL: fromDiffMemVarAConvdtoi
1720 ; P9LE-LABEL: fromDiffMemVarAConvdtoi
1721 ; P8BE-LABEL: fromDiffMemVarAConvdtoi
1722 ; P8LE-LABEL: fromDiffMemVarAConvdtoi
1732 ; P9BE: xvcvspsxws v2
1742 ; P9LE: xvcvspsxws v2
1752 ; P8BE: xvcvspsxws v2
1762 ; P8LE: xvcvspsxws v2
; fromDiffMemVarDConvdtoi: loads four doubles at a variable base index,
; descending (arr[elem], arr[elem-1], arr[elem-2], arr[elem-3]), converts each
; to i32 with fptosi and inserts the results into lanes 0..3.
; The visible checks only require that every run configuration ends with a
; vector convert, xvcvspsxws, writing v2.
1765 ; Function Attrs: norecurse nounwind readonly
1766 define <4 x i32> @fromDiffMemVarDConvdtoi(double* nocapture readonly %arr, i32 signext %elem) {
1768 %idxprom = sext i32 %elem to i64
1769 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
1770 %0 = load double, double* %arrayidx, align 8
1771 %conv = fptosi double %0 to i32
1772 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1773 %sub = add nsw i32 %elem, -1
1774 %idxprom1 = sext i32 %sub to i64
1775 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
1776 %1 = load double, double* %arrayidx2, align 8
1777 %conv3 = fptosi double %1 to i32
1778 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
1779 %sub5 = add nsw i32 %elem, -2
1780 %idxprom6 = sext i32 %sub5 to i64
1781 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6
1782 %2 = load double, double* %arrayidx7, align 8
1783 %conv8 = fptosi double %2 to i32
1784 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
1785 %sub10 = add nsw i32 %elem, -3
1786 %idxprom11 = sext i32 %sub10 to i64
1787 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11
1788 %3 = load double, double* %arrayidx12, align 8
1789 %conv13 = fptosi double %3 to i32
1790 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
1791 ret <4 x i32> %vecinit14
1792 ; P9BE-LABEL: fromDiffMemVarDConvdtoi
1793 ; P9LE-LABEL: fromDiffMemVarDConvdtoi
1794 ; P8BE-LABEL: fromDiffMemVarDConvdtoi
1795 ; P8LE-LABEL: fromDiffMemVarDConvdtoi
1805 ; P9BE: xvcvspsxws v2
1815 ; P9LE: xvcvspsxws v2
1825 ; P8BE: xvcvspsxws v2
1835 ; P8LE: xvcvspsxws v2
; spltRegValConvdtoi: converts the double argument %val to i32 with fptosi and
; splats the result into all four lanes.
; Only the per-configuration label checks are visible for this function here.
1838 ; Function Attrs: norecurse nounwind readnone
1839 define <4 x i32> @spltRegValConvdtoi(double %val) {
1841 %conv = fptosi double %val to i32
1842 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
1843 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1844 ret <4 x i32> %splat.splat
1845 ; P9BE-LABEL: spltRegValConvdtoi
1846 ; P9LE-LABEL: spltRegValConvdtoi
1847 ; P8BE-LABEL: spltRegValConvdtoi
1848 ; P8LE-LABEL: spltRegValConvdtoi
; spltMemValConvdtoi: loads one double from %ptr, converts it to i32 with
; fptosi and splats the result into all four lanes.
; Only the per-configuration label checks are visible for this function here.
1863 ; Function Attrs: norecurse nounwind readonly
1864 define <4 x i32> @spltMemValConvdtoi(double* nocapture readonly %ptr) {
1866 %0 = load double, double* %ptr, align 8
1867 %conv = fptosi double %0 to i32
1868 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
1869 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1870 ret <4 x i32> %splat.splat
1871 ; P9BE-LABEL: spltMemValConvdtoi
1872 ; P9LE-LABEL: spltMemValConvdtoi
1873 ; P8BE-LABEL: spltMemValConvdtoi
1874 ; P8LE-LABEL: spltMemValConvdtoi
; allZeroui: returns the all-zero <4 x i32> vector.
; Every run configuration expects the register to be cleared by XOR-ing it
; with itself: `xxlxor v2, v2, v2`.
1892 ; Function Attrs: norecurse nounwind readnone
1893 define <4 x i32> @allZeroui() {
1895 ret <4 x i32> zeroinitializer
1896 ; P9BE-LABEL: allZeroui
1897 ; P9LE-LABEL: allZeroui
1898 ; P8BE-LABEL: allZeroui
1899 ; P8LE-LABEL: allZeroui
1900 ; P9BE: xxlxor v2, v2, v2
1902 ; P9LE: xxlxor v2, v2, v2
1904 ; P8BE: xxlxor v2, v2, v2
1906 ; P8LE: xxlxor v2, v2, v2
; allOneui: returns a <4 x i32> whose four lanes are all -1 (every bit set).
; The POWER9 runs expect a byte splat of 255 (`xxspltib v2, 255`). The POWER8
; runs expect a byte splat of -1 (`vspltisb v2, -1`).
1910 ; Function Attrs: norecurse nounwind readnone
1911 define <4 x i32> @allOneui() {
1913 ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
1914 ; P9BE-LABEL: allOneui
1915 ; P9LE-LABEL: allOneui
1916 ; P8BE-LABEL: allOneui
1917 ; P8LE-LABEL: allOneui
1918 ; P9BE: xxspltib v2, 255
1920 ; P9LE: xxspltib v2, 255
1922 ; P8BE: vspltisb v2, -1
1924 ; P8LE: vspltisb v2, -1
; spltConst1ui: returns a <4 x i32> constant whose four lanes are all 1.
; The IR and the expected code are identical to those of spltConst1i: every
; run configuration expects `vspltisw v2, 1`.
1928 ; Function Attrs: norecurse nounwind readnone
1929 define <4 x i32> @spltConst1ui() {
1931 ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1932 ; P9BE-LABEL: spltConst1ui
1933 ; P9LE-LABEL: spltConst1ui
1934 ; P8BE-LABEL: spltConst1ui
1935 ; P8LE-LABEL: spltConst1ui
1936 ; P9BE: vspltisw v2, 1
1938 ; P9LE: vspltisw v2, 1
1940 ; P8BE: vspltisw v2, 1
1942 ; P8LE: vspltisw v2, 1
; spltConst16kui: returns a <4 x i32> constant whose four lanes are all 32767.
; Every run configuration expects `vspltisw v2, -15` (each word becomes
; 0xFFFFFFF1) followed by `vsrw v2, v2, v2`, which shifts each word right by
; its own low five bits (17), leaving 0x7FFF = 32767.
1946 ; Function Attrs: norecurse nounwind readnone
1947 define <4 x i32> @spltConst16kui() {
1949 ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
1950 ; P9BE-LABEL: spltConst16kui
1951 ; P9LE-LABEL: spltConst16kui
1952 ; P8BE-LABEL: spltConst16kui
1953 ; P8LE-LABEL: spltConst16kui
1954 ; P9BE: vspltisw v2, -15
1955 ; P9BE: vsrw v2, v2, v2
1957 ; P9LE: vspltisw v2, -15
1958 ; P9LE: vsrw v2, v2, v2
1960 ; P8BE: vspltisw v2, -15
1961 ; P8BE: vsrw v2, v2, v2
1963 ; P8LE: vspltisw v2, -15
1964 ; P8LE: vsrw v2, v2, v2
; spltConst32kui: return a <4 x i32> with every lane set to 65535 (0xFFFF).
; The expected sequence splats -16 (0xFFFFFFF0) and then shifts each word
; right by its own low five bits (16), which leaves 0xFFFF in every lane.
1968 ; Function Attrs: norecurse nounwind readnone
1969 define <4 x i32> @spltConst32kui() {
1971 ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
1972 ; P9BE-LABEL: spltConst32kui
1973 ; P9LE-LABEL: spltConst32kui
1974 ; P8BE-LABEL: spltConst32kui
1975 ; P8LE-LABEL: spltConst32kui
1976 ; P9BE: vspltisw v2, -16
1977 ; P9BE: vsrw v2, v2, v2
1979 ; P9LE: vspltisw v2, -16
1980 ; P9LE: vsrw v2, v2, v2
1982 ; P8BE: vspltisw v2, -16
1983 ; P8BE: vsrw v2, v2, v2
1985 ; P8LE: vspltisw v2, -16
1986 ; P8LE: vsrw v2, v2, v2
; fromRegsui: build a <4 x i32> from four zero-extended i32 arguments, placing
; %a, %b, %c, %d in lanes 0, 1, 2, 3 respectively.
; Expected code on pwr9: two mtvsrdd instructions pair (a, c) and (b, d), then
; vmrgow interleaves them; the little-endian run swaps the operand order.
; Expected code on pwr8: each argument is moved with mtvsrwz, the same pairs
; are combined with xxmrghd, and vmrgow produces the result.
1990 ; Function Attrs: norecurse nounwind readnone
1991 define <4 x i32> @fromRegsui(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c, i32 zeroext %d) {
1993 %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
1994 %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
1995 %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2
1996 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3
1997 ret <4 x i32> %vecinit3
1998 ; P9BE-LABEL: fromRegsui
1999 ; P9LE-LABEL: fromRegsui
2000 ; P8BE-LABEL: fromRegsui
2001 ; P8LE-LABEL: fromRegsui
2002 ; P9BE-DAG: mtvsrdd [[REG1:v[0-9]+]], r3, r5
2003 ; P9BE-DAG: mtvsrdd [[REG2:v[0-9]+]], r4, r6
2004 ; P9BE: vmrgow v2, [[REG1]], [[REG2]]
2006 ; P9LE-DAG: mtvsrdd [[REG1:v[0-9]+]], r5, r3
2007 ; P9LE-DAG: mtvsrdd [[REG2:v[0-9]+]], r6, r4
2008 ; P9LE: vmrgow v2, [[REG2]], [[REG1]]
2010 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
2011 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
2012 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
2013 ; P8BE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
2014 ; P8BE-DAG: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG1]], {{[v][s]*}}[[REG3]]
2015 ; P8BE-DAG: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG2]], {{[v][s]*}}[[REG4]]
2016 ; P8BE: vmrgow v2, [[REG5]], [[REG6]]
2017 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG1:[0-9]+]], r3
2018 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG2:[0-9]+]], r4
2019 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG3:[0-9]+]], r5
2020 ; P8LE-DAG: mtvsrwz {{[vf]}}[[REG4:[0-9]+]], r6
2021 ; P8LE: xxmrghd [[REG5:v[0-9]+]], {{[v][s]*}}[[REG3]], {{[v][s]*}}[[REG1]]
2022 ; P8LE: xxmrghd [[REG6:v[0-9]+]], {{[v][s]*}}[[REG4]], {{[v][s]*}}[[REG2]]
2023 ; P8LE: vmrgow v2, [[REG6]], [[REG5]]
; fromDiffConstsui: return the constant vector <242, -113, 889, 19>, i.e. four
; different lane values that cannot be produced by a splat.
2026 ; Function Attrs: norecurse nounwind readnone
2027 define <4 x i32> @fromDiffConstsui() {
2029 ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
2030 ; P9BE-LABEL: fromDiffConstsui
2031 ; P9LE-LABEL: fromDiffConstsui
2032 ; P8BE-LABEL: fromDiffConstsui
2033 ; P8LE-LABEL: fromDiffConstsui
; fromDiffMemConsAui: build a <4 x i32> from four consecutive i32 loads at
; constant, ascending indices: arr[0], arr[1], arr[2], arr[3] go to lanes
; 0, 1, 2, 3.
2045 ; Function Attrs: norecurse nounwind readonly
2046 define <4 x i32> @fromDiffMemConsAui(i32* nocapture readonly %arr) {
2048 %0 = load i32, i32* %arr, align 4
2049 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2050 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 1
2051 %1 = load i32, i32* %arrayidx1, align 4
2052 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2053 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2
2054 %2 = load i32, i32* %arrayidx3, align 4
2055 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
2056 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 3
2057 %3 = load i32, i32* %arrayidx5, align 4
2058 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
2059 ret <4 x i32> %vecinit6
2060 ; P9BE-LABEL: fromDiffMemConsAui
2061 ; P9LE-LABEL: fromDiffMemConsAui
2062 ; P8BE-LABEL: fromDiffMemConsAui
2063 ; P8LE-LABEL: fromDiffMemConsAui
; fromDiffMemConsDui: build a <4 x i32> from four consecutive i32 loads at
; constant, descending indices: arr[3], arr[2], arr[1], arr[0] go to lanes
; 0, 1, 2, 3 (the memory order is reversed relative to the lane order).
2075 ; Function Attrs: norecurse nounwind readonly
2076 define <4 x i32> @fromDiffMemConsDui(i32* nocapture readonly %arr) {
2078 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
2079 %0 = load i32, i32* %arrayidx, align 4
2080 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2081 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
2082 %1 = load i32, i32* %arrayidx1, align 4
2083 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2084 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 1
2085 %2 = load i32, i32* %arrayidx3, align 4
2086 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
2087 %3 = load i32, i32* %arr, align 4
2088 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
2089 ret <4 x i32> %vecinit6
2090 ; P9BE-LABEL: fromDiffMemConsDui
2091 ; P9LE-LABEL: fromDiffMemConsDui
2092 ; P8BE-LABEL: fromDiffMemConsDui
2093 ; P8LE-LABEL: fromDiffMemConsDui
; fromDiffMemVarAui: build a <4 x i32> from four consecutive i32 loads at a
; variable base index, ascending: arr[elem], arr[elem+1], arr[elem+2],
; arr[elem+3] go to lanes 0, 1, 2, 3.
; The checks expect the index to be scaled by 4 (sldi by 2) and the four
; scalar loads to be merged into one indexed vector load: lxvx on pwr9,
; lxvw4x on pwr8 big-endian, lxvd2x on pwr8 little-endian.
2114 ; Function Attrs: norecurse nounwind readonly
2115 define <4 x i32> @fromDiffMemVarAui(i32* nocapture readonly %arr, i32 signext %elem) {
2117 %idxprom = sext i32 %elem to i64
2118 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
2119 %0 = load i32, i32* %arrayidx, align 4
2120 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2121 %add = add nsw i32 %elem, 1
2122 %idxprom1 = sext i32 %add to i64
2123 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1
2124 %1 = load i32, i32* %arrayidx2, align 4
2125 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2126 %add4 = add nsw i32 %elem, 2
2127 %idxprom5 = sext i32 %add4 to i64
2128 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5
2129 %2 = load i32, i32* %arrayidx6, align 4
2130 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2
2131 %add8 = add nsw i32 %elem, 3
2132 %idxprom9 = sext i32 %add8 to i64
2133 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9
2134 %3 = load i32, i32* %arrayidx10, align 4
2135 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
2136 ret <4 x i32> %vecinit11
2137 ; P9BE-LABEL: fromDiffMemVarAui
2138 ; P9LE-LABEL: fromDiffMemVarAui
2139 ; P8BE-LABEL: fromDiffMemVarAui
2140 ; P8LE-LABEL: fromDiffMemVarAui
2141 ; P9BE: sldi r4, r4, 2
2142 ; P9BE: lxvx v2, r3, r4
2144 ; P9LE: sldi r4, r4, 2
2145 ; P9LE: lxvx v2, r3, r4
2147 ; P8BE: sldi r4, r4, 2
2148 ; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4
2150 ; P8LE: sldi r4, r4, 2
2151 ; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4
; fromDiffMemVarDui: build a <4 x i32> from four consecutive i32 loads at a
; variable base index, descending: arr[elem], arr[elem-1], arr[elem-2],
; arr[elem-3] go to lanes 0, 1, 2, 3.
; The visible checks expect the index to be scaled by 4 (sldi by 2). On pwr9
; they also expect the address to be moved back by 12 bytes (three elements,
; i.e. to the lowest-addressed element) before a single vector load.
2156 ; Function Attrs: norecurse nounwind readonly
2157 define <4 x i32> @fromDiffMemVarDui(i32* nocapture readonly %arr, i32 signext %elem) {
2159 %idxprom = sext i32 %elem to i64
2160 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
2161 %0 = load i32, i32* %arrayidx, align 4
2162 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2163 %sub = add nsw i32 %elem, -1
2164 %idxprom1 = sext i32 %sub to i64
2165 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1
2166 %1 = load i32, i32* %arrayidx2, align 4
2167 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2168 %sub4 = add nsw i32 %elem, -2
2169 %idxprom5 = sext i32 %sub4 to i64
2170 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5
2171 %2 = load i32, i32* %arrayidx6, align 4
2172 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2
2173 %sub8 = add nsw i32 %elem, -3
2174 %idxprom9 = sext i32 %sub8 to i64
2175 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9
2176 %3 = load i32, i32* %arrayidx10, align 4
2177 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
2178 ret <4 x i32> %vecinit11
2179 ; P9BE-LABEL: fromDiffMemVarDui
2180 ; P9LE-LABEL: fromDiffMemVarDui
2181 ; P8BE-LABEL: fromDiffMemVarDui
2182 ; P8LE-LABEL: fromDiffMemVarDui
2183 ; P9BE-DAG: sldi {{r[0-9]+}}, r4, 2
2184 ; P9BE-DAG: addi r3, r3, -12
2185 ; P9BE-DAG: lxvx {{v[0-9]+}}, 0, r3
2189 ; P9LE-DAG: sldi {{r[0-9]+}}, r4, 2
2190 ; P9LE-DAG: addi r3, r3, -12
2191 ; P9LE-DAG: lxvx {{v[0-9]+}}, 0, r3
2195 ; P8BE-DAG: sldi {{r[0-9]+}}, r4, 2
2196 ; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3
2200 ; P8LE-DAG: sldi {{r[0-9]+}}, r4, 2
; fromRandMemConsui: build a <4 x i32> from four i32 loads at constant,
; non-consecutive indices: arr[4], arr[18], arr[2], arr[88] go to lanes
; 0, 1, 2, 3.
2207 ; Function Attrs: norecurse nounwind readonly
2208 define <4 x i32> @fromRandMemConsui(i32* nocapture readonly %arr) {
2210 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4
2211 %0 = load i32, i32* %arrayidx, align 4
2212 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2213 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 18
2214 %1 = load i32, i32* %arrayidx1, align 4
2215 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2216 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2
2217 %2 = load i32, i32* %arrayidx3, align 4
2218 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
2219 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 88
2220 %3 = load i32, i32* %arrayidx5, align 4
2221 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
2222 ret <4 x i32> %vecinit6
2223 ; P9BE-LABEL: fromRandMemConsui
2224 ; P9LE-LABEL: fromRandMemConsui
2225 ; P8BE-LABEL: fromRandMemConsui
2226 ; P8LE-LABEL: fromRandMemConsui
; fromRandMemVarui: build a <4 x i32> from four i32 loads at non-consecutive
; offsets from a variable index: arr[elem+4], arr[elem+1], arr[elem+2],
; arr[elem+8] go to lanes 0, 1, 2, 3.
; The visible checks expect the index to be scaled by 4 (sldi by 2).
2265 ; Function Attrs: norecurse nounwind readonly
2266 define <4 x i32> @fromRandMemVarui(i32* nocapture readonly %arr, i32 signext %elem) {
2268 %add = add nsw i32 %elem, 4
2269 %idxprom = sext i32 %add to i64
2270 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
2271 %0 = load i32, i32* %arrayidx, align 4
2272 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2273 %add1 = add nsw i32 %elem, 1
2274 %idxprom2 = sext i32 %add1 to i64
2275 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 %idxprom2
2276 %1 = load i32, i32* %arrayidx3, align 4
2277 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2278 %add5 = add nsw i32 %elem, 2
2279 %idxprom6 = sext i32 %add5 to i64
2280 %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 %idxprom6
2281 %2 = load i32, i32* %arrayidx7, align 4
2282 %vecinit8 = insertelement <4 x i32> %vecinit4, i32 %2, i32 2
2283 %add9 = add nsw i32 %elem, 8
2284 %idxprom10 = sext i32 %add9 to i64
2285 %arrayidx11 = getelementptr inbounds i32, i32* %arr, i64 %idxprom10
2286 %3 = load i32, i32* %arrayidx11, align 4
2287 %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3
2288 ret <4 x i32> %vecinit12
2289 ; P9BE-LABEL: fromRandMemVarui
2290 ; P9LE-LABEL: fromRandMemVarui
2291 ; P8BE-LABEL: fromRandMemVarui
2292 ; P8LE-LABEL: fromRandMemVarui
2293 ; P9BE: sldi r4, r4, 2
2301 ; P9LE: sldi r4, r4, 2
2309 ; P8BE: sldi r4, r4, 2
2321 ; P8LE: sldi r4, r4, 2
; spltRegValui: splat the zero-extended i32 argument %val into every lane of a
; <4 x i32>.
; The pwr9 runs expect a single move-and-splat (mtvsrws); the pwr8 runs expect
; a move into a vector-scalar register (mtvsrwz) followed by xxspltw of
; word 1.
2335 ; Function Attrs: norecurse nounwind readnone
2336 define <4 x i32> @spltRegValui(i32 zeroext %val) {
2338 %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0
2339 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2340 ret <4 x i32> %splat.splat
2341 ; P9BE-LABEL: spltRegValui
2342 ; P9LE-LABEL: spltRegValui
2343 ; P8BE-LABEL: spltRegValui
2344 ; P8LE-LABEL: spltRegValui
2345 ; P9BE: mtvsrws v2, r3
2347 ; P9LE: mtvsrws v2, r3
2349 ; P8BE: mtvsrwz {{[vsf0-9]+}}, r3
2350 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
2352 ; P8LE: mtvsrwz {{[vsf0-9]+}}, r3
2353 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
; spltMemValui: load an i32 from %ptr and splat it into every lane of a
; <4 x i32>.
; The pwr9 runs expect a single load-and-splat (lxvwsx); the pwr8 runs expect
; a scalar word load (lxsiwax) followed by xxspltw of word 1.
2357 ; Function Attrs: norecurse nounwind readonly
2358 define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {
2360 %0 = load i32, i32* %ptr, align 4
2361 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
2362 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2363 ret <4 x i32> %splat.splat
2364 ; P9BE-LABEL: spltMemValui
2365 ; P9LE-LABEL: spltMemValui
2366 ; P8BE-LABEL: spltMemValui
2367 ; P8LE-LABEL: spltMemValui
2368 ; P9BE: lxvwsx v2, 0, r3
2370 ; P9LE: lxvwsx v2, 0, r3
2372 ; P8BE: lxsiwax {{[vsf0-9]+}}, 0, r3
2373 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
2375 ; P8LE: lxsiwax {{[vsf0-9]+}}, 0, r3
2376 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
; spltCnstConvftoui: return a <4 x i32> with every lane set to 4.
; The IR holds only the integer constant; presumably the float-to-unsigned
; conversion in the C source was folded before this IR was produced.
; All four run configurations expect a single vspltisw v2, 4.
2380 ; Function Attrs: norecurse nounwind readnone
2381 define <4 x i32> @spltCnstConvftoui() {
2383 ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
2384 ; P9BE-LABEL: spltCnstConvftoui
2385 ; P9LE-LABEL: spltCnstConvftoui
2386 ; P8BE-LABEL: spltCnstConvftoui
2387 ; P8LE-LABEL: spltCnstConvftoui
2388 ; P9BE: vspltisw v2, 4
2390 ; P9LE: vspltisw v2, 4
2392 ; P8BE: vspltisw v2, 4
2394 ; P8LE: vspltisw v2, 4
; fromRegsConvftoui: convert the four float arguments %a, %b, %c, %d to i32
; with fptoui and place them in lanes 0, 1, 2, 3 of a <4 x i32>.
; Expected code in every configuration: the argument registers are paired
; with xxmrghd (vs1 with vs3, vs2 with vs4), each pair is narrowed with
; xvcvdpsp, the halves are interleaved with vmrgew, and one vector
; xvcvspuxws converts all four lanes. The little-endian runs reverse the
; operand order of the merges.
2398 ; Function Attrs: norecurse nounwind readnone
2399 define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
2401 %conv = fptoui float %a to i32
2402 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2403 %conv1 = fptoui float %b to i32
2404 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
2405 %conv3 = fptoui float %c to i32
2406 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
2407 %conv5 = fptoui float %d to i32
2408 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
2409 ret <4 x i32> %vecinit6
2410 ; P9BE-LABEL: fromRegsConvftoui
2411 ; P9LE-LABEL: fromRegsConvftoui
2412 ; P8BE-LABEL: fromRegsConvftoui
2413 ; P8LE-LABEL: fromRegsConvftoui
2414 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
2415 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
2416 ; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2417 ; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2418 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
2419 ; P9BE: xvcvspuxws v2, v2
2420 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
2421 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
2422 ; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2423 ; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2424 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
2425 ; P9LE: xvcvspuxws v2, v2
2426 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
2427 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
2428 ; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2429 ; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2430 ; P8BE: vmrgew v2, [[REG3]], [[REG4]]
2431 ; P8BE: xvcvspuxws v2, v2
2432 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
2433 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
2434 ; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2435 ; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2436 ; P8LE: vmrgew v2, [[REG4]], [[REG3]]
2437 ; P8LE: xvcvspuxws v2, v2
; fromDiffConstsConvftoui: return the constant vector <24, 234, 988, 422>.
; The IR holds only integer constants; presumably the float-to-unsigned
; conversions in the C source were folded before this IR was produced.
2440 ; Function Attrs: norecurse nounwind readnone
2441 define <4 x i32> @fromDiffConstsConvftoui() {
2443 ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
2444 ; P9BE-LABEL: fromDiffConstsConvftoui
2445 ; P9LE-LABEL: fromDiffConstsConvftoui
2446 ; P8BE-LABEL: fromDiffConstsConvftoui
2447 ; P8LE-LABEL: fromDiffConstsConvftoui
; fromDiffMemConsAConvftoui: reinterpret %ptr as a pointer to <4 x float>,
; load the whole vector (4-byte alignment), and convert all four lanes to
; unsigned i32 with one vector fptoui.
; Expected code: one vector load (lxv on pwr9, lxvw4x on pwr8 big-endian,
; lxvd2x plus an xxswapd on pwr8 little-endian) followed by xvcvspuxws.
2459 ; Function Attrs: norecurse nounwind readonly
2460 define <4 x i32> @fromDiffMemConsAConvftoui(float* nocapture readonly %ptr) {
2462 %0 = bitcast float* %ptr to <4 x float>*
2463 %1 = load <4 x float>, <4 x float>* %0, align 4
2464 %2 = fptoui <4 x float> %1 to <4 x i32>
2466 ; P9BE-LABEL: fromDiffMemConsAConvftoui
2467 ; P9LE-LABEL: fromDiffMemConsAConvftoui
2468 ; P8BE-LABEL: fromDiffMemConsAConvftoui
2469 ; P8LE-LABEL: fromDiffMemConsAConvftoui
2470 ; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
2471 ; P9BE: xvcvspuxws v2, [[REG1]]
2473 ; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
2474 ; P9LE: xvcvspuxws v2, [[REG1]]
2476 ; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3
2477 ; P8BE: xvcvspuxws v2, [[REG1]]
2479 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
2480 ; P8LE: xxswapd v2, [[REG1]]
2481 ; P8LE: xvcvspuxws v2, v2
; fromDiffMemConsDConvftoui: load four consecutive floats at constant,
; descending indices (ptr[3], ptr[2], ptr[1], ptr[0]), convert each to i32
; with fptoui, and place them in lanes 0, 1, 2, 3 of a <4 x i32>.
2485 ; Function Attrs: norecurse nounwind readonly
2486 define <4 x i32> @fromDiffMemConsDConvftoui(float* nocapture readonly %ptr) {
2488 %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
2489 %0 = load float, float* %arrayidx, align 4
2490 %conv = fptoui float %0 to i32
2491 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2492 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2
2493 %1 = load float, float* %arrayidx1, align 4
2494 %conv2 = fptoui float %1 to i32
2495 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
2496 %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 1
2497 %2 = load float, float* %arrayidx4, align 4
2498 %conv5 = fptoui float %2 to i32
2499 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
2500 %3 = load float, float* %ptr, align 4
2501 %conv8 = fptoui float %3 to i32
2502 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
2503 ret <4 x i32> %vecinit9
2504 ; P9BE-LABEL: fromDiffMemConsDConvftoui
2505 ; P9LE-LABEL: fromDiffMemConsDConvftoui
2506 ; P8BE-LABEL: fromDiffMemConsDConvftoui
2507 ; P8LE-LABEL: fromDiffMemConsDConvftoui
; fromDiffMemVarAConvftoui: load four consecutive floats at a variable base
; index, ascending (arr[elem] .. arr[elem+3]), convert each to i32 with
; fptoui, and place them in lanes 0, 1, 2, 3 of a <4 x i32>.
2531 ; Function Attrs: norecurse nounwind readonly
2532 define <4 x i32> @fromDiffMemVarAConvftoui(float* nocapture readonly %arr, i32 signext %elem) {
2534 %idxprom = sext i32 %elem to i64
2535 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
2536 %0 = load float, float* %arrayidx, align 4
2537 %conv = fptoui float %0 to i32
2538 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2539 %add = add nsw i32 %elem, 1
2540 %idxprom1 = sext i32 %add to i64
2541 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
2542 %1 = load float, float* %arrayidx2, align 4
2543 %conv3 = fptoui float %1 to i32
2544 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
2545 %add5 = add nsw i32 %elem, 2
2546 %idxprom6 = sext i32 %add5 to i64
2547 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6
2548 %2 = load float, float* %arrayidx7, align 4
2549 %conv8 = fptoui float %2 to i32
2550 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
2551 %add10 = add nsw i32 %elem, 3
2552 %idxprom11 = sext i32 %add10 to i64
2553 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11
2554 %3 = load float, float* %arrayidx12, align 4
2555 %conv13 = fptoui float %3 to i32
2556 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
2557 ret <4 x i32> %vecinit14
2558 ; P9BE-LABEL: fromDiffMemVarAConvftoui
2559 ; P9LE-LABEL: fromDiffMemVarAConvftoui
2560 ; P8BE-LABEL: fromDiffMemVarAConvftoui
2561 ; P8LE-LABEL: fromDiffMemVarAConvftoui
2562 ; FIXME: implement finding consecutive loads with pre-inc
; fromDiffMemVarDConvftoui: load four consecutive floats at a variable base
; index, descending (arr[elem], arr[elem-1], arr[elem-2], arr[elem-3]),
; convert each to i32 with fptoui, and place them in lanes 0, 1, 2, 3 of a
; <4 x i32>.
2569 ; Function Attrs: norecurse nounwind readonly
2570 define <4 x i32> @fromDiffMemVarDConvftoui(float* nocapture readonly %arr, i32 signext %elem) {
2572 %idxprom = sext i32 %elem to i64
2573 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
2574 %0 = load float, float* %arrayidx, align 4
2575 %conv = fptoui float %0 to i32
2576 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2577 %sub = add nsw i32 %elem, -1
2578 %idxprom1 = sext i32 %sub to i64
2579 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
2580 %1 = load float, float* %arrayidx2, align 4
2581 %conv3 = fptoui float %1 to i32
2582 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
2583 %sub5 = add nsw i32 %elem, -2
2584 %idxprom6 = sext i32 %sub5 to i64
2585 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6
2586 %2 = load float, float* %arrayidx7, align 4
2587 %conv8 = fptoui float %2 to i32
2588 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
2589 %sub10 = add nsw i32 %elem, -3
2590 %idxprom11 = sext i32 %sub10 to i64
2591 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11
2592 %3 = load float, float* %arrayidx12, align 4
2593 %conv13 = fptoui float %3 to i32
2594 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
2595 ret <4 x i32> %vecinit14
2596 ; P9BE-LABEL: fromDiffMemVarDConvftoui
2597 ; P9LE-LABEL: fromDiffMemVarDConvftoui
2598 ; P8BE-LABEL: fromDiffMemVarDConvftoui
2599 ; P8LE-LABEL: fromDiffMemVarDConvftoui
2600 ; FIXME: implement finding consecutive loads with pre-inc
; spltRegValConvftoui: convert the float argument %val to i32 with fptoui and
; splat the result into every lane of a <4 x i32>.
; All four run configurations expect a scalar convert from f1 (xscvdpuxws)
; followed by xxspltw of word 1 of the same register.
2607 ; Function Attrs: norecurse nounwind readnone
2608 define <4 x i32> @spltRegValConvftoui(float %val) {
2610 %conv = fptoui float %val to i32
2611 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
2612 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2613 ret <4 x i32> %splat.splat
2614 ; P9BE-LABEL: spltRegValConvftoui
2615 ; P9LE-LABEL: spltRegValConvftoui
2616 ; P8BE-LABEL: spltRegValConvftoui
2617 ; P8LE-LABEL: spltRegValConvftoui
2618 ; P9BE: xscvdpuxws f[[REG1:[0-9]+]], f1
2619 ; P9BE: xxspltw v2, vs[[REG1]], 1
2621 ; P9LE: xscvdpuxws f[[REG1:[0-9]+]], f1
2622 ; P9LE: xxspltw v2, vs[[REG1]], 1
2624 ; P8BE: xscvdpuxws f[[REG1:[0-9]+]], f1
2625 ; P8BE: xxspltw v2, vs[[REG1]], 1
2627 ; P8LE: xscvdpuxws f[[REG1:[0-9]+]], f1
2628 ; P8LE: xxspltw v2, vs[[REG1]], 1
; spltMemValConvftoui: load a float from %ptr, convert it to i32 with fptoui,
; and splat the result into every lane of a <4 x i32>.
; Expected code on pwr9 (both endiannesses): a load-and-splat (lxvwsx)
; followed by one vector convert (xvcvspuxws).
; Expected code on pwr8 (both endiannesses): a scalar float load (lfsx), a
; scalar convert (xscvdpuxws), then xxspltw of word 1. The register number is
; captured as digits only so it can be re-used under the vs name.
2632 ; Function Attrs: norecurse nounwind readonly
2633 define <4 x i32> @spltMemValConvftoui(float* nocapture readonly %ptr) {
2635 %0 = load float, float* %ptr, align 4
2636 %conv = fptoui float %0 to i32
2637 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
2638 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2639 ret <4 x i32> %splat.splat
2640 ; P9BE-LABEL: spltMemValConvftoui
2641 ; P9LE-LABEL: spltMemValConvftoui
2642 ; P8BE-LABEL: spltMemValConvftoui
2643 ; P8LE-LABEL: spltMemValConvftoui
2644 ; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
2645 ; P9BE: xvcvspuxws v2, [[REG1]]
2646 ; P9LE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
2647 ; P9LE: xvcvspuxws v2, [[REG1]]
2648 ; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
2649 ; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]]
2650 ; P8BE: xxspltw v2, vs[[REG2]], 1
2651 ; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
2652 ; P8LE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]]
2653 ; P8LE: xxspltw v2, vs[[REG2]], 1
; spltCnstConvdtoui: return a <4 x i32> with every lane set to 4.
; The IR holds only the integer constant; presumably the double-to-unsigned
; conversion in the C source was folded before this IR was produced.
; All four run configurations expect a single vspltisw v2, 4.
2656 ; Function Attrs: norecurse nounwind readnone
2657 define <4 x i32> @spltCnstConvdtoui() {
2659 ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
2660 ; P9BE-LABEL: spltCnstConvdtoui
2661 ; P9LE-LABEL: spltCnstConvdtoui
2662 ; P8BE-LABEL: spltCnstConvdtoui
2663 ; P8LE-LABEL: spltCnstConvdtoui
2664 ; P9BE: vspltisw v2, 4
2666 ; P9LE: vspltisw v2, 4
2668 ; P8BE: vspltisw v2, 4
2670 ; P8LE: vspltisw v2, 4
; fromRegsConvdtoui: convert the four double arguments %a, %b, %c, %d to i32
; with fptoui and place them in lanes 0, 1, 2, 3 of a <4 x i32>.
; Expected code in every configuration: the argument registers are paired
; with xxmrghd (vs1 with vs3, vs2 with vs4), each pair is narrowed with
; xvcvdpsp, the halves are interleaved with vmrgew, and one vector
; xvcvspuxws converts all four lanes. The little-endian runs reverse the
; operand order of the merges.
2674 ; Function Attrs: norecurse nounwind readnone
2675 define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) {
2677 %conv = fptoui double %a to i32
2678 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2679 %conv1 = fptoui double %b to i32
2680 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
2681 %conv3 = fptoui double %c to i32
2682 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
2683 %conv5 = fptoui double %d to i32
2684 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
2685 ret <4 x i32> %vecinit6
2686 ; P9BE-LABEL: fromRegsConvdtoui
2687 ; P9LE-LABEL: fromRegsConvdtoui
2688 ; P8BE-LABEL: fromRegsConvdtoui
2689 ; P8LE-LABEL: fromRegsConvdtoui
2690 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
2691 ; P9BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
2692 ; P9BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2693 ; P9BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2694 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
2695 ; P9BE: xvcvspuxws v2, v2
2696 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
2697 ; P9LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
2698 ; P9LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2699 ; P9LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2700 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
2701 ; P9LE: xvcvspuxws v2, v2
2702 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
2703 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
2704 ; P8BE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2705 ; P8BE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2706 ; P8BE: vmrgew v2, [[REG3]], [[REG4]]
2707 ; P8BE: xvcvspuxws v2, v2
2708 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
2709 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
2710 ; P8LE-DAG: xvcvdpsp [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2711 ; P8LE-DAG: xvcvdpsp [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2712 ; P8LE: vmrgew v2, [[REG4]], [[REG3]]
2713 ; P8LE: xvcvspuxws v2, v2
; fromDiffConstsConvdtoui: return the constant vector <24, 234, 988, 422>.
; The IR holds only integer constants; presumably the double-to-unsigned
; conversions in the C source were folded before this IR was produced.
2716 ; Function Attrs: norecurse nounwind readnone
2717 define <4 x i32> @fromDiffConstsConvdtoui() {
2719 ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
2720 ; P9BE-LABEL: fromDiffConstsConvdtoui
2721 ; P9LE-LABEL: fromDiffConstsConvdtoui
2722 ; P8BE-LABEL: fromDiffConstsConvdtoui
2723 ; P8LE-LABEL: fromDiffConstsConvdtoui
; fromDiffMemConsAConvdtoui: load two <2 x double> vectors, one at %ptr and
; one two elements (16 bytes) further on, convert each to <2 x i32> with
; fptoui, and concatenate the halves into a <4 x i32> (shuffle mask
; <0, 1, 2, 3>).
; Expected code: two vector loads (lxv at offsets 0 and 16 on pwr9, lxvd2x on
; pwr8, with extra xxswapd on pwr8 little-endian), xxmrgld/xxmrghd to regroup
; the doubles, xvcvdpsp on each group, vmrgew to interleave, and a final
; xvcvspuxws on v2.
2735 ; Function Attrs: norecurse nounwind readonly
2736 define <4 x i32> @fromDiffMemConsAConvdtoui(double* nocapture readonly %ptr) {
2738 %0 = bitcast double* %ptr to <2 x double>*
2739 %1 = load <2 x double>, <2 x double>* %0, align 8
2740 %2 = fptoui <2 x double> %1 to <2 x i32>
2741 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 2
2742 %3 = bitcast double* %arrayidx4 to <2 x double>*
2743 %4 = load <2 x double>, <2 x double>* %3, align 8
2744 %5 = fptoui <2 x double> %4 to <2 x i32>
2745 %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2746 ret <4 x i32> %vecinit9
2747 ; P9BE-LABEL: fromDiffMemConsAConvdtoui
2748 ; P9LE-LABEL: fromDiffMemConsAConvdtoui
2749 ; P8BE-LABEL: fromDiffMemConsAConvdtoui
2750 ; P8LE-LABEL: fromDiffMemConsAConvdtoui
2751 ; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
2752 ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
2753 ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
2754 ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
2755 ; P9BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
2756 ; P9BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
2757 ; P9BE: vmrgew v2, [[REG6]], [[REG5]]
2758 ; P9BE: xvcvspuxws v2, v2
2759 ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
2760 ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
2761 ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
2762 ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
2763 ; P9LE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
2764 ; P9LE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
2765 ; P9LE: vmrgew v2, [[REG6]], [[REG5]]
2766 ; P9LE: xvcvspuxws v2, v2
2767 ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
2768 ; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
2769 ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
2770 ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
2771 ; P8BE-DAG: xvcvdpsp [[REG5:[vs0-9]+]], [[REG3]]
2772 ; P8BE-DAG: xvcvdpsp [[REG6:[vs0-9]+]], [[REG4]]
2773 ; P8BE: vmrgew v2, [[REG6]], [[REG5]]
2774 ; P8BE: xvcvspuxws v2, v2
2775 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
2776 ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
2777 ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
2778 ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
2779 ; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
2780 ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
2781 ; P8LE-DAG: xvcvdpsp [[REG7:[vs0-9]+]], [[REG5]]
2782 ; P8LE-DAG: xvcvdpsp [[REG8:[vs0-9]+]], [[REG6]]
2783 ; P8LE: vmrgew v2, [[REG8]], [[REG7]]
2784 ; P8LE: xvcvspuxws v2, v2
; fromDiffMemConsDConvdtoui: load four consecutive doubles at constant,
; descending indices (ptr[3], ptr[2], ptr[1], ptr[0]), convert each to i32
; with fptoui, and place them in lanes 0, 1, 2, 3 of a <4 x i32>.
; The visible checks expect the result in v2 to come from a vector
; xvcvspuxws in every configuration.
2787 ; Function Attrs: norecurse nounwind readonly
2788 define <4 x i32> @fromDiffMemConsDConvdtoui(double* nocapture readonly %ptr) {
2790 %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
2791 %0 = load double, double* %arrayidx, align 8
2792 %conv = fptoui double %0 to i32
2793 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2794 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
2795 %1 = load double, double* %arrayidx1, align 8
2796 %conv2 = fptoui double %1 to i32
2797 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
2798 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 1
2799 %2 = load double, double* %arrayidx4, align 8
2800 %conv5 = fptoui double %2 to i32
2801 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
2802 %3 = load double, double* %ptr, align 8
2803 %conv8 = fptoui double %3 to i32
2804 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
2805 ret <4 x i32> %vecinit9
2806 ; P9BE-LABEL: fromDiffMemConsDConvdtoui
2807 ; P9LE-LABEL: fromDiffMemConsDConvdtoui
2808 ; P8BE-LABEL: fromDiffMemConsDConvdtoui
2809 ; P8LE-LABEL: fromDiffMemConsDConvdtoui
2819 ; P9BE: xvcvspuxws v2
2829 ; P9LE: xvcvspuxws v2
2839 ; P8BE: xvcvspuxws v2
2849 ; P8LE: xvcvspuxws v2
; fromDiffMemVarAConvdtoui: load four consecutive doubles at a variable base
; index, ascending (arr[elem] .. arr[elem+3]), convert each to i32 with
; fptoui, and place them in lanes 0, 1, 2, 3 of a <4 x i32>.
; The visible checks expect the result in v2 to come from a vector
; xvcvspuxws in every configuration.
2852 ; Function Attrs: norecurse nounwind readonly
2853 define <4 x i32> @fromDiffMemVarAConvdtoui(double* nocapture readonly %arr, i32 signext %elem) {
2855 %idxprom = sext i32 %elem to i64
2856 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
2857 %0 = load double, double* %arrayidx, align 8
2858 %conv = fptoui double %0 to i32
2859 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2860 %add = add nsw i32 %elem, 1
2861 %idxprom1 = sext i32 %add to i64
2862 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
2863 %1 = load double, double* %arrayidx2, align 8
2864 %conv3 = fptoui double %1 to i32
2865 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
2866 %add5 = add nsw i32 %elem, 2
2867 %idxprom6 = sext i32 %add5 to i64
2868 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6
2869 %2 = load double, double* %arrayidx7, align 8
2870 %conv8 = fptoui double %2 to i32
2871 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
2872 %add10 = add nsw i32 %elem, 3
2873 %idxprom11 = sext i32 %add10 to i64
2874 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11
2875 %3 = load double, double* %arrayidx12, align 8
2876 %conv13 = fptoui double %3 to i32
2877 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
2878 ret <4 x i32> %vecinit14
2879 ; P9BE-LABEL: fromDiffMemVarAConvdtoui
2880 ; P9LE-LABEL: fromDiffMemVarAConvdtoui
2881 ; P8BE-LABEL: fromDiffMemVarAConvdtoui
2882 ; P8LE-LABEL: fromDiffMemVarAConvdtoui
2892 ; P9BE: xvcvspuxws v2
2902 ; P9LE: xvcvspuxws v2
2912 ; P8BE: xvcvspuxws v2
2922 ; P8LE: xvcvspuxws v2
; fromDiffMemVarDConvdtoui: load four consecutive doubles at a variable base
; index, descending (arr[elem], arr[elem-1], arr[elem-2], arr[elem-3]),
; convert each to i32 with fptoui, and place them in lanes 0, 1, 2, 3 of a
; <4 x i32>.
; The visible checks expect the result in v2 to come from a vector
; xvcvspuxws in every configuration.
2925 ; Function Attrs: norecurse nounwind readonly
2926 define <4 x i32> @fromDiffMemVarDConvdtoui(double* nocapture readonly %arr, i32 signext %elem) {
2928 %idxprom = sext i32 %elem to i64
2929 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
2930 %0 = load double, double* %arrayidx, align 8
2931 %conv = fptoui double %0 to i32
2932 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2933 %sub = add nsw i32 %elem, -1
2934 %idxprom1 = sext i32 %sub to i64
2935 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
2936 %1 = load double, double* %arrayidx2, align 8
2937 %conv3 = fptoui double %1 to i32
2938 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
2939 %sub5 = add nsw i32 %elem, -2
2940 %idxprom6 = sext i32 %sub5 to i64
2941 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6
2942 %2 = load double, double* %arrayidx7, align 8
2943 %conv8 = fptoui double %2 to i32
2944 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
2945 %sub10 = add nsw i32 %elem, -3
2946 %idxprom11 = sext i32 %sub10 to i64
2947 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11
2948 %3 = load double, double* %arrayidx12, align 8
2949 %conv13 = fptoui double %3 to i32
2950 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
2951 ret <4 x i32> %vecinit14
2952 ; P9BE-LABEL: fromDiffMemVarDConvdtoui
2953 ; P9LE-LABEL: fromDiffMemVarDConvdtoui
2954 ; P8BE-LABEL: fromDiffMemVarDConvdtoui
2955 ; P8LE-LABEL: fromDiffMemVarDConvdtoui
2965 ; P9BE: xvcvspuxws v2
2975 ; P9LE: xvcvspuxws v2
2985 ; P8BE: xvcvspuxws v2
2995 ; P8LE: xvcvspuxws v2
; spltRegValConvdtoui: convert the double argument %val to i32 with fptoui and
; splat the result into every lane of a <4 x i32>.
2998 ; Function Attrs: norecurse nounwind readnone
2999 define <4 x i32> @spltRegValConvdtoui(double %val) {
3001 %conv = fptoui double %val to i32
3002 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
3003 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
3004 ret <4 x i32> %splat.splat
3005 ; P9BE-LABEL: spltRegValConvdtoui
3006 ; P9LE-LABEL: spltRegValConvdtoui
3007 ; P8BE-LABEL: spltRegValConvdtoui
3008 ; P8LE-LABEL: spltRegValConvdtoui
; spltMemValConvdtoui: load a double from %ptr, convert it to i32 with fptoui,
; and splat the result into every lane of a <4 x i32>.
3023 ; Function Attrs: norecurse nounwind readonly
3024 define <4 x i32> @spltMemValConvdtoui(double* nocapture readonly %ptr) {
3026 %0 = load double, double* %ptr, align 8
3027 %conv = fptoui double %0 to i32
3028 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
3029 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
3030 ret <4 x i32> %splat.splat
3031 ; P9BE-LABEL: spltMemValConvdtoui
3032 ; P9LE-LABEL: spltMemValConvdtoui
3033 ; P8BE-LABEL: spltMemValConvdtoui
3034 ; P8LE-LABEL: spltMemValConvdtoui
; Returns an all-zero <2 x i64> (zeroinitializer).
; The checks for all four configurations include an xxlxor of v2 with itself.
3052 ; Function Attrs: norecurse nounwind readnone
3053 define <2 x i64> @allZeroll() {
3055 ret <2 x i64> zeroinitializer
3056 ; P9BE-LABEL: allZeroll
3057 ; P9LE-LABEL: allZeroll
3058 ; P8BE-LABEL: allZeroll
3059 ; P8LE-LABEL: allZeroll
3060 ; P9BE: xxlxor v2, v2, v2
3062 ; P9LE: xxlxor v2, v2, v2
3064 ; P8BE: xxlxor v2, v2, v2
3066 ; P8LE: xxlxor v2, v2, v2
; Returns a <2 x i64> with both lanes set to -1 (all bits set).
; The Power9 checks include a byte splat of 255 (xxspltib); the Power8 checks
; include a byte splat of -1 (vspltisb). Both write v2.
3070 ; Function Attrs: norecurse nounwind readnone
3071 define <2 x i64> @allOnell() {
3073 ret <2 x i64> <i64 -1, i64 -1>
3074 ; P9BE-LABEL: allOnell
3075 ; P9LE-LABEL: allOnell
3076 ; P8BE-LABEL: allOnell
3077 ; P8LE-LABEL: allOnell
3078 ; P9BE: xxspltib v2, 255
3080 ; P9LE: xxspltib v2, 255
3082 ; P8BE: vspltisb v2, -1
3084 ; P8LE: vspltisb v2, -1
; Returns the constant <2 x i64> splat with the value 1 in both lanes.
3088 ; Function Attrs: norecurse nounwind readnone
3089 define <2 x i64> @spltConst1ll() {
3091 ret <2 x i64> <i64 1, i64 1>
3092 ; P9BE-LABEL: spltConst1ll
3093 ; P9LE-LABEL: spltConst1ll
3094 ; P8BE-LABEL: spltConst1ll
3095 ; P8LE-LABEL: spltConst1ll
; Returns the constant <2 x i64> splat with the value 32767 in both lanes.
3106 ; Function Attrs: norecurse nounwind readnone
3107 define <2 x i64> @spltConst16kll() {
3109 ret <2 x i64> <i64 32767, i64 32767>
3110 ; P9BE-LABEL: spltConst16kll
3111 ; P9LE-LABEL: spltConst16kll
3112 ; P8BE-LABEL: spltConst16kll
3113 ; P8LE-LABEL: spltConst16kll
; Returns the constant <2 x i64> splat with the value 65535 in both lanes.
3124 ; Function Attrs: norecurse nounwind readnone
3125 define <2 x i64> @spltConst32kll() {
3127 ret <2 x i64> <i64 65535, i64 65535>
3128 ; P9BE-LABEL: spltConst32kll
3129 ; P9LE-LABEL: spltConst32kll
3130 ; P8BE-LABEL: spltConst32kll
3131 ; P8LE-LABEL: spltConst32kll
; Builds a <2 x i64> from two i64 arguments: %a goes to lane 0, %b to lane 1.
; The Power9 checks include a single mtvsrdd into v2 whose GPR operands are
; r3, r4 for big-endian and r4, r3 for little-endian. The Power8 checks
; include two mtvsrd moves (from r3 and r4) that may appear in either order.
3142 ; Function Attrs: norecurse nounwind readnone
3143 define <2 x i64> @fromRegsll(i64 %a, i64 %b) {
3145 %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
3146 %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
3147 ret <2 x i64> %vecinit1
3148 ; P9BE-LABEL: fromRegsll
3149 ; P9LE-LABEL: fromRegsll
3150 ; P8BE-LABEL: fromRegsll
3151 ; P8LE-LABEL: fromRegsll
3152 ; P9BE: mtvsrdd v2, r3, r4
3154 ; P9LE: mtvsrdd v2, r4, r3
3156 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3
3157 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4
3160 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3
3161 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4
; Returns a constant <2 x i64> with different lane values: 242 in lane 0 and
; -113 in lane 1.
3166 ; Function Attrs: norecurse nounwind readnone
3167 define <2 x i64> @fromDiffConstsll() {
3169 ret <2 x i64> <i64 242, i64 -113>
3170 ; P9BE-LABEL: fromDiffConstsll
3171 ; P9LE-LABEL: fromDiffConstsll
3172 ; P8BE-LABEL: fromDiffConstsll
3173 ; P8LE-LABEL: fromDiffConstsll
; Builds a <2 x i64> from two consecutive i64 loads at constant, ascending
; offsets: lane 0 is loaded from %arr itself (element 0) and lane 1 from
; element 1.
3184 ; Function Attrs: norecurse nounwind readonly
3185 define <2 x i64> @fromDiffMemConsAll(i64* nocapture readonly %arr) {
3187 %0 = load i64, i64* %arr, align 8
3188 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3189 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 1
3190 %1 = load i64, i64* %arrayidx1, align 8
3191 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3192 ret <2 x i64> %vecinit2
3193 ; P9BE-LABEL: fromDiffMemConsAll
3194 ; P9LE-LABEL: fromDiffMemConsAll
3195 ; P8BE-LABEL: fromDiffMemConsAll
3196 ; P8LE-LABEL: fromDiffMemConsAll
; Builds a <2 x i64> from two adjacent i64 loads at constant, descending
; offsets: lane 0 is loaded from element 3 of %arr and lane 1 from element 2.
3208 ; Function Attrs: norecurse nounwind readonly
3209 define <2 x i64> @fromDiffMemConsDll(i64* nocapture readonly %arr) {
3211 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3
3212 %0 = load i64, i64* %arrayidx, align 8
3213 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3214 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 2
3215 %1 = load i64, i64* %arrayidx1, align 8
3216 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3217 ret <2 x i64> %vecinit2
3218 ; P9BE-LABEL: fromDiffMemConsDll
3219 ; P9LE-LABEL: fromDiffMemConsDll
3220 ; P8BE-LABEL: fromDiffMemConsDll
3221 ; P8LE-LABEL: fromDiffMemConsDll
; Builds a <2 x i64> from two adjacent i64 loads at variable, ascending
; indices: lane 0 is %arr[%elem] and lane 1 is %arr[%elem + 1]. Each i32
; index is sign-extended to i64 before the getelementptr.
3234 ; Function Attrs: norecurse nounwind readonly
3235 define <2 x i64> @fromDiffMemVarAll(i64* nocapture readonly %arr, i32 signext %elem) {
3237 %idxprom = sext i32 %elem to i64
3238 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
3239 %0 = load i64, i64* %arrayidx, align 8
3240 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3241 %add = add nsw i32 %elem, 1
3242 %idxprom1 = sext i32 %add to i64
3243 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1
3244 %1 = load i64, i64* %arrayidx2, align 8
3245 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3246 ret <2 x i64> %vecinit3
3247 ; P9BE-LABEL: fromDiffMemVarAll
3248 ; P9LE-LABEL: fromDiffMemVarAll
3249 ; P8BE-LABEL: fromDiffMemVarAll
3250 ; P8LE-LABEL: fromDiffMemVarAll
; Builds a <2 x i64> from two adjacent i64 loads at variable, descending
; indices: lane 0 is %arr[%elem] and lane 1 is %arr[%elem - 1]. Each i32
; index is sign-extended to i64 before the getelementptr.
3266 ; Function Attrs: norecurse nounwind readonly
3267 define <2 x i64> @fromDiffMemVarDll(i64* nocapture readonly %arr, i32 signext %elem) {
3269 %idxprom = sext i32 %elem to i64
3270 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
3271 %0 = load i64, i64* %arrayidx, align 8
3272 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3273 %sub = add nsw i32 %elem, -1
3274 %idxprom1 = sext i32 %sub to i64
3275 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1
3276 %1 = load i64, i64* %arrayidx2, align 8
3277 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3278 ret <2 x i64> %vecinit3
3279 ; P9BE-LABEL: fromDiffMemVarDll
3280 ; P9LE-LABEL: fromDiffMemVarDll
3281 ; P8BE-LABEL: fromDiffMemVarDll
3282 ; P8LE-LABEL: fromDiffMemVarDll
; Builds a <2 x i64> from two i64 loads at non-adjacent constant offsets:
; lane 0 is element 4 of %arr and lane 1 is element 18.
3300 ; Function Attrs: norecurse nounwind readonly
3301 define <2 x i64> @fromRandMemConsll(i64* nocapture readonly %arr) {
3303 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4
3304 %0 = load i64, i64* %arrayidx, align 8
3305 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3306 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 18
3307 %1 = load i64, i64* %arrayidx1, align 8
3308 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3309 ret <2 x i64> %vecinit2
3310 ; P9BE-LABEL: fromRandMemConsll
3311 ; P9LE-LABEL: fromRandMemConsll
3312 ; P8BE-LABEL: fromRandMemConsll
3313 ; P8LE-LABEL: fromRandMemConsll
; Builds a <2 x i64> from two i64 loads at non-adjacent variable indices:
; lane 0 is %arr[%elem + 4] and lane 1 is %arr[%elem + 1]. Each i32 index is
; sign-extended to i64 before the getelementptr.
3336 ; Function Attrs: norecurse nounwind readonly
3337 define <2 x i64> @fromRandMemVarll(i64* nocapture readonly %arr, i32 signext %elem) {
3339 %add = add nsw i32 %elem, 4
3340 %idxprom = sext i32 %add to i64
3341 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
3342 %0 = load i64, i64* %arrayidx, align 8
3343 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3344 %add1 = add nsw i32 %elem, 1
3345 %idxprom2 = sext i32 %add1 to i64
3346 %arrayidx3 = getelementptr inbounds i64, i64* %arr, i64 %idxprom2
3347 %1 = load i64, i64* %arrayidx3, align 8
3348 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3349 ret <2 x i64> %vecinit4
3350 ; P9BE-LABEL: fromRandMemVarll
3351 ; P9LE-LABEL: fromRandMemVarll
3352 ; P8BE-LABEL: fromRandMemVarll
3353 ; P8LE-LABEL: fromRandMemVarll
; Splat of a register value: the i64 argument %val is inserted into lane 0 of
; an undef <2 x i64> and broadcast to both lanes with an all-zero shuffle mask.
; The Power9 checks include a single mtvsrdd into v2 with r3 as both sources.
; The Power8 checks include an mtvsrd from r3 followed by an xxspltd into v2
; that reads the same register number captured from the mtvsrd.
3380 ; Function Attrs: norecurse nounwind readnone
3381 define <2 x i64> @spltRegValll(i64 %val) {
3383 %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0
3384 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3385 ret <2 x i64> %splat.splat
3386 ; P9BE-LABEL: spltRegValll
3387 ; P9LE-LABEL: spltRegValll
3388 ; P8BE-LABEL: spltRegValll
3389 ; P8LE-LABEL: spltRegValll
3390 ; P9BE: mtvsrdd v2, r3, r3
3392 ; P9LE: mtvsrdd v2, r3, r3
3394 ; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
3395 ; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
3397 ; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
3398 ; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
; Splat of a memory value: one i64 is loaded from %ptr (align 8), inserted
; into lane 0 of an undef <2 x i64>, and broadcast to both lanes with an
; all-zero shuffle mask.
3402 ; Function Attrs: norecurse nounwind readonly
3403 define <2 x i64> @spltMemValll(i64* nocapture readonly %ptr) {
3405 %0 = load i64, i64* %ptr, align 8
3406 %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
3407 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3408 ret <2 x i64> %splat.splat
3409 ; P9BE-LABEL: spltMemValll
3410 ; P9LE-LABEL: spltMemValll
3411 ; P8BE-LABEL: spltMemValll
3412 ; P8LE-LABEL: spltMemValll
; Returns the constant <2 x i64> splat with the value 4 in both lanes. The IR
; body contains no conversion instruction; the result is already an integer
; constant.
3423 ; Function Attrs: norecurse nounwind readnone
3424 define <2 x i64> @spltCnstConvftoll() {
3426 ret <2 x i64> <i64 4, i64 4>
3427 ; P9BE-LABEL: spltCnstConvftoll
3428 ; P9LE-LABEL: spltCnstConvftoll
3429 ; P8BE-LABEL: spltCnstConvftoll
3430 ; P8LE-LABEL: spltCnstConvftoll
; Builds a <2 x i64> from two float arguments converted with fptosi: the
; converted %a goes to lane 0 and the converted %b to lane 1.
; The checks for all four configurations include an xvcvdpsxds writing v2.
3441 ; Function Attrs: norecurse nounwind readnone
3442 define <2 x i64> @fromRegsConvftoll(float %a, float %b) {
3444 %conv = fptosi float %a to i64
3445 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3446 %conv1 = fptosi float %b to i64
3447 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
3448 ret <2 x i64> %vecinit2
3449 ; P9BE-LABEL: fromRegsConvftoll
3450 ; P9LE-LABEL: fromRegsConvftoll
3451 ; P8BE-LABEL: fromRegsConvftoll
3452 ; P8LE-LABEL: fromRegsConvftoll
3454 ; P9BE: xvcvdpsxds v2
3457 ; P9LE: xvcvdpsxds v2
3460 ; P8BE: xvcvdpsxds v2
3463 ; P8LE: xvcvdpsxds v2
; Returns a constant <2 x i64> with 24 in lane 0 and 234 in lane 1. The IR
; body contains no conversion instruction; the result is already an integer
; constant.
3467 ; Function Attrs: norecurse nounwind readnone
3468 define <2 x i64> @fromDiffConstsConvftoll() {
3470 ret <2 x i64> <i64 24, i64 234>
3471 ; P9BE-LABEL: fromDiffConstsConvftoll
3472 ; P9LE-LABEL: fromDiffConstsConvftoll
3473 ; P8BE-LABEL: fromDiffConstsConvftoll
3474 ; P8LE-LABEL: fromDiffConstsConvftoll
; Builds a <2 x i64> from two consecutive floats in memory, each converted
; with fptosi: lane 0 comes from %ptr itself (element 0) and lane 1 from
; element 1.
; The checks for all four configurations include an xvcvdpsxds writing v2.
3486 ; Function Attrs: norecurse nounwind readonly
3487 define <2 x i64> @fromDiffMemConsAConvftoll(float* nocapture readonly %ptr) {
3489 %0 = load float, float* %ptr, align 4
3490 %conv = fptosi float %0 to i64
3491 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3492 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
3493 %1 = load float, float* %arrayidx1, align 4
3494 %conv2 = fptosi float %1 to i64
3495 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
3496 ret <2 x i64> %vecinit3
3497 ; P9BE-LABEL: fromDiffMemConsAConvftoll
3498 ; P9LE-LABEL: fromDiffMemConsAConvftoll
3499 ; P8BE-LABEL: fromDiffMemConsAConvftoll
3500 ; P8LE-LABEL: fromDiffMemConsAConvftoll
3504 ; P9BE-NEXT: xvcvdpsxds v2
3509 ; P9LE-NEXT: xvcvdpsxds v2
3514 ; P8BE-NEXT: xvcvdpsxds v2
3519 ; P8LE-NEXT: xvcvdpsxds v2
; Builds a <2 x i64> from two adjacent floats in memory at descending
; constant offsets, each converted with fptosi: lane 0 comes from element 3
; of %ptr and lane 1 from element 2.
; The checks for all four configurations include an xvcvdpsxds writing v2.
3523 ; Function Attrs: norecurse nounwind readonly
3524 define <2 x i64> @fromDiffMemConsDConvftoll(float* nocapture readonly %ptr) {
3526 %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
3527 %0 = load float, float* %arrayidx, align 4
3528 %conv = fptosi float %0 to i64
3529 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3530 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2
3531 %1 = load float, float* %arrayidx1, align 4
3532 %conv2 = fptosi float %1 to i64
3533 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
3534 ret <2 x i64> %vecinit3
3535 ; P9BE-LABEL: fromDiffMemConsDConvftoll
3536 ; P9LE-LABEL: fromDiffMemConsDConvftoll
3537 ; P8BE-LABEL: fromDiffMemConsDConvftoll
3538 ; P8LE-LABEL: fromDiffMemConsDConvftoll
3542 ; P9BE-NEXT: xvcvdpsxds v2
3547 ; P9LE-NEXT: xvcvdpsxds v2
3552 ; P8BE-NEXT: xvcvdpsxds v2
3557 ; P8LE-NEXT: xvcvdpsxds v2
; Builds a <2 x i64> from two adjacent floats at ascending variable indices,
; each converted with fptosi: lane 0 is %arr[%elem] and lane 1 is
; %arr[%elem + 1]. Each i32 index is sign-extended to i64 before the
; getelementptr.
; The checks for all four configurations include an xvcvdpsxds writing v2.
3561 ; Function Attrs: norecurse nounwind readonly
3562 define <2 x i64> @fromDiffMemVarAConvftoll(float* nocapture readonly %arr, i32 signext %elem) {
3564 %idxprom = sext i32 %elem to i64
3565 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
3566 %0 = load float, float* %arrayidx, align 4
3567 %conv = fptosi float %0 to i64
3568 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3569 %add = add nsw i32 %elem, 1
3570 %idxprom1 = sext i32 %add to i64
3571 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
3572 %1 = load float, float* %arrayidx2, align 4
3573 %conv3 = fptosi float %1 to i64
3574 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
3575 ret <2 x i64> %vecinit4
3576 ; P9BE-LABEL: fromDiffMemVarAConvftoll
3577 ; P9LE-LABEL: fromDiffMemVarAConvftoll
3578 ; P8BE-LABEL: fromDiffMemVarAConvftoll
3579 ; P8LE-LABEL: fromDiffMemVarAConvftoll
3584 ; P9BE-NEXT: xvcvdpsxds v2
3590 ; P9LE-NEXT: xvcvdpsxds v2
3596 ; P8BE-NEXT: xvcvdpsxds v2
3602 ; P8LE-NEXT: xvcvdpsxds v2
; Builds a <2 x i64> from two adjacent floats at descending variable indices,
; each converted with fptosi: lane 0 is %arr[%elem] and lane 1 is
; %arr[%elem - 1]. Each i32 index is sign-extended to i64 before the
; getelementptr.
; The checks for all four configurations include an xvcvdpsxds writing v2.
3606 ; Function Attrs: norecurse nounwind readonly
3607 define <2 x i64> @fromDiffMemVarDConvftoll(float* nocapture readonly %arr, i32 signext %elem) {
3609 %idxprom = sext i32 %elem to i64
3610 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
3611 %0 = load float, float* %arrayidx, align 4
3612 %conv = fptosi float %0 to i64
3613 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3614 %sub = add nsw i32 %elem, -1
3615 %idxprom1 = sext i32 %sub to i64
3616 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
3617 %1 = load float, float* %arrayidx2, align 4
3618 %conv3 = fptosi float %1 to i64
3619 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
3620 ret <2 x i64> %vecinit4
3621 ; P9BE-LABEL: fromDiffMemVarDConvftoll
3622 ; P9LE-LABEL: fromDiffMemVarDConvftoll
3623 ; P8BE-LABEL: fromDiffMemVarDConvftoll
3624 ; P8LE-LABEL: fromDiffMemVarDConvftoll
3629 ; P9BE-NEXT: xvcvdpsxds v2
3635 ; P9LE-NEXT: xvcvdpsxds v2
3641 ; P8BE-NEXT: xvcvdpsxds v2
3647 ; P8LE-NEXT: xvcvdpsxds v2
; Splat of a converted register value: the float argument %val is converted
; to i64 with fptosi, inserted into lane 0 of an undef <2 x i64>, and
; broadcast to both lanes with an all-zero shuffle mask.
; The checks for all four configurations include an xxspltd writing v2.
3651 ; Function Attrs: norecurse nounwind readnone
3652 define <2 x i64> @spltRegValConvftoll(float %val) {
3654 %conv = fptosi float %val to i64
3655 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
3656 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3657 ret <2 x i64> %splat.splat
3658 ; P9BE-LABEL: spltRegValConvftoll
3659 ; P9LE-LABEL: spltRegValConvftoll
3660 ; P8BE-LABEL: spltRegValConvftoll
3661 ; P8LE-LABEL: spltRegValConvftoll
3663 ; P9BE-NEXT: xxspltd v2
3666 ; P9LE-NEXT: xxspltd v2
3669 ; P8BE-NEXT: xxspltd v2
3672 ; P8LE-NEXT: xxspltd v2
; Splat of a converted memory value: one float is loaded from %ptr (align 4),
; converted to i64 with fptosi, inserted into lane 0 of an undef <2 x i64>,
; and broadcast to both lanes with an all-zero shuffle mask.
; The checks for all four configurations include a scalar xscvdpsxds
; immediately followed by an xxspltd writing v2.
3676 ; Function Attrs: norecurse nounwind readonly
3677 define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) {
3679 %0 = load float, float* %ptr, align 4
3680 %conv = fptosi float %0 to i64
3681 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
3682 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3683 ret <2 x i64> %splat.splat
3684 ; P9BE-LABEL: spltMemValConvftoll
3685 ; P9LE-LABEL: spltMemValConvftoll
3686 ; P8BE-LABEL: spltMemValConvftoll
3687 ; P8LE-LABEL: spltMemValConvftoll
3689 ; P9BE-NEXT: xscvdpsxds
3690 ; P9BE-NEXT: xxspltd v2
3693 ; P9LE-NEXT: xscvdpsxds
3694 ; P9LE-NEXT: xxspltd v2
3697 ; P8BE-NEXT: xscvdpsxds
3698 ; P8BE-NEXT: xxspltd v2
3701 ; P8LE-NEXT: xscvdpsxds
3702 ; P8LE-NEXT: xxspltd v2
; Returns the constant <2 x i64> splat with the value 4 in both lanes. The IR
; body contains no conversion instruction; the result is already an integer
; constant.
3706 ; Function Attrs: norecurse nounwind readnone
3707 define <2 x i64> @spltCnstConvdtoll() {
3709 ret <2 x i64> <i64 4, i64 4>
3710 ; P9BE-LABEL: spltCnstConvdtoll
3711 ; P9LE-LABEL: spltCnstConvdtoll
3712 ; P8BE-LABEL: spltCnstConvdtoll
3713 ; P8LE-LABEL: spltCnstConvdtoll
; Builds a <2 x i64> from two double arguments converted with fptosi: the
; converted %a goes to lane 0 and the converted %b to lane 1.
; The checks for all four configurations include an xvcvdpsxds.
3724 ; Function Attrs: norecurse nounwind readnone
3725 define <2 x i64> @fromRegsConvdtoll(double %a, double %b) {
3727 %conv = fptosi double %a to i64
3728 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3729 %conv1 = fptosi double %b to i64
3730 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
3731 ret <2 x i64> %vecinit2
3732 ; P9BE-LABEL: fromRegsConvdtoll
3733 ; P9LE-LABEL: fromRegsConvdtoll
3734 ; P8BE-LABEL: fromRegsConvdtoll
3735 ; P8LE-LABEL: fromRegsConvdtoll
3737 ; P9BE-NEXT: xvcvdpsxds
3740 ; P9LE-NEXT: xvcvdpsxds
3743 ; P8BE-NEXT: xvcvdpsxds
3746 ; P8LE-NEXT: xvcvdpsxds
; Returns a constant <2 x i64> with 24 in lane 0 and 234 in lane 1. The IR
; body contains no conversion instruction; the result is already an integer
; constant.
3750 ; Function Attrs: norecurse nounwind readnone
3751 define <2 x i64> @fromDiffConstsConvdtoll() {
3753 ret <2 x i64> <i64 24, i64 234>
3754 ; P9BE-LABEL: fromDiffConstsConvdtoll
3755 ; P9LE-LABEL: fromDiffConstsConvdtoll
3756 ; P8BE-LABEL: fromDiffConstsConvdtoll
3757 ; P8LE-LABEL: fromDiffConstsConvdtoll
; Converts two consecutive doubles in memory to a <2 x i64>. Unlike the
; scalar load/convert/insert form used by the neighbouring tests, the IR here
; is already vectorized: %ptr is bitcast to <2 x double>*, loaded as one
; vector (align 8), and converted with a single vector fptosi.
; The checks for all four configurations include an xvcvdpsxds writing v2.
3768 ; Function Attrs: norecurse nounwind readonly
3769 define <2 x i64> @fromDiffMemConsAConvdtoll(double* nocapture readonly %ptr) {
3771 %0 = bitcast double* %ptr to <2 x double>*
3772 %1 = load <2 x double>, <2 x double>* %0, align 8
3773 %2 = fptosi <2 x double> %1 to <2 x i64>
3775 ; P9BE-LABEL: fromDiffMemConsAConvdtoll
3776 ; P9LE-LABEL: fromDiffMemConsAConvdtoll
3777 ; P8BE-LABEL: fromDiffMemConsAConvdtoll
3778 ; P8LE-LABEL: fromDiffMemConsAConvdtoll
3780 ; P9BE-NEXT: xvcvdpsxds v2
3783 ; P9LE-NEXT: xvcvdpsxds v2
3786 ; P8BE-NEXT: xvcvdpsxds v2
3790 ; P8LE-NEXT: xvcvdpsxds v2
; Builds a <2 x i64> from two adjacent doubles in memory at descending
; constant offsets, each converted with fptosi: lane 0 comes from element 3
; of %ptr and lane 1 from element 2.
; The checks include an xvcvdpsxds writing v2 on every configuration. On both
; Power9 configurations and on Power8 big-endian it is immediately preceded
; by an xxswapd; no xxswapd check is listed for Power8 little-endian.
3794 ; Function Attrs: norecurse nounwind readonly
3795 define <2 x i64> @fromDiffMemConsDConvdtoll(double* nocapture readonly %ptr) {
3797 %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
3798 %0 = load double, double* %arrayidx, align 8
3799 %conv = fptosi double %0 to i64
3800 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3801 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
3802 %1 = load double, double* %arrayidx1, align 8
3803 %conv2 = fptosi double %1 to i64
3804 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
3805 ret <2 x i64> %vecinit3
3806 ; P9BE-LABEL: fromDiffMemConsDConvdtoll
3807 ; P9LE-LABEL: fromDiffMemConsDConvdtoll
3808 ; P8BE-LABEL: fromDiffMemConsDConvdtoll
3809 ; P8LE-LABEL: fromDiffMemConsDConvdtoll
3811 ; P9BE-NEXT: xxswapd
3812 ; P9BE-NEXT: xvcvdpsxds v2
3815 ; P9LE-NEXT: xxswapd
3816 ; P9LE-NEXT: xvcvdpsxds v2
3819 ; P8BE-NEXT: xxswapd
3820 ; P8BE-NEXT: xvcvdpsxds v2
3823 ; P8LE-NEXT: xvcvdpsxds v2
; Builds a <2 x i64> from two adjacent doubles at ascending variable indices,
; each converted with fptosi: lane 0 is %arr[%elem] and lane 1 is
; %arr[%elem + 1]. Each i32 index is sign-extended to i64 before the
; getelementptr.
; The checks include an xvcvdpsxds writing v2 on every configuration; only
; the Power8 little-endian checks list an xxswapd immediately before it.
3827 ; Function Attrs: norecurse nounwind readonly
3828 define <2 x i64> @fromDiffMemVarAConvdtoll(double* nocapture readonly %arr, i32 signext %elem) {
3830 %idxprom = sext i32 %elem to i64
3831 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
3832 %0 = load double, double* %arrayidx, align 8
3833 %conv = fptosi double %0 to i64
3834 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3835 %add = add nsw i32 %elem, 1
3836 %idxprom1 = sext i32 %add to i64
3837 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
3838 %1 = load double, double* %arrayidx2, align 8
3839 %conv3 = fptosi double %1 to i64
3840 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
3841 ret <2 x i64> %vecinit4
3842 ; P9BE-LABEL: fromDiffMemVarAConvdtoll
3843 ; P9LE-LABEL: fromDiffMemVarAConvdtoll
3844 ; P8BE-LABEL: fromDiffMemVarAConvdtoll
3845 ; P8LE-LABEL: fromDiffMemVarAConvdtoll
3848 ; P9BE-NEXT: xvcvdpsxds v2
3852 ; P9LE-NEXT: xvcvdpsxds v2
3856 ; P8BE-NEXT: xvcvdpsxds v2
3860 ; P8LE-NEXT: xxswapd
3861 ; P8LE-NEXT: xvcvdpsxds v2
; Builds a <2 x i64> from two adjacent doubles at descending variable
; indices, each converted with fptosi: lane 0 is %arr[%elem] and lane 1 is
; %arr[%elem - 1]. Each i32 index is sign-extended to i64 before the
; getelementptr.
; The checks include an xvcvdpsxds writing v2 on every configuration. On both
; Power9 configurations and on Power8 big-endian it is immediately preceded
; by an xxswapd; no xxswapd check is listed for Power8 little-endian.
3865 ; Function Attrs: norecurse nounwind readonly
3866 define <2 x i64> @fromDiffMemVarDConvdtoll(double* nocapture readonly %arr, i32 signext %elem) {
3868 %idxprom = sext i32 %elem to i64
3869 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
3870 %0 = load double, double* %arrayidx, align 8
3871 %conv = fptosi double %0 to i64
3872 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3873 %sub = add nsw i32 %elem, -1
3874 %idxprom1 = sext i32 %sub to i64
3875 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
3876 %1 = load double, double* %arrayidx2, align 8
3877 %conv3 = fptosi double %1 to i64
3878 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
3879 ret <2 x i64> %vecinit4
3880 ; P9BE-LABEL: fromDiffMemVarDConvdtoll
3881 ; P9LE-LABEL: fromDiffMemVarDConvdtoll
3882 ; P8BE-LABEL: fromDiffMemVarDConvdtoll
3883 ; P8LE-LABEL: fromDiffMemVarDConvdtoll
3886 ; P9BE-NEXT: xxswapd
3887 ; P9BE-NEXT: xvcvdpsxds v2
3891 ; P9LE-NEXT: xxswapd
3892 ; P9LE-NEXT: xvcvdpsxds v2
3896 ; P8BE-NEXT: xxswapd
3897 ; P8BE-NEXT: xvcvdpsxds v2
3901 ; P8LE-NEXT: xvcvdpsxds v2
; Splat of a converted register value: the double argument %val is converted
; to i64 with fptosi, inserted into lane 0 of an undef <2 x i64>, and
; broadcast to both lanes with an all-zero shuffle mask.
; The checks for all four configurations include an xxspltd writing v2.
3905 ; Function Attrs: norecurse nounwind readnone
3906 define <2 x i64> @spltRegValConvdtoll(double %val) {
3908 %conv = fptosi double %val to i64
3909 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
3910 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3911 ret <2 x i64> %splat.splat
3912 ; P9BE-LABEL: spltRegValConvdtoll
3913 ; P9LE-LABEL: spltRegValConvdtoll
3914 ; P8BE-LABEL: spltRegValConvdtoll
3915 ; P8LE-LABEL: spltRegValConvdtoll
3917 ; P9BE-NEXT: xxspltd v2
3920 ; P9LE-NEXT: xxspltd v2
3923 ; P8BE-NEXT: xxspltd v2
3926 ; P8LE-NEXT: xxspltd v2
; Splat of a converted memory value: one double is loaded from %ptr
; (align 8), converted to i64 with fptosi, inserted into lane 0 of an undef
; <2 x i64>, and broadcast to both lanes with an all-zero shuffle mask.
; The checks for all four configurations include the vector form xvcvdpsxds.
3930 ; Function Attrs: norecurse nounwind readonly
3931 define <2 x i64> @spltMemValConvdtoll(double* nocapture readonly %ptr) {
3933 %0 = load double, double* %ptr, align 8
3934 %conv = fptosi double %0 to i64
3935 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
3936 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3937 ret <2 x i64> %splat.splat
3938 ; P9BE-LABEL: spltMemValConvdtoll
3939 ; P9LE-LABEL: spltMemValConvdtoll
3940 ; P8BE-LABEL: spltMemValConvdtoll
3941 ; P8LE-LABEL: spltMemValConvdtoll
3943 ; P9BE-NEXT: xvcvdpsxds
3946 ; P9LE-NEXT: xvcvdpsxds
3949 ; P8BE-NEXT: xvcvdpsxds
3952 ; P8LE-NEXT: xvcvdpsxds
; Returns an all-zero <2 x i64> (zeroinitializer). The IR body and the
; expected instruction are the same as for @allZeroll.
; The checks for all four configurations include an xxlxor of v2 with itself.
3956 ; Function Attrs: norecurse nounwind readnone
3957 define <2 x i64> @allZeroull() {
3959 ret <2 x i64> zeroinitializer
3960 ; P9BE-LABEL: allZeroull
3961 ; P9LE-LABEL: allZeroull
3962 ; P8BE-LABEL: allZeroull
3963 ; P8LE-LABEL: allZeroull
3964 ; P9BE: xxlxor v2, v2, v2
3966 ; P9LE: xxlxor v2, v2, v2
3968 ; P8BE: xxlxor v2, v2, v2
3970 ; P8LE: xxlxor v2, v2, v2
; Returns a <2 x i64> with both lanes set to -1 (all bits set).
; The Power9 checks include a byte splat of 255 (xxspltib); the Power8 checks
; include a byte splat of -1 (vspltisb). Both write v2.
3974 ; Function Attrs: norecurse nounwind readnone
3975 define <2 x i64> @allOneull() {
3977 ret <2 x i64> <i64 -1, i64 -1>
3978 ; P9BE-LABEL: allOneull
3979 ; P9LE-LABEL: allOneull
3980 ; P8BE-LABEL: allOneull
3981 ; P8LE-LABEL: allOneull
3982 ; P9BE: xxspltib v2, 255
3984 ; P9LE: xxspltib v2, 255
3986 ; P8BE: vspltisb v2, -1
3988 ; P8LE: vspltisb v2, -1
; Returns the constant <2 x i64> splat with the value 1 in both lanes.
3992 ; Function Attrs: norecurse nounwind readnone
3993 define <2 x i64> @spltConst1ull() {
3995 ret <2 x i64> <i64 1, i64 1>
3996 ; P9BE-LABEL: spltConst1ull
3997 ; P9LE-LABEL: spltConst1ull
3998 ; P8BE-LABEL: spltConst1ull
3999 ; P8LE-LABEL: spltConst1ull
; Returns the constant <2 x i64> splat with the value 32767 in both lanes.
4010 ; Function Attrs: norecurse nounwind readnone
4011 define <2 x i64> @spltConst16kull() {
4013 ret <2 x i64> <i64 32767, i64 32767>
4014 ; P9BE-LABEL: spltConst16kull
4015 ; P9LE-LABEL: spltConst16kull
4016 ; P8BE-LABEL: spltConst16kull
4017 ; P8LE-LABEL: spltConst16kull
; Returns the constant <2 x i64> splat with the value 65535 in both lanes.
4028 ; Function Attrs: norecurse nounwind readnone
4029 define <2 x i64> @spltConst32kull() {
4031 ret <2 x i64> <i64 65535, i64 65535>
4032 ; P9BE-LABEL: spltConst32kull
4033 ; P9LE-LABEL: spltConst32kull
4034 ; P8BE-LABEL: spltConst32kull
4035 ; P8LE-LABEL: spltConst32kull
; Builds a <2 x i64> from two i64 arguments: %a goes to lane 0, %b to lane 1.
; The Power9 checks include a single mtvsrdd into v2 whose GPR operands are
; r3, r4 for big-endian and r4, r3 for little-endian. The Power8 checks
; include two mtvsrd moves (from r3 and r4) that may appear in either order.
4046 ; Function Attrs: norecurse nounwind readnone
4047 define <2 x i64> @fromRegsull(i64 %a, i64 %b) {
4049 %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
4050 %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
4051 ret <2 x i64> %vecinit1
4052 ; P9BE-LABEL: fromRegsull
4053 ; P9LE-LABEL: fromRegsull
4054 ; P8BE-LABEL: fromRegsull
4055 ; P8LE-LABEL: fromRegsull
4056 ; P9BE: mtvsrdd v2, r3, r4
4058 ; P9LE: mtvsrdd v2, r4, r3
4060 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3
4061 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4
4064 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3
4065 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4
; Returns a constant <2 x i64> with different lane values: 242 in lane 0 and
; -113 in lane 1.
4070 ; Function Attrs: norecurse nounwind readnone
4071 define <2 x i64> @fromDiffConstsull() {
4073 ret <2 x i64> <i64 242, i64 -113>
4074 ; P9BE-LABEL: fromDiffConstsull
4075 ; P9LE-LABEL: fromDiffConstsull
4076 ; P8BE-LABEL: fromDiffConstsull
4077 ; P8LE-LABEL: fromDiffConstsull
; Builds a <2 x i64> from two consecutive i64 loads at constant, ascending
; offsets: lane 0 is loaded from %arr itself (element 0) and lane 1 from
; element 1.
4088 ; Function Attrs: norecurse nounwind readonly
4089 define <2 x i64> @fromDiffMemConsAull(i64* nocapture readonly %arr) {
4091 %0 = load i64, i64* %arr, align 8
4092 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4093 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 1
4094 %1 = load i64, i64* %arrayidx1, align 8
4095 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4096 ret <2 x i64> %vecinit2
4097 ; P9BE-LABEL: fromDiffMemConsAull
4098 ; P9LE-LABEL: fromDiffMemConsAull
4099 ; P8BE-LABEL: fromDiffMemConsAull
4100 ; P8LE-LABEL: fromDiffMemConsAull
; Builds a <2 x i64> from two adjacent i64 loads at constant, descending
; offsets: lane 0 is loaded from element 3 of %arr and lane 1 from element 2.
4112 ; Function Attrs: norecurse nounwind readonly
4113 define <2 x i64> @fromDiffMemConsDull(i64* nocapture readonly %arr) {
4115 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3
4116 %0 = load i64, i64* %arrayidx, align 8
4117 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4118 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 2
4119 %1 = load i64, i64* %arrayidx1, align 8
4120 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4121 ret <2 x i64> %vecinit2
4122 ; P9BE-LABEL: fromDiffMemConsDull
4123 ; P9LE-LABEL: fromDiffMemConsDull
4124 ; P8BE-LABEL: fromDiffMemConsDull
4125 ; P8LE-LABEL: fromDiffMemConsDull
; Builds a <2 x i64> from two adjacent i64 loads at variable, ascending
; indices: lane 0 is %arr[%elem] and lane 1 is %arr[%elem + 1]. Each i32
; index is sign-extended to i64 before the getelementptr.
4138 ; Function Attrs: norecurse nounwind readonly
4139 define <2 x i64> @fromDiffMemVarAull(i64* nocapture readonly %arr, i32 signext %elem) {
4141 %idxprom = sext i32 %elem to i64
4142 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
4143 %0 = load i64, i64* %arrayidx, align 8
4144 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4145 %add = add nsw i32 %elem, 1
4146 %idxprom1 = sext i32 %add to i64
4147 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1
4148 %1 = load i64, i64* %arrayidx2, align 8
4149 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4150 ret <2 x i64> %vecinit3
4151 ; P9BE-LABEL: fromDiffMemVarAull
4152 ; P9LE-LABEL: fromDiffMemVarAull
4153 ; P8BE-LABEL: fromDiffMemVarAull
4154 ; P8LE-LABEL: fromDiffMemVarAull
; Builds a <2 x i64> from two adjacent i64 loads at variable, descending
; indices: lane 0 is %arr[%elem] and lane 1 is %arr[%elem - 1]. Each i32
; index is sign-extended to i64 before the getelementptr.
4170 ; Function Attrs: norecurse nounwind readonly
4171 define <2 x i64> @fromDiffMemVarDull(i64* nocapture readonly %arr, i32 signext %elem) {
4173 %idxprom = sext i32 %elem to i64
4174 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
4175 %0 = load i64, i64* %arrayidx, align 8
4176 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4177 %sub = add nsw i32 %elem, -1
4178 %idxprom1 = sext i32 %sub to i64
4179 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1
4180 %1 = load i64, i64* %arrayidx2, align 8
4181 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4182 ret <2 x i64> %vecinit3
4183 ; P9BE-LABEL: fromDiffMemVarDull
4184 ; P9LE-LABEL: fromDiffMemVarDull
4185 ; P8BE-LABEL: fromDiffMemVarDull
4186 ; P8LE-LABEL: fromDiffMemVarDull
; Builds a <2 x i64> from two i64 loads at non-adjacent constant offsets:
; lane 0 is element 4 of %arr and lane 1 is element 18.
4204 ; Function Attrs: norecurse nounwind readonly
4205 define <2 x i64> @fromRandMemConsull(i64* nocapture readonly %arr) {
4207 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4
4208 %0 = load i64, i64* %arrayidx, align 8
4209 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4210 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 18
4211 %1 = load i64, i64* %arrayidx1, align 8
4212 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4213 ret <2 x i64> %vecinit2
4214 ; P9BE-LABEL: fromRandMemConsull
4215 ; P9LE-LABEL: fromRandMemConsull
4216 ; P8BE-LABEL: fromRandMemConsull
4217 ; P8LE-LABEL: fromRandMemConsull
; Builds a <2 x i64> from two i64 loads at non-adjacent variable indices:
; lane 0 is %arr[%elem + 4] and lane 1 is %arr[%elem + 1]. Each i32 index is
; sign-extended to i64 before the getelementptr.
4240 ; Function Attrs: norecurse nounwind readonly
4241 define <2 x i64> @fromRandMemVarull(i64* nocapture readonly %arr, i32 signext %elem) {
4243 %add = add nsw i32 %elem, 4
4244 %idxprom = sext i32 %add to i64
4245 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
4246 %0 = load i64, i64* %arrayidx, align 8
4247 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4248 %add1 = add nsw i32 %elem, 1
4249 %idxprom2 = sext i32 %add1 to i64
4250 %arrayidx3 = getelementptr inbounds i64, i64* %arr, i64 %idxprom2
4251 %1 = load i64, i64* %arrayidx3, align 8
4252 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4253 ret <2 x i64> %vecinit4
4254 ; P9BE-LABEL: fromRandMemVarull
4255 ; P9LE-LABEL: fromRandMemVarull
4256 ; P8BE-LABEL: fromRandMemVarull
4257 ; P8LE-LABEL: fromRandMemVarull
; Splat of a register value: the i64 argument %val is inserted into lane 0 of
; an undef <2 x i64> and broadcast to both lanes with an all-zero shuffle mask.
; The Power9 checks include a single mtvsrdd into v2 with r3 as both sources.
; The Power8 checks include an mtvsrd from r3 followed by an xxspltd into v2
; that reads the same register number captured from the mtvsrd.
4284 ; Function Attrs: norecurse nounwind readnone
4285 define <2 x i64> @spltRegValull(i64 %val) {
4287 %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0
4288 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4289 ret <2 x i64> %splat.splat
4290 ; P9BE-LABEL: spltRegValull
4291 ; P9LE-LABEL: spltRegValull
4292 ; P8BE-LABEL: spltRegValull
4293 ; P8LE-LABEL: spltRegValull
4294 ; P9BE: mtvsrdd v2, r3, r3
4296 ; P9LE: mtvsrdd v2, r3, r3
4298 ; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
4299 ; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
4301 ; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
4302 ; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
; Splat of a memory value: one i64 is loaded from %ptr (align 8), inserted
; into lane 0 of an undef <2 x i64>, and broadcast to both lanes with an
; all-zero shuffle mask.
4306 ; Function Attrs: norecurse nounwind readonly
4307 define <2 x i64> @spltMemValull(i64* nocapture readonly %ptr) {
4309 %0 = load i64, i64* %ptr, align 8
4310 %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
4311 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4312 ret <2 x i64> %splat.splat
4313 ; P9BE-LABEL: spltMemValull
4314 ; P9LE-LABEL: spltMemValull
4315 ; P8BE-LABEL: spltMemValull
4316 ; P8LE-LABEL: spltMemValull
; Returns the constant <2 x i64> splat with the value 4 in both lanes. The IR
; body contains no conversion instruction; the result is already an integer
; constant.
4327 ; Function Attrs: norecurse nounwind readnone
4328 define <2 x i64> @spltCnstConvftoull() {
4330 ret <2 x i64> <i64 4, i64 4>
4331 ; P9BE-LABEL: spltCnstConvftoull
4332 ; P9LE-LABEL: spltCnstConvftoull
4333 ; P8BE-LABEL: spltCnstConvftoull
4334 ; P8LE-LABEL: spltCnstConvftoull
; Builds a <2 x i64> from two float arguments converted with fptoui: the
; converted %a goes to lane 0 and the converted %b to lane 1.
; The checks for all four configurations include an xvcvdpuxds writing v2.
4345 ; Function Attrs: norecurse nounwind readnone
4346 define <2 x i64> @fromRegsConvftoull(float %a, float %b) {
4348 %conv = fptoui float %a to i64
4349 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4350 %conv1 = fptoui float %b to i64
4351 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
4352 ret <2 x i64> %vecinit2
4353 ; P9BE-LABEL: fromRegsConvftoull
4354 ; P9LE-LABEL: fromRegsConvftoull
4355 ; P8BE-LABEL: fromRegsConvftoull
4356 ; P8LE-LABEL: fromRegsConvftoull
4358 ; P9BE: xvcvdpuxds v2
4361 ; P9LE: xvcvdpuxds v2
4364 ; P8BE: xvcvdpuxds v2
4367 ; P8LE: xvcvdpuxds v2
; Returns a constant <2 x i64> with 24 in lane 0 and 234 in lane 1. The IR
; body contains no conversion instruction; the result is already an integer
; constant.
4371 ; Function Attrs: norecurse nounwind readnone
4372 define <2 x i64> @fromDiffConstsConvftoull() {
4374 ret <2 x i64> <i64 24, i64 234>
4375 ; P9BE-LABEL: fromDiffConstsConvftoull
4376 ; P9LE-LABEL: fromDiffConstsConvftoull
4377 ; P8BE-LABEL: fromDiffConstsConvftoull
4378 ; P8LE-LABEL: fromDiffConstsConvftoull
; Builds a <2 x i64> from two consecutive floats in memory, each converted
; with fptoui: lane 0 comes from %ptr itself (element 0) and lane 1 from
; element 1.
; The checks for all four configurations include an xvcvdpuxds writing v2.
4390 ; Function Attrs: norecurse nounwind readonly
4391 define <2 x i64> @fromDiffMemConsAConvftoull(float* nocapture readonly %ptr) {
4393 %0 = load float, float* %ptr, align 4
4394 %conv = fptoui float %0 to i64
4395 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4396 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
4397 %1 = load float, float* %arrayidx1, align 4
4398 %conv2 = fptoui float %1 to i64
4399 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
4400 ret <2 x i64> %vecinit3
4401 ; P9BE-LABEL: fromDiffMemConsAConvftoull
4402 ; P9LE-LABEL: fromDiffMemConsAConvftoull
4403 ; P8BE-LABEL: fromDiffMemConsAConvftoull
4404 ; P8LE-LABEL: fromDiffMemConsAConvftoull
4408 ; P9BE-NEXT: xvcvdpuxds v2
4413 ; P9LE-NEXT: xvcvdpuxds v2
4418 ; P8BE-NEXT: xvcvdpuxds v2
4423 ; P8LE-NEXT: xvcvdpuxds v2
; Builds a <2 x i64> from two adjacent floats in memory at descending
; constant offsets, each converted with fptoui: lane 0 comes from element 3
; of %ptr and lane 1 from element 2.
; The checks for all four configurations include an xvcvdpuxds writing v2.
4427 ; Function Attrs: norecurse nounwind readonly
4428 define <2 x i64> @fromDiffMemConsDConvftoull(float* nocapture readonly %ptr) {
4430 %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
4431 %0 = load float, float* %arrayidx, align 4
4432 %conv = fptoui float %0 to i64
4433 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4434 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2
4435 %1 = load float, float* %arrayidx1, align 4
4436 %conv2 = fptoui float %1 to i64
4437 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
4438 ret <2 x i64> %vecinit3
4439 ; P9BE-LABEL: fromDiffMemConsDConvftoull
4440 ; P9LE-LABEL: fromDiffMemConsDConvftoull
4441 ; P8BE-LABEL: fromDiffMemConsDConvftoull
4442 ; P8LE-LABEL: fromDiffMemConsDConvftoull
4446 ; P9BE-NEXT: xvcvdpuxds v2
4451 ; P9LE-NEXT: xvcvdpuxds v2
4456 ; P8BE-NEXT: xvcvdpuxds v2
4461 ; P8LE-NEXT: xvcvdpuxds v2
; Builds a <2 x i64> from two adjacent floats at ascending variable indices,
; each converted with fptoui: lane 0 is %arr[%elem] and lane 1 is
; %arr[%elem + 1]. Each i32 index is sign-extended to i64 before the
; getelementptr.
; The checks for all four configurations include an xvcvdpuxds writing v2.
4465 ; Function Attrs: norecurse nounwind readonly
4466 define <2 x i64> @fromDiffMemVarAConvftoull(float* nocapture readonly %arr, i32 signext %elem) {
4468 %idxprom = sext i32 %elem to i64
4469 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
4470 %0 = load float, float* %arrayidx, align 4
4471 %conv = fptoui float %0 to i64
4472 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4473 %add = add nsw i32 %elem, 1
4474 %idxprom1 = sext i32 %add to i64
4475 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
4476 %1 = load float, float* %arrayidx2, align 4
4477 %conv3 = fptoui float %1 to i64
4478 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
4479 ret <2 x i64> %vecinit4
4480 ; P9BE-LABEL: fromDiffMemVarAConvftoull
4481 ; P9LE-LABEL: fromDiffMemVarAConvftoull
4482 ; P8BE-LABEL: fromDiffMemVarAConvftoull
4483 ; P8LE-LABEL: fromDiffMemVarAConvftoull
4488 ; P9BE-NEXT: xvcvdpuxds v2
4494 ; P9LE-NEXT: xvcvdpuxds v2
4500 ; P8BE-NEXT: xvcvdpuxds v2
4506 ; P8LE-NEXT: xvcvdpuxds v2
; fromDiffMemVarDConvftoull: builds a <2 x i64> from two floats at a
; run-time index, in descending order. %arr[%elem] is converted with fptoui
; into lane 0 and %arr[%elem - 1] into lane 1; the i32 index is
; sign-extended to i64 before each address computation.
; All four run configurations (P9BE, P9LE, P8BE, P8LE) expect the vector
; conversion xvcvdpuxds with v2 as its first operand.
4510 ; Function Attrs: norecurse nounwind readonly
4511 define <2 x i64> @fromDiffMemVarDConvftoull(float* nocapture readonly %arr, i32 signext %elem) {
4513 %idxprom = sext i32 %elem to i64
4514 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
4515 %0 = load float, float* %arrayidx, align 4
4516 %conv = fptoui float %0 to i64
4517 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4518 %sub = add nsw i32 %elem, -1
4519 %idxprom1 = sext i32 %sub to i64
4520 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
4521 %1 = load float, float* %arrayidx2, align 4
4522 %conv3 = fptoui float %1 to i64
4523 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
4524 ret <2 x i64> %vecinit4
4525 ; P9BE-LABEL: fromDiffMemVarDConvftoull
4526 ; P9LE-LABEL: fromDiffMemVarDConvftoull
4527 ; P8BE-LABEL: fromDiffMemVarDConvftoull
4528 ; P8LE-LABEL: fromDiffMemVarDConvftoull
4533 ; P9BE-NEXT: xvcvdpuxds v2
4539 ; P9LE-NEXT: xvcvdpuxds v2
4545 ; P8BE-NEXT: xvcvdpuxds v2
4551 ; P8LE-NEXT: xvcvdpuxds v2
; spltRegValConvftoull: converts the float argument %val to i64 with fptoui
; and splats it to both lanes of a <2 x i64> (insertelement into lane 0,
; then a shufflevector with an all-zero mask).
; All four run configurations (P9BE, P9LE, P8BE, P8LE) expect the splat to
; be emitted as xxspltd with v2 as its first operand.
4555 ; Function Attrs: norecurse nounwind readnone
4556 define <2 x i64> @spltRegValConvftoull(float %val) {
4558 %conv = fptoui float %val to i64
4559 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
4560 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4561 ret <2 x i64> %splat.splat
4562 ; P9BE-LABEL: spltRegValConvftoull
4563 ; P9LE-LABEL: spltRegValConvftoull
4564 ; P8BE-LABEL: spltRegValConvftoull
4565 ; P8LE-LABEL: spltRegValConvftoull
4567 ; P9BE-NEXT: xxspltd v2
4570 ; P9LE-NEXT: xxspltd v2
4573 ; P8BE-NEXT: xxspltd v2
4576 ; P8LE-NEXT: xxspltd v2
; spltMemValConvftoull: loads one float from %ptr, converts it to i64 with
; fptoui and splats it to both lanes of a <2 x i64> (insertelement into
; lane 0, then a shufflevector with an all-zero mask).
; All four run configurations (P9BE, P9LE, P8BE, P8LE) expect the scalar
; conversion xscvdpuxds followed on the next line by xxspltd with v2 as its
; first operand.
4580 ; Function Attrs: norecurse nounwind readonly
4581 define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) {
4583 %0 = load float, float* %ptr, align 4
4584 %conv = fptoui float %0 to i64
4585 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
4586 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4587 ret <2 x i64> %splat.splat
4588 ; P9BE-LABEL: spltMemValConvftoull
4589 ; P9LE-LABEL: spltMemValConvftoull
4590 ; P8BE-LABEL: spltMemValConvftoull
4591 ; P8LE-LABEL: spltMemValConvftoull
4593 ; P9BE-NEXT: xscvdpuxds
4594 ; P9BE-NEXT: xxspltd v2
4597 ; P9LE-NEXT: xscvdpuxds
4598 ; P9LE-NEXT: xxspltd v2
4601 ; P8BE-NEXT: xscvdpuxds
4602 ; P8BE-NEXT: xxspltd v2
4605 ; P8LE-NEXT: xscvdpuxds
4606 ; P8LE-NEXT: xxspltd v2
; spltCnstConvdtoull: returns the constant vector <i64 4, i64 4>; the IR
; contains no conversion instruction.
; NOTE(review): the name suggests the C source splatted a double constant
; converted to unsigned long long that was folded before this IR was
; emitted; confirm against the C listing at the top of the file.
4610 ; Function Attrs: norecurse nounwind readnone
4611 define <2 x i64> @spltCnstConvdtoull() {
4613 ret <2 x i64> <i64 4, i64 4>
4614 ; P9BE-LABEL: spltCnstConvdtoull
4615 ; P9LE-LABEL: spltCnstConvdtoull
4616 ; P8BE-LABEL: spltCnstConvdtoull
4617 ; P8LE-LABEL: spltCnstConvdtoull
; fromRegsConvdtoull: builds a <2 x i64> from the two double arguments.
; %a is converted with fptoui into lane 0 and %b into lane 1.
; All four run configurations (P9BE, P9LE, P8BE, P8LE) expect the vector
; conversion xvcvdpuxds; these checks do not pin the destination register.
4628 ; Function Attrs: norecurse nounwind readnone
4629 define <2 x i64> @fromRegsConvdtoull(double %a, double %b) {
4631 %conv = fptoui double %a to i64
4632 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4633 %conv1 = fptoui double %b to i64
4634 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
4635 ret <2 x i64> %vecinit2
4636 ; P9BE-LABEL: fromRegsConvdtoull
4637 ; P9LE-LABEL: fromRegsConvdtoull
4638 ; P8BE-LABEL: fromRegsConvdtoull
4639 ; P8LE-LABEL: fromRegsConvdtoull
4641 ; P9BE-NEXT: xvcvdpuxds
4644 ; P9LE-NEXT: xvcvdpuxds
4647 ; P8BE-NEXT: xvcvdpuxds
4650 ; P8LE-NEXT: xvcvdpuxds
; fromDiffConstsConvdtoull: returns the constant vector <i64 24, i64 234>;
; the IR contains no conversion instruction.
; NOTE(review): the name suggests the C source used two different double
; constants converted to unsigned long long that were folded before this IR
; was emitted; confirm against the C listing at the top of the file.
4654 ; Function Attrs: norecurse nounwind readnone
4655 define <2 x i64> @fromDiffConstsConvdtoull() {
4657 ret <2 x i64> <i64 24, i64 234>
4658 ; P9BE-LABEL: fromDiffConstsConvdtoull
4659 ; P9LE-LABEL: fromDiffConstsConvdtoull
4660 ; P8BE-LABEL: fromDiffConstsConvdtoull
4661 ; P8LE-LABEL: fromDiffConstsConvdtoull
; fromDiffMemConsAConvdtoull: reinterprets %ptr as a pointer to <2 x double>,
; loads both doubles with a single 8-byte-aligned vector load, converts the
; whole vector to <2 x i64> with one vector fptoui, and returns the result.
; All four run configurations (P9BE, P9LE, P8BE, P8LE) expect the vector
; conversion xvcvdpuxds with v2 as its first operand.
4672 ; Function Attrs: norecurse nounwind readonly
4673 define <2 x i64> @fromDiffMemConsAConvdtoull(double* nocapture readonly %ptr) {
4675 %0 = bitcast double* %ptr to <2 x double>*
4676 %1 = load <2 x double>, <2 x double>* %0, align 8
4677 %2 = fptoui <2 x double> %1 to <2 x i64>
ret <2 x i64> %2
4679 ; P9BE-LABEL: fromDiffMemConsAConvdtoull
4680 ; P9LE-LABEL: fromDiffMemConsAConvdtoull
4681 ; P8BE-LABEL: fromDiffMemConsAConvdtoull
4682 ; P8LE-LABEL: fromDiffMemConsAConvdtoull
4684 ; P9BE-NEXT: xvcvdpuxds v2
4687 ; P9LE-NEXT: xvcvdpuxds v2
4690 ; P8BE-NEXT: xvcvdpuxds v2
4694 ; P8LE-NEXT: xvcvdpuxds v2
; fromDiffMemConsDConvdtoull: builds a <2 x i64> from two doubles read from
; %ptr[3] and %ptr[2] (descending addresses). %ptr[3] is converted with
; fptoui into lane 0 and %ptr[2] into lane 1.
; P9BE, P9LE and P8BE expect xxswapd followed on the next line by
; xvcvdpuxds with v2 as its first operand; P8LE expects only the
; xvcvdpuxds, with no xxswapd check.
4698 ; Function Attrs: norecurse nounwind readonly
4699 define <2 x i64> @fromDiffMemConsDConvdtoull(double* nocapture readonly %ptr) {
4701 %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
4702 %0 = load double, double* %arrayidx, align 8
4703 %conv = fptoui double %0 to i64
4704 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4705 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
4706 %1 = load double, double* %arrayidx1, align 8
4707 %conv2 = fptoui double %1 to i64
4708 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
4709 ret <2 x i64> %vecinit3
4710 ; P9BE-LABEL: fromDiffMemConsDConvdtoull
4711 ; P9LE-LABEL: fromDiffMemConsDConvdtoull
4712 ; P8BE-LABEL: fromDiffMemConsDConvdtoull
4713 ; P8LE-LABEL: fromDiffMemConsDConvdtoull
4715 ; P9BE-NEXT: xxswapd
4716 ; P9BE-NEXT: xvcvdpuxds v2
4719 ; P9LE-NEXT: xxswapd
4720 ; P9LE-NEXT: xvcvdpuxds v2
4723 ; P8BE-NEXT: xxswapd
4724 ; P8BE-NEXT: xvcvdpuxds v2
4727 ; P8LE-NEXT: xvcvdpuxds v2
; fromDiffMemVarAConvdtoull: builds a <2 x i64> from two doubles at a
; run-time index. %arr[%elem] is converted with fptoui into lane 0 and
; %arr[%elem + 1] into lane 1; the i32 index is sign-extended to i64 before
; each address computation.
; P9BE, P9LE and P8BE expect xvcvdpuxds with v2 as its first operand; P8LE
; additionally expects an xxswapd on the line before the conversion.
4731 ; Function Attrs: norecurse nounwind readonly
4732 define <2 x i64> @fromDiffMemVarAConvdtoull(double* nocapture readonly %arr, i32 signext %elem) {
4734 %idxprom = sext i32 %elem to i64
4735 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
4736 %0 = load double, double* %arrayidx, align 8
4737 %conv = fptoui double %0 to i64
4738 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4739 %add = add nsw i32 %elem, 1
4740 %idxprom1 = sext i32 %add to i64
4741 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
4742 %1 = load double, double* %arrayidx2, align 8
4743 %conv3 = fptoui double %1 to i64
4744 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
4745 ret <2 x i64> %vecinit4
4746 ; P9BE-LABEL: fromDiffMemVarAConvdtoull
4747 ; P9LE-LABEL: fromDiffMemVarAConvdtoull
4748 ; P8BE-LABEL: fromDiffMemVarAConvdtoull
4749 ; P8LE-LABEL: fromDiffMemVarAConvdtoull
4752 ; P9BE-NEXT: xvcvdpuxds v2
4756 ; P9LE-NEXT: xvcvdpuxds v2
4760 ; P8BE-NEXT: xvcvdpuxds v2
4764 ; P8LE-NEXT: xxswapd
4765 ; P8LE-NEXT: xvcvdpuxds v2
; fromDiffMemVarDConvdtoull: builds a <2 x i64> from two doubles at a
; run-time index, in descending order. %arr[%elem] is converted with fptoui
; into lane 0 and %arr[%elem - 1] into lane 1; the i32 index is
; sign-extended to i64 before each address computation.
; P9BE, P9LE and P8BE expect xxswapd followed on the next line by
; xvcvdpuxds with v2 as its first operand; P8LE expects only the
; xvcvdpuxds, with no xxswapd check.
4769 ; Function Attrs: norecurse nounwind readonly
4770 define <2 x i64> @fromDiffMemVarDConvdtoull(double* nocapture readonly %arr, i32 signext %elem) {
4772 %idxprom = sext i32 %elem to i64
4773 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
4774 %0 = load double, double* %arrayidx, align 8
4775 %conv = fptoui double %0 to i64
4776 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4777 %sub = add nsw i32 %elem, -1
4778 %idxprom1 = sext i32 %sub to i64
4779 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
4780 %1 = load double, double* %arrayidx2, align 8
4781 %conv3 = fptoui double %1 to i64
4782 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
4783 ret <2 x i64> %vecinit4
4784 ; P9BE-LABEL: fromDiffMemVarDConvdtoull
4785 ; P9LE-LABEL: fromDiffMemVarDConvdtoull
4786 ; P8BE-LABEL: fromDiffMemVarDConvdtoull
4787 ; P8LE-LABEL: fromDiffMemVarDConvdtoull
4790 ; P9BE-NEXT: xxswapd
4791 ; P9BE-NEXT: xvcvdpuxds v2
4795 ; P9LE-NEXT: xxswapd
4796 ; P9LE-NEXT: xvcvdpuxds v2
4800 ; P8BE-NEXT: xxswapd
4801 ; P8BE-NEXT: xvcvdpuxds v2
4805 ; P8LE-NEXT: xvcvdpuxds v2
; spltRegValConvdtoull: converts the double argument %val to i64 with
; fptoui and splats it to both lanes of a <2 x i64> (insertelement into
; lane 0, then a shufflevector with an all-zero mask).
; All four run configurations (P9BE, P9LE, P8BE, P8LE) expect the splat to
; be emitted as xxspltd with v2 as its first operand.
4809 ; Function Attrs: norecurse nounwind readnone
4810 define <2 x i64> @spltRegValConvdtoull(double %val) {
4812 %conv = fptoui double %val to i64
4813 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
4814 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4815 ret <2 x i64> %splat.splat
4816 ; P9BE-LABEL: spltRegValConvdtoull
4817 ; P9LE-LABEL: spltRegValConvdtoull
4818 ; P8BE-LABEL: spltRegValConvdtoull
4819 ; P8LE-LABEL: spltRegValConvdtoull
4821 ; P9BE-NEXT: xxspltd v2
4824 ; P9LE-NEXT: xxspltd v2
4827 ; P8BE-NEXT: xxspltd v2
4830 ; P8LE-NEXT: xxspltd v2
; spltMemValConvdtoull: loads one double from %ptr, converts it to i64 with
; fptoui and splats it to both lanes of a <2 x i64> (insertelement into
; lane 0, then a shufflevector with an all-zero mask).
; All four run configurations (P9BE, P9LE, P8BE, P8LE) expect the vector
; conversion xvcvdpuxds; these checks do not pin the destination register.
4834 ; Function Attrs: norecurse nounwind readonly
4835 define <2 x i64> @spltMemValConvdtoull(double* nocapture readonly %ptr) {
4837 %0 = load double, double* %ptr, align 8
4838 %conv = fptoui double %0 to i64
4839 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
4840 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4841 ret <2 x i64> %splat.splat
4842 ; P9BE-LABEL: spltMemValConvdtoull
4843 ; P9LE-LABEL: spltMemValConvdtoull
4844 ; P8BE-LABEL: spltMemValConvdtoull
4845 ; P8LE-LABEL: spltMemValConvdtoull
4847 ; P9BE-NEXT: xvcvdpuxds
4850 ; P9LE-NEXT: xvcvdpuxds
4853 ; P8BE-NEXT: xvcvdpuxds
4856 ; P8LE-NEXT: xvcvdpuxds