1 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
2 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
3 ; RUN: -check-prefix=P9BE -implicit-check-not frsp
4 ; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5 ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
6 ; RUN: -check-prefix=P9LE -implicit-check-not frsp
7 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
8 ; RUN: -mtriple=powerpc64-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
9 ; RUN: -check-prefix=P8BE -implicit-check-not frsp
10 ; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
11 ; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck -allow-deprecated-dag-overlap %s \
12 ; RUN: -check-prefix=P8LE -implicit-check-not frsp
14 ; This test case comes from the following C test case (included as it may be
15 ; slightly more readable than the LLVM IR).
17 ;/* This test case provides various ways of building vectors to ensure we
18 ; produce optimal code for all cases. The cases are (for each type):
21 ; - Splat of a constant
22 ; - From different values already in registers
23 ; - From different constants
24 ; - From different values in memory
25 ; - Splat of a value in register
26 ; - Splat of a value in memory
27 ; - Inserting element into existing vector
28 ; - Inserting element from existing vector into existing vector
30 ; With conversions (float <-> int)
31 ; - Splat of a constant
32 ; - From different values already in registers
33 ; - From different constants
34 ; - From different values in memory
35 ; - Splat of a value in register
36 ; - Splat of a value in memory
37 ; - Inserting element into existing vector
38 ; - Inserting element from existing vector into existing vector
41 ;/*=================================== int ===================================*/
44 ;vector int allZeroi() { //
45 ; return (vector int)0; //
47 ;// P8: vspltisb -1 //
48 ;// P9: xxspltib 255 //
49 ;vector int allOnei() { //
50 ; return (vector int)-1; //
54 ;vector int spltConst1i() { //
55 ; return (vector int)1; //
57 ;// P8: vspltisw -15; vsrw //
58 ;// P9: vspltisw -15; vsrw //
59 ;vector int spltConst16ki() { //
60 ; return (vector int)((1<<15) - 1); //
62 ;// P8: vspltisw -16; vsrw //
63 ;// P9: vspltisw -16; vsrw //
64 ;vector int spltConst32ki() { //
65 ; return (vector int)((1<<16) - 1); //
67 ;// P8: 2 x rldimi, 2 x mtvsrd, xxmrghd //
68 ;// P9: 2 x rldimi, mtvsrdd //
69 ;vector int fromRegsi(int a, int b, int c, int d) { //
70 ; return (vector int){ a, b, c, d }; //
72 ;// P8: lxvd2x, xxswapd //
73 ;// P9: lxvx (or even lxv) //
74 ;vector int fromDiffConstsi() { //
75 ; return (vector int) { 242, -113, 889, 19 }; //
77 ;// P8: lxvd2x, xxswapd //
79 ;vector int fromDiffMemConsAi(int *arr) { //
80 ; return (vector int) { arr[0], arr[1], arr[2], arr[3] }; //
82 ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm //
83 ;// P9: 2 x lxvx, vperm //
84 ;vector int fromDiffMemConsDi(int *arr) { //
85 ; return (vector int) { arr[3], arr[2], arr[1], arr[0] }; //
87 ;// P8: sldi 2, lxvd2x, xxswapd //
88 ;// P9: sldi 2, lxvx //
89 ;vector int fromDiffMemVarAi(int *arr, int elem) { //
90 ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
92 ;// P8: sldi 2, 2 x lxvd2x, 2 x xxswapd, vperm //
93 ;// P9: sldi 2, 2 x lxvx, vperm //
94 ;vector int fromDiffMemVarDi(int *arr, int elem) { //
95 ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; //
97 ;// P8: 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
98 ;// P9: 4 x lwz, 2 x mtvsrdd, vmrgow //
99 ;vector int fromRandMemConsi(int *arr) { //
100 ; return (vector int) { arr[4], arr[18], arr[2], arr[88] }; //
102 ;// P8: sldi 2, 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
103 ;// P9: sldi 2, add, 4 x lwz, 2 x mtvsrdd, vmrgow //
104 ;vector int fromRandMemVari(int *arr, int elem) { //
105 ; return (vector int) { arr[elem+4], arr[elem+1], arr[elem+2], arr[elem+8] };//
107 ;// P8: mtvsrwz, xxspltw //
109 ;vector int spltRegVali(int val) { //
110 ; return (vector int) val; //
112 ;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
113 ;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
114 ;vector int spltMemVali(int *ptr) { //
115 ; return (vector int)*ptr; //
119 ;vector int spltCnstConvftoi() { //
120 ; return (vector int) 4.74f; //
122 ;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
123 ;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
124 ;vector int fromRegsConvftoi(float a, float b, float c, float d) { //
125 ; return (vector int) { a, b, c, d }; //
127 ;// P8: lxvd2x, xxswapd //
128 ;// P9: lxvx (even lxv) //
129 ;vector int fromDiffConstsConvftoi() { //
130 ; return (vector int) { 24.46f, 234.f, 988.19f, 422.39f }; //
132 ;// P8: lxvd2x, xxswapd, xvcvspsxws //
133 ;// P9: lxvx, xvcvspsxws //
134 ;vector int fromDiffMemConsAConvftoi(float *ptr) { //
135 ; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
137 ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm, xvcvspsxws //
138 ;// P9: 2 x lxvx, vperm, xvcvspsxws //
139 ;vector int fromDiffMemConsDConvftoi(float *ptr) { //
140 ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
142 ;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
143 ;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
144 ;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
145 ;// sldi 2, load, xvcvspsxws //
146 ;vector int fromDiffMemVarAConvftoi(float *arr, int elem) { //
147 ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
149 ;// P8: 4 x lxsspx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
150 ;// P9: 4 x lxssp, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
151 ;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
152 ;// sldi 2, 2 x load, vperm, xvcvspsxws //
153 ;vector int fromDiffMemVarDConvftoi(float *arr, int elem) { //
154 ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; //
156 ;// P8: xscvdpsxws, xxspltw //
157 ;// P9: xscvdpsxws, xxspltw //
158 ;vector int spltRegValConvftoi(float val) { //
159 ; return (vector int) val; //
161 ;// P8: lxsspx, xscvdpsxws, xxspltw //
162 ;// P9: lxvwsx, xvcvspsxws //
163 ;vector int spltMemValConvftoi(float *ptr) { //
164 ; return (vector int)*ptr; //
168 ;vector int spltCnstConvdtoi() { //
169 ; return (vector int) 4.74; //
171 ;// P8: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
172 ;// P9: 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
173 ;vector int fromRegsConvdtoi(double a, double b, double c, double d) { //
174 ; return (vector int) { a, b, c, d }; //
176 ;// P8: lxvd2x, xxswapd //
177 ;// P9: lxvx (even lxv) //
178 ;vector int fromDiffConstsConvdtoi() { //
179 ; return (vector int) { 24.46, 234., 988.19, 422.39 }; //
181 ;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew //
182 ;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvspsxws, vmrgew //
183 ;vector int fromDiffMemConsAConvdtoi(double *ptr) { //
184 ; return (vector int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
186 ;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
187 ;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
188 ;vector int fromDiffMemConsDConvdtoi(double *ptr) { //
189 ; return (vector int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
191 ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
192 ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
193 ;vector int fromDiffMemVarAConvdtoi(double *arr, int elem) { //
194 ; return (vector int) { arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] }; //
196 ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
197 ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspsxws, vmrgew //
198 ;vector int fromDiffMemVarDConvdtoi(double *arr, int elem) { //
199 ; return (vector int) { arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] }; //
201 ;// P8: xscvdpsxws, xxspltw //
202 ;// P9: xscvdpsxws, xxspltw //
203 ;vector int spltRegValConvdtoi(double val) { //
204 ; return (vector int) val; //
206 ;// P8: lxsdx, xscvdpsxws, xxspltw //
207 ;// P9: lfd, xscvdpsxws, xxspltw //
208 ;vector int spltMemValConvdtoi(double *ptr) { //
209 ; return (vector int)*ptr; //
211 ;/*=================================== int ===================================*/
212 ;/*=============================== unsigned int ==============================*/
215 ;vector unsigned int allZeroui() { //
216 ; return (vector unsigned int)0; //
218 ;// P8: vspltisb -1 //
219 ;// P9: xxspltib 255 //
220 ;vector unsigned int allOneui() { //
221 ; return (vector unsigned int)-1; //
223 ;// P8: vspltisw 1 //
224 ;// P9: vspltisw 1 //
225 ;vector unsigned int spltConst1ui() { //
226 ; return (vector unsigned int)1; //
228 ;// P8: vspltisw -15; vsrw //
229 ;// P9: vspltisw -15; vsrw //
230 ;vector unsigned int spltConst16kui() { //
231 ; return (vector unsigned int)((1<<15) - 1); //
233 ;// P8: vspltisw -16; vsrw //
234 ;// P9: vspltisw -16; vsrw //
235 ;vector unsigned int spltConst32kui() { //
236 ; return (vector unsigned int)((1<<16) - 1); //
238 ;// P8: 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
239 ;// P9: 2 x mtvsrdd, vmrgow //
240 ;vector unsigned int fromRegsui(unsigned int a, unsigned int b, //
241 ; unsigned int c, unsigned int d) { //
242 ; return (vector unsigned int){ a, b, c, d }; //
244 ;// P8: lxvd2x, xxswapd //
245 ;// P9: lxvx (or even lxv) //
246 ;vector unsigned int fromDiffConstsui() { //
247 ; return (vector unsigned int) { 242, -113, 889, 19 }; //
249 ;// P8: lxvd2x, xxswapd //
251 ;vector unsigned int fromDiffMemConsAui(unsigned int *arr) { //
252 ; return (vector unsigned int) { arr[0], arr[1], arr[2], arr[3] }; //
254 ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm //
255 ;// P9: 2 x lxvx, vperm //
256 ;vector unsigned int fromDiffMemConsDui(unsigned int *arr) { //
257 ; return (vector unsigned int) { arr[3], arr[2], arr[1], arr[0] }; //
259 ;// P8: sldi 2, lxvd2x, xxswapd //
260 ;// P9: sldi 2, lxvx //
261 ;vector unsigned int fromDiffMemVarAui(unsigned int *arr, int elem) { //
262 ; return (vector unsigned int) { arr[elem], arr[elem+1], //
263 ; arr[elem+2], arr[elem+3] }; //
265 ;// P8: sldi 2, 2 x lxvd2x, 2 x xxswapd, vperm //
266 ;// P9: sldi 2, 2 x lxvx, vperm //
267 ;vector unsigned int fromDiffMemVarDui(unsigned int *arr, int elem) { //
268 ; return (vector unsigned int) { arr[elem], arr[elem-1], //
269 ; arr[elem-2], arr[elem-3] }; //
271 ;// P8: 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
272 ;// P9: 4 x lwz, 2 x mtvsrdd, vmrgow //
273 ;vector unsigned int fromRandMemConsui(unsigned int *arr) { //
274 ; return (vector unsigned int) { arr[4], arr[18], arr[2], arr[88] }; //
276 ;// P8: sldi 2, 4 x lwz, 4 x mtvsrwz, 2 x xxmrghd, vmrgow //
277 ;// P9: sldi 2, add, 4 x lwz, 2 x mtvsrdd, vmrgow //
278 ;vector unsigned int fromRandMemVarui(unsigned int *arr, int elem) { //
279 ; return (vector unsigned int) { arr[elem+4], arr[elem+1], //
280 ; arr[elem+2], arr[elem+8] }; //
282 ;// P8: mtvsrwz, xxspltw //
284 ;vector unsigned int spltRegValui(unsigned int val) { //
285 ; return (vector unsigned int) val; //
287 ;// P8: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
288 ;// P9: (LE) lfiwzx, xxpermdi, xxspltw (BE): lfiwzx, xxsldwi, xxspltw //
289 ;vector unsigned int spltMemValui(unsigned int *ptr) { //
290 ; return (vector unsigned int)*ptr; //
294 ;vector unsigned int spltCnstConvftoui() { //
295 ; return (vector unsigned int) 4.74f; //
297 ;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
298 ;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
299 ;vector unsigned int fromRegsConvftoui(float a, float b, float c, float d) { //
300 ; return (vector unsigned int) { a, b, c, d }; //
302 ;// P8: lxvd2x, xxswapd //
303 ;// P9: lxvx (even lxv) //
304 ;vector unsigned int fromDiffConstsConvftoui() { //
305 ; return (vector unsigned int) { 24.46f, 234.f, 988.19f, 422.39f }; //
307 ;// P8: lxvd2x, xxswapd, xvcvspuxws //
308 ;// P9: lxvx, xvcvspuxws //
309 ;vector unsigned int fromDiffMemConsAConvftoui(float *ptr) { //
310 ; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
312 ;// P8: 2 x lxvd2x, 2 x xxswapd, vperm, xvcvspuxws //
313 ;// P9: 2 x lxvx, vperm, xvcvspuxws //
314 ;vector unsigned int fromDiffMemConsDConvftoui(float *ptr) { //
315 ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
317 ;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
318 ;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
319 ;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
320 ;// sldi 2, load, xvcvspuxws //
321 ;vector unsigned int fromDiffMemVarAConvftoui(float *arr, int elem) { //
322 ; return (vector unsigned int) { arr[elem], arr[elem+1], //
323 ; arr[elem+2], arr[elem+3] }; //
325 ;// P8: lfsux, 3 x lxsspx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
326 ;// P9: lfsux, 3 x lfs, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
327 ;// Note: if the consecutive loads learns to handle pre-inc, this can be: //
328 ;// sldi 2, 2 x load, vperm, xvcvspuxws //
329 ;vector unsigned int fromDiffMemVarDConvftoui(float *arr, int elem) { //
330 ; return (vector unsigned int) { arr[elem], arr[elem-1], //
331 ; arr[elem-2], arr[elem-3] }; //
333 ;// P8: xscvdpuxws, xxspltw //
334 ;// P9: xscvdpuxws, xxspltw //
335 ;vector unsigned int spltRegValConvftoui(float val) { //
336 ; return (vector unsigned int) val; //
338 ;// P8: lxsspx, xscvdpuxws, xxspltw //
339 ;// P9: lxvwsx, xvcvspuxws //
340 ;vector unsigned int spltMemValConvftoui(float *ptr) { //
341 ; return (vector unsigned int)*ptr; //
345 ;vector unsigned int spltCnstConvdtoui() { //
346 ; return (vector unsigned int) 4.74; //
348 ;// P8: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
349 ;// P9: 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
350 ;vector unsigned int fromRegsConvdtoui(double a, double b, //
351 ; double c, double d) { //
352 ; return (vector unsigned int) { a, b, c, d }; //
354 ;// P8: lxvd2x, xxswapd //
355 ;// P9: lxvx (even lxv) //
356 ;vector unsigned int fromDiffConstsConvdtoui() { //
357 ; return (vector unsigned int) { 24.46, 234., 988.19, 422.39 }; //
359 ;// P8: 2 x lxvd2x, 2 x xxswapd, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew //
360 ;// P9: 2 x lxvx, xxmrgld, xxmrghd, 2 x xvcvspuxws, vmrgew //
361 ;vector unsigned int fromDiffMemConsAConvdtoui(double *ptr) { //
362 ; return (vector unsigned int) { ptr[0], ptr[1], ptr[2], ptr[3] }; //
364 ;// P8: 4 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
365 ;// P9: 4 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
366 ;vector unsigned int fromDiffMemConsDConvdtoui(double *ptr) { //
367 ; return (vector unsigned int) { ptr[3], ptr[2], ptr[1], ptr[0] }; //
369 ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
370 ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
371 ;vector unsigned int fromDiffMemVarAConvdtoui(double *arr, int elem) { //
372 ; return (vector unsigned int) { arr[elem], arr[elem+1], //
373 ; arr[elem+2], arr[elem+3] }; //
375 ;// P8: lfdux, 3 x lxsdx, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
376 ;// P9: lfdux, 3 x lfd, 2 x xxmrghd, 2 x xvcvspuxws, vmrgew //
377 ;vector unsigned int fromDiffMemVarDConvdtoui(double *arr, int elem) { //
378 ; return (vector unsigned int) { arr[elem], arr[elem-1], //
379 ; arr[elem-2], arr[elem-3] }; //
381 ;// P8: xscvdpuxws, xxspltw //
382 ;// P9: xscvdpuxws, xxspltw //
383 ;vector unsigned int spltRegValConvdtoui(double val) { //
384 ; return (vector unsigned int) val; //
386 ;// P8: lxsdx, xscvdpuxws, xxspltw //
387 ;// P9: lfd, xscvdpuxws, xxspltw //
388 ;vector unsigned int spltMemValConvdtoui(double *ptr) { //
389 ; return (vector unsigned int)*ptr; //
391 ;/*=============================== unsigned int ==============================*/
392 ;/*=============================== long long =================================*/
395 ;vector long long allZeroll() { //
396 ; return (vector long long)0; //
398 ;// P8: vspltisb -1 //
399 ;// P9: xxspltib 255 //
400 ;vector long long allOnell() { //
401 ; return (vector long long)-1; //
403 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
404 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
405 ;vector long long spltConst1ll() { //
406 ; return (vector long long)1; //
408 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
409 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
410 ;vector long long spltConst16kll() { //
411 ; return (vector long long)((1<<15) - 1); //
413 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
414 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
415 ;vector long long spltConst32kll() { //
416 ; return (vector long long)((1<<16) - 1); //
418 ;// P8: 2 x mtvsrd, xxmrghd //
420 ;vector long long fromRegsll(long long a, long long b) { //
421 ; return (vector long long){ a, b }; //
423 ;// P8: lxvd2x, xxswapd //
424 ;// P9: lxvx (or even lxv) //
425 ;vector long long fromDiffConstsll() { //
426 ; return (vector long long) { 242, -113 }; //
428 ;// P8: lxvd2x, xxswapd //
430 ;vector long long fromDiffMemConsAll(long long *arr) { //
431 ; return (vector long long) { arr[0], arr[1] }; //
434 ;// P9: lxvx, xxswapd (maybe just use lxvd2x) //
435 ;vector long long fromDiffMemConsDll(long long *arr) { //
436 ; return (vector long long) { arr[3], arr[2] }; //
438 ;// P8: sldi 3, lxvd2x, xxswapd //
439 ;// P9: sldi 3, lxvx //
440 ;vector long long fromDiffMemVarAll(long long *arr, int elem) { //
441 ; return (vector long long) { arr[elem], arr[elem+1] }; //
443 ;// P8: sldi 3, lxvd2x //
444 ;// P9: sldi 3, lxvx, xxswapd (maybe just use lxvd2x) //
445 ;vector long long fromDiffMemVarDll(long long *arr, int elem) { //
446 ; return (vector long long) { arr[elem], arr[elem-1] }; //
448 ;// P8: 2 x ld, 2 x mtvsrd, xxmrghd //
449 ;// P9: 2 x ld, mtvsrdd //
450 ;vector long long fromRandMemConsll(long long *arr) { //
451 ; return (vector long long) { arr[4], arr[18] }; //
453 ;// P8: sldi 3, add, 2 x ld, 2 x mtvsrd, xxmrghd //
454 ;// P9: sldi 3, add, 2 x ld, mtvsrdd //
455 ;vector long long fromRandMemVarll(long long *arr, int elem) { //
456 ; return (vector long long) { arr[elem+4], arr[elem+1] }; //
458 ;// P8: mtvsrd, xxspltd //
460 ;vector long long spltRegValll(long long val) { //
461 ; return (vector long long) val; //
465 ;vector long long spltMemValll(long long *ptr) { //
466 ; return (vector long long)*ptr; //
468 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
469 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
470 ;vector long long spltCnstConvftoll() { //
471 ; return (vector long long) 4.74f; //
473 ;// P8: xxmrghd, xvcvdpsxds //
474 ;// P9: xxmrghd, xvcvdpsxds //
475 ;vector long long fromRegsConvftoll(float a, float b) { //
476 ; return (vector long long) { a, b }; //
478 ;// P8: lxvd2x, xxswapd //
479 ;// P9: lxvx (even lxv) //
480 ;vector long long fromDiffConstsConvftoll() { //
481 ; return (vector long long) { 24.46f, 234.f }; //
483 ;// P8: 2 x lxsspx, xxmrghd, xvcvdpsxds //
484 ;// P9: 2 x lxssp, xxmrghd, xvcvdpsxds //
485 ;vector long long fromDiffMemConsAConvftoll(float *ptr) { //
486 ; return (vector long long) { ptr[0], ptr[1] }; //
488 ;// P8: 2 x lxsspx, xxmrghd, xvcvdpsxds //
489 ;// P9: 2 x lxssp, xxmrghd, xvcvdpsxds //
490 ;vector long long fromDiffMemConsDConvftoll(float *ptr) { //
491 ; return (vector long long) { ptr[3], ptr[2] }; //
493 ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpsxds //
494 ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpsxds //
495 ;vector long long fromDiffMemVarAConvftoll(float *arr, int elem) { //
496 ; return (vector long long) { arr[elem], arr[elem+1] }; //
498 ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpsxds //
499 ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpsxds //
500 ;vector long long fromDiffMemVarDConvftoll(float *arr, int elem) { //
501 ; return (vector long long) { arr[elem], arr[elem-1] }; //
503 ;// P8: xscvdpsxds, xxspltd //
504 ;// P9: xscvdpsxds, xxspltd //
505 ;vector long long spltRegValConvftoll(float val) { //
506 ; return (vector long long) val; //
508 ;// P8: lxsspx, xscvdpsxds, xxspltd //
509 ;// P9: lfs, xscvdpsxds, xxspltd //
510 ;vector long long spltMemValConvftoll(float *ptr) { //
511 ; return (vector long long)*ptr; //
513 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
514 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
515 ;vector long long spltCnstConvdtoll() { //
516 ; return (vector long long) 4.74; //
518 ;// P8: xxmrghd, xvcvdpsxds //
519 ;// P9: xxmrghd, xvcvdpsxds //
520 ;vector long long fromRegsConvdtoll(double a, double b) { //
521 ; return (vector long long) { a, b }; //
523 ;// P8: lxvd2x, xxswapd //
524 ;// P9: lxvx (even lxv) //
525 ;vector long long fromDiffConstsConvdtoll() { //
526 ; return (vector long long) { 24.46, 234. }; //
528 ;// P8: lxvd2x, xxswapd, xvcvdpsxds //
529 ;// P9: lxvx, xvcvdpsxds //
530 ;vector long long fromDiffMemConsAConvdtoll(double *ptr) { //
531 ; return (vector long long) { ptr[0], ptr[1] }; //
533 ;// P8: lxvd2x, xvcvdpsxds //
534 ;// P9: lxvx, xxswapd, xvcvdpsxds //
535 ;vector long long fromDiffMemConsDConvdtoll(double *ptr) { //
536 ; return (vector long long) { ptr[3], ptr[2] }; //
538 ;// P8: sldi 3, lxvd2x, xxswapd, xvcvdpsxds //
539 ;// P9: sldi 3, lxvx, xvcvdpsxds //
540 ;vector long long fromDiffMemVarAConvdtoll(double *arr, int elem) { //
541 ; return (vector long long) { arr[elem], arr[elem+1] }; //
543 ;// P8: sldi 3, lxvd2x, xvcvdpsxds //
544 ;// P9: sldi 3, lxvx, xxswapd, xvcvdpsxds //
545 ;vector long long fromDiffMemVarDConvdtoll(double *arr, int elem) { //
546 ; return (vector long long) { arr[elem], arr[elem-1] }; //
548 ;// P8: xscvdpsxds, xxspltd //
549 ;// P9: xscvdpsxds, xxspltd //
550 ;vector long long spltRegValConvdtoll(double val) { //
551 ; return (vector long long) val; //
553 ;// P8: lxvdsx, xvcvdpsxds //
554 ;// P9: lxvdsx, xvcvdpsxds //
555 ;vector long long spltMemValConvdtoll(double *ptr) { //
556 ; return (vector long long)*ptr; //
558 ;/*=============================== long long =================================*/
559 ;/*========================== unsigned long long =============================*/
562 ;vector unsigned long long allZeroull() { //
563 ; return (vector unsigned long long)0; //
565 ;// P8: vspltisb -1 //
566 ;// P9: xxspltib 255 //
567 ;vector unsigned long long allOneull() { //
568 ; return (vector unsigned long long)-1; //
570 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
571 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
572 ;vector unsigned long long spltConst1ull() { //
573 ; return (vector unsigned long long)1; //
575 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
576 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
577 ;vector unsigned long long spltConst16kull() { //
578 ; return (vector unsigned long long)((1<<15) - 1); //
580 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
581 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw, vsrw)) //
582 ;vector unsigned long long spltConst32kull() { //
583 ; return (vector unsigned long long)((1<<16) - 1); //
585 ;// P8: 2 x mtvsrd, xxmrghd //
587 ;vector unsigned long long fromRegsull(unsigned long long a, //
588 ; unsigned long long b) { //
589 ; return (vector unsigned long long){ a, b }; //
591 ;// P8: lxvd2x, xxswapd //
592 ;// P9: lxvx (or even lxv) //
593 ;vector unsigned long long fromDiffConstsull() { //
594 ; return (vector unsigned long long) { 242, -113 }; //
596 ;// P8: lxvd2x, xxswapd //
598 ;vector unsigned long long fromDiffMemConsAull(unsigned long long *arr) { //
599 ; return (vector unsigned long long) { arr[0], arr[1] }; //
602 ;// P9: lxvx, xxswapd (maybe just use lxvd2x) //
603 ;vector unsigned long long fromDiffMemConsDull(unsigned long long *arr) { //
604 ; return (vector unsigned long long) { arr[3], arr[2] }; //
606 ;// P8: sldi 3, lxvd2x, xxswapd //
607 ;// P9: sldi 3, lxvx //
608 ;vector unsigned long long fromDiffMemVarAull(unsigned long long *arr, //
610 ; return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
612 ;// P8: sldi 3, lxvd2x //
613 ;// P9: sldi 3, lxvx, xxswapd (maybe just use lxvd2x) //
614 ;vector unsigned long long fromDiffMemVarDull(unsigned long long *arr, //
616 ; return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
618 ;// P8: 2 x ld, 2 x mtvsrd, xxmrghd //
619 ;// P9: 2 x ld, mtvsrdd //
620 ;vector unsigned long long fromRandMemConsull(unsigned long long *arr) { //
621 ; return (vector unsigned long long) { arr[4], arr[18] }; //
623 ;// P8: sldi 3, add, 2 x ld, 2 x mtvsrd, xxmrghd //
624 ;// P9: sldi 3, add, 2 x ld, mtvsrdd //
625 ;vector unsigned long long fromRandMemVarull(unsigned long long *arr, //
627 ; return (vector unsigned long long) { arr[elem+4], arr[elem+1] }; //
629 ;// P8: mtvsrd, xxspltd //
631 ;vector unsigned long long spltRegValull(unsigned long long val) { //
632 ; return (vector unsigned long long) val; //
636 ;vector unsigned long long spltMemValull(unsigned long long *ptr) { //
637 ; return (vector unsigned long long)*ptr; //
639 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
640 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
641 ;vector unsigned long long spltCnstConvftoull() { //
642 ; return (vector unsigned long long) 4.74f; //
644 ;// P8: xxmrghd, xvcvdpuxds //
645 ;// P9: xxmrghd, xvcvdpuxds //
646 ;vector unsigned long long fromRegsConvftoull(float a, float b) { //
647 ; return (vector unsigned long long) { a, b }; //
649 ;// P8: lxvd2x, xxswapd //
650 ;// P9: lxvx (even lxv) //
651 ;vector unsigned long long fromDiffConstsConvftoull() { //
652 ; return (vector unsigned long long) { 24.46f, 234.f }; //
654 ;// P8: 2 x lxsspx, xxmrghd, xvcvdpuxds //
655 ;// P9: 2 x lxssp, xxmrghd, xvcvdpuxds //
656 ;vector unsigned long long fromDiffMemConsAConvftoull(float *ptr) { //
657 ; return (vector unsigned long long) { ptr[0], ptr[1] }; //
659 ;// P8: 2 x lxsspx, xxmrghd, xvcvdpuxds //
660 ;// P9: 2 x lxssp, xxmrghd, xvcvdpuxds //
661 ;vector unsigned long long fromDiffMemConsDConvftoull(float *ptr) { //
662 ; return (vector unsigned long long) { ptr[3], ptr[2] }; //
664 ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpuxds //
665 ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpuxds //
666 ;vector unsigned long long fromDiffMemVarAConvftoull(float *arr, int elem) { //
667 ; return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
669 ;// P8: sldi 2, lfsux, lxsspx, xxmrghd, xvcvdpuxds //
670 ;// P9: sldi 2, lfsux, lfs, xxmrghd, xvcvdpuxds //
671 ;vector unsigned long long fromDiffMemVarDConvftoull(float *arr, int elem) { //
672 ; return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
674 ;// P8: xscvdpuxds, xxspltd //
675 ;// P9: xscvdpuxds, xxspltd //
676 ;vector unsigned long long spltRegValConvftoull(float val) { //
677 ; return (vector unsigned long long) val; //
679 ;// P8: lxsspx, xscvdpuxds, xxspltd //
680 ;// P9: lfs, xscvdpuxds, xxspltd //
681 ;vector unsigned long long spltMemValConvftoull(float *ptr) { //
682 ; return (vector unsigned long long)*ptr; //
684 ;// P8: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
685 ;// P9: constant pool load (possible: vmrgew (xxlxor), (vspltisw)) //
686 ;vector unsigned long long spltCnstConvdtoull() { //
687 ; return (vector unsigned long long) 4.74; //
689 ;// P8: xxmrghd, xvcvdpuxds //
690 ;// P9: xxmrghd, xvcvdpuxds //
691 ;vector unsigned long long fromRegsConvdtoull(double a, double b) { //
692 ; return (vector unsigned long long) { a, b }; //
694 ;// P8: lxvd2x, xxswapd //
695 ;// P9: lxvx (even lxv) //
696 ;vector unsigned long long fromDiffConstsConvdtoull() { //
697 ; return (vector unsigned long long) { 24.46, 234. }; //
699 ;// P8: lxvd2x, xxswapd, xvcvdpuxds //
700 ;// P9: lxvx, xvcvdpuxds //
701 ;vector unsigned long long fromDiffMemConsAConvdtoull(double *ptr) { //
702 ; return (vector unsigned long long) { ptr[0], ptr[1] }; //
704 ;// P8: lxvd2x, xvcvdpuxds //
705 ;// P9: lxvx, xxswapd, xvcvdpuxds //
706 ;vector unsigned long long fromDiffMemConsDConvdtoull(double *ptr) { //
707 ; return (vector unsigned long long) { ptr[3], ptr[2] }; //
709 ;// P8: sldi 3, lxvd2x, xxswapd, xvcvdpuxds //
710 ;// P9: sldi 3, lxvx, xvcvdpuxds //
711 ;vector unsigned long long fromDiffMemVarAConvdtoull(double *arr, int elem) { //
712 ; return (vector unsigned long long) { arr[elem], arr[elem+1] }; //
714 ;// P8: sldi 3, lxvd2x, xvcvdpuxds //
715 ;// P9: sldi 3, lxvx, xxswapd, xvcvdpuxds //
716 ;vector unsigned long long fromDiffMemVarDConvdtoull(double *arr, int elem) { //
717 ; return (vector unsigned long long) { arr[elem], arr[elem-1] }; //
719 ;// P8: xscvdpuxds, xxspltd //
720 ;// P9: xscvdpuxds, xxspltd //
721 ;vector unsigned long long spltRegValConvdtoull(double val) { //
722 ; return (vector unsigned long long) val; //
724 ;// P8: lxvdsx, xvcvdpuxds //
725 ;// P9: lxvdsx, xvcvdpuxds //
726 ;vector unsigned long long spltMemValConvdtoull(double *ptr) { //
727 ; return (vector unsigned long long)*ptr; //
729 ;/*========================== unsigned long long ==============================*/
731 ; Function Attrs: norecurse nounwind readnone
; allZeroi - returns the all-zero <4 x i32> vector (zeroinitializer).
; All four run configurations (pwr9 and pwr8, big- and little-endian) are
; expected to materialize it by XOR-ing v2 with itself (xxlxor).
732 define <4 x i32> @allZeroi() {
734 ret <4 x i32> zeroinitializer
735 ; P9BE-LABEL: allZeroi
736 ; P9LE-LABEL: allZeroi
737 ; P8BE-LABEL: allZeroi
738 ; P8LE-LABEL: allZeroi
739 ; P9BE: xxlxor v2, v2, v2
741 ; P9LE: xxlxor v2, v2, v2
743 ; P8BE: xxlxor v2, v2, v2
745 ; P8LE: xxlxor v2, v2, v2
749 ; Function Attrs: norecurse nounwind readnone
; allOnei - returns a <4 x i32> vector with every element set to -1
; (all bits set).
; The pwr9 configurations expect a byte splat of 255 (xxspltib); the pwr8
; configurations expect a byte splat of -1 (vspltisb). Both produce the same
; all-ones bit pattern in v2.
750 define <4 x i32> @allOnei() {
752 ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
753 ; P9BE-LABEL: allOnei
754 ; P9LE-LABEL: allOnei
755 ; P8BE-LABEL: allOnei
756 ; P8LE-LABEL: allOnei
757 ; P9BE: xxspltib v2, 255
759 ; P9LE: xxspltib v2, 255
761 ; P8BE: vspltisb v2, -1
763 ; P8LE: vspltisb v2, -1
767 ; Function Attrs: norecurse nounwind readnone
; spltConst1i - returns a <4 x i32> vector with every element equal to 1.
; The constant is small enough for a word splat-immediate, so all four run
; configurations expect a single vspltisw of 1 into v2.
768 define <4 x i32> @spltConst1i() {
770 ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
771 ; P9BE-LABEL: spltConst1i
772 ; P9LE-LABEL: spltConst1i
773 ; P8BE-LABEL: spltConst1i
774 ; P8LE-LABEL: spltConst1i
775 ; P9BE: vspltisw v2, 1
777 ; P9LE: vspltisw v2, 1
779 ; P8BE: vspltisw v2, 1
781 ; P8LE: vspltisw v2, 1
785 ; Function Attrs: norecurse nounwind readnone
; spltConst16ki - returns a <4 x i32> vector with every element equal to
; 32767 (0x7FFF, i.e. (1 << 15) - 1).
; All four run configurations expect the same two-instruction sequence:
; splat -15 into every word, then shift each word right by itself (vsrw).
; Per the Power ISA definition of vsrw, only the low 5 bits of the shift
; amount are used: -15 is 0xFFFFFFF1, so the shift count is 17 and the
; logical shift leaves 0x7FFF in each word.
786 define <4 x i32> @spltConst16ki() {
788 ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
789 ; P9BE-LABEL: spltConst16ki
790 ; P9LE-LABEL: spltConst16ki
791 ; P8BE-LABEL: spltConst16ki
792 ; P8LE-LABEL: spltConst16ki
793 ; P9BE: vspltisw v2, -15
794 ; P9BE: vsrw v2, v2, v2
796 ; P9LE: vspltisw v2, -15
797 ; P9LE: vsrw v2, v2, v2
799 ; P8BE: vspltisw v2, -15
800 ; P8BE: vsrw v2, v2, v2
802 ; P8LE: vspltisw v2, -15
803 ; P8LE: vsrw v2, v2, v2
807 ; Function Attrs: norecurse nounwind readnone
; spltConst32ki - returns a <4 x i32> vector with every element equal to
; 65535 (0xFFFF, i.e. (1 << 16) - 1).
; All four run configurations expect the same two-instruction sequence:
; splat -16 into every word, then shift each word right by itself (vsrw).
; Per the Power ISA definition of vsrw, only the low 5 bits of the shift
; amount are used: -16 is 0xFFFFFFF0, so the shift count is 16 and the
; logical shift leaves 0xFFFF in each word.
808 define <4 x i32> @spltConst32ki() {
810 ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
811 ; P9BE-LABEL: spltConst32ki
812 ; P9LE-LABEL: spltConst32ki
813 ; P8BE-LABEL: spltConst32ki
814 ; P8LE-LABEL: spltConst32ki
815 ; P9BE: vspltisw v2, -16
816 ; P9BE: vsrw v2, v2, v2
818 ; P9LE: vspltisw v2, -16
819 ; P9LE: vsrw v2, v2, v2
821 ; P8BE: vspltisw v2, -16
822 ; P8BE: vsrw v2, v2, v2
824 ; P8LE: vspltisw v2, -16
825 ; P8LE: vsrw v2, v2, v2
829 ; Function Attrs: norecurse nounwind readnone
; fromRegsi - builds a <4 x i32> vector { a, b, c, d } from four
; sign-extended i32 arguments by chaining four insertelement operations
; (lanes 0 through 3).
;
; Expected code, as encoded by the directives below:
;  - pwr9: each pair of 32-bit arguments is packed into one 64-bit GPR with
;    rldimi, then a single mtvsrdd moves both GPRs into v2.
;  - pwr8: the same rldimi packing, then each GPR is moved to a VSX register
;    with mtvsrd and the two are combined with xxmrghd.
; Big- and little-endian differ in which register of each pair receives the
; inserted word and in the order of the operands of the final move/merge.
; The -DAG directives let the independent rldimi/mtvsrd instructions match in
; any order; REG1 and REG2 capture the mtvsrd destination register numbers so
; the xxmrghd operands can be tied to them.
830 define <4 x i32> @fromRegsi(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) {
832 %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
833 %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
834 %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2
835 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3
836 ret <4 x i32> %vecinit3
837 ; P9BE-LABEL: fromRegsi
838 ; P9LE-LABEL: fromRegsi
839 ; P8BE-LABEL: fromRegsi
840 ; P8LE-LABEL: fromRegsi
841 ; P9BE-DAG: rldimi r6, r5, 32, 0
842 ; P9BE-DAG: rldimi r4, r3, 32, 0
843 ; P9BE: mtvsrdd v2, r4, r6
845 ; P9LE-DAG: rldimi r3, r4, 32, 0
846 ; P9LE-DAG: rldimi r5, r6, 32, 0
847 ; P9LE: mtvsrdd v2, r5, r3
849 ; P8BE-DAG: rldimi r6, r5, 32, 0
850 ; P8BE-DAG: rldimi r4, r3, 32, 0
851 ; P8BE-DAG: mtvsrd f[[REG1:[0-9]+]], r6
852 ; P8BE-DAG: mtvsrd f[[REG2:[0-9]+]], r4
853 ; P8BE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
855 ; P8LE-DAG: rldimi r3, r4, 32, 0
856 ; P8LE-DAG: rldimi r5, r6, 32, 0
857 ; P8LE-DAG: mtvsrd f[[REG1:[0-9]+]], r3
858 ; P8LE-DAG: mtvsrd f[[REG2:[0-9]+]], r5
859 ; P8LE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
863 ; Function Attrs: norecurse nounwind readnone
; Returns a vector of four different constants: <242, -113, 889, 19>.
; Only the per-configuration function-label directives are visible here.
864 define <4 x i32> @fromDiffConstsi() {
866 ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
867 ; P9BE-LABEL: fromDiffConstsi
868 ; P9LE-LABEL: fromDiffConstsi
869 ; P8BE-LABEL: fromDiffConstsi
870 ; P8LE-LABEL: fromDiffConstsi
882 ; Function Attrs: norecurse nounwind readonly
; Builds a <4 x i32> from four consecutive i32 loads at constant, ascending
; offsets: arr[0], arr[1], arr[2], arr[3] go into lanes 0..3 respectively.
; Only the per-configuration function-label directives are visible here.
883 define <4 x i32> @fromDiffMemConsAi(i32* nocapture readonly %arr) {
885 %0 = load i32, i32* %arr, align 4
886 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
887 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 1
888 %1 = load i32, i32* %arrayidx1, align 4
889 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
890 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2
891 %2 = load i32, i32* %arrayidx3, align 4
892 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
893 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 3
894 %3 = load i32, i32* %arrayidx5, align 4
895 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
896 ret <4 x i32> %vecinit6
897 ; P9BE-LABEL: fromDiffMemConsAi
898 ; P9LE-LABEL: fromDiffMemConsAi
899 ; P8BE-LABEL: fromDiffMemConsAi
900 ; P8LE-LABEL: fromDiffMemConsAi
912 ; Function Attrs: norecurse nounwind readonly
; Builds a <4 x i32> from four i32 loads at constant, descending offsets:
; arr[3], arr[2], arr[1], arr[0] go into lanes 0..3 respectively.
; Only the per-configuration function-label directives are visible here.
913 define <4 x i32> @fromDiffMemConsDi(i32* nocapture readonly %arr) {
915 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
916 %0 = load i32, i32* %arrayidx, align 4
917 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
918 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
919 %1 = load i32, i32* %arrayidx1, align 4
920 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
921 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 1
922 %2 = load i32, i32* %arrayidx3, align 4
923 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
924 %3 = load i32, i32* %arr, align 4
925 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
926 ret <4 x i32> %vecinit6
927 ; P9BE-LABEL: fromDiffMemConsDi
928 ; P9LE-LABEL: fromDiffMemConsDi
929 ; P8BE-LABEL: fromDiffMemConsDi
930 ; P8LE-LABEL: fromDiffMemConsDi
950 ; Function Attrs: norecurse nounwind readonly
; Builds a <4 x i32> from four i32 loads at variable, ascending indices:
; arr[elem], arr[elem+1], arr[elem+2], arr[elem+3] go into lanes 0..3.
; Expected codegen: the index in r4 is scaled by 4 (sldi by 2) and the four
; scalar loads collapse into one indexed vector load - lxvx on pwr9, lxvw4x on
; pwr8 big-endian, lxvd2x on pwr8 little-endian.
951 define <4 x i32> @fromDiffMemVarAi(i32* nocapture readonly %arr, i32 signext %elem) {
953 %idxprom = sext i32 %elem to i64
954 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
955 %0 = load i32, i32* %arrayidx, align 4
956 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
957 %add = add nsw i32 %elem, 1
958 %idxprom1 = sext i32 %add to i64
959 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1
960 %1 = load i32, i32* %arrayidx2, align 4
961 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
962 %add4 = add nsw i32 %elem, 2
963 %idxprom5 = sext i32 %add4 to i64
964 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5
965 %2 = load i32, i32* %arrayidx6, align 4
966 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2
967 %add8 = add nsw i32 %elem, 3
968 %idxprom9 = sext i32 %add8 to i64
969 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9
970 %3 = load i32, i32* %arrayidx10, align 4
971 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
972 ret <4 x i32> %vecinit11
973 ; P9BE-LABEL: fromDiffMemVarAi
974 ; P9LE-LABEL: fromDiffMemVarAi
975 ; P8BE-LABEL: fromDiffMemVarAi
976 ; P8LE-LABEL: fromDiffMemVarAi
977 ; P9BE: sldi r4, r4, 2
978 ; P9BE: lxvx v2, r3, r4
980 ; P9LE: sldi r4, r4, 2
981 ; P9LE: lxvx v2, r3, r4
983 ; P8BE: sldi r4, r4, 2
984 ; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4
986 ; P8LE: sldi r4, r4, 2
987 ; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4
992 ; Function Attrs: norecurse nounwind readonly
; Builds a <4 x i32> from four i32 loads at variable, descending indices:
; arr[elem], arr[elem-1], arr[elem-2], arr[elem-3] go into lanes 0..3.
; The visible directives expect the index in r4 to be scaled with sldi by 2 and,
; for pwr9 and pwr8 big-endian, a vector load (lxvx / lxvw4x); for pwr8
; little-endian only the sldi is checked here.
993 define <4 x i32> @fromDiffMemVarDi(i32* nocapture readonly %arr, i32 signext %elem) {
995 %idxprom = sext i32 %elem to i64
996 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
997 %0 = load i32, i32* %arrayidx, align 4
998 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
999 %sub = add nsw i32 %elem, -1
1000 %idxprom1 = sext i32 %sub to i64
1001 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1
1002 %1 = load i32, i32* %arrayidx2, align 4
1003 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
1004 %sub4 = add nsw i32 %elem, -2
1005 %idxprom5 = sext i32 %sub4 to i64
1006 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5
1007 %2 = load i32, i32* %arrayidx6, align 4
1008 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2
1009 %sub8 = add nsw i32 %elem, -3
1010 %idxprom9 = sext i32 %sub8 to i64
1011 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9
1012 %3 = load i32, i32* %arrayidx10, align 4
1013 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
1014 ret <4 x i32> %vecinit11
1015 ; P9BE-LABEL: fromDiffMemVarDi
1016 ; P9LE-LABEL: fromDiffMemVarDi
1017 ; P8BE-LABEL: fromDiffMemVarDi
1018 ; P8LE-LABEL: fromDiffMemVarDi
1019 ; P9BE: sldi {{r[0-9]+}}, r4, 2
1020 ; P9BE-DAG: lxvx {{v[0-9]+}}
1024 ; P9LE: sldi {{r[0-9]+}}, r4, 2
1025 ; P9LE-DAG: lxvx {{v[0-9]+}}
1029 ; P8BE: sldi {{r[0-9]+}}, r4, 2
1030 ; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3
1034 ; P8LE: sldi {{r[0-9]+}}, r4, 2
1042 ; Function Attrs: norecurse nounwind readonly
; Builds a <4 x i32> from four i32 loads at non-consecutive constant offsets:
; arr[4], arr[18], arr[2], arr[88] go into lanes 0..3 respectively.
; Only the per-configuration function-label directives are visible here.
1043 define <4 x i32> @fromRandMemConsi(i32* nocapture readonly %arr) {
1045 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4
1046 %0 = load i32, i32* %arrayidx, align 4
1047 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
1048 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 18
1049 %1 = load i32, i32* %arrayidx1, align 4
1050 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
1051 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2
1052 %2 = load i32, i32* %arrayidx3, align 4
1053 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
1054 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 88
1055 %3 = load i32, i32* %arrayidx5, align 4
1056 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
1057 ret <4 x i32> %vecinit6
1058 ; P9BE-LABEL: fromRandMemConsi
1059 ; P9LE-LABEL: fromRandMemConsi
1060 ; P8BE-LABEL: fromRandMemConsi
1061 ; P8LE-LABEL: fromRandMemConsi
1096 ; Function Attrs: norecurse nounwind readonly
; Builds a <4 x i32> from four i32 loads at non-consecutive variable indices:
; arr[elem+4], arr[elem+1], arr[elem+2], arr[elem+8] go into lanes 0..3.
; The visible directives only pin the scaling of the index in r4 (sldi by 2)
; for each configuration.
1097 define <4 x i32> @fromRandMemVari(i32* nocapture readonly %arr, i32 signext %elem) {
1099 %add = add nsw i32 %elem, 4
1100 %idxprom = sext i32 %add to i64
1101 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
1102 %0 = load i32, i32* %arrayidx, align 4
1103 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
1104 %add1 = add nsw i32 %elem, 1
1105 %idxprom2 = sext i32 %add1 to i64
1106 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 %idxprom2
1107 %1 = load i32, i32* %arrayidx3, align 4
1108 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
1109 %add5 = add nsw i32 %elem, 2
1110 %idxprom6 = sext i32 %add5 to i64
1111 %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 %idxprom6
1112 %2 = load i32, i32* %arrayidx7, align 4
1113 %vecinit8 = insertelement <4 x i32> %vecinit4, i32 %2, i32 2
1114 %add9 = add nsw i32 %elem, 8
1115 %idxprom10 = sext i32 %add9 to i64
1116 %arrayidx11 = getelementptr inbounds i32, i32* %arr, i64 %idxprom10
1117 %3 = load i32, i32* %arrayidx11, align 4
1118 %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3
1119 ret <4 x i32> %vecinit12
1120 ; P9BE-LABEL: fromRandMemVari
1121 ; P9LE-LABEL: fromRandMemVari
1122 ; P8BE-LABEL: fromRandMemVari
1123 ; P8LE-LABEL: fromRandMemVari
1124 ; P9BE: sldi r4, r4, 2
1132 ; P9LE: sldi r4, r4, 2
1140 ; P8BE: sldi r4, r4, 2
1150 ; P8LE: sldi r4, r4, 2
1162 ; Function Attrs: norecurse nounwind readnone
; Splats the signext i32 argument %val across all four lanes: it is inserted
; into lane 0 and then broadcast by a shufflevector with an all-zero mask.
; Expected codegen: a single mtvsrws from r3 on pwr9; on pwr8 an mtvsrwz from r3
; followed by xxspltw of word 1 into v2.
1163 define <4 x i32> @spltRegVali(i32 signext %val) {
1165 %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0
1166 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1167 ret <4 x i32> %splat.splat
1168 ; P9BE-LABEL: spltRegVali
1169 ; P9LE-LABEL: spltRegVali
1170 ; P8BE-LABEL: spltRegVali
1171 ; P8LE-LABEL: spltRegVali
1172 ; P9BE: mtvsrws v2, r3
1174 ; P9LE: mtvsrws v2, r3
1176 ; P8BE: mtvsrwz {{[vsf0-9]+}}, r3
1177 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
1179 ; P8LE: mtvsrwz {{[vsf0-9]+}}, r3
1180 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
1184 ; Function Attrs: norecurse nounwind readonly
; Loads one i32 through %ptr and splats it across all four lanes (insert into
; lane 0, then shufflevector with an all-zero mask).
; Expected codegen on every configuration: lfiwzx into f0, a permute of vs0,
; then xxspltw into v2. Big-endian uses xxsldwi by 1 and splats word 0;
; little-endian uses xxpermdi with selector 2 and splats word 3.
1185 define <4 x i32> @spltMemVali(i32* nocapture readonly %ptr) {
1187 %0 = load i32, i32* %ptr, align 4
1188 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
1189 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1190 ret <4 x i32> %splat.splat
1191 ; P9BE-LABEL: spltMemVali
1192 ; P9LE-LABEL: spltMemVali
1193 ; P8BE-LABEL: spltMemVali
1194 ; P8LE-LABEL: spltMemVali
1195 ; P9BE: lfiwzx f0, 0, r3
1196 ; P9BE: xxsldwi vs0, f0, f0, 1
1197 ; P9BE: xxspltw v2, vs0, 0
1199 ; P9LE: lfiwzx f0, 0, r3
1200 ; P9LE: xxpermdi vs0, f0, f0, 2
1201 ; P9LE: xxspltw v2, vs0, 3
1203 ; P8BE: lfiwzx f0, 0, r3
1204 ; P8BE: xxsldwi vs0, f0, f0, 1
1205 ; P8BE: xxspltw v2, vs0, 0
1207 ; P8LE: lfiwzx f0, 0, r3
1208 ; P8LE: xxpermdi vs0, f0, f0, 2
1209 ; P8LE: xxspltw v2, vs0, 3
1213 ; Function Attrs: norecurse nounwind readnone
; Returns the constant vector <4, 4, 4, 4>; every configuration expects a single
; vspltisw with immediate 4.
; NOTE(review): the name suggests a float-to-int conversion of a constant in
; the C source that was presumably folded before this IR was emitted - confirm.
1214 define <4 x i32> @spltCnstConvftoi() {
1216 ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
1217 ; P9BE-LABEL: spltCnstConvftoi
1218 ; P9LE-LABEL: spltCnstConvftoi
1219 ; P8BE-LABEL: spltCnstConvftoi
1220 ; P8LE-LABEL: spltCnstConvftoi
1221 ; P9BE: vspltisw v2, 4
1223 ; P9LE: vspltisw v2, 4
1225 ; P8BE: vspltisw v2, 4
1227 ; P8LE: vspltisw v2, 4
1231 ; Function Attrs: norecurse nounwind readnone
; Converts the four float arguments %a..%d to i32 with fptosi and inserts the
; results into lanes 0..3 of a <4 x i32>.
; Expected codegen: the argument registers are paired with two xxmrghd
; (vs1 with vs3, vs2 with vs4), each pair is converted by xvcvdpsxws, and the
; two results are interleaved into v2 with vmrgew. Operand order is mirrored
; between big- and little-endian; the pwr9 directives are order-sensitive while
; the pwr8 ones use the DAG form for everything but the final vmrgew.
1232 define <4 x i32> @fromRegsConvftoi(float %a, float %b, float %c, float %d) {
1234 %conv = fptosi float %a to i32
1235 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1236 %conv1 = fptosi float %b to i32
1237 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
1238 %conv3 = fptosi float %c to i32
1239 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
1240 %conv5 = fptosi float %d to i32
1241 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
1242 ret <4 x i32> %vecinit6
1243 ; P9BE-LABEL: fromRegsConvftoi
1244 ; P9LE-LABEL: fromRegsConvftoi
1245 ; P8BE-LABEL: fromRegsConvftoi
1246 ; P8LE-LABEL: fromRegsConvftoi
1247 ; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
1248 ; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1249 ; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
1250 ; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1251 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
1252 ; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
1253 ; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1254 ; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
1255 ; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1256 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
1257 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
1258 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
1259 ; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1260 ; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1261 ; P8BE: vmrgew v2, [[REG3]], [[REG4]]
1262 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
1263 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
1264 ; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1265 ; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1266 ; P8LE: vmrgew v2, [[REG4]], [[REG3]]
1269 ; Function Attrs: norecurse nounwind readnone
; Returns a vector of four different constants: <24, 234, 988, 422>.
; Only the per-configuration function-label directives are visible here.
1270 define <4 x i32> @fromDiffConstsConvftoi() {
1272 ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
1273 ; P9BE-LABEL: fromDiffConstsConvftoi
1274 ; P9LE-LABEL: fromDiffConstsConvftoi
1275 ; P8BE-LABEL: fromDiffConstsConvftoi
1276 ; P8LE-LABEL: fromDiffConstsConvftoi
1288 ; Function Attrs: norecurse nounwind readonly
; Reinterprets %ptr as a pointer to <4 x float>, loads the whole vector with
; 4-byte alignment and converts it to <4 x i32> with a vector fptosi.
; Expected codegen: one vector load from r3 (lxv on pwr9, lxvw4x on pwr8
; big-endian, lxvd2x on pwr8 little-endian) followed by xvcvspsxws into v2.
1289 define <4 x i32> @fromDiffMemConsAConvftoi(float* nocapture readonly %ptr) {
1291 %0 = bitcast float* %ptr to <4 x float>*
1292 %1 = load <4 x float>, <4 x float>* %0, align 4
1293 %2 = fptosi <4 x float> %1 to <4 x i32>
1295 ; P9BE-LABEL: fromDiffMemConsAConvftoi
1296 ; P9LE-LABEL: fromDiffMemConsAConvftoi
1297 ; P8BE-LABEL: fromDiffMemConsAConvftoi
1298 ; P8LE-LABEL: fromDiffMemConsAConvftoi
1299 ; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
1300 ; P9BE: xvcvspsxws v2, [[REG1]]
1302 ; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
1303 ; P9LE: xvcvspsxws v2, [[REG1]]
1305 ; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3
1306 ; P8BE: xvcvspsxws v2, [[REG1]]
1308 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
1310 ; P8LE: xvcvspsxws v2, v2
1314 ; Function Attrs: norecurse nounwind readonly
; Loads four floats at constant, descending offsets (ptr[3], ptr[2], ptr[1],
; ptr[0]), converts each to i32 with fptosi and inserts them into lanes 0..3.
; Only the per-configuration function-label directives are visible here.
1315 define <4 x i32> @fromDiffMemConsDConvftoi(float* nocapture readonly %ptr) {
1317 %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
1318 %0 = load float, float* %arrayidx, align 4
1319 %conv = fptosi float %0 to i32
1320 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1321 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2
1322 %1 = load float, float* %arrayidx1, align 4
1323 %conv2 = fptosi float %1 to i32
1324 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
1325 %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 1
1326 %2 = load float, float* %arrayidx4, align 4
1327 %conv5 = fptosi float %2 to i32
1328 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
1329 %3 = load float, float* %ptr, align 4
1330 %conv8 = fptosi float %3 to i32
1331 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
1332 ret <4 x i32> %vecinit9
1333 ; P9BE-LABEL: fromDiffMemConsDConvftoi
1334 ; P9LE-LABEL: fromDiffMemConsDConvftoi
1335 ; P8BE-LABEL: fromDiffMemConsDConvftoi
1336 ; P8LE-LABEL: fromDiffMemConsDConvftoi
1360 ; Function Attrs: norecurse nounwind readonly
; Loads four floats at variable, ascending indices (arr[elem] .. arr[elem+3]),
; converts each to i32 with fptosi and inserts them into lanes 0..3.
; Only function-label directives are present; the trailing FIXME records that
; recognising these consecutive loads is not implemented yet.
1361 define <4 x i32> @fromDiffMemVarAConvftoi(float* nocapture readonly %arr, i32 signext %elem) {
1363 %idxprom = sext i32 %elem to i64
1364 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
1365 %0 = load float, float* %arrayidx, align 4
1366 %conv = fptosi float %0 to i32
1367 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1368 %add = add nsw i32 %elem, 1
1369 %idxprom1 = sext i32 %add to i64
1370 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
1371 %1 = load float, float* %arrayidx2, align 4
1372 %conv3 = fptosi float %1 to i32
1373 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
1374 %add5 = add nsw i32 %elem, 2
1375 %idxprom6 = sext i32 %add5 to i64
1376 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6
1377 %2 = load float, float* %arrayidx7, align 4
1378 %conv8 = fptosi float %2 to i32
1379 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
1380 %add10 = add nsw i32 %elem, 3
1381 %idxprom11 = sext i32 %add10 to i64
1382 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11
1383 %3 = load float, float* %arrayidx12, align 4
1384 %conv13 = fptosi float %3 to i32
1385 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
1386 ret <4 x i32> %vecinit14
1387 ; P9BE-LABEL: fromDiffMemVarAConvftoi
1388 ; P9LE-LABEL: fromDiffMemVarAConvftoi
1389 ; P8BE-LABEL: fromDiffMemVarAConvftoi
1390 ; P8LE-LABEL: fromDiffMemVarAConvftoi
1391 ; FIXME: implement finding consecutive loads with pre-inc
1398 ; Function Attrs: norecurse nounwind readonly
; Loads four floats at variable, descending indices (arr[elem], arr[elem-1],
; arr[elem-2], arr[elem-3]), converts each to i32 with fptosi and inserts them
; into lanes 0..3.
; Only function-label directives are present; the trailing FIXME records that
; recognising these consecutive loads is not implemented yet.
1399 define <4 x i32> @fromDiffMemVarDConvftoi(float* nocapture readonly %arr, i32 signext %elem) {
1401 %idxprom = sext i32 %elem to i64
1402 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
1403 %0 = load float, float* %arrayidx, align 4
1404 %conv = fptosi float %0 to i32
1405 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1406 %sub = add nsw i32 %elem, -1
1407 %idxprom1 = sext i32 %sub to i64
1408 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
1409 %1 = load float, float* %arrayidx2, align 4
1410 %conv3 = fptosi float %1 to i32
1411 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
1412 %sub5 = add nsw i32 %elem, -2
1413 %idxprom6 = sext i32 %sub5 to i64
1414 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6
1415 %2 = load float, float* %arrayidx7, align 4
1416 %conv8 = fptosi float %2 to i32
1417 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
1418 %sub10 = add nsw i32 %elem, -3
1419 %idxprom11 = sext i32 %sub10 to i64
1420 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11
1421 %3 = load float, float* %arrayidx12, align 4
1422 %conv13 = fptosi float %3 to i32
1423 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
1424 ret <4 x i32> %vecinit14
1425 ; P9BE-LABEL: fromDiffMemVarDConvftoi
1426 ; P9LE-LABEL: fromDiffMemVarDConvftoi
1427 ; P8BE-LABEL: fromDiffMemVarDConvftoi
1428 ; P8LE-LABEL: fromDiffMemVarDConvftoi
1429 ; FIXME: implement finding consecutive loads with pre-inc
1436 ; Function Attrs: norecurse nounwind readnone
; Converts the float argument %val to i32 with fptosi and splats the result
; across all four lanes (insert into lane 0, shufflevector with a zero mask).
; Every configuration expects a scalar xscvdpsxws from f1 followed by xxspltw
; of word 1 of the converted register into v2.
1437 define <4 x i32> @spltRegValConvftoi(float %val) {
1439 %conv = fptosi float %val to i32
1440 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
1441 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1442 ret <4 x i32> %splat.splat
1443 ; P9BE-LABEL: spltRegValConvftoi
1444 ; P9LE-LABEL: spltRegValConvftoi
1445 ; P8BE-LABEL: spltRegValConvftoi
1446 ; P8LE-LABEL: spltRegValConvftoi
1447 ; P9BE: xscvdpsxws f[[REG1:[0-9]+]], f1
1448 ; P9BE: xxspltw v2, vs[[REG1]], 1
1450 ; P9LE: xscvdpsxws f[[REG1:[0-9]+]], f1
1451 ; P9LE: xxspltw v2, vs[[REG1]], 1
1453 ; P8BE: xscvdpsxws f[[REG1:[0-9]+]], f1
1454 ; P8BE: xxspltw v2, vs[[REG1]], 1
1456 ; P8LE: xscvdpsxws f[[REG1:[0-9]+]], f1
1457 ; P8LE: xxspltw v2, vs[[REG1]], 1
1461 ; Function Attrs: norecurse nounwind readonly
; Loads one float through %ptr, converts it to i32 with fptosi and splats the
; result across all four lanes (insert into lane 0, shufflevector with a zero
; mask).
; Expected codegen on pwr9 (both endiannesses): a load-and-splat lxvwsx from r3
; followed by a vector xvcvspsxws into v2. On pwr8: a scalar lfsx, a scalar
; xscvdpsxws, then xxspltw of word 1 into v2.
; Each load directive names its mnemonic so that it cannot match an unrelated
; instruction that merely ends in "<reg>, 0, r3".
1462 define <4 x i32> @spltMemValConvftoi(float* nocapture readonly %ptr) {
1464 %0 = load float, float* %ptr, align 4
1465 %conv = fptosi float %0 to i32
1466 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
1467 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1468 ret <4 x i32> %splat.splat
1469 ; P9BE-LABEL: spltMemValConvftoi
1470 ; P9LE-LABEL: spltMemValConvftoi
1471 ; P8BE-LABEL: spltMemValConvftoi
1472 ; P8LE-LABEL: spltMemValConvftoi
1473 ; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
1474 ; P9BE: xvcvspsxws v2, [[REG1]]
1475 ; P9LE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
1476 ; P9LE: xvcvspsxws v2, [[REG1]]
1477 ; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
1478 ; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]]
1479 ; P8BE: xxspltw v2, vs[[REG2]], 1
1480 ; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
1481 ; P8LE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]]
1482 ; P8LE: xxspltw v2, vs[[REG2]], 1
1485 ; Function Attrs: norecurse nounwind readnone
; Returns the constant vector <4, 4, 4, 4>; every configuration expects a single
; vspltisw with immediate 4.
; NOTE(review): the name suggests a double-to-int conversion of a constant in
; the C source that was presumably folded before this IR was emitted - confirm.
1486 define <4 x i32> @spltCnstConvdtoi() {
1488 ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
1489 ; P9BE-LABEL: spltCnstConvdtoi
1490 ; P9LE-LABEL: spltCnstConvdtoi
1491 ; P8BE-LABEL: spltCnstConvdtoi
1492 ; P8LE-LABEL: spltCnstConvdtoi
1493 ; P9BE: vspltisw v2, 4
1495 ; P9LE: vspltisw v2, 4
1497 ; P8BE: vspltisw v2, 4
1499 ; P8LE: vspltisw v2, 4
1503 ; Function Attrs: norecurse nounwind readnone
; Converts the four double arguments %a..%d to i32 with fptosi and inserts the
; results into lanes 0..3 of a <4 x i32>.
; Expected codegen: the argument registers are paired with two xxmrghd
; (vs1 with vs3, vs2 with vs4), each pair is converted by xvcvdpsxws, and the
; two results are interleaved into v2 with vmrgew. Operand order is mirrored
; between big- and little-endian; the pwr9 directives are order-sensitive while
; the pwr8 ones use the DAG form for everything but the final vmrgew.
1504 define <4 x i32> @fromRegsConvdtoi(double %a, double %b, double %c, double %d) {
1506 %conv = fptosi double %a to i32
1507 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1508 %conv1 = fptosi double %b to i32
1509 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
1510 %conv3 = fptosi double %c to i32
1511 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
1512 %conv5 = fptosi double %d to i32
1513 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
1514 ret <4 x i32> %vecinit6
1515 ; P9BE-LABEL: fromRegsConvdtoi
1516 ; P9LE-LABEL: fromRegsConvdtoi
1517 ; P8BE-LABEL: fromRegsConvdtoi
1518 ; P8LE-LABEL: fromRegsConvdtoi
1519 ; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
1520 ; P9BE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1521 ; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
1522 ; P9BE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1523 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
1524 ; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
1525 ; P9LE: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1526 ; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
1527 ; P9LE: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1528 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
1529 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
1530 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
1531 ; P8BE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1532 ; P8BE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1533 ; P8BE: vmrgew v2, [[REG3]], [[REG4]]
1534 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
1535 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
1536 ; P8LE-DAG: xvcvdpsxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
1537 ; P8LE-DAG: xvcvdpsxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
1538 ; P8LE: vmrgew v2, [[REG4]], [[REG3]]
1541 ; Function Attrs: norecurse nounwind readnone
; Returns a vector of four different constants: <24, 234, 988, 422>.
; Only the per-configuration function-label directives are visible here.
1542 define <4 x i32> @fromDiffConstsConvdtoi() {
1544 ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
1545 ; P9BE-LABEL: fromDiffConstsConvdtoi
1546 ; P9LE-LABEL: fromDiffConstsConvdtoi
1547 ; P8BE-LABEL: fromDiffConstsConvdtoi
1548 ; P8LE-LABEL: fromDiffConstsConvdtoi
1560 ; Function Attrs: norecurse nounwind readonly
; Loads two <2 x double> vectors, one at %ptr and one two doubles further on,
; converts each to <2 x i32> with fptosi and concatenates them into a
; <4 x i32> with a shufflevector whose mask is <0, 1, 2, 3>.
; Expected codegen: two vector loads (lxv at offsets 0 and 16 on pwr9, lxvd2x
; on pwr8), xxmrgld/xxmrghd to regroup the doubles, one xvcvdpsxws per group,
; and a final vmrgew into v2. pwr8 little-endian additionally expects an
; xxswapd of each loaded register before the merges.
1561 define <4 x i32> @fromDiffMemConsAConvdtoi(double* nocapture readonly %ptr) {
1563 %0 = bitcast double* %ptr to <2 x double>*
1564 %1 = load <2 x double>, <2 x double>* %0, align 8
1565 %2 = fptosi <2 x double> %1 to <2 x i32>
1566 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 2
1567 %3 = bitcast double* %arrayidx4 to <2 x double>*
1568 %4 = load <2 x double>, <2 x double>* %3, align 8
1569 %5 = fptosi <2 x double> %4 to <2 x i32>
1570 %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1571 ret <4 x i32> %vecinit9
1572 ; P9BE-LABEL: fromDiffMemConsAConvdtoi
1573 ; P9LE-LABEL: fromDiffMemConsAConvdtoi
1574 ; P8BE-LABEL: fromDiffMemConsAConvdtoi
1575 ; P8LE-LABEL: fromDiffMemConsAConvdtoi
1576 ; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
1577 ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
1578 ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
1579 ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
1580 ; P9BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
1581 ; P9BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
1582 ; P9BE: vmrgew v2, [[REG6]], [[REG5]]
1583 ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
1584 ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
1585 ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
1586 ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
1587 ; P9LE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
1588 ; P9LE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
1589 ; P9LE: vmrgew v2, [[REG6]], [[REG5]]
1590 ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
1591 ; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
1592 ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
1593 ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
1594 ; P8BE-DAG: xvcvdpsxws [[REG5:[vs0-9]+]], [[REG3]]
1595 ; P8BE-DAG: xvcvdpsxws [[REG6:[vs0-9]+]], [[REG4]]
1596 ; P8BE: vmrgew v2, [[REG6]], [[REG5]]
1597 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
1598 ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
1599 ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
1600 ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
1601 ; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
1602 ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
1603 ; P8LE-DAG: xvcvdpsxws [[REG7:[vs0-9]+]], [[REG5]]
1604 ; P8LE-DAG: xvcvdpsxws [[REG8:[vs0-9]+]], [[REG6]]
1605 ; P8LE: vmrgew v2, [[REG8]], [[REG7]]
1608 ; Function Attrs: norecurse nounwind readonly
; Loads four doubles at constant, descending offsets (ptr[3], ptr[2], ptr[1],
; ptr[0]), converts each to i32 with fptosi and inserts them into lanes 0..3.
; Only the per-configuration function-label directives are visible here.
1609 define <4 x i32> @fromDiffMemConsDConvdtoi(double* nocapture readonly %ptr) {
1611 %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
1612 %0 = load double, double* %arrayidx, align 8
1613 %conv = fptosi double %0 to i32
1614 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1615 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
1616 %1 = load double, double* %arrayidx1, align 8
1617 %conv2 = fptosi double %1 to i32
1618 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
1619 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 1
1620 %2 = load double, double* %arrayidx4, align 8
1621 %conv5 = fptosi double %2 to i32
1622 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
1623 %3 = load double, double* %ptr, align 8
1624 %conv8 = fptosi double %3 to i32
1625 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
1626 ret <4 x i32> %vecinit9
1627 ; P9BE-LABEL: fromDiffMemConsDConvdtoi
1628 ; P9LE-LABEL: fromDiffMemConsDConvdtoi
1629 ; P8BE-LABEL: fromDiffMemConsDConvdtoi
1630 ; P8LE-LABEL: fromDiffMemConsDConvdtoi
1669 ; Function Attrs: norecurse nounwind readonly
; Loads four doubles at variable, ascending indices (arr[elem] .. arr[elem+3]),
; converts each to i32 with fptosi and inserts them into lanes 0..3.
; Only the per-configuration function-label directives are visible here.
1670 define <4 x i32> @fromDiffMemVarAConvdtoi(double* nocapture readonly %arr, i32 signext %elem) {
1672 %idxprom = sext i32 %elem to i64
1673 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
1674 %0 = load double, double* %arrayidx, align 8
1675 %conv = fptosi double %0 to i32
1676 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1677 %add = add nsw i32 %elem, 1
1678 %idxprom1 = sext i32 %add to i64
1679 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
1680 %1 = load double, double* %arrayidx2, align 8
1681 %conv3 = fptosi double %1 to i32
1682 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
1683 %add5 = add nsw i32 %elem, 2
1684 %idxprom6 = sext i32 %add5 to i64
1685 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6
1686 %2 = load double, double* %arrayidx7, align 8
1687 %conv8 = fptosi double %2 to i32
1688 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
1689 %add10 = add nsw i32 %elem, 3
1690 %idxprom11 = sext i32 %add10 to i64
1691 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11
1692 %3 = load double, double* %arrayidx12, align 8
1693 %conv13 = fptosi double %3 to i32
1694 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
1695 ret <4 x i32> %vecinit14
1696 ; P9BE-LABEL: fromDiffMemVarAConvdtoi
1697 ; P9LE-LABEL: fromDiffMemVarAConvdtoi
1698 ; P8BE-LABEL: fromDiffMemVarAConvdtoi
1699 ; P8LE-LABEL: fromDiffMemVarAConvdtoi
1738 ; Function Attrs: norecurse nounwind readonly
; Loads four doubles at variable, descending indices (arr[elem], arr[elem-1],
; arr[elem-2], arr[elem-3]), converts each to i32 with fptosi and inserts them
; into lanes 0..3.
; Only the per-configuration function-label directives are visible here.
1739 define <4 x i32> @fromDiffMemVarDConvdtoi(double* nocapture readonly %arr, i32 signext %elem) {
1741 %idxprom = sext i32 %elem to i64
1742 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
1743 %0 = load double, double* %arrayidx, align 8
1744 %conv = fptosi double %0 to i32
1745 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
1746 %sub = add nsw i32 %elem, -1
1747 %idxprom1 = sext i32 %sub to i64
1748 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
1749 %1 = load double, double* %arrayidx2, align 8
1750 %conv3 = fptosi double %1 to i32
1751 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
1752 %sub5 = add nsw i32 %elem, -2
1753 %idxprom6 = sext i32 %sub5 to i64
1754 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6
1755 %2 = load double, double* %arrayidx7, align 8
1756 %conv8 = fptosi double %2 to i32
1757 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
1758 %sub10 = add nsw i32 %elem, -3
1759 %idxprom11 = sext i32 %sub10 to i64
1760 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11
1761 %3 = load double, double* %arrayidx12, align 8
1762 %conv13 = fptosi double %3 to i32
1763 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
1764 ret <4 x i32> %vecinit14
1765 ; P9BE-LABEL: fromDiffMemVarDConvdtoi
1766 ; P9LE-LABEL: fromDiffMemVarDConvdtoi
1767 ; P8BE-LABEL: fromDiffMemVarDConvdtoi
1768 ; P8LE-LABEL: fromDiffMemVarDConvdtoi
1807 ; Function Attrs: norecurse nounwind readnone
; Converts the double argument %val to i32 with fptosi and splats the result
; across all four lanes (insert into lane 0, shufflevector with a zero mask).
; Only the per-configuration function-label directives are visible here.
1808 define <4 x i32> @spltRegValConvdtoi(double %val) {
1810 %conv = fptosi double %val to i32
1811 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
1812 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1813 ret <4 x i32> %splat.splat
1814 ; P9BE-LABEL: spltRegValConvdtoi
1815 ; P9LE-LABEL: spltRegValConvdtoi
1816 ; P8BE-LABEL: spltRegValConvdtoi
1817 ; P8LE-LABEL: spltRegValConvdtoi
1832 ; Function Attrs: norecurse nounwind readonly
; Loads one double through %ptr, converts it to i32 with fptosi and splats the
; result across all four lanes (insert into lane 0, shufflevector with a zero
; mask).
; Only the per-configuration function-label directives are visible here.
1833 define <4 x i32> @spltMemValConvdtoi(double* nocapture readonly %ptr) {
1835 %0 = load double, double* %ptr, align 8
1836 %conv = fptosi double %0 to i32
1837 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
1838 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
1839 ret <4 x i32> %splat.splat
1840 ; P9BE-LABEL: spltMemValConvdtoi
1841 ; P9LE-LABEL: spltMemValConvdtoi
1842 ; P8BE-LABEL: spltMemValConvdtoi
1843 ; P8LE-LABEL: spltMemValConvdtoi
1861 ; Function Attrs: norecurse nounwind readnone
; Returns the all-zero <4 x i32> vector.
; Every configuration expects v2 to be cleared by xor-ing it with itself
; (xxlxor v2, v2, v2).
1862 define <4 x i32> @allZeroui() {
1864 ret <4 x i32> zeroinitializer
1865 ; P9BE-LABEL: allZeroui
1866 ; P9LE-LABEL: allZeroui
1867 ; P8BE-LABEL: allZeroui
1868 ; P8LE-LABEL: allZeroui
1869 ; P9BE: xxlxor v2, v2, v2
1871 ; P9LE: xxlxor v2, v2, v2
1873 ; P8BE: xxlxor v2, v2, v2
1875 ; P8LE: xxlxor v2, v2, v2
1879 ; Function Attrs: norecurse nounwind readnone
; Returns a <4 x i32> with every lane set to -1 (all bits set).
; Expected codegen is a byte splat: xxspltib with 255 on pwr9 and vspltisb
; with -1 on pwr8.
1880 define <4 x i32> @allOneui() {
1882 ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
1883 ; P9BE-LABEL: allOneui
1884 ; P9LE-LABEL: allOneui
1885 ; P8BE-LABEL: allOneui
1886 ; P8LE-LABEL: allOneui
1887 ; P9BE: xxspltib v2, 255
1889 ; P9LE: xxspltib v2, 255
1891 ; P8BE: vspltisb v2, -1
1893 ; P8LE: vspltisb v2, -1
1897 ; Function Attrs: norecurse nounwind readnone
; Returns the constant vector <1, 1, 1, 1>.
; All four configurations expect the splat to be materialised in v2 by a single
; vspltisw with immediate 1.
1898 define <4 x i32> @spltConst1ui() {
1900 ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
1901 ; P9BE-LABEL: spltConst1ui
1902 ; P9LE-LABEL: spltConst1ui
1903 ; P8BE-LABEL: spltConst1ui
1904 ; P8LE-LABEL: spltConst1ui
1905 ; P9BE: vspltisw v2, 1
1907 ; P9LE: vspltisw v2, 1
1909 ; P8BE: vspltisw v2, 1
1911 ; P8LE: vspltisw v2, 1
1915 ; Function Attrs: norecurse nounwind readnone
; Returns a splat of 32767 in every i32 lane.
; Every configuration expects vspltisw with -15 followed by vsrw of v2 by itself.
; NOTE(review): presumably -15 is 0xFFFFFFF1 per word and vsrw shifts by the low
; five bits (17), leaving 0x7FFF == 32767 - confirm against the Power ISA.
1916 define <4 x i32> @spltConst16kui() {
1918 ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
1919 ; P9BE-LABEL: spltConst16kui
1920 ; P9LE-LABEL: spltConst16kui
1921 ; P8BE-LABEL: spltConst16kui
1922 ; P8LE-LABEL: spltConst16kui
1923 ; P9BE: vspltisw v2, -15
1924 ; P9BE: vsrw v2, v2, v2
1926 ; P9LE: vspltisw v2, -15
1927 ; P9LE: vsrw v2, v2, v2
1929 ; P8BE: vspltisw v2, -15
1930 ; P8BE: vsrw v2, v2, v2
1932 ; P8LE: vspltisw v2, -15
1933 ; P8LE: vsrw v2, v2, v2
1937 ; Function Attrs: norecurse nounwind readnone
; Returns a splat of 65535 in every i32 lane.
; Every configuration expects vspltisw with -16 followed by vsrw of v2 by itself.
; NOTE(review): presumably -16 is 0xFFFFFFF0 per word and vsrw shifts by the low
; five bits (16), leaving 0xFFFF == 65535 - confirm against the Power ISA.
1938 define <4 x i32> @spltConst32kui() {
1940 ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
1941 ; P9BE-LABEL: spltConst32kui
1942 ; P9LE-LABEL: spltConst32kui
1943 ; P8BE-LABEL: spltConst32kui
1944 ; P8LE-LABEL: spltConst32kui
1945 ; P9BE: vspltisw v2, -16
1946 ; P9BE: vsrw v2, v2, v2
1948 ; P9LE: vspltisw v2, -16
1949 ; P9LE: vsrw v2, v2, v2
1951 ; P8BE: vspltisw v2, -16
1952 ; P8BE: vsrw v2, v2, v2
1954 ; P8LE: vspltisw v2, -16
1955 ; P8LE: vsrw v2, v2, v2
1959 ; Function Attrs: norecurse nounwind readnone
; Builds a <4 x i32> from the four zeroext i32 arguments %a..%d (lanes 0..3)
; through a chain of insertelement instructions.
; Expected codegen: the arguments are packed pairwise into 64-bit GPRs with
; rldimi, then moved into v2 - one mtvsrdd on pwr9, two mtvsrd plus xxmrghd on
; pwr8. The register pairing is mirrored between big- and little-endian.
1960 define <4 x i32> @fromRegsui(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c, i32 zeroext %d) {
1962 %vecinit = insertelement <4 x i32> undef, i32 %a, i32 0
1963 %vecinit1 = insertelement <4 x i32> %vecinit, i32 %b, i32 1
1964 %vecinit2 = insertelement <4 x i32> %vecinit1, i32 %c, i32 2
1965 %vecinit3 = insertelement <4 x i32> %vecinit2, i32 %d, i32 3
1966 ret <4 x i32> %vecinit3
1967 ; P9BE-LABEL: fromRegsui
1968 ; P9LE-LABEL: fromRegsui
1969 ; P8BE-LABEL: fromRegsui
1970 ; P8LE-LABEL: fromRegsui
1971 ; P9BE-DAG: rldimi r6, r5, 32, 0
1972 ; P9BE-DAG: rldimi r4, r3, 32, 0
1973 ; P9BE: mtvsrdd v2, r4, r6
1975 ; P9LE-DAG: rldimi r3, r4, 32, 0
1976 ; P9LE-DAG: rldimi r5, r6, 32, 0
1977 ; P9LE: mtvsrdd v2, r5, r3
1979 ; P8BE-DAG: rldimi r6, r5, 32, 0
1980 ; P8BE-DAG: rldimi r4, r3, 32, 0
1981 ; P8BE-DAG: mtvsrd f[[REG1:[0-9]+]], r6
1982 ; P8BE-DAG: mtvsrd f[[REG2:[0-9]+]], r4
1983 ; P8BE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
1985 ; P8LE-DAG: rldimi r3, r4, 32, 0
1986 ; P8LE-DAG: rldimi r5, r6, 32, 0
1987 ; P8LE-DAG: mtvsrd f[[REG1:[0-9]+]], r3
1988 ; P8LE-DAG: mtvsrd f[[REG2:[0-9]+]], r5
1989 ; P8LE-DAG: xxmrghd v2, vs[[REG2]], vs[[REG1]]
; fromDiffConstsui: returns the non-splat constant vector <242, -113, 889, 19>.
; Only the per-run function labels are visible in this excerpt.
1993 ; Function Attrs: norecurse nounwind readnone
1994 define <4 x i32> @fromDiffConstsui() {
1996 ret <4 x i32> <i32 242, i32 -113, i32 889, i32 19>
1997 ; P9BE-LABEL: fromDiffConstsui
1998 ; P9LE-LABEL: fromDiffConstsui
1999 ; P8BE-LABEL: fromDiffConstsui
2000 ; P8LE-LABEL: fromDiffConstsui
; fromDiffMemConsAui: builds a <4 x i32> from four scalar loads at constant,
; ascending indices: arr[0], arr[1], arr[2], arr[3] go to lanes 0, 1, 2, 3.
; Only the per-run function labels are visible in this excerpt.
2012 ; Function Attrs: norecurse nounwind readonly
2013 define <4 x i32> @fromDiffMemConsAui(i32* nocapture readonly %arr) {
2015 %0 = load i32, i32* %arr, align 4
2016 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2017 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 1
2018 %1 = load i32, i32* %arrayidx1, align 4
2019 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2020 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2
2021 %2 = load i32, i32* %arrayidx3, align 4
2022 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
2023 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 3
2024 %3 = load i32, i32* %arrayidx5, align 4
2025 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
2026 ret <4 x i32> %vecinit6
2027 ; P9BE-LABEL: fromDiffMemConsAui
2028 ; P9LE-LABEL: fromDiffMemConsAui
2029 ; P8BE-LABEL: fromDiffMemConsAui
2030 ; P8LE-LABEL: fromDiffMemConsAui
; fromDiffMemConsDui: builds a <4 x i32> from four scalar loads at constant,
; descending indices: arr[3], arr[2], arr[1], arr[0] go to lanes 0, 1, 2, 3,
; i.e. the four consecutive elements in reversed order.
; Only the per-run function labels are visible in this excerpt.
2042 ; Function Attrs: norecurse nounwind readonly
2043 define <4 x i32> @fromDiffMemConsDui(i32* nocapture readonly %arr) {
2045 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3
2046 %0 = load i32, i32* %arrayidx, align 4
2047 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2048 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 2
2049 %1 = load i32, i32* %arrayidx1, align 4
2050 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2051 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 1
2052 %2 = load i32, i32* %arrayidx3, align 4
2053 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
2054 %3 = load i32, i32* %arr, align 4
2055 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
2056 ret <4 x i32> %vecinit6
2057 ; P9BE-LABEL: fromDiffMemConsDui
2058 ; P9LE-LABEL: fromDiffMemConsDui
2059 ; P8BE-LABEL: fromDiffMemConsDui
2060 ; P8LE-LABEL: fromDiffMemConsDui
; fromDiffMemVarAui: builds a <4 x i32> from arr[elem], arr[elem+1],
; arr[elem+2], arr[elem+3] (ascending, variable base index) in lanes 0..3.
; The four consecutive scalar loads are expected to become one vector load:
; the index is scaled by 4 (sldi by 2) and used in an indexed load, which is
; lxvx on pwr9, lxvw4x on pwr8 big-endian and lxvd2x on pwr8 little-endian.
2081 ; Function Attrs: norecurse nounwind readonly
2082 define <4 x i32> @fromDiffMemVarAui(i32* nocapture readonly %arr, i32 signext %elem) {
2084 %idxprom = sext i32 %elem to i64
2085 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
2086 %0 = load i32, i32* %arrayidx, align 4
2087 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2088 %add = add nsw i32 %elem, 1
2089 %idxprom1 = sext i32 %add to i64
2090 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1
2091 %1 = load i32, i32* %arrayidx2, align 4
2092 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2093 %add4 = add nsw i32 %elem, 2
2094 %idxprom5 = sext i32 %add4 to i64
2095 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5
2096 %2 = load i32, i32* %arrayidx6, align 4
2097 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2
2098 %add8 = add nsw i32 %elem, 3
2099 %idxprom9 = sext i32 %add8 to i64
2100 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9
2101 %3 = load i32, i32* %arrayidx10, align 4
2102 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
2103 ret <4 x i32> %vecinit11
2104 ; P9BE-LABEL: fromDiffMemVarAui
2105 ; P9LE-LABEL: fromDiffMemVarAui
2106 ; P8BE-LABEL: fromDiffMemVarAui
2107 ; P8LE-LABEL: fromDiffMemVarAui
2108 ; P9BE: sldi r4, r4, 2
2109 ; P9BE: lxvx v2, r3, r4
2111 ; P9LE: sldi r4, r4, 2
2112 ; P9LE: lxvx v2, r3, r4
2114 ; P8BE: sldi r4, r4, 2
2115 ; P8BE: lxvw4x {{[vs0-9]+}}, r3, r4
2117 ; P8LE: sldi r4, r4, 2
2118 ; P8LE: lxvd2x {{[vs0-9]+}}, r3, r4
; fromDiffMemVarDui: builds a <4 x i32> from arr[elem], arr[elem-1],
; arr[elem-2], arr[elem-3] (descending, variable base index) in lanes 0..3.
; The visible checks expect the index to be scaled by 4 (sldi by 2). The pwr9
; runs then step the pointer back 12 bytes (three elements) and use one lxvx;
; the pwr8 big-endian run expects one lxvw4x. For pwr8 little-endian only the
; sldi check is visible in this excerpt.
2123 ; Function Attrs: norecurse nounwind readonly
2124 define <4 x i32> @fromDiffMemVarDui(i32* nocapture readonly %arr, i32 signext %elem) {
2126 %idxprom = sext i32 %elem to i64
2127 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
2128 %0 = load i32, i32* %arrayidx, align 4
2129 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2130 %sub = add nsw i32 %elem, -1
2131 %idxprom1 = sext i32 %sub to i64
2132 %arrayidx2 = getelementptr inbounds i32, i32* %arr, i64 %idxprom1
2133 %1 = load i32, i32* %arrayidx2, align 4
2134 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2135 %sub4 = add nsw i32 %elem, -2
2136 %idxprom5 = sext i32 %sub4 to i64
2137 %arrayidx6 = getelementptr inbounds i32, i32* %arr, i64 %idxprom5
2138 %2 = load i32, i32* %arrayidx6, align 4
2139 %vecinit7 = insertelement <4 x i32> %vecinit3, i32 %2, i32 2
2140 %sub8 = add nsw i32 %elem, -3
2141 %idxprom9 = sext i32 %sub8 to i64
2142 %arrayidx10 = getelementptr inbounds i32, i32* %arr, i64 %idxprom9
2143 %3 = load i32, i32* %arrayidx10, align 4
2144 %vecinit11 = insertelement <4 x i32> %vecinit7, i32 %3, i32 3
2145 ret <4 x i32> %vecinit11
2146 ; P9BE-LABEL: fromDiffMemVarDui
2147 ; P9LE-LABEL: fromDiffMemVarDui
2148 ; P8BE-LABEL: fromDiffMemVarDui
2149 ; P8LE-LABEL: fromDiffMemVarDui
2150 ; P9BE-DAG: sldi {{r[0-9]+}}, r4, 2
2151 ; P9BE-DAG: addi r3, r3, -12
2152 ; P9BE-DAG: lxvx {{v[0-9]+}}, 0, r3
2156 ; P9LE-DAG: sldi {{r[0-9]+}}, r4, 2
2157 ; P9LE-DAG: addi r3, r3, -12
2158 ; P9LE-DAG: lxvx {{v[0-9]+}}, 0, r3
2162 ; P8BE-DAG: sldi {{r[0-9]+}}, r4, 2
2163 ; P8BE-DAG: lxvw4x {{v[0-9]+}}, 0, r3
2167 ; P8LE-DAG: sldi {{r[0-9]+}}, r4, 2
; fromRandMemConsui: builds a <4 x i32> from four scalar loads at constant,
; non-consecutive indices: arr[4], arr[18], arr[2], arr[88] go to lanes 0..3.
; Only the per-run function labels are visible in this excerpt.
2174 ; Function Attrs: norecurse nounwind readonly
2175 define <4 x i32> @fromRandMemConsui(i32* nocapture readonly %arr) {
2177 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 4
2178 %0 = load i32, i32* %arrayidx, align 4
2179 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2180 %arrayidx1 = getelementptr inbounds i32, i32* %arr, i64 18
2181 %1 = load i32, i32* %arrayidx1, align 4
2182 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2183 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 2
2184 %2 = load i32, i32* %arrayidx3, align 4
2185 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %2, i32 2
2186 %arrayidx5 = getelementptr inbounds i32, i32* %arr, i64 88
2187 %3 = load i32, i32* %arrayidx5, align 4
2188 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %3, i32 3
2189 ret <4 x i32> %vecinit6
2190 ; P9BE-LABEL: fromRandMemConsui
2191 ; P9LE-LABEL: fromRandMemConsui
2192 ; P8BE-LABEL: fromRandMemConsui
2193 ; P8LE-LABEL: fromRandMemConsui
; fromRandMemVarui: builds a <4 x i32> from four scalar loads at variable,
; non-consecutive indices: arr[elem+4], arr[elem+1], arr[elem+2], arr[elem+8]
; go to lanes 0..3.
; The only instruction check visible in this excerpt is the index scaling
; (sldi by 2) expected on every run.
2228 ; Function Attrs: norecurse nounwind readonly
2229 define <4 x i32> @fromRandMemVarui(i32* nocapture readonly %arr, i32 signext %elem) {
2231 %add = add nsw i32 %elem, 4
2232 %idxprom = sext i32 %add to i64
2233 %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %idxprom
2234 %0 = load i32, i32* %arrayidx, align 4
2235 %vecinit = insertelement <4 x i32> undef, i32 %0, i32 0
2236 %add1 = add nsw i32 %elem, 1
2237 %idxprom2 = sext i32 %add1 to i64
2238 %arrayidx3 = getelementptr inbounds i32, i32* %arr, i64 %idxprom2
2239 %1 = load i32, i32* %arrayidx3, align 4
2240 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %1, i32 1
2241 %add5 = add nsw i32 %elem, 2
2242 %idxprom6 = sext i32 %add5 to i64
2243 %arrayidx7 = getelementptr inbounds i32, i32* %arr, i64 %idxprom6
2244 %2 = load i32, i32* %arrayidx7, align 4
2245 %vecinit8 = insertelement <4 x i32> %vecinit4, i32 %2, i32 2
2246 %add9 = add nsw i32 %elem, 8
2247 %idxprom10 = sext i32 %add9 to i64
2248 %arrayidx11 = getelementptr inbounds i32, i32* %arr, i64 %idxprom10
2249 %3 = load i32, i32* %arrayidx11, align 4
2250 %vecinit12 = insertelement <4 x i32> %vecinit8, i32 %3, i32 3
2251 ret <4 x i32> %vecinit12
2252 ; P9BE-LABEL: fromRandMemVarui
2253 ; P9LE-LABEL: fromRandMemVarui
2254 ; P8BE-LABEL: fromRandMemVarui
2255 ; P8LE-LABEL: fromRandMemVarui
2256 ; P9BE: sldi r4, r4, 2
2264 ; P9LE: sldi r4, r4, 2
2272 ; P8BE: sldi r4, r4, 2
2282 ; P8LE: sldi r4, r4, 2
; spltRegValui: splats a zero-extended i32 argument across a <4 x i32>
; (insert into lane 0, then shufflevector with an all-zero mask).
; Expected: a single mtvsrws on pwr9; on pwr8, mtvsrwz to move the value into
; a vector-scalar register followed by xxspltw of word 1.
2294 ; Function Attrs: norecurse nounwind readnone
2295 define <4 x i32> @spltRegValui(i32 zeroext %val) {
2297 %splat.splatinsert = insertelement <4 x i32> undef, i32 %val, i32 0
2298 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2299 ret <4 x i32> %splat.splat
2300 ; P9BE-LABEL: spltRegValui
2301 ; P9LE-LABEL: spltRegValui
2302 ; P8BE-LABEL: spltRegValui
2303 ; P8LE-LABEL: spltRegValui
2304 ; P9BE: mtvsrws v2, r3
2306 ; P9LE: mtvsrws v2, r3
2308 ; P8BE: mtvsrwz {{[vsf0-9]+}}, r3
2309 ; P8BE: xxspltw v2, {{[vsf0-9]+}}, 1
2311 ; P8LE: mtvsrwz {{[vsf0-9]+}}, r3
2312 ; P8LE: xxspltw v2, {{[vsf0-9]+}}, 1
; spltMemValui: loads one i32 from %ptr and splats it across a <4 x i32>.
; Expected on every run: lfiwzx to load the word, one instruction to
; reposition it (xxsldwi on big-endian, xxpermdi on little-endian), then
; xxspltw (word 0 on big-endian, word 3 on little-endian).
2316 ; Function Attrs: norecurse nounwind readonly
2317 define <4 x i32> @spltMemValui(i32* nocapture readonly %ptr) {
2319 %0 = load i32, i32* %ptr, align 4
2320 %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0
2321 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2322 ret <4 x i32> %splat.splat
2323 ; P9BE-LABEL: spltMemValui
2324 ; P9LE-LABEL: spltMemValui
2325 ; P8BE-LABEL: spltMemValui
2326 ; P8LE-LABEL: spltMemValui
2327 ; P9BE: lfiwzx f0, 0, r3
2328 ; P9BE: xxsldwi vs0, f0, f0, 1
2329 ; P9BE: xxspltw v2, vs0, 0
2331 ; P9LE: lfiwzx f0, 0, r3
2332 ; P9LE: xxpermdi vs0, f0, f0, 2
2333 ; P9LE: xxspltw v2, vs0, 3
2335 ; P8BE: lfiwzx f0, 0, r3
2336 ; P8BE: xxsldwi vs0, f0, f0, 1
2337 ; P8BE: xxspltw v2, vs0, 0
2339 ; P8LE: lfiwzx f0, 0, r3
2340 ; P8LE: xxpermdi vs0, f0, f0, 2
2341 ; P8LE: xxspltw v2, vs0, 3
; spltCnstConvftoui: returns a <4 x i32> constant with 4 in every lane.
; The IR holds an integer constant and no conversion instruction.
; NOTE(review): the name suggests the C source splatted a float constant
; converted to unsigned int, folded before this IR was produced - confirm.
; All four runs expect one word splat-immediate (vspltisw 4).
2345 ; Function Attrs: norecurse nounwind readnone
2346 define <4 x i32> @spltCnstConvftoui() {
2348 ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
2349 ; P9BE-LABEL: spltCnstConvftoui
2350 ; P9LE-LABEL: spltCnstConvftoui
2351 ; P8BE-LABEL: spltCnstConvftoui
2352 ; P8LE-LABEL: spltCnstConvftoui
2353 ; P9BE: vspltisw v2, 4
2355 ; P9LE: vspltisw v2, 4
2357 ; P8BE: vspltisw v2, 4
2359 ; P8LE: vspltisw v2, 4
; fromRegsConvftoui: converts four float arguments to i32 with fptoui and
; places the results of %a, %b, %c, %d into lanes 0, 1, 2, 3.
; Expected lowering: pair the argument registers with xxmrghd (vs1/vs3 and
; vs2/vs4, operand order swapped on little-endian), convert each pair with
; xvcvdpuxws, and interleave the two results with vmrgew. The pwr9 checks fix
; the instruction order; the pwr8 checks allow the merges and conversions in
; any order before the final vmrgew.
2363 ; Function Attrs: norecurse nounwind readnone
2364 define <4 x i32> @fromRegsConvftoui(float %a, float %b, float %c, float %d) {
2366 %conv = fptoui float %a to i32
2367 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2368 %conv1 = fptoui float %b to i32
2369 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
2370 %conv3 = fptoui float %c to i32
2371 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
2372 %conv5 = fptoui float %d to i32
2373 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
2374 ret <4 x i32> %vecinit6
2375 ; P9BE-LABEL: fromRegsConvftoui
2376 ; P9LE-LABEL: fromRegsConvftoui
2377 ; P8BE-LABEL: fromRegsConvftoui
2378 ; P8LE-LABEL: fromRegsConvftoui
2379 ; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
2380 ; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2381 ; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
2382 ; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2383 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
2384 ; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
2385 ; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2386 ; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
2387 ; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2388 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
2389 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
2390 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
2391 ; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2392 ; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2393 ; P8BE: vmrgew v2, [[REG3]], [[REG4]]
2394 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
2395 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
2396 ; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2397 ; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2398 ; P8LE: vmrgew v2, [[REG4]], [[REG3]]
; fromDiffConstsConvftoui: returns the non-splat constant vector
; <24, 234, 988, 422>. The IR contains no conversion instruction.
; Only the per-run function labels are visible in this excerpt.
2401 ; Function Attrs: norecurse nounwind readnone
2402 define <4 x i32> @fromDiffConstsConvftoui() {
2404 ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
2405 ; P9BE-LABEL: fromDiffConstsConvftoui
2406 ; P9LE-LABEL: fromDiffConstsConvftoui
2407 ; P8BE-LABEL: fromDiffConstsConvftoui
2408 ; P8LE-LABEL: fromDiffConstsConvftoui
; fromDiffMemConsAConvftoui: reinterprets %ptr as a pointer to <4 x float>,
; loads the whole vector (4-byte alignment) and converts it with one vector
; fptoui.
; Expected: one vector load (lxv on pwr9, lxvw4x on pwr8 big-endian, lxvd2x on
; pwr8 little-endian) followed by xvcvspuxws. The pwr8 little-endian run also
; expects an xxswapd between the load and the conversion.
2420 ; Function Attrs: norecurse nounwind readonly
2421 define <4 x i32> @fromDiffMemConsAConvftoui(float* nocapture readonly %ptr) {
2423 %0 = bitcast float* %ptr to <4 x float>*
2424 %1 = load <4 x float>, <4 x float>* %0, align 4
2425 %2 = fptoui <4 x float> %1 to <4 x i32>
2427 ; P9BE-LABEL: fromDiffMemConsAConvftoui
2428 ; P9LE-LABEL: fromDiffMemConsAConvftoui
2429 ; P8BE-LABEL: fromDiffMemConsAConvftoui
2430 ; P8LE-LABEL: fromDiffMemConsAConvftoui
2431 ; P9BE: lxv [[REG1:[vs0-9]+]], 0(r3)
2432 ; P9BE: xvcvspuxws v2, [[REG1]]
2434 ; P9LE: lxv [[REG1:[vs0-9]+]], 0(r3)
2435 ; P9LE: xvcvspuxws v2, [[REG1]]
2437 ; P8BE: lxvw4x [[REG1:[vs0-9]+]], 0, r3
2438 ; P8BE: xvcvspuxws v2, [[REG1]]
2440 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
2441 ; P8LE: xxswapd v2, [[REG1]]
2442 ; P8LE: xvcvspuxws v2, v2
; fromDiffMemConsDConvftoui: loads ptr[3], ptr[2], ptr[1], ptr[0] as floats
; (constant, descending indices), converts each with fptoui and places the
; results in lanes 0, 1, 2, 3.
; Only the per-run function labels are visible in this excerpt.
2446 ; Function Attrs: norecurse nounwind readonly
2447 define <4 x i32> @fromDiffMemConsDConvftoui(float* nocapture readonly %ptr) {
2449 %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
2450 %0 = load float, float* %arrayidx, align 4
2451 %conv = fptoui float %0 to i32
2452 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2453 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2
2454 %1 = load float, float* %arrayidx1, align 4
2455 %conv2 = fptoui float %1 to i32
2456 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
2457 %arrayidx4 = getelementptr inbounds float, float* %ptr, i64 1
2458 %2 = load float, float* %arrayidx4, align 4
2459 %conv5 = fptoui float %2 to i32
2460 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
2461 %3 = load float, float* %ptr, align 4
2462 %conv8 = fptoui float %3 to i32
2463 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
2464 ret <4 x i32> %vecinit9
2465 ; P9BE-LABEL: fromDiffMemConsDConvftoui
2466 ; P9LE-LABEL: fromDiffMemConsDConvftoui
2467 ; P8BE-LABEL: fromDiffMemConsDConvftoui
2468 ; P8LE-LABEL: fromDiffMemConsDConvftoui
; fromDiffMemVarAConvftoui: loads arr[elem], arr[elem+1], arr[elem+2],
; arr[elem+3] as floats (ascending, variable base index), converts each with
; fptoui and places the results in lanes 0, 1, 2, 3.
; Only the per-run function labels are visible in this excerpt; the trailing
; FIXME records that consecutive-load detection with pre-increment addressing
; is not implemented yet.
2492 ; Function Attrs: norecurse nounwind readonly
2493 define <4 x i32> @fromDiffMemVarAConvftoui(float* nocapture readonly %arr, i32 signext %elem) {
2495 %idxprom = sext i32 %elem to i64
2496 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
2497 %0 = load float, float* %arrayidx, align 4
2498 %conv = fptoui float %0 to i32
2499 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2500 %add = add nsw i32 %elem, 1
2501 %idxprom1 = sext i32 %add to i64
2502 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
2503 %1 = load float, float* %arrayidx2, align 4
2504 %conv3 = fptoui float %1 to i32
2505 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
2506 %add5 = add nsw i32 %elem, 2
2507 %idxprom6 = sext i32 %add5 to i64
2508 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6
2509 %2 = load float, float* %arrayidx7, align 4
2510 %conv8 = fptoui float %2 to i32
2511 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
2512 %add10 = add nsw i32 %elem, 3
2513 %idxprom11 = sext i32 %add10 to i64
2514 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11
2515 %3 = load float, float* %arrayidx12, align 4
2516 %conv13 = fptoui float %3 to i32
2517 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
2518 ret <4 x i32> %vecinit14
2519 ; P9BE-LABEL: fromDiffMemVarAConvftoui
2520 ; P9LE-LABEL: fromDiffMemVarAConvftoui
2521 ; P8BE-LABEL: fromDiffMemVarAConvftoui
2522 ; P8LE-LABEL: fromDiffMemVarAConvftoui
2523 ; FIXME: implement finding consecutive loads with pre-inc
; fromDiffMemVarDConvftoui: loads arr[elem], arr[elem-1], arr[elem-2],
; arr[elem-3] as floats (descending, variable base index), converts each with
; fptoui and places the results in lanes 0, 1, 2, 3.
; Only the per-run function labels are visible in this excerpt; the trailing
; FIXME records that consecutive-load detection with pre-increment addressing
; is not implemented yet.
2530 ; Function Attrs: norecurse nounwind readonly
2531 define <4 x i32> @fromDiffMemVarDConvftoui(float* nocapture readonly %arr, i32 signext %elem) {
2533 %idxprom = sext i32 %elem to i64
2534 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
2535 %0 = load float, float* %arrayidx, align 4
2536 %conv = fptoui float %0 to i32
2537 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2538 %sub = add nsw i32 %elem, -1
2539 %idxprom1 = sext i32 %sub to i64
2540 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
2541 %1 = load float, float* %arrayidx2, align 4
2542 %conv3 = fptoui float %1 to i32
2543 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
2544 %sub5 = add nsw i32 %elem, -2
2545 %idxprom6 = sext i32 %sub5 to i64
2546 %arrayidx7 = getelementptr inbounds float, float* %arr, i64 %idxprom6
2547 %2 = load float, float* %arrayidx7, align 4
2548 %conv8 = fptoui float %2 to i32
2549 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
2550 %sub10 = add nsw i32 %elem, -3
2551 %idxprom11 = sext i32 %sub10 to i64
2552 %arrayidx12 = getelementptr inbounds float, float* %arr, i64 %idxprom11
2553 %3 = load float, float* %arrayidx12, align 4
2554 %conv13 = fptoui float %3 to i32
2555 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
2556 ret <4 x i32> %vecinit14
2557 ; P9BE-LABEL: fromDiffMemVarDConvftoui
2558 ; P9LE-LABEL: fromDiffMemVarDConvftoui
2559 ; P8BE-LABEL: fromDiffMemVarDConvftoui
2560 ; P8LE-LABEL: fromDiffMemVarDConvftoui
2561 ; FIXME: implement finding consecutive loads with pre-inc
; spltRegValConvftoui: converts a float argument to i32 with fptoui and
; splats the result across a <4 x i32>.
; All four runs expect a scalar xscvdpuxws on f1 followed by xxspltw of word 1
; from the conversion result.
2568 ; Function Attrs: norecurse nounwind readnone
2569 define <4 x i32> @spltRegValConvftoui(float %val) {
2571 %conv = fptoui float %val to i32
2572 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
2573 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2574 ret <4 x i32> %splat.splat
2575 ; P9BE-LABEL: spltRegValConvftoui
2576 ; P9LE-LABEL: spltRegValConvftoui
2577 ; P8BE-LABEL: spltRegValConvftoui
2578 ; P8LE-LABEL: spltRegValConvftoui
2579 ; P9BE: xscvdpuxws f[[REG1:[0-9]+]], f1
2580 ; P9BE: xxspltw v2, vs[[REG1]], 1
2582 ; P9LE: xscvdpuxws f[[REG1:[0-9]+]], f1
2583 ; P9LE: xxspltw v2, vs[[REG1]], 1
2585 ; P8BE: xscvdpuxws f[[REG1:[0-9]+]], f1
2586 ; P8BE: xxspltw v2, vs[[REG1]], 1
2588 ; P8LE: xscvdpuxws f[[REG1:[0-9]+]], f1
2589 ; P8LE: xxspltw v2, vs[[REG1]], 1
; spltMemValConvftoui: loads one float from %ptr, converts it to i32 with
; fptoui and splats the result across a <4 x i32>.
; Expected on pwr9 (both endiannesses): a load-and-splat (lxvwsx) followed by
; a vector conversion (xvcvspuxws). Expected on pwr8: scalar load (lfsx),
; scalar conversion (xscvdpuxws), then xxspltw of word 1.
; The pwr9 little-endian load check names the lxvwsx mnemonic so it cannot
; match an unrelated instruction that happens to use the same operands, and
; the pwr8 little-endian capture uses the same digits-only pattern as its
; big-endian twin.
2593 ; Function Attrs: norecurse nounwind readonly
2594 define <4 x i32> @spltMemValConvftoui(float* nocapture readonly %ptr) {
2596 %0 = load float, float* %ptr, align 4
2597 %conv = fptoui float %0 to i32
2598 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
2599 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2600 ret <4 x i32> %splat.splat
2601 ; P9BE-LABEL: spltMemValConvftoui
2602 ; P9LE-LABEL: spltMemValConvftoui
2603 ; P8BE-LABEL: spltMemValConvftoui
2604 ; P8LE-LABEL: spltMemValConvftoui
2605 ; P9BE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
2606 ; P9BE: xvcvspuxws v2, [[REG1]]
2607 ; P9LE: lxvwsx [[REG1:[vs0-9]+]], 0, r3
2608 ; P9LE: xvcvspuxws v2, [[REG1]]
2609 ; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3
2610 ; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]]
2611 ; P8BE: xxspltw v2, vs[[REG2]], 1
2612 ; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3
2613 ; P8LE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]]
2614 ; P8LE: xxspltw v2, vs[[REG2]], 1
; spltCnstConvdtoui: returns a <4 x i32> constant with 4 in every lane.
; The IR holds an integer constant and no conversion instruction.
; NOTE(review): the name suggests the C source splatted a double constant
; converted to unsigned int, folded before this IR was produced - confirm.
; All four runs expect one word splat-immediate (vspltisw 4).
2617 ; Function Attrs: norecurse nounwind readnone
2618 define <4 x i32> @spltCnstConvdtoui() {
2620 ret <4 x i32> <i32 4, i32 4, i32 4, i32 4>
2621 ; P9BE-LABEL: spltCnstConvdtoui
2622 ; P9LE-LABEL: spltCnstConvdtoui
2623 ; P8BE-LABEL: spltCnstConvdtoui
2624 ; P8LE-LABEL: spltCnstConvdtoui
2625 ; P9BE: vspltisw v2, 4
2627 ; P9LE: vspltisw v2, 4
2629 ; P8BE: vspltisw v2, 4
2631 ; P8LE: vspltisw v2, 4
; fromRegsConvdtoui: converts four double arguments to i32 with fptoui and
; places the results of %a, %b, %c, %d into lanes 0, 1, 2, 3.
; Expected lowering: pair the argument registers with xxmrghd (vs1/vs3 and
; vs2/vs4, operand order swapped on little-endian), convert each pair with
; xvcvdpuxws, and interleave the two results with vmrgew. The pwr9 checks fix
; the instruction order; the pwr8 checks allow the merges and conversions in
; any order before the final vmrgew.
2635 ; Function Attrs: norecurse nounwind readnone
2636 define <4 x i32> @fromRegsConvdtoui(double %a, double %b, double %c, double %d) {
2638 %conv = fptoui double %a to i32
2639 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2640 %conv1 = fptoui double %b to i32
2641 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %conv1, i32 1
2642 %conv3 = fptoui double %c to i32
2643 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %conv3, i32 2
2644 %conv5 = fptoui double %d to i32
2645 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %conv5, i32 3
2646 ret <4 x i32> %vecinit6
2647 ; P9BE-LABEL: fromRegsConvdtoui
2648 ; P9LE-LABEL: fromRegsConvdtoui
2649 ; P8BE-LABEL: fromRegsConvdtoui
2650 ; P8LE-LABEL: fromRegsConvdtoui
2651 ; P9BE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
2652 ; P9BE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2653 ; P9BE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
2654 ; P9BE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2655 ; P9BE: vmrgew v2, [[REG3]], [[REG4]]
2656 ; P9LE: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
2657 ; P9LE: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2658 ; P9LE: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
2659 ; P9LE: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2660 ; P9LE: vmrgew v2, [[REG4]], [[REG3]]
2661 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs1, vs3
2662 ; P8BE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs2, vs4
2663 ; P8BE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2664 ; P8BE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2665 ; P8BE: vmrgew v2, [[REG3]], [[REG4]]
2666 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG1:[0-9]+]], vs3, vs1
2667 ; P8LE-DAG: xxmrghd {{[vs]+}}[[REG2:[0-9]+]], vs4, vs2
2668 ; P8LE-DAG: xvcvdpuxws [[REG3:v[0-9]+]], {{[vs]+}}[[REG1]]
2669 ; P8LE-DAG: xvcvdpuxws [[REG4:v[0-9]+]], {{[vs]+}}[[REG2]]
2670 ; P8LE: vmrgew v2, [[REG4]], [[REG3]]
; fromDiffConstsConvdtoui: returns the non-splat constant vector
; <24, 234, 988, 422>. The IR contains no conversion instruction.
; Only the per-run function labels are visible in this excerpt.
2673 ; Function Attrs: norecurse nounwind readnone
2674 define <4 x i32> @fromDiffConstsConvdtoui() {
2676 ret <4 x i32> <i32 24, i32 234, i32 988, i32 422>
2677 ; P9BE-LABEL: fromDiffConstsConvdtoui
2678 ; P9LE-LABEL: fromDiffConstsConvdtoui
2679 ; P8BE-LABEL: fromDiffConstsConvdtoui
2680 ; P8LE-LABEL: fromDiffConstsConvdtoui
; fromDiffMemConsAConvdtoui: loads two <2 x double> vectors (from %ptr and
; from two doubles past %ptr), converts each to <2 x i32> with fptoui, and
; concatenates them into a <4 x i32> with a 0,1,2,3 shuffle mask.
; Expected lowering: two vector loads (lxv at offsets 0 and 16 on pwr9; two
; lxvd2x on pwr8), xxmrgld/xxmrghd to regroup the doubles, one xvcvdpuxws per
; group, and a final vmrgew. The pwr8 little-endian run additionally expects
; an xxswapd on each loaded vector before the merges.
2692 ; Function Attrs: norecurse nounwind readonly
2693 define <4 x i32> @fromDiffMemConsAConvdtoui(double* nocapture readonly %ptr) {
2695 %0 = bitcast double* %ptr to <2 x double>*
2696 %1 = load <2 x double>, <2 x double>* %0, align 8
2697 %2 = fptoui <2 x double> %1 to <2 x i32>
2698 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 2
2699 %3 = bitcast double* %arrayidx4 to <2 x double>*
2700 %4 = load <2 x double>, <2 x double>* %3, align 8
2701 %5 = fptoui <2 x double> %4 to <2 x i32>
2702 %vecinit9 = shufflevector <2 x i32> %2, <2 x i32> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2703 ret <4 x i32> %vecinit9
2704 ; P9BE-LABEL: fromDiffMemConsAConvdtoui
2705 ; P9LE-LABEL: fromDiffMemConsAConvdtoui
2706 ; P8BE-LABEL: fromDiffMemConsAConvdtoui
2707 ; P8LE-LABEL: fromDiffMemConsAConvdtoui
2708 ; P9BE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
2709 ; P9BE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
2710 ; P9BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
2711 ; P9BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
2712 ; P9BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
2713 ; P9BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
2714 ; P9BE: vmrgew v2, [[REG6]], [[REG5]]
2715 ; P9LE-DAG: lxv [[REG1:[vs0-9]+]], 0(r3)
2716 ; P9LE-DAG: lxv [[REG2:[vs0-9]+]], 16(r3)
2717 ; P9LE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG2]], [[REG1]]
2718 ; P9LE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG2]], [[REG1]]
2719 ; P9LE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
2720 ; P9LE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
2721 ; P9LE: vmrgew v2, [[REG6]], [[REG5]]
2722 ; P8BE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
2723 ; P8BE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
2724 ; P8BE-DAG: xxmrgld [[REG3:[vs0-9]+]], [[REG1]], [[REG2]]
2725 ; P8BE-DAG: xxmrghd [[REG4:[vs0-9]+]], [[REG1]], [[REG2]]
2726 ; P8BE-DAG: xvcvdpuxws [[REG5:[vs0-9]+]], [[REG3]]
2727 ; P8BE-DAG: xvcvdpuxws [[REG6:[vs0-9]+]], [[REG4]]
2728 ; P8BE: vmrgew v2, [[REG6]], [[REG5]]
2729 ; P8LE: lxvd2x [[REG1:[vs0-9]+]], 0, r3
2730 ; P8LE: lxvd2x [[REG2:[vs0-9]+]], r3, r4
2731 ; P8LE-DAG: xxswapd [[REG3:[vs0-9]+]], [[REG1]]
2732 ; P8LE-DAG: xxswapd [[REG4:[vs0-9]+]], [[REG2]]
2733 ; P8LE-DAG: xxmrgld [[REG5:[vs0-9]+]], [[REG4]], [[REG3]]
2734 ; P8LE-DAG: xxmrghd [[REG6:[vs0-9]+]], [[REG4]], [[REG3]]
2735 ; P8LE-DAG: xvcvdpuxws [[REG7:[vs0-9]+]], [[REG5]]
2736 ; P8LE-DAG: xvcvdpuxws [[REG8:[vs0-9]+]], [[REG6]]
2737 ; P8LE: vmrgew v2, [[REG8]], [[REG7]]
; fromDiffMemConsDConvdtoui: loads ptr[3], ptr[2], ptr[1], ptr[0] as doubles
; (constant, descending indices), converts each with fptoui and places the
; results in lanes 0, 1, 2, 3.
; Only the per-run function labels are visible in this excerpt.
2740 ; Function Attrs: norecurse nounwind readonly
2741 define <4 x i32> @fromDiffMemConsDConvdtoui(double* nocapture readonly %ptr) {
2743 %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
2744 %0 = load double, double* %arrayidx, align 8
2745 %conv = fptoui double %0 to i32
2746 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2747 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
2748 %1 = load double, double* %arrayidx1, align 8
2749 %conv2 = fptoui double %1 to i32
2750 %vecinit3 = insertelement <4 x i32> %vecinit, i32 %conv2, i32 1
2751 %arrayidx4 = getelementptr inbounds double, double* %ptr, i64 1
2752 %2 = load double, double* %arrayidx4, align 8
2753 %conv5 = fptoui double %2 to i32
2754 %vecinit6 = insertelement <4 x i32> %vecinit3, i32 %conv5, i32 2
2755 %3 = load double, double* %ptr, align 8
2756 %conv8 = fptoui double %3 to i32
2757 %vecinit9 = insertelement <4 x i32> %vecinit6, i32 %conv8, i32 3
2758 ret <4 x i32> %vecinit9
2759 ; P9BE-LABEL: fromDiffMemConsDConvdtoui
2760 ; P9LE-LABEL: fromDiffMemConsDConvdtoui
2761 ; P8BE-LABEL: fromDiffMemConsDConvdtoui
2762 ; P8LE-LABEL: fromDiffMemConsDConvdtoui
; fromDiffMemVarAConvdtoui: loads arr[elem], arr[elem+1], arr[elem+2],
; arr[elem+3] as doubles (ascending, variable base index), converts each with
; fptoui and places the results in lanes 0, 1, 2, 3.
; Only the per-run function labels are visible in this excerpt.
2801 ; Function Attrs: norecurse nounwind readonly
2802 define <4 x i32> @fromDiffMemVarAConvdtoui(double* nocapture readonly %arr, i32 signext %elem) {
2804 %idxprom = sext i32 %elem to i64
2805 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
2806 %0 = load double, double* %arrayidx, align 8
2807 %conv = fptoui double %0 to i32
2808 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2809 %add = add nsw i32 %elem, 1
2810 %idxprom1 = sext i32 %add to i64
2811 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
2812 %1 = load double, double* %arrayidx2, align 8
2813 %conv3 = fptoui double %1 to i32
2814 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
2815 %add5 = add nsw i32 %elem, 2
2816 %idxprom6 = sext i32 %add5 to i64
2817 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6
2818 %2 = load double, double* %arrayidx7, align 8
2819 %conv8 = fptoui double %2 to i32
2820 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
2821 %add10 = add nsw i32 %elem, 3
2822 %idxprom11 = sext i32 %add10 to i64
2823 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11
2824 %3 = load double, double* %arrayidx12, align 8
2825 %conv13 = fptoui double %3 to i32
2826 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
2827 ret <4 x i32> %vecinit14
2828 ; P9BE-LABEL: fromDiffMemVarAConvdtoui
2829 ; P9LE-LABEL: fromDiffMemVarAConvdtoui
2830 ; P8BE-LABEL: fromDiffMemVarAConvdtoui
2831 ; P8LE-LABEL: fromDiffMemVarAConvdtoui
; fromDiffMemVarDConvdtoui: loads arr[elem], arr[elem-1], arr[elem-2],
; arr[elem-3] as doubles (descending, variable base index), converts each with
; fptoui and places the results in lanes 0, 1, 2, 3.
; Only the per-run function labels are visible in this excerpt.
2870 ; Function Attrs: norecurse nounwind readonly
2871 define <4 x i32> @fromDiffMemVarDConvdtoui(double* nocapture readonly %arr, i32 signext %elem) {
2873 %idxprom = sext i32 %elem to i64
2874 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
2875 %0 = load double, double* %arrayidx, align 8
2876 %conv = fptoui double %0 to i32
2877 %vecinit = insertelement <4 x i32> undef, i32 %conv, i32 0
2878 %sub = add nsw i32 %elem, -1
2879 %idxprom1 = sext i32 %sub to i64
2880 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
2881 %1 = load double, double* %arrayidx2, align 8
2882 %conv3 = fptoui double %1 to i32
2883 %vecinit4 = insertelement <4 x i32> %vecinit, i32 %conv3, i32 1
2884 %sub5 = add nsw i32 %elem, -2
2885 %idxprom6 = sext i32 %sub5 to i64
2886 %arrayidx7 = getelementptr inbounds double, double* %arr, i64 %idxprom6
2887 %2 = load double, double* %arrayidx7, align 8
2888 %conv8 = fptoui double %2 to i32
2889 %vecinit9 = insertelement <4 x i32> %vecinit4, i32 %conv8, i32 2
2890 %sub10 = add nsw i32 %elem, -3
2891 %idxprom11 = sext i32 %sub10 to i64
2892 %arrayidx12 = getelementptr inbounds double, double* %arr, i64 %idxprom11
2893 %3 = load double, double* %arrayidx12, align 8
2894 %conv13 = fptoui double %3 to i32
2895 %vecinit14 = insertelement <4 x i32> %vecinit9, i32 %conv13, i32 3
2896 ret <4 x i32> %vecinit14
2897 ; P9BE-LABEL: fromDiffMemVarDConvdtoui
2898 ; P9LE-LABEL: fromDiffMemVarDConvdtoui
2899 ; P8BE-LABEL: fromDiffMemVarDConvdtoui
2900 ; P8LE-LABEL: fromDiffMemVarDConvdtoui
; spltRegValConvdtoui: converts a double argument to i32 with fptoui and
; splats the result across a <4 x i32>.
; Only the per-run function labels are visible in this excerpt.
2939 ; Function Attrs: norecurse nounwind readnone
2940 define <4 x i32> @spltRegValConvdtoui(double %val) {
2942 %conv = fptoui double %val to i32
2943 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
2944 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2945 ret <4 x i32> %splat.splat
2946 ; P9BE-LABEL: spltRegValConvdtoui
2947 ; P9LE-LABEL: spltRegValConvdtoui
2948 ; P8BE-LABEL: spltRegValConvdtoui
2949 ; P8LE-LABEL: spltRegValConvdtoui
; spltMemValConvdtoui: loads one double from %ptr, converts it to i32 with
; fptoui and splats the result across a <4 x i32>.
; Only the per-run function labels are visible in this excerpt.
2964 ; Function Attrs: norecurse nounwind readonly
2965 define <4 x i32> @spltMemValConvdtoui(double* nocapture readonly %ptr) {
2967 %0 = load double, double* %ptr, align 8
2968 %conv = fptoui double %0 to i32
2969 %splat.splatinsert = insertelement <4 x i32> undef, i32 %conv, i32 0
2970 %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
2971 ret <4 x i32> %splat.splat
2972 ; P9BE-LABEL: spltMemValConvdtoui
2973 ; P9LE-LABEL: spltMemValConvdtoui
2974 ; P8BE-LABEL: spltMemValConvdtoui
2975 ; P8LE-LABEL: spltMemValConvdtoui
; allZeroll: returns an all-zero <2 x i64>.
; All four runs expect the register to be cleared by XOR-ing it with itself
; (xxlxor v2, v2, v2).
2993 ; Function Attrs: norecurse nounwind readnone
2994 define <2 x i64> @allZeroll() {
2996 ret <2 x i64> zeroinitializer
2997 ; P9BE-LABEL: allZeroll
2998 ; P9LE-LABEL: allZeroll
2999 ; P8BE-LABEL: allZeroll
3000 ; P8LE-LABEL: allZeroll
3001 ; P9BE: xxlxor v2, v2, v2
3003 ; P9LE: xxlxor v2, v2, v2
3005 ; P8BE: xxlxor v2, v2, v2
3007 ; P8LE: xxlxor v2, v2, v2
; allOnell: returns a <2 x i64> constant whose two lanes are both -1.
; The expected lowering is a single byte splat: xxspltib 255 for the pwr9 runs
; and vspltisb -1 for the pwr8 runs.
3011 ; Function Attrs: norecurse nounwind readnone
3012 define <2 x i64> @allOnell() {
3014 ret <2 x i64> <i64 -1, i64 -1>
3015 ; P9BE-LABEL: allOnell
3016 ; P9LE-LABEL: allOnell
3017 ; P8BE-LABEL: allOnell
3018 ; P8LE-LABEL: allOnell
3019 ; P9BE: xxspltib v2, 255
3021 ; P9LE: xxspltib v2, 255
3023 ; P8BE: vspltisb v2, -1
3025 ; P8LE: vspltisb v2, -1
; spltConst1ll: returns a <2 x i64> constant with 1 in both lanes.
; Only the per-run function labels are visible in this excerpt.
3029 ; Function Attrs: norecurse nounwind readnone
3030 define <2 x i64> @spltConst1ll() {
3032 ret <2 x i64> <i64 1, i64 1>
3033 ; P9BE-LABEL: spltConst1ll
3034 ; P9LE-LABEL: spltConst1ll
3035 ; P8BE-LABEL: spltConst1ll
3036 ; P8LE-LABEL: spltConst1ll
; spltConst16kll: returns a <2 x i64> constant with 32767 in both lanes.
; Only the per-run function labels are visible in this excerpt.
3047 ; Function Attrs: norecurse nounwind readnone
3048 define <2 x i64> @spltConst16kll() {
3050 ret <2 x i64> <i64 32767, i64 32767>
3051 ; P9BE-LABEL: spltConst16kll
3052 ; P9LE-LABEL: spltConst16kll
3053 ; P8BE-LABEL: spltConst16kll
3054 ; P8LE-LABEL: spltConst16kll
; spltConst32kll: returns a <2 x i64> constant with 65535 in both lanes.
; Only the per-run function labels are visible in this excerpt.
3065 ; Function Attrs: norecurse nounwind readnone
3066 define <2 x i64> @spltConst32kll() {
3068 ret <2 x i64> <i64 65535, i64 65535>
3069 ; P9BE-LABEL: spltConst32kll
3070 ; P9LE-LABEL: spltConst32kll
3071 ; P8BE-LABEL: spltConst32kll
3072 ; P8LE-LABEL: spltConst32kll
; fromRegsll: builds a <2 x i64> from two i64 arguments, %a in lane 0 and %b
; in lane 1.
; Expected on pwr9: a single mtvsrdd, with the two GPR operands in opposite
; order on big- and little-endian. On pwr8 the visible checks expect one
; mtvsrd per argument, in either order.
3083 ; Function Attrs: norecurse nounwind readnone
3084 define <2 x i64> @fromRegsll(i64 %a, i64 %b) {
3086 %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
3087 %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
3088 ret <2 x i64> %vecinit1
3089 ; P9BE-LABEL: fromRegsll
3090 ; P9LE-LABEL: fromRegsll
3091 ; P8BE-LABEL: fromRegsll
3092 ; P8LE-LABEL: fromRegsll
3093 ; P9BE: mtvsrdd v2, r3, r4
3095 ; P9LE: mtvsrdd v2, r4, r3
3097 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3
3098 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4
3101 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3
3102 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4
3107 ; Function Attrs: norecurse nounwind readnone
; fromDiffConstsll: returns the constant <2 x i64> with two different
; elements, 242 and -113.
3108 define <2 x i64> @fromDiffConstsll() {
3110 ret <2 x i64> <i64 242, i64 -113>
3111 ; P9BE-LABEL: fromDiffConstsll
3112 ; P9LE-LABEL: fromDiffConstsll
3113 ; P8BE-LABEL: fromDiffConstsll
3114 ; P8LE-LABEL: fromDiffConstsll
3125 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsAll: builds a <2 x i64> from two adjacent memory elements at
; constant, ascending indices: arr[0] goes to element 0, arr[1] to element 1.
3126 define <2 x i64> @fromDiffMemConsAll(i64* nocapture readonly %arr) {
3128 %0 = load i64, i64* %arr, align 8
3129 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3130 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 1
3131 %1 = load i64, i64* %arrayidx1, align 8
3132 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3133 ret <2 x i64> %vecinit2
3134 ; P9BE-LABEL: fromDiffMemConsAll
3135 ; P9LE-LABEL: fromDiffMemConsAll
3136 ; P8BE-LABEL: fromDiffMemConsAll
3137 ; P8LE-LABEL: fromDiffMemConsAll
3149 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsDll: builds a <2 x i64> from two adjacent memory elements at
; constant, descending indices: arr[3] goes to element 0, arr[2] to element 1.
3150 define <2 x i64> @fromDiffMemConsDll(i64* nocapture readonly %arr) {
3152 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3
3153 %0 = load i64, i64* %arrayidx, align 8
3154 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3155 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 2
3156 %1 = load i64, i64* %arrayidx1, align 8
3157 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3158 ret <2 x i64> %vecinit2
3159 ; P9BE-LABEL: fromDiffMemConsDll
3160 ; P9LE-LABEL: fromDiffMemConsDll
3161 ; P8BE-LABEL: fromDiffMemConsDll
3162 ; P8LE-LABEL: fromDiffMemConsDll
3175 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarAll: builds a <2 x i64> from two adjacent memory elements at
; variable, ascending indices: arr[elem] goes to element 0 and arr[elem + 1]
; to element 1. The index is sign-extended from i32 before addressing.
3176 define <2 x i64> @fromDiffMemVarAll(i64* nocapture readonly %arr, i32 signext %elem) {
3178 %idxprom = sext i32 %elem to i64
3179 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
3180 %0 = load i64, i64* %arrayidx, align 8
3181 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3182 %add = add nsw i32 %elem, 1
3183 %idxprom1 = sext i32 %add to i64
3184 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1
3185 %1 = load i64, i64* %arrayidx2, align 8
3186 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3187 ret <2 x i64> %vecinit3
3188 ; P9BE-LABEL: fromDiffMemVarAll
3189 ; P9LE-LABEL: fromDiffMemVarAll
3190 ; P8BE-LABEL: fromDiffMemVarAll
3191 ; P8LE-LABEL: fromDiffMemVarAll
3207 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarDll: builds a <2 x i64> from two adjacent memory elements at
; variable, descending indices: arr[elem] goes to element 0 and arr[elem - 1]
; to element 1. The index is sign-extended from i32 before addressing.
3208 define <2 x i64> @fromDiffMemVarDll(i64* nocapture readonly %arr, i32 signext %elem) {
3210 %idxprom = sext i32 %elem to i64
3211 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
3212 %0 = load i64, i64* %arrayidx, align 8
3213 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3214 %sub = add nsw i32 %elem, -1
3215 %idxprom1 = sext i32 %sub to i64
3216 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1
3217 %1 = load i64, i64* %arrayidx2, align 8
3218 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3219 ret <2 x i64> %vecinit3
3220 ; P9BE-LABEL: fromDiffMemVarDll
3221 ; P9LE-LABEL: fromDiffMemVarDll
3222 ; P8BE-LABEL: fromDiffMemVarDll
3223 ; P8LE-LABEL: fromDiffMemVarDll
3241 ; Function Attrs: norecurse nounwind readonly
; fromRandMemConsll: builds a <2 x i64> from two non-adjacent memory elements
; at constant indices: arr[4] goes to element 0, arr[18] to element 1.
3242 define <2 x i64> @fromRandMemConsll(i64* nocapture readonly %arr) {
3244 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4
3245 %0 = load i64, i64* %arrayidx, align 8
3246 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3247 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 18
3248 %1 = load i64, i64* %arrayidx1, align 8
3249 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3250 ret <2 x i64> %vecinit2
3251 ; P9BE-LABEL: fromRandMemConsll
3252 ; P9LE-LABEL: fromRandMemConsll
3253 ; P8BE-LABEL: fromRandMemConsll
3254 ; P8LE-LABEL: fromRandMemConsll
3277 ; Function Attrs: norecurse nounwind readonly
; fromRandMemVarll: builds a <2 x i64> from two non-adjacent memory elements
; at variable indices: arr[elem + 4] goes to element 0 and arr[elem + 1] to
; element 1. Each index is computed in i32 and then sign-extended.
3278 define <2 x i64> @fromRandMemVarll(i64* nocapture readonly %arr, i32 signext %elem) {
3280 %add = add nsw i32 %elem, 4
3281 %idxprom = sext i32 %add to i64
3282 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
3283 %0 = load i64, i64* %arrayidx, align 8
3284 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
3285 %add1 = add nsw i32 %elem, 1
3286 %idxprom2 = sext i32 %add1 to i64
3287 %arrayidx3 = getelementptr inbounds i64, i64* %arr, i64 %idxprom2
3288 %1 = load i64, i64* %arrayidx3, align 8
3289 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
3290 ret <2 x i64> %vecinit4
3291 ; P9BE-LABEL: fromRandMemVarll
3292 ; P9LE-LABEL: fromRandMemVarll
3293 ; P8BE-LABEL: fromRandMemVarll
3294 ; P8LE-LABEL: fromRandMemVarll
3321 ; Function Attrs: norecurse nounwind readnone
; spltRegValll: splats the scalar argument %val into both elements of a
; <2 x i64> (insert into element 0, then shuffle with an all-zero mask).
; The checks below expect `mtvsrdd v2, r3, r3` on pwr9 and, on pwr8, an
; mtvsrd from r3 followed by `xxspltd v2, <that register>, 0`.
3322 define <2 x i64> @spltRegValll(i64 %val) {
3324 %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0
3325 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3326 ret <2 x i64> %splat.splat
3327 ; P9BE-LABEL: spltRegValll
3328 ; P9LE-LABEL: spltRegValll
3329 ; P8BE-LABEL: spltRegValll
3330 ; P8LE-LABEL: spltRegValll
3331 ; P9BE: mtvsrdd v2, r3, r3
3333 ; P9LE: mtvsrdd v2, r3, r3
3335 ; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
3336 ; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
3338 ; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
3339 ; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
3343 ; Function Attrs: norecurse nounwind readonly
; spltMemValll: loads one i64 from %ptr and splats it into both elements of a
; <2 x i64> (insert into element 0, then shuffle with an all-zero mask).
3344 define <2 x i64> @spltMemValll(i64* nocapture readonly %ptr) {
3346 %0 = load i64, i64* %ptr, align 8
3347 %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
3348 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3349 ret <2 x i64> %splat.splat
3350 ; P9BE-LABEL: spltMemValll
3351 ; P9LE-LABEL: spltMemValll
3352 ; P8BE-LABEL: spltMemValll
3353 ; P8LE-LABEL: spltMemValll
3364 ; Function Attrs: norecurse nounwind readnone
; spltCnstConvftoll: returns the constant <2 x i64> splat of 4.
; NOTE(review): the name suggests this is the already-folded result of a
; float -> long long conversion in the C source; no conversion appears in the IR.
3365 define <2 x i64> @spltCnstConvftoll() {
3367 ret <2 x i64> <i64 4, i64 4>
3368 ; P9BE-LABEL: spltCnstConvftoll
3369 ; P9LE-LABEL: spltCnstConvftoll
3370 ; P8BE-LABEL: spltCnstConvftoll
3371 ; P8LE-LABEL: spltCnstConvftoll
3382 ; Function Attrs: norecurse nounwind readnone
; fromRegsConvftoll: converts two float arguments to signed i64 (fptosi) and
; places %a's result in element 0 and %b's result in element 1.
; The visible checks expect an `xvcvdpsxds v2` in every run configuration.
3383 define <2 x i64> @fromRegsConvftoll(float %a, float %b) {
3385 %conv = fptosi float %a to i64
3386 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3387 %conv1 = fptosi float %b to i64
3388 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
3389 ret <2 x i64> %vecinit2
3390 ; P9BE-LABEL: fromRegsConvftoll
3391 ; P9LE-LABEL: fromRegsConvftoll
3392 ; P8BE-LABEL: fromRegsConvftoll
3393 ; P8LE-LABEL: fromRegsConvftoll
3395 ; P9BE: xvcvdpsxds v2
3398 ; P9LE: xvcvdpsxds v2
3401 ; P8BE: xvcvdpsxds v2
3404 ; P8LE: xvcvdpsxds v2
3408 ; Function Attrs: norecurse nounwind readnone
; fromDiffConstsConvftoll: returns the constant <2 x i64> with elements 24 and
; 234.
; NOTE(review): presumably the folded result of float -> long long conversions
; in the C source; no conversion appears in the IR.
3409 define <2 x i64> @fromDiffConstsConvftoll() {
3411 ret <2 x i64> <i64 24, i64 234>
3412 ; P9BE-LABEL: fromDiffConstsConvftoll
3413 ; P9LE-LABEL: fromDiffConstsConvftoll
3414 ; P8BE-LABEL: fromDiffConstsConvftoll
3415 ; P8LE-LABEL: fromDiffConstsConvftoll
3427 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsAConvftoll: loads the floats ptr[0] and ptr[1], converts each
; to signed i64 (fptosi) and stores them in elements 0 and 1 respectively.
; The visible checks expect an `xvcvdpsxds v2` in every run configuration.
3428 define <2 x i64> @fromDiffMemConsAConvftoll(float* nocapture readonly %ptr) {
3430 %0 = load float, float* %ptr, align 4
3431 %conv = fptosi float %0 to i64
3432 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3433 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
3434 %1 = load float, float* %arrayidx1, align 4
3435 %conv2 = fptosi float %1 to i64
3436 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
3437 ret <2 x i64> %vecinit3
3438 ; P9BE-LABEL: fromDiffMemConsAConvftoll
3439 ; P9LE-LABEL: fromDiffMemConsAConvftoll
3440 ; P8BE-LABEL: fromDiffMemConsAConvftoll
3441 ; P8LE-LABEL: fromDiffMemConsAConvftoll
3445 ; P9BE-NEXT: xvcvdpsxds v2
3450 ; P9LE-NEXT: xvcvdpsxds v2
3455 ; P8BE-NEXT: xvcvdpsxds v2
3460 ; P8LE-NEXT: xvcvdpsxds v2
3464 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsDConvftoll: loads the floats ptr[3] and ptr[2] (descending
; order), converts each to signed i64 (fptosi) and stores them in elements 0
; and 1 respectively.
; The visible checks expect an `xvcvdpsxds v2` in every run configuration.
3465 define <2 x i64> @fromDiffMemConsDConvftoll(float* nocapture readonly %ptr) {
3467 %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
3468 %0 = load float, float* %arrayidx, align 4
3469 %conv = fptosi float %0 to i64
3470 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3471 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2
3472 %1 = load float, float* %arrayidx1, align 4
3473 %conv2 = fptosi float %1 to i64
3474 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
3475 ret <2 x i64> %vecinit3
3476 ; P9BE-LABEL: fromDiffMemConsDConvftoll
3477 ; P9LE-LABEL: fromDiffMemConsDConvftoll
3478 ; P8BE-LABEL: fromDiffMemConsDConvftoll
3479 ; P8LE-LABEL: fromDiffMemConsDConvftoll
3483 ; P9BE-NEXT: xvcvdpsxds v2
3488 ; P9LE-NEXT: xvcvdpsxds v2
3493 ; P8BE-NEXT: xvcvdpsxds v2
3498 ; P8LE-NEXT: xvcvdpsxds v2
3502 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarAConvftoll: loads the floats arr[elem] and arr[elem + 1],
; converts each to signed i64 (fptosi) and stores them in elements 0 and 1
; respectively. The index is sign-extended from i32 before addressing.
; The visible checks expect an `xvcvdpsxds v2` in every run configuration.
3503 define <2 x i64> @fromDiffMemVarAConvftoll(float* nocapture readonly %arr, i32 signext %elem) {
3505 %idxprom = sext i32 %elem to i64
3506 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
3507 %0 = load float, float* %arrayidx, align 4
3508 %conv = fptosi float %0 to i64
3509 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3510 %add = add nsw i32 %elem, 1
3511 %idxprom1 = sext i32 %add to i64
3512 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
3513 %1 = load float, float* %arrayidx2, align 4
3514 %conv3 = fptosi float %1 to i64
3515 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
3516 ret <2 x i64> %vecinit4
3517 ; P9BE-LABEL: fromDiffMemVarAConvftoll
3518 ; P9LE-LABEL: fromDiffMemVarAConvftoll
3519 ; P8BE-LABEL: fromDiffMemVarAConvftoll
3520 ; P8LE-LABEL: fromDiffMemVarAConvftoll
3525 ; P9BE-NEXT: xvcvdpsxds v2
3531 ; P9LE-NEXT: xvcvdpsxds v2
3537 ; P8BE-NEXT: xvcvdpsxds v2
3543 ; P8LE-NEXT: xvcvdpsxds v2
3547 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarDConvftoll: loads the floats arr[elem] and arr[elem - 1]
; (descending order), converts each to signed i64 (fptosi) and stores them in
; elements 0 and 1 respectively.
; The visible checks expect an `xvcvdpsxds v2` in every run configuration.
3548 define <2 x i64> @fromDiffMemVarDConvftoll(float* nocapture readonly %arr, i32 signext %elem) {
3550 %idxprom = sext i32 %elem to i64
3551 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
3552 %0 = load float, float* %arrayidx, align 4
3553 %conv = fptosi float %0 to i64
3554 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3555 %sub = add nsw i32 %elem, -1
3556 %idxprom1 = sext i32 %sub to i64
3557 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
3558 %1 = load float, float* %arrayidx2, align 4
3559 %conv3 = fptosi float %1 to i64
3560 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
3561 ret <2 x i64> %vecinit4
3562 ; P9BE-LABEL: fromDiffMemVarDConvftoll
3563 ; P9LE-LABEL: fromDiffMemVarDConvftoll
3564 ; P8BE-LABEL: fromDiffMemVarDConvftoll
3565 ; P8LE-LABEL: fromDiffMemVarDConvftoll
3570 ; P9BE-NEXT: xvcvdpsxds v2
3576 ; P9LE-NEXT: xvcvdpsxds v2
3582 ; P8BE-NEXT: xvcvdpsxds v2
3588 ; P8LE-NEXT: xvcvdpsxds v2
3592 ; Function Attrs: norecurse nounwind readnone
; spltRegValConvftoll: converts the float argument to signed i64 (fptosi) and
; splats the result into both elements of a <2 x i64>.
; The visible checks expect an `xxspltd v2` in every run configuration.
3593 define <2 x i64> @spltRegValConvftoll(float %val) {
3595 %conv = fptosi float %val to i64
3596 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
3597 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3598 ret <2 x i64> %splat.splat
3599 ; P9BE-LABEL: spltRegValConvftoll
3600 ; P9LE-LABEL: spltRegValConvftoll
3601 ; P8BE-LABEL: spltRegValConvftoll
3602 ; P8LE-LABEL: spltRegValConvftoll
3604 ; P9BE-NEXT: xxspltd v2
3607 ; P9LE-NEXT: xxspltd v2
3610 ; P8BE-NEXT: xxspltd v2
3613 ; P8LE-NEXT: xxspltd v2
3617 ; Function Attrs: norecurse nounwind readonly
; spltMemValConvftoll: loads one float from %ptr, converts it to signed i64
; (fptosi) and splats the result into both elements of a <2 x i64>.
; The visible checks expect a scalar `xscvdpsxds` immediately followed by
; `xxspltd v2` in every run configuration.
3618 define <2 x i64> @spltMemValConvftoll(float* nocapture readonly %ptr) {
3620 %0 = load float, float* %ptr, align 4
3621 %conv = fptosi float %0 to i64
3622 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
3623 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3624 ret <2 x i64> %splat.splat
3625 ; P9BE-LABEL: spltMemValConvftoll
3626 ; P9LE-LABEL: spltMemValConvftoll
3627 ; P8BE-LABEL: spltMemValConvftoll
3628 ; P8LE-LABEL: spltMemValConvftoll
3630 ; P9BE-NEXT: xscvdpsxds
3631 ; P9BE-NEXT: xxspltd v2
3634 ; P9LE-NEXT: xscvdpsxds
3635 ; P9LE-NEXT: xxspltd v2
3638 ; P8BE-NEXT: xscvdpsxds
3639 ; P8BE-NEXT: xxspltd v2
3642 ; P8LE-NEXT: xscvdpsxds
3643 ; P8LE-NEXT: xxspltd v2
3647 ; Function Attrs: norecurse nounwind readnone
; spltCnstConvdtoll: returns the constant <2 x i64> splat of 4.
; NOTE(review): the name suggests this is the already-folded result of a
; double -> long long conversion in the C source; no conversion appears in the IR.
3648 define <2 x i64> @spltCnstConvdtoll() {
3650 ret <2 x i64> <i64 4, i64 4>
3651 ; P9BE-LABEL: spltCnstConvdtoll
3652 ; P9LE-LABEL: spltCnstConvdtoll
3653 ; P8BE-LABEL: spltCnstConvdtoll
3654 ; P8LE-LABEL: spltCnstConvdtoll
3665 ; Function Attrs: norecurse nounwind readnone
; fromRegsConvdtoll: converts two double arguments to signed i64 (fptosi) and
; places %a's result in element 0 and %b's result in element 1.
; The visible checks expect an `xvcvdpsxds` in every run configuration.
3666 define <2 x i64> @fromRegsConvdtoll(double %a, double %b) {
3668 %conv = fptosi double %a to i64
3669 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3670 %conv1 = fptosi double %b to i64
3671 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
3672 ret <2 x i64> %vecinit2
3673 ; P9BE-LABEL: fromRegsConvdtoll
3674 ; P9LE-LABEL: fromRegsConvdtoll
3675 ; P8BE-LABEL: fromRegsConvdtoll
3676 ; P8LE-LABEL: fromRegsConvdtoll
3678 ; P9BE-NEXT: xvcvdpsxds
3681 ; P9LE-NEXT: xvcvdpsxds
3684 ; P8BE-NEXT: xvcvdpsxds
3687 ; P8LE-NEXT: xvcvdpsxds
3691 ; Function Attrs: norecurse nounwind readnone
; fromDiffConstsConvdtoll: returns the constant <2 x i64> with elements 24 and
; 234.
; NOTE(review): presumably the folded result of double -> long long
; conversions in the C source; no conversion appears in the IR.
3692 define <2 x i64> @fromDiffConstsConvdtoll() {
3694 ret <2 x i64> <i64 24, i64 234>
3695 ; P9BE-LABEL: fromDiffConstsConvdtoll
3696 ; P9LE-LABEL: fromDiffConstsConvdtoll
3697 ; P8BE-LABEL: fromDiffConstsConvdtoll
3698 ; P8LE-LABEL: fromDiffConstsConvdtoll
3709 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsAConvdtoll: unlike its scalar siblings, this case is already
; in vector form. %ptr is reinterpreted as a <2 x double>*, both doubles are
; loaded with one 8-byte-aligned vector load, and a vector fptosi converts them
; to <2 x i64>.
; The visible checks expect an `xvcvdpsxds v2` in every run configuration.
3710 define <2 x i64> @fromDiffMemConsAConvdtoll(double* nocapture readonly %ptr) {
3712 %0 = bitcast double* %ptr to <2 x double>*
3713 %1 = load <2 x double>, <2 x double>* %0, align 8
3714 %2 = fptosi <2 x double> %1 to <2 x i64>
3716 ; P9BE-LABEL: fromDiffMemConsAConvdtoll
3717 ; P9LE-LABEL: fromDiffMemConsAConvdtoll
3718 ; P8BE-LABEL: fromDiffMemConsAConvdtoll
3719 ; P8LE-LABEL: fromDiffMemConsAConvdtoll
3721 ; P9BE-NEXT: xvcvdpsxds v2
3724 ; P9LE-NEXT: xvcvdpsxds v2
3727 ; P8BE-NEXT: xvcvdpsxds v2
3731 ; P8LE-NEXT: xvcvdpsxds v2
3735 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsDConvdtoll: loads the doubles ptr[3] and ptr[2] (descending
; order), converts each to signed i64 (fptosi) and stores them in elements 0
; and 1 respectively.
; The visible checks expect `xvcvdpsxds v2` everywhere; for pwr9 (BE and LE)
; and pwr8 BE it is immediately preceded by an `xxswapd`.
3736 define <2 x i64> @fromDiffMemConsDConvdtoll(double* nocapture readonly %ptr) {
3738 %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
3739 %0 = load double, double* %arrayidx, align 8
3740 %conv = fptosi double %0 to i64
3741 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3742 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
3743 %1 = load double, double* %arrayidx1, align 8
3744 %conv2 = fptosi double %1 to i64
3745 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
3746 ret <2 x i64> %vecinit3
3747 ; P9BE-LABEL: fromDiffMemConsDConvdtoll
3748 ; P9LE-LABEL: fromDiffMemConsDConvdtoll
3749 ; P8BE-LABEL: fromDiffMemConsDConvdtoll
3750 ; P8LE-LABEL: fromDiffMemConsDConvdtoll
3752 ; P9BE-NEXT: xxswapd
3753 ; P9BE-NEXT: xvcvdpsxds v2
3756 ; P9LE-NEXT: xxswapd
3757 ; P9LE-NEXT: xvcvdpsxds v2
3760 ; P8BE-NEXT: xxswapd
3761 ; P8BE-NEXT: xvcvdpsxds v2
3764 ; P8LE-NEXT: xvcvdpsxds v2
3768 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarAConvdtoll: loads the doubles arr[elem] and arr[elem + 1],
; converts each to signed i64 (fptosi) and stores them in elements 0 and 1
; respectively. The index is sign-extended from i32 before addressing.
; The visible checks expect `xvcvdpsxds v2` everywhere; for pwr8 LE it is
; immediately preceded by an `xxswapd`.
3769 define <2 x i64> @fromDiffMemVarAConvdtoll(double* nocapture readonly %arr, i32 signext %elem) {
3771 %idxprom = sext i32 %elem to i64
3772 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
3773 %0 = load double, double* %arrayidx, align 8
3774 %conv = fptosi double %0 to i64
3775 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3776 %add = add nsw i32 %elem, 1
3777 %idxprom1 = sext i32 %add to i64
3778 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
3779 %1 = load double, double* %arrayidx2, align 8
3780 %conv3 = fptosi double %1 to i64
3781 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
3782 ret <2 x i64> %vecinit4
3783 ; P9BE-LABEL: fromDiffMemVarAConvdtoll
3784 ; P9LE-LABEL: fromDiffMemVarAConvdtoll
3785 ; P8BE-LABEL: fromDiffMemVarAConvdtoll
3786 ; P8LE-LABEL: fromDiffMemVarAConvdtoll
3789 ; P9BE-NEXT: xvcvdpsxds v2
3793 ; P9LE-NEXT: xvcvdpsxds v2
3797 ; P8BE-NEXT: xvcvdpsxds v2
3801 ; P8LE-NEXT: xxswapd
3802 ; P8LE-NEXT: xvcvdpsxds v2
3806 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarDConvdtoll: loads the doubles arr[elem] and arr[elem - 1]
; (descending order), converts each to signed i64 (fptosi) and stores them in
; elements 0 and 1 respectively.
; The visible checks expect `xvcvdpsxds v2` everywhere; for pwr9 (BE and LE)
; and pwr8 BE it is immediately preceded by an `xxswapd`.
3807 define <2 x i64> @fromDiffMemVarDConvdtoll(double* nocapture readonly %arr, i32 signext %elem) {
3809 %idxprom = sext i32 %elem to i64
3810 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
3811 %0 = load double, double* %arrayidx, align 8
3812 %conv = fptosi double %0 to i64
3813 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
3814 %sub = add nsw i32 %elem, -1
3815 %idxprom1 = sext i32 %sub to i64
3816 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
3817 %1 = load double, double* %arrayidx2, align 8
3818 %conv3 = fptosi double %1 to i64
3819 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
3820 ret <2 x i64> %vecinit4
3821 ; P9BE-LABEL: fromDiffMemVarDConvdtoll
3822 ; P9LE-LABEL: fromDiffMemVarDConvdtoll
3823 ; P8BE-LABEL: fromDiffMemVarDConvdtoll
3824 ; P8LE-LABEL: fromDiffMemVarDConvdtoll
3827 ; P9BE-NEXT: xxswapd
3828 ; P9BE-NEXT: xvcvdpsxds v2
3832 ; P9LE-NEXT: xxswapd
3833 ; P9LE-NEXT: xvcvdpsxds v2
3837 ; P8BE-NEXT: xxswapd
3838 ; P8BE-NEXT: xvcvdpsxds v2
3842 ; P8LE-NEXT: xvcvdpsxds v2
3846 ; Function Attrs: norecurse nounwind readnone
; spltRegValConvdtoll: converts the double argument to signed i64 (fptosi) and
; splats the result into both elements of a <2 x i64>.
; The visible checks expect an `xxspltd v2` in every run configuration.
3847 define <2 x i64> @spltRegValConvdtoll(double %val) {
3849 %conv = fptosi double %val to i64
3850 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
3851 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3852 ret <2 x i64> %splat.splat
3853 ; P9BE-LABEL: spltRegValConvdtoll
3854 ; P9LE-LABEL: spltRegValConvdtoll
3855 ; P8BE-LABEL: spltRegValConvdtoll
3856 ; P8LE-LABEL: spltRegValConvdtoll
3858 ; P9BE-NEXT: xxspltd v2
3861 ; P9LE-NEXT: xxspltd v2
3864 ; P8BE-NEXT: xxspltd v2
3867 ; P8LE-NEXT: xxspltd v2
3871 ; Function Attrs: norecurse nounwind readonly
; spltMemValConvdtoll: loads one double from %ptr, converts it to signed i64
; (fptosi) and splats the result into both elements of a <2 x i64>.
; The visible checks expect the vector form `xvcvdpsxds` in every run
; configuration.
3872 define <2 x i64> @spltMemValConvdtoll(double* nocapture readonly %ptr) {
3874 %0 = load double, double* %ptr, align 8
3875 %conv = fptosi double %0 to i64
3876 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
3877 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
3878 ret <2 x i64> %splat.splat
3879 ; P9BE-LABEL: spltMemValConvdtoll
3880 ; P9LE-LABEL: spltMemValConvdtoll
3881 ; P8BE-LABEL: spltMemValConvdtoll
3882 ; P8LE-LABEL: spltMemValConvdtoll
3884 ; P9BE-NEXT: xvcvdpsxds
3887 ; P9LE-NEXT: xvcvdpsxds
3890 ; P8BE-NEXT: xvcvdpsxds
3893 ; P8LE-NEXT: xvcvdpsxds
3897 ; Function Attrs: norecurse nounwind readnone
; allZeroull: returns the constant <2 x i64> zeroinitializer. The IR body is
; the same as @allZeroll, since i64 carries no signedness.
; The checks below expect `xxlxor v2, v2, v2` for every run configuration.
3898 define <2 x i64> @allZeroull() {
3900 ret <2 x i64> zeroinitializer
3901 ; P9BE-LABEL: allZeroull
3902 ; P9LE-LABEL: allZeroull
3903 ; P8BE-LABEL: allZeroull
3904 ; P8LE-LABEL: allZeroull
3905 ; P9BE: xxlxor v2, v2, v2
3907 ; P9LE: xxlxor v2, v2, v2
3909 ; P8BE: xxlxor v2, v2, v2
3911 ; P8LE: xxlxor v2, v2, v2
3915 ; Function Attrs: norecurse nounwind readnone
; allOneull: returns the constant <2 x i64> with both elements set to -1
; (all bits set).
; The checks below expect `xxspltib v2, 255` on pwr9 and `vspltisb v2, -1`
; on pwr8 (both endiannesses).
3916 define <2 x i64> @allOneull() {
3918 ret <2 x i64> <i64 -1, i64 -1>
3919 ; P9BE-LABEL: allOneull
3920 ; P9LE-LABEL: allOneull
3921 ; P8BE-LABEL: allOneull
3922 ; P8LE-LABEL: allOneull
3923 ; P9BE: xxspltib v2, 255
3925 ; P9LE: xxspltib v2, 255
3927 ; P8BE: vspltisb v2, -1
3929 ; P8LE: vspltisb v2, -1
3933 ; Function Attrs: norecurse nounwind readnone
; spltConst1ull: returns the constant <2 x i64> splat of 1.
3934 define <2 x i64> @spltConst1ull() {
3936 ret <2 x i64> <i64 1, i64 1>
3937 ; P9BE-LABEL: spltConst1ull
3938 ; P9LE-LABEL: spltConst1ull
3939 ; P8BE-LABEL: spltConst1ull
3940 ; P8LE-LABEL: spltConst1ull
3951 ; Function Attrs: norecurse nounwind readnone
; spltConst16kull: returns the constant <2 x i64> splat of 32767.
; NOTE(review): the name says "16k" but the splatted value is 32767.
3952 define <2 x i64> @spltConst16kull() {
3954 ret <2 x i64> <i64 32767, i64 32767>
3955 ; P9BE-LABEL: spltConst16kull
3956 ; P9LE-LABEL: spltConst16kull
3957 ; P8BE-LABEL: spltConst16kull
3958 ; P8LE-LABEL: spltConst16kull
3969 ; Function Attrs: norecurse nounwind readnone
; spltConst32kull: returns the constant <2 x i64> splat of 65535.
; NOTE(review): the name says "32k" but the splatted value is 65535.
3970 define <2 x i64> @spltConst32kull() {
3972 ret <2 x i64> <i64 65535, i64 65535>
3973 ; P9BE-LABEL: spltConst32kull
3974 ; P9LE-LABEL: spltConst32kull
3975 ; P8BE-LABEL: spltConst32kull
3976 ; P8LE-LABEL: spltConst32kull
3987 ; Function Attrs: norecurse nounwind readnone
; fromRegsull: builds a <2 x i64> from two scalar arguments, %a in element 0
; and %b in element 1.
; The checks below expect a single mtvsrdd on pwr9 (operands r3, r4 on BE and
; r4, r3 on LE) and, on pwr8, one mtvsrd from each of r3 and r4 in any order.
3988 define <2 x i64> @fromRegsull(i64 %a, i64 %b) {
3990 %vecinit = insertelement <2 x i64> undef, i64 %a, i32 0
3991 %vecinit1 = insertelement <2 x i64> %vecinit, i64 %b, i32 1
3992 ret <2 x i64> %vecinit1
3993 ; P9BE-LABEL: fromRegsull
3994 ; P9LE-LABEL: fromRegsull
3995 ; P8BE-LABEL: fromRegsull
3996 ; P8LE-LABEL: fromRegsull
3997 ; P9BE: mtvsrdd v2, r3, r4
3999 ; P9LE: mtvsrdd v2, r4, r3
4001 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r3
4002 ; P8BE-DAG: mtvsrd {{[vsf0-9]+}}, r4
4005 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r3
4006 ; P8LE-DAG: mtvsrd {{[vsf0-9]+}}, r4
4011 ; Function Attrs: norecurse nounwind readnone
; fromDiffConstsull: returns the constant <2 x i64> with two different
; elements, 242 and -113.
4012 define <2 x i64> @fromDiffConstsull() {
4014 ret <2 x i64> <i64 242, i64 -113>
4015 ; P9BE-LABEL: fromDiffConstsull
4016 ; P9LE-LABEL: fromDiffConstsull
4017 ; P8BE-LABEL: fromDiffConstsull
4018 ; P8LE-LABEL: fromDiffConstsull
4029 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsAull: builds a <2 x i64> from two adjacent memory elements at
; constant, ascending indices: arr[0] goes to element 0, arr[1] to element 1.
4030 define <2 x i64> @fromDiffMemConsAull(i64* nocapture readonly %arr) {
4032 %0 = load i64, i64* %arr, align 8
4033 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4034 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 1
4035 %1 = load i64, i64* %arrayidx1, align 8
4036 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4037 ret <2 x i64> %vecinit2
4038 ; P9BE-LABEL: fromDiffMemConsAull
4039 ; P9LE-LABEL: fromDiffMemConsAull
4040 ; P8BE-LABEL: fromDiffMemConsAull
4041 ; P8LE-LABEL: fromDiffMemConsAull
4053 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsDull: builds a <2 x i64> from two adjacent memory elements at
; constant, descending indices: arr[3] goes to element 0, arr[2] to element 1.
4054 define <2 x i64> @fromDiffMemConsDull(i64* nocapture readonly %arr) {
4056 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 3
4057 %0 = load i64, i64* %arrayidx, align 8
4058 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4059 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 2
4060 %1 = load i64, i64* %arrayidx1, align 8
4061 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4062 ret <2 x i64> %vecinit2
4063 ; P9BE-LABEL: fromDiffMemConsDull
4064 ; P9LE-LABEL: fromDiffMemConsDull
4065 ; P8BE-LABEL: fromDiffMemConsDull
4066 ; P8LE-LABEL: fromDiffMemConsDull
4079 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarAull: builds a <2 x i64> from two adjacent memory elements at
; variable, ascending indices: arr[elem] goes to element 0 and arr[elem + 1]
; to element 1. The index is sign-extended from i32 before addressing.
4080 define <2 x i64> @fromDiffMemVarAull(i64* nocapture readonly %arr, i32 signext %elem) {
4082 %idxprom = sext i32 %elem to i64
4083 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
4084 %0 = load i64, i64* %arrayidx, align 8
4085 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4086 %add = add nsw i32 %elem, 1
4087 %idxprom1 = sext i32 %add to i64
4088 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1
4089 %1 = load i64, i64* %arrayidx2, align 8
4090 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4091 ret <2 x i64> %vecinit3
4092 ; P9BE-LABEL: fromDiffMemVarAull
4093 ; P9LE-LABEL: fromDiffMemVarAull
4094 ; P8BE-LABEL: fromDiffMemVarAull
4095 ; P8LE-LABEL: fromDiffMemVarAull
4111 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarDull: builds a <2 x i64> from two adjacent memory elements at
; variable, descending indices: arr[elem] goes to element 0 and arr[elem - 1]
; to element 1. The index is sign-extended from i32 before addressing.
4112 define <2 x i64> @fromDiffMemVarDull(i64* nocapture readonly %arr, i32 signext %elem) {
4114 %idxprom = sext i32 %elem to i64
4115 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
4116 %0 = load i64, i64* %arrayidx, align 8
4117 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4118 %sub = add nsw i32 %elem, -1
4119 %idxprom1 = sext i32 %sub to i64
4120 %arrayidx2 = getelementptr inbounds i64, i64* %arr, i64 %idxprom1
4121 %1 = load i64, i64* %arrayidx2, align 8
4122 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4123 ret <2 x i64> %vecinit3
4124 ; P9BE-LABEL: fromDiffMemVarDull
4125 ; P9LE-LABEL: fromDiffMemVarDull
4126 ; P8BE-LABEL: fromDiffMemVarDull
4127 ; P8LE-LABEL: fromDiffMemVarDull
4145 ; Function Attrs: norecurse nounwind readonly
; fromRandMemConsull: builds a <2 x i64> from two non-adjacent memory elements
; at constant indices: arr[4] goes to element 0, arr[18] to element 1.
4146 define <2 x i64> @fromRandMemConsull(i64* nocapture readonly %arr) {
4148 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 4
4149 %0 = load i64, i64* %arrayidx, align 8
4150 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4151 %arrayidx1 = getelementptr inbounds i64, i64* %arr, i64 18
4152 %1 = load i64, i64* %arrayidx1, align 8
4153 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4154 ret <2 x i64> %vecinit2
4155 ; P9BE-LABEL: fromRandMemConsull
4156 ; P9LE-LABEL: fromRandMemConsull
4157 ; P8BE-LABEL: fromRandMemConsull
4158 ; P8LE-LABEL: fromRandMemConsull
4181 ; Function Attrs: norecurse nounwind readonly
; fromRandMemVarull: builds a <2 x i64> from two non-adjacent memory elements
; at variable indices: arr[elem + 4] goes to element 0 and arr[elem + 1] to
; element 1. Each index is computed in i32 and then sign-extended.
4182 define <2 x i64> @fromRandMemVarull(i64* nocapture readonly %arr, i32 signext %elem) {
4184 %add = add nsw i32 %elem, 4
4185 %idxprom = sext i32 %add to i64
4186 %arrayidx = getelementptr inbounds i64, i64* %arr, i64 %idxprom
4187 %0 = load i64, i64* %arrayidx, align 8
4188 %vecinit = insertelement <2 x i64> undef, i64 %0, i32 0
4189 %add1 = add nsw i32 %elem, 1
4190 %idxprom2 = sext i32 %add1 to i64
4191 %arrayidx3 = getelementptr inbounds i64, i64* %arr, i64 %idxprom2
4192 %1 = load i64, i64* %arrayidx3, align 8
4193 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %1, i32 1
4194 ret <2 x i64> %vecinit4
4195 ; P9BE-LABEL: fromRandMemVarull
4196 ; P9LE-LABEL: fromRandMemVarull
4197 ; P8BE-LABEL: fromRandMemVarull
4198 ; P8LE-LABEL: fromRandMemVarull
4225 ; Function Attrs: norecurse nounwind readnone
; spltRegValull: splats the scalar argument %val into both elements of a
; <2 x i64> (insert into element 0, then shuffle with an all-zero mask).
; The checks below expect `mtvsrdd v2, r3, r3` on pwr9 and, on pwr8, an
; mtvsrd from r3 followed by `xxspltd v2, <that register>, 0`.
4226 define <2 x i64> @spltRegValull(i64 %val) {
4228 %splat.splatinsert = insertelement <2 x i64> undef, i64 %val, i32 0
4229 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4230 ret <2 x i64> %splat.splat
4231 ; P9BE-LABEL: spltRegValull
4232 ; P9LE-LABEL: spltRegValull
4233 ; P8BE-LABEL: spltRegValull
4234 ; P8LE-LABEL: spltRegValull
4235 ; P9BE: mtvsrdd v2, r3, r3
4237 ; P9LE: mtvsrdd v2, r3, r3
4239 ; P8BE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
4240 ; P8BE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
4242 ; P8LE: mtvsrd {{[vsf]+}}[[REG1:[0-9]+]], r3
4243 ; P8LE: xxspltd v2, {{[vsf]+}}[[REG1]], 0
4247 ; Function Attrs: norecurse nounwind readonly
; spltMemValull: loads one i64 from %ptr and splats it into both elements of a
; <2 x i64> (insert into element 0, then shuffle with an all-zero mask).
4248 define <2 x i64> @spltMemValull(i64* nocapture readonly %ptr) {
4250 %0 = load i64, i64* %ptr, align 8
4251 %splat.splatinsert = insertelement <2 x i64> undef, i64 %0, i32 0
4252 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4253 ret <2 x i64> %splat.splat
4254 ; P9BE-LABEL: spltMemValull
4255 ; P9LE-LABEL: spltMemValull
4256 ; P8BE-LABEL: spltMemValull
4257 ; P8LE-LABEL: spltMemValull
4268 ; Function Attrs: norecurse nounwind readnone
; spltCnstConvftoull: returns the constant <2 x i64> splat of 4.
; NOTE(review): the name suggests this is the already-folded result of a
; float -> unsigned long long conversion in the C source; no conversion appears
; in the IR.
4269 define <2 x i64> @spltCnstConvftoull() {
4271 ret <2 x i64> <i64 4, i64 4>
4272 ; P9BE-LABEL: spltCnstConvftoull
4273 ; P9LE-LABEL: spltCnstConvftoull
4274 ; P8BE-LABEL: spltCnstConvftoull
4275 ; P8LE-LABEL: spltCnstConvftoull
4286 ; Function Attrs: norecurse nounwind readnone
; fromRegsConvftoull: converts two float arguments to unsigned i64 (fptoui)
; and places %a's result in element 0 and %b's result in element 1.
; The visible checks expect an `xvcvdpuxds v2` in every run configuration.
4287 define <2 x i64> @fromRegsConvftoull(float %a, float %b) {
4289 %conv = fptoui float %a to i64
4290 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4291 %conv1 = fptoui float %b to i64
4292 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
4293 ret <2 x i64> %vecinit2
4294 ; P9BE-LABEL: fromRegsConvftoull
4295 ; P9LE-LABEL: fromRegsConvftoull
4296 ; P8BE-LABEL: fromRegsConvftoull
4297 ; P8LE-LABEL: fromRegsConvftoull
4299 ; P9BE: xvcvdpuxds v2
4302 ; P9LE: xvcvdpuxds v2
4305 ; P8BE: xvcvdpuxds v2
4308 ; P8LE: xvcvdpuxds v2
4312 ; Function Attrs: norecurse nounwind readnone
; fromDiffConstsConvftoull: returns the constant <2 x i64> with elements 24
; and 234.
; NOTE(review): presumably the folded result of float -> unsigned long long
; conversions in the C source; no conversion appears in the IR.
4313 define <2 x i64> @fromDiffConstsConvftoull() {
4315 ret <2 x i64> <i64 24, i64 234>
4316 ; P9BE-LABEL: fromDiffConstsConvftoull
4317 ; P9LE-LABEL: fromDiffConstsConvftoull
4318 ; P8BE-LABEL: fromDiffConstsConvftoull
4319 ; P8LE-LABEL: fromDiffConstsConvftoull
4331 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsAConvftoull: loads the floats ptr[0] and ptr[1], converts each
; to unsigned i64 (fptoui) and stores them in elements 0 and 1 respectively.
; The visible checks expect an `xvcvdpuxds v2` in every run configuration.
4332 define <2 x i64> @fromDiffMemConsAConvftoull(float* nocapture readonly %ptr) {
4334 %0 = load float, float* %ptr, align 4
4335 %conv = fptoui float %0 to i64
4336 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4337 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 1
4338 %1 = load float, float* %arrayidx1, align 4
4339 %conv2 = fptoui float %1 to i64
4340 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
4341 ret <2 x i64> %vecinit3
4342 ; P9BE-LABEL: fromDiffMemConsAConvftoull
4343 ; P9LE-LABEL: fromDiffMemConsAConvftoull
4344 ; P8BE-LABEL: fromDiffMemConsAConvftoull
4345 ; P8LE-LABEL: fromDiffMemConsAConvftoull
4349 ; P9BE-NEXT: xvcvdpuxds v2
4354 ; P9LE-NEXT: xvcvdpuxds v2
4359 ; P8BE-NEXT: xvcvdpuxds v2
4364 ; P8LE-NEXT: xvcvdpuxds v2
4368 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemConsDConvftoull: loads the floats ptr[3] and ptr[2] (descending
; order), converts each to unsigned i64 (fptoui) and stores them in elements 0
; and 1 respectively.
; The visible checks expect an `xvcvdpuxds v2` in every run configuration.
4369 define <2 x i64> @fromDiffMemConsDConvftoull(float* nocapture readonly %ptr) {
4371 %arrayidx = getelementptr inbounds float, float* %ptr, i64 3
4372 %0 = load float, float* %arrayidx, align 4
4373 %conv = fptoui float %0 to i64
4374 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4375 %arrayidx1 = getelementptr inbounds float, float* %ptr, i64 2
4376 %1 = load float, float* %arrayidx1, align 4
4377 %conv2 = fptoui float %1 to i64
4378 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
4379 ret <2 x i64> %vecinit3
4380 ; P9BE-LABEL: fromDiffMemConsDConvftoull
4381 ; P9LE-LABEL: fromDiffMemConsDConvftoull
4382 ; P8BE-LABEL: fromDiffMemConsDConvftoull
4383 ; P8LE-LABEL: fromDiffMemConsDConvftoull
4387 ; P9BE-NEXT: xvcvdpuxds v2
4392 ; P9LE-NEXT: xvcvdpuxds v2
4397 ; P8BE-NEXT: xvcvdpuxds v2
4402 ; P8LE-NEXT: xvcvdpuxds v2
4406 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarAConvftoull: loads the floats arr[elem] and arr[elem + 1],
; converts each to unsigned i64 (fptoui) and stores them in elements 0 and 1
; respectively. The index is sign-extended from i32 before addressing.
; The visible checks expect an `xvcvdpuxds v2` in every run configuration.
4407 define <2 x i64> @fromDiffMemVarAConvftoull(float* nocapture readonly %arr, i32 signext %elem) {
4409 %idxprom = sext i32 %elem to i64
4410 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
4411 %0 = load float, float* %arrayidx, align 4
4412 %conv = fptoui float %0 to i64
4413 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4414 %add = add nsw i32 %elem, 1
4415 %idxprom1 = sext i32 %add to i64
4416 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
4417 %1 = load float, float* %arrayidx2, align 4
4418 %conv3 = fptoui float %1 to i64
4419 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
4420 ret <2 x i64> %vecinit4
4421 ; P9BE-LABEL: fromDiffMemVarAConvftoull
4422 ; P9LE-LABEL: fromDiffMemVarAConvftoull
4423 ; P8BE-LABEL: fromDiffMemVarAConvftoull
4424 ; P8LE-LABEL: fromDiffMemVarAConvftoull
4429 ; P9BE-NEXT: xvcvdpuxds v2
4435 ; P9LE-NEXT: xvcvdpuxds v2
4441 ; P8BE-NEXT: xvcvdpuxds v2
4447 ; P8LE-NEXT: xvcvdpuxds v2
4451 ; Function Attrs: norecurse nounwind readonly
; fromDiffMemVarDConvftoull: loads the floats arr[elem] and arr[elem - 1]
; (descending order), converts each to unsigned i64 (fptoui) and stores them
; in elements 0 and 1 respectively.
; The visible checks expect an `xvcvdpuxds v2` in every run configuration.
4452 define <2 x i64> @fromDiffMemVarDConvftoull(float* nocapture readonly %arr, i32 signext %elem) {
4454 %idxprom = sext i32 %elem to i64
4455 %arrayidx = getelementptr inbounds float, float* %arr, i64 %idxprom
4456 %0 = load float, float* %arrayidx, align 4
4457 %conv = fptoui float %0 to i64
4458 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4459 %sub = add nsw i32 %elem, -1
4460 %idxprom1 = sext i32 %sub to i64
4461 %arrayidx2 = getelementptr inbounds float, float* %arr, i64 %idxprom1
4462 %1 = load float, float* %arrayidx2, align 4
4463 %conv3 = fptoui float %1 to i64
4464 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
4465 ret <2 x i64> %vecinit4
4466 ; P9BE-LABEL: fromDiffMemVarDConvftoull
4467 ; P9LE-LABEL: fromDiffMemVarDConvftoull
4468 ; P8BE-LABEL: fromDiffMemVarDConvftoull
4469 ; P8LE-LABEL: fromDiffMemVarDConvftoull
4474 ; P9BE-NEXT: xvcvdpuxds v2
4480 ; P9LE-NEXT: xvcvdpuxds v2
4486 ; P8BE-NEXT: xvcvdpuxds v2
4492 ; P8LE-NEXT: xvcvdpuxds v2
; Splats a float argument converted to an unsigned 64-bit integer: %val is
; converted with fptoui to i64, inserted at element 0, and broadcast to both
; lanes by a shufflevector with an all-zero mask. The visible checks expect a
; doubleword splat (xxspltd) writing v2 in all four run configurations.
4496 ; Function Attrs: norecurse nounwind readnone
4497 define <2 x i64> @spltRegValConvftoull(float %val) {
4499 %conv = fptoui float %val to i64
4500 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
4501 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4502 ret <2 x i64> %splat.splat
4503 ; P9BE-LABEL: spltRegValConvftoull
4504 ; P9LE-LABEL: spltRegValConvftoull
4505 ; P8BE-LABEL: spltRegValConvftoull
4506 ; P8LE-LABEL: spltRegValConvftoull
4508 ; P9BE-NEXT: xxspltd v2
4511 ; P9LE-NEXT: xxspltd v2
4514 ; P8BE-NEXT: xxspltd v2
4517 ; P8LE-NEXT: xxspltd v2
; Splats a float loaded from memory after conversion to an unsigned 64-bit
; integer: the float at %ptr is loaded, converted with fptoui to i64, inserted
; at element 0, and broadcast to both lanes by a shufflevector with an
; all-zero mask. The visible checks expect a scalar convert (xscvdpuxds)
; followed by a doubleword splat (xxspltd) writing v2 in all four run
; configurations.
4521 ; Function Attrs: norecurse nounwind readonly
4522 define <2 x i64> @spltMemValConvftoull(float* nocapture readonly %ptr) {
4524 %0 = load float, float* %ptr, align 4
4525 %conv = fptoui float %0 to i64
4526 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
4527 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4528 ret <2 x i64> %splat.splat
4529 ; P9BE-LABEL: spltMemValConvftoull
4530 ; P9LE-LABEL: spltMemValConvftoull
4531 ; P8BE-LABEL: spltMemValConvftoull
4532 ; P8LE-LABEL: spltMemValConvftoull
4534 ; P9BE-NEXT: xscvdpuxds
4535 ; P9BE-NEXT: xxspltd v2
4538 ; P9LE-NEXT: xscvdpuxds
4539 ; P9LE-NEXT: xxspltd v2
4542 ; P8BE-NEXT: xscvdpuxds
4543 ; P8BE-NEXT: xxspltd v2
4546 ; P8LE-NEXT: xscvdpuxds
4547 ; P8LE-NEXT: xxspltd v2
; Returns the constant splat <i64 4, i64 4>. No conversion instruction is
; present in the IR; presumably the double-to-unsigned conversion of the
; original constant was folded before this IR was produced (verify against
; the C source in the file header).
4551 ; Function Attrs: norecurse nounwind readnone
4552 define <2 x i64> @spltCnstConvdtoull() {
4554 ret <2 x i64> <i64 4, i64 4>
4555 ; P9BE-LABEL: spltCnstConvdtoull
4556 ; P9LE-LABEL: spltCnstConvdtoull
4557 ; P8BE-LABEL: spltCnstConvdtoull
4558 ; P8LE-LABEL: spltCnstConvdtoull
; Builds a <2 x i64> from two double arguments: %a and %b are each converted
; with fptoui to i64, with %a's result inserted at element 0 and %b's result
; at element 1. The visible checks expect a vector convert (xvcvdpuxds) in
; all four run configurations.
4569 ; Function Attrs: norecurse nounwind readnone
4570 define <2 x i64> @fromRegsConvdtoull(double %a, double %b) {
4572 %conv = fptoui double %a to i64
4573 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4574 %conv1 = fptoui double %b to i64
4575 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %conv1, i32 1
4576 ret <2 x i64> %vecinit2
4577 ; P9BE-LABEL: fromRegsConvdtoull
4578 ; P9LE-LABEL: fromRegsConvdtoull
4579 ; P8BE-LABEL: fromRegsConvdtoull
4580 ; P8LE-LABEL: fromRegsConvdtoull
4582 ; P9BE-NEXT: xvcvdpuxds
4585 ; P9LE-NEXT: xvcvdpuxds
4588 ; P8BE-NEXT: xvcvdpuxds
4591 ; P8LE-NEXT: xvcvdpuxds
; Returns the constant vector <i64 24, i64 234> (two different constants).
; No conversion instruction is present in the IR; presumably the
; double-to-unsigned conversions were folded before this IR was produced
; (verify against the C source in the file header).
4595 ; Function Attrs: norecurse nounwind readnone
4596 define <2 x i64> @fromDiffConstsConvdtoull() {
4598 ret <2 x i64> <i64 24, i64 234>
4599 ; P9BE-LABEL: fromDiffConstsConvdtoull
4600 ; P9LE-LABEL: fromDiffConstsConvdtoull
4601 ; P8BE-LABEL: fromDiffConstsConvdtoull
4602 ; P8LE-LABEL: fromDiffConstsConvdtoull
; Converts two consecutive doubles starting at %ptr in one step: %ptr is
; bitcast to a <2 x double> pointer, the vector is loaded with 8-byte
; alignment, and a single vector fptoui produces the <2 x i64> value. The
; visible checks expect a vector convert (xvcvdpuxds) writing v2 in all four
; run configurations.
4613 ; Function Attrs: norecurse nounwind readonly
4614 define <2 x i64> @fromDiffMemConsAConvdtoull(double* nocapture readonly %ptr) {
4616 %0 = bitcast double* %ptr to <2 x double>*
4617 %1 = load <2 x double>, <2 x double>* %0, align 8
4618 %2 = fptoui <2 x double> %1 to <2 x i64>
4620 ; P9BE-LABEL: fromDiffMemConsAConvdtoull
4621 ; P9LE-LABEL: fromDiffMemConsAConvdtoull
4622 ; P8BE-LABEL: fromDiffMemConsAConvdtoull
4623 ; P8LE-LABEL: fromDiffMemConsAConvdtoull
4625 ; P9BE-NEXT: xvcvdpuxds v2
4628 ; P9LE-NEXT: xvcvdpuxds v2
4631 ; P8BE-NEXT: xvcvdpuxds v2
4635 ; P8LE-NEXT: xvcvdpuxds v2
; Builds a <2 x i64> from two doubles at constant, descending offsets from
; %ptr: the double at index 3 is converted with fptoui and inserted at element
; 0, and the double at index 2 is converted and inserted at element 1. The
; visible checks expect a doubleword swap (xxswapd) before the vector convert
; (xvcvdpuxds) on every run configuration except pwr8 little-endian, where
; only the convert is checked.
4639 ; Function Attrs: norecurse nounwind readonly
4640 define <2 x i64> @fromDiffMemConsDConvdtoull(double* nocapture readonly %ptr) {
4642 %arrayidx = getelementptr inbounds double, double* %ptr, i64 3
4643 %0 = load double, double* %arrayidx, align 8
4644 %conv = fptoui double %0 to i64
4645 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4646 %arrayidx1 = getelementptr inbounds double, double* %ptr, i64 2
4647 %1 = load double, double* %arrayidx1, align 8
4648 %conv2 = fptoui double %1 to i64
4649 %vecinit3 = insertelement <2 x i64> %vecinit, i64 %conv2, i32 1
4650 ret <2 x i64> %vecinit3
4651 ; P9BE-LABEL: fromDiffMemConsDConvdtoull
4652 ; P9LE-LABEL: fromDiffMemConsDConvdtoull
4653 ; P8BE-LABEL: fromDiffMemConsDConvdtoull
4654 ; P8LE-LABEL: fromDiffMemConsDConvdtoull
4656 ; P9BE-NEXT: xxswapd
4657 ; P9BE-NEXT: xvcvdpuxds v2
4660 ; P9LE-NEXT: xxswapd
4661 ; P9LE-NEXT: xvcvdpuxds v2
4664 ; P8BE-NEXT: xxswapd
4665 ; P8BE-NEXT: xvcvdpuxds v2
4668 ; P8LE-NEXT: xvcvdpuxds v2
; Builds a <2 x i64> from two doubles loaded from %arr at the run-time indices
; %elem and %elem + 1 (ascending order). Each double is converted with fptoui
; to i64; the first result is inserted at element 0, the second at element 1.
; The visible checks expect a vector convert (xvcvdpuxds) writing v2 in all
; four run configurations; only the pwr8 little-endian run also checks for a
; preceding doubleword swap (xxswapd).
4672 ; Function Attrs: norecurse nounwind readonly
4673 define <2 x i64> @fromDiffMemVarAConvdtoull(double* nocapture readonly %arr, i32 signext %elem) {
4675 %idxprom = sext i32 %elem to i64
4676 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
4677 %0 = load double, double* %arrayidx, align 8
4678 %conv = fptoui double %0 to i64
4679 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4680 %add = add nsw i32 %elem, 1
4681 %idxprom1 = sext i32 %add to i64
4682 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
4683 %1 = load double, double* %arrayidx2, align 8
4684 %conv3 = fptoui double %1 to i64
4685 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
4686 ret <2 x i64> %vecinit4
4687 ; P9BE-LABEL: fromDiffMemVarAConvdtoull
4688 ; P9LE-LABEL: fromDiffMemVarAConvdtoull
4689 ; P8BE-LABEL: fromDiffMemVarAConvdtoull
4690 ; P8LE-LABEL: fromDiffMemVarAConvdtoull
4693 ; P9BE-NEXT: xvcvdpuxds v2
4697 ; P9LE-NEXT: xvcvdpuxds v2
4701 ; P8BE-NEXT: xvcvdpuxds v2
4705 ; P8LE-NEXT: xxswapd
4706 ; P8LE-NEXT: xvcvdpuxds v2
; Builds a <2 x i64> from two doubles loaded from %arr at the run-time indices
; %elem and %elem - 1 (descending order). Each double is converted with fptoui
; to i64; the value at %elem goes to element 0 and the value at %elem - 1 goes
; to element 1. The visible checks expect a doubleword swap (xxswapd) before
; the vector convert (xvcvdpuxds) on every run configuration except pwr8
; little-endian, where only the convert is checked.
4710 ; Function Attrs: norecurse nounwind readonly
4711 define <2 x i64> @fromDiffMemVarDConvdtoull(double* nocapture readonly %arr, i32 signext %elem) {
4713 %idxprom = sext i32 %elem to i64
4714 %arrayidx = getelementptr inbounds double, double* %arr, i64 %idxprom
4715 %0 = load double, double* %arrayidx, align 8
4716 %conv = fptoui double %0 to i64
4717 %vecinit = insertelement <2 x i64> undef, i64 %conv, i32 0
4718 %sub = add nsw i32 %elem, -1
4719 %idxprom1 = sext i32 %sub to i64
4720 %arrayidx2 = getelementptr inbounds double, double* %arr, i64 %idxprom1
4721 %1 = load double, double* %arrayidx2, align 8
4722 %conv3 = fptoui double %1 to i64
4723 %vecinit4 = insertelement <2 x i64> %vecinit, i64 %conv3, i32 1
4724 ret <2 x i64> %vecinit4
4725 ; P9BE-LABEL: fromDiffMemVarDConvdtoull
4726 ; P9LE-LABEL: fromDiffMemVarDConvdtoull
4727 ; P8BE-LABEL: fromDiffMemVarDConvdtoull
4728 ; P8LE-LABEL: fromDiffMemVarDConvdtoull
4731 ; P9BE-NEXT: xxswapd
4732 ; P9BE-NEXT: xvcvdpuxds v2
4736 ; P9LE-NEXT: xxswapd
4737 ; P9LE-NEXT: xvcvdpuxds v2
4741 ; P8BE-NEXT: xxswapd
4742 ; P8BE-NEXT: xvcvdpuxds v2
4746 ; P8LE-NEXT: xvcvdpuxds v2
; Splats a double argument converted to an unsigned 64-bit integer: %val is
; converted with fptoui to i64, inserted at element 0, and broadcast to both
; lanes by a shufflevector with an all-zero mask. The visible checks expect a
; doubleword splat (xxspltd) writing v2 in all four run configurations.
4750 ; Function Attrs: norecurse nounwind readnone
4751 define <2 x i64> @spltRegValConvdtoull(double %val) {
4753 %conv = fptoui double %val to i64
4754 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
4755 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4756 ret <2 x i64> %splat.splat
4757 ; P9BE-LABEL: spltRegValConvdtoull
4758 ; P9LE-LABEL: spltRegValConvdtoull
4759 ; P8BE-LABEL: spltRegValConvdtoull
4760 ; P8LE-LABEL: spltRegValConvdtoull
4762 ; P9BE-NEXT: xxspltd v2
4765 ; P9LE-NEXT: xxspltd v2
4768 ; P8BE-NEXT: xxspltd v2
4771 ; P8LE-NEXT: xxspltd v2
; Splats a double loaded from memory after conversion to an unsigned 64-bit
; integer: the double at %ptr is loaded, converted with fptoui to i64,
; inserted at element 0, and broadcast to both lanes by a shufflevector with
; an all-zero mask. The visible checks expect a vector convert (xvcvdpuxds)
; in all four run configurations.
4775 ; Function Attrs: norecurse nounwind readonly
4776 define <2 x i64> @spltMemValConvdtoull(double* nocapture readonly %ptr) {
4778 %0 = load double, double* %ptr, align 8
4779 %conv = fptoui double %0 to i64
4780 %splat.splatinsert = insertelement <2 x i64> undef, i64 %conv, i32 0
4781 %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
4782 ret <2 x i64> %splat.splat
4783 ; P9BE-LABEL: spltMemValConvdtoull
4784 ; P9LE-LABEL: spltMemValConvdtoull
4785 ; P8BE-LABEL: spltMemValConvdtoull
4786 ; P8LE-LABEL: spltMemValConvdtoull
4788 ; P9BE-NEXT: xvcvdpuxds
4791 ; P9LE-NEXT: xvcvdpuxds
4794 ; P8BE-NEXT: xvcvdpuxds
4797 ; P8LE-NEXT: xvcvdpuxds