1 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
2 ; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE \
3 ; RUN: --implicit-check-not xxswapd
5 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
6 ; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
8 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
9 ; RUN: -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
11 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
12 ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
14 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
15 ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
17 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
18 ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
19 ; RUN: --implicit-check-not xxswapd
21 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
22 ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
24 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
25 ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX
27 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
28 ; RUN: -mcpu=pwr8 -mattr=-vsx < %s | \
29 ; RUN: FileCheck %s -check-prefix=CHECK-LE-NOVSX --implicit-check-not xxswapd
31 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
32 ; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
33 ; RUN: FileCheck %s -check-prefix=CHECK-P9 --implicit-check-not xxswapd
35 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
36 ; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s -check-prefix=CHECK-P9
38 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
39 ; RUN: -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX \
40 ; RUN: --implicit-check-not xxswapd
42 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff \
43 ; RUN: -mcpu=pwr9 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX
45 ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
46 ; RUN: -mcpu=pwr9 -mattr=-power9-vector -mattr=-direct-move < %s | \
47 ; RUN: FileCheck %s -check-prefix=CHECK-LE --implicit-check-not xxswapd
; Global operands loaded by the call_* and i128_split functions in this file:
; @x and @y hold <1 x i128> vectors, @a and @b hold scalar i128 values.
49 @x = common global <1 x i128> zeroinitializer, align 16
50 @y = common global <1 x i128> zeroinitializer, align 16
51 @a = common global i128 zeroinitializer, align 16
52 @b = common global i128 zeroinitializer, align 16
55 ; With VSX: %a is passed in register 34
56 ; The value of 1 is stored in the TOC.
57 ; On LE, ensure the value of 1 is swapped before being used (using xxswapd).
59 ; Without VSX (VMX only): %a is passed in register 2
60 ; The value of 1 is stored in the TOC.
61 ; No swaps are necessary when using P8 Vector instructions on LE
; Adds the constant 1 to a <1 x i128> vector argument. The checks below pin how
; the constant operand is loaded and that the add itself is a vadduqm.
62 define <1 x i128> @v1i128_increment_by_one(<1 x i128> %a) nounwind {
63 %tmp = add <1 x i128> %a, <i128 1>
66 ; FIXME: Seems a 128-bit literal is materialized by loading from the TOC. There
67 ; should be a better way of doing this.
; Little-endian checks: the constant is loaded with lxvd2x and then swapped
; into register 35 before the add.
69 ; CHECK-LE-LABEL: @v1i128_increment_by_one
70 ; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
71 ; CHECK-LE: xxswapd 35, [[VAL]]
72 ; CHECK-LE: vadduqm 2, 2, 3
; Power9 checks: the constant is loaded with lxv into a v-register that feeds
; the add directly. The li/mtvsrdd sequence is not checked yet (see FIXME).
75 ; CHECK-P9-LABEL: @v1i128_increment_by_one
76 ; The below FIXME is due to the lowering for BUILD_VECTOR that will be fixed
77 ; in a subsequent patch.
78 ; FIXME: li [[R1:r[0-9]+]], 1
79 ; FIXME: li [[R2:r[0-9]+]], 0
80 ; FIXME: mtvsrdd [[V1:v[0-9]+]], [[R2]], [[R1]]
81 ; CHECK-P9: lxv [[V1:v[0-9]+]]
82 ; CHECK-P9: vadduqm v2, v2, [[V1]]
; Big-endian checks: lxvd2x loads straight into register 35 and no xxswapd is
; expected around the add.
85 ; CHECK-BE-LABEL: @v1i128_increment_by_one
86 ; CHECK-BE: lxvd2x 35, {{[0-9]+}}, {{[0-9]+}}
87 ; CHECK-BE-NOT: xxswapd
88 ; CHECK-BE: vadduqm 2, 2, 3
89 ; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
; No-VSX checks: the constant is loaded with lvx and used by the add as-is;
; no xxswapd, stxvd2x or lxvd2x may appear around the load.
92 ; CHECK-NOVSX-LABEL: @v1i128_increment_by_one
93 ; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
94 ; CHECK-NOVSX-NOT: stxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
95 ; CHECK-NOVSX: lvx [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
96 ; CHECK-NOVSX-NOT: lxvd2x {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
97 ; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
98 ; CHECK-NOVSX: vadduqm 2, 2, [[VAL]]
103 ; %a is passed in register 34
104 ; %b is passed in register 35
105 ; No swaps are necessary when using P8 Vector instructions on LE
107 ; %a is passed in register 2
108 ; %b is passed in register 3
109 ; On LE, do not need to swap contents of 2 and 3 because the lvx/stvx
110 ; instructions do not swap elements
; Adds two <1 x i128> vector arguments. Every prefix expects one quadword add
; on registers 2 and 3, with no xxswapd of the listed registers before it.
; NOTE(review): the patterns below spell the mnemonic "adduqm", while the checks
; in @v1i128_increment_by_one spell it "vadduqm". FileCheck matches substrings,
; so this presumably still matches vadduqm -- confirm the shorter spelling is
; intentional.
111 define <1 x i128> @v1i128_increment_by_val(<1 x i128> %a, <1 x i128> %b) nounwind {
112 %tmp = add <1 x i128> %a, %b
115 ; CHECK-LE-LABEL: @v1i128_increment_by_val
116 ; CHECK-LE-NOT: xxswapd
117 ; CHECK-LE: adduqm 2, 2, 3
120 ; CHECK-BE-LABEL: @v1i128_increment_by_val
121 ; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 34
122 ; CHECK-BE-NOT: xxswapd {{[0-9]+}}, 35
123 ; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
124 ; CHECK-BE: adduqm 2, 2, 3
127 ; CHECK-NOVSX-LABEL: @v1i128_increment_by_val
128 ; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
129 ; CHECK-NOVSX: adduqm 2, 2, 3
133 ; Little Endian (VSX and VMX):
134 ; Lower 64-bits of %a are passed in register 3
135 ; Upper 64-bits of %a are passed in register 4
136 ; Increment lower 64-bits using addic (immediate value of 1)
137 ; Increment upper 64-bits using addze (add to zero extended)
138 ; Results are placed in registers 3 and 4
139 ; Big Endian (VSX and VMX):
140 ; Lower 64-bits of %a are passed in register 4
141 ; Upper 64-bits of %a are passed in register 3
142 ; Increment lower 64-bits using addic (immediate value of 1)
143 ; Increment upper 64-bits using addze (add to zero extended)
144 ; Results are placed in registers 3 and 4
; Adds the constant 1 to a scalar i128 argument. The checks expect an addic on
; one register immediately followed by an addze on the other; the LE and BE
; prefixes use registers 3 and 4 in opposite roles.
145 define i128 @i128_increment_by_one(i128 %a) nounwind {
146 %tmp = add i128 %a, 1
; Little-endian: addic on register 3, then addze on register 4.
148 ; CHECK-LE-LABEL: @i128_increment_by_one
149 ; CHECK-LE: addic 3, 3, 1
150 ; CHECK-LE-NEXT: addze 4, 4
; Big-endian: addic on register 4, then addze on register 3.
153 ; CHECK-BE-LABEL: @i128_increment_by_one
154 ; CHECK-BE: addic 4, 4, 1
155 ; CHECK-BE-NEXT: addze 3, 3
; The no-VSX prefixes expect the same sequences and additionally a blr.
158 ; CHECK-LE-NOVSX-LABEL: @i128_increment_by_one
159 ; CHECK-LE-NOVSX: addic 3, 3, 1
160 ; CHECK-LE-NOVSX-NEXT: addze 4, 4
161 ; CHECK-LE-NOVSX: blr
163 ; CHECK-BE-NOVSX-LABEL: @i128_increment_by_one
164 ; CHECK-BE-NOVSX: addic 4, 4, 1
165 ; CHECK-BE-NOVSX-NEXT: addze 3, 3
166 ; CHECK-BE-NOVSX: blr
169 ; Little Endian (VSX and VMX):
170 ; Lower 64-bits of %a are passed in register 3
171 ; Upper 64-bits of %a are passed in register 4
172 ; Lower 64-bits of %b are passed in register 5
173 ; Upper 64-bits of %b are passed in register 6
174 ; Add the lower 64-bits using addc on registers 3 and 5
175 ; Add the upper 64-bits using adde on registers 4 and 6
176 ; Registers 3 and 4 should hold the result
177 ; Big Endian (VSX and VMX):
178 ; Upper 64-bits of %a are passed in register 3
179 ; Lower 64-bits of %a are passed in register 4
180 ; Upper 64-bits of %b are passed in register 5
181 ; Lower 64-bits of %b are passed in register 6
182 ; Add the lower 64-bits using addc on registers 4 and 6
183 ; Add the upper 64-bits using adde on registers 3 and 5
184 ; Registers 3 and 4 should hold the result
; Adds two scalar i128 arguments. The checks expect an addc immediately
; followed by an adde; the LE and BE prefixes swap which register of each pair
; is used by which instruction.
185 define i128 @i128_increment_by_val(i128 %a, i128 %b) nounwind {
186 %tmp = add i128 %a, %b
; Little-endian: addc on registers 3 and 5, then adde on registers 4 and 6.
188 ; CHECK-LE-LABEL: @i128_increment_by_val
189 ; CHECK-LE: addc 3, 3, 5
190 ; CHECK-LE-NEXT: adde 4, 4, 6
; Big-endian: addc on registers 4 and 6, then adde on registers 3 and 5.
193 ; CHECK-BE-LABEL: @i128_increment_by_val
194 ; CHECK-BE: addc 4, 4, 6
195 ; CHECK-BE-NEXT: adde 3, 3, 5
; The no-VSX prefixes expect the same sequences and additionally a blr.
198 ; CHECK-LE-NOVSX-LABEL: @i128_increment_by_val
199 ; CHECK-LE-NOVSX: addc 3, 3, 5
200 ; CHECK-LE-NOVSX-NEXT: adde 4, 4, 6
201 ; CHECK-LE-NOVSX: blr
203 ; CHECK-BE-NOVSX-LABEL: @i128_increment_by_val
204 ; CHECK-BE-NOVSX: addc 4, 4, 6
205 ; CHECK-BE-NOVSX-NEXT: adde 3, 3, 5
206 ; CHECK-BE-NOVSX: blr
210 ; Callsites for the routines defined above.
211 ; Ensure the parameters are loaded in the same order that is expected by the
212 ; callee. See comments for individual functions above for details on registers
213 ; used for parameters.
; Loads @x and passes it to @v1i128_increment_by_one. The checks pin how the
; argument is loaded into the first vector argument register before the call.
; The {{.?}} in some bl patterns allows at most one extra character before the
; callee name; presumably this tolerates a prefixed symbol on the AIX runs that
; share those prefixes -- confirm.
214 define <1 x i128> @call_v1i128_increment_by_one() nounwind {
215 %tmp = load <1 x i128>, ptr @x, align 16
216 %ret = call <1 x i128> @v1i128_increment_by_one(<1 x i128> %tmp)
; Little-endian: lxvd2x load followed by an xxswapd into register 34.
219 ; CHECK-LE-LABEL: @call_v1i128_increment_by_one
220 ; CHECK-LE: lxvd2x [[VAL:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
221 ; CHECK-LE: xxswapd 34, [[VAL]]
222 ; CHECK-LE: bl v1i128_increment_by_one
; Power9: only the call itself is checked here.
225 ; CHECK-P9-LABEL: @call_v1i128_increment_by_one
227 ; CHECK-P9: bl {{.?}}v1i128_increment_by_one
; Big-endian: lxvw4x loads directly into register 34, with no xxswapd into it.
230 ; CHECK-BE-LABEL: @call_v1i128_increment_by_one
231 ; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}}
232 ; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
233 ; CHECK-BE: bl {{.?}}v1i128_increment_by_one
; No VSX: lvx loads directly into register 2, with no xxswapd before the call.
236 ; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_one
237 ; CHECK-NOVSX: lvx 2, {{[0-9]+}}, {{[0-9]+}}
238 ; CHECK-NOVSX-NOT: xxswapd {{[0-9]+}}, {{[0-9]+}}
239 ; CHECK-NOVSX: bl {{.?}}v1i128_increment_by_one
; Loads @x and @y and passes them to @v1i128_increment_by_val. The checks pin
; how both vector arguments are loaded before the call; the -DAG forms let the
; two loads appear in either order.
243 define <1 x i128> @call_v1i128_increment_by_val() nounwind {
244 %tmp = load <1 x i128>, ptr @x, align 16
245 %tmp2 = load <1 x i128>, ptr @y, align 16
246 %ret = call <1 x i128> @v1i128_increment_by_val(<1 x i128> %tmp, <1 x i128> %tmp2)
; Little-endian: two lxvd2x loads, each swapped into its argument register
; (34 and 35) with xxswapd.
249 ; CHECK-LE-LABEL: @call_v1i128_increment_by_val
250 ; CHECK-LE: lxvd2x [[VAL1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
251 ; CHECK-LE-DAG: lxvd2x [[VAL2:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}
252 ; CHECK-LE-DAG: xxswapd 34, [[VAL1]]
253 ; CHECK-LE: xxswapd 35, [[VAL2]]
254 ; CHECK-LE: bl v1i128_increment_by_val
; Power9: lxv loads directly into v2 and v3.
257 ; CHECK-P9-LABEL: @call_v1i128_increment_by_val
258 ; CHECK-P9-DAG: lxv v2
259 ; CHECK-P9-DAG: lxv v3
260 ; CHECK-P9: bl {{.?}}v1i128_increment_by_val
; Big-endian: an lxvw4x load into register 35 is checked, and no xxswapd into
; 34 or 35 may follow it before the call.
263 ; CHECK-BE-LABEL: @call_v1i128_increment_by_val
266 ; CHECK-BE-DAG: lxvw4x 35, {{[0-9]+}}, {{[0-9]+}}
267 ; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}}
268 ; CHECK-BE-NOT: xxswapd 35, {{[0-9]+}}
269 ; CHECK-BE: bl {{.?}}v1i128_increment_by_val
; No VSX: lvx loads directly into registers 2 and 3, with no xxswapd.
272 ; CHECK-NOVSX-LABEL: @call_v1i128_increment_by_val
273 ; CHECK-NOVSX-DAG: lvx 2, {{[0-9]+}}, {{[0-9]+}}
274 ; CHECK-NOVSX-DAG: lvx 3, {{[0-9]+}}, {{[0-9]+}}
275 ; CHECK-NOVSX-NOT: xxswapd 34, {{[0-9]+}}
276 ; CHECK-NOVSX-NOT: xxswapd 35, {{[0-9]+}}
277 ; CHECK-NOVSX: bl {{.?}}v1i128_increment_by_val
; Loads the scalar i128 @a and passes it to @i128_increment_by_one. Every
; prefix expects the doubleword at offset 0 in register 3 and the doubleword at
; offset 8 in register 4, both loaded from the same base register.
282 define i128 @call_i128_increment_by_one() nounwind {
283 %tmp = load i128, ptr @a, align 16
284 %ret = call i128 @i128_increment_by_one(i128 %tmp)
; NOTE(review): the commented-out call on the next line refers to %tmp2, which
; is not defined in the visible part of this function; it looks like leftover
; text -- confirm before removing.
286 ; %ret4 = call i128 @i128_increment_by_val(i128 %tmp2, i128 %tmp2)
287 ; CHECK-LE-LABEL: @call_i128_increment_by_one
288 ; CHECK-LE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
289 ; CHECK-LE-DAG: ld 4, 8([[BASEREG]])
290 ; CHECK-LE: bl i128_increment_by_one
293 ; CHECK-BE-LABEL: @call_i128_increment_by_one
294 ; CHECK-BE-DAG: ld 3, 0([[BASEREG:[0-9]+]])
295 ; CHECK-BE-DAG: ld 4, 8([[BASEREG]])
296 ; CHECK-BE: bl {{.?}}i128_increment_by_one
299 ; CHECK-NOVSX-LABEL: @call_i128_increment_by_one
300 ; CHECK-NOVSX-DAG: ld 3, 0([[BASEREG:[0-9]+]])
301 ; CHECK-NOVSX-DAG: ld 4, 8([[BASEREG]])
302 ; CHECK-NOVSX: bl {{.?}}i128_increment_by_one
; Loads the scalar i128 values @a and @b and passes them to
; @i128_increment_by_val. Every prefix expects the first argument in registers
; 3/4 and the second in registers 5/6, each pair loaded from offsets 0 and 8 of
; one base register.
306 define i128 @call_i128_increment_by_val() nounwind {
307 %tmp = load i128, ptr @a, align 16
308 %tmp2 = load i128, ptr @b, align 16
309 %ret = call i128 @i128_increment_by_val(i128 %tmp, i128 %tmp2)
311 ; CHECK-LE-LABEL: @call_i128_increment_by_val
312 ; CHECK-LE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
313 ; CHECK-LE-DAG: ld 4, 8([[P1BASEREG]])
314 ; CHECK-LE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
315 ; CHECK-LE-DAG: ld 6, 8([[P2BASEREG]])
316 ; CHECK-LE: bl i128_increment_by_val
319 ; CHECK-BE-LABEL: @call_i128_increment_by_val
320 ; CHECK-BE-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
321 ; CHECK-BE-DAG: ld 4, 8([[P1BASEREG]])
322 ; CHECK-BE-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
323 ; CHECK-BE-DAG: ld 6, 8([[P2BASEREG]])
324 ; CHECK-BE: bl {{.?}}i128_increment_by_val
327 ; CHECK-NOVSX-LABEL: @call_i128_increment_by_val
328 ; CHECK-NOVSX-DAG: ld 3, 0([[P1BASEREG:[0-9]+]])
329 ; CHECK-NOVSX-DAG: ld 4, 8([[P1BASEREG]])
330 ; CHECK-NOVSX-DAG: ld 5, 0([[P2BASEREG:[0-9]+]])
331 ; CHECK-NOVSX-DAG: ld 6, 8([[P2BASEREG]])
332 ; CHECK-NOVSX: bl {{.?}}i128_increment_by_val
; Sums its three i128 parameters, which are interleaved with i32 parameters.
; The checks expect %i1280 in registers 4/5, one half of %i1281 in register 10
; with its other half loaded from a register-1-relative slot, and both halves
; of %i1282 loaded from register-1-relative slots.
336 define i128 @callee_i128_split(i32 %i, i128 %i1280, i32 %i4, i32 %i5,
337 i32 %i6, i32 %i7, i128 %i1281, i32 %i8, i128 %i1282){
339 %tmp = add i128 %i1280, %i1281
340 %tmp1 = add i128 %tmp, %i1282
; Little-endian: the first addc/adde pair combines registers 4/5 with register
; 10 and the loaded half; the second pair writes the result to registers 3/4.
344 ; CHECK-LE-LABEL: @callee_i128_split
345 ; CHECK-LE-DAG: ld [[TMPREG:[0-9]+]], [[OFFSET:[0-9]+]](1)
346 ; CHECK-LE-DAG: addc [[TMPREG2:[0-9]+]], 4, 10
347 ; CHECK-LE-DAG: adde [[TMPREG3:[0-9]+]], 5, [[TMPREG]]
349 ; CHECK-LE-DAG: ld [[TMPREG4:[0-9]+]], [[OFFSET2:[0-9]+]](1)
350 ; CHECK-LE-DAG: ld [[TMPREG5:[0-9]+]], [[OFFSET3:[0-9]+]](1)
351 ; CHECK-LE-DAG: addc 3, [[TMPREG2]], [[TMPREG4]]
352 ; CHECK-LE-DAG: adde 4, [[TMPREG3]], [[TMPREG5]]
; Big-endian: same shape, but addc uses register 5 with the loaded half, adde
; uses registers 4 and 10, and the final addc/adde write registers 4 and 3.
354 ; CHECK-BE-LABEL: @callee_i128_split
355 ; CHECK-BE-DAG: ld [[TMPREG:[0-9]+]], [[OFFSET:[0-9]+]](1)
356 ; CHECK-BE-DAG: addc [[TMPREG3:[0-9]+]], 5, [[TMPREG]]
357 ; CHECK-BE-DAG: adde [[TMPREG2:[0-9]+]], 4, 10
359 ; CHECK-BE-DAG: ld [[TMPREG4:[0-9]+]], [[OFFSET2:[0-9]+]](1)
360 ; CHECK-BE-DAG: ld [[TMPREG5:[0-9]+]], [[OFFSET3:[0-9]+]](1)
361 ; CHECK-BE-DAG: addc 4, [[TMPREG3]], [[TMPREG4]]
362 ; CHECK-BE-DAG: adde 3, [[TMPREG2]], [[TMPREG5]]
; Call site for @callee_i128_split: passes @a, @b and the constant 9 as the
; i128 arguments, interleaved with small i32 constants. Every prefix expects
; the i32 constants in registers 3 and 6-9, the first i128 in registers 4/5,
; and the second i128 split: its doubleword at offset 0 goes to register 10
; and its doubleword at offset 8 is stored to a register-1-relative slot.
364 define i128 @i128_split() {
366 %0 = load i128, ptr @a, align 16
367 %1 = load i128, ptr @b, align 16
368 %call = tail call i128 @callee_i128_split(i32 1, i128 %0, i32 4, i32 5,
369 i32 6, i32 7, i128 %1, i32 8, i128 9)
373 ; CHECK-LE-LABEL: @i128_split
374 ; CHECK-LE-DAG: li 3, 1
375 ; CHECK-LE-DAG: ld 4, 0([[P2BASEREG:[0-9]+]])
376 ; CHECK-LE-DAG: ld 5, 8([[P2BASEREG]])
377 ; CHECK-LE-DAG: li 6, 4
378 ; CHECK-LE-DAG: li 7, 5
379 ; CHECK-LE-DAG: li 8, 6
380 ; CHECK-LE-DAG: li 9, 7
381 ; CHECK-LE-DAG: ld 10, 0([[P7BASEREG:[0-9]+]])
382 ; CHECK-LE-DAG: ld [[TMPREG:[0-9]+]], 8([[P7BASEREG]])
383 ; CHECK-LE-DAG: std [[TMPREG]], [[OFFSET:[0-9]+]](1)
384 ; CHECK-LE: bl callee_i128_split
387 ; CHECK-BE-LABEL: @i128_split
388 ; CHECK-BE-DAG: li 3, 1
389 ; CHECK-BE-DAG: ld 4, 0([[P2BASEREG:[0-9]+]])
390 ; CHECK-BE-DAG: ld 5, 8([[P2BASEREG]])
391 ; CHECK-BE-DAG: li 6, 4
392 ; CHECK-BE-DAG: li 7, 5
393 ; CHECK-BE-DAG: li 8, 6
394 ; CHECK-BE-DAG: li 9, 7
395 ; CHECK-BE-DAG: ld 10, 0([[P7BASEREG:[0-9]+]])
396 ; CHECK-BE-DAG: ld [[TMPREG:[0-9]+]], 8([[P7BASEREG]])
397 ; CHECK-BE-DAG: std [[TMPREG]], [[OFFSET:[0-9]+]](1)
398 ; CHECK-BE: bl {{.?}}callee_i128_split
400 ; CHECK-NOVSX-LABEL: @i128_split
401 ; CHECK-NOVSX-DAG: li 3, 1
402 ; CHECK-NOVSX-DAG: ld 4, 0([[P2BASEREG:[0-9]+]])
403 ; CHECK-NOVSX-DAG: ld 5, 8([[P2BASEREG]])
404 ; CHECK-NOVSX-DAG: li 6, 4
405 ; CHECK-NOVSX-DAG: li 7, 5
406 ; CHECK-NOVSX-DAG: li 8, 6
407 ; CHECK-NOVSX-DAG: li 9, 7
408 ; CHECK-NOVSX-DAG: ld 10, 0([[P7BASEREG:[0-9]+]])
409 ; CHECK-NOVSX-DAG: ld [[TMPREG:[0-9]+]], 8([[P7BASEREG]])
410 ; CHECK-NOVSX-DAG: std [[TMPREG]], [[OFFSET:[0-9]+]](1)
411 ; CHECK-NOVSX: bl {{.?}}callee_i128_split