; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64
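; The two ppc32 RUN lines differ only in CPU: the -mcpu=ppc64 configuration
; has vector support (see the vrlw sequences under CHECK32_64 below), while
; the default ppc32 run scalarizes the vector rotates.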
declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
; When the first 2 operands match, it's a rotate.
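; There is no sub-word rotate instruction, so the i8 rotate below is built
; from a full-word rotate (rotlwi) plus a rotate-and-insert (rlwimi).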
define i8 @rotl_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotl_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 27
; CHECK-NEXT:    rlwimi 4, 3, 3, 0, 28
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}
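; On 32-bit targets the i64 rotate is done per register half with paired
; rotlwi/rlwimi; 64-bit targets use a single rotldi.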
define i64 @rotl_i64_const_shift(i64 %x) {
; CHECK32-LABEL: rotl_i64_const_shift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 5, 4, 3
; CHECK32-NEXT:    rotlwi 6, 3, 3
; CHECK32-NEXT:    rlwimi 5, 3, 3, 0, 28
; CHECK32-NEXT:    rlwimi 6, 4, 3, 0, 28
; CHECK32-NEXT:    mr 3, 5
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 3, 3, 3
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3)
  ret i64 %f
}
; When the first 2 operands match, it's a rotate (by variable amount).
define i16 @rotl_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotl_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    slw 3, 3, 6
; CHECK32-NEXT:    srw 4, 5, 4
; CHECK32-NEXT:    or 3, 3, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotl_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    slw 3, 3, 4
; CHECK64-NEXT:    srw 4, 6, 5
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}
define i32 @rotl_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotl_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}
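; With a variable amount, the PPC32 i64 rotate expands to a shift/or
; sequence across the register pair; PPC64 needs only rotld.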
define i64 @rotl_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotl_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    clrlwi 5, 6, 26
; CHECK32_32-NEXT:    subfic 8, 5, 32
; CHECK32_32-NEXT:    neg 6, 6
; CHECK32_32-NEXT:    slw 7, 3, 5
; CHECK32_32-NEXT:    addi 9, 5, -32
; CHECK32_32-NEXT:    srw 8, 4, 8
; CHECK32_32-NEXT:    clrlwi 6, 6, 26
; CHECK32_32-NEXT:    slw 9, 4, 9
; CHECK32_32-NEXT:    or 7, 7, 8
; CHECK32_32-NEXT:    subfic 8, 6, 32
; CHECK32_32-NEXT:    or 7, 7, 9
; CHECK32_32-NEXT:    addi 9, 6, -32
; CHECK32_32-NEXT:    slw 8, 3, 8
; CHECK32_32-NEXT:    srw 9, 3, 9
; CHECK32_32-NEXT:    srw 3, 3, 6
; CHECK32_32-NEXT:    srw 6, 4, 6
; CHECK32_32-NEXT:    or 6, 6, 8
; CHECK32_32-NEXT:    or 6, 6, 9
; CHECK32_32-NEXT:    slw 4, 4, 5
; CHECK32_32-NEXT:    or 3, 7, 3
; CHECK32_32-NEXT:    or 4, 4, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    clrlwi 5, 6, 26
; CHECK32_64-NEXT:    neg 6, 6
; CHECK32_64-NEXT:    subfic 8, 5, 32
; CHECK32_64-NEXT:    slw 7, 3, 5
; CHECK32_64-NEXT:    clrlwi 6, 6, 26
; CHECK32_64-NEXT:    srw 8, 4, 8
; CHECK32_64-NEXT:    addi 9, 5, -32
; CHECK32_64-NEXT:    or 7, 7, 8
; CHECK32_64-NEXT:    subfic 8, 6, 32
; CHECK32_64-NEXT:    slw 5, 4, 5
; CHECK32_64-NEXT:    slw 9, 4, 9
; CHECK32_64-NEXT:    srw 10, 3, 6
; CHECK32_64-NEXT:    srw 4, 4, 6
; CHECK32_64-NEXT:    addi 6, 6, -32
; CHECK32_64-NEXT:    slw 8, 3, 8
; CHECK32_64-NEXT:    srw 3, 3, 6
; CHECK32_64-NEXT:    or 4, 4, 8
; CHECK32_64-NEXT:    or 6, 7, 9
; CHECK32_64-NEXT:    or 4, 4, 3
; CHECK32_64-NEXT:    or 3, 6, 10
; CHECK32_64-NEXT:    or 4, 5, 4
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.
define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotl_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}
; Vector rotate by constant splat amount.
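; A splat of 3 fits vspltisw's 5-bit signed immediate, so the amount vector
; is materialized with a single instruction.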
define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotl_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 3
; CHECK32_32-NEXT:    rotlwi 4, 4, 3
; CHECK32_32-NEXT:    rotlwi 5, 5, 3
; CHECK32_32-NEXT:    rotlwi 6, 6, 3
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotl_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotl_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}
; Repeat everything for funnel shift right.
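; A rotate right by a constant is emitted as a rotate left by the
; complementary amount (e.g. right 3 == left 29 for i32).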
define i8 @rotr_i8_const_shift(i8 %x) {
; CHECK-LABEL: rotr_i8_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 3, 29
; CHECK-NEXT:    rlwimi 4, 3, 5, 0, 26
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3)
  ret i8 %f
}
define i32 @rotr_i32_const_shift(i32 %x) {
; CHECK-LABEL: rotr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 3, 3, 29
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3)
  ret i32 %f
}
; When the first 2 operands match, it's a rotate (by variable amount).
define i16 @rotr_i16(i16 %x, i16 %z) {
; CHECK32-LABEL: rotr_i16:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    clrlwi 6, 4, 28
; CHECK32-NEXT:    neg 4, 4
; CHECK32-NEXT:    clrlwi 5, 3, 16
; CHECK32-NEXT:    clrlwi 4, 4, 28
; CHECK32-NEXT:    srw 5, 5, 6
; CHECK32-NEXT:    slw 3, 3, 4
; CHECK32-NEXT:    or 3, 5, 3
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: rotr_i16:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 5, 4
; CHECK64-NEXT:    clrlwi 6, 3, 16
; CHECK64-NEXT:    clrlwi 4, 4, 28
; CHECK64-NEXT:    clrlwi 5, 5, 28
; CHECK64-NEXT:    srw 4, 6, 4
; CHECK64-NEXT:    slw 3, 3, 5
; CHECK64-NEXT:    or 3, 4, 3
; CHECK64-NEXT:    blr
  %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z)
  ret i16 %f
}
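; For a full-width type, a variable rotate right is just a negate of the
; amount followed by rotlw/rotld, which mask the amount themselves.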
define i32 @rotr_i32(i32 %x, i32 %z) {
; CHECK-LABEL: rotr_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    neg 4, 4
; CHECK-NEXT:    rotlw 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z)
  ret i32 %f
}
define i64 @rotr_i64(i64 %x, i64 %z) {
; CHECK32_32-LABEL: rotr_i64:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    clrlwi 5, 6, 26
; CHECK32_32-NEXT:    subfic 8, 5, 32
; CHECK32_32-NEXT:    neg 6, 6
; CHECK32_32-NEXT:    srw 7, 4, 5
; CHECK32_32-NEXT:    addi 9, 5, -32
; CHECK32_32-NEXT:    slw 8, 3, 8
; CHECK32_32-NEXT:    clrlwi 6, 6, 26
; CHECK32_32-NEXT:    srw 9, 3, 9
; CHECK32_32-NEXT:    or 7, 7, 8
; CHECK32_32-NEXT:    subfic 8, 6, 32
; CHECK32_32-NEXT:    or 7, 7, 9
; CHECK32_32-NEXT:    addi 9, 6, -32
; CHECK32_32-NEXT:    srw 8, 4, 8
; CHECK32_32-NEXT:    slw 9, 4, 9
; CHECK32_32-NEXT:    slw 4, 4, 6
; CHECK32_32-NEXT:    slw 6, 3, 6
; CHECK32_32-NEXT:    or 6, 6, 8
; CHECK32_32-NEXT:    or 6, 6, 9
; CHECK32_32-NEXT:    srw 3, 3, 5
; CHECK32_32-NEXT:    or 4, 7, 4
; CHECK32_32-NEXT:    or 3, 3, 6
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_i64:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    clrlwi 5, 6, 26
; CHECK32_64-NEXT:    neg 6, 6
; CHECK32_64-NEXT:    subfic 8, 5, 32
; CHECK32_64-NEXT:    srw 7, 4, 5
; CHECK32_64-NEXT:    clrlwi 6, 6, 26
; CHECK32_64-NEXT:    slw 8, 3, 8
; CHECK32_64-NEXT:    addi 9, 5, -32
; CHECK32_64-NEXT:    or 7, 7, 8
; CHECK32_64-NEXT:    subfic 8, 6, 32
; CHECK32_64-NEXT:    srw 5, 3, 5
; CHECK32_64-NEXT:    srw 9, 3, 9
; CHECK32_64-NEXT:    slw 10, 4, 6
; CHECK32_64-NEXT:    slw 3, 3, 6
; CHECK32_64-NEXT:    addi 6, 6, -32
; CHECK32_64-NEXT:    srw 8, 4, 8
; CHECK32_64-NEXT:    slw 4, 4, 6
; CHECK32_64-NEXT:    or 3, 3, 8
; CHECK32_64-NEXT:    or 6, 7, 9
; CHECK32_64-NEXT:    or 3, 3, 4
; CHECK32_64-NEXT:    or 4, 6, 10
; CHECK32_64-NEXT:    or 3, 5, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    neg 4, 4
; CHECK64-NEXT:    rotld 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z)
  ret i64 %f
}

; Vector rotate.
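; Vectors have no rotate-right, so the amounts are negated (subtracted
; from a zeroed register) and vrlw rotates left.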
define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) {
; CHECK32_32-LABEL: rotr_v4i32:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    neg 7, 7
; CHECK32_32-NEXT:    neg 8, 8
; CHECK32_32-NEXT:    neg 9, 9
; CHECK32_32-NEXT:    neg 10, 10
; CHECK32_32-NEXT:    rotlw 3, 3, 7
; CHECK32_32-NEXT:    rotlw 4, 4, 8
; CHECK32_32-NEXT:    rotlw 5, 5, 9
; CHECK32_32-NEXT:    rotlw 6, 6, 10
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vxor 4, 4, 4
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    xxlxor 36, 36, 36
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
  ret <4 x i32> %f
}
; Vector rotate by constant splat amount.
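; The left-rotate amount 29 does not fit vspltisw's signed 5-bit immediate
; ([-16, 15]), so it is materialized as 13 - (-16).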
define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK32_32-LABEL: rotr_v4i32_const_shift:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    rotlwi 3, 3, 29
; CHECK32_32-NEXT:    rotlwi 4, 4, 29
; CHECK32_32-NEXT:    rotlwi 5, 5, 29
; CHECK32_32-NEXT:    rotlwi 6, 6, 29
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: rotr_v4i32_const_shift:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vspltisw 3, -16
; CHECK32_64-NEXT:    vspltisw 4, 13
; CHECK32_64-NEXT:    vsubuwm 3, 4, 3
; CHECK32_64-NEXT:    vrlw 2, 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: rotr_v4i32_const_shift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vspltisw 3, -16
; CHECK64-NEXT:    vspltisw 4, 13
; CHECK64-NEXT:    vsubuwm 3, 4, 3
; CHECK64-NEXT:    vrlw 2, 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  ret <4 x i32> %f
}
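; A funnel-shift amount is taken modulo the bit width, so rotating by
; exactly the bit width is a no-op and should fold to a plain return.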
define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}
define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
  ret i32 %f
}
define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}
define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}