1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
3 ; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
4 ; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64
; Declarations of the funnel-shift intrinsics exercised by this file:
; llvm.fshl.* (funnel shift left) and llvm.fshr.* (funnel shift right) at
; i8, i16, i32, i64 and <4 x i32>. The non-power-of-2 widths (i7, i37) are
; declared next to the tests that use them.
6 declare i8 @llvm.fshl.i8(i8, i8, i8)
7 declare i16 @llvm.fshl.i16(i16, i16, i16)
8 declare i32 @llvm.fshl.i32(i32, i32, i32)
9 declare i64 @llvm.fshl.i64(i64, i64, i64)
10 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
12 declare i8 @llvm.fshr.i8(i8, i8, i8)
13 declare i16 @llvm.fshr.i16(i16, i16, i16)
14 declare i32 @llvm.fshr.i32(i32, i32, i32)
15 declare i64 @llvm.fshr.i64(i64, i64, i64)
16 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
18 ; General case - all operands can be variables.
; fshl_i32 - funnel shift left of the i32 pair (%x, %y) by the variable %z.
; Both the 32-bit and the 64-bit prefixes expect the same shape of code:
; mask the amount to 5 bits (clrlwi ..., 27), shift the first operand left by
; it (slw), shift the second operand right by 32 - amount (subfic + srw), and
; OR the two parts. The prefixes differ only in instruction order and in the
; register that holds the 32 - amount value.
20 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
21 ; CHECK32-LABEL: fshl_i32:
23 ; CHECK32-NEXT: clrlwi 5, 5, 27
24 ; CHECK32-NEXT: slw 3, 3, 5
25 ; CHECK32-NEXT: subfic 5, 5, 32
26 ; CHECK32-NEXT: srw 4, 4, 5
27 ; CHECK32-NEXT: or 3, 3, 4
30 ; CHECK64-LABEL: fshl_i32:
32 ; CHECK64-NEXT: clrlwi 5, 5, 27
33 ; CHECK64-NEXT: subfic 6, 5, 32
34 ; CHECK64-NEXT: slw 3, 3, 5
35 ; CHECK64-NEXT: srw 4, 4, 6
36 ; CHECK64-NEXT: or 3, 3, 4
38 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
; fshl_i64 - funnel shift left of the i64 pair (%x, %y) by the variable %z.
; - The CHECK64 prefix (powerpc64le run) expects the amount masked to 6 bits
;   (clrlwi ..., 26) followed by sld / subfic 64 / srd / or.
; - The CHECK32_32 and CHECK32_64 prefixes (ppc32 run without and with
;   -mcpu=ppc64) expect the operation expanded into 32-bit slw/srw/or
;   sequences; the two prefixes differ in schedule and register assignment.
42 define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
43 ; CHECK32_32-LABEL: fshl_i64:
44 ; CHECK32_32: # %bb.0:
45 ; CHECK32_32-NEXT: clrlwi 7, 8, 26
46 ; CHECK32_32-NEXT: not 8, 8
47 ; CHECK32_32-NEXT: rotlwi 6, 6, 31
48 ; CHECK32_32-NEXT: subfic 10, 7, 32
49 ; CHECK32_32-NEXT: srwi 9, 5, 1
50 ; CHECK32_32-NEXT: slw 3, 3, 7
51 ; CHECK32_32-NEXT: clrlwi 8, 8, 26
52 ; CHECK32_32-NEXT: rlwimi 6, 5, 31, 0, 0
53 ; CHECK32_32-NEXT: srw 5, 4, 10
54 ; CHECK32_32-NEXT: srw 10, 9, 8
55 ; CHECK32_32-NEXT: srw 6, 6, 8
56 ; CHECK32_32-NEXT: or 3, 3, 5
57 ; CHECK32_32-NEXT: subfic 5, 8, 32
58 ; CHECK32_32-NEXT: addi 8, 8, -32
59 ; CHECK32_32-NEXT: slw 5, 9, 5
60 ; CHECK32_32-NEXT: srw 8, 9, 8
61 ; CHECK32_32-NEXT: addi 9, 7, -32
62 ; CHECK32_32-NEXT: slw 9, 4, 9
63 ; CHECK32_32-NEXT: or 5, 6, 5
64 ; CHECK32_32-NEXT: or 3, 3, 9
65 ; CHECK32_32-NEXT: or 5, 5, 8
66 ; CHECK32_32-NEXT: slw 4, 4, 7
67 ; CHECK32_32-NEXT: or 3, 3, 10
68 ; CHECK32_32-NEXT: or 4, 4, 5
69 ; CHECK32_32-NEXT: blr
71 ; CHECK32_64-LABEL: fshl_i64:
72 ; CHECK32_64: # %bb.0:
73 ; CHECK32_64-NEXT: clrlwi 7, 8, 26
74 ; CHECK32_64-NEXT: not 8, 8
75 ; CHECK32_64-NEXT: subfic 9, 7, 32
76 ; CHECK32_64-NEXT: rotlwi 6, 6, 31
77 ; CHECK32_64-NEXT: slw 3, 3, 7
78 ; CHECK32_64-NEXT: clrlwi 8, 8, 26
79 ; CHECK32_64-NEXT: srw 9, 4, 9
80 ; CHECK32_64-NEXT: rlwimi 6, 5, 31, 0, 0
81 ; CHECK32_64-NEXT: srwi 5, 5, 1
82 ; CHECK32_64-NEXT: addi 10, 7, -32
83 ; CHECK32_64-NEXT: or 3, 3, 9
84 ; CHECK32_64-NEXT: subfic 9, 8, 32
85 ; CHECK32_64-NEXT: slw 7, 4, 7
86 ; CHECK32_64-NEXT: slw 4, 4, 10
87 ; CHECK32_64-NEXT: srw 10, 5, 8
88 ; CHECK32_64-NEXT: srw 6, 6, 8
89 ; CHECK32_64-NEXT: addi 8, 8, -32
90 ; CHECK32_64-NEXT: slw 9, 5, 9
91 ; CHECK32_64-NEXT: srw 5, 5, 8
92 ; CHECK32_64-NEXT: or 6, 6, 9
93 ; CHECK32_64-NEXT: or 3, 3, 4
94 ; CHECK32_64-NEXT: or 4, 6, 5
95 ; CHECK32_64-NEXT: or 3, 3, 10
96 ; CHECK32_64-NEXT: or 4, 7, 4
97 ; CHECK32_64-NEXT: blr
99 ; CHECK64-LABEL: fshl_i64:
101 ; CHECK64-NEXT: clrlwi 5, 5, 26
102 ; CHECK64-NEXT: subfic 6, 5, 64
103 ; CHECK64-NEXT: sld 3, 3, 5
104 ; CHECK64-NEXT: srd 4, 4, 6
105 ; CHECK64-NEXT: or 3, 3, 4
107 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
111 ; Verify that weird types are minimally supported.
; fshl_i37 - funnel shift left on a non-power-of-2 width (i37) with a variable
; shift amount, so the amount has to be reduced modulo 37 at run time.
; - The ppc32 prefixes (CHECK32_32 / CHECK32_64) expect a call to __umoddi3
;   with the constant 37 loaded as the second argument pair (li 5, 0 /
;   li 6, 37); the incoming r3-r6 values are saved in r27-r30 around the call,
;   hence the spill/reload and .cfi_offset lines.
; - The CHECK64 prefix expects no library call: the remainder is computed
;   inline with a multiply-high by a materialized constant (mulhdu), a shift
;   (rldicl), mulli by 37 and sub.
; After the remainder is available, the expected code follows the same
; shift/or pattern as fshl_i64. On the 64-bit run the second operand is first
; shifted left by 27 (sldi 4, 4, 27) - presumably 64 - 37, to left-align the
; i37 value in the 64-bit register; confirm against the legalizer.
112 declare i37 @llvm.fshl.i37(i37, i37, i37)
113 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
114 ; CHECK32_32-LABEL: fshl_i37:
115 ; CHECK32_32: # %bb.0:
116 ; CHECK32_32-NEXT: mflr 0
117 ; CHECK32_32-NEXT: stw 0, 4(1)
118 ; CHECK32_32-NEXT: stwu 1, -32(1)
119 ; CHECK32_32-NEXT: .cfi_def_cfa_offset 32
120 ; CHECK32_32-NEXT: .cfi_offset lr, 4
121 ; CHECK32_32-NEXT: .cfi_offset r27, -20
122 ; CHECK32_32-NEXT: .cfi_offset r28, -16
123 ; CHECK32_32-NEXT: .cfi_offset r29, -12
124 ; CHECK32_32-NEXT: .cfi_offset r30, -8
125 ; CHECK32_32-NEXT: stw 27, 12(1) # 4-byte Folded Spill
126 ; CHECK32_32-NEXT: mr 27, 3
127 ; CHECK32_32-NEXT: stw 28, 16(1) # 4-byte Folded Spill
128 ; CHECK32_32-NEXT: mr 28, 4
129 ; CHECK32_32-NEXT: stw 29, 20(1) # 4-byte Folded Spill
130 ; CHECK32_32-NEXT: mr 29, 5
131 ; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
132 ; CHECK32_32-NEXT: mr 30, 6
133 ; CHECK32_32-NEXT: mr 3, 7
134 ; CHECK32_32-NEXT: mr 4, 8
135 ; CHECK32_32-NEXT: li 5, 0
136 ; CHECK32_32-NEXT: li 6, 37
137 ; CHECK32_32-NEXT: bl __umoddi3
138 ; CHECK32_32-NEXT: clrlwi 6, 4, 26
139 ; CHECK32_32-NEXT: srwi 3, 30, 6
140 ; CHECK32_32-NEXT: not 4, 4
141 ; CHECK32_32-NEXT: subfic 8, 6, 32
142 ; CHECK32_32-NEXT: slwi 5, 30, 26
143 ; CHECK32_32-NEXT: rlwimi 3, 29, 26, 1, 5
144 ; CHECK32_32-NEXT: slw 7, 27, 6
145 ; CHECK32_32-NEXT: clrlwi 4, 4, 26
146 ; CHECK32_32-NEXT: srw 8, 28, 8
147 ; CHECK32_32-NEXT: srw 9, 3, 4
148 ; CHECK32_32-NEXT: srw 5, 5, 4
149 ; CHECK32_32-NEXT: or 7, 7, 8
150 ; CHECK32_32-NEXT: subfic 8, 4, 32
151 ; CHECK32_32-NEXT: addi 4, 4, -32
152 ; CHECK32_32-NEXT: slw 8, 3, 8
153 ; CHECK32_32-NEXT: srw 4, 3, 4
154 ; CHECK32_32-NEXT: addi 3, 6, -32
155 ; CHECK32_32-NEXT: slw 3, 28, 3
156 ; CHECK32_32-NEXT: or 5, 5, 8
157 ; CHECK32_32-NEXT: or 3, 7, 3
158 ; CHECK32_32-NEXT: or 4, 5, 4
159 ; CHECK32_32-NEXT: slw 5, 28, 6
160 ; CHECK32_32-NEXT: or 3, 3, 9
161 ; CHECK32_32-NEXT: or 4, 5, 4
162 ; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
163 ; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
164 ; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
165 ; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
166 ; CHECK32_32-NEXT: lwz 0, 36(1)
167 ; CHECK32_32-NEXT: addi 1, 1, 32
168 ; CHECK32_32-NEXT: mtlr 0
169 ; CHECK32_32-NEXT: blr
171 ; CHECK32_64-LABEL: fshl_i37:
172 ; CHECK32_64: # %bb.0:
173 ; CHECK32_64-NEXT: mflr 0
174 ; CHECK32_64-NEXT: stw 0, 4(1)
175 ; CHECK32_64-NEXT: stwu 1, -32(1)
176 ; CHECK32_64-NEXT: .cfi_def_cfa_offset 32
177 ; CHECK32_64-NEXT: .cfi_offset lr, 4
178 ; CHECK32_64-NEXT: .cfi_offset r27, -20
179 ; CHECK32_64-NEXT: .cfi_offset r28, -16
180 ; CHECK32_64-NEXT: .cfi_offset r29, -12
181 ; CHECK32_64-NEXT: .cfi_offset r30, -8
182 ; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
183 ; CHECK32_64-NEXT: mr 27, 3
184 ; CHECK32_64-NEXT: mr 3, 7
185 ; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
186 ; CHECK32_64-NEXT: mr 28, 4
187 ; CHECK32_64-NEXT: mr 4, 8
188 ; CHECK32_64-NEXT: stw 29, 20(1) # 4-byte Folded Spill
189 ; CHECK32_64-NEXT: mr 29, 5
190 ; CHECK32_64-NEXT: li 5, 0
191 ; CHECK32_64-NEXT: stw 30, 24(1) # 4-byte Folded Spill
192 ; CHECK32_64-NEXT: mr 30, 6
193 ; CHECK32_64-NEXT: li 6, 37
194 ; CHECK32_64-NEXT: bl __umoddi3
195 ; CHECK32_64-NEXT: clrlwi 6, 4, 26
196 ; CHECK32_64-NEXT: not 4, 4
197 ; CHECK32_64-NEXT: subfic 8, 6, 32
198 ; CHECK32_64-NEXT: srwi 3, 30, 6
199 ; CHECK32_64-NEXT: slw 7, 27, 6
200 ; CHECK32_64-NEXT: clrlwi 4, 4, 26
201 ; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
202 ; CHECK32_64-NEXT: srw 8, 28, 8
203 ; CHECK32_64-NEXT: rlwimi 3, 29, 26, 1, 5
204 ; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
205 ; CHECK32_64-NEXT: slwi 5, 30, 26
206 ; CHECK32_64-NEXT: or 7, 7, 8
207 ; CHECK32_64-NEXT: subfic 8, 4, 32
208 ; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
209 ; CHECK32_64-NEXT: addi 9, 6, -32
210 ; CHECK32_64-NEXT: srw 10, 3, 4
211 ; CHECK32_64-NEXT: srw 5, 5, 4
212 ; CHECK32_64-NEXT: addi 4, 4, -32
213 ; CHECK32_64-NEXT: slw 8, 3, 8
214 ; CHECK32_64-NEXT: slw 9, 28, 9
215 ; CHECK32_64-NEXT: srw 3, 3, 4
216 ; CHECK32_64-NEXT: or 4, 5, 8
217 ; CHECK32_64-NEXT: slw 6, 28, 6
218 ; CHECK32_64-NEXT: or 5, 7, 9
219 ; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
220 ; CHECK32_64-NEXT: or 4, 4, 3
221 ; CHECK32_64-NEXT: or 3, 5, 10
222 ; CHECK32_64-NEXT: lwz 0, 36(1)
223 ; CHECK32_64-NEXT: or 4, 6, 4
224 ; CHECK32_64-NEXT: addi 1, 1, 32
225 ; CHECK32_64-NEXT: mtlr 0
226 ; CHECK32_64-NEXT: blr
228 ; CHECK64-LABEL: fshl_i37:
230 ; CHECK64-NEXT: lis 6, 28339
231 ; CHECK64-NEXT: sldi 4, 4, 27
232 ; CHECK64-NEXT: ori 6, 6, 58451
233 ; CHECK64-NEXT: rldic 6, 6, 33, 0
234 ; CHECK64-NEXT: oris 6, 6, 3542
235 ; CHECK64-NEXT: ori 6, 6, 31883
236 ; CHECK64-NEXT: mulhdu 6, 5, 6
237 ; CHECK64-NEXT: rldicl 6, 6, 59, 5
238 ; CHECK64-NEXT: mulli 6, 6, 37
239 ; CHECK64-NEXT: sub 5, 5, 6
240 ; CHECK64-NEXT: clrlwi 5, 5, 26
241 ; CHECK64-NEXT: subfic 6, 5, 64
242 ; CHECK64-NEXT: sld 3, 3, 5
243 ; CHECK64-NEXT: srd 4, 4, 6
244 ; CHECK64-NEXT: or 3, 3, 4
246 %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
250 ; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
; fshl_i7_const_fold - all three operands are constants (112, 127, 2), so the
; call is expected to fold completely: every run must emit just a load of the
; immediate 67 (0b1000011) into r3.
252 declare i7 @llvm.fshl.i7(i7, i7, i7)
253 define i7 @fshl_i7_const_fold() {
254 ; CHECK-LABEL: fshl_i7_const_fold:
256 ; CHECK-NEXT: li 3, 67
258 %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
262 ; With constant shift amount, this is rotate + insert (missing extended mnemonics).
; fshl_i32_const_shift - funnel shift left by the constant 9. All runs expect
; a rotate of the second operand by 9 (rotlwi), an insert of the first operand
; rotated by 9 under the mask 0..22 (rlwimi ..., 9, 0, 22), and a register
; move of the result into r3.
264 define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
265 ; CHECK-LABEL: fshl_i32_const_shift:
267 ; CHECK-NEXT: rotlwi 4, 4, 9
268 ; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22
269 ; CHECK-NEXT: mr 3, 4
271 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
275 ; Check modulo math on shift amount. 41-32=9.
; fshl_i32_const_overshift - the constant shift amount (41) is larger than the
; bit width. The expected instructions are identical to those of
; fshl_i32_const_shift (amount 9), i.e. the amount is reduced modulo 32 at
; compile time.
277 define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
278 ; CHECK-LABEL: fshl_i32_const_overshift:
280 ; CHECK-NEXT: rotlwi 4, 4, 9
281 ; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22
282 ; CHECK-NEXT: mr 3, 4
284 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
288 ; 64-bit should also work. 105-64 = 41.
; fshl_i64_const_overshift - i64 funnel shift left by the constant 105, which
; exceeds the bit width (105 mod 64 = 41).
; - The CHECK64 prefix expects a 64-bit rotate + insert by 41
;   (rotldi / rldimi) and a move into r3.
; - The CHECK32 prefix expects two 32-bit rotate + insert pairs by 9
;   (41 - 32) over the 32-bit halves, plus one register move.
290 define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
291 ; CHECK32-LABEL: fshl_i64_const_overshift:
293 ; CHECK32-NEXT: rotlwi 6, 6, 9
294 ; CHECK32-NEXT: rotlwi 3, 5, 9
295 ; CHECK32-NEXT: rlwimi 6, 5, 9, 0, 22
296 ; CHECK32-NEXT: rlwimi 3, 4, 9, 0, 22
297 ; CHECK32-NEXT: mr 4, 6
300 ; CHECK64-LABEL: fshl_i64_const_overshift:
302 ; CHECK64-NEXT: rotldi 4, 4, 41
303 ; CHECK64-NEXT: rldimi 4, 3, 41, 0
304 ; CHECK64-NEXT: mr 3, 4
306 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
310 ; This should work without any node-specific logic.
; fshl_i8_const_fold - constant operands (255, 0, 7) on a legal-after-promotion
; width; every run expects the call folded to a single immediate load of 128
; (0b10000000: the low bit of 255 followed by seven zero bits from the second
; operand).
312 define i8 @fshl_i8_const_fold() {
313 ; CHECK-LABEL: fshl_i8_const_fold:
315 ; CHECK-NEXT: li 3, 128
317 %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
321 ; Repeat everything for funnel shift right.
323 ; General case - all operands can be variables.
; fshr_i32 - funnel shift right of the i32 pair (%x, %y) by the variable %z.
; Mirror image of fshl_i32: the amount is masked to 5 bits (clrlwi ..., 27),
; the second operand is shifted right by it (srw), the first operand is
; shifted left by 32 - amount (subfic + slw), and the parts are ORed. The
; 32-bit and 64-bit prefixes differ only in schedule and in the register that
; holds the 32 - amount value.
325 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
326 ; CHECK32-LABEL: fshr_i32:
328 ; CHECK32-NEXT: clrlwi 5, 5, 27
329 ; CHECK32-NEXT: srw 4, 4, 5
330 ; CHECK32-NEXT: subfic 5, 5, 32
331 ; CHECK32-NEXT: slw 3, 3, 5
332 ; CHECK32-NEXT: or 3, 3, 4
335 ; CHECK64-LABEL: fshr_i32:
337 ; CHECK64-NEXT: clrlwi 5, 5, 27
338 ; CHECK64-NEXT: subfic 6, 5, 32
339 ; CHECK64-NEXT: srw 4, 4, 5
340 ; CHECK64-NEXT: slw 3, 3, 6
341 ; CHECK64-NEXT: or 3, 3, 4
343 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
; fshr_i64 - funnel shift right of the i64 pair (%x, %y) by the variable %z.
; - The CHECK64 prefix (powerpc64le run) expects the amount masked to 6 bits
;   (clrlwi ..., 26) followed by subfic 64 / srd / sld / or.
; - The CHECK32_32 and CHECK32_64 prefixes (ppc32 run without and with
;   -mcpu=ppc64) expect the operation expanded into 32-bit slw/srw/or
;   sequences; the two prefixes differ in schedule and register assignment.
347 define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
348 ; CHECK32_32-LABEL: fshr_i64:
349 ; CHECK32_32: # %bb.0:
350 ; CHECK32_32-NEXT: clrlwi 7, 8, 26
351 ; CHECK32_32-NEXT: slwi 9, 4, 1
352 ; CHECK32_32-NEXT: not 8, 8
353 ; CHECK32_32-NEXT: rotlwi 4, 4, 1
354 ; CHECK32_32-NEXT: subfic 10, 7, 32
355 ; CHECK32_32-NEXT: srw 6, 6, 7
356 ; CHECK32_32-NEXT: clrlwi 8, 8, 26
357 ; CHECK32_32-NEXT: rlwimi 4, 3, 1, 0, 30
358 ; CHECK32_32-NEXT: slw 3, 5, 10
359 ; CHECK32_32-NEXT: slw 10, 9, 8
360 ; CHECK32_32-NEXT: slw 4, 4, 8
361 ; CHECK32_32-NEXT: or 3, 6, 3
362 ; CHECK32_32-NEXT: subfic 6, 8, 32
363 ; CHECK32_32-NEXT: addi 8, 8, -32
364 ; CHECK32_32-NEXT: srw 6, 9, 6
365 ; CHECK32_32-NEXT: slw 8, 9, 8
366 ; CHECK32_32-NEXT: addi 9, 7, -32
367 ; CHECK32_32-NEXT: srw 9, 5, 9
368 ; CHECK32_32-NEXT: or 3, 3, 9
369 ; CHECK32_32-NEXT: or 6, 4, 6
370 ; CHECK32_32-NEXT: or 4, 10, 3
371 ; CHECK32_32-NEXT: or 3, 6, 8
372 ; CHECK32_32-NEXT: srw 5, 5, 7
373 ; CHECK32_32-NEXT: or 3, 3, 5
374 ; CHECK32_32-NEXT: blr
376 ; CHECK32_64-LABEL: fshr_i64:
377 ; CHECK32_64: # %bb.0:
378 ; CHECK32_64-NEXT: rotlwi 7, 4, 1
379 ; CHECK32_64-NEXT: slwi 4, 4, 1
380 ; CHECK32_64-NEXT: rlwimi 7, 3, 1, 0, 30
381 ; CHECK32_64-NEXT: clrlwi 3, 8, 26
382 ; CHECK32_64-NEXT: not 8, 8
383 ; CHECK32_64-NEXT: subfic 9, 3, 32
384 ; CHECK32_64-NEXT: srw 6, 6, 3
385 ; CHECK32_64-NEXT: clrlwi 8, 8, 26
386 ; CHECK32_64-NEXT: slw 9, 5, 9
387 ; CHECK32_64-NEXT: addi 10, 3, -32
388 ; CHECK32_64-NEXT: or 6, 6, 9
389 ; CHECK32_64-NEXT: subfic 9, 8, 32
390 ; CHECK32_64-NEXT: srw 3, 5, 3
391 ; CHECK32_64-NEXT: srw 5, 5, 10
392 ; CHECK32_64-NEXT: slw 10, 4, 8
393 ; CHECK32_64-NEXT: slw 7, 7, 8
394 ; CHECK32_64-NEXT: addi 8, 8, -32
395 ; CHECK32_64-NEXT: srw 9, 4, 9
396 ; CHECK32_64-NEXT: slw 4, 4, 8
397 ; CHECK32_64-NEXT: or 7, 7, 9
398 ; CHECK32_64-NEXT: or 5, 6, 5
399 ; CHECK32_64-NEXT: or 6, 7, 4
400 ; CHECK32_64-NEXT: or 4, 10, 5
401 ; CHECK32_64-NEXT: or 3, 6, 3
402 ; CHECK32_64-NEXT: blr
404 ; CHECK64-LABEL: fshr_i64:
406 ; CHECK64-NEXT: clrlwi 5, 5, 26
407 ; CHECK64-NEXT: subfic 6, 5, 64
408 ; CHECK64-NEXT: srd 4, 4, 5
409 ; CHECK64-NEXT: sld 3, 3, 6
410 ; CHECK64-NEXT: or 3, 3, 4
412 %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
416 ; Verify that weird types are minimally supported.
; fshr_i37 - funnel shift right on a non-power-of-2 width (i37) with a
; variable shift amount, so the amount has to be reduced modulo 37 at run time.
; - The ppc32 prefixes (CHECK32_32 / CHECK32_64) expect a call to __umoddi3
;   with the constant 37 loaded as the second argument pair (li 5, 0 /
;   li 6, 37); the incoming r3-r6 values are saved in r27-r30 around the call,
;   hence the spill/reload and .cfi_offset lines.
; - The CHECK64 prefix expects the remainder computed inline with a
;   multiply-high (mulhdu), a shift (rldicl), mulli by 37 and sub.
; Unlike fshl_i37, every prefix then adds 27 to the remainder (addi ..., 27)
; before masking it - presumably 64 - 37, compensating for the operand being
; left-aligned in a 64-bit value; confirm against the legalizer.
417 declare i37 @llvm.fshr.i37(i37, i37, i37)
418 define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
419 ; CHECK32_32-LABEL: fshr_i37:
420 ; CHECK32_32: # %bb.0:
421 ; CHECK32_32-NEXT: mflr 0
422 ; CHECK32_32-NEXT: stw 0, 4(1)
423 ; CHECK32_32-NEXT: stwu 1, -32(1)
424 ; CHECK32_32-NEXT: .cfi_def_cfa_offset 32
425 ; CHECK32_32-NEXT: .cfi_offset lr, 4
426 ; CHECK32_32-NEXT: .cfi_offset r27, -20
427 ; CHECK32_32-NEXT: .cfi_offset r28, -16
428 ; CHECK32_32-NEXT: .cfi_offset r29, -12
429 ; CHECK32_32-NEXT: .cfi_offset r30, -8
430 ; CHECK32_32-NEXT: stw 27, 12(1) # 4-byte Folded Spill
431 ; CHECK32_32-NEXT: mr 27, 3
432 ; CHECK32_32-NEXT: stw 28, 16(1) # 4-byte Folded Spill
433 ; CHECK32_32-NEXT: mr 28, 4
434 ; CHECK32_32-NEXT: stw 29, 20(1) # 4-byte Folded Spill
435 ; CHECK32_32-NEXT: mr 29, 5
436 ; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
437 ; CHECK32_32-NEXT: mr 30, 6
438 ; CHECK32_32-NEXT: mr 3, 7
439 ; CHECK32_32-NEXT: mr 4, 8
440 ; CHECK32_32-NEXT: li 5, 0
441 ; CHECK32_32-NEXT: li 6, 37
442 ; CHECK32_32-NEXT: bl __umoddi3
443 ; CHECK32_32-NEXT: addi 4, 4, 27
444 ; CHECK32_32-NEXT: rotlwi 5, 30, 27
445 ; CHECK32_32-NEXT: clrlwi 8, 4, 26
446 ; CHECK32_32-NEXT: slwi 3, 30, 27
447 ; CHECK32_32-NEXT: rotlwi 7, 28, 1
448 ; CHECK32_32-NEXT: rlwimi 5, 29, 27, 0, 4
449 ; CHECK32_32-NEXT: not 4, 4
450 ; CHECK32_32-NEXT: subfic 9, 8, 32
451 ; CHECK32_32-NEXT: slwi 6, 28, 1
452 ; CHECK32_32-NEXT: rlwimi 7, 27, 1, 0, 30
453 ; CHECK32_32-NEXT: srw 3, 3, 8
454 ; CHECK32_32-NEXT: clrlwi 4, 4, 26
455 ; CHECK32_32-NEXT: slw 9, 5, 9
456 ; CHECK32_32-NEXT: slw 10, 6, 4
457 ; CHECK32_32-NEXT: slw 7, 7, 4
458 ; CHECK32_32-NEXT: or 3, 3, 9
459 ; CHECK32_32-NEXT: subfic 9, 4, 32
460 ; CHECK32_32-NEXT: addi 4, 4, -32
461 ; CHECK32_32-NEXT: srw 9, 6, 9
462 ; CHECK32_32-NEXT: slw 6, 6, 4
463 ; CHECK32_32-NEXT: addi 4, 8, -32
464 ; CHECK32_32-NEXT: srw 4, 5, 4
465 ; CHECK32_32-NEXT: or 3, 3, 4
466 ; CHECK32_32-NEXT: or 7, 7, 9
467 ; CHECK32_32-NEXT: or 4, 10, 3
468 ; CHECK32_32-NEXT: or 3, 7, 6
469 ; CHECK32_32-NEXT: srw 5, 5, 8
470 ; CHECK32_32-NEXT: or 3, 3, 5
471 ; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
472 ; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
473 ; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
474 ; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
475 ; CHECK32_32-NEXT: lwz 0, 36(1)
476 ; CHECK32_32-NEXT: addi 1, 1, 32
477 ; CHECK32_32-NEXT: mtlr 0
478 ; CHECK32_32-NEXT: blr
480 ; CHECK32_64-LABEL: fshr_i37:
481 ; CHECK32_64: # %bb.0:
482 ; CHECK32_64-NEXT: mflr 0
483 ; CHECK32_64-NEXT: stw 0, 4(1)
484 ; CHECK32_64-NEXT: stwu 1, -32(1)
485 ; CHECK32_64-NEXT: .cfi_def_cfa_offset 32
486 ; CHECK32_64-NEXT: .cfi_offset lr, 4
487 ; CHECK32_64-NEXT: .cfi_offset r27, -20
488 ; CHECK32_64-NEXT: .cfi_offset r28, -16
489 ; CHECK32_64-NEXT: .cfi_offset r29, -12
490 ; CHECK32_64-NEXT: .cfi_offset r30, -8
491 ; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
492 ; CHECK32_64-NEXT: mr 27, 3
493 ; CHECK32_64-NEXT: mr 3, 7
494 ; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
495 ; CHECK32_64-NEXT: mr 28, 4
496 ; CHECK32_64-NEXT: mr 4, 8
497 ; CHECK32_64-NEXT: stw 29, 20(1) # 4-byte Folded Spill
498 ; CHECK32_64-NEXT: mr 29, 5
499 ; CHECK32_64-NEXT: li 5, 0
500 ; CHECK32_64-NEXT: stw 30, 24(1) # 4-byte Folded Spill
501 ; CHECK32_64-NEXT: mr 30, 6
502 ; CHECK32_64-NEXT: li 6, 37
503 ; CHECK32_64-NEXT: bl __umoddi3
504 ; CHECK32_64-NEXT: addi 4, 4, 27
505 ; CHECK32_64-NEXT: rotlwi 3, 30, 27
506 ; CHECK32_64-NEXT: clrlwi 8, 4, 26
507 ; CHECK32_64-NEXT: rlwimi 3, 29, 27, 0, 4
508 ; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
509 ; CHECK32_64-NEXT: slwi 6, 30, 27
510 ; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
511 ; CHECK32_64-NEXT: not 4, 4
512 ; CHECK32_64-NEXT: subfic 9, 8, 32
513 ; CHECK32_64-NEXT: rotlwi 5, 28, 1
514 ; CHECK32_64-NEXT: srw 6, 6, 8
515 ; CHECK32_64-NEXT: clrlwi 4, 4, 26
516 ; CHECK32_64-NEXT: slw 9, 3, 9
517 ; CHECK32_64-NEXT: rlwimi 5, 27, 1, 0, 30
518 ; CHECK32_64-NEXT: slwi 7, 28, 1
519 ; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
520 ; CHECK32_64-NEXT: addi 10, 8, -32
521 ; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
522 ; CHECK32_64-NEXT: or 6, 6, 9
523 ; CHECK32_64-NEXT: subfic 9, 4, 32
524 ; CHECK32_64-NEXT: srw 8, 3, 8
525 ; CHECK32_64-NEXT: srw 3, 3, 10
526 ; CHECK32_64-NEXT: lwz 0, 36(1)
527 ; CHECK32_64-NEXT: slw 10, 7, 4
528 ; CHECK32_64-NEXT: slw 5, 5, 4
529 ; CHECK32_64-NEXT: addi 4, 4, -32
530 ; CHECK32_64-NEXT: srw 9, 7, 9
531 ; CHECK32_64-NEXT: slw 4, 7, 4
532 ; CHECK32_64-NEXT: or 5, 5, 9
533 ; CHECK32_64-NEXT: or 3, 6, 3
534 ; CHECK32_64-NEXT: or 5, 5, 4
535 ; CHECK32_64-NEXT: or 4, 10, 3
536 ; CHECK32_64-NEXT: or 3, 5, 8
537 ; CHECK32_64-NEXT: addi 1, 1, 32
538 ; CHECK32_64-NEXT: mtlr 0
539 ; CHECK32_64-NEXT: blr
541 ; CHECK64-LABEL: fshr_i37:
543 ; CHECK64-NEXT: lis 6, 28339
544 ; CHECK64-NEXT: sldi 4, 4, 27
545 ; CHECK64-NEXT: ori 6, 6, 58451
546 ; CHECK64-NEXT: rldic 6, 6, 33, 0
547 ; CHECK64-NEXT: oris 6, 6, 3542
548 ; CHECK64-NEXT: ori 6, 6, 31883
549 ; CHECK64-NEXT: mulhdu 6, 5, 6
550 ; CHECK64-NEXT: rldicl 6, 6, 59, 5
551 ; CHECK64-NEXT: mulli 6, 6, 37
552 ; CHECK64-NEXT: sub 5, 5, 6
553 ; CHECK64-NEXT: addi 5, 5, 27
554 ; CHECK64-NEXT: clrlwi 5, 5, 26
555 ; CHECK64-NEXT: subfic 6, 5, 64
556 ; CHECK64-NEXT: srd 4, 4, 5
557 ; CHECK64-NEXT: sld 3, 3, 6
558 ; CHECK64-NEXT: or 3, 3, 4
560 %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
564 ; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
; fshr_i7_const_fold - all three operands are constants (112, 127, 2), so the
; call is expected to fold completely: every run must emit just a load of the
; immediate 31 (0b0011111) into r3.
566 declare i7 @llvm.fshr.i7(i7, i7, i7)
567 define i7 @fshr_i7_const_fold() {
568 ; CHECK-LABEL: fshr_i7_const_fold:
570 ; CHECK-NEXT: li 3, 31
572 %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
576 ; With constant shift amount, this is rotate + insert (missing extended mnemonics).
; fshr_i32_const_shift - funnel shift right by the constant 9. All runs expect
; it expressed as a left rotate + insert by 23 (32 - 9): rotlwi of the second
; operand by 23, rlwimi of the first operand rotated by 23 under the mask 0..8,
; and a register move of the result into r3.
578 define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
579 ; CHECK-LABEL: fshr_i32_const_shift:
581 ; CHECK-NEXT: rotlwi 4, 4, 23
582 ; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8
583 ; CHECK-NEXT: mr 3, 4
585 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
589 ; Check modulo math on shift amount. 41-32=9.
; fshr_i32_const_overshift - the constant shift amount (41) is larger than the
; bit width. The expected instructions are identical to those of
; fshr_i32_const_shift (amount 9), i.e. the amount is reduced modulo 32 at
; compile time.
591 define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
592 ; CHECK-LABEL: fshr_i32_const_overshift:
594 ; CHECK-NEXT: rotlwi 4, 4, 23
595 ; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8
596 ; CHECK-NEXT: mr 3, 4
598 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
602 ; 64-bit should also work. 105-64 = 41.
; fshr_i64_const_overshift - i64 funnel shift right by the constant 105, which
; exceeds the bit width (105 mod 64 = 41, i.e. a left rotate amount of 23).
; - The CHECK64 prefix expects a 64-bit rotate + insert by 23
;   (rotldi / rldimi) and a move into r3.
; - The CHECK32 prefix expects two 32-bit rotate + insert pairs by 23 over the
;   32-bit halves, followed by two register moves into r3/r4.
604 define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
605 ; CHECK32-LABEL: fshr_i64_const_overshift:
607 ; CHECK32-NEXT: rotlwi 6, 4, 23
608 ; CHECK32-NEXT: rotlwi 5, 5, 23
609 ; CHECK32-NEXT: rlwimi 6, 3, 23, 0, 8
610 ; CHECK32-NEXT: rlwimi 5, 4, 23, 0, 8
611 ; CHECK32-NEXT: mr 3, 6
612 ; CHECK32-NEXT: mr 4, 5
615 ; CHECK64-LABEL: fshr_i64_const_overshift:
617 ; CHECK64-NEXT: rotldi 4, 4, 23
618 ; CHECK64-NEXT: rldimi 4, 3, 23, 0
619 ; CHECK64-NEXT: mr 3, 4
621 %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
625 ; This should work without any node-specific logic.
; fshr_i8_const_fold - constant operands (255, 0, 7); every run expects the
; call folded to a single immediate load of 254 (0b11111110: the low seven
; bits of 255 followed by the top bit of the zero second operand).
627 define i8 @fshr_i8_const_fold() {
628 ; CHECK-LABEL: fshr_i8_const_fold:
630 ; CHECK-NEXT: li 3, 254
632 %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
; fshl_i32_shift_by_bitwidth - funnel shift left by a constant equal to the
; bit width (32). Per the LangRef the amount is taken modulo the bit width, so
; this behaves like a shift by 0 and yields the first operand.
; NOTE(review): only the label check is visible in this excerpt; no
; instruction checks are shown for this function - confirm against the full
; file.
636 define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
637 ; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
640 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
; fshr_i32_shift_by_bitwidth - funnel shift right by a constant equal to the
; bit width (32). Per the LangRef the amount is taken modulo the bit width, so
; this behaves like a shift by 0 and yields the second operand. Every run
; expects a single register move from r4 to r3 (presumably %y into the return
; register - confirm against the calling convention).
644 define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
645 ; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
647 ; CHECK-NEXT: mr 3, 4
649 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
; fshl_v4i32_shift_by_bitwidth - vector form of the shift-by-bit-width case:
; every lane of the <4 x i32> amount is the constant 32.
; NOTE(review): only the label check is visible in this excerpt; no
; instruction checks are shown for this function - confirm against the full
; file.
653 define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
654 ; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
657 %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
; fshr_v4i32_shift_by_bitwidth - vector funnel shift right where every lane of
; the amount is the constant 32 (the bit width).
; - The CHECK32_32 prefix (ppc32 run without -mcpu) expects four scalar
;   register moves, r7-r10 into r3-r6.
; - The CHECK32_64 and CHECK64 prefixes expect a single vector register move
;   (vmr 2, 3).
; In all cases no shift instruction is expected. Presumably the moved
; registers hold %y, i.e. the result is the second operand - confirm against
; the calling convention.
661 define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
662 ; CHECK32_32-LABEL: fshr_v4i32_shift_by_bitwidth:
663 ; CHECK32_32: # %bb.0:
664 ; CHECK32_32-NEXT: mr 6, 10
665 ; CHECK32_32-NEXT: mr 5, 9
666 ; CHECK32_32-NEXT: mr 4, 8
667 ; CHECK32_32-NEXT: mr 3, 7
668 ; CHECK32_32-NEXT: blr
670 ; CHECK32_64-LABEL: fshr_v4i32_shift_by_bitwidth:
671 ; CHECK32_64: # %bb.0:
672 ; CHECK32_64-NEXT: vmr 2, 3
673 ; CHECK32_64-NEXT: blr
675 ; CHECK64-LABEL: fshr_v4i32_shift_by_bitwidth:
677 ; CHECK64-NEXT: vmr 2, 3
679 %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)