1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
3 ; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
4 ; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64
6 declare i8 @llvm.fshl.i8(i8, i8, i8)
7 declare i16 @llvm.fshl.i16(i16, i16, i16)
8 declare i32 @llvm.fshl.i32(i32, i32, i32)
9 declare i64 @llvm.fshl.i64(i64, i64, i64)
10 declare i128 @llvm.fshl.i128(i128, i128, i128)
11 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
13 declare i8 @llvm.fshr.i8(i8, i8, i8)
14 declare i16 @llvm.fshr.i16(i16, i16, i16)
15 declare i32 @llvm.fshr.i32(i32, i32, i32)
16 declare i64 @llvm.fshr.i64(i64, i64, i64)
17 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
19 ; General case - all operands can be variables.
21 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
22 ; CHECK-LABEL: fshl_i32:
24 ; CHECK-NEXT: clrlwi 5, 5, 27
25 ; CHECK-NEXT: slw 3, 3, 5
26 ; CHECK-NEXT: subfic 5, 5, 32
27 ; CHECK-NEXT: srw 4, 4, 5
28 ; CHECK-NEXT: or 3, 3, 4
30 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
34 define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
35 ; CHECK32-LABEL: fshl_i64:
37 ; CHECK32-NEXT: andi. 7, 8, 32
38 ; CHECK32-NEXT: mr 7, 5
39 ; CHECK32-NEXT: bne 0, .LBB1_2
40 ; CHECK32-NEXT: # %bb.1:
41 ; CHECK32-NEXT: mr 7, 4
42 ; CHECK32-NEXT: .LBB1_2:
43 ; CHECK32-NEXT: clrlwi 8, 8, 27
44 ; CHECK32-NEXT: subfic 9, 8, 32
45 ; CHECK32-NEXT: srw 10, 7, 9
46 ; CHECK32-NEXT: bne 0, .LBB1_4
47 ; CHECK32-NEXT: # %bb.3:
48 ; CHECK32-NEXT: mr 4, 3
49 ; CHECK32-NEXT: .LBB1_4:
50 ; CHECK32-NEXT: slw 3, 4, 8
51 ; CHECK32-NEXT: or 3, 3, 10
52 ; CHECK32-NEXT: bne 0, .LBB1_6
53 ; CHECK32-NEXT: # %bb.5:
54 ; CHECK32-NEXT: mr 6, 5
55 ; CHECK32-NEXT: .LBB1_6:
56 ; CHECK32-NEXT: srw 4, 6, 9
57 ; CHECK32-NEXT: slw 5, 7, 8
58 ; CHECK32-NEXT: or 4, 5, 4
61 ; CHECK64-LABEL: fshl_i64:
63 ; CHECK64-NEXT: clrlwi 5, 5, 26
64 ; CHECK64-NEXT: sld 3, 3, 5
65 ; CHECK64-NEXT: subfic 5, 5, 64
66 ; CHECK64-NEXT: srd 4, 4, 5
67 ; CHECK64-NEXT: or 3, 3, 4
69 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
73 define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
74 ; CHECK32_32-LABEL: fshl_i128:
75 ; CHECK32_32: # %bb.0:
76 ; CHECK32_32-NEXT: stwu 1, -32(1)
77 ; CHECK32_32-NEXT: lwz 12, 52(1)
78 ; CHECK32_32-NEXT: stw 29, 20(1) # 4-byte Folded Spill
79 ; CHECK32_32-NEXT: andi. 11, 12, 64
80 ; CHECK32_32-NEXT: mcrf 1, 0
81 ; CHECK32_32-NEXT: mr 11, 6
82 ; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
83 ; CHECK32_32-NEXT: bne 0, .LBB2_2
84 ; CHECK32_32-NEXT: # %bb.1:
85 ; CHECK32_32-NEXT: mr 11, 4
86 ; CHECK32_32-NEXT: .LBB2_2:
87 ; CHECK32_32-NEXT: mr 30, 7
88 ; CHECK32_32-NEXT: bne 1, .LBB2_4
89 ; CHECK32_32-NEXT: # %bb.3:
90 ; CHECK32_32-NEXT: mr 30, 5
91 ; CHECK32_32-NEXT: .LBB2_4:
92 ; CHECK32_32-NEXT: andi. 4, 12, 32
93 ; CHECK32_32-NEXT: mr 4, 30
94 ; CHECK32_32-NEXT: beq 0, .LBB2_18
95 ; CHECK32_32-NEXT: # %bb.5:
96 ; CHECK32_32-NEXT: beq 1, .LBB2_19
97 ; CHECK32_32-NEXT: .LBB2_6:
98 ; CHECK32_32-NEXT: beq 0, .LBB2_20
99 ; CHECK32_32-NEXT: .LBB2_7:
100 ; CHECK32_32-NEXT: mr 5, 8
101 ; CHECK32_32-NEXT: beq 1, .LBB2_21
102 ; CHECK32_32-NEXT: .LBB2_8:
103 ; CHECK32_32-NEXT: mr 3, 5
104 ; CHECK32_32-NEXT: beq 0, .LBB2_22
105 ; CHECK32_32-NEXT: .LBB2_9:
106 ; CHECK32_32-NEXT: clrlwi 6, 12, 27
107 ; CHECK32_32-NEXT: bne 1, .LBB2_11
108 ; CHECK32_32-NEXT: .LBB2_10:
109 ; CHECK32_32-NEXT: mr 9, 7
110 ; CHECK32_32-NEXT: .LBB2_11:
111 ; CHECK32_32-NEXT: subfic 7, 6, 32
112 ; CHECK32_32-NEXT: mr 12, 9
113 ; CHECK32_32-NEXT: bne 0, .LBB2_13
114 ; CHECK32_32-NEXT: # %bb.12:
115 ; CHECK32_32-NEXT: mr 12, 5
116 ; CHECK32_32-NEXT: .LBB2_13:
117 ; CHECK32_32-NEXT: srw 5, 4, 7
118 ; CHECK32_32-NEXT: slw 11, 11, 6
119 ; CHECK32_32-NEXT: srw 0, 3, 7
120 ; CHECK32_32-NEXT: slw 4, 4, 6
121 ; CHECK32_32-NEXT: srw 30, 12, 7
122 ; CHECK32_32-NEXT: slw 29, 3, 6
123 ; CHECK32_32-NEXT: bne 1, .LBB2_15
124 ; CHECK32_32-NEXT: # %bb.14:
125 ; CHECK32_32-NEXT: mr 10, 8
126 ; CHECK32_32-NEXT: .LBB2_15:
127 ; CHECK32_32-NEXT: or 3, 11, 5
128 ; CHECK32_32-NEXT: or 4, 4, 0
129 ; CHECK32_32-NEXT: or 5, 29, 30
130 ; CHECK32_32-NEXT: bne 0, .LBB2_17
131 ; CHECK32_32-NEXT: # %bb.16:
132 ; CHECK32_32-NEXT: mr 10, 9
133 ; CHECK32_32-NEXT: .LBB2_17:
134 ; CHECK32_32-NEXT: srw 7, 10, 7
135 ; CHECK32_32-NEXT: slw 6, 12, 6
136 ; CHECK32_32-NEXT: or 6, 6, 7
137 ; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
138 ; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
139 ; CHECK32_32-NEXT: addi 1, 1, 32
140 ; CHECK32_32-NEXT: blr
141 ; CHECK32_32-NEXT: .LBB2_18:
142 ; CHECK32_32-NEXT: mr 4, 11
143 ; CHECK32_32-NEXT: bne 1, .LBB2_6
144 ; CHECK32_32-NEXT: .LBB2_19:
145 ; CHECK32_32-NEXT: mr 5, 3
146 ; CHECK32_32-NEXT: bne 0, .LBB2_7
147 ; CHECK32_32-NEXT: .LBB2_20:
148 ; CHECK32_32-NEXT: mr 11, 5
149 ; CHECK32_32-NEXT: mr 5, 8
150 ; CHECK32_32-NEXT: bne 1, .LBB2_8
151 ; CHECK32_32-NEXT: .LBB2_21:
152 ; CHECK32_32-NEXT: mr 5, 6
153 ; CHECK32_32-NEXT: mr 3, 5
154 ; CHECK32_32-NEXT: bne 0, .LBB2_9
155 ; CHECK32_32-NEXT: .LBB2_22:
156 ; CHECK32_32-NEXT: mr 3, 30
157 ; CHECK32_32-NEXT: clrlwi 6, 12, 27
158 ; CHECK32_32-NEXT: beq 1, .LBB2_10
159 ; CHECK32_32-NEXT: b .LBB2_11
161 ; CHECK32_64-LABEL: fshl_i128:
162 ; CHECK32_64: # %bb.0:
163 ; CHECK32_64-NEXT: stwu 1, -32(1)
164 ; CHECK32_64-NEXT: lwz 12, 52(1)
165 ; CHECK32_64-NEXT: andi. 11, 12, 64
166 ; CHECK32_64-NEXT: stw 29, 20(1) # 4-byte Folded Spill
167 ; CHECK32_64-NEXT: mcrf 1, 0
168 ; CHECK32_64-NEXT: mr 11, 6
169 ; CHECK32_64-NEXT: stw 30, 24(1) # 4-byte Folded Spill
170 ; CHECK32_64-NEXT: bne 0, .LBB2_2
171 ; CHECK32_64-NEXT: # %bb.1:
172 ; CHECK32_64-NEXT: mr 11, 4
173 ; CHECK32_64-NEXT: .LBB2_2:
174 ; CHECK32_64-NEXT: mr 30, 7
175 ; CHECK32_64-NEXT: bne 1, .LBB2_4
176 ; CHECK32_64-NEXT: # %bb.3:
177 ; CHECK32_64-NEXT: mr 30, 5
178 ; CHECK32_64-NEXT: .LBB2_4:
179 ; CHECK32_64-NEXT: andi. 4, 12, 32
180 ; CHECK32_64-NEXT: mr 4, 30
181 ; CHECK32_64-NEXT: beq 0, .LBB2_18
182 ; CHECK32_64-NEXT: # %bb.5:
183 ; CHECK32_64-NEXT: beq 1, .LBB2_19
184 ; CHECK32_64-NEXT: .LBB2_6:
185 ; CHECK32_64-NEXT: beq 0, .LBB2_20
186 ; CHECK32_64-NEXT: .LBB2_7:
187 ; CHECK32_64-NEXT: mr 5, 8
188 ; CHECK32_64-NEXT: beq 1, .LBB2_21
189 ; CHECK32_64-NEXT: .LBB2_8:
190 ; CHECK32_64-NEXT: mr 3, 5
191 ; CHECK32_64-NEXT: beq 0, .LBB2_22
192 ; CHECK32_64-NEXT: .LBB2_9:
193 ; CHECK32_64-NEXT: clrlwi 6, 12, 27
194 ; CHECK32_64-NEXT: bne 1, .LBB2_11
195 ; CHECK32_64-NEXT: .LBB2_10:
196 ; CHECK32_64-NEXT: mr 9, 7
197 ; CHECK32_64-NEXT: .LBB2_11:
198 ; CHECK32_64-NEXT: subfic 7, 6, 32
199 ; CHECK32_64-NEXT: mr 12, 9
200 ; CHECK32_64-NEXT: bne 0, .LBB2_13
201 ; CHECK32_64-NEXT: # %bb.12:
202 ; CHECK32_64-NEXT: mr 12, 5
203 ; CHECK32_64-NEXT: .LBB2_13:
204 ; CHECK32_64-NEXT: srw 5, 4, 7
205 ; CHECK32_64-NEXT: slw 11, 11, 6
206 ; CHECK32_64-NEXT: srw 0, 3, 7
207 ; CHECK32_64-NEXT: slw 4, 4, 6
208 ; CHECK32_64-NEXT: srw 30, 12, 7
209 ; CHECK32_64-NEXT: slw 29, 3, 6
210 ; CHECK32_64-NEXT: bne 1, .LBB2_15
211 ; CHECK32_64-NEXT: # %bb.14:
212 ; CHECK32_64-NEXT: mr 10, 8
213 ; CHECK32_64-NEXT: .LBB2_15:
214 ; CHECK32_64-NEXT: or 3, 11, 5
215 ; CHECK32_64-NEXT: or 4, 4, 0
216 ; CHECK32_64-NEXT: or 5, 29, 30
217 ; CHECK32_64-NEXT: bne 0, .LBB2_17
218 ; CHECK32_64-NEXT: # %bb.16:
219 ; CHECK32_64-NEXT: mr 10, 9
220 ; CHECK32_64-NEXT: .LBB2_17:
221 ; CHECK32_64-NEXT: srw 7, 10, 7
222 ; CHECK32_64-NEXT: slw 6, 12, 6
223 ; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
224 ; CHECK32_64-NEXT: or 6, 6, 7
225 ; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
226 ; CHECK32_64-NEXT: addi 1, 1, 32
227 ; CHECK32_64-NEXT: blr
228 ; CHECK32_64-NEXT: .LBB2_18:
229 ; CHECK32_64-NEXT: mr 4, 11
230 ; CHECK32_64-NEXT: bne 1, .LBB2_6
231 ; CHECK32_64-NEXT: .LBB2_19:
232 ; CHECK32_64-NEXT: mr 5, 3
233 ; CHECK32_64-NEXT: bne 0, .LBB2_7
234 ; CHECK32_64-NEXT: .LBB2_20:
235 ; CHECK32_64-NEXT: mr 11, 5
236 ; CHECK32_64-NEXT: mr 5, 8
237 ; CHECK32_64-NEXT: bne 1, .LBB2_8
238 ; CHECK32_64-NEXT: .LBB2_21:
239 ; CHECK32_64-NEXT: mr 5, 6
240 ; CHECK32_64-NEXT: mr 3, 5
241 ; CHECK32_64-NEXT: bne 0, .LBB2_9
242 ; CHECK32_64-NEXT: .LBB2_22:
243 ; CHECK32_64-NEXT: mr 3, 30
244 ; CHECK32_64-NEXT: clrlwi 6, 12, 27
245 ; CHECK32_64-NEXT: beq 1, .LBB2_10
246 ; CHECK32_64-NEXT: b .LBB2_11
248 ; CHECK64-LABEL: fshl_i128:
250 ; CHECK64-NEXT: andi. 8, 7, 64
251 ; CHECK64-NEXT: clrlwi 7, 7, 26
252 ; CHECK64-NEXT: subfic 8, 7, 64
253 ; CHECK64-NEXT: iseleq 5, 6, 5
254 ; CHECK64-NEXT: iseleq 6, 3, 6
255 ; CHECK64-NEXT: iseleq 3, 4, 3
256 ; CHECK64-NEXT: srd 5, 5, 8
257 ; CHECK64-NEXT: sld 9, 6, 7
258 ; CHECK64-NEXT: srd 6, 6, 8
259 ; CHECK64-NEXT: sld 3, 3, 7
260 ; CHECK64-NEXT: or 5, 9, 5
261 ; CHECK64-NEXT: or 4, 3, 6
262 ; CHECK64-NEXT: mr 3, 5
264 %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
268 ; Verify that non-power-of-2 integer types (here i37) are minimally supported; the shift amount must be reduced modulo 37.
269 declare i37 @llvm.fshl.i37(i37, i37, i37)
270 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
271 ; CHECK32_32-LABEL: fshl_i37:
272 ; CHECK32_32: # %bb.0:
273 ; CHECK32_32-NEXT: mflr 0
274 ; CHECK32_32-NEXT: stwu 1, -32(1)
275 ; CHECK32_32-NEXT: stw 0, 36(1)
276 ; CHECK32_32-NEXT: .cfi_def_cfa_offset 32
277 ; CHECK32_32-NEXT: .cfi_offset lr, 4
278 ; CHECK32_32-NEXT: .cfi_offset r27, -20
279 ; CHECK32_32-NEXT: .cfi_offset r28, -16
280 ; CHECK32_32-NEXT: .cfi_offset r29, -12
281 ; CHECK32_32-NEXT: .cfi_offset r30, -8
282 ; CHECK32_32-NEXT: stw 27, 12(1) # 4-byte Folded Spill
283 ; CHECK32_32-NEXT: mr 27, 5
284 ; CHECK32_32-NEXT: stw 28, 16(1) # 4-byte Folded Spill
285 ; CHECK32_32-NEXT: mr 28, 3
286 ; CHECK32_32-NEXT: stw 29, 20(1) # 4-byte Folded Spill
287 ; CHECK32_32-NEXT: mr 29, 4
288 ; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
289 ; CHECK32_32-NEXT: mr 30, 6
290 ; CHECK32_32-NEXT: clrlwi 3, 7, 27
291 ; CHECK32_32-NEXT: mr 4, 8
292 ; CHECK32_32-NEXT: li 5, 0
293 ; CHECK32_32-NEXT: li 6, 37
294 ; CHECK32_32-NEXT: bl __umoddi3
295 ; CHECK32_32-NEXT: rotlwi 5, 30, 27
296 ; CHECK32_32-NEXT: rlwimi 5, 27, 27, 0, 4
297 ; CHECK32_32-NEXT: andi. 3, 4, 32
298 ; CHECK32_32-NEXT: mr 6, 5
299 ; CHECK32_32-NEXT: bne 0, .LBB3_2
300 ; CHECK32_32-NEXT: # %bb.1:
301 ; CHECK32_32-NEXT: mr 6, 29
302 ; CHECK32_32-NEXT: .LBB3_2:
303 ; CHECK32_32-NEXT: clrlwi 4, 4, 27
304 ; CHECK32_32-NEXT: subfic 7, 4, 32
305 ; CHECK32_32-NEXT: srw 3, 6, 7
306 ; CHECK32_32-NEXT: bne 0, .LBB3_4
307 ; CHECK32_32-NEXT: # %bb.3:
308 ; CHECK32_32-NEXT: mr 29, 28
309 ; CHECK32_32-NEXT: .LBB3_4:
310 ; CHECK32_32-NEXT: slw 8, 29, 4
311 ; CHECK32_32-NEXT: or 3, 8, 3
312 ; CHECK32_32-NEXT: beq 0, .LBB3_6
313 ; CHECK32_32-NEXT: # %bb.5:
314 ; CHECK32_32-NEXT: slwi 5, 30, 27
315 ; CHECK32_32-NEXT: .LBB3_6:
316 ; CHECK32_32-NEXT: srw 5, 5, 7
317 ; CHECK32_32-NEXT: slw 4, 6, 4
318 ; CHECK32_32-NEXT: or 4, 4, 5
319 ; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
320 ; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
321 ; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
322 ; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
323 ; CHECK32_32-NEXT: lwz 0, 36(1)
324 ; CHECK32_32-NEXT: addi 1, 1, 32
325 ; CHECK32_32-NEXT: mtlr 0
326 ; CHECK32_32-NEXT: blr
328 ; CHECK32_64-LABEL: fshl_i37:
329 ; CHECK32_64: # %bb.0:
330 ; CHECK32_64-NEXT: mflr 0
331 ; CHECK32_64-NEXT: stwu 1, -32(1)
332 ; CHECK32_64-NEXT: stw 0, 36(1)
333 ; CHECK32_64-NEXT: .cfi_def_cfa_offset 32
334 ; CHECK32_64-NEXT: .cfi_offset lr, 4
335 ; CHECK32_64-NEXT: .cfi_offset r27, -20
336 ; CHECK32_64-NEXT: .cfi_offset r28, -16
337 ; CHECK32_64-NEXT: .cfi_offset r29, -12
338 ; CHECK32_64-NEXT: .cfi_offset r30, -8
339 ; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
340 ; CHECK32_64-NEXT: mr 27, 5
341 ; CHECK32_64-NEXT: li 5, 0
342 ; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
343 ; CHECK32_64-NEXT: mr 28, 3
344 ; CHECK32_64-NEXT: clrlwi 3, 7, 27
345 ; CHECK32_64-NEXT: stw 29, 20(1) # 4-byte Folded Spill
346 ; CHECK32_64-NEXT: mr 29, 4
347 ; CHECK32_64-NEXT: mr 4, 8
348 ; CHECK32_64-NEXT: stw 30, 24(1) # 4-byte Folded Spill
349 ; CHECK32_64-NEXT: mr 30, 6
350 ; CHECK32_64-NEXT: li 6, 37
351 ; CHECK32_64-NEXT: bl __umoddi3
352 ; CHECK32_64-NEXT: rotlwi 5, 30, 27
353 ; CHECK32_64-NEXT: andi. 3, 4, 32
354 ; CHECK32_64-NEXT: rlwimi 5, 27, 27, 0, 4
355 ; CHECK32_64-NEXT: mr 6, 5
356 ; CHECK32_64-NEXT: bne 0, .LBB3_2
357 ; CHECK32_64-NEXT: # %bb.1:
358 ; CHECK32_64-NEXT: mr 6, 29
359 ; CHECK32_64-NEXT: .LBB3_2:
360 ; CHECK32_64-NEXT: clrlwi 4, 4, 27
361 ; CHECK32_64-NEXT: subfic 7, 4, 32
362 ; CHECK32_64-NEXT: srw 3, 6, 7
363 ; CHECK32_64-NEXT: bne 0, .LBB3_4
364 ; CHECK32_64-NEXT: # %bb.3:
365 ; CHECK32_64-NEXT: mr 29, 28
366 ; CHECK32_64-NEXT: .LBB3_4:
367 ; CHECK32_64-NEXT: slw 8, 29, 4
368 ; CHECK32_64-NEXT: or 3, 8, 3
369 ; CHECK32_64-NEXT: beq 0, .LBB3_6
370 ; CHECK32_64-NEXT: # %bb.5:
371 ; CHECK32_64-NEXT: slwi 5, 30, 27
372 ; CHECK32_64-NEXT: .LBB3_6:
373 ; CHECK32_64-NEXT: srw 5, 5, 7
374 ; CHECK32_64-NEXT: slw 4, 6, 4
375 ; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
376 ; CHECK32_64-NEXT: or 4, 4, 5
377 ; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
378 ; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
379 ; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
380 ; CHECK32_64-NEXT: lwz 0, 36(1)
381 ; CHECK32_64-NEXT: addi 1, 1, 32
382 ; CHECK32_64-NEXT: mtlr 0
383 ; CHECK32_64-NEXT: blr
385 ; CHECK64-LABEL: fshl_i37:
387 ; CHECK64-NEXT: lis 7, 1771
388 ; CHECK64-NEXT: clrldi 6, 5, 27
389 ; CHECK64-NEXT: sldi 4, 4, 27
390 ; CHECK64-NEXT: ori 7, 7, 15941
391 ; CHECK64-NEXT: rldic 7, 7, 32, 5
392 ; CHECK64-NEXT: oris 7, 7, 12398
393 ; CHECK64-NEXT: ori 7, 7, 46053
394 ; CHECK64-NEXT: mulhdu 6, 6, 7
395 ; CHECK64-NEXT: mulli 6, 6, 37
396 ; CHECK64-NEXT: sub 5, 5, 6
397 ; CHECK64-NEXT: clrlwi 5, 5, 26
398 ; CHECK64-NEXT: sld 3, 3, 5
399 ; CHECK64-NEXT: subfic 5, 5, 64
400 ; CHECK64-NEXT: srd 4, 4, 5
401 ; CHECK64-NEXT: or 3, 3, 4
403 %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
407 ; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
409 declare i7 @llvm.fshl.i7(i7, i7, i7)
410 define i7 @fshl_i7_const_fold() {
411 ; CHECK-LABEL: fshl_i7_const_fold:
413 ; CHECK-NEXT: li 3, 67
415 %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
419 ; With constant shift amount, this is rotate + insert (missing extended mnemonics).
421 define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
422 ; CHECK-LABEL: fshl_i32_const_shift:
424 ; CHECK-NEXT: rotlwi 4, 4, 9
425 ; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22
426 ; CHECK-NEXT: mr 3, 4
428 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
432 ; Check modulo math on shift amount. 41-32=9.
434 define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
435 ; CHECK-LABEL: fshl_i32_const_overshift:
437 ; CHECK-NEXT: rotlwi 4, 4, 9
438 ; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22
439 ; CHECK-NEXT: mr 3, 4
441 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
445 ; 64-bit should also work. 105-64 = 41.
447 define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
448 ; CHECK32-LABEL: fshl_i64_const_overshift:
450 ; CHECK32-NEXT: rotlwi 6, 6, 9
451 ; CHECK32-NEXT: rotlwi 3, 5, 9
452 ; CHECK32-NEXT: rlwimi 6, 5, 9, 0, 22
453 ; CHECK32-NEXT: rlwimi 3, 4, 9, 0, 22
454 ; CHECK32-NEXT: mr 4, 6
457 ; CHECK64-LABEL: fshl_i64_const_overshift:
459 ; CHECK64-NEXT: rotldi 4, 4, 41
460 ; CHECK64-NEXT: rldimi 4, 3, 41, 0
461 ; CHECK64-NEXT: mr 3, 4
463 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
467 ; This should constant-fold without any node-specific logic: fshl(0xFF, 0x00, 7) = 0x80 (128).
469 define i8 @fshl_i8_const_fold() {
470 ; CHECK-LABEL: fshl_i8_const_fold:
472 ; CHECK-NEXT: li 3, 128
474 %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
478 ; Repeat the tests above for funnel shift right (fshr), except for the i128 case.
480 ; General case - all operands can be variables.
482 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
483 ; CHECK-LABEL: fshr_i32:
485 ; CHECK-NEXT: clrlwi 5, 5, 27
486 ; CHECK-NEXT: srw 4, 4, 5
487 ; CHECK-NEXT: subfic 5, 5, 32
488 ; CHECK-NEXT: slw 3, 3, 5
489 ; CHECK-NEXT: or 3, 3, 4
491 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
495 define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
496 ; CHECK32-LABEL: fshr_i64:
498 ; CHECK32-NEXT: andi. 7, 8, 32
499 ; CHECK32-NEXT: mr 7, 5
500 ; CHECK32-NEXT: beq 0, .LBB10_2
501 ; CHECK32-NEXT: # %bb.1:
502 ; CHECK32-NEXT: mr 7, 4
503 ; CHECK32-NEXT: .LBB10_2:
504 ; CHECK32-NEXT: clrlwi 8, 8, 27
505 ; CHECK32-NEXT: srw 10, 7, 8
506 ; CHECK32-NEXT: beq 0, .LBB10_4
507 ; CHECK32-NEXT: # %bb.3:
508 ; CHECK32-NEXT: mr 4, 3
509 ; CHECK32-NEXT: .LBB10_4:
510 ; CHECK32-NEXT: subfic 9, 8, 32
511 ; CHECK32-NEXT: slw 3, 4, 9
512 ; CHECK32-NEXT: or 3, 3, 10
513 ; CHECK32-NEXT: beq 0, .LBB10_6
514 ; CHECK32-NEXT: # %bb.5:
515 ; CHECK32-NEXT: mr 6, 5
516 ; CHECK32-NEXT: .LBB10_6:
517 ; CHECK32-NEXT: srw 4, 6, 8
518 ; CHECK32-NEXT: slw 5, 7, 9
519 ; CHECK32-NEXT: or 4, 5, 4
522 ; CHECK64-LABEL: fshr_i64:
524 ; CHECK64-NEXT: clrlwi 5, 5, 26
525 ; CHECK64-NEXT: srd 4, 4, 5
526 ; CHECK64-NEXT: subfic 5, 5, 64
527 ; CHECK64-NEXT: sld 3, 3, 5
528 ; CHECK64-NEXT: or 3, 3, 4
530 %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
534 ; Verify that non-power-of-2 integer types (here i37) are minimally supported; the shift amount must be reduced modulo 37.
535 declare i37 @llvm.fshr.i37(i37, i37, i37)
536 define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
537 ; CHECK32_32-LABEL: fshr_i37:
538 ; CHECK32_32: # %bb.0:
539 ; CHECK32_32-NEXT: mflr 0
540 ; CHECK32_32-NEXT: stwu 1, -32(1)
541 ; CHECK32_32-NEXT: stw 0, 36(1)
542 ; CHECK32_32-NEXT: .cfi_def_cfa_offset 32
543 ; CHECK32_32-NEXT: .cfi_offset lr, 4
544 ; CHECK32_32-NEXT: .cfi_offset r27, -20
545 ; CHECK32_32-NEXT: .cfi_offset r28, -16
546 ; CHECK32_32-NEXT: .cfi_offset r29, -12
547 ; CHECK32_32-NEXT: .cfi_offset r30, -8
548 ; CHECK32_32-NEXT: stw 27, 12(1) # 4-byte Folded Spill
549 ; CHECK32_32-NEXT: mr 27, 5
550 ; CHECK32_32-NEXT: stw 28, 16(1) # 4-byte Folded Spill
551 ; CHECK32_32-NEXT: mr 28, 3
552 ; CHECK32_32-NEXT: stw 29, 20(1) # 4-byte Folded Spill
553 ; CHECK32_32-NEXT: mr 29, 4
554 ; CHECK32_32-NEXT: stw 30, 24(1) # 4-byte Folded Spill
555 ; CHECK32_32-NEXT: mr 30, 6
556 ; CHECK32_32-NEXT: clrlwi 3, 7, 27
557 ; CHECK32_32-NEXT: mr 4, 8
558 ; CHECK32_32-NEXT: li 5, 0
559 ; CHECK32_32-NEXT: li 6, 37
560 ; CHECK32_32-NEXT: bl __umoddi3
561 ; CHECK32_32-NEXT: rotlwi 5, 30, 27
562 ; CHECK32_32-NEXT: addi 3, 4, 27
563 ; CHECK32_32-NEXT: andi. 4, 3, 32
564 ; CHECK32_32-NEXT: rlwimi 5, 27, 27, 0, 4
565 ; CHECK32_32-NEXT: mr 4, 5
566 ; CHECK32_32-NEXT: beq 0, .LBB11_2
567 ; CHECK32_32-NEXT: # %bb.1:
568 ; CHECK32_32-NEXT: mr 4, 29
569 ; CHECK32_32-NEXT: .LBB11_2:
570 ; CHECK32_32-NEXT: clrlwi 6, 3, 27
571 ; CHECK32_32-NEXT: srw 3, 4, 6
572 ; CHECK32_32-NEXT: beq 0, .LBB11_4
573 ; CHECK32_32-NEXT: # %bb.3:
574 ; CHECK32_32-NEXT: mr 29, 28
575 ; CHECK32_32-NEXT: .LBB11_4:
576 ; CHECK32_32-NEXT: subfic 7, 6, 32
577 ; CHECK32_32-NEXT: slw 8, 29, 7
578 ; CHECK32_32-NEXT: or 3, 8, 3
579 ; CHECK32_32-NEXT: bne 0, .LBB11_6
580 ; CHECK32_32-NEXT: # %bb.5:
581 ; CHECK32_32-NEXT: slwi 5, 30, 27
582 ; CHECK32_32-NEXT: .LBB11_6:
583 ; CHECK32_32-NEXT: srw 5, 5, 6
584 ; CHECK32_32-NEXT: slw 4, 4, 7
585 ; CHECK32_32-NEXT: or 4, 4, 5
586 ; CHECK32_32-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
587 ; CHECK32_32-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
588 ; CHECK32_32-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
589 ; CHECK32_32-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
590 ; CHECK32_32-NEXT: lwz 0, 36(1)
591 ; CHECK32_32-NEXT: addi 1, 1, 32
592 ; CHECK32_32-NEXT: mtlr 0
593 ; CHECK32_32-NEXT: blr
595 ; CHECK32_64-LABEL: fshr_i37:
596 ; CHECK32_64: # %bb.0:
597 ; CHECK32_64-NEXT: mflr 0
598 ; CHECK32_64-NEXT: stwu 1, -32(1)
599 ; CHECK32_64-NEXT: stw 0, 36(1)
600 ; CHECK32_64-NEXT: .cfi_def_cfa_offset 32
601 ; CHECK32_64-NEXT: .cfi_offset lr, 4
602 ; CHECK32_64-NEXT: .cfi_offset r27, -20
603 ; CHECK32_64-NEXT: .cfi_offset r28, -16
604 ; CHECK32_64-NEXT: .cfi_offset r29, -12
605 ; CHECK32_64-NEXT: .cfi_offset r30, -8
606 ; CHECK32_64-NEXT: stw 27, 12(1) # 4-byte Folded Spill
607 ; CHECK32_64-NEXT: mr 27, 5
608 ; CHECK32_64-NEXT: li 5, 0
609 ; CHECK32_64-NEXT: stw 28, 16(1) # 4-byte Folded Spill
610 ; CHECK32_64-NEXT: mr 28, 3
611 ; CHECK32_64-NEXT: clrlwi 3, 7, 27
612 ; CHECK32_64-NEXT: stw 29, 20(1) # 4-byte Folded Spill
613 ; CHECK32_64-NEXT: mr 29, 4
614 ; CHECK32_64-NEXT: mr 4, 8
615 ; CHECK32_64-NEXT: stw 30, 24(1) # 4-byte Folded Spill
616 ; CHECK32_64-NEXT: mr 30, 6
617 ; CHECK32_64-NEXT: li 6, 37
618 ; CHECK32_64-NEXT: bl __umoddi3
619 ; CHECK32_64-NEXT: rotlwi 5, 30, 27
620 ; CHECK32_64-NEXT: addi 3, 4, 27
621 ; CHECK32_64-NEXT: andi. 4, 3, 32
622 ; CHECK32_64-NEXT: rlwimi 5, 27, 27, 0, 4
623 ; CHECK32_64-NEXT: mr 4, 5
624 ; CHECK32_64-NEXT: beq 0, .LBB11_2
625 ; CHECK32_64-NEXT: # %bb.1:
626 ; CHECK32_64-NEXT: mr 4, 29
627 ; CHECK32_64-NEXT: .LBB11_2:
628 ; CHECK32_64-NEXT: clrlwi 6, 3, 27
629 ; CHECK32_64-NEXT: srw 3, 4, 6
630 ; CHECK32_64-NEXT: beq 0, .LBB11_4
631 ; CHECK32_64-NEXT: # %bb.3:
632 ; CHECK32_64-NEXT: mr 29, 28
633 ; CHECK32_64-NEXT: .LBB11_4:
634 ; CHECK32_64-NEXT: subfic 7, 6, 32
635 ; CHECK32_64-NEXT: slw 8, 29, 7
636 ; CHECK32_64-NEXT: or 3, 8, 3
637 ; CHECK32_64-NEXT: bne 0, .LBB11_6
638 ; CHECK32_64-NEXT: # %bb.5:
639 ; CHECK32_64-NEXT: slwi 5, 30, 27
640 ; CHECK32_64-NEXT: .LBB11_6:
641 ; CHECK32_64-NEXT: srw 5, 5, 6
642 ; CHECK32_64-NEXT: slw 4, 4, 7
643 ; CHECK32_64-NEXT: lwz 30, 24(1) # 4-byte Folded Reload
644 ; CHECK32_64-NEXT: or 4, 4, 5
645 ; CHECK32_64-NEXT: lwz 29, 20(1) # 4-byte Folded Reload
646 ; CHECK32_64-NEXT: lwz 28, 16(1) # 4-byte Folded Reload
647 ; CHECK32_64-NEXT: lwz 27, 12(1) # 4-byte Folded Reload
648 ; CHECK32_64-NEXT: lwz 0, 36(1)
649 ; CHECK32_64-NEXT: addi 1, 1, 32
650 ; CHECK32_64-NEXT: mtlr 0
651 ; CHECK32_64-NEXT: blr
653 ; CHECK64-LABEL: fshr_i37:
655 ; CHECK64-NEXT: lis 7, 1771
656 ; CHECK64-NEXT: clrldi 6, 5, 27
657 ; CHECK64-NEXT: sldi 4, 4, 27
658 ; CHECK64-NEXT: ori 7, 7, 15941
659 ; CHECK64-NEXT: rldic 7, 7, 32, 5
660 ; CHECK64-NEXT: oris 7, 7, 12398
661 ; CHECK64-NEXT: ori 7, 7, 46053
662 ; CHECK64-NEXT: mulhdu 6, 6, 7
663 ; CHECK64-NEXT: mulli 6, 6, 37
664 ; CHECK64-NEXT: sub 5, 5, 6
665 ; CHECK64-NEXT: addi 5, 5, 27
666 ; CHECK64-NEXT: clrlwi 5, 5, 26
667 ; CHECK64-NEXT: srd 4, 4, 5
668 ; CHECK64-NEXT: subfic 5, 5, 64
669 ; CHECK64-NEXT: sld 3, 3, 5
670 ; CHECK64-NEXT: or 3, 3, 4
672 %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
676 ; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
678 declare i7 @llvm.fshr.i7(i7, i7, i7)
679 define i7 @fshr_i7_const_fold() {
680 ; CHECK-LABEL: fshr_i7_const_fold:
682 ; CHECK-NEXT: li 3, 31
684 %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
688 ; With constant shift amount, this is rotate + insert (missing extended mnemonics). A funnel shift right by 9 is emitted as a left rotate + insert by 32-9=23.
690 define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
691 ; CHECK-LABEL: fshr_i32_const_shift:
693 ; CHECK-NEXT: rotlwi 4, 4, 23
694 ; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8
695 ; CHECK-NEXT: mr 3, 4
697 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
701 ; Check modulo math on shift amount. 41-32=9.
703 define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
704 ; CHECK-LABEL: fshr_i32_const_overshift:
706 ; CHECK-NEXT: rotlwi 4, 4, 23
707 ; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8
708 ; CHECK-NEXT: mr 3, 4
710 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
714 ; 64-bit should also work. 105-64 = 41.
716 define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
717 ; CHECK32-LABEL: fshr_i64_const_overshift:
719 ; CHECK32-NEXT: rotlwi 6, 4, 23
720 ; CHECK32-NEXT: rotlwi 5, 5, 23
721 ; CHECK32-NEXT: rlwimi 6, 3, 23, 0, 8
722 ; CHECK32-NEXT: rlwimi 5, 4, 23, 0, 8
723 ; CHECK32-NEXT: mr 3, 6
724 ; CHECK32-NEXT: mr 4, 5
727 ; CHECK64-LABEL: fshr_i64_const_overshift:
729 ; CHECK64-NEXT: rotldi 4, 4, 23
730 ; CHECK64-NEXT: rldimi 4, 3, 23, 0
731 ; CHECK64-NEXT: mr 3, 4
733 %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
737 ; This should constant-fold without any node-specific logic: fshr(0xFF, 0x00, 7) = 0xFE (254).
739 define i8 @fshr_i8_const_fold() {
740 ; CHECK-LABEL: fshr_i8_const_fold:
742 ; CHECK-NEXT: li 3, 254
744 %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
748 define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
749 ; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
752 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
756 define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
757 ; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
759 ; CHECK-NEXT: mr 3, 4
761 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
765 define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
766 ; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
769 %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
773 define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
774 ; CHECK32_32-LABEL: fshr_v4i32_shift_by_bitwidth:
775 ; CHECK32_32: # %bb.0:
776 ; CHECK32_32-NEXT: mr 6, 10
777 ; CHECK32_32-NEXT: mr 5, 9
778 ; CHECK32_32-NEXT: mr 4, 8
779 ; CHECK32_32-NEXT: mr 3, 7
780 ; CHECK32_32-NEXT: blr
782 ; CHECK32_64-LABEL: fshr_v4i32_shift_by_bitwidth:
783 ; CHECK32_64: # %bb.0:
784 ; CHECK32_64-NEXT: vmr 2, 3
785 ; CHECK32_64-NEXT: blr
787 ; CHECK64-LABEL: fshr_v4i32_shift_by_bitwidth:
789 ; CHECK64-NEXT: vmr 2, 3
791 %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)