1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-- -mattr=sse2 | FileCheck %s --check-prefixes=ANY,X32-SSE2
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=ANY,X64-AVX2
; Declarations of the llvm.fshl.* and llvm.fshr.* funnel-shift intrinsics
; for i8, i16, i32, i64 and <4 x i32>. Each takes three operands of the
; same type and returns that type.
5 declare i8 @llvm.fshl.i8(i8, i8, i8)
6 declare i16 @llvm.fshl.i16(i16, i16, i16)
7 declare i32 @llvm.fshl.i32(i32, i32, i32)
8 declare i64 @llvm.fshl.i64(i64, i64, i64)
9 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
11 declare i8 @llvm.fshr.i8(i8, i8, i8)
12 declare i16 @llvm.fshr.i16(i16, i16, i16)
13 declare i32 @llvm.fshr.i32(i32, i32, i32)
14 declare i64 @llvm.fshr.i64(i64, i64, i64)
15 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
17 ; General case - all operands can be variables
; Calls @llvm.fshl.i32 with three variable operands (%x, %y, %z).
; Both check prefixes expect a single `shldl` whose count is in %cl.
19 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
20 ; X32-SSE2-LABEL: fshl_i32:
22 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
23 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
24 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
25 ; X32-SSE2-NEXT: shldl %cl, %edx, %eax
28 ; X64-AVX2-LABEL: fshl_i32:
30 ; X64-AVX2-NEXT: movl %edx, %ecx
31 ; X64-AVX2-NEXT: movl %edi, %eax
32 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
33 ; X64-AVX2-NEXT: shldl %cl, %esi, %eax
35 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
39 ; Verify that weird types are minimally supported.
; Funnel shift left on the non-power-of-two width i37 with variable operands.
; X32-SSE2 expects a call to __umoddi3 with 37 pushed as an argument;
; X64-AVX2 expects no call, using a mulq/shrq/leaq/subq sequence instead.
; Both then shift left by the reduced amount, shift right by 37 minus that
; amount, OR the pieces together and finish with cmove instructions.
40 declare i37 @llvm.fshl.i37(i37, i37, i37)
41 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
42 ; X32-SSE2-LABEL: fshl_i37:
44 ; X32-SSE2-NEXT: pushl %ebp
45 ; X32-SSE2-NEXT: pushl %ebx
46 ; X32-SSE2-NEXT: pushl %edi
47 ; X32-SSE2-NEXT: pushl %esi
48 ; X32-SSE2-NEXT: pushl %eax
49 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
50 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
51 ; X32-SSE2-NEXT: andl $31, %esi
52 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
53 ; X32-SSE2-NEXT: andl $31, %eax
54 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
55 ; X32-SSE2-NEXT: pushl $0
56 ; X32-SSE2-NEXT: pushl $37
57 ; X32-SSE2-NEXT: pushl %eax
58 ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
59 ; X32-SSE2-NEXT: calll __umoddi3
60 ; X32-SSE2-NEXT: addl $16, %esp
61 ; X32-SSE2-NEXT: movl %eax, %ebx
62 ; X32-SSE2-NEXT: movl %edx, (%esp) # 4-byte Spill
63 ; X32-SSE2-NEXT: movl %ebp, %edx
64 ; X32-SSE2-NEXT: movl %ebx, %ecx
65 ; X32-SSE2-NEXT: shll %cl, %ebp
66 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
67 ; X32-SSE2-NEXT: shldl %cl, %edx, %eax
68 ; X32-SSE2-NEXT: xorl %ecx, %ecx
69 ; X32-SSE2-NEXT: testb $32, %bl
70 ; X32-SSE2-NEXT: cmovnel %ebp, %eax
71 ; X32-SSE2-NEXT: cmovnel %ecx, %ebp
72 ; X32-SSE2-NEXT: xorl %edx, %edx
73 ; X32-SSE2-NEXT: movb $37, %cl
74 ; X32-SSE2-NEXT: subb %bl, %cl
75 ; X32-SSE2-NEXT: shrdl %cl, %esi, %edi
76 ; X32-SSE2-NEXT: shrl %cl, %esi
77 ; X32-SSE2-NEXT: testb $32, %cl
78 ; X32-SSE2-NEXT: cmovnel %esi, %edi
79 ; X32-SSE2-NEXT: cmovnel %edx, %esi
80 ; X32-SSE2-NEXT: orl %eax, %esi
81 ; X32-SSE2-NEXT: orl %ebp, %edi
82 ; X32-SSE2-NEXT: orl %ebx, (%esp) # 4-byte Folded Spill
83 ; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edi
84 ; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
85 ; X32-SSE2-NEXT: movl %edi, %eax
86 ; X32-SSE2-NEXT: movl %esi, %edx
87 ; X32-SSE2-NEXT: addl $4, %esp
88 ; X32-SSE2-NEXT: popl %esi
89 ; X32-SSE2-NEXT: popl %edi
90 ; X32-SSE2-NEXT: popl %ebx
91 ; X32-SSE2-NEXT: popl %ebp
94 ; X64-AVX2-LABEL: fshl_i37:
96 ; X64-AVX2-NEXT: movq %rdx, %r8
97 ; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
98 ; X64-AVX2-NEXT: andq %rax, %rsi
99 ; X64-AVX2-NEXT: andq %rax, %r8
100 ; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rcx # imm = 0xDD67C8A60DD67C8B
101 ; X64-AVX2-NEXT: movq %r8, %rax
102 ; X64-AVX2-NEXT: mulq %rcx
103 ; X64-AVX2-NEXT: shrq $5, %rdx
104 ; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax
105 ; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax
106 ; X64-AVX2-NEXT: subq %rax, %r8
107 ; X64-AVX2-NEXT: movq %rdi, %rax
108 ; X64-AVX2-NEXT: movl %r8d, %ecx
109 ; X64-AVX2-NEXT: shlq %cl, %rax
110 ; X64-AVX2-NEXT: movl $37, %ecx
111 ; X64-AVX2-NEXT: subl %r8d, %ecx
112 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
113 ; X64-AVX2-NEXT: shrq %cl, %rsi
114 ; X64-AVX2-NEXT: orq %rax, %rsi
115 ; X64-AVX2-NEXT: testq %r8, %r8
116 ; X64-AVX2-NEXT: cmoveq %rdi, %rsi
117 ; X64-AVX2-NEXT: movq %rsi, %rax
118 ; X64-AVX2-NEXT: retq
119 %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
123 ; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
; fshl.i7 with all-constant operands (112, 127, 2). Under the shared ANY
; prefix both runs expect only `movb $67, %al` followed by the return, with
; no shift instruction.
; NOTE(review): unlike the other definitions visible here, this one is not
; marked nounwind - confirm whether that is intentional.
125 declare i7 @llvm.fshl.i7(i7, i7, i7)
126 define i7 @fshl_i7_const_fold() {
127 ; ANY-LABEL: fshl_i7_const_fold:
129 ; ANY-NEXT: movb $67, %al
130 ; ANY-NEXT: ret{{[l|q]}}
131 %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
135 ; With constant shift amount, this is 'shld' with constant operand.
; fshl.i32 with variable data operands and a constant amount of 9.
; Both runs expect `shldl $9`.
137 define i32 @fshl_i32_const_shift(i32 %x, i32 %y) nounwind {
138 ; X32-SSE2-LABEL: fshl_i32_const_shift:
140 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
141 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
142 ; X32-SSE2-NEXT: shldl $9, %ecx, %eax
143 ; X32-SSE2-NEXT: retl
145 ; X64-AVX2-LABEL: fshl_i32_const_shift:
147 ; X64-AVX2-NEXT: movl %edi, %eax
148 ; X64-AVX2-NEXT: shldl $9, %esi, %eax
149 ; X64-AVX2-NEXT: retq
150 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
154 ; Check modulo math on shift amount.
; fshl.i32 with a constant amount of 41, which exceeds the 32-bit width.
; Both runs expect `shldl $9` (41 - 32 = 9), the same output as a shift by 9.
156 define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
157 ; X32-SSE2-LABEL: fshl_i32_const_overshift:
159 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
160 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
161 ; X32-SSE2-NEXT: shldl $9, %ecx, %eax
162 ; X32-SSE2-NEXT: retl
164 ; X64-AVX2-LABEL: fshl_i32_const_overshift:
166 ; X64-AVX2-NEXT: movl %edi, %eax
167 ; X64-AVX2-NEXT: shldl $9, %esi, %eax
168 ; X64-AVX2-NEXT: retq
169 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
173 ; 64-bit should also work.
; fshl.i64 with a constant amount of 105, which exceeds the 64-bit width.
; X64-AVX2 expects a single `shldq $41` (105 - 64 = 41). X32-SSE2 expects
; the result to be built from 32-bit pieces with `shldl $9` and `shrdl $23`.
175 define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
176 ; X32-SSE2-LABEL: fshl_i64_const_overshift:
178 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
179 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
180 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
181 ; X32-SSE2-NEXT: shldl $9, %ecx, %edx
182 ; X32-SSE2-NEXT: shrdl $23, %ecx, %eax
183 ; X32-SSE2-NEXT: retl
185 ; X64-AVX2-LABEL: fshl_i64_const_overshift:
187 ; X64-AVX2-NEXT: movq %rdi, %rax
188 ; X64-AVX2-NEXT: shldq $41, %rsi, %rax
189 ; X64-AVX2-NEXT: retq
190 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
194 ; This should work without any node-specific logic.
; fshl.i8 with all-constant operands (255, 0, 7). Both runs (ANY prefix)
; expect the folded immediate: `movb $-128, %al` followed by the return.
196 define i8 @fshl_i8_const_fold() nounwind {
197 ; ANY-LABEL: fshl_i8_const_fold:
199 ; ANY-NEXT: movb $-128, %al
200 ; ANY-NEXT: ret{{[l|q]}}
201 %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
205 ; Repeat everything for funnel shift right.
207 ; General case - all operands can be variables
; Calls @llvm.fshr.i32 with three variable operands (%x, %y, %z).
; Both check prefixes expect a single `shrdl` whose count is in %cl.
209 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
210 ; X32-SSE2-LABEL: fshr_i32:
212 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
213 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
214 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
215 ; X32-SSE2-NEXT: shrdl %cl, %edx, %eax
216 ; X32-SSE2-NEXT: retl
218 ; X64-AVX2-LABEL: fshr_i32:
220 ; X64-AVX2-NEXT: movl %edx, %ecx
221 ; X64-AVX2-NEXT: movl %esi, %eax
222 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
223 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
224 ; X64-AVX2-NEXT: retq
225 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
229 ; Verify that weird types are minimally supported.
; Funnel shift right on the non-power-of-two width i37 with variable operands.
; X32-SSE2 expects a call to __umoddi3 with 37 pushed as an argument;
; X64-AVX2 expects no call, using a mulq/shrq/leaq/subq sequence instead.
; Both then shift right by the reduced amount, shift left by 37 minus that
; amount, OR the pieces together and finish with cmove instructions.
230 declare i37 @llvm.fshr.i37(i37, i37, i37)
231 define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
232 ; X32-SSE2-LABEL: fshr_i37:
234 ; X32-SSE2-NEXT: pushl %ebp
235 ; X32-SSE2-NEXT: pushl %ebx
236 ; X32-SSE2-NEXT: pushl %edi
237 ; X32-SSE2-NEXT: pushl %esi
238 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
239 ; X32-SSE2-NEXT: andl $31, %esi
240 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
241 ; X32-SSE2-NEXT: andl $31, %eax
242 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
243 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
244 ; X32-SSE2-NEXT: pushl $0
245 ; X32-SSE2-NEXT: pushl $37
246 ; X32-SSE2-NEXT: pushl %eax
247 ; X32-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
248 ; X32-SSE2-NEXT: calll __umoddi3
249 ; X32-SSE2-NEXT: addl $16, %esp
250 ; X32-SSE2-NEXT: movl %eax, %ebx
251 ; X32-SSE2-NEXT: movb $37, %cl
252 ; X32-SSE2-NEXT: subb %bl, %cl
253 ; X32-SSE2-NEXT: movl %ebp, %eax
254 ; X32-SSE2-NEXT: shll %cl, %ebp
255 ; X32-SSE2-NEXT: shldl %cl, %eax, %edi
256 ; X32-SSE2-NEXT: xorl %eax, %eax
257 ; X32-SSE2-NEXT: testb $32, %cl
258 ; X32-SSE2-NEXT: cmovnel %ebp, %edi
259 ; X32-SSE2-NEXT: cmovnel %eax, %ebp
260 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
261 ; X32-SSE2-NEXT: movl %ebx, %ecx
262 ; X32-SSE2-NEXT: shrdl %cl, %esi, %eax
263 ; X32-SSE2-NEXT: shrl %cl, %esi
264 ; X32-SSE2-NEXT: testb $32, %bl
265 ; X32-SSE2-NEXT: cmovnel %esi, %eax
266 ; X32-SSE2-NEXT: movl $0, %ecx
267 ; X32-SSE2-NEXT: cmovnel %ecx, %esi
268 ; X32-SSE2-NEXT: orl %edi, %esi
269 ; X32-SSE2-NEXT: orl %ebp, %eax
270 ; X32-SSE2-NEXT: orl %ebx, %edx
271 ; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %eax
272 ; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
273 ; X32-SSE2-NEXT: movl %esi, %edx
274 ; X32-SSE2-NEXT: popl %esi
275 ; X32-SSE2-NEXT: popl %edi
276 ; X32-SSE2-NEXT: popl %ebx
277 ; X32-SSE2-NEXT: popl %ebp
278 ; X32-SSE2-NEXT: retl
280 ; X64-AVX2-LABEL: fshr_i37:
282 ; X64-AVX2-NEXT: movq %rdx, %r8
283 ; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
284 ; X64-AVX2-NEXT: movq %rsi, %r9
285 ; X64-AVX2-NEXT: andq %rax, %r9
286 ; X64-AVX2-NEXT: andq %rax, %r8
287 ; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rcx # imm = 0xDD67C8A60DD67C8B
288 ; X64-AVX2-NEXT: movq %r8, %rax
289 ; X64-AVX2-NEXT: mulq %rcx
290 ; X64-AVX2-NEXT: shrq $5, %rdx
291 ; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax
292 ; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax
293 ; X64-AVX2-NEXT: subq %rax, %r8
294 ; X64-AVX2-NEXT: movl %r8d, %ecx
295 ; X64-AVX2-NEXT: shrq %cl, %r9
296 ; X64-AVX2-NEXT: movl $37, %ecx
297 ; X64-AVX2-NEXT: subl %r8d, %ecx
298 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
299 ; X64-AVX2-NEXT: shlq %cl, %rdi
300 ; X64-AVX2-NEXT: orq %r9, %rdi
301 ; X64-AVX2-NEXT: testq %r8, %r8
302 ; X64-AVX2-NEXT: cmoveq %rsi, %rdi
303 ; X64-AVX2-NEXT: movq %rdi, %rax
304 ; X64-AVX2-NEXT: retq
305 %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
309 ; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
; fshr.i7 with all-constant operands (112, 127, 2). Under the shared ANY
; prefix both runs expect only `movb $31, %al` followed by the return, with
; no shift instruction.
311 declare i7 @llvm.fshr.i7(i7, i7, i7)
312 define i7 @fshr_i7_const_fold() nounwind {
313 ; ANY-LABEL: fshr_i7_const_fold:
315 ; ANY-NEXT: movb $31, %al
316 ; ANY-NEXT: ret{{[l|q]}}
317 %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
321 ; demanded bits tests
; Demanded-bits test for fshl.i32 with a constant amount of 9.
; %x is %a0 OR'ed with 2147483648 (bit 31). The expected output for both
; runs is only loads/moves plus `shldl $9`, with no `or` instruction.
; NOTE(review): %y is passed to the call but its definition is not among
; the lines visible here.
323 define i32 @fshl_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
324 ; X32-SSE2-LABEL: fshl_i32_demandedbits:
326 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
327 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
328 ; X32-SSE2-NEXT: shldl $9, %ecx, %eax
329 ; X32-SSE2-NEXT: retl
331 ; X64-AVX2-LABEL: fshl_i32_demandedbits:
333 ; X64-AVX2-NEXT: movl %edi, %eax
334 ; X64-AVX2-NEXT: shldl $9, %esi, %eax
335 ; X64-AVX2-NEXT: retq
336 %x = or i32 %a0, 2147483648
338 %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
; Demanded-bits test for fshr.i32 with a constant amount of 9.
; %x is %a0 OR'ed with 2147483648 (bit 31). Neither run expects an `or`
; instruction: X32-SSE2 expects `shrdl $9`, X64-AVX2 expects `shldl $23`.
; NOTE(review): %y is passed to the call but its definition is not among
; the lines visible here.
342 define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
343 ; X32-SSE2-LABEL: fshr_i32_demandedbits:
345 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
346 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
347 ; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
348 ; X32-SSE2-NEXT: retl
350 ; X64-AVX2-LABEL: fshr_i32_demandedbits:
352 ; X64-AVX2-NEXT: movl %edi, %eax
353 ; X64-AVX2-NEXT: shldl $23, %esi, %eax
354 ; X64-AVX2-NEXT: retq
355 %x = or i32 %a0, 2147483648
357 %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
; fshl.i32 with an undef first operand and a variable amount (%a1).
; X32-SSE2 expects `shldl %cl, %eax, %eax` (one register for both data
; operands); X64-AVX2 expects `shldl %cl, %edi, %eax` with no prior write
; to %eax.
363 define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind {
364 ; X32-SSE2-LABEL: fshl_i32_undef0:
366 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
367 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
368 ; X32-SSE2-NEXT: shldl %cl, %eax, %eax
369 ; X32-SSE2-NEXT: retl
371 ; X64-AVX2-LABEL: fshl_i32_undef0:
373 ; X64-AVX2-NEXT: movl %esi, %ecx
374 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
375 ; X64-AVX2-NEXT: shldl %cl, %edi, %eax
376 ; X64-AVX2-NEXT: retq
377 %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %a1)
; fshl.i32 with an undef first operand and shift amount %m.
; Both runs expect `andl $7, %ecx` applied to the amount before `shldl %cl`.
; NOTE(review): the definition of %m is not among the lines visible here;
; the `andl $7` in the checks suggests a mask with 7 - confirm.
381 define i32 @fshl_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
382 ; X32-SSE2-LABEL: fshl_i32_undef0_msk:
384 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
385 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
386 ; X32-SSE2-NEXT: andl $7, %ecx
387 ; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
388 ; X32-SSE2-NEXT: shldl %cl, %eax, %eax
389 ; X32-SSE2-NEXT: retl
391 ; X64-AVX2-LABEL: fshl_i32_undef0_msk:
393 ; X64-AVX2-NEXT: movl %esi, %ecx
394 ; X64-AVX2-NEXT: andl $7, %ecx
395 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
396 ; X64-AVX2-NEXT: shldl %cl, %edi, %eax
397 ; X64-AVX2-NEXT: retq
399 %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %m)
; fshl.i32 with an undef first operand and a constant amount of 9.
; Both runs expect a plain `shrl $23` (32 - 9 = 23) instead of a double shift.
403 define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
404 ; X32-SSE2-LABEL: fshl_i32_undef0_cst:
406 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
407 ; X32-SSE2-NEXT: shrl $23, %eax
408 ; X32-SSE2-NEXT: retl
410 ; X64-AVX2-LABEL: fshl_i32_undef0_cst:
412 ; X64-AVX2-NEXT: movl %edi, %eax
413 ; X64-AVX2-NEXT: shrl $23, %eax
414 ; X64-AVX2-NEXT: retq
415 %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
; fshl.i32 with an undef second operand and a variable amount (%a1).
; Both runs expect `shldl %cl, %eax, %eax`, i.e. the same register is used
; for both data operands.
419 define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
420 ; X32-SSE2-LABEL: fshl_i32_undef1:
422 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
423 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
424 ; X32-SSE2-NEXT: shldl %cl, %eax, %eax
425 ; X32-SSE2-NEXT: retl
427 ; X64-AVX2-LABEL: fshl_i32_undef1:
429 ; X64-AVX2-NEXT: movl %esi, %ecx
430 ; X64-AVX2-NEXT: movl %edi, %eax
431 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
432 ; X64-AVX2-NEXT: shldl %cl, %eax, %eax
433 ; X64-AVX2-NEXT: retq
434 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %a1)
; fshl.i32 with an undef second operand and shift amount %m.
; Both runs expect `andb $7, %cl` followed by a plain `shll %cl, %eax`
; rather than a double shift.
; NOTE(review): the definition of %m is not among the lines visible here;
; the `andb $7` in the checks suggests a mask with 7 - confirm.
438 define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
439 ; X32-SSE2-LABEL: fshl_i32_undef1_msk:
441 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
442 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
443 ; X32-SSE2-NEXT: andb $7, %cl
444 ; X32-SSE2-NEXT: shll %cl, %eax
445 ; X32-SSE2-NEXT: retl
447 ; X64-AVX2-LABEL: fshl_i32_undef1_msk:
449 ; X64-AVX2-NEXT: movl %esi, %ecx
450 ; X64-AVX2-NEXT: movl %edi, %eax
451 ; X64-AVX2-NEXT: andb $7, %cl
452 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
453 ; X64-AVX2-NEXT: shll %cl, %eax
454 ; X64-AVX2-NEXT: retq
456 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
; fshl.i32 with an undef second operand and a constant amount of 9.
; Both runs expect a plain `shll $9` instead of a double shift.
460 define i32 @fshl_i32_undef1_cst(i32 %a0) nounwind {
461 ; X32-SSE2-LABEL: fshl_i32_undef1_cst:
463 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
464 ; X32-SSE2-NEXT: shll $9, %eax
465 ; X32-SSE2-NEXT: retl
467 ; X64-AVX2-LABEL: fshl_i32_undef1_cst:
469 ; X64-AVX2-NEXT: movl %edi, %eax
470 ; X64-AVX2-NEXT: shll $9, %eax
471 ; X64-AVX2-NEXT: retq
472 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
; fshl.i32 with variable data operands and an undef shift amount.
; Both runs still expect `shldl %cl`; in the X64-AVX2 sequence no
; instruction writes %cl before the shift.
476 define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind {
477 ; X32-SSE2-LABEL: fshl_i32_undef2:
479 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
480 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
481 ; X32-SSE2-NEXT: shldl %cl, %ecx, %eax
482 ; X32-SSE2-NEXT: retl
484 ; X64-AVX2-LABEL: fshl_i32_undef2:
486 ; X64-AVX2-NEXT: movl %edi, %eax
487 ; X64-AVX2-NEXT: shldl %cl, %esi, %eax
488 ; X64-AVX2-NEXT: retq
489 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 undef)
; fshr.i32 with an undef first operand and a variable amount (%a1).
; Both runs expect `shrdl %cl, %eax, %eax`, i.e. the same register is used
; for both data operands.
493 define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind {
494 ; X32-SSE2-LABEL: fshr_i32_undef0:
496 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
497 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
498 ; X32-SSE2-NEXT: shrdl %cl, %eax, %eax
499 ; X32-SSE2-NEXT: retl
501 ; X64-AVX2-LABEL: fshr_i32_undef0:
503 ; X64-AVX2-NEXT: movl %esi, %ecx
504 ; X64-AVX2-NEXT: movl %edi, %eax
505 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
506 ; X64-AVX2-NEXT: shrdl %cl, %eax, %eax
507 ; X64-AVX2-NEXT: retq
508 %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %a1)
; fshr.i32 with an undef first operand and shift amount %m.
; Both runs expect `andb $7, %cl` followed by a plain `shrl %cl, %eax`
; rather than a double shift.
; NOTE(review): the definition of %m is not among the lines visible here;
; the `andb $7` in the checks suggests a mask with 7 - confirm.
512 define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
513 ; X32-SSE2-LABEL: fshr_i32_undef0_msk:
515 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
516 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
517 ; X32-SSE2-NEXT: andb $7, %cl
518 ; X32-SSE2-NEXT: shrl %cl, %eax
519 ; X32-SSE2-NEXT: retl
521 ; X64-AVX2-LABEL: fshr_i32_undef0_msk:
523 ; X64-AVX2-NEXT: movl %esi, %ecx
524 ; X64-AVX2-NEXT: movl %edi, %eax
525 ; X64-AVX2-NEXT: andb $7, %cl
526 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
527 ; X64-AVX2-NEXT: shrl %cl, %eax
528 ; X64-AVX2-NEXT: retq
530 %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
; fshr.i32 with an undef first operand and a constant amount of 9.
; Both runs expect a plain `shrl $9` instead of a double shift.
534 define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
535 ; X32-SSE2-LABEL: fshr_i32_undef0_cst:
537 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
538 ; X32-SSE2-NEXT: shrl $9, %eax
539 ; X32-SSE2-NEXT: retl
541 ; X64-AVX2-LABEL: fshr_i32_undef0_cst:
543 ; X64-AVX2-NEXT: movl %edi, %eax
544 ; X64-AVX2-NEXT: shrl $9, %eax
545 ; X64-AVX2-NEXT: retq
546 %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
; fshr.i32 with an undef second operand and a variable amount (%a1).
; X32-SSE2 expects `shrdl %cl, %eax, %eax` (one register for both data
; operands); X64-AVX2 expects `shrdl %cl, %edi, %eax` with no prior write
; to %eax.
550 define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
551 ; X32-SSE2-LABEL: fshr_i32_undef1:
553 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
554 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
555 ; X32-SSE2-NEXT: shrdl %cl, %eax, %eax
556 ; X32-SSE2-NEXT: retl
558 ; X64-AVX2-LABEL: fshr_i32_undef1:
560 ; X64-AVX2-NEXT: movl %esi, %ecx
561 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
562 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
563 ; X64-AVX2-NEXT: retq
564 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %a1)
; fshr.i32 with an undef second operand and shift amount %m.
; Both runs expect `andl $7, %ecx` applied to the amount before `shrdl %cl`.
; NOTE(review): the definition of %m is not among the lines visible here;
; the `andl $7` in the checks suggests a mask with 7 - confirm.
568 define i32 @fshr_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
569 ; X32-SSE2-LABEL: fshr_i32_undef1_msk:
571 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
572 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
573 ; X32-SSE2-NEXT: andl $7, %ecx
574 ; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
575 ; X32-SSE2-NEXT: shrdl %cl, %eax, %eax
576 ; X32-SSE2-NEXT: retl
578 ; X64-AVX2-LABEL: fshr_i32_undef1_msk:
580 ; X64-AVX2-NEXT: movl %esi, %ecx
581 ; X64-AVX2-NEXT: andl $7, %ecx
582 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
583 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
584 ; X64-AVX2-NEXT: retq
586 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %m)
; fshr.i32 with an undef second operand and a constant amount of 9.
; Both runs expect a plain `shll $23` (32 - 9 = 23) instead of a double shift.
590 define i32 @fshr_i32_undef1_cst(i32 %a0) nounwind {
591 ; X32-SSE2-LABEL: fshr_i32_undef1_cst:
593 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
594 ; X32-SSE2-NEXT: shll $23, %eax
595 ; X32-SSE2-NEXT: retl
597 ; X64-AVX2-LABEL: fshr_i32_undef1_cst:
599 ; X64-AVX2-NEXT: movl %edi, %eax
600 ; X64-AVX2-NEXT: shll $23, %eax
601 ; X64-AVX2-NEXT: retq
602 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
; fshr.i32 with variable data operands and an undef shift amount.
; Both runs still expect `shrdl %cl`; in the X64-AVX2 sequence no
; instruction writes %cl before the shift.
606 define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind {
607 ; X32-SSE2-LABEL: fshr_i32_undef2:
609 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
610 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
611 ; X32-SSE2-NEXT: shrdl %cl, %ecx, %eax
612 ; X32-SSE2-NEXT: retl
614 ; X64-AVX2-LABEL: fshr_i32_undef2:
616 ; X64-AVX2-NEXT: movl %esi, %eax
617 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
618 ; X64-AVX2-NEXT: retq
619 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 undef)
; fshl.i32 with a constant-zero first operand and a variable amount (%a1).
; Both runs expect %eax to be zeroed with `xorl %eax, %eax` and then used
; as the destination of `shldl %cl`.
625 define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
626 ; X32-SSE2-LABEL: fshl_i32_zero0:
628 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
629 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
630 ; X32-SSE2-NEXT: xorl %eax, %eax
631 ; X32-SSE2-NEXT: shldl %cl, %edx, %eax
632 ; X32-SSE2-NEXT: retl
634 ; X64-AVX2-LABEL: fshl_i32_zero0:
636 ; X64-AVX2-NEXT: movl %esi, %ecx
637 ; X64-AVX2-NEXT: xorl %eax, %eax
638 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
639 ; X64-AVX2-NEXT: shldl %cl, %edi, %eax
640 ; X64-AVX2-NEXT: retq
641 %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 %a1)
; fshl.i32 with a constant-zero first operand and a constant amount of 9.
; Both runs expect a plain `shrl $23` (32 - 9 = 23) instead of a double shift.
645 define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
646 ; X32-SSE2-LABEL: fshl_i32_zero0_cst:
648 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
649 ; X32-SSE2-NEXT: shrl $23, %eax
650 ; X32-SSE2-NEXT: retl
652 ; X64-AVX2-LABEL: fshl_i32_zero0_cst:
654 ; X64-AVX2-NEXT: movl %edi, %eax
655 ; X64-AVX2-NEXT: shrl $23, %eax
656 ; X64-AVX2-NEXT: retq
657 %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
; fshl.i32 with a constant-zero second operand and a variable amount (%a1).
; Both runs expect %edx to be zeroed with `xorl %edx, %edx` and then used
; as the source operand of `shldl %cl, %edx, %eax`.
661 define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
662 ; X32-SSE2-LABEL: fshl_i32_zero1:
664 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
665 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
666 ; X32-SSE2-NEXT: xorl %edx, %edx
667 ; X32-SSE2-NEXT: shldl %cl, %edx, %eax
668 ; X32-SSE2-NEXT: retl
670 ; X64-AVX2-LABEL: fshl_i32_zero1:
672 ; X64-AVX2-NEXT: movl %esi, %ecx
673 ; X64-AVX2-NEXT: movl %edi, %eax
674 ; X64-AVX2-NEXT: xorl %edx, %edx
675 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
676 ; X64-AVX2-NEXT: shldl %cl, %edx, %eax
677 ; X64-AVX2-NEXT: retq
678 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 %a1)
; fshl.i32 with a constant-zero second operand and a constant amount of 9.
; Both runs expect a plain `shll $9` instead of a double shift.
682 define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
683 ; X32-SSE2-LABEL: fshl_i32_zero1_cst:
685 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
686 ; X32-SSE2-NEXT: shll $9, %eax
687 ; X32-SSE2-NEXT: retl
689 ; X64-AVX2-LABEL: fshl_i32_zero1_cst:
691 ; X64-AVX2-NEXT: movl %edi, %eax
692 ; X64-AVX2-NEXT: shll $9, %eax
693 ; X64-AVX2-NEXT: retq
694 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
; fshr.i32 with a constant-zero first operand and a variable amount (%a1).
; Both runs expect %edx to be zeroed with `xorl %edx, %edx` and then used
; as the source operand of `shrdl %cl, %edx, %eax`.
698 define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
699 ; X32-SSE2-LABEL: fshr_i32_zero0:
701 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
702 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
703 ; X32-SSE2-NEXT: xorl %edx, %edx
704 ; X32-SSE2-NEXT: shrdl %cl, %edx, %eax
705 ; X32-SSE2-NEXT: retl
707 ; X64-AVX2-LABEL: fshr_i32_zero0:
709 ; X64-AVX2-NEXT: movl %esi, %ecx
710 ; X64-AVX2-NEXT: movl %edi, %eax
711 ; X64-AVX2-NEXT: xorl %edx, %edx
712 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
713 ; X64-AVX2-NEXT: shrdl %cl, %edx, %eax
714 ; X64-AVX2-NEXT: retq
715 %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 %a1)
; fshr.i32 with a constant-zero first operand and a constant amount of 9.
; Both runs expect a plain `shrl $9` instead of a double shift.
719 define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
720 ; X32-SSE2-LABEL: fshr_i32_zero0_cst:
722 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
723 ; X32-SSE2-NEXT: shrl $9, %eax
724 ; X32-SSE2-NEXT: retl
726 ; X64-AVX2-LABEL: fshr_i32_zero0_cst:
728 ; X64-AVX2-NEXT: movl %edi, %eax
729 ; X64-AVX2-NEXT: shrl $9, %eax
730 ; X64-AVX2-NEXT: retq
731 %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
; fshr.i32 with a constant-zero second operand and a variable amount (%a1).
; Both runs expect %eax to be zeroed with `xorl %eax, %eax` and then used
; as the destination of `shrdl %cl`.
735 define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
736 ; X32-SSE2-LABEL: fshr_i32_zero1:
738 ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl
739 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
740 ; X32-SSE2-NEXT: xorl %eax, %eax
741 ; X32-SSE2-NEXT: shrdl %cl, %edx, %eax
742 ; X32-SSE2-NEXT: retl
744 ; X64-AVX2-LABEL: fshr_i32_zero1:
746 ; X64-AVX2-NEXT: movl %esi, %ecx
747 ; X64-AVX2-NEXT: xorl %eax, %eax
748 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
749 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
750 ; X64-AVX2-NEXT: retq
751 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 %a1)
; fshr.i32 with a constant-zero second operand and a constant amount of 9.
; Both runs expect a plain `shll $23` (32 - 9 = 23) instead of a double shift.
755 define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
756 ; X32-SSE2-LABEL: fshr_i32_zero1_cst:
758 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
759 ; X32-SSE2-NEXT: shll $23, %eax
760 ; X32-SSE2-NEXT: retl
762 ; X64-AVX2-LABEL: fshr_i32_zero1_cst:
764 ; X64-AVX2-NEXT: movl %edi, %eax
765 ; X64-AVX2-NEXT: shll $23, %eax
766 ; X64-AVX2-NEXT: retq
767 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
; fshl.i32 with a constant shift amount of 0. No shift is expected:
; X64-AVX2 just copies %edi (the first argument register) to %eax and
; X32-SSE2 performs a single stack load into %eax.
773 define i32 @fshl_i32_zero2(i32 %a0, i32 %a1) nounwind {
774 ; X32-SSE2-LABEL: fshl_i32_zero2:
776 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
777 ; X32-SSE2-NEXT: retl
779 ; X64-AVX2-LABEL: fshl_i32_zero2:
781 ; X64-AVX2-NEXT: movl %edi, %eax
782 ; X64-AVX2-NEXT: retq
783 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 0)
; fshr.i32 with a constant shift amount of 0. No shift is expected:
; X64-AVX2 just copies %esi (the second argument register) to %eax and
; X32-SSE2 performs a single stack load into %eax.
787 define i32 @fshr_i32_zero2(i32 %a0, i32 %a1) nounwind {
788 ; X32-SSE2-LABEL: fshr_i32_zero2:
790 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
791 ; X32-SSE2-NEXT: retl
793 ; X64-AVX2-LABEL: fshr_i32_zero2:
795 ; X64-AVX2-NEXT: movl %esi, %eax
796 ; X64-AVX2-NEXT: retq
797 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 0)
801 ; With constant shift amount, this is 'shrd' or 'shld'.
; fshr.i32 with variable data operands and a constant amount of 9.
; X32-SSE2 expects `shrdl $9`; X64-AVX2 expects the equivalent left form
; `shldl $23` (32 - 9 = 23).
803 define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
804 ; X32-SSE2-LABEL: fshr_i32_const_shift:
806 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
807 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
808 ; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
809 ; X32-SSE2-NEXT: retl
811 ; X64-AVX2-LABEL: fshr_i32_const_shift:
813 ; X64-AVX2-NEXT: movl %edi, %eax
814 ; X64-AVX2-NEXT: shldl $23, %esi, %eax
815 ; X64-AVX2-NEXT: retq
816 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
820 ; Check modulo math on shift amount. 41-32=9, but right-shift may become left, so 32-9=23.
; fshr.i32 with a constant amount of 41, which exceeds the 32-bit width.
; The expected output matches a shift by 9: X32-SSE2 `shrdl $9`,
; X64-AVX2 `shldl $23`.
822 define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
823 ; X32-SSE2-LABEL: fshr_i32_const_overshift:
825 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
826 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
827 ; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
828 ; X32-SSE2-NEXT: retl
830 ; X64-AVX2-LABEL: fshr_i32_const_overshift:
832 ; X64-AVX2-NEXT: movl %edi, %eax
833 ; X64-AVX2-NEXT: shldl $23, %esi, %eax
834 ; X64-AVX2-NEXT: retq
835 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
839 ; 64-bit should also work. 105-64 = 41, but right-shift became left, so 64-41=23.
; fshr.i64 with a constant amount of 105, which exceeds the 64-bit width.
; X64-AVX2 expects a single `shldq $23`. X32-SSE2 expects the result to be
; built from 32-bit pieces with `shrdl $9` and `shldl $23`.
841 define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
842 ; X32-SSE2-LABEL: fshr_i64_const_overshift:
844 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
845 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
846 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
847 ; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
848 ; X32-SSE2-NEXT: shldl $23, %ecx, %edx
849 ; X32-SSE2-NEXT: retl
851 ; X64-AVX2-LABEL: fshr_i64_const_overshift:
853 ; X64-AVX2-NEXT: movq %rdi, %rax
854 ; X64-AVX2-NEXT: shldq $23, %rsi, %rax
855 ; X64-AVX2-NEXT: retq
856 %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
860 ; This should work without any node-specific logic.
; fshr.i8 with all-constant operands (255, 0, 7). Both runs (ANY prefix)
; expect the folded immediate: `movb $-2, %al` followed by the return.
862 define i8 @fshr_i8_const_fold() nounwind {
863 ; ANY-LABEL: fshr_i8_const_fold:
865 ; ANY-NEXT: movb $-2, %al
866 ; ANY-NEXT: ret{{[l|q]}}
867 %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
; fshl.i32 with a constant amount equal to the bit width (32).
; No shift is expected: X64-AVX2 just copies %edi (the first argument
; register) to %eax and X32-SSE2 performs a single stack load into %eax.
871 define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
872 ; X32-SSE2-LABEL: fshl_i32_shift_by_bitwidth:
874 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
875 ; X32-SSE2-NEXT: retl
877 ; X64-AVX2-LABEL: fshl_i32_shift_by_bitwidth:
879 ; X64-AVX2-NEXT: movl %edi, %eax
880 ; X64-AVX2-NEXT: retq
881 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
; fshr.i32 with a constant amount equal to the bit width (32).
; No shift is expected: X64-AVX2 just copies %esi (the second argument
; register) to %eax and X32-SSE2 performs a single stack load into %eax.
885 define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
886 ; X32-SSE2-LABEL: fshr_i32_shift_by_bitwidth:
888 ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
889 ; X32-SSE2-NEXT: retl
891 ; X64-AVX2-LABEL: fshr_i32_shift_by_bitwidth:
893 ; X64-AVX2-NEXT: movl %esi, %eax
894 ; X64-AVX2-NEXT: retq
895 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
; Vector fshl on <4 x i32> where every lane's amount is 32 (the lane width).
; Under the shared ANY prefix both runs expect nothing but the return.
899 define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
900 ; ANY-LABEL: fshl_v4i32_shift_by_bitwidth:
902 ; ANY-NEXT: ret{{[l|q]}}
903 %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
; Vector fshr on <4 x i32> where every lane's amount is 32 (the lane width).
; Both runs expect only a register copy of %xmm1 into %xmm0 (`movaps` for
; X32-SSE2, `vmovaps` for X64-AVX2) before the return.
907 define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
908 ; X32-SSE2-LABEL: fshr_v4i32_shift_by_bitwidth:
910 ; X32-SSE2-NEXT: movaps %xmm1, %xmm0
911 ; X32-SSE2-NEXT: retl
913 ; X64-AVX2-LABEL: fshr_v4i32_shift_by_bitwidth:
915 ; X64-AVX2-NEXT: vmovaps %xmm1, %xmm0
916 ; X64-AVX2-NEXT: retq
917 %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)