1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,X86-SSE2
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X64-AVX2
; Funnel-shift-left intrinsic declarations used by the tests in this file
; (scalar widths i8 through i128 plus a <4 x i32> vector form).
5 declare i8 @llvm.fshl.i8(i8, i8, i8)
6 declare i16 @llvm.fshl.i16(i16, i16, i16)
7 declare i32 @llvm.fshl.i32(i32, i32, i32)
8 declare i64 @llvm.fshl.i64(i64, i64, i64)
9 declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
10 declare i128 @llvm.fshl.i128(i128, i128, i128)
; Funnel-shift-right intrinsic declarations (no i128 form is declared here).
12 declare i8 @llvm.fshr.i8(i8, i8, i8)
13 declare i16 @llvm.fshr.i16(i16, i16, i16)
14 declare i32 @llvm.fshr.i32(i32, i32, i32)
15 declare i64 @llvm.fshr.i64(i64, i64, i64)
16 declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
18 ; General case - all operands can be variables
; i32 fshl with three variable operands. Both targets are expected to emit a
; single 'shldl' that takes the shift amount in %cl.
20 define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
21 ; X86-SSE2-LABEL: fshl_i32:
23 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
24 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
25 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
26 ; X86-SSE2-NEXT: shldl %cl, %edx, %eax
29 ; X64-AVX2-LABEL: fshl_i32:
31 ; X64-AVX2-NEXT: movl %edx, %ecx
32 ; X64-AVX2-NEXT: movl %edi, %eax
33 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
34 ; X64-AVX2-NEXT: shldl %cl, %esi, %eax
36 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
; i64 fshl with three variable operands.
; i686: the expected code tests bit 5 of the amount (testb $32), selects the
; 32-bit halves with cmov, and then issues two 'shldl' instructions.
; x86-64: a single 'shldq' by %cl is expected.
40 define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
41 ; X86-SSE2-LABEL: fshl_i64:
43 ; X86-SSE2-NEXT: pushl %edi
44 ; X86-SSE2-NEXT: pushl %esi
45 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
46 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
47 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
48 ; X86-SSE2-NEXT: testb $32, %cl
49 ; X86-SSE2-NEXT: movl %edx, %edi
50 ; X86-SSE2-NEXT: cmovnel %esi, %edi
51 ; X86-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edx
52 ; X86-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %esi
53 ; X86-SSE2-NEXT: movl %edi, %eax
54 ; X86-SSE2-NEXT: shldl %cl, %esi, %eax
55 ; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
56 ; X86-SSE2-NEXT: shldl %cl, %edi, %edx
57 ; X86-SSE2-NEXT: popl %esi
58 ; X86-SSE2-NEXT: popl %edi
61 ; X64-AVX2-LABEL: fshl_i64:
63 ; X64-AVX2-NEXT: movq %rdx, %rcx
64 ; X64-AVX2-NEXT: movq %rdi, %rax
65 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
66 ; X64-AVX2-NEXT: shldq %cl, %rsi, %rax
68 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
; i128 fshl with three variable operands.
; i686: the expected code tests bits 6 and 5 of the amount (testb $64, then
; testb $32), selects 32-bit pieces with cmov, issues four 'shldl', and stores
; the four result words through a pointer loaded from the stack ('retl $4').
; x86-64: one testb $64, three cmov, and two 'shldq' are expected.
72 define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
73 ; X86-SSE2-LABEL: fshl_i128:
75 ; X86-SSE2-NEXT: pushl %ebp
76 ; X86-SSE2-NEXT: pushl %ebx
77 ; X86-SSE2-NEXT: pushl %edi
78 ; X86-SSE2-NEXT: pushl %esi
79 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
80 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
81 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
82 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
83 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
84 ; X86-SSE2-NEXT: testb $64, %cl
85 ; X86-SSE2-NEXT: movl %esi, %eax
86 ; X86-SSE2-NEXT: cmovnel %ebx, %eax
87 ; X86-SSE2-NEXT: movl %edx, %ebp
88 ; X86-SSE2-NEXT: cmovnel %edi, %ebp
89 ; X86-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %edi
90 ; X86-SSE2-NEXT: cmovnel {{[0-9]+}}(%esp), %ebx
91 ; X86-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edx
92 ; X86-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
93 ; X86-SSE2-NEXT: testb $32, %cl
94 ; X86-SSE2-NEXT: cmovnel %esi, %edx
95 ; X86-SSE2-NEXT: cmovnel %ebp, %esi
96 ; X86-SSE2-NEXT: cmovnel %eax, %ebp
97 ; X86-SSE2-NEXT: cmovel %edi, %ebx
98 ; X86-SSE2-NEXT: cmovel %eax, %edi
99 ; X86-SSE2-NEXT: movl %edi, %eax
100 ; X86-SSE2-NEXT: shldl %cl, %ebx, %eax
101 ; X86-SSE2-NEXT: movl %ebp, %ebx
102 ; X86-SSE2-NEXT: shldl %cl, %edi, %ebx
103 ; X86-SSE2-NEXT: movl %esi, %edi
104 ; X86-SSE2-NEXT: shldl %cl, %ebp, %edi
105 ; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
106 ; X86-SSE2-NEXT: shldl %cl, %esi, %edx
107 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
108 ; X86-SSE2-NEXT: movl %edx, 12(%ecx)
109 ; X86-SSE2-NEXT: movl %edi, 8(%ecx)
110 ; X86-SSE2-NEXT: movl %ebx, 4(%ecx)
111 ; X86-SSE2-NEXT: movl %eax, (%ecx)
112 ; X86-SSE2-NEXT: movl %ecx, %eax
113 ; X86-SSE2-NEXT: popl %esi
114 ; X86-SSE2-NEXT: popl %edi
115 ; X86-SSE2-NEXT: popl %ebx
116 ; X86-SSE2-NEXT: popl %ebp
117 ; X86-SSE2-NEXT: retl $4
119 ; X64-AVX2-LABEL: fshl_i128:
121 ; X64-AVX2-NEXT: testb $64, %r8b
122 ; X64-AVX2-NEXT: cmovneq %rdi, %rsi
123 ; X64-AVX2-NEXT: cmoveq %rcx, %rdx
124 ; X64-AVX2-NEXT: cmovneq %rcx, %rdi
125 ; X64-AVX2-NEXT: movq %rdi, %rax
126 ; X64-AVX2-NEXT: movl %r8d, %ecx
127 ; X64-AVX2-NEXT: shldq %cl, %rdx, %rax
128 ; X64-AVX2-NEXT: shldq %cl, %rdi, %rsi
129 ; X64-AVX2-NEXT: movq %rsi, %rdx
130 ; X64-AVX2-NEXT: retq
131 %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
135 ; Verify that weird types are minimally supported.
; i37 fshl with three variable operands; 37 is not a power of two.
; i686: the expected code masks the high word of the amount with 31, pushes the
; constants 0 and 37 as arguments and calls __umoddi3, then branches on bit 5
; of the result before two 'shldl' instructions.
; x86-64: no call is expected; 'mulq' by a constant followed by lea/lea/sub
; computes amount - 37*q, and the shift is a single 'shldq' after 'shlq $27'.
136 declare i37 @llvm.fshl.i37(i37, i37, i37)
137 define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
138 ; X86-SSE2-LABEL: fshl_i37:
140 ; X86-SSE2-NEXT: pushl %ebx
141 ; X86-SSE2-NEXT: pushl %edi
142 ; X86-SSE2-NEXT: pushl %esi
143 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
144 ; X86-SSE2-NEXT: andl $31, %eax
145 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
146 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
147 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
148 ; X86-SSE2-NEXT: shldl $27, %ebx, %edi
149 ; X86-SSE2-NEXT: pushl $0
150 ; X86-SSE2-NEXT: pushl $37
151 ; X86-SSE2-NEXT: pushl %eax
152 ; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
153 ; X86-SSE2-NEXT: calll __umoddi3
154 ; X86-SSE2-NEXT: addl $16, %esp
155 ; X86-SSE2-NEXT: movl %eax, %ecx
156 ; X86-SSE2-NEXT: testb $32, %cl
157 ; X86-SSE2-NEXT: jne .LBB3_1
158 ; X86-SSE2-NEXT: # %bb.2:
159 ; X86-SSE2-NEXT: movl %edi, %ebx
160 ; X86-SSE2-NEXT: movl %esi, %edi
161 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
162 ; X86-SSE2-NEXT: jmp .LBB3_3
163 ; X86-SSE2-NEXT: .LBB3_1:
164 ; X86-SSE2-NEXT: shll $27, %ebx
165 ; X86-SSE2-NEXT: .LBB3_3:
166 ; X86-SSE2-NEXT: movl %edi, %eax
167 ; X86-SSE2-NEXT: shldl %cl, %ebx, %eax
168 ; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
169 ; X86-SSE2-NEXT: shldl %cl, %edi, %esi
170 ; X86-SSE2-NEXT: movl %esi, %edx
171 ; X86-SSE2-NEXT: popl %esi
172 ; X86-SSE2-NEXT: popl %edi
173 ; X86-SSE2-NEXT: popl %ebx
174 ; X86-SSE2-NEXT: retl
176 ; X64-AVX2-LABEL: fshl_i37:
178 ; X64-AVX2-NEXT: movq %rdx, %rcx
179 ; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
180 ; X64-AVX2-NEXT: andq %rdx, %rax
181 ; X64-AVX2-NEXT: movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
182 ; X64-AVX2-NEXT: mulq %rdx
183 ; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax
184 ; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax
185 ; X64-AVX2-NEXT: subl %eax, %ecx
186 ; X64-AVX2-NEXT: shlq $27, %rsi
187 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
188 ; X64-AVX2-NEXT: shldq %cl, %rsi, %rdi
189 ; X64-AVX2-NEXT: movq %rdi, %rax
190 ; X64-AVX2-NEXT: retq
191 %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
195 ; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
; All-constant i7 fshl: fshl(112, 127, 2) is expected to fold to the immediate
; 67 (0b1000011), loaded with a single 'movb' on both targets.
197 declare i7 @llvm.fshl.i7(i7, i7, i7)
198 define i7 @fshl_i7_const_fold() {
199 ; CHECK-LABEL: fshl_i7_const_fold:
201 ; CHECK-NEXT: movb $67, %al
202 ; CHECK-NEXT: ret{{[l|q]}}
203 %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
207 ; With constant shift amount, this is 'shld' with constant operand.
; i32 fshl by the constant 9: both targets are expected to emit 'shldl $9'.
209 define i32 @fshl_i32_const_shift(i32 %x, i32 %y) nounwind {
210 ; X86-SSE2-LABEL: fshl_i32_const_shift:
212 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
213 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
214 ; X86-SSE2-NEXT: shldl $9, %ecx, %eax
215 ; X86-SSE2-NEXT: retl
217 ; X64-AVX2-LABEL: fshl_i32_const_shift:
219 ; X64-AVX2-NEXT: movl %edi, %eax
220 ; X64-AVX2-NEXT: shldl $9, %esi, %eax
221 ; X64-AVX2-NEXT: retq
222 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
226 ; Check modulo math on shift amount.
; i32 fshl by the constant 41, which exceeds the bit width: the expected
; immediate is 9 (41 - 32), i.e. the same 'shldl $9' as the in-range case.
228 define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
229 ; X86-SSE2-LABEL: fshl_i32_const_overshift:
231 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
232 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
233 ; X86-SSE2-NEXT: shldl $9, %ecx, %eax
234 ; X86-SSE2-NEXT: retl
236 ; X64-AVX2-LABEL: fshl_i32_const_overshift:
238 ; X64-AVX2-NEXT: movl %edi, %eax
239 ; X64-AVX2-NEXT: shldl $9, %esi, %eax
240 ; X64-AVX2-NEXT: retq
241 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
245 ; 64-bit should also work.
; i64 fshl by the constant 105, which exceeds the bit width.
; x86-64: expected immediate is 41 (105 - 64) on a single 'shldq'.
; i686: the shift is expected as 'shldl $9' plus 'shrdl $23' on 32-bit halves.
247 define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
248 ; X86-SSE2-LABEL: fshl_i64_const_overshift:
250 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
251 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
252 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
253 ; X86-SSE2-NEXT: shldl $9, %ecx, %edx
254 ; X86-SSE2-NEXT: shrdl $23, %ecx, %eax
255 ; X86-SSE2-NEXT: retl
257 ; X64-AVX2-LABEL: fshl_i64_const_overshift:
259 ; X64-AVX2-NEXT: movq %rdi, %rax
260 ; X64-AVX2-NEXT: shldq $41, %rsi, %rax
261 ; X64-AVX2-NEXT: retq
262 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
266 ; This should work without any node-specific logic.
; All-constant i8 fshl: fshl(255, 0, 7) is expected to fold to the immediate
; -128 (bit pattern 0x80), loaded with a single 'movb'.
268 define i8 @fshl_i8_const_fold() nounwind {
269 ; CHECK-LABEL: fshl_i8_const_fold:
271 ; CHECK-NEXT: movb $-128, %al
272 ; CHECK-NEXT: ret{{[l|q]}}
273 %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
277 ; Repeat everything for funnel shift right.
279 ; General case - all operands can be variables
; i32 fshr with three variable operands. Both targets are expected to emit a
; single 'shrdl' that takes the shift amount in %cl; on x86-64 the result
; register is seeded from the second argument (%esi).
281 define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
282 ; X86-SSE2-LABEL: fshr_i32:
284 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
285 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
286 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
287 ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
288 ; X86-SSE2-NEXT: retl
290 ; X64-AVX2-LABEL: fshr_i32:
292 ; X64-AVX2-NEXT: movl %edx, %ecx
293 ; X64-AVX2-NEXT: movl %esi, %eax
294 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
295 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
296 ; X64-AVX2-NEXT: retq
297 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
301 ; Verify that weird types are minimally supported.
; i37 fshr with three variable operands; 37 is not a power of two.
; i686: the expected code pushes the constants 0 and 37 as arguments and calls
; __umoddi3, adds 27 to the result, branches on bit 5, and uses two 'shrdl'.
; x86-64: no call is expected; 'mulq' by a constant followed by lea/lea/sub
; computes amount - 37*q, 27 is added, and the shift is a single 'shrdq'.
302 declare i37 @llvm.fshr.i37(i37, i37, i37)
303 define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
304 ; X86-SSE2-LABEL: fshr_i37:
306 ; X86-SSE2-NEXT: pushl %ebx
307 ; X86-SSE2-NEXT: pushl %edi
308 ; X86-SSE2-NEXT: pushl %esi
309 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
310 ; X86-SSE2-NEXT: andl $31, %eax
311 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
312 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
313 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
314 ; X86-SSE2-NEXT: shldl $27, %ebx, %esi
315 ; X86-SSE2-NEXT: pushl $0
316 ; X86-SSE2-NEXT: pushl $37
317 ; X86-SSE2-NEXT: pushl %eax
318 ; X86-SSE2-NEXT: pushl {{[0-9]+}}(%esp)
319 ; X86-SSE2-NEXT: calll __umoddi3
320 ; X86-SSE2-NEXT: addl $16, %esp
321 ; X86-SSE2-NEXT: movl %eax, %ecx
322 ; X86-SSE2-NEXT: addl $27, %ecx
323 ; X86-SSE2-NEXT: testb $32, %cl
324 ; X86-SSE2-NEXT: je .LBB10_1
325 ; X86-SSE2-NEXT: # %bb.2:
326 ; X86-SSE2-NEXT: movl %edi, %edx
327 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
328 ; X86-SSE2-NEXT: jmp .LBB10_3
329 ; X86-SSE2-NEXT: .LBB10_1:
330 ; X86-SSE2-NEXT: shll $27, %ebx
331 ; X86-SSE2-NEXT: movl %esi, %edx
332 ; X86-SSE2-NEXT: movl %ebx, %esi
333 ; X86-SSE2-NEXT: .LBB10_3:
334 ; X86-SSE2-NEXT: shrdl %cl, %edx, %esi
335 ; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
336 ; X86-SSE2-NEXT: shrdl %cl, %edi, %edx
337 ; X86-SSE2-NEXT: movl %esi, %eax
338 ; X86-SSE2-NEXT: popl %esi
339 ; X86-SSE2-NEXT: popl %edi
340 ; X86-SSE2-NEXT: popl %ebx
341 ; X86-SSE2-NEXT: retl
343 ; X64-AVX2-LABEL: fshr_i37:
345 ; X64-AVX2-NEXT: movq %rdx, %rcx
346 ; X64-AVX2-NEXT: movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
347 ; X64-AVX2-NEXT: andq %rdx, %rax
348 ; X64-AVX2-NEXT: movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
349 ; X64-AVX2-NEXT: mulq %rdx
350 ; X64-AVX2-NEXT: leal (%rdx,%rdx,8), %eax
351 ; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax
352 ; X64-AVX2-NEXT: subl %eax, %ecx
353 ; X64-AVX2-NEXT: addl $27, %ecx
354 ; X64-AVX2-NEXT: shlq $27, %rsi
355 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
356 ; X64-AVX2-NEXT: shrdq %cl, %rdi, %rsi
357 ; X64-AVX2-NEXT: movq %rsi, %rax
358 ; X64-AVX2-NEXT: retq
359 %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
363 ; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
; All-constant i7 fshr: fshr(112, 127, 2) is expected to fold to the immediate
; 31 (0b0011111), loaded with a single 'movb' on both targets.
365 declare i7 @llvm.fshr.i7(i7, i7, i7)
366 define i7 @fshr_i7_const_fold() nounwind {
367 ; CHECK-LABEL: fshr_i7_const_fold:
369 ; CHECK-NEXT: movb $31, %al
370 ; CHECK-NEXT: ret{{[l|q]}}
371 %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
375 ; demanded bits tests
; fshl by 9 whose first operand has its top bit forced on with 'or'. The
; expected output is a bare 'shldl $9' with no 'or' instruction, i.e. the bit
; set by the 'or' is not needed by the shift.
; NOTE(review): %y is used below but its definition is not among the visible
; lines of this function - confirm against the full file.
377 define i32 @fshl_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
378 ; X86-SSE2-LABEL: fshl_i32_demandedbits:
380 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
381 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
382 ; X86-SSE2-NEXT: shldl $9, %ecx, %eax
383 ; X86-SSE2-NEXT: retl
385 ; X64-AVX2-LABEL: fshl_i32_demandedbits:
387 ; X64-AVX2-NEXT: movl %edi, %eax
388 ; X64-AVX2-NEXT: shldl $9, %esi, %eax
389 ; X64-AVX2-NEXT: retq
390 %x = or i32 %a0, 2147483648
392 %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
; fshr by 9 whose first operand has its top bit forced on with 'or'. The
; expected output has no 'or' instruction: i686 uses 'shrdl $9' and x86-64
; uses the equivalent 'shldl $23'.
; NOTE(review): %y is used below but its definition is not among the visible
; lines of this function - confirm against the full file.
396 define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
397 ; X86-SSE2-LABEL: fshr_i32_demandedbits:
399 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
400 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
401 ; X86-SSE2-NEXT: shrdl $9, %ecx, %eax
402 ; X86-SSE2-NEXT: retl
404 ; X64-AVX2-LABEL: fshr_i32_demandedbits:
406 ; X64-AVX2-NEXT: movl %edi, %eax
407 ; X64-AVX2-NEXT: shldl $23, %esi, %eax
408 ; X64-AVX2-NEXT: retq
409 %x = or i32 %a0, 2147483648
411 %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
; fshl with an undef first operand and a variable amount: a 'shldl' by %cl is
; still expected on both targets.
417 define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind {
418 ; X86-SSE2-LABEL: fshl_i32_undef0:
420 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
421 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
422 ; X86-SSE2-NEXT: shldl %cl, %eax, %eax
423 ; X86-SSE2-NEXT: retl
425 ; X64-AVX2-LABEL: fshl_i32_undef0:
427 ; X64-AVX2-NEXT: movl %esi, %ecx
428 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
429 ; X64-AVX2-NEXT: shldl %cl, %edi, %eax
430 ; X64-AVX2-NEXT: retq
431 %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %a1)
; fshl with an undef first operand and amount %m. The expected code masks the
; amount with 7 ('andl $7') and then still uses 'shldl' by %cl.
; NOTE(review): %m is used below but its definition is not among the visible
; lines of this function - confirm against the full file.
435 define i32 @fshl_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
436 ; X86-SSE2-LABEL: fshl_i32_undef0_msk:
438 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
439 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
440 ; X86-SSE2-NEXT: andl $7, %ecx
441 ; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
442 ; X86-SSE2-NEXT: shldl %cl, %eax, %eax
443 ; X86-SSE2-NEXT: retl
445 ; X64-AVX2-LABEL: fshl_i32_undef0_msk:
447 ; X64-AVX2-NEXT: movl %esi, %ecx
448 ; X64-AVX2-NEXT: andl $7, %ecx
449 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
450 ; X64-AVX2-NEXT: shldl %cl, %edi, %eax
451 ; X64-AVX2-NEXT: retq
453 %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %m)
; fshl with an undef first operand and constant amount 9: expected to reduce
; to a logical right shift of %a0 by 23 (32 - 9).
457 define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
458 ; X86-SSE2-LABEL: fshl_i32_undef0_cst:
460 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
461 ; X86-SSE2-NEXT: shrl $23, %eax
462 ; X86-SSE2-NEXT: retl
464 ; X64-AVX2-LABEL: fshl_i32_undef0_cst:
466 ; X64-AVX2-NEXT: movl %edi, %eax
467 ; X64-AVX2-NEXT: shrl $23, %eax
468 ; X64-AVX2-NEXT: retq
469 %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
; fshl with an undef second operand and a variable amount: the expected code
; is 'shldl' by %cl with the same register used as both data operands.
473 define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
474 ; X86-SSE2-LABEL: fshl_i32_undef1:
476 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
477 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
478 ; X86-SSE2-NEXT: shldl %cl, %eax, %eax
479 ; X86-SSE2-NEXT: retl
481 ; X64-AVX2-LABEL: fshl_i32_undef1:
483 ; X64-AVX2-NEXT: movl %esi, %ecx
484 ; X64-AVX2-NEXT: movl %edi, %eax
485 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
486 ; X64-AVX2-NEXT: shldl %cl, %eax, %eax
487 ; X64-AVX2-NEXT: retq
488 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %a1)
; fshl with an undef second operand and amount %m. The expected code masks the
; amount with 7 ('andb $7') and reduces the funnel shift to a plain 'shll'.
; NOTE(review): %m is used below but its definition is not among the visible
; lines of this function - confirm against the full file.
492 define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
493 ; X86-SSE2-LABEL: fshl_i32_undef1_msk:
495 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
496 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
497 ; X86-SSE2-NEXT: andb $7, %cl
498 ; X86-SSE2-NEXT: shll %cl, %eax
499 ; X86-SSE2-NEXT: retl
501 ; X64-AVX2-LABEL: fshl_i32_undef1_msk:
503 ; X64-AVX2-NEXT: movl %esi, %ecx
504 ; X64-AVX2-NEXT: movl %edi, %eax
505 ; X64-AVX2-NEXT: andb $7, %cl
506 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
507 ; X64-AVX2-NEXT: shll %cl, %eax
508 ; X64-AVX2-NEXT: retq
510 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
; fshl with an undef second operand and constant amount 9: expected to reduce
; to 'shll $9' of %a0.
514 define i32 @fshl_i32_undef1_cst(i32 %a0) nounwind {
515 ; X86-SSE2-LABEL: fshl_i32_undef1_cst:
517 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
518 ; X86-SSE2-NEXT: shll $9, %eax
519 ; X86-SSE2-NEXT: retl
521 ; X64-AVX2-LABEL: fshl_i32_undef1_cst:
523 ; X64-AVX2-NEXT: movl %edi, %eax
524 ; X64-AVX2-NEXT: shll $9, %eax
525 ; X64-AVX2-NEXT: retq
526 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
; fshl with an undef shift amount: a 'shldl' by %cl is still expected, with no
; instruction dedicated to loading a shift amount into %cl.
530 define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind {
531 ; X86-SSE2-LABEL: fshl_i32_undef2:
533 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
534 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
535 ; X86-SSE2-NEXT: shldl %cl, %ecx, %eax
536 ; X86-SSE2-NEXT: retl
538 ; X64-AVX2-LABEL: fshl_i32_undef2:
540 ; X64-AVX2-NEXT: movl %edi, %eax
541 ; X64-AVX2-NEXT: shldl %cl, %esi, %eax
542 ; X64-AVX2-NEXT: retq
543 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 undef)
; fshr with an undef first operand and a variable amount: the expected code
; is 'shrdl' by %cl with the same register used as both data operands.
547 define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind {
548 ; X86-SSE2-LABEL: fshr_i32_undef0:
550 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
551 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
552 ; X86-SSE2-NEXT: shrdl %cl, %eax, %eax
553 ; X86-SSE2-NEXT: retl
555 ; X64-AVX2-LABEL: fshr_i32_undef0:
557 ; X64-AVX2-NEXT: movl %esi, %ecx
558 ; X64-AVX2-NEXT: movl %edi, %eax
559 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
560 ; X64-AVX2-NEXT: shrdl %cl, %eax, %eax
561 ; X64-AVX2-NEXT: retq
562 %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %a1)
; fshr with an undef first operand and amount %m. The expected code masks the
; amount with 7 ('andb $7') and reduces the funnel shift to a plain 'shrl'.
; NOTE(review): %m is used below but its definition is not among the visible
; lines of this function - confirm against the full file.
566 define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
567 ; X86-SSE2-LABEL: fshr_i32_undef0_msk:
569 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
570 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
571 ; X86-SSE2-NEXT: andb $7, %cl
572 ; X86-SSE2-NEXT: shrl %cl, %eax
573 ; X86-SSE2-NEXT: retl
575 ; X64-AVX2-LABEL: fshr_i32_undef0_msk:
577 ; X64-AVX2-NEXT: movl %esi, %ecx
578 ; X64-AVX2-NEXT: movl %edi, %eax
579 ; X64-AVX2-NEXT: andb $7, %cl
580 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
581 ; X64-AVX2-NEXT: shrl %cl, %eax
582 ; X64-AVX2-NEXT: retq
584 %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
; fshr with an undef first operand and constant amount 9: expected to reduce
; to 'shrl $9' of %a0.
588 define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
589 ; X86-SSE2-LABEL: fshr_i32_undef0_cst:
591 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
592 ; X86-SSE2-NEXT: shrl $9, %eax
593 ; X86-SSE2-NEXT: retl
595 ; X64-AVX2-LABEL: fshr_i32_undef0_cst:
597 ; X64-AVX2-NEXT: movl %edi, %eax
598 ; X64-AVX2-NEXT: shrl $9, %eax
599 ; X64-AVX2-NEXT: retq
600 %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
; fshr with an undef second operand and a variable amount: a 'shrdl' by %cl is
; still expected on both targets.
604 define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
605 ; X86-SSE2-LABEL: fshr_i32_undef1:
607 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
608 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
609 ; X86-SSE2-NEXT: shrdl %cl, %eax, %eax
610 ; X86-SSE2-NEXT: retl
612 ; X64-AVX2-LABEL: fshr_i32_undef1:
614 ; X64-AVX2-NEXT: movl %esi, %ecx
615 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
616 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
617 ; X64-AVX2-NEXT: retq
618 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %a1)
; fshr with an undef second operand and amount %m. The expected code masks the
; amount with 7 ('andl $7') and then still uses 'shrdl' by %cl.
; NOTE(review): %m is used below but its definition is not among the visible
; lines of this function - confirm against the full file.
622 define i32 @fshr_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
623 ; X86-SSE2-LABEL: fshr_i32_undef1_msk:
625 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
626 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
627 ; X86-SSE2-NEXT: andl $7, %ecx
628 ; X86-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
629 ; X86-SSE2-NEXT: shrdl %cl, %eax, %eax
630 ; X86-SSE2-NEXT: retl
632 ; X64-AVX2-LABEL: fshr_i32_undef1_msk:
634 ; X64-AVX2-NEXT: movl %esi, %ecx
635 ; X64-AVX2-NEXT: andl $7, %ecx
636 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
637 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
638 ; X64-AVX2-NEXT: retq
640 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %m)
; fshr with an undef second operand and constant amount 9: expected to reduce
; to a left shift of %a0 by 23 (32 - 9).
644 define i32 @fshr_i32_undef1_cst(i32 %a0) nounwind {
645 ; X86-SSE2-LABEL: fshr_i32_undef1_cst:
647 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
648 ; X86-SSE2-NEXT: shll $23, %eax
649 ; X86-SSE2-NEXT: retl
651 ; X64-AVX2-LABEL: fshr_i32_undef1_cst:
653 ; X64-AVX2-NEXT: movl %edi, %eax
654 ; X64-AVX2-NEXT: shll $23, %eax
655 ; X64-AVX2-NEXT: retq
656 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
; fshr with an undef shift amount: a 'shrdl' by %cl is still expected, with no
; instruction dedicated to loading a shift amount into %cl.
660 define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind {
661 ; X86-SSE2-LABEL: fshr_i32_undef2:
663 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
664 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
665 ; X86-SSE2-NEXT: shrdl %cl, %ecx, %eax
666 ; X86-SSE2-NEXT: retl
668 ; X64-AVX2-LABEL: fshr_i32_undef2:
670 ; X64-AVX2-NEXT: movl %esi, %eax
671 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
672 ; X64-AVX2-NEXT: retq
673 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 undef)
; fshl with a zero first operand and a variable amount: the expected code
; zeroes the result register with 'xorl' and then uses 'shldl' by %cl.
679 define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
680 ; X86-SSE2-LABEL: fshl_i32_zero0:
682 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
683 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
684 ; X86-SSE2-NEXT: xorl %eax, %eax
685 ; X86-SSE2-NEXT: shldl %cl, %edx, %eax
686 ; X86-SSE2-NEXT: retl
688 ; X64-AVX2-LABEL: fshl_i32_zero0:
690 ; X64-AVX2-NEXT: movl %esi, %ecx
691 ; X64-AVX2-NEXT: xorl %eax, %eax
692 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
693 ; X64-AVX2-NEXT: shldl %cl, %edi, %eax
694 ; X64-AVX2-NEXT: retq
695 %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 %a1)
; fshl with a zero first operand and constant amount 9: expected to reduce to
; a logical right shift of %a0 by 23 (32 - 9).
699 define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
700 ; X86-SSE2-LABEL: fshl_i32_zero0_cst:
702 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
703 ; X86-SSE2-NEXT: shrl $23, %eax
704 ; X86-SSE2-NEXT: retl
706 ; X64-AVX2-LABEL: fshl_i32_zero0_cst:
708 ; X64-AVX2-NEXT: movl %edi, %eax
709 ; X64-AVX2-NEXT: shrl $23, %eax
710 ; X64-AVX2-NEXT: retq
711 %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
; fshl with a zero second operand and a variable amount: the expected code
; zeroes a source register with 'xorl' and feeds it to 'shldl' by %cl.
715 define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
716 ; X86-SSE2-LABEL: fshl_i32_zero1:
718 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
719 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
720 ; X86-SSE2-NEXT: xorl %edx, %edx
721 ; X86-SSE2-NEXT: shldl %cl, %edx, %eax
722 ; X86-SSE2-NEXT: retl
724 ; X64-AVX2-LABEL: fshl_i32_zero1:
726 ; X64-AVX2-NEXT: movl %esi, %ecx
727 ; X64-AVX2-NEXT: movl %edi, %eax
728 ; X64-AVX2-NEXT: xorl %edx, %edx
729 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
730 ; X64-AVX2-NEXT: shldl %cl, %edx, %eax
731 ; X64-AVX2-NEXT: retq
732 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 %a1)
; fshl with a zero second operand and constant amount 9: expected to reduce to
; 'shll $9' of %a0.
736 define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
737 ; X86-SSE2-LABEL: fshl_i32_zero1_cst:
739 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
740 ; X86-SSE2-NEXT: shll $9, %eax
741 ; X86-SSE2-NEXT: retl
743 ; X64-AVX2-LABEL: fshl_i32_zero1_cst:
745 ; X64-AVX2-NEXT: movl %edi, %eax
746 ; X64-AVX2-NEXT: shll $9, %eax
747 ; X64-AVX2-NEXT: retq
748 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
; fshr with a zero first operand and a variable amount: the expected code
; zeroes a source register with 'xorl' and feeds it to 'shrdl' by %cl.
752 define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
753 ; X86-SSE2-LABEL: fshr_i32_zero0:
755 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
756 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
757 ; X86-SSE2-NEXT: xorl %edx, %edx
758 ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
759 ; X86-SSE2-NEXT: retl
761 ; X64-AVX2-LABEL: fshr_i32_zero0:
763 ; X64-AVX2-NEXT: movl %esi, %ecx
764 ; X64-AVX2-NEXT: movl %edi, %eax
765 ; X64-AVX2-NEXT: xorl %edx, %edx
766 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
767 ; X64-AVX2-NEXT: shrdl %cl, %edx, %eax
768 ; X64-AVX2-NEXT: retq
769 %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 %a1)
; fshr with a zero first operand and constant amount 9: expected to reduce to
; 'shrl $9' of %a0.
773 define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
774 ; X86-SSE2-LABEL: fshr_i32_zero0_cst:
776 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
777 ; X86-SSE2-NEXT: shrl $9, %eax
778 ; X86-SSE2-NEXT: retl
780 ; X64-AVX2-LABEL: fshr_i32_zero0_cst:
782 ; X64-AVX2-NEXT: movl %edi, %eax
783 ; X64-AVX2-NEXT: shrl $9, %eax
784 ; X64-AVX2-NEXT: retq
785 %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
; fshr with a zero second operand and a variable amount: the expected code
; zeroes the result register with 'xorl' and then uses 'shrdl' by %cl.
789 define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
790 ; X86-SSE2-LABEL: fshr_i32_zero1:
792 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
793 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
794 ; X86-SSE2-NEXT: xorl %eax, %eax
795 ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
796 ; X86-SSE2-NEXT: retl
798 ; X64-AVX2-LABEL: fshr_i32_zero1:
800 ; X64-AVX2-NEXT: movl %esi, %ecx
801 ; X64-AVX2-NEXT: xorl %eax, %eax
802 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
803 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
804 ; X64-AVX2-NEXT: retq
805 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 %a1)
; fshr with a zero second operand and constant amount 9: expected to reduce to
; a left shift of %a0 by 23 (32 - 9).
809 define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
810 ; X86-SSE2-LABEL: fshr_i32_zero1_cst:
812 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
813 ; X86-SSE2-NEXT: shll $23, %eax
814 ; X86-SSE2-NEXT: retl
816 ; X64-AVX2-LABEL: fshr_i32_zero1_cst:
818 ; X64-AVX2-NEXT: movl %edi, %eax
819 ; X64-AVX2-NEXT: shll $23, %eax
820 ; X64-AVX2-NEXT: retq
821 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
; fshl by the constant 0: no shift instruction is expected. On x86-64 the
; result is a plain copy of the first argument (%edi).
827 define i32 @fshl_i32_zero2(i32 %a0, i32 %a1) nounwind {
828 ; X86-SSE2-LABEL: fshl_i32_zero2:
830 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
831 ; X86-SSE2-NEXT: retl
833 ; X64-AVX2-LABEL: fshl_i32_zero2:
835 ; X64-AVX2-NEXT: movl %edi, %eax
836 ; X64-AVX2-NEXT: retq
837 %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 0)
; fshr by the constant 0: no shift instruction is expected. On x86-64 the
; result is a plain copy of the second argument (%esi).
841 define i32 @fshr_i32_zero2(i32 %a0, i32 %a1) nounwind {
842 ; X86-SSE2-LABEL: fshr_i32_zero2:
844 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
845 ; X86-SSE2-NEXT: retl
847 ; X64-AVX2-LABEL: fshr_i32_zero2:
849 ; X64-AVX2-NEXT: movl %esi, %eax
850 ; X64-AVX2-NEXT: retq
851 %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 0)
855 ; With constant shift amount, this is 'shrd' or 'shld'.
; i32 fshr by the constant 9: i686 is expected to emit 'shrdl $9', while
; x86-64 is expected to emit the equivalent 'shldl $23' (32 - 9).
857 define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
858 ; X86-SSE2-LABEL: fshr_i32_const_shift:
860 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
861 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
862 ; X86-SSE2-NEXT: shrdl $9, %ecx, %eax
863 ; X86-SSE2-NEXT: retl
865 ; X64-AVX2-LABEL: fshr_i32_const_shift:
867 ; X64-AVX2-NEXT: movl %edi, %eax
868 ; X64-AVX2-NEXT: shldl $23, %esi, %eax
869 ; X64-AVX2-NEXT: retq
870 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
874 ; Check modulo math on shift amount. 41-32=9, but right-shift may become left, so 32-9=23.
; i32 fshr by the constant 41, which exceeds the bit width: the expected code
; matches the in-range amount 9 ('shrdl $9' on i686, 'shldl $23' on x86-64).
876 define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
877 ; X86-SSE2-LABEL: fshr_i32_const_overshift:
879 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
880 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
881 ; X86-SSE2-NEXT: shrdl $9, %ecx, %eax
882 ; X86-SSE2-NEXT: retl
884 ; X64-AVX2-LABEL: fshr_i32_const_overshift:
886 ; X64-AVX2-NEXT: movl %edi, %eax
887 ; X64-AVX2-NEXT: shldl $23, %esi, %eax
888 ; X64-AVX2-NEXT: retq
889 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
893 ; 64-bit should also work. 105-64 = 41, but right-shift became left, so 64-41=23.
; i64 fshr by the constant 105, which exceeds the bit width.
; x86-64: expected as a single 'shldq $23'.
; i686: expected as 'shrdl $9' plus 'shldl $23' on 32-bit halves.
895 define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
896 ; X86-SSE2-LABEL: fshr_i64_const_overshift:
898 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
899 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
900 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
901 ; X86-SSE2-NEXT: shrdl $9, %ecx, %eax
902 ; X86-SSE2-NEXT: shldl $23, %ecx, %edx
903 ; X86-SSE2-NEXT: retl
905 ; X64-AVX2-LABEL: fshr_i64_const_overshift:
907 ; X64-AVX2-NEXT: movq %rdi, %rax
908 ; X64-AVX2-NEXT: shldq $23, %rsi, %rax
909 ; X64-AVX2-NEXT: retq
910 %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
914 ; This should work without any node-specific logic.
; All-constant i8 fshr: fshr(255, 0, 7) is expected to fold to the immediate
; -2 (bit pattern 0xFE), loaded with a single 'movb'.
916 define i8 @fshr_i8_const_fold() nounwind {
917 ; CHECK-LABEL: fshr_i8_const_fold:
919 ; CHECK-NEXT: movb $-2, %al
920 ; CHECK-NEXT: ret{{[l|q]}}
921 %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
; i32 fshl by 32, the full bit width: no shift instruction is expected. On
; x86-64 the result is a plain copy of the first argument (%edi).
925 define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
926 ; X86-SSE2-LABEL: fshl_i32_shift_by_bitwidth:
928 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
929 ; X86-SSE2-NEXT: retl
931 ; X64-AVX2-LABEL: fshl_i32_shift_by_bitwidth:
933 ; X64-AVX2-NEXT: movl %edi, %eax
934 ; X64-AVX2-NEXT: retq
935 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
; i32 fshr by 32, the full bit width: no shift instruction is expected. On
; x86-64 the result is a plain copy of the second argument (%esi).
939 define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
940 ; X86-SSE2-LABEL: fshr_i32_shift_by_bitwidth:
942 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
943 ; X86-SSE2-NEXT: retl
945 ; X64-AVX2-LABEL: fshr_i32_shift_by_bitwidth:
947 ; X64-AVX2-NEXT: movl %esi, %eax
948 ; X64-AVX2-NEXT: retq
949 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
; <4 x i32> fshl with every lane shifted by 32, the element bit width: the only
; instruction checked after the label is the return, shared by both targets.
953 define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
954 ; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
956 ; CHECK-NEXT: ret{{[l|q]}}
957 %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
; <4 x i32> fshr with every lane shifted by 32, the element bit width: the
; expected code is a single register copy of %xmm1 into %xmm0 ('movaps' with
; SSE2, 'vmovaps' with AVX2) and no shift.
961 define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
962 ; X86-SSE2-LABEL: fshr_v4i32_shift_by_bitwidth:
964 ; X86-SSE2-NEXT: movaps %xmm1, %xmm0
965 ; X86-SSE2-NEXT: retl
967 ; X64-AVX2-LABEL: fshr_v4i32_shift_by_bitwidth:
969 ; X64-AVX2-NEXT: vmovaps %xmm1, %xmm0
970 ; X64-AVX2-NEXT: retq
971 %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
; Presumably a regression test for the bug report named in the function name
; (verify against the bug tracker). The IR loads an i88 from the %struct.S
; element indexed by %0, arithmetic-shifts it right by 40, truncates to i64 and
; compares the result with the sign-extended index; @_Z3foov is tail-called.
; x86-64 is expected to combine the loaded pieces with 'shrdq $40' and to
; tail-call via a conditional jump; i686 is expected to use 'shrdl $8' and
; 'shldl $24' on 32-bit pieces.
; NOTE(review): the branch-target labels and the returns are not among the
; visible lines of this function - confirm against the full file.
975 %struct.S = type { [11 x i8], i8 }
976 define void @PR45265(i32 %0, %struct.S* nocapture readonly %1) nounwind {
977 ; X86-SSE2-LABEL: PR45265:
979 ; X86-SSE2-NEXT: pushl %edi
980 ; X86-SSE2-NEXT: pushl %esi
981 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
982 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
983 ; X86-SSE2-NEXT: leal (%eax,%eax,2), %esi
984 ; X86-SSE2-NEXT: movzwl 8(%ecx,%esi,4), %edx
985 ; X86-SSE2-NEXT: movl 4(%ecx,%esi,4), %edi
986 ; X86-SSE2-NEXT: shrdl $8, %edx, %edi
987 ; X86-SSE2-NEXT: xorl %eax, %edi
988 ; X86-SSE2-NEXT: sarl $31, %eax
989 ; X86-SSE2-NEXT: movzbl 10(%ecx,%esi,4), %ecx
990 ; X86-SSE2-NEXT: shll $16, %ecx
991 ; X86-SSE2-NEXT: orl %edx, %ecx
992 ; X86-SSE2-NEXT: shll $8, %ecx
993 ; X86-SSE2-NEXT: movl %ecx, %edx
994 ; X86-SSE2-NEXT: sarl $8, %edx
995 ; X86-SSE2-NEXT: sarl $31, %ecx
996 ; X86-SSE2-NEXT: shldl $24, %edx, %ecx
997 ; X86-SSE2-NEXT: xorl %eax, %ecx
998 ; X86-SSE2-NEXT: orl %ecx, %edi
999 ; X86-SSE2-NEXT: jne .LBB46_1
1000 ; X86-SSE2-NEXT: # %bb.2:
1001 ; X86-SSE2-NEXT: popl %esi
1002 ; X86-SSE2-NEXT: popl %edi
1003 ; X86-SSE2-NEXT: jmp _Z3foov # TAILCALL
1004 ; X86-SSE2-NEXT: .LBB46_1:
1005 ; X86-SSE2-NEXT: popl %esi
1006 ; X86-SSE2-NEXT: popl %edi
1007 ; X86-SSE2-NEXT: retl
1009 ; X64-AVX2-LABEL: PR45265:
1010 ; X64-AVX2: # %bb.0:
1011 ; X64-AVX2-NEXT: movslq %edi, %rax
1012 ; X64-AVX2-NEXT: leaq (%rax,%rax,2), %rcx
1013 ; X64-AVX2-NEXT: movsbq 10(%rsi,%rcx,4), %rdx
1014 ; X64-AVX2-NEXT: shlq $16, %rdx
1015 ; X64-AVX2-NEXT: movzwl 8(%rsi,%rcx,4), %edi
1016 ; X64-AVX2-NEXT: orq %rdx, %rdi
1017 ; X64-AVX2-NEXT: movq (%rsi,%rcx,4), %rcx
1018 ; X64-AVX2-NEXT: shrdq $40, %rdi, %rcx
1019 ; X64-AVX2-NEXT: cmpq %rax, %rcx
1020 ; X64-AVX2-NEXT: je _Z3foov # TAILCALL
1021 ; X64-AVX2-NEXT: # %bb.1:
1022 ; X64-AVX2-NEXT: retq
1023 %3 = sext i32 %0 to i64
1024 %4 = getelementptr inbounds %struct.S, %struct.S* %1, i64 %3
1025 %5 = bitcast %struct.S* %4 to i88*
1026 %6 = load i88, i88* %5, align 1
1027 %7 = ashr i88 %6, 40
1028 %8 = trunc i88 %7 to i64
1029 %9 = icmp eq i64 %8, %3
1030 br i1 %9, label %10, label %11
1033 tail call void @_Z3foov()
1039 declare dso_local void @_Z3foov()
; ORs fshl(%x, %y, %s) with (%y << %s), both using the same variable amount.
; The expected output keeps three separate instructions on both targets:
; 'shll', 'shldl' and 'orl'.
1041 define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) nounwind {
1042 ; X86-SSE2-LABEL: or_shl_fshl:
1043 ; X86-SSE2: # %bb.0:
1044 ; X86-SSE2-NEXT: pushl %esi
1045 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1046 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1047 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1048 ; X86-SSE2-NEXT: movl %edx, %esi
1049 ; X86-SSE2-NEXT: shll %cl, %esi
1050 ; X86-SSE2-NEXT: shldl %cl, %edx, %eax
1051 ; X86-SSE2-NEXT: orl %esi, %eax
1052 ; X86-SSE2-NEXT: popl %esi
1053 ; X86-SSE2-NEXT: retl
1055 ; X64-AVX2-LABEL: or_shl_fshl:
1056 ; X64-AVX2: # %bb.0:
1057 ; X64-AVX2-NEXT: movl %edx, %ecx
1058 ; X64-AVX2-NEXT: movl %esi, %eax
1059 ; X64-AVX2-NEXT: shll %cl, %eax
1060 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1061 ; X64-AVX2-NEXT: shldl %cl, %esi, %edi
1062 ; X64-AVX2-NEXT: orl %edi, %eax
1063 ; X64-AVX2-NEXT: retq
1064 %shy = shl i32 %y, %s
1065 %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
1066 %or = or i32 %fun, %shy
; or (rotl y, s), (shl x, s)
; The rotate is written as fshl with both data operands equal to %y; the plain
; shift is applied to the other argument, %x. The expected assembly for both
; runs is shll + roll + orl.
1070 define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) nounwind {
1071 ; X86-SSE2-LABEL: or_shl_rotl:
1072 ; X86-SSE2: # %bb.0:
1073 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1074 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1075 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1076 ; X86-SSE2-NEXT: shll %cl, %edx
1077 ; X86-SSE2-NEXT: roll %cl, %eax
1078 ; X86-SSE2-NEXT: orl %edx, %eax
1079 ; X86-SSE2-NEXT: retl
1081 ; X64-AVX2-LABEL: or_shl_rotl:
1082 ; X64-AVX2: # %bb.0:
1083 ; X64-AVX2-NEXT: movl %edx, %ecx
1084 ; X64-AVX2-NEXT: movl %esi, %eax
1085 ; X64-AVX2-NEXT: shll %cl, %edi
1086 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1087 ; X64-AVX2-NEXT: roll %cl, %eax
1088 ; X64-AVX2-NEXT: orl %edi, %eax
1089 ; X64-AVX2-NEXT: retq
1090 %shx = shl i32 %x, %s
1091 %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
1092 %or = or i32 %rot, %shx
; or (shl y, s), (fshl x, y, s)
; Same values as or_shl_fshl, but with the two `or` operands swapped. The
; expected assembly is the same shll + shldl + orl sequence as in that test.
1096 define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) nounwind {
1097 ; X86-SSE2-LABEL: or_shl_fshl_commute:
1098 ; X86-SSE2: # %bb.0:
1099 ; X86-SSE2-NEXT: pushl %esi
1100 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1101 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1102 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1103 ; X86-SSE2-NEXT: movl %edx, %esi
1104 ; X86-SSE2-NEXT: shll %cl, %esi
1105 ; X86-SSE2-NEXT: shldl %cl, %edx, %eax
1106 ; X86-SSE2-NEXT: orl %esi, %eax
1107 ; X86-SSE2-NEXT: popl %esi
1108 ; X86-SSE2-NEXT: retl
1110 ; X64-AVX2-LABEL: or_shl_fshl_commute:
1111 ; X64-AVX2: # %bb.0:
1112 ; X64-AVX2-NEXT: movl %edx, %ecx
1113 ; X64-AVX2-NEXT: movl %esi, %eax
1114 ; X64-AVX2-NEXT: shll %cl, %eax
1115 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1116 ; X64-AVX2-NEXT: shldl %cl, %esi, %edi
1117 ; X64-AVX2-NEXT: orl %edi, %eax
1118 ; X64-AVX2-NEXT: retq
1119 %shy = shl i32 %y, %s
1120 %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
1121 %or = or i32 %shy, %fun
; or (shl x, s), (rotl y, s)
; Same values as or_shl_rotl, but with the two `or` operands swapped. The
; expected assembly is the same shll + roll + orl sequence as in that test.
1125 define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) nounwind {
1126 ; X86-SSE2-LABEL: or_shl_rotl_commute:
1127 ; X86-SSE2: # %bb.0:
1128 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1129 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1130 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1131 ; X86-SSE2-NEXT: shll %cl, %edx
1132 ; X86-SSE2-NEXT: roll %cl, %eax
1133 ; X86-SSE2-NEXT: orl %edx, %eax
1134 ; X86-SSE2-NEXT: retl
1136 ; X64-AVX2-LABEL: or_shl_rotl_commute:
1137 ; X64-AVX2: # %bb.0:
1138 ; X64-AVX2-NEXT: movl %edx, %ecx
1139 ; X64-AVX2-NEXT: movl %esi, %eax
1140 ; X64-AVX2-NEXT: shll %cl, %edi
1141 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1142 ; X64-AVX2-NEXT: roll %cl, %eax
1143 ; X64-AVX2-NEXT: orl %edi, %eax
1144 ; X64-AVX2-NEXT: retq
1145 %shx = shl i32 %x, %s
1146 %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
1147 %or = or i32 %shx, %rot
; or (fshr y, x, s), (lshr y, s)
; Right-shift counterpart of or_shl_fshl: the plain logical shift uses %y,
; which is the FIRST operand of the funnel shift right. The expected assembly
; for both runs keeps three separate operations: shrl, shrdl and orl.
1151 define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) nounwind {
1152 ; X86-SSE2-LABEL: or_lshr_fshr:
1153 ; X86-SSE2: # %bb.0:
1154 ; X86-SSE2-NEXT: pushl %esi
1155 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1156 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1157 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1158 ; X86-SSE2-NEXT: movl %edx, %esi
1159 ; X86-SSE2-NEXT: shrl %cl, %esi
1160 ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
1161 ; X86-SSE2-NEXT: orl %esi, %eax
1162 ; X86-SSE2-NEXT: popl %esi
1163 ; X86-SSE2-NEXT: retl
1165 ; X64-AVX2-LABEL: or_lshr_fshr:
1166 ; X64-AVX2: # %bb.0:
1167 ; X64-AVX2-NEXT: movl %edx, %ecx
1168 ; X64-AVX2-NEXT: movl %esi, %eax
1169 ; X64-AVX2-NEXT: shrl %cl, %eax
1170 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1171 ; X64-AVX2-NEXT: shrdl %cl, %esi, %edi
1172 ; X64-AVX2-NEXT: orl %edi, %eax
1173 ; X64-AVX2-NEXT: retq
1174 %shy = lshr i32 %y, %s
1175 %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
1176 %or = or i32 %fun, %shy
; or (rotr y, s), (lshr x, s)
; The rotate is written as fshr with both data operands equal to %y; the plain
; logical shift is applied to the other argument, %x. The expected assembly for
; both runs is shrl + rorl + orl.
1180 define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) nounwind {
1181 ; X86-SSE2-LABEL: or_lshr_rotr:
1182 ; X86-SSE2: # %bb.0:
1183 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1184 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1185 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1186 ; X86-SSE2-NEXT: shrl %cl, %edx
1187 ; X86-SSE2-NEXT: rorl %cl, %eax
1188 ; X86-SSE2-NEXT: orl %edx, %eax
1189 ; X86-SSE2-NEXT: retl
1191 ; X64-AVX2-LABEL: or_lshr_rotr:
1192 ; X64-AVX2: # %bb.0:
1193 ; X64-AVX2-NEXT: movl %edx, %ecx
1194 ; X64-AVX2-NEXT: movl %esi, %eax
1195 ; X64-AVX2-NEXT: shrl %cl, %edi
1196 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1197 ; X64-AVX2-NEXT: rorl %cl, %eax
1198 ; X64-AVX2-NEXT: orl %edi, %eax
1199 ; X64-AVX2-NEXT: retq
1200 %shx = lshr i32 %x, %s
1201 %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
1202 %or = or i32 %rot, %shx
; or (lshr y, s), (fshr y, x, s)
; Same values as or_lshr_fshr, but with the two `or` operands swapped. The
; expected assembly is the same shrl + shrdl + orl sequence as in that test.
1206 define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) nounwind {
1207 ; X86-SSE2-LABEL: or_lshr_fshr_commute:
1208 ; X86-SSE2: # %bb.0:
1209 ; X86-SSE2-NEXT: pushl %esi
1210 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1211 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1212 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1213 ; X86-SSE2-NEXT: movl %edx, %esi
1214 ; X86-SSE2-NEXT: shrl %cl, %esi
1215 ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
1216 ; X86-SSE2-NEXT: orl %esi, %eax
1217 ; X86-SSE2-NEXT: popl %esi
1218 ; X86-SSE2-NEXT: retl
1220 ; X64-AVX2-LABEL: or_lshr_fshr_commute:
1221 ; X64-AVX2: # %bb.0:
1222 ; X64-AVX2-NEXT: movl %edx, %ecx
1223 ; X64-AVX2-NEXT: movl %esi, %eax
1224 ; X64-AVX2-NEXT: shrl %cl, %eax
1225 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1226 ; X64-AVX2-NEXT: shrdl %cl, %esi, %edi
1227 ; X64-AVX2-NEXT: orl %edi, %eax
1228 ; X64-AVX2-NEXT: retq
1229 %shy = lshr i32 %y, %s
1230 %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
1231 %or = or i32 %shy, %fun
; or (lshr x, s), (rotr y, s)
; Same values as or_lshr_rotr, but with the two `or` operands swapped. The
; expected assembly is the same shrl + rorl + orl sequence as in that test.
1235 define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
1236 ; X86-SSE2-LABEL: or_lshr_rotr_commute:
1237 ; X86-SSE2: # %bb.0:
1238 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1239 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1240 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1241 ; X86-SSE2-NEXT: shrl %cl, %edx
1242 ; X86-SSE2-NEXT: rorl %cl, %eax
1243 ; X86-SSE2-NEXT: orl %edx, %eax
1244 ; X86-SSE2-NEXT: retl
1246 ; X64-AVX2-LABEL: or_lshr_rotr_commute:
1247 ; X64-AVX2: # %bb.0:
1248 ; X64-AVX2-NEXT: movl %edx, %ecx
1249 ; X64-AVX2-NEXT: movl %esi, %eax
1250 ; X64-AVX2-NEXT: shrl %cl, %edi
1251 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1252 ; X64-AVX2-NEXT: rorl %cl, %eax
1253 ; X64-AVX2-NEXT: orl %edi, %eax
1254 ; X64-AVX2-NEXT: retq
1255 %shx = lshr i32 %x, %s
1256 %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
1257 %or = or i32 %shx, %rot
; or (fshl y, x, s), (shl y, s)
; Here the plain shift uses %y, which is the FIRST operand of the funnel shift
; (contrast with or_shl_fshl, where it is the second). The expected assembly
; for both runs is a single shldl, with no separate shll or orl.
1261 define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
1262 ; X86-SSE2-LABEL: or_shl_fshl_simplify:
1263 ; X86-SSE2: # %bb.0:
1264 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1265 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1266 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1267 ; X86-SSE2-NEXT: shldl %cl, %edx, %eax
1268 ; X86-SSE2-NEXT: retl
1270 ; X64-AVX2-LABEL: or_shl_fshl_simplify:
1271 ; X64-AVX2: # %bb.0:
1272 ; X64-AVX2-NEXT: movl %edx, %ecx
1273 ; X64-AVX2-NEXT: movl %esi, %eax
1274 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1275 ; X64-AVX2-NEXT: shldl %cl, %edi, %eax
1276 ; X64-AVX2-NEXT: retq
1277 %shy = shl i32 %y, %s
1278 %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
1279 %or = or i32 %fun, %shy
; or (lshr y, s), (fshr x, y, s)
; Here the plain logical shift uses %y, which is the SECOND operand of the
; funnel shift right (contrast with or_lshr_fshr, where it is the first). The
; expected assembly for both runs is a single shrdl, with no separate shrl or
; orl.
1283 define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
1284 ; X86-SSE2-LABEL: or_lshr_fshr_simplify:
1285 ; X86-SSE2: # %bb.0:
1286 ; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
1287 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
1288 ; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
1289 ; X86-SSE2-NEXT: shrdl %cl, %edx, %eax
1290 ; X86-SSE2-NEXT: retl
1292 ; X64-AVX2-LABEL: or_lshr_fshr_simplify:
1293 ; X64-AVX2: # %bb.0:
1294 ; X64-AVX2-NEXT: movl %edx, %ecx
1295 ; X64-AVX2-NEXT: movl %esi, %eax
1296 ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
1297 ; X64-AVX2-NEXT: shrdl %cl, %edi, %eax
1298 ; X64-AVX2-NEXT: retq
1299 %shy = lshr i32 %y, %s
1300 %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
1301 %or = or i32 %shy, %fun