1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-FAST
3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X86,X86-SLOW
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-FAST
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=X64,X64-SLOW
; Declarations of the funnel-shift-left intrinsic at every width exercised below.
declare i8 @llvm.fshl.i8(i8, i8, i8) nounwind readnone
declare i16 @llvm.fshl.i16(i16, i16, i16) nounwind readnone
declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
declare i64 @llvm.fshl.i64(i64, i64, i64) nounwind readnone
declare i128 @llvm.fshl.i128(i128, i128, i128) nounwind readnone
14 ; Variable Funnel Shift
; fshl.i8: there is no 8-bit shld, so both targets concatenate x:y into a
; 16-bit value (shll $8 / orl), mask the shift amount to 3 bits (andb $7),
; shift left, and take the high byte of the result.
; NOTE: assertions are autogenerated — do not hand-edit the CHECK lines.
define i8 @var_shift_i8(i8 %x, i8 %y, i8 %z) nounwind {
; X86-LABEL: var_shift_i8:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shll $8, %eax
; X86-NEXT: orl %edx, %eax
; X86-NEXT: andb $7, %cl
; X86-NEXT: shll %cl, %eax
; X86-NEXT: movb %ah, %al
; X64-LABEL: var_shift_i8:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: shll $8, %edi
; X64-NEXT: movzbl %sil, %eax
; X64-NEXT: orl %edi, %eax
; X64-NEXT: andb $7, %cl
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shll %cl, %eax
; X64-NEXT: shrl $8, %eax
; X64-NEXT: # kill: def $al killed $al killed $eax
  %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 %z)
; fshl.i16: with fast shld a single shldw on the mod-16 masked amount
; (andb $15) suffices; with -mattr=+slow-shld the shift is expanded by
; concatenating x:y into 32 bits, shifting left, and taking the high half.
define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
; X86-FAST-LABEL: var_shift_i16:
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FAST-NEXT: andb $15, %cl
; X86-FAST-NEXT: shldw %cl, %dx, %ax
; X86-SLOW-LABEL: var_shift_i16:
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: shll $16, %eax
; X86-SLOW-NEXT: orl %edx, %eax
; X86-SLOW-NEXT: andb $15, %cl
; X86-SLOW-NEXT: shll %cl, %eax
; X86-SLOW-NEXT: shrl $16, %eax
; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
; X64-FAST-LABEL: var_shift_i16:
; X64-FAST-NEXT: movl %edx, %ecx
; X64-FAST-NEXT: movl %edi, %eax
; X64-FAST-NEXT: andb $15, %cl
; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT: shldw %cl, %si, %ax
; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
; X64-SLOW-LABEL: var_shift_i16:
; X64-SLOW-NEXT: movl %edx, %ecx
; X64-SLOW-NEXT: shll $16, %edi
; X64-SLOW-NEXT: movzwl %si, %eax
; X64-SLOW-NEXT: orl %edi, %eax
; X64-SLOW-NEXT: andb $15, %cl
; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT: shll %cl, %eax
; X64-SLOW-NEXT: shrl $16, %eax
; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
  %tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
; fshl.i32: the shift amount is implicitly mod 32, so no masking is needed.
; Fast targets emit a single shldl; slow-shld targets expand to
; (x << z) | ((y >> 1) >> (~z & 31)) using notb on the count register.
define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X86-FAST-LABEL: var_shift_i32:
; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-FAST-NEXT: shldl %cl, %edx, %eax
; X86-FAST-NEXT: retl
; X86-SLOW-LABEL: var_shift_i32:
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: shll %cl, %edx
; X86-SLOW-NEXT: notb %cl
; X86-SLOW-NEXT: shrl %eax
; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: orl %edx, %eax
; X86-SLOW-NEXT: retl
; X64-FAST-LABEL: var_shift_i32:
; X64-FAST-NEXT: movl %edx, %ecx
; X64-FAST-NEXT: movl %edi, %eax
; X64-FAST-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-FAST-NEXT: shldl %cl, %esi, %eax
; X64-FAST-NEXT: retq
; X64-SLOW-LABEL: var_shift_i32:
; X64-SLOW-NEXT: movl %edx, %ecx
; X64-SLOW-NEXT: movl %esi, %eax
; X64-SLOW-NEXT: shll %cl, %edi
; X64-SLOW-NEXT: shrl %eax
; X64-SLOW-NEXT: notb %cl
; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-SLOW-NEXT: shrl %cl, %eax
; X64-SLOW-NEXT: orl %edi, %eax
; X64-SLOW-NEXT: retq
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
; Same i32 funnel shift but with the optsize attribute: even on slow-shld
; targets the compact shldl form is chosen, so the X86/X64 prefixes cover
; both -FAST and -SLOW runs with identical output.
define i32 @var_shift_i32_optsize(i32 %x, i32 %y, i32 %z) nounwind optsize {
; X86-LABEL: var_shift_i32_optsize:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
; X64-LABEL: var_shift_i32_optsize:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shldl %cl, %esi, %eax
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
; Same i32 funnel shift under profile-guided size optimization: the !prof !14
; annotation (metadata node presumably defined later in this file — not
; visible in this chunk) marks the function cold, and codegen matches the
; optsize case: a single shldl on both fast and slow-shld targets.
define i32 @var_shift_i32_pgso(i32 %x, i32 %y, i32 %z) nounwind !prof !14 {
; X86-LABEL: var_shift_i32_pgso:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: shldl %cl, %edx, %eax
; X64-LABEL: var_shift_i32_pgso:
; X64-NEXT: movl %edx, %ecx
; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-NEXT: shldl %cl, %esi, %eax
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
; fshl.i64: on X64 this is native width — one shldq (fast) or the
; shl/shr/or expansion with a complemented count (slow). On i686 the i64
; shift is legalized to 32-bit halves: each half-shift is computed twice and
; a `testb $32, %cl` branch selects between the low/high results, with the
; slow-shld variant additionally expanding every double-shift into
; shl/shr/lea/or sequences.
define i64 @var_shift_i64(i64 %x, i64 %y, i64 %z) nounwind {
; X86-FAST-LABEL: var_shift_i64:
; X86-FAST-NEXT: pushl %ebx
; X86-FAST-NEXT: pushl %edi
; X86-FAST-NEXT: pushl %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-FAST-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-FAST-NEXT: movb %ch, %cl
; X86-FAST-NEXT: notb %cl
; X86-FAST-NEXT: shrdl $1, %edi, %esi
; X86-FAST-NEXT: shrl %edi
; X86-FAST-NEXT: shrdl %cl, %edi, %esi
; X86-FAST-NEXT: shrl %cl, %edi
; X86-FAST-NEXT: testb $32, %cl
; X86-FAST-NEXT: je .LBB5_2
; X86-FAST-NEXT: # %bb.1:
; X86-FAST-NEXT: movl %edi, %esi
; X86-FAST-NEXT: xorl %edi, %edi
; X86-FAST-NEXT: .LBB5_2:
; X86-FAST-NEXT: movl %ebx, %eax
; X86-FAST-NEXT: movb %ch, %cl
; X86-FAST-NEXT: shll %cl, %eax
; X86-FAST-NEXT: shldl %cl, %ebx, %edx
; X86-FAST-NEXT: testb $32, %ch
; X86-FAST-NEXT: je .LBB5_4
; X86-FAST-NEXT: # %bb.3:
; X86-FAST-NEXT: movl %eax, %edx
; X86-FAST-NEXT: xorl %eax, %eax
; X86-FAST-NEXT: .LBB5_4:
; X86-FAST-NEXT: orl %edi, %edx
; X86-FAST-NEXT: orl %esi, %eax
; X86-FAST-NEXT: popl %esi
; X86-FAST-NEXT: popl %edi
; X86-FAST-NEXT: popl %ebx
; X86-FAST-NEXT: retl
; X86-SLOW-LABEL: var_shift_i64:
; X86-SLOW-NEXT: pushl %ebp
; X86-SLOW-NEXT: pushl %ebx
; X86-SLOW-NEXT: pushl %edi
; X86-SLOW-NEXT: pushl %esi
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-SLOW-NEXT: shrl %eax
; X86-SLOW-NEXT: movl %esi, %edi
; X86-SLOW-NEXT: shll $31, %edi
; X86-SLOW-NEXT: orl %eax, %edi
; X86-SLOW-NEXT: movl %ecx, %eax
; X86-SLOW-NEXT: movb %cl, %ch
; X86-SLOW-NEXT: notb %ch
; X86-SLOW-NEXT: movb %ch, %cl
; X86-SLOW-NEXT: shrl %cl, %edi
; X86-SLOW-NEXT: shrl %esi
; X86-SLOW-NEXT: leal (%esi,%esi), %ebp
; X86-SLOW-NEXT: movb %al, %cl
; X86-SLOW-NEXT: shll %cl, %ebp
; X86-SLOW-NEXT: shll %cl, %ebx
; X86-SLOW-NEXT: movl %edx, %eax
; X86-SLOW-NEXT: shrl %eax
; X86-SLOW-NEXT: movb %ch, %cl
; X86-SLOW-NEXT: shrl %cl, %eax
; X86-SLOW-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-SLOW-NEXT: shll %cl, %edx
; X86-SLOW-NEXT: testb $32, {{[0-9]+}}(%esp)
; X86-SLOW-NEXT: jne .LBB5_1
; X86-SLOW-NEXT: # %bb.2:
; X86-SLOW-NEXT: orl %eax, %ebx
; X86-SLOW-NEXT: jmp .LBB5_3
; X86-SLOW-NEXT: .LBB5_1:
; X86-SLOW-NEXT: movl %edx, %ebx
; X86-SLOW-NEXT: xorl %edx, %edx
; X86-SLOW-NEXT: .LBB5_3:
; X86-SLOW-NEXT: movb %ch, %cl
; X86-SLOW-NEXT: shrl %cl, %esi
; X86-SLOW-NEXT: testb $32, %ch
; X86-SLOW-NEXT: jne .LBB5_4
; X86-SLOW-NEXT: # %bb.5:
; X86-SLOW-NEXT: orl %edi, %ebp
; X86-SLOW-NEXT: jmp .LBB5_6
; X86-SLOW-NEXT: .LBB5_4:
; X86-SLOW-NEXT: movl %esi, %ebp
; X86-SLOW-NEXT: xorl %esi, %esi
; X86-SLOW-NEXT: .LBB5_6:
; X86-SLOW-NEXT: orl %ebp, %edx
; X86-SLOW-NEXT: orl %esi, %ebx
; X86-SLOW-NEXT: movl %edx, %eax
; X86-SLOW-NEXT: movl %ebx, %edx
; X86-SLOW-NEXT: popl %esi
; X86-SLOW-NEXT: popl %edi
; X86-SLOW-NEXT: popl %ebx
; X86-SLOW-NEXT: popl %ebp
; X86-SLOW-NEXT: retl
; X64-FAST-LABEL: var_shift_i64:
; X64-FAST-NEXT: movq %rdx, %rcx
; X64-FAST-NEXT: movq %rdi, %rax
; X64-FAST-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-FAST-NEXT: shldq %cl, %rsi, %rax
; X64-FAST-NEXT: retq
; X64-SLOW-LABEL: var_shift_i64:
; X64-SLOW-NEXT: movq %rdx, %rcx
; X64-SLOW-NEXT: movq %rsi, %rax
; X64-SLOW-NEXT: shlq %cl, %rdi
; X64-SLOW-NEXT: shrq %rax
; X64-SLOW-NEXT: notb %cl
; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $rcx
; X64-SLOW-NEXT: shrq %cl, %rax
; X64-SLOW-NEXT: orq %rdi, %rax
; X64-SLOW-NEXT: retq
%tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
303 define i128 @var_shift_i128(i128 %x, i128 %y, i128 %z) nounwind {
304 ; X86-FAST-LABEL: var_shift_i128:
306 ; X86-FAST-NEXT: pushl %ebp
307 ; X86-FAST-NEXT: pushl %ebx
308 ; X86-FAST-NEXT: pushl %edi
309 ; X86-FAST-NEXT: pushl %esi
310 ; X86-FAST-NEXT: subl $72, %esp
311 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebx
312 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
313 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
314 ; X86-FAST-NEXT: movl %edx, %edi
315 ; X86-FAST-NEXT: shldl $31, %eax, %edi
316 ; X86-FAST-NEXT: movl %ebx, %eax
317 ; X86-FAST-NEXT: notl %ebx
318 ; X86-FAST-NEXT: andl $127, %ebx
319 ; X86-FAST-NEXT: movb $64, %cl
320 ; X86-FAST-NEXT: subb %bl, %cl
321 ; X86-FAST-NEXT: shrl %edx
322 ; X86-FAST-NEXT: movl %edx, %ebp
323 ; X86-FAST-NEXT: shldl %cl, %edi, %edx
324 ; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
325 ; X86-FAST-NEXT: movl %edi, %edx
326 ; X86-FAST-NEXT: shll %cl, %edx
327 ; X86-FAST-NEXT: testb $32, %cl
328 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
329 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
330 ; X86-FAST-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
331 ; X86-FAST-NEXT: jne .LBB6_1
332 ; X86-FAST-NEXT: # %bb.2:
333 ; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
334 ; X86-FAST-NEXT: jmp .LBB6_3
335 ; X86-FAST-NEXT: .LBB6_1:
336 ; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
337 ; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
338 ; X86-FAST-NEXT: .LBB6_3:
339 ; X86-FAST-NEXT: andl $127, %eax
340 ; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
341 ; X86-FAST-NEXT: movb %al, %ch
342 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
343 ; X86-FAST-NEXT: movb %ch, %cl
344 ; X86-FAST-NEXT: shldl %cl, %esi, %eax
345 ; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
346 ; X86-FAST-NEXT: movb %bl, %cl
347 ; X86-FAST-NEXT: addb $-64, %cl
348 ; X86-FAST-NEXT: movl %edi, %eax
349 ; X86-FAST-NEXT: movl %ebp, %edx
350 ; X86-FAST-NEXT: shrdl %cl, %ebp, %eax
351 ; X86-FAST-NEXT: shrl %cl, %ebp
352 ; X86-FAST-NEXT: testb $32, %cl
353 ; X86-FAST-NEXT: jne .LBB6_4
354 ; X86-FAST-NEXT: # %bb.5:
355 ; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
356 ; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
357 ; X86-FAST-NEXT: jmp .LBB6_6
358 ; X86-FAST-NEXT: .LBB6_4:
359 ; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
360 ; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
361 ; X86-FAST-NEXT: .LBB6_6:
362 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
363 ; X86-FAST-NEXT: movb %ch, %cl
364 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
365 ; X86-FAST-NEXT: shldl %cl, %eax, %ebp
366 ; X86-FAST-NEXT: shll %cl, %eax
367 ; X86-FAST-NEXT: shll %cl, %esi
368 ; X86-FAST-NEXT: testb $32, %ch
369 ; X86-FAST-NEXT: jne .LBB6_7
370 ; X86-FAST-NEXT: # %bb.8:
371 ; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
372 ; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
373 ; X86-FAST-NEXT: jmp .LBB6_9
374 ; X86-FAST-NEXT: .LBB6_7:
375 ; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
376 ; X86-FAST-NEXT: movl %eax, %ebp
377 ; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
378 ; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
379 ; X86-FAST-NEXT: .LBB6_9:
380 ; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
381 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
382 ; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
383 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
384 ; X86-FAST-NEXT: jb .LBB6_11
385 ; X86-FAST-NEXT: # %bb.10:
386 ; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
387 ; X86-FAST-NEXT: .LBB6_11:
388 ; X86-FAST-NEXT: movb %bl, %cl
389 ; X86-FAST-NEXT: shrdl %cl, %edx, %edi
390 ; X86-FAST-NEXT: shrl %cl, %edx
391 ; X86-FAST-NEXT: shldl $31, %eax, %esi
392 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
393 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
394 ; X86-FAST-NEXT: shrdl $1, %ebp, %eax
395 ; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
396 ; X86-FAST-NEXT: shrdl %cl, %esi, %eax
397 ; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
398 ; X86-FAST-NEXT: movl %esi, %eax
399 ; X86-FAST-NEXT: shrl %cl, %eax
400 ; X86-FAST-NEXT: testb $32, %bl
401 ; X86-FAST-NEXT: je .LBB6_13
402 ; X86-FAST-NEXT: # %bb.12:
403 ; X86-FAST-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
404 ; X86-FAST-NEXT: movl %edx, %edi
405 ; X86-FAST-NEXT: xorl %eax, %eax
406 ; X86-FAST-NEXT: xorl %edx, %edx
407 ; X86-FAST-NEXT: .LBB6_13:
408 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
409 ; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
410 ; X86-FAST-NEXT: jb .LBB6_15
411 ; X86-FAST-NEXT: # %bb.14:
412 ; X86-FAST-NEXT: xorl %ebp, %ebp
413 ; X86-FAST-NEXT: .LBB6_15:
414 ; X86-FAST-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
415 ; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
416 ; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
417 ; X86-FAST-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
418 ; X86-FAST-NEXT: movb $64, %cl
419 ; X86-FAST-NEXT: subb %ch, %cl
420 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ebp
421 ; X86-FAST-NEXT: shrl %cl, %ebp
422 ; X86-FAST-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
423 ; X86-FAST-NEXT: testb $32, %cl
424 ; X86-FAST-NEXT: movl $0, %edx
425 ; X86-FAST-NEXT: jne .LBB6_17
426 ; X86-FAST-NEXT: # %bb.16:
427 ; X86-FAST-NEXT: movl %ebp, %edx
428 ; X86-FAST-NEXT: .LBB6_17:
429 ; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
430 ; X86-FAST-NEXT: addb $-64, %ch
431 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edi
432 ; X86-FAST-NEXT: movl %edi, %esi
433 ; X86-FAST-NEXT: movb %ch, %cl
434 ; X86-FAST-NEXT: shll %cl, %esi
435 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
436 ; X86-FAST-NEXT: shldl %cl, %edi, %edx
437 ; X86-FAST-NEXT: testb $32, %ch
438 ; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
439 ; X86-FAST-NEXT: jne .LBB6_19
440 ; X86-FAST-NEXT: # %bb.18:
441 ; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
442 ; X86-FAST-NEXT: .LBB6_19:
443 ; X86-FAST-NEXT: cmpl $64, %ebx
444 ; X86-FAST-NEXT: jb .LBB6_21
445 ; X86-FAST-NEXT: # %bb.20:
446 ; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
447 ; X86-FAST-NEXT: .LBB6_21:
448 ; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
449 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
450 ; X86-FAST-NEXT: jae .LBB6_23
451 ; X86-FAST-NEXT: # %bb.22:
452 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
453 ; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
454 ; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
455 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
456 ; X86-FAST-NEXT: .LBB6_23:
457 ; X86-FAST-NEXT: testb $32, %ch
458 ; X86-FAST-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
459 ; X86-FAST-NEXT: jne .LBB6_25
460 ; X86-FAST-NEXT: # %bb.24:
461 ; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
462 ; X86-FAST-NEXT: .LBB6_25:
463 ; X86-FAST-NEXT: cmpl $64, %ebx
464 ; X86-FAST-NEXT: jb .LBB6_27
465 ; X86-FAST-NEXT: # %bb.26:
466 ; X86-FAST-NEXT: xorl %edx, %edx
467 ; X86-FAST-NEXT: .LBB6_27:
468 ; X86-FAST-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
469 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
470 ; X86-FAST-NEXT: shrdl %cl, %esi, %edi
471 ; X86-FAST-NEXT: testb $32, %cl
472 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
473 ; X86-FAST-NEXT: jne .LBB6_29
474 ; X86-FAST-NEXT: # %bb.28:
475 ; X86-FAST-NEXT: movl %edi, %ebp
476 ; X86-FAST-NEXT: .LBB6_29:
477 ; X86-FAST-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
478 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
479 ; X86-FAST-NEXT: jae .LBB6_31
480 ; X86-FAST-NEXT: # %bb.30:
481 ; X86-FAST-NEXT: orl %ebp, %esi
482 ; X86-FAST-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
483 ; X86-FAST-NEXT: .LBB6_31:
484 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
485 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
486 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
487 ; X86-FAST-NEXT: cmpl $64, %ebx
488 ; X86-FAST-NEXT: jae .LBB6_33
489 ; X86-FAST-NEXT: # %bb.32:
490 ; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
491 ; X86-FAST-NEXT: movl %eax, %ebp
492 ; X86-FAST-NEXT: .LBB6_33:
493 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
494 ; X86-FAST-NEXT: cmpl $64, %ebx
495 ; X86-FAST-NEXT: jae .LBB6_35
496 ; X86-FAST-NEXT: # %bb.34:
497 ; X86-FAST-NEXT: movl %edx, %ecx
498 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
499 ; X86-FAST-NEXT: orl %eax, %edx
500 ; X86-FAST-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
501 ; X86-FAST-NEXT: movl %ecx, %edx
502 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
503 ; X86-FAST-NEXT: .LBB6_35:
504 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
505 ; X86-FAST-NEXT: testl %ebx, %ebx
506 ; X86-FAST-NEXT: je .LBB6_37
507 ; X86-FAST-NEXT: # %bb.36:
508 ; X86-FAST-NEXT: movl %ebp, %ecx
509 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
510 ; X86-FAST-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
511 ; X86-FAST-NEXT: .LBB6_37:
512 ; X86-FAST-NEXT: orl %ecx, %edi
513 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
514 ; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
515 ; X86-FAST-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
516 ; X86-FAST-NEXT: je .LBB6_39
517 ; X86-FAST-NEXT: # %bb.38:
518 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
519 ; X86-FAST-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
520 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
521 ; X86-FAST-NEXT: .LBB6_39:
522 ; X86-FAST-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
523 ; X86-FAST-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
524 ; X86-FAST-NEXT: orl %edx, %esi
525 ; X86-FAST-NEXT: movl %ecx, 12(%eax)
526 ; X86-FAST-NEXT: movl %esi, 8(%eax)
527 ; X86-FAST-NEXT: movl %edi, 4(%eax)
528 ; X86-FAST-NEXT: movl %ebx, (%eax)
529 ; X86-FAST-NEXT: addl $72, %esp
530 ; X86-FAST-NEXT: popl %esi
531 ; X86-FAST-NEXT: popl %edi
532 ; X86-FAST-NEXT: popl %ebx
533 ; X86-FAST-NEXT: popl %ebp
534 ; X86-FAST-NEXT: retl $4
536 ; X86-SLOW-LABEL: var_shift_i128:
538 ; X86-SLOW-NEXT: pushl %ebp
539 ; X86-SLOW-NEXT: pushl %ebx
540 ; X86-SLOW-NEXT: pushl %edi
541 ; X86-SLOW-NEXT: pushl %esi
542 ; X86-SLOW-NEXT: subl $76, %esp
543 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
544 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
545 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
546 ; X86-SLOW-NEXT: andl $127, %eax
547 ; X86-SLOW-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
548 ; X86-SLOW-NEXT: # kill: def $al killed $al killed $eax
549 ; X86-SLOW-NEXT: movl %eax, %ecx
550 ; X86-SLOW-NEXT: shll %cl, %edx
551 ; X86-SLOW-NEXT: movl %ebx, %esi
552 ; X86-SLOW-NEXT: shrl %esi
553 ; X86-SLOW-NEXT: movb %al, %ah
554 ; X86-SLOW-NEXT: notb %ah
555 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
556 ; X86-SLOW-NEXT: movb %ah, %cl
557 ; X86-SLOW-NEXT: shrl %cl, %esi
558 ; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
559 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
560 ; X86-SLOW-NEXT: movl %eax, %ecx
561 ; X86-SLOW-NEXT: shll %cl, %ebp
562 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
563 ; X86-SLOW-NEXT: shrl %edi
564 ; X86-SLOW-NEXT: movb %ah, %cl
565 ; X86-SLOW-NEXT: shrl %cl, %edi
566 ; X86-SLOW-NEXT: movl %ebx, %esi
567 ; X86-SLOW-NEXT: movl %eax, %ecx
568 ; X86-SLOW-NEXT: shll %cl, %esi
569 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
570 ; X86-SLOW-NEXT: shll %cl, %ebx
571 ; X86-SLOW-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
572 ; X86-SLOW-NEXT: testb $32, %al
573 ; X86-SLOW-NEXT: jne .LBB6_1
574 ; X86-SLOW-NEXT: # %bb.2:
575 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
576 ; X86-SLOW-NEXT: orl (%esp), %edx # 4-byte Folded Reload
577 ; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill
578 ; X86-SLOW-NEXT: orl %edi, %ebp
579 ; X86-SLOW-NEXT: jmp .LBB6_3
580 ; X86-SLOW-NEXT: .LBB6_1:
581 ; X86-SLOW-NEXT: movl %ebx, %ebp
582 ; X86-SLOW-NEXT: movl %esi, (%esp) # 4-byte Spill
583 ; X86-SLOW-NEXT: xorl %ebx, %ebx
584 ; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
585 ; X86-SLOW-NEXT: .LBB6_3:
586 ; X86-SLOW-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
587 ; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
588 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
589 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
590 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
591 ; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
592 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebx
593 ; X86-SLOW-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Reload
594 ; X86-SLOW-NEXT: jb .LBB6_5
595 ; X86-SLOW-NEXT: # %bb.4:
596 ; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
597 ; X86-SLOW-NEXT: .LBB6_5:
598 ; X86-SLOW-NEXT: shrl %edi
599 ; X86-SLOW-NEXT: notl %ebx
600 ; X86-SLOW-NEXT: andl $127, %ebx
601 ; X86-SLOW-NEXT: movl %edi, %ebp
602 ; X86-SLOW-NEXT: movl %ebx, %ecx
603 ; X86-SLOW-NEXT: shrl %cl, %ebp
604 ; X86-SLOW-NEXT: movl %esi, %ecx
605 ; X86-SLOW-NEXT: shrl %ecx
606 ; X86-SLOW-NEXT: movl %eax, %esi
607 ; X86-SLOW-NEXT: shll $31, %esi
608 ; X86-SLOW-NEXT: orl %ecx, %esi
609 ; X86-SLOW-NEXT: movl %esi, %ecx
610 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
611 ; X86-SLOW-NEXT: movl %ebx, %ecx
612 ; X86-SLOW-NEXT: shrl %cl, %esi
613 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
614 ; X86-SLOW-NEXT: testb $32, %bl
615 ; X86-SLOW-NEXT: movl $0, %esi
616 ; X86-SLOW-NEXT: movl $0, %ecx
617 ; X86-SLOW-NEXT: jne .LBB6_7
618 ; X86-SLOW-NEXT: # %bb.6:
619 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
620 ; X86-SLOW-NEXT: movl %ebp, %ecx
621 ; X86-SLOW-NEXT: .LBB6_7:
622 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
623 ; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
624 ; X86-SLOW-NEXT: shrl %eax
625 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
626 ; X86-SLOW-NEXT: shll $31, %esi
627 ; X86-SLOW-NEXT: orl %eax, %esi
628 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
629 ; X86-SLOW-NEXT: movl %ebx, %ecx
630 ; X86-SLOW-NEXT: shrl %cl, %esi
631 ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
632 ; X86-SLOW-NEXT: addl %edi, %edi
633 ; X86-SLOW-NEXT: notb %cl
634 ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
635 ; X86-SLOW-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
636 ; X86-SLOW-NEXT: shll %cl, %edi
637 ; X86-SLOW-NEXT: testb $32, %bl
638 ; X86-SLOW-NEXT: jne .LBB6_9
639 ; X86-SLOW-NEXT: # %bb.8:
640 ; X86-SLOW-NEXT: orl %esi, %edi
641 ; X86-SLOW-NEXT: movl %edi, %ebp
642 ; X86-SLOW-NEXT: .LBB6_9:
643 ; X86-SLOW-NEXT: movb %bl, %dh
644 ; X86-SLOW-NEXT: addb $-64, %dh
645 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
646 ; X86-SLOW-NEXT: movb %dh, %cl
647 ; X86-SLOW-NEXT: shrl %cl, %esi
648 ; X86-SLOW-NEXT: testb $32, %dh
649 ; X86-SLOW-NEXT: movl $0, %ecx
650 ; X86-SLOW-NEXT: movl (%esp), %eax # 4-byte Reload
651 ; X86-SLOW-NEXT: jne .LBB6_11
652 ; X86-SLOW-NEXT: # %bb.10:
653 ; X86-SLOW-NEXT: movl %esi, %ecx
654 ; X86-SLOW-NEXT: .LBB6_11:
655 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
656 ; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
657 ; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
658 ; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
659 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
660 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ebp
661 ; X86-SLOW-NEXT: jb .LBB6_13
662 ; X86-SLOW-NEXT: # %bb.12:
663 ; X86-SLOW-NEXT: xorl %eax, %eax
664 ; X86-SLOW-NEXT: .LBB6_13:
665 ; X86-SLOW-NEXT: movl %eax, (%esp) # 4-byte Spill
666 ; X86-SLOW-NEXT: movb $64, %ch
667 ; X86-SLOW-NEXT: movb $64, %ah
668 ; X86-SLOW-NEXT: subb %dl, %ah
669 ; X86-SLOW-NEXT: movb %ah, %cl
670 ; X86-SLOW-NEXT: shrl %cl, %esi
671 ; X86-SLOW-NEXT: notb %cl
672 ; X86-SLOW-NEXT: leal (%ebp,%ebp), %edi
673 ; X86-SLOW-NEXT: shll %cl, %edi
674 ; X86-SLOW-NEXT: movb %ah, %cl
675 ; X86-SLOW-NEXT: shrl %cl, %ebp
676 ; X86-SLOW-NEXT: testb $32, %ah
677 ; X86-SLOW-NEXT: jne .LBB6_14
678 ; X86-SLOW-NEXT: # %bb.15:
679 ; X86-SLOW-NEXT: orl %esi, %edi
680 ; X86-SLOW-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
681 ; X86-SLOW-NEXT: movl %edi, %ebp
682 ; X86-SLOW-NEXT: jmp .LBB6_16
683 ; X86-SLOW-NEXT: .LBB6_14:
684 ; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
685 ; X86-SLOW-NEXT: .LBB6_16:
686 ; X86-SLOW-NEXT: addb $-64, %dl
687 ; X86-SLOW-NEXT: movb %dl, %cl
688 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
689 ; X86-SLOW-NEXT: shll %cl, %edi
690 ; X86-SLOW-NEXT: notb %cl
691 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
692 ; X86-SLOW-NEXT: shrl %cl, %eax
693 ; X86-SLOW-NEXT: movb %dl, %cl
694 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
695 ; X86-SLOW-NEXT: shll %cl, %esi
696 ; X86-SLOW-NEXT: testb $32, %dl
697 ; X86-SLOW-NEXT: jne .LBB6_17
698 ; X86-SLOW-NEXT: # %bb.18:
699 ; X86-SLOW-NEXT: orl %eax, %edi
700 ; X86-SLOW-NEXT: cmpl $64, %ebx
701 ; X86-SLOW-NEXT: jae .LBB6_20
702 ; X86-SLOW-NEXT: jmp .LBB6_21
703 ; X86-SLOW-NEXT: .LBB6_17:
704 ; X86-SLOW-NEXT: movl %esi, %edi
705 ; X86-SLOW-NEXT: xorl %esi, %esi
706 ; X86-SLOW-NEXT: cmpl $64, %ebx
707 ; X86-SLOW-NEXT: jb .LBB6_21
708 ; X86-SLOW-NEXT: .LBB6_20:
709 ; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
710 ; X86-SLOW-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
711 ; X86-SLOW-NEXT: .LBB6_21:
712 ; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
713 ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
714 ; X86-SLOW-NEXT: jae .LBB6_23
715 ; X86-SLOW-NEXT: # %bb.22:
716 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
717 ; X86-SLOW-NEXT: orl %ebp, %esi
718 ; X86-SLOW-NEXT: .LBB6_23:
719 ; X86-SLOW-NEXT: movl (%esp), %ebp # 4-byte Reload
720 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
721 ; X86-SLOW-NEXT: cmpl $64, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
722 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
723 ; X86-SLOW-NEXT: jae .LBB6_25
724 ; X86-SLOW-NEXT: # %bb.24:
725 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
726 ; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
727 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
728 ; X86-SLOW-NEXT: .LBB6_25:
729 ; X86-SLOW-NEXT: shrl %edi
730 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
731 ; X86-SLOW-NEXT: shll $31, %esi
732 ; X86-SLOW-NEXT: orl %edi, %esi
733 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
734 ; X86-SLOW-NEXT: movb %bl, %cl
735 ; X86-SLOW-NEXT: shrl %cl, %esi
736 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
737 ; X86-SLOW-NEXT: addl %edi, %edi
738 ; X86-SLOW-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload
739 ; X86-SLOW-NEXT: shll %cl, %edi
740 ; X86-SLOW-NEXT: testb $32, %bl
741 ; X86-SLOW-NEXT: jne .LBB6_27
742 ; X86-SLOW-NEXT: # %bb.26:
743 ; X86-SLOW-NEXT: orl %esi, %edi
744 ; X86-SLOW-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
745 ; X86-SLOW-NEXT: .LBB6_27:
746 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
747 ; X86-SLOW-NEXT: movl %edi, %eax
748 ; X86-SLOW-NEXT: movb %dh, %cl
749 ; X86-SLOW-NEXT: shrl %cl, %eax
750 ; X86-SLOW-NEXT: notb %cl
751 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
752 ; X86-SLOW-NEXT: shll %cl, %esi
753 ; X86-SLOW-NEXT: testb $32, %dh
754 ; X86-SLOW-NEXT: jne .LBB6_29
755 ; X86-SLOW-NEXT: # %bb.28:
756 ; X86-SLOW-NEXT: orl %eax, %esi
757 ; X86-SLOW-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
758 ; X86-SLOW-NEXT: .LBB6_29:
759 ; X86-SLOW-NEXT: subb %bl, %ch
760 ; X86-SLOW-NEXT: movl %edi, %eax
761 ; X86-SLOW-NEXT: movb %ch, %cl
762 ; X86-SLOW-NEXT: shll %cl, %eax
763 ; X86-SLOW-NEXT: shrl %edi
764 ; X86-SLOW-NEXT: notb %cl
765 ; X86-SLOW-NEXT: shrl %cl, %edi
766 ; X86-SLOW-NEXT: movb %ch, %cl
767 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
768 ; X86-SLOW-NEXT: shll %cl, %edx
769 ; X86-SLOW-NEXT: testb $32, %ch
770 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
771 ; X86-SLOW-NEXT: movl %edi, %ecx
772 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edi
773 ; X86-SLOW-NEXT: jne .LBB6_30
774 ; X86-SLOW-NEXT: # %bb.31:
775 ; X86-SLOW-NEXT: orl %ecx, %edx
776 ; X86-SLOW-NEXT: movl %edx, %ecx
777 ; X86-SLOW-NEXT: cmpl $64, %ebx
778 ; X86-SLOW-NEXT: jb .LBB6_33
779 ; X86-SLOW-NEXT: jmp .LBB6_34
780 ; X86-SLOW-NEXT: .LBB6_30:
781 ; X86-SLOW-NEXT: movl %eax, %ecx
782 ; X86-SLOW-NEXT: xorl %eax, %eax
783 ; X86-SLOW-NEXT: cmpl $64, %ebx
784 ; X86-SLOW-NEXT: jae .LBB6_34
785 ; X86-SLOW-NEXT: .LBB6_33:
786 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
787 ; X86-SLOW-NEXT: orl %eax, %edx
788 ; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
789 ; X86-SLOW-NEXT: .LBB6_34:
790 ; X86-SLOW-NEXT: cmpl $64, %ebx
791 ; X86-SLOW-NEXT: jb .LBB6_35
792 ; X86-SLOW-NEXT: # %bb.36:
793 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
794 ; X86-SLOW-NEXT: jmp .LBB6_37
795 ; X86-SLOW-NEXT: .LBB6_35:
796 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
797 ; X86-SLOW-NEXT: orl %ecx, %eax
798 ; X86-SLOW-NEXT: movl %eax, %ecx
799 ; X86-SLOW-NEXT: .LBB6_37:
800 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
801 ; X86-SLOW-NEXT: testl %ebx, %ebx
802 ; X86-SLOW-NEXT: je .LBB6_39
803 ; X86-SLOW-NEXT: # %bb.38:
804 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
805 ; X86-SLOW-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
806 ; X86-SLOW-NEXT: movl %ecx, %ebx
807 ; X86-SLOW-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
808 ; X86-SLOW-NEXT: .LBB6_39:
809 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
810 ; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
811 ; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
812 ; X86-SLOW-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
813 ; X86-SLOW-NEXT: je .LBB6_41
814 ; X86-SLOW-NEXT: # %bb.40:
815 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
816 ; X86-SLOW-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
817 ; X86-SLOW-NEXT: .LBB6_41:
818 ; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
819 ; X86-SLOW-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
820 ; X86-SLOW-NEXT: movl %esi, 12(%eax)
821 ; X86-SLOW-NEXT: movl %edi, 8(%eax)
822 ; X86-SLOW-NEXT: movl %ebx, (%eax)
823 ; X86-SLOW-NEXT: movl %ebp, 4(%eax)
824 ; X86-SLOW-NEXT: addl $76, %esp
825 ; X86-SLOW-NEXT: popl %esi
826 ; X86-SLOW-NEXT: popl %edi
827 ; X86-SLOW-NEXT: popl %ebx
828 ; X86-SLOW-NEXT: popl %ebp
829 ; X86-SLOW-NEXT: retl $4
831 ; X64-FAST-LABEL: var_shift_i128:
833 ; X64-FAST-NEXT: movq %r8, %r9
834 ; X64-FAST-NEXT: movq %rcx, %r10
835 ; X64-FAST-NEXT: movq %rdx, %r8
836 ; X64-FAST-NEXT: movq %rsi, %rdx
837 ; X64-FAST-NEXT: movl %r9d, %ecx
838 ; X64-FAST-NEXT: shldq %cl, %rdi, %rdx
839 ; X64-FAST-NEXT: shrdq $1, %r10, %r8
840 ; X64-FAST-NEXT: shrq %r10
841 ; X64-FAST-NEXT: notb %cl
842 ; X64-FAST-NEXT: shrdq %cl, %r10, %r8
843 ; X64-FAST-NEXT: shrq %cl, %r10
844 ; X64-FAST-NEXT: xorl %eax, %eax
845 ; X64-FAST-NEXT: testb $64, %cl
846 ; X64-FAST-NEXT: cmovneq %r10, %r8
847 ; X64-FAST-NEXT: cmovneq %rax, %r10
848 ; X64-FAST-NEXT: movl %r9d, %ecx
849 ; X64-FAST-NEXT: shlq %cl, %rdi
850 ; X64-FAST-NEXT: testb $64, %r9b
851 ; X64-FAST-NEXT: cmovneq %rdi, %rdx
852 ; X64-FAST-NEXT: cmoveq %rdi, %rax
853 ; X64-FAST-NEXT: orq %r8, %rax
854 ; X64-FAST-NEXT: orq %r10, %rdx
855 ; X64-FAST-NEXT: retq
857 ; X64-SLOW-LABEL: var_shift_i128:
859 ; X64-SLOW-NEXT: movq %rcx, %r11
860 ; X64-SLOW-NEXT: movq %rdx, %r9
861 ; X64-SLOW-NEXT: movl %r8d, %ecx
862 ; X64-SLOW-NEXT: shlq %cl, %rsi
863 ; X64-SLOW-NEXT: movq %rdi, %rdx
864 ; X64-SLOW-NEXT: shrq %rdx
865 ; X64-SLOW-NEXT: movl %r8d, %r10d
866 ; X64-SLOW-NEXT: notb %r10b
867 ; X64-SLOW-NEXT: movl %r10d, %ecx
868 ; X64-SLOW-NEXT: shrq %cl, %rdx
869 ; X64-SLOW-NEXT: orq %rsi, %rdx
870 ; X64-SLOW-NEXT: shrq %r9
871 ; X64-SLOW-NEXT: movq %r11, %rax
872 ; X64-SLOW-NEXT: shlq $63, %rax
873 ; X64-SLOW-NEXT: orq %r9, %rax
874 ; X64-SLOW-NEXT: shrq %cl, %rax
875 ; X64-SLOW-NEXT: shrq %r11
876 ; X64-SLOW-NEXT: leaq (%r11,%r11), %rsi
877 ; X64-SLOW-NEXT: movl %r8d, %ecx
878 ; X64-SLOW-NEXT: shlq %cl, %rsi
879 ; X64-SLOW-NEXT: orq %rax, %rsi
880 ; X64-SLOW-NEXT: movl %r10d, %ecx
881 ; X64-SLOW-NEXT: shrq %cl, %r11
882 ; X64-SLOW-NEXT: xorl %eax, %eax
883 ; X64-SLOW-NEXT: testb $64, %r10b
884 ; X64-SLOW-NEXT: cmovneq %r11, %rsi
885 ; X64-SLOW-NEXT: cmovneq %rax, %r11
886 ; X64-SLOW-NEXT: movl %r8d, %ecx
887 ; X64-SLOW-NEXT: shlq %cl, %rdi
888 ; X64-SLOW-NEXT: testb $64, %r8b
889 ; X64-SLOW-NEXT: cmovneq %rdi, %rdx
890 ; X64-SLOW-NEXT: cmoveq %rdi, %rax
891 ; X64-SLOW-NEXT: orq %rsi, %rax
892 ; X64-SLOW-NEXT: orq %r11, %rdx
893 ; X64-SLOW-NEXT: retq
894 %tmp = tail call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
902 define i8 @const_shift_i8(i8 %x, i8 %y) nounwind {
; fshl(x, y, 7) on i8 == (x << 7) | (y >> 1). i8 has no double-shift
; instruction on x86, so both 32- and 64-bit targets expand to shift+or
; (the 64-bit path merges the halves with lea instead of or).
903 ; X86-LABEL: const_shift_i8:
905 ; X86-NEXT: movb {{[0-9]+}}(%esp), %al
906 ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
908 ; X86-NEXT: shlb $7, %al
909 ; X86-NEXT: orb %cl, %al
912 ; X64-LABEL: const_shift_i8:
914 ; X64-NEXT: # kill: def $esi killed $esi def $rsi
915 ; X64-NEXT: # kill: def $edi killed $edi def $rdi
916 ; X64-NEXT: shrb %sil
917 ; X64-NEXT: shlb $7, %dil
918 ; X64-NEXT: leal (%rdi,%rsi), %eax
919 ; X64-NEXT: # kill: def $al killed $al killed $eax
921 %tmp = tail call i8 @llvm.fshl.i8(i8 %x, i8 %y, i8 7)
925 define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
; Constant fshl by 7 on i16: fast-shld targets emit a single shldw $7;
; slow-shld targets expand to (x << 7) | (zext(y) >> 9) in 32-bit registers.
926 ; X86-FAST-LABEL: const_shift_i16:
928 ; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
929 ; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
930 ; X86-FAST-NEXT: shldw $7, %cx, %ax
931 ; X86-FAST-NEXT: retl
933 ; X86-SLOW-LABEL: const_shift_i16:
935 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
936 ; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
937 ; X86-SLOW-NEXT: shrl $9, %ecx
938 ; X86-SLOW-NEXT: shll $7, %eax
939 ; X86-SLOW-NEXT: orl %ecx, %eax
940 ; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
941 ; X86-SLOW-NEXT: retl
943 ; X64-FAST-LABEL: const_shift_i16:
945 ; X64-FAST-NEXT: movl %edi, %eax
946 ; X64-FAST-NEXT: shldw $7, %si, %ax
947 ; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
948 ; X64-FAST-NEXT: retq
950 ; X64-SLOW-LABEL: const_shift_i16:
952 ; X64-SLOW-NEXT: movzwl %si, %eax
953 ; X64-SLOW-NEXT: shll $7, %edi
954 ; X64-SLOW-NEXT: shrl $9, %eax
955 ; X64-SLOW-NEXT: orl %edi, %eax
956 ; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
957 ; X64-SLOW-NEXT: retq
958 %tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 7)
962 define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
; Constant fshl by 7 on i32: fast-shld targets use shldl $7; slow-shld
; targets expand to (x << 7) | (y >> 25), combined via or (32-bit) or
; lea (64-bit, where both addends are in registers).
963 ; X86-FAST-LABEL: const_shift_i32:
965 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
966 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
967 ; X86-FAST-NEXT: shldl $7, %ecx, %eax
968 ; X86-FAST-NEXT: retl
970 ; X86-SLOW-LABEL: const_shift_i32:
972 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
973 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
974 ; X86-SLOW-NEXT: shrl $25, %ecx
975 ; X86-SLOW-NEXT: shll $7, %eax
976 ; X86-SLOW-NEXT: orl %ecx, %eax
977 ; X86-SLOW-NEXT: retl
979 ; X64-FAST-LABEL: const_shift_i32:
981 ; X64-FAST-NEXT: movl %edi, %eax
982 ; X64-FAST-NEXT: shldl $7, %esi, %eax
983 ; X64-FAST-NEXT: retq
985 ; X64-SLOW-LABEL: const_shift_i32:
987 ; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
988 ; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
989 ; X64-SLOW-NEXT: shrl $25, %esi
990 ; X64-SLOW-NEXT: shll $7, %edi
991 ; X64-SLOW-NEXT: leal (%rdi,%rsi), %eax
992 ; X64-SLOW-NEXT: retq
993 %tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
997 define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
; Constant fshl by 7 on i64. On 32-bit targets the i64 is split into two
; 32-bit halves (shrdl/shldl pair when shld is fast, four shifts + two ors
; when it is slow). On 64-bit targets: one shldq $7, or shrq $57 + shlq $7
; merged with lea when shld is slow.
998 ; X86-FAST-LABEL: const_shift_i64:
1000 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
1001 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
1002 ; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
1003 ; X86-FAST-NEXT: shrdl $25, %ecx, %eax
1004 ; X86-FAST-NEXT: shldl $7, %ecx, %edx
1005 ; X86-FAST-NEXT: retl
1007 ; X86-SLOW-LABEL: const_shift_i64:
1008 ; X86-SLOW: # %bb.0:
1009 ; X86-SLOW-NEXT: pushl %esi
1010 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
1011 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
1012 ; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
1013 ; X86-SLOW-NEXT: shrl $25, %esi
1014 ; X86-SLOW-NEXT: movl %ecx, %eax
1015 ; X86-SLOW-NEXT: shll $7, %eax
1016 ; X86-SLOW-NEXT: orl %esi, %eax
1017 ; X86-SLOW-NEXT: shrl $25, %ecx
1018 ; X86-SLOW-NEXT: shll $7, %edx
1019 ; X86-SLOW-NEXT: orl %ecx, %edx
1020 ; X86-SLOW-NEXT: popl %esi
1021 ; X86-SLOW-NEXT: retl
1023 ; X64-FAST-LABEL: const_shift_i64:
1024 ; X64-FAST: # %bb.0:
1025 ; X64-FAST-NEXT: movq %rdi, %rax
1026 ; X64-FAST-NEXT: shldq $7, %rsi, %rax
1027 ; X64-FAST-NEXT: retq
1029 ; X64-SLOW-LABEL: const_shift_i64:
1030 ; X64-SLOW: # %bb.0:
1031 ; X64-SLOW-NEXT: shrq $57, %rsi
1032 ; X64-SLOW-NEXT: shlq $7, %rdi
1033 ; X64-SLOW-NEXT: leaq (%rdi,%rsi), %rax
1034 ; X64-SLOW-NEXT: retq
1035 %tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 7)
1040 ; Combine Consecutive Loads
1043 define i8 @combine_fshl_load_i8(i8* %p) nounwind {
; fshl shift amounts are taken modulo the bit width, so a shift of 8 on i8
; is 0 and the funnel shift returns its first operand (%ld1). The redundant
; load pair therefore folds to a single byte load at offset 1.
1044 ; X86-LABEL: combine_fshl_load_i8:
1046 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1047 ; X86-NEXT: movb 1(%eax), %al
1050 ; X64-LABEL: combine_fshl_load_i8:
1052 ; X64-NEXT: movb 1(%rdi), %al
1054 %p1 = getelementptr i8, i8* %p, i32 1
1055 %ld0 = load i8, i8 *%p
1056 %ld1 = load i8, i8 *%p1
1057 %res = call i8 @llvm.fshl.i8(i8 %ld1, i8 %ld0, i8 8)
1061 define i16 @combine_fshl_load_i16(i16* %p) nounwind {
; fshl(ld1, ld0, 8) over two adjacent i16 loads selects the middle 16 bits
; of the 32-bit concatenation, i.e. a single (little-endian) i16 load at
; byte offset 1 from %p.
1062 ; X86-LABEL: combine_fshl_load_i16:
1064 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1065 ; X86-NEXT: movzwl 1(%eax), %eax
1068 ; X64-LABEL: combine_fshl_load_i16:
1070 ; X64-NEXT: movzwl 1(%rdi), %eax
1072 %p0 = getelementptr i16, i16* %p, i32 0
1073 %p1 = getelementptr i16, i16* %p, i32 1
1074 %ld0 = load i16, i16 *%p0
1075 %ld1 = load i16, i16 *%p1
1076 %res = call i16 @llvm.fshl.i16(i16 %ld1, i16 %ld0, i16 8)
1080 define i32 @combine_fshl_load_i32(i32* %p) nounwind {
; Adjacent i32 loads at element offsets 2 and 3 (byte offsets 8 and 12),
; funnel-shifted left by 8: equivalent to one unaligned i32 load at byte
; offset 11 (= 12 - 8/8) on little-endian x86.
1081 ; X86-LABEL: combine_fshl_load_i32:
1083 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
1084 ; X86-NEXT: movl 11(%eax), %eax
1087 ; X64-LABEL: combine_fshl_load_i32:
1089 ; X64-NEXT: movl 11(%rdi), %eax
1091 %p0 = getelementptr i32, i32* %p, i32 2
1092 %p1 = getelementptr i32, i32* %p, i32 3
1093 %ld0 = load i32, i32 *%p0
1094 %ld1 = load i32, i32 *%p1
1095 %res = call i32 @llvm.fshl.i32(i32 %ld1, i32 %ld0, i32 8)
1099 define i64 @combine_fshl_load_i64(i64* %p) nounwind {
; Adjacent i64 loads at byte offsets 8 and 16, funnel-shifted left by 24:
; folds to one unaligned i64 load at byte offset 13 (= 16 - 24/8). The
; 32-bit target needs two 32-bit loads (offsets 13 and 17) for the i64.
1100 ; X86-LABEL: combine_fshl_load_i64:
1102 ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
1103 ; X86-NEXT: movl 13(%ecx), %eax
1104 ; X86-NEXT: movl 17(%ecx), %edx
1107 ; X64-LABEL: combine_fshl_load_i64:
1109 ; X64-NEXT: movq 13(%rdi), %rax
1111 %p0 = getelementptr i64, i64* %p, i64 1
1112 %p1 = getelementptr i64, i64* %p, i64 2
1113 %ld0 = load i64, i64 *%p0
1114 %ld1 = load i64, i64 *%p1
1115 %res = call i64 @llvm.fshl.i64(i64 %ld1, i64 %ld0, i64 24)
1119 !llvm.module.flags = !{!0}
; Module-level InstrProf profile summary (total/max counts plus a detailed
; cutoff table). Its presence can influence profile-driven codegen
; heuristics for the functions in this test.
1120 !0 = !{i32 1, !"ProfileSummary", !1}
1121 !1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
1122 !2 = !{!"ProfileFormat", !"InstrProf"}
1123 !3 = !{!"TotalCount", i64 10000}
1124 !4 = !{!"MaxCount", i64 10}
1125 !5 = !{!"MaxInternalCount", i64 1}
1126 !6 = !{!"MaxFunctionCount", i64 1000}
1127 !7 = !{!"NumCounts", i64 3}
1128 !8 = !{!"NumFunctions", i64 3}
; Detailed summary entries: {cutoff (parts-per-million), min count, num blocks}.
1129 !9 = !{!"DetailedSummary", !10}
1130 !10 = !{!11, !12, !13}
1131 !11 = !{i32 10000, i64 100, i32 1}
1132 !12 = !{i32 999000, i64 100, i32 1}
1133 !13 = !{i32 999999, i64 1, i32 2}
; NOTE(review): !14 is not attached (via !prof) to any function visible in
; this chunk — confirm an attachment exists earlier in the file, otherwise
; this node is dead metadata.
1134 !14 = !{!"function_entry_count", i64 0}