1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=32-ALL,32-GOOD-RA
3 ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=32-ALL,32-FAST-RA
5 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA
6 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA
7 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
8 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=64-ALL,64-FAST-RA-SAHF
9 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefixes=64-ALL,64-GOOD-RA-SAHF
14 ; In the following case when using fast scheduling we get a long chain of
15 ; EFLAGS save/restore due to a sequence of:
16 ; cmpxchg8b (implicit-def eflags)
21 ; During PEI the adjcallstackdown32 is replaced with the subl which
22 ; clobbers eflags, effectively interfering with the liveness interval. However,
23 ; we then promote these copies into independent conditions in GPRs, which avoids
24 ; repeated saving and restoring logic and can be trivially managed by the
; Exercises a 64-bit cmpxchg whose success bit (%p) is only consumed by a
; conditional branch after an intervening call to @bar, so the condition has to
; remain available across that call.
;
; In every expected output below, the condition is captured into %bl with setne
; immediately after the locked compare-exchange and re-tested with testb once
; the call has returned.
26 define i64 @test_intervening_call(i64* %foo, i64 %bar, i64 %baz) nounwind {
27 ; 32-GOOD-RA-LABEL: test_intervening_call:
28 ; 32-GOOD-RA: # %bb.0: # %entry
29 ; 32-GOOD-RA-NEXT: pushl %ebx
30 ; 32-GOOD-RA-NEXT: pushl %esi
31 ; 32-GOOD-RA-NEXT: pushl %eax
32 ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
33 ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
34 ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
35 ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
36 ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
37 ; 32-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
38 ; 32-GOOD-RA-NEXT: setne %bl
39 ; 32-GOOD-RA-NEXT: subl $8, %esp
40 ; 32-GOOD-RA-NEXT: pushl %edx
41 ; 32-GOOD-RA-NEXT: pushl %eax
42 ; 32-GOOD-RA-NEXT: calll bar
43 ; 32-GOOD-RA-NEXT: addl $16, %esp
44 ; 32-GOOD-RA-NEXT: testb %bl, %bl
45 ; 32-GOOD-RA-NEXT: jne .LBB0_3
46 ; 32-GOOD-RA-NEXT: # %bb.1: # %t
47 ; 32-GOOD-RA-NEXT: movl $42, %eax
48 ; 32-GOOD-RA-NEXT: jmp .LBB0_2
49 ; 32-GOOD-RA-NEXT: .LBB0_3: # %f
50 ; 32-GOOD-RA-NEXT: xorl %eax, %eax
51 ; 32-GOOD-RA-NEXT: .LBB0_2: # %t
52 ; 32-GOOD-RA-NEXT: xorl %edx, %edx
53 ; 32-GOOD-RA-NEXT: addl $4, %esp
54 ; 32-GOOD-RA-NEXT: popl %esi
55 ; 32-GOOD-RA-NEXT: popl %ebx
56 ; 32-GOOD-RA-NEXT: retl
58 ; 32-FAST-RA-LABEL: test_intervening_call:
59 ; 32-FAST-RA: # %bb.0: # %entry
60 ; 32-FAST-RA-NEXT: pushl %ebx
61 ; 32-FAST-RA-NEXT: pushl %esi
62 ; 32-FAST-RA-NEXT: pushl %eax
63 ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
64 ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
65 ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
66 ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
67 ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
68 ; 32-FAST-RA-NEXT: lock cmpxchg8b (%esi)
69 ; 32-FAST-RA-NEXT: setne %bl
70 ; 32-FAST-RA-NEXT: subl $8, %esp
71 ; 32-FAST-RA-NEXT: pushl %edx
72 ; 32-FAST-RA-NEXT: pushl %eax
73 ; 32-FAST-RA-NEXT: calll bar
74 ; 32-FAST-RA-NEXT: addl $16, %esp
75 ; 32-FAST-RA-NEXT: testb %bl, %bl
76 ; 32-FAST-RA-NEXT: jne .LBB0_3
77 ; 32-FAST-RA-NEXT: # %bb.1: # %t
78 ; 32-FAST-RA-NEXT: movl $42, %eax
79 ; 32-FAST-RA-NEXT: jmp .LBB0_2
80 ; 32-FAST-RA-NEXT: .LBB0_3: # %f
81 ; 32-FAST-RA-NEXT: xorl %eax, %eax
82 ; 32-FAST-RA-NEXT: .LBB0_2: # %t
83 ; 32-FAST-RA-NEXT: xorl %edx, %edx
84 ; 32-FAST-RA-NEXT: addl $4, %esp
85 ; 32-FAST-RA-NEXT: popl %esi
86 ; 32-FAST-RA-NEXT: popl %ebx
87 ; 32-FAST-RA-NEXT: retl
89 ; 64-ALL-LABEL: test_intervening_call:
90 ; 64-ALL: # %bb.0: # %entry
91 ; 64-ALL-NEXT: pushq %rbx
92 ; 64-ALL-NEXT: movq %rsi, %rax
93 ; 64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
94 ; 64-ALL-NEXT: setne %bl
95 ; 64-ALL-NEXT: movq %rax, %rdi
96 ; 64-ALL-NEXT: callq bar
97 ; 64-ALL-NEXT: testb %bl, %bl
98 ; 64-ALL-NEXT: jne .LBB0_2
99 ; 64-ALL-NEXT: # %bb.1: # %t
100 ; 64-ALL-NEXT: movl $42, %eax
101 ; 64-ALL-NEXT: popq %rbx
103 ; 64-ALL-NEXT: .LBB0_2: # %f
104 ; 64-ALL-NEXT: xorl %eax, %eax
105 ; 64-ALL-NEXT: popq %rbx
; %v is the value returned by the cmpxchg and is passed to @bar; %p is the
; success bit and is branched on only after the call.
108 %cx = cmpxchg i64* %foo, i64 %bar, i64 %baz seq_cst seq_cst
109 %v = extractvalue { i64, i1 } %cx, 0
110 %p = extractvalue { i64, i1 } %cx, 1
111 call i32 @bar(i64 %v)
112 br i1 %p, label %t, label %f
121 ; This case is interesting because it produces a clobber without any function calls.
; A compare-exchange retry loop: the success bit of the cmpxchg directly
; controls the branch that either leaves the loop or returns to the outer loop
; header. The visible IR contains no call instructions.
;
; In the expected output for both targets, the conditional jump (jne) follows
; the locked cmpxchgl directly, with no setcc/test pair in between.
122 define i32 @test_control_flow(i32* %p, i32 %i, i32 %j) nounwind {
123 ; 32-ALL-LABEL: test_control_flow:
124 ; 32-ALL: # %bb.0: # %entry
125 ; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
126 ; 32-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax
127 ; 32-ALL-NEXT: jle .LBB1_6
128 ; 32-ALL-NEXT: # %bb.1: # %loop_start
129 ; 32-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
130 ; 32-ALL-NEXT: .p2align 4, 0x90
131 ; 32-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i
132 ; 32-ALL-NEXT: # =>This Loop Header: Depth=1
133 ; 32-ALL-NEXT: # Child Loop BB1_3 Depth 2
134 ; 32-ALL-NEXT: movl (%ecx), %edx
135 ; 32-ALL-NEXT: .p2align 4, 0x90
136 ; 32-ALL-NEXT: .LBB1_3: # %while.cond.i
137 ; 32-ALL-NEXT: # Parent Loop BB1_2 Depth=1
138 ; 32-ALL-NEXT: # => This Inner Loop Header: Depth=2
139 ; 32-ALL-NEXT: movl %edx, %eax
140 ; 32-ALL-NEXT: xorl %edx, %edx
141 ; 32-ALL-NEXT: testl %eax, %eax
142 ; 32-ALL-NEXT: je .LBB1_3
143 ; 32-ALL-NEXT: # %bb.4: # %while.body.i
144 ; 32-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1
145 ; 32-ALL-NEXT: lock cmpxchgl %eax, (%ecx)
146 ; 32-ALL-NEXT: jne .LBB1_2
147 ; 32-ALL-NEXT: # %bb.5:
148 ; 32-ALL-NEXT: xorl %eax, %eax
149 ; 32-ALL-NEXT: .LBB1_6: # %cond.end
152 ; 64-ALL-LABEL: test_control_flow:
153 ; 64-ALL: # %bb.0: # %entry
154 ; 64-ALL-NEXT: movl %esi, %eax
155 ; 64-ALL-NEXT: cmpl %edx, %esi
156 ; 64-ALL-NEXT: jle .LBB1_5
157 ; 64-ALL-NEXT: .p2align 4, 0x90
158 ; 64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i
159 ; 64-ALL-NEXT: # =>This Loop Header: Depth=1
160 ; 64-ALL-NEXT: # Child Loop BB1_2 Depth 2
161 ; 64-ALL-NEXT: movl (%rdi), %ecx
162 ; 64-ALL-NEXT: .p2align 4, 0x90
163 ; 64-ALL-NEXT: .LBB1_2: # %while.cond.i
164 ; 64-ALL-NEXT: # Parent Loop BB1_1 Depth=1
165 ; 64-ALL-NEXT: # => This Inner Loop Header: Depth=2
166 ; 64-ALL-NEXT: movl %ecx, %eax
167 ; 64-ALL-NEXT: xorl %ecx, %ecx
168 ; 64-ALL-NEXT: testl %eax, %eax
169 ; 64-ALL-NEXT: je .LBB1_2
170 ; 64-ALL-NEXT: # %bb.3: # %while.body.i
171 ; 64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1
172 ; 64-ALL-NEXT: lock cmpxchgl %eax, (%rdi)
173 ; 64-ALL-NEXT: jne .LBB1_1
174 ; 64-ALL-NEXT: # %bb.4:
175 ; 64-ALL-NEXT: xorl %eax, %eax
176 ; 64-ALL-NEXT: .LBB1_5: # %cond.end
; The loop is entered only when %i is signed-greater-than %j; otherwise control
; goes straight to %cond.end.
179 %cmp = icmp sgt i32 %i, %j
180 br i1 %cmp, label %loop_start, label %cond.end
183 br label %while.condthread-pre-split.i
185 while.condthread-pre-split.i:
; Outer loop header: (re)load the current value at %p.
186 %.pr.i = load i32, i32* %p, align 4
187 br label %while.cond.i
; Inner loop: branches back to itself while the value is zero; the self-edge
; feeds the constant 0 into the phi.
190 %0 = phi i32 [ %.pr.i, %while.condthread-pre-split.i ], [ 0, %while.cond.i ]
191 %tobool.i = icmp eq i32 %0, 0
192 br i1 %tobool.i, label %while.cond.i, label %while.body.i
; Compare-exchange the non-zero value with itself. On success leave the loop;
; on failure go back to the outer header, which reloads from %p.
195 %.lcssa = phi i32 [ %0, %while.cond.i ]
196 %1 = cmpxchg i32* %p, i32 %.lcssa, i32 %.lcssa seq_cst seq_cst
197 %2 = extractvalue { i32, i1 } %1, 1
198 br i1 %2, label %cond.end.loopexit, label %while.condthread-pre-split.i
; Merged result: %i when the loop was bypassed from %entry, 0 after a
; successful exchange.
204 %cond = phi i32 [ %i, %entry ], [ 0, %cond.end.loopexit ]
208 ; This one is an interesting case because CMOV doesn't have a chain
209 ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
210 define i32 @test_feed_cmov(i32* %addr, i32 %desired, i32 %new) nounwind {
211 ; 32-GOOD-RA-LABEL: test_feed_cmov:
212 ; 32-GOOD-RA: # %bb.0: # %entry
213 ; 32-GOOD-RA-NEXT: pushl %ebx
214 ; 32-GOOD-RA-NEXT: pushl %esi
215 ; 32-GOOD-RA-NEXT: pushl %eax
216 ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
217 ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
218 ; 32-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
219 ; 32-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
220 ; 32-GOOD-RA-NEXT: sete %bl
221 ; 32-GOOD-RA-NEXT: calll foo
222 ; 32-GOOD-RA-NEXT: testb %bl, %bl
223 ; 32-GOOD-RA-NEXT: jne .LBB2_2
224 ; 32-GOOD-RA-NEXT: # %bb.1: # %entry
225 ; 32-GOOD-RA-NEXT: movl %eax, %esi
226 ; 32-GOOD-RA-NEXT: .LBB2_2: # %entry
227 ; 32-GOOD-RA-NEXT: movl %esi, %eax
228 ; 32-GOOD-RA-NEXT: addl $4, %esp
229 ; 32-GOOD-RA-NEXT: popl %esi
230 ; 32-GOOD-RA-NEXT: popl %ebx
231 ; 32-GOOD-RA-NEXT: retl
233 ; 32-FAST-RA-LABEL: test_feed_cmov:
234 ; 32-FAST-RA: # %bb.0: # %entry
235 ; 32-FAST-RA-NEXT: pushl %ebx
236 ; 32-FAST-RA-NEXT: pushl %esi
237 ; 32-FAST-RA-NEXT: pushl %eax
238 ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
239 ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
240 ; 32-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
241 ; 32-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
242 ; 32-FAST-RA-NEXT: sete %bl
243 ; 32-FAST-RA-NEXT: calll foo
244 ; 32-FAST-RA-NEXT: testb %bl, %bl
245 ; 32-FAST-RA-NEXT: jne .LBB2_2
246 ; 32-FAST-RA-NEXT: # %bb.1: # %entry
247 ; 32-FAST-RA-NEXT: movl %eax, %esi
248 ; 32-FAST-RA-NEXT: .LBB2_2: # %entry
249 ; 32-FAST-RA-NEXT: movl %esi, %eax
250 ; 32-FAST-RA-NEXT: addl $4, %esp
251 ; 32-FAST-RA-NEXT: popl %esi
252 ; 32-FAST-RA-NEXT: popl %ebx
253 ; 32-FAST-RA-NEXT: retl
255 ; 64-ALL-LABEL: test_feed_cmov:
256 ; 64-ALL: # %bb.0: # %entry
257 ; 64-ALL-NEXT: pushq %rbp
258 ; 64-ALL-NEXT: pushq %rbx
259 ; 64-ALL-NEXT: pushq %rax
260 ; 64-ALL-NEXT: movl %edx, %ebx
261 ; 64-ALL-NEXT: movl %esi, %eax
262 ; 64-ALL-NEXT: lock cmpxchgl %edx, (%rdi)
263 ; 64-ALL-NEXT: sete %bpl
264 ; 64-ALL-NEXT: callq foo
265 ; 64-ALL-NEXT: testb %bpl, %bpl
266 ; 64-ALL-NEXT: cmovnel %ebx, %eax
267 ; 64-ALL-NEXT: addq $8, %rsp
268 ; 64-ALL-NEXT: popq %rbx
269 ; 64-ALL-NEXT: popq %rbp
272 %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
273 %success = extractvalue { i32, i1 } %res, 1
275 %rhs = call i32 @foo()
277 %ret = select i1 %success, i32 %new, i32 %rhs