1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=X86-ALL,X86-GOOD-RA
3 ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefixes=X86-ALL,X86-FAST-RA
5 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=X64-ALL
6 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=X64-ALL
7 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf %s -o - | FileCheck %s --check-prefix=X64-ALL
8 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mattr=+sahf -pre-RA-sched=fast %s -o - | FileCheck %s --check-prefix=X64-ALL
9 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs -mcpu=corei7 %s -o - | FileCheck %s --check-prefix=X64-ALL
14 ; In the following case when using fast scheduling we get a long chain of
15 ; EFLAGS save/restore due to a sequence of:
16 ; cmpxchg8b (implicit-def eflags)
21 ; During PEI the adjcallstackdown32 is replaced with the subl which
22 ; clobbers eflags, effectively interfering with the liveness interval. However,
23 ; we then promote these copies into independent conditions in GPRs that avoids
24 ; repeated saving and restoring logic and can be trivially managed by the
26 define i64 @test_intervening_call(ptr %foo, i64 %bar, i64 %baz) nounwind {
27 ; X86-GOOD-RA-LABEL: test_intervening_call:
28 ; X86-GOOD-RA: # %bb.0: # %entry
29 ; X86-GOOD-RA-NEXT: pushl %ebx
30 ; X86-GOOD-RA-NEXT: pushl %esi
31 ; X86-GOOD-RA-NEXT: pushl %eax
32 ; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
33 ; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
34 ; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
35 ; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
36 ; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
37 ; X86-GOOD-RA-NEXT: lock cmpxchg8b (%esi)
38 ; X86-GOOD-RA-NEXT: setne %bl
39 ; X86-GOOD-RA-NEXT: subl $8, %esp
40 ; X86-GOOD-RA-NEXT: pushl %edx
41 ; X86-GOOD-RA-NEXT: pushl %eax
42 ; X86-GOOD-RA-NEXT: calll bar@PLT
43 ; X86-GOOD-RA-NEXT: addl $16, %esp
44 ; X86-GOOD-RA-NEXT: testb %bl, %bl
45 ; X86-GOOD-RA-NEXT: jne .LBB0_3
46 ; X86-GOOD-RA-NEXT: # %bb.1: # %t
47 ; X86-GOOD-RA-NEXT: movl $42, %eax
48 ; X86-GOOD-RA-NEXT: jmp .LBB0_2
49 ; X86-GOOD-RA-NEXT: .LBB0_3: # %f
50 ; X86-GOOD-RA-NEXT: xorl %eax, %eax
51 ; X86-GOOD-RA-NEXT: .LBB0_2: # %t
52 ; X86-GOOD-RA-NEXT: xorl %edx, %edx
53 ; X86-GOOD-RA-NEXT: addl $4, %esp
54 ; X86-GOOD-RA-NEXT: popl %esi
55 ; X86-GOOD-RA-NEXT: popl %ebx
56 ; X86-GOOD-RA-NEXT: retl
58 ; X86-FAST-RA-LABEL: test_intervening_call:
59 ; X86-FAST-RA: # %bb.0: # %entry
60 ; X86-FAST-RA-NEXT: pushl %ebx
61 ; X86-FAST-RA-NEXT: pushl %esi
62 ; X86-FAST-RA-NEXT: pushl %eax
63 ; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
64 ; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ebx
65 ; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
66 ; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
67 ; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %edx
68 ; X86-FAST-RA-NEXT: lock cmpxchg8b (%esi)
69 ; X86-FAST-RA-NEXT: setne %bl
70 ; X86-FAST-RA-NEXT: subl $8, %esp
71 ; X86-FAST-RA-NEXT: pushl %edx
72 ; X86-FAST-RA-NEXT: pushl %eax
73 ; X86-FAST-RA-NEXT: calll bar@PLT
74 ; X86-FAST-RA-NEXT: addl $16, %esp
75 ; X86-FAST-RA-NEXT: testb %bl, %bl
76 ; X86-FAST-RA-NEXT: jne .LBB0_3
77 ; X86-FAST-RA-NEXT: # %bb.1: # %t
78 ; X86-FAST-RA-NEXT: movl $42, %eax
79 ; X86-FAST-RA-NEXT: jmp .LBB0_2
80 ; X86-FAST-RA-NEXT: .LBB0_3: # %f
81 ; X86-FAST-RA-NEXT: xorl %eax, %eax
82 ; X86-FAST-RA-NEXT: .LBB0_2: # %t
83 ; X86-FAST-RA-NEXT: xorl %edx, %edx
84 ; X86-FAST-RA-NEXT: addl $4, %esp
85 ; X86-FAST-RA-NEXT: popl %esi
86 ; X86-FAST-RA-NEXT: popl %ebx
87 ; X86-FAST-RA-NEXT: retl
89 ; X64-ALL-LABEL: test_intervening_call:
90 ; X64-ALL: # %bb.0: # %entry
91 ; X64-ALL-NEXT: pushq %rbx
92 ; X64-ALL-NEXT: movq %rsi, %rax
93 ; X64-ALL-NEXT: lock cmpxchgq %rdx, (%rdi)
94 ; X64-ALL-NEXT: setne %bl
95 ; X64-ALL-NEXT: movq %rax, %rdi
96 ; X64-ALL-NEXT: callq bar@PLT
97 ; X64-ALL-NEXT: testb %bl, %bl
98 ; X64-ALL-NEXT: jne .LBB0_2
99 ; X64-ALL-NEXT: # %bb.1: # %t
100 ; X64-ALL-NEXT: movl $42, %eax
101 ; X64-ALL-NEXT: popq %rbx
103 ; X64-ALL-NEXT: .LBB0_2: # %f
104 ; X64-ALL-NEXT: xorl %eax, %eax
105 ; X64-ALL-NEXT: popq %rbx
108 %cx = cmpxchg ptr %foo, i64 %bar, i64 %baz seq_cst seq_cst
109 %v = extractvalue { i64, i1 } %cx, 0
110 %p = extractvalue { i64, i1 } %cx, 1
111 call i32 @bar(i64 %v)
112 br i1 %p, label %t, label %f
121 ; Interesting in producing a clobber without any function calls.
122 define i32 @test_control_flow(ptr %p, i32 %i, i32 %j) nounwind {
123 ; X86-ALL-LABEL: test_control_flow:
124 ; X86-ALL: # %bb.0: # %entry
125 ; X86-ALL-NEXT: movl {{[0-9]+}}(%esp), %eax
126 ; X86-ALL-NEXT: cmpl {{[0-9]+}}(%esp), %eax
127 ; X86-ALL-NEXT: jle .LBB1_6
128 ; X86-ALL-NEXT: # %bb.1: # %loop_start
129 ; X86-ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx
130 ; X86-ALL-NEXT: .p2align 4
131 ; X86-ALL-NEXT: .LBB1_2: # %while.condthread-pre-split.i
132 ; X86-ALL-NEXT: # =>This Loop Header: Depth=1
133 ; X86-ALL-NEXT: # Child Loop BB1_3 Depth 2
134 ; X86-ALL-NEXT: movl (%ecx), %edx
135 ; X86-ALL-NEXT: .p2align 4
136 ; X86-ALL-NEXT: .LBB1_3: # %while.cond.i
137 ; X86-ALL-NEXT: # Parent Loop BB1_2 Depth=1
138 ; X86-ALL-NEXT: # => This Inner Loop Header: Depth=2
139 ; X86-ALL-NEXT: movl %edx, %eax
140 ; X86-ALL-NEXT: xorl %edx, %edx
141 ; X86-ALL-NEXT: testl %eax, %eax
142 ; X86-ALL-NEXT: je .LBB1_3
143 ; X86-ALL-NEXT: # %bb.4: # %while.body.i
144 ; X86-ALL-NEXT: # in Loop: Header=BB1_2 Depth=1
145 ; X86-ALL-NEXT: lock cmpxchgl %eax, (%ecx)
146 ; X86-ALL-NEXT: jne .LBB1_2
147 ; X86-ALL-NEXT: # %bb.5:
148 ; X86-ALL-NEXT: xorl %eax, %eax
149 ; X86-ALL-NEXT: .LBB1_6: # %cond.end
152 ; X64-ALL-LABEL: test_control_flow:
153 ; X64-ALL: # %bb.0: # %entry
154 ; X64-ALL-NEXT: movl %esi, %eax
155 ; X64-ALL-NEXT: cmpl %edx, %esi
156 ; X64-ALL-NEXT: jle .LBB1_5
157 ; X64-ALL-NEXT: .p2align 4
158 ; X64-ALL-NEXT: .LBB1_1: # %while.condthread-pre-split.i
159 ; X64-ALL-NEXT: # =>This Loop Header: Depth=1
160 ; X64-ALL-NEXT: # Child Loop BB1_2 Depth 2
161 ; X64-ALL-NEXT: movl (%rdi), %ecx
162 ; X64-ALL-NEXT: .p2align 4
163 ; X64-ALL-NEXT: .LBB1_2: # %while.cond.i
164 ; X64-ALL-NEXT: # Parent Loop BB1_1 Depth=1
165 ; X64-ALL-NEXT: # => This Inner Loop Header: Depth=2
166 ; X64-ALL-NEXT: movl %ecx, %eax
167 ; X64-ALL-NEXT: xorl %ecx, %ecx
168 ; X64-ALL-NEXT: testl %eax, %eax
169 ; X64-ALL-NEXT: je .LBB1_2
170 ; X64-ALL-NEXT: # %bb.3: # %while.body.i
171 ; X64-ALL-NEXT: # in Loop: Header=BB1_1 Depth=1
172 ; X64-ALL-NEXT: lock cmpxchgl %eax, (%rdi)
173 ; X64-ALL-NEXT: jne .LBB1_1
174 ; X64-ALL-NEXT: # %bb.4:
175 ; X64-ALL-NEXT: xorl %eax, %eax
176 ; X64-ALL-NEXT: .LBB1_5: # %cond.end
179 %cmp = icmp sgt i32 %i, %j
180 br i1 %cmp, label %loop_start, label %cond.end
183 br label %while.condthread-pre-split.i
185 while.condthread-pre-split.i:
186 %.pr.i = load i32, ptr %p, align 4
187 br label %while.cond.i
190 %0 = phi i32 [ %.pr.i, %while.condthread-pre-split.i ], [ 0, %while.cond.i ]
191 %tobool.i = icmp eq i32 %0, 0
192 br i1 %tobool.i, label %while.cond.i, label %while.body.i
195 %.lcssa = phi i32 [ %0, %while.cond.i ]
196 %1 = cmpxchg ptr %p, i32 %.lcssa, i32 %.lcssa seq_cst seq_cst
197 %2 = extractvalue { i32, i1 } %1, 1
198 br i1 %2, label %cond.end.loopexit, label %while.condthread-pre-split.i
204 %cond = phi i32 [ %i, %entry ], [ 0, %cond.end.loopexit ]
208 ; This one is an interesting case because CMOV doesn't have a chain
209 ; operand. Naive attempts to limit cmpxchg EFLAGS use are likely to fail here.
210 define i32 @test_feed_cmov(ptr %addr, i32 %desired, i32 %new) nounwind {
211 ; X86-GOOD-RA-LABEL: test_feed_cmov:
212 ; X86-GOOD-RA: # %bb.0: # %entry
213 ; X86-GOOD-RA-NEXT: pushl %ebx
214 ; X86-GOOD-RA-NEXT: pushl %esi
215 ; X86-GOOD-RA-NEXT: pushl %eax
216 ; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
217 ; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
218 ; X86-GOOD-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
219 ; X86-GOOD-RA-NEXT: lock cmpxchgl %esi, (%ecx)
220 ; X86-GOOD-RA-NEXT: sete %bl
221 ; X86-GOOD-RA-NEXT: calll foo@PLT
222 ; X86-GOOD-RA-NEXT: testb %bl, %bl
223 ; X86-GOOD-RA-NEXT: jne .LBB2_2
224 ; X86-GOOD-RA-NEXT: # %bb.1: # %entry
225 ; X86-GOOD-RA-NEXT: movl %eax, %esi
226 ; X86-GOOD-RA-NEXT: .LBB2_2: # %entry
227 ; X86-GOOD-RA-NEXT: movl %esi, %eax
228 ; X86-GOOD-RA-NEXT: addl $4, %esp
229 ; X86-GOOD-RA-NEXT: popl %esi
230 ; X86-GOOD-RA-NEXT: popl %ebx
231 ; X86-GOOD-RA-NEXT: retl
233 ; X86-FAST-RA-LABEL: test_feed_cmov:
234 ; X86-FAST-RA: # %bb.0: # %entry
235 ; X86-FAST-RA-NEXT: pushl %ebx
236 ; X86-FAST-RA-NEXT: pushl %esi
237 ; X86-FAST-RA-NEXT: pushl %eax
238 ; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %ecx
239 ; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %esi
240 ; X86-FAST-RA-NEXT: movl {{[0-9]+}}(%esp), %eax
241 ; X86-FAST-RA-NEXT: lock cmpxchgl %esi, (%ecx)
242 ; X86-FAST-RA-NEXT: sete %bl
243 ; X86-FAST-RA-NEXT: calll foo@PLT
244 ; X86-FAST-RA-NEXT: testb %bl, %bl
245 ; X86-FAST-RA-NEXT: jne .LBB2_2
246 ; X86-FAST-RA-NEXT: # %bb.1: # %entry
247 ; X86-FAST-RA-NEXT: movl %eax, %esi
248 ; X86-FAST-RA-NEXT: .LBB2_2: # %entry
249 ; X86-FAST-RA-NEXT: movl %esi, %eax
250 ; X86-FAST-RA-NEXT: addl $4, %esp
251 ; X86-FAST-RA-NEXT: popl %esi
252 ; X86-FAST-RA-NEXT: popl %ebx
253 ; X86-FAST-RA-NEXT: retl
255 ; X64-ALL-LABEL: test_feed_cmov:
256 ; X64-ALL: # %bb.0: # %entry
257 ; X64-ALL-NEXT: pushq %rbp
258 ; X64-ALL-NEXT: pushq %rbx
259 ; X64-ALL-NEXT: pushq %rax
260 ; X64-ALL-NEXT: movl %edx, %ebx
261 ; X64-ALL-NEXT: movl %esi, %eax
262 ; X64-ALL-NEXT: lock cmpxchgl %edx, (%rdi)
263 ; X64-ALL-NEXT: sete %bpl
264 ; X64-ALL-NEXT: callq foo@PLT
265 ; X64-ALL-NEXT: testb %bpl, %bpl
266 ; X64-ALL-NEXT: cmovnel %ebx, %eax
267 ; X64-ALL-NEXT: addq $8, %rsp
268 ; X64-ALL-NEXT: popq %rbx
269 ; X64-ALL-NEXT: popq %rbp
272 %res = cmpxchg ptr %addr, i32 %desired, i32 %new seq_cst seq_cst
273 %success = extractvalue { i32, i1 } %res, 1
275 %rhs = call i32 @foo()
277 %ret = select i1 %success, i32 %new, i32 %rhs