1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=i386-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK32
3 ; RUN: llc -mtriple=x86_64-linux-gnu -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK64
5 ; The peephole optimizer can elide some physical register copies such as
6 ; EFLAGS. Make sure the flags are used directly, instead of needlessly
7 ; saving and restoring specific conditions.
9 @L = external dso_local global i32
10 @M = external dso_local global i8
; plus_one: adds 1 to the global @L in memory, then branches to %exit2 only
; when the updated value is zero AND bit 3 (mask 8) of the global @M is set;
; otherwise control goes to %exit.
; The expected assembly for both targets uses a memory increment followed
; immediately by a conditional jump, i.e. the branch consumes the flags
; produced by the increment without a separate compare of the new value.
14 define i1 @plus_one() nounwind {
15 ; CHECK32-LABEL: plus_one:
16 ; CHECK32: # %bb.0: # %entry
17 ; CHECK32-NEXT: movzbl M, %eax
18 ; CHECK32-NEXT: incl L
19 ; CHECK32-NEXT: jne .LBB0_2
20 ; CHECK32-NEXT: # %bb.1: # %entry
21 ; CHECK32-NEXT: andb $8, %al
22 ; CHECK32-NEXT: je .LBB0_2
23 ; CHECK32-NEXT: # %bb.3: # %exit2
24 ; CHECK32-NEXT: xorl %eax, %eax
26 ; CHECK32-NEXT: .LBB0_2: # %exit
27 ; CHECK32-NEXT: movb $1, %al
30 ; CHECK64-LABEL: plus_one:
31 ; CHECK64: # %bb.0: # %entry
32 ; CHECK64-NEXT: movzbl M(%rip), %eax
33 ; CHECK64-NEXT: incl L(%rip)
34 ; CHECK64-NEXT: jne .LBB0_2
35 ; CHECK64-NEXT: # %bb.1: # %entry
36 ; CHECK64-NEXT: andb $8, %al
37 ; CHECK64-NEXT: je .LBB0_2
38 ; CHECK64-NEXT: # %bb.3: # %exit2
39 ; CHECK64-NEXT: xorl %eax, %eax
41 ; CHECK64-NEXT: .LBB0_2: # %exit
42 ; CHECK64-NEXT: movb $1, %al
; Read-modify-write of @L: load, add 1, store back.
45 %loaded_L = load i32, ptr @L
46 %val = add nsw i32 %loaded_L, 1 ; N.B. will emit inc.
47 store i32 %val, ptr @L
; Second condition: test bit 3 of @M.
48 %loaded_M = load i8, ptr @M
49 %masked = and i8 %loaded_M, 8
50 %M_is_true = icmp ne i8 %masked, 0
; First condition: the value just stored to @L is zero.
51 %L_is_false = icmp eq i32 %val, 0
; Both conditions must hold to reach %exit2.
52 %cond = and i1 %L_is_false, %M_is_true
53 br i1 %cond, label %exit2, label %exit
; plus_forty_two: same shape as the +1 case but with the constant 42: adds 42
; to the global @L in memory, then branches to %exit2 only when the updated
; value is zero AND bit 3 (mask 8) of the global @M is set.
; The expected assembly for both targets uses a memory add of $42 followed
; immediately by a conditional jump, so the branch consumes the flags produced
; by the add without a separate compare of the new value.
62 define i1 @plus_forty_two() nounwind {
63 ; CHECK32-LABEL: plus_forty_two:
64 ; CHECK32: # %bb.0: # %entry
65 ; CHECK32-NEXT: movzbl M, %eax
66 ; CHECK32-NEXT: addl $42, L
67 ; CHECK32-NEXT: jne .LBB1_2
68 ; CHECK32-NEXT: # %bb.1: # %entry
69 ; CHECK32-NEXT: andb $8, %al
70 ; CHECK32-NEXT: je .LBB1_2
71 ; CHECK32-NEXT: # %bb.3: # %exit2
72 ; CHECK32-NEXT: xorl %eax, %eax
74 ; CHECK32-NEXT: .LBB1_2: # %exit
75 ; CHECK32-NEXT: movb $1, %al
78 ; CHECK64-LABEL: plus_forty_two:
79 ; CHECK64: # %bb.0: # %entry
80 ; CHECK64-NEXT: movzbl M(%rip), %eax
81 ; CHECK64-NEXT: addl $42, L(%rip)
82 ; CHECK64-NEXT: jne .LBB1_2
83 ; CHECK64-NEXT: # %bb.1: # %entry
84 ; CHECK64-NEXT: andb $8, %al
85 ; CHECK64-NEXT: je .LBB1_2
86 ; CHECK64-NEXT: # %bb.3: # %exit2
87 ; CHECK64-NEXT: xorl %eax, %eax
89 ; CHECK64-NEXT: .LBB1_2: # %exit
90 ; CHECK64-NEXT: movb $1, %al
; Read-modify-write of @L: load, add 42, store back.
93 %loaded_L = load i32, ptr @L
94 %val = add nsw i32 %loaded_L, 42 ; N.B. won't emit inc.
95 store i32 %val, ptr @L
; Second condition: test bit 3 of @M.
96 %loaded_M = load i8, ptr @M
97 %masked = and i8 %loaded_M, 8
98 %M_is_true = icmp ne i8 %masked, 0
; First condition: the value just stored to @L is zero.
99 %L_is_false = icmp eq i32 %val, 0
; Both conditions must hold to reach %exit2.
100 %cond = and i1 %L_is_false, %M_is_true
101 br i1 %cond, label %exit2, label %exit
; minus_one: adds -1 to the global @L in memory, then branches to %exit2 only
; when the updated value is zero AND bit 3 (mask 8) of the global @M is set;
; otherwise control goes to %exit.
; The expected assembly for both targets uses a memory decrement followed
; immediately by a conditional jump, i.e. the branch consumes the flags
; produced by the decrement without a separate compare of the new value.
110 define i1 @minus_one() nounwind {
111 ; CHECK32-LABEL: minus_one:
112 ; CHECK32: # %bb.0: # %entry
113 ; CHECK32-NEXT: movzbl M, %eax
114 ; CHECK32-NEXT: decl L
115 ; CHECK32-NEXT: jne .LBB2_2
116 ; CHECK32-NEXT: # %bb.1: # %entry
117 ; CHECK32-NEXT: andb $8, %al
118 ; CHECK32-NEXT: je .LBB2_2
119 ; CHECK32-NEXT: # %bb.3: # %exit2
120 ; CHECK32-NEXT: xorl %eax, %eax
122 ; CHECK32-NEXT: .LBB2_2: # %exit
123 ; CHECK32-NEXT: movb $1, %al
126 ; CHECK64-LABEL: minus_one:
127 ; CHECK64: # %bb.0: # %entry
128 ; CHECK64-NEXT: movzbl M(%rip), %eax
129 ; CHECK64-NEXT: decl L(%rip)
130 ; CHECK64-NEXT: jne .LBB2_2
131 ; CHECK64-NEXT: # %bb.1: # %entry
132 ; CHECK64-NEXT: andb $8, %al
133 ; CHECK64-NEXT: je .LBB2_2
134 ; CHECK64-NEXT: # %bb.3: # %exit2
135 ; CHECK64-NEXT: xorl %eax, %eax
137 ; CHECK64-NEXT: .LBB2_2: # %exit
138 ; CHECK64-NEXT: movb $1, %al
; Read-modify-write of @L: load, add -1, store back.
141 %loaded_L = load i32, ptr @L
142 %val = add nsw i32 %loaded_L, -1 ; N.B. will emit dec.
143 store i32 %val, ptr @L
; Second condition: test bit 3 of @M.
144 %loaded_M = load i8, ptr @M
145 %masked = and i8 %loaded_M, 8
146 %M_is_true = icmp ne i8 %masked, 0
; First condition: the value just stored to @L is zero.
147 %L_is_false = icmp eq i32 %val, 0
; Both conditions must hold to reach %exit2.
148 %cond = and i1 %L_is_false, %M_is_true
149 br i1 %cond, label %exit2, label %exit
; minus_forty_two: same shape as the -1 case but with the constant -42: adds
; -42 to the global @L in memory, then branches to %exit2 only when the
; updated value is zero AND bit 3 (mask 8) of the global @M is set.
; The expected assembly for both targets uses a memory add of $-42 followed
; immediately by a conditional jump, so the branch consumes the flags produced
; by the add without a separate compare of the new value.
158 define i1 @minus_forty_two() nounwind {
159 ; CHECK32-LABEL: minus_forty_two:
160 ; CHECK32: # %bb.0: # %entry
161 ; CHECK32-NEXT: movzbl M, %eax
162 ; CHECK32-NEXT: addl $-42, L
163 ; CHECK32-NEXT: jne .LBB3_2
164 ; CHECK32-NEXT: # %bb.1: # %entry
165 ; CHECK32-NEXT: andb $8, %al
166 ; CHECK32-NEXT: je .LBB3_2
167 ; CHECK32-NEXT: # %bb.3: # %exit2
168 ; CHECK32-NEXT: xorl %eax, %eax
170 ; CHECK32-NEXT: .LBB3_2: # %exit
171 ; CHECK32-NEXT: movb $1, %al
174 ; CHECK64-LABEL: minus_forty_two:
175 ; CHECK64: # %bb.0: # %entry
176 ; CHECK64-NEXT: movzbl M(%rip), %eax
177 ; CHECK64-NEXT: addl $-42, L(%rip)
178 ; CHECK64-NEXT: jne .LBB3_2
179 ; CHECK64-NEXT: # %bb.1: # %entry
180 ; CHECK64-NEXT: andb $8, %al
181 ; CHECK64-NEXT: je .LBB3_2
182 ; CHECK64-NEXT: # %bb.3: # %exit2
183 ; CHECK64-NEXT: xorl %eax, %eax
185 ; CHECK64-NEXT: .LBB3_2: # %exit
186 ; CHECK64-NEXT: movb $1, %al
; Read-modify-write of @L: load, add -42, store back.
189 %loaded_L = load i32, ptr @L
190 %val = add nsw i32 %loaded_L, -42 ; N.B. won't emit dec.
191 store i32 %val, ptr @L
; Second condition: test bit 3 of @M.
192 %loaded_M = load i8, ptr @M
193 %masked = and i8 %loaded_M, 8
194 %M_is_true = icmp ne i8 %masked, 0
; First condition: the value just stored to @L is zero.
195 %L_is_false = icmp eq i32 %val, 0
; Both conditions must hold to reach %exit2.
196 %cond = and i1 %L_is_false, %M_is_true
197 br i1 %cond, label %exit2, label %exit
; test_intervening_call: performs a seq_cst 64-bit cmpxchg on %foo (operands
; %bar and %baz), passes the value component of the result to @bar, and only
; afterwards branches on the success bit of the cmpxchg.
; Because a call sits between the flag-producing instruction and the branch,
; the expected assembly materializes the condition into %bl with setne right
; after the locked compare-exchange and re-tests it with testb after the call.
; The 32-bit expectations use cmpxchg8b; the 64-bit ones use cmpxchgq.
206 define i64 @test_intervening_call(ptr %foo, i64 %bar, i64 %baz) nounwind {
207 ; CHECK32-LABEL: test_intervening_call:
208 ; CHECK32: # %bb.0: # %entry
209 ; CHECK32-NEXT: pushl %ebx
210 ; CHECK32-NEXT: pushl %esi
211 ; CHECK32-NEXT: pushl %eax
212 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
213 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
214 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx
215 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
216 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
217 ; CHECK32-NEXT: lock cmpxchg8b (%esi)
218 ; CHECK32-NEXT: setne %bl
219 ; CHECK32-NEXT: subl $8, %esp
220 ; CHECK32-NEXT: pushl %edx
221 ; CHECK32-NEXT: pushl %eax
222 ; CHECK32-NEXT: calll bar@PLT
223 ; CHECK32-NEXT: addl $16, %esp
224 ; CHECK32-NEXT: testb %bl, %bl
225 ; CHECK32-NEXT: jne .LBB4_3
226 ; CHECK32-NEXT: # %bb.1: # %t
227 ; CHECK32-NEXT: movl $42, %eax
228 ; CHECK32-NEXT: jmp .LBB4_2
229 ; CHECK32-NEXT: .LBB4_3: # %f
230 ; CHECK32-NEXT: xorl %eax, %eax
231 ; CHECK32-NEXT: .LBB4_2: # %t
232 ; CHECK32-NEXT: xorl %edx, %edx
233 ; CHECK32-NEXT: addl $4, %esp
234 ; CHECK32-NEXT: popl %esi
235 ; CHECK32-NEXT: popl %ebx
238 ; CHECK64-LABEL: test_intervening_call:
239 ; CHECK64: # %bb.0: # %entry
240 ; CHECK64-NEXT: pushq %rbx
241 ; CHECK64-NEXT: movq %rsi, %rax
242 ; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi)
243 ; CHECK64-NEXT: setne %bl
244 ; CHECK64-NEXT: movq %rax, %rdi
245 ; CHECK64-NEXT: callq bar@PLT
246 ; CHECK64-NEXT: testb %bl, %bl
247 ; CHECK64-NEXT: jne .LBB4_2
248 ; CHECK64-NEXT: # %bb.1: # %t
249 ; CHECK64-NEXT: movl $42, %eax
250 ; CHECK64-NEXT: popq %rbx
252 ; CHECK64-NEXT: .LBB4_2: # %f
253 ; CHECK64-NEXT: xorl %eax, %eax
254 ; CHECK64-NEXT: popq %rbx
257 ; cmpxchg sets EFLAGS, call clobbers it, then br uses EFLAGS.
258 %cx = cmpxchg ptr %foo, i64 %bar, i64 %baz seq_cst seq_cst
; Field 0 of the cmpxchg result is the i64 value; field 1 is the i1 success bit.
259 %v = extractvalue { i64, i1 } %cx, 0
260 %p = extractvalue { i64, i1 } %cx, 1
; The call result is unused; the call exists to separate the cmpxchg from the branch.
261 call i32 @bar(i64 %v)
262 br i1 %p, label %t, label %f
; test_two_live_flags: performs two independent seq_cst 64-bit cmpxchg
; operations (on %foo0 and %foo1), ANDs their two success bits together and
; branches on the combined result.
; Two flag results are live at once, so the expected assembly captures each
; one with sete, combines them with andb and compares the result against 1
; before the jump. In the 32-bit expectations the first sete result is spilled
; to a stack slot and folded back in as the memory operand of the andb; in the
; 64-bit expectations it stays in %dl.
271 define i64 @test_two_live_flags(ptr %foo0, i64 %bar0, i64 %baz0, ptr %foo1, i64 %bar1, i64 %baz1) nounwind {
272 ; CHECK32-LABEL: test_two_live_flags:
273 ; CHECK32: # %bb.0: # %entry
274 ; CHECK32-NEXT: pushl %ebp
275 ; CHECK32-NEXT: pushl %ebx
276 ; CHECK32-NEXT: pushl %edi
277 ; CHECK32-NEXT: pushl %esi
278 ; CHECK32-NEXT: pushl %eax
279 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebp
280 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edi
281 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
282 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %edx
283 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx
284 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
285 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
286 ; CHECK32-NEXT: lock cmpxchg8b (%esi)
287 ; CHECK32-NEXT: sete {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
288 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
289 ; CHECK32-NEXT: movl %ebp, %edx
290 ; CHECK32-NEXT: movl %edi, %ecx
291 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ebx
292 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %esi
293 ; CHECK32-NEXT: lock cmpxchg8b (%esi)
294 ; CHECK32-NEXT: sete %al
295 ; CHECK32-NEXT: andb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Folded Reload
296 ; CHECK32-NEXT: cmpb $1, %al
297 ; CHECK32-NEXT: jne .LBB5_3
298 ; CHECK32-NEXT: # %bb.1: # %t
299 ; CHECK32-NEXT: movl $42, %eax
300 ; CHECK32-NEXT: jmp .LBB5_2
301 ; CHECK32-NEXT: .LBB5_3: # %f
302 ; CHECK32-NEXT: xorl %eax, %eax
303 ; CHECK32-NEXT: .LBB5_2: # %t
304 ; CHECK32-NEXT: xorl %edx, %edx
305 ; CHECK32-NEXT: addl $4, %esp
306 ; CHECK32-NEXT: popl %esi
307 ; CHECK32-NEXT: popl %edi
308 ; CHECK32-NEXT: popl %ebx
309 ; CHECK32-NEXT: popl %ebp
312 ; CHECK64-LABEL: test_two_live_flags:
313 ; CHECK64: # %bb.0: # %entry
314 ; CHECK64-NEXT: movq %rsi, %rax
315 ; CHECK64-NEXT: lock cmpxchgq %rdx, (%rdi)
316 ; CHECK64-NEXT: sete %dl
317 ; CHECK64-NEXT: movq %r8, %rax
318 ; CHECK64-NEXT: lock cmpxchgq %r9, (%rcx)
319 ; CHECK64-NEXT: sete %al
320 ; CHECK64-NEXT: andb %dl, %al
321 ; CHECK64-NEXT: cmpb $1, %al
322 ; CHECK64-NEXT: jne .LBB5_2
323 ; CHECK64-NEXT: # %bb.1: # %t
324 ; CHECK64-NEXT: movl $42, %eax
326 ; CHECK64-NEXT: .LBB5_2: # %f
327 ; CHECK64-NEXT: xorl %eax, %eax
; First compare-exchange; only its success bit (field 1) is used.
330 %cx0 = cmpxchg ptr %foo0, i64 %bar0, i64 %baz0 seq_cst seq_cst
331 %p0 = extractvalue { i64, i1 } %cx0, 1
; Second compare-exchange; again only the success bit is used.
332 %cx1 = cmpxchg ptr %foo1, i64 %bar1, i64 %baz1 seq_cst seq_cst
333 %p1 = extractvalue { i64, i1 } %cx1, 1
; Branch to %t only if both compare-exchanges succeeded.
334 %flag = and i1 %p0, %p1
335 br i1 %flag, label %t, label %f
; asm_clobbering_flags: loads an i32 from %mem, computes a signed "greater
; than zero" comparison on it, runs an inline-asm bsfl on the loaded value
; (the asm constraint string declares cc/flags clobbers), and stores the asm
; result back to %mem.
; In the expected assembly the comparison result is captured with testl + setg
; before the bsfl, so it does not depend on flags across the asm.
; NOTE(review): the use of %cmp is not visible in this excerpt; presumably it
; is the i1 return value - confirm against the full function.
344 define i1 @asm_clobbering_flags(ptr %mem) nounwind {
345 ; CHECK32-LABEL: asm_clobbering_flags:
346 ; CHECK32: # %bb.0: # %entry
347 ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx
348 ; CHECK32-NEXT: movl (%ecx), %edx
349 ; CHECK32-NEXT: testl %edx, %edx
350 ; CHECK32-NEXT: setg %al
352 ; CHECK32-NEXT: bsfl %edx, %edx
353 ; CHECK32-NEXT: #NO_APP
354 ; CHECK32-NEXT: movl %edx, (%ecx)
357 ; CHECK64-LABEL: asm_clobbering_flags:
358 ; CHECK64: # %bb.0: # %entry
359 ; CHECK64-NEXT: movl (%rdi), %ecx
360 ; CHECK64-NEXT: testl %ecx, %ecx
361 ; CHECK64-NEXT: setg %al
363 ; CHECK64-NEXT: bsfl %ecx, %ecx
364 ; CHECK64-NEXT: #NO_APP
365 ; CHECK64-NEXT: movl %ecx, (%rdi)
368 %val = load i32, ptr %mem, align 4
; Comparison computed before the flag-clobbering inline asm below.
369 %cmp = icmp sgt i32 %val, 0
; Inline asm takes %val as its input and produces %res; it lists ~{cc} and ~{flags} as clobbers.
370 %res = tail call i32 asm "bsfl $1,$0", "=r,r,~{cc},~{dirflag},~{fpsr},~{flags}"(i32 %val)
371 store i32 %res, ptr %mem, align 4