; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefix=X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefix=X64

; Test patterns that require preserving and restoring flags.
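;
; Copying EFLAGS around directly is expensive, so the X86 flags-copy-lowering
; pass instead extracts each needed condition into a byte register with SETcc
; and re-materializes it later (typically with TEST), as the assertions below
; show.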

@b = common dso_local global i8 0, align 1
@c = common dso_local global i32 0, align 4
@a = common dso_local global i8 0, align 1
@d = common dso_local global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare dso_local void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
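; Below, the ZF from the increment of @c is captured with sete into %dl and
; re-tested with testb to feed the branch after the compare and the increment
; of @a have clobbered EFLAGS.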
define dso_local i32 @test1() nounwind {
; X32-LABEL: test1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movzbl b, %ecx
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    incb %al
; X32-NEXT:    movb %al, b
; X32-NEXT:    incl c
; X32-NEXT:    sete %dl
; X32-NEXT:    movb a, %ah
; X32-NEXT:    movb %ah, %ch
; X32-NEXT:    incb %ch
; X32-NEXT:    cmpb %cl, %ah
; X32-NEXT:    sete d
; X32-NEXT:    movb %ch, a
; X32-NEXT:    testb %dl, %dl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:  # %bb.1: # %if.then
; X32-NEXT:    movsbl %al, %eax
; X32-NEXT:    pushl %eax
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:  .LBB0_2: # %if.end
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzbl b(%rip), %ecx
; X64-NEXT:    leal 1(%rcx), %eax
; X64-NEXT:    movb %al, b(%rip)
; X64-NEXT:    incl c(%rip)
; X64-NEXT:    sete %dl
; X64-NEXT:    movzbl a(%rip), %esi
; X64-NEXT:    leal 1(%rsi), %edi
; X64-NEXT:    cmpb %cl, %sil
; X64-NEXT:    sete d(%rip)
; X64-NEXT:    movb %dil, a(%rip)
; X64-NEXT:    testb %dl, %dl
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:  # %bb.1: # %if.then
; X64-NEXT:    pushq %rax
; X64-NEXT:    movsbl %al, %edi
; X64-NEXT:    callq external
; X64-NEXT:    addq $8, %rsp
; X64-NEXT:  .LBB0_2: # %if.end
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    retq
entry:
  %bval = load i8, ptr @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, ptr @b
  %cval = load volatile i32, ptr @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, ptr @c
  %aval = load volatile i8, ptr @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, ptr @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, ptr @d
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
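; Here the captured setne result has to live in a callee-saved register
; (%bl below) so that it survives the call to @external before feeding the
; branch.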
define dso_local i32 @test2(ptr %ptr) nounwind {
; X32-LABEL: test2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %bl
; X32-NEXT:    pushl $42
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:    testb %bl, %bl
; X32-NEXT:    jne .LBB1_2
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    movl $64, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
; X32-NEXT:  .LBB1_2: # %else
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbx
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %bl
; X64-NEXT:    movl $42, %edi
; X64-NEXT:    callq external
; X64-NEXT:    testb %bl, %bl
; X64-NEXT:    jne .LBB1_2
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_2: # %else
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
entry:
  %val = load i32, ptr %ptr
  %inc = add i32 %val, 1
  store i32 %inc, ptr %ptr
  %cmp = icmp eq i32 %inc, 0
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare dso_local void @external_a()
declare dso_local void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
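; As in test1, the setne result is captured in %al and re-tested after the
; volatile increment of @a clobbers EFLAGS, here to pick between the two
; conditional tail-call targets.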
define dso_local void @test_tail_call(ptr %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %al
; X32-NEXT:    incb a
; X32-NEXT:    sete d
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne external_b # TAILCALL
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64:       # %bb.0: # %entry
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %al
; X64-NEXT:    incb a(%rip)
; X64-NEXT:    sete d(%rip)
; X64-NEXT:    testb %al, %al
; X64-NEXT:    jne external_b # TAILCALL
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    jmp external_a # TAILCALL
entry:
  %val = load i32, ptr %ptr
  %inc = add i32 %val, 1
  store i32 %inc, ptr %ptr
  %cmp = icmp eq i32 %inc, 0
  %aval = load volatile i8, ptr @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, ptr @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, ptr @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
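; The compare result feeds selects in a later basic block once the selects
; are expanded into control flow, so the captured flag value has to be
; threaded across block boundaries rather than rewritten purely locally.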
define dso_local void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, ptr %ptr1, ptr %ptr2, i32 %x) nounwind {
; X32-LABEL: PR37100:
; X32:       # %bb.0: # %bb
; X32-NEXT:    pushl %ebp
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    jmp .LBB3_1
; X32-NEXT:    .p2align 4
; X32-NEXT:  .LBB3_5: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %esi, %eax
; X32-NEXT:    cltd
; X32-NEXT:    idivl %edi
; X32-NEXT:  .LBB3_1: # %bb1
; X32-NEXT:    # =>This Inner Loop Header: Depth=1
; X32-NEXT:    movsbl %cl, %eax
; X32-NEXT:    movl %eax, %edx
; X32-NEXT:    sarl $31, %edx
; X32-NEXT:    cmpl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    sbbl %edx, %eax
; X32-NEXT:    setl %al
; X32-NEXT:    setl %dl
; X32-NEXT:    movzbl %dl, %edi
; X32-NEXT:    negl %edi
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_3
; X32-NEXT:  # %bb.2: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %ch, %cl
; X32-NEXT:  .LBB3_3: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %cl, (%ebp)
; X32-NEXT:    movl (%ebx), %edx
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_5
; X32-NEXT:  # %bb.4: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %edx, %edi
; X32-NEXT:    jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq %rdx, %r10
; X64-NEXT:    movl {{[0-9]+}}(%rsp), %esi
; X64-NEXT:    movzbl %cl, %ecx
; X64-NEXT:    .p2align 4
; X64-NEXT:  .LBB3_1: # %bb1
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    xorl %r11d, %r11d
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    setl %r11b
; X64-NEXT:    negl %r11d
; X64-NEXT:    cmpq %rax, %r10
; X64-NEXT:    movzbl %al, %edi
; X64-NEXT:    cmovgel %ecx, %edi
; X64-NEXT:    movb %dil, (%r8)
; X64-NEXT:    cmovgel (%r9), %r11d
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    cltd
; X64-NEXT:    idivl %r11d
; X64-NEXT:    jmp .LBB3_1
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, ptr %ptr1
  %tmp9 = load volatile i32, ptr %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 %x, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern that lowers to the post-RA pseudo used
; to expand SETB into an SBB, in order to make sure that kind of usage of a
; copied EFLAGS continues to work.
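; The sbb-with-itself below materializes the borrow of the unsigned compare
; as 0 or -1 in a register, and that value is then used both by the store
; and as the divisor of the idiv.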
define dso_local void @PR37431(ptr %arg1, ptr %arg2, ptr %arg3, i32 %arg4, i64 %arg5) nounwind {
; X32-LABEL: PR37431:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebp
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl (%edi), %edi
; X32-NEXT:    movl %edi, %ebp
; X32-NEXT:    sarl $31, %ebp
; X32-NEXT:    xorl %ebx, %ebx
; X32-NEXT:    cmpl %edi, {{[0-9]+}}(%esp)
; X32-NEXT:    sbbl %ebp, %esi
; X32-NEXT:    sbbl %ebx, %ebx
; X32-NEXT:    movb %bl, (%edx)
; X32-NEXT:    cltd
; X32-NEXT:    idivl %ebx
; X32-NEXT:    movb %dl, (%ecx)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    popl %ebp
; X32-NEXT:    retl
;
; X64-LABEL: PR37431:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movslq (%rdi), %rdx
; X64-NEXT:    xorl %edi, %edi
; X64-NEXT:    cmpq %rdx, %r8
; X64-NEXT:    sbbl %edi, %edi
; X64-NEXT:    movb %dil, (%rsi)
; X64-NEXT:    cltd
; X64-NEXT:    idivl %edi
; X64-NEXT:    movb %dl, (%rcx)
; X64-NEXT:    retq
entry:
  %tmp = load i32, ptr %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, %arg5
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, ptr %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 %arg4, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, ptr %arg3
  ret void
}