; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefixes=ALL,X64

; Test patterns that require preserving and restoring flags.

@b = common global i8 0, align 1
@c = common global i32 0, align 4
@a = common global i8 0, align 1
@d = common global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first volatile increment's flags (of @c) are reused for the
; branch despite being clobbered by the second volatile increment (of @a).
define i32 @test1() nounwind {
; X32-LABEL: test1:
; X32: # %bb.0: # %entry
; X32-NEXT: movb b, %cl
; X32-NEXT: movl %ecx, %eax
; X32-NEXT: incb %al
; X32-NEXT: movb %al, b
; X32-NEXT: incl c
; X32-NEXT: setne %dl
; X32-NEXT: movb a, %ah
; X32-NEXT: movb %ah, %ch
; X32-NEXT: incb %ch
; X32-NEXT: cmpb %cl, %ah
; X32-NEXT: sete d
; X32-NEXT: movb %ch, a
; X32-NEXT: testb %dl, %dl
; X32-NEXT: jne .LBB0_2
; X32-NEXT: # %bb.1: # %if.then
; X32-NEXT: movsbl %al, %eax
; X32-NEXT: pushl %eax
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: .LBB0_2: # %if.end
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: retl
;
; X64-LABEL: test1:
; X64: # %bb.0: # %entry
; X64-NEXT: movb {{.*}}(%rip), %dil
; X64-NEXT: movl %edi, %eax
; X64-NEXT: incb %al
; X64-NEXT: movb %al, {{.*}}(%rip)
; X64-NEXT: incl {{.*}}(%rip)
; X64-NEXT: setne %sil
; X64-NEXT: movb {{.*}}(%rip), %cl
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: incb %dl
; X64-NEXT: cmpb %dil, %cl
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: movb %dl, {{.*}}(%rip)
; X64-NEXT: testb %sil, %sil
; X64-NEXT: jne .LBB0_2
; X64-NEXT: # %bb.1: # %if.then
; X64-NEXT: pushq %rax
; X64-NEXT: movsbl %al, %edi
; X64-NEXT: callq external
; X64-NEXT: addq $8, %rsp
; X64-NEXT: .LBB0_2: # %if.end
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
entry:
  %bval = load i8, i8* @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, i8* @b
  %cval = load volatile i32, i32* @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, i32* @c
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, i8* @d
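  ; The branch below is on %inc1, whose flags come from the @c increment and
  ; must survive the flag-clobbering @a increment and the compare above.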
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
define i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %bl
; X32-NEXT: pushl $42
; X32-NEXT: calll external
; X32-NEXT: addl $4, %esp
; X32-NEXT: testb %bl, %bl
; X32-NEXT: jne .LBB1_2
; X32-NEXT: # %bb.1: # %then
; X32-NEXT: movl $64, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
; X32-NEXT: .LBB1_2: # %else
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: popl %ebx
; X32-NEXT: retl
;
; X64-LABEL: test2:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %bl
; X64-NEXT: movl $42, %edi
; X64-NEXT: callq external
; X64-NEXT: testb %bl, %bl
; X64-NEXT: jne .LBB1_2
; X64-NEXT: # %bb.1: # %then
; X64-NEXT: movl $64, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
; X64-NEXT: .LBB1_2: # %else
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rbx
; X64-NEXT: retq
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
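  ; %cmp is branched on after the call below, so the flags from the increment
  ; must be captured before the call clobbers EFLAGS and re-tested afterwards.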
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare void @external_a()
declare void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
define void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: incl (%eax)
; X32-NEXT: setne %al
; X32-NEXT: incb a
; X32-NEXT: sete d
; X32-NEXT: testb %al, %al
; X32-NEXT: jne external_b # TAILCALL
; X32-NEXT: # %bb.1: # %then
; X32-NEXT: jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64: # %bb.0: # %entry
; X64-NEXT: incl (%rdi)
; X64-NEXT: setne %al
; X64-NEXT: incb {{.*}}(%rip)
; X64-NEXT: sete {{.*}}(%rip)
; X64-NEXT: testb %al, %al
; X64-NEXT: jne external_b # TAILCALL
; X64-NEXT: # %bb.1: # %then
; X64-NEXT: jmp external_a # TAILCALL
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
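  ; %cmp must survive the volatile increment of @a below so the branch at the
  ; end can still be lowered to a conditional tail call.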
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, i8* @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
define void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2, i32 %x) nounwind {
; X32-LABEL: PR37100:
; X32: # %bb.0: # %bb
; X32-NEXT: pushl %ebp
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT: movb {{[0-9]+}}(%esp), %ch
; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
; X32-NEXT: jmp .LBB3_1
; X32-NEXT: .p2align 4, 0x90
; X32-NEXT: .LBB3_5: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movl %esi, %eax
; X32-NEXT: cltd
; X32-NEXT: idivl %edi
; X32-NEXT: .LBB3_1: # %bb1
; X32-NEXT: # =>This Inner Loop Header: Depth=1
; X32-NEXT: movsbl %cl, %eax
; X32-NEXT: movl %eax, %edx
; X32-NEXT: sarl $31, %edx
; X32-NEXT: cmpl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: sbbl %edx, %eax
; X32-NEXT: setl %al
; X32-NEXT: setl %dl
; X32-NEXT: movzbl %dl, %edi
; X32-NEXT: negl %edi
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_3
; X32-NEXT: # %bb.2: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %ch, %cl
; X32-NEXT: .LBB3_3: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movb %cl, (%ebp)
; X32-NEXT: movl (%ebx), %edx
; X32-NEXT: testb %al, %al
; X32-NEXT: jne .LBB3_5
; X32-NEXT: # %bb.4: # %bb1
; X32-NEXT: # in Loop: Header=BB3_1 Depth=1
; X32-NEXT: movl %edx, %edi
; X32-NEXT: jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64: # %bb.0: # %bb
; X64-NEXT: movq %rdx, %r11
; X64-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; X64-NEXT: jmp .LBB3_1
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_5: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movl %r10d, %eax
; X64-NEXT: cltd
; X64-NEXT: idivl %esi
; X64-NEXT: .LBB3_1: # %bb1
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movsbq %dil, %rax
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: cmpq %rax, %r11
; X64-NEXT: setl %sil
; X64-NEXT: negl %esi
; X64-NEXT: cmpq %rax, %r11
; X64-NEXT: jl .LBB3_3
; X64-NEXT: # %bb.2: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movl %ecx, %edi
; X64-NEXT: .LBB3_3: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movb %dil, (%r8)
; X64-NEXT: jl .LBB3_5
; X64-NEXT: # %bb.4: # %bb1
; X64-NEXT: # in Loop: Header=BB3_1 Depth=1
; X64-NEXT: movl (%r9), %esi
; X64-NEXT: jmp .LBB3_5
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
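  ; %tmp6 feeds both selects above with a volatile store and load in between,
  ; so the lowered select CFG has to carry the compare result across blocks.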
  %tmp11 = srem i32 %x, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern that lowers to the post-RA pseudo used
; to turn SETB into an SBB pattern, in order to make sure that kind of usage of
; a copied EFLAGS continues to work.
define void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %x) nounwind {
; X32-LABEL: PR37431:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl (%eax), %eax
; X32-NEXT: movl %eax, %ecx
; X32-NEXT: sarl $31, %ecx
; X32-NEXT: cmpl %eax, %eax
; X32-NEXT: sbbl %ecx, %eax
; X32-NEXT: setb %cl
; X32-NEXT: sbbb %dl, %dl
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-NEXT: movb %dl, (%edi)
; X32-NEXT: movzbl %cl, %ecx
; X32-NEXT: xorl %edi, %edi
; X32-NEXT: subl %ecx, %edi
; X32-NEXT: cltd
; X32-NEXT: idivl %edi
; X32-NEXT: movb %dl, (%esi)
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: retl
;
; X64-LABEL: PR37431:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %ecx, %eax
; X64-NEXT: movq %rdx, %r8
; X64-NEXT: movslq (%rdi), %rdx
; X64-NEXT: cmpq %rdx, %rax
; X64-NEXT: sbbb %cl, %cl
; X64-NEXT: cmpq %rdx, %rax
; X64-NEXT: movb %cl, (%rsi)
; X64-NEXT: sbbl %ecx, %ecx
; X64-NEXT: cltd
; X64-NEXT: idivl %ecx
; X64-NEXT: movb %dl, (%r8)
; X64-NEXT: retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, undef
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
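  ; The zext of the compare negated via sub-from-zero is the pattern that
  ; lowers to the SETB/SBB post-RA pseudo exercised by this test.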
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 %x, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}