; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s
; BranchFolding should tail-merge the stores since they all precede
; direct branches to the same place.
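; The store of 0 to @GHJK at the end of blocks A, B, and C is emitted only
; once, folded into the block for %M (.LBB0_4 in the CHECK lines below).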
define void @tail_merge_me() nounwind {
; CHECK-LABEL: tail_merge_me:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB0_1
; CHECK-NEXT: # %bb.6: # %A
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq bar
; CHECK-NEXT: jmp .LBB0_4
; CHECK-NEXT: .LBB0_1: # %next
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.2: # %B
; CHECK-NEXT: movl $1, %edi
; CHECK-NEXT: callq car
; CHECK-NEXT: jmp .LBB0_4
; CHECK-NEXT: .LBB0_3: # %C
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq dar
; CHECK-NEXT: .LBB0_4: # %M
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB0_5
; CHECK-NEXT: # %bb.7: # %return
; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
; CHECK-NEXT: callq ear
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB0_5: # %altret
; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
; CHECK-NEXT: callq far
; CHECK-NEXT: popq %rax
  br i1 %a, label %A, label %next
  br i1 %b, label %B, label %C
  store i32 0, i32* @GHJK
  store i32 0, i32* @GHJK
  store i32 0, i32* @GHJK
  store i32 1, i32* @HABC
  br i1 %c, label %return, label %altret
  call void @ear(i32 1000)
  call void @far(i32 1001)
declare i8* @choose(i8*, i8*)
; BranchFolding should tail-duplicate the indirect jump to avoid
; redundant branching.
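; The indirect jump through the pointer returned by @choose is duplicated into
; blocks A, B, and C, so each of them ends with its own jmpq *%r14 in the
; CHECK lines below instead of branching to a shared indirect-jump block.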
define void @tail_duplicate_me() nounwind {
; CHECK-LABEL: tail_duplicate_me:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %r14
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq qux
; CHECK-NEXT: movl $.Ltmp0, %edi
; CHECK-NEXT: movl $.Ltmp1, %esi
; CHECK-NEXT: movl %eax, %ebx
; CHECK-NEXT: callq choose
; CHECK-NEXT: movq %rax, %r14
; CHECK-NEXT: testb $1, %bl
; CHECK-NEXT: je .LBB1_1
; CHECK-NEXT: # %bb.7: # %A
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq bar
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmpq *%r14
; CHECK-NEXT: .Ltmp0: # Block address taken
; CHECK-NEXT: .LBB1_4: # %return
; CHECK-NEXT: movl $1000, %edi # imm = 0x3E8
; CHECK-NEXT: callq ear
; CHECK-NEXT: jmp .LBB1_5
; CHECK-NEXT: .LBB1_1: # %next
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB1_3
; CHECK-NEXT: # %bb.2: # %B
; CHECK-NEXT: movl $1, %edi
; CHECK-NEXT: callq car
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmpq *%r14
; CHECK-NEXT: .Ltmp1: # Block address taken
; CHECK-NEXT: .LBB1_6: # %altret
; CHECK-NEXT: movl $1001, %edi # imm = 0x3E9
; CHECK-NEXT: callq far
; CHECK-NEXT: .LBB1_5: # %return
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: popq %r14
; CHECK-NEXT: .LBB1_3: # %C
; CHECK-NEXT: movl $2, %edi
; CHECK-NEXT: callq dar
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmpq *%r14
  %c = call i8* @choose(i8* blockaddress(@tail_duplicate_me, %return),
                        i8* blockaddress(@tail_duplicate_me, %altret))
  br i1 %a, label %A, label %next
  br i1 %b, label %B, label %C
  call void @bar(i32 0)
  store i32 0, i32* @GHJK
  call void @car(i32 1)
  store i32 0, i32* @GHJK
  call void @dar(i32 2)
  store i32 0, i32* @GHJK
  indirectbr i8* %c, [label %return, label %altret]
  call void @ear(i32 1000)
  call void @far(i32 1001)
; BranchFolding shouldn't try to merge the tails of two blocks
; with only a branch in common, regardless of the fallthrough situation.
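; In @dont_merge_oddly, %bb and %bb21 each end with a compare and a
; conditional branch to %bb26/%bb30; the CHECK lines keep the two
; ucomiss/branch sequences in separate blocks rather than merging them.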
define i1 @dont_merge_oddly(float* %result) nounwind {
; CHECK-LABEL: dont_merge_oddly:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: ucomiss %xmm1, %xmm2
; CHECK-NEXT: jbe .LBB2_3
; CHECK-NEXT: # %bb.1: # %bb
; CHECK-NEXT: ucomiss %xmm0, %xmm1
; CHECK-NEXT: ja .LBB2_4
; CHECK-NEXT: .LBB2_2: # %bb30
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: .LBB2_3: # %bb21
; CHECK-NEXT: ucomiss %xmm0, %xmm2
; CHECK-NEXT: jbe .LBB2_2
; CHECK-NEXT: .LBB2_4: # %bb26
; CHECK-NEXT: xorl %eax, %eax
  %tmp4 = getelementptr float, float* %result, i32 2
  %tmp5 = load float, float* %tmp4, align 4
  %tmp7 = getelementptr float, float* %result, i32 4
  %tmp8 = load float, float* %tmp7, align 4
  %tmp10 = getelementptr float, float* %result, i32 6
  %tmp11 = load float, float* %tmp10, align 4
  %tmp12 = fcmp olt float %tmp8, %tmp11
  br i1 %tmp12, label %bb, label %bb21
  %tmp23469 = fcmp olt float %tmp5, %tmp8
  br i1 %tmp23469, label %bb26, label %bb30
  %tmp23 = fcmp olt float %tmp5, %tmp11
  br i1 %tmp23, label %bb26, label %bb30
; Do any-size tail-merging when two candidate blocks will both require
; an unconditional jump to complete a two-way conditional branch.
; This test only works when register allocation happens to use %rax for both
%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque
%struct.rtx_def = type { i16, i8, i8, [1 x %union.rtunion] }
%struct.tree_decl = type { [24 x i8], i8*, i32, %union.tree_node*, i32, i8, i8, i8, i8, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %union.tree_node*, %struct.rtx_def*, %union..2anon, %0, %union.tree_node*, %struct.lang_decl* }
%union..2anon = type { i32 }
%union.rtunion = type { i8* }
%union.tree_node = type { %struct.tree_decl }
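; The CHECK lines for @c_expand_expr_stmt below pin the block layout produced
; by this merging; several short blocks (for example .LBB3_8 and .LBB3_5)
; reach the shared .LBB3_14/.LBB3_15 exit sequence rather than ending in their
; own copy of it.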
define fastcc void @c_expand_expr_stmt(%union.tree_node* %expr) nounwind {
; CHECK-LABEL: c_expand_expr_stmt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_17
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: movb 0, %bl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_16
; CHECK-NEXT: # %bb.2: # %bb.i
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_16
; CHECK-NEXT: # %bb.3: # %lvalue_p.exit
; CHECK-NEXT: movq 0, %rax
; CHECK-NEXT: movzbl (%rax), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB3_12
; CHECK-NEXT: # %bb.4: # %lvalue_p.exit
; CHECK-NEXT: cmpl $2, %ecx
; CHECK-NEXT: jne .LBB3_5
; CHECK-NEXT: # %bb.6: # %bb.i1
; CHECK-NEXT: movq 32(%rax), %rax
; CHECK-NEXT: movzbl 16(%rax), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB3_10
; CHECK-NEXT: # %bb.7: # %bb.i1
; CHECK-NEXT: cmpl $2, %ecx
; CHECK-NEXT: jne .LBB3_8
; CHECK-NEXT: # %bb.9: # %bb.i.i
; CHECK-NEXT: xorl %edi, %edi
; CHECK-NEXT: callq lvalue_p
; CHECK-NEXT: testl %eax, %eax
; CHECK-NEXT: setne %al
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_15
; CHECK-NEXT: jmp .LBB3_17
; CHECK-NEXT: .LBB3_16: # %bb1
; CHECK-NEXT: cmpb $23, %bl
; CHECK-NEXT: .LBB3_17: # %bb3
; CHECK-NEXT: .LBB3_12: # %bb2.i3
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_14
; CHECK-NEXT: # %bb.13: # %bb2.i3
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_14
; CHECK-NEXT: jmp .LBB3_17
; CHECK-NEXT: .LBB3_5:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB3_15
; CHECK-NEXT: jmp .LBB3_17
; CHECK-NEXT: .LBB3_10: # %bb2.i.i2
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: movb 16(%rax), %cl
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpb $16, %cl
; CHECK-NEXT: je .LBB3_14
; CHECK-NEXT: # %bb.11: # %bb2.i.i2
; CHECK-NEXT: cmpb $23, %cl
; CHECK-NEXT: je .LBB3_14
; CHECK-NEXT: jmp .LBB3_17
; CHECK-NEXT: .LBB3_8:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: .LBB3_14: # %lvalue_p.exit4
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB3_17
; CHECK-NEXT: .LBB3_15: # %lvalue_p.exit4
; CHECK-NEXT: testb %bl, %bl
  %tmp4 = load i8, i8* null, align 8 ; <i8> [#uses=3]
  switch i8 %tmp4, label %bb3 [
  switch i32 undef, label %bb1 [
  switch i32 undef, label %bb1 [
    i32 0, label %lvalue_p.exit
lvalue_p.exit: ; preds = %bb.i
  %tmp21 = load %union.tree_node*, %union.tree_node** null, align 8 ; <%union.tree_node*> [#uses=3]
  %tmp22 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 0 ; <i8*> [#uses=1]
  %tmp23 = load i8, i8* %tmp22, align 8 ; <i8> [#uses=1]
  %tmp24 = zext i8 %tmp23 to i32 ; <i32> [#uses=1]
  switch i32 %tmp24, label %lvalue_p.exit4 [
bb.i1: ; preds = %lvalue_p.exit
  %tmp25 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 2 ; <i32*> [#uses=1]
  %tmp26 = bitcast i32* %tmp25 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
  %tmp27 = load %union.tree_node*, %union.tree_node** %tmp26, align 8 ; <%union.tree_node*> [#uses=2]
  %tmp28 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
  %tmp29 = load i8, i8* %tmp28, align 8 ; <i8> [#uses=1]
  %tmp30 = zext i8 %tmp29 to i32 ; <i32> [#uses=1]
  switch i32 %tmp30, label %lvalue_p.exit4 [
    i32 0, label %bb2.i.i2
bb.i.i: ; preds = %bb.i1
  %tmp34 = tail call fastcc i32 @lvalue_p(%union.tree_node* null) nounwind ; <i32> [#uses=1]
  %phitmp = icmp ne i32 %tmp34, 0 ; <i1> [#uses=1]
  br label %lvalue_p.exit4
bb2.i.i2: ; preds = %bb.i1
  %tmp35 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp27, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
  %tmp36 = bitcast i8* %tmp35 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
  %tmp37 = load %union.tree_node*, %union.tree_node** %tmp36, align 8 ; <%union.tree_node*> [#uses=1]
  %tmp38 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp37, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
  %tmp39 = load i8, i8* %tmp38, align 8 ; <i8> [#uses=1]
  switch i8 %tmp39, label %bb2 [
    i8 16, label %lvalue_p.exit4
    i8 23, label %lvalue_p.exit4
bb2.i3: ; preds = %lvalue_p.exit
  %tmp40 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp21, i64 0, i32 0, i32 0, i64 8 ; <i8*> [#uses=1]
  %tmp41 = bitcast i8* %tmp40 to %union.tree_node** ; <%union.tree_node**> [#uses=1]
  %tmp42 = load %union.tree_node*, %union.tree_node** %tmp41, align 8 ; <%union.tree_node*> [#uses=1]
  %tmp43 = getelementptr inbounds %union.tree_node, %union.tree_node* %tmp42, i64 0, i32 0, i32 0, i64 16 ; <i8*> [#uses=1]
  %tmp44 = load i8, i8* %tmp43, align 8 ; <i8> [#uses=1]
  switch i8 %tmp44, label %bb2 [
    i8 16, label %lvalue_p.exit4
    i8 23, label %lvalue_p.exit4
lvalue_p.exit4: ; preds = %bb2.i3, %bb2.i3, %bb2.i.i2, %bb2.i.i2, %bb.i.i, %bb.i1, %lvalue_p.exit
  %tmp45 = phi i1 [ %phitmp, %bb.i.i ], [ false, %bb2.i.i2 ], [ false, %bb2.i.i2 ], [ false, %bb.i1 ], [ false, %bb2.i3 ], [ false, %bb2.i3 ], [ false, %lvalue_p.exit ] ; <i1> [#uses=1]
  %tmp46 = icmp eq i8 %tmp4, 0 ; <i1> [#uses=1]
  %or.cond = or i1 %tmp45, %tmp46 ; <i1> [#uses=1]
  br i1 %or.cond, label %bb2, label %bb3
bb1: ; preds = %bb2.i.i, %bb.i, %bb
  %.old = icmp eq i8 %tmp4, 23 ; <i1> [#uses=1]
  br i1 %.old, label %bb2, label %bb3
bb2: ; preds = %bb1, %lvalue_p.exit4, %bb2.i3, %bb2.i.i2
bb3: ; preds = %bb2, %bb1, %lvalue_p.exit4, %bb2.i, %entry
  %expr_addr.0 = phi %union.tree_node* [ null, %bb2 ], [ %expr, %bb2.i ], [ %expr, %entry ], [ %expr, %bb1 ], [ %expr, %lvalue_p.exit4 ] ; <%union.tree_node*> [#uses=0]
declare fastcc i32 @lvalue_p(%union.tree_node* nocapture) nounwind readonly
declare fastcc %union.tree_node* @default_conversion(%union.tree_node*) nounwind
; If one tail merging candidate falls through into the other,
; tail merging is likely profitable regardless of how few
; instructions are involved. This function should have only
; one ret instruction.
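; In the CHECK lines for @foo, the je from %entry and the fallthrough from %bb
; both land on the single .LBB4_2 return block, so only one ret is emitted.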
define void @foo(i1* %V) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: je .LBB4_2
; CHECK-NEXT: # %bb.1: # %bb
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq func
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .LBB4_2: # %return
  %t0 = icmp eq i1* %V, null
  br i1 %t0, label %return, label %bb
; one - One instruction may be tail-duplicated even with optsize.
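; In @one, the tail call appears in both %bb7 and %bbx, and the CHECK lines
; show a separate TAILCALL in each path: duplicating a single instruction is
; still allowed under optsize.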
@XYZ = external global i32
declare void @tail_call_me()
define void @one(i32 %v) nounwind optsize {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB5_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: cmpl $16, %edi
; CHECK-NEXT: je .LBB5_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB5_3: # %bbx
; CHECK-NEXT: cmpl $128, %edi
; CHECK-NEXT: jne tail_call_me # TAILCALL
; CHECK-NEXT: .LBB5_4: # %return
  %0 = icmp eq i32 %v, 0
  br i1 %0, label %bbx, label %bby
  switch i32 %v, label %bb7 [
    i32 16, label %return
  tail call void @tail_call_me()
  switch i32 %v, label %bb12 [
    i32 128, label %return
  tail call void @tail_call_me()
; two - Same as one, but with two instructions in the common
; tail instead of one. This is too much to be duplicated, given
; the optsize attribute.
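; Both %bb7 and %bb12 store 0 and then 1 to @XYZ; with optsize the CHECK lines
; contain that two-instruction pair only once, in .LBB6_1.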
define void @two() nounwind optsize {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB6_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: .LBB6_1: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
  %0 = icmp eq i32 undef, 0
  br i1 %0, label %bbx, label %bby
  switch i32 undef, label %bb7 [
    i32 16, label %return
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  switch i32 undef, label %bb12 [
    i32 128, label %return
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
; two_minsize - Same as two, but with minsize instead of optsize.
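; As with @two, the CHECK lines show the @XYZ store pair only once (.LBB7_1);
; minsize likewise rules out duplicating the two-instruction tail.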
define void @two_minsize() nounwind minsize {
; CHECK-LABEL: two_minsize:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: je .LBB7_1
; CHECK-NEXT: # %bb.2: # %return
; CHECK-NEXT: .LBB7_1: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: movl $1, {{.*}}(%rip)
  %0 = icmp eq i32 undef, 0
  br i1 %0, label %bbx, label %bby
  switch i32 undef, label %bb7 [
    i32 16, label %return
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
  switch i32 undef, label %bb12 [
    i32 128, label %return
  store volatile i32 0, i32* @XYZ
  store volatile i32 1, i32* @XYZ
; two_nosize - Same as two, but without the optsize attribute.
; Now two instructions are few enough to be tail-duplicated.
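; Without a size attribute the store-plus-tail-call tail is duplicated, so the
; CHECK lines contain two movl/jmp tail_call_me pairs, one in %bb7 and one in
; %bb12.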
define void @two_nosize(i32 %x, i32 %y, i32 %z) nounwind {
; CHECK-LABEL: two_nosize:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testl %edi, %edi
; CHECK-NEXT: je .LBB8_3
; CHECK-NEXT: # %bb.1: # %bby
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je .LBB8_4
; CHECK-NEXT: # %bb.2: # %bb7
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB8_3: # %bbx
; CHECK-NEXT: cmpl $-1, %edx
; CHECK-NEXT: je .LBB8_4
; CHECK-NEXT: # %bb.5: # %bb12
; CHECK-NEXT: movl $0, {{.*}}(%rip)
; CHECK-NEXT: jmp tail_call_me # TAILCALL
; CHECK-NEXT: .LBB8_4: # %return
  %0 = icmp eq i32 %x, 0
  br i1 %0, label %bbx, label %bby
  switch i32 %y, label %bb7 [
  store volatile i32 0, i32* @XYZ
  tail call void @tail_call_me()
  switch i32 %z, label %bb12 [
    i32 -1, label %return
  store volatile i32 0, i32* @XYZ
  tail call void @tail_call_me()
; Tail-merging should merge the two ret instructions since one side
; can fall through into the ret and the other side has to branch anyway.
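; In the CHECK lines for @TESTE, %for.end is reached both by the jle from
; %entry and by fallthrough from %bb.nph, so a single ret serves both paths.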
define i64 @TESTE(i64 %parami, i64 %paraml) nounwind readnone {
; CHECK-LABEL: TESTE:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testq %rdi, %rdi
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: cmovgq %rdi, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: jle .LBB9_2
; CHECK-NEXT: # %bb.1: # %bb.nph
; CHECK-NEXT: imulq %rdi, %rsi
; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: .LBB9_2: # %for.end
  %cmp = icmp slt i64 %parami, 1 ; <i1> [#uses=1]
  %varx.0 = select i1 %cmp, i64 1, i64 %parami ; <i64> [#uses=1]
  %cmp410 = icmp slt i64 %paraml, 1 ; <i1> [#uses=1]
  br i1 %cmp410, label %for.end, label %bb.nph
bb.nph: ; preds = %entry
  %tmp15 = mul i64 %paraml, %parami ; <i64> [#uses=1]
for.end: ; preds = %entry
; We should tail merge small blocks that don't end in a tail call or return
; instruction. Those blocks are typically unreachable and will be placed
; out-of-line after the main return, so we should try to eliminate as many of
; them as possible.
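; In @merge_aborts, the CHECK lines show all four failure paths branching to
; the single .LBB10_5 block, which makes the call to @abort.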
declare void @abort()
define void @merge_aborts() {
; CHECK-LABEL: merge_aborts:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB10_5
; CHECK-NEXT: # %bb.1: # %cont1
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB10_5
; CHECK-NEXT: # %bb.2: # %cont2
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB10_5
; CHECK-NEXT: # %bb.3: # %cont3
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB10_5
; CHECK-NEXT: # %bb.4: # %cont4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .LBB10_5: # %abort1
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq abort
  br i1 %c1, label %cont1, label %abort1
  br i1 %c2, label %cont2, label %abort2
  br i1 %c3, label %cont3, label %abort3
  br i1 %c4, label %cont4, label %abort4
; Use alternating abort functions so that the blocks we wish to merge are not
; layout successors during branch folding.
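; In @merge_alternating_aborts, only blocks that call the same function can
; merge: the CHECK lines show one target calling abort (.LBB11_5) and one
; calling alt_abort (.LBB11_6).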
declare void @alt_abort()
define void @merge_alternating_aborts() {
; CHECK-LABEL: merge_alternating_aborts:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB11_5
; CHECK-NEXT: # %bb.1: # %cont1
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB11_6
; CHECK-NEXT: # %bb.2: # %cont2
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB11_5
; CHECK-NEXT: # %bb.3: # %cont3
; CHECK-NEXT: callq qux
; CHECK-NEXT: testb $1, %al
; CHECK-NEXT: je .LBB11_6
; CHECK-NEXT: # %bb.4: # %cont4
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .LBB11_5: # %abort1
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq abort
; CHECK-NEXT: .LBB11_6: # %abort2
; CHECK-NEXT: callq alt_abort
  br i1 %c1, label %cont1, label %abort1
  br i1 %c2, label %cont2, label %abort2
  call void @alt_abort()
  br i1 %c3, label %cont3, label %abort3
  br i1 %c4, label %cont4, label %abort4
  call void @alt_abort()