1 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s
3 ; If the block containing the SI_RETURN_TO_EPILOG is not the last block, insert an empty block at the end and
4 ; insert an unconditional jump there.
; Simplest case: a pixel-shader function taking one float and returning a float.
; The check lines visible here expect the $vgpr0 live-in and then
; SI_RETURN_TO_EPILOG killed $vgpr0; no branch instruction appears among them.
; NOTE(review): the IR body of this function is not visible in this excerpt.
5 define amdgpu_ps float @simple_test_return_to_epilog(float %a) #0 {
6 ; GCN-LABEL: name: simple_test_return_to_epilog
8 ; GCN-NEXT: liveins: $vgpr0
10 ; GCN-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0
; One conditional on %a (icmp sgt %a, 0) selecting between %if and %else.
; The check lines visible here expect:
;   - an entry block that runs S_CMP_LT_I32 on $sgpr2 against 1 and takes
;     S_CBRANCH_SCC1 to %bb.2;
;   - a block whose only successor is %bb.3 and which ends in S_BRANCH %bb.3
;     (presumably the unconditional jump to the appended end block described
;     at the top of this file -- TODO confirm against the full check list);
;   - bb.2.else, which lists no successors, materialises 0 in $vgpr0 and
;     performs the volatile global store.
15 define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float %b) #0 {
16 ; GCN-LABEL: name: test_return_to_epilog_into_end_block
18 ; GCN-NEXT: successors: %bb.1(0x80000000), %bb.2(0x00000000)
19 ; GCN-NEXT: liveins: $sgpr2, $vgpr0
21 ; GCN-NEXT: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
22 ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
25 ; GCN-NEXT: successors: %bb.3(0x80000000)
26 ; GCN-NEXT: liveins: $vgpr0
28 ; GCN-NEXT: S_BRANCH %bb.3
30 ; GCN-NEXT: bb.2.else:
31 ; GCN-NEXT: successors:
33 ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
34 ; GCN-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
35 ; GCN-NEXT: S_WAITCNT 3952
39 %cc = icmp sgt i32 %a, 0
40 br i1 %cc, label %if, label %else
43 else: ; preds = %entry
44 store volatile i32 0, ptr addrspace(1) undef
; Two chained conditionals: %a > 0 selects %if, otherwise %b > 0 selects
; %else.if, otherwise %else.
; The check lines visible here expect two separate paths to branch to the
; same block, %bb.5:
;   - the block after the entry block (only successor %bb.5) ends in
;     S_BRANCH %bb.5;
;   - bb.3.else.if copies $vgpr1 into $vgpr0 and then also ends in
;     S_BRANCH %bb.5.
; bb.4.else lists no successors, materialises 0 in $vgpr0 and performs the
; volatile global store.
; NOTE(review): the contents of %bb.5 are not visible in this excerpt.
48 define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, i32 inreg %b, float %c, float %d) #0 {
49 ; GCN-LABEL: name: test_unify_return_to_epilog_into_end_block
51 ; GCN-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000)
52 ; GCN-NEXT: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
54 ; GCN-NEXT: S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
55 ; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
58 ; GCN-NEXT: successors: %bb.5(0x80000000)
59 ; GCN-NEXT: liveins: $vgpr0
61 ; GCN-NEXT: S_BRANCH %bb.5
63 ; GCN-NEXT: bb.2.else.if.cond:
64 ; GCN-NEXT: successors: %bb.3(0x80000000), %bb.4(0x00000000)
65 ; GCN-NEXT: liveins: $sgpr3, $vgpr1
67 ; GCN-NEXT: S_CMP_LT_I32 killed renamable $sgpr3, 1, implicit-def $scc
68 ; GCN-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
70 ; GCN-NEXT: bb.3.else.if:
71 ; GCN-NEXT: successors: %bb.5(0x80000000)
72 ; GCN-NEXT: liveins: $vgpr1
74 ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $exec
75 ; GCN-NEXT: S_BRANCH %bb.5
77 ; GCN-NEXT: bb.4.else:
78 ; GCN-NEXT: successors:
80 ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
81 ; GCN-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
82 ; GCN-NEXT: S_WAITCNT 3952
86 %cc = icmp sgt i32 %a, 0
87 br i1 %cc, label %if, label %else.if.cond
90 else.if.cond: ; preds = %entry
91 %cc1 = icmp sgt i32 %b, 0
92 br i1 %cc1, label %else.if, label %else
93 else.if: ; preds = %else.if.cond
95 else: ; preds = %else.if.cond
96 store volatile i32 0, ptr addrspace(1) undef
; Combines calls to llvm.amdgcn.kill(i1 false) with a function that returns
; an undef { <4 x float> }.
; IR shape visible here: the entry code computes 1.0 / %val and branches to
; %kill0 when the quotient is < 0.0, otherwise to %flow; %flow branches back
; to itself while %val < 0.0 and otherwise goes to %end.
; The check lines visible here expect bb.2.end and bb.7.kill0 to both end in
; S_BRANCH %bb.9, and the last visible checks are $exec = S_MOV_B64 0,
; EXP_DONE and S_ENDPGM 0.
; NOTE(review): the label of the block holding those last three checks, and
; the contents of %bb.9, are not visible in this excerpt.
100 define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(float %val) #0 {
101 ; GCN-LABEL: name: test_return_to_epilog_with_optimized_kill
102 ; GCN: bb.0 (%ir-block.0):
103 ; GCN-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000)
104 ; GCN-NEXT: liveins: $vgpr0
106 ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 $vgpr0, implicit $mode, implicit $exec
107 ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
108 ; GCN-NEXT: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $mode, implicit $exec
109 ; GCN-NEXT: $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
110 ; GCN-NEXT: renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
111 ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.3, implicit $exec
113 ; GCN-NEXT: bb.1.Flow1:
114 ; GCN-NEXT: successors: %bb.6(0x40000000), %bb.2(0x40000000)
115 ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
117 ; GCN-NEXT: $sgpr2_sgpr3 = S_ANDN2_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
118 ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.6, implicit $exec
120 ; GCN-NEXT: bb.2.end:
121 ; GCN-NEXT: successors: %bb.9(0x80000000)
122 ; GCN-NEXT: liveins: $sgpr2_sgpr3
124 ; GCN-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
125 ; GCN-NEXT: S_BRANCH %bb.9
127 ; GCN-NEXT: bb.3.flow.preheader:
128 ; GCN-NEXT: successors: %bb.4(0x80000000)
129 ; GCN-NEXT: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3
131 ; GCN-NEXT: nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $mode, implicit $exec
132 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0
134 ; GCN-NEXT: bb.4.flow:
135 ; GCN-NEXT: successors: %bb.5(0x04000000), %bb.4(0x7c000000)
136 ; GCN-NEXT: liveins: $vcc, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
138 ; GCN-NEXT: renamable $sgpr6_sgpr7 = S_AND_B64 $exec, renamable $vcc, implicit-def $scc
139 ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_OR_B64 killed renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def $scc
140 ; GCN-NEXT: $exec = S_ANDN2_B64 $exec, renamable $sgpr4_sgpr5, implicit-def $scc
141 ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.4, implicit $exec
143 ; GCN-NEXT: bb.5.Flow:
144 ; GCN-NEXT: successors: %bb.6(0x40000000), %bb.2(0x40000000)
145 ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
147 ; GCN-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
148 ; GCN-NEXT: $sgpr2_sgpr3 = S_ANDN2_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
149 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
151 ; GCN-NEXT: bb.6.kill0:
152 ; GCN-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
153 ; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
155 ; GCN-NEXT: dead renamable $sgpr0_sgpr1 = S_ANDN2_B64 killed renamable $sgpr0_sgpr1, $exec, implicit-def $scc
156 ; GCN-NEXT: S_CBRANCH_SCC0 %bb.8, implicit $scc
158 ; GCN-NEXT: bb.7.kill0:
159 ; GCN-NEXT: successors: %bb.9(0x80000000)
160 ; GCN-NEXT: liveins: $sgpr2_sgpr3, $scc
162 ; GCN-NEXT: $exec = S_MOV_B64 0
163 ; GCN-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
164 ; GCN-NEXT: S_BRANCH %bb.9
167 ; GCN-NEXT: $exec = S_MOV_B64 0
168 ; GCN-NEXT: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
169 ; GCN-NEXT: S_ENDPGM 0
172 %.i0 = fdiv reassoc nnan nsz arcp contract afn float 1.000000e+00, %val
173 %cmp0 = fcmp olt float %.i0, 0.000000e+00
174 br i1 %cmp0, label %kill0, label %flow
176 kill0: ; preds = %entry
177 call void @llvm.amdgcn.kill(i1 false)
180 flow: ; preds = %flow, %entry
181 %cmp1 = fcmp olt float %val, 0.000000e+00
182 br i1 %cmp1, label %flow, label %end
; NOTE(review): no branch visible in this excerpt targets %kill1 (%flow only
; branches to itself and to %end), so the "preds" annotations on %kill1 and
; %end look stale -- confirm against the complete file before relying on them.
184 kill1: ; preds = %flow
185 call void @llvm.amdgcn.kill(i1 false)
188 end: ; preds = %kill0, %kill1, %flow
189 ret { <4 x float> } undef
; Declaration of the kill intrinsic called from the %kill0 and %kill1 blocks
; of test_return_to_epilog_with_optimized_kill.
192 declare void @llvm.amdgcn.kill(i1) #0
; Attribute group #0 is referenced by every function definition in this
; excerpt and by the intrinsic declaration; it only sets nounwind.
194 attributes #0 = { nounwind }