; RUN: llc -mtriple=amdgcn--amdpal -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -mattr=-xnack -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mattr=-xnack -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
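
; Check the PAL metadata emitted for callable amdgpu_gfx functions: the
; amdpal.pipelines register values and the per-function .shader_functions
; entries (SGPR/VGPR counts, stack frame size, LDS size).
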
declare amdgpu_gfx float @extern_func(float) #0
declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0

@funcptr = external hidden unnamed_addr addrspace(4) constant ptr, align 4

define amdgpu_gfx float @no_stack(float %arg0) #0 {
  %add = fadd float %arg0, 1.0
  ret float %add
}

define amdgpu_gfx float @simple_stack(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, ptr addrspace(5) %stack
  %val = load volatile float, ptr addrspace(5) %stack
  %add = fadd float %arg0, %val
  ret float %add
}

define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, ptr addrspace(5) %stack
  %val = load volatile float, ptr addrspace(5) %stack
  %add = fadd float %arg0, %val
  %stack2 = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, ptr addrspace(5) %stack2
  %val2 = load volatile float, ptr addrspace(5) %stack2
  %add2 = fadd float %add, %val2
  ret float %add2
}

define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
bb0:
  %cmp = fcmp ogt float %arg0, 0.0
  br i1 %cmp, label %bb1, label %bb2

bb1:
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, ptr addrspace(5) %stack
  %val = load volatile float, ptr addrspace(5) %stack
  %add = fadd float %arg0, %val
  br label %bb2

bb2:
  %res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
  ret float %res
}

define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
bb0:
  br label %bb1

bb1:
  %ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, ptr addrspace(5) %stack
  %val = load volatile float, ptr addrspace(5) %stack
  %add = fadd float %arg0, %val
  %cmp = icmp sgt i32 %ctr, 0
  %newctr = sub i32 %ctr, 1
  br i1 %cmp, label %bb1, label %bb2

bb2:
  ret float %add
}

define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
  %res = call amdgpu_gfx float @simple_stack(float %arg0)
  ret float %res
}

define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, ptr addrspace(5) %stack
  %val = load volatile float, ptr addrspace(5) %stack
  %res = call amdgpu_gfx float @simple_stack(float %arg0)
  %add = fadd float %res, %val
  ret float %add
}

define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
  %res = call amdgpu_gfx float @extern_func(float %arg0)
  ret float %res
}

define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, ptr addrspace(5) %stack
  %val = load volatile float, ptr addrspace(5) %stack
  %res = call amdgpu_gfx float @extern_func(float %arg0)
  %add = fadd float %res, %val
  ret float %add
}

define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
  %res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
  ret float %res
}

define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
  %fptr = load ptr, ptr addrspace(4) @funcptr
  call amdgpu_gfx void %fptr()
  ret float %arg0
}

define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, ptr addrspace(5) %stack
  %val = load volatile float, ptr addrspace(5) %stack
  %fptr = load ptr, ptr addrspace(4) @funcptr
  call amdgpu_gfx void %fptr()
  %add = fadd float %arg0, %val
  ret float %add
}

define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
  %stack = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, ptr addrspace(5) %stack
  %val = load volatile float, ptr addrspace(5) %stack
  %res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
  %add = fadd float %res, %val
  ret float %add
}

@lds = internal addrspace(3) global [64 x float] undef

define amdgpu_gfx float @simple_lds(float %arg0) #0 {
  %val = load float, ptr addrspace(3) @lds
  ret float %val
}

define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
  %val = load float, ptr addrspace(3) @lds
  %res = call amdgpu_gfx float @simple_lds_recurse(float %val)
  ret float %res
}

attributes #0 = { nounwind }

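; Expected PAL metadata: the pipeline register values, followed by one
; .shader_functions entry per function with its resource usage.
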
; GCN: amdpal.pipelines:
; GCN-NEXT: - .registers:
; GCN-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x28{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: dynamic_stack_loop:
; GCN-NEXT: .lds_size: 0{{$}}
; SDAG-NEXT: .sgpr_count: 0x25{{$}}
; GISEL-NEXT: .sgpr_count: 0x26{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
; GCN-NEXT: multiple_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: no_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: no_stack_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x25{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: no_stack_extern_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: no_stack_extern_call_many_args:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: no_stack_indirect_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: simple_lds:
; GCN-NEXT: .lds_size: 0x100{{$}}
; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: simple_lds_recurse:
; GCN-NEXT: .lds_size: 0x100{{$}}
; GCN-NEXT: .sgpr_count: 0x28{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x29{{$}}
; GCN-NEXT: simple_stack:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
; GCN-NEXT: .vgpr_count: 0x2{{$}}
; GCN-NEXT: simple_stack_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x25{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x4{{$}}
; GCN-NEXT: simple_stack_extern_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: simple_stack_indirect_call:
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: simple_stack_recurse:
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x28{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x2a{{$}}