1 ; Note: uses a randomly selected assumed external call stack size so that the
2 ; test assertions are unlikely to succeed by accident.
4 ; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX7 %s
5 ; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX8 %s
6 ; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX9 %s
7 ; RUN: llc -amdgpu-assume-external-call-stack-size=5310 -mattr=-xnack -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=asm -o - < %s | FileCheck --check-prefixes CHECK,GFX10 %s
9 ; CHECK-LABEL: amdhsa.kernels
11 ; test a kernel without an external call that occurs before its callee in the module
13 ; CHECK: .private_segment_fixed_size: 20
15 ; GFX7: .sgpr_count: 37
16 ; GFX7: .sgpr_spill_count: 0
17 ; GFX7: .vgpr_count: 4
18 ; GFX7: .vgpr_spill_count: 0
20 ; GFX8: .sgpr_count: 39
21 ; GFX8: .sgpr_spill_count: 0
22 ; GFX8: .vgpr_count: 4
23 ; GFX8: .vgpr_spill_count: 0
25 ; GFX9: .sgpr_count: 39
26 ; GFX9: .sgpr_spill_count: 0
27 ; GFX9: .vgpr_count: 4
28 ; GFX9: .vgpr_spill_count: 0
30 ; GFX10: .sgpr_count: 33
31 ; GFX10: .sgpr_spill_count: 0
32 ; GFX10: .vgpr_count: 4
33 ; GFX10: .vgpr_spill_count: 0
; Kernel @test1: reads a float through %x, passes it to the internal function
; @f, and writes the value returned by @f back through %x.
; Both memory accesses are volatile. @f is defined later in the module than
; this kernel, which is the ordering this case exercises.
34 define amdgpu_kernel void @test1(ptr %x) {
35 %1 = load volatile float, ptr %x
36 %2 = call float @f(float %1)
37 store volatile float %2, ptr %x
; Internal function @f (attribute group #0): allocates four floats in
; addrspace(5), stores 3.0 through the resulting pointer, loads that value
; back, and adds it to %arg0. The store and the load are both volatile.
; This is the callee of @test1 and @test2; it makes no calls itself in the
; lines shown here.
41 define internal float @f(float %arg0) #0 {
42 %stack = alloca float, i32 4, align 4, addrspace(5)
43 store volatile float 3.0, ptr addrspace(5) %stack
44 %val = load volatile float, ptr addrspace(5) %stack
45 %add = fadd float %arg0, %val
49 ; test a kernel without an external call that occurs after its callee in the module
51 ; CHECK: .private_segment_fixed_size: 20
53 ; GFX7: .sgpr_count: 37
54 ; GFX7: .sgpr_spill_count: 0
55 ; GFX7: .vgpr_count: 4
56 ; GFX7: .vgpr_spill_count: 0
58 ; GFX8: .sgpr_count: 39
59 ; GFX8: .sgpr_spill_count: 0
60 ; GFX8: .vgpr_count: 4
61 ; GFX8: .vgpr_spill_count: 0
63 ; GFX9: .sgpr_count: 39
64 ; GFX9: .sgpr_spill_count: 0
65 ; GFX9: .vgpr_count: 4
66 ; GFX9: .vgpr_spill_count: 0
68 ; GFX10: .sgpr_count: 33
69 ; GFX10: .sgpr_spill_count: 0
70 ; GFX10: .vgpr_count: 4
71 ; GFX10: .vgpr_spill_count: 0
; Kernel @test2: same instruction sequence as @test1 (volatile load through
; %x, call to @f, volatile store of the result through %x), but placed after
; the definition of @f in the module, so the callee is seen before the caller.
72 define amdgpu_kernel void @test2(ptr %x) {
73 %1 = load volatile float, ptr %x
74 %2 = call float @f(float %1)
75 store volatile float %2, ptr %x
79 ; test a kernel with an external call that occurs before its callee in the module
81 ; CHECK: .private_segment_fixed_size: 5310
83 ; GFX7: .sgpr_count: 37
84 ; GFX7: .sgpr_spill_count: 0
85 ; GFX7: .vgpr_count: 32
86 ; GFX7: .vgpr_spill_count: 0
88 ; GFX8: .sgpr_count: 39
89 ; GFX8: .sgpr_spill_count: 0
90 ; GFX8: .vgpr_count: 32
91 ; GFX8: .vgpr_spill_count: 0
93 ; GFX9: .sgpr_count: 39
94 ; GFX9: .sgpr_spill_count: 0
95 ; GFX9: .vgpr_count: 32
96 ; GFX9: .vgpr_spill_count: 0
98 ; GFX10: .sgpr_count: 35
99 ; GFX10: .sgpr_spill_count: 0
100 ; GFX10: .vgpr_count: 32
101 ; GFX10: .vgpr_spill_count: 0
102 define amdgpu_kernel void @test3() {
109 ; test a kernel with an external call that occurs after its callee in the module
111 ; CHECK: .private_segment_fixed_size: 5310
113 ; GFX7: .sgpr_count: 37
114 ; GFX7: .sgpr_spill_count: 0
115 ; GFX7: .vgpr_count: 32
116 ; GFX7: .vgpr_spill_count: 0
118 ; GFX8: .sgpr_count: 39
119 ; GFX8: .sgpr_spill_count: 0
120 ; GFX8: .vgpr_count: 32
121 ; GFX8: .vgpr_spill_count: 0
123 ; GFX9: .sgpr_count: 39
124 ; GFX9: .sgpr_spill_count: 0
125 ; GFX9: .vgpr_count: 32
126 ; GFX9: .vgpr_spill_count: 0
128 ; GFX10: .sgpr_count: 35
129 ; GFX10: .sgpr_spill_count: 0
130 ; GFX10: .vgpr_count: 32
131 ; GFX10: .vgpr_spill_count: 0
132 define amdgpu_kernel void @test4() {
137 attributes #0 = { norecurse }
139 !llvm.module.flags = !{!0}
140 !0 = !{i32 1, !"amdgpu_code_object_version", i32 400}