1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -pass-remarks-output=%t -pass-remarks-analysis=kernel-resource-usage -filetype=null %s 2>&1 | FileCheck -check-prefix=STDERR %s
2 ; RUN: FileCheck -check-prefix=REMARK %s < %t
4 ; STDERR: remark: foo.cl:27:0: Function Name: test_kernel
5 ; STDERR-NEXT: remark: foo.cl:27:0: TotalSGPRs: 28
6 ; STDERR-NEXT: remark: foo.cl:27:0: VGPRs: 9
7 ; STDERR-NEXT: remark: foo.cl:27:0: AGPRs: 43
8 ; STDERR-NEXT: remark: foo.cl:27:0: ScratchSize [bytes/lane]: 0
9 ; STDERR-NEXT: remark: foo.cl:27:0: Dynamic Stack: False
10 ; STDERR-NEXT: remark: foo.cl:27:0: Occupancy [waves/SIMD]: 5
11 ; STDERR-NEXT: remark: foo.cl:27:0: SGPRs Spill: 0
12 ; STDERR-NEXT: remark: foo.cl:27:0: VGPRs Spill: 0
13 ; STDERR-NEXT: remark: foo.cl:27:0: LDS Size [bytes/block]: 512
15 ; REMARK-LABEL: --- !Analysis
16 ; REMARK: Pass: kernel-resource-usage
17 ; REMARK-NEXT: Name: FunctionName
18 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
19 ; REMARK-NEXT: Function: test_kernel
21 ; REMARK-NEXT: - String: 'Function Name: '
22 ; REMARK-NEXT: - FunctionName: test_kernel
24 ; REMARK-NEXT: --- !Analysis
25 ; REMARK-NEXT: Pass: kernel-resource-usage
26 ; REMARK-NEXT: Name: NumSGPR
27 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
28 ; REMARK-NEXT: Function: test_kernel
30 ; REMARK-NEXT: - String: ' TotalSGPRs: '
31 ; REMARK-NEXT: - NumSGPR: '28'
33 ; REMARK-NEXT: --- !Analysis
34 ; REMARK-NEXT: Pass: kernel-resource-usage
35 ; REMARK-NEXT: Name: NumVGPR
36 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
37 ; REMARK-NEXT: Function: test_kernel
39 ; REMARK-NEXT: - String: ' VGPRs: '
40 ; REMARK-NEXT: - NumVGPR: '9'
42 ; REMARK-NEXT: --- !Analysis
43 ; REMARK-NEXT: Pass: kernel-resource-usage
44 ; REMARK-NEXT: Name: NumAGPR
45 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
46 ; REMARK-NEXT: Function: test_kernel
48 ; REMARK-NEXT: - String: ' AGPRs: '
49 ; REMARK-NEXT: - NumAGPR: '43'
51 ; REMARK-NEXT: --- !Analysis
52 ; REMARK-NEXT: Pass: kernel-resource-usage
53 ; REMARK-NEXT: Name: ScratchSize
54 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
55 ; REMARK-NEXT: Function: test_kernel
57 ; REMARK-NEXT: - String: ' ScratchSize [bytes/lane]: '
58 ; REMARK-NEXT: - ScratchSize: '0'
60 ; REMARK-NEXT: --- !Analysis
61 ; REMARK-NEXT: Pass: kernel-resource-usage
62 ; REMARK-NEXT: Name: DynamicStack
63 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
64 ; REMARK-NEXT: Function: test_kernel
66 ; REMARK-NEXT: - String: ' Dynamic Stack: '
67 ; REMARK-NEXT: - DynamicStack: 'False'
69 ; REMARK-NEXT: --- !Analysis
70 ; REMARK-NEXT: Pass: kernel-resource-usage
71 ; REMARK-NEXT: Name: Occupancy
72 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
73 ; REMARK-NEXT: Function: test_kernel
75 ; REMARK-NEXT: - String: ' Occupancy [waves/SIMD]: '
76 ; REMARK-NEXT: - Occupancy: '5'
78 ; REMARK-NEXT: --- !Analysis
79 ; REMARK-NEXT: Pass: kernel-resource-usage
80 ; REMARK-NEXT: Name: SGPRSpill
81 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
82 ; REMARK-NEXT: Function: test_kernel
84 ; REMARK-NEXT: - String: ' SGPRs Spill: '
85 ; REMARK-NEXT: - SGPRSpill: '0'
87 ; REMARK-NEXT: --- !Analysis
88 ; REMARK-NEXT: Pass: kernel-resource-usage
89 ; REMARK-NEXT: Name: VGPRSpill
90 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
91 ; REMARK-NEXT: Function: test_kernel
93 ; REMARK-NEXT: - String: ' VGPRs Spill: '
94 ; REMARK-NEXT: - VGPRSpill: '0'
96 ; REMARK-NEXT: --- !Analysis
97 ; REMARK-NEXT: Pass: kernel-resource-usage
98 ; REMARK-NEXT: Name: BytesLDS
99 ; REMARK-NEXT: DebugLoc: { File: foo.cl, Line: 27, Column: 0 }
100 ; REMARK-NEXT: Function: test_kernel
102 ; REMARK-NEXT: - String: ' LDS Size [bytes/block]: '
103 ; REMARK-NEXT: - BytesLDS: '512'
; LDS (addrspace(3)) array used by @test_kernel: 128 x i32 = 512 bytes, which is
; the "LDS Size [bytes/block]" value the test_kernel expectations look for.
106 @lds = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4
; Kernel whose register usage is pinned by inline-asm clobbers: v8, s23 and a42
; are each clobbered once, and the address of @lds is passed as a VGPR ("v")
; operand so the LDS array is referenced. The expectations for this kernel
; (foo.cl line 27) look for 9 VGPRs, 43 AGPRs and 512 bytes of LDS.
108 define amdgpu_kernel void @test_kernel() !dbg !3 {
109 call void asm sideeffect "; clobber v8", "~{v8}"()
110 call void asm sideeffect "; clobber s23", "~{s23}"()
111 call void asm sideeffect "; clobber a42", "~{a42}"()
112 call void asm sideeffect "; use $0", "v"(ptr addrspace(3) @lds)
116 ; STDERR-NOT: test_func
; Non-kernel function (no amdgpu_kernel calling convention) that clobbers v17,
; s11 and a9 through inline asm. The negative check directly above expects that
; no remark mentioning this function is printed.
117 define void @test_func() !dbg !6 {
118 call void asm sideeffect "; clobber v17", "~{v17}"()
119 call void asm sideeffect "; clobber s11", "~{s11}"()
120 call void asm sideeffect "; clobber a9", "~{a9}"()
124 ; STDERR: remark: foo.cl:8:0: Function Name: empty_kernel
125 ; STDERR-NEXT: remark: foo.cl:8:0: TotalSGPRs: 4
126 ; STDERR-NEXT: remark: foo.cl:8:0: VGPRs: 0
127 ; STDERR-NEXT: remark: foo.cl:8:0: AGPRs: 0
128 ; STDERR-NEXT: remark: foo.cl:8:0: ScratchSize [bytes/lane]: 0
129 ; STDERR-NEXT: remark: foo.cl:8:0: Dynamic Stack: False
130 ; STDERR-NEXT: remark: foo.cl:8:0: Occupancy [waves/SIMD]: 10
131 ; STDERR-NEXT: remark: foo.cl:8:0: SGPRs Spill: 0
132 ; STDERR-NEXT: remark: foo.cl:8:0: VGPRs Spill: 0
133 ; STDERR-NEXT: remark: foo.cl:8:0: LDS Size [bytes/block]: 0
; Kernel entry point attached to debug subprogram !7 (foo.cl line 8). The
; expectations above look for 0 VGPRs, 0 AGPRs, no scratch, no LDS and an
; occupancy of 10 for it.
134 define amdgpu_kernel void @empty_kernel() !dbg !7 {
138 ; STDERR-NOT: empty_func
; Non-kernel function attached to debug subprogram !8. The negative check
; directly above expects that no remark mentioning it is printed.
139 define void @empty_func() !dbg !8 {
143 ; STDERR: remark: foo.cl:64:0: Function Name: test_indirect_call
144 ; STDERR-NEXT: remark: foo.cl:64:0: TotalSGPRs: test_indirect_call.numbered_sgpr+6
145 ; STDERR-NEXT: remark: foo.cl:64:0: VGPRs: test_indirect_call.num_vgpr
146 ; STDERR-NEXT: remark: foo.cl:64:0: AGPRs: test_indirect_call.num_agpr
147 ; STDERR-NEXT: remark: foo.cl:64:0: ScratchSize [bytes/lane]: 0
148 ; STDERR-NEXT: remark: foo.cl:64:0: Dynamic Stack: True
149 ; STDERR-NEXT: remark: foo.cl:64:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 10, max(test_indirect_call.numbered_sgpr+(extrasgprs(test_indirect_call.uses_vcc, test_indirect_call.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_call.num_agpr, test_indirect_call.num_vgpr), 1, 0))
150 ; STDERR-NEXT: remark: foo.cl:64:0: SGPRs Spill: 0
151 ; STDERR-NEXT: remark: foo.cl:64:0: VGPRs Spill: 0
152 ; STDERR-NEXT: remark: foo.cl:64:0: LDS Size [bytes/block]: 0
; Externally defined, hidden constant in addrspace(4) that holds a pointer.
; Both @test_indirect_call and @test_indirect_w_static_stack load from it.
153 @gv.fptr0 = external hidden unnamed_addr addrspace(4) constant ptr, align 4
; Kernel that loads a pointer from @gv.fptr0 (presumably the target of an
; indirect call, going by the kernel's name). The expectations for this kernel
; (foo.cl line 64) are symbolic expressions such as
; test_indirect_call.num_vgpr rather than constants, and "Dynamic Stack" is
; expected to be True.
155 define amdgpu_kernel void @test_indirect_call() !dbg !9 {
156 %fptr = load ptr, ptr addrspace(4) @gv.fptr0
161 ; STDERR: remark: foo.cl:74:0: Function Name: test_indirect_w_static_stack
162 ; STDERR-NEXT: remark: foo.cl:74:0: TotalSGPRs: test_indirect_w_static_stack.numbered_sgpr+6
163 ; STDERR-NEXT: remark: foo.cl:74:0: VGPRs: test_indirect_w_static_stack.num_vgpr
164 ; STDERR-NEXT: remark: foo.cl:74:0: AGPRs: test_indirect_w_static_stack.num_agpr
165 ; STDERR-NEXT: remark: foo.cl:74:0: ScratchSize [bytes/lane]: 144
166 ; STDERR-NEXT: remark: foo.cl:74:0: Dynamic Stack: True
167 ; STDERR-NEXT: remark: foo.cl:74:0: Occupancy [waves/SIMD]: occupancy(10, 4, 256, 8, 10, max(test_indirect_w_static_stack.numbered_sgpr+(extrasgprs(test_indirect_w_static_stack.uses_vcc, test_indirect_w_static_stack.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(test_indirect_w_static_stack.num_agpr, test_indirect_w_static_stack.num_vgpr), 1, 0))
168 ; STDERR-NEXT: remark: foo.cl:74:0: SGPRs Spill: 0
169 ; STDERR-NEXT: remark: foo.cl:74:0: VGPRs Spill: 0
170 ; STDERR-NEXT: remark: foo.cl:74:0: LDS Size [bytes/block]: 0
; memset stores through its destination pointer, so the first parameter is
; declared writeonly; a readonly destination would contradict the intrinsic.
172 declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)
; Kernel that combines a fixed-size stack object with a pointer load from
; @gv.fptr0: it allocates a <10 x i64> (80-byte) object in addrspace(5) with
; 16-byte alignment, zero-fills its first 40 bytes with a non-volatile memset,
; and then loads the pointer. The expectations for this kernel (foo.cl line 74)
; look for a ScratchSize of 144 bytes/lane together with "Dynamic Stack: True".
174 define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 {
175 %alloca = alloca <10 x i64>, align 16, addrspace(5)
176 call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 false)
177 %fptr = load ptr, ptr addrspace(4) @gv.fptr0
; Module flags, listed once: !2 is the debug-info version and !11 is the
; AMDHSA code object version.
183 !llvm.module.flags = !{!2, !11}
; Debug-info metadata. Every function in this file is attached to one
; DISubprogram below; its scopeLine is the line number printed in the remark
; location (for example scopeLine 27 -> "foo.cl:27:0" for test_kernel).
186 !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
187 !1 = !DIFile(filename: "foo.cl", directory: "/tmp")
; Module flag: debug-info version 3.
188 !2 = !{i32 2, !"Debug Info Version", i32 3}
189 !3 = distinct !DISubprogram(name: "test_kernel", scope: !1, file: !1, type: !4, scopeLine: 27, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
; Subroutine type shared by all subprograms in this file.
190 !4 = !DISubroutineType(types: !5)
192 !6 = distinct !DISubprogram(name: "test_func", scope: !1, file: !1, type: !4, scopeLine: 42, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
193 !7 = distinct !DISubprogram(name: "empty_kernel", scope: !1, file: !1, type: !4, scopeLine: 8, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
194 !8 = distinct !DISubprogram(name: "empty_func", scope: !1, file: !1, type: !4, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
195 !9 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, type: !4, scopeLine: 64, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
196 !10 = distinct !DISubprogram(name: "test_indirect_w_static_stack", scope: !1, file: !1, type: !4, scopeLine: 74, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0)
; Module flag: AMDHSA code object version, set to 500.
197 !11 = !{i32 1, !"amdhsa_code_object_version", i32 500}