1 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
2 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
3 ; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
4 ; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
5 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
6 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
7 ; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
8 ; RUN: llc -global-isel -march=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
10 declare i32 @llvm.amdgcn.workitem.id.x() #0
11 declare i32 @llvm.amdgcn.workitem.id.y() #0
12 declare i32 @llvm.amdgcn.workitem.id.z() #0
14 ; MESA: .section .AMDGPU.config
16 ; MESA-NEXT: .long 132{{$}}
18 ; ALL-LABEL: {{^}}test_workitem_id_x:
19 ; MESA3D: enable_vgpr_workitem_id = 0
22 ; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0
24 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 0
25 define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 {
26 %id = call i32 @llvm.amdgcn.workitem.id.x()
27 store i32 %id, ptr addrspace(1) %out
31 ; MESA: .section .AMDGPU.config
33 ; MESA-NEXT: .long 2180{{$}}
35 ; ALL-LABEL: {{^}}test_workitem_id_y:
36 ; MESA3D: enable_vgpr_workitem_id = 1
38 ; MESA3D: {{buffer|flat}}_store_dword {{.*}}v1
40 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
41 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
42 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1
43 define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 {
44 %id = call i32 @llvm.amdgcn.workitem.id.y()
45 store i32 %id, ptr addrspace(1) %out
49 ; MESA: .section .AMDGPU.config
51 ; MESA-NEXT: .long 4228{{$}}
53 ; ALL-LABEL: {{^}}test_workitem_id_z:
54 ; MESA3D: enable_vgpr_workitem_id = 2
56 ; MESA3D: {{buffer|flat}}_store_dword {{.*}}v2
58 ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
59 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]]
60 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2
61 define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) #1 {
62 %id = call i32 @llvm.amdgcn.workitem.id.z()
63 store i32 %id, ptr addrspace(1) %out
67 ; ALL-LABEL: {{^}}test_workitem_id_x_usex2:
69 ; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0
71 ; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0
72 define amdgpu_kernel void @test_workitem_id_x_usex2(ptr addrspace(1) %out) #1 {
73 %id0 = call i32 @llvm.amdgcn.workitem.id.x()
74 store volatile i32 %id0, ptr addrspace(1) %out
76 %id1 = call i32 @llvm.amdgcn.workitem.id.x()
77 store volatile i32 %id1, ptr addrspace(1) %out
81 ; ALL-LABEL: {{^}}test_workitem_id_x_use_outside_entry:
83 ; ALL: {{flat|global}}_store_{{dword|b32}}
85 ; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0
86 define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(ptr addrspace(1) %out, i32 %arg) #1 {
88 store volatile i32 0, ptr addrspace(1) %out
89 %cond = icmp eq i32 %arg, 0
90 br i1 %cond, label %bb1, label %bb2
93 %id = call i32 @llvm.amdgcn.workitem.id.x()
94 store volatile i32 %id, ptr addrspace(1) %out
101 ; ALL-LABEL: {{^}}test_workitem_id_x_func:
103 ; HSA-NEXT: v_and_b32_e32 v2, 0x3ff, v31
104 ; MESA-NEXT: v_and_b32_e32 v2, 0x3ff, v31
105 define void @test_workitem_id_x_func(ptr addrspace(1) %out) #1 {
106 %id = call i32 @llvm.amdgcn.workitem.id.x()
107 store i32 %id, ptr addrspace(1) %out
111 ; ALL-LABEL: {{^}}test_workitem_id_y_func:
112 ; HSA: v_bfe_u32 v2, v31, 10, 10
113 ; MESA: v_bfe_u32 v2, v31, 10, 10
114 define void @test_workitem_id_y_func(ptr addrspace(1) %out) #1 {
115 %id = call i32 @llvm.amdgcn.workitem.id.y()
116 store i32 %id, ptr addrspace(1) %out
120 ; ALL-LABEL: {{^}}test_workitem_id_z_func:
121 ; HSA: v_bfe_u32 v2, v31, 20, 10
122 ; MESA: v_bfe_u32 v2, v31, 20, 10
123 define void @test_workitem_id_z_func(ptr addrspace(1) %out) #1 {
124 %id = call i32 @llvm.amdgcn.workitem.id.z()
125 store i32 %id, ptr addrspace(1) %out
129 ; FIXME: Should be able to avoid enabling in kernel inputs
130 ; FIXME: Packed tid should avoid the and
131 ; ALL-LABEL: {{^}}test_reqd_workgroup_size_x_only:
132 ; MESA3D: enable_vgpr_workitem_id = 0
134 ; ALL-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
135 ; UNPACKED-DAG: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
137 ; PACKED: v_and_b32_e32 [[MASKED:v[0-9]+]], 0x3ff, v0
138 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
140 ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
141 ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
142 define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_group_size !0 {
143 %id.x = call i32 @llvm.amdgcn.workitem.id.x()
144 %id.y = call i32 @llvm.amdgcn.workitem.id.y()
145 %id.z = call i32 @llvm.amdgcn.workitem.id.z()
146 store volatile i32 %id.x, ptr %out
147 store volatile i32 %id.y, ptr %out
148 store volatile i32 %id.z, ptr %out
152 ; ALL-LABEL: {{^}}test_reqd_workgroup_size_y_only:
153 ; MESA3D: enable_vgpr_workitem_id = 1
155 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
156 ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
158 ; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
160 ; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 10
161 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
163 ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
164 define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) !reqd_work_group_size !1 {
165 %id.x = call i32 @llvm.amdgcn.workitem.id.x()
166 %id.y = call i32 @llvm.amdgcn.workitem.id.y()
167 %id.z = call i32 @llvm.amdgcn.workitem.id.z()
168 store volatile i32 %id.x, ptr %out
169 store volatile i32 %id.y, ptr %out
170 store volatile i32 %id.z, ptr %out
174 ; ALL-LABEL: {{^}}test_reqd_workgroup_size_z_only:
175 ; MESA3D: enable_vgpr_workitem_id = 2
177 ; ALL: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
178 ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
179 ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]]
181 ; UNPACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2
183 ; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 20
184 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]]
185 define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_group_size !2 {
186 %id.x = call i32 @llvm.amdgcn.workitem.id.x()
187 %id.y = call i32 @llvm.amdgcn.workitem.id.y()
188 %id.z = call i32 @llvm.amdgcn.workitem.id.z()
189 store volatile i32 %id.x, ptr %out
190 store volatile i32 %id.y, ptr %out
191 store volatile i32 %id.z, ptr %out
195 attributes #0 = { nounwind readnone }
196 attributes #1 = { nounwind }
198 !0 = !{i32 64, i32 1, i32 1}
199 !1 = !{i32 1, i32 64, i32 1}
200 !2 = !{i32 1, i32 1, i32 64}
202 !llvm.module.flags = !{!99}
203 !99 = !{i32 1, !"amdgpu_code_object_version", i32 400}