1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
5 ; Legacy intrinsics that just read implicit parameters
; ngroups_x kernel: calls llvm.r600.read.ngroups.x and stores the i32 result
; to the global pointer %out.
; Expected output per the directives below:
;   - non-HSA SI and VI runs load the value with s_load_dword from s[0:1]
;     at offset 0x0, copy it to a VGPR and store it with buffer_store_dword;
;   - the EG-prefixed run stores a T register that is written by a MOV from
;     KC0[0].X.
7 ; FUNC-LABEL: {{^}}ngroups_x:
8 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
9 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
10 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
11 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
13 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
14 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
15 define amdgpu_kernel void @ngroups_x (i32 addrspace(1)* %out) {
17 %0 = call i32 @llvm.r600.read.ngroups.x() #0
18 store i32 %0, i32 addrspace(1)* %out
; ngroups_y kernel: calls llvm.r600.read.ngroups.y and stores the i32 result
; to the global pointer %out.
; Expected output per the directives below: s_load_dword from s[0:1] at
; offset 0x1 (SI) or 0x4 (VI) on the non-HSA runs, and a MOV from KC0[0].Y
; on the EG-prefixed run.
; NOTE(review): the SI/VI offset pair looks like dword vs. byte units for the
; same slot - confirm against the ISA documentation.
22 ; FUNC-LABEL: {{^}}ngroups_y:
23 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
24 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
25 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
26 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
28 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
29 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
30 define amdgpu_kernel void @ngroups_y (i32 addrspace(1)* %out) {
32 %0 = call i32 @llvm.r600.read.ngroups.y() #0
33 store i32 %0, i32 addrspace(1)* %out
; ngroups_z kernel: calls llvm.r600.read.ngroups.z and stores the i32 result
; to the global pointer %out.
; Expected output per the directives below: s_load_dword from s[0:1] at
; offset 0x2 (SI) or 0x8 (VI) on the non-HSA runs, and a MOV from KC0[0].Z
; on the EG-prefixed run.
37 ; FUNC-LABEL: {{^}}ngroups_z:
38 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
39 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
40 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
41 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
43 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
44 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
45 define amdgpu_kernel void @ngroups_z (i32 addrspace(1)* %out) {
47 %0 = call i32 @llvm.r600.read.ngroups.z() #0
48 store i32 %0, i32 addrspace(1)* %out
; global_size_x kernel: calls llvm.r600.read.global.size.x and stores the i32
; result to the global pointer %out.
; Expected output per the directives below: s_load_dword from s[0:1] at
; offset 0x3 (SI) or 0xc (VI) on the non-HSA runs, and a MOV from KC0[0].W
; on the EG-prefixed run.
52 ; FUNC-LABEL: {{^}}global_size_x:
53 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
54 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
55 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
56 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
58 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
59 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
60 define amdgpu_kernel void @global_size_x (i32 addrspace(1)* %out) {
62 %0 = call i32 @llvm.r600.read.global.size.x() #0
63 store i32 %0, i32 addrspace(1)* %out
; global_size_y kernel: calls llvm.r600.read.global.size.y and stores the i32
; result to the global pointer %out.
; Expected output per the directives below: s_load_dword from s[0:1] at
; offset 0x4 (SI) or 0x10 (VI) on the non-HSA runs, and a MOV from KC0[1].X
; on the EG-prefixed run.
67 ; FUNC-LABEL: {{^}}global_size_y:
68 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
69 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
70 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
71 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
73 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
74 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
75 define amdgpu_kernel void @global_size_y (i32 addrspace(1)* %out) {
77 %0 = call i32 @llvm.r600.read.global.size.y() #0
78 store i32 %0, i32 addrspace(1)* %out
; global_size_z kernel: calls llvm.r600.read.global.size.z and stores the i32
; result to the global pointer %out.
; Expected output per the directives below: s_load_dword from s[0:1] at
; offset 0x5 (SI) or 0x14 (VI) on the non-HSA runs, and a MOV from KC0[1].Y
; on the EG-prefixed run.
82 ; FUNC-LABEL: {{^}}global_size_z:
83 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
84 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
85 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
86 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
88 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
89 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
90 define amdgpu_kernel void @global_size_z (i32 addrspace(1)* %out) {
92 %0 = call i32 @llvm.r600.read.global.size.z() #0
93 store i32 %0, i32 addrspace(1)* %out
; local_size_x kernel: calls llvm.r600.read.local.size.x and stores the i32
; result to the global pointer %out.
; Expected output per the directives below: s_load_dword from s[0:1] at
; offset 0x6 (SI) or 0x18 (VI) on the non-HSA runs, and a MOV from KC0[1].Z
; on the EG-prefixed run.
97 ; FUNC-LABEL: {{^}}local_size_x:
98 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
99 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
100 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
101 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
103 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
104 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
105 define amdgpu_kernel void @local_size_x (i32 addrspace(1)* %out) {
107 %0 = call i32 @llvm.r600.read.local.size.x() #0
108 store i32 %0, i32 addrspace(1)* %out
; local_size_y kernel: calls llvm.r600.read.local.size.y and stores the i32
; result to the global pointer %out.
; Expected output per the directives below: s_load_dword from s[0:1] at
; offset 0x7 (SI) or 0x1c (VI) on the non-HSA runs, and a MOV from KC0[1].W
; on the EG-prefixed run.
112 ; FUNC-LABEL: {{^}}local_size_y:
113 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
114 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
115 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
116 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
118 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
119 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
120 define amdgpu_kernel void @local_size_y (i32 addrspace(1)* %out) {
122 %0 = call i32 @llvm.r600.read.local.size.y() #0
123 store i32 %0, i32 addrspace(1)* %out
; local_size_z kernel: calls llvm.r600.read.local.size.z and stores the i32
; result to the global pointer %out.
; Expected output per the directives below: s_load_dword from s[0:1] at
; offset 0x8 (SI) or 0x20 (VI) on the non-HSA runs, and a MOV from KC0[2].X
; on the EG-prefixed run.
127 ; FUNC-LABEL: {{^}}local_size_z:
128 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
129 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
130 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
131 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
133 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
134 ; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
135 define amdgpu_kernel void @local_size_z (i32 addrspace(1)* %out) {
137 %0 = call i32 @llvm.r600.read.local.size.z() #0
138 store i32 %0, i32 addrspace(1)* %out
142 ; Legacy use of r600 intrinsics by GCN
144 ; The tgid values are stored in sgprs offset by the number of user sgprs.
; tgid_x_legacy kernel: calls llvm.r600.read.tgid.x and stores the i32 result
; to the global pointer %out.
; Expected output per the directives below: on the non-HSA GCN runs the
; stored VGPR is copied from exactly s2 (the {{$}} anchor rejects s20-s29)
; and USER_SGPR is reported as 2; on all GCN runs TGID_X_EN is 1, TGID_Y_EN
; and TGID_Z_EN are 0, and TIDIG_COMP_CNT is 0.
147 ; FUNC-LABEL: {{^}}tgid_x_legacy:
148 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
149 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
151 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
152 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
153 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
154 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
155 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
156 define amdgpu_kernel void @tgid_x_legacy(i32 addrspace(1)* %out) {
158 %0 = call i32 @llvm.r600.read.tgid.x() #0
159 store i32 %0, i32 addrspace(1)* %out
; tgid_y_legacy kernel: calls llvm.r600.read.tgid.y and stores the i32 result
; to the global pointer %out.
; Expected output per the directives below: on the non-HSA GCN runs the
; stored VGPR is copied from s3 and USER_SGPR is reported as 2.
; The s3 operand is anchored with {{$}}, matching the tgid_x_legacy and
; tgid_z_legacy checks, so that s30-s39 cannot satisfy the match.
163 ; FUNC-LABEL: {{^}}tgid_y_legacy:
164 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
165 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
167 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
168 define amdgpu_kernel void @tgid_y_legacy(i32 addrspace(1)* %out) {
170 %0 = call i32 @llvm.r600.read.tgid.y() #0
171 store i32 %0, i32 addrspace(1)* %out
; tgid_z_legacy kernel: calls llvm.r600.read.tgid.z and stores the i32 result
; to the global pointer %out.
; Expected output per the directives below: on the non-HSA GCN runs the
; stored VGPR is copied from exactly s3 and USER_SGPR is reported as 2; on
; all GCN runs TGID_X_EN and TGID_Z_EN are 1, TGID_Y_EN is 0, and
; TIDIG_COMP_CNT is 0.
; NOTE(review): z landing in s3 is presumably because TGID_Y_EN is 0, so no
; SGPR is reserved for y - confirm against the backend documentation.
175 ; FUNC-LABEL: {{^}}tgid_z_legacy:
176 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
177 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
179 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
180 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
181 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
182 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
183 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
184 define amdgpu_kernel void @tgid_z_legacy(i32 addrspace(1)* %out) {
186 %0 = call i32 @llvm.r600.read.tgid.z() #0
187 store i32 %0, i32 addrspace(1)* %out
; tidig_x_legacy kernel: calls llvm.r600.read.tidig.x and stores the i32
; result to the global pointer %out.
; Expected output per the directives below (non-HSA GCN runs only): a
; .AMDGPU.config section containing .long 47180 immediately followed by
; .long 132, and a buffer_store_dword whose data operand is v0.
; The config-section directives are placed before the label directive for
; this kernel.
; NOTE(review): 47180 is 0xB84C and 132 is 0x84; presumably a register
; address / value pair for COMPUTE_PGM_RSRC2 - confirm the field decoding.
191 ; GCN-NOHSA: .section .AMDGPU.config
192 ; GCN-NOHSA: .long 47180
193 ; GCN-NOHSA-NEXT: .long 132{{$}}
195 ; FUNC-LABEL: {{^}}tidig_x_legacy:
196 ; GCN-NOHSA: buffer_store_dword v0
197 define amdgpu_kernel void @tidig_x_legacy(i32 addrspace(1)* %out) {
199 %0 = call i32 @llvm.r600.read.tidig.x() #0
200 store i32 %0, i32 addrspace(1)* %out
; tidig_y_legacy kernel: calls llvm.r600.read.tidig.y and stores the i32
; result to the global pointer %out.
; Expected output per the directives below (non-HSA GCN runs only): a
; .AMDGPU.config section containing .long 47180 immediately followed by
; .long 2180, and a buffer_store_dword whose data operand is v1.
; NOTE(review): 2180 is 0x884, i.e. 0x84 with bit 11 also set; presumably
; that bit belongs to the TIDIG_COMP_CNT field - confirm.
204 ; GCN-NOHSA: .section .AMDGPU.config
205 ; GCN-NOHSA: .long 47180
206 ; GCN-NOHSA-NEXT: .long 2180{{$}}
208 ; FUNC-LABEL: {{^}}tidig_y_legacy:
210 ; GCN-NOHSA: buffer_store_dword v1
211 define amdgpu_kernel void @tidig_y_legacy(i32 addrspace(1)* %out) {
213 %0 = call i32 @llvm.r600.read.tidig.y() #0
214 store i32 %0, i32 addrspace(1)* %out
; tidig_z_legacy kernel: calls llvm.r600.read.tidig.z and stores the i32
; result to the global pointer %out.
; Expected output per the directives below (non-HSA GCN runs only): a
; .AMDGPU.config section containing .long 47180 immediately followed by
; .long 4228, and a buffer_store_dword whose data operand is v2.
; NOTE(review): 4228 is 0x1084, i.e. 0x84 with bit 12 also set; presumably
; that bit belongs to the TIDIG_COMP_CNT field - confirm.
218 ; GCN-NOHSA: .section .AMDGPU.config
219 ; GCN-NOHSA: .long 47180
220 ; GCN-NOHSA-NEXT: .long 4228{{$}}
222 ; FUNC-LABEL: {{^}}tidig_z_legacy:
223 ; GCN-NOHSA: buffer_store_dword v2
224 define amdgpu_kernel void @tidig_z_legacy(i32 addrspace(1)* %out) {
226 %0 = call i32 @llvm.r600.read.tidig.z() #0
227 store i32 %0, i32 addrspace(1)* %out
; Declarations of the llvm.r600.read.* intrinsics called by the kernels in
; this file: ngroups, global.size, local.size, tgid and tidig, each in x, y
; and z variants. Every intrinsic takes no arguments, returns i32, and uses
; attribute group #0, which is defined at the end as readnone.
231 declare i32 @llvm.r600.read.ngroups.x() #0
232 declare i32 @llvm.r600.read.ngroups.y() #0
233 declare i32 @llvm.r600.read.ngroups.z() #0
235 declare i32 @llvm.r600.read.global.size.x() #0
236 declare i32 @llvm.r600.read.global.size.y() #0
237 declare i32 @llvm.r600.read.global.size.z() #0
239 declare i32 @llvm.r600.read.local.size.x() #0
240 declare i32 @llvm.r600.read.local.size.y() #0
241 declare i32 @llvm.r600.read.local.size.z() #0
243 declare i32 @llvm.r600.read.tgid.x() #0
244 declare i32 @llvm.r600.read.tgid.y() #0
245 declare i32 @llvm.r600.read.tgid.z() #0
247 declare i32 @llvm.r600.read.tidig.x() #0
248 declare i32 @llvm.r600.read.tidig.y() #0
249 declare i32 @llvm.r600.read.tidig.z() #0
251 attributes #0 = { readnone }