; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI-NOHSA,GCN-NOHSA,FUNC %s
; RUN: llc -global-isel -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI-NOHSA,GCN-NOHSA,FUNC %s

; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=VI-NOHSA,GCN-NOHSA,FUNC %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=VI-NOHSA,GCN-NOHSA,FUNC %s

; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=EG,FUNC %s

; Legacy intrinsics that just read implicit parameters.

; Kernel under test: stores the value returned by llvm.r600.read.ngroups.x
; to the i32 pointed to by %out.
;
; Expected amdgcn code: a scalar dword load from s[4:5] at immediate offset
; 0x0 (the same immediate on SI and on VI), a copy of the loaded SGPR into a
; VGPR, and a buffer store of that VGPR.
; Expected r600 code: the register that is stored is written from KC0[0].X.

; FUNC-LABEL: {{^}}ngroups_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x0
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
define amdgpu_kernel void @ngroups_x (ptr addrspace(1) %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}

; Kernel under test: stores the value returned by llvm.r600.read.ngroups.y
; to the i32 pointed to by %out.
;
; Expected amdgcn code: a scalar dword load from s[4:5] at immediate offset
; 0x1 on SI and 0x4 on VI (the SI immediate counts dwords, the VI immediate
; counts bytes), a copy of the loaded SGPR into a VGPR, and a buffer store of
; that VGPR.
; Expected r600 code: the register that is stored is written from KC0[0].Y.

; FUNC-LABEL: {{^}}ngroups_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
define amdgpu_kernel void @ngroups_y (ptr addrspace(1) %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}

; Kernel under test: stores the value returned by llvm.r600.read.ngroups.z
; to the i32 pointed to by %out.
;
; Expected amdgcn code: a scalar dword load from s[4:5] at immediate offset
; 0x2 on SI and 0x8 on VI (the SI immediate counts dwords, the VI immediate
; counts bytes), a copy of the loaded SGPR into a VGPR, and a buffer store of
; that VGPR.
; Expected r600 code: the register that is stored is written from KC0[0].Z.

; FUNC-LABEL: {{^}}ngroups_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
define amdgpu_kernel void @ngroups_z (ptr addrspace(1) %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}

; Kernel under test: stores the value returned by
; llvm.r600.read.global.size.x to the i32 pointed to by %out.
;
; Expected amdgcn code: a scalar dword load from s[4:5] at immediate offset
; 0x3 on SI and 0xc on VI (the SI immediate counts dwords, the VI immediate
; counts bytes), a copy of the loaded SGPR into a VGPR, and a buffer store of
; that VGPR.
; Expected r600 code: the register that is stored is written from KC0[0].W.

; FUNC-LABEL: {{^}}global_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
define amdgpu_kernel void @global_size_x (ptr addrspace(1) %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}

; Kernel under test: stores the value returned by
; llvm.r600.read.global.size.y to the i32 pointed to by %out.
;
; Expected amdgcn code: a scalar dword load from s[4:5] at immediate offset
; 0x4 on SI and 0x10 on VI (the SI immediate counts dwords, the VI immediate
; counts bytes), a copy of the loaded SGPR into a VGPR, and a buffer store of
; that VGPR.
; Expected r600 code: the register that is stored is written from KC0[1].X.

; FUNC-LABEL: {{^}}global_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
define amdgpu_kernel void @global_size_y (ptr addrspace(1) %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}

; Kernel under test: stores the value returned by
; llvm.r600.read.global.size.z to the i32 pointed to by %out.
;
; Expected amdgcn code: a scalar dword load from s[4:5] at immediate offset
; 0x5 on SI and 0x14 on VI (the SI immediate counts dwords, the VI immediate
; counts bytes), a copy of the loaded SGPR into a VGPR, and a buffer store of
; that VGPR.
; Expected r600 code: the register that is stored is written from KC0[1].Y.

; FUNC-LABEL: {{^}}global_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
define amdgpu_kernel void @global_size_z (ptr addrspace(1) %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}

; Kernel under test: stores the value returned by
; llvm.r600.read.local.size.x to the i32 pointed to by %out.
;
; Expected amdgcn code: a scalar dword load from s[4:5] at immediate offset
; 0x6 on SI and 0x18 on VI (the SI immediate counts dwords, the VI immediate
; counts bytes), a copy of the loaded SGPR into a VGPR, and a buffer store of
; that VGPR.
; Expected r600 code: the register that is stored is written from KC0[1].Z.

; FUNC-LABEL: {{^}}local_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x18
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Z
define amdgpu_kernel void @local_size_x (ptr addrspace(1) %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}

; Kernel under test: stores the value returned by
; llvm.r600.read.local.size.y to the i32 pointed to by %out.
;
; Expected amdgcn code: a scalar dword load from s[4:5] at immediate offset
; 0x7 on SI and 0x1c on VI (the SI immediate counts dwords, the VI immediate
; counts bytes), a copy of the loaded SGPR into a VGPR, and a buffer store of
; that VGPR.
; Expected r600 code: the register that is stored is written from KC0[1].W.

; FUNC-LABEL: {{^}}local_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x1c
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].W
define amdgpu_kernel void @local_size_y (ptr addrspace(1) %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}

; Kernel under test: stores the value returned by
; llvm.r600.read.local.size.z to the i32 pointed to by %out.
;
; Expected amdgcn code: a scalar dword load from s[4:5] at immediate offset
; 0x8 on SI and 0x20 on VI (the SI immediate counts dwords, the VI immediate
; counts bytes), a copy of the loaded SGPR into a VGPR, and a buffer store of
; that VGPR.
; Expected r600 code: the register that is stored is written from KC0[2].X.

; FUNC-LABEL: {{^}}local_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x20
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[2].X
define amdgpu_kernel void @local_size_z (ptr addrspace(1) %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %0, ptr addrspace(1) %out
  ret void
}

; Declarations of the nine legacy r600 intrinsics called by the kernels in
; this file. Each takes no arguments, returns an i32, and carries attribute
; group #0.
declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

; Attribute group shared by the declarations above and by every call site.
attributes #0 = { readnone }