1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=SI,GCN,SI-NOHSA,FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=VI,VI-NOHSA,GCN,FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefixes=EG,FUNC %s
; Test case local_size_x -- the kernel stores the result of
; @llvm.r600.read.local.size.x() to %out.
; Per the checks below, the EG run expects a MEM_RAT_CACHELESS STORE_RAW of a
; T register that is also the destination of a MOV with KC0[1].Z, while the
; SI-NOHSA and VI-NOHSA runs expect an s_load_dword from s[0:1] at offset 0x6
; and 0x18 respectively, then a copy to a VGPR and a buffer_store_dword.
; NOTE(review): none of the three FileCheck invocations at the top of this
; file is given a CI-HSA or VI-HSA prefix, so the two *-HSA lines below look
; inert -- confirm before relying on them.
6 ; FUNC-LABEL: {{^}}local_size_x:
7 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
8 ; EG: MOV * [[VAL]], KC0[1].Z
10 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
11 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
12 ; CI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x1
13 ; VI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x4
15 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
16 ; GCN: buffer_store_dword [[VVAL]]
17 define amdgpu_kernel void @local_size_x(ptr addrspace(1) %out) {
19 %0 = call i32 @llvm.r600.read.local.size.x() #0
20 store i32 %0, ptr addrspace(1) %out
; Test case local_size_y -- the kernel stores the result of
; @llvm.r600.read.local.size.y() to %out.
; Per the checks below, the EG run expects the stored T register to be the
; destination of a MOV with KC0[1].W, and the SI-NOHSA / VI-NOHSA runs expect
; an s_load_dword from s[0:1] at offset 0x7 / 0x1c, followed by a copy to a
; VGPR and a buffer_store_dword of that VGPR.
24 ; FUNC-LABEL: {{^}}local_size_y:
25 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
26 ; EG: MOV * [[VAL]], KC0[1].W
28 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
29 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
30 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
31 ; GCN: buffer_store_dword [[VVAL]]
32 define amdgpu_kernel void @local_size_y(ptr addrspace(1) %out) {
34 %0 = call i32 @llvm.r600.read.local.size.y() #0
35 store i32 %0, ptr addrspace(1) %out
; Test case local_size_z -- the kernel stores the result of
; @llvm.r600.read.local.size.z() to %out.
; Per the checks below, the EG run expects the stored T register to be the
; destination of a MOV with KC0[2].X, and the SI-NOHSA / VI-NOHSA runs expect
; an s_load_dword from s[0:1] at offset 0x8 / 0x20, followed by a copy to a
; VGPR and a buffer_store_dword of that VGPR.
39 ; FUNC-LABEL: {{^}}local_size_z:
40 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
41 ; EG: MOV * [[VAL]], KC0[2].X
43 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
44 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
45 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
46 ; GCN: buffer_store_dword [[VVAL]]
47 define amdgpu_kernel void @local_size_z(ptr addrspace(1) %out) {
49 %0 = call i32 @llvm.r600.read.local.size.z() #0
50 store i32 %0, ptr addrspace(1) %out
; Test case local_size_xy -- the kernel reads the x and y local sizes and
; stores %val to %out.
; The SI-NOHSA / VI-NOHSA checks expect both sizes to arrive through a single
; s_load_dwordx2 from s[0:1] at offset 0x6 / 0x18; X and Y capture the low and
; high register numbers of the loaded pair. The GCN checks then expect an
; s_mul_i32 of s[X] and s[Y], a copy of the product to a VGPR, and a
; buffer_store_dword of that VGPR.
; The Y capture uses [0-9]+ (one or more digits), matching the X capture and
; the equivalent checks in local_size_xyz, so register numbers of any width
; are accepted.
54 ; FUNC-LABEL: {{^}}local_size_xy:
55 ; SI-NOHSA-DAG: s_load_dwordx2 s[[[X:[0-9]+]]:[[Y:[0-9]+]]], s[0:1], 0x6
56 ; VI-NOHSA-DAG: s_load_dwordx2 s[[[X:[0-9]+]]:[[Y:[0-9]+]]], s[0:1], 0x18
57 ; GCN: s_mul_i32 [[VAL:s[0-9]+]], s[[X]], s[[Y]]
58 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
59 ; GCN: buffer_store_dword [[VVAL]]
60 define amdgpu_kernel void @local_size_xy(ptr addrspace(1) %out) {
62 %x = call i32 @llvm.r600.read.local.size.x() #0
63 %y = call i32 @llvm.r600.read.local.size.y() #0
65 store i32 %val, ptr addrspace(1) %out
; Test case local_size_xz -- the kernel reads the x and z local sizes and
; stores %val to %out.
; The SI-NOHSA / VI-NOHSA checks expect two separate s_load_dword
; instructions from s[0:1] (offsets 0x6 and 0x8, or 0x18 and 0x20), in either
; order since they are -DAG checks. The GCN checks then expect an s_mul_i32
; of the two loaded SGPRs, a copy to a VGPR, and a buffer_store_dword.
; NOTE(review): no FileCheck invocation at the top of this file is given the
; HSA prefix, so the HSA-DAG line below looks inert -- confirm.
69 ; FUNC-LABEL: {{^}}local_size_xz:
71 ; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
72 ; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
73 ; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
74 ; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
75 ; HSA-DAG: s_and_b32 [[X:s[0-9]+]], [[XY]], 0xffff
76 ; GCN: s_mul_i32 [[VAL:s[0-9]+]], [[X]], [[Z]]
77 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
78 ; GCN: buffer_store_dword [[VVAL]]
79 define amdgpu_kernel void @local_size_xz(ptr addrspace(1) %out) {
81 %x = call i32 @llvm.r600.read.local.size.x() #0
82 %z = call i32 @llvm.r600.read.local.size.z() #0
84 store i32 %val, ptr addrspace(1) %out
; Test case local_size_yz -- the kernel multiplies the y and z local sizes
; and stores the product to %out.
; The SI-NOHSA / VI-NOHSA checks expect one s_load_dwordx4 from s[0:1] at
; offset 0x7 / 0x1c. LOAD is a numeric capture of the first register number
; of the loaded range; the GCN s_mul_i32 check then requires its operands to
; be registers LOAD+0 and LOAD+1, followed by a copy to a VGPR and a
; buffer_store_dword of that VGPR.
; NOTE(review): no FileCheck invocation at the top of this file is given the
; HSA prefix, so the two enable_sgpr_* lines below look inert -- confirm.
88 ; FUNC-LABEL: {{^}}local_size_yz:
89 ; HSA: enable_sgpr_private_segment_buffer = 1
90 ; HSA: enable_sgpr_dispatch_ptr = 1
92 ; SI-NOHSA-DAG: s_load_dwordx4 s[[[#LOAD:]]:{{[0-9]+}}], s[0:1], 0x7
93 ; VI-NOHSA-DAG: s_load_dwordx4 s[[[#LOAD:]]:{{[0-9]+}}], s[0:1], 0x1c
94 ; GCN: s_mul_i32 [[VAL:s[0-9]+]], s[[#LOAD + 0]], s[[#LOAD + 1]]
95 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
96 ; GCN: buffer_store_dword [[VVAL]]
97 define amdgpu_kernel void @local_size_yz(ptr addrspace(1) %out) {
99 %y = call i32 @llvm.r600.read.local.size.y() #0
100 %z = call i32 @llvm.r600.read.local.size.z() #0
101 %val = mul i32 %y, %z
102 store i32 %val, ptr addrspace(1) %out
; Test case local_size_xyz -- the kernel reads all three local sizes, adds
; %z to %xy, and stores the sum to %out.
; The SI-NOHSA / VI-NOHSA checks expect x and y to arrive through one
; s_load_dwordx2 (offset 0x6 / 0x18) and z through a separate s_load_dword
; (offset 0x8 / 0x20), in either order. The GCN checks then expect
; s_mul_i32 of s[X] and s[Y], s_add_i32 of that product with s[Z], a copy to
; a VGPR, and a buffer_store_dword of that VGPR.
; NOTE(review): no FileCheck invocation at the top of this file is given the
; HSA prefix, so the two enable_sgpr_* lines below look inert -- confirm.
106 ; FUNC-LABEL: {{^}}local_size_xyz:
107 ; HSA: enable_sgpr_private_segment_buffer = 1
108 ; HSA: enable_sgpr_dispatch_ptr = 1
110 ; SI-NOHSA-DAG: s_load_dwordx2 s[[[X:[0-9]+]]:[[Y:[0-9]+]]], s[0:1], 0x6
111 ; SI-NOHSA-DAG: s_load_dword s[[Z:[0-9]+]], s[0:1], 0x8
112 ; VI-NOHSA-DAG: s_load_dwordx2 s[[[X:[0-9]+]]:[[Y:[0-9]+]]], s[0:1], 0x18
113 ; VI-NOHSA-DAG: s_load_dword s[[Z:[0-9]+]], s[0:1], 0x20
114 ; GCN: s_mul_i32 [[M:s[0-9]+]], s[[X]], s[[Y]]
115 ; GCN: s_add_i32 [[VAL:s[0-9]+]], [[M]], s[[Z]]
116 ; GCN-DAG: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
117 ; GCN: buffer_store_dword [[VVAL]]
118 define amdgpu_kernel void @local_size_xyz(ptr addrspace(1) %out) {
120 %x = call i32 @llvm.r600.read.local.size.x() #0
121 %y = call i32 @llvm.r600.read.local.size.y() #0
122 %z = call i32 @llvm.r600.read.local.size.z() #0
124 %xyz = add i32 %xy, %z
125 store i32 %xyz, ptr addrspace(1) %out
; Test case local_size_x_known_bits -- the kernel shifts the x local size
; left by 16, logically right by 16, and stores the result to %out.
; The checks expect the SGPR loaded from s[0:1] (offset 0x6 on SI, 0x18 on
; VI) to be the value copied to the VGPR, with the buffer_store_dword on the
; line directly after that copy.
; NOTE(review): presumably the shl/lshr pair is expected to fold away because
; the upper 16 bits are known to be zero -- confirm against the backend.
129 ; FUNC-LABEL: {{^}}local_size_x_known_bits:
130 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
131 ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
133 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
134 ; GCN-NEXT: buffer_store_dword [[VVAL]]
135 define amdgpu_kernel void @local_size_x_known_bits(ptr addrspace(1) %out) {
137 %size = call i32 @llvm.r600.read.local.size.x() #0
138 %shl = shl i32 %size, 16
139 %shr = lshr i32 %shl, 16
140 store i32 %shr, ptr addrspace(1) %out
; Test case local_size_y_known_bits -- the kernel shifts the y local size
; left by 16, logically right by 16, and stores the result to %out.
; The checks expect the SGPR loaded from s[0:1] (offset 0x7 on SI, 0x1c on
; VI) to be the value copied to the VGPR, with the buffer_store_dword on the
; line directly after that copy.
; NOTE(review): presumably the shl/lshr pair is expected to fold away because
; the upper 16 bits are known to be zero -- confirm against the backend.
144 ; FUNC-LABEL: {{^}}local_size_y_known_bits:
145 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
146 ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
148 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
149 ; GCN-NEXT: buffer_store_dword [[VVAL]]
150 define amdgpu_kernel void @local_size_y_known_bits(ptr addrspace(1) %out) {
152 %size = call i32 @llvm.r600.read.local.size.y() #0
153 %shl = shl i32 %size, 16
154 %shr = lshr i32 %shl, 16
155 store i32 %shr, ptr addrspace(1) %out
; Test case local_size_z_known_bits -- the kernel shifts the z local size
; left by 16, logically right by 16, and stores the result to %out.
; The checks expect the SGPR loaded from s[0:1] (offset 0x8 on SI, 0x20 on
; VI) to be the value copied to the VGPR, with the buffer_store_dword on the
; line directly after that copy.
; NOTE(review): presumably the shl/lshr pair is expected to fold away because
; the upper 16 bits are known to be zero -- confirm against the backend.
159 ; FUNC-LABEL: {{^}}local_size_z_known_bits:
160 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
161 ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
163 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
164 ; GCN-NEXT: buffer_store_dword [[VVAL]]
165 define amdgpu_kernel void @local_size_z_known_bits(ptr addrspace(1) %out) {
167 %size = call i32 @llvm.r600.read.local.size.z() #0
168 %shl = shl i32 %size, 16
169 %shr = lshr i32 %shl, 16
170 store i32 %shr, ptr addrspace(1) %out
; Declarations of the three local-size intrinsics called by the kernels in
; this file. Each takes no arguments and returns an i32.
174 declare i32 @llvm.r600.read.local.size.x() #0
175 declare i32 @llvm.r600.read.local.size.y() #0
176 declare i32 @llvm.r600.read.local.size.z() #0
; Attribute group #0 is attached to the declarations above and to every
; intrinsic call site in this file.
178 attributes #0 = { nounwind readnone }