1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; local_size_x: stores the result of @llvm.r600.read.local.size.x to %out.
; Expected GCN output is one s_load_dword from s[0:1] (immediate 0x6 in the SI
; run, 0x18 in the VI run), a v_mov_b32 of the loaded value and a
; buffer_store_dword of that register. The r600/redwood run expects a MOV from
; KC0[1].Z feeding a MEM_RAT_CACHELESS STORE_RAW.
; NOTE(review): the CI-HSA and VI-HSA lines below use prefixes that no RUN line
; visible in this file enables, so they appear to be unchecked - confirm whether
; an HSA RUN line is missing or the lines are stale.
6 ; FUNC-LABEL: {{^}}local_size_x:
7 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
8 ; EG: MOV * [[VAL]], KC0[1].Z
10 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
11 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
12 ; CI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x1
13 ; VI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x4
15 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
16 ; GCN: buffer_store_dword [[VVAL]]
17 define amdgpu_kernel void @local_size_x(i32 addrspace(1)* %out) {
; The intrinsic result is written to %out unmodified.
19 %0 = call i32 @llvm.r600.read.local.size.x() #0
20 store i32 %0, i32 addrspace(1)* %out
; local_size_y: stores the result of @llvm.r600.read.local.size.y to %out.
; Expected GCN output is one s_load_dword from s[0:1] (immediate 0x7 in the SI
; run, 0x1c in the VI run), a v_mov_b32 of the loaded value and a
; buffer_store_dword of that register. The r600/redwood run expects a MOV from
; KC0[1].W feeding a MEM_RAT_CACHELESS STORE_RAW.
24 ; FUNC-LABEL: {{^}}local_size_y:
25 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
26 ; EG: MOV * [[VAL]], KC0[1].W
28 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
29 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
30 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
31 ; GCN: buffer_store_dword [[VVAL]]
32 define amdgpu_kernel void @local_size_y(i32 addrspace(1)* %out) {
; The intrinsic result is written to %out unmodified.
34 %0 = call i32 @llvm.r600.read.local.size.y() #0
35 store i32 %0, i32 addrspace(1)* %out
; local_size_z: stores the result of @llvm.r600.read.local.size.z to %out.
; Expected GCN output is one s_load_dword from s[0:1] (immediate 0x8 in the SI
; run, 0x20 in the VI run), a v_mov_b32 of the loaded value and a
; buffer_store_dword of that register. The r600/redwood run expects a MOV from
; KC0[2].X feeding a MEM_RAT_CACHELESS STORE_RAW.
39 ; FUNC-LABEL: {{^}}local_size_z:
40 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
41 ; EG: MOV * [[VAL]], KC0[2].X
43 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
44 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
45 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
46 ; GCN: buffer_store_dword [[VVAL]]
47 define amdgpu_kernel void @local_size_z(i32 addrspace(1)* %out) {
; The intrinsic result is written to %out unmodified.
49 %0 = call i32 @llvm.r600.read.local.size.z() #0
50 store i32 %0, i32 addrspace(1)* %out
; local_size_xy: stores the product of the x and y local sizes to %out.
; Expected GCN output loads X and Y (in either order), copies Y into a vector
; register, multiplies X by that copy with v_mul_u32_u24 and stores the product.
54 ; FUNC-LABEL: {{^}}local_size_xy:
55 ; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
56 ; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
57 ; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
58 ; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
59 ; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
60 ; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VY]]
61 ; GCN: buffer_store_dword [[VAL]]
62 define amdgpu_kernel void @local_size_xy(i32 addrspace(1)* %out) {
64 %x = call i32 @llvm.r600.read.local.size.x() #0
65 %y = call i32 @llvm.r600.read.local.size.y() #0
; %val must be defined before the store uses it; the x*y product is what the
; v_mul_u32_u24 check above matches.
66 %val = mul i32 %x, %y
67 store i32 %val, i32 addrspace(1)* %out
; local_size_xz: stores the product of the x and z local sizes to %out.
; Expected GCN output loads X and Z (in either order), copies Z into a vector
; register, multiplies X by that copy with v_mul_u32_u24 and stores the product.
; NOTE(review): the HSA-DAG line below uses a prefix that no RUN line visible in
; this file enables, and it refers to an XY variable that is not captured in
; this function - it appears to be unchecked; confirm whether it is stale.
71 ; FUNC-LABEL: {{^}}local_size_xz:
73 ; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
74 ; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
75 ; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
76 ; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
77 ; HSA-DAG: s_and_b32 [[X:s[0-9]+]], [[XY]], 0xffff
78 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
79 ; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VZ]]
80 ; GCN: buffer_store_dword [[VAL]]
81 define amdgpu_kernel void @local_size_xz(i32 addrspace(1)* %out) {
83 %x = call i32 @llvm.r600.read.local.size.x() #0
84 %z = call i32 @llvm.r600.read.local.size.z() #0
; %val must be defined before the store uses it; the x*z product is what the
; v_mul_u32_u24 check above matches.
85 %val = mul i32 %x, %z
86 store i32 %val, i32 addrspace(1)* %out
; local_size_yz: stores the product of the y and z local sizes to %out.
; Expected GCN output loads Y and Z (in either order), copies Z into a vector
; register, multiplies Y by that copy with v_mul_u32_u24 and stores the product.
; NOTE(review): the two HSA lines below use a prefix that no RUN line visible in
; this file enables, so they appear to be unchecked - confirm whether an HSA RUN
; line is missing or the lines are stale.
90 ; FUNC-LABEL: {{^}}local_size_yz:
91 ; HSA: enable_sgpr_private_segment_buffer = 1
92 ; HSA: enable_sgpr_dispatch_ptr = 1
94 ; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
95 ; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
96 ; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
97 ; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
98 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
99 ; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[Y]], [[VZ]]
100 ; GCN: buffer_store_dword [[VAL]]
101 define amdgpu_kernel void @local_size_yz(i32 addrspace(1)* %out) {
103 %y = call i32 @llvm.r600.read.local.size.y() #0
104 %z = call i32 @llvm.r600.read.local.size.z() #0
; y*z is the value the v_mul_u32_u24 check above matches; it is stored to %out.
105 %val = mul i32 %y, %z
106 store i32 %val, i32 addrspace(1)* %out
; local_size_xyz: stores x*y + z, computed from the three local sizes, to %out.
; Expected GCN output loads X, Y and Z (in any order), copies Y and Z into
; vector registers, combines all three with a single v_mad_u32_u24 and stores
; the result.
; NOTE(review): the two HSA lines below use a prefix that no RUN line visible in
; this file enables, so they appear to be unchecked - confirm whether an HSA RUN
; line is missing or the lines are stale.
110 ; FUNC-LABEL: {{^}}local_size_xyz:
111 ; HSA: enable_sgpr_private_segment_buffer = 1
112 ; HSA: enable_sgpr_dispatch_ptr = 1
114 ; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
115 ; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
116 ; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
117 ; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
118 ; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
119 ; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
120 ; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
121 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
122 ; GCN: v_mad_u32_u24 [[VAL:v[0-9]+]], [[X]], [[VY]], [[VZ]]
123 ; GCN: buffer_store_dword [[VAL]]
124 define amdgpu_kernel void @local_size_xyz(i32 addrspace(1)* %out) {
126 %x = call i32 @llvm.r600.read.local.size.x() #0
127 %y = call i32 @llvm.r600.read.local.size.y() #0
128 %z = call i32 @llvm.r600.read.local.size.z() #0
; %xy must be defined before the add uses it; x*y followed by + z is the
; multiply-add shape the v_mad_u32_u24 check above matches.
129 %xy = mul i32 %x, %y
130 %xyz = add i32 %xy, %z
131 store i32 %xyz, i32 addrspace(1)* %out
; local_size_x_known_bits: reads the x local size, clears its upper 16 bits
; with a shl/lshr pair and stores the result to %out.
; The checks expect only the load (immediate 0x6 in the SI run, 0x18 in the VI
; run), a v_mov_b32 and, on the very next output line, the buffer store - i.e.
; no separate instruction for the masking between the move and the store.
; NOTE(review): presumably the mask is expected to fold away because the local
; size is known to fit in 16 bits - confirm against the backend.
135 ; FUNC-LABEL: {{^}}local_size_x_known_bits:
136 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
137 ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
139 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
140 ; GCN-NEXT: buffer_store_dword [[VVAL]]
141 define amdgpu_kernel void @local_size_x_known_bits(i32 addrspace(1)* %out) {
143 %size = call i32 @llvm.r600.read.local.size.x() #0
; Shift left then logical shift right by 16: keeps only the low 16 bits of %size.
144 %shl = shl i32 %size, 16
145 %shr = lshr i32 %shl, 16
146 store i32 %shr, i32 addrspace(1)* %out
; local_size_y_known_bits: reads the y local size, clears its upper 16 bits
; with a shl/lshr pair and stores the result to %out.
; The checks expect only the load (immediate 0x7 in the SI run, 0x1c in the VI
; run), a v_mov_b32 and, on the very next output line, the buffer store - i.e.
; no separate instruction for the masking between the move and the store.
; NOTE(review): presumably the mask is expected to fold away because the local
; size is known to fit in 16 bits - confirm against the backend.
150 ; FUNC-LABEL: {{^}}local_size_y_known_bits:
151 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
152 ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
154 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
155 ; GCN-NEXT: buffer_store_dword [[VVAL]]
156 define amdgpu_kernel void @local_size_y_known_bits(i32 addrspace(1)* %out) {
158 %size = call i32 @llvm.r600.read.local.size.y() #0
; Shift left then logical shift right by 16: keeps only the low 16 bits of %size.
159 %shl = shl i32 %size, 16
160 %shr = lshr i32 %shl, 16
161 store i32 %shr, i32 addrspace(1)* %out
; local_size_z_known_bits: reads the z local size, clears its upper 16 bits
; with a shl/lshr pair and stores the result to %out.
; The checks expect only the load (immediate 0x8 in the SI run, 0x20 in the VI
; run), a v_mov_b32 and, on the very next output line, the buffer store - i.e.
; no separate instruction for the masking between the move and the store.
; NOTE(review): presumably the mask is expected to fold away because the local
; size is known to fit in 16 bits - confirm against the backend.
165 ; FUNC-LABEL: {{^}}local_size_z_known_bits:
166 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
167 ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
169 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
170 ; GCN-NEXT: buffer_store_dword [[VVAL]]
171 define amdgpu_kernel void @local_size_z_known_bits(i32 addrspace(1)* %out) {
173 %size = call i32 @llvm.r600.read.local.size.z() #0
; Shift left then logical shift right by 16: keeps only the low 16 bits of %size.
174 %shl = shl i32 %size, 16
175 %shr = lshr i32 %shl, 16
176 store i32 %shr, i32 addrspace(1)* %out
; Declarations of the three local-size intrinsics (x, y, z) called by the
; kernels in this file; each takes no arguments and returns an i32.
180 declare i32 @llvm.r600.read.local.size.x() #0
181 declare i32 @llvm.r600.read.local.size.y() #0
182 declare i32 @llvm.r600.read.local.size.z() #0
; Attribute group #0, attached to the declarations above and named at every
; call site of these intrinsics in this file.
184 attributes #0 = { nounwind readnone }