; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,W32 --enable-var-scope %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,W64 --enable-var-scope %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -S -amdgpu-annotate-uniform < %s | FileCheck --check-prefixes=OPT,OPT-W32 --enable-var-scope %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+wavefrontsize64 -S -amdgpu-annotate-uniform < %s | FileCheck --check-prefixes=OPT,OPT-W64 --enable-var-scope %s
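
; %lid >> 5 is the wave index within the workgroup. With a required 64x1x1
; workgroup every lane of a wave32 wave computes the same value, so the
; address is annotated !amdgpu.uniform and the load becomes a scalar s_load
; off a readfirstlane'd offset. In wave64 the workgroup is a single wave
; whose lanes disagree (0 for lids 0-31, 1 for lids 32-63), so the load
; stays a divergent global_load.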
; GCN-LABEL: {{^}}lshr_threadid:
; W64: global_load_dword
; W32: v_readfirstlane_b32 [[OFFSET:s[0-9]+]], v0
; W32: s_load_dword s{{[0-9]+}}, s[{{[0-9:]+}}], [[OFFSET]]

; OPT-LABEL: @lshr_threadid
; OPT-W64: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4{{$}}
; OPT-W32: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4, !amdgpu.uniform
define amdgpu_kernel void @lshr_threadid(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) !reqd_work_group_size !0 {
entry:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %div = lshr i32 %lid, 5
  %div4 = zext i32 %div to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4
  %load = load i32, ptr addrspace(1) %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %div4
  store i32 %load, ptr addrspace(1) %arrayidx2, align 4
  ret void
}
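
; Same as above with an arithmetic shift: the workitem id is non-negative,
; so ashr and lshr agree and the uniformity reasoning is unchanged.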
; GCN-LABEL: {{^}}ashr_threadid:
; W64: global_load_dword
; W32: v_readfirstlane_b32 [[OFFSET:s[0-9]+]], v0
; W32: s_load_dword s{{[0-9]+}}, s[{{[0-9:]+}}], [[OFFSET]]

; OPT-LABEL: @ashr_threadid
; OPT-W64: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4{{$}}
; OPT-W32: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4, !amdgpu.uniform
define amdgpu_kernel void @ashr_threadid(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) !reqd_work_group_size !0 {
entry:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %div = ashr i32 %lid, 5
  %div4 = zext i32 %div to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4
  %load = load i32, ptr addrspace(1) %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %div4
  store i32 %load, ptr addrspace(1) %arrayidx2, align 4
  ret void
}
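
; %lid & -32 clears the low five bits, rounding the id down to a multiple
; of 32. Like the shifts above, the result is constant across a wave32 wave
; but differs between the two halves of a wave64 wave.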
; GCN-LABEL: {{^}}and_threadid:
; W64: global_load_dword
; W32: v_readfirstlane_b32 [[OFFSET:s[0-9]+]], v0
; W32: s_load_dword s{{[0-9]+}}, s[{{[0-9:]+}}], [[OFFSET]]

; OPT-LABEL: @and_threadid
; OPT-W64: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4{{$}}
; OPT-W32: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4, !amdgpu.uniform
define amdgpu_kernel void @and_threadid(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) !reqd_work_group_size !0 {
entry:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %lid, -32
  %div4 = zext i32 %and to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4
  %load = load i32, ptr addrspace(1) %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %div4
  store i32 %load, ptr addrspace(1) %arrayidx2, align 4
  ret void
}
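
; Without !reqd_work_group_size the workgroup shape is unknown, so the
; shifted id cannot be proven wave-uniform and both wave sizes use a
; divergent global_load.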
; GCN-LABEL: {{^}}lshr_threadid_no_dim_info:
; GCN: global_load_dword

; OPT-LABEL: @lshr_threadid_no_dim_info
; OPT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4{{$}}
define amdgpu_kernel void @lshr_threadid_no_dim_info(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) {
entry:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %div = lshr i32 %lid, 5
  %div4 = zext i32 %div to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4
  %load = load i32, ptr addrspace(1) %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %div4
  store i32 %load, ptr addrspace(1) %arrayidx2, align 4
  ret void
}
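
; A 65x2x1 workgroup packs workitems (64, 0) and (0, 1) into the same wave,
; where %lid >> 5 gives 2 and 0 respectively, so no uniformity annotation is
; added in either mode.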
; GCN-LABEL: {{^}}lshr_threadid_2d:
; GCN: global_load_dword

; OPT-LABEL: @lshr_threadid_2d
; OPT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4{{$}}
define amdgpu_kernel void @lshr_threadid_2d(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) !reqd_work_group_size !1 {
entry:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %div = lshr i32 %lid, 5
  %div4 = zext i32 %div to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4
  %load = load i32, ptr addrspace(1) %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %div4
  store i32 %load, ptr addrspace(1) %arrayidx2, align 4
  ret void
}
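
; The annotation only handles 1D workgroups: even though the x dimension
; (64) is a multiple of the wave size, the nonunit z dimension of this
; 64x1x2 workgroup defeats it.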
; GCN-LABEL: {{^}}lshr_threadid_3d:
; GCN: global_load_dword

; OPT-LABEL: @lshr_threadid_3d
; OPT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4{{$}}
define amdgpu_kernel void @lshr_threadid_3d(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) !reqd_work_group_size !2 {
entry:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %div = lshr i32 %lid, 5
  %div4 = zext i32 %div to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4
  %load = load i32, ptr addrspace(1) %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %div4
  store i32 %load, ptr addrspace(1) %arrayidx2, align 4
  ret void
}
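
; An uneven 1D workgroup of 65 workitems is still uniform in wave32: waves
; fill in lid order, so all active lanes of a wave share %lid >> 5. Wave64
; diverges within its first wave just as in the 64-wide case.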
; GCN-LABEL: {{^}}lshr_threadid_1d_uneven:
; W64: global_load_dword
; W32: v_readfirstlane_b32 [[OFFSET:s[0-9]+]], v0
; W32: s_load_dword s{{[0-9]+}}, s[{{[0-9:]+}}], [[OFFSET]]

; OPT-LABEL: @lshr_threadid_1d_uneven
; OPT-W64: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4{{$}}
; OPT-W32: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4, !amdgpu.uniform
define amdgpu_kernel void @lshr_threadid_1d_uneven(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) !reqd_work_group_size !3 {
entry:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %div = lshr i32 %lid, 5
  %div4 = zext i32 %div to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4
  %load = load i32, ptr addrspace(1) %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %div4
  store i32 %load, ptr addrspace(1) %arrayidx2, align 4
  ret void
}
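
; The and-mask form of the 65x2x1 case: a wave can again hold workitems from
; both rows, so the masked id is not provably uniform and a global_load is
; used in both modes.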
; GCN-LABEL: {{^}}and_threadid_2d:
; GCN: global_load_dword

; OPT-LABEL: @and_threadid_2d
; OPT: %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4{{$}}
define amdgpu_kernel void @and_threadid_2d(ptr addrspace(1) align 4 %in, ptr addrspace(1) align 4 %out) !reqd_work_group_size !1 {
entry:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %and = and i32 %lid, -32
  %div4 = zext i32 %and to i64
  %arrayidx = getelementptr inbounds i32, ptr addrspace(1) %in, i64 %div4
  %load = load i32, ptr addrspace(1) %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %div4
  store i32 %load, ptr addrspace(1) %arrayidx2, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()

!0 = !{i32 64, i32 1, i32 1}
!1 = !{i32 65, i32 2, i32 1}
!2 = !{i32 64, i32 1, i32 2}
!3 = !{i32 65, i32 1, i32 1}