1 ; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT16,ALL %s
2 ; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-8 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT8,ALL,HSA-ELTGE8 %s
3 ; RUN: llc -march=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=2 -mattr=-promote-alloca,+max-private-element-size-4 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefixes=HSA-ELT4,ALL %s
6 ; ALL-LABEL: {{^}}private_elt_size_v4i32:
8 ; HSA-ELT16: private_element_size = 3
9 ; HSA-ELT8: private_element_size = 2
10 ; HSA-ELT4: private_element_size = 1
13 ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16
14 ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
15 ; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
17 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24{{$}}
18 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16
19 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
20 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40
22 ; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
23 ; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
26 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}}
27 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}}
28 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}}
29 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}}
30 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}}
31 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}}
32 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}}
33 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}}
35 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
36 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
37 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}}
38 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}}
39 define amdgpu_kernel void @private_elt_size_v4i32(<4 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
41 %tid = call i32 @llvm.amdgcn.workitem.id.x()
42 %idxprom = sext i32 %tid to i64
43 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
44 %index.load = load i32, i32 addrspace(1)* %gep.index
45 %index = and i32 %index.load, 2
46 %alloca = alloca [2 x <4 x i32>], align 16, addrspace(5)
47 %gep0 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 0
48 %gep1 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 1
49 store <4 x i32> zeroinitializer, <4 x i32> addrspace(5)* %gep0
50 store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %gep1
51 %gep2 = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>] addrspace(5)* %alloca, i32 0, i32 %index
52 %load = load <4 x i32>, <4 x i32> addrspace(5)* %gep2
53 store <4 x i32> %load, <4 x i32> addrspace(1)* %out
57 ; ALL-LABEL: {{^}}private_elt_size_v8i32:
58 ; HSA-ELT16: private_element_size = 3
59 ; HSA-ELT8: private_element_size = 2
60 ; HSA-ELT4: private_element_size = 1
62 ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
63 ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:48
64 ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:64
65 ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:80
67 ; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
68 ; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
71 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
72 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40
73 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:48
74 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:56
75 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:88
76 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:80
77 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:72
78 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:64
80 ; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
81 ; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
84 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}}
85 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}}
86 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}}
87 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}}
88 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:48{{$}}
89 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:52{{$}}
90 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:56{{$}}
91 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:60{{$}}
92 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:64{{$}}
93 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:68{{$}}
94 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:72{{$}}
95 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:76{{$}}
96 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:80{{$}}
97 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:84{{$}}
98 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:88{{$}}
99 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:92{{$}}
101 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
102 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
103 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}}
104 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}}
105 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:16{{$}}
106 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:20{{$}}
107 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:24{{$}}
108 ; HSA-ELT4-DAG: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:28{{$}}
109 define amdgpu_kernel void @private_elt_size_v8i32(<8 x i32> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
111 %tid = call i32 @llvm.amdgcn.workitem.id.x()
112 %idxprom = sext i32 %tid to i64
113 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
114 %index.load = load i32, i32 addrspace(1)* %gep.index
115 %index = and i32 %index.load, 2
116 %alloca = alloca [2 x <8 x i32>], align 32, addrspace(5)
117 %gep0 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 0
118 %gep1 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 1
119 store <8 x i32> zeroinitializer, <8 x i32> addrspace(5)* %gep0
120 store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, <8 x i32> addrspace(5)* %gep1
121 %gep2 = getelementptr inbounds [2 x <8 x i32>], [2 x <8 x i32>] addrspace(5)* %alloca, i32 0, i32 %index
122 %load = load <8 x i32>, <8 x i32> addrspace(5)* %gep2
123 store <8 x i32> %load, <8 x i32> addrspace(1)* %out
128 ; ALL-LABEL: {{^}}private_elt_size_i64:
129 ; HSA-ELT16: private_element_size = 3
130 ; HSA-ELT8: private_element_size = 2
131 ; HSA-ELT4: private_element_size = 1
133 ; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], 0 offset:1
134 ; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, {{off|v[0-9]}}, s[0:3], 0 offset:2
135 ; HSA-ELTGE8-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[0:3], 0 offen
136 ; HSA-ELTGE8: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
139 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}}
140 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}}
141 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}}
142 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}}
144 ; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
145 ; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}}
146 ; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
147 define amdgpu_kernel void @private_elt_size_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
149 %tid = call i32 @llvm.amdgcn.workitem.id.x()
150 %idxprom = sext i32 %tid to i64
151 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
152 %index.load = load i32, i32 addrspace(1)* %gep.index
153 %index = and i32 %index.load, 2
154 %alloca = alloca [2 x i64], align 16, addrspace(5)
155 %gep0 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 0
156 %gep1 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 1
157 store i64 0, i64 addrspace(5)* %gep0
158 store i64 34359738602, i64 addrspace(5)* %gep1
159 %gep2 = getelementptr inbounds [2 x i64], [2 x i64] addrspace(5)* %alloca, i32 0, i32 %index
160 %load = load i64, i64 addrspace(5)* %gep2
161 store i64 %load, i64 addrspace(1)* %out
165 ; ALL-LABEL: {{^}}private_elt_size_f64:
166 ; HSA-ELT16: private_element_size = 3
167 ; HSA-ELT8: private_element_size = 2
168 ; HSA-ELT4: private_element_size = 1
170 ; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16
171 ; HSA-ELTGE8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24
172 ; HSA-ELTGE8-DAG: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, s[0:3], 0 offen
173 ; HSA-ELTGE8: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, [[VAL]]
176 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}}
177 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}}
178 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}}
179 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}}
181 ; HSA-ELT4-DAG: buffer_load_dword v[[HI:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
182 ; HSA-ELT4-DAG: buffer_load_dword v[[LO:[0-9]+]], v{{[0-9]+}}, s[0:3], 0 offen{{$}}
183 ; HSA-ELT4: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
184 define amdgpu_kernel void @private_elt_size_f64(double addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
186 %tid = call i32 @llvm.amdgcn.workitem.id.x()
187 %idxprom = sext i32 %tid to i64
188 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
189 %index.load = load i32, i32 addrspace(1)* %gep.index
190 %index = and i32 %index.load, 2
191 %alloca = alloca [2 x double], align 16, addrspace(5)
192 %gep0 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 0
193 %gep1 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 1
194 store double 0.0, double addrspace(5)* %gep0
195 store double 4.0, double addrspace(5)* %gep1
196 %gep2 = getelementptr inbounds [2 x double], [2 x double] addrspace(5)* %alloca, i32 0, i32 %index
197 %load = load double, double addrspace(5)* %gep2
198 store double %load, double addrspace(1)* %out
202 ; ALL-LABEL: {{^}}private_elt_size_v2i64:
203 ; HSA-ELT16: private_element_size = 3
204 ; HSA-ELT8: private_element_size = 2
205 ; HSA-ELT4: private_element_size = 1
207 ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16
208 ; HSA-ELT16-DAG: buffer_store_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
209 ; HSA-ELT16-DAG: buffer_load_dwordx4 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
211 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:16{{$}}
212 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:24
213 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:40
214 ; HSA-ELT8-DAG: buffer_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 offset:32
216 ; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
217 ; HSA-ELT8: buffer_load_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s[0:3], 0 offen
220 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:16{{$}}
221 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:20{{$}}
222 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:24{{$}}
223 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:28{{$}}
224 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:32{{$}}
225 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:36{{$}}
226 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:40{{$}}
227 ; HSA-ELT4-DAG: buffer_store_dword {{v[0-9]+}}, off, s[0:3], 0 offset:44{{$}}
229 ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:12{{$}}
230 ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:8{{$}}
231 ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen offset:4{{$}}
232 ; HSA-ELT4: buffer_load_dword {{v[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen{{$}}
233 define amdgpu_kernel void @private_elt_size_v2i64(<2 x i64> addrspace(1)* %out, i32 addrspace(1)* %index.array) #0 {
235 %tid = call i32 @llvm.amdgcn.workitem.id.x()
236 %idxprom = sext i32 %tid to i64
237 %gep.index = getelementptr inbounds i32, i32 addrspace(1)* %index.array, i64 %idxprom
238 %index.load = load i32, i32 addrspace(1)* %gep.index
239 %index = and i32 %index.load, 2
240 %alloca = alloca [2 x <2 x i64>], align 16, addrspace(5)
241 %gep0 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 0
242 %gep1 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 1
243 store <2 x i64> zeroinitializer, <2 x i64> addrspace(5)* %gep0
244 store <2 x i64> <i64 1, i64 2>, <2 x i64> addrspace(5)* %gep1
245 %gep2 = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>] addrspace(5)* %alloca, i32 0, i32 %index
246 %load = load <2 x i64>, <2 x i64> addrspace(5)* %gep2
247 store <2 x i64> %load, <2 x i64> addrspace(1)* %out
251 declare i32 @llvm.amdgcn.workitem.id.x() #1
253 attributes #0 = { nounwind }
254 attributes #1 = { nounwind readnone }