1 // NOTE
: Assertions have been autogenerated by utils
/update_cc_test_checks.py UTC_ARGS
: --version
5
2 // RUN
: %clang_cc1 %s -emit-llvm -o - -cl-std
=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefix
=AMDGCN %s
4 typedef int int2 __attribute__
((ext_vector_type(2)));
22 struct StructOneMember {
26 struct StructTwoMember {
31 struct LargeStructOneMember {
35 struct LargeStructTwoMember {
40 #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
41 struct LargeStructOneMember g_s;
45 // AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo(
46 // AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
47 // AMDGCN-NEXT: [[ENTRY:.*:]]
48 // AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
49 // AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
50 // AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
51 // AMDGCN-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
52 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
53 // AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
54 // AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
55 // AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
57 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
62 // Expect two mem copies: one for the argument "in", and one for
65 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker(
66 // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
67 // AMDGCN-NEXT: [[ENTRY:.*:]]
68 // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
69 // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
70 // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
71 // AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
72 // AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
73 // AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
74 // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
75 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
76 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
77 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
78 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
79 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
80 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
81 // AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
82 // AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
83 // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
84 // AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
85 // AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
86 // AMDGCN-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
87 // AMDGCN-NEXT: ret void
89 kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
93 // AMDGCN-LABEL: define dso_local void @foo_large(
94 // AMDGCN-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
95 // AMDGCN-NEXT: [[ENTRY:.*:]]
96 // AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
97 // AMDGCN-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
98 // AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
99 // AMDGCN-NEXT: ret void
101 Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
106 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large(
107 // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
108 // AMDGCN-NEXT: [[ENTRY:.*:]]
109 // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
110 // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
111 // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
112 // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
113 // AMDGCN-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
114 // AMDGCN-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
115 // AMDGCN-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
116 // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
117 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
118 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
119 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
120 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
121 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
122 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
123 // AMDGCN-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
124 // AMDGCN-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false)
125 // AMDGCN-NEXT: ret void
127 kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
128 out[0] = foo_large(in[1]);
131 // AMDGCN-LABEL: define dso_local void @FuncOneMember(
132 // AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
133 // AMDGCN-NEXT: [[ENTRY:.*:]]
134 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
135 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
136 // AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
137 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
138 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
139 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
140 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
141 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
142 // AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
143 // AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8
144 // AMDGCN-NEXT: ret void
146 void FuncOneMember(struct StructOneMember u) {
150 // AMDGCN-LABEL: define dso_local void @FuncOneLargeMember(
151 // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
152 // AMDGCN-NEXT: [[ENTRY:.*:]]
153 // AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
154 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
155 // AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
156 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
157 // AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
158 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
159 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
160 // AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
161 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0
162 // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
163 // AMDGCN-NEXT: ret void
165 void FuncOneLargeMember(struct LargeStructOneMember u) {
166 u.x[0] = (int2)(0, 0);
169 #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
170 // AMDGCN-LABEL: define dso_local void @test_indirect_arg_globl(
171 // AMDGCN-SAME: ) #[[ATTR0]] {
172 // AMDGCN-NEXT: [[ENTRY:.*:]]
173 // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
174 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
175 // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
176 // AMDGCN-NEXT: ret void
178 void test_indirect_arg_globl(void) {
179 FuncOneLargeMember(g_s);
183 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
184 // AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
185 // AMDGCN-NEXT: [[ENTRY:.*:]]
186 // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
187 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
188 // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
189 // AMDGCN-NEXT: ret void
191 kernel void test_indirect_arg_local(void) {
192 local struct LargeStructOneMember l_s;
193 FuncOneLargeMember(l_s);
196 // AMDGCN-LABEL: define dso_local void @test_indirect_arg_private(
197 // AMDGCN-SAME: ) #[[ATTR0]] {
198 // AMDGCN-NEXT: [[ENTRY:.*:]]
199 // AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
200 // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
201 // AMDGCN-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
202 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
203 // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
204 // AMDGCN-NEXT: ret void
206 void test_indirect_arg_private(void) {
207 struct LargeStructOneMember p_s;
208 FuncOneLargeMember(p_s);
211 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
212 // AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
213 // AMDGCN-NEXT: [[ENTRY:.*:]]
214 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
215 // AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
216 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
217 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
218 // AMDGCN-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
219 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8
220 // AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
221 // AMDGCN-NEXT: ret void
223 kernel void KernelOneMember(struct StructOneMember u) {
227 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
228 // AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
229 // AMDGCN-NEXT: [[ENTRY:.*:]]
230 // AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
231 // AMDGCN-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr
232 // AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8
233 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8
234 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
235 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
236 // AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
237 // AMDGCN-NEXT: ret void
239 kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
243 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
244 // AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
245 // AMDGCN-NEXT: [[ENTRY:.*:]]
246 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
247 // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
248 // AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
249 // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
250 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
251 // AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
252 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false)
253 // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
254 // AMDGCN-NEXT: ret void
256 kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
257 FuncOneLargeMember(u);
260 // AMDGCN-LABEL: define dso_local void @FuncTwoMember(
261 // AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
262 // AMDGCN-NEXT: [[ENTRY:.*:]]
263 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
264 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
265 // AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
266 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
267 // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
268 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8
269 // AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
270 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8
271 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
272 // AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
273 // AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
274 // AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8
275 // AMDGCN-NEXT: ret void
277 void FuncTwoMember(struct StructTwoMember u) {
281 // AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember(
282 // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
283 // AMDGCN-NEXT: [[ENTRY:.*:]]
284 // AMDGCN-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
285 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
286 // AMDGCN-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
287 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
288 // AMDGCN-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
289 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
290 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
291 // AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
292 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0
293 // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
294 // AMDGCN-NEXT: ret void
296 void FuncLargeTwoMember(struct LargeStructTwoMember u) {
297 u.y[0] = (int2)(0, 0);
300 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
301 // AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
302 // AMDGCN-NEXT: [[ENTRY:.*:]]
303 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
304 // AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
305 // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
306 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
307 // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8
308 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
309 // AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
310 // AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8
311 // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
312 // AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
313 // AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
314 // AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
315 // AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
316 // AMDGCN-NEXT: ret void
318 kernel void KernelTwoMember(struct StructTwoMember u) {
322 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
323 // AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
324 // AMDGCN-NEXT: [[ENTRY:.*:]]
325 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
326 // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
327 // AMDGCN-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
328 // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
329 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
330 // AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
331 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
332 // AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
333 // AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
334 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false)
335 // AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
336 // AMDGCN-NEXT: ret void
338 kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
339 FuncLargeTwoMember(u);
342 // AMDGCN: [[META4]] = !{i32 1, i32 1}
343 // AMDGCN: [[META5]] = !{!"none", !"none"}
344 // AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
345 // AMDGCN: [[META7]] = !{!"", !""}
346 // AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
347 // AMDGCN: [[META9]] = !{}
348 // AMDGCN: [[META10]] = !{i32 0}
349 // AMDGCN: [[META11]] = !{!"none"}
350 // AMDGCN: [[META12]] = !{!"struct StructOneMember"}
351 // AMDGCN: [[META13]] = !{!""}
352 // AMDGCN: [[META14]] = !{i32 1}
353 // AMDGCN: [[META15]] = !{!"struct StructOneMember*"}
354 // AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"}
355 // AMDGCN: [[META17]] = !{!"struct StructTwoMember"}
356 // AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"}