1 // NOTE
: Assertions have been autogenerated by utils
/update_cc_test_checks.py UTC_ARGS
: --version
5
2 // RUN
: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -check-prefixes
=X86 %s
3 // RUN
: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -check-prefixes
=AMDGCN %s
4 // RUN
: %clang_cc1 %s -emit-llvm -o - -cl-std
=CL2.0 -O0 -triple amdgcn | FileCheck -check-prefixes
=AMDGCN20 %s
5 // RUN
: %clang_cc1 %s -emit-llvm -o - -cl-std
=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -check-prefixes
=SPIR %s
6 // RUN
: %clang_cc1 %s -emit-llvm -o - -cl-std
=CL3.0 -O0 -triple amdgcn -cl-ext
=+__opencl_c_program_scope_global_variables | FileCheck -check-prefixes
=AMDGCN30-GVAR %s
7 // RUN
: %clang_cc1 %s -emit-llvm -o - -cl-std
=CL3.0 -O0 -triple amdgcn | FileCheck -check-prefixes
=AMDGCN30 %s
9 typedef int int2 __attribute__
((ext_vector_type(2)));
27 struct StructOneMember {
31 struct StructTwoMember {
36 struct LargeStructOneMember {
40 struct LargeStructTwoMember {
45 #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
46 struct LargeStructOneMember g_s;
50 // X86-LABEL: define void @foo(
51 // X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
52 // X86-NEXT: [[ENTRY:.*:]]
53 // X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4
54 // X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
57 // AMDGCN-LABEL: define dso_local %struct.Mat4X4 @foo(
58 // AMDGCN-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
59 // AMDGCN-NEXT: [[ENTRY:.*:]]
60 // AMDGCN-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
61 // AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
62 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
63 // AMDGCN-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
64 // AMDGCN-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
65 // AMDGCN-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
67 // AMDGCN20-LABEL: define dso_local %struct.Mat4X4 @foo(
68 // AMDGCN20-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
69 // AMDGCN20-NEXT: [[ENTRY:.*:]]
70 // AMDGCN20-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
71 // AMDGCN20-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
72 // AMDGCN20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
73 // AMDGCN20-NEXT: [[IN1:%.*]] = addrspacecast ptr addrspace(5) [[IN]] to ptr
74 // AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr [[IN1]], i32 0, i32 0
75 // AMDGCN20-NEXT: store [9 x i32] [[IN_COERCE]], ptr [[COERCE_DIVE]], align 4
76 // AMDGCN20-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr [[RETVAL_ASCAST]], align 4
77 // AMDGCN20-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
79 // SPIR-LABEL: define dso_local spir_func void @foo(
80 // SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT4X4:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT3X3:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
81 // SPIR-NEXT: [[ENTRY:.*:]]
82 // SPIR-NEXT: ret void
84 // AMDGCN30-GVAR-LABEL: define dso_local %struct.Mat4X4 @foo(
85 // AMDGCN30-GVAR-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
86 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
87 // AMDGCN30-GVAR-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
88 // AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
89 // AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
90 // AMDGCN30-GVAR-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
91 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
92 // AMDGCN30-GVAR-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
94 // AMDGCN30-LABEL: define dso_local %struct.Mat4X4 @foo(
95 // AMDGCN30-SAME: [9 x i32] [[IN_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
96 // AMDGCN30-NEXT: [[ENTRY:.*:]]
97 // AMDGCN30-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
98 // AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4, addrspace(5)
99 // AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(5) [[IN]], i32 0, i32 0
100 // AMDGCN30-NEXT: store [9 x i32] [[IN_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 4
101 // AMDGCN30-NEXT: [[TMP0:%.*]] = load [[STRUCT_MAT4X4]], ptr addrspace(5) [[RETVAL]], align 4
102 // AMDGCN30-NEXT: ret [[STRUCT_MAT4X4]] [[TMP0]]
104 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
110 // X86-LABEL: define spir_kernel void @ker(
111 // X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
112 // X86-NEXT: [[ENTRY:.*:]]
113 // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
114 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
115 // X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4
116 // X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4
117 // X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
118 // X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
119 // X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
120 // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0
121 // X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
122 // X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1
123 // X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false)
124 // X86-NEXT: call void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]]
125 // X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
126 // X86-NEXT: ret void
128 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker(
129 // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
130 // AMDGCN-NEXT: [[ENTRY:.*:]]
131 // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
132 // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
133 // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
134 // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
135 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
136 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
137 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
138 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
139 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
140 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
141 // AMDGCN-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
142 // AMDGCN-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
143 // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
144 // AMDGCN-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
145 // AMDGCN-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
146 // AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
147 // AMDGCN-NEXT: ret void
149 // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker(
150 // AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
151 // AMDGCN20-NEXT: [[ENTRY:.*:]]
152 // AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
153 // AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
154 // AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
155 // AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
156 // AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
157 // AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
158 // AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
159 // AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
160 // AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
161 // AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
162 // AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
163 // AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
164 // AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
165 // AMDGCN20-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
166 // AMDGCN20-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
167 // AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr [[TMP_ASCAST]], i32 0, i32 0
168 // AMDGCN20-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
169 // AMDGCN20-NEXT: store [16 x i32] [[TMP5]], ptr [[TMP4]], align 4
170 // AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 64, i1 false)
171 // AMDGCN20-NEXT: ret void
173 // SPIR-LABEL: define dso_local spir_kernel void @ker(
174 // SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] !kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] !kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
175 // SPIR-NEXT: [[ENTRY:.*:]]
176 // SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
177 // SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
178 // SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4
179 // SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT3X3:%.*]], align 4
180 // SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
181 // SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
182 // SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
183 // SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i32 0
184 // SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
185 // SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3]], ptr addrspace(1) [[TMP1]], i32 1
186 // SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 36, i1 false)
187 // SPIR-NEXT: call spir_func void @foo(ptr dead_on_unwind writable sret([[STRUCT_MAT4X4]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT3X3]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3:[0-9]+]]
188 // SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 64, i1 false)
189 // SPIR-NEXT: ret void
191 // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker(
192 // AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
193 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
194 // AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
195 // AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
196 // AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
197 // AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
198 // AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
199 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
200 // AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
201 // AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
202 // AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
203 // AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
204 // AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
205 // AMDGCN30-GVAR-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
206 // AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
207 // AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
208 // AMDGCN30-GVAR-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
209 // AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
210 // AMDGCN30-GVAR-NEXT: ret void
212 // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker(
213 // AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1:[0-9]+]] !kernel_arg_addr_space [[META4:![0-9]+]] !kernel_arg_access_qual [[META5:![0-9]+]] !kernel_arg_type [[META6:![0-9]+]] !kernel_arg_base_type [[META6]] !kernel_arg_type_qual [[META7:![0-9]+]] {
214 // AMDGCN30-NEXT: [[ENTRY:.*:]]
215 // AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
216 // AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
217 // AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT4X4:%.*]], align 4, addrspace(5)
218 // AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
219 // AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
220 // AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
221 // AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT4X4]], ptr addrspace(1) [[TMP0]], i64 0
222 // AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
223 // AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT3X3:%.*]], ptr addrspace(1) [[TMP1]], i64 1
224 // AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT3X3]], ptr addrspace(1) [[ARRAYIDX1]], i32 0, i32 0
225 // AMDGCN30-NEXT: [[TMP3:%.*]] = load [9 x i32], ptr addrspace(1) [[TMP2]], align 4
226 // AMDGCN30-NEXT: [[CALL:%.*]] = call [[STRUCT_MAT4X4]] @[[FOO:[a-zA-Z0-9_$\"\\.-]*[a-zA-Z_$\"\\.-][a-zA-Z0-9_$\"\\.-]*]]([9 x i32] [[TMP3]]) #[[ATTR3:[0-9]+]]
227 // AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_MAT4X4]], ptr addrspace(5) [[TMP]], i32 0, i32 0
228 // AMDGCN30-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_MAT4X4]] [[CALL]], 0
229 // AMDGCN30-NEXT: store [16 x i32] [[TMP5]], ptr addrspace(5) [[TMP4]], align 4
230 // AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 64, i1 false)
231 // AMDGCN30-NEXT: ret void
233 kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
238 // X86-LABEL: define void @foo_large(
239 // X86-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] {
240 // X86-NEXT: [[ENTRY:.*:]]
241 // X86-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4
242 // X86-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 4
243 // X86-NEXT: ret void
245 // AMDGCN-LABEL: define dso_local void @foo_large(
246 // AMDGCN-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
247 // AMDGCN-NEXT: [[ENTRY:.*:]]
248 // AMDGCN-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
249 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
250 // AMDGCN-NEXT: ret void
252 // AMDGCN20-LABEL: define dso_local void @foo_large(
253 // AMDGCN20-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
254 // AMDGCN20-NEXT: [[ENTRY:.*:]]
255 // AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
256 // AMDGCN20-NEXT: [[IN:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
257 // AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
258 // AMDGCN20-NEXT: ret void
260 // SPIR-LABEL: define dso_local spir_func void @foo_large(
261 // SPIR-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr noundef byval([[STRUCT_MAT32X32:%.*]]) align 4 [[IN:%.*]]) #[[ATTR0]] {
262 // SPIR-NEXT: [[ENTRY:.*:]]
263 // SPIR-NEXT: ret void
265 // AMDGCN30-GVAR-LABEL: define dso_local void @foo_large(
266 // AMDGCN30-GVAR-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
267 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
268 // AMDGCN30-GVAR-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
269 // AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
270 // AMDGCN30-GVAR-NEXT: ret void
272 // AMDGCN30-LABEL: define dso_local void @foo_large(
273 // AMDGCN30-SAME: ptr addrspace(5) dead_on_unwind noalias writable sret([[STRUCT_MAT64X64:%.*]]) align 4 [[AGG_RESULT:%.*]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
274 // AMDGCN30-NEXT: [[ENTRY:.*:]]
275 // AMDGCN30-NEXT: [[IN:%.*]] = alloca [[STRUCT_MAT32X32]], align 4, addrspace(5)
276 // AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 [[IN]], ptr addrspace(5) align 4 [[TMP0]], i64 4096, i1 false)
277 // AMDGCN30-NEXT: ret void
279 Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
285 // X86-LABEL: define spir_kernel void @ker_large(
286 // X86-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
287 // X86-NEXT: [[ENTRY:.*:]]
288 // X86-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
289 // X86-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
290 // X86-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4
291 // X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4
292 // X86-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
293 // X86-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
294 // X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
295 // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0
296 // X86-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
297 // X86-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1
298 // X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false)
299 // X86-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
300 // X86-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
301 // X86-NEXT: ret void
303 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @ker_large(
304 // AMDGCN-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
305 // AMDGCN-NEXT: [[ENTRY:.*:]]
306 // AMDGCN-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
307 // AMDGCN-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
308 // AMDGCN-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
309 // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
310 // AMDGCN-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
311 // AMDGCN-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
312 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
313 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
314 // AMDGCN-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
315 // AMDGCN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
316 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
317 // AMDGCN-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
318 // AMDGCN-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
319 // AMDGCN-NEXT: ret void
321 // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @ker_large(
322 // AMDGCN20-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
323 // AMDGCN20-NEXT: [[ENTRY:.*:]]
324 // AMDGCN20-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
325 // AMDGCN20-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
326 // AMDGCN20-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
327 // AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
328 // AMDGCN20-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr
329 // AMDGCN20-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[OUT_ADDR]] to ptr
330 // AMDGCN20-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
331 // AMDGCN20-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR_ASCAST]], align 8
332 // AMDGCN20-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], align 8
333 // AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR_ASCAST]], align 8
334 // AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
335 // AMDGCN20-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR_ASCAST]], align 8
336 // AMDGCN20-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
337 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
338 // AMDGCN20-NEXT: call void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP_ASCAST]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
339 // AMDGCN20-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP_ASCAST]], i64 16384, i1 false)
340 // AMDGCN20-NEXT: ret void
342 // SPIR-LABEL: define dso_local spir_kernel void @ker_large(
343 // SPIR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual [[META4]] !kernel_arg_type [[META7:![0-9]+]] !kernel_arg_base_type [[META7]] !kernel_arg_type_qual [[META6]] {
344 // SPIR-NEXT: [[ENTRY:.*:]]
345 // SPIR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 4
346 // SPIR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 4
347 // SPIR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4
348 // SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4
349 // SPIR-NEXT: store ptr addrspace(1) [[IN]], ptr [[IN_ADDR]], align 4
350 // SPIR-NEXT: store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR]], align 4
351 // SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[OUT_ADDR]], align 4
352 // SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i32 0
353 // SPIR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr [[IN_ADDR]], align 4
354 // SPIR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i32 1
355 // SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i32 4096, i1 false)
356 // SPIR-NEXT: call spir_func void @foo_large(ptr dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr noundef byval([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
357 // SPIR-NEXT: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr align 4 [[TMP]], i32 16384, i1 false)
358 // SPIR-NEXT: ret void
360 // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @ker_large(
361 // AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
362 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
363 // AMDGCN30-GVAR-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
364 // AMDGCN30-GVAR-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
365 // AMDGCN30-GVAR-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
366 // AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
367 // AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
368 // AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
369 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
370 // AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
371 // AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
372 // AMDGCN30-GVAR-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
373 // AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
374 // AMDGCN30-GVAR-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
375 // AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
376 // AMDGCN30-GVAR-NEXT: ret void
378 // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @ker_large(
379 // AMDGCN30-SAME: ptr addrspace(1) noundef align 4 [[IN:%.*]], ptr addrspace(1) noundef align 4 [[OUT:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META4]] !kernel_arg_access_qual [[META5]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META7]] {
380 // AMDGCN30-NEXT: [[ENTRY:.*:]]
381 // AMDGCN30-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
382 // AMDGCN30-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
383 // AMDGCN30-NEXT: [[TMP:%.*]] = alloca [[STRUCT_MAT64X64:%.*]], align 4, addrspace(5)
384 // AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_MAT32X32:%.*]], align 4, addrspace(5)
385 // AMDGCN30-NEXT: store ptr addrspace(1) [[IN]], ptr addrspace(5) [[IN_ADDR]], align 8
386 // AMDGCN30-NEXT: store ptr addrspace(1) [[OUT]], ptr addrspace(5) [[OUT_ADDR]], align 8
387 // AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
388 // AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_MAT64X64]], ptr addrspace(1) [[TMP0]], i64 0
389 // AMDGCN30-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[IN_ADDR]], align 8
390 // AMDGCN30-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [[STRUCT_MAT32X32]], ptr addrspace(1) [[TMP1]], i64 1
391 // AMDGCN30-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 4 [[ARRAYIDX1]], i64 4096, i1 false)
392 // AMDGCN30-NEXT: call void @foo_large(ptr addrspace(5) dead_on_unwind writable sret([[STRUCT_MAT64X64]]) align 4 [[TMP]], ptr addrspace(5) noundef byref([[STRUCT_MAT32X32]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
393 // AMDGCN30-NEXT: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 [[ARRAYIDX]], ptr addrspace(5) align 4 [[TMP]], i64 16384, i1 false)
394 // AMDGCN30-NEXT: ret void
396 kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
397 out[0] = foo_large(in[1]);
401 // X86-LABEL: define void @FuncOneMember(
402 // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
403 // X86-NEXT: [[ENTRY:.*:]]
404 // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER]], align 8
405 // X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
406 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 8, i1 false)
407 // X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
408 // X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
409 // X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
410 // X86-NEXT: store <2 x i32> [[TMP1]], ptr [[X]], align 8
411 // X86-NEXT: ret void
413 // AMDGCN-LABEL: define dso_local void @FuncOneMember(
414 // AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
415 // AMDGCN-NEXT: [[ENTRY:.*:]]
416 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
417 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
418 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
419 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
420 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
421 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
422 // AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
423 // AMDGCN-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
424 // AMDGCN-NEXT: ret void
426 // AMDGCN20-LABEL: define dso_local void @FuncOneMember(
427 // AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
428 // AMDGCN20-NEXT: [[ENTRY:.*:]]
429 // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
430 // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
431 // AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
432 // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
433 // AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
434 // AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
435 // AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
436 // AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
437 // AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
438 // AMDGCN20-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8
439 // AMDGCN20-NEXT: ret void
441 // SPIR-LABEL: define dso_local spir_func void @FuncOneMember(
442 // SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
443 // SPIR-NEXT: [[ENTRY:.*:]]
444 // SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
445 // SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
446 // SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
447 // SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
448 // SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[X]], align 8
449 // SPIR-NEXT: ret void
451 // AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneMember(
452 // AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
453 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
454 // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
455 // AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
456 // AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
457 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
458 // AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
459 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
460 // AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
461 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
462 // AMDGCN30-GVAR-NEXT: ret void
464 // AMDGCN30-LABEL: define dso_local void @FuncOneMember(
465 // AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR0]] {
466 // AMDGCN30-NEXT: [[ENTRY:.*:]]
467 // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
468 // AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
469 // AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
470 // AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
471 // AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
472 // AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
473 // AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
474 // AMDGCN30-NEXT: store <2 x i32> [[TMP0]], ptr addrspace(5) [[X]], align 8
475 // AMDGCN30-NEXT: ret void
477 void FuncOneMember(struct StructOneMember u) {
482 // X86-LABEL: define void @FuncOneLargeMember(
483 // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
484 // X86-NEXT: [[ENTRY:.*:]]
485 // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8
486 // X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
487 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 800, i1 false)
488 // X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
489 // X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
490 // X86-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
491 // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0
492 // X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
493 // X86-NEXT: ret void
495 // AMDGCN-LABEL: define dso_local void @FuncOneLargeMember(
496 // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
497 // AMDGCN-NEXT: [[ENTRY:.*:]]
498 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
499 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
500 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
501 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
502 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
503 // AMDGCN-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
504 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
505 // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
506 // AMDGCN-NEXT: ret void
508 // AMDGCN20-LABEL: define dso_local void @FuncOneLargeMember(
509 // AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
510 // AMDGCN20-NEXT: [[ENTRY:.*:]]
511 // AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
512 // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
513 // AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
514 // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
515 // AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
516 // AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
517 // AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
518 // AMDGCN20-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
519 // AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i64 0, i64 0
520 // AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
521 // AMDGCN20-NEXT: ret void
523 // SPIR-LABEL: define dso_local spir_func void @FuncOneLargeMember(
524 // SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
525 // SPIR-NEXT: [[ENTRY:.*:]]
526 // SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
527 // SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
528 // SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
529 // SPIR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U]], i32 0, i32 0
530 // SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr [[X]], i32 0, i32 0
531 // SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8
532 // SPIR-NEXT: ret void
534 // AMDGCN30-GVAR-LABEL: define dso_local void @FuncOneLargeMember(
535 // AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
536 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
537 // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
538 // AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
539 // AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
540 // AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
541 // AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
542 // AMDGCN30-GVAR-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
543 // AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
544 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
545 // AMDGCN30-GVAR-NEXT: ret void
547 // AMDGCN30-LABEL: define dso_local void @FuncOneLargeMember(
548 // AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
549 // AMDGCN30-NEXT: [[ENTRY:.*:]]
550 // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
551 // AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
552 // AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 800, i1 false)
553 // AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
554 // AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
555 // AMDGCN30-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
556 // AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x <2 x i32>], ptr addrspace(5) [[X]], i64 0, i64 0
557 // AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
558 // AMDGCN30-NEXT: ret void
560 void FuncOneLargeMember(struct LargeStructOneMember u) {
561 u.x[0] = (int2)(0, 0);
564 #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
565 // AMDGCN20-LABEL: define dso_local void @test_indirect_arg_globl(
566 // AMDGCN20-SAME: ) #[[ATTR0]] {
567 // AMDGCN20-NEXT: [[ENTRY:.*:]]
568 // AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
569 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
570 // AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
571 // AMDGCN20-NEXT: ret void
573 // AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_globl(
574 // AMDGCN30-GVAR-SAME: ) #[[ATTR0]] {
575 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
576 // AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
577 // AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
578 // AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
579 // AMDGCN30-GVAR-NEXT: ret void
581 void test_indirect_arg_globl(void) {
582 FuncOneLargeMember(g_s);
587 // X86-LABEL: define spir_kernel void @test_indirect_arg_local(
588 // X86-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
589 // X86-NEXT: [[ENTRY:.*:]]
590 // X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 4
591 // X86-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false)
592 // X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
593 // X86-NEXT: ret void
595 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
596 // AMDGCN-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
597 // AMDGCN-NEXT: [[ENTRY:.*:]]
598 // AMDGCN-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
599 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
600 // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
601 // AMDGCN-NEXT: ret void
603 // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
604 // AMDGCN20-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
605 // AMDGCN20-NEXT: [[ENTRY:.*:]]
606 // AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
607 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
608 // AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
609 // AMDGCN20-NEXT: ret void
611 // SPIR-LABEL: define dso_local spir_kernel void @test_indirect_arg_local(
612 // SPIR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META8:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] !kernel_arg_type_qual [[META8]] {
613 // SPIR-NEXT: [[ENTRY:.*:]]
614 // SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8
615 // SPIR-NEXT: call void @llvm.memcpy.p0.p3.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i32 800, i1 false)
616 // SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
617 // SPIR-NEXT: ret void
619 // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
620 // AMDGCN30-GVAR-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
621 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
622 // AMDGCN30-GVAR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
623 // AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
624 // AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
625 // AMDGCN30-GVAR-NEXT: ret void
627 // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @test_indirect_arg_local(
628 // AMDGCN30-SAME: ) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META9]] !kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] !kernel_arg_type_qual [[META9]] {
629 // AMDGCN30-NEXT: [[ENTRY:.*:]]
630 // AMDGCN30-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
631 // AMDGCN30-NEXT: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
632 // AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
633 // AMDGCN30-NEXT: ret void
635 kernel void test_indirect_arg_local(void) {
636 local struct LargeStructOneMember l_s;
637 FuncOneLargeMember(l_s);
641 // X86-LABEL: define void @test_indirect_arg_private(
642 // X86-SAME: ) #[[ATTR0]] {
643 // X86-NEXT: [[ENTRY:.*:]]
644 // X86-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8
645 // X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[P_S]]) #[[ATTR3]]
646 // X86-NEXT: ret void
648 // AMDGCN-LABEL: define dso_local void @test_indirect_arg_private(
649 // AMDGCN-SAME: ) #[[ATTR0]] {
650 // AMDGCN-NEXT: [[ENTRY:.*:]]
651 // AMDGCN-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
652 // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
653 // AMDGCN-NEXT: ret void
655 // AMDGCN20-LABEL: define dso_local void @test_indirect_arg_private(
656 // AMDGCN20-SAME: ) #[[ATTR0]] {
657 // AMDGCN20-NEXT: [[ENTRY:.*:]]
658 // AMDGCN20-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
659 // AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
660 // AMDGCN20-NEXT: [[P_S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[P_S]] to ptr
661 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[P_S_ASCAST]], i64 800, i1 false)
662 // AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
663 // AMDGCN20-NEXT: ret void
665 // SPIR-LABEL: define dso_local spir_func void @test_indirect_arg_private(
666 // SPIR-SAME: ) #[[ATTR0]] {
667 // SPIR-NEXT: [[ENTRY:.*:]]
668 // SPIR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8
669 // SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
670 // SPIR-NEXT: ret void
672 // AMDGCN30-GVAR-LABEL: define dso_local void @test_indirect_arg_private(
673 // AMDGCN30-GVAR-SAME: ) #[[ATTR0]] {
674 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
675 // AMDGCN30-GVAR-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
676 // AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
677 // AMDGCN30-GVAR-NEXT: ret void
679 // AMDGCN30-LABEL: define dso_local void @test_indirect_arg_private(
680 // AMDGCN30-SAME: ) #[[ATTR0]] {
681 // AMDGCN30-NEXT: [[ENTRY:.*:]]
682 // AMDGCN30-NEXT: [[P_S:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER:%.*]], align 8, addrspace(5)
683 // AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[P_S]]) #[[ATTR3]]
684 // AMDGCN30-NEXT: ret void
686 void test_indirect_arg_private(void) {
687 struct LargeStructOneMember p_s;
688 FuncOneLargeMember(p_s);
692 // X86-LABEL: define spir_kernel void @KernelOneMember(
693 // X86-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
694 // X86-NEXT: [[ENTRY:.*:]]
695 // X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
696 // X86-NEXT: ret void
698 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
699 // AMDGCN-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
700 // AMDGCN-NEXT: [[ENTRY:.*:]]
701 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
702 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
703 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
704 // AMDGCN-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
705 // AMDGCN-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
706 // AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
707 // AMDGCN-NEXT: ret void
709 // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
710 // AMDGCN20-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
711 // AMDGCN20-NEXT: [[ENTRY:.*:]]
712 // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
713 // AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
714 // AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
715 // AMDGCN20-NEXT: store <2 x i32> [[U_COERCE]], ptr [[COERCE_DIVE]], align 8
716 // AMDGCN20-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
717 // AMDGCN20-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[COERCE_DIVE2]], align 8
718 // AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
719 // AMDGCN20-NEXT: ret void
721 // SPIR-LABEL: define dso_local spir_kernel void @KernelOneMember(
722 // SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9:![0-9]+]] !kernel_arg_access_qual [[META10:![0-9]+]] !kernel_arg_type [[META11:![0-9]+]] !kernel_arg_base_type [[META11]] !kernel_arg_type_qual [[META12:![0-9]+]] {
723 // SPIR-NEXT: [[ENTRY:.*:]]
724 // SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
725 // SPIR-NEXT: ret void
727 // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
728 // AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
729 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
730 // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
731 // AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
732 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
733 // AMDGCN30-GVAR-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
734 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
735 // AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
736 // AMDGCN30-GVAR-NEXT: ret void
738 // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMember(
739 // AMDGCN30-SAME: <2 x i32> [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10:![0-9]+]] !kernel_arg_access_qual [[META11:![0-9]+]] !kernel_arg_type [[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META13:![0-9]+]] {
740 // AMDGCN30-NEXT: [[ENTRY:.*:]]
741 // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8, addrspace(5)
742 // AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
743 // AMDGCN30-NEXT: store <2 x i32> [[U_COERCE]], ptr addrspace(5) [[COERCE_DIVE]], align 8
744 // AMDGCN30-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
745 // AMDGCN30-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(5) [[COERCE_DIVE1]], align 8
746 // AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP0]]) #[[ATTR3]]
747 // AMDGCN30-NEXT: ret void
749 kernel void KernelOneMember(struct StructOneMember u) {
754 // X86-LABEL: define spir_kernel void @KernelOneMemberSpir(
755 // X86-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
756 // X86-NEXT: [[ENTRY:.*:]]
757 // X86-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4
758 // X86-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 4
759 // X86-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4
760 // X86-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4
761 // X86-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 4 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false)
762 // X86-NEXT: call void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 4 [[BYVAL_TEMP]]) #[[ATTR3]]
763 // X86-NEXT: ret void
765 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
766 // AMDGCN-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
767 // AMDGCN-NEXT: [[ENTRY:.*:]]
768 // AMDGCN-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
769 // AMDGCN-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
770 // AMDGCN-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8
771 // AMDGCN-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
772 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
773 // AMDGCN-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
774 // AMDGCN-NEXT: ret void
776 // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
777 // AMDGCN20-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
778 // AMDGCN20-NEXT: [[ENTRY:.*:]]
779 // AMDGCN20-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
780 // AMDGCN20-NEXT: [[U_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[U_ADDR]] to ptr
781 // AMDGCN20-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR_ASCAST]], align 8
782 // AMDGCN20-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR_ASCAST]], align 8
783 // AMDGCN20-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
784 // AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
785 // AMDGCN20-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
786 // AMDGCN20-NEXT: ret void
788 // SPIR-LABEL: define dso_local spir_kernel void @KernelOneMemberSpir(
789 // SPIR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META13:![0-9]+]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META14:![0-9]+]] !kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META12]] {
790 // SPIR-NEXT: [[ENTRY:.*:]]
791 // SPIR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 4
792 // SPIR-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_STRUCTONEMEMBER:%.*]], align 8
793 // SPIR-NEXT: store ptr addrspace(1) [[U]], ptr [[U_ADDR]], align 4
794 // SPIR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[U_ADDR]], align 4
795 // SPIR-NEXT: call void @llvm.memcpy.p0.p1.i32(ptr align 8 [[BYVAL_TEMP]], ptr addrspace(1) align 8 [[TMP0]], i32 8, i1 false)
796 // SPIR-NEXT: call spir_func void @FuncOneMember(ptr noundef byval([[STRUCT_STRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
797 // SPIR-NEXT: ret void
799 // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
800 // AMDGCN30-GVAR-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
801 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
802 // AMDGCN30-GVAR-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
803 // AMDGCN30-GVAR-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
804 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8
805 // AMDGCN30-GVAR-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
806 // AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
807 // AMDGCN30-GVAR-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
808 // AMDGCN30-GVAR-NEXT: ret void
810 // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelOneMemberSpir(
811 // AMDGCN30-SAME: ptr addrspace(1) noundef align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META14:![0-9]+]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META13]] {
812 // AMDGCN30-NEXT: [[ENTRY:.*:]]
813 // AMDGCN30-NEXT: [[U_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
814 // AMDGCN30-NEXT: store ptr addrspace(1) [[U]], ptr addrspace(5) [[U_ADDR]], align 8
815 // AMDGCN30-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[U_ADDR]], align 8
816 // AMDGCN30-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTONEMEMBER:%.*]], ptr addrspace(1) [[TMP0]], i32 0, i32 0
817 // AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(1) [[COERCE_DIVE]], align 8
818 // AMDGCN30-NEXT: call void @FuncOneMember(<2 x i32> [[TMP1]]) #[[ATTR3]]
819 // AMDGCN30-NEXT: ret void
821 kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
826 // X86-LABEL: define spir_kernel void @KernelLargeOneMember(
827 // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
828 // X86-NEXT: [[ENTRY:.*:]]
829 // X86-NEXT: call void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 4 [[U]]) #[[ATTR3]]
830 // X86-NEXT: ret void
832 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
833 // AMDGCN-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
834 // AMDGCN-NEXT: [[ENTRY:.*:]]
835 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
836 // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
837 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
838 // AMDGCN-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
839 // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
840 // AMDGCN-NEXT: ret void
842 // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
843 // AMDGCN20-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
844 // AMDGCN20-NEXT: [[ENTRY:.*:]]
845 // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
846 // AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
847 // AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
848 // AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr [[U1]], i32 0, i32 0
849 // AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
850 // AMDGCN20-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
851 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 800, i1 false)
852 // AMDGCN20-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
853 // AMDGCN20-NEXT: ret void
855 // SPIR-LABEL: define dso_local spir_kernel void @KernelLargeOneMember(
856 // SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META15:![0-9]+]] !kernel_arg_base_type [[META15]] !kernel_arg_type_qual [[META12]] {
857 // SPIR-NEXT: [[ENTRY:.*:]]
858 // SPIR-NEXT: call spir_func void @FuncOneLargeMember(ptr noundef byval([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
859 // SPIR-NEXT: ret void
861 // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
862 // AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
863 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
864 // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
865 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
866 // AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
867 // AMDGCN30-GVAR-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
868 // AMDGCN30-GVAR-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
869 // AMDGCN30-GVAR-NEXT: ret void
871 // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeOneMember(
872 // AMDGCN30-SAME: [[STRUCT_LARGESTRUCTONEMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META13]] {
873 // AMDGCN30-NEXT: [[ENTRY:.*:]]
874 // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTONEMEMBER]], align 8, addrspace(5)
875 // AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTONEMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
876 // AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTONEMEMBER]] [[U_COERCE]], 0
877 // AMDGCN30-NEXT: store [100 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
878 // AMDGCN30-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTONEMEMBER]]) align 8 [[U]]) #[[ATTR3]]
879 // AMDGCN30-NEXT: ret void
881 kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
882 FuncOneLargeMember(u);
886 // X86-LABEL: define void @FuncTwoMember(
887 // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
888 // X86-NEXT: [[ENTRY:.*:]]
889 // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8
890 // X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
891 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 16, i1 false)
892 // X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
893 // X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
894 // X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
895 // X86-NEXT: store <2 x i32> [[TMP1]], ptr [[Y]], align 8
896 // X86-NEXT: ret void
898 // AMDGCN-LABEL: define dso_local void @FuncTwoMember(
899 // AMDGCN-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
900 // AMDGCN-NEXT: [[ENTRY:.*:]]
901 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
902 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
903 // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
904 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
905 // AMDGCN-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
906 // AMDGCN-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
907 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
908 // AMDGCN-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
909 // AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
910 // AMDGCN-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
911 // AMDGCN-NEXT: ret void
913 // AMDGCN20-LABEL: define dso_local void @FuncTwoMember(
914 // AMDGCN20-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
915 // AMDGCN20-NEXT: [[ENTRY:.*:]]
916 // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
917 // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
918 // AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
919 // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
920 // AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
921 // AMDGCN20-NEXT: store <2 x i32> [[U_COERCE0]], ptr [[TMP0]], align 8
922 // AMDGCN20-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
923 // AMDGCN20-NEXT: store <2 x i32> [[U_COERCE1]], ptr [[TMP1]], align 8
924 // AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
925 // AMDGCN20-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
926 // AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
927 // AMDGCN20-NEXT: store <2 x i32> [[TMP2]], ptr [[Y]], align 8
928 // AMDGCN20-NEXT: ret void
930 // SPIR-LABEL: define dso_local spir_func void @FuncTwoMember(
931 // SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
932 // SPIR-NEXT: [[ENTRY:.*:]]
933 // SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
934 // SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
935 // SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
936 // SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
937 // SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[Y]], align 8
938 // SPIR-NEXT: ret void
940 // AMDGCN30-GVAR-LABEL: define dso_local void @FuncTwoMember(
941 // AMDGCN30-GVAR-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
942 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
943 // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
944 // AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
945 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
946 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
947 // AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
948 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
949 // AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
950 // AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
951 // AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
952 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
953 // AMDGCN30-GVAR-NEXT: ret void
955 // AMDGCN30-LABEL: define dso_local void @FuncTwoMember(
956 // AMDGCN30-SAME: <2 x i32> [[U_COERCE0:%.*]], <2 x i32> [[U_COERCE1:%.*]]) #[[ATTR0]] {
957 // AMDGCN30-NEXT: [[ENTRY:.*:]]
958 // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER:%.*]], align 8, addrspace(5)
959 // AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
960 // AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
961 // AMDGCN30-NEXT: store <2 x i32> [[U_COERCE0]], ptr addrspace(5) [[TMP0]], align 8
962 // AMDGCN30-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
963 // AMDGCN30-NEXT: store <2 x i32> [[U_COERCE1]], ptr addrspace(5) [[TMP1]], align 8
964 // AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
965 // AMDGCN30-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
966 // AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
967 // AMDGCN30-NEXT: store <2 x i32> [[TMP2]], ptr addrspace(5) [[Y]], align 8
968 // AMDGCN30-NEXT: ret void
970 void FuncTwoMember(struct StructTwoMember u) {
975 // X86-LABEL: define void @FuncLargeTwoMember(
976 // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 4 [[TMP0:%.*]]) #[[ATTR0]] {
977 // X86-NEXT: [[ENTRY:.*:]]
978 // X86-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8
979 // X86-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
980 // X86-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[U]], ptr align 4 [[TMP0]], i32 480, i1 false)
981 // X86-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
982 // X86-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
983 // X86-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
984 // X86-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0
985 // X86-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
986 // X86-NEXT: ret void
988 // AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember(
989 // AMDGCN-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
990 // AMDGCN-NEXT: [[ENTRY:.*:]]
991 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
992 // AMDGCN-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
993 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
994 // AMDGCN-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
995 // AMDGCN-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
996 // AMDGCN-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
997 // AMDGCN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
998 // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
999 // AMDGCN-NEXT: ret void
1001 // AMDGCN20-LABEL: define dso_local void @FuncLargeTwoMember(
1002 // AMDGCN20-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
1003 // AMDGCN20-NEXT: [[ENTRY:.*:]]
1004 // AMDGCN20-NEXT: [[COERCE:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1005 // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
1006 // AMDGCN20-NEXT: [[U:%.*]] = addrspacecast ptr addrspace(5) [[COERCE]] to ptr
1007 // AMDGCN20-NEXT: [[DOTCOMPOUNDLITERAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCOMPOUNDLITERAL]] to ptr
1008 // AMDGCN20-NEXT: call void @llvm.memcpy.p0.p5.i64(ptr align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
1009 // AMDGCN20-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
1010 // AMDGCN20-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL_ASCAST]], align 8
1011 // AMDGCN20-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
1012 // AMDGCN20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i64 0, i64 0
1013 // AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[ARRAYIDX]], align 8
1014 // AMDGCN20-NEXT: ret void
1016 // SPIR-LABEL: define dso_local spir_func void @FuncLargeTwoMember(
1017 // SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR0]] {
1018 // SPIR-NEXT: [[ENTRY:.*:]]
1019 // SPIR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8
1020 // SPIR-NEXT: store <2 x i32> zeroinitializer, ptr [[DOTCOMPOUNDLITERAL]], align 8
1021 // SPIR-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[DOTCOMPOUNDLITERAL]], align 8
1022 // SPIR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U]], i32 0, i32 1
1023 // SPIR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr [[Y]], i32 0, i32 0
1024 // SPIR-NEXT: store <2 x i32> [[TMP0]], ptr [[ARRAYIDX]], align 8
1025 // SPIR-NEXT: ret void
1027 // AMDGCN30-GVAR-LABEL: define dso_local void @FuncLargeTwoMember(
1028 // AMDGCN30-GVAR-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
1029 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
1030 // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1031 // AMDGCN30-GVAR-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
1032 // AMDGCN30-GVAR-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
1033 // AMDGCN30-GVAR-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
1034 // AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
1035 // AMDGCN30-GVAR-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1036 // AMDGCN30-GVAR-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
1037 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
1038 // AMDGCN30-GVAR-NEXT: ret void
1040 // AMDGCN30-LABEL: define dso_local void @FuncLargeTwoMember(
1041 // AMDGCN30-SAME: ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]]) #[[ATTR0]] {
1042 // AMDGCN30-NEXT: [[ENTRY:.*:]]
1043 // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1044 // AMDGCN30-NEXT: [[DOTCOMPOUNDLITERAL:%.*]] = alloca <2 x i32>, align 8, addrspace(5)
1045 // AMDGCN30-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 [[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
1046 // AMDGCN30-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
1047 // AMDGCN30-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[DOTCOMPOUNDLITERAL]], align 8
1048 // AMDGCN30-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1049 // AMDGCN30-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [20 x <2 x i32>], ptr addrspace(5) [[Y]], i64 0, i64 0
1050 // AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[ARRAYIDX]], align 8
1051 // AMDGCN30-NEXT: ret void
1053 void FuncLargeTwoMember(struct LargeStructTwoMember u) {
1054 u.y[0] = (int2)(0, 0);
1058 // X86-LABEL: define spir_kernel void @KernelTwoMember(
1059 // X86-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1060 // X86-NEXT: [[ENTRY:.*:]]
1061 // X86-NEXT: call void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]]
1062 // X86-NEXT: ret void
1064 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
1065 // AMDGCN-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1066 // AMDGCN-NEXT: [[ENTRY:.*:]]
1067 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
1068 // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1069 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
1070 // AMDGCN-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1071 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1072 // AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
1073 // AMDGCN-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1074 // AMDGCN-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1075 // AMDGCN-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
1076 // AMDGCN-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1077 // AMDGCN-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
1078 // AMDGCN-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
1079 // AMDGCN-NEXT: ret void
1081 // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
1082 // AMDGCN20-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1083 // AMDGCN20-NEXT: [[ENTRY:.*:]]
1084 // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
1085 // AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
1086 // AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
1087 // AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
1088 // AMDGCN20-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP0]], align 8
1089 // AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
1090 // AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
1091 // AMDGCN20-NEXT: store <2 x i32> [[TMP3]], ptr [[TMP2]], align 8
1092 // AMDGCN20-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
1093 // AMDGCN20-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8
1094 // AMDGCN20-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
1095 // AMDGCN20-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
1096 // AMDGCN20-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
1097 // AMDGCN20-NEXT: ret void
1099 // SPIR-LABEL: define dso_local spir_kernel void @KernelTwoMember(
1100 // SPIR-SAME: ptr noundef byval([[STRUCT_STRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META16:![0-9]+]] !kernel_arg_base_type [[META16]] !kernel_arg_type_qual [[META12]] {
1101 // SPIR-NEXT: [[ENTRY:.*:]]
1102 // SPIR-NEXT: call spir_func void @FuncTwoMember(ptr noundef byval([[STRUCT_STRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1103 // SPIR-NEXT: ret void
1105 // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
1106 // AMDGCN30-GVAR-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1107 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
1108 // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
1109 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1110 // AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
1111 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1112 // AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1113 // AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
1114 // AMDGCN30-GVAR-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1115 // AMDGCN30-GVAR-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1116 // AMDGCN30-GVAR-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
1117 // AMDGCN30-GVAR-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1118 // AMDGCN30-GVAR-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
1119 // AMDGCN30-GVAR-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
1120 // AMDGCN30-GVAR-NEXT: ret void
1122 // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelTwoMember(
1123 // AMDGCN30-SAME: [[STRUCT_STRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META13]] {
1124 // AMDGCN30-NEXT: [[ENTRY:.*:]]
1125 // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_STRUCTTWOMEMBER]], align 8, addrspace(5)
1126 // AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1127 // AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 0
1128 // AMDGCN30-NEXT: store <2 x i32> [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1129 // AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1130 // AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_STRUCTTWOMEMBER]] [[U_COERCE]], 1
1131 // AMDGCN30-NEXT: store <2 x i32> [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1132 // AMDGCN30-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1133 // AMDGCN30-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP4]], align 8
1134 // AMDGCN30-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT_STRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1135 // AMDGCN30-NEXT: [[TMP7:%.*]] = load <2 x i32>, ptr addrspace(5) [[TMP6]], align 8
1136 // AMDGCN30-NEXT: call void @FuncTwoMember(<2 x i32> [[TMP5]], <2 x i32> [[TMP7]]) #[[ATTR3]]
1137 // AMDGCN30-NEXT: ret void
1139 kernel void KernelTwoMember(struct StructTwoMember u) {
1144 // X86-LABEL: define spir_kernel void @KernelLargeTwoMember(
1145 // X86-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1146 // X86-NEXT: [[ENTRY:.*:]]
1147 // X86-NEXT: call void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 4 [[U]]) #[[ATTR3]]
1148 // X86-NEXT: ret void
1150 // AMDGCN-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
1151 // AMDGCN-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1152 // AMDGCN-NEXT: [[ENTRY:.*:]]
1153 // AMDGCN-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1154 // AMDGCN-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1155 // AMDGCN-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
1156 // AMDGCN-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1157 // AMDGCN-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1158 // AMDGCN-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
1159 // AMDGCN-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1160 // AMDGCN-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1161 // AMDGCN-NEXT: ret void
1163 // AMDGCN20-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
1164 // AMDGCN20-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1165 // AMDGCN20-NEXT: [[ENTRY:.*:]]
1166 // AMDGCN20-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1167 // AMDGCN20-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1168 // AMDGCN20-NEXT: [[U1:%.*]] = addrspacecast ptr addrspace(5) [[U]] to ptr
1169 // AMDGCN20-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 0
1170 // AMDGCN20-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
1171 // AMDGCN20-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr [[TMP0]], align 8
1172 // AMDGCN20-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr [[U1]], i32 0, i32 1
1173 // AMDGCN20-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
1174 // AMDGCN20-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr [[TMP2]], align 8
1175 // AMDGCN20-NEXT: call void @llvm.memcpy.p5.p0.i64(ptr addrspace(5) align 8 [[BYVAL_TEMP]], ptr align 8 [[U1]], i64 480, i1 false)
1176 // AMDGCN20-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[BYVAL_TEMP]]) #[[ATTR3]]
1177 // AMDGCN20-NEXT: ret void
1179 // SPIR-LABEL: define dso_local spir_kernel void @KernelLargeTwoMember(
1180 // SPIR-SAME: ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[U:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META9]] !kernel_arg_access_qual [[META10]] !kernel_arg_type [[META17:![0-9]+]] !kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META12]] {
1181 // SPIR-NEXT: [[ENTRY:.*:]]
1182 // SPIR-NEXT: call spir_func void @FuncLargeTwoMember(ptr noundef byval([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1183 // SPIR-NEXT: ret void
1185 // AMDGCN30-GVAR-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
1186 // AMDGCN30-GVAR-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1187 // AMDGCN30-GVAR-NEXT: [[ENTRY:.*:]]
1188 // AMDGCN30-GVAR-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1189 // AMDGCN30-GVAR-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1190 // AMDGCN30-GVAR-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
1191 // AMDGCN30-GVAR-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1192 // AMDGCN30-GVAR-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1193 // AMDGCN30-GVAR-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
1194 // AMDGCN30-GVAR-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1195 // AMDGCN30-GVAR-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1196 // AMDGCN30-GVAR-NEXT: ret void
1198 // AMDGCN30-LABEL: define dso_local amdgpu_kernel void @KernelLargeTwoMember(
1199 // AMDGCN30-SAME: [[STRUCT_LARGESTRUCTTWOMEMBER:%.*]] [[U_COERCE:%.*]]) #[[ATTR1]] !kernel_arg_addr_space [[META10]] !kernel_arg_access_qual [[META11]] !kernel_arg_type [[META18:![0-9]+]] !kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META13]] {
1200 // AMDGCN30-NEXT: [[ENTRY:.*:]]
1201 // AMDGCN30-NEXT: [[U:%.*]] = alloca [[STRUCT_LARGESTRUCTTWOMEMBER]], align 8, addrspace(5)
1202 // AMDGCN30-NEXT: [[TMP0:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 0
1203 // AMDGCN30-NEXT: [[TMP1:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 0
1204 // AMDGCN30-NEXT: store [40 x <2 x i32>] [[TMP1]], ptr addrspace(5) [[TMP0]], align 8
1205 // AMDGCN30-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_LARGESTRUCTTWOMEMBER]], ptr addrspace(5) [[U]], i32 0, i32 1
1206 // AMDGCN30-NEXT: [[TMP3:%.*]] = extractvalue [[STRUCT_LARGESTRUCTTWOMEMBER]] [[U_COERCE]], 1
1207 // AMDGCN30-NEXT: store [20 x <2 x i32>] [[TMP3]], ptr addrspace(5) [[TMP2]], align 8
1208 // AMDGCN30-NEXT: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER]]) align 8 [[U]]) #[[ATTR3]]
1209 // AMDGCN30-NEXT: ret void
1211 kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
1212 FuncLargeTwoMember(u);
1215 // X86: [[META4]] = !{i32 1, i32 1}
1216 // X86: [[META5]] = !{!"none", !"none"}
1217 // X86: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1218 // X86: [[META7]] = !{!"", !""}
1219 // X86: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1220 // X86: [[META9]] = !{}
1221 // X86: [[META10]] = !{i32 0}
1222 // X86: [[META11]] = !{!"none"}
1223 // X86: [[META12]] = !{!"struct StructOneMember"}
1224 // X86: [[META13]] = !{!""}
1225 // X86: [[META14]] = !{i32 1}
1226 // X86: [[META15]] = !{!"struct StructOneMember*"}
1227 // X86: [[META16]] = !{!"struct LargeStructOneMember"}
1228 // X86: [[META17]] = !{!"struct StructTwoMember"}
1229 // X86: [[META18]] = !{!"struct LargeStructTwoMember"}
1231 // AMDGCN: [[META4]] = !{i32 1, i32 1}
1232 // AMDGCN: [[META5]] = !{!"none", !"none"}
1233 // AMDGCN: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1234 // AMDGCN: [[META7]] = !{!"", !""}
1235 // AMDGCN: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1236 // AMDGCN: [[META9]] = !{}
1237 // AMDGCN: [[META10]] = !{i32 0}
1238 // AMDGCN: [[META11]] = !{!"none"}
1239 // AMDGCN: [[META12]] = !{!"struct StructOneMember"}
1240 // AMDGCN: [[META13]] = !{!""}
1241 // AMDGCN: [[META14]] = !{i32 1}
1242 // AMDGCN: [[META15]] = !{!"struct StructOneMember*"}
1243 // AMDGCN: [[META16]] = !{!"struct LargeStructOneMember"}
1244 // AMDGCN: [[META17]] = !{!"struct StructTwoMember"}
1245 // AMDGCN: [[META18]] = !{!"struct LargeStructTwoMember"}
1247 // AMDGCN20: [[META4]] = !{i32 1, i32 1}
1248 // AMDGCN20: [[META5]] = !{!"none", !"none"}
1249 // AMDGCN20: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1250 // AMDGCN20: [[META7]] = !{!"", !""}
1251 // AMDGCN20: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1252 // AMDGCN20: [[META9]] = !{}
1253 // AMDGCN20: [[META10]] = !{i32 0}
1254 // AMDGCN20: [[META11]] = !{!"none"}
1255 // AMDGCN20: [[META12]] = !{!"struct StructOneMember"}
1256 // AMDGCN20: [[META13]] = !{!""}
1257 // AMDGCN20: [[META14]] = !{i32 1}
1258 // AMDGCN20: [[META15]] = !{!"struct StructOneMember*"}
1259 // AMDGCN20: [[META16]] = !{!"struct LargeStructOneMember"}
1260 // AMDGCN20: [[META17]] = !{!"struct StructTwoMember"}
1261 // AMDGCN20: [[META18]] = !{!"struct LargeStructTwoMember"}
1263 // SPIR: [[META3]] = !{i32 1, i32 1}
1264 // SPIR: [[META4]] = !{!"none", !"none"}
1265 // SPIR: [[META5]] = !{!"Mat3X3*", !"Mat4X4*"}
1266 // SPIR: [[META6]] = !{!"", !""}
1267 // SPIR: [[META7]] = !{!"Mat32X32*", !"Mat64X64*"}
1268 // SPIR: [[META8]] = !{}
1269 // SPIR: [[META9]] = !{i32 0}
1270 // SPIR: [[META10]] = !{!"none"}
1271 // SPIR: [[META11]] = !{!"struct StructOneMember"}
1272 // SPIR: [[META12]] = !{!""}
1273 // SPIR: [[META13]] = !{i32 1}
1274 // SPIR: [[META14]] = !{!"struct StructOneMember*"}
1275 // SPIR: [[META15]] = !{!"struct LargeStructOneMember"}
1276 // SPIR: [[META16]] = !{!"struct StructTwoMember"}
1277 // SPIR: [[META17]] = !{!"struct LargeStructTwoMember"}
1279 // AMDGCN30-GVAR: [[META4]] = !{i32 1, i32 1}
1280 // AMDGCN30-GVAR: [[META5]] = !{!"none", !"none"}
1281 // AMDGCN30-GVAR: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1282 // AMDGCN30-GVAR: [[META7]] = !{!"", !""}
1283 // AMDGCN30-GVAR: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1284 // AMDGCN30-GVAR: [[META9]] = !{}
1285 // AMDGCN30-GVAR: [[META10]] = !{i32 0}
1286 // AMDGCN30-GVAR: [[META11]] = !{!"none"}
1287 // AMDGCN30-GVAR: [[META12]] = !{!"struct StructOneMember"}
1288 // AMDGCN30-GVAR: [[META13]] = !{!""}
1289 // AMDGCN30-GVAR: [[META14]] = !{i32 1}
1290 // AMDGCN30-GVAR: [[META15]] = !{!"struct StructOneMember*"}
1291 // AMDGCN30-GVAR: [[META16]] = !{!"struct LargeStructOneMember"}
1292 // AMDGCN30-GVAR: [[META17]] = !{!"struct StructTwoMember"}
1293 // AMDGCN30-GVAR: [[META18]] = !{!"struct LargeStructTwoMember"}
1295 // AMDGCN30: [[META4]] = !{i32 1, i32 1}
1296 // AMDGCN30: [[META5]] = !{!"none", !"none"}
1297 // AMDGCN30: [[META6]] = !{!"Mat3X3*", !"Mat4X4*"}
1298 // AMDGCN30: [[META7]] = !{!"", !""}
1299 // AMDGCN30: [[META8]] = !{!"Mat32X32*", !"Mat64X64*"}
1300 // AMDGCN30: [[META9]] = !{}
1301 // AMDGCN30: [[META10]] = !{i32 0}
1302 // AMDGCN30: [[META11]] = !{!"none"}
1303 // AMDGCN30: [[META12]] = !{!"struct StructOneMember"}
1304 // AMDGCN30: [[META13]] = !{!""}
1305 // AMDGCN30: [[META14]] = !{i32 1}
1306 // AMDGCN30: [[META15]] = !{!"struct StructOneMember*"}
1307 // AMDGCN30: [[META16]] = !{!"struct LargeStructOneMember"}
1308 // AMDGCN30: [[META17]] = !{!"struct StructTwoMember"}
1309 // AMDGCN30: [[META18]] = !{!"struct LargeStructTwoMember"}