1 // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=ALL,X86 %s
2 // RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s
3 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s
4 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
5 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn -cl-ext=+__opencl_c_program_scope_global_variables | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN,AMDGCN20 %s
6 // RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL3.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=ALL,AMDGCN %s
8 typedef int int2 __attribute__((ext_vector_type(2)));
26 struct StructOneMember {
30 struct StructTwoMember {
35 struct LargeStructOneMember {
39 struct LargeStructTwoMember {
44 #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
45 struct LargeStructOneMember g_s;
48 // X86-LABEL: define{{.*}} void @foo(ptr noalias sret(%struct.Mat4X4) align 4 %agg.result, ptr noundef byval(%struct.Mat3X3) align 4 %in)
49 // AMDGCN-LABEL: define{{.*}} %struct.Mat4X4 @foo([9 x i32] %in.coerce)
50 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
55 // ALL-LABEL: define {{.*}} void @ker
56 // Expect two mem copies: one for the argument "in", and one for the return value.
58 // X86: call void @llvm.memcpy.p0.p1.i32(ptr
59 // X86: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1)
61 // AMDGCN: load [9 x i32], ptr addrspace(1)
62 // AMDGCN: call %struct.Mat4X4 @foo([9 x i32]
63 // AMDGCN: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1)
64 kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
68 // X86-LABEL: define{{.*}} void @foo_large(ptr noalias sret(%struct.Mat64X64) align 4 %agg.result, ptr noundef byval(%struct.Mat32X32) align 4 %in)
69 // AMDGCN-LABEL: define{{.*}} void @foo_large(ptr addrspace(5) noalias sret(%struct.Mat64X64) align 4 %agg.result, ptr addrspace(5) noundef byref(%struct.Mat32X32) align 4 %{{.*}}
70 // AMDGCN: %in = alloca %struct.Mat32X32, align 4, addrspace(5)
71 // AMDGCN-NEXT: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 4 %in, ptr addrspace(5) align 4 %{{.*}}, i64 4096, i1 false)
72 Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
77 // ALL-LABEL: define {{.*}} void @ker_large
78 // Expect two mem copies: one for the argument "in", and one for the return value.
80 // X86: call void @llvm.memcpy.p0.p1.i32(ptr
81 // X86: call void @llvm.memcpy.p1.p0.i32(ptr addrspace(1)
82 // AMDGCN: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5)
83 // AMDGCN: call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1)
84 kernel void ker_large(global Mat32X32 *in, global Mat64X64 *out) {
85 out[0] = foo_large(in[1]);
88 // AMDGCN-LABEL: define{{.*}} void @FuncOneMember(<2 x i32> %u.coerce)
89 void FuncOneMember(struct StructOneMember u) {
93 // AMDGCN-LABEL: define{{.*}} void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %{{.*}}
94 // AMDGCN: %u = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
95 // AMDGCN: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 %u, ptr addrspace(5) align 8 %{{.*}}, i64 800, i1 false)
96 // AMDGCN-NOT: addrspacecast
97 // AMDGCN: store <2 x i32> %{{.*}}, ptr addrspace(5)
98 void FuncOneLargeMember(struct LargeStructOneMember u) {
99 u.x[0] = (int2)(0, 0);
102 // AMDGCN20-LABEL: define{{.*}} void @test_indirect_arg_globl()
103 // AMDGCN20: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
104 // AMDGCN20: call void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) align 8 %[[byval_temp]], ptr addrspace(1) align 8 @g_s, i64 800, i1 false)
105 // AMDGCN20: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
106 #if (__OPENCL_C_VERSION__ == 200) || (__OPENCL_C_VERSION__ >= 300 && defined(__opencl_c_program_scope_global_variables))
107 void test_indirect_arg_globl(void) {
108 FuncOneLargeMember(g_s);
112 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @test_indirect_arg_local()
113 // AMDGCN: %[[byval_temp:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
114 // AMDGCN: call void @llvm.memcpy.p5.p3.i64(ptr addrspace(5) align 8 %[[byval_temp]], ptr addrspace(3) align 8 @test_indirect_arg_local.l_s, i64 800, i1 false)
115 // AMDGCN: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[byval_temp]])
116 kernel void test_indirect_arg_local(void) {
117 local struct LargeStructOneMember l_s;
118 FuncOneLargeMember(l_s);
121 // AMDGCN-LABEL: define{{.*}} void @test_indirect_arg_private()
122 // AMDGCN: %[[p_s:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
123 // AMDGCN-NOT: @llvm.memcpy
124 // AMDGCN-NEXT: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[p_s]])
125 void test_indirect_arg_private(void) {
126 struct LargeStructOneMember p_s;
127 FuncOneLargeMember(p_s);
130 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelOneMember
131 // AMDGCN-SAME: (<2 x i32> %[[u_coerce:.*]])
132 // AMDGCN: %[[u:.*]] = alloca %struct.StructOneMember, align 8, addrspace(5)
133 // AMDGCN: %[[coerce_dive:.*]] = getelementptr inbounds %struct.StructOneMember, ptr addrspace(5) %[[u]], i32 0, i32 0
134 // AMDGCN: store <2 x i32> %[[u_coerce]], ptr addrspace(5) %[[coerce_dive]]
135 // AMDGCN: call void @FuncOneMember(<2 x i32>
136 kernel void KernelOneMember(struct StructOneMember u) {
140 // SPIR: call void @llvm.memcpy.p0.p1.i32
141 // SPIR-NOT: addrspacecast
142 kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
146 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeOneMember(
147 // AMDGCN: %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
148 // AMDGCN: store %struct.LargeStructOneMember %u.coerce, ptr addrspace(5) %[[U]], align 8
149 // AMDGCN: call void @FuncOneLargeMember(ptr addrspace(5) noundef byref(%struct.LargeStructOneMember) align 8 %[[U]])
150 kernel void KernelLargeOneMember(struct LargeStructOneMember u) {
151 FuncOneLargeMember(u);
154 // AMDGCN-LABEL: define{{.*}} void @FuncTwoMember(<2 x i32> %u.coerce0, <2 x i32> %u.coerce1)
155 void FuncTwoMember(struct StructTwoMember u) {
159 // AMDGCN-LABEL: define dso_local void @FuncLargeTwoMember
160 // AMDGCN-SAME: (ptr addrspace(5) noundef byref([[STRUCT_LARGESTRUCTTWOMEMBER:%.*]]) align 8 [[TMP0:%.*]])
161 // AMDGCN: %[[U:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
162 // AMDGCN: call void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) align 8 %[[U]], ptr addrspace(5) align 8 [[TMP0]], i64 480, i1 false)
163 void FuncLargeTwoMember(struct LargeStructTwoMember u) {
164 u.y[0] = (int2)(0, 0);
167 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelTwoMember
168 // AMDGCN-SAME: (%struct.StructTwoMember %[[u_coerce:.*]])
169 // AMDGCN: %[[u:.*]] = alloca %struct.StructTwoMember, align 8, addrspace(5)
170 // AMDGCN: %[[LD0:.*]] = load <2 x i32>, ptr addrspace(5)
171 // AMDGCN: %[[LD1:.*]] = load <2 x i32>, ptr addrspace(5)
172 // AMDGCN: call void @FuncTwoMember(<2 x i32> %[[LD0]], <2 x i32> %[[LD1]])
173 kernel void KernelTwoMember(struct StructTwoMember u) {
177 // AMDGCN-LABEL: define{{.*}} amdgpu_kernel void @KernelLargeTwoMember
178 // AMDGCN-SAME: (%struct.LargeStructTwoMember %[[u_coerce:.*]])
179 // AMDGCN: %[[u:.*]] = alloca %struct.LargeStructTwoMember, align 8, addrspace(5)
180 // AMDGCN: store %struct.LargeStructTwoMember %[[u_coerce]], ptr addrspace(5) %[[u]]
181 // AMDGCN: call void @FuncLargeTwoMember(ptr addrspace(5) noundef byref(%struct.LargeStructTwoMember) align 8 %[[u]])
182 kernel void KernelLargeTwoMember(struct LargeStructTwoMember u) {
183 FuncLargeTwoMember(u);