[clang-format] Fix a bug in aligning comments above PPDirective (#72791)
[llvm-project.git] / clang / test / CodeGenOpenCL / cl20-device-side-enqueue.cl
blobf0c164795b7642269caf24f5f5803da511307ce9
1 // RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
2 // RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
3 // RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
4 // RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B32,SPIR
5 // RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefixes=COMMON,B64,SPIR
6 // RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "spir64-unknown-unknown" | FileCheck %s --check-prefix=CHECK-LIFETIMES
7 // RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL2.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
8 // RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O0 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefixes=COMMON,B64,X86
9 // RUN: %clang_cc1 -no-enable-noundef-analysis %s -cl-std=CL3.0 -ffake-address-space-map -O1 -emit-llvm -o - -triple "x86_64-unknown-linux-gnu" | FileCheck %s --check-prefix=CHECK-LIFETIMES
11 #pragma OPENCL EXTENSION cl_khr_subgroups : enable
13 typedef void (^bl_t)(local void *);
14 typedef struct {int a;} ndrange_t;
16 // For a block global variable, first emit the block literal as a global variable, then emit the block variable itself.
17 // COMMON: [[BL_GLOBAL:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr [[INV_G:@[^ ]+]] to ptr addrspace(4)) }
18 // COMMON: @block_G ={{.*}} addrspace(1) constant ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BL_GLOBAL]] to ptr addrspace(4))
20 // For anonymous blocks without captures, emit block literals as global variable.
21 // COMMON: [[BLG0:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
22 // COMMON: [[BLG1:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
23 // COMMON: [[BLG2:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
24 // COMMON: [[BLG3:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
25 // COMMON: [[BLG4:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
26 // COMMON: [[BLG5:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
27 // COMMON: [[BLG6:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
28 // COMMON: [[BLG7:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
29 // COMMON: [[BLG8:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr [[INVG8:@[^ ]+]] to ptr addrspace(4)) }
30 // COMMON: [[BLG9:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr [[INVG9:@[^ ]+]] to ptr addrspace(4)) }
31 // COMMON: [[BLG10:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
32 // COMMON: [[BLG11:@__block_literal_global[^ ]*]] = internal addrspace(1) constant { i32, i32, ptr addrspace(4) } { i32 {{[0-9]+}}, i32 {{[0-9]+}}, ptr addrspace(4) addrspacecast (ptr {{@[^ ]+}} to ptr addrspace(4)) }
34 // Emits block literal [[BL_GLOBAL]], invoke function [[INV_G]] and global block variable @block_G
35 // COMMON: define internal {{(spir_func )?}}void [[INV_G]](ptr addrspace(4) %{{.*}}, ptr addrspace(3) %{{.*}})
36 const bl_t block_G = (bl_t) ^ (local void *a) {};
38 void callee(int id, __global int *out) {
39 out[id] = id;
42 // COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(ptr addrspace(1) align 4 %{{.*}}, ptr addrspace(1) align 4 %b, i32 %i)
43 kernel void device_side_enqueue(global int *a, global int *b, int i) {
44 // SPIR: %default_queue = alloca target("spirv.Queue")
45 // X86: %default_queue = alloca ptr
46 queue_t default_queue;
47 // COMMON: %flags = alloca i32
48 unsigned flags = 0;
49 // COMMON: %ndrange = alloca %struct.ndrange_t
50 ndrange_t ndrange;
51 // SPIR: %clk_event = alloca target("spirv.DeviceEvent")
52 // X86: %clk_event = alloca ptr
53 clk_event_t clk_event;
54 // SPIR: %event_wait_list = alloca target("spirv.DeviceEvent")
55 // X86: %event_wait_list = alloca ptr
56 clk_event_t event_wait_list;
57 // SPIR: %event_wait_list2 = alloca [1 x target("spirv.DeviceEvent")]
58 // X86: %event_wait_list2 = alloca [1 x ptr]
59 clk_event_t event_wait_list2[] = {clk_event};
61 // COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4
63 // B32: %[[BLOCK_SIZES1:.*]] = alloca [1 x i32]
64 // B64: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
65 // CHECK-LIFETIMES: %[[BLOCK_SIZES1:.*]] = alloca [1 x i64]
66 // B32: %[[BLOCK_SIZES2:.*]] = alloca [1 x i32]
67 // B64: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
68 // CHECK-LIFETIMES: %[[BLOCK_SIZES2:.*]] = alloca [1 x i64]
69 // B32: %[[BLOCK_SIZES3:.*]] = alloca [1 x i32]
70 // B64: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
71 // CHECK-LIFETIMES: %[[BLOCK_SIZES3:.*]] = alloca [1 x i64]
72 // B32: %[[BLOCK_SIZES4:.*]] = alloca [1 x i32]
73 // B64: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
74 // CHECK-LIFETIMES: %[[BLOCK_SIZES4:.*]] = alloca [1 x i64]
75 // B32: %[[BLOCK_SIZES5:.*]] = alloca [1 x i32]
76 // B64: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
77 // CHECK-LIFETIMES: %[[BLOCK_SIZES5:.*]] = alloca [1 x i64]
78 // B32: %[[BLOCK_SIZES6:.*]] = alloca [3 x i32]
79 // B64: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
80 // CHECK-LIFETIMES: %[[BLOCK_SIZES6:.*]] = alloca [3 x i64]
81 // B32: %[[BLOCK_SIZES7:.*]] = alloca [1 x i32]
82 // B64: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
83 // CHECK-LIFETIMES: %[[BLOCK_SIZES7:.*]] = alloca [1 x i64]
85 // Emits block literal on stack and block kernel [[INVLK1]].
86 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
87 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
88 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
89 // COMMON: store ptr addrspace(4) addrspacecast (ptr [[INVL1:@__device_side_enqueue_block_invoke[^ ]*]] to ptr addrspace(4)), ptr %block.invoke
90 // COMMON: [[BL_I8:%[0-9]+]] ={{.*}} addrspacecast ptr %block to ptr addrspace(4)
91 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic(
92 // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
93 // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
94 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVLK1:[^ ]+_kernel]] to ptr addrspace(4)),
95 // COMMON-SAME: ptr addrspace(4) [[BL_I8]])
96 enqueue_kernel(default_queue, flags, ndrange,
97 ^(void) {
98 a[i] = b[i];
99 });
101 // Emits block literal on stack and block kernel [[INVLK2]].
102 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
103 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
104 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
105 // COMMON: [[WAIT_EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %event_wait_list to ptr addrspace(4)
106 // COMMON: [[EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %clk_event to ptr addrspace(4)
107 // COMMON: store ptr addrspace(4) addrspacecast (ptr [[INVL2:@__device_side_enqueue_block_invoke[^ ]*]] to ptr addrspace(4)), ptr %block.invoke
108 // COMMON: [[BL_I8:%[0-9]+]] ={{.*}} addrspacecast ptr %block4 to ptr addrspace(4)
109 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic_events
110 // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
111 // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
112 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVLK2:[^ ]+_kernel]] to ptr addrspace(4)),
113 // COMMON-SAME: ptr addrspace(4) [[BL_I8]])
114 enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
115 ^(void) {
116 a[i] = b[i];
119 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic_events
120 // SPIR-SAME: (target("spirv.Queue") {{%[0-9]+}}, i32 {{%[0-9]+}}, ptr {{.*}}, i32 1, ptr addrspace(4) null, ptr addrspace(4) null,
121 // X86-SAME: (ptr {{%[0-9]+}}, i32 {{%[0-9]+}}, ptr {{.*}}, i32 1, ptr addrspace(4) null, ptr addrspace(4) null,
122 enqueue_kernel(default_queue, flags, ndrange, 1, 0, 0,
123 ^(void) {
124 return;
127 // Emits global block literal [[BLG1]] and block kernel [[INVGK1]].
128 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
129 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
130 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
131 // CHECK-LIFETIMES: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES1]])
132 // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
133 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES1]])
134 // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES1]], i32 0, i32 0
135 // B32: store i32 256, ptr %[[TMP]], align 4
136 // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES1]], i32 0, i32 0
137 // B64: store i64 256, ptr %[[TMP]], align 8
138 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
139 // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
140 // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
141 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK1:[^ ]+_kernel]] to ptr addrspace(4)),
142 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG1]] to ptr addrspace(4)), i32 1,
143 // B32-SAME: ptr %[[TMP]])
144 // B64-SAME: ptr %[[TMP]])
145 enqueue_kernel(default_queue, flags, ndrange,
146 ^(local void *p) {
147 return;
149 256);
151 char c;
152 // Emits global block literal [[BLG2]] and block kernel [[INVGK2]].
153 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
154 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
155 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
156 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES2]])
157 // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
158 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES2]])
159 // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES2]], i32 0, i32 0
160 // B32: store i32 %{{.*}}, ptr %[[TMP]], align 4
161 // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES2]], i32 0, i32 0
162 // B64: store i64 %{{.*}}, ptr %[[TMP]], align 8
163 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
164 // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
165 // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
166 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK2:[^ ]+_kernel]] to ptr addrspace(4)),
167 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG2]] to ptr addrspace(4)), i32 1,
168 // B32-SAME: ptr %[[TMP]])
169 // B64-SAME: ptr %[[TMP]])
170 enqueue_kernel(default_queue, flags, ndrange,
171 ^(local void *p) {
172 return;
176 // Emits global block literal [[BLG3]] and block kernel [[INVGK3]].
177 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
178 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
179 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
180 // SPIR: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x target("spirv.DeviceEvent")], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
181 // X86: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x ptr], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
182 // COMMON: [[WAIT_EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr [[AD]] to ptr addrspace(4)
183 // COMMON: [[EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %clk_event to ptr addrspace(4)
184 // CHECK-LIFETIMES: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES3]])
185 // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs(
186 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES3]])
187 // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES3]], i32 0, i32 0
188 // B32: store i32 256, ptr %[[TMP]], align 4
189 // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES3]], i32 0, i32 0
190 // B64: store i64 256, ptr %[[TMP]], align 8
191 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs
192 // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
193 // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
194 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK3:[^ ]+_kernel]] to ptr addrspace(4)),
195 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG3]] to ptr addrspace(4)), i32 1,
196 // B32-SAME: ptr %[[TMP]])
197 // B64-SAME: ptr %[[TMP]])
198 enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
199 ^(local void *p) {
200 return;
202 256);
204 // Emits global block literal [[BLG4]] and block kernel [[INVGK4]].
205 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
206 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
207 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
208 // SPIR: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x target("spirv.DeviceEvent")], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
209 // X86: [[AD:%arraydecay[0-9]*]] = getelementptr inbounds [1 x ptr], ptr %event_wait_list2, i{{32|64}} 0, i{{32|64}} 0
210 // COMMON: [[WAIT_EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr [[AD]] to ptr addrspace(4)
211 // COMMON: [[EVNT:%[0-9]+]] ={{.*}} addrspacecast ptr %clk_event to ptr addrspace(4)
212 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES4]])
213 // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs(
214 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES4]])
215 // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES4]], i32 0, i32 0
216 // B32: store i32 %{{.*}}, ptr %[[TMP]], align 4
217 // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES4]], i32 0, i32 0
218 // B64: store i64 %{{.*}}, ptr %[[TMP]], align 8
219 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_events_varargs
220 // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
221 // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr {{.*}}, i32 2, ptr addrspace(4) [[WAIT_EVNT]], ptr addrspace(4) [[EVNT]],
222 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK4:[^ ]+_kernel]] to ptr addrspace(4)),
223 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG4]] to ptr addrspace(4)), i32 1,
224 // B32-SAME: ptr %[[TMP]])
225 // B64-SAME: ptr %[[TMP]])
226 enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
227 ^(local void *p) {
228 return;
232 long l;
233 // Emits global block literal [[BLG5]] and block kernel [[INVGK5]].
234 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
235 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
236 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
237 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES5]])
238 // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
239 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES5]])
240 // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES5]], i32 0, i32 0
241 // B32: store i32 %{{.*}}, ptr %[[TMP]], align 4
242 // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES5]], i32 0, i32 0
243 // B64: store i64 %{{.*}}, ptr %[[TMP]], align 8
244 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs
245 // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
246 // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
247 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK5:[^ ]+_kernel]] to ptr addrspace(4)),
248 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG5]] to ptr addrspace(4)), i32 1,
249 // B32-SAME: ptr %[[TMP]])
250 // B64-SAME: ptr %[[TMP]])
251 enqueue_kernel(default_queue, flags, ndrange,
252 ^(local void *p) {
253 return;
257 // Emits global block literal [[BLG6]] and block kernel [[INVGK6]].
258 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
259 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
260 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
261 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %[[BLOCK_SIZES6]])
262 // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
263 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %[[BLOCK_SIZES6]])
264 // B32: %[[TMP:.*]] = getelementptr [3 x i32], ptr %[[BLOCK_SIZES6]], i32 0, i32 0
265 // B32: store i32 1, ptr %[[TMP]], align 4
266 // B32: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i32], ptr %[[BLOCK_SIZES6]], i32 0, i32 1
267 // B32: store i32 2, ptr %[[BLOCK_SIZES62]], align 4
268 // B32: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i32], ptr %[[BLOCK_SIZES6]], i32 0, i32 2
269 // B32: store i32 4, ptr %[[BLOCK_SIZES63]], align 4
270 // B64: %[[TMP:.*]] = getelementptr [3 x i64], ptr %[[BLOCK_SIZES6]], i32 0, i32 0
271 // B64: store i64 1, ptr %[[TMP]], align 8
272 // B64: %[[BLOCK_SIZES62:.*]] = getelementptr [3 x i64], ptr %[[BLOCK_SIZES6]], i32 0, i32 1
273 // B64: store i64 2, ptr %[[BLOCK_SIZES62]], align 8
274 // B64: %[[BLOCK_SIZES63:.*]] = getelementptr [3 x i64], ptr %[[BLOCK_SIZES6]], i32 0, i32 2
275 // B64: store i64 4, ptr %[[BLOCK_SIZES63]], align 8
276 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs
277 // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
278 // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
279 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK6:[^ ]+_kernel]] to ptr addrspace(4)),
280 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG6]] to ptr addrspace(4)), i32 3,
281 // B32-SAME: ptr %[[TMP]])
282 // B64-SAME: ptr %[[TMP]])
283 enqueue_kernel(default_queue, flags, ndrange,
284 ^(local void *p1, local void *p2, local void *p3) {
285 return;
287 1, 2, 4);
289 // Emits global block literal [[BLG7]] and block kernel [[INVGK7]].
290 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
291 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
292 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
293 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %[[BLOCK_SIZES7]])
294 // CHECK-LIFETIMES-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs(
295 // CHECK-LIFETIMES-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %[[BLOCK_SIZES7]])
296 // B32: %[[TMP:.*]] = getelementptr [1 x i32], ptr %[[BLOCK_SIZES7]], i32 0, i32 0
297 // B32: store i32 0, ptr %[[TMP]], align 4
298 // B64: %[[TMP:.*]] = getelementptr [1 x i64], ptr %[[BLOCK_SIZES7]], i32 0, i32 0
299 // B64: store i64 4294967296, ptr %[[TMP]], align 8
300 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_varargs
301 // SPIR-SAME: (target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
302 // X86-SAME: (ptr [[DEF_Q]], i32 [[FLAGS]], ptr [[NDR]]{{([0-9]+)?}},
303 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK7:[^ ]+_kernel]] to ptr addrspace(4)),
304 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG7]] to ptr addrspace(4)), i32 1,
305 // B32-SAME: ptr %[[TMP]])
306 // B64-SAME: ptr %[[TMP]])
307 enqueue_kernel(default_queue, flags, ndrange,
308 ^(local void *p) {
309 return;
311 4294967296L);
313 // Emits global block literal [[BLG8]] and invoke function [[INVG8]].
314 // The full types of these expressions are long (and repeated elsewhere), so we
315 // capture them as part of the regex for convenience and clarity.
316 // COMMON: store ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)), ptr %block_A
317 void (^const block_A)(void) = ^{
318 return;
321 // Emits global block literal [[BLG9]] and invoke function [[INVG9]].
322 // COMMON: store ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG9]] to ptr addrspace(4)), ptr %block_B
323 void (^const block_B)(local void *) = ^(local void *a) {
324 return;
327 // Uses global block literal [[BLG8]] and invoke function [[INVG8]].
328 // COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4))) [[INVOKE_ATTR:#[0-9]+]]
329 block_A();
331 // Emits global block literal [[BLG8]] and block kernel [[INVGK8]]. [[INVGK8]] calls [[INVG8]].
332 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
333 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
334 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
335 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic(
336 // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
337 // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
338 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK8:[^ ]+_kernel]] to ptr addrspace(4)),
339 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
340 enqueue_kernel(default_queue, flags, ndrange, block_A);
342 // Uses block kernel [[INVGK8]] and global block literal [[BLG8]].
343 // COMMON: call {{(spir_func )?}}i32 @__get_kernel_work_group_size_impl(
344 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK8]] to ptr addrspace(4)),
345 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
346 unsigned size = get_kernel_work_group_size(block_A);
348 // Uses global block literal [[BLG8]] and invoke function [[INVG8]]. Make sure no redundant block literal and invoke functions are emitted.
349 // COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke_11(ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
350 block_A();
352 // Make sure that the block invoke function is resolved correctly after a sequence of assignments.
353 // COMMON: store ptr addrspace(4)
354 // COMMON-SAME: addrspacecast (ptr addrspace(1)
355 // COMMON-SAME: [[BL_GLOBAL]]
356 // COMMON-SAME: to ptr addrspace(4)),
357 // COMMON-SAME: ptr %b1,
358 bl_t b1 = block_G;
359 // COMMON: store ptr addrspace(4)
360 // COMMON-SAME: addrspacecast (ptr addrspace(1)
361 // COMMON-SAME: [[BL_GLOBAL]]
362 // COMMON-SAME: to ptr addrspace(4)),
363 // COMMON-SAME: ptr %b2,
364 bl_t b2 = b1;
365 // COMMON: call {{(spir_func )?}}void @block_G_block_invoke(ptr addrspace(4) addrspacecast (ptr addrspace(1)
366 // COMMON-SAME: [[BL_GLOBAL]]
367 // COMMON-SAME: to ptr addrspace(4)), ptr addrspace(3) null)
368 b2(0);
369 // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
370 // COMMON: call {{(spir_func )?}}i32 @__get_kernel_preferred_work_group_size_multiple_impl(
371 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INV_G_K:[^ ]+_kernel]] to ptr addrspace(4)),
372 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BL_GLOBAL]] to ptr addrspace(4)))
373 size = get_kernel_preferred_work_group_size_multiple(b2);
375 void (^block_C)(void) = ^{
376 callee(i, a);
378 // Emits block literal on stack and block kernel [[INVLK3]].
379 // COMMON: store ptr addrspace(4) addrspacecast (ptr [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to ptr addrspace(4)), ptr %block.invoke
380 // SPIR: [[DEF_Q:%[0-9]+]] = load target("spirv.Queue"), ptr %default_queue
381 // X86: [[DEF_Q:%[0-9]+]] = load ptr, ptr %default_queue
382 // COMMON: [[FLAGS:%[0-9]+]] = load i32, ptr %flags
383 // COMMON: [[BL_I8:%[0-9]+]] ={{.*}} addrspacecast ptr {{.*}} to ptr addrspace(4)
384 // COMMON-LABEL: call {{(spir_func )?}}i32 @__enqueue_kernel_basic(
385 // SPIR-SAME: target("spirv.Queue") [[DEF_Q]], i32 [[FLAGS]], ptr byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
386 // X86-SAME: ptr [[DEF_Q]], i32 [[FLAGS]], ptr byval(%struct.ndrange_t) [[NDR]]{{([0-9]+)?}},
387 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVLK3:[^ ]+_kernel]] to ptr addrspace(4)),
388 // COMMON-SAME: ptr addrspace(4) [[BL_I8]])
389 enqueue_kernel(default_queue, flags, ndrange, block_C);
391 // Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INVG9]].
392 // COMMON: call {{(spir_func )?}}i32 @__get_kernel_work_group_size_impl(
393 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK9:[^ ]+_kernel]] to ptr addrspace(4)),
394 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG9]] to ptr addrspace(4)))
395 size = get_kernel_work_group_size(block_B);
397 // Uses global block literal [[BLG8]] and block kernel [[INVGK8]]. Make sure no redundant block literal and invoke functions are emitted.
398 // COMMON: call {{(spir_func )?}}i32 @__get_kernel_preferred_work_group_size_multiple_impl(
399 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK8]] to ptr addrspace(4)),
400 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG8]] to ptr addrspace(4)))
401 size = get_kernel_preferred_work_group_size_multiple(block_A);
403 // Uses global block literal [[BL_GLOBAL]] and block kernel [[INV_G_K]]. [[INV_G_K]] calls [[INV_G]].
404 // COMMON: call {{(spir_func )?}}i32 @__get_kernel_preferred_work_group_size_multiple_impl(
405 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INV_G_K:[^ ]+_kernel]] to ptr addrspace(4)),
406 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BL_GLOBAL]] to ptr addrspace(4)))
407 size = get_kernel_preferred_work_group_size_multiple(block_G);
409 // Emits global block literal [[BLG10]] and block kernel [[INVGK10]].
410 // COMMON: call {{(spir_func )?}}i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(ptr {{[^,]+}},
411 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK10:[^ ]+_kernel]] to ptr addrspace(4)),
412 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG10]] to ptr addrspace(4)))
413 size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
415 // Emits global block literal [[BLG11]] and block kernel [[INVGK11]].
416 // COMMON: call {{(spir_func )?}}i32 @__get_kernel_sub_group_count_for_ndrange_impl(ptr {{[^,]+}},
417 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr [[INVGK11:[^ ]+_kernel]] to ptr addrspace(4)),
418 // COMMON-SAME: ptr addrspace(4) addrspacecast (ptr addrspace(1) [[BLG11]] to ptr addrspace(4)))
419 size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
422 // COMMON: define spir_kernel void [[INVLK1]](ptr addrspace(4) %0) #{{[0-9]+}} {
423 // COMMON: entry:
424 // COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke(ptr addrspace(4) %0)
425 // COMMON: ret void
426 // COMMON: }
427 // COMMON: define spir_kernel void [[INVLK2]](ptr addrspace(4){{.*}})
428 // COMMON: define spir_kernel void [[INVGK1]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}}) [[INVOKE_KERNEL_ATTR:#[0-9]+]]
429 // COMMON: define spir_kernel void [[INVGK2]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
430 // COMMON: define spir_kernel void [[INVGK3]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
431 // COMMON: define spir_kernel void [[INVGK4]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
432 // COMMON: define spir_kernel void [[INVGK5]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
433 // COMMON: define spir_kernel void [[INVGK6]](ptr addrspace(4) %0, ptr addrspace(3) %1, ptr addrspace(3) %2, ptr addrspace(3) %3) #{{[0-9]+}} {
434 // COMMON: entry:
435 // COMMON: call {{(spir_func )?}}void @__device_side_enqueue_block_invoke_9(ptr addrspace(4) %0, ptr addrspace(3) %1, ptr addrspace(3) %2, ptr addrspace(3) %3)
436 // COMMON: ret void
437 // COMMON: }
438 // COMMON: define spir_kernel void [[INVGK7]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
439 // COMMON: define internal {{(spir_func )?}}void [[INVG8]](ptr addrspace(4){{.*}}) [[INVG8_INVOKE_FUNC_ATTR:#[0-9]+]]
440 // COMMON: define internal {{(spir_func )?}}void [[INVG9]](ptr addrspace(4){{.*}}, ptr addrspace(3) %{{.*}})
441 // COMMON: define spir_kernel void [[INVGK8]](ptr addrspace(4){{.*}})
442 // COMMON: define spir_kernel void [[INV_G_K]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
443 // COMMON: define spir_kernel void [[INVLK3]](ptr addrspace(4){{.*}})
444 // COMMON: define spir_kernel void [[INVGK9]](ptr addrspace(4){{.*}}, ptr addrspace(3){{.*}})
445 // COMMON: define spir_kernel void [[INVGK10]](ptr addrspace(4){{.*}})
446 // COMMON: define spir_kernel void [[INVGK11]](ptr addrspace(4){{.*}})
448 // SPIR: attributes [[INVG8_INVOKE_FUNC_ATTR]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
449 // SPIR: attributes [[INVOKE_KERNEL_ATTR]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
450 // X86: attributes [[INVG8_INVOKE_FUNC_ATTR]] = { convergent noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="{{[^"]*}}" }
451 // X86: attributes [[INVOKE_KERNEL_ATTR]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="{{[^"]*}}" }