Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / SemaOpenCL / fdeclare-opencl-builtins.cl
blobbf943a400320c3cbd75c3cc92050f9033428f779
1 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL -fdeclare-opencl-builtins -DNO_HEADER
2 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL -fdeclare-opencl-builtins -finclude-default-header
3 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL1.2 -fdeclare-opencl-builtins -DNO_HEADER -cl-ext=-cl_intel_subgroups
4 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL1.2 -fdeclare-opencl-builtins -finclude-default-header -cl-ext=-cl_intel_subgroups
5 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -DNO_HEADER
6 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header
7 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL3.0 -fdeclare-opencl-builtins -finclude-default-header
8 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++ -fdeclare-opencl-builtins -DNO_HEADER
9 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++ -fdeclare-opencl-builtins -finclude-default-header
10 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CLC++2021 -fdeclare-opencl-builtins -finclude-default-header
11 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header -cl-ext=-cl_khr_fp64 -DNO_FP64
12 // RUN: %clang_cc1 %s -triple spir -verify -pedantic -Wconversion -Werror -fsyntax-only -cl-std=CL3.0 -fdeclare-opencl-builtins -finclude-default-header -DNO_ATOMSCOPE
14 // Test the -fdeclare-opencl-builtins option. This is not a completeness
15 // test, so it should not test for all builtins defined by OpenCL. Instead
16 // this test should cover different functional aspects of the TableGen builtin
17 // function machinery.
// Extension pragmas for the tests below: cl_khr_fp16 is enabled
// unconditionally, while cl_khr_fp64 and cl_khr_3d_image_writes are enabled
// explicitly only for the OpenCL C versions selected by the guards.
19 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
20 #if __OPENCL_C_VERSION__ < CL_VERSION_1_2
21 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
22 #endif
24 #if __OPENCL_C_VERSION__ <= CL_VERSION_1_2
25 #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
26 #endif
28 // First, test that Clang gracefully handles missing types.
29 #ifdef NO_HEADER
// barrier(0) is called here before the NO_HEADER typedefs further down
// (including cl_mem_fence_flags) are declared. The diagnostics attached to
// the call line are all written with a 0+ count; the file-wide
// "typedef type ... not found" error is the only one written without a count.
30 void test_without_header(void) {
31 barrier(0);
32 // expected-note@-1 0+{{candidate function not viable}}
33 // expected-error@-2 0+{{argument type 'void' is incomplete}}
34 // expected-error@-3 0+{{no matching function for call to 'barrier'}}
35 // expected-error@* {{typedef type cl_mem_fence_flags not found; include the base header with -finclude-default-header}}
37 #endif
39 // Provide typedefs when invoking clang without -finclude-default-header.
40 #ifdef NO_HEADER
// Scalar aliases used by the tests and by the vector typedefs below.
41 typedef unsigned char uchar;
42 typedef unsigned int uint;
43 typedef unsigned long ulong;
44 typedef unsigned short ushort;
// Size and pointer-sized integer types, taken from the compiler's predefined macros.
45 typedef __SIZE_TYPE__ size_t;
46 typedef __PTRDIFF_TYPE__ ptrdiff_t;
47 typedef __INTPTR_TYPE__ intptr_t;
48 typedef __UINTPTR_TYPE__ uintptr_t;
// Vector types declared through the ext_vector_type attribute; only the
// widths listed here are provided.
49 typedef char char2 __attribute__((ext_vector_type(2)));
50 typedef char char4 __attribute__((ext_vector_type(4)));
51 typedef uchar uchar4 __attribute__((ext_vector_type(4)));
52 typedef uchar uchar16 __attribute__((ext_vector_type(16)));
53 typedef float float4 __attribute__((ext_vector_type(4)));
54 typedef float float16 __attribute__((ext_vector_type(16)));
55 typedef half half4 __attribute__((ext_vector_type(4)));
56 typedef int int2 __attribute__((ext_vector_type(2)));
57 typedef int int4 __attribute__((ext_vector_type(4)));
58 typedef uint uint2 __attribute__((ext_vector_type(2)));
59 typedef uint uint4 __attribute__((ext_vector_type(4)));
60 typedef long long2 __attribute__((ext_vector_type(2)));
61 typedef long long8 __attribute__((ext_vector_type(8)));
62 typedef ulong ulong4 __attribute__((ext_vector_type(4)));
63 typedef short short16 __attribute__((ext_vector_type(16)));
64 typedef ushort ushort3 __attribute__((ext_vector_type(3)));
// Minimal stand-in types and constants referenced by the tests below.
66 typedef int clk_profiling_info;
67 #define CLK_PROFILING_COMMAND_EXEC_TIME 0x1
69 typedef uint cl_mem_fence_flags;
70 #define CLK_GLOBAL_MEM_FENCE 0x02
72 typedef struct {int a;} ndrange_t;
74 // Enable extensions that are enabled in opencl-c-base.h.
75 #if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
76 #define __opencl_c_device_enqueue 1
77 #define __opencl_c_generic_address_space 1
78 #define cl_khr_subgroup_extended_types 1
79 #define cl_khr_subgroup_ballot 1
80 #define cl_khr_subgroup_non_uniform_arithmetic 1
81 #define cl_khr_subgroup_clustered_reduce 1
82 #define __opencl_c_read_write_images 1
83 #define __opencl_subgroup_builtins 1
84 #endif
// Atomic feature macros are defined only when __OPENCL_CPP_VERSION__ is 100
// or __OPENCL_C_VERSION__ is 200.
86 #if (__OPENCL_CPP_VERSION__ == 100 || __OPENCL_C_VERSION__ == 200)
87 #define __opencl_c_atomic_order_seq_cst 1
88 #define __opencl_c_atomic_scope_device 1
89 #endif
// Defined for every NO_HEADER configuration, independent of language version.
91 #define __opencl_c_named_address_space_builtins 1
92 #endif
// Exercises builtins that take pointer arguments: prefetch on a global const
// int4 pointer, and atom_add / atom_cmpxchg on global_p cast to volatile
// __global int and unsigned int pointers. No diagnostics are expected here.
94 kernel void test_pointers(volatile global void *global_p, global const int4 *a) {
95 int i;
96 unsigned int ui;
98 prefetch(a, 2);
100 atom_add((volatile __global int *)global_p, i);
101 atom_cmpxchg((volatile __global unsigned int *)global_p, ui, ui);
104 // Only test enum arguments when the base header is included, because we need
105 // the enum declarations.
106 #if !defined(NO_HEADER) && (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
// Passes CLK_GLOBAL_MEM_FENCE together with memory_order_* and memory_scope_*
// enumerators to atomic_work_item_fence and
// atomic_compare_exchange_strong_explicit. No diagnostics are expected.
107 kernel void test_enum_args(volatile global atomic_int *global_p, global int *expected) {
108 int desired;
109 atomic_work_item_fence(CLK_GLOBAL_MEM_FENCE, memory_order_acq_rel, memory_scope_device);
110 atomic_compare_exchange_strong_explicit(global_p, expected, desired,
111 memory_order_acq_rel,
112 memory_order_relaxed,
113 memory_scope_work_group);
115 #endif
117 #if defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200
// Exercises builtins called with clk_event_t, atomic_flag, size_t and
// ndrange_t values: capture_event_profiling_info with the
// CLK_PROFILING_COMMAND_EXEC_TIME constant, atomic_flag_clear /
// atomic_flag_test_and_set, and ndrange_2D built from a two-element size_t
// array. No diagnostics are expected.
118 void test_typedef_args(clk_event_t evt, volatile atomic_flag *flg, global unsigned long long *values) {
119 capture_event_profiling_info(evt, CLK_PROFILING_COMMAND_EXEC_TIME, values);
121 atomic_flag_clear(flg);
122 bool result = atomic_flag_test_and_set(flg);
124 size_t ws[2] = {2, 8};
125 ndrange_t r = ndrange_2D(ws);
128 // Check that atomic_fetch_ functions can be called with (u)intptr_t arguments,
129 // despite OpenCLBuiltins.td not providing explicit overloads for those types.
130 void test_atomic_fetch(volatile __generic atomic_int *a_int,
131 volatile __generic atomic_intptr_t *a_intptr,
132 volatile __generic atomic_uintptr_t *a_uintptr) {
133 int i;
134 intptr_t ip;
135 uintptr_t uip;
136 ptrdiff_t ptrdiff;
// atomic_fetch_add: int operand for atomic_int, ptrdiff_t operand for the
// (u)intptr_t atomics.
138 i = atomic_fetch_add(a_int, i);
139 ip = atomic_fetch_add(a_intptr, ptrdiff);
140 uip = atomic_fetch_add(a_uintptr, ptrdiff);
// atomic_fetch_or: operand has the same (u)intptr_t type as the result.
142 ip = atomic_fetch_or(a_intptr, ip);
143 uip = atomic_fetch_or(a_uintptr, uip);
145 #endif
147 #if !defined(NO_HEADER) && !defined(NO_FP64) && __OPENCL_C_VERSION__ >= 200
148 // Check that the atomic_fetch_ functions added by the cl_ext_float_atomics
149 // extension can be called.
// Covers __generic, __local and __global pointers to atomic_float and
// atomic_double, using the plain, order-only, and order-plus-scope call forms.
150 void test_atomic_fetch_with_address_space(volatile __generic atomic_float *a_float,
151 volatile __generic atomic_double *a_double,
152 volatile __local atomic_float *a_float_local,
153 volatile __local atomic_double *a_double_local,
154 volatile __global atomic_float *a_float_global,
155 volatile __global atomic_double *a_double_global) {
156 float f1, resf1;
157 double d1, resd1;
158 resf1 = atomic_fetch_min(a_float, f1);
159 resf1 = atomic_fetch_max_explicit(a_float_local, f1, memory_order_seq_cst);
160 resf1 = atomic_fetch_add_explicit(a_float_global, f1, memory_order_seq_cst, memory_scope_work_group);
162 resd1 = atomic_fetch_min(a_double, d1);
163 resd1 = atomic_fetch_max_explicit(a_double_local, d1, memory_order_seq_cst);
164 resd1 = atomic_fetch_add_explicit(a_double_global, d1, memory_order_seq_cst, memory_scope_work_group);
166 #endif // !defined(NO_HEADER) && !defined(NO_FP64) && __OPENCL_C_VERSION__ >= 200
168 #if !defined(NO_HEADER) && __OPENCL_C_VERSION__ == 200 && defined(__opencl_c_generic_address_space)
170 // Test that overloads that use atomic_double are not available when the fp64
171 // extension is disabled. Test this by counting the number of notes about
172 // candidate functions.
// The call passes the atomic_int pointer itself as the value argument, so no
// atomic_init overload is expected to match and one note is expected per
// candidate.
173 void test_atomic_double_reporting(volatile __generic atomic_int *a) {
174 atomic_init(a, a);
175 // expected-error@-1{{no matching function for call to 'atomic_init'}}
176 #if defined(NO_FP64)
177 // Expecting 5 candidates: int, uint, long, ulong, float
178 // expected-note@-4 5 {{candidate function not viable: no known conversion}}
179 #else
180 // Expecting 6 candidates: int, uint, long, ulong, float, double
181 // expected-note@-7 6 {{candidate function not viable: no known conversion}}
182 #endif
185 #endif
187 #if defined(NO_ATOMSCOPE) && __OPENCL_C_VERSION__ >= 300
188 // Disable the feature by undefining the feature macro.
189 #undef __opencl_c_atomic_scope_device
191 // Test that only the overload with explicit order and scope arguments is
192 // available when the __opencl_c_atomic_scope_device feature is disabled.
// Expected outcome per call: atomic_exchange is undeclared, the three-argument
// atomic_exchange_explicit has no matching overload, and the four-argument
// form with memory_scope_work_group carries no expected diagnostic.
193 void test_atomics_without_scope_device(volatile __generic atomic_int *a_int) {
194 int d;
196 atomic_exchange(a_int, d);
197 // expected-error@-1{{use of undeclared identifier 'atomic_exchange'}}
199 atomic_exchange_explicit(a_int, d, memory_order_seq_cst);
200 // expected-error@-1{{no matching function for call to 'atomic_exchange_explicit'}}
202 atomic_exchange_explicit(a_int, d, memory_order_seq_cst, memory_scope_work_group);
205 #endif
207 // Test the legacy atomic_add builtin with a generic address space pointer:
// the call carries no expected diagnostic when compiled as C++ for OpenCL, and
// is expected to have no matching overload (four candidates) otherwise.
208 #if __OPENCL_C_VERSION__ >= 200
209 void test_legacy_atomics_cpp(__generic volatile unsigned int *a) {
210 atomic_add(a, 1);
211 #if !defined(__cplusplus)
212 // expected-error@-2{{no matching function for call to 'atomic_add'}}
213 // expected-note@-3 4 {{candidate function not viable}}
214 #endif
216 #endif
// Exercises the convert_* builtins. With NO_FP64 defined, convert_double_rtp
// is expected to be undeclared; otherwise a double-to-float convert_float call
// is made with no expected diagnostic. The vector conversions with the _rtz
// and _sat suffixes are checked in every configuration.
218 kernel void basic_conversion(void) {
219 float f;
220 char2 c2;
221 long2 l2;
222 float4 f4;
223 int4 i4;
225 #ifdef NO_FP64
226 (void)convert_double_rtp(f);
227 // expected-error@-1{{use of undeclared identifier 'convert_double_rtp'}}
228 #else
229 double d;
230 f = convert_float(d);
231 #endif
232 l2 = convert_long2_rtz(c2);
233 i4 = convert_int4_sat(f4);
// Negative test: convert_float_sat is expected to be reported as an undeclared
// identifier. When __OPENCL_CPP_VERSION__ is defined, the expected diagnostic
// additionally suggests convert_float and is followed by a note.
236 kernel void basic_conversion_neg(void) {
237 int i;
238 float f;
240 f = convert_float_sat(i);
241 #if !defined(__OPENCL_CPP_VERSION__)
242 // expected-error@-2{{use of undeclared identifier 'convert_float_sat'}}
243 #else
244 // expected-error@-4{{use of undeclared identifier 'convert_float_sat'; did you mean 'convert_float'?}}
245 // expected-note@-5{{'convert_float' declared here}}
246 #endif
// Exercises integer builtins: max with (char, char), (char4, char4) and mixed
// (char4, char) arguments, and abs applied to a char4 and to a uchar4 with
// both results stored in uchar4 variables. No diagnostics are expected.
249 char4 test_int(char c, char4 c4) {
250 char m = max(c, c);
251 char4 m4 = max(c4, c4);
252 uchar4 abs1 = abs(c4);
253 uchar4 abs2 = abs(abs1);
254 return max(c4, c);
// Calls shuffle with a float4 source and a uint4 mask, storing the result in
// a float4. No diagnostics are expected.
257 kernel void basic_vector_misc(float4 a) {
258 float4 res;
259 uint4 mask = (uint4)(1, 2, 3, 4);
261 res = shuffle(a, mask);
// Exercises image builtins on a read_only image2d_t. The two calls made
// without a sampler argument (read_imagef, read_imageh) are expected to have
// no matching overload when __OPENCL_C_VERSION__ is below CL_VERSION_1_2 and
// __OPENCL_CPP_VERSION__ is not defined. The read_imageh call that passes a
// sampler and the get_image_width call carry no expected diagnostics.
264 kernel void basic_image_readonly(read_only image2d_t image_read_only_image2d) {
265 int2 i2;
266 sampler_t sampler;
267 half4 res;
268 float4 resf;
270 resf = read_imagef(image_read_only_image2d, i2);
271 res = read_imageh(image_read_only_image2d, i2);
272 #if __OPENCL_C_VERSION__ < CL_VERSION_1_2 && !defined(__OPENCL_CPP_VERSION__)
273 // expected-error@-3{{no matching function for call to 'read_imagef'}}
274 // expected-error@-3{{no matching function for call to 'read_imageh'}}
275 #endif
276 res = read_imageh(image_read_only_image2d, sampler, i2);
278 int imgWidth = get_image_width(image_read_only_image2d);
281 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
// Calls write_imageh and get_image_depth on a read_write image3d_t. Compiled
// only when the guard above holds; no diagnostics are expected.
282 kernel void basic_image_readwrite(read_write image3d_t image_read_write_image3d) {
283 half4 h4;
284 int4 i4;
286 write_imageh(image_read_write_image3d, i4, h4);
288 int imgDepth = get_image_depth(image_read_write_image3d);
290 #endif // __OPENCL_C_VERSION__ >= CL_VERSION_2_0
// Exercises write builtins on write_only images. The write_imagef and
// write_imageh calls on the image1d_buffer_t carry no expected diagnostics.
// The four-argument write_imagef call on the image3d_t is expected to have no
// matching overload when __OPENCL_C_VERSION__ is at most CL_VERSION_1_2 and
// __OPENCL_CPP_VERSION__ is not defined.
292 kernel void basic_image_writeonly(write_only image1d_buffer_t image_write_only_image1d_buffer, write_only image3d_t image3dwo) {
293 half4 h4;
294 float4 f4;
295 int i;
297 write_imagef(image_write_only_image1d_buffer, i, f4);
298 write_imageh(image_write_only_image1d_buffer, i, h4);
300 int4 i4;
301 write_imagef(image3dwo, i4, i, f4);
302 #if __OPENCL_C_VERSION__ <= CL_VERSION_1_2 && !defined(__OPENCL_CPP_VERSION__)
303 // expected-error@-2{{no matching function for call to 'write_imagef'}}
304 #endif
// Exercises subgroup builtins. get_sub_group_size is expected to be undeclared
// when __OPENCL_C_VERSION__ is at most CL_VERSION_1_2 and
// __OPENCL_CPP_VERSION__ is not defined. sub_group_barrier is only called in
// the configurations selected by the second guard.
307 kernel void basic_subgroup(global uint *out) {
308 out[0] = get_sub_group_size();
309 #if __OPENCL_C_VERSION__ <= CL_VERSION_1_2 && !defined(__OPENCL_CPP_VERSION__)
310 // expected-error@-2{{use of undeclared identifier 'get_sub_group_size'}}
311 #endif
313 // Only test when the base header is included, because we need the enum declarations.
314 #if !defined(NO_HEADER) && (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
315 sub_group_barrier(CLK_GLOBAL_MEM_FENCE, memory_scope_device);
316 #endif
// Calls four subgroup builtins: get_sub_group_eq_mask,
// sub_group_non_uniform_scan_inclusive_or,
// sub_group_clustered_reduce_logical_xor and sub_group_broadcast on a char2.
// Each one is expected to be an undeclared identifier when
// __OPENCL_C_VERSION__ is below CL_VERSION_2_0 and __OPENCL_CPP_VERSION__ is
// not defined.
319 kernel void extended_subgroup(global uint4 *out, global int *scalar, global char2 *c2) {
320 out[0] = get_sub_group_eq_mask();
321 scalar[0] = sub_group_non_uniform_scan_inclusive_or(3);
322 scalar[1] = sub_group_clustered_reduce_logical_xor(2, 4);
323 *c2 = sub_group_broadcast(*c2, 2);
324 #if __OPENCL_C_VERSION__ < CL_VERSION_2_0 && !defined(__OPENCL_CPP_VERSION__)
325 // expected-error@-5{{use of undeclared identifier 'get_sub_group_eq_mask'}}
326 // expected-error@-5{{use of undeclared identifier 'sub_group_non_uniform_scan_inclusive_or'}}
327 // expected-error@-5{{use of undeclared identifier 'sub_group_clustered_reduce_logical_xor'}}
328 // expected-error@-5{{use of undeclared identifier 'sub_group_broadcast'}}
329 #endif
// Exercises the vloadN builtins with pointers in the constant, global, local
// and private address spaces, each cast from a void pointer to the element
// type being loaded. The generic address space loads and their variables are
// compiled only when __OPENCL_C_VERSION__ >= CL_VERSION_2_0. No diagnostics
// are expected.
332 kernel void basic_vector_data(void) {
333 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
334 generic void *generic_p;
335 #endif
336 constant void *constant_p;
337 local void *local_p;
338 global void *global_p;
339 private void *private_p;
340 size_t s;
341 ulong4 ul4;
342 short16 s16;
343 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
344 ushort3 us3;
345 uchar16 uc16;
346 #endif
347 long8 l8;
348 uint2 ui2;
349 float16 f16;
351 ul4 = vload4(s, (const __constant ulong *) constant_p);
352 s16 = vload16(s, (const __constant short *) constant_p);
354 #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0
355 us3 = vload3(s, (const __generic ushort *) generic_p);
356 uc16 = vload16(s, (const __generic uchar *) generic_p);
357 #endif
359 l8 = vload8(s, (const __global long *) global_p);
360 ui2 = vload2(s, (const __local uint *) local_p);
361 f16 = vload16(s, (const __private float *) private_p);
// Calls barrier with CLK_GLOBAL_MEM_FENCE (no expected diagnostic) and
// get_enqueued_local_size, which is expected to be undeclared when
// __OPENCL_CPP_VERSION__ is not defined and __OPENCL_C_VERSION__ is below
// CL_VERSION_2_0.
364 kernel void basic_work_item(void) {
365 uint ui;
367 barrier(CLK_GLOBAL_MEM_FENCE);
369 get_enqueued_local_size(ui);
370 #if !defined(__OPENCL_CPP_VERSION__) && __OPENCL_C_VERSION__ < CL_VERSION_2_0
371 // expected-error@-2{{use of undeclared identifier 'get_enqueued_local_size'}}
372 #endif
375 #ifdef NO_FP64
// Only built for the run that passes -DNO_FP64 together with
// -cl-ext=-cl_khr_fp64. isnan is called with a char2 argument so that no
// overload matches and the candidate notes can be counted per element type.
376 void test_extension_types(char2 c2) {
377 // We should see 6 candidates for float and half types, and none for double types.
378 int i = isnan(c2);
379 // expected-error@-1{{no matching function for call to 'isnan'}}
380 // expected-note@-2 6 {{candidate function not viable: no known conversion from '__private char2' (vector of 2 'char' values) to 'float}}
381 // expected-note@-3 6 {{candidate function not viable: no known conversion from '__private char2' (vector of 2 'char' values) to 'half}}
383 #endif