// Page header captured together with this file (not part of the test itself):
//   Run DCE after a LoopFlatten test to reduce spurious output [nfc]
//   [llvm-project.git] / clang / test / OpenMP / declare_variant_construct_codegen_1.c
//   blob 3cd1ed8dbb32054a7aee54004bdb20f9efdc09b3
// expected-no-diagnostics

#ifndef HEADER
#define HEADER

// RUN: %clang_cc1 -DCK1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1
// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1

// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

#ifdef CK1

#define N 100

// Variants of vxv, declared up front so the `declare variant` pragmas below
// can name them before their definitions appear.
void p_vxv(int *v1, int *v2, int *v3, int n);
void t_vxv(int *v1, int *v2, int *v3, int n);

// Base function: stores the element-wise product v1[i] * v2[i] into v3[i]
// for every i in [0, n). The pragmas register t_vxv as the replacement for
// calls made in a `target` construct context and p_vxv for calls made in a
// `parallel` construct context.
#pragma omp declare variant(t_vxv) match(construct={target})
#pragma omp declare variant(p_vxv) match(construct={parallel})
void vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i];
}
// CK1: define dso_local void @vxv

// Variant of vxv registered for the `parallel` construct context. It runs the
// loop as an `omp for` worksharing loop and scales each product by 3, so its
// output is distinguishable from the base function and the other variant.
void p_vxv(int *v1, int *v2, int *v3, int n) {
#pragma omp for
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3;
}
// CK1: define dso_local void @p_vxv

// Variant of vxv registered for the `target` construct context. It is placed
// in a `declare target` region and scales each product by 2, so its output is
// distinguishable from the base function and the other variant.
#pragma omp declare target
void t_vxv(int *v1, int *v2, int *v3, int n) {
// The directive needs the `omp` namespace: a bare `#pragma distribute simd`
// is an unknown pragma and is silently ignored by the compiler.
#pragma omp distribute simd
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2;
}
#pragma omp end declare target
// CK1: define dso_local void @t_vxv

51 // CK1-LABEL: define {{[^@]+}}@test
52 int test(void) {
53 int v1[N], v2[N], v3[N];
55 // init
56 for (int i = 0; i < N; i++) {
57 v1[i] = (i + 1);
58 v2[i] = -(i + 1);
59 v3[i] = 0;
62 #pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N])
64 vxv(v1, v2, v3, N);
66 // CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}})
68 vxv(v1, v2, v3, N);
69 // CK1: call void @vxv
71 #pragma omp parallel
73 vxv(v1, v2, v3, N);
75 // CK1: call void ({{.+}}) @__kmpc_fork_call(ptr {{.+}}, i32 3, ptr [[PARALLEL_REGION:@[^,]+]]
77 return 0;
80 // CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}})
81 // CK1: call void ({{.+}}) @__kmpc_fork_teams(ptr {{.+}}, i32 3, ptr [[TARGET_REGION:@[^,]+]]
82 // CK1: define internal void [[TARGET_REGION]](
83 // CK1: call void @t_vxv
85 // CK1: define internal void [[PARALLEL_REGION]](
86 // CK1: call void @p_vxv
#endif // CK1

// RUN: %clang_cc1 -DCK2 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK2
// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK2

// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

#ifdef CK2

// Variants of test_base, declared up front so the `declare variant` pragmas
// below can name them before their definitions appear.
void test_teams(int ***v1, int ***v2, int ***v3, int n);
void test_target(int ***v1, int ***v2, int ***v3, int n);
void test_parallel(int ***v1, int ***v2, int ***v3, int n);

// Base function: for every (i, j, k) in [0, n)^3, stores
// v1[i][j][k] * v2[i][j][k] into v3[i][j][k]. The pragmas register one
// replacement per construct context: `teams`, `target` and `parallel`.
#pragma omp declare variant(test_teams) match(construct = {teams})
#pragma omp declare variant(test_target) match(construct = {target})
#pragma omp declare variant(test_parallel) match(construct = {parallel})
void test_base(int ***v1, int ***v2, int ***v3, int n) {
  for (int i = 0; i < n; i++)
    for (int j = 0; j < n; ++j)
      for (int k = 0; k < n; ++k)
        v3[i][j][k] = v1[i][j][k] * v2[i][j][k];
}

// Variant of test_base registered for the `teams` construct context. Same
// element-wise product over [0, n)^3, with the two outer loops collapsed
// under `distribute parallel for simd`. Placed in a `declare target` region.
#pragma omp declare target
void test_teams(int ***v1, int ***v2, int ***v3, int n) {
#pragma omp distribute parallel for simd collapse(2)
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
      for (int k = 0; k < n; ++k)
        v3[i][j][k] = v1[i][j][k] * v2[i][j][k];
}
#pragma omp end declare target

// Variant of test_base registered for the `target` construct context. Same
// element-wise product over [0, n)^3, with all three loops collapsed under
// `parallel for simd`. Placed in a `declare target` region.
#pragma omp declare target
void test_target(int ***v1, int ***v2, int ***v3, int n) {
#pragma omp parallel for simd collapse(3)
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
      for (int k = 0; k < n; ++k)
        v3[i][j][k] = v1[i][j][k] * v2[i][j][k];
}
#pragma omp end declare target

// Variant of test_base registered for the `parallel` construct context. Same
// element-wise product over [0, n)^3, with all three loops collapsed into a
// single `omp for` worksharing loop.
void test_parallel(int ***v1, int ***v2, int ***v3, int n) {
#pragma omp for collapse(3)
  for (int i = 0; i < n; ++i)
    for (int j = 0; j < n; ++j)
      for (int k = 0; k < n; ++k)
        v3[i][j][k] = v1[i][j][k] * v2[i][j][k];
}

// Driver for the second scenario. It calls test_base inside `target` +
// `teams`, inside a lone `target`, and inside `parallel`. The checks pin the
// resolved callee for each site: test_teams, test_target and test_parallel
// respectively.
//
// The label pattern ends in `(` so that it anchors on this function; without
// it the pattern would already match the earlier definition of test_base.
// CK2-LABEL: define {{[^@]+}}@test(
void test(int ***v1, int ***v2, int ***v3, int n) {
  int i;

#pragma omp target
#pragma omp teams
  {
    test_base(v1, v2, v3, 0);
  }
  // CK2: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}})

#pragma omp target
  {
    test_base(v1, v2, v3, 0);
  }
  // CK2: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}})

#pragma omp parallel
  {
    test_base(v1, v2, v3, 0);
  }
  // CK2: call void ({{.+}}) @__kmpc_fork_call(ptr {{.+}}, i32 3, ptr [[PARALLEL_REGION:@[^,]+]]
}

// CK2: define internal void @__omp_offloading_[[OFFLOAD_1]]({{.+}})
// CK2: call void ({{.+}}) @__kmpc_fork_teams(ptr {{.+}}, i32 3, ptr [[TARGET_REGION_1:@[^,]+]]
// CK2: define internal void [[TARGET_REGION_1]](
// CK2: call void @test_teams

// CK2: define internal void @__omp_offloading_[[OFFLOAD_2]]({{.+}})
// CK2: call void @test_target

// CK2: define internal void [[PARALLEL_REGION]](
// CK2: call void @test_parallel

#endif // CK2

// RUN: %clang_cc1 -DCK3 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK3
// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK3

// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

#ifdef CK3

#define N 100

// Variants of t, declared up front so the `declare variant` pragmas below can
// name them before their definitions appear.
int t_for(int *v1, int *v2, int *v3, int n);
int t_simd(int *v1, int *v2, int *v3, int n);

// Base function: returns v1[idx] * v2[idx]; v3 is accepted but not used.
// The pragmas register t_simd for the `simd` construct context and t_for for
// the `for` construct context.
#pragma omp declare variant(t_simd) match(construct = {simd})
#pragma omp declare variant(t_for) match(construct = {for})
int t(int *v1, int *v2, int *v3, int idx) {
  return v1[idx] * v2[idx];
}

// Variant of t registered for the `for` construct context. Returns
// v1[idx] * v2[idx], the same value as the base function; v3 is not used.
int t_for(int *v1, int *v2, int *v3, int idx) {
  return v1[idx] * v2[idx];
}

// Variant of t registered for the `simd` construct context, itself marked
// `declare simd`. Returns v1[idx] * v2[idx], the same value as the base
// function; v3 is not used.
#pragma omp declare simd
int t_simd(int *v1, int *v2, int *v3, int idx) {
  return v1[idx] * v2[idx];
}

224 // CK3-LABEL: define {{[^@]+}}@test
225 void test(void) {
226 int v1[N], v2[N], v3[N];
228 // init
229 for (int i = 0; i < N; i++) {
230 v1[i] = (i + 1);
231 v2[i] = -(i + 1);
232 v3[i] = 0;
235 #pragma omp simd
236 for (int i = 0; i < N; i++) {
237 v3[i] = t(v1, v2, v3, i);
239 // CK3: call = call i32 @t_simd
242 #pragma omp for
243 for (int i = 0; i < N; i++) {
244 v3[i] = t(v1, v2, v3, i);
246 // CK3: call{{.+}} = call i32 @t_for
#endif // CK3

// RUN: %clang_cc1 -DCK4 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK4
// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK4

// RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
// RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"

#ifdef CK4

#define N 100

// Variants of vxv, declared up front so the `declare variant` pragmas below
// can name them before their definitions appear.
void not_selected_vxv(int *v1, int *v2, int *v3, int n);
void combined_vxv(int *v1, int *v2, int *v3, int n);
void all_vxv(int *v1, int *v2, int *v3, int n);

// Base function: stores the element-wise product v1[i] * v2[i] into v3[i]
// for every i in [0, n). Each pragma registers a variant keyed on a list of
// nested constructs: all_vxv on {target, teams, parallel, for, simd},
// combined_vxv on {target, teams, parallel, for}, and not_selected_vxv on
// {parallel, for}.
#pragma omp declare variant(all_vxv) match(construct={target,teams,parallel,for,simd})
#pragma omp declare variant(combined_vxv) match(construct={target,teams,parallel,for})
#pragma omp declare variant(not_selected_vxv) match(construct={parallel,for})
void vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i];
}

// Variant of vxv registered for the {parallel, for} construct list. Scales
// each product by 3 so its output is distinguishable from the other versions.
void not_selected_vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 3;
}

// Variant of vxv registered for the {target, teams, parallel, for} construct
// list. Scales each product by 2 so its output is distinguishable from the
// other versions. Placed in a `declare target` region.
#pragma omp declare target
void combined_vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 2;
}
#pragma omp end declare target

// Variant of vxv registered for the {target, teams, parallel, for, simd}
// construct list. Scales each product by 4 so its output is distinguishable
// from the other versions. Placed in a `declare target` region.
#pragma omp declare target
void all_vxv(int *v1, int *v2, int *v3, int n) {
  for (int i = 0; i < n; i++) v3[i] = v1[i] * v2[i] * 4;
}
#pragma omp end declare target

298 // CK4-LABEL: define {{[^@]+}}@test
299 void test(void) {
300 int v1[N], v2[N], v3[N];
302 //init
303 for (int i = 0; i < N; i++) {
304 v1[i] = (i + 1);
305 v2[i] = -(i + 1);
306 v3[i] = 0;
309 #pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N])
311 #pragma omp parallel for
312 for (int i = 0; i < N; i++)
313 vxv(v1, v2, v3, N);
315 // CK4: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}})
317 #pragma omp simd
318 for (int i = 0; i < N; i++)
319 vxv(v1, v2, v3, N);
320 // CK4: call void @vxv
322 #pragma omp target teams distribute parallel for simd map(from: v3[:N])
323 for (int i = 0; i < N; i++)
324 for (int i = 0; i < N; i++)
325 for (int i = 0; i < N; i++)
326 vxv(v1, v2, v3, N);
327 // CK4: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}})
329 // CK4-DAG: call void @all_vxv
330 // CK4-DAG: call void @combined_vxv
#endif // CK4

#endif // HEADER