1 // expected-no-diagnostics
6 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
7 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
8 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK1
9 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
10 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK1
11 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
12 // RUN: %clang_cc1 -DCK1 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK1
14 // RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
15 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
16 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
17 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
18 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
19 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
20 // RUN: %clang_cc1 -DCK1 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// Forward declarations of the two functions that the declare variant
// directives on vxv name as its variants.
26 void p_vxv(int *v1
, int *v2
, int *v3
, int n
);
27 void t_vxv(int *v1
, int *v2
, int *v3
, int n
);
// Base function: stores the element-wise product v1[i] * v2[i] into v3[i]
// for every i in [0, n).
// The two directives below name t_vxv as the variant matched by a `target`
// construct context and p_vxv as the variant matched by a `parallel`
// construct context.
29 #pragma omp declare variant(t_vxv) match(construct={target})
30 #pragma omp declare variant(p_vxv) match(construct={parallel})
31 void vxv(int *v1
, int *v2
, int *v3
, int n
) {
32 for (int i
= 0; i
< n
; i
++) v3
[i
] = v1
[i
] * v2
[i
];
34 // CK1: define dso_local void @vxv
// Variant named by the `parallel` selector on vxv. Same loop shape as the
// base function, but each product is additionally multiplied by 3.
36 void p_vxv(int *v1
, int *v2
, int *v3
, int n
) {
38 for (int i
= 0; i
< n
; i
++) v3
[i
] = v1
[i
] * v2
[i
] * 3;
40 // CK1: define dso_local void @p_vxv
// Variant named by the `target` selector on vxv. It is defined inside a
// declare target region and multiplies each product by 2.
42 #pragma omp declare target
43 void t_vxv(int *v1
, int *v2
, int *v3
, int n
) {
// NOTE(review): the directive below is spelled without the `omp` keyword
// that every other directive in this file carries, so as written it does not
// name an OpenMP construct. Confirm whether `omp distribute simd` was
// intended before changing it, since that would alter what this test compiles.
44 #pragma distribute simd
45 for (int i
= 0; i
< n
; i
++) v3
[i
] = v1
[i
] * v2
[i
] * 2;
47 #pragma omp end declare target
48 // CK1: define dso_local void @t_vxv
51 // CK1-LABEL: define {{[^@]+}}@test
53 int v1
[N
], v2
[N
], v3
[N
];
56 for (int i
= 0; i
< N
; i
++) {
62 #pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N])
66 // CK1: call void @__omp_offloading_[[OFFLOAD:.+]]({{.+}})
69 // CK1: call void @vxv
75 // CK1: call void ({{.+}}) @__kmpc_fork_call(ptr {{.+}}, i32 3, ptr [[PARALLEL_REGION:@[^,]+]]
80 // CK1: define internal void @__omp_offloading_[[OFFLOAD]]({{.+}})
81 // CK1: call void ({{.+}}) @__kmpc_fork_teams(ptr {{.+}}, i32 3, ptr [[TARGET_REGION:@[^,]+]]
82 // CK1: define internal void [[TARGET_REGION]](
83 // CK1: call void @t_vxv
85 // CK1: define internal void [[PARALLEL_REGION]](
86 // CK1: call void @p_vxv
89 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
90 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
91 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK2
92 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
93 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK2
94 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
95 // RUN: %clang_cc1 -DCK2 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK2
97 // RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
98 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
99 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
100 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
101 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
102 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
103 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// Forward declarations of the three functions that the declare variant
// directives on test_base name as its variants. All take triple-indirection
// int pointers plus an extent n.
107 void test_teams(int ***v1
, int ***v2
, int ***v3
, int n
);
108 void test_target(int ***v1
, int ***v2
, int ***v3
, int n
);
109 void test_parallel(int ***v1
, int ***v2
, int ***v3
, int n
);
// Base function: a plain triple-nested loop over i, j, k in [0, n) that
// stores v1[i][j][k] * v2[i][j][k] into v3[i][j][k].
// The directives below name one variant per construct selector:
// test_teams for `teams`, test_target for `target`, test_parallel for
// `parallel`.
111 #pragma omp declare variant(test_teams) match(construct = {teams})
112 #pragma omp declare variant(test_target) match(construct = {target})
113 #pragma omp declare variant(test_parallel) match(construct = {parallel})
114 void test_base(int ***v1
, int ***v2
, int ***v3
, int n
) {
115 for (int i
= 0; i
< n
; i
++)
116 for (int j
= 0; j
< n
; ++j
)
117 for (int k
= 0; k
< n
; ++k
)
118 v3
[i
][j
][k
] = v1
[i
][j
][k
] * v2
[i
][j
][k
];
// Variant named by the `teams` selector on test_base, defined inside a
// declare target region. It computes the same element-wise product as the
// base, with the loop nest placed under a
// `distribute parallel for simd` directive whose collapse(2) clause covers
// the two outer loops (i and j); the k loop stays inside the collapsed nest.
121 #pragma omp declare target
122 void test_teams(int ***v1
, int ***v2
, int ***v3
, int n
) {
123 #pragma omp distribute parallel for simd collapse(2)
124 for (int i
= 0; i
< n
; ++i
)
125 for (int j
= 0; j
< n
; ++j
)
126 for (int k
= 0; k
< n
; ++k
)
127 v3
[i
][j
][k
] = v1
[i
][j
][k
] * v2
[i
][j
][k
];
129 #pragma omp end declare target
// Variant named by the `target` selector on test_base, defined inside a
// declare target region. It computes the same element-wise product as the
// base, with all three loops collapsed under a `parallel for simd`
// directive (collapse(3)).
131 #pragma omp declare target
132 void test_target(int ***v1
, int ***v2
, int ***v3
, int n
) {
133 #pragma omp parallel for simd collapse(3)
134 for (int i
= 0; i
< n
; ++i
)
135 for (int j
= 0; j
< n
; ++j
)
136 for (int k
= 0; k
< n
; ++k
)
137 v3
[i
][j
][k
] = v1
[i
][j
][k
] * v2
[i
][j
][k
];
139 #pragma omp end declare target
// Variant named by the `parallel` selector on test_base. Unlike the other
// two variants it is not wrapped in a declare target region. It computes the
// same element-wise product as the base, with all three loops collapsed
// under a worksharing `for` directive (collapse(3)).
141 void test_parallel(int ***v1
, int ***v2
, int ***v3
, int n
) {
142 #pragma omp for collapse(3)
143 for (int i
= 0; i
< n
; ++i
)
144 for (int j
= 0; j
< n
; ++j
)
145 for (int k
= 0; k
< n
; ++k
)
146 v3
[i
][j
][k
] = v1
[i
][j
][k
] * v2
[i
][j
][k
];
149 // CK2-LABEL: define {{[^@]+}}@test
150 void test(int ***v1
, int ***v2
, int ***v3
, int n
) {
156 test_base(v1
, v2
, v3
, 0);
158 // CK2: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}})
162 test_base(v1
, v2
, v3
, 0);
164 // CK2: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}})
168 test_base(v1
, v2
, v3
, 0);
170 // CK2: call void ({{.+}}) @__kmpc_fork_call(ptr {{.+}}, i32 3, ptr [[PARALLEL_REGION:@[^,]+]]
173 // CK2: define internal void @__omp_offloading_[[OFFLOAD_1]]({{.+}})
174 // CK2: call void ({{.+}}) @__kmpc_fork_teams(ptr {{.+}}, i32 3, ptr [[TARGET_REGION_1:@[^,]+]]
175 // CK2: define internal void [[TARGET_REGION_1]](
176 // CK2: call void @test_teams
178 // CK2: define internal void @__omp_offloading_[[OFFLOAD_2]]({{.+}})
179 // CK2: call void @test_target
181 // CK2: define internal void [[PARALLEL_REGION]](
182 // CK2: call void @test_parallel
186 // RUN: %clang_cc1 -DCK3 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
187 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
188 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK3
189 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
190 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK3
191 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
192 // RUN: %clang_cc1 -DCK3 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK3
194 // RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
195 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
196 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
197 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
198 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
199 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
200 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// Forward declarations of the two functions that the declare variant
// directives on t name as its variants. The last parameter is called n here,
// while the definitions further down call it idx.
206 int t_for(int *v1
, int *v2
, int *v3
, int n
);
207 int t_simd(int *v1
, int *v2
, int *v3
, int n
);
// Base function: returns v1[idx] * v2[idx]. The v3 parameter is accepted but
// not read or written in the visible body.
// The directives below name t_simd as the variant matched by a `simd`
// construct context and t_for as the variant matched by a `for` construct
// context.
209 #pragma omp declare variant(t_simd) match(construct = {simd})
210 #pragma omp declare variant(t_for) match(construct = {for})
211 int t(int *v1
, int *v2
, int *v3
, int idx
) {
212 return v1
[idx
] * v2
[idx
];
// Variant named by the `for` selector on t. Its visible body is the same
// expression as the base function: it returns v1[idx] * v2[idx] and does not
// touch v3.
215 int t_for(int *v1
, int *v2
, int *v3
, int idx
) {
216 return v1
[idx
] * v2
[idx
];
// Variant named by the `simd` selector on t. It carries a `declare simd`
// directive and returns v1[idx] * v2[idx], the same expression as the base
// function; v3 is not touched.
219 #pragma omp declare simd
220 int t_simd(int *v1
, int *v2
, int *v3
, int idx
) {
221 return v1
[idx
] * v2
[idx
];
224 // CK3-LABEL: define {{[^@]+}}@test
226 int v1
[N
], v2
[N
], v3
[N
];
229 for (int i
= 0; i
< N
; i
++) {
236 for (int i
= 0; i
< N
; i
++) {
237 v3
[i
] = t(v1
, v2
, v3
, i
);
239 // CK3: call = call i32 @t_simd
243 for (int i
= 0; i
< N
; i
++) {
244 v3
[i
] = t(v1
, v2
, v3
, i
);
246 // CK3: call{{.+}} = call i32 @t_for
251 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
252 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
253 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --check-prefix=CK4
254 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
255 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=CK4
256 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
257 // RUN: %clang_cc1 -DCK4 -fopenmp -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix=CK4
259 // RUN: %clang_cc1 -DCK4 -verify -fopenmp-simd -triple x86_64-unknown-linux -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
260 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -emit-pch -o %t -fopenmp-version=45 %s
261 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -include-pch %t -verify %s -emit-llvm -o - -fopenmp-version=45 | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
262 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
263 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
264 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -emit-pch -o %t %s
265 // RUN: %clang_cc1 -DCK4 -fopenmp-simd -x c -triple x86_64-unknown-linux -fopenmp-targets=amdgcn-amd-amdhsa -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --implicit-check-not="{{__kmpc|__tgt}}"
// Forward declarations of the three functions that the declare variant
// directives on the following vxv definition name as its variants.
271 void not_selected_vxv(int *v1
, int *v2
, int *v3
, int n
);
272 void combined_vxv(int *v1
, int *v2
, int *v3
, int n
);
273 void all_vxv(int *v1
, int *v2
, int *v3
, int n
);
// Base function: stores the element-wise product v1[i] * v2[i] into v3[i]
// for every i in [0, n).
// Three variants are declared, each with a multi-construct selector:
//   all_vxv          - {target, teams, parallel, for, simd}
//   combined_vxv     - {target, teams, parallel, for}
//   not_selected_vxv - {parallel, for}
275 #pragma omp declare variant(all_vxv) match(construct={target,teams,parallel,for,simd})
276 #pragma omp declare variant(combined_vxv) match(construct={target,teams,parallel,for})
277 #pragma omp declare variant(not_selected_vxv) match(construct={parallel,for})
278 void vxv(int *v1
, int *v2
, int *v3
, int n
) {
279 for (int i
= 0; i
< n
; i
++) v3
[i
] = v1
[i
] * v2
[i
];
// Variant declared on vxv with the {parallel, for} selector. Same loop as
// the base function, with each product additionally multiplied by 3.
282 void not_selected_vxv(int *v1
, int *v2
, int *v3
, int n
) {
283 for (int i
= 0; i
< n
; i
++) v3
[i
] = v1
[i
] * v2
[i
] * 3;
// Variant declared on vxv with the {target, teams, parallel, for} selector,
// defined inside a declare target region. Same loop as the base function,
// with each product additionally multiplied by 2.
286 #pragma omp declare target
287 void combined_vxv(int *v1
, int *v2
, int *v3
, int n
) {
288 for (int i
= 0; i
< n
; i
++) v3
[i
] = v1
[i
] * v2
[i
] * 2;
290 #pragma omp end declare target
// Variant declared on vxv with the {target, teams, parallel, for, simd}
// selector, defined inside a declare target region. Same loop as the base
// function, with each product additionally multiplied by 4.
292 #pragma omp declare target
293 void all_vxv(int *v1
, int *v2
, int *v3
, int n
) {
294 for (int i
= 0; i
< n
; i
++) v3
[i
] = v1
[i
] * v2
[i
] * 4;
296 #pragma omp end declare target
298 // CK4-LABEL: define {{[^@]+}}@test
300 int v1
[N
], v2
[N
], v3
[N
];
303 for (int i
= 0; i
< N
; i
++) {
309 #pragma omp target teams map(to: v1[:N],v2[:N]) map(from: v3[:N])
311 #pragma omp parallel for
312 for (int i
= 0; i
< N
; i
++)
315 // CK4: call void @__omp_offloading_[[OFFLOAD_1:.+]]({{.+}})
318 for (int i
= 0; i
< N
; i
++)
320 // CK4: call void @vxv
322 #pragma omp target teams distribute parallel for simd map(from: v3[:N])
323 for (int i
= 0; i
< N
; i
++)
324 for (int i
= 0; i
< N
; i
++)
325 for (int i
= 0; i
< N
; i
++)
327 // CK4: call void @__omp_offloading_[[OFFLOAD_2:.+]]({{.+}})
329 // CK4-DAG: call void @all_vxv
330 // CK4-DAG: call void @combined_vxv