Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / OpenMP / ordered_doacross_codegen.cpp
blob0eca16911578c61a5ff5d3c7a6b9e0ce52c7709a
1 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL
2 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
3 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NORMAL
5 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER
6 // RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
7 // RUN: %clang_cc1 -fopenmp -fopenmp-enable-irbuilder -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-IRBUILDER
9 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
10 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
11 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
12 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
13 // expected-no-diagnostics
15 #ifndef HEADER
16 #define HEADER
// Shared fixtures for every test function in this file. The type pattern
// below binds KMP_DIM to the three-i64 struct type that the later alloca and
// getelementptr patterns refer to. `n` is only declared here (extern), the
// four arrays hold 10 ints each, and foo() is declared without a body.
18 // CHECK: [[KMP_DIM:%.+]] = type { i64, i64, i64 }
19 extern int n;
20 int a[10], b[10], c[10], d[10];
21 void foo();
// bar(): two loop nests that each carry an ordered(2) clause -- first under
// `parallel for`, then under `for collapse(2)`. Neither nest contains an
// `ordered depend` directive, and the patterns here are deliberately loose:
// for each nest they only require that a doacross init call and, later, a
// doacross fini call show up in the emitted IR.
// NOTE(review): this listing jumps from embedded line 36 straight to 40, so
// the second nest's inner body and the function's closing brace are not
// shown here.
23 // CHECK-LABEL:bar
24 void bar() {
25 int i,j;
26 // CHECK: call void @__kmpc_doacross_init(
27 // CHECK: call void @__kmpc_doacross_fini(
28 #pragma omp parallel for ordered(2)
29 for (i = 0; i < n; ++i)
30 for (j = 0; j < n; ++j)
31 a[i] = b[i] + 1;
32 // CHECK: call void @__kmpc_doacross_init(
33 // CHECK: call void @__kmpc_doacross_fini(
34 #pragma omp for collapse(2) ordered(2)
35 for (int i = 0; i < n; i++)
36 for (int j = 0; j < n; j++)
// main(): one-dimensional doacross loop (ordered(1)) with i running upward
// from 0 while i < n. The leading patterns pin the setup sequence: a
// one-element dims array, zeroed by a 24-byte memset, whose element receives
// two i64 stores (field 1, then field 2 with the constant 1) before the
// doacross init call and the static-init call.
// The two FileCheck configurations differ in how the thread id reaches the
// runtime calls: the non-IRBuilder run captures it once up front and reuses
// it for init/post/wait/fini, while the IRBuilder run expects a fresh
// __kmpc_global_thread_num call immediately before each post and wait.
40 // CHECK-LABEL: @main()
41 int main() {
42 int i;
43 // CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]],
44 // CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
45 // CHECK: icmp
46 // CHECK-NEXT: br i1 %
47 // CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[DIMS]], i8 0, i64 24, i1 false)
48 // CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], ptr [[DIMS]], i64 0, i64 0
49 // CHECK: getelementptr inbounds [[KMP_DIM]], ptr [[DIM]], i32 0, i32 1
50 // CHECK: store i64 %{{.+}}, ptr %
51 // CHECK: getelementptr inbounds [[KMP_DIM]], ptr [[DIM]], i32 0, i32 2
52 // CHECK: store i64 1, ptr %
53 // CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], ptr [[DIMS]], i64 0, i64 0
54 // CHECK-NORMAL: call void @__kmpc_doacross_init(ptr [[IDENT]], i32 [[GTID]], i32 1, ptr [[DIM]])
55 // CHECK-NORMAL: call void @__kmpc_for_static_init_4(ptr @{{.+}}, i32 [[GTID]], i32 33, ptr %{{.+}}, ptr %{{.+}}, ptr %{{.+}}, ptr %{{.+}}, i32 1, i32 1)
56 #pragma omp for ordered(1)
57 for (int i = 0; i < n; ++i) {
58 a[i] = b[i] + 1;
59 foo();
// Expected after the first foo() call, for depend(source): the iteration
// value is formed as (i - 0) / 1, sign-extended to i64, stored into slot 0
// of a [1 x i64] vector, and that vector is handed to the doacross post call.
60 // CHECK: invoke void [[FOO:.+]](
61 // CHECK: load i32, ptr [[I:%.+]],
62 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0
63 // CHECK-NEXT: sdiv i32 %{{.+}}, 1
64 // CHECK-NEXT: sext i32 %{{.+}} to i64
65 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], ptr [[CNT:%.+]], i64 0, i64 0
66 // CHECK-NEXT: store i64 %{{.+}}, ptr [[TMP]],
67 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], ptr [[CNT]], i64 0, i64 0
68 // CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post(ptr [[IDENT]], i32 [[GTID]], ptr [[TMP]])
69 // CHECK-IRBUILDER-NEXT: [[GTID18:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
70 // CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post(ptr [[IDENT]], i32 [[GTID18]], ptr [[TMP]])
71 #pragma omp ordered depend(source)
72 c[i] = c[i] + 1;
73 foo();
// Expected after the second foo() call, for depend(sink : i - 2): 2 is
// subtracted from i first, then the same (x - 0) / 1 and sign-extension
// sequence is applied, and the vector is handed to the doacross wait call.
74 // CHECK: invoke void [[FOO]]
75 // CHECK: load i32, ptr [[I]],
76 // CHECK-NEXT: sub nsw i32 %{{.+}}, 2
77 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0
78 // CHECK-NEXT: sdiv i32 %{{.+}}, 1
79 // CHECK-NEXT: sext i32 %{{.+}} to i64
80 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], ptr [[CNT:%.+]], i64 0, i64 0
81 // CHECK-NEXT: store i64 %{{.+}}, ptr [[TMP]],
82 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], ptr [[CNT]], i64 0, i64 0
83 // CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait(ptr [[IDENT]], i32 [[GTID]], ptr [[TMP]])
84 // CHECK-IRBUILDER-NEXT: [[GTID30:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
85 // CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait(ptr [[IDENT]], i32 [[GTID30]], ptr [[TMP]])
86 #pragma omp ordered depend(sink : i - 2)
87 d[i] = a[i - 2];
// Teardown: static-fini is expected in both configurations; the doacross fini
// call with the captured thread id is only pinned for the non-IRBuilder run.
90 // CHECK: call void @__kmpc_for_static_fini(
91 // CHECK-NORMAL: call void @__kmpc_doacross_fini(ptr [[IDENT]], i32 [[GTID]])
92 // CHECK: ret i32 0
93 return 0;
// main1(): same one-dimensional doacross shape as main(), but the loop runs
// downward (i starts at n, continues while i > 0, --i). The setup patterns
// are identical to main()'s. What changes is the iteration-value arithmetic:
// it is expected as a plain `sub` of two loaded values followed by unsigned
// udiv/zext, instead of the `sub nsw`/sdiv/sext sequence used for the
// upward-counting loop.
96 // CHECK-LABEL: main1
97 int main1() {
98 // CHECK: [[DIMS:%.+]] = alloca [1 x [[KMP_DIM]]],
99 // CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
100 // CHECK: icmp
101 // CHECK-NEXT: br i1 %
102 // CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[DIMS]], i8 0, i64 24, i1 false)
103 // CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], ptr [[DIMS]], i64 0, i64 0
104 // CHECK: getelementptr inbounds [[KMP_DIM]], ptr [[DIM]], i32 0, i32 1
105 // CHECK: store i64 %{{.+}}, ptr %
106 // CHECK: getelementptr inbounds [[KMP_DIM]], ptr [[DIM]], i32 0, i32 2
107 // CHECK: store i64 1, ptr %
108 // CHECK: [[DIM:%.+]] = getelementptr inbounds [1 x [[KMP_DIM]]], ptr [[DIMS]], i64 0, i64 0
109 // CHECK-NORMAL: call void @__kmpc_doacross_init(ptr [[IDENT]], i32 [[GTID]], i32 1, ptr [[DIM]])
110 // CHECK-NORMAL: call void @__kmpc_for_static_init_4(ptr @{{.+}}, i32 [[GTID]], i32 33, ptr %{{.+}}, ptr %{{.+}}, ptr %{{.+}}, ptr %{{.+}}, i32 1, i32 1)
111 #pragma omp for ordered(1)
112 for (int i = n; i > 0; --i) {
113 a[i] = b[i] + 1;
114 foo();
// Expected after the first foo() call, for depend(source): the value loaded
// through UB minus the current i, divided by 1 (unsigned), zero-extended to
// i64, stored into slot 0 of a [1 x i64] vector, then the doacross post call.
// NOTE(review): UB is presumably the slot holding the loop's starting value
// (n) -- the patterns only bind it as "some pointer"; confirm against the IR.
115 // CHECK: invoke void [[FOO:.+]](
116 // CHECK: [[UB_VAL:%.+]] = load i32, ptr [[UB:%.+]],
117 // CHECK-NEXT: [[I_VAL:%.+]] = load i32, ptr [[I:%.+]],
118 // CHECK-NEXT: sub i32 [[UB_VAL]], [[I_VAL]]
119 // CHECK-NEXT: udiv i32 %{{.+}}, 1
120 // CHECK-NEXT: zext i32 %{{.+}} to i64
121 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], ptr [[CNT:%.+]], i64 0, i64 0
122 // CHECK-NEXT: store i64 %{{.+}}, ptr [[TMP]],
123 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], ptr [[CNT]], i64 0, i64 0
124 // CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post(ptr [[IDENT]], i32 [[GTID]], ptr [[TMP]])
125 // CHECK-IRBUILDER-NEXT: [[GTID17:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
126 // CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post(ptr [[IDENT]], i32 [[GTID17]], ptr [[TMP]])
127 #pragma omp ordered depend(source)
128 c[i] = c[i] + 1;
129 foo();
// Expected after the second foo() call, for depend(sink : i - 2): i - 2 is
// computed first (bound as SUB), then subtracted from the UB value, followed
// by the same udiv/zext/store sequence and the doacross wait call.
130 // CHECK: invoke void [[FOO]]
131 // CHECK: [[UB_VAL:%.+]] = load i32, ptr [[UB]],
132 // CHECK-NEXT: [[I_VAL:%.+]] = load i32, ptr [[I]],
133 // CHECK-NEXT: [[SUB:%.+]] = sub nsw i32 [[I_VAL]], 2
134 // CHECK-NEXT: sub i32 [[UB_VAL]], [[SUB]]
135 // CHECK-NEXT: udiv i32 %{{.+}}, 1
136 // CHECK-NEXT: zext i32 %{{.+}} to i64
137 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], ptr [[CNT:%.+]], i64 0, i64 0
138 // CHECK-NEXT: store i64 %{{.+}}, ptr [[TMP]],
139 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [1 x i64], ptr [[CNT]], i64 0, i64 0
140 // CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait(ptr [[IDENT]], i32 [[GTID]], ptr [[TMP]])
141 // CHECK-IRBUILDER-NEXT: [[GTID29:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
142 // CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait(ptr [[IDENT]], i32 [[GTID29]], ptr [[TMP]])
143 #pragma omp ordered depend(sink : i - 2)
144 d[i] = a[i - 2];
// Teardown: static-fini is expected in both configurations; the doacross fini
// call with the captured thread id is only pinned for the non-IRBuilder run.
146 // CHECK: call void @__kmpc_for_static_fini(
147 // CHECK-NORMAL: call void @__kmpc_doacross_fini(ptr [[IDENT]], i32 [[GTID]])
148 // CHECK: ret i32 0
149 return 0;
// TestStruct<T>: the constructor contains a two-dimensional doacross loop
// (ordered(2)). The outer loop runs a local `j` from 0 while j < M in steps
// of 1; the inner loop runs the data member `i` from 0 while i < n in steps
// of 2. The setup patterns expect a two-element dims array zeroed by a
// 48-byte memset; element 0 gets the constant 10 in field 1 (M is 10 above),
// element 1 gets a computed value, and both get the constant 1 in field 2,
// before the doacross init call is made with dimension count 2.
// The sink directive carries two depend(sink) clauses, so two wait calls are
// expected, each fed by its own two-slot i64 vector; the source directive
// produces one post call.
// The two FileCheck configurations agree on the arithmetic but differ in
// when slot 0 of each vector is stored: the non-IRBuilder run stores it right
// after the j-based value is computed, the IRBuilder run stores it only after
// the i-based value has been computed as well.
152 // CHECK: define {{.+}}TestStruct
153 template <typename T>
154 struct TestStruct {
155 static const int M = 10;
156 static const int N = 20;
157 T i;
158 T a[N][M];
159 T b[N][M];
160 T foo(T, T);
161 T bar(T, T, T);
162 void baz(T, T);
163 TestStruct() {
164 // CHECK: [[DIMS:%.+]] = alloca [2 x [[KMP_DIM]]],
165 // CHECK-NORMAL: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
166 // CHECK: call void @llvm.memset.p0.i64(ptr align 8 [[DIMS]], i8 0, i64 48, i1 false)
167 // CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], ptr [[DIMS]], i64 0, i64 0
168 // CHECK: getelementptr inbounds [[KMP_DIM]], ptr [[DIM]], i32 0, i32 1
169 // CHECK: store i64 10, ptr %
170 // CHECK: getelementptr inbounds [[KMP_DIM]], ptr [[DIM]], i32 0, i32 2
171 // CHECK: store i64 1, ptr %
172 // CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], ptr [[DIMS]], i64 0, i64 1
173 // CHECK: getelementptr inbounds [[KMP_DIM]], ptr [[DIM]], i32 0, i32 1
174 // CHECK: store i64 %{{.+}}, ptr %
175 // CHECK: getelementptr inbounds [[KMP_DIM]], ptr [[DIM]], i32 0, i32 2
176 // CHECK: store i64 1, ptr %
177 // CHECK: [[DIM:%.+]] = getelementptr inbounds [2 x [[KMP_DIM]]], ptr [[DIMS]], i64 0, i64 0
178 // CHECK-NORMAL: call void @__kmpc_doacross_init(ptr [[IDENT]], i32 [[GTID]], i32 2, ptr [[DIM]])
179 // CHECK-NORMAL: call void @__kmpc_for_static_init_4(ptr @{{.+}}, i32 [[GTID]], i32 33, ptr %{{.+}}, ptr %{{.+}}, ptr %{{.+}}, ptr %{{.+}}, i32 1, i32 1)
180 #pragma omp for ordered(2)
181 for (T j = 0; j < M; j++)
182 for (i = 0; i < n; i += 2) {
183 a[i][j] = foo(i, j);
// First sink vector, for depend(sink : j, i - 2): slot 0 receives
// (j - 0) / 1 and slot 1 receives ((i - 2) - 0) / 2, both sign-extended to
// i64; the vector is then passed to the first doacross wait call.
// NOTE(review): the two slot-0 store patterns in this first group rebind TMP
// (`[[TMP:%.+]]`) rather than referencing the GEP result the way every later
// store pattern does, so they do not tie the store to the preceding
// getelementptr -- confirm whether that looseness is intended.
184 // CHECK: invoke {{.+TestStruct.+foo}}
185 // CHECK: load ptr, ptr %
186 // CHECK: load i32, ptr %
187 // CHECK: load i32, ptr %
188 // CHECK: load i32, ptr [[J:%.+]],
189 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0
190 // CHECK-NEXT: sdiv i32 %{{.+}}, 1
191 // CHECK-NEXT: sext i32 %{{.+}} to i64
192 // CHECK-NORMAL-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT:%.+]], i64 0, i64 0
193 // CHECK-NORMAL-NEXT: store i64 %{{.+}}, ptr [[TMP:%.+]],
194 // CHECK-NEXT: [[I:%.+]] = load ptr, ptr [[I_REF:%.+]],
195 // CHECK-NEXT: load i32, ptr [[I]],
196 // CHECK-NEXT: sub nsw i32 %{{.+}}, 2
197 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0
198 // CHECK-NEXT: sdiv i32 %{{.+}}, 2
199 // CHECK-NEXT: sext i32 %{{.+}} to i64
200 // CHECK-IRBUILDER-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT:%.+]], i64 0, i64 0
201 // CHECK-IRBUILDER-NEXT: store i64 %{{.+}}, ptr [[TMP:%.+]],
202 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT]], i64 0, i64 1
203 // CHECK-NEXT: store i64 %{{.+}}, ptr [[TMP]],
204 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT]], i64 0, i64 0
205 // CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait(ptr [[IDENT]], i32 [[GTID]], ptr [[TMP]])
206 // CHECK-IRBUILDER-NEXT: [[GTID18:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
207 // CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait(ptr [[IDENT]], i32 [[GTID18]], ptr [[TMP]])
// Second sink vector, for depend(sink : j - 1, i): slot 0 receives
// ((j - 1) - 0) / 1 and slot 1 receives (i - 0) / 2; it feeds the second
// doacross wait call, which must directly follow the first one.
// NOTE(review): the load pattern opening this group rebinds J instead of
// reusing the binding from the first group -- confirm whether intended.
208 // CHECK-NEXT: load i32, ptr [[J:%.+]],
209 // CHECK-NEXT: sub nsw i32 %{{.+}}, 1
210 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0
211 // CHECK-NEXT: sdiv i32 %{{.+}}, 1
212 // CHECK-NEXT: sext i32 %{{.+}} to i64
213 // CHECK-NORMAL-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT:%.+]], i64 0, i64 0
214 // CHECK-NORMAL-NEXT: store i64 %{{.+}}, ptr [[TMP]],
215 // CHECK-NEXT: [[I:%.+]] = load ptr, ptr [[I_REF]],
216 // CHECK-NEXT: load i32, ptr [[I]],
217 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0
218 // CHECK-NEXT: sdiv i32 %{{.+}}, 2
219 // CHECK-NEXT: sext i32 %{{.+}} to i64
220 // CHECK-IRBUILDER-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT:%.+]], i64 0, i64 0
221 // CHECK-IRBUILDER-NEXT: store i64 %{{.+}}, ptr [[TMP]],
222 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT]], i64 0, i64 1
223 // CHECK-NEXT: store i64 %{{.+}}, ptr [[TMP]],
224 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT]], i64 0, i64 0
225 // CHECK-NORMAL-NEXT: call void @__kmpc_doacross_wait(ptr [[IDENT]], i32 [[GTID]], ptr [[TMP]])
226 // CHECK-IRBUILDER-NEXT: [[GTID27:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
227 // CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_wait(ptr [[IDENT]], i32 [[GTID27]], ptr [[TMP]])
228 #pragma omp ordered depend(sink : j, i - 2) depend(sink : j - 1, i)
229 b[i][j] = bar(a[i][j], b[i - 1][j], b[i][j - 1]);
// Source vector, for depend(source): slot 0 receives (j - 0) / 1 and slot 1
// receives (i - 0) / 2; the vector is passed to the doacross post call.
230 // CHECK: invoke {{.+TestStruct.+bar}}
231 // CHECK: load ptr, ptr %
232 // CHECK: load i32, ptr %
233 // CHECK: load i32, ptr %
234 // CHECK: load i32, ptr [[J]],
235 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0
236 // CHECK-NEXT: sdiv i32 %{{.+}}, 1
237 // CHECK-NEXT: sext i32 %{{.+}} to i64
238 // CHECK-NORMAL-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT:%.+]], i64 0, i64 0
239 // CHECK-NORMAL-NEXT: store i64 %{{.+}}, ptr [[TMP]],
240 // CHECK-NEXT: [[I:%.+]] = load ptr, ptr [[I_REF]],
241 // CHECK-NEXT: load i32, ptr [[I]],
242 // CHECK-NEXT: sub nsw i32 %{{.+}}, 0
243 // CHECK-NEXT: sdiv i32 %{{.+}}, 2
244 // CHECK-NEXT: sext i32 %{{.+}} to i64
245 // CHECK-IRBUILDER-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT:%.+]], i64 0, i64 0
246 // CHECK-IRBUILDER-NEXT: store i64 %{{.+}}, ptr [[TMP]],
247 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT]], i64 0, i64 1
248 // CHECK-NEXT: store i64 %{{.+}}, ptr [[TMP]],
249 // CHECK-NEXT: [[TMP:%.+]] = getelementptr inbounds [2 x i64], ptr [[CNT]], i64 0, i64 0
250 // CHECK-NORMAL-NEXT: call void @__kmpc_doacross_post(ptr [[IDENT]], i32 [[GTID]], ptr [[TMP]])
251 // CHECK-IRBUILDER-NEXT: [[GTID58:%.+]] = call i32 @__kmpc_global_thread_num(ptr [[IDENT:@.+]])
252 // CHECK-IRBUILDER-NEXT: call void @__kmpc_doacross_post(ptr [[IDENT]], i32 [[GTID58]], ptr [[TMP]])
253 #pragma omp ordered depend(source)
254 baz(a[i][j], b[i][j]);
// Teardown: static-fini is expected in both configurations; the doacross fini
// call with the captured thread id is only pinned for the non-IRBuilder run.
257 // CHECK: call void @__kmpc_for_static_fini(
258 // CHECK-NORMAL: call void @__kmpc_doacross_fini(ptr [[IDENT]], i32 [[GTID]])
259 // CHECK: ret
262 TestStruct<int> s;
263 #endif // HEADER