Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / OpenMP / parallel_for_simd_scan_codegen.cpp
blob7e973a602a65cca7f270a6e342e162dba93b59b6
1 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
2 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
3 // RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
5 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
6 // RUN: %clang_cc1 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -emit-pch -o %t %s
7 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
8 // SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
9 // expected-no-diagnostics
10 #ifndef HEADER
11 #define HEADER
13 void foo();
14 void bar();
16 // CHECK: define{{.*}} void @{{.*}}baz{{.*}}(i32 noundef %n)
17 void baz(int n) {
18 static float a[10];
19 static double b;
21 // CHECK: call ptr @llvm.stacksave.p0()
22 // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
24 // float a_buffer[10][n];
25 // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
26 // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
28 // CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(
30 // CHECK: [[A_BUF_SIZE:%.+]] = mul nuw i64 10, [[NUM_ELEMS:%[^,]+]]
32 // float a_buffer[10][n];
33 // CHECK: [[A_BUF:%.+]] = alloca float, i64 [[A_BUF_SIZE]],
35 // double b_buffer[10];
36 // CHECK: [[B_BUF:%.+]] = alloca double, i64 10,
37 // CHECK: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(
38 // CHECK: call void @llvm.stackrestore.p0(ptr
40 #pragma omp parallel for simd reduction(inscan, +:a[:n], b)
41 for (int i = 0; i < 10; ++i) {
42 // CHECK: call void @__kmpc_for_static_init_4(
43 // CHECK: call ptr @llvm.stacksave.p0()
44 // CHECK: store float 0.000000e+00, ptr %
45 // CHECK: store double 0.000000e+00, ptr [[B_PRIV_ADDR:%.+]],
46 // CHECK: br label %[[DISPATCH:[^,]+]]
47 // CHECK: [[INPUT_PHASE:.+]]:
48 // CHECK: call void @{{.+}}foo{{.+}}()
50 // a_buffer[i][0..n] = a_priv[0..n];
51 // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
52 // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
53 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
54 // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]]
55 // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
56 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
57 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
59 // b_buffer[i] = b_priv;
60 // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
61 // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
62 // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
63 // CHECK: br label %[[LOOP_CONTINUE:.+]]
65 // CHECK: [[DISPATCH]]:
66 // CHECK: br label %[[INPUT_PHASE]]
67 // CHECK: [[LOOP_CONTINUE]]:
68 // CHECK: call void @llvm.stackrestore.p0(ptr %
69 // CHECK: call void @__kmpc_for_static_fini(
70 // CHECK: call void @__kmpc_barrier(
71 foo();
72 #pragma omp scan inclusive(a[:n], b)
73 // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
74 // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
75 // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
76 // CHECK: br label %[[OUTER_BODY:[^,]+]]
77 // CHECK: [[OUTER_BODY]]:
78 // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
79 // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
80 // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
81 // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
82 // CHECK: [[INNER_BODY]]:
83 // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]
85 // a_buffer[i] += a_buffer[i-pow(2, k)];
86 // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
87 // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
88 // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
89 // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
90 // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
91 // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
92 // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
93 // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
94 // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
95 // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
96 // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
97 // CHECK: [[RED_BODY]]:
98 // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi ptr [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
99 // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi ptr [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
100 // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, ptr [[A_BUF_IDX_ELEM]],
101 // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, ptr [[A_BUF_IDX_SUB_K2POW_ELEM]],
102 // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
103 // CHECK: store float [[RED]], ptr [[A_BUF_IDX_ELEM]],
104 // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, ptr [[A_BUF_IDX_ELEM]], i32 1
105 // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, ptr [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
106 // CHECK: [[DONE:%.+]] = icmp eq ptr [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
107 // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
108 // CHECK: [[RED_DONE]]:
110 // b_buffer[i] += b_buffer[i-pow(2, k)];
111 // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
112 // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, ptr [[B_BUF_IDX_SUB_K2POW]],
113 // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
114 // CHECK: store double [[RED]], ptr [[B_BUF_IDX]],
116 // --i;
117 // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
118 // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
119 // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
120 // CHECK: [[INNER_EXIT]]:
122 // ++k;
123 // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
124 // k2pow <<= 1;
125 // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
126 // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
127 // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
128 // CHECK: [[OUTER_EXIT]]:
129 bar();
130 // CHECK: call void @__kmpc_for_static_init_4(
131 // CHECK: call ptr @llvm.stacksave.p0()
132 // CHECK: store float 0.000000e+00, ptr %
133 // CHECK: store double 0.000000e+00, ptr [[B_PRIV_ADDR:%.+]],
134 // CHECK: br label %[[DISPATCH:[^,]+]]
136 // Skip the before scan body.
137 // CHECK: call void @{{.+}}foo{{.+}}()
139 // CHECK: [[EXIT_INSCAN:[^,]+]]:
140 // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
142 // CHECK: [[DISPATCH]]:
143 // a_priv[0..n] = a_buffer[i][0..n];
144 // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
145 // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
146 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS]]
147 // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
148 // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
149 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
150 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
152 // b_priv = b_buffer[i];
153 // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX]]
154 // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
155 // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
156 // CHECK: br label %[[SCAN_PHASE:[^,]+]]
158 // CHECK: [[SCAN_PHASE]]:
159 // CHECK: call void @{{.+}}bar{{.+}}()
160 // CHECK: br label %[[EXIT_INSCAN]]
162 // CHECK: [[LOOP_CONTINUE]]:
163 // CHECK: call void @llvm.stackrestore.p0(ptr %
164 // CHECK: call void @__kmpc_for_static_fini(
167 #pragma omp parallel for simd reduction(inscan, +:a[:n], b)
168 for (int i = 0; i < 10; ++i) {
169 // CHECK: call void @__kmpc_for_static_init_4(
170 // CHECK: call ptr @llvm.stacksave.p0()
171 // CHECK: store float 0.000000e+00, ptr %
172 // CHECK: store double 0.000000e+00, ptr [[B_PRIV_ADDR:%.+]],
173 // CHECK: br label %[[DISPATCH:[^,]+]]
175 // Skip the before scan body.
176 // CHECK: call void @{{.+}}foo{{.+}}()
178 // CHECK: [[EXIT_INSCAN:[^,]+]]:
180 // a_buffer[i][0..n] = a_priv[0..n];
181 // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
182 // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
183 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX]], [[NUM_ELEMS:%.+]]
184 // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF:%.+]], i64 [[IDX]]
185 // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
186 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
187 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_BUF_IDX]], ptr {{.*}}[[A_PRIV]], i64 [[BYTES]], i1 false)
189 // b_buffer[i] = b_priv;
190 // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF:%.+]], i64 [[BASE_IDX]]
191 // CHECK: [[B_PRIV:%.+]] = load double, ptr [[B_PRIV_ADDR]],
192 // CHECK: store double [[B_PRIV]], ptr [[B_BUF_IDX]],
193 // CHECK: br label %[[LOOP_CONTINUE:[^,]+]]
195 // CHECK: [[DISPATCH]]:
196 // CHECK: br label %[[INPUT_PHASE:[^,]+]]
198 // CHECK: [[INPUT_PHASE]]:
199 // CHECK: call void @{{.+}}bar{{.+}}()
200 // CHECK: br label %[[EXIT_INSCAN]]
202 // CHECK: [[LOOP_CONTINUE]]:
203 // CHECK: call void @llvm.stackrestore.p0(ptr %
204 // CHECK: call void @__kmpc_for_static_fini(
205 // CHECK: call void @__kmpc_barrier(
206 foo();
207 #pragma omp scan exclusive(a[:n], b)
208 // CHECK: [[LOG2_10:%.+]] = call double @llvm.log2.f64(double 1.000000e+01)
209 // CHECK: [[CEIL_LOG2_10:%.+]] = call double @llvm.ceil.f64(double [[LOG2_10]])
210 // CHECK: [[CEIL_LOG2_10_INT:%.+]] = fptoui double [[CEIL_LOG2_10]] to i32
211 // CHECK: br label %[[OUTER_BODY:[^,]+]]
212 // CHECK: [[OUTER_BODY]]:
213 // CHECK: [[K:%.+]] = phi i32 [ 0, %{{.+}} ], [ [[K_NEXT:%.+]], %{{.+}} ]
214 // CHECK: [[K2POW:%.+]] = phi i64 [ 1, %{{.+}} ], [ [[K2POW_NEXT:%.+]], %{{.+}} ]
215 // CHECK: [[CMP:%.+]] = icmp uge i64 9, [[K2POW]]
216 // CHECK: br i1 [[CMP]], label %[[INNER_BODY:[^,]+]], label %[[INNER_EXIT:[^,]+]]
217 // CHECK: [[INNER_BODY]]:
218 // CHECK: [[I:%.+]] = phi i64 [ 9, %[[OUTER_BODY]] ], [ [[I_PREV:%.+]], %{{.+}} ]
220 // a_buffer[i] += a_buffer[i-pow(2, k)];
221 // CHECK: [[IDX:%.+]] = mul nsw i64 [[I]], [[NUM_ELEMS]]
222 // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
223 // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
224 // CHECK: [[IDX:%.+]] = mul nsw i64 [[IDX_SUB_K2POW]], [[NUM_ELEMS]]
225 // CHECK: [[A_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
226 // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[I]]
227 // CHECK: [[IDX_SUB_K2POW:%.+]] = sub nuw i64 [[I]], [[K2POW]]
228 // CHECK: [[B_BUF_IDX_SUB_K2POW:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[IDX_SUB_K2POW]]
229 // CHECK: [[A_BUF_END:%.+]] = getelementptr float, ptr [[A_BUF_IDX]], i64 [[NUM_ELEMS]]
230 // CHECK: [[ISEMPTY:%.+]] = icmp eq ptr [[A_BUF_IDX]], [[A_BUF_END]]
231 // CHECK: br i1 [[ISEMPTY]], label %[[RED_DONE:[^,]+]], label %[[RED_BODY:[^,]+]]
232 // CHECK: [[RED_BODY]]:
233 // CHECK: [[A_BUF_IDX_SUB_K2POW_ELEM:%.+]] = phi ptr [ [[A_BUF_IDX_SUB_K2POW]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_SUB_K2POW_NEXT:%.+]], %[[RED_BODY]] ]
234 // CHECK: [[A_BUF_IDX_ELEM:%.+]] = phi ptr [ [[A_BUF_IDX]], %[[INNER_BODY]] ], [ [[A_BUF_IDX_NEXT:%.+]], %[[RED_BODY]] ]
235 // CHECK: [[A_BUF_IDX_VAL:%.+]] = load float, ptr [[A_BUF_IDX_ELEM]],
236 // CHECK: [[A_BUF_IDX_SUB_K2POW_VAL:%.+]] = load float, ptr [[A_BUF_IDX_SUB_K2POW_ELEM]],
237 // CHECK: [[RED:%.+]] = fadd float [[A_BUF_IDX_VAL]], [[A_BUF_IDX_SUB_K2POW_VAL]]
238 // CHECK: store float [[RED]], ptr [[A_BUF_IDX_ELEM]],
239 // CHECK: [[A_BUF_IDX_NEXT]] = getelementptr float, ptr [[A_BUF_IDX_ELEM]], i32 1
240 // CHECK: [[A_BUF_IDX_SUB_K2POW_NEXT]] = getelementptr float, ptr [[A_BUF_IDX_SUB_K2POW_ELEM]], i32 1
241 // CHECK: [[DONE:%.+]] = icmp eq ptr [[A_BUF_IDX_NEXT]], [[A_BUF_END]]
242 // CHECK: br i1 [[DONE]], label %[[RED_DONE]], label %[[RED_BODY]]
243 // CHECK: [[RED_DONE]]:
245 // b_buffer[i] += b_buffer[i-pow(2, k)];
246 // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
247 // CHECK: [[B_BUF_IDX_SUB_K2POW_VAL:%.+]] = load double, ptr [[B_BUF_IDX_SUB_K2POW]],
248 // CHECK: [[RED:%.+]] = fadd double [[B_BUF_IDX_VAL]], [[B_BUF_IDX_SUB_K2POW_VAL]]
249 // CHECK: store double [[RED]], ptr [[B_BUF_IDX]],
251 // --i;
252 // CHECK: [[I_PREV:%.+]] = sub nuw i64 [[I]], 1
253 // CHECK: [[CMP:%.+]] = icmp uge i64 [[I_PREV]], [[K2POW]]
254 // CHECK: br i1 [[CMP]], label %[[INNER_BODY]], label %[[INNER_EXIT]]
255 // CHECK: [[INNER_EXIT]]:
257 // ++k;
258 // CHECK: [[K_NEXT]] = add nuw i32 [[K]], 1
259 // k2pow <<= 1;
260 // CHECK: [[K2POW_NEXT]] = shl nuw i64 [[K2POW]], 1
261 // CHECK: [[CMP:%.+]] = icmp ne i32 [[K_NEXT]], [[CEIL_LOG2_10_INT]]
262 // CHECK: br i1 [[CMP]], label %[[OUTER_BODY]], label %[[OUTER_EXIT:[^,]+]]
263 // CHECK: [[OUTER_EXIT]]:
264 bar();
265 // CHECK: call void @__kmpc_for_static_init_4(
266 // CHECK: call ptr @llvm.stacksave.p0()
267 // CHECK: store float 0.000000e+00, ptr %
268 // CHECK: store double 0.000000e+00, ptr [[B_PRIV_ADDR:%.+]],
269 // CHECK: br label %[[DISPATCH:[^,]+]]
271 // CHECK: [[SCAN_PHASE:.+]]:
272 // CHECK: call void @{{.+}}foo{{.+}}()
273 // CHECK: br label %[[LOOP_CONTINUE:.+]]
275 // CHECK: [[DISPATCH]]:
276 // if (i > 0)
277 // a_priv[0..n] = a_buffer[i-1][0..n];
278 // CHECK: [[BASE_IDX_I:%.+]] = load i32, ptr [[IV_ADDR:%.+]],
279 // CHECK: [[BASE_IDX:%.+]] = zext i32 [[BASE_IDX_I]] to i64
280 // CHECK: [[CMP:%.+]] = icmp eq i64 [[BASE_IDX]], 0
281 // CHECK: br i1 [[CMP]], label %[[IF_DONE:[^,]+]], label %[[IF_THEN:[^,]+]]
282 // CHECK: [[IF_THEN]]:
283 // CHECK: [[BASE_IDX_SUB_1:%.+]] = sub nuw i64 [[BASE_IDX]], 1
284 // CHECK: [[IDX:%.+]] = mul nsw i64 [[BASE_IDX_SUB_1]], [[NUM_ELEMS]]
285 // CHECK: [[A_BUF_IDX:%.+]] = getelementptr inbounds float, ptr [[A_BUF]], i64 [[IDX]]
286 // CHECK: [[A_PRIV:%.+]] = getelementptr inbounds [10 x float], ptr [[A_PRIV_ADDR:%.+]], i64 0, i64 0
287 // CHECK: [[BYTES:%.+]] = mul nuw i64 [[NUM_ELEMS:%.+]], 4
288 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}[[A_PRIV]], ptr {{.*}}[[A_BUF_IDX]], i64 [[BYTES]], i1 false)
290 // b_priv = b_buffer[i-1];
291 // CHECK: [[B_BUF_IDX:%.+]] = getelementptr inbounds double, ptr [[B_BUF]], i64 [[BASE_IDX_SUB_1]]
292 // CHECK: [[B_BUF_IDX_VAL:%.+]] = load double, ptr [[B_BUF_IDX]],
293 // CHECK: store double [[B_BUF_IDX_VAL]], ptr [[B_PRIV_ADDR]],
294 // CHECK: br label %[[SCAN_PHASE]]
296 // CHECK: [[LOOP_CONTINUE]]:
297 // CHECK: call void @llvm.stackrestore.p0(ptr %
298 // CHECK: call void @__kmpc_for_static_fini(
302 // CHECK: !{!"llvm.loop.vectorize.enable", i1 true}
304 #endif