1 // RUN: %libomp-compile && env LIBOMP_USE_HIDDEN_HELPER_TASK=0 LIBOMP_NUM_HIDDEN_HELPER_THREADS=0 %libomp-run
3 Test for the 'schedule(simd:guided)' clause.
4 The compiler needs to generate dynamic dispatching and pass the schedule
5 value 46 to the OpenMP RTL. The test uses numerous loop parameter combinations.
11 #if defined(WIN32) || defined(_WIN32)
13 #define delay() Sleep(1);
16 #define delay() usleep(10);
19 // uncomment for debug diagnostics:
24 // ---------------------------------------------------------------------------
25 // Various definitions copied from OpenMP RTL
27 kmp_sch_static_balanced_chunked
= 45,
28 kmp_sch_guided_simd
= 46,
29 kmp_sch_runtime_simd
= 47,
32 typedef long long i64
;
33 typedef unsigned long long u64
;
42 extern int __kmpc_global_thread_num(id
*);
43 extern void __kmpc_barrier(id
*, int gtid
);
44 extern void __kmpc_dispatch_init_4(id
*, int, enum sched
, int, int, int, int);
45 extern void __kmpc_dispatch_init_8(id
*, int, enum sched
, i64
, i64
, i64
, i64
);
46 extern int __kmpc_dispatch_next_4(id
*, int, void*, void*, void*, void*);
47 extern int __kmpc_dispatch_next_8(id
*, int, void*, void*, void*, void*);
48 // End of definitions copied from OpenMP RTL.
49 // ---------------------------------------------------------------------------
50 static id loc
= {0, 2, 0, 0, ";file;func;0;0;;"};
51 // This variable is defined in OpenMP RTL but we can't have it exposed so we
52 // need to redefine it here.
53 static int __kmp_hidden_helper_threads_num
= 0;
55 // ---------------------------------------------------------------------------
56 int run_loop_64(i64 loop_lb
, i64 loop_ub
, i64 loop_st
, int loop_chunk
) {
58 static int volatile loop_sync
= 0;
59 i64 lb
; // Chunk lower bound
60 i64 ub
; // Chunk upper bound
61 i64 st
; // Chunk stride
63 int tid
= omp_get_thread_num();
66 gtid
+= __kmp_hidden_helper_threads_num
;
70 printf("run_loop_<%d>(lb=%d, ub=%d, st=%d, ch=%d)\n",
71 (int)sizeof(i64
), gtid
, tid
,
72 (int)loop_lb
, (int)loop_ub
, (int)loop_st
, loop_chunk
);
74 // Don't test degenerate cases that should have been discovered by codegen
77 if (loop_st
> 0 ? loop_lb
> loop_ub
: loop_lb
< loop_ub
)
80 __kmpc_dispatch_init_8(&loc
, gtid
, kmp_sch_guided_simd
,
81 loop_lb
, loop_ub
, loop_st
, loop_chunk
);
83 // Let the master thread handle the chunks alone
84 int chunk
; // Number of the current chunk
85 i64 next_lb
; // Lower bound of the next chunk
86 i64 last_ub
; // Upper bound of the last processed chunk
87 u64 cur
; // Number of iterations in current chunk
88 u64 max
; // Max allowed iterations for current chunk
93 max
= (loop_ub
- loop_lb
) / loop_st
+ 1;
94 // The first chunk can consume all iterations
95 while (__kmpc_dispatch_next_8(&loc
, gtid
, &last
, &lb
, &ub
, &st
)) {
98 printf("chunk=%d, lb=%d, ub=%d\n", chunk
, (int)lb
, (int)ub
);
100 // Check if previous chunk (it is not the final chunk) is undersized
102 printf("Error with chunk %d\n", chunk
);
105 // Check lower and upper bounds
107 printf("Error with lb %d, %d, ch %d\n", (int)lb
, (int)next_lb
, chunk
);
111 if (!(ub
<= loop_ub
)) {
112 printf("Error with ub %d, %d, ch %d\n", (int)ub
, (int)loop_ub
, chunk
);
116 printf("Error with bounds %d, %d, %d\n", (int)lb
, (int)ub
, chunk
);
120 if (!(ub
>= loop_ub
)) {
121 printf("Error with ub %d, %d, %d\n", (int)ub
, (int)loop_ub
, chunk
);
125 printf("Error with bounds %d, %d, %d\n", (int)lb
, (int)ub
, chunk
);
129 // Stride should not change
130 if (!(st
== loop_st
)) {
131 printf("Error with st %d, %d, ch %d\n", (int)st
, (int)loop_st
, chunk
);
134 cur
= (ub
- lb
) / loop_st
+ 1;
135 // Guided scheduling uses FP computations, so current chunk may
136 // be a bit bigger (+1) than allowed maximum
137 if (!(cur
<= max
+ 1)) {
138 printf("Error with iter %llu, %llu\n", cur
, max
);
141 // Update maximum for the next chunk
144 next_lb
= ub
+ loop_st
;
146 undersized
= (cur
< loop_chunk
);
148 // Must have at least one chunk
150 printf("Error with chunk %d\n", chunk
);
153 // Must have the right last iteration index
155 if (!(last_ub
<= loop_ub
)) {
156 printf("Error with last1 %d, %d, ch %d\n",
157 (int)last_ub
, (int)loop_ub
, chunk
);
160 if (!(last_ub
+ loop_st
> loop_ub
)) {
161 printf("Error with last2 %d, %d, %d, ch %d\n",
162 (int)last_ub
, (int)loop_st
, (int)loop_ub
, chunk
);
166 if (!(last_ub
>= loop_ub
)) {
167 printf("Error with last1 %d, %d, ch %d\n",
168 (int)last_ub
, (int)loop_ub
, chunk
);
171 if (!(last_ub
+ loop_st
< loop_ub
)) {
172 printf("Error with last2 %d, %d, %d, ch %d\n",
173 (int)last_ub
, (int)loop_st
, (int)loop_ub
, chunk
);
177 // Let non-master threads go
181 // Workers wait for master thread to finish, then call __kmpc_dispatch_next
182 for (i
= 0; i
< 1000000; ++ i
) {
183 if (loop_sync
!= 0) {
187 while (loop_sync
== 0) {
190 // At this moment we do not have any more chunks -- all the chunks have
191 // already been processed by the master thread
192 rc
= __kmpc_dispatch_next_8(&loc
, gtid
, &last
, &lb
, &ub
, &st
);
194 printf("Error return value\n");
199 __kmpc_barrier(&loc
, gtid
);
201 loop_sync
= 0; // Restore original state
203 printf("run_loop_64(): at the end\n");
206 __kmpc_barrier(&loc
, gtid
);
210 // ---------------------------------------------------------------------------
211 int run_loop_32(int loop_lb
, int loop_ub
, int loop_st
, int loop_chunk
) {
213 static int volatile loop_sync
= 0;
214 int lb
; // Chunk lower bound
215 int ub
; // Chunk upper bound
216 int st
; // Chunk stride
218 int tid
= omp_get_thread_num();
221 gtid
+= __kmp_hidden_helper_threads_num
;
225 printf("run_loop_<%d>(lb=%d, ub=%d, st=%d, ch=%d)\n",
226 (int)sizeof(int), gtid
, tid
,
227 (int)loop_lb
, (int)loop_ub
, (int)loop_st
, loop_chunk
);
229 // Don't test degenerate cases that should have been discovered by codegen
232 if (loop_st
> 0 ? loop_lb
> loop_ub
: loop_lb
< loop_ub
)
235 __kmpc_dispatch_init_4(&loc
, gtid
, kmp_sch_guided_simd
,
236 loop_lb
, loop_ub
, loop_st
, loop_chunk
);
238 // Let the master thread handle the chunks alone
239 int chunk
; // Number of the current chunk
240 int next_lb
; // Lower bound of the next chunk
241 int last_ub
; // Upper bound of the last processed chunk
242 u64 cur
; // Number of iterations in current chunk
243 u64 max
; // Max allowed iterations for current chunk
248 max
= (loop_ub
- loop_lb
) / loop_st
+ 1;
249 // The first chunk can consume all iterations
250 while (__kmpc_dispatch_next_4(&loc
, gtid
, &last
, &lb
, &ub
, &st
)) {
253 printf("chunk=%d, lb=%d, ub=%d\n", chunk
, (int)lb
, (int)ub
);
255 // Check if previous chunk (it is not the final chunk) is undersized
257 printf("Error with chunk %d\n", chunk
);
260 // Check lower and upper bounds
262 printf("Error with lb %d, %d, ch %d\n", (int)lb
, (int)next_lb
, chunk
);
266 if (!(ub
<= loop_ub
)) {
267 printf("Error with ub %d, %d, ch %d\n", (int)ub
, (int)loop_ub
, chunk
);
271 printf("Error with bounds %d, %d, %d\n", (int)lb
, (int)ub
, chunk
);
275 if (!(ub
>= loop_ub
)) {
276 printf("Error with ub %d, %d, %d\n", (int)ub
, (int)loop_ub
, chunk
);
280 printf("Error with bounds %d, %d, %d\n", (int)lb
, (int)ub
, chunk
);
284 // Stride should not change
285 if (!(st
== loop_st
)) {
286 printf("Error with st %d, %d, ch %d\n", (int)st
, (int)loop_st
, chunk
);
289 cur
= (ub
- lb
) / loop_st
+ 1;
290 // Guided scheduling uses FP computations, so current chunk may
291 // be a bit bigger (+1) than allowed maximum
292 if (!(cur
<= max
+ 1)) {
293 printf("Error with iter %llu, %llu\n", cur
, max
);
296 // Update maximum for the next chunk
299 next_lb
= ub
+ loop_st
;
301 undersized
= (cur
< loop_chunk
);
303 // Must have at least one chunk
305 printf("Error with chunk %d\n", chunk
);
308 // Must have the right last iteration index
310 if (!(last_ub
<= loop_ub
)) {
311 printf("Error with last1 %d, %d, ch %d\n",
312 (int)last_ub
, (int)loop_ub
, chunk
);
315 if (!(last_ub
+ loop_st
> loop_ub
)) {
316 printf("Error with last2 %d, %d, %d, ch %d\n",
317 (int)last_ub
, (int)loop_st
, (int)loop_ub
, chunk
);
321 if (!(last_ub
>= loop_ub
)) {
322 printf("Error with last1 %d, %d, ch %d\n",
323 (int)last_ub
, (int)loop_ub
, chunk
);
326 if (!(last_ub
+ loop_st
< loop_ub
)) {
327 printf("Error with last2 %d, %d, %d, ch %d\n",
328 (int)last_ub
, (int)loop_st
, (int)loop_ub
, chunk
);
332 // Let non-master threads go
336 // Workers wait for master thread to finish, then call __kmpc_dispatch_next
337 for (i
= 0; i
< 1000000; ++ i
) {
338 if (loop_sync
!= 0) {
342 while (loop_sync
== 0) {
345 // At this moment we do not have any more chunks -- all the chunks have
346 // already been processed by the master thread
347 rc
= __kmpc_dispatch_next_4(&loc
, gtid
, &last
, &lb
, &ub
, &st
);
349 printf("Error return value\n");
354 __kmpc_barrier(&loc
, gtid
);
356 loop_sync
= 0; // Restore original state
358 printf("run_loop<>(): at the end\n");
361 __kmpc_barrier(&loc
, gtid
);
365 // ---------------------------------------------------------------------------
366 int run_64(int num_th
)
369 #pragma omp parallel num_threads(num_th)
373 for (chunk
= SIMD_LEN
; chunk
<= 3*SIMD_LEN
; chunk
+= SIMD_LEN
) {
374 for (st
= 1; st
<= 3; ++ st
) {
375 for (lb
= -3 * num_th
* st
; lb
<= 3 * num_th
* st
; ++ lb
) {
376 for (ub
= lb
; ub
< lb
+ num_th
* (chunk
+1) * st
; ++ ub
) {
377 err
+= run_loop_64(lb
, ub
, st
, chunk
);
378 err
+= run_loop_64(ub
, lb
, -st
, chunk
);
387 int run_32(int num_th
)
390 #pragma omp parallel num_threads(num_th)
392 int chunk
, st
, lb
, ub
;
393 for (chunk
= SIMD_LEN
; chunk
<= 3*SIMD_LEN
; chunk
+= SIMD_LEN
) {
394 for (st
= 1; st
<= 3; ++ st
) {
395 for (lb
= -3 * num_th
* st
; lb
<= 3 * num_th
* st
; ++ lb
) {
396 for (ub
= lb
; ub
< lb
+ num_th
* (chunk
+1) * st
; ++ ub
) {
397 err
+= run_loop_32(lb
, ub
, st
, chunk
);
398 err
+= run_loop_32(ub
, lb
, -st
, chunk
);
407 // ---------------------------------------------------------------------------
411 const char *env
= getenv("LIBOMP_NUM_HIDDEN_HELPER_THREADS");
413 __kmp_hidden_helper_threads_num
= atoi(env
);
418 for (n
= 1; n
<= 4; ++ n
) {
423 printf("failed with %d errors\n", err
);