1 // RUN: %libomp-cxx-compile-and-run
/*
The test emulates the code generation needed for a reduction with the task
modifier on a worksharing (omp for) construct.

Note: tasks could just use the in_reduction clause, but the compiler does not
accept this because of a bug: it mistakenly requires the reduction item to be
shared, which is only true for reductions on worksharing constructs and wrong
for task reductions.
*/
18 //------------------------------------------------
19 // OpenMP runtime library routines
23 extern void *__kmpc_task_reduction_get_th_data(int gtid
, void *tg
, void *item
);
24 extern void *__kmpc_task_reduction_modifier_init(void *loc
, int gtid
, int is_ws
,
26 extern void __kmpc_task_reduction_modifier_fini(void *loc
, int gtid
, int is_ws
);
27 extern int __kmpc_global_thread_num(void *);
32 //------------------------------------------------
33 // Compiler-generated code
35 typedef struct red_input
{
36 void *reduce_shar
; /**< shared between tasks item to reduce into */
37 size_t reduce_size
; /**< size of data item in bytes */
38 // three compiler-generated routines (init, fini are optional):
39 void *reduce_init
; /**< data initialization routine (single parameter) */
40 void *reduce_fini
; /**< data finalization routine */
41 void *reduce_comb
; /**< data combiner routine */
42 unsigned flags
; /**< flags for additional info from compiler */
/* Combiner routine for an int reduction item: adds the int stored at rhs
   into the int stored at lhs, leaving rhs untouched. Its address is stored
   as the reduce_comb entry of the reduction descriptor. */
void i_comb(void *lhs, void *rhs) {
  int *accum = (int *)lhs;
  const int *addend = (const int *)rhs;
  *accum = *accum + *addend;
}
51 omp_set_num_threads(NT
);
52 #pragma omp parallel private(i)
53 // #pragma omp for reduction(task,+:var)
54 #pragma omp for reduction(+ : var)
55 for (i
= 0; i
< NT
; ++i
) // single iteration per thread
// Generated code, which should actually be placed before the distribution of
// loop iterations; it is placed here just to show the idea. To keep this
// correct, the loop count is equal to the number of threads.
60 int gtid
= __kmpc_global_thread_num(NULL
);
61 void *tg
; // pointer to taskgroup (optional)
63 r_var
.reduce_shar
= &var
;
64 r_var
.reduce_size
= sizeof(var
);
65 r_var
.reduce_init
= NULL
;
66 r_var
.reduce_fini
= NULL
;
67 r_var
.reduce_comb
= (void *)&i_comb
;
68 tg
= __kmpc_task_reduction_modifier_init(
71 1, // 1 - worksharing construct, 0 - parallel
72 1, // number of reduction objects
73 &r_var
// related data
75 // end of generated code
77 #pragma omp task /*in_reduction(+:var)*/ shared(var)
79 // emulate task reduction here because of compiler bug:
80 // it mistakenly declines to accept in_reduction because var is private
82 int gtid
= __kmpc_global_thread_num(NULL
);
83 int *p_var
= (int *)__kmpc_task_reduction_get_th_data(gtid
, tg
, &var
);
86 if (omp_get_thread_num() > 0) {
87 #pragma omp task /*in_reduction(+:var)*/ shared(var)
89 int gtid
= __kmpc_global_thread_num(NULL
);
90 int *p_var
= (int *)__kmpc_task_reduction_get_th_data(gtid
, tg
, &var
);
// Generated code, which should actually be placed after loop completion but
// before the barrier and before the loop reduction. It is placed here just to
// show the idea. To keep this correct, the loop count is equal to the number
// of threads.
98 __kmpc_task_reduction_modifier_fini(NULL
, gtid
, 1);
99 // end of generated code
101 if (var
== INIT
+ NT
* 3 - 1) {
105 printf("failed: var = %d (!= %d)\n", var
, INIT
+ NT
* 3 - 1);