1 // RUN: %libomp-cxx-compile-and-run
2 // RUN: %libomp-cxx-compile -DFLG=1 && %libomp-run
3 // GCC-5 is needed for OpenMP 4.0 support (taskgroup)
10 // Total number of loop iterations, should be multiple of T for this test
13 // Flag to request lazy (1) or eager (0) allocation of reduction objects
19 // initial user's code that corresponds to pseudo code of the test
20 #pragma omp taskgroup task_reduction(+:i,j) task_reduction(*:x)
22 for( int l = 0; l < N; ++l ) {
23 #pragma omp task firstprivate(l) in_reduction(+:i) in_reduction(*:x)
33 #pragma omp taskgroup task_reduction(-:i,k) task_reduction(+:y)
35 for( int l = 0; l < N; ++l ) {
36 #pragma omp task firstprivate(l) in_reduction(+:j,y) \
37 in_reduction(*:x) in_reduction(-:k)
47 #pragma omp task firstprivate(l) in_reduction(+:y) in_reduction(-:i,k)
53 #pragma omp task firstprivate(l) in_reduction(+:j) in_reduction(*:x)
64 for( int l = 0; l < N; ++l ) {
65 #pragma omp task firstprivate(l) in_reduction(+:j)
71 //------------------------------------------------
72 // OpenMP runtime library routines
// Look up a reduction item for thread `gtid`. In this test `tg` is either a
// handle returned by __kmpc_task_reduction_init or NULL, and `item` is either
// the address of the shared variable or an address previously returned by this
// routine; the result is cast to a pointer to the item's type.
// NOTE(review): exact lookup rules live in the OpenMP runtime - confirm there.
extern void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *item);

// Register `num` reduction items described by the array `data` for thread
// `gtid`; the returned pointer is what this test later passes as `tg` above.
extern void *__kmpc_task_reduction_init(int gtid, int num, void *data);

// Return the global thread id that the two routines above take as `gtid`;
// every call in this test passes NULL.
extern int __kmpc_global_thread_num(void *);
83 //------------------------------------------------
84 // Compiler-generated code
86 typedef struct _task_red_item
{
87 void *shar
; // shared reduction item
88 size_t size
; // size of data item
89 void *f_init
; // data initialization routine
90 void *f_fini
; // data finalization routine
91 void *f_comb
; // data combiner routine
95 // int:+ no need for init/fini callbacks, valid for subtraction
// Combiner for int reduction items: folds the partial value at rhs into the
// accumulator at lhs by addition. Only the int at lhs is modified.
void __red_int_add_comb(void *lhs, void *rhs)
{
  int *acc = (int *)lhs;
  const int *part = (const int *)rhs;

  *acc = *acc + *part;
}
99 // long long:+ no need for init/fini callbacks, valid for subtraction
// Combiner for long long reduction items: folds the partial value at rhs into
// the accumulator at lhs by addition. Only the long long at lhs is modified.
void __red_llong_add_comb(void *lhs, void *rhs)
{
  long long *acc = (long long *)lhs;
  const long long *part = (const long long *)rhs;

  *acc = *acc + *part;
}
103 // double:* no need for fini callback
// Initializer for double multiplication reduction items: stores the
// multiplicative identity (1.0) into the double at data.
void __red_dbl_mul_init(void *data)
{
  double *slot = (double *)data;

  *slot = 1.0;
}
// Combiner for double multiplication reduction items: multiplies the
// accumulator at lhs by the partial value at rhs. Only lhs is modified.
void __red_dbl_mul_comb(void *lhs, void *rhs)
{
  double *acc = (double *)lhs;
  const double *part = (const double *)rhs;

  *acc = *acc * *part;
}
109 // double:+ no need for init/fini callbacks
// Combiner for double addition reduction items: adds the partial value at rhs
// to the accumulator at lhs. Only the double at lhs is modified.
void __red_dbl_add_comb(void *lhs, void *rhs)
{
  double *acc = (double *)lhs;
  const double *part = (const double *)rhs;

  *acc = *acc + *part;
}
113 // ==============================
115 void calc_serial(int *pi
, long long *pj
, double *px
, long long *pk
, double *py
)
117 for( int l
= 0; l
< N
; ++l
) {
120 *px
*= 1.0 / (l
+ 1);
124 for( int l
= 0; l
< N
; ++l
) {
129 *px
*= 1.0 / (l
+ 1);
139 *px
*= 1.0 / (l
+ 1);
143 for( int l
= 0; l
< N
; ++l
) {
148 //------------------------------------------------
152 int nthreads
= omp_get_max_threads();
154 void** ptrs
= (void**)malloc(nthreads
*sizeof(void*));
156 // user's code ======================================
157 // variables for serial calculations:
159 long long js
= -9999999;
161 long long ks
= 99999999;
162 double ys
= -99999999.0;
163 // variables for parallel calculations:
165 long long jp
= -9999999;
167 long long kp
= 99999999;
168 double yp
= -99999999.0;
170 calc_serial(&is
, &js
, &xs
, &ks
, &ys
);
171 // ==================================================
172 for (int i
= 0; i
< nthreads
; ++i
)
176 #pragma omp single nowait
178 // outer taskgroup reduces (i,j,x)
179 #pragma omp taskgroup // task_reduction(+:i,j) task_reduction(*:x)
181 _task_red_item_t red_data
[3];
182 red_data
[0].shar
= &ip
;
183 red_data
[0].size
= sizeof(ip
);
184 red_data
[0].f_init
= NULL
; // RTL will zero thread-specific objects
185 red_data
[0].f_fini
= NULL
; // no destructors needed
186 red_data
[0].f_comb
= (void*)&__red_int_add_comb
;
187 red_data
[0].flags
= FLG
;
188 red_data
[1].shar
= &jp
;
189 red_data
[1].size
= sizeof(jp
);
190 red_data
[1].f_init
= NULL
; // RTL will zero thread-specific objects
191 red_data
[1].f_fini
= NULL
; // no destructors needed
192 red_data
[1].f_comb
= (void*)&__red_llong_add_comb
;
193 red_data
[1].flags
= FLG
;
194 red_data
[2].shar
= &xp
;
195 red_data
[2].size
= sizeof(xp
);
196 red_data
[2].f_init
= (void*)&__red_dbl_mul_init
;
197 red_data
[2].f_fini
= NULL
; // no destructors needed
198 red_data
[2].f_comb
= (void*)&__red_dbl_mul_comb
;
199 red_data
[2].flags
= FLG
;
200 int gtid
= __kmpc_global_thread_num(NULL
);
201 void* tg1
= __kmpc_task_reduction_init(gtid
, 3, red_data
);
203 for( int l
= 0; l
< N
; l
+= 2 ) {
204 // 2 iterations per task to get correct x value; actually any even
205 // number of iters per task will work, otherwise x loses precision
206 #pragma omp task firstprivate(l) //in_reduction(+:i) in_reduction(*:x)
208 int gtid
= __kmpc_global_thread_num(NULL
);
209 int *p_ip
= (int*)__kmpc_task_reduction_get_th_data(gtid
, tg1
, &ip
);
210 double *p_xp
= (double*)__kmpc_task_reduction_get_th_data(
212 if (!ptrs
[gtid
]) ptrs
[gtid
] = p_xp
;
214 // user's pseudo-code ==============================
219 *p_xp
*= 1.0 / (l
+ 2);
220 // ==================================================
223 // inner taskgroup reduces (i,k,y), i is same object as in outer one
224 #pragma omp taskgroup // task_reduction(-:i,k) task_reduction(+:y)
226 _task_red_item_t red_data
[3];
227 red_data
[0].shar
= &ip
;
228 red_data
[0].size
= sizeof(ip
);
229 red_data
[0].f_init
= NULL
; // RTL will zero thread-specific objects
230 red_data
[0].f_fini
= NULL
; // no destructors needed
231 red_data
[0].f_comb
= (void*)&__red_int_add_comb
;
232 red_data
[0].flags
= FLG
;
233 red_data
[1].shar
= &kp
;
234 red_data
[1].size
= sizeof(kp
);
235 red_data
[1].f_init
= NULL
; // RTL will zero thread-specific objects
236 red_data
[1].f_fini
= NULL
; // no destructors needed
237 red_data
[1].f_comb
= (void*)&__red_llong_add_comb
; // same for + and -
238 red_data
[1].flags
= FLG
;
239 red_data
[2].shar
= &yp
;
240 red_data
[2].size
= sizeof(yp
);
241 red_data
[2].f_init
= NULL
; // RTL will zero thread-specific objects
242 red_data
[2].f_fini
= NULL
; // no destructors needed
243 red_data
[2].f_comb
= (void*)&__red_dbl_add_comb
;
244 red_data
[2].flags
= FLG
;
245 int gtid
= __kmpc_global_thread_num(NULL
);
246 void* tg2
= __kmpc_task_reduction_init(gtid
, 3, red_data
);
248 for( int l
= 0; l
< N
; l
+= 2 ) {
249 #pragma omp task firstprivate(l)
250 // in_reduction(+:j,y) in_reduction(*:x) in_reduction(-:k)
252 int gtid
= __kmpc_global_thread_num(NULL
);
253 long long *p_jp
= (long long*)__kmpc_task_reduction_get_th_data(
255 long long *p_kp
= (long long*)__kmpc_task_reduction_get_th_data(
257 double *p_xp
= (double*)__kmpc_task_reduction_get_th_data(
259 double *p_yp
= (double*)__kmpc_task_reduction_get_th_data(
261 // user's pseudo-code ==============================
269 *p_yp
+= (double)(l
+ 1);
270 *p_xp
*= 1.0 / (l
+ 2);
271 // =================================================
273 // the following code is here just to check __kmpc_task_reduction_get_th_data:
274 int tid
= omp_get_thread_num();
277 addr1
= __kmpc_task_reduction_get_th_data(gtid
, tg1
, &xp
); // from shared
278 addr2
= __kmpc_task_reduction_get_th_data(gtid
, tg1
, addr1
); // from private
279 if (addr1
!= addr2
) {
282 printf("Wrong thread-specific addresses %d s:%p p:%p\n", tid
, addr1
, addr2
);
284 // from neighbour w/o taskgroup (should start lookup from current tg2)
287 addr2
= __kmpc_task_reduction_get_th_data(gtid
, NULL
, ptrs
[tid
-1]);
288 if (addr1
!= addr2
) {
291 printf("Wrong thread-specific addresses %d s:%p n:%p\n",
296 if (ptrs
[nthreads
-1]) {
297 addr2
= __kmpc_task_reduction_get_th_data(gtid
, NULL
, ptrs
[nthreads
-1]);
298 if (addr1
!= addr2
) {
301 printf("Wrong thread-specific addresses %d s:%p n:%p\n",
306 // ----------------------------------------------
309 #pragma omp task firstprivate(l)
310 // in_reduction(+:y) in_reduction(-:i,k)
312 int gtid
= __kmpc_global_thread_num(NULL
);
313 int *p_ip
= (int*)__kmpc_task_reduction_get_th_data(
315 long long *p_kp
= (long long*)__kmpc_task_reduction_get_th_data(
317 double *p_yp
= (double*)__kmpc_task_reduction_get_th_data(
320 // user's pseudo-code ==============================
327 *p_yp
+= (double)(l
+ 1);
328 // =================================================
330 #pragma omp task firstprivate(l)
331 // in_reduction(+:j) in_reduction(*:x)
333 int gtid
= __kmpc_global_thread_num(NULL
);
334 long long *p_jp
= (long long*)__kmpc_task_reduction_get_th_data(
336 double *p_xp
= (double*)__kmpc_task_reduction_get_th_data(
338 // user's pseudo-code ==============================
343 *p_xp
*= 1.0 / (l
+ 2);
344 // =================================================
349 for( int l
= 0; l
< N
; l
+= 2 ) {
350 #pragma omp task firstprivate(l) // in_reduction(+:j)
352 int gtid
= __kmpc_global_thread_num(NULL
);
353 long long *p_jp
= (long long*)__kmpc_task_reduction_get_th_data(
355 // user's pseudo-code ==============================
358 // =================================================
366 printf("reduction flags = %u\n", FLG
);
368 if (ip
== is
&& jp
== js
&& ks
== kp
&&
369 fabs(xp
- xs
) < 0.01 && fabs(yp
- ys
) < 0.01)
372 printf("failed,\n ser:(%d %lld %f %lld %f)\n par:(%d %lld %f %lld %f)\n",