//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//#define KMP_SUPPORT_GRAPH_OUTPUT 1

#include "kmp.h"
#include "kmp_io.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
// TODO: Improve memory allocation? keep a list of pre-allocated structures?
// allocate in blocks? re-use finished list entries?
// TODO: don't use atomic ref counters for stack-allocated nodes.
// TODO: find an alternative to atomic refs for heap-allocated nodes?
// TODO: Finish graph output support
// TODO: kmp_lock_t seems a tad too big (and heavyweight) for this. Check other
// runtime locks.
// TODO: Any ITT support needed?
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static std::atomic<kmp_int32> kmp_node_id_seed = 0;
#endif
static void __kmp_init_node(kmp_depnode_t *node) {
  node->dn.successors = NULL;
  node->dn.task = NULL; // will point to the right task
  // once dependences have been processed
  for (int i = 0; i < MAX_MTX_DEPS; ++i)
    node->dn.mtx_locks[i] = NULL;
  node->dn.mtx_num_locks = 0;
  __kmp_init_lock(&node->dn.lock);
  KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1); // init creates the first reference
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  __itt_sync_create(node, "OMP task dep node", NULL, 0);
#endif
}
static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
  KMP_ATOMIC_INC(&node->dn.nrefs);
  return node;
}
enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };

size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
const size_t MAX_GEN = 8;
static inline size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
  // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
  // m_num_sets );
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}
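// Illustrative sketch (not part of the runtime): how a depend address is
// mapped to a bucket by __kmp_dephash_hash. The variable below is made up;
// only the shift/xor/modulo scheme comes from the function above.
#if 0
  int some_var; // hypothetical object named in a depend clause
  kmp_intptr_t addr = (kmp_intptr_t)&some_var;
  size_t bucket = __kmp_dephash_hash(addr, KMP_DEPHASH_OTHER_SIZE);
  // bucket indexes h->buckets[]; colliding entries are chained through
  // next_in_bucket, and the table is extended once nconflicts >= size.
#endif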
static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
                                           kmp_dephash_t *current_dephash) {
  kmp_dephash_t *h;

  size_t gen = current_dephash->generation + 1;
  if (gen >= MAX_GEN)
    return current_dephash;
  size_t new_size = sizes[gen];

  size_t size_to_allocate =
      new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
#endif

  h->size = new_size;
  h->nelements = current_dephash->nelements;
  h->buckets = (kmp_dephash_entry **)(h + 1);
  h->generation = gen;
  h->nconflicts = 0;
  h->last_all = current_dephash->last_all;

  // make sure buckets are properly initialized
  for (size_t i = 0; i < new_size; i++) {
    h->buckets[i] = NULL;
  }

  // insert existing elements in the new table
  for (size_t i = 0; i < current_dephash->size; i++) {
    kmp_dephash_entry_t *next, *entry;
    for (entry = current_dephash->buckets[i]; entry; entry = next) {
      next = entry->next_in_bucket;
      // Compute the new hash using the new size, and insert the entry in
      // the new bucket.
      size_t new_bucket = __kmp_dephash_hash(entry->addr, h->size);
      entry->next_in_bucket = h->buckets[new_bucket];
      if (entry->next_in_bucket) {
        h->nconflicts++;
      }
      h->buckets[new_bucket] = entry;
    }
  }

  // Free old hash table
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, current_dephash);
#else
  __kmp_thread_free(thread, current_dephash);
#endif

  return h;
}
static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
                                           kmp_taskdata_t *current_task) {
  kmp_dephash_t *h;

  size_t h_size;

  if (current_task->td_flags.tasktype == TASK_IMPLICIT)
    h_size = KMP_DEPHASH_MASTER_SIZE;
  else
    h_size = KMP_DEPHASH_OTHER_SIZE;

  size_t size = h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
#endif
  h->size = h_size;

  h->generation = 0;
  h->nelements = 0;
  h->nconflicts = 0;
  h->buckets = (kmp_dephash_entry **)(h + 1);
  h->last_all = NULL;

  for (size_t i = 0; i < h_size; i++)
    h->buckets[i] = 0;

  return h;
}
static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
                                             kmp_dephash_t **hash,
                                             kmp_intptr_t addr) {
  kmp_dephash_t *h = *hash;
  if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
    *hash = __kmp_dephash_extend(thread, h);
    h = *hash;
  }
  size_t bucket = __kmp_dephash_hash(addr, h->size);

  kmp_dephash_entry_t *entry;
  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
    if (entry->addr == addr)
      break;

  if (entry == NULL) {
    // create entry. This is only done by one thread so no locking required
#if USE_FAST_MEMORY
    entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
        thread, sizeof(kmp_dephash_entry_t));
#else
    entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
        thread, sizeof(kmp_dephash_entry_t));
#endif
    entry->addr = addr;
    if (!h->last_all) // no predecessor task with omp_all_memory dependence
      entry->last_out = NULL;
    else // else link the omp_all_memory depnode to the new entry
      entry->last_out = __kmp_node_ref(h->last_all);
    entry->last_set = NULL;
    entry->prev_set = NULL;
    entry->last_flag = 0;
    entry->mtx_lock = NULL;
    entry->next_in_bucket = h->buckets[bucket];
    h->buckets[bucket] = entry;
    h->nelements++;
    if (entry->next_in_bucket)
      h->nconflicts++;
  }
  return entry;
}
static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
                                          kmp_depnode_list_t *list,
                                          kmp_depnode_t *node) {
  kmp_depnode_list_t *new_head;

#if USE_FAST_MEMORY
  new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
      thread, sizeof(kmp_depnode_list_t));
#else
  new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
      thread, sizeof(kmp_depnode_list_t));
#endif

  new_head->node = __kmp_node_ref(node);
  new_head->next = list;

  return new_head;
}
static inline void __kmp_track_dependence(kmp_int32 gtid,
                                          kmp_depnode_t *source,
                                          kmp_depnode_t *sink,
                                          kmp_task_t *sink_task) {
#if OMPX_TASKGRAPH
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
  if (source->dn.task && sink_task) {
    // Not supporting dependency between two tasks where one is within the TDG
    // and the other is not
    KMP_ASSERT(task_source->is_taskgraph == task_sink->is_taskgraph);
  }
  if (task_sink->is_taskgraph &&
      __kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
    kmp_node_info_t *source_info =
        &task_sink->tdg->record_map[task_source->td_task_id];
    bool exists = false;
    for (int i = 0; i < source_info->nsuccessors; i++) {
      if (source_info->successors[i] == task_sink->td_task_id) {
        exists = true;
        break;
      }
    }
    if (!exists) {
      if (source_info->nsuccessors >= source_info->successors_size) {
        source_info->successors_size = 2 * source_info->successors_size;
        kmp_int32 *old_succ_ids = source_info->successors;
        kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
            source_info->successors_size * sizeof(kmp_int32));
        source_info->successors = new_succ_ids;
        __kmp_free(old_succ_ids);
      }

      source_info->successors[source_info->nsuccessors] =
          task_sink->td_task_id;
      source_info->nsuccessors++;

      kmp_node_info_t *sink_info =
          &(task_sink->tdg->record_map[task_sink->td_task_id]);
      sink_info->npredecessors++;
    }
  }
#endif
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  // do not use sink->dn.task as that is only filled after the dependences
  // are already processed!
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
               task_source->td_ident->psource, sink->dn.id,
               task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT tracks dependences between task (a=source, b=sink) in which
     task a blocks the execution of b through the ompt_new_dependence_callback
     */
  if (ompt_enabled.ompt_callback_task_dependence) {
    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    ompt_data_t *sink_data;
    if (sink_task)
      sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
    else
      sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;

    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
        &(task_source->ompt_task_info.task_data), sink_data);
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}
static inline kmp_int32
__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
                             kmp_task_t *task, kmp_depnode_t *node,
                             kmp_depnode_list_t *plist) {
  if (!plist)
    return 0;
  kmp_int32 npredecessors = 0;
  // link node as successor of list elements
  for (kmp_depnode_list_t *p = plist; p; p = p->next) {
    kmp_depnode_t *dep = p->node;
#if OMPX_TASKGRAPH
    kmp_tdg_status tdg_status = KMP_TDG_NONE;
    if (task) {
      kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
      if (td->is_taskgraph)
        tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
      if (__kmp_tdg_is_recording(tdg_status))
        __kmp_track_dependence(gtid, dep, node, task);
    }
#endif
    if (dep->dn.task) {
      KMP_ACQUIRE_DEPNODE(gtid, dep);
      if (dep->dn.task) {
#if OMPX_TASKGRAPH
        if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
          __kmp_track_dependence(gtid, dep, node, task);
        dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
        KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                      "%p\n",
                      gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
                      KMP_TASK_TO_TASKDATA(task)));
        npredecessors++;
      }
      KMP_RELEASE_DEPNODE(gtid, dep);
    }
  }
  return npredecessors;
}
static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
                                                     kmp_info_t *thread,
                                                     kmp_task_t *task,
                                                     kmp_depnode_t *source,
                                                     kmp_depnode_t *sink) {
  if (!sink)
    return 0;
  kmp_int32 npredecessors = 0;
#if OMPX_TASKGRAPH
  kmp_tdg_status tdg_status = KMP_TDG_NONE;
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  if (task) {
    if (td->is_taskgraph)
      tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
    if (__kmp_tdg_is_recording(tdg_status) && sink->dn.task)
      __kmp_track_dependence(gtid, sink, source, task);
  }
#endif
  if (sink->dn.task) {
    // synchronously add source to sink's list of successors
    KMP_ACQUIRE_DEPNODE(gtid, sink);
    if (sink->dn.task) {
#if OMPX_TASKGRAPH
      if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
        __kmp_track_dependence(gtid, sink, source, task);
      sink->dn.successors = __kmp_add_node(thread, sink->dn.successors, source);
      KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                    "%p\n",
                    gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
                    KMP_TASK_TO_TASKDATA(task)));
#if OMPX_TASKGRAPH
      if (__kmp_tdg_is_recording(tdg_status)) {
        kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
        if (tdd->is_taskgraph) {
          if (tdd->td_flags.onced)
            // decrement npredecessors if sink->dn.task belongs to a taskgraph
            // and
            //  1) the task is reset to its initial state (by kmp_free_task) or
            //  2) the task is complete but not yet reset
            npredecessors--;
        }
      }
#endif
      npredecessors++;
    }
    KMP_RELEASE_DEPNODE(gtid, sink);
  }
  return npredecessors;
}
static inline kmp_int32
__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
                      bool dep_barrier, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
                "dep_barrier = %d\n",
                gtid, dep_barrier));
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;

  // process previous omp_all_memory node if any
  npredecessors +=
      __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
  __kmp_node_deref(thread, h->last_all);
  if (!dep_barrier) {
    h->last_all = __kmp_node_ref(node);
  } else {
    // if this is a sync point in the serial sequence, then the previous
    // outputs are guaranteed to be completed after the execution of this
    // task so the previous output nodes can be cleared.
    h->last_all = NULL;
  }

  // process all regular dependences
  for (size_t i = 0; i < h->size; i++) {
    kmp_dephash_entry_t *info = h->buckets[i];
    if (!info) // skip empty slots in dephash
      continue;
    for (; info; info = info->next_in_bucket) {
      // for each entry the omp_all_memory works as OUT dependence
      kmp_depnode_t *last_out = info->last_out;
      kmp_depnode_list_t *last_set = info->last_set;
      kmp_depnode_list_t *prev_set = info->prev_set;
      if (last_set) {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        __kmp_depnode_list_free(thread, last_set);
        __kmp_depnode_list_free(thread, prev_set);
        info->last_set = NULL;
        info->prev_set = NULL;
        info->last_flag = 0; // no sets in this dephash entry
      } else {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      __kmp_node_deref(thread, last_out);
      if (!dep_barrier) {
        info->last_out = __kmp_node_ref(node);
      } else {
        info->last_out = NULL;
      }
    }
  }
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
                npredecessors));
  return npredecessors;
}
template <bool filter>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
                   bool dep_barrier, kmp_int32 ndeps,
                   kmp_depend_info_t *dep_list, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependences : "
                "dep_barrier = %d\n",
                filter, gtid, ndeps, dep_barrier));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;
  for (kmp_int32 i = 0; i < ndeps; i++) {
    const kmp_depend_info_t *dep = &dep_list[i];

    if (filter && dep->base_addr == 0)
      continue; // skip filtered entries

    kmp_dephash_entry_t *info =
        __kmp_dephash_find(thread, hash, dep->base_addr);
    kmp_depnode_t *last_out = info->last_out;
    kmp_depnode_list_t *last_set = info->last_set;
    kmp_depnode_list_t *prev_set = info->prev_set;

    if (dep->flags.out) { // out or inout --> clean lists if any
      if (last_set) {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        __kmp_depnode_list_free(thread, last_set);
        __kmp_depnode_list_free(thread, prev_set);
        info->last_set = NULL;
        info->prev_set = NULL;
        info->last_flag = 0; // no sets in this dephash entry
      } else {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      __kmp_node_deref(thread, last_out);
      if (!dep_barrier) {
        info->last_out = __kmp_node_ref(node);
      } else {
        // if this is a sync point in the serial sequence, then the previous
        // outputs are guaranteed to be completed after the execution of this
        // task so the previous output nodes can be cleared.
        info->last_out = NULL;
      }
    } else { // either IN or MTX or SET
      if (info->last_flag == 0 || info->last_flag == dep->flag) {
        // last_set either didn't exist or of same dep kind
        // link node as successor of the last_out if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
        // link node as successor of all nodes in the prev_set if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
        if (dep_barrier) {
          // clean last_out and prev_set if any; don't touch last_set
          __kmp_node_deref(thread, last_out);
          info->last_out = NULL;
          __kmp_depnode_list_free(thread, prev_set);
          info->prev_set = NULL;
        }
      } else { // last_set is of different dep kind, make it prev_set
        // link node as successor of all nodes in the last_set
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        // clean last_out if any
        __kmp_node_deref(thread, last_out);
        info->last_out = NULL;
        // clean prev_set if any
        __kmp_depnode_list_free(thread, prev_set);
        if (!dep_barrier) {
          // move last_set to prev_set, new last_set will be allocated
          info->prev_set = last_set;
        } else {
          info->prev_set = NULL;
          info->last_flag = 0;
        }
        info->last_set = NULL;
      }
      // for dep_barrier last_flag value should remain:
      // 0 if last_set is empty, unchanged otherwise
      if (!dep_barrier) {
        info->last_flag = dep->flag; // store dep kind of the last_set
        info->last_set = __kmp_add_node(thread, info->last_set, node);
      }
      // check if we are processing MTX dependency
      if (dep->flag == KMP_DEP_MTX) {
        if (info->mtx_lock == NULL) {
          info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
          __kmp_init_lock(info->mtx_lock);
        }
        KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
        kmp_int32 m;
        // Save lock in node's array
        for (m = 0; m < MAX_MTX_DEPS; ++m) {
          // sort pointers in decreasing order to avoid potential livelock
          if (node->dn.mtx_locks[m] < info->mtx_lock) {
            KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
            for (int n = node->dn.mtx_num_locks; n > m; --n) {
              // shift right all lesser non-NULL pointers
              KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
              node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
            }
            node->dn.mtx_locks[m] = info->mtx_lock;
            break;
          }
        }
        KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
        node->dn.mtx_num_locks++;
      }
    }
  }
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
                gtid, npredecessors));
  return npredecessors;
}
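// Illustrative sketch (not part of the runtime): the dependence kinds handled
// by __kmp_process_deps above, expressed at the OpenMP source level. The
// variable and the called functions are made up for illustration.
#if 0
  int x;
#pragma omp task depend(out : x) // T1: its depnode becomes info->last_out
  work1();
#pragma omp task depend(in : x) // T2: linked as successor of T1's node, then
  work2(); //    recorded in info->last_set with dep kind KMP_DEP_IN
#pragma omp task depend(inout : x) // T3: out-type dep, so it is linked after
  work3(); //    T2's "in" set and becomes the new last_out
#endif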
#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)

// returns true if the task has any outstanding dependence
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
                             kmp_task_t *task, kmp_dephash_t **hash,
                             bool dep_barrier, kmp_int32 ndeps,
                             kmp_depend_info_t *dep_list,
                             kmp_int32 ndeps_noalias,
                             kmp_depend_info_t *noalias_dep_list) {
  int i, n_mtxs = 0, dep_all = 0;
#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
#endif
  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependences for task %p : %d "
                "possibly aliased dependences, %d non-aliased dependences : "
                "dep_barrier=%d .\n",
                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));

  // Filter deps in dep_list
  // TODO: Different algorithm for large dep_list ( > 10 ? )
  for (i = 0; i < ndeps; i++) {
    if (dep_list[i].base_addr != 0 &&
        dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX) {
      KMP_DEBUG_ASSERT(
          dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
          dep_list[i].flag == KMP_DEP_INOUT ||
          dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
      for (int j = i + 1; j < ndeps; j++) {
        if (dep_list[i].base_addr == dep_list[j].base_addr) {
          if (dep_list[i].flag != dep_list[j].flag) {
            // two different dependences on same address work identical to OUT
            dep_list[i].flag = KMP_DEP_OUT;
          }
          dep_list[j].base_addr = 0; // Mark j element as void
        }
      }
      if (dep_list[i].flag == KMP_DEP_MTX) {
        // limit number of mtx deps to MAX_MTX_DEPS per node
        if (n_mtxs < MAX_MTX_DEPS && task != NULL) {
          n_mtxs++;
        } else {
          dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
        }
      }
    } else if (dep_list[i].flag == KMP_DEP_ALL ||
               dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX) {
      // omp_all_memory dependence can be marked by compiler by either
      // (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1).
      // omp_all_memory overrides all other dependences if any
      dep_all = 1;
      break;
    }
  }

  // doesn't need to be atomic as no other thread is going to be accessing this
  // node just yet.
  // npredecessors is set -1 to ensure that none of the releasing tasks queues
  // this task before we have finished processing all the dependences
  node->dn.npredecessors = -1;

  // used to pack all npredecessors additions into a single atomic operation at
  // the end
  int npredecessors;

  if (!dep_all) { // regular dependences
    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
                                             ndeps, dep_list, task);
    npredecessors += __kmp_process_deps<false>(
        gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
  } else { // omp_all_memory dependence
    npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
  }

  node->dn.task = task;
  KMP_MB();

  // Account for our initial fake value
  npredecessors++;

  // Update predecessors and obtain current value to check if there are still
  // any outstanding dependences (some tasks may have finished while we
  // processed the dependences)
  npredecessors =
      node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;

  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
                gtid, npredecessors, taskdata));

  // beyond this point the task could be queued (and executed) by a releasing
  // task...
  return npredecessors > 0 ? true : false;
}
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
task''
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued

Schedule a non-thread-switchable task with dependences for execution
*/
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list) {
  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

#if OMPX_TASKGRAPH
  // record TDG with deps
  if (new_taskdata->is_taskgraph &&
      __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
    kmp_tdg_info_t *tdg = new_taskdata->tdg;
    // extend record_map if needed
    if (new_taskdata->td_task_id >= tdg->map_size) {
      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
      if (new_taskdata->td_task_id >= tdg->map_size) {
        kmp_uint old_size = tdg->map_size;
        kmp_uint new_size = old_size * 2;
        kmp_node_info_t *old_record = tdg->record_map;
        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
            new_size * sizeof(kmp_node_info_t));
        KMP_MEMCPY(new_record, tdg->record_map,
                   old_size * sizeof(kmp_node_info_t));
        tdg->record_map = new_record;

        __kmp_free(old_record);

        for (kmp_int i = old_size; i < new_size; i++) {
          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
              __kmp_successors_size * sizeof(kmp_int32));
          new_record[i].task = nullptr;
          new_record[i].successors = successorsList;
          new_record[i].nsuccessors = 0;
          new_record[i].npredecessors = 0;
          new_record[i].successors_size = __kmp_successors_size;
          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
        }
        // update the size at the end, so that other threads do not use
        // old_record while map_size is already updated
        tdg->map_size = new_size;
      }
      __kmp_release_bootstrap_lock(&tdg->graph_lock);
    }
    tdg->record_map[new_taskdata->td_task_id].task = new_task;
    tdg->record_map[new_taskdata->td_task_id].parent_task =
        new_taskdata->td_parent;
    KMP_ATOMIC_INC(&tdg->num_tasks);
  }
#endif
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          ompt_task_explicit | TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }

    new_taskdata->ompt_task_info.frame.enter_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }

#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(ompt_deps != NULL);

    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].base_addr == KMP_SIZE_T_MAX)
        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
      else if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
      else if (dep_list[i].flags.all)
        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].base_addr == KMP_SIZE_T_MAX)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_out_all_memory;
      else if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
      else if (noalias_dep_list[i].flags.all)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_out_all_memory;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    /* For OMPD we might want to delay the free until end of this function */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */

  bool serial = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  kmp_task_team_t *task_team = thread->th.th_task_team;
  serial = serial &&
           !(task_team && (task_team->tt.tt_found_proxy_tasks ||
                           task_team->tt.tt_hidden_helper_task_encountered));

  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
    /* if no dependences have been tracked yet, create the dependence hash */
    if (current_task->td_dephash == NULL)
      current_task->td_dephash = __kmp_dephash_create(thread, current_task);

#if USE_FAST_MEMORY
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
#else
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
#endif

    __kmp_init_node(node);
    new_taskdata->td_depnode = node;

    if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
                    "dependences: "
                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                    gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
      }
#endif
      return TASK_CURRENT_NOT_QUEUED;
    }
  } else {
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "
                  "for task (serialized) loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata));
  }

  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
                "dependences : "
                "loc=%p task=%p, transferring to __kmp_omp_task\n",
                gtid, loc_ref, new_taskdata));

  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return ret;
}
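
// Illustrative sketch (not part of the runtime): roughly how a compiler might
// lower "#pragma omp task depend(inout : x)" onto the entry point above. The
// names loc, gtid, x and the outlined routine task_entry are placeholders;
// real compiler-generated code differs in detail.
#if 0
  kmp_task_t *t = __kmpc_omp_task_alloc(loc, gtid, /*flags=*/1,
                                        sizeof(kmp_task_t), 0, &task_entry);
  kmp_depend_info_t dep;
  dep.base_addr = (kmp_intptr_t)&x; // address named in the depend clause
  dep.len = sizeof(x);
  dep.flags.in = 1; // inout == in + out
  dep.flags.out = 1;
  __kmpc_omp_task_with_deps(loc, gtid, t, /*ndeps=*/1, &dep,
                            /*ndeps_noalias=*/0, NULL);
#endif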

#if OMPT_SUPPORT
void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
                                ompt_data_t *taskwait_task_data) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        taskwait_task_data, ompt_taskwait_complete, NULL);
  }
  current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
  *taskwait_task_data = ompt_data_none;
}
#endif /* OMPT_SUPPORT */
/*!
@ingroup TASKING
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

Blocks the current task until all specified dependences have been fulfilled.
*/
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                          kmp_depend_info_t *noalias_dep_list) {
  __kmpc_omp_taskwait_deps_51(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
                              noalias_dep_list, false);
}
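
// Illustrative sketch (not part of the runtime): "#pragma omp taskwait
// depend(in : x)" lowers onto the OpenMP 5.1 entry point declared below;
// loc, gtid and x are placeholders.
#if 0
  kmp_depend_info_t dep;
  dep.base_addr = (kmp_intptr_t)&x;
  dep.len = sizeof(x);
  dep.flags.in = 1;
  __kmpc_omp_taskwait_deps_51(loc, gtid, 1, &dep, 0, NULL,
                              /*has_no_wait=*/0);
#endif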

/* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause.
   Placeholder for taskwait with nowait clause.
   Earlier code of __kmpc_omp_wait_deps() is now
   in this function.
*/
void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,
                                 kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                                 kmp_int32 ndeps_noalias,
                                 kmp_depend_info_t *noalias_dep_list,
                                 kmp_int32 has_no_wait) {
  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n",
                gtid, loc_ref, has_no_wait));
  if (ndeps == 0 && ndeps_noalias == 0) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to "
                  "wait upon : loc=%p\n",
                  gtid, loc_ref));
    return;
  }
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;

#if OMPT_SUPPORT
  // this function represents a taskwait construct with depend clause
  // We signal 4 events:
  // - creation of the taskwait task
  // - dependences of the taskwait task
  // - schedule and finish of the taskwait task
  ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
  KMP_ASSERT(taskwait_task_data->ptr == NULL);
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame), taskwait_task_data,
          ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }
  }

#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(ompt_deps != NULL);

    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        taskwait_task_data, ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    /* For OMPD we might want to delay the free until end of this function */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */

  // We can return immediately as:
  // - dependences are not computed in serial teams (except with proxy tasks)
  // - if the dephash is not yet created it means we have nothing to wait for
  bool ignore = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  ignore =
      ignore && thread->th.th_task_team != NULL &&
      thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE &&
      thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE;
  ignore = ignore || current_task->td_dephash == NULL;

  if (ignore) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    return;
  }

  kmp_depnode_t node = {0};
  __kmp_init_node(&node);

  if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
                        DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                        noalias_dep_list)) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    return;
  }

  int thread_finished = FALSE;
  kmp_flag_32<false, false> flag(
      (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
  while (node.dn.npredecessors > 0) {
    flag.execute_tasks(thread, gtid, FALSE,
                       &thread_finished USE_ITT_BUILD_ARG(NULL),
                       __kmp_task_stealing_constraint);
  }

#if OMPT_SUPPORT
  __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : loc=%p\