//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//#define KMP_SUPPORT_GRAPH_OUTPUT 1

#include "kmp.h"
#include "kmp_wait_release.h"
#include "kmp_taskdeps.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
// TODO: Improve memory allocation? keep a list of pre-allocated structures?
// allocate in blocks? re-use finished list entries?
// TODO: don't use atomic ref counters for stack-allocated nodes.
// TODO: find an alternate to atomic refs for heap-allocated nodes?
// TODO: Finish graph output support
// TODO: kmp_lock_t seems a tad too big (and heavyweight) for this. Check other
// runtime locks.
// TODO: Any ITT support needed?
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
static std::atomic<kmp_int32> kmp_node_id_seed = 0;
#endif
static void __kmp_init_node(kmp_depnode_t *node) {
  node->dn.successors = NULL;
  node->dn.task = NULL; // will point to the right task
                        // once dependences have been processed
  for (int i = 0; i < MAX_MTX_DEPS; ++i)
    node->dn.mtx_locks[i] = NULL;
  node->dn.mtx_num_locks = 0;
  __kmp_init_lock(&node->dn.lock);
  KMP_ATOMIC_ST_RLX(&node->dn.nrefs, 1); // init creates the first reference
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  node->dn.id = KMP_ATOMIC_INC(&kmp_node_id_seed);
#endif
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  __itt_sync_create(node, "OMP task dep node", NULL, 0);
#endif
}
static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) {
  KMP_ATOMIC_INC(&node->dn.nrefs);
  return node;
}
enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 };

size_t sizes[] = {997, 2003, 4001, 8191, 16001, 32003, 64007, 131071, 270029};
const size_t MAX_GEN = 8;
static inline size_t __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) {
  // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) %
  // m_num_sets );
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}
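// Editorial note (not from the upstream sources): dependence addresses tend to
// be aligned, so the low bits of `addr` carry little information. XOR-ing two
// shifted copies of the address before taking the modulo spreads aligned
// addresses across the buckets instead of clustering them; `hsize` is the
// current bucket count (97 or 997 for a fresh hash, then a value from sizes[]
// after each extension).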
static kmp_dephash_t *__kmp_dephash_extend(kmp_info_t *thread,
                                           kmp_dephash_t *current_dephash) {
  kmp_dephash_t *h;

  size_t gen = current_dephash->generation + 1;
  if (gen >= MAX_GEN)
    return current_dephash;
  size_t new_size = sizes[gen];

  size_t size_to_allocate =
      new_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size_to_allocate);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size_to_allocate);
#endif

  h->size = new_size;
  h->nelements = current_dephash->nelements;
  h->buckets = (kmp_dephash_entry **)(h + 1);
  h->generation = gen;
  h->nconflicts = 0;
  h->last_all = current_dephash->last_all;

  // make sure buckets are properly initialized
  for (size_t i = 0; i < new_size; i++) {
    h->buckets[i] = NULL;
  }

  // insert existing elements in the new table
  for (size_t i = 0; i < current_dephash->size; i++) {
    kmp_dephash_entry_t *next, *entry;
    for (entry = current_dephash->buckets[i]; entry; entry = next) {
      next = entry->next_in_bucket;
      // Compute the new hash using the new size, and insert the entry in
      // the new bucket.
      size_t new_bucket = __kmp_dephash_hash(entry->addr, h->size);
      entry->next_in_bucket = h->buckets[new_bucket];
      if (entry->next_in_bucket) {
        h->nconflicts++;
      }
      h->buckets[new_bucket] = entry;
    }
  }

  // Free old hash table
#if USE_FAST_MEMORY
  __kmp_fast_free(thread, current_dephash);
#else
  __kmp_thread_free(thread, current_dephash);
#endif

  return h;
}
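// Editorial note (not from the upstream sources): __kmp_dephash_find() calls
// __kmp_dephash_extend() once the number of chained (conflicting) entries
// reaches the bucket count, i.e. when nconflicts / size >= 1. Each extension
// bumps `generation` and picks the next bucket count from sizes[], so a table
// that started at 997 buckets grows to 2003, then 4001, and so on; after
// MAX_GEN extensions the table simply stays at its current size.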
static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread,
                                           kmp_taskdata_t *current_task) {
  kmp_dephash_t *h;

  size_t h_size;

  if (current_task->td_flags.tasktype == TASK_IMPLICIT)
    h_size = KMP_DEPHASH_MASTER_SIZE;
  else
    h_size = KMP_DEPHASH_OTHER_SIZE;

  size_t size = h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t);

#if USE_FAST_MEMORY
  h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size);
#else
  h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size);
#endif
  h->size = h_size;

  h->generation = 0;
  h->nelements = 0;
  h->nconflicts = 0;
  h->buckets = (kmp_dephash_entry **)(h + 1);
  h->last_all = NULL;

  for (size_t i = 0; i < h_size; i++)
    h->buckets[i] = 0;

  return h;
}
static kmp_dephash_entry *__kmp_dephash_find(kmp_info_t *thread,
                                             kmp_dephash_t **hash,
                                             kmp_intptr_t addr) {
  kmp_dephash_t *h = *hash;
  if (h->nelements != 0 && h->nconflicts / h->size >= 1) {
    *hash = __kmp_dephash_extend(thread, h);
    h = *hash;
  }
  size_t bucket = __kmp_dephash_hash(addr, h->size);

  kmp_dephash_entry_t *entry;
  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
    if (entry->addr == addr)
      break;

  if (entry == NULL) {
    // create entry. This is only done by one thread so no locking required
#if USE_FAST_MEMORY
    entry = (kmp_dephash_entry_t *)__kmp_fast_allocate(
        thread, sizeof(kmp_dephash_entry_t));
#else
    entry = (kmp_dephash_entry_t *)__kmp_thread_malloc(
        thread, sizeof(kmp_dephash_entry_t));
#endif
    entry->addr = addr;
    if (!h->last_all) // no predecessor task with omp_all_memory dependence
      entry->last_out = NULL;
    else // else link the omp_all_memory depnode to the new entry
      entry->last_out = __kmp_node_ref(h->last_all);
    entry->last_set = NULL;
    entry->prev_set = NULL;
    entry->last_flag = 0;
    entry->mtx_lock = NULL;
    entry->next_in_bucket = h->buckets[bucket];
    h->buckets[bucket] = entry;
    h->nelements++;
    if (entry->next_in_bucket)
      h->nconflicts++;
  }
  return entry;
}
static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread,
                                          kmp_depnode_list_t *list,
                                          kmp_depnode_t *node) {
  kmp_depnode_list_t *new_head;

#if USE_FAST_MEMORY
  new_head = (kmp_depnode_list_t *)__kmp_fast_allocate(
      thread, sizeof(kmp_depnode_list_t));
#else
  new_head = (kmp_depnode_list_t *)__kmp_thread_malloc(
      thread, sizeof(kmp_depnode_list_t));
#endif

  new_head->node = __kmp_node_ref(node);
  new_head->next = list;

  return new_head;
}
static inline void __kmp_track_dependence(kmp_int32 gtid,
                                          kmp_depnode_t *source,
                                          kmp_depnode_t *sink,
                                          kmp_task_t *sink_task) {
#if OMPX_TASKGRAPH
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);
  if (source->dn.task && sink_task) {
    // Not supporting dependency between two tasks that one is within the TDG
    // and the other is not
    KMP_ASSERT(task_source->is_taskgraph == task_sink->is_taskgraph);
  }
  if (task_sink->is_taskgraph &&
      __kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
    kmp_node_info_t *source_info =
        &task_sink->tdg->record_map[task_source->td_task_id];
    bool exists = false;
    for (int i = 0; i < source_info->nsuccessors; i++) {
      if (source_info->successors[i] == task_sink->td_task_id) {
        exists = true;
        break;
      }
    }
    if (!exists) {
      if (source_info->nsuccessors >= source_info->successors_size) {
        source_info->successors_size = 2 * source_info->successors_size;
        kmp_int32 *old_succ_ids = source_info->successors;
        kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
            source_info->successors_size * sizeof(kmp_int32));
        source_info->successors = new_succ_ids;
        __kmp_free(old_succ_ids);
      }

      source_info->successors[source_info->nsuccessors] = task_sink->td_task_id;
      source_info->nsuccessors++;

      kmp_node_info_t *sink_info =
          &(task_sink->tdg->record_map[task_sink->td_task_id]);
      sink_info->npredecessors++;
    }
  }
#endif
#ifdef KMP_SUPPORT_GRAPH_OUTPUT
  kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
  // do not use sink->dn.task as that is only filled after the dependences
  // are already processed!
  kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task);

  __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id,
               task_source->td_ident->psource, sink->dn.id,
               task_sink->td_ident->psource);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  /* OMPT tracks dependences between task (a=source, b=sink) in which
     task a blocks the execution of b through the ompt_new_dependence_callback
   */
  if (ompt_enabled.ompt_callback_task_dependence) {
    kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task);
    ompt_data_t *sink_data;
    if (sink_task)
      sink_data = &(KMP_TASK_TO_TASKDATA(sink_task)->ompt_task_info.task_data);
    else
      sink_data = &__kmp_threads[gtid]->th.ompt_thread_info.task_data;

    ompt_callbacks.ompt_callback(ompt_callback_task_dependence)(
        &(task_source->ompt_task_info.task_data), sink_data);
  }
#endif /* OMPT_SUPPORT && OMPT_OPTIONAL */
}
kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task) {
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  return td->td_depnode ? &(td->td_depnode->dn) : NULL;
}
kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task) {
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  return td->td_depnode->dn.successors;
}
static inline kmp_int32
__kmp_depnode_link_successor(kmp_int32 gtid, kmp_info_t *thread,
                             kmp_task_t *task, kmp_depnode_t *node,
                             kmp_depnode_list_t *plist) {
  if (!plist)
    return 0;
  kmp_int32 npredecessors = 0;
  // link node as successor of list elements
  for (kmp_depnode_list_t *p = plist; p; p = p->next) {
    kmp_depnode_t *dep = p->node;
#if OMPX_TASKGRAPH
    kmp_tdg_status tdg_status = KMP_TDG_NONE;
    if (task) {
      kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
      if (td->is_taskgraph)
        tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
      if (__kmp_tdg_is_recording(tdg_status))
        __kmp_track_dependence(gtid, dep, node, task);
    }
#endif
    if (dep->dn.task) {
      KMP_ACQUIRE_DEPNODE(gtid, dep);
      if (dep->dn.task) {
        if (!dep->dn.successors || dep->dn.successors->node != node) {
#if OMPX_TASKGRAPH
          if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
            __kmp_track_dependence(gtid, dep, node, task);
          dep->dn.successors = __kmp_add_node(thread, dep->dn.successors, node);
          KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                        "%p\n",
                        gtid, KMP_TASK_TO_TASKDATA(dep->dn.task),
                        KMP_TASK_TO_TASKDATA(task)));
          npredecessors++;
        }
      }
      KMP_RELEASE_DEPNODE(gtid, dep);
    }
  }
  return npredecessors;
}
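// Editorial note (not from the upstream sources): dep->dn.task is tested once
// before taking the node lock and again under KMP_ACQUIRE_DEPNODE. The
// releasing side clears dn.task when a predecessor completes (see
// __kmp_release_deps), so the re-check avoids registering `node` behind a task
// that finished between the cheap unlocked test and lock acquisition; the
// successors->node != node test keeps the same edge from being added twice.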
// Add the edge 'sink' -> 'source' in the task dependency graph
static inline kmp_int32 __kmp_depnode_link_successor(kmp_int32 gtid,
                                                     kmp_info_t *thread,
                                                     kmp_task_t *task,
                                                     kmp_depnode_t *source,
                                                     kmp_depnode_t *sink) {
  if (!sink)
    return 0;
  kmp_int32 npredecessors = 0;
#if OMPX_TASKGRAPH
  kmp_tdg_status tdg_status = KMP_TDG_NONE;
  kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(task);
  if (task) {
    if (td->is_taskgraph)
      tdg_status = KMP_TASK_TO_TASKDATA(task)->tdg->tdg_status;
    if (__kmp_tdg_is_recording(tdg_status) && sink->dn.task)
      __kmp_track_dependence(gtid, sink, source, task);
  }
#endif
  if (sink->dn.task) {
    // synchronously add source to sink' list of successors
    KMP_ACQUIRE_DEPNODE(gtid, sink);
    if (sink->dn.task) {
      if (!sink->dn.successors || sink->dn.successors->node != source) {
#if OMPX_TASKGRAPH
        if (!(__kmp_tdg_is_recording(tdg_status)) && task)
#endif
          __kmp_track_dependence(gtid, sink, source, task);
        sink->dn.successors =
            __kmp_add_node(thread, sink->dn.successors, source);
        KA_TRACE(40, ("__kmp_process_deps: T#%d adding dependence from %p to "
                      "%p\n",
                      gtid, KMP_TASK_TO_TASKDATA(sink->dn.task),
                      KMP_TASK_TO_TASKDATA(task)));
#if OMPX_TASKGRAPH
        if (__kmp_tdg_is_recording(tdg_status)) {
          kmp_taskdata_t *tdd = KMP_TASK_TO_TASKDATA(sink->dn.task);
          if (tdd->is_taskgraph) {
            if (tdd->td_flags.onced)
              // decrement npredecessors if sink->dn.task belongs to a taskgraph
              // and
              //  1) the task is reset to its initial state (by kmp_free_task) or
              //  2) the task is complete but not yet reset
              npredecessors--;
          }
        }
#endif
        npredecessors++;
      }
    }
    KMP_RELEASE_DEPNODE(gtid, sink);
  }
  return npredecessors;
}
static inline kmp_int32
__kmp_process_dep_all(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *h,
                      bool dep_barrier, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d processing dep_all, "
                "dep_barrier = %d\n",
                gtid, dep_barrier));
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;

  // process previous omp_all_memory node if any
  npredecessors +=
      __kmp_depnode_link_successor(gtid, thread, task, node, h->last_all);
  __kmp_node_deref(thread, h->last_all);
  if (!dep_barrier) {
    h->last_all = __kmp_node_ref(node);
  } else {
    // if this is a sync point in the serial sequence, then the previous
    // outputs are guaranteed to be completed after the execution of this
    // task so the previous output nodes can be cleared.
    h->last_all = NULL;
  }

  // process all regular dependences
  for (size_t i = 0; i < h->size; i++) {
    kmp_dephash_entry_t *info = h->buckets[i];
    if (!info) // skip empty slots in dephash
      continue;
    for (; info; info = info->next_in_bucket) {
      // for each entry the omp_all_memory works as OUT dependence
      kmp_depnode_t *last_out = info->last_out;
      kmp_depnode_list_t *last_set = info->last_set;
      kmp_depnode_list_t *prev_set = info->prev_set;
      if (last_set) {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        __kmp_depnode_list_free(thread, last_set);
        __kmp_depnode_list_free(thread, prev_set);
        info->last_set = NULL;
        info->prev_set = NULL;
        info->last_flag = 0; // no sets in this dephash entry
      } else {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      __kmp_node_deref(thread, last_out);
      if (!dep_barrier) {
        info->last_out = __kmp_node_ref(node);
      } else {
        info->last_out = NULL;
      }
    }
  }
  KA_TRACE(30, ("__kmp_process_dep_all: T#%d found %d predecessors\n", gtid,
                npredecessors));
  return npredecessors;
}
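// Editorial note (not from the upstream sources): this path serves the OpenMP
// 5.1 omp_all_memory reserved dependence, e.g. a construct along the lines of
//   #pragma omp task depend(inout: omp_all_memory)
// The new node is ordered after the previous omp_all_memory node (h->last_all)
// and after every live last_out / last_set in the hash, i.e. it behaves like
// an OUT dependence on every address tracked so far.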
template <bool filter>
static inline kmp_int32
__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t **hash,
                   bool dep_barrier, kmp_int32 ndeps,
                   kmp_depend_info_t *dep_list, kmp_task_t *task) {
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependences : "
                "dep_barrier = %d\n",
                filter, gtid, ndeps, dep_barrier));

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_int32 npredecessors = 0;
  for (kmp_int32 i = 0; i < ndeps; i++) {
    const kmp_depend_info_t *dep = &dep_list[i];

    if (filter && dep->base_addr == 0)
      continue; // skip filtered entries

    kmp_dephash_entry_t *info =
        __kmp_dephash_find(thread, hash, dep->base_addr);
    kmp_depnode_t *last_out = info->last_out;
    kmp_depnode_list_t *last_set = info->last_set;
    kmp_depnode_list_t *prev_set = info->prev_set;

    if (dep->flags.out) { // out or inout --> clean lists if any
      if (last_set) {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        __kmp_depnode_list_free(thread, last_set);
        __kmp_depnode_list_free(thread, prev_set);
        info->last_set = NULL;
        info->prev_set = NULL;
        info->last_flag = 0; // no sets in this dephash entry
      } else {
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
      }
      __kmp_node_deref(thread, last_out);
      if (!dep_barrier) {
        info->last_out = __kmp_node_ref(node);
      } else {
        // if this is a sync point in the serial sequence, then the previous
        // outputs are guaranteed to be completed after the execution of this
        // task so the previous output nodes can be cleared.
        info->last_out = NULL;
      }
    } else { // either IN or MTX or SET
      if (info->last_flag == 0 || info->last_flag == dep->flag) {
        // last_set either didn't exist or of same dep kind
        // link node as successor of the last_out if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_out);
        // link node as successor of all nodes in the prev_set if any
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, prev_set);
        if (dep_barrier) {
          // clean last_out and prev_set if any; don't touch last_set
          __kmp_node_deref(thread, last_out);
          info->last_out = NULL;
          __kmp_depnode_list_free(thread, prev_set);
          info->prev_set = NULL;
        }
      } else { // last_set is of different dep kind, make it prev_set
        // link node as successor of all nodes in the last_set
        npredecessors +=
            __kmp_depnode_link_successor(gtid, thread, task, node, last_set);
        // clean last_out if any
        __kmp_node_deref(thread, last_out);
        info->last_out = NULL;
        // clean prev_set if any
        __kmp_depnode_list_free(thread, prev_set);
        if (!dep_barrier) {
          // move last_set to prev_set, new last_set will be allocated
          info->prev_set = last_set;
        } else {
          info->prev_set = NULL;
          info->last_flag = 0;
        }
        info->last_set = NULL;
      }
      // for dep_barrier last_flag value should remain:
      // 0 if last_set is empty, unchanged otherwise
      if (!dep_barrier) {
        info->last_flag = dep->flag; // store dep kind of the last_set
        info->last_set = __kmp_add_node(thread, info->last_set, node);
      }
      // check if we are processing MTX dependency
      if (dep->flag == KMP_DEP_MTX) {
        if (info->mtx_lock == NULL) {
          info->mtx_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
          __kmp_init_lock(info->mtx_lock);
        }
        KMP_DEBUG_ASSERT(node->dn.mtx_num_locks < MAX_MTX_DEPS);
        kmp_int32 m;
        // Save lock in node's array
        for (m = 0; m < MAX_MTX_DEPS; ++m) {
          // sort pointers in decreasing order to avoid potential livelock
          if (node->dn.mtx_locks[m] < info->mtx_lock) {
            KMP_DEBUG_ASSERT(!node->dn.mtx_locks[node->dn.mtx_num_locks]);
            for (int n = node->dn.mtx_num_locks; n > m; --n) {
              // shift right all lesser non-NULL pointers
              KMP_DEBUG_ASSERT(node->dn.mtx_locks[n - 1] != NULL);
              node->dn.mtx_locks[n] = node->dn.mtx_locks[n - 1];
            }
            node->dn.mtx_locks[m] = info->mtx_lock;
            break;
          }
        }
        KMP_DEBUG_ASSERT(m < MAX_MTX_DEPS); // must break from loop
        node->dn.mtx_num_locks++;
      }
    }
  }
  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
                gtid, npredecessors));
  return npredecessors;
}
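// Editorial note (not from the upstream sources): the last_set / prev_set
// machinery lets a run of same-kind dependences proceed concurrently while
// still ordering it against the previous run of a different kind. For a
// hypothetical variable x:
//   #pragma omp task depend(out: x)       // recorded in last_out
//   #pragma omp task depend(in: x)        // starts last_set (flag KMP_DEP_IN)
//   #pragma omp task depend(in: x)        // joins the same last_set
//   #pragma omp task depend(inoutset: x)  // different flag: the old last_set
//                                         // becomes prev_set, a new one starts
// The two "in" tasks only wait on the "out" task; the "inoutset" task is
// linked after both "in" tasks (the old last_set), which is then kept as
// prev_set.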
#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)
// returns true if the task has any outstanding dependence
static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
                             kmp_task_t *task, kmp_dephash_t **hash,
                             bool dep_barrier, kmp_int32 ndeps,
                             kmp_depend_info_t *dep_list,
                             kmp_int32 ndeps_noalias,
                             kmp_depend_info_t *noalias_dep_list) {
  int i, n_mtxs = 0, dep_all = 0;
#if KMP_DEBUG
  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
#endif
  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependences for task %p : %d "
                "possibly aliased dependences, %d non-aliased dependences : "
                "dep_barrier=%d .\n",
                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));

  // Filter deps in dep_list
  // TODO: Different algorithm for large dep_list ( > 10 ? )
  for (i = 0; i < ndeps; i++) {
    if (dep_list[i].base_addr != 0 &&
        dep_list[i].base_addr != (kmp_intptr_t)KMP_SIZE_T_MAX) {
      KMP_DEBUG_ASSERT(
          dep_list[i].flag == KMP_DEP_IN || dep_list[i].flag == KMP_DEP_OUT ||
          dep_list[i].flag == KMP_DEP_INOUT ||
          dep_list[i].flag == KMP_DEP_MTX || dep_list[i].flag == KMP_DEP_SET);
      for (int j = i + 1; j < ndeps; j++) {
        if (dep_list[i].base_addr == dep_list[j].base_addr) {
          if (dep_list[i].flag != dep_list[j].flag) {
            // two different dependences on same address work identical to OUT
            dep_list[i].flag = KMP_DEP_OUT;
          }
          dep_list[j].base_addr = 0; // Mark j element as void
        }
      }
      if (dep_list[i].flag == KMP_DEP_MTX) {
        // limit number of mtx deps to MAX_MTX_DEPS per node
        if (n_mtxs < MAX_MTX_DEPS && task != NULL)
          n_mtxs++;
        else
          dep_list[i].flag = KMP_DEP_OUT; // downgrade mutexinoutset to inout
      }
    } else if (dep_list[i].flag == KMP_DEP_ALL ||
               dep_list[i].base_addr == (kmp_intptr_t)KMP_SIZE_T_MAX) {
      // omp_all_memory dependence can be marked by compiler by either
      // (addr=0 && flag=0x80) (flag KMP_DEP_ALL), or (addr=-1).
      // omp_all_memory overrides all other dependences if any
      dep_all = 1;
      break;
    }
  }

  // doesn't need to be atomic as no other thread is going to be accessing this
  // node just yet.
  // npredecessors is set -1 to ensure that none of the releasing tasks queues
  // this task before we have finished processing all the dependences
  node->dn.npredecessors = -1;

  // used to pack all npredecessors additions into a single atomic operation at
  // the end
  int npredecessors;

  if (!dep_all) { // regular dependences
    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
                                             ndeps, dep_list, task);
    npredecessors += __kmp_process_deps<false>(
        gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
  } else { // omp_all_memory dependence
    npredecessors = __kmp_process_dep_all(gtid, node, *hash, dep_barrier, task);
  }

  node->dn.task = task;
  KMP_MB();

  // Account for our initial fake value
  npredecessors++;

  // Update predecessors and obtain current value to check if there are still
  // any outstanding dependences (some tasks may have finished while we
  // processed the dependences)
  npredecessors =
      node->dn.npredecessors.fetch_add(npredecessors) + npredecessors;

  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
                gtid, npredecessors, taskdata));

  // beyond this point the task could be queued (and executed) by a releasing
  // task
  return npredecessors > 0 ? true : false;
}
/*!
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new
task''
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not
suspended and queued, or TASK_CURRENT_QUEUED if it was suspended and queued

Schedule a non-thread-switchable task with dependences for execution
*/
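/* Editorial sketch (not from the upstream sources): a compiler typically
   lowers a task with depend clauses, e.g.

     #pragma omp task depend(in: a) depend(out: b)
     { ... }

   into a call of roughly this shape (local names are hypothetical, fields not
   shown are zero-initialized):

     kmp_depend_info_t deps[2];
     deps[0].base_addr = (kmp_intptr_t)&a;  deps[0].len = sizeof(a);
     deps[0].flags.in = 1;
     deps[1].base_addr = (kmp_intptr_t)&b;  deps[1].len = sizeof(b);
     deps[1].flags.out = 1;
     __kmpc_omp_task_with_deps(&loc, gtid, new_task, 2, deps, 0, NULL);
*/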
kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list) {

  kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task);
  KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid,
                loc_ref, new_taskdata));
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
#if OMPX_TASKGRAPH
  // record TDG with deps
  if (new_taskdata->is_taskgraph &&
      __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
    kmp_tdg_info_t *tdg = new_taskdata->tdg;
    // extend record_map if needed
    if (new_taskdata->td_task_id >= tdg->map_size) {
      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
      if (new_taskdata->td_task_id >= tdg->map_size) {
        kmp_uint old_size = tdg->map_size;
        kmp_uint new_size = old_size * 2;
        kmp_node_info_t *old_record = tdg->record_map;
        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
            new_size * sizeof(kmp_node_info_t));
        KMP_MEMCPY(new_record, tdg->record_map,
                   old_size * sizeof(kmp_node_info_t));
        tdg->record_map = new_record;

        __kmp_free(old_record);

        for (kmp_int i = old_size; i < new_size; i++) {
          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
              __kmp_successors_size * sizeof(kmp_int32));
          new_record[i].task = nullptr;
          new_record[i].successors = successorsList;
          new_record[i].nsuccessors = 0;
          new_record[i].npredecessors = 0;
          new_record[i].successors_size = __kmp_successors_size;
          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
        }
        // update the size at the end, so that other threads don't use
        // old_record while map_size is already updated
        tdg->map_size = new_size;
      }
      __kmp_release_bootstrap_lock(&tdg->graph_lock);
    }
    tdg->record_map[new_taskdata->td_task_id].task = new_task;
    tdg->record_map[new_taskdata->td_task_id].parent_task =
        new_taskdata->td_parent;
    KMP_ATOMIC_INC(&tdg->num_tasks);
  }
#endif
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame),
          &(new_taskdata->ompt_task_info.task_data),
          TASK_TYPE_DETAILS_FORMAT(new_taskdata), 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }

    new_taskdata->ompt_task_info.frame.enter_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(ompt_deps != NULL);

    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].base_addr == KMP_SIZE_T_MAX)
        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
      else if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
      else if (dep_list[i].flags.all)
        ompt_deps[i].dependence_type = ompt_dependence_type_out_all_memory;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].base_addr == KMP_SIZE_T_MAX)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_out_all_memory;
      else if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
      else if (noalias_dep_list[i].flags.all)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_out_all_memory;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(new_taskdata->ompt_task_info.task_data), ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    /* For OMPD we might want to delay the free until end of this function */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */
  bool serial = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  kmp_task_team_t *task_team = thread->th.th_task_team;
  serial = serial &&
           !(task_team && (task_team->tt.tt_found_proxy_tasks ||
                           task_team->tt.tt_hidden_helper_task_encountered));

  if (!serial && (ndeps > 0 || ndeps_noalias > 0)) {
    /* if no dependences have been tracked yet, create the dependence hash */
    if (current_task->td_dephash == NULL)
      current_task->td_dephash = __kmp_dephash_create(thread, current_task);

#if USE_FAST_MEMORY
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t));
#else
    kmp_depnode_t *node =
        (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t));
#endif

    __kmp_init_node(node);
    new_taskdata->td_depnode = node;

    if (__kmp_check_deps(gtid, node, new_task, &current_task->td_dephash,
                         NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                         noalias_dep_list)) {
      KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking "
                    "dependences: "
                    "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n",
                    gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
      if (ompt_enabled.enabled) {
        current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
      }
#endif
      return TASK_CURRENT_NOT_QUEUED;
    }
  } else {
    KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependences "
                  "for task (serialized) loc=%p task=%p\n",
                  gtid, loc_ref, new_taskdata));
  }

  KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking "
                "dependences : "
                "loc=%p task=%p, transferring to __kmp_omp_task\n",
                gtid, loc_ref, new_taskdata));

  kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
  }
#endif
  return ret;
}
#if OMPT_SUPPORT
void __ompt_taskwait_dep_finish(kmp_taskdata_t *current_task,
                                ompt_data_t *taskwait_task_data) {
  if (ompt_enabled.ompt_callback_task_schedule) {
    ompt_callbacks.ompt_callback(ompt_callback_task_schedule)(
        taskwait_task_data, ompt_taskwait_complete, NULL);
  }
  current_task->ompt_task_info.frame.enter_frame.ptr = NULL;
  *taskwait_task_data = ompt_data_none;
}
#endif /* OMPT_SUPPORT */
/*!
@param loc_ref location of the original task directive
@param gtid Global Thread ID of encountering thread
@param ndeps Number of depend items with possible aliasing
@param dep_list List of depend items with possible aliasing
@param ndeps_noalias Number of depend items with no aliasing
@param noalias_dep_list List of depend items with no aliasing

Blocks the current task until all specified dependences have been fulfilled.
*/
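/* Editorial sketch (not from the upstream sources): a directive such as

     #pragma omp taskwait depend(in: x)

   is lowered to a call of roughly this shape (local names are hypothetical,
   fields not shown are zero-initialized):

     kmp_depend_info_t dep;
     dep.base_addr = (kmp_intptr_t)&x;  dep.len = sizeof(x);
     dep.flags.in = 1;
     __kmpc_omp_wait_deps(&loc, gtid, 1, &dep, 0, NULL);

   which simply forwards to __kmpc_omp_taskwait_deps_51() with has_no_wait set
   to false.
*/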
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                          kmp_depend_info_t *noalias_dep_list) {
  __kmpc_omp_taskwait_deps_51(loc_ref, gtid, ndeps, dep_list, ndeps_noalias,
                              noalias_dep_list, false);
}
/* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause.
   Placeholder for taskwait with nowait clause.
   Earlier code of __kmpc_omp_wait_deps() is now in this function.
*/
void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,
                                 kmp_int32 ndeps, kmp_depend_info_t *dep_list,
                                 kmp_int32 ndeps_noalias,
                                 kmp_depend_info_t *noalias_dep_list,
                                 kmp_int32 has_no_wait) {
  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(enter): T#%d loc=%p nowait#%d\n",
                gtid, loc_ref, has_no_wait));
  if (ndeps == 0 && ndeps_noalias == 0) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no dependences to "
                  "wait upon : loc=%p\n",
                  gtid, loc_ref));
    return;
  }
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_taskdata_t *current_task = thread->th.th_current_task;
#if OMPT_SUPPORT
  // this function represents a taskwait construct with depend clause
  // We signal 4 events:
  //  - creation of the taskwait task
  //  - dependences of the taskwait task
  //  - schedule and finish of the taskwait task
  ompt_data_t *taskwait_task_data = &thread->th.ompt_thread_info.task_data;
  KMP_ASSERT(taskwait_task_data->ptr == NULL);
  if (ompt_enabled.enabled) {
    if (!current_task->ompt_task_info.frame.enter_frame.ptr)
      current_task->ompt_task_info.frame.enter_frame.ptr =
          OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_task_create) {
      ompt_callbacks.ompt_callback(ompt_callback_task_create)(
          &(current_task->ompt_task_info.task_data),
          &(current_task->ompt_task_info.frame), taskwait_task_data,
          ompt_task_taskwait | ompt_task_undeferred | ompt_task_mergeable, 1,
          OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid));
    }
  }
#if OMPT_OPTIONAL
  /* OMPT grab all dependences if requested by the tool */
  if (ndeps + ndeps_noalias > 0 && ompt_enabled.ompt_callback_dependences) {
    kmp_int32 i;

    int ompt_ndeps = ndeps + ndeps_noalias;
    ompt_dependence_t *ompt_deps = (ompt_dependence_t *)KMP_OMPT_DEPS_ALLOC(
        thread, (ndeps + ndeps_noalias) * sizeof(ompt_dependence_t));

    KMP_ASSERT(ompt_deps != NULL);

    for (i = 0; i < ndeps; i++) {
      ompt_deps[i].variable.ptr = (void *)dep_list[i].base_addr;
      if (dep_list[i].flags.in && dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_inout;
      else if (dep_list[i].flags.out)
        ompt_deps[i].dependence_type = ompt_dependence_type_out;
      else if (dep_list[i].flags.in)
        ompt_deps[i].dependence_type = ompt_dependence_type_in;
      else if (dep_list[i].flags.mtx)
        ompt_deps[i].dependence_type = ompt_dependence_type_mutexinoutset;
      else if (dep_list[i].flags.set)
        ompt_deps[i].dependence_type = ompt_dependence_type_inoutset;
    }
    for (i = 0; i < ndeps_noalias; i++) {
      ompt_deps[ndeps + i].variable.ptr = (void *)noalias_dep_list[i].base_addr;
      if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inout;
      else if (noalias_dep_list[i].flags.out)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_out;
      else if (noalias_dep_list[i].flags.in)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_in;
      else if (noalias_dep_list[i].flags.mtx)
        ompt_deps[ndeps + i].dependence_type =
            ompt_dependence_type_mutexinoutset;
      else if (noalias_dep_list[i].flags.set)
        ompt_deps[ndeps + i].dependence_type = ompt_dependence_type_inoutset;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        taskwait_task_data, ompt_deps, ompt_ndeps);
    /* We can now free the allocated memory for the dependences */
    /* For OMPD we might want to delay the free until end of this function */
    KMP_OMPT_DEPS_FREE(thread, ompt_deps);
  }
#endif /* OMPT_OPTIONAL */
#endif /* OMPT_SUPPORT */
  // We can return immediately as:
  // - dependences are not computed in serial teams (except with proxy tasks)
  // - if the dephash is not yet created it means we have nothing to wait for
  bool ignore = current_task->td_flags.team_serial ||
                current_task->td_flags.tasking_ser ||
                current_task->td_flags.final;
  ignore =
      ignore && thread->th.th_task_team != NULL &&
      thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE &&
      thread->th.th_task_team->tt.tt_hidden_helper_task_encountered == FALSE;
  ignore = ignore || current_task->td_dephash == NULL;
  if (ignore) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    return;
  }

  kmp_depnode_t node = {0};
  __kmp_init_node(&node);

  if (!__kmp_check_deps(gtid, &node, NULL, &current_task->td_dephash,
                        DEP_BARRIER, ndeps, dep_list, ndeps_noalias,
                        noalias_dep_list)) {
    KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d has no blocking "
                  "dependences : loc=%p\n",
                  gtid, loc_ref));
#if OMPT_SUPPORT
    __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
    return;
  }
  int thread_finished = FALSE;
  kmp_flag_32<false, false> flag(
      (std::atomic<kmp_uint32> *)&node.dn.npredecessors, 0U);
  while (node.dn.npredecessors > 0) {
    flag.execute_tasks(thread, gtid, FALSE,
                       &thread_finished USE_ITT_BUILD_ARG(NULL),
                       __kmp_task_stealing_constraint);
  }

  // Wait until the last __kmp_release_deps is finished before we free the
  // current stack frame holding the "node" variable; once its nrefs count
  // reaches 1, we're sure nobody else can try to reference it again.
  while (node.dn.nrefs > 1)
    KMP_YIELD(TRUE);

#if OMPT_SUPPORT
  __ompt_taskwait_dep_finish(current_task, taskwait_task_data);
#endif /* OMPT_SUPPORT */
  KA_TRACE(10, ("__kmpc_omp_taskwait_deps(exit): T#%d finished waiting : "
                "loc=%p\n",
                gtid, loc_ref));
}