#if USE_ITT_BUILD
/*
 * kmp_itt.inl -- Inline functions of ITT Notify.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Inline function definitions. This file should be included into kmp_itt.h file
// for production build (to let compiler inline functions) or into kmp_itt.c
// file for debug build (to reduce the number of files to recompile and save
// build time).

#include "kmp.h"
#include "kmp_str.h"

#if KMP_ITT_DEBUG
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#else
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG
// Ensure that the functions are static if they're supposed to be inlined.
// Otherwise they cannot be used in more than one file, since there will be
// multiple definitions.
#if KMP_DEBUG
#define LINKAGE
#else
#define LINKAGE static inline
#endif
// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not use
// ZCA; it would be safe to turn this off until wider support becomes available.
#if USE_ITT_ZCA
#ifdef __INTEL_COMPILER
#if __INTEL_COMPILER >= 1200
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif
#endif
#endif // USE_ITT_ZCA

static kmp_bootstrap_lock_t metadata_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
#if USE_ITT_NOTIFY
LINKAGE size_t __kmp_itthash_hash(kmp_intptr_t addr, size_t hsize) {
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}
LINKAGE kmp_itthash_entry *__kmp_itthash_find(kmp_info_t *thread,
                                              kmp_itthash_t *h, ident_t *loc,
                                              int team_size) {
  kmp_itthash_entry_t *entry;
  size_t bucket = __kmp_itthash_hash((kmp_intptr_t)loc, KMP_MAX_FRAME_DOMAINS);
  for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket)
    if (entry->loc == loc && entry->team_size == team_size)
      break;

  if (entry == NULL) {
    // two foreign threads could report frames concurrently
    int cnt = KMP_TEST_THEN_INC32(&h->count);
    if (cnt >= KMP_MAX_FRAME_DOMAINS) {
      KMP_TEST_THEN_DEC32(&h->count); // revert the count
      return entry; // too many entries
    }
    // create a new entry
    entry = (kmp_itthash_entry_t *)__kmp_thread_malloc(
        thread, sizeof(kmp_itthash_entry_t));
    entry->loc = loc;
    entry->team_size = team_size;
    entry->d = NULL;
    entry->next_in_bucket = h->buckets[bucket];
    while (!KMP_COMPARE_AND_STORE_PTR(&h->buckets[bucket],
                                      entry->next_in_bucket, entry)) {
      KMP_CPU_PAUSE();
      entry->next_in_bucket = h->buckets[bucket];
    }
  } else {
    // check that the contents of the location info are unique
    KMP_DEBUG_ASSERT(loc->psource == entry->loc->psource);
  }
  return entry;
}
#endif // USE_ITT_NOTIFY
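/* Illustrative sketch (not part of the runtime, compiled out): how the bucket
   index above behaves. ident_t pointers are at least word-aligned, so the low
   bits carry little information; xoring addr>>6 with addr>>2 folds higher bits
   in before the modulo. The pointer values below are hypothetical. */
#if 0
#include <stdio.h>
static size_t example_bucket(unsigned long addr, size_t hsize) {
  return ((addr >> 6) ^ (addr >> 2)) % hsize;
}
int main(void) {
  // Two locations 64 bytes apart typically land in different buckets.
  printf("%zu\n", example_bucket(0x7f0010002000UL, 997));
  printf("%zu\n", example_bucket(0x7f0010002040UL, 997));
  return 0;
}
#endif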
/* Parallel region reporting.
 * __kmp_itt_region_forking should be called by the primary thread of a team.
   The exact moment of the call does not matter, but it should be completed
   before any thread of this team calls __kmp_itt_region_starting.
 * __kmp_itt_region_starting should be called by each thread of a team just
   before entering the parallel region body.
 * __kmp_itt_region_finished should be called by each thread of a team right
   after returning from the parallel region body.
 * __kmp_itt_region_joined should be called by the primary thread of a team,
   after all threads called __kmp_itt_region_finished.

   Note: A thread waiting at the join barrier (after __kmp_itt_region_finished)
   can execute some more user code -- such a thread can execute tasks.

   Note: The overhead of logging region_starting and region_finished in each
   thread is too large, so these calls are not used. */
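/* Illustrative sketch (not part of the runtime, compiled out): the call order
   the protocol above prescribes. Only forking/joined are actually wired up;
   starting/finished exist as no-op stubs below. The wrapper and its arguments
   are hypothetical. */
#if 0
void example_region_lifecycle(int gtid, int team_size) {
  __kmp_itt_region_forking(gtid, team_size, /* barriers */ 1); // primary only
  __kmp_itt_region_starting(gtid); // each thread, before the region body
  /* ... parallel region body runs here ... */
  __kmp_itt_region_finished(gtid); // each thread, after the region body
  __kmp_itt_region_joined(gtid); // primary only, after all threads finished
}
#endif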
LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  ident_t *loc = th->th.th_ident;
  if (!loc) {
    // no sense to report a region without location info
    return;
  }
  kmp_itthash_entry *e;
  e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size);
  if (e == NULL)
    return; // too many entries in the hash
  if (e->d == NULL) {
    // Transform compiler-generated region location into the format
    // that the tools more or less standardized on:
    // "<func>$omp$parallel@[file:]<line>[:<col>]"
    char *buff = NULL;
    kmp_str_loc_t str_loc =
        __kmp_str_loc_init(loc->psource, /* init_fname */ false);
    buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                            team_size, str_loc.file, str_loc.line, str_loc.col);
    __itt_suppress_push(__itt_suppress_memory_errors);
    e->d = __itt_domain_create(buff);
    KMP_ASSERT(e->d != NULL);
    __itt_suppress_pop();
    __kmp_str_free(&buff);
    if (barriers) {
      kmp_itthash_entry *e;
      e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0);
      if (e != NULL) {
        KMP_DEBUG_ASSERT(e->d == NULL);
        char *buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                      str_loc.file, str_loc.line);
        __itt_suppress_push(__itt_suppress_memory_errors);
        e->d = __itt_domain_create(buff);
        KMP_ASSERT(e->d != NULL);
        __itt_suppress_pop();
        __kmp_str_free(&buff);
      }
    }
    __kmp_str_loc_free(&str_loc);
  }
  __itt_frame_begin_v3(e->d, NULL);
  KMP_ITT_DEBUG_LOCK();
  KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
                      loc);
#endif
} // __kmp_itt_region_forking
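/* Illustrative sketch (not part of the runtime, compiled out): what the naming
   scheme above produces. For a region in function "foo" at bar.c:12:3 run by a
   team of 8, the region domain would read "foo$omp$parallel:8@bar.c:12:3" and
   the matching barrier domain "foo$omp$barrier@bar.c:12". All values here are
   hypothetical. */
#if 0
#include <stdio.h>
int main(void) {
  char buff[64];
  snprintf(buff, sizeof(buff), "%s$omp$parallel:%d@%s:%d:%d", "foo", 8,
           "bar.c", 12, 3);
  printf("%s\n", buff); // prints: foo$omp$parallel:8@bar.c:12:3
  return 0;
}
#endif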
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                    __itt_timestamp end, int imbalance,
                                    ident_t *loc, int team_size, int region) {
#if USE_ITT_NOTIFY
  if (!loc) {
    // no sense to report a region without location info
    return;
  }
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  if (region) {
    kmp_team_t *team = __kmp_team_from_gtid(gtid);
    int serialized = (region == 2 ? 1 : 0);
    if (team->t.t_active_level + serialized > 1) {
      // The frame notifications are only supported for the outermost teams.
      return;
    }
    // Check region domain has not been created before.
    kmp_itthash_entry *e;
    e = __kmp_itthash_find(th, &__kmp_itt_region_domains, loc, team_size);
    if (e == NULL)
      return; // too many entries in the hash
    if (e->d == NULL) { // new entry, need to calculate domain
      // Transform compiler-generated region location into the format
      // that the tools more or less standardized on:
      // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
      char *buff = NULL;
      kmp_str_loc_t str_loc =
          __kmp_str_loc_init(loc->psource, /* init_fname */ false);
      buff =
          __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                           team_size, str_loc.file, str_loc.line, str_loc.col);
      __itt_suppress_push(__itt_suppress_memory_errors);
      e->d = __itt_domain_create(buff);
      KMP_ASSERT(e->d != NULL);
      __itt_suppress_pop();
      __kmp_str_free(&buff);
      __kmp_str_loc_free(&str_loc);
    }
    __itt_frame_submit_v3(e->d, NULL, begin, end);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[reg sub] gtid=%d, domain=%p, region:%d, loc:%p, beg:%llu, end:%llu\n",
        gtid, e->d, region, loc, begin, end);
  } else { // called for barrier reporting
    kmp_itthash_entry *e;
    e = __kmp_itthash_find(th, &__kmp_itt_barrier_domains, loc, 0);
    if (e == NULL)
      return; // too many entries in the hash
    if (e->d == NULL) { // new entry, need to calculate domain
      // Transform compiler-generated region location into the format
      // that the tools more or less standardized on:
      // "<func>$omp$frame@[file:]<line>[:<col>]"
      kmp_str_loc_t str_loc =
          __kmp_str_loc_init(loc->psource, /* init_fname */ false);
      char *buff = NULL;
      if (imbalance) {
        buff =
            __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", str_loc.func,
                             team_size, str_loc.file, str_loc.line);
      } else {
        buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                str_loc.file, str_loc.line);
      }
      __itt_suppress_push(__itt_suppress_memory_errors);
      e->d = __itt_domain_create(buff);
      KMP_ASSERT(e->d != NULL);
      __itt_suppress_pop();
      __kmp_str_free(&buff);
      __kmp_str_loc_free(&str_loc);
    }
    __itt_frame_submit_v3(e->d, NULL, begin, end);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[frm sub] gtid=%d, domain=%p, loc:%p, beg:%llu, end:%llu\n", gtid,
        e->d, loc, begin, end);
  }
#endif
} // __kmp_itt_frame_submit
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                          kmp_uint64 end, kmp_uint64 imbalance,
                                          kmp_uint64 reduction) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_uint64 imbalance_data[4];
  imbalance_data[0] = begin;
  imbalance_data[1] = end;
  imbalance_data[2] = imbalance;
  imbalance_data[3] = reduction;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
                     __itt_metadata_u64, 4, imbalance_data);
#endif
} // __kmp_itt_metadata_imbalance
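/* Illustrative sketch (not part of the runtime, compiled out): the lazy-init
   idiom used above -- an unsynchronized fast-path check, then a re-check under
   the bootstrap lock so only one thread creates the domain and string handles.
   The same block is repeated in the loop/single reporters below. */
#if 0
static void example_lazy_init(void) {
  if (metadata_domain == NULL) { // fast path, no lock taken
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) { // re-check: another thread may have won
      metadata_domain = __itt_domain_create("OMP Metadata");
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }
}
#endif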
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                     kmp_uint64 iterations, kmp_uint64 chunk) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  // Parse line and column from psource string: ";file;func;line;col;;"
  KMP_DEBUG_ASSERT(loc->psource);
  kmp_uint64 loop_data[5];
  int line, col;
  __kmp_str_loc_numbers(loc->psource, &line, &col);
  loop_data[0] = line;
  loop_data[1] = col;
  loop_data[2] = sched_type;
  loop_data[3] = iterations;
  loop_data[4] = chunk;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
                     __itt_metadata_u64, 5, loop_data);
#endif
} // __kmp_itt_metadata_loop
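/* Illustrative sketch (not part of the runtime, compiled out): pulling line
   and column out of a ";file;func;line;col;;" string, roughly what
   __kmp_str_loc_numbers is expected to extract above. The psource value is
   hypothetical. */
#if 0
#include <stdio.h>
int main(void) {
  const char *psource = ";file.c;foo;10;3;;"; // hypothetical location string
  int line = 0, col = 0;
  // skip the two ';'-delimited name fields, then read line and column
  sscanf(psource, ";%*[^;];%*[^;];%d;%d", &line, &col);
  printf("line=%d col=%d\n", line, col); // line=10 col=3
  return 0;
}
#endif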
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  int line, col;
  __kmp_str_loc_numbers(loc->psource, &line, &col);
  kmp_uint64 single_data[2];
  single_data[0] = line;
  single_data[1] = col;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
                     __itt_metadata_u64, 2, single_data);
#endif
} // __kmp_itt_metadata_single
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting

// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished
// ----------------------------------------------------------------------------
LINKAGE void __kmp_itt_region_joined(int gtid) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  kmp_info_t *th = __kmp_thread_from_gtid(gtid);
  ident_t *loc = th->th.th_ident;
  if (loc) {
    kmp_itthash_entry *e = __kmp_itthash_find(th, &__kmp_itt_region_domains,
                                              loc, th->th.th_team_nproc);
    if (e == NULL)
      return; // too many entries in the hash
    KMP_DEBUG_ASSERT(e->d);
    KMP_ITT_DEBUG_LOCK();
    __itt_frame_end_v3(e->d, NULL);
    KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, domain=%p, loc:%p\n", gtid, e->d,
                        loc);
  }
#endif
} // __kmp_itt_region_joined
/* Barriers reporting.

   A barrier consists of two phases:
   1. Gather -- primary thread waits for all worker threads to arrive; each
      worker thread registers arrival and goes further.
   2. Release -- each worker thread waits until primary thread lets it go;
      primary thread lets worker threads go.

   Functions should be called by each thread:
   * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
   * __kmp_itt_barrier_middle() -- between gather and release phases.
   * __kmp_itt_barrier_finished() -- after release phase.

   Note: Call __kmp_itt_barrier_object() before the call to
   __kmp_itt_barrier_starting() and save the result in a local variable.
   __kmp_itt_barrier_object(), being called too late (e.g. after the gather
   phase), would return the ITT sync object for the *next* barrier!

   ITT needs an address (void *) to be specified as a sync object. The OpenMP
   RTL does not have a barrier object or barrier data structure; a barrier is
   just a counter in team and thread structures. We could use an address of the
   team structure as a barrier sync object, but ITT wants different objects for
   different barriers (even within the same team). So let us use the team
   address as the sync object for the first barrier, then increase it by one
   for each subsequent barrier (wrapping so as not to use addresses outside of
   the team structure). */
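/* Illustrative sketch (not part of the runtime, compiled out): the address
   arithmetic used below, with hypothetical sizes. With sizeof(kmp_team_t) ==
   4096 and bs_last_barrier == 2, offsets cycle through 0, 2, 4, ..., 4094 for
   bt == 0 (plus 1 for bt == 1), so every id stays within the team structure
   and barrier types never collide. */
#if 0
#include <stdio.h>
int main(void) {
  unsigned long team = 0x10000; // hypothetical team address
  unsigned long team_size = 4096; // stand-in for sizeof(kmp_team_t)
  int bs_last = 2; // stand-in for bs_last_barrier
  for (unsigned long counter = 0; counter < 4; ++counter)
    for (int bt = 0; bt < bs_last; ++bt)
      printf("counter=%lu bt=%d object=%#lx\n", counter, bt,
             team + counter % (team_size / bs_last) * bs_last + bt);
  return 0;
}
#endif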
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is default
                               // value; specify -1 to get previous
                               // barrier.
) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
  // be NULL. This "if" helps to avoid crash. However, this is not a complete
  // solution, and reporting fork/join barriers to ITT should be revisited.

  if (team != NULL) {
    // Primary thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
    // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
    // barriers of different types do not have the same ids.
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // This condition is a must (we would have zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    // More strong condition: make sure we have room at least for two
    // different ids (for each barrier type).
    object = reinterpret_cast<void *>(
        (kmp_uintptr_t)(team) +
        (kmp_uintptr_t)counter % (sizeof(kmp_team_t) / bs_last_barrier) *
            bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For plain barrier compiler calls __kmpc_barrier() function, which
        // saves location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by compiler.
        kmp_int32 expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        if (impl) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            type = "OMP Implicit Barrier";
          }
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of fork/join barrier we cannot read thr->th.th_ident,
        // because it contains the location of the last passed construct (while
        // the join barrier is not such one). Use th_ident of the primary
        // thread instead -- __kmp_join_call() called by the primary thread
        // saves location.
        //
        // AC: cannot read from the primary thread because __kmp_join_call may
        // not be called yet, so we read the location from the team. This is
        // the same location. Team is valid on entry to the join barrier where
        // this function is called.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, src);
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object
// -----------------------------------------------------------------------------
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting
// -----------------------------------------------------------------------------
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_middle
// -----------------------------------------------------------------------------
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_finished
/* Taskwait reporting.
   ITT needs an address (void *) to be specified as a sync object. The OpenMP
   RTL does not have a taskwait structure, so we need to construct something. */

void *__kmp_itt_taskwait_object(int gtid) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  if (UNLIKELY(__itt_sync_create_ptr)) {
    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
    kmp_taskdata_t *taskdata = thread->th.th_current_task;
    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
                                      taskdata->td_taskwait_counter %
                                          sizeof(kmp_taskdata_t));
  }
#endif
  return object;
} // __kmp_itt_taskwait_object
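/* Illustrative sketch (not part of the runtime, compiled out): like the
   barrier ids above, taskwait ids are synthesized by offsetting the task
   descriptor address with a per-task counter, wrapping within
   sizeof(kmp_taskdata_t) so the id never leaves the descriptor. The size and
   address below are hypothetical. */
#if 0
#include <stdio.h>
int main(void) {
  unsigned long taskdata = 0x20000; // hypothetical kmp_taskdata_t address
  unsigned long td_size = 192; // stand-in for sizeof(kmp_taskdata_t)
  for (unsigned counter = 0; counter < 3; ++counter)
    printf("taskwait #%u -> object %#lx\n", counter,
           taskdata + counter % td_size);
  return 0;
}
#endif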
void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  ident_t const *loc = taskdata->td_taskwait_ident;
  char const *src = (loc == NULL ? NULL : loc->psource);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting
void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
/* Task reporting.
   Only those tasks are reported which are executed by a thread spinning at a
   barrier (or taskwait). The sync object passed to the function must be the
   barrier or taskwait the thread is waiting at. */

void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
) {
#if USE_ITT_NOTIFY
  if (UNLIKELY(object != NULL)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting
// -----------------------------------------------------------------------------
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
/* Lock reporting.
 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
   operation (set/unset). It is not a real event shown to the user but just
   setting a name for the synchronization object. `lock' is an address of the
   sync object; the same address should be used in all subsequent calls.
 * __kmp_itt_lock_acquiring() should be called before setting the lock.
 * __kmp_itt_lock_acquired() should be called after setting the lock.
 * __kmp_itt_lock_releasing() should be called before unsetting the lock.
 * __kmp_itt_lock_cancelled() should be called after a thread cancelled waiting
   for the lock.
 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
   operation. After __kmp_itt_lock_destroyed() all the references to the same
   address will be considered as another sync object, not related to the
   original one. */
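/* Illustrative sketch (not part of the runtime, compiled out): the call order
   the protocol above prescribes for one uncontended acquisition, assuming the
   non-dynamic-lock entry points. The wrapper and `lck' are hypothetical. */
#if 0
void example_lock_lifecycle(kmp_user_lock_p lck) {
  __kmp_itt_lock_creating(lck); // once, names the sync object
  __kmp_itt_lock_acquiring(lck); // before each set attempt
  /* ... blocking set succeeds ... */
  __kmp_itt_lock_acquired(lck); // after the set
  /* ... work under the lock ... */
  __kmp_itt_lock_releasing(lck); // before the unset
  __kmp_itt_lock_destroyed(lck); // once, after the last operation
}
#endif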
#if KMP_USE_DYNAMIC_LOCK
// Takes location information directly
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
                                       const ident_t *loc) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
}
#else // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    ident_t const *loc = NULL;
    if (__kmp_get_user_lock_location_ != NULL)
      loc = __kmp_get_user_lock_location_((lock));
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
} // ___kmp_itt_lock_init
#endif // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(lock);
  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
#endif
} // ___kmp_itt_lock_fini
// -----------------------------------------------------------------------------
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
}
#else
void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Lock");
} // __kmp_itt_lock_creating
#endif
void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_prepare_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_prepare(ilk->lock);
    } else {
      __itt_sync_prepare(lock);
    }
  }
#else
  __itt_sync_prepare(lock);
#endif
} // __kmp_itt_lock_acquiring
void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_acquired_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_acquired(ilk->lock);
    } else {
      __itt_sync_acquired(lock);
    }
  }
#else
  __itt_sync_acquired(lock);
#endif
} // __kmp_itt_lock_acquired
void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_releasing_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_releasing(ilk->lock);
    } else {
      __itt_sync_releasing(lock);
    }
  }
#else
  __itt_sync_releasing(lock);
#endif
} // __kmp_itt_lock_releasing
void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_cancel_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_cancel(ilk->lock);
    } else {
      __itt_sync_cancel(lock);
    }
  }
#else
  __itt_sync_cancel(lock);
#endif
} // __kmp_itt_lock_cancelled
void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Lock");
} // __kmp_itt_lock_destroyed
/* Critical reporting.
   Critical sections are treated exactly as locks (but have a different object
   type). */
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
}
#else
void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Critical");
} // __kmp_itt_critical_creating
#endif
void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
  __itt_sync_prepare(lock);
} // __kmp_itt_critical_acquiring

void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
  __itt_sync_acquired(lock);
} // __kmp_itt_critical_acquired

void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
  __itt_sync_releasing(lock);
} // __kmp_itt_critical_releasing

void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Critical");
} // __kmp_itt_critical_destroyed
/* Single reporting. */

void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
    ident_t *loc = thr->th.th_ident;
    char const *src = (loc == NULL ? NULL : loc->psource);
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start
void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end
/* Ordered reporting.
 * __kmp_itt_ordered_init is called by each thread *before* first using the
   sync object. The ITT team would like it to be called once, but it requires
   extra synchronization.
 * __kmp_itt_ordered_prep is called when a thread is going to enter an ordered
   section (before synchronization).
 * __kmp_itt_ordered_start is called just before entering user code (after
   synchronization).
 * __kmp_itt_ordered_end is called after returning from user code.

   Sync object is th->th.th_dispatch->th_dispatch_sh_current.
   Events are not generated in case of a serialized team. */
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
    ident_t const *loc = thr->th.th_ident;
    char const *src = (loc == NULL ? NULL : loc->psource);
    __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current,
                      "OMP Ordered", src, 0);
  }
#endif
} // __kmp_itt_ordered_init
void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_prep

void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_start

void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_end
/* Threads reporting. */

void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore

void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_thr_name_set_ptr) {
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    if (KMP_MASTER_GTID(gtid)) {
      __kmp_str_buf_print(&name, "OMP Primary Thread #%d", gtid);
    } else {
      __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid);
    }
    KMP_ITT_DEBUG_LOCK();
    __itt_thr_name_set(name.str, name.used);
    KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str);
    __kmp_str_buf_free(&name);
  }
#endif
} // __kmp_itt_thread_name
/* System object reporting.
   ITT catches operations with system sync objects (like Windows* OS on IA-32
   architecture API critical sections and events). We only need to specify the
   name ("OMP Scheduler") for the object to let ITT know it is an object used
   by the OpenMP RTL for internal purposes. */

void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created
/* Stack stitching api.
   The primary thread calls "create" and puts the stitching id into the team
   structure. Workers read the stitching id and call the "enter" / "leave" api.
   The primary thread calls "destroy" at the end of the parallel region. */
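/* Illustrative sketch (not part of the runtime, compiled out): the
   create/enter/leave/destroy ordering described above, with the team structure
   elided -- "id" would normally travel through kmp_team_t. The wrapper is
   hypothetical. */
#if 0
void example_stitching(void) {
  __itt_caller id = __kmp_itt_stack_caller_create(); // primary thread
  __kmp_itt_stack_callee_enter(id); // each worker, before the region body
  /* ... parallel region body runs here ... */
  __kmp_itt_stack_callee_leave(id); // each worker, after the region body
  __kmp_itt_stack_caller_destroy(id); // primary thread, at join
}
#endif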
__itt_caller __kmp_itt_stack_caller_create() {
#if USE_ITT_NOTIFY
  if (!__itt_stack_caller_create_ptr)
    return NULL;
  KMP_ITT_DEBUG_LOCK();
  __itt_caller id = __itt_stack_caller_create();
  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
  return id;
#endif
  return NULL;
}
void __kmp_itt_stack_caller_destroy(__itt_caller id) {
#if USE_ITT_NOTIFY
  if (__itt_stack_caller_destroy_ptr) {
    KMP_ITT_DEBUG_LOCK();
    __itt_stack_caller_destroy(id);
    KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
  }
#endif
}
void __kmp_itt_stack_callee_enter(__itt_caller id) {
#if USE_ITT_NOTIFY
  if (__itt_stack_callee_enter_ptr) {
    KMP_ITT_DEBUG_LOCK();
    __itt_stack_callee_enter(id);
    KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
  }
#endif
}
void __kmp_itt_stack_callee_leave(__itt_caller id) {
#if USE_ITT_NOTIFY
  if (__itt_stack_callee_leave_ptr) {
    KMP_ITT_DEBUG_LOCK();
    __itt_stack_callee_leave(id);
    KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
  }
#endif
}
#endif /* USE_ITT_BUILD */