#if USE_ITT_BUILD
/*
 * kmp_itt.inl -- Inline functions of ITT Notify.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Inline function definitions. This file should be included into the kmp_itt.h
// file for a production build (to let the compiler inline functions) or into
// the kmp_itt.c file for a debug build (to reduce the number of files to
// recompile and save build time).

#include "kmp.h"
#include "kmp_str.h"

#if KMP_ITT_DEBUG
extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
#define KMP_ITT_DEBUG_LOCK()                                                   \
  { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
#define KMP_ITT_DEBUG_PRINT(...)                                               \
  {                                                                            \
    fprintf(stderr, "#%02d: ", __kmp_get_gtid());                              \
    fprintf(stderr, __VA_ARGS__);                                              \
    fflush(stderr);                                                            \
    __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock);                       \
  }
#else
#define KMP_ITT_DEBUG_LOCK()
#define KMP_ITT_DEBUG_PRINT(...)
#endif // KMP_ITT_DEBUG
// Ensure that the functions are static if they're supposed to be inlined.
// Otherwise they cannot be used in more than one file, since there will be
// multiple definitions.
#if KMP_DEBUG
#define LINKAGE
#else
#define LINKAGE static inline
#endif
// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
// this API to support user-defined synchronization primitives, but does not
// use ZCA; it would be safe to turn this off until wider support becomes
// available.
#if USE_ITT_ZCA
#ifdef __INTEL_COMPILER
#if __INTEL_COMPILER >= 1200
#undef __itt_sync_acquired
#undef __itt_sync_releasing
#define __itt_sync_acquired(addr)                                              \
  __notify_zc_intrinsic((char *)"sync_acquired", addr)
#define __itt_sync_releasing(addr)                                             \
  __notify_intrinsic((char *)"sync_releasing", addr)
#endif
#endif
#endif

static kmp_bootstrap_lock_t metadata_lock =
    KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
/* Parallel region reporting.
 * __kmp_itt_region_forking should be called by the master thread of a team.
   The exact moment of the call does not matter, but it should complete before
   any thread of this team calls __kmp_itt_region_starting.
 * __kmp_itt_region_starting should be called by each thread of a team just
   before entering the parallel region body.
 * __kmp_itt_region_finished should be called by each thread of a team right
   after returning from the parallel region body.
 * __kmp_itt_region_joined should be called by the master thread of a team,
   after all threads have called __kmp_itt_region_finished.

   Note: A thread waiting at the join barrier (after __kmp_itt_region_finished)
   can execute some more user code -- such a thread can execute tasks.

   Note: The overhead of logging region_starting and region_finished in each
   thread is too large, so these calls are not used. */
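// A minimal sketch of the call order described above (hypothetical caller;
// not part of this file):
//
//   // master thread, before releasing the workers:
//   __kmp_itt_region_forking(gtid, team_size, barriers);
//   // each thread of the team:
//   __kmp_itt_region_starting(gtid); // just before the region body
//   /* ... parallel region body ... */
//   __kmp_itt_region_finished(gtid); // right after the region body
//   // master thread, after all threads called region_finished:
//   __kmp_itt_region_joined(gtid);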
LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc) {
    // Use the reserved_2 field to store the index to the region domain.
    // Assume that reserved_2 contains zero initially. Since zero is a special
    // value here, store the index into the domain array increased by 1.
    if (loc->reserved_2 == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // loc->reserved_2 is still 0
        }
        // if (!KMP_COMPARE_AND_STORE_ACQ32(&loc->reserved_2, 0, frm + 1)) {
        //   frm = loc->reserved_2 - 1; // get value saved by other thread
        //                              // for the same loc
        // } // AC: this block is to replace the next unsynchronized line

        // We need to save indexes for both region and barrier frames. We'll
        // use the loc->reserved_2 field, putting the region index into the
        // low two bytes and the barrier index into the high two bytes. It is
        // OK because KMP_MAX_FRAME_DOMAINS = 512.
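        // For illustration (hypothetical values): region index 3 and barrier
        // index 5 make reserved_2 equal to ((5 + 1) << 16) | (3 + 1), i.e.
        // 0x00060004; decode with (reserved_2 & 0xFFFF) - 1 for the region
        // index and (reserved_2 >> 16) - 1 for the barrier index.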
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        //   "<func>$omp$parallel@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        if (barriers) {
          if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
            int frm = KMP_TEST_THEN_INC32(
                &__kmp_barrier_domain_count); // get "old" value
            if (frm >= KMP_MAX_FRAME_DOMAINS) {
              KMP_TEST_THEN_DEC32(
                  &__kmp_barrier_domain_count); // revert the count
              return; // the barrier index in loc->reserved_2 is still 0
            }
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __kmp_str_free(&buff);
            // Save the barrier frame index to the high two bytes.
            loc->reserved_2 |= (frm + 1) << 16;
          }
        }
        __kmp_str_loc_free(&str_loc);
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    } else { // Region domain exists for this location.
      // Check if the team size was changed. If so, create a new region domain
      // for this location.
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if ((frm < KMP_MAX_FRAME_DOMAINS) &&
          (__kmp_itt_region_team_size[frm] != team_size)) {
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      } else { // Team size was not changed. Use the existing domain.
        __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
                        loc->reserved_2, loc);
  }
#endif
} // __kmp_itt_region_forking
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
                                    __itt_timestamp end, int imbalance,
                                    ident_t *loc, int team_size, int region) {
#if USE_ITT_NOTIFY
  if (region) {
    kmp_team_t *team = __kmp_team_from_gtid(gtid);
    int serialized = (region == 2 ? 1 : 0);
    if (team->t.t_active_level + serialized > 1) {
      // The frame notifications are only supported for the outermost teams.
      return;
    }
    // Check that the region domain has not been created before. Its index is
    // saved in the low two bytes.
    if ((loc->reserved_2 & 0x0000FFFF) == 0) {
      if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
        int frm =
            KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
        if (frm >= KMP_MAX_FRAME_DOMAINS) {
          KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
          return; // the low two bytes of loc->reserved_2 are still 0
        }

        // We need to save indexes for both region and barrier frames. We'll
        // use the loc->reserved_2 field, putting the region index into the
        // low two bytes and the barrier index into the high two bytes. It is
        // OK because KMP_MAX_FRAME_DOMAINS = 512.
        loc->reserved_2 |= (frm + 1); // save "new" value

        // Transform compiler-generated region location into the format
        // that the tools more or less standardized on:
        //   "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    } else { // Region domain exists for this location.
      // Check if the team size was changed. If so, create a new region domain
      // for this location.
      unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
      if (frm >= KMP_MAX_FRAME_DOMAINS)
        return; // something's gone wrong, returning
      if (__kmp_itt_region_team_size[frm] != team_size) {
        char *buff = NULL;
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
        buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
                                team_size, str_loc.file, str_loc.line,
                                str_loc.col);

        __itt_suppress_push(__itt_suppress_memory_errors);
        __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
        __itt_suppress_pop();

        __kmp_str_free(&buff);
        __kmp_str_loc_free(&str_loc);
        __kmp_itt_region_team_size[frm] = team_size;
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      } else { // Team size was not changed. Use the existing domain.
        __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
      }
    }
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT(
        "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
        gtid, loc->reserved_2, region, loc, begin, end);
    return;
  } else { // called for barrier reporting
    if (loc) {
      if ((loc->reserved_2 & 0xFFFF0000) == 0) {
        if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
          int frm = KMP_TEST_THEN_INC32(
              &__kmp_barrier_domain_count); // get "old" value
          if (frm >= KMP_MAX_FRAME_DOMAINS) {
            KMP_TEST_THEN_DEC32(
                &__kmp_barrier_domain_count); // revert the count
            return; // the high two bytes of loc->reserved_2 are still 0
          }
          // Save the barrier frame index to the high two bytes.
          loc->reserved_2 |= (frm + 1) << 16; // save "new" value

          // Transform compiler-generated region location into the format
          // that the tools more or less standardized on:
          //   "<func>$omp$frame@[file:]<line>[:<col>]"
          kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
          if (imbalance) {
            char *buff_imb = NULL;
            buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
                                        str_loc.func, team_size, str_loc.file,
                                        str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL,
                                  begin, end);
            __kmp_str_free(&buff_imb);
          } else {
            char *buff = NULL;
            buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
                                    str_loc.file, str_loc.col);
            __itt_suppress_push(__itt_suppress_memory_errors);
            __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
            __itt_suppress_pop();
            __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
                                  end);
            __kmp_str_free(&buff);
          }
          __kmp_str_loc_free(&str_loc);
        }
      } else { // if it is not 0, then it should be <= KMP_MAX_FRAME_DOMAINS
        if (imbalance) {
          __itt_frame_submit_v3(
              __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        } else {
          __itt_frame_submit_v3(
              __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
              begin, end);
        }
      }
      KMP_ITT_DEBUG_LOCK();
      KMP_ITT_DEBUG_PRINT(
          "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
          loc->reserved_2, loc, begin, end);
    }
  }
#endif
} // __kmp_itt_frame_submit
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
                                          kmp_uint64 end, kmp_uint64 imbalance,
                                          kmp_uint64 reduction) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_uint64 imbalance_data[4];
  imbalance_data[0] = begin;
  imbalance_data[1] = end;
  imbalance_data[2] = imbalance;
  imbalance_data[3] = reduction;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
                     __itt_metadata_u64, 4, imbalance_data);
#endif
} // __kmp_itt_metadata_imbalance
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
                                     kmp_uint64 iterations, kmp_uint64 chunk) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  // Parse line and column from the psource string: ";file;func;line;col;;"
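  // For example (hypothetical psource): ";file.c;func;42;7;;" leaves s_line
  // pointing at the ";42;7;;" tail and s_col at ";7;;", so the atoi() calls
  // below read line 42 and column 7.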
  char *s_line;
  char *s_col;
  KMP_DEBUG_ASSERT(loc->psource);
#ifdef __cplusplus
  s_line = strchr(CCAST(char *, loc->psource), ';');
#else
  s_line = strchr(loc->psource, ';');
#endif
  KMP_DEBUG_ASSERT(s_line);
  s_line = strchr(s_line + 1, ';'); // 2nd semicolon
  KMP_DEBUG_ASSERT(s_line);
  s_line = strchr(s_line + 1, ';'); // 3rd semicolon
  KMP_DEBUG_ASSERT(s_line);
  s_col = strchr(s_line + 1, ';'); // 4th semicolon
  KMP_DEBUG_ASSERT(s_col);

  kmp_uint64 loop_data[5];
  loop_data[0] = atoi(s_line + 1); // read line
  loop_data[1] = atoi(s_col + 1); // read column
  loop_data[2] = sched_type;
  loop_data[3] = iterations;
  loop_data[4] = chunk;

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
                     __itt_metadata_u64, 5, loop_data);
#endif
} // __kmp_itt_metadata_loop
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
#if USE_ITT_NOTIFY
  if (metadata_domain == NULL) {
    __kmp_acquire_bootstrap_lock(&metadata_lock);
    if (metadata_domain == NULL) {
      __itt_suppress_push(__itt_suppress_memory_errors);
      metadata_domain = __itt_domain_create("OMP Metadata");
      string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
      string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
      string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
      __itt_suppress_pop();
    }
    __kmp_release_bootstrap_lock(&metadata_lock);
  }

  kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
  kmp_uint64 single_data[2];
  single_data[0] = str_loc.line;
  single_data[1] = str_loc.col;

  __kmp_str_loc_free(&str_loc);

  __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
                     __itt_metadata_u64, 2, single_data);
#endif
} // __kmp_itt_metadata_single
// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_region_starting(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_starting

// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_region_finished(int gtid) {
#if USE_ITT_NOTIFY
#endif
} // __kmp_itt_region_finished

// -----------------------------------------------------------------------------
LINKAGE void __kmp_itt_region_joined(int gtid) {
#if USE_ITT_NOTIFY
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
  if (team->t.t_active_level > 1) {
    // The frame notifications are only supported for the outermost teams.
    return;
  }
  ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
  if (loc && loc->reserved_2) {
    unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
    if (frm < KMP_MAX_FRAME_DOMAINS) {
      KMP_ITT_DEBUG_LOCK();
      __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
      KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
                          loc->reserved_2, loc);
    }
  }
#endif
} // __kmp_itt_region_joined
/* Barriers reporting.

   A barrier consists of two phases:
   1. Gather -- the master waits for all worker threads to arrive; each worker
      thread registers arrival and goes on.
   2. Release -- each worker thread waits until the master lets it go; the
      master lets the workers go.

   The following functions should be called by each thread:
   * __kmp_itt_barrier_starting() -- before arriving at the gather phase.
   * __kmp_itt_barrier_middle() -- between the gather and release phases.
   * __kmp_itt_barrier_finished() -- after the release phase.

   Note: Call __kmp_itt_barrier_object() before the call to
   __kmp_itt_barrier_starting() and save the result in a local variable.
   __kmp_itt_barrier_object(), called too late (e.g. after the gather phase),
   would return the ITT sync object for the *next* barrier!

   ITT needs an address (void *) to be specified as a sync object. The OpenMP
   RTL does not have a barrier object or barrier data structure; a barrier is
   just a counter in the team and thread structures. We could use the address
   of the team structure as a barrier sync object, but ITT wants different
   objects for different barriers (even within the same team). So let us use
   the team address as the sync object for the first barrier, then increase it
   by one for the next barrier, and so on (but wrap so as not to use addresses
   outside of the team structure). */
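// A minimal sketch of the protocol above (hypothetical caller; the argument
// values are illustrative): grab the sync object first, then bracket the two
// barrier phases with the three calls.
//
//   void *b = __kmp_itt_barrier_object(gtid, bs_plain_barrier, 1, 0);
//   __kmp_itt_barrier_starting(gtid, b); // before the gather phase
//   /* ... gather phase ... */
//   __kmp_itt_barrier_middle(gtid, b); // between gather and release
//   /* ... release phase ... */
//   __kmp_itt_barrier_finished(gtid, b); // after the release phase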
void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
                               int delta // 0 (current barrier) is the default
                                         // value; specify -1 to get the
                                         // previous barrier.
                               ) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
  kmp_team_t *team = thr->th.th_team;

  // NOTE: If the function is called from __kmp_fork_barrier, the team pointer
  // can be NULL. This "if" helps to avoid a crash. However, it is not a
  // complete solution, and reporting fork/join barriers to ITT should be
  // revisited.
  if (team != NULL) {
    // The master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each
    // time. Divide b_arrived by KMP_BARRIER_STATE_BUMP to get a plain barrier
    // counter.
    kmp_uint64 counter =
        team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
    // Now form the barrier id. Encode the barrier type (bt) in the barrier id
    // too, so barriers of different types do not get the same ids.
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
    // This condition is a must (we would get a zero divide otherwise).
    KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
    // Stronger condition: make sure we have room for at least two different
    // ids (for each barrier type).
    object = reinterpret_cast<void *>(
        kmp_uintptr_t(team) +
        counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier +
        bt);
    KMP_ITT_DEBUG_LOCK();
    KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
                        counter, object);

    if (set_name) {
      ident_t const *loc = NULL;
      char const *src = NULL;
      char const *type = "OMP Barrier";
      switch (bt) {
      case bs_plain_barrier: {
        // For a plain barrier the compiler calls the __kmpc_barrier()
        // function, which saves the location in thr->th.th_ident.
        loc = thr->th.th_ident;
        // Get the barrier type from flags provided by the compiler.
        int expl = 0;
        kmp_uint32 impl = 0;
        if (loc != NULL) {
          src = loc->psource;
          expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
          impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
        }
        if (impl) {
          switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
          case KMP_IDENT_BARRIER_IMPL_FOR: {
            type = "OMP For Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
            type = "OMP Sections Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_SINGLE: {
            type = "OMP Single Barrier";
          } break;
          case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
            type = "OMP Workshare Barrier";
          } break;
          default: {
            type = "OMP Implicit Barrier";
          } break;
          }
        } else if (expl) {
          type = "OMP Explicit Barrier";
        }
      } break;
      case bs_forkjoin_barrier: {
        // In case of the fork/join barrier we cannot read thr->th.th_ident,
        // because it contains the location of the last passed construct
        // (while the join barrier is not such one). Use th_ident of the
        // master thread instead -- __kmp_join_call() called by the master
        // thread saves the location.
        //
        // AC: cannot read from the master because __kmp_join_call may not be
        // called yet, so we read the location from the team. This is the same
        // location. And the team is valid at the entry to the join barrier
        // where this happens.
        loc = team->t.t_ident;
        if (loc != NULL) {
          src = loc->psource;
        }
        type = "OMP Join Barrier";
      } break;
      }
      KMP_ITT_DEBUG_LOCK();
      __itt_sync_create(object, type, src, __itt_attr_barrier);
      KMP_ITT_DEBUG_PRINT(
          "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
          type, src);
    }
  }
#endif
  return object;
} // __kmp_itt_barrier_object
// -----------------------------------------------------------------------------
void __kmp_itt_barrier_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
  }
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
#endif
} // __kmp_itt_barrier_starting

// -----------------------------------------------------------------------------
void __kmp_itt_barrier_middle(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_releasing(object);
    KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_middle

// -----------------------------------------------------------------------------
void __kmp_itt_barrier_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  if (!KMP_MASTER_GTID(gtid)) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_acquired(object);
    KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
  }
#endif
} // __kmp_itt_barrier_finished
/* Taskwait reporting.
   ITT needs an address (void *) to be specified as a sync object. The OpenMP
   RTL does not have a taskwait structure, so we need to construct something. */
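// A minimal usage sketch (hypothetical caller): obtain the object before
// waiting, then bracket the wait with the two calls below.
//
//   void *tw = __kmp_itt_taskwait_object(gtid);
//   __kmp_itt_taskwait_starting(gtid, tw);
//   /* ... wait for child tasks ... */
//   __kmp_itt_taskwait_finished(gtid, tw);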
void *__kmp_itt_taskwait_object(int gtid) {
  void *object = NULL;
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
    kmp_taskdata_t *taskdata = thread->th.th_current_task;
    object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
                                      taskdata->td_taskwait_counter %
                                          sizeof(kmp_taskdata_t));
  }
#endif
  return object;
} // __kmp_itt_taskwait_object

void __kmp_itt_taskwait_starting(int gtid, void *object) {
#if USE_ITT_NOTIFY
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *taskdata = thread->th.th_current_task;
  ident_t const *loc = taskdata->td_taskwait_ident;
  char const *src = (loc == NULL ? NULL : loc->psource);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Taskwait", src, 0);
  KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
                      object, src);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
#endif
} // __kmp_itt_taskwait_starting

void __kmp_itt_taskwait_finished(int gtid, void *object) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_acquired(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(object);
  KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
#endif
} // __kmp_itt_taskwait_finished
/* Task reporting.
   Only those tasks are reported that are executed by a thread spinning at a
   barrier (or taskwait). The sync object passed to the function must be the
   barrier or taskwait the threads are waiting at. */
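// A minimal usage sketch (hypothetical caller): `obj` is the barrier or
// taskwait sync object the thread is spinning at.
//
//   __kmp_itt_task_starting(obj); // cancels the wait on the object
//   /* ... execute the task ... */
//   __kmp_itt_task_finished(obj); // re-arms the wait (sync_prepare)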
void __kmp_itt_task_starting(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  if (object != NULL) {
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_cancel(object);
    KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
  }
#endif
} // __kmp_itt_task_starting

// -----------------------------------------------------------------------------
void __kmp_itt_task_finished(
    void *object // ITT sync object: barrier or taskwait.
    ) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_prepare(object);
  KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
#endif
} // __kmp_itt_task_finished
/* Lock reporting.
 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
   operation (set/unset). It is not a real event shown to the user but just
   sets a name for the synchronization object. `lock' is an address of the
   sync object; the same address should be used in all subsequent calls.
 * __kmp_itt_lock_acquiring() should be called before setting the lock.
 * __kmp_itt_lock_acquired() should be called after setting the lock.
 * __kmp_itt_lock_releasing() should be called before unsetting the lock.
 * __kmp_itt_lock_cancelled() should be called after a thread cancelled
   waiting for the lock.
 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
   operation. After __kmp_itt_lock_destroyed() all references to the same
   address will be considered another sync object, not related to the
   original one. */
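// A minimal sketch of the protocol above (hypothetical caller; shown for the
// non-dynamic-lock signature of __kmp_itt_lock_creating):
//
//   __kmp_itt_lock_creating(lck); // name the object once, before first use
//   __kmp_itt_lock_acquiring(lck); // before setting the lock
//   __kmp_itt_lock_acquired(lck); // after setting the lock
//   __kmp_itt_lock_releasing(lck); // before unsetting the lock
//   __kmp_itt_lock_destroyed(lck); // after the last lock operation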
#if KMP_USE_DYNAMIC_LOCK
// Takes location information directly
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
                                       const ident_t *loc) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
}
#else // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    ident_t const *loc = NULL;
    if (__kmp_get_user_lock_location_ != NULL)
      loc = __kmp_get_user_lock_location_((lock));
    char const *src = (loc == NULL ? NULL : loc->psource);
    KMP_ITT_DEBUG_LOCK();
    __itt_sync_create(lock, type, src, 0);
    KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
                        src);
  }
#endif
} // ___kmp_itt_lock_init
#endif // KMP_USE_DYNAMIC_LOCK
// Internal guts -- common code for locks and critical sections, do not call
// directly.
__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_destroy(lock);
  KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
#endif
} // ___kmp_itt_lock_fini
// -----------------------------------------------------------------------------
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Lock", loc);
}
#else
void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Lock");
} // __kmp_itt_lock_creating
#endif

void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_prepare_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_prepare(ilk->lock);
    } else {
      __itt_sync_prepare(lock);
    }
  }
#else
  __itt_sync_prepare(lock);
#endif
} // __kmp_itt_lock_acquiring

void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  // postpone lock object access
  if (__itt_sync_acquired_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_acquired(ilk->lock);
    } else {
      __itt_sync_acquired(lock);
    }
  }
#else
  __itt_sync_acquired(lock);
#endif
} // __kmp_itt_lock_acquired

void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_releasing_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_releasing(ilk->lock);
    } else {
      __itt_sync_releasing(lock);
    }
  }
#else
  __itt_sync_releasing(lock);
#endif
} // __kmp_itt_lock_releasing

void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
#if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
  if (__itt_sync_cancel_ptr) {
    if (KMP_EXTRACT_D_TAG(lock) == 0) {
      kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
      __itt_sync_cancel(ilk->lock);
    } else {
      __itt_sync_cancel(lock);
    }
  }
#else
  __itt_sync_cancel(lock);
#endif
} // __kmp_itt_lock_cancelled

void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Lock");
} // __kmp_itt_lock_destroyed
/* Critical reporting.
   Critical sections are treated exactly as locks (but have a different object
   type). */
#if KMP_USE_DYNAMIC_LOCK
void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
  ___kmp_itt_lock_init(lock, "OMP Critical", loc);
}
#else
void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
  ___kmp_itt_lock_init(lock, "OMP Critical");
} // __kmp_itt_critical_creating
#endif

void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
  __itt_sync_prepare(lock);
} // __kmp_itt_critical_acquiring

void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
  __itt_sync_acquired(lock);
} // __kmp_itt_critical_acquired

void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
  __itt_sync_releasing(lock);
} // __kmp_itt_critical_releasing

void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
  ___kmp_itt_lock_fini(lock, "OMP Critical");
} // __kmp_itt_critical_destroyed
/* Single reporting. */

void __kmp_itt_single_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
    kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
    ident_t *loc = thr->th.th_ident;
    char const *src = (loc == NULL ? NULL : loc->psource);
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    __kmp_str_buf_print(&name, "OMP Single-%s", src);
    KMP_ITT_DEBUG_LOCK();
    thr->th.th_itt_mark_single = __itt_mark_create(name.str);
    KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
                        thr->th.th_itt_mark_single);
    __kmp_str_buf_free(&name);
    KMP_ITT_DEBUG_LOCK();
    __itt_mark(thr->th.th_itt_mark_single, NULL);
    KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
                        thr->th.th_itt_mark_single);
  }
#endif
} // __kmp_itt_single_start

void __kmp_itt_single_end(int gtid) {
#if USE_ITT_NOTIFY
  __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
  KMP_ITT_DEBUG_LOCK();
  __itt_mark_off(mark);
  KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
#endif
} // __kmp_itt_single_end
/* Ordered reporting.
 * __kmp_itt_ordered_init is called by each thread *before* first using the
   sync object. The ITT team would like it to be called once, but that would
   require extra synchronization.
 * __kmp_itt_ordered_prep is called when a thread is going to enter an ordered
   section (before synchronization).
 * __kmp_itt_ordered_start is called just before entering user code (after
   synchronization).
 * __kmp_itt_ordered_end is called after returning from user code.

   The sync object is th->th.th_dispatch->th_dispatch_sh_current.
   Events are not generated in case of a serialized team. */
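// A minimal call-order sketch (hypothetical caller), per the comment above:
//
//   __kmp_itt_ordered_init(gtid); // once per thread, before first use
//   __kmp_itt_ordered_prep(gtid); // before synchronization
//   /* ... synchronize ... */
//   __kmp_itt_ordered_start(gtid); // just before user code
//   /* ... ordered section body ... */
//   __kmp_itt_ordered_end(gtid); // after returning from user code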
void __kmp_itt_ordered_init(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
    ident_t const *loc = thr->th.th_ident;
    char const *src = (loc == NULL ? NULL : loc->psource);
    __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current,
                      "OMP Ordered", src, 0);
  }
#endif
} // __kmp_itt_ordered_init

void __kmp_itt_ordered_prep(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_prep

void __kmp_itt_ordered_start(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_start

void __kmp_itt_ordered_end(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_sync_create_ptr) {
    kmp_team_t *t = __kmp_team_from_gtid(gtid);
    if (!t->t.t_serialized) {
      kmp_info_t *th = __kmp_thread_from_gtid(gtid);
      __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current);
    }
  }
#endif
} // __kmp_itt_ordered_end
/* Threads reporting. */

void __kmp_itt_thread_ignore() {
  __itt_thr_ignore();
} // __kmp_itt_thread_ignore

void __kmp_itt_thread_name(int gtid) {
#if USE_ITT_NOTIFY
  if (__itt_thr_name_set_ptr) {
    kmp_str_buf_t name;
    __kmp_str_buf_init(&name);
    if (KMP_MASTER_GTID(gtid)) {
      __kmp_str_buf_print(&name, "OMP Master Thread #%d", gtid);
    } else {
      __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid);
    }
    KMP_ITT_DEBUG_LOCK();
    __itt_thr_name_set(name.str, name.used);
    KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str);
    __kmp_str_buf_free(&name);
  }
#endif
} // __kmp_itt_thread_name
/* System object reporting.
   ITT catches operations with system sync objects (like Windows* OS on IA-32
   architecture API critical sections and events). We only need to specify the
   name ("OMP Scheduler") for the object to let ITT know it is an object used
   by the OpenMP RTL for internal purposes. */

void __kmp_itt_system_object_created(void *object, char const *name) {
#if USE_ITT_NOTIFY
  KMP_ITT_DEBUG_LOCK();
  __itt_sync_create(object, "OMP Scheduler", name, 0);
  KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
                      object, name);
#endif
} // __kmp_itt_system_object_created
/* Stack stitching api.
   The master calls "create" and puts the stitching id into the team structure.
   Workers read the stitching id and call the "enter" / "leave" api.
   The master calls "destroy" at the end of the parallel region. */
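// A minimal usage sketch (hypothetical caller), per the comment above:
//
//   // master:
//   __itt_caller id = __kmp_itt_stack_caller_create(); // store in the team
//   // each worker, reading the id from the team:
//   __kmp_itt_stack_callee_enter(id);
//   /* ... run the microtask ... */
//   __kmp_itt_stack_callee_leave(id);
//   // master, at the end of the parallel region:
//   __kmp_itt_stack_caller_destroy(id);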
__itt_caller __kmp_itt_stack_caller_create() {
#if USE_ITT_NOTIFY
  if (!__itt_stack_caller_create_ptr)
    return NULL;
  KMP_ITT_DEBUG_LOCK();
  __itt_caller id = __itt_stack_caller_create();
  KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
  return id;
#endif
  return NULL;
}

void __kmp_itt_stack_caller_destroy(__itt_caller id) {
#if USE_ITT_NOTIFY
  if (__itt_stack_caller_destroy_ptr) {
    KMP_ITT_DEBUG_LOCK();
    __itt_stack_caller_destroy(id);
    KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
  }
#endif
}

void __kmp_itt_stack_callee_enter(__itt_caller id) {
#if USE_ITT_NOTIFY
  if (__itt_stack_callee_enter_ptr) {
    KMP_ITT_DEBUG_LOCK();
    __itt_stack_callee_enter(id);
    KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
  }
#endif
}

void __kmp_itt_stack_callee_leave(__itt_caller id) {
#if USE_ITT_NOTIFY
  if (__itt_stack_callee_leave_ptr) {
    KMP_ITT_DEBUG_LOCK();
    __itt_stack_callee_leave(id);
    KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
  }
#endif
}

#endif /* USE_ITT_BUILD */