4 #ifndef _DEFAULT_SOURCE
5 #define _DEFAULT_SOURCE
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
13 #include <omp-tools.h>
14 #include "ompt-signal.h"
16 // Used to detect architecture
17 #include "../../src/kmp_platform.h"
20 #define _TOOL_PREFIX ""
21 // If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test
25 static const char *ompt_thread_t_values
[] = {
26 "ompt_thread_UNDEFINED", "ompt_thread_initial", "ompt_thread_worker",
// Printable names for task status values, indexed by the numeric status.
// Index 0 is a placeholder for "no valid status".
static const char *ompt_task_status_t_values[] = {
    [0] = "ompt_task_UNDEFINED",
    [1] = "ompt_task_complete",
    [2] = "ompt_task_yield",
    [3] = "ompt_task_cancel",
    [4] = "ompt_task_detach",
    [5] = "ompt_task_early_fulfill",
    [6] = "ompt_task_late_fulfill",
    [7] = "ompt_task_switch",
    [8] = "ompt_taskwait_complete",
};
40 static const char* ompt_cancel_flag_t_values
[] = {
41 "ompt_cancel_parallel",
42 "ompt_cancel_sections",
44 "ompt_cancel_taskgroup",
45 "ompt_cancel_activated",
46 "ompt_cancel_detected",
47 "ompt_cancel_discarded_task"
50 static const char *ompt_work_t_values
[] = {"undefined",
53 "ompt_work_single_executor",
54 "ompt_work_single_other",
55 "ompt_work_workshare",
56 "ompt_work_distribute",
59 "ompt_work_workdistribute",
60 "ompt_work_loop_static",
61 "ompt_work_loop_dynamic",
62 "ompt_work_loop_guided",
63 "ompt_work_loop_other"};
65 static const char *ompt_work_events_t_values
[] = {"undefined",
67 "ompt_event_sections",
68 "ompt_event_single_in_block",
69 "ompt_event_single_others",
70 "ompt_event_workshare",
71 "ompt_event_distribute",
72 "ompt_event_taskloop",
74 "ompt_event_workdistribute",
75 "ompt_event_loop_static",
76 "ompt_event_loop_dynamic",
77 "ompt_event_loop_guided",
78 "ompt_event_loop_other"};
// Printable names for dependence type values, indexed by the numeric value.
// Values 8-33 have no name; they map to empty strings (not NULL) so that
// every in-range index yields a printable string.
static const char *ompt_dependence_type_t_values[36] = {
    [0] = "ompt_dependence_type_UNDEFINED",
    [1] = "ompt_dependence_type_in",
    [2] = "ompt_dependence_type_out",
    [3] = "ompt_dependence_type_inout",
    [4] = "ompt_dependence_type_mutexinoutset",
    [5] = "ompt_dependence_type_source",
    [6] = "ompt_dependence_type_sink",
    [7] = "ompt_dependence_type_inoutset",
    // 8-20
    [8] = "", "", "", "", "", "", "", "", "", "", "", "", "",
    // 21-33
    [21] = "", "", "", "", "", "", "", "", "", "", "", "", "",
    [34] = "ompt_dependence_type_out_all_memory",
    [35] = "ompt_dependence_type_inout_all_memory",
};
96 static const char *ompt_sync_region_t_values
[] = {"undefined",
100 "barrier_implementation",
104 "barrier_implicit_workshare",
105 "barrier_implicit_parallel",
108 static void format_task_type(int type
, char *buffer
) {
109 char *progress
= buffer
;
110 if (type
& ompt_task_initial
)
111 progress
+= sprintf(progress
, "ompt_task_initial");
112 if (type
& ompt_task_implicit
)
113 progress
+= sprintf(progress
, "ompt_task_implicit");
114 if (type
& ompt_task_explicit
)
115 progress
+= sprintf(progress
, "ompt_task_explicit");
116 if (type
& ompt_task_target
)
117 progress
+= sprintf(progress
, "ompt_task_target");
118 if (type
& ompt_task_taskwait
)
119 progress
+= sprintf(progress
, "ompt_task_taskwait");
120 if (type
& ompt_task_undeferred
)
121 progress
+= sprintf(progress
, "|ompt_task_undeferred");
122 if (type
& ompt_task_untied
)
123 progress
+= sprintf(progress
, "|ompt_task_untied");
124 if (type
& ompt_task_final
)
125 progress
+= sprintf(progress
, "|ompt_task_final");
126 if (type
& ompt_task_mergeable
)
127 progress
+= sprintf(progress
, "|ompt_task_mergeable");
128 if (type
& ompt_task_merged
)
129 progress
+= sprintf(progress
, "|ompt_task_merged");
// File-local function pointers for the OMPT runtime entry points. Helpers and
// callbacks in this file call through several of them (for example
// ompt_get_thread_data, ompt_get_task_info, ompt_get_unique_id and
// ompt_set_callback). They are declared here without initializers.
// NOTE(review): presumably they are assigned during tool initialization --
// that code is not part of this section; confirm before relying on it.
static ompt_set_callback_t ompt_set_callback;
static ompt_get_callback_t ompt_get_callback;
static ompt_get_state_t ompt_get_state;
static ompt_get_task_info_t ompt_get_task_info;
static ompt_get_task_memory_t ompt_get_task_memory;
static ompt_get_thread_data_t ompt_get_thread_data;
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_unique_id_t ompt_get_unique_id;
static ompt_finalize_tool_t ompt_finalize_tool;
static ompt_get_num_procs_t ompt_get_num_procs;
static ompt_get_num_places_t ompt_get_num_places;
static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
static ompt_get_place_num_t ompt_get_place_num;
static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
static ompt_get_proc_id_t ompt_get_proc_id;
static ompt_enumerate_states_t ompt_enumerate_states;
static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
150 static void print_ids(int level
)
152 int task_type
, thread_num
;
154 ompt_data_t
*task_parallel_data
;
155 ompt_data_t
*task_data
;
156 int exists_task
= ompt_get_task_info(level
, &task_type
, &task_data
, &frame
,
157 &task_parallel_data
, &thread_num
);
159 format_task_type(task_type
, buffer
);
161 printf("%" PRIu64
": task level %d: parallel_id=%" PRIu64
162 ", task_id=%" PRIu64
", exit_frame=%p, reenter_frame=%p, "
163 "task_type=%s=%d, thread_num=%d\n",
164 ompt_get_thread_data()->value
, level
,
165 exists_task
? task_parallel_data
->value
: 0,
166 exists_task
? task_data
->value
: 0, frame
->exit_frame
.ptr
,
167 frame
->enter_frame
.ptr
, buffer
, task_type
, thread_num
);
// Yields the frame address `level` call frames above the current function
// (0 is the current function), using the GCC/Clang builtin.
#define get_frame_address(level) __builtin_frame_address(level)

// Prints the value stored in the calling thread's OMPT thread data, followed
// by the frame address found at the given level.
#define print_frame(level)                                                     \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
         ompt_get_thread_data()->value, level, get_frame_address(level))
176 // clang (version 5.0 and above) adds an intermediate function call with debug flag (-g)
177 #if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
178 #if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
179 #define print_frame_from_outlined_fn(level) print_frame(level+1)
181 #define print_frame_from_outlined_fn(level) print_frame(level)
184 #if defined(__clang__) && __clang_major__ >= 5
185 #warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
186 #warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
190 // This macro helps to define a label at the current position that can be used
191 // to get the current address in the code.
193 // For print_current_address():
194 // To reliably determine the offset between the address of the label and the
195 // actual return address, we insert a NOP instruction as a jump target as the
196 // compiler would otherwise insert an instruction that we can't control. The
197 // instruction length is target dependent and is explained below.
199 // (The empty block between "#pragma omp ..." and the __asm__ statement is a
200 // workaround for a bug in the Intel Compiler.)
201 #define define_ompt_label(id) \
206 // This macro helps to get the address of a label that is inserted by the above
207 // macro define_ompt_label(). The address is obtained with a GNU extension
208 // (&&label) that has been tested with gcc, clang and icc.
209 #define get_ompt_label_address(id) (&& ompt_label_##id)
// This macro prints the exact address that a previously called runtime function
// returns to.
213 #define print_current_address(id) \
214 define_ompt_label(id) \
215 print_possible_return_addresses(get_ompt_label_address(id))
217 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
218 // On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
219 // a MOV instruction for non-void runtime functions which is 3 bytes long.
220 #define print_possible_return_addresses(addr) \
221 printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
222 ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
224 // On Power the NOP instruction is 4 bytes long. In addition, the compiler
225 // inserts a second NOP instruction (another 4 bytes). For non-void runtime
226 // functions Clang inserts a STW instruction (but only if compiling under
227 // -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
228 #define print_possible_return_addresses(addr) \
229 printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
230 ((char *)addr) - 8, ((char *)addr) - 12)
231 #elif KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32
232 // On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
233 // store instruction (another 4 bytes long).
// FIXME: PR #65696 added a third possibility (12 byte offset) to make the
235 // tests pass on Darwin. Adding the same for other OSes. However, the proper
236 // fix for this is to remove the extra branch instruction being generated by
237 // the AArch64 backend. See issue #69627.
238 #define print_possible_return_addresses(addr) \
239 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
240 ompt_get_thread_data()->value, ((char *)addr) - 4, \
241 ((char *)addr) - 8, ((char *)addr) - 12)
242 #elif KMP_ARCH_RISCV64
243 #if __riscv_compressed
// On RV64GC the C.NOP instruction is 2 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (i.e.
// another branch).
249 #define print_possible_return_addresses(addr) \
250 printf("%" PRIu64 ": current_address=%p or %p\n", \
251 ompt_get_thread_data()->value, ((char *)addr) - 6, ((char *)addr) - 10)
// On RV64G the NOP instruction is 4 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the NOP is referenced elsewhere (i.e.
// another branch).
258 #define print_possible_return_addresses(addr) \
259 printf("%" PRIu64 ": current_address=%p or %p\n", \
260 ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12)
262 #elif KMP_ARCH_LOONGARCH64
263 // On LoongArch64 the NOP instruction is 4 bytes long, can be followed by
264 // inserted jump instruction (another 4 bytes long). And an additional jump
265 // instruction may appear (adding 4 more bytes) when the NOP is referenced
266 // elsewhere (ie. another branch).
267 #define print_possible_return_addresses(addr) \
268 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
269 ompt_get_thread_data()->value, ((char *)addr) - 4, \
270 ((char *)addr) - 8, ((char *)addr) - 12)
272 // On VE the NOP instruction is 8 byte long. In addition, the compiler inserts
273 // a ??? instruction for non-void runtime functions which is ? bytes long.
274 #define print_possible_return_addresses(addr) \
275 printf("%" PRIu64 ": current_address=%p or %p\n", \
276 ompt_get_thread_data()->value, ((char *)addr) - 8, \
279 // On s390x the NOP instruction is 2 bytes long. For non-void runtime
280 // functions Clang inserts a STY instruction (but only if compiling under
281 // -fno-PIC which will be the default with Clang 8.0, another 6 bytes).
283 // Another possibility is:
285 // brasl %r14,__kmpc_end_master@plt
287 // 47 00 00 00 0: nop
288 // a7 f4 00 02 j addr
290 #define print_possible_return_addresses(addr) \
291 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
292 ompt_get_thread_data()->value, ((char *)addr) - 2, \
293 ((char *)addr) - 8, ((char *)addr) - 12)
295 #error Unsupported target architecture, cannot determine address offset!
299 // This macro performs a somewhat similar job to print_current_address(), except
300 // that it discards a certain number of nibbles from the address and only prints
301 // the most significant bits / nibbles. This can be used for cases where the
302 // return address can only be approximated.
304 // To account for overflows (ie the most significant bits / nibbles have just
305 // changed as we are a few bytes above the relevant power of two) the addresses
306 // of the "current" and of the "previous block" are printed.
307 #define print_fuzzy_address(id) \
308 define_ompt_label(id) \
309 print_fuzzy_address_blocks(get_ompt_label_address(id))
311 // If you change this define you need to adapt all capture patterns in the tests
312 // to include or discard the new number of nibbles!
// Number of low-order hex digits (nibbles) dropped from an address before it
// is printed as a fuzzy address.
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
// Size in bytes of one fuzzy-address block: 16 to the power of the number of
// discarded nibbles (256 for 2 nibbles).
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))
// Prints the value stored in the calling thread's OMPT thread data, then four
// candidate block numbers for `addr` (the block before the one containing
// `addr`, that block itself, and the two following ones), then `addr` itself.
// `addr` is parenthesized at every use so that the cast applies to the whole
// argument expression; note that it is still evaluated more than once.
#define print_fuzzy_address_blocks(addr)                                       \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64             \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
         ompt_get_thread_data()->value,                                        \
         ((uint64_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES - 1,                 \
         ((uint64_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES,                     \
         ((uint64_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 1,                 \
         ((uint64_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 2, (addr))
324 #define register_ompt_callback_t(name, type) \
326 type f_##name = &on_##name; \
327 if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \
328 printf("0: Could not register callback '" #name "'\n"); \
// Convenience form of register_ompt_callback_t() that derives the callback's
// function-pointer type from its name by appending "_t".
#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t)
333 #ifndef USE_PRIVATE_TOOL
335 on_ompt_callback_mutex_acquire(
339 ompt_wait_id_t wait_id
,
340 const void *codeptr_ra
)
344 case ompt_mutex_lock
:
345 printf("%" PRIu64
":" _TOOL_PREFIX
346 " ompt_event_wait_lock: wait_id=%" PRIu64
", hint=%" PRIu32
347 ", impl=%" PRIu32
", codeptr_ra=%p \n",
348 ompt_get_thread_data()->value
, wait_id
, hint
, impl
, codeptr_ra
);
350 case ompt_mutex_test_lock
:
351 printf("%" PRIu64
":" _TOOL_PREFIX
352 " ompt_event_wait_test_lock: wait_id=%" PRIu64
", hint=%" PRIu32
353 ", impl=%" PRIu32
", codeptr_ra=%p \n",
354 ompt_get_thread_data()->value
, wait_id
, hint
, impl
, codeptr_ra
);
356 case ompt_mutex_nest_lock
:
357 printf("%" PRIu64
":" _TOOL_PREFIX
358 " ompt_event_wait_nest_lock: wait_id=%" PRIu64
", hint=%" PRIu32
359 ", impl=%" PRIu32
", codeptr_ra=%p \n",
360 ompt_get_thread_data()->value
, wait_id
, hint
, impl
, codeptr_ra
);
362 case ompt_mutex_test_nest_lock
:
363 printf("%" PRIu64
":" _TOOL_PREFIX
364 " ompt_event_wait_test_nest_lock: wait_id=%" PRIu64
365 ", hint=%" PRIu32
", impl=%" PRIu32
", codeptr_ra=%p \n",
366 ompt_get_thread_data()->value
, wait_id
, hint
, impl
, codeptr_ra
);
368 case ompt_mutex_critical
:
369 printf("%" PRIu64
":" _TOOL_PREFIX
370 " ompt_event_wait_critical: wait_id=%" PRIu64
", hint=%" PRIu32
371 ", impl=%" PRIu32
", codeptr_ra=%p \n",
372 ompt_get_thread_data()->value
, wait_id
, hint
, impl
, codeptr_ra
);
374 case ompt_mutex_atomic
:
375 printf("%" PRIu64
":" _TOOL_PREFIX
376 " ompt_event_wait_atomic: wait_id=%" PRIu64
", hint=%" PRIu32
377 ", impl=%" PRIu32
", codeptr_ra=%p \n",
378 ompt_get_thread_data()->value
, wait_id
, hint
, impl
, codeptr_ra
);
380 case ompt_mutex_ordered
:
381 printf("%" PRIu64
":" _TOOL_PREFIX
382 " ompt_event_wait_ordered: wait_id=%" PRIu64
", hint=%" PRIu32
383 ", impl=%" PRIu32
", codeptr_ra=%p \n",
384 ompt_get_thread_data()->value
, wait_id
, hint
, impl
, codeptr_ra
);
392 on_ompt_callback_mutex_acquired(
394 ompt_wait_id_t wait_id
,
395 const void *codeptr_ra
)
399 case ompt_mutex_lock
:
400 printf("%" PRIu64
":" _TOOL_PREFIX
401 " ompt_event_acquired_lock: wait_id=%" PRIu64
", codeptr_ra=%p \n",
402 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
404 case ompt_mutex_test_lock
:
405 printf("%" PRIu64
":" _TOOL_PREFIX
406 " ompt_event_acquired_test_lock: wait_id=%" PRIu64
407 ", codeptr_ra=%p \n",
408 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
410 case ompt_mutex_nest_lock
:
411 printf("%" PRIu64
":" _TOOL_PREFIX
412 " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
413 ", codeptr_ra=%p \n",
414 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
416 case ompt_mutex_test_nest_lock
:
417 printf("%" PRIu64
":" _TOOL_PREFIX
418 " ompt_event_acquired_test_nest_lock_first: wait_id=%" PRIu64
419 ", codeptr_ra=%p \n",
420 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
422 case ompt_mutex_critical
:
423 printf("%" PRIu64
":" _TOOL_PREFIX
424 " ompt_event_acquired_critical: wait_id=%" PRIu64
425 ", codeptr_ra=%p \n",
426 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
428 case ompt_mutex_atomic
:
429 printf("%" PRIu64
":" _TOOL_PREFIX
430 " ompt_event_acquired_atomic: wait_id=%" PRIu64
431 ", codeptr_ra=%p \n",
432 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
434 case ompt_mutex_ordered
:
435 printf("%" PRIu64
":" _TOOL_PREFIX
436 " ompt_event_acquired_ordered: wait_id=%" PRIu64
437 ", codeptr_ra=%p \n",
438 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
446 on_ompt_callback_mutex_released(
448 ompt_wait_id_t wait_id
,
449 const void *codeptr_ra
)
453 case ompt_mutex_lock
:
454 printf("%" PRIu64
":" _TOOL_PREFIX
455 " ompt_event_release_lock: wait_id=%" PRIu64
", codeptr_ra=%p \n",
456 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
458 case ompt_mutex_nest_lock
:
459 printf("%" PRIu64
":" _TOOL_PREFIX
460 " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
461 ", codeptr_ra=%p \n",
462 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
464 case ompt_mutex_critical
:
465 printf("%" PRIu64
":" _TOOL_PREFIX
466 " ompt_event_release_critical: wait_id=%" PRIu64
467 ", codeptr_ra=%p \n",
468 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
470 case ompt_mutex_atomic
:
471 printf("%" PRIu64
":" _TOOL_PREFIX
472 " ompt_event_release_atomic: wait_id=%" PRIu64
473 ", codeptr_ra=%p \n",
474 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
476 case ompt_mutex_ordered
:
477 printf("%" PRIu64
":" _TOOL_PREFIX
478 " ompt_event_release_ordered: wait_id=%" PRIu64
479 ", codeptr_ra=%p \n",
480 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
488 on_ompt_callback_nest_lock(
489 ompt_scope_endpoint_t endpoint
,
490 ompt_wait_id_t wait_id
,
491 const void *codeptr_ra
)
495 case ompt_scope_begin
:
496 printf("%" PRIu64
":" _TOOL_PREFIX
497 " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
498 ", codeptr_ra=%p \n",
499 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
502 printf("%" PRIu64
":" _TOOL_PREFIX
503 " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
504 ", codeptr_ra=%p \n",
505 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
507 case ompt_scope_beginend
:
508 printf("ompt_scope_beginend should never be passed to %s\n", __func__
);
514 on_ompt_callback_sync_region(
515 ompt_sync_region_t kind
,
516 ompt_scope_endpoint_t endpoint
,
517 ompt_data_t
*parallel_data
,
518 ompt_data_t
*task_data
,
519 const void *codeptr_ra
)
521 if (endpoint
== ompt_scope_beginend
) {
522 printf("ompt_scope_beginend should never be passed to %s\n", __func__
);
525 if (kind
== ompt_sync_region_reduction
) {
526 printf("ompt_sync_region_reduction should never be passed to %s\n",
530 uint64_t parallel_data_value
= parallel_data
? parallel_data
->value
: 0;
531 const char *begin_or_end
= (endpoint
== ompt_scope_begin
) ? "begin" : "end";
532 printf("%" PRIu64
":" _TOOL_PREFIX
" ompt_event_%s_%s: parallel_id=%" PRIu64
533 ", task_id=%" PRIu64
", codeptr_ra=%p\n",
534 ompt_get_thread_data()->value
, ompt_sync_region_t_values
[kind
],
535 begin_or_end
, parallel_data_value
, task_data
->value
, codeptr_ra
);
537 case ompt_sync_region_barrier
:
538 case ompt_sync_region_barrier_implicit
:
539 case ompt_sync_region_barrier_implicit_workshare
:
540 case ompt_sync_region_barrier_implicit_parallel
:
541 case ompt_sync_region_barrier_teams
:
542 case ompt_sync_region_barrier_explicit
:
543 case ompt_sync_region_barrier_implementation
:
544 if (endpoint
== ompt_scope_begin
)
551 on_ompt_callback_sync_region_wait(
552 ompt_sync_region_t kind
,
553 ompt_scope_endpoint_t endpoint
,
554 ompt_data_t
*parallel_data
,
555 ompt_data_t
*task_data
,
556 const void *codeptr_ra
)
558 if (endpoint
== ompt_scope_beginend
) {
559 printf("ompt_scope_beginend should never be passed to %s\n", __func__
);
562 if (kind
== ompt_sync_region_reduction
) {
563 printf("ompt_sync_region_reduction should never be passed to %s\n",
567 uint64_t parallel_data_value
= parallel_data
? parallel_data
->value
: 0;
568 const char *begin_or_end
= (endpoint
== ompt_scope_begin
) ? "begin" : "end";
569 printf("%" PRIu64
":" _TOOL_PREFIX
570 " ompt_event_wait_%s_%s: parallel_id=%" PRIu64
", task_id=%" PRIu64
572 ompt_get_thread_data()->value
, ompt_sync_region_t_values
[kind
],
573 begin_or_end
, parallel_data_value
, task_data
->value
, codeptr_ra
);
576 static void on_ompt_callback_reduction(ompt_sync_region_t kind
,
577 ompt_scope_endpoint_t endpoint
,
578 ompt_data_t
*parallel_data
,
579 ompt_data_t
*task_data
,
580 const void *codeptr_ra
) {
582 case ompt_scope_begin
:
583 printf("%" PRIu64
":" _TOOL_PREFIX
584 " ompt_event_reduction_begin: parallel_id=%" PRIu64
585 ", task_id=%" PRIu64
", codeptr_ra=%p\n",
586 ompt_get_thread_data()->value
,
587 (parallel_data
) ? parallel_data
->value
: 0, task_data
->value
,
591 printf("%" PRIu64
":" _TOOL_PREFIX
592 " ompt_event_reduction_end: parallel_id=%" PRIu64
593 ", task_id=%" PRIu64
", codeptr_ra=%p\n",
594 ompt_get_thread_data()->value
,
595 (parallel_data
) ? parallel_data
->value
: 0, task_data
->value
,
598 case ompt_scope_beginend
:
599 printf("ompt_scope_beginend should never be passed to %s\n", __func__
);
605 on_ompt_callback_flush(
606 ompt_data_t
*thread_data
,
607 const void *codeptr_ra
)
609 printf("%" PRIu64
":" _TOOL_PREFIX
" ompt_event_flush: codeptr_ra=%p\n",
610 thread_data
->value
, codeptr_ra
);
614 on_ompt_callback_cancel(
615 ompt_data_t
*task_data
,
617 const void *codeptr_ra
)
619 const char* first_flag_value
;
620 const char* second_flag_value
;
621 if(flags
& ompt_cancel_parallel
)
622 first_flag_value
= ompt_cancel_flag_t_values
[0];
623 else if(flags
& ompt_cancel_sections
)
624 first_flag_value
= ompt_cancel_flag_t_values
[1];
625 else if(flags
& ompt_cancel_loop
)
626 first_flag_value
= ompt_cancel_flag_t_values
[2];
627 else if(flags
& ompt_cancel_taskgroup
)
628 first_flag_value
= ompt_cancel_flag_t_values
[3];
630 if(flags
& ompt_cancel_activated
)
631 second_flag_value
= ompt_cancel_flag_t_values
[4];
632 else if(flags
& ompt_cancel_detected
)
633 second_flag_value
= ompt_cancel_flag_t_values
[5];
634 else if(flags
& ompt_cancel_discarded_task
)
635 second_flag_value
= ompt_cancel_flag_t_values
[6];
637 printf("%" PRIu64
":" _TOOL_PREFIX
" ompt_event_cancel: task_data=%" PRIu64
638 ", flags=%s|%s=%" PRIu32
", codeptr_ra=%p\n",
639 ompt_get_thread_data()->value
, task_data
->value
, first_flag_value
,
640 second_flag_value
, flags
, codeptr_ra
);
644 on_ompt_callback_implicit_task(
645 ompt_scope_endpoint_t endpoint
,
646 ompt_data_t
*parallel_data
,
647 ompt_data_t
*task_data
,
648 unsigned int team_size
,
649 unsigned int thread_num
,
654 case ompt_scope_begin
:
656 printf("%s\n", "0: task_data initially not null");
657 task_data
->value
= ompt_get_unique_id();
659 //there is no parallel_begin callback for implicit parallel region
660 //thus it is initialized in initial task
661 if(flags
& ompt_task_initial
)
665 format_task_type(flags
, buffer
);
666 // Only check initial task not created by teams construct
667 if (team_size
== 1 && thread_num
== 1 && parallel_data
->ptr
)
668 printf("%s\n", "0: parallel_data initially not null");
669 parallel_data
->value
= ompt_get_unique_id();
670 printf("%" PRIu64
":" _TOOL_PREFIX
671 " ompt_event_initial_task_begin: parallel_id=%" PRIu64
672 ", task_id=%" PRIu64
", actual_parallelism=%" PRIu32
673 ", index=%" PRIu32
", flags=%" PRIu32
"\n",
674 ompt_get_thread_data()->value
, parallel_data
->value
,
675 task_data
->value
, team_size
, thread_num
, flags
);
677 printf("%" PRIu64
":" _TOOL_PREFIX
678 " ompt_event_implicit_task_begin: parallel_id=%" PRIu64
679 ", task_id=%" PRIu64
", team_size=%" PRIu32
680 ", thread_num=%" PRIu32
"\n",
681 ompt_get_thread_data()->value
, parallel_data
->value
,
682 task_data
->value
, team_size
, thread_num
);
687 if(flags
& ompt_task_initial
){
688 printf("%" PRIu64
":" _TOOL_PREFIX
689 " ompt_event_initial_task_end: parallel_id=%" PRIu64
690 ", task_id=%" PRIu64
", actual_parallelism=%" PRIu32
691 ", index=%" PRIu32
"\n",
692 ompt_get_thread_data()->value
,
693 (parallel_data
) ? parallel_data
->value
: 0, task_data
->value
,
694 team_size
, thread_num
);
696 printf("%" PRIu64
":" _TOOL_PREFIX
697 " ompt_event_implicit_task_end: parallel_id=%" PRIu64
698 ", task_id=%" PRIu64
", team_size=%" PRIu32
699 ", thread_num=%" PRIu32
"\n",
700 ompt_get_thread_data()->value
,
701 (parallel_data
) ? parallel_data
->value
: 0, task_data
->value
,
702 team_size
, thread_num
);
705 case ompt_scope_beginend
:
706 printf("ompt_scope_beginend should never be passed to %s\n", __func__
);
712 on_ompt_callback_lock_init(
716 ompt_wait_id_t wait_id
,
717 const void *codeptr_ra
)
721 case ompt_mutex_lock
:
722 printf("%" PRIu64
":" _TOOL_PREFIX
723 " ompt_event_init_lock: wait_id=%" PRIu64
", hint=%" PRIu32
724 ", impl=%" PRIu32
", codeptr_ra=%p \n",
725 ompt_get_thread_data()->value
, wait_id
, hint
, impl
, codeptr_ra
);
727 case ompt_mutex_nest_lock
:
728 printf("%" PRIu64
":" _TOOL_PREFIX
729 " ompt_event_init_nest_lock: wait_id=%" PRIu64
", hint=%" PRIu32
730 ", impl=%" PRIu32
", codeptr_ra=%p \n",
731 ompt_get_thread_data()->value
, wait_id
, hint
, impl
, codeptr_ra
);
739 on_ompt_callback_lock_destroy(
741 ompt_wait_id_t wait_id
,
742 const void *codeptr_ra
)
746 case ompt_mutex_lock
:
747 printf("%" PRIu64
":" _TOOL_PREFIX
748 " ompt_event_destroy_lock: wait_id=%" PRIu64
", codeptr_ra=%p \n",
749 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
751 case ompt_mutex_nest_lock
:
752 printf("%" PRIu64
":" _TOOL_PREFIX
753 " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
754 ", codeptr_ra=%p \n",
755 ompt_get_thread_data()->value
, wait_id
, codeptr_ra
);
763 on_ompt_callback_work(
765 ompt_scope_endpoint_t endpoint
,
766 ompt_data_t
*parallel_data
,
767 ompt_data_t
*task_data
,
769 const void *codeptr_ra
)
773 case ompt_scope_begin
:
774 printf("%" PRIu64
":" _TOOL_PREFIX
" %s_begin: parallel_id=%" PRIu64
775 ", task_id=%" PRIu64
", codeptr_ra=%p, count=%" PRIu64
"\n",
776 ompt_get_thread_data()->value
, ompt_work_events_t_values
[wstype
],
777 parallel_data
->value
, task_data
->value
, codeptr_ra
, count
);
780 printf("%" PRIu64
":" _TOOL_PREFIX
" %s_end: parallel_id=%" PRIu64
781 ", task_id=%" PRIu64
", codeptr_ra=%p, count=%" PRIu64
"\n",
782 ompt_get_thread_data()->value
, ompt_work_events_t_values
[wstype
],
783 parallel_data
->value
, task_data
->value
, codeptr_ra
, count
);
785 case ompt_scope_beginend
:
786 printf("ompt_scope_beginend should never be passed to %s\n", __func__
);
791 static void on_ompt_callback_dispatch(
792 ompt_data_t
*parallel_data
,
793 ompt_data_t
*task_data
,
794 ompt_dispatch_t kind
,
795 ompt_data_t instance
) {
796 char *event_name
= NULL
;
797 void *codeptr_ra
= NULL
;
798 ompt_dispatch_chunk_t
*dispatch_chunk
= NULL
;
800 case ompt_dispatch_section
:
801 event_name
= "ompt_event_section_begin";
802 codeptr_ra
= instance
.ptr
;
804 case ompt_dispatch_ws_loop_chunk
:
805 event_name
= "ompt_event_ws_loop_chunk_begin";
806 dispatch_chunk
= (ompt_dispatch_chunk_t
*)instance
.ptr
;
808 case ompt_dispatch_taskloop_chunk
:
809 event_name
= "ompt_event_taskloop_chunk_begin";
810 dispatch_chunk
= (ompt_dispatch_chunk_t
*)instance
.ptr
;
812 case ompt_dispatch_distribute_chunk
:
813 event_name
= "ompt_event_distribute_chunk_begin";
814 dispatch_chunk
= (ompt_dispatch_chunk_t
*)instance
.ptr
;
817 event_name
= "ompt_ws_loop_iteration_begin";
819 printf("%" PRIu64
":" _TOOL_PREFIX
820 " %s: parallel_id=%" PRIu64
", task_id=%" PRIu64
821 ", codeptr_ra=%p, chunk_start=%" PRIu64
", chunk_iterations=%" PRIu64
822 "\n", ompt_get_thread_data()->value
, event_name
, parallel_data
->value
,
823 task_data
->value
, codeptr_ra
,
824 dispatch_chunk
? dispatch_chunk
->start
: 0,
825 dispatch_chunk
? dispatch_chunk
->iterations
: 0);
828 static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint
,
829 ompt_data_t
*parallel_data
,
830 ompt_data_t
*task_data
,
831 const void *codeptr_ra
) {
834 case ompt_scope_begin
:
835 printf("%" PRIu64
":" _TOOL_PREFIX
836 " ompt_event_masked_begin: parallel_id=%" PRIu64
837 ", task_id=%" PRIu64
", codeptr_ra=%p\n",
838 ompt_get_thread_data()->value
, parallel_data
->value
,
839 task_data
->value
, codeptr_ra
);
842 printf("%" PRIu64
":" _TOOL_PREFIX
843 " ompt_event_masked_end: parallel_id=%" PRIu64
", task_id=%" PRIu64
845 ompt_get_thread_data()->value
, parallel_data
->value
,
846 task_data
->value
, codeptr_ra
);
848 case ompt_scope_beginend
:
849 printf("ompt_scope_beginend should never be passed to %s\n", __func__
);
854 static void on_ompt_callback_parallel_begin(
855 ompt_data_t
*encountering_task_data
,
856 const ompt_frame_t
*encountering_task_frame
, ompt_data_t
*parallel_data
,
857 uint32_t requested_team_size
, int flag
, const void *codeptr_ra
) {
858 if(parallel_data
->ptr
)
859 printf("0: parallel_data initially not null\n");
860 parallel_data
->value
= ompt_get_unique_id();
861 int invoker
= flag
& 0xF;
862 const char *event
= (flag
& ompt_parallel_team
) ? "parallel" : "teams";
863 const char *size
= (flag
& ompt_parallel_team
) ? "team_size" : "num_teams";
864 printf("%" PRIu64
":" _TOOL_PREFIX
865 " ompt_event_%s_begin: parent_task_id=%" PRIu64
866 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
867 "parallel_id=%" PRIu64
", requested_%s=%" PRIu32
868 ", codeptr_ra=%p, invoker=%d\n",
869 ompt_get_thread_data()->value
, event
, encountering_task_data
->value
,
870 encountering_task_frame
->exit_frame
.ptr
,
871 encountering_task_frame
->enter_frame
.ptr
, parallel_data
->value
, size
,
872 requested_team_size
, codeptr_ra
, invoker
);
875 static void on_ompt_callback_parallel_end(ompt_data_t
*parallel_data
,
876 ompt_data_t
*encountering_task_data
,
877 int flag
, const void *codeptr_ra
) {
878 int invoker
= flag
& 0xF;
879 const char *event
= (flag
& ompt_parallel_team
) ? "parallel" : "teams";
880 printf("%" PRIu64
":" _TOOL_PREFIX
" ompt_event_%s_end: parallel_id=%" PRIu64
881 ", task_id=%" PRIu64
", invoker=%d, codeptr_ra=%p\n",
882 ompt_get_thread_data()->value
, event
, parallel_data
->value
,
883 encountering_task_data
->value
, invoker
, codeptr_ra
);
887 on_ompt_callback_task_create(
888 ompt_data_t
*encountering_task_data
,
889 const ompt_frame_t
*encountering_task_frame
,
890 ompt_data_t
* new_task_data
,
893 const void *codeptr_ra
)
895 if(new_task_data
->ptr
)
896 printf("0: new_task_data initially not null\n");
897 new_task_data
->value
= ompt_get_unique_id();
900 format_task_type(type
, buffer
);
903 "%" PRIu64
":" _TOOL_PREFIX
904 " ompt_event_task_create: parent_task_id=%" PRIu64
905 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
906 "new_task_id=%" PRIu64
907 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
908 ompt_get_thread_data()->value
,
909 encountering_task_data
? encountering_task_data
->value
: 0,
910 encountering_task_frame
? encountering_task_frame
->exit_frame
.ptr
: NULL
,
911 encountering_task_frame
? encountering_task_frame
->enter_frame
.ptr
: NULL
,
912 new_task_data
->value
, codeptr_ra
, buffer
, type
,
913 has_dependences
? "yes" : "no");
917 on_ompt_callback_task_schedule(
918 ompt_data_t
*first_task_data
,
919 ompt_task_status_t prior_task_status
,
920 ompt_data_t
*second_task_data
)
922 printf("%" PRIu64
":" _TOOL_PREFIX
923 " ompt_event_task_schedule: first_task_id=%" PRIu64
924 ", second_task_id=%" PRIu64
", prior_task_status=%s=%d\n",
925 ompt_get_thread_data()->value
, first_task_data
->value
,
926 (second_task_data
? second_task_data
->value
: -1),
927 ompt_task_status_t_values
[prior_task_status
], prior_task_status
);
928 if (prior_task_status
== ompt_task_complete
||
929 prior_task_status
== ompt_task_late_fulfill
||
930 prior_task_status
== ompt_taskwait_complete
) {
931 printf("%" PRIu64
":" _TOOL_PREFIX
" ompt_event_task_end: task_id=%" PRIu64
932 "\n", ompt_get_thread_data()->value
, first_task_data
->value
);
// Dependences callback: formats every entry of deps as
// "(<value or address>, <dependence type name>), " into buffer and prints one
// ompt_event_dependences line containing the task id, that text and ndeps.
// NOTE(review): the declarations of `buffer`, `i` and `ndeps`, the code that
// advances `progress`, and whatever trims the trailing ", " are not visible in
// this excerpt - confirm against the complete file.
937 on_ompt_callback_dependences(
938 ompt_data_t
*task_data
,
939 const ompt_dependence_t
*deps
,
// progress is the write cursor into buffer.
943 char *progress
= buffer
;
// Stop once every dependence is formatted or the cursor has reached
// buffer + 2000.
945 for (i
= 0; i
< ndeps
&& progress
< buffer
+ 2000; i
++) {
// source/sink dependences print variable.value as an integer; the other
// sprintf below prints variable.ptr as an address.
946 if (deps
[i
].dependence_type
== ompt_dependence_type_source
||
947 deps
[i
].dependence_type
== ompt_dependence_type_sink
)
// NOTE(review): sprintf is unbounded. The loop guard only checks the cursor
// before the write, so this is safe only if buffer has enough room past
// offset 2000 for one full entry - confirm the buffer size.
// NOTE(review): ompt_dependence_type_t_values is indexed by dependence_type
// without a range check.
949 sprintf(progress
, "(%" PRIu64
", %s), ", deps
[i
].variable
.value
,
950 ompt_dependence_type_t_values
[deps
[i
].dependence_type
]);
953 sprintf(progress
, "(%p, %s), ", deps
[i
].variable
.ptr
,
954 ompt_dependence_type_t_values
[deps
[i
].dependence_type
]);
958 printf("%" PRIu64
":" _TOOL_PREFIX
" ompt_event_dependences: task_id=%" PRIu64
959 ", deps=[%s], ndeps=%d\n",
960 ompt_get_thread_data()->value
, task_data
->value
, buffer
, ndeps
);
964 on_ompt_callback_task_dependence(
965 ompt_data_t
*first_task_data
,
966 ompt_data_t
*second_task_data
)
968 printf("%" PRIu64
":" _TOOL_PREFIX
969 " ompt_event_task_dependence_pair: first_task_id=%" PRIu64
970 ", second_task_id=%" PRIu64
"\n",
971 ompt_get_thread_data()->value
, first_task_data
->value
,
972 second_task_data
->value
);
// Thread-begin callback: stores a fresh unique id in thread_data->value and
// prints one ompt_event_thread_begin line with the thread type (name and
// number) and the new thread id.
976 on_ompt_callback_thread_begin(
977 ompt_thread_t thread_type
,
978 ompt_data_t
*thread_data
)
// NOTE(review): the condition guarding this message is not visible in this
// excerpt; presumably it tests thread_data->ptr, as the task-create callback
// does for new_task_data - TODO confirm.
981 printf("%s\n", "0: thread_data initially not null");
// Store a fresh unique id in the thread's data; it is printed below as
// thread_id.
982 thread_data
->value
= ompt_get_unique_id();
983 printf("%" PRIu64
":" _TOOL_PREFIX
984 " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64
"\n",
// NOTE(review): ompt_thread_t_values is indexed by thread_type without a range
// check.
985 ompt_get_thread_data()->value
, ompt_thread_t_values
[thread_type
],
986 thread_type
, thread_data
->value
);
// Thread-end callback: prints one ompt_event_thread_end line with the id stored
// in thread_data.
// NOTE(review): the end of the format string (between the thread_id conversion
// and the first argument) is not visible in this excerpt - confirm against the
// complete file.
990 on_ompt_callback_thread_end(
991 ompt_data_t
*thread_data
)
993 printf("%" PRIu64
":" _TOOL_PREFIX
" ompt_event_thread_end: thread_id=%" PRIu64
995 ompt_get_thread_data()->value
, thread_data
->value
);
// Control-tool callback: prints one ompt_event_control_tool line with the
// command, modifier, arg, codeptr_ra and the current task's frame pointers.
// The code after that prints the task ids and parallel ids level by level.
// NOTE(review): the declarations of `command`, `modifier`, `arg` and
// `task_level`, the tails of both while conditions, the loop bodies' level
// increments and any preprocessor guard around the second half are not visible
// in this excerpt - confirm against the complete file.
999 on_ompt_callback_control_tool(
1003 const void *codeptr_ra
)
1005 ompt_frame_t
* omptTaskFrame
;
// Query the frame of the current task (ancestor level 0); only the frame
// output argument is requested.
// NOTE(review): the return value is ignored and omptTaskFrame has no
// initializer, so it is dereferenced below even if the query fails.
1006 ompt_get_task_info(0, NULL
, (ompt_data_t
**) NULL
, &omptTaskFrame
, NULL
, NULL
);
1007 printf("%" PRIu64
":" _TOOL_PREFIX
" ompt_event_control_tool: command=%" PRIu64
1008 ", modifier=%" PRIu64
1009 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
1010 "current_task_frame.reenter=%p \n",
1011 ompt_get_thread_data()->value
, command
, modifier
, arg
, codeptr_ra
,
1012 omptTaskFrame
->exit_frame
.ptr
, omptTaskFrame
->enter_frame
.ptr
);
1014 // the following would interfere with expected output for OMPT tests, so skip
1018 ompt_data_t
*task_data
;
// Loop over ompt_get_task_info for task_level, printing one line per level
// with the task id stored in that level's task data.
1019 while (ompt_get_task_info(task_level
, NULL
, (ompt_data_t
**)&task_data
, NULL
,
1021 printf("%" PRIu64
":" _TOOL_PREFIX
" task level %d: task_id=%" PRIu64
"\n",
1022 ompt_get_thread_data()->value
, task_level
, task_data
->value
);
1026 // print parallel data
1027 int parallel_level
= 0;
1028 ompt_data_t
*parallel_data
;
// Loop over ompt_get_parallel_info starting at level 0, printing one line per
// level with the parallel id stored in that level's parallel data.
// NOTE(review): the second argument below contains a corrupted character; it
// presumably was the address of parallel_data - TODO confirm and repair.
1029 while (ompt_get_parallel_info(parallel_level
, (ompt_data_t
**)¶llel_data
,
1031 printf("%" PRIu64
":" _TOOL_PREFIX
" parallel level %d: parallel_id=%" PRIu64
1033 ompt_get_thread_data()->value
, parallel_level
, parallel_data
->value
);
1040 static void on_ompt_callback_error(ompt_severity_t severity
,
1041 const char *message
, size_t length
,
1042 const void *codeptr_ra
) {
1043 printf("%" PRIu64
": ompt_event_runtime_error: severity=%" PRIu32
1044 ", message=%s, length=%" PRIu64
", codeptr_ra=%p\n",
1045 ompt_get_thread_data()->value
, severity
, message
, (uint64_t)length
,
// Tool initializer: resolves the runtime entry points by name through
// `lookup`, stores them in function pointers declared elsewhere in this file,
// registers the on_ompt_callback_* handlers and prints how a null pointer is
// rendered by %p.
// initial_device_num and tool_data are not used in the visible code.
// NOTE(review): the return statement is not visible in this excerpt - confirm
// the returned value against the complete file.
1049 int ompt_initialize(
1050 ompt_function_lookup_t lookup
,
1051 int initial_device_num
,
1052 ompt_data_t
*tool_data
)
// Entry points used by the callbacks in this file.
// NOTE(review): none of the lookup results is checked for NULL before use.
1054 ompt_set_callback
= (ompt_set_callback_t
) lookup("ompt_set_callback");
1055 ompt_get_callback
= (ompt_get_callback_t
) lookup("ompt_get_callback");
1056 ompt_get_state
= (ompt_get_state_t
) lookup("ompt_get_state");
1057 ompt_get_task_info
= (ompt_get_task_info_t
) lookup("ompt_get_task_info");
1058 ompt_get_task_memory
= (ompt_get_task_memory_t
)lookup("ompt_get_task_memory");
1059 ompt_get_thread_data
= (ompt_get_thread_data_t
) lookup("ompt_get_thread_data");
1060 ompt_get_parallel_info
= (ompt_get_parallel_info_t
) lookup("ompt_get_parallel_info");
1061 ompt_get_unique_id
= (ompt_get_unique_id_t
) lookup("ompt_get_unique_id");
1062 ompt_finalize_tool
= (ompt_finalize_tool_t
)lookup("ompt_finalize_tool");
// The result of this call is discarded; presumably it consumes the first
// unique id so that later ids do not start at the initial value - TODO confirm.
1064 ompt_get_unique_id();
// Entry points for processor/place queries and for enumerating states and
// mutex implementations.
1066 ompt_get_num_procs
= (ompt_get_num_procs_t
) lookup("ompt_get_num_procs");
1067 ompt_get_num_places
= (ompt_get_num_places_t
) lookup("ompt_get_num_places");
1068 ompt_get_place_proc_ids
= (ompt_get_place_proc_ids_t
) lookup("ompt_get_place_proc_ids");
1069 ompt_get_place_num
= (ompt_get_place_num_t
) lookup("ompt_get_place_num");
1070 ompt_get_partition_place_nums
= (ompt_get_partition_place_nums_t
) lookup("ompt_get_partition_place_nums");
1071 ompt_get_proc_id
= (ompt_get_proc_id_t
) lookup("ompt_get_proc_id");
1072 ompt_enumerate_states
= (ompt_enumerate_states_t
) lookup("ompt_enumerate_states");
1073 ompt_enumerate_mutex_impls
= (ompt_enumerate_mutex_impls_t
) lookup("ompt_enumerate_mutex_impls");
// Callback registration. register_ompt_callback and register_ompt_callback_t
// are defined elsewhere in this file; the _t form additionally takes the
// callback type to use, for events that share a handler signature with
// another event.
1075 register_ompt_callback(ompt_callback_mutex_acquire
);
1076 register_ompt_callback_t(ompt_callback_mutex_acquired
, ompt_callback_mutex_t
);
1077 register_ompt_callback_t(ompt_callback_mutex_released
, ompt_callback_mutex_t
);
1078 register_ompt_callback(ompt_callback_nest_lock
);
1079 register_ompt_callback(ompt_callback_sync_region
);
1080 register_ompt_callback_t(ompt_callback_sync_region_wait
, ompt_callback_sync_region_t
);
1081 register_ompt_callback_t(ompt_callback_reduction
, ompt_callback_sync_region_t
);
1082 register_ompt_callback(ompt_callback_control_tool
);
1083 register_ompt_callback(ompt_callback_flush
);
1084 register_ompt_callback(ompt_callback_cancel
);
1085 register_ompt_callback(ompt_callback_implicit_task
);
1086 register_ompt_callback_t(ompt_callback_lock_init
, ompt_callback_mutex_acquire_t
);
1087 register_ompt_callback_t(ompt_callback_lock_destroy
, ompt_callback_mutex_t
);
1088 register_ompt_callback(ompt_callback_work
);
1089 register_ompt_callback(ompt_callback_dispatch
);
1090 register_ompt_callback(ompt_callback_masked
);
1091 register_ompt_callback(ompt_callback_parallel_begin
);
1092 register_ompt_callback(ompt_callback_parallel_end
);
1093 register_ompt_callback(ompt_callback_task_create
);
1094 register_ompt_callback(ompt_callback_task_schedule
);
1095 register_ompt_callback(ompt_callback_dependences
);
1096 register_ompt_callback(ompt_callback_task_dependence
);
1097 register_ompt_callback(ompt_callback_thread_begin
);
1098 register_ompt_callback(ompt_callback_thread_end
);
1099 register_ompt_callback(ompt_callback_error
);
// Print how this platform's printf renders a null pointer with %p.
1100 printf("0: NULL_POINTER=%p\n", (void*)NULL
);
1104 void ompt_finalize(ompt_data_t
*tool_data
)
1106 printf("0: ompt_event_runtime_shutdown\n");
1112 ompt_start_tool_result_t
* ompt_start_tool(
1113 unsigned int omp_version
,
1114 const char *runtime_version
)
1116 static ompt_start_tool_result_t ompt_start_tool_result
= {&ompt_initialize
,&ompt_finalize
, 0};
1117 return &ompt_start_tool_result
;
1122 #endif // ifndef USE_PRIVATE_TOOL