[llvm] Stop including unordered_map (NFC)
[llvm-project.git] / openmp / runtime / test / ompt / callback.h
blob62eff09e28423e6af54e4651e3c4f37e3c8d69af
1 #ifndef _BSD_SOURCE
2 #define _BSD_SOURCE
3 #endif
4 #ifndef _DEFAULT_SOURCE
5 #define _DEFAULT_SOURCE
6 #endif
7 #include <stdio.h>
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
10 #endif
11 #include <inttypes.h>
12 #include <omp.h>
13 #include <omp-tools.h>
14 #include "ompt-signal.h"
16 // Used to detect architecture
17 #include "../../src/kmp_platform.h"
19 #ifndef _TOOL_PREFIX
20 #define _TOOL_PREFIX ""
21 // If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test
22 #define _OMPT_TESTS
23 #endif
// Printable names for ompt_thread_t values, indexed by the numeric value of
// the enumerator. Slot 0 is a placeholder for "no valid value"; slot 4 covers
// ompt_thread_unknown so that indexing with any defined thread type stays
// inside the table.
static const char *ompt_thread_t_values[] = {
    "ompt_thread_UNDEFINED", // 0
    "ompt_thread_initial",   // 1
    "ompt_thread_worker",    // 2
    "ompt_thread_other",     // 3
    "ompt_thread_unknown"    // 4
};
// Printable names for task status values. The position of each string in the
// table is the numeric status value it describes (noted on every entry);
// position 0 is a placeholder.
static const char *ompt_task_status_t_values[] = {
    /* 0 */ "ompt_task_UNDEFINED",
    /* 1 */ "ompt_task_complete",
    /* 2 */ "ompt_task_yield",
    /* 3 */ "ompt_task_cancel",
    /* 4 */ "ompt_task_detach",
    /* 5 */ "ompt_task_early_fulfill",
    /* 6 */ "ompt_task_late_fulfill",
    /* 7 */ "ompt_task_switch",
    /* 8 */ "ompt_taskwait_complete"
};
// Printable names for the cancel flag bits. This table is addressed by
// position, not by flag value: on_ompt_callback_cancel picks entries 0-3 for
// the cancelled construct and entries 4-6 for the cancellation state.
static const char *ompt_cancel_flag_t_values[] = {
    /* 0 */ "ompt_cancel_parallel",
    /* 1 */ "ompt_cancel_sections",
    /* 2 */ "ompt_cancel_loop",
    /* 3 */ "ompt_cancel_taskgroup",
    /* 4 */ "ompt_cancel_activated",
    /* 5 */ "ompt_cancel_detected",
    /* 6 */ "ompt_cancel_discarded_task"
};
// Printable names for dependence types, indexed by numeric value. Values 8
// through 33 have no name and are filled with empty strings so that the two
// "all memory" entries land on indices 34 and 35.
static const char *ompt_dependence_type_t_values[36] = {
    "ompt_dependence_type_UNDEFINED",        // 0
    "ompt_dependence_type_in",               // 1
    "ompt_dependence_type_out",              // 2
    "ompt_dependence_type_inout",            // 3
    "ompt_dependence_type_mutexinoutset",    // 4
    "ompt_dependence_type_source",           // 5
    "ompt_dependence_type_sink",             // 6
    "ompt_dependence_type_inoutset",         // 7
    "", "", "", "", "", "", "", "", "", "", "", "", "", // 8-20
    "", "", "", "", "", "", "", "", "", "", "", "", "", // 21-33
    "ompt_dependence_type_out_all_memory",   // 34
    "ompt_dependence_type_inout_all_memory"  // 35
};
66 static void format_task_type(int type, char *buffer) {
67 char *progress = buffer;
68 if (type & ompt_task_initial)
69 progress += sprintf(progress, "ompt_task_initial");
70 if (type & ompt_task_implicit)
71 progress += sprintf(progress, "ompt_task_implicit");
72 if (type & ompt_task_explicit)
73 progress += sprintf(progress, "ompt_task_explicit");
74 if (type & ompt_task_target)
75 progress += sprintf(progress, "ompt_task_target");
76 if (type & ompt_task_taskwait)
77 progress += sprintf(progress, "ompt_task_taskwait");
78 if (type & ompt_task_undeferred)
79 progress += sprintf(progress, "|ompt_task_undeferred");
80 if (type & ompt_task_untied)
81 progress += sprintf(progress, "|ompt_task_untied");
82 if (type & ompt_task_final)
83 progress += sprintf(progress, "|ompt_task_final");
84 if (type & ompt_task_mergeable)
85 progress += sprintf(progress, "|ompt_task_mergeable");
86 if (type & ompt_task_merged)
87 progress += sprintf(progress, "|ompt_task_merged");
90 static ompt_set_callback_t ompt_set_callback;
91 static ompt_get_callback_t ompt_get_callback;
92 static ompt_get_state_t ompt_get_state;
93 static ompt_get_task_info_t ompt_get_task_info;
94 static ompt_get_task_memory_t ompt_get_task_memory;
95 static ompt_get_thread_data_t ompt_get_thread_data;
96 static ompt_get_parallel_info_t ompt_get_parallel_info;
97 static ompt_get_unique_id_t ompt_get_unique_id;
98 static ompt_finalize_tool_t ompt_finalize_tool;
99 static ompt_get_num_procs_t ompt_get_num_procs;
100 static ompt_get_num_places_t ompt_get_num_places;
101 static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
102 static ompt_get_place_num_t ompt_get_place_num;
103 static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
104 static ompt_get_proc_id_t ompt_get_proc_id;
105 static ompt_enumerate_states_t ompt_enumerate_states;
106 static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
108 static void print_ids(int level)
110 int task_type, thread_num;
111 ompt_frame_t *frame;
112 ompt_data_t *task_parallel_data;
113 ompt_data_t *task_data;
114 int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
115 &task_parallel_data, &thread_num);
116 char buffer[2048];
117 format_task_type(task_type, buffer);
118 if (frame)
119 printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
120 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
121 "task_type=%s=%d, thread_num=%d\n",
122 ompt_get_thread_data()->value, level,
123 exists_task ? task_parallel_data->value : 0,
124 exists_task ? task_data->value : 0, frame->exit_frame.ptr,
125 frame->enter_frame.ptr, buffer, task_type, thread_num);
// Address of the stack frame `level` call levels above the current function.
#define get_frame_address(level) __builtin_frame_address(level)

// Prints the calling thread's id together with the frame address at `level`.
#define print_frame(level)                                                     \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
         ompt_get_thread_data()->value, level, get_frame_address(level))

// clang (version 5.0 and above) adds an intermediate function call with debug
// flag (-g), so the frame of interest is one level further up in that case.
// The argument is parenthesized before adding 1 so that an expression argument
// cannot be regrouped by operator precedence.
#if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
#if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
#define print_frame_from_outlined_fn(level) print_frame((level) + 1)
#else
#define print_frame_from_outlined_fn(level) print_frame(level)
#endif

#if defined(__clang__) && __clang_major__ >= 5
#warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
#warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
#endif
#endif
// define_ompt_label(id) places a label named ompt_label_<id> at the current
// position so that the address of this point in the code can be taken.
//
// For print_current_address():
// A NOP is emitted in front of the label to act as the jump target. Without
// it the compiler would insert an instruction of its own choosing, and the
// distance between the label and the real return address could not be
// determined reliably. The length of the NOP depends on the target; see the
// per-architecture definitions of print_possible_return_addresses().
//
// (The leading empty block separates a preceding "#pragma omp ..." from the
// __asm__ statement; it works around a bug in the Intel Compiler.)
#define define_ompt_label(id) {} __asm__("nop"); ompt_label_##id:

// Yields the address of a label created by define_ompt_label(). This relies
// on the GNU "address of label" extension (&&label), which has been tested
// with gcc, clang and icc.
#define get_ompt_label_address(id) (&& ompt_label_##id)

// Prints the exact address that a previously called runtime function returns
// to: defines a label right here and prints the candidate return addresses
// derived from it.
#define print_current_address(id)                                              \
  define_ompt_label(id)                                                        \
  print_possible_return_addresses(get_ompt_label_address(id))
175 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
176 // On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
177 // a MOV instruction for non-void runtime functions which is 3 bytes long.
178 #define print_possible_return_addresses(addr) \
179 printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
180 ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
181 #elif KMP_ARCH_PPC64
182 // On Power the NOP instruction is 4 bytes long. In addition, the compiler
183 // inserts a second NOP instruction (another 4 bytes). For non-void runtime
184 // functions Clang inserts a STW instruction (but only if compiling under
185 // -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
186 #define print_possible_return_addresses(addr) \
187 printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
188 ((char *)addr) - 8, ((char *)addr) - 12)
189 #elif KMP_ARCH_AARCH64
190 // On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
191 // store instruction (another 4 bytes long).
192 #if KMP_OS_DARWIN
193 #define print_possible_return_addresses(addr) \
194 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
195 ompt_get_thread_data()->value, ((char *)addr) - 4, \
196 ((char *)addr) - 8, ((char *)addr) - 12)
197 #else
198 #define print_possible_return_addresses(addr) \
199 printf("%" PRIu64 ": current_address=%p or %p\n", \
200 ompt_get_thread_data()->value, ((char *)addr) - 4, \
201 ((char *)addr) - 8)
202 #endif
203 #elif KMP_ARCH_RISCV64
204 #if __riscv_compressed
205 // On RV64GC the C.NOP instruction is 2 byte long. In addition, the compiler
206 // inserts a J instruction (targeting the successor basic block), which
207 // accounts for another 4 bytes. Finally, an additional J instruction may
208 // appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (ie.
209 // another branch).
210 #define print_possible_return_addresses(addr) \
211 printf("%" PRIu64 ": current_address=%p or %p\n", \
212 ompt_get_thread_data()->value, ((char *)addr) - 6, ((char *)addr) - 10)
213 #else
214 // On RV64G the NOP instruction is 4 byte long. In addition, the compiler
215 // inserts a J instruction (targeting the successor basic block), which
216 // accounts for another 4 bytes. Finally, an additional J instruction may
217 // appear (adding 4 more bytes) when the NOP is referenced elsewhere (ie.
218 // another branch).
219 #define print_possible_return_addresses(addr) \
220 printf("%" PRIu64 ": current_address=%p or %p\n", \
221 ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12)
222 #endif
223 #elif KMP_ARCH_LOONGARCH64
224 // On LoongArch64 the NOP instruction is 4 bytes long, can be followed by
225 // inserted jump instruction (another 4 bytes long). And an additional jump
226 // instruction may appear (adding 4 more bytes) when the NOP is referenced
227 // elsewhere (ie. another branch).
228 #define print_possible_return_addresses(addr) \
229 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
230 ompt_get_thread_data()->value, ((char *)addr) - 4, \
231 ((char *)addr) - 8, ((char *)addr) - 12)
232 #elif KMP_ARCH_VE
233 // On VE the NOP instruction is 8 byte long. In addition, the compiler inserts
234 // a ??? instruction for non-void runtime functions which is ? bytes long.
235 #define print_possible_return_addresses(addr) \
236 printf("%" PRIu64 ": current_address=%p or %p\n", \
237 ompt_get_thread_data()->value, ((char *)addr) - 8, \
238 ((char *)addr) - 8)
239 #elif KMP_ARCH_S390X
240 // On s390x the NOP instruction is 2 bytes long. For non-void runtime
241 // functions Clang inserts a STY instruction (but only if compiling under
242 // -fno-PIC which will be the default with Clang 8.0, another 6 bytes).
244 // Another possibility is:
246 // brasl %r14,__kmpc_end_master@plt
247 // a7 f4 00 02 j 0f
248 // 47 00 00 00 0: nop
249 // a7 f4 00 02 j addr
250 // addr:
251 #define print_possible_return_addresses(addr) \
252 printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
253 ompt_get_thread_data()->value, ((char *)addr) - 2, \
254 ((char *)addr) - 8, ((char *)addr) - 12)
255 #else
256 #error Unsupported target architecture, cannot determine address offset!
257 #endif
// This macro performs a somewhat similar job to print_current_address(), except
// that it discards a certain number of nibbles from the address and only prints
// the most significant bits / nibbles. This can be used for cases where the
// return address can only be approximated.
//
// To account for overflows (i.e. the most significant bits / nibbles have just
// changed because we are a few bytes above the relevant power of two), the
// block before the current one, the current block and the two following
// blocks are printed.
#define print_fuzzy_address(id)                                                \
  define_ompt_label(id)                                                        \
  print_fuzzy_address_blocks(get_ompt_label_address(id))

// If you change this define you need to adapt all capture patterns in the tests
// to include or discard the new number of nibbles!
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))

// Prints four consecutive block numbers around `addr` (address divided by the
// block size), followed by the full address. The pointer is converted through
// uintptr_t before widening to 64 bits so the value is zero-extended on
// targets with 32-bit pointers, and `addr` is parenthesized so that the casts
// apply to the whole argument expression.
#define print_fuzzy_address_blocks(addr)                                       \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64              \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
         ompt_get_thread_data()->value,                                        \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES - 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES,          \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 2,      \
         (addr))
// Registers the handler on_<name> for the OMPT event `name`.
// Assigning the handler to a local of the callback type `type` first lets the
// compiler check its signature before it is cast to the generic
// ompt_callback_t. A diagnostic line is printed when the runtime answers
// ompt_set_never.
#define register_ompt_callback_t(name, type)                                   \
  do {                                                                         \
    type f_##name = &on_##name;                                                \
    if (ompt_set_callback(name, (ompt_callback_t)f_##name) ==                  \
        ompt_set_never) {                                                      \
      printf("0: Could not register callback '" #name "'\n");                  \
    }                                                                          \
  } while (0)

// Shorthand for the common case in which the callback type is `<name>_t`.
#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t)
294 #ifndef USE_PRIVATE_TOOL
295 static void
296 on_ompt_callback_mutex_acquire(
297 ompt_mutex_t kind,
298 unsigned int hint,
299 unsigned int impl,
300 ompt_wait_id_t wait_id,
301 const void *codeptr_ra)
303 switch(kind)
305 case ompt_mutex_lock:
306 printf("%" PRIu64 ":" _TOOL_PREFIX
307 " ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
308 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
309 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
310 break;
311 case ompt_mutex_test_lock:
312 printf("%" PRIu64 ":" _TOOL_PREFIX
313 " ompt_event_wait_test_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
314 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
315 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
316 break;
317 case ompt_mutex_nest_lock:
318 printf("%" PRIu64 ":" _TOOL_PREFIX
319 " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
320 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
321 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
322 break;
323 case ompt_mutex_test_nest_lock:
324 printf("%" PRIu64 ":" _TOOL_PREFIX
325 " ompt_event_wait_test_nest_lock: wait_id=%" PRIu64
326 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
327 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
328 break;
329 case ompt_mutex_critical:
330 printf("%" PRIu64 ":" _TOOL_PREFIX
331 " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
332 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
333 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
334 break;
335 case ompt_mutex_atomic:
336 printf("%" PRIu64 ":" _TOOL_PREFIX
337 " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
338 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
339 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
340 break;
341 case ompt_mutex_ordered:
342 printf("%" PRIu64 ":" _TOOL_PREFIX
343 " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
344 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
345 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
346 break;
347 default:
348 break;
352 static void
353 on_ompt_callback_mutex_acquired(
354 ompt_mutex_t kind,
355 ompt_wait_id_t wait_id,
356 const void *codeptr_ra)
358 switch(kind)
360 case ompt_mutex_lock:
361 printf("%" PRIu64 ":" _TOOL_PREFIX
362 " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
363 ompt_get_thread_data()->value, wait_id, codeptr_ra);
364 break;
365 case ompt_mutex_test_lock:
366 printf("%" PRIu64 ":" _TOOL_PREFIX
367 " ompt_event_acquired_test_lock: wait_id=%" PRIu64
368 ", codeptr_ra=%p \n",
369 ompt_get_thread_data()->value, wait_id, codeptr_ra);
370 break;
371 case ompt_mutex_nest_lock:
372 printf("%" PRIu64 ":" _TOOL_PREFIX
373 " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
374 ", codeptr_ra=%p \n",
375 ompt_get_thread_data()->value, wait_id, codeptr_ra);
376 break;
377 case ompt_mutex_test_nest_lock:
378 printf("%" PRIu64 ":" _TOOL_PREFIX
379 " ompt_event_acquired_test_nest_lock_first: wait_id=%" PRIu64
380 ", codeptr_ra=%p \n",
381 ompt_get_thread_data()->value, wait_id, codeptr_ra);
382 break;
383 case ompt_mutex_critical:
384 printf("%" PRIu64 ":" _TOOL_PREFIX
385 " ompt_event_acquired_critical: wait_id=%" PRIu64
386 ", codeptr_ra=%p \n",
387 ompt_get_thread_data()->value, wait_id, codeptr_ra);
388 break;
389 case ompt_mutex_atomic:
390 printf("%" PRIu64 ":" _TOOL_PREFIX
391 " ompt_event_acquired_atomic: wait_id=%" PRIu64
392 ", codeptr_ra=%p \n",
393 ompt_get_thread_data()->value, wait_id, codeptr_ra);
394 break;
395 case ompt_mutex_ordered:
396 printf("%" PRIu64 ":" _TOOL_PREFIX
397 " ompt_event_acquired_ordered: wait_id=%" PRIu64
398 ", codeptr_ra=%p \n",
399 ompt_get_thread_data()->value, wait_id, codeptr_ra);
400 break;
401 default:
402 break;
406 static void
407 on_ompt_callback_mutex_released(
408 ompt_mutex_t kind,
409 ompt_wait_id_t wait_id,
410 const void *codeptr_ra)
412 switch(kind)
414 case ompt_mutex_lock:
415 printf("%" PRIu64 ":" _TOOL_PREFIX
416 " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
417 ompt_get_thread_data()->value, wait_id, codeptr_ra);
418 break;
419 case ompt_mutex_nest_lock:
420 printf("%" PRIu64 ":" _TOOL_PREFIX
421 " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
422 ", codeptr_ra=%p \n",
423 ompt_get_thread_data()->value, wait_id, codeptr_ra);
424 break;
425 case ompt_mutex_critical:
426 printf("%" PRIu64 ":" _TOOL_PREFIX
427 " ompt_event_release_critical: wait_id=%" PRIu64
428 ", codeptr_ra=%p \n",
429 ompt_get_thread_data()->value, wait_id, codeptr_ra);
430 break;
431 case ompt_mutex_atomic:
432 printf("%" PRIu64 ":" _TOOL_PREFIX
433 " ompt_event_release_atomic: wait_id=%" PRIu64
434 ", codeptr_ra=%p \n",
435 ompt_get_thread_data()->value, wait_id, codeptr_ra);
436 break;
437 case ompt_mutex_ordered:
438 printf("%" PRIu64 ":" _TOOL_PREFIX
439 " ompt_event_release_ordered: wait_id=%" PRIu64
440 ", codeptr_ra=%p \n",
441 ompt_get_thread_data()->value, wait_id, codeptr_ra);
442 break;
443 default:
444 break;
448 static void
449 on_ompt_callback_nest_lock(
450 ompt_scope_endpoint_t endpoint,
451 ompt_wait_id_t wait_id,
452 const void *codeptr_ra)
454 switch(endpoint)
456 case ompt_scope_begin:
457 printf("%" PRIu64 ":" _TOOL_PREFIX
458 " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
459 ", codeptr_ra=%p \n",
460 ompt_get_thread_data()->value, wait_id, codeptr_ra);
461 break;
462 case ompt_scope_end:
463 printf("%" PRIu64 ":" _TOOL_PREFIX
464 " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
465 ", codeptr_ra=%p \n",
466 ompt_get_thread_data()->value, wait_id, codeptr_ra);
467 break;
468 case ompt_scope_beginend:
469 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
470 exit(-1);
474 static void
475 on_ompt_callback_sync_region(
476 ompt_sync_region_t kind,
477 ompt_scope_endpoint_t endpoint,
478 ompt_data_t *parallel_data,
479 ompt_data_t *task_data,
480 const void *codeptr_ra)
482 switch(endpoint)
484 case ompt_scope_begin:
485 switch(kind)
487 case ompt_sync_region_barrier:
488 case ompt_sync_region_barrier_implicit:
489 case ompt_sync_region_barrier_implicit_workshare:
490 case ompt_sync_region_barrier_implicit_parallel:
491 case ompt_sync_region_barrier_teams:
492 case ompt_sync_region_barrier_explicit:
493 case ompt_sync_region_barrier_implementation:
494 printf("%" PRIu64 ":" _TOOL_PREFIX
495 " ompt_event_barrier_begin: parallel_id=%" PRIu64
496 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
497 ompt_get_thread_data()->value, parallel_data->value,
498 task_data->value, codeptr_ra);
499 print_ids(0);
500 break;
501 case ompt_sync_region_taskwait:
502 printf("%" PRIu64 ":" _TOOL_PREFIX
503 " ompt_event_taskwait_begin: parallel_id=%" PRIu64
504 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
505 ompt_get_thread_data()->value, parallel_data->value,
506 task_data->value, codeptr_ra);
507 break;
508 case ompt_sync_region_taskgroup:
509 printf("%" PRIu64 ":" _TOOL_PREFIX
510 " ompt_event_taskgroup_begin: parallel_id=%" PRIu64
511 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
512 ompt_get_thread_data()->value, parallel_data->value,
513 task_data->value, codeptr_ra);
514 break;
515 case ompt_sync_region_reduction:
516 printf("ompt_sync_region_reduction should never be passed to "
517 "on_ompt_callback_sync_region\n");
518 exit(-1);
519 break;
521 break;
522 case ompt_scope_end:
523 switch(kind)
525 case ompt_sync_region_barrier:
526 case ompt_sync_region_barrier_implicit:
527 case ompt_sync_region_barrier_explicit:
528 case ompt_sync_region_barrier_implicit_workshare:
529 case ompt_sync_region_barrier_implicit_parallel:
530 case ompt_sync_region_barrier_teams:
531 case ompt_sync_region_barrier_implementation:
532 printf("%" PRIu64 ":" _TOOL_PREFIX
533 " ompt_event_barrier_end: parallel_id=%" PRIu64
534 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
535 ompt_get_thread_data()->value,
536 (parallel_data) ? parallel_data->value : 0, task_data->value,
537 codeptr_ra);
538 break;
539 case ompt_sync_region_taskwait:
540 printf("%" PRIu64 ":" _TOOL_PREFIX
541 " ompt_event_taskwait_end: parallel_id=%" PRIu64
542 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
543 ompt_get_thread_data()->value,
544 (parallel_data) ? parallel_data->value : 0, task_data->value,
545 codeptr_ra);
546 break;
547 case ompt_sync_region_taskgroup:
548 printf("%" PRIu64 ":" _TOOL_PREFIX
549 " ompt_event_taskgroup_end: parallel_id=%" PRIu64
550 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
551 ompt_get_thread_data()->value,
552 (parallel_data) ? parallel_data->value : 0, task_data->value,
553 codeptr_ra);
554 break;
555 case ompt_sync_region_reduction:
556 printf("ompt_sync_region_reduction should never be passed to "
557 "on_ompt_callback_sync_region\n");
558 exit(-1);
559 break;
561 break;
562 case ompt_scope_beginend:
563 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
564 exit(-1);
568 static void
569 on_ompt_callback_sync_region_wait(
570 ompt_sync_region_t kind,
571 ompt_scope_endpoint_t endpoint,
572 ompt_data_t *parallel_data,
573 ompt_data_t *task_data,
574 const void *codeptr_ra)
576 switch(endpoint)
578 case ompt_scope_begin:
579 switch(kind)
581 case ompt_sync_region_barrier:
582 case ompt_sync_region_barrier_implicit:
583 case ompt_sync_region_barrier_implicit_workshare:
584 case ompt_sync_region_barrier_implicit_parallel:
585 case ompt_sync_region_barrier_teams:
586 case ompt_sync_region_barrier_explicit:
587 case ompt_sync_region_barrier_implementation:
588 printf("%" PRIu64 ":" _TOOL_PREFIX
589 " ompt_event_wait_barrier_begin: parallel_id=%" PRIu64
590 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
591 ompt_get_thread_data()->value, parallel_data->value,
592 task_data->value, codeptr_ra);
593 break;
594 case ompt_sync_region_taskwait:
595 printf("%" PRIu64 ":" _TOOL_PREFIX
596 " ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64
597 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
598 ompt_get_thread_data()->value, parallel_data->value,
599 task_data->value, codeptr_ra);
600 break;
601 case ompt_sync_region_taskgroup:
602 printf("%" PRIu64 ":" _TOOL_PREFIX
603 " ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64
604 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
605 ompt_get_thread_data()->value, parallel_data->value,
606 task_data->value, codeptr_ra);
607 break;
608 case ompt_sync_region_reduction:
609 printf("ompt_sync_region_reduction should never be passed to "
610 "on_ompt_callback_sync_region_wait\n");
611 exit(-1);
612 break;
614 break;
615 case ompt_scope_end:
616 switch(kind)
618 case ompt_sync_region_barrier:
619 case ompt_sync_region_barrier_implicit:
620 case ompt_sync_region_barrier_implicit_workshare:
621 case ompt_sync_region_barrier_implicit_parallel:
622 case ompt_sync_region_barrier_teams:
623 case ompt_sync_region_barrier_explicit:
624 case ompt_sync_region_barrier_implementation:
625 printf("%" PRIu64 ":" _TOOL_PREFIX
626 " ompt_event_wait_barrier_end: parallel_id=%" PRIu64
627 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
628 ompt_get_thread_data()->value,
629 (parallel_data) ? parallel_data->value : 0, task_data->value,
630 codeptr_ra);
631 break;
632 case ompt_sync_region_taskwait:
633 printf("%" PRIu64 ":" _TOOL_PREFIX
634 " ompt_event_wait_taskwait_end: parallel_id=%" PRIu64
635 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
636 ompt_get_thread_data()->value,
637 (parallel_data) ? parallel_data->value : 0, task_data->value,
638 codeptr_ra);
639 break;
640 case ompt_sync_region_taskgroup:
641 printf("%" PRIu64 ":" _TOOL_PREFIX
642 " ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64
643 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
644 ompt_get_thread_data()->value,
645 (parallel_data) ? parallel_data->value : 0, task_data->value,
646 codeptr_ra);
647 break;
648 case ompt_sync_region_reduction:
649 printf("ompt_sync_region_reduction should never be passed to "
650 "on_ompt_callback_sync_region_wait\n");
651 exit(-1);
652 break;
654 break;
655 case ompt_scope_beginend:
656 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
657 exit(-1);
661 static void on_ompt_callback_reduction(ompt_sync_region_t kind,
662 ompt_scope_endpoint_t endpoint,
663 ompt_data_t *parallel_data,
664 ompt_data_t *task_data,
665 const void *codeptr_ra) {
666 switch (endpoint) {
667 case ompt_scope_begin:
668 printf("%" PRIu64 ":" _TOOL_PREFIX
669 " ompt_event_reduction_begin: parallel_id=%" PRIu64
670 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
671 ompt_get_thread_data()->value,
672 (parallel_data) ? parallel_data->value : 0, task_data->value,
673 codeptr_ra);
674 break;
675 case ompt_scope_end:
676 printf("%" PRIu64 ":" _TOOL_PREFIX
677 " ompt_event_reduction_end: parallel_id=%" PRIu64
678 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
679 ompt_get_thread_data()->value,
680 (parallel_data) ? parallel_data->value : 0, task_data->value,
681 codeptr_ra);
682 break;
683 case ompt_scope_beginend:
684 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
685 exit(-1);
689 static void
690 on_ompt_callback_flush(
691 ompt_data_t *thread_data,
692 const void *codeptr_ra)
694 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
695 thread_data->value, codeptr_ra);
698 static void
699 on_ompt_callback_cancel(
700 ompt_data_t *task_data,
701 int flags,
702 const void *codeptr_ra)
704 const char* first_flag_value;
705 const char* second_flag_value;
706 if(flags & ompt_cancel_parallel)
707 first_flag_value = ompt_cancel_flag_t_values[0];
708 else if(flags & ompt_cancel_sections)
709 first_flag_value = ompt_cancel_flag_t_values[1];
710 else if(flags & ompt_cancel_loop)
711 first_flag_value = ompt_cancel_flag_t_values[2];
712 else if(flags & ompt_cancel_taskgroup)
713 first_flag_value = ompt_cancel_flag_t_values[3];
715 if(flags & ompt_cancel_activated)
716 second_flag_value = ompt_cancel_flag_t_values[4];
717 else if(flags & ompt_cancel_detected)
718 second_flag_value = ompt_cancel_flag_t_values[5];
719 else if(flags & ompt_cancel_discarded_task)
720 second_flag_value = ompt_cancel_flag_t_values[6];
722 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_data=%" PRIu64
723 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
724 ompt_get_thread_data()->value, task_data->value, first_flag_value,
725 second_flag_value, flags, codeptr_ra);
728 static void
729 on_ompt_callback_implicit_task(
730 ompt_scope_endpoint_t endpoint,
731 ompt_data_t *parallel_data,
732 ompt_data_t *task_data,
733 unsigned int team_size,
734 unsigned int thread_num,
735 int flags)
737 switch(endpoint)
739 case ompt_scope_begin:
740 if(task_data->ptr)
741 printf("%s\n", "0: task_data initially not null");
742 task_data->value = ompt_get_unique_id();
744 //there is no parallel_begin callback for implicit parallel region
745 //thus it is initialized in initial task
746 if(flags & ompt_task_initial)
748 char buffer[2048];
750 format_task_type(flags, buffer);
751 // Only check initial task not created by teams construct
752 if (team_size == 1 && thread_num == 1 && parallel_data->ptr)
753 printf("%s\n", "0: parallel_data initially not null");
754 parallel_data->value = ompt_get_unique_id();
755 printf("%" PRIu64 ":" _TOOL_PREFIX
756 " ompt_event_initial_task_begin: parallel_id=%" PRIu64
757 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
758 ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
759 ompt_get_thread_data()->value, parallel_data->value,
760 task_data->value, team_size, thread_num, flags);
761 } else {
762 printf("%" PRIu64 ":" _TOOL_PREFIX
763 " ompt_event_implicit_task_begin: parallel_id=%" PRIu64
764 ", task_id=%" PRIu64 ", team_size=%" PRIu32
765 ", thread_num=%" PRIu32 "\n",
766 ompt_get_thread_data()->value, parallel_data->value,
767 task_data->value, team_size, thread_num);
770 break;
771 case ompt_scope_end:
772 if(flags & ompt_task_initial){
773 printf("%" PRIu64 ":" _TOOL_PREFIX
774 " ompt_event_initial_task_end: parallel_id=%" PRIu64
775 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
776 ", index=%" PRIu32 "\n",
777 ompt_get_thread_data()->value,
778 (parallel_data) ? parallel_data->value : 0, task_data->value,
779 team_size, thread_num);
780 } else {
781 printf("%" PRIu64 ":" _TOOL_PREFIX
782 " ompt_event_implicit_task_end: parallel_id=%" PRIu64
783 ", task_id=%" PRIu64 ", team_size=%" PRIu32
784 ", thread_num=%" PRIu32 "\n",
785 ompt_get_thread_data()->value,
786 (parallel_data) ? parallel_data->value : 0, task_data->value,
787 team_size, thread_num);
789 break;
790 case ompt_scope_beginend:
791 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
792 exit(-1);
796 static void
797 on_ompt_callback_lock_init(
798 ompt_mutex_t kind,
799 unsigned int hint,
800 unsigned int impl,
801 ompt_wait_id_t wait_id,
802 const void *codeptr_ra)
804 switch(kind)
806 case ompt_mutex_lock:
807 printf("%" PRIu64 ":" _TOOL_PREFIX
808 " ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
809 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
810 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
811 break;
812 case ompt_mutex_nest_lock:
813 printf("%" PRIu64 ":" _TOOL_PREFIX
814 " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
815 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
816 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
817 break;
818 default:
819 break;
823 static void
824 on_ompt_callback_lock_destroy(
825 ompt_mutex_t kind,
826 ompt_wait_id_t wait_id,
827 const void *codeptr_ra)
829 switch(kind)
831 case ompt_mutex_lock:
832 printf("%" PRIu64 ":" _TOOL_PREFIX
833 " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
834 ompt_get_thread_data()->value, wait_id, codeptr_ra);
835 break;
836 case ompt_mutex_nest_lock:
837 printf("%" PRIu64 ":" _TOOL_PREFIX
838 " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
839 ", codeptr_ra=%p \n",
840 ompt_get_thread_data()->value, wait_id, codeptr_ra);
841 break;
842 default:
843 break;
847 static void
848 on_ompt_callback_work(
849 ompt_work_t wstype,
850 ompt_scope_endpoint_t endpoint,
851 ompt_data_t *parallel_data,
852 ompt_data_t *task_data,
853 uint64_t count,
854 const void *codeptr_ra)
856 switch(endpoint)
858 case ompt_scope_begin:
859 switch(wstype)
861 case ompt_work_loop:
862 case ompt_work_loop_static:
863 case ompt_work_loop_dynamic:
864 case ompt_work_loop_guided:
865 case ompt_work_loop_other:
866 // TODO: add schedule attribute for the different work_loop types.
867 // e.g., ", schedule=%s", ..., ompt_schedule_values[wstype]
868 printf("%" PRIu64 ":" _TOOL_PREFIX
869 " ompt_event_loop_begin: parallel_id=%" PRIu64
870 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
871 "\n",
872 ompt_get_thread_data()->value, parallel_data->value,
873 task_data->value, codeptr_ra, count);
874 break;
875 case ompt_work_sections:
876 printf("%" PRIu64 ":" _TOOL_PREFIX
877 " ompt_event_sections_begin: parallel_id=%" PRIu64
878 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
879 "\n",
880 ompt_get_thread_data()->value, parallel_data->value,
881 task_data->value, codeptr_ra, count);
882 break;
883 case ompt_work_single_executor:
884 printf("%" PRIu64 ":" _TOOL_PREFIX
885 " ompt_event_single_in_block_begin: parallel_id=%" PRIu64
886 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
887 "\n",
888 ompt_get_thread_data()->value, parallel_data->value,
889 task_data->value, codeptr_ra, count);
890 break;
891 case ompt_work_single_other:
892 printf("%" PRIu64 ":" _TOOL_PREFIX
893 " ompt_event_single_others_begin: parallel_id=%" PRIu64
894 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
895 ompt_get_thread_data()->value, parallel_data->value,
896 task_data->value, codeptr_ra, count);
897 break;
898 case ompt_work_workshare:
899 //impl
900 break;
901 case ompt_work_distribute:
902 printf("%" PRIu64 ":" _TOOL_PREFIX
903 " ompt_event_distribute_begin: parallel_id=%" PRIu64
904 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
905 "\n",
906 ompt_get_thread_data()->value, parallel_data->value,
907 task_data->value, codeptr_ra, count);
908 break;
909 case ompt_work_taskloop:
910 //impl
911 printf("%" PRIu64 ":" _TOOL_PREFIX
912 " ompt_event_taskloop_begin: parallel_id=%" PRIu64
913 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
914 "\n",
915 ompt_get_thread_data()->value, parallel_data->value,
916 task_data->value, codeptr_ra, count);
917 break;
918 case ompt_work_scope:
919 printf("%" PRIu64 ":" _TOOL_PREFIX
920 " ompt_event_scope_begin: parallel_id=%" PRIu64
921 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
922 "\n",
923 ompt_get_thread_data()->value, parallel_data->value,
924 task_data->value, codeptr_ra, count);
925 break;
927 break;
928 case ompt_scope_end:
929 switch(wstype)
931 case ompt_work_loop:
932 case ompt_work_loop_static:
933 case ompt_work_loop_dynamic:
934 case ompt_work_loop_guided:
935 case ompt_work_loop_other:
936 printf("%" PRIu64 ":" _TOOL_PREFIX
937 " ompt_event_loop_end: parallel_id=%" PRIu64
938 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
939 ompt_get_thread_data()->value, parallel_data->value,
940 task_data->value, codeptr_ra, count);
941 break;
942 case ompt_work_sections:
943 printf("%" PRIu64 ":" _TOOL_PREFIX
944 " ompt_event_sections_end: parallel_id=%" PRIu64
945 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
946 ompt_get_thread_data()->value, parallel_data->value,
947 task_data->value, codeptr_ra, count);
948 break;
949 case ompt_work_single_executor:
950 printf("%" PRIu64 ":" _TOOL_PREFIX
951 " ompt_event_single_in_block_end: parallel_id=%" PRIu64
952 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
953 ompt_get_thread_data()->value, parallel_data->value,
954 task_data->value, codeptr_ra, count);
955 break;
956 case ompt_work_single_other:
957 printf("%" PRIu64 ":" _TOOL_PREFIX
958 " ompt_event_single_others_end: parallel_id=%" PRIu64
959 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
960 ompt_get_thread_data()->value, parallel_data->value,
961 task_data->value, codeptr_ra, count);
962 break;
963 case ompt_work_workshare:
964 //impl
965 break;
966 case ompt_work_distribute:
967 printf("%" PRIu64 ":" _TOOL_PREFIX
968 " ompt_event_distribute_end: parallel_id=%" PRIu64
969 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
970 "\n",
971 ompt_get_thread_data()->value, parallel_data->value,
972 task_data->value, codeptr_ra, count);
973 break;
974 case ompt_work_taskloop:
975 //impl
976 printf("%" PRIu64 ":" _TOOL_PREFIX
977 " ompt_event_taskloop_end: parallel_id=%" PRIu64
978 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
979 "\n",
980 ompt_get_thread_data()->value, parallel_data->value,
981 task_data->value, codeptr_ra, count);
982 break;
983 case ompt_work_scope:
984 printf("%" PRIu64 ":" _TOOL_PREFIX
985 " ompt_event_scope_end: parallel_id=%" PRIu64
986 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
987 "\n",
988 ompt_get_thread_data()->value, parallel_data->value,
989 task_data->value, codeptr_ra, count);
990 break;
992 break;
993 case ompt_scope_beginend:
994 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
995 exit(-1);
999 static void on_ompt_callback_dispatch(
1000 ompt_data_t *parallel_data,
1001 ompt_data_t *task_data,
1002 ompt_dispatch_t kind,
1003 ompt_data_t instance) {
1004 char *event_name = NULL;
1005 void *codeptr_ra = NULL;
1006 ompt_dispatch_chunk_t *dispatch_chunk = NULL;
1007 switch (kind) {
1008 case ompt_dispatch_section:
1009 event_name = "ompt_event_section_begin";
1010 codeptr_ra = instance.ptr;
1011 break;
1012 case ompt_dispatch_ws_loop_chunk:
1013 event_name = "ompt_event_ws_loop_chunk_begin";
1014 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
1015 break;
1016 case ompt_dispatch_taskloop_chunk:
1017 event_name = "ompt_event_taskloop_chunk_begin";
1018 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
1019 break;
1020 case ompt_dispatch_distribute_chunk:
1021 event_name = "ompt_event_distribute_chunk_begin";
1022 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
1023 break;
1024 default:
1025 event_name = "ompt_ws_loop_iteration_begin";
1027 printf("%" PRIu64 ":" _TOOL_PREFIX
1028 " %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
1029 ", codeptr_ra=%p, chunk_start=%" PRIu64 ", chunk_iterations=%" PRIu64
1030 "\n", ompt_get_thread_data()->value, event_name, parallel_data->value,
1031 task_data->value, codeptr_ra,
1032 dispatch_chunk ? dispatch_chunk->start : 0,
1033 dispatch_chunk ? dispatch_chunk->iterations : 0);
1036 static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
1037 ompt_data_t *parallel_data,
1038 ompt_data_t *task_data,
1039 const void *codeptr_ra) {
1040 switch(endpoint)
1042 case ompt_scope_begin:
1043 printf("%" PRIu64 ":" _TOOL_PREFIX
1044 " ompt_event_masked_begin: parallel_id=%" PRIu64
1045 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
1046 ompt_get_thread_data()->value, parallel_data->value,
1047 task_data->value, codeptr_ra);
1048 break;
1049 case ompt_scope_end:
1050 printf("%" PRIu64 ":" _TOOL_PREFIX
1051 " ompt_event_masked_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64
1052 ", codeptr_ra=%p\n",
1053 ompt_get_thread_data()->value, parallel_data->value,
1054 task_data->value, codeptr_ra);
1055 break;
1056 case ompt_scope_beginend:
1057 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
1058 exit(-1);
1062 static void on_ompt_callback_parallel_begin(
1063 ompt_data_t *encountering_task_data,
1064 const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
1065 uint32_t requested_team_size, int flag, const void *codeptr_ra) {
1066 if(parallel_data->ptr)
1067 printf("0: parallel_data initially not null\n");
1068 parallel_data->value = ompt_get_unique_id();
1069 int invoker = flag & 0xF;
1070 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
1071 const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams";
1072 printf("%" PRIu64 ":" _TOOL_PREFIX
1073 " ompt_event_%s_begin: parent_task_id=%" PRIu64
1074 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
1075 "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
1076 ", codeptr_ra=%p, invoker=%d\n",
1077 ompt_get_thread_data()->value, event, encountering_task_data->value,
1078 encountering_task_frame->exit_frame.ptr,
1079 encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
1080 requested_team_size, codeptr_ra, invoker);
1083 static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
1084 ompt_data_t *encountering_task_data,
1085 int flag, const void *codeptr_ra) {
1086 int invoker = flag & 0xF;
1087 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
1088 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_end: parallel_id=%" PRIu64
1089 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n",
1090 ompt_get_thread_data()->value, event, parallel_data->value,
1091 encountering_task_data->value, invoker, codeptr_ra);
1094 static void
1095 on_ompt_callback_task_create(
1096 ompt_data_t *encountering_task_data,
1097 const ompt_frame_t *encountering_task_frame,
1098 ompt_data_t* new_task_data,
1099 int type,
1100 int has_dependences,
1101 const void *codeptr_ra)
1103 if(new_task_data->ptr)
1104 printf("0: new_task_data initially not null\n");
1105 new_task_data->value = ompt_get_unique_id();
1106 char buffer[2048];
1108 format_task_type(type, buffer);
1110 printf(
1111 "%" PRIu64 ":" _TOOL_PREFIX
1112 " ompt_event_task_create: parent_task_id=%" PRIu64
1113 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
1114 "new_task_id=%" PRIu64
1115 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
1116 ompt_get_thread_data()->value,
1117 encountering_task_data ? encountering_task_data->value : 0,
1118 encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
1119 encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
1120 new_task_data->value, codeptr_ra, buffer, type,
1121 has_dependences ? "yes" : "no");
1124 static void
1125 on_ompt_callback_task_schedule(
1126 ompt_data_t *first_task_data,
1127 ompt_task_status_t prior_task_status,
1128 ompt_data_t *second_task_data)
1130 printf("%" PRIu64 ":" _TOOL_PREFIX
1131 " ompt_event_task_schedule: first_task_id=%" PRIu64
1132 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
1133 ompt_get_thread_data()->value, first_task_data->value,
1134 (second_task_data ? second_task_data->value : -1),
1135 ompt_task_status_t_values[prior_task_status], prior_task_status);
1136 if (prior_task_status == ompt_task_complete ||
1137 prior_task_status == ompt_task_late_fulfill ||
1138 prior_task_status == ompt_taskwait_complete) {
1139 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIu64
1140 "\n", ompt_get_thread_data()->value, first_task_data->value);
1144 static void
1145 on_ompt_callback_dependences(
1146 ompt_data_t *task_data,
1147 const ompt_dependence_t *deps,
1148 int ndeps)
1150 char buffer[2048];
1151 char *progress = buffer;
1152 int i;
1153 for (i = 0; i < ndeps && progress < buffer + 2000; i++) {
1154 if (deps[i].dependence_type == ompt_dependence_type_source ||
1155 deps[i].dependence_type == ompt_dependence_type_sink)
1156 progress +=
1157 sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value,
1158 ompt_dependence_type_t_values[deps[i].dependence_type]);
1159 else
1160 progress +=
1161 sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
1162 ompt_dependence_type_t_values[deps[i].dependence_type]);
1164 if (ndeps > 0)
1165 progress[-2] = 0;
1166 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
1167 ", deps=[%s], ndeps=%d\n",
1168 ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
1171 static void
1172 on_ompt_callback_task_dependence(
1173 ompt_data_t *first_task_data,
1174 ompt_data_t *second_task_data)
1176 printf("%" PRIu64 ":" _TOOL_PREFIX
1177 " ompt_event_task_dependence_pair: first_task_id=%" PRIu64
1178 ", second_task_id=%" PRIu64 "\n",
1179 ompt_get_thread_data()->value, first_task_data->value,
1180 second_task_data->value);
1183 static void
1184 on_ompt_callback_thread_begin(
1185 ompt_thread_t thread_type,
1186 ompt_data_t *thread_data)
1188 if(thread_data->ptr)
1189 printf("%s\n", "0: thread_data initially not null");
1190 thread_data->value = ompt_get_unique_id();
1191 printf("%" PRIu64 ":" _TOOL_PREFIX
1192 " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
1193 ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
1194 thread_type, thread_data->value);
1197 static void
1198 on_ompt_callback_thread_end(
1199 ompt_data_t *thread_data)
1201 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_thread_end: thread_id=%" PRIu64
1202 "\n",
1203 ompt_get_thread_data()->value, thread_data->value);
1206 static int
1207 on_ompt_callback_control_tool(
1208 uint64_t command,
1209 uint64_t modifier,
1210 void *arg,
1211 const void *codeptr_ra)
1213 ompt_frame_t* omptTaskFrame;
1214 ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
1215 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_control_tool: command=%" PRIu64
1216 ", modifier=%" PRIu64
1217 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
1218 "current_task_frame.reenter=%p \n",
1219 ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
1220 omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
1222 // the following would interfere with expected output for OMPT tests, so skip
1223 #ifndef _OMPT_TESTS
1224 // print task data
1225 int task_level = 0;
1226 ompt_data_t *task_data;
1227 while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
1228 NULL, NULL)) {
1229 printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIu64 "\n",
1230 ompt_get_thread_data()->value, task_level, task_data->value);
1231 task_level++;
1234 // print parallel data
1235 int parallel_level = 0;
1236 ompt_data_t *parallel_data;
1237 while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)&parallel_data,
1238 NULL)) {
1239 printf("%" PRIu64 ":" _TOOL_PREFIX " parallel level %d: parallel_id=%" PRIu64
1240 "\n",
1241 ompt_get_thread_data()->value, parallel_level, parallel_data->value);
1242 parallel_level++;
1244 #endif
1245 return 0; //success
1248 static void on_ompt_callback_error(ompt_severity_t severity,
1249 const char *message, size_t length,
1250 const void *codeptr_ra) {
1251 printf("%" PRIu64 ": ompt_event_runtime_error: severity=%" PRIu32
1252 ", message=%s, length=%" PRIu64 ", codeptr_ra=%p\n",
1253 ompt_get_thread_data()->value, severity, message, (uint64_t)length,
1254 codeptr_ra);
1257 int ompt_initialize(
1258 ompt_function_lookup_t lookup,
1259 int initial_device_num,
1260 ompt_data_t *tool_data)
1262 ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
1263 ompt_get_callback = (ompt_get_callback_t) lookup("ompt_get_callback");
1264 ompt_get_state = (ompt_get_state_t) lookup("ompt_get_state");
1265 ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info");
1266 ompt_get_task_memory = (ompt_get_task_memory_t)lookup("ompt_get_task_memory");
1267 ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
1268 ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
1269 ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
1270 ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool");
1272 ompt_get_unique_id();
1274 ompt_get_num_procs = (ompt_get_num_procs_t) lookup("ompt_get_num_procs");
1275 ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places");
1276 ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids");
1277 ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num");
1278 ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums");
1279 ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id");
1280 ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states");
1281 ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls");
1283 register_ompt_callback(ompt_callback_mutex_acquire);
1284 register_ompt_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
1285 register_ompt_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
1286 register_ompt_callback(ompt_callback_nest_lock);
1287 register_ompt_callback(ompt_callback_sync_region);
1288 register_ompt_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
1289 register_ompt_callback_t(ompt_callback_reduction, ompt_callback_sync_region_t);
1290 register_ompt_callback(ompt_callback_control_tool);
1291 register_ompt_callback(ompt_callback_flush);
1292 register_ompt_callback(ompt_callback_cancel);
1293 register_ompt_callback(ompt_callback_implicit_task);
1294 register_ompt_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
1295 register_ompt_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
1296 register_ompt_callback(ompt_callback_work);
1297 register_ompt_callback(ompt_callback_dispatch);
1298 register_ompt_callback(ompt_callback_masked);
1299 register_ompt_callback(ompt_callback_parallel_begin);
1300 register_ompt_callback(ompt_callback_parallel_end);
1301 register_ompt_callback(ompt_callback_task_create);
1302 register_ompt_callback(ompt_callback_task_schedule);
1303 register_ompt_callback(ompt_callback_dependences);
1304 register_ompt_callback(ompt_callback_task_dependence);
1305 register_ompt_callback(ompt_callback_thread_begin);
1306 register_ompt_callback(ompt_callback_thread_end);
1307 register_ompt_callback(ompt_callback_error);
1308 printf("0: NULL_POINTER=%p\n", (void*)NULL);
1309 return 1; //success
1312 void ompt_finalize(ompt_data_t *tool_data)
1314 printf("0: ompt_event_runtime_shutdown\n");
1317 #ifdef __cplusplus
1318 extern "C" {
1319 #endif
1320 ompt_start_tool_result_t* ompt_start_tool(
1321 unsigned int omp_version,
1322 const char *runtime_version)
1324 static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
1325 return &ompt_start_tool_result;
1327 #ifdef __cplusplus
1329 #endif
1330 #endif // ifndef USE_PRIVATE_TOOL
1331 #ifdef _OMPT_TESTS
1332 #undef _OMPT_TESTS
1333 #endif