Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / openmp / runtime / test / ompt / callback.h
blobc5266e230c26f774611c0e9d565d291bd229fcfd
1 #ifndef _BSD_SOURCE
2 #define _BSD_SOURCE
3 #endif
4 #ifndef _DEFAULT_SOURCE
5 #define _DEFAULT_SOURCE
6 #endif
7 #include <stdio.h>
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
10 #endif
11 #include <inttypes.h>
12 #include <omp.h>
13 #include <omp-tools.h>
14 #include "ompt-signal.h"
16 // Used to detect architecture
17 #include "../../src/kmp_platform.h"
19 #ifndef _TOOL_PREFIX
20 #define _TOOL_PREFIX ""
21 // If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test
22 #define _OMPT_TESTS
23 #endif
// Printable names of the ompt_thread_t values (entry 0 is the UNDEFINED
// placeholder).
static const char *ompt_thread_t_values[] = {
    "ompt_thread_UNDEFINED",
    "ompt_thread_initial",
    "ompt_thread_worker",
    "ompt_thread_other",
};
// Printable names of the ompt_task_status_t values; the number behind each
// entry is its index in the table.
static const char *ompt_task_status_t_values[] = {
    "ompt_task_UNDEFINED",     // 0
    "ompt_task_complete",      // 1
    "ompt_task_yield",         // 2
    "ompt_task_cancel",        // 3
    "ompt_task_detach",        // 4
    "ompt_task_early_fulfill", // 5
    "ompt_task_late_fulfill",  // 6
    "ompt_task_switch",        // 7
    "ompt_taskwait_complete",  // 8
};
// Printable names of the cancel flags. Entries 0-3 name the cancelled
// construct, entries 4-6 name the cancellation state.
static const char *ompt_cancel_flag_t_values[] = {
    "ompt_cancel_parallel",       // 0
    "ompt_cancel_sections",       // 1
    "ompt_cancel_loop",           // 2
    "ompt_cancel_taskgroup",      // 3
    "ompt_cancel_activated",      // 4
    "ompt_cancel_detected",       // 5
    "ompt_cancel_discarded_task", // 6
};
// Printable names of the dependence types. The table is sparse: indices 8-33
// carry no name and hold empty strings so that indices 34 and 35 line up.
static const char *ompt_dependence_type_t_values[36] = {
    "ompt_dependence_type_UNDEFINED",        // 0
    "ompt_dependence_type_in",               // 1
    "ompt_dependence_type_out",              // 2
    "ompt_dependence_type_inout",            // 3
    "ompt_dependence_type_mutexinoutset",    // 4
    "ompt_dependence_type_source",           // 5
    "ompt_dependence_type_sink",             // 6
    "ompt_dependence_type_inoutset",         // 7
    "", "", "", "", "", "", "", "",          // 8-15
    "", "", "", "", "", "", "", "",          // 16-23
    "", "", "", "", "", "", "", "", "", "",  // 24-33
    "ompt_dependence_type_out_all_memory",   // 34
    "ompt_dependence_type_inout_all_memory", // 35
};
66 static void format_task_type(int type, char *buffer) {
67 char *progress = buffer;
68 if (type & ompt_task_initial)
69 progress += sprintf(progress, "ompt_task_initial");
70 if (type & ompt_task_implicit)
71 progress += sprintf(progress, "ompt_task_implicit");
72 if (type & ompt_task_explicit)
73 progress += sprintf(progress, "ompt_task_explicit");
74 if (type & ompt_task_target)
75 progress += sprintf(progress, "ompt_task_target");
76 if (type & ompt_task_taskwait)
77 progress += sprintf(progress, "ompt_task_taskwait");
78 if (type & ompt_task_undeferred)
79 progress += sprintf(progress, "|ompt_task_undeferred");
80 if (type & ompt_task_untied)
81 progress += sprintf(progress, "|ompt_task_untied");
82 if (type & ompt_task_final)
83 progress += sprintf(progress, "|ompt_task_final");
84 if (type & ompt_task_mergeable)
85 progress += sprintf(progress, "|ompt_task_mergeable");
86 if (type & ompt_task_merged)
87 progress += sprintf(progress, "|ompt_task_merged");
// Function pointers to the OMPT runtime entry points used by the callbacks and
// helper macros in this header (e.g. ompt_get_thread_data, ompt_get_task_info,
// ompt_get_unique_id, ompt_set_callback).
// NOTE(review): nothing in this part of the file assigns them; presumably they
// are filled in by the tool initializer further down -- confirm.
static ompt_set_callback_t ompt_set_callback;
static ompt_get_callback_t ompt_get_callback;
static ompt_get_state_t ompt_get_state;
static ompt_get_task_info_t ompt_get_task_info;
static ompt_get_task_memory_t ompt_get_task_memory;
static ompt_get_thread_data_t ompt_get_thread_data;
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_unique_id_t ompt_get_unique_id;
static ompt_finalize_tool_t ompt_finalize_tool;
static ompt_get_num_procs_t ompt_get_num_procs;
static ompt_get_num_places_t ompt_get_num_places;
static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
static ompt_get_place_num_t ompt_get_place_num;
static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
static ompt_get_proc_id_t ompt_get_proc_id;
static ompt_enumerate_states_t ompt_enumerate_states;
static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
108 static void print_ids(int level)
110 int task_type, thread_num;
111 ompt_frame_t *frame;
112 ompt_data_t *task_parallel_data;
113 ompt_data_t *task_data;
114 int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
115 &task_parallel_data, &thread_num);
116 char buffer[2048];
117 format_task_type(task_type, buffer);
118 if (frame)
119 printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
120 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
121 "task_type=%s=%d, thread_num=%d\n",
122 ompt_get_thread_data()->value, level,
123 exists_task ? task_parallel_data->value : 0,
124 exists_task ? task_data->value : 0, frame->exit_frame.ptr,
125 frame->enter_frame.ptr, buffer, task_type, thread_num);
// Frame address `level` call frames above the current function, obtained via
// the __builtin_frame_address compiler builtin.
#define get_frame_address(level) __builtin_frame_address(level)

// Prints "<thread id>: __builtin_frame_address(<level>)=<address>" for the
// calling thread. `level` is expanded both as the printed number and as the
// argument of get_frame_address().
#define print_frame(level)                                                     \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
         ompt_get_thread_data()->value, level, get_frame_address(level))
// clang (version 5.0 and above) adds an intermediate function call with debug
// flag (-g). print_frame_from_outlined_fn() compensates for that extra frame
// by looking one level further up when DEBUG is defined.
// The macro is only provided when the including test defines
// TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN before including this header.
#if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
  #if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
    #define print_frame_from_outlined_fn(level) print_frame(level+1)
  #else
    #define print_frame_from_outlined_fn(level) print_frame(level)
  #endif

  // Remind the user that DEBUG has to mirror the use of -g on affected clang
  // versions, otherwise the frame level above is off by one.
  #if defined(__clang__) && __clang_major__ >= 5
    #warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
    #warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
  #endif
#endif
// This macro helps to define a label at the current position that can be used
// to get the current address in the code. The label is named ompt_label_<id>,
// so `id` has to be unique within the enclosing function.
//
// For print_current_address():
// To reliably determine the offset between the address of the label and the
// actual return address, we insert a NOP instruction as a jump target as the
// compiler would otherwise insert an instruction that we can't control. The
// instruction length is target dependent and is explained below.
//
// (The empty block between "#pragma omp ..." and the __asm__ statement is a
// workaround for a bug in the Intel Compiler.)
#define define_ompt_label(id) \
  {} \
  __asm__("nop"); \
ompt_label_##id:

// This macro helps to get the address of a label that is inserted by the above
// macro define_ompt_label(). The address is obtained with a GNU extension
// (&&label) that has been tested with gcc, clang and icc.
#define get_ompt_label_address(id) (&& ompt_label_##id)

// This macro prints the exact address that a previously called runtime function
// returns to. It expands to the label definition followed by a call of
// print_possible_return_addresses(), which is defined per target architecture.
#define print_current_address(id) \
  define_ompt_label(id) \
  print_possible_return_addresses(get_ompt_label_address(id))
// print_possible_return_addresses(addr) prints the candidate return addresses
// for a label address `addr` created by define_ompt_label(). The offsets that
// are subtracted depend on the instructions the target's compiler places
// between the runtime call and the label, hence one definition per
// architecture.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
// a MOV instruction for non-void runtime functions which is 3 bytes long.
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
#elif KMP_ARCH_PPC64
// On Power the NOP instruction is 4 bytes long. In addition, the compiler
// inserts a second NOP instruction (another 4 bytes). For non-void runtime
// functions Clang inserts a STW instruction (but only if compiling under
// -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
         ((char *)addr) - 8, ((char *)addr) - 12)
#elif KMP_ARCH_AARCH64
// On AArch64 the NOP instruction is 4 bytes long and can be followed by an
// inserted store instruction (another 4 bytes long).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
         ((char *)addr) - 4, ((char *)addr) - 8)
#elif KMP_ARCH_RISCV64
#if __riscv_compressed
// On RV64GC the C.NOP instruction is 2 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (i.e.
// by another branch).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 6, ((char *)addr) - 10)
#else
// On RV64G the NOP instruction is 4 bytes long. In addition, the compiler
// inserts a J instruction (targeting the successor basic block), which
// accounts for another 4 bytes. Finally, an additional J instruction may
// appear (adding 4 more bytes) when the NOP is referenced elsewhere (i.e.
// by another branch).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12)
#endif
#elif KMP_ARCH_LOONGARCH64
// On LoongArch64 the NOP instruction is 4 bytes long and can be followed by an
// inserted jump instruction (another 4 bytes long). An additional jump
// instruction may appear (adding 4 more bytes) when the NOP is referenced
// elsewhere (i.e. by another branch).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 4, \
         ((char *)addr) - 8, ((char *)addr) - 12)
#elif KMP_ARCH_VE
// On VE the NOP instruction is 8 bytes long. In addition, the compiler inserts
// a ??? instruction for non-void runtime functions which is ? bytes long.
// NOTE: because that second offset is not known, both printed candidates are
// currently the same address (addr - 8).
#define print_possible_return_addresses(addr) \
  printf("%" PRIu64 ": current_address=%p or %p\n", \
         ompt_get_thread_data()->value, ((char *)addr) - 8, \
         ((char *)addr) - 8)
#else
#error Unsupported target architecture, cannot determine address offset!
#endif
// This macro performs a somewhat similar job to print_current_address(), except
// that it discards a certain number of nibbles from the address and only prints
// the most significant bits / nibbles. This can be used for cases where the
// return address can only be approximated.
//
// To account for overflows (i.e. the most significant bits / nibbles have just
// changed as we are a few bytes above or below the relevant power of two) the
// block containing the label is printed together with its neighbours: the
// previous block, the current block and the two following blocks.
#define print_fuzzy_address(id) \
  define_ompt_label(id) \
  print_fuzzy_address_blocks(get_ompt_label_address(id))

// If you change this define you need to adapt all capture patterns in the tests
// to include or discard the new number of nibbles!
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
// Size of one address block: 2 discarded nibbles = 8 bits = 256 bytes.
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))
// Prints the block numbers (address divided by the block size) of the previous,
// current, next and next-but-one block, followed by the exact address in
// parentheses.
#define print_fuzzy_address_blocks(addr) \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64 \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n", \
         ompt_get_thread_data()->value, \
         ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES - 1, \
         ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES, \
         ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 1, \
         ((uint64_t)addr) / FUZZY_ADDRESS_DISCARD_BYTES + 2, addr)
// Registers the handler on_<name> for the callback <name> through
// ompt_set_callback(). The handler is first assigned to a pointer of the
// callback-specific `type`, so a mismatching signature is diagnosed by the
// compiler, and only then cast to the generic ompt_callback_t.
// If the runtime answers ompt_set_never, a "0: Could not register callback"
// line is printed; any other answer is accepted silently.
#define register_ompt_callback_t(name, type)                                   \
  do {                                                                         \
    type f_##name = &on_##name;                                                \
    if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never)  \
      printf("0: Could not register callback '" #name "'\n");                  \
  } while (0)

// Convenience form for callbacks whose handler type is named <name>_t.
#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t)
270 #ifndef USE_PRIVATE_TOOL
271 static void
272 on_ompt_callback_mutex_acquire(
273 ompt_mutex_t kind,
274 unsigned int hint,
275 unsigned int impl,
276 ompt_wait_id_t wait_id,
277 const void *codeptr_ra)
279 switch(kind)
281 case ompt_mutex_lock:
282 printf("%" PRIu64 ":" _TOOL_PREFIX
283 " ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
284 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
285 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
286 break;
287 case ompt_mutex_test_lock:
288 printf("%" PRIu64 ":" _TOOL_PREFIX
289 " ompt_event_wait_test_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
290 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
291 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
292 break;
293 case ompt_mutex_nest_lock:
294 printf("%" PRIu64 ":" _TOOL_PREFIX
295 " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
296 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
297 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
298 break;
299 case ompt_mutex_test_nest_lock:
300 printf("%" PRIu64 ":" _TOOL_PREFIX
301 " ompt_event_wait_test_nest_lock: wait_id=%" PRIu64
302 ", hint=%" PRIu32 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
303 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
304 break;
305 case ompt_mutex_critical:
306 printf("%" PRIu64 ":" _TOOL_PREFIX
307 " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
308 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
309 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
310 break;
311 case ompt_mutex_atomic:
312 printf("%" PRIu64 ":" _TOOL_PREFIX
313 " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
314 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
315 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
316 break;
317 case ompt_mutex_ordered:
318 printf("%" PRIu64 ":" _TOOL_PREFIX
319 " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
320 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
321 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
322 break;
323 default:
324 break;
328 static void
329 on_ompt_callback_mutex_acquired(
330 ompt_mutex_t kind,
331 ompt_wait_id_t wait_id,
332 const void *codeptr_ra)
334 switch(kind)
336 case ompt_mutex_lock:
337 printf("%" PRIu64 ":" _TOOL_PREFIX
338 " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
339 ompt_get_thread_data()->value, wait_id, codeptr_ra);
340 break;
341 case ompt_mutex_test_lock:
342 printf("%" PRIu64 ":" _TOOL_PREFIX
343 " ompt_event_acquired_test_lock: wait_id=%" PRIu64
344 ", codeptr_ra=%p \n",
345 ompt_get_thread_data()->value, wait_id, codeptr_ra);
346 break;
347 case ompt_mutex_nest_lock:
348 printf("%" PRIu64 ":" _TOOL_PREFIX
349 " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
350 ", codeptr_ra=%p \n",
351 ompt_get_thread_data()->value, wait_id, codeptr_ra);
352 break;
353 case ompt_mutex_test_nest_lock:
354 printf("%" PRIu64 ":" _TOOL_PREFIX
355 " ompt_event_acquired_test_nest_lock_first: wait_id=%" PRIu64
356 ", codeptr_ra=%p \n",
357 ompt_get_thread_data()->value, wait_id, codeptr_ra);
358 break;
359 case ompt_mutex_critical:
360 printf("%" PRIu64 ":" _TOOL_PREFIX
361 " ompt_event_acquired_critical: wait_id=%" PRIu64
362 ", codeptr_ra=%p \n",
363 ompt_get_thread_data()->value, wait_id, codeptr_ra);
364 break;
365 case ompt_mutex_atomic:
366 printf("%" PRIu64 ":" _TOOL_PREFIX
367 " ompt_event_acquired_atomic: wait_id=%" PRIu64
368 ", codeptr_ra=%p \n",
369 ompt_get_thread_data()->value, wait_id, codeptr_ra);
370 break;
371 case ompt_mutex_ordered:
372 printf("%" PRIu64 ":" _TOOL_PREFIX
373 " ompt_event_acquired_ordered: wait_id=%" PRIu64
374 ", codeptr_ra=%p \n",
375 ompt_get_thread_data()->value, wait_id, codeptr_ra);
376 break;
377 default:
378 break;
382 static void
383 on_ompt_callback_mutex_released(
384 ompt_mutex_t kind,
385 ompt_wait_id_t wait_id,
386 const void *codeptr_ra)
388 switch(kind)
390 case ompt_mutex_lock:
391 printf("%" PRIu64 ":" _TOOL_PREFIX
392 " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
393 ompt_get_thread_data()->value, wait_id, codeptr_ra);
394 break;
395 case ompt_mutex_nest_lock:
396 printf("%" PRIu64 ":" _TOOL_PREFIX
397 " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
398 ", codeptr_ra=%p \n",
399 ompt_get_thread_data()->value, wait_id, codeptr_ra);
400 break;
401 case ompt_mutex_critical:
402 printf("%" PRIu64 ":" _TOOL_PREFIX
403 " ompt_event_release_critical: wait_id=%" PRIu64
404 ", codeptr_ra=%p \n",
405 ompt_get_thread_data()->value, wait_id, codeptr_ra);
406 break;
407 case ompt_mutex_atomic:
408 printf("%" PRIu64 ":" _TOOL_PREFIX
409 " ompt_event_release_atomic: wait_id=%" PRIu64
410 ", codeptr_ra=%p \n",
411 ompt_get_thread_data()->value, wait_id, codeptr_ra);
412 break;
413 case ompt_mutex_ordered:
414 printf("%" PRIu64 ":" _TOOL_PREFIX
415 " ompt_event_release_ordered: wait_id=%" PRIu64
416 ", codeptr_ra=%p \n",
417 ompt_get_thread_data()->value, wait_id, codeptr_ra);
418 break;
419 default:
420 break;
424 static void
425 on_ompt_callback_nest_lock(
426 ompt_scope_endpoint_t endpoint,
427 ompt_wait_id_t wait_id,
428 const void *codeptr_ra)
430 switch(endpoint)
432 case ompt_scope_begin:
433 printf("%" PRIu64 ":" _TOOL_PREFIX
434 " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
435 ", codeptr_ra=%p \n",
436 ompt_get_thread_data()->value, wait_id, codeptr_ra);
437 break;
438 case ompt_scope_end:
439 printf("%" PRIu64 ":" _TOOL_PREFIX
440 " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
441 ", codeptr_ra=%p \n",
442 ompt_get_thread_data()->value, wait_id, codeptr_ra);
443 break;
444 case ompt_scope_beginend:
445 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
446 exit(-1);
450 static void
451 on_ompt_callback_sync_region(
452 ompt_sync_region_t kind,
453 ompt_scope_endpoint_t endpoint,
454 ompt_data_t *parallel_data,
455 ompt_data_t *task_data,
456 const void *codeptr_ra)
458 switch(endpoint)
460 case ompt_scope_begin:
461 switch(kind)
463 case ompt_sync_region_barrier:
464 case ompt_sync_region_barrier_implicit:
465 case ompt_sync_region_barrier_implicit_workshare:
466 case ompt_sync_region_barrier_implicit_parallel:
467 case ompt_sync_region_barrier_teams:
468 case ompt_sync_region_barrier_explicit:
469 case ompt_sync_region_barrier_implementation:
470 printf("%" PRIu64 ":" _TOOL_PREFIX
471 " ompt_event_barrier_begin: parallel_id=%" PRIu64
472 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
473 ompt_get_thread_data()->value, parallel_data->value,
474 task_data->value, codeptr_ra);
475 print_ids(0);
476 break;
477 case ompt_sync_region_taskwait:
478 printf("%" PRIu64 ":" _TOOL_PREFIX
479 " ompt_event_taskwait_begin: parallel_id=%" PRIu64
480 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
481 ompt_get_thread_data()->value, parallel_data->value,
482 task_data->value, codeptr_ra);
483 break;
484 case ompt_sync_region_taskgroup:
485 printf("%" PRIu64 ":" _TOOL_PREFIX
486 " ompt_event_taskgroup_begin: parallel_id=%" PRIu64
487 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
488 ompt_get_thread_data()->value, parallel_data->value,
489 task_data->value, codeptr_ra);
490 break;
491 case ompt_sync_region_reduction:
492 printf("ompt_sync_region_reduction should never be passed to "
493 "on_ompt_callback_sync_region\n");
494 exit(-1);
495 break;
497 break;
498 case ompt_scope_end:
499 switch(kind)
501 case ompt_sync_region_barrier:
502 case ompt_sync_region_barrier_implicit:
503 case ompt_sync_region_barrier_explicit:
504 case ompt_sync_region_barrier_implicit_workshare:
505 case ompt_sync_region_barrier_implicit_parallel:
506 case ompt_sync_region_barrier_teams:
507 case ompt_sync_region_barrier_implementation:
508 printf("%" PRIu64 ":" _TOOL_PREFIX
509 " ompt_event_barrier_end: parallel_id=%" PRIu64
510 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
511 ompt_get_thread_data()->value,
512 (parallel_data) ? parallel_data->value : 0, task_data->value,
513 codeptr_ra);
514 break;
515 case ompt_sync_region_taskwait:
516 printf("%" PRIu64 ":" _TOOL_PREFIX
517 " ompt_event_taskwait_end: parallel_id=%" PRIu64
518 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
519 ompt_get_thread_data()->value,
520 (parallel_data) ? parallel_data->value : 0, task_data->value,
521 codeptr_ra);
522 break;
523 case ompt_sync_region_taskgroup:
524 printf("%" PRIu64 ":" _TOOL_PREFIX
525 " ompt_event_taskgroup_end: parallel_id=%" PRIu64
526 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
527 ompt_get_thread_data()->value,
528 (parallel_data) ? parallel_data->value : 0, task_data->value,
529 codeptr_ra);
530 break;
531 case ompt_sync_region_reduction:
532 printf("ompt_sync_region_reduction should never be passed to "
533 "on_ompt_callback_sync_region\n");
534 exit(-1);
535 break;
537 break;
538 case ompt_scope_beginend:
539 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
540 exit(-1);
544 static void
545 on_ompt_callback_sync_region_wait(
546 ompt_sync_region_t kind,
547 ompt_scope_endpoint_t endpoint,
548 ompt_data_t *parallel_data,
549 ompt_data_t *task_data,
550 const void *codeptr_ra)
552 switch(endpoint)
554 case ompt_scope_begin:
555 switch(kind)
557 case ompt_sync_region_barrier:
558 case ompt_sync_region_barrier_implicit:
559 case ompt_sync_region_barrier_implicit_workshare:
560 case ompt_sync_region_barrier_implicit_parallel:
561 case ompt_sync_region_barrier_teams:
562 case ompt_sync_region_barrier_explicit:
563 case ompt_sync_region_barrier_implementation:
564 printf("%" PRIu64 ":" _TOOL_PREFIX
565 " ompt_event_wait_barrier_begin: parallel_id=%" PRIu64
566 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
567 ompt_get_thread_data()->value, parallel_data->value,
568 task_data->value, codeptr_ra);
569 break;
570 case ompt_sync_region_taskwait:
571 printf("%" PRIu64 ":" _TOOL_PREFIX
572 " ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64
573 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
574 ompt_get_thread_data()->value, parallel_data->value,
575 task_data->value, codeptr_ra);
576 break;
577 case ompt_sync_region_taskgroup:
578 printf("%" PRIu64 ":" _TOOL_PREFIX
579 " ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64
580 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
581 ompt_get_thread_data()->value, parallel_data->value,
582 task_data->value, codeptr_ra);
583 break;
584 case ompt_sync_region_reduction:
585 printf("ompt_sync_region_reduction should never be passed to "
586 "on_ompt_callback_sync_region_wait\n");
587 exit(-1);
588 break;
590 break;
591 case ompt_scope_end:
592 switch(kind)
594 case ompt_sync_region_barrier:
595 case ompt_sync_region_barrier_implicit:
596 case ompt_sync_region_barrier_implicit_workshare:
597 case ompt_sync_region_barrier_implicit_parallel:
598 case ompt_sync_region_barrier_teams:
599 case ompt_sync_region_barrier_explicit:
600 case ompt_sync_region_barrier_implementation:
601 printf("%" PRIu64 ":" _TOOL_PREFIX
602 " ompt_event_wait_barrier_end: parallel_id=%" PRIu64
603 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
604 ompt_get_thread_data()->value,
605 (parallel_data) ? parallel_data->value : 0, task_data->value,
606 codeptr_ra);
607 break;
608 case ompt_sync_region_taskwait:
609 printf("%" PRIu64 ":" _TOOL_PREFIX
610 " ompt_event_wait_taskwait_end: parallel_id=%" PRIu64
611 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
612 ompt_get_thread_data()->value,
613 (parallel_data) ? parallel_data->value : 0, task_data->value,
614 codeptr_ra);
615 break;
616 case ompt_sync_region_taskgroup:
617 printf("%" PRIu64 ":" _TOOL_PREFIX
618 " ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64
619 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
620 ompt_get_thread_data()->value,
621 (parallel_data) ? parallel_data->value : 0, task_data->value,
622 codeptr_ra);
623 break;
624 case ompt_sync_region_reduction:
625 printf("ompt_sync_region_reduction should never be passed to "
626 "on_ompt_callback_sync_region_wait\n");
627 exit(-1);
628 break;
630 break;
631 case ompt_scope_beginend:
632 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
633 exit(-1);
637 static void on_ompt_callback_reduction(ompt_sync_region_t kind,
638 ompt_scope_endpoint_t endpoint,
639 ompt_data_t *parallel_data,
640 ompt_data_t *task_data,
641 const void *codeptr_ra) {
642 switch (endpoint) {
643 case ompt_scope_begin:
644 printf("%" PRIu64 ":" _TOOL_PREFIX
645 " ompt_event_reduction_begin: parallel_id=%" PRIu64
646 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
647 ompt_get_thread_data()->value,
648 (parallel_data) ? parallel_data->value : 0, task_data->value,
649 codeptr_ra);
650 break;
651 case ompt_scope_end:
652 printf("%" PRIu64 ":" _TOOL_PREFIX
653 " ompt_event_reduction_end: parallel_id=%" PRIu64
654 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
655 ompt_get_thread_data()->value,
656 (parallel_data) ? parallel_data->value : 0, task_data->value,
657 codeptr_ra);
658 break;
659 case ompt_scope_beginend:
660 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
661 exit(-1);
665 static void
666 on_ompt_callback_flush(
667 ompt_data_t *thread_data,
668 const void *codeptr_ra)
670 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
671 thread_data->value, codeptr_ra);
674 static void
675 on_ompt_callback_cancel(
676 ompt_data_t *task_data,
677 int flags,
678 const void *codeptr_ra)
680 const char* first_flag_value;
681 const char* second_flag_value;
682 if(flags & ompt_cancel_parallel)
683 first_flag_value = ompt_cancel_flag_t_values[0];
684 else if(flags & ompt_cancel_sections)
685 first_flag_value = ompt_cancel_flag_t_values[1];
686 else if(flags & ompt_cancel_loop)
687 first_flag_value = ompt_cancel_flag_t_values[2];
688 else if(flags & ompt_cancel_taskgroup)
689 first_flag_value = ompt_cancel_flag_t_values[3];
691 if(flags & ompt_cancel_activated)
692 second_flag_value = ompt_cancel_flag_t_values[4];
693 else if(flags & ompt_cancel_detected)
694 second_flag_value = ompt_cancel_flag_t_values[5];
695 else if(flags & ompt_cancel_discarded_task)
696 second_flag_value = ompt_cancel_flag_t_values[6];
698 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_data=%" PRIu64
699 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
700 ompt_get_thread_data()->value, task_data->value, first_flag_value,
701 second_flag_value, flags, codeptr_ra);
704 static void
705 on_ompt_callback_implicit_task(
706 ompt_scope_endpoint_t endpoint,
707 ompt_data_t *parallel_data,
708 ompt_data_t *task_data,
709 unsigned int team_size,
710 unsigned int thread_num,
711 int flags)
713 switch(endpoint)
715 case ompt_scope_begin:
716 if(task_data->ptr)
717 printf("%s\n", "0: task_data initially not null");
718 task_data->value = ompt_get_unique_id();
720 //there is no parallel_begin callback for implicit parallel region
721 //thus it is initialized in initial task
722 if(flags & ompt_task_initial)
724 char buffer[2048];
726 format_task_type(flags, buffer);
727 // Only check initial task not created by teams construct
728 if (team_size == 1 && thread_num == 1 && parallel_data->ptr)
729 printf("%s\n", "0: parallel_data initially not null");
730 parallel_data->value = ompt_get_unique_id();
731 printf("%" PRIu64 ":" _TOOL_PREFIX
732 " ompt_event_initial_task_begin: parallel_id=%" PRIu64
733 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
734 ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
735 ompt_get_thread_data()->value, parallel_data->value,
736 task_data->value, team_size, thread_num, flags);
737 } else {
738 printf("%" PRIu64 ":" _TOOL_PREFIX
739 " ompt_event_implicit_task_begin: parallel_id=%" PRIu64
740 ", task_id=%" PRIu64 ", team_size=%" PRIu32
741 ", thread_num=%" PRIu32 "\n",
742 ompt_get_thread_data()->value, parallel_data->value,
743 task_data->value, team_size, thread_num);
746 break;
747 case ompt_scope_end:
748 if(flags & ompt_task_initial){
749 printf("%" PRIu64 ":" _TOOL_PREFIX
750 " ompt_event_initial_task_end: parallel_id=%" PRIu64
751 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
752 ", index=%" PRIu32 "\n",
753 ompt_get_thread_data()->value,
754 (parallel_data) ? parallel_data->value : 0, task_data->value,
755 team_size, thread_num);
756 } else {
757 printf("%" PRIu64 ":" _TOOL_PREFIX
758 " ompt_event_implicit_task_end: parallel_id=%" PRIu64
759 ", task_id=%" PRIu64 ", team_size=%" PRIu32
760 ", thread_num=%" PRIu32 "\n",
761 ompt_get_thread_data()->value,
762 (parallel_data) ? parallel_data->value : 0, task_data->value,
763 team_size, thread_num);
765 break;
766 case ompt_scope_beginend:
767 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
768 exit(-1);
772 static void
773 on_ompt_callback_lock_init(
774 ompt_mutex_t kind,
775 unsigned int hint,
776 unsigned int impl,
777 ompt_wait_id_t wait_id,
778 const void *codeptr_ra)
780 switch(kind)
782 case ompt_mutex_lock:
783 printf("%" PRIu64 ":" _TOOL_PREFIX
784 " ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
785 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
786 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
787 break;
788 case ompt_mutex_nest_lock:
789 printf("%" PRIu64 ":" _TOOL_PREFIX
790 " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
791 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
792 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
793 break;
794 default:
795 break;
799 static void
800 on_ompt_callback_lock_destroy(
801 ompt_mutex_t kind,
802 ompt_wait_id_t wait_id,
803 const void *codeptr_ra)
805 switch(kind)
807 case ompt_mutex_lock:
808 printf("%" PRIu64 ":" _TOOL_PREFIX
809 " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
810 ompt_get_thread_data()->value, wait_id, codeptr_ra);
811 break;
812 case ompt_mutex_nest_lock:
813 printf("%" PRIu64 ":" _TOOL_PREFIX
814 " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
815 ", codeptr_ra=%p \n",
816 ompt_get_thread_data()->value, wait_id, codeptr_ra);
817 break;
818 default:
819 break;
823 static void
824 on_ompt_callback_work(
825 ompt_work_t wstype,
826 ompt_scope_endpoint_t endpoint,
827 ompt_data_t *parallel_data,
828 ompt_data_t *task_data,
829 uint64_t count,
830 const void *codeptr_ra)
832 switch(endpoint)
834 case ompt_scope_begin:
835 switch(wstype)
837 case ompt_work_loop:
838 case ompt_work_loop_static:
839 case ompt_work_loop_dynamic:
840 case ompt_work_loop_guided:
841 case ompt_work_loop_other:
842 // TODO: add schedule attribute for the different work_loop types.
843 // e.g., ", schedule=%s", ..., ompt_schedule_values[wstype]
844 printf("%" PRIu64 ":" _TOOL_PREFIX
845 " ompt_event_loop_begin: parallel_id=%" PRIu64
846 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
847 "\n",
848 ompt_get_thread_data()->value, parallel_data->value,
849 task_data->value, codeptr_ra, count);
850 break;
851 case ompt_work_sections:
852 printf("%" PRIu64 ":" _TOOL_PREFIX
853 " ompt_event_sections_begin: parallel_id=%" PRIu64
854 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
855 "\n",
856 ompt_get_thread_data()->value, parallel_data->value,
857 task_data->value, codeptr_ra, count);
858 break;
859 case ompt_work_single_executor:
860 printf("%" PRIu64 ":" _TOOL_PREFIX
861 " ompt_event_single_in_block_begin: parallel_id=%" PRIu64
862 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
863 "\n",
864 ompt_get_thread_data()->value, parallel_data->value,
865 task_data->value, codeptr_ra, count);
866 break;
867 case ompt_work_single_other:
868 printf("%" PRIu64 ":" _TOOL_PREFIX
869 " ompt_event_single_others_begin: parallel_id=%" PRIu64
870 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
871 ompt_get_thread_data()->value, parallel_data->value,
872 task_data->value, codeptr_ra, count);
873 break;
874 case ompt_work_workshare:
875 //impl
876 break;
877 case ompt_work_distribute:
878 printf("%" PRIu64 ":" _TOOL_PREFIX
879 " ompt_event_distribute_begin: parallel_id=%" PRIu64
880 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
881 "\n",
882 ompt_get_thread_data()->value, parallel_data->value,
883 task_data->value, codeptr_ra, count);
884 break;
885 case ompt_work_taskloop:
886 //impl
887 printf("%" PRIu64 ":" _TOOL_PREFIX
888 " ompt_event_taskloop_begin: parallel_id=%" PRIu64
889 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
890 "\n",
891 ompt_get_thread_data()->value, parallel_data->value,
892 task_data->value, codeptr_ra, count);
893 break;
894 case ompt_work_scope:
895 printf("%" PRIu64 ":" _TOOL_PREFIX
896 " ompt_event_scope_begin: parallel_id=%" PRIu64
897 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
898 "\n",
899 ompt_get_thread_data()->value, parallel_data->value,
900 task_data->value, codeptr_ra, count);
901 break;
903 break;
904 case ompt_scope_end:
905 switch(wstype)
907 case ompt_work_loop:
908 case ompt_work_loop_static:
909 case ompt_work_loop_dynamic:
910 case ompt_work_loop_guided:
911 case ompt_work_loop_other:
912 printf("%" PRIu64 ":" _TOOL_PREFIX
913 " ompt_event_loop_end: parallel_id=%" PRIu64
914 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
915 ompt_get_thread_data()->value, parallel_data->value,
916 task_data->value, codeptr_ra, count);
917 break;
918 case ompt_work_sections:
919 printf("%" PRIu64 ":" _TOOL_PREFIX
920 " ompt_event_sections_end: parallel_id=%" PRIu64
921 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
922 ompt_get_thread_data()->value, parallel_data->value,
923 task_data->value, codeptr_ra, count);
924 break;
925 case ompt_work_single_executor:
926 printf("%" PRIu64 ":" _TOOL_PREFIX
927 " ompt_event_single_in_block_end: parallel_id=%" PRIu64
928 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
929 ompt_get_thread_data()->value, parallel_data->value,
930 task_data->value, codeptr_ra, count);
931 break;
932 case ompt_work_single_other:
933 printf("%" PRIu64 ":" _TOOL_PREFIX
934 " ompt_event_single_others_end: parallel_id=%" PRIu64
935 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
936 ompt_get_thread_data()->value, parallel_data->value,
937 task_data->value, codeptr_ra, count);
938 break;
939 case ompt_work_workshare:
940 //impl
941 break;
942 case ompt_work_distribute:
943 printf("%" PRIu64 ":" _TOOL_PREFIX
944 " ompt_event_distribute_end: parallel_id=%" PRIu64
945 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
946 "\n",
947 ompt_get_thread_data()->value, parallel_data->value,
948 task_data->value, codeptr_ra, count);
949 break;
950 case ompt_work_taskloop:
951 //impl
952 printf("%" PRIu64 ":" _TOOL_PREFIX
953 " ompt_event_taskloop_end: parallel_id=%" PRIu64
954 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
955 "\n",
956 ompt_get_thread_data()->value, parallel_data->value,
957 task_data->value, codeptr_ra, count);
958 break;
959 case ompt_work_scope:
960 printf("%" PRIu64 ":" _TOOL_PREFIX
961 " ompt_event_scope_end: parallel_id=%" PRIu64
962 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
963 "\n",
964 ompt_get_thread_data()->value, parallel_data->value,
965 task_data->value, codeptr_ra, count);
966 break;
968 break;
969 case ompt_scope_beginend:
970 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
971 exit(-1);
975 static void on_ompt_callback_dispatch(
976 ompt_data_t *parallel_data,
977 ompt_data_t *task_data,
978 ompt_dispatch_t kind,
979 ompt_data_t instance) {
980 char *event_name = NULL;
981 void *codeptr_ra = NULL;
982 ompt_dispatch_chunk_t *dispatch_chunk = NULL;
983 switch (kind) {
984 case ompt_dispatch_section:
985 event_name = "ompt_event_section_begin";
986 codeptr_ra = instance.ptr;
987 break;
988 case ompt_dispatch_ws_loop_chunk:
989 event_name = "ompt_event_ws_loop_chunk_begin";
990 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
991 break;
992 case ompt_dispatch_taskloop_chunk:
993 event_name = "ompt_event_taskloop_chunk_begin";
994 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
995 break;
996 case ompt_dispatch_distribute_chunk:
997 event_name = "ompt_event_distribute_chunk_begin";
998 dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
999 break;
1000 default:
1001 event_name = "ompt_ws_loop_iteration_begin";
1003 printf("%" PRIu64 ":" _TOOL_PREFIX
1004 " %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
1005 ", codeptr_ra=%p, chunk_start=%" PRIu64 ", chunk_iterations=%" PRIu64
1006 "\n", ompt_get_thread_data()->value, event_name, parallel_data->value,
1007 task_data->value, codeptr_ra,
1008 dispatch_chunk ? dispatch_chunk->start : 0,
1009 dispatch_chunk ? dispatch_chunk->iterations : 0);
1012 static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
1013 ompt_data_t *parallel_data,
1014 ompt_data_t *task_data,
1015 const void *codeptr_ra) {
1016 switch(endpoint)
1018 case ompt_scope_begin:
1019 printf("%" PRIu64 ":" _TOOL_PREFIX
1020 " ompt_event_masked_begin: parallel_id=%" PRIu64
1021 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
1022 ompt_get_thread_data()->value, parallel_data->value,
1023 task_data->value, codeptr_ra);
1024 break;
1025 case ompt_scope_end:
1026 printf("%" PRIu64 ":" _TOOL_PREFIX
1027 " ompt_event_masked_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64
1028 ", codeptr_ra=%p\n",
1029 ompt_get_thread_data()->value, parallel_data->value,
1030 task_data->value, codeptr_ra);
1031 break;
1032 case ompt_scope_beginend:
1033 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
1034 exit(-1);
1038 static void on_ompt_callback_parallel_begin(
1039 ompt_data_t *encountering_task_data,
1040 const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
1041 uint32_t requested_team_size, int flag, const void *codeptr_ra) {
1042 if(parallel_data->ptr)
1043 printf("0: parallel_data initially not null\n");
1044 parallel_data->value = ompt_get_unique_id();
1045 int invoker = flag & 0xF;
1046 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
1047 const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams";
1048 printf("%" PRIu64 ":" _TOOL_PREFIX
1049 " ompt_event_%s_begin: parent_task_id=%" PRIu64
1050 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
1051 "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
1052 ", codeptr_ra=%p, invoker=%d\n",
1053 ompt_get_thread_data()->value, event, encountering_task_data->value,
1054 encountering_task_frame->exit_frame.ptr,
1055 encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
1056 requested_team_size, codeptr_ra, invoker);
1059 static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
1060 ompt_data_t *encountering_task_data,
1061 int flag, const void *codeptr_ra) {
1062 int invoker = flag & 0xF;
1063 const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
1064 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_end: parallel_id=%" PRIu64
1065 ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n",
1066 ompt_get_thread_data()->value, event, parallel_data->value,
1067 encountering_task_data->value, invoker, codeptr_ra);
1070 static void
1071 on_ompt_callback_task_create(
1072 ompt_data_t *encountering_task_data,
1073 const ompt_frame_t *encountering_task_frame,
1074 ompt_data_t* new_task_data,
1075 int type,
1076 int has_dependences,
1077 const void *codeptr_ra)
1079 if(new_task_data->ptr)
1080 printf("0: new_task_data initially not null\n");
1081 new_task_data->value = ompt_get_unique_id();
1082 char buffer[2048];
1084 format_task_type(type, buffer);
1086 printf(
1087 "%" PRIu64 ":" _TOOL_PREFIX
1088 " ompt_event_task_create: parent_task_id=%" PRIu64
1089 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
1090 "new_task_id=%" PRIu64
1091 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
1092 ompt_get_thread_data()->value,
1093 encountering_task_data ? encountering_task_data->value : 0,
1094 encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
1095 encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
1096 new_task_data->value, codeptr_ra, buffer, type,
1097 has_dependences ? "yes" : "no");
1100 static void
1101 on_ompt_callback_task_schedule(
1102 ompt_data_t *first_task_data,
1103 ompt_task_status_t prior_task_status,
1104 ompt_data_t *second_task_data)
1106 printf("%" PRIu64 ":" _TOOL_PREFIX
1107 " ompt_event_task_schedule: first_task_id=%" PRIu64
1108 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
1109 ompt_get_thread_data()->value, first_task_data->value,
1110 (second_task_data ? second_task_data->value : -1),
1111 ompt_task_status_t_values[prior_task_status], prior_task_status);
1112 if (prior_task_status == ompt_task_complete ||
1113 prior_task_status == ompt_task_late_fulfill ||
1114 prior_task_status == ompt_taskwait_complete) {
1115 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIu64
1116 "\n", ompt_get_thread_data()->value, first_task_data->value);
1120 static void
1121 on_ompt_callback_dependences(
1122 ompt_data_t *task_data,
1123 const ompt_dependence_t *deps,
1124 int ndeps)
1126 char buffer[2048];
1127 char *progress = buffer;
1128 int i;
1129 for (i = 0; i < ndeps && progress < buffer + 2000; i++) {
1130 if (deps[i].dependence_type == ompt_dependence_type_source ||
1131 deps[i].dependence_type == ompt_dependence_type_sink)
1132 progress +=
1133 sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value,
1134 ompt_dependence_type_t_values[deps[i].dependence_type]);
1135 else
1136 progress +=
1137 sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
1138 ompt_dependence_type_t_values[deps[i].dependence_type]);
1140 if (ndeps > 0)
1141 progress[-2] = 0;
1142 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
1143 ", deps=[%s], ndeps=%d\n",
1144 ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
1147 static void
1148 on_ompt_callback_task_dependence(
1149 ompt_data_t *first_task_data,
1150 ompt_data_t *second_task_data)
1152 printf("%" PRIu64 ":" _TOOL_PREFIX
1153 " ompt_event_task_dependence_pair: first_task_id=%" PRIu64
1154 ", second_task_id=%" PRIu64 "\n",
1155 ompt_get_thread_data()->value, first_task_data->value,
1156 second_task_data->value);
1159 static void
1160 on_ompt_callback_thread_begin(
1161 ompt_thread_t thread_type,
1162 ompt_data_t *thread_data)
1164 if(thread_data->ptr)
1165 printf("%s\n", "0: thread_data initially not null");
1166 thread_data->value = ompt_get_unique_id();
1167 printf("%" PRIu64 ":" _TOOL_PREFIX
1168 " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
1169 ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
1170 thread_type, thread_data->value);
1173 static void
1174 on_ompt_callback_thread_end(
1175 ompt_data_t *thread_data)
1177 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_thread_end: thread_id=%" PRIu64
1178 "\n",
1179 ompt_get_thread_data()->value, thread_data->value);
1182 static int
1183 on_ompt_callback_control_tool(
1184 uint64_t command,
1185 uint64_t modifier,
1186 void *arg,
1187 const void *codeptr_ra)
1189 ompt_frame_t* omptTaskFrame;
1190 ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
1191 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_control_tool: command=%" PRIu64
1192 ", modifier=%" PRIu64
1193 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
1194 "current_task_frame.reenter=%p \n",
1195 ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
1196 omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
1198 // the following would interfere with expected output for OMPT tests, so skip
1199 #ifndef _OMPT_TESTS
1200 // print task data
1201 int task_level = 0;
1202 ompt_data_t *task_data;
1203 while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
1204 NULL, NULL)) {
1205 printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIu64 "\n",
1206 ompt_get_thread_data()->value, task_level, task_data->value);
1207 task_level++;
1210 // print parallel data
1211 int parallel_level = 0;
1212 ompt_data_t *parallel_data;
1213 while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)&parallel_data,
1214 NULL)) {
1215 printf("%" PRIu64 ":" _TOOL_PREFIX " parallel level %d: parallel_id=%" PRIu64
1216 "\n",
1217 ompt_get_thread_data()->value, parallel_level, parallel_data->value);
1218 parallel_level++;
1220 #endif
1221 return 0; //success
1224 static void on_ompt_callback_error(ompt_severity_t severity,
1225 const char *message, size_t length,
1226 const void *codeptr_ra) {
1227 printf("%" PRIu64 ": ompt_event_runtime_error: severity=%" PRIu32
1228 ", message=%s, length=%" PRIu64 ", codeptr_ra=%p\n",
1229 ompt_get_thread_data()->value, severity, message, (uint64_t)length,
1230 codeptr_ra);
1233 int ompt_initialize(
1234 ompt_function_lookup_t lookup,
1235 int initial_device_num,
1236 ompt_data_t *tool_data)
1238 ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
1239 ompt_get_callback = (ompt_get_callback_t) lookup("ompt_get_callback");
1240 ompt_get_state = (ompt_get_state_t) lookup("ompt_get_state");
1241 ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info");
1242 ompt_get_task_memory = (ompt_get_task_memory_t)lookup("ompt_get_task_memory");
1243 ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
1244 ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
1245 ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
1246 ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool");
1248 ompt_get_unique_id();
1250 ompt_get_num_procs = (ompt_get_num_procs_t) lookup("ompt_get_num_procs");
1251 ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places");
1252 ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids");
1253 ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num");
1254 ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums");
1255 ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id");
1256 ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states");
1257 ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls");
1259 register_ompt_callback(ompt_callback_mutex_acquire);
1260 register_ompt_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
1261 register_ompt_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
1262 register_ompt_callback(ompt_callback_nest_lock);
1263 register_ompt_callback(ompt_callback_sync_region);
1264 register_ompt_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
1265 register_ompt_callback_t(ompt_callback_reduction, ompt_callback_sync_region_t);
1266 register_ompt_callback(ompt_callback_control_tool);
1267 register_ompt_callback(ompt_callback_flush);
1268 register_ompt_callback(ompt_callback_cancel);
1269 register_ompt_callback(ompt_callback_implicit_task);
1270 register_ompt_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
1271 register_ompt_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
1272 register_ompt_callback(ompt_callback_work);
1273 register_ompt_callback(ompt_callback_dispatch);
1274 register_ompt_callback(ompt_callback_masked);
1275 register_ompt_callback(ompt_callback_parallel_begin);
1276 register_ompt_callback(ompt_callback_parallel_end);
1277 register_ompt_callback(ompt_callback_task_create);
1278 register_ompt_callback(ompt_callback_task_schedule);
1279 register_ompt_callback(ompt_callback_dependences);
1280 register_ompt_callback(ompt_callback_task_dependence);
1281 register_ompt_callback(ompt_callback_thread_begin);
1282 register_ompt_callback(ompt_callback_thread_end);
1283 register_ompt_callback(ompt_callback_error);
1284 printf("0: NULL_POINTER=%p\n", (void*)NULL);
1285 return 1; //success
1288 void ompt_finalize(ompt_data_t *tool_data)
1290 printf("0: ompt_event_runtime_shutdown\n");
1293 #ifdef __cplusplus
1294 extern "C" {
1295 #endif
1296 ompt_start_tool_result_t* ompt_start_tool(
1297 unsigned int omp_version,
1298 const char *runtime_version)
1300 static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
1301 return &ompt_start_tool_result;
1303 #ifdef __cplusplus
1305 #endif
1306 #endif // ifndef USE_PRIVATE_TOOL
1307 #ifdef _OMPT_TESTS
1308 #undef _OMPT_TESTS
1309 #endif