1 /* Test dispatch of events to callbacks. */
11 /* Use explicit 'copyin' clauses, to work around "'firstprivate'
12 optimizations", which will cause the value at the point of call to be used
13 (*before* any potential modifications done in callbacks), as opposed to its
14 address being taken, which then later gets dereferenced (*after* any
15 modifications done in callbacks). */
16 #define COPYIN(...) copyin(__VA_ARGS__)
19 /* See the 'DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT' reference in
21 #define DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT 0
24 /* Do we expect to see 'acc_ev_exit_data_start' and 'acc_ev_exit_data_end'
25 after a compute construct with an 'async' clause? */
26 #define ASYNC_EXIT_DATA 1
29 #define DEBUG_printf(...) //__builtin_printf (__VA_ARGS__)
32 static acc_prof_reg reg
;
33 static acc_prof_reg unreg
;
34 static acc_prof_lookup_func lookup
;
35 void acc_register_library (acc_prof_reg reg_
, acc_prof_reg unreg_
, acc_prof_lookup_func lookup_
)
37 DEBUG_printf ("%s\n", __FUNCTION__
);
45 static int state
= -1;
47 #define STATE_OP(state, op) \
50 typeof (state) state_o = (state); \
53 DEBUG_printf("state: %d -> %d\n", state_o, (state)); \
58 static acc_device_t acc_device_type
;
59 static int acc_device_num
;
60 static int acc_async
= acc_async_sync
;
65 acc_event_info event_info
;
66 struct tool_info
*nested
;
68 struct tool_info
*tool_info
;
70 static void cb_device_init_start (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
72 DEBUG_printf ("%s\n", __FUNCTION__
);
74 #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
79 assert (tool_info
!= NULL
);
80 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
81 assert (tool_info
->nested
== NULL
);
82 tool_info
->nested
= (struct tool_info
*) malloc(sizeof *tool_info
);
83 assert (tool_info
->nested
!= NULL
);
84 tool_info
->nested
->nested
= NULL
;
90 assert (tool_info
== NULL
);
91 tool_info
= (struct tool_info
*) malloc(sizeof *tool_info
);
92 assert (tool_info
!= NULL
);
93 tool_info
->nested
= NULL
;
96 assert (prof_info
->event_type
== acc_ev_device_init_start
);
97 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
98 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
99 assert (prof_info
->device_type
== acc_device_default
);
100 assert (prof_info
->device_number
== acc_device_num
);
101 assert (prof_info
->thread_id
== -1);
102 assert (prof_info
->async
== acc_async_sync
);
103 assert (prof_info
->async_queue
== prof_info
->async
);
104 assert (prof_info
->src_file
== NULL
);
105 assert (prof_info
->func_name
== NULL
);
106 assert (prof_info
->line_no
== -1);
107 assert (prof_info
->end_line_no
== -1);
108 assert (prof_info
->func_line_no
== -1);
109 assert (prof_info
->func_end_line_no
== -1);
111 assert (event_info
->other_event
.event_type
== prof_info
->event_type
);
112 assert (event_info
->other_event
.valid_bytes
== _ACC_OTHER_EVENT_INFO_VALID_BYTES
);
113 assert (event_info
->other_event
.parent_construct
== acc_construct_parallel
);
114 assert (event_info
->other_event
.implicit
== 1);
115 assert (event_info
->other_event
.tool_info
== NULL
);
117 assert (api_info
->device_api
== acc_device_api_none
);
118 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
119 assert (api_info
->device_type
== prof_info
->device_type
);
120 assert (api_info
->vendor
== -1);
121 assert (api_info
->device_handle
== NULL
);
122 assert (api_info
->context_handle
== NULL
);
123 assert (api_info
->async_handle
== NULL
);
125 #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
126 tool_info
->nested
->event_info
.other_event
.event_type
= event_info
->other_event
.event_type
;
127 event_info
->other_event
.tool_info
= tool_info
->nested
;
129 tool_info
->event_info
.other_event
.event_type
= event_info
->other_event
.event_type
;
130 event_info
->other_event
.tool_info
= tool_info
;
134 static void cb_device_init_end (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
136 DEBUG_printf ("%s\n", __FUNCTION__
);
138 #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
141 STATE_OP (state
, ++);
143 assert (tool_info
!= NULL
);
144 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
145 assert (tool_info
->nested
!= NULL
);
146 assert (tool_info
->nested
->event_info
.other_event
.event_type
== acc_ev_device_init_start
);
150 STATE_OP (state
, ++);
152 assert (tool_info
!= NULL
);
153 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_device_init_start
);
156 assert (prof_info
->event_type
== acc_ev_device_init_end
);
157 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
158 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
159 assert (prof_info
->device_type
== acc_device_default
);
160 assert (prof_info
->device_number
== acc_device_num
);
161 assert (prof_info
->thread_id
== -1);
162 assert (prof_info
->async
== acc_async_sync
);
163 assert (prof_info
->async_queue
== prof_info
->async
);
164 assert (prof_info
->src_file
== NULL
);
165 assert (prof_info
->func_name
== NULL
);
166 assert (prof_info
->line_no
== -1);
167 assert (prof_info
->end_line_no
== -1);
168 assert (prof_info
->func_line_no
== -1);
169 assert (prof_info
->func_end_line_no
== -1);
171 assert (event_info
->other_event
.event_type
== prof_info
->event_type
);
172 assert (event_info
->other_event
.valid_bytes
== _ACC_OTHER_EVENT_INFO_VALID_BYTES
);
173 assert (event_info
->other_event
.parent_construct
== acc_construct_parallel
);
174 assert (event_info
->other_event
.implicit
== 1);
175 #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
176 assert (event_info
->other_event
.tool_info
== tool_info
->nested
);
178 assert (event_info
->other_event
.tool_info
== tool_info
);
181 assert (api_info
->device_api
== acc_device_api_none
);
182 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
183 assert (api_info
->device_type
== prof_info
->device_type
);
184 assert (api_info
->vendor
== -1);
185 assert (api_info
->device_handle
== NULL
);
186 assert (api_info
->context_handle
== NULL
);
187 assert (api_info
->async_handle
== NULL
);
189 #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
190 free (tool_info
->nested
);
191 tool_info
->nested
= NULL
;
198 static void cb_alloc (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
200 DEBUG_printf ("%s\n", __FUNCTION__
);
202 #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
207 STATE_OP (state
, ++);
212 assert (tool_info
!= NULL
);
213 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
214 assert (tool_info
->nested
!= NULL
);
215 assert (tool_info
->nested
->event_info
.other_event
.event_type
== acc_ev_enter_data_start
);
216 assert (tool_info
->nested
->nested
== NULL
);
222 assert (prof_info
->event_type
== acc_ev_alloc
);
223 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
224 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
225 assert (prof_info
->device_type
== acc_device_type
);
226 assert (prof_info
->device_number
== acc_device_num
);
227 assert (prof_info
->thread_id
== -1);
228 assert (prof_info
->async
== acc_async
);
229 assert (prof_info
->async_queue
== prof_info
->async
);
230 assert (prof_info
->src_file
== NULL
);
231 assert (prof_info
->func_name
== NULL
);
232 assert (prof_info
->line_no
== -1);
233 assert (prof_info
->end_line_no
== -1);
234 assert (prof_info
->func_line_no
== -1);
235 assert (prof_info
->func_end_line_no
== -1);
237 assert (event_info
->data_event
.event_type
== prof_info
->event_type
);
238 assert (event_info
->data_event
.valid_bytes
== _ACC_DATA_EVENT_INFO_VALID_BYTES
);
239 assert (event_info
->data_event
.parent_construct
== acc_construct_parallel
);
240 assert (event_info
->data_event
.implicit
== 1);
241 assert (event_info
->data_event
.tool_info
== NULL
);
242 assert (event_info
->data_event
.var_name
== NULL
);
243 assert (event_info
->data_event
.bytes
!= 0);
244 assert (event_info
->data_event
.host_ptr
== NULL
);
245 assert (event_info
->data_event
.device_ptr
!= NULL
);
247 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
248 assert (api_info
->device_type
== prof_info
->device_type
);
249 assert (api_info
->vendor
== -1);
250 assert (api_info
->device_handle
== NULL
);
251 assert (api_info
->context_handle
== NULL
);
252 assert (api_info
->async_handle
== NULL
);
255 static void cb_free (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
257 DEBUG_printf ("%s\n", __FUNCTION__
);
259 #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
263 STATE_OP (state
, ++);
267 assert (tool_info
!= NULL
);
268 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
269 assert (tool_info
->nested
!= NULL
);
270 assert (tool_info
->nested
->event_info
.other_event
.event_type
== acc_ev_exit_data_start
);
271 assert (tool_info
->nested
->nested
== NULL
);
277 assert (prof_info
->event_type
== acc_ev_free
);
278 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
279 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
280 assert (prof_info
->device_type
== acc_device_type
);
281 assert (prof_info
->device_number
== acc_device_num
);
282 assert (prof_info
->thread_id
== -1);
283 assert (prof_info
->async
== acc_async
);
284 assert (prof_info
->async_queue
== prof_info
->async
);
285 assert (prof_info
->src_file
== NULL
);
286 assert (prof_info
->func_name
== NULL
);
287 assert (prof_info
->line_no
== -1);
288 assert (prof_info
->end_line_no
== -1);
289 assert (prof_info
->func_line_no
== -1);
290 assert (prof_info
->func_end_line_no
== -1);
292 assert (event_info
->data_event
.event_type
== prof_info
->event_type
);
293 assert (event_info
->data_event
.valid_bytes
== _ACC_DATA_EVENT_INFO_VALID_BYTES
);
294 assert (event_info
->data_event
.parent_construct
== acc_construct_parallel
);
295 assert (event_info
->data_event
.implicit
== 1);
296 assert (event_info
->data_event
.tool_info
== NULL
);
297 assert (event_info
->data_event
.var_name
== NULL
);
298 if (acc_device_type
== acc_device_nvidia
)
299 assert (event_info
->data_event
.bytes
== (size_t) -1);
300 else if (acc_device_type
== acc_device_radeon
)
301 assert (event_info
->data_event
.bytes
== 0);
304 assert (event_info
->data_event
.host_ptr
== NULL
);
305 assert (event_info
->data_event
.device_ptr
!= NULL
);
307 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
308 assert (api_info
->device_type
== prof_info
->device_type
);
309 assert (api_info
->vendor
== -1);
310 assert (api_info
->device_handle
== NULL
);
311 assert (api_info
->context_handle
== NULL
);
312 assert (api_info
->async_handle
== NULL
);
315 static void cb_enter_data_start (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
317 DEBUG_printf ("%s\n", __FUNCTION__
);
321 STATE_OP (state
, ++);
323 assert (tool_info
!= NULL
);
324 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
325 assert (tool_info
->nested
== NULL
);
326 tool_info
->nested
= (struct tool_info
*) malloc(sizeof *tool_info
);
327 assert (tool_info
->nested
!= NULL
);
328 tool_info
->nested
->nested
= NULL
;
330 assert (prof_info
->event_type
== acc_ev_enter_data_start
);
331 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
332 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
333 assert (prof_info
->device_type
== acc_device_type
);
334 assert (prof_info
->device_number
== acc_device_num
);
335 assert (prof_info
->thread_id
== -1);
336 assert (prof_info
->async
== acc_async
);
337 assert (prof_info
->async_queue
== prof_info
->async
);
338 assert (prof_info
->src_file
== NULL
);
339 assert (prof_info
->func_name
== NULL
);
340 assert (prof_info
->line_no
== -1);
341 assert (prof_info
->end_line_no
== -1);
342 assert (prof_info
->func_line_no
== -1);
343 assert (prof_info
->func_end_line_no
== -1);
345 assert (event_info
->other_event
.event_type
== prof_info
->event_type
);
346 assert (event_info
->other_event
.valid_bytes
== _ACC_OTHER_EVENT_INFO_VALID_BYTES
);
347 assert (event_info
->other_event
.parent_construct
== acc_construct_parallel
);
348 assert (event_info
->other_event
.implicit
== 1);
349 assert (event_info
->other_event
.tool_info
== NULL
);
351 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
352 assert (api_info
->device_type
== prof_info
->device_type
);
353 assert (api_info
->vendor
== -1);
354 assert (api_info
->device_handle
== NULL
);
355 assert (api_info
->context_handle
== NULL
);
356 assert (api_info
->async_handle
== NULL
);
358 tool_info
->nested
->event_info
.other_event
.event_type
= event_info
->other_event
.event_type
;
359 event_info
->other_event
.tool_info
= tool_info
->nested
;
362 static void cb_enter_data_end (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
364 DEBUG_printf ("%s\n", __FUNCTION__
);
369 /* Conceptually, 'acc_ev_enter_data_end' marks the end of data copying,
370 before 'acc_ev_enqueue_launch_start' marks invoking the compute region.
371 That's the 'state_init = state;' intended to be captured in the compute
373 /* In an 'async' setting, this event may be triggered before actual 'async'
374 data copying has completed. Given that 'state' appears in 'COPYIN', we
375 first have to synchronize (that is, let the 'async' 'COPYIN' read the
376 current 'state' value)... */
377 if (acc_async
!= acc_async_sync
)
379 /* "We're not yet accounting for the fact that _OpenACC events may occur
380 during event processing_"; temporarily disable to avoid deadlock. */
381 unreg (acc_ev_none
, NULL
, acc_toggle_per_thread
);
382 acc_wait (acc_async
);
383 reg (acc_ev_none
, NULL
, acc_toggle_per_thread
);
385 /* ... before modifying it in the following. */
387 STATE_OP (state
, ++);
389 assert (tool_info
!= NULL
);
390 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
391 assert (tool_info
->nested
!= NULL
);
392 assert (tool_info
->nested
->event_info
.other_event
.event_type
== acc_ev_enter_data_start
);
394 assert (prof_info
->event_type
== acc_ev_enter_data_end
);
395 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
396 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
397 assert (prof_info
->device_type
== acc_device_type
);
398 assert (prof_info
->device_number
== acc_device_num
);
399 assert (prof_info
->thread_id
== -1);
400 assert (prof_info
->async
== acc_async
);
401 assert (prof_info
->async_queue
== prof_info
->async
);
402 assert (prof_info
->src_file
== NULL
);
403 assert (prof_info
->func_name
== NULL
);
404 assert (prof_info
->line_no
== -1);
405 assert (prof_info
->end_line_no
== -1);
406 assert (prof_info
->func_line_no
== -1);
407 assert (prof_info
->func_end_line_no
== -1);
409 assert (event_info
->other_event
.event_type
== prof_info
->event_type
);
410 assert (event_info
->other_event
.valid_bytes
== _ACC_OTHER_EVENT_INFO_VALID_BYTES
);
411 assert (event_info
->other_event
.parent_construct
== acc_construct_parallel
);
412 assert (event_info
->other_event
.implicit
== 1);
413 assert (event_info
->other_event
.tool_info
== tool_info
->nested
);
415 if (acc_device_type
== acc_device_host
)
416 assert (api_info
->device_api
== acc_device_api_none
);
417 else if (acc_device_type
== acc_device_radeon
)
418 assert (api_info
->device_api
== acc_device_api_other
);
420 assert (api_info
->device_api
== acc_device_api_cuda
);
421 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
422 assert (api_info
->device_type
== prof_info
->device_type
);
423 assert (api_info
->vendor
== -1);
424 assert (api_info
->device_handle
== NULL
);
425 assert (api_info
->context_handle
== NULL
);
426 assert (api_info
->async_handle
== NULL
);
428 free (tool_info
->nested
);
429 tool_info
->nested
= NULL
;
432 static void cb_exit_data_start (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
434 DEBUG_printf ("%s\n", __FUNCTION__
);
441 STATE_OP (state
, ++);
443 assert (tool_info
!= NULL
);
444 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
445 assert (tool_info
->nested
== NULL
);
446 tool_info
->nested
= (struct tool_info
*) malloc(sizeof *tool_info
);
447 assert (tool_info
->nested
!= NULL
);
448 tool_info
->nested
->nested
= NULL
;
450 assert (prof_info
->event_type
== acc_ev_exit_data_start
);
451 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
452 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
453 assert (prof_info
->device_type
== acc_device_type
);
454 assert (prof_info
->device_number
== acc_device_num
);
455 assert (prof_info
->thread_id
== -1);
456 assert (prof_info
->async
== acc_async
);
457 assert (prof_info
->async_queue
== prof_info
->async
);
458 assert (prof_info
->src_file
== NULL
);
459 assert (prof_info
->func_name
== NULL
);
460 assert (prof_info
->line_no
== -1);
461 assert (prof_info
->end_line_no
== -1);
462 assert (prof_info
->func_line_no
== -1);
463 assert (prof_info
->func_end_line_no
== -1);
465 assert (event_info
->other_event
.event_type
== prof_info
->event_type
);
466 assert (event_info
->other_event
.valid_bytes
== _ACC_OTHER_EVENT_INFO_VALID_BYTES
);
467 assert (event_info
->other_event
.parent_construct
== acc_construct_parallel
);
468 assert (event_info
->other_event
.implicit
== 1);
469 assert (event_info
->other_event
.tool_info
== NULL
);
471 if (acc_device_type
== acc_device_host
)
472 assert (api_info
->device_api
== acc_device_api_none
);
473 else if (acc_device_type
== acc_device_radeon
)
474 assert (api_info
->device_api
== acc_device_api_other
);
476 assert (api_info
->device_api
== acc_device_api_cuda
);
477 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
478 assert (api_info
->device_type
== prof_info
->device_type
);
479 assert (api_info
->vendor
== -1);
480 assert (api_info
->device_handle
== NULL
);
481 assert (api_info
->context_handle
== NULL
);
482 assert (api_info
->async_handle
== NULL
);
484 tool_info
->nested
->event_info
.other_event
.event_type
= event_info
->other_event
.event_type
;
485 event_info
->other_event
.tool_info
= tool_info
->nested
;
488 if (acc_async
!= acc_async_sync
)
490 /* Compensate for the deferred 'acc_ev_free'. */
498 static void cb_exit_data_end (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
500 DEBUG_printf ("%s\n", __FUNCTION__
);
507 STATE_OP (state
, ++);
509 assert (tool_info
!= NULL
);
510 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
511 assert (tool_info
->nested
!= NULL
);
512 assert (tool_info
->nested
->event_info
.other_event
.event_type
== acc_ev_exit_data_start
);
514 assert (prof_info
->event_type
== acc_ev_exit_data_end
);
515 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
516 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
517 assert (prof_info
->device_type
== acc_device_type
);
518 assert (prof_info
->device_number
== acc_device_num
);
519 assert (prof_info
->thread_id
== -1);
520 assert (prof_info
->async
== acc_async
);
521 assert (prof_info
->async_queue
== prof_info
->async
);
522 assert (prof_info
->src_file
== NULL
);
523 assert (prof_info
->func_name
== NULL
);
524 assert (prof_info
->line_no
== -1);
525 assert (prof_info
->end_line_no
== -1);
526 assert (prof_info
->func_line_no
== -1);
527 assert (prof_info
->func_end_line_no
== -1);
529 assert (event_info
->other_event
.event_type
== prof_info
->event_type
);
530 assert (event_info
->other_event
.valid_bytes
== _ACC_OTHER_EVENT_INFO_VALID_BYTES
);
531 assert (event_info
->other_event
.parent_construct
== acc_construct_parallel
);
532 assert (event_info
->other_event
.implicit
== 1);
533 assert (event_info
->other_event
.tool_info
== tool_info
->nested
);
535 if (acc_device_type
== acc_device_host
)
536 assert (api_info
->device_api
== acc_device_api_none
);
537 else if (acc_device_type
== acc_device_radeon
)
538 assert (api_info
->device_api
== acc_device_api_other
);
540 assert (api_info
->device_api
== acc_device_api_cuda
);
541 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
542 assert (api_info
->device_type
== prof_info
->device_type
);
543 assert (api_info
->vendor
== -1);
544 assert (api_info
->device_handle
== NULL
);
545 assert (api_info
->context_handle
== NULL
);
546 assert (api_info
->async_handle
== NULL
);
548 free (tool_info
->nested
);
549 tool_info
->nested
= NULL
;
552 static void cb_compute_construct_start (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
554 DEBUG_printf ("%s\n", __FUNCTION__
);
556 #if DEVICE_INIT_INSIDE_COMPUTE_CONSTRUCT
561 /* Compensate for the missing 'acc_ev_device_init_start' and
562 'acc_ev_device_init_end'. */
568 /* Compensate for the missing 'acc_ev_device_init_start' and
569 'acc_ev_device_init_end'. */
575 STATE_OP (state
, ++);
577 assert (tool_info
== NULL
);
578 tool_info
= (struct tool_info
*) malloc(sizeof *tool_info
);
579 assert (tool_info
!= NULL
);
580 tool_info
->nested
= NULL
;
582 assert (prof_info
->event_type
== acc_ev_compute_construct_start
);
583 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
584 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
585 assert (prof_info
->device_type
== acc_device_type
);
586 assert (prof_info
->device_number
== acc_device_num
);
587 assert (prof_info
->thread_id
== -1);
588 assert (prof_info
->async
== /* TODO acc_async */ acc_async_sync
);
589 assert (prof_info
->async_queue
== prof_info
->async
);
590 assert (prof_info
->src_file
== NULL
);
591 assert (prof_info
->func_name
== NULL
);
592 assert (prof_info
->line_no
== -1);
593 assert (prof_info
->end_line_no
== -1);
594 assert (prof_info
->func_line_no
== -1);
595 assert (prof_info
->func_end_line_no
== -1);
597 assert (event_info
->other_event
.event_type
== prof_info
->event_type
);
598 assert (event_info
->other_event
.valid_bytes
== _ACC_OTHER_EVENT_INFO_VALID_BYTES
);
599 assert (event_info
->other_event
.parent_construct
== acc_construct_parallel
);
600 assert (event_info
->other_event
.implicit
== 0);
601 assert (event_info
->other_event
.tool_info
== NULL
);
603 assert (api_info
->device_api
== acc_device_api_none
);
604 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
605 assert (api_info
->device_type
== prof_info
->device_type
);
606 assert (api_info
->vendor
== -1);
607 assert (api_info
->device_handle
== NULL
);
608 assert (api_info
->context_handle
== NULL
);
609 assert (api_info
->async_handle
== NULL
);
611 tool_info
->event_info
.other_event
.event_type
= event_info
->other_event
.event_type
;
612 event_info
->other_event
.tool_info
= tool_info
;
614 if (acc_device_type
== acc_device_host
)
616 /* Compensate for the missing 'acc_ev_enter_data_start'. */
618 /* Compensate for the missing 'acc_ev_alloc'. */
623 static void cb_compute_construct_end (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
625 DEBUG_printf ("%s\n", __FUNCTION__
);
627 if (acc_device_type
== acc_device_host
)
629 /* Compensate for the missing 'acc_ev_enter_data_end'. */
631 /* Compensate for the missing 'acc_ev_enqueue_launch_start' and
632 'acc_ev_enqueue_launch_end'. */
634 /* Compensate for the missing 'acc_ev_exit_data_start'. */
636 /* Compensate for the missing 'acc_ev_free'. */
638 /* Compensate for the missing 'acc_ev_exit_data_end'. */
642 else if (acc_async
!= acc_async_sync
)
644 /* Compensate for the missing 'acc_ev_exit_data_start' and
645 'acc_ev_exit_data_end'. */
651 STATE_OP (state
, ++);
653 assert (tool_info
!= NULL
);
654 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
655 assert (tool_info
->nested
== NULL
);
657 assert (prof_info
->event_type
== acc_ev_compute_construct_end
);
658 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
659 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
660 assert (prof_info
->device_type
== acc_device_type
);
661 assert (prof_info
->device_number
== acc_device_num
);
662 assert (prof_info
->thread_id
== -1);
663 if (acc_device_type
== acc_device_host
)
664 assert (prof_info
->async
== acc_async_sync
);
666 assert (prof_info
->async
== acc_async
);
667 assert (prof_info
->async_queue
== prof_info
->async
);
668 assert (prof_info
->src_file
== NULL
);
669 assert (prof_info
->func_name
== NULL
);
670 assert (prof_info
->line_no
== -1);
671 assert (prof_info
->end_line_no
== -1);
672 assert (prof_info
->func_line_no
== -1);
673 assert (prof_info
->func_end_line_no
== -1);
675 assert (event_info
->other_event
.event_type
== prof_info
->event_type
);
676 assert (event_info
->other_event
.valid_bytes
== _ACC_OTHER_EVENT_INFO_VALID_BYTES
);
677 assert (event_info
->other_event
.parent_construct
== acc_construct_parallel
);
678 assert (event_info
->other_event
.implicit
== 0);
679 assert (event_info
->other_event
.tool_info
== tool_info
);
681 if (acc_device_type
== acc_device_host
)
682 assert (api_info
->device_api
== acc_device_api_none
);
683 else if (acc_device_type
== acc_device_radeon
)
684 assert (api_info
->device_api
== acc_device_api_other
);
686 assert (api_info
->device_api
== acc_device_api_cuda
);
687 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
688 assert (api_info
->device_type
== prof_info
->device_type
);
689 assert (api_info
->vendor
== -1);
690 assert (api_info
->device_handle
== NULL
);
691 assert (api_info
->context_handle
== NULL
);
692 assert (api_info
->async_handle
== NULL
);
698 static void cb_enqueue_launch_start (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
700 DEBUG_printf ("%s\n", __FUNCTION__
);
702 assert (acc_device_type
!= acc_device_host
);
706 STATE_OP (state
, ++);
708 assert (tool_info
!= NULL
);
709 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
710 assert (tool_info
->nested
== NULL
);
711 tool_info
->nested
= (struct tool_info
*) malloc(sizeof *tool_info
);
712 assert (tool_info
->nested
!= NULL
);
713 tool_info
->nested
->nested
= NULL
;
715 assert (prof_info
->event_type
== acc_ev_enqueue_launch_start
);
716 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
717 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
718 assert (prof_info
->device_type
== acc_device_type
);
719 assert (prof_info
->device_number
== acc_device_num
);
720 assert (prof_info
->thread_id
== -1);
721 assert (prof_info
->async
== acc_async
);
722 assert (prof_info
->async_queue
== prof_info
->async
);
723 assert (prof_info
->src_file
== NULL
);
724 assert (prof_info
->func_name
== NULL
);
725 assert (prof_info
->line_no
== -1);
726 assert (prof_info
->end_line_no
== -1);
727 assert (prof_info
->func_line_no
== -1);
728 assert (prof_info
->func_end_line_no
== -1);
730 assert (event_info
->launch_event
.event_type
== prof_info
->event_type
);
731 assert (event_info
->launch_event
.valid_bytes
== _ACC_LAUNCH_EVENT_INFO_VALID_BYTES
);
732 assert (event_info
->launch_event
.parent_construct
== acc_construct_parallel
);
733 assert (event_info
->launch_event
.implicit
== 1);
734 assert (event_info
->launch_event
.tool_info
== NULL
);
735 assert (event_info
->launch_event
.kernel_name
!= NULL
);
737 const char *s
= strstr (event_info
->launch_event
.kernel_name
, "main");
739 s
= strstr (s
, "omp_fn");
742 assert (event_info
->launch_event
.num_gangs
>= 1);
743 assert (event_info
->launch_event
.num_workers
>= 1);
744 assert (event_info
->launch_event
.vector_length
>= 1);
746 if (acc_device_type
== acc_device_host
)
747 assert (api_info
->device_api
== acc_device_api_none
);
748 else if (acc_device_type
== acc_device_radeon
)
749 assert (api_info
->device_api
== acc_device_api_other
);
751 assert (api_info
->device_api
== acc_device_api_cuda
);
752 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
753 assert (api_info
->device_type
== prof_info
->device_type
);
754 assert (api_info
->vendor
== -1);
755 assert (api_info
->device_handle
== NULL
);
756 assert (api_info
->context_handle
== NULL
);
757 assert (api_info
->async_handle
== NULL
);
759 tool_info
->nested
->event_info
.launch_event
.event_type
= event_info
->launch_event
.event_type
;
760 tool_info
->nested
->event_info
.launch_event
.kernel_name
= strdup (event_info
->launch_event
.kernel_name
);
761 tool_info
->nested
->event_info
.launch_event
.num_gangs
= event_info
->launch_event
.num_gangs
;
762 tool_info
->nested
->event_info
.launch_event
.num_workers
= event_info
->launch_event
.num_workers
;
763 tool_info
->nested
->event_info
.launch_event
.vector_length
= event_info
->launch_event
.vector_length
;
764 event_info
->other_event
.tool_info
= tool_info
->nested
;
767 static void cb_enqueue_launch_end (acc_prof_info
*prof_info
, acc_event_info
*event_info
, acc_api_info
*api_info
)
769 DEBUG_printf ("%s\n", __FUNCTION__
);
771 assert (acc_device_type
!= acc_device_host
);
775 STATE_OP (state
, ++);
777 assert (tool_info
!= NULL
);
778 assert (tool_info
->event_info
.other_event
.event_type
== acc_ev_compute_construct_start
);
779 assert (tool_info
->nested
!= NULL
);
780 assert (tool_info
->nested
->event_info
.launch_event
.event_type
== acc_ev_enqueue_launch_start
);
781 assert (tool_info
->nested
->event_info
.launch_event
.kernel_name
!= NULL
);
782 assert (tool_info
->nested
->event_info
.launch_event
.num_gangs
>= 1);
783 assert (tool_info
->nested
->event_info
.launch_event
.num_workers
>= 1);
784 assert (tool_info
->nested
->event_info
.launch_event
.vector_length
>= 1);
786 assert (prof_info
->event_type
== acc_ev_enqueue_launch_end
);
787 assert (prof_info
->valid_bytes
== _ACC_PROF_INFO_VALID_BYTES
);
788 assert (prof_info
->version
== _ACC_PROF_INFO_VERSION
);
789 assert (prof_info
->device_type
== acc_device_type
);
790 assert (prof_info
->device_number
== acc_device_num
);
791 assert (prof_info
->thread_id
== -1);
792 assert (prof_info
->async
== acc_async
);
793 assert (prof_info
->async_queue
== prof_info
->async
);
794 assert (prof_info
->src_file
== NULL
);
795 assert (prof_info
->func_name
== NULL
);
796 assert (prof_info
->line_no
== -1);
797 assert (prof_info
->end_line_no
== -1);
798 assert (prof_info
->func_line_no
== -1);
799 assert (prof_info
->func_end_line_no
== -1);
801 assert (event_info
->launch_event
.event_type
== prof_info
->event_type
);
802 assert (event_info
->launch_event
.valid_bytes
== _ACC_LAUNCH_EVENT_INFO_VALID_BYTES
);
803 assert (event_info
->launch_event
.parent_construct
== acc_construct_parallel
);
804 assert (event_info
->launch_event
.implicit
== 1);
805 assert (event_info
->launch_event
.tool_info
== tool_info
->nested
);
806 assert (event_info
->launch_event
.kernel_name
!= NULL
);
807 assert (strcmp (event_info
->launch_event
.kernel_name
, tool_info
->nested
->event_info
.launch_event
.kernel_name
) == 0);
808 assert (event_info
->launch_event
.num_gangs
== tool_info
->nested
->event_info
.launch_event
.num_gangs
);
809 assert (event_info
->launch_event
.num_workers
== tool_info
->nested
->event_info
.launch_event
.num_workers
);
810 assert (event_info
->launch_event
.vector_length
== tool_info
->nested
->event_info
.launch_event
.vector_length
);
812 if (acc_device_type
== acc_device_host
)
813 assert (api_info
->device_api
== acc_device_api_none
);
814 else if (acc_device_type
== acc_device_radeon
)
815 assert (api_info
->device_api
== acc_device_api_other
);
817 assert (api_info
->device_api
== acc_device_api_cuda
);
818 assert (api_info
->valid_bytes
== _ACC_API_INFO_VALID_BYTES
);
819 assert (api_info
->device_type
== prof_info
->device_type
);
820 assert (api_info
->vendor
== -1);
821 assert (api_info
->device_handle
== NULL
);
822 assert (api_info
->context_handle
== NULL
);
823 assert (api_info
->async_handle
== NULL
);
825 free ((void *) tool_info
->nested
->event_info
.launch_event
.kernel_name
);
826 free (tool_info
->nested
);
827 tool_info
->nested
= NULL
;
833 acc_register_library (acc_prof_register
, acc_prof_unregister
, acc_prof_lookup
);
835 STATE_OP (state
, = 0);
836 reg (acc_ev_device_init_start
, cb_device_init_start
, acc_reg
);
837 reg (acc_ev_device_init_end
, cb_device_init_end
, acc_reg
);
838 reg (acc_ev_alloc
, cb_alloc
, acc_reg
);
839 reg (acc_ev_free
, cb_free
, acc_reg
);
840 reg (acc_ev_enter_data_start
, cb_enter_data_start
, acc_reg
);
841 reg (acc_ev_enter_data_end
, cb_enter_data_end
, acc_reg
);
842 reg (acc_ev_exit_data_start
, cb_exit_data_start
, acc_reg
);
843 reg (acc_ev_exit_data_end
, cb_exit_data_end
, acc_reg
);
844 reg (acc_ev_compute_construct_start
, cb_compute_construct_start
, acc_reg
);
845 reg (acc_ev_compute_construct_end
, cb_compute_construct_end
, acc_reg
);
846 reg (acc_ev_enqueue_launch_start
, cb_enqueue_launch_start
, acc_reg
);
847 reg (acc_ev_enqueue_launch_end
, cb_enqueue_launch_end
, acc_reg
);
850 acc_device_type
= acc_get_device_type ();
851 acc_device_num
= acc_get_device_num (acc_device_type
);
856 #pragma acc parallel COPYIN(state) copyout(state_init)
858 asm volatile ("" : : : "memory"); // TODO PR90488
862 assert (state_init
== 5);
864 assert (state
== 12);
866 STATE_OP (state
, = 100);
871 #pragma acc parallel async(acc_async) COPYIN(state) copyout(state_init)
873 asm volatile ("" : : : "memory"); // TODO PR90488
877 acc_async
= acc_async_sync
;
879 assert (state_init
== 105);
881 assert (state
== 112);