1 /* Run a stand-alone AMD GCN kernel.
3 Copyright 2017 Mentor Graphics Corporation
4 Copyright (C) 2018-2024 Free Software Foundation, Inc.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 /* This program will run a compiled stand-alone GCN kernel on a GPU.
21 The kernel entry point's signature must use a standard main signature:
23 int main(int argc, char **argv)
38 #include "../../../libgomp/config/gcn/libgomp-gcn.h"
40 #ifndef HSA_RUNTIME_LIB
41 #define HSA_RUNTIME_LIB "libhsa-runtime64.so.1"
44 #ifndef VERSION_STRING
45 #define VERSION_STRING "(version unknown)"
50 hsa_agent_t device
= { 0 };
51 hsa_queue_t
*queue
= NULL
;
52 uint64_t init_array_kernel
= 0;
53 uint64_t fini_array_kernel
= 0;
54 uint64_t main_kernel
= 0;
55 hsa_executable_t executable
= { 0 };
57 hsa_region_t kernargs_region
= { 0 };
58 hsa_region_t heap_region
= { 0 };
59 uint32_t kernarg_segment_size
= 0;
60 uint32_t group_segment_size
= 0;
61 uint32_t private_segment_size
= 0;
64 usage (const char *progname
)
66 printf ("Usage: %s [options] kernel [kernel-args]\n\n"
70 " --debug\n", progname
);
74 version (const char *progname
)
76 printf ("%s " VERSION_STRING
"\n", progname
);
79 /* As an HSA runtime is dlopened, following structure defines the necessary
81 Code adapted from libgomp. */
83 struct hsa_runtime_fn_info
86 hsa_status_t (*hsa_status_string_fn
) (hsa_status_t status
,
87 const char **status_string
);
88 hsa_status_t (*hsa_agent_get_info_fn
) (hsa_agent_t agent
,
89 hsa_agent_info_t attribute
,
91 hsa_status_t (*hsa_init_fn
) (void);
92 hsa_status_t (*hsa_iterate_agents_fn
)
93 (hsa_status_t (*callback
) (hsa_agent_t agent
, void *data
), void *data
);
94 hsa_status_t (*hsa_region_get_info_fn
) (hsa_region_t region
,
95 hsa_region_info_t attribute
,
97 hsa_status_t (*hsa_queue_create_fn
)
98 (hsa_agent_t agent
, uint32_t size
, hsa_queue_type_t type
,
99 void (*callback
) (hsa_status_t status
, hsa_queue_t
*source
, void *data
),
100 void *data
, uint32_t private_segment_size
,
101 uint32_t group_segment_size
, hsa_queue_t
**queue
);
102 hsa_status_t (*hsa_agent_iterate_regions_fn
)
104 hsa_status_t (*callback
) (hsa_region_t region
, void *data
), void *data
);
105 hsa_status_t (*hsa_executable_destroy_fn
) (hsa_executable_t executable
);
106 hsa_status_t (*hsa_executable_create_fn
)
107 (hsa_profile_t profile
, hsa_executable_state_t executable_state
,
108 const char *options
, hsa_executable_t
*executable
);
109 hsa_status_t (*hsa_executable_global_variable_define_fn
)
110 (hsa_executable_t executable
, const char *variable_name
, void *address
);
111 hsa_status_t (*hsa_executable_load_code_object_fn
)
112 (hsa_executable_t executable
, hsa_agent_t agent
,
113 hsa_code_object_t code_object
, const char *options
);
114 hsa_status_t (*hsa_executable_freeze_fn
) (hsa_executable_t executable
,
115 const char *options
);
116 hsa_status_t (*hsa_signal_create_fn
) (hsa_signal_value_t initial_value
,
117 uint32_t num_consumers
,
118 const hsa_agent_t
*consumers
,
119 hsa_signal_t
*signal
);
120 hsa_status_t (*hsa_memory_allocate_fn
) (hsa_region_t region
, size_t size
,
122 hsa_status_t (*hsa_memory_assign_agent_fn
) (void *ptr
, hsa_agent_t agent
,
123 hsa_access_permission_t access
);
124 hsa_status_t (*hsa_memory_copy_fn
) (void *dst
, const void *src
,
126 hsa_status_t (*hsa_memory_free_fn
) (void *ptr
);
127 hsa_status_t (*hsa_signal_destroy_fn
) (hsa_signal_t signal
);
128 hsa_status_t (*hsa_executable_get_symbol_fn
)
129 (hsa_executable_t executable
, const char *module_name
,
130 const char *symbol_name
, hsa_agent_t agent
, int32_t call_convention
,
131 hsa_executable_symbol_t
*symbol
);
132 hsa_status_t (*hsa_executable_symbol_get_info_fn
)
133 (hsa_executable_symbol_t executable_symbol
,
134 hsa_executable_symbol_info_t attribute
, void *value
);
135 void (*hsa_signal_store_relaxed_fn
) (hsa_signal_t signal
,
136 hsa_signal_value_t value
);
137 hsa_signal_value_t (*hsa_signal_wait_acquire_fn
)
138 (hsa_signal_t signal
, hsa_signal_condition_t condition
,
139 hsa_signal_value_t compare_value
, uint64_t timeout_hint
,
140 hsa_wait_state_t wait_state_hint
);
141 hsa_signal_value_t (*hsa_signal_wait_relaxed_fn
)
142 (hsa_signal_t signal
, hsa_signal_condition_t condition
,
143 hsa_signal_value_t compare_value
, uint64_t timeout_hint
,
144 hsa_wait_state_t wait_state_hint
);
145 hsa_status_t (*hsa_queue_destroy_fn
) (hsa_queue_t
*queue
);
146 hsa_status_t (*hsa_code_object_deserialize_fn
)
147 (void *serialized_code_object
, size_t serialized_code_object_size
,
148 const char *options
, hsa_code_object_t
*code_object
);
149 uint64_t (*hsa_queue_load_write_index_relaxed_fn
)
150 (const hsa_queue_t
*queue
);
151 void (*hsa_queue_store_write_index_relaxed_fn
)
152 (const hsa_queue_t
*queue
, uint64_t value
);
153 hsa_status_t (*hsa_shut_down_fn
) ();
156 /* HSA runtime functions that are initialized in init_hsa_context.
157 Code adapted from libgomp. */
159 static struct hsa_runtime_fn_info hsa_fns
;
161 #define DLSYM_FN(function) \
162 *(void**)(&hsa_fns.function##_fn) = dlsym (handle, #function); \
163 if (hsa_fns.function##_fn == NULL) \
167 init_hsa_runtime_functions (void)
169 void *handle
= dlopen (HSA_RUNTIME_LIB
, RTLD_LAZY
);
173 "The HSA runtime is required to run GCN kernels on hardware.\n"
174 "%s: File not found or could not be opened\n",
179 DLSYM_FN (hsa_status_string
)
180 DLSYM_FN (hsa_agent_get_info
)
182 DLSYM_FN (hsa_iterate_agents
)
183 DLSYM_FN (hsa_region_get_info
)
184 DLSYM_FN (hsa_queue_create
)
185 DLSYM_FN (hsa_agent_iterate_regions
)
186 DLSYM_FN (hsa_executable_destroy
)
187 DLSYM_FN (hsa_executable_create
)
188 DLSYM_FN (hsa_executable_global_variable_define
)
189 DLSYM_FN (hsa_executable_load_code_object
)
190 DLSYM_FN (hsa_executable_freeze
)
191 DLSYM_FN (hsa_signal_create
)
192 DLSYM_FN (hsa_memory_allocate
)
193 DLSYM_FN (hsa_memory_assign_agent
)
194 DLSYM_FN (hsa_memory_copy
)
195 DLSYM_FN (hsa_memory_free
)
196 DLSYM_FN (hsa_signal_destroy
)
197 DLSYM_FN (hsa_executable_get_symbol
)
198 DLSYM_FN (hsa_executable_symbol_get_info
)
199 DLSYM_FN (hsa_signal_wait_acquire
)
200 DLSYM_FN (hsa_signal_wait_relaxed
)
201 DLSYM_FN (hsa_signal_store_relaxed
)
202 DLSYM_FN (hsa_queue_destroy
)
203 DLSYM_FN (hsa_code_object_deserialize
)
204 DLSYM_FN (hsa_queue_load_write_index_relaxed
)
205 DLSYM_FN (hsa_queue_store_write_index_relaxed
)
206 DLSYM_FN (hsa_shut_down
)
211 fprintf (stderr
, "Failed to find HSA functions in " HSA_RUNTIME_LIB
"\n");
217 /* Report a fatal error STR together with the HSA error corresponding to
218 STATUS and terminate execution of the current process. */
221 hsa_fatal (const char *str
, hsa_status_t status
)
223 const char *hsa_error_msg
;
224 hsa_fns
.hsa_status_string_fn (status
, &hsa_error_msg
);
225 fprintf (stderr
, "%s: FAILED\nHSA Runtime message: %s\n", str
,
230 /* Helper macros to ensure we check the return values from the HSA Runtime.
231 These just keep the rest of the code a bit cleaner. */
233 #define XHSA_CMP(FN, CMP, MSG) \
235 hsa_status_t status = (FN); \
237 hsa_fatal ((MSG), status); \
239 fprintf (stderr, "%s: OK\n", (MSG)); \
241 #define XHSA(FN, MSG) XHSA_CMP(FN, status == HSA_STATUS_SUCCESS, MSG)
243 /* Callback of hsa_iterate_agents.
244 Called once for each available device, and returns "break" when a
245 suitable one has been found. */
248 get_gpu_agent (hsa_agent_t agent
, void *data
__attribute__ ((unused
)))
250 hsa_device_type_t device_type
;
251 XHSA (hsa_fns
.hsa_agent_get_info_fn (agent
, HSA_AGENT_INFO_DEVICE
,
255 /* Select only GPU devices. */
256 /* TODO: support selecting from multiple GPUs. */
257 if (HSA_DEVICE_TYPE_GPU
== device_type
)
260 return HSA_STATUS_INFO_BREAK
;
263 /* The device was not suitable. */
264 return HSA_STATUS_SUCCESS
;
267 /* Callback of hsa_iterate_regions.
268 Called once for each available memory region, and returns "break" when a
269 suitable one has been found. */
272 get_memory_region (hsa_region_t region
, hsa_region_t
*retval
,
273 hsa_region_global_flag_t kind
)
275 /* Reject non-global regions. */
276 hsa_region_segment_t segment
;
277 hsa_fns
.hsa_region_get_info_fn (region
, HSA_REGION_INFO_SEGMENT
, &segment
);
278 if (HSA_REGION_SEGMENT_GLOBAL
!= segment
)
279 return HSA_STATUS_SUCCESS
;
281 /* Find a region with the KERNARG flag set. */
282 hsa_region_global_flag_t flags
;
283 hsa_fns
.hsa_region_get_info_fn (region
, HSA_REGION_INFO_GLOBAL_FLAGS
,
288 return HSA_STATUS_INFO_BREAK
;
291 /* The region was not suitable. */
292 return HSA_STATUS_SUCCESS
;
296 get_kernarg_region (hsa_region_t region
, void *data
__attribute__((unused
)))
298 return get_memory_region (region
, &kernargs_region
,
299 HSA_REGION_GLOBAL_FLAG_KERNARG
);
303 get_heap_region (hsa_region_t region
, void *data
__attribute__((unused
)))
305 return get_memory_region (region
, &heap_region
,
306 HSA_REGION_GLOBAL_FLAG_COARSE_GRAINED
);
309 /* Initialize the HSA Runtime library and GPU device. */
314 /* Load the shared library and find the API functions. */
315 init_hsa_runtime_functions ();
317 /* Initialize the HSA Runtime. */
318 XHSA (hsa_fns
.hsa_init_fn (),
319 "Initialize run-time");
321 /* Select a suitable device.
322 The call-back function, get_gpu_agent, does the selection. */
323 XHSA_CMP (hsa_fns
.hsa_iterate_agents_fn (get_gpu_agent
, NULL
),
324 status
== HSA_STATUS_SUCCESS
|| status
== HSA_STATUS_INFO_BREAK
,
327 /* Initialize the queue used for launching kernels. */
328 uint32_t queue_size
= 0;
329 XHSA (hsa_fns
.hsa_agent_get_info_fn (device
, HSA_AGENT_INFO_QUEUE_MAX_SIZE
,
331 "Find max queue size");
332 XHSA (hsa_fns
.hsa_queue_create_fn (device
, queue_size
,
333 HSA_QUEUE_TYPE_SINGLE
, NULL
,
334 NULL
, UINT32_MAX
, UINT32_MAX
, &queue
),
335 "Set up a device queue");
337 /* Select a memory region for the kernel arguments.
338 The call-back function, get_kernarg_region, does the selection. */
339 XHSA_CMP (hsa_fns
.hsa_agent_iterate_regions_fn (device
, get_kernarg_region
,
341 status
== HSA_STATUS_SUCCESS
|| status
== HSA_STATUS_INFO_BREAK
,
342 "Locate kernargs memory");
344 /* Select a memory region for the kernel heap.
345 The call-back function, get_heap_region, does the selection. */
346 XHSA_CMP (hsa_fns
.hsa_agent_iterate_regions_fn (device
, get_heap_region
,
348 status
== HSA_STATUS_SUCCESS
|| status
== HSA_STATUS_INFO_BREAK
,
349 "Locate device memory");
353 /* Read a whole input file.
354 Code copied from mkoffload. */
357 read_file (const char *filename
, size_t *plen
)
359 size_t alloc
= 16384;
363 FILE *stream
= fopen (filename
, "rb");
370 if (!fseek (stream
, 0, SEEK_END
))
372 /* Get the file size. */
373 long s
= ftell (stream
);
376 fseek (stream
, 0, SEEK_SET
);
378 buffer
= malloc (alloc
);
382 size_t n
= fread (buffer
+ base
, 1, alloc
- base
- 1, stream
);
387 if (base
+ 1 == alloc
)
390 buffer
= realloc (buffer
, alloc
);
401 /* Read a HSA Code Object (HSACO) from file, and load it into the device. */
404 load_image (const char *filename
)
407 Elf64_Ehdr
*image
= (void *) read_file (filename
, &image_size
);
409 /* An "executable" consists of one or more code objects. */
410 XHSA (hsa_fns
.hsa_executable_create_fn (HSA_PROFILE_FULL
,
411 HSA_EXECUTABLE_STATE_UNFROZEN
, "",
413 "Initialize GCN executable");
415 /* Add the HSACO to the executable. */
416 hsa_code_object_t co
= { 0 };
417 XHSA (hsa_fns
.hsa_code_object_deserialize_fn (image
, image_size
, NULL
, &co
),
418 "Deserialize GCN code object");
419 XHSA (hsa_fns
.hsa_executable_load_code_object_fn (executable
, device
, co
,
421 "Load GCN code object");
423 /* We're done modifying he executable. */
424 XHSA (hsa_fns
.hsa_executable_freeze_fn (executable
, ""),
425 "Freeze GCN executable");
427 /* Locate the "_init_array" function, and read the kernel's properties. */
428 hsa_executable_symbol_t symbol
;
429 XHSA (hsa_fns
.hsa_executable_get_symbol_fn (executable
, NULL
,
430 "_init_array.kd", device
, 0,
432 "Find '_init_array' function");
433 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
434 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT
,
436 "Extract '_init_array' kernel object kernel object");
438 /* Locate the "_fini_array" function, and read the kernel's properties. */
439 XHSA (hsa_fns
.hsa_executable_get_symbol_fn (executable
, NULL
,
440 "_fini_array.kd", device
, 0,
442 "Find '_fini_array' function");
443 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
444 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT
,
446 "Extract '_fini_array' kernel object kernel object");
448 /* Locate the "main" function, and read the kernel's properties. */
449 XHSA (hsa_fns
.hsa_executable_get_symbol_fn (executable
, NULL
, "main.kd",
451 "Find 'main' function");
452 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
453 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_OBJECT
, &main_kernel
),
454 "Extract 'main' kernel object");
455 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
456 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE
,
457 &kernarg_segment_size
),
458 "Extract kernarg segment size");
459 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
460 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE
,
461 &group_segment_size
),
462 "Extract group segment size");
463 XHSA (hsa_fns
.hsa_executable_symbol_get_info_fn
464 (symbol
, HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE
,
465 &private_segment_size
),
466 "Extract private segment size");
469 /* Allocate some device memory from the kernargs region.
470 The returned address will be 32-bit (with excess zeroed on 64-bit host),
471 and accessible via the same address on both host and target (via
472 __flat_scalar GCN address space). */
475 device_malloc (size_t size
, hsa_region_t region
)
478 XHSA (hsa_fns
.hsa_memory_allocate_fn (region
, size
, &result
),
479 "Allocate device memory");
483 /* These are the device pointers that will be transferred to the target.
484 The HSA Runtime points the kernargs register here.
485 They correspond to function signature:
486 int main (int argc, char *argv[], int *return_value)
487 The compiler expects this, for kernel functions, and will
488 automatically assign the exit value to *return_value. */
496 struct kernargs_abi abi
;
498 struct output output_data
;
501 /* Print any console output from the kernel.
502 We print all entries from "consumed" to the next entry without a "written"
503 flag, or "next_output" is reached. The buffer is circular, but the
504 indices are absolute. It is assumed the kernel will stop writing data
505 if "next_output" wraps (becomes smaller than "consumed"). */
507 gomp_print_output (struct kernargs
*kernargs
, bool final
)
509 unsigned int limit
= (sizeof (kernargs
->output_data
.queue
)
510 / sizeof (kernargs
->output_data
.queue
[0]));
512 unsigned int from
= __atomic_load_n (&kernargs
->output_data
.consumed
,
514 unsigned int to
= kernargs
->output_data
.next_output
;
520 printf ("GCN print buffer overflowed.\n");
525 for (i
= from
; i
< to
; i
++)
527 struct printf_data
*data
= &kernargs
->output_data
.queue
[i
%limit
];
529 if (!data
->written
&& !final
)
535 printf ("%.128s%ld\n", data
->msg
, data
->ivalue
);
538 printf ("%.128s%f\n", data
->msg
, data
->dvalue
);
541 printf ("%.128s%.128s\n", data
->msg
, data
->text
);
544 printf ("%.128s%.128s", data
->msg
, data
->text
);
547 printf ("GCN print buffer error!\n");
552 __atomic_store_n (&kernargs
->output_data
.consumed
, i
+1,
558 /* Execute an already-loaded kernel on the device. */
561 run (uint64_t kernel
, void *kernargs
)
563 /* A "signal" is used to launch and monitor the kernel. */
565 XHSA (hsa_fns
.hsa_signal_create_fn (1, 0, NULL
, &signal
),
568 /* Configure for a single-worker kernel. */
569 uint64_t index
= hsa_fns
.hsa_queue_load_write_index_relaxed_fn (queue
);
570 const uint32_t queueMask
= queue
->size
- 1;
571 hsa_kernel_dispatch_packet_t
*dispatch_packet
=
572 &(((hsa_kernel_dispatch_packet_t
*) (queue
->base_address
))[index
&
574 dispatch_packet
->setup
|= 3 << HSA_KERNEL_DISPATCH_PACKET_SETUP_DIMENSIONS
;
575 dispatch_packet
->workgroup_size_x
= (uint16_t) 1;
576 dispatch_packet
->workgroup_size_y
= (uint16_t) 64;
577 dispatch_packet
->workgroup_size_z
= (uint16_t) 1;
578 dispatch_packet
->grid_size_x
= 1;
579 dispatch_packet
->grid_size_y
= 64;
580 dispatch_packet
->grid_size_z
= 1;
581 dispatch_packet
->completion_signal
= signal
;
582 dispatch_packet
->kernel_object
= kernel
;
583 dispatch_packet
->kernarg_address
= (void *) kernargs
;
584 dispatch_packet
->private_segment_size
= private_segment_size
;
585 dispatch_packet
->group_segment_size
= group_segment_size
;
588 header
|= HSA_FENCE_SCOPE_SYSTEM
<< HSA_PACKET_HEADER_ACQUIRE_FENCE_SCOPE
;
589 header
|= HSA_FENCE_SCOPE_SYSTEM
<< HSA_PACKET_HEADER_RELEASE_FENCE_SCOPE
;
590 header
|= HSA_PACKET_TYPE_KERNEL_DISPATCH
<< HSA_PACKET_HEADER_TYPE
;
592 __atomic_store_n ((uint32_t *) dispatch_packet
,
593 header
| (dispatch_packet
->setup
<< 16),
597 fprintf (stderr
, "Launch kernel\n");
599 hsa_fns
.hsa_queue_store_write_index_relaxed_fn (queue
, index
+ 1);
600 hsa_fns
.hsa_signal_store_relaxed_fn (queue
->doorbell_signal
, index
);
601 /* Kernel running ...... */
602 while (hsa_fns
.hsa_signal_wait_relaxed_fn (signal
, HSA_SIGNAL_CONDITION_LT
,
604 HSA_WAIT_STATE_ACTIVE
) != 0)
607 gomp_print_output (kernargs
, false);
610 gomp_print_output (kernargs
, true);
613 fprintf (stderr
, "Kernel exited\n");
615 XHSA (hsa_fns
.hsa_signal_destroy_fn (signal
),
620 main (int argc
, char *argv
[])
623 for (int i
= 1; i
< argc
; i
++)
625 if (!strcmp (argv
[i
], "--help"))
630 else if (!strcmp (argv
[i
], "--version"))
635 else if (!strcmp (argv
[i
], "--debug"))
637 else if (argv
[i
][0] == '-')
651 /* No kernel arguments were found. */
656 /* The remaining arguments are for the GCN kernel. */
657 int kernel_argc
= argc
- kernel_arg
;
658 char **kernel_argv
= &argv
[kernel_arg
];
661 load_image (kernel_argv
[0]);
663 /* Calculate size of function parameters + argv data. */
664 size_t args_size
= 0;
665 for (int i
= 0; i
< kernel_argc
; i
++)
666 args_size
+= strlen (kernel_argv
[i
]) + 1;
668 /* The device stack can be adjusted via an environment variable. */
669 char *envvar
= getenv ("GCN_STACK_SIZE");
670 int stack_size
= 1 * 1024 * 1024; /* 1MB default. */
673 int val
= atoi (envvar
);
678 /* Allocate device memory for both function parameters and the argv
680 struct kernargs
*kernargs
= device_malloc (sizeof (*kernargs
),
684 int64_t argv_data
[kernel_argc
];
685 char strings
[args_size
];
686 } *args
= device_malloc (sizeof (struct argdata
), kernargs_region
);
688 size_t heap_size
= 10 * 1024 * 1024; /* 10MB. */
689 struct heap
*heap
= device_malloc (heap_size
, heap_region
);
690 XHSA (hsa_fns
.hsa_memory_assign_agent_fn (heap
, device
,
691 HSA_ACCESS_PERMISSION_RW
),
692 "Assign heap to device agent");
693 void *stack
= device_malloc (stack_size
, heap_region
);
695 /* Write the data to the target. */
696 kernargs
->args
.argc
= kernel_argc
;
697 kernargs
->args
.argv
= (int64_t) args
->argv_data
;
698 kernargs
->abi
.out_ptr
= (int64_t) &kernargs
->output_data
;
699 kernargs
->output_data
.return_value
= 0xcafe0000; /* Default return value. */
700 kernargs
->output_data
.next_output
= 0;
701 for (unsigned i
= 0; i
< (sizeof (kernargs
->output_data
.queue
)
702 / sizeof (kernargs
->output_data
.queue
[0])); i
++)
703 kernargs
->output_data
.queue
[i
].written
= 0;
704 kernargs
->output_data
.consumed
= 0;
706 for (int i
= 0; i
< kernel_argc
; i
++)
708 size_t arg_len
= strlen (kernel_argv
[i
]) + 1;
709 args
->argv_data
[i
] = (int64_t) &args
->strings
[offset
];
710 memcpy (&args
->strings
[offset
], kernel_argv
[i
], arg_len
+ 1);
713 kernargs
->abi
.heap_ptr
= (int64_t) heap
;
714 hsa_fns
.hsa_memory_copy_fn (&heap
->size
, &heap_size
, sizeof (heap_size
));
715 kernargs
->abi
.arena_ptr
= 0;
716 kernargs
->abi
.stack_ptr
= (int64_t) stack
;
717 kernargs
->abi
.stack_size_per_thread
= stack_size
;
719 /* Run constructors on the GPU. */
720 run (init_array_kernel
, kernargs
);
722 /* Run the kernel on the GPU. */
723 run (main_kernel
, kernargs
);
724 unsigned int return_value
=
725 (unsigned int) kernargs
->output_data
.return_value
;
727 /* Run destructors on the GPU. */
728 run (fini_array_kernel
, kernargs
);
730 unsigned int upper
= (return_value
& ~0xffff) >> 16;
733 printf ("Kernel exit value was never set\n");
736 else if (upper
== 0xffff)
739 ; /* Set by return from main. */
741 printf ("Possible kernel exit value corruption, 2 most significant bytes "
742 "aren't 0xffff, 0xcafe, or 0: 0x%x\n", return_value
);
746 unsigned int signal
= (return_value
>> 8) & 0xff;
747 if (signal
== SIGABRT
)
748 printf ("Kernel aborted\n");
749 else if (signal
!= 0)
750 printf ("Kernel received unkown signal\n");
754 printf ("Kernel exit value: %d\n", return_value
& 0xff);
756 /* Clean shut down. */
757 XHSA (hsa_fns
.hsa_memory_free_fn (kernargs
),
758 "Clean up device kernargs memory");
759 XHSA (hsa_fns
.hsa_memory_free_fn (args
),
760 "Clean up device args memory");
761 XHSA (hsa_fns
.hsa_memory_free_fn (heap
),
762 "Clean up device heap memory");
763 XHSA (hsa_fns
.hsa_memory_free_fn (stack
),
764 "Clean up device stack memory");
765 XHSA (hsa_fns
.hsa_executable_destroy_fn (executable
),
766 "Clean up GCN executable");
767 XHSA (hsa_fns
.hsa_queue_destroy_fn (queue
),
768 "Clean up device queue");
769 XHSA (hsa_fns
.hsa_shut_down_fn (),
770 "Shut down run-time");
772 return return_value
& 0xff;