1 //===-- Generic device loader interface -----------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
10 #define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
12 #include "include/llvm-libc-types/test_rpc_opcodes_t.h"
14 #include "shared/rpc.h"
15 #include "shared/rpc_opcodes.h"
/// Generic launch parameters for configuring the number of blocks and threads
/// used for a kernel launch. Each quantity is given per dimension (x, y, z).
struct LaunchParameters {
  /// Number of threads along the x dimension.
  uint32_t num_threads_x;
  /// Number of threads along the y dimension.
  uint32_t num_threads_y;
  /// Number of threads along the z dimension.
  uint32_t num_threads_z;
  /// Number of blocks along the x dimension.
  uint32_t num_blocks_x;
  /// Number of blocks along the y dimension.
  uint32_t num_blocks_y;
  /// Number of blocks along the z dimension.
  uint32_t num_blocks_z;
};
33 /// The arguments to the '_begin' kernel.
40 /// The arguments to the '_start' kernel.
48 /// The arguments to the '_end' kernel.
53 /// Generic interface to load the \p image and launch execution of the _start
54 /// kernel on the target device. Copies \p argc and \p argv to the device.
55 /// Returns the final value of the `main` function on the device.
56 int load(int argc
, const char **argv
, const char **evnp
, void *image
,
57 size_t size
, const LaunchParameters
¶ms
,
58 bool print_resource_usage
);
/// Round \p val "upwards" to the nearest multiple of \p align.
///
/// \p align is converted to \c V before use and the result is returned as
/// \c V. The computation divides by \p align, so it must be non-zero.
template <typename V, typename A> inline V align_up(V val, A align) {
  const V step = V(align);
  return ((val + step - 1) / step) * step;
}
/// Copy the system's argument vector to GPU memory allocated using \p alloc.
///
/// \p alloc is called exactly once with the total number of bytes required.
/// The resulting buffer holds `argc + 1` pointers (the last one is null)
/// followed immediately by the string data, each string keeping its
/// terminating null byte. Every pointer in the table refers into that same
/// buffer.
///
/// \param argc  Number of strings in \p argv to copy.
/// \param argv  Array of at least \p argc null-terminated strings.
/// \param alloc Callable taking a byte count and returning a pointer to
///              writable memory, or null on failure.
/// \returns The start of the buffer, or nullptr if \p alloc returned null.
template <typename Allocator>
void *copy_argument_vector(int argc, const char **argv, Allocator alloc) {
  const size_t argv_size = sizeof(char *) * (argc + 1);
  size_t str_size = 0;
  for (int i = 0; i < argc; ++i)
    str_size += strlen(argv[i]) + 1;

  // We allocate enough space for a null terminated array and all the strings.
  void *dev_argv = alloc(argv_size + str_size);
  if (!dev_argv)
    return nullptr;

  // Store the strings linearly in the same memory buffer, right after the
  // pointer table.
  void **dev_ptrs = static_cast<void **>(dev_argv);
  uint8_t *dev_str = static_cast<uint8_t *>(dev_argv) + argv_size;
  for (int i = 0; i < argc; ++i) {
    const size_t size = strlen(argv[i]) + 1;
    std::memcpy(dev_str, argv[i], size);
    dev_ptrs[i] = dev_str;
    dev_str += size;
  }

  // Ensure the vector is null terminated.
  dev_ptrs[argc] = nullptr;
  return dev_argv;
}
/// Copy the system's environment to GPU memory allocated using \p alloc.
///
/// Counts the entries of \p envp up to (not including) its terminating null
/// pointer and forwards them to copy_argument_vector, whose result is
/// returned unchanged.
///
/// \param envp  Null-terminated array of environment strings.
/// \param alloc Allocation callable forwarded to copy_argument_vector.
template <typename Allocator>
void *copy_environment(const char **envp, Allocator alloc) {
  int envc = 0;
  while (envp[envc] != nullptr)
    ++envc;

  return copy_argument_vector(envc, envp, alloc);
}
/// Report an error on stderr in the form `<file>:<line>:0: Error: <msg>`.
///
/// \param file Source file the error is attributed to.
/// \param line Source line the error is attributed to.
/// \param msg  Human readable description of the failure.
// NOTE(review): only the diagnostic print is visible in this block; confirm
// whether callers rely on this function also terminating the process.
inline void handle_error_impl(const char *file, int32_t line, const char *msg) {
  fprintf(stderr, "%s:%d:0: Error: %s\n", file, static_cast<int>(line), msg);
}
/// Report \p msg through handle_error_impl, tagged with the file and line of
/// the call site.
#define handle_error(msg) handle_error_impl(__FILE__, __LINE__, msg)
108 template <uint32_t num_lanes
, typename Alloc
, typename Free
>
109 inline uint32_t handle_server(rpc::Server
&server
, uint32_t index
,
110 Alloc
&&alloc
, Free
&&free
) {
111 auto port
= server
.try_open(num_lanes
, index
);
114 index
= port
->get_index() + 1;
116 int status
= rpc::RPC_SUCCESS
;
117 switch (port
->get_opcode()) {
118 case RPC_TEST_INCREMENT
: {
119 port
->recv_and_send([](rpc::Buffer
*buffer
, uint32_t) {
120 reinterpret_cast<uint64_t *>(buffer
->data
)[0] += 1;
124 case RPC_TEST_INTERFACE
: {
127 port
->recv([&](rpc::Buffer
*buffer
, uint32_t) {
128 end_with_recv
= buffer
->data
[0];
130 port
->recv([&](rpc::Buffer
*buffer
, uint32_t) { cnt
= buffer
->data
[0]; });
131 port
->send([&](rpc::Buffer
*buffer
, uint32_t) {
132 buffer
->data
[0] = cnt
= cnt
+ 1;
134 port
->recv([&](rpc::Buffer
*buffer
, uint32_t) { cnt
= buffer
->data
[0]; });
135 port
->send([&](rpc::Buffer
*buffer
, uint32_t) {
136 buffer
->data
[0] = cnt
= cnt
+ 1;
138 port
->recv([&](rpc::Buffer
*buffer
, uint32_t) { cnt
= buffer
->data
[0]; });
139 port
->recv([&](rpc::Buffer
*buffer
, uint32_t) { cnt
= buffer
->data
[0]; });
140 port
->send([&](rpc::Buffer
*buffer
, uint32_t) {
141 buffer
->data
[0] = cnt
= cnt
+ 1;
143 port
->send([&](rpc::Buffer
*buffer
, uint32_t) {
144 buffer
->data
[0] = cnt
= cnt
+ 1;
147 port
->recv([&](rpc::Buffer
*buffer
, uint32_t) { cnt
= buffer
->data
[0]; });
149 port
->send([&](rpc::Buffer
*buffer
, uint32_t) {
150 buffer
->data
[0] = cnt
= cnt
+ 1;
155 case RPC_TEST_STREAM
: {
156 uint64_t sizes
[num_lanes
] = {0};
157 void *dst
[num_lanes
] = {nullptr};
158 port
->recv_n(dst
, sizes
,
159 [](uint64_t size
) -> void * { return new char[size
]; });
160 port
->send_n(dst
, sizes
);
161 for (uint64_t i
= 0; i
< num_lanes
; ++i
) {
163 delete[] reinterpret_cast<uint8_t *>(dst
[i
]);
167 case RPC_TEST_NOOP
: {
168 port
->recv([&](rpc::Buffer
*, uint32_t) {});
172 port
->recv_and_send([&](rpc::Buffer
*buffer
, uint32_t) {
173 buffer
->data
[0] = reinterpret_cast<uintptr_t>(alloc(buffer
->data
[0]));
178 port
->recv([&](rpc::Buffer
*buffer
, uint32_t) {
179 free(reinterpret_cast<void *>(buffer
->data
[0]));
184 status
= handle_libc_opcodes(*port
, num_lanes
);
188 // Handle all of the `libc` specific opcodes.
189 if (status
!= rpc::RPC_SUCCESS
)
190 handle_error("Error handling RPC server");