1 //===-- Generic device loader interface -----------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
10 #define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
12 #include "utils/gpu/server/rpc_server.h"
14 #include "include/llvm-libc-types/test_rpc_opcodes_t.h"
/// Generic launch parameters for configuring the number of blocks / threads.
struct LaunchParameters {
  /// Thread counts along the x, y, and z dimensions.
  uint32_t num_threads_x;
  uint32_t num_threads_y;
  uint32_t num_threads_z;

  /// Block counts along the x, y, and z dimensions.
  uint32_t num_blocks_x;
  uint32_t num_blocks_y;
  uint32_t num_blocks_z;
};
32 /// The arguments to the '_begin' kernel.
39 /// The arguments to the '_start' kernel.
47 /// The arguments to the '_end' kernel.
52 /// Generic interface to load the \p image and launch execution of the _start
53 /// kernel on the target device. Copies \p argc and \p argv to the device.
54 /// Returns the final value of the `main` function on the device.
55 int load(int argc
, char **argv
, char **evnp
, void *image
, size_t size
,
56 const LaunchParameters
¶ms
);
/// Round \p val up to the nearest multiple of \p align.
///
/// \p align is converted to \p V before use. A value that is already a
/// multiple of \p align is returned unchanged.
template <typename V, typename A> inline V align_up(V val, A align) {
  const V step = V(align);
  // Keep the intermediates in their promoted type so that narrowing to V only
  // happens once, on return.
  const auto bumped = val + step - 1;
  const auto multiples = bumped / step;
  return multiples * step;
}
/// Copy the system's argument vector to GPU memory allocated using \p alloc.
///
/// A single allocation is requested and laid out as `argc + 1` pointers (the
/// last one null) followed directly by the packed, null-terminated strings
/// those pointers refer to.
///
/// \param argc  Number of entries of \p argv to copy.
/// \param argv  Argument vector; the first \p argc entries must be valid
///              null-terminated strings.
/// \param alloc Callable taking a size in bytes and returning memory of at
///              least that size that this function can write to, or null on
///              failure.
/// \returns The start of the copied vector, or nullptr if \p alloc returned
///          null.
template <typename Allocator>
void *copy_argument_vector(int argc, char **argv, Allocator alloc) {
  size_t argv_size = sizeof(char *) * (argc + 1);
  size_t str_size = 0;
  for (int i = 0; i < argc; ++i)
    str_size += std::strlen(argv[i]) + 1;

  // We allocate enough space for a null terminated array and all the strings.
  void *dev_argv = alloc(argv_size + str_size);
  if (!dev_argv)
    return nullptr;

  // Store the strings linearly in the same memory buffer.
  void *dev_str = reinterpret_cast<uint8_t *>(dev_argv) + argv_size;
  for (int i = 0; i < argc; ++i) {
    size_t size = std::strlen(argv[i]) + 1;
    std::memcpy(dev_str, argv[i], size);
    static_cast<void **>(dev_argv)[i] = dev_str;
    dev_str = reinterpret_cast<uint8_t *>(dev_str) + size;
  }

  // Ensure the vector is null terminated. The terminator lives in slot `argc`
  // of the pointer array; `argv_size` is a byte count and must not be used as
  // an element index.
  static_cast<void **>(dev_argv)[argc] = nullptr;
  return dev_argv;
}
/// Copy the system's environment to GPU memory allocated using \p alloc.
///
/// \p envp must end with a null entry. The entries before it are counted and
/// the copy itself is delegated to copy_argument_vector, whose result is
/// returned unchanged.
template <typename Allocator>
void *copy_environment(char **envp, Allocator alloc) {
  // Walk to the terminating null entry to learn how many variables exist.
  int envc = 0;
  while (envp[envc] != nullptr)
    ++envc;

  return copy_argument_vector(envc, envp, alloc);
}
/// Print \p msg to stderr, followed by a newline.
100 inline void handle_error(const char *msg
) {
101 fprintf(stderr
, "%s\n", msg
);
105 inline void handle_error(rpc_status_t
) {
106 handle_error("Failure in the RPC server\n");
109 template <uint32_t lane_size
>
110 inline void register_rpc_callbacks(uint32_t device_id
) {
111 static_assert(lane_size
== 32 || lane_size
== 64, "Invalid Lane size");
112 // Register the ping test for the `libc` tests.
113 rpc_register_callback(
114 device_id
, static_cast<rpc_opcode_t
>(RPC_TEST_INCREMENT
),
115 [](rpc_port_t port
, void *data
) {
118 [](rpc_buffer_t
*buffer
, void *data
) {
119 reinterpret_cast<uint64_t *>(buffer
->data
)[0] += 1;
125 // Register the interface test callbacks.
126 rpc_register_callback(
127 device_id
, static_cast<rpc_opcode_t
>(RPC_TEST_INTERFACE
),
128 [](rpc_port_t port
, void *data
) {
133 [](rpc_buffer_t
*buffer
, void *data
) {
134 *reinterpret_cast<bool *>(data
) = buffer
->data
[0];
139 [](rpc_buffer_t
*buffer
, void *data
) {
140 *reinterpret_cast<uint64_t *>(data
) = buffer
->data
[0];
145 [](rpc_buffer_t
*buffer
, void *data
) {
146 uint64_t &cnt
= *reinterpret_cast<uint64_t *>(data
);
147 buffer
->data
[0] = cnt
= cnt
+ 1;
152 [](rpc_buffer_t
*buffer
, void *data
) {
153 *reinterpret_cast<uint64_t *>(data
) = buffer
->data
[0];
158 [](rpc_buffer_t
*buffer
, void *data
) {
159 uint64_t &cnt
= *reinterpret_cast<uint64_t *>(data
);
160 buffer
->data
[0] = cnt
= cnt
+ 1;
165 [](rpc_buffer_t
*buffer
, void *data
) {
166 *reinterpret_cast<uint64_t *>(data
) = buffer
->data
[0];
171 [](rpc_buffer_t
*buffer
, void *data
) {
172 *reinterpret_cast<uint64_t *>(data
) = buffer
->data
[0];
177 [](rpc_buffer_t
*buffer
, void *data
) {
178 uint64_t &cnt
= *reinterpret_cast<uint64_t *>(data
);
179 buffer
->data
[0] = cnt
= cnt
+ 1;
184 [](rpc_buffer_t
*buffer
, void *data
) {
185 uint64_t &cnt
= *reinterpret_cast<uint64_t *>(data
);
186 buffer
->data
[0] = cnt
= cnt
+ 1;
192 [](rpc_buffer_t
*buffer
, void *data
) {
193 *reinterpret_cast<uint64_t *>(data
) = buffer
->data
[0];
199 [](rpc_buffer_t
*buffer
, void *data
) {
200 uint64_t &cnt
= *reinterpret_cast<uint64_t *>(data
);
201 buffer
->data
[0] = cnt
= cnt
+ 1;
207 // Register the stream test handler.
208 rpc_register_callback(
209 device_id
, static_cast<rpc_opcode_t
>(RPC_TEST_STREAM
),
210 [](rpc_port_t port
, void *data
) {
211 uint64_t sizes
[lane_size
] = {0};
212 void *dst
[lane_size
] = {nullptr};
215 [](uint64_t size
, void *) -> void * { return new char[size
]; },
217 rpc_send_n(port
, dst
, sizes
);
218 for (uint64_t i
= 0; i
< lane_size
; ++i
) {
220 delete[] reinterpret_cast<uint8_t *>(dst
[i
]);