Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / libc / utils / gpu / loader / Loader.h
blobd2b2ee5baebedb6dcd892c120d6542eb3fa31d1f
1 //===-- Generic device loader interface -----------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
10 #define LLVM_LIBC_UTILS_GPU_LOADER_LOADER_H
12 #include "utils/gpu/server/rpc_server.h"
14 #include "include/llvm-libc-types/test_rpc_opcodes_t.h"
16 #include <cstddef>
17 #include <cstdint>
18 #include <cstdio>
19 #include <cstdlib>
20 #include <cstring>
/// Generic launch parameters for configuring the number of blocks / threads
/// used when launching a kernel.
struct LaunchParameters {
  // Thread counts along the x, y and z dimensions.
  uint32_t num_threads_x;
  uint32_t num_threads_y;
  uint32_t num_threads_z;

  // Block counts along the x, y and z dimensions.
  uint32_t num_blocks_x;
  uint32_t num_blocks_y;
  uint32_t num_blocks_z;
};
/// Argument pack handed to the '_begin' kernel.
struct begin_args_t {
  int argc;   // Number of entries in the argument vector.
  void *argv; // Type-erased pointer to the argument vector.
  void *envp; // Type-erased pointer to the environment vector.
};
/// Argument pack handed to the '_start' kernel.
struct start_args_t {
  int argc;   // Number of entries in the argument vector.
  void *argv; // Type-erased pointer to the argument vector.
  void *envp; // Type-erased pointer to the environment vector.
  void *ret;  // NOTE(review): presumably where the kernel stores the result
              // of `main` -- confirm against the loader implementations.
};
/// Argument pack handed to the '_end' kernel.
struct end_args_t {
  int argc; // The only value passed to '_end'.
            // NOTE(review): the name suggests an argument count, but it may
            // carry another value (e.g. an exit status) -- confirm with the
            // callers.
};
52 /// Generic interface to load the \p image and launch execution of the _start
53 /// kernel on the target device. Copies \p argc and \p argv to the device.
54 /// Returns the final value of the `main` function on the device.
55 int load(int argc, char **argv, char **evnp, void *image, size_t size,
56 const LaunchParameters &params);
/// Round \p val up to the nearest multiple of \p align and return it as a V.
/// \p align is converted to V before use; a value that is already a multiple
/// of \p align is returned unchanged.
template <typename V, typename A> inline V align_up(V val, A align) {
  const V alignment = V(align);
  // Keep the intermediate in its promoted type so narrow integer types behave
  // exactly as they would in a single expression.
  const auto padded = val + alignment - 1;
  return (padded / alignment) * alignment;
}
/// Copy the system's argument vector into memory allocated using \p alloc.
///
/// A single buffer is requested from \p alloc and laid out as a table of
/// `argc + 1` pointers (the last one null) followed immediately by the string
/// data, stored back to back with their terminating NUL bytes. The table
/// entries point into that same buffer. The buffer is filled with std::memcpy
/// and direct stores, so the memory returned by \p alloc must be writable from
/// the host.
///
/// \param argc  Number of strings in \p argv.
/// \param argv  Array of at least \p argc NUL-terminated strings.
/// \param alloc Callable taking a byte count and returning a pointer to at
///              least that many bytes, or a null pointer on failure.
/// \returns The allocated buffer, or nullptr if \p alloc failed.
template <typename Allocator>
void *copy_argument_vector(int argc, char **argv, Allocator alloc) {
  size_t argv_size = sizeof(char *) * (argc + 1);
  size_t str_size = 0;
  for (int i = 0; i < argc; ++i)
    str_size += std::strlen(argv[i]) + 1;

  // We allocate enough space for a null terminated array and all the strings.
  void *dev_argv = alloc(argv_size + str_size);
  if (!dev_argv)
    return nullptr;

  // Store the strings linearly in the same memory buffer, right after the
  // pointer table.
  void **dev_table = static_cast<void **>(dev_argv);
  uint8_t *dev_str = static_cast<uint8_t *>(dev_argv) + argv_size;
  for (int i = 0; i < argc; ++i) {
    size_t size = std::strlen(argv[i]) + 1;
    std::memcpy(dev_str, argv[i], size);
    dev_table[i] = dev_str;
    dev_str += size;
  }

  // Ensure the vector is null terminated. The terminator is the table entry at
  // index `argc`; indexing by the table's byte size would write far outside
  // the allocation.
  dev_table[argc] = nullptr;
  return dev_argv;
}
/// Copy the system's environment into memory allocated using \p alloc.
///
/// Counts the entries of the null-terminated \p envp array and then copies
/// them with copy_argument_vector, so the result has the same layout and the
/// same failure behaviour as that function.
template <typename Allocator>
void *copy_environment(char **envp, Allocator alloc) {
  int envc = 0;
  while (envp[envc] != nullptr)
    ++envc;

  return copy_argument_vector(envc, envp, alloc);
}
/// Print \p msg followed by a newline to stderr, then terminate the process
/// with EXIT_FAILURE. This function does not return.
inline void handle_error(const char *msg) {
  std::fprintf(stderr, "%s\n", msg);
  std::exit(EXIT_FAILURE);
}
105 inline void handle_error(rpc_status_t) {
106 handle_error("Failure in the RPC server\n");
109 template <uint32_t lane_size>
110 inline void register_rpc_callbacks(uint32_t device_id) {
111 static_assert(lane_size == 32 || lane_size == 64, "Invalid Lane size");
112 // Register the ping test for the `libc` tests.
113 rpc_register_callback(
114 device_id, static_cast<rpc_opcode_t>(RPC_TEST_INCREMENT),
115 [](rpc_port_t port, void *data) {
116 rpc_recv_and_send(
117 port,
118 [](rpc_buffer_t *buffer, void *data) {
119 reinterpret_cast<uint64_t *>(buffer->data)[0] += 1;
121 data);
123 nullptr);
125 // Register the interface test callbacks.
126 rpc_register_callback(
127 device_id, static_cast<rpc_opcode_t>(RPC_TEST_INTERFACE),
128 [](rpc_port_t port, void *data) {
129 uint64_t cnt = 0;
130 bool end_with_recv;
131 rpc_recv(
132 port,
133 [](rpc_buffer_t *buffer, void *data) {
134 *reinterpret_cast<bool *>(data) = buffer->data[0];
136 &end_with_recv);
137 rpc_recv(
138 port,
139 [](rpc_buffer_t *buffer, void *data) {
140 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
142 &cnt);
143 rpc_send(
144 port,
145 [](rpc_buffer_t *buffer, void *data) {
146 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
147 buffer->data[0] = cnt = cnt + 1;
149 &cnt);
150 rpc_recv(
151 port,
152 [](rpc_buffer_t *buffer, void *data) {
153 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
155 &cnt);
156 rpc_send(
157 port,
158 [](rpc_buffer_t *buffer, void *data) {
159 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
160 buffer->data[0] = cnt = cnt + 1;
162 &cnt);
163 rpc_recv(
164 port,
165 [](rpc_buffer_t *buffer, void *data) {
166 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
168 &cnt);
169 rpc_recv(
170 port,
171 [](rpc_buffer_t *buffer, void *data) {
172 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
174 &cnt);
175 rpc_send(
176 port,
177 [](rpc_buffer_t *buffer, void *data) {
178 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
179 buffer->data[0] = cnt = cnt + 1;
181 &cnt);
182 rpc_send(
183 port,
184 [](rpc_buffer_t *buffer, void *data) {
185 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
186 buffer->data[0] = cnt = cnt + 1;
188 &cnt);
189 if (end_with_recv)
190 rpc_recv(
191 port,
192 [](rpc_buffer_t *buffer, void *data) {
193 *reinterpret_cast<uint64_t *>(data) = buffer->data[0];
195 &cnt);
196 else
197 rpc_send(
198 port,
199 [](rpc_buffer_t *buffer, void *data) {
200 uint64_t &cnt = *reinterpret_cast<uint64_t *>(data);
201 buffer->data[0] = cnt = cnt + 1;
203 &cnt);
205 nullptr);
207 // Register the stream test handler.
208 rpc_register_callback(
209 device_id, static_cast<rpc_opcode_t>(RPC_TEST_STREAM),
210 [](rpc_port_t port, void *data) {
211 uint64_t sizes[lane_size] = {0};
212 void *dst[lane_size] = {nullptr};
213 rpc_recv_n(
214 port, dst, sizes,
215 [](uint64_t size, void *) -> void * { return new char[size]; },
216 nullptr);
217 rpc_send_n(port, dst, sizes);
218 for (uint64_t i = 0; i < lane_size; ++i) {
219 if (dst[i])
220 delete[] reinterpret_cast<uint8_t *>(dst[i]);
223 nullptr);
226 #endif