Revert "[clang-repl] Implement partial translation units and error recovery."
[llvm-project.git] / parallel-libs / acxxel / opencl_acxxel.cpp
bloba710b5713595471918850d29f4de1baa43735b5b
1 //===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// This file defines the standard OpenCL implementation of the Acxxel API.
10 ///
11 //===----------------------------------------------------------------------===//
13 #include "acxxel.h"
15 #include "CL/cl.h"
17 #include <mutex>
18 #include <sstream>
19 #include <utility>
20 #include <vector>
22 namespace acxxel {
24 namespace {
26 /// An ID containing the platform ID and the device ID within the platform.
27 struct FullDeviceID {
28 cl_platform_id PlatformID;
29 cl_device_id DeviceID;
31 FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID)
32 : PlatformID(PlatformID), DeviceID(DeviceID) {}
35 static std::string getOpenCLErrorMessage(cl_int Result) {
36 if (!Result)
37 return "success";
38 std::ostringstream OutStream;
39 OutStream << "OpenCL error: code = " << Result;
40 return OutStream.str();
43 static Status getOpenCLError(cl_int Result, const std::string &Message) {
44 if (!Result)
45 return Status();
46 std::ostringstream OutStream;
47 OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message;
48 return Status(OutStream.str());
51 static void logOpenCLWarning(cl_int Result, const std::string &Message) {
52 if (Result) {
53 std::ostringstream OutStream;
54 OutStream << Message << ": " << getOpenCLErrorMessage(Result);
55 logWarning(OutStream.str());
59 class OpenCLPlatform : public Platform {
60 public:
61 ~OpenCLPlatform() override = default;
63 static Expected<OpenCLPlatform> create();
65 Expected<int> getDeviceCount() override;
67 Expected<Stream> createStream(int DeviceIndex) override;
69 Expected<Event> createEvent(int DeviceIndex) override;
71 Expected<Program> createProgramFromSource(Span<const char> Source,
72 int DeviceIndex) override;
74 protected:
75 Status streamSync(void *Stream) override;
77 Status streamWaitOnEvent(void *Stream, void *Event) override;
79 Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
80 HandleDestructor getDeviceMemoryHandleDestructor() override;
81 void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
82 size_t ByteOffset) override;
83 void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
85 Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
86 int DeviceIndex) override;
87 Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
88 int DeviceIndex) override;
90 Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
91 HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
93 Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
94 HandleDestructor getFreeHostMemoryHandleDestructor() override;
96 Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
97 void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
98 ptrdiff_t ByteCount, void *Stream) override;
99 Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
100 void *HostDst, ptrdiff_t ByteCount,
101 void *Stream) override;
102 Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
103 ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
104 void *Stream) override;
106 Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
107 ptrdiff_t ByteCount, char ByteValue,
108 void *Stream) override;
110 Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
112 Status enqueueEvent(void *Event, void *Stream) override;
113 bool eventIsDone(void *Event) override;
114 Status eventSync(void *Event) override;
115 Expected<float> getSecondsBetweenEvents(void *StartEvent,
116 void *EndEvent) override;
118 Expected<void *> rawCreateKernel(void *Program,
119 const std::string &Name) override;
120 HandleDestructor getKernelHandleDestructor() override;
122 Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
123 KernelLaunchDimensions LaunchDimensions,
124 Span<void *> Arguments,
125 Span<size_t> ArgumentSizes,
126 size_t SharedMemoryBytes) override;
128 private:
129 OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs,
130 std::vector<cl_context> &&Contexts,
131 std::vector<cl_command_queue> &&CommandQueues)
132 : FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)),
133 CommandQueues(std::move(CommandQueues)) {}
135 std::vector<FullDeviceID> FullDeviceIDs;
136 std::vector<cl_context> Contexts;
137 std::vector<cl_command_queue> CommandQueues;
140 Expected<OpenCLPlatform> OpenCLPlatform::create() {
141 constexpr cl_uint MaxNumEntries = 100;
142 cl_platform_id Platforms[MaxNumEntries];
143 cl_uint NumPlatforms;
144 if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms))
145 return getOpenCLError(Result, "clGetPlatformIDs");
147 std::vector<FullDeviceID> FullDeviceIDs;
148 for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms;
149 ++PlatformIndex) {
150 cl_uint NumDevices;
151 cl_device_id Devices[MaxNumEntries];
152 if (cl_int Result =
153 clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL,
154 MaxNumEntries, Devices, &NumDevices))
155 return getOpenCLError(Result, "clGetDeviceIDs");
156 for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex)
157 FullDeviceIDs.emplace_back(Platforms[PlatformIndex],
158 Devices[DeviceIndex]);
161 if (FullDeviceIDs.empty())
162 return Status("No OpenCL device available on this system.");
164 std::vector<cl_context> Contexts(FullDeviceIDs.size());
165 std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size());
166 for (size_t I = 0; I < FullDeviceIDs.size(); ++I) {
167 cl_int CreateContextResult;
168 Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID,
169 nullptr, nullptr, &CreateContextResult);
170 if (CreateContextResult)
171 return getOpenCLError(CreateContextResult, "clCreateContext");
173 cl_int CreateCommandQueueResult;
174 CommandQueues[I] = clCreateCommandQueue(
175 Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE,
176 &CreateCommandQueueResult);
177 if (CreateCommandQueueResult)
178 return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue");
181 return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts),
182 std::move(CommandQueues));
185 Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); }
187 static void openCLDestroyStream(void *H) {
188 logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)),
189 "clReleaseCommandQueue");
192 Expected<Stream> OpenCLPlatform::createStream(int DeviceIndex) {
193 cl_int Result;
194 cl_command_queue Queue = clCreateCommandQueue(
195 Contexts[DeviceIndex], FullDeviceIDs[DeviceIndex].DeviceID,
196 CL_QUEUE_PROFILING_ENABLE, &Result);
197 if (Result)
198 return getOpenCLError(Result, "clCreateCommandQueue");
199 return constructStream(this, DeviceIndex, Queue, openCLDestroyStream);
202 static void openCLEventDestroy(void *H) {
203 cl_event *CLEvent = static_cast<cl_event *>(H);
204 logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent");
205 delete CLEvent;
208 Status OpenCLPlatform::streamSync(void *Stream) {
209 return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)),
210 "clFinish");
213 Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) {
214 cl_event *CLEvent = static_cast<cl_event *>(Event);
215 return getOpenCLError(
216 clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1,
217 CLEvent, nullptr),
218 "clEnqueueMarkerWithWaitList");
221 Expected<Event> OpenCLPlatform::createEvent(int DeviceIndex) {
222 cl_int Result;
223 cl_event Event = clCreateUserEvent(Contexts[DeviceIndex], &Result);
224 if (Result)
225 return getOpenCLError(Result, "clCreateUserEvent");
226 if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE))
227 return getOpenCLError(Result, "clSetUserEventStatus");
228 return constructEvent(this, DeviceIndex, new cl_event(Event),
229 openCLEventDestroy);
232 static void openCLDestroyProgram(void *H) {
233 logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)),
234 "clReleaseProgram");
237 Expected<Program>
238 OpenCLPlatform::createProgramFromSource(Span<const char> Source,
239 int DeviceIndex) {
240 cl_int Error;
241 const char *CSource = Source.data();
242 size_t SourceSize = Source.size();
243 cl_program Program = clCreateProgramWithSource(Contexts[DeviceIndex], 1,
244 &CSource, &SourceSize, &Error);
245 if (Error)
246 return getOpenCLError(Error, "clCreateProgramWithSource");
247 cl_device_id DeviceID = FullDeviceIDs[DeviceIndex].DeviceID;
248 if (cl_int Error =
249 clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr))
250 return getOpenCLError(Error, "clBuildProgram");
251 return constructProgram(this, Program, openCLDestroyProgram);
254 Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount,
255 int DeviceIndex) {
256 cl_int Result;
257 cl_mem Memory = clCreateBuffer(Contexts[DeviceIndex], CL_MEM_READ_WRITE,
258 ByteCount, nullptr, &Result);
259 if (Result)
260 return getOpenCLError(Result, "clCreateBuffer");
261 return reinterpret_cast<void *>(Memory);
264 static void openCLDestroyDeviceMemory(void *H) {
265 logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)),
266 "clReleaseMemObject");
269 HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() {
270 return openCLDestroyDeviceMemory;
273 void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle,
274 size_t ByteSize,
275 size_t ByteOffset) {
276 cl_int Error;
277 cl_buffer_region Region;
278 Region.origin = ByteOffset;
279 Region.size = ByteSize;
280 cl_mem SubBuffer =
281 clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0,
282 CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error);
283 logOpenCLWarning(Error, "clCreateSubBuffer");
284 if (Error)
285 return nullptr;
286 return SubBuffer;
289 void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) {
290 openCLDestroyDeviceMemory(Handle);
293 Expected<void *>
294 OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/,
295 int /*DeviceIndex*/) {
296 // This doesn't seem to have any equivalent in OpenCL.
297 return Status("not implemented");
300 Expected<ptrdiff_t>
301 OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/,
302 int /*DeviceIndex*/) {
303 // This doesn't seem to have any equivalent in OpenCL.
304 return Status("not implemented");
307 static void noOpHandleDestructor(void *) {}
309 Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
310 ptrdiff_t /*ByteCount*/) {
311 // TODO(jhen): Do we want to do something to pin the memory here?
312 return Status();
315 HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
316 // TODO(jhen): Do we want to unpin the memory here?
317 return noOpHandleDestructor;
320 Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
321 // TODO(jhen): Do we want to do something to pin the memory here?
322 return std::malloc(ByteCount);
325 static void freeMemoryHandleDestructor(void *Memory) {
326 // TODO(jhen): Do we want to unpin the memory here?
327 std::free(Memory);
330 HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
331 return freeMemoryHandleDestructor;
334 Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc,
335 ptrdiff_t DeviceSrcByteOffset,
336 void *DeviceDst,
337 ptrdiff_t DeviceDstByteOffset,
338 ptrdiff_t ByteCount, void *Stream) {
339 return getOpenCLError(
340 clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream),
341 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
342 static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
343 DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr),
344 "clEnqueueCopyBuffer");
347 Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc,
348 ptrdiff_t DeviceSrcByteOffset,
349 void *HostDst, ptrdiff_t ByteCount,
350 void *Stream) {
351 return getOpenCLError(
352 clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream),
353 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
354 CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0,
355 nullptr, nullptr),
356 "clEnqueueReadBuffer");
359 Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
360 ptrdiff_t DeviceDstByteOffset,
361 ptrdiff_t ByteCount, void *Stream) {
362 return getOpenCLError(
363 clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream),
364 static_cast<cl_mem>(DeviceDst), CL_TRUE,
365 DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
366 nullptr),
367 "clEnqueueWriteBuffer");
370 Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
371 ptrdiff_t ByteCount, char ByteValue,
372 void *Stream) {
373 return getOpenCLError(
374 clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream),
375 static_cast<cl_mem>(DeviceDst), &ByteValue, 1,
376 ByteOffset, ByteCount, 0, nullptr, nullptr),
377 "clEnqueueFillBuffer");
380 struct StreamCallbackUserData {
381 StreamCallbackUserData(Stream &TheStream, StreamCallback Function,
382 cl_event EndEvent)
383 : TheStream(TheStream), TheFunction(std::move(Function)),
384 EndEvent(EndEvent) {}
386 Stream &TheStream;
387 StreamCallback TheFunction;
388 cl_event EndEvent;
391 // A function with the right signature to pass to clSetEventCallback.
392 void CL_CALLBACK openCLStreamCallbackShim(cl_event,
393 cl_int EventCommandExecStatus,
394 void *UserData) {
395 std::unique_ptr<StreamCallbackUserData> Data(
396 static_cast<StreamCallbackUserData *>(UserData));
397 Data->TheFunction(
398 Data->TheStream,
399 getOpenCLError(EventCommandExecStatus, "stream callback error state"));
400 if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE))
401 logOpenCLWarning(Result, "clSetUserEventStatus");
402 if (cl_int Result = clReleaseEvent(Data->EndEvent))
403 logOpenCLWarning(Result, "clReleaseEvent");
406 Status OpenCLPlatform::addStreamCallback(Stream &TheStream,
407 StreamCallback Callback) {
408 cl_int Result;
409 cl_event StartEvent =
410 clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
411 if (Result)
412 return getOpenCLError(Result, "clCreateUserEvent");
413 cl_event EndEvent =
414 clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
415 if (Result)
416 return getOpenCLError(Result, "clCreateUserEvent");
417 cl_event StartBarrierEvent;
418 if (cl_int Result = clEnqueueBarrierWithWaitList(
419 static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
420 &StartEvent, &StartBarrierEvent))
421 return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
423 if (cl_int Result = clEnqueueBarrierWithWaitList(
424 static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
425 &EndEvent, nullptr))
426 return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
428 std::unique_ptr<StreamCallbackUserData> UserData(
429 new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent));
430 if (cl_int Result =
431 clSetEventCallback(StartBarrierEvent, CL_RUNNING,
432 openCLStreamCallbackShim, UserData.release()))
433 return getOpenCLError(Result, "clSetEventCallback");
435 if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE))
436 return getOpenCLError(Result, "clSetUserEventStatus");
438 if (cl_int Result = clReleaseEvent(StartBarrierEvent))
439 return getOpenCLError(Result, "clReleaseEvent");
441 return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent");
444 Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) {
445 cl_event *CLEvent = static_cast<cl_event *>(Event);
446 cl_event OldEvent = *CLEvent;
447 cl_event NewEvent;
448 if (cl_int Result = clEnqueueMarkerWithWaitList(
449 static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent))
450 return getOpenCLError(Result, "clEnqueueMarkerWithWaitList");
451 *CLEvent = NewEvent;
452 return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent");
455 bool OpenCLPlatform::eventIsDone(void *Event) {
456 cl_event *CLEvent = static_cast<cl_event *>(Event);
457 cl_int EventStatus;
458 logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
459 sizeof(EventStatus), &EventStatus, nullptr),
460 "clGetEventInfo");
461 return EventStatus == CL_COMPLETE || EventStatus < 0;
464 Status OpenCLPlatform::eventSync(void *Event) {
465 cl_event *CLEvent = static_cast<cl_event *>(Event);
466 return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents");
469 Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent,
470 void *EndEvent) {
471 cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent);
472 cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent);
474 cl_profiling_info ParamName = CL_PROFILING_COMMAND_END;
475 cl_ulong StartNanoseconds;
476 cl_ulong EndNanoseconds;
477 if (cl_int Result =
478 clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong),
479 &StartNanoseconds, nullptr))
480 return getOpenCLError(Result, "clGetEventProfilingInfo");
481 if (cl_int Result = clGetEventProfilingInfo(
482 *CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr))
483 return getOpenCLError(Result, "clGetEventProfilingInfo");
484 return (EndNanoseconds - StartNanoseconds) * 1e-12;
487 Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program,
488 const std::string &Name) {
490 cl_int Error;
491 cl_kernel Kernel =
492 clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error);
493 if (Error)
494 return getOpenCLError(Error, "clCreateKernel");
495 return Kernel;
498 static void openCLDestroyKernel(void *H) {
499 logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)),
500 "clReleaseKernel");
503 HandleDestructor OpenCLPlatform::getKernelHandleDestructor() {
504 return openCLDestroyKernel;
507 Status OpenCLPlatform::rawEnqueueKernelLaunch(
508 void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
509 Span<void *> Arguments, Span<size_t> ArgumentSizes,
510 size_t SharedMemoryBytes) {
511 if (SharedMemoryBytes != 0)
512 return Status("OpenCL kernel launches only accept zero for the shared "
513 "memory byte size");
514 cl_kernel TheKernel = static_cast<cl_kernel>(Kernel);
515 for (int I = 0; I < Arguments.size(); ++I)
516 if (cl_int Error =
517 clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I]))
518 return getOpenCLError(Error, "clSetKernelArg");
519 size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY,
520 LaunchDimensions.BlockZ};
521 size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX,
522 LaunchDimensions.BlockY * LaunchDimensions.GridY,
523 LaunchDimensions.BlockZ * LaunchDimensions.GridZ};
524 return getOpenCLError(
525 clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel,
526 3, nullptr, GlobalWorkSize, LocalWorkSize, 0,
527 nullptr, nullptr),
528 "clEnqueueNDRangeKernel");
531 } // namespace
533 namespace opencl {
535 /// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
536 /// Platform.
537 Expected<Platform *> getPlatform() {
538 static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> {
539 Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create();
540 if (CreationResult.isError())
541 return CreationResult.getError();
542 else
543 return new OpenCLPlatform(CreationResult.takeValue());
544 }();
545 return MaybePlatform;
548 } // namespace opencl
550 } // namespace acxxel