1 //===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This file defines the standard OpenCL implementation of the Acxxel API.
11 //===----------------------------------------------------------------------===//
26 /// An ID containing the platform ID and the device ID within the platform.
28 cl_platform_id PlatformID
;
29 cl_device_id DeviceID
;
31 FullDeviceID(cl_platform_id PlatformID
, cl_device_id DeviceID
)
32 : PlatformID(PlatformID
), DeviceID(DeviceID
) {}
35 static std::string
getOpenCLErrorMessage(cl_int Result
) {
38 std::ostringstream OutStream
;
39 OutStream
<< "OpenCL error: code = " << Result
;
40 return OutStream
.str();
43 static Status
getOpenCLError(cl_int Result
, const std::string
&Message
) {
46 std::ostringstream OutStream
;
47 OutStream
<< getOpenCLErrorMessage(Result
) << ", message = " << Message
;
48 return Status(OutStream
.str());
51 static void logOpenCLWarning(cl_int Result
, const std::string
&Message
) {
53 std::ostringstream OutStream
;
54 OutStream
<< Message
<< ": " << getOpenCLErrorMessage(Result
);
55 logWarning(OutStream
.str());
59 class OpenCLPlatform
: public Platform
{
61 ~OpenCLPlatform() override
= default;
63 static Expected
<OpenCLPlatform
> create();
65 Expected
<int> getDeviceCount() override
;
67 Expected
<Stream
> createStream(int DeviceIndex
) override
;
69 Expected
<Event
> createEvent(int DeviceIndex
) override
;
71 Expected
<Program
> createProgramFromSource(Span
<const char> Source
,
72 int DeviceIndex
) override
;
75 Status
streamSync(void *Stream
) override
;
77 Status
streamWaitOnEvent(void *Stream
, void *Event
) override
;
79 Expected
<void *> rawMallocD(ptrdiff_t ByteCount
, int DeviceIndex
) override
;
80 HandleDestructor
getDeviceMemoryHandleDestructor() override
;
81 void *getDeviceMemorySpanHandle(void *BaseHandle
, size_t ByteSize
,
82 size_t ByteOffset
) override
;
83 void rawDestroyDeviceMemorySpanHandle(void *Handle
) override
;
85 Expected
<void *> rawGetDeviceSymbolAddress(const void *Symbol
,
86 int DeviceIndex
) override
;
87 Expected
<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol
,
88 int DeviceIndex
) override
;
90 Status
rawRegisterHostMem(const void *Memory
, ptrdiff_t ByteCount
) override
;
91 HandleDestructor
getUnregisterHostMemoryHandleDestructor() override
;
93 Expected
<void *> rawMallocRegisteredH(ptrdiff_t ByteCount
) override
;
94 HandleDestructor
getFreeHostMemoryHandleDestructor() override
;
96 Status
asyncCopyDToD(const void *DeviceSrc
, ptrdiff_t DeviceSrcByteOffset
,
97 void *DeviceDst
, ptrdiff_t DeviceDstByteOffset
,
98 ptrdiff_t ByteCount
, void *Stream
) override
;
99 Status
asyncCopyDToH(const void *DeviceSrc
, ptrdiff_t DeviceSrcByteOffset
,
100 void *HostDst
, ptrdiff_t ByteCount
,
101 void *Stream
) override
;
102 Status
asyncCopyHToD(const void *HostSrc
, void *DeviceDst
,
103 ptrdiff_t DeviceDstByteOffset
, ptrdiff_t ByteCount
,
104 void *Stream
) override
;
106 Status
asyncMemsetD(void *DeviceDst
, ptrdiff_t ByteOffset
,
107 ptrdiff_t ByteCount
, char ByteValue
,
108 void *Stream
) override
;
110 Status
addStreamCallback(Stream
&Stream
, StreamCallback Callback
) override
;
112 Status
enqueueEvent(void *Event
, void *Stream
) override
;
113 bool eventIsDone(void *Event
) override
;
114 Status
eventSync(void *Event
) override
;
115 Expected
<float> getSecondsBetweenEvents(void *StartEvent
,
116 void *EndEvent
) override
;
118 Expected
<void *> rawCreateKernel(void *Program
,
119 const std::string
&Name
) override
;
120 HandleDestructor
getKernelHandleDestructor() override
;
122 Status
rawEnqueueKernelLaunch(void *Stream
, void *Kernel
,
123 KernelLaunchDimensions LaunchDimensions
,
124 Span
<void *> Arguments
,
125 Span
<size_t> ArgumentSizes
,
126 size_t SharedMemoryBytes
) override
;
129 OpenCLPlatform(std::vector
<FullDeviceID
> &&FullDeviceIDs
,
130 std::vector
<cl_context
> &&Contexts
,
131 std::vector
<cl_command_queue
> &&CommandQueues
)
132 : FullDeviceIDs(std::move(FullDeviceIDs
)), Contexts(std::move(Contexts
)),
133 CommandQueues(std::move(CommandQueues
)) {}
135 std::vector
<FullDeviceID
> FullDeviceIDs
;
136 std::vector
<cl_context
> Contexts
;
137 std::vector
<cl_command_queue
> CommandQueues
;
140 Expected
<OpenCLPlatform
> OpenCLPlatform::create() {
141 constexpr cl_uint MaxNumEntries
= 100;
142 cl_platform_id Platforms
[MaxNumEntries
];
143 cl_uint NumPlatforms
;
144 if (cl_int Result
= clGetPlatformIDs(MaxNumEntries
, Platforms
, &NumPlatforms
))
145 return getOpenCLError(Result
, "clGetPlatformIDs");
147 std::vector
<FullDeviceID
> FullDeviceIDs
;
148 for (cl_uint PlatformIndex
= 0; PlatformIndex
< NumPlatforms
;
151 cl_device_id Devices
[MaxNumEntries
];
153 clGetDeviceIDs(Platforms
[PlatformIndex
], CL_DEVICE_TYPE_ALL
,
154 MaxNumEntries
, Devices
, &NumDevices
))
155 return getOpenCLError(Result
, "clGetDeviceIDs");
156 for (cl_uint DeviceIndex
= 0; DeviceIndex
< NumDevices
; ++DeviceIndex
)
157 FullDeviceIDs
.emplace_back(Platforms
[PlatformIndex
],
158 Devices
[DeviceIndex
]);
161 if (FullDeviceIDs
.empty())
162 return Status("No OpenCL device available on this system.");
164 std::vector
<cl_context
> Contexts(FullDeviceIDs
.size());
165 std::vector
<cl_command_queue
> CommandQueues(FullDeviceIDs
.size());
166 for (size_t I
= 0; I
< FullDeviceIDs
.size(); ++I
) {
167 cl_int CreateContextResult
;
168 Contexts
[I
] = clCreateContext(nullptr, 1, &FullDeviceIDs
[I
].DeviceID
,
169 nullptr, nullptr, &CreateContextResult
);
170 if (CreateContextResult
)
171 return getOpenCLError(CreateContextResult
, "clCreateContext");
173 cl_int CreateCommandQueueResult
;
174 CommandQueues
[I
] = clCreateCommandQueue(
175 Contexts
[I
], FullDeviceIDs
[I
].DeviceID
, CL_QUEUE_PROFILING_ENABLE
,
176 &CreateCommandQueueResult
);
177 if (CreateCommandQueueResult
)
178 return getOpenCLError(CreateCommandQueueResult
, "clCreateCommandQueue");
181 return OpenCLPlatform(std::move(FullDeviceIDs
), std::move(Contexts
),
182 std::move(CommandQueues
));
185 Expected
<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs
.size(); }
187 static void openCLDestroyStream(void *H
) {
188 logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue
>(H
)),
189 "clReleaseCommandQueue");
192 Expected
<Stream
> OpenCLPlatform::createStream(int DeviceIndex
) {
194 cl_command_queue Queue
= clCreateCommandQueue(
195 Contexts
[DeviceIndex
], FullDeviceIDs
[DeviceIndex
].DeviceID
,
196 CL_QUEUE_PROFILING_ENABLE
, &Result
);
198 return getOpenCLError(Result
, "clCreateCommandQueue");
199 return constructStream(this, DeviceIndex
, Queue
, openCLDestroyStream
);
202 static void openCLEventDestroy(void *H
) {
203 cl_event
*CLEvent
= static_cast<cl_event
*>(H
);
204 logOpenCLWarning(clReleaseEvent(*CLEvent
), "clReleaseEvent");
208 Status
OpenCLPlatform::streamSync(void *Stream
) {
209 return getOpenCLError(clFinish(static_cast<cl_command_queue
>(Stream
)),
213 Status
OpenCLPlatform::streamWaitOnEvent(void *Stream
, void *Event
) {
214 cl_event
*CLEvent
= static_cast<cl_event
*>(Event
);
215 return getOpenCLError(
216 clEnqueueBarrierWithWaitList(static_cast<cl_command_queue
>(Stream
), 1,
218 "clEnqueueMarkerWithWaitList");
221 Expected
<Event
> OpenCLPlatform::createEvent(int DeviceIndex
) {
223 cl_event Event
= clCreateUserEvent(Contexts
[DeviceIndex
], &Result
);
225 return getOpenCLError(Result
, "clCreateUserEvent");
226 if (cl_int Result
= clSetUserEventStatus(Event
, CL_COMPLETE
))
227 return getOpenCLError(Result
, "clSetUserEventStatus");
228 return constructEvent(this, DeviceIndex
, new cl_event(Event
),
232 static void openCLDestroyProgram(void *H
) {
233 logOpenCLWarning(clReleaseProgram(static_cast<cl_program
>(H
)),
238 OpenCLPlatform::createProgramFromSource(Span
<const char> Source
,
241 const char *CSource
= Source
.data();
242 size_t SourceSize
= Source
.size();
243 cl_program Program
= clCreateProgramWithSource(Contexts
[DeviceIndex
], 1,
244 &CSource
, &SourceSize
, &Error
);
246 return getOpenCLError(Error
, "clCreateProgramWithSource");
247 cl_device_id DeviceID
= FullDeviceIDs
[DeviceIndex
].DeviceID
;
249 clBuildProgram(Program
, 1, &DeviceID
, nullptr, nullptr, nullptr))
250 return getOpenCLError(Error
, "clBuildProgram");
251 return constructProgram(this, Program
, openCLDestroyProgram
);
254 Expected
<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount
,
257 cl_mem Memory
= clCreateBuffer(Contexts
[DeviceIndex
], CL_MEM_READ_WRITE
,
258 ByteCount
, nullptr, &Result
);
260 return getOpenCLError(Result
, "clCreateBuffer");
261 return reinterpret_cast<void *>(Memory
);
264 static void openCLDestroyDeviceMemory(void *H
) {
265 logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem
>(H
)),
266 "clReleaseMemObject");
269 HandleDestructor
OpenCLPlatform::getDeviceMemoryHandleDestructor() {
270 return openCLDestroyDeviceMemory
;
273 void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle
,
277 cl_buffer_region Region
;
278 Region
.origin
= ByteOffset
;
279 Region
.size
= ByteSize
;
281 clCreateSubBuffer(static_cast<cl_mem
>(BaseHandle
), 0,
282 CL_BUFFER_CREATE_TYPE_REGION
, &Region
, &Error
);
283 logOpenCLWarning(Error
, "clCreateSubBuffer");
289 void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle
) {
290 openCLDestroyDeviceMemory(Handle
);
294 OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/,
295 int /*DeviceIndex*/) {
296 // This doesn't seem to have any equivalent in OpenCL.
297 return Status("not implemented");
301 OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/,
302 int /*DeviceIndex*/) {
303 // This doesn't seem to have any equivalent in OpenCL.
304 return Status("not implemented");
307 static void noOpHandleDestructor(void *) {}
309 Status
OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
310 ptrdiff_t /*ByteCount*/) {
311 // TODO(jhen): Do we want to do something to pin the memory here?
315 HandleDestructor
OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
316 // TODO(jhen): Do we want to unpin the memory here?
317 return noOpHandleDestructor
;
320 Expected
<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount
) {
321 // TODO(jhen): Do we want to do something to pin the memory here?
322 return std::malloc(ByteCount
);
325 static void freeMemoryHandleDestructor(void *Memory
) {
326 // TODO(jhen): Do we want to unpin the memory here?
330 HandleDestructor
OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
331 return freeMemoryHandleDestructor
;
334 Status
OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc
,
335 ptrdiff_t DeviceSrcByteOffset
,
337 ptrdiff_t DeviceDstByteOffset
,
338 ptrdiff_t ByteCount
, void *Stream
) {
339 return getOpenCLError(
340 clEnqueueCopyBuffer(static_cast<cl_command_queue
>(Stream
),
341 static_cast<cl_mem
>(const_cast<void *>(DeviceSrc
)),
342 static_cast<cl_mem
>(DeviceDst
), DeviceSrcByteOffset
,
343 DeviceDstByteOffset
, ByteCount
, 0, nullptr, nullptr),
344 "clEnqueueCopyBuffer");
347 Status
OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc
,
348 ptrdiff_t DeviceSrcByteOffset
,
349 void *HostDst
, ptrdiff_t ByteCount
,
351 return getOpenCLError(
352 clEnqueueReadBuffer(static_cast<cl_command_queue
>(Stream
),
353 static_cast<cl_mem
>(const_cast<void *>(DeviceSrc
)),
354 CL_TRUE
, DeviceSrcByteOffset
, ByteCount
, HostDst
, 0,
356 "clEnqueueReadBuffer");
359 Status
OpenCLPlatform::asyncCopyHToD(const void *HostSrc
, void *DeviceDst
,
360 ptrdiff_t DeviceDstByteOffset
,
361 ptrdiff_t ByteCount
, void *Stream
) {
362 return getOpenCLError(
363 clEnqueueWriteBuffer(static_cast<cl_command_queue
>(Stream
),
364 static_cast<cl_mem
>(DeviceDst
), CL_TRUE
,
365 DeviceDstByteOffset
, ByteCount
, HostSrc
, 0, nullptr,
367 "clEnqueueWriteBuffer");
370 Status
OpenCLPlatform::asyncMemsetD(void *DeviceDst
, ptrdiff_t ByteOffset
,
371 ptrdiff_t ByteCount
, char ByteValue
,
373 return getOpenCLError(
374 clEnqueueFillBuffer(static_cast<cl_command_queue
>(Stream
),
375 static_cast<cl_mem
>(DeviceDst
), &ByteValue
, 1,
376 ByteOffset
, ByteCount
, 0, nullptr, nullptr),
377 "clEnqueueFillBuffer");
380 struct StreamCallbackUserData
{
381 StreamCallbackUserData(Stream
&TheStream
, StreamCallback Function
,
383 : TheStream(TheStream
), TheFunction(std::move(Function
)),
384 EndEvent(EndEvent
) {}
387 StreamCallback TheFunction
;
391 // A function with the right signature to pass to clSetEventCallback.
392 void CL_CALLBACK
openCLStreamCallbackShim(cl_event
,
393 cl_int EventCommandExecStatus
,
395 std::unique_ptr
<StreamCallbackUserData
> Data(
396 static_cast<StreamCallbackUserData
*>(UserData
));
399 getOpenCLError(EventCommandExecStatus
, "stream callback error state"));
400 if (cl_int Result
= clSetUserEventStatus(Data
->EndEvent
, CL_COMPLETE
))
401 logOpenCLWarning(Result
, "clSetUserEventStatus");
402 if (cl_int Result
= clReleaseEvent(Data
->EndEvent
))
403 logOpenCLWarning(Result
, "clReleaseEvent");
406 Status
OpenCLPlatform::addStreamCallback(Stream
&TheStream
,
407 StreamCallback Callback
) {
409 cl_event StartEvent
=
410 clCreateUserEvent(Contexts
[TheStream
.getDeviceIndex()], &Result
);
412 return getOpenCLError(Result
, "clCreateUserEvent");
414 clCreateUserEvent(Contexts
[TheStream
.getDeviceIndex()], &Result
);
416 return getOpenCLError(Result
, "clCreateUserEvent");
417 cl_event StartBarrierEvent
;
418 if (cl_int Result
= clEnqueueBarrierWithWaitList(
419 static_cast<cl_command_queue
>(getStreamHandle(TheStream
)), 1,
420 &StartEvent
, &StartBarrierEvent
))
421 return getOpenCLError(Result
, "clEnqueueBarrierWithWaitList");
423 if (cl_int Result
= clEnqueueBarrierWithWaitList(
424 static_cast<cl_command_queue
>(getStreamHandle(TheStream
)), 1,
426 return getOpenCLError(Result
, "clEnqueueBarrierWithWaitList");
428 std::unique_ptr
<StreamCallbackUserData
> UserData(
429 new StreamCallbackUserData(TheStream
, std::move(Callback
), EndEvent
));
431 clSetEventCallback(StartBarrierEvent
, CL_RUNNING
,
432 openCLStreamCallbackShim
, UserData
.release()))
433 return getOpenCLError(Result
, "clSetEventCallback");
435 if (cl_int Result
= clSetUserEventStatus(StartEvent
, CL_COMPLETE
))
436 return getOpenCLError(Result
, "clSetUserEventStatus");
438 if (cl_int Result
= clReleaseEvent(StartBarrierEvent
))
439 return getOpenCLError(Result
, "clReleaseEvent");
441 return getOpenCLError(clReleaseEvent(StartEvent
), "clReleaseEvent");
444 Status
OpenCLPlatform::enqueueEvent(void *Event
, void *Stream
) {
445 cl_event
*CLEvent
= static_cast<cl_event
*>(Event
);
446 cl_event OldEvent
= *CLEvent
;
448 if (cl_int Result
= clEnqueueMarkerWithWaitList(
449 static_cast<cl_command_queue
>(Stream
), 0, nullptr, &NewEvent
))
450 return getOpenCLError(Result
, "clEnqueueMarkerWithWaitList");
452 return getOpenCLError(clReleaseEvent(OldEvent
), "clReleaseEvent");
455 bool OpenCLPlatform::eventIsDone(void *Event
) {
456 cl_event
*CLEvent
= static_cast<cl_event
*>(Event
);
458 logOpenCLWarning(clGetEventInfo(*CLEvent
, CL_EVENT_COMMAND_EXECUTION_STATUS
,
459 sizeof(EventStatus
), &EventStatus
, nullptr),
461 return EventStatus
== CL_COMPLETE
|| EventStatus
< 0;
464 Status
OpenCLPlatform::eventSync(void *Event
) {
465 cl_event
*CLEvent
= static_cast<cl_event
*>(Event
);
466 return getOpenCLError(clWaitForEvents(1, CLEvent
), "clWaitForEvents");
469 Expected
<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent
,
471 cl_event
*CLStartEvent
= static_cast<cl_event
*>(StartEvent
);
472 cl_event
*CLEndEvent
= static_cast<cl_event
*>(EndEvent
);
474 cl_profiling_info ParamName
= CL_PROFILING_COMMAND_END
;
475 cl_ulong StartNanoseconds
;
476 cl_ulong EndNanoseconds
;
478 clGetEventProfilingInfo(*CLStartEvent
, ParamName
, sizeof(cl_ulong
),
479 &StartNanoseconds
, nullptr))
480 return getOpenCLError(Result
, "clGetEventProfilingInfo");
481 if (cl_int Result
= clGetEventProfilingInfo(
482 *CLEndEvent
, ParamName
, sizeof(cl_ulong
), &EndNanoseconds
, nullptr))
483 return getOpenCLError(Result
, "clGetEventProfilingInfo");
484 return (EndNanoseconds
- StartNanoseconds
) * 1e-12;
487 Expected
<void *> OpenCLPlatform::rawCreateKernel(void *Program
,
488 const std::string
&Name
) {
492 clCreateKernel(static_cast<cl_program
>(Program
), Name
.c_str(), &Error
);
494 return getOpenCLError(Error
, "clCreateKernel");
498 static void openCLDestroyKernel(void *H
) {
499 logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel
>(H
)),
503 HandleDestructor
OpenCLPlatform::getKernelHandleDestructor() {
504 return openCLDestroyKernel
;
507 Status
OpenCLPlatform::rawEnqueueKernelLaunch(
508 void *Stream
, void *Kernel
, KernelLaunchDimensions LaunchDimensions
,
509 Span
<void *> Arguments
, Span
<size_t> ArgumentSizes
,
510 size_t SharedMemoryBytes
) {
511 if (SharedMemoryBytes
!= 0)
512 return Status("OpenCL kernel launches only accept zero for the shared "
514 cl_kernel TheKernel
= static_cast<cl_kernel
>(Kernel
);
515 for (int I
= 0; I
< Arguments
.size(); ++I
)
517 clSetKernelArg(TheKernel
, I
, ArgumentSizes
[I
], Arguments
[I
]))
518 return getOpenCLError(Error
, "clSetKernelArg");
519 size_t LocalWorkSize
[] = {LaunchDimensions
.BlockX
, LaunchDimensions
.BlockY
,
520 LaunchDimensions
.BlockZ
};
521 size_t GlobalWorkSize
[] = {LaunchDimensions
.BlockX
* LaunchDimensions
.GridX
,
522 LaunchDimensions
.BlockY
* LaunchDimensions
.GridY
,
523 LaunchDimensions
.BlockZ
* LaunchDimensions
.GridZ
};
524 return getOpenCLError(
525 clEnqueueNDRangeKernel(static_cast<cl_command_queue
>(Stream
), TheKernel
,
526 3, nullptr, GlobalWorkSize
, LocalWorkSize
, 0,
528 "clEnqueueNDRangeKernel");
535 /// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
537 Expected
<Platform
*> getPlatform() {
538 static auto MaybePlatform
= []() -> Expected
<OpenCLPlatform
*> {
539 Expected
<OpenCLPlatform
> CreationResult
= OpenCLPlatform::create();
540 if (CreationResult
.isError())
541 return CreationResult
.getError();
543 return new OpenCLPlatform(CreationResult
.takeValue());
545 return MaybePlatform
;
548 } // namespace opencl
550 } // namespace acxxel