Update git submodules
[LibreOffice.git] / opencl / source / openclwrapper.cxx
blobf6d1242baa3d746d8b011ab3e32ac019301cd1bd
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <config_folders.h>
12 #include <opencl_device.hxx>
13 #include <opencl_device_selection.h>
15 #include <opencl/openclconfig.hxx>
16 #include <opencl/openclwrapper.hxx>
17 #include <opencl/platforminfo.hxx>
18 #include <osl/file.hxx>
19 #include <rtl/bootstrap.hxx>
20 #include <rtl/digest.h>
21 #include <rtl/strbuf.hxx>
22 #include <rtl/ustring.hxx>
23 #include <sal/config.h>
24 #include <sal/log.hxx>
25 #include <opencl/OpenCLZone.hxx>
27 #include <memory>
28 #include <string_view>
30 #include <stdlib.h>
32 #include <officecfg/Office/Common.hxx>
34 #ifdef _WIN32
35 #include <prewin.h>
36 #include <postwin.h>
37 #define OPENCL_DLL_NAME "OpenCL.dll"
38 #elif defined(MACOSX)
39 #define OPENCL_DLL_NAME nullptr
40 #else
41 #define OPENCL_DLL_NAME "libOpenCL.so.1"
42 #endif
44 #ifdef _WIN32_WINNT_WINBLUE
45 #include <VersionHelpers.h>
46 #endif
48 #define DEVICE_NAME_LENGTH 1024
49 #define DRIVER_VERSION_LENGTH 1024
50 #define PLATFORM_VERSION_LENGTH 1024
// Logs a warning and makes the enclosing function return false when `status`
// is not CL_SUCCESS. `name` must be a string literal naming the failed call.
// Wrapped in do { } while (false) so that the macro behaves like a single
// statement (safe inside unbraced if/else); callers terminate it with ';'.
#define CHECK_OPENCL(status,name) \
do \
{ \
    if( (status) != CL_SUCCESS ) \
    { \
        SAL_WARN( "opencl", "OpenCL error code " << (status) << " at " SAL_DETAIL_WHERE "from " name ); \
        return false; \
    } \
} while (false)
namespace {

// Set to true at the end of a successful initOpenCLRunEnv(int) and reset to
// false by releaseOpenCLEnv().
bool bIsInited{ false };

}
65 namespace openclwrapper {
67 GPUEnv gpuEnv;
68 sal_uInt64 kernelFailures = 0;
70 namespace
73 OString generateMD5(const void* pData, size_t length)
75 sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
76 rtlDigestError aError = rtl_digest_MD5(pData, length,
77 pBuffer, RTL_DIGEST_LENGTH_MD5);
78 SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed");
80 OStringBuffer aBuffer;
81 const char* const pString = "0123456789ABCDEF";
82 for(sal_uInt8 val : pBuffer)
84 aBuffer.append(pString[val/16]);
85 aBuffer.append(pString[val%16]);
87 return aBuffer.makeStringAndClear();
90 OString const & getCacheFolder()
92 static OString const aCacheFolder = []()
94 OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
95 rtl::Bootstrap::expandMacros(url);
97 osl::Directory::create(url);
99 return OUStringToOString(url, RTL_TEXTENCODING_UTF8);
100 }();
101 return aCacheFolder;
106 static bool initializeCommandQueue(GPUEnv& aGpuEnv)
108 OpenCLZone zone;
110 cl_int nState;
111 cl_command_queue command_queue[OPENCL_CMDQUEUE_SIZE];
113 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
115 command_queue[i] = clCreateCommandQueue(aGpuEnv.mpContext, aGpuEnv.mpDevID, 0, &nState);
116 if (nState != CL_SUCCESS)
117 SAL_WARN("opencl", "clCreateCommandQueue failed: " << errorString(nState));
119 if (command_queue[i] == nullptr || nState != CL_SUCCESS)
121 // Release all command queues created so far.
122 for (int j = 0; j <= i; ++j)
124 if (command_queue[j])
126 clReleaseCommandQueue(command_queue[j]);
127 command_queue[j] = nullptr;
131 clReleaseContext(aGpuEnv.mpContext);
132 SAL_WARN("opencl", "failed to set/switch opencl device");
133 return false;
136 SAL_INFO("opencl", "Created command queue " << command_queue[i] << " for context " << aGpuEnv.mpContext);
139 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
141 aGpuEnv.mpCmdQueue[i] = command_queue[i];
143 aGpuEnv.mbCommandQueueInitialized = true;
144 return true;
147 void setKernelEnv( KernelEnv *envInfo )
149 if (!gpuEnv.mbCommandQueueInitialized)
151 initializeCommandQueue(gpuEnv);
154 envInfo->mpkContext = gpuEnv.mpContext;
155 envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
157 assert(gpuEnv.mnCmdQueuePos < OPENCL_CMDQUEUE_SIZE);
158 envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue[gpuEnv.mnCmdQueuePos];
161 namespace {
163 OString createFileName(cl_device_id deviceId, const char* clFileName)
165 OString fileName(clFileName);
166 sal_Int32 nIndex = fileName.lastIndexOf(".cl");
167 if(nIndex > 0)
168 fileName = fileName.copy(0, nIndex);
170 char deviceName[DEVICE_NAME_LENGTH] = {0};
171 clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
172 sizeof(deviceName), deviceName, nullptr);
174 char driverVersion[DRIVER_VERSION_LENGTH] = {0};
175 clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
176 sizeof(driverVersion), driverVersion, nullptr);
178 cl_platform_id platformId;
179 clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
180 sizeof(platformId), &platformId, nullptr);
182 char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
183 clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
184 platformVersion, nullptr);
186 // create hash for deviceName + driver version + platform version
187 OString aString = OString::Concat(deviceName) + driverVersion + platformVersion;
188 OString aHash = generateMD5(aString.getStr(), aString.getLength());
190 return getCacheFolder() + fileName + "-" + aHash + ".bin";
193 std::vector<std::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context )
195 size_t numDevices=0;
197 std::vector<std::shared_ptr<osl::File> > aGeneratedFiles;
198 cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
199 0, nullptr, &numDevices );
200 numDevices /= sizeof(numDevices);
202 if(clStatus != CL_SUCCESS)
203 return aGeneratedFiles;
205 assert(numDevices == 1);
207 // grab the handle to the device in the context.
208 cl_device_id pDevID;
209 clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
210 sizeof( cl_device_id ), &pDevID, nullptr );
212 if(clStatus != CL_SUCCESS)
213 return aGeneratedFiles;
215 assert(pDevID == gpuEnv.mpDevID);
217 OString fileName = createFileName(gpuEnv.mpDevID, clFileName);
218 auto pNewFile = std::make_shared<osl::File>(OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
219 if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
221 aGeneratedFiles.push_back(pNewFile);
222 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success");
224 else
226 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
229 return aGeneratedFiles;
232 bool writeBinaryToFile( std::string_view rFileName, const char* binary, size_t numBytes )
234 osl::File file(OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
235 osl::FileBase::RC status = file.open(
236 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
238 if(status != osl::FileBase::E_None)
239 return false;
241 sal_uInt64 nBytesWritten = 0;
242 file.write( binary, numBytes, nBytesWritten );
244 assert(numBytes == nBytesWritten);
246 return true;
251 bool generatBinFromKernelSource( cl_program program, const char * clFileName )
253 cl_uint numDevices;
255 cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
256 sizeof(numDevices), &numDevices, nullptr );
257 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
259 assert(numDevices == 1);
261 cl_device_id pDevID;
262 /* grab the handle to the device in the program. */
263 clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
264 sizeof(cl_device_id), &pDevID, nullptr );
265 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
267 /* figure out the size of the binary. */
268 size_t binarySize;
270 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
271 sizeof(size_t), &binarySize, nullptr );
272 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
274 /* copy over the generated binary. */
275 if ( binarySize != 0 )
277 std::unique_ptr<char[]> binary(new char[binarySize]);
278 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
279 sizeof(char *), &binary, nullptr );
280 CHECK_OPENCL(clStatus,"clGetProgramInfo");
282 OString fileName = createFileName(pDevID, clFileName);
283 if ( !writeBinaryToFile( fileName,
284 binary.get(), binarySize ) )
285 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL");
286 else
287 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success");
289 return true;
292 namespace {
294 struct OpenCLEnv
296 cl_platform_id mpOclPlatformID;
297 cl_context mpOclContext;
298 cl_device_id mpOclDevsID;
301 bool initOpenCLAttr( OpenCLEnv * env )
303 if ( gpuEnv.mnIsUserCreated )
304 return true;
306 gpuEnv.mpContext = env->mpOclContext;
307 gpuEnv.mpPlatformID = env->mpOclPlatformID;
308 gpuEnv.mpDevID = env->mpOclDevsID;
310 gpuEnv.mnIsUserCreated = 1;
312 gpuEnv.mbCommandQueueInitialized = false;
314 gpuEnv.mnCmdQueuePos = 0; // default to 0.
316 return false;
319 bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
321 cl_int clStatus;
322 //char options[512];
323 // create a cl program executable for all the devices specified
324 clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &gpuInfo->mpDevID,
325 buildOption, nullptr, nullptr);
327 if ( clStatus != CL_SUCCESS )
329 size_t length;
330 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
331 CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
332 if ( clStatus != CL_SUCCESS )
334 return false;
337 std::unique_ptr<char[]> buildLog(new char[length]);
338 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
339 CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
340 if ( clStatus != CL_SUCCESS )
342 return false;
345 OString aBuildLogFileURL = getCacheFolder() + "kernel-build.log";
346 osl::File aBuildLogFile(OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
347 osl::FileBase::RC status = aBuildLogFile.open(
348 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
350 if(status != osl::FileBase::E_None)
351 return false;
353 sal_uInt64 nBytesWritten = 0;
354 aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
356 return false;
359 return true;
364 bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
366 size_t numDevices;
367 cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
368 0, nullptr, &numDevices );
369 numDevices /= sizeof(numDevices);
370 CHECK_OPENCL( clStatus, "clGetContextInfo" );
372 std::vector<std::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
373 filename, gpuInfo->mpContext );
375 if (aGeneratedFiles.size() == numDevices)
377 std::unique_ptr<size_t[]> length(new size_t[numDevices]);
378 std::unique_ptr<unsigned char*[]> pBinary(new unsigned char*[numDevices]);
379 for(size_t i = 0; i < numDevices; ++i)
381 sal_uInt64 nSize;
382 aGeneratedFiles[i]->getSize(nSize);
383 unsigned char* binary = new unsigned char[nSize];
384 sal_uInt64 nBytesRead;
385 aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
386 if(nSize != nBytesRead)
387 assert(false);
389 length[i] = nBytesRead;
391 pBinary[i] = binary;
394 // grab the handles to all of the devices in the context.
395 std::unique_ptr<cl_device_id[]> pArryDevsID(new cl_device_id[numDevices]);
396 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
397 sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), nullptr );
399 if(clStatus != CL_SUCCESS)
401 for(size_t i = 0; i < numDevices; ++i)
403 delete[] pBinary[i];
405 return false;
408 cl_int binary_status;
410 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
411 pArryDevsID.get(), length.get(), const_cast<const unsigned char**>(pBinary.get()),
412 &binary_status, &clStatus );
413 if(clStatus != CL_SUCCESS)
415 // something went wrong, fall back to compiling from source
416 return false;
418 SAL_INFO("opencl", "Created program " << gpuInfo->mpArryPrograms[idx] << " from binary");
419 for(size_t i = 0; i < numDevices; ++i)
421 delete[] pBinary[i];
425 if ( !gpuInfo->mpArryPrograms[idx] )
427 return false;
429 return buildProgram(buildOption, gpuInfo, idx);
432 namespace {
434 void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
436 OpenCLZone zone;
438 bKhrFp64 = false;
439 bAmdFp64 = false;
441 // Check device extensions for double type
442 size_t aDevExtInfoSize = 0;
444 cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize );
445 if( clStatus != CL_SUCCESS )
446 return;
448 std::unique_ptr<char[]> pExtInfo(new char[aDevExtInfoSize]);
450 clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
451 sizeof(char) * aDevExtInfoSize, pExtInfo.get(), nullptr);
453 if( clStatus != CL_SUCCESS )
454 return;
456 if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
458 bKhrFp64 = true;
460 else
462 // Check if cl_amd_fp64 extension is supported
463 if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
464 bAmdFp64 = true;
468 bool initOpenCLRunEnv( GPUEnv *gpuInfo )
470 OpenCLZone zone;
471 cl_uint nPreferredVectorWidthFloat;
472 char pName[64];
474 bool bKhrFp64 = false;
475 bool bAmdFp64 = false;
477 checkDeviceForDoubleSupport(gpuInfo->mpDevID, bKhrFp64, bAmdFp64);
479 gpuInfo->mnKhrFp64Flag = bKhrFp64;
480 gpuInfo->mnAmdFp64Flag = bAmdFp64;
482 gpuInfo->mbNeedsTDRAvoidance = false;
484 clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint),
485 &nPreferredVectorWidthFloat, nullptr);
486 SAL_INFO("opencl", "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT=" << nPreferredVectorWidthFloat);
488 clGetPlatformInfo(gpuInfo->mpPlatformID, CL_PLATFORM_NAME, 64,
489 pName, nullptr);
491 #if defined (_WIN32)
492 // the Win32 SDK 8.1 deprecates GetVersionEx()
493 # ifdef _WIN32_WINNT_WINBLUE
494 const bool bIsNotWinOrIsWin8OrGreater = IsWindows8OrGreater();
495 # else
496 bool bIsNotWinOrIsWin8OrGreater = true;
497 OSVERSIONINFOW aVersionInfo = {};
498 aVersionInfo.dwOSVersionInfoSize = sizeof( aVersionInfo );
499 if (GetVersionExW( &aVersionInfo ))
501 // Windows 7 or lower?
502 if (aVersionInfo.dwMajorVersion < 6 ||
503 (aVersionInfo.dwMajorVersion == 6 && aVersionInfo.dwMinorVersion < 2))
504 bIsNotWinOrIsWin8OrGreater = false;
506 # endif
507 #else
508 const bool bIsNotWinOrIsWin8OrGreater = true;
509 #endif
511 // Heuristic: Certain old low-end OpenCL implementations don't
512 // work for us with too large group lengths. Looking at the preferred
513 // float vector width seems to be a way to detect these devices, except
514 // the non-working NVIDIA cards on Windows older than version 8.
515 gpuInfo->mbNeedsTDRAvoidance = ( nPreferredVectorWidthFloat == 4 ) ||
516 ( !bIsNotWinOrIsWin8OrGreater &&
517 OUString::createFromAscii(pName).indexOf("NVIDIA") > -1 );
519 size_t nMaxParameterSize;
520 clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t),
521 &nMaxParameterSize, nullptr);
522 SAL_INFO("opencl", "CL_DEVICE_MAX_PARAMETER_SIZE=" << nMaxParameterSize);
524 return false;
527 bool initOpenCLRunEnv( int argc )
529 if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
530 return true;
532 if ( !bIsInited )
534 if ( !gpuEnv.mnIsUserCreated )
535 memset( &gpuEnv, 0, sizeof(gpuEnv) );
537 //initialize devices, context, command_queue
538 bool status = initOpenCLRunEnv( &gpuEnv );
539 if ( status )
541 return true;
543 //initialize program, kernelName, kernelCount
544 if( getenv( "SC_FLOAT" ) )
546 gpuEnv.mnKhrFp64Flag = false;
547 gpuEnv.mnAmdFp64Flag = false;
549 if( gpuEnv.mnKhrFp64Flag )
551 SAL_INFO("opencl", "Use Khr double");
553 else if( gpuEnv.mnAmdFp64Flag )
555 SAL_INFO("opencl", "Use AMD double type");
557 else
559 SAL_INFO("opencl", "USE float type");
561 bIsInited = true;
563 return false;
566 // based on crashes and hanging during kernel compilation
567 void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
569 OpenCLDeviceInfo aDeviceInfo;
570 aDeviceInfo.device = aDeviceId;
572 char pName[DEVICE_NAME_LENGTH];
573 cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, nullptr);
574 if(nState != CL_SUCCESS)
575 return;
577 aDeviceInfo.maName = OUString::createFromAscii(pName);
579 char pVendor[DEVICE_NAME_LENGTH];
580 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, nullptr);
581 if(nState != CL_SUCCESS)
582 return;
584 aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
586 cl_ulong nMemSize;
587 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, nullptr);
588 if(nState != CL_SUCCESS)
589 return;
591 aDeviceInfo.mnMemory = nMemSize;
593 cl_uint nClockFrequency;
594 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, nullptr);
595 if(nState != CL_SUCCESS)
596 return;
598 aDeviceInfo.mnFrequency = nClockFrequency;
600 cl_uint nComputeUnits;
601 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, nullptr);
602 if(nState != CL_SUCCESS)
603 return;
605 char pDriver[DEVICE_NAME_LENGTH];
606 nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, nullptr);
608 if(nState != CL_SUCCESS)
609 return;
611 aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
613 bool bKhrFp64 = false;
614 bool bAmdFp64 = false;
615 checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
617 // only list devices that support double
618 if(!bKhrFp64 && !bAmdFp64)
619 return;
621 aDeviceInfo.mnComputeUnits = nComputeUnits;
623 if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo))
624 rPlatformInfo.maDevices.push_back(aDeviceInfo);
627 bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
629 rPlatformInfo.platform = nPlatformId;
630 char pName[64];
631 cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
632 pName, nullptr);
633 if(nState != CL_SUCCESS)
634 return false;
635 rPlatformInfo.maName = OUString::createFromAscii(pName);
637 char pVendor[64];
638 nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
639 pVendor, nullptr);
640 if(nState != CL_SUCCESS)
641 return false;
643 rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
645 cl_uint nDevices;
646 nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, nullptr, &nDevices);
647 if(nState != CL_SUCCESS)
648 return false;
650 // memory leak that does not matter
651 // memory is stored in static variable that lives through the whole program
652 cl_device_id* pDevices = new cl_device_id[nDevices];
653 nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, nullptr);
654 if(nState != CL_SUCCESS)
655 return false;
657 for(size_t i = 0; i < nDevices; ++i)
659 createDeviceInfo(pDevices[i], rPlatformInfo);
662 return true;
667 const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
669 static std::vector<OpenCLPlatformInfo> aPlatforms;
671 // return early if we already initialized or can't use OpenCL
672 if (!aPlatforms.empty() || !canUseOpenCL())
673 return aPlatforms;
675 int status = clewInit(OPENCL_DLL_NAME);
676 if (status < 0)
677 return aPlatforms;
679 cl_uint nPlatforms;
680 cl_int nState = clGetPlatformIDs(0, nullptr, &nPlatforms);
682 if(nState != CL_SUCCESS)
683 return aPlatforms;
685 // memory leak that does not matter,
686 // memory is stored in static instance aPlatforms
687 cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
688 nState = clGetPlatformIDs(nPlatforms, pPlatforms, nullptr);
690 if(nState != CL_SUCCESS)
691 return aPlatforms;
693 for(size_t i = 0; i < nPlatforms; ++i)
695 OpenCLPlatformInfo aPlatformInfo;
696 if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
697 aPlatforms.push_back(aPlatformInfo);
700 return aPlatforms;
703 namespace {
705 cl_device_id findDeviceIdByDeviceString(std::u16string_view rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
707 for (const OpenCLPlatformInfo& rPlatform : rPlatforms)
709 for (const OpenCLDeviceInfo& rDeviceInfo : rPlatform.maDevices)
711 OUString aDeviceId = rDeviceInfo.maVendor + " " + rDeviceInfo.maName;
712 if (rString == aDeviceId)
714 return rDeviceInfo.device;
719 return nullptr;
722 void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
724 cl_platform_id platformId;
725 cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
726 sizeof(platformId), &platformId, nullptr);
728 if(nState != CL_SUCCESS)
729 return;
731 const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
732 for(size_t i = 0; i < rPlatforms.size(); ++i)
734 cl_platform_id platId = rPlatforms[i].platform;
735 if(platId != platformId)
736 continue;
738 for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
740 cl_device_id id = rPlatforms[i].maDevices[j].device;
741 if(id == aDeviceId)
743 rDeviceId = j;
744 rPlatformId = i;
745 return;
753 bool canUseOpenCL()
755 if( const char* env = getenv( "SC_FORCE_CALCULATION" ))
757 if( strcmp( env, "opencl" ) == 0 )
758 return true;
760 return !getenv("SAL_DISABLE_OPENCL") && officecfg::Office::Common::Misc::UseOpenCL::get();
763 bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation, OUString& rOutSelectedDeviceVersionIDString)
765 if (!canUseOpenCL() || fillOpenCLInfo().empty())
766 return false;
768 cl_device_id pDeviceId = nullptr;
769 if(pDevice)
770 pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
772 if(!pDeviceId || bAutoSelect)
774 int status = clewInit(OPENCL_DLL_NAME);
775 if (status < 0)
776 return false;
778 OUString url(OStringToOUString(getCacheFolder(), RTL_TEXTENCODING_UTF8));
779 OUString path;
780 osl::FileBase::getSystemPathFromFileURL(url,path);
781 ds_device aSelectedDevice = getDeviceSelection(path, bForceEvaluation);
782 if ( aSelectedDevice.eType != DeviceType::OpenCLDevice)
783 return false;
784 pDeviceId = aSelectedDevice.aDeviceID;
787 if(gpuEnv.mpDevID == pDeviceId)
789 // we don't need to change anything
790 // still the same device
791 return pDeviceId != nullptr;
794 cl_context context;
795 cl_platform_id platformId;
798 OpenCLZone zone;
799 cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
800 sizeof(platformId), &platformId, nullptr);
802 cl_context_properties cps[3];
803 cps[0] = CL_CONTEXT_PLATFORM;
804 cps[1] = reinterpret_cast<cl_context_properties>(platformId);
805 cps[2] = 0;
806 context = clCreateContext( cps, 1, &pDeviceId, nullptr, nullptr, &nState );
807 if (nState != CL_SUCCESS)
808 SAL_WARN("opencl", "clCreateContext failed: " << errorString(nState));
810 if(nState != CL_SUCCESS || context == nullptr)
812 if(context != nullptr)
813 clReleaseContext(context);
815 SAL_WARN("opencl", "failed to set/switch opencl device");
816 return false;
818 SAL_INFO("opencl", "Created context " << context << " for platform " << platformId << ", device " << pDeviceId);
820 OString sDeviceID = getDeviceInfoString(pDeviceId, CL_DEVICE_VENDOR) + " " + getDeviceInfoString(pDeviceId, CL_DRIVER_VERSION);
821 rOutSelectedDeviceVersionIDString = OStringToOUString(sDeviceID, RTL_TEXTENCODING_UTF8);
824 setOpenCLCmdQueuePosition(0); // Call this just to avoid the method being deleted from unused function deleter.
826 releaseOpenCLEnv(&gpuEnv);
828 OpenCLEnv env;
829 env.mpOclPlatformID = platformId;
830 env.mpOclContext = context;
831 env.mpOclDevsID = pDeviceId;
833 initOpenCLAttr(&env);
835 return !initOpenCLRunEnv(0);
838 void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
840 if (!canUseOpenCL())
841 return;
843 int status = clewInit(OPENCL_DLL_NAME);
844 if (status < 0)
845 return;
847 cl_device_id id = gpuEnv.mpDevID;
848 findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
851 void getOpenCLDeviceName(OUString& rDeviceName, OUString& rPlatformName)
853 if (!canUseOpenCL())
854 return;
856 int status = clewInit(OPENCL_DLL_NAME);
857 if (status < 0)
858 return;
860 cl_device_id deviceId = gpuEnv.mpDevID;
861 cl_platform_id platformId;
862 if( clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM, sizeof(platformId), &platformId, nullptr) != CL_SUCCESS )
863 return;
865 char deviceName[DEVICE_NAME_LENGTH] = {0};
866 if( clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr) != CL_SUCCESS )
867 return;
868 char platformName[64];
869 if( clGetPlatformInfo(platformId, CL_PLATFORM_NAME, 64, platformName, nullptr) != CL_SUCCESS )
870 return;
871 rDeviceName = OUString::createFromAscii(deviceName);
872 rPlatformName = OUString::createFromAscii(platformName);
875 void setOpenCLCmdQueuePosition( int nPos )
877 if (nPos < 0 || nPos >= OPENCL_CMDQUEUE_SIZE)
878 // Out of range. Ignore this.
879 return;
881 gpuEnv.mnCmdQueuePos = nPos;
884 const char* errorString(cl_int nError)
886 #define CASE(val) case CL_##val: return #val
887 switch (nError)
889 CASE(SUCCESS);
890 CASE(DEVICE_NOT_FOUND);
891 CASE(DEVICE_NOT_AVAILABLE);
892 CASE(COMPILER_NOT_AVAILABLE);
893 CASE(MEM_OBJECT_ALLOCATION_FAILURE);
894 CASE(OUT_OF_RESOURCES);
895 CASE(OUT_OF_HOST_MEMORY);
896 CASE(PROFILING_INFO_NOT_AVAILABLE);
897 CASE(MEM_COPY_OVERLAP);
898 CASE(IMAGE_FORMAT_MISMATCH);
899 CASE(IMAGE_FORMAT_NOT_SUPPORTED);
900 CASE(BUILD_PROGRAM_FAILURE);
901 CASE(MAP_FAILURE);
902 CASE(INVALID_VALUE);
903 CASE(INVALID_DEVICE_TYPE);
904 CASE(INVALID_PLATFORM);
905 CASE(INVALID_DEVICE);
906 CASE(INVALID_CONTEXT);
907 CASE(INVALID_QUEUE_PROPERTIES);
908 CASE(INVALID_COMMAND_QUEUE);
909 CASE(INVALID_HOST_PTR);
910 CASE(INVALID_MEM_OBJECT);
911 CASE(INVALID_IMAGE_FORMAT_DESCRIPTOR);
912 CASE(INVALID_IMAGE_SIZE);
913 CASE(INVALID_SAMPLER);
914 CASE(INVALID_BINARY);
915 CASE(INVALID_BUILD_OPTIONS);
916 CASE(INVALID_PROGRAM);
917 CASE(INVALID_PROGRAM_EXECUTABLE);
918 CASE(INVALID_KERNEL_NAME);
919 CASE(INVALID_KERNEL_DEFINITION);
920 CASE(INVALID_KERNEL);
921 CASE(INVALID_ARG_INDEX);
922 CASE(INVALID_ARG_VALUE);
923 CASE(INVALID_ARG_SIZE);
924 CASE(INVALID_KERNEL_ARGS);
925 CASE(INVALID_WORK_DIMENSION);
926 CASE(INVALID_WORK_GROUP_SIZE);
927 CASE(INVALID_WORK_ITEM_SIZE);
928 CASE(INVALID_GLOBAL_OFFSET);
929 CASE(INVALID_EVENT_WAIT_LIST);
930 CASE(INVALID_EVENT);
931 CASE(INVALID_OPERATION);
932 CASE(INVALID_GL_OBJECT);
933 CASE(INVALID_BUFFER_SIZE);
934 CASE(INVALID_MIP_LEVEL);
935 CASE(INVALID_GLOBAL_WORK_SIZE);
936 default:
937 return "Unknown OpenCL error code";
939 #undef CASE
942 bool GPUEnv::isOpenCLEnabled()
944 return gpuEnv.mpDevID && gpuEnv.mpContext;
949 void releaseOpenCLEnv( openclwrapper::GPUEnv *gpuInfo )
951 OpenCLZone zone;
953 if ( !bIsInited )
955 return;
958 for (_cl_command_queue* & i : openclwrapper::gpuEnv.mpCmdQueue)
960 if (i)
962 clReleaseCommandQueue(i);
963 i = nullptr;
966 openclwrapper::gpuEnv.mnCmdQueuePos = 0;
968 if ( openclwrapper::gpuEnv.mpContext )
970 clReleaseContext( openclwrapper::gpuEnv.mpContext );
971 openclwrapper::gpuEnv.mpContext = nullptr;
973 bIsInited = false;
974 gpuInfo->mnIsUserCreated = 0;
977 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */