nss: upgrade to release 3.73
[LibreOffice.git] / opencl / source / openclwrapper.cxx
blobc7ac9525d620ca029f6e94bccee3c0a3e786a42c
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <config_folders.h>
12 #include <opencl_device.hxx>
13 #include <opencl_device_selection.h>
15 #include <opencl/openclconfig.hxx>
16 #include <opencl/openclwrapper.hxx>
17 #include <opencl/platforminfo.hxx>
18 #include <osl/file.hxx>
19 #include <rtl/bootstrap.hxx>
20 #include <rtl/digest.h>
21 #include <rtl/strbuf.hxx>
22 #include <rtl/ustring.hxx>
23 #include <sal/config.h>
24 #include <sal/log.hxx>
25 #include <opencl/OpenCLZone.hxx>
27 #include <memory>
29 #include <stdlib.h>
31 #include <officecfg/Office/Common.hxx>
33 #ifdef _WIN32
34 #include <prewin.h>
35 #include <postwin.h>
36 #define OPENCL_DLL_NAME "OpenCL.dll"
37 #elif defined(MACOSX)
38 #define OPENCL_DLL_NAME nullptr
39 #else
40 #define OPENCL_DLL_NAME "libOpenCL.so.1"
41 #endif
43 #ifdef _WIN32_WINNT_WINBLUE
44 #include <VersionHelpers.h>
45 #endif
47 #define DEVICE_NAME_LENGTH 1024
48 #define DRIVER_VERSION_LENGTH 1024
49 #define PLATFORM_VERSION_LENGTH 1024
// Logs a warning naming the failed call and makes the *enclosing function*
// return false when 'status' is not CL_SUCCESS.
// Wrapped in do { } while (false) so that the macro behaves like a single
// statement (no dangling-else surprises); 'status' is parenthesised so that
// arbitrary expressions can be passed. Use it as: CHECK_OPENCL(st, "name");
#define CHECK_OPENCL(status,name) \
    do \
    { \
        if( (status) != CL_SUCCESS ) \
        { \
            SAL_WARN( "opencl", "OpenCL error code " << (status) << " at " SAL_DETAIL_WHERE "from " name ); \
            return false; \
        } \
    } while (false)
58 using namespace std;
60 namespace {
// Set to true at the end of a successful initOpenCLRunEnv(int) and cleared
// again by releaseOpenCLEnv(); guards against repeated initialisation/release.
bool bIsInited = false;
66 namespace openclwrapper {
68 GPUEnv gpuEnv;
69 sal_uInt64 kernelFailures = 0;
71 namespace
74 OString generateMD5(const void* pData, size_t length)
76 sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
77 rtlDigestError aError = rtl_digest_MD5(pData, length,
78 pBuffer, RTL_DIGEST_LENGTH_MD5);
79 SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed");
81 OStringBuffer aBuffer;
82 const char* const pString = "0123456789ABCDEF";
83 for(sal_uInt8 val : pBuffer)
85 aBuffer.append(pString[val/16]);
86 aBuffer.append(pString[val%16]);
88 return aBuffer.makeStringAndClear();
91 OString const & getCacheFolder()
93 static OString const aCacheFolder = [&]()
95 OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
96 rtl::Bootstrap::expandMacros(url);
98 osl::Directory::create(url);
100 return OUStringToOString(url, RTL_TEXTENCODING_UTF8);
101 }();
102 return aCacheFolder;
107 static bool initializeCommandQueue(GPUEnv& aGpuEnv)
109 OpenCLZone zone;
111 cl_int nState;
112 cl_command_queue command_queue[OPENCL_CMDQUEUE_SIZE];
114 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
116 command_queue[i] = clCreateCommandQueue(aGpuEnv.mpContext, aGpuEnv.mpDevID, 0, &nState);
117 if (nState != CL_SUCCESS)
118 SAL_WARN("opencl", "clCreateCommandQueue failed: " << errorString(nState));
120 if (command_queue[i] == nullptr || nState != CL_SUCCESS)
122 // Release all command queues created so far.
123 for (int j = 0; j <= i; ++j)
125 if (command_queue[j])
127 clReleaseCommandQueue(command_queue[j]);
128 command_queue[j] = nullptr;
132 clReleaseContext(aGpuEnv.mpContext);
133 SAL_WARN("opencl", "failed to set/switch opencl device");
134 return false;
137 SAL_INFO("opencl", "Created command queue " << command_queue[i] << " for context " << aGpuEnv.mpContext);
140 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
142 aGpuEnv.mpCmdQueue[i] = command_queue[i];
144 aGpuEnv.mbCommandQueueInitialized = true;
145 return true;
148 void setKernelEnv( KernelEnv *envInfo )
150 if (!gpuEnv.mbCommandQueueInitialized)
152 initializeCommandQueue(gpuEnv);
155 envInfo->mpkContext = gpuEnv.mpContext;
156 envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
158 assert(gpuEnv.mnCmdQueuePos < OPENCL_CMDQUEUE_SIZE);
159 envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue[gpuEnv.mnCmdQueuePos];
162 namespace {
164 OString createFileName(cl_device_id deviceId, const char* clFileName)
166 OString fileName(clFileName);
167 sal_Int32 nIndex = fileName.lastIndexOf(".cl");
168 if(nIndex > 0)
169 fileName = fileName.copy(0, nIndex);
171 char deviceName[DEVICE_NAME_LENGTH] = {0};
172 clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
173 sizeof(deviceName), deviceName, nullptr);
175 char driverVersion[DRIVER_VERSION_LENGTH] = {0};
176 clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
177 sizeof(driverVersion), driverVersion, nullptr);
179 cl_platform_id platformId;
180 clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
181 sizeof(platformId), &platformId, nullptr);
183 char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
184 clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
185 platformVersion, nullptr);
187 // create hash for deviceName + driver version + platform version
188 OString aString = OString::Concat(deviceName) + driverVersion + platformVersion;
189 OString aHash = generateMD5(aString.getStr(), aString.getLength());
191 return getCacheFolder() + fileName + "-" + aHash + ".bin";
194 std::vector<std::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context )
196 size_t numDevices=0;
198 std::vector<std::shared_ptr<osl::File> > aGeneratedFiles;
199 cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
200 0, nullptr, &numDevices );
201 numDevices /= sizeof(numDevices);
203 if(clStatus != CL_SUCCESS)
204 return aGeneratedFiles;
206 assert(numDevices == 1);
208 // grab the handle to the device in the context.
209 cl_device_id pDevID;
210 clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
211 sizeof( cl_device_id ), &pDevID, nullptr );
213 if(clStatus != CL_SUCCESS)
214 return aGeneratedFiles;
216 assert(pDevID == gpuEnv.mpDevID);
218 OString fileName = createFileName(gpuEnv.mpDevID, clFileName);
219 auto pNewFile = std::make_shared<osl::File>(OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
220 if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
222 aGeneratedFiles.push_back(pNewFile);
223 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success");
225 else
227 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
230 return aGeneratedFiles;
233 bool writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
235 osl::File file(OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
236 osl::FileBase::RC status = file.open(
237 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
239 if(status != osl::FileBase::E_None)
240 return false;
242 sal_uInt64 nBytesWritten = 0;
243 file.write( binary, numBytes, nBytesWritten );
245 assert(numBytes == nBytesWritten);
247 return true;
252 bool generatBinFromKernelSource( cl_program program, const char * clFileName )
254 cl_uint numDevices;
256 cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
257 sizeof(numDevices), &numDevices, nullptr );
258 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
260 assert(numDevices == 1);
262 cl_device_id pDevID;
263 /* grab the handle to the device in the program. */
264 clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
265 sizeof(cl_device_id), &pDevID, nullptr );
266 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
268 /* figure out the size of the binary. */
269 size_t binarySize;
271 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
272 sizeof(size_t), &binarySize, nullptr );
273 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
275 /* copy over the generated binary. */
276 if ( binarySize != 0 )
278 std::unique_ptr<char[]> binary(new char[binarySize]);
279 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
280 sizeof(char *), &binary, nullptr );
281 CHECK_OPENCL(clStatus,"clGetProgramInfo");
283 OString fileName = createFileName(pDevID, clFileName);
284 if ( !writeBinaryToFile( fileName,
285 binary.get(), binarySize ) )
286 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL");
287 else
288 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success");
290 return true;
293 namespace {
295 struct OpenCLEnv
297 cl_platform_id mpOclPlatformID;
298 cl_context mpOclContext;
299 cl_device_id mpOclDevsID;
302 bool initOpenCLAttr( OpenCLEnv * env )
304 if ( gpuEnv.mnIsUserCreated )
305 return true;
307 gpuEnv.mpContext = env->mpOclContext;
308 gpuEnv.mpPlatformID = env->mpOclPlatformID;
309 gpuEnv.mpDevID = env->mpOclDevsID;
311 gpuEnv.mnIsUserCreated = 1;
313 gpuEnv.mbCommandQueueInitialized = false;
315 gpuEnv.mnCmdQueuePos = 0; // default to 0.
317 return false;
320 bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
322 cl_int clStatus;
323 //char options[512];
324 // create a cl program executable for all the devices specified
325 clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &gpuInfo->mpDevID,
326 buildOption, nullptr, nullptr);
328 if ( clStatus != CL_SUCCESS )
330 size_t length;
331 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
332 CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
333 if ( clStatus != CL_SUCCESS )
335 return false;
338 std::unique_ptr<char[]> buildLog(new char[length]);
339 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
340 CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
341 if ( clStatus != CL_SUCCESS )
343 return false;
346 OString aBuildLogFileURL = getCacheFolder() + "kernel-build.log";
347 osl::File aBuildLogFile(OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
348 osl::FileBase::RC status = aBuildLogFile.open(
349 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
351 if(status != osl::FileBase::E_None)
352 return false;
354 sal_uInt64 nBytesWritten = 0;
355 aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
357 return false;
360 return true;
365 bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
367 size_t numDevices;
368 cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
369 0, nullptr, &numDevices );
370 numDevices /= sizeof(numDevices);
371 CHECK_OPENCL( clStatus, "clGetContextInfo" );
373 std::vector<std::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
374 filename, gpuInfo->mpContext );
376 if (aGeneratedFiles.size() == numDevices)
378 std::unique_ptr<size_t[]> length(new size_t[numDevices]);
379 std::unique_ptr<unsigned char*[]> pBinary(new unsigned char*[numDevices]);
380 for(size_t i = 0; i < numDevices; ++i)
382 sal_uInt64 nSize;
383 aGeneratedFiles[i]->getSize(nSize);
384 unsigned char* binary = new unsigned char[nSize];
385 sal_uInt64 nBytesRead;
386 aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
387 if(nSize != nBytesRead)
388 assert(false);
390 length[i] = nBytesRead;
392 pBinary[i] = binary;
395 // grab the handles to all of the devices in the context.
396 std::unique_ptr<cl_device_id[]> pArryDevsID(new cl_device_id[numDevices]);
397 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
398 sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), nullptr );
400 if(clStatus != CL_SUCCESS)
402 for(size_t i = 0; i < numDevices; ++i)
404 delete[] pBinary[i];
406 return false;
409 cl_int binary_status;
411 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
412 pArryDevsID.get(), length.get(), const_cast<const unsigned char**>(pBinary.get()),
413 &binary_status, &clStatus );
414 if(clStatus != CL_SUCCESS)
416 // something went wrong, fall back to compiling from source
417 return false;
419 SAL_INFO("opencl", "Created program " << gpuInfo->mpArryPrograms[idx] << " from binary");
420 for(size_t i = 0; i < numDevices; ++i)
422 delete[] pBinary[i];
426 if ( !gpuInfo->mpArryPrograms[idx] )
428 return false;
430 return buildProgram(buildOption, gpuInfo, idx);
433 namespace {
435 void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
437 OpenCLZone zone;
439 bKhrFp64 = false;
440 bAmdFp64 = false;
442 // Check device extensions for double type
443 size_t aDevExtInfoSize = 0;
445 cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize );
446 if( clStatus != CL_SUCCESS )
447 return;
449 std::unique_ptr<char[]> pExtInfo(new char[aDevExtInfoSize]);
451 clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
452 sizeof(char) * aDevExtInfoSize, pExtInfo.get(), nullptr);
454 if( clStatus != CL_SUCCESS )
455 return;
457 if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
459 bKhrFp64 = true;
461 else
463 // Check if cl_amd_fp64 extension is supported
464 if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
465 bAmdFp64 = true;
469 bool initOpenCLRunEnv( GPUEnv *gpuInfo )
471 OpenCLZone zone;
472 cl_uint nPreferredVectorWidthFloat;
473 char pName[64];
475 bool bKhrFp64 = false;
476 bool bAmdFp64 = false;
478 checkDeviceForDoubleSupport(gpuInfo->mpDevID, bKhrFp64, bAmdFp64);
480 gpuInfo->mnKhrFp64Flag = bKhrFp64;
481 gpuInfo->mnAmdFp64Flag = bAmdFp64;
483 gpuInfo->mbNeedsTDRAvoidance = false;
485 clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint),
486 &nPreferredVectorWidthFloat, nullptr);
487 SAL_INFO("opencl", "CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT=" << nPreferredVectorWidthFloat);
489 clGetPlatformInfo(gpuInfo->mpPlatformID, CL_PLATFORM_NAME, 64,
490 pName, nullptr);
492 #if defined (_WIN32)
493 // the Win32 SDK 8.1 deprecates GetVersionEx()
494 # ifdef _WIN32_WINNT_WINBLUE
495 const bool bIsNotWinOrIsWin8OrGreater = IsWindows8OrGreater();
496 # else
497 bool bIsNotWinOrIsWin8OrGreater = true;
498 OSVERSIONINFOW aVersionInfo = {};
499 aVersionInfo.dwOSVersionInfoSize = sizeof( aVersionInfo );
500 if (GetVersionExW( &aVersionInfo ))
502 // Windows 7 or lower?
503 if (aVersionInfo.dwMajorVersion < 6 ||
504 (aVersionInfo.dwMajorVersion == 6 && aVersionInfo.dwMinorVersion < 2))
505 bIsNotWinOrIsWin8OrGreater = false;
507 # endif
508 #else
509 const bool bIsNotWinOrIsWin8OrGreater = true;
510 #endif
512 // Heuristic: Certain old low-end OpenCL implementations don't
513 // work for us with too large group lengths. Looking at the preferred
514 // float vector width seems to be a way to detect these devices, except
515 // the non-working NVIDIA cards on Windows older than version 8.
516 gpuInfo->mbNeedsTDRAvoidance = ( nPreferredVectorWidthFloat == 4 ) ||
517 ( !bIsNotWinOrIsWin8OrGreater &&
518 OUString::createFromAscii(pName).indexOf("NVIDIA") > -1 );
520 size_t nMaxParameterSize;
521 clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof(size_t),
522 &nMaxParameterSize, nullptr);
523 SAL_INFO("opencl", "CL_DEVICE_MAX_PARAMETER_SIZE=" << nMaxParameterSize);
525 return false;
528 bool initOpenCLRunEnv( int argc )
530 if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
531 return true;
533 if ( !bIsInited )
535 if ( !gpuEnv.mnIsUserCreated )
536 memset( &gpuEnv, 0, sizeof(gpuEnv) );
538 //initialize devices, context, command_queue
539 bool status = initOpenCLRunEnv( &gpuEnv );
540 if ( status )
542 return true;
544 //initialize program, kernelName, kernelCount
545 if( getenv( "SC_FLOAT" ) )
547 gpuEnv.mnKhrFp64Flag = false;
548 gpuEnv.mnAmdFp64Flag = false;
550 if( gpuEnv.mnKhrFp64Flag )
552 SAL_INFO("opencl", "Use Khr double");
554 else if( gpuEnv.mnAmdFp64Flag )
556 SAL_INFO("opencl", "Use AMD double type");
558 else
560 SAL_INFO("opencl", "USE float type");
562 bIsInited = true;
564 return false;
567 // based on crashes and hanging during kernel compilation
568 void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
570 OpenCLDeviceInfo aDeviceInfo;
571 aDeviceInfo.device = aDeviceId;
573 char pName[DEVICE_NAME_LENGTH];
574 cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, nullptr);
575 if(nState != CL_SUCCESS)
576 return;
578 aDeviceInfo.maName = OUString::createFromAscii(pName);
580 char pVendor[DEVICE_NAME_LENGTH];
581 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, nullptr);
582 if(nState != CL_SUCCESS)
583 return;
585 aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
587 cl_ulong nMemSize;
588 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, nullptr);
589 if(nState != CL_SUCCESS)
590 return;
592 aDeviceInfo.mnMemory = nMemSize;
594 cl_uint nClockFrequency;
595 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, nullptr);
596 if(nState != CL_SUCCESS)
597 return;
599 aDeviceInfo.mnFrequency = nClockFrequency;
601 cl_uint nComputeUnits;
602 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, nullptr);
603 if(nState != CL_SUCCESS)
604 return;
606 char pDriver[DEVICE_NAME_LENGTH];
607 nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, nullptr);
609 if(nState != CL_SUCCESS)
610 return;
612 aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
614 bool bKhrFp64 = false;
615 bool bAmdFp64 = false;
616 checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
618 // only list devices that support double
619 if(!bKhrFp64 && !bAmdFp64)
620 return;
622 aDeviceInfo.mnComputeUnits = nComputeUnits;
624 if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo))
625 rPlatformInfo.maDevices.push_back(aDeviceInfo);
628 bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
630 rPlatformInfo.platform = nPlatformId;
631 char pName[64];
632 cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
633 pName, nullptr);
634 if(nState != CL_SUCCESS)
635 return false;
636 rPlatformInfo.maName = OUString::createFromAscii(pName);
638 char pVendor[64];
639 nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
640 pVendor, nullptr);
641 if(nState != CL_SUCCESS)
642 return false;
644 rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
646 cl_uint nDevices;
647 nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, nullptr, &nDevices);
648 if(nState != CL_SUCCESS)
649 return false;
651 // memory leak that does not matter
652 // memory is stored in static variable that lives through the whole program
653 cl_device_id* pDevices = new cl_device_id[nDevices];
654 nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, nullptr);
655 if(nState != CL_SUCCESS)
656 return false;
658 for(size_t i = 0; i < nDevices; ++i)
660 createDeviceInfo(pDevices[i], rPlatformInfo);
663 return true;
668 const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
670 static std::vector<OpenCLPlatformInfo> aPlatforms;
672 // return early if we already initialized or can't use OpenCL
673 if (!aPlatforms.empty() || !canUseOpenCL())
674 return aPlatforms;
676 int status = clewInit(OPENCL_DLL_NAME);
677 if (status < 0)
678 return aPlatforms;
680 cl_uint nPlatforms;
681 cl_int nState = clGetPlatformIDs(0, nullptr, &nPlatforms);
683 if(nState != CL_SUCCESS)
684 return aPlatforms;
686 // memory leak that does not matter,
687 // memory is stored in static instance aPlatforms
688 cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
689 nState = clGetPlatformIDs(nPlatforms, pPlatforms, nullptr);
691 if(nState != CL_SUCCESS)
692 return aPlatforms;
694 for(size_t i = 0; i < nPlatforms; ++i)
696 OpenCLPlatformInfo aPlatformInfo;
697 if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
698 aPlatforms.push_back(aPlatformInfo);
701 return aPlatforms;
704 namespace {
706 cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
708 for (const OpenCLPlatformInfo& rPlatform : rPlatforms)
710 for (const OpenCLDeviceInfo& rDeviceInfo : rPlatform.maDevices)
712 OUString aDeviceId = rDeviceInfo.maVendor + " " + rDeviceInfo.maName;
713 if (rString == aDeviceId)
715 return rDeviceInfo.device;
720 return nullptr;
723 void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
725 cl_platform_id platformId;
726 cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
727 sizeof(platformId), &platformId, nullptr);
729 if(nState != CL_SUCCESS)
730 return;
732 const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
733 for(size_t i = 0; i < rPlatforms.size(); ++i)
735 cl_platform_id platId = rPlatforms[i].platform;
736 if(platId != platformId)
737 continue;
739 for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
741 cl_device_id id = rPlatforms[i].maDevices[j].device;
742 if(id == aDeviceId)
744 rDeviceId = j;
745 rPlatformId = i;
746 return;
754 bool canUseOpenCL()
756 if( const char* env = getenv( "SC_FORCE_CALCULATION" ))
758 if( strcmp( env, "opencl" ) == 0 )
759 return true;
761 return !getenv("SAL_DISABLE_OPENCL") && officecfg::Office::Common::Misc::UseOpenCL::get();
764 bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation, OUString& rOutSelectedDeviceVersionIDString)
766 if (!canUseOpenCL() || fillOpenCLInfo().empty())
767 return false;
769 cl_device_id pDeviceId = nullptr;
770 if(pDevice)
771 pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
773 if(!pDeviceId || bAutoSelect)
775 int status = clewInit(OPENCL_DLL_NAME);
776 if (status < 0)
777 return false;
779 OUString url(OStringToOUString(getCacheFolder(), RTL_TEXTENCODING_UTF8));
780 OUString path;
781 osl::FileBase::getSystemPathFromFileURL(url,path);
782 ds_device aSelectedDevice = getDeviceSelection(path, bForceEvaluation);
783 if ( aSelectedDevice.eType != DeviceType::OpenCLDevice)
784 return false;
785 pDeviceId = aSelectedDevice.aDeviceID;
788 if(gpuEnv.mpDevID == pDeviceId)
790 // we don't need to change anything
791 // still the same device
792 return pDeviceId != nullptr;
795 cl_context context;
796 cl_platform_id platformId;
799 OpenCLZone zone;
800 cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
801 sizeof(platformId), &platformId, nullptr);
803 cl_context_properties cps[3];
804 cps[0] = CL_CONTEXT_PLATFORM;
805 cps[1] = reinterpret_cast<cl_context_properties>(platformId);
806 cps[2] = 0;
807 context = clCreateContext( cps, 1, &pDeviceId, nullptr, nullptr, &nState );
808 if (nState != CL_SUCCESS)
809 SAL_WARN("opencl", "clCreateContext failed: " << errorString(nState));
811 if(nState != CL_SUCCESS || context == nullptr)
813 if(context != nullptr)
814 clReleaseContext(context);
816 SAL_WARN("opencl", "failed to set/switch opencl device");
817 return false;
819 SAL_INFO("opencl", "Created context " << context << " for platform " << platformId << ", device " << pDeviceId);
821 OString sDeviceID = getDeviceInfoString(pDeviceId, CL_DEVICE_VENDOR) + " " + getDeviceInfoString(pDeviceId, CL_DRIVER_VERSION);
822 rOutSelectedDeviceVersionIDString = OStringToOUString(sDeviceID, RTL_TEXTENCODING_UTF8);
825 setOpenCLCmdQueuePosition(0); // Call this just to avoid the method being deleted from unused function deleter.
827 releaseOpenCLEnv(&gpuEnv);
829 OpenCLEnv env;
830 env.mpOclPlatformID = platformId;
831 env.mpOclContext = context;
832 env.mpOclDevsID = pDeviceId;
834 initOpenCLAttr(&env);
836 return !initOpenCLRunEnv(0);
839 void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
841 if (!canUseOpenCL())
842 return;
844 int status = clewInit(OPENCL_DLL_NAME);
845 if (status < 0)
846 return;
848 cl_device_id id = gpuEnv.mpDevID;
849 findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
852 void getOpenCLDeviceName(OUString& rDeviceName, OUString& rPlatformName)
854 if (!canUseOpenCL())
855 return;
857 int status = clewInit(OPENCL_DLL_NAME);
858 if (status < 0)
859 return;
861 cl_device_id deviceId = gpuEnv.mpDevID;
862 cl_platform_id platformId;
863 if( clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM, sizeof(platformId), &platformId, nullptr) != CL_SUCCESS )
864 return;
866 char deviceName[DEVICE_NAME_LENGTH] = {0};
867 if( clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(deviceName), deviceName, nullptr) != CL_SUCCESS )
868 return;
869 char platformName[64];
870 if( clGetPlatformInfo(platformId, CL_PLATFORM_NAME, 64, platformName, nullptr) != CL_SUCCESS )
871 return;
872 rDeviceName = OUString::createFromAscii(deviceName);
873 rPlatformName = OUString::createFromAscii(platformName);
876 void setOpenCLCmdQueuePosition( int nPos )
878 if (nPos < 0 || nPos >= OPENCL_CMDQUEUE_SIZE)
879 // Out of range. Ignore this.
880 return;
882 gpuEnv.mnCmdQueuePos = nPos;
885 const char* errorString(cl_int nError)
887 #define CASE(val) case CL_##val: return #val
888 switch (nError)
890 CASE(SUCCESS);
891 CASE(DEVICE_NOT_FOUND);
892 CASE(DEVICE_NOT_AVAILABLE);
893 CASE(COMPILER_NOT_AVAILABLE);
894 CASE(MEM_OBJECT_ALLOCATION_FAILURE);
895 CASE(OUT_OF_RESOURCES);
896 CASE(OUT_OF_HOST_MEMORY);
897 CASE(PROFILING_INFO_NOT_AVAILABLE);
898 CASE(MEM_COPY_OVERLAP);
899 CASE(IMAGE_FORMAT_MISMATCH);
900 CASE(IMAGE_FORMAT_NOT_SUPPORTED);
901 CASE(BUILD_PROGRAM_FAILURE);
902 CASE(MAP_FAILURE);
903 CASE(INVALID_VALUE);
904 CASE(INVALID_DEVICE_TYPE);
905 CASE(INVALID_PLATFORM);
906 CASE(INVALID_DEVICE);
907 CASE(INVALID_CONTEXT);
908 CASE(INVALID_QUEUE_PROPERTIES);
909 CASE(INVALID_COMMAND_QUEUE);
910 CASE(INVALID_HOST_PTR);
911 CASE(INVALID_MEM_OBJECT);
912 CASE(INVALID_IMAGE_FORMAT_DESCRIPTOR);
913 CASE(INVALID_IMAGE_SIZE);
914 CASE(INVALID_SAMPLER);
915 CASE(INVALID_BINARY);
916 CASE(INVALID_BUILD_OPTIONS);
917 CASE(INVALID_PROGRAM);
918 CASE(INVALID_PROGRAM_EXECUTABLE);
919 CASE(INVALID_KERNEL_NAME);
920 CASE(INVALID_KERNEL_DEFINITION);
921 CASE(INVALID_KERNEL);
922 CASE(INVALID_ARG_INDEX);
923 CASE(INVALID_ARG_VALUE);
924 CASE(INVALID_ARG_SIZE);
925 CASE(INVALID_KERNEL_ARGS);
926 CASE(INVALID_WORK_DIMENSION);
927 CASE(INVALID_WORK_GROUP_SIZE);
928 CASE(INVALID_WORK_ITEM_SIZE);
929 CASE(INVALID_GLOBAL_OFFSET);
930 CASE(INVALID_EVENT_WAIT_LIST);
931 CASE(INVALID_EVENT);
932 CASE(INVALID_OPERATION);
933 CASE(INVALID_GL_OBJECT);
934 CASE(INVALID_BUFFER_SIZE);
935 CASE(INVALID_MIP_LEVEL);
936 CASE(INVALID_GLOBAL_WORK_SIZE);
937 default:
938 return "Unknown OpenCL error code";
940 #undef CASE
943 bool GPUEnv::isOpenCLEnabled()
945 return gpuEnv.mpDevID && gpuEnv.mpContext;
950 void releaseOpenCLEnv( openclwrapper::GPUEnv *gpuInfo )
952 OpenCLZone zone;
954 if ( !bIsInited )
956 return;
959 for (_cl_command_queue* & i : openclwrapper::gpuEnv.mpCmdQueue)
961 if (i)
963 clReleaseCommandQueue(i);
964 i = nullptr;
967 openclwrapper::gpuEnv.mnCmdQueuePos = 0;
969 if ( openclwrapper::gpuEnv.mpContext )
971 clReleaseContext( openclwrapper::gpuEnv.mpContext );
972 openclwrapper::gpuEnv.mpContext = nullptr;
974 bIsInited = false;
975 gpuInfo->mnIsUserCreated = 0;
978 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */