Version 5.2.6.1, tag libreoffice-5.2.6.1
[LibreOffice.git] / opencl / source / openclwrapper.cxx
blobdcc55d06d9a31f20bb3ff90932617c5c5f603ab7
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <config_folders.h>
12 #include "opencl_device.hxx"
14 #include <comphelper/string.hxx>
15 #include <opencl/openclconfig.hxx>
16 #include <opencl/openclwrapper.hxx>
17 #include <osl/file.hxx>
18 #include <rtl/bootstrap.hxx>
19 #include <rtl/digest.h>
20 #include <rtl/strbuf.hxx>
21 #include <rtl/ustring.hxx>
22 #include <sal/config.h>
23 #include <sal/log.hxx>
24 #include <opencl/OpenCLZone.hxx>
26 #include <memory>
27 #include <unicode/regex.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
33 #include <cmath>
35 #include <officecfg/Office/Common.hxx>
37 #ifdef _WIN32
38 #include <prewin.h>
39 #include <postwin.h>
40 #define OPENCL_DLL_NAME "OpenCL.dll"
41 #elif defined(MACOSX)
42 #define OPENCL_DLL_NAME nullptr
43 #else
44 #define OPENCL_DLL_NAME "libOpenCL.so.1"
45 #endif
47 #ifdef _WIN32_WINNT_WINBLUE
48 #include <VersionHelpers.h>
49 #endif
51 #define DEVICE_NAME_LENGTH 1024
52 #define DRIVER_VERSION_LENGTH 1024
53 #define PLATFORM_VERSION_LENGTH 1024
/* Logs a warning and makes the enclosing function return false when an
 * OpenCL call did not return CL_SUCCESS.
 *
 * status: the cl_int result of the call that was just made.
 * name:   string literal naming that call; it is concatenated into the message.
 *
 * The body is wrapped in do { } while(0) so that the macro expands to a single
 * statement and can be used safely inside an unbraced if/else. */
#define CHECK_OPENCL(status,name) \
do \
{ \
    if( (status) != CL_SUCCESS ) \
    { \
        SAL_WARN( "opencl", "OpenCL error code " << (status) << " at " SAL_DETAIL_WHERE " from " name ); \
        return false; \
    } \
} while(0)
62 using namespace std;
64 namespace opencl {
66 GPUEnv gpuEnv;
67 sal_uInt64 kernelFailures = 0;
69 namespace
72 bool bIsInited = false;
74 OString generateMD5(const void* pData, size_t length)
76 sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
77 rtlDigestError aError = rtl_digest_MD5(pData, length,
78 pBuffer, RTL_DIGEST_LENGTH_MD5);
79 SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed");
81 OStringBuffer aBuffer;
82 const char* pString = "0123456789ABCDEF";
83 for(sal_uInt8 val : pBuffer)
85 aBuffer.append(pString[val/16]);
86 aBuffer.append(pString[val%16]);
88 return aBuffer.makeStringAndClear();
91 OString getCacheFolder()
93 static OString aCacheFolder;
95 if (aCacheFolder.isEmpty())
97 OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
98 rtl::Bootstrap::expandMacros(url);
100 osl::Directory::create(url);
102 aCacheFolder = rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8);
104 return aCacheFolder;
109 bool initializeCommandQueue(GPUEnv& aGpuEnv)
111 OpenCLZone zone;
113 cl_int nState;
114 cl_command_queue command_queue[OPENCL_CMDQUEUE_SIZE];
116 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
118 command_queue[i] = clCreateCommandQueue(aGpuEnv.mpContext, aGpuEnv.mpDevID, 0, &nState);
119 if (nState != CL_SUCCESS)
120 SAL_WARN("opencl", "clCreateCommandQueue failed: " << errorString(nState));
122 if (command_queue[i] == nullptr || nState != CL_SUCCESS)
124 // Release all command queues created so far.
125 for (int j = 0; j <= i; ++j)
127 if (command_queue[j])
129 clReleaseCommandQueue(command_queue[j]);
130 command_queue[j] = nullptr;
134 clReleaseContext(aGpuEnv.mpContext);
135 SAL_WARN("opencl", "failed to set/switch opencl device");
136 return false;
139 SAL_INFO("opencl", "Created command queue " << command_queue[i] << " for context " << aGpuEnv.mpContext);
142 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
144 aGpuEnv.mpCmdQueue[i] = command_queue[i];
146 aGpuEnv.mbCommandQueueInitialized = true;
147 return true;
150 void setKernelEnv( KernelEnv *envInfo )
152 if (!gpuEnv.mbCommandQueueInitialized)
154 initializeCommandQueue(gpuEnv);
157 envInfo->mpkContext = gpuEnv.mpContext;
158 envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
160 assert(gpuEnv.mnCmdQueuePos < OPENCL_CMDQUEUE_SIZE);
161 envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue[gpuEnv.mnCmdQueuePos];
164 namespace {
166 OString createFileName(cl_device_id deviceId, const char* clFileName)
168 OString fileName(clFileName);
169 sal_Int32 nIndex = fileName.lastIndexOf(".cl");
170 if(nIndex > 0)
171 fileName = fileName.copy(0, nIndex);
173 char deviceName[DEVICE_NAME_LENGTH] = {0};
174 clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
175 sizeof(deviceName), deviceName, nullptr);
177 char driverVersion[DRIVER_VERSION_LENGTH] = {0};
178 clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
179 sizeof(driverVersion), driverVersion, nullptr);
181 cl_platform_id platformId;
182 clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
183 sizeof(platformId), &platformId, nullptr);
185 char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
186 clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
187 platformVersion, nullptr);
189 // create hash for deviceName + driver version + platform version
190 OString aString = OString(deviceName) + driverVersion + platformVersion;
191 OString aHash = generateMD5(aString.getStr(), aString.getLength());
193 return getCacheFolder() + fileName + "-" + aHash + ".bin";
196 std::vector<std::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context )
198 size_t numDevices=0;
200 std::vector<std::shared_ptr<osl::File> > aGeneratedFiles;
201 cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
202 0, nullptr, &numDevices );
203 numDevices /= sizeof(numDevices);
205 if(clStatus != CL_SUCCESS)
206 return aGeneratedFiles;
208 assert(numDevices == 1);
210 // grab the handle to the device in the context.
211 cl_device_id pDevID;
212 clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
213 sizeof( cl_device_id ), &pDevID, nullptr );
215 if(clStatus != CL_SUCCESS)
216 return aGeneratedFiles;
218 assert(pDevID == gpuEnv.mpDevID);
220 OString fileName = createFileName(gpuEnv.mpDevID, clFileName);
221 osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
222 if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
224 aGeneratedFiles.push_back(std::shared_ptr<osl::File>(pNewFile));
225 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success");
227 else
229 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
230 delete pNewFile;
233 return aGeneratedFiles;
236 bool writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
238 osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
239 osl::FileBase::RC status = file.open(
240 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
242 if(status != osl::FileBase::E_None)
243 return false;
245 sal_uInt64 nBytesWritten = 0;
246 file.write( binary, numBytes, nBytesWritten );
248 assert(numBytes == nBytesWritten);
250 return true;
255 bool generatBinFromKernelSource( cl_program program, const char * clFileName )
257 cl_uint numDevices;
259 cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
260 sizeof(numDevices), &numDevices, nullptr );
261 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
263 assert(numDevices == 1);
265 cl_device_id pDevID;
266 /* grab the handle to the device in the program. */
267 clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
268 sizeof(cl_device_id), &pDevID, nullptr );
269 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
271 /* figure out the size of the binary. */
272 size_t binarySize;
274 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
275 sizeof(size_t), &binarySize, nullptr );
276 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
278 /* copy over the generated binary. */
279 if ( binarySize != 0 )
281 std::unique_ptr<char[]> binary(new char[binarySize]);
282 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
283 sizeof(char *), &binary, nullptr );
284 CHECK_OPENCL(clStatus,"clGetProgramInfo");
286 OString fileName = createFileName(pDevID, clFileName);
287 if ( !writeBinaryToFile( fileName,
288 binary.get(), binarySize ) )
289 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL");
290 else
291 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success");
293 return true;
296 namespace {
298 struct OpenCLEnv
300 cl_platform_id mpOclPlatformID;
301 cl_context mpOclContext;
302 cl_device_id mpOclDevsID;
303 cl_command_queue mpOclCmdQueue[OPENCL_CMDQUEUE_SIZE];
306 bool initOpenCLAttr( OpenCLEnv * env )
308 if ( gpuEnv.mnIsUserCreated )
309 return true;
311 gpuEnv.mpContext = env->mpOclContext;
312 gpuEnv.mpPlatformID = env->mpOclPlatformID;
313 gpuEnv.mpDevID = env->mpOclDevsID;
315 gpuEnv.mnIsUserCreated = 1;
317 gpuEnv.mbCommandQueueInitialized = false;
319 gpuEnv.mnCmdQueuePos = 0; // default to 0.
321 return false;
326 void releaseOpenCLEnv( GPUEnv *gpuInfo )
328 OpenCLZone zone;
330 if ( !bIsInited )
332 return;
335 for (_cl_command_queue* & i : gpuEnv.mpCmdQueue)
337 if (i)
339 clReleaseCommandQueue(i);
340 i = nullptr;
343 gpuEnv.mnCmdQueuePos = 0;
345 if ( gpuEnv.mpContext )
347 clReleaseContext( gpuEnv.mpContext );
348 gpuEnv.mpContext = nullptr;
350 bIsInited = false;
351 gpuInfo->mnIsUserCreated = 0;
353 return;
356 namespace {
358 bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
360 cl_int clStatus;
361 //char options[512];
362 // create a cl program executable for all the devices specified
363 clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &gpuInfo->mpDevID,
364 buildOption, nullptr, nullptr);
366 if ( clStatus != CL_SUCCESS )
368 size_t length;
369 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
370 CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
371 if ( clStatus != CL_SUCCESS )
373 return false;
376 std::unique_ptr<char[]> buildLog(new char[length]);
377 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
378 CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
379 if ( clStatus != CL_SUCCESS )
381 return false;
384 OString aBuildLogFileURL = getCacheFolder() + "kernel-build.log";
385 osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
386 osl::FileBase::RC status = aBuildLogFile.open(
387 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
389 if(status != osl::FileBase::E_None)
390 return false;
392 sal_uInt64 nBytesWritten = 0;
393 aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
395 return false;
398 return true;
403 bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
405 size_t numDevices;
406 cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
407 0, nullptr, &numDevices );
408 numDevices /= sizeof(numDevices);
409 CHECK_OPENCL( clStatus, "clGetContextInfo" );
411 std::vector<std::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
412 filename, gpuInfo->mpContext );
414 if (aGeneratedFiles.size() == numDevices)
416 std::unique_ptr<size_t[]> length(new size_t[numDevices]);
417 std::unique_ptr<unsigned char*[]> pBinary(new unsigned char*[numDevices]);
418 for(size_t i = 0; i < numDevices; ++i)
420 sal_uInt64 nSize;
421 aGeneratedFiles[i]->getSize(nSize);
422 unsigned char* binary = new unsigned char[nSize];
423 sal_uInt64 nBytesRead;
424 aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
425 if(nSize != nBytesRead)
426 assert(false);
428 length[i] = nBytesRead;
430 pBinary[i] = binary;
433 // grab the handles to all of the devices in the context.
434 std::unique_ptr<cl_device_id[]> pArryDevsID(new cl_device_id[numDevices]);
435 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
436 sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), nullptr );
438 if(clStatus != CL_SUCCESS)
440 for(size_t i = 0; i < numDevices; ++i)
442 delete[] pBinary[i];
444 return false;
447 cl_int binary_status;
449 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
450 pArryDevsID.get(), length.get(), const_cast<const unsigned char**>(pBinary.get()),
451 &binary_status, &clStatus );
452 if(clStatus != CL_SUCCESS)
454 // something went wrong, fall back to compiling from source
455 return false;
457 SAL_INFO("opencl", "Created program " << gpuInfo->mpArryPrograms[idx] << " from binary");
458 for(size_t i = 0; i < numDevices; ++i)
460 delete[] pBinary[i];
464 if ( !gpuInfo->mpArryPrograms[idx] )
466 return false;
468 return buildProgram(buildOption, gpuInfo, idx);
471 namespace {
473 void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
475 OpenCLZone zone;
477 bKhrFp64 = false;
478 bAmdFp64 = false;
480 // Check device extensions for double type
481 size_t aDevExtInfoSize = 0;
483 cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize );
484 if( clStatus != CL_SUCCESS )
485 return;
487 std::unique_ptr<char[]> pExtInfo(new char[aDevExtInfoSize]);
489 clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
490 sizeof(char) * aDevExtInfoSize, pExtInfo.get(), nullptr);
492 if( clStatus != CL_SUCCESS )
493 return;
495 if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
497 bKhrFp64 = true;
499 else
501 // Check if cl_amd_fp64 extension is supported
502 if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
503 bAmdFp64 = true;
507 bool initOpenCLRunEnv( GPUEnv *gpuInfo )
509 OpenCLZone zone;
510 cl_uint nPreferredVectorWidthFloat;
511 char pName[64];
513 bool bKhrFp64 = false;
514 bool bAmdFp64 = false;
516 checkDeviceForDoubleSupport(gpuInfo->mpDevID, bKhrFp64, bAmdFp64);
518 gpuInfo->mnKhrFp64Flag = bKhrFp64;
519 gpuInfo->mnAmdFp64Flag = bAmdFp64;
521 gpuInfo->mbNeedsTDRAvoidance = false;
523 clGetDeviceInfo(gpuInfo->mpDevID, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint),
524 &nPreferredVectorWidthFloat, nullptr);
525 clGetPlatformInfo(gpuInfo->mpPlatformID, CL_PLATFORM_NAME, 64,
526 pName, nullptr);
528 bool bIsNotWinOrIsWin8OrGreater = true;
530 // the Win32 SDK 8.1 deprecates GetVersionEx()
531 #ifdef _WIN32_WINNT_WINBLUE
532 bIsNotWinOrIsWin8OrGreater = IsWindows8OrGreater();
533 #elif defined (_WIN32)
534 OSVERSIONINFO aVersionInfo;
535 memset( &aVersionInfo, 0, sizeof(aVersionInfo) );
536 aVersionInfo.dwOSVersionInfoSize = sizeof( aVersionInfo );
537 if (GetVersionEx( &aVersionInfo ))
539 // Windows 7 or lower?
540 if (aVersionInfo.dwMajorVersion < 6 ||
541 (aVersionInfo.dwMajorVersion == 6 && aVersionInfo.dwMinorVersion < 2))
542 bIsNotWinOrIsWin8OrGreater = false;
544 #endif
546 // Heuristic: Certain old low-end OpenCL implementations don't
547 // work for us with too large group lengths. Looking at the preferred
548 // float vector width seems to be a way to detect these devices, except
549 // the non-working NVIDIA cards on Windows older than version 8.
550 gpuInfo->mbNeedsTDRAvoidance = ( nPreferredVectorWidthFloat == 4 ) ||
551 ( !bIsNotWinOrIsWin8OrGreater &&
552 OUString::createFromAscii(pName).indexOf("NVIDIA") > -1 );
553 return false;
556 bool initOpenCLRunEnv( int argc )
558 if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
559 return true;
561 if ( !bIsInited )
563 if ( !gpuEnv.mnIsUserCreated )
564 memset( &gpuEnv, 0, sizeof(gpuEnv) );
566 //initialize devices, context, command_queue
567 bool status = initOpenCLRunEnv( &gpuEnv );
568 if ( status )
570 return true;
572 //initialize program, kernelName, kernelCount
573 if( getenv( "SC_FLOAT" ) )
575 gpuEnv.mnKhrFp64Flag = false;
576 gpuEnv.mnAmdFp64Flag = false;
578 if( gpuEnv.mnKhrFp64Flag )
580 SAL_INFO("opencl", "Use Khr double");
582 else if( gpuEnv.mnAmdFp64Flag )
584 SAL_INFO("opencl", "Use AMD double type");
586 else
588 SAL_INFO("opencl", "USE float type");
590 bIsInited = true;
592 return false;
595 // based on crashes and hanging during kernel compilation
596 void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
598 OpenCLDeviceInfo aDeviceInfo;
599 aDeviceInfo.device = aDeviceId;
601 char pName[DEVICE_NAME_LENGTH];
602 cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, nullptr);
603 if(nState != CL_SUCCESS)
604 return;
606 aDeviceInfo.maName = OUString::createFromAscii(pName);
608 char pVendor[DEVICE_NAME_LENGTH];
609 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, nullptr);
610 if(nState != CL_SUCCESS)
611 return;
613 aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
615 cl_ulong nMemSize;
616 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, nullptr);
617 if(nState != CL_SUCCESS)
618 return;
620 aDeviceInfo.mnMemory = nMemSize;
622 cl_uint nClockFrequency;
623 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, nullptr);
624 if(nState != CL_SUCCESS)
625 return;
627 aDeviceInfo.mnFrequency = nClockFrequency;
629 cl_uint nComputeUnits;
630 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, nullptr);
631 if(nState != CL_SUCCESS)
632 return;
634 char pDriver[DEVICE_NAME_LENGTH];
635 nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, nullptr);
637 if(nState != CL_SUCCESS)
638 return;
640 aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
642 bool bKhrFp64 = false;
643 bool bAmdFp64 = false;
644 checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
646 // only list devices that support double
647 if(!bKhrFp64 && !bAmdFp64)
648 return;
650 aDeviceInfo.mnComputeUnits = nComputeUnits;
652 if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo))
653 rPlatformInfo.maDevices.push_back(aDeviceInfo);
656 bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
658 rPlatformInfo.platform = nPlatformId;
659 char pName[64];
660 cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
661 pName, nullptr);
662 if(nState != CL_SUCCESS)
663 return false;
664 rPlatformInfo.maName = OUString::createFromAscii(pName);
666 char pVendor[64];
667 nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
668 pVendor, nullptr);
669 if(nState != CL_SUCCESS)
670 return false;
672 rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
674 cl_uint nDevices;
675 nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, nullptr, &nDevices);
676 if(nState != CL_SUCCESS)
677 return false;
679 // memory leak that does not matter
680 // memory is stored in static variable that lives through the whole program
681 cl_device_id* pDevices = new cl_device_id[nDevices];
682 nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, nullptr);
683 if(nState != CL_SUCCESS)
684 return false;
686 for(size_t i = 0; i < nDevices; ++i)
688 createDeviceInfo(pDevices[i], rPlatformInfo);
691 return true;
696 const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
698 static std::vector<OpenCLPlatformInfo> aPlatforms;
700 // return early if we already initialized or can't use OpenCL
701 if (!aPlatforms.empty() || !canUseOpenCL())
702 return aPlatforms;
704 int status = clewInit(OPENCL_DLL_NAME);
705 if (status < 0)
706 return aPlatforms;
708 cl_uint nPlatforms;
709 cl_int nState = clGetPlatformIDs(0, nullptr, &nPlatforms);
711 if(nState != CL_SUCCESS)
712 return aPlatforms;
714 // memory leak that does not matter,
715 // memory is stored in static instance aPlatforms
716 cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
717 nState = clGetPlatformIDs(nPlatforms, pPlatforms, nullptr);
719 if(nState != CL_SUCCESS)
720 return aPlatforms;
722 for(size_t i = 0; i < nPlatforms; ++i)
724 OpenCLPlatformInfo aPlatformInfo;
725 if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
726 aPlatforms.push_back(aPlatformInfo);
729 return aPlatforms;
732 namespace {
734 cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
736 std::vector<OpenCLPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end();
737 for(; it != itEnd; ++it)
739 std::vector<OpenCLDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end();
740 for(; itr != itrEnd; ++itr)
742 OUString aDeviceId = it->maVendor + " " + itr->maName;
743 if(rString == aDeviceId)
745 return static_cast<cl_device_id>(itr->device);
750 return nullptr;
753 void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
755 cl_platform_id platformId;
756 cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
757 sizeof(platformId), &platformId, nullptr);
759 if(nState != CL_SUCCESS)
760 return;
762 const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
763 for(size_t i = 0; i < rPlatforms.size(); ++i)
765 cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform);
766 if(platId != platformId)
767 continue;
769 for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
771 cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device);
772 if(id == aDeviceId)
774 rDeviceId = j;
775 rPlatformId = i;
776 return;
784 bool canUseOpenCL()
786 if (getenv("SAL_DISABLE_OPENCL") || !officecfg::Office::Common::Misc::UseOpenCL::get())
787 return false;
788 return true;
791 bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation, OUString& rOutSelectedDeviceVersionIDString)
793 if (!canUseOpenCL() || fillOpenCLInfo().empty())
794 return false;
796 cl_device_id pDeviceId = nullptr;
797 if(pDevice)
798 pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
800 if(!pDeviceId || bAutoSelect)
802 int status = clewInit(OPENCL_DLL_NAME);
803 if (status < 0)
804 return false;
806 OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
807 rtl::Bootstrap::expandMacros(url);
808 OUString path;
809 osl::FileBase::getSystemPathFromFileURL(url,path);
810 ds_device aSelectedDevice = getDeviceSelection(path, bForceEvaluation);
811 if ( aSelectedDevice.eType != DeviceType::OpenCLDevice)
812 return false;
813 pDeviceId = aSelectedDevice.aDeviceID;
816 if(gpuEnv.mpDevID == pDeviceId)
818 // we don't need to change anything
819 // still the same device
820 return pDeviceId != nullptr;
823 cl_context context;
824 cl_platform_id platformId;
827 OpenCLZone zone;
828 cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
829 sizeof(platformId), &platformId, nullptr);
831 cl_context_properties cps[3];
832 cps[0] = CL_CONTEXT_PLATFORM;
833 cps[1] = reinterpret_cast<cl_context_properties>(platformId);
834 cps[2] = 0;
835 context = clCreateContext( cps, 1, &pDeviceId, nullptr, nullptr, &nState );
836 if (nState != CL_SUCCESS)
837 SAL_WARN("opencl", "clCreateContext failed: " << errorString(nState));
839 if(nState != CL_SUCCESS || context == nullptr)
841 if(context != nullptr)
842 clReleaseContext(context);
844 SAL_WARN("opencl", "failed to set/switch opencl device");
845 return false;
847 SAL_INFO("opencl", "Created context " << context << " for platform " << platformId << ", device " << pDeviceId);
849 OString sDeviceID = getDeviceInfoString(pDeviceId, CL_DEVICE_VENDOR) + " " + getDeviceInfoString(pDeviceId, CL_DRIVER_VERSION);
850 rOutSelectedDeviceVersionIDString = OStringToOUString(sDeviceID, RTL_TEXTENCODING_UTF8);
853 setOpenCLCmdQueuePosition(0); // Call this just to avoid the method being deleted from unused function deleter.
855 releaseOpenCLEnv(&gpuEnv);
857 OpenCLEnv env;
858 env.mpOclPlatformID = platformId;
859 env.mpOclContext = context;
860 env.mpOclDevsID = pDeviceId;
862 initOpenCLAttr(&env);
864 return !initOpenCLRunEnv(0);
867 void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
869 if (!canUseOpenCL())
870 return;
872 int status = clewInit(OPENCL_DLL_NAME);
873 if (status < 0)
874 return;
876 cl_device_id id = gpuEnv.mpDevID;
877 findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
880 void setOpenCLCmdQueuePosition( int nPos )
882 if (nPos < 0 || nPos >= OPENCL_CMDQUEUE_SIZE)
883 // Out of range. Ignore this.
884 return;
886 gpuEnv.mnCmdQueuePos = nPos;
889 const char* errorString(cl_int nError)
891 #define CASE(val) case CL_##val: return #val
892 switch (nError)
894 CASE(SUCCESS);
895 CASE(DEVICE_NOT_FOUND);
896 CASE(DEVICE_NOT_AVAILABLE);
897 CASE(COMPILER_NOT_AVAILABLE);
898 CASE(MEM_OBJECT_ALLOCATION_FAILURE);
899 CASE(OUT_OF_RESOURCES);
900 CASE(OUT_OF_HOST_MEMORY);
901 CASE(PROFILING_INFO_NOT_AVAILABLE);
902 CASE(MEM_COPY_OVERLAP);
903 CASE(IMAGE_FORMAT_MISMATCH);
904 CASE(IMAGE_FORMAT_NOT_SUPPORTED);
905 CASE(BUILD_PROGRAM_FAILURE);
906 CASE(MAP_FAILURE);
907 CASE(INVALID_VALUE);
908 CASE(INVALID_DEVICE_TYPE);
909 CASE(INVALID_PLATFORM);
910 CASE(INVALID_DEVICE);
911 CASE(INVALID_CONTEXT);
912 CASE(INVALID_QUEUE_PROPERTIES);
913 CASE(INVALID_COMMAND_QUEUE);
914 CASE(INVALID_HOST_PTR);
915 CASE(INVALID_MEM_OBJECT);
916 CASE(INVALID_IMAGE_FORMAT_DESCRIPTOR);
917 CASE(INVALID_IMAGE_SIZE);
918 CASE(INVALID_SAMPLER);
919 CASE(INVALID_BINARY);
920 CASE(INVALID_BUILD_OPTIONS);
921 CASE(INVALID_PROGRAM);
922 CASE(INVALID_PROGRAM_EXECUTABLE);
923 CASE(INVALID_KERNEL_NAME);
924 CASE(INVALID_KERNEL_DEFINITION);
925 CASE(INVALID_KERNEL);
926 CASE(INVALID_ARG_INDEX);
927 CASE(INVALID_ARG_VALUE);
928 CASE(INVALID_ARG_SIZE);
929 CASE(INVALID_KERNEL_ARGS);
930 CASE(INVALID_WORK_DIMENSION);
931 CASE(INVALID_WORK_GROUP_SIZE);
932 CASE(INVALID_WORK_ITEM_SIZE);
933 CASE(INVALID_GLOBAL_OFFSET);
934 CASE(INVALID_EVENT_WAIT_LIST);
935 CASE(INVALID_EVENT);
936 CASE(INVALID_OPERATION);
937 CASE(INVALID_GL_OBJECT);
938 CASE(INVALID_BUFFER_SIZE);
939 CASE(INVALID_MIP_LEVEL);
940 CASE(INVALID_GLOBAL_WORK_SIZE);
941 default:
942 return "Unknown OpenCL error code";
944 #undef CASE
947 bool GPUEnv::isOpenCLEnabled()
949 return gpuEnv.mpDevID && gpuEnv.mpContext;
954 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */