bump product version to 5.0.4.1
[LibreOffice.git] / opencl / source / openclwrapper.cxx
blob9d03a2780220f12374abd50ae0d537c4c43b89f4
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <config_folders.h>
12 #include "opencl_device.hxx"
14 #include <comphelper/string.hxx>
15 #include <opencl/openclconfig.hxx>
16 #include <opencl/openclwrapper.hxx>
17 #include <osl/file.hxx>
18 #include <rtl/bootstrap.hxx>
19 #include <rtl/digest.h>
20 #include <rtl/strbuf.hxx>
21 #include <rtl/ustring.hxx>
22 #include <sal/config.h>
23 #include <sal/log.hxx>
25 #include <boost/scoped_array.hpp>
26 #include <unicode/regex.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <string.h>
32 #include <cmath>
34 #ifdef _WIN32
35 #include <prewin.h>
36 #include <postwin.h>
37 #define OPENCL_DLL_NAME "OpenCL.dll"
38 #elif defined(MACOSX)
39 #define OPENCL_DLL_NAME NULL
40 #else
41 #define OPENCL_DLL_NAME "libOpenCL.so"
42 #endif
44 #define DEVICE_NAME_LENGTH 1024
45 #define DRIVER_VERSION_LENGTH 1024
46 #define PLATFORM_VERSION_LENGTH 1024
// Checks the result of an OpenCL call. When 'status' is not CL_SUCCESS a
// warning naming the call ('name', a string literal) is logged and the
// *enclosing* function returns false, so this may only be used inside
// functions returning bool.
//
// The body is wrapped in do { ... } while (false) so that the macro behaves
// like a single statement (safe in un-braced if/else), and 'status' is
// parenthesised so that arbitrary expressions can be passed.
#define CHECK_OPENCL(status,name) \
    do \
    { \
        if( (status) != CL_SUCCESS ) \
        { \
            SAL_WARN( "opencl", "OpenCL error code " << (status) << " at " SAL_DETAIL_WHERE " from " name ); \
            return false; \
        } \
    } while (false)
55 using namespace std;
57 namespace opencl {
// Shared OpenCL state used throughout this file: the current context, platform
// and device handles, the command queues, the program table and the fp64 flags.
59 GPUEnv gpuEnv;
61 namespace {
// Set to true by initOpenCLRunEnv(int) once the run environment has been set
// up, and back to false by releaseOpenCLEnv().
63 bool bIsInited = false;
65 OString generateMD5(const void* pData, size_t length)
67 sal_uInt8 pBuffer[RTL_DIGEST_LENGTH_MD5];
68 rtlDigestError aError = rtl_digest_MD5(pData, length,
69 pBuffer, RTL_DIGEST_LENGTH_MD5);
70 SAL_WARN_IF(aError != rtl_Digest_E_None, "opencl", "md5 generation failed");
72 OStringBuffer aBuffer;
73 const char* pString = "0123456789ABCDEF";
74 for(size_t i = 0; i < RTL_DIGEST_LENGTH_MD5; ++i)
76 sal_uInt8 val = pBuffer[i];
77 aBuffer.append(pString[val/16]);
78 aBuffer.append(pString[val%16]);
80 return aBuffer.makeStringAndClear();
83 OString getCacheFolder()
85 OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
86 rtl::Bootstrap::expandMacros(url);
88 osl::Directory::create(url);
90 return rtl::OUStringToOString(url, RTL_TEXTENCODING_UTF8);
// UTF-8 URL of the cache folder, initialised from getCacheFolder(); used as the
// prefix for the cached kernel binaries and the kernel build log.
93 OString maCacheFolder = getCacheFolder();
97 void setKernelEnv( KernelEnv *envInfo )
99 envInfo->mpkContext = gpuEnv.mpContext;
100 envInfo->mpkProgram = gpuEnv.mpArryPrograms[0];
102 assert(gpuEnv.mnCmdQueuePos < OPENCL_CMDQUEUE_SIZE);
103 envInfo->mpkCmdQueue = gpuEnv.mpCmdQueue[gpuEnv.mnCmdQueuePos];
106 namespace {
108 OString createFileName(cl_device_id deviceId, const char* clFileName)
110 OString fileName(clFileName);
111 sal_Int32 nIndex = fileName.lastIndexOf(".cl");
112 if(nIndex > 0)
113 fileName = fileName.copy(0, nIndex);
115 char deviceName[DEVICE_NAME_LENGTH] = {0};
116 clGetDeviceInfo(deviceId, CL_DEVICE_NAME,
117 sizeof(deviceName), deviceName, NULL);
119 char driverVersion[DRIVER_VERSION_LENGTH] = {0};
120 clGetDeviceInfo(deviceId, CL_DRIVER_VERSION,
121 sizeof(driverVersion), driverVersion, NULL);
123 cl_platform_id platformId;
124 clGetDeviceInfo(deviceId, CL_DEVICE_PLATFORM,
125 sizeof(platformId), &platformId, NULL);
127 char platformVersion[PLATFORM_VERSION_LENGTH] = {0};
128 clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, sizeof(platformVersion),
129 platformVersion, NULL);
131 // create hash for deviceName + driver version + platform version
132 OString aString = OString(deviceName) + driverVersion + platformVersion;
133 OString aHash = generateMD5(aString.getStr(), aString.getLength());
135 return maCacheFolder + fileName + "-" +
136 aHash + ".bin";
139 std::vector<boost::shared_ptr<osl::File> > binaryGenerated( const char * clFileName, cl_context context )
141 size_t numDevices=0;
143 std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles;
144 cl_int clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
145 0, NULL, &numDevices );
146 numDevices /= sizeof(numDevices);
148 if(clStatus != CL_SUCCESS)
149 return aGeneratedFiles;
151 // grab the handles to all of the devices in the context.
152 boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]);
153 clStatus = clGetContextInfo( context, CL_CONTEXT_DEVICES,
154 sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL );
156 if(clStatus != CL_SUCCESS)
157 return aGeneratedFiles;
159 for ( size_t i = 0; i < numDevices; i++ )
161 if ( pArryDevsID[i] != 0 )
163 OString fileName = createFileName(gpuEnv.mpArryDevsID[i], clFileName);
164 osl::File* pNewFile = new osl::File(rtl::OStringToOUString(fileName, RTL_TEXTENCODING_UTF8));
165 if(pNewFile->open(osl_File_OpenFlag_Read) == osl::FileBase::E_None)
167 aGeneratedFiles.push_back(boost::shared_ptr<osl::File>(pNewFile));
168 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: success");
170 else
172 SAL_INFO("opencl.file", "Opening binary file '" << fileName << "' for reading: FAIL");
173 delete pNewFile;
174 break;
179 return aGeneratedFiles;
182 bool writeBinaryToFile( const OString& rFileName, const char* binary, size_t numBytes )
184 osl::File file(rtl::OStringToOUString(rFileName, RTL_TEXTENCODING_UTF8));
185 osl::FileBase::RC status = file.open(
186 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
188 if(status != osl::FileBase::E_None)
189 return false;
191 sal_uInt64 nBytesWritten = 0;
192 file.write( binary, numBytes, nBytesWritten );
194 assert(numBytes == nBytesWritten);
196 return true;
201 bool generatBinFromKernelSource( cl_program program, const char * clFileName )
203 cl_uint numDevices;
205 cl_int clStatus = clGetProgramInfo( program, CL_PROGRAM_NUM_DEVICES,
206 sizeof(numDevices), &numDevices, NULL );
207 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
209 std::vector<cl_device_id> pArryDevsID(numDevices);
210 /* grab the handles to all of the devices in the program. */
211 clStatus = clGetProgramInfo( program, CL_PROGRAM_DEVICES,
212 sizeof(cl_device_id) * numDevices, &pArryDevsID[0], NULL );
213 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
215 /* figure out the sizes of each of the binaries. */
216 std::vector<size_t> binarySizes(numDevices);
218 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARY_SIZES,
219 sizeof(size_t) * numDevices, &binarySizes[0], NULL );
220 CHECK_OPENCL( clStatus, "clGetProgramInfo" );
222 /* copy over all of the generated binaries. */
223 boost::scoped_array<char*> binaries(new char*[numDevices]);
225 for ( size_t i = 0; i < numDevices; i++ )
227 if ( binarySizes[i] != 0 )
229 binaries[i] = new char[binarySizes[i]];
231 else
233 binaries[i] = NULL;
237 clStatus = clGetProgramInfo( program, CL_PROGRAM_BINARIES,
238 sizeof(char *) * numDevices, binaries.get(), NULL );
239 CHECK_OPENCL(clStatus,"clGetProgramInfo");
241 /* dump out each binary into its own separate file. */
242 for ( size_t i = 0; i < numDevices; i++ )
245 if ( binarySizes[i] != 0 )
247 OString fileName = createFileName(pArryDevsID[i], clFileName);
248 if ( !writeBinaryToFile( fileName,
249 binaries[i], binarySizes[i] ) )
250 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': FAIL");
251 else
252 SAL_INFO("opencl.file", "Writing binary file '" << fileName << "': success");
256 // Release all resources and memory
257 for ( size_t i = 0; i < numDevices; i++ )
259 delete[] binaries[i];
262 return true;
265 namespace {
267 struct OpenCLEnv
269 cl_platform_id mpOclPlatformID;
270 cl_context mpOclContext;
271 cl_device_id mpOclDevsID;
272 cl_command_queue mpOclCmdQueue[OPENCL_CMDQUEUE_SIZE];
275 bool initOpenCLAttr( OpenCLEnv * env )
277 if ( gpuEnv.mnIsUserCreated )
278 return true;
280 gpuEnv.mpContext = env->mpOclContext;
281 gpuEnv.mpPlatformID = env->mpOclPlatformID;
282 gpuEnv.mpDevID = env->mpOclDevsID;
284 gpuEnv.mnIsUserCreated = 1;
286 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
287 gpuEnv.mpCmdQueue[i] = env->mpOclCmdQueue[i];
289 gpuEnv.mnCmdQueuePos = 0; // default to 0.
291 return false;
294 void releaseOpenCLEnv( GPUEnv *gpuInfo )
296 if ( !bIsInited )
298 return;
301 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
303 if (gpuEnv.mpCmdQueue[i])
305 clReleaseCommandQueue(gpuEnv.mpCmdQueue[i]);
306 gpuEnv.mpCmdQueue[i] = NULL;
309 gpuEnv.mnCmdQueuePos = 0;
311 if ( gpuEnv.mpContext )
313 clReleaseContext( gpuEnv.mpContext );
314 gpuEnv.mpContext = NULL;
316 bIsInited = false;
317 gpuInfo->mnIsUserCreated = 0;
318 free( gpuInfo->mpArryDevsID );
320 return;
323 bool buildProgram(const char* buildOption, GPUEnv* gpuInfo, int idx)
325 cl_int clStatus;
326 //char options[512];
327 // create a cl program executable for all the devices specified
328 if (!gpuInfo->mnIsUserCreated)
330 clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, gpuInfo->mpArryDevsID,
331 buildOption, NULL, NULL);
333 else
335 clStatus = clBuildProgram(gpuInfo->mpArryPrograms[idx], 1, &(gpuInfo->mpDevID),
336 buildOption, NULL, NULL);
339 if ( clStatus != CL_SUCCESS )
341 size_t length;
342 if ( !gpuInfo->mnIsUserCreated )
344 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
345 CL_PROGRAM_BUILD_LOG, 0, NULL, &length );
347 else
349 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
350 CL_PROGRAM_BUILD_LOG, 0, NULL, &length);
352 if ( clStatus != CL_SUCCESS )
354 return false;
357 boost::scoped_array<char> buildLog(new char[length]);
358 if ( !gpuInfo->mnIsUserCreated )
360 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpArryDevsID[0],
361 CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
363 else
365 clStatus = clGetProgramBuildInfo( gpuInfo->mpArryPrograms[idx], gpuInfo->mpDevID,
366 CL_PROGRAM_BUILD_LOG, length, buildLog.get(), &length );
368 if ( clStatus != CL_SUCCESS )
370 return false;
373 OString aBuildLogFileURL = maCacheFolder + "kernel-build.log";
374 osl::File aBuildLogFile(rtl::OStringToOUString(aBuildLogFileURL, RTL_TEXTENCODING_UTF8));
375 osl::FileBase::RC status = aBuildLogFile.open(
376 osl_File_OpenFlag_Write | osl_File_OpenFlag_Create );
378 if(status != osl::FileBase::E_None)
379 return false;
381 sal_uInt64 nBytesWritten = 0;
382 aBuildLogFile.write( buildLog.get(), length, nBytesWritten );
384 return false;
387 return true;
392 bool buildProgramFromBinary(const char* buildOption, GPUEnv* gpuInfo, const char* filename, int idx)
394 size_t numDevices;
395 cl_int clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
396 0, NULL, &numDevices );
397 numDevices /= sizeof(numDevices);
398 CHECK_OPENCL( clStatus, "clGetContextInfo" );
400 std::vector<boost::shared_ptr<osl::File> > aGeneratedFiles = binaryGenerated(
401 filename, gpuInfo->mpContext );
403 if (aGeneratedFiles.size() == numDevices)
405 boost::scoped_array<size_t> length(new size_t[numDevices]);
406 boost::scoped_array<unsigned char*> pBinary(new unsigned char*[numDevices]);
407 for(size_t i = 0; i < numDevices; ++i)
409 sal_uInt64 nSize;
410 aGeneratedFiles[i]->getSize(nSize);
411 unsigned char* binary = new unsigned char[nSize];
412 sal_uInt64 nBytesRead;
413 aGeneratedFiles[i]->read(binary, nSize, nBytesRead);
414 if(nSize != nBytesRead)
415 assert(false);
417 length[i] = nBytesRead;
419 pBinary[i] = binary;
422 // grab the handles to all of the devices in the context.
423 boost::scoped_array<cl_device_id> pArryDevsID(new cl_device_id[numDevices]);
424 clStatus = clGetContextInfo( gpuInfo->mpContext, CL_CONTEXT_DEVICES,
425 sizeof( cl_device_id ) * numDevices, pArryDevsID.get(), NULL );
427 if(clStatus != CL_SUCCESS)
429 for(size_t i = 0; i < numDevices; ++i)
431 delete[] pBinary[i];
433 return false;
436 cl_int binary_status;
438 gpuInfo->mpArryPrograms[idx] = clCreateProgramWithBinary( gpuInfo->mpContext,numDevices,
439 pArryDevsID.get(), length.get(), (const unsigned char**) pBinary.get(),
440 &binary_status, &clStatus );
441 if(clStatus != CL_SUCCESS)
443 // something went wrong, fall back to compiling from source
444 return false;
446 SAL_INFO("opencl", "Created program " << gpuInfo->mpArryPrograms[idx] << " from binary");
447 for(size_t i = 0; i < numDevices; ++i)
449 delete[] pBinary[i];
453 if ( !gpuInfo->mpArryPrograms[idx] )
455 return false;
457 return buildProgram(buildOption, gpuInfo, idx);
460 namespace {
462 void checkDeviceForDoubleSupport(cl_device_id deviceId, bool& bKhrFp64, bool& bAmdFp64)
464 bKhrFp64 = false;
465 bAmdFp64 = false;
467 // Check device extensions for double type
468 size_t aDevExtInfoSize = 0;
470 cl_uint clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS, 0, NULL, &aDevExtInfoSize );
471 if( clStatus != CL_SUCCESS )
472 return;
474 boost::scoped_array<char> pExtInfo(new char[aDevExtInfoSize]);
476 clStatus = clGetDeviceInfo( deviceId, CL_DEVICE_EXTENSIONS,
477 sizeof(char) * aDevExtInfoSize, pExtInfo.get(), NULL);
479 if( clStatus != CL_SUCCESS )
480 return;
482 if ( strstr( pExtInfo.get(), "cl_khr_fp64" ) )
484 bKhrFp64 = true;
486 else
488 // Check if cl_amd_fp64 extension is supported
489 if ( strstr( pExtInfo.get(), "cl_amd_fp64" ) )
490 bAmdFp64 = true;
494 bool initOpenCLRunEnv( GPUEnv *gpuInfo )
496 bool bKhrFp64 = false;
497 bool bAmdFp64 = false;
499 checkDeviceForDoubleSupport(gpuInfo->mpArryDevsID[0], bKhrFp64, bAmdFp64);
501 gpuInfo->mnKhrFp64Flag = bKhrFp64;
502 gpuInfo->mnAmdFp64Flag = bAmdFp64;
504 gpuInfo->mnPreferredVectorWidthFloat = 0;
506 clGetDeviceInfo(gpuInfo->mpArryDevsID[0], CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint),
507 &gpuInfo->mnPreferredVectorWidthFloat, NULL);
509 return false;
512 bool initOpenCLRunEnv( int argc )
514 if ( ( argc > MAX_CLFILE_NUM ) || ( argc < 0 ) )
515 return true;
517 if ( !bIsInited )
519 if ( !gpuEnv.mnIsUserCreated )
520 memset( &gpuEnv, 0, sizeof(gpuEnv) );
522 //initialize devices, context, command_queue
523 bool status = initOpenCLRunEnv( &gpuEnv );
524 if ( status )
526 return true;
528 //initialize program, kernelName, kernelCount
529 if( getenv( "SC_FLOAT" ) )
531 gpuEnv.mnKhrFp64Flag = false;
532 gpuEnv.mnAmdFp64Flag = false;
534 if( gpuEnv.mnKhrFp64Flag )
536 SAL_INFO("opencl", "Use Khr double");
538 else if( gpuEnv.mnAmdFp64Flag )
540 SAL_INFO("opencl", "Use AMD double type");
542 else
544 SAL_INFO("opencl", "USE float type");
546 bIsInited = true;
548 return false;
551 // based on crashes and hanging during kernel compilation
552 void createDeviceInfo(cl_device_id aDeviceId, OpenCLPlatformInfo& rPlatformInfo)
554 OpenCLDeviceInfo aDeviceInfo;
555 aDeviceInfo.device = aDeviceId;
557 char pName[DEVICE_NAME_LENGTH];
558 cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_NAME, DEVICE_NAME_LENGTH, pName, NULL);
559 if(nState != CL_SUCCESS)
560 return;
562 aDeviceInfo.maName = OUString::createFromAscii(pName);
564 char pVendor[DEVICE_NAME_LENGTH];
565 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_VENDOR, DEVICE_NAME_LENGTH, pVendor, NULL);
566 if(nState != CL_SUCCESS)
567 return;
569 aDeviceInfo.maVendor = OUString::createFromAscii(pVendor);
571 cl_ulong nMemSize;
572 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(nMemSize), &nMemSize, NULL);
573 if(nState != CL_SUCCESS)
574 return;
576 aDeviceInfo.mnMemory = nMemSize;
578 cl_uint nClockFrequency;
579 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(nClockFrequency), &nClockFrequency, NULL);
580 if(nState != CL_SUCCESS)
581 return;
583 aDeviceInfo.mnFrequency = nClockFrequency;
585 cl_uint nComputeUnits;
586 nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, NULL);
587 if(nState != CL_SUCCESS)
588 return;
590 char pDriver[DEVICE_NAME_LENGTH];
591 nState = clGetDeviceInfo(aDeviceId, CL_DRIVER_VERSION, DEVICE_NAME_LENGTH, pDriver, NULL);
593 if(nState != CL_SUCCESS)
594 return;
596 aDeviceInfo.maDriver = OUString::createFromAscii(pDriver);
598 bool bKhrFp64 = false;
599 bool bAmdFp64 = false;
600 checkDeviceForDoubleSupport(aDeviceId, bKhrFp64, bAmdFp64);
602 // only list devices that support double
603 if(!bKhrFp64 && !bAmdFp64)
604 return;
606 aDeviceInfo.mnComputeUnits = nComputeUnits;
608 if(!OpenCLConfig::get().checkImplementation(rPlatformInfo, aDeviceInfo))
609 rPlatformInfo.maDevices.push_back(aDeviceInfo);
612 bool createPlatformInfo(cl_platform_id nPlatformId, OpenCLPlatformInfo& rPlatformInfo)
614 rPlatformInfo.platform = nPlatformId;
615 char pName[64];
616 cl_int nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_NAME, 64,
617 pName, NULL);
618 if(nState != CL_SUCCESS)
619 return false;
620 rPlatformInfo.maName = OUString::createFromAscii(pName);
622 char pVendor[64];
623 nState = clGetPlatformInfo(nPlatformId, CL_PLATFORM_VENDOR, 64,
624 pVendor, NULL);
625 if(nState != CL_SUCCESS)
626 return false;
628 rPlatformInfo.maVendor = OUString::createFromAscii(pVendor);
630 cl_uint nDevices;
631 nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, 0, NULL, &nDevices);
632 if(nState != CL_SUCCESS)
633 return false;
635 // memory leak that does not matter
636 // memory is stored in static variable that lives through the whole program
637 cl_device_id* pDevices = new cl_device_id[nDevices];
638 nState = clGetDeviceIDs(nPlatformId, CL_DEVICE_TYPE_ALL, nDevices, pDevices, NULL);
639 if(nState != CL_SUCCESS)
640 return false;
642 for(size_t i = 0; i < nDevices; ++i)
644 createDeviceInfo(pDevices[i], rPlatformInfo);
647 return true;
652 const std::vector<OpenCLPlatformInfo>& fillOpenCLInfo()
654 static std::vector<OpenCLPlatformInfo> aPlatforms;
655 if(!aPlatforms.empty())
656 return aPlatforms;
658 int status = clewInit(OPENCL_DLL_NAME);
659 if (status < 0)
660 return aPlatforms;
662 cl_uint nPlatforms;
663 cl_int nState = clGetPlatformIDs(0, NULL, &nPlatforms);
665 if(nState != CL_SUCCESS)
666 return aPlatforms;
668 // memory leak that does not matter,
669 // memory is stored in static instance aPlatforms
670 cl_platform_id* pPlatforms = new cl_platform_id[nPlatforms];
671 nState = clGetPlatformIDs(nPlatforms, pPlatforms, NULL);
673 if(nState != CL_SUCCESS)
674 return aPlatforms;
676 for(size_t i = 0; i < nPlatforms; ++i)
678 OpenCLPlatformInfo aPlatformInfo;
679 if(createPlatformInfo(pPlatforms[i], aPlatformInfo))
680 aPlatforms.push_back(aPlatformInfo);
683 return aPlatforms;
686 namespace {
688 cl_device_id findDeviceIdByDeviceString(const OUString& rString, const std::vector<OpenCLPlatformInfo>& rPlatforms)
690 std::vector<OpenCLPlatformInfo>::const_iterator it = rPlatforms.begin(), itEnd = rPlatforms.end();
691 for(; it != itEnd; ++it)
693 std::vector<OpenCLDeviceInfo>::const_iterator itr = it->maDevices.begin(), itrEnd = it->maDevices.end();
694 for(; itr != itrEnd; ++itr)
696 OUString aDeviceId = it->maVendor + " " + itr->maName;
697 if(rString == aDeviceId)
699 return static_cast<cl_device_id>(itr->device);
704 return NULL;
707 void findDeviceInfoFromDeviceId(cl_device_id aDeviceId, size_t& rDeviceId, size_t& rPlatformId)
709 cl_platform_id platformId;
710 cl_int nState = clGetDeviceInfo(aDeviceId, CL_DEVICE_PLATFORM,
711 sizeof(platformId), &platformId, NULL);
713 if(nState != CL_SUCCESS)
714 return;
716 const std::vector<OpenCLPlatformInfo>& rPlatforms = fillOpenCLInfo();
717 for(size_t i = 0; i < rPlatforms.size(); ++i)
719 cl_platform_id platId = static_cast<cl_platform_id>(rPlatforms[i].platform);
720 if(platId != platformId)
721 continue;
723 for(size_t j = 0; j < rPlatforms[i].maDevices.size(); ++j)
725 cl_device_id id = static_cast<cl_device_id>(rPlatforms[i].maDevices[j].device);
726 if(id == aDeviceId)
728 rDeviceId = j;
729 rPlatformId = i;
730 return;
738 bool switchOpenCLDevice(const OUString* pDevice, bool bAutoSelect, bool bForceEvaluation)
740 if(fillOpenCLInfo().empty())
741 return false;
743 cl_device_id pDeviceId = NULL;
744 if(pDevice)
745 pDeviceId = findDeviceIdByDeviceString(*pDevice, fillOpenCLInfo());
747 if(!pDeviceId || bAutoSelect)
749 int status = clewInit(OPENCL_DLL_NAME);
750 if (status < 0)
751 return false;
753 OUString url("${$BRAND_BASE_DIR/" LIBO_ETC_FOLDER "/" SAL_CONFIGFILE("bootstrap") ":UserInstallation}/cache/");
754 rtl::Bootstrap::expandMacros(url);
755 OUString path;
756 osl::FileBase::getSystemPathFromFileURL(url,path);
757 OString dsFileName = rtl::OUStringToOString(path, RTL_TEXTENCODING_UTF8);
758 ds_device pSelectedDevice = getDeviceSelection(dsFileName.getStr(), bForceEvaluation);
759 pDeviceId = pSelectedDevice.oclDeviceID;
763 if(gpuEnv.mpDevID == pDeviceId)
765 // we don't need to change anything
766 // still the same device
767 return pDeviceId != NULL;
770 cl_platform_id platformId;
771 cl_int nState = clGetDeviceInfo(pDeviceId, CL_DEVICE_PLATFORM,
772 sizeof(platformId), &platformId, NULL);
774 cl_context_properties cps[3];
775 cps[0] = CL_CONTEXT_PLATFORM;
776 cps[1] = reinterpret_cast<cl_context_properties>(platformId);
777 cps[2] = 0;
778 cl_context context = clCreateContext( cps, 1, &pDeviceId, NULL, NULL, &nState );
779 if (nState != CL_SUCCESS)
780 SAL_WARN("opencl", "clCreateContext failed: " << nState);
782 if(nState != CL_SUCCESS || context == NULL)
784 if(context != NULL)
785 clReleaseContext(context);
787 SAL_WARN("opencl", "failed to set/switch opencl device");
788 return false;
790 SAL_INFO("opencl", "Created context " << context << " for platform " << platformId << ", device " << pDeviceId);
792 cl_command_queue command_queue[OPENCL_CMDQUEUE_SIZE];
793 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
795 command_queue[i] = clCreateCommandQueue(
796 context, pDeviceId, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &nState);
797 if (nState != CL_SUCCESS)
798 SAL_WARN("opencl", "clCreateCommandQueue failed: " << nState);
800 if (command_queue[i] == NULL || nState != CL_SUCCESS)
802 // Release all command queues created so far.
803 for (int j = 0; j <= i; ++j)
805 if (command_queue[j])
807 clReleaseCommandQueue(command_queue[j]);
808 command_queue[j] = NULL;
812 clReleaseContext(context);
813 SAL_WARN("opencl", "failed to set/switch opencl device");
814 return false;
817 SAL_INFO("opencl", "Created command queue " << command_queue[i] << " for context " << context);
820 setOpenCLCmdQueuePosition(0); // Call this just to avoid the method being deleted from unused function deleter.
822 releaseOpenCLEnv(&gpuEnv);
823 OpenCLEnv env;
824 env.mpOclPlatformID = platformId;
825 env.mpOclContext = context;
826 env.mpOclDevsID = pDeviceId;
828 for (int i = 0; i < OPENCL_CMDQUEUE_SIZE; ++i)
829 env.mpOclCmdQueue[i] = command_queue[i];
831 initOpenCLAttr(&env);
833 // why do we need this at all?
835 // (Assuming the above question refers to the mpArryDevsID
836 // initialisation below.) Because otherwise the code crashes in
837 // initOpenCLRunEnv(). Confused? You should be.
839 gpuEnv.mpArryDevsID = static_cast<cl_device_id*>(malloc( sizeof(cl_device_id) ));
840 gpuEnv.mpArryDevsID[0] = pDeviceId;
842 return !initOpenCLRunEnv(0);
845 void getOpenCLDeviceInfo(size_t& rDeviceId, size_t& rPlatformId)
847 int status = clewInit(OPENCL_DLL_NAME);
848 if (status < 0)
849 return;
851 cl_device_id id = gpuEnv.mpDevID;
852 findDeviceInfoFromDeviceId(id, rDeviceId, rPlatformId);
855 void setOpenCLCmdQueuePosition( int nPos )
857 if (nPos < 0 || nPos >= OPENCL_CMDQUEUE_SIZE)
858 // Out of range. Ignore this.
859 return;
861 gpuEnv.mnCmdQueuePos = nPos;
864 const char* errorString(cl_int nError)
866 #define CASE(val) case CL_##val: return #val
867 switch (nError)
869 CASE(SUCCESS);
870 CASE(DEVICE_NOT_FOUND);
871 CASE(DEVICE_NOT_AVAILABLE);
872 CASE(COMPILER_NOT_AVAILABLE);
873 CASE(MEM_OBJECT_ALLOCATION_FAILURE);
874 CASE(OUT_OF_RESOURCES);
875 CASE(OUT_OF_HOST_MEMORY);
876 CASE(PROFILING_INFO_NOT_AVAILABLE);
877 CASE(MEM_COPY_OVERLAP);
878 CASE(IMAGE_FORMAT_MISMATCH);
879 CASE(IMAGE_FORMAT_NOT_SUPPORTED);
880 CASE(BUILD_PROGRAM_FAILURE);
881 CASE(MAP_FAILURE);
882 CASE(INVALID_VALUE);
883 CASE(INVALID_DEVICE_TYPE);
884 CASE(INVALID_PLATFORM);
885 CASE(INVALID_DEVICE);
886 CASE(INVALID_CONTEXT);
887 CASE(INVALID_QUEUE_PROPERTIES);
888 CASE(INVALID_COMMAND_QUEUE);
889 CASE(INVALID_HOST_PTR);
890 CASE(INVALID_MEM_OBJECT);
891 CASE(INVALID_IMAGE_FORMAT_DESCRIPTOR);
892 CASE(INVALID_IMAGE_SIZE);
893 CASE(INVALID_SAMPLER);
894 CASE(INVALID_BINARY);
895 CASE(INVALID_BUILD_OPTIONS);
896 CASE(INVALID_PROGRAM);
897 CASE(INVALID_PROGRAM_EXECUTABLE);
898 CASE(INVALID_KERNEL_NAME);
899 CASE(INVALID_KERNEL_DEFINITION);
900 CASE(INVALID_KERNEL);
901 CASE(INVALID_ARG_INDEX);
902 CASE(INVALID_ARG_VALUE);
903 CASE(INVALID_ARG_SIZE);
904 CASE(INVALID_KERNEL_ARGS);
905 CASE(INVALID_WORK_DIMENSION);
906 CASE(INVALID_WORK_GROUP_SIZE);
907 CASE(INVALID_WORK_ITEM_SIZE);
908 CASE(INVALID_GLOBAL_OFFSET);
909 CASE(INVALID_EVENT_WAIT_LIST);
910 CASE(INVALID_EVENT);
911 CASE(INVALID_OPERATION);
912 CASE(INVALID_GL_OBJECT);
913 CASE(INVALID_BUFFER_SIZE);
914 CASE(INVALID_MIP_LEVEL);
915 CASE(INVALID_GLOBAL_WORK_SIZE);
916 default:
917 return "Unknown OpenCL error code";
919 #undef CASE
924 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */