1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
13 #elif defined __MACH__
14 #include <mach/mach_time.h>
27 #include <comphelper/random.hxx>
28 #include <opencl/openclconfig.hxx>
29 #include <opencl/openclwrapper.hxx>
30 #include <opencl/platforminfo.hxx>
31 #include <sal/log.hxx>
32 #include <rtl/math.hxx>
34 #include <opencl/OpenCLZone.hxx>
36 #include "opencl_device.hxx"
38 #define INPUTSIZE 15360
39 #define OUTPUTSIZE 15360
41 #define STRINGIFY(...) #__VA_ARGS__"\n"
// Logs the OpenCL status code together with the call-site label `name` when
// `status` differs from CL_SUCCESS. This only logs; it does not return or
// otherwise change control flow.
// The body is wrapped in do/while so that the macro expands to exactly one
// statement and stays correct inside an unbraced if/else. `name` has to be a
// string literal because it is concatenated with the preceding literal.
#define DS_CHECK_STATUS(status, name) \
    do \
    { \
        if (CL_SUCCESS != (status)) \
        { \
            SAL_INFO("opencl.device", "Error code is " << (status) << " at " name); \
        } \
    } while (false)
53 bool bIsDeviceSelected
= false;
54 ds_device selectedDevice
;
/* Host-side data for the device micro-benchmark: four input arrays, one
   output array, and the element counts used to size and iterate them. */
struct LibreOfficeDeviceEvaluationIO
{
    std::vector<double> input0;
    std::vector<double> input1;
    std::vector<double> input2;
    std::vector<double> input3;
    std::vector<double> output;
    // Element counts. Zero-initialised so a freshly constructed object never
    // carries indeterminate sizes before the caller assigns them.
    unsigned long inputSize = 0;
    unsigned long outputSize = 0;
};
76 const char* source
= STRINGIFY(
77 \n#if defined(KHR_DP_EXTENSION)
78 \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
79 \n#elif defined(AMD_DP_EXTENSION)
80 \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
83 int isNan(fp_t a
) { return a
!= a
; }
84 fp_t
fsum(fp_t a
, fp_t b
) { return a
+ b
; }
86 fp_t
fAverage(__global fp_t
* input
)
90 for (int i
= 0; i
< INPUTSIZE
; i
++)
94 sum
= fsum(input
[i
], sum
);
98 return sum
/ (fp_t
)count
;
100 fp_t
fMin(__global fp_t
* input
)
103 for (int i
= 0; i
< INPUTSIZE
; i
++)
105 if (!isNan(input
[i
]))
107 min
= fmin(input
[i
], min
);
112 fp_t
fSoP(__global fp_t
* input0
, __global fp_t
* input1
)
115 for (int i
= 0; i
< INPUTSIZE
; i
++)
117 sop
+= (isNan(input0
[i
]) ? 0 : input0
[i
]) * (isNan(input1
[i
]) ? 0 : input1
[i
]);
121 __kernel
void DynamicKernel(
122 __global fp_t
* result
, __global fp_t
* input0
, __global fp_t
* input1
, __global fp_t
* input2
, __global fp_t
* input3
)
124 int gid0
= get_global_id(0);
125 fp_t tmp0
= fAverage(input0
);
126 fp_t tmp1
= fMin(input1
) * fSoP(input2
, input3
);
127 result
[gid0
] = fsum(tmp0
, tmp1
);
131 size_t sourceSize
[] = { strlen(source
) };
133 /*************************************************************************/
134 /* INTERNAL FUNCTIONS */
135 /*************************************************************************/
136 /* Timer functions - start timer */
137 void timerStart(timer
* mytimer
)
140 QueryPerformanceCounter(&mytimer
->start
);
141 #elif defined __MACH__
142 mytimer
->start
= mach_absolute_time();
145 clock_gettime(CLOCK_MONOTONIC
, &s
);
146 mytimer
->start
= (long long)s
.tv_sec
* (long long)1.0E6
+ (long long)s
.tv_nsec
/ (long long)1.0E3
;
150 /* Timer functions - get current value */
151 double timerCurrent(timer
* mytimer
)
154 LARGE_INTEGER stop
, frequency
;
155 QueryPerformanceCounter(&stop
);
156 QueryPerformanceFrequency(&frequency
);
157 double time
= ((double)(stop
.QuadPart
- mytimer
->start
.QuadPart
) / frequency
.QuadPart
);
158 #elif defined __MACH__
159 static mach_timebase_info_data_t info
= { 0, 0 };
161 mach_timebase_info(&info
);
162 long long stop
= mach_absolute_time();
163 double time
= ((stop
- mytimer
->start
) * (double) info
.numer
/ info
.denom
) / 1.0E9
;
167 clock_gettime(CLOCK_MONOTONIC
, &s
);
168 stop
= (long long)s
.tv_sec
* (long long)1.0E6
+ (long long)s
.tv_nsec
/ (long long)1.0E3
;
169 double time
= ((double)(stop
- mytimer
->start
) / 1.0E6
);
174 /* Random number generator */
175 double random(double min
, double max
)
177 if (rtl::math::approxEqual(min
, max
))
179 return comphelper::rng::uniform_real_distribution(min
, max
);
183 void populateInput(std::unique_ptr
<LibreOfficeDeviceEvaluationIO
>& testData
)
185 double* input0
= &testData
->input0
[0];
186 double* input1
= &testData
->input1
[0];
187 double* input2
= &testData
->input2
[0];
188 double* input3
= &testData
->input3
[0];
189 for (unsigned long i
= 0; i
< testData
->inputSize
; i
++)
191 input0
[i
] = random(0, i
);
192 input1
[i
] = random(0, i
);
193 input2
[i
] = random(0, i
);
194 input3
[i
] = random(0, i
);
198 /* Evaluate devices */
199 ds_status
evaluateScoreForDevice(ds_device
& rDevice
, std::unique_ptr
<LibreOfficeDeviceEvaluationIO
>& testData
)
201 if (rDevice
.eType
== DeviceType::OpenCLDevice
)
203 /* Evaluating an OpenCL device */
204 SAL_INFO("opencl.device", "Device: \"" << rDevice
.sDeviceName
<< "\" (OpenCL) evaluation...");
207 /* Check for 64-bit float extensions */
208 std::unique_ptr
<char[]> aExtInfo
;
210 size_t aDevExtInfoSize
= 0;
213 clStatus
= clGetDeviceInfo(rDevice
.aDeviceID
, CL_DEVICE_EXTENSIONS
, 0, nullptr, &aDevExtInfoSize
);
214 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clGetDeviceInfo");
216 aExtInfo
.reset(new char[aDevExtInfoSize
]);
217 clStatus
= clGetDeviceInfo(rDevice
.aDeviceID
, CL_DEVICE_EXTENSIONS
, sizeof(char) * aDevExtInfoSize
, aExtInfo
.get(), nullptr);
218 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clGetDeviceInfo");
221 bool bKhrFp64Flag
= false;
222 bool bAmdFp64Flag
= false;
223 const char* buildOption
= nullptr;
224 std::string
tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
225 std::ostringstream tmpOStrStr
;
226 tmpOStrStr
<< std::dec
<< INPUTSIZE
;
227 tmpStr
.append(tmpOStrStr
.str());
229 if ((std::string(aExtInfo
.get())).find("cl_khr_fp64") != std::string::npos
)
232 //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
233 tmpStr
.append(" -DKHR_DP_EXTENSION");
234 buildOption
= tmpStr
.c_str();
235 SAL_INFO("opencl.device", "... has cl_khr_fp64");
237 else if ((std::string(aExtInfo
.get())).find("cl_amd_fp64") != std::string::npos
)
240 //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
241 tmpStr
.append(" -DAMD_DP_EXTENSION");
242 buildOption
= tmpStr
.c_str();
243 SAL_INFO("opencl.device", "... has cl_amd_fp64");
246 if (!bKhrFp64Flag
&& !bAmdFp64Flag
)
248 /* No 64-bit float support */
249 rDevice
.fTime
= DBL_MAX
;
250 rDevice
.bErrors
= false;
251 SAL_INFO("opencl.device", "... no fp64 support");
255 /* 64-bit float support present */
259 /* Create context and command queue */
260 cl_context clContext
= clCreateContext(nullptr, 1, &rDevice
.aDeviceID
, nullptr, nullptr, &clStatus
);
261 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateContext");
262 cl_command_queue clQueue
= clCreateCommandQueue(clContext
, rDevice
.aDeviceID
, 0, &clStatus
);
263 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateCommandQueue");
266 cl_program clProgram
= clCreateProgramWithSource(clContext
, 1, &source
, sourceSize
, &clStatus
);
267 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateProgramWithSource");
268 clStatus
= clBuildProgram(clProgram
, 1, &rDevice
.aDeviceID
, buildOption
, nullptr, nullptr);
269 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clBuildProgram");
270 if (CL_SUCCESS
!= clStatus
)
272 /* Build program failed */
275 clStatus
= clGetProgramBuildInfo(clProgram
, rDevice
.aDeviceID
, CL_PROGRAM_BUILD_LOG
, 0, nullptr, &length
);
276 buildLog
= static_cast<char*>(malloc(length
));
277 clGetProgramBuildInfo(clProgram
, rDevice
.aDeviceID
, CL_PROGRAM_BUILD_LOG
, length
, buildLog
, &length
);
278 SAL_INFO("opencl.device", "Build Errors:\n" << buildLog
);
281 rDevice
.fTime
= DBL_MAX
;
282 rDevice
.bErrors
= true;
286 /* Build program succeeded */
288 timerStart(&kernelTime
);
291 cl_kernel clKernel
= clCreateKernel(clProgram
, "DynamicKernel", &clStatus
);
292 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateKernel");
293 cl_mem clResult
= clCreateBuffer(clContext
, CL_MEM_WRITE_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->outputSize
, &testData
->output
[0], &clStatus
);
294 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clResult");
295 cl_mem clInput0
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input0
[0], &clStatus
);
296 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput0");
297 cl_mem clInput1
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input1
[0], &clStatus
);
298 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput1");
299 cl_mem clInput2
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input2
[0], &clStatus
);
300 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput2");
301 cl_mem clInput3
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input3
[0], &clStatus
);
302 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput3");
303 clStatus
= clSetKernelArg(clKernel
, 0, sizeof(cl_mem
), static_cast<void*>(&clResult
));
304 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clResult");
305 clStatus
= clSetKernelArg(clKernel
, 1, sizeof(cl_mem
), static_cast<void*>(&clInput0
));
306 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput0");
307 clStatus
= clSetKernelArg(clKernel
, 2, sizeof(cl_mem
), static_cast<void*>(&clInput1
));
308 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput1");
309 clStatus
= clSetKernelArg(clKernel
, 3, sizeof(cl_mem
), static_cast<void*>(&clInput2
));
310 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput2");
311 clStatus
= clSetKernelArg(clKernel
, 4, sizeof(cl_mem
), static_cast<void*>(&clInput3
));
312 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput3");
313 size_t globalWS
[1] = { testData
->outputSize
};
314 size_t localSize
[1] = { 64 };
315 clStatus
= clEnqueueNDRangeKernel(clQueue
, clKernel
, 1, nullptr, globalWS
, localSize
, 0, nullptr, nullptr);
316 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
318 clReleaseMemObject(clInput3
);
319 clReleaseMemObject(clInput2
);
320 clReleaseMemObject(clInput1
);
321 clReleaseMemObject(clInput0
);
322 clReleaseMemObject(clResult
);
323 clReleaseKernel(clKernel
);
325 rDevice
.fTime
= timerCurrent(&kernelTime
);
326 rDevice
.bErrors
= false;
329 clReleaseProgram(clProgram
);
330 clReleaseCommandQueue(clQueue
);
331 clReleaseContext(clContext
);
336 /* Evaluating a Native CPU device */
337 SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation...");
339 timerStart(&kernelTime
);
341 for (unsigned long j
= 0; j
< testData
->outputSize
; j
++)
343 double fAverage
= 0.0f
;
344 double fMin
= DBL_MAX
;
346 for (unsigned long i
= 0; i
< testData
->inputSize
; i
++)
348 fAverage
+= testData
->input0
[i
];
349 fMin
= ((fMin
< testData
->input1
[i
]) ? fMin
: testData
->input1
[i
]);
350 fSoP
+= testData
->input2
[i
] * testData
->input3
[i
];
352 fAverage
/= testData
->inputSize
;
353 testData
->output
[j
] = fAverage
+ (fMin
* fSoP
);
356 // InterpretTail - the S/W fallback is nothing like as efficient
357 // as any good openCL implementation: no SIMD, tons of branching
358 // in the inner loops etc. Generously characterise it as only 10x
359 // slower than the above.
360 float fInterpretTailFactor
= 10.0;
362 rDevice
.fTime
= timerCurrent(&kernelTime
);
363 rDevice
.fTime
*= fInterpretTailFactor
;
364 rDevice
.bErrors
= false;
369 ds_status
profileDevices(std::unique_ptr
<ds_profile
>& pProfile
, std::unique_ptr
<LibreOfficeDeviceEvaluationIO
>& pTestData
)
371 ds_status status
= DS_SUCCESS
;
374 return DS_INVALID_PROFILE
;
376 for (ds_device
& rDevice
: pProfile
->devices
)
378 ds_status evaluatorStatus
= evaluateScoreForDevice(rDevice
, pTestData
);
379 if (evaluatorStatus
!= DS_SUCCESS
)
381 status
= evaluatorStatus
;
388 /* Pick best device */
389 ds_status
pickBestDevice(std::unique_ptr
<ds_profile
>& profile
, int& rBestDeviceIndex
)
391 double bestScore
= DBL_MAX
;
393 rBestDeviceIndex
= -1;
395 for (std::vector
<ds_device
>::size_type d
= 0; d
< profile
->devices
.size();
398 ds_device
& device
= profile
->devices
[d
];
400 // Check blacklist and whitelist for actual devices
401 if (device
.eType
== DeviceType::OpenCLDevice
)
403 // There is a silly impedance mismatch here. Why do we
404 // need two different ways to describe an OpenCL platform
405 // and an OpenCL device driver?
407 OpenCLPlatformInfo aPlatform
;
408 OpenCLDeviceInfo aDevice
;
410 // We know that only the below fields are used by checkForKnownBadCompilers()
411 aPlatform
.maVendor
= OStringToOUString(device
.sPlatformVendor
, RTL_TEXTENCODING_UTF8
);
412 aDevice
.maName
= OStringToOUString(device
.sDeviceName
, RTL_TEXTENCODING_UTF8
);
413 aDevice
.maDriver
= OStringToOUString(device
.sDriverVersion
, RTL_TEXTENCODING_UTF8
);
415 // If blacklisted or not whitelisted, ignore it
416 if (OpenCLConfig::get().checkImplementation(aPlatform
, aDevice
))
418 SAL_INFO("opencl.device", "Device[" << d
<< "] " << device
.sDeviceName
<< " is blacklisted or not whitelisted");
419 device
.fTime
= DBL_MAX
;
420 device
.bErrors
= false;
424 double fScore
= DBL_MAX
;
425 if (device
.fTime
>= 0.0
426 || rtl::math::approxEqual(device
.fTime
, DBL_MAX
))
428 fScore
= device
.fTime
;
432 SAL_INFO("opencl.device", "Unusual null score");
435 if (device
.eType
== DeviceType::OpenCLDevice
)
437 SAL_INFO("opencl.device", "Device[" << d
<< "] " << device
.sDeviceName
<< " (OpenCL) score is " << fScore
);
441 SAL_INFO("opencl.device", "Device[" << d
<< "] CPU (Native) score is " << fScore
);
443 if (fScore
< bestScore
)
446 rBestDeviceIndex
= d
;
449 if (rBestDeviceIndex
!= -1 && profile
->devices
[rBestDeviceIndex
].eType
== DeviceType::OpenCLDevice
)
451 SAL_INFO("opencl.device", "Selected Device[" << rBestDeviceIndex
<< "]: " << profile
->devices
[rBestDeviceIndex
].sDeviceName
<< "(OpenCL).");
455 SAL_INFO("opencl.device", "Selected Device[" << rBestDeviceIndex
<< "]: CPU (Native).");
460 /* Return device ID for matching device name */
461 int matchDevice(std::unique_ptr
<ds_profile
>& profile
, char* deviceName
)
463 int deviceMatch
= -1;
464 for (unsigned int d
= 0; d
< profile
->devices
.size() - 1; d
++)
466 if ((std::string(profile
->devices
[d
].sDeviceName
.getStr())).find(deviceName
) != std::string::npos
)
469 if (std::string("NATIVE_CPU").find(deviceName
) != std::string::npos
)
470 deviceMatch
= profile
->devices
.size() - 1;
477 SvFileStream maStream
;
479 LogWriter(OUString aFileName
)
480 : maStream(aFileName
, StreamMode::WRITE
)
483 void text(const OString
& rText
)
485 maStream
.WriteOString(rText
);
486 maStream
.WriteChar('\n');
489 void log(const OString
& rKey
, const OString
& rValue
)
491 maStream
.WriteOString(rKey
);
492 maStream
.WriteCharPtr(": ");
493 maStream
.WriteOString(rValue
);
494 maStream
.WriteChar('\n');
497 void log(const OString
& rKey
, int rValue
)
499 log(rKey
, OString::number(rValue
));
502 void log(const OString
& rKey
, bool rValue
)
504 log(rKey
, OString::boolean(rValue
));
509 void writeDevicesLog(std::unique_ptr
<ds_profile
>& rProfile
, OUString sProfilePath
, int nSelectedIndex
)
511 OUString
aCacheFile(sProfilePath
+ "opencl_devices.log");
512 LogWriter
aWriter(aCacheFile
);
516 for (ds_device
& rDevice
: rProfile
->devices
)
518 if (rDevice
.eType
== DeviceType::OpenCLDevice
)
520 aWriter
.log("Device Index", nIndex
);
521 aWriter
.log(" Selected", nIndex
== nSelectedIndex
);
522 aWriter
.log(" Device Name", rDevice
.sDeviceName
);
523 aWriter
.log(" Device Vendor", rDevice
.sDeviceVendor
);
524 aWriter
.log(" Device Version", rDevice
.sDeviceVersion
);
525 aWriter
.log(" Driver Version", rDevice
.sDriverVersion
);
526 aWriter
.log(" Device Type", rDevice
.sDeviceType
);
527 aWriter
.log(" Device Extensions", rDevice
.sDeviceExtensions
);
528 aWriter
.log(" Device OpenCL C Version", rDevice
.sDeviceOpenCLVersion
);
530 aWriter
.log(" Device Available", rDevice
.bDeviceAvailable
);
531 aWriter
.log(" Device Compiler Available", rDevice
.bDeviceCompilerAvailable
);
532 aWriter
.log(" Device Linker Available", rDevice
.bDeviceLinkerAvailable
);
534 aWriter
.log(" Platform Name", rDevice
.sPlatformName
);
535 aWriter
.log(" Platform Vendor", rDevice
.sPlatformVendor
);
536 aWriter
.log(" Platform Version", rDevice
.sPlatformVersion
);
537 aWriter
.log(" Platform Profile", rDevice
.sPlatformProfile
);
538 aWriter
.log(" Platform Extensions", rDevice
.sPlatformExtensions
);
545 } // end anonymous namespace
547 ds_device
getDeviceSelection(
548 OUString
const & sProfilePath
, bool bForceSelection
)
550 /* Run only if device is not yet selected */
551 if (!bIsDeviceSelected
|| bForceSelection
)
554 std::unique_ptr
<ds_profile
> aProfile
;
556 status
= initDSProfile(aProfile
, "LibreOffice v1");
558 if (status
!= DS_SUCCESS
)
560 // failed to initialize profile.
561 selectedDevice
.eType
= DeviceType::NativeCPU
;
562 return selectedDevice
;
565 /* Try reading scores from file */
566 OUString sFilePath
= sProfilePath
+ "opencl_profile.xml";
568 if (!bForceSelection
)
570 status
= readProfile(sFilePath
, aProfile
);
574 status
= DS_INVALID_PROFILE
;
575 SAL_INFO("opencl.device", "Performing forced profiling.");
577 if (DS_SUCCESS
!= status
)
579 if (!bForceSelection
)
581 SAL_INFO("opencl.device", "Profile file not available (" << sFilePath
<< "); performing profiling.");
584 /* Populate input data for micro-benchmark */
585 std::unique_ptr
<LibreOfficeDeviceEvaluationIO
> testData(new LibreOfficeDeviceEvaluationIO
);
586 testData
->inputSize
= INPUTSIZE
;
587 testData
->outputSize
= OUTPUTSIZE
;
588 testData
->input0
.resize(testData
->inputSize
);
589 testData
->input1
.resize(testData
->inputSize
);
590 testData
->input2
.resize(testData
->inputSize
);
591 testData
->input3
.resize(testData
->inputSize
);
592 testData
->output
.resize(testData
->outputSize
);
593 populateInput(testData
);
595 /* Perform evaluations */
596 status
= profileDevices(aProfile
, testData
);
598 if (DS_SUCCESS
== status
)
600 /* Write scores to file */
601 status
= writeProfile(sFilePath
, aProfile
);
602 if (DS_SUCCESS
== status
)
604 SAL_INFO("opencl.device", "Scores written to file (" << sFilePath
<< ").");
608 SAL_INFO("opencl.device", "Error saving scores to file (" << sFilePath
<< "); scores not written to file.");
613 SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file.");
618 SAL_INFO("opencl.device", "Profile read from file (" << sFilePath
<< ").");
621 /* Pick best device */
623 pickBestDevice(aProfile
, bestDeviceIdx
);
625 /* Override if necessary */
626 char* overrideDeviceStr
= getenv("SC_OPENCL_DEVICE_OVERRIDE");
627 if (nullptr != overrideDeviceStr
)
629 int overrideDeviceIdx
= matchDevice(aProfile
, overrideDeviceStr
);
630 if (-1 != overrideDeviceIdx
)
632 SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr
<< ").");
633 bestDeviceIdx
= overrideDeviceIdx
;
634 if (aProfile
->devices
[bestDeviceIdx
].eType
== DeviceType::OpenCLDevice
)
636 SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx
<< "]: " << aProfile
->devices
[bestDeviceIdx
].sDeviceName
<< " (OpenCL).");
640 SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx
<< "]: CPU (Native).");
645 SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr
<< ").");
649 /* Final device selection */
650 if (bestDeviceIdx
>=0 && static_cast< std::vector
<ds_device
>::size_type
> ( bestDeviceIdx
) < aProfile
->devices
.size() )
652 selectedDevice
= aProfile
->devices
[bestDeviceIdx
];
653 bIsDeviceSelected
= true;
655 writeDevicesLog(aProfile
, sProfilePath
, bestDeviceIdx
);
657 selectedDevice
.eType
= DeviceType::NativeCPU
;
660 return selectedDevice
;
665 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */