1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
13 #include <string_view>
17 #include <comphelper/random.hxx>
18 #include <o3tl/safeint.hxx>
19 #include <opencl/openclconfig.hxx>
20 #include <opencl/platforminfo.hxx>
21 #include <sal/log.hxx>
22 #include <rtl/math.hxx>
23 #include <tools/time.hxx>
25 #include <opencl/OpenCLZone.hxx>
27 #include <opencl_device.hxx>
28 #include <opencl_device_selection.h>
30 #define INPUTSIZE 15360
31 #define OUTPUTSIZE 15360
35 void DS_CHECK_STATUS(cl_int status
, char const * name
) {
36 if (CL_SUCCESS
!= status
)
38 SAL_INFO("opencl.device", "Error code is " << status
<< " at " << name
);
42 bool bIsDeviceSelected
= false;
43 ds_device selectedDevice
;
45 struct LibreOfficeDeviceEvaluationIO
47 std::vector
<double> input0
;
48 std::vector
<double> input1
;
49 std::vector
<double> input2
;
50 std::vector
<double> input3
;
51 std::vector
<double> output
;
52 tools::ULong inputSize
;
53 tools::ULong outputSize
;
56 const char* source
= R
"delimit(
57 #if defined(KHR_DP_EXTENSION)
58 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
59 #elif defined(AMD_DP_EXTENSION)
60 #pragma OPENCL EXTENSION cl_amd_fp64 : enable
63 int isNan(fp_t a) { return a != a; }
64 fp_t fsum(fp_t a, fp_t b) { return a + b; }
66 fp_t fAverage(__global fp_t* input)
70 for (int i = 0; i < INPUTSIZE; i++)
74 sum = fsum(input[i], sum);
78 return sum / (fp_t)count;
80 fp_t fMin(__global fp_t* input)
83 for (int i = 0; i < INPUTSIZE; i++)
87 min = fmin(input[i], min);
92 fp_t fSoP(__global fp_t* input0, __global fp_t* input1)
95 for (int i = 0; i < INPUTSIZE; i++)
97 sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]);
101 __kernel void DynamicKernel(
102 __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3)
104 int gid0 = get_global_id(0);
105 fp_t tmp0 = fAverage(input0);
106 fp_t tmp1 = fMin(input1) * fSoP(input2, input3);
107 result[gid0] = fsum(tmp0, tmp1);
111 size_t sourceSize
[] = { strlen(source
) };
113 /* Random number generator */
114 double random(double min
, double max
)
116 if (rtl::math::approxEqual(min
, max
))
118 return comphelper::rng::uniform_real_distribution(min
, max
);
122 void populateInput(std::unique_ptr
<LibreOfficeDeviceEvaluationIO
> const & testData
)
124 double* input0
= testData
->input0
.data();
125 double* input1
= testData
->input1
.data();
126 double* input2
= testData
->input2
.data();
127 double* input3
= testData
->input3
.data();
128 for (tools::ULong i
= 0; i
< testData
->inputSize
; i
++)
130 input0
[i
] = random(0, i
);
131 input1
[i
] = random(0, i
);
132 input2
[i
] = random(0, i
);
133 input3
[i
] = random(0, i
);
137 /* Evaluate devices */
138 ds_status
evaluateScoreForDevice(ds_device
& rDevice
, std::unique_ptr
<LibreOfficeDeviceEvaluationIO
> const & testData
)
140 if (rDevice
.eType
== DeviceType::OpenCLDevice
)
142 /* Evaluating an OpenCL device */
143 SAL_INFO("opencl.device", "Device: \"" << rDevice
.sDeviceName
<< "\" (OpenCL) evaluation...");
146 /* Check for 64-bit float extensions */
147 std::unique_ptr
<char[]> aExtInfo
;
149 size_t aDevExtInfoSize
= 0;
152 clStatus
= clGetDeviceInfo(rDevice
.aDeviceID
, CL_DEVICE_EXTENSIONS
, 0, nullptr, &aDevExtInfoSize
);
153 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clGetDeviceInfo");
155 aExtInfo
.reset(new char[aDevExtInfoSize
]);
156 clStatus
= clGetDeviceInfo(rDevice
.aDeviceID
, CL_DEVICE_EXTENSIONS
, sizeof(char) * aDevExtInfoSize
, aExtInfo
.get(), nullptr);
157 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clGetDeviceInfo");
160 bool bKhrFp64Flag
= false;
161 bool bAmdFp64Flag
= false;
162 const char* buildOption
= nullptr;
163 std::string
tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
164 std::ostringstream tmpOStrStr
;
165 tmpOStrStr
<< std::dec
<< INPUTSIZE
;
166 tmpStr
.append(tmpOStrStr
.str());
168 if ((std::string(aExtInfo
.get())).find("cl_khr_fp64") != std::string::npos
)
171 //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
172 tmpStr
.append(" -DKHR_DP_EXTENSION");
173 buildOption
= tmpStr
.c_str();
174 SAL_INFO("opencl.device", "... has cl_khr_fp64");
176 else if ((std::string(aExtInfo
.get())).find("cl_amd_fp64") != std::string::npos
)
179 //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
180 tmpStr
.append(" -DAMD_DP_EXTENSION");
181 buildOption
= tmpStr
.c_str();
182 SAL_INFO("opencl.device", "... has cl_amd_fp64");
185 if (!bKhrFp64Flag
&& !bAmdFp64Flag
)
187 /* No 64-bit float support */
188 rDevice
.fTime
= DBL_MAX
;
189 rDevice
.bErrors
= false;
190 SAL_INFO("opencl.device", "... no fp64 support");
194 /* 64-bit float support present */
198 /* Create context and command queue */
199 cl_context clContext
= clCreateContext(nullptr, 1, &rDevice
.aDeviceID
, nullptr, nullptr, &clStatus
);
200 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateContext");
201 cl_command_queue clQueue
= clCreateCommandQueue(clContext
, rDevice
.aDeviceID
, 0, &clStatus
);
202 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateCommandQueue");
205 cl_program clProgram
= clCreateProgramWithSource(clContext
, 1, &source
, sourceSize
, &clStatus
);
206 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateProgramWithSource");
207 clStatus
= clBuildProgram(clProgram
, 1, &rDevice
.aDeviceID
, buildOption
, nullptr, nullptr);
208 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clBuildProgram");
209 if (CL_SUCCESS
!= clStatus
)
211 /* Build program failed */
214 clStatus
= clGetProgramBuildInfo(clProgram
, rDevice
.aDeviceID
, CL_PROGRAM_BUILD_LOG
, 0, nullptr, &length
);
215 buildLog
= static_cast<char*>(malloc(length
));
216 clGetProgramBuildInfo(clProgram
, rDevice
.aDeviceID
, CL_PROGRAM_BUILD_LOG
, length
, buildLog
, &length
);
217 SAL_INFO("opencl.device", "Build Errors:\n" << buildLog
);
220 rDevice
.fTime
= DBL_MAX
;
221 rDevice
.bErrors
= true;
225 /* Build program succeeded */
226 sal_uInt64 kernelTime
= tools::Time::GetMonotonicTicks();
229 cl_kernel clKernel
= clCreateKernel(clProgram
, "DynamicKernel", &clStatus
);
230 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateKernel");
231 cl_mem clResult
= clCreateBuffer(clContext
, CL_MEM_WRITE_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->outputSize
, testData
->output
.data(), &clStatus
);
232 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clResult");
233 cl_mem clInput0
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, testData
->input0
.data(), &clStatus
);
234 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput0");
235 cl_mem clInput1
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, testData
->input1
.data(), &clStatus
);
236 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput1");
237 cl_mem clInput2
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, testData
->input2
.data(), &clStatus
);
238 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput2");
239 cl_mem clInput3
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, testData
->input3
.data(), &clStatus
);
240 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput3");
241 clStatus
= clSetKernelArg(clKernel
, 0, sizeof(cl_mem
), static_cast<void*>(&clResult
));
242 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clResult");
243 clStatus
= clSetKernelArg(clKernel
, 1, sizeof(cl_mem
), static_cast<void*>(&clInput0
));
244 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput0");
245 clStatus
= clSetKernelArg(clKernel
, 2, sizeof(cl_mem
), static_cast<void*>(&clInput1
));
246 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput1");
247 clStatus
= clSetKernelArg(clKernel
, 3, sizeof(cl_mem
), static_cast<void*>(&clInput2
));
248 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput2");
249 clStatus
= clSetKernelArg(clKernel
, 4, sizeof(cl_mem
), static_cast<void*>(&clInput3
));
250 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput3");
251 size_t globalWS
[1] = { testData
->outputSize
};
252 size_t const localSize
[1] = { 64 };
253 clStatus
= clEnqueueNDRangeKernel(clQueue
, clKernel
, 1, nullptr, globalWS
, localSize
, 0, nullptr, nullptr);
254 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
256 clReleaseMemObject(clInput3
);
257 clReleaseMemObject(clInput2
);
258 clReleaseMemObject(clInput1
);
259 clReleaseMemObject(clInput0
);
260 clReleaseMemObject(clResult
);
261 clReleaseKernel(clKernel
);
263 rDevice
.fTime
= tools::Time::GetMonotonicTicks() - kernelTime
;
264 rDevice
.bErrors
= false;
267 clReleaseProgram(clProgram
);
268 clReleaseCommandQueue(clQueue
);
269 clReleaseContext(clContext
);
274 /* Evaluating a Native CPU device */
275 SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation...");
276 sal_uInt64 kernelTime
= tools::Time::GetMonotonicTicks();
279 for (j
= 0; j
< testData
->outputSize
; j
++)
281 double fAverage
= 0.0f
;
282 double fMin
= DBL_MAX
;
284 for (tools::ULong i
= 0; i
< testData
->inputSize
; i
++)
286 fAverage
+= testData
->input0
[i
];
287 fMin
= std::min(fMin
, testData
->input1
[i
]);
288 fSoP
+= testData
->input2
[i
] * testData
->input3
[i
];
290 fAverage
/= testData
->inputSize
;
291 testData
->output
[j
] = fAverage
+ (fMin
* fSoP
);
292 // Don't run for much longer than one second
293 if (j
> 0 && j
% 100 == 0)
295 rDevice
.fTime
= tools::Time::GetMonotonicTicks() - kernelTime
;
296 if (rDevice
.fTime
>= 1)
301 rDevice
.fTime
= tools::Time::GetMonotonicTicks() - kernelTime
;
303 // Scale time to how long it would have taken to go all the way to outputSize
304 rDevice
.fTime
/= (static_cast<double>(j
) / testData
->outputSize
);
306 // InterpretTail - the S/W fallback is nothing like as efficient
307 // as any good openCL implementation: no SIMD, tons of branching
308 // in the inner loops etc. Generously characterise it as only 10x
309 // slower than the above.
310 rDevice
.fTime
*= 10.0;
311 rDevice
.bErrors
= false;
316 ds_status
profileDevices(std::unique_ptr
<ds_profile
> const & pProfile
, std::unique_ptr
<LibreOfficeDeviceEvaluationIO
> const & pTestData
)
318 ds_status status
= DS_SUCCESS
;
321 return DS_INVALID_PROFILE
;
323 for (ds_device
& rDevice
: pProfile
->devices
)
325 ds_status evaluatorStatus
= evaluateScoreForDevice(rDevice
, pTestData
);
326 if (evaluatorStatus
!= DS_SUCCESS
)
328 status
= evaluatorStatus
;
335 /* Pick best device */
336 int pickBestDevice(std::unique_ptr
<ds_profile
> const & profile
)
338 double bestScore
= DBL_MAX
;
340 int nBestDeviceIndex
= -1;
342 for (std::vector
<ds_device
>::size_type d
= 0; d
< profile
->devices
.size();
345 ds_device
& device
= profile
->devices
[d
];
347 // Check denylist and allowlist for actual devices
348 if (device
.eType
== DeviceType::OpenCLDevice
)
350 // There is a silly impedance mismatch here. Why do we
351 // need two different ways to describe an OpenCL platform
352 // and an OpenCL device driver?
354 OpenCLPlatformInfo aPlatform
;
355 OpenCLDeviceInfo aDevice
;
357 // We know that only the below fields are used by checkForKnownBadCompilers()
358 aPlatform
.maVendor
= OStringToOUString(device
.sPlatformVendor
, RTL_TEXTENCODING_UTF8
);
359 aDevice
.maName
= OStringToOUString(device
.sDeviceName
, RTL_TEXTENCODING_UTF8
);
360 aDevice
.maDriver
= OStringToOUString(device
.sDriverVersion
, RTL_TEXTENCODING_UTF8
);
362 // If denylisted or not allowlisted, ignore it
363 if (OpenCLConfig::get().checkImplementation(aPlatform
, aDevice
))
365 SAL_INFO("opencl.device", "Device[" << d
<< "] " << device
.sDeviceName
<< " is denylisted or not allowlisted");
366 device
.fTime
= DBL_MAX
;
367 device
.bErrors
= false;
371 double fScore
= DBL_MAX
;
372 if (device
.fTime
>= 0.0
373 || rtl::math::approxEqual(device
.fTime
, DBL_MAX
))
375 fScore
= device
.fTime
;
379 SAL_INFO("opencl.device", "Unusual null score");
382 if (device
.eType
== DeviceType::OpenCLDevice
)
384 SAL_INFO("opencl.device", "Device[" << d
<< "] " << device
.sDeviceName
<< " (OpenCL) score is " << fScore
);
388 SAL_INFO("opencl.device", "Device[" << d
<< "] CPU (Native) score is " << fScore
);
390 if (fScore
< bestScore
)
393 nBestDeviceIndex
= d
;
396 if (nBestDeviceIndex
!= -1 && profile
->devices
[nBestDeviceIndex
].eType
== DeviceType::OpenCLDevice
)
398 SAL_INFO("opencl.device", "Selected Device[" << nBestDeviceIndex
<< "]: " << profile
->devices
[nBestDeviceIndex
].sDeviceName
<< "(OpenCL).");
402 SAL_INFO("opencl.device", "Selected Device[" << nBestDeviceIndex
<< "]: CPU (Native).");
404 return nBestDeviceIndex
;
407 /* Return device ID for matching device name */
408 int matchDevice(std::unique_ptr
<ds_profile
> const & profile
, const char* deviceName
)
410 int deviceMatch
= -1;
411 for (size_t d
= 0; d
< profile
->devices
.size() - 1; d
++)
413 if (profile
->devices
[d
].sDeviceName
.indexOf(deviceName
) != -1)
416 if (std::string("NATIVE_CPU").find(deviceName
) != std::string::npos
)
417 deviceMatch
= profile
->devices
.size() - 1;
424 SvFileStream maStream
;
426 explicit LogWriter(OUString
const & aFileName
)
427 : maStream(aFileName
, StreamMode::WRITE
)
430 void text(std::string_view rText
)
432 maStream
.WriteOString(rText
);
433 maStream
.WriteChar('\n');
436 void log(std::string_view rKey
, std::string_view rValue
)
438 maStream
.WriteOString(rKey
);
439 maStream
.WriteOString(": ");
440 maStream
.WriteOString(rValue
);
441 maStream
.WriteChar('\n');
444 void log(std::string_view rKey
, int rValue
)
446 log(rKey
, OString::number(rValue
));
449 void log(std::string_view rKey
, bool rValue
)
451 log(rKey
, OString::boolean(rValue
));
456 void writeDevicesLog(std::unique_ptr
<ds_profile
> const & rProfile
, std::u16string_view sProfilePath
, int nSelectedIndex
)
458 OUString
aCacheFile(OUString::Concat(sProfilePath
) + "opencl_devices.log");
459 LogWriter
aWriter(aCacheFile
);
463 for (const ds_device
& rDevice
: rProfile
->devices
)
465 if (rDevice
.eType
== DeviceType::OpenCLDevice
)
467 aWriter
.log("Device Index", nIndex
);
468 aWriter
.log(" Selected", nIndex
== nSelectedIndex
);
469 aWriter
.log(" Device Name", rDevice
.sDeviceName
);
470 aWriter
.log(" Device Vendor", rDevice
.sDeviceVendor
);
471 aWriter
.log(" Device Version", rDevice
.sDeviceVersion
);
472 aWriter
.log(" Driver Version", rDevice
.sDriverVersion
);
473 aWriter
.log(" Device Type", rDevice
.sDeviceType
);
474 aWriter
.log(" Device Extensions", rDevice
.sDeviceExtensions
);
475 aWriter
.log(" Device OpenCL C Version", rDevice
.sDeviceOpenCLVersion
);
477 aWriter
.log(" Device Available", rDevice
.bDeviceAvailable
);
478 aWriter
.log(" Device Compiler Available", rDevice
.bDeviceCompilerAvailable
);
479 aWriter
.log(" Device Linker Available", rDevice
.bDeviceLinkerAvailable
);
481 aWriter
.log(" Platform Name", rDevice
.sPlatformName
);
482 aWriter
.log(" Platform Vendor", rDevice
.sPlatformVendor
);
483 aWriter
.log(" Platform Version", rDevice
.sPlatformVersion
);
484 aWriter
.log(" Platform Profile", rDevice
.sPlatformProfile
);
485 aWriter
.log(" Platform Extensions", rDevice
.sPlatformExtensions
);
492 } // end anonymous namespace
494 ds_device
const & getDeviceSelection(
495 std::u16string_view sProfilePath
, bool bForceSelection
)
497 /* Run only if device is not yet selected */
498 if (!bIsDeviceSelected
|| bForceSelection
)
501 std::unique_ptr
<ds_profile
> aProfile
;
503 status
= initDSProfile(aProfile
, "LibreOffice v1");
505 if (status
!= DS_SUCCESS
)
507 // failed to initialize profile.
508 selectedDevice
.eType
= DeviceType::NativeCPU
;
509 return selectedDevice
;
512 /* Try reading scores from file */
513 OUString sFilePath
= OUString::Concat(sProfilePath
) + "opencl_profile.xml";
515 if (!bForceSelection
)
517 status
= readProfile(sFilePath
, aProfile
);
521 status
= DS_INVALID_PROFILE
;
522 SAL_INFO("opencl.device", "Performing forced profiling.");
524 if (DS_SUCCESS
!= status
)
526 if (!bForceSelection
)
528 SAL_INFO("opencl.device", "Profile file not available (" << sFilePath
<< "); performing profiling.");
531 /* Populate input data for micro-benchmark */
532 std::unique_ptr
<LibreOfficeDeviceEvaluationIO
> testData(new LibreOfficeDeviceEvaluationIO
);
533 testData
->inputSize
= INPUTSIZE
;
534 testData
->outputSize
= OUTPUTSIZE
;
535 testData
->input0
.resize(testData
->inputSize
);
536 testData
->input1
.resize(testData
->inputSize
);
537 testData
->input2
.resize(testData
->inputSize
);
538 testData
->input3
.resize(testData
->inputSize
);
539 testData
->output
.resize(testData
->outputSize
);
540 populateInput(testData
);
542 /* Perform evaluations */
543 status
= profileDevices(aProfile
, testData
);
545 if (DS_SUCCESS
== status
)
547 /* Write scores to file */
548 status
= writeProfile(sFilePath
, aProfile
);
549 if (DS_SUCCESS
== status
)
551 SAL_INFO("opencl.device", "Scores written to file (" << sFilePath
<< ").");
555 SAL_INFO("opencl.device", "Error saving scores to file (" << sFilePath
<< "); scores not written to file.");
560 SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file.");
565 SAL_INFO("opencl.device", "Profile read from file (" << sFilePath
<< ").");
568 /* Pick best device */
569 int bestDeviceIdx
= pickBestDevice(aProfile
);
571 /* Override if necessary */
572 char* overrideDeviceStr
= getenv("SC_OPENCL_DEVICE_OVERRIDE");
573 if (nullptr != overrideDeviceStr
)
575 int overrideDeviceIdx
= matchDevice(aProfile
, overrideDeviceStr
);
576 if (-1 != overrideDeviceIdx
)
578 SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr
<< ").");
579 bestDeviceIdx
= overrideDeviceIdx
;
580 if (aProfile
->devices
[bestDeviceIdx
].eType
== DeviceType::OpenCLDevice
)
582 SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx
<< "]: " << aProfile
->devices
[bestDeviceIdx
].sDeviceName
<< " (OpenCL).");
586 SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx
<< "]: CPU (Native).");
591 SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr
<< ").");
595 /* Final device selection */
596 if (bestDeviceIdx
>=0 && o3tl::make_unsigned( bestDeviceIdx
) < aProfile
->devices
.size() )
598 selectedDevice
= aProfile
->devices
[bestDeviceIdx
];
599 bIsDeviceSelected
= true;
601 writeDevicesLog(aProfile
, sProfilePath
, bestDeviceIdx
);
603 selectedDevice
.eType
= DeviceType::NativeCPU
;
606 return selectedDevice
;
609 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */