Bump version to 6.0-36
[LibreOffice.git] / opencl / source / opencl_device.cxx
blob57c56ca3cf58fb1915d86d80780186033ada014b
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <math.h>
11 #include <float.h>
12 #include <iostream>
13 #include <sstream>
14 #include <memory>
15 #include <vector>
17 #include <comphelper/random.hxx>
18 #include <opencl/openclconfig.hxx>
19 #include <opencl/openclwrapper.hxx>
20 #include <opencl/platforminfo.hxx>
21 #include <sal/log.hxx>
22 #include <rtl/math.hxx>
23 #include <tools/time.hxx>
25 #include <opencl/OpenCLZone.hxx>
27 #include <opencl_device.hxx>
29 #define INPUTSIZE 15360
30 #define OUTPUTSIZE 15360
32 #define STRINGIFY(...) #__VA_ARGS__"\n"
34 namespace {
36 void DS_CHECK_STATUS(cl_int status, char const * name) {
37 if (CL_SUCCESS != status)
39 SAL_INFO("opencl.device", "Error code is " << status << " at " << name);
43 bool bIsDeviceSelected = false;
44 ds_device selectedDevice;
46 struct LibreOfficeDeviceEvaluationIO
48 std::vector<double> input0;
49 std::vector<double> input1;
50 std::vector<double> input2;
51 std::vector<double> input3;
52 std::vector<double> output;
53 unsigned long inputSize;
54 unsigned long outputSize;
57 const char* source = STRINGIFY(
58 \n#if defined(KHR_DP_EXTENSION)
59 \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
60 \n#elif defined(AMD_DP_EXTENSION)
61 \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
62 \n#endif
64 int isNan(fp_t a) { return a != a; }
65 fp_t fsum(fp_t a, fp_t b) { return a + b; }
67 fp_t fAverage(__global fp_t* input)
69 fp_t sum = 0;
70 int count = 0;
71 for (int i = 0; i < INPUTSIZE; i++)
73 if (!isNan(input[i]))
75 sum = fsum(input[i], sum);
76 count += 1;
79 return sum / (fp_t)count;
81 fp_t fMin(__global fp_t* input)
83 fp_t min = MAXFLOAT;
84 for (int i = 0; i < INPUTSIZE; i++)
86 if (!isNan(input[i]))
88 min = fmin(input[i], min);
91 return min;
93 fp_t fSoP(__global fp_t* input0, __global fp_t* input1)
95 fp_t sop = 0.0;
96 for (int i = 0; i < INPUTSIZE; i++)
98 sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]);
100 return sop;
102 __kernel void DynamicKernel(
103 __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3)
105 int gid0 = get_global_id(0);
106 fp_t tmp0 = fAverage(input0);
107 fp_t tmp1 = fMin(input1) * fSoP(input2, input3);
108 result[gid0] = fsum(tmp0, tmp1);
112 size_t sourceSize[] = { strlen(source) };
114 /* Random number generator */
115 double random(double min, double max)
117 if (rtl::math::approxEqual(min, max))
118 return min;
119 return comphelper::rng::uniform_real_distribution(min, max);
122 /* Populate input */
123 void populateInput(std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & testData)
125 double* input0 = &testData->input0[0];
126 double* input1 = &testData->input1[0];
127 double* input2 = &testData->input2[0];
128 double* input3 = &testData->input3[0];
129 for (unsigned long i = 0; i < testData->inputSize; i++)
131 input0[i] = random(0, i);
132 input1[i] = random(0, i);
133 input2[i] = random(0, i);
134 input3[i] = random(0, i);
138 /* Evaluate devices */
139 ds_status evaluateScoreForDevice(ds_device& rDevice, std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & testData)
141 if (rDevice.eType == DeviceType::OpenCLDevice)
143 /* Evaluating an OpenCL device */
144 SAL_INFO("opencl.device", "Device: \"" << rDevice.sDeviceName << "\" (OpenCL) evaluation...");
145 cl_int clStatus;
147 /* Check for 64-bit float extensions */
148 std::unique_ptr<char[]> aExtInfo;
150 size_t aDevExtInfoSize = 0;
152 OpenCLZone zone;
153 clStatus = clGetDeviceInfo(rDevice.aDeviceID, CL_DEVICE_EXTENSIONS, 0, nullptr, &aDevExtInfoSize);
154 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
156 aExtInfo.reset(new char[aDevExtInfoSize]);
157 clStatus = clGetDeviceInfo(rDevice.aDeviceID, CL_DEVICE_EXTENSIONS, sizeof(char) * aDevExtInfoSize, aExtInfo.get(), nullptr);
158 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clGetDeviceInfo");
161 bool bKhrFp64Flag = false;
162 bool bAmdFp64Flag = false;
163 const char* buildOption = nullptr;
164 std::string tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
165 std::ostringstream tmpOStrStr;
166 tmpOStrStr << std::dec << INPUTSIZE;
167 tmpStr.append(tmpOStrStr.str());
169 if ((std::string(aExtInfo.get())).find("cl_khr_fp64") != std::string::npos)
171 bKhrFp64Flag = true;
172 //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
173 tmpStr.append(" -DKHR_DP_EXTENSION");
174 buildOption = tmpStr.c_str();
175 SAL_INFO("opencl.device", "... has cl_khr_fp64");
177 else if ((std::string(aExtInfo.get())).find("cl_amd_fp64") != std::string::npos)
179 bAmdFp64Flag = true;
180 //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
181 tmpStr.append(" -DAMD_DP_EXTENSION");
182 buildOption = tmpStr.c_str();
183 SAL_INFO("opencl.device", "... has cl_amd_fp64");
186 if (!bKhrFp64Flag && !bAmdFp64Flag)
188 /* No 64-bit float support */
189 rDevice.fTime = DBL_MAX;
190 rDevice.bErrors = false;
191 SAL_INFO("opencl.device", "... no fp64 support");
193 else
195 /* 64-bit float support present */
197 OpenCLZone zone;
199 /* Create context and command queue */
200 cl_context clContext = clCreateContext(nullptr, 1, &rDevice.aDeviceID, nullptr, nullptr, &clStatus);
201 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateContext");
202 cl_command_queue clQueue = clCreateCommandQueue(clContext, rDevice.aDeviceID, 0, &clStatus);
203 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateCommandQueue");
205 /* Build program */
206 cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus);
207 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateProgramWithSource");
208 clStatus = clBuildProgram(clProgram, 1, &rDevice.aDeviceID, buildOption, nullptr, nullptr);
209 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clBuildProgram");
210 if (CL_SUCCESS != clStatus)
212 /* Build program failed */
213 size_t length;
214 char* buildLog;
215 clStatus = clGetProgramBuildInfo(clProgram, rDevice.aDeviceID, CL_PROGRAM_BUILD_LOG, 0, nullptr, &length);
216 buildLog = static_cast<char*>(malloc(length));
217 clGetProgramBuildInfo(clProgram, rDevice.aDeviceID, CL_PROGRAM_BUILD_LOG, length, buildLog, &length);
218 SAL_INFO("opencl.device", "Build Errors:\n" << buildLog);
219 free(buildLog);
221 rDevice.fTime = DBL_MAX;
222 rDevice.bErrors = true;
224 else
226 /* Build program succeeded */
227 sal_uInt64 kernelTime = tools::Time::GetMonotonicTicks();
229 /* Run kernel */
230 cl_kernel clKernel = clCreateKernel(clProgram, "DynamicKernel", &clStatus);
231 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateKernel");
232 cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->outputSize, &testData->output[0], &clStatus);
233 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clResult");
234 cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input0[0], &clStatus);
235 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput0");
236 cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input1[0], &clStatus);
237 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput1");
238 cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input2[0], &clStatus);
239 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput2");
240 cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(cl_double) * testData->inputSize, &testData->input3[0], &clStatus);
241 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clCreateBuffer::clInput3");
242 clStatus = clSetKernelArg(clKernel, 0, sizeof(cl_mem), static_cast<void*>(&clResult));
243 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clResult");
244 clStatus = clSetKernelArg(clKernel, 1, sizeof(cl_mem), static_cast<void*>(&clInput0));
245 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput0");
246 clStatus = clSetKernelArg(clKernel, 2, sizeof(cl_mem), static_cast<void*>(&clInput1));
247 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput1");
248 clStatus = clSetKernelArg(clKernel, 3, sizeof(cl_mem), static_cast<void*>(&clInput2));
249 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput2");
250 clStatus = clSetKernelArg(clKernel, 4, sizeof(cl_mem), static_cast<void*>(&clInput3));
251 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clSetKernelArg::clInput3");
252 size_t globalWS[1] = { testData->outputSize };
253 size_t const localSize[1] = { 64 };
254 clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1, nullptr, globalWS, localSize, 0, nullptr, nullptr);
255 DS_CHECK_STATUS(clStatus, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
256 clFinish(clQueue);
257 clReleaseMemObject(clInput3);
258 clReleaseMemObject(clInput2);
259 clReleaseMemObject(clInput1);
260 clReleaseMemObject(clInput0);
261 clReleaseMemObject(clResult);
262 clReleaseKernel(clKernel);
264 rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime;
265 rDevice.bErrors = false;
268 clReleaseProgram(clProgram);
269 clReleaseCommandQueue(clQueue);
270 clReleaseContext(clContext);
273 else
275 /* Evaluating an Native CPU device */
276 SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation...");
277 sal_uInt64 kernelTime = tools::Time::GetMonotonicTicks();
279 unsigned long j;
280 for (j = 0; j < testData->outputSize; j++)
282 double fAverage = 0.0f;
283 double fMin = DBL_MAX;
284 double fSoP = 0.0f;
285 for (unsigned long i = 0; i < testData->inputSize; i++)
287 fAverage += testData->input0[i];
288 fMin = ((fMin < testData->input1[i]) ? fMin : testData->input1[i]);
289 fSoP += testData->input2[i] * testData->input3[i];
291 fAverage /= testData->inputSize;
292 testData->output[j] = fAverage + (fMin * fSoP);
293 // Don't run for much longer than one second
294 if (j > 0 && j % 100 == 0)
296 rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime;
297 if (rDevice.fTime >= 1)
298 break;
302 rDevice.fTime = tools::Time::GetMonotonicTicks() - kernelTime;
304 // Scale time to how long it would have taken to go all the way to outputSize
305 rDevice.fTime /= ((double) j / testData->outputSize);
307 // InterpretTail - the S/W fallback is nothing like as efficient
308 // as any good openCL implementation: no SIMD, tons of branching
309 // in the inner loops etc. Generously characterise it as only 10x
310 // slower than the above.
311 rDevice.fTime *= 10.0;
312 rDevice.bErrors = false;
314 return DS_SUCCESS;
317 ds_status profileDevices(std::unique_ptr<ds_profile> const & pProfile, std::unique_ptr<LibreOfficeDeviceEvaluationIO> const & pTestData)
319 ds_status status = DS_SUCCESS;
321 if (!pProfile)
322 return DS_INVALID_PROFILE;
324 for (ds_device& rDevice : pProfile->devices)
326 ds_status evaluatorStatus = evaluateScoreForDevice(rDevice, pTestData);
327 if (evaluatorStatus != DS_SUCCESS)
329 status = evaluatorStatus;
330 return status;
333 return status;
336 /* Pick best device */
337 ds_status pickBestDevice(std::unique_ptr<ds_profile> const & profile, int& rBestDeviceIndex)
339 double bestScore = DBL_MAX;
341 rBestDeviceIndex = -1;
343 for (std::vector<ds_device>::size_type d = 0; d < profile->devices.size();
344 d++)
346 ds_device& device = profile->devices[d];
348 // Check blacklist and whitelist for actual devices
349 if (device.eType == DeviceType::OpenCLDevice)
351 // There is a silly impedance mismatch here. Why do we
352 // need two different ways to describe an OpenCL platform
353 // and an OpenCL device driver?
355 OpenCLPlatformInfo aPlatform;
356 OpenCLDeviceInfo aDevice;
358 // We know that only the below fields are used by checkForKnownBadCompilers()
359 aPlatform.maVendor = OStringToOUString(device.sPlatformVendor, RTL_TEXTENCODING_UTF8);
360 aDevice.maName = OStringToOUString(device.sDeviceName, RTL_TEXTENCODING_UTF8);
361 aDevice.maDriver = OStringToOUString(device.sDriverVersion, RTL_TEXTENCODING_UTF8);
363 // If blacklisted or not whitelisted, ignore it
364 if (OpenCLConfig::get().checkImplementation(aPlatform, aDevice))
366 SAL_INFO("opencl.device", "Device[" << d << "] " << device.sDeviceName << " is blacklisted or not whitelisted");
367 device.fTime = DBL_MAX;
368 device.bErrors = false;
372 double fScore = DBL_MAX;
373 if (device.fTime >= 0.0
374 || rtl::math::approxEqual(device.fTime, DBL_MAX))
376 fScore = device.fTime;
378 else
380 SAL_INFO("opencl.device", "Unusual null score");
383 if (device.eType == DeviceType::OpenCLDevice)
385 SAL_INFO("opencl.device", "Device[" << d << "] " << device.sDeviceName << " (OpenCL) score is " << fScore);
387 else
389 SAL_INFO("opencl.device", "Device[" << d << "] CPU (Native) score is " << fScore);
391 if (fScore < bestScore)
393 bestScore = fScore;
394 rBestDeviceIndex = d;
397 if (rBestDeviceIndex != -1 && profile->devices[rBestDeviceIndex].eType == DeviceType::OpenCLDevice)
399 SAL_INFO("opencl.device", "Selected Device[" << rBestDeviceIndex << "]: " << profile->devices[rBestDeviceIndex].sDeviceName << "(OpenCL).");
401 else
403 SAL_INFO("opencl.device", "Selected Device[" << rBestDeviceIndex << "]: CPU (Native).");
405 return DS_SUCCESS;
408 /* Return device ID for matching device name */
409 int matchDevice(std::unique_ptr<ds_profile> const & profile, char* deviceName)
411 int deviceMatch = -1;
412 for (unsigned int d = 0; d < profile->devices.size() - 1; d++)
414 if (profile->devices[d].sDeviceName.indexOf(deviceName) != -1)
415 deviceMatch = d;
417 if (std::string("NATIVE_CPU").find(deviceName) != std::string::npos)
418 deviceMatch = profile->devices.size() - 1;
419 return deviceMatch;
422 class LogWriter
424 private:
425 SvFileStream maStream;
426 public:
427 explicit LogWriter(OUString const & aFileName)
428 : maStream(aFileName, StreamMode::WRITE)
431 void text(const OString& rText)
433 maStream.WriteOString(rText);
434 maStream.WriteChar('\n');
437 void log(const OString& rKey, const OString& rValue)
439 maStream.WriteOString(rKey);
440 maStream.WriteCharPtr(": ");
441 maStream.WriteOString(rValue);
442 maStream.WriteChar('\n');
445 void log(const OString& rKey, int rValue)
447 log(rKey, OString::number(rValue));
450 void log(const OString& rKey, bool rValue)
452 log(rKey, OString::boolean(rValue));
457 void writeDevicesLog(std::unique_ptr<ds_profile> const & rProfile, OUString const & sProfilePath, int nSelectedIndex)
459 OUString aCacheFile(sProfilePath + "opencl_devices.log");
460 LogWriter aWriter(aCacheFile);
462 int nIndex = 0;
464 for (ds_device& rDevice : rProfile->devices)
466 if (rDevice.eType == DeviceType::OpenCLDevice)
468 aWriter.log("Device Index", nIndex);
469 aWriter.log(" Selected", nIndex == nSelectedIndex);
470 aWriter.log(" Device Name", rDevice.sDeviceName);
471 aWriter.log(" Device Vendor", rDevice.sDeviceVendor);
472 aWriter.log(" Device Version", rDevice.sDeviceVersion);
473 aWriter.log(" Driver Version", rDevice.sDriverVersion);
474 aWriter.log(" Device Type", rDevice.sDeviceType);
475 aWriter.log(" Device Extensions", rDevice.sDeviceExtensions);
476 aWriter.log(" Device OpenCL C Version", rDevice.sDeviceOpenCLVersion);
478 aWriter.log(" Device Available", rDevice.bDeviceAvailable);
479 aWriter.log(" Device Compiler Available", rDevice.bDeviceCompilerAvailable);
480 aWriter.log(" Device Linker Available", rDevice.bDeviceLinkerAvailable);
482 aWriter.log(" Platform Name", rDevice.sPlatformName);
483 aWriter.log(" Platform Vendor", rDevice.sPlatformVendor);
484 aWriter.log(" Platform Version", rDevice.sPlatformVersion);
485 aWriter.log(" Platform Profile", rDevice.sPlatformProfile);
486 aWriter.log(" Platform Extensions", rDevice.sPlatformExtensions);
487 aWriter.text("");
489 nIndex++;
493 } // end anonymous namespace
495 ds_device const & getDeviceSelection(
496 OUString const & sProfilePath, bool bForceSelection)
498 /* Run only if device is not yet selected */
499 if (!bIsDeviceSelected || bForceSelection)
501 /* Setup */
502 std::unique_ptr<ds_profile> aProfile;
503 ds_status status;
504 status = initDSProfile(aProfile, "LibreOffice v1");
506 if (status != DS_SUCCESS)
508 // failed to initialize profile.
509 selectedDevice.eType = DeviceType::NativeCPU;
510 return selectedDevice;
513 /* Try reading scores from file */
514 OUString sFilePath = sProfilePath + "opencl_profile.xml";
516 if (!bForceSelection)
518 status = readProfile(sFilePath, aProfile);
520 else
522 status = DS_INVALID_PROFILE;
523 SAL_INFO("opencl.device", "Performing forced profiling.");
525 if (DS_SUCCESS != status)
527 if (!bForceSelection)
529 SAL_INFO("opencl.device", "Profile file not available (" << sFilePath << "); performing profiling.");
532 /* Populate input data for micro-benchmark */
533 std::unique_ptr<LibreOfficeDeviceEvaluationIO> testData(new LibreOfficeDeviceEvaluationIO);
534 testData->inputSize = INPUTSIZE;
535 testData->outputSize = OUTPUTSIZE;
536 testData->input0.resize(testData->inputSize);
537 testData->input1.resize(testData->inputSize);
538 testData->input2.resize(testData->inputSize);
539 testData->input3.resize(testData->inputSize);
540 testData->output.resize(testData->outputSize);
541 populateInput(testData);
543 /* Perform evaluations */
544 status = profileDevices(aProfile, testData);
546 if (DS_SUCCESS == status)
548 /* Write scores to file */
549 status = writeProfile(sFilePath, aProfile);
550 if (DS_SUCCESS == status)
552 SAL_INFO("opencl.device", "Scores written to file (" << sFilePath << ").");
554 else
556 SAL_INFO("opencl.device", "Error saving scores to file (" << sFilePath << "); scores not written to file.");
559 else
561 SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file.");
564 else
566 SAL_INFO("opencl.device", "Profile read from file (" << sFilePath << ").");
569 /* Pick best device */
570 int bestDeviceIdx;
571 pickBestDevice(aProfile, bestDeviceIdx);
573 /* Override if necessary */
574 char* overrideDeviceStr = getenv("SC_OPENCL_DEVICE_OVERRIDE");
575 if (nullptr != overrideDeviceStr)
577 int overrideDeviceIdx = matchDevice(aProfile, overrideDeviceStr);
578 if (-1 != overrideDeviceIdx)
580 SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
581 bestDeviceIdx = overrideDeviceIdx;
582 if (aProfile->devices[bestDeviceIdx].eType == DeviceType::OpenCLDevice)
584 SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: " << aProfile->devices[bestDeviceIdx].sDeviceName << " (OpenCL).");
586 else
588 SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx << "]: CPU (Native).");
591 else
593 SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr << ").");
597 /* Final device selection */
598 if (bestDeviceIdx >=0 && static_cast< std::vector<ds_device>::size_type> ( bestDeviceIdx ) < aProfile->devices.size() )
600 selectedDevice = aProfile->devices[bestDeviceIdx];
601 bIsDeviceSelected = true;
603 writeDevicesLog(aProfile, sProfilePath, bestDeviceIdx);
604 } else {
605 selectedDevice.eType = DeviceType::NativeCPU;
608 return selectedDevice;
611 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */