1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
13 #elif defined __MACH__
14 #include <mach/mach_time.h>
26 #include <boost/scoped_ptr.hpp>
28 #include <comphelper/random.hxx>
29 #include <opencl/openclconfig.hxx>
30 #include <opencl/openclwrapper.hxx>
31 #include <opencl/platforminfo.hxx>
32 #include <sal/log.hxx>
34 #include "opencl_device.hxx"
36 #define INPUTSIZE 15360
37 #define OUTPUTSIZE 15360
39 #define STRINGIFY(...) #__VA_ARGS__"\n"
41 #define DS_CHECK_STATUS(status, name) \
42 if (CL_SUCCESS != status) \
44 SAL_INFO("opencl.device", "Error code is " << status << " at " name); \
49 bool bIsDeviceSelected
= false;
50 ds_device selectedDevice
;
52 struct LibreOfficeDeviceScore
54 double fTime
; // small time means faster device
55 bool bNoCLErrors
; // were there any opencl errors
58 struct LibreOfficeDeviceEvaluationIO
60 std::vector
<double> input0
;
61 std::vector
<double> input1
;
62 std::vector
<double> input2
;
63 std::vector
<double> input3
;
64 std::vector
<double> output
;
65 unsigned long inputSize
;
66 unsigned long outputSize
;
78 const char* source
= STRINGIFY(
79 \n#if defined(KHR_DP_EXTENSION)
80 \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
81 \n#elif defined(AMD_DP_EXTENSION)
82 \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
85 int isNan(fp_t a
) { return a
!= a
; }
86 fp_t
fsum(fp_t a
, fp_t b
) { return a
+ b
; }
88 fp_t
fAverage(__global fp_t
* input
)
92 for (int i
= 0; i
< INPUTSIZE
; i
++)
96 sum
= fsum(input
[i
], sum
);
100 return sum
/ (fp_t
)count
;
102 fp_t
fMin(__global fp_t
* input
)
105 for (int i
= 0; i
< INPUTSIZE
; i
++)
107 if (!isNan(input
[i
]))
109 min
= fmin(input
[i
], min
);
114 fp_t
fSoP(__global fp_t
* input0
, __global fp_t
* input1
)
117 for (int i
= 0; i
< INPUTSIZE
; i
++)
119 sop
+= (isNan(input0
[i
]) ? 0 : input0
[i
]) * (isNan(input1
[i
]) ? 0 : input1
[i
]);
123 __kernel
void DynamicKernel(
124 __global fp_t
* result
, __global fp_t
* input0
, __global fp_t
* input1
, __global fp_t
* input2
, __global fp_t
* input3
)
126 int gid0
= get_global_id(0);
127 fp_t tmp0
= fAverage(input0
);
128 fp_t tmp1
= fMin(input1
) * fSoP(input2
, input3
);
129 result
[gid0
] = fsum(tmp0
, tmp1
);
133 size_t sourceSize
[] = { strlen(source
) };
135 /*************************************************************************/
136 /* INTERNAL FUNCTIONS */
137 /*************************************************************************/
138 /* Timer functions - start timer */
139 void timerStart(timer
* mytimer
)
142 QueryPerformanceCounter(&mytimer
->start
);
143 #elif defined __MACH__
144 mytimer
->start
= mach_absolute_time();
147 clock_gettime(CLOCK_MONOTONIC
, &s
);
148 mytimer
->start
= (long long)s
.tv_sec
* (long long)1.0E6
+ (long long)s
.tv_nsec
/ (long long)1.0E3
;
152 /* Timer functions - get current value */
153 double timerCurrent(timer
* mytimer
)
156 LARGE_INTEGER stop
, frequency
;
157 QueryPerformanceCounter(&stop
);
158 QueryPerformanceFrequency(&frequency
);
159 double time
= ((double)(stop
.QuadPart
- mytimer
->start
.QuadPart
) / frequency
.QuadPart
);
160 #elif defined __MACH__
161 static mach_timebase_info_data_t info
= { 0, 0 };
163 mach_timebase_info(&info
);
164 long long stop
= mach_absolute_time();
165 double time
= ((stop
- mytimer
->start
) * (double) info
.numer
/ info
.denom
) / 1.0E9
;
169 clock_gettime(CLOCK_MONOTONIC
, &s
);
170 stop
= (long long)s
.tv_sec
* (long long)1.0E6
+ (long long)s
.tv_nsec
/ (long long)1.0E3
;
171 double time
= ((double)(stop
- mytimer
->start
) / 1.0E6
);
176 /* Random number generator */
177 double random(double min
, double max
)
181 return comphelper::rng::uniform_real_distribution(min
, max
);
185 void populateInput(LibreOfficeDeviceEvaluationIO
* testData
)
187 double* input0
= &testData
->input0
[0];
188 double* input1
= &testData
->input1
[0];
189 double* input2
= &testData
->input2
[0];
190 double* input3
= &testData
->input3
[0];
191 for (unsigned long i
= 0; i
< testData
->inputSize
; i
++)
193 input0
[i
] = random(0, i
);
194 input1
[i
] = random(0, i
);
195 input2
[i
] = random(0, i
);
196 input3
[i
] = random(0, i
);
199 /* Encode score object as byte string */
200 ds_status
serializeScore(ds_device
* device
, void** serializedScore
, unsigned int* serializedScoreSize
)
202 *serializedScoreSize
= sizeof(LibreOfficeDeviceScore
);
203 *serializedScore
= (void*)new unsigned char[*serializedScoreSize
];
204 memcpy(*serializedScore
, device
->score
, *serializedScoreSize
);
208 /* Parses byte string and stores in score object */
209 ds_status
deserializeScore(ds_device
* device
, const unsigned char* serializedScore
, unsigned int serializedScoreSize
)
211 // check that serializedScoreSize == sizeof(LibreOfficeDeviceScore);
212 device
->score
= new LibreOfficeDeviceScore
;
213 memcpy(device
->score
, serializedScore
, serializedScoreSize
);
217 /* Releases memory held by score */
218 ds_status
releaseScore(void* score
)
222 delete static_cast<LibreOfficeDeviceScore
*>(score
);
227 /* Evaluate devices */
228 ds_status
evaluateScoreForDevice(ds_device
* device
, void* evalData
)
230 if (DS_DEVICE_OPENCL_DEVICE
== device
->type
)
232 /* Evaluating an OpenCL device */
233 SAL_INFO("opencl.device", "Device: \"" << device
->oclDeviceName
<< "\" (OpenCL) evaluation...");
235 /* Check for 64-bit float extensions */
236 size_t aDevExtInfoSize
= 0;
237 clStatus
= clGetDeviceInfo(device
->oclDeviceID
, CL_DEVICE_EXTENSIONS
, 0, NULL
, &aDevExtInfoSize
);
238 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clGetDeviceInfo");
240 char* aExtInfo
= new char[aDevExtInfoSize
];
241 clStatus
= clGetDeviceInfo(device
->oclDeviceID
, CL_DEVICE_EXTENSIONS
, sizeof(char) * aDevExtInfoSize
, aExtInfo
, NULL
);
242 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clGetDeviceInfo");
243 bool bKhrFp64Flag
= false;
244 bool bAmdFp64Flag
= false;
245 const char* buildOption
= NULL
;
246 std::string
tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
247 std::ostringstream tmpOStrStr
;
248 tmpOStrStr
<< std::dec
<< INPUTSIZE
;
249 tmpStr
.append(tmpOStrStr
.str());
251 if ((std::string(aExtInfo
)).find("cl_khr_fp64") != std::string::npos
)
254 //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
255 tmpStr
.append(" -DKHR_DP_EXTENSION");
256 buildOption
= tmpStr
.c_str();
257 SAL_INFO("opencl.device", "... has cl_khr_fp64");
259 else if ((std::string(aExtInfo
)).find("cl_amd_fp64") != std::string::npos
)
262 //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
263 tmpStr
.append(" -DAMD_DP_EXTENSION");
264 buildOption
= tmpStr
.c_str();
265 SAL_INFO("opencl.device", "... has cl_amd_fp64");
269 if (!bKhrFp64Flag
&& !bAmdFp64Flag
)
271 /* No 64-bit float support */
272 device
->score
= (void*)new LibreOfficeDeviceScore
;
273 static_cast<LibreOfficeDeviceScore
*>(device
->score
)->fTime
= DBL_MAX
;
274 static_cast<LibreOfficeDeviceScore
*>(device
->score
)->bNoCLErrors
= true;
275 SAL_INFO("opencl.device", "... no fp64 support");
279 /* 64-bit float support present */
281 /* Create context and command queue */
282 cl_context clContext
= clCreateContext(NULL
, 1, &device
->oclDeviceID
, NULL
, NULL
, &clStatus
);
283 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateContext");
284 cl_command_queue clQueue
= clCreateCommandQueue(clContext
, device
->oclDeviceID
, 0, &clStatus
);
285 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateCommandQueue");
288 cl_program clProgram
= clCreateProgramWithSource(clContext
, 1, &source
, sourceSize
, &clStatus
);
289 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateProgramWithSource");
290 clStatus
= clBuildProgram(clProgram
, 1, &device
->oclDeviceID
, buildOption
, NULL
, NULL
);
291 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clBuildProgram");
292 if (CL_SUCCESS
!= clStatus
)
294 /* Build program failed */
297 clStatus
= clGetProgramBuildInfo(clProgram
, device
->oclDeviceID
, CL_PROGRAM_BUILD_LOG
, 0, NULL
, &length
);
298 buildLog
= static_cast<char*>(malloc(length
));
299 clGetProgramBuildInfo(clProgram
, device
->oclDeviceID
, CL_PROGRAM_BUILD_LOG
, length
, buildLog
, &length
);
300 SAL_INFO("opencl.device", "Build Errors:\n" << buildLog
);
303 device
->score
= (void*)new LibreOfficeDeviceScore
;
304 static_cast<LibreOfficeDeviceScore
*>(device
->score
)->fTime
= DBL_MAX
;
305 static_cast<LibreOfficeDeviceScore
*>(device
->score
)->bNoCLErrors
= false;
309 /* Build program succeeded */
311 timerStart(&kernelTime
);
314 LibreOfficeDeviceEvaluationIO
* testData
= static_cast<LibreOfficeDeviceEvaluationIO
*>(evalData
);
315 cl_kernel clKernel
= clCreateKernel(clProgram
, "DynamicKernel", &clStatus
);
316 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateKernel");
317 cl_mem clResult
= clCreateBuffer(clContext
, CL_MEM_WRITE_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->outputSize
, &testData
->output
[0], &clStatus
);
318 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clResult");
319 cl_mem clInput0
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input0
[0], &clStatus
);
320 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput0");
321 cl_mem clInput1
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input1
[0], &clStatus
);
322 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput1");
323 cl_mem clInput2
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input2
[0], &clStatus
);
324 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput2");
325 cl_mem clInput3
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input3
[0], &clStatus
);
326 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput3");
327 clStatus
= clSetKernelArg(clKernel
, 0, sizeof(cl_mem
), (void*)&clResult
);
328 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clResult");
329 clStatus
= clSetKernelArg(clKernel
, 1, sizeof(cl_mem
), (void*)&clInput0
);
330 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput0");
331 clStatus
= clSetKernelArg(clKernel
, 2, sizeof(cl_mem
), (void*)&clInput1
);
332 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput1");
333 clStatus
= clSetKernelArg(clKernel
, 3, sizeof(cl_mem
), (void*)&clInput2
);
334 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput2");
335 clStatus
= clSetKernelArg(clKernel
, 4, sizeof(cl_mem
), (void*)&clInput3
);
336 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput3");
337 size_t globalWS
[1] = { testData
->outputSize
};
338 size_t localSize
[1] = { 64 };
339 clStatus
= clEnqueueNDRangeKernel(clQueue
, clKernel
, 1, 0, globalWS
, localSize
, 0, NULL
, NULL
);
340 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
342 clReleaseMemObject(clInput3
);
343 clReleaseMemObject(clInput2
);
344 clReleaseMemObject(clInput1
);
345 clReleaseMemObject(clInput0
);
346 clReleaseMemObject(clResult
);
347 clReleaseKernel(clKernel
);
349 device
->score
= (void*)new LibreOfficeDeviceScore
;
350 static_cast<LibreOfficeDeviceScore
*>(device
->score
)->fTime
= timerCurrent(&kernelTime
);
351 static_cast<LibreOfficeDeviceScore
*>(device
->score
)->bNoCLErrors
= true;
354 clReleaseProgram(clProgram
);
355 clReleaseCommandQueue(clQueue
);
356 clReleaseContext(clContext
);
361 /* Evaluating an Native CPU device */
362 SAL_INFO("opencl.device", "Device: \"CPU\" (Native) evaluation...");
364 timerStart(&kernelTime
);
366 LibreOfficeDeviceEvaluationIO
* testData
= static_cast<LibreOfficeDeviceEvaluationIO
*>(evalData
);
367 for (unsigned long j
= 0; j
< testData
->outputSize
; j
++)
369 double fAverage
= 0.0f
;
370 double fMin
= DBL_MAX
;
372 for (unsigned long i
= 0; i
< testData
->inputSize
; i
++)
374 fAverage
+= testData
->input0
[i
];
375 fMin
= ((fMin
< testData
->input1
[i
]) ? fMin
: testData
->input1
[i
]);
376 fSoP
+= testData
->input2
[i
] * testData
->input3
[i
];
378 fAverage
/= testData
->inputSize
;
379 testData
->output
[j
] = fAverage
+ (fMin
* fSoP
);
382 // InterpretTail - the S/W fallback is nothing like as efficient
383 // as any good openCL implementation: no SIMD, tons of branching
384 // in the inner loops etc. Generously characterise it as only 10x
385 // slower than the above.
386 float fInterpretTailFactor
= 10.0;
388 device
->score
= (void*)new LibreOfficeDeviceScore
;
389 static_cast<LibreOfficeDeviceScore
*>(device
->score
)->fTime
= timerCurrent(&kernelTime
);
390 static_cast<LibreOfficeDeviceScore
*>(device
->score
)->bNoCLErrors
= true;
392 static_cast<LibreOfficeDeviceScore
*>(device
->score
)->fTime
*= fInterpretTailFactor
;
397 /* Pick best device */
398 ds_status
pickBestDevice(ds_profile
* profile
, int* bestDeviceIdx
)
400 double bestScore
= DBL_MAX
;
403 for (unsigned int d
= 0; d
< profile
->numDevices
; d
++)
405 ds_device device
= profile
->devices
[d
];
406 LibreOfficeDeviceScore
*pScore
= static_cast<LibreOfficeDeviceScore
*>(device
.score
);
408 // Check blacklist and whitelist for actual devices
409 if (device
.type
== DS_DEVICE_OPENCL_DEVICE
)
411 // There is a silly impedance mismatch here. Why do we
412 // need two different ways to describe an OpenCL platform
413 // and an OpenCL device driver?
415 OpenCLPlatformInfo aPlatform
;
416 OpenCLDeviceInfo aDevice
;
418 // We know that only the below fields are used by checkForKnownBadCompilers()
419 aPlatform
.maVendor
= OUString(device
.oclPlatformVendor
, strlen(device
.oclPlatformVendor
), RTL_TEXTENCODING_UTF8
);
420 aDevice
.maName
= OUString(device
.oclDeviceName
, strlen(device
.oclDeviceName
), RTL_TEXTENCODING_UTF8
);
421 aDevice
.maDriver
= OUString(device
.oclDriverVersion
, strlen(device
.oclDriverVersion
), RTL_TEXTENCODING_UTF8
);
423 // If blacklisted or not whitelisted, ignore it
424 if (OpenCLConfig::get().checkImplementation(aPlatform
, aDevice
))
426 SAL_INFO("opencl.device", "Device[" << d
<< "] " << device
.oclDeviceName
<< " is blacklisted or not whitelisted");
427 pScore
->fTime
= DBL_MAX
;
428 pScore
->bNoCLErrors
= true;
432 double fScore
= DBL_MAX
;
435 fScore
= pScore
->fTime
;
439 SAL_INFO("opencl.device", "Unusual null score");
442 if (DS_DEVICE_OPENCL_DEVICE
== device
.type
)
444 SAL_INFO("opencl.device", "Device[" << d
<< "] " << device
.oclDeviceName
<< " (OpenCL) score is " << fScore
);
448 SAL_INFO("opencl.device", "Device[" << d
<< "] CPU (Native) score is " << fScore
);
450 if (fScore
< bestScore
)
456 if (DS_DEVICE_OPENCL_DEVICE
== profile
->devices
[*bestDeviceIdx
].type
)
458 SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx
<< "]: " << profile
->devices
[*bestDeviceIdx
].oclDeviceName
<< "(OpenCL).");
462 SAL_INFO("opencl.device", "Selected Device[" << *bestDeviceIdx
<< "]: CPU (Native).");
468 /* Return device ID for matching device name */
469 int matchDevice(ds_profile
* profile
, char* deviceName
)
471 int deviceMatch
= -1;
472 for (unsigned int d
= 0; d
< profile
->numDevices
- 1; d
++)
474 if ((std::string(profile
->devices
[d
].oclDeviceName
)).find(deviceName
) != std::string::npos
) deviceMatch
= d
;
476 if (std::string("NATIVE_CPU").find(deviceName
) != std::string::npos
) deviceMatch
= profile
->numDevices
- 1;
480 /*************************************************************************/
481 /* EXTERNAL FUNCTIONS */
482 /*************************************************************************/
483 ds_device
getDeviceSelection(const char* sProfilePath
, bool bForceSelection
)
485 /* Run only if device is not yet selected */
486 if (!bIsDeviceSelected
|| bForceSelection
)
490 ds_profile
* profile
= NULL
;
491 status
= initDSProfile(&profile
, "LibreOffice v0.1");
495 // failed to initialize profile.
496 selectedDevice
.type
= DS_DEVICE_NATIVE_CPU
;
497 return selectedDevice
;
500 /* Try reading scores from file */
501 std::string
tmpStr(sProfilePath
);
502 const char* fileName
= tmpStr
.append("sc_opencl_device_profile.dat").c_str();
503 if (!bForceSelection
)
505 status
= readProfileFromFile(profile
, deserializeScore
, fileName
);
509 status
= DS_INVALID_PROFILE
;
510 SAL_INFO("opencl.device", "Performing forced profiling.");
512 if (DS_SUCCESS
!= status
)
514 if (!bForceSelection
)
516 SAL_INFO("opencl.device", "Profile file not available (" << fileName
<< "); performing profiling.");
519 /* Populate input data for micro-benchmark */
520 boost::scoped_ptr
<LibreOfficeDeviceEvaluationIO
> testData(new LibreOfficeDeviceEvaluationIO
);
521 testData
->inputSize
= INPUTSIZE
;
522 testData
->outputSize
= OUTPUTSIZE
;
523 testData
->input0
.resize(testData
->inputSize
);
524 testData
->input1
.resize(testData
->inputSize
);
525 testData
->input2
.resize(testData
->inputSize
);
526 testData
->input3
.resize(testData
->inputSize
);
527 testData
->output
.resize(testData
->outputSize
);
528 populateInput(testData
.get());
530 /* Perform evaluations */
531 unsigned int numUpdates
;
532 status
= profileDevices(profile
, DS_EVALUATE_ALL
, evaluateScoreForDevice
, (void*)testData
.get(), &numUpdates
);
534 if (DS_SUCCESS
== status
)
536 /* Write scores to file */
537 status
= writeProfileToFile(profile
, serializeScore
, fileName
);
538 if (DS_SUCCESS
== status
)
540 SAL_INFO("opencl.device", "Scores written to file (" << fileName
<< ").");
544 SAL_INFO("opencl.device", "Error saving scores to file (" << fileName
<< "); scores not written to file.");
549 SAL_INFO("opencl.device", "Unable to evaluate performance; scores not written to file.");
554 SAL_INFO("opencl.device", "Profile read from file (" << fileName
<< ").");
557 /* Pick best device */
559 pickBestDevice(profile
, &bestDeviceIdx
);
561 /* Override if necessary */
562 char* overrideDeviceStr
= getenv("SC_OPENCL_DEVICE_OVERRIDE");
563 if (NULL
!= overrideDeviceStr
)
565 int overrideDeviceIdx
= matchDevice(profile
, overrideDeviceStr
);
566 if (-1 != overrideDeviceIdx
)
568 SAL_INFO("opencl.device", "Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr
<< ").");
569 bestDeviceIdx
= overrideDeviceIdx
;
570 if (DS_DEVICE_OPENCL_DEVICE
== profile
->devices
[bestDeviceIdx
].type
)
572 SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx
<< "]: " << profile
->devices
[bestDeviceIdx
].oclDeviceName
<< " (OpenCL).");
576 SAL_INFO("opencl.device", "Selected Device[" << bestDeviceIdx
<< "]: CPU (Native).");
581 SAL_INFO("opencl.device", "Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr
<< ").");
585 /* Final device selection */
586 selectedDevice
= profile
->devices
[bestDeviceIdx
];
587 bIsDeviceSelected
= true;
589 /* Release profile */
590 releaseDSProfile(profile
, releaseScore
);
592 return selectedDevice
;
597 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */