1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
12 #elif defined __MACH__
13 #include <mach/mach_time.h>
23 #include <boost/scoped_ptr.hpp>
25 #include "opencl_device.hxx"
28 #define INPUTSIZE 15360
29 #define OUTPUTSIZE 15360
31 #define STRINGIFY(...) #__VA_ARGS__"\n"
32 //#define LOG_PRINTF(x) (std::cout << x << std::endl)
35 #define DS_CHECK_STATUS(status, name) \
36 if (CL_SUCCESS != status) \
38 LOG_PRINTF("[OCL] Error code is " << status << " at " << name); \
41 namespace sc
{ namespace OpenCLDevice
{
43 bool bIsInited
= false;
44 bool bIsDeviceSelected
= false;
45 ds_device selectedDevice
;
47 struct LibreOfficeDeviceScore
49 double fTime
; // small time means faster device
50 bool bNoCLErrors
; // were there any opencl errors
53 struct LibreOfficeDeviceEvaluationIO
55 std::vector
<double> input0
;
56 std::vector
<double> input1
;
57 std::vector
<double> input2
;
58 std::vector
<double> input3
;
59 std::vector
<double> output
;
60 unsigned long inputSize
;
61 unsigned long outputSize
;
73 const char* source
= STRINGIFY(
74 \n#if defined(KHR_DP_EXTENSION)
75 \n#pragma OPENCL EXTENSION cl_khr_fp64 : enable
76 \n#elif defined(AMD_DP_EXTENSION)
77 \n#pragma OPENCL EXTENSION cl_amd_fp64 : enable
80 int isNan(fp_t a
) { return a
!= a
; }
81 fp_t
fsum(fp_t a
, fp_t b
) { return a
+ b
; }
83 fp_t
fAverage(__global fp_t
* input
)
87 for (int i
= 0; i
< INPUTSIZE
; i
++)
91 sum
= fsum(input
[i
], sum
);
95 return sum
/ (fp_t
)count
;
97 fp_t
fMin(__global fp_t
* input
)
100 for (int i
= 0; i
< INPUTSIZE
; i
++)
102 if (!isNan(input
[i
]))
104 min
= fmin(input
[i
], min
);
109 fp_t
fSoP(__global fp_t
* input0
, __global fp_t
* input1
)
112 for (int i
= 0; i
< INPUTSIZE
; i
++)
114 sop
+= (isNan(input0
[i
]) ? 0 : input0
[i
]) * (isNan(input1
[i
]) ? 0 : input1
[i
]);
118 __kernel
void DynamicKernel(
119 __global fp_t
* result
, __global fp_t
* input0
, __global fp_t
* input1
, __global fp_t
* input2
, __global fp_t
* input3
)
121 int gid0
= get_global_id(0);
122 fp_t tmp0
= fAverage(input0
);
123 fp_t tmp1
= fMin(input1
) * fSoP(input2
, input3
);
124 result
[gid0
] = fsum(tmp0
, tmp1
);
128 size_t sourceSize
[] = { strlen(source
) };
130 /*************************************************************************/
131 /* INTERNAL FUNCTIONS */
132 /*************************************************************************/
133 /* Timer functions - start timer */
134 void timerStart(timer
* mytimer
)
137 QueryPerformanceCounter(&mytimer
->start
);
138 #elif defined __MACH__
139 mytimer
->start
= mach_absolute_time();
142 clock_gettime(CLOCK_MONOTONIC
, &s
);
143 mytimer
->start
= (long long)s
.tv_sec
* (long long)1.0E6
+ (long long)s
.tv_nsec
/ (long long)1.0E3
;
147 /* Timer functions - get current value */
148 double timerCurrent(timer
* mytimer
)
151 LARGE_INTEGER stop
, frequency
;
152 QueryPerformanceCounter(&stop
);
153 QueryPerformanceFrequency(&frequency
);
154 double time
= ((double)(stop
.QuadPart
- mytimer
->start
.QuadPart
) / frequency
.QuadPart
);
155 #elif defined __MACH__
156 static mach_timebase_info_data_t info
= { 0, 0 };
158 mach_timebase_info(&info
);
159 long long stop
= mach_absolute_time();
160 double time
= ((stop
- mytimer
->start
) * (double) info
.numer
/ info
.denom
) / 1.0E9
;
164 clock_gettime(CLOCK_MONOTONIC
, &s
);
165 stop
= (long long)s
.tv_sec
* (long long)1.0E6
+ (long long)s
.tv_nsec
/ (long long)1.0E3
;
166 double time
= ((double)(stop
- mytimer
->start
) / 1.0E6
);
171 /* Random number generator */
172 double random(double min
, double max
)
174 return floor(((double)rand() / ((unsigned int)RAND_MAX
+ 1)) * (max
- min
+ 1) + min
);
178 void populateInput(LibreOfficeDeviceEvaluationIO
* testData
)
180 srand((unsigned int)time(NULL
));
181 double* input0
= &testData
->input0
[0];
182 double* input1
= &testData
->input1
[0];
183 double* input2
= &testData
->input2
[0];
184 double* input3
= &testData
->input3
[0];
185 for (unsigned long i
= 0; i
< testData
->inputSize
; i
++)
187 input0
[i
] = random(0, i
);
188 input1
[i
] = random(0, i
);
189 input2
[i
] = random(0, i
);
190 input3
[i
] = random(0, i
);
193 /* Encode score object as byte string */
194 ds_status
serializeScore(ds_device
* device
, void** serializedScore
, unsigned int* serializedScoreSize
)
196 *serializedScoreSize
= sizeof(LibreOfficeDeviceScore
);
197 *serializedScore
= (void*)new unsigned char[*serializedScoreSize
];
198 memcpy(*serializedScore
, device
->score
, *serializedScoreSize
);
202 /* Parses byte string and stores in score object */
203 ds_status
deserializeScore(ds_device
* device
, const unsigned char* serializedScore
, unsigned int serializedScoreSize
)
205 // check that serializedScoreSize == sizeof(LibreOfficeDeviceScore);
206 device
->score
= new LibreOfficeDeviceScore
;
207 memcpy(device
->score
, serializedScore
, serializedScoreSize
);
211 /* Releases memory held by score */
212 ds_status
releaseScore(void* score
)
216 delete (LibreOfficeDeviceScore
*)score
;
221 /* Evaluate devices */
222 ds_status
evaluateScoreForDevice(ds_device
* device
, void* evalData
)
224 if (DS_DEVICE_OPENCL_DEVICE
== device
->type
)
226 /* Evaluating an OpenCL device */
227 LOG_PRINTF("[DS] Device: \"" << device
->oclDeviceName
<< "\" (OpenCL) evaluation...");
229 cl_context clContext
;
230 cl_command_queue clQueue
;
231 cl_program clProgram
;
234 /* Check for 64-bit float extensions */
235 size_t aDevExtInfoSize
= 0;
236 clStatus
= clGetDeviceInfo(device
->oclDeviceID
, CL_DEVICE_EXTENSIONS
, 0, NULL
, &aDevExtInfoSize
);
237 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clGetDeviceInfo");
239 char* aExtInfo
= new char[aDevExtInfoSize
];
240 clStatus
= clGetDeviceInfo(device
->oclDeviceID
, CL_DEVICE_EXTENSIONS
, sizeof(char) * aDevExtInfoSize
, aExtInfo
, NULL
);
241 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clGetDeviceInfo");
243 bool bKhrFp64Flag
= false;
244 bool bAmdFp64Flag
= false;
245 const char* buildOption
= NULL
;
246 std::string
tmpStr("-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
247 std::ostringstream tmpOStrStr
;
248 tmpOStrStr
<< std::dec
<< INPUTSIZE
;
249 tmpStr
.append(tmpOStrStr
.str());
251 if ((std::string(aExtInfo
)).find("cl_khr_fp64") != std::string::npos
)
254 //buildOption = "-D KHR_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
255 tmpStr
.append(" -DKHR_DP_EXTENSION");
256 buildOption
= tmpStr
.c_str();
258 else if ((std::string(aExtInfo
)).find("cl_amd_fp64") != std::string::npos
)
261 //buildOption = "-D AMD_DP_EXTENSION -Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16";
262 tmpStr
.append(" -DAMD_DP_EXTENSION");
263 buildOption
= tmpStr
.c_str();
267 if (!bKhrFp64Flag
&& !bAmdFp64Flag
)
269 /* No 64-bit float support */
270 device
->score
= (void*)new LibreOfficeDeviceScore
;
271 ((LibreOfficeDeviceScore
*)device
->score
)->fTime
= DBL_MAX
;
272 ((LibreOfficeDeviceScore
*)device
->score
)->bNoCLErrors
= true;
276 /* 64-bit float support present */
278 /* Create context and command queue */
279 clContext
= clCreateContext(NULL
, 1, &device
->oclDeviceID
, NULL
, NULL
, &clStatus
);
280 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateContext");
281 clQueue
= clCreateCommandQueue(clContext
, device
->oclDeviceID
, 0, &clStatus
);
282 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateCommandQueue");
285 clProgram
= clCreateProgramWithSource(clContext
, 1, &source
, sourceSize
, &clStatus
);
286 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateProgramWithSource");
287 clStatus
= clBuildProgram(clProgram
, 1, &device
->oclDeviceID
, buildOption
, NULL
, NULL
);
288 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clBuildProgram");
289 if (CL_SUCCESS
!= clStatus
)
291 /* Build program failed */
294 clStatus
= clGetProgramBuildInfo(clProgram
, device
->oclDeviceID
, CL_PROGRAM_BUILD_LOG
, 0, NULL
, &length
);
295 buildLog
= (char*)malloc(length
);
296 clStatus
= clGetProgramBuildInfo(clProgram
, device
->oclDeviceID
, CL_PROGRAM_BUILD_LOG
, length
, buildLog
, &length
);
297 LOG_PRINTF("[OCL] Build Errors" << std::endl
<< buildLog
);
300 device
->score
= (void*)new LibreOfficeDeviceScore
;
301 ((LibreOfficeDeviceScore
*)device
->score
)->fTime
= DBL_MAX
;
302 ((LibreOfficeDeviceScore
*)device
->score
)->bNoCLErrors
= false;
306 /* Build program succeeded */
308 timerStart(&kernelTime
);
311 LibreOfficeDeviceEvaluationIO
* testData
= (LibreOfficeDeviceEvaluationIO
*)evalData
;
312 clKernel
= clCreateKernel(clProgram
, "DynamicKernel", &clStatus
);
313 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateKernel");
314 cl_mem clResult
= clCreateBuffer(clContext
, CL_MEM_WRITE_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->outputSize
, &testData
->output
[0], &clStatus
);
315 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clResult");
316 cl_mem clInput0
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input0
[0], &clStatus
);
317 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput0");
318 cl_mem clInput1
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input1
[0], &clStatus
);
319 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput1");
320 cl_mem clInput2
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input2
[0], &clStatus
);
321 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput2");
322 cl_mem clInput3
= clCreateBuffer(clContext
, CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
, sizeof(cl_double
) * testData
->inputSize
, &testData
->input3
[0], &clStatus
);
323 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clCreateBuffer::clInput3");
324 clStatus
= clSetKernelArg(clKernel
, 0, sizeof(cl_mem
), (void*)&clResult
);
325 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clResult");
326 clStatus
= clSetKernelArg(clKernel
, 1, sizeof(cl_mem
), (void*)&clInput0
);
327 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput0");
328 clStatus
= clSetKernelArg(clKernel
, 2, sizeof(cl_mem
), (void*)&clInput1
);
329 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput1");
330 clStatus
= clSetKernelArg(clKernel
, 3, sizeof(cl_mem
), (void*)&clInput2
);
331 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput2");
332 clStatus
= clSetKernelArg(clKernel
, 4, sizeof(cl_mem
), (void*)&clInput3
);
333 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clSetKernelArg::clInput3");
334 size_t globalWS
[1] = { testData
->outputSize
};
335 size_t localSize
[1] = { 64 };
336 clStatus
= clEnqueueNDRangeKernel(clQueue
, clKernel
, 1, 0, globalWS
, localSize
, 0, NULL
, NULL
);
337 DS_CHECK_STATUS(clStatus
, "evaluateScoreForDevice::clEnqueueNDRangeKernel");
339 clReleaseMemObject(clInput3
);
340 clReleaseMemObject(clInput2
);
341 clReleaseMemObject(clInput1
);
342 clReleaseMemObject(clInput0
);
343 clReleaseMemObject(clResult
);
344 clReleaseKernel(clKernel
);
346 device
->score
= (void*)new LibreOfficeDeviceScore
;
347 ((LibreOfficeDeviceScore
*)device
->score
)->fTime
= timerCurrent(&kernelTime
);
348 ((LibreOfficeDeviceScore
*)device
->score
)->bNoCLErrors
= true;
351 clReleaseProgram(clProgram
);
352 clReleaseCommandQueue(clQueue
);
353 clReleaseContext(clContext
);
358 /* Evaluating an Native CPU device */
359 LOG_PRINTF("[DS] Device: \"CPU\" (Native) evaluation...");
361 timerStart(&kernelTime
);
363 LibreOfficeDeviceEvaluationIO
* testData
= (LibreOfficeDeviceEvaluationIO
*)evalData
;
364 for (unsigned long j
= 0; j
< testData
->outputSize
; j
++)
366 double fAverage
= 0.0f
;
367 double fMin
= DBL_MAX
;
369 for (unsigned long i
= 0; i
< testData
->inputSize
; i
++)
371 fAverage
+= testData
->input0
[i
];
372 fMin
= ((fMin
< testData
->input1
[i
]) ? fMin
: testData
->input1
[i
]);
373 fSoP
+= testData
->input2
[i
] * testData
->input3
[i
];
375 fAverage
/= testData
->inputSize
;
376 testData
->output
[j
] = fAverage
+ (fMin
* fSoP
);
379 // InterpretTail - the S/W fallback is nothing like as efficient
380 // as any good openCL implementation: no SIMD, tons of branching
381 // in the inner loops etc. Generously characterise it as only 10x
382 // slower than the above.
383 float fInterpretTailFactor
= 10.0;
385 device
->score
= (void*)new LibreOfficeDeviceScore
;
386 ((LibreOfficeDeviceScore
*)device
->score
)->fTime
= timerCurrent(&kernelTime
);
387 ((LibreOfficeDeviceScore
*)device
->score
)->bNoCLErrors
= true;
389 ((LibreOfficeDeviceScore
*)device
->score
)->fTime
*= fInterpretTailFactor
;
394 /* Pick best device */
395 ds_status
pickBestDevice(ds_profile
* profile
, int* bestDeviceIdx
)
397 double bestScore
= DBL_MAX
;
400 for (unsigned int d
= 0; d
< profile
->numDevices
; d
++)
402 ds_device device
= profile
->devices
[d
];
403 LibreOfficeDeviceScore
*pScore
= (LibreOfficeDeviceScore
*)device
.score
;
405 double fScore
= DBL_MAX
;
407 fScore
= pScore
->fTime
;
410 LOG_PRINTF("Unusual null score");
413 if (DS_DEVICE_OPENCL_DEVICE
== device
.type
)
415 LOG_PRINTF("[DS] Device[" << d
<< "] " << device
.oclDeviceName
<< " (OpenCL) score is " << fScore
);
419 LOG_PRINTF("[DS] Device[" << d
<< "] CPU (Native) score is " << fScore
);
421 if (fScore
< bestScore
)
427 if (DS_DEVICE_OPENCL_DEVICE
== profile
->devices
[*bestDeviceIdx
].type
)
429 LOG_PRINTF("[DS] Selected Device[" << *bestDeviceIdx
<< "]: " << profile
->devices
[*bestDeviceIdx
].oclDeviceName
<< "(OpenCL).");
433 LOG_PRINTF("[DS] Selected Device[" << *bestDeviceIdx
<< "]: CPU (Native).");
439 /* Return device ID for matching device name */
440 int matchDevice(ds_profile
* profile
, char* deviceName
)
442 int deviceMatch
= -1;
443 for (unsigned int d
= 0; d
< profile
->numDevices
- 1; d
++)
445 if ((std::string(profile
->devices
[d
].oclDeviceName
)).find(deviceName
) != std::string::npos
) deviceMatch
= d
;
447 if (std::string("NATIVE_CPU").find(deviceName
) != std::string::npos
) deviceMatch
= profile
->numDevices
- 1;
451 /*************************************************************************/
452 /* EXTERNAL FUNCTIONS */
453 /*************************************************************************/
454 ds_device
getDeviceSelection(const char* sProfilePath
, bool bForceSelection
)
456 /* Run only if device is not yet selected */
457 if (!bIsDeviceSelected
|| bForceSelection
)
461 ds_profile
* profile
= NULL
;
462 status
= initDSProfile(&profile
, "LibreOffice v0.1");
466 // failed to initialize profile.
467 selectedDevice
.type
= DS_DEVICE_NATIVE_CPU
;
468 return selectedDevice
;
471 /* Try reading scores from file */
472 std::string
tmpStr(sProfilePath
);
473 const char* fileName
= tmpStr
.append("sc_opencl_device_profile.dat").c_str();
474 if (!bForceSelection
)
476 status
= readProfileFromFile(profile
, deserializeScore
, fileName
);
480 status
= DS_INVALID_PROFILE
;
481 LOG_PRINTF("[DS] Performing forced profiling.");
483 if (DS_SUCCESS
!= status
)
485 if (!bForceSelection
)
487 LOG_PRINTF("[DS] Profile file not available (" << fileName
<< "); performing profiling.");
490 /* Populate input data for micro-benchmark */
491 boost::scoped_ptr
<LibreOfficeDeviceEvaluationIO
> testData(new LibreOfficeDeviceEvaluationIO
);
492 testData
->inputSize
= INPUTSIZE
;
493 testData
->outputSize
= OUTPUTSIZE
;
494 testData
->input0
.resize(testData
->inputSize
);
495 testData
->input1
.resize(testData
->inputSize
);
496 testData
->input2
.resize(testData
->inputSize
);
497 testData
->input3
.resize(testData
->inputSize
);
498 testData
->output
.resize(testData
->outputSize
);
499 populateInput(testData
.get());
501 /* Perform evaluations */
502 unsigned int numUpdates
;
503 status
= profileDevices(profile
, DS_EVALUATE_ALL
, evaluateScoreForDevice
, (void*)testData
.get(), &numUpdates
);
505 if (DS_SUCCESS
== status
)
507 /* Write scores to file */
508 status
= writeProfileToFile(profile
, serializeScore
, fileName
);
509 if (DS_SUCCESS
== status
)
511 LOG_PRINTF("[DS] Scores written to file (" << fileName
<< ").");
515 LOG_PRINTF("[DS] Error saving scores to file (" << fileName
<< "); scores not written to file.");
520 LOG_PRINTF("[DS] Unable to evaluate performance; scores not written to file.");
525 LOG_PRINTF("[DS] Profile read from file (" << fileName
<< ").");
528 /* Pick best device */
530 pickBestDevice(profile
, &bestDeviceIdx
);
532 /* Overide if necessary */
533 char* overrideDeviceStr
= getenv("SC_OPENCL_DEVICE_OVERRIDE");
534 if (NULL
!= overrideDeviceStr
)
536 int overrideDeviceIdx
= matchDevice(profile
, overrideDeviceStr
);
537 if (-1 != overrideDeviceIdx
)
539 LOG_PRINTF("[DS] Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr
<< ").");
540 bestDeviceIdx
= overrideDeviceIdx
;
541 if (DS_DEVICE_OPENCL_DEVICE
== profile
->devices
[bestDeviceIdx
].type
)
543 LOG_PRINTF("[DS] Selected Device[" << bestDeviceIdx
<< "]: " << profile
->devices
[bestDeviceIdx
].oclDeviceName
<< " (OpenCL).");
547 LOG_PRINTF("[DS] Selected Device[" << bestDeviceIdx
<< "]: CPU (Native).");
552 LOG_PRINTF("[DS] Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr
<< ").");
556 /* Final device selection */
557 selectedDevice
= profile
->devices
[bestDeviceIdx
];
558 bIsDeviceSelected
= true;
560 /* Release profile */
561 status
= releaseDSProfile(profile
, releaseScore
);
563 return selectedDevice
;
566 bool selectedDeviceIsOpenCL(ds_device device
)
568 return (DS_DEVICE_OPENCL_DEVICE
== device
.type
);
571 bool selectedDeviceIsNativeCPU(ds_device device
)
573 return (DS_DEVICE_NATIVE_CPU
== device
.type
);
578 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */