fix baseline build (old cairo) - 'cairo_rectangle_int_t' does not name a type
[LibreOffice.git] / sc / source / core / opencl / formulagroupcl.cxx
blob2667f5d67e96d7cd4cbce3183ed22249621c4014
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include "formulagroup.hxx"
11 #include "formulagroupcl.hxx"
12 #include "clkernelthread.hxx"
13 #include "grouptokenconverter.hxx"
14 #include "document.hxx"
15 #include "formulacell.hxx"
16 #include "tokenarray.hxx"
17 #include "compiler.hxx"
18 #include "interpre.hxx"
19 #include <formula/random.hxx>
20 #include <formula/vectortoken.hxx>
21 #include "scmatrix.hxx"
23 #include <opencl/openclwrapper.hxx>
25 #include "op_financial.hxx"
26 #include "op_database.hxx"
27 #include "op_math.hxx"
28 #include "op_logical.hxx"
29 #include "op_statistical.hxx"
30 #include "op_array.hxx"
31 #include "op_spreadsheet.hxx"
32 #include "op_addin.hxx"
34 /// CONFIGURATIONS
35 #define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
36 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
38 static const char* publicFunc =
39 "\n"
40 "#define errIllegalFPOperation 503 // #NUM!\n"
41 "#define errNoValue 519 // #VALUE!\n"
42 "#define errDivisionByZero 532 // #DIV/0!\n"
43 "#define NOTAVAILABLE 0x7fff // #N/A\n"
44 "\n"
45 "double CreateDoubleError(ulong nErr)\n"
46 "{\n"
47 " return nan(nErr);\n"
48 "}\n"
49 "\n"
50 "uint GetDoubleErrorValue(double fVal)\n"
51 "{\n"
52 " if (isfinite(fVal))\n"
53 " return 0;\n"
54 " if (isinf(fVal))\n"
55 " return errIllegalFPOperation; // normal INF\n"
56 " if (as_ulong(fVal) & 0XFFFF0000u)\n"
57 " return errNoValue; // just a normal NAN\n"
58 " return (as_ulong(fVal) & 0XFFFF); // any other error\n"
59 "}\n"
60 "\n"
61 "int isNan(double a) { return isnan(a); }\n"
62 "double fsum_count(double a, double b, __private int *p) {\n"
63 " bool t = isNan(a);\n"
64 " (*p) += t?0:1;\n"
65 " return t?b:a+b;\n"
66 "}\n"
67 "double fmin_count(double a, double b, __private int *p) {\n"
68 " double result = fmin(a, b);\n"
69 " bool t = isnan(result);\n"
70 " (*p) += t?0:1;\n"
71 " return result;\n"
72 "}\n"
73 "double fmax_count(double a, double b, __private int *p) {\n"
74 " double result = fmax(a, b);\n"
75 " bool t = isnan(result);\n"
76 " (*p) += t?0:1;\n"
77 " return result;\n"
78 "}\n"
79 "double fsum(double a, double b) { return isNan(a)?b:a+b; }\n"
80 "double legalize(double a, double b) { return isNan(a)?b:a;}\n"
81 "double fsub(double a, double b) { return a-b; }\n"
82 "double fdiv(double a, double b) { return a/b; }\n"
83 "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
86 #ifdef WIN32
87 #ifndef NAN
88 namespace {
90 const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
93 #define NAN (*(const double*) __nan)
94 #endif
95 #endif
97 #include <list>
98 #include <map>
99 #include <iostream>
100 #include <sstream>
101 #include <algorithm>
103 #include <rtl/digest.h>
105 #include <boost/scoped_ptr.hpp>
106 #include <boost/scoped_array.hpp>
108 using namespace formula;
110 namespace sc { namespace opencl {
112 namespace {
114 std::string StackVarEnumToString(StackVar const e)
116 switch (e)
118 #define CASE(x) case sv##x: return #x
119 CASE(Byte);
120 CASE(Double);
121 CASE(String);
122 CASE(SingleRef);
123 CASE(DoubleRef);
124 CASE(Matrix);
125 CASE(Index);
126 CASE(Jump);
127 CASE(External);
128 CASE(FAP);
129 CASE(JumpMatrix);
130 CASE(RefList);
131 CASE(EmptyCell);
132 CASE(MatrixCell);
133 CASE(HybridCell);
134 CASE(HybridValueCell);
135 CASE(ExternalSingleRef);
136 CASE(ExternalDoubleRef);
137 CASE(ExternalName);
138 CASE(SingleVectorRef);
139 CASE(DoubleVectorRef);
140 CASE(Subroutine);
141 CASE(Error);
142 CASE(Missing);
143 CASE(Sep);
144 CASE(Unknown);
145 #undef CASE
147 return std::to_string(static_cast<int>(e));
150 #ifdef SAL_DETAIL_ENABLE_LOG_INFO
151 std::string linenumberify(const std::string& s)
153 std::stringstream ss;
154 int linenumber = 1;
155 size_t start = 0;
156 size_t newline;
157 while ((newline = s.find('\n', start)) != std::string::npos)
159 ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, newline-start+1);
160 start = newline + 1;
162 if (start < s.size())
163 ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, std::string::npos);
164 return ss.str();
166 #endif
168 bool AllStringsAreNull(const rtl_uString* const* pStringArray, size_t nLength)
170 if (pStringArray == nullptr)
171 return true;
173 for (size_t i = 0; i < nLength; i++)
174 if (pStringArray[i] != nullptr)
175 return false;
177 return true;
181 } // anonymous namespace
183 /// Map the buffer used by an argument and do necessary argument setting
184 size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
186 FormulaToken* ref = mFormulaTree->GetFormulaToken();
187 double* pHostBuffer = NULL;
188 size_t szHostBuffer = 0;
189 if (ref->GetType() == formula::svSingleVectorRef)
191 const formula::SingleVectorRefToken* pSVR =
192 static_cast<const formula::SingleVectorRefToken*>(ref);
194 SAL_INFO("sc.opencl", "SingleVectorRef len=" << pSVR->GetArrayLength() << " mpNumericArray=" << pSVR->GetArray().mpNumericArray << " (mpStringArray=" << pSVR->GetArray().mpStringArray << ")");
196 pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
197 szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
199 else if (ref->GetType() == formula::svDoubleVectorRef)
201 const formula::DoubleVectorRefToken* pDVR =
202 static_cast<const formula::DoubleVectorRefToken*>(ref);
204 SAL_INFO("sc.opencl", "DoubleVectorRef index=" << mnIndex << " len=" << pDVR->GetArrayLength() << " mpNumericArray=" << pDVR->GetArrays()[mnIndex].mpNumericArray << " (mpStringArray=" << pDVR->GetArrays()[mnIndex].mpStringArray << ")");
206 pHostBuffer = const_cast<double*>(
207 pDVR->GetArrays()[mnIndex].mpNumericArray);
208 szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
210 else
212 throw Unhandled();
214 // Obtain cl context
215 ::opencl::KernelEnv kEnv;
216 ::opencl::setKernelEnv(&kEnv);
217 cl_int err;
218 if (pHostBuffer)
220 mpClmem = clCreateBuffer(kEnv.mpkContext,
221 (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
222 szHostBuffer,
223 pHostBuffer, &err);
224 if (CL_SUCCESS != err)
225 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
226 SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer << " using host buffer " << pHostBuffer);
228 else
230 if (szHostBuffer == 0)
231 szHostBuffer = sizeof(double); // a dummy small value
232 // Marshal as a buffer of NANs
233 mpClmem = clCreateBuffer(kEnv.mpkContext,
234 (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
235 szHostBuffer, NULL, &err);
236 if (CL_SUCCESS != err)
237 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
238 SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
240 double* pNanBuffer = static_cast<double*>(clEnqueueMapBuffer(
241 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
242 szHostBuffer, 0, NULL, NULL, &err));
243 if (CL_SUCCESS != err)
244 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
246 for (size_t i = 0; i < szHostBuffer / sizeof(double); i++)
247 pNanBuffer[i] = NAN;
248 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
249 pNanBuffer, 0, NULL, NULL);
250 // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
251 if (CL_SUCCESS != err)
252 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err));
255 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem);
256 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
257 if (CL_SUCCESS != err)
258 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
259 return 1;
262 /// Arguments that are actually compile-time constant string
263 /// Currently, only the hash is passed.
264 /// TBD(IJSUNG): pass also length and the actual string if there is a
265 /// hash function collision
266 class ConstStringArgument : public DynamicKernelArgument
268 public:
269 ConstStringArgument( const ScCalcConfig& config, const std::string& s,
270 FormulaTreeNodeRef ft ) :
271 DynamicKernelArgument(config, s, ft) { }
272 /// Generate declaration
273 virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
275 ss << "unsigned " << mSymName;
277 virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
279 ss << GenSlidingWindowDeclRef(false);
281 virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
283 GenDecl(ss);
285 virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
287 std::stringstream ss;
288 if (GetFormulaToken()->GetType() != formula::svString)
289 throw Unhandled();
290 FormulaToken* Tok = GetFormulaToken();
291 ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
292 return ss.str();
294 virtual size_t GetWindowSize() const SAL_OVERRIDE
296 return 1;
298 /// Pass the 32-bit hash of the string to the kernel
299 virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
301 FormulaToken* ref = mFormulaTree->GetFormulaToken();
302 cl_uint hashCode = 0;
303 if (ref->GetType() == formula::svString)
305 const rtl::OUString s = ref->GetString().getString().toAsciiUpperCase();
306 hashCode = s.hashCode();
308 else
310 throw Unhandled();
313 // Pass the scalar result back to the rest of the formula kernel
314 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_uint: " << hashCode);
315 cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode);
316 if (CL_SUCCESS != err)
317 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
318 return 1;
322 /// Arguments that are actually compile-time constants
323 class DynamicKernelConstantArgument : public DynamicKernelArgument
325 public:
326 DynamicKernelConstantArgument( const ScCalcConfig& config, const std::string& s,
327 FormulaTreeNodeRef ft ) :
328 DynamicKernelArgument(config, s, ft) { }
329 /// Generate declaration
330 virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
332 ss << "double " << mSymName;
334 virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
336 ss << mSymName;
338 virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
340 GenDecl(ss);
342 virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
344 if (GetFormulaToken()->GetType() != formula::svDouble)
345 throw Unhandled();
346 return mSymName;
348 virtual size_t GetWindowSize() const SAL_OVERRIDE
350 return 1;
352 double GetDouble() const
354 FormulaToken* Tok = GetFormulaToken();
355 if (Tok->GetType() != formula::svDouble)
356 throw Unhandled();
357 return Tok->GetDouble();
359 /// Create buffer and pass the buffer to a given kernel
360 virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
362 double tmp = GetDouble();
363 // Pass the scalar result back to the rest of the formula kernel
364 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
365 cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
366 if (CL_SUCCESS != err)
367 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
368 return 1;
372 class DynamicKernelPiArgument : public DynamicKernelArgument
374 public:
375 DynamicKernelPiArgument( const ScCalcConfig& config, const std::string& s,
376 FormulaTreeNodeRef ft ) :
377 DynamicKernelArgument(config, s, ft) { }
378 /// Generate declaration
379 virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
381 ss << "double " << mSymName;
383 virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
385 ss << "3.14159265358979";
387 virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
389 GenDecl(ss);
391 virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
393 return mSymName;
395 virtual size_t GetWindowSize() const SAL_OVERRIDE
397 return 1;
399 /// Create buffer and pass the buffer to a given kernel
400 virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
402 double tmp = 0.0;
403 // Pass the scalar result back to the rest of the formula kernel
404 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
405 cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
406 if (CL_SUCCESS != err)
407 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
408 return 1;
412 class DynamicKernelRandomArgument : public DynamicKernelArgument
414 public:
415 DynamicKernelRandomArgument( const ScCalcConfig& config, const std::string& s,
416 FormulaTreeNodeRef ft ) :
417 DynamicKernelArgument(config, s, ft) { }
418 /// Generate declaration
419 virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
421 ss << "double " << mSymName;
423 virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
425 ss << mSymName;
427 virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
429 ss << "int " << mSymName;
431 virtual std::string GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
433 return mSymName + "_Random(" + mSymName + ")";
435 virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE
437 // This string is from the pi_opencl_kernel.i file as
438 // generated when building the Random123 examples. Unused
439 // stuff has been removed, and the actual kernel is not the
440 // same as in the totally different use case of that example,
441 // of course. Only the code that calculates the counter-based
442 // random number and what it needs is left.
443 ss << "\
445 #ifndef DEFINED_RANDOM123_STUFF\n\
446 #define DEFINED_RANDOM123_STUFF\n\
448 /*\n\
449 Copyright 2010-2011, D. E. Shaw Research.\n\
450 All rights reserved.\n\
452 Redistribution and use in source and binary forms, with or without\n\
453 modification, are permitted provided that the following conditions are\n\
454 met:\n\
456 * Redistributions of source code must retain the above copyright\n\
457 notice, this list of conditions, and the following disclaimer.\n\
459 * Redistributions in binary form must reproduce the above copyright\n\
460 notice, this list of conditions, and the following disclaimer in the\n\
461 documentation and/or other materials provided with the distribution.\n\
463 * Neither the name of D. E. Shaw Research nor the names of its\n\
464 contributors may be used to endorse or promote products derived from\n\
465 this software without specific prior written permission.\n\
467 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
468 \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
469 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
470 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
471 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
472 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
473 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
474 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
475 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
476 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
477 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
478 */\n\
480 typedef uint uint32_t;\n\
481 struct r123array2x32\n\
482 {\n\
483 uint32_t v[2];\n\
484 };\n\
485 enum r123_enum_threefry32x2\n\
486 {\n\
487 R_32x2_0_0 = 13,\n\
488 R_32x2_1_0 = 15,\n\
489 R_32x2_2_0 = 26,\n\
490 R_32x2_3_0 = 6,\n\
491 R_32x2_4_0 = 17,\n\
492 R_32x2_5_0 = 29,\n\
493 R_32x2_6_0 = 16,\n\
494 R_32x2_7_0 = 24\n\
495 };\n\
496 inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
497 __attribute__ ((always_inline));\n\
498 inline uint32_t\n\
499 RotL_32 (uint32_t x, unsigned int N)\n\
500 {\n\
501 return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
502 }\n\
504 typedef struct r123array2x32 threefry2x32_ctr_t;\n\
505 typedef struct r123array2x32 threefry2x32_key_t;\n\
506 typedef struct r123array2x32 threefry2x32_ukey_t;\n\
507 inline threefry2x32_key_t\n\
508 threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
509 {\n\
510 return uk;\n\
511 }\n\
513 inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
514 threefry2x32_ctr_t in,\n\
515 threefry2x32_key_t k)\n\
516 __attribute__ ((always_inline));\n\
517 inline threefry2x32_ctr_t\n\
518 threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
519 threefry2x32_key_t k)\n\
520 {\n\
521 threefry2x32_ctr_t X;\n\
522 uint32_t ks[2 + 1];\n\
523 int i;\n\
524 ks[2] = 0x1BD11BDA;\n\
525 for (i = 0; i < 2; i++) {\n\
526 ks[i] = k.v[i];\n\
527 X.v[i] = in.v[i];\n\
528 ks[2] ^= k.v[i];\n\
529 }\n\
530 X.v[0] += ks[0];\n\
531 X.v[1] += ks[1];\n\
532 if (Nrounds > 0) {\n\
533 X.v[0] += X.v[1];\n\
534 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
535 X.v[1] ^= X.v[0];\n\
536 }\n\
537 if (Nrounds > 1) {\n\
538 X.v[0] += X.v[1];\n\
539 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
540 X.v[1] ^= X.v[0];\n\
541 }\n\
542 if (Nrounds > 2) {\n\
543 X.v[0] += X.v[1];\n\
544 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
545 X.v[1] ^= X.v[0];\n\
546 }\n\
547 if (Nrounds > 3) {\n\
548 X.v[0] += X.v[1];\n\
549 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
550 X.v[1] ^= X.v[0];\n\
551 }\n\
552 if (Nrounds > 3) {\n\
553 X.v[0] += ks[1];\n\
554 X.v[1] += ks[2];\n\
555 X.v[1] += 1;\n\
556 }\n\
557 if (Nrounds > 4) {\n\
558 X.v[0] += X.v[1];\n\
559 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
560 X.v[1] ^= X.v[0];\n\
561 }\n\
562 if (Nrounds > 5) {\n\
563 X.v[0] += X.v[1];\n\
564 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
565 X.v[1] ^= X.v[0];\n\
566 }\n\
567 if (Nrounds > 6) {\n\
568 X.v[0] += X.v[1];\n\
569 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
570 X.v[1] ^= X.v[0];\n\
571 }\n\
572 if (Nrounds > 7) {\n\
573 X.v[0] += X.v[1];\n\
574 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
575 X.v[1] ^= X.v[0];\n\
576 }\n\
577 if (Nrounds > 7) {\n\
578 X.v[0] += ks[2];\n\
579 X.v[1] += ks[0];\n\
580 X.v[1] += 2;\n\
581 }\n\
582 if (Nrounds > 8) {\n\
583 X.v[0] += X.v[1];\n\
584 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
585 X.v[1] ^= X.v[0];\n\
586 }\n\
587 if (Nrounds > 9) {\n\
588 X.v[0] += X.v[1];\n\
589 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
590 X.v[1] ^= X.v[0];\n\
591 }\n\
592 if (Nrounds > 10) {\n\
593 X.v[0] += X.v[1];\n\
594 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
595 X.v[1] ^= X.v[0];\n\
596 }\n\
597 if (Nrounds > 11) {\n\
598 X.v[0] += X.v[1];\n\
599 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
600 X.v[1] ^= X.v[0];\n\
601 }\n\
602 if (Nrounds > 11) {\n\
603 X.v[0] += ks[0];\n\
604 X.v[1] += ks[1];\n\
605 X.v[1] += 3;\n\
606 }\n\
607 if (Nrounds > 12) {\n\
608 X.v[0] += X.v[1];\n\
609 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
610 X.v[1] ^= X.v[0];\n\
611 }\n\
612 if (Nrounds > 13) {\n\
613 X.v[0] += X.v[1];\n\
614 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
615 X.v[1] ^= X.v[0];\n\
616 }\n\
617 if (Nrounds > 14) {\n\
618 X.v[0] += X.v[1];\n\
619 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
620 X.v[1] ^= X.v[0];\n\
621 }\n\
622 if (Nrounds > 15) {\n\
623 X.v[0] += X.v[1];\n\
624 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
625 X.v[1] ^= X.v[0];\n\
626 }\n\
627 if (Nrounds > 15) {\n\
628 X.v[0] += ks[1];\n\
629 X.v[1] += ks[2];\n\
630 X.v[1] += 4;\n\
631 }\n\
632 if (Nrounds > 16) {\n\
633 X.v[0] += X.v[1];\n\
634 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
635 X.v[1] ^= X.v[0];\n\
636 }\n\
637 if (Nrounds > 17) {\n\
638 X.v[0] += X.v[1];\n\
639 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
640 X.v[1] ^= X.v[0];\n\
641 }\n\
642 if (Nrounds > 18) {\n\
643 X.v[0] += X.v[1];\n\
644 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
645 X.v[1] ^= X.v[0];\n\
646 }\n\
647 if (Nrounds > 19) {\n\
648 X.v[0] += X.v[1];\n\
649 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
650 X.v[1] ^= X.v[0];\n\
651 }\n\
652 if (Nrounds > 19) {\n\
653 X.v[0] += ks[2];\n\
654 X.v[1] += ks[0];\n\
655 X.v[1] += 5;\n\
656 }\n\
657 if (Nrounds > 20) {\n\
658 X.v[0] += X.v[1];\n\
659 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
660 X.v[1] ^= X.v[0];\n\
661 }\n\
662 if (Nrounds > 21) {\n\
663 X.v[0] += X.v[1];\n\
664 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
665 X.v[1] ^= X.v[0];\n\
666 }\n\
667 if (Nrounds > 22) {\n\
668 X.v[0] += X.v[1];\n\
669 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
670 X.v[1] ^= X.v[0];\n\
671 }\n\
672 if (Nrounds > 23) {\n\
673 X.v[0] += X.v[1];\n\
674 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
675 X.v[1] ^= X.v[0];\n\
676 }\n\
677 if (Nrounds > 23) {\n\
678 X.v[0] += ks[0];\n\
679 X.v[1] += ks[1];\n\
680 X.v[1] += 6;\n\
681 }\n\
682 if (Nrounds > 24) {\n\
683 X.v[0] += X.v[1];\n\
684 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
685 X.v[1] ^= X.v[0];\n\
686 }\n\
687 if (Nrounds > 25) {\n\
688 X.v[0] += X.v[1];\n\
689 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
690 X.v[1] ^= X.v[0];\n\
691 }\n\
692 if (Nrounds > 26) {\n\
693 X.v[0] += X.v[1];\n\
694 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
695 X.v[1] ^= X.v[0];\n\
696 }\n\
697 if (Nrounds > 27) {\n\
698 X.v[0] += X.v[1];\n\
699 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
700 X.v[1] ^= X.v[0];\n\
701 }\n\
702 if (Nrounds > 27) {\n\
703 X.v[0] += ks[1];\n\
704 X.v[1] += ks[2];\n\
705 X.v[1] += 7;\n\
706 }\n\
707 if (Nrounds > 28) {\n\
708 X.v[0] += X.v[1];\n\
709 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
710 X.v[1] ^= X.v[0];\n\
711 }\n\
712 if (Nrounds > 29) {\n\
713 X.v[0] += X.v[1];\n\
714 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
715 X.v[1] ^= X.v[0];\n\
716 }\n\
717 if (Nrounds > 30) {\n\
718 X.v[0] += X.v[1];\n\
719 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
720 X.v[1] ^= X.v[0];\n\
721 }\n\
722 if (Nrounds > 31) {\n\
723 X.v[0] += X.v[1];\n\
724 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
725 X.v[1] ^= X.v[0];\n\
726 }\n\
727 if (Nrounds > 31) {\n\
728 X.v[0] += ks[2];\n\
729 X.v[1] += ks[0];\n\
730 X.v[1] += 8;\n\
731 }\n\
732 return X;\n\
733 }\n\
735 enum r123_enum_threefry2x32\n\
736 { threefry2x32_rounds = 20 };\n\
737 inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
738 threefry2x32_key_t k)\n\
739 __attribute__ ((always_inline));\n\
740 inline threefry2x32_ctr_t\n\
741 threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
742 {\n\
743 return threefry2x32_R (threefry2x32_rounds, in, k);\n\
744 }\n\
745 #endif\n\
748 ss << "double " << mSymName << "_Random (int seed)\n\
749 {\n\
750 unsigned tid = get_global_id(0);\n\
751 threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
752 threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
753 c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
754 const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\
755 const double halffactor = 0.5*factor;\n\
756 return c.v[0] * factor + halffactor;\n\
757 }\n\
760 virtual size_t GetWindowSize() const SAL_OVERRIDE
762 return 1;
764 /// Create buffer and pass the buffer to a given kernel
765 virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) SAL_OVERRIDE
767 cl_int seed = formula::rng::nRandom(0, SAL_MAX_INT32);
768 // Pass the scalar result back to the rest of the formula kernel
769 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed);
770 cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), (void*)&seed);
771 if (CL_SUCCESS != err)
772 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
773 return 1;
777 /// A vector of strings
778 class DynamicKernelStringArgument : public VectorRef
780 public:
781 DynamicKernelStringArgument( const ScCalcConfig& config, const std::string& s,
782 FormulaTreeNodeRef ft, int index = 0 ) :
783 VectorRef(config, s, ft, index) { }
785 virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
786 /// Generate declaration
787 virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
789 ss << "__global unsigned int *" << mSymName;
791 virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
793 DynamicKernelStringArgument::GenDecl(ss);
795 virtual size_t Marshal( cl_kernel, int, int, cl_program ) SAL_OVERRIDE;
798 /// Marshal a string vector reference
799 size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_program )
801 FormulaToken* ref = mFormulaTree->GetFormulaToken();
802 // Obtain cl context
803 ::opencl::KernelEnv kEnv;
804 ::opencl::setKernelEnv(&kEnv);
805 cl_int err;
806 formula::VectorRefArray vRef;
807 size_t nStrings = 0;
808 if (ref->GetType() == formula::svSingleVectorRef)
810 const formula::SingleVectorRefToken* pSVR =
811 static_cast<const formula::SingleVectorRefToken*>(ref);
812 nStrings = pSVR->GetArrayLength();
813 vRef = pSVR->GetArray();
815 else if (ref->GetType() == formula::svDoubleVectorRef)
817 const formula::DoubleVectorRefToken* pDVR =
818 static_cast<const formula::DoubleVectorRefToken*>(ref);
819 nStrings = pDVR->GetArrayLength();
820 vRef = pDVR->GetArrays()[mnIndex];
822 size_t szHostBuffer = nStrings * sizeof(cl_int);
823 cl_uint* pHashBuffer = NULL;
825 if (vRef.mpStringArray != NULL)
827 // Marshal strings. Right now we pass hashes of these string
828 mpClmem = clCreateBuffer(kEnv.mpkContext,
829 (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
830 szHostBuffer, NULL, &err);
831 if (CL_SUCCESS != err)
832 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
833 SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
835 pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
836 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
837 szHostBuffer, 0, NULL, NULL, &err));
838 if (CL_SUCCESS != err)
839 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
841 for (size_t i = 0; i < nStrings; i++)
843 if (vRef.mpStringArray[i])
845 const OUString tmp = OUString(vRef.mpStringArray[i]);
846 pHashBuffer[i] = tmp.hashCode();
848 else
850 pHashBuffer[i] = 0;
854 else
856 if (nStrings == 0)
857 szHostBuffer = sizeof(cl_int); // a dummy small value
858 // Marshal as a buffer of NANs
859 mpClmem = clCreateBuffer(kEnv.mpkContext,
860 (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
861 szHostBuffer, NULL, &err);
862 if (CL_SUCCESS != err)
863 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
864 SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
866 pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
867 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
868 szHostBuffer, 0, NULL, NULL, &err));
869 if (CL_SUCCESS != err)
870 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
872 for (size_t i = 0; i < szHostBuffer / sizeof(cl_int); i++)
873 pHashBuffer[i] = 0;
875 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
876 pHashBuffer, 0, NULL, NULL);
877 if (CL_SUCCESS != err)
878 throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
880 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem);
881 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
882 if (CL_SUCCESS != err)
883 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
884 return 1;
887 /// A mixed string/numberic vector
888 class DynamicKernelMixedArgument : public VectorRef
890 public:
891 DynamicKernelMixedArgument( const ScCalcConfig& config, const std::string& s,
892 FormulaTreeNodeRef ft ) :
893 VectorRef(config, s, ft), mStringArgument(config, s + "s", ft) { }
894 virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
896 VectorRef::GenSlidingWindowDecl(ss);
897 ss << ", ";
898 mStringArgument.GenSlidingWindowDecl(ss);
900 virtual bool IsMixedArgument() const SAL_OVERRIDE { return true;}
901 virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
902 /// Generate declaration
903 virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
905 VectorRef::GenDecl(ss);
906 ss << ", ";
907 mStringArgument.GenDecl(ss);
909 virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
911 VectorRef::GenDeclRef(ss);
912 ss << ",";
913 mStringArgument.GenDeclRef(ss);
915 virtual void GenNumDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
917 VectorRef::GenSlidingWindowDecl(ss);
919 virtual void GenStringDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
921 mStringArgument.GenSlidingWindowDecl(ss);
923 virtual std::string GenSlidingWindowDeclRef( bool nested ) const SAL_OVERRIDE
925 std::stringstream ss;
926 ss << "(!isNan(" << VectorRef::GenSlidingWindowDeclRef();
927 ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
928 ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
929 ss << ")";
930 return ss.str();
932 virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
934 std::stringstream ss;
935 ss << VectorRef::GenSlidingWindowDeclRef();
936 return ss.str();
938 virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
940 std::stringstream ss;
941 ss << mStringArgument.GenSlidingWindowDeclRef();
942 return ss.str();
944 virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) SAL_OVERRIDE
946 int i = VectorRef::Marshal(k, argno, vw, p);
947 i += mStringArgument.Marshal(k, argno + i, vw, p);
948 return i;
951 protected:
952 DynamicKernelStringArgument mStringArgument;
955 /// Handling a Double Vector that is used as a sliding window input
956 /// to either a sliding window average or sum-of-products
957 /// Generate a sequential loop for reductions
958 class OpAverage;
959 class OpCount;
961 template<class Base>
962 class DynamicKernelSlidingArgument : public Base
964 public:
965 DynamicKernelSlidingArgument( const ScCalcConfig& config, const std::string& s,
966 FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
967 int index = 0 ) :
968 Base(config, s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
970 FormulaToken* t = ft->GetFormulaToken();
971 if (t->GetType() != formula::svDoubleVectorRef)
972 throw Unhandled();
973 mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
974 bIsStartFixed = mpDVR->IsStartFixed();
975 bIsEndFixed = mpDVR->IsEndFixed();
977 // Should only be called by SumIfs. Yikes!
978 virtual bool NeedParallelReduction() const
980 assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
981 return GetWindowSize() > 100 &&
982 ((GetStartFixed() && GetEndFixed()) ||
983 (!GetStartFixed() && !GetEndFixed()));
985 virtual void GenSlidingWindowFunction( std::stringstream& ) { }
987 virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const
989 size_t nArrayLength = mpDVR->GetArrayLength();
990 std::stringstream ss;
991 if (!bIsStartFixed && !bIsEndFixed)
993 if (nested)
994 ss << "((i+gid0) <" << nArrayLength << "?";
995 ss << Base::GetName() << "[i + gid0]";
996 if (nested)
997 ss << ":NAN)";
999 else
1001 if (nested)
1002 ss << "(i <" << nArrayLength << "?";
1003 ss << Base::GetName() << "[i]";
1004 if (nested)
1005 ss << ":NAN)";
1007 return ss.str();
1009 /// Controls how the elements in the DoubleVectorRef are traversed
1010 virtual size_t GenReductionLoopHeader(
1011 std::stringstream& ss, bool& needBody )
1013 assert(mpDVR);
1014 size_t nCurWindowSize = mpDVR->GetRefRowSize();
1017 if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1019 ss << "for (int i = ";
1020 ss << "gid0; i < " << mpDVR->GetArrayLength();
1021 ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
1022 needBody = true;
1023 return nCurWindowSize;
1025 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1027 ss << "for (int i = ";
1028 ss << "0; i < " << mpDVR->GetArrayLength();
1029 ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
1030 needBody = true;
1031 return nCurWindowSize;
1033 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1035 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1036 ss << "{int i;\n\t";
1037 std::stringstream temp1, temp2;
1038 int outLoopSize = UNROLLING_FACTOR;
1039 if (nCurWindowSize / outLoopSize != 0)
1041 ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1042 for (int count = 0; count < outLoopSize; count++)
1044 ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
1045 if (count == 0)
1047 temp1 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1048 temp1 << "){\n\t\t";
1049 temp1 << "tmp = legalize(";
1050 temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1051 temp1 << ", tmp);\n\t\t\t";
1052 temp1 << "}\n\t";
1054 ss << temp1.str();
1056 ss << "}\n\t";
1058 // The residual of mod outLoopSize
1059 for (unsigned int count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1061 ss << "i = " << count << ";\n\t";
1062 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1064 temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1065 temp2 << "){\n\t\t";
1066 temp2 << "tmp = legalize(";
1067 temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1068 temp2 << ", tmp);\n\t\t\t";
1069 temp2 << "}\n\t";
1071 ss << temp2.str();
1073 ss << "}\n";
1074 needBody = false;
1075 return nCurWindowSize;
1077 // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1078 else
1080 ss << "\n\t";
1081 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1082 ss << "{int i;\n\t";
1083 std::stringstream temp1, temp2;
1084 int outLoopSize = UNROLLING_FACTOR;
1085 if (nCurWindowSize / outLoopSize != 0)
1087 ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1088 for (int count = 0; count < outLoopSize; count++)
1090 ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
1091 if (count == 0)
1093 temp1 << "tmp = legalize(";
1094 temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1095 temp1 << ", tmp);\n\t\t\t";
1097 ss << temp1.str();
1099 ss << "}\n\t";
1101 // The residual of mod outLoopSize
1102 for (unsigned int count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1104 ss << "i = " << count << ";\n\t";
1105 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1107 temp2 << "tmp = legalize(";
1108 temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1109 temp2 << ", tmp);\n\t\t\t";
1111 ss << temp2.str();
1113 ss << "}\n";
1114 needBody = false;
1115 return nCurWindowSize;
1119 ~DynamicKernelSlidingArgument()
1121 if (mpClmem2)
1123 cl_int err;
1124 err = clReleaseMemObject(mpClmem2);
1125 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
1126 mpClmem2 = NULL;
1130 size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1132 size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1134 size_t GetStartFixed() const { return bIsStartFixed; }
1136 size_t GetEndFixed() const { return bIsEndFixed; }
1138 protected:
1139 bool bIsStartFixed, bIsEndFixed;
1140 const formula::DoubleVectorRefToken* mpDVR;
1141 // from parent nodes
1142 boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
1143 // controls whether to invoke the reduction kernel during marshaling or not
1144 cl_mem mpClmem2;
1147 /// A mixed string/numberic vector
1148 class DynamicKernelMixedSlidingArgument : public VectorRef
1150 public:
1151 DynamicKernelMixedSlidingArgument( const ScCalcConfig& config, const std::string& s,
1152 FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
1153 int index = 0 ) :
1154 VectorRef(config, s, ft),
1155 mDoubleArgument(mCalcConfig, s, ft, CodeGen, index),
1156 mStringArgument(mCalcConfig, s + "s", ft, CodeGen, index) { }
1157 virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
1159 mDoubleArgument.GenSlidingWindowDecl(ss);
1160 ss << ", ";
1161 mStringArgument.GenSlidingWindowDecl(ss);
1163 virtual void GenSlidingWindowFunction( std::stringstream& ) SAL_OVERRIDE { }
1164 /// Generate declaration
1165 virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
1167 mDoubleArgument.GenDecl(ss);
1168 ss << ", ";
1169 mStringArgument.GenDecl(ss);
1171 virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
1173 mDoubleArgument.GenDeclRef(ss);
1174 ss << ",";
1175 mStringArgument.GenDeclRef(ss);
1177 virtual std::string GenSlidingWindowDeclRef( bool nested ) const SAL_OVERRIDE
1179 std::stringstream ss;
1180 ss << "(!isNan(" << mDoubleArgument.GenSlidingWindowDeclRef();
1181 ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
1182 ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
1183 ss << ")";
1184 return ss.str();
1186 virtual bool IsMixedArgument() const SAL_OVERRIDE { return true;}
1187 virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
1189 std::stringstream ss;
1190 ss << mDoubleArgument.GenSlidingWindowDeclRef();
1191 return ss.str();
1193 virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
1195 std::stringstream ss;
1196 ss << mStringArgument.GenSlidingWindowDeclRef();
1197 return ss.str();
1199 virtual void GenNumDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
1201 mDoubleArgument.GenDeclRef(ss);
1203 virtual void GenStringDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
1205 mStringArgument.GenDeclRef(ss);
1207 virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) SAL_OVERRIDE
1209 int i = mDoubleArgument.Marshal(k, argno, vw, p);
1210 i += mStringArgument.Marshal(k, argno + i, vw, p);
1211 return i;
1214 protected:
1215 DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
1216 DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
1219 /// Holds the symbol table for a given dynamic kernel
1220 class SymbolTable
1222 public:
1223 typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
1224 // This avoids instability caused by using pointer as the key type
1225 typedef std::list<DynamicKernelArgumentRef> ArgumentList;
1226 SymbolTable() : mCurId(0) { }
1227 template<class T>
1228 const DynamicKernelArgument* DeclRefArg( const ScCalcConfig& config, FormulaTreeNodeRef, SlidingFunctionBase* pCodeGen, int nResultSize );
1229 /// Used to generate sliding window helpers
1230 void DumpSlidingWindowFunctions( std::stringstream& ss )
1232 for (ArgumentList::iterator it = mParams.begin(), e = mParams.end(); it != e;
1233 ++it)
1235 (*it)->GenSlidingWindowFunction(ss);
1236 ss << "\n";
1239 /// Memory mapping from host to device and pass buffers to the given kernel as
1240 /// arguments
1241 void Marshal( cl_kernel, int, cl_program );
1243 private:
1244 unsigned int mCurId;
1245 ArgumentMap mSymbols;
1246 ArgumentList mParams;
1249 void SymbolTable::Marshal( cl_kernel k, int nVectorWidth, cl_program pProgram )
1251 int i = 1; //The first argument is reserved for results
1252 for (ArgumentList::iterator it = mParams.begin(), e = mParams.end(); it != e;
1253 ++it)
1255 i += (*it)->Marshal(k, i, nVectorWidth, pProgram);
1259 /// Handling a Double Vector that is used as a sliding window input
1260 /// Performs parallel reduction based on given operator
1261 template<class Base>
1262 class ParallelReductionVectorRef : public Base
1264 public:
1265 ParallelReductionVectorRef( const ScCalcConfig& config, const std::string& s,
1266 FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase>& CodeGen,
1267 int index = 0 ) :
1268 Base(config, s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
1270 FormulaToken* t = ft->GetFormulaToken();
1271 if (t->GetType() != formula::svDoubleVectorRef)
1272 throw Unhandled();
1273 mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
1274 bIsStartFixed = mpDVR->IsStartFixed();
1275 bIsEndFixed = mpDVR->IsEndFixed();
1277 /// Emit the definition for the auxiliary reduction kernel
1278 virtual void GenSlidingWindowFunction( std::stringstream& ss )
1280 if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1282 std::string name = Base::GetName();
1283 ss << "__kernel void " << name;
1284 ss << "_reduction(__global double* A, "
1285 "__global double *result,int arrayLength,int windowSize){\n";
1286 ss << " double tmp, current_result =" <<
1287 mpCodeGen->GetBottom();
1288 ss << ";\n";
1289 ss << " int writePos = get_group_id(1);\n";
1290 ss << " int lidx = get_local_id(0);\n";
1291 ss << " __local double shm_buf[256];\n";
1292 if (mpDVR->IsStartFixed())
1293 ss << " int offset = 0;\n";
1294 else // if (!mpDVR->IsStartFixed())
1295 ss << " int offset = get_group_id(1);\n";
1296 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1297 ss << " int end = windowSize;\n";
1298 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1299 ss << " int end = offset + windowSize;\n";
1300 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1301 ss << " int end = windowSize + get_group_id(1);\n";
1302 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1303 ss << " int end = windowSize;\n";
1304 ss << " end = min(end, arrayLength);\n";
1306 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1307 ss << " int loop = arrayLength/512 + 1;\n";
1308 ss << " for (int l=0; l<loop; l++){\n";
1309 ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1310 ss << " int loopOffset = l*512;\n";
1311 ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1312 ss << " tmp = legalize(" << mpCodeGen->Gen2(
1313 "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
1314 ss << " tmp = legalize(" << mpCodeGen->Gen2(
1315 "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
1316 ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1317 ss << " tmp = legalize(" << mpCodeGen->Gen2(
1318 "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
1319 ss << " shm_buf[lidx] = tmp;\n";
1320 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1321 ss << " for (int i = 128; i >0; i/=2) {\n";
1322 ss << " if (lidx < i)\n";
1323 ss << " shm_buf[lidx] = ";
1324 // Special case count
1325 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1326 ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1327 else
1328 ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
1329 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1330 ss << " }\n";
1331 ss << " if (lidx == 0)\n";
1332 ss << " current_result =";
1333 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1334 ss << "current_result + shm_buf[0]";
1335 else
1336 ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
1337 ss << ";\n";
1338 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1339 ss << " }\n";
1340 ss << " if (lidx == 0)\n";
1341 ss << " result[writePos] = current_result;\n";
1342 ss << "}\n";
1344 else
1346 std::string name = Base::GetName();
1347 /*sum reduction*/
1348 ss << "__kernel void " << name << "_sum";
1349 ss << "_reduction(__global double* A, "
1350 "__global double *result,int arrayLength,int windowSize){\n";
1351 ss << " double tmp, current_result =" <<
1352 mpCodeGen->GetBottom();
1353 ss << ";\n";
1354 ss << " int writePos = get_group_id(1);\n";
1355 ss << " int lidx = get_local_id(0);\n";
1356 ss << " __local double shm_buf[256];\n";
1357 if (mpDVR->IsStartFixed())
1358 ss << " int offset = 0;\n";
1359 else // if (!mpDVR->IsStartFixed())
1360 ss << " int offset = get_group_id(1);\n";
1361 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1362 ss << " int end = windowSize;\n";
1363 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1364 ss << " int end = offset + windowSize;\n";
1365 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1366 ss << " int end = windowSize + get_group_id(1);\n";
1367 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1368 ss << " int end = windowSize;\n";
1369 ss << " end = min(end, arrayLength);\n";
1370 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1371 ss << " int loop = arrayLength/512 + 1;\n";
1372 ss << " for (int l=0; l<loop; l++){\n";
1373 ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1374 ss << " int loopOffset = l*512;\n";
1375 ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1376 ss << " tmp = legalize(";
1377 ss << "(A[loopOffset + lidx + offset]+ tmp)";
1378 ss << ", tmp);\n";
1379 ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
1380 ss << ", tmp);\n";
1381 ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1382 ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
1383 ss << ", tmp);\n";
1384 ss << " shm_buf[lidx] = tmp;\n";
1385 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1386 ss << " for (int i = 128; i >0; i/=2) {\n";
1387 ss << " if (lidx < i)\n";
1388 ss << " shm_buf[lidx] = ";
1389 ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1390 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1391 ss << " }\n";
1392 ss << " if (lidx == 0)\n";
1393 ss << " current_result =";
1394 ss << "current_result + shm_buf[0]";
1395 ss << ";\n";
1396 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1397 ss << " }\n";
1398 ss << " if (lidx == 0)\n";
1399 ss << " result[writePos] = current_result;\n";
1400 ss << "}\n";
1401 /*count reduction*/
1402 ss << "__kernel void " << name << "_count";
1403 ss << "_reduction(__global double* A, "
1404 "__global double *result,int arrayLength,int windowSize){\n";
1405 ss << " double tmp, current_result =" <<
1406 mpCodeGen->GetBottom();
1407 ss << ";\n";
1408 ss << " int writePos = get_group_id(1);\n";
1409 ss << " int lidx = get_local_id(0);\n";
1410 ss << " __local double shm_buf[256];\n";
1411 if (mpDVR->IsStartFixed())
1412 ss << " int offset = 0;\n";
1413 else // if (!mpDVR->IsStartFixed())
1414 ss << " int offset = get_group_id(1);\n";
1415 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1416 ss << " int end = windowSize;\n";
1417 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1418 ss << " int end = offset + windowSize;\n";
1419 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1420 ss << " int end = windowSize + get_group_id(1);\n";
1421 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1422 ss << " int end = windowSize;\n";
1423 ss << " end = min(end, arrayLength);\n";
1424 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1425 ss << " int loop = arrayLength/512 + 1;\n";
1426 ss << " for (int l=0; l<loop; l++){\n";
1427 ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1428 ss << " int loopOffset = l*512;\n";
1429 ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1430 ss << " tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1431 ss << ", tmp);\n";
1432 ss << " tmp = legalize((isNan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
1433 ss << ", tmp);\n";
1434 ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1435 ss << " tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1436 ss << ", tmp);\n";
1437 ss << " shm_buf[lidx] = tmp;\n";
1438 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1439 ss << " for (int i = 128; i >0; i/=2) {\n";
1440 ss << " if (lidx < i)\n";
1441 ss << " shm_buf[lidx] = ";
1442 ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1443 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1444 ss << " }\n";
1445 ss << " if (lidx == 0)\n";
1446 ss << " current_result =";
1447 ss << "current_result + shm_buf[0];";
1448 ss << ";\n";
1449 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1450 ss << " }\n";
1451 ss << " if (lidx == 0)\n";
1452 ss << " result[writePos] = current_result;\n";
1453 ss << "}\n";
1457 virtual std::string GenSlidingWindowDeclRef( bool = false ) const
1459 std::stringstream ss;
1460 if (!bIsStartFixed && !bIsEndFixed)
1461 ss << Base::GetName() << "[i + gid0]";
1462 else
1463 ss << Base::GetName() << "[i]";
1464 return ss.str();
1466 /// Controls how the elements in the DoubleVectorRef are traversed
1467 virtual size_t GenReductionLoopHeader(
1468 std::stringstream& ss, int nResultSize, bool& needBody )
1470 assert(mpDVR);
1471 size_t nCurWindowSize = mpDVR->GetRefRowSize();
1472 std::string temp = Base::GetName() + "[gid0]";
1473 ss << "tmp = ";
1474 // Special case count
1475 if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
1477 ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
1478 ss << "nCount = nCount-1;\n";
1479 ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
1480 ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
1482 else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1483 ss << temp << "+ tmp";
1484 else
1485 ss << mpCodeGen->Gen2(temp, "tmp");
1486 ss << ";\n\t";
1487 needBody = false;
1488 return nCurWindowSize;
1491 virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
1493 assert(Base::mpClmem == NULL);
1494 // Obtain cl context
1495 ::opencl::KernelEnv kEnv;
1496 ::opencl::setKernelEnv(&kEnv);
1497 cl_int err;
1498 size_t nInput = mpDVR->GetArrayLength();
1499 size_t nCurWindowSize = mpDVR->GetRefRowSize();
1500 // create clmem buffer
1501 if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL)
1502 throw Unhandled();
1503 double* pHostBuffer = const_cast<double*>(
1504 mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
1505 size_t szHostBuffer = nInput * sizeof(double);
1506 Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
1507 (cl_mem_flags)CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
1508 szHostBuffer,
1509 pHostBuffer, &err);
1510 SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
1512 mpClmem2 = clCreateBuffer(kEnv.mpkContext,
1513 CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
1514 sizeof(double) * w, NULL, NULL);
1515 if (CL_SUCCESS != err)
1516 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
1517 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
1519 // reproduce the reduction function name
1520 std::string kernelName;
1521 if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1522 kernelName = Base::GetName() + "_reduction";
1523 else
1524 kernelName = Base::GetName() + "_sum_reduction";
1525 cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
1526 if (err != CL_SUCCESS)
1527 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
1528 SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
1530 // set kernel arg of reduction kernel
1531 // TODO(Wei Wei): use unique name for kernel
1532 cl_mem buf = Base::GetCLBuffer();
1533 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
1534 err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
1535 (void*)&buf);
1536 if (CL_SUCCESS != err)
1537 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1539 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
1540 err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
1541 if (CL_SUCCESS != err)
1542 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1544 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
1545 err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
1546 if (CL_SUCCESS != err)
1547 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1549 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
1550 err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
1551 if (CL_SUCCESS != err)
1552 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1554 // set work group size and execute
1555 size_t global_work_size[] = { 256, (size_t)w };
1556 size_t local_work_size[] = { 256, 1 };
1557 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
1558 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1559 global_work_size, local_work_size, 0, NULL, NULL);
1560 if (CL_SUCCESS != err)
1561 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
1562 err = clFinish(kEnv.mpkCmdQueue);
1563 if (CL_SUCCESS != err)
1564 throw OpenCLError("clFinish", err, __FILE__, __LINE__);
1565 if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
1567 /*average need more reduction kernel for count computing*/
1568 boost::scoped_array<double> pAllBuffer(new double[2 * w]);
1569 double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
1570 mpClmem2,
1571 CL_TRUE, CL_MAP_READ, 0,
1572 sizeof(double) * w, 0, NULL, NULL,
1573 &err));
1574 if (err != CL_SUCCESS)
1575 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
1577 for (int i = 0; i < w; i++)
1578 pAllBuffer[i] = resbuf[i];
1579 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
1580 if (err != CL_SUCCESS)
1581 throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
1583 kernelName = Base::GetName() + "_count_reduction";
1584 redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
1585 if (err != CL_SUCCESS)
1586 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
1587 SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
1589 // set kernel arg of reduction kernel
1590 buf = Base::GetCLBuffer();
1591 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
1592 err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
1593 (void*)&buf);
1594 if (CL_SUCCESS != err)
1595 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1597 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
1598 err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
1599 if (CL_SUCCESS != err)
1600 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1602 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
1603 err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
1604 if (CL_SUCCESS != err)
1605 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1607 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
1608 err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
1609 if (CL_SUCCESS != err)
1610 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1612 // set work group size and execute
1613 size_t global_work_size1[] = { 256, (size_t)w };
1614 size_t local_work_size1[] = { 256, 1 };
1615 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
1616 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1617 global_work_size1, local_work_size1, 0, NULL, NULL);
1618 if (CL_SUCCESS != err)
1619 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
1620 err = clFinish(kEnv.mpkCmdQueue);
1621 if (CL_SUCCESS != err)
1622 throw OpenCLError("clFinish", err, __FILE__, __LINE__);
1623 resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
1624 mpClmem2,
1625 CL_TRUE, CL_MAP_READ, 0,
1626 sizeof(double) * w, 0, NULL, NULL,
1627 &err));
1628 if (err != CL_SUCCESS)
1629 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
1630 for (int i = 0; i < w; i++)
1631 pAllBuffer[i + w] = resbuf[i];
1632 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, NULL, NULL);
1633 // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
1634 if (CL_SUCCESS != err)
1635 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err));
1636 if (mpClmem2)
1638 err = clReleaseMemObject(mpClmem2);
1639 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
1640 mpClmem2 = NULL;
1642 mpClmem2 = clCreateBuffer(kEnv.mpkContext,
1643 (cl_mem_flags)CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
1644 w * sizeof(double) * 2, pAllBuffer.get(), &err);
1645 if (CL_SUCCESS != err)
1646 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
1647 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
1649 // set kernel arg
1650 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
1651 err = clSetKernelArg(k, argno, sizeof(cl_mem), &(mpClmem2));
1652 if (CL_SUCCESS != err)
1653 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1654 return 1;
1656 ~ParallelReductionVectorRef()
1658 if (mpClmem2)
1660 cl_int err;
1661 err = clReleaseMemObject(mpClmem2);
1662 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
1663 mpClmem2 = NULL;
1667 size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1669 size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1671 size_t GetStartFixed() const { return bIsStartFixed; }
1673 size_t GetEndFixed() const { return bIsEndFixed; }
1675 protected:
1676 bool bIsStartFixed, bIsEndFixed;
1677 const formula::DoubleVectorRefToken* mpDVR;
1678 // from parent nodes
1679 boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
1680 // controls whether to invoke the reduction kernel during marshaling or not
1681 cl_mem mpClmem2;
1684 class Reduction : public SlidingFunctionBase
1686 int mnResultSize;
1687 public:
1688 Reduction( int nResultSize ) : mnResultSize(nResultSize) {}
1690 typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
1691 typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
1692 typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
1694 virtual bool HandleNaNArgument( std::stringstream&, unsigned, SubArguments& ) const
1696 return false;
1699 virtual void GenSlidingWindowFunction( std::stringstream& ss,
1700 const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
1702 ss << "\ndouble " << sSymName;
1703 ss << "_" << BinFuncName() << "(";
1704 for (unsigned i = 0; i < vSubArguments.size(); i++)
1706 if (i)
1707 ss << ", ";
1708 vSubArguments[i]->GenSlidingWindowDecl(ss);
1710 ss << ") {\n";
1711 ss << "double tmp = " << GetBottom() << ";\n";
1712 ss << "int gid0 = get_global_id(0);\n";
1713 if (isAverage() || isMinOrMax())
1714 ss << "int nCount = 0;\n";
1715 ss << "double tmpBottom;\n";
1716 unsigned i = vSubArguments.size();
1717 while (i--)
1719 if (NumericRange* NR =
1720 dynamic_cast<NumericRange*>(vSubArguments[i].get()))
1722 bool needBody; NR->GenReductionLoopHeader(ss, needBody); if (!needBody)
1723 continue;
1725 else if (ParallelNumericRange* PNR =
1726 dynamic_cast<ParallelNumericRange*>(vSubArguments[i].get()))
1728 //did not handle yet
1729 bool bNeedBody = false;
1730 PNR->GenReductionLoopHeader(ss, mnResultSize, bNeedBody);
1731 if (!bNeedBody)
1732 continue;
1734 else if (StringRange* SR =
1735 dynamic_cast<StringRange*>(vSubArguments[i].get()))
1737 //did not handle yet
1738 bool needBody;
1739 SR->GenReductionLoopHeader(ss, needBody);
1740 if (!needBody)
1741 continue;
1743 else
1745 FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
1746 assert(pCur);
1747 assert(pCur->GetType() != formula::svDoubleVectorRef);
1749 if (pCur->GetType() == formula::svSingleVectorRef ||
1750 pCur->GetType() == formula::svDouble)
1752 ss << "{\n";
1755 if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1757 bool bNanHandled = HandleNaNArgument(ss, i, vSubArguments);
1759 ss << "tmpBottom = " << GetBottom() << ";\n";
1761 if (!bNanHandled)
1763 ss << "if (isNan(";
1764 ss << vSubArguments[i]->GenSlidingWindowDeclRef();
1765 ss << "))\n";
1766 if (ZeroReturnZero())
1767 ss << " return 0;\n";
1768 else
1770 ss << " tmp = ";
1771 ss << Gen2("tmpBottom", "tmp") << ";\n";
1773 ss << "else\n";
1775 ss << "{";
1776 ss << " tmp = ";
1777 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1778 ss << ";\n";
1779 ss << " }\n";
1780 ss << "}\n";
1782 else
1784 ss << "tmp = ";
1785 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1786 ss << ";\n";
1789 if (isAverage())
1790 ss <<
1791 "if (nCount==0)\n"
1792 " return CreateDoubleError(errDivisionByZero);\n";
1793 else if (isMinOrMax())
1794 ss <<
1795 "if (nCount==0)\n"
1796 " return 0;\n";
1797 ss << "return tmp";
1798 if (isAverage())
1799 ss << "*pow((double)nCount,-1.0)";
1800 ss << ";\n}";
1802 virtual bool isAverage() const { return false; }
1803 virtual bool isMinOrMax() const { return false; }
1804 virtual bool takeString() const SAL_OVERRIDE { return false; }
1805 virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
1808 // Strictly binary operators
1809 class Binary : public SlidingFunctionBase
1811 public:
1812 virtual void GenSlidingWindowFunction( std::stringstream& ss,
1813 const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
1815 ss << "\ndouble " << sSymName;
1816 ss << "_" << BinFuncName() << "(";
1817 assert(vSubArguments.size() == 2);
1818 for (unsigned i = 0; i < vSubArguments.size(); i++)
1820 if (i)
1821 ss << ", ";
1822 vSubArguments[i]->GenSlidingWindowDecl(ss);
1824 ss << ") {\n\t";
1825 ss << "int gid0 = get_global_id(0), i = 0;\n\t";
1826 ss << "double tmp = ";
1827 ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(false),
1828 vSubArguments[1]->GenSlidingWindowDeclRef(false)) << ";\n\t";
1829 ss << "return tmp;\n}";
1831 virtual bool takeString() const SAL_OVERRIDE { return true; }
1832 virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
1835 class SumOfProduct : public SlidingFunctionBase
1837 public:
1838 virtual void GenSlidingWindowFunction( std::stringstream& ss,
1839 const std::string& sSymName, SubArguments& vSubArguments ) SAL_OVERRIDE
1841 size_t nCurWindowSize = 0;
1842 FormulaToken* tmpCur = NULL;
1843 const formula::DoubleVectorRefToken* pCurDVR = NULL;
1844 ss << "\ndouble " << sSymName;
1845 ss << "_" << BinFuncName() << "(";
1846 for (unsigned i = 0; i < vSubArguments.size(); i++)
1848 if (i)
1849 ss << ",";
1850 vSubArguments[i]->GenSlidingWindowDecl(ss);
1851 size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1852 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1853 nCurChildWindowSize : nCurWindowSize;
1854 tmpCur = vSubArguments[i]->GetFormulaToken();
1855 if (ocPush == tmpCur->GetOpCode())
1858 pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1859 if (!
1860 ((!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1861 || (pCurDVR->IsStartFixed() && pCurDVR->IsEndFixed()))
1863 throw Unhandled();
1866 ss << ") {\n";
1867 ss << " double tmp = 0.0;\n";
1868 ss << " int gid0 = get_global_id(0);\n";
1870 ss << "\tint i;\n\t";
1871 ss << "int currentCount0;\n";
1872 for (unsigned i = 0; i < vSubArguments.size() - 1; i++)
1873 ss << "int currentCount" << i + 1 << ";\n";
1874 std::stringstream temp3, temp4;
1875 int outLoopSize = UNROLLING_FACTOR;
1876 if (nCurWindowSize / outLoopSize != 0)
1878 ss << "for(int outLoop=0; outLoop<" <<
1879 nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1880 for (int count = 0; count < outLoopSize; count++)
1882 ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n";
1883 if (count == 0)
1885 for (unsigned i = 0; i < vSubArguments.size(); i++)
1887 tmpCur = vSubArguments[i]->GetFormulaToken();
1888 if (ocPush == tmpCur->GetOpCode())
1890 pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1891 if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1893 temp3 << " currentCount";
1894 temp3 << i;
1895 temp3 << " =i+gid0+1;\n";
1897 else
1899 temp3 << " currentCount";
1900 temp3 << i;
1901 temp3 << " =i+1;\n";
1906 temp3 << "tmp = fsum(";
1907 for (unsigned i = 0; i < vSubArguments.size(); i++)
1909 if (i)
1910 temp3 << "*";
1911 if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1913 temp3 << "(";
1914 temp3 << "(currentCount";
1915 temp3 << i;
1916 temp3 << ">";
1917 if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1918 formula::svSingleVectorRef)
1920 const formula::SingleVectorRefToken* pSVR =
1921 static_cast<const formula::SingleVectorRefToken*>
1922 (vSubArguments[i]->GetFormulaToken());
1923 temp3 << pSVR->GetArrayLength();
1924 temp3 << ")||isNan(" << vSubArguments[i]
1925 ->GenSlidingWindowDeclRef();
1926 temp3 << ")?0:";
1927 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
1928 temp3 << ")";
1930 else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1931 formula::svDoubleVectorRef)
1933 const formula::DoubleVectorRefToken* pSVR =
1934 static_cast<const formula::DoubleVectorRefToken*>
1935 (vSubArguments[i]->GetFormulaToken());
1936 temp3 << pSVR->GetArrayLength();
1937 temp3 << ")||isNan(" << vSubArguments[i]
1938 ->GenSlidingWindowDeclRef(true);
1939 temp3 << ")?0:";
1940 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1941 temp3 << ")";
1945 else
1946 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1948 temp3 << ", tmp);\n\t";
1950 ss << temp3.str();
1952 ss << "}\n\t";
1954 //The residual of mod outLoopSize
1955 for (unsigned int count = nCurWindowSize / outLoopSize * outLoopSize;
1956 count < nCurWindowSize; count++)
1958 ss << "i =" << count << ";\n";
1959 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1961 for (unsigned i = 0; i < vSubArguments.size(); i++)
1963 tmpCur = vSubArguments[i]->GetFormulaToken();
1964 if (ocPush == tmpCur->GetOpCode())
1966 pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1967 if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1969 temp4 << " currentCount";
1970 temp4 << i;
1971 temp4 << " =i+gid0+1;\n";
1973 else
1975 temp4 << " currentCount";
1976 temp4 << i;
1977 temp4 << " =i+1;\n";
1982 temp4 << "tmp = fsum(";
1983 for (unsigned i = 0; i < vSubArguments.size(); i++)
1985 if (i)
1986 temp4 << "*";
1987 if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1989 temp4 << "(";
1990 temp4 << "(currentCount";
1991 temp4 << i;
1992 temp4 << ">";
1993 if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1994 formula::svSingleVectorRef)
1996 const formula::SingleVectorRefToken* pSVR =
1997 static_cast<const formula::SingleVectorRefToken*>
1998 (vSubArguments[i]->GetFormulaToken());
1999 temp4 << pSVR->GetArrayLength();
2000 temp4 << ")||isNan(" << vSubArguments[i]
2001 ->GenSlidingWindowDeclRef();
2002 temp4 << ")?0:";
2003 temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
2004 temp4 << ")";
2006 else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
2007 formula::svDoubleVectorRef)
2009 const formula::DoubleVectorRefToken* pSVR =
2010 static_cast<const formula::DoubleVectorRefToken*>
2011 (vSubArguments[i]->GetFormulaToken());
2012 temp4 << pSVR->GetArrayLength();
2013 temp4 << ")||isNan(" << vSubArguments[i]
2014 ->GenSlidingWindowDeclRef(true);
2015 temp4 << ")?0:";
2016 temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
2017 temp4 << ")";
2021 else
2023 temp4 << vSubArguments[i]
2024 ->GenSlidingWindowDeclRef(true);
2027 temp4 << ", tmp);\n\t";
2029 ss << temp4.str();
2031 ss << "return tmp;\n";
2032 ss << "}";
2034 virtual bool takeString() const SAL_OVERRIDE { return false; }
2035 virtual bool takeNumeric() const SAL_OVERRIDE { return true; }
2038 /// operator traits
2039 class OpNop : public Reduction
2041 public:
2042 OpNop( int nResultSize ) : Reduction(nResultSize) {}
2044 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2045 virtual std::string Gen2( const std::string& lhs, const std::string& ) const SAL_OVERRIDE
2047 return lhs;
2049 virtual std::string BinFuncName() const SAL_OVERRIDE { return "nop"; }
2052 class OpCount : public Reduction
2054 public:
2055 OpCount( int nResultSize ) : Reduction(nResultSize) {}
2057 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2058 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2060 std::stringstream ss;
2061 ss << "(isNan(" << lhs << ")?" << rhs << ":" << rhs << "+1.0)";
2062 return ss.str();
2064 virtual std::string BinFuncName() const SAL_OVERRIDE { return "fcount"; }
2067 class OpEqual : public Binary
2069 public:
2070 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2071 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2073 std::stringstream ss;
2074 ss << "strequal(" << lhs << "," << rhs << ")";
2075 return ss.str();
2077 virtual std::string BinFuncName() const SAL_OVERRIDE { return "eq"; }
2080 class OpLessEqual : public Binary
2082 public:
2083 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2084 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2086 std::stringstream ss;
2087 ss << "(" << lhs << "<=" << rhs << ")";
2088 return ss.str();
2090 virtual std::string BinFuncName() const SAL_OVERRIDE { return "leq"; }
2093 class OpLess : public Binary
2095 public:
2096 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2097 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2099 std::stringstream ss;
2100 ss << "(" << lhs << "<" << rhs << ")";
2101 return ss.str();
2103 virtual std::string BinFuncName() const SAL_OVERRIDE { return "less"; }
2106 class OpGreater : public Binary
2108 public:
2109 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2110 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2112 std::stringstream ss;
2113 ss << "(" << lhs << ">" << rhs << ")";
2114 return ss.str();
2116 virtual std::string BinFuncName() const SAL_OVERRIDE { return "gt"; }
2119 class OpSum : public Reduction
2121 public:
2122 OpSum( int nResultSize ) : Reduction(nResultSize) {}
2124 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2125 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2127 std::stringstream ss;
2128 ss << "((" << lhs << ")+(" << rhs << "))";
2129 return ss.str();
2131 virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsum"; }
2134 class OpAverage : public Reduction
2136 public:
2137 OpAverage( int nResultSize ) : Reduction(nResultSize) {}
2139 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2140 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2142 std::stringstream ss;
2143 ss << "fsum_count(" << lhs << "," << rhs << ", &nCount)";
2144 return ss.str();
2146 virtual std::string BinFuncName() const SAL_OVERRIDE { return "average"; }
2147 virtual bool isAverage() const SAL_OVERRIDE { return true; }
2150 class OpSub : public Reduction
2152 public:
2153 OpSub( int nResultSize ) : Reduction(nResultSize) {}
2155 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2156 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2158 return lhs + "-" + rhs;
2160 virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsub"; }
2163 class OpMul : public Reduction
2165 public:
2166 OpMul( int nResultSize ) : Reduction(nResultSize) {}
2168 virtual std::string GetBottom() SAL_OVERRIDE { return "1"; }
2169 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2171 return lhs + "*" + rhs;
2173 virtual std::string BinFuncName() const SAL_OVERRIDE { return "fmul"; }
2174 virtual bool ZeroReturnZero() SAL_OVERRIDE { return true; }
2177 /// Technically not a reduction, but fits the framework.
2178 class OpDiv : public Reduction
2180 public:
2181 OpDiv( int nResultSize ) : Reduction(nResultSize) {}
2183 virtual std::string GetBottom() SAL_OVERRIDE { return "1.0"; }
2184 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2186 return "(" + rhs + "==0 ? CreateDoubleError(errDivisionByZero) : (" + lhs + "/" + rhs + ") )";
2188 virtual std::string BinFuncName() const SAL_OVERRIDE { return "fdiv"; }
2190 virtual bool HandleNaNArgument( std::stringstream& ss, unsigned argno, SubArguments& vSubArguments ) const SAL_OVERRIDE
2192 if (argno == 1)
2194 ss <<
2195 "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ")) {\n"
2196 " return CreateDoubleError(errDivisionByZero);\n"
2197 "}\n";
2198 return true;
2200 else if (argno == 0)
2202 ss <<
2203 "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ") &&\n"
2204 " !(isnan(" << vSubArguments[1]->GenSlidingWindowDeclRef() << ") || " << vSubArguments[1]->GenSlidingWindowDeclRef() << " == 0)) {\n"
2205 " return 0;\n"
2206 "}\n";
2208 return false;
2213 class OpMin : public Reduction
2215 public:
2216 OpMin( int nResultSize ) : Reduction(nResultSize) {}
2218 virtual std::string GetBottom() SAL_OVERRIDE { return "NAN"; }
2219 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2221 return "fmin_count(" + lhs + "," + rhs + ", &nCount)";
2223 virtual std::string BinFuncName() const SAL_OVERRIDE { return "min"; }
2224 virtual bool isMinOrMax() const SAL_OVERRIDE { return true; }
2227 class OpMax : public Reduction
2229 public:
2230 OpMax( int nResultSize ) : Reduction(nResultSize) {}
2232 virtual std::string GetBottom() SAL_OVERRIDE { return "NAN"; }
2233 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2235 return "fmax_count(" + lhs + "," + rhs + ", &nCount)";
2237 virtual std::string BinFuncName() const SAL_OVERRIDE { return "max"; }
2238 virtual bool isMinOrMax() const SAL_OVERRIDE { return true; }
2241 class OpSumProduct : public SumOfProduct
2243 public:
2244 virtual std::string GetBottom() SAL_OVERRIDE { return "0"; }
2245 virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const SAL_OVERRIDE
2247 return lhs + "*" + rhs;
2249 virtual std::string BinFuncName() const SAL_OVERRIDE { return "fsop"; }
2251 namespace {
2252 struct SumIfsArgs
2254 SumIfsArgs( cl_mem x ) : mCLMem(x), mConst(0.0) { }
2255 SumIfsArgs( double x ) : mCLMem(NULL), mConst(x) { }
2256 cl_mem mCLMem;
2257 double mConst;
2261 /// Helper functions that have multiple buffers
2262 class DynamicKernelSoPArguments : public DynamicKernelArgument
2264 public:
2265 typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
2267 DynamicKernelSoPArguments( const ScCalcConfig& config,
2268 const std::string& s, const FormulaTreeNodeRef& ft,
2269 SlidingFunctionBase* pCodeGen, int nResultSize );
2271 /// Create buffer and pass the buffer to a given kernel
2272 virtual size_t Marshal( cl_kernel k, int argno, int nVectorWidth, cl_program pProgram ) SAL_OVERRIDE
2274 unsigned i = 0;
2275 for (SubArgumentsType::iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2276 ++it)
2278 i += (*it)->Marshal(k, argno + i, nVectorWidth, pProgram);
2280 if (OpGeoMean* OpSumCodeGen = dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
2282 // Obtain cl context
2283 ::opencl::KernelEnv kEnv;
2284 ::opencl::setKernelEnv(&kEnv);
2285 cl_int err;
2286 cl_mem pClmem2;
2288 if (OpSumCodeGen->NeedReductionKernel())
2290 std::vector<cl_mem> vclmem;
2291 for (SubArgumentsType::iterator it = mvSubArguments.begin(),
2292 e = mvSubArguments.end(); it != e; ++it)
2294 if (VectorRef* VR = dynamic_cast<VectorRef*>(it->get()))
2295 vclmem.push_back(VR->GetCLBuffer());
2296 else
2297 vclmem.push_back(NULL);
2299 pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2300 sizeof(double) * nVectorWidth, NULL, &err);
2301 if (CL_SUCCESS != err)
2302 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2303 SAL_INFO("sc.opencl", "Created buffer " << pClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2305 std::string kernelName = "GeoMean_reduction";
2306 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2307 if (err != CL_SUCCESS)
2308 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2309 SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2311 // set kernel arg of reduction kernel
2312 for (size_t j = 0; j < vclmem.size(); j++)
2314 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]);
2315 err = clSetKernelArg(redKernel, j,
2316 vclmem[j] ? sizeof(cl_mem) : sizeof(double),
2317 (void*)&vclmem[j]);
2318 if (CL_SUCCESS != err)
2319 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2321 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2);
2322 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void*)&pClmem2);
2323 if (CL_SUCCESS != err)
2324 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2326 // set work group size and execute
2327 size_t global_work_size[] = { 256, (size_t)nVectorWidth };
2328 size_t local_work_size[] = { 256, 1 };
2329 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
2330 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
2331 global_work_size, local_work_size, 0, NULL, NULL);
2332 if (CL_SUCCESS != err)
2333 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2334 err = clFinish(kEnv.mpkCmdQueue);
2335 if (CL_SUCCESS != err)
2336 throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2338 // Pass pClmem2 to the "real" kernel
2339 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2);
2340 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&pClmem2);
2341 if (CL_SUCCESS != err)
2342 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2345 if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
2347 // Obtain cl context
2348 ::opencl::KernelEnv kEnv;
2349 ::opencl::setKernelEnv(&kEnv);
2350 cl_int err;
2351 DynamicKernelArgument* Arg = mvSubArguments[0].get();
2352 DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
2353 static_cast<DynamicKernelSlidingArgument<VectorRef>*>(Arg);
2354 mpClmem2 = NULL;
2356 if (OpSumCodeGen->NeedReductionKernel())
2358 size_t nInput = slidingArgPtr->GetArrayLength();
2359 size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
2360 std::vector<SumIfsArgs> vclmem;
2362 for (SubArgumentsType::iterator it = mvSubArguments.begin(),
2363 e = mvSubArguments.end(); it != e; ++it)
2365 if (VectorRef* VR = dynamic_cast<VectorRef*>(it->get()))
2366 vclmem.push_back(SumIfsArgs(VR->GetCLBuffer()));
2367 else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(it->get()))
2368 vclmem.push_back(SumIfsArgs(CA->GetDouble()));
2369 else
2370 vclmem.push_back(SumIfsArgs((cl_mem)NULL));
2372 mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2373 sizeof(double) * nVectorWidth, NULL, &err);
2374 if (CL_SUCCESS != err)
2375 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2376 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2378 std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
2379 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2380 if (err != CL_SUCCESS)
2381 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2382 SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2384 // set kernel arg of reduction kernel
2385 for (size_t j = 0; j < vclmem.size(); j++)
2387 if (vclmem[j].mCLMem)
2388 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem);
2389 else
2390 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << vclmem[j].mConst);
2391 err = clSetKernelArg(redKernel, j,
2392 vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
2393 vclmem[j].mCLMem ? (void*)&vclmem[j].mCLMem :
2394 (void*)&vclmem[j].mConst);
2395 if (CL_SUCCESS != err)
2396 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2398 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2);
2399 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void*)&mpClmem2);
2400 if (CL_SUCCESS != err)
2401 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2403 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput);
2404 err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), (void*)&nInput);
2405 if (CL_SUCCESS != err)
2406 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2408 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize);
2409 err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), (void*)&nCurWindowSize);
2410 if (CL_SUCCESS != err)
2411 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2412 // set work group size and execute
2413 size_t global_work_size[] = { 256, (size_t)nVectorWidth };
2414 size_t local_work_size[] = { 256, 1 };
2415 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel);
2416 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
2417 global_work_size, local_work_size, 0, NULL, NULL);
2418 if (CL_SUCCESS != err)
2419 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2421 err = clFinish(kEnv.mpkCmdQueue);
2422 if (CL_SUCCESS != err)
2423 throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2425 SAL_INFO("sc.opencl", "Relasing kernel " << redKernel);
2426 err = clReleaseKernel(redKernel);
2427 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << ::opencl::errorString(err));
2429 // Pass mpClmem2 to the "real" kernel
2430 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
2431 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem2);
2432 if (CL_SUCCESS != err)
2433 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2436 return i;
2439 virtual void GenSlidingWindowFunction( std::stringstream& ss ) SAL_OVERRIDE
2441 for (unsigned i = 0; i < mvSubArguments.size(); i++)
2442 mvSubArguments[i]->GenSlidingWindowFunction(ss);
2443 mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
2445 virtual void GenDeclRef( std::stringstream& ss ) const SAL_OVERRIDE
2447 for (unsigned i = 0; i < mvSubArguments.size(); i++)
2449 if (i)
2450 ss << ",";
2451 mvSubArguments[i]->GenDeclRef(ss);
2454 virtual void GenDecl( std::stringstream& ss ) const SAL_OVERRIDE
2456 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2457 ++it)
2459 if (it != mvSubArguments.begin())
2460 ss << ", ";
2461 (*it)->GenDecl(ss);
2465 virtual size_t GetWindowSize() const SAL_OVERRIDE
2467 size_t nCurWindowSize = 0;
2468 for (unsigned i = 0; i < mvSubArguments.size(); i++)
2470 size_t nCurChildWindowSize = mvSubArguments[i]->GetWindowSize();
2471 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
2472 nCurChildWindowSize : nCurWindowSize;
2474 return nCurWindowSize;
2477 /// When declared as input to a sliding window function
2478 virtual void GenSlidingWindowDecl( std::stringstream& ss ) const SAL_OVERRIDE
2480 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2481 ++it)
2483 if (it != mvSubArguments.begin())
2484 ss << ", ";
2485 (*it)->GenSlidingWindowDecl(ss);
2488 /// Generate either a function call to each children
2489 /// or directly inline it if we are already inside a loop
2490 virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const SAL_OVERRIDE
2492 std::stringstream ss;
2493 if (!nested)
2495 ss << mSymName << "_" << mpCodeGen->BinFuncName() << "(";
2496 for (unsigned i = 0; i < mvSubArguments.size(); i++)
2498 if (i)
2499 ss << ", ";
2500 mvSubArguments[i]->GenDeclRef(ss);
2502 ss << ")";
2504 else
2506 if (mvSubArguments.size() != 2)
2507 throw Unhandled();
2508 bool bArgument1_NeedNested =
2509 mvSubArguments[0]->GetFormulaToken()->GetType()
2510 != formula::svSingleVectorRef;
2511 bool bArgument2_NeedNested =
2512 mvSubArguments[1]->GetFormulaToken()->GetType()
2513 != formula::svSingleVectorRef;
2514 ss << "(";
2515 ss << mpCodeGen->
2516 Gen2(mvSubArguments[0]
2517 ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
2518 mvSubArguments[1]
2519 ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
2520 ss << ")";
2522 return ss.str();
2524 virtual std::string DumpOpName() const SAL_OVERRIDE
2526 std::string t = "_" + mpCodeGen->BinFuncName();
2527 for (unsigned i = 0; i < mvSubArguments.size(); i++)
2528 t = t + mvSubArguments[i]->DumpOpName();
2529 return t;
2531 virtual void DumpInlineFun( std::set<std::string>& decls,
2532 std::set<std::string>& funs ) const SAL_OVERRIDE
2534 mpCodeGen->BinInlineFun(decls, funs);
2535 for (unsigned i = 0; i < mvSubArguments.size(); i++)
2536 mvSubArguments[i]->DumpInlineFun(decls, funs);
2538 virtual ~DynamicKernelSoPArguments()
2540 if (mpClmem2)
2542 cl_int err;
2543 err = clReleaseMemObject(mpClmem2);
2544 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
2545 mpClmem2 = NULL;
2549 private:
2550 SubArgumentsType mvSubArguments;
2551 boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
2552 cl_mem mpClmem2;
2555 DynamicKernelArgumentRef SoPHelper( const ScCalcConfig& config,
2556 const std::string& ts, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen,
2557 int nResultSize )
2559 return DynamicKernelArgumentRef(new DynamicKernelSoPArguments(config, ts, ft, pCodeGen, nResultSize));
2562 template<class Base>
2563 DynamicKernelArgument* VectorRefFactory( const ScCalcConfig& config, const std::string& s,
2564 const FormulaTreeNodeRef& ft,
2565 boost::shared_ptr<SlidingFunctionBase>& pCodeGen,
2566 int index )
2568 //Black lists ineligible classes here ..
2569 // SUMIFS does not perform parallel reduction at DoubleVectorRef level
2570 if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
2572 if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
2573 return new DynamicKernelSlidingArgument<VectorRef>(config, s, ft, pCodeGen, index);
2574 return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2576 // AVERAGE is not supported yet
2577 //Average has been supported by reduction kernel
2578 /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
2580 return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2582 // MUL is not supported yet
2583 else if (dynamic_cast<OpMul*>(pCodeGen.get()))
2585 return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2587 // Sub is not a reduction per se
2588 else if (dynamic_cast<OpSub*>(pCodeGen.get()))
2590 return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2592 // Only child class of Reduction is supported
2593 else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
2595 return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2598 const formula::DoubleVectorRefToken* pDVR =
2599 static_cast<const formula::DoubleVectorRefToken*>(
2600 ft->GetFormulaToken());
2601 // Window being too small to justify a parallel reduction
2602 if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
2603 return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2604 if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
2605 (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
2606 return new ParallelReductionVectorRef<Base>(config, s, ft, pCodeGen, index);
2607 else // Other cases are not supported as well
2608 return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2611 DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig& config,
2612 const std::string& s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen, int nResultSize ) :
2613 DynamicKernelArgument(config, s, ft), mpCodeGen(pCodeGen), mpClmem2(NULL)
2615 size_t nChildren = ft->Children.size();
2617 for (unsigned i = 0; i < nChildren; i++)
2619 FormulaTreeNodeRef rChild = ft->Children[i];
2620 if (!rChild)
2621 throw Unhandled();
2622 FormulaToken* pChild = rChild->GetFormulaToken();
2623 if (!pChild)
2624 throw Unhandled();
2625 OpCode opc = pChild->GetOpCode();
2626 std::stringstream tmpname;
2627 tmpname << s << "_" << i;
2628 std::string ts = tmpname.str();
2629 switch (opc)
2631 case ocPush:
2632 if (pChild->GetType() == formula::svDoubleVectorRef)
2634 const formula::DoubleVectorRefToken* pDVR =
2635 static_cast<const formula::DoubleVectorRefToken*>(pChild);
2637 for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
2639 SAL_INFO("sc.opencl", "i=" << i << " j=" << j <<
2640 " mpNumericArray=" << pDVR->GetArrays()[j].mpNumericArray <<
2641 " mpStringArray=" << pDVR->GetArrays()[j].mpStringArray <<
2642 " allStringsAreNull=" << (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength())?"YES":"NO") <<
2643 " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2644 " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2646 if (pDVR->GetArrays()[j].mpNumericArray ||
2647 (pDVR->GetArrays()[j].mpNumericArray == NULL &&
2648 pDVR->GetArrays()[j].mpStringArray == NULL))
2650 if (pDVR->GetArrays()[j].mpNumericArray &&
2651 pCodeGen->takeNumeric() &&
2652 pDVR->GetArrays()[j].mpStringArray &&
2653 pCodeGen->takeString())
2655 // Function takes numbers or strings, there are both
2656 SAL_INFO("sc.opencl", "Numbers and strings and that is OK");
2657 mvSubArguments.push_back(
2658 DynamicKernelArgumentRef(
2659 new DynamicKernelMixedSlidingArgument(mCalcConfig,
2660 ts, ft->Children[i], mpCodeGen, j)));
2662 else
2664 // Not sure I can figure out what case this exactly is;)
2665 SAL_INFO("sc.opencl", "The other case");
2666 mvSubArguments.push_back(
2667 DynamicKernelArgumentRef(VectorRefFactory<VectorRef>(mCalcConfig,
2668 ts, ft->Children[i], mpCodeGen, j)));
2671 else
2673 // Ditto here. This is such crack.
2674 SAL_INFO("sc.opencl", "The outer other case (can't figure out what it exactly means)");
2675 mvSubArguments.push_back(
2676 DynamicKernelArgumentRef(VectorRefFactory
2677 <DynamicKernelStringArgument>(mCalcConfig,
2678 ts, ft->Children[i], mpCodeGen, j)));
2682 else if (pChild->GetType() == formula::svSingleVectorRef)
2684 const formula::SingleVectorRefToken* pSVR =
2685 static_cast<const formula::SingleVectorRefToken*>(pChild);
2687 SAL_INFO("sc.opencl", "i=" << i <<
2688 " mpNumericArray=" << pSVR->GetArray().mpNumericArray <<
2689 " mpStringArray=" << pSVR->GetArray().mpStringArray <<
2690 " allStringsAreNull=" << (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength())?"YES":"NO") <<
2691 " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2692 " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2694 if (pSVR->GetArray().mpNumericArray &&
2695 pCodeGen->takeNumeric() &&
2696 pSVR->GetArray().mpStringArray &&
2697 pCodeGen->takeString())
2699 // Function takes numbers or strings, there are both
2700 SAL_INFO("sc.opencl", "Numbers and strings and that is OK");
2701 mvSubArguments.push_back(
2702 DynamicKernelArgumentRef(new DynamicKernelMixedArgument(mCalcConfig,
2703 ts, ft->Children[i])));
2705 else if (pSVR->GetArray().mpNumericArray &&
2706 pCodeGen->takeNumeric() &&
2707 (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength()) || mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO))
2709 // Function takes numbers, and either there
2710 // are no strings, or there are strings but
2711 // they are to be treated as zero
2712 SAL_INFO("sc.opencl", "Maybe strings even if want numbers but should be treated as zero");
2713 mvSubArguments.push_back(
2714 DynamicKernelArgumentRef(new VectorRef(mCalcConfig, ts,
2715 ft->Children[i])));
2717 else if (pSVR->GetArray().mpNumericArray == NULL &&
2718 pCodeGen->takeNumeric() &&
2719 pSVR->GetArray().mpStringArray &&
2720 mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO)
2722 // Function takes numbers, and there are only
2723 // strings, but they are to be treated as zero
2724 SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2725 mvSubArguments.push_back(
2726 DynamicKernelArgumentRef(new VectorRef(mCalcConfig, ts,
2727 ft->Children[i])));
2729 else if (pSVR->GetArray().mpStringArray &&
2730 pCodeGen->takeString())
2732 // There are strings, and the function takes strings.
2733 SAL_INFO("sc.opencl", "Strings only");
2734 mvSubArguments.push_back(
2735 DynamicKernelArgumentRef(new DynamicKernelStringArgument(mCalcConfig,
2736 ts, ft->Children[i])));
2738 else if (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength()) &&
2739 pSVR->GetArray().mpNumericArray == NULL)
2741 // There are only empty cells. Push as an
2742 // array of NANs
2743 SAL_INFO("sc.opencl", "Only empty cells");
2744 mvSubArguments.push_back(
2745 DynamicKernelArgumentRef(new VectorRef(mCalcConfig, ts,
2746 ft->Children[i])));
2748 else
2750 SAL_INFO("sc.opencl", "Fallback case, rejecting for OpenCL");
2751 throw UnhandledToken(pChild,
2752 "Got unhandled case here", __FILE__, __LINE__);
2755 else if (pChild->GetType() == formula::svDouble)
2757 SAL_INFO("sc.opencl", "Constant number (?) case");
2758 mvSubArguments.push_back(
2759 DynamicKernelArgumentRef(new DynamicKernelConstantArgument(mCalcConfig, ts,
2760 ft->Children[i])));
2762 else if (pChild->GetType() == formula::svString
2763 && pCodeGen->takeString())
2765 SAL_INFO("sc.opencl", "Constant string (?) case");
2766 mvSubArguments.push_back(
2767 DynamicKernelArgumentRef(new ConstStringArgument(mCalcConfig, ts,
2768 ft->Children[i])));
2770 else
2772 SAL_INFO("sc.opencl", "Fallback case, rejecting for OpenCL");
2773 throw UnhandledToken(pChild, ("unhandled operand " + StackVarEnumToString(pChild->GetType()) + " for ocPush").c_str());
2775 break;
2776 case ocDiv:
2777 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDiv(nResultSize), nResultSize));
2778 break;
2779 case ocMul:
2780 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMul(nResultSize), nResultSize));
2781 break;
2782 case ocSub:
2783 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSub(nResultSize), nResultSize));
2784 break;
2785 case ocAdd:
2786 case ocSum:
2787 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSum(nResultSize), nResultSize));
2788 break;
2789 case ocAverage:
2790 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpAverage(nResultSize), nResultSize));
2791 break;
2792 case ocMin:
2793 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMin(nResultSize), nResultSize));
2794 break;
2795 case ocMax:
2796 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMax(nResultSize), nResultSize));
2797 break;
2798 case ocCount:
2799 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCount(nResultSize), nResultSize));
2800 break;
2801 case ocSumProduct:
2802 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSumProduct, nResultSize));
2803 break;
2804 case ocIRR:
2805 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpIRR, nResultSize));
2806 break;
2807 case ocMIRR:
2808 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpMIRR, nResultSize));
2809 break;
2810 case ocPMT:
2811 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPMT, nResultSize));
2812 break;
2813 case ocRate:
2814 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpIntrate, nResultSize));
2815 break;
2816 case ocRRI:
2817 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpRRI, nResultSize));
2818 break;
2819 case ocPpmt:
2820 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPPMT, nResultSize));
2821 break;
2822 case ocFisher:
2823 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpFisher, nResultSize));
2824 break;
2825 case ocFisherInv:
2826 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpFisherInv, nResultSize));
2827 break;
2828 case ocGamma:
2829 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGamma, nResultSize));
2830 break;
2831 case ocSLN:
2832 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSLN, nResultSize));
2833 break;
2834 case ocGammaLn:
2835 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGammaLn, nResultSize));
2836 break;
2837 case ocGauss:
2838 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGauss, nResultSize));
2839 break;
2840 /*case ocGeoMean:
2841 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGeoMean));
2842 break;*/
2843 case ocHarMean:
2844 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpHarMean, nResultSize));
2845 break;
2846 case ocLessEqual:
2847 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpLessEqual, nResultSize));
2848 break;
2849 case ocLess:
2850 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpLess, nResultSize));
2851 break;
2852 case ocEqual:
2853 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpEqual, nResultSize));
2854 break;
2855 case ocGreater:
2856 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGreater, nResultSize));
2857 break;
2858 case ocSYD:
2859 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpSYD, nResultSize));
2860 break;
2861 case ocCorrel:
2862 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCorrel, nResultSize));
2863 break;
2864 case ocCos:
2865 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCos, nResultSize));
2866 break;
2867 case ocNegBinomVert :
2868 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpNegbinomdist, nResultSize));
2869 break;
2870 case ocPearson:
2871 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPearson, nResultSize));
2872 break;
2873 case ocRSQ:
2874 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpRsq, nResultSize));
2875 break;
2876 case ocCosecant:
2877 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCsc, nResultSize));
2878 break;
2879 case ocISPMT:
2880 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpISPMT, nResultSize));
2881 break;
2882 case ocDuration:
2883 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2884 ft->Children[i], new OpDuration, nResultSize));
2885 break;
2886 case ocSinHyp:
2887 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2888 ft->Children[i], new OpSinh, nResultSize));
2889 break;
2890 case ocAbs:
2891 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2892 ft->Children[i], new OpAbs, nResultSize));
2893 break;
2894 case ocPV:
2895 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2896 ft->Children[i], new OpPV, nResultSize));
2897 break;
2898 case ocSin:
2899 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2900 ft->Children[i], new OpSin, nResultSize));
2901 break;
2902 case ocTan:
2903 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2904 ft->Children[i], new OpTan, nResultSize));
2905 break;
2906 case ocTanHyp:
2907 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2908 ft->Children[i], new OpTanH, nResultSize));
2909 break;
2910 case ocStandard:
2911 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2912 ft->Children[i], new OpStandard, nResultSize));
2913 break;
2914 case ocWeibull:
2915 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2916 ft->Children[i], new OpWeibull, nResultSize));
2917 break;
2918 /*case ocMedian:
2919 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2920 ft->Children[i],new OpMedian));
2921 break;*/
2922 case ocDDB:
2923 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2924 ft->Children[i], new OpDDB, nResultSize));
2925 break;
2926 case ocFV:
2927 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2928 ft->Children[i], new OpFV, nResultSize));
2929 break;
2930 case ocSumIfs:
2931 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2932 ft->Children[i], new OpSumIfs, nResultSize));
2933 break;
2934 /*case ocVBD:
2935 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2936 ft->Children[i],new OpVDB));
2937 break;*/
2938 case ocKurt:
2939 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2940 ft->Children[i], new OpKurt, nResultSize));
2941 break;
2942 /*case ocNper:
2943 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2944 ft->Children[i], new OpNper));
2945 break;*/
2946 case ocNormDist:
2947 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2948 ft->Children[i], new OpNormdist, nResultSize));
2949 break;
2950 case ocArcCos:
2951 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2952 ft->Children[i], new OpArcCos, nResultSize));
2953 break;
2954 case ocSqrt:
2955 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2956 ft->Children[i], new OpSqrt, nResultSize));
2957 break;
2958 case ocArcCosHyp:
2959 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2960 ft->Children[i], new OpArcCosHyp, nResultSize));
2961 break;
2962 case ocNPV:
2963 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2964 ft->Children[i], new OpNPV, nResultSize));
2965 break;
2966 case ocStdNormDist:
2967 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2968 ft->Children[i], new OpNormsdist, nResultSize));
2969 break;
2970 case ocNormInv:
2971 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2972 ft->Children[i], new OpNorminv, nResultSize));
2973 break;
2974 case ocSNormInv:
2975 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2976 ft->Children[i], new OpNormsinv, nResultSize));
2977 break;
2978 case ocPermut:
2979 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2980 ft->Children[i], new OpPermut, nResultSize));
2981 break;
2982 case ocPermutationA:
2983 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2984 ft->Children[i], new OpPermutationA, nResultSize));
2985 break;
2986 case ocPhi:
2987 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2988 ft->Children[i], new OpPhi, nResultSize));
2989 break;
2990 case ocIpmt:
2991 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2992 ft->Children[i], new OpIPMT, nResultSize));
2993 break;
2994 case ocConfidence:
2995 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2996 ft->Children[i], new OpConfidence, nResultSize));
2997 break;
2998 case ocIntercept:
2999 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3000 ft->Children[i], new OpIntercept, nResultSize));
3001 break;
3002 case ocDB:
3003 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3004 new OpDB, nResultSize));
3005 break;
3006 case ocLogInv:
3007 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3008 ft->Children[i], new OpLogInv, nResultSize));
3009 break;
3010 case ocArcCot:
3011 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3012 ft->Children[i], new OpArcCot, nResultSize));
3013 break;
3014 case ocCosHyp:
3015 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3016 ft->Children[i], new OpCosh, nResultSize));
3017 break;
3018 case ocCritBinom:
3019 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3020 ft->Children[i], new OpCritBinom, nResultSize));
3021 break;
3022 case ocArcCotHyp:
3023 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3024 ft->Children[i], new OpArcCotHyp, nResultSize));
3025 break;
3026 case ocArcSin:
3027 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3028 ft->Children[i], new OpArcSin, nResultSize));
3029 break;
3030 case ocArcSinHyp:
3031 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3032 ft->Children[i], new OpArcSinHyp, nResultSize));
3033 break;
3034 case ocArcTan:
3035 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3036 ft->Children[i], new OpArcTan, nResultSize));
3037 break;
3038 case ocArcTanHyp:
3039 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3040 ft->Children[i], new OpArcTanH, nResultSize));
3041 break;
3042 case ocBitAnd:
3043 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3044 ft->Children[i], new OpBitAnd, nResultSize));
3045 break;
3046 case ocForecast:
3047 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3048 ft->Children[i], new OpForecast, nResultSize));
3049 break;
3050 case ocLogNormDist:
3051 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3052 ft->Children[i], new OpLogNormDist, nResultSize));
3053 break;
3054 /*case ocGammaDist:
3055 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3056 ft->Children[i], new OpGammaDist));
3057 break;*/
3058 case ocLn:
3059 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3060 ft->Children[i], new OpLn, nResultSize));
3061 break;
3062 case ocRound:
3063 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3064 ft->Children[i], new OpRound, nResultSize));
3065 break;
3066 case ocCot:
3067 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3068 ft->Children[i], new OpCot, nResultSize));
3069 break;
3070 case ocCotHyp:
3071 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3072 ft->Children[i], new OpCoth, nResultSize));
3073 break;
3074 case ocFDist:
3075 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3076 ft->Children[i], new OpFdist, nResultSize));
3077 break;
3078 case ocVar:
3079 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3080 ft->Children[i], new OpVar, nResultSize));
3081 break;
3082 /*case ocChiDist:
3083 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3084 ft->Children[i],new OpChiDist));
3085 break;*/
3086 case ocPow:
3087 case ocPower:
3088 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3089 ft->Children[i], new OpPower, nResultSize));
3090 break;
3091 case ocOdd:
3092 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3093 ft->Children[i], new OpOdd, nResultSize));
3094 break;
3095 /*case ocChiSqDist:
3096 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3097 ft->Children[i],new OpChiSqDist));
3098 break;
3099 case ocChiSqInv:
3100 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3101 ft->Children[i],new OpChiSqInv));
3102 break;
3103 case ocGammaInv:
3104 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3105 ft->Children[i], new OpGammaInv));
3106 break;*/
3107 case ocFloor:
3108 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3109 ft->Children[i], new OpFloor, nResultSize));
3110 break;
3111 /*case ocFInv:
3112 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3113 ft->Children[i], new OpFInv));
3114 break;*/
3115 case ocFTest:
3116 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3117 ft->Children[i], new OpFTest, nResultSize));
3118 break;
3119 case ocB:
3120 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3121 ft->Children[i], new OpB, nResultSize));
3122 break;
3123 case ocBetaDist:
3124 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3125 ft->Children[i], new OpBetaDist, nResultSize));
3126 break;
3127 case ocCosecantHyp:
3128 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3129 ft->Children[i], new OpCscH, nResultSize));
3130 break;
3131 case ocExp:
3132 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3133 ft->Children[i], new OpExp, nResultSize));
3134 break;
3135 case ocLog10:
3136 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3137 ft->Children[i], new OpLog10, nResultSize));
3138 break;
3139 case ocExpDist:
3140 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3141 ft->Children[i], new OpExponDist, nResultSize));
3142 break;
3143 case ocAverageIfs:
3144 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3145 ft->Children[i], new OpAverageIfs, nResultSize));
3146 break;
3147 case ocCountIfs:
3148 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3149 ft->Children[i], new OpCountIfs, nResultSize));
3150 break;
3151 case ocCombinA:
3152 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3153 ft->Children[i], new OpCombinA, nResultSize));
3154 break;
3155 case ocEven:
3156 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3157 ft->Children[i], new OpEven, nResultSize));
3158 break;
3159 case ocLog:
3160 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3161 ft->Children[i], new OpLog, nResultSize));
3162 break;
3163 case ocMod:
3164 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3165 ft->Children[i], new OpMod, nResultSize));
3166 break;
3167 case ocTrunc:
3168 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3169 ft->Children[i], new OpTrunc, nResultSize));
3170 break;
3171 case ocSkew:
3172 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3173 ft->Children[i], new OpSkew, nResultSize));
3174 break;
3175 case ocArcTan2:
3176 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3177 ft->Children[i], new OpArcTan2, nResultSize));
3178 break;
3179 case ocBitOr:
3180 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3181 ft->Children[i], new OpBitOr, nResultSize));
3182 break;
3183 case ocBitLshift:
3184 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3185 ft->Children[i], new OpBitLshift, nResultSize));
3186 break;
3187 case ocBitRshift:
3188 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3189 ft->Children[i], new OpBitRshift, nResultSize));
3190 break;
3191 case ocBitXor:
3192 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3193 ft->Children[i], new OpBitXor, nResultSize));
3194 break;
3195 /*case ocChiInv:
3196 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3197 ft->Children[i],new OpChiInv));
3198 break;*/
3199 case ocPoissonDist:
3200 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3201 ft->Children[i], new OpPoisson, nResultSize));
3202 break;
3203 case ocSumSQ:
3204 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3205 ft->Children[i], new OpSumSQ, nResultSize));
3206 break;
3207 case ocSkewp:
3208 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3209 ft->Children[i], new OpSkewp, nResultSize));
3210 break;
3211 case ocBinomDist:
3212 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3213 ft->Children[i], new OpBinomdist, nResultSize));
3214 break;
3215 case ocVarP:
3216 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3217 ft->Children[i], new OpVarP, nResultSize));
3218 break;
3219 case ocCeil:
3220 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3221 ft->Children[i], new OpCeil, nResultSize));
3222 break;
3223 case ocCombin:
3224 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3225 ft->Children[i], new OpCombin, nResultSize));
3226 break;
3227 case ocDevSq:
3228 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3229 ft->Children[i], new OpDevSq, nResultSize));
3230 break;
3231 case ocStDev:
3232 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3233 ft->Children[i], new OpStDev, nResultSize));
3234 break;
3235 case ocSlope:
3236 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3237 ft->Children[i], new OpSlope, nResultSize));
3238 break;
3239 case ocSTEYX:
3240 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3241 ft->Children[i], new OpSTEYX, nResultSize));
3242 break;
3243 case ocZTest:
3244 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3245 ft->Children[i], new OpZTest, nResultSize));
3246 break;
3247 case ocPi:
3248 mvSubArguments.push_back(
3249 DynamicKernelArgumentRef(new DynamicKernelPiArgument(mCalcConfig, ts,
3250 ft->Children[i])));
3251 break;
3252 case ocRandom:
3253 mvSubArguments.push_back(
3254 DynamicKernelArgumentRef(new DynamicKernelRandomArgument(mCalcConfig, ts,
3255 ft->Children[i])));
3256 break;
3257 case ocProduct:
3258 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3259 ft->Children[i], new OpProduct, nResultSize));
3260 break;
3261 /*case ocHypGeomDist:
3262 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3263 ft->Children[i],new OpHypGeomDist));
3264 break;*/
3265 case ocSumX2MY2:
3266 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3267 ft->Children[i], new OpSumX2MY2, nResultSize));
3268 break;
3269 case ocSumX2DY2:
3270 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3271 ft->Children[i], new OpSumX2PY2, nResultSize));
3272 break;
3273 /*case ocBetaInv:
3274 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3275 ft->Children[i],new OpBetainv));
3276 break;*/
3277 case ocTTest:
3278 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3279 ft->Children[i], new OpTTest, nResultSize));
3280 break;
3281 case ocTDist:
3282 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3283 ft->Children[i], new OpTDist, nResultSize));
3284 break;
3285 /*case ocTInv:
3286 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3287 ft->Children[i], new OpTInv));
3288 break;*/
3289 case ocSumXMY2:
3290 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3291 ft->Children[i], new OpSumXMY2, nResultSize));
3292 break;
3293 case ocStDevP:
3294 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3295 ft->Children[i], new OpStDevP, nResultSize));
3296 break;
3297 case ocCovar:
3298 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3299 ft->Children[i], new OpCovar, nResultSize));
3300 break;
3301 case ocAnd:
3302 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3303 ft->Children[i], new OpAnd, nResultSize));
3304 break;
3305 case ocVLookup:
3306 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3307 ft->Children[i], new OpVLookup, nResultSize));
3308 break;
3309 case ocOr:
3310 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3311 ft->Children[i], new OpOr, nResultSize));
3312 break;
3313 case ocNot:
3314 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3315 ft->Children[i], new OpNot, nResultSize));
3316 break;
3317 case ocXor:
3318 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3319 ft->Children[i], new OpXor, nResultSize));
3320 break;
3321 case ocDBMax:
3322 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3323 ft->Children[i], new OpDmax, nResultSize));
3324 break;
3325 case ocDBMin:
3326 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3327 ft->Children[i], new OpDmin, nResultSize));
3328 break;
3329 case ocDBProduct:
3330 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3331 ft->Children[i], new OpDproduct, nResultSize));
3332 break;
3333 case ocDBAverage:
3334 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3335 ft->Children[i], new OpDaverage, nResultSize));
3336 break;
3337 case ocDBStdDev:
3338 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3339 ft->Children[i], new OpDstdev, nResultSize));
3340 break;
3341 case ocDBStdDevP:
3342 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3343 ft->Children[i], new OpDstdevp, nResultSize));
3344 break;
3345 case ocDBSum:
3346 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3347 ft->Children[i], new OpDsum, nResultSize));
3348 break;
3349 case ocDBVar:
3350 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3351 ft->Children[i], new OpDvar, nResultSize));
3352 break;
3353 case ocDBVarP:
3354 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3355 ft->Children[i], new OpDvarp, nResultSize));
3356 break;
3357 case ocAverageIf:
3358 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3359 ft->Children[i], new OpAverageIf, nResultSize));
3360 break;
3361 case ocDBCount:
3362 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3363 ft->Children[i], new OpDcount, nResultSize));
3364 break;
3365 case ocDBCount2:
3366 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3367 ft->Children[i], new OpDcount2, nResultSize));
3368 break;
3369 case ocDeg:
3370 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3371 ft->Children[i], new OpDeg, nResultSize));
3372 break;
3373 case ocRoundUp:
3374 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3375 ft->Children[i], new OpRoundUp, nResultSize));
3376 break;
3377 case ocRoundDown:
3378 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3379 ft->Children[i], new OpRoundDown, nResultSize));
3380 break;
3381 case ocInt:
3382 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3383 ft->Children[i], new OpInt, nResultSize));
3384 break;
3385 case ocRad:
3386 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3387 ft->Children[i], new OpRadians, nResultSize));
3388 break;
3389 case ocCountIf:
3390 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3391 ft->Children[i], new OpCountIf, nResultSize));
3392 break;
3393 case ocIsEven:
3394 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3395 ft->Children[i], new OpIsEven, nResultSize));
3396 break;
3397 case ocIsOdd:
3398 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3399 ft->Children[i], new OpIsOdd, nResultSize));
3400 break;
3401 case ocFact:
3402 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3403 ft->Children[i], new OpFact, nResultSize));
3404 break;
3405 case ocMinA:
3406 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3407 ft->Children[i], new OpMinA, nResultSize));
3408 break;
3409 case ocCount2:
3410 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3411 ft->Children[i], new OpCountA, nResultSize));
3412 break;
3413 case ocMaxA:
3414 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3415 ft->Children[i], new OpMaxA, nResultSize));
3416 break;
3417 case ocAverageA:
3418 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3419 ft->Children[i], new OpAverageA, nResultSize));
3420 break;
3421 case ocVarA:
3422 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3423 ft->Children[i], new OpVarA, nResultSize));
3424 break;
3425 case ocVarPA:
3426 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3427 ft->Children[i], new OpVarPA, nResultSize));
3428 break;
3429 case ocStDevA:
3430 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3431 ft->Children[i], new OpStDevA, nResultSize));
3432 break;
3433 case ocStDevPA:
3434 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3435 ft->Children[i], new OpStDevPA, nResultSize));
3436 break;
3437 case ocSecant:
3438 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3439 ft->Children[i], new OpSec, nResultSize));
3440 break;
3441 case ocSecantHyp:
3442 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3443 ft->Children[i], new OpSecH, nResultSize));
3444 break;
3445 case ocSumIf:
3446 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3447 ft->Children[i], new OpSumIf, nResultSize));
3448 break;
3449 case ocNegSub:
3450 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3451 ft->Children[i], new OpNegSub, nResultSize));
3452 break;
3453 case ocAveDev:
3454 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3455 ft->Children[i], new OpAveDev, nResultSize));
3456 break;
3457 case ocIf:
3458 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3459 ft->Children[i], new OpIf, nResultSize));
3460 break;
3461 case ocExternal:
3462 if (!(pChild->GetExternal().compareTo(OUString(
3463 "com.sun.star.sheet.addin.Analysis.getEffect"))))
3465 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpEffective, nResultSize));
3467 else if (!(pChild->GetExternal().compareTo(OUString(
3468 "com.sun.star.sheet.addin.Analysis.getCumipmt"))))
3470 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCumipmt, nResultSize));
3472 else if (!(pChild->GetExternal().compareTo(OUString(
3473 "com.sun.star.sheet.addin.Analysis.getNominal"))))
3475 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpNominal, nResultSize));
3477 else if (!(pChild->GetExternal().compareTo(OUString(
3478 "com.sun.star.sheet.addin.Analysis.getCumprinc"))))
3480 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCumprinc, nResultSize));
3482 else if (!(pChild->GetExternal().compareTo(OUString(
3483 "com.sun.star.sheet.addin.Analysis.getXnpv"))))
3485 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpXNPV, nResultSize));
3487 else if (!(pChild->GetExternal().compareTo(OUString(
3488 "com.sun.star.sheet.addin.Analysis.getPricemat"))))
3490 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpPriceMat, nResultSize));
3492 else if (!(pChild->GetExternal().compareTo(OUString(
3493 "com.sun.star.sheet.addin.Analysis.getReceived"))))
3495 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpReceived, nResultSize));
3497 else if (!(pChild->GetExternal().compareTo(OUString(
3498 "com.sun.star.sheet.addin.Analysis.getTbilleq"))))
3500 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpTbilleq, nResultSize));
3502 else if (!(pChild->GetExternal().compareTo(OUString(
3503 "com.sun.star.sheet.addin.Analysis.getTbillprice"))))
3505 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpTbillprice, nResultSize));
3507 else if (!(pChild->GetExternal().compareTo(OUString(
3508 "com.sun.star.sheet.addin.Analysis.getTbillyield"))))
3510 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpTbillyield, nResultSize));
3512 else if (!(pChild->GetExternal().compareTo(OUString(
3513 "com.sun.star.sheet.addin.Analysis.getFvschedule"))))
3515 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpFvschedule, nResultSize));
3517 /*else if ( !(pChild->GetExternal().compareTo(OUString(
3518 "com.sun.star.sheet.addin.Analysis.getYield"))))
3520 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYield));
3522 else if (!(pChild->GetExternal().compareTo(OUString(
3523 "com.sun.star.sheet.addin.Analysis.getYielddisc"))))
3525 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYielddisc, nResultSize));
3527 else if (!(pChild->GetExternal().compareTo(OUString(
3528 "com.sun.star.sheet.addin.Analysis.getYieldmat"))))
3530 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYieldmat, nResultSize));
3532 else if (!(pChild->GetExternal().compareTo(OUString(
3533 "com.sun.star.sheet.addin.Analysis.getAccrintm"))))
3535 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpAccrintm, nResultSize));
3537 else if (!(pChild->GetExternal().compareTo(OUString(
3538 "com.sun.star.sheet.addin.Analysis.getCoupdaybs"))))
3540 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCoupdaybs, nResultSize));
3542 else if (!(pChild->GetExternal().compareTo(OUString(
3543 "com.sun.star.sheet.addin.Analysis.getDollarde"))))
3545 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDollarde, nResultSize));
3547 else if (!(pChild->GetExternal().compareTo(OUString(
3548 "com.sun.star.sheet.addin.Analysis.getDollarfr"))))
3550 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDollarfr, nResultSize));
3552 else if (!(pChild->GetExternal().compareTo(OUString(
3553 "com.sun.star.sheet.addin.Analysis.getCoupdays"))))
3555 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCoupdays, nResultSize));
3557 else if (!(pChild->GetExternal().compareTo(OUString(
3558 "com.sun.star.sheet.addin.Analysis.getCoupdaysnc"))))
3560 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpCoupdaysnc, nResultSize));
3562 else if (!(pChild->GetExternal().compareTo(OUString(
3563 "com.sun.star.sheet.addin.Analysis.getDisc"))))
3565 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDISC, nResultSize));
3567 else if (!(pChild->GetExternal().compareTo(OUString(
3568 "com.sun.star.sheet.addin.Analysis.getIntrate"))))
3570 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpINTRATE, nResultSize));
3572 else if (!(pChild->GetExternal().compareTo(OUString(
3573 "com.sun.star.sheet.addin.Analysis.getPrice"))))
3575 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3576 ft->Children[i], new OpPrice, nResultSize));
3578 else if (!(pChild->GetExternal().compareTo(OUString(
3579 "com.sun.star.sheet.addin.Analysis.getCoupnum"))))
3581 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3582 new OpCoupnum, nResultSize));
3584 /*else if ( !(pChild->GetExternal().compareTo(OUString(
3585 "com.sun.star.sheet.addin.Analysis.getDuration"))))
3587 mvSubArguments.push_back(
3588 SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDuration_ADD));
3590 else if (!(pChild->GetExternal().compareTo(OUString(
3591 "com.sun.star.sheet.addin.Analysis.getAmordegrc"))))
3593 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3594 new OpAmordegrc, nResultSize));
3596 else if (!(pChild->GetExternal().compareTo(OUString(
3597 "com.sun.star.sheet.addin.Analysis.getAmorlinc"))))
3599 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3600 new OpAmorlinc, nResultSize));
3602 else if (!(pChild->GetExternal().compareTo(OUString(
3603 "com.sun.star.sheet.addin.Analysis.getMduration"))))
3605 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3606 new OpMDuration, nResultSize));
3608 else if (!(pChild->GetExternal().compareTo(OUString(
3609 "com.sun.star.sheet.addin.Analysis.getXirr"))))
3611 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3612 new OpXirr, nResultSize));
3614 else if (!(pChild->GetExternal().compareTo(OUString(
3615 "com.sun.star.sheet.addin.Analysis.getOddlprice"))))
3617 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3618 ft->Children[i], new OpOddlprice, nResultSize));
3620 else if (!(pChild->GetExternal().compareTo(OUString(
3621 "com.sun.star.sheet.addin.Analysis.getOddlyield"))))
3623 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3624 new OpOddlyield, nResultSize));
3626 else if (!(pChild->GetExternal().compareTo(OUString(
3627 "com.sun.star.sheet.addin.Analysis.getPricedisc"))))
3629 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3630 ft->Children[i], new OpPriceDisc, nResultSize));
3632 else if (!(pChild->GetExternal().compareTo(OUString(
3633 "com.sun.star.sheet.addin.Analysis.getCouppcd"))))
3635 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3636 new OpCouppcd, nResultSize));
3638 else if (!(pChild->GetExternal().compareTo(OUString(
3639 "com.sun.star.sheet.addin.Analysis.getCoupncd"))))
3641 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3642 new OpCoupncd, nResultSize));
3644 else if (!(pChild->GetExternal().compareTo(OUString(
3645 "com.sun.star.sheet.addin.Analysis.getAccrint"))))
3647 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3648 new OpAccrint, nResultSize));
3650 else if (!(pChild->GetExternal().compareTo(OUString(
3651 "com.sun.star.sheet.addin.Analysis.getSqrtpi"))))
3653 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3654 new OpSqrtPi, nResultSize));
3656 else if (!(pChild->GetExternal().compareTo(OUString(
3657 "com.sun.star.sheet.addin.Analysis.getConvert"))))
3659 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3660 new OpConvert, nResultSize));
3662 else if (!(pChild->GetExternal().compareTo(OUString(
3663 "com.sun.star.sheet.addin.Analysis.getIseven"))))
3665 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3666 new OpIsEven, nResultSize));
3668 else if (!(pChild->GetExternal().compareTo(OUString(
3669 "com.sun.star.sheet.addin.Analysis.getIsodd"))))
3671 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3672 new OpIsOdd, nResultSize));
3674 else if (!(pChild->GetExternal().compareTo(OUString(
3675 "com.sun.star.sheet.addin.Analysis.getMround"))))
3677 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3678 new OpMROUND, nResultSize));
3680 else if (!(pChild->GetExternal().compareTo(OUString(
3681 "com.sun.star.sheet.addin.Analysis.getQuotient"))))
3683 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3684 new OpQuotient, nResultSize));
3686 else if (!(pChild->GetExternal().compareTo(OUString(
3687 "com.sun.star.sheet.addin.Analysis.getSeriessum"))))
3689 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3690 new OpSeriesSum, nResultSize));
3692 else if (!(pChild->GetExternal().compareTo(OUString(
3693 "com.sun.star.sheet.addin.Analysis.getBesselj"))))
3695 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3696 new OpBesselj, nResultSize));
3698 else if (!(pChild->GetExternal().compareTo(OUString(
3699 "com.sun.star.sheet.addin.Analysis.getGestep"))))
3701 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3702 new OpGestep, nResultSize));
3704 else
3705 throw UnhandledToken(pChild, "unhandled opcode");
3706 break;
3708 default:
3709 throw UnhandledToken(pChild, "unhandled opcode");
3714 class DynamicKernel : public CompiledFormula
3716 public:
3717 DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize );
3718 virtual ~DynamicKernel();
3720 static DynamicKernel* create( const ScCalcConfig& config, ScTokenArray& rCode, int nResultSize );
3722 /// OpenCL code generation
3723 void CodeGen();
3725 /// Produce kernel hash
3726 std::string GetMD5();
3728 /// Create program, build, and create kerenl
3729 /// TODO cache results based on kernel body hash
3730 /// TODO: abstract OpenCL part out into OpenCL wrapper.
3731 void CreateKernel();
3733 /// Prepare buffers, marshal them to GPU, and launch the kernel
3734 /// TODO: abstract OpenCL part out into OpenCL wrapper.
3735 void Launch( size_t nr );
3737 cl_mem GetResultBuffer() const { return mpResClmem; }
3739 private:
3740 ScCalcConfig mCalcConfig;
3741 FormulaTreeNodeRef mpRoot;
3742 SymbolTable mSyms;
3743 std::string mKernelSignature, mKernelHash;
3744 std::string mFullProgramSrc;
3745 cl_program mpProgram;
3746 cl_kernel mpKernel;
3747 cl_mem mpResClmem; // Results
3748 std::set<std::string> inlineDecl;
3749 std::set<std::string> inlineFun;
3751 int mnResultSize;
3754 DynamicKernel::DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize ) :
3755 mCalcConfig(config),
3756 mpRoot(r),
3757 mpProgram(NULL),
3758 mpKernel(NULL),
3759 mpResClmem(NULL),
3760 mnResultSize(nResultSize) {}
3762 DynamicKernel::~DynamicKernel()
3764 cl_int err;
3765 if (mpResClmem)
3767 err = clReleaseMemObject(mpResClmem);
3768 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err));
3770 if (mpKernel)
3772 SAL_INFO("sc.opencl", "Releasing kernel " << mpKernel);
3773 err = clReleaseKernel(mpKernel);
3774 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << ::opencl::errorString(err));
3776 // mpProgram is not going to be released here -- it's cached.
3779 void DynamicKernel::CodeGen()
3781 // Travese the tree of expression and declare symbols used
3782 const DynamicKernelArgument* DK = mSyms.DeclRefArg<DynamicKernelSoPArguments>(mCalcConfig, mpRoot, new OpNop(mnResultSize), mnResultSize);
3784 std::stringstream decl;
3785 if (::opencl::gpuEnv.mnKhrFp64Flag)
3787 decl << "#if __OPENCL_VERSION__ < 120\n";
3788 decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
3789 decl << "#endif\n";
3791 else if (::opencl::gpuEnv.mnAmdFp64Flag)
3793 decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
3795 // preambles
3796 decl << publicFunc;
3797 DK->DumpInlineFun(inlineDecl, inlineFun);
3798 for (std::set<std::string>::iterator set_iter = inlineDecl.begin();
3799 set_iter != inlineDecl.end(); ++set_iter)
3801 decl << *set_iter;
3804 for (std::set<std::string>::iterator set_iter = inlineFun.begin();
3805 set_iter != inlineFun.end(); ++set_iter)
3807 decl << *set_iter;
3809 mSyms.DumpSlidingWindowFunctions(decl);
3810 mKernelSignature = DK->DumpOpName();
3811 decl << "__kernel void DynamicKernel" << mKernelSignature;
3812 decl << "(__global double *result, ";
3813 DK->GenSlidingWindowDecl(decl);
3814 decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
3815 DK->GenSlidingWindowDeclRef(false) << ";\n}\n";
3816 mFullProgramSrc = decl.str();
3817 #ifdef SAL_DETAIL_ENABLE_LOG_INFO
3818 std::stringstream area;
3819 if (mKernelSignature[0] == '_')
3820 area << "sc.opencl.source." << mKernelSignature.substr(1, std::string::npos);
3821 else
3822 area << "sc.opencl.source." << mKernelSignature;
3823 SAL_INFO(area.str().c_str(), "Program to be compiled:\n" << linenumberify(mFullProgramSrc));
3824 #endif
3827 std::string DynamicKernel::GetMD5()
3829 if (mKernelHash.empty())
3831 std::stringstream md5s;
3832 // Compute MD5SUM of kernel body to obtain the name
3833 sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
3834 rtl_digest_MD5(
3835 mFullProgramSrc.c_str(),
3836 mFullProgramSrc.length(), result,
3837 RTL_DIGEST_LENGTH_MD5);
3838 for (int i = 0; i < RTL_DIGEST_LENGTH_MD5; i++)
3840 md5s << std::hex << (int)result[i];
3842 mKernelHash = md5s.str();
3844 return mKernelHash;
3847 /// Build code
3848 void DynamicKernel::CreateKernel()
3850 if (mpKernel)
3851 // already created.
3852 return;
3854 cl_int err;
3855 std::string kname = "DynamicKernel" + mKernelSignature;
3856 // Compile kernel here!!!
3857 // Obtain cl context
3858 ::opencl::KernelEnv kEnv;
3859 ::opencl::setKernelEnv(&kEnv);
3860 const char* src = mFullProgramSrc.c_str();
3861 static std::string lastOneKernelHash = "";
3862 static std::string lastSecondKernelHash = "";
3863 static cl_program lastOneProgram = NULL;
3864 static cl_program lastSecondProgram = NULL;
3865 std::string KernelHash = mKernelSignature + GetMD5();
3866 if (lastOneKernelHash == KernelHash && lastOneProgram)
3868 mpProgram = lastOneProgram;
3870 else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
3872 mpProgram = lastSecondProgram;
3874 else
3875 { // doesn't match the last compiled formula.
3877 if (lastSecondProgram)
3879 SAL_INFO("sc.opencl", "Releasing program " << lastSecondProgram);
3880 err = clReleaseProgram(lastSecondProgram);
3881 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseProgram failed: " << ::opencl::errorString(err));
3882 lastSecondProgram = NULL;
3884 if (::opencl::buildProgramFromBinary("",
3885 &::opencl::gpuEnv, KernelHash.c_str(), 0))
3887 mpProgram = ::opencl::gpuEnv.mpArryPrograms[0];
3888 ::opencl::gpuEnv.mpArryPrograms[0] = NULL;
3890 else
3892 mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
3893 &src, NULL, &err);
3894 if (err != CL_SUCCESS)
3895 throw OpenCLError("clCreateProgramWithSource", err, __FILE__, __LINE__);
3896 SAL_INFO("sc.opencl", "Created program " << mpProgram);
3898 err = clBuildProgram(mpProgram, 1,
3899 ::opencl::gpuEnv.mpArryDevsID, "", NULL, NULL);
3900 if (err != CL_SUCCESS)
3902 #if OSL_DEBUG_LEVEL > 0
3903 if (err == CL_BUILD_PROGRAM_FAILURE)
3905 cl_build_status stat;
3906 cl_int e = clGetProgramBuildInfo(
3907 mpProgram, ::opencl::gpuEnv.mpArryDevsID[0],
3908 CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status),
3909 &stat, 0);
3910 SAL_WARN_IF(
3911 e != CL_SUCCESS, "sc.opencl",
3912 "after CL_BUILD_PROGRAM_FAILURE,"
3913 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
3914 " fails with " << ::opencl::errorString(e));
3915 if (e == CL_SUCCESS)
3917 size_t n;
3918 e = clGetProgramBuildInfo(
3919 mpProgram, ::opencl::gpuEnv.mpArryDevsID[0],
3920 CL_PROGRAM_BUILD_LOG, 0, 0, &n);
3921 SAL_WARN_IF(
3922 e != CL_SUCCESS || n == 0, "sc.opencl",
3923 "after CL_BUILD_PROGRAM_FAILURE,"
3924 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
3925 " fails with " << ::opencl::errorString(e) << ", n=" << n);
3926 if (e == CL_SUCCESS && n != 0)
3928 std::vector<char> log(n);
3929 e = clGetProgramBuildInfo(
3930 mpProgram, ::opencl::gpuEnv.mpArryDevsID[0],
3931 CL_PROGRAM_BUILD_LOG, n, &log[0], 0);
3932 SAL_WARN_IF(
3933 e != CL_SUCCESS || n == 0, "sc.opencl",
3934 "after CL_BUILD_PROGRAM_FAILURE,"
3935 " clGetProgramBuildInfo("
3936 "CL_PROGRAM_BUILD_LOG) fails with " << ::opencl::errorString(e));
3937 if (e == CL_SUCCESS)
3938 SAL_WARN(
3939 "sc.opencl",
3940 "CL_BUILD_PROGRAM_FAILURE, status " << stat
3941 << ", log \"" << &log[0] << "\"");
3945 #endif
3946 throw OpenCLError("clBuildProgram", err, __FILE__, __LINE__);
3948 SAL_INFO("sc.opencl", "Built program " << mpProgram);
3950 // Generate binary out of compiled kernel.
3951 ::opencl::generatBinFromKernelSource(mpProgram,
3952 (mKernelSignature + GetMD5()).c_str());
3954 lastSecondKernelHash = lastOneKernelHash;
3955 lastSecondProgram = lastOneProgram;
3956 lastOneKernelHash = KernelHash;
3957 lastOneProgram = mpProgram;
3959 mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
3960 if (err != CL_SUCCESS)
3961 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
3962 SAL_INFO("sc.opencl", "Created kernel " << mpKernel << " with name " << kname << " in program " << mpProgram);
3965 void DynamicKernel::Launch( size_t nr )
3967 // Obtain cl context
3968 ::opencl::KernelEnv kEnv;
3969 ::opencl::setKernelEnv(&kEnv);
3970 cl_int err;
3971 // The results
3972 mpResClmem = clCreateBuffer(kEnv.mpkContext,
3973 (cl_mem_flags)CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
3974 nr * sizeof(double), NULL, &err);
3975 if (CL_SUCCESS != err)
3976 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
3977 SAL_INFO("sc.opencl", "Created buffer " << mpResClmem << " size " << nr << "*" << sizeof(double) << "=" << (nr*sizeof(double)));
3979 SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem);
3980 err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), (void*)&mpResClmem);
3981 if (CL_SUCCESS != err)
3982 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
3983 // The rest of buffers
3984 mSyms.Marshal(mpKernel, nr, mpProgram);
3985 size_t global_work_size[] = { nr };
3986 SAL_INFO("sc.opencl", "Enqueing kernel " << mpKernel);
3987 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, NULL,
3988 global_work_size, NULL, 0, NULL, NULL);
3989 if (CL_SUCCESS != err)
3990 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
3991 err = clFlush(kEnv.mpkCmdQueue);
3992 if (CL_SUCCESS != err)
3993 throw OpenCLError("clFlush", err, __FILE__, __LINE__);
3996 // Symbol lookup. If there is no such symbol created, allocate one
3997 // kernel with argument with unique name and return so.
3998 // The template argument T must be a subclass of DynamicKernelArgument
3999 template<typename T>
4000 const DynamicKernelArgument* SymbolTable::DeclRefArg( const ScCalcConfig& config,
4001 FormulaTreeNodeRef t, SlidingFunctionBase* pCodeGen, int nResultSize )
4003 FormulaToken* ref = t->GetFormulaToken();
4004 ArgumentMap::iterator it = mSymbols.find(ref);
4005 if (it == mSymbols.end())
4007 // Allocate new symbols
4008 std::stringstream ss;
4009 ss << "tmp" << mCurId++;
4010 DynamicKernelArgumentRef new_arg(new T(config, ss.str(), t, pCodeGen, nResultSize));
4011 mSymbols[ref] = new_arg;
4012 mParams.push_back(new_arg);
4013 return new_arg.get();
4015 else
4017 return it->second.get();
4021 FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() :
4022 FormulaGroupInterpreter() {}
4024 FormulaGroupInterpreterOpenCL::~FormulaGroupInterpreterOpenCL() {}
4026 ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
4028 return NULL;
4031 DynamicKernel* DynamicKernel::create( const ScCalcConfig& rConfig, ScTokenArray& rCode, int nResultSize )
4033 // Constructing "AST"
4034 FormulaTokenIterator aCode(rCode);
4035 std::list<FormulaToken*> aTokenList;
4036 std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
4037 FormulaToken* pCur;
4038 while ((pCur = const_cast<FormulaToken*>(aCode.Next())) != NULL)
4040 OpCode eOp = pCur->GetOpCode();
4041 if (eOp != ocPush)
4043 FormulaTreeNodeRef pCurNode(new FormulaTreeNode(pCur));
4044 sal_uInt8 nParamCount = pCur->GetParamCount();
4045 for (sal_uInt8 i = 0; i < nParamCount; i++)
4047 FormulaToken* pTempFormula = aTokenList.back();
4048 aTokenList.pop_back();
4049 if (pTempFormula->GetOpCode() != ocPush)
4051 if (aHashMap.find(pTempFormula) == aHashMap.end())
4052 return NULL;
4053 pCurNode->Children.push_back(aHashMap[pTempFormula]);
4055 else
4057 FormulaTreeNodeRef pChildTreeNode =
4058 FormulaTreeNodeRef(
4059 new FormulaTreeNode(pTempFormula));
4060 pCurNode->Children.push_back(pChildTreeNode);
4063 std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
4064 aHashMap[pCur] = pCurNode;
4066 aTokenList.push_back(pCur);
4069 FormulaTreeNodeRef Root = FormulaTreeNodeRef(new FormulaTreeNode(NULL));
4070 Root->Children.push_back(aHashMap[aTokenList.back()]);
4072 DynamicKernel* pDynamicKernel = new DynamicKernel(rConfig, Root, nResultSize);
4074 // OpenCL source code generation and kernel compilation
4077 pDynamicKernel->CodeGen();
4078 pDynamicKernel->CreateKernel();
4080 catch (const UnhandledToken& ut)
4082 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4083 delete pDynamicKernel;
4084 return NULL;
4086 catch (...)
4088 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
4089 return NULL;
4091 return pDynamicKernel;
4094 CompiledFormula* FormulaGroupInterpreterOpenCL::createCompiledFormula(
4095 ScFormulaCellGroup& rGroup, ScTokenArray& rCode )
4097 return DynamicKernel::create(maCalcConfig, rCode, rGroup.mnLength);
4100 namespace {
4102 class CLInterpreterResult
4104 DynamicKernel* mpKernel;
4106 SCROW mnGroupLength;
4108 cl_mem mpCLResBuf;
4109 double* mpResBuf;
4111 public:
4112 CLInterpreterResult() : mpKernel(NULL), mnGroupLength(0), mpCLResBuf(NULL), mpResBuf(NULL) {}
4113 CLInterpreterResult( DynamicKernel* pKernel, SCROW nGroupLength ) :
4114 mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(NULL), mpResBuf(NULL) {}
4116 bool isValid() const { return mpKernel != NULL; }
4118 void fetchResultFromKernel()
4120 if (!isValid())
4121 return;
4123 // Map results back
4124 mpCLResBuf = mpKernel->GetResultBuffer();
4126 // Obtain cl context
4127 ::opencl::KernelEnv kEnv;
4128 ::opencl::setKernelEnv(&kEnv);
4130 cl_int err;
4131 mpResBuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
4132 mpCLResBuf,
4133 CL_TRUE, CL_MAP_READ, 0,
4134 mnGroupLength * sizeof(double), 0, NULL, NULL,
4135 &err));
4137 if (err != CL_SUCCESS)
4139 SAL_WARN("sc.opencl", "clEnqueueMapBuffer failed:: " << ::opencl::errorString(err));
4140 mpResBuf = NULL;
4141 return;
4145 bool pushResultToDocument( ScDocument& rDoc, const ScAddress& rTopPos )
4147 if (!mpResBuf)
4148 return false;
4150 rDoc.SetFormulaResults(rTopPos, mpResBuf, mnGroupLength);
4152 // Obtain cl context
4153 ::opencl::KernelEnv kEnv;
4154 ::opencl::setKernelEnv(&kEnv);
4156 cl_int err;
4157 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpCLResBuf, mpResBuf, 0, NULL, NULL);
4159 if (err != CL_SUCCESS)
4161 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err));
4162 return false;
4165 return true;
4169 class CLInterpreterContext
4171 std::shared_ptr<DynamicKernel> mpKernelStore; /// for managed kernel instance.
4172 DynamicKernel* mpKernel;
4174 SCROW mnGroupLength;
4176 public:
4177 CLInterpreterContext( SCROW nGroupLength ) :
4178 mpKernel(NULL), mnGroupLength(nGroupLength) {}
4180 bool isValid() const
4182 return mpKernel != NULL;
4185 void setManagedKernel( DynamicKernel* pKernel )
4187 mpKernelStore.reset(pKernel);
4188 mpKernel = pKernel;
4191 #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
4192 void setUnmanagedKernel( DynamicKernel* pKernel )
4194 mpKernel = pKernel;
4196 #endif
4198 CLInterpreterResult launchKernel()
4200 if (!isValid())
4201 return CLInterpreterResult();
4205 // Run the kernel.
4206 mpKernel->Launch(mnGroupLength);
4208 catch (const UnhandledToken& ut)
4210 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4211 return CLInterpreterResult();
4213 catch (const OpenCLError& oce)
4215 SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCL error from " << oce.mFunction << ": " << ::opencl::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
4216 return CLInterpreterResult();
4218 catch (const Unhandled& uh)
4220 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled case at " << uh.mFile << ":" << uh.mLineNumber);
4221 return CLInterpreterResult();
4223 catch (...)
4225 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
4226 return CLInterpreterResult();
4229 return CLInterpreterResult(mpKernel, mnGroupLength);
4234 CLInterpreterContext createCLInterpreterContext( const ScCalcConfig& rConfig,
4235 ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode )
4237 CLInterpreterContext aCxt(xGroup->mnLength);
4239 #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
4240 if (rGroup.meKernelState == sc::OpenCLKernelCompilationScheduled ||
4241 rGroup.meKernelState == sc::OpenCLKernelBinaryCreated)
4243 if (rGroup.meKernelState == sc::OpenCLKernelCompilationScheduled)
4245 ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.wait();
4246 ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.reset();
4249 // Kernel instance is managed by the formula group.
4250 aCxt.setUnmanagedKernel(static_cast<DynamicKernel*>(xGroup->mpCompiledFormula));
4252 else
4254 assert(xGroup->meCalcState == sc::GroupCalcRunning);
4255 aCxt.setManagedKernel(static_cast<DynamicKernel*>(DynamicKernel::create(rConfig, rCode, xGroup->mnLength)));
4257 #else
4258 aCxt.setManagedKernel(static_cast<DynamicKernel*>(DynamicKernel::create(rConfig, rCode, xGroup->mnLength)));
4259 #endif
4261 return aCxt;
4264 void genRPNTokens( ScDocument& rDoc, const ScAddress& rTopPos, ScTokenArray& rCode )
4266 ScCompiler aComp(&rDoc, rTopPos, rCode);
4267 aComp.SetGrammar(rDoc.GetGrammar());
4268 // Disable special ordering for jump commands for the OpenCL interpreter.
4269 aComp.EnableJumpCommandReorder(false);
4270 aComp.CompileTokenArray(); // Regenerate RPN tokens.
4273 bool waitForResults()
4275 // Obtain cl context
4276 ::opencl::KernelEnv kEnv;
4277 ::opencl::setKernelEnv(&kEnv);
4279 cl_int err = clFinish(kEnv.mpkCmdQueue);
4280 if (err != CL_SUCCESS)
4281 SAL_WARN("sc.opencl", "clFinish failed: " << ::opencl::errorString(err));
4283 return err == CL_SUCCESS;
4288 bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
4289 const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
4290 ScTokenArray& rCode )
4292 MergeCalcConfig(rDoc);
4294 genRPNTokens(rDoc, rTopPos, rCode);
4296 CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
4297 if (!aCxt.isValid())
4298 return false;
4300 CLInterpreterResult aRes = aCxt.launchKernel();
4301 if (!aRes.isValid())
4302 return false;
4304 if (!waitForResults())
4305 return false;
4307 aRes.fetchResultFromKernel();
4309 return aRes.pushResultToDocument(rDoc, rTopPos);
4312 }} // namespace sc::opencl
4314 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */