1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include "formulagroup.hxx"
11 #include "formulagroupcl.hxx"
12 #include "clkernelthread.hxx"
13 #include "grouptokenconverter.hxx"
14 #include "document.hxx"
15 #include "formulacell.hxx"
16 #include "tokenarray.hxx"
17 #include "compiler.hxx"
18 #include "interpre.hxx"
19 #include <formula/random.hxx>
20 #include <formula/vectortoken.hxx>
21 #include "scmatrix.hxx"
23 #include <opencl/openclwrapper.hxx>
25 #include "op_financial.hxx"
26 #include "op_database.hxx"
27 #include "op_math.hxx"
28 #include "op_logical.hxx"
29 #include "op_statistical.hxx"
30 #include "op_array.hxx"
31 #include "op_spreadsheet.hxx"
32 #include "op_addin.hxx"
35 #define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
36 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
38 static const char* publicFunc
=
40 "#define errIllegalFPOperation 503 // #NUM!\n"
41 "#define errNoValue 519 // #VALUE!\n"
42 "#define errDivisionByZero 532 // #DIV/0!\n"
43 "#define NOTAVAILABLE 0x7fff // #N/A\n"
45 "double CreateDoubleError(ulong nErr)\n"
47 " return nan(nErr);\n"
50 "uint GetDoubleErrorValue(double fVal)\n"
52 " if (isfinite(fVal))\n"
55 " return errIllegalFPOperation; // normal INF\n"
56 " if (as_ulong(fVal) & 0XFFFF0000u)\n"
57 " return errNoValue; // just a normal NAN\n"
58 " return (as_ulong(fVal) & 0XFFFF); // any other error\n"
61 "int isNan(double a) { return isnan(a); }\n"
62 "double fsum_count(double a, double b, __private int *p) {\n"
63 " bool t = isNan(a);\n"
67 "double fmin_count(double a, double b, __private int *p) {\n"
68 " double result = fmin(a, b);\n"
69 " bool t = isnan(result);\n"
73 "double fmax_count(double a, double b, __private int *p) {\n"
74 " double result = fmax(a, b);\n"
75 " bool t = isnan(result);\n"
79 "double fsum(double a, double b) { return isNan(a)?b:a+b; }\n"
80 "double legalize(double a, double b) { return isNan(a)?b:a;}\n"
81 "double fsub(double a, double b) { return a-b; }\n"
82 "double fdiv(double a, double b) { return a/b; }\n"
83 "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
90 const unsigned long __nan
[2] = {0xffffffff, 0x7fffffff};
93 #define NAN (*(const double*) __nan)
103 #include <rtl/digest.h>
105 #include <boost/scoped_ptr.hpp>
106 #include <boost/scoped_array.hpp>
108 using namespace formula
;
110 namespace sc
{ namespace opencl
{
114 std::string
StackVarEnumToString(StackVar
const e
)
118 #define CASE(x) case sv##x: return #x
134 CASE(HybridValueCell
);
135 CASE(ExternalSingleRef
);
136 CASE(ExternalDoubleRef
);
138 CASE(SingleVectorRef
);
139 CASE(DoubleVectorRef
);
147 return std::to_string(static_cast<int>(e
));
150 #ifdef SAL_DETAIL_ENABLE_LOG_INFO
151 std::string
linenumberify(const std::string
& s
)
153 std::stringstream ss
;
157 while ((newline
= s
.find('\n', start
)) != std::string::npos
)
159 ss
<< "/*" << std::setw(4) << linenumber
++ << "*/ " << s
.substr(start
, newline
-start
+1);
162 if (start
< s
.size())
163 ss
<< "/*" << std::setw(4) << linenumber
++ << "*/ " << s
.substr(start
, std::string::npos
);
168 bool AllStringsAreNull(const rtl_uString
* const* pStringArray
, size_t nLength
)
170 if (pStringArray
== nullptr)
173 for (size_t i
= 0; i
< nLength
; i
++)
174 if (pStringArray
[i
] != nullptr)
181 } // anonymous namespace
183 /// Map the buffer used by an argument and do necessary argument setting
184 size_t VectorRef::Marshal( cl_kernel k
, int argno
, int, cl_program
)
186 FormulaToken
* ref
= mFormulaTree
->GetFormulaToken();
187 double* pHostBuffer
= NULL
;
188 size_t szHostBuffer
= 0;
189 if (ref
->GetType() == formula::svSingleVectorRef
)
191 const formula::SingleVectorRefToken
* pSVR
=
192 static_cast<const formula::SingleVectorRefToken
*>(ref
);
194 SAL_INFO("sc.opencl", "SingleVectorRef len=" << pSVR
->GetArrayLength() << " mpNumericArray=" << pSVR
->GetArray().mpNumericArray
<< " (mpStringArray=" << pSVR
->GetArray().mpStringArray
<< ")");
196 pHostBuffer
= const_cast<double*>(pSVR
->GetArray().mpNumericArray
);
197 szHostBuffer
= pSVR
->GetArrayLength() * sizeof(double);
199 else if (ref
->GetType() == formula::svDoubleVectorRef
)
201 const formula::DoubleVectorRefToken
* pDVR
=
202 static_cast<const formula::DoubleVectorRefToken
*>(ref
);
204 SAL_INFO("sc.opencl", "DoubleVectorRef index=" << mnIndex
<< " len=" << pDVR
->GetArrayLength() << " mpNumericArray=" << pDVR
->GetArrays()[mnIndex
].mpNumericArray
<< " (mpStringArray=" << pDVR
->GetArrays()[mnIndex
].mpStringArray
<< ")");
206 pHostBuffer
= const_cast<double*>(
207 pDVR
->GetArrays()[mnIndex
].mpNumericArray
);
208 szHostBuffer
= pDVR
->GetArrayLength() * sizeof(double);
215 ::opencl::KernelEnv kEnv
;
216 ::opencl::setKernelEnv(&kEnv
);
220 mpClmem
= clCreateBuffer(kEnv
.mpkContext
,
221 (cl_mem_flags
)CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
,
224 if (CL_SUCCESS
!= err
)
225 throw OpenCLError("clCreateBuffer", err
, __FILE__
, __LINE__
);
226 SAL_INFO("sc.opencl", "Created buffer " << mpClmem
<< " size " << szHostBuffer
<< " using host buffer " << pHostBuffer
);
230 if (szHostBuffer
== 0)
231 szHostBuffer
= sizeof(double); // a dummy small value
232 // Marshal as a buffer of NANs
233 mpClmem
= clCreateBuffer(kEnv
.mpkContext
,
234 (cl_mem_flags
)CL_MEM_READ_ONLY
| CL_MEM_ALLOC_HOST_PTR
,
235 szHostBuffer
, NULL
, &err
);
236 if (CL_SUCCESS
!= err
)
237 throw OpenCLError("clCreateBuffer", err
, __FILE__
, __LINE__
);
238 SAL_INFO("sc.opencl", "Created buffer " << mpClmem
<< " size " << szHostBuffer
);
240 double* pNanBuffer
= static_cast<double*>(clEnqueueMapBuffer(
241 kEnv
.mpkCmdQueue
, mpClmem
, CL_TRUE
, CL_MAP_WRITE
, 0,
242 szHostBuffer
, 0, NULL
, NULL
, &err
));
243 if (CL_SUCCESS
!= err
)
244 throw OpenCLError("clEnqueueMapBuffer", err
, __FILE__
, __LINE__
);
246 for (size_t i
= 0; i
< szHostBuffer
/ sizeof(double); i
++)
248 err
= clEnqueueUnmapMemObject(kEnv
.mpkCmdQueue
, mpClmem
,
249 pNanBuffer
, 0, NULL
, NULL
);
250 // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
251 if (CL_SUCCESS
!= err
)
252 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err
));
255 SAL_INFO("sc.opencl", "Kernel " << k
<< " arg " << argno
<< ": cl_mem: " << mpClmem
);
256 err
= clSetKernelArg(k
, argno
, sizeof(cl_mem
), (void*)&mpClmem
);
257 if (CL_SUCCESS
!= err
)
258 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
262 /// Arguments that are actually compile-time constant string
263 /// Currently, only the hash is passed.
264 /// TBD(IJSUNG): pass also length and the actual string if there is a
265 /// hash function collision
266 class ConstStringArgument
: public DynamicKernelArgument
269 ConstStringArgument( const ScCalcConfig
& config
, const std::string
& s
,
270 FormulaTreeNodeRef ft
) :
271 DynamicKernelArgument(config
, s
, ft
) { }
272 /// Generate declaration
273 virtual void GenDecl( std::stringstream
& ss
) const SAL_OVERRIDE
275 ss
<< "unsigned " << mSymName
;
277 virtual void GenDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
279 ss
<< GenSlidingWindowDeclRef(false);
281 virtual void GenSlidingWindowDecl( std::stringstream
& ss
) const SAL_OVERRIDE
285 virtual std::string
GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
287 std::stringstream ss
;
288 if (GetFormulaToken()->GetType() != formula::svString
)
290 FormulaToken
* Tok
= GetFormulaToken();
291 ss
<< Tok
->GetString().getString().toAsciiUpperCase().hashCode() << "U";
294 virtual size_t GetWindowSize() const SAL_OVERRIDE
298 /// Pass the 32-bit hash of the string to the kernel
299 virtual size_t Marshal( cl_kernel k
, int argno
, int, cl_program
) SAL_OVERRIDE
301 FormulaToken
* ref
= mFormulaTree
->GetFormulaToken();
302 cl_uint hashCode
= 0;
303 if (ref
->GetType() == formula::svString
)
305 const rtl::OUString s
= ref
->GetString().getString().toAsciiUpperCase();
306 hashCode
= s
.hashCode();
313 // Pass the scalar result back to the rest of the formula kernel
314 SAL_INFO("sc.opencl", "Kernel " << k
<< " arg " << argno
<< ": cl_uint: " << hashCode
);
315 cl_int err
= clSetKernelArg(k
, argno
, sizeof(cl_uint
), (void*)&hashCode
);
316 if (CL_SUCCESS
!= err
)
317 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
322 /// Arguments that are actually compile-time constants
323 class DynamicKernelConstantArgument
: public DynamicKernelArgument
326 DynamicKernelConstantArgument( const ScCalcConfig
& config
, const std::string
& s
,
327 FormulaTreeNodeRef ft
) :
328 DynamicKernelArgument(config
, s
, ft
) { }
329 /// Generate declaration
330 virtual void GenDecl( std::stringstream
& ss
) const SAL_OVERRIDE
332 ss
<< "double " << mSymName
;
334 virtual void GenDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
338 virtual void GenSlidingWindowDecl( std::stringstream
& ss
) const SAL_OVERRIDE
342 virtual std::string
GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
344 if (GetFormulaToken()->GetType() != formula::svDouble
)
348 virtual size_t GetWindowSize() const SAL_OVERRIDE
352 double GetDouble() const
354 FormulaToken
* Tok
= GetFormulaToken();
355 if (Tok
->GetType() != formula::svDouble
)
357 return Tok
->GetDouble();
359 /// Create buffer and pass the buffer to a given kernel
360 virtual size_t Marshal( cl_kernel k
, int argno
, int, cl_program
) SAL_OVERRIDE
362 double tmp
= GetDouble();
363 // Pass the scalar result back to the rest of the formula kernel
364 SAL_INFO("sc.opencl", "Kernel " << k
<< " arg " << argno
<< ": double: " << tmp
);
365 cl_int err
= clSetKernelArg(k
, argno
, sizeof(double), (void*)&tmp
);
366 if (CL_SUCCESS
!= err
)
367 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
372 class DynamicKernelPiArgument
: public DynamicKernelArgument
375 DynamicKernelPiArgument( const ScCalcConfig
& config
, const std::string
& s
,
376 FormulaTreeNodeRef ft
) :
377 DynamicKernelArgument(config
, s
, ft
) { }
378 /// Generate declaration
379 virtual void GenDecl( std::stringstream
& ss
) const SAL_OVERRIDE
381 ss
<< "double " << mSymName
;
383 virtual void GenDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
385 ss
<< "3.14159265358979";
387 virtual void GenSlidingWindowDecl( std::stringstream
& ss
) const SAL_OVERRIDE
391 virtual std::string
GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
395 virtual size_t GetWindowSize() const SAL_OVERRIDE
399 /// Create buffer and pass the buffer to a given kernel
400 virtual size_t Marshal( cl_kernel k
, int argno
, int, cl_program
) SAL_OVERRIDE
403 // Pass the scalar result back to the rest of the formula kernel
404 SAL_INFO("sc.opencl", "Kernel " << k
<< " arg " << argno
<< ": double: " << tmp
);
405 cl_int err
= clSetKernelArg(k
, argno
, sizeof(double), (void*)&tmp
);
406 if (CL_SUCCESS
!= err
)
407 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
412 class DynamicKernelRandomArgument
: public DynamicKernelArgument
415 DynamicKernelRandomArgument( const ScCalcConfig
& config
, const std::string
& s
,
416 FormulaTreeNodeRef ft
) :
417 DynamicKernelArgument(config
, s
, ft
) { }
418 /// Generate declaration
419 virtual void GenDecl( std::stringstream
& ss
) const SAL_OVERRIDE
421 ss
<< "double " << mSymName
;
423 virtual void GenDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
427 virtual void GenSlidingWindowDecl( std::stringstream
& ss
) const SAL_OVERRIDE
429 ss
<< "int " << mSymName
;
431 virtual std::string
GenSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
433 return mSymName
+ "_Random(" + mSymName
+ ")";
435 virtual void GenSlidingWindowFunction( std::stringstream
& ss
) SAL_OVERRIDE
437 // This string is from the pi_opencl_kernel.i file as
438 // generated when building the Random123 examples. Unused
439 // stuff has been removed, and the actual kernel is not the
440 // same as in the totally different use case of that example,
441 // of course. Only the code that calculates the counter-based
442 // random number and what it needs is left.
445 #ifndef DEFINED_RANDOM123_STUFF\n\
446 #define DEFINED_RANDOM123_STUFF\n\
449 Copyright 2010-2011, D. E. Shaw Research.\n\
450 All rights reserved.\n\
452 Redistribution and use in source and binary forms, with or without\n\
453 modification, are permitted provided that the following conditions are\n\
456 * Redistributions of source code must retain the above copyright\n\
457 notice, this list of conditions, and the following disclaimer.\n\
459 * Redistributions in binary form must reproduce the above copyright\n\
460 notice, this list of conditions, and the following disclaimer in the\n\
461 documentation and/or other materials provided with the distribution.\n\
463 * Neither the name of D. E. Shaw Research nor the names of its\n\
464 contributors may be used to endorse or promote products derived from\n\
465 this software without specific prior written permission.\n\
467 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
468 \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
469 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
470 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
471 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
472 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
473 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
474 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
475 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
476 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
477 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
480 typedef uint uint32_t;\n\
481 struct r123array2x32\n\
485 enum r123_enum_threefry32x2\n\
496 inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
497 __attribute__ ((always_inline));\n\
499 RotL_32 (uint32_t x, unsigned int N)\n\
501 return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
504 typedef struct r123array2x32 threefry2x32_ctr_t;\n\
505 typedef struct r123array2x32 threefry2x32_key_t;\n\
506 typedef struct r123array2x32 threefry2x32_ukey_t;\n\
507 inline threefry2x32_key_t\n\
508 threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
513 inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
514 threefry2x32_ctr_t in,\n\
515 threefry2x32_key_t k)\n\
516 __attribute__ ((always_inline));\n\
517 inline threefry2x32_ctr_t\n\
518 threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
519 threefry2x32_key_t k)\n\
521 threefry2x32_ctr_t X;\n\
522 uint32_t ks[2 + 1];\n\
524 ks[2] = 0x1BD11BDA;\n\
525 for (i = 0; i < 2; i++) {\n\
532 if (Nrounds > 0) {\n\
534 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
537 if (Nrounds > 1) {\n\
539 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
542 if (Nrounds > 2) {\n\
544 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
547 if (Nrounds > 3) {\n\
549 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
552 if (Nrounds > 3) {\n\
557 if (Nrounds > 4) {\n\
559 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
562 if (Nrounds > 5) {\n\
564 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
567 if (Nrounds > 6) {\n\
569 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
572 if (Nrounds > 7) {\n\
574 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
577 if (Nrounds > 7) {\n\
582 if (Nrounds > 8) {\n\
584 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
587 if (Nrounds > 9) {\n\
589 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
592 if (Nrounds > 10) {\n\
594 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
597 if (Nrounds > 11) {\n\
599 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
602 if (Nrounds > 11) {\n\
607 if (Nrounds > 12) {\n\
609 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
612 if (Nrounds > 13) {\n\
614 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
617 if (Nrounds > 14) {\n\
619 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
622 if (Nrounds > 15) {\n\
624 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
627 if (Nrounds > 15) {\n\
632 if (Nrounds > 16) {\n\
634 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
637 if (Nrounds > 17) {\n\
639 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
642 if (Nrounds > 18) {\n\
644 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
647 if (Nrounds > 19) {\n\
649 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
652 if (Nrounds > 19) {\n\
657 if (Nrounds > 20) {\n\
659 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
662 if (Nrounds > 21) {\n\
664 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
667 if (Nrounds > 22) {\n\
669 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
672 if (Nrounds > 23) {\n\
674 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
677 if (Nrounds > 23) {\n\
682 if (Nrounds > 24) {\n\
684 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
687 if (Nrounds > 25) {\n\
689 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
692 if (Nrounds > 26) {\n\
694 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
697 if (Nrounds > 27) {\n\
699 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
702 if (Nrounds > 27) {\n\
707 if (Nrounds > 28) {\n\
709 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
712 if (Nrounds > 29) {\n\
714 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
717 if (Nrounds > 30) {\n\
719 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
722 if (Nrounds > 31) {\n\
724 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
727 if (Nrounds > 31) {\n\
735 enum r123_enum_threefry2x32\n\
736 { threefry2x32_rounds = 20 };\n\
737 inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
738 threefry2x32_key_t k)\n\
739 __attribute__ ((always_inline));\n\
740 inline threefry2x32_ctr_t\n\
741 threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
743 return threefry2x32_R (threefry2x32_rounds, in, k);\n\
748 ss
<< "double " << mSymName
<< "_Random (int seed)\n\
750 unsigned tid = get_global_id(0);\n\
751 threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
752 threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
753 c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
754 const double factor = 1./(" << SAL_MAX_UINT32
<< ".0 + 1.0);\n\
755 const double halffactor = 0.5*factor;\n\
756 return c.v[0] * factor + halffactor;\n\
760 virtual size_t GetWindowSize() const SAL_OVERRIDE
764 /// Create buffer and pass the buffer to a given kernel
765 virtual size_t Marshal( cl_kernel k
, int argno
, int, cl_program
) SAL_OVERRIDE
767 cl_int seed
= formula::rng::nRandom(0, SAL_MAX_INT32
);
768 // Pass the scalar result back to the rest of the formula kernel
769 SAL_INFO("sc.opencl", "Kernel " << k
<< " arg " << argno
<< ": cl_int: " << seed
);
770 cl_int err
= clSetKernelArg(k
, argno
, sizeof(cl_int
), (void*)&seed
);
771 if (CL_SUCCESS
!= err
)
772 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
777 /// A vector of strings
778 class DynamicKernelStringArgument
: public VectorRef
781 DynamicKernelStringArgument( const ScCalcConfig
& config
, const std::string
& s
,
782 FormulaTreeNodeRef ft
, int index
= 0 ) :
783 VectorRef(config
, s
, ft
, index
) { }
785 virtual void GenSlidingWindowFunction( std::stringstream
& ) SAL_OVERRIDE
{ }
786 /// Generate declaration
787 virtual void GenDecl( std::stringstream
& ss
) const SAL_OVERRIDE
789 ss
<< "__global unsigned int *" << mSymName
;
791 virtual void GenSlidingWindowDecl( std::stringstream
& ss
) const SAL_OVERRIDE
793 DynamicKernelStringArgument::GenDecl(ss
);
795 virtual size_t Marshal( cl_kernel
, int, int, cl_program
) SAL_OVERRIDE
;
798 /// Marshal a string vector reference
799 size_t DynamicKernelStringArgument::Marshal( cl_kernel k
, int argno
, int, cl_program
)
801 FormulaToken
* ref
= mFormulaTree
->GetFormulaToken();
803 ::opencl::KernelEnv kEnv
;
804 ::opencl::setKernelEnv(&kEnv
);
806 formula::VectorRefArray vRef
;
808 if (ref
->GetType() == formula::svSingleVectorRef
)
810 const formula::SingleVectorRefToken
* pSVR
=
811 static_cast<const formula::SingleVectorRefToken
*>(ref
);
812 nStrings
= pSVR
->GetArrayLength();
813 vRef
= pSVR
->GetArray();
815 else if (ref
->GetType() == formula::svDoubleVectorRef
)
817 const formula::DoubleVectorRefToken
* pDVR
=
818 static_cast<const formula::DoubleVectorRefToken
*>(ref
);
819 nStrings
= pDVR
->GetArrayLength();
820 vRef
= pDVR
->GetArrays()[mnIndex
];
822 size_t szHostBuffer
= nStrings
* sizeof(cl_int
);
823 cl_uint
* pHashBuffer
= NULL
;
825 if (vRef
.mpStringArray
!= NULL
)
827 // Marshal strings. Right now we pass hashes of these string
828 mpClmem
= clCreateBuffer(kEnv
.mpkContext
,
829 (cl_mem_flags
)CL_MEM_READ_ONLY
| CL_MEM_ALLOC_HOST_PTR
,
830 szHostBuffer
, NULL
, &err
);
831 if (CL_SUCCESS
!= err
)
832 throw OpenCLError("clCreateBuffer", err
, __FILE__
, __LINE__
);
833 SAL_INFO("sc.opencl", "Created buffer " << mpClmem
<< " size " << szHostBuffer
);
835 pHashBuffer
= static_cast<cl_uint
*>(clEnqueueMapBuffer(
836 kEnv
.mpkCmdQueue
, mpClmem
, CL_TRUE
, CL_MAP_WRITE
, 0,
837 szHostBuffer
, 0, NULL
, NULL
, &err
));
838 if (CL_SUCCESS
!= err
)
839 throw OpenCLError("clEnqueueMapBuffer", err
, __FILE__
, __LINE__
);
841 for (size_t i
= 0; i
< nStrings
; i
++)
843 if (vRef
.mpStringArray
[i
])
845 const OUString tmp
= OUString(vRef
.mpStringArray
[i
]);
846 pHashBuffer
[i
] = tmp
.hashCode();
857 szHostBuffer
= sizeof(cl_int
); // a dummy small value
858 // Marshal as a buffer of NANs
859 mpClmem
= clCreateBuffer(kEnv
.mpkContext
,
860 (cl_mem_flags
)CL_MEM_READ_ONLY
| CL_MEM_ALLOC_HOST_PTR
,
861 szHostBuffer
, NULL
, &err
);
862 if (CL_SUCCESS
!= err
)
863 throw OpenCLError("clCreateBuffer", err
, __FILE__
, __LINE__
);
864 SAL_INFO("sc.opencl", "Created buffer " << mpClmem
<< " size " << szHostBuffer
);
866 pHashBuffer
= static_cast<cl_uint
*>(clEnqueueMapBuffer(
867 kEnv
.mpkCmdQueue
, mpClmem
, CL_TRUE
, CL_MAP_WRITE
, 0,
868 szHostBuffer
, 0, NULL
, NULL
, &err
));
869 if (CL_SUCCESS
!= err
)
870 throw OpenCLError("clEnqueueMapBuffer", err
, __FILE__
, __LINE__
);
872 for (size_t i
= 0; i
< szHostBuffer
/ sizeof(cl_int
); i
++)
875 err
= clEnqueueUnmapMemObject(kEnv
.mpkCmdQueue
, mpClmem
,
876 pHashBuffer
, 0, NULL
, NULL
);
877 if (CL_SUCCESS
!= err
)
878 throw OpenCLError("clEnqueueUnmapMemObject", err
, __FILE__
, __LINE__
);
880 SAL_INFO("sc.opencl", "Kernel " << k
<< " arg " << argno
<< ": cl_mem: " << mpClmem
);
881 err
= clSetKernelArg(k
, argno
, sizeof(cl_mem
), (void*)&mpClmem
);
882 if (CL_SUCCESS
!= err
)
883 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
887 /// A mixed string/numberic vector
888 class DynamicKernelMixedArgument
: public VectorRef
891 DynamicKernelMixedArgument( const ScCalcConfig
& config
, const std::string
& s
,
892 FormulaTreeNodeRef ft
) :
893 VectorRef(config
, s
, ft
), mStringArgument(config
, s
+ "s", ft
) { }
894 virtual void GenSlidingWindowDecl( std::stringstream
& ss
) const SAL_OVERRIDE
896 VectorRef::GenSlidingWindowDecl(ss
);
898 mStringArgument
.GenSlidingWindowDecl(ss
);
900 virtual bool IsMixedArgument() const SAL_OVERRIDE
{ return true;}
901 virtual void GenSlidingWindowFunction( std::stringstream
& ) SAL_OVERRIDE
{ }
902 /// Generate declaration
903 virtual void GenDecl( std::stringstream
& ss
) const SAL_OVERRIDE
905 VectorRef::GenDecl(ss
);
907 mStringArgument
.GenDecl(ss
);
909 virtual void GenDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
911 VectorRef::GenDeclRef(ss
);
913 mStringArgument
.GenDeclRef(ss
);
915 virtual void GenNumDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
917 VectorRef::GenSlidingWindowDecl(ss
);
919 virtual void GenStringDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
921 mStringArgument
.GenSlidingWindowDecl(ss
);
923 virtual std::string
GenSlidingWindowDeclRef( bool nested
) const SAL_OVERRIDE
925 std::stringstream ss
;
926 ss
<< "(!isNan(" << VectorRef::GenSlidingWindowDeclRef();
927 ss
<< ")?" << VectorRef::GenSlidingWindowDeclRef();
928 ss
<< ":" << mStringArgument
.GenSlidingWindowDeclRef(nested
);
932 virtual std::string
GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
934 std::stringstream ss
;
935 ss
<< VectorRef::GenSlidingWindowDeclRef();
938 virtual std::string
GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
940 std::stringstream ss
;
941 ss
<< mStringArgument
.GenSlidingWindowDeclRef();
944 virtual size_t Marshal( cl_kernel k
, int argno
, int vw
, cl_program p
) SAL_OVERRIDE
946 int i
= VectorRef::Marshal(k
, argno
, vw
, p
);
947 i
+= mStringArgument
.Marshal(k
, argno
+ i
, vw
, p
);
952 DynamicKernelStringArgument mStringArgument
;
955 /// Handling a Double Vector that is used as a sliding window input
956 /// to either a sliding window average or sum-of-products
957 /// Generate a sequential loop for reductions
962 class DynamicKernelSlidingArgument
: public Base
965 DynamicKernelSlidingArgument( const ScCalcConfig
& config
, const std::string
& s
,
966 FormulaTreeNodeRef ft
, boost::shared_ptr
<SlidingFunctionBase
>& CodeGen
,
968 Base(config
, s
, ft
, index
), mpCodeGen(CodeGen
), mpClmem2(NULL
)
970 FormulaToken
* t
= ft
->GetFormulaToken();
971 if (t
->GetType() != formula::svDoubleVectorRef
)
973 mpDVR
= static_cast<const formula::DoubleVectorRefToken
*>(t
);
974 bIsStartFixed
= mpDVR
->IsStartFixed();
975 bIsEndFixed
= mpDVR
->IsEndFixed();
977 // Should only be called by SumIfs. Yikes!
978 virtual bool NeedParallelReduction() const
980 assert(dynamic_cast<OpSumIfs
*>(mpCodeGen
.get()));
981 return GetWindowSize() > 100 &&
982 ((GetStartFixed() && GetEndFixed()) ||
983 (!GetStartFixed() && !GetEndFixed()));
985 virtual void GenSlidingWindowFunction( std::stringstream
& ) { }
987 virtual std::string
GenSlidingWindowDeclRef( bool nested
= false ) const
989 size_t nArrayLength
= mpDVR
->GetArrayLength();
990 std::stringstream ss
;
991 if (!bIsStartFixed
&& !bIsEndFixed
)
994 ss
<< "((i+gid0) <" << nArrayLength
<< "?";
995 ss
<< Base::GetName() << "[i + gid0]";
1002 ss
<< "(i <" << nArrayLength
<< "?";
1003 ss
<< Base::GetName() << "[i]";
1009 /// Controls how the elements in the DoubleVectorRef are traversed
1010 virtual size_t GenReductionLoopHeader(
1011 std::stringstream
& ss
, bool& needBody
)
1014 size_t nCurWindowSize
= mpDVR
->GetRefRowSize();
1017 if (!mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed())
1019 ss
<< "for (int i = ";
1020 ss
<< "gid0; i < " << mpDVR
->GetArrayLength();
1021 ss
<< " && i < " << nCurWindowSize
<< "; i++){\n\t\t";
1023 return nCurWindowSize
;
1025 else if (mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
1027 ss
<< "for (int i = ";
1028 ss
<< "0; i < " << mpDVR
->GetArrayLength();
1029 ss
<< " && i < gid0+" << nCurWindowSize
<< "; i++){\n\t\t";
1031 return nCurWindowSize
;
1033 else if (!mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
1035 ss
<< "tmpBottom = " << mpCodeGen
->GetBottom() << ";\n\t";
1036 ss
<< "{int i;\n\t";
1037 std::stringstream temp1
, temp2
;
1038 int outLoopSize
= UNROLLING_FACTOR
;
1039 if (nCurWindowSize
/ outLoopSize
!= 0)
1041 ss
<< "for(int outLoop=0; outLoop<" << nCurWindowSize
/ outLoopSize
<< "; outLoop++){\n\t";
1042 for (int count
= 0; count
< outLoopSize
; count
++)
1044 ss
<< "i = outLoop*" << outLoopSize
<< "+" << count
<< ";\n\t";
1047 temp1
<< "if(i + gid0 < " << mpDVR
->GetArrayLength();
1048 temp1
<< "){\n\t\t";
1049 temp1
<< "tmp = legalize(";
1050 temp1
<< mpCodeGen
->Gen2(GenSlidingWindowDeclRef(), "tmp");
1051 temp1
<< ", tmp);\n\t\t\t";
1058 // The residual of mod outLoopSize
1059 for (unsigned int count
= nCurWindowSize
/ outLoopSize
* outLoopSize
; count
< nCurWindowSize
; count
++)
1061 ss
<< "i = " << count
<< ";\n\t";
1062 if (count
== nCurWindowSize
/ outLoopSize
* outLoopSize
)
1064 temp2
<< "if(i + gid0 < " << mpDVR
->GetArrayLength();
1065 temp2
<< "){\n\t\t";
1066 temp2
<< "tmp = legalize(";
1067 temp2
<< mpCodeGen
->Gen2(GenSlidingWindowDeclRef(), "tmp");
1068 temp2
<< ", tmp);\n\t\t\t";
1075 return nCurWindowSize
;
1077 // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1081 ss
<< "tmpBottom = " << mpCodeGen
->GetBottom() << ";\n\t";
1082 ss
<< "{int i;\n\t";
1083 std::stringstream temp1
, temp2
;
1084 int outLoopSize
= UNROLLING_FACTOR
;
1085 if (nCurWindowSize
/ outLoopSize
!= 0)
1087 ss
<< "for(int outLoop=0; outLoop<" << nCurWindowSize
/ outLoopSize
<< "; outLoop++){\n\t";
1088 for (int count
= 0; count
< outLoopSize
; count
++)
1090 ss
<< "i = outLoop*" << outLoopSize
<< "+" << count
<< ";\n\t";
1093 temp1
<< "tmp = legalize(";
1094 temp1
<< mpCodeGen
->Gen2(GenSlidingWindowDeclRef(), "tmp");
1095 temp1
<< ", tmp);\n\t\t\t";
1101 // The residual of mod outLoopSize
1102 for (unsigned int count
= nCurWindowSize
/ outLoopSize
* outLoopSize
; count
< nCurWindowSize
; count
++)
1104 ss
<< "i = " << count
<< ";\n\t";
1105 if (count
== nCurWindowSize
/ outLoopSize
* outLoopSize
)
1107 temp2
<< "tmp = legalize(";
1108 temp2
<< mpCodeGen
->Gen2(GenSlidingWindowDeclRef(), "tmp");
1109 temp2
<< ", tmp);\n\t\t\t";
1115 return nCurWindowSize
;
1119 ~DynamicKernelSlidingArgument()
1124 err
= clReleaseMemObject(mpClmem2
);
1125 SAL_WARN_IF(err
!= CL_SUCCESS
, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err
));
1130 size_t GetArrayLength() const { return mpDVR
->GetArrayLength(); }
1132 size_t GetWindowSize() const { return mpDVR
->GetRefRowSize(); }
1134 size_t GetStartFixed() const { return bIsStartFixed
; }
1136 size_t GetEndFixed() const { return bIsEndFixed
; }
1139 bool bIsStartFixed
, bIsEndFixed
;
1140 const formula::DoubleVectorRefToken
* mpDVR
;
1141 // from parent nodes
1142 boost::shared_ptr
<SlidingFunctionBase
> mpCodeGen
;
1143 // controls whether to invoke the reduction kernel during marshaling or not
1147 /// A mixed string/numberic vector
1148 class DynamicKernelMixedSlidingArgument
: public VectorRef
1151 DynamicKernelMixedSlidingArgument( const ScCalcConfig
& config
, const std::string
& s
,
1152 FormulaTreeNodeRef ft
, boost::shared_ptr
<SlidingFunctionBase
>& CodeGen
,
1154 VectorRef(config
, s
, ft
),
1155 mDoubleArgument(mCalcConfig
, s
, ft
, CodeGen
, index
),
1156 mStringArgument(mCalcConfig
, s
+ "s", ft
, CodeGen
, index
) { }
1157 virtual void GenSlidingWindowDecl( std::stringstream
& ss
) const SAL_OVERRIDE
1159 mDoubleArgument
.GenSlidingWindowDecl(ss
);
1161 mStringArgument
.GenSlidingWindowDecl(ss
);
1163 virtual void GenSlidingWindowFunction( std::stringstream
& ) SAL_OVERRIDE
{ }
1164 /// Generate declaration
1165 virtual void GenDecl( std::stringstream
& ss
) const SAL_OVERRIDE
1167 mDoubleArgument
.GenDecl(ss
);
1169 mStringArgument
.GenDecl(ss
);
1171 virtual void GenDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
1173 mDoubleArgument
.GenDeclRef(ss
);
1175 mStringArgument
.GenDeclRef(ss
);
1177 virtual std::string
GenSlidingWindowDeclRef( bool nested
) const SAL_OVERRIDE
1179 std::stringstream ss
;
1180 ss
<< "(!isNan(" << mDoubleArgument
.GenSlidingWindowDeclRef();
1181 ss
<< ")?" << mDoubleArgument
.GenSlidingWindowDeclRef();
1182 ss
<< ":" << mStringArgument
.GenSlidingWindowDeclRef(nested
);
1186 virtual bool IsMixedArgument() const SAL_OVERRIDE
{ return true;}
1187 virtual std::string
GenDoubleSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
1189 std::stringstream ss
;
1190 ss
<< mDoubleArgument
.GenSlidingWindowDeclRef();
1193 virtual std::string
GenStringSlidingWindowDeclRef( bool = false ) const SAL_OVERRIDE
1195 std::stringstream ss
;
1196 ss
<< mStringArgument
.GenSlidingWindowDeclRef();
1199 virtual void GenNumDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
1201 mDoubleArgument
.GenDeclRef(ss
);
1203 virtual void GenStringDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
1205 mStringArgument
.GenDeclRef(ss
);
1207 virtual size_t Marshal( cl_kernel k
, int argno
, int vw
, cl_program p
) SAL_OVERRIDE
1209 int i
= mDoubleArgument
.Marshal(k
, argno
, vw
, p
);
1210 i
+= mStringArgument
.Marshal(k
, argno
+ i
, vw
, p
);
1215 DynamicKernelSlidingArgument
<VectorRef
> mDoubleArgument
;
1216 DynamicKernelSlidingArgument
<DynamicKernelStringArgument
> mStringArgument
;
1219 /// Holds the symbol table for a given dynamic kernel
1223 typedef std::map
<const formula::FormulaToken
*, DynamicKernelArgumentRef
> ArgumentMap
;
1224 // This avoids instability caused by using pointer as the key type
1225 typedef std::list
<DynamicKernelArgumentRef
> ArgumentList
;
1226 SymbolTable() : mCurId(0) { }
1228 const DynamicKernelArgument
* DeclRefArg( const ScCalcConfig
& config
, FormulaTreeNodeRef
, SlidingFunctionBase
* pCodeGen
, int nResultSize
);
1229 /// Used to generate sliding window helpers
1230 void DumpSlidingWindowFunctions( std::stringstream
& ss
)
1232 for (ArgumentList::iterator it
= mParams
.begin(), e
= mParams
.end(); it
!= e
;
1235 (*it
)->GenSlidingWindowFunction(ss
);
1239 /// Memory mapping from host to device and pass buffers to the given kernel as
1241 void Marshal( cl_kernel
, int, cl_program
);
1244 unsigned int mCurId
;
1245 ArgumentMap mSymbols
;
1246 ArgumentList mParams
;
1249 void SymbolTable::Marshal( cl_kernel k
, int nVectorWidth
, cl_program pProgram
)
1251 int i
= 1; //The first argument is reserved for results
1252 for (ArgumentList::iterator it
= mParams
.begin(), e
= mParams
.end(); it
!= e
;
1255 i
+= (*it
)->Marshal(k
, i
, nVectorWidth
, pProgram
);
1259 /// Handling a Double Vector that is used as a sliding window input
1260 /// Performs parallel reduction based on given operator
1261 template<class Base
>
1262 class ParallelReductionVectorRef
: public Base
1265 ParallelReductionVectorRef( const ScCalcConfig
& config
, const std::string
& s
,
1266 FormulaTreeNodeRef ft
, boost::shared_ptr
<SlidingFunctionBase
>& CodeGen
,
1268 Base(config
, s
, ft
, index
), mpCodeGen(CodeGen
), mpClmem2(NULL
)
1270 FormulaToken
* t
= ft
->GetFormulaToken();
1271 if (t
->GetType() != formula::svDoubleVectorRef
)
1273 mpDVR
= static_cast<const formula::DoubleVectorRefToken
*>(t
);
1274 bIsStartFixed
= mpDVR
->IsStartFixed();
1275 bIsEndFixed
= mpDVR
->IsEndFixed();
1277 /// Emit the definition for the auxiliary reduction kernel
1278 virtual void GenSlidingWindowFunction( std::stringstream
& ss
)
1280 if (!dynamic_cast<OpAverage
*>(mpCodeGen
.get()))
1282 std::string name
= Base::GetName();
1283 ss
<< "__kernel void " << name
;
1284 ss
<< "_reduction(__global double* A, "
1285 "__global double *result,int arrayLength,int windowSize){\n";
1286 ss
<< " double tmp, current_result =" <<
1287 mpCodeGen
->GetBottom();
1289 ss
<< " int writePos = get_group_id(1);\n";
1290 ss
<< " int lidx = get_local_id(0);\n";
1291 ss
<< " __local double shm_buf[256];\n";
1292 if (mpDVR
->IsStartFixed())
1293 ss
<< " int offset = 0;\n";
1294 else // if (!mpDVR->IsStartFixed())
1295 ss
<< " int offset = get_group_id(1);\n";
1296 if (mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed())
1297 ss
<< " int end = windowSize;\n";
1298 else if (!mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
1299 ss
<< " int end = offset + windowSize;\n";
1300 else if (mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
1301 ss
<< " int end = windowSize + get_group_id(1);\n";
1302 else if (!mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed())
1303 ss
<< " int end = windowSize;\n";
1304 ss
<< " end = min(end, arrayLength);\n";
1306 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1307 ss
<< " int loop = arrayLength/512 + 1;\n";
1308 ss
<< " for (int l=0; l<loop; l++){\n";
1309 ss
<< " tmp = " << mpCodeGen
->GetBottom() << ";\n";
1310 ss
<< " int loopOffset = l*512;\n";
1311 ss
<< " if((loopOffset + lidx + offset + 256) < end) {\n";
1312 ss
<< " tmp = legalize(" << mpCodeGen
->Gen2(
1313 "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
1314 ss
<< " tmp = legalize(" << mpCodeGen
->Gen2(
1315 "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
1316 ss
<< " } else if ((loopOffset + lidx + offset) < end)\n";
1317 ss
<< " tmp = legalize(" << mpCodeGen
->Gen2(
1318 "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
1319 ss
<< " shm_buf[lidx] = tmp;\n";
1320 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1321 ss
<< " for (int i = 128; i >0; i/=2) {\n";
1322 ss
<< " if (lidx < i)\n";
1323 ss
<< " shm_buf[lidx] = ";
1324 // Special case count
1325 if (dynamic_cast<OpCount
*>(mpCodeGen
.get()))
1326 ss
<< "shm_buf[lidx] + shm_buf[lidx + i];\n";
1328 ss
<< mpCodeGen
->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
1329 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1331 ss
<< " if (lidx == 0)\n";
1332 ss
<< " current_result =";
1333 if (dynamic_cast<OpCount
*>(mpCodeGen
.get()))
1334 ss
<< "current_result + shm_buf[0]";
1336 ss
<< mpCodeGen
->Gen2("current_result", "shm_buf[0]");
1338 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1340 ss
<< " if (lidx == 0)\n";
1341 ss
<< " result[writePos] = current_result;\n";
1346 std::string name
= Base::GetName();
1348 ss
<< "__kernel void " << name
<< "_sum";
1349 ss
<< "_reduction(__global double* A, "
1350 "__global double *result,int arrayLength,int windowSize){\n";
1351 ss
<< " double tmp, current_result =" <<
1352 mpCodeGen
->GetBottom();
1354 ss
<< " int writePos = get_group_id(1);\n";
1355 ss
<< " int lidx = get_local_id(0);\n";
1356 ss
<< " __local double shm_buf[256];\n";
1357 if (mpDVR
->IsStartFixed())
1358 ss
<< " int offset = 0;\n";
1359 else // if (!mpDVR->IsStartFixed())
1360 ss
<< " int offset = get_group_id(1);\n";
1361 if (mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed())
1362 ss
<< " int end = windowSize;\n";
1363 else if (!mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
1364 ss
<< " int end = offset + windowSize;\n";
1365 else if (mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
1366 ss
<< " int end = windowSize + get_group_id(1);\n";
1367 else if (!mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed())
1368 ss
<< " int end = windowSize;\n";
1369 ss
<< " end = min(end, arrayLength);\n";
1370 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1371 ss
<< " int loop = arrayLength/512 + 1;\n";
1372 ss
<< " for (int l=0; l<loop; l++){\n";
1373 ss
<< " tmp = " << mpCodeGen
->GetBottom() << ";\n";
1374 ss
<< " int loopOffset = l*512;\n";
1375 ss
<< " if((loopOffset + lidx + offset + 256) < end) {\n";
1376 ss
<< " tmp = legalize(";
1377 ss
<< "(A[loopOffset + lidx + offset]+ tmp)";
1379 ss
<< " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
1381 ss
<< " } else if ((loopOffset + lidx + offset) < end)\n";
1382 ss
<< " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
1384 ss
<< " shm_buf[lidx] = tmp;\n";
1385 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1386 ss
<< " for (int i = 128; i >0; i/=2) {\n";
1387 ss
<< " if (lidx < i)\n";
1388 ss
<< " shm_buf[lidx] = ";
1389 ss
<< "shm_buf[lidx] + shm_buf[lidx + i];\n";
1390 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1392 ss
<< " if (lidx == 0)\n";
1393 ss
<< " current_result =";
1394 ss
<< "current_result + shm_buf[0]";
1396 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1398 ss
<< " if (lidx == 0)\n";
1399 ss
<< " result[writePos] = current_result;\n";
1402 ss
<< "__kernel void " << name
<< "_count";
1403 ss
<< "_reduction(__global double* A, "
1404 "__global double *result,int arrayLength,int windowSize){\n";
1405 ss
<< " double tmp, current_result =" <<
1406 mpCodeGen
->GetBottom();
1408 ss
<< " int writePos = get_group_id(1);\n";
1409 ss
<< " int lidx = get_local_id(0);\n";
1410 ss
<< " __local double shm_buf[256];\n";
1411 if (mpDVR
->IsStartFixed())
1412 ss
<< " int offset = 0;\n";
1413 else // if (!mpDVR->IsStartFixed())
1414 ss
<< " int offset = get_group_id(1);\n";
1415 if (mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed())
1416 ss
<< " int end = windowSize;\n";
1417 else if (!mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
1418 ss
<< " int end = offset + windowSize;\n";
1419 else if (mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
1420 ss
<< " int end = windowSize + get_group_id(1);\n";
1421 else if (!mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed())
1422 ss
<< " int end = windowSize;\n";
1423 ss
<< " end = min(end, arrayLength);\n";
1424 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1425 ss
<< " int loop = arrayLength/512 + 1;\n";
1426 ss
<< " for (int l=0; l<loop; l++){\n";
1427 ss
<< " tmp = " << mpCodeGen
->GetBottom() << ";\n";
1428 ss
<< " int loopOffset = l*512;\n";
1429 ss
<< " if((loopOffset + lidx + offset + 256) < end) {\n";
1430 ss
<< " tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1432 ss
<< " tmp = legalize((isNan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
1434 ss
<< " } else if ((loopOffset + lidx + offset) < end)\n";
1435 ss
<< " tmp = legalize((isNan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1437 ss
<< " shm_buf[lidx] = tmp;\n";
1438 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1439 ss
<< " for (int i = 128; i >0; i/=2) {\n";
1440 ss
<< " if (lidx < i)\n";
1441 ss
<< " shm_buf[lidx] = ";
1442 ss
<< "shm_buf[lidx] + shm_buf[lidx + i];\n";
1443 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1445 ss
<< " if (lidx == 0)\n";
1446 ss
<< " current_result =";
1447 ss
<< "current_result + shm_buf[0];";
1449 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
1451 ss
<< " if (lidx == 0)\n";
1452 ss
<< " result[writePos] = current_result;\n";
1457 virtual std::string
GenSlidingWindowDeclRef( bool = false ) const
1459 std::stringstream ss
;
1460 if (!bIsStartFixed
&& !bIsEndFixed
)
1461 ss
<< Base::GetName() << "[i + gid0]";
1463 ss
<< Base::GetName() << "[i]";
1466 /// Controls how the elements in the DoubleVectorRef are traversed
1467 virtual size_t GenReductionLoopHeader(
1468 std::stringstream
& ss
, int nResultSize
, bool& needBody
)
1471 size_t nCurWindowSize
= mpDVR
->GetRefRowSize();
1472 std::string temp
= Base::GetName() + "[gid0]";
1474 // Special case count
1475 if (dynamic_cast<OpAverage
*>(mpCodeGen
.get()))
1477 ss
<< mpCodeGen
->Gen2(temp
, "tmp") << ";\n";
1478 ss
<< "nCount = nCount-1;\n";
1479 ss
<< "nCount = nCount +"; /*re-assign nCount from count reduction*/
1480 ss
<< Base::GetName() << "[gid0+" << nResultSize
<< "]" << ";\n";
1482 else if (dynamic_cast<OpCount
*>(mpCodeGen
.get()))
1483 ss
<< temp
<< "+ tmp";
1485 ss
<< mpCodeGen
->Gen2(temp
, "tmp");
1488 return nCurWindowSize
;
1491 virtual size_t Marshal( cl_kernel k
, int argno
, int w
, cl_program mpProgram
)
1493 assert(Base::mpClmem
== NULL
);
1494 // Obtain cl context
1495 ::opencl::KernelEnv kEnv
;
1496 ::opencl::setKernelEnv(&kEnv
);
1498 size_t nInput
= mpDVR
->GetArrayLength();
1499 size_t nCurWindowSize
= mpDVR
->GetRefRowSize();
1500 // create clmem buffer
1501 if (mpDVR
->GetArrays()[Base::mnIndex
].mpNumericArray
== NULL
)
1503 double* pHostBuffer
= const_cast<double*>(
1504 mpDVR
->GetArrays()[Base::mnIndex
].mpNumericArray
);
1505 size_t szHostBuffer
= nInput
* sizeof(double);
1506 Base::mpClmem
= clCreateBuffer(kEnv
.mpkContext
,
1507 (cl_mem_flags
)CL_MEM_READ_ONLY
| CL_MEM_USE_HOST_PTR
,
1510 SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem
<< " size " << nInput
<< "*" << sizeof(double) << "=" << szHostBuffer
<< " using host buffer " << pHostBuffer
);
1512 mpClmem2
= clCreateBuffer(kEnv
.mpkContext
,
1513 CL_MEM_READ_WRITE
| CL_MEM_ALLOC_HOST_PTR
,
1514 sizeof(double) * w
, NULL
, NULL
);
1515 if (CL_SUCCESS
!= err
)
1516 throw OpenCLError("clCreateBuffer", err
, __FILE__
, __LINE__
);
1517 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2
<< " size " << sizeof(double) << "*" << w
<< "=" << (sizeof(double)*w
));
1519 // reproduce the reduction function name
1520 std::string kernelName
;
1521 if (!dynamic_cast<OpAverage
*>(mpCodeGen
.get()))
1522 kernelName
= Base::GetName() + "_reduction";
1524 kernelName
= Base::GetName() + "_sum_reduction";
1525 cl_kernel redKernel
= clCreateKernel(mpProgram
, kernelName
.c_str(), &err
);
1526 if (err
!= CL_SUCCESS
)
1527 throw OpenCLError("clCreateKernel", err
, __FILE__
, __LINE__
);
1528 SAL_INFO("sc.opencl", "Created kernel " << redKernel
<< " with name " << kernelName
<< " in program " << mpProgram
);
1530 // set kernel arg of reduction kernel
1531 // TODO(Wei Wei): use unique name for kernel
1532 cl_mem buf
= Base::GetCLBuffer();
1533 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << 0 << ": cl_mem: " << buf
);
1534 err
= clSetKernelArg(redKernel
, 0, sizeof(cl_mem
),
1536 if (CL_SUCCESS
!= err
)
1537 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
1539 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << 1 << ": cl_mem: " << mpClmem2
);
1540 err
= clSetKernelArg(redKernel
, 1, sizeof(cl_mem
), &mpClmem2
);
1541 if (CL_SUCCESS
!= err
)
1542 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
1544 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << 2 << ": cl_int: " << nInput
);
1545 err
= clSetKernelArg(redKernel
, 2, sizeof(cl_int
), (void*)&nInput
);
1546 if (CL_SUCCESS
!= err
)
1547 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
1549 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << 3 << ": cl_int: " << nCurWindowSize
);
1550 err
= clSetKernelArg(redKernel
, 3, sizeof(cl_int
), (void*)&nCurWindowSize
);
1551 if (CL_SUCCESS
!= err
)
1552 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
1554 // set work group size and execute
1555 size_t global_work_size
[] = { 256, (size_t)w
};
1556 size_t local_work_size
[] = { 256, 1 };
1557 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel
);
1558 err
= clEnqueueNDRangeKernel(kEnv
.mpkCmdQueue
, redKernel
, 2, NULL
,
1559 global_work_size
, local_work_size
, 0, NULL
, NULL
);
1560 if (CL_SUCCESS
!= err
)
1561 throw OpenCLError("clEnqueueNDRangeKernel", err
, __FILE__
, __LINE__
);
1562 err
= clFinish(kEnv
.mpkCmdQueue
);
1563 if (CL_SUCCESS
!= err
)
1564 throw OpenCLError("clFinish", err
, __FILE__
, __LINE__
);
1565 if (dynamic_cast<OpAverage
*>(mpCodeGen
.get()))
1567 /*average need more reduction kernel for count computing*/
1568 boost::scoped_array
<double> pAllBuffer(new double[2 * w
]);
1569 double* resbuf
= static_cast<double*>(clEnqueueMapBuffer(kEnv
.mpkCmdQueue
,
1571 CL_TRUE
, CL_MAP_READ
, 0,
1572 sizeof(double) * w
, 0, NULL
, NULL
,
1574 if (err
!= CL_SUCCESS
)
1575 throw OpenCLError("clEnqueueMapBuffer", err
, __FILE__
, __LINE__
);
1577 for (int i
= 0; i
< w
; i
++)
1578 pAllBuffer
[i
] = resbuf
[i
];
1579 err
= clEnqueueUnmapMemObject(kEnv
.mpkCmdQueue
, mpClmem2
, resbuf
, 0, NULL
, NULL
);
1580 if (err
!= CL_SUCCESS
)
1581 throw OpenCLError("clEnqueueUnmapMemObject", err
, __FILE__
, __LINE__
);
1583 kernelName
= Base::GetName() + "_count_reduction";
1584 redKernel
= clCreateKernel(mpProgram
, kernelName
.c_str(), &err
);
1585 if (err
!= CL_SUCCESS
)
1586 throw OpenCLError("clCreateKernel", err
, __FILE__
, __LINE__
);
1587 SAL_INFO("sc.opencl", "Created kernel " << redKernel
<< " with name " << kernelName
<< " in program " << mpProgram
);
1589 // set kernel arg of reduction kernel
1590 buf
= Base::GetCLBuffer();
1591 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << 0 << ": cl_mem: " << buf
);
1592 err
= clSetKernelArg(redKernel
, 0, sizeof(cl_mem
),
1594 if (CL_SUCCESS
!= err
)
1595 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
1597 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << 1 << ": cl_mem: " << mpClmem2
);
1598 err
= clSetKernelArg(redKernel
, 1, sizeof(cl_mem
), &mpClmem2
);
1599 if (CL_SUCCESS
!= err
)
1600 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
1602 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << 2 << ": cl_int: " << nInput
);
1603 err
= clSetKernelArg(redKernel
, 2, sizeof(cl_int
), (void*)&nInput
);
1604 if (CL_SUCCESS
!= err
)
1605 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
1607 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << 3 << ": cl_int: " << nCurWindowSize
);
1608 err
= clSetKernelArg(redKernel
, 3, sizeof(cl_int
), (void*)&nCurWindowSize
);
1609 if (CL_SUCCESS
!= err
)
1610 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
1612 // set work group size and execute
1613 size_t global_work_size1
[] = { 256, (size_t)w
};
1614 size_t local_work_size1
[] = { 256, 1 };
1615 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel
);
1616 err
= clEnqueueNDRangeKernel(kEnv
.mpkCmdQueue
, redKernel
, 2, NULL
,
1617 global_work_size1
, local_work_size1
, 0, NULL
, NULL
);
1618 if (CL_SUCCESS
!= err
)
1619 throw OpenCLError("clEnqueueNDRangeKernel", err
, __FILE__
, __LINE__
);
1620 err
= clFinish(kEnv
.mpkCmdQueue
);
1621 if (CL_SUCCESS
!= err
)
1622 throw OpenCLError("clFinish", err
, __FILE__
, __LINE__
);
1623 resbuf
= static_cast<double*>(clEnqueueMapBuffer(kEnv
.mpkCmdQueue
,
1625 CL_TRUE
, CL_MAP_READ
, 0,
1626 sizeof(double) * w
, 0, NULL
, NULL
,
1628 if (err
!= CL_SUCCESS
)
1629 throw OpenCLError("clEnqueueMapBuffer", err
, __FILE__
, __LINE__
);
1630 for (int i
= 0; i
< w
; i
++)
1631 pAllBuffer
[i
+ w
] = resbuf
[i
];
1632 err
= clEnqueueUnmapMemObject(kEnv
.mpkCmdQueue
, mpClmem2
, resbuf
, 0, NULL
, NULL
);
1633 // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
1634 if (CL_SUCCESS
!= err
)
1635 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err
));
1638 err
= clReleaseMemObject(mpClmem2
);
1639 SAL_WARN_IF(err
!= CL_SUCCESS
, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err
));
1642 mpClmem2
= clCreateBuffer(kEnv
.mpkContext
,
1643 (cl_mem_flags
)CL_MEM_READ_WRITE
| CL_MEM_COPY_HOST_PTR
,
1644 w
* sizeof(double) * 2, pAllBuffer
.get(), &err
);
1645 if (CL_SUCCESS
!= err
)
1646 throw OpenCLError("clCreateBuffer", err
, __FILE__
, __LINE__
);
1647 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2
<< " size " << w
<< "*" << sizeof(double) << "=" << (w
*sizeof(double)) << " copying host buffer " << pAllBuffer
.get());
1650 SAL_INFO("sc.opencl", "Kernel " << k
<< " arg " << argno
<< ": cl_mem: " << mpClmem2
);
1651 err
= clSetKernelArg(k
, argno
, sizeof(cl_mem
), &(mpClmem2
));
1652 if (CL_SUCCESS
!= err
)
1653 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
1656 ~ParallelReductionVectorRef()
1661 err
= clReleaseMemObject(mpClmem2
);
1662 SAL_WARN_IF(err
!= CL_SUCCESS
, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err
));
1667 size_t GetArrayLength() const { return mpDVR
->GetArrayLength(); }
1669 size_t GetWindowSize() const { return mpDVR
->GetRefRowSize(); }
1671 size_t GetStartFixed() const { return bIsStartFixed
; }
1673 size_t GetEndFixed() const { return bIsEndFixed
; }
1676 bool bIsStartFixed
, bIsEndFixed
;
1677 const formula::DoubleVectorRefToken
* mpDVR
;
1678 // from parent nodes
1679 boost::shared_ptr
<SlidingFunctionBase
> mpCodeGen
;
1680 // controls whether to invoke the reduction kernel during marshaling or not
1684 class Reduction
: public SlidingFunctionBase
1688 Reduction( int nResultSize
) : mnResultSize(nResultSize
) {}
1690 typedef DynamicKernelSlidingArgument
<VectorRef
> NumericRange
;
1691 typedef DynamicKernelSlidingArgument
<DynamicKernelStringArgument
> StringRange
;
1692 typedef ParallelReductionVectorRef
<VectorRef
> ParallelNumericRange
;
1694 virtual bool HandleNaNArgument( std::stringstream
&, unsigned, SubArguments
& ) const
1699 virtual void GenSlidingWindowFunction( std::stringstream
& ss
,
1700 const std::string
& sSymName
, SubArguments
& vSubArguments
) SAL_OVERRIDE
1702 ss
<< "\ndouble " << sSymName
;
1703 ss
<< "_" << BinFuncName() << "(";
1704 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1708 vSubArguments
[i
]->GenSlidingWindowDecl(ss
);
1711 ss
<< "double tmp = " << GetBottom() << ";\n";
1712 ss
<< "int gid0 = get_global_id(0);\n";
1713 if (isAverage() || isMinOrMax())
1714 ss
<< "int nCount = 0;\n";
1715 ss
<< "double tmpBottom;\n";
1716 unsigned i
= vSubArguments
.size();
1719 if (NumericRange
* NR
=
1720 dynamic_cast<NumericRange
*>(vSubArguments
[i
].get()))
1722 bool needBody
; NR
->GenReductionLoopHeader(ss
, needBody
); if (!needBody
)
1725 else if (ParallelNumericRange
* PNR
=
1726 dynamic_cast<ParallelNumericRange
*>(vSubArguments
[i
].get()))
1728 //did not handle yet
1729 bool bNeedBody
= false;
1730 PNR
->GenReductionLoopHeader(ss
, mnResultSize
, bNeedBody
);
1734 else if (StringRange
* SR
=
1735 dynamic_cast<StringRange
*>(vSubArguments
[i
].get()))
1737 //did not handle yet
1739 SR
->GenReductionLoopHeader(ss
, needBody
);
1745 FormulaToken
* pCur
= vSubArguments
[i
]->GetFormulaToken();
1747 assert(pCur
->GetType() != formula::svDoubleVectorRef
);
1749 if (pCur
->GetType() == formula::svSingleVectorRef
||
1750 pCur
->GetType() == formula::svDouble
)
1755 if (ocPush
== vSubArguments
[i
]->GetFormulaToken()->GetOpCode())
1757 bool bNanHandled
= HandleNaNArgument(ss
, i
, vSubArguments
);
1759 ss
<< "tmpBottom = " << GetBottom() << ";\n";
1764 ss
<< vSubArguments
[i
]->GenSlidingWindowDeclRef();
1766 if (ZeroReturnZero())
1767 ss
<< " return 0;\n";
1771 ss
<< Gen2("tmpBottom", "tmp") << ";\n";
1777 ss
<< Gen2(vSubArguments
[i
]->GenSlidingWindowDeclRef(), "tmp");
1785 ss
<< Gen2(vSubArguments
[i
]->GenSlidingWindowDeclRef(), "tmp");
1792 " return CreateDoubleError(errDivisionByZero);\n";
1793 else if (isMinOrMax())
1799 ss
<< "*pow((double)nCount,-1.0)";
1802 virtual bool isAverage() const { return false; }
1803 virtual bool isMinOrMax() const { return false; }
1804 virtual bool takeString() const SAL_OVERRIDE
{ return false; }
1805 virtual bool takeNumeric() const SAL_OVERRIDE
{ return true; }
1808 // Strictly binary operators
1809 class Binary
: public SlidingFunctionBase
1812 virtual void GenSlidingWindowFunction( std::stringstream
& ss
,
1813 const std::string
& sSymName
, SubArguments
& vSubArguments
) SAL_OVERRIDE
1815 ss
<< "\ndouble " << sSymName
;
1816 ss
<< "_" << BinFuncName() << "(";
1817 assert(vSubArguments
.size() == 2);
1818 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1822 vSubArguments
[i
]->GenSlidingWindowDecl(ss
);
1825 ss
<< "int gid0 = get_global_id(0), i = 0;\n\t";
1826 ss
<< "double tmp = ";
1827 ss
<< Gen2(vSubArguments
[0]->GenSlidingWindowDeclRef(false),
1828 vSubArguments
[1]->GenSlidingWindowDeclRef(false)) << ";\n\t";
1829 ss
<< "return tmp;\n}";
1831 virtual bool takeString() const SAL_OVERRIDE
{ return true; }
1832 virtual bool takeNumeric() const SAL_OVERRIDE
{ return true; }
1835 class SumOfProduct
: public SlidingFunctionBase
1838 virtual void GenSlidingWindowFunction( std::stringstream
& ss
,
1839 const std::string
& sSymName
, SubArguments
& vSubArguments
) SAL_OVERRIDE
1841 size_t nCurWindowSize
= 0;
1842 FormulaToken
* tmpCur
= NULL
;
1843 const formula::DoubleVectorRefToken
* pCurDVR
= NULL
;
1844 ss
<< "\ndouble " << sSymName
;
1845 ss
<< "_" << BinFuncName() << "(";
1846 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1850 vSubArguments
[i
]->GenSlidingWindowDecl(ss
);
1851 size_t nCurChildWindowSize
= vSubArguments
[i
]->GetWindowSize();
1852 nCurWindowSize
= (nCurWindowSize
< nCurChildWindowSize
) ?
1853 nCurChildWindowSize
: nCurWindowSize
;
1854 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
1855 if (ocPush
== tmpCur
->GetOpCode())
1858 pCurDVR
= static_cast<const formula::DoubleVectorRefToken
*>(tmpCur
);
1860 ((!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
1861 || (pCurDVR
->IsStartFixed() && pCurDVR
->IsEndFixed()))
1867 ss
<< " double tmp = 0.0;\n";
1868 ss
<< " int gid0 = get_global_id(0);\n";
1870 ss
<< "\tint i;\n\t";
1871 ss
<< "int currentCount0;\n";
1872 for (unsigned i
= 0; i
< vSubArguments
.size() - 1; i
++)
1873 ss
<< "int currentCount" << i
+ 1 << ";\n";
1874 std::stringstream temp3
, temp4
;
1875 int outLoopSize
= UNROLLING_FACTOR
;
1876 if (nCurWindowSize
/ outLoopSize
!= 0)
1878 ss
<< "for(int outLoop=0; outLoop<" <<
1879 nCurWindowSize
/ outLoopSize
<< "; outLoop++){\n\t";
1880 for (int count
= 0; count
< outLoopSize
; count
++)
1882 ss
<< "i = outLoop*" << outLoopSize
<< "+" << count
<< ";\n";
1885 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1887 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
1888 if (ocPush
== tmpCur
->GetOpCode())
1890 pCurDVR
= static_cast<const formula::DoubleVectorRefToken
*>(tmpCur
);
1891 if (!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
1893 temp3
<< " currentCount";
1895 temp3
<< " =i+gid0+1;\n";
1899 temp3
<< " currentCount";
1901 temp3
<< " =i+1;\n";
1906 temp3
<< "tmp = fsum(";
1907 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1911 if (ocPush
== vSubArguments
[i
]->GetFormulaToken()->GetOpCode())
1914 temp3
<< "(currentCount";
1917 if (vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1918 formula::svSingleVectorRef
)
1920 const formula::SingleVectorRefToken
* pSVR
=
1921 static_cast<const formula::SingleVectorRefToken
*>
1922 (vSubArguments
[i
]->GetFormulaToken());
1923 temp3
<< pSVR
->GetArrayLength();
1924 temp3
<< ")||isNan(" << vSubArguments
[i
]
1925 ->GenSlidingWindowDeclRef();
1927 temp3
<< vSubArguments
[i
]->GenSlidingWindowDeclRef();
1930 else if (vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1931 formula::svDoubleVectorRef
)
1933 const formula::DoubleVectorRefToken
* pSVR
=
1934 static_cast<const formula::DoubleVectorRefToken
*>
1935 (vSubArguments
[i
]->GetFormulaToken());
1936 temp3
<< pSVR
->GetArrayLength();
1937 temp3
<< ")||isNan(" << vSubArguments
[i
]
1938 ->GenSlidingWindowDeclRef(true);
1940 temp3
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1946 temp3
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1948 temp3
<< ", tmp);\n\t";
1954 //The residual of mod outLoopSize
1955 for (unsigned int count
= nCurWindowSize
/ outLoopSize
* outLoopSize
;
1956 count
< nCurWindowSize
; count
++)
1958 ss
<< "i =" << count
<< ";\n";
1959 if (count
== nCurWindowSize
/ outLoopSize
* outLoopSize
)
1961 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1963 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
1964 if (ocPush
== tmpCur
->GetOpCode())
1966 pCurDVR
= static_cast<const formula::DoubleVectorRefToken
*>(tmpCur
);
1967 if (!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
1969 temp4
<< " currentCount";
1971 temp4
<< " =i+gid0+1;\n";
1975 temp4
<< " currentCount";
1977 temp4
<< " =i+1;\n";
1982 temp4
<< "tmp = fsum(";
1983 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1987 if (ocPush
== vSubArguments
[i
]->GetFormulaToken()->GetOpCode())
1990 temp4
<< "(currentCount";
1993 if (vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1994 formula::svSingleVectorRef
)
1996 const formula::SingleVectorRefToken
* pSVR
=
1997 static_cast<const formula::SingleVectorRefToken
*>
1998 (vSubArguments
[i
]->GetFormulaToken());
1999 temp4
<< pSVR
->GetArrayLength();
2000 temp4
<< ")||isNan(" << vSubArguments
[i
]
2001 ->GenSlidingWindowDeclRef();
2003 temp4
<< vSubArguments
[i
]->GenSlidingWindowDeclRef();
2006 else if (vSubArguments
[i
]->GetFormulaToken()->GetType() ==
2007 formula::svDoubleVectorRef
)
2009 const formula::DoubleVectorRefToken
* pSVR
=
2010 static_cast<const formula::DoubleVectorRefToken
*>
2011 (vSubArguments
[i
]->GetFormulaToken());
2012 temp4
<< pSVR
->GetArrayLength();
2013 temp4
<< ")||isNan(" << vSubArguments
[i
]
2014 ->GenSlidingWindowDeclRef(true);
2016 temp4
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
2023 temp4
<< vSubArguments
[i
]
2024 ->GenSlidingWindowDeclRef(true);
2027 temp4
<< ", tmp);\n\t";
2031 ss
<< "return tmp;\n";
2034 virtual bool takeString() const SAL_OVERRIDE
{ return false; }
2035 virtual bool takeNumeric() const SAL_OVERRIDE
{ return true; }
2039 class OpNop
: public Reduction
2042 OpNop( int nResultSize
) : Reduction(nResultSize
) {}
2044 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2045 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& ) const SAL_OVERRIDE
2049 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "nop"; }
2052 class OpCount
: public Reduction
2055 OpCount( int nResultSize
) : Reduction(nResultSize
) {}
2057 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2058 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2060 std::stringstream ss
;
2061 ss
<< "(isNan(" << lhs
<< ")?" << rhs
<< ":" << rhs
<< "+1.0)";
2064 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "fcount"; }
2067 class OpEqual
: public Binary
2070 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2071 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2073 std::stringstream ss
;
2074 ss
<< "strequal(" << lhs
<< "," << rhs
<< ")";
2077 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "eq"; }
2080 class OpLessEqual
: public Binary
2083 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2084 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2086 std::stringstream ss
;
2087 ss
<< "(" << lhs
<< "<=" << rhs
<< ")";
2090 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "leq"; }
2093 class OpLess
: public Binary
2096 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2097 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2099 std::stringstream ss
;
2100 ss
<< "(" << lhs
<< "<" << rhs
<< ")";
2103 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "less"; }
2106 class OpGreater
: public Binary
2109 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2110 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2112 std::stringstream ss
;
2113 ss
<< "(" << lhs
<< ">" << rhs
<< ")";
2116 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "gt"; }
2119 class OpSum
: public Reduction
2122 OpSum( int nResultSize
) : Reduction(nResultSize
) {}
2124 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2125 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2127 std::stringstream ss
;
2128 ss
<< "((" << lhs
<< ")+(" << rhs
<< "))";
2131 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "fsum"; }
2134 class OpAverage
: public Reduction
2137 OpAverage( int nResultSize
) : Reduction(nResultSize
) {}
2139 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2140 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2142 std::stringstream ss
;
2143 ss
<< "fsum_count(" << lhs
<< "," << rhs
<< ", &nCount)";
2146 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "average"; }
2147 virtual bool isAverage() const SAL_OVERRIDE
{ return true; }
2150 class OpSub
: public Reduction
2153 OpSub( int nResultSize
) : Reduction(nResultSize
) {}
2155 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2156 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2158 return lhs
+ "-" + rhs
;
2160 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "fsub"; }
2163 class OpMul
: public Reduction
2166 OpMul( int nResultSize
) : Reduction(nResultSize
) {}
2168 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "1"; }
2169 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2171 return lhs
+ "*" + rhs
;
2173 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "fmul"; }
2174 virtual bool ZeroReturnZero() SAL_OVERRIDE
{ return true; }
2177 /// Technically not a reduction, but fits the framework.
2178 class OpDiv
: public Reduction
2181 OpDiv( int nResultSize
) : Reduction(nResultSize
) {}
2183 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "1.0"; }
2184 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2186 return "(" + rhs
+ "==0 ? CreateDoubleError(errDivisionByZero) : (" + lhs
+ "/" + rhs
+ ") )";
2188 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "fdiv"; }
2190 virtual bool HandleNaNArgument( std::stringstream
& ss
, unsigned argno
, SubArguments
& vSubArguments
) const SAL_OVERRIDE
2195 "if (isnan(" << vSubArguments
[argno
]->GenSlidingWindowDeclRef() << ")) {\n"
2196 " return CreateDoubleError(errDivisionByZero);\n"
2200 else if (argno
== 0)
2203 "if (isnan(" << vSubArguments
[argno
]->GenSlidingWindowDeclRef() << ") &&\n"
2204 " !(isnan(" << vSubArguments
[1]->GenSlidingWindowDeclRef() << ") || " << vSubArguments
[1]->GenSlidingWindowDeclRef() << " == 0)) {\n"
2213 class OpMin
: public Reduction
2216 OpMin( int nResultSize
) : Reduction(nResultSize
) {}
2218 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "NAN"; }
2219 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2221 return "fmin_count(" + lhs
+ "," + rhs
+ ", &nCount)";
2223 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "min"; }
2224 virtual bool isMinOrMax() const SAL_OVERRIDE
{ return true; }
2227 class OpMax
: public Reduction
2230 OpMax( int nResultSize
) : Reduction(nResultSize
) {}
2232 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "NAN"; }
2233 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2235 return "fmax_count(" + lhs
+ "," + rhs
+ ", &nCount)";
2237 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "max"; }
2238 virtual bool isMinOrMax() const SAL_OVERRIDE
{ return true; }
2241 class OpSumProduct
: public SumOfProduct
2244 virtual std::string
GetBottom() SAL_OVERRIDE
{ return "0"; }
2245 virtual std::string
Gen2( const std::string
& lhs
, const std::string
& rhs
) const SAL_OVERRIDE
2247 return lhs
+ "*" + rhs
;
2249 virtual std::string
BinFuncName() const SAL_OVERRIDE
{ return "fsop"; }
2254 SumIfsArgs( cl_mem x
) : mCLMem(x
), mConst(0.0) { }
2255 SumIfsArgs( double x
) : mCLMem(NULL
), mConst(x
) { }
2261 /// Helper functions that have multiple buffers
2262 class DynamicKernelSoPArguments
: public DynamicKernelArgument
2265 typedef std::vector
<DynamicKernelArgumentRef
> SubArgumentsType
;
2267 DynamicKernelSoPArguments( const ScCalcConfig
& config
,
2268 const std::string
& s
, const FormulaTreeNodeRef
& ft
,
2269 SlidingFunctionBase
* pCodeGen
, int nResultSize
);
2271 /// Create buffer and pass the buffer to a given kernel
2272 virtual size_t Marshal( cl_kernel k
, int argno
, int nVectorWidth
, cl_program pProgram
) SAL_OVERRIDE
2275 for (SubArgumentsType::iterator it
= mvSubArguments
.begin(), e
= mvSubArguments
.end(); it
!= e
;
2278 i
+= (*it
)->Marshal(k
, argno
+ i
, nVectorWidth
, pProgram
);
2280 if (OpGeoMean
* OpSumCodeGen
= dynamic_cast<OpGeoMean
*>(mpCodeGen
.get()))
2282 // Obtain cl context
2283 ::opencl::KernelEnv kEnv
;
2284 ::opencl::setKernelEnv(&kEnv
);
2288 if (OpSumCodeGen
->NeedReductionKernel())
2290 std::vector
<cl_mem
> vclmem
;
2291 for (SubArgumentsType::iterator it
= mvSubArguments
.begin(),
2292 e
= mvSubArguments
.end(); it
!= e
; ++it
)
2294 if (VectorRef
* VR
= dynamic_cast<VectorRef
*>(it
->get()))
2295 vclmem
.push_back(VR
->GetCLBuffer());
2297 vclmem
.push_back(NULL
);
2299 pClmem2
= clCreateBuffer(kEnv
.mpkContext
, CL_MEM_READ_WRITE
,
2300 sizeof(double) * nVectorWidth
, NULL
, &err
);
2301 if (CL_SUCCESS
!= err
)
2302 throw OpenCLError("clCreateBuffer", err
, __FILE__
, __LINE__
);
2303 SAL_INFO("sc.opencl", "Created buffer " << pClmem2
<< " size " << sizeof(double) << "*" << nVectorWidth
<< "=" << (sizeof(double)*nVectorWidth
));
2305 std::string kernelName
= "GeoMean_reduction";
2306 cl_kernel redKernel
= clCreateKernel(pProgram
, kernelName
.c_str(), &err
);
2307 if (err
!= CL_SUCCESS
)
2308 throw OpenCLError("clCreateKernel", err
, __FILE__
, __LINE__
);
2309 SAL_INFO("sc.opencl", "Created kernel " << redKernel
<< " with name " << kernelName
<< " in program " << pProgram
);
2311 // set kernel arg of reduction kernel
2312 for (size_t j
= 0; j
< vclmem
.size(); j
++)
2314 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << j
<< ": " << (vclmem
[j
] ? "cl_mem" : "double") << ": " << vclmem
[j
]);
2315 err
= clSetKernelArg(redKernel
, j
,
2316 vclmem
[j
] ? sizeof(cl_mem
) : sizeof(double),
2318 if (CL_SUCCESS
!= err
)
2319 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
2321 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << vclmem
.size() << ": cl_mem: " << pClmem2
);
2322 err
= clSetKernelArg(redKernel
, vclmem
.size(), sizeof(cl_mem
), (void*)&pClmem2
);
2323 if (CL_SUCCESS
!= err
)
2324 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
2326 // set work group size and execute
2327 size_t global_work_size
[] = { 256, (size_t)nVectorWidth
};
2328 size_t local_work_size
[] = { 256, 1 };
2329 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel
);
2330 err
= clEnqueueNDRangeKernel(kEnv
.mpkCmdQueue
, redKernel
, 2, NULL
,
2331 global_work_size
, local_work_size
, 0, NULL
, NULL
);
2332 if (CL_SUCCESS
!= err
)
2333 throw OpenCLError("clEnqueueNDRangeKernel", err
, __FILE__
, __LINE__
);
2334 err
= clFinish(kEnv
.mpkCmdQueue
);
2335 if (CL_SUCCESS
!= err
)
2336 throw OpenCLError("clFinish", err
, __FILE__
, __LINE__
);
2338 // Pass pClmem2 to the "real" kernel
2339 SAL_INFO("sc.opencl", "Kernel " << k
<< " arg " << argno
<< ": cl_mem: " << pClmem2
);
2340 err
= clSetKernelArg(k
, argno
, sizeof(cl_mem
), (void*)&pClmem2
);
2341 if (CL_SUCCESS
!= err
)
2342 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
2345 if (OpSumIfs
* OpSumCodeGen
= dynamic_cast<OpSumIfs
*>(mpCodeGen
.get()))
2347 // Obtain cl context
2348 ::opencl::KernelEnv kEnv
;
2349 ::opencl::setKernelEnv(&kEnv
);
2351 DynamicKernelArgument
* Arg
= mvSubArguments
[0].get();
2352 DynamicKernelSlidingArgument
<VectorRef
>* slidingArgPtr
=
2353 static_cast<DynamicKernelSlidingArgument
<VectorRef
>*>(Arg
);
2356 if (OpSumCodeGen
->NeedReductionKernel())
2358 size_t nInput
= slidingArgPtr
->GetArrayLength();
2359 size_t nCurWindowSize
= slidingArgPtr
->GetWindowSize();
2360 std::vector
<SumIfsArgs
> vclmem
;
2362 for (SubArgumentsType::iterator it
= mvSubArguments
.begin(),
2363 e
= mvSubArguments
.end(); it
!= e
; ++it
)
2365 if (VectorRef
* VR
= dynamic_cast<VectorRef
*>(it
->get()))
2366 vclmem
.push_back(SumIfsArgs(VR
->GetCLBuffer()));
2367 else if (DynamicKernelConstantArgument
* CA
= dynamic_cast<DynamicKernelConstantArgument
*>(it
->get()))
2368 vclmem
.push_back(SumIfsArgs(CA
->GetDouble()));
2370 vclmem
.push_back(SumIfsArgs((cl_mem
)NULL
));
2372 mpClmem2
= clCreateBuffer(kEnv
.mpkContext
, CL_MEM_READ_WRITE
,
2373 sizeof(double) * nVectorWidth
, NULL
, &err
);
2374 if (CL_SUCCESS
!= err
)
2375 throw OpenCLError("clCreateBuffer", err
, __FILE__
, __LINE__
);
2376 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2
<< " size " << sizeof(double) << "*" << nVectorWidth
<< "=" << (sizeof(double)*nVectorWidth
));
2378 std::string kernelName
= mvSubArguments
[0]->GetName() + "_SumIfs_reduction";
2379 cl_kernel redKernel
= clCreateKernel(pProgram
, kernelName
.c_str(), &err
);
2380 if (err
!= CL_SUCCESS
)
2381 throw OpenCLError("clCreateKernel", err
, __FILE__
, __LINE__
);
2382 SAL_INFO("sc.opencl", "Created kernel " << redKernel
<< " with name " << kernelName
<< " in program " << pProgram
);
2384 // set kernel arg of reduction kernel
2385 for (size_t j
= 0; j
< vclmem
.size(); j
++)
2387 if (vclmem
[j
].mCLMem
)
2388 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << j
<< ": cl_mem: " << vclmem
[j
].mCLMem
);
2390 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << j
<< ": double: " << vclmem
[j
].mConst
);
2391 err
= clSetKernelArg(redKernel
, j
,
2392 vclmem
[j
].mCLMem
? sizeof(cl_mem
) : sizeof(double),
2393 vclmem
[j
].mCLMem
? (void*)&vclmem
[j
].mCLMem
:
2394 (void*)&vclmem
[j
].mConst
);
2395 if (CL_SUCCESS
!= err
)
2396 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
2398 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << vclmem
.size() << ": cl_mem: " << mpClmem2
);
2399 err
= clSetKernelArg(redKernel
, vclmem
.size(), sizeof(cl_mem
), (void*)&mpClmem2
);
2400 if (CL_SUCCESS
!= err
)
2401 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
2403 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << (vclmem
.size() + 1) << ": cl_int: " << nInput
);
2404 err
= clSetKernelArg(redKernel
, vclmem
.size() + 1, sizeof(cl_int
), (void*)&nInput
);
2405 if (CL_SUCCESS
!= err
)
2406 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
2408 SAL_INFO("sc.opencl", "Kernel " << redKernel
<< " arg " << (vclmem
.size() + 2) << ": cl_int: " << nCurWindowSize
);
2409 err
= clSetKernelArg(redKernel
, vclmem
.size() + 2, sizeof(cl_int
), (void*)&nCurWindowSize
);
2410 if (CL_SUCCESS
!= err
)
2411 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
2412 // set work group size and execute
2413 size_t global_work_size
[] = { 256, (size_t)nVectorWidth
};
2414 size_t local_work_size
[] = { 256, 1 };
2415 SAL_INFO("sc.opencl", "Enqueing kernel " << redKernel
);
2416 err
= clEnqueueNDRangeKernel(kEnv
.mpkCmdQueue
, redKernel
, 2, NULL
,
2417 global_work_size
, local_work_size
, 0, NULL
, NULL
);
2418 if (CL_SUCCESS
!= err
)
2419 throw OpenCLError("clEnqueueNDRangeKernel", err
, __FILE__
, __LINE__
);
2421 err
= clFinish(kEnv
.mpkCmdQueue
);
2422 if (CL_SUCCESS
!= err
)
2423 throw OpenCLError("clFinish", err
, __FILE__
, __LINE__
);
2425 SAL_INFO("sc.opencl", "Relasing kernel " << redKernel
);
2426 err
= clReleaseKernel(redKernel
);
2427 SAL_WARN_IF(err
!= CL_SUCCESS
, "sc.opencl", "clReleaseKernel failed: " << ::opencl::errorString(err
));
2429 // Pass mpClmem2 to the "real" kernel
2430 SAL_INFO("sc.opencl", "Kernel " << k
<< " arg " << argno
<< ": cl_mem: " << mpClmem2
);
2431 err
= clSetKernelArg(k
, argno
, sizeof(cl_mem
), (void*)&mpClmem2
);
2432 if (CL_SUCCESS
!= err
)
2433 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
2439 virtual void GenSlidingWindowFunction( std::stringstream
& ss
) SAL_OVERRIDE
2441 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
2442 mvSubArguments
[i
]->GenSlidingWindowFunction(ss
);
2443 mpCodeGen
->GenSlidingWindowFunction(ss
, mSymName
, mvSubArguments
);
2445 virtual void GenDeclRef( std::stringstream
& ss
) const SAL_OVERRIDE
2447 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
2451 mvSubArguments
[i
]->GenDeclRef(ss
);
2454 virtual void GenDecl( std::stringstream
& ss
) const SAL_OVERRIDE
2456 for (SubArgumentsType::const_iterator it
= mvSubArguments
.begin(), e
= mvSubArguments
.end(); it
!= e
;
2459 if (it
!= mvSubArguments
.begin())
2465 virtual size_t GetWindowSize() const SAL_OVERRIDE
2467 size_t nCurWindowSize
= 0;
2468 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
2470 size_t nCurChildWindowSize
= mvSubArguments
[i
]->GetWindowSize();
2471 nCurWindowSize
= (nCurWindowSize
< nCurChildWindowSize
) ?
2472 nCurChildWindowSize
: nCurWindowSize
;
2474 return nCurWindowSize
;
2477 /// When declared as input to a sliding window function
2478 virtual void GenSlidingWindowDecl( std::stringstream
& ss
) const SAL_OVERRIDE
2480 for (SubArgumentsType::const_iterator it
= mvSubArguments
.begin(), e
= mvSubArguments
.end(); it
!= e
;
2483 if (it
!= mvSubArguments
.begin())
2485 (*it
)->GenSlidingWindowDecl(ss
);
2488 /// Generate either a function call to each children
2489 /// or directly inline it if we are already inside a loop
2490 virtual std::string
GenSlidingWindowDeclRef( bool nested
= false ) const SAL_OVERRIDE
2492 std::stringstream ss
;
2495 ss
<< mSymName
<< "_" << mpCodeGen
->BinFuncName() << "(";
2496 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
2500 mvSubArguments
[i
]->GenDeclRef(ss
);
2506 if (mvSubArguments
.size() != 2)
2508 bool bArgument1_NeedNested
=
2509 mvSubArguments
[0]->GetFormulaToken()->GetType()
2510 != formula::svSingleVectorRef
;
2511 bool bArgument2_NeedNested
=
2512 mvSubArguments
[1]->GetFormulaToken()->GetType()
2513 != formula::svSingleVectorRef
;
2516 Gen2(mvSubArguments
[0]
2517 ->GenSlidingWindowDeclRef(bArgument1_NeedNested
),
2519 ->GenSlidingWindowDeclRef(bArgument2_NeedNested
));
2524 virtual std::string
DumpOpName() const SAL_OVERRIDE
2526 std::string t
= "_" + mpCodeGen
->BinFuncName();
2527 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
2528 t
= t
+ mvSubArguments
[i
]->DumpOpName();
2531 virtual void DumpInlineFun( std::set
<std::string
>& decls
,
2532 std::set
<std::string
>& funs
) const SAL_OVERRIDE
2534 mpCodeGen
->BinInlineFun(decls
, funs
);
2535 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
2536 mvSubArguments
[i
]->DumpInlineFun(decls
, funs
);
2538 virtual ~DynamicKernelSoPArguments()
2543 err
= clReleaseMemObject(mpClmem2
);
2544 SAL_WARN_IF(err
!= CL_SUCCESS
, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err
));
2550 SubArgumentsType mvSubArguments
;
2551 boost::shared_ptr
<SlidingFunctionBase
> mpCodeGen
;
2555 DynamicKernelArgumentRef
SoPHelper( const ScCalcConfig
& config
,
2556 const std::string
& ts
, const FormulaTreeNodeRef
& ft
, SlidingFunctionBase
* pCodeGen
,
2559 return DynamicKernelArgumentRef(new DynamicKernelSoPArguments(config
, ts
, ft
, pCodeGen
, nResultSize
));
2562 template<class Base
>
2563 DynamicKernelArgument
* VectorRefFactory( const ScCalcConfig
& config
, const std::string
& s
,
2564 const FormulaTreeNodeRef
& ft
,
2565 boost::shared_ptr
<SlidingFunctionBase
>& pCodeGen
,
2568 //Black lists ineligible classes here ..
2569 // SUMIFS does not perform parallel reduction at DoubleVectorRef level
2570 if (dynamic_cast<OpSumIfs
*>(pCodeGen
.get()))
2572 if (index
== 0) // the first argument of OpSumIfs cannot be strings anyway
2573 return new DynamicKernelSlidingArgument
<VectorRef
>(config
, s
, ft
, pCodeGen
, index
);
2574 return new DynamicKernelSlidingArgument
<Base
>(config
, s
, ft
, pCodeGen
, index
);
2576 // AVERAGE is not supported yet
2577 //Average has been supported by reduction kernel
2578 /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
2580 return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2582 // MUL is not supported yet
2583 else if (dynamic_cast<OpMul
*>(pCodeGen
.get()))
2585 return new DynamicKernelSlidingArgument
<Base
>(config
, s
, ft
, pCodeGen
, index
);
2587 // Sub is not a reduction per se
2588 else if (dynamic_cast<OpSub
*>(pCodeGen
.get()))
2590 return new DynamicKernelSlidingArgument
<Base
>(config
, s
, ft
, pCodeGen
, index
);
2592 // Only child class of Reduction is supported
2593 else if (!dynamic_cast<Reduction
*>(pCodeGen
.get()))
2595 return new DynamicKernelSlidingArgument
<Base
>(config
, s
, ft
, pCodeGen
, index
);
2598 const formula::DoubleVectorRefToken
* pDVR
=
2599 static_cast<const formula::DoubleVectorRefToken
*>(
2600 ft
->GetFormulaToken());
2601 // Window being too small to justify a parallel reduction
2602 if (pDVR
->GetRefRowSize() < REDUCE_THRESHOLD
)
2603 return new DynamicKernelSlidingArgument
<Base
>(config
, s
, ft
, pCodeGen
, index
);
2604 if ((pDVR
->IsStartFixed() && pDVR
->IsEndFixed()) ||
2605 (!pDVR
->IsStartFixed() && !pDVR
->IsEndFixed()))
2606 return new ParallelReductionVectorRef
<Base
>(config
, s
, ft
, pCodeGen
, index
);
2607 else // Other cases are not supported as well
2608 return new DynamicKernelSlidingArgument
<Base
>(config
, s
, ft
, pCodeGen
, index
);
2611 DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig
& config
,
2612 const std::string
& s
, const FormulaTreeNodeRef
& ft
, SlidingFunctionBase
* pCodeGen
, int nResultSize
) :
2613 DynamicKernelArgument(config
, s
, ft
), mpCodeGen(pCodeGen
), mpClmem2(NULL
)
2615 size_t nChildren
= ft
->Children
.size();
2617 for (unsigned i
= 0; i
< nChildren
; i
++)
2619 FormulaTreeNodeRef rChild
= ft
->Children
[i
];
2622 FormulaToken
* pChild
= rChild
->GetFormulaToken();
2625 OpCode opc
= pChild
->GetOpCode();
2626 std::stringstream tmpname
;
2627 tmpname
<< s
<< "_" << i
;
2628 std::string ts
= tmpname
.str();
2632 if (pChild
->GetType() == formula::svDoubleVectorRef
)
2634 const formula::DoubleVectorRefToken
* pDVR
=
2635 static_cast<const formula::DoubleVectorRefToken
*>(pChild
);
2637 for (size_t j
= 0; j
< pDVR
->GetArrays().size(); ++j
)
2639 SAL_INFO("sc.opencl", "i=" << i
<< " j=" << j
<<
2640 " mpNumericArray=" << pDVR
->GetArrays()[j
].mpNumericArray
<<
2641 " mpStringArray=" << pDVR
->GetArrays()[j
].mpStringArray
<<
2642 " allStringsAreNull=" << (AllStringsAreNull(pDVR
->GetArrays()[j
].mpStringArray
, pDVR
->GetArrayLength())?"YES":"NO") <<
2643 " takeNumeric=" << (pCodeGen
->takeNumeric()?"YES":"NO") <<
2644 " takeString=" << (pCodeGen
->takeString()?"YES":"NO"));
2646 if (pDVR
->GetArrays()[j
].mpNumericArray
||
2647 (pDVR
->GetArrays()[j
].mpNumericArray
== NULL
&&
2648 pDVR
->GetArrays()[j
].mpStringArray
== NULL
))
2650 if (pDVR
->GetArrays()[j
].mpNumericArray
&&
2651 pCodeGen
->takeNumeric() &&
2652 pDVR
->GetArrays()[j
].mpStringArray
&&
2653 pCodeGen
->takeString())
2655 // Function takes numbers or strings, there are both
2656 SAL_INFO("sc.opencl", "Numbers and strings and that is OK");
2657 mvSubArguments
.push_back(
2658 DynamicKernelArgumentRef(
2659 new DynamicKernelMixedSlidingArgument(mCalcConfig
,
2660 ts
, ft
->Children
[i
], mpCodeGen
, j
)));
2664 // Not sure I can figure out what case this exactly is;)
2665 SAL_INFO("sc.opencl", "The other case");
2666 mvSubArguments
.push_back(
2667 DynamicKernelArgumentRef(VectorRefFactory
<VectorRef
>(mCalcConfig
,
2668 ts
, ft
->Children
[i
], mpCodeGen
, j
)));
2673 // Ditto here. This is such crack.
2674 SAL_INFO("sc.opencl", "The outer other case (can't figure out what it exactly means)");
2675 mvSubArguments
.push_back(
2676 DynamicKernelArgumentRef(VectorRefFactory
2677 <DynamicKernelStringArgument
>(mCalcConfig
,
2678 ts
, ft
->Children
[i
], mpCodeGen
, j
)));
2682 else if (pChild
->GetType() == formula::svSingleVectorRef
)
2684 const formula::SingleVectorRefToken
* pSVR
=
2685 static_cast<const formula::SingleVectorRefToken
*>(pChild
);
2687 SAL_INFO("sc.opencl", "i=" << i
<<
2688 " mpNumericArray=" << pSVR
->GetArray().mpNumericArray
<<
2689 " mpStringArray=" << pSVR
->GetArray().mpStringArray
<<
2690 " allStringsAreNull=" << (AllStringsAreNull(pSVR
->GetArray().mpStringArray
, pSVR
->GetArrayLength())?"YES":"NO") <<
2691 " takeNumeric=" << (pCodeGen
->takeNumeric()?"YES":"NO") <<
2692 " takeString=" << (pCodeGen
->takeString()?"YES":"NO"));
2694 if (pSVR
->GetArray().mpNumericArray
&&
2695 pCodeGen
->takeNumeric() &&
2696 pSVR
->GetArray().mpStringArray
&&
2697 pCodeGen
->takeString())
2699 // Function takes numbers or strings, there are both
2700 SAL_INFO("sc.opencl", "Numbers and strings and that is OK");
2701 mvSubArguments
.push_back(
2702 DynamicKernelArgumentRef(new DynamicKernelMixedArgument(mCalcConfig
,
2703 ts
, ft
->Children
[i
])));
2705 else if (pSVR
->GetArray().mpNumericArray
&&
2706 pCodeGen
->takeNumeric() &&
2707 (AllStringsAreNull(pSVR
->GetArray().mpStringArray
, pSVR
->GetArrayLength()) || mCalcConfig
.meStringConversion
== ScCalcConfig::StringConversion::ZERO
))
2709 // Function takes numbers, and either there
2710 // are no strings, or there are strings but
2711 // they are to be treated as zero
2712 SAL_INFO("sc.opencl", "Maybe strings even if want numbers but should be treated as zero");
2713 mvSubArguments
.push_back(
2714 DynamicKernelArgumentRef(new VectorRef(mCalcConfig
, ts
,
2717 else if (pSVR
->GetArray().mpNumericArray
== NULL
&&
2718 pCodeGen
->takeNumeric() &&
2719 pSVR
->GetArray().mpStringArray
&&
2720 mCalcConfig
.meStringConversion
== ScCalcConfig::StringConversion::ZERO
)
2722 // Function takes numbers, and there are only
2723 // strings, but they are to be treated as zero
2724 SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2725 mvSubArguments
.push_back(
2726 DynamicKernelArgumentRef(new VectorRef(mCalcConfig
, ts
,
2729 else if (pSVR
->GetArray().mpStringArray
&&
2730 pCodeGen
->takeString())
2732 // There are strings, and the function takes strings.
2733 SAL_INFO("sc.opencl", "Strings only");
2734 mvSubArguments
.push_back(
2735 DynamicKernelArgumentRef(new DynamicKernelStringArgument(mCalcConfig
,
2736 ts
, ft
->Children
[i
])));
2738 else if (AllStringsAreNull(pSVR
->GetArray().mpStringArray
, pSVR
->GetArrayLength()) &&
2739 pSVR
->GetArray().mpNumericArray
== NULL
)
2741 // There are only empty cells. Push as an
2743 SAL_INFO("sc.opencl", "Only empty cells");
2744 mvSubArguments
.push_back(
2745 DynamicKernelArgumentRef(new VectorRef(mCalcConfig
, ts
,
2750 SAL_INFO("sc.opencl", "Fallback case, rejecting for OpenCL");
2751 throw UnhandledToken(pChild
,
2752 "Got unhandled case here", __FILE__
, __LINE__
);
2755 else if (pChild
->GetType() == formula::svDouble
)
2757 SAL_INFO("sc.opencl", "Constant number (?) case");
2758 mvSubArguments
.push_back(
2759 DynamicKernelArgumentRef(new DynamicKernelConstantArgument(mCalcConfig
, ts
,
2762 else if (pChild
->GetType() == formula::svString
2763 && pCodeGen
->takeString())
2765 SAL_INFO("sc.opencl", "Constant string (?) case");
2766 mvSubArguments
.push_back(
2767 DynamicKernelArgumentRef(new ConstStringArgument(mCalcConfig
, ts
,
2772 SAL_INFO("sc.opencl", "Fallback case, rejecting for OpenCL");
2773 throw UnhandledToken(pChild
, ("unhandled operand " + StackVarEnumToString(pChild
->GetType()) + " for ocPush").c_str());
2777 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpDiv(nResultSize
), nResultSize
));
2780 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpMul(nResultSize
), nResultSize
));
2783 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpSub(nResultSize
), nResultSize
));
2787 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpSum(nResultSize
), nResultSize
));
2790 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpAverage(nResultSize
), nResultSize
));
2793 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpMin(nResultSize
), nResultSize
));
2796 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpMax(nResultSize
), nResultSize
));
2799 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpCount(nResultSize
), nResultSize
));
2802 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpSumProduct
, nResultSize
));
2805 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpIRR
, nResultSize
));
2808 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpMIRR
, nResultSize
));
2811 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpPMT
, nResultSize
));
2814 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpIntrate
, nResultSize
));
2817 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpRRI
, nResultSize
));
2820 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpPPMT
, nResultSize
));
2823 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpFisher
, nResultSize
));
2826 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpFisherInv
, nResultSize
));
2829 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpGamma
, nResultSize
));
2832 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpSLN
, nResultSize
));
2835 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpGammaLn
, nResultSize
));
2838 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpGauss
, nResultSize
));
2841 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpGeoMean));
2844 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpHarMean
, nResultSize
));
2847 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpLessEqual
, nResultSize
));
2850 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpLess
, nResultSize
));
2853 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpEqual
, nResultSize
));
2856 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpGreater
, nResultSize
));
2859 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpSYD
, nResultSize
));
2862 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpCorrel
, nResultSize
));
2865 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpCos
, nResultSize
));
2867 case ocNegBinomVert
:
2868 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpNegbinomdist
, nResultSize
));
2871 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpPearson
, nResultSize
));
2874 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpRsq
, nResultSize
));
2877 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpCsc
, nResultSize
));
2880 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpISPMT
, nResultSize
));
2883 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2884 ft
->Children
[i
], new OpDuration
, nResultSize
));
2887 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2888 ft
->Children
[i
], new OpSinh
, nResultSize
));
2891 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2892 ft
->Children
[i
], new OpAbs
, nResultSize
));
2895 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2896 ft
->Children
[i
], new OpPV
, nResultSize
));
2899 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2900 ft
->Children
[i
], new OpSin
, nResultSize
));
2903 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2904 ft
->Children
[i
], new OpTan
, nResultSize
));
2907 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2908 ft
->Children
[i
], new OpTanH
, nResultSize
));
2911 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2912 ft
->Children
[i
], new OpStandard
, nResultSize
));
2915 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2916 ft
->Children
[i
], new OpWeibull
, nResultSize
));
2919 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2920 ft->Children[i],new OpMedian));
2923 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2924 ft
->Children
[i
], new OpDDB
, nResultSize
));
2927 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2928 ft
->Children
[i
], new OpFV
, nResultSize
));
2931 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2932 ft
->Children
[i
], new OpSumIfs
, nResultSize
));
2935 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2936 ft->Children[i],new OpVDB));
2939 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2940 ft
->Children
[i
], new OpKurt
, nResultSize
));
2943 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
2944 ft->Children[i], new OpNper));
2947 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2948 ft
->Children
[i
], new OpNormdist
, nResultSize
));
2951 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2952 ft
->Children
[i
], new OpArcCos
, nResultSize
));
2955 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2956 ft
->Children
[i
], new OpSqrt
, nResultSize
));
2959 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2960 ft
->Children
[i
], new OpArcCosHyp
, nResultSize
));
2963 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2964 ft
->Children
[i
], new OpNPV
, nResultSize
));
2967 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2968 ft
->Children
[i
], new OpNormsdist
, nResultSize
));
2971 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2972 ft
->Children
[i
], new OpNorminv
, nResultSize
));
2975 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2976 ft
->Children
[i
], new OpNormsinv
, nResultSize
));
2979 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2980 ft
->Children
[i
], new OpPermut
, nResultSize
));
2982 case ocPermutationA
:
2983 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2984 ft
->Children
[i
], new OpPermutationA
, nResultSize
));
2987 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2988 ft
->Children
[i
], new OpPhi
, nResultSize
));
2991 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2992 ft
->Children
[i
], new OpIPMT
, nResultSize
));
2995 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
2996 ft
->Children
[i
], new OpConfidence
, nResultSize
));
2999 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3000 ft
->Children
[i
], new OpIntercept
, nResultSize
));
3003 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3004 new OpDB
, nResultSize
));
3007 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3008 ft
->Children
[i
], new OpLogInv
, nResultSize
));
3011 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3012 ft
->Children
[i
], new OpArcCot
, nResultSize
));
3015 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3016 ft
->Children
[i
], new OpCosh
, nResultSize
));
3019 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3020 ft
->Children
[i
], new OpCritBinom
, nResultSize
));
3023 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3024 ft
->Children
[i
], new OpArcCotHyp
, nResultSize
));
3027 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3028 ft
->Children
[i
], new OpArcSin
, nResultSize
));
3031 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3032 ft
->Children
[i
], new OpArcSinHyp
, nResultSize
));
3035 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3036 ft
->Children
[i
], new OpArcTan
, nResultSize
));
3039 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3040 ft
->Children
[i
], new OpArcTanH
, nResultSize
));
3043 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3044 ft
->Children
[i
], new OpBitAnd
, nResultSize
));
3047 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3048 ft
->Children
[i
], new OpForecast
, nResultSize
));
3051 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3052 ft
->Children
[i
], new OpLogNormDist
, nResultSize
));
3055 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3056 ft->Children[i], new OpGammaDist));
3059 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3060 ft
->Children
[i
], new OpLn
, nResultSize
));
3063 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3064 ft
->Children
[i
], new OpRound
, nResultSize
));
3067 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3068 ft
->Children
[i
], new OpCot
, nResultSize
));
3071 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3072 ft
->Children
[i
], new OpCoth
, nResultSize
));
3075 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3076 ft
->Children
[i
], new OpFdist
, nResultSize
));
3079 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3080 ft
->Children
[i
], new OpVar
, nResultSize
));
3083 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3084 ft->Children[i],new OpChiDist));
3088 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3089 ft
->Children
[i
], new OpPower
, nResultSize
));
3092 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3093 ft
->Children
[i
], new OpOdd
, nResultSize
));
3096 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3097 ft->Children[i],new OpChiSqDist));
3100 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3101 ft->Children[i],new OpChiSqInv));
3104 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3105 ft->Children[i], new OpGammaInv));
3108 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3109 ft
->Children
[i
], new OpFloor
, nResultSize
));
3112 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3113 ft->Children[i], new OpFInv));
3116 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3117 ft
->Children
[i
], new OpFTest
, nResultSize
));
3120 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3121 ft
->Children
[i
], new OpB
, nResultSize
));
3124 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3125 ft
->Children
[i
], new OpBetaDist
, nResultSize
));
3128 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3129 ft
->Children
[i
], new OpCscH
, nResultSize
));
3132 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3133 ft
->Children
[i
], new OpExp
, nResultSize
));
3136 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3137 ft
->Children
[i
], new OpLog10
, nResultSize
));
3140 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3141 ft
->Children
[i
], new OpExponDist
, nResultSize
));
3144 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3145 ft
->Children
[i
], new OpAverageIfs
, nResultSize
));
3148 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3149 ft
->Children
[i
], new OpCountIfs
, nResultSize
));
3152 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3153 ft
->Children
[i
], new OpCombinA
, nResultSize
));
3156 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3157 ft
->Children
[i
], new OpEven
, nResultSize
));
3160 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3161 ft
->Children
[i
], new OpLog
, nResultSize
));
3164 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3165 ft
->Children
[i
], new OpMod
, nResultSize
));
3168 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3169 ft
->Children
[i
], new OpTrunc
, nResultSize
));
3172 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3173 ft
->Children
[i
], new OpSkew
, nResultSize
));
3176 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3177 ft
->Children
[i
], new OpArcTan2
, nResultSize
));
3180 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3181 ft
->Children
[i
], new OpBitOr
, nResultSize
));
3184 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3185 ft
->Children
[i
], new OpBitLshift
, nResultSize
));
3188 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3189 ft
->Children
[i
], new OpBitRshift
, nResultSize
));
3192 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3193 ft
->Children
[i
], new OpBitXor
, nResultSize
));
3196 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3197 ft->Children[i],new OpChiInv));
3200 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3201 ft
->Children
[i
], new OpPoisson
, nResultSize
));
3204 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3205 ft
->Children
[i
], new OpSumSQ
, nResultSize
));
3208 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3209 ft
->Children
[i
], new OpSkewp
, nResultSize
));
3212 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3213 ft
->Children
[i
], new OpBinomdist
, nResultSize
));
3216 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3217 ft
->Children
[i
], new OpVarP
, nResultSize
));
3220 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3221 ft
->Children
[i
], new OpCeil
, nResultSize
));
3224 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3225 ft
->Children
[i
], new OpCombin
, nResultSize
));
3228 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3229 ft
->Children
[i
], new OpDevSq
, nResultSize
));
3232 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3233 ft
->Children
[i
], new OpStDev
, nResultSize
));
3236 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3237 ft
->Children
[i
], new OpSlope
, nResultSize
));
3240 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3241 ft
->Children
[i
], new OpSTEYX
, nResultSize
));
3244 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3245 ft
->Children
[i
], new OpZTest
, nResultSize
));
3248 mvSubArguments
.push_back(
3249 DynamicKernelArgumentRef(new DynamicKernelPiArgument(mCalcConfig
, ts
,
3253 mvSubArguments
.push_back(
3254 DynamicKernelArgumentRef(new DynamicKernelRandomArgument(mCalcConfig
, ts
,
3258 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3259 ft
->Children
[i
], new OpProduct
, nResultSize
));
3261 /*case ocHypGeomDist:
3262 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3263 ft->Children[i],new OpHypGeomDist));
3266 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3267 ft
->Children
[i
], new OpSumX2MY2
, nResultSize
));
3270 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3271 ft
->Children
[i
], new OpSumX2PY2
, nResultSize
));
3274 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3275 ft->Children[i],new OpBetainv));
3278 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3279 ft
->Children
[i
], new OpTTest
, nResultSize
));
3282 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3283 ft
->Children
[i
], new OpTDist
, nResultSize
));
3286 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3287 ft->Children[i], new OpTInv));
3290 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3291 ft
->Children
[i
], new OpSumXMY2
, nResultSize
));
3294 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3295 ft
->Children
[i
], new OpStDevP
, nResultSize
));
3298 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3299 ft
->Children
[i
], new OpCovar
, nResultSize
));
3302 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3303 ft
->Children
[i
], new OpAnd
, nResultSize
));
3306 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3307 ft
->Children
[i
], new OpVLookup
, nResultSize
));
3310 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3311 ft
->Children
[i
], new OpOr
, nResultSize
));
3314 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3315 ft
->Children
[i
], new OpNot
, nResultSize
));
3318 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3319 ft
->Children
[i
], new OpXor
, nResultSize
));
3322 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3323 ft
->Children
[i
], new OpDmax
, nResultSize
));
3326 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3327 ft
->Children
[i
], new OpDmin
, nResultSize
));
3330 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3331 ft
->Children
[i
], new OpDproduct
, nResultSize
));
3334 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3335 ft
->Children
[i
], new OpDaverage
, nResultSize
));
3338 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3339 ft
->Children
[i
], new OpDstdev
, nResultSize
));
3342 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3343 ft
->Children
[i
], new OpDstdevp
, nResultSize
));
3346 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3347 ft
->Children
[i
], new OpDsum
, nResultSize
));
3350 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3351 ft
->Children
[i
], new OpDvar
, nResultSize
));
3354 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3355 ft
->Children
[i
], new OpDvarp
, nResultSize
));
3358 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3359 ft
->Children
[i
], new OpAverageIf
, nResultSize
));
3362 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3363 ft
->Children
[i
], new OpDcount
, nResultSize
));
3366 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3367 ft
->Children
[i
], new OpDcount2
, nResultSize
));
3370 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3371 ft
->Children
[i
], new OpDeg
, nResultSize
));
3374 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3375 ft
->Children
[i
], new OpRoundUp
, nResultSize
));
3378 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3379 ft
->Children
[i
], new OpRoundDown
, nResultSize
));
3382 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3383 ft
->Children
[i
], new OpInt
, nResultSize
));
3386 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3387 ft
->Children
[i
], new OpRadians
, nResultSize
));
3390 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3391 ft
->Children
[i
], new OpCountIf
, nResultSize
));
3394 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3395 ft
->Children
[i
], new OpIsEven
, nResultSize
));
3398 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3399 ft
->Children
[i
], new OpIsOdd
, nResultSize
));
3402 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3403 ft
->Children
[i
], new OpFact
, nResultSize
));
3406 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3407 ft
->Children
[i
], new OpMinA
, nResultSize
));
3410 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3411 ft
->Children
[i
], new OpCountA
, nResultSize
));
3414 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3415 ft
->Children
[i
], new OpMaxA
, nResultSize
));
3418 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3419 ft
->Children
[i
], new OpAverageA
, nResultSize
));
3422 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3423 ft
->Children
[i
], new OpVarA
, nResultSize
));
3426 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3427 ft
->Children
[i
], new OpVarPA
, nResultSize
));
3430 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3431 ft
->Children
[i
], new OpStDevA
, nResultSize
));
3434 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3435 ft
->Children
[i
], new OpStDevPA
, nResultSize
));
3438 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3439 ft
->Children
[i
], new OpSec
, nResultSize
));
3442 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3443 ft
->Children
[i
], new OpSecH
, nResultSize
));
3446 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3447 ft
->Children
[i
], new OpSumIf
, nResultSize
));
3450 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3451 ft
->Children
[i
], new OpNegSub
, nResultSize
));
3454 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3455 ft
->Children
[i
], new OpAveDev
, nResultSize
));
3458 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3459 ft
->Children
[i
], new OpIf
, nResultSize
));
3462 if (!(pChild
->GetExternal().compareTo(OUString(
3463 "com.sun.star.sheet.addin.Analysis.getEffect"))))
3465 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpEffective
, nResultSize
));
3467 else if (!(pChild
->GetExternal().compareTo(OUString(
3468 "com.sun.star.sheet.addin.Analysis.getCumipmt"))))
3470 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpCumipmt
, nResultSize
));
3472 else if (!(pChild
->GetExternal().compareTo(OUString(
3473 "com.sun.star.sheet.addin.Analysis.getNominal"))))
3475 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpNominal
, nResultSize
));
3477 else if (!(pChild
->GetExternal().compareTo(OUString(
3478 "com.sun.star.sheet.addin.Analysis.getCumprinc"))))
3480 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpCumprinc
, nResultSize
));
3482 else if (!(pChild
->GetExternal().compareTo(OUString(
3483 "com.sun.star.sheet.addin.Analysis.getXnpv"))))
3485 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpXNPV
, nResultSize
));
3487 else if (!(pChild
->GetExternal().compareTo(OUString(
3488 "com.sun.star.sheet.addin.Analysis.getPricemat"))))
3490 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpPriceMat
, nResultSize
));
3492 else if (!(pChild
->GetExternal().compareTo(OUString(
3493 "com.sun.star.sheet.addin.Analysis.getReceived"))))
3495 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpReceived
, nResultSize
));
3497 else if (!(pChild
->GetExternal().compareTo(OUString(
3498 "com.sun.star.sheet.addin.Analysis.getTbilleq"))))
3500 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpTbilleq
, nResultSize
));
3502 else if (!(pChild
->GetExternal().compareTo(OUString(
3503 "com.sun.star.sheet.addin.Analysis.getTbillprice"))))
3505 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpTbillprice
, nResultSize
));
3507 else if (!(pChild
->GetExternal().compareTo(OUString(
3508 "com.sun.star.sheet.addin.Analysis.getTbillyield"))))
3510 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpTbillyield
, nResultSize
));
3512 else if (!(pChild
->GetExternal().compareTo(OUString(
3513 "com.sun.star.sheet.addin.Analysis.getFvschedule"))))
3515 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpFvschedule
, nResultSize
));
3517 /*else if ( !(pChild->GetExternal().compareTo(OUString(
3518 "com.sun.star.sheet.addin.Analysis.getYield"))))
3520 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], new OpYield));
3522 else if (!(pChild
->GetExternal().compareTo(OUString(
3523 "com.sun.star.sheet.addin.Analysis.getYielddisc"))))
3525 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpYielddisc
, nResultSize
));
3527 else if (!(pChild
->GetExternal().compareTo(OUString(
3528 "com.sun.star.sheet.addin.Analysis.getYieldmat"))))
3530 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpYieldmat
, nResultSize
));
3532 else if (!(pChild
->GetExternal().compareTo(OUString(
3533 "com.sun.star.sheet.addin.Analysis.getAccrintm"))))
3535 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpAccrintm
, nResultSize
));
3537 else if (!(pChild
->GetExternal().compareTo(OUString(
3538 "com.sun.star.sheet.addin.Analysis.getCoupdaybs"))))
3540 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpCoupdaybs
, nResultSize
));
3542 else if (!(pChild
->GetExternal().compareTo(OUString(
3543 "com.sun.star.sheet.addin.Analysis.getDollarde"))))
3545 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpDollarde
, nResultSize
));
3547 else if (!(pChild
->GetExternal().compareTo(OUString(
3548 "com.sun.star.sheet.addin.Analysis.getDollarfr"))))
3550 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpDollarfr
, nResultSize
));
3552 else if (!(pChild
->GetExternal().compareTo(OUString(
3553 "com.sun.star.sheet.addin.Analysis.getCoupdays"))))
3555 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpCoupdays
, nResultSize
));
3557 else if (!(pChild
->GetExternal().compareTo(OUString(
3558 "com.sun.star.sheet.addin.Analysis.getCoupdaysnc"))))
3560 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpCoupdaysnc
, nResultSize
));
3562 else if (!(pChild
->GetExternal().compareTo(OUString(
3563 "com.sun.star.sheet.addin.Analysis.getDisc"))))
3565 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpDISC
, nResultSize
));
3567 else if (!(pChild
->GetExternal().compareTo(OUString(
3568 "com.sun.star.sheet.addin.Analysis.getIntrate"))))
3570 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
], new OpINTRATE
, nResultSize
));
3572 else if (!(pChild
->GetExternal().compareTo(OUString(
3573 "com.sun.star.sheet.addin.Analysis.getPrice"))))
3575 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3576 ft
->Children
[i
], new OpPrice
, nResultSize
));
3578 else if (!(pChild
->GetExternal().compareTo(OUString(
3579 "com.sun.star.sheet.addin.Analysis.getCoupnum"))))
3581 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3582 new OpCoupnum
, nResultSize
));
3584 /*else if ( !(pChild->GetExternal().compareTo(OUString(
3585 "com.sun.star.sheet.addin.Analysis.getDuration"))))
3587 mvSubArguments.push_back(
3588 SoPHelper(mCalcConfig, ts, ft->Children[i], new OpDuration_ADD));
3590 else if (!(pChild
->GetExternal().compareTo(OUString(
3591 "com.sun.star.sheet.addin.Analysis.getAmordegrc"))))
3593 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3594 new OpAmordegrc
, nResultSize
));
3596 else if (!(pChild
->GetExternal().compareTo(OUString(
3597 "com.sun.star.sheet.addin.Analysis.getAmorlinc"))))
3599 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3600 new OpAmorlinc
, nResultSize
));
3602 else if (!(pChild
->GetExternal().compareTo(OUString(
3603 "com.sun.star.sheet.addin.Analysis.getMduration"))))
3605 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3606 new OpMDuration
, nResultSize
));
3608 else if (!(pChild
->GetExternal().compareTo(OUString(
3609 "com.sun.star.sheet.addin.Analysis.getXirr"))))
3611 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3612 new OpXirr
, nResultSize
));
3614 else if (!(pChild
->GetExternal().compareTo(OUString(
3615 "com.sun.star.sheet.addin.Analysis.getOddlprice"))))
3617 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3618 ft
->Children
[i
], new OpOddlprice
, nResultSize
));
3620 else if (!(pChild
->GetExternal().compareTo(OUString(
3621 "com.sun.star.sheet.addin.Analysis.getOddlyield"))))
3623 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3624 new OpOddlyield
, nResultSize
));
3626 else if (!(pChild
->GetExternal().compareTo(OUString(
3627 "com.sun.star.sheet.addin.Analysis.getPricedisc"))))
3629 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
,
3630 ft
->Children
[i
], new OpPriceDisc
, nResultSize
));
3632 else if (!(pChild
->GetExternal().compareTo(OUString(
3633 "com.sun.star.sheet.addin.Analysis.getCouppcd"))))
3635 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3636 new OpCouppcd
, nResultSize
));
3638 else if (!(pChild
->GetExternal().compareTo(OUString(
3639 "com.sun.star.sheet.addin.Analysis.getCoupncd"))))
3641 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3642 new OpCoupncd
, nResultSize
));
3644 else if (!(pChild
->GetExternal().compareTo(OUString(
3645 "com.sun.star.sheet.addin.Analysis.getAccrint"))))
3647 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3648 new OpAccrint
, nResultSize
));
3650 else if (!(pChild
->GetExternal().compareTo(OUString(
3651 "com.sun.star.sheet.addin.Analysis.getSqrtpi"))))
3653 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3654 new OpSqrtPi
, nResultSize
));
3656 else if (!(pChild
->GetExternal().compareTo(OUString(
3657 "com.sun.star.sheet.addin.Analysis.getConvert"))))
3659 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3660 new OpConvert
, nResultSize
));
3662 else if (!(pChild
->GetExternal().compareTo(OUString(
3663 "com.sun.star.sheet.addin.Analysis.getIseven"))))
3665 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3666 new OpIsEven
, nResultSize
));
3668 else if (!(pChild
->GetExternal().compareTo(OUString(
3669 "com.sun.star.sheet.addin.Analysis.getIsodd"))))
3671 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3672 new OpIsOdd
, nResultSize
));
3674 else if (!(pChild
->GetExternal().compareTo(OUString(
3675 "com.sun.star.sheet.addin.Analysis.getMround"))))
3677 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3678 new OpMROUND
, nResultSize
));
3680 else if (!(pChild
->GetExternal().compareTo(OUString(
3681 "com.sun.star.sheet.addin.Analysis.getQuotient"))))
3683 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3684 new OpQuotient
, nResultSize
));
3686 else if (!(pChild
->GetExternal().compareTo(OUString(
3687 "com.sun.star.sheet.addin.Analysis.getSeriessum"))))
3689 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3690 new OpSeriesSum
, nResultSize
));
3692 else if (!(pChild
->GetExternal().compareTo(OUString(
3693 "com.sun.star.sheet.addin.Analysis.getBesselj"))))
3695 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3696 new OpBesselj
, nResultSize
));
3698 else if (!(pChild
->GetExternal().compareTo(OUString(
3699 "com.sun.star.sheet.addin.Analysis.getGestep"))))
3701 mvSubArguments
.push_back(SoPHelper(mCalcConfig
, ts
, ft
->Children
[i
],
3702 new OpGestep
, nResultSize
));
3705 throw UnhandledToken(pChild
, "unhandled opcode");
3709 throw UnhandledToken(pChild
, "unhandled opcode");
3714 class DynamicKernel
: public CompiledFormula
3717 DynamicKernel( const ScCalcConfig
& config
, const FormulaTreeNodeRef
& r
, int nResultSize
);
3718 virtual ~DynamicKernel();
3720 static DynamicKernel
* create( const ScCalcConfig
& config
, ScTokenArray
& rCode
, int nResultSize
);
3722 /// OpenCL code generation
3725 /// Produce kernel hash
3726 std::string
GetMD5();
3728 /// Create program, build, and create kerenl
3729 /// TODO cache results based on kernel body hash
3730 /// TODO: abstract OpenCL part out into OpenCL wrapper.
3731 void CreateKernel();
3733 /// Prepare buffers, marshal them to GPU, and launch the kernel
3734 /// TODO: abstract OpenCL part out into OpenCL wrapper.
3735 void Launch( size_t nr
);
3737 cl_mem
GetResultBuffer() const { return mpResClmem
; }
3740 ScCalcConfig mCalcConfig
;
3741 FormulaTreeNodeRef mpRoot
;
3743 std::string mKernelSignature
, mKernelHash
;
3744 std::string mFullProgramSrc
;
3745 cl_program mpProgram
;
3747 cl_mem mpResClmem
; // Results
3748 std::set
<std::string
> inlineDecl
;
3749 std::set
<std::string
> inlineFun
;
3754 DynamicKernel::DynamicKernel( const ScCalcConfig
& config
, const FormulaTreeNodeRef
& r
, int nResultSize
) :
3755 mCalcConfig(config
),
3760 mnResultSize(nResultSize
) {}
3762 DynamicKernel::~DynamicKernel()
3767 err
= clReleaseMemObject(mpResClmem
);
3768 SAL_WARN_IF(err
!= CL_SUCCESS
, "sc.opencl", "clReleaseMemObject failed: " << ::opencl::errorString(err
));
3772 SAL_INFO("sc.opencl", "Releasing kernel " << mpKernel
);
3773 err
= clReleaseKernel(mpKernel
);
3774 SAL_WARN_IF(err
!= CL_SUCCESS
, "sc.opencl", "clReleaseKernel failed: " << ::opencl::errorString(err
));
3776 // mpProgram is not going to be released here -- it's cached.
3779 void DynamicKernel::CodeGen()
3781 // Travese the tree of expression and declare symbols used
3782 const DynamicKernelArgument
* DK
= mSyms
.DeclRefArg
<DynamicKernelSoPArguments
>(mCalcConfig
, mpRoot
, new OpNop(mnResultSize
), mnResultSize
);
3784 std::stringstream decl
;
3785 if (::opencl::gpuEnv
.mnKhrFp64Flag
)
3787 decl
<< "#if __OPENCL_VERSION__ < 120\n";
3788 decl
<< "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
3791 else if (::opencl::gpuEnv
.mnAmdFp64Flag
)
3793 decl
<< "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
3797 DK
->DumpInlineFun(inlineDecl
, inlineFun
);
3798 for (std::set
<std::string
>::iterator set_iter
= inlineDecl
.begin();
3799 set_iter
!= inlineDecl
.end(); ++set_iter
)
3804 for (std::set
<std::string
>::iterator set_iter
= inlineFun
.begin();
3805 set_iter
!= inlineFun
.end(); ++set_iter
)
3809 mSyms
.DumpSlidingWindowFunctions(decl
);
3810 mKernelSignature
= DK
->DumpOpName();
3811 decl
<< "__kernel void DynamicKernel" << mKernelSignature
;
3812 decl
<< "(__global double *result, ";
3813 DK
->GenSlidingWindowDecl(decl
);
3814 decl
<< ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
3815 DK
->GenSlidingWindowDeclRef(false) << ";\n}\n";
3816 mFullProgramSrc
= decl
.str();
3817 #ifdef SAL_DETAIL_ENABLE_LOG_INFO
3818 std::stringstream area
;
3819 if (mKernelSignature
[0] == '_')
3820 area
<< "sc.opencl.source." << mKernelSignature
.substr(1, std::string::npos
);
3822 area
<< "sc.opencl.source." << mKernelSignature
;
3823 SAL_INFO(area
.str().c_str(), "Program to be compiled:\n" << linenumberify(mFullProgramSrc
));
3827 std::string
DynamicKernel::GetMD5()
3829 if (mKernelHash
.empty())
3831 std::stringstream md5s
;
3832 // Compute MD5SUM of kernel body to obtain the name
3833 sal_uInt8 result
[RTL_DIGEST_LENGTH_MD5
];
3835 mFullProgramSrc
.c_str(),
3836 mFullProgramSrc
.length(), result
,
3837 RTL_DIGEST_LENGTH_MD5
);
3838 for (int i
= 0; i
< RTL_DIGEST_LENGTH_MD5
; i
++)
3840 md5s
<< std::hex
<< (int)result
[i
];
3842 mKernelHash
= md5s
.str();
3848 void DynamicKernel::CreateKernel()
3855 std::string kname
= "DynamicKernel" + mKernelSignature
;
3856 // Compile kernel here!!!
3857 // Obtain cl context
3858 ::opencl::KernelEnv kEnv
;
3859 ::opencl::setKernelEnv(&kEnv
);
3860 const char* src
= mFullProgramSrc
.c_str();
3861 static std::string lastOneKernelHash
= "";
3862 static std::string lastSecondKernelHash
= "";
3863 static cl_program lastOneProgram
= NULL
;
3864 static cl_program lastSecondProgram
= NULL
;
3865 std::string KernelHash
= mKernelSignature
+ GetMD5();
3866 if (lastOneKernelHash
== KernelHash
&& lastOneProgram
)
3868 mpProgram
= lastOneProgram
;
3870 else if (lastSecondKernelHash
== KernelHash
&& lastSecondProgram
)
3872 mpProgram
= lastSecondProgram
;
3875 { // doesn't match the last compiled formula.
3877 if (lastSecondProgram
)
3879 SAL_INFO("sc.opencl", "Releasing program " << lastSecondProgram
);
3880 err
= clReleaseProgram(lastSecondProgram
);
3881 SAL_WARN_IF(err
!= CL_SUCCESS
, "sc.opencl", "clReleaseProgram failed: " << ::opencl::errorString(err
));
3882 lastSecondProgram
= NULL
;
3884 if (::opencl::buildProgramFromBinary("",
3885 &::opencl::gpuEnv
, KernelHash
.c_str(), 0))
3887 mpProgram
= ::opencl::gpuEnv
.mpArryPrograms
[0];
3888 ::opencl::gpuEnv
.mpArryPrograms
[0] = NULL
;
3892 mpProgram
= clCreateProgramWithSource(kEnv
.mpkContext
, 1,
3894 if (err
!= CL_SUCCESS
)
3895 throw OpenCLError("clCreateProgramWithSource", err
, __FILE__
, __LINE__
);
3896 SAL_INFO("sc.opencl", "Created program " << mpProgram
);
3898 err
= clBuildProgram(mpProgram
, 1,
3899 ::opencl::gpuEnv
.mpArryDevsID
, "", NULL
, NULL
);
3900 if (err
!= CL_SUCCESS
)
3902 #if OSL_DEBUG_LEVEL > 0
3903 if (err
== CL_BUILD_PROGRAM_FAILURE
)
3905 cl_build_status stat
;
3906 cl_int e
= clGetProgramBuildInfo(
3907 mpProgram
, ::opencl::gpuEnv
.mpArryDevsID
[0],
3908 CL_PROGRAM_BUILD_STATUS
, sizeof(cl_build_status
),
3911 e
!= CL_SUCCESS
, "sc.opencl",
3912 "after CL_BUILD_PROGRAM_FAILURE,"
3913 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
3914 " fails with " << ::opencl::errorString(e
));
3915 if (e
== CL_SUCCESS
)
3918 e
= clGetProgramBuildInfo(
3919 mpProgram
, ::opencl::gpuEnv
.mpArryDevsID
[0],
3920 CL_PROGRAM_BUILD_LOG
, 0, 0, &n
);
3922 e
!= CL_SUCCESS
|| n
== 0, "sc.opencl",
3923 "after CL_BUILD_PROGRAM_FAILURE,"
3924 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
3925 " fails with " << ::opencl::errorString(e
) << ", n=" << n
);
3926 if (e
== CL_SUCCESS
&& n
!= 0)
3928 std::vector
<char> log(n
);
3929 e
= clGetProgramBuildInfo(
3930 mpProgram
, ::opencl::gpuEnv
.mpArryDevsID
[0],
3931 CL_PROGRAM_BUILD_LOG
, n
, &log
[0], 0);
3933 e
!= CL_SUCCESS
|| n
== 0, "sc.opencl",
3934 "after CL_BUILD_PROGRAM_FAILURE,"
3935 " clGetProgramBuildInfo("
3936 "CL_PROGRAM_BUILD_LOG) fails with " << ::opencl::errorString(e
));
3937 if (e
== CL_SUCCESS
)
3940 "CL_BUILD_PROGRAM_FAILURE, status " << stat
3941 << ", log \"" << &log
[0] << "\"");
3946 throw OpenCLError("clBuildProgram", err
, __FILE__
, __LINE__
);
3948 SAL_INFO("sc.opencl", "Built program " << mpProgram
);
3950 // Generate binary out of compiled kernel.
3951 ::opencl::generatBinFromKernelSource(mpProgram
,
3952 (mKernelSignature
+ GetMD5()).c_str());
3954 lastSecondKernelHash
= lastOneKernelHash
;
3955 lastSecondProgram
= lastOneProgram
;
3956 lastOneKernelHash
= KernelHash
;
3957 lastOneProgram
= mpProgram
;
3959 mpKernel
= clCreateKernel(mpProgram
, kname
.c_str(), &err
);
3960 if (err
!= CL_SUCCESS
)
3961 throw OpenCLError("clCreateKernel", err
, __FILE__
, __LINE__
);
3962 SAL_INFO("sc.opencl", "Created kernel " << mpKernel
<< " with name " << kname
<< " in program " << mpProgram
);
3965 void DynamicKernel::Launch( size_t nr
)
3967 // Obtain cl context
3968 ::opencl::KernelEnv kEnv
;
3969 ::opencl::setKernelEnv(&kEnv
);
3972 mpResClmem
= clCreateBuffer(kEnv
.mpkContext
,
3973 (cl_mem_flags
)CL_MEM_READ_WRITE
| CL_MEM_ALLOC_HOST_PTR
,
3974 nr
* sizeof(double), NULL
, &err
);
3975 if (CL_SUCCESS
!= err
)
3976 throw OpenCLError("clCreateBuffer", err
, __FILE__
, __LINE__
);
3977 SAL_INFO("sc.opencl", "Created buffer " << mpResClmem
<< " size " << nr
<< "*" << sizeof(double) << "=" << (nr
*sizeof(double)));
3979 SAL_INFO("sc.opencl", "Kernel " << mpKernel
<< " arg " << 0 << ": cl_mem: " << mpResClmem
);
3980 err
= clSetKernelArg(mpKernel
, 0, sizeof(cl_mem
), (void*)&mpResClmem
);
3981 if (CL_SUCCESS
!= err
)
3982 throw OpenCLError("clSetKernelArg", err
, __FILE__
, __LINE__
);
3983 // The rest of buffers
3984 mSyms
.Marshal(mpKernel
, nr
, mpProgram
);
3985 size_t global_work_size
[] = { nr
};
3986 SAL_INFO("sc.opencl", "Enqueing kernel " << mpKernel
);
3987 err
= clEnqueueNDRangeKernel(kEnv
.mpkCmdQueue
, mpKernel
, 1, NULL
,
3988 global_work_size
, NULL
, 0, NULL
, NULL
);
3989 if (CL_SUCCESS
!= err
)
3990 throw OpenCLError("clEnqueueNDRangeKernel", err
, __FILE__
, __LINE__
);
3991 err
= clFlush(kEnv
.mpkCmdQueue
);
3992 if (CL_SUCCESS
!= err
)
3993 throw OpenCLError("clFlush", err
, __FILE__
, __LINE__
);
3996 // Symbol lookup. If there is no such symbol created, allocate one
3997 // kernel with argument with unique name and return so.
3998 // The template argument T must be a subclass of DynamicKernelArgument
3999 template<typename T
>
4000 const DynamicKernelArgument
* SymbolTable::DeclRefArg( const ScCalcConfig
& config
,
4001 FormulaTreeNodeRef t
, SlidingFunctionBase
* pCodeGen
, int nResultSize
)
4003 FormulaToken
* ref
= t
->GetFormulaToken();
4004 ArgumentMap::iterator it
= mSymbols
.find(ref
);
4005 if (it
== mSymbols
.end())
4007 // Allocate new symbols
4008 std::stringstream ss
;
4009 ss
<< "tmp" << mCurId
++;
4010 DynamicKernelArgumentRef
new_arg(new T(config
, ss
.str(), t
, pCodeGen
, nResultSize
));
4011 mSymbols
[ref
] = new_arg
;
4012 mParams
.push_back(new_arg
);
4013 return new_arg
.get();
4017 return it
->second
.get();
4021 FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() :
4022 FormulaGroupInterpreter() {}
4024 FormulaGroupInterpreterOpenCL::~FormulaGroupInterpreterOpenCL() {}
4026 ScMatrixRef
FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix
& )
4031 DynamicKernel
* DynamicKernel::create( const ScCalcConfig
& rConfig
, ScTokenArray
& rCode
, int nResultSize
)
4033 // Constructing "AST"
4034 FormulaTokenIterator
aCode(rCode
);
4035 std::list
<FormulaToken
*> aTokenList
;
4036 std::map
<FormulaToken
*, FormulaTreeNodeRef
> aHashMap
;
4038 while ((pCur
= const_cast<FormulaToken
*>(aCode
.Next())) != NULL
)
4040 OpCode eOp
= pCur
->GetOpCode();
4043 FormulaTreeNodeRef
pCurNode(new FormulaTreeNode(pCur
));
4044 sal_uInt8 nParamCount
= pCur
->GetParamCount();
4045 for (sal_uInt8 i
= 0; i
< nParamCount
; i
++)
4047 FormulaToken
* pTempFormula
= aTokenList
.back();
4048 aTokenList
.pop_back();
4049 if (pTempFormula
->GetOpCode() != ocPush
)
4051 if (aHashMap
.find(pTempFormula
) == aHashMap
.end())
4053 pCurNode
->Children
.push_back(aHashMap
[pTempFormula
]);
4057 FormulaTreeNodeRef pChildTreeNode
=
4059 new FormulaTreeNode(pTempFormula
));
4060 pCurNode
->Children
.push_back(pChildTreeNode
);
4063 std::reverse(pCurNode
->Children
.begin(), pCurNode
->Children
.end());
4064 aHashMap
[pCur
] = pCurNode
;
4066 aTokenList
.push_back(pCur
);
4069 FormulaTreeNodeRef Root
= FormulaTreeNodeRef(new FormulaTreeNode(NULL
));
4070 Root
->Children
.push_back(aHashMap
[aTokenList
.back()]);
4072 DynamicKernel
* pDynamicKernel
= new DynamicKernel(rConfig
, Root
, nResultSize
);
4074 // OpenCL source code generation and kernel compilation
4077 pDynamicKernel
->CodeGen();
4078 pDynamicKernel
->CreateKernel();
4080 catch (const UnhandledToken
& ut
)
4082 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut
.mMessage
<< " at " << ut
.mFile
<< ":" << ut
.mLineNumber
);
4083 delete pDynamicKernel
;
4088 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
4091 return pDynamicKernel
;
4094 CompiledFormula
* FormulaGroupInterpreterOpenCL::createCompiledFormula(
4095 ScFormulaCellGroup
& rGroup
, ScTokenArray
& rCode
)
4097 return DynamicKernel::create(maCalcConfig
, rCode
, rGroup
.mnLength
);
4102 class CLInterpreterResult
4104 DynamicKernel
* mpKernel
;
4106 SCROW mnGroupLength
;
4112 CLInterpreterResult() : mpKernel(NULL
), mnGroupLength(0), mpCLResBuf(NULL
), mpResBuf(NULL
) {}
4113 CLInterpreterResult( DynamicKernel
* pKernel
, SCROW nGroupLength
) :
4114 mpKernel(pKernel
), mnGroupLength(nGroupLength
), mpCLResBuf(NULL
), mpResBuf(NULL
) {}
4116 bool isValid() const { return mpKernel
!= NULL
; }
4118 void fetchResultFromKernel()
4124 mpCLResBuf
= mpKernel
->GetResultBuffer();
4126 // Obtain cl context
4127 ::opencl::KernelEnv kEnv
;
4128 ::opencl::setKernelEnv(&kEnv
);
4131 mpResBuf
= static_cast<double*>(clEnqueueMapBuffer(kEnv
.mpkCmdQueue
,
4133 CL_TRUE
, CL_MAP_READ
, 0,
4134 mnGroupLength
* sizeof(double), 0, NULL
, NULL
,
4137 if (err
!= CL_SUCCESS
)
4139 SAL_WARN("sc.opencl", "clEnqueueMapBuffer failed:: " << ::opencl::errorString(err
));
4145 bool pushResultToDocument( ScDocument
& rDoc
, const ScAddress
& rTopPos
)
4150 rDoc
.SetFormulaResults(rTopPos
, mpResBuf
, mnGroupLength
);
4152 // Obtain cl context
4153 ::opencl::KernelEnv kEnv
;
4154 ::opencl::setKernelEnv(&kEnv
);
4157 err
= clEnqueueUnmapMemObject(kEnv
.mpkCmdQueue
, mpCLResBuf
, mpResBuf
, 0, NULL
, NULL
);
4159 if (err
!= CL_SUCCESS
)
4161 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << ::opencl::errorString(err
));
4169 class CLInterpreterContext
4171 std::shared_ptr
<DynamicKernel
> mpKernelStore
; /// for managed kernel instance.
4172 DynamicKernel
* mpKernel
;
4174 SCROW mnGroupLength
;
4177 CLInterpreterContext( SCROW nGroupLength
) :
4178 mpKernel(NULL
), mnGroupLength(nGroupLength
) {}
4180 bool isValid() const
4182 return mpKernel
!= NULL
;
4185 void setManagedKernel( DynamicKernel
* pKernel
)
4187 mpKernelStore
.reset(pKernel
);
4191 #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
4192 void setUnmanagedKernel( DynamicKernel
* pKernel
)
4198 CLInterpreterResult
launchKernel()
4201 return CLInterpreterResult();
4206 mpKernel
->Launch(mnGroupLength
);
4208 catch (const UnhandledToken
& ut
)
4210 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled token: " << ut
.mMessage
<< " at " << ut
.mFile
<< ":" << ut
.mLineNumber
);
4211 return CLInterpreterResult();
4213 catch (const OpenCLError
& oce
)
4215 SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCL error from " << oce
.mFunction
<< ": " << ::opencl::errorString(oce
.mError
) << " at " << oce
.mFile
<< ":" << oce
.mLineNumber
);
4216 return CLInterpreterResult();
4218 catch (const Unhandled
& uh
)
4220 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled case at " << uh
.mFile
<< ":" << uh
.mLineNumber
);
4221 return CLInterpreterResult();
4225 SAL_WARN("sc.opencl", "Dynamic formula compiler: unhandled compiler error");
4226 return CLInterpreterResult();
4229 return CLInterpreterResult(mpKernel
, mnGroupLength
);
4234 CLInterpreterContext
createCLInterpreterContext( const ScCalcConfig
& rConfig
,
4235 ScFormulaCellGroupRef
& xGroup
, ScTokenArray
& rCode
)
4237 CLInterpreterContext
aCxt(xGroup
->mnLength
);
4239 #if ENABLE_THREADED_OPENCL_KERNEL_COMPILATION
4240 if (rGroup
.meKernelState
== sc::OpenCLKernelCompilationScheduled
||
4241 rGroup
.meKernelState
== sc::OpenCLKernelBinaryCreated
)
4243 if (rGroup
.meKernelState
== sc::OpenCLKernelCompilationScheduled
)
4245 ScFormulaCellGroup::sxCompilationThread
->maCompilationDoneCondition
.wait();
4246 ScFormulaCellGroup::sxCompilationThread
->maCompilationDoneCondition
.reset();
4249 // Kernel instance is managed by the formula group.
4250 aCxt
.setUnmanagedKernel(static_cast<DynamicKernel
*>(xGroup
->mpCompiledFormula
));
4254 assert(xGroup
->meCalcState
== sc::GroupCalcRunning
);
4255 aCxt
.setManagedKernel(static_cast<DynamicKernel
*>(DynamicKernel::create(rConfig
, rCode
, xGroup
->mnLength
)));
4258 aCxt
.setManagedKernel(static_cast<DynamicKernel
*>(DynamicKernel::create(rConfig
, rCode
, xGroup
->mnLength
)));
4264 void genRPNTokens( ScDocument
& rDoc
, const ScAddress
& rTopPos
, ScTokenArray
& rCode
)
4266 ScCompiler
aComp(&rDoc
, rTopPos
, rCode
);
4267 aComp
.SetGrammar(rDoc
.GetGrammar());
4268 // Disable special ordering for jump commands for the OpenCL interpreter.
4269 aComp
.EnableJumpCommandReorder(false);
4270 aComp
.CompileTokenArray(); // Regenerate RPN tokens.
4273 bool waitForResults()
4275 // Obtain cl context
4276 ::opencl::KernelEnv kEnv
;
4277 ::opencl::setKernelEnv(&kEnv
);
4279 cl_int err
= clFinish(kEnv
.mpkCmdQueue
);
4280 if (err
!= CL_SUCCESS
)
4281 SAL_WARN("sc.opencl", "clFinish failed: " << ::opencl::errorString(err
));
4283 return err
== CL_SUCCESS
;
4288 bool FormulaGroupInterpreterOpenCL::interpret( ScDocument
& rDoc
,
4289 const ScAddress
& rTopPos
, ScFormulaCellGroupRef
& xGroup
,
4290 ScTokenArray
& rCode
)
4292 MergeCalcConfig(rDoc
);
4294 genRPNTokens(rDoc
, rTopPos
, rCode
);
4296 CLInterpreterContext aCxt
= createCLInterpreterContext(maCalcConfig
, xGroup
, rCode
);
4297 if (!aCxt
.isValid())
4300 CLInterpreterResult aRes
= aCxt
.launchKernel();
4301 if (!aRes
.isValid())
4304 if (!waitForResults())
4307 aRes
.fetchResultFromKernel();
4309 return aRes
.pushResultToDocument(rDoc
, rTopPos
);
4312 }} // namespace sc::opencl
4314 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */