Stop leaking all ScPostIt instances.
[LibreOffice.git] / sc / source / core / opencl / formulagroupcl.cxx
blob35ba880d2618918de7562b61673c1f548b2a3e0b
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include "formulagroup.hxx"
11 #include "clkernelthread.hxx"
12 #include "grouptokenconverter.hxx"
13 #include "document.hxx"
14 #include "formulacell.hxx"
15 #include "tokenarray.hxx"
16 #include "compiler.hxx"
17 #include "interpre.hxx"
18 #include "formula/vectortoken.hxx"
19 #include "scmatrix.hxx"
21 #include "openclwrapper.hxx"
23 #include "op_financial.hxx"
24 #include "op_database.hxx"
25 #include "op_math.hxx"
26 #include "op_logical.hxx"
27 #include "op_statistical.hxx"
28 #include "op_array.hxx"
29 #include "op_spreadsheet.hxx"
30 #include "op_addin.hxx"
31 /// CONFIGURATIONS
32 // Comment out this to turn off FMIN and FMAX intrinsics
33 #define USE_FMIN_FMAX 1
34 #define REDUCE_THRESHOLD 4 // set to 4 for correctness testing. priority 1
35 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
36 #include "formulagroupcl_public.hxx"
37 #ifdef WIN32
38 #ifndef NAN
39 namespace {
40 static const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
42 #define NAN (*(const double*) __nan)
43 #endif
44 #endif
46 #include <list>
47 #include <map>
48 #include <iostream>
49 #include <sstream>
50 #include <algorithm>
51 #define MD5_KERNEL 1
52 #ifdef MD5_KERNEL
53 #include <rtl/digest.h>
54 #endif
55 #include <memory>
57 #include <boost/scoped_ptr.hpp>
59 #undef NO_FALLBACK_TO_SWINTERP /* undef this for non-TDD runs */
61 using namespace formula;
63 namespace sc { namespace opencl {
66 /// Map the buffer used by an argument and do necessary argument setting
67 size_t VectorRef::Marshal(cl_kernel k, int argno, int, cl_program)
69 FormulaToken *ref = mFormulaTree->GetFormulaToken();
70 double *pHostBuffer = NULL;
71 size_t szHostBuffer = 0;
72 if (ref->GetType() == formula::svSingleVectorRef) {
73 const formula::SingleVectorRefToken* pSVR =
74 dynamic_cast< const formula::SingleVectorRefToken* >(ref);
75 assert(pSVR);
76 pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
77 szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
78 #if 0
79 std::cerr << "Marshal a Single vector of size " << pSVR->GetArrayLength();
80 std::cerr << " at argument "<< argno << "\n";
81 #endif
82 } else if (ref->GetType() == formula::svDoubleVectorRef) {
83 const formula::DoubleVectorRefToken* pDVR =
84 dynamic_cast< const formula::DoubleVectorRefToken* >(ref);
85 assert(pDVR);
86 pHostBuffer = const_cast<double*>(
87 pDVR->GetArrays()[mnIndex].mpNumericArray);
88 szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
89 } else {
90 throw Unhandled();
92 // Obtain cl context
93 KernelEnv kEnv;
94 OpenclDevice::setKernelEnv(&kEnv);
95 cl_int err;
96 if (pHostBuffer)
98 mpClmem = clCreateBuffer(kEnv.mpkContext,
99 (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
100 szHostBuffer,
101 pHostBuffer, &err);
102 if (CL_SUCCESS != err)
103 throw OpenCLError(err);
105 else
107 if (szHostBuffer == 0)
108 szHostBuffer = sizeof(double); // a dummy small value
109 // Marshal as a buffer of NANs
110 mpClmem = clCreateBuffer(kEnv.mpkContext,
111 (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR,
112 szHostBuffer, NULL, &err);
113 if (CL_SUCCESS != err)
114 throw OpenCLError(err);
115 double *pNanBuffer = (double*)clEnqueueMapBuffer(
116 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
117 szHostBuffer, 0, NULL, NULL, &err);
118 if (CL_SUCCESS != err)
119 throw OpenCLError(err);
120 for (size_t i = 0; i < szHostBuffer/sizeof(double); i++)
121 pNanBuffer[i] = NAN;
122 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
123 pNanBuffer, 0, NULL, NULL);
126 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
127 if (CL_SUCCESS != err)
128 throw OpenCLError(err);
129 return 1;
132 /// Arguments that are actually compile-time constant string
133 /// Currently, only the hash is passed.
134 /// TBD(IJSUNG): pass also length and the actual string if there is a
135 /// hash function collision
136 class ConstStringArgument: public DynamicKernelArgument
138 public:
139 ConstStringArgument(const std::string &s,
140 FormulaTreeNodeRef ft):
141 DynamicKernelArgument(s, ft) {}
142 /// Generate declaration
143 virtual void GenDecl(std::stringstream &ss) const
145 ss << "unsigned " << mSymName;
147 virtual void GenDeclRef(std::stringstream &ss) const
149 ss << GenSlidingWindowDeclRef(false);
151 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
153 GenDecl(ss);
155 virtual std::string GenSlidingWindowDeclRef(bool=false) const
157 std::stringstream ss;
158 if (GetFormulaToken()->GetType() != formula::svString)
159 throw Unhandled();
160 FormulaToken *Tok = GetFormulaToken();
161 ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
162 return ss.str();
164 virtual size_t GetWindowSize(void) const
166 return 1;
168 /// Pass the 32-bit hash of the string to the kernel
169 virtual size_t Marshal(cl_kernel k, int argno, int, cl_program)
171 FormulaToken *ref = mFormulaTree->GetFormulaToken();
172 cl_uint hashCode = 0;
173 if (ref->GetType() == formula::svString)
175 const rtl::OUString s = ref->GetString().getString().toAsciiUpperCase();
176 hashCode = s.hashCode();
177 } else {
178 throw Unhandled();
180 // marshaling
181 // Obtain cl context
182 KernelEnv kEnv;
183 OpenclDevice::setKernelEnv(&kEnv);
184 // Pass the scalar result back to the rest of the formula kernel
185 cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode);
186 if (CL_SUCCESS != err)
187 throw OpenCLError(err);
188 return 1;
192 /// Arguments that are actually compile-time constants
193 class DynamicKernelConstantArgument: public DynamicKernelArgument
195 public:
196 DynamicKernelConstantArgument(const std::string &s,
197 FormulaTreeNodeRef ft):
198 DynamicKernelArgument(s, ft) {}
199 /// Generate declaration
200 virtual void GenDecl(std::stringstream &ss) const
202 ss << "double " << mSymName;
204 virtual void GenDeclRef(std::stringstream &ss) const
206 ss << mSymName;
208 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
210 GenDecl(ss);
212 virtual std::string GenSlidingWindowDeclRef(bool=false) const
214 if (GetFormulaToken()->GetType() != formula::svDouble)
215 throw Unhandled();
216 return mSymName;
218 virtual size_t GetWindowSize(void) const
220 return 1;
222 double GetDouble(void) const
224 FormulaToken *Tok = GetFormulaToken();
225 if (Tok->GetType() != formula::svDouble)
226 throw Unhandled();
227 return Tok->GetDouble();
229 /// Create buffer and pass the buffer to a given kernel
230 virtual size_t Marshal(cl_kernel k, int argno, int, cl_program)
232 double tmp = GetDouble();
233 // Pass the scalar result back to the rest of the formula kernel
234 cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
235 if (CL_SUCCESS != err)
236 throw OpenCLError(err);
237 return 1;
239 virtual cl_mem GetCLBuffer(void) const { return NULL; }
242 class DynamicKernelPiArgument: public DynamicKernelArgument
244 public:
245 DynamicKernelPiArgument(const std::string &s,
246 FormulaTreeNodeRef ft):
247 DynamicKernelArgument(s, ft) {}
248 /// Generate declaration
249 virtual void GenDecl(std::stringstream &ss) const
251 ss << "double " << mSymName;
253 virtual void GenDeclRef(std::stringstream &ss) const
255 ss << "3.14159265358979";
257 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
259 GenDecl(ss);
261 virtual std::string GenSlidingWindowDeclRef(bool=false) const
263 return mSymName;
265 virtual size_t GetWindowSize(void) const
267 return 1;
269 /// Create buffer and pass the buffer to a given kernel
270 virtual size_t Marshal(cl_kernel k, int argno, int, cl_program)
272 double tmp = 0.0;
273 // Pass the scalar result back to the rest of the formula kernel
274 cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
275 if (CL_SUCCESS != err)
276 throw OpenCLError(err);
277 return 1;
281 class DynamicKernelRandomArgument: public DynamicKernelArgument
283 public:
284 DynamicKernelRandomArgument(const std::string &s,
285 FormulaTreeNodeRef ft):
286 DynamicKernelArgument(s, ft) {}
287 /// Generate declaration
288 virtual void GenDecl(std::stringstream &ss) const
290 ss << "double " << mSymName;
292 virtual void GenDeclRef(std::stringstream &ss) const
294 ss << mSymName;
296 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
298 GenDecl(ss);
300 virtual std::string GenSlidingWindowDeclRef(bool=false) const
302 return mSymName + "_Random()";
304 void GenSlidingWindowFunction(std::stringstream &ss)
306 ss << "\ndouble " << mSymName;
307 ss << "_Random ()\n{\n";
308 ss << " int i, gid0=get_global_id(0);;\n";
309 ss << " double tmp = 0;\n";
310 ss << " double M = 2147483647;\n";
311 ss << " double Lamda = 32719;\n";
312 ss << " double f;\n";
313 ss << " f = gid0 + 1;\n";
314 ss << " int k;\n";
315 ss << " for(i = 1;i <= 100; ++i){\n";
316 ss << " f = Lamda * f;\n";
317 ss << " k = (int)(f * pow(M,-1.0));\n";
318 ss << " f = f - M * k;\n";
319 ss << " }\n";
320 ss << " tmp = f * pow(M,-1.0);\n";
321 ss << " return tmp;\n";
322 ss << "}";
324 virtual size_t GetWindowSize(void) const
326 return 1;
328 /// Create buffer and pass the buffer to a given kernel
329 virtual size_t Marshal(cl_kernel k, int argno, int, cl_program)
331 double tmp = 0.0;
332 // Pass the scalar result back to the rest of the formula kernel
333 cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
334 if (CL_SUCCESS != err)
335 throw OpenCLError(err);
336 return 1;
340 /// A vector of strings
341 class DynamicKernelStringArgument: public VectorRef
343 public:
344 DynamicKernelStringArgument(const std::string &s,
345 FormulaTreeNodeRef ft, int index = 0):
346 VectorRef(s, ft, index) {}
348 virtual void GenSlidingWindowFunction(std::stringstream &) {}
349 /// Generate declaration
350 virtual void GenDecl(std::stringstream &ss) const
352 ss << "__global unsigned int *"<<mSymName;
354 virtual void GenSlidingWindowDecl(std::stringstream& ss) const
356 DynamicKernelStringArgument::GenDecl(ss);
358 virtual size_t Marshal(cl_kernel, int, int, cl_program);
361 /// Marshal a string vector reference
362 size_t DynamicKernelStringArgument::Marshal(cl_kernel k, int argno, int, cl_program)
364 FormulaToken *ref = mFormulaTree->GetFormulaToken();
365 // Obtain cl context
366 KernelEnv kEnv;
367 OpenclDevice::setKernelEnv(&kEnv);
368 cl_int err;
369 formula::VectorRefArray vRef;
370 size_t nStrings = 0;
371 if (ref->GetType() == formula::svSingleVectorRef) {
372 const formula::SingleVectorRefToken* pSVR =
373 dynamic_cast< const formula::SingleVectorRefToken* >(ref);
374 assert(pSVR);
375 nStrings = pSVR->GetArrayLength();
376 vRef = pSVR->GetArray();
377 } else if (ref->GetType() == formula::svDoubleVectorRef) {
378 const formula::DoubleVectorRefToken* pDVR =
379 dynamic_cast< const formula::DoubleVectorRefToken* >(ref);
380 assert(pDVR);
381 nStrings = pDVR->GetArrayLength();
382 vRef = pDVR->GetArrays()[mnIndex];
384 size_t szHostBuffer = nStrings * sizeof(cl_int);
385 // Marshal strings. Right now we pass hashes of these string
386 mpClmem = clCreateBuffer(kEnv.mpkContext,
387 (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR,
388 szHostBuffer, NULL, &err);
389 if (CL_SUCCESS != err)
390 throw OpenCLError(err);
391 cl_uint *pHashBuffer = (cl_uint*)clEnqueueMapBuffer(
392 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
393 szHostBuffer, 0, NULL, NULL, &err);
394 if (CL_SUCCESS != err)
395 throw OpenCLError(err);
396 for (size_t i = 0; i < nStrings; i++)
398 if (vRef.mpStringArray[i])
400 const OUString tmp = OUString(vRef.mpStringArray[i]);
401 pHashBuffer[i] = tmp.hashCode();
403 else
405 pHashBuffer[i] = 0;
408 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
409 pHashBuffer, 0, NULL, NULL);
410 if (CL_SUCCESS != err)
411 throw OpenCLError(err);
413 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
414 if (CL_SUCCESS != err)
415 throw OpenCLError(err);
416 return 1;
419 /// A mixed string/numberic vector
420 class DynamicKernelMixedArgument: public VectorRef
422 public:
423 DynamicKernelMixedArgument(const std::string &s,
424 FormulaTreeNodeRef ft):
425 VectorRef(s, ft), mStringArgument(s+"s", ft) {}
426 virtual void GenSlidingWindowDecl(std::stringstream& ss) const
428 VectorRef::GenSlidingWindowDecl(ss);
429 ss << ", ";
430 mStringArgument.GenSlidingWindowDecl(ss);
432 virtual void GenSlidingWindowFunction(std::stringstream &) {}
433 /// Generate declaration
434 virtual void GenDecl(std::stringstream &ss) const
436 VectorRef::GenDecl(ss);
437 ss << ", ";
438 mStringArgument.GenDecl(ss);
440 virtual void GenDeclRef(std::stringstream &ss) const
442 VectorRef::GenDeclRef(ss);
443 ss << ",";
444 mStringArgument.GenDeclRef(ss);
446 virtual std::string GenSlidingWindowDeclRef(bool) const
448 std::stringstream ss;
449 ss << "(!isNan(" << VectorRef::GenSlidingWindowDeclRef();
450 ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
451 ss << ":" << mStringArgument.GenSlidingWindowDeclRef();
452 ss << ")";
453 return ss.str();
455 virtual std::string GenDoubleSlidingWindowDeclRef(bool=false) const
457 std::stringstream ss;
458 ss << VectorRef::GenSlidingWindowDeclRef();
459 return ss.str();
461 virtual std::string GenStringSlidingWindowDeclRef(bool=false) const
463 std::stringstream ss;
464 ss << mStringArgument.GenSlidingWindowDeclRef();
465 return ss.str();
467 virtual size_t Marshal(cl_kernel k, int argno, int vw, cl_program p)
469 int i = VectorRef::Marshal(k, argno, vw, p);
470 i += mStringArgument.Marshal(k, argno+i, vw, p);
471 return i;
473 protected:
474 DynamicKernelStringArgument mStringArgument;
477 /// Handling a Double Vector that is used as a sliding window input
478 /// to either a sliding window average or sum-of-products
479 /// Generate a sequential loop for reductions
480 class OpSum; // Forward Declaration
481 class OpAverage; // Forward Declaration
482 class OpMin; // Forward Declaration
483 class OpMax; // Forward Declaration
484 class OpCount; // Forward Declaration
// Wraps a Base argument whose token is a DoubleVectorRef used as a sliding
// window. It caches whether the window's start and end are fixed and
// generates the OpenCL loop (or unrolled code) that walks the window.
485 template<class Base>
486 class DynamicKernelSlidingArgument: public Base
488 public:
// Throws Unhandled unless the token is of type svDoubleVectorRef. CodeGen is
// the operation object used to produce the per-element expression (Gen2) and
// the neutral start value (GetBottom).
489 DynamicKernelSlidingArgument(const std::string &s,
490 FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
491 int index=0):
492 Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
494 FormulaToken *t = ft->GetFormulaToken();
495 if (t->GetType() != formula::svDoubleVectorRef)
496 throw Unhandled();
497 mpDVR = dynamic_cast<const formula::DoubleVectorRefToken *>(t);
498 assert(mpDVR);
499 bIsStartFixed = mpDVR->IsStartFixed();
500 bIsEndFixed = mpDVR->IsEndFixed();
502 // Should only be called by SumIfs. Yikes!
// True when the window has more than 100 rows and start and end are either
// both fixed or both not fixed.
503 virtual bool NeedParallelReduction(void) const
505 assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
506 return GetWindowSize()> 100 &&
507 ( (GetStartFixed() && GetEndFixed()) ||
508 (!GetStartFixed() && !GetEndFixed()) ) ;
510 virtual void GenSlidingWindowFunction(std::stringstream &) {}
// Returns the element access expression: "<name>[i + gid0]" when neither end
// is fixed, "<name>[i]" otherwise. With nested == true the access is wrapped
// in a bounds test against the array length that yields NAN when out of range.
512 virtual std::string GenSlidingWindowDeclRef(bool nested=false) const
514 size_t nArrayLength = mpDVR->GetArrayLength();
515 std::stringstream ss;
516 if (!bIsStartFixed && !bIsEndFixed)
518 if (nested)
519 ss << "((i+gid0) <" << nArrayLength <<"?";
520 ss << Base::GetName() << "[i + gid0]";
521 if (nested)
522 ss << ":NAN)";
524 else
526 if (nested)
527 ss << "(i <" << nArrayLength <<"?";
528 ss << Base::GetName() << "[i]";
529 if (nested)
530 ss << ":NAN)";
532 return ss.str();
534 /// Controls how the elements in the DoubleVectorRef are traversed
// Writes the traversal code to ss and returns the window row count.
// needBody is set to true when only a loop header was written and the caller
// must supply the loop body, and to false when the complete reduction code
// was already emitted here.
535 virtual size_t GenReductionLoopHeader(
536 std::stringstream &ss, bool &needBody)
538 assert(mpDVR);
539 size_t nCurWindowSize = mpDVR->GetRefRowSize();
540 // original for loop
// This branch is only compiled when UNROLLING_FACTOR is not defined; the
// visible top of this file defines it, so the #ifdef branch below is used.
541 #ifndef UNROLLING_FACTOR
542 needBody = true;
543 // No need to generate a for-loop for degenerated cases
544 if (nCurWindowSize == 1)
546 ss << "if (gid0 <" << mpDVR->GetArrayLength();
547 ss << ")\n\t{\tint i = 0;\n\t\t";
548 return nCurWindowSize;
551 ss << "for (int i = ";
552 if (!bIsStartFixed && bIsEndFixed)
554 #ifdef ISNAN
555 ss << "gid0; i < " << mpDVR->GetArrayLength();
556 ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
557 #else
558 ss << "gid0; i < "<< nCurWindowSize << "; i++)\n\t\t";
559 #endif
561 else if (bIsStartFixed && !bIsEndFixed)
563 #ifdef ISNAN
564 ss << "0; i < " << mpDVR->GetArrayLength();
565 ss << " && i < gid0+"<< nCurWindowSize << "; i++){\n\t\t";
566 #else
567 ss << "0; i < gid0+"<< nCurWindowSize << "; i++)\n\t\t";
568 #endif
570 else if (!bIsStartFixed && !bIsEndFixed)
572 #ifdef ISNAN
573 ss << "0; i + gid0 < " << mpDVR->GetArrayLength();
574 ss << " && i < "<< nCurWindowSize << "; i++){\n\t\t";
575 #else
576 ss << "0; i < "<< nCurWindowSize << "; i++)\n\t\t";
577 #endif
579 else
581 unsigned limit =
582 std::min(mpDVR->GetArrayLength(), nCurWindowSize);
583 ss << "0; i < "<< limit << "; i++){\n\t\t";
585 return nCurWindowSize;
586 #endif
588 #ifdef UNROLLING_FACTOR
// Exactly one end fixed: emit a plain for-loop header bounded by both the
// array length and the window, and let the caller write the body.
590 if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) {
591 ss << "for (int i = ";
592 ss << "gid0; i < " << mpDVR->GetArrayLength();
593 ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
594 needBody = true;
595 return nCurWindowSize;
596 } else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) {
597 ss << "for (int i = ";
598 ss << "0; i < " << mpDVR->GetArrayLength();
599 ss << " && i < gid0+"<< nCurWindowSize << "; i++){\n\t\t";
600 needBody = true;
601 return nCurWindowSize;
// Neither end fixed: emit the complete reduction, unrolled by
// UNROLLING_FACTOR. Each step sets i and then applies the operation guarded
// by an "i + gid0 < array length" test.
602 } else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()){
603 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
604 ss << "{int i;\n\t";
605 std::stringstream temp1,temp2;
606 int outLoopSize = UNROLLING_FACTOR;
607 if ( nCurWindowSize/outLoopSize != 0){
608 ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
609 for(int count=0; count < outLoopSize; count++){
610 ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n\t";
// The step text is built once (first iteration) and re-emitted for every
// unrolled step.
611 if(count==0){
612 temp1 << "if(i + gid0 < " <<mpDVR->GetArrayLength();
613 temp1 << "){\n\t\t";
614 temp1 << "tmp = legalize(";
615 temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
616 temp1 << ", tmp);\n\t\t\t";
617 temp1 << "}\n\t";
619 ss << temp1.str();
621 ss << "}\n\t";
623 // The residual of mod outLoopSize
624 for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count < nCurWindowSize; count++){
625 ss << "i = "<<count<<";\n\t";
626 if(count==nCurWindowSize/outLoopSize*outLoopSize){
627 temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
628 temp2 << "){\n\t\t";
629 temp2 << "tmp = legalize(";
630 temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
631 temp2 << ", tmp);\n\t\t\t";
632 temp2 << "}\n\t";
634 ss << temp2.str();
636 ss << "} // to scope the int i declaration\n";
637 needBody = false;
638 return nCurWindowSize;
640 // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
// Both ends fixed: same unrolled scheme as above, but the emitted steps
// carry no array-length guard.
641 else {
642 ss << "//else situation \n\t";
643 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
644 ss << "{int i;\n\t";
645 std::stringstream temp1,temp2;
646 int outLoopSize = UNROLLING_FACTOR;
647 if (nCurWindowSize/outLoopSize != 0){
648 ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
649 for(int count=0; count < outLoopSize; count++){
650 ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n\t";
651 if(count==0){
652 temp1 << "tmp = legalize(";
653 temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
654 temp1 << ", tmp);\n\t\t\t";
656 ss << temp1.str();
658 ss << "}\n\t";
660 // The residual of mod outLoopSize
661 for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count < nCurWindowSize; count++){
662 ss << "i = "<<count<<";\n\t";
663 if(count==nCurWindowSize/outLoopSize*outLoopSize){
664 temp2 << "tmp = legalize(";
665 temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
666 temp2 << ", tmp);\n\t\t\t";
668 ss << temp2.str();
670 ss << "} // to scope the int i declaration\n";
671 needBody = false;
672 return nCurWindowSize;
675 #endif
// Releases mpClmem2 if it was ever set. Within this class as shown it is
// only initialised to NULL in the constructor.
677 ~DynamicKernelSlidingArgument()
679 if (mpClmem2)
681 clReleaseMemObject(mpClmem2);
682 mpClmem2 = NULL;
// Length of the arrays behind the reference.
686 size_t GetArrayLength(void) const {return mpDVR->GetArrayLength(); }
// Number of rows in the window.
688 size_t GetWindowSize(void) const {return mpDVR->GetRefRowSize(); }
// NOTE(review): the two accessors below return the cached bool flags through
// a size_t return type.
690 size_t GetStartFixed(void) const {return bIsStartFixed; }
692 size_t GetEndFixed(void) const {return bIsEndFixed; }
694 protected:
695 bool bIsStartFixed, bIsEndFixed;
696 const formula::DoubleVectorRefToken *mpDVR;
697 // from parent nodes
698 boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
699 // controls whether to invoke the reduction kernel during marshaling or not
// NOTE(review): the comment above does not appear to describe mpClmem2, which
// is a cl_mem handle released in the destructor - confirm and reword.
700 cl_mem mpClmem2;
703 /// A mixed string/numberic vector
704 class DynamicKernelMixedSlidingArgument : public VectorRef
706 public:
707 DynamicKernelMixedSlidingArgument(const std::string &s,
708 FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
709 int index = 0):
710 VectorRef(s, ft),
711 mDoubleArgument(s, ft, CodeGen, index),
712 mStringArgument(s+"s", ft, CodeGen, index) {}
713 virtual void GenSlidingWindowDecl(std::stringstream& ss) const
715 mDoubleArgument.GenSlidingWindowDecl(ss);
716 ss << ", ";
717 mStringArgument.GenSlidingWindowDecl(ss);
719 virtual void GenSlidingWindowFunction(std::stringstream &) {}
720 /// Generate declaration
721 virtual void GenDecl(std::stringstream &ss) const
723 mDoubleArgument.GenDecl(ss);
724 ss << ", ";
725 mStringArgument.GenDecl(ss);
727 virtual void GenDeclRef(std::stringstream &ss) const
729 mDoubleArgument.GenDeclRef(ss);
730 ss << ",";
731 mStringArgument.GenDeclRef(ss);
733 virtual std::string GenSlidingWindowDeclRef(bool) const
735 std::stringstream ss;
736 ss << "(!isNan(" << mDoubleArgument.GenSlidingWindowDeclRef();
737 ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
738 ss << ":" << mStringArgument.GenSlidingWindowDeclRef();
739 ss << ")";
740 return ss.str();
742 virtual std::string GenDoubleSlidingWindowDeclRef(bool=false) const
744 std::stringstream ss;
745 ss << mDoubleArgument.GenSlidingWindowDeclRef();
746 return ss.str();
748 virtual std::string GenStringSlidingWindowDeclRef(bool=false) const
750 std::stringstream ss;
751 ss << mStringArgument.GenSlidingWindowDeclRef();
752 return ss.str();
754 virtual size_t Marshal(cl_kernel k, int argno, int vw, cl_program p)
756 int i = mDoubleArgument.Marshal(k, argno, vw, p);
757 i += mStringArgument.Marshal(k, argno + i, vw, p);
758 return i;
760 protected:
761 DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
762 DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
765 /// Handling a Double Vector that is used as a sliding window input
766 /// Performs parallel reduction based on given operator
767 template<class Base>
768 class ParallelReductionVectorRef: public Base
770 public:
771 ParallelReductionVectorRef(const std::string &s,
772 FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
773 int index=0):
774 Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
776 FormulaToken *t = ft->GetFormulaToken();
777 if (t->GetType() != formula::svDoubleVectorRef)
778 throw Unhandled();
779 mpDVR = dynamic_cast<const formula::DoubleVectorRefToken *>(t);
780 assert(mpDVR);
781 bIsStartFixed = mpDVR->IsStartFixed();
782 bIsEndFixed = mpDVR->IsEndFixed();
784 /// Emit the definition for the auxiliary reduction kernel
785 virtual void GenSlidingWindowFunction(std::stringstream &ss) {
786 std::string name = Base::GetName();
787 ss << "__kernel void "<<name;
788 ss << "_reduction(__global double* A, "
789 "__global double *result,int arrayLength,int windowSize){\n";
790 ss << " double tmp, current_result =" <<
791 mpCodeGen->GetBottom();
792 ss << ";\n";
793 ss << " int writePos = get_group_id(1);\n";
794 ss << " int lidx = get_local_id(0);\n";
795 ss << " __local double shm_buf[256];\n";
796 if (mpDVR->IsStartFixed())
797 ss << " int offset = 0;\n";
798 else // if (!mpDVR->IsStartFixed())
799 ss << " int offset = get_group_id(1);\n";
800 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
801 ss << " int end = windowSize;\n";
802 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
803 ss << " int end = offset + windowSize;\n";
804 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
805 ss << " int end = windowSize + get_group_id(1);\n";
806 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
807 ss << " int end = windowSize;\n";
808 ss << " end = min(end, arrayLength);\n";
810 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
811 ss << " int loop = arrayLength/512 + 1;\n";
812 ss << " for (int l=0; l<loop; l++){\n";
813 ss << " tmp = "<< mpCodeGen->GetBottom() << ";\n";
814 ss << " int loopOffset = l*512;\n";
815 ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
816 ss << " tmp = legalize(" << mpCodeGen->Gen2(
817 "A[loopOffset + lidx + offset]", "tmp") <<", tmp);\n";
818 ss << " tmp = legalize(" << mpCodeGen->Gen2(
819 "A[loopOffset + lidx + offset + 256]", "tmp") <<", tmp);\n";
820 ss << " } else if ((loopOffset + lidx + offset) < end)\n";
821 ss << " tmp = legalize(" << mpCodeGen->Gen2(
822 "A[loopOffset + lidx + offset]", "tmp") <<", tmp);\n";
823 ss << " shm_buf[lidx] = tmp;\n";
824 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
825 ss << " for (int i = 128; i >0; i/=2) {\n";
826 ss << " if (lidx < i)\n";
827 ss << " shm_buf[lidx] = ";
828 // Special case count
829 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
830 ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
831 else
832 ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]")<<";\n";
833 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
834 ss << " }\n";
835 ss << " if (lidx == 0)\n";
836 ss << " current_result =";
837 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
838 ss << "current_result + shm_buf[0]";
839 else
840 ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
841 ss << ";\n";
842 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
843 ss << " }\n";
844 ss << " if (lidx == 0)\n";
845 ss << " result[writePos] = current_result;\n";
846 ss << "}\n";
850 virtual std::string GenSlidingWindowDeclRef(bool=false) const
852 std::stringstream ss;
853 if (!bIsStartFixed && !bIsEndFixed)
854 ss << Base::GetName() << "[i + gid0]";
855 else
856 ss << Base::GetName() << "[i]";
857 return ss.str();
859 /// Controls how the elements in the DoubleVectorRef are traversed
860 virtual size_t GenReductionLoopHeader(
861 std::stringstream &ss, bool &needBody)
863 assert(mpDVR);
864 size_t nCurWindowSize = mpDVR->GetRefRowSize();
865 std::string temp = Base::GetName() + "[gid0]";
866 ss << "tmp = ";
867 // Special case count
868 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
869 ss << temp << "+ tmp";
870 else
871 ss << mpCodeGen->Gen2(temp, "tmp");
872 ss << ";\n\t";
873 needBody = false;
874 return nCurWindowSize;
877 virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
879 assert(Base::mpClmem == NULL);
880 // Obtain cl context
881 KernelEnv kEnv;
882 OpenclDevice::setKernelEnv(&kEnv);
883 cl_int err;
884 size_t nInput = mpDVR->GetArrayLength();
885 size_t nCurWindowSize = mpDVR->GetRefRowSize();
886 // create clmem buffer
887 if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL)
888 throw Unhandled();
889 double *pHostBuffer = const_cast<double*>(
890 mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
891 size_t szHostBuffer = nInput * sizeof(double);
892 Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
893 (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR,
894 szHostBuffer,
895 pHostBuffer, &err);
896 mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_WRITE_ONLY,
897 sizeof(double)*w, NULL, NULL);
898 if (CL_SUCCESS != err)
899 throw OpenCLError(err);
900 // reproduce the reduction function name
901 std::string kernelName = Base::GetName() + "_reduction";
903 cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
904 if (err != CL_SUCCESS)
905 throw OpenCLError(err);
906 // set kernel arg of reduction kernel
907 // TODO(Wei Wei): use unique name for kernel
908 cl_mem buf = Base::GetCLBuffer();
909 err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
910 (void *)&buf);
911 if (CL_SUCCESS != err)
912 throw OpenCLError(err);
914 err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void *)&mpClmem2);
915 if (CL_SUCCESS != err)
916 throw OpenCLError(err);
918 err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
919 if (CL_SUCCESS != err)
920 throw OpenCLError(err);
922 err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
923 if (CL_SUCCESS != err)
924 throw OpenCLError(err);
926 // set work group size and execute
927 size_t global_work_size[] = {256, (size_t)w };
928 size_t local_work_size[] = {256, 1};
929 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
930 global_work_size, local_work_size, 0, NULL, NULL);
931 if (CL_SUCCESS != err)
932 throw OpenCLError(err);
933 err = clFinish(kEnv.mpkCmdQueue);
934 if (CL_SUCCESS != err)
935 throw OpenCLError(err);
937 // set kernel arg
938 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&(mpClmem2));
939 if (CL_SUCCESS != err)
940 throw OpenCLError(err);
941 return 1;
943 ~ParallelReductionVectorRef()
945 if (mpClmem2)
947 clReleaseMemObject(mpClmem2);
948 mpClmem2 = NULL;
952 size_t GetArrayLength(void) const {return mpDVR->GetArrayLength(); }
954 size_t GetWindowSize(void) const {return mpDVR->GetRefRowSize(); }
956 size_t GetStartFixed(void) const {return bIsStartFixed; }
958 size_t GetEndFixed(void) const {return bIsEndFixed; }
960 protected:
961 bool bIsStartFixed, bIsEndFixed;
962 const formula::DoubleVectorRefToken *mpDVR;
963 // from parent nodes
964 boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
965 // controls whether to invoke the reduction kernel during marshaling or not
966 cl_mem mpClmem2;
// Base for operations that fold all their arguments into one value with a
// binary operation (Gen2), starting from GetBottom().
969 class Reduction: public SlidingFunctionBase
971 public:
972 typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
973 typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
974 typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
// Writes the OpenCL function "double <sSymName>_<BinFuncName()>(...)" to ss.
// The parameter list is built from every sub-argument's sliding-window
// declaration; the body folds the sub-arguments into tmp and returns it
// (divided by a count when isAverage() is true).
976 virtual void GenSlidingWindowFunction(std::stringstream &ss,
977 const std::string sSymName, SubArguments &vSubArguments)
979 ss << "\ndouble " << sSymName;
980 ss << "_"<< BinFuncName() <<"(";
981 for (unsigned i = 0; i < vSubArguments.size(); i++)
983 if (i)
984 ss << ", ";
985 vSubArguments[i]->GenSlidingWindowDecl(ss);
987 ss << ") {\n\t";
988 ss << "double tmp = " << GetBottom() <<";\n\t";
989 ss << "int gid0 = get_global_id(0);\n\t";
990 if (isAverage())
991 ss << "int nCount = 0;\n\t";
992 ss << "double tmpBottom;\n\t";
// Sub-arguments are processed from the last one to the first.
993 unsigned i = vSubArguments.size();
994 size_t nItems = 0;
995 while (i--)
// Range arguments write their own traversal code; when they report
// needBody == false the reduction for that argument is already complete.
// needBody has no initial value here and is expected to be set by the callee.
997 if (NumericRange *NR =
998 dynamic_cast<NumericRange *> (vSubArguments[i].get()))
1000 bool needBody;
1001 nItems += NR->GenReductionLoopHeader(ss, needBody);
1002 if (needBody == false) continue;
1004 else if (ParallelNumericRange *PNR =
1005 dynamic_cast<ParallelNumericRange *> (vSubArguments[i].get()))
1007 //did not handle yet
1008 bool needBody;
1009 nItems += PNR->GenReductionLoopHeader(ss, needBody);
1010 if (needBody == false) continue;
1012 else if (StringRange *SR =
1013 dynamic_cast<StringRange *> (vSubArguments[i].get()))
1015 //did not handle yet
1016 bool needBody;
1017 nItems += SR->GenReductionLoopHeader(ss, needBody);
1018 if (needBody == false) continue;
// Any other argument kind (must not be a DoubleVectorRef).
1020 else
1022 FormulaToken *pCur = vSubArguments[i]->GetFormulaToken();
1023 assert(pCur);
1024 assert(pCur->GetType() != formula::svDoubleVectorRef);
1026 if (pCur->GetType() == formula::svSingleVectorRef)
// With ISNAN defined a bounds guard on gid0 is emitted and the item is not
// counted in nItems; otherwise it counts as one item.
// NOTE(review): ISNAN is not defined in the visible part of this file -
// confirm where it comes from.
1028 #ifdef ISNAN
1029 const formula::SingleVectorRefToken* pSVR =
1030 dynamic_cast< const formula::SingleVectorRefToken* >(pCur);
1031 ss << "if (gid0 < " << pSVR->GetArrayLength() << "){\n\t\t";
1032 #else
1033 nItems += 1;
1034 #endif
1036 else if (pCur->GetType() == formula::svDouble)
1038 #ifdef ISNAN
1039 ss << "{\n\t\t";
1040 #endif
1041 nItems += 1;
1043 else
1045 nItems += 1;
// Emit the loop/guard body: fold the current element into tmp.
1048 #ifdef ISNAN
// For pushed operands a NaN element is replaced by the neutral value
// (tmpBottom) before folding.
1049 if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
1051 ss << "tmpBottom = " << GetBottom() << ";\n\t\t";
1052 ss << "if (isNan(";
1053 ss << vSubArguments[i]->GenSlidingWindowDeclRef();
1054 ss << "))\n\t\t\t";
1055 ss << "tmp = ";
1056 ss << Gen2("tmpBottom", "tmp") << ";\n\t\t";
1057 ss << "else{\n\t\t\t";
1058 ss << "tmp = ";
1059 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1060 ss << ";\n\t\t\t";
1061 ss << "}\n\t";
1062 ss << "}\n\t";
1064 else
1066 ss << "tmp = ";
1067 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1068 ss << ";\n\t";
1070 #else
1071 ss << "tmp = ";
1072 // Generate the operation in binary form
1073 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1074 ss << ";\n\t";
1075 #endif
// Averages divide by the kernel-side nCount (ISNAN) or by the item count
// accumulated above.
1077 ss << "return tmp";
1078 #ifdef ISNAN
1079 if (isAverage())
1080 ss << "/(double)nCount";
1081 #else
1082 if (isAverage())
1083 ss << "/(double)"<<nItems;
1084 #endif
1085 ss << ";\n}";
// Defaults: not an average, takes numeric but not string input.
1087 virtual bool isAverage() const { return false; }
1088 virtual bool takeString() const { return false; }
1089 virtual bool takeNumeric() const { return true; }
1092 // Strictly binary operators
1093 class Binary: public SlidingFunctionBase
1095 public:
1096 virtual void GenSlidingWindowFunction(std::stringstream &ss,
1097 const std::string sSymName, SubArguments &vSubArguments)
1099 ss << "\ndouble " << sSymName;
1100 ss << "_"<< BinFuncName() <<"(";
1101 assert(vSubArguments.size() == 2);
1102 for (unsigned i = 0; i < vSubArguments.size(); i++)
1104 if (i)
1105 ss << ", ";
1106 vSubArguments[i]->GenSlidingWindowDecl(ss);
1108 ss << ") {\n\t";
1109 ss << "int gid0 = get_global_id(0), i = 0;\n\t";
1110 ss << "double tmp = ";
1111 ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(false),
1112 vSubArguments[1]->GenSlidingWindowDeclRef(false)) << ";\n\t";
1113 ss << "return tmp;\n}";
1115 virtual bool takeString() const { return true; }
1116 virtual bool takeNumeric() const { return true; }
1119 class SumOfProduct: public SlidingFunctionBase
1121 public:
1122 virtual void GenSlidingWindowFunction(std::stringstream &ss,
1123 const std::string sSymName, SubArguments &vSubArguments)
1125 size_t nCurWindowSize = 0;
1126 FormulaToken *tmpCur = NULL;
1127 const formula::DoubleVectorRefToken *pCurDVR = NULL;
1128 ss << "\ndouble " << sSymName;
1129 ss << "_"<< BinFuncName() <<"(";
1130 for (unsigned i = 0; i < vSubArguments.size(); i++)
1132 if (i)
1133 ss << ",";
1134 vSubArguments[i]->GenSlidingWindowDecl(ss);
1135 size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1136 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize)?
1137 nCurChildWindowSize:nCurWindowSize;
1138 tmpCur = vSubArguments[i]->GetFormulaToken();
1139 if ( ocPush==tmpCur->GetOpCode() )
1142 pCurDVR = dynamic_cast<
1143 const formula::DoubleVectorRefToken*>(tmpCur);
1144 if ( !
1145 ( (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1146 || (pCurDVR->IsStartFixed() && pCurDVR->IsEndFixed()) )
1148 throw Unhandled();
1151 ss << ") {\n";
1152 ss << " double tmp = 0.0;\n";
1153 ss << " int gid0 = get_global_id(0);\n";
1154 #ifndef UNROLLING_FACTOR
1155 ss << " int i ;\n";
1156 ss << " for (i = 0; i < "<< nCurWindowSize <<"; i++)\n";
1157 ss << " {\n";
1158 for (unsigned i = 0; i < vSubArguments.size(); i++)
1160 tmpCur = vSubArguments[i]->GetFormulaToken();
1161 if(ocPush==tmpCur->GetOpCode())
1163 pCurDVR= dynamic_cast<
1164 const formula::DoubleVectorRefToken *>(tmpCur);
1165 if(!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1167 ss << " int currentCount";
1168 ss << i;
1169 ss <<" =i+gid0+1;\n";
1171 else
1173 ss << " int currentCount";
1174 ss << i;
1175 ss << " =i+1;\n";
1179 ss << " tmp += fsum(";
1180 for (unsigned i = 0; i < vSubArguments.size(); i++)
1182 if (i)
1183 ss << "*";
1184 #ifdef ISNAN
1185 if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
1187 ss <<"(";
1188 ss <<"(currentCount";
1189 ss << i;
1190 ss<< ">";
1191 if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1192 formula::svSingleVectorRef)
1194 const formula::SingleVectorRefToken* pSVR =
1195 dynamic_cast< const formula::SingleVectorRefToken*>
1196 (vSubArguments[i]->GetFormulaToken());
1197 ss<<pSVR->GetArrayLength();
1199 else if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1200 formula::svDoubleVectorRef)
1202 const formula::DoubleVectorRefToken* pSVR =
1203 dynamic_cast< const formula::DoubleVectorRefToken*>
1204 (vSubArguments[i]->GetFormulaToken());
1205 ss<<pSVR->GetArrayLength();
1207 ss << ")||isNan("<<vSubArguments[i]
1208 ->GenSlidingWindowDeclRef(true);
1209 ss << ")?0:";
1210 ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1211 ss << ")";
1213 else
1214 ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1215 #else
1216 ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1217 #endif
1219 ss << ", 0.0);\n\t}\n\t";
1220 ss << "return tmp;\n";
1221 ss << "}";
1222 #endif
1224 #ifdef UNROLLING_FACTOR
1225 ss << "\tint i;\n\t";
1226 ss << "int currentCount0, currentCount1;\n\t";
1227 std::stringstream temp3,temp4;
1228 int outLoopSize = UNROLLING_FACTOR;
1229 if (nCurWindowSize/outLoopSize != 0){
1230 ss << "for(int outLoop=0; outLoop<" <<
1231 nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
1232 for(int count=0; count < outLoopSize; count++){
1233 ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n";
1234 if(count==0){
1235 for (unsigned i = 0; i < vSubArguments.size(); i++)
1237 tmpCur = vSubArguments[i]->GetFormulaToken();
1238 if(ocPush==tmpCur->GetOpCode())
1240 pCurDVR= dynamic_cast<
1241 const formula::DoubleVectorRefToken *>(tmpCur);
1242 if(!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1244 temp3 << " currentCount";
1245 temp3 << i;
1246 temp3 <<" =i+gid0+1;\n";
1248 else
1250 temp3 << " currentCount";
1251 temp3 << i;
1252 temp3 << " =i+1;\n";
1257 temp3 << "tmp = fsum(";
1258 for (unsigned i = 0; i < vSubArguments.size(); i++){
1259 if (i)
1260 temp3 << "*";
1261 if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode()){
1262 temp3 <<"(";
1263 temp3 <<"(currentCount";
1264 temp3 << i;
1265 temp3 << ">";
1266 if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1267 formula::svSingleVectorRef){
1268 const formula::SingleVectorRefToken* pSVR =
1269 dynamic_cast< const formula::SingleVectorRefToken*>
1270 (vSubArguments[i]->GetFormulaToken());
1271 temp3<<pSVR->GetArrayLength();
1273 else if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1274 formula::svDoubleVectorRef){
1275 const formula::DoubleVectorRefToken* pSVR =
1276 dynamic_cast< const formula::DoubleVectorRefToken*>
1277 (vSubArguments[i]->GetFormulaToken());
1278 temp3<<pSVR->GetArrayLength();
1280 temp3 << ")||isNan("<<vSubArguments[i]
1281 ->GenSlidingWindowDeclRef(true);
1282 temp3 << ")?0:";
1283 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1284 temp3 << ")";
1286 else
1287 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1289 temp3 << ", tmp);\n\t";
1291 ss << temp3.str();
1293 ss << "}\n\t";
1295 //The residual of mod outLoopSize
1296 for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize;
1297 count < nCurWindowSize; count++)
1299 ss << "i =" <<count<<";\n";
1300 if(count==nCurWindowSize/outLoopSize*outLoopSize){
1301 for (unsigned i = 0; i < vSubArguments.size(); i++)
1303 tmpCur = vSubArguments[i]->GetFormulaToken();
1304 if(ocPush==tmpCur->GetOpCode())
1306 pCurDVR= dynamic_cast<
1307 const formula::DoubleVectorRefToken *>(tmpCur);
1308 if(!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1310 temp4 << " currentCount";
1311 temp4 << i;
1312 temp4 <<" =i+gid0+1;\n";
1314 else
1316 temp4 << " currentCount";
1317 temp4 << i;
1318 temp4 << " =i+1;\n";
1323 temp4 << "tmp = fsum(";
1324 for (unsigned i = 0; i < vSubArguments.size(); i++)
1326 if (i)
1327 temp4 << "*";
1328 if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
1330 temp4 <<"(";
1331 temp4 <<"(currentCount";
1332 temp4 << i;
1333 temp4 << ">";
1334 if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1335 formula::svSingleVectorRef)
1337 const formula::SingleVectorRefToken* pSVR =
1338 dynamic_cast< const formula::SingleVectorRefToken*>
1339 (vSubArguments[i]->GetFormulaToken());
1340 temp4<<pSVR->GetArrayLength();
1342 else if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1343 formula::svDoubleVectorRef)
1345 const formula::DoubleVectorRefToken* pSVR =
1346 dynamic_cast< const formula::DoubleVectorRefToken*>
1347 (vSubArguments[i]->GetFormulaToken());
1348 temp4<<pSVR->GetArrayLength();
1350 temp4 << ")||isNan("<<vSubArguments[i]
1351 ->GenSlidingWindowDeclRef(true);
1352 temp4 << ")?0:";
1353 temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1354 temp4 << ")";
1356 else
1358 temp4 << vSubArguments[i]
1359 ->GenSlidingWindowDeclRef(true);
1362 temp4 << ", tmp);\n\t";
1364 ss << temp4.str();
1366 ss << "return tmp;\n";
1367 ss << "}";
1368 #endif
1371 virtual bool takeString() const { return false; }
1372 virtual bool takeNumeric() const { return true; }
1375 /// operator traits
1376 class OpNop: public Reduction {
1377 public:
1378 virtual std::string GetBottom(void) { return "0"; }
1379 virtual std::string Gen2(const std::string &lhs, const std::string &) const
1381 return lhs;
1383 virtual std::string BinFuncName(void) const { return "nop"; }
1386 class OpCount: public Reduction {
1387 public:
1388 virtual std::string GetBottom(void) { return "0"; }
1389 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1391 std::stringstream ss;
1392 ss << "(isNan(" << lhs << ")?"<<rhs<<":"<<rhs<<"+1.0)";
1393 return ss.str();
1395 virtual std::string BinFuncName(void) const { return "fcount"; }
1398 class OpEqual: public Binary {
1399 public:
1400 virtual std::string GetBottom(void) { return "0"; }
1401 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1403 std::stringstream ss;
1404 ss << "strequal("<< lhs << "," << rhs <<")";
1405 return ss.str();
1407 virtual std::string BinFuncName(void) const { return "eq"; }
1410 class OpLessEqual: public Binary {
1411 public:
1412 virtual std::string GetBottom(void) { return "0"; }
1413 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1415 std::stringstream ss;
1416 ss << "("<< lhs << "<=" << rhs <<")";
1417 return ss.str();
1419 virtual std::string BinFuncName(void) const { return "leq"; }
1422 class OpGreater: public Binary {
1423 public:
1424 virtual std::string GetBottom(void) { return "0"; }
1425 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1427 std::stringstream ss;
1428 ss << "("<< lhs << ">" << rhs <<")";
1429 return ss.str();
1431 virtual std::string BinFuncName(void) const { return "gt"; }
1434 class OpSum: public Reduction {
1435 public:
1436 virtual std::string GetBottom(void) { return "0"; }
1437 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1439 std::stringstream ss;
1440 ss << "((" << lhs <<")+("<< rhs<<"))";
1441 return ss.str();
1443 virtual std::string BinFuncName(void) const { return "fsum"; }
1446 class OpAverage: public Reduction {
1447 public:
1448 virtual std::string GetBottom(void) { return "0"; }
1449 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1451 std::stringstream ss;
1452 ss << "fsum_count(" << lhs <<","<< rhs<<", &nCount)";
1453 return ss.str();
1455 virtual std::string BinFuncName(void) const { return "fsum"; }
1456 virtual bool isAverage() const { return true; }
1459 class OpSub: public Reduction {
1460 public:
1461 virtual std::string GetBottom(void) { return "0"; }
1462 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1464 return lhs + "-" + rhs;
1466 virtual std::string BinFuncName(void) const { return "fsub"; }
1469 class OpMul: public Reduction {
1470 public:
1471 virtual std::string GetBottom(void) { return "1"; }
1472 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1474 return lhs + "*" + rhs;
1476 virtual std::string BinFuncName(void) const { return "fmul"; }
1479 /// Technically not a reduction, but fits the framework.
1480 class OpDiv: public Reduction {
1481 public:
1482 virtual std::string GetBottom(void) { return "1.0"; }
1483 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1485 return "(" + lhs + "/" + rhs + ")";
1487 virtual std::string BinFuncName(void) const { return "fdiv"; }
1490 class OpMin: public Reduction {
1491 public:
1492 virtual std::string GetBottom(void) { return "MAXFLOAT"; }
1493 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1495 return "mcw_fmin("+lhs + "," + rhs +")";
1497 virtual std::string BinFuncName(void) const { return "min"; }
1500 class OpMax: public Reduction {
1501 public:
1502 virtual std::string GetBottom(void) { return "-MAXFLOAT"; }
1503 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1505 return "mcw_fmax("+lhs + "," + rhs +")";
1507 virtual std::string BinFuncName(void) const { return "max"; }
1509 class OpSumProduct: public SumOfProduct {
1510 public:
1511 virtual std::string GetBottom(void) { return "0"; }
1512 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1514 return lhs + "*" + rhs;
1516 virtual std::string BinFuncName(void) const { return "fsop"; }
1518 namespace {
1519 struct SumIfsArgs {
1520 SumIfsArgs(cl_mem x): mCLMem(x), mConst(0.0) {}
1521 SumIfsArgs(double x): mCLMem(NULL), mConst(x) {}
1522 cl_mem mCLMem;
1523 double mConst;
1526 /// Helper functions that have multiple buffers
1527 class DynamicKernelSoPArguments: public DynamicKernelArgument
1529 public:
1530 typedef boost::shared_ptr<DynamicKernelArgument> SubArgument;
1531 typedef std::vector<SubArgument> SubArgumentsType;
1533 DynamicKernelSoPArguments(
1534 const std::string &s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen);
1536 /// Create buffer and pass the buffer to a given kernel
1537 virtual size_t Marshal(cl_kernel k, int argno, int nVectorWidth, cl_program pProgram)
1539 unsigned i = 0;
1540 for (SubArgumentsType::iterator it = mvSubArguments.begin(), e= mvSubArguments.end(); it!=e;
1541 ++it)
1543 i += (*it)->Marshal(k, argno + i, nVectorWidth, pProgram);
1545 if (OpGeoMean *OpSumCodeGen = dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
1547 // Obtain cl context
1548 KernelEnv kEnv;
1549 OpenclDevice::setKernelEnv(&kEnv);
1550 cl_int err;
1551 DynamicKernelSlidingArgument<VectorRef> *slidingArgPtr =
1552 dynamic_cast< DynamicKernelSlidingArgument<VectorRef> *>
1553 (mvSubArguments[0].get());
1554 cl_mem pClmem2;
1556 if (OpSumCodeGen->NeedReductionKernel())
1558 assert(slidingArgPtr); (void) slidingArgPtr;
1559 std::vector<cl_mem> vclmem;
1560 for (SubArgumentsType::iterator it = mvSubArguments.begin(),
1561 e= mvSubArguments.end(); it!=e; ++it)
1563 if (VectorRef *VR = dynamic_cast<VectorRef *>(it->get()))
1564 vclmem.push_back(VR->GetCLBuffer());
1565 else
1566 vclmem.push_back(NULL);
1568 pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
1569 sizeof(double)*nVectorWidth, NULL, &err);
1570 if (CL_SUCCESS != err)
1571 throw OpenCLError(err);
1573 std::string kernelName = "GeoMean_reduction";
1574 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
1575 if (err != CL_SUCCESS)
1576 throw OpenCLError(err);
1577 // set kernel arg of reduction kernel
1578 for (size_t j=0; j< vclmem.size(); j++){
1579 err = clSetKernelArg(redKernel, j,
1580 vclmem[j]?sizeof(cl_mem):sizeof(double),
1581 (void *)&vclmem[j]);
1582 if (CL_SUCCESS != err)
1583 throw OpenCLError(err);
1585 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void *)&pClmem2);
1586 if (CL_SUCCESS != err)
1587 throw OpenCLError(err);
1589 // set work group size and execute
1590 size_t global_work_size[] = {256, (size_t)nVectorWidth };
1591 size_t local_work_size[] = {256, 1};
1592 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1593 global_work_size, local_work_size, 0, NULL, NULL);
1594 if (CL_SUCCESS != err)
1595 throw OpenCLError(err);
1596 err = clFinish(kEnv.mpkCmdQueue);
1597 if (CL_SUCCESS != err)
1598 throw OpenCLError(err);
1600 // Pass pClmem2 to the "real" kernel
1601 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void *)&pClmem2);
1602 if (CL_SUCCESS != err)
1603 throw OpenCLError(err);
1606 if (OpSumIfs *OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
1608 // Obtain cl context
1609 KernelEnv kEnv;
1610 OpenclDevice::setKernelEnv(&kEnv);
1611 cl_int err;
1612 DynamicKernelArgument *Arg = mvSubArguments[0].get();
1613 DynamicKernelSlidingArgument<VectorRef> *slidingArgPtr =
1614 dynamic_cast< DynamicKernelSlidingArgument<VectorRef> *> (Arg);
1615 mpClmem2 = NULL;
1617 if (OpSumCodeGen->NeedReductionKernel())
1619 assert(slidingArgPtr);
1620 size_t nInput = slidingArgPtr -> GetArrayLength();
1621 size_t nCurWindowSize = slidingArgPtr -> GetWindowSize();
1622 std::vector<SumIfsArgs> vclmem;
1624 for (SubArgumentsType::iterator it = mvSubArguments.begin(),
1625 e= mvSubArguments.end(); it!=e; ++it)
1627 if (VectorRef *VR = dynamic_cast<VectorRef *>(it->get()))
1628 vclmem.push_back(SumIfsArgs(VR->GetCLBuffer()));
1629 else if (DynamicKernelConstantArgument *CA =
1630 dynamic_cast<
1631 DynamicKernelConstantArgument *>(it->get()))
1632 vclmem.push_back(SumIfsArgs(CA->GetDouble()));
1633 else
1634 vclmem.push_back(SumIfsArgs((cl_mem)NULL));
1636 mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
1637 sizeof(double)*nVectorWidth, NULL, &err);
1638 if (CL_SUCCESS != err)
1639 throw OpenCLError(err);
1641 std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
1642 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
1643 if (err != CL_SUCCESS)
1644 throw OpenCLError(err);
1646 // set kernel arg of reduction kernel
1647 for (size_t j=0; j< vclmem.size(); j++){
1648 err = clSetKernelArg(redKernel, j,
1649 vclmem[j].mCLMem?sizeof(cl_mem):sizeof(double),
1650 vclmem[j].mCLMem?(void *)&vclmem[j].mCLMem:
1651 (void*)&vclmem[j].mConst);
1652 if (CL_SUCCESS != err)
1653 throw OpenCLError(err);
1655 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void *)&mpClmem2);
1656 if (CL_SUCCESS != err)
1657 throw OpenCLError(err);
1659 err = clSetKernelArg(redKernel, vclmem.size()+1, sizeof(cl_int), (void*)&nInput);
1660 if (CL_SUCCESS != err)
1661 throw OpenCLError(err);
1663 err = clSetKernelArg(redKernel, vclmem.size()+2, sizeof(cl_int), (void*)&nCurWindowSize);
1664 if (CL_SUCCESS != err)
1665 throw OpenCLError(err);
1666 // set work group size and execute
1667 size_t global_work_size[] = {256, (size_t)nVectorWidth };
1668 size_t local_work_size[] = {256, 1};
1669 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1670 global_work_size, local_work_size, 0, NULL, NULL);
1671 if (CL_SUCCESS != err)
1672 throw OpenCLError(err);
1673 err = clFinish(kEnv.mpkCmdQueue);
1674 if (CL_SUCCESS != err)
1675 throw OpenCLError(err);
1676 clReleaseKernel(redKernel);
1677 // Pass mpClmem2 to the "real" kernel
1678 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void *)&mpClmem2);
1679 if (CL_SUCCESS != err)
1680 throw OpenCLError(err);
1683 return i;
1686 virtual void GenSlidingWindowFunction(std::stringstream &ss) {
1687 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1688 mvSubArguments[i]->GenSlidingWindowFunction(ss);
1689 mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
1691 virtual void GenDeclRef(std::stringstream &ss) const
1693 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1695 if (i)
1696 ss << ",";
1697 mvSubArguments[i]->GenDeclRef(ss);
1700 virtual void GenDecl(std::stringstream &ss) const
1702 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e= mvSubArguments.end(); it!=e;
1703 ++it) {
1704 if (it != mvSubArguments.begin())
1705 ss << ", ";
1706 (*it)->GenDecl(ss);
1710 virtual size_t GetWindowSize(void) const
1712 size_t nCurWindowSize = 0;
1713 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1715 size_t nCurChildWindowSize = mvSubArguments[i]->GetWindowSize();
1716 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1717 nCurChildWindowSize:nCurWindowSize;
1719 return nCurWindowSize;
1722 /// When declared as input to a sliding window function
1723 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
1725 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e= mvSubArguments.end(); it!=e;
1726 ++it)
1728 if (it != mvSubArguments.begin())
1729 ss << ", ";
1730 (*it)->GenSlidingWindowDecl(ss);
1733 /// Generate either a function call to each children
1734 /// or direclty inline it if we are already inside a loop
1735 virtual std::string GenSlidingWindowDeclRef(bool nested=false) const
1737 std::stringstream ss;
1738 if (!nested)
1740 ss << mSymName << "_" << mpCodeGen->BinFuncName() <<"(";
1741 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1743 if (i)
1744 ss << ", ";
1745 if (!nested)
1746 mvSubArguments[i]->GenDeclRef(ss);
1747 else
1748 ss << mvSubArguments[i]->GenSlidingWindowDeclRef(true);
1750 ss << ")";
1751 } else {
1752 if (mvSubArguments.size() != 2)
1753 throw Unhandled();
1754 ss << "(" << mpCodeGen->Gen2(mvSubArguments[0]->GenSlidingWindowDeclRef(true),
1755 mvSubArguments[1]->GenSlidingWindowDeclRef(true)) << ")";
1757 return ss.str();
1759 virtual std::string DumpOpName(void) const
1761 std::string t = "_" + mpCodeGen->BinFuncName();
1762 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1763 t = t + mvSubArguments[i]->DumpOpName();
1764 return t;
1766 virtual void DumpInlineFun(std::set<std::string>& decls,
1767 std::set<std::string>& funs) const
1769 mpCodeGen->BinInlineFun(decls,funs);
1770 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1771 mvSubArguments[i]->DumpInlineFun(decls,funs);
1773 ~DynamicKernelSoPArguments()
1775 if (mpClmem2)
1777 clReleaseMemObject(mpClmem2);
1778 mpClmem2 = NULL;
1781 private:
1782 SubArgumentsType mvSubArguments;
1783 boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
1784 cl_mem mpClmem2;
1787 boost::shared_ptr<DynamicKernelArgument> SoPHelper(
1788 const std::string &ts, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen)
1790 return boost::shared_ptr<DynamicKernelArgument>(new DynamicKernelSoPArguments(ts, ft, pCodeGen));
1793 template<class Base>
1794 DynamicKernelArgument *VectorRefFactory(const std::string &s,
1795 const FormulaTreeNodeRef& ft,
1796 boost::shared_ptr<SlidingFunctionBase> &pCodeGen,
1797 int index)
1799 //Black lists ineligible classes here ..
1800 // SUMIFS does not perform parallel reduction at DoubleVectorRef level
1801 if (dynamic_cast<OpSumIfs*>(pCodeGen.get())) {
1802 if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
1803 return new DynamicKernelSlidingArgument<VectorRef>(s, ft, pCodeGen, index);
1804 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1806 // AVERAGE is not supported yet
1807 else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
1809 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1811 // MUL is not supported yet
1812 else if (dynamic_cast<OpMul*>(pCodeGen.get()))
1814 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1816 // Sub is not a reduction per se
1817 else if (dynamic_cast<OpSub*>(pCodeGen.get()))
1819 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1821 // Only child class of Reduction is supported
1822 else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
1824 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1827 const formula::DoubleVectorRefToken* pDVR =
1828 dynamic_cast< const formula::DoubleVectorRefToken* >(
1829 ft->GetFormulaToken());
1830 // Window being too small to justify a parallel reduction
1831 if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
1832 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1833 if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
1834 (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
1835 return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index);
1836 else // Other cases are not supported as well
1837 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1840 DynamicKernelSoPArguments::DynamicKernelSoPArguments(
1841 const std::string &s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen) :
1842 DynamicKernelArgument(s, ft), mpCodeGen(pCodeGen), mpClmem2(NULL)
1844 size_t nChildren = ft->Children.size();
1846 for (unsigned i = 0; i < nChildren; i++)
1848 FormulaToken *pChild = ft->Children[i]->GetFormulaToken();
1849 if (!pChild)
1850 throw Unhandled();
1851 OpCode opc = pChild->GetOpCode();
1852 std::stringstream tmpname;
1853 tmpname << s << "_" << i;
1854 std::string ts = tmpname.str();
1855 switch(opc) {
1856 case ocPush:
1857 if (pChild->GetType() == formula::svDoubleVectorRef)
1859 const formula::DoubleVectorRefToken* pDVR =
1860 dynamic_cast< const formula::DoubleVectorRefToken* >(pChild);
1861 assert(pDVR);
1862 for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
1864 if (pDVR->GetArrays()[j].mpNumericArray ||
1865 (pDVR->GetArrays()[j].mpNumericArray == NULL &&
1866 pDVR->GetArrays()[j].mpStringArray == NULL ))
1868 if(pDVR->GetArrays()[j].mpNumericArray &&
1869 pCodeGen->takeNumeric() &&
1870 pDVR->GetArrays()[j].mpStringArray &&
1871 pCodeGen->takeString())
1873 mvSubArguments.push_back(
1874 SubArgument(
1875 new DynamicKernelMixedSlidingArgument(
1876 ts, ft->Children[i], mpCodeGen, j)));
1878 else
1880 mvSubArguments.push_back(
1881 SubArgument(VectorRefFactory<VectorRef>(
1882 ts, ft->Children[i], mpCodeGen, j)));
1885 else
1886 mvSubArguments.push_back(
1887 SubArgument(VectorRefFactory
1888 <DynamicKernelStringArgument>(
1889 ts, ft->Children[i], mpCodeGen, j)));
1891 } else if (pChild->GetType() == formula::svSingleVectorRef) {
1892 const formula::SingleVectorRefToken* pSVR =
1893 dynamic_cast< const formula::SingleVectorRefToken* >(pChild);
1894 assert(pSVR);
1895 if (pSVR->GetArray().mpNumericArray &&
1896 pCodeGen->takeNumeric() &&
1897 pSVR->GetArray().mpStringArray &&
1898 pCodeGen->takeString())
1900 mvSubArguments.push_back(
1901 SubArgument(new DynamicKernelMixedArgument(
1902 ts, ft->Children[i])));
1904 else if (pSVR->GetArray().mpNumericArray &&
1905 pCodeGen->takeNumeric())
1907 mvSubArguments.push_back(
1908 SubArgument(new VectorRef(ts,
1909 ft->Children[i])));
1911 else if (pSVR->GetArray().mpStringArray &&
1912 pCodeGen->takeString())
1914 mvSubArguments.push_back(
1915 SubArgument(new DynamicKernelStringArgument(
1916 ts, ft->Children[i])));
1918 else if (pSVR->GetArray().mpStringArray == NULL &&
1919 pSVR->GetArray().mpNumericArray == NULL)
1921 // Push as an array of NANs
1922 mvSubArguments.push_back(
1923 SubArgument(new VectorRef(ts,
1924 ft->Children[i])));
1926 else
1927 throw UnhandledToken(pChild,
1928 "Got unhandled case here", __FILE__, __LINE__);
1929 } else if (pChild->GetType() == formula::svDouble) {
1930 mvSubArguments.push_back(
1931 SubArgument(new DynamicKernelConstantArgument(ts,
1932 ft->Children[i])));
1933 } else if (pChild->GetType() == formula::svString
1934 && pCodeGen->takeString()) {
1935 mvSubArguments.push_back(
1936 SubArgument(new ConstStringArgument(ts,
1937 ft->Children[i])));
1938 } else {
1939 throw UnhandledToken(pChild, "unknown operand for ocPush");
1941 break;
1942 case ocDiv:
1943 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDiv));
1944 break;
1945 case ocMul:
1946 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMul));
1947 break;
1948 case ocSub:
1949 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSub));
1950 break;
1951 case ocAdd:
1952 case ocSum:
1953 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSum));
1954 break;
1955 case ocAverage:
1956 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpAverage));
1957 break;
1958 case ocMin:
1959 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMin));
1960 break;
1961 case ocMax:
1962 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMax));
1963 break;
1964 case ocCount:
1965 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCount));
1966 break;
1967 case ocSumProduct:
1968 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSumProduct));
1969 break;
1970 case ocIRR:
1971 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpIRR));
1972 break;
1973 case ocMIRR:
1974 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMIRR));
1975 break;
1976 case ocRMZ:
1977 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPMT));
1978 break;
1979 case ocZins:
1980 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpIntrate));
1981 break;
1982 case ocZGZ:
1983 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpRRI));
1984 break;
1985 case ocKapz:
1986 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPPMT));
1987 break;
1988 case ocFisher:
1989 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFisher));
1990 break;
1991 case ocFisherInv:
1992 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFisherInv));
1993 break;
1994 case ocGamma:
1995 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGamma));
1996 break;
1997 case ocLIA:
1998 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSLN));
1999 break;
2000 case ocGammaLn:
2001 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGammaLn));
2002 break;
2003 case ocGauss:
2004 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGauss));
2005 break;
2006 case ocGeoMean:
2007 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGeoMean));
2008 break;
2009 case ocHarMean:
2010 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpHarMean));
2011 break;
2012 case ocLessEqual:
2013 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpLessEqual));
2014 break;
2015 case ocEqual:
2016 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpEqual));
2017 break;
2018 case ocGreater:
2019 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGreater));
2020 break;
2021 case ocDIA:
2022 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSYD));
2023 break;
2024 case ocCorrel:
2025 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCorrel));
2026 break;
2027 case ocCos:
2028 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCos));
2029 break;
2030 case ocNegBinomVert :
2031 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpNegbinomdist));
2032 break;
2033 case ocPearson:
2034 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPearson));
2035 break;
2036 case ocRSQ:
2037 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpRsq));
2038 break;
2039 case ocCosecant:
2040 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCsc));
2041 break;
2042 case ocISPMT:
2043 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpISPMT));
2044 break;
2045 case ocLaufz:
2046 mvSubArguments.push_back(SoPHelper(ts,
2047 ft->Children[i], new OpDuration));
2048 break;
2049 case ocSinHyp:
2050 mvSubArguments.push_back(SoPHelper(ts,
2051 ft->Children[i],new OpSinh));
2052 break;
2053 case ocAbs:
2054 mvSubArguments.push_back(SoPHelper(ts,
2055 ft->Children[i], new OpAbs));
2056 break;
2057 case ocBW:
2058 mvSubArguments.push_back(SoPHelper(ts,
2059 ft->Children[i], new OpPV));
2060 break;
2061 case ocSin:
2062 mvSubArguments.push_back(SoPHelper(ts,
2063 ft->Children[i], new OpSin));
2064 break;
2065 case ocTan:
2066 mvSubArguments.push_back(SoPHelper(ts,
2067 ft->Children[i], new OpTan));
2068 break;
2069 case ocTanHyp:
2070 mvSubArguments.push_back(SoPHelper(ts,
2071 ft->Children[i], new OpTanH));
2072 break;
2073 case ocStandard:
2074 mvSubArguments.push_back(SoPHelper(ts,
2075 ft->Children[i], new OpStandard));
2076 break;
2077 case ocWeibull:
2078 mvSubArguments.push_back(SoPHelper(ts,
2079 ft->Children[i], new OpWeibull));
2080 break;
2081 case ocMedian:
2082 mvSubArguments.push_back(SoPHelper(ts,
2083 ft->Children[i],new OpMedian));
2084 break;
2085 case ocGDA:
2086 mvSubArguments.push_back(SoPHelper(ts,
2087 ft->Children[i],new OpDDB));
2088 break;
2089 case ocZW:
2090 mvSubArguments.push_back(SoPHelper(ts,
2091 ft->Children[i],new OpFV));
2092 break;
2093 case ocSumIfs:
2094 mvSubArguments.push_back(SoPHelper(ts,
2095 ft->Children[i],new OpSumIfs));
2096 break;
2097 case ocVBD:
2098 mvSubArguments.push_back(SoPHelper(ts,
2099 ft->Children[i],new OpVDB));
2100 break;
2101 case ocKurt:
2102 mvSubArguments.push_back(SoPHelper(ts,
2103 ft->Children[i], new OpKurt));
2104 break;
2105 case ocZZR:
2106 mvSubArguments.push_back(SoPHelper(ts,
2107 ft->Children[i], new OpNper));
2108 break;
2109 case ocNormDist:
2110 mvSubArguments.push_back(SoPHelper(ts,
2111 ft->Children[i],new OpNormdist));
2112 break;
2113 case ocArcCos:
2114 mvSubArguments.push_back(SoPHelper(ts,
2115 ft->Children[i], new OpArcCos));
2116 break;
2117 case ocSqrt:
2118 mvSubArguments.push_back(SoPHelper(ts,
2119 ft->Children[i],new OpSqrt));
2120 break;
2121 case ocArcCosHyp:
2122 mvSubArguments.push_back(SoPHelper(ts,
2123 ft->Children[i], new OpArcCosHyp));
2124 break;
2125 case ocNPV:
2126 mvSubArguments.push_back(SoPHelper(ts,
2127 ft->Children[i], new OpNPV));
2128 break;
2129 case ocStdNormDist:
2130 mvSubArguments.push_back(SoPHelper(ts,
2131 ft->Children[i],new OpNormsdist));
2132 break;
2133 case ocNormInv:
2134 mvSubArguments.push_back(SoPHelper(ts,
2135 ft->Children[i],new OpNorminv));
2136 break;
2137 case ocSNormInv:
2138 mvSubArguments.push_back(SoPHelper(ts,
2139 ft->Children[i],new OpNormsinv));
2140 break;
2141 case ocVariationen:
2142 mvSubArguments.push_back(SoPHelper(ts,
2143 ft->Children[i],new OpVariationen));
2144 break;
2145 case ocVariationen2:
2146 mvSubArguments.push_back(SoPHelper(ts,
2147 ft->Children[i],new OpVariationen2));
2148 break;
2149 case ocPhi:
2150 mvSubArguments.push_back(SoPHelper(ts,
2151 ft->Children[i],new OpPhi));
2152 break;
2153 case ocZinsZ:
2154 mvSubArguments.push_back(SoPHelper(ts,
2155 ft->Children[i],new OpIPMT));
2156 break;
2157 case ocConfidence:
2158 mvSubArguments.push_back(SoPHelper(ts,
2159 ft->Children[i], new OpConfidence));
2160 break;
2161 case ocIntercept:
2162 mvSubArguments.push_back(SoPHelper(ts,
2163 ft->Children[i], new OpIntercept));
2164 break;
2165 case ocGDA2:
2166 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2167 new OpDB));
2168 break;
2169 case ocLogInv:
2170 mvSubArguments.push_back(SoPHelper(ts,
2171 ft->Children[i], new OpLogInv));
2172 break;
2173 case ocArcCot:
2174 mvSubArguments.push_back(SoPHelper(ts,
2175 ft->Children[i], new OpArcCot));
2176 break;
2177 case ocCosHyp:
2178 mvSubArguments.push_back(SoPHelper(ts,
2179 ft->Children[i], new OpCosh));
2180 break;
2181 case ocKritBinom:
2182 mvSubArguments.push_back(SoPHelper(ts,
2183 ft->Children[i], new OpCritBinom));
2184 break;
2185 case ocArcCotHyp:
2186 mvSubArguments.push_back(SoPHelper(ts,
2187 ft->Children[i], new OpArcCotHyp));
2188 break;
2189 case ocArcSin:
2190 mvSubArguments.push_back(SoPHelper(ts,
2191 ft->Children[i], new OpArcSin));
2192 break;
2193 case ocArcSinHyp:
2194 mvSubArguments.push_back(SoPHelper(ts,
2195 ft->Children[i], new OpArcSinHyp));
2196 break;
2197 case ocArcTan:
2198 mvSubArguments.push_back(SoPHelper(ts,
2199 ft->Children[i], new OpArcTan));
2200 break;
2201 case ocArcTanHyp:
2202 mvSubArguments.push_back(SoPHelper(ts,
2203 ft->Children[i], new OpArcTanH));
2204 break;
2205 case ocBitAnd:
2206 mvSubArguments.push_back(SoPHelper(ts,
2207 ft->Children[i], new OpBitAnd));
2208 break;
2209 case ocForecast:
2210 mvSubArguments.push_back(SoPHelper(ts,
2211 ft->Children[i], new OpForecast));
2212 break;
2213 case ocLogNormDist:
2214 mvSubArguments.push_back(SoPHelper(ts,
2215 ft->Children[i], new OpLogNormDist));
2216 break;
2217 case ocGammaDist:
2218 mvSubArguments.push_back(SoPHelper(ts,
2219 ft->Children[i], new OpGammaDist));
2220 break;
2221 case ocLn:
2222 mvSubArguments.push_back(SoPHelper(ts,
2223 ft->Children[i],new OpLn));
2224 break;
2225 case ocRound:
2226 mvSubArguments.push_back(SoPHelper(ts,
2227 ft->Children[i],new OpRound));
2228 break;
2229 case ocCot:
2230 mvSubArguments.push_back(SoPHelper(ts,
2231 ft->Children[i], new OpCot));
2232 break;
2233 case ocCotHyp:
2234 mvSubArguments.push_back(SoPHelper(ts,
2235 ft->Children[i], new OpCoth));
2236 break;
2237 case ocFDist:
2238 mvSubArguments.push_back(SoPHelper(ts,
2239 ft->Children[i], new OpFdist));
2240 break;
2241 case ocVar:
2242 mvSubArguments.push_back(SoPHelper(ts,
2243 ft->Children[i], new OpVar));
2244 break;
2245 case ocChiDist:
2246 mvSubArguments.push_back(SoPHelper(ts,
2247 ft->Children[i],new OpChiDist));
2248 break;
2249 case ocPow:
2250 case ocPower:
2251 mvSubArguments.push_back(SoPHelper(ts,
2252 ft->Children[i], new OpPower));
2253 break;
2254 case ocOdd:
2255 mvSubArguments.push_back(SoPHelper(ts,
2256 ft->Children[i], new OpOdd));
2257 break;
2258 case ocChiSqDist:
2259 mvSubArguments.push_back(SoPHelper(ts,
2260 ft->Children[i],new OpChiSqDist));
2261 break;
2262 case ocChiSqInv:
2263 mvSubArguments.push_back(SoPHelper(ts,
2264 ft->Children[i],new OpChiSqInv));
2265 break;
2266 case ocGammaInv:
2267 mvSubArguments.push_back(SoPHelper(ts,
2268 ft->Children[i], new OpGammaInv));
2269 break;
2270 case ocFloor:
2271 mvSubArguments.push_back(SoPHelper(ts,
2272 ft->Children[i], new OpFloor));
2273 break;
2274 case ocFInv:
2275 mvSubArguments.push_back(SoPHelper(ts,
2276 ft->Children[i], new OpFInv));
2277 break;
2278 case ocFTest:
2279 mvSubArguments.push_back(SoPHelper(ts,
2280 ft->Children[i], new OpFTest));
2281 break;
2282 case ocB:
2283 mvSubArguments.push_back(SoPHelper(ts,
2284 ft->Children[i], new OpB));
2285 break;
2286 case ocBetaDist:
2287 mvSubArguments.push_back(SoPHelper(ts,
2288 ft->Children[i], new OpBetaDist));
2289 break;
2290 case ocCosecantHyp:
2291 mvSubArguments.push_back(SoPHelper(ts,
2292 ft->Children[i], new OpCscH));
2293 break;
2294 case ocExp:
2295 mvSubArguments.push_back(SoPHelper(ts,
2296 ft->Children[i], new OpExp));
2297 break;
2298 case ocLog10:
2299 mvSubArguments.push_back(SoPHelper(ts,
2300 ft->Children[i], new OpLog10));
2301 break;
2302 case ocExpDist:
2303 mvSubArguments.push_back(SoPHelper(ts,
2304 ft->Children[i], new OpExponDist));
2305 break;
2306 case ocAverageIfs:
2307 mvSubArguments.push_back(SoPHelper(ts,
2308 ft->Children[i],new OpAverageIfs));
2309 break;
2310 case ocCountIfs:
2311 mvSubArguments.push_back(SoPHelper(ts,
2312 ft->Children[i],new OpCountIfs));
2313 break;
2314 case ocKombin2:
2315 mvSubArguments.push_back(SoPHelper(ts,
2316 ft->Children[i], new OpCombina));
2317 break;
2318 case ocEven:
2319 mvSubArguments.push_back(SoPHelper(ts,
2320 ft->Children[i], new OpEven));
2321 break;
2322 case ocLog:
2323 mvSubArguments.push_back(SoPHelper(ts,
2324 ft->Children[i], new OpLog));
2325 break;
2326 case ocMod:
2327 mvSubArguments.push_back(SoPHelper(ts,
2328 ft->Children[i], new OpMod));
2329 break;
2330 case ocTrunc:
2331 mvSubArguments.push_back(SoPHelper(ts,
2332 ft->Children[i], new OpTrunc));
2333 break;
2334 case ocSchiefe:
2335 mvSubArguments.push_back(SoPHelper(ts,
2336 ft->Children[i], new OpSkew));
2337 break;
2338 case ocArcTan2:
2339 mvSubArguments.push_back(SoPHelper(ts,
2340 ft->Children[i], new OpArcTan2));
2341 break;
2342 case ocBitOr:
2343 mvSubArguments.push_back(SoPHelper(ts,
2344 ft->Children[i], new OpBitOr));
2345 break;
2346 case ocBitLshift:
2347 mvSubArguments.push_back(SoPHelper(ts,
2348 ft->Children[i], new OpBitLshift));
2349 break;
2350 case ocBitRshift:
2351 mvSubArguments.push_back(SoPHelper(ts,
2352 ft->Children[i], new OpBitRshift));
2353 break;
2354 case ocBitXor:
2355 mvSubArguments.push_back(SoPHelper(ts,
2356 ft->Children[i], new OpBitXor));
2357 break;
2358 case ocChiInv:
2359 mvSubArguments.push_back(SoPHelper(ts,
2360 ft->Children[i],new OpChiInv));
2361 break;
2362 case ocPoissonDist:
2363 mvSubArguments.push_back(SoPHelper(ts,
2364 ft->Children[i],new OpPoisson));
2365 break;
2366 case ocSumSQ:
2367 mvSubArguments.push_back(SoPHelper(ts,
2368 ft->Children[i], new OpSumSQ));
2369 break;
2370 case ocSkewp:
2371 mvSubArguments.push_back(SoPHelper(ts,
2372 ft->Children[i], new OpSkewp));
2373 break;
2374 case ocBinomDist:
2375 mvSubArguments.push_back(SoPHelper(ts,
2376 ft->Children[i],new OpBinomdist));
2377 break;
2378 case ocVarP:
2379 mvSubArguments.push_back(SoPHelper(ts,
2380 ft->Children[i], new OpVarP));
2381 break;
2382 case ocCeil:
2383 mvSubArguments.push_back(SoPHelper(ts,
2384 ft->Children[i], new OpCeil));
2385 break;
2386 case ocKombin:
2387 mvSubArguments.push_back(SoPHelper(ts,
2388 ft->Children[i], new OpKombin));
2389 break;
2390 case ocDevSq:
2391 mvSubArguments.push_back(SoPHelper(ts,
2392 ft->Children[i], new OpDevSq));
2393 break;
2394 case ocStDev:
2395 mvSubArguments.push_back(SoPHelper(ts,
2396 ft->Children[i], new OpStDev));
2397 break;
2398 case ocSlope:
2399 mvSubArguments.push_back(SoPHelper(ts,
2400 ft->Children[i], new OpSlope));
2401 break;
2402 case ocSTEYX:
2403 mvSubArguments.push_back(SoPHelper(ts,
2404 ft->Children[i], new OpSTEYX));
2405 break;
2406 case ocZTest:
2407 mvSubArguments.push_back(SoPHelper(ts,
2408 ft->Children[i], new OpZTest));
2409 break;
2410 case ocPi:
2411 mvSubArguments.push_back(
2412 SubArgument(new DynamicKernelPiArgument(ts,
2413 ft->Children[i])));
2414 break;
2415 case ocRandom:
2416 mvSubArguments.push_back(
2417 SubArgument(new DynamicKernelRandomArgument(ts,
2418 ft->Children[i])));
2419 break;
2420 case ocProduct:
2421 mvSubArguments.push_back(SoPHelper(ts,
2422 ft->Children[i], new OpProduct));
2423 break;
2424 case ocHypGeomDist:
2425 mvSubArguments.push_back(SoPHelper(ts,
2426 ft->Children[i],new OpHypGeomDist));
2427 break;
2428 case ocSumX2MY2:
2429 mvSubArguments.push_back(SoPHelper(ts,
2430 ft->Children[i],new OpSumX2MY2));
2431 break;
2432 case ocSumX2DY2:
2433 mvSubArguments.push_back(SoPHelper(ts,
2434 ft->Children[i],new OpSumX2PY2));
2435 break;
2436 case ocBetaInv:
2437 mvSubArguments.push_back(SoPHelper(ts,
2438 ft->Children[i],new OpBetainv));
2439 break;
2440 case ocTTest:
2441 mvSubArguments.push_back(SoPHelper(ts,
2442 ft->Children[i], new OpTTest));
2443 break;
2444 case ocTDist:
2445 mvSubArguments.push_back(SoPHelper(ts,
2446 ft->Children[i], new OpTDist));
2447 break;
2448 case ocTInv:
2449 mvSubArguments.push_back(SoPHelper(ts,
2450 ft->Children[i], new OpTInv));
2451 break;
2452 case ocSumXMY2:
2453 mvSubArguments.push_back(SoPHelper(ts,
2454 ft->Children[i],new OpSumXMY2));
2455 break;
2456 case ocStDevP:
2457 mvSubArguments.push_back(SoPHelper(ts,
2458 ft->Children[i], new OpStDevP));
2459 break;
2460 case ocCovar:
2461 mvSubArguments.push_back(SoPHelper(ts,
2462 ft->Children[i], new OpCovar));
2463 break;
2464 case ocAnd:
2465 mvSubArguments.push_back(SoPHelper(ts,
2466 ft->Children[i], new OpAnd));
2467 break;
2468 case ocVLookup:
2469 mvSubArguments.push_back(SoPHelper(ts,
2470 ft->Children[i], new OpVLookup));
2471 break;
2472 case ocOr:
2473 mvSubArguments.push_back(SoPHelper(ts,
2474 ft->Children[i], new OpOr));
2475 break;
2476 case ocNot:
2477 mvSubArguments.push_back(SoPHelper(ts,
2478 ft->Children[i], new OpNot));
2479 break;
2480 case ocXor:
2481 mvSubArguments.push_back(SoPHelper(ts,
2482 ft->Children[i], new OpXor));
2483 break;
2484 case ocDBMax:
2485 mvSubArguments.push_back(SoPHelper(ts,
2486 ft->Children[i], new OpDmax));
2487 break;
2488 case ocDBMin:
2489 mvSubArguments.push_back(SoPHelper(ts,
2490 ft->Children[i], new OpDmin));
2491 break;
2492 case ocDBProduct:
2493 mvSubArguments.push_back(SoPHelper(ts,
2494 ft->Children[i], new OpDproduct));
2495 break;
2496 case ocDBAverage:
2497 mvSubArguments.push_back(SoPHelper(ts,
2498 ft->Children[i], new OpDaverage));
2499 break;
2500 case ocDBStdDev:
2501 mvSubArguments.push_back(SoPHelper(ts,
2502 ft->Children[i], new OpDstdev));
2503 break;
2504 case ocDBStdDevP:
2505 mvSubArguments.push_back(SoPHelper(ts,
2506 ft->Children[i], new OpDstdevp));
2507 break;
2508 case ocDBSum:
2509 mvSubArguments.push_back(SoPHelper(ts,
2510 ft->Children[i], new OpDsum));
2511 break;
2512 case ocDBVar:
2513 mvSubArguments.push_back(SoPHelper(ts,
2514 ft->Children[i], new OpDvar));
2515 break;
2516 case ocDBVarP:
2517 mvSubArguments.push_back(SoPHelper(ts,
2518 ft->Children[i], new OpDvarp));
2519 break;
2520 case ocAverageIf:
2521 mvSubArguments.push_back(SoPHelper(ts,
2522 ft->Children[i], new OpAverageIf));
2523 break;
2524 case ocDBCount:
2525 mvSubArguments.push_back(SoPHelper(ts,
2526 ft->Children[i], new OpDcount));
2527 break;
2528 case ocDBCount2:
2529 mvSubArguments.push_back(SoPHelper(ts,
2530 ft->Children[i], new OpDcount2));
2531 break;
2532 case ocDeg:
2533 mvSubArguments.push_back(SoPHelper(ts,
2534 ft->Children[i], new OpDeg));
2535 break;
2536 case ocRoundUp:
2537 mvSubArguments.push_back(SoPHelper(ts,
2538 ft->Children[i],new OpRoundUp));
2539 break;
2540 case ocRoundDown:
2541 mvSubArguments.push_back(SoPHelper(ts,
2542 ft->Children[i],new OpRoundDown));
2543 break;
2544 case ocInt:
2545 mvSubArguments.push_back(SoPHelper(ts,
2546 ft->Children[i],new OpInt));
2547 break;
2548 case ocRad:
2549 mvSubArguments.push_back(SoPHelper(ts,
2550 ft->Children[i],new OpRadians));
2551 break;
2552 case ocCountIf:
2553 mvSubArguments.push_back(SoPHelper(ts,
2554 ft->Children[i], new OpCountIf));
2555 break;
2556 case ocIsEven:
2557 mvSubArguments.push_back(SoPHelper(ts,
2558 ft->Children[i],new OpIsEven));
2559 break;
2560 case ocIsOdd:
2561 mvSubArguments.push_back(SoPHelper(ts,
2562 ft->Children[i],new OpIsOdd));
2563 break;
2564 case ocFact:
2565 mvSubArguments.push_back(SoPHelper(ts,
2566 ft->Children[i], new OpFact));
2567 break;
2568 case ocMinA:
2569 mvSubArguments.push_back(SoPHelper(ts,
2570 ft->Children[i], new OpMinA));
2571 break;
2572 case ocCount2:
2573 mvSubArguments.push_back(SoPHelper(ts,
2574 ft->Children[i], new OpCountA));
2575 break;
2576 case ocMaxA:
2577 mvSubArguments.push_back(SoPHelper(ts,
2578 ft->Children[i], new OpMaxA));
2579 break;
2580 case ocAverageA:
2581 mvSubArguments.push_back(SoPHelper(ts,
2582 ft->Children[i], new OpAverageA));
2583 break;
2584 case ocVarA:
2585 mvSubArguments.push_back(SoPHelper(ts,
2586 ft->Children[i], new OpVarA));
2587 break;
2588 case ocVarPA:
2589 mvSubArguments.push_back(SoPHelper(ts,
2590 ft->Children[i], new OpVarPA));
2591 break;
2592 case ocStDevA:
2593 mvSubArguments.push_back(SoPHelper(ts,
2594 ft->Children[i], new OpStDevA));
2595 break;
2596 case ocStDevPA:
2597 mvSubArguments.push_back(SoPHelper(ts,
2598 ft->Children[i], new OpStDevPA));
2599 break;
2600 case ocSecant:
2601 mvSubArguments.push_back(SoPHelper(ts,
2602 ft->Children[i], new OpSec));
2603 break;
2604 case ocSecantHyp:
2605 mvSubArguments.push_back(SoPHelper(ts,
2606 ft->Children[i], new OpSecH));
2607 break;
2608 case ocSumIf:
2609 mvSubArguments.push_back(SoPHelper(ts,
2610 ft->Children[i], new OpSumIf));
2611 break;
2612 case ocNegSub:
2613 mvSubArguments.push_back(SoPHelper(ts,
2614 ft->Children[i], new OpNegSub));
2615 break;
2616 case ocAveDev:
2617 mvSubArguments.push_back(SoPHelper(ts,
2618 ft->Children[i], new OpAveDev));
2619 break;
2620 case ocExternal:
2621 if ( !(pChild->GetExternal().compareTo(OUString(
2622 "com.sun.star.sheet.addin.Analysis.getEffect"))))
2624 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpEffective));
2626 else if ( !(pChild->GetExternal().compareTo(OUString(
2627 "com.sun.star.sheet.addin.Analysis.getCumipmt"))))
2629 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCumipmt));
2631 else if ( !(pChild->GetExternal().compareTo(OUString(
2632 "com.sun.star.sheet.addin.Analysis.getNominal"))))
2634 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpNominal));
2636 else if ( !(pChild->GetExternal().compareTo(OUString(
2637 "com.sun.star.sheet.addin.Analysis.getCumprinc"))))
2639 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCumprinc));
2641 else if ( !(pChild->GetExternal().compareTo(OUString(
2642 "com.sun.star.sheet.addin.Analysis.getXnpv"))))
2644 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpXNPV));
2646 else if ( !(pChild->GetExternal().compareTo(OUString(
2647 "com.sun.star.sheet.addin.Analysis.getPricemat"))))
2649 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPriceMat));
2651 else if ( !(pChild->GetExternal().compareTo(OUString(
2652 "com.sun.star.sheet.addin.Analysis.getReceived"))))
2654 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpReceived));
2656 else if( !(pChild->GetExternal().compareTo(OUString(
2657 "com.sun.star.sheet.addin.Analysis.getTbilleq"))))
2659 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbilleq));
2661 else if( !(pChild->GetExternal().compareTo(OUString(
2662 "com.sun.star.sheet.addin.Analysis.getTbillprice"))))
2664 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbillprice));
2666 else if( !(pChild->GetExternal().compareTo(OUString(
2667 "com.sun.star.sheet.addin.Analysis.getTbillyield"))))
2669 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbillyield));
2671 else if (!(pChild->GetExternal().compareTo(OUString(
2672 "com.sun.star.sheet.addin.Analysis.getFvschedule"))))
2674 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFvschedule));
2676 else if ( !(pChild->GetExternal().compareTo(OUString(
2677 "com.sun.star.sheet.addin.Analysis.getYield"))))
2679 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYield));
2681 else if ( !(pChild->GetExternal().compareTo(OUString(
2682 "com.sun.star.sheet.addin.Analysis.getYielddisc"))))
2684 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYielddisc));
2686 else if ( !(pChild->GetExternal().compareTo(OUString(
2687 "com.sun.star.sheet.addin.Analysis.getYieldmat"))))
2689 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYieldmat));
2691 else if ( !(pChild->GetExternal().compareTo(OUString(
2692 "com.sun.star.sheet.addin.Analysis.getAccrintm"))))
2694 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpAccrintm));
2696 else if ( !(pChild->GetExternal().compareTo(OUString(
2697 "com.sun.star.sheet.addin.Analysis.getCoupdaybs"))))
2699 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdaybs));
2701 else if ( !(pChild->GetExternal().compareTo(OUString(
2702 "com.sun.star.sheet.addin.Analysis.getDollarde"))))
2704 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDollarde));
2706 else if ( !(pChild->GetExternal().compareTo(OUString(
2707 "com.sun.star.sheet.addin.Analysis.getDollarfr"))))
2709 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDollarfr));
2711 else if ( !(pChild->GetExternal().compareTo(OUString(
2712 "com.sun.star.sheet.addin.Analysis.getCoupdays"))))
2714 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdays));
2716 else if ( !(pChild->GetExternal().compareTo(OUString(
2717 "com.sun.star.sheet.addin.Analysis.getCoupdaysnc"))))
2719 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdaysnc));
2721 else if ( !(pChild->GetExternal().compareTo(OUString(
2722 "com.sun.star.sheet.addin.Analysis.getDisc"))))
2724 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDISC));
2726 else if ( !(pChild->GetExternal().compareTo(OUString(
2727 "com.sun.star.sheet.addin.Analysis.getIntrate"))))
2729 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpINTRATE));
2731 else if ( !(pChild->GetExternal().compareTo(OUString(
2732 "com.sun.star.sheet.addin.Analysis.getPrice"))))
2734 mvSubArguments.push_back(SoPHelper(ts,
2735 ft->Children[i], new OpPrice));
2737 else if ( !(pChild->GetExternal().compareTo(OUString(
2738 "com.sun.star.sheet.addin.Analysis.getCoupnum"))))
2740 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2741 new OpCoupnum));
2743 else if ( !(pChild->GetExternal().compareTo(OUString(
2744 "com.sun.star.sheet.addin.Analysis.getDuration"))))
2746 mvSubArguments.push_back(
2747 SoPHelper(ts, ft->Children[i], new OpDuration_ADD));
2749 else if ( !(pChild->GetExternal().compareTo(OUString(
2750 "com.sun.star.sheet.addin.Analysis.getAmordegrc"))))
2752 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2753 new OpAmordegrc));
2755 else if ( !(pChild->GetExternal().compareTo(OUString(
2756 "com.sun.star.sheet.addin.Analysis.getAmorlinc"))))
2758 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2759 new OpAmorlinc));
2761 else if ( !(pChild->GetExternal().compareTo(OUString(
2762 "com.sun.star.sheet.addin.Analysis.getMduration"))))
2764 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2765 new OpMDuration));
2767 else if ( !(pChild->GetExternal().compareTo(OUString(
2768 "com.sun.star.sheet.addin.Analysis.getXirr"))))
2770 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2771 new OpXirr));
2773 else if ( !(pChild->GetExternal().compareTo(OUString(
2774 "com.sun.star.sheet.addin.Analysis.getOddlprice"))))
2776 mvSubArguments.push_back(SoPHelper(ts,
2777 ft->Children[i], new OpOddlprice));
2779 else if ( !(pChild->GetExternal().compareTo(OUString(
2780 "com.sun.star.sheet.addin.Analysis.getOddlyield"))))
2782 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2783 new OpOddlyield));
2785 else if ( !(pChild->GetExternal().compareTo(OUString(
2786 "com.sun.star.sheet.addin.Analysis.getPricedisc"))))
2788 mvSubArguments.push_back(SoPHelper(ts,
2789 ft->Children[i], new OpPriceDisc));
2791 else if ( !(pChild->GetExternal().compareTo(OUString(
2792 "com.sun.star.sheet.addin.Analysis.getCouppcd"))))
2794 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2795 new OpCouppcd));
2797 else if ( !(pChild->GetExternal().compareTo(OUString(
2798 "com.sun.star.sheet.addin.Analysis.getCoupncd"))))
2800 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2801 new OpCoupncd));
2803 else if ( !(pChild->GetExternal().compareTo(OUString(
2804 "com.sun.star.sheet.addin.Analysis.getAccrint"))))
2806 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2807 new OpAccrint));
2809 else if ( !(pChild->GetExternal().compareTo(OUString(
2810 "com.sun.star.sheet.addin.Analysis.getSqrtpi"))))
2812 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2813 new OpSqrtPi));
2815 else if ( !(pChild->GetExternal().compareTo(OUString(
2816 "com.sun.star.sheet.addin.Analysis.getConvert"))))
2818 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2819 new OpConvert));
2820 }else if ( !(pChild->GetExternal().compareTo(OUString(
2821 "com.sun.star.sheet.addin.Analysis.getIseven"))))
2823 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2824 new OpIsEven));
2826 else if ( !(pChild->GetExternal().compareTo(OUString(
2827 "com.sun.star.sheet.addin.Analysis.getIsodd"))))
2829 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2830 new OpIsOdd));
2832 else if ( !(pChild->GetExternal().compareTo(OUString(
2833 "com.sun.star.sheet.addin.Analysis.getMround"))))
2835 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2836 new OpMROUND));
2838 else if ( !(pChild->GetExternal().compareTo(OUString(
2839 "com.sun.star.sheet.addin.Analysis.getQuotient"))))
2841 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2842 new OpQuotient));
2844 else if ( !(pChild->GetExternal().compareTo(OUString(
2845 "com.sun.star.sheet.addin.Analysis.getSeriessum"))))
2847 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2848 new OpSeriesSum));
2850 else if ( !(pChild->GetExternal().compareTo(OUString(
2851 "com.sun.star.sheet.addin.Analysis.getBesselj"))))
2853 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2854 new OpBesselj));
2856 break;
2858 default:
2859 throw UnhandledToken(pChild, "unhandled opcode");
2864 /// Holds the symbol table for a given dynamic kernel
2865 class SymbolTable {
2866 public:
2867 typedef std::map<const formula::FormulaToken *,
2868 boost::shared_ptr<DynamicKernelArgument> > ArgumentMap;
2869 // This avoids instability caused by using pointer as the key type
2870 typedef std::list< boost::shared_ptr<DynamicKernelArgument> > ArgumentList;
2871 SymbolTable(void):mCurId(0) {}
2872 template <class T>
2873 const DynamicKernelArgument *DeclRefArg(FormulaTreeNodeRef, SlidingFunctionBase* pCodeGen);
2874 /// Used to generate sliding window helpers
2875 void DumpSlidingWindowFunctions(std::stringstream &ss)
2877 for(ArgumentList::iterator it = mParams.begin(), e= mParams.end(); it!=e;
2878 ++it) {
2879 (*it)->GenSlidingWindowFunction(ss);
2880 ss << "\n";
2883 /// Memory mapping from host to device and pass buffers to the given kernel as
2884 /// arguments
2885 void Marshal(cl_kernel, int, cl_program);
2886 private:
2887 unsigned int mCurId;
2888 ArgumentMap mSymbols;
2889 ArgumentList mParams;
2892 void SymbolTable::Marshal(cl_kernel k, int nVectorWidth, cl_program pProgram)
2894 int i = 1; //The first argument is reserved for results
2895 for(ArgumentList::iterator it = mParams.begin(), e= mParams.end(); it!=e;
2896 ++it) {
2897 i+=(*it)->Marshal(k, i, nVectorWidth, pProgram);
2901 class DynamicKernel : public CompiledFormula
2903 public:
2904 DynamicKernel(FormulaTreeNodeRef r):mpRoot(r),
2905 mpProgram(NULL), mpKernel(NULL), mpResClmem(NULL), mpCode(NULL) {}
2906 static DynamicKernel *create(ScDocument& rDoc,
2907 const ScAddress& rTopPos,
2908 ScTokenArray& rCode);
2909 /// OpenCL code generation
2910 void CodeGen() {
2911 // Travese the tree of expression and declare symbols used
2912 const DynamicKernelArgument *DK= mSyms.DeclRefArg<
2913 DynamicKernelSoPArguments>(mpRoot, new OpNop);
2915 std::stringstream decl;
2916 if (OpenclDevice::gpuEnv.mnKhrFp64Flag) {
2917 decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
2918 } else if (OpenclDevice::gpuEnv.mnAmdFp64Flag) {
2919 decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
2921 // preambles
2922 decl << publicFunc;
2923 DK->DumpInlineFun(inlineDecl,inlineFun);
2924 for(std::set<std::string>::iterator set_iter=inlineDecl.begin();
2925 set_iter!=inlineDecl.end();++set_iter)
2927 decl<<*set_iter;
2930 for(std::set<std::string>::iterator set_iter=inlineFun.begin();
2931 set_iter!=inlineFun.end();++set_iter)
2933 decl<<*set_iter;
2935 mSyms.DumpSlidingWindowFunctions(decl);
2936 mKernelSignature = DK->DumpOpName();
2937 decl << "__kernel void DynamicKernel" << mKernelSignature;
2938 decl << "(__global double *result, ";
2939 DK->GenSlidingWindowDecl(decl);
2940 decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
2941 DK->GenSlidingWindowDeclRef(false) << ";\n}\n";
2942 mFullProgramSrc = decl.str();
2943 #if 1
2944 std::cerr<< "Program to be compiled = \n" << mFullProgramSrc << "\n";
2945 #endif
2947 /// Produce kernel hash
2948 std::string GetMD5(void)
2950 #ifdef MD5_KERNEL
2951 if (mKernelHash.empty()) {
2952 std::stringstream md5s;
2953 // Compute MD5SUM of kernel body to obtain the name
2954 sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
2955 rtl_digest_MD5(
2956 mFullProgramSrc.c_str(),
2957 mFullProgramSrc.length(), result,
2958 RTL_DIGEST_LENGTH_MD5);
2959 for(int i=0; i < RTL_DIGEST_LENGTH_MD5; i++) {
2960 md5s << std::hex << (int)result[i];
2962 mKernelHash = md5s.str();
2964 return mKernelHash;
2965 #else
2966 return "";
2967 #endif
2969 /// Create program, build, and create kerenl
2970 /// TODO cache results based on kernel body hash
2971 /// TODO: abstract OpenCL part out into OpenCL wrapper.
2972 void CreateKernel(void);
2973 /// Prepare buffers, marshal them to GPU, and launch the kernel
2974 /// TODO: abstract OpenCL part out into OpenCL wrapper.
2975 void Launch(size_t nr)
2977 // Obtain cl context
2978 KernelEnv kEnv;
2979 OpenclDevice::setKernelEnv(&kEnv);
2980 cl_int err;
2981 // The results
2982 mpResClmem = clCreateBuffer(kEnv.mpkContext,
2983 (cl_mem_flags) CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR,
2984 nr*sizeof(double), NULL, &err);
2985 if (CL_SUCCESS != err)
2986 throw OpenCLError(err);
2987 err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), (void*)&mpResClmem);
2988 if (CL_SUCCESS != err)
2989 throw OpenCLError(err);
2990 // The rest of buffers
2991 mSyms.Marshal(mpKernel, nr, mpProgram);
2992 size_t global_work_size[] = {nr};
2993 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, NULL,
2994 global_work_size, NULL, 0, NULL, NULL);
2995 if (CL_SUCCESS != err)
2996 throw OpenCLError(err);
2998 ~DynamicKernel();
2999 cl_mem GetResultBuffer(void) const { return mpResClmem; }
3000 void SetPCode(ScTokenArray *pCode) { mpCode = pCode; }
3002 private:
3003 void TraverseAST(FormulaTreeNodeRef);
3004 FormulaTreeNodeRef mpRoot;
3005 SymbolTable mSyms;
3006 std::string mKernelSignature, mKernelHash;
3007 std::string mFullProgramSrc;
3008 cl_program mpProgram;
3009 cl_kernel mpKernel;
3010 cl_mem mpResClmem; // Results
3011 std::set<std::string> inlineDecl;
3012 std::set<std::string> inlineFun;
3013 ScTokenArray *mpCode;
3016 DynamicKernel::~DynamicKernel()
3018 if (mpResClmem) {
3019 std::cerr<<"Freeing kernel "<< GetMD5() << " result buffer\n";
3020 clReleaseMemObject(mpResClmem);
3022 if (mpKernel) {
3023 std::cerr<<"Freeing kernel "<< GetMD5() << " kernel\n";
3024 clReleaseKernel(mpKernel);
3026 // mpProgram is not going to be released here -- it's cached.
3027 if (mpCode)
3028 delete mpCode;
3030 /// Build code
3031 void DynamicKernel::CreateKernel(void)
3033 cl_int err;
3034 std::string kname = "DynamicKernel"+mKernelSignature;
3035 // Compile kernel here!!!
3036 // Obtain cl context
3037 KernelEnv kEnv;
3038 OpenclDevice::setKernelEnv(&kEnv);
3039 const char *src = mFullProgramSrc.c_str();
3040 static std::string lastOneKernelHash = "";
3041 static std::string lastSecondKernelHash = "";
3042 static cl_program lastOneProgram = NULL;
3043 static cl_program lastSecondProgram = NULL;
3044 std::string KernelHash = mKernelSignature+GetMD5();
3045 if (lastOneKernelHash == KernelHash && lastOneProgram)
3047 mpProgram = lastOneProgram;
3049 else if(lastSecondKernelHash == KernelHash && lastSecondProgram)
3051 mpProgram = lastSecondProgram;
3053 else
3054 { // doesn't match the last compiled formula.
3056 if (lastSecondProgram) {
3057 clReleaseProgram(lastSecondProgram);
3059 if (OpenclDevice::buildProgramFromBinary("",
3060 &OpenclDevice::gpuEnv, KernelHash.c_str(), 0)) {
3061 mpProgram = OpenclDevice::gpuEnv.mpArryPrograms[0];
3062 OpenclDevice::gpuEnv.mpArryPrograms[0] = NULL;
3063 } else {
3064 mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
3065 &src, NULL, &err);
3066 if (err != CL_SUCCESS)
3067 throw OpenCLError(err);
3068 err = clBuildProgram(mpProgram, 1,
3069 OpenclDevice::gpuEnv.mpArryDevsID, "", NULL, NULL);
3070 if (err != CL_SUCCESS)
3071 throw OpenCLError(err);
3072 // Generate binary out of compiled kernel.
3073 OpenclDevice::generatBinFromKernelSource(mpProgram,
3074 (mKernelSignature+GetMD5()).c_str());
3076 lastSecondKernelHash = lastOneKernelHash;
3077 lastSecondProgram = lastOneProgram;
3078 lastOneKernelHash = KernelHash;
3079 lastOneProgram = mpProgram;
3081 mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
3082 if (err != CL_SUCCESS)
3083 throw OpenCLError(err);
3085 // Symbol lookup. If there is no such symbol created, allocate one
3086 // kernel with argument with unique name and return so.
3087 // The template argument T must be a subclass of DynamicKernelArgument
3088 template <typename T>
3089 const DynamicKernelArgument *SymbolTable::DeclRefArg(
3090 FormulaTreeNodeRef t, SlidingFunctionBase* pCodeGen)
3092 FormulaToken *ref = t->GetFormulaToken();
3093 ArgumentMap::iterator it = mSymbols.find(ref);
3094 if (it == mSymbols.end()) {
3095 // Allocate new symbols
3096 std::cerr << "DeclRefArg: Allocate a new symbol:";
3097 std::stringstream ss;
3098 ss << "tmp"<< mCurId++;
3099 boost::shared_ptr<DynamicKernelArgument> new_arg(new T(ss.str(), t, pCodeGen));
3100 mSymbols[ref] = new_arg;
3101 mParams.push_back(new_arg);
3102 std::cerr << ss.str() <<"\n";
3103 return new_arg.get();
3104 } else {
3105 return it->second.get();
3109 class FormulaGroupInterpreterOpenCL : public FormulaGroupInterpreter
3111 public:
3112 FormulaGroupInterpreterOpenCL() :
3113 FormulaGroupInterpreter()
3116 virtual ~FormulaGroupInterpreterOpenCL()
3120 virtual ScMatrixRef inverseMatrix( const ScMatrix& rMat ) SAL_OVERRIDE;
3121 virtual CompiledFormula* createCompiledFormula(ScDocument& rDoc,
3122 const ScAddress& rTopPos,
3123 ScFormulaCellGroupRef& xGroup,
3124 ScTokenArray& rCode) SAL_OVERRIDE;
3125 virtual bool interpret( ScDocument& rDoc, const ScAddress& rTopPos,
3126 ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode ) SAL_OVERRIDE;
3129 ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
3131 return NULL;
3134 DynamicKernel* DynamicKernel::create(ScDocument& /* rDoc */,
3135 const ScAddress& /* rTopPos */,
3136 ScTokenArray& rCode)
3138 // Constructing "AST"
3139 FormulaTokenIterator aCode = rCode;
3140 std::list<FormulaToken *> list;
3141 std::map<FormulaToken *, FormulaTreeNodeRef> m_hash_map;
3142 FormulaToken* pCur;
3143 while( (pCur = (FormulaToken*)(aCode.Next()) ) != NULL)
3145 OpCode eOp = pCur->GetOpCode();
3146 if ( eOp != ocPush )
3148 FormulaTreeNodeRef m_currNode =
3149 FormulaTreeNodeRef(new FormulaTreeNode(pCur));
3150 sal_uInt8 m_ParamCount = pCur->GetParamCount();
3151 for(int i=0; i<m_ParamCount; i++)
3153 FormulaToken* m_TempFormula = list.back();
3154 list.pop_back();
3155 if(m_TempFormula->GetOpCode()!=ocPush)
3157 if(m_hash_map.find(m_TempFormula)==m_hash_map.end())
3158 return NULL;
3159 m_currNode->Children.push_back(m_hash_map[m_TempFormula]);
3161 else
3163 FormulaTreeNodeRef m_ChildTreeNode =
3164 FormulaTreeNodeRef(
3165 new FormulaTreeNode(m_TempFormula));
3166 m_currNode->Children.push_back(m_ChildTreeNode);
3169 std::reverse(m_currNode->Children.begin(),
3170 m_currNode->Children.end());
3171 m_hash_map[pCur] = m_currNode;
3173 list.push_back(pCur);
3176 FormulaTreeNodeRef Root = FormulaTreeNodeRef(new FormulaTreeNode(NULL));
3177 Root->Children.push_back(m_hash_map[list.back()]);
3179 DynamicKernel* pDynamicKernel = new DynamicKernel(Root);
3181 if (!pDynamicKernel)
3182 return NULL;
3184 // OpenCL source code generation and kernel compilation
3185 try {
3186 pDynamicKernel->CodeGen();
3187 pDynamicKernel->CreateKernel();
3189 catch (const UnhandledToken &ut) {
3190 std::cerr << "\nDynamic formual compiler: unhandled token: ";
3191 std::cerr << ut.mMessage << " at ";
3192 std::cerr << ut.mFile << ":" << ut.mLineNumber << "\n";
3193 #ifdef NO_FALLBACK_TO_SWINTERP
3194 assert(false);
3195 #else
3196 free(pDynamicKernel);
3197 return NULL;
3198 #endif
3200 catch (...) {
3201 std::cerr << "Dynamic formula compiler: unhandled compiler error\n";
3202 return NULL;
3204 return pDynamicKernel;
3207 CompiledFormula* FormulaGroupInterpreterOpenCL::createCompiledFormula(ScDocument& rDoc,
3208 const ScAddress& rTopPos,
3209 ScFormulaCellGroupRef& xGroup,
3210 ScTokenArray& rCode)
3212 ScTokenArray *pCode = new ScTokenArray();
3213 ScGroupTokenConverter aConverter(*pCode, rDoc, *xGroup->mpTopCell, rTopPos);
3214 if (!aConverter.convert(rCode) || pCode->GetLen() == 0)
3216 delete pCode;
3217 return NULL;
3220 DynamicKernel *result = DynamicKernel::create(rDoc, rTopPos, *pCode);
3221 if ( result )
3222 result->SetPCode(pCode);
3223 return result;
3226 bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
3227 const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
3228 ScTokenArray& rCode )
3230 DynamicKernel *pKernel;
3232 if (xGroup->meCalcState == sc::GroupCalcOpenCLKernelCompilationScheduled ||
3233 xGroup->meCalcState == sc::GroupCalcOpenCLKernelBinaryCreated)
3235 if (xGroup->meCalcState == sc::GroupCalcOpenCLKernelCompilationScheduled)
3237 ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.wait();
3238 ScFormulaCellGroup::sxCompilationThread->maCompilationDoneCondition.reset();
3241 pKernel = static_cast<DynamicKernel*>(xGroup->mpCompiledFormula);
3243 else
3245 assert(xGroup->meCalcState == sc::GroupCalcRunning);
3246 pKernel = static_cast<DynamicKernel*>(createCompiledFormula(rDoc, rTopPos, xGroup, rCode));
3249 if (!pKernel)
3250 return false;
3252 try {
3253 // Obtain cl context
3254 KernelEnv kEnv;
3255 OpenclDevice::setKernelEnv(&kEnv);
3256 // Run the kernel.
3257 pKernel->Launch(xGroup->mnLength);
3258 // Map results back
3259 cl_mem res = pKernel->GetResultBuffer();
3260 cl_int err;
3261 double *resbuf = (double*)clEnqueueMapBuffer(kEnv.mpkCmdQueue,
3262 res,
3263 CL_TRUE, CL_MAP_READ, 0,
3264 xGroup->mnLength*sizeof(double), 0, NULL, NULL,
3265 &err);
3266 if (err != CL_SUCCESS)
3267 throw OpenCLError(err);
3268 rDoc.SetFormulaResults(rTopPos, resbuf, xGroup->mnLength);
3269 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, res, resbuf, 0, NULL, NULL);
3270 if (err != CL_SUCCESS)
3271 throw OpenCLError(err);
3272 if (xGroup->meCalcState == sc::GroupCalcRunning)
3273 delete pKernel;
3275 catch (const UnhandledToken &ut) {
3276 std::cerr << "\nDynamic formual compiler: unhandled token: ";
3277 std::cerr << ut.mMessage << "\n";
3278 #ifdef NO_FALLBACK_TO_SWINTERP
3279 assert(false);
3280 return true;
3281 #else
3282 return false;
3283 #endif
3285 catch (const OpenCLError &oce) {
3286 std::cerr << "Dynamic formula compiler: OpenCL error: ";
3287 std::cerr << oce.mError << "\n";
3288 #ifdef NO_FALLBACK_TO_SWINTERP
3289 assert(false);
3290 return true;
3291 #else
3292 return false;
3293 #endif
3295 catch (const Unhandled &uh) {
3296 std::cerr << "Dynamic formula compiler: unhandled case:";
3297 std::cerr <<" at ";
3298 std::cerr << uh.mFile << ":" << uh.mLineNumber << "\n";
3299 #ifdef NO_FALLBACK_TO_SWINTERP
3300 assert(false);
3301 return true;
3302 #else
3303 return false;
3304 #endif
3306 catch (...) {
3307 std::cerr << "Dynamic formula compiler: unhandled compiler error\n";
3308 #ifdef NO_FALLBACK_TO_SWINTERP
3309 assert(false);
3310 return true;
3311 #else
3312 return false;
3313 #endif
3315 return true;
3316 } // namespace opencl
3318 }} // namespace sc
3320 extern "C" {
3322 SAL_DLLPUBLIC_EXPORT sc::FormulaGroupInterpreter* SAL_CALL
3323 createFormulaGroupOpenCLInterpreter()
3325 #if 0// USE_GROUNDWATER_INTERPRETER
3326 if (getenv("SC_GROUNDWATER"))
3327 return new sc::opencl::FormulaGroupInterpreterGroundwater();
3328 #endif
3330 return new sc::opencl::FormulaGroupInterpreterOpenCL();
3333 SAL_DLLPUBLIC_EXPORT size_t getOpenCLPlatformCount()
3335 return sc::opencl::getOpenCLPlatformCount();
3338 SAL_DLLPUBLIC_EXPORT void SAL_CALL fillOpenCLInfo(
3339 sc::OpenclPlatformInfo* pInfos, size_t nInfoSize)
3341 const std::vector<sc::OpenclPlatformInfo>& rPlatforms =
3342 sc::opencl::fillOpenCLInfo();
3343 size_t n = std::min(rPlatforms.size(), nInfoSize);
3344 for (size_t i = 0; i < n; ++i)
3345 pInfos[i] = rPlatforms[i];
3348 SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice(
3349 const OUString* pDeviceId, bool bAutoSelect,
3350 bool bForceEvaluation)
3352 return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect, bForceEvaluation);
3355 SAL_DLLPUBLIC_EXPORT void SAL_CALL getOpenCLDeviceInfo(size_t* pDeviceId, size_t* pPlatformId)
3357 sc::opencl::getOpenCLDeviceInfo(*pDeviceId, *pPlatformId);
3360 } // extern "C"
3362 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */