GPU-Calc: remove Alloc_Host_Ptr for clmem of NAN vector
[LibreOffice.git] / sc / source / core / opencl / formulagroupcl.cxx
blob5c7ed58deb7c1157117612dfa9f422ae66ee5825
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include "formulagroup.hxx"
11 #include "grouptokenconverter.hxx"
12 #include "document.hxx"
13 #include "formulacell.hxx"
14 #include "tokenarray.hxx"
15 #include "compiler.hxx"
16 #include "interpre.hxx"
17 #include "formula/vectortoken.hxx"
18 #include "scmatrix.hxx"
20 #include "openclwrapper.hxx"
22 #include "op_financial.hxx"
23 #include "op_database.hxx"
24 #include "op_math.hxx"
25 #include "op_logical.hxx"
26 #include "op_statistical.hxx"
27 #include "op_array.hxx"
28 /// CONFIGURATIONS
29 // Comment out this to turn off FMIN and FMAX intrinsics
30 #define USE_FMIN_FMAX 1
31 #define REDUCE_THRESHOLD 4 // set to 4 for correctness testing. priority 1
32 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
33 #include "formulagroupcl_public.hxx"
34 #ifdef WIN32
35 #ifndef NAN
36 namespace {
37 static const unsigned long __nan[2] = {0xffffffff, 0x7fffffff};
39 #define NAN (*(const double*) __nan)
40 #endif
41 #endif
43 #include <list>
44 #include <map>
45 #include <iostream>
46 #include <sstream>
47 #include <algorithm>
48 #define MD5_KERNEL 1
49 #ifdef MD5_KERNEL
50 #include <rtl/digest.h>
51 #endif
52 #include <memory>
54 #include <boost/scoped_ptr.hpp>
56 #undef NO_FALLBACK_TO_SWINTERP /* undef this for non-TDD runs */
58 using namespace formula;
60 namespace sc { namespace opencl {
63 /// Map the buffer used by an argument and do necessary argument setting
64 size_t VectorRef::Marshal(cl_kernel k, int argno, int, cl_program)
66 FormulaToken *ref = mFormulaTree->GetFormulaToken();
67 double *pHostBuffer = NULL;
68 size_t requestedLength = 1;
69 size_t szHostBuffer = 0;
70 if (ref->GetType() == formula::svSingleVectorRef) {
71 const formula::SingleVectorRefToken* pSVR =
72 dynamic_cast< const formula::SingleVectorRefToken* >(ref);
73 assert(pSVR);
74 pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
75 szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
76 requestedLength = pSVR->GetRequestedArrayLength();
77 #if 0
78 std::cerr << "Marshal a Single vector of size " << pSVR->GetArrayLength();
79 std::cerr << " at argument "<< argno << "\n";
80 #endif
81 } else if (ref->GetType() == formula::svDoubleVectorRef) {
82 const formula::DoubleVectorRefToken* pDVR =
83 dynamic_cast< const formula::DoubleVectorRefToken* >(ref);
84 assert(pDVR);
85 pHostBuffer = const_cast<double*>(
86 pDVR->GetArrays()[mnIndex].mpNumericArray);
87 szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
88 requestedLength = pDVR->GetRequestedArrayLength();
89 } else {
90 throw Unhandled();
92 // Obtain cl context
93 KernelEnv kEnv;
94 OpenclDevice::setKernelEnv(&kEnv);
95 cl_int err;
96 if (pHostBuffer)
98 mpClmem = clCreateBuffer(kEnv.mpkContext,
99 (cl_mem_flags) CL_MEM_READ_ONLY,
100 szHostBuffer,
101 NULL, &err);
102 if (CL_SUCCESS != err)
103 throw OpenCLError(err);
104 err = clEnqueueWriteBuffer(kEnv.mpkCmdQueue, mpClmem,CL_TRUE, 0, szHostBuffer,
105 pHostBuffer, 0, NULL, NULL);
106 if (CL_SUCCESS != err)
107 throw OpenCLError(err);
109 else
111 if (szHostBuffer == 0)
112 szHostBuffer = requestedLength * sizeof(double);//vector length for NAN vector
113 // Marshal as a buffer of NANs
114 mpClmem = clCreateBuffer(kEnv.mpkContext,
115 (cl_mem_flags) CL_MEM_READ_ONLY,
116 szHostBuffer, NULL, &err);
117 if (CL_SUCCESS != err)
118 throw OpenCLError(err);
119 double *pNanBuffer = new double[szHostBuffer/sizeof(double)];
120 for (size_t i = 0; i < szHostBuffer/sizeof(double); i++)
121 pNanBuffer[i] = NAN;
122 err = clEnqueueWriteBuffer(kEnv.mpkCmdQueue, mpClmem,CL_TRUE, 0, szHostBuffer,
123 pNanBuffer, 0, NULL, NULL);
124 if (CL_SUCCESS != err)
125 throw OpenCLError(err);
126 delete[] pNanBuffer;
130 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
131 if (CL_SUCCESS != err)
132 throw OpenCLError(err);
133 return 1;
136 /// Arguments that are actually compile-time constant string
137 /// Currently, only the hash is passed.
138 /// TBD(IJSUNG): pass also length and the actual string if there is a
139 /// hash function collision
140 class ConstStringArgument: public DynamicKernelArgument
142 public:
143 ConstStringArgument(const std::string &s,
144 FormulaTreeNodeRef ft):
145 DynamicKernelArgument(s, ft) {}
146 /// Generate declaration
147 virtual void GenDecl(std::stringstream &ss) const
149 ss << "unsigned " << mSymName;
151 virtual void GenDeclRef(std::stringstream &ss) const
153 ss << GenSlidingWindowDeclRef(false);
155 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
157 GenDecl(ss);
159 virtual std::string GenSlidingWindowDeclRef(bool=false) const
161 std::stringstream ss;
162 if (GetFormulaToken()->GetType() != formula::svString)
163 throw Unhandled();
164 FormulaToken *Tok = GetFormulaToken();
165 ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
166 return ss.str();
168 virtual size_t GetWindowSize(void) const
170 return 1;
172 /// Pass the 32-bit hash of the string to the kernel
173 virtual size_t Marshal(cl_kernel k, int argno, int, cl_program)
175 FormulaToken *ref = mFormulaTree->GetFormulaToken();
176 cl_uint hashCode = 0;
177 if (ref->GetType() == formula::svString)
179 const rtl::OUString s = ref->GetString().getString().toAsciiUpperCase();
180 hashCode = s.hashCode();
181 } else {
182 throw Unhandled();
184 // marshaling
185 // Obtain cl context
186 KernelEnv kEnv;
187 OpenclDevice::setKernelEnv(&kEnv);
188 // Pass the scalar result back to the rest of the formula kernel
189 cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), (void*)&hashCode);
190 if (CL_SUCCESS != err)
191 throw OpenCLError(err);
192 return 1;
196 /// Arguments that are actually compile-time constants
197 class DynamicKernelConstantArgument: public DynamicKernelArgument
199 public:
200 DynamicKernelConstantArgument(const std::string &s,
201 FormulaTreeNodeRef ft):
202 DynamicKernelArgument(s, ft) {}
203 /// Generate declaration
204 virtual void GenDecl(std::stringstream &ss) const
206 ss << "double " << mSymName;
208 virtual void GenDeclRef(std::stringstream &ss) const
210 ss << mSymName;
212 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
214 GenDecl(ss);
216 virtual std::string GenSlidingWindowDeclRef(bool=false) const
218 if (GetFormulaToken()->GetType() != formula::svDouble)
219 throw Unhandled();
220 return mSymName;
222 virtual size_t GetWindowSize(void) const
224 return 1;
226 double GetDouble(void) const
228 FormulaToken *Tok = GetFormulaToken();
229 if (Tok->GetType() != formula::svDouble)
230 throw Unhandled();
231 return Tok->GetDouble();
233 /// Create buffer and pass the buffer to a given kernel
234 virtual size_t Marshal(cl_kernel k, int argno, int, cl_program)
236 double tmp = GetDouble();
237 // Pass the scalar result back to the rest of the formula kernel
238 cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
239 if (CL_SUCCESS != err)
240 throw OpenCLError(err);
241 return 1;
243 virtual cl_mem GetCLBuffer(void) const { return NULL; }
246 class DynamicKernelPiArgument: public DynamicKernelArgument
248 public:
249 DynamicKernelPiArgument(const std::string &s,
250 FormulaTreeNodeRef ft):
251 DynamicKernelArgument(s, ft) {}
252 /// Generate declaration
253 virtual void GenDecl(std::stringstream &ss) const
255 ss << "double " << mSymName;
257 virtual void GenDeclRef(std::stringstream &ss) const
259 ss << "3.14159265358979";
261 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
263 GenDecl(ss);
265 virtual std::string GenSlidingWindowDeclRef(bool=false) const
267 return mSymName;
269 virtual size_t GetWindowSize(void) const
271 return 1;
273 /// Create buffer and pass the buffer to a given kernel
274 virtual size_t Marshal(cl_kernel k, int argno, int, cl_program)
276 double tmp = 0.0;
277 // Pass the scalar result back to the rest of the formula kernel
278 cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
279 if (CL_SUCCESS != err)
280 throw OpenCLError(err);
281 return 1;
285 class DynamicKernelRandomArgument: public DynamicKernelArgument
287 public:
288 DynamicKernelRandomArgument(const std::string &s,
289 FormulaTreeNodeRef ft):
290 DynamicKernelArgument(s, ft) {}
291 /// Generate declaration
292 virtual void GenDecl(std::stringstream &ss) const
294 ss << "double " << mSymName;
296 virtual void GenDeclRef(std::stringstream &ss) const
298 double d;
299 srand((unsigned)time(NULL));
300 d=((double)rand())/RAND_MAX;
301 ss << d;
303 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
305 GenDecl(ss);
307 virtual std::string GenSlidingWindowDeclRef(bool=false) const
309 return mSymName;
311 virtual size_t GetWindowSize(void) const
313 return 1;
315 /// Create buffer and pass the buffer to a given kernel
316 virtual size_t Marshal(cl_kernel k, int argno, int, cl_program)
318 double tmp = 0.0;
319 // Pass the scalar result back to the rest of the formula kernel
320 cl_int err = clSetKernelArg(k, argno, sizeof(double), (void*)&tmp);
321 if (CL_SUCCESS != err)
322 throw OpenCLError(err);
323 return 1;
327 /// A vector of strings
328 class DynamicKernelStringArgument: public VectorRef
330 public:
331 DynamicKernelStringArgument(const std::string &s,
332 FormulaTreeNodeRef ft, int index = 0):
333 VectorRef(s, ft, index) {}
335 virtual void GenSlidingWindowFunction(std::stringstream &) {}
336 /// Generate declaration
337 virtual void GenDecl(std::stringstream &ss) const
339 ss << "__global unsigned int *"<<mSymName;
341 virtual void GenSlidingWindowDecl(std::stringstream& ss) const
343 DynamicKernelStringArgument::GenDecl(ss);
345 virtual size_t Marshal(cl_kernel, int, int, cl_program);
348 /// Marshal a string vector reference
349 size_t DynamicKernelStringArgument::Marshal(cl_kernel k, int argno, int, cl_program)
351 FormulaToken *ref = mFormulaTree->GetFormulaToken();
352 // Obtain cl context
353 KernelEnv kEnv;
354 OpenclDevice::setKernelEnv(&kEnv);
355 cl_int err;
356 formula::VectorRefArray vRef;
357 size_t nStrings = 0;
358 if (ref->GetType() == formula::svSingleVectorRef) {
359 const formula::SingleVectorRefToken* pSVR =
360 dynamic_cast< const formula::SingleVectorRefToken* >(ref);
361 assert(pSVR);
362 nStrings = pSVR->GetArrayLength();
363 vRef = pSVR->GetArray();
364 } else if (ref->GetType() == formula::svDoubleVectorRef) {
365 const formula::DoubleVectorRefToken* pDVR =
366 dynamic_cast< const formula::DoubleVectorRefToken* >(ref);
367 assert(pDVR);
368 nStrings = pDVR->GetArrayLength();
369 vRef = pDVR->GetArrays()[mnIndex];
371 size_t szHostBuffer = nStrings * sizeof(cl_int);
372 // Marshal strings. Right now we pass hashes of these string
373 mpClmem = clCreateBuffer(kEnv.mpkContext,
374 (cl_mem_flags) CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR,
375 szHostBuffer, NULL, &err);
376 if (CL_SUCCESS != err)
377 throw OpenCLError(err);
378 cl_uint *pHashBuffer = (cl_uint*)clEnqueueMapBuffer(
379 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
380 szHostBuffer, 0, NULL, NULL, &err);
381 if (CL_SUCCESS != err)
382 throw OpenCLError(err);
383 for (size_t i = 0; i < nStrings; i++)
385 if (vRef.mpStringArray[i])
387 const OUString tmp = OUString(vRef.mpStringArray[i]);
388 pHashBuffer[i] = tmp.hashCode();
390 else
392 pHashBuffer[i] = 0;
395 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
396 pHashBuffer, 0, NULL, NULL);
397 if (CL_SUCCESS != err)
398 throw OpenCLError(err);
400 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&mpClmem);
401 if (CL_SUCCESS != err)
402 throw OpenCLError(err);
403 return 1;
406 /// A mixed string/numberic vector
407 class DynamicKernelMixedArgument: public VectorRef
409 public:
410 DynamicKernelMixedArgument(const std::string &s,
411 FormulaTreeNodeRef ft):
412 VectorRef(s, ft), mStringArgument(s+"s", ft) {}
413 virtual void GenSlidingWindowDecl(std::stringstream& ss) const
415 VectorRef::GenSlidingWindowDecl(ss);
416 ss << ", ";
417 mStringArgument.GenSlidingWindowDecl(ss);
419 virtual void GenSlidingWindowFunction(std::stringstream &) {}
420 /// Generate declaration
421 virtual void GenDecl(std::stringstream &ss) const
423 VectorRef::GenDecl(ss);
424 ss << ", ";
425 mStringArgument.GenDecl(ss);
427 virtual void GenDeclRef(std::stringstream &ss) const
429 VectorRef::GenDeclRef(ss);
430 ss << ",";
431 mStringArgument.GenDeclRef(ss);
433 virtual std::string GenSlidingWindowDeclRef(bool) const
435 std::stringstream ss;
436 ss << "(!isNan(" << VectorRef::GenSlidingWindowDeclRef();
437 ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
438 ss << ":" << mStringArgument.GenSlidingWindowDeclRef();
439 ss << ")";
440 return ss.str();
442 virtual size_t Marshal(cl_kernel k, int argno, int vw, cl_program p)
444 int i = VectorRef::Marshal(k, argno, vw, p);
445 i += mStringArgument.Marshal(k, argno+i, vw, p);
446 return i;
448 protected:
449 DynamicKernelStringArgument mStringArgument;
452 /// Handling a Double Vector that is used as a sliding window input
453 /// to either a sliding window average or sum-of-products
454 /// Generate a sequential loop for reductions
455 class OpSum; // Forward Declaration
456 class OpAverage; // Forward Declaration
457 class OpMin; // Forward Declaration
458 class OpMax; // Forward Declaration
459 class OpCount; // Forward Declaration
460 template<class Base>
461 class DynamicKernelSlidingArgument: public Base
463 public:
464 DynamicKernelSlidingArgument(const std::string &s,
465 FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
466 int index=0):
467 Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
469 FormulaToken *t = ft->GetFormulaToken();
470 if (t->GetType() != formula::svDoubleVectorRef)
471 throw Unhandled();
472 mpDVR = dynamic_cast<const formula::DoubleVectorRefToken *>(t);
473 assert(mpDVR);
474 bIsStartFixed = mpDVR->IsStartFixed();
475 bIsEndFixed = mpDVR->IsEndFixed();
477 // Should only be called by SumIfs. Yikes!
478 virtual bool NeedParallelReduction(void) const
480 assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
481 return GetWindowSize()> 100 &&
482 ( (GetStartFixed() && GetEndFixed()) ||
483 (!GetStartFixed() && !GetEndFixed()) ) ;
485 virtual void GenSlidingWindowFunction(std::stringstream &) {}
487 virtual std::string GenSlidingWindowDeclRef(bool nested=false) const
489 size_t nArrayLength = mpDVR->GetArrayLength();
490 std::stringstream ss;
491 if (!bIsStartFixed && !bIsEndFixed)
493 if (nested)
494 ss << "((i+gid0) <" << nArrayLength <<"?";
495 ss << Base::GetName() << "[i + gid0]";
496 if (nested)
497 ss << ":NAN)";
499 else
501 if (nested)
502 ss << "(i <" << nArrayLength <<"?";
503 ss << Base::GetName() << "[i]";
504 if (nested)
505 ss << ":NAN)";
507 return ss.str();
509 /// Controls how the elements in the DoubleVectorRef are traversed
510 virtual size_t GenReductionLoopHeader(
511 std::stringstream &ss, bool &needBody)
513 assert(mpDVR);
514 size_t nCurWindowSize = mpDVR->GetRefRowSize();
515 // original for loop
516 #ifndef UNROLLING_FACTOR
517 needBody = true;
518 // No need to generate a for-loop for degenerated cases
519 if (nCurWindowSize == 1)
521 ss << "if (gid0 <" << mpDVR->GetArrayLength();
522 ss << ")\n\t{\tint i = 0;\n\t\t";
523 return nCurWindowSize;
526 ss << "for (int i = ";
527 if (!bIsStartFixed && bIsEndFixed)
529 #ifdef ISNAN
530 ss << "gid0; i < " << mpDVR->GetArrayLength();
531 ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
532 #else
533 ss << "gid0; i < "<< nCurWindowSize << "; i++)\n\t\t";
534 #endif
536 else if (bIsStartFixed && !bIsEndFixed)
538 #ifdef ISNAN
539 ss << "0; i < " << mpDVR->GetArrayLength();
540 ss << " && i < gid0+"<< nCurWindowSize << "; i++){\n\t\t";
541 #else
542 ss << "0; i < gid0+"<< nCurWindowSize << "; i++)\n\t\t";
543 #endif
545 else if (!bIsStartFixed && !bIsEndFixed)
547 #ifdef ISNAN
548 ss << "0; i + gid0 < " << mpDVR->GetArrayLength();
549 ss << " && i < "<< nCurWindowSize << "; i++){\n\t\t";
550 #else
551 ss << "0; i < "<< nCurWindowSize << "; i++)\n\t\t";
552 #endif
554 else
556 unsigned limit =
557 std::min(mpDVR->GetArrayLength(), nCurWindowSize);
558 ss << "0; i < "<< limit << "; i++){\n\t\t";
560 return nCurWindowSize;
561 #endif
563 #ifdef UNROLLING_FACTOR
565 if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed()) {
566 ss << "for (int i = ";
567 ss << "gid0; i < " << mpDVR->GetArrayLength();
568 ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
569 needBody = true;
570 return nCurWindowSize;
571 } else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()) {
572 ss << "for (int i = ";
573 ss << "0; i < " << mpDVR->GetArrayLength();
574 ss << " && i < gid0+"<< nCurWindowSize << "; i++){\n\t\t";
575 needBody = true;
576 return nCurWindowSize;
577 } else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed()){
578 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
579 ss << "{int i;\n\t";
580 std::stringstream temp1,temp2;
581 int outLoopSize = UNROLLING_FACTOR;
582 if ( nCurWindowSize/outLoopSize != 0){
583 ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
584 for(int count=0; count < outLoopSize; count++){
585 ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n\t";
586 if(count==0){
587 temp1 << "if(i + gid0 < " <<mpDVR->GetArrayLength();
588 temp1 << "){\n\t\t";
589 temp1 << "tmp = legalize(";
590 temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
591 temp1 << ", tmp);\n\t\t\t";
592 temp1 << "}\n\t";
594 ss << temp1.str();
596 ss << "}\n\t";
598 // The residual of mod outLoopSize
599 for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count < nCurWindowSize; count++){
600 ss << "i = "<<count<<";\n\t";
601 if(count==nCurWindowSize/outLoopSize*outLoopSize){
602 temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
603 temp2 << "){\n\t\t";
604 temp2 << "tmp = legalize(";
605 temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
606 temp2 << ", tmp);\n\t\t\t";
607 temp2 << "}\n\t";
609 ss << temp2.str();
611 ss << "} // to scope the int i declaration\n";
612 needBody = false;
613 return nCurWindowSize;
615 // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
616 else {
617 ss << "//else situation \n\t";
618 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
619 ss << "{int i;\n\t";
620 std::stringstream temp1,temp2;
621 int outLoopSize = UNROLLING_FACTOR;
622 if (nCurWindowSize/outLoopSize != 0){
623 ss << "for(int outLoop=0; outLoop<" << nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
624 for(int count=0; count < outLoopSize; count++){
625 ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n\t";
626 if(count==0){
627 temp1 << "tmp = legalize(";
628 temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
629 temp1 << ", tmp);\n\t\t\t";
631 ss << temp1.str();
633 ss << "}\n\t";
635 // The residual of mod outLoopSize
636 for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize; count < nCurWindowSize; count++){
637 ss << "i = "<<count<<";\n\t";
638 if(count==nCurWindowSize/outLoopSize*outLoopSize){
639 temp2 << "tmp = legalize(";
640 temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
641 temp2 << ", tmp);\n\t\t\t";
643 ss << temp2.str();
645 ss << "} // to scope the int i declaration\n";
646 needBody = false;
647 return nCurWindowSize;
650 #endif
652 ~DynamicKernelSlidingArgument()
654 if (mpClmem2)
656 clReleaseMemObject(mpClmem2);
657 mpClmem2 = NULL;
661 size_t GetArrayLength(void) const {return mpDVR->GetArrayLength(); }
663 size_t GetWindowSize(void) const {return mpDVR->GetRefRowSize(); }
665 size_t GetStartFixed(void) const {return bIsStartFixed; }
667 size_t GetEndFixed(void) const {return bIsEndFixed; }
669 protected:
670 bool bIsStartFixed, bIsEndFixed;
671 const formula::DoubleVectorRefToken *mpDVR;
672 // from parent nodes
673 boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
674 // controls whether to invoke the reduction kernel during marshaling or not
675 cl_mem mpClmem2;
678 /// Handling a Double Vector that is used as a sliding window input
679 /// Performs parallel reduction based on given operator
680 template<class Base>
681 class ParallelReductionVectorRef: public Base
683 public:
684 ParallelReductionVectorRef(const std::string &s,
685 FormulaTreeNodeRef ft, boost::shared_ptr<SlidingFunctionBase> &CodeGen,
686 int index=0):
687 Base(s, ft, index), mpCodeGen(CodeGen), mpClmem2(NULL)
689 FormulaToken *t = ft->GetFormulaToken();
690 if (t->GetType() != formula::svDoubleVectorRef)
691 throw Unhandled();
692 mpDVR = dynamic_cast<const formula::DoubleVectorRefToken *>(t);
693 assert(mpDVR);
694 bIsStartFixed = mpDVR->IsStartFixed();
695 bIsEndFixed = mpDVR->IsEndFixed();
697 /// Emit the definition for the auxiliary reduction kernel
698 virtual void GenSlidingWindowFunction(std::stringstream &ss) {
699 std::string name = Base::GetName();
700 ss << "__kernel void "<<name;
701 ss << "_reduction(__global double* A, "
702 "__global double *result,int arrayLength,int windowSize){\n";
703 ss << " double tmp, current_result =" <<
704 mpCodeGen->GetBottom();
705 ss << ";\n";
706 ss << " int writePos = get_group_id(1);\n";
707 ss << " int lidx = get_local_id(0);\n";
708 ss << " __local double shm_buf[256];\n";
709 if (mpDVR->IsStartFixed())
710 ss << " int offset = 0;\n";
711 else // if (!mpDVR->IsStartFixed())
712 ss << " int offset = get_group_id(1);\n";
713 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
714 ss << " int end = windowSize;\n";
715 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
716 ss << " int end = offset + windowSize;\n";
717 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
718 ss << " int end = windowSize + get_group_id(1);\n";
719 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
720 ss << " int end = windowSize;\n";
721 ss << " end = min(end, arrayLength);\n";
723 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
724 ss << " int loop = arrayLength/512 + 1;\n";
725 ss << " for (int l=0; l<loop; l++){\n";
726 ss << " tmp = "<< mpCodeGen->GetBottom() << ";\n";
727 ss << " int loopOffset = l*512;\n";
728 ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
729 ss << " tmp = legalize(" << mpCodeGen->Gen2(
730 "A[loopOffset + lidx + offset]", "tmp") <<", tmp);\n";
731 ss << " tmp = legalize(" << mpCodeGen->Gen2(
732 "A[loopOffset + lidx + offset + 256]", "tmp") <<", tmp);\n";
733 ss << " } else if ((loopOffset + lidx + offset) < end)\n";
734 ss << " tmp = legalize(" << mpCodeGen->Gen2(
735 "A[loopOffset + lidx + offset]", "tmp") <<", tmp);\n";
736 ss << " shm_buf[lidx] = tmp;\n";
737 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
738 ss << " for (int i = 128; i >0; i/=2) {\n";
739 ss << " if (lidx < i)\n";
740 ss << " shm_buf[lidx] = ";
741 // Special case count
742 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
743 ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
744 else
745 ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]")<<";\n";
746 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
747 ss << " }\n";
748 ss << " if (lidx == 0)\n";
749 ss << " current_result =";
750 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
751 ss << "current_result + shm_buf[0]";
752 else
753 ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
754 ss << ";\n";
755 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
756 ss << " }\n";
757 ss << " if (lidx == 0)\n";
758 ss << " result[writePos] = current_result;\n";
759 ss << "}\n";
763 virtual std::string GenSlidingWindowDeclRef(bool=false) const
765 std::stringstream ss;
766 if (!bIsStartFixed && !bIsEndFixed)
767 ss << Base::GetName() << "[i + gid0]";
768 else
769 ss << Base::GetName() << "[i]";
770 return ss.str();
772 /// Controls how the elements in the DoubleVectorRef are traversed
773 virtual size_t GenReductionLoopHeader(
774 std::stringstream &ss, bool &needBody)
776 assert(mpDVR);
777 size_t nCurWindowSize = mpDVR->GetRefRowSize();
778 std::string temp = Base::GetName() + "[gid0]";
779 ss << "tmp = ";
780 // Special case count
781 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
782 ss << temp << "+ tmp";
783 else
784 ss << mpCodeGen->Gen2(temp, "tmp");
785 ss << ";\n\t";
786 needBody = false;
787 return nCurWindowSize;
790 virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
792 assert(Base::mpClmem == NULL);
793 // Obtain cl context
794 KernelEnv kEnv;
795 OpenclDevice::setKernelEnv(&kEnv);
796 cl_int err;
797 size_t nInput = mpDVR->GetArrayLength();
798 size_t nCurWindowSize = mpDVR->GetRefRowSize();
799 // create clmem buffer
800 if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == NULL)
801 throw Unhandled();
802 double *pHostBuffer = const_cast<double*>(
803 mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
804 size_t szHostBuffer = nInput * sizeof(double);
805 Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
806 (cl_mem_flags) CL_MEM_READ_ONLY,
807 szHostBuffer,
808 NULL, &err);
809 if (CL_SUCCESS != err)
810 throw OpenCLError(err);
811 err = clEnqueueWriteBuffer(kEnv.mpkCmdQueue, Base::mpClmem,CL_TRUE, 0,
812 szHostBuffer, pHostBuffer, 0, NULL, NULL);
813 if (CL_SUCCESS != err)
814 throw OpenCLError(err);
815 mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_WRITE_ONLY,
816 sizeof(double)*w, NULL, NULL);
817 if (CL_SUCCESS != err)
818 throw OpenCLError(err);
819 // reproduce the reduction function name
820 std::string kernelName = Base::GetName() + "_reduction";
822 cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
823 if (err != CL_SUCCESS)
824 throw OpenCLError(err);
825 // set kernel arg of reduction kernel
826 // TODO(Wei Wei): use unique name for kernel
827 cl_mem buf = Base::GetCLBuffer();
828 err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
829 (void *)&buf);
830 if (CL_SUCCESS != err)
831 throw OpenCLError(err);
833 err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), (void *)&mpClmem2);
834 if (CL_SUCCESS != err)
835 throw OpenCLError(err);
837 err = clSetKernelArg(redKernel, 2, sizeof(cl_int), (void*)&nInput);
838 if (CL_SUCCESS != err)
839 throw OpenCLError(err);
841 err = clSetKernelArg(redKernel, 3, sizeof(cl_int), (void*)&nCurWindowSize);
842 if (CL_SUCCESS != err)
843 throw OpenCLError(err);
845 // set work group size and execute
846 size_t global_work_size[] = {256, (size_t)w };
847 size_t local_work_size[] = {256, 1};
848 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
849 global_work_size, local_work_size, 0, NULL, NULL);
850 if (CL_SUCCESS != err)
851 throw OpenCLError(err);
852 err = clFinish(kEnv.mpkCmdQueue);
853 if (CL_SUCCESS != err)
854 throw OpenCLError(err);
856 // set kernel arg
857 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void*)&(mpClmem2));
858 if (CL_SUCCESS != err)
859 throw OpenCLError(err);
860 return 1;
862 ~ParallelReductionVectorRef()
864 if (mpClmem2)
866 clReleaseMemObject(mpClmem2);
867 mpClmem2 = NULL;
871 size_t GetArrayLength(void) const {return mpDVR->GetArrayLength(); }
873 size_t GetWindowSize(void) const {return mpDVR->GetRefRowSize(); }
875 size_t GetStartFixed(void) const {return bIsStartFixed; }
877 size_t GetEndFixed(void) const {return bIsEndFixed; }
879 protected:
880 bool bIsStartFixed, bIsEndFixed;
881 const formula::DoubleVectorRefToken *mpDVR;
882 // from parent nodes
883 boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
884 // controls whether to invoke the reduction kernel during marshaling or not
885 cl_mem mpClmem2;
888 class Reduction: public SlidingFunctionBase
890 public:
891 typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
892 typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
893 typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
895 virtual void GenSlidingWindowFunction(std::stringstream &ss,
896 const std::string sSymName, SubArguments &vSubArguments)
898 ss << "\ndouble " << sSymName;
899 ss << "_"<< BinFuncName() <<"(";
900 for (unsigned i = 0; i < vSubArguments.size(); i++)
902 if (i)
903 ss << ", ";
904 vSubArguments[i]->GenSlidingWindowDecl(ss);
906 ss << ") {\n\t";
907 ss << "double tmp = " << GetBottom() <<";\n\t";
908 ss << "int gid0 = get_global_id(0);\n\t";
909 if (isAverage())
910 ss << "int nCount = 0;\n\t";
911 ss << "double tmpBottom;\n\t";
912 unsigned i = vSubArguments.size();
913 size_t nItems = 0;
914 while (i--)
916 if (NumericRange *NR =
917 dynamic_cast<NumericRange *> (vSubArguments[i].get()))
919 bool needBody;
920 nItems += NR->GenReductionLoopHeader(ss, needBody);
921 if (needBody == false) continue;
923 else if (ParallelNumericRange *PNR =
924 dynamic_cast<ParallelNumericRange *> (vSubArguments[i].get()))
926 //did not handle yet
927 bool needBody;
928 nItems += PNR->GenReductionLoopHeader(ss, needBody);
929 if (needBody == false) continue;
931 else if (StringRange *SR =
932 dynamic_cast<StringRange *> (vSubArguments[i].get()))
934 //did not handle yet
935 bool needBody;
936 nItems += SR->GenReductionLoopHeader(ss, needBody);
937 if (needBody == false) continue;
939 else
941 FormulaToken *pCur = vSubArguments[i]->GetFormulaToken();
942 assert(pCur);
943 assert(pCur->GetType() != formula::svDoubleVectorRef);
945 if (pCur->GetType() == formula::svSingleVectorRef)
947 #ifdef ISNAN
948 const formula::SingleVectorRefToken* pSVR =
949 dynamic_cast< const formula::SingleVectorRefToken* >(pCur);
950 ss << "if (gid0 < " << pSVR->GetArrayLength() << "){\n\t\t";
951 #else
952 nItems += 1;
953 #endif
955 else if (pCur->GetType() == formula::svDouble)
957 #ifdef ISNAN
958 ss << "{\n\t\t";
959 #endif
960 nItems += 1;
962 else
964 nItems += 1;
967 #ifdef ISNAN
968 if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
970 ss << "tmpBottom = " << GetBottom() << ";\n\t\t";
971 ss << "if (isNan(";
972 ss << vSubArguments[i]->GenSlidingWindowDeclRef();
973 ss << "))\n\t\t\t";
974 ss << "tmp = ";
975 ss << Gen2("tmpBottom", "tmp") << ";\n\t\t";
976 ss << "else{\n\t\t\t";
977 ss << "tmp = ";
978 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
979 ss << ";\n\t\t\t";
980 ss << "}\n\t";
981 ss << "}\n\t";
983 else
985 ss << "tmp = ";
986 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
987 ss << ";\n\t";
989 #else
990 ss << "tmp = ";
991 // Generate the operation in binary form
992 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
993 ss << ";\n\t";
994 #endif
996 ss << "return tmp";
997 #ifdef ISNAN
998 if (isAverage())
999 ss << "/(double)nCount";
1000 #else
1001 if (isAverage())
1002 ss << "/(double)"<<nItems;
1003 #endif
1004 ss << ";\n}";
1006 virtual bool isAverage() const { return false; }
1007 virtual bool takeString() const { return false; }
1008 virtual bool takeNumeric() const { return true; }
1011 // Strictly binary operators
1012 class Binary: public SlidingFunctionBase
1014 public:
1015 virtual void GenSlidingWindowFunction(std::stringstream &ss,
1016 const std::string sSymName, SubArguments &vSubArguments)
1018 ss << "\ndouble " << sSymName;
1019 ss << "_"<< BinFuncName() <<"(";
1020 assert(vSubArguments.size() == 2);
1021 for (unsigned i = 0; i < vSubArguments.size(); i++)
1023 if (i)
1024 ss << ", ";
1025 vSubArguments[i]->GenSlidingWindowDecl(ss);
1027 ss << ") {\n\t";
1028 ss << "int gid0 = get_global_id(0), i = 0;\n\t";
1029 ss << "double tmp = ";
1030 ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(false),
1031 vSubArguments[1]->GenSlidingWindowDeclRef(false)) << ";\n\t";
1032 ss << "return tmp;\n}";
1034 virtual bool takeString() const { return true; }
1035 virtual bool takeNumeric() const { return true; }
1038 class SumOfProduct: public SlidingFunctionBase
1040 public:
1041 virtual void GenSlidingWindowFunction(std::stringstream &ss,
1042 const std::string sSymName, SubArguments &vSubArguments)
1044 size_t nCurWindowSize = 0;
1045 FormulaToken *tmpCur = NULL;
1046 const formula::DoubleVectorRefToken *pCurDVR = NULL;
1047 ss << "\ndouble " << sSymName;
1048 ss << "_"<< BinFuncName() <<"(";
1049 for (unsigned i = 0; i < vSubArguments.size(); i++)
1051 if (i)
1052 ss << ",";
1053 vSubArguments[i]->GenSlidingWindowDecl(ss);
1054 size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1055 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize)?
1056 nCurChildWindowSize:nCurWindowSize;
1057 tmpCur = vSubArguments[i]->GetFormulaToken();
1058 if ( ocPush==tmpCur->GetOpCode() )
1061 pCurDVR = dynamic_cast<
1062 const formula::DoubleVectorRefToken*>(tmpCur);
1063 if ( !
1064 ( (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1065 || (pCurDVR->IsStartFixed() && pCurDVR->IsEndFixed()) )
1067 throw Unhandled();
1070 ss << ") {\n";
1071 ss << " double tmp = 0.0;\n";
1072 ss << " int gid0 = get_global_id(0);\n";
1073 #ifndef UNROLLING_FACTOR
1074 ss << " int i ;\n";
1075 ss << " for (i = 0; i < "<< nCurWindowSize <<"; i++)\n";
1076 ss << " {\n";
1077 for (unsigned i = 0; i < vSubArguments.size(); i++)
1079 tmpCur = vSubArguments[i]->GetFormulaToken();
1080 if(ocPush==tmpCur->GetOpCode())
1082 pCurDVR= dynamic_cast<
1083 const formula::DoubleVectorRefToken *>(tmpCur);
1084 if(!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1086 ss << " int currentCount";
1087 ss << i;
1088 ss <<" =i+gid0+1;\n";
1090 else
1092 ss << " int currentCount";
1093 ss << i;
1094 ss << " =i+1;\n";
1098 ss << " tmp += fsum(";
1099 for (unsigned i = 0; i < vSubArguments.size(); i++)
1101 if (i)
1102 ss << "*";
1103 #ifdef ISNAN
1104 if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
1106 ss <<"(";
1107 ss <<"(currentCount";
1108 ss << i;
1109 ss<< ">";
1110 if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1111 formula::svSingleVectorRef)
1113 const formula::SingleVectorRefToken* pSVR =
1114 dynamic_cast< const formula::SingleVectorRefToken*>
1115 (vSubArguments[i]->GetFormulaToken());
1116 ss<<pSVR->GetArrayLength();
1118 else if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1119 formula::svDoubleVectorRef)
1121 const formula::DoubleVectorRefToken* pSVR =
1122 dynamic_cast< const formula::DoubleVectorRefToken*>
1123 (vSubArguments[i]->GetFormulaToken());
1124 ss<<pSVR->GetArrayLength();
1126 ss << ")||isNan("<<vSubArguments[i]
1127 ->GenSlidingWindowDeclRef(true);
1128 ss << ")?0:";
1129 ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1130 ss << ")";
1132 else
1133 ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1134 #else
1135 ss << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1136 #endif
1138 ss << ", 0.0);\n\t}\n\t";
1139 ss << "return tmp;\n";
1140 ss << "}";
1141 #endif
1143 #ifdef UNROLLING_FACTOR
1144 ss << "\tint i;\n\t";
1145 ss << "int currentCount0, currentCount1;\n\t";
1146 std::stringstream temp3,temp4;
1147 int outLoopSize = UNROLLING_FACTOR;
1148 if (nCurWindowSize/outLoopSize != 0){
1149 ss << "for(int outLoop=0; outLoop<" <<
1150 nCurWindowSize/outLoopSize<< "; outLoop++){\n\t";
1151 for(int count=0; count < outLoopSize; count++){
1152 ss << "i = outLoop*"<<outLoopSize<<"+"<<count<<";\n";
1153 if(count==0){
1154 for (unsigned i = 0; i < vSubArguments.size(); i++)
1156 tmpCur = vSubArguments[i]->GetFormulaToken();
1157 if(ocPush==tmpCur->GetOpCode())
1159 pCurDVR= dynamic_cast<
1160 const formula::DoubleVectorRefToken *>(tmpCur);
1161 if(!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1163 temp3 << " currentCount";
1164 temp3 << i;
1165 temp3 <<" =i+gid0+1;\n";
1167 else
1169 temp3 << " currentCount";
1170 temp3 << i;
1171 temp3 << " =i+1;\n";
1176 temp3 << "tmp = fsum(";
1177 for (unsigned i = 0; i < vSubArguments.size(); i++){
1178 if (i)
1179 temp3 << "*";
1180 if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode()){
1181 temp3 <<"(";
1182 temp3 <<"(currentCount";
1183 temp3 << i;
1184 temp3 << ">";
1185 if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1186 formula::svSingleVectorRef){
1187 const formula::SingleVectorRefToken* pSVR =
1188 dynamic_cast< const formula::SingleVectorRefToken*>
1189 (vSubArguments[i]->GetFormulaToken());
1190 temp3<<pSVR->GetArrayLength();
1192 else if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1193 formula::svDoubleVectorRef){
1194 const formula::DoubleVectorRefToken* pSVR =
1195 dynamic_cast< const formula::DoubleVectorRefToken*>
1196 (vSubArguments[i]->GetFormulaToken());
1197 temp3<<pSVR->GetArrayLength();
1199 temp3 << ")||isNan("<<vSubArguments[i]
1200 ->GenSlidingWindowDeclRef(true);
1201 temp3 << ")?0:";
1202 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1203 temp3 << ")";
1205 else
1206 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1208 temp3 << ", tmp);\n\t";
1210 ss << temp3.str();
1212 ss << "}\n\t";
1214 //The residual of mod outLoopSize
1215 for(unsigned int count=nCurWindowSize/outLoopSize*outLoopSize;
1216 count < nCurWindowSize; count++)
1218 ss << "i =" <<count<<";\n";
1219 if(count==nCurWindowSize/outLoopSize*outLoopSize){
1220 for (unsigned i = 0; i < vSubArguments.size(); i++)
1222 tmpCur = vSubArguments[i]->GetFormulaToken();
1223 if(ocPush==tmpCur->GetOpCode())
1225 pCurDVR= dynamic_cast<
1226 const formula::DoubleVectorRefToken *>(tmpCur);
1227 if(!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1229 temp4 << " currentCount";
1230 temp4 << i;
1231 temp4 <<" =i+gid0+1;\n";
1233 else
1235 temp4 << " currentCount";
1236 temp4 << i;
1237 temp4 << " =i+1;\n";
1242 temp4 << "tmp = fsum(";
1243 for (unsigned i = 0; i < vSubArguments.size(); i++)
1245 if (i)
1246 temp4 << "*";
1247 if(ocPush==vSubArguments[i]->GetFormulaToken()->GetOpCode())
1249 temp4 <<"(";
1250 temp4 <<"(currentCount";
1251 temp4 << i;
1252 temp4 << ">";
1253 if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1254 formula::svSingleVectorRef)
1256 const formula::SingleVectorRefToken* pSVR =
1257 dynamic_cast< const formula::SingleVectorRefToken*>
1258 (vSubArguments[i]->GetFormulaToken());
1259 temp4<<pSVR->GetArrayLength();
1261 else if(vSubArguments[i]->GetFormulaToken()->GetType() ==
1262 formula::svDoubleVectorRef)
1264 const formula::DoubleVectorRefToken* pSVR =
1265 dynamic_cast< const formula::DoubleVectorRefToken*>
1266 (vSubArguments[i]->GetFormulaToken());
1267 temp4<<pSVR->GetArrayLength();
1269 temp4 << ")||isNan("<<vSubArguments[i]
1270 ->GenSlidingWindowDeclRef(true);
1271 temp4 << ")?0:";
1272 temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1273 temp4 << ")";
1275 else
1277 temp4 << vSubArguments[i]
1278 ->GenSlidingWindowDeclRef(true);
1281 temp4 << ", tmp);\n\t";
1283 ss << temp4.str();
1285 ss << "return tmp;\n";
1286 ss << "}";
1287 #endif
1290 virtual bool takeString() const { return false; }
1291 virtual bool takeNumeric() const { return true; }
1294 /// operator traits
1295 class OpNop: public Reduction {
1296 public:
1297 virtual std::string GetBottom(void) { return "0"; }
1298 virtual std::string Gen2(const std::string &lhs, const std::string &) const
1300 return lhs;
1302 virtual std::string BinFuncName(void) const { return "nop"; }
1305 class OpCount: public Reduction {
1306 public:
1307 virtual std::string GetBottom(void) { return "0"; }
1308 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1310 std::stringstream ss;
1311 ss << "(isNan(" << lhs << ")?"<<rhs<<":"<<rhs<<"+1.0)";
1312 return ss.str();
1314 virtual std::string BinFuncName(void) const { return "fcount"; }
1317 class OpEqual: public Binary {
1318 public:
1319 virtual std::string GetBottom(void) { return "0"; }
1320 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1322 std::stringstream ss;
1323 ss << "strequal("<< lhs << "," << rhs <<")";
1324 return ss.str();
1326 virtual std::string BinFuncName(void) const { return "eq"; }
1329 class OpLessEqual: public Binary {
1330 public:
1331 virtual std::string GetBottom(void) { return "0"; }
1332 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1334 std::stringstream ss;
1335 ss << "("<< lhs << "<=" << rhs <<")";
1336 return ss.str();
1338 virtual std::string BinFuncName(void) const { return "leq"; }
1341 class OpGreater: public Binary {
1342 public:
1343 virtual std::string GetBottom(void) { return "0"; }
1344 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1346 std::stringstream ss;
1347 ss << "("<< lhs << ">" << rhs <<")";
1348 return ss.str();
1350 virtual std::string BinFuncName(void) const { return "gt"; }
1353 class OpSum: public Reduction {
1354 public:
1355 virtual std::string GetBottom(void) { return "0"; }
1356 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1358 std::stringstream ss;
1359 ss << "((" << lhs <<")+("<< rhs<<"))";
1360 return ss.str();
1362 virtual std::string BinFuncName(void) const { return "fsum"; }
1365 class OpAverage: public Reduction {
1366 public:
1367 virtual std::string GetBottom(void) { return "0"; }
1368 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1370 std::stringstream ss;
1371 ss << "fsum_count(" << lhs <<","<< rhs<<", &nCount)";
1372 return ss.str();
1374 virtual std::string BinFuncName(void) const { return "fsum"; }
1375 virtual bool isAverage() const { return true; }
1378 class OpSub: public Reduction {
1379 public:
1380 virtual std::string GetBottom(void) { return "0"; }
1381 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1383 return lhs + "-" + rhs;
1385 virtual std::string BinFuncName(void) const { return "fsub"; }
1388 class OpMul: public Reduction {
1389 public:
1390 virtual std::string GetBottom(void) { return "1"; }
1391 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1393 return lhs + "*" + rhs;
1395 virtual std::string BinFuncName(void) const { return "fmul"; }
1398 /// Technically not a reduction, but fits the framework.
1399 class OpDiv: public Reduction {
1400 public:
1401 virtual std::string GetBottom(void) { return "1.0"; }
1402 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1404 return "(" + lhs + "/" + rhs + ")";
1406 virtual std::string BinFuncName(void) const { return "fdiv"; }
1409 class OpMin: public Reduction {
1410 public:
1411 virtual std::string GetBottom(void) { return "MAXFLOAT"; }
1412 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1414 return "mcw_fmin("+lhs + "," + rhs +")";
1416 virtual std::string BinFuncName(void) const { return "min"; }
1419 class OpMax: public Reduction {
1420 public:
1421 virtual std::string GetBottom(void) { return "-MAXFLOAT"; }
1422 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1424 return "mcw_fmax("+lhs + "," + rhs +")";
1426 virtual std::string BinFuncName(void) const { return "max"; }
1428 class OpSumProduct: public SumOfProduct {
1429 public:
1430 virtual std::string GetBottom(void) { return "0"; }
1431 virtual std::string Gen2(const std::string &lhs, const std::string &rhs) const
1433 return lhs + "*" + rhs;
1435 virtual std::string BinFuncName(void) const { return "fsop"; }
1437 namespace {
1438 struct SumIfsArgs {
1439 SumIfsArgs(cl_mem x): mCLMem(x), mConst(0.0) {}
1440 SumIfsArgs(double x): mCLMem(NULL), mConst(x) {}
1441 cl_mem mCLMem;
1442 double mConst;
1445 /// Helper functions that have multiple buffers
1446 class DynamicKernelSoPArguments: public DynamicKernelArgument
1448 public:
1449 typedef boost::shared_ptr<DynamicKernelArgument> SubArgument;
1450 typedef std::vector<SubArgument> SubArgumentsType;
1452 DynamicKernelSoPArguments(
1453 const std::string &s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen);
1455 /// Create buffer and pass the buffer to a given kernel
1456 virtual size_t Marshal(cl_kernel k, int argno, int nVectorWidth, cl_program pProgram)
1458 unsigned i = 0;
1459 for (SubArgumentsType::iterator it = mvSubArguments.begin(), e= mvSubArguments.end(); it!=e;
1460 ++it)
1462 i += (*it)->Marshal(k, argno + i, nVectorWidth, pProgram);
1464 if (OpSumIfs *OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
1466 // Obtain cl context
1467 KernelEnv kEnv;
1468 OpenclDevice::setKernelEnv(&kEnv);
1469 cl_int err;
1470 DynamicKernelArgument *Arg = mvSubArguments[0].get();
1471 DynamicKernelSlidingArgument<VectorRef> *slidingArgPtr =
1472 dynamic_cast< DynamicKernelSlidingArgument<VectorRef> *> (Arg);
1473 mpClmem2 = NULL;
1475 if (OpSumCodeGen->NeedReductionKernel())
1477 assert(slidingArgPtr);
1478 size_t nInput = slidingArgPtr -> GetArrayLength();
1479 size_t nCurWindowSize = slidingArgPtr -> GetWindowSize();
1480 std::vector<SumIfsArgs> vclmem;
1482 for (SubArgumentsType::iterator it = mvSubArguments.begin(),
1483 e= mvSubArguments.end(); it!=e; ++it)
1485 if (VectorRef *VR = dynamic_cast<VectorRef *>(it->get()))
1486 vclmem.push_back(SumIfsArgs(VR->GetCLBuffer()));
1487 else if (DynamicKernelConstantArgument *CA =
1488 dynamic_cast<
1489 DynamicKernelConstantArgument *>(it->get()))
1490 vclmem.push_back(SumIfsArgs(CA->GetDouble()));
1491 else
1492 vclmem.push_back(SumIfsArgs((cl_mem)NULL));
1494 mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
1495 sizeof(double)*nVectorWidth, NULL, &err);
1496 if (CL_SUCCESS != err)
1497 throw OpenCLError(err);
1499 std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
1500 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
1501 if (err != CL_SUCCESS)
1502 throw OpenCLError(err);
1504 // set kernel arg of reduction kernel
1505 for (size_t j=0; j< vclmem.size(); j++){
1506 err = clSetKernelArg(redKernel, j,
1507 vclmem[j].mCLMem?sizeof(cl_mem):sizeof(double),
1508 vclmem[j].mCLMem?(void *)&vclmem[j].mCLMem:
1509 (void*)&vclmem[j].mConst);
1510 if (CL_SUCCESS != err)
1511 throw OpenCLError(err);
1513 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), (void *)&mpClmem2);
1514 if (CL_SUCCESS != err)
1515 throw OpenCLError(err);
1517 err = clSetKernelArg(redKernel, vclmem.size()+1, sizeof(cl_int), (void*)&nInput);
1518 if (CL_SUCCESS != err)
1519 throw OpenCLError(err);
1521 err = clSetKernelArg(redKernel, vclmem.size()+2, sizeof(cl_int), (void*)&nCurWindowSize);
1522 if (CL_SUCCESS != err)
1523 throw OpenCLError(err);
1524 // set work group size and execute
1525 size_t global_work_size[] = {256, (size_t)nVectorWidth };
1526 size_t local_work_size[] = {256, 1};
1527 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, NULL,
1528 global_work_size, local_work_size, 0, NULL, NULL);
1529 if (CL_SUCCESS != err)
1530 throw OpenCLError(err);
1531 err = clFinish(kEnv.mpkCmdQueue);
1532 if (CL_SUCCESS != err)
1533 throw OpenCLError(err);
1534 clReleaseKernel(redKernel);
1535 // Pass mpClmem2 to the "real" kernel
1536 err = clSetKernelArg(k, argno, sizeof(cl_mem), (void *)&mpClmem2);
1537 if (CL_SUCCESS != err)
1538 throw OpenCLError(err);
1541 return i;
1544 virtual void GenSlidingWindowFunction(std::stringstream &ss) {
1545 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1546 mvSubArguments[i]->GenSlidingWindowFunction(ss);
1547 mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
1549 virtual void GenDeclRef(std::stringstream &ss) const
1551 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1553 if (i)
1554 ss << ",";
1555 mvSubArguments[i]->GenDeclRef(ss);
1558 virtual void GenDecl(std::stringstream &ss) const
1560 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e= mvSubArguments.end(); it!=e;
1561 ++it) {
1562 if (it != mvSubArguments.begin())
1563 ss << ", ";
1564 (*it)->GenDecl(ss);
1568 virtual size_t GetWindowSize(void) const
1570 size_t nCurWindowSize = 0;
1571 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1573 size_t nCurChildWindowSize = mvSubArguments[i]->GetWindowSize();
1574 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1575 nCurChildWindowSize:nCurWindowSize;
1577 return nCurWindowSize;
1580 /// When declared as input to a sliding window function
1581 virtual void GenSlidingWindowDecl(std::stringstream &ss) const
1583 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e= mvSubArguments.end(); it!=e;
1584 ++it)
1586 if (it != mvSubArguments.begin())
1587 ss << ", ";
1588 (*it)->GenSlidingWindowDecl(ss);
1591 /// Generate either a function call to each children
1592 /// or direclty inline it if we are already inside a loop
1593 virtual std::string GenSlidingWindowDeclRef(bool nested=false) const
1595 std::stringstream ss;
1596 if (!nested)
1598 ss << mSymName << "_" << mpCodeGen->BinFuncName() <<"(";
1599 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1601 if (i)
1602 ss << ", ";
1603 if (!nested)
1604 mvSubArguments[i]->GenDeclRef(ss);
1605 else
1606 ss << mvSubArguments[i]->GenSlidingWindowDeclRef(true);
1608 ss << ")";
1609 } else {
1610 if (mvSubArguments.size() != 2)
1611 throw Unhandled();
1612 ss << "(" << mpCodeGen->Gen2(mvSubArguments[0]->GenSlidingWindowDeclRef(true),
1613 mvSubArguments[1]->GenSlidingWindowDeclRef(true)) << ")";
1615 return ss.str();
1617 virtual std::string DumpOpName(void) const
1619 std::string t = "_" + mpCodeGen->BinFuncName();
1620 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1621 t = t + mvSubArguments[i]->DumpOpName();
1622 return t;
1624 virtual void DumpInlineFun(std::set<std::string>& decls,
1625 std::set<std::string>& funs) const
1627 mpCodeGen->BinInlineFun(decls,funs);
1628 for (unsigned i = 0; i < mvSubArguments.size(); i++)
1629 mvSubArguments[i]->DumpInlineFun(decls,funs);
1631 ~DynamicKernelSoPArguments()
1633 if (mpClmem2)
1635 clReleaseMemObject(mpClmem2);
1636 mpClmem2 = NULL;
1639 private:
1640 SubArgumentsType mvSubArguments;
1641 boost::shared_ptr<SlidingFunctionBase> mpCodeGen;
1642 cl_mem mpClmem2;
1645 boost::shared_ptr<DynamicKernelArgument> SoPHelper(
1646 const std::string &ts, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen)
1648 return boost::shared_ptr<DynamicKernelArgument>(new DynamicKernelSoPArguments(ts, ft, pCodeGen));
1651 template<class Base>
1652 DynamicKernelArgument *VectorRefFactory(const std::string &s,
1653 const FormulaTreeNodeRef& ft,
1654 boost::shared_ptr<SlidingFunctionBase> &pCodeGen,
1655 int index)
1657 //Black lists ineligible classes here ..
1658 // SUMIFS does not perform parallel reduction at DoubleVectorRef level
1659 if (dynamic_cast<OpSumIfs*>(pCodeGen.get())) {
1660 if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
1661 return new DynamicKernelSlidingArgument<VectorRef>(s, ft, pCodeGen, index);
1662 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1664 // AVERAGE is not supported yet
1665 else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
1667 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1669 // MUL is not supported yet
1670 else if (dynamic_cast<OpMul*>(pCodeGen.get()))
1672 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1674 // Sub is not a reduction per se
1675 else if (dynamic_cast<OpSub*>(pCodeGen.get()))
1677 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1679 // Only child class of Reduction is supported
1680 else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
1682 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1685 const formula::DoubleVectorRefToken* pDVR =
1686 dynamic_cast< const formula::DoubleVectorRefToken* >(
1687 ft->GetFormulaToken());
1688 // Window being too small to justify a parallel reduction
1689 if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
1690 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1691 if ((pDVR->IsStartFixed() && pDVR->IsEndFixed()) ||
1692 (!pDVR->IsStartFixed() && !pDVR->IsEndFixed()))
1693 return new ParallelReductionVectorRef<Base>(s, ft, pCodeGen, index);
1694 else // Other cases are not supported as well
1695 return new DynamicKernelSlidingArgument<Base>(s, ft, pCodeGen, index);
1698 DynamicKernelSoPArguments::DynamicKernelSoPArguments(
1699 const std::string &s, const FormulaTreeNodeRef& ft, SlidingFunctionBase* pCodeGen) :
1700 DynamicKernelArgument(s, ft), mpCodeGen(pCodeGen), mpClmem2(NULL)
1702 size_t nChildren = ft->Children.size();
1704 for (unsigned i = 0; i < nChildren; i++)
1706 FormulaToken *pChild = ft->Children[i]->GetFormulaToken();
1707 if (!pChild)
1708 throw Unhandled();
1709 OpCode opc = pChild->GetOpCode();
1710 std::stringstream tmpname;
1711 tmpname << s << "_" << i;
1712 std::string ts = tmpname.str();
1713 switch(opc) {
1714 case ocPush:
1715 if (pChild->GetType() == formula::svDoubleVectorRef)
1717 const formula::DoubleVectorRefToken* pDVR =
1718 dynamic_cast< const formula::DoubleVectorRefToken* >(pChild);
1719 assert(pDVR);
1720 for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
1722 if (pDVR->GetArrays()[j].mpNumericArray ||
1723 (pDVR->GetArrays()[j].mpNumericArray == NULL &&
1724 pDVR->GetArrays()[j].mpStringArray == NULL ))
1725 mvSubArguments.push_back(
1726 SubArgument(VectorRefFactory<VectorRef>(
1727 ts, ft->Children[i], mpCodeGen, j)));
1728 else
1729 mvSubArguments.push_back(
1730 SubArgument(VectorRefFactory
1731 <DynamicKernelStringArgument>(
1732 ts, ft->Children[i], mpCodeGen, j)));
1734 } else if (pChild->GetType() == formula::svSingleVectorRef) {
1735 const formula::SingleVectorRefToken* pSVR =
1736 dynamic_cast< const formula::SingleVectorRefToken* >(pChild);
1737 assert(pSVR);
1738 if (pSVR->GetArray().mpNumericArray &&
1739 pCodeGen->takeNumeric() &&
1740 pSVR->GetArray().mpStringArray &&
1741 pCodeGen->takeString())
1743 mvSubArguments.push_back(
1744 SubArgument(new DynamicKernelMixedArgument(
1745 ts, ft->Children[i])));
1747 else if (pSVR->GetArray().mpNumericArray &&
1748 pCodeGen->takeNumeric())
1750 mvSubArguments.push_back(
1751 SubArgument(new VectorRef(ts,
1752 ft->Children[i])));
1754 else if (pSVR->GetArray().mpStringArray &&
1755 pCodeGen->takeString())
1757 mvSubArguments.push_back(
1758 SubArgument(new DynamicKernelStringArgument(
1759 ts, ft->Children[i])));
1761 else if (pSVR->GetArray().mpStringArray == NULL &&
1762 pSVR->GetArray().mpNumericArray == NULL)
1764 // Push as an array of NANs
1765 mvSubArguments.push_back(
1766 SubArgument(new VectorRef(ts,
1767 ft->Children[i])));
1769 else
1770 throw UnhandledToken(pChild,
1771 "Got unhandled case here", __FILE__, __LINE__);
1772 } else if (pChild->GetType() == formula::svDouble) {
1773 mvSubArguments.push_back(
1774 SubArgument(new DynamicKernelConstantArgument(ts,
1775 ft->Children[i])));
1776 } else if (pChild->GetType() == formula::svString) {
1777 mvSubArguments.push_back(
1778 SubArgument(new ConstStringArgument(ts,
1779 ft->Children[i])));
1780 } else {
1781 throw UnhandledToken(pChild, "unknown operand for ocPush");
1783 break;
1784 case ocDiv:
1785 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDiv));
1786 break;
1787 case ocMul:
1788 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMul));
1789 break;
1790 case ocSub:
1791 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSub));
1792 break;
1793 case ocAdd:
1794 case ocSum:
1795 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSum));
1796 break;
1797 case ocAverage:
1798 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpAverage));
1799 break;
1800 case ocMin:
1801 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMin));
1802 break;
1803 case ocMax:
1804 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMax));
1805 break;
1806 case ocCount:
1807 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCount));
1808 break;
1809 case ocSumProduct:
1810 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSumProduct));
1811 break;
1812 case ocIRR:
1813 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpIRR));
1814 break;
1815 case ocMIRR:
1816 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpMIRR));
1817 break;
1818 case ocRMZ:
1819 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPMT));
1820 break;
1821 case ocZins:
1822 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpIntrate));
1823 break;
1824 case ocZGZ:
1825 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpRRI));
1826 break;
1827 case ocKapz:
1828 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPPMT));
1829 break;
1830 case ocFisher:
1831 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFisher));
1832 break;
1833 case ocFisherInv:
1834 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFisherInv));
1835 break;
1836 case ocGamma:
1837 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGamma));
1838 break;
1839 case ocLIA:
1840 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSLN));
1841 break;
1842 case ocGammaLn:
1843 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGammaLn));
1844 break;
1845 case ocGauss:
1846 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGauss));
1847 break;
1848 case ocGeoMean:
1849 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGeoMean));
1850 break;
1851 case ocHarMean:
1852 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpHarMean));
1853 break;
1854 case ocLessEqual:
1855 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpLessEqual));
1856 break;
1857 case ocEqual:
1858 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpEqual));
1859 break;
1860 case ocGreater:
1861 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpGreater));
1862 break;
1863 case ocDIA:
1864 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpSYD));
1865 break;
1866 case ocCorrel:
1867 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCorrel));
1868 break;
1869 case ocCos:
1870 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCos));
1871 break;
1872 case ocNegBinomVert :
1873 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpNegbinomdist));
1874 break;
1875 case ocPearson:
1876 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPearson));
1877 break;
1878 case ocRSQ:
1879 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpRsq));
1880 break;
1881 case ocCosecant:
1882 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCsc));
1883 break;
1884 case ocISPMT:
1885 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpISPMT));
1886 break;
1887 case ocLaufz:
1888 mvSubArguments.push_back(SoPHelper(ts,
1889 ft->Children[i], new OpDuration));
1890 break;
1891 case ocSinHyp:
1892 mvSubArguments.push_back(SoPHelper(ts,
1893 ft->Children[i],new OpSinh));
1894 break;
1895 case ocAbs:
1896 mvSubArguments.push_back(SoPHelper(ts,
1897 ft->Children[i], new OpAbs));
1898 break;
1899 case ocBW:
1900 mvSubArguments.push_back(SoPHelper(ts,
1901 ft->Children[i], new OpPV));
1902 break;
1903 case ocSin:
1904 mvSubArguments.push_back(SoPHelper(ts,
1905 ft->Children[i], new OpSin));
1906 break;
1907 case ocTan:
1908 mvSubArguments.push_back(SoPHelper(ts,
1909 ft->Children[i], new OpTan));
1910 break;
1911 case ocTanHyp:
1912 mvSubArguments.push_back(SoPHelper(ts,
1913 ft->Children[i], new OpTanH));
1914 break;
1915 case ocStandard:
1916 mvSubArguments.push_back(SoPHelper(ts,
1917 ft->Children[i], new OpStandard));
1918 break;
1919 case ocWeibull:
1920 mvSubArguments.push_back(SoPHelper(ts,
1921 ft->Children[i], new OpWeibull));
1922 break;
1923 case ocMedian:
1924 mvSubArguments.push_back(SoPHelper(ts,
1925 ft->Children[i],new OpMedian));
1926 break;
1927 case ocGDA:
1928 mvSubArguments.push_back(SoPHelper(ts,
1929 ft->Children[i],new OpDDB));
1930 break;
1931 case ocZW:
1932 mvSubArguments.push_back(SoPHelper(ts,
1933 ft->Children[i],new OpFV));
1934 break;
1935 case ocSumIfs:
1936 mvSubArguments.push_back(SoPHelper(ts,
1937 ft->Children[i],new OpSumIfs));
1938 break;
1939 case ocVBD:
1940 mvSubArguments.push_back(SoPHelper(ts,
1941 ft->Children[i],new OpVDB));
1942 break;
1943 case ocKurt:
1944 mvSubArguments.push_back(SoPHelper(ts,
1945 ft->Children[i], new OpKurt));
1946 break;
1947 case ocZZR:
1948 mvSubArguments.push_back(SoPHelper(ts,
1949 ft->Children[i], new OpNper));
1950 break;
1951 case ocNormDist:
1952 mvSubArguments.push_back(SoPHelper(ts,
1953 ft->Children[i],new OpNormdist));
1954 break;
1955 case ocArcCos:
1956 mvSubArguments.push_back(SoPHelper(ts,
1957 ft->Children[i], new OpArcCos));
1958 break;
1959 case ocSqrt:
1960 mvSubArguments.push_back(SoPHelper(ts,
1961 ft->Children[i],new OpSqrt));
1962 break;
1963 case ocArcCosHyp:
1964 mvSubArguments.push_back(SoPHelper(ts,
1965 ft->Children[i], new OpArcCosHyp));
1966 break;
1967 case ocNPV:
1968 mvSubArguments.push_back(SoPHelper(ts,
1969 ft->Children[i], new OpNPV));
1970 break;
1971 case ocStdNormDist:
1972 mvSubArguments.push_back(SoPHelper(ts,
1973 ft->Children[i],new OpNormsdist));
1974 break;
1975 case ocNormInv:
1976 mvSubArguments.push_back(SoPHelper(ts,
1977 ft->Children[i],new OpNorminv));
1978 break;
1979 case ocSNormInv:
1980 mvSubArguments.push_back(SoPHelper(ts,
1981 ft->Children[i],new OpNormsinv));
1982 break;
1983 case ocVariationen:
1984 mvSubArguments.push_back(SoPHelper(ts,
1985 ft->Children[i],new OpVariationen));
1986 break;
1987 case ocVariationen2:
1988 mvSubArguments.push_back(SoPHelper(ts,
1989 ft->Children[i],new OpVariationen2));
1990 break;
1991 case ocPhi:
1992 mvSubArguments.push_back(SoPHelper(ts,
1993 ft->Children[i],new OpPhi));
1994 break;
1995 case ocZinsZ:
1996 mvSubArguments.push_back(SoPHelper(ts,
1997 ft->Children[i],new OpIPMT));
1998 break;
1999 case ocConfidence:
2000 mvSubArguments.push_back(SoPHelper(ts,
2001 ft->Children[i], new OpConfidence));
2002 break;
2003 case ocIntercept:
2004 mvSubArguments.push_back(SoPHelper(ts,
2005 ft->Children[i], new OpIntercept));
2006 break;
2007 case ocGDA2:
2008 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2009 new OpDB));
2010 break;
2011 case ocLogInv:
2012 mvSubArguments.push_back(SoPHelper(ts,
2013 ft->Children[i], new OpLogInv));
2014 break;
2015 case ocArcCot:
2016 mvSubArguments.push_back(SoPHelper(ts,
2017 ft->Children[i], new OpArcCot));
2018 break;
2019 case ocCosHyp:
2020 mvSubArguments.push_back(SoPHelper(ts,
2021 ft->Children[i], new OpCosh));
2022 break;
2023 case ocKritBinom:
2024 mvSubArguments.push_back(SoPHelper(ts,
2025 ft->Children[i], new OpCritBinom));
2026 break;
2027 case ocArcCotHyp:
2028 mvSubArguments.push_back(SoPHelper(ts,
2029 ft->Children[i], new OpArcCotHyp));
2030 break;
2031 case ocArcSin:
2032 mvSubArguments.push_back(SoPHelper(ts,
2033 ft->Children[i], new OpArcSin));
2034 break;
2035 case ocArcSinHyp:
2036 mvSubArguments.push_back(SoPHelper(ts,
2037 ft->Children[i], new OpArcSinHyp));
2038 break;
2039 case ocArcTan:
2040 mvSubArguments.push_back(SoPHelper(ts,
2041 ft->Children[i], new OpArcTan));
2042 break;
2043 case ocArcTanHyp:
2044 mvSubArguments.push_back(SoPHelper(ts,
2045 ft->Children[i], new OpArcTanH));
2046 break;
2047 case ocBitAnd:
2048 mvSubArguments.push_back(SoPHelper(ts,
2049 ft->Children[i], new OpBitAnd));
2050 break;
2051 case ocForecast:
2052 mvSubArguments.push_back(SoPHelper(ts,
2053 ft->Children[i], new OpForecast));
2054 break;
2055 case ocLogNormDist:
2056 mvSubArguments.push_back(SoPHelper(ts,
2057 ft->Children[i], new OpLogNormDist));
2058 break;
2059 case ocGammaDist:
2060 mvSubArguments.push_back(SoPHelper(ts,
2061 ft->Children[i], new OpGammaDist));
2062 break;
2063 case ocLn:
2064 mvSubArguments.push_back(SoPHelper(ts,
2065 ft->Children[i],new OpLn));
2066 break;
2067 case ocRound:
2068 mvSubArguments.push_back(SoPHelper(ts,
2069 ft->Children[i],new OpRound));
2070 break;
2071 case ocCot:
2072 mvSubArguments.push_back(SoPHelper(ts,
2073 ft->Children[i], new OpCot));
2074 break;
2075 case ocCotHyp:
2076 mvSubArguments.push_back(SoPHelper(ts,
2077 ft->Children[i], new OpCoth));
2078 break;
2079 case ocFDist:
2080 mvSubArguments.push_back(SoPHelper(ts,
2081 ft->Children[i], new OpFdist));
2082 break;
2083 case ocVar:
2084 mvSubArguments.push_back(SoPHelper(ts,
2085 ft->Children[i], new OpVar));
2086 break;
2087 case ocChiDist:
2088 mvSubArguments.push_back(SoPHelper(ts,
2089 ft->Children[i],new OpChiDist));
2090 break;
2091 case ocPower:
2092 mvSubArguments.push_back(SoPHelper(ts,
2093 ft->Children[i], new OpPower));
2094 break;
2095 case ocOdd:
2096 mvSubArguments.push_back(SoPHelper(ts,
2097 ft->Children[i], new OpOdd));
2098 break;
2099 case ocChiSqDist:
2100 mvSubArguments.push_back(SoPHelper(ts,
2101 ft->Children[i],new OpChiSqDist));
2102 break;
2103 case ocChiSqInv:
2104 mvSubArguments.push_back(SoPHelper(ts,
2105 ft->Children[i],new OpChiSqInv));
2106 break;
2107 case ocGammaInv:
2108 mvSubArguments.push_back(SoPHelper(ts,
2109 ft->Children[i], new OpGammaInv));
2110 break;
2111 case ocFloor:
2112 mvSubArguments.push_back(SoPHelper(ts,
2113 ft->Children[i], new OpFloor));
2114 break;
2115 case ocFInv:
2116 mvSubArguments.push_back(SoPHelper(ts,
2117 ft->Children[i], new OpFInv));
2118 break;
2119 case ocFTest:
2120 mvSubArguments.push_back(SoPHelper(ts,
2121 ft->Children[i], new OpFTest));
2122 break;
2123 case ocB:
2124 mvSubArguments.push_back(SoPHelper(ts,
2125 ft->Children[i], new OpB));
2126 break;
2127 case ocBetaDist:
2128 mvSubArguments.push_back(SoPHelper(ts,
2129 ft->Children[i], new OpBetaDist));
2130 break;
2131 case ocCosecantHyp:
2132 mvSubArguments.push_back(SoPHelper(ts,
2133 ft->Children[i], new OpCscH));
2134 break;
2135 case ocExp:
2136 mvSubArguments.push_back(SoPHelper(ts,
2137 ft->Children[i], new OpExp));
2138 break;
2139 case ocLog10:
2140 mvSubArguments.push_back(SoPHelper(ts,
2141 ft->Children[i], new OpLog10));
2142 break;
2143 case ocExpDist:
2144 mvSubArguments.push_back(SoPHelper(ts,
2145 ft->Children[i], new OpExponDist));
2146 break;
2147 case ocAverageIfs:
2148 mvSubArguments.push_back(SoPHelper(ts,
2149 ft->Children[i],new OpAverageIfs));
2150 break;
2151 case ocCountIfs:
2152 mvSubArguments.push_back(SoPHelper(ts,
2153 ft->Children[i],new OpCountIfs));
2154 break;
2155 case ocKombin2:
2156 mvSubArguments.push_back(SoPHelper(ts,
2157 ft->Children[i], new OpCombina));
2158 break;
2159 case ocEven:
2160 mvSubArguments.push_back(SoPHelper(ts,
2161 ft->Children[i], new OpEven));
2162 break;
2163 case ocLog:
2164 mvSubArguments.push_back(SoPHelper(ts,
2165 ft->Children[i], new OpLog));
2166 break;
2167 case ocMod:
2168 mvSubArguments.push_back(SoPHelper(ts,
2169 ft->Children[i], new OpMod));
2170 break;
2171 case ocTrunc:
2172 mvSubArguments.push_back(SoPHelper(ts,
2173 ft->Children[i], new OpTrunc));
2174 break;
2175 case ocSchiefe:
2176 mvSubArguments.push_back(SoPHelper(ts,
2177 ft->Children[i], new OpSkew));
2178 break;
2179 case ocArcTan2:
2180 mvSubArguments.push_back(SoPHelper(ts,
2181 ft->Children[i], new OpArcTan2));
2182 break;
2183 case ocBitOr:
2184 mvSubArguments.push_back(SoPHelper(ts,
2185 ft->Children[i], new OpBitOr));
2186 break;
2187 case ocBitLshift:
2188 mvSubArguments.push_back(SoPHelper(ts,
2189 ft->Children[i], new OpBitLshift));
2190 break;
2191 case ocBitRshift:
2192 mvSubArguments.push_back(SoPHelper(ts,
2193 ft->Children[i], new OpBitRshift));
2194 break;
2195 case ocBitXor:
2196 mvSubArguments.push_back(SoPHelper(ts,
2197 ft->Children[i], new OpBitXor));
2198 break;
2199 case ocChiInv:
2200 mvSubArguments.push_back(SoPHelper(ts,
2201 ft->Children[i],new OpChiInv));
2202 break;
2203 case ocPoissonDist:
2204 mvSubArguments.push_back(SoPHelper(ts,
2205 ft->Children[i],new OpPoisson));
2206 break;
2207 case ocSumSQ:
2208 mvSubArguments.push_back(SoPHelper(ts,
2209 ft->Children[i], new OpSumSQ));
2210 break;
2211 case ocSkewp:
2212 mvSubArguments.push_back(SoPHelper(ts,
2213 ft->Children[i], new OpSkewp));
2214 break;
2215 case ocBinomDist:
2216 mvSubArguments.push_back(SoPHelper(ts,
2217 ft->Children[i],new OpBinomdist));
2218 break;
2219 case ocVarP:
2220 mvSubArguments.push_back(SoPHelper(ts,
2221 ft->Children[i], new OpVarP));
2222 break;
2223 case ocCeil:
2224 mvSubArguments.push_back(SoPHelper(ts,
2225 ft->Children[i], new OpCeil));
2226 break;
2227 case ocKombin:
2228 mvSubArguments.push_back(SoPHelper(ts,
2229 ft->Children[i], new OpKombin));
2230 break;
2231 case ocDevSq:
2232 mvSubArguments.push_back(SoPHelper(ts,
2233 ft->Children[i], new OpDevSq));
2234 break;
2235 case ocStDev:
2236 mvSubArguments.push_back(SoPHelper(ts,
2237 ft->Children[i], new OpStDev));
2238 break;
2239 case ocSlope:
2240 mvSubArguments.push_back(SoPHelper(ts,
2241 ft->Children[i], new OpSlope));
2242 break;
2243 case ocSTEYX:
2244 mvSubArguments.push_back(SoPHelper(ts,
2245 ft->Children[i], new OpSTEYX));
2246 break;
2247 case ocZTest:
2248 mvSubArguments.push_back(SoPHelper(ts,
2249 ft->Children[i], new OpZTest));
2250 break;
2251 case ocPi:
2252 mvSubArguments.push_back(
2253 SubArgument(new DynamicKernelPiArgument(ts,
2254 ft->Children[i])));
2255 break;
2256 case ocRandom:
2257 mvSubArguments.push_back(
2258 SubArgument(new DynamicKernelRandomArgument(ts,
2259 ft->Children[i])));
2260 break;
2261 case ocProduct:
2262 mvSubArguments.push_back(SoPHelper(ts,
2263 ft->Children[i], new OpProduct));
2264 break;
2265 case ocHypGeomDist:
2266 mvSubArguments.push_back(SoPHelper(ts,
2267 ft->Children[i],new OpHypGeomDist));
2268 break;
2269 case ocSumX2MY2:
2270 mvSubArguments.push_back(SoPHelper(ts,
2271 ft->Children[i],new OpSumX2MY2));
2272 break;
2273 case ocSumX2DY2:
2274 mvSubArguments.push_back(SoPHelper(ts,
2275 ft->Children[i],new OpSumX2PY2));
2276 break;
2277 case ocBetaInv:
2278 mvSubArguments.push_back(SoPHelper(ts,
2279 ft->Children[i],new OpBetainv));
2280 break;
2281 case ocTTest:
2282 mvSubArguments.push_back(SoPHelper(ts,
2283 ft->Children[i], new OpTTest));
2284 break;
2285 case ocTDist:
2286 mvSubArguments.push_back(SoPHelper(ts,
2287 ft->Children[i], new OpTDist));
2288 break;
2289 case ocTInv:
2290 mvSubArguments.push_back(SoPHelper(ts,
2291 ft->Children[i], new OpTInv));
2292 break;
2293 case ocSumXMY2:
2294 mvSubArguments.push_back(SoPHelper(ts,
2295 ft->Children[i],new OpSumXMY2));
2296 break;
2297 case ocStDevP:
2298 mvSubArguments.push_back(SoPHelper(ts,
2299 ft->Children[i], new OpStDevP));
2300 break;
2301 case ocCovar:
2302 mvSubArguments.push_back(SoPHelper(ts,
2303 ft->Children[i], new OpCovar));
2304 break;
2305 case ocAnd:
2306 mvSubArguments.push_back(SoPHelper(ts,
2307 ft->Children[i], new OpAnd));
2308 break;
2309 case ocExternal:
2310 if ( !(pChild->GetExternal().compareTo(OUString(
2311 "com.sun.star.sheet.addin.Analysis.getEffect"))))
2313 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpEffective));
2315 else if ( !(pChild->GetExternal().compareTo(OUString(
2316 "com.sun.star.sheet.addin.Analysis.getCumipmt"))))
2318 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCumipmt));
2320 else if ( !(pChild->GetExternal().compareTo(OUString(
2321 "com.sun.star.sheet.addin.Analysis.getNominal"))))
2323 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpNominal));
2325 else if ( !(pChild->GetExternal().compareTo(OUString(
2326 "com.sun.star.sheet.addin.Analysis.getCumprinc"))))
2328 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCumprinc));
2330 else if ( !(pChild->GetExternal().compareTo(OUString(
2331 "com.sun.star.sheet.addin.Analysis.getXnpv"))))
2333 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpXNPV));
2335 else if ( !(pChild->GetExternal().compareTo(OUString(
2336 "com.sun.star.sheet.addin.Analysis.getPricemat"))))
2338 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpPriceMat));
2340 else if ( !(pChild->GetExternal().compareTo(OUString(
2341 "com.sun.star.sheet.addin.Analysis.getReceived"))))
2343 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpReceived));
2345 else if( !(pChild->GetExternal().compareTo(OUString(
2346 "com.sun.star.sheet.addin.Analysis.getTbilleq"))))
2348 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbilleq));
2350 else if( !(pChild->GetExternal().compareTo(OUString(
2351 "com.sun.star.sheet.addin.Analysis.getTbillprice"))))
2353 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbillprice));
2355 else if( !(pChild->GetExternal().compareTo(OUString(
2356 "com.sun.star.sheet.addin.Analysis.getTbillyield"))))
2358 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpTbillyield));
2360 else if (!(pChild->GetExternal().compareTo(OUString(
2361 "com.sun.star.sheet.addin.Analysis.getFvschedule"))))
2363 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpFvschedule));
2365 else if ( !(pChild->GetExternal().compareTo(OUString(
2366 "com.sun.star.sheet.addin.Analysis.getYield"))))
2368 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYield));
2370 else if ( !(pChild->GetExternal().compareTo(OUString(
2371 "com.sun.star.sheet.addin.Analysis.getYielddisc"))))
2373 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYielddisc));
2375 else if ( !(pChild->GetExternal().compareTo(OUString(
2376 "com.sun.star.sheet.addin.Analysis.getYieldmat"))))
2378 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpYieldmat));
2380 else if ( !(pChild->GetExternal().compareTo(OUString(
2381 "com.sun.star.sheet.addin.Analysis.getAccrintm"))))
2383 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpAccrintm));
2385 else if ( !(pChild->GetExternal().compareTo(OUString(
2386 "com.sun.star.sheet.addin.Analysis.getCoupdaybs"))))
2388 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdaybs));
2390 else if ( !(pChild->GetExternal().compareTo(OUString(
2391 "com.sun.star.sheet.addin.Analysis.getDollarde"))))
2393 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDollarde));
2395 else if ( !(pChild->GetExternal().compareTo(OUString(
2396 "com.sun.star.sheet.addin.Analysis.getDollarfr"))))
2398 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDollarfr));
2400 else if ( !(pChild->GetExternal().compareTo(OUString(
2401 "com.sun.star.sheet.addin.Analysis.getCoupdays"))))
2403 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdays));
2405 else if ( !(pChild->GetExternal().compareTo(OUString(
2406 "com.sun.star.sheet.addin.Analysis.getCoupdaysnc"))))
2408 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpCoupdaysnc));
2410 else if ( !(pChild->GetExternal().compareTo(OUString(
2411 "com.sun.star.sheet.addin.Analysis.getDisc"))))
2413 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpDISC));
2415 else if ( !(pChild->GetExternal().compareTo(OUString(
2416 "com.sun.star.sheet.addin.Analysis.getIntrate"))))
2418 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i], new OpINTRATE));
2420 else if ( !(pChild->GetExternal().compareTo(OUString(
2421 "com.sun.star.sheet.addin.Analysis.getPrice"))))
2423 mvSubArguments.push_back(SoPHelper(ts,
2424 ft->Children[i], new OpPrice));
2426 else if ( !(pChild->GetExternal().compareTo(OUString(
2427 "com.sun.star.sheet.addin.Analysis.getCoupnum"))))
2429 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2430 new OpCoupnum));
2432 else if ( !(pChild->GetExternal().compareTo(OUString(
2433 "com.sun.star.sheet.addin.Analysis.getDuration"))))
2435 mvSubArguments.push_back(
2436 SoPHelper(ts, ft->Children[i], new OpDuration_ADD));
2438 else if ( !(pChild->GetExternal().compareTo(OUString(
2439 "com.sun.star.sheet.addin.Analysis.getAmordegrc"))))
2441 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2442 new OpAmordegrc));
2444 else if ( !(pChild->GetExternal().compareTo(OUString(
2445 "com.sun.star.sheet.addin.Analysis.getAmorlinc"))))
2447 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2448 new OpAmorlinc));
2450 else if ( !(pChild->GetExternal().compareTo(OUString(
2451 "com.sun.star.sheet.addin.Analysis.getMduration"))))
2453 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2454 new OpMDuration));
2456 else if ( !(pChild->GetExternal().compareTo(OUString(
2457 "com.sun.star.sheet.addin.Analysis.getXirr"))))
2459 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2460 new OpXirr));
2462 else if ( !(pChild->GetExternal().compareTo(OUString(
2463 "com.sun.star.sheet.addin.Analysis.getOddlprice"))))
2465 mvSubArguments.push_back(SoPHelper(ts,
2466 ft->Children[i], new OpOddlprice));
2468 else if ( !(pChild->GetExternal().compareTo(OUString(
2469 "com.sun.star.sheet.addin.Analysis.getOddlyield"))))
2471 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2472 new OpOddlyield));
2474 else if ( !(pChild->GetExternal().compareTo(OUString(
2475 "com.sun.star.sheet.addin.Analysis.getPricedisc"))))
2477 mvSubArguments.push_back(SoPHelper(ts,
2478 ft->Children[i], new OpPriceDisc));
2480 else if ( !(pChild->GetExternal().compareTo(OUString(
2481 "com.sun.star.sheet.addin.Analysis.getCouppcd"))))
2483 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2484 new OpCouppcd));
2486 else if ( !(pChild->GetExternal().compareTo(OUString(
2487 "com.sun.star.sheet.addin.Analysis.getCoupncd"))))
2489 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2490 new OpCoupncd));
2492 else if ( !(pChild->GetExternal().compareTo(OUString(
2493 "com.sun.star.sheet.addin.Analysis.getAccrint"))))
2495 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2496 new OpAccrint));
2498 else if ( !(pChild->GetExternal().compareTo(OUString(
2499 "com.sun.star.sheet.addin.Analysis.getSqrtpi"))))
2501 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2502 new OpSqrtPi));
2504 else if ( !(pChild->GetExternal().compareTo(OUString(
2505 "com.sun.star.sheet.addin.Analysis.getConvert"))))
2507 mvSubArguments.push_back(SoPHelper(ts, ft->Children[i],
2508 new OpConvert));
2510 break;
2511 default:
2512 throw UnhandledToken(pChild, "unhandled opcode");
2517 /// Holds the symbol table for a given dynamic kernel
2518 class SymbolTable {
2519 public:
2520 typedef std::map<const formula::FormulaToken *,
2521 boost::shared_ptr<DynamicKernelArgument> > ArgumentMap;
2522 // This avoids instability caused by using pointer as the key type
2523 typedef std::list< boost::shared_ptr<DynamicKernelArgument> > ArgumentList;
2524 SymbolTable(void):mCurId(0) {}
2525 template <class T>
2526 const DynamicKernelArgument *DeclRefArg(FormulaTreeNodeRef, SlidingFunctionBase* pCodeGen);
2527 /// Used to generate sliding window helpers
2528 void DumpSlidingWindowFunctions(std::stringstream &ss)
2530 for(ArgumentList::iterator it = mParams.begin(), e= mParams.end(); it!=e;
2531 ++it) {
2532 (*it)->GenSlidingWindowFunction(ss);
2533 ss << "\n";
2536 /// Memory mapping from host to device and pass buffers to the given kernel as
2537 /// arguments
2538 void Marshal(cl_kernel, int, cl_program);
2539 private:
2540 unsigned int mCurId;
2541 ArgumentMap mSymbols;
2542 ArgumentList mParams;
2545 void SymbolTable::Marshal(cl_kernel k, int nVectorWidth, cl_program pProgram)
2547 int i = 1; //The first argument is reserved for results
2548 for(ArgumentList::iterator it = mParams.begin(), e= mParams.end(); it!=e;
2549 ++it) {
2550 i+=(*it)->Marshal(k, i, nVectorWidth, pProgram);
2554 class DynamicKernel : public CompiledFormula
2556 public:
2557 DynamicKernel(FormulaTreeNodeRef r):mpRoot(r),
2558 mpProgram(NULL), mpKernel(NULL), mpResClmem(NULL), mpCode(NULL) {}
2559 static DynamicKernel *create(ScDocument& rDoc,
2560 const ScAddress& rTopPos,
2561 ScTokenArray& rCode);
2562 /// OpenCL code generation
2563 void CodeGen() {
2564 // Travese the tree of expression and declare symbols used
2565 const DynamicKernelArgument *DK= mSyms.DeclRefArg<
2566 DynamicKernelSoPArguments>(mpRoot, new OpNop);
2568 std::stringstream decl;
2569 if (OpenclDevice::gpuEnv.mnKhrFp64Flag) {
2570 decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
2571 } else if (OpenclDevice::gpuEnv.mnAmdFp64Flag) {
2572 decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
2574 // preambles
2575 decl << publicFunc;
2576 DK->DumpInlineFun(inlineDecl,inlineFun);
2577 for(std::set<std::string>::iterator set_iter=inlineDecl.begin();
2578 set_iter!=inlineDecl.end();++set_iter)
2580 decl<<*set_iter;
2583 for(std::set<std::string>::iterator set_iter=inlineFun.begin();
2584 set_iter!=inlineFun.end();++set_iter)
2586 decl<<*set_iter;
2588 mSyms.DumpSlidingWindowFunctions(decl);
2589 mKernelSignature = DK->DumpOpName();
2590 decl << "__kernel void DynamicKernel" << mKernelSignature;
2591 decl << "(__global double *result, ";
2592 DK->GenSlidingWindowDecl(decl);
2593 decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
2594 DK->GenSlidingWindowDeclRef(false) << ";\n}\n";
2595 mFullProgramSrc = decl.str();
2596 #if 1
2597 std::cerr<< "Program to be compiled = \n" << mFullProgramSrc << "\n";
2598 #endif
2600 /// Produce kernel hash
2601 std::string GetMD5(void)
2603 #ifdef MD5_KERNEL
2604 if (mKernelHash.empty()) {
2605 std::stringstream md5s;
2606 // Compute MD5SUM of kernel body to obtain the name
2607 sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
2608 rtl_digest_MD5(
2609 mFullProgramSrc.c_str(),
2610 mFullProgramSrc.length(), result,
2611 RTL_DIGEST_LENGTH_MD5);
2612 for(int i=0; i < RTL_DIGEST_LENGTH_MD5; i++) {
2613 md5s << std::hex << (int)result[i];
2615 mKernelHash = md5s.str();
2617 return mKernelHash;
2618 #else
2619 return "";
2620 #endif
2622 /// Create program, build, and create kerenl
2623 /// TODO cache results based on kernel body hash
2624 /// TODO: abstract OpenCL part out into OpenCL wrapper.
2625 void CreateKernel(void);
2626 /// Prepare buffers, marshal them to GPU, and launch the kernel
2627 /// TODO: abstract OpenCL part out into OpenCL wrapper.
2628 void Launch(size_t nr)
2630 // Obtain cl context
2631 KernelEnv kEnv;
2632 OpenclDevice::setKernelEnv(&kEnv);
2633 cl_int err;
2634 // The results
2635 mpResClmem = clCreateBuffer(kEnv.mpkContext,
2636 (cl_mem_flags) CL_MEM_READ_WRITE,
2637 nr*sizeof(double), NULL, &err);
2638 if (CL_SUCCESS != err)
2639 throw OpenCLError(err);
2640 err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), (void*)&mpResClmem);
2641 if (CL_SUCCESS != err)
2642 throw OpenCLError(err);
2643 // The rest of buffers
2644 mSyms.Marshal(mpKernel, nr, mpProgram);
2645 size_t global_work_size[] = {nr};
2646 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, NULL,
2647 global_work_size, NULL, 0, NULL, NULL);
2648 if (CL_SUCCESS != err)
2649 throw OpenCLError(err);
2651 ~DynamicKernel();
2652 cl_mem GetResultBuffer(void) const { return mpResClmem; }
2653 void SetPCode(ScTokenArray *pCode) { mpCode = pCode; }
2655 private:
2656 void TraverseAST(FormulaTreeNodeRef);
2657 FormulaTreeNodeRef mpRoot;
2658 SymbolTable mSyms;
2659 std::string mKernelSignature, mKernelHash;
2660 std::string mFullProgramSrc;
2661 cl_program mpProgram;
2662 cl_kernel mpKernel;
2663 cl_mem mpResClmem; // Results
2664 std::set<std::string> inlineDecl;
2665 std::set<std::string> inlineFun;
2666 ScTokenArray *mpCode;
2669 DynamicKernel::~DynamicKernel()
2671 if (mpResClmem) {
2672 std::cerr<<"Freeing kernel "<< GetMD5() << " result buffer\n";
2673 clReleaseMemObject(mpResClmem);
2675 if (mpKernel) {
2676 std::cerr<<"Freeing kernel "<< GetMD5() << " kernel\n";
2677 clReleaseKernel(mpKernel);
2679 // mpProgram is not going to be released here -- it's cached.
2680 if (mpCode)
2681 delete mpCode;
2683 /// Build code
2684 void DynamicKernel::CreateKernel(void)
2686 cl_int err;
2687 std::string kname = "DynamicKernel"+mKernelSignature;
2688 // Compile kernel here!!!
2689 // Obtain cl context
2690 KernelEnv kEnv;
2691 OpenclDevice::setKernelEnv(&kEnv);
2692 const char *src = mFullProgramSrc.c_str();
2693 static std::string lastOneKernelHash = "";
2694 static std::string lastSecondKernelHash = "";
2695 static cl_program lastOneProgram = NULL;
2696 static cl_program lastSecondProgram = NULL;
2697 std::string KernelHash = mKernelSignature+GetMD5();
2698 if (lastOneKernelHash == KernelHash && lastOneProgram)
2700 std::cerr<<"cl_program cache hit: "<< KernelHash << "\n";
2701 mpProgram = lastOneProgram;
2703 else if(lastSecondKernelHash == KernelHash && lastSecondProgram)
2705 std::cerr<<"cl_program cache hit: "<< KernelHash << "\n";
2706 mpProgram = lastSecondProgram;
2708 else
2709 { // doesn't match the last compiled formula.
2711 if (lastSecondProgram) {
2712 std::cerr<<"Freeing lastsecond program: "<< GetMD5() << "\n";
2713 clReleaseProgram(lastSecondProgram);
2715 if (OpenclDevice::buildProgramFromBinary("",
2716 &OpenclDevice::gpuEnv, KernelHash.c_str(), 0)) {
2717 mpProgram = OpenclDevice::gpuEnv.mpArryPrograms[0];
2718 OpenclDevice::gpuEnv.mpArryPrograms[0] = NULL;
2719 } else {
2720 mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
2721 &src, NULL, &err);
2722 if (err != CL_SUCCESS)
2723 throw OpenCLError(err);
2724 err = clBuildProgram(mpProgram, 1,
2725 OpenclDevice::gpuEnv.mpArryDevsID, "", NULL, NULL);
2726 if (err != CL_SUCCESS)
2727 throw OpenCLError(err);
2728 // Generate binary out of compiled kernel.
2729 OpenclDevice::generatBinFromKernelSource(mpProgram,
2730 (mKernelSignature+GetMD5()).c_str());
2732 lastSecondKernelHash = lastOneKernelHash;
2733 lastSecondProgram = lastOneProgram;
2734 lastOneKernelHash = KernelHash;
2735 lastOneProgram = mpProgram;
2737 mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
2738 if (err != CL_SUCCESS)
2739 throw OpenCLError(err);
2741 // Symbol lookup. If there is no such symbol created, allocate one
2742 // kernel with argument with unique name and return so.
2743 // The template argument T must be a subclass of DynamicKernelArgument
2744 template <typename T>
2745 const DynamicKernelArgument *SymbolTable::DeclRefArg(
2746 FormulaTreeNodeRef t, SlidingFunctionBase* pCodeGen)
2748 FormulaToken *ref = t->GetFormulaToken();
2749 ArgumentMap::iterator it = mSymbols.find(ref);
2750 if (it == mSymbols.end()) {
2751 // Allocate new symbols
2752 std::cerr << "DeclRefArg: Allocate a new symbol:";
2753 std::stringstream ss;
2754 ss << "tmp"<< mCurId++;
2755 boost::shared_ptr<DynamicKernelArgument> new_arg(new T(ss.str(), t, pCodeGen));
2756 mSymbols[ref] = new_arg;
2757 mParams.push_back(new_arg);
2758 std::cerr << ss.str() <<"\n";
2759 return new_arg.get();
2760 } else {
2761 return it->second.get();
2765 class FormulaGroupInterpreterOpenCL : public FormulaGroupInterpreter
2767 public:
2768 FormulaGroupInterpreterOpenCL() :
2769 FormulaGroupInterpreter()
2772 virtual ~FormulaGroupInterpreterOpenCL()
2776 virtual ScMatrixRef inverseMatrix( const ScMatrix& rMat ) SAL_OVERRIDE;
2777 virtual CompiledFormula* createCompiledFormula(ScDocument& rDoc,
2778 const ScAddress& rTopPos,
2779 ScFormulaCellGroupRef& xGroup,
2780 ScTokenArray& rCode) SAL_OVERRIDE;
2781 virtual bool interpret( ScDocument& rDoc, const ScAddress& rTopPos,
2782 ScFormulaCellGroupRef& xGroup, ScTokenArray& rCode ) SAL_OVERRIDE;
2785 ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
2787 return NULL;
2790 DynamicKernel* DynamicKernel::create(ScDocument& /* rDoc */,
2791 const ScAddress& /* rTopPos */,
2792 ScTokenArray& rCode)
2794 // Constructing "AST"
2795 FormulaTokenIterator aCode = rCode;
2796 std::list<FormulaToken *> list;
2797 std::map<FormulaToken *, FormulaTreeNodeRef> m_hash_map;
2798 FormulaToken* pCur;
2799 while( (pCur = (FormulaToken*)(aCode.Next()) ) != NULL)
2801 OpCode eOp = pCur->GetOpCode();
2802 if ( eOp != ocPush )
2804 FormulaTreeNodeRef m_currNode =
2805 FormulaTreeNodeRef(new FormulaTreeNode(pCur));
2806 sal_uInt8 m_ParamCount = pCur->GetParamCount();
2807 for(int i=0; i<m_ParamCount; i++)
2809 FormulaToken* m_TempFormula = list.back();
2810 list.pop_back();
2811 if(m_TempFormula->GetOpCode()!=ocPush)
2813 if(m_hash_map.find(m_TempFormula)==m_hash_map.end())
2814 return NULL;
2815 m_currNode->Children.push_back(m_hash_map[m_TempFormula]);
2817 else
2819 FormulaTreeNodeRef m_ChildTreeNode =
2820 FormulaTreeNodeRef(
2821 new FormulaTreeNode(m_TempFormula));
2822 m_currNode->Children.push_back(m_ChildTreeNode);
2825 std::reverse(m_currNode->Children.begin(),
2826 m_currNode->Children.end());
2827 m_hash_map[pCur] = m_currNode;
2829 list.push_back(pCur);
2832 FormulaTreeNodeRef Root = FormulaTreeNodeRef(new FormulaTreeNode(NULL));
2833 Root->Children.push_back(m_hash_map[list.back()]);
2835 DynamicKernel* pDynamicKernel = new DynamicKernel(Root);
2837 if (!pDynamicKernel)
2838 return NULL;
2840 // OpenCL source code generation and kernel compilation
2841 try {
2842 pDynamicKernel->CodeGen();
2843 pDynamicKernel->CreateKernel();
2845 catch (const UnhandledToken &ut) {
2846 std::cerr << "\nDynamic formual compiler: unhandled token: ";
2847 std::cerr << ut.mMessage << " at ";
2848 std::cerr << ut.mFile << ":" << ut.mLineNumber << "\n";
2849 #ifdef NO_FALLBACK_TO_SWINTERP
2850 assert(false);
2851 #else
2852 free(pDynamicKernel);
2853 return NULL;
2854 #endif
2856 return pDynamicKernel;
2859 CompiledFormula* FormulaGroupInterpreterOpenCL::createCompiledFormula(ScDocument& rDoc,
2860 const ScAddress& rTopPos,
2861 ScFormulaCellGroupRef& xGroup,
2862 ScTokenArray& rCode)
2864 ScTokenArray *pCode = new ScTokenArray();
2865 ScGroupTokenConverter aConverter(*pCode, rDoc, *xGroup->mpTopCell, rTopPos);
2866 if (!aConverter.convert(rCode))
2868 return NULL;
2871 DynamicKernel *result = DynamicKernel::create(rDoc, rTopPos, *pCode);
2872 result->SetPCode(pCode);
2873 return result;
2876 bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
2877 const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
2878 ScTokenArray& rCode )
2880 DynamicKernel *pKernel;
2882 osl::ResettableMutexGuard aGuard(xGroup->maMutex);
2883 if (xGroup->meCalcState == sc::GroupCalcOpenCLKernelCompilationScheduled ||
2884 xGroup->meCalcState == sc::GroupCalcOpenCLKernelBinaryCreated)
2886 if (xGroup->meCalcState == sc::GroupCalcOpenCLKernelCompilationScheduled)
2888 aGuard.clear();
2889 xGroup->maCompilationDone.wait();
2890 xGroup->maCompilationDone.reset();
2892 else
2894 aGuard.clear();
2897 pKernel = static_cast<DynamicKernel*>(xGroup->mpCompiledFormula);
2899 else
2901 assert(xGroup->meCalcState == sc::GroupCalcRunning);
2902 aGuard.clear();
2903 pKernel = static_cast<DynamicKernel*>(createCompiledFormula(rDoc, rTopPos, xGroup, rCode));
2906 if (!pKernel)
2907 return false;
2909 try {
2910 // Obtain cl context
2911 KernelEnv kEnv;
2912 OpenclDevice::setKernelEnv(&kEnv);
2913 // Run the kernel.
2914 pKernel->Launch(xGroup->mnLength);
2915 // Map results back
2916 cl_mem res = pKernel->GetResultBuffer();
2917 cl_int err;
2918 double *resbuf = new double[xGroup->mnLength];
2919 err = clEnqueueReadBuffer(kEnv.mpkCmdQueue,res,
2920 CL_TRUE, 0, xGroup->mnLength*sizeof(double), resbuf, 0, NULL, NULL);
2921 if (err != CL_SUCCESS)
2922 throw OpenCLError(err);
2924 rDoc.SetFormulaResults(rTopPos, resbuf, xGroup->mnLength);
2925 delete[] resbuf;
2926 if (xGroup->meCalcState == sc::GroupCalcRunning)
2927 delete pKernel;
2929 catch (const UnhandledToken &ut) {
2930 std::cerr << "\nDynamic formual compiler: unhandled token: ";
2931 std::cerr << ut.mMessage << "\n";
2932 #ifdef NO_FALLBACK_TO_SWINTERP
2933 assert(false);
2934 return true;
2935 #else
2936 return false;
2937 #endif
2939 catch (const OpenCLError &oce) {
2940 std::cerr << "Dynamic formula compiler: OpenCL error: ";
2941 std::cerr << oce.mError << "\n";
2942 #ifdef NO_FALLBACK_TO_SWINTERP
2943 assert(false);
2944 return true;
2945 #else
2946 return false;
2947 #endif
2949 catch (const Unhandled &uh) {
2950 std::cerr << "Dynamic formula compiler: unhandled case:";
2951 std::cerr <<" at ";
2952 std::cerr << uh.mFile << ":" << uh.mLineNumber << "\n";
2953 #ifdef NO_FALLBACK_TO_SWINTERP
2954 assert(false);
2955 return true;
2956 #else
2957 return false;
2958 #endif
2960 catch (...) {
2961 std::cerr << "Dynamic formula compiler: unhandled compiler error\n";
2962 #ifdef NO_FALLBACK_TO_SWINTERP
2963 assert(false);
2964 return true;
2965 #else
2966 return false;
2967 #endif
2969 return true;
2970 } // namespace opencl
2972 }} // namespace sc
2974 extern "C" {
2976 SAL_DLLPUBLIC_EXPORT sc::FormulaGroupInterpreter* SAL_CALL
2977 createFormulaGroupOpenCLInterpreter()
2979 #if 0// USE_GROUNDWATER_INTERPRETER
2980 if (getenv("SC_GROUNDWATER"))
2981 return new sc::opencl::FormulaGroupInterpreterGroundwater();
2982 #endif
2984 return new sc::opencl::FormulaGroupInterpreterOpenCL();
2987 SAL_DLLPUBLIC_EXPORT size_t getOpenCLPlatformCount()
2989 return sc::opencl::getOpenCLPlatformCount();
2992 SAL_DLLPUBLIC_EXPORT void SAL_CALL fillOpenCLInfo(
2993 sc::OpenclPlatformInfo* pInfos, size_t nInfoSize)
2995 const std::vector<sc::OpenclPlatformInfo>& rPlatforms =
2996 sc::opencl::fillOpenCLInfo();
2997 size_t n = std::min(rPlatforms.size(), nInfoSize);
2998 for (size_t i = 0; i < n; ++i)
2999 pInfos[i] = rPlatforms[i];
3002 SAL_DLLPUBLIC_EXPORT bool SAL_CALL switchOpenClDevice(
3003 const OUString* pDeviceId, bool bAutoSelect,
3004 bool bForceEvaluation)
3006 return sc::opencl::switchOpenclDevice(pDeviceId, bAutoSelect, bForceEvaluation);
3009 SAL_DLLPUBLIC_EXPORT void SAL_CALL getOpenCLDeviceInfo(size_t* pDeviceId, size_t* pPlatformId)
3011 sc::opencl::getOpenCLDeviceInfo(*pDeviceId, *pPlatformId);
3014 } // extern "C"
3016 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */