1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include "formulagroup.hxx"
11 #include "grouptokenconverter.hxx"
12 #include "document.hxx"
13 #include "formulacell.hxx"
14 #include "tokenarray.hxx"
15 #include "compiler.hxx"
16 #include "interpre.hxx"
17 #include "formula/vectortoken.hxx"
18 #include "scmatrix.hxx"
20 #include "openclwrapper.hxx"
22 #include "op_financial.hxx"
23 #include "op_database.hxx"
24 #include "op_math.hxx"
25 #include "op_logical.hxx"
26 #include "op_statistical.hxx"
27 #include "op_array.hxx"
29 // Comment out this to turn off FMIN and FMAX intrinsics
30 #define USE_FMIN_FMAX 1
31 #define REDUCE_THRESHOLD 4 // set to 4 for correctness testing. priority 1
32 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
33 #include "formulagroupcl_public.hxx"
37 static const unsigned long __nan
[2] = {0xffffffff, 0x7fffffff};
39 #define NAN (*(const double*) __nan)
50 #include <rtl/digest.h>
54 #include <boost/scoped_ptr.hpp>
56 #undef NO_FALLBACK_TO_SWINTERP /* undef this for non-TDD runs */
58 using namespace formula
;
60 namespace sc
{ namespace opencl
{
63 /// Map the buffer used by an argument and do necessary argument setting
64 size_t VectorRef::Marshal(cl_kernel k
, int argno
, int, cl_program
)
66 FormulaToken
*ref
= mFormulaTree
->GetFormulaToken();
67 double *pHostBuffer
= NULL
;
68 size_t requestedLength
= 1;
69 size_t szHostBuffer
= 0;
70 if (ref
->GetType() == formula::svSingleVectorRef
) {
71 const formula::SingleVectorRefToken
* pSVR
=
72 dynamic_cast< const formula::SingleVectorRefToken
* >(ref
);
74 pHostBuffer
= const_cast<double*>(pSVR
->GetArray().mpNumericArray
);
75 szHostBuffer
= pSVR
->GetArrayLength() * sizeof(double);
76 requestedLength
= pSVR
->GetRequestedArrayLength();
78 std::cerr
<< "Marshal a Single vector of size " << pSVR
->GetArrayLength();
79 std::cerr
<< " at argument "<< argno
<< "\n";
81 } else if (ref
->GetType() == formula::svDoubleVectorRef
) {
82 const formula::DoubleVectorRefToken
* pDVR
=
83 dynamic_cast< const formula::DoubleVectorRefToken
* >(ref
);
85 pHostBuffer
= const_cast<double*>(
86 pDVR
->GetArrays()[mnIndex
].mpNumericArray
);
87 szHostBuffer
= pDVR
->GetArrayLength() * sizeof(double);
88 requestedLength
= pDVR
->GetRequestedArrayLength();
94 OpenclDevice::setKernelEnv(&kEnv
);
98 mpClmem
= clCreateBuffer(kEnv
.mpkContext
,
99 (cl_mem_flags
) CL_MEM_READ_ONLY
,
102 if (CL_SUCCESS
!= err
)
103 throw OpenCLError(err
);
104 err
= clEnqueueWriteBuffer(kEnv
.mpkCmdQueue
, mpClmem
,CL_TRUE
, 0, szHostBuffer
,
105 pHostBuffer
, 0, NULL
, NULL
);
106 if (CL_SUCCESS
!= err
)
107 throw OpenCLError(err
);
111 if (szHostBuffer
== 0)
112 szHostBuffer
= requestedLength
* sizeof(double);//vector length for NAN vector
113 // Marshal as a buffer of NANs
114 mpClmem
= clCreateBuffer(kEnv
.mpkContext
,
115 (cl_mem_flags
) CL_MEM_READ_ONLY
,
116 szHostBuffer
, NULL
, &err
);
117 if (CL_SUCCESS
!= err
)
118 throw OpenCLError(err
);
119 double *pNanBuffer
= new double[szHostBuffer
/sizeof(double)];
120 for (size_t i
= 0; i
< szHostBuffer
/sizeof(double); i
++)
122 err
= clEnqueueWriteBuffer(kEnv
.mpkCmdQueue
, mpClmem
,CL_TRUE
, 0, szHostBuffer
,
123 pNanBuffer
, 0, NULL
, NULL
);
124 if (CL_SUCCESS
!= err
)
125 throw OpenCLError(err
);
130 err
= clSetKernelArg(k
, argno
, sizeof(cl_mem
), (void*)&mpClmem
);
131 if (CL_SUCCESS
!= err
)
132 throw OpenCLError(err
);
136 /// Arguments that are actually compile-time constant string
137 /// Currently, only the hash is passed.
138 /// TBD(IJSUNG): pass also length and the actual string if there is a
139 /// hash function collision
140 class ConstStringArgument
: public DynamicKernelArgument
143 ConstStringArgument(const std::string
&s
,
144 FormulaTreeNodeRef ft
):
145 DynamicKernelArgument(s
, ft
) {}
146 /// Generate declaration
147 virtual void GenDecl(std::stringstream
&ss
) const
149 ss
<< "unsigned " << mSymName
;
151 virtual void GenDeclRef(std::stringstream
&ss
) const
153 ss
<< GenSlidingWindowDeclRef(false);
155 virtual void GenSlidingWindowDecl(std::stringstream
&ss
) const
159 virtual std::string
GenSlidingWindowDeclRef(bool=false) const
161 std::stringstream ss
;
162 if (GetFormulaToken()->GetType() != formula::svString
)
164 FormulaToken
*Tok
= GetFormulaToken();
165 ss
<< Tok
->GetString().getString().toAsciiUpperCase().hashCode() << "U";
168 virtual size_t GetWindowSize(void) const
172 /// Pass the 32-bit hash of the string to the kernel
173 virtual size_t Marshal(cl_kernel k
, int argno
, int, cl_program
)
175 FormulaToken
*ref
= mFormulaTree
->GetFormulaToken();
176 cl_uint hashCode
= 0;
177 if (ref
->GetType() == formula::svString
)
179 const rtl::OUString s
= ref
->GetString().getString().toAsciiUpperCase();
180 hashCode
= s
.hashCode();
187 OpenclDevice::setKernelEnv(&kEnv
);
188 // Pass the scalar result back to the rest of the formula kernel
189 cl_int err
= clSetKernelArg(k
, argno
, sizeof(cl_uint
), (void*)&hashCode
);
190 if (CL_SUCCESS
!= err
)
191 throw OpenCLError(err
);
196 /// Arguments that are actually compile-time constants
197 class DynamicKernelConstantArgument
: public DynamicKernelArgument
200 DynamicKernelConstantArgument(const std::string
&s
,
201 FormulaTreeNodeRef ft
):
202 DynamicKernelArgument(s
, ft
) {}
203 /// Generate declaration
204 virtual void GenDecl(std::stringstream
&ss
) const
206 ss
<< "double " << mSymName
;
208 virtual void GenDeclRef(std::stringstream
&ss
) const
212 virtual void GenSlidingWindowDecl(std::stringstream
&ss
) const
216 virtual std::string
GenSlidingWindowDeclRef(bool=false) const
218 if (GetFormulaToken()->GetType() != formula::svDouble
)
222 virtual size_t GetWindowSize(void) const
226 double GetDouble(void) const
228 FormulaToken
*Tok
= GetFormulaToken();
229 if (Tok
->GetType() != formula::svDouble
)
231 return Tok
->GetDouble();
233 /// Create buffer and pass the buffer to a given kernel
234 virtual size_t Marshal(cl_kernel k
, int argno
, int, cl_program
)
236 double tmp
= GetDouble();
237 // Pass the scalar result back to the rest of the formula kernel
238 cl_int err
= clSetKernelArg(k
, argno
, sizeof(double), (void*)&tmp
);
239 if (CL_SUCCESS
!= err
)
240 throw OpenCLError(err
);
243 virtual cl_mem
GetCLBuffer(void) const { return NULL
; }
246 class DynamicKernelPiArgument
: public DynamicKernelArgument
249 DynamicKernelPiArgument(const std::string
&s
,
250 FormulaTreeNodeRef ft
):
251 DynamicKernelArgument(s
, ft
) {}
252 /// Generate declaration
253 virtual void GenDecl(std::stringstream
&ss
) const
255 ss
<< "double " << mSymName
;
257 virtual void GenDeclRef(std::stringstream
&ss
) const
259 ss
<< "3.14159265358979";
261 virtual void GenSlidingWindowDecl(std::stringstream
&ss
) const
265 virtual std::string
GenSlidingWindowDeclRef(bool=false) const
269 virtual size_t GetWindowSize(void) const
273 /// Create buffer and pass the buffer to a given kernel
274 virtual size_t Marshal(cl_kernel k
, int argno
, int, cl_program
)
277 // Pass the scalar result back to the rest of the formula kernel
278 cl_int err
= clSetKernelArg(k
, argno
, sizeof(double), (void*)&tmp
);
279 if (CL_SUCCESS
!= err
)
280 throw OpenCLError(err
);
285 class DynamicKernelRandomArgument
: public DynamicKernelArgument
288 DynamicKernelRandomArgument(const std::string
&s
,
289 FormulaTreeNodeRef ft
):
290 DynamicKernelArgument(s
, ft
) {}
291 /// Generate declaration
292 virtual void GenDecl(std::stringstream
&ss
) const
294 ss
<< "double " << mSymName
;
296 virtual void GenDeclRef(std::stringstream
&ss
) const
299 srand((unsigned)time(NULL
));
300 d
=((double)rand())/RAND_MAX
;
303 virtual void GenSlidingWindowDecl(std::stringstream
&ss
) const
307 virtual std::string
GenSlidingWindowDeclRef(bool=false) const
311 virtual size_t GetWindowSize(void) const
315 /// Create buffer and pass the buffer to a given kernel
316 virtual size_t Marshal(cl_kernel k
, int argno
, int, cl_program
)
319 // Pass the scalar result back to the rest of the formula kernel
320 cl_int err
= clSetKernelArg(k
, argno
, sizeof(double), (void*)&tmp
);
321 if (CL_SUCCESS
!= err
)
322 throw OpenCLError(err
);
327 /// A vector of strings
328 class DynamicKernelStringArgument
: public VectorRef
331 DynamicKernelStringArgument(const std::string
&s
,
332 FormulaTreeNodeRef ft
, int index
= 0):
333 VectorRef(s
, ft
, index
) {}
335 virtual void GenSlidingWindowFunction(std::stringstream
&) {}
336 /// Generate declaration
337 virtual void GenDecl(std::stringstream
&ss
) const
339 ss
<< "__global unsigned int *"<<mSymName
;
341 virtual void GenSlidingWindowDecl(std::stringstream
& ss
) const
343 DynamicKernelStringArgument::GenDecl(ss
);
345 virtual size_t Marshal(cl_kernel
, int, int, cl_program
);
348 /// Marshal a string vector reference
349 size_t DynamicKernelStringArgument::Marshal(cl_kernel k
, int argno
, int, cl_program
)
351 FormulaToken
*ref
= mFormulaTree
->GetFormulaToken();
354 OpenclDevice::setKernelEnv(&kEnv
);
356 formula::VectorRefArray vRef
;
358 if (ref
->GetType() == formula::svSingleVectorRef
) {
359 const formula::SingleVectorRefToken
* pSVR
=
360 dynamic_cast< const formula::SingleVectorRefToken
* >(ref
);
362 nStrings
= pSVR
->GetArrayLength();
363 vRef
= pSVR
->GetArray();
364 } else if (ref
->GetType() == formula::svDoubleVectorRef
) {
365 const formula::DoubleVectorRefToken
* pDVR
=
366 dynamic_cast< const formula::DoubleVectorRefToken
* >(ref
);
368 nStrings
= pDVR
->GetArrayLength();
369 vRef
= pDVR
->GetArrays()[mnIndex
];
371 size_t szHostBuffer
= nStrings
* sizeof(cl_int
);
372 // Marshal strings. Right now we pass hashes of these string
373 mpClmem
= clCreateBuffer(kEnv
.mpkContext
,
374 (cl_mem_flags
) CL_MEM_READ_ONLY
|CL_MEM_ALLOC_HOST_PTR
,
375 szHostBuffer
, NULL
, &err
);
376 if (CL_SUCCESS
!= err
)
377 throw OpenCLError(err
);
378 cl_uint
*pHashBuffer
= (cl_uint
*)clEnqueueMapBuffer(
379 kEnv
.mpkCmdQueue
, mpClmem
, CL_TRUE
, CL_MAP_WRITE
, 0,
380 szHostBuffer
, 0, NULL
, NULL
, &err
);
381 if (CL_SUCCESS
!= err
)
382 throw OpenCLError(err
);
383 for (size_t i
= 0; i
< nStrings
; i
++)
385 if (vRef
.mpStringArray
[i
])
387 const OUString tmp
= OUString(vRef
.mpStringArray
[i
]);
388 pHashBuffer
[i
] = tmp
.hashCode();
395 err
= clEnqueueUnmapMemObject(kEnv
.mpkCmdQueue
, mpClmem
,
396 pHashBuffer
, 0, NULL
, NULL
);
397 if (CL_SUCCESS
!= err
)
398 throw OpenCLError(err
);
400 err
= clSetKernelArg(k
, argno
, sizeof(cl_mem
), (void*)&mpClmem
);
401 if (CL_SUCCESS
!= err
)
402 throw OpenCLError(err
);
406 /// A mixed string/numberic vector
407 class DynamicKernelMixedArgument
: public VectorRef
410 DynamicKernelMixedArgument(const std::string
&s
,
411 FormulaTreeNodeRef ft
):
412 VectorRef(s
, ft
), mStringArgument(s
+"s", ft
) {}
413 virtual void GenSlidingWindowDecl(std::stringstream
& ss
) const
415 VectorRef::GenSlidingWindowDecl(ss
);
417 mStringArgument
.GenSlidingWindowDecl(ss
);
419 virtual void GenSlidingWindowFunction(std::stringstream
&) {}
420 /// Generate declaration
421 virtual void GenDecl(std::stringstream
&ss
) const
423 VectorRef::GenDecl(ss
);
425 mStringArgument
.GenDecl(ss
);
427 virtual void GenDeclRef(std::stringstream
&ss
) const
429 VectorRef::GenDeclRef(ss
);
431 mStringArgument
.GenDeclRef(ss
);
433 virtual std::string
GenSlidingWindowDeclRef(bool) const
435 std::stringstream ss
;
436 ss
<< "(!isNan(" << VectorRef::GenSlidingWindowDeclRef();
437 ss
<< ")?" << VectorRef::GenSlidingWindowDeclRef();
438 ss
<< ":" << mStringArgument
.GenSlidingWindowDeclRef();
442 virtual size_t Marshal(cl_kernel k
, int argno
, int vw
, cl_program p
)
444 int i
= VectorRef::Marshal(k
, argno
, vw
, p
);
445 i
+= mStringArgument
.Marshal(k
, argno
+i
, vw
, p
);
449 DynamicKernelStringArgument mStringArgument
;
452 /// Handling a Double Vector that is used as a sliding window input
453 /// to either a sliding window average or sum-of-products
454 /// Generate a sequential loop for reductions
455 class OpSum
; // Forward Declaration
456 class OpAverage
; // Forward Declaration
457 class OpMin
; // Forward Declaration
458 class OpMax
; // Forward Declaration
459 class OpCount
; // Forward Declaration
461 class DynamicKernelSlidingArgument
: public Base
464 DynamicKernelSlidingArgument(const std::string
&s
,
465 FormulaTreeNodeRef ft
, boost::shared_ptr
<SlidingFunctionBase
> &CodeGen
,
467 Base(s
, ft
, index
), mpCodeGen(CodeGen
), mpClmem2(NULL
)
469 FormulaToken
*t
= ft
->GetFormulaToken();
470 if (t
->GetType() != formula::svDoubleVectorRef
)
472 mpDVR
= dynamic_cast<const formula::DoubleVectorRefToken
*>(t
);
474 bIsStartFixed
= mpDVR
->IsStartFixed();
475 bIsEndFixed
= mpDVR
->IsEndFixed();
477 // Should only be called by SumIfs. Yikes!
478 virtual bool NeedParallelReduction(void) const
480 assert(dynamic_cast<OpSumIfs
*>(mpCodeGen
.get()));
481 return GetWindowSize()> 100 &&
482 ( (GetStartFixed() && GetEndFixed()) ||
483 (!GetStartFixed() && !GetEndFixed()) ) ;
485 virtual void GenSlidingWindowFunction(std::stringstream
&) {}
487 virtual std::string
GenSlidingWindowDeclRef(bool nested
=false) const
489 size_t nArrayLength
= mpDVR
->GetArrayLength();
490 std::stringstream ss
;
491 if (!bIsStartFixed
&& !bIsEndFixed
)
494 ss
<< "((i+gid0) <" << nArrayLength
<<"?";
495 ss
<< Base::GetName() << "[i + gid0]";
502 ss
<< "(i <" << nArrayLength
<<"?";
503 ss
<< Base::GetName() << "[i]";
509 /// Controls how the elements in the DoubleVectorRef are traversed
510 virtual size_t GenReductionLoopHeader(
511 std::stringstream
&ss
, bool &needBody
)
514 size_t nCurWindowSize
= mpDVR
->GetRefRowSize();
516 #ifndef UNROLLING_FACTOR
518 // No need to generate a for-loop for degenerated cases
519 if (nCurWindowSize
== 1)
521 ss
<< "if (gid0 <" << mpDVR
->GetArrayLength();
522 ss
<< ")\n\t{\tint i = 0;\n\t\t";
523 return nCurWindowSize
;
526 ss
<< "for (int i = ";
527 if (!bIsStartFixed
&& bIsEndFixed
)
530 ss
<< "gid0; i < " << mpDVR
->GetArrayLength();
531 ss
<< " && i < " << nCurWindowSize
<< "; i++){\n\t\t";
533 ss
<< "gid0; i < "<< nCurWindowSize
<< "; i++)\n\t\t";
536 else if (bIsStartFixed
&& !bIsEndFixed
)
539 ss
<< "0; i < " << mpDVR
->GetArrayLength();
540 ss
<< " && i < gid0+"<< nCurWindowSize
<< "; i++){\n\t\t";
542 ss
<< "0; i < gid0+"<< nCurWindowSize
<< "; i++)\n\t\t";
545 else if (!bIsStartFixed
&& !bIsEndFixed
)
548 ss
<< "0; i + gid0 < " << mpDVR
->GetArrayLength();
549 ss
<< " && i < "<< nCurWindowSize
<< "; i++){\n\t\t";
551 ss
<< "0; i < "<< nCurWindowSize
<< "; i++)\n\t\t";
557 std::min(mpDVR
->GetArrayLength(), nCurWindowSize
);
558 ss
<< "0; i < "<< limit
<< "; i++){\n\t\t";
560 return nCurWindowSize
;
563 #ifdef UNROLLING_FACTOR
565 if (!mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed()) {
566 ss
<< "for (int i = ";
567 ss
<< "gid0; i < " << mpDVR
->GetArrayLength();
568 ss
<< " && i < " << nCurWindowSize
<< "; i++){\n\t\t";
570 return nCurWindowSize
;
571 } else if (mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed()) {
572 ss
<< "for (int i = ";
573 ss
<< "0; i < " << mpDVR
->GetArrayLength();
574 ss
<< " && i < gid0+"<< nCurWindowSize
<< "; i++){\n\t\t";
576 return nCurWindowSize
;
577 } else if (!mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed()){
578 ss
<< "tmpBottom = " << mpCodeGen
->GetBottom() << ";\n\t";
580 std::stringstream temp1
,temp2
;
581 int outLoopSize
= UNROLLING_FACTOR
;
582 if ( nCurWindowSize
/outLoopSize
!= 0){
583 ss
<< "for(int outLoop=0; outLoop<" << nCurWindowSize
/outLoopSize
<< "; outLoop++){\n\t";
584 for(int count
=0; count
< outLoopSize
; count
++){
585 ss
<< "i = outLoop*"<<outLoopSize
<<"+"<<count
<<";\n\t";
587 temp1
<< "if(i + gid0 < " <<mpDVR
->GetArrayLength();
589 temp1
<< "tmp = legalize(";
590 temp1
<< mpCodeGen
->Gen2(GenSlidingWindowDeclRef(), "tmp");
591 temp1
<< ", tmp);\n\t\t\t";
598 // The residual of mod outLoopSize
599 for(unsigned int count
=nCurWindowSize
/outLoopSize
*outLoopSize
; count
< nCurWindowSize
; count
++){
600 ss
<< "i = "<<count
<<";\n\t";
601 if(count
==nCurWindowSize
/outLoopSize
*outLoopSize
){
602 temp2
<< "if(i + gid0 < " << mpDVR
->GetArrayLength();
604 temp2
<< "tmp = legalize(";
605 temp2
<< mpCodeGen
->Gen2(GenSlidingWindowDeclRef(), "tmp");
606 temp2
<< ", tmp);\n\t\t\t";
611 ss
<< "} // to scope the int i declaration\n";
613 return nCurWindowSize
;
615 // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
617 ss
<< "//else situation \n\t";
618 ss
<< "tmpBottom = " << mpCodeGen
->GetBottom() << ";\n\t";
620 std::stringstream temp1
,temp2
;
621 int outLoopSize
= UNROLLING_FACTOR
;
622 if (nCurWindowSize
/outLoopSize
!= 0){
623 ss
<< "for(int outLoop=0; outLoop<" << nCurWindowSize
/outLoopSize
<< "; outLoop++){\n\t";
624 for(int count
=0; count
< outLoopSize
; count
++){
625 ss
<< "i = outLoop*"<<outLoopSize
<<"+"<<count
<<";\n\t";
627 temp1
<< "tmp = legalize(";
628 temp1
<< mpCodeGen
->Gen2(GenSlidingWindowDeclRef(), "tmp");
629 temp1
<< ", tmp);\n\t\t\t";
635 // The residual of mod outLoopSize
636 for(unsigned int count
=nCurWindowSize
/outLoopSize
*outLoopSize
; count
< nCurWindowSize
; count
++){
637 ss
<< "i = "<<count
<<";\n\t";
638 if(count
==nCurWindowSize
/outLoopSize
*outLoopSize
){
639 temp2
<< "tmp = legalize(";
640 temp2
<< mpCodeGen
->Gen2(GenSlidingWindowDeclRef(), "tmp");
641 temp2
<< ", tmp);\n\t\t\t";
645 ss
<< "} // to scope the int i declaration\n";
647 return nCurWindowSize
;
652 ~DynamicKernelSlidingArgument()
656 clReleaseMemObject(mpClmem2
);
661 size_t GetArrayLength(void) const {return mpDVR
->GetArrayLength(); }
663 size_t GetWindowSize(void) const {return mpDVR
->GetRefRowSize(); }
665 size_t GetStartFixed(void) const {return bIsStartFixed
; }
667 size_t GetEndFixed(void) const {return bIsEndFixed
; }
670 bool bIsStartFixed
, bIsEndFixed
;
671 const formula::DoubleVectorRefToken
*mpDVR
;
673 boost::shared_ptr
<SlidingFunctionBase
> mpCodeGen
;
674 // controls whether to invoke the reduction kernel during marshaling or not
678 /// Handling a Double Vector that is used as a sliding window input
679 /// Performs parallel reduction based on given operator
681 class ParallelReductionVectorRef
: public Base
684 ParallelReductionVectorRef(const std::string
&s
,
685 FormulaTreeNodeRef ft
, boost::shared_ptr
<SlidingFunctionBase
> &CodeGen
,
687 Base(s
, ft
, index
), mpCodeGen(CodeGen
), mpClmem2(NULL
)
689 FormulaToken
*t
= ft
->GetFormulaToken();
690 if (t
->GetType() != formula::svDoubleVectorRef
)
692 mpDVR
= dynamic_cast<const formula::DoubleVectorRefToken
*>(t
);
694 bIsStartFixed
= mpDVR
->IsStartFixed();
695 bIsEndFixed
= mpDVR
->IsEndFixed();
697 /// Emit the definition for the auxiliary reduction kernel
698 virtual void GenSlidingWindowFunction(std::stringstream
&ss
) {
699 std::string name
= Base::GetName();
700 ss
<< "__kernel void "<<name
;
701 ss
<< "_reduction(__global double* A, "
702 "__global double *result,int arrayLength,int windowSize){\n";
703 ss
<< " double tmp, current_result =" <<
704 mpCodeGen
->GetBottom();
706 ss
<< " int writePos = get_group_id(1);\n";
707 ss
<< " int lidx = get_local_id(0);\n";
708 ss
<< " __local double shm_buf[256];\n";
709 if (mpDVR
->IsStartFixed())
710 ss
<< " int offset = 0;\n";
711 else // if (!mpDVR->IsStartFixed())
712 ss
<< " int offset = get_group_id(1);\n";
713 if (mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed())
714 ss
<< " int end = windowSize;\n";
715 else if (!mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
716 ss
<< " int end = offset + windowSize;\n";
717 else if (mpDVR
->IsStartFixed() && !mpDVR
->IsEndFixed())
718 ss
<< " int end = windowSize + get_group_id(1);\n";
719 else if (!mpDVR
->IsStartFixed() && mpDVR
->IsEndFixed())
720 ss
<< " int end = windowSize;\n";
721 ss
<< " end = min(end, arrayLength);\n";
723 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
724 ss
<< " int loop = arrayLength/512 + 1;\n";
725 ss
<< " for (int l=0; l<loop; l++){\n";
726 ss
<< " tmp = "<< mpCodeGen
->GetBottom() << ";\n";
727 ss
<< " int loopOffset = l*512;\n";
728 ss
<< " if((loopOffset + lidx + offset + 256) < end) {\n";
729 ss
<< " tmp = legalize(" << mpCodeGen
->Gen2(
730 "A[loopOffset + lidx + offset]", "tmp") <<", tmp);\n";
731 ss
<< " tmp = legalize(" << mpCodeGen
->Gen2(
732 "A[loopOffset + lidx + offset + 256]", "tmp") <<", tmp);\n";
733 ss
<< " } else if ((loopOffset + lidx + offset) < end)\n";
734 ss
<< " tmp = legalize(" << mpCodeGen
->Gen2(
735 "A[loopOffset + lidx + offset]", "tmp") <<", tmp);\n";
736 ss
<< " shm_buf[lidx] = tmp;\n";
737 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
738 ss
<< " for (int i = 128; i >0; i/=2) {\n";
739 ss
<< " if (lidx < i)\n";
740 ss
<< " shm_buf[lidx] = ";
741 // Special case count
742 if (dynamic_cast<OpCount
*>(mpCodeGen
.get()))
743 ss
<< "shm_buf[lidx] + shm_buf[lidx + i];\n";
745 ss
<< mpCodeGen
->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]")<<";\n";
746 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
748 ss
<< " if (lidx == 0)\n";
749 ss
<< " current_result =";
750 if (dynamic_cast<OpCount
*>(mpCodeGen
.get()))
751 ss
<< "current_result + shm_buf[0]";
753 ss
<< mpCodeGen
->Gen2("current_result", "shm_buf[0]");
755 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
757 ss
<< " if (lidx == 0)\n";
758 ss
<< " result[writePos] = current_result;\n";
763 virtual std::string
GenSlidingWindowDeclRef(bool=false) const
765 std::stringstream ss
;
766 if (!bIsStartFixed
&& !bIsEndFixed
)
767 ss
<< Base::GetName() << "[i + gid0]";
769 ss
<< Base::GetName() << "[i]";
772 /// Controls how the elements in the DoubleVectorRef are traversed
773 virtual size_t GenReductionLoopHeader(
774 std::stringstream
&ss
, bool &needBody
)
777 size_t nCurWindowSize
= mpDVR
->GetRefRowSize();
778 std::string temp
= Base::GetName() + "[gid0]";
780 // Special case count
781 if (dynamic_cast<OpCount
*>(mpCodeGen
.get()))
782 ss
<< temp
<< "+ tmp";
784 ss
<< mpCodeGen
->Gen2(temp
, "tmp");
787 return nCurWindowSize
;
790 virtual size_t Marshal(cl_kernel k
, int argno
, int w
, cl_program mpProgram
)
792 assert(Base::mpClmem
== NULL
);
795 OpenclDevice::setKernelEnv(&kEnv
);
797 size_t nInput
= mpDVR
->GetArrayLength();
798 size_t nCurWindowSize
= mpDVR
->GetRefRowSize();
799 // create clmem buffer
800 if (mpDVR
->GetArrays()[Base::mnIndex
].mpNumericArray
== NULL
)
802 double *pHostBuffer
= const_cast<double*>(
803 mpDVR
->GetArrays()[Base::mnIndex
].mpNumericArray
);
804 size_t szHostBuffer
= nInput
* sizeof(double);
805 Base::mpClmem
= clCreateBuffer(kEnv
.mpkContext
,
806 (cl_mem_flags
) CL_MEM_READ_ONLY
,
809 if (CL_SUCCESS
!= err
)
810 throw OpenCLError(err
);
811 err
= clEnqueueWriteBuffer(kEnv
.mpkCmdQueue
, Base::mpClmem
,CL_TRUE
, 0,
812 szHostBuffer
, pHostBuffer
, 0, NULL
, NULL
);
813 if (CL_SUCCESS
!= err
)
814 throw OpenCLError(err
);
815 mpClmem2
= clCreateBuffer(kEnv
.mpkContext
, CL_MEM_WRITE_ONLY
,
816 sizeof(double)*w
, NULL
, NULL
);
817 if (CL_SUCCESS
!= err
)
818 throw OpenCLError(err
);
819 // reproduce the reduction function name
820 std::string kernelName
= Base::GetName() + "_reduction";
822 cl_kernel redKernel
= clCreateKernel(mpProgram
, kernelName
.c_str(), &err
);
823 if (err
!= CL_SUCCESS
)
824 throw OpenCLError(err
);
825 // set kernel arg of reduction kernel
826 // TODO(Wei Wei): use unique name for kernel
827 cl_mem buf
= Base::GetCLBuffer();
828 err
= clSetKernelArg(redKernel
, 0, sizeof(cl_mem
),
830 if (CL_SUCCESS
!= err
)
831 throw OpenCLError(err
);
833 err
= clSetKernelArg(redKernel
, 1, sizeof(cl_mem
), (void *)&mpClmem2
);
834 if (CL_SUCCESS
!= err
)
835 throw OpenCLError(err
);
837 err
= clSetKernelArg(redKernel
, 2, sizeof(cl_int
), (void*)&nInput
);
838 if (CL_SUCCESS
!= err
)
839 throw OpenCLError(err
);
841 err
= clSetKernelArg(redKernel
, 3, sizeof(cl_int
), (void*)&nCurWindowSize
);
842 if (CL_SUCCESS
!= err
)
843 throw OpenCLError(err
);
845 // set work group size and execute
846 size_t global_work_size
[] = {256, (size_t)w
};
847 size_t local_work_size
[] = {256, 1};
848 err
= clEnqueueNDRangeKernel(kEnv
.mpkCmdQueue
, redKernel
, 2, NULL
,
849 global_work_size
, local_work_size
, 0, NULL
, NULL
);
850 if (CL_SUCCESS
!= err
)
851 throw OpenCLError(err
);
852 err
= clFinish(kEnv
.mpkCmdQueue
);
853 if (CL_SUCCESS
!= err
)
854 throw OpenCLError(err
);
857 err
= clSetKernelArg(k
, argno
, sizeof(cl_mem
), (void*)&(mpClmem2
));
858 if (CL_SUCCESS
!= err
)
859 throw OpenCLError(err
);
862 ~ParallelReductionVectorRef()
866 clReleaseMemObject(mpClmem2
);
871 size_t GetArrayLength(void) const {return mpDVR
->GetArrayLength(); }
873 size_t GetWindowSize(void) const {return mpDVR
->GetRefRowSize(); }
875 size_t GetStartFixed(void) const {return bIsStartFixed
; }
877 size_t GetEndFixed(void) const {return bIsEndFixed
; }
880 bool bIsStartFixed
, bIsEndFixed
;
881 const formula::DoubleVectorRefToken
*mpDVR
;
883 boost::shared_ptr
<SlidingFunctionBase
> mpCodeGen
;
884 // controls whether to invoke the reduction kernel during marshaling or not
888 class Reduction
: public SlidingFunctionBase
891 typedef DynamicKernelSlidingArgument
<VectorRef
> NumericRange
;
892 typedef DynamicKernelSlidingArgument
<DynamicKernelStringArgument
> StringRange
;
893 typedef ParallelReductionVectorRef
<VectorRef
> ParallelNumericRange
;
895 virtual void GenSlidingWindowFunction(std::stringstream
&ss
,
896 const std::string sSymName
, SubArguments
&vSubArguments
)
898 ss
<< "\ndouble " << sSymName
;
899 ss
<< "_"<< BinFuncName() <<"(";
900 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
904 vSubArguments
[i
]->GenSlidingWindowDecl(ss
);
907 ss
<< "double tmp = " << GetBottom() <<";\n\t";
908 ss
<< "int gid0 = get_global_id(0);\n\t";
910 ss
<< "int nCount = 0;\n\t";
911 ss
<< "double tmpBottom;\n\t";
912 unsigned i
= vSubArguments
.size();
916 if (NumericRange
*NR
=
917 dynamic_cast<NumericRange
*> (vSubArguments
[i
].get()))
920 nItems
+= NR
->GenReductionLoopHeader(ss
, needBody
);
921 if (needBody
== false) continue;
923 else if (ParallelNumericRange
*PNR
=
924 dynamic_cast<ParallelNumericRange
*> (vSubArguments
[i
].get()))
928 nItems
+= PNR
->GenReductionLoopHeader(ss
, needBody
);
929 if (needBody
== false) continue;
931 else if (StringRange
*SR
=
932 dynamic_cast<StringRange
*> (vSubArguments
[i
].get()))
936 nItems
+= SR
->GenReductionLoopHeader(ss
, needBody
);
937 if (needBody
== false) continue;
941 FormulaToken
*pCur
= vSubArguments
[i
]->GetFormulaToken();
943 assert(pCur
->GetType() != formula::svDoubleVectorRef
);
945 if (pCur
->GetType() == formula::svSingleVectorRef
)
948 const formula::SingleVectorRefToken
* pSVR
=
949 dynamic_cast< const formula::SingleVectorRefToken
* >(pCur
);
950 ss
<< "if (gid0 < " << pSVR
->GetArrayLength() << "){\n\t\t";
955 else if (pCur
->GetType() == formula::svDouble
)
968 if(ocPush
==vSubArguments
[i
]->GetFormulaToken()->GetOpCode())
970 ss
<< "tmpBottom = " << GetBottom() << ";\n\t\t";
972 ss
<< vSubArguments
[i
]->GenSlidingWindowDeclRef();
975 ss
<< Gen2("tmpBottom", "tmp") << ";\n\t\t";
976 ss
<< "else{\n\t\t\t";
978 ss
<< Gen2(vSubArguments
[i
]->GenSlidingWindowDeclRef(), "tmp");
986 ss
<< Gen2(vSubArguments
[i
]->GenSlidingWindowDeclRef(), "tmp");
991 // Generate the operation in binary form
992 ss
<< Gen2(vSubArguments
[i
]->GenSlidingWindowDeclRef(), "tmp");
999 ss
<< "/(double)nCount";
1002 ss
<< "/(double)"<<nItems
;
1006 virtual bool isAverage() const { return false; }
1007 virtual bool takeString() const { return false; }
1008 virtual bool takeNumeric() const { return true; }
1011 // Strictly binary operators
1012 class Binary
: public SlidingFunctionBase
1015 virtual void GenSlidingWindowFunction(std::stringstream
&ss
,
1016 const std::string sSymName
, SubArguments
&vSubArguments
)
1018 ss
<< "\ndouble " << sSymName
;
1019 ss
<< "_"<< BinFuncName() <<"(";
1020 assert(vSubArguments
.size() == 2);
1021 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1025 vSubArguments
[i
]->GenSlidingWindowDecl(ss
);
1028 ss
<< "int gid0 = get_global_id(0), i = 0;\n\t";
1029 ss
<< "double tmp = ";
1030 ss
<< Gen2(vSubArguments
[0]->GenSlidingWindowDeclRef(false),
1031 vSubArguments
[1]->GenSlidingWindowDeclRef(false)) << ";\n\t";
1032 ss
<< "return tmp;\n}";
1034 virtual bool takeString() const { return true; }
1035 virtual bool takeNumeric() const { return true; }
1038 class SumOfProduct
: public SlidingFunctionBase
1041 virtual void GenSlidingWindowFunction(std::stringstream
&ss
,
1042 const std::string sSymName
, SubArguments
&vSubArguments
)
1044 size_t nCurWindowSize
= 0;
1045 FormulaToken
*tmpCur
= NULL
;
1046 const formula::DoubleVectorRefToken
*pCurDVR
= NULL
;
1047 ss
<< "\ndouble " << sSymName
;
1048 ss
<< "_"<< BinFuncName() <<"(";
1049 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1053 vSubArguments
[i
]->GenSlidingWindowDecl(ss
);
1054 size_t nCurChildWindowSize
= vSubArguments
[i
]->GetWindowSize();
1055 nCurWindowSize
= (nCurWindowSize
< nCurChildWindowSize
)?
1056 nCurChildWindowSize
:nCurWindowSize
;
1057 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
1058 if ( ocPush
==tmpCur
->GetOpCode() )
1061 pCurDVR
= dynamic_cast<
1062 const formula::DoubleVectorRefToken
*>(tmpCur
);
1064 ( (!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
1065 || (pCurDVR
->IsStartFixed() && pCurDVR
->IsEndFixed()) )
1071 ss
<< " double tmp = 0.0;\n";
1072 ss
<< " int gid0 = get_global_id(0);\n";
1073 #ifndef UNROLLING_FACTOR
1075 ss
<< " for (i = 0; i < "<< nCurWindowSize
<<"; i++)\n";
1077 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1079 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
1080 if(ocPush
==tmpCur
->GetOpCode())
1082 pCurDVR
= dynamic_cast<
1083 const formula::DoubleVectorRefToken
*>(tmpCur
);
1084 if(!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
1086 ss
<< " int currentCount";
1088 ss
<<" =i+gid0+1;\n";
1092 ss
<< " int currentCount";
1098 ss
<< " tmp += fsum(";
1099 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1104 if(ocPush
==vSubArguments
[i
]->GetFormulaToken()->GetOpCode())
1107 ss
<<"(currentCount";
1110 if(vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1111 formula::svSingleVectorRef
)
1113 const formula::SingleVectorRefToken
* pSVR
=
1114 dynamic_cast< const formula::SingleVectorRefToken
*>
1115 (vSubArguments
[i
]->GetFormulaToken());
1116 ss
<<pSVR
->GetArrayLength();
1118 else if(vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1119 formula::svDoubleVectorRef
)
1121 const formula::DoubleVectorRefToken
* pSVR
=
1122 dynamic_cast< const formula::DoubleVectorRefToken
*>
1123 (vSubArguments
[i
]->GetFormulaToken());
1124 ss
<<pSVR
->GetArrayLength();
1126 ss
<< ")||isNan("<<vSubArguments
[i
]
1127 ->GenSlidingWindowDeclRef(true);
1129 ss
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1133 ss
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1135 ss
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1138 ss
<< ", 0.0);\n\t}\n\t";
1139 ss
<< "return tmp;\n";
1143 #ifdef UNROLLING_FACTOR
1144 ss
<< "\tint i;\n\t";
1145 ss
<< "int currentCount0, currentCount1;\n\t";
1146 std::stringstream temp3
,temp4
;
1147 int outLoopSize
= UNROLLING_FACTOR
;
1148 if (nCurWindowSize
/outLoopSize
!= 0){
1149 ss
<< "for(int outLoop=0; outLoop<" <<
1150 nCurWindowSize
/outLoopSize
<< "; outLoop++){\n\t";
1151 for(int count
=0; count
< outLoopSize
; count
++){
1152 ss
<< "i = outLoop*"<<outLoopSize
<<"+"<<count
<<";\n";
1154 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1156 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
1157 if(ocPush
==tmpCur
->GetOpCode())
1159 pCurDVR
= dynamic_cast<
1160 const formula::DoubleVectorRefToken
*>(tmpCur
);
1161 if(!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
1163 temp3
<< " currentCount";
1165 temp3
<<" =i+gid0+1;\n";
1169 temp3
<< " currentCount";
1171 temp3
<< " =i+1;\n";
1176 temp3
<< "tmp = fsum(";
1177 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++){
1180 if(ocPush
==vSubArguments
[i
]->GetFormulaToken()->GetOpCode()){
1182 temp3
<<"(currentCount";
1185 if(vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1186 formula::svSingleVectorRef
){
1187 const formula::SingleVectorRefToken
* pSVR
=
1188 dynamic_cast< const formula::SingleVectorRefToken
*>
1189 (vSubArguments
[i
]->GetFormulaToken());
1190 temp3
<<pSVR
->GetArrayLength();
1192 else if(vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1193 formula::svDoubleVectorRef
){
1194 const formula::DoubleVectorRefToken
* pSVR
=
1195 dynamic_cast< const formula::DoubleVectorRefToken
*>
1196 (vSubArguments
[i
]->GetFormulaToken());
1197 temp3
<<pSVR
->GetArrayLength();
1199 temp3
<< ")||isNan("<<vSubArguments
[i
]
1200 ->GenSlidingWindowDeclRef(true);
1202 temp3
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1206 temp3
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1208 temp3
<< ", tmp);\n\t";
1214 //The residual of mod outLoopSize
1215 for(unsigned int count
=nCurWindowSize
/outLoopSize
*outLoopSize
;
1216 count
< nCurWindowSize
; count
++)
1218 ss
<< "i =" <<count
<<";\n";
1219 if(count
==nCurWindowSize
/outLoopSize
*outLoopSize
){
1220 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1222 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
1223 if(ocPush
==tmpCur
->GetOpCode())
1225 pCurDVR
= dynamic_cast<
1226 const formula::DoubleVectorRefToken
*>(tmpCur
);
1227 if(!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
1229 temp4
<< " currentCount";
1231 temp4
<<" =i+gid0+1;\n";
1235 temp4
<< " currentCount";
1237 temp4
<< " =i+1;\n";
1242 temp4
<< "tmp = fsum(";
1243 for (unsigned i
= 0; i
< vSubArguments
.size(); i
++)
1247 if(ocPush
==vSubArguments
[i
]->GetFormulaToken()->GetOpCode())
1250 temp4
<<"(currentCount";
1253 if(vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1254 formula::svSingleVectorRef
)
1256 const formula::SingleVectorRefToken
* pSVR
=
1257 dynamic_cast< const formula::SingleVectorRefToken
*>
1258 (vSubArguments
[i
]->GetFormulaToken());
1259 temp4
<<pSVR
->GetArrayLength();
1261 else if(vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1262 formula::svDoubleVectorRef
)
1264 const formula::DoubleVectorRefToken
* pSVR
=
1265 dynamic_cast< const formula::DoubleVectorRefToken
*>
1266 (vSubArguments
[i
]->GetFormulaToken());
1267 temp4
<<pSVR
->GetArrayLength();
1269 temp4
<< ")||isNan("<<vSubArguments
[i
]
1270 ->GenSlidingWindowDeclRef(true);
1272 temp4
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1277 temp4
<< vSubArguments
[i
]
1278 ->GenSlidingWindowDeclRef(true);
1281 temp4
<< ", tmp);\n\t";
1285 ss
<< "return tmp;\n";
1290 virtual bool takeString() const { return false; }
1291 virtual bool takeNumeric() const { return true; }
1295 class OpNop
: public Reduction
{
1297 virtual std::string
GetBottom(void) { return "0"; }
1298 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&) const
1302 virtual std::string
BinFuncName(void) const { return "nop"; }
1305 class OpCount
: public Reduction
{
1307 virtual std::string
GetBottom(void) { return "0"; }
1308 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1310 std::stringstream ss
;
1311 ss
<< "(isNan(" << lhs
<< ")?"<<rhs
<<":"<<rhs
<<"+1.0)";
1314 virtual std::string
BinFuncName(void) const { return "fcount"; }
1317 class OpEqual
: public Binary
{
1319 virtual std::string
GetBottom(void) { return "0"; }
1320 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1322 std::stringstream ss
;
1323 ss
<< "strequal("<< lhs
<< "," << rhs
<<")";
1326 virtual std::string
BinFuncName(void) const { return "eq"; }
1329 class OpLessEqual
: public Binary
{
1331 virtual std::string
GetBottom(void) { return "0"; }
1332 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1334 std::stringstream ss
;
1335 ss
<< "("<< lhs
<< "<=" << rhs
<<")";
1338 virtual std::string
BinFuncName(void) const { return "leq"; }
1341 class OpGreater
: public Binary
{
1343 virtual std::string
GetBottom(void) { return "0"; }
1344 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1346 std::stringstream ss
;
1347 ss
<< "("<< lhs
<< ">" << rhs
<<")";
1350 virtual std::string
BinFuncName(void) const { return "gt"; }
1353 class OpSum
: public Reduction
{
1355 virtual std::string
GetBottom(void) { return "0"; }
1356 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1358 std::stringstream ss
;
1359 ss
<< "((" << lhs
<<")+("<< rhs
<<"))";
1362 virtual std::string
BinFuncName(void) const { return "fsum"; }
1365 class OpAverage
: public Reduction
{
1367 virtual std::string
GetBottom(void) { return "0"; }
1368 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1370 std::stringstream ss
;
1371 ss
<< "fsum_count(" << lhs
<<","<< rhs
<<", &nCount)";
1374 virtual std::string
BinFuncName(void) const { return "fsum"; }
1375 virtual bool isAverage() const { return true; }
1378 class OpSub
: public Reduction
{
1380 virtual std::string
GetBottom(void) { return "0"; }
1381 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1383 return lhs
+ "-" + rhs
;
1385 virtual std::string
BinFuncName(void) const { return "fsub"; }
1388 class OpMul
: public Reduction
{
1390 virtual std::string
GetBottom(void) { return "1"; }
1391 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1393 return lhs
+ "*" + rhs
;
1395 virtual std::string
BinFuncName(void) const { return "fmul"; }
1398 /// Technically not a reduction, but fits the framework.
1399 class OpDiv
: public Reduction
{
1401 virtual std::string
GetBottom(void) { return "1.0"; }
1402 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1404 return "(" + lhs
+ "/" + rhs
+ ")";
1406 virtual std::string
BinFuncName(void) const { return "fdiv"; }
1409 class OpMin
: public Reduction
{
1411 virtual std::string
GetBottom(void) { return "MAXFLOAT"; }
1412 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1414 return "mcw_fmin("+lhs
+ "," + rhs
+")";
1416 virtual std::string
BinFuncName(void) const { return "min"; }
1419 class OpMax
: public Reduction
{
1421 virtual std::string
GetBottom(void) { return "-MAXFLOAT"; }
1422 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1424 return "mcw_fmax("+lhs
+ "," + rhs
+")";
1426 virtual std::string
BinFuncName(void) const { return "max"; }
1428 class OpSumProduct
: public SumOfProduct
{
1430 virtual std::string
GetBottom(void) { return "0"; }
1431 virtual std::string
Gen2(const std::string
&lhs
, const std::string
&rhs
) const
1433 return lhs
+ "*" + rhs
;
1435 virtual std::string
BinFuncName(void) const { return "fsop"; }
1439 SumIfsArgs(cl_mem x
): mCLMem(x
), mConst(0.0) {}
1440 SumIfsArgs(double x
): mCLMem(NULL
), mConst(x
) {}
1445 /// Helper functions that have multiple buffers
1446 class DynamicKernelSoPArguments
: public DynamicKernelArgument
1449 typedef boost::shared_ptr
<DynamicKernelArgument
> SubArgument
;
1450 typedef std::vector
<SubArgument
> SubArgumentsType
;
1452 DynamicKernelSoPArguments(
1453 const std::string
&s
, const FormulaTreeNodeRef
& ft
, SlidingFunctionBase
* pCodeGen
);
1455 /// Create buffer and pass the buffer to a given kernel
1456 virtual size_t Marshal(cl_kernel k
, int argno
, int nVectorWidth
, cl_program pProgram
)
1459 for (SubArgumentsType::iterator it
= mvSubArguments
.begin(), e
= mvSubArguments
.end(); it
!=e
;
1462 i
+= (*it
)->Marshal(k
, argno
+ i
, nVectorWidth
, pProgram
);
1464 if (OpSumIfs
*OpSumCodeGen
= dynamic_cast<OpSumIfs
*>(mpCodeGen
.get()))
1466 // Obtain cl context
1468 OpenclDevice::setKernelEnv(&kEnv
);
1470 DynamicKernelArgument
*Arg
= mvSubArguments
[0].get();
1471 DynamicKernelSlidingArgument
<VectorRef
> *slidingArgPtr
=
1472 dynamic_cast< DynamicKernelSlidingArgument
<VectorRef
> *> (Arg
);
1475 if (OpSumCodeGen
->NeedReductionKernel())
1477 assert(slidingArgPtr
);
1478 size_t nInput
= slidingArgPtr
-> GetArrayLength();
1479 size_t nCurWindowSize
= slidingArgPtr
-> GetWindowSize();
1480 std::vector
<SumIfsArgs
> vclmem
;
1482 for (SubArgumentsType::iterator it
= mvSubArguments
.begin(),
1483 e
= mvSubArguments
.end(); it
!=e
; ++it
)
1485 if (VectorRef
*VR
= dynamic_cast<VectorRef
*>(it
->get()))
1486 vclmem
.push_back(SumIfsArgs(VR
->GetCLBuffer()));
1487 else if (DynamicKernelConstantArgument
*CA
=
1489 DynamicKernelConstantArgument
*>(it
->get()))
1490 vclmem
.push_back(SumIfsArgs(CA
->GetDouble()));
1492 vclmem
.push_back(SumIfsArgs((cl_mem
)NULL
));
1494 mpClmem2
= clCreateBuffer(kEnv
.mpkContext
, CL_MEM_READ_WRITE
,
1495 sizeof(double)*nVectorWidth
, NULL
, &err
);
1496 if (CL_SUCCESS
!= err
)
1497 throw OpenCLError(err
);
1499 std::string kernelName
= mvSubArguments
[0]->GetName() + "_SumIfs_reduction";
1500 cl_kernel redKernel
= clCreateKernel(pProgram
, kernelName
.c_str(), &err
);
1501 if (err
!= CL_SUCCESS
)
1502 throw OpenCLError(err
);
1504 // set kernel arg of reduction kernel
1505 for (size_t j
=0; j
< vclmem
.size(); j
++){
1506 err
= clSetKernelArg(redKernel
, j
,
1507 vclmem
[j
].mCLMem
?sizeof(cl_mem
):sizeof(double),
1508 vclmem
[j
].mCLMem
?(void *)&vclmem
[j
].mCLMem
:
1509 (void*)&vclmem
[j
].mConst
);
1510 if (CL_SUCCESS
!= err
)
1511 throw OpenCLError(err
);
1513 err
= clSetKernelArg(redKernel
, vclmem
.size(), sizeof(cl_mem
), (void *)&mpClmem2
);
1514 if (CL_SUCCESS
!= err
)
1515 throw OpenCLError(err
);
1517 err
= clSetKernelArg(redKernel
, vclmem
.size()+1, sizeof(cl_int
), (void*)&nInput
);
1518 if (CL_SUCCESS
!= err
)
1519 throw OpenCLError(err
);
1521 err
= clSetKernelArg(redKernel
, vclmem
.size()+2, sizeof(cl_int
), (void*)&nCurWindowSize
);
1522 if (CL_SUCCESS
!= err
)
1523 throw OpenCLError(err
);
1524 // set work group size and execute
1525 size_t global_work_size
[] = {256, (size_t)nVectorWidth
};
1526 size_t local_work_size
[] = {256, 1};
1527 err
= clEnqueueNDRangeKernel(kEnv
.mpkCmdQueue
, redKernel
, 2, NULL
,
1528 global_work_size
, local_work_size
, 0, NULL
, NULL
);
1529 if (CL_SUCCESS
!= err
)
1530 throw OpenCLError(err
);
1531 err
= clFinish(kEnv
.mpkCmdQueue
);
1532 if (CL_SUCCESS
!= err
)
1533 throw OpenCLError(err
);
1534 clReleaseKernel(redKernel
);
1535 // Pass mpClmem2 to the "real" kernel
1536 err
= clSetKernelArg(k
, argno
, sizeof(cl_mem
), (void *)&mpClmem2
);
1537 if (CL_SUCCESS
!= err
)
1538 throw OpenCLError(err
);
1544 virtual void GenSlidingWindowFunction(std::stringstream
&ss
) {
1545 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
1546 mvSubArguments
[i
]->GenSlidingWindowFunction(ss
);
1547 mpCodeGen
->GenSlidingWindowFunction(ss
, mSymName
, mvSubArguments
);
1549 virtual void GenDeclRef(std::stringstream
&ss
) const
1551 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
1555 mvSubArguments
[i
]->GenDeclRef(ss
);
1558 virtual void GenDecl(std::stringstream
&ss
) const
1560 for (SubArgumentsType::const_iterator it
= mvSubArguments
.begin(), e
= mvSubArguments
.end(); it
!=e
;
1562 if (it
!= mvSubArguments
.begin())
1568 virtual size_t GetWindowSize(void) const
1570 size_t nCurWindowSize
= 0;
1571 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
1573 size_t nCurChildWindowSize
= mvSubArguments
[i
]->GetWindowSize();
1574 nCurWindowSize
= (nCurWindowSize
< nCurChildWindowSize
) ?
1575 nCurChildWindowSize
:nCurWindowSize
;
1577 return nCurWindowSize
;
1580 /// When declared as input to a sliding window function
1581 virtual void GenSlidingWindowDecl(std::stringstream
&ss
) const
1583 for (SubArgumentsType::const_iterator it
= mvSubArguments
.begin(), e
= mvSubArguments
.end(); it
!=e
;
1586 if (it
!= mvSubArguments
.begin())
1588 (*it
)->GenSlidingWindowDecl(ss
);
1591 /// Generate either a function call to each children
1592 /// or direclty inline it if we are already inside a loop
1593 virtual std::string
GenSlidingWindowDeclRef(bool nested
=false) const
1595 std::stringstream ss
;
1598 ss
<< mSymName
<< "_" << mpCodeGen
->BinFuncName() <<"(";
1599 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
1604 mvSubArguments
[i
]->GenDeclRef(ss
);
1606 ss
<< mvSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1610 if (mvSubArguments
.size() != 2)
1612 ss
<< "(" << mpCodeGen
->Gen2(mvSubArguments
[0]->GenSlidingWindowDeclRef(true),
1613 mvSubArguments
[1]->GenSlidingWindowDeclRef(true)) << ")";
1617 virtual std::string
DumpOpName(void) const
1619 std::string t
= "_" + mpCodeGen
->BinFuncName();
1620 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
1621 t
= t
+ mvSubArguments
[i
]->DumpOpName();
1624 virtual void DumpInlineFun(std::set
<std::string
>& decls
,
1625 std::set
<std::string
>& funs
) const
1627 mpCodeGen
->BinInlineFun(decls
,funs
);
1628 for (unsigned i
= 0; i
< mvSubArguments
.size(); i
++)
1629 mvSubArguments
[i
]->DumpInlineFun(decls
,funs
);
1631 ~DynamicKernelSoPArguments()
1635 clReleaseMemObject(mpClmem2
);
1640 SubArgumentsType mvSubArguments
;
1641 boost::shared_ptr
<SlidingFunctionBase
> mpCodeGen
;
1645 boost::shared_ptr
<DynamicKernelArgument
> SoPHelper(
1646 const std::string
&ts
, const FormulaTreeNodeRef
& ft
, SlidingFunctionBase
* pCodeGen
)
1648 return boost::shared_ptr
<DynamicKernelArgument
>(new DynamicKernelSoPArguments(ts
, ft
, pCodeGen
));
1651 template<class Base
>
1652 DynamicKernelArgument
*VectorRefFactory(const std::string
&s
,
1653 const FormulaTreeNodeRef
& ft
,
1654 boost::shared_ptr
<SlidingFunctionBase
> &pCodeGen
,
1657 //Black lists ineligible classes here ..
1658 // SUMIFS does not perform parallel reduction at DoubleVectorRef level
1659 if (dynamic_cast<OpSumIfs
*>(pCodeGen
.get())) {
1660 if (index
== 0) // the first argument of OpSumIfs cannot be strings anyway
1661 return new DynamicKernelSlidingArgument
<VectorRef
>(s
, ft
, pCodeGen
, index
);
1662 return new DynamicKernelSlidingArgument
<Base
>(s
, ft
, pCodeGen
, index
);
1664 // AVERAGE is not supported yet
1665 else if (dynamic_cast<OpAverage
*>(pCodeGen
.get()))
1667 return new DynamicKernelSlidingArgument
<Base
>(s
, ft
, pCodeGen
, index
);
1669 // MUL is not supported yet
1670 else if (dynamic_cast<OpMul
*>(pCodeGen
.get()))
1672 return new DynamicKernelSlidingArgument
<Base
>(s
, ft
, pCodeGen
, index
);
1674 // Sub is not a reduction per se
1675 else if (dynamic_cast<OpSub
*>(pCodeGen
.get()))
1677 return new DynamicKernelSlidingArgument
<Base
>(s
, ft
, pCodeGen
, index
);
1679 // Only child class of Reduction is supported
1680 else if (!dynamic_cast<Reduction
*>(pCodeGen
.get()))
1682 return new DynamicKernelSlidingArgument
<Base
>(s
, ft
, pCodeGen
, index
);
1685 const formula::DoubleVectorRefToken
* pDVR
=
1686 dynamic_cast< const formula::DoubleVectorRefToken
* >(
1687 ft
->GetFormulaToken());
1688 // Window being too small to justify a parallel reduction
1689 if (pDVR
->GetRefRowSize() < REDUCE_THRESHOLD
)
1690 return new DynamicKernelSlidingArgument
<Base
>(s
, ft
, pCodeGen
, index
);
1691 if ((pDVR
->IsStartFixed() && pDVR
->IsEndFixed()) ||
1692 (!pDVR
->IsStartFixed() && !pDVR
->IsEndFixed()))
1693 return new ParallelReductionVectorRef
<Base
>(s
, ft
, pCodeGen
, index
);
1694 else // Other cases are not supported as well
1695 return new DynamicKernelSlidingArgument
<Base
>(s
, ft
, pCodeGen
, index
);
1698 DynamicKernelSoPArguments::DynamicKernelSoPArguments(
1699 const std::string
&s
, const FormulaTreeNodeRef
& ft
, SlidingFunctionBase
* pCodeGen
) :
1700 DynamicKernelArgument(s
, ft
), mpCodeGen(pCodeGen
), mpClmem2(NULL
)
1702 size_t nChildren
= ft
->Children
.size();
1704 for (unsigned i
= 0; i
< nChildren
; i
++)
1706 FormulaToken
*pChild
= ft
->Children
[i
]->GetFormulaToken();
1709 OpCode opc
= pChild
->GetOpCode();
1710 std::stringstream tmpname
;
1711 tmpname
<< s
<< "_" << i
;
1712 std::string ts
= tmpname
.str();
1715 if (pChild
->GetType() == formula::svDoubleVectorRef
)
1717 const formula::DoubleVectorRefToken
* pDVR
=
1718 dynamic_cast< const formula::DoubleVectorRefToken
* >(pChild
);
1720 for (size_t j
= 0; j
< pDVR
->GetArrays().size(); ++j
)
1722 if (pDVR
->GetArrays()[j
].mpNumericArray
||
1723 (pDVR
->GetArrays()[j
].mpNumericArray
== NULL
&&
1724 pDVR
->GetArrays()[j
].mpStringArray
== NULL
))
1725 mvSubArguments
.push_back(
1726 SubArgument(VectorRefFactory
<VectorRef
>(
1727 ts
, ft
->Children
[i
], mpCodeGen
, j
)));
1729 mvSubArguments
.push_back(
1730 SubArgument(VectorRefFactory
1731 <DynamicKernelStringArgument
>(
1732 ts
, ft
->Children
[i
], mpCodeGen
, j
)));
1734 } else if (pChild
->GetType() == formula::svSingleVectorRef
) {
1735 const formula::SingleVectorRefToken
* pSVR
=
1736 dynamic_cast< const formula::SingleVectorRefToken
* >(pChild
);
1738 if (pSVR
->GetArray().mpNumericArray
&&
1739 pCodeGen
->takeNumeric() &&
1740 pSVR
->GetArray().mpStringArray
&&
1741 pCodeGen
->takeString())
1743 mvSubArguments
.push_back(
1744 SubArgument(new DynamicKernelMixedArgument(
1745 ts
, ft
->Children
[i
])));
1747 else if (pSVR
->GetArray().mpNumericArray
&&
1748 pCodeGen
->takeNumeric())
1750 mvSubArguments
.push_back(
1751 SubArgument(new VectorRef(ts
,
1754 else if (pSVR
->GetArray().mpStringArray
&&
1755 pCodeGen
->takeString())
1757 mvSubArguments
.push_back(
1758 SubArgument(new DynamicKernelStringArgument(
1759 ts
, ft
->Children
[i
])));
1761 else if (pSVR
->GetArray().mpStringArray
== NULL
&&
1762 pSVR
->GetArray().mpNumericArray
== NULL
)
1764 // Push as an array of NANs
1765 mvSubArguments
.push_back(
1766 SubArgument(new VectorRef(ts
,
1770 throw UnhandledToken(pChild
,
1771 "Got unhandled case here", __FILE__
, __LINE__
);
1772 } else if (pChild
->GetType() == formula::svDouble
) {
1773 mvSubArguments
.push_back(
1774 SubArgument(new DynamicKernelConstantArgument(ts
,
1776 } else if (pChild
->GetType() == formula::svString
) {
1777 mvSubArguments
.push_back(
1778 SubArgument(new ConstStringArgument(ts
,
1781 throw UnhandledToken(pChild
, "unknown operand for ocPush");
1785 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpDiv
));
1788 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpMul
));
1791 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpSub
));
1795 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpSum
));
1798 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpAverage
));
1801 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpMin
));
1804 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpMax
));
1807 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpCount
));
1810 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpSumProduct
));
1813 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpIRR
));
1816 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpMIRR
));
1819 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpPMT
));
1822 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpIntrate
));
1825 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpRRI
));
1828 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpPPMT
));
1831 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpFisher
));
1834 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpFisherInv
));
1837 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpGamma
));
1840 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpSLN
));
1843 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpGammaLn
));
1846 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpGauss
));
1849 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpGeoMean
));
1852 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpHarMean
));
1855 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpLessEqual
));
1858 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpEqual
));
1861 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpGreater
));
1864 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpSYD
));
1867 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpCorrel
));
1870 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpCos
));
1872 case ocNegBinomVert
:
1873 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpNegbinomdist
));
1876 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpPearson
));
1879 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpRsq
));
1882 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpCsc
));
1885 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpISPMT
));
1888 mvSubArguments
.push_back(SoPHelper(ts
,
1889 ft
->Children
[i
], new OpDuration
));
1892 mvSubArguments
.push_back(SoPHelper(ts
,
1893 ft
->Children
[i
],new OpSinh
));
1896 mvSubArguments
.push_back(SoPHelper(ts
,
1897 ft
->Children
[i
], new OpAbs
));
1900 mvSubArguments
.push_back(SoPHelper(ts
,
1901 ft
->Children
[i
], new OpPV
));
1904 mvSubArguments
.push_back(SoPHelper(ts
,
1905 ft
->Children
[i
], new OpSin
));
1908 mvSubArguments
.push_back(SoPHelper(ts
,
1909 ft
->Children
[i
], new OpTan
));
1912 mvSubArguments
.push_back(SoPHelper(ts
,
1913 ft
->Children
[i
], new OpTanH
));
1916 mvSubArguments
.push_back(SoPHelper(ts
,
1917 ft
->Children
[i
], new OpStandard
));
1920 mvSubArguments
.push_back(SoPHelper(ts
,
1921 ft
->Children
[i
], new OpWeibull
));
1924 mvSubArguments
.push_back(SoPHelper(ts
,
1925 ft
->Children
[i
],new OpMedian
));
1928 mvSubArguments
.push_back(SoPHelper(ts
,
1929 ft
->Children
[i
],new OpDDB
));
1932 mvSubArguments
.push_back(SoPHelper(ts
,
1933 ft
->Children
[i
],new OpFV
));
1936 mvSubArguments
.push_back(SoPHelper(ts
,
1937 ft
->Children
[i
],new OpSumIfs
));
1940 mvSubArguments
.push_back(SoPHelper(ts
,
1941 ft
->Children
[i
],new OpVDB
));
1944 mvSubArguments
.push_back(SoPHelper(ts
,
1945 ft
->Children
[i
], new OpKurt
));
1948 mvSubArguments
.push_back(SoPHelper(ts
,
1949 ft
->Children
[i
], new OpNper
));
1952 mvSubArguments
.push_back(SoPHelper(ts
,
1953 ft
->Children
[i
],new OpNormdist
));
1956 mvSubArguments
.push_back(SoPHelper(ts
,
1957 ft
->Children
[i
], new OpArcCos
));
1960 mvSubArguments
.push_back(SoPHelper(ts
,
1961 ft
->Children
[i
],new OpSqrt
));
1964 mvSubArguments
.push_back(SoPHelper(ts
,
1965 ft
->Children
[i
], new OpArcCosHyp
));
1968 mvSubArguments
.push_back(SoPHelper(ts
,
1969 ft
->Children
[i
], new OpNPV
));
1972 mvSubArguments
.push_back(SoPHelper(ts
,
1973 ft
->Children
[i
],new OpNormsdist
));
1976 mvSubArguments
.push_back(SoPHelper(ts
,
1977 ft
->Children
[i
],new OpNorminv
));
1980 mvSubArguments
.push_back(SoPHelper(ts
,
1981 ft
->Children
[i
],new OpNormsinv
));
1984 mvSubArguments
.push_back(SoPHelper(ts
,
1985 ft
->Children
[i
],new OpVariationen
));
1987 case ocVariationen2
:
1988 mvSubArguments
.push_back(SoPHelper(ts
,
1989 ft
->Children
[i
],new OpVariationen2
));
1992 mvSubArguments
.push_back(SoPHelper(ts
,
1993 ft
->Children
[i
],new OpPhi
));
1996 mvSubArguments
.push_back(SoPHelper(ts
,
1997 ft
->Children
[i
],new OpIPMT
));
2000 mvSubArguments
.push_back(SoPHelper(ts
,
2001 ft
->Children
[i
], new OpConfidence
));
2004 mvSubArguments
.push_back(SoPHelper(ts
,
2005 ft
->Children
[i
], new OpIntercept
));
2008 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2012 mvSubArguments
.push_back(SoPHelper(ts
,
2013 ft
->Children
[i
], new OpLogInv
));
2016 mvSubArguments
.push_back(SoPHelper(ts
,
2017 ft
->Children
[i
], new OpArcCot
));
2020 mvSubArguments
.push_back(SoPHelper(ts
,
2021 ft
->Children
[i
], new OpCosh
));
2024 mvSubArguments
.push_back(SoPHelper(ts
,
2025 ft
->Children
[i
], new OpCritBinom
));
2028 mvSubArguments
.push_back(SoPHelper(ts
,
2029 ft
->Children
[i
], new OpArcCotHyp
));
2032 mvSubArguments
.push_back(SoPHelper(ts
,
2033 ft
->Children
[i
], new OpArcSin
));
2036 mvSubArguments
.push_back(SoPHelper(ts
,
2037 ft
->Children
[i
], new OpArcSinHyp
));
2040 mvSubArguments
.push_back(SoPHelper(ts
,
2041 ft
->Children
[i
], new OpArcTan
));
2044 mvSubArguments
.push_back(SoPHelper(ts
,
2045 ft
->Children
[i
], new OpArcTanH
));
2048 mvSubArguments
.push_back(SoPHelper(ts
,
2049 ft
->Children
[i
], new OpBitAnd
));
2052 mvSubArguments
.push_back(SoPHelper(ts
,
2053 ft
->Children
[i
], new OpForecast
));
2056 mvSubArguments
.push_back(SoPHelper(ts
,
2057 ft
->Children
[i
], new OpLogNormDist
));
2060 mvSubArguments
.push_back(SoPHelper(ts
,
2061 ft
->Children
[i
], new OpGammaDist
));
2064 mvSubArguments
.push_back(SoPHelper(ts
,
2065 ft
->Children
[i
],new OpLn
));
2068 mvSubArguments
.push_back(SoPHelper(ts
,
2069 ft
->Children
[i
],new OpRound
));
2072 mvSubArguments
.push_back(SoPHelper(ts
,
2073 ft
->Children
[i
], new OpCot
));
2076 mvSubArguments
.push_back(SoPHelper(ts
,
2077 ft
->Children
[i
], new OpCoth
));
2080 mvSubArguments
.push_back(SoPHelper(ts
,
2081 ft
->Children
[i
], new OpFdist
));
2084 mvSubArguments
.push_back(SoPHelper(ts
,
2085 ft
->Children
[i
], new OpVar
));
2088 mvSubArguments
.push_back(SoPHelper(ts
,
2089 ft
->Children
[i
],new OpChiDist
));
2092 mvSubArguments
.push_back(SoPHelper(ts
,
2093 ft
->Children
[i
], new OpPower
));
2096 mvSubArguments
.push_back(SoPHelper(ts
,
2097 ft
->Children
[i
], new OpOdd
));
2100 mvSubArguments
.push_back(SoPHelper(ts
,
2101 ft
->Children
[i
],new OpChiSqDist
));
2104 mvSubArguments
.push_back(SoPHelper(ts
,
2105 ft
->Children
[i
],new OpChiSqInv
));
2108 mvSubArguments
.push_back(SoPHelper(ts
,
2109 ft
->Children
[i
], new OpGammaInv
));
2112 mvSubArguments
.push_back(SoPHelper(ts
,
2113 ft
->Children
[i
], new OpFloor
));
2116 mvSubArguments
.push_back(SoPHelper(ts
,
2117 ft
->Children
[i
], new OpFInv
));
2120 mvSubArguments
.push_back(SoPHelper(ts
,
2121 ft
->Children
[i
], new OpFTest
));
2124 mvSubArguments
.push_back(SoPHelper(ts
,
2125 ft
->Children
[i
], new OpB
));
2128 mvSubArguments
.push_back(SoPHelper(ts
,
2129 ft
->Children
[i
], new OpBetaDist
));
2132 mvSubArguments
.push_back(SoPHelper(ts
,
2133 ft
->Children
[i
], new OpCscH
));
2136 mvSubArguments
.push_back(SoPHelper(ts
,
2137 ft
->Children
[i
], new OpExp
));
2140 mvSubArguments
.push_back(SoPHelper(ts
,
2141 ft
->Children
[i
], new OpLog10
));
2144 mvSubArguments
.push_back(SoPHelper(ts
,
2145 ft
->Children
[i
], new OpExponDist
));
2148 mvSubArguments
.push_back(SoPHelper(ts
,
2149 ft
->Children
[i
],new OpAverageIfs
));
2152 mvSubArguments
.push_back(SoPHelper(ts
,
2153 ft
->Children
[i
],new OpCountIfs
));
2156 mvSubArguments
.push_back(SoPHelper(ts
,
2157 ft
->Children
[i
], new OpCombina
));
2160 mvSubArguments
.push_back(SoPHelper(ts
,
2161 ft
->Children
[i
], new OpEven
));
2164 mvSubArguments
.push_back(SoPHelper(ts
,
2165 ft
->Children
[i
], new OpLog
));
2168 mvSubArguments
.push_back(SoPHelper(ts
,
2169 ft
->Children
[i
], new OpMod
));
2172 mvSubArguments
.push_back(SoPHelper(ts
,
2173 ft
->Children
[i
], new OpTrunc
));
2176 mvSubArguments
.push_back(SoPHelper(ts
,
2177 ft
->Children
[i
], new OpSkew
));
2180 mvSubArguments
.push_back(SoPHelper(ts
,
2181 ft
->Children
[i
], new OpArcTan2
));
2184 mvSubArguments
.push_back(SoPHelper(ts
,
2185 ft
->Children
[i
], new OpBitOr
));
2188 mvSubArguments
.push_back(SoPHelper(ts
,
2189 ft
->Children
[i
], new OpBitLshift
));
2192 mvSubArguments
.push_back(SoPHelper(ts
,
2193 ft
->Children
[i
], new OpBitRshift
));
2196 mvSubArguments
.push_back(SoPHelper(ts
,
2197 ft
->Children
[i
], new OpBitXor
));
2200 mvSubArguments
.push_back(SoPHelper(ts
,
2201 ft
->Children
[i
],new OpChiInv
));
2204 mvSubArguments
.push_back(SoPHelper(ts
,
2205 ft
->Children
[i
],new OpPoisson
));
2208 mvSubArguments
.push_back(SoPHelper(ts
,
2209 ft
->Children
[i
], new OpSumSQ
));
2212 mvSubArguments
.push_back(SoPHelper(ts
,
2213 ft
->Children
[i
], new OpSkewp
));
2216 mvSubArguments
.push_back(SoPHelper(ts
,
2217 ft
->Children
[i
],new OpBinomdist
));
2220 mvSubArguments
.push_back(SoPHelper(ts
,
2221 ft
->Children
[i
], new OpVarP
));
2224 mvSubArguments
.push_back(SoPHelper(ts
,
2225 ft
->Children
[i
], new OpCeil
));
2228 mvSubArguments
.push_back(SoPHelper(ts
,
2229 ft
->Children
[i
], new OpKombin
));
2232 mvSubArguments
.push_back(SoPHelper(ts
,
2233 ft
->Children
[i
], new OpDevSq
));
2236 mvSubArguments
.push_back(SoPHelper(ts
,
2237 ft
->Children
[i
], new OpStDev
));
2240 mvSubArguments
.push_back(SoPHelper(ts
,
2241 ft
->Children
[i
], new OpSlope
));
2244 mvSubArguments
.push_back(SoPHelper(ts
,
2245 ft
->Children
[i
], new OpSTEYX
));
2248 mvSubArguments
.push_back(SoPHelper(ts
,
2249 ft
->Children
[i
], new OpZTest
));
2252 mvSubArguments
.push_back(
2253 SubArgument(new DynamicKernelPiArgument(ts
,
2257 mvSubArguments
.push_back(
2258 SubArgument(new DynamicKernelRandomArgument(ts
,
2262 mvSubArguments
.push_back(SoPHelper(ts
,
2263 ft
->Children
[i
], new OpProduct
));
2266 mvSubArguments
.push_back(SoPHelper(ts
,
2267 ft
->Children
[i
],new OpHypGeomDist
));
2270 mvSubArguments
.push_back(SoPHelper(ts
,
2271 ft
->Children
[i
],new OpSumX2MY2
));
2274 mvSubArguments
.push_back(SoPHelper(ts
,
2275 ft
->Children
[i
],new OpSumX2PY2
));
2278 mvSubArguments
.push_back(SoPHelper(ts
,
2279 ft
->Children
[i
],new OpBetainv
));
2282 mvSubArguments
.push_back(SoPHelper(ts
,
2283 ft
->Children
[i
], new OpTTest
));
2286 mvSubArguments
.push_back(SoPHelper(ts
,
2287 ft
->Children
[i
], new OpTDist
));
2290 mvSubArguments
.push_back(SoPHelper(ts
,
2291 ft
->Children
[i
], new OpTInv
));
2294 mvSubArguments
.push_back(SoPHelper(ts
,
2295 ft
->Children
[i
],new OpSumXMY2
));
2298 mvSubArguments
.push_back(SoPHelper(ts
,
2299 ft
->Children
[i
], new OpStDevP
));
2302 mvSubArguments
.push_back(SoPHelper(ts
,
2303 ft
->Children
[i
], new OpCovar
));
2306 mvSubArguments
.push_back(SoPHelper(ts
,
2307 ft
->Children
[i
], new OpAnd
));
2310 if ( !(pChild
->GetExternal().compareTo(OUString(
2311 "com.sun.star.sheet.addin.Analysis.getEffect"))))
2313 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpEffective
));
2315 else if ( !(pChild
->GetExternal().compareTo(OUString(
2316 "com.sun.star.sheet.addin.Analysis.getCumipmt"))))
2318 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpCumipmt
));
2320 else if ( !(pChild
->GetExternal().compareTo(OUString(
2321 "com.sun.star.sheet.addin.Analysis.getNominal"))))
2323 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpNominal
));
2325 else if ( !(pChild
->GetExternal().compareTo(OUString(
2326 "com.sun.star.sheet.addin.Analysis.getCumprinc"))))
2328 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpCumprinc
));
2330 else if ( !(pChild
->GetExternal().compareTo(OUString(
2331 "com.sun.star.sheet.addin.Analysis.getXnpv"))))
2333 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpXNPV
));
2335 else if ( !(pChild
->GetExternal().compareTo(OUString(
2336 "com.sun.star.sheet.addin.Analysis.getPricemat"))))
2338 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpPriceMat
));
2340 else if ( !(pChild
->GetExternal().compareTo(OUString(
2341 "com.sun.star.sheet.addin.Analysis.getReceived"))))
2343 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpReceived
));
2345 else if( !(pChild
->GetExternal().compareTo(OUString(
2346 "com.sun.star.sheet.addin.Analysis.getTbilleq"))))
2348 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpTbilleq
));
2350 else if( !(pChild
->GetExternal().compareTo(OUString(
2351 "com.sun.star.sheet.addin.Analysis.getTbillprice"))))
2353 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpTbillprice
));
2355 else if( !(pChild
->GetExternal().compareTo(OUString(
2356 "com.sun.star.sheet.addin.Analysis.getTbillyield"))))
2358 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpTbillyield
));
2360 else if (!(pChild
->GetExternal().compareTo(OUString(
2361 "com.sun.star.sheet.addin.Analysis.getFvschedule"))))
2363 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpFvschedule
));
2365 else if ( !(pChild
->GetExternal().compareTo(OUString(
2366 "com.sun.star.sheet.addin.Analysis.getYield"))))
2368 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpYield
));
2370 else if ( !(pChild
->GetExternal().compareTo(OUString(
2371 "com.sun.star.sheet.addin.Analysis.getYielddisc"))))
2373 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpYielddisc
));
2375 else if ( !(pChild
->GetExternal().compareTo(OUString(
2376 "com.sun.star.sheet.addin.Analysis.getYieldmat"))))
2378 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpYieldmat
));
2380 else if ( !(pChild
->GetExternal().compareTo(OUString(
2381 "com.sun.star.sheet.addin.Analysis.getAccrintm"))))
2383 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpAccrintm
));
2385 else if ( !(pChild
->GetExternal().compareTo(OUString(
2386 "com.sun.star.sheet.addin.Analysis.getCoupdaybs"))))
2388 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpCoupdaybs
));
2390 else if ( !(pChild
->GetExternal().compareTo(OUString(
2391 "com.sun.star.sheet.addin.Analysis.getDollarde"))))
2393 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpDollarde
));
2395 else if ( !(pChild
->GetExternal().compareTo(OUString(
2396 "com.sun.star.sheet.addin.Analysis.getDollarfr"))))
2398 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpDollarfr
));
2400 else if ( !(pChild
->GetExternal().compareTo(OUString(
2401 "com.sun.star.sheet.addin.Analysis.getCoupdays"))))
2403 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpCoupdays
));
2405 else if ( !(pChild
->GetExternal().compareTo(OUString(
2406 "com.sun.star.sheet.addin.Analysis.getCoupdaysnc"))))
2408 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpCoupdaysnc
));
2410 else if ( !(pChild
->GetExternal().compareTo(OUString(
2411 "com.sun.star.sheet.addin.Analysis.getDisc"))))
2413 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpDISC
));
2415 else if ( !(pChild
->GetExternal().compareTo(OUString(
2416 "com.sun.star.sheet.addin.Analysis.getIntrate"))))
2418 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
], new OpINTRATE
));
2420 else if ( !(pChild
->GetExternal().compareTo(OUString(
2421 "com.sun.star.sheet.addin.Analysis.getPrice"))))
2423 mvSubArguments
.push_back(SoPHelper(ts
,
2424 ft
->Children
[i
], new OpPrice
));
2426 else if ( !(pChild
->GetExternal().compareTo(OUString(
2427 "com.sun.star.sheet.addin.Analysis.getCoupnum"))))
2429 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2432 else if ( !(pChild
->GetExternal().compareTo(OUString(
2433 "com.sun.star.sheet.addin.Analysis.getDuration"))))
2435 mvSubArguments
.push_back(
2436 SoPHelper(ts
, ft
->Children
[i
], new OpDuration_ADD
));
2438 else if ( !(pChild
->GetExternal().compareTo(OUString(
2439 "com.sun.star.sheet.addin.Analysis.getAmordegrc"))))
2441 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2444 else if ( !(pChild
->GetExternal().compareTo(OUString(
2445 "com.sun.star.sheet.addin.Analysis.getAmorlinc"))))
2447 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2450 else if ( !(pChild
->GetExternal().compareTo(OUString(
2451 "com.sun.star.sheet.addin.Analysis.getMduration"))))
2453 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2456 else if ( !(pChild
->GetExternal().compareTo(OUString(
2457 "com.sun.star.sheet.addin.Analysis.getXirr"))))
2459 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2462 else if ( !(pChild
->GetExternal().compareTo(OUString(
2463 "com.sun.star.sheet.addin.Analysis.getOddlprice"))))
2465 mvSubArguments
.push_back(SoPHelper(ts
,
2466 ft
->Children
[i
], new OpOddlprice
));
2468 else if ( !(pChild
->GetExternal().compareTo(OUString(
2469 "com.sun.star.sheet.addin.Analysis.getOddlyield"))))
2471 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2474 else if ( !(pChild
->GetExternal().compareTo(OUString(
2475 "com.sun.star.sheet.addin.Analysis.getPricedisc"))))
2477 mvSubArguments
.push_back(SoPHelper(ts
,
2478 ft
->Children
[i
], new OpPriceDisc
));
2480 else if ( !(pChild
->GetExternal().compareTo(OUString(
2481 "com.sun.star.sheet.addin.Analysis.getCouppcd"))))
2483 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2486 else if ( !(pChild
->GetExternal().compareTo(OUString(
2487 "com.sun.star.sheet.addin.Analysis.getCoupncd"))))
2489 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2492 else if ( !(pChild
->GetExternal().compareTo(OUString(
2493 "com.sun.star.sheet.addin.Analysis.getAccrint"))))
2495 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2498 else if ( !(pChild
->GetExternal().compareTo(OUString(
2499 "com.sun.star.sheet.addin.Analysis.getSqrtpi"))))
2501 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2504 else if ( !(pChild
->GetExternal().compareTo(OUString(
2505 "com.sun.star.sheet.addin.Analysis.getConvert"))))
2507 mvSubArguments
.push_back(SoPHelper(ts
, ft
->Children
[i
],
2512 throw UnhandledToken(pChild
, "unhandled opcode");
2517 /// Holds the symbol table for a given dynamic kernel
2520 typedef std::map
<const formula::FormulaToken
*,
2521 boost::shared_ptr
<DynamicKernelArgument
> > ArgumentMap
;
2522 // This avoids instability caused by using pointer as the key type
2523 typedef std::list
< boost::shared_ptr
<DynamicKernelArgument
> > ArgumentList
;
2524 SymbolTable(void):mCurId(0) {}
2526 const DynamicKernelArgument
*DeclRefArg(FormulaTreeNodeRef
, SlidingFunctionBase
* pCodeGen
);
2527 /// Used to generate sliding window helpers
2528 void DumpSlidingWindowFunctions(std::stringstream
&ss
)
2530 for(ArgumentList::iterator it
= mParams
.begin(), e
= mParams
.end(); it
!=e
;
2532 (*it
)->GenSlidingWindowFunction(ss
);
2536 /// Memory mapping from host to device and pass buffers to the given kernel as
2538 void Marshal(cl_kernel
, int, cl_program
);
2540 unsigned int mCurId
;
2541 ArgumentMap mSymbols
;
2542 ArgumentList mParams
;
2545 void SymbolTable::Marshal(cl_kernel k
, int nVectorWidth
, cl_program pProgram
)
2547 int i
= 1; //The first argument is reserved for results
2548 for(ArgumentList::iterator it
= mParams
.begin(), e
= mParams
.end(); it
!=e
;
2550 i
+=(*it
)->Marshal(k
, i
, nVectorWidth
, pProgram
);
2554 class DynamicKernel
: public CompiledFormula
2557 DynamicKernel(FormulaTreeNodeRef r
):mpRoot(r
),
2558 mpProgram(NULL
), mpKernel(NULL
), mpResClmem(NULL
), mpCode(NULL
) {}
2559 static DynamicKernel
*create(ScDocument
& rDoc
,
2560 const ScAddress
& rTopPos
,
2561 ScTokenArray
& rCode
);
2562 /// OpenCL code generation
2564 // Travese the tree of expression and declare symbols used
2565 const DynamicKernelArgument
*DK
= mSyms
.DeclRefArg
<
2566 DynamicKernelSoPArguments
>(mpRoot
, new OpNop
);
2568 std::stringstream decl
;
2569 if (OpenclDevice::gpuEnv
.mnKhrFp64Flag
) {
2570 decl
<< "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
2571 } else if (OpenclDevice::gpuEnv
.mnAmdFp64Flag
) {
2572 decl
<< "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
2576 DK
->DumpInlineFun(inlineDecl
,inlineFun
);
2577 for(std::set
<std::string
>::iterator set_iter
=inlineDecl
.begin();
2578 set_iter
!=inlineDecl
.end();++set_iter
)
2583 for(std::set
<std::string
>::iterator set_iter
=inlineFun
.begin();
2584 set_iter
!=inlineFun
.end();++set_iter
)
2588 mSyms
.DumpSlidingWindowFunctions(decl
);
2589 mKernelSignature
= DK
->DumpOpName();
2590 decl
<< "__kernel void DynamicKernel" << mKernelSignature
;
2591 decl
<< "(__global double *result, ";
2592 DK
->GenSlidingWindowDecl(decl
);
2593 decl
<< ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
2594 DK
->GenSlidingWindowDeclRef(false) << ";\n}\n";
2595 mFullProgramSrc
= decl
.str();
2597 std::cerr
<< "Program to be compiled = \n" << mFullProgramSrc
<< "\n";
2600 /// Produce kernel hash
2601 std::string
GetMD5(void)
2604 if (mKernelHash
.empty()) {
2605 std::stringstream md5s
;
2606 // Compute MD5SUM of kernel body to obtain the name
2607 sal_uInt8 result
[RTL_DIGEST_LENGTH_MD5
];
2609 mFullProgramSrc
.c_str(),
2610 mFullProgramSrc
.length(), result
,
2611 RTL_DIGEST_LENGTH_MD5
);
2612 for(int i
=0; i
< RTL_DIGEST_LENGTH_MD5
; i
++) {
2613 md5s
<< std::hex
<< (int)result
[i
];
2615 mKernelHash
= md5s
.str();
2622 /// Create program, build, and create kerenl
2623 /// TODO cache results based on kernel body hash
2624 /// TODO: abstract OpenCL part out into OpenCL wrapper.
2625 void CreateKernel(void);
2626 /// Prepare buffers, marshal them to GPU, and launch the kernel
2627 /// TODO: abstract OpenCL part out into OpenCL wrapper.
2628 void Launch(size_t nr
)
2630 // Obtain cl context
2632 OpenclDevice::setKernelEnv(&kEnv
);
2635 mpResClmem
= clCreateBuffer(kEnv
.mpkContext
,
2636 (cl_mem_flags
) CL_MEM_READ_WRITE
,
2637 nr
*sizeof(double), NULL
, &err
);
2638 if (CL_SUCCESS
!= err
)
2639 throw OpenCLError(err
);
2640 err
= clSetKernelArg(mpKernel
, 0, sizeof(cl_mem
), (void*)&mpResClmem
);
2641 if (CL_SUCCESS
!= err
)
2642 throw OpenCLError(err
);
2643 // The rest of buffers
2644 mSyms
.Marshal(mpKernel
, nr
, mpProgram
);
2645 size_t global_work_size
[] = {nr
};
2646 err
= clEnqueueNDRangeKernel(kEnv
.mpkCmdQueue
, mpKernel
, 1, NULL
,
2647 global_work_size
, NULL
, 0, NULL
, NULL
);
2648 if (CL_SUCCESS
!= err
)
2649 throw OpenCLError(err
);
2652 cl_mem
GetResultBuffer(void) const { return mpResClmem
; }
2653 void SetPCode(ScTokenArray
*pCode
) { mpCode
= pCode
; }
2656 void TraverseAST(FormulaTreeNodeRef
);
2657 FormulaTreeNodeRef mpRoot
;
2659 std::string mKernelSignature
, mKernelHash
;
2660 std::string mFullProgramSrc
;
2661 cl_program mpProgram
;
2663 cl_mem mpResClmem
; // Results
2664 std::set
<std::string
> inlineDecl
;
2665 std::set
<std::string
> inlineFun
;
2666 ScTokenArray
*mpCode
;
2669 DynamicKernel::~DynamicKernel()
2672 std::cerr
<<"Freeing kernel "<< GetMD5() << " result buffer\n";
2673 clReleaseMemObject(mpResClmem
);
2676 std::cerr
<<"Freeing kernel "<< GetMD5() << " kernel\n";
2677 clReleaseKernel(mpKernel
);
2679 // mpProgram is not going to be released here -- it's cached.
2684 void DynamicKernel::CreateKernel(void)
2687 std::string kname
= "DynamicKernel"+mKernelSignature
;
2688 // Compile kernel here!!!
2689 // Obtain cl context
2691 OpenclDevice::setKernelEnv(&kEnv
);
2692 const char *src
= mFullProgramSrc
.c_str();
2693 static std::string lastOneKernelHash
= "";
2694 static std::string lastSecondKernelHash
= "";
2695 static cl_program lastOneProgram
= NULL
;
2696 static cl_program lastSecondProgram
= NULL
;
2697 std::string KernelHash
= mKernelSignature
+GetMD5();
2698 if (lastOneKernelHash
== KernelHash
&& lastOneProgram
)
2700 std::cerr
<<"cl_program cache hit: "<< KernelHash
<< "\n";
2701 mpProgram
= lastOneProgram
;
2703 else if(lastSecondKernelHash
== KernelHash
&& lastSecondProgram
)
2705 std::cerr
<<"cl_program cache hit: "<< KernelHash
<< "\n";
2706 mpProgram
= lastSecondProgram
;
2709 { // doesn't match the last compiled formula.
2711 if (lastSecondProgram
) {
2712 std::cerr
<<"Freeing lastsecond program: "<< GetMD5() << "\n";
2713 clReleaseProgram(lastSecondProgram
);
2715 if (OpenclDevice::buildProgramFromBinary("",
2716 &OpenclDevice::gpuEnv
, KernelHash
.c_str(), 0)) {
2717 mpProgram
= OpenclDevice::gpuEnv
.mpArryPrograms
[0];
2718 OpenclDevice::gpuEnv
.mpArryPrograms
[0] = NULL
;
2720 mpProgram
= clCreateProgramWithSource(kEnv
.mpkContext
, 1,
2722 if (err
!= CL_SUCCESS
)
2723 throw OpenCLError(err
);
2724 err
= clBuildProgram(mpProgram
, 1,
2725 OpenclDevice::gpuEnv
.mpArryDevsID
, "", NULL
, NULL
);
2726 if (err
!= CL_SUCCESS
)
2727 throw OpenCLError(err
);
2728 // Generate binary out of compiled kernel.
2729 OpenclDevice::generatBinFromKernelSource(mpProgram
,
2730 (mKernelSignature
+GetMD5()).c_str());
2732 lastSecondKernelHash
= lastOneKernelHash
;
2733 lastSecondProgram
= lastOneProgram
;
2734 lastOneKernelHash
= KernelHash
;
2735 lastOneProgram
= mpProgram
;
2737 mpKernel
= clCreateKernel(mpProgram
, kname
.c_str(), &err
);
2738 if (err
!= CL_SUCCESS
)
2739 throw OpenCLError(err
);
2741 // Symbol lookup. If there is no such symbol created, allocate one
2742 // kernel with argument with unique name and return so.
2743 // The template argument T must be a subclass of DynamicKernelArgument
2744 template <typename T
>
2745 const DynamicKernelArgument
*SymbolTable::DeclRefArg(
2746 FormulaTreeNodeRef t
, SlidingFunctionBase
* pCodeGen
)
2748 FormulaToken
*ref
= t
->GetFormulaToken();
2749 ArgumentMap::iterator it
= mSymbols
.find(ref
);
2750 if (it
== mSymbols
.end()) {
2751 // Allocate new symbols
2752 std::cerr
<< "DeclRefArg: Allocate a new symbol:";
2753 std::stringstream ss
;
2754 ss
<< "tmp"<< mCurId
++;
2755 boost::shared_ptr
<DynamicKernelArgument
> new_arg(new T(ss
.str(), t
, pCodeGen
));
2756 mSymbols
[ref
] = new_arg
;
2757 mParams
.push_back(new_arg
);
2758 std::cerr
<< ss
.str() <<"\n";
2759 return new_arg
.get();
2761 return it
->second
.get();
2765 class FormulaGroupInterpreterOpenCL
: public FormulaGroupInterpreter
2768 FormulaGroupInterpreterOpenCL() :
2769 FormulaGroupInterpreter()
2772 virtual ~FormulaGroupInterpreterOpenCL()
2776 virtual ScMatrixRef
inverseMatrix( const ScMatrix
& rMat
) SAL_OVERRIDE
;
2777 virtual CompiledFormula
* createCompiledFormula(ScDocument
& rDoc
,
2778 const ScAddress
& rTopPos
,
2779 ScFormulaCellGroupRef
& xGroup
,
2780 ScTokenArray
& rCode
) SAL_OVERRIDE
;
2781 virtual bool interpret( ScDocument
& rDoc
, const ScAddress
& rTopPos
,
2782 ScFormulaCellGroupRef
& xGroup
, ScTokenArray
& rCode
) SAL_OVERRIDE
;
2785 ScMatrixRef
FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix
& )
2790 DynamicKernel
* DynamicKernel::create(ScDocument
& /* rDoc */,
2791 const ScAddress
& /* rTopPos */,
2792 ScTokenArray
& rCode
)
2794 // Constructing "AST"
2795 FormulaTokenIterator aCode
= rCode
;
2796 std::list
<FormulaToken
*> list
;
2797 std::map
<FormulaToken
*, FormulaTreeNodeRef
> m_hash_map
;
2799 while( (pCur
= (FormulaToken
*)(aCode
.Next()) ) != NULL
)
2801 OpCode eOp
= pCur
->GetOpCode();
2802 if ( eOp
!= ocPush
)
2804 FormulaTreeNodeRef m_currNode
=
2805 FormulaTreeNodeRef(new FormulaTreeNode(pCur
));
2806 sal_uInt8 m_ParamCount
= pCur
->GetParamCount();
2807 for(int i
=0; i
<m_ParamCount
; i
++)
2809 FormulaToken
* m_TempFormula
= list
.back();
2811 if(m_TempFormula
->GetOpCode()!=ocPush
)
2813 if(m_hash_map
.find(m_TempFormula
)==m_hash_map
.end())
2815 m_currNode
->Children
.push_back(m_hash_map
[m_TempFormula
]);
2819 FormulaTreeNodeRef m_ChildTreeNode
=
2821 new FormulaTreeNode(m_TempFormula
));
2822 m_currNode
->Children
.push_back(m_ChildTreeNode
);
2825 std::reverse(m_currNode
->Children
.begin(),
2826 m_currNode
->Children
.end());
2827 m_hash_map
[pCur
] = m_currNode
;
2829 list
.push_back(pCur
);
2832 FormulaTreeNodeRef Root
= FormulaTreeNodeRef(new FormulaTreeNode(NULL
));
2833 Root
->Children
.push_back(m_hash_map
[list
.back()]);
2835 DynamicKernel
* pDynamicKernel
= new DynamicKernel(Root
);
2837 if (!pDynamicKernel
)
2840 // OpenCL source code generation and kernel compilation
2842 pDynamicKernel
->CodeGen();
2843 pDynamicKernel
->CreateKernel();
2845 catch (const UnhandledToken
&ut
) {
2846 std::cerr
<< "\nDynamic formual compiler: unhandled token: ";
2847 std::cerr
<< ut
.mMessage
<< " at ";
2848 std::cerr
<< ut
.mFile
<< ":" << ut
.mLineNumber
<< "\n";
2849 #ifdef NO_FALLBACK_TO_SWINTERP
2852 free(pDynamicKernel
);
2856 return pDynamicKernel
;
2859 CompiledFormula
* FormulaGroupInterpreterOpenCL::createCompiledFormula(ScDocument
& rDoc
,
2860 const ScAddress
& rTopPos
,
2861 ScFormulaCellGroupRef
& xGroup
,
2862 ScTokenArray
& rCode
)
2864 ScTokenArray
*pCode
= new ScTokenArray();
2865 ScGroupTokenConverter
aConverter(*pCode
, rDoc
, *xGroup
->mpTopCell
, rTopPos
);
2866 if (!aConverter
.convert(rCode
))
2871 DynamicKernel
*result
= DynamicKernel::create(rDoc
, rTopPos
, *pCode
);
2872 result
->SetPCode(pCode
);
2876 bool FormulaGroupInterpreterOpenCL::interpret( ScDocument
& rDoc
,
2877 const ScAddress
& rTopPos
, ScFormulaCellGroupRef
& xGroup
,
2878 ScTokenArray
& rCode
)
2880 DynamicKernel
*pKernel
;
2882 osl::ResettableMutexGuard
aGuard(xGroup
->maMutex
);
2883 if (xGroup
->meCalcState
== sc::GroupCalcOpenCLKernelCompilationScheduled
||
2884 xGroup
->meCalcState
== sc::GroupCalcOpenCLKernelBinaryCreated
)
2886 if (xGroup
->meCalcState
== sc::GroupCalcOpenCLKernelCompilationScheduled
)
2889 xGroup
->maCompilationDone
.wait();
2890 xGroup
->maCompilationDone
.reset();
2897 pKernel
= static_cast<DynamicKernel
*>(xGroup
->mpCompiledFormula
);
2901 assert(xGroup
->meCalcState
== sc::GroupCalcRunning
);
2903 pKernel
= static_cast<DynamicKernel
*>(createCompiledFormula(rDoc
, rTopPos
, xGroup
, rCode
));
2910 // Obtain cl context
2912 OpenclDevice::setKernelEnv(&kEnv
);
2914 pKernel
->Launch(xGroup
->mnLength
);
2916 cl_mem res
= pKernel
->GetResultBuffer();
2918 double *resbuf
= new double[xGroup
->mnLength
];
2919 err
= clEnqueueReadBuffer(kEnv
.mpkCmdQueue
,res
,
2920 CL_TRUE
, 0, xGroup
->mnLength
*sizeof(double), resbuf
, 0, NULL
, NULL
);
2921 if (err
!= CL_SUCCESS
)
2922 throw OpenCLError(err
);
2924 rDoc
.SetFormulaResults(rTopPos
, resbuf
, xGroup
->mnLength
);
2926 if (xGroup
->meCalcState
== sc::GroupCalcRunning
)
2929 catch (const UnhandledToken
&ut
) {
2930 std::cerr
<< "\nDynamic formual compiler: unhandled token: ";
2931 std::cerr
<< ut
.mMessage
<< "\n";
2932 #ifdef NO_FALLBACK_TO_SWINTERP
2939 catch (const OpenCLError
&oce
) {
2940 std::cerr
<< "Dynamic formula compiler: OpenCL error: ";
2941 std::cerr
<< oce
.mError
<< "\n";
2942 #ifdef NO_FALLBACK_TO_SWINTERP
2949 catch (const Unhandled
&uh
) {
2950 std::cerr
<< "Dynamic formula compiler: unhandled case:";
2952 std::cerr
<< uh
.mFile
<< ":" << uh
.mLineNumber
<< "\n";
2953 #ifdef NO_FALLBACK_TO_SWINTERP
2961 std::cerr
<< "Dynamic formula compiler: unhandled compiler error\n";
2962 #ifdef NO_FALLBACK_TO_SWINTERP
2970 } // namespace opencl
2976 SAL_DLLPUBLIC_EXPORT
sc::FormulaGroupInterpreter
* SAL_CALL
2977 createFormulaGroupOpenCLInterpreter()
2979 #if 0// USE_GROUNDWATER_INTERPRETER
2980 if (getenv("SC_GROUNDWATER"))
2981 return new sc::opencl::FormulaGroupInterpreterGroundwater();
2984 return new sc::opencl::FormulaGroupInterpreterOpenCL();
2987 SAL_DLLPUBLIC_EXPORT
size_t getOpenCLPlatformCount()
2989 return sc::opencl::getOpenCLPlatformCount();
2992 SAL_DLLPUBLIC_EXPORT
void SAL_CALL
fillOpenCLInfo(
2993 sc::OpenclPlatformInfo
* pInfos
, size_t nInfoSize
)
2995 const std::vector
<sc::OpenclPlatformInfo
>& rPlatforms
=
2996 sc::opencl::fillOpenCLInfo();
2997 size_t n
= std::min(rPlatforms
.size(), nInfoSize
);
2998 for (size_t i
= 0; i
< n
; ++i
)
2999 pInfos
[i
] = rPlatforms
[i
];
3002 SAL_DLLPUBLIC_EXPORT
bool SAL_CALL
switchOpenClDevice(
3003 const OUString
* pDeviceId
, bool bAutoSelect
,
3004 bool bForceEvaluation
)
3006 return sc::opencl::switchOpenclDevice(pDeviceId
, bAutoSelect
, bForceEvaluation
);
3009 SAL_DLLPUBLIC_EXPORT
void SAL_CALL
getOpenCLDeviceInfo(size_t* pDeviceId
, size_t* pPlatformId
)
3011 sc::opencl::getOpenCLDeviceInfo(*pDeviceId
, *pPlatformId
);
3016 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */