1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include "op_math.hxx"
12 #include <formula/vectortoken.hxx>
13 #include "op_math_helpers.hxx"
16 using namespace formula
;
18 namespace sc::opencl
{
// Writes the start of the kernel function for an operation taking one
// argument: after CHECK_PARAMETER_COUNT( 1, 1 ) it emits the function
// declaration, the "gid0" work-item index and argument 0 via GenerateArg().
// NOTE(review): the line numbering in this listing skips entries inside this
// body, so some statements are presumably not shown here; confirm against the
// full file before relying on this description.
20 void OpMathOneArgument::GenSlidingWindowFunction(outputstream
&ss
,
21 const std::string
&sSymName
, SubArguments
&vSubArguments
)
23 CHECK_PARAMETER_COUNT( 1, 1 );
24 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
26 ss
<< " int gid0 = get_global_id(0);\n";
27 GenerateArg( 0, vSubArguments
, ss
);
// Writes the start of the kernel function for an operation taking two
// arguments: after CHECK_PARAMETER_COUNT( 2, 2 ) it emits the function
// declaration, a zero-initialised "tmp", the "gid0" work-item index and then
// arguments 0 and 1 via GenerateArg().
// NOTE(review): the line numbering in this listing skips entries inside this
// body, so some statements are presumably not shown here; confirm against the
// full file.
32 void OpMathTwoArguments::GenSlidingWindowFunction(outputstream
&ss
,
33 const std::string
&sSymName
, SubArguments
&vSubArguments
)
35 CHECK_PARAMETER_COUNT( 2, 2 );
36 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
38 ss
<< " double tmp = 0;\n";
39 ss
<< " int gid0 = get_global_id(0);\n";
40 GenerateArg( 0, vSubArguments
, ss
);
41 GenerateArg( 1, vSubArguments
, ss
);
46 void OpCos::GenerateCode( outputstream
& ss
) const
48 ss
<< " return cos(arg0);\n";
51 void OpSec::GenerateCode( outputstream
& ss
) const
53 ss
<< " return 1.0 / cos(arg0);\n";
56 void OpSecH::GenerateCode( outputstream
& ss
) const
58 ss
<< " return 1.0 / cosh(arg0);\n";
61 void OpCosh::BinInlineFun(std::set
<std::string
>& decls
,
62 std::set
<std::string
>& funs
)
64 decls
.insert(local_coshDecl
);
65 funs
.insert(local_cosh
);
68 void OpCosh::GenerateCode( outputstream
& ss
) const
70 ss
<< " return local_cosh(arg0);\n";
73 void OpCot::GenerateCode( outputstream
& ss
) const
75 ss
<< " arg0 = arg0 * M_1_PI;\n";
76 ss
<< " return cospi(arg0) / sinpi(arg0);\n";
79 void OpCoth::BinInlineFun(std::set
<std::string
>& decls
,
80 std::set
<std::string
>& funs
)
82 decls
.insert(local_cothDecl
);
83 funs
.insert(local_coth
);
86 void OpCoth::GenerateCode( outputstream
& ss
) const
88 ss
<< " return local_coth(arg0);\n";
91 void OpEven::GenerateCode( outputstream
& ss
) const
93 ss
<< " double tmp = fabs(arg0 / 2);\n";
94 ss
<< " if ( trunc(tmp) == tmp )\n";
95 ss
<< " tmp = tmp * 2;\n";
97 ss
<< " tmp = (trunc(tmp) + 1) * 2;\n";
98 ss
<< " if (arg0 < 0)\n";
99 ss
<< " tmp = tmp * -1.0;\n";
100 ss
<< " return tmp;\n";
103 void OpCsc::GenerateCode( outputstream
& ss
) const
105 ss
<< " return 1/sin(arg0);\n";
108 void OpCscH::GenerateCode( outputstream
& ss
) const
110 ss
<< " return 1/sinh(arg0);\n";
113 void OpExp::GenerateCode( outputstream
& ss
) const
115 ss
<< " return pow(M_E, arg0);\n";
118 void OpLog10::GenerateCode( outputstream
& ss
) const
120 ss
<< " return log10(arg0);\n";
123 void OpSinh::GenerateCode( outputstream
& ss
) const
125 ss
<< " return ( exp(arg0)-exp(-arg0) )/2;\n";
128 void OpSin::GenerateCode( outputstream
& ss
) const
130 ss
<< " arg0 = arg0 * M_1_PI;\n";
131 ss
<< " return sinpi(arg0);\n";
134 void OpAbs::GenerateCode( outputstream
& ss
) const
136 ss
<< " return fabs(arg0);\n";
139 void OpArcCos::BinInlineFun(std::set
<std::string
>& decls
,
140 std::set
<std::string
>& funs
)
142 decls
.insert(atan2Decl
);
143 funs
.insert(atan2Content
);
146 void OpArcCos::GenerateCode( outputstream
& ss
) const
148 ss
<< " return arctan2(sqrt(1.0 - pow(arg0, 2)), arg0);\n";
151 void OpArcCosHyp::GenerateCode( outputstream
& ss
) const
153 ss
<< " if( arg0 < 1 )\n";
154 ss
<< " return CreateDoubleError(IllegalArgument);\n";
155 ss
<< " return log( arg0 + pow( (pown(arg0, 2) - 1.0), 0.5));\n";
158 void OpTan::GenerateCode( outputstream
& ss
) const
160 ss
<< " arg0 = arg0 * M_1_PI;\n";
161 ss
<< " return sinpi(arg0) / cospi(arg0);\n";
164 void OpTanH::GenerateCode( outputstream
& ss
) const
166 ss
<< " return tanh(arg0);\n";
169 void OpSqrt::GenerateCode( outputstream
& ss
) const
171 ss
<< " if( arg0 < 0 )\n";
172 ss
<< " return CreateDoubleError(IllegalArgument);\n";
173 ss
<< " return sqrt(arg0);\n";
176 void OpArcCot::GenerateCode( outputstream
& ss
) const
178 ss
<< " return M_PI_2 - atan(arg0);\n";
181 void OpArcCotHyp::GenerateCode( outputstream
& ss
) const
183 ss
<< " return 0.5 * log(1 + 2 / (arg0 - 1.0));\n";
186 void OpArcSin::BinInlineFun(std::set
<std::string
>& decls
,
187 std::set
<std::string
>& funs
)
189 decls
.insert(atan2Decl
);
190 funs
.insert(atan2Content
);
193 void OpArcSin::GenerateCode( outputstream
& ss
) const
195 ss
<< " return arctan2(arg0, sqrt(1.0 - pow(arg0, 2)));\n";
198 void OpArcSinHyp::GenerateCode( outputstream
& ss
) const
200 ss
<< " return log( arg0 + pow((pown(arg0, 2) + 1.0), 0.5));\n";
203 void OpArcTan::GenerateCode( outputstream
& ss
) const
205 ss
<< " return atan(arg0);\n";
208 void OpArcTanH::GenerateCode( outputstream
& ss
) const
210 ss
<< " double a = 1.0 + arg0;\n";
211 ss
<< " double b = 1.0 - arg0;\n";
212 ss
<< " return log(pow(a/b, 0.5));\n";
215 void OpLn::GenerateCode( outputstream
& ss
) const
217 ss
<< " return log1p(arg0-1);\n";
220 void OpInt::BinInlineFun(std::set
<std::string
>& decls
,
221 std::set
<std::string
>& funs
)
223 decls
.insert(is_representable_integerDecl
);
224 funs
.insert(is_representable_integer
);
225 decls
.insert(value_approxDecl
);
226 funs
.insert(value_approx
);
229 void OpInt::GenerateCode( outputstream
& ss
) const
231 ss
<< " return floor( value_approx( arg0 ));\n";
234 void OpNegSub::GenerateCode( outputstream
& ss
) const
236 ss
<< " return -arg0;\n";
239 void OpRadians::GenerateCode( outputstream
& ss
) const
241 ss
<< " return arg0 * M_PI / 180.0;\n";
244 void OpIsEven::GenerateCode( outputstream
& ss
) const
246 ss
<< " return (fmod(floor(fabs(arg0)), 2.0)<0.5);\n";
249 void OpIsOdd::GenerateCode( outputstream
& ss
) const
251 ss
<< " return !(fmod(floor(fabs(arg0)), 2.0)<0.5);\n";
254 void OpSqrtPi::GenerateCode( outputstream
& ss
) const
256 ss
<< " return (double)sqrt(arg0 * M_PI);\n";
259 void OpDeg::GenerateCode( outputstream
& ss
) const
261 ss
<< " return arg0 / M_PI * 180;;\n";
264 void OpFact::GenerateCode( outputstream
& ss
) const
266 ss
<< " arg0 = floor(arg0);\n";
267 ss
<< " if (arg0 < 0.0)\n";
268 ss
<< " return CreateDoubleError(IllegalArgument);\n";
269 ss
<< " else if (arg0 == 0.0)\n";
270 ss
<< " return 1.0;\n";
271 ss
<< " else if (arg0 <= 170.0)\n";
273 ss
<< " double fTemp = arg0;\n";
274 ss
<< " while (fTemp > 2.0)\n";
276 ss
<< " fTemp = fTemp - 1;\n";
277 ss
<< " arg0 = arg0 * fTemp;\n";
281 ss
<< " return CreateDoubleError(NoValue);\n";
282 ss
<< " return arg0;\n";
285 void OpOdd::BinInlineFun(std::set
<std::string
>& decls
,
286 std::set
<std::string
>& funs
)
288 decls
.insert(Math_IntgDecl
);
289 funs
.insert(Math_Intg
);
// Emits the kernel statements that turn arg0 into an odd integer: for a
// positive or negative argument the value from Intg(arg0) is moved one step
// further from zero whenever it is even, and "tmp" is returned.
// NOTE(review): the line numbering in this listing skips two entries after
// the negative branch, so the handling of arg0 == 0 (and the closing of the
// emitted block) is presumably not shown here; confirm against the full file.
292 void OpOdd::GenerateCode( outputstream
& ss
) const
294 ss
<< " double tmp;\n";
295 ss
<< " if (arg0 > 0.0 ){\n";
296 ss
<< " tmp=Intg(arg0);\n";
297 ss
<< " if(tmp-trunc(tmp/2)*2 == 0)\n";
298 ss
<< " tmp=tmp+1;\n";
299 ss
<< " }else if (arg0 < 0.0 ){\n";
300 ss
<< " tmp=Intg(arg0);\n";
301 ss
<< " if(tmp-trunc(tmp/2)*2 == 0)\n";
302 ss
<< " tmp=tmp-1.0;\n";
305 ss
<< " return tmp;\n";
308 void OpMROUND::GenerateCode( outputstream
& ss
) const
310 ss
<<" if(arg1==0)\n";
311 ss
<<" return arg1;\n";
312 ss
<<" tmp=arg1 * round(arg0 / arg1);\n";
313 ss
<<" return tmp;\n";
// Registers the declaration of the bik helper that the statements emitted
// by OpCombinA::GenerateCode call.
// NOTE(review): only the declaration insert is visible here; the line
// numbering in this listing skips an entry, so the matching insert into
// "funs" is presumably not shown. Confirm against the full file.
316 void OpCombinA::BinInlineFun(std::set
<std::string
>& decls
,
317 std::set
<std::string
>& funs
)
319 decls
.insert(bikDecl
);
// Emits the kernel statements for combinations with repetition: both
// arguments are truncated, invalid combinations (negative values or
// arg1 > arg0) yield an IllegalArgument error, the general case is
// bik(arg0+arg1-1, arg1), and the result "tem" is finally rounded to the
// nearest integer by inspecting its fractional part.
// NOTE(review): the line numbering in this listing skips entries inside the
// if/else chain and the final rounding, so the values assigned in the
// special cases are not shown here; confirm against the full file.
323 void OpCombinA::GenerateCode( outputstream
& ss
) const
325 ss
<< " arg0 = trunc(arg0);\n";
326 ss
<< " arg1 = trunc(arg1);\n";
327 ss
<< " if (arg0 < 0.0 || arg1 < 0.0 || arg1 > arg0)\n";
328 ss
<< " return CreateDoubleError(IllegalArgument);\n";
329 ss
<< " double tem;\n";
330 ss
<< " if(arg0 >= arg1 && arg0 > 0 && arg1 > 0)\n";
331 ss
<< " tem = bik(arg0+arg1-1,arg1);\n";
332 ss
<< " else if(arg0 == 0 && arg1 == 0)\n";
334 ss
<< " else if(arg0 > 0 && arg1 == 0)\n";
337 ss
<< " tem = -1;\n";
338 ss
<< " double i = tem - trunc(tem);\n";
339 ss
<< " if(i < 0.5)\n";
340 ss
<< " tem = trunc(tem);\n";
342 ss
<< " tem = trunc(tem) + 1;\n";
343 ss
<< " return tem;\n";
346 void OpCombin::GenerateCode( outputstream
& ss
) const
348 ss
<< " double result = -1.0;\n";
349 ss
<< " double num = floor( arg0 );\n";
350 ss
<< " double num_chosen = floor( arg1 );\n";
351 ss
<< " if(num < 0 || num_chosen < 0 || num < num_chosen )\n";
352 ss
<< " return CreateDoubleError(IllegalArgument);\n";
353 ss
<< " result = select(result, 0.0, (ulong)(num < num_chosen));\n";
354 ss
<< " result = select(result, 1.0, (ulong)(num_chosen == 0.0));\n";
355 ss
<< " if(result == 0 || result ==1)\n";
356 ss
<< " return result;\n";
357 ss
<< " double4 db4num;\n";
358 ss
<< " double4 db4num_chosen;\n";
359 ss
<< " double4 db4result;\n";
360 ss
<< " double2 db2result;\n";
361 ss
<< " result = 1.0;\n";
362 ss
<< " int loop = num_chosen/4;\n";
363 ss
<< " for(int i=0; i<loop; i++)\n";
365 ss
<< " db4num = (double4){num,\n";
368 ss
<< " num-3.0};\n";
369 ss
<< " db4num_chosen = (double4){num_chosen,\n";
370 ss
<< " num_chosen-1.0,\n";
371 ss
<< " num_chosen-2.0,\n";
372 ss
<< " num_chosen-3.0};\n";
373 ss
<< " db4result = db4num / db4num_chosen;\n";
374 ss
<< " db2result = db4result.xy * db4result.zw;\n";
375 ss
<< " result *= db2result.x * db2result.y;\n";
376 ss
<< " num = num - 4.0;\n";
377 ss
<< " num_chosen = num_chosen - 4.0;\n";
379 ss
<< " while ( num_chosen > 0){\n";
380 ss
<< " result *= num / num_chosen;\n";
381 ss
<< " num = num - 1.0;\n";
382 ss
<< " num_chosen = num_chosen - 1.0;\n";
384 ss
<< " return result;\n";
387 void OpMod::BinInlineFun(std::set
<std::string
>& decls
,std::set
<std::string
>& funs
)
389 decls
.insert(is_representable_integerDecl
);
390 funs
.insert(is_representable_integer
);
391 decls
.insert(approx_equalDecl
);
392 funs
.insert(approx_equal
);
393 decls
.insert(fsub_approxDecl
);
394 funs
.insert(fsub_approx
);
395 decls
.insert(value_approxDecl
);
396 funs
.insert(value_approx
);
399 void OpMod::GenerateCode( outputstream
& ss
) const
401 ss
<< " double fNum = arg0;\n";
402 ss
<< " double fDenom = arg1;\n";
403 ss
<< " if(fDenom == 0)\n";
404 ss
<< " return CreateDoubleError(DivisionByZero);\n";
405 ss
<< " double fRes = fsub_approx( fNum, floor( value_approx( fNum / fDenom )) * fDenom );\n";
406 ss
<< " if ( ( fDenom > 0 && fRes >= 0 && fRes < fDenom ) ||\n";
407 ss
<< " ( fDenom < 0 && fRes <= 0 && fRes > fDenom ) )\n";
408 ss
<< " return fRes;\n";
409 ss
<< " return CreateDoubleError(NoValue);\n";
412 void OpPower::GenerateCode( outputstream
& ss
) const
414 ss
<< " return pow(arg0,arg1);\n";
417 void OpArcTan2::BinInlineFun(std::set
<std::string
>& decls
,
418 std::set
<std::string
>& funs
)
420 decls
.insert(atan2Decl
);
421 funs
.insert(atan2Content
);
424 void OpArcTan2::GenerateCode( outputstream
& ss
) const
426 ss
<< " return arctan2(arg1, arg0);\n";
429 void OpBitAnd::GenerateCode( outputstream
& ss
) const
431 ss
<< " if( arg0 < 0 || arg1 < 0 || arg0 >= 281474976710656.0 || arg1 >= 281474976710656.0 )\n";
432 ss
<< " return CreateDoubleError(IllegalArgument);\n";
433 ss
<< " return (long)arg0 & (long)arg1;\n";
436 void OpBitOr::GenerateCode( outputstream
& ss
) const
438 ss
<< " if( arg0 < 0 || arg1 < 0 || arg0 >= 281474976710656.0 || arg1 >= 281474976710656.0 )\n";
439 ss
<< " return CreateDoubleError(IllegalArgument);\n";
440 ss
<< " return (long)arg0 | (long)arg1;\n";
443 void OpBitXor::GenerateCode( outputstream
& ss
) const
445 ss
<< " if( arg0 < 0 || arg1 < 0 || arg0 >= 281474976710656.0 || arg1 >= 281474976710656.0 )\n";
446 ss
<< " return CreateDoubleError(IllegalArgument);\n";
447 ss
<< " return (long)arg0 ^ (long)arg1;\n";
450 void OpBitLshift::GenerateCode( outputstream
& ss
) const
452 ss
<< " double num = floor( arg0 );\n";
453 ss
<< " double shift_amount = floor( arg1 );\n";
454 ss
<< " if( num < 0 || num >= 281474976710656.0 )\n";
455 ss
<< " return CreateDoubleError(IllegalArgument);\n";
456 ss
<< " return floor(shift_amount >= 0 ? ";
457 ss
<< "num * pow(2.0, shift_amount) : ";
458 ss
<< "num / pow(2.0, fabs(shift_amount)));\n";
461 void OpBitRshift::GenerateCode( outputstream
& ss
) const
463 ss
<< " double num = floor( arg0 );\n";
464 ss
<< " double shift_amount = floor( arg1 );\n";
465 ss
<< " if( num < 0 || num >= 281474976710656.0 )\n";
466 ss
<< " return CreateDoubleError(IllegalArgument);\n";
467 ss
<< " return floor(";
468 ss
<< "shift_amount >= 0 ? num / pow(2.0, shift_amount) : ";
469 ss
<< "num * pow(2.0, fabs(shift_amount)));\n";
472 void OpQuotient::GenerateCode( outputstream
& ss
) const
474 ss
<< " return trunc(arg0/arg1);\n";
477 void OpEqual::BinInlineFun(std::set
<std::string
>& decls
,
478 std::set
<std::string
>& funs
)
480 decls
.insert(is_representable_integerDecl
);
481 funs
.insert(is_representable_integer
);
482 decls
.insert(approx_equalDecl
);
483 funs
.insert(approx_equal
);
484 decls
.insert(cell_equalDecl
);
485 funs
.insert(cell_equal
);
// Generates the kernel function for the equality comparison. After
// CHECK_PARAMETER_COUNT( 2, 2 ) it emits the function declaration, a
// zero-initialised "tmp", the "gid0" work-item index, loads both arguments
// with the EmptyIsNan and GenerateArgType options, and emits a return of
// cell_equal( arg0, arg1, arg0_is_string, arg1_is_string ).
// NOTE(review): the line numbering in this listing skips entries inside this
// body, so some statements are presumably not shown here; confirm against the
// full file.
488 void OpEqual::GenSlidingWindowFunction(outputstream
&ss
,
489 const std::string
&sSymName
, SubArguments
&vSubArguments
)
491 CHECK_PARAMETER_COUNT( 2, 2 );
492 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
494 ss
<< " double tmp = 0;\n";
495 ss
<< " int gid0 = get_global_id(0);\n";
496 GenerateArg( 0, vSubArguments
, ss
, EmptyIsNan
, GenerateArgType
);
497 GenerateArg( 1, vSubArguments
, ss
, EmptyIsNan
, GenerateArgType
);
498 ss
<< " return cell_equal( arg0, arg1, arg0_is_string, arg1_is_string );\n";
502 void OpNotEqual::BinInlineFun(std::set
<std::string
>& decls
,
503 std::set
<std::string
>& funs
)
505 decls
.insert(is_representable_integerDecl
);
506 funs
.insert(is_representable_integer
);
507 decls
.insert(approx_equalDecl
);
508 funs
.insert(approx_equal
);
509 decls
.insert(cell_equalDecl
);
510 funs
.insert(cell_equal
);
// Generates the kernel function for the inequality comparison. After
// CHECK_PARAMETER_COUNT( 2, 2 ) it emits the function declaration, a
// zero-initialised "tmp", the "gid0" work-item index, loads both arguments
// with the EmptyIsNan and GenerateArgType options, and emits a return of the
// negated cell_equal( arg0, arg1, arg0_is_string, arg1_is_string ).
// NOTE(review): the line numbering in this listing skips entries inside this
// body, so some statements are presumably not shown here; confirm against the
// full file.
513 void OpNotEqual::GenSlidingWindowFunction(outputstream
&ss
,
514 const std::string
&sSymName
, SubArguments
&vSubArguments
)
516 CHECK_PARAMETER_COUNT( 2, 2 );
517 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
519 ss
<< " double tmp = 0;\n";
520 ss
<< " int gid0 = get_global_id(0);\n";
521 GenerateArg( 0, vSubArguments
, ss
, EmptyIsNan
, GenerateArgType
);
522 GenerateArg( 1, vSubArguments
, ss
, EmptyIsNan
, GenerateArgType
);
523 ss
<< " return !cell_equal( arg0, arg1, arg0_is_string, arg1_is_string );\n";
527 void OpLessEqual::BinInlineFun(std::set
<std::string
>& decls
,
528 std::set
<std::string
>& funs
)
530 decls
.insert(is_representable_integerDecl
);
531 funs
.insert(is_representable_integer
);
532 decls
.insert(approx_equalDecl
);
533 funs
.insert(approx_equal
);
536 void OpLessEqual::GenerateCode( outputstream
& ss
) const
538 ss
<< " return approx_equal( arg0, arg1 ) || arg0 <= arg1;\n";
541 void OpLess::GenerateCode( outputstream
& ss
) const
543 ss
<< " return arg0 < arg1;\n";
546 void OpGreaterEqual::BinInlineFun(std::set
<std::string
>& decls
,
547 std::set
<std::string
>& funs
)
549 decls
.insert(is_representable_integerDecl
);
550 funs
.insert(is_representable_integer
);
551 decls
.insert(approx_equalDecl
);
552 funs
.insert(approx_equal
);
555 void OpGreaterEqual::GenerateCode( outputstream
& ss
) const
557 ss
<< " return approx_equal( arg0, arg1 ) || arg0 >= arg1;\n";
560 void OpGreater::GenerateCode( outputstream
& ss
) const
562 ss
<< " return arg0 > arg1;\n";
// Generates the kernel function for a logarithm with an optional base.
// After CHECK_PARAMETER_COUNT( 1, 2 ) it emits the function declaration and
// "gid0", loads the value as "arg0" and the base as "arg1" (10 when the
// second argument is absent), and returns log10(arg0)/log10(arg1).
// NOTE(review): the emitted return ends in ";;" (a harmless empty
// statement). The line numbering in this listing also skips entries inside
// this body, so some statements are presumably not shown here.
565 void OpLog::GenSlidingWindowFunction(outputstream
&ss
,
566 const std::string
&sSymName
, SubArguments
&vSubArguments
)
568 CHECK_PARAMETER_COUNT( 1, 2 );
569 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
571 ss
<< " int gid0 = get_global_id(0);\n";
572 GenerateArg( "arg0", 0, vSubArguments
, ss
);
573 GenerateArgWithDefault( "arg1", 1, 10, vSubArguments
, ss
);
574 ss
<< " return log10(arg0)/log10(arg1);;\n";
// Generates the kernel function for a multi-criteria count.
// The first argument's token is cast to a DoubleVectorRefToken and the
// window size is the smaller of its array length and its reference row
// size. The emitted code keeps an int counter "tmp", the arguments are
// visited two at a time to build the per-row test text in "tmpss", the test
// is expanded over the window by UnrollDoubleVector(), and "tmp" is returned.
// NOTE(review): the cast of argument 0 is a static_cast with no visible type
// check. The line numbering in this listing skips many entries inside this
// body (including the declarations of "tmpss" and "m"), so this description
// covers only the visible statements; confirm against the full file.
578 void OpCountIfs::GenSlidingWindowFunction(outputstream
&ss
,
579 const std::string
&sSymName
, SubArguments
&vSubArguments
)
581 FormulaToken
*tmpCur
= vSubArguments
[0]->GetFormulaToken();
582 const formula::DoubleVectorRefToken
*pCurDVR
= static_cast<const
583 formula::DoubleVectorRefToken
*>(tmpCur
);
584 size_t nCurWindowSize
= pCurDVR
->GetArrayLength() <
585 pCurDVR
->GetRefRowSize() ? pCurDVR
->GetArrayLength():
586 pCurDVR
->GetRefRowSize() ;
587 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
589 ss
<<" int gid0=get_global_id(0);\n";
590 ss
<< " int tmp =0;\n";
591 ss
<< " int loop;\n";
592 GenTmpVariables(ss
,vSubArguments
);
594 ss
<< " int singleIndex =gid0;\n";
599 for(size_t j
=0;j
<vSubArguments
.size();j
+=2,m
++)
601 CheckSubArgumentIsNan(tmpss
,vSubArguments
,j
);
602 CheckSubArgumentIsNan(ss
,vSubArguments
,j
+1);
603 tmpss
<<" if(isequal(";
611 tmpss
<< " tmp ++;\n";
612 for(size_t j
=0;j
<vSubArguments
.size();j
+=2,m
--)
614 for(int n
= 0;n
<m
+1;n
++)
620 UnrollDoubleVector(ss
,tmpss
,pCurDVR
,nCurWindowSize
);
622 ss
<< "return tmp;\n";
// Generates the kernel code for a multi-criteria sum.
// The first argument's token is cast to a DoubleVectorRefToken and the
// window size is the smaller of its array length and its reference row size.
// When the first argument reports NeedParallelReduction(), a separate
// "<name>_SumIfs_reduction" kernel is emitted first: each work-item
// accumulates matches for positions p1 and p1 + 256 per 512-element chunk,
// partial sums are combined through the local "shm_buf" array with barriers,
// and work-item 0 writes the group's total to result[writePos]. A window
// with exactly one fixed end is rejected with Unhandled in that path.
// The regular function is generated afterwards; without the reduction kernel
// it loops over the window itself, adding "tmp0" to "tmp" for rows that pass
// every criterion, and it ends by returning "tmp".
// NOTE(review): the cast of argument 0 is a static_cast with no visible type
// check. The line numbering in this listing skips many entries inside this
// body (including the declarations of "m"/"mm" and most brace emission), so
// this description covers only the visible statements; confirm against the
// full file.
626 void OpSumIfs::GenSlidingWindowFunction(outputstream
&ss
,
627 const std::string
&sSymName
, SubArguments
&vSubArguments
)
629 FormulaToken
*tmpCur
= vSubArguments
[0]->GetFormulaToken();
630 const formula::DoubleVectorRefToken
*pCurDVR
= static_cast<const
631 formula::DoubleVectorRefToken
*>(tmpCur
);
632 size_t nCurWindowSize
= pCurDVR
->GetArrayLength() <
633 pCurDVR
->GetRefRowSize() ? pCurDVR
->GetArrayLength():
634 pCurDVR
->GetRefRowSize() ;
636 mNeedReductionKernel
= vSubArguments
[0]->NeedParallelReduction();
637 if (mNeedReductionKernel
)
639 // generate reduction functions
641 ss
<< "__kernel void ";
642 ss
<< vSubArguments
[0]->GetName();
643 ss
<< "_SumIfs_reduction( ";
644 for (size_t i
= 0; i
< vSubArguments
.size(); i
++)
648 vSubArguments
[i
]->GenSlidingWindowDecl(ss
);
650 ss
<< ", __global double *result,int arrayLength,int windowSize";
653 ss
<< " double tmp =0;\n";
656 GenTmpVariables(ss
,vSubArguments
);
657 ss
<< " double current_result = 0.0;\n";
658 ss
<< " int writePos = get_group_id(1);\n";
659 if (pCurDVR
->IsStartFixed() && pCurDVR
->IsEndFixed())
660 ss
<< " int offset = 0;\n";
661 else if (!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
662 ss
<< " int offset = get_group_id(1);\n";
664 throw Unhandled(__FILE__
, __LINE__
);
665 // actually unreachable
666 ss
<< " int lidx = get_local_id(0);\n";
667 ss
<< " __local double shm_buf[256];\n";
668 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
669 ss
<< " int loop = arrayLength/512 + 1;\n";
670 ss
<< " for (int l=0; l<loop; l++){\n";
671 ss
<< " tmp = 0.0;\n";
672 ss
<< " int loopOffset = l*512;\n";
674 ss
<< " int p1 = loopOffset + lidx + offset, p2 = p1 + 256;\n";
675 ss
<< " if (p2 < min(offset + windowSize, arrayLength)) {\n";
676 ss
<< " tmp0 = 0.0;\n";
678 std::string p1
= "p1";
679 std::string p2
= "p2";
680 for(size_t j
=1;j
<vSubArguments
.size();j
+=2,mm
++)
682 CheckSubArgumentIsNan2(ss
,vSubArguments
,j
,p1
);
683 CheckSubArgumentIsNan2(ss
,vSubArguments
,j
+1,p1
);
694 CheckSubArgumentIsNan2(ss
,vSubArguments
,0,p1
);
695 ss
<< " tmp += tmp0;\n";
696 for(size_t j
=1;j
<vSubArguments
.size();j
+=2,mm
--)
698 for(int n
= 0;n
<mm
+1;n
++)
705 for(size_t j
=1;j
<vSubArguments
.size();j
+=2,mm
++)
707 CheckSubArgumentIsNan2(ss
,vSubArguments
,j
,p2
);
708 CheckSubArgumentIsNan2(ss
,vSubArguments
,j
+1,p2
);
717 CheckSubArgumentIsNan2(ss
,vSubArguments
,0,p2
);
718 ss
<< " tmp += tmp0;\n";
719 for(size_t j
=1;j
< vSubArguments
.size();j
+=2,mm
--)
721 for(int n
= 0;n
<mm
+1;n
++)
729 ss
<< " else if (p1 < min(arrayLength, offset + windowSize)) {\n";
731 for(size_t j
=1;j
<vSubArguments
.size();j
+=2,mm
++)
733 CheckSubArgumentIsNan2(ss
,vSubArguments
,j
,p1
);
734 CheckSubArgumentIsNan2(ss
,vSubArguments
,j
+1,p1
);
744 CheckSubArgumentIsNan2(ss
,vSubArguments
,0,p1
);
745 ss
<< " tmp += tmp0;\n";
746 for(size_t j
=1;j
<vSubArguments
.size();j
+=2,mm
--)
748 for(int n
= 0;n
<mm
+1;n
++)
756 ss
<< " shm_buf[lidx] = tmp;\n";
757 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
758 ss
<< " for (int i = 128; i >0; i/=2) {\n";
759 ss
<< " if (lidx < i)\n";
760 ss
<< " shm_buf[lidx] += shm_buf[lidx + i];\n";
761 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
763 ss
<< " if (lidx == 0)\n";
764 ss
<< " current_result += shm_buf[0];\n";
765 ss
<< " barrier(CLK_LOCAL_MEM_FENCE);\n";
768 ss
<< " if (lidx == 0)\n";
769 ss
<< " result[writePos] = current_result;\n";
771 }// finish generate reduction code
772 // generate functions as usual
773 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
775 ss
<<" int gid0=get_global_id(0);\n";
776 ss
<< " double tmp =0;\n";
777 if (!mNeedReductionKernel
)
780 GenTmpVariables(ss
,vSubArguments
);
782 if (!pCurDVR
->IsStartFixed() && pCurDVR
->IsEndFixed()) {
783 ss
<< "gid0; i < "<< nCurWindowSize
<<"; i++)\n";
784 } else if (pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed()) {
785 ss
<< "0; i < gid0+"<< nCurWindowSize
<<"; i++)\n";
787 ss
<< "0; i < "<< nCurWindowSize
<<"; i++)\n";
790 if(!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
792 ss
<< " int doubleIndex =i+gid0;\n";
795 ss
<< " int doubleIndex =i;\n";
797 ss
<< " int singleIndex =gid0;\n";
799 for(size_t j
=1;j
<vSubArguments
.size();j
+=2,m
++)
801 CheckSubArgumentIsNan(ss
,vSubArguments
,j
);
802 CheckSubArgumentIsNan(ss
,vSubArguments
,j
+1);
811 CheckSubArgumentIsNan(ss
,vSubArguments
,0);
812 ss
<< " tmp += tmp0;\n";
813 for(size_t j
=1;j
<=vSubArguments
.size();j
+=2,m
--)
815 for(int n
= 0;n
<m
+1;n
++)
822 if (mNeedReductionKernel
)
825 vSubArguments
[0]->GenDeclRef(ss
);
828 ss
<< "return tmp;\n";
// Generates the kernel function for a multi-criteria average.
// The first argument's token is cast to a DoubleVectorRefToken and the
// window size is the smaller of its array length and its reference row size.
// The per-row text built in "tmpss" adds "tmp0" to "tmp" and increments
// "count" for rows that pass the criteria; UnrollDoubleVector() expands it
// over the window, after which "tmp" is divided by "count" when the count is
// non-zero, and "tmp" is returned.
// NOTE(review): the cast of argument 0 is a static_cast with no visible type
// check. The line numbering in this listing skips many entries inside this
// body (including the declarations of "tmpss" and "m" and whatever is
// emitted when count is zero), so this description covers only the visible
// statements; confirm against the full file.
832 void OpAverageIfs::GenSlidingWindowFunction(outputstream
&ss
,
833 const std::string
&sSymName
, SubArguments
&vSubArguments
)
835 FormulaToken
*tmpCur
= vSubArguments
[0]->GetFormulaToken();
836 const formula::DoubleVectorRefToken
*pCurDVR
= static_cast<const
837 formula::DoubleVectorRefToken
*>(tmpCur
);
838 size_t nCurWindowSize
= pCurDVR
->GetArrayLength() <
839 pCurDVR
->GetRefRowSize() ? pCurDVR
->GetArrayLength():
840 pCurDVR
->GetRefRowSize() ;
841 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
843 ss
<<" int gid0=get_global_id(0);\n";
844 ss
<< " double tmp =0;\n";
845 ss
<< " int count=0;\n";
847 GenTmpVariables(ss
,vSubArguments
);
848 ss
<< " int singleIndex =gid0;\n";
851 for(size_t j
=1;j
<vSubArguments
.size();j
+=2,m
++)
853 CheckSubArgumentIsNan(tmpss
,vSubArguments
,j
);
854 CheckSubArgumentIsNan(ss
,vSubArguments
,j
+1);
855 tmpss
<<" if(isequal(";
863 CheckSubArgumentIsNan(tmpss
,vSubArguments
,0);
864 tmpss
<< " tmp += tmp0;\n";
865 tmpss
<< " count++;\n";
866 for(size_t j
=1;j
<vSubArguments
.size();j
+=2,m
--)
868 for(int n
= 0;n
<m
+1;n
++)
875 UnrollDoubleVector(ss
,tmpss
,pCurDVR
,nCurWindowSize
);
877 ss
<< " if(count!=0)\n";
878 ss
<< " tmp=tmp/count;\n";
881 ss
<< "return tmp;\n";
// Registers the declarations nCorrValDecl and RoundDecl for the helpers used
// by the code emitted in OpRound::GenSlidingWindowFunction.
// NOTE(review): only inserts into "decls" are visible; the line numbering in
// this listing skips an entry, so an insert into "funs" is presumably not
// shown. Confirm against the full file.
885 void OpRound::BinInlineFun(std::set
<std::string
>& decls
,std::set
<std::string
>& funs
)
887 decls
.insert(nCorrValDecl
);
888 decls
.insert(RoundDecl
);
// Generates the kernel function for rounding "value" to an optional number
// of decimal places. With a single argument the emitted code returns
// round(value). With two, the place count is floored into "dec"; values
// outside [-20, 20] yield an IllegalArgument error, zero falls back to
// round(value), and otherwise |value| is scaled by 10^dec, passed through
// Round(), scaled back and given the original sign with copysign().
// NOTE(review): the line numbering in this listing skips entries inside this
// body (around the one-argument branch and at both ends), so some statements
// are presumably not shown here; confirm against the full file.
892 void OpRound::GenSlidingWindowFunction(outputstream
&ss
,
893 const std::string
&sSymName
, SubArguments
&vSubArguments
)
895 CHECK_PARAMETER_COUNT( 1, 2 );
896 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
898 ss
<< " int gid0=get_global_id(0);\n";
899 GenerateArg( "value", 0, vSubArguments
, ss
);
900 if(vSubArguments
.size() ==1)
901 ss
<< " return round(value);\n";
904 GenerateArg( "fDec", 1, vSubArguments
, ss
);
905 ss
<< " int dec = floor( fDec );\n";
906 ss
<< " if( dec < -20 || dec > 20 )\n";
907 ss
<< " return CreateDoubleError( IllegalArgument );\n";
908 ss
<< " if( dec == 0 )\n";
909 ss
<< " return round(value);\n";
910 ss
<< " double orig_value = value;\n";
911 ss
<< " value = fabs(value);\n";
912 ss
<< " double multiply = pown(10.0, dec);\n";
913 ss
<< " double tmp = value*multiply;\n";
914 ss
<< " tmp = Round( tmp );\n";
915 ss
<< " return copysign(tmp/multiply, orig_value);\n";
// Generates the kernel function for rounding "value" away from zero to an
// optional number of decimal places (default 0). The place count is floored
// into "dec"; values outside [-20, 20] yield an IllegalArgument error.
// |value| is scaled by 10^dec and split with modf(); a fractional part that
// is negligible keeps the integral part, the other visible assignment uses
// integral + 1. The result is scaled back and given the original sign.
// NOTE(review): the line numbering in this listing skips entries inside this
// body (including one between the two assignments to "tmp"), so some
// statements are presumably not shown here; confirm against the full file.
920 void OpRoundUp::GenSlidingWindowFunction(outputstream
&ss
,
921 const std::string
&sSymName
, SubArguments
&vSubArguments
)
923 CHECK_PARAMETER_COUNT( 1, 2 );
924 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
926 ss
<< " int gid0=get_global_id(0);\n";
927 GenerateArg( "value", 0, vSubArguments
, ss
);
928 GenerateArgWithDefault( "fDec", 1, 0, vSubArguments
, ss
);
929 ss
<< " int dec = floor( fDec );\n";
930 ss
<< " if( dec < -20 || dec > 20 )\n";
931 ss
<< " return CreateDoubleError( IllegalArgument );\n";
932 ss
<< " double orig_value = value;\n";
933 ss
<< " value = fabs(value);\n";
934 ss
<< " double multiply = pown(10.0, dec);\n";
935 ss
<< " double tmp = value*multiply;\n";
936 ss
<< " double integral;\n";
937 // The pown() above increases rounding error, so compensate for it here.
938 // If the fractional part is close above zero, adjusted for rounding error,
939 // the number just needs to be rounded (=truncated).
940 ss
<< " if( modf( tmp, &integral ) / multiply < 1e-12 )\n";
941 ss
<< " tmp = integral;\n";
943 ss
<< " tmp = integral + 1;\n";
944 ss
<< " return copysign(tmp/multiply, orig_value);\n";
// Generates the kernel function for rounding "value" towards zero to an
// optional number of decimal places (default 0). The place count is floored
// into "dec"; values outside [-20, 20] yield an IllegalArgument error.
// |value| is scaled by 10^dec and split with modf(); a fractional part that
// is negligibly below one uses integral + 1, the other visible assignment
// keeps the integral part. The result is scaled back and given the original
// sign.
// NOTE(review): the line numbering in this listing skips entries inside this
// body (including one between the two assignments to "tmp"), so some
// statements are presumably not shown here; confirm against the full file.
948 void OpRoundDown::GenSlidingWindowFunction(outputstream
&ss
,
949 const std::string
&sSymName
, SubArguments
&vSubArguments
)
951 CHECK_PARAMETER_COUNT( 1, 2 );
952 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
954 ss
<< " int gid0=get_global_id(0);\n";
955 GenerateArg( "value", 0, vSubArguments
, ss
);
956 GenerateArgWithDefault( "fDec", 1, 0, vSubArguments
, ss
);
957 ss
<< " int dec = floor( fDec );\n";
958 ss
<< " if( dec < -20 || dec > 20 )\n";
959 ss
<< " return CreateDoubleError( IllegalArgument );\n";
960 ss
<< " double orig_value = value;\n";
961 ss
<< " value = fabs(value);\n";
962 ss
<< " double multiply = pown(10.0, dec);\n";
963 ss
<< " double tmp = value*multiply;\n";
964 ss
<< " double integral;\n";
965 // The pown() above increases rounding error, so compensate for it here.
966 // If the fractional part is close below one, adjusted for rounding error,
967 // the number just needs to be rounded (=truncated + 1).
968 ss
<< " if(( 1 - modf( tmp, &integral )) / multiply < 1e-12 )\n";
969 ss
<< " tmp = integral + 1;\n";
971 ss
<< " tmp = integral;\n";
972 ss
<< " return copysign(tmp/multiply, orig_value);\n";
// Generates the kernel function for a single-criterion count.
// Argument 1 is the criterion: for an ocPush token it is read either from a
// SingleVectorRef (falling back to 0 when the value is NaN or gid0 is past
// the array length) or from a literal double; otherwise the argument's own
// reference text is emitted.
// Argument 0 is the tested data: a DoubleVectorRef produces a loop over the
// window whose bounds depend on which ends of the range are fixed, skipping
// NaN cells and incrementing "varc" when "vara == varb"; a SingleVectorRef
// produces a single comparison. The emitted code returns "varc".
// NOTE(review): vSubArguments[1] and [0] are indexed with no visible
// parameter-count check. The line numbering in this listing skips many
// entries inside this body (including the casts' opening and most
// brace/assignment emission), so this description covers only the visible
// statements; confirm against the full file.
976 void OpCountIf::GenSlidingWindowFunction(outputstream
&ss
,
977 const std::string
&sSymName
, SubArguments
&vSubArguments
)
979 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
981 ss
<< " int gid0=get_global_id(0);\n";
982 ss
<< " double vara, varb;\n";
983 ss
<< " int varc = 0;\n";
984 FormulaToken
*tmpCur
= vSubArguments
[1]->GetFormulaToken();
986 if(ocPush
== vSubArguments
[1]->GetFormulaToken()->GetOpCode())
988 if(tmpCur
->GetType() == formula::svSingleVectorRef
)
990 const formula::SingleVectorRefToken
* tmpCurDVR
=
992 const formula::SingleVectorRefToken
*>(tmpCur
);
994 ss
<< vSubArguments
[1]->GenSlidingWindowDeclRef();
996 ss
<< " if(isnan(varb)||(gid0>=";
997 ss
<< tmpCurDVR
->GetArrayLength();
999 ss
<< " varb = 0;\n";
1001 else if(tmpCur
->GetType() == formula::svDouble
)
1004 ss
<< tmpCur
->GetDouble() << ";\n";
1010 ss
<< vSubArguments
[1]->GenSlidingWindowDeclRef();
1013 tmpCur
= vSubArguments
[0]->GetFormulaToken();
1015 if(ocPush
== vSubArguments
[0]->GetFormulaToken()->GetOpCode())
1018 if (tmpCur
->GetType() == formula::svDoubleVectorRef
)
1020 const formula::DoubleVectorRefToken
* pDVR
=
1021 static_cast<const formula::DoubleVectorRefToken
*>(tmpCur
);
1022 size_t nCurWindowSize
= pDVR
->GetRefRowSize();
1023 ss
<< " for (int i = ";
1024 if (!pDVR
->IsStartFixed() && pDVR
->IsEndFixed())
1026 ss
<< "gid0; i < " << pDVR
->GetArrayLength();
1027 ss
<< " && i < " << nCurWindowSize
<< "; ++i)\n";
1030 else if (pDVR
->IsStartFixed() && !pDVR
->IsEndFixed())
1032 ss
<< "0; i < " << pDVR
->GetArrayLength();
1033 ss
<< " && i < gid0+"<< nCurWindowSize
<< "; ++i)\n";
1036 else if (!pDVR
->IsStartFixed() && !pDVR
->IsEndFixed())
1038 ss
<< "0; i + gid0 < " << pDVR
->GetArrayLength();
1039 ss
<< " && i < "<< nCurWindowSize
<< "; ++i)\n";
1044 ss
<< "0; i < "<< nCurWindowSize
<< "; ++i)\n";
1048 ss
<< vSubArguments
[0]->GenSlidingWindowDeclRef();
1050 ss
<< " if (isnan(vara))\n";
1051 ss
<< " continue;\n";
1052 ss
<< " (vara == varb) && varc++;\n";
1055 else if(tmpCur
->GetType() == formula::svSingleVectorRef
)
1057 const formula::SingleVectorRefToken
* tmpCurDVR
=
1059 const formula::SingleVectorRefToken
*>(tmpCur
);
1061 ss
<< vSubArguments
[0]->GenSlidingWindowDeclRef();
1063 ss
<< " if(isnan(vara)||(gid0>=";
1064 ss
<< tmpCurDVR
->GetArrayLength();
1066 ss
<< " return 0;\n";
1067 ss
<< " (vara == varb) && varc++;\n";
1070 ss
<< " return varc;\n";
// Generates the kernel function for a single-criterion sum.
// "flag" selects the argument holding the values to add: index 2 when three
// arguments were supplied, otherwise index 0 (the tested range itself).
// Argument 1 is the criterion: for an ocPush token it is read either from a
// SingleVectorRef (falling back to 0 when the value is NaN or gid0 is past
// the array length) or from a literal double; otherwise the argument's own
// reference text is emitted.
// Argument 0 is the tested data: a DoubleVectorRef produces a loop over the
// window whose bounds depend on which ends of the range are fixed, skipping
// NaN cells and adding "varc" (0 when NaN) to "sum" when "vara == varb"; a
// SingleVectorRef produces a single comparison. The emitted code returns
// "sum".
// NOTE(review): vSubArguments[1] and [0] are indexed with no visible
// parameter-count check. The line numbering in this listing skips many
// entries inside this body (including the casts' opening and most
// brace/assignment emission), so this description covers only the visible
// statements; confirm against the full file.
1074 void OpSumIf::GenSlidingWindowFunction(outputstream
&ss
,
1075 const std::string
&sSymName
, SubArguments
&vSubArguments
)
1077 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
1079 ss
<< " int gid0=get_global_id(0);\n";
1080 ss
<< " double vara, varb, varc, sum = 0.0f;\n";
1081 int flag
= 3 == vSubArguments
.size() ? 2 : 0;
1082 FormulaToken
*tmpCur
= vSubArguments
[1]->GetFormulaToken();
1084 if(ocPush
== vSubArguments
[1]->GetFormulaToken()->GetOpCode())
1086 if(tmpCur
->GetType() == formula::svSingleVectorRef
)
1088 const formula::SingleVectorRefToken
* tmpCurDVR
=
1090 const formula::SingleVectorRefToken
*>(tmpCur
);
1092 ss
<< vSubArguments
[1]->GenSlidingWindowDeclRef();
1094 ss
<< " if(isnan(varb)||(gid0>=";
1095 ss
<< tmpCurDVR
->GetArrayLength();
1097 ss
<< " varb = 0;\n";
1099 else if(tmpCur
->GetType() == formula::svDouble
)
1102 ss
<< tmpCur
->GetDouble() << ";\n";
1108 ss
<< vSubArguments
[1]->GenSlidingWindowDeclRef();
1111 tmpCur
= vSubArguments
[0]->GetFormulaToken();
1113 if(ocPush
== vSubArguments
[0]->GetFormulaToken()->GetOpCode())
1116 if (tmpCur
->GetType() == formula::svDoubleVectorRef
)
1118 const formula::DoubleVectorRefToken
* pDVR
=
1119 static_cast<const formula::DoubleVectorRefToken
*>(tmpCur
);
1120 size_t nCurWindowSize
= pDVR
->GetRefRowSize();
1121 ss
<< " for (int i = ";
1122 if (!pDVR
->IsStartFixed() && pDVR
->IsEndFixed())
1124 ss
<< "gid0; i < " << pDVR
->GetArrayLength();
1125 ss
<< " && i < " << nCurWindowSize
<< "; ++i)\n";
1128 else if (pDVR
->IsStartFixed() && !pDVR
->IsEndFixed())
1130 ss
<< "0; i < " << pDVR
->GetArrayLength();
1131 ss
<< " && i < gid0+"<< nCurWindowSize
<< "; ++i)\n";
1134 else if (!pDVR
->IsStartFixed() && !pDVR
->IsEndFixed())
1136 ss
<< "0; i + gid0 < " << pDVR
->GetArrayLength();
1137 ss
<< " && i < "<< nCurWindowSize
<< "; ++i)\n";
1142 ss
<< "0; i < "<< nCurWindowSize
<< "; ++i)\n";
1146 ss
<< vSubArguments
[0]->GenSlidingWindowDeclRef();
1148 ss
<< " if (isnan(vara))\n";
1149 ss
<< " continue;\n";
1151 ss
<< vSubArguments
[flag
]->GenSlidingWindowDeclRef();
1153 ss
<< " if (isnan(varc))\n";
1154 ss
<< " varc = 0.0f;\n";
1155 ss
<< " (vara == varb)&&(sum = sum + varc);\n";
1158 else if(tmpCur
->GetType() == formula::svSingleVectorRef
)
1160 const formula::SingleVectorRefToken
* tmpCurDVR
=
1162 const formula::SingleVectorRefToken
*>(tmpCur
);
1164 ss
<< vSubArguments
[0]->GenSlidingWindowDeclRef();
1166 ss
<< " if(isnan(vara)||(gid0>=";
1167 ss
<< tmpCurDVR
->GetArrayLength();
1169 ss
<< " return 0;\n";
1170 ss
<< " int i = 0;\n";
1172 ss
<< vSubArguments
[flag
]->GenSlidingWindowDeclRef();
1174 ss
<< " if(isnan(varc)||(gid0>=";
1175 ss
<< tmpCurDVR
->GetArrayLength();
1177 ss
<< " varc = 0.0f;\n";
1179 ss
<< " (vara == varb)&&(sum = sum + varc);\n";
1183 ss
<< " return sum;\n";
// Generates the kernel function for rounding arg0 down to a multiple of
// arg1, with an optional mode argument arg2 (default 0).
// The emitted code returns 0 when arg0 or arg1 is NaN, treats a NaN arg2 as
// 0, reports IllegalArgument when arg0 and arg1 have opposite signs, and
// returns 0 for a zero multiple. For a negative arg0 with arg2 == 0 it
// returns (trunc(arg0/arg1)+1)*arg1; the other visible return is
// trunc(arg0/arg1)*arg1.
// NOTE(review): the line numbering in this listing skips entries inside this
// body (including one between the last two returns), so some statements are
// presumably not shown here; confirm against the full file.
1187 void OpFloor::GenSlidingWindowFunction(
1188 outputstream
&ss
, const std::string
&sSymName
,
1189 SubArguments
&vSubArguments
)
1191 CHECK_PARAMETER_COUNT( 2, 3 );
1192 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
1194 ss
<< " int gid0=get_global_id(0);\n";
1195 GenerateArg( "arg0", 0, vSubArguments
, ss
);
1196 GenerateArg( "arg1", 1, vSubArguments
, ss
);
1197 GenerateArgWithDefault( "arg2", 2, 0, vSubArguments
, ss
);
1198 ss
<< " if(isnan(arg0) || isnan(arg1))\n";
1199 ss
<< " return 0;\n";
1200 ss
<< " if(isnan(arg2))\n";
1201 ss
<< " arg2 = 0.0;\n";
1202 ss
<< " if(arg0*arg1<0)\n";
1203 ss
<< " return CreateDoubleError(IllegalArgument);\n";
1204 ss
<< " if(arg1 == 0.0)\n";
1205 ss
<< " return 0.0;\n";
1206 ss
<< " else if(arg2==0.0&&arg0<0.0)\n";
1207 ss
<< " return (trunc(arg0/arg1)+1)*arg1;\n";
1209 ss
<< " return trunc(arg0/arg1)*arg1;\n";
// Writes the generated OpenCL kernel text for OpSumSQ into ss.
// The operator is declared to take between 1 and 30 arguments.
1213 void OpSumSQ::GenSlidingWindowFunction(outputstream
&ss
,
1214 const std::string
&sSymName
, SubArguments
&vSubArguments
)
1216 CHECK_PARAMETER_COUNT( 1, 30 );
1217 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
1219 ss
<< " int gid0=get_global_id(0);\n";
// Kernel locals: sum is the running total, arg is the name the per-element
// snippet below reads.
1220 ss
<< " double sum = 0.0f, arg;\n";
// The snippet passed here adds the square of arg (pown(arg, 2)) to sum.
// NOTE(review): presumably GenerateRangeArgs repeats the snippet for every
// value of every argument and SkipEmpty leaves empty cells out - confirm
// against the helper.
1221 GenerateRangeArgs( vSubArguments
, ss
, SkipEmpty
,
1222 " sum += pown(arg, 2);\n"
1224 ss
<< " return sum;\n";
// Writes the generated OpenCL kernel text for OpCeil into ss.
// The operator is declared to take two or three arguments, which the kernel
// text names num, significance and bAbs.
1228 void OpCeil::GenSlidingWindowFunction(outputstream
&ss
,
1229 const std::string
&sSymName
, SubArguments
&vSubArguments
)
1231 CHECK_PARAMETER_COUNT( 2, 3 );
1232 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
1234 ss
<< " int gid0 = get_global_id(0);\n";
1235 GenerateArg( "num", 0, vSubArguments
, ss
);
1236 GenerateArg( "significance", 1, vSubArguments
, ss
);
// NOTE(review): presumably bAbs takes the value 0 when the optional third
// argument is missing - confirm against GenerateArgWithDefault.
1237 GenerateArgWithDefault( "bAbs", 2, 0, vSubArguments
, ss
);
// Emitted guard: operands of opposite sign (negative product) yield an
// IllegalArgument error value.
1238 ss
<< " if(num*significance < 0.0)\n";
1239 ss
<< " return CreateDoubleError(IllegalArgument);\n";
// Emitted guard: a zero significance returns 0.0, which also keeps the
// divisions below from dividing by zero.
1240 ss
<< " if(significance == 0.0)\n";
1241 ss
<< " return 0.0;\n";
// Emitted expression: when bAbs truncates to 0 and num is negative the
// quotient is rounded with floor(), otherwise with ceil(); the rounded
// quotient is then multiplied by significance.
1243 ss
<< "( !(int)bAbs && num < 0.0 ? floor( num / significance ) : ";
1244 ss
<< "ceil( num / significance ) )";
1245 ss
<< "*significance;\n";
// Writes the generated OpenCL kernel text for OpProduct into ss.
// The operator is declared to take between 1 and 30 arguments.
1249 void OpProduct::GenSlidingWindowFunction(outputstream
&ss
,
1250 const std::string
&sSymName
, SubArguments
&vSubArguments
)
1252 CHECK_PARAMETER_COUNT( 1, 30 );
1253 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
1255 ss
<< " int gid0 = get_global_id(0);\n";
// Kernel locals: product starts at the multiplicative identity, count at 0.
1256 ss
<< " double product=1.0;\n";
1257 ss
<< " int count = 0;\n\n";
// The snippet passed here multiplies product by arg.
// NOTE(review): presumably GenerateRangeArgs repeats the snippet for every
// value of every argument and SkipEmpty leaves empty cells out - confirm
// against the helper.
1258 GenerateRangeArgs( vSubArguments
, ss
, SkipEmpty
,
1259 " product = product*arg;\n"
// Emitted result: 0 when count is still 0, otherwise the accumulated product.
1262 ss
<< " if(count == 0)\n";
1263 ss
<< " return 0;\n";
1264 ss
<< " return product;\n";
// Writes the generated OpenCL kernel text for OpAverageIf into ss.
// The emitted kernel accumulates matching values in tmp, counts them in
// count, and finally divides tmp by count when count is non-zero.
// Throws Unhandled when any range argument consists of more than one array.
1268 void OpAverageIf::GenSlidingWindowFunction(outputstream
&ss
,
1269 const std::string
&sSymName
, SubArguments
&vSubArguments
)
1271 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
// Kernel locals: running sum, running count and the two index variables
// (singleIndex starts at gid0, doubleIndex is assigned later).
1273 ss
<< " int gid0=get_global_id(0);\n";
1274 ss
<< " double tmp =0;\n";
1275 ss
<< " double count=0;\n";
1276 ss
<< " int singleIndex =gid0;\n";
1277 ss
<< " int doubleIndex;\n";
// NOTE(review): presumably declares the tmp0, tmp1, ... variables that the
// emitted comparisons below refer to - confirm against GenTmpVariables.
1280 GenTmpVariables(ss
,vSubArguments
);
// Generator-side bookkeeping: whether the first parameter is a range, how
// many arrays the first and second parameters span, and how many loops to emit.
1282 unsigned paraOneIsDoubleVector
= 0;
1283 unsigned paraOneWidth
= 1;
1284 unsigned paraTwoWidth
= 1;
1285 unsigned loopTimes
= 0;
// First parameter given as a range (double vector reference).
1287 if(vSubArguments
[0]->GetFormulaToken()->GetType() == formula::svDoubleVectorRef
)
1289 paraOneIsDoubleVector
= 1;
1290 FormulaToken
*tmpCur0
= vSubArguments
[0]->GetFormulaToken();
1291 const formula::DoubleVectorRefToken
*pCurDVR0
= static_cast<const
1292 formula::DoubleVectorRefToken
*>(tmpCur0
);
1293 paraOneWidth
= pCurDVR0
->GetArrays().size();
1294 loopTimes
= paraOneWidth
;
// Ranges made of more than one array are rejected.
1295 if(paraOneWidth
> 1)
1297 throw Unhandled(__FILE__
, __LINE__
);
// Second parameter given as a range.
// NOTE(review): no parameter-count check is visible before this index access;
// confirm callers always pass at least two arguments.
1301 if(vSubArguments
[paraOneWidth
]->GetFormulaToken()->GetType() ==
1302 formula::svDoubleVectorRef
)
1305 FormulaToken
*tmpCur1
= vSubArguments
[1]->GetFormulaToken();
1306 const formula::DoubleVectorRefToken
*pCurDVR1
= static_cast<const
1307 formula::DoubleVectorRefToken
*>(tmpCur1
);
1308 paraTwoWidth
= pCurDVR1
->GetArrays().size();
1309 if(paraTwoWidth
> 1)
1311 throw Unhandled(__FILE__
, __LINE__
);
1314 if (!pCurDVR1
->IsStartFixed() && pCurDVR1
->IsEndFixed()) {
// doubleIndex is emitted as i+gid0 when neither end of the range is fixed;
// the other emitted form uses i directly.
1319 if(!pCurDVR1
->IsStartFixed() && !pCurDVR1
->IsEndFixed())
1321 ss
<< " doubleIndex =i+gid0;\n";
1324 ss
<< " doubleIndex =i;\n";
// NOTE(review): presumably emits the NaN handling for the tmp variable of the
// second parameter - confirm against CheckSubArgumentIsNan.
1328 CheckSubArgumentIsNan(ss
,vSubArguments
,paraOneWidth
);
// Position of the optional third parameter in vSubArguments.
1330 unsigned paraThreeIndex
= paraOneWidth
+ paraTwoWidth
;
1331 if(vSubArguments
.size() > paraThreeIndex
)
1333 if(vSubArguments
[paraThreeIndex
]->GetFormulaToken()->GetType() ==
1334 formula::svDoubleVectorRef
)
1336 FormulaToken
*tmpCur2
=
1337 vSubArguments
[paraThreeIndex
]->GetFormulaToken();
1338 const formula::DoubleVectorRefToken
*pCurDVR2
= static_cast<const
1339 formula::DoubleVectorRefToken
*>(tmpCur2
);
1340 unsigned paraThreeWidth
= pCurDVR2
->GetArrays().size();
// As for the first two parameters, only a single array is supported.
1341 if(paraThreeWidth
> 1)
1343 throw Unhandled(__FILE__
, __LINE__
);
// Range case: emit code that walks the first parameter's window.
1348 if(paraOneIsDoubleVector
)
1350 unsigned loopIndex
= 0;
1351 FormulaToken
*tmpCur0
= vSubArguments
[0]->GetFormulaToken();
1352 const formula::DoubleVectorRefToken
*pCurDVR0
= static_cast<const
1353 formula::DoubleVectorRefToken
*>(tmpCur0
);
// Window size is the smaller of the array length and the reference row size.
1354 size_t nCurWindowSize
= pCurDVR0
->GetArrayLength() <
1355 pCurDVR0
->GetRefRowSize() ? pCurDVR0
->GetArrayLength():
1356 pCurDVR0
->GetRefRowSize() ;
1358 for(loopIndex
=0; loopIndex
< loopTimes
; loopIndex
++)
// The emitted loop bounds depend on which ends of the range are fixed:
// gid0..window size (only end fixed), 0..gid0+window size (only start fixed),
// or 0..window size.
1361 if (!pCurDVR0
->IsStartFixed() && pCurDVR0
->IsEndFixed()) {
1362 ss
<< "gid0; i < "<< nCurWindowSize
<<"; i++)\n";
1363 } else if (pCurDVR0
->IsStartFixed() && !pCurDVR0
->IsEndFixed()) {
1364 ss
<< "0; i < gid0+"<< nCurWindowSize
<<"; i++)\n";
1366 ss
<< "0; i < "<< nCurWindowSize
<<"; i++)\n";
// doubleIndex is emitted as i+gid0 when neither end is fixed; the other
// emitted form uses i directly.
1369 if(!pCurDVR0
->IsStartFixed() && !pCurDVR0
->IsEndFixed())
1371 ss
<< " doubleIndex =i+gid0;\n";
1374 ss
<< " doubleIndex =i;\n";
1377 CheckSubArgumentIsNan(ss
,vSubArguments
, loopIndex
);
// Emitted condition: the current value (tmp<loopIndex>) is compared with the
// second parameter's value (tmp<paraOneWidth>) using isequal.
1379 ss
<< " if ( isequal( tmp";
1380 ss
<< loopIndex
<<" , tmp"<<paraOneWidth
<<") ) \n";
// When the argument list ends at paraThreeIndex (no third parameter) the
// compared value itself is added to tmp.
1382 if(vSubArguments
.size() == paraThreeIndex
)
1383 ss
<< " tmp += tmp"<<loopIndex
<<";\n";
// Emits the addition of the third parameter's value (tmp<paraThreeIndex+loopIndex>).
1386 CheckSubArgumentIsNan(ss
,vSubArguments
,
1387 paraThreeIndex
+loopIndex
);
1388 ss
<< " tmp += tmp";
1389 ss
<< paraThreeIndex
+loopIndex
<<";\n";
1391 ss
<< " count+=1.0;\n";
// Non-range form: the emitted code compares tmp0 with tmp1 directly.
1398 CheckSubArgumentIsNan(ss
,vSubArguments
, 0);
1399 ss
<< " if ( isequal( tmp0 , tmp1 ) ) \n";
// With exactly two arguments tmp0 is added to tmp.
1401 if(vSubArguments
.size() == 2)
1402 ss
<< " tmp += tmp0;\n";
// Emits the addition of the third argument's value (tmp2).
1405 CheckSubArgumentIsNan(ss
,vSubArguments
,2);
1406 ss
<< " tmp += tmp2;\n";
1408 ss
<< " count+=1.0;\n";
// Emitted epilogue: divide by count when it is non-zero; the text assigning
// 0 to tmp is emitted after it, then tmp is returned.
1412 ss
<< " if(count!=0)\n";
1413 ss
<< " tmp=tmp/count;\n";
1415 ss
<< " tmp= 0 ;\n";
1416 ss
<< " return tmp;\n";
// Writes the generated OpenCL kernel text for OpSeriesSum into ss.
// The operator is declared to take exactly four arguments: three values
// (named var0, var1, var2 in the kernel text) and a fourth argument that is
// handed to GenerateRangeArg.
1420 void OpSeriesSum::GenSlidingWindowFunction(outputstream
&ss
,
1421 const std::string
&sSymName
, SubArguments
&vSubArguments
)
1423 CHECK_PARAMETER_COUNT(4,4);
1424 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
1426 ss
<< " int gid0=get_global_id(0);\n";
// Kernel locals: var[] receives copies of var0..var2, res is the running result.
1427 ss
<< " double var[3], coeff, res = 0.0f;\n";
1428 GenerateArg( "var0", 0, vSubArguments
, ss
);
1429 GenerateArg( "var1", 1, vSubArguments
, ss
);
1430 GenerateArg( "var2", 2, vSubArguments
, ss
);
// Emitted guard: var0 and var1 both zero would evaluate pow(0,0) below, so a
// NoValue error is returned instead.
1431 ss
<< " if( var0 == 0 && var1 == 0 )\n";
1432 ss
<< " return CreateDoubleError(NoValue);\n"; // pow(0,0)
1433 ss
<< " var[0] = var0;\n";
1434 ss
<< " var[1] = var1;\n";
1435 ss
<< " var[2] = var2;\n";
// j is used in the exponent of the term emitted below and starts at 0.
1436 ss
<< " int j = 0;\n";
// The snippet passed here adds coeff * pow(var[0], var[1] + j * var[2]) to
// res, with coeff taken from arg.
// NOTE(review): presumably GenerateRangeArg repeats the snippet for each
// value of argument 3 and SkipEmpty leaves empty cells out - confirm against
// the helper.
1437 GenerateRangeArg( 3, vSubArguments
, ss
, SkipEmpty
,
1438 " double coeff = arg;\n"
1439 " res = res + coeff * pow(var[0], var[1] + j * var[2]);\n"
1442 ss
<< " return res;\n";
// Writes the generated OpenCL kernel text for SumOfProduct into ss.
// The emitted code accumulates into tmp through fsum(...) calls and returns
// tmp. Code for the bulk of the window is emitted as an outer loop whose body
// is repeated UNROLLING_FACTOR times; the remaining iterations are emitted
// one by one. Throws Unhandled for a range with exactly one fixed end.
1446 void SumOfProduct::GenSlidingWindowFunction( outputstream
& ss
,
1447 const std::string
& sSymName
, SubArguments
& vSubArguments
)
1449 size_t nCurWindowSize
= 0;
1450 FormulaToken
* tmpCur
= nullptr;
1451 const formula::DoubleVectorRefToken
* pCurDVR
= nullptr;
1452 GenerateFunctionDeclaration( sSymName
, vSubArguments
, ss
);
// First pass over the arguments: nCurWindowSize becomes the largest window
// size reported by any argument.
1454 for (size_t i
= 0; i
< vSubArguments
.size(); i
++)
1456 size_t nCurChildWindowSize
= vSubArguments
[i
]->GetWindowSize();
1457 nCurWindowSize
= (nCurWindowSize
< nCurChildWindowSize
) ?
1458 nCurChildWindowSize
: nCurWindowSize
;
1459 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
// NOTE(review): the cast to DoubleVectorRefToken is guarded here only by the
// ocPush opcode check - confirm the token type is guaranteed on this path.
1460 if (ocPush
== tmpCur
->GetOpCode())
1462 pCurDVR
= static_cast<const formula::DoubleVectorRefToken
*>(tmpCur
);
// Ranges with exactly one fixed end are not supported.
1463 if (pCurDVR
->IsStartFixed() != pCurDVR
->IsEndFixed())
1464 throw Unhandled(__FILE__
, __LINE__
);
// Kernel locals: accumulator, work-item id, loop index and one
// currentCount<N> variable per argument.
1467 ss
<< " double tmp = 0.0;\n";
1468 ss
<< " int gid0 = get_global_id(0);\n";
1470 ss
<< "\tint i;\n\t";
1471 ss
<< "int currentCount0;\n";
// NOTE(review): if size() is unsigned, size() - 1 wraps around for an empty
// argument list - confirm callers guarantee at least one argument.
1472 for (size_t i
= 0; i
< vSubArguments
.size() - 1; i
++)
1473 ss
<< "int currentCount" << i
+ 1 << ";\n";
// temp3 collects the text of the unrolled loop body, temp4 the text of the
// residual iterations.
1474 outputstream temp3
, temp4
;
1475 int outLoopSize
= UNROLLING_FACTOR
;
// Emit the unrolled part only when the window holds at least one full group
// of outLoopSize iterations.
1476 if (nCurWindowSize
/ outLoopSize
!= 0)
1478 ss
<< "for(int outLoop=0; outLoop<" <<
1479 nCurWindowSize
/ outLoopSize
<< "; outLoop++){\n\t";
1480 for (int count
= 0; count
< outLoopSize
; count
++)
// Each unrolled copy first sets i to its position inside the current group.
1482 ss
<< "i = outLoop*" << outLoopSize
<< "+" << count
<< ";\n";
1485 for (size_t i
= 0; i
< vSubArguments
.size(); i
++)
1487 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
1488 if (ocPush
== tmpCur
->GetOpCode())
1490 pCurDVR
= static_cast<const formula::DoubleVectorRefToken
*>(tmpCur
);
// currentCount is emitted as i+gid0+1 when neither end of the range is
// fixed; the other emitted form is i+1.
1491 if (!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
1493 temp3
<< " currentCount";
1495 temp3
<< " =i+gid0+1;\n";
1499 temp3
<< " currentCount";
1501 temp3
<< " =i+1;\n";
// Build the fsum(...) call text from the arguments' element references.
1506 temp3
<< "tmp = fsum(";
1507 for (size_t i
= 0; i
< vSubArguments
.size(); i
++)
1511 if (ocPush
== vSubArguments
[i
]->GetFormulaToken()->GetOpCode())
1514 temp3
<< "(currentCount";
// Single vector reference: the emitted text combines the array length with an
// isnan() test on the element reference.
1517 if (vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1518 formula::svSingleVectorRef
)
1520 const formula::SingleVectorRefToken
* pSVR
=
1521 static_cast<const formula::SingleVectorRefToken
*>
1522 (vSubArguments
[i
]->GetFormulaToken());
1523 temp3
<< pSVR
->GetArrayLength();
1524 temp3
<< ")||isnan(" << vSubArguments
[i
]
1525 ->GenSlidingWindowDeclRef();
1527 temp3
<< vSubArguments
[i
]->GenSlidingWindowDeclRef();
// Double vector reference: same shape of text, with the element reference
// requested via GenSlidingWindowDeclRef(true).
1530 else if (vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1531 formula::svDoubleVectorRef
)
1533 const formula::DoubleVectorRefToken
* pSVR
=
1534 static_cast<const formula::DoubleVectorRefToken
*>
1535 (vSubArguments
[i
]->GetFormulaToken());
1536 temp3
<< pSVR
->GetArrayLength();
1537 temp3
<< ")||isnan(" << vSubArguments
[i
]
1538 ->GenSlidingWindowDeclRef(true);
1540 temp3
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1546 temp3
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1548 temp3
<< ", tmp);\n\t";
1554 //The residual of mod outLoopSize
// count starts at the largest multiple of outLoopSize that does not exceed
// nCurWindowSize and runs up to nCurWindowSize.
1555 for (size_t count
= nCurWindowSize
/ outLoopSize
* outLoopSize
;
1556 count
< nCurWindowSize
; count
++)
1558 ss
<< "i =" << count
<< ";\n";
// The residual body text (temp4) is built only on the first residual iteration.
1559 if (count
== nCurWindowSize
/ outLoopSize
* outLoopSize
)
1561 for (size_t i
= 0; i
< vSubArguments
.size(); i
++)
1563 tmpCur
= vSubArguments
[i
]->GetFormulaToken();
1564 if (ocPush
== tmpCur
->GetOpCode())
1566 pCurDVR
= static_cast<const formula::DoubleVectorRefToken
*>(tmpCur
);
// Same currentCount forms as in the unrolled part.
1567 if (!pCurDVR
->IsStartFixed() && !pCurDVR
->IsEndFixed())
1569 temp4
<< " currentCount";
1571 temp4
<< " =i+gid0+1;\n";
1575 temp4
<< " currentCount";
1577 temp4
<< " =i+1;\n";
// Build the fsum(...) call text for the residual iterations.
1582 temp4
<< "tmp = fsum(";
1583 for (size_t i
= 0; i
< vSubArguments
.size(); i
++)
1587 if (ocPush
== vSubArguments
[i
]->GetFormulaToken()->GetOpCode())
1590 temp4
<< "(currentCount";
1593 if (vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1594 formula::svSingleVectorRef
)
1596 const formula::SingleVectorRefToken
* pSVR
=
1597 static_cast<const formula::SingleVectorRefToken
*>
1598 (vSubArguments
[i
]->GetFormulaToken());
1599 temp4
<< pSVR
->GetArrayLength();
1600 temp4
<< ")||isnan(" << vSubArguments
[i
]
1601 ->GenSlidingWindowDeclRef();
1603 temp4
<< vSubArguments
[i
]->GenSlidingWindowDeclRef();
1606 else if (vSubArguments
[i
]->GetFormulaToken()->GetType() ==
1607 formula::svDoubleVectorRef
)
1609 const formula::DoubleVectorRefToken
* pSVR
=
1610 static_cast<const formula::DoubleVectorRefToken
*>
1611 (vSubArguments
[i
]->GetFormulaToken());
1612 temp4
<< pSVR
->GetArrayLength();
1613 temp4
<< ")||isnan(" << vSubArguments
[i
]
1614 ->GenSlidingWindowDeclRef(true);
1616 temp4
<< vSubArguments
[i
]->GenSlidingWindowDeclRef(true);
1623 temp4
<< vSubArguments
[i
]
1624 ->GenSlidingWindowDeclRef(true);
1627 temp4
<< ", tmp);\n\t";
// The emitted kernel returns the accumulated tmp.
1631 ss
<< "return tmp;\n";
// Registers the helpers OpSum's generated code relies on:
// is_representable_integer, approx_equal and fsum_approx.
// For each helper the *Decl entry is inserted into decls and the matching
// entry without the suffix into funs.
// NOTE(review): presumably the *Decl values are the helpers' declarations and
// the others their definitions - confirm where these names are defined.
1635 void OpSum::BinInlineFun(std::set
<std::string
>& decls
,std::set
<std::string
>& funs
)
1637 decls
.insert(is_representable_integerDecl
);
1638 funs
.insert(is_representable_integer
);
1639 decls
.insert(approx_equalDecl
);
1640 funs
.insert(approx_equal
);
1641 decls
.insert(fsum_approxDecl
);
1642 funs
.insert(fsum_approx
);
// Registers the helpers OpSub's generated code relies on:
// is_representable_integer, approx_equal and fsub_approx.
// For each helper the *Decl entry is inserted into decls and the matching
// entry without the suffix into funs.
// NOTE(review): presumably the *Decl values are the helpers' declarations and
// the others their definitions - confirm where these names are defined.
1645 void OpSub::BinInlineFun(std::set
<std::string
>& decls
,std::set
<std::string
>& funs
)
1647 decls
.insert(is_representable_integerDecl
);
1648 funs
.insert(is_representable_integer
);
1649 decls
.insert(approx_equalDecl
);
1650 funs
.insert(approx_equal
);
1651 decls
.insert(fsub_approxDecl
);
1652 funs
.insert(fsub_approx
);
1657 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */