9 #include "overlayframe.h"
// Constructs an OverlayFrame for `cpus` worker threads.
// scaletranslate_engine starts out null; overlay() creates that engine the
// first time it is needed.
12 OverlayFrame::OverlayFrame(int cpus)
17 scaletranslate_engine = 0;
// Releases the temporary frame and each of the four engines.
// Every pointer is tested first, so anything that was never created is skipped.
22 OverlayFrame::~OverlayFrame()
24 //printf("OverlayFrame::~OverlayFrame 1\n");
25 if(temp_frame) delete temp_frame;
26 if(scale_engine) delete scale_engine;
27 if(translate_engine) delete translate_engine;
28 if(blend_engine) delete blend_engine;
29 if(scaletranslate_engine) delete scaletranslate_engine;
30 //printf("OverlayFrame::~OverlayFrame 2\n");
42 // (255 * 255 + 0 * 0) / 255 = 255
43 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
45 // (65535 * 65535 + 0 * 0) / 65535 = 65535
46 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
49 // Branch prediction 4 U
// BLEND_3(max, type): blends one 3-component input pixel (input1..input3)
// into output[0..2] according to `mode`.
// Expects mode, opacity, transparency, input1..input3 and an `output` pointer
// to be in scope where the macro is expanded.
// In the DIVIDE, MULTIPLY, SUBTRACT, ADDITION and NORMAL cases the mode's
// result is mixed with the existing output as
//   (result * opacity + output * transparency) / max
// with the input operand cast to int64_t first. DIVIDE substitutes max when
// the output channel is 0, which avoids a division by zero.
// Each channel is finally passed through CLIP(..., 0, max) (presumably a
// clamp) and stored as `type`.
51 #define BLEND_3(max, type) \
55 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
58 case TRANSFER_DIVIDE: \
59 r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \
60 g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \
61 b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \
62 r = (r * opacity + output[0] * transparency) / max; \
63 g = (g * opacity + output[1] * transparency) / max; \
64 b = (b * opacity + output[2] * transparency) / max; \
66 case TRANSFER_MULTIPLY: \
67 r = ((int64_t)input1 * output[0]) / max; \
68 g = ((int64_t)input2 * output[1]) / max; \
69 b = ((int64_t)input3 * output[2]) / max; \
70 r = (r * opacity + output[0] * transparency) / max; \
71 g = (g * opacity + output[1] * transparency) / max; \
72 b = (b * opacity + output[2] * transparency) / max; \
74 case TRANSFER_SUBTRACT: \
75 r = (((int64_t)input1 - output[0]) * opacity + output[0] * transparency) / max; \
76 g = (((int64_t)input2 - output[1]) * opacity + output[1] * transparency) / max; \
77 b = (((int64_t)input3 - output[2]) * opacity + output[2] * transparency) / max; \
79 case TRANSFER_ADDITION: \
80 r = (((int64_t)input1 + output[0]) * opacity + output[0] * transparency) / max; \
81 g = (((int64_t)input2 + output[1]) * opacity + output[1] * transparency) / max; \
82 b = (((int64_t)input3 + output[2]) * opacity + output[2] * transparency) / max; \
84 case TRANSFER_REPLACE: \
89 case TRANSFER_NORMAL: \
90 r = ((int64_t)input1 * opacity + output[0] * transparency) / max; \
91 g = ((int64_t)input2 * opacity + output[1] * transparency) / max; \
92 b = ((int64_t)input3 * opacity + output[2] * transparency) / max; \
96 output[0] = (type)CLIP(r, 0, max); \
97 output[1] = (type)CLIP(g, 0, max); \
98 output[2] = (type)CLIP(b, 0, max); \
105 // Blending equations are drastically different for 3 and 4 components
// BLEND_4(max, type): 4-component counterpart of BLEND_3 for pixels whose
// alpha lives in input4 / output[3].
// Expects mode, opacity, input1..input4 and an `output` pointer to be in
// scope where the macro is expanded.
// The blend weights are derived per pixel from both alpha channels:
//   pixel_opacity      = opacity * input4 / max
//   pixel_transparency = (max - pixel_opacity) * output[3] / max
// In the DIVIDE, MULTIPLY, SUBTRACT, ADDITION and NORMAL cases the colour
// channels are mixed with those weights and the resulting alpha is the
// larger of input4 and output[3].
// Colour channels go through CLIP(..., 0, max); alpha is stored unclipped.
106 #define BLEND_4(max, type) \
108 int64_t r, g, b, a; \
109 int64_t pixel_opacity, pixel_transparency; \
111 pixel_opacity = opacity * input4 / max; \
112 pixel_transparency = (max - pixel_opacity) * output[3] / max; \
116 case TRANSFER_DIVIDE: \
117 r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \
118 g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \
119 b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \
120 r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \
121 g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \
122 b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \
123 a = input4 > output[3] ? input4 : output[3]; \
125 case TRANSFER_MULTIPLY: \
126 r = ((int64_t)input1 * output[0]) / max; \
127 g = ((int64_t)input2 * output[1]) / max; \
128 b = ((int64_t)input3 * output[2]) / max; \
129 r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \
130 g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \
131 b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \
132 a = input4 > output[3] ? input4 : output[3]; \
134 case TRANSFER_SUBTRACT: \
135 r = (((int64_t)input1 - output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \
136 g = (((int64_t)input2 - output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \
137 b = (((int64_t)input3 - output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \
138 a = input4 > output[3] ? input4 : output[3]; \
140 case TRANSFER_ADDITION: \
141 r = (((int64_t)input1 + output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \
142 g = (((int64_t)input2 + output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \
143 b = (((int64_t)input3 + output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \
144 a = input4 > output[3] ? input4 : output[3]; \
146 case TRANSFER_REPLACE: \
152 case TRANSFER_NORMAL: \
153 r = ((int64_t)input1 * pixel_opacity + output[0] * pixel_transparency) / max; \
154 g = ((int64_t)input2 * pixel_opacity + output[1] * pixel_transparency) / max; \
155 b = ((int64_t)input3 * pixel_opacity + output[2] * pixel_transparency) / max; \
156 a = input4 > output[3] ? input4 : output[3]; \
160 output[0] = (type)CLIP(r, 0, max); \
161 output[1] = (type)CLIP(g, 0, max); \
162 output[2] = (type)CLIP(b, 0, max); \
163 output[3] = (type)a; \
173 // Bicubic algorithm using multiprocessors
174 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
176 // Nearest neighbor algorithm using multiprocessors for blending
177 // input -> scale + translate -> blend -> output
// Composites the rectangle (in_x1, in_y1)-(in_x2, in_y2) of `input` onto the
// rectangle (out_x1, out_y1)-(out_x2, out_y2) of `output`.
// alpha is the 0 - 1 opacity and mode is compared against the TRANSFER_*
// constants; interpolation_type selects nearest-neighbour or interpolated
// scaling.
// Steps visible in this function:
//  1. Derive w_scale / h_scale from the two rectangles.
//  2. Bring the input rectangle inside the input frame and the output
//     rectangle inside the output frame, moving the other rectangle by the
//     correspondingly scaled amount.
//  3. If interpolating and either scale differs from 1, run ScaleEngine,
//     writing either straight to `output` or into temp_frame.
//  4. Transfer the result to `output` by direct copy, BlendEngine,
//     ScaleTranslateEngine or TranslateEngine.
// Each engine is created on first use and kept for later calls.
180 int OverlayFrame::overlay(VFrame *output,
190 float alpha, // 0 - 1
192 int interpolation_type)
// NOTE(review): nothing visible here guards against in_x2 == in_x1 or
// in_y2 == in_y1, which makes these divisions by zero - confirm callers
// never pass an empty input rectangle.
194 float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
195 float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
197 //printf("OverlayFrame::overlay 1 %d %f\n", mode, alpha);
201 out_x1 += -in_x1 * w_scale;
205 if(in_x1 >= input->get_w())
207 out_x1 -= (in_x1 - input->get_w()) * w_scale;
208 in_x1 = input->get_w();
213 out_y1 += -in_y1 * h_scale;
217 if(in_y1 >= input->get_h())
219 out_y1 -= (in_y1 - input->get_h()) * h_scale;
220 in_y1 = input->get_h();
225 out_x2 += -in_x2 * w_scale;
229 if(in_x2 >= input->get_w())
231 out_x2 -= (in_x2 - input->get_w()) * w_scale;
232 in_x2 = input->get_w();
237 out_y2 += -in_y2 * h_scale;
241 if(in_y2 >= input->get_h())
243 out_y2 -= (in_y2 - input->get_h()) * h_scale;
244 in_y2 = input->get_h();
249 in_x1 += -out_x1 / w_scale;
253 if(out_x1 >= output->get_w())
255 in_x1 -= (out_x1 - output->get_w()) / w_scale;
256 out_x1 = output->get_w();
261 in_y1 += -out_y1 / h_scale;
265 if(out_y1 >= output->get_h())
267 in_y1 -= (out_y1 - output->get_h()) / h_scale;
268 out_y1 = output->get_h();
273 in_x2 += -out_x2 / w_scale;
277 if(out_x2 >= output->get_w())
279 in_x2 -= (out_x2 - output->get_w()) / w_scale;
280 out_x2 = output->get_w();
285 in_y2 += -out_y2 / h_scale;
289 if(out_y2 >= output->get_h())
291 in_y2 -= (out_y2 - output->get_h()) / h_scale;
292 out_y2 = output->get_h();
// Extents of the two rectangles after the adjustments above.
299 float in_w = in_x2 - in_x1;
300 float in_h = in_y2 - in_y1;
301 float out_w = out_x2 - out_x1;
302 float out_h = out_y2 - out_y1;
303 // Input for translation operation
304 VFrame *translation_input = input;
308 // printf("OverlayFrame::overlay %f %f %f %f -> %f %f %f %f\n", in_x1,
321 // ****************************************************************************
322 // Transfer to temp buffer by scaling nearest integer boundaries
323 // ****************************************************************************
324 if(interpolation_type != NEAREST_NEIGHBOR &&
325 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
327 // Create integer boundaries for interpolation
328 int in_x1_int = (int)in_x1;
329 int in_y1_int = (int)in_y1;
330 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
331 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
333 // Dimensions of temp frame. Integer boundaries scaled.
334 int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int));
335 int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int));
336 VFrame *scale_output;
340 #define NO_TRANSLATION1 \
341 (EQUIV(in_x1, 0) && \
343 EQUIV(out_x1, 0) && \
344 EQUIV(out_y1, 0) && \
345 EQUIV(in_x2, in_x2_int) && \
346 EQUIV(in_y2, in_y2_int) && \
347 EQUIV(out_x2, temp_w) && \
348 EQUIV(out_y2, temp_h))
352 (EQUIV(alpha, 1) && \
353 (mode == TRANSFER_REPLACE || \
354 (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
360 // Prepare destination for operation
362 // No translation and no blending. The blending operation is built into the
363 // translation unit but not the scaling unit.
365 if(NO_TRANSLATION1 &&
368 // printf("OverlayFrame::overlay input -> output\n");
370 scale_output = output;
371 translation_input = 0;
374 // If translation or blending
375 // input -> nearest integer boundary temp
378 (temp_frame->get_w() != temp_w ||
379 temp_frame->get_h() != temp_h))
387 temp_frame = new VFrame(0,
390 input->get_color_model(),
393 //printf("OverlayFrame::overlay input -> temp\n");
396 temp_frame->clear_frame();
398 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
400 scale_output = temp_frame;
401 translation_input = scale_output;
403 // Adjust input coordinates to reflect new scaled coordinates.
404 in_x1 = (in_x1 - in_x1_int) * w_scale;
405 in_y1 = (in_y1 - in_y1_int) * h_scale;
406 in_x2 = (in_x2 - in_x1_int) * w_scale;
407 in_y2 = (in_y2 - in_y1_int) * h_scale;
413 // Scale input -> scale_output
// The scale parameters are published as members because ScaleUnit reads them
// back through its OverlayFrame pointer.
414 this->scale_output = scale_output;
415 this->scale_input = input;
416 this->w_scale = w_scale;
417 this->h_scale = h_scale;
418 this->in_x1_int = in_x1_int;
419 this->in_y1_int = in_y1_int;
420 this->out_w_int = temp_w;
421 this->out_h_int = temp_h;
422 this->interpolation_type = interpolation_type;
424 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
425 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
426 scale_engine->process_packages();
427 //printf("OverlayFrame::overlay ScaleEngine 2\n");
433 // printf("OverlayFrame::overlay 1 %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
447 #define NO_TRANSLATION2 \
448 (EQUIV(in_x1, 0) && \
450 EQUIV(in_x2, translation_input->get_w()) && \
451 EQUIV(in_y2, translation_input->get_h()) && \
452 EQUIV(out_x1, 0) && \
453 EQUIV(out_y1, 0) && \
454 EQUIV(out_x2, output->get_w()) && \
455 EQUIV(out_y2, output->get_h())) \
458 (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
459 EQUIV(out_y2 - out_y1, in_y2 - in_y1))
464 //printf("OverlayFrame::overlay 4 %d\n", mode);
469 if(translation_input)
472 if( NO_TRANSLATION2 &&
476 //printf("OverlayFrame::overlay direct copy\n");
477 output->copy_from(translation_input);
481 if( NO_TRANSLATION2 &&
484 if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
487 blend_engine->output = output;
488 blend_engine->input = translation_input;
489 blend_engine->alpha = alpha;
490 blend_engine->mode = mode;
492 blend_engine->process_packages();
495 // Scale and translate using nearest neighbor
496 // Translation is exactly on integer boundaries
// && binds tighter than ||, so the test below reads: nearest neighbour was
// requested, OR all eight listed coordinates are integral.
497 if(interpolation_type == NEAREST_NEIGHBOR ||
498 EQUIV(in_x1, (int)in_x1) &&
499 EQUIV(in_y1, (int)in_y1) &&
500 EQUIV(in_x2, (int)in_x2) &&
501 EQUIV(in_y2, (int)in_y2) &&
503 EQUIV(out_x1, (int)out_x1) &&
504 EQUIV(out_y1, (int)out_y1) &&
505 EQUIV(out_x2, (int)out_x2) &&
506 EQUIV(out_y2, (int)out_y2))
508 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
509 if(!scaletranslate_engine) scaletranslate_engine = new ScaleTranslateEngine(this, cpus);
512 scaletranslate_engine->output = output;
513 scaletranslate_engine->input = translation_input;
514 scaletranslate_engine->in_x1 = (int)in_x1;
515 scaletranslate_engine->in_y1 = (int)in_y1;
516 scaletranslate_engine->in_x2 = (int)in_x2;
517 scaletranslate_engine->in_y2 = (int)in_y2;
518 scaletranslate_engine->out_x1 = (int)out_x1;
519 scaletranslate_engine->out_y1 = (int)out_y1;
520 scaletranslate_engine->out_x2 = (int)out_x2;
521 scaletranslate_engine->out_y2 = (int)out_y2;
522 scaletranslate_engine->alpha = alpha;
523 scaletranslate_engine->mode = mode;
525 scaletranslate_engine->process_packages();
528 // Fractional translation
530 // Use fractional translation
531 // printf("OverlayFrame::overlay temp -> output %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
540 this->translate_output = output;
541 this->translate_input = translation_input;
542 this->translate_in_x1 = in_x1;
543 this->translate_in_y1 = in_y1;
544 this->translate_in_x2 = in_x2;
545 this->translate_in_y2 = in_y2;
546 this->translate_out_x1 = out_x1;
547 this->translate_out_y1 = out_y1;
548 this->translate_out_x2 = out_x2;
549 this->translate_out_y2 = out_y2;
550 this->translate_alpha = alpha;
551 this->translate_mode = mode;
553 //printf("OverlayFrame::overlay 5 %d\n", mode);
554 if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
555 translate_engine->process_packages();
559 //printf("OverlayFrame::overlay 2\n");
// Constructs a scaling work package. ScaleEngine::new_package() creates
// these and ScaleEngine::init_packages() assigns out_row1 / out_row2.
570 ScalePackage::ScalePackage()
// Remembers the owning OverlayFrame; process_package() reads the scale
// parameters from it.
577 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
580 this->overlay = overlay;
// Destructor for the client object that ScaleEngine::new_client() creates.
583 ScaleUnit::~ScaleUnit()
// BILINEAR(max, type, components): bilinear resampling of output rows
// pkg->out_row1 .. pkg->out_row2 - 1 for pixels of `components` channels.
// Expects input, output, pkg, scale_w, scale_h, in_x1_int, in_y1_int,
// out_w_int and do_yuv to be in scope where the macro is expanded.
// tabulate_blinear() builds, per axis, an integer source offset and a
// fraction for every output coordinate. Each output channel is then the
// weighted sum of four source samples (output1..output4), using a / anti_a
// vertically and b / anti_b horizontally, divided by 0xffffffff.
// Where a neighbour would lie beyond the last column or row, the zero_*
// values appear to be substituted; zero_g and zero_b are (max + 1) >> 1
// multiplied by do_yuv.
// All four lookup tables are freed at the end of the macro.
// NOTE(review): a and b are at most 0xffff, so a product of two weights peaks
// at 0xfffe0001, yet the sum is divided by 0xffffffff. A full-weight sample
// equal to `max` can therefore come out one lower - confirm this is intended.
589 #define BILINEAR(max, type, components) \
591 float k_y = 1.0 / scale_h; \
592 float k_x = 1.0 / scale_w; \
593 type **in_rows = (type**)input->get_rows(); \
594 type **out_rows = (type**)output->get_rows(); \
595 type zero_r, zero_g, zero_b, zero_a; \
596 int in_h_int = input->get_h(); \
597 int in_w_int = input->get_w(); \
598 int *table_int_x, *table_int_y; \
599 int *table_frac_x, *table_frac_y; \
602 zero_g = ((max + 1) >> 1) * (do_yuv); \
603 zero_b = ((max + 1) >> 1) * (do_yuv); \
604 if(components == 4) zero_a = 0; \
606 tabulate_blinear(table_int_x, table_frac_x, k_x, 0, out_w_int); \
607 tabulate_blinear(table_int_y, table_frac_y, k_y, pkg->out_row1, pkg->out_row2); \
609 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
611 int i_y = table_int_y[i - pkg->out_row1]; \
612 uint64_t a = table_frac_y[i - pkg->out_row1]; \
613 uint64_t anti_a = 0xffff - a; \
614 type *in_row1 = in_rows[i_y + in_y1_int]; \
615 type *in_row2 = (i_y + in_y1_int < in_h_int - 1) ? \
616 in_rows[i_y + in_y1_int + 1] : \
618 type *out_row = out_rows[i]; \
620 for(int j = 0; j < out_w_int; j++) \
622 int i_x = table_int_x[j]; \
623 uint64_t b = table_frac_x[j]; \
624 uint64_t anti_b = 0xffff - b; \
625 int x = i_x + in_x1_int; \
626 uint64_t output1r, output1g, output1b, output1a; \
627 uint64_t output2r, output2g, output2b, output2a; \
628 uint64_t output3r, output3g, output3b, output3a; \
629 uint64_t output4r, output4g, output4b, output4a; \
631 output1r = in_row1[x * components]; \
632 output1g = in_row1[x * components + 1]; \
633 output1b = in_row1[x * components + 2]; \
634 if(components == 4) output1a = in_row1[x * components + 3]; \
636 if(x < in_w_int - 1) \
638 output2r = in_row1[x * components + components]; \
639 output2g = in_row1[x * components + components + 1]; \
640 output2b = in_row1[x * components + components + 2]; \
641 if(components == 4) output2a = in_row1[x * components + components + 3]; \
645 output4r = in_row2[x * components + components]; \
646 output4g = in_row2[x * components + components + 1]; \
647 output4b = in_row2[x * components + components + 2]; \
648 if(components == 4) output4a = in_row2[x * components + components + 3]; \
655 if(components == 4) output4a = zero_a; \
663 if(components == 4) output2a = zero_a; \
667 if(components == 4) output4a = zero_a; \
672 output3r = in_row2[x * components]; \
673 output3g = in_row2[x * components + 1]; \
674 output3b = in_row2[x * components + 2]; \
675 if(components == 4) output3a = in_row2[x * components + 3]; \
682 if(components == 4) output3a = zero_a; \
685 out_row[j * components] = \
686 (type)(((anti_a) * (((anti_b) * output1r) + \
688 a * (((anti_b) * output3r) + \
689 (b * output4r))) / 0xffffffff); \
690 out_row[j * components + 1] = \
691 (type)(((anti_a) * (((anti_b) * output1g) + \
693 a * (((anti_b) * output3g) + \
694 (b * output4g))) / 0xffffffff); \
695 out_row[j * components + 2] = \
696 (type)(((anti_a) * (((anti_b) * output1b) + \
698 a * (((anti_b) * output3b) + \
699 (b * output4b))) / 0xffffffff); \
700 if(components == 4) \
701 out_row[j * components + 3] = \
702 (type)(((anti_a) * (((anti_b) * output1a) + \
704 a * (((anti_b) * output3a) + \
705 (b * output4a))) / 0xffffffff); \
710 delete [] table_int_x; \
711 delete [] table_frac_x; \
712 delete [] table_int_y; \
713 delete [] table_frac_y; \
// BICUBIC(max, type, components): resamples output rows pkg->out_row1 ..
// pkg->out_row2 - 1 with a 4x4 weighted neighbourhood.
// Expects input, output, pkg, scale_w, scale_h, in_x1_int, in_y1_int,
// out_w_int and do_yuv to be in scope where the macro is expanded.
// tabulate_bspline() supplies four weights per output coordinate on each
// axis. For every output pixel the samples at offsets -1..2 in y (m) and x
// (n) are accumulated with weight r1 * r2, and each channel sum is divided by
// 0xffffffff before being stored.
// The second accumulation branch adds the zero_* values instead of a source
// sample; presumably it handles positions outside the source frame.
// Both weight tables are freed at the end of the macro.
718 #define BICUBIC(max, type, components) \
720 float k_y = 1.0 / scale_h; \
721 float k_x = 1.0 / scale_w; \
722 type **in_rows = (type**)input->get_rows(); \
723 type **out_rows = (type**)output->get_rows(); \
724 int *bspline_x, *bspline_y; \
725 int in_h_int = input->get_h(); \
726 int in_w_int = input->get_w(); \
727 type zero_r, zero_g, zero_b, zero_a; \
730 zero_b = ((max + 1) >> 1) * (do_yuv); \
731 zero_g = ((max + 1) >> 1) * (do_yuv); \
732 if(components == 4) \
735 tabulate_bspline(bspline_x, \
740 tabulate_bspline(bspline_y, \
745 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
747 int i_y = (int)(k_y * i); \
750 for(int j = 0; j < out_w_int; j++) \
752 int i_x = (int)(k_x * j); \
753 uint64_t output1, output2, output3, output4; \
757 if(components == 4) \
759 int table_y = i * 4; \
762 for(int m = -1; m < 3; m++) \
764 uint64_t r1 = bspline_y[table_y++]; \
765 int y = in_y1_int + i_y + m; \
766 int table_x = j * 4; \
768 for(int n = -1; n < 3; n++) \
770 uint64_t r2 = bspline_x[table_x++]; \
771 int x = in_x1_int + i_x + n; \
772 uint64_t r_square = r1 * r2; \
774 /* Inside boundary. */ \
780 output1 += r_square * in_rows[y][x * components]; \
781 output2 += r_square * in_rows[y][x * components + 1]; \
782 output3 += r_square * in_rows[y][x * components + 2]; \
783 if(components == 4) \
784 output4 += r_square * in_rows[y][x * components + 3]; \
788 output1 += r_square * zero_r; \
789 output2 += r_square * zero_g; \
790 output3 += r_square * zero_b; \
791 if(components == 4) \
792 output4 += r_square * zero_a; \
798 out_rows[i][j * components] = (type)(output1 / 0xffffffff); \
799 out_rows[i][j * components + 1] = (type)(output2 / 0xffffffff); \
800 out_rows[i][j * components + 2] = (type)(output3 / 0xffffffff); \
801 if(components == 4) \
802 out_rows[i][j * components + 3] = (type)(output4 / 0xffffffff); \
807 delete [] bspline_x; \
808 delete [] bspline_y; \
814 // Pow function is not thread safe in Compaq C
// CUBE(x) therefore cubes by plain multiplication. The argument is expanded
// three times, so it must not have side effects.
815 #define CUBE(x) ((x) * (x) * (x))
// Evaluates a cubic B-spline weight at x and returns it in fixed point:
// (a - 4b + 6c - 4d) / 6, scaled by 0x10000 and truncated to int.
// The guards on x + 2, x + 1 and x - 1 decide how the terms a, b and d are
// formed (presumably zero when the shifted argument is non-positive -
// TODO confirm).
817 int ScaleUnit::cubic_bspline(float x)
821 if((x + 2.0F) <= 0.0F)
831 if((x + 1.0F) <= 0.0F)
849 if((x - 1.0F) <= 0.0F)
859 return (int)((a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0 * 0x10000);
// Allocates `table` with pixels * 4 ints and fills it with four
// cubic_bspline() weights per pixel.
// For pixel i the source position is i * scale and `a` is its fractional
// part; the weights are evaluated at coefficient * (m - a) for m = -1, 0, 1, 2.
// The array is returned through `table`; the BICUBIC macro delete[]s it.
863 void ScaleUnit::tabulate_bspline(int* &table,
868 table = new int[pixels * 4];
869 for(int i = 0, j = 0; i < pixels; i++)
871 float f_x = (float)i * scale;
872 float a = f_x - floor(f_x);
874 for(float m = -1; m < 3; m++)
876 table[j++] = cubic_bspline(coefficient * (m - a));
// Allocates two tables of (pixel2 - pixel1) ints, one entry per output
// coordinate in pixel1 .. pixel2 - 1.
// For coordinate i the source position is i * scale; i_x is its integer part
// and `a` is its fractional part scaled to the range 0 .. 0xffff.
// Presumably i_x and a are what get stored in table_int / table_frac - the
// BILINEAR macro reads the tables that way. BILINEAR also delete[]s both.
882 void ScaleUnit::tabulate_blinear(int* &table_int,
888 table_int = new int[pixel2 - pixel1];
889 table_frac = new int[pixel2 - pixel1];
891 for(int i = pixel1, j = 0; i < pixel2; i++, j++)
893 float f_x = (float)i * scale;
894 int i_x = (int)floor(f_x);
895 int a = (int)((f_x - floor(f_x)) * 0xffff);
// Scales the output rows of one package from overlay->scale_input into
// overlay->scale_output.
// The OverlayFrame's scale parameters are copied into locals whose names the
// BICUBIC / BILINEAR macros expect. The comparison against the four YUV
// colour models is presumably what sets do_yuv for those macros.
// Bicubic is used when interpolation_type is CUBIC_CUBIC, or CUBIC_LINEAR with
// both scale factors above 1; the other branch performs bilinear scaling.
// Either macro is instantiated for 8-bit or 16-bit samples with 3 or 4
// components, selected by the input's colour model.
902 void ScaleUnit::process_package(LoadPackage *package)
904 ScalePackage *pkg = (ScalePackage*)package;
906 // Arguments for macros
907 VFrame *output = overlay->scale_output;
908 VFrame *input = overlay->scale_input;
909 float scale_w = overlay->w_scale;
910 float scale_h = overlay->h_scale;
911 int in_x1_int = overlay->in_x1_int;
912 int in_y1_int = overlay->in_y1_int;
913 int out_h_int = overlay->out_h_int;
914 int out_w_int = overlay->out_w_int;
916 (overlay->scale_input->get_color_model() == BC_YUV888 ||
917 overlay->scale_input->get_color_model() == BC_YUVA8888 ||
918 overlay->scale_input->get_color_model() == BC_YUV161616 ||
919 overlay->scale_input->get_color_model() == BC_YUVA16161616);
921 if(overlay->interpolation_type == CUBIC_CUBIC ||
922 (overlay->interpolation_type == CUBIC_LINEAR
923 && overlay->w_scale > 1 &&
924 overlay->h_scale > 1))
927 switch(overlay->scale_input->get_color_model())
931 BICUBIC(0xff, unsigned char, 3);
936 BICUBIC(0xff, unsigned char, 4);
941 BICUBIC(0xffff, uint16_t, 3);
944 case BC_RGBA16161616:
945 case BC_YUVA16161616:
946 BICUBIC(0xffff, uint16_t, 4);
951 // Perform bilinear scaling input -> scale_output
953 switch(overlay->scale_input->get_color_model())
957 BILINEAR(0xff, unsigned char, 3);
962 BILINEAR(0xff, unsigned char, 4);
967 BILINEAR(0xffff, uint16_t, 3);
970 case BC_RGBA16161616:
971 case BC_YUVA16161616:
972 BILINEAR(0xffff, uint16_t, 4);
// Passes cpus to the LoadServer base for both of its arguments and remembers
// the owning OverlayFrame.
991 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
992 : LoadServer(cpus, cpus)
994 this->overlay = overlay;
// Destructor for the scaling LoadServer.
997 ScaleEngine::~ScaleEngine()
// Splits overlay->out_h_int output rows across the packages.
// Each package gets out_h_int / total_packages rows (integer division). The
// last package's end is moved to out_h_int so the remainder rows are covered.
1001 void ScaleEngine::init_packages()
1003 for(int i = 0; i < total_packages; i++)
1005 ScalePackage *package = (ScalePackage*)packages[i];
1006 package->out_row1 = overlay->out_h_int / total_packages * i;
1007 package->out_row2 = package->out_row1 + overlay->out_h_int / total_packages;
1009 if(i >= total_packages - 1)
1010 package->out_row2 = overlay->out_h_int;
// Factory: returns a new ScaleUnit bound to this engine and its OverlayFrame.
1014 LoadClient* ScaleEngine::new_client()
1016 return new ScaleUnit(this, overlay);
// Factory: returns a new ScalePackage; init_packages() fills in its row range.
1019 LoadPackage* ScaleEngine::new_package()
1021 return new ScalePackage;
// Constructs a translation work package. TranslateEngine::new_package()
// creates these and TranslateEngine::init_packages() assigns the row range.
1036 TranslatePackage::TranslatePackage()
// Registers with the LoadClient base and remembers the owning OverlayFrame,
// from which process_package() reads the translate_* parameters.
1042 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1043 : LoadClient(server)
1045 this->overlay = overlay;
// Destructor for the client object that TranslateEngine::new_client() creates.
1048 TranslateUnit::~TranslateUnit()
// Builds, for one axis, a table describing which input pixels feed each
// output pixel of a fractional translation.
// The table covers output pixels out_x1_int .. out_x2_int - 1, where
// out_x1_int is out_x1 truncated and out_x2_int is ceil(out_x2) capped at
// out_total. It is allocated with new[], zero-filled and returned through
// `table`.
// Each entry holds two adjacent input indices (in_x1 and in_x1 + 1), their
// weights (in_fraction1, in_fraction2) and output_fraction, the share of the
// output pixel that is covered.
// Indices at or beyond in_total are pulled back to in_total - 1 and their
// weight is set to 0.
1054 void TranslateUnit::translation_array(transfer_table* &table,
1065 float offset = out_x1 - in_x1;
1067 out_x1_int = (int)out_x1;
1068 out_x2_int = MIN((int)ceil(out_x2), out_total);
1069 out_w_int = out_x2_int - out_x1_int;
1071 table = new transfer_table[out_w_int];
1072 bzero(table, sizeof(transfer_table) * out_w_int);
1075 //printf("OverlayFrame::translation_array 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1078 for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1080 transfer_table *entry = &table[out_x - out_x1_int];
1082 entry->in_x1 = (int)in_x;
1083 entry->in_x2 = (int)in_x + 1;
1085 // Get fraction of output pixel to fill
1086 entry->output_fraction = 1;
1090 entry->output_fraction -= out_x1 - out_x;
1093 if(out_x2 < out_x + 1)
1095 entry->output_fraction = (out_x2 - out_x);
1098 // Advance in_x until out_x_fraction is filled
1099 float out_x_fraction = entry->output_fraction;
1100 float in_x_fraction = floor(in_x + 1) - in_x;
1102 if(out_x_fraction <= in_x_fraction)
1104 entry->in_fraction1 = out_x_fraction;
1105 entry->in_fraction2 = 0.0;
1106 in_x += out_x_fraction;
1110 entry->in_fraction1 = in_x_fraction;
1111 in_x += out_x_fraction;
1112 entry->in_fraction2 = in_x - floor(in_x);
1116 if(entry->in_x2 >= in_total)
1118 entry->in_x2 = in_total - 1;
1119 entry->in_fraction2 = 0.0;
1122 if(entry->in_x1 >= in_total)
1124 entry->in_x1 = in_total - 1;
1125 entry->in_fraction1 = 0.0;
1127 // printf("OverlayFrame::translation_array 2 %d %d %d %f %f %f\n",
1131 // entry->in_fraction1,
1132 // entry->in_fraction2,
1133 // entry->output_fraction);
// TRANSLATE(max, type, components): fractional translation with blending for
// output rows row1 .. row2 - 1 and columns out_x1_int .. out_x2_int - 1.
// Expects input, output, alpha, mode, row1, row2, x_table, y_table,
// out_x1_int, out_x2_int and out_y1_int to be in scope at the expansion site.
// For each output pixel the x_table / y_table entries name a 2x2 block of
// input pixels and their weights. Each channel is the weighted sum of those
// four samples, rounded by adding 0.5 before truncation.
// The pixel's opacity is master_opacity (alpha * max, rounded) multiplied by
// the x and y output fractions. The pixel is then combined with the output by
// BLEND_3 or BLEND_4; inside this macro `output` is redeclared as a pointer
// to the destination pixel.
1170 #define TRANSLATE(max, type, components) \
1173 type **in_rows = (type**)input->get_rows(); \
1174 type **out_rows = (type**)output->get_rows(); \
1176 /* printf("OverlayFrame::translate 1 %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", */ \
1177 /* (in_x1), in_y1, in_x2, in_y2, out_x1, out_y1, out_x2, out_y2); */ \
1179 unsigned int master_opacity = (int)(alpha * max + 0.5); \
1180 unsigned int master_transparency = max - master_opacity; \
1182 /* printf("TRANSLATE %d\n", mode); */ \
1184 for(int i = row1; i < row2; i++) \
1186 int in_y1 = y_table[i - out_y1_int].in_x1; \
1187 int in_y2 = y_table[i - out_y1_int].in_x2; \
1188 float y_fraction1 = y_table[i - out_y1_int].in_fraction1; \
1189 float y_fraction2 = y_table[i - out_y1_int].in_fraction2; \
1190 float y_output_fraction = y_table[i - out_y1_int].output_fraction; \
1191 type *in_row1 = in_rows[(in_y1)]; \
1192 type *in_row2 = in_rows[(in_y2)]; \
1193 type *out_row = out_rows[i]; \
1195 for(int j = out_x1_int; j < out_x2_int; j++) \
1197 int in_x1 = x_table[j - out_x1_int].in_x1; \
1198 int in_x2 = x_table[j - out_x1_int].in_x2; \
1199 float x_fraction1 = x_table[j - out_x1_int].in_fraction1; \
1200 float x_fraction2 = x_table[j - out_x1_int].in_fraction2; \
1201 float x_output_fraction = x_table[j - out_x1_int].output_fraction; \
1202 type *output = &out_row[j * components]; \
1203 int input1, input2, input3, input4; \
1205 input1 = (int)(in_row1[in_x1 * components] * x_fraction1 * y_fraction1 + \
1206 in_row1[in_x2 * components] * x_fraction2 * y_fraction1 + \
1207 in_row2[in_x1 * components] * x_fraction1 * y_fraction2 + \
1208 in_row2[in_x2 * components] * x_fraction2 * y_fraction2 + 0.5); \
1209 input2 = (int)(in_row1[in_x1 * components + 1] * x_fraction1 * y_fraction1 + \
1210 in_row1[in_x2 * components + 1] * x_fraction2 * y_fraction1 + \
1211 in_row2[in_x1 * components + 1] * x_fraction1 * y_fraction2 + \
1212 in_row2[in_x2 * components + 1] * x_fraction2 * y_fraction2 + 0.5); \
1213 input3 = (int)(in_row1[in_x1 * components + 2] * x_fraction1 * y_fraction1 + \
1214 in_row1[in_x2 * components + 2] * x_fraction2 * y_fraction1 + \
1215 in_row2[in_x1 * components + 2] * x_fraction1 * y_fraction2 + \
1216 in_row2[in_x2 * components + 2] * x_fraction2 * y_fraction2 + 0.5); \
1217 if(components == 4) \
1218 input4 = (int)(in_row1[in_x1 * components + 3] * x_fraction1 * y_fraction1 + \
1219 in_row1[in_x2 * components + 3] * x_fraction2 * y_fraction1 + \
1220 in_row2[in_x1 * components + 3] * x_fraction1 * y_fraction2 + \
1221 in_row2[in_x2 * components + 3] * x_fraction2 * y_fraction2 + 0.5); \
1223 unsigned int opacity = (int)(master_opacity * \
1224 y_output_fraction * \
1225 x_output_fraction + 0.5); \
1226 unsigned int transparency = max - opacity; \
1228 /* if(opacity != max) printf("TRANSLATE %x %d %d\n", opacity, j, i); */ \
1230 if(components == 3) \
1232 BLEND_3(max, type); \
1236 BLEND_4(max, type); \
// Translates the output rows of one package.
// Copies the OverlayFrame's translate_* parameters and the package's row
// range into locals named as the TRANSLATE macro expects, builds the x and y
// tables with translation_array(), and instantiates TRANSLATE for 8-bit or
// 16-bit samples with 3 or 4 components according to the input colour model.
1242 void TranslateUnit::process_package(LoadPackage *package)
1244 TranslatePackage *pkg = (TranslatePackage*)package;
1251 // Variables for TRANSLATE
1252 VFrame *input = overlay->translate_input;
1253 VFrame *output = overlay->translate_output;
1254 float in_x1 = overlay->translate_in_x1;
1255 float in_y1 = overlay->translate_in_y1;
1256 float in_x2 = overlay->translate_in_x2;
1257 float in_y2 = overlay->translate_in_y2;
1258 float out_x1 = overlay->translate_out_x1;
1259 float out_y1 = overlay->translate_out_y1;
1260 float out_x2 = overlay->translate_out_x2;
1261 float out_y2 = overlay->translate_out_y2;
1262 float alpha = overlay->translate_alpha;
1263 int row1 = pkg->out_row1;
1264 int row2 = pkg->out_row2;
1265 int mode = overlay->translate_mode;
1267 transfer_table *x_table;
1268 transfer_table *y_table;
1270 translation_array(x_table,
1279 translation_array(y_table,
1289 switch(overlay->translate_input->get_color_model())
1293 TRANSLATE(0xff, unsigned char, 3);
1298 TRANSLATE(0xff, unsigned char, 4);
1303 TRANSLATE(0xffff, uint16_t, 3);
1306 case BC_RGBA16161616:
1307 case BC_YUVA16161616:
1308 TRANSLATE(0xffff, uint16_t, 4);
// Passes cpus to the LoadServer base for both of its arguments and remembers
// the owning OverlayFrame.
1325 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
1326 : LoadServer(cpus, cpus)
1328 this->overlay = overlay;
// Destructor for the translation LoadServer.
1331 TranslateEngine::~TranslateEngine()
// Divides the output rows to be written among the packages.
// The range runs from translate_out_y1 truncated to int up to
// ceil(translate_out_y2), capped at the output frame height.
// The last package always ends at out_y2_int.
1335 void TranslateEngine::init_packages()
1337 int out_y1_int = (int)overlay->translate_out_y1;
1338 int out_y2_int = MIN((int)ceil(overlay->translate_out_y2), overlay->translate_output->get_h());
1339 int out_h = out_y2_int - out_y1_int;
1341 for(int i = 0; i < total_packages; i++)
1343 TranslatePackage *package = (TranslatePackage*)packages[i];
1344 package->out_row1 = (int)(out_y1_int + out_h /
1347 package->out_row2 = (int)((float)package->out_row1 +
1350 if(i >= total_packages - 1)
1351 package->out_row2 = out_y2_int;
// Factory: returns a new TranslateUnit bound to this engine and its
// OverlayFrame.
1355 LoadClient* TranslateEngine::new_client()
1357 return new TranslateUnit(this, overlay);
// Factory: returns a new TranslatePackage; init_packages() fills in its rows.
1360 LoadPackage* TranslateEngine::new_package()
1362 return new TranslatePackage;
// SCALE_TRANSLATE(max, type, components): nearest-neighbour scale and
// translate with blending for output rows pkg->out_row1 .. pkg->out_row2 - 1.
// Expects in_rows, out_rows, x_table, y_table, pkg, alpha, mode and the
// integer in_x1, in_x2, out_x1, out_x2, out_y1 to be in scope at the
// expansion site.
// y_table maps each output row to an input row. When the output width
// differs from in_x2 - in_x1, x_table maps each output column to an input
// column; otherwise columns are read one-to-one.
// Every fetched pixel is blended into the output with BLEND_3 or BLEND_4,
// using opacity = alpha * max (rounded) and transparency = max - opacity.
1372 #define SCALE_TRANSLATE(max, type, components) \
1374 int64_t opacity = (int)(alpha * max + 0.5); \
1375 int64_t transparency = max - opacity; \
1376 int out_w = out_x2 - out_x1; \
1378 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
1380 int in_y = y_table[i - out_y1]; \
1381 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
1382 type *out_row = (type*)out_rows[i] + out_x1 * components; \
1384 /* X direction is scaled and requires a table lookup */ \
1385 if(out_w != in_x2 - in_x1) \
1387 for(int j = 0; j < out_w; j++) \
1389 int in_x = x_table[j]; \
1390 int input1, input2, input3, input4; \
1391 type *output = out_row + j * components; \
1393 input1 = in_row[in_x * components]; \
1394 input2 = in_row[in_x * components + 1]; \
1395 input3 = in_row[in_x * components + 2]; \
1396 if(components == 4) \
1397 input4 = in_row[in_x * components + 3]; \
1399 if(components == 3) \
1401 BLEND_3(max, type); \
1405 BLEND_4(max, type); \
1410 /* X direction is not scaled */ \
1412 for(int j = 0; j < out_w; j++) \
1414 int input1, input2, input3, input4; \
1415 type *output = out_row + j * components; \
1417 input1 = in_row[j * components]; \
1418 input2 = in_row[j * components + 1]; \
1419 input3 = in_row[j * components + 2]; \
1420 if(components == 4) \
1421 input4 = in_row[j * components + 3]; \
1423 if(components == 3) \
1425 BLEND_3(max, type); \
1429 BLEND_4(max, type); \
// Registers with the LoadClient base and keeps both the OverlayFrame and the
// engine; process_package() reads its parameters from the engine.
1438 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
1439 : LoadClient(server)
1441 this->overlay = overlay;
1442 this->scale_translate = server;
// Destructor for the client object that ScaleTranslateEngine::new_client()
// creates.
1445 ScaleTranslateUnit::~ScaleTranslateUnit()
// Allocates `table` with one int per output pixel (out_x2 - out_x1 entries)
// mapping output index i to a source index.
// The scale factor is (out_x2 - out_x1) / (in_x2 - in_x1), and each entry is
// i / scale truncated to int.
// One loop also adds in_x1 to the result and the other does not. Which loop
// runs depends on a condition not shown in these lines - TODO confirm what
// selects between absolute and relative indices.
1449 void ScaleTranslateUnit::scale_array(int* &table,
1456 float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
1458 table = new int[out_x2 - out_x1];
1462 for(int i = 0; i < out_x2 - out_x1; i++)
1464 table[i] = (int)((float)i / scale + in_x1);
1469 for(int i = 0; i < out_x2 - out_x1; i++)
1471 table[i] = (int)((float)i / scale);
// Performs the nearest-neighbour scale and translate for one package.
// Copies the engine's frames, integer rectangles, alpha and mode into locals
// named as the SCALE_TRANSLATE macro expects, builds the x and y lookup
// tables with scale_array(), and instantiates SCALE_TRANSLATE for 8-bit or
// 16-bit samples with 3 or 4 components according to the input colour model.
1477 void ScaleTranslateUnit::process_package(LoadPackage *package)
1479 ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
1481 // Args for NEAREST_NEIGHBOR_MACRO
1482 VFrame *output = scale_translate->output;
1483 VFrame *input = scale_translate->input;
1484 int in_x1 = scale_translate->in_x1;
1485 int in_y1 = scale_translate->in_y1;
1486 int in_x2 = scale_translate->in_x2;
1487 int in_y2 = scale_translate->in_y2;
1488 int out_x1 = scale_translate->out_x1;
1489 int out_y1 = scale_translate->out_y1;
1490 int out_x2 = scale_translate->out_x2;
1491 int out_y2 = scale_translate->out_y2;
1492 float alpha = scale_translate->alpha;
1493 int mode = scale_translate->mode;
1497 unsigned char **in_rows = input->get_rows();
1498 unsigned char **out_rows = output->get_rows();
1500 scale_array(x_table,
1506 scale_array(y_table,
1514 switch(input->get_color_model())
1518 SCALE_TRANSLATE(0xff, uint8_t, 3);
1523 SCALE_TRANSLATE(0xff, uint8_t, 4);
1529 SCALE_TRANSLATE(0xffff, uint16_t, 3);
1532 case BC_RGBA16161616:
1533 case BC_YUVA16161616:
1534 SCALE_TRANSLATE(0xffff, uint16_t, 4);
// Passes cpus to the LoadServer base for both of its arguments and remembers
// the owning OverlayFrame.
1551 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
1552 : LoadServer(cpus, cpus)
1554 this->overlay = overlay;
// Destructor for the nearest-neighbour scale/translate LoadServer.
1557 ScaleTranslateEngine::~ScaleTranslateEngine()
// Divides the output rows out_y1 .. out_y2 among the packages. out_y1 and
// out_y2 are engine members that OverlayFrame::overlay() sets before calling
// process_packages(). The last package always ends at out_y2.
1561 void ScaleTranslateEngine::init_packages()
1563 int out_h = out_y2 - out_y1;
1565 for(int i = 0; i < total_packages; i++)
1567 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
1568 package->out_row1 = (int)(out_y1 + out_h /
1571 package->out_row2 = (int)((float)package->out_row1 +
1574 if(i >= total_packages - 1)
1575 package->out_row2 = out_y2;
// Factory: returns a new ScaleTranslateUnit bound to this engine and its
// OverlayFrame.
1579 LoadClient* ScaleTranslateEngine::new_client()
1581 return new ScaleTranslateUnit(this, overlay);
// Factory: returns a new ScaleTranslatePackage; init_packages() fills in its
// row range.
1584 LoadPackage* ScaleTranslateEngine::new_package()
1586 return new ScaleTranslatePackage;
// Constructs a scale/translate work package.
// ScaleTranslateEngine::new_package() creates these.
1590 ScaleTranslatePackage::ScaleTranslatePackage()
// BLEND_ONLY(type, max, components): blends input onto output pixel for
// pixel, with no scaling or translation, for rows pkg->out_row1 ..
// pkg->out_row2 - 1 across the full input width.
// Expects input, output, pkg, alpha and mode to be in scope at the expansion
// site. Inside the row loop `output` is redeclared as a pointer to the
// current destination pixel, which is what BLEND_3 / BLEND_4 write through.
// NOTE: the argument order is (type, max, components), whereas the other
// scaling and blending macros in this file take max before type.
// NOTE(review): `input += components` modifies the `input` name from the
// expansion site, not in_row (which is indexed by j). In
// BlendUnit::process_package `input` is a VFrame*, so this looks like stray
// pointer arithmetic that should be removed - confirm.
// `h` is assigned but not used in the lines shown.
1621 #define BLEND_ONLY(type, max, components) \
1623 int64_t opacity = (int)(alpha * max + 0.5); \
1624 int64_t transparency = max - opacity; \
1626 type** output_rows = (type**)output->get_rows(); \
1627 type** input_rows = (type**)input->get_rows(); \
1628 int w = input->get_w(); \
1629 int h = input->get_h(); \
1631 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
1633 type* in_row = input_rows[i]; \
1634 type* output = output_rows[i]; \
1636 for(int j = 0; j < w; j++) \
1638 int input1, input2, input3, input4; \
1639 input1 = in_row[j * components]; \
1640 input2 = in_row[j * components + 1]; \
1641 input3 = in_row[j * components + 2]; \
1642 if(components == 4) input4 = in_row[j * components + 3]; \
1645 if(components == 3) \
1647 BLEND_3(max, type); \
1651 BLEND_4(max, type); \
1654 input += components; \
1655 output += components; \
// Registers with the LoadClient base and keeps both the OverlayFrame and the
// engine; process_package() reads its parameters from the engine.
1663 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
1664 : LoadClient(server)
1666 this->overlay = overlay;
1667 this->blend_engine = server;
// Destructor for the client object that BlendEngine::new_client() creates.
1670 BlendUnit::~BlendUnit()
// Blends the rows of one package.
// Reads output, input, alpha and mode from the BlendEngine into locals named
// as the BLEND_ONLY macro expects, then instantiates BLEND_ONLY for 8-bit or
// 16-bit samples with 3 or 4 components according to the input colour model.
1674 void BlendUnit::process_package(LoadPackage *package)
1676 BlendPackage *pkg = (BlendPackage*)package;
1679 VFrame *output = blend_engine->output;
1680 VFrame *input = blend_engine->input;
1681 float alpha = blend_engine->alpha;
1682 int mode = blend_engine->mode;
1684 switch(input->get_color_model())
1688 BLEND_ONLY(unsigned char, 0xff, 3);
1692 BLEND_ONLY(unsigned char, 0xff, 4);
1696 BLEND_ONLY(uint16_t, 0xffff, 3);
1698 case BC_RGBA16161616:
1699 case BC_YUVA16161616:
1700 BLEND_ONLY(uint16_t, 0xffff, 4);
// Passes cpus to the LoadServer base for both of its arguments and remembers
// the owning OverlayFrame.
1707 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
1708 : LoadServer(cpus, cpus)
1710 this->overlay = overlay;
// Destructor for the blend-only LoadServer.
1713 BlendEngine::~BlendEngine()
// Divides the rows of the engine's `input` frame among the packages. `input`
// is set by OverlayFrame::overlay() before process_packages() is called.
// The last package always ends at input->get_h().
1717 void BlendEngine::init_packages()
1719 for(int i = 0; i < total_packages; i++)
1721 BlendPackage *package = (BlendPackage*)packages[i];
1722 package->out_row1 = (int)(input->get_h() /
1725 package->out_row2 = (int)((float)package->out_row1 +
1729 if(i >= total_packages - 1)
1730 package->out_row2 = input->get_h();
// Factory: returns a new BlendUnit bound to this engine and its OverlayFrame.
1734 LoadClient* BlendEngine::new_client()
1736 return new BlendUnit(this, overlay);
// Factory: returns a new BlendPackage; init_packages() fills in its row range.
1739 LoadPackage* BlendEngine::new_package()
1741 return new BlendPackage;
// Constructs a blend work package. BlendEngine::new_package() creates these.
1745 BlendPackage::BlendPackage()