11 #include "overlayframe.h"
15 // Easy abstraction of the float and int types. Most of these are never used
16 // but GCC expects them.
// Absolute-value helpers, overloaded per numeric type so that the blend macros
// can call my_abs() on whatever temp_type they are instantiated with.
// NOTE(review): every integer overload, including the 64-bit ones, is declared
// to return int. The bodies are not visible here; confirm that narrowing the
// result to int is acceptable for the value ranges actually passed in.
17 static int my_abs(int32_t x)
22 static int my_abs(uint32_t x)
27 static int my_abs(int64_t x)
32 static int my_abs(uint64_t x)
37 static float my_abs(float x)
// Constructor. Takes the value `cpus`, which overlay() later passes to each
// engine it creates (e.g. `new ScaleEngine(this, cpus)`).
// NOTE(review): only one member initialization is visible here; presumably the
// other engine pointers and temp_frame are zeroed the same way -- confirm.
45 OverlayFrame::OverlayFrame(int cpus)
// Engines are created lazily in overlay(), so this pointer starts out null.
50 scaletranslate_engine = 0;
// Destructor. Deletes the temporary frame and every engine that was created.
55 OverlayFrame::~OverlayFrame()
// Each pointer is tested before deletion; `delete` on a null pointer is
// already a no-op in C++, so the tests are purely defensive.
57 if(temp_frame) delete temp_frame;
58 if(scale_engine) delete scale_engine;
59 if(translate_engine) delete translate_engine;
60 if(blend_engine) delete blend_engine;
61 if(scaletranslate_engine) delete scaletranslate_engine;
73 // (255 * 255 + 0 * 0) / 255 = 255
74 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
76 // (65535 * 65535 + 0 * 0) / 65535 = 65535
77 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
// BLEND_3: blend one 3-component pixel into `output[0..2]`.
//
// Macro arguments:
//   max           - maximum channel value, used as the divisor when mixing and
//                   as the upper clip bound.
//   temp_type     - wider arithmetic type used for the intermediate r/g/b.
//   type          - storage type of one channel in `output`.
//   chroma_offset - subtracted from / re-added to the 2nd and 3rd channels in
//                   several modes. NOTE(review): presumably the chroma midpoint
//                   for YUV models and 0 for RGB -- confirm at the call sites.
//
// The macro reads these names from the expansion site rather than taking them
// as arguments: `mode`, `input1`, `input2`, `input3`, `output`, `opacity`
// and `transparency`.
//
// Each visible mode computes a raw r/g/b and then mixes it with the existing
// output as (value * opacity + output * transparency) / max. The result is
// passed through CLIP(.., 0, max) under the `sizeof(type) != 4` test.
// NOTE(review): the 4-byte case is presumably the float model -- confirm.
// No comments are added inside the macro because every line is a
// backslash-continued part of the definition.
82 #define BLEND_3(max, temp_type, type, chroma_offset) \
86 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
89 case TRANSFER_DIVIDE: \
90 r = input1 ? (((temp_type)output[0] * max) / input1) : max; \
93 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
94 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
98 g = input2 ? (temp_type)output[1] * max / (temp_type)input2 : max; \
99 b = input3 ? (temp_type)output[2] * max / (temp_type)input3 : max; \
101 r = (r * opacity + (temp_type)output[0] * transparency) / max; \
102 g = (g * opacity + (temp_type)output[1] * transparency) / max; \
103 b = (b * opacity + (temp_type)output[2] * transparency) / max; \
105 case TRANSFER_MULTIPLY: \
106 r = ((temp_type)input1 * output[0]) / max; \
109 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
110 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
114 g = (temp_type)input2 * (temp_type)output[1] / max; \
115 b = (temp_type)input3 * (temp_type)output[2] / max; \
117 r = (r * opacity + (temp_type)output[0] * transparency) / max; \
118 g = (g * opacity + (temp_type)output[1] * transparency) / max; \
119 b = (b * opacity + (temp_type)output[2] * transparency) / max; \
121 case TRANSFER_SUBTRACT: \
122 r = (temp_type)output[0] - (temp_type)input1; \
123 g = ((temp_type)output[1] - (temp_type)chroma_offset) - \
124 ((temp_type)input2 - (temp_type)chroma_offset) + \
125 (temp_type)chroma_offset; \
126 b = ((temp_type)output[2] - (temp_type)chroma_offset) - \
127 ((temp_type)input3 - (temp_type)chroma_offset) + \
128 (temp_type)chroma_offset; \
132 r = (r * opacity + output[0] * transparency) / max; \
133 g = (g * opacity + output[1] * transparency) / max; \
134 b = (b * opacity + output[2] * transparency) / max; \
136 case TRANSFER_ADDITION: \
137 r = (temp_type)input1 + output[0]; \
138 g = ((temp_type)input2 - chroma_offset) + \
139 ((temp_type)output[1] - chroma_offset) + \
140 (temp_type)chroma_offset; \
141 b = ((temp_type)input3 - chroma_offset) + \
142 ((temp_type)output[2] - chroma_offset) + \
143 (temp_type)chroma_offset; \
144 r = (r * opacity + output[0] * transparency) / max; \
145 g = (g * opacity + output[1] * transparency) / max; \
146 b = (b * opacity + output[2] * transparency) / max; \
150 r = (temp_type)MAX(input1, output[0]); \
151 temp_type g1 = ((temp_type)input2 - chroma_offset); \
152 if(g1 < 0) g1 = -g1; \
153 temp_type g2 = ((temp_type)output[1] - chroma_offset); \
154 if(g2 < 0) g2 = -g2; \
159 temp_type b1 = ((temp_type)input3 - chroma_offset); \
160 if(b1 < 0) b1 = -b1; \
161 temp_type b2 = ((temp_type)output[2] - chroma_offset); \
162 if(b2 < 0) b2 = -b2; \
167 r = (r * opacity + output[0] * transparency) / max; \
168 g = (g * opacity + output[1] * transparency) / max; \
169 b = (b * opacity + output[2] * transparency) / max; \
172 case TRANSFER_REPLACE: \
177 case TRANSFER_NORMAL: \
178 r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
179 g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
180 b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
184 if(sizeof(type) != 4) \
186 output[0] = (type)CLIP(r, 0, max); \
187 output[1] = (type)CLIP(g, 0, max); \
188 output[2] = (type)CLIP(b, 0, max); \
202 // Blending equations are drastically different for 3 and 4 components
// BLEND_4: blend one 4-component pixel (three color channels plus alpha) into
// `output[0..3]`.
//
// Macro arguments have the same roles as in BLEND_3: `max` is the maximum
// channel value, `temp_type` the intermediate arithmetic type, `type` the
// stored channel type and `chroma_offset` the offset applied to channels 2/3.
//
// Differences from BLEND_3 that are visible in the body:
//   - The output pixel is first copied into temp_type locals output1..output4.
//   - The per-pixel weight is pixel_opacity = opacity * input4, i.e. the global
//     opacity multiplied by the input alpha. Its complement is taken against
//     max * max, which is why every mix divides by max twice.
//   - The resulting alpha is the larger of the input and output alpha in every
//     visible mode.
//   - Only the color channels go through CLIP(); `a` is stored with a plain
//     cast.
// The names `mode`, `input1`..`input4`, `output` and `opacity` are taken from
// the expansion site. No comments are added inside the macro because every
// line is a backslash-continued part of the definition.
203 #define BLEND_4(max, temp_type, type, chroma_offset) \
205 temp_type r, g, b, a; \
206 temp_type pixel_opacity, pixel_transparency; \
207 temp_type output1 = output[0]; \
208 temp_type output2 = output[1]; \
209 temp_type output3 = output[2]; \
210 temp_type output4 = output[3]; \
212 pixel_opacity = opacity * input4; \
213 pixel_transparency = (temp_type)max * max - pixel_opacity; \
217 case TRANSFER_DIVIDE: \
218 r = input1 ? (((temp_type)output1 * max) / input1) : max; \
221 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
222 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
226 g = input2 ? (temp_type)output2 * max / (temp_type)input2 : max; \
227 b = input3 ? (temp_type)output3 * max / (temp_type)input3 : max; \
229 r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
230 g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
231 b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
232 a = input4 > output4 ? input4 : output4; \
234 case TRANSFER_MULTIPLY: \
235 r = ((temp_type)input1 * output1) / max; \
238 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
239 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
243 g = (temp_type)input2 * (temp_type)output2 / max; \
244 b = (temp_type)input3 * (temp_type)output3 / max; \
246 r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
247 g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
248 b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
249 a = input4 > output4 ? input4 : output4; \
251 case TRANSFER_SUBTRACT: \
252 r = (temp_type)output1 - input1; \
253 g = ((temp_type)output2 - chroma_offset) - \
254 ((temp_type)input2 - (temp_type)chroma_offset) + \
255 (temp_type)chroma_offset; \
256 b = ((temp_type)output3 - chroma_offset) - \
257 ((temp_type)input3 - (temp_type)chroma_offset) + \
258 (temp_type)chroma_offset; \
262 r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
263 g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
264 b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
265 a = input4 > output4 ? input4 : output4; \
267 case TRANSFER_ADDITION: \
268 r = (temp_type)input1 + output1; \
269 g = ((temp_type)input2 - chroma_offset) + \
270 ((temp_type)output2 - chroma_offset) + \
272 b = ((temp_type)input3 - chroma_offset) + \
273 ((temp_type)output3 - chroma_offset) + \
275 r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
276 g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
277 b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
278 a = input4 > output4 ? input4 : output4; \
282 r = (temp_type)MAX(input1, output1); \
283 temp_type g1 = ((temp_type)input2 - chroma_offset); \
284 if(g1 < 0) g1 = -g1; \
285 temp_type g2 = ((temp_type)output2 - chroma_offset); \
286 if(g2 < 0) g2 = -g2; \
291 temp_type b1 = ((temp_type)input3 - chroma_offset); \
292 if(b1 < 0) b1 = -b1; \
293 temp_type b2 = ((temp_type)output3 - chroma_offset); \
294 if(b2 < 0) b2 = -b2; \
299 r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
300 g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
301 b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
302 a = input4 > output4 ? input4 : output4; \
305 case TRANSFER_REPLACE: \
311 case TRANSFER_NORMAL: \
312 r = (input1 * pixel_opacity + \
313 output1 * pixel_transparency) / max / max; \
314 g = ((input2 - chroma_offset) * pixel_opacity + \
315 (output2 - chroma_offset) * pixel_transparency) \
318 b = ((input3 - chroma_offset) * pixel_opacity + \
319 (output3 - chroma_offset) * pixel_transparency) \
322 a = input4 > output4 ? input4 : output4; \
326 if(sizeof(type) != 4) \
328 output[0] = (type)CLIP(r, 0, max); \
329 output[1] = (type)CLIP(g, 0, max); \
330 output[2] = (type)CLIP(b, 0, max); \
331 output[3] = (type)a; \
344 // Bicubic algorithm using multiprocessors
345 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
347 // Nearest neighbor algorithm using multiprocessors for blending
348 // input -> scale + translate -> blend -> output
// Composite a rectangle of `input` onto a rectangle of `output`.
//
// The rectangles are given as floating point corners (in_x1..in_y2 and
// out_x1..out_y2); `alpha` is the global opacity (0 - 1), `mode` a TRANSFER_*
// blend mode and `interpolation_type` the scaling filter.
//
// Visible return values: 1 when a coordinate is NaN, 0 when the clipped region
// has no area. NOTE(review): the final return is not visible here.
//
// Processing order as far as the code shows:
//   1. Derive w_scale / h_scale from the unclipped rectangles.
//   2. Clip the input rectangle to the input frame and the output rectangle to
//      the output frame, moving the opposite rectangle's edge to match.
//   3. If scaling is needed and the filter is not NEAREST_NEIGHBOR, run the
//      ScaleEngine, either straight into `output` or into `temp_frame`.
//   4. If a translation input remains, choose between a direct copy, the
//      BlendEngine, the ScaleTranslateEngine or the TranslateEngine.
351 int OverlayFrame::overlay(VFrame *output,
361 float alpha, // 0 - 1
363 int interpolation_type)
// NOTE(review): these divisions happen before the visible NaN check, and no
// guard against in_x2 == in_x1 or in_y2 == in_y1 is visible in this excerpt.
// Confirm that callers never pass a zero-size input rectangle.
365 float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
366 float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
382 isnan(out_y2)) return 1;
383 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f scale=%f %f\n", in_x1,
// Clip each input edge to the input frame. Whenever an input edge is moved,
// the corresponding output edge is moved by the same distance multiplied by
// the scale factor.
397 out_x1 += -in_x1 * w_scale;
401 if(in_x1 >= input->get_w())
403 out_x1 -= (in_x1 - input->get_w()) * w_scale;
404 in_x1 = input->get_w();
409 out_y1 += -in_y1 * h_scale;
413 if(in_y1 >= input->get_h())
415 out_y1 -= (in_y1 - input->get_h()) * h_scale;
416 in_y1 = input->get_h();
421 out_x2 += -in_x2 * w_scale;
425 if(in_x2 >= input->get_w())
427 out_x2 -= (in_x2 - input->get_w()) * w_scale;
428 in_x2 = input->get_w();
433 out_y2 += -in_y2 * h_scale;
437 if(in_y2 >= input->get_h())
439 out_y2 -= (in_y2 - input->get_h()) * h_scale;
440 in_y2 = input->get_h();
// Clip each output edge to the output frame. Here the matching input edge is
// moved by the distance divided by the scale factor.
445 in_x1 += -out_x1 / w_scale;
449 if(out_x1 >= output->get_w())
451 in_x1 -= (out_x1 - output->get_w()) / w_scale;
452 out_x1 = output->get_w();
457 in_y1 += -out_y1 / h_scale;
461 if(out_y1 >= output->get_h())
463 in_y1 -= (out_y1 - output->get_h()) / h_scale;
464 out_y1 = output->get_h();
469 in_x2 += -out_x2 / w_scale;
473 if(out_x2 >= output->get_w())
475 in_x2 -= (out_x2 - output->get_w()) / w_scale;
476 out_x2 = output->get_w();
481 in_y2 += -out_y2 / h_scale;
485 if(out_y2 >= output->get_h())
487 in_y2 -= (out_y2 - output->get_h()) / h_scale;
488 out_y2 = output->get_h();
// Sizes of the clipped rectangles; used below to reject empty regions.
500 float in_w = in_x2 - in_x1;
501 float in_h = in_y2 - in_y1;
502 float out_w = out_x2 - out_x1;
503 float out_h = out_y2 - out_y1;
504 // Input for translation operation
505 VFrame *translation_input = input;
// Nothing is drawn when clipping left no area.
508 if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
511 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
524 // ****************************************************************************
525 // Transfer to temp buffer by scaling nearest integer boundaries
526 // ****************************************************************************
527 if(interpolation_type != NEAREST_NEIGHBOR &&
528 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
530 // Create integer boundaries for interpolation
531 float in_x1_float = in_x1;
532 float in_y1_float = in_y1;
533 float in_x2_float = MIN(in_x2, input->get_w());
534 float in_y2_float = MIN(in_y2, input->get_h());
// The output start is truncated and the output end rounded up, so the integer
// rectangle covers the whole floating point one (capped at the frame size).
535 int out_x1_int = (int)out_x1;
536 int out_y1_int = (int)out_y1;
537 int out_x2_int = MIN((int)ceil(out_x2), output->get_w());
538 int out_y2_int = MIN((int)ceil(out_y2), output->get_h());
540 // Dimensions of temp frame. Integer boundaries scaled.
541 int temp_w = (out_x2_int - out_x1_int);
542 int temp_h = (out_y2_int - out_y1_int);
543 VFrame *scale_output;
547 #define NO_TRANSLATION1 \
548 (EQUIV(in_x1, 0) && \
550 EQUIV(out_x1, 0) && \
551 EQUIV(out_y1, 0) && \
552 EQUIV(in_x2, in_x2_float) && \
553 EQUIV(in_y2, in_y2_float) && \
554 EQUIV(out_x2, temp_w) && \
555 EQUIV(out_y2, temp_h))
559 (EQUIV(alpha, 1) && \
560 (mode == TRANSFER_REPLACE || \
561 (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
567 // Prepare destination for operation
569 // No translation and no blending. The blending operation is built into the
570 // translation unit but not the scaling unit.
572 if(NO_TRANSLATION1 &&
575 // printf("OverlayFrame::overlay input -> output\n");
// Scale straight into the output; clearing translation_input skips the
// translation/blend stage further down.
577 scale_output = output;
578 translation_input = 0;
581 // If translation or blending
582 // input -> nearest integer boundary temp
585 (temp_frame->get_w() != temp_w ||
586 temp_frame->get_h() != temp_h))
594 temp_frame = new VFrame(0,
597 input->get_color_model(),
600 //printf("OverlayFrame::overlay input -> temp\n");
603 temp_frame->clear_frame();
605 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
// The scaled result lands in temp_frame, which then feeds the translation
// stage.
607 scale_output = temp_frame;
608 translation_input = scale_output;
610 // Adjust input coordinates to reflect new scaled coordinates.
619 //printf("Overlay 1\n");
621 // Scale input -> scale_output
// The engine is created on first use and reused afterwards.
622 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
623 scale_engine->scale_output = scale_output;
624 scale_engine->scale_input = input;
625 scale_engine->w_scale = w_scale;
626 scale_engine->h_scale = h_scale;
627 scale_engine->in_x1_float = in_x1_float;
628 scale_engine->in_y1_float = in_y1_float;
629 scale_engine->out_w_int = temp_w;
630 scale_engine->out_h_int = temp_h;
631 scale_engine->interpolation_type = interpolation_type;
632 //printf("Overlay 2\n");
634 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
635 scale_engine->process_packages();
636 //printf("OverlayFrame::overlay ScaleEngine 2\n");
642 // printf("OverlayFrame::overlay 1 %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
656 #define NO_TRANSLATION2 \
657 (EQUIV(in_x1, 0) && \
659 EQUIV(in_x2, translation_input->get_w()) && \
660 EQUIV(in_y2, translation_input->get_h()) && \
661 EQUIV(out_x1, 0) && \
662 EQUIV(out_y1, 0) && \
663 EQUIV(out_x2, output->get_w()) && \
664 EQUIV(out_y2, output->get_h())) \
667 (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
668 EQUIV(out_y2 - out_y1, in_y2 - in_y1))
673 //printf("OverlayFrame::overlay 4 %d\n", mode);
// translation_input is null when the scaler already wrote into `output`.
678 if(translation_input)
681 if( NO_TRANSLATION2 &&
685 //printf("OverlayFrame::overlay direct copy\n");
686 output->copy_from(translation_input);
690 if( NO_TRANSLATION2 &&
693 if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
696 blend_engine->output = output;
697 blend_engine->input = translation_input;
698 blend_engine->alpha = alpha;
699 blend_engine->mode = mode;
701 blend_engine->process_packages();
704 // Scale and translate using nearest neighbor
705 // Translation is exactly on integer boundaries
// && binds tighter than ||, so this reads: nearest neighbor was requested, OR
// all eight coordinates are (approximately) whole numbers.
706 if(interpolation_type == NEAREST_NEIGHBOR ||
707 EQUIV(in_x1, (int)in_x1) &&
708 EQUIV(in_y1, (int)in_y1) &&
709 EQUIV(in_x2, (int)in_x2) &&
710 EQUIV(in_y2, (int)in_y2) &&
712 EQUIV(out_x1, (int)out_x1) &&
713 EQUIV(out_y1, (int)out_y1) &&
714 EQUIV(out_x2, (int)out_x2) &&
715 EQUIV(out_y2, (int)out_y2))
717 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
718 if(!scaletranslate_engine) scaletranslate_engine =
719 new ScaleTranslateEngine(this, cpus);
722 scaletranslate_engine->output = output;
723 scaletranslate_engine->input = translation_input;
724 // Input for Scaletranslate is subpixel precise!
725 scaletranslate_engine->in_x1 = in_x1;
726 scaletranslate_engine->in_y1 = in_y1;
727 scaletranslate_engine->in_x2 = in_x2;
728 scaletranslate_engine->in_y2 = in_y2;
// The output end is the truncated start plus the truncated extent, not the
// truncated end coordinate itself.
729 scaletranslate_engine->out_x1 = (int)out_x1;
730 scaletranslate_engine->out_y1 = (int)out_y1;
731 scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
732 scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
733 scaletranslate_engine->alpha = alpha;
734 scaletranslate_engine->mode = mode;
736 scaletranslate_engine->process_packages();
739 // Fractional translation
741 // Use fractional translation
742 // printf("OverlayFrame::overlay temp -> output %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
752 //printf("Overlay 3\n");
753 if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
754 translate_engine->translate_output = output;
755 translate_engine->translate_input = translation_input;
756 translate_engine->translate_in_x1 = in_x1;
757 translate_engine->translate_in_y1 = in_y1;
758 translate_engine->translate_in_x2 = in_x2;
759 translate_engine->translate_in_y2 = in_y2;
760 translate_engine->translate_out_x1 = out_x1;
761 translate_engine->translate_out_y1 = out_y1;
762 translate_engine->translate_out_x2 = out_x2;
763 translate_engine->translate_out_y2 = out_y2;
764 translate_engine->translate_alpha = alpha;
765 translate_engine->translate_mode = mode;
766 //printf("Overlay 4\n");
768 //printf("OverlayFrame::overlay 5 %d\n", mode);
769 translate_engine->process_packages();
773 //printf("OverlayFrame::overlay 2\n");
// Default constructor of the scaling work package. The scaling macros read
// `out_row1` and `out_row2` from such a package as the output row range.
// NOTE(review): the constructor body is not visible here.
784 ScalePackage::ScalePackage()
// Constructor of one scaling worker. Stores the owning OverlayFrame and the
// ScaleEngine it belongs to; process_package() later reads the scaling
// parameters through `engine`.
791 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
794 this->overlay = overlay;
795 this->engine = server;
// Destructor. NOTE(review): the body is not visible here; the lookup tables
// built by the tabulate_* helpers are freed by the scaling macros themselves.
798 ScaleUnit::~ScaleUnit()
// Build the per-output-pixel lookup table used when shrinking along one axis.
//
// A new array of `out_total` entries is allocated into `table`; the caller
// owns it and must release it with delete []. For output pixel i the entry
// records the first and last input pixel that contribute (input_pixel1 /
// input_pixel2) and three weights: input_fraction1 for the first pixel,
// input_fraction2 for each pixel in between, input_fraction3 for the last.
// NOTE(review): the remaining parameters (scale, in_pixel1, out_total,
// in_total) are used below but their declarations are not visible here.
804 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
810 table = new bilinear_table_t[out_total];
811 bzero(table, sizeof(bilinear_table_t) * out_total);
812 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
813 for(int i = 0; i < out_total; i++)
// Output pixel i spans [in_start, in_end) in input coordinates.
816 float in_start = out_start * scale;
817 float out_end = i + 1;
818 float in_end = out_end * scale;
819 bilinear_table_t *entry = table + i;
820 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
822 // Store input fraction. Using scale to normalize these didn't work.
// fraction1: covered part of the first input pixel; fraction3: covered part
// of the last input pixel; pixels in between count fully.
823 entry->input_fraction1 = (floor(in_start + 1) - in_start) /* / scale */;
824 entry->input_fraction2 = 1.0 /* / scale */;
825 entry->input_fraction3 = (in_end - floor(in_end)) /* / scale */;
// When the span would run past the available input, pull in_end back to the
// last usable pixel and give that pixel whatever weight remains.
827 if(in_end >= in_total - in_pixel1)
829 in_end = in_total - in_pixel1 - 1;
831 int difference = (int)in_end - (int)in_start - 1;
832 if(difference < 0) difference = 0;
833 entry->input_fraction3 = 1.0 -
834 entry->input_fraction1 -
835 entry->input_fraction2 * difference;
838 // Store input pixels
839 entry->input_pixel1 = (int)in_start;
840 entry->input_pixel2 = (int)in_end;
842 // Normalize for middle pixels
// With middle pixels present all three weights are rescaled so the total over
// the span is 1; otherwise only the first and last weights are rescaled.
843 if(entry->input_pixel2 > entry->input_pixel1 + 1)
845 float total = entry->input_fraction1 +
846 entry->input_fraction2 *
847 (entry->input_pixel2 - entry->input_pixel1 - 1) +
848 entry->input_fraction3;
849 entry->input_fraction1 /= total;
850 entry->input_fraction2 /= total;
851 entry->input_fraction3 /= total;
855 float total = entry->input_fraction1 +
856 entry->input_fraction3;
857 entry->input_fraction1 /= total;
858 entry->input_fraction3 /= total;
861 // printf("ScaleUnit::tabulate_reduction 1 %d %d %d %f %f %f %f\n",
863 // entry->input_pixel1,
864 // entry->input_pixel2,
865 // entry->input_fraction1,
866 // entry->input_fraction2,
867 // entry->input_fraction3,
868 // entry->input_fraction1 +
869 // entry->input_fraction2 *
870 // (entry->input_pixel2 - entry->input_pixel1 - 1) +
871 // entry->input_fraction3);
// If the clamping above left the first pixel after the last one, collapse the
// span onto the last pixel and drop the first pixel's weight.
875 if(entry->input_pixel1 > entry->input_pixel2)
877 entry->input_pixel1 = entry->input_pixel2;
878 entry->input_fraction1 = 0;
881 // Get total fraction of output pixel used
882 // if(entry->input_pixel2 > entry->input_pixel1)
883 entry->total_fraction =
884 entry->input_fraction1 +
885 entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
886 entry->input_fraction3;
// Pixel indices were computed relative to the start of the region; shift them
// by in_pixel1 to get indices into the input row.
887 entry->input_pixel1 += in_pixel1;
888 entry->input_pixel2 += in_pixel1;
// Build the per-output-pixel lookup table used when enlarging along one axis.
//
// A new array of `out_total` entries is allocated into `table`; the caller
// owns it and must release it with delete []. Each output pixel is mixed from
// two input pixels: input_pixel1 weighted by input_fraction1 and input_pixel2
// weighted by input_fraction3. input_fraction2 stays at the zero written by
// bzero().
// NOTE(review): the remaining parameters (scale, in_pixel1, out_total,
// in_total) are used below but their declarations are not visible here.
892 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
898 table = new bilinear_table_t[out_total];
899 bzero(table, sizeof(bilinear_table_t) * out_total);
901 for(int i = 0; i < out_total; i++)
903 bilinear_table_t *entry = table + i;
// Position of output pixel i in input coordinates.
904 float in_pixel = i * scale + in_pixel1;
905 entry->input_pixel1 = (int)floor(in_pixel);
906 entry->input_pixel2 = entry->input_pixel1 + 1;
// Weight of the second pixel is the fractional part of the position, unless
// the position lies beyond the input extent.
908 if(in_pixel - in_pixel1 <= in_total)
910 entry->input_fraction3 = in_pixel - entry->input_pixel1;
914 entry->input_fraction3 = 0;
915 entry->input_pixel2 = 0;
// Weight of the first pixel is the distance to the second one, unless the
// position lies before the start of the region.
918 if(in_pixel - in_pixel1 >= 0)
920 entry->input_fraction1 = entry->input_pixel2 - in_pixel;
924 entry->input_fraction1 = 0;
925 entry->input_pixel1 = (int)in_pixel1;
// At the far edge, reuse the first pixel as the second so that no read goes
// past the last input pixel.
928 if(entry->input_pixel2 >= in_total)
930 entry->input_pixel2 = entry->input_pixel1;
931 entry->input_fraction3 = 1.0 - entry->input_fraction1;
934 entry->total_fraction =
935 entry->input_fraction1 +
936 entry->input_fraction3;
938 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
939 // entry->input_pixel1,
940 // entry->input_pixel2,
941 // entry->input_fraction1,
942 // entry->input_fraction2,
943 // entry->input_fraction3);
// Debug helper: prints the first `total` entries of a bilinear lookup table to
// stdout, one line per entry, after a header line.
947 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
949 printf("ScaleUnit::dump_bilinear\n");
950 for(int i = 0; i < total; i++)
952 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n",
954 table[i].input_pixel1,
955 table[i].input_pixel2,
956 table[i].input_fraction1,
957 table[i].input_fraction2,
958 table[i].input_fraction3,
959 table[i].total_fraction);
// PIXEL_REDUCE_MACRO: accumulate the weighted contribution of one input row
// into the running sums temp_f1..temp_f4.
//
// Macro arguments: `type` is the channel storage type, `components` the number
// of channels per pixel (the 4th channel is only touched when it is 4) and
// `row` the index into in_rows.
//
// It reads x_entry, in_rows, input_scale1, input_scale2 and input_scale3 from
// the expansion site. The first pixel of the span is weighted by input_scale1,
// the last by input_scale3, and every pixel strictly in between by
// input_scale2.
// NOTE(review): the condition guarding the "last pixel" part is commented out
// in the visible lines; the controlling statement, if any, is not visible.
963 #define PIXEL_REDUCE_MACRO(type, components, row) \
965 type *input_row = &in_rows[row][x_entry->input_pixel1 * components]; \
966 type *input_end = &in_rows[row][x_entry->input_pixel2 * components]; \
968 /* Do first pixel */ \
969 temp_f1 += input_scale1 * input_row[0]; \
970 temp_f2 += input_scale1 * input_row[1]; \
971 temp_f3 += input_scale1 * input_row[2]; \
972 if(components == 4) temp_f4 += input_scale1 * input_row[3]; \
974 /* Do last pixel */ \
975 /* if(input_row < input_end) */\
977 temp_f1 += input_scale3 * input_end[0]; \
978 temp_f2 += input_scale3 * input_end[1]; \
979 temp_f3 += input_scale3 * input_end[2]; \
980 if(components == 4) temp_f4 += input_scale3 * input_end[3]; \
983 /* Do middle pixels */ \
984 for(input_row += components; input_row < input_end; input_row += components) \
986 temp_f1 += input_scale2 * input_row[0]; \
987 temp_f2 += input_scale2 * input_row[1]; \
988 temp_f3 += input_scale2 * input_row[2]; \
989 if(components == 4) temp_f4 += input_scale2 * input_row[3]; \
993 // Bilinear reduction and suboptimal enlargement.
994 // Very high quality.
// BILINEAR_REDUCE(max, type, components): scale the package's row range
// [pkg->out_row1, pkg->out_row2) from `input` into `output` by area-weighted
// averaging.
//
// Separate x and y lookup tables are built with tabulate_reduction() or
// tabulate_enlarge(). NOTE(review): the condition selecting between the two
// is not visible here.
// For every output pixel the macro sums three bands of input rows: the first
// row (y fraction1), the last row (y fraction3) and the rows in between
// (y fraction2), each through PIXEL_REDUCE_MACRO.
// The accumulators start at .5 for rounding when sizeof(type) != 4 and at 0
// otherwise; results are capped at `max` before being stored.
// Both tables are released with delete [] at the end.
995 #define BILINEAR_REDUCE(max, type, components) \
997 bilinear_table_t *x_table, *y_table; \
998 int out_h = pkg->out_row2 - pkg->out_row1; \
999 type **in_rows = (type**)input->get_rows(); \
1000 type **out_rows = (type**)output->get_rows(); \
1003 tabulate_reduction(x_table, \
1009 tabulate_enlarge(x_table, \
1016 tabulate_reduction(y_table, \
1022 tabulate_enlarge(y_table, \
1027 /* dump_bilinear(y_table, out_h_int); */\
1029 for(int i = 0; i < out_h; i++) \
1031 type *out_row = out_rows[i + pkg->out_row1]; \
1032 bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
1033 /* printf("BILINEAR_REDUCE 2 %d %d %d %f %f %f\n", */ \
1035 /* y_entry->input_pixel1, */ \
1036 /* y_entry->input_pixel2, */ \
1037 /* y_entry->input_fraction1, */ \
1038 /* y_entry->input_fraction2, */ \
1039 /* y_entry->input_fraction3); */ \
1041 for(int j = 0; j < out_w_int; j++) \
1043 bilinear_table_t *x_entry = &x_table[j]; \
1044 /* Load rounding factors */ \
1049 if(sizeof(type) != 4) \
1050 temp_f1 = temp_f2 = temp_f3 = temp_f4 = .5; \
1052 temp_f1 = temp_f2 = temp_f3 = temp_f4 = 0; \
1055 float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
1056 float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
1057 float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
1058 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
1063 input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
1064 input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
1065 input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
1066 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
1071 input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
1072 input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
1073 input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
1074 for(int k = y_entry->input_pixel1 + 1; \
1075 k < y_entry->input_pixel2; \
1078 PIXEL_REDUCE_MACRO(type, components, k) \
1086 if(temp_f1 > max) temp_f1 = max; \
1087 if(temp_f2 > max) temp_f2 = max; \
1088 if(temp_f3 > max) temp_f3 = max; \
1089 if(components == 4) if(temp_f4 > max) temp_f4 = max; \
1092 out_row[j * components ] = (type)temp_f1; \
1093 out_row[j * components + 1] = (type)temp_f2; \
1094 out_row[j * components + 2] = (type)temp_f3; \
1095 if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
1097 /*printf("BILINEAR_REDUCE 3 %d\n", i);*/ \
1100 delete [] x_table; \
1101 delete [] y_table; \
1106 // Only 2 input pixels
// BILINEAR_ENLARGE(max, type, components): scale the package's row range from
// `input` into `output`, mixing each output pixel from a 2x2 neighbourhood.
//
// tabulate_blinear_f() supplies, per output column and per output row, the two
// neighbouring input indices and a pair of weights (frac / antifrac). For
// every output pixel the four neighbours are loaded as floats (output1* =
// row1/x1, output2* = row1/x2, output3* = row2/x1, output4* = row2/x2) and
// combined as
//   anti_a * (anti_b * p1 + b * p2) + a * (anti_b * p3 + b * p4)
// where a/anti_a are the row weights and b/anti_b the column weights.
// The y tables are indexed by the package-relative row i, while the output
// row index is i + pkg->out_row1.
// All eight lookup arrays that were filled are released with delete [] at the
// end. The integer *_i table pointers are declared but no visible line uses
// them.
1107 #define BILINEAR_ENLARGE(max, type, components) \
1109 /*printf("BILINEAR_ENLARGE 1\n");*/ \
1110 float k_y = 1.0 / scale_h; \
1111 float k_x = 1.0 / scale_w; \
1112 type **in_rows = (type**)input->get_rows(); \
1113 type **out_rows = (type**)output->get_rows(); \
1114 int out_h = pkg->out_row2 - pkg->out_row1; \
1115 int in_h_int = input->get_h(); \
1116 int in_w_int = input->get_w(); \
1117 int *table_int_x1, *table_int_y1; \
1118 int *table_int_x2, *table_int_y2; \
1119 float *table_frac_x_f, *table_antifrac_x_f, *table_frac_y_f, *table_antifrac_y_f; \
1120 int *table_frac_x_i, *table_antifrac_x_i, *table_frac_y_i, *table_antifrac_y_i; \
1122 tabulate_blinear_f(table_int_x1, \
1125 table_antifrac_x_f, \
1131 tabulate_blinear_f(table_int_y1, \
1134 table_antifrac_y_f, \
1141 for(int i = 0; i < out_h; i++) \
1143 int i_y1 = table_int_y1[i]; \
1144 int i_y2 = table_int_y2[i]; \
1148 uint64_t anti_a_i; \
1149 a_f = table_frac_y_f[i]; \
1150 anti_a_f = table_antifrac_y_f[i]; \
1151 type *in_row1 = in_rows[i_y1]; \
1152 type *in_row2 = in_rows[i_y2]; \
1153 type *out_row = out_rows[i + pkg->out_row1]; \
1155 for(int j = 0; j < out_w_int; j++) \
1157 int i_x1 = table_int_x1[j]; \
1158 int i_x2 = table_int_x2[j]; \
1159 float output1r, output1g, output1b, output1a; \
1160 float output2r, output2g, output2b, output2a; \
1161 float output3r, output3g, output3b, output3a; \
1162 float output4r, output4g, output4b, output4a; \
1165 b_f = table_frac_x_f[j]; \
1166 anti_b_f = table_antifrac_x_f[j]; \
1168 output1r = in_row1[i_x1 * components]; \
1169 output1g = in_row1[i_x1 * components + 1]; \
1170 output1b = in_row1[i_x1 * components + 2]; \
1171 if(components == 4) output1a = in_row1[i_x1 * components + 3]; \
1173 output2r = in_row1[i_x2 * components]; \
1174 output2g = in_row1[i_x2 * components + 1]; \
1175 output2b = in_row1[i_x2 * components + 2]; \
1176 if(components == 4) output2a = in_row1[i_x2 * components + 3]; \
1178 output3r = in_row2[i_x1 * components]; \
1179 output3g = in_row2[i_x1 * components + 1]; \
1180 output3b = in_row2[i_x1 * components + 2]; \
1181 if(components == 4) output3a = in_row2[i_x1 * components + 3]; \
1183 output4r = in_row2[i_x2 * components]; \
1184 output4g = in_row2[i_x2 * components + 1]; \
1185 output4b = in_row2[i_x2 * components + 2]; \
1186 if(components == 4) output4a = in_row2[i_x2 * components + 3]; \
1188 out_row[j * components] = \
1189 (type)(anti_a_f * (anti_b_f * output1r + \
1191 a_f * (anti_b_f * output3r + \
1193 out_row[j * components + 1] = \
1194 (type)(anti_a_f * (anti_b_f * output1g + \
1196 a_f * ((anti_b_f * output3g) + \
1198 out_row[j * components + 2] = \
1199 (type)(anti_a_f * ((anti_b_f * output1b) + \
1200 (b_f * output2b)) + \
1201 a_f * ((anti_b_f * output3b) + \
1203 if(components == 4) \
1204 out_row[j * components + 3] = \
1205 (type)(anti_a_f * ((anti_b_f * output1a) + \
1206 (b_f * output2a)) + \
1207 a_f * ((anti_b_f * output3a) + \
1213 delete [] table_int_x1; \
1214 delete [] table_int_x2; \
1215 delete [] table_int_y1; \
1216 delete [] table_int_y2; \
1217 delete [] table_frac_x_f; \
1218 delete [] table_antifrac_x_f; \
1219 delete [] table_frac_y_f; \
1220 delete [] table_antifrac_y_f; \
1222 /*printf("BILINEAR_ENLARGE 2\n");*/ \
// BICUBIC(max, type, components): scale rows [pkg->out_row1, pkg->out_row2)
// from `input` into `output` using a 4x4 weighted neighbourhood per output
// pixel.
//
// tabulate_bcubic_f() fills, for each axis, a coefficient table and a table of
// input coordinates with four entries per output pixel; that is why the table
// offsets start at i * 4 and j * 4. The weight of one tap is the product of
// its row and column coefficient (r1_f * r2_f), and the weighted samples are
// summed into output1_f..output4_f before being cast to `type`.
// Unlike the bilinear macros, i here is the absolute output row, so it indexes
// both out_rows and the y tables directly.
// NOTE(review): no clipping against `max` is visible in these lines -- confirm
// whether the missing lines clamp the sums before the cast.
// The four tables are released with delete [] at the end.
1226 #define BICUBIC(max, type, components) \
1228 float k_y = 1.0 / scale_h; \
1229 float k_x = 1.0 / scale_w; \
1230 type **in_rows = (type**)input->get_rows(); \
1231 type **out_rows = (type**)output->get_rows(); \
1232 float *bspline_x_f, *bspline_y_f; \
1233 int *bspline_x_i, *bspline_y_i; \
1234 int *in_x_table, *in_y_table; \
1235 int in_h_int = input->get_h(); \
1236 int in_w_int = input->get_w(); \
1238 tabulate_bcubic_f(bspline_x_f, \
1246 tabulate_bcubic_f(bspline_y_f, \
1254 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
1256 for(int j = 0; j < out_w_int; j++) \
1258 int i_x = (int)(k_x * j); \
1259 float output1_f, output2_f, output3_f, output4_f; \
1260 uint64_t output1_i, output2_i, output3_i, output4_i; \
1264 if(components == 4) \
1266 int table_y = i * 4; \
1269 for(int m = -1; m < 3; m++) \
1273 r1_f = bspline_y_f[table_y]; \
1274 int y = in_y_table[table_y]; \
1275 int table_x = j * 4; \
1277 for(int n = -1; n < 3; n++) \
1281 r2_f = bspline_x_f[table_x]; \
1282 int x = in_x_table[table_x]; \
1284 uint64_t r_square_i; \
1285 r_square_f = r1_f * r2_f; \
1286 output1_f += r_square_f * in_rows[y][x * components]; \
1287 output2_f += r_square_f * in_rows[y][x * components + 1]; \
1288 output3_f += r_square_f * in_rows[y][x * components + 2]; \
1289 if(components == 4) \
1290 output4_f += r_square_f * in_rows[y][x * components + 3]; \
1298 out_rows[i][j * components] = (type)output1_f; \
1299 out_rows[i][j * components + 1] = (type)output2_f; \
1300 out_rows[i][j * components + 2] = (type)output3_f; \
1301 if(components == 4) \
1302 out_rows[i][j * components + 3] = (type)output4_f; \
1307 delete [] bspline_x_f; \
1308 delete [] bspline_y_f; \
1309 delete [] in_x_table; \
1310 delete [] in_y_table; \
1316 // Pow function is not thread safe in Compaq C
// Cube of x by plain multiplication. The argument is evaluated three times, so
// it must not have side effects.
1317 #define CUBE(x) ((x) * (x) * (x))
// Evaluate the cubic B-spline weighting function at x.
// The visible lines test x + 2, x + 1 and x - 1 against zero and combine four
// terms a, b, c, d as (a - 4b + 6c - 4d) / 6.
// NOTE(review): how a..d are computed is not visible here; presumably each is
// zero or the CUBE of the shifted argument -- confirm against the full body.
1319 float ScaleUnit::cubic_bspline(float x)
1323 if((x + 2.0F) <= 0.0F)
1333 if((x + 1.0F) <= 0.0F)
1351 if((x - 1.0F) <= 0.0F)
1361 return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
// Build floating point bicubic lookup tables for one axis.
//
// Allocates `coef_table` and `coord_table` with four entries per output pixel
// (pixels * 4); the caller must release both with delete []. For each output
// pixel the four taps m = -1..2 get a B-spline coefficient and an input
// coordinate clamped to [0, total_pixels - 1].
// NOTE(review): `coefficient`, `scale`, `start`, `pixels` and `total_pixels`
// are parameters whose declarations are not visible here, and the increment
// of j is on a line that is not visible.
1365 void ScaleUnit::tabulate_bcubic_f(float* &coef_table,
1373 coef_table = new float[pixels * 4];
1374 coord_table = new int[pixels * 4];
1375 for(int i = 0, j = 0; i < pixels; i++)
// f_x is the output pixel's position in input coordinates; a is its
// fractional part.
1377 float f_x = (float)i * scale + start;
1378 float a = f_x - floor(f_x);
1380 for(float m = -1; m < 3; m++)
1382 coef_table[j] = cubic_bspline(coefficient * (m - a));
1383 coord_table[j] = (int)(f_x + m);
1384 CLAMP(coord_table[j], 0, total_pixels - 1);
// Integer variant of tabulate_bcubic_f(): identical layout, but each
// coefficient is stored as fixed point, i.e. the B-spline value multiplied by
// 0x10000 and truncated to int.
// The caller must release both tables with delete [].
// NOTE(review): the parameter declarations other than `coef_table` and the
// increment of j are on lines that are not visible here.
1391 void ScaleUnit::tabulate_bcubic_i(int* &coef_table,
1399 coef_table = new int[pixels * 4];
1400 coord_table = new int[pixels * 4];
1401 for(int i = 0, j = 0; i < pixels; i++)
1403 float f_x = (float)i * scale + start;
1404 float a = f_x - floor(f_x);
1406 for(float m = -1; m < 3; m++)
1408 coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1409 coord_table[j] = (int)(f_x + m);
1410 CLAMP(coord_table[j], 0, total_pixels - 1);
// Build floating point bilinear lookup tables for output pixels
// [pixel1, pixel2) along one axis.
//
// Four arrays of (pixel2 - pixel1) entries are allocated; the caller must
// release them with delete []. Entry j corresponds to output pixel pixel1 + j:
//   table_int1 / table_int2 - the two neighbouring input indices, each clamped
//                             to [0, total_pixels - 1]
//   table_antifrac          - 1 - a, where a is the fractional part of the
//                             input position
// NOTE(review): the line that fills table_frac is not visible here; presumably
// it stores `a` -- confirm.
1417 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1420 float* &table_antifrac,
1427 table_int1 = new int[pixel2 - pixel1];
1428 table_int2 = new int[pixel2 - pixel1];
1429 table_frac = new float[pixel2 - pixel1];
1430 table_antifrac = new float[pixel2 - pixel1];
1432 for(int i = pixel1, j = 0; i < pixel2; i++, j++)
// Position of output pixel i in input coordinates, split into an integer
// index and a fractional part.
1434 float f_x = (float)i * scale + start;
1435 int i_x = (int)floor(f_x);
1436 float a = (f_x - floor(f_x));
1438 table_int1[j] = i_x;
1439 table_int2[j] = i_x + 1;
1440 CLAMP(table_int1[j], 0, total_pixels - 1);
1441 CLAMP(table_int2[j], 0, total_pixels - 1);
1443 table_antifrac[j] = 1.0F - a;
1444 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
// Integer (fixed point) variant of tabulate_blinear_f() for output pixels
// [pixel1, pixel2) along one axis.
//
// Four int arrays of (pixel2 - pixel1) entries are allocated; the caller must
// release them with delete []. The neighbour indices are clamped to
// [0, total_pixels - 1] exactly as in the float variant.
1448 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1451 int* &table_antifrac,
1458 table_int1 = new int[pixel2 - pixel1];
1459 table_int2 = new int[pixel2 - pixel1];
1460 table_frac = new int[pixel2 - pixel1];
1461 table_antifrac = new int[pixel2 - pixel1];
1463 for(int i = pixel1, j = 0; i < pixel2; i++, j++)
// The position is held in a double here, although the product itself is
// still computed from (float)i.
1465 double f_x = (float)i * scale + start;
1466 int i_x = (int)floor(f_x);
1467 float a = (f_x - floor(f_x));
1469 table_int1[j] = i_x;
1470 table_int2[j] = i_x + 1;
1471 CLAMP(table_int1[j], 0, total_pixels - 1);
1472 CLAMP(table_int2[j], 0, total_pixels - 1);
// NOTE(review): the two weights use different fixed point scales (0xffff for
// frac, 0x10000 for antifrac), so their sum is not constant across `a`.
// The consumer of these tables is not visible here; confirm which scale it
// expects before unifying the two.
1473 table_frac[j] = (int)(a * 0xffff);
1474 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1475 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
// Scale the part of the frame described by one ScalePackage, reading
// engine->scale_input and writing engine->scale_output.
// Three macro families are dispatched on the input color model:
//   BICUBIC          - interpolation_type is CUBIC_CUBIC, or CUBIC_LINEAR with
//                      both w_scale and h_scale greater than 1
//   BILINEAR_ENLARGE - both w_scale and h_scale greater than 1
//   BILINEAR_REDUCE  - the remaining case (see the "Bilinear reduction" comment)
// Each macro call passes the full-scale sample value, the sample type and the
// number of components per pixel.
1479 void ScaleUnit::process_package(LoadPackage *package)
// The generic package is really a ScalePackage.
1481 ScalePackage *pkg = (ScalePackage*)package;
1483 //printf("ScaleUnit::process_package 1\n");
1484 // Arguments for macros
// Local copies of the engine state; the scaling macros refer to these names.
1485 VFrame *output = engine->scale_output;
1486 VFrame *input = engine->scale_input;
1487 float scale_w = engine->w_scale;
1488 float scale_h = engine->h_scale;
1489 float in_x1_float = engine->in_x1_float;
1490 float in_y1_float = engine->in_y1_float;
1491 int out_h_int = engine->out_h_int;
1492 int out_w_int = engine->out_w_int;
1494 (input->get_color_model() == BC_YUV888 ||
1495 input->get_color_model() == BC_YUVA8888 ||
1496 input->get_color_model() == BC_YUV161616 ||
1497 input->get_color_model() == BC_YUVA16161616);
1499 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
// Bicubic path: always for CUBIC_CUBIC, and for CUBIC_LINEAR only when
// enlarging in both directions.
1500 if(engine->interpolation_type == CUBIC_CUBIC ||
1501 (engine->interpolation_type == CUBIC_LINEAR
1502 && engine->w_scale > 1 &&
1503 engine->h_scale > 1))
1505 switch(engine->scale_input->get_color_model())
1508 BICUBIC(1.0, float, 3);
1512 BICUBIC(1.0, float, 4);
1517 BICUBIC(0xff, unsigned char, 3);
1522 BICUBIC(0xff, unsigned char, 4);
1527 BICUBIC(0xffff, uint16_t, 3);
1530 case BC_RGBA16161616:
1531 case BC_YUVA16161616:
1532 BICUBIC(0xffff, uint16_t, 4);
1537 // Perform bilinear scaling input -> scale_output
1538 if(engine->w_scale > 1 &&
1539 engine->h_scale > 1)
1541 switch(engine->scale_input->get_color_model())
1544 BILINEAR_ENLARGE(1.0, float, 3);
1548 BILINEAR_ENLARGE(1.0, float, 4);
1553 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1558 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1563 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1566 case BC_RGBA16161616:
1567 case BC_YUVA16161616:
1568 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1573 // Bilinear reduction
1575 switch(engine->scale_input->get_color_model())
1578 BILINEAR_REDUCE(1.0, float, 3);
1581 BILINEAR_REDUCE(1.0, float, 4);
1585 BILINEAR_REDUCE(0xff, unsigned char, 3);
1590 BILINEAR_REDUCE(0xff, unsigned char, 4);
1595 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1598 case BC_RGBA16161616:
1599 case BC_YUVA16161616:
1600 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1604 //printf("ScaleUnit::process_package 3\n");
// Construct the scaling load server. `cpus` is passed as both arguments of
// the LoadServer constructor.
1620 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1621 : LoadServer(cpus, cpus)
// Remember the OverlayFrame; new_client() hands it to every ScaleUnit.
1623 this->overlay = overlay;
// ScaleEngine destructor.
1626 ScaleEngine::~ScaleEngine()
// Divide the out_h_int output rows among the packages.
// Package i covers [out_row1, out_row2) with a height of
// out_h_int / get_total_packages() (integer division); the last package is
// extended to out_h_int so the division remainder is not lost.
1630 void ScaleEngine::init_packages()
1632 for(int i = 0; i < get_total_packages(); i++)
1634 ScalePackage *package = (ScalePackage*)get_package(i);
1635 package->out_row1 = out_h_int / get_total_packages() * i;
1636 package->out_row2 = package->out_row1 + out_h_int / get_total_packages();
// Last package: run to the final output row.
1638 if(i >= get_total_packages() - 1)
1639 package->out_row2 = out_h_int;
// Create a new ScaleUnit bound to this engine and its OverlayFrame.
// Returns a heap-allocated object; it is not deleted here.
1643 LoadClient* ScaleEngine::new_client()
1645 return new ScaleUnit(this, overlay);
// Create a new, default-constructed ScalePackage.
// Returns a heap-allocated object; it is not deleted here.
1648 LoadPackage* ScaleEngine::new_package()
1650 return new ScalePackage;
// TranslatePackage default constructor.
1665 TranslatePackage::TranslatePackage()
// Construct a translate worker attached to `server`, which is also passed to
// the LoadClient base class.
1671 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1672 : LoadClient(server)
// Keep both pointers; process_package() reads its parameters through `engine`.
1674 this->overlay = overlay;
1675 this->engine = server;
// TranslateUnit destructor.
1678 TranslateUnit::~TranslateUnit()
// Build the floating-point transfer table for one axis of a translation.
// One transfer_table_f entry is produced per integer output coordinate in
// [out_x1_int, out_x2_int). Each entry names two adjacent input coordinates
// (in_x1, in_x2), the weight taken from each (in_fraction1, in_fraction2) and
// how much of the output pixel is covered (output_fraction).
// out_x1_int, out_x2_int and out_w_int are written by this function.
// `table` is allocated here with new[] and zero-filled; no delete[] is
// performed by this function.
1684 void TranslateUnit::translation_array_f(transfer_table_f* &table,
1695 float offset = out_x1 - in_x1;
1696 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
// Integer output span: start truncated, end rounded up but limited to out_total.
1698 out_x1_int = (int)out_x1;
1699 out_x2_int = MIN((int)ceil(out_x2), out_total);
1700 out_w_int = out_x2_int - out_x1_int;
1702 table = new transfer_table_f[out_w_int];
1703 bzero(table, sizeof(transfer_table_f) * out_w_int);
1706 // printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f scale=%f %f\n",
1712 // out_x2 - out_x1);
1716 for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1718 transfer_table_f *entry = &table[out_x - out_x1_int];
// The two input pixels that can contribute to this output pixel.
1720 entry->in_x1 = (int)in_x;
1721 entry->in_x2 = (int)in_x + 1;
1723 // Get fraction of output pixel to fill
1724 entry->output_fraction = 1;
// Remove the part of the output pixel lying before out_x1 (presumably only
// applied to a partially covered first pixel).
1728 entry->output_fraction -= out_x1 - out_x;
// Output span ends inside this pixel: only the part up to out_x2 is covered.
1731 if(out_x2 < out_x + 1)
1733 entry->output_fraction = (out_x2 - out_x);
1736 // Advance in_x until out_x_fraction is filled
1737 float out_x_fraction = entry->output_fraction;
// Distance from in_x to the next integer input coordinate.
1738 float in_x_fraction = floor(in_x + 1) - in_x;
// Everything needed fits inside the first input pixel.
1740 if(out_x_fraction <= in_x_fraction)
1742 entry->in_fraction1 = out_x_fraction;
1743 entry->in_fraction2 = 0.0;
1744 in_x += out_x_fraction;
// Otherwise the first input pixel gives what it has left and the second
// input pixel gives the fractional part of the advanced in_x.
1748 entry->in_fraction1 = in_x_fraction;
1749 in_x += out_x_fraction;
1750 entry->in_fraction2 = in_x - floor(in_x);
1753 // Clip in_x and zero out fraction. This doesn't work for YUV.
1754 if(entry->in_x2 >= in_total)
1756 entry->in_x2 = in_total - 1;
1757 entry->in_fraction2 = 0.0;
1760 if(entry->in_x1 >= in_total)
1762 entry->in_x1 = in_total - 1;
1763 entry->in_fraction1 = 0.0;
1765 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1769 // entry->in_fraction1,
1770 // entry->in_fraction2,
1771 // entry->output_fraction);
// Integer counterpart of translation_array_f(): builds one transfer_table_i
// entry per integer output coordinate in [out_x1_int, out_x2_int).
// Fractions are stored as fixed-point ints in which 0x10000 represents a
// fully covered pixel.
// out_x1_int, out_x2_int and out_w_int are written by this function.
// `table` is allocated here with new[] and zero-filled; no delete[] is
// performed by this function.
1776 void TranslateUnit::translation_array_i(transfer_table_i* &table,
1787 float offset = out_x1 - in_x1;
// Integer output span: start truncated, end rounded up but limited to out_total.
1789 out_x1_int = (int)out_x1;
1790 out_x2_int = MIN((int)ceil(out_x2), out_total);
1791 out_w_int = out_x2_int - out_x1_int;
1793 table = new transfer_table_i[out_w_int];
1794 bzero(table, sizeof(transfer_table_i) * out_w_int);
1797 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1800 for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1802 transfer_table_i *entry = &table[out_x - out_x1_int];
// The two input pixels that can contribute to this output pixel.
1804 entry->in_x1 = (int)in_x;
1805 entry->in_x2 = (int)in_x + 1;
1807 // Get fraction of output pixel to fill
1808 entry->output_fraction = 0x10000;
// Remove the part of the output pixel lying before out_x1 (presumably only
// applied to a partially covered first pixel).
1812 entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
// Output span ends inside this pixel: only the part up to out_x2 is covered.
1815 if(out_x2 < out_x + 1)
1817 entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1820 // Advance in_x until out_x_fraction is filled
1821 int out_x_fraction = entry->output_fraction;
// Fixed-point distance from in_x to the next integer input coordinate.
1822 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
// Everything needed fits inside the first input pixel.
1824 if(out_x_fraction <= in_x_fraction)
1826 entry->in_fraction1 = out_x_fraction;
1827 entry->in_fraction2 = 0;
// in_x itself stays in float units, so convert the fixed-point step back.
1828 in_x += (float)out_x_fraction / 0x10000;
// Otherwise the first input pixel gives what it has left and the second
// input pixel gives the fractional part of the advanced in_x.
1832 entry->in_fraction1 = in_x_fraction;
1833 in_x += (float)out_x_fraction / 0x10000;
1834 entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1837 // Clip in_x and zero out fraction. This doesn't work for YUV.
1838 if(entry->in_x2 >= in_total)
1840 entry->in_x2 = in_total - 1;
1841 entry->in_fraction2 = 0;
1844 if(entry->in_x1 >= in_total)
1846 entry->in_x1 = in_total - 1;
1847 entry->in_fraction1 = 0;
1849 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1853 // entry->in_fraction1,
1854 // entry->in_fraction2,
1855 // entry->output_fraction);
// TRANSLATE(max, temp_type, type, components, chroma_offset)
//   max           - full-scale sample value (0xff, 0xffff or 1.0 at the call sites)
//   temp_type     - type used for the blending arithmetic
//   type          - sample type stored in the frame rows
//   components    - samples per pixel (3 or 4)
//   chroma_offset - 0 for the RGB call sites, 0x80 / 0x8000 for the YUV ones
// Expands to the sub-pixel translate-and-blend loop. For every output row in
// [row1, row2) and column in [out_x1_int, out_x2_int) it looks up the two input
// rows / columns and their fractions in y_table_f / x_table_f, combines the
// four neighbouring input samples weighted by the products of the x and y
// fractions, scales master_opacity by the pixel's x and y output_fraction, and
// blends the result into the output pixel with BLEND_3 or BLEND_4.
// sizeof(type) != 4 separates the integer sample types from float: integer
// opacities get +0.5 added before truncation.
// Names taken from the expansion site: input, output, alpha, row1, row2,
// out_x1_int, out_x2_int, out_y1_int, x_table_f, y_table_f.
1892 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
1895 type **in_rows = (type**)input->get_rows(); \
1896 type **out_rows = (type**)output->get_rows(); \
1899 temp_type master_opacity; \
1900 if(sizeof(type) != 4) \
1901 master_opacity = (temp_type)(alpha * max + 0.5); \
1903 master_opacity = (temp_type)(alpha * max); \
1904 temp_type master_transparency = max - master_opacity; \
1905 float round = 0.0; \
1906 if(sizeof(type) != 4) \
1910 for(int i = row1; i < row2; i++) \
1914 float y_fraction1_f; \
1915 float y_fraction2_f; \
1916 float y_output_fraction_f; \
1917 in_y1 = y_table_f[i - out_y1_int].in_x1; \
1918 in_y2 = y_table_f[i - out_y1_int].in_x2; \
1919 y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1920 y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1921 y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1922 type *in_row1 = in_rows[(in_y1)]; \
1923 type *in_row2 = in_rows[(in_y2)]; \
1924 type *out_row = out_rows[i]; \
1926 for(int j = out_x1_int; j < out_x2_int; j++) \
1930 float x_fraction1_f; \
1931 float x_fraction2_f; \
1932 float x_output_fraction_f; \
1933 in_x1 = x_table_f[j - out_x1_int].in_x1; \
1934 in_x2 = x_table_f[j - out_x1_int].in_x2; \
1935 x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1936 x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1937 x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
1938 type *output = &out_row[j * components]; \
1939 temp_type input1, input2, input3, input4; \
1941 float fraction1 = x_fraction1_f * y_fraction1_f; \
1942 float fraction2 = x_fraction2_f * y_fraction1_f; \
1943 float fraction3 = x_fraction1_f * y_fraction2_f; \
1944 float fraction4 = x_fraction2_f * y_fraction2_f; \
1946 input1 = (type)(in_row1[in_x1 * components] * fraction1 + \
1947 in_row1[in_x2 * components] * fraction2 + \
1948 in_row2[in_x1 * components] * fraction3 + \
1949 in_row2[in_x2 * components] * fraction4 + round); \
1951 /* Add chroma to fractional pixels */ \
1954 float extra_chroma = (1.0F - \
1958 fraction4) * chroma_offset; \
1959 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 + \
1960 in_row1[in_x2 * components + 1] * fraction2 + \
1961 in_row2[in_x1 * components + 1] * fraction3 + \
1962 in_row2[in_x2 * components + 1] * fraction4 + \
1963 extra_chroma + round); \
1964 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 + \
1965 in_row1[in_x2 * components + 2] * fraction2 + \
1966 in_row2[in_x1 * components + 2] * fraction3 + \
1967 in_row2[in_x2 * components + 2] * fraction4 + \
1968 extra_chroma + round); \
1972 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 + \
1973 in_row1[in_x2 * components + 1] * fraction2 + \
1974 in_row2[in_x1 * components + 1] * fraction3 + \
1975 in_row2[in_x2 * components + 1] * fraction4 + round); \
1976 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 + \
1977 in_row1[in_x2 * components + 2] * fraction2 + \
1978 in_row2[in_x1 * components + 2] * fraction3 + \
1979 in_row2[in_x2 * components + 2] * fraction4 + round); \
1982 if(components == 4) \
1983 input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 + \
1984 in_row1[in_x2 * components + 3] * fraction2 + \
1985 in_row2[in_x1 * components + 3] * fraction3 + \
1986 in_row2[in_x2 * components + 3] * fraction4 + round); \
1988 temp_type opacity; \
1989 if(sizeof(type) != 4) \
1990 opacity = (temp_type)(master_opacity * \
1991 y_output_fraction_f * \
1992 x_output_fraction_f + 0.5); \
1994 opacity = (temp_type)(master_opacity * \
1995 y_output_fraction_f * \
1996 x_output_fraction_f); \
1997 temp_type transparency = max - opacity; \
1999 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
2001 if(components == 3) \
2003 BLEND_3(max, temp_type, type, chroma_offset); \
2007 BLEND_4(max, temp_type, type, chroma_offset); \
// Translate and blend the rows [pkg->out_row1, pkg->out_row2) of
// engine->translate_input onto engine->translate_output.
// Copies the engine parameters into the local names the TRANSLATE macro
// expects, builds the x and y transfer tables with translation_array_f(),
// expands TRANSLATE for the input color model, then frees both tables.
2013 void TranslateUnit::process_package(LoadPackage *package)
// The generic package is really a TranslatePackage.
2015 TranslatePackage *pkg = (TranslatePackage*)package;
2022 // Variables for TRANSLATE
2023 VFrame *input = engine->translate_input;
2024 VFrame *output = engine->translate_output;
2025 float in_x1 = engine->translate_in_x1;
2026 float in_y1 = engine->translate_in_y1;
2027 float in_x2 = engine->translate_in_x2;
2028 float in_y2 = engine->translate_in_y2;
2029 float out_x1 = engine->translate_out_x1;
2030 float out_y1 = engine->translate_out_y1;
2031 float out_x2 = engine->translate_out_x2;
2032 float out_y2 = engine->translate_out_y2;
2033 float alpha = engine->translate_alpha;
2034 int row1 = pkg->out_row1;
2035 int row2 = pkg->out_row2;
2036 int mode = engine->translate_mode;
2037 int in_total_x = input->get_w();
2038 int in_total_y = input->get_h();
2040 (engine->translate_input->get_color_model() == BC_YUV888 ||
2041 engine->translate_input->get_color_model() == BC_YUVA8888 ||
2042 engine->translate_input->get_color_model() == BC_YUV161616 ||
2043 engine->translate_input->get_color_model() == BC_YUVA16161616);
// Float and integer table pointers are both declared; the calls below fill
// the float ones.
2045 transfer_table_f *x_table_f;
2046 transfer_table_f *y_table_f;
2047 transfer_table_i *x_table_i;
2048 transfer_table_i *y_table_i;
2050 translation_array_f(x_table_f,
2059 translation_array_f(y_table_f,
2068 // printf("TranslateUnit::process_package 1 %d\n", mode);
// TRANSLATE arguments: full-scale value, arithmetic type, sample type,
// components per pixel, chroma offset.
2072 switch(engine->translate_input->get_color_model())
2075 TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
2079 TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
2083 TRANSLATE(1.0, float, float, 3, 0);
2087 TRANSLATE(1.0, float, float, 4, 0);
2091 TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2094 case BC_RGBA16161616:
2095 TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2099 TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
2103 TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
2107 TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2110 case BC_YUVA16161616:
2111 TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2114 // printf("blend mode %i, took %li ms\n", mode, a.get_difference());
// Release the tables allocated by translation_array_f().
2116 delete [] x_table_f;
2117 delete [] y_table_f;
// Construct the translate load server. `cpus` is passed as both arguments of
// the LoadServer constructor.
2129 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2130 : LoadServer(cpus, cpus)
// Remember the OverlayFrame; new_client() hands it to every TranslateUnit.
2132 this->overlay = overlay;
// TranslateEngine destructor.
2135 TranslateEngine::~TranslateEngine()
// Divide the output rows touched by the translation among the packages.
// The row span runs from (int)translate_out_y1 to ceil(translate_out_y2),
// limited to the output frame height; the last package always ends at the
// final row of that span.
2139 void TranslateEngine::init_packages()
2141 int out_y1_int = (int)translate_out_y1;
2142 int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2143 int out_h = out_y2_int - out_y1_int;
2145 for(int i = 0; i < get_total_packages(); i++)
2147 TranslatePackage *package = (TranslatePackage*)get_package(i);
2148 package->out_row1 = (int)(out_y1_int + out_h /
2149 get_total_packages() *
2151 package->out_row2 = (int)((float)package->out_row1 +
2153 get_total_packages());
// Last package: run to the end of the span.
2154 if(i >= get_total_packages() - 1)
2155 package->out_row2 = out_y2_int;
// Create a new TranslateUnit bound to this engine and its OverlayFrame.
// Returns a heap-allocated object; it is not deleted here.
2159 LoadClient* TranslateEngine::new_client()
2161 return new TranslateUnit(this, overlay);
// Create a new, default-constructed TranslatePackage.
// Returns a heap-allocated object; it is not deleted here.
2164 LoadPackage* TranslateEngine::new_package()
2166 return new TranslatePackage;
// SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset)
//   max           - full-scale sample value (0xff, 0xffff or 1.0 at the call sites)
//   temp_type     - type used for the blending arithmetic
//   type          - sample type stored in the frame rows
//   components    - samples per pixel (3 or 4)
//   chroma_offset - 0 for the RGB call sites, 0x80 / 0x8000 for the YUV ones
// Expands to a scale + translate + blend loop with no interpolation: every
// output row in [pkg->out_row1, pkg->out_row2) takes its input row from
// y_table, and every output pixel copies its samples from one input pixel
// before BLEND_3 / BLEND_4 is applied.
// When out_w differs from in_x2 - in_x1 the input column comes from x_table;
// otherwise the input row is walked linearly starting at in_x1.
// sizeof(type) != 4 separates the integer sample types from float: integer
// opacities get +0.5 added before truncation.
// Names taken from the expansion site: alpha, pkg, y_table, x_table, in_rows,
// out_rows, out_x1, out_y1, out_w, in_x1, in_x2.
2176 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
2178 temp_type opacity; \
2179 if(sizeof(type) != 4) \
2180 opacity = (temp_type)(alpha * max + 0.5); \
2182 opacity = (temp_type)(alpha * max); \
2183 temp_type transparency = max - opacity; \
2185 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2187 int in_y = y_table[i - out_y1]; \
2188 type *in_row = (type*)in_rows[in_y]; \
2189 type *output = (type*)out_rows[i] + out_x1 * components; \
2191 /* X direction is scaled and requires a table lookup */ \
2192 if(out_w != in_x2 - in_x1) \
2194 for(int j = 0; j < out_w; j++) \
2196 type *in_row_plus_x = in_row + x_table[j] * components; \
2197 temp_type input1, input2, input3, input4; \
2199 input1 = in_row_plus_x[0]; \
2200 input2 = in_row_plus_x[1]; \
2201 input3 = in_row_plus_x[2]; \
2202 if(components == 4) \
2203 input4 = in_row_plus_x[3]; \
2205 if(components == 3) \
2207 BLEND_3(max, temp_type, type, chroma_offset); \
2211 BLEND_4(max, temp_type, type, chroma_offset); \
2213 output += components; \
2217 /* X direction is not scaled */ \
2219 in_row += in_x1 * components; \
2220 for(int j = 0; j < out_w; j++) \
2222 temp_type input1, input2, input3, input4; \
2224 input1 = in_row[0]; \
2225 input2 = in_row[1]; \
2226 input3 = in_row[2]; \
2227 if(components == 4) \
2228 input4 = in_row[3]; \
2230 if(components == 3) \
2232 BLEND_3(max, temp_type, type, chroma_offset); \
2236 BLEND_4(max, temp_type, type, chroma_offset); \
2238 in_row += components; \
2239 output += components; \
// Construct a scale-translate worker attached to `server`, which is also
// passed to the LoadClient base class.
2247 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2248 : LoadClient(server)
// Keep both pointers; process_package() reads its parameters through
// `scale_translate`.
2250 this->overlay = overlay;
2251 this->scale_translate = server;
// ScaleTranslateUnit destructor.
2254 ScaleTranslateUnit::~ScaleTranslateUnit()
// Build a lookup table that maps each output offset i in
// [0, out_x2 - out_x1) to an input coordinate, truncated to int.
// `table` is allocated here with new[]; no delete[] is performed by this
// function.
// NOTE(review): no guard is visible for in_x2 == in_x1, which would make the
// scale computation divide by zero -- confirm callers rule this out.
2258 void ScaleTranslateUnit::scale_array_f(int* &table,
// Output length divided by input length.
2264 float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2266 table = new int[(int)out_x2 - out_x1];
2268 for(int i = 0; i < out_x2 - out_x1; i++)
2269 table[i] = (int)((float)i / scale + in_x1);
// Scale, translate and blend the output rows
// [pkg->out_row1, pkg->out_row2) without interpolation.
// Copies the engine parameters into the local names SCALE_TRANSLATE expects
// and builds lookup tables with scale_array_f(). Two paths follow:
//   - mode == TRANSFER_REPLACE with an unscaled X direction: each row is
//     copied with memcpy;
//   - SCALE_TRANSLATE expanded for the input color model.
2272 void ScaleTranslateUnit::process_package(LoadPackage *package)
// The generic package is really a ScaleTranslatePackage.
2274 ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2276 // Args for NEAREST_NEIGHBOR_MACRO
2277 VFrame *output = scale_translate->output;
2278 VFrame *input = scale_translate->input;
// The input rectangle is truncated to ints for the local copies.
2279 int in_x1 = (int)scale_translate->in_x1;
2280 int in_y1 = (int)scale_translate->in_y1;
2281 int in_x2 = (int)scale_translate->in_x2;
2282 int in_y2 = (int)scale_translate->in_y2;
2283 int out_x1 = scale_translate->out_x1;
2284 int out_y1 = scale_translate->out_y1;
2285 int out_x2 = scale_translate->out_x2;
2286 int out_y2 = scale_translate->out_y2;
2287 float alpha = scale_translate->alpha;
2288 int mode = scale_translate->mode;
2289 int out_w = out_x2 - out_x1;
2293 unsigned char **in_rows = input->get_rows();
2294 unsigned char **out_rows = output->get_rows();
2298 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
// The X table is only needed when the X direction is actually scaled.
2299 if(out_w != in_x2 - in_x1)
2301 scale_array_f(x_table,
2304 scale_translate->in_x1,
2305 scale_translate->in_x2);
2307 scale_array_f(y_table,
2310 scale_translate->in_y1,
2311 scale_translate->in_y2);
2314 if (mode == TRANSFER_REPLACE && (out_w == in_x2 - in_x1))
2316 // if we have transfer replace and x direction is not scaled, PARTY!
// Byte offsets and length of the span copied on every row.
2317 char bytes_per_pixel = input->calculate_bytes_per_pixel(input->get_color_model());
2318 int line_len = out_w * bytes_per_pixel;
2319 int in_start_byte = in_x1 * bytes_per_pixel;
2320 int out_start_byte = out_x1 * bytes_per_pixel;
2321 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2323 memcpy (out_rows[i] + out_start_byte,
2324 in_rows[y_table[i - out_y1]] + in_start_byte ,
// SCALE_TRANSLATE arguments: full-scale value, arithmetic type, sample type,
// components per pixel, chroma offset.
2330 switch(input->get_color_model())
2333 SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2337 SCALE_TRANSLATE(1.0, float, float, 3, 0);
2341 SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2345 SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2349 SCALE_TRANSLATE(1.0, float, float, 4, 0);
2353 SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2358 SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2362 SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2365 case BC_RGBA16161616:
2366 SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2369 case BC_YUVA16161616:
2370 SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2374 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
// Construct the scale-translate load server. `cpus` is passed as both
// arguments of the LoadServer constructor.
2389 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2390 : LoadServer(cpus, cpus)
// Remember the OverlayFrame; new_client() hands it to every
// ScaleTranslateUnit.
2392 this->overlay = overlay;
// ScaleTranslateEngine destructor.
2395 ScaleTranslateEngine::~ScaleTranslateEngine()
// Divide the output rows out_y1 .. out_y2 among the packages; the last
// package always ends at out_y2.
2399 void ScaleTranslateEngine::init_packages()
2401 int out_h = out_y2 - out_y1;
2403 for(int i = 0; i < get_total_packages(); i++)
2405 ScaleTranslatePackage *package = (ScaleTranslatePackage*)get_package(i);
2406 package->out_row1 = (int)(out_y1 + out_h /
2407 get_total_packages() *
2409 package->out_row2 = (int)((float)package->out_row1 +
2411 get_total_packages());
// Last package: run to the end of the span.
2412 if(i >= get_total_packages() - 1)
2413 package->out_row2 = out_y2;
// Create a new ScaleTranslateUnit bound to this engine and its OverlayFrame.
// Returns a heap-allocated object; it is not deleted here.
2417 LoadClient* ScaleTranslateEngine::new_client()
2419 return new ScaleTranslateUnit(this, overlay);
// Create a new, default-constructed ScaleTranslatePackage.
// Returns a heap-allocated object; it is not deleted here.
2422 LoadPackage* ScaleTranslateEngine::new_package()
2424 return new ScaleTranslatePackage;
// ScaleTranslatePackage default constructor.
2428 ScaleTranslatePackage::ScaleTranslatePackage()
// BLEND_ONLY(temp_type, type, max, components, chroma_offset)
//   temp_type     - type used for the blending arithmetic
//   type          - sample type stored in the frame rows
//   max           - full-scale sample value
//   components    - samples per pixel (3 or 4)
//   chroma_offset - forwarded to BLEND_3 / BLEND_4
// Note that the argument order differs from TRANSLATE and SCALE_TRANSLATE,
// which take max first.
// Expands to a blend of input row i onto output row i, with no scaling or
// translation, for rows [pkg->out_row1, pkg->out_row2) and input->get_w()
// pixels per row, using BLEND_3 or BLEND_4 per pixel.
// sizeof(type) != 4 separates the integer sample types from float: integer
// opacities get +0.5 added before truncation.
// Names taken from the expansion site: alpha, input, output, pkg.
2459 #define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
2461 temp_type opacity; \
2462 if(sizeof(type) != 4) \
2463 opacity = (temp_type)(alpha * max + 0.5); \
2465 opacity = (temp_type)(alpha * max); \
2466 temp_type transparency = max - opacity; \
2468 type** output_rows = (type**)output->get_rows(); \
2469 type** input_rows = (type**)input->get_rows(); \
2470 int w = input->get_w(); \
2471 int h = input->get_h(); \
2473 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2475 type* in_row = input_rows[i]; \
2476 type* output = output_rows[i]; \
2478 for(int j = 0; j < w; j++) \
2480 temp_type input1, input2, input3, input4; \
2481 input1 = in_row[0]; \
2482 input2 = in_row[1]; \
2483 input3 = in_row[2]; \
2484 if(components == 4) input4 = in_row[3]; \
2487 if(components == 3) \
2489 BLEND_3(max, temp_type, type, chroma_offset); \
2493 BLEND_4(max, temp_type, type, chroma_offset); \
2496 in_row += components; \
2497 output += components; \
// BLEND_ONLY_TRANSFER_REPLACE(type, components)
//   type       - sample type stored in the frame rows
//   components - samples per pixel
// Expands to a plain row copy: for rows [pkg->out_row1, pkg->out_row2) the
// input row is memcpy'd over the output row. The copied length is
// input->get_w() * sizeof(type) * components bytes.
// Names taken from the expansion site: input, output, pkg.
2503 #define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
2506 type** output_rows = (type**)output->get_rows(); \
2507 type** input_rows = (type**)input->get_rows(); \
2508 int w = input->get_w(); \
2509 int h = input->get_h(); \
2510 int line_len = w * sizeof(type) * components; \
2512 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2514 memcpy(output_rows[i], input_rows[i], line_len); \
// BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset)
//   temp_type     - type used for the blending arithmetic
//   type          - sample type stored in the frame rows
//   max           - full-scale sample value
//   chroma_offset - subtracted from channels 1 and 2 before they are blended
// Expands to the normal-mode blend for 4-component integer pixels, for rows
// [pkg->out_row1, pkg->out_row2) and input->get_w() pixels per row.
// The per-pixel opacity is opacity * input alpha, i.e. scaled by max squared,
// and the per-pixel transparency is max squared minus that value. The output
// alpha becomes the larger of the input and output alpha.
// Names taken from the expansion site: alpha, input, output, pkg.
2518 // components is always 4
2519 #define BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset) \
2521 temp_type opacity = (temp_type)(alpha * max + 0.5); \
2522 temp_type transparency = max - opacity; \
2523 temp_type max_squared = ((temp_type)max) * max; \
2525 type** output_rows = (type**)output->get_rows(); \
2526 type** input_rows = (type**)input->get_rows(); \
2527 int w = input->get_w(); \
2528 int h = input->get_h(); \
2530 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2532 type* in_row = input_rows[i]; \
2533 type* output = output_rows[i]; \
2535 for(int j = 0; j < w; j++) \
2537 temp_type pixel_opacity, pixel_transparency; \
2538 pixel_opacity = opacity * in_row[3]; \
2539 pixel_transparency = (temp_type)max_squared - pixel_opacity; \
2543 output[0] = ((temp_type)in_row[0] * pixel_opacity + \
2544 (temp_type)output[0] * pixel_transparency) / max / max; \
2545 output[1] = (((temp_type)in_row[1] - chroma_offset) * pixel_opacity + \
2546 ((temp_type)output[1] - chroma_offset) * pixel_transparency) \
2549 output[2] = (((temp_type)in_row[2] - chroma_offset) * pixel_opacity + \
2550 ((temp_type)output[2] - chroma_offset) * pixel_transparency) \
2553 output[3] = (type)(in_row[3] > output[3] ? in_row[3] : output[3]); \
// BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset)
//   temp_type - type used for the blending arithmetic
//   type      - sample type stored in the frame rows
// Expands to the normal-mode blend for 3-component integer pixels, for rows
// [pkg->out_row1, pkg->out_row2).
// Opacity is expressed on a power-of-two scale (1 << bits, where bits is the
// bit width of `type`), so each blended sample is normalised with a right
// shift by `bits`.
// w is set to three times the frame width, so the inner loop visits single
// samples rather than pixels.
// Names taken from the expansion site: alpha, input, output, pkg.
2563 // components is always 3
2564 #define BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset) \
2566 const int bits = sizeof(type) * 8; \
2567 temp_type opacity = (temp_type)(alpha * ((temp_type)1 << bits) + 0.5); \
2568 temp_type transparency = ((temp_type)1 << bits) - opacity; \
2570 type** output_rows = (type**)output->get_rows(); \
2571 type** input_rows = (type**)input->get_rows(); \
2572 int w = input->get_w() * 3; \
2573 int h = input->get_h(); \
2575 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2577 type* in_row = input_rows[i]; \
2578 type* output = output_rows[i]; \
2580 for(int j = 0; j < w; j++) /* w = 3x width! */ \
2582 *output = ((temp_type)*in_row * opacity + *output * transparency) >> bits; \
// Construct a blend worker attached to `server`, which is also passed to the
// LoadClient base class.
2591 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2592 : LoadClient(server)
// Keep both pointers; process_package() reads its parameters through
// `blend_engine`.
2594 this->overlay = overlay;
2595 this->blend_engine = server;
// BlendUnit destructor.
2598 BlendUnit::~BlendUnit()
// Blend rows [pkg->out_row1, pkg->out_row2) of blend_engine->input onto the
// same rows of blend_engine->output, with no scaling or translation.
// Three code paths, each dispatched on the input color model:
//   - TRANSFER_REPLACE: row copy via BLEND_ONLY_TRANSFER_REPLACE;
//   - TRANSFER_NORMAL:  hand-written float loops plus the
//                       BLEND_ONLY_3_NORMAL / BLEND_ONLY_4_NORMAL macros;
//   - the general BLEND_ONLY macro.
2602 void BlendUnit::process_package(LoadPackage *package)
// The generic package is really a BlendPackage.
2604 BlendPackage *pkg = (BlendPackage*)package;
// Local names used by the blend macros below.
2607 VFrame *output = blend_engine->output;
2608 VFrame *input = blend_engine->input;
2609 float alpha = blend_engine->alpha;
2610 int mode = blend_engine->mode;
2612 if (mode == TRANSFER_REPLACE)
2614 switch(input->get_color_model())
2617 BLEND_ONLY_TRANSFER_REPLACE(float, 3);
2620 BLEND_ONLY_TRANSFER_REPLACE(float, 4);
2624 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2628 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2632 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2634 case BC_RGBA16161616:
2635 case BC_YUVA16161616:
2636 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2641 if (mode == TRANSFER_NORMAL)
2643 switch(input->get_color_model())
// Float, 3 components: w counts samples (3 per pixel), each blended as
// input * alpha + output * (1 - alpha).
2647 float opacity = alpha;
2648 float transparency = 1.0 - alpha;
2650 float** output_rows = (float**)output->get_rows();
2651 float** input_rows = (float**)input->get_rows();
2652 int w = input->get_w() * 3;
2653 int h = input->get_h();
2655 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2657 float* in_row = input_rows[i];
2658 float* output = output_rows[i];
2660 for(int j = 0; j < w; j++)
2662 *output = *in_row * opacity + *output * transparency;
// Float, 4 components: w counts pixels; opacity is further scaled by the
// input pixel's alpha sample.
2671 float opacity = alpha;
2672 float transparency = 1.0 - alpha;
2674 float** output_rows = (float**)output->get_rows();
2675 float** input_rows = (float**)input->get_rows();
2676 int w = input->get_w();
2677 int h = input->get_h();
2679 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2681 float* in_row = input_rows[i];
2682 float* output = output_rows[i];
2684 for(int j = 0; j < w; j++)
2686 float pixel_opacity, pixel_transparency;
2687 pixel_opacity = opacity * in_row[3];
2688 pixel_transparency = 1.0 - pixel_opacity;
2691 output[0] = in_row[0] * pixel_opacity +
2692 output[0] * pixel_transparency;
2693 output[1] = in_row[1] * pixel_opacity +
2694 output[1] * pixel_transparency;
2695 output[2] = in_row[2] * pixel_opacity +
2696 output[2] * pixel_transparency;
// Output alpha becomes the larger of the two alpha samples.
2697 output[3] = in_row[3] > output[3] ? in_row[3] : output[3];
// Integer color models: arithmetic type, sample type, full-scale value,
// chroma offset.
2706 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0xff, 0);
2709 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2712 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0xff, 0);
2715 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2718 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2721 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2723 case BC_RGBA16161616:
2724 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2726 case BC_YUVA16161616:
2727 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
// General path through BLEND_ONLY: arithmetic type, sample type, full-scale
// value, components per pixel, chroma offset.
2732 switch(input->get_color_model())
2735 BLEND_ONLY(float, float, 1.0, 3, 0);
2738 BLEND_ONLY(float, float, 1.0, 4, 0);
2741 BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0);
2744 BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2747 BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0);
2750 BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2753 BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0);
2756 BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2758 case BC_RGBA16161616:
2759 BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0);
2761 case BC_YUVA16161616:
2762 BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
// Construct the blend load server. `cpus` is passed as both arguments of the
// LoadServer constructor.
2769 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2770 : LoadServer(cpus, cpus)
// Remember the OverlayFrame; new_client() hands it to every BlendUnit.
2772 this->overlay = overlay;
// BlendEngine destructor.
2775 BlendEngine::~BlendEngine()
// Divide the rows of the input frame among the packages; the last package
// always ends at input->get_h().
2779 void BlendEngine::init_packages()
2781 for(int i = 0; i < get_total_packages(); i++)
2783 BlendPackage *package = (BlendPackage*)get_package(i);
2784 package->out_row1 = (int)(input->get_h() /
2785 get_total_packages() *
2787 package->out_row2 = (int)((float)package->out_row1 +
2789 get_total_packages());
// Last package: run to the final input row.
2791 if(i >= get_total_packages() - 1)
2792 package->out_row2 = input->get_h();
// Create a new BlendUnit bound to this engine and its OverlayFrame.
// Returns a heap-allocated object; it is not deleted here.
2796 LoadClient* BlendEngine::new_client()
2798 return new BlendUnit(this, overlay);
// Create a new, default-constructed BlendPackage.
// Returns a heap-allocated object; it is not deleted here.
2801 LoadPackage* BlendEngine::new_package()
2803 return new BlendPackage;
2807 BlendPackage::BlendPackage()