11 #include "overlayframe.h"
15 // One my_abs name abstracting over the float and int types. Most of these
16 // overloads are never called, but GCC expects them to exist.
// my_abs overloads. BLEND_3 and BLEND_4 call my_abs() on (temp_type)
// differences, so one overload is declared per candidate temp_type.
// The bodies are not visible in this excerpt.
// NOTE(review): the int64_t/uint64_t overloads are declared to return int —
// confirm the result cannot be truncated for the values callers pass.
17 static int my_abs(int32_t x)
22 static int my_abs(uint32_t x)
27 static int my_abs(int64_t x)
32 static int my_abs(uint64_t x)
37 static float my_abs(float x)
// Constructor. Of its body only the reset of scaletranslate_engine to null
// is visible in this excerpt.
// NOTE(review): the destructor also deletes temp_frame, scale_engine,
// translate_engine and blend_engine — confirm they are nulled here as well.
45 OverlayFrame::OverlayFrame(int cpus)
50 scaletranslate_engine = 0;
// Destructor: releases the temporary frame and every engine member.
// The null checks are redundant (delete on a null pointer is a no-op) but
// harmless.
55 OverlayFrame::~OverlayFrame()
57 if(temp_frame) delete temp_frame;
58 if(scale_engine) delete scale_engine;
59 if(translate_engine) delete translate_engine;
60 if(blend_engine) delete blend_engine;
61 if(scaletranslate_engine) delete scaletranslate_engine;
73 // (255 * 255 + 0 * 0) / 255 = 255
74 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
76 // (65535 * 65535 + 0 * 0) / 65535 = 65535
77 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
80 // Branch prediction 4 U
// BLEND_3: blends one 3-component input pixel (input1..input3) into
// output[0..2]. Each TRANSFER_* case first combines input and output into
// r/g/b, then mixes that result with the existing output as
// (value * opacity + output * transparency) / max. For types whose sizeof is
// not 4 the result is clipped to [0, max] before being stored.
// Expects input1..3, output, opacity, transparency and r/g/b in the expansion
// scope (their declarations are not visible in this excerpt).
// chroma_offset is subtracted from components 2 and 3 before comparing or
// combining them; presumably it is 0 for RGB models — TODO confirm.
82 #define BLEND_3(max, temp_type, type, chroma_offset) \
86 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
89 case TRANSFER_DIVIDE: \
90 r = output[0] ? (((temp_type)input1 * max) / output[0]) : max; /* max when the divisor is 0 */ \
93 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; /* keep the component farther from chroma_offset */ \
94 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
98 g = output[1] ? (temp_type)input2 * max / (temp_type)output[1] : max; /* alternative g/b form; the selecting condition is not visible here */ \
99 b = output[2] ? (temp_type)input3 * max / (temp_type)output[2] : max; \
101 r = (r * opacity + (temp_type)output[0] * transparency) / max; \
102 g = (g * opacity + (temp_type)output[1] * transparency) / max; \
103 b = (b * opacity + (temp_type)output[2] * transparency) / max; \
105 case TRANSFER_MULTIPLY: \
106 r = ((temp_type)input1 * output[0]) / max; \
109 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
110 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
114 g = (temp_type)input2 * (temp_type)output[1] / max; \
115 b = (temp_type)input3 * (temp_type)output[2] / max; \
117 r = (r * opacity + (temp_type)output[0] * transparency) / max; \
118 g = (g * opacity + (temp_type)output[1] * transparency) / max; \
119 b = (b * opacity + (temp_type)output[2] * transparency) / max; \
121 case TRANSFER_SUBTRACT: \
122 r = (temp_type)output[0] - (temp_type)input1; /* output minus input */ \
123 g = ((temp_type)output[1] - (temp_type)chroma_offset) - \
124 ((temp_type)input2 - (temp_type)chroma_offset) + \
125 (temp_type)chroma_offset; \
126 b = ((temp_type)output[2] - (temp_type)chroma_offset) - \
127 ((temp_type)input3 - (temp_type)chroma_offset) + \
128 (temp_type)chroma_offset; \
129 r = (r * opacity + output[0] * transparency) / max; \
130 g = (g * opacity + output[1] * transparency) / max; \
131 b = (b * opacity + output[2] * transparency) / max; \
133 case TRANSFER_ADDITION: \
134 r = (temp_type)input1 + output[0]; \
135 g = ((temp_type)input2 - chroma_offset) + \
136 ((temp_type)output[1] - chroma_offset) + \
137 (temp_type)chroma_offset; \
138 b = ((temp_type)input3 - chroma_offset) + \
139 ((temp_type)output[2] - chroma_offset) + \
140 (temp_type)chroma_offset; \
141 r = (r * opacity + output[0] * transparency) / max; \
142 g = (g * opacity + output[1] * transparency) / max; \
143 b = (b * opacity + output[2] * transparency) / max; \
145 case TRANSFER_REPLACE: \
150 case TRANSFER_NORMAL: \
151 r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
152 g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
153 b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
157 if(sizeof(type) != 4) /* clip only when type is not 4 bytes wide (presumably the non-float types) */ \
159 output[0] = (type)CLIP(r, 0, max); \
160 output[1] = (type)CLIP(g, 0, max); \
161 output[2] = (type)CLIP(b, 0, max); \
175 // Blending equations are drastically different for 3 and 4 components
// BLEND_4: blends one 4-component input pixel (input1..input4) into
// output[0..3]. The per-pixel weight is pixel_opacity = opacity * input4 and
// pixel_transparency = max * max - pixel_opacity, so every mix divides by max
// twice. In every visible case the stored alpha is the larger of input4 and
// the existing output alpha. For types whose sizeof is not 4 the colour
// components are clipped to [0, max]; alpha is stored unclipped.
// Expects input1..4, output and opacity in the expansion scope (their
// declarations are not visible in this excerpt).
176 #define BLEND_4(max, temp_type, type, chroma_offset) \
178 temp_type r, g, b, a; \
179 temp_type pixel_opacity, pixel_transparency; \
180 temp_type output1 = output[0]; \
181 temp_type output2 = output[1]; \
182 temp_type output3 = output[2]; \
183 temp_type output4 = output[3]; \
185 pixel_opacity = opacity * input4; \
186 pixel_transparency = (temp_type)max * max - pixel_opacity; \
190 case TRANSFER_DIVIDE: \
191 r = output1 ? (((temp_type)input1 * max) / output1) : max; /* max when the divisor is 0 */ \
194 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
195 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
199 g = output2 ? (temp_type)input2 * max / (temp_type)output2 : max; /* alternative g/b form; the selecting condition is not visible here */ \
200 b = output3 ? (temp_type)input3 * max / (temp_type)output3 : max; \
202 r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
203 g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
204 b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
205 a = input4 > output4 ? input4 : output4; \
207 case TRANSFER_MULTIPLY: \
208 r = ((temp_type)input1 * output1) / max; \
211 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
212 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
216 g = (temp_type)input2 * (temp_type)output2 / max; \
217 b = (temp_type)input3 * (temp_type)output3 / max; \
219 r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
220 g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
221 b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
222 a = input4 > output4 ? input4 : output4; \
224 case TRANSFER_SUBTRACT: \
225 r = (temp_type)input1 - output1; /* NOTE(review): input - output here, but g/b below and BLEND_3 use output - input — confirm which is intended */ \
226 g = ((temp_type)output2 - chroma_offset) - \
227 ((temp_type)input2 - (temp_type)chroma_offset) + \
228 (temp_type)chroma_offset; \
229 b = ((temp_type)output3 - chroma_offset) - \
230 ((temp_type)input3 - (temp_type)chroma_offset) + \
231 (temp_type)chroma_offset; \
232 r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
233 g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
234 b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
235 a = input4 > output4 ? input4 : output4; \
237 case TRANSFER_ADDITION: \
238 r = (temp_type)input1 + output1; \
239 g = ((temp_type)input2 - chroma_offset) + \
240 ((temp_type)output2 - chroma_offset) + \
242 b = ((temp_type)input3 - chroma_offset) + \
243 ((temp_type)output3 - chroma_offset) + \
245 r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
246 g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
247 b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
248 a = input4 > output4 ? input4 : output4; \
250 case TRANSFER_REPLACE: \
256 case TRANSFER_NORMAL: \
257 r = (input1 * pixel_opacity + \
258 output1 * pixel_transparency) / max / max; \
259 g = ((input2 - chroma_offset) * pixel_opacity + \
260 (output2 - chroma_offset) * pixel_transparency) \
263 b = ((input3 - chroma_offset) * pixel_opacity + \
264 (output3 - chroma_offset) * pixel_transparency) \
267 a = input4 > output4 ? input4 : output4; \
271 if(sizeof(type) != 4) /* clip only when type is not 4 bytes wide (presumably the non-float types) */ \
273 output[0] = (type)CLIP(r, 0, max); \
274 output[1] = (type)CLIP(g, 0, max); \
275 output[2] = (type)CLIP(b, 0, max); \
276 output[3] = (type)a; \
289 // Bicubic algorithm using multiprocessors
290 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
292 // Nearest neighbor algorithm using multiprocessors for blending
293 // input -> scale + translate -> blend -> output
// Overlays the rectangle (in_x1, in_y1)-(in_x2, in_y2) of `input` onto the
// rectangle (out_x1, out_y1)-(out_x2, out_y2) of `output`.
//
// Flow as far as it is visible in this excerpt:
//  1. Derive w_scale / h_scale from the two rectangles.
//  2. Return 1 if a coordinate is NaN (only the out_y2 test is visible).
//  3. Clamp the input rectangle to the input frame and the output rectangle
//     to the output frame, shifting the opposite rectangle by the scaled
//     amount each time.
//  4. Return 0 if either clamped rectangle is empty.
//  5. For non-nearest-neighbour interpolation with a non-unity scale, run
//     ScaleEngine either straight into `output` (translation_input becomes 0)
//     or into temp_frame (which then becomes translation_input).
//  6. If translation_input is still set: copy directly, run BlendEngine, run
//     ScaleTranslateEngine (nearest neighbour or integer-aligned coordinates)
//     or run TranslateEngine (fractional translation).
// The engines are created on first use and kept in members.
296 int OverlayFrame::overlay(VFrame *output,
306 float alpha, // 0 - 1
308 int interpolation_type)
310 float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1); // NOTE(review): computed before the visible validity checks; a zero-width input divides by zero — confirm this is acceptable
311 float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
327 isnan(out_y2)) return 1;
328 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f scale=%f %f\n", in_x1,
// Clamp the input rectangle to the input frame; out_* follows by the scaled amount.
342 out_x1 += -in_x1 * w_scale;
346 if(in_x1 >= input->get_w())
348 out_x1 -= (in_x1 - input->get_w()) * w_scale;
349 in_x1 = input->get_w();
354 out_y1 += -in_y1 * h_scale;
358 if(in_y1 >= input->get_h())
360 out_y1 -= (in_y1 - input->get_h()) * h_scale;
361 in_y1 = input->get_h();
366 out_x2 += -in_x2 * w_scale;
370 if(in_x2 >= input->get_w())
372 out_x2 -= (in_x2 - input->get_w()) * w_scale;
373 in_x2 = input->get_w();
378 out_y2 += -in_y2 * h_scale;
382 if(in_y2 >= input->get_h())
384 out_y2 -= (in_y2 - input->get_h()) * h_scale;
385 in_y2 = input->get_h();
// Clamp the output rectangle to the output frame; in_* follows by the inverse-scaled amount.
390 in_x1 += -out_x1 / w_scale;
394 if(out_x1 >= output->get_w())
396 in_x1 -= (out_x1 - output->get_w()) / w_scale;
397 out_x1 = output->get_w();
402 in_y1 += -out_y1 / h_scale;
406 if(out_y1 >= output->get_h())
408 in_y1 -= (out_y1 - output->get_h()) / h_scale;
409 out_y1 = output->get_h();
414 in_x2 += -out_x2 / w_scale;
418 if(out_x2 >= output->get_w())
420 in_x2 -= (out_x2 - output->get_w()) / w_scale;
421 out_x2 = output->get_w();
426 in_y2 += -out_y2 / h_scale;
430 if(out_y2 >= output->get_h())
432 in_y2 -= (out_y2 - output->get_h()) / h_scale;
433 out_y2 = output->get_h();
445 float in_w = in_x2 - in_x1;
446 float in_h = in_y2 - in_y1;
447 float out_w = out_x2 - out_x1;
448 float out_h = out_y2 - out_y1;
449 // Input for translation operation
450 VFrame *translation_input = input;
453 if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0; // nothing left to draw after clamping
456 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
469 // ****************************************************************************
470 // Transfer to temp buffer by scaling nearest integer boundaries
471 // ****************************************************************************
472 if(interpolation_type != NEAREST_NEIGHBOR &&
473 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
475 // Create integer boundaries for interpolation
476 int in_x1_int = (int)in_x1;
477 int in_y1_int = (int)in_y1;
478 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
479 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
480 int out_x1_int = (int)out_x1;
481 int out_y1_int = (int)out_y1;
482 int out_x2_int = MIN((int)ceil(out_x2), output->get_w());
483 int out_y2_int = MIN((int)ceil(out_y2), output->get_h());
485 // Dimensions of temp frame. Integer boundaries scaled.
486 int temp_w = (out_x2_int - out_x1_int);
487 int temp_h = (out_y2_int - out_y1_int);
488 VFrame *scale_output;
492 #define NO_TRANSLATION1 \
493 (EQUIV(in_x1, 0) && \
495 EQUIV(out_x1, 0) && \
496 EQUIV(out_y1, 0) && \
497 EQUIV(in_x2, in_x2_int) && \
498 EQUIV(in_y2, in_y2_int) && \
499 EQUIV(out_x2, temp_w) && \
500 EQUIV(out_y2, temp_h))
504 (EQUIV(alpha, 1) && \
505 (mode == TRANSFER_REPLACE || \
506 (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
512 // Prepare destination for operation
514 // No translation and no blending. The blending operation is built into the
515 // translation unit but not the scaling unit.
517 if(NO_TRANSLATION1 &&
520 // printf("OverlayFrame::overlay input -> output\n");
522 scale_output = output;
523 translation_input = 0; // scaling writes the final result; the translation stage is skipped
526 // If translation or blending
527 // input -> nearest integer boundary temp
530 (temp_frame->get_w() != temp_w ||
531 temp_frame->get_h() != temp_h))
539 temp_frame = new VFrame(0,
542 input->get_color_model(),
545 //printf("OverlayFrame::overlay input -> temp\n");
548 temp_frame->clear_frame();
550 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
552 scale_output = temp_frame;
553 translation_input = scale_output;
555 // Adjust input coordinates to reflect new scaled coordinates.
564 //printf("Overlay 1\n");
566 // Scale input -> scale_output
567 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
568 scale_engine->scale_output = scale_output;
569 scale_engine->scale_input = input;
570 scale_engine->w_scale = w_scale;
571 scale_engine->h_scale = h_scale;
572 scale_engine->in_x1_int = in_x1_int;
573 scale_engine->in_y1_int = in_y1_int;
574 scale_engine->out_w_int = temp_w;
575 scale_engine->out_h_int = temp_h;
576 scale_engine->interpolation_type = interpolation_type;
577 //printf("Overlay 2\n");
579 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
580 scale_engine->process_packages();
581 //printf("OverlayFrame::overlay ScaleEngine 2\n");
587 // printf("OverlayFrame::overlay 1 %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
601 #define NO_TRANSLATION2 \
602 (EQUIV(in_x1, 0) && \
604 EQUIV(in_x2, translation_input->get_w()) && \
605 EQUIV(in_y2, translation_input->get_h()) && \
606 EQUIV(out_x1, 0) && \
607 EQUIV(out_y1, 0) && \
608 EQUIV(out_x2, output->get_w()) && \
609 EQUIV(out_y2, output->get_h())) \
612 (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
613 EQUIV(out_y2 - out_y1, in_y2 - in_y1))
618 //printf("OverlayFrame::overlay 4 %d\n", mode);
623 if(translation_input)
626 if( NO_TRANSLATION2 &&
630 //printf("OverlayFrame::overlay direct copy\n");
631 output->copy_from(translation_input);
635 if( NO_TRANSLATION2 &&
638 if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
641 blend_engine->output = output;
642 blend_engine->input = translation_input;
643 blend_engine->alpha = alpha;
644 blend_engine->mode = mode;
646 blend_engine->process_packages();
649 // Scale and translate using nearest neighbor
650 // Translation is exactly on integer boundaries
651 if(interpolation_type == NEAREST_NEIGHBOR || // NOTE(review): '&&' binds tighter than '||', so this reads NEAREST_NEIGHBOR || (all EQUIV tests); parentheses would make that explicit
652 EQUIV(in_x1, (int)in_x1) &&
653 EQUIV(in_y1, (int)in_y1) &&
654 EQUIV(in_x2, (int)in_x2) &&
655 EQUIV(in_y2, (int)in_y2) &&
657 EQUIV(out_x1, (int)out_x1) &&
658 EQUIV(out_y1, (int)out_y1) &&
659 EQUIV(out_x2, (int)out_x2) &&
660 EQUIV(out_y2, (int)out_y2))
662 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
663 if(!scaletranslate_engine) scaletranslate_engine =
664 new ScaleTranslateEngine(this, cpus);
667 scaletranslate_engine->output = output;
668 scaletranslate_engine->input = translation_input;
669 scaletranslate_engine->in_x1 = (int)in_x1;
670 scaletranslate_engine->in_y1 = (int)in_y1;
671 // we need to do this mumbo-jumbo in order to get numerical stability
672 // other option would be to round all the coordinates
673 scaletranslate_engine->in_x2 = (int)in_x1 + (int)(in_x2 - in_x1); // truncated origin plus truncated extent, rather than truncating in_x2 itself
674 scaletranslate_engine->in_y2 = (int)in_y1 + (int)(in_y2 - in_y1);
675 scaletranslate_engine->out_x1 = (int)out_x1;
676 scaletranslate_engine->out_y1 = (int)out_y1;
677 scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
678 scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
679 scaletranslate_engine->alpha = alpha;
680 scaletranslate_engine->mode = mode;
682 scaletranslate_engine->process_packages();
685 // Fractional translation
687 // Use fractional translation
688 // printf("OverlayFrame::overlay temp -> output %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
698 //printf("Overlay 3\n");
699 if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
700 translate_engine->translate_output = output;
701 translate_engine->translate_input = translation_input;
702 translate_engine->translate_in_x1 = in_x1;
703 translate_engine->translate_in_y1 = in_y1;
704 translate_engine->translate_in_x2 = in_x2;
705 translate_engine->translate_in_y2 = in_y2;
706 translate_engine->translate_out_x1 = out_x1;
707 translate_engine->translate_out_y1 = out_y1;
708 translate_engine->translate_out_x2 = out_x2;
709 translate_engine->translate_out_y2 = out_y2;
710 translate_engine->translate_alpha = alpha;
711 translate_engine->translate_mode = mode;
712 //printf("Overlay 4\n");
714 //printf("OverlayFrame::overlay 5 %d\n", mode);
715 translate_engine->process_packages();
719 //printf("OverlayFrame::overlay 2\n");
// Constructor of the scaling work package (body not visible in this excerpt).
// The scaling macros read out_row1/out_row2 from it.
730 ScalePackage::ScalePackage()
// Constructor: stores non-owning back-pointers to the OverlayFrame and to the
// ScaleEngine this unit reads its scaling parameters from.
737 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
740 this->overlay = overlay;
741 this->engine = server;
// Destructor (body not visible in this excerpt).
744 ScaleUnit::~ScaleUnit()
// Builds the lookup table used when shrinking along one axis.
//
// Allocates `table` with new[] (out_total entries, zero-filled); the caller
// must release it with delete[]. For output pixel i the entry describes the
// span of input pixels it covers:
//   input_pixel1 / input_fraction1 - first input pixel and its weight
//   input_fraction2                - weight of every pixel strictly between
//   input_pixel2 / input_fraction3 - last input pixel and its weight
// The visible normalisation divides the weights by their sum before
// total_fraction is recorded. in_pixel1 is added to both pixel indices at the
// end so they index into the source frame.
750 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
756 table = new bilinear_table_t[out_total];
757 bzero(table, sizeof(bilinear_table_t) * out_total);
758 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
759 for(int i = 0; i < out_total; i++)
762 float in_start = out_start * scale;
763 float out_end = i + 1;
764 float in_end = out_end * scale;
765 bilinear_table_t *entry = table + i;
766 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
768 // Store input fraction. Using scale to normalize these didn't work.
769 entry->input_fraction1 = (floor(in_start + 1) - in_start) /* / scale */;
770 entry->input_fraction2 = 1.0 /* / scale */;
771 entry->input_fraction3 = (in_end - floor(in_end)) /* / scale */;
// The span would run past the available input: pull in_end back to the last
// usable pixel and recompute the last weight.
773 if(in_end >= in_total - in_pixel1)
775 in_end = in_total - in_pixel1 - 1;
777 int difference = (int)in_end - (int)in_start - 1;
778 if(difference < 0) difference = 0;
779 entry->input_fraction3 = 1.0 -
780 entry->input_fraction1 -
781 entry->input_fraction2 * difference;
784 // Store input pixels
785 entry->input_pixel1 = (int)in_start;
786 entry->input_pixel2 = (int)in_end;
788 // Normalize for middle pixels
789 if(entry->input_pixel2 > entry->input_pixel1 + 1)
791 float total = entry->input_fraction1 +
792 entry->input_fraction2 *
793 (entry->input_pixel2 - entry->input_pixel1 - 1) +
794 entry->input_fraction3;
795 entry->input_fraction1 /= total;
796 entry->input_fraction2 /= total;
797 entry->input_fraction3 /= total;
// No middle pixels: only the first and last weights take part.
801 float total = entry->input_fraction1 +
802 entry->input_fraction3;
803 entry->input_fraction1 /= total;
804 entry->input_fraction3 /= total;
807 // printf("ScaleUnit::tabulate_reduction 1 %d %d %d %f %f %f %f\n",
809 // entry->input_pixel1,
810 // entry->input_pixel2,
811 // entry->input_fraction1,
812 // entry->input_fraction2,
813 // entry->input_fraction3,
814 // entry->input_fraction1 +
815 // entry->input_fraction2 *
816 // (entry->input_pixel2 - entry->input_pixel1 - 1) +
817 // entry->input_fraction3);
// Clamping in_end can leave the first pixel beyond the last; collapse the span.
821 if(entry->input_pixel1 > entry->input_pixel2)
823 entry->input_pixel1 = entry->input_pixel2;
824 entry->input_fraction1 = 0;
827 // Get total fraction of output pixel used
828 // if(entry->input_pixel2 > entry->input_pixel1)
829 entry->total_fraction =
830 entry->input_fraction1 +
831 entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
832 entry->input_fraction3;
// Convert span-relative indices to source-frame indices.
833 entry->input_pixel1 += in_pixel1;
834 entry->input_pixel2 += in_pixel1;
// Builds the lookup table used when enlarging along one axis.
//
// Allocates `table` with new[] (out_total entries, zero-filled); the caller
// must release it with delete[]. Each output pixel i maps to the input
// position i * scale and is described by the two neighbouring input pixels
// (input_pixel1, input_pixel2) with weights input_fraction1 and
// input_fraction3. input_fraction2 is never written here, so it stays 0 from
// the bzero. in_pixel1 is added to both pixel indices at the end.
838 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
844 table = new bilinear_table_t[out_total];
845 bzero(table, sizeof(bilinear_table_t) * out_total);
847 for(int i = 0; i < out_total; i++)
849 bilinear_table_t *entry = table + i;
850 float in_pixel = i * scale;
851 entry->input_pixel1 = (int)floor(in_pixel);
852 entry->input_pixel2 = entry->input_pixel1 + 1;
854 if(in_pixel <= in_total)
856 entry->input_fraction3 = in_pixel - entry->input_pixel1; // weight of the right-hand neighbour
860 entry->input_fraction3 = 0;
861 entry->input_pixel2 = 0;
866 entry->input_fraction1 = entry->input_pixel2 - in_pixel; // weight of the left-hand neighbour
870 entry->input_fraction1 = 0;
871 entry->input_pixel1 = 0;
// Right-hand neighbour falls outside the usable span: reuse the left pixel and
// give it the remaining weight.
874 if(entry->input_pixel2 >= in_total - in_pixel1)
876 entry->input_pixel2 = entry->input_pixel1;
877 entry->input_fraction3 = 1.0 - entry->input_fraction1;
880 entry->total_fraction =
881 entry->input_fraction1 +
882 entry->input_fraction3;
883 entry->input_pixel1 += in_pixel1;
884 entry->input_pixel2 += in_pixel1;
886 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
887 // entry->input_pixel1,
888 // entry->input_pixel2,
889 // entry->input_fraction1,
890 // entry->input_fraction2,
891 // entry->input_fraction3);
// Debug helper: prints the first `total` entries of a bilinear table to
// stdout, one line per entry.
895 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
897 printf("ScaleUnit::dump_bilinear\n");
898 for(int i = 0; i < total; i++)
900 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n",
902 table[i].input_pixel1,
903 table[i].input_pixel2,
904 table[i].input_fraction1,
905 table[i].input_fraction2,
906 table[i].input_fraction3,
907 table[i].total_fraction);
// PIXEL_REDUCE_MACRO: accumulates one input row's contribution to the
// running sums temp_f1..temp_f4. The first pixel of the x span is weighted by
// input_scale1, the last by input_scale3 and every pixel in between by
// input_scale2. Expects in_rows, x_entry, input_scale1..3 and temp_f1..4 in
// the expansion scope.
911 #define PIXEL_REDUCE_MACRO(type, components, row) \
913 type *input_row = &in_rows[row][x_entry->input_pixel1 * components]; \
914 type *input_end = &in_rows[row][x_entry->input_pixel2 * components]; \
916 /* Do first pixel */ \
917 temp_f1 += input_scale1 * input_row[0]; \
918 temp_f2 += input_scale1 * input_row[1]; \
919 temp_f3 += input_scale1 * input_row[2]; \
920 if(components == 4) temp_f4 += input_scale1 * input_row[3]; \
922 /* Do last pixel */ \
923 /* if(input_row < input_end) */\
/* NOTE(review): with the guard above commented out, a span whose first and last pixel coincide adds that pixel twice (scale1 and scale3) - presumably the table weights account for this; confirm */ \
925 temp_f1 += input_scale3 * input_end[0]; \
926 temp_f2 += input_scale3 * input_end[1]; \
927 temp_f3 += input_scale3 * input_end[2]; \
928 if(components == 4) temp_f4 += input_scale3 * input_end[3]; \
931 /* Do middle pixels */ \
932 for(input_row += components; input_row < input_end; input_row += components) \
934 temp_f1 += input_scale2 * input_row[0]; \
935 temp_f2 += input_scale2 * input_row[1]; \
936 temp_f3 += input_scale2 * input_row[2]; \
937 if(components == 4) temp_f4 += input_scale2 * input_row[3]; \
941 // Bilinear reduction and suboptimal enlargement.
942 // Very high quality.
// BILINEAR_REDUCE: area-weighted scaling of rows pkg->out_row1 up to (not
// including) pkg->out_row2. Builds an x table and a y table with
// tabulate_reduction or tabulate_enlarge (the selecting conditions are not
// visible in this excerpt). For every output pixel it sums the first, last
// and middle input rows through PIXEL_REDUCE_MACRO, limits the sums to max
// and stores them. Both tables are freed with delete[] at the end.
943 #define BILINEAR_REDUCE(max, type, components) \
945 bilinear_table_t *x_table, *y_table; \
946 int out_h = pkg->out_row2 - pkg->out_row1; \
947 type **in_rows = (type**)input->get_rows(); \
948 type **out_rows = (type**)output->get_rows(); \
951 tabulate_reduction(x_table, \
957 tabulate_enlarge(x_table, \
964 tabulate_reduction(y_table, \
970 tabulate_enlarge(y_table, \
975 /* dump_bilinear(y_table, out_h_int); */\
977 for(int i = 0; i < out_h; i++) \
979 type *out_row = out_rows[i + pkg->out_row1]; \
980 bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; /* y_table is indexed by absolute output row */ \
981 /* printf("BILINEAR_REDUCE 2 %d %d %d %f %f %f\n", */ \
983 /* y_entry->input_pixel1, */ \
984 /* y_entry->input_pixel2, */ \
985 /* y_entry->input_fraction1, */ \
986 /* y_entry->input_fraction2, */ \
987 /* y_entry->input_fraction3); */ \
989 for(int j = 0; j < out_w_int; j++) \
991 bilinear_table_t *x_entry = &x_table[j]; \
992 /* Load rounding factors */ \
997 if(sizeof(type) != 4) /* start at .5 so the final cast rounds for types that are not 4 bytes wide */ \
998 temp_f1 = temp_f2 = temp_f3 = temp_f4 = .5; \
1000 temp_f1 = temp_f2 = temp_f3 = temp_f4 = 0; \
/* First input row of the y span */ \
1003 float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
1004 float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
1005 float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
1006 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
/* Last input row of the y span */ \
1011 input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
1012 input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
1013 input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
1014 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
/* Rows strictly between the first and the last */ \
1019 input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
1020 input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
1021 input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
1022 for(int k = y_entry->input_pixel1 + 1; \
1023 k < y_entry->input_pixel2; \
1026 PIXEL_REDUCE_MACRO(type, components, k) \
1034 if(temp_f1 > max) temp_f1 = max; \
1035 if(temp_f2 > max) temp_f2 = max; \
1036 if(temp_f3 > max) temp_f3 = max; \
1037 if(components == 4) if(temp_f4 > max) temp_f4 = max; \
1040 out_row[j * components ] = (type)temp_f1; \
1041 out_row[j * components + 1] = (type)temp_f2; \
1042 out_row[j * components + 2] = (type)temp_f3; \
1043 if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
1045 /*printf("BILINEAR_REDUCE 3 %d\n", i);*/ \
1048 delete [] x_table; \
1049 delete [] y_table; \
1054 // Only 2 input pixels
// BILINEAR_ENLARGE: two-tap bilinear interpolation for the rows of the
// current package. tabulate_blinear_f supplies, per axis, the two
// neighbouring input indices and the fractional weight and its complement.
// Each output component is
//   anti_a * (anti_b * p1 + b * p2) + a * (anti_b * p3 + b * p4)
// where p1/p2 come from the upper input row and p3/p4 from the lower one.
// All eight tables are freed with delete[] at the end.
1055 #define BILINEAR_ENLARGE(max, type, components) \
1057 /*printf("BILINEAR_ENLARGE 1\n");*/ \
1058 float k_y = 1.0 / scale_h; \
1059 float k_x = 1.0 / scale_w; \
1060 type **in_rows = (type**)input->get_rows(); \
1061 type **out_rows = (type**)output->get_rows(); \
1062 int out_h = pkg->out_row2 - pkg->out_row1; \
1063 int in_h_int = input->get_h(); \
1064 int in_w_int = input->get_w(); \
1065 int *table_int_x1, *table_int_y1; \
1066 int *table_int_x2, *table_int_y2; \
1067 float *table_frac_x_f, *table_antifrac_x_f, *table_frac_y_f, *table_antifrac_y_f; \
1068 int *table_frac_x_i, *table_antifrac_x_i, *table_frac_y_i, *table_antifrac_y_i; \
1070 tabulate_blinear_f(table_int_x1, \
1073 table_antifrac_x_f, \
1079 tabulate_blinear_f(table_int_y1, \
1082 table_antifrac_y_f, \
1089 for(int i = 0; i < out_h; i++) \
1091 int i_y1 = table_int_y1[i]; /* y tables are indexed relative to the package's first row */ \
1092 int i_y2 = table_int_y2[i]; \
1096 uint64_t anti_a_i; \
1097 a_f = table_frac_y_f[i]; \
1098 anti_a_f = table_antifrac_y_f[i]; \
1099 type *in_row1 = in_rows[i_y1]; \
1100 type *in_row2 = in_rows[i_y2]; \
1101 type *out_row = out_rows[i + pkg->out_row1]; \
1103 for(int j = 0; j < out_w_int; j++) \
1105 int i_x1 = table_int_x1[j]; \
1106 int i_x2 = table_int_x2[j]; \
1107 float output1r, output1g, output1b, output1a; \
1108 float output2r, output2g, output2b, output2a; \
1109 float output3r, output3g, output3b, output3a; \
1110 float output4r, output4g, output4b, output4a; \
1113 b_f = table_frac_x_f[j]; \
1114 anti_b_f = table_antifrac_x_f[j]; \
/* Fetch the four neighbouring pixels: 1 = upper left, 2 = upper right, 3 = lower left, 4 = lower right */ \
1116 output1r = in_row1[i_x1 * components]; \
1117 output1g = in_row1[i_x1 * components + 1]; \
1118 output1b = in_row1[i_x1 * components + 2]; \
1119 if(components == 4) output1a = in_row1[i_x1 * components + 3]; \
1121 output2r = in_row1[i_x2 * components]; \
1122 output2g = in_row1[i_x2 * components + 1]; \
1123 output2b = in_row1[i_x2 * components + 2]; \
1124 if(components == 4) output2a = in_row1[i_x2 * components + 3]; \
1126 output3r = in_row2[i_x1 * components]; \
1127 output3g = in_row2[i_x1 * components + 1]; \
1128 output3b = in_row2[i_x1 * components + 2]; \
1129 if(components == 4) output3a = in_row2[i_x1 * components + 3]; \
1131 output4r = in_row2[i_x2 * components]; \
1132 output4g = in_row2[i_x2 * components + 1]; \
1133 output4b = in_row2[i_x2 * components + 2]; \
1134 if(components == 4) output4a = in_row2[i_x2 * components + 3]; \
1136 out_row[j * components] = \
1137 (type)(anti_a_f * (anti_b_f * output1r + \
1139 a_f * (anti_b_f * output3r + \
1141 out_row[j * components + 1] = \
1142 (type)(anti_a_f * (anti_b_f * output1g + \
1144 a_f * ((anti_b_f * output3g) + \
1146 out_row[j * components + 2] = \
1147 (type)(anti_a_f * ((anti_b_f * output1b) + \
1148 (b_f * output2b)) + \
1149 a_f * ((anti_b_f * output3b) + \
1151 if(components == 4) \
1152 out_row[j * components + 3] = \
1153 (type)(anti_a_f * ((anti_b_f * output1a) + \
1154 (b_f * output2a)) + \
1155 a_f * ((anti_b_f * output3a) + \
1161 delete [] table_int_x1; \
1162 delete [] table_int_x2; \
1163 delete [] table_int_y1; \
1164 delete [] table_int_y2; \
1165 delete [] table_frac_x_f; \
1166 delete [] table_antifrac_x_f; \
1167 delete [] table_frac_y_f; \
1168 delete [] table_antifrac_y_f; \
1170 /*printf("BILINEAR_ENLARGE 2\n");*/ \
// BICUBIC: 4x4-tap bicubic scaling of rows pkg->out_row1 up to (not
// including) pkg->out_row2. tabulate_bcubic_f supplies, per axis, four kernel
// coefficients and four clamped input coordinates for every output pixel
// (tables are indexed at pixel * 4). Each output component is the sum over
// the 4x4 neighbourhood of (y coefficient * x coefficient * input component).
// All four tables are freed with delete[] at the end.
1174 #define BICUBIC(max, type, components) \
1176 float k_y = 1.0 / scale_h; \
1177 float k_x = 1.0 / scale_w; \
1178 type **in_rows = (type**)input->get_rows(); \
1179 type **out_rows = (type**)output->get_rows(); \
1180 float *bspline_x_f, *bspline_y_f; \
1181 int *bspline_x_i, *bspline_y_i; \
1182 int *in_x_table, *in_y_table; \
1183 int in_h_int = input->get_h(); \
1184 int in_w_int = input->get_w(); \
1186 tabulate_bcubic_f(bspline_x_f, \
1194 tabulate_bcubic_f(bspline_y_f, \
1202 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
1204 for(int j = 0; j < out_w_int; j++) \
1206 int i_x = (int)(k_x * j); \
1207 float output1_f, output2_f, output3_f, output4_f; \
1208 uint64_t output1_i, output2_i, output3_i, output4_i; \
1212 if(components == 4) \
1214 int table_y = i * 4; /* four table slots per output row */ \
1217 for(int m = -1; m < 3; m++) \
1221 r1_f = bspline_y_f[table_y]; \
1222 int y = in_y_table[table_y]; \
1223 int table_x = j * 4; /* four table slots per output column */ \
1225 for(int n = -1; n < 3; n++) \
1229 r2_f = bspline_x_f[table_x]; \
1230 int x = in_x_table[table_x]; \
1232 uint64_t r_square_i; \
1233 r_square_f = r1_f * r2_f; /* combined weight of this tap */ \
1234 output1_f += r_square_f * in_rows[y][x * components]; \
1235 output2_f += r_square_f * in_rows[y][x * components + 1]; \
1236 output3_f += r_square_f * in_rows[y][x * components + 2]; \
1237 if(components == 4) \
1238 output4_f += r_square_f * in_rows[y][x * components + 3]; \
1246 out_rows[i][j * components] = (type)output1_f; \
1247 out_rows[i][j * components + 1] = (type)output2_f; \
1248 out_rows[i][j * components + 2] = (type)output3_f; \
1249 if(components == 4) \
1250 out_rows[i][j * components + 3] = (type)output4_f; \
1255 delete [] bspline_x_f; \
1256 delete [] bspline_y_f; \
1257 delete [] in_x_table; \
1258 delete [] in_y_table; \
1264 // The pow function is not thread safe in Compaq C, so cube by plain
// multiplication instead. Note that the argument is evaluated three times.
1265 #define CUBE(x) ((x) * (x) * (x))
// Evaluates the cubic B-spline kernel at x. Only the four range tests and the
// final combination (a - 4b + 6c - 4d) / 6 are visible in this excerpt;
// a..d are presumably CUBE() of the shifted arguments, set to 0 when the
// corresponding test holds — TODO confirm.
1267 float ScaleUnit::cubic_bspline(float x)
1271 if((x + 2.0F) <= 0.0F)
1281 if((x + 1.0F) <= 0.0F)
1299 if((x - 1.0F) <= 0.0F)
1309 return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
// Builds the floating-point bicubic tables for one axis. For each of `pixels`
// output pixels it stores four kernel coefficients in coef_table and four
// input coordinates, clamped to [0, total_pixels - 1], in coord_table.
// Both arrays are allocated with new[] (pixels * 4 entries); the caller must
// release them with delete[].
1313 void ScaleUnit::tabulate_bcubic_f(float* &coef_table,
1321 coef_table = new float[pixels * 4];
1322 coord_table = new int[pixels * 4];
1323 for(int i = 0, j = 0; i < pixels; i++)
1325 float f_x = (float)i * scale;
1326 float a = f_x - floor(f_x); // fractional position between two input pixels
1328 for(float m = -1; m < 3; m++) // taps at offsets -1, 0, 1, 2 (float loop counter)
1330 coef_table[j] = cubic_bspline(coefficient * (m - a));
1331 coord_table[j] = (int)(start + (int)f_x + m);
1332 CLAMP(coord_table[j], 0, total_pixels - 1);
// Fixed-point variant of the bicubic table builder: identical layout, but
// each kernel coefficient is scaled by 0x10000 and truncated to int.
// Both arrays are allocated with new[] (pixels * 4 entries); the caller must
// release them with delete[].
1339 void ScaleUnit::tabulate_bcubic_i(int* &coef_table,
1347 coef_table = new int[pixels * 4];
1348 coord_table = new int[pixels * 4];
1349 for(int i = 0, j = 0; i < pixels; i++)
1351 float f_x = (float)i * scale;
1352 float a = f_x - floor(f_x); // fractional position between two input pixels
1354 for(float m = -1; m < 3; m++) // taps at offsets -1, 0, 1, 2 (float loop counter)
1356 coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1357 coord_table[j] = (int)(start + (int)f_x + m);
1358 CLAMP(coord_table[j], 0, total_pixels - 1);
// Builds the floating-point bilinear tables for output pixels
// [pixel1, pixel2) of one axis. Entry j (= pixel - pixel1) holds the two
// neighbouring input indices, clamped to [0, total_pixels - 1], and the
// complement 1 - a of the fractional position a (the table_frac store is not
// visible in this excerpt). All four arrays are allocated with new[]; the
// caller must release them with delete[].
1365 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1368 float* &table_antifrac,
1375 table_int1 = new int[pixel2 - pixel1];
1376 table_int2 = new int[pixel2 - pixel1];
1377 table_frac = new float[pixel2 - pixel1];
1378 table_antifrac = new float[pixel2 - pixel1];
1380 for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1382 float f_x = (float)i * scale;
1383 int i_x = (int)floor(f_x);
1384 float a = (f_x - floor(f_x)); // fractional position between the two neighbours
1386 table_int1[j] = i_x + start;
1387 table_int2[j] = i_x + start + 1;
1388 CLAMP(table_int1[j], 0, total_pixels - 1);
1389 CLAMP(table_int2[j], 0, total_pixels - 1);
1391 table_antifrac[j] = 1.0F - a;
1392 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
// Fixed-point variant of the bilinear table builder for output pixels
// [pixel1, pixel2) of one axis: same index tables, with the fractional weight
// and its complement stored as scaled integers. All four arrays are allocated
// with new[]; the caller must release them with delete[].
1396 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1399 int* &table_antifrac,
1406 table_int1 = new int[pixel2 - pixel1];
1407 table_int2 = new int[pixel2 - pixel1];
1408 table_frac = new int[pixel2 - pixel1];
1409 table_antifrac = new int[pixel2 - pixel1];
1411 for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1413 double f_x = (float)i * scale;
1414 int i_x = (int)floor(f_x);
1415 float a = (f_x - floor(f_x)); // fractional position between the two neighbours
1417 table_int1[j] = i_x + start;
1418 table_int2[j] = i_x + start + 1;
1419 CLAMP(table_int1[j], 0, total_pixels - 1);
1420 CLAMP(table_int2[j], 0, total_pixels - 1);
1421 table_frac[j] = (int)(a * 0xffff); // NOTE(review): scaled by 0xffff while the complement below uses 0x10000, so the two weights do not share one fixed-point base — confirm which is intended
1422 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1423 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
// Scales the output rows assigned to one package.
//
// Copies the engine's parameters into locals with the names the scaling
// macros expect, then selects the algorithm:
//   - BICUBIC when interpolation_type is CUBIC_CUBIC, or CUBIC_LINEAR with
//     both scales above 1;
//   - otherwise BILINEAR_ENLARGE when both scales are above 1;
//   - otherwise BILINEAR_REDUCE.
// Each algorithm is instantiated per colour model of the input frame with
// (max, type, components) of (1.0, float), (0xff, unsigned char) or
// (0xffff, uint16_t) and 3 or 4 components. Most case labels are not visible
// in this excerpt.
1427 void ScaleUnit::process_package(LoadPackage *package)
1429 ScalePackage *pkg = (ScalePackage*)package;
1431 //printf("ScaleUnit::process_package 1\n");
1432 // Arguments for macros
1433 VFrame *output = engine->scale_output;
1434 VFrame *input = engine->scale_input;
1435 float scale_w = engine->w_scale;
1436 float scale_h = engine->h_scale;
1437 int in_x1_int = engine->in_x1_int;
1438 int in_y1_int = engine->in_y1_int;
1439 int out_h_int = engine->out_h_int;
1440 int out_w_int = engine->out_w_int;
// True for the YUV colour models (the variable being assigned is not visible here).
1442 (input->get_color_model() == BC_YUV888 ||
1443 input->get_color_model() == BC_YUVA8888 ||
1444 input->get_color_model() == BC_YUV161616 ||
1445 input->get_color_model() == BC_YUVA16161616);
1447 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
1448 if(engine->interpolation_type == CUBIC_CUBIC ||
1449 (engine->interpolation_type == CUBIC_LINEAR
1450 && engine->w_scale > 1 &&
1451 engine->h_scale > 1))
1453 switch(engine->scale_input->get_color_model())
1456 BICUBIC(1.0, float, 3);
1460 BICUBIC(1.0, float, 4);
1465 BICUBIC(0xff, unsigned char, 3);
1470 BICUBIC(0xff, unsigned char, 4);
1475 BICUBIC(0xffff, uint16_t, 3);
1478 case BC_RGBA16161616:
1479 case BC_YUVA16161616:
1480 BICUBIC(0xffff, uint16_t, 4);
1485 // Perform bilinear scaling input -> scale_output
1486 if(engine->w_scale > 1 &&
1487 engine->h_scale > 1)
1489 switch(engine->scale_input->get_color_model())
1492 BILINEAR_ENLARGE(1.0, float, 3);
1496 BILINEAR_ENLARGE(1.0, float, 4);
1501 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1506 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1511 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1514 case BC_RGBA16161616:
1515 case BC_YUVA16161616:
1516 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1521 // Bilinear reduction
1523 switch(engine->scale_input->get_color_model())
1526 BILINEAR_REDUCE(1.0, float, 3);
1529 BILINEAR_REDUCE(1.0, float, 4);
1533 BILINEAR_REDUCE(0xff, unsigned char, 3);
1538 BILINEAR_REDUCE(0xff, unsigned char, 4);
1543 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1546 case BC_RGBA16161616:
1547 case BC_YUVA16161616:
1548 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1552 //printf("ScaleUnit::process_package 3\n");
// Constructs the scaling engine: cpus is passed as both arguments of the
// LoadServer base constructor, and the owning OverlayFrame is stored.
1568 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1569 : LoadServer(cpus, cpus)
1571 this->overlay = overlay;
// ScaleEngine destructor.
1574 ScaleEngine::~ScaleEngine()
// Assigns each package a contiguous band of output rows: out_h_int is divided
// evenly (integer division) among total_packages.
1578 void ScaleEngine::init_packages()
1580 for(int i = 0; i < total_packages; i++)
1582 ScalePackage *package = (ScalePackage*)packages[i];
1583 package->out_row1 = out_h_int / total_packages * i;
1584 package->out_row2 = package->out_row1 + out_h_int / total_packages;
// The last package runs to out_h_int so rows lost to the integer division are still covered.
1586 if(i >= total_packages - 1)
1587 package->out_row2 = out_h_int;
// Factory for worker clients: returns a newly allocated ScaleUnit bound to this
// engine and its OverlayFrame.
1591 LoadClient* ScaleEngine::new_client()
1593 return new ScaleUnit(this, overlay);
// Factory for work packages: returns a newly allocated ScalePackage.
1596 LoadPackage* ScaleEngine::new_package()
1598 return new ScalePackage;
// TranslatePackage constructor.
1613 TranslatePackage::TranslatePackage()
// Constructs a translation worker: forwards the server to the LoadClient base
// and stores both the owning OverlayFrame and the TranslateEngine.
1619 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1620 : LoadClient(server)
1622 this->overlay = overlay;
1623 this->engine = server;
// TranslateUnit destructor.
1626 TranslateUnit::~TranslateUnit()
// Builds the floating point transfer table for one axis of a sub-pixel translation.
// Each entry describes one output pixel: the two input pixels that feed it
// (in_x1, in_x2), their weights (in_fraction1, in_fraction2) and how much of the
// output pixel is covered (output_fraction).
// The table is allocated with new[] (out_w_int entries, zero filled) and returned
// through the reference parameter.
// NOTE(review): the caller is presumably expected to delete[] the table -- confirm.
1632 void TranslateUnit::translation_array_f(transfer_table_f* &table,
1643 float offset = out_x1 - in_x1;
1644 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
// Integer bounds of the affected output range; the upper bound is limited to out_total.
1646 out_x1_int = (int)out_x1;
1647 out_x2_int = MIN((int)ceil(out_x2), out_total);
1648 out_w_int = out_x2_int - out_x1_int;
1650 table = new transfer_table_f[out_w_int];
1651 bzero(table, sizeof(transfer_table_f) * out_w_int);
1654 // printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f scale=%f %f\n",
1660 // out_x2 - out_x1);
1664 for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1666 transfer_table_f *entry = &table[out_x - out_x1_int];
// The two adjacent input pixels at the current input position.
1668 entry->in_x1 = (int)in_x;
1669 entry->in_x2 = (int)in_x + 1;
1671 // Get fraction of output pixel to fill
1672 entry->output_fraction = 1;
1676 entry->output_fraction -= out_x1 - out_x;
// NOTE(review): this branch assigns instead of subtracting, which discards any
// adjustment made just above -- confirm that is intended when both edges clip.
1679 if(out_x2 < out_x + 1)
1681 entry->output_fraction = (out_x2 - out_x);
1684 // Advance in_x until out_x_fraction is filled
1685 float out_x_fraction = entry->output_fraction;
// Part of the current input pixel that lies at or after in_x.
1686 float in_x_fraction = floor(in_x + 1) - in_x;
// The first input pixel alone can supply the whole output fraction.
1688 if(out_x_fraction <= in_x_fraction)
1690 entry->in_fraction1 = out_x_fraction;
1691 entry->in_fraction2 = 0.0;
1692 in_x += out_x_fraction;
// Otherwise the remainder is taken from the second input pixel.
1696 entry->in_fraction1 = in_x_fraction;
1697 in_x += out_x_fraction;
1698 entry->in_fraction2 = in_x - floor(in_x);
1701 // Clip in_x and zero out fraction. This doesn't work for YUV.
1702 if(entry->in_x2 >= in_total)
1704 entry->in_x2 = in_total - 1;
1705 entry->in_fraction2 = 0.0;
1708 if(entry->in_x1 >= in_total)
1710 entry->in_x1 = in_total - 1;
1711 entry->in_fraction1 = 0.0;
1713 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1717 // entry->in_fraction1,
1718 // entry->in_fraction2,
1719 // entry->output_fraction);
// Fixed point counterpart of translation_array_f: builds the transfer table for one
// axis with all fractions stored as integers where 0x10000 represents 1.0.
// The table is allocated with new[] (out_w_int entries, zero filled) and returned
// through the reference parameter.
// NOTE(review): the caller is presumably expected to delete[] the table -- confirm.
1724 void TranslateUnit::translation_array_i(transfer_table_i* &table,
1735 float offset = out_x1 - in_x1;
// Integer bounds of the affected output range; the upper bound is limited to out_total.
1737 out_x1_int = (int)out_x1;
1738 out_x2_int = MIN((int)ceil(out_x2), out_total);
1739 out_w_int = out_x2_int - out_x1_int;
1741 table = new transfer_table_i[out_w_int];
1742 bzero(table, sizeof(transfer_table_i) * out_w_int);
1745 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1748 for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1750 transfer_table_i *entry = &table[out_x - out_x1_int];
// The two adjacent input pixels at the current input position.
1752 entry->in_x1 = (int)in_x;
1753 entry->in_x2 = (int)in_x + 1;
1755 // Get fraction of output pixel to fill
1756 entry->output_fraction = 0x10000;
1760 entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
// NOTE(review): this branch assigns instead of subtracting, which discards any
// adjustment made just above -- confirm that is intended when both edges clip.
1763 if(out_x2 < out_x + 1)
1765 entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1768 // Advance in_x until out_x_fraction is filled
1769 int out_x_fraction = entry->output_fraction;
// Part of the current input pixel that lies at or after in_x, in fixed point.
1770 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
// The first input pixel alone can supply the whole output fraction.
1772 if(out_x_fraction <= in_x_fraction)
1774 entry->in_fraction1 = out_x_fraction;
1775 entry->in_fraction2 = 0;
1776 in_x += (float)out_x_fraction / 0x10000;
// Otherwise the remainder is taken from the second input pixel.
1780 entry->in_fraction1 = in_x_fraction;
1781 in_x += (float)out_x_fraction / 0x10000;
1782 entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1785 // Clip in_x and zero out fraction. This doesn't work for YUV.
1786 if(entry->in_x2 >= in_total)
1788 entry->in_x2 = in_total - 1;
1789 entry->in_fraction2 = 0;
1792 if(entry->in_x1 >= in_total)
1794 entry->in_x1 = in_total - 1;
1795 entry->in_fraction1 = 0;
1797 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1801 // entry->in_fraction1,
1802 // entry->in_fraction2,
1803 // entry->output_fraction);
// TRANSLATE(max, temp_type, type, components, chroma_offset)
// Blends a sub-pixel translated input onto the output for rows [row1, row2).
//   max           - maximum sample value of the color model (e.g. 0xff, 0xffff, 1.0)
//   temp_type     - wider type used for intermediate arithmetic
//   type          - sample storage type of the frame rows
//   components    - samples per pixel; 4 adds an alpha channel (input4)
//   chroma_offset - passed to BLEND_3 / BLEND_4 and used for the extra chroma term
// For every output pixel the four neighbouring input samples are weighted by the
// products of the x and y fractions from x_table_f / y_table_f, the master opacity
// is scaled by the output coverage fractions, and BLEND_3 or BLEND_4 is invoked.
// sizeof(type) != 4 is used to tell the integer sample types from float; integer
// types get + 0.5 before truncation when computing opacity.
// Names expected at the expansion site include input, output, alpha, row1, row2,
// out_x1_int, out_x2_int, out_y1_int, x_table_f and y_table_f.
1840 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
1843 type **in_rows = (type**)input->get_rows(); \
1844 type **out_rows = (type**)output->get_rows(); \
1847 temp_type master_opacity; \
1848 if(sizeof(type) != 4) \
1849 master_opacity = (temp_type)(alpha * max + 0.5); \
1851 master_opacity = (temp_type)(alpha * max); \
1852 temp_type master_transparency = max - master_opacity; \
1853 float round = 0.0; \
1854 if(sizeof(type) != 4) \
1858 for(int i = row1; i < row2; i++) \
1862 float y_fraction1_f; \
1863 float y_fraction2_f; \
1864 float y_output_fraction_f; \
1865 in_y1 = y_table_f[i - out_y1_int].in_x1; \
1866 in_y2 = y_table_f[i - out_y1_int].in_x2; \
1867 y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1868 y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1869 y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1870 type *in_row1 = in_rows[(in_y1)]; \
1871 type *in_row2 = in_rows[(in_y2)]; \
1872 type *out_row = out_rows[i]; \
1874 for(int j = out_x1_int; j < out_x2_int; j++) \
1878 float x_fraction1_f; \
1879 float x_fraction2_f; \
1880 float x_output_fraction_f; \
1881 in_x1 = x_table_f[j - out_x1_int].in_x1; \
1882 in_x2 = x_table_f[j - out_x1_int].in_x2; \
1883 x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1884 x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1885 x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
1886 type *output = &out_row[j * components]; \
1887 temp_type input1, input2, input3, input4; \
1889 float fraction1 = x_fraction1_f * y_fraction1_f; \
1890 float fraction2 = x_fraction2_f * y_fraction1_f; \
1891 float fraction3 = x_fraction1_f * y_fraction2_f; \
1892 float fraction4 = x_fraction2_f * y_fraction2_f; \
1894 input1 = (type)(in_row1[in_x1 * components] * fraction1 + \
1895 in_row1[in_x2 * components] * fraction2 + \
1896 in_row2[in_x1 * components] * fraction3 + \
1897 in_row2[in_x2 * components] * fraction4 + round); \
1899 /* Add chroma to fractional pixels */ \
1902 float extra_chroma = (1.0F - \
1906 fraction4) * chroma_offset; \
1907 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 + \
1908 in_row1[in_x2 * components + 1] * fraction2 + \
1909 in_row2[in_x1 * components + 1] * fraction3 + \
1910 in_row2[in_x2 * components + 1] * fraction4 + \
1911 extra_chroma + round); \
1912 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 + \
1913 in_row1[in_x2 * components + 2] * fraction2 + \
1914 in_row2[in_x1 * components + 2] * fraction3 + \
1915 in_row2[in_x2 * components + 2] * fraction4 + \
1916 extra_chroma + round); \
1920 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 + \
1921 in_row1[in_x2 * components + 1] * fraction2 + \
1922 in_row2[in_x1 * components + 1] * fraction3 + \
1923 in_row2[in_x2 * components + 1] * fraction4 + round); \
1924 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 + \
1925 in_row1[in_x2 * components + 2] * fraction2 + \
1926 in_row2[in_x1 * components + 2] * fraction3 + \
1927 in_row2[in_x2 * components + 2] * fraction4 + round); \
1930 if(components == 4) \
1931 input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 + \
1932 in_row1[in_x2 * components + 3] * fraction2 + \
1933 in_row2[in_x1 * components + 3] * fraction3 + \
1934 in_row2[in_x2 * components + 3] * fraction4 + round); \
1936 temp_type opacity; \
1937 if(sizeof(type) != 4) \
1938 opacity = (temp_type)(master_opacity * \
1939 y_output_fraction_f * \
1940 x_output_fraction_f + 0.5); \
1942 opacity = (temp_type)(master_opacity * \
1943 y_output_fraction_f * \
1944 x_output_fraction_f); \
1945 temp_type transparency = max - opacity; \
1947 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
1949 if(components == 3) \
1951 BLEND_3(max, temp_type, type, chroma_offset); \
1955 BLEND_4(max, temp_type, type, chroma_offset); \
// Translates and blends the rows [pkg->out_row1, pkg->out_row2) of
// engine->translate_input onto engine->translate_output.
// The engine parameters are copied into locals because the TRANSLATE macro refers
// to them by name; float transfer tables are built for both axes, the macro is
// instantiated for the input color model, and the tables are freed afterwards.
1961 void TranslateUnit::process_package(LoadPackage *package)
1963 TranslatePackage *pkg = (TranslatePackage*)package;
1970 // Variables for TRANSLATE
1971 VFrame *input = engine->translate_input;
1972 VFrame *output = engine->translate_output;
1973 float in_x1 = engine->translate_in_x1;
1974 float in_y1 = engine->translate_in_y1;
1975 float in_x2 = engine->translate_in_x2;
1976 float in_y2 = engine->translate_in_y2;
1977 float out_x1 = engine->translate_out_x1;
1978 float out_y1 = engine->translate_out_y1;
1979 float out_x2 = engine->translate_out_x2;
1980 float out_y2 = engine->translate_out_y2;
1981 float alpha = engine->translate_alpha;
1982 int row1 = pkg->out_row1;
1983 int row2 = pkg->out_row2;
1984 int mode = engine->translate_mode;
1985 int in_total_x = input->get_w();
1986 int in_total_y = input->get_h();
1988 (engine->translate_input->get_color_model() == BC_YUV888 ||
1989 engine->translate_input->get_color_model() == BC_YUVA8888 ||
1990 engine->translate_input->get_color_model() == BC_YUV161616 ||
1991 engine->translate_input->get_color_model() == BC_YUVA16161616);
// NOTE(review): only the float tables are seen being built and freed in this
// function; the integer table pointers look unused -- confirm.
1993 transfer_table_f *x_table_f;
1994 transfer_table_f *y_table_f;
1995 transfer_table_i *x_table_i;
1996 transfer_table_i *y_table_i;
1998 translation_array_f(x_table_f,
2007 translation_array_f(y_table_f,
2016 // printf("TranslateUnit::process_package 1 %d\n", mode);
// One TRANSLATE instantiation per color model. The instantiations with a zero
// chroma offset use unsigned (or float) temporaries; those with a chroma offset
// of 0x80 / 0x8000 use signed temporaries.
2020 switch(engine->translate_input->get_color_model())
2023 TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
2027 TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
2031 TRANSLATE(1.0, float, float, 3, 0);
2035 TRANSLATE(1.0, float, float, 4, 0);
2039 TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2042 case BC_RGBA16161616:
2043 TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2047 TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
2051 TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
2055 TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2058 case BC_YUVA16161616:
2059 TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2062 // printf("blend mode %i, took %li ms\n", mode, a.get_difference());
// Release the tables allocated by translation_array_f.
2064 delete [] x_table_f;
2065 delete [] y_table_f;
// Constructs the translation engine: cpus is passed as both arguments of the
// LoadServer base constructor, and the owning OverlayFrame is stored.
2077 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2078 : LoadServer(cpus, cpus)
2080 this->overlay = overlay;
// TranslateEngine destructor.
2083 TranslateEngine::~TranslateEngine()
// Divides the affected output rows among the packages. The row range is
// [(int)translate_out_y1, min(ceil(translate_out_y2), output height)).
2087 void TranslateEngine::init_packages()
2089 int out_y1_int = (int)translate_out_y1;
2090 int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2091 int out_h = out_y2_int - out_y1_int;
2093 for(int i = 0; i < total_packages; i++)
2095 TranslatePackage *package = (TranslatePackage*)packages[i];
2096 package->out_row1 = (int)(out_y1_int + out_h /
2099 package->out_row2 = (int)((float)package->out_row1 +
// The last package always ends exactly at the final output row.
2102 if(i >= total_packages - 1)
2103 package->out_row2 = out_y2_int;
// Factory for worker clients: returns a newly allocated TranslateUnit bound to
// this engine and its OverlayFrame.
2107 LoadClient* TranslateEngine::new_client()
2109 return new TranslateUnit(this, overlay);
// Factory for work packages: returns a newly allocated TranslatePackage.
2112 LoadPackage* TranslateEngine::new_package()
2114 return new TranslatePackage;
// SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset)
// Table driven scale + translate + blend for rows [pkg->out_row1, pkg->out_row2).
//   max           - maximum sample value of the color model
//   temp_type     - wider type used for intermediate arithmetic
//   type          - sample storage type of the frame rows
//   components    - samples per pixel; 4 adds an alpha channel (input4)
//   chroma_offset - forwarded to BLEND_3 / BLEND_4
// The source row for each output row comes from y_table. When the output width
// differs from the input width, the source pixel comes from x_table; otherwise the
// input row is walked sequentially. Each pixel is combined with BLEND_3 or BLEND_4.
// sizeof(type) != 4 is used to tell the integer sample types from float; integer
// types get + 0.5 before truncation when computing opacity.
// Names expected at the expansion site include alpha, pkg, in_rows, out_rows,
// x_table, y_table, in_x1, in_x2, out_x1, out_y1 and out_w.
2124 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
2126 temp_type opacity; \
2127 if(sizeof(type) != 4) \
2128 opacity = (temp_type)(alpha * max + 0.5); \
2130 opacity = (temp_type)(alpha * max); \
2131 temp_type transparency = max - opacity; \
2133 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2135 int in_y = y_table[i - out_y1]; \
2136 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
2137 type *output = (type*)out_rows[i] + out_x1 * components; \
2139 /* X direction is scaled and requires a table lookup */ \
2140 if(out_w != in_x2 - in_x1) \
2142 for(int j = 0; j < out_w; j++) \
2144 type *in_row_plus_x = in_row + x_table[j] * components; \
2145 temp_type input1, input2, input3, input4; \
2147 input1 = in_row_plus_x[0]; \
2148 input2 = in_row_plus_x[1]; \
2149 input3 = in_row_plus_x[2]; \
2150 if(components == 4) \
2151 input4 = in_row_plus_x[3]; \
2153 if(components == 3) \
2155 BLEND_3(max, temp_type, type, chroma_offset); \
2159 BLEND_4(max, temp_type, type, chroma_offset); \
2161 output += components; \
2165 /* X direction is not scaled */ \
2167 for(int j = 0; j < out_w; j++) \
2169 temp_type input1, input2, input3, input4; \
2171 input1 = in_row[0]; \
2172 input2 = in_row[1]; \
2173 input3 = in_row[2]; \
2174 if(components == 4) \
2175 input4 = in_row[3]; \
2177 if(components == 3) \
2179 BLEND_3(max, temp_type, type, chroma_offset); \
2183 BLEND_4(max, temp_type, type, chroma_offset); \
2185 in_row += components; \
2186 output += components; \
// Constructs a scale/translate worker: forwards the server to the LoadClient base
// and stores both the owning OverlayFrame and the ScaleTranslateEngine.
2194 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2195 : LoadClient(server)
2197 this->overlay = overlay;
2198 this->scale_translate = server;
// ScaleTranslateUnit destructor.
2201 ScaleTranslateUnit::~ScaleTranslateUnit()
// Builds a lookup table that maps each output index in [0, out_x2 - out_x1) to an
// input index by dividing by the output/input size ratio and truncating.
// The table is allocated with new[] and returned through the reference parameter.
// NOTE(review): the caller is presumably expected to delete[] the table -- confirm.
2205 void ScaleTranslateUnit::scale_array(int* &table,
2212 float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2214 table = new int[out_x2 - out_x1];
// Variant that stores absolute input indices (offset by in_x1).
2218 for(int i = 0; i < out_x2 - out_x1; i++)
2220 table[i] = (int)((float)i / scale + in_x1);
// Variant that stores indices relative to the start of the input range.
2225 for(int i = 0; i < out_x2 - out_x1; i++)
2227 table[i] = (int)((float)i / scale);
// Performs the table driven scale + translate + blend for the rows of one package.
// Engine parameters are copied into locals because SCALE_TRANSLATE refers to them
// by name. Lookup tables are built with scale_array; TRANSFER_REPLACE without
// horizontal scaling takes a memcpy fast path, everything else goes through
// SCALE_TRANSLATE instantiated for the input color model.
2233 void ScaleTranslateUnit::process_package(LoadPackage *package)
2235 ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2237 // Args for NEAREST_NEIGHBOR_MACRO
2238 VFrame *output = scale_translate->output;
2239 VFrame *input = scale_translate->input;
2240 int in_x1 = scale_translate->in_x1;
2241 int in_y1 = scale_translate->in_y1;
2242 int in_x2 = scale_translate->in_x2;
2243 int in_y2 = scale_translate->in_y2;
2244 int out_x1 = scale_translate->out_x1;
2245 int out_y1 = scale_translate->out_y1;
2246 int out_x2 = scale_translate->out_x2;
2247 int out_y2 = scale_translate->out_y2;
2248 float alpha = scale_translate->alpha;
2249 int mode = scale_translate->mode;
2250 int out_w = out_x2 - out_x1;
2254 unsigned char **in_rows = input->get_rows();
2255 unsigned char **out_rows = output->get_rows();
2259 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
// The horizontal table is only needed when the width actually changes; the
// vertical table is used by every path below.
2260 if(out_w != in_x2 - in_x1)
2262 scale_array(x_table,
2269 scale_array(y_table,
2277 if (mode == TRANSFER_REPLACE && (out_w == in_x2 - in_x1))
2279 // if we have transfer replace and x direction is not scaled, PARTY!
// Fast path: whole row segments are copied with memcpy instead of per-pixel blending.
2280 char bytes_per_pixel = input->calculate_bytes_per_pixel(input->get_color_model());
2281 int line_len = out_w * bytes_per_pixel;
2282 int in_start_byte = in_x1 * bytes_per_pixel;
2283 int out_start_byte = out_x1 * bytes_per_pixel;
2284 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2286 memcpy (out_rows[i] + out_start_byte,
2287 in_rows[y_table[i - out_y1]] + in_start_byte ,
// General path: one SCALE_TRANSLATE instantiation per color model.
2293 switch(input->get_color_model())
2296 SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2300 SCALE_TRANSLATE(1.0, float, float, 3, 0);
2304 SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2308 SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2312 SCALE_TRANSLATE(1.0, float, float, 4, 0);
2316 SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2321 SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2325 SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2328 case BC_RGBA16161616:
2329 SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2332 case BC_YUVA16161616:
2333 SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2337 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
// Same condition under which x_table was built above.
// NOTE(review): presumably guards the release of x_table -- confirm.
2338 if(out_x2 - out_x1 != in_x2 - in_x1)
// Constructs the scale/translate engine: cpus is passed as both arguments of the
// LoadServer base constructor, and the owning OverlayFrame is stored.
2352 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2353 : LoadServer(cpus, cpus)
2355 this->overlay = overlay;
// ScaleTranslateEngine destructor.
2358 ScaleTranslateEngine::~ScaleTranslateEngine()
// Divides the output rows [out_y1, out_y2) among the packages.
2362 void ScaleTranslateEngine::init_packages()
2364 int out_h = out_y2 - out_y1;
2366 for(int i = 0; i < total_packages; i++)
2368 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
2369 package->out_row1 = (int)(out_y1 + out_h /
2372 package->out_row2 = (int)((float)package->out_row1 +
// The last package always ends exactly at out_y2.
2375 if(i >= total_packages - 1)
2376 package->out_row2 = out_y2;
// Factory for worker clients: returns a newly allocated ScaleTranslateUnit bound
// to this engine and its OverlayFrame.
2380 LoadClient* ScaleTranslateEngine::new_client()
2382 return new ScaleTranslateUnit(this, overlay);
// Factory for work packages: returns a newly allocated ScaleTranslatePackage.
2385 LoadPackage* ScaleTranslateEngine::new_package()
2387 return new ScaleTranslatePackage;
// ScaleTranslatePackage constructor.
2391 ScaleTranslatePackage::ScaleTranslatePackage()
// BLEND_ONLY(temp_type, type, max, components, chroma_offset)
// Generic blend of input onto output at identical coordinates (no scaling or
// translation) for rows [pkg->out_row1, pkg->out_row2).
//   temp_type     - wider type used for intermediate arithmetic
//   type          - sample storage type of the frame rows
//   max           - maximum sample value of the color model
//   components    - samples per pixel; 4 adds an alpha channel (input4)
//   chroma_offset - forwarded to BLEND_3 / BLEND_4
// Note that the parameter order differs from TRANSLATE and SCALE_TRANSLATE.
// sizeof(type) != 4 is used to tell the integer sample types from float; integer
// types get + 0.5 before truncation when computing opacity.
// Names expected at the expansion site include input, output, alpha and pkg.
2422 #define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
2424 temp_type opacity; \
2425 if(sizeof(type) != 4) \
2426 opacity = (temp_type)(alpha * max + 0.5); \
2428 opacity = (temp_type)(alpha * max); \
2429 temp_type transparency = max - opacity; \
2431 type** output_rows = (type**)output->get_rows(); \
2432 type** input_rows = (type**)input->get_rows(); \
2433 int w = input->get_w(); \
2434 int h = input->get_h(); \
2436 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2438 type* in_row = input_rows[i]; \
2439 type* output = output_rows[i]; \
2441 for(int j = 0; j < w; j++) \
2443 temp_type input1, input2, input3, input4; \
2444 input1 = in_row[0]; \
2445 input2 = in_row[1]; \
2446 input3 = in_row[2]; \
2447 if(components == 4) input4 = in_row[3]; \
2450 if(components == 3) \
2452 BLEND_3(max, temp_type, type, chroma_offset); \
2456 BLEND_4(max, temp_type, type, chroma_offset); \
2459 in_row += components; \
2460 output += components; \
// BLEND_ONLY_TRANSFER_REPLACE(type, components)
// Replace-mode shortcut: copies each input row of the package onto the matching
// output row with memcpy. The row length in bytes is
// input width * sizeof(type) * components.
// Names expected at the expansion site include input, output and pkg.
2466 #define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
2469 type** output_rows = (type**)output->get_rows(); \
2470 type** input_rows = (type**)input->get_rows(); \
2471 int w = input->get_w(); \
2472 int h = input->get_h(); \
2473 int line_len = w * sizeof(type) * components; \
2475 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2477 memcpy(output_rows[i], input_rows[i], line_len); \
// BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset)
// Normal-mode blend specialised for integer pixels with an alpha channel.
// The per-pixel opacity is the global opacity multiplied by the input alpha
// sample, so it is expressed on a scale of max * max; the weighted sum of the
// first channel is therefore divided by max twice. The second and third channels
// have chroma_offset subtracted before weighting. The output alpha becomes the
// larger of the input and output alpha.
// Names expected at the expansion site include input, output, alpha and pkg.
2481 // components is always 4
2482 #define BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset) \
2484 temp_type opacity = (temp_type)(alpha * max + 0.5); \
2485 temp_type transparency = max - opacity; \
2486 temp_type max_squared = ((temp_type)max) * max; \
2488 type** output_rows = (type**)output->get_rows(); \
2489 type** input_rows = (type**)input->get_rows(); \
2490 int w = input->get_w(); \
2491 int h = input->get_h(); \
2493 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2495 type* in_row = input_rows[i]; \
2496 type* output = output_rows[i]; \
2498 for(int j = 0; j < w; j++) \
2500 temp_type pixel_opacity, pixel_transparency; \
2501 pixel_opacity = opacity * in_row[3]; \
2502 pixel_transparency = (temp_type)max_squared - pixel_opacity; \
2506 output[0] = ((temp_type)in_row[0] * pixel_opacity + \
2507 (temp_type)output[0] * pixel_transparency) / max / max; \
2508 output[1] = (((temp_type)in_row[1] - chroma_offset) * pixel_opacity + \
2509 ((temp_type)output[1] - chroma_offset) * pixel_transparency) \
2512 output[2] = (((temp_type)in_row[2] - chroma_offset) * pixel_opacity + \
2513 ((temp_type)output[2] - chroma_offset) * pixel_transparency) \
2516 output[3] = (type)(in_row[3] > output[3] ? in_row[3] : output[3]); \
// BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset)
// Normal-mode blend specialised for integer pixels without an alpha channel.
// Opacity is expressed in fixed point with `bits` fractional bits, where bits is
// the bit width of the sample type, so the weighted sum is normalised with a
// right shift instead of a division. Every sample is treated the same way, which
// is why the inner loop runs over 3 * width samples rather than over pixels.
// Names expected at the expansion site include input, output, alpha and pkg.
2526 // components is always 3
2527 #define BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset) \
2529 const int bits = sizeof(type) * 8; \
2530 temp_type opacity = (temp_type)(alpha * ((temp_type)1 << bits) + 0.5); \
2531 temp_type transparency = ((temp_type)1 << bits) - opacity; \
2533 type** output_rows = (type**)output->get_rows(); \
2534 type** input_rows = (type**)input->get_rows(); \
2535 int w = input->get_w() * 3; \
2536 int h = input->get_h(); \
2538 for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2540 type* in_row = input_rows[i]; \
2541 type* output = output_rows[i]; \
2543 for(int j = 0; j < w; j++) /* w = 3x width! */ \
2545 *output = ((temp_type)*in_row * opacity + *output * transparency) >> bits; \
// Constructs a blend worker: forwards the server to the LoadClient base and
// stores both the owning OverlayFrame and the BlendEngine.
2554 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2555 : LoadClient(server)
2557 this->overlay = overlay;
2558 this->blend_engine = server;
// BlendUnit destructor.
2561 BlendUnit::~BlendUnit()
// Blends the rows of one package from blend_engine->input onto
// blend_engine->output at identical coordinates.
// Three strategies are used depending on the transfer mode:
//   TRANSFER_REPLACE - row copies via BLEND_ONLY_TRANSFER_REPLACE
//   TRANSFER_NORMAL  - specialised code per color model (inline float loops and
//                      the BLEND_ONLY_3_NORMAL / BLEND_ONLY_4_NORMAL macros)
//   remaining modes  - the generic BLEND_ONLY macro
2565 void BlendUnit::process_package(LoadPackage *package)
2567 BlendPackage *pkg = (BlendPackage*)package;
2570 VFrame *output = blend_engine->output;
2571 VFrame *input = blend_engine->input;
2572 float alpha = blend_engine->alpha;
2573 int mode = blend_engine->mode;
2575 if (mode == TRANSFER_REPLACE)
2577 switch(input->get_color_model())
2580 BLEND_ONLY_TRANSFER_REPLACE(float, 3);
2583 BLEND_ONLY_TRANSFER_REPLACE(float, 4);
2587 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2591 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2595 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2597 case BC_RGBA16161616:
2598 case BC_YUVA16161616:
2599 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2604 if (mode == TRANSFER_NORMAL)
2606 switch(input->get_color_model())
// Float pixels without alpha: every sample is mixed with the same weights, so the
// loop bound is 3 * width samples.
2610 float opacity = alpha;
2611 float transparency = 1.0 - alpha;
2613 float** output_rows = (float**)output->get_rows();
2614 float** input_rows = (float**)input->get_rows();
2615 int w = input->get_w() * 3;
2616 int h = input->get_h();
2618 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2620 float* in_row = input_rows[i];
2621 float* output = output_rows[i];
2623 for(int j = 0; j < w; j++)
2625 *output = *in_row * opacity + *output * transparency;
// Float pixels with alpha: the weights are derived per pixel from the input alpha sample.
2634 float opacity = alpha;
2635 float transparency = 1.0 - alpha;
2637 float** output_rows = (float**)output->get_rows();
2638 float** input_rows = (float**)input->get_rows();
2639 int w = input->get_w();
2640 int h = input->get_h();
2642 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2644 float* in_row = input_rows[i];
2645 float* output = output_rows[i];
2647 for(int j = 0; j < w; j++)
2649 float pixel_opacity, pixel_transparency;
2650 pixel_opacity = opacity * in_row[3];
2651 pixel_transparency = 1.0 - pixel_opacity;
2654 output[0] = in_row[0] * pixel_opacity +
2655 output[0] * pixel_transparency;
2656 output[1] = in_row[1] * pixel_opacity +
2657 output[1] * pixel_transparency;
2658 output[2] = in_row[2] * pixel_opacity +
2659 output[2] * pixel_transparency;
// The output alpha keeps the larger of the two alpha samples.
2660 output[3] = in_row[3] > output[3] ? in_row[3] : output[3];
// Integer color models use the specialised normal-mode macros.
2669 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0xff, 0);
2672 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2675 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0xff, 0);
2678 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2681 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2684 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2686 case BC_RGBA16161616:
2687 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2689 case BC_YUVA16161616:
2690 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
// Generic path: BLEND_ONLY instantiated per color model.
2695 switch(input->get_color_model())
2698 BLEND_ONLY(float, float, 1.0, 3, 0);
2701 BLEND_ONLY(float, float, 1.0, 4, 0);
2704 BLEND_ONLY(uint32_t, unsigned char, 0xff, 3, 0);
2707 BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2710 BLEND_ONLY(uint32_t, unsigned char, 0xff, 4, 0);
2713 BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2716 BLEND_ONLY(uint64_t, uint16_t, 0xffff, 3, 0);
2719 BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2721 case BC_RGBA16161616:
2722 BLEND_ONLY(uint64_t, uint16_t, 0xffff, 4, 0);
2724 case BC_YUVA16161616:
2725 BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
// Constructs the blend engine: cpus is passed as both arguments of the
// LoadServer base constructor, and the owning OverlayFrame is stored.
2732 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2733 : LoadServer(cpus, cpus)
2735 this->overlay = overlay;
// BlendEngine destructor.
2738 BlendEngine::~BlendEngine()
// Divides the rows of the input frame (input->get_h()) among the packages.
2742 void BlendEngine::init_packages()
2744 for(int i = 0; i < total_packages; i++)
2746 BlendPackage *package = (BlendPackage*)packages[i];
2747 package->out_row1 = (int)(input->get_h() /
2750 package->out_row2 = (int)((float)package->out_row1 +
// The last package always ends exactly at the final input row.
2754 if(i >= total_packages - 1)
2755 package->out_row2 = input->get_h();
// Factory for worker clients: returns a newly allocated BlendUnit bound to this
// engine and its OverlayFrame.
2759 LoadClient* BlendEngine::new_client()
2761 return new BlendUnit(this, overlay);
// Factory for work packages: returns a newly allocated BlendPackage.
2764 LoadPackage* BlendEngine::new_package()
2766 return new BlendPackage;
2770 BlendPackage::BlendPackage()