r125: This commit was manufactured by cvs2svn to create tag 'r1_1_7-last'.
[cinelerra_cv/mob.git] / hvirtual / cinelerra / overlayframe.C
bloba31a84384e77b99b0fa517b932f50de156aed951
1 #include <math.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <stdint.h>
5 #include <stdlib.h>
7 #include "clip.h"
8 #include "edl.inc"
9 #include "mutex.h"
10 #include "overlayframe.h"
11 #include "vframe.h"
// Compile-time switch for use_float.  The condition is hard-wired to 1, so
// use_float is defined as 1; flip the condition to 0 to define it as 0.
// NOTE(review): the code that reads use_float is not visible in this part of
// the file -- confirm what it selects before changing it.
13 #if 1
14         #define use_float 1
15 #else
16         #define use_float 0
17 #endif
23 OverlayFrame::OverlayFrame(int cpus)
25         temp_frame = 0;
26         blend_engine = 0;
27         scale_engine = 0;
28         scaletranslate_engine = 0;
29         translate_engine = 0;
30         this->cpus = cpus;
33 OverlayFrame::~OverlayFrame()
35 //printf("OverlayFrame::~OverlayFrame 1\n");
36         if(temp_frame) delete temp_frame;
37         if(scale_engine) delete scale_engine;
38         if(translate_engine) delete translate_engine;
39         if(blend_engine) delete blend_engine;
40         if(scaletranslate_engine) delete scaletranslate_engine;
41 //printf("OverlayFrame::~OverlayFrame 2\n");
51 // Verification: 
53 // (255 * 255 + 0 * 0) / 255 = 255
54 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
56 // (65535 * 65535 + 0 * 0) / 65535 = 65535
57 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
60 // Branch prediction 4 U
// BLEND_3: blend one 3-component source pixel into output[0..2].
//
// Macro arguments:
//   max           - largest component value; results are clipped to [0, max]
//                   and opacity-weighted sums are divided by it.
//   temp_type     - type used for the intermediate r, g, b values.
//   type          - component type the clipped results are cast to when
//                   stored in output[].
//   chroma_offset - subtracted from / compared against components 2 and 3.
//                   When it is nonzero, DIVIDE and MULTIPLY do not do
//                   arithmetic on those two components; they keep whichever
//                   of input/output lies further from chroma_offset.
//
// Names taken from the expansion site: mode, input1, input2, input3,
// output, opacity, transparency.
//
// Every mode except TRANSFER_REPLACE computes a per-mode result and then
// mixes it with the existing output as
// (result * opacity + output * transparency) / max.  TRANSFER_REPLACE copies
// the input unchanged.  The switch has no default case, so r, g and b are
// left unassigned for any other mode value.
62 #define BLEND_3(max, temp_type, type, chroma_offset) \
63 { \
64         temp_type r, g, b; \
65  \
66 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
67         switch(mode) \
68         { \
69                 case TRANSFER_DIVIDE: \
70                         r = output[0] ? (((temp_type)input1 * max) / output[0]) : max; \
71                         if(chroma_offset) \
72                         { \
73                                 g = labs((int)input2 - chroma_offset) > labs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
74                                 b = labs((int)input3 - chroma_offset) > labs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
75                         } \
76                         else \
77                         { \
78                                 g = output[1] ? (temp_type)input2 * max / (temp_type)output[1] : max; \
79                                 b = output[2] ? (temp_type)input3 * max / (temp_type)output[2] : max; \
80                         } \
81                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
82                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
83                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
84                         break; \
85                 case TRANSFER_MULTIPLY: \
86                         r = ((temp_type)input1 * output[0]) / max; \
87                         if(chroma_offset) \
88                         { \
89                                 g = labs((temp_type)input2 - chroma_offset) > labs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
90                                 b = labs((temp_type)input3 - chroma_offset) > labs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
91                         } \
92                         else \
93                         { \
94                                 g = (temp_type)input2 * (temp_type)output[1] / max; \
95                                 b = (temp_type)input3 * (temp_type)output[2] / max; \
96                         } \
97                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
98                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
99                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
100                         break; \
101                 case TRANSFER_SUBTRACT: \
102                         r = (temp_type)input1 - output[0]; \
103                         g = (temp_type)input2 - ((temp_type)output[1] - chroma_offset); \
104                         b = (temp_type)input3 - ((temp_type)output[2] - chroma_offset); \
105                         r = (r * opacity + output[0] * transparency) / max; \
106                         g = (g * opacity + output[1] * transparency) / max; \
107                         b = (b * opacity + output[2] * transparency) / max; \
108                         break; \
109                 case TRANSFER_ADDITION: \
110                         r = (temp_type)input1 + output[0]; \
111                         g = (temp_type)input2 - chroma_offset + output[1]; \
112                         b = (temp_type)input3 - chroma_offset + output[2]; \
113                         r = (r * opacity + output[0] * transparency) / max; \
114                         g = (g * opacity + output[1] * transparency) / max; \
115                         b = (b * opacity + output[2] * transparency) / max; \
116                         break; \
117                 case TRANSFER_REPLACE: \
118                         r = input1; \
119                         g = input2; \
120                         b = input3; \
121                         break; \
122                 case TRANSFER_NORMAL: \
123                         r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
124                         g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
125                         b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
126                         break; \
127         } \
129         output[0] = (type)CLIP(r, 0, max); \
130         output[1] = (type)CLIP(g, 0, max); \
131         output[2] = (type)CLIP(b, 0, max); \
138 // Blending equations are drastically different for 3 and 4 components
// BLEND_4: blend one 4-component source pixel (fourth component is alpha)
// into output[0..3].
//
// Macro arguments have the same meaning as for BLEND_3: max is the largest
// component value, temp_type the intermediate type, type the stored
// component type, and chroma_offset the offset applied to components 2
// and 3.
//
// Names taken from the expansion site: mode, input1..input4, output,
// opacity.
//
// The per-pixel weight is pixel_opacity = opacity * input4, with
// pixel_transparency = max * max - pixel_opacity, which is why every
// weighted sum is divided by max twice.  Colour results are clipped to
// [0, max].  The stored alpha is the larger of input4 and output4 for every
// mode except TRANSFER_REPLACE, which stores input4; alpha is cast to type
// without clipping.  The switch has no default case, so r, g, b and a are
// left unassigned for any other mode value.
139 #define BLEND_4(max, temp_type, type, chroma_offset) \
140 { \
141         temp_type r, g, b, a; \
142         temp_type pixel_opacity, pixel_transparency; \
143         temp_type output1 = output[0]; \
144         temp_type output2 = output[1]; \
145         temp_type output3 = output[2]; \
146         temp_type output4 = output[3]; \
148         pixel_opacity = opacity * input4; \
149         pixel_transparency = (temp_type)max * max - pixel_opacity; \
151         switch(mode) \
152         { \
153                 case TRANSFER_DIVIDE: \
154                         r = output1 ? (((temp_type)input1 * max) / output1) : max; \
155                         if(chroma_offset) \
156                         { \
157                                 g = labs((int)input2 - chroma_offset) > labs((int)output2 - chroma_offset) ? input2 : output2; \
158                                 b = labs((int)input3 - chroma_offset) > labs((int)output3 - chroma_offset) ? input3 : output3; \
159                         } \
160                         else \
161                         { \
162                                 g = output2 ? (temp_type)input2 * max / (temp_type)output2 : max; \
163                                 b = output3 ? (temp_type)input3 * max / (temp_type)output3 : max; \
164                         } \
165                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
166                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
167                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
168                         a = input4 > output4 ? input4 : output4; \
169                         break; \
170                 case TRANSFER_MULTIPLY: \
171                         r = ((temp_type)input1 * output1) / max; \
172                         if(chroma_offset) \
173                         { \
174                                 g = labs((temp_type)input2 - chroma_offset) > labs((temp_type)output2 - chroma_offset) ? input2 : output2; \
175                                 b = labs((temp_type)input3 - chroma_offset) > labs((temp_type)output3 - chroma_offset) ? input3 : output3; \
176                         } \
177                         else \
178                         { \
179                                 g = (temp_type)input2 * (temp_type)output2 / max; \
180                                 b = (temp_type)input3 * (temp_type)output3 / max; \
181                         } \
182                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
183                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
184                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
185                         a = input4 > output4 ? input4 : output4; \
186                         break; \
187                 case TRANSFER_SUBTRACT: \
188                         r = (temp_type)input1 - output1; \
189                         g = (temp_type)input2 - ((temp_type)output2 - chroma_offset); \
190                         b = (temp_type)input3 - ((temp_type)output3 - chroma_offset); \
191                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
192                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
193                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
194                         a = input4 > output4 ? input4 : output4; \
195                         break; \
196                 case TRANSFER_ADDITION: \
197                         r = (temp_type)input1 + output1; \
198                         g = (temp_type)input2 - chroma_offset + output2; \
199                         b = (temp_type)input3 - chroma_offset + output3; \
200                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
201                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
202                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
203                         a = input4 > output4 ? input4 : output4; \
204                         break; \
205                 case TRANSFER_REPLACE: \
206                         r = input1; \
207                         g = input2; \
208                         b = input3; \
209                         a = input4; \
210                         break; \
211                 case TRANSFER_NORMAL: \
212                         r = (input1 * pixel_opacity + \
213                                 output1 * pixel_transparency) / max / max; \
214                         g = ((input2 - chroma_offset) * pixel_opacity + \
215                                 (output2 - chroma_offset) * pixel_transparency) \
216                                 / max / max + \
217                                 chroma_offset; \
218                         b = ((input3 - chroma_offset) * pixel_opacity + \
219                                 (output3 - chroma_offset) * pixel_transparency) \
220                                 / max / max + \
221                                 chroma_offset; \
222                         a = input4 > output4 ? input4 : output4; \
223                         break; \
224         } \
226         output[0] = (type)CLIP(r, 0, max); \
227         output[1] = (type)CLIP(g, 0, max); \
228         output[2] = (type)CLIP(b, 0, max); \
229         output[3] = (type)a; \
240 // Bicubic algorithm using multiprocessors
241 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
243 // Nearest neighbor algorithm using multiprocessors for blending
244 // input -> scale + translate -> blend -> output
247 int OverlayFrame::overlay(VFrame *output, 
248         VFrame *input, 
249         float in_x1, 
250         float in_y1, 
251         float in_x2, 
252         float in_y2, 
253         float out_x1, 
254         float out_y1, 
255         float out_x2, 
256         float out_y2, 
257         float alpha,       // 0 - 1
258         int mode,
259         int interpolation_type)
261         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
262         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
264 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f\n", in_x1,
265 //                      in_y1,
266 //                      in_x2,
267 //                      in_y2,
268 //                      out_x1,
269 //                      out_y1,
270 //                      out_x2,
271 //                      out_y2);
273 // Limit values
274         if(in_x1 < 0)
275         {
276                 out_x1 += -in_x1 * w_scale;
277                 in_x1 = 0;
278         }
279         else
280         if(in_x1 >= input->get_w())
281         {
282                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
283                 in_x1 = input->get_w();
284         }
286         if(in_y1 < 0)
287         {
288                 out_y1 += -in_y1 * h_scale;
289                 in_y1 = 0;
290         }
291         else
292         if(in_y1 >= input->get_h())
293         {
294                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
295                 in_y1 = input->get_h();
296         }
298         if(in_x2 < 0)
299         {
300                 out_x2 += -in_x2 * w_scale;
301                 in_x2 = 0;
302         }
303         else
304         if(in_x2 >= input->get_w())
305         {
306                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
307                 in_x2 = input->get_w();
308         }
310         if(in_y2 < 0)
311         {
312                 out_y2 += -in_y2 * h_scale;
313                 in_y2 = 0;
314         }
315         else
316         if(in_y2 >= input->get_h())
317         {
318                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
319                 in_y2 = input->get_h();
320         }
322         if(out_x1 < 0)
323         {
324                 in_x1 += -out_x1 / w_scale;
325                 out_x1 = 0;
326         }
327         else
328         if(out_x1 >= output->get_w())
329         {
330                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
331                 out_x1 = output->get_w();
332         }
334         if(out_y1 < 0)
335         {
336                 in_y1 += -out_y1 / h_scale;
337                 out_y1 = 0;
338         }
339         else
340         if(out_y1 >= output->get_h())
341         {
342                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
343                 out_y1 = output->get_h();
344         }
346         if(out_x2 < 0)
347         {
348                 in_x2 += -out_x2 / w_scale;
349                 out_x2 = 0;
350         }
351         else
352         if(out_x2 >= output->get_w())
353         {
354                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
355                 out_x2 = output->get_w();
356         }
358         if(out_y2 < 0)
359         {
360                 in_y2 += -out_y2 / h_scale;
361                 out_y2 = 0;
362         }
363         else
364         if(out_y2 >= output->get_h())
365         {
366                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
367                 out_y2 = output->get_h();
368         }
379         float in_w = in_x2 - in_x1;
380         float in_h = in_y2 - in_y1;
381         float out_w = out_x2 - out_x1;
382         float out_h = out_y2 - out_y1;
383 // Input for translation operation
384         VFrame *translation_input = input;
387         if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
390 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
391 //                      in_y1,
392 //                      in_x2,
393 //                      in_y2,
394 //                      out_x1,
395 //                      out_y1,
396 //                      out_x2,
397 //                      out_y2);
403 // ****************************************************************************
404 // Transfer to temp buffer by scaling nearest integer boundaries
405 // ****************************************************************************
406         if(interpolation_type != NEAREST_NEIGHBOR &&
407                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
408         {
409 // Create integer boundaries for interpolation
410                 int in_x1_int = (int)in_x1;
411                 int in_y1_int = (int)in_y1;
412                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
413                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
415 // Dimensions of temp frame.  Integer boundaries scaled.
416                 int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int));
417                 int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int));
418                 VFrame *scale_output;
422 #define NO_TRANSLATION1 \
423         (EQUIV(in_x1, 0) && \
424         EQUIV(in_y1, 0) && \
425         EQUIV(out_x1, 0) && \
426         EQUIV(out_y1, 0) && \
427         EQUIV(in_x2, in_x2_int) && \
428         EQUIV(in_y2, in_y2_int) && \
429         EQUIV(out_x2, temp_w) && \
430         EQUIV(out_y2, temp_h))
433 #define NO_BLEND \
434         (EQUIV(alpha, 1) && \
435         (mode == TRANSFER_REPLACE || \
436         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
442 // Prepare destination for operation
444 // No translation and no blending.  The blending operation is built into the
445 // translation unit but not the scaling unit.
446 // input -> output
447                 if(NO_TRANSLATION1 &&
448                         NO_BLEND)
449                 {
450 // printf("OverlayFrame::overlay input -> output\n");
452                         scale_output = output;
453                         translation_input = 0;
454                 }
455                 else
456 // If translation or blending
457 // input -> nearest integer boundary temp
458                 {
459                         if(temp_frame && 
460                                 (temp_frame->get_w() != temp_w ||
461                                         temp_frame->get_h() != temp_h))
462                         {
463                                 delete temp_frame;
464                                 temp_frame = 0;
465                         }
467                         if(!temp_frame)
468                         {
469                                 temp_frame = new VFrame(0,
470                                         temp_w,
471                                         temp_h,
472                                         input->get_color_model(),
473                                         -1);
474                         }
475 //printf("OverlayFrame::overlay input -> temp\n");
478                         temp_frame->clear_frame();
480 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
481 //      temp_w, temp_h);
482                         scale_output = temp_frame;
483                         translation_input = scale_output;
485 // Adjust input coordinates to reflect new scaled coordinates.
486                         in_x1 = (in_x1 - in_x1_int) * w_scale;
487                         in_y1 = (in_y1 - in_y1_int) * h_scale;
488                         in_x2 = (in_x2 - in_x1_int) * w_scale;
489                         in_y2 = (in_y2 - in_y1_int) * h_scale;
490                 }
494 //printf("Overlay 1\n");
496 // Scale input -> scale_output
497                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
498                 scale_engine->scale_output = scale_output;
499                 scale_engine->scale_input = input;
500                 scale_engine->w_scale = w_scale;
501                 scale_engine->h_scale = h_scale;
502                 scale_engine->in_x1_int = in_x1_int;
503                 scale_engine->in_y1_int = in_y1_int;
504                 scale_engine->out_w_int = temp_w;
505                 scale_engine->out_h_int = temp_h;
506                 scale_engine->interpolation_type = interpolation_type;
507 //printf("Overlay 2\n");
509 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
510                 scale_engine->process_packages();
511 //printf("OverlayFrame::overlay ScaleEngine 2\n");
515         }
517 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
518 //      in_x1, 
519 //      in_y1, 
520 //      in_x2, 
521 //      in_y2, 
522 //      out_x1, 
523 //      out_y1, 
524 //      out_x2, 
525 //      out_y2);
531 #define NO_TRANSLATION2 \
532         (EQUIV(in_x1, 0) && \
533         EQUIV(in_y1, 0) && \
534         EQUIV(in_x2, translation_input->get_w()) && \
535         EQUIV(in_y2, translation_input->get_h()) && \
536         EQUIV(out_x1, 0) && \
537         EQUIV(out_y1, 0) && \
538         EQUIV(out_x2, output->get_w()) && \
539         EQUIV(out_y2, output->get_h())) \
541 #define NO_SCALE \
542         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
543         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
545         
548 //printf("OverlayFrame::overlay 4 %d\n", mode);
553         if(translation_input)
554         {
555 // Direct copy
556                 if( NO_TRANSLATION2 &&
557                         NO_SCALE &&
558                         NO_BLEND)
559                 {
560 //printf("OverlayFrame::overlay direct copy\n");
561                         output->copy_from(translation_input);
562                 }
563                 else
564 // Blend only
565                 if( NO_TRANSLATION2 &&
566                         NO_SCALE)
567                 {
568                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
571                         blend_engine->output = output;
572                         blend_engine->input = translation_input;
573                         blend_engine->alpha = alpha;
574                         blend_engine->mode = mode;
576                         blend_engine->process_packages();
577                 }
578                 else
579 // Scale and translate using nearest neighbor
580 // Translation is exactly on integer boundaries
581                 if(interpolation_type == NEAREST_NEIGHBOR ||
582                         EQUIV(in_x1, (int)in_x1) &&
583                         EQUIV(in_y1, (int)in_y1) &&
584                         EQUIV(in_x2, (int)in_x2) &&
585                         EQUIV(in_y2, (int)in_y2) &&
587                         EQUIV(out_x1, (int)out_x1) &&
588                         EQUIV(out_y1, (int)out_y1) &&
589                         EQUIV(out_x2, (int)out_x2) &&
590                         EQUIV(out_y2, (int)out_y2))
591                 {
592 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
593                         if(!scaletranslate_engine) scaletranslate_engine = 
594                                 new ScaleTranslateEngine(this, cpus);
597                         scaletranslate_engine->output = output;
598                         scaletranslate_engine->input = translation_input;
599                         scaletranslate_engine->in_x1 = (int)in_x1;
600                         scaletranslate_engine->in_y1 = (int)in_y1;
601                         scaletranslate_engine->in_x2 = (int)in_x2;
602                         scaletranslate_engine->in_y2 = (int)in_y2;
603                         scaletranslate_engine->out_x1 = (int)out_x1;
604                         scaletranslate_engine->out_y1 = (int)out_y1;
605                         scaletranslate_engine->out_x2 = (int)out_x2;
606                         scaletranslate_engine->out_y2 = (int)out_y2;
607                         scaletranslate_engine->alpha = alpha;
608                         scaletranslate_engine->mode = mode;
610                         scaletranslate_engine->process_packages();
611                 }
612                 else
613 // Fractional translation
614                 {
615 // Use fractional translation
616 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
617 //      in_x1, 
618 //      in_y1, 
619 //      in_x2, 
620 //      in_y2, 
621 //      out_x1, 
622 //      out_y1, 
623 //      out_x2, 
624 //      out_y2);
626 //printf("Overlay 3\n");
627                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
628                         translate_engine->translate_output = output;
629                         translate_engine->translate_input = translation_input;
630                         translate_engine->translate_in_x1 = in_x1;
631                         translate_engine->translate_in_y1 = in_y1;
632                         translate_engine->translate_in_x2 = in_x2;
633                         translate_engine->translate_in_y2 = in_y2;
634                         translate_engine->translate_out_x1 = out_x1;
635                         translate_engine->translate_out_y1 = out_y1;
636                         translate_engine->translate_out_x2 = out_x2;
637                         translate_engine->translate_out_y2 = out_y2;
638                         translate_engine->translate_alpha = alpha;
639                         translate_engine->translate_mode = mode;
640 //printf("Overlay 4\n");
642 //printf("OverlayFrame::overlay 5 %d\n", mode);
643                         translate_engine->process_packages();
645                 }
646         }
647 //printf("OverlayFrame::overlay 2\n");
649         return 0;
658 ScalePackage::ScalePackage()
665 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
666  : LoadClient(server)
668         this->overlay = overlay;
669         this->engine = server;
672 ScaleUnit::~ScaleUnit()
678 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
679         float scale,
680         int in_pixel1, 
681         int out_total,
682         int in_total)
684         table = new bilinear_table_t[out_total];
685         bzero(table, sizeof(bilinear_table_t) * out_total);
686 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
687         for(int i = 0; i < out_total; i++)
688         {
689                 float out_start = i;
690                 float in_start = out_start * scale;
691                 float out_end = i + 1;
692                 float in_end = out_end * scale;
693                 bilinear_table_t *entry = table + i;
694 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
696 // Store input fraction
697                 entry->input_fraction1 = (floor(in_start + 1) - in_start) / scale;
698                 entry->input_fraction2 = 1.0 / scale;
699                 entry->input_fraction3 = (in_end - floor(in_end)) / scale;
701                 if(in_end >= in_total - in_pixel1)
702                 {
703                         in_end = in_total - in_pixel1 - 1;
704                         
705                         int difference = (int)in_end - (int)in_start - 1;
706                         if(difference < 0) difference = 0;
707                         entry->input_fraction3 = 1.0 - 
708                                 entry->input_fraction1 - 
709                                 entry->input_fraction2 * difference;
710                 }
712 // Store input pixels
713                 entry->input_pixel1 = (int)in_start;
714                 entry->input_pixel2 = (int)in_end;
716 // printf("ScaleUnit::tabulate_reduction 1 %d %d %f %f  %f\n", 
717 // entry->input_pixel1, 
718 // entry->input_pixel2,
719 // entry->input_fraction1,
720 // entry->input_fraction2,
721 // entry->input_fraction3);
724 // Sanity check
725                 if(entry->input_pixel1 > entry->input_pixel2)
726                 {
727                         entry->input_pixel1 = entry->input_pixel2;
728                         entry->input_fraction1 = 0;
729                 }
731 // Get total fraction of output pixel used
732 //              if(entry->input_pixel2 > entry->input_pixel1)
733                 entry->total_fraction = 
734                         entry->input_fraction1 +
735                         entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
736                         entry->input_fraction3;
737                 entry->input_pixel1 += in_pixel1;
738                 entry->input_pixel2 += in_pixel1;
739         }
742 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
743         float scale,
744         int in_pixel1, 
745         int out_total,
746         int in_total)
748         table = new bilinear_table_t[out_total];
749         bzero(table, sizeof(bilinear_table_t) * out_total);
751         for(int i = 0; i < out_total; i++)
752         {
753                 bilinear_table_t *entry = table + i;
754                 float in_pixel = i * scale;
755                 entry->input_pixel1 = (int)floor(in_pixel);
756                 entry->input_pixel2 = entry->input_pixel1 + 1;
758                 if(in_pixel <= in_total)
759                 {
760                         entry->input_fraction3 = in_pixel - entry->input_pixel1;
761                 }
762                 else
763                 {
764                         entry->input_fraction3 = 0;
765                         entry->input_pixel2 = 0;
766                 }
768                 if(in_pixel >= 0)
769                 {
770                         entry->input_fraction1 = entry->input_pixel2 - in_pixel;
771                 }
772                 else
773                 {
774                         entry->input_fraction1 = 0;
775                         entry->input_pixel1 = 0;
776                 }
778                 if(entry->input_pixel2 >= in_total - in_pixel1)
779                 {
780                         entry->input_pixel2 = entry->input_pixel1;
781                         entry->input_fraction3 = 1.0 - entry->input_fraction1;
782                 }
784                 entry->total_fraction = 
785                         entry->input_fraction1 + 
786                         entry->input_fraction3;
787                 entry->input_pixel1 += in_pixel1;
788                 entry->input_pixel2 += in_pixel1;
789 // 
790 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
791 // entry->input_pixel1,
792 // entry->input_pixel2,
793 // entry->input_fraction1,
794 // entry->input_fraction2,
795 // entry->input_fraction3);
796         }
799 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
801         printf("ScaleUnit::dump_bilinear\n");
802         for(int i = 0; i < total; i++)
803         {
804                 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n", 
805                         i,
806                         table[i].input_pixel1,
807                         table[i].input_pixel2,
808                         table[i].input_fraction1,
809                         table[i].input_fraction2,
810                         table[i].input_fraction3,
811                         table[i].total_fraction);
812         }
// PIXEL_REDUCE_MACRO: add the weighted pixels of one input row to the
// running sums temp_f1..temp_f4.
//
// Macro arguments:
//   type        - component type of the row data.
//   components  - components per pixel; the fourth sum is only updated when
//                 this is 4.
//   row         - index into in_rows of the row to read.
//
// Names taken from the expansion site: in_rows, x_entry, input_scale1,
// input_scale2, input_scale3, temp_f1..temp_f4.
//
// The pixel at x_entry->input_pixel1 is weighted by input_scale1, the pixel
// at x_entry->input_pixel2 by input_scale3, and every pixel strictly between
// them by input_scale2.  The guard around the last-pixel step is commented
// out, so when both indices are equal that one pixel is added twice, once
// with each weight.
815 #define PIXEL_REDUCE_MACRO(type, components, row) \
816 { \
817         type *input_row = &in_rows[row][x_entry->input_pixel1 * components]; \
818         type *input_end = &in_rows[row][x_entry->input_pixel2 * components]; \
820 /* Do first pixel */ \
821         temp_f1 += input_scale1 * input_row[0]; \
822         temp_f2 += input_scale1 * input_row[1]; \
823         temp_f3 += input_scale1 * input_row[2]; \
824         if(components == 4) temp_f4 += input_scale1 * input_row[3]; \
826 /* Do last pixel */ \
827 /*      if(input_row < input_end) */\
828         { \
829                 temp_f1 += input_scale3 * input_end[0]; \
830                 temp_f2 += input_scale3 * input_end[1]; \
831                 temp_f3 += input_scale3 * input_end[2]; \
832                 if(components == 4) temp_f4 += input_scale3 * input_end[3]; \
833         } \
835 /* Do middle pixels */ \
836         for(input_row += components; input_row < input_end; input_row += components) \
837         { \
838                 temp_f1 += input_scale2 * input_row[0]; \
839                 temp_f2 += input_scale2 * input_row[1]; \
840                 temp_f3 += input_scale2 * input_row[2]; \
841                 if(components == 4) temp_f4 += input_scale2 * input_row[3]; \
842         } \
// Bilinear reduction and suboptimal enlargement.
// Very high quality.
// Each output pixel is the weighted sum of the block of input pixels
// described by one x table entry and one y table entry.
// Names expected in scope at the expansion site: pkg, input, output,
// scale_w, scale_h, in_x1_int, in_y1_int, out_w_int and out_h_int.
// max        - largest value a component may take; results are clamped to it
// type       - component storage type
// components - components per pixel; the 4th is only processed when this is 4
// The last-row and middle-row passes used to be guarded by the number of
// output rows in the package (out_h) instead of by the input span, so a
// package holding exactly one output row silently dropped its middle input
// rows.  The passes now always run; the middle loop bounds itself.
#define BILINEAR_REDUCE(max, type, components) \
{ \
	bilinear_table_t *x_table, *y_table; \
	int out_h = pkg->out_row2 - pkg->out_row1; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
 \
	if(scale_w < 1) \
		tabulate_reduction(x_table, \
			1.0 / scale_w, \
			in_x1_int, \
			out_w_int, \
			input->get_w()); \
	else \
		tabulate_enlarge(x_table, \
			1.0 / scale_w, \
			in_x1_int, \
			out_w_int, \
			input->get_w()); \
 \
	if(scale_h < 1) \
		tabulate_reduction(y_table, \
			1.0 / scale_h, \
			in_y1_int, \
			out_h_int, \
			input->get_h()); \
	else \
		tabulate_enlarge(y_table, \
			1.0 / scale_h, \
			in_y1_int, \
			out_h_int, \
			input->get_h()); \
/* dump_bilinear(y_table, out_h_int); */\
 \
	for(int i = 0; i < out_h; i++) \
	{ \
		type *out_row = out_rows[i + pkg->out_row1]; \
		bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
 \
		for(int j = 0; j < out_w_int; j++) \
		{ \
			bilinear_table_t *x_entry = &x_table[j]; \
/* Load rounding factors */ \
			float temp_f1 = .5; \
			float temp_f2 = .5; \
			float temp_f3 = .5; \
			float temp_f4 = .5; \
 \
/* First row */ \
			float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
			float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
			float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
 \
/* Last row.  Always added, like the last pixel in PIXEL_REDUCE_MACRO. */ \
			input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
 \
/* Middle rows.  The loop is empty when the entry spans at most 2 rows. */ \
			input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
			for(int k = y_entry->input_pixel1 + 1; \
				k < y_entry->input_pixel2; \
				k++) \
			{ \
				PIXEL_REDUCE_MACRO(type, components, k) \
			} \
 \
/* Clamp before narrowing to the storage type */ \
			if(temp_f1 > max) temp_f1 = max; \
			if(temp_f2 > max) temp_f2 = max; \
			if(temp_f3 > max) temp_f3 = max; \
			if(components == 4) if(temp_f4 > max) temp_f4 = max; \
			out_row[j * components    ] = (type)temp_f1; \
			out_row[j * components + 1] = (type)temp_f2; \
			out_row[j * components + 2] = (type)temp_f3; \
			if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
		} \
	} \
 \
	delete [] x_table; \
	delete [] y_table; \
}
// Only 2 input pixels
// Bilinear enlargement: every output component is interpolated from the
// 2x2 block of input pixels selected by the tabulate_blinear_* tables.
// Names expected in scope at the expansion site: pkg, input, output,
// scale_w, scale_h, in_x1_int, in_y1_int, out_w_int and use_float.
// max        - largest value a component may take
// type       - component storage type
// components - components per pixel
// The float path rounds to nearest and clamps to max, as BILINEAR_REDUCE
// does; it used to truncate, which could darken flat areas by one step.
// The integer path keeps its original truncating arithmetic.
#define BILINEAR_ENLARGE(max, type, components) \
{ \
	float k_y = 1.0 / scale_h; \
	float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	int out_h = pkg->out_row2 - pkg->out_row1; \
	int in_h_int = input->get_h(); \
	int in_w_int = input->get_w(); \
	int *table_int_x1, *table_int_y1; \
	int *table_int_x2, *table_int_y2; \
	float *table_frac_x_f, *table_antifrac_x_f, *table_frac_y_f, *table_antifrac_y_f; \
	int *table_frac_x_i, *table_antifrac_x_i, *table_frac_y_i, *table_antifrac_y_i; \
 \
/* The x tables cover the whole output width, the y tables only the rows */ \
/* of this package, so the y tables are indexed from 0 below. */ \
	if(use_float) \
	{ \
		tabulate_blinear_f(table_int_x1,  \
			table_int_x2,  \
			table_frac_x_f,  \
			table_antifrac_x_f,  \
			k_x,  \
			0,  \
			out_w_int, \
			in_x1_int,  \
			in_w_int); \
		tabulate_blinear_f(table_int_y1,  \
			table_int_y2,  \
			table_frac_y_f,  \
			table_antifrac_y_f,  \
			k_y,  \
			pkg->out_row1,  \
			pkg->out_row2,  \
			in_y1_int, \
			in_h_int); \
	} \
	else \
	{ \
		tabulate_blinear_i(table_int_x1,  \
			table_int_x2,  \
			table_frac_x_i,  \
			table_antifrac_x_i,  \
			k_x,  \
			0,  \
			out_w_int, \
			in_x1_int,  \
			in_w_int); \
		tabulate_blinear_i(table_int_y1,  \
			table_int_y2,  \
			table_frac_y_i,  \
			table_antifrac_y_i,  \
			k_y,  \
			pkg->out_row1,  \
			pkg->out_row2,  \
			in_y1_int, \
			in_h_int); \
	} \
 \
	for(int i = 0; i < out_h; i++) \
	{ \
		float a_f = 0; \
		float anti_a_f = 0; \
		uint64_t a_i = 0; \
		uint64_t anti_a_i = 0; \
		if(use_float) \
		{ \
			a_f = table_frac_y_f[i]; \
			anti_a_f = table_antifrac_y_f[i]; \
		} \
		else \
		{ \
			a_i = table_frac_y_i[i]; \
			anti_a_i = table_antifrac_y_i[i]; \
		} \
		type *in_row1 = in_rows[table_int_y1[i]]; \
		type *in_row2 = in_rows[table_int_y2[i]]; \
		type *out_row = out_rows[i + pkg->out_row1]; \
 \
		for(int j = 0; j < out_w_int; j++) \
		{ \
/* The four neighbours of this output pixel */ \
			type *top_left = in_row1 + table_int_x1[j] * components; \
			type *top_right = in_row1 + table_int_x2[j] * components; \
			type *bottom_left = in_row2 + table_int_x1[j] * components; \
			type *bottom_right = in_row2 + table_int_x2[j] * components; \
			type *out_pixel = out_row + j * components; \
 \
			if(use_float) \
			{ \
				float b_f = table_frac_x_f[j]; \
				float anti_b_f = table_antifrac_x_f[j]; \
 \
				for(int c = 0; c < components; c++) \
				{ \
/* The trailing .5 rounds to nearest when the value is narrowed */ \
					float value = anti_a_f * (anti_b_f * top_left[c] +  \
						b_f * top_right[c]) +  \
						a_f * (anti_b_f * bottom_left[c] +  \
						b_f * bottom_right[c]) +  \
						.5F; \
					if(value > max) value = max; \
					out_pixel[c] = (type)value; \
				} \
			} \
			else \
			{ \
				uint64_t b_i = table_frac_x_i[j]; \
				uint64_t anti_b_i = table_antifrac_x_i[j]; \
 \
				for(int c = 0; c < components; c++) \
				{ \
					out_pixel[c] = (type)((anti_a_i * (anti_b_i * (uint64_t)top_left[c] +  \
						b_i * (uint64_t)top_right[c]) +  \
						a_i * (anti_b_i * (uint64_t)bottom_left[c] +  \
						b_i * (uint64_t)bottom_right[c])) / 0xffffffff); \
				} \
			} \
		} \
	} \
 \
	delete [] table_int_x1; \
	delete [] table_int_x2; \
	delete [] table_int_y1; \
	delete [] table_int_y2; \
	if(use_float) \
	{ \
		delete [] table_frac_x_f; \
		delete [] table_antifrac_x_f; \
		delete [] table_frac_y_f; \
		delete [] table_antifrac_y_f; \
	} \
	else \
	{ \
		delete [] table_frac_x_i; \
		delete [] table_antifrac_x_i; \
		delete [] table_frac_y_i; \
		delete [] table_antifrac_y_i; \
	} \
}
// Bicubic scaling: every output component is the sum of a 4x4 block of
// input pixels weighted by the product of one y and one x coefficient
// taken from the tabulate_bcubic_* tables.
// Names expected in scope at the expansion site: pkg, input, output,
// scale_w, scale_h, in_x1_int, in_y1_int, out_w_int, out_h_int and
// use_float.
// max        - largest value a component may take
// type       - component storage type
// components - components per pixel; the 4th is only processed when this is 4
// The float path rounds to nearest and clamps to max, as BILINEAR_REDUCE
// does; it used to truncate and ignore max.  The integer path keeps its
// original truncating arithmetic.
#define BICUBIC(max, type, components) \
{ \
	float k_y = 1.0 / scale_h; \
	float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	float *bspline_x_f, *bspline_y_f; \
	int *bspline_x_i, *bspline_y_i; \
	int *in_x_table, *in_y_table; \
	int in_h_int = input->get_h(); \
	int in_w_int = input->get_w(); \
 \
/* Both tables hold 4 entries per output pixel and cover the whole output */ \
	if(use_float) \
	{ \
		tabulate_bcubic_f(bspline_x_f,  \
			in_x_table, \
			k_x, \
			in_x1_int, \
			out_w_int, \
			in_w_int, \
			-1); \
 \
		tabulate_bcubic_f(bspline_y_f,  \
			in_y_table, \
			k_y, \
			in_y1_int, \
			out_h_int, \
			in_h_int, \
			1); \
	} \
	else \
	{ \
		tabulate_bcubic_i(bspline_x_i,  \
			in_x_table, \
			k_x, \
			in_x1_int, \
			out_w_int, \
			in_w_int, \
			-1); \
 \
		tabulate_bcubic_i(bspline_y_i,  \
			in_y_table, \
			k_y, \
			in_y1_int, \
			out_h_int, \
			in_h_int, \
			1); \
	} \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type *out_row = out_rows[i]; \
 \
		for(int j = 0; j < out_w_int; j++) \
		{ \
/* Load rounding factors into the float accumulators */ \
			float output1_f = .5F; \
			float output2_f = .5F; \
			float output3_f = .5F; \
			float output4_f = .5F; \
			uint64_t output1_i = 0; \
			uint64_t output2_i = 0; \
			uint64_t output3_i = 0; \
			uint64_t output4_i = 0; \
			int table_y = i * 4; \
 \
/* Kernel */ \
			for(int m = 0; m < 4; m++, table_y++) \
			{ \
				float r1_f = 0; \
				uint64_t r1_i = 0; \
				if(use_float) \
					r1_f = bspline_y_f[table_y]; \
				else \
					r1_i = bspline_y_i[table_y]; \
				type *in_row = in_rows[in_y_table[table_y]]; \
				int table_x = j * 4; \
 \
				for(int n = 0; n < 4; n++, table_x++) \
				{ \
					type *in_pixel = in_row + in_x_table[table_x] * components; \
 \
					if(use_float) \
					{ \
						float r_square_f = r1_f * bspline_x_f[table_x]; \
						output1_f += r_square_f * in_pixel[0]; \
						output2_f += r_square_f * in_pixel[1]; \
						output3_f += r_square_f * in_pixel[2]; \
						if(components == 4) \
							output4_f += r_square_f * in_pixel[3]; \
					} \
					else \
					{ \
						uint64_t r_square_i = r1_i * (uint64_t)bspline_x_i[table_x]; \
						output1_i += r_square_i * in_pixel[0]; \
						output2_i += r_square_i * in_pixel[1]; \
						output3_i += r_square_i * in_pixel[2]; \
						if(components == 4) \
							output4_i += r_square_i * in_pixel[3]; \
					} \
				} \
			} \
 \
			if(use_float) \
			{ \
/* Clamp before narrowing to the storage type */ \
				if(output1_f > max) output1_f = max; \
				if(output2_f > max) output2_f = max; \
				if(output3_f > max) output3_f = max; \
				if(components == 4) if(output4_f > max) output4_f = max; \
				out_row[j * components] = (type)output1_f; \
				out_row[j * components + 1] = (type)output2_f; \
				out_row[j * components + 2] = (type)output3_f; \
				if(components == 4) \
					out_row[j * components + 3] = (type)output4_f; \
			} \
			else \
			{ \
				out_row[j * components] = (type)(output1_i / 0xffffffff); \
				out_row[j * components + 1] = (type)(output2_i / 0xffffffff); \
				out_row[j * components + 2] = (type)(output3_i / 0xffffffff); \
				if(components == 4) \
					out_row[j * components + 3] = (type)(output4_i / 0xffffffff); \
			} \
		} \
	} \
 \
	if(use_float) \
	{ \
		delete [] bspline_x_f; \
		delete [] bspline_y_f; \
	} \
	else \
	{ \
		delete [] bspline_x_i; \
		delete [] bspline_y_i; \
	} \
	delete [] in_x_table; \
	delete [] in_y_table; \
}
1320 // Pow function is not thread safe in Compaqt C
1321 #define CUBE(x) ((x) * (x) * (x))
1323 float ScaleUnit::cubic_bspline(float x)
1325         float a, b, c, d;
1327         if((x + 2.0F) <= 0.0F) 
1328         {
1329         a = 0.0F;
1330         }
1331         else 
1332         {
1333         a = CUBE(x + 2.0F);
1334         }
1337         if((x + 1.0F) <= 0.0F) 
1338         {
1339         b = 0.0F;
1340         }
1341         else 
1342         {
1343         b = CUBE(x + 1.0F);
1344         }    
1346         if(x <= 0) 
1347         {
1348         c = 0.0F;
1349         }
1350         else 
1351         {
1352         c = CUBE(x);
1353         }  
1355         if((x - 1.0F) <= 0.0F) 
1356         {
1357         d = 0.0F;
1358         }
1359         else 
1360         {
1361         d = CUBE(x - 1.0F);
1362         }
1365         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
1369 void ScaleUnit::tabulate_bcubic_f(float* &coef_table, 
1370         int* &coord_table,
1371         float scale,
1372         int start, 
1373         int pixels,
1374         int total_pixels,
1375         float coefficient)
1377         coef_table = new float[pixels * 4];
1378         coord_table = new int[pixels * 4];
1379         for(int i = 0, j = 0; i < pixels; i++)
1380         {
1381                 float f_x = (float)i * scale;
1382                 float a = f_x - floor(f_x);
1383                 
1384                 for(float m = -1; m < 3; m++)
1385                 {
1386                         coef_table[j] = cubic_bspline(coefficient * (m - a));
1387                         coord_table[j] = (int)(start + (int)f_x + m);
1388                         CLAMP(coord_table[j], 0, total_pixels - 1);
1389                         j++;
1390                 }
1391                 
1392         }
1395 void ScaleUnit::tabulate_bcubic_i(int* &coef_table, 
1396         int* &coord_table,
1397         float scale,
1398         int start, 
1399         int pixels,
1400         int total_pixels,
1401         float coefficient)
1403         coef_table = new int[pixels * 4];
1404         coord_table = new int[pixels * 4];
1405         for(int i = 0, j = 0; i < pixels; i++)
1406         {
1407                 float f_x = (float)i * scale;
1408                 float a = f_x - floor(f_x);
1409                 
1410                 for(float m = -1; m < 3; m++)
1411                 {
1412                         coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1413                         coord_table[j] = (int)(start + (int)f_x + m);
1414                         CLAMP(coord_table[j], 0, total_pixels - 1);
1415                         j++;
1416                 }
1417                 
1418         }
1421 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1422                 int* &table_int2,
1423                 float* &table_frac,
1424                 float* &table_antifrac,
1425                 float scale,
1426                 int pixel1,
1427                 int pixel2,
1428                 int start,
1429                 int total_pixels)
1431         table_int1 = new int[pixel2 - pixel1];
1432         table_int2 = new int[pixel2 - pixel1];
1433         table_frac = new float[pixel2 - pixel1];
1434         table_antifrac = new float[pixel2 - pixel1];
1436         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1437         {
1438                 float f_x = (float)i * scale;
1439                 int i_x = (int)floor(f_x);
1440                 float a = (f_x - floor(f_x));
1442                 table_int1[j] = i_x + start;
1443                 table_int2[j] = i_x + start + 1;
1444                 CLAMP(table_int1[j], 0, total_pixels - 1);
1445                 CLAMP(table_int2[j], 0, total_pixels - 1);
1446                 table_frac[j] = a;
1447                 table_antifrac[j] = 1.0F - a;
1448 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1449         }
1452 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1453                 int* &table_int2,
1454                 int* &table_frac,
1455                 int* &table_antifrac,
1456                 float scale,
1457                 int pixel1,
1458                 int pixel2,
1459                 int start,
1460                 int total_pixels)
1462         table_int1 = new int[pixel2 - pixel1];
1463         table_int2 = new int[pixel2 - pixel1];
1464         table_frac = new int[pixel2 - pixel1];
1465         table_antifrac = new int[pixel2 - pixel1];
1467         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1468         {
1469                 double f_x = (float)i * scale;
1470                 int i_x = (int)floor(f_x);
1471                 float a = (f_x - floor(f_x));
1473                 table_int1[j] = i_x + start;
1474                 table_int2[j] = i_x + start + 1;
1475                 CLAMP(table_int1[j], 0, total_pixels - 1);
1476                 CLAMP(table_int2[j], 0, total_pixels - 1);
1477                 table_frac[j] = (int)(a * 0xffff);
1478                 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1479 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1480         }
1483 void ScaleUnit::process_package(LoadPackage *package)
1485         ScalePackage *pkg = (ScalePackage*)package;
1487 //printf("ScaleUnit::process_package 1\n");
1488 // Arguments for macros
1489         VFrame *output = engine->scale_output;
1490         VFrame *input = engine->scale_input;
1491         float scale_w = engine->w_scale;
1492         float scale_h = engine->h_scale;
1493         int in_x1_int = engine->in_x1_int;
1494         int in_y1_int = engine->in_y1_int;
1495         int out_h_int = engine->out_h_int;
1496         int out_w_int = engine->out_w_int;
1497         int do_yuv = 
1498                 (input->get_color_model() == BC_YUV888 ||
1499                 input->get_color_model() == BC_YUVA8888 ||
1500                 input->get_color_model() == BC_YUV161616 ||
1501                 input->get_color_model() == BC_YUVA16161616);
1503 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
1504         if(engine->interpolation_type == CUBIC_CUBIC || 
1505                 (engine->interpolation_type == CUBIC_LINEAR 
1506                         && engine->w_scale > 1 && 
1507                         engine->h_scale > 1))
1508         {
1509         
1510                 switch(engine->scale_input->get_color_model())
1511                 {
1512                         case BC_RGB888:
1513                         case BC_YUV888:
1514                                 BICUBIC(0xff, unsigned char, 3);
1515                                 break;
1517                         case BC_RGBA8888:
1518                         case BC_YUVA8888:
1519                                 BICUBIC(0xff, unsigned char, 4);
1520                                 break;
1522                         case BC_RGB161616:
1523                         case BC_YUV161616:
1524                                 BICUBIC(0xffff, uint16_t, 3);
1525                                 break;
1527                         case BC_RGBA16161616:
1528                         case BC_YUVA16161616:
1529                                 BICUBIC(0xffff, uint16_t, 4);
1530                                 break;
1531                 }
1532         }
1533         else
1534         if(engine->w_scale > 1 && 
1535                 engine->h_scale > 1)
1536 //if(0)
1537 // Perform bilinear scaling input -> scale_output
1538         {
1539                 switch(engine->scale_input->get_color_model())
1540                 {
1541                         case BC_RGB888:
1542                         case BC_YUV888:
1543                                 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1544                                 break;
1546                         case BC_RGBA8888:
1547                         case BC_YUVA8888:
1548                                 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1549                                 break;
1551                         case BC_RGB161616:
1552                         case BC_YUV161616:
1553                                 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1554                                 break;
1556                         case BC_RGBA16161616:
1557                         case BC_YUVA16161616:
1558                                 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1559                                 break;
1560                 }
1561         }
1562         else
1563         {
1564                 switch(engine->scale_input->get_color_model())
1565                 {
1566                         case BC_RGB888:
1567                         case BC_YUV888:
1568                                 BILINEAR_REDUCE(0xff, unsigned char, 3);
1569                                 break;
1571                         case BC_RGBA8888:
1572                         case BC_YUVA8888:
1573                                 BILINEAR_REDUCE(0xff, unsigned char, 4);
1574                                 break;
1576                         case BC_RGB161616:
1577                         case BC_YUV161616:
1578                                 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1579                                 break;
1581                         case BC_RGBA16161616:
1582                         case BC_YUVA16161616:
1583                                 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1584                                 break;
1585                 }
1586         }
1587 //printf("ScaleUnit::process_package 3\n");
// Creates the scaling load server and remembers the owning OverlayFrame.
// cpus is forwarded as both LoadServer constructor arguments (presumably
// the client count and the package count - confirm against LoadServer).
1603 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1604  : LoadServer(cpus, cpus)
1606         this->overlay = overlay;
// Destructor.  No body statements appear in this listing.
1609 ScaleEngine::~ScaleEngine()
// Divides the out_h_int output rows into total_packages contiguous bands.
// Each band gets out_h_int / total_packages rows (integer division); the
// final band is stretched to out_h_int so the remainder rows are covered.
1613 void ScaleEngine::init_packages()
1615         for(int i = 0; i < total_packages; i++)
1616         {
1617                 ScalePackage *package = (ScalePackage*)packages[i];
1618                 package->out_row1 = out_h_int / total_packages * i;
1619                 package->out_row2 = package->out_row1 + out_h_int / total_packages;
                     // Last package: extend to the end of the output rows.
1621                 if(i >= total_packages - 1)
1622                         package->out_row2 = out_h_int;
1623         }
// Factory hook: returns a new ScaleUnit bound to this engine and its overlay.
1626 LoadClient* ScaleEngine::new_client()
1628         return new ScaleUnit(this, overlay);
// Factory hook: returns a new, default-constructed ScalePackage.
1631 LoadPackage* ScaleEngine::new_package()
1633         return new ScalePackage;
// Constructor.  No body statements appear in this listing.
1648 TranslatePackage::TranslatePackage()
// Creates one translation worker.  The server is handed to the LoadClient
// base and also kept as engine; the owning OverlayFrame is kept as overlay.
1654 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1655  : LoadClient(server)
1657         this->overlay = overlay;
1658         this->engine = server;
// Destructor.  No body statements appear in this listing.
1661 TranslateUnit::~TranslateUnit()
1667 void TranslateUnit::translation_array_f(transfer_table_f* &table, 
1668         float out_x1, 
1669         float out_x2,
1670         float in_x1,
1671         float in_x2,
1672         int in_total, 
1673         int out_total, 
1674         int &out_x1_int,
1675         int &out_x2_int)
1677         int out_w_int;
1678         float offset = out_x1 - in_x1;
1679 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1681         out_x1_int = (int)out_x1;
1682         out_x2_int = MIN((int)ceil(out_x2), out_total);
1683         out_w_int = out_x2_int - out_x1_int;
1685         table = new transfer_table_f[out_w_int];
1686         bzero(table, sizeof(transfer_table_f) * out_w_int);
1689 //printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1691         float in_x = in_x1;
1692         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1693         {
1694                 transfer_table_f *entry = &table[out_x - out_x1_int];
1696                 entry->in_x1 = (int)in_x;
1697                 entry->in_x2 = (int)in_x + 1;
1699 // Get fraction of output pixel to fill
1700                 entry->output_fraction = 1;
1702                 if(out_x1 > out_x)
1703                 {
1704                         entry->output_fraction -= out_x1 - out_x;
1705                 }
1707                 if(out_x2 < out_x + 1)
1708                 {
1709                         entry->output_fraction = (out_x2 - out_x);
1710                 }
1712 // Advance in_x until out_x_fraction is filled
1713                 float out_x_fraction = entry->output_fraction;
1714                 float in_x_fraction = floor(in_x + 1) - in_x;
1716                 if(out_x_fraction <= in_x_fraction)
1717                 {
1718                         entry->in_fraction1 = out_x_fraction;
1719                         entry->in_fraction2 = 0.0;
1720                         in_x += out_x_fraction;
1721                 }
1722                 else
1723                 {
1724                         entry->in_fraction1 = in_x_fraction;
1725                         in_x += out_x_fraction;
1726                         entry->in_fraction2 = in_x - floor(in_x);
1727                 }
1729 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1730                 if(entry->in_x2 >= in_total)
1731                 {
1732                         entry->in_x2 = in_total - 1;
1733                         entry->in_fraction2 = 0.0;
1734                 }
1735                 
1736                 if(entry->in_x1 >= in_total)
1737                 {
1738                         entry->in_x1 = in_total - 1;
1739                         entry->in_fraction1 = 0.0;
1740                 }
1741 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n", 
1742 //      out_x, 
1743 //      entry->in_x1, 
1744 //      entry->in_x2, 
1745 //      entry->in_fraction1, 
1746 //      entry->in_fraction2, 
1747 //      entry->output_fraction);
1748         }
1752 void TranslateUnit::translation_array_i(transfer_table_i* &table, 
1753         float out_x1, 
1754         float out_x2,
1755         float in_x1,
1756         float in_x2,
1757         int in_total, 
1758         int out_total, 
1759         int &out_x1_int,
1760         int &out_x2_int)
1762         int out_w_int;
1763         float offset = out_x1 - in_x1;
1765         out_x1_int = (int)out_x1;
1766         out_x2_int = MIN((int)ceil(out_x2), out_total);
1767         out_w_int = out_x2_int - out_x1_int;
1769         table = new transfer_table_i[out_w_int];
1770         bzero(table, sizeof(transfer_table_i) * out_w_int);
1773 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1775         float in_x = in_x1;
1776         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1777         {
1778                 transfer_table_i *entry = &table[out_x - out_x1_int];
1780                 entry->in_x1 = (int)in_x;
1781                 entry->in_x2 = (int)in_x + 1;
1783 // Get fraction of output pixel to fill
1784                 entry->output_fraction = 0x10000;
1786                 if(out_x1 > out_x)
1787                 {
1788                         entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
1789                 }
1791                 if(out_x2 < out_x + 1)
1792                 {
1793                         entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1794                 }
1796 // Advance in_x until out_x_fraction is filled
1797                 int out_x_fraction = entry->output_fraction;
1798                 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
1800                 if(out_x_fraction <= in_x_fraction)
1801                 {
1802                         entry->in_fraction1 = out_x_fraction;
1803                         entry->in_fraction2 = 0;
1804                         in_x += (float)out_x_fraction / 0x10000;
1805                 }
1806                 else
1807                 {
1808                         entry->in_fraction1 = in_x_fraction;
1809                         in_x += (float)out_x_fraction / 0x10000;
1810                         entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1811                 }
1813 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1814                 if(entry->in_x2 >= in_total)
1815                 {
1816                         entry->in_x2 = in_total - 1;
1817                         entry->in_fraction2 = 0;
1818                 }
1820                 if(entry->in_x1 >= in_total)
1821                 {
1822                         entry->in_x1 = in_total - 1;
1823                         entry->in_fraction1 = 0;
1824                 }
1825 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n", 
1826 //      out_x, 
1827 //      entry->in_x1, 
1828 //      entry->in_x2, 
1829 //      entry->in_fraction1, 
1830 //      entry->in_fraction2, 
1831 //      entry->output_fraction);
1832         }
// TRANSLATE(max, temp_type, type, components, chroma_offset)
//
// Per-color-model body of TranslateUnit::process_package: composites input
// onto output at a fractional position.  It reads these names from the
// expansion site: input, output, alpha, mode, row1, row2, out_x1_int,
// out_x2_int, out_y1_int and the x/y transfer tables (_f or _i).
//
//   max           - largest component value (0xff or 0xffff at the call sites)
//   temp_type     - arithmetic type used for opacity and blending
//   type          - storage type of one component
//   components    - components per pixel, 3 or 4
//   chroma_offset - 0 for the RGB call sites, 0x80 / 0x8000 for the YUV ones
//
// For every output pixel the four neighbouring input pixels are weighted by
// the products of the x and y table fractions and stored in input1..input4,
// then BLEND_3 or BLEND_4 is expanded (those macros pick up the inputs,
// output and mode by name; opacity and transparency are presumably consumed
// there as well).  master_transparency is not referenced inside this macro.
//
// NOTE(review): the integer path (use_float == 0) divides by 0xffffffff while
// translation_array_i uses 0x10000 as unity, so two full fractions multiply
// to 0x100000000; "0xffffffff - fraction1 - ..." would then wrap in uint64_t.
// Confirm before enabling that path.
1868 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
1869 { \
1871         type **in_rows = (type**)input->get_rows(); \
1872         type **out_rows = (type**)output->get_rows(); \
1874 /* printf("OverlayFrame::translate 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",  */ \
1875 /*      (in_x1),  in_y1,  in_x2,  in_y2,  out_x1,  out_y1, out_x2,  out_y2); */ \
     /* Overall opacity derived from alpha, scaled to the component range. */ \
1877         temp_type master_opacity = (temp_type)(alpha * max + 0.5); \
1878         temp_type master_transparency = max - master_opacity; \
1880 /* printf("TRANSLATE %d\n", mode); */ \
     /* One pass per output row in [row1, row2). */ \
1882         for(int i = row1; i < row2; i++) \
1883         { \
1884                 int in_y1; \
1885                 int in_y2; \
1886                 float y_fraction1_f; \
1887                 float y_fraction2_f; \
1888                 float y_output_fraction_f; \
1889                 uint64_t y_fraction1_i; \
1890                 uint64_t y_fraction2_i; \
1891                 uint64_t y_output_fraction_i; \
1892                 if(use_float) \
1893                 { \
1894                         in_y1 = y_table_f[i - out_y1_int].in_x1; \
1895                         in_y2 = y_table_f[i - out_y1_int].in_x2; \
1896                         y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1897                         y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1898                         y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1899                 } \
1900                 else \
1901                 { \
1902                         in_y1 = y_table_i[i - out_y1_int].in_x1; \
1903                         in_y2 = y_table_i[i - out_y1_int].in_x2; \
1904                         y_fraction1_i = y_table_i[i - out_y1_int].in_fraction1; \
1905                         y_fraction2_i = y_table_i[i - out_y1_int].in_fraction2; \
1906                         y_output_fraction_i = y_table_i[i - out_y1_int].output_fraction; \
1907                 } \
     /* The two source rows contributing to this output row. */ \
1908                 type *in_row1 = in_rows[(in_y1)]; \
1909                 type *in_row2 = in_rows[(in_y2)]; \
1910                 type *out_row = out_rows[i]; \
1912                 for(int j = out_x1_int; j < out_x2_int; j++) \
1913                 { \
1914                         int in_x1; \
1915                         int in_x2; \
1916                         float x_fraction1_f; \
1917                         float x_fraction2_f; \
1918                         float x_output_fraction_f; \
1919                         uint64_t x_fraction1_i; \
1920                         uint64_t x_fraction2_i; \
1921                         uint64_t x_output_fraction_i; \
1922                         if(use_float) \
1923                         { \
1924                                 in_x1 = x_table_f[j - out_x1_int].in_x1; \
1925                                 in_x2 = x_table_f[j - out_x1_int].in_x2; \
1926                                 x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1927                                 x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1928                                 x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
1929                         } \
1930                         else \
1931                         { \
1932                                 in_x1 = x_table_i[j - out_x1_int].in_x1; \
1933                                 in_x2 = x_table_i[j - out_x1_int].in_x2; \
1934                                 x_fraction1_i = x_table_i[j - out_x1_int].in_fraction1; \
1935                                 x_fraction2_i = x_table_i[j - out_x1_int].in_fraction2; \
1936                                 x_output_fraction_i = x_table_i[j - out_x1_int].output_fraction; \
1937                         } \
1938                         type *output = &out_row[j * components]; \
1939                         type input1, input2, input3, input4; \
     /* Weights of the four neighbouring source pixels (x weight * y weight). */ \
1941                         if(use_float) \
1942                         { \
1943                                 float fraction1 = x_fraction1_f * y_fraction1_f; \
1944                                 float fraction2 = x_fraction2_f * y_fraction1_f; \
1945                                 float fraction3 = x_fraction1_f * y_fraction2_f; \
1946                                 float fraction4 = x_fraction2_f * y_fraction2_f; \
1947          \
1948                                 input1 = (type)(in_row1[in_x1 * components] * fraction1 +  \
1949                                         in_row1[in_x2 * components] * fraction2 +  \
1950                                         in_row2[in_x1 * components] * fraction3 +  \
1951                                         in_row2[in_x2 * components] * fraction4 + 0.5); \
1952          \
1953 /* Add chroma to fractional pixels */ \
1954                                 if(chroma_offset) \
1955                                 { \
1956                                         float extra_chroma = (1.0F - \
1957                                                 fraction1 - \
1958                                                 fraction2 - \
1959                                                 fraction3 - \
1960                                                 fraction4) * chroma_offset; \
1961                                         input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1962                                                 in_row1[in_x2 * components + 1] * fraction2 +  \
1963                                                 in_row2[in_x1 * components + 1] * fraction3 +  \
1964                                                 in_row2[in_x2 * components + 1] * fraction4 + \
1965                                                 extra_chroma + 0.5); \
1966                                         input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1967                                                 in_row1[in_x2 * components + 2] * fraction2 +  \
1968                                                 in_row2[in_x1 * components + 2] * fraction3 +  \
1969                                                 in_row2[in_x2 * components + 2] * fraction4 +  \
1970                                                 extra_chroma + 0.5); \
1971                                 } \
1972                                 else \
1973                                 { \
1974                                         input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1975                                                 in_row1[in_x2 * components + 1] * fraction2 +  \
1976                                                 in_row2[in_x1 * components + 1] * fraction3 +  \
1977                                                 in_row2[in_x2 * components + 1] * fraction4 + 0.5); \
1978                                         input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1979                                                 in_row1[in_x2 * components + 2] * fraction2 +  \
1980                                                 in_row2[in_x1 * components + 2] * fraction3 +  \
1981                                                 in_row2[in_x2 * components + 2] * fraction4 + 0.5); \
1982                                 } \
1983          \
1984                                 if(components == 4) \
1985                                         input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 +  \
1986                                                 in_row1[in_x2 * components + 3] * fraction2 +  \
1987                                                 in_row2[in_x1 * components + 3] * fraction3 +  \
1988                                                 in_row2[in_x2 * components + 3] * fraction4 + 0.5); \
1989                         } \
1990                         else \
1991                         { \
1992                                 uint64_t fraction1 = x_fraction1_i * y_fraction1_i; \
1993                                 uint64_t fraction2 = x_fraction2_i * y_fraction1_i; \
1994                                 uint64_t fraction3 = x_fraction1_i * y_fraction2_i; \
1995                                 uint64_t fraction4 = x_fraction2_i * y_fraction2_i; \
1996          \
1997                                 input1 = (type)((in_row1[in_x1 * components] * fraction1 +  \
1998                                         in_row1[in_x2 * components] * fraction2 +  \
1999                                         in_row2[in_x1 * components] * fraction3 +  \
2000                                         in_row2[in_x2 * components] * fraction4) / 0xffffffff); \
2001          \
2002 /* Add chroma to fractional pixels */ \
2003                                 if(chroma_offset) \
2004                                 { \
2005                                         uint64_t extra_chroma = (0xffffffff - \
2006                                                 fraction1 - \
2007                                                 fraction2 - \
2008                                                 fraction3 - \
2009                                                 fraction4) * \
2010                                                 chroma_offset; \
2011                                         input2 = (type)((in_row1[in_x1 * components + 1] * fraction1 +  \
2012                                                 in_row1[in_x2 * components + 1] * fraction2 +  \
2013                                                 in_row2[in_x1 * components + 1] * fraction3 +  \
2014                                                 in_row2[in_x2 * components + 1] * fraction4 + \
2015                                                 extra_chroma) / 0xffffffff); \
2016                                         input3 = (type)((in_row1[in_x1 * components + 2] * fraction1 +  \
2017                                                 in_row1[in_x2 * components + 2] * fraction2 +  \
2018                                                 in_row2[in_x1 * components + 2] * fraction3 +  \
2019                                                 in_row2[in_x2 * components + 2] * fraction4 +  \
2020                                                 extra_chroma) / 0xffffffff); \
2021                                 } \
2022                                 else \
2023                                 { \
2024                                         input2 = (type)((in_row1[in_x1 * components + 1] * fraction1 +  \
2025                                                 in_row1[in_x2 * components + 1] * fraction2 +  \
2026                                                 in_row2[in_x1 * components + 1] * fraction3 +  \
2027                                                 in_row2[in_x2 * components + 1] * fraction4) / 0xffffffff); \
2028                                         input3 = (type)((in_row1[in_x1 * components + 2] * fraction1 +  \
2029                                                 in_row1[in_x2 * components + 2] * fraction2 +  \
2030                                                 in_row2[in_x1 * components + 2] * fraction3 +  \
2031                                                 in_row2[in_x2 * components + 2] * fraction4) / 0xffffffff); \
2032                                 } \
2033          \
2034                                 if(components == 4) \
2035                                         input4 = (type)((in_row1[in_x1 * components + 3] * fraction1 +  \
2036                                                 in_row1[in_x2 * components + 3] * fraction2 +  \
2037                                                 in_row2[in_x1 * components + 3] * fraction3 +  \
2038                                                 in_row2[in_x2 * components + 3] * fraction4) / 0xffffffff); \
2039                         } \
     /* Per-pixel opacity: master opacity scaled by the x and y output fractions. */ \
2041                         temp_type opacity; \
2042                         if(use_float) \
2043                                 opacity = (temp_type)(master_opacity *  \
2044                                         y_output_fraction_f *  \
2045                                         x_output_fraction_f + 0.5); \
2046                         else \
2047                                 opacity = (temp_type)((int64_t)master_opacity *  \
2048                                         y_output_fraction_i *  \
2049                                         x_output_fraction_i / \
2050                                         0xffffffff); \
2051                         temp_type transparency = max - opacity; \
2053 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
2055                         if(components == 3) \
2056                         { \
2057                                 BLEND_3(max, temp_type, type, chroma_offset); \
2058                         } \
2059                         else \
2060                         { \
2061                                 BLEND_4(max, temp_type, type, chroma_offset); \
2062                         } \
2063                 } \
2064         } \
// Processes one TranslatePackage: composites rows [pkg->out_row1,
// pkg->out_row2) of the engine's input frame onto its output frame.
//
// Steps: copy the engine's translate_* settings into locals whose names the
// TRANSLATE macro expects, build the x and y transfer tables for the whole
// output range (float or integer flavour depending on use_float), expand
// TRANSLATE for the input color model, then free the tables.  Color models
// not listed in the switch are left untouched.
2067 void TranslateUnit::process_package(LoadPackage *package)
2069         TranslatePackage *pkg = (TranslatePackage*)package;
2070         int out_y1_int; 
2071         int out_y2_int; 
2072         int out_x1_int; 
2073         int out_x2_int; 
2076 // Variables for TRANSLATE
     // Note that TRANSLATE declares its own int in_x1/in_x2/in_y1/in_y2 inside
     // its loops, which hide the float versions below while it runs.
2077         VFrame *input = engine->translate_input;
2078         VFrame *output = engine->translate_output;
2079         float in_x1 = engine->translate_in_x1;
2080         float in_y1 = engine->translate_in_y1;
2081         float in_x2 = engine->translate_in_x2;
2082         float in_y2 = engine->translate_in_y2;
2083         float out_x1 = engine->translate_out_x1;
2084         float out_y1 = engine->translate_out_y1;
2085         float out_x2 = engine->translate_out_x2;
2086         float out_y2 = engine->translate_out_y2;
2087         float alpha = engine->translate_alpha;
2088         int row1 = pkg->out_row1;
2089         int row2 = pkg->out_row2;
2090         int mode = engine->translate_mode;
2091         int in_total_x = input->get_w();
2092         int in_total_y = input->get_h();
     // NOTE(review): do_yuv is not referenced again in this function or in
     // TRANSLATE; it looks unused unless a BLEND macro reads it - confirm.
2093         int do_yuv = 
2094                 (engine->translate_input->get_color_model() == BC_YUV888 ||
2095                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
2096                 engine->translate_input->get_color_model() == BC_YUV161616 ||
2097                 engine->translate_input->get_color_model() == BC_YUVA16161616);
2099         transfer_table_f *x_table_f; 
2100         transfer_table_f *y_table_f; 
2101         transfer_table_i *x_table_i; 
2102         transfer_table_i *y_table_i; 
     // Only the table pair matching use_float is allocated (and later freed).
2104         if(use_float)
2105         {
2106                 translation_array_f(x_table_f,  
2107                         out_x1,  
2108                         out_x2, 
2109                         in_x1, 
2110                         in_x2, 
2111                         in_total_x,  
2112                         output->get_w(),  
2113                         out_x1_int, 
2114                         out_x2_int); 
2115                 translation_array_f(y_table_f,  
2116                         out_y1,  
2117                         out_y2, 
2118                         in_y1, 
2119                         in_y2, 
2120                         in_total_y,  
2121                         output->get_h(),  
2122                         out_y1_int, 
2123                         out_y2_int); 
2124         }
2125         else
2126         {
2127                 translation_array_i(x_table_i,  
2128                         out_x1,  
2129                         out_x2, 
2130                         in_x1, 
2131                         in_x2, 
2132                         in_total_x,  
2133                         output->get_w(),  
2134                         out_x1_int, 
2135                         out_x2_int); 
2136                 translation_array_i(y_table_i,  
2137                         out_y1,  
2138                         out_y2, 
2139                         in_y1, 
2140                         in_y2, 
2141                         in_total_y,  
2142                         output->get_h(),  
2143                         out_y1_int, 
2144                         out_y2_int); 
2145         }
2146 //      printf("TranslateUnit::process_package 1 %d\n", mode);
2147 //      Timer a;
2148 //      a.update();
     // Macro arguments: max value, arithmetic type, component type,
     // components per pixel, chroma offset (0 for RGB).
2150         switch(engine->translate_input->get_color_model())
2151         {
2152                 case BC_RGB888:
2153                         TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
2154                         break;
2156                 case BC_RGBA8888:
2157                         TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
2158                         break;
2160                 case BC_RGB161616:
2161                         TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2162                         break;
2164                 case BC_RGBA16161616:
2165                         TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2166                         break;
2168                 case BC_YUV888:
2169                         TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
2170                         break;
2172                 case BC_YUVA8888:
2173                         TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
2174                         break;
2176                 case BC_YUV161616:
2177                         TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2178                         break;
2180                 case BC_YUVA16161616:
2181                         TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2182                         break;
2183         }
2184 //      printf("blend mode %i, took %li ms\n", mode, a.get_difference());
     // Release the tables allocated above.
2186         if(use_float)
2187         {
2188                 delete [] x_table_f; 
2189                 delete [] y_table_f; 
2190         }
2191         else
2192         {
2193                 delete [] x_table_i; 
2194                 delete [] y_table_i; 
2195         }
// Creates the translation load server and remembers the owning OverlayFrame.
// cpus is forwarded as both LoadServer constructor arguments (presumably
// the client count and the package count - confirm against LoadServer).
2207 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2208  : LoadServer(cpus, cpus)
2210         this->overlay = overlay;
// Destructor.  No body statements appear in this listing.
2213 TranslateEngine::~TranslateEngine()
// Splits the output rows touched by the translation into total_packages
// contiguous bands.  The row range is derived the same way the transfer
// tables derive it: translate_out_y1 truncated to int, and the ceiling of
// translate_out_y2 limited to the output frame height.  Each band gets
// out_h / total_packages rows (integer division); the last band is extended
// to out_y2_int so the remainder rows are covered.
2217 void TranslateEngine::init_packages()
2219         int out_y1_int = (int)translate_out_y1;
2220         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2221         int out_h = out_y2_int - out_y1_int;
2223         for(int i = 0; i < total_packages; i++)
2224         {
2225                 TranslatePackage *package = (TranslatePackage*)packages[i];
2226                 package->out_row1 = (int)(out_y1_int + out_h / 
2227                         total_packages * 
2228                         i);
2229                 package->out_row2 = (int)((float)package->out_row1 + 
2230                         out_h / 
2231                         total_packages);
                     // Last package: extend to the end of the row range.
2232                 if(i >= total_packages - 1)
2233                         package->out_row2 = out_y2_int;
2234         }
// Factory hook: returns a new TranslateUnit bound to this engine and its overlay.
2237 LoadClient* TranslateEngine::new_client()
2239         return new TranslateUnit(this, overlay);
// Factory hook: returns a new, default-constructed TranslatePackage.
2242 LoadPackage* TranslateEngine::new_package()
2244         return new TranslatePackage;
// SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset)
//
// Table-driven scale + translate without interpolation: each output pixel
// takes exactly one input pixel, which is then passed through BLEND_3 or
// BLEND_4.  The macro reads these names from the expansion site: pkg, alpha,
// in_x1, in_x2, out_x1, out_x2, out_y1, in_rows, out_rows, x_table, y_table
// (and whatever the BLEND macros pick up by name, e.g. mode).
//
//   max           - largest component value
//   temp_type     - arithmetic type used for opacity and blending
//   type          - storage type of one component
//   components    - components per pixel, 3 or 4
//   chroma_offset - forwarded unchanged to the BLEND macros
//
// y_table maps an output row (relative to out_y1) to an input row.  x_table
// maps an output column (relative to out_x1) to an input column relative to
// in_x1; it is only consulted when the input and output widths differ.
2254 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
2255 { \
2256         temp_type opacity = (temp_type)(alpha * max + 0.5); \
2257         temp_type transparency = max - opacity; \
2258         int out_w = out_x2 - out_x1; \
2260         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2261         { \
2262                 int in_y = y_table[i - out_y1]; \
2263                 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
2264                 type *output = (type*)out_rows[i] + out_x1 * components; \
2266 /* X direction is scaled and requires a table lookup */ \
2267                 if(out_w != in_x2 - in_x1) \
2268                 { \
2269                         for(int j = 0; j < out_w; j++) \
2270                         { \
2271                                 type *in_row_plus_x = in_row + x_table[j] * components; \
2272                                 temp_type input1, input2, input3, input4; \
2273          \
2274                                 input1 = in_row_plus_x[0]; \
2275                                 input2 = in_row_plus_x[1]; \
2276                                 input3 = in_row_plus_x[2]; \
2277                                 if(components == 4) \
2278                                         input4 = in_row_plus_x[3]; \
2279          \
2280                                 if(components == 3) \
2281                                 { \
2282                                         BLEND_3(max, temp_type, type, chroma_offset); \
2283                                 } \
2284                                 else \
2285                                 { \
2286                                         BLEND_4(max, temp_type, type, chroma_offset); \
2287                                 } \
2288                                 output += components; \
2289                         } \
2290                 } \
2291                 else \
2292 /* X direction is not scaled */ \
2293                 { \
2294                         for(int j = 0; j < out_w; j++) \
2295                         { \
2296                                 temp_type input1, input2, input3, input4; \
2297          \
2298                                 input1 = in_row[0]; \
2299                                 input2 = in_row[1]; \
2300                                 input3 = in_row[2]; \
2301                                 if(components == 4) \
2302                                         input4 = in_row[3]; \
2303          \
2304                                 if(components == 3) \
2305                                 { \
2306                                         BLEND_3(max, temp_type, type, chroma_offset); \
2307                                 } \
2308                                 else \
2309                                 { \
2310                                         BLEND_4(max, temp_type, type, chroma_offset); \
2311                                 } \
2312                                 in_row += components; \
2313                                 output += components; \
2314                         } \
2315                 } \
2316         } \
// Creates one scale/translate worker.  The server is handed to the
// LoadClient base and also kept as scale_translate; the owning OverlayFrame
// is kept as overlay.
2321 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2322  : LoadClient(server)
2324         this->overlay = overlay;
2325         this->scale_translate = server;
// Destructor.  No body statements appear in this listing.
2328 ScaleTranslateUnit::~ScaleTranslateUnit()
2332 void ScaleTranslateUnit::scale_array(int* &table, 
2333         int out_x1, 
2334         int out_x2,
2335         int in_x1,
2336         int in_x2,
2337         int is_x)
2339         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2341         table = new int[out_x2 - out_x1];
2342         
2343         if(!is_x)
2344         {
2345                 for(int i = 0; i < out_x2 - out_x1; i++)
2346                 {
2347                         table[i] = (int)((float)i / scale + in_x1);
2348                 }
2349         }
2350         else
2351         {       
2352                 for(int i = 0; i < out_x2 - out_x1; i++)
2353                 {
2354                         table[i] = (int)((float)i / scale);
2355                 }
2356         }
2360 void ScaleTranslateUnit::process_package(LoadPackage *package)
2362         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2364 // Args for NEAREST_NEIGHBOR_MACRO
2365         VFrame *output = scale_translate->output;
2366         VFrame *input = scale_translate->input;
2367         int in_x1 = scale_translate->in_x1;
2368         int in_y1 = scale_translate->in_y1;
2369         int in_x2 = scale_translate->in_x2;
2370         int in_y2 = scale_translate->in_y2;
2371         int out_x1 = scale_translate->out_x1;
2372         int out_y1 = scale_translate->out_y1;
2373         int out_x2 = scale_translate->out_x2;
2374         int out_y2 = scale_translate->out_y2;
2375         float alpha = scale_translate->alpha;
2376         int mode = scale_translate->mode;
2378         int *x_table;
2379         int *y_table;
2380         unsigned char **in_rows = input->get_rows();
2381         unsigned char **out_rows = output->get_rows();
2383 //      Timer a;
2384 //      a.update();
2385 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
2386         if(out_x2 - out_x1 != in_x2 - in_x1)
2387         {
2388                 scale_array(x_table, 
2389                         out_x1, 
2390                         out_x2,
2391                         in_x1,
2392                         in_x2,
2393                         1);
2394         }
2395         scale_array(y_table, 
2396                 out_y1, 
2397                 out_y2,
2398                 in_y1,
2399                 in_y2,
2400                 0);
2403         switch(input->get_color_model())
2404         {
2405                 case BC_RGB888:
2406                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2407                         break;
2409                 case BC_YUV888:
2410                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2411                         break;
2413                 case BC_RGBA8888:
2414                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2415                         break;
2417                 case BC_YUVA8888:
2418                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2419                         break;
2422                 case BC_RGB161616:
2423                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2424                         break;
2426                 case BC_YUV161616:
2427                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2428                         break;
2430                 case BC_RGBA16161616:
2431                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2432                         break;
2434                 case BC_YUVA16161616:
2435                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2436                         break;
2437         }
2438         
2439 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2440         if(out_x2 - out_x1 != in_x2 - in_x1)
2441                 delete [] x_table;
2442         delete [] y_table;
2454 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2455  : LoadServer(cpus, cpus)
2457         this->overlay = overlay;
2460 ScaleTranslateEngine::~ScaleTranslateEngine()
2464 void ScaleTranslateEngine::init_packages()
2466         int out_h = out_y2 - out_y1;
2468         for(int i = 0; i < total_packages; i++)
2469         {
2470                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
2471                 package->out_row1 = (int)(out_y1 + out_h / 
2472                         total_packages * 
2473                         i);
2474                 package->out_row2 = (int)((float)package->out_row1 + 
2475                         out_h / 
2476                         total_packages);
2477                 if(i >= total_packages - 1)
2478                         package->out_row2 = out_y2;
2479         }
2482 LoadClient* ScaleTranslateEngine::new_client()
2484         return new ScaleTranslateUnit(this, overlay);
2487 LoadPackage* ScaleTranslateEngine::new_package()
2489         return new ScaleTranslatePackage;
2493 ScaleTranslatePackage::ScaleTranslatePackage()
// Blends the input frame onto the output frame pixel by pixel, without any
// scaling or translation, delegating the per-pixel math to BLEND_3 (three
// components) or BLEND_4 (otherwise).
//
// Expected in the expanding scope: input, output (objects providing
// get_rows(), get_w(), get_h()), float alpha, and pkg with out_row1 /
// out_row2 bounding the rows to process.
//
// opacity, transparency, input1..input4, in_row and output are declared
// under these exact names because BLEND_3 / BLEND_4 pick them up by name
// (input1..3 and output are visibly used by BLEND_3; the rest presumably -
// confirm before renaming). h is not used in this macro itself.
#define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
{ \
	temp_type opacity = (temp_type)(alpha * max + 0.5); \
	temp_type transparency = max - opacity; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
	int h = input->get_h(); \
 \
	for(int row = pkg->out_row1; row < pkg->out_row2; row++) \
	{ \
		type* in_row = input_rows[row]; \
		type* output = output_rows[row]; \
 \
/* One iteration per pixel of the row. */ \
		for(int left = w; left > 0; left--) \
		{ \
			temp_type input1 = in_row[0]; \
			temp_type input2 = in_row[1]; \
			temp_type input3 = in_row[2]; \
			temp_type input4; \
 \
			if(components == 3) \
			{ \
				BLEND_3(max, temp_type, type, chroma_offset); \
			} \
			else \
			{ \
/* The alpha sample only exists for 4 component pixels. */ \
				if(components == 4) input4 = in_row[3]; \
				BLEND_4(max, temp_type, type, chroma_offset); \
			} \
 \
			in_row += components; \
			output += components; \
		} \
	} \
}
// TRANSFER_REPLACE without scaling: copies whole rows from input to output.
//   type       - sample type (determines bytes per component).
//   components - samples per pixel.
//
// Expected in the expanding scope: input, output (objects providing
// get_rows() and get_w()) and pkg with out_row1 / out_row2 bounding the rows
// to copy. The row length is taken from the input frame; the output rows are
// assumed to be at least that long.
#define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
{ \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
/* Bytes in one full row of pixels. */ \
	int line_len = w * sizeof(type) * (components); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		memcpy(output_rows[i], input_rows[i], line_len); \
	} \
}
// TRANSFER_NORMAL without scaling for pixels that carry an alpha sample.
// components is always 4
//   temp_type     - integer type wide enough for sample * (1 << 2 * maxbits).
//   type          - sample type of both frames.
//   chroma_offset - value subtracted from samples 1 and 2 before mixing and
//                   added back afterwards (0 at the RGB call sites).
//   maxbits       - bits per sample; fixed point 1.0 is 1 << maxbits.
//
// Expected in the expanding scope: input, output (objects providing
// get_rows() and get_w()), float alpha, and pkg with out_row1 / out_row2.
//
// Each colour sample becomes
//   (in * pixel_opacity + out * pixel_transparency) >> (2 * maxbits)
// where pixel_opacity = global opacity * source alpha. The output alpha is
// the larger of the two alphas.
#define BLEND_ONLY_4_NORMAL(temp_type, type, chroma_offset, maxbits) \
{ \
	temp_type opacity = (temp_type)(alpha * (((temp_type) 1) << (maxbits)) + 0.5); \
	temp_type maxsq = ((temp_type) 1) << ((maxbits) * 2) ; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type* in_row = input_rows[i]; \
		type* output = output_rows[i]; \
 \
		for(int j = 0; j < w; j++) \
		{ \
			temp_type pixel_opacity, pixel_transparency; \
/* Global opacity scaled by this pixel's own alpha. */ \
			pixel_opacity = opacity * in_row[3]; \
			pixel_transparency = maxsq - pixel_opacity; \
		 \
		 \
			output[0] = (type)(((temp_type)in_row[0] * pixel_opacity + \
				(temp_type)output[0] * pixel_transparency) >> ((maxbits) * 2)); \
			output[1] = (type)(((((temp_type)in_row[1] - (chroma_offset)) * pixel_opacity + \
				((temp_type)output[1] - (chroma_offset)) * pixel_transparency) \
				>> ((maxbits) * 2)) + \
				(chroma_offset)); \
			output[2] = (type)(((((temp_type)in_row[2] - (chroma_offset)) * pixel_opacity + \
				((temp_type)output[2] - (chroma_offset)) * pixel_transparency) \
				>> ((maxbits) * 2)) + \
				(chroma_offset)); \
			output[3] = (type)(in_row[3] > output[3] ? in_row[3] : output[3]); \
 \
			in_row += 4; \
			output += 4; \
		} \
	} \
}
// TRANSFER_NORMAL without scaling for pixels that have no alpha sample.
// components is always 3
//   temp_type     - integer type wide enough for sample * (1 << maxbits).
//   type          - sample type of both frames.
//   chroma_offset - unused: opacity + transparency == 1 << maxbits, so any
//                   constant offset cancels out of the weighted sum.
//   maxbits       - bits per sample; fixed point 1.0 is 1 << maxbits.
//
// Expected in the expanding scope: input, output (objects providing
// get_rows() and get_w()), float alpha, and pkg with out_row1 / out_row2.
//
// All samples are treated alike, so each row is walked as a flat array of
// 3 * width samples.
#define BLEND_ONLY_3_NORMAL(temp_type, type, chroma_offset, maxbits) \
{ \
	temp_type opacity = (temp_type)(alpha * (((temp_type) 1) << (maxbits)) + 0.5); \
	temp_type transparency = (((temp_type) 1) << (maxbits)) - opacity; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w() * 3; \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type* in_row = input_rows[i]; \
		type* output = output_rows[i]; \
 \
		for(int j = 0; j < w; j++) /* w = 3x width! */ \
		{ \
/* Shift back to sample range BEFORE narrowing to type: a cast binds */ \
/* tighter than >>, so casting first would discard the high bits and */ \
/* leave 0 after the shift. */ \
			*output = (type)(((temp_type)*in_row * opacity + \
				(temp_type)*output * transparency) >> (maxbits)); \
			in_row ++; \
			output ++; \
		} \
	} \
}
2646 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2647  : LoadClient(server)
2649         this->overlay = overlay;
2650         this->blend_engine = server;
2653 BlendUnit::~BlendUnit()
2657 void BlendUnit::process_package(LoadPackage *package)
2659         BlendPackage *pkg = (BlendPackage*)package;
2662         VFrame *output = blend_engine->output;
2663         VFrame *input = blend_engine->input;
2664         float alpha = blend_engine->alpha;
2665         if (alpha > 1.0) alpha = 1.0;
2666         int mode = blend_engine->mode;
2668         if (mode == TRANSFER_REPLACE) 
2669         {
2670                 switch(input->get_color_model())
2671                 {
2672                         case BC_RGB888:
2673                         case BC_YUV888:
2674                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2675                                 break;
2676                         case BC_RGBA8888:
2677                         case BC_YUVA8888:
2678                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2679                                 break;
2680                         case BC_RGB161616:
2681                         case BC_YUV161616:
2682                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2683                                 break;
2684                         case BC_RGBA16161616:
2685                         case BC_YUVA16161616:
2686                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2687                                 break;
2688                 }
2689         } 
2690         else
2691         if (mode == TRANSFER_NORMAL) 
2692         {
2693                 switch(input->get_color_model())
2694                 {
2695                         case BC_RGB888:
2696                                 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0, 8);
2697                                 break;
2698                         case BC_YUV888:
2699                                 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0x80, 8);
2700                                 break;
2701                         case BC_RGBA8888:
2702                                 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0, 8);
2703                                 break;
2704                         case BC_YUVA8888:
2705                                 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0x80, 8);
2706                                 break;
2707                         case BC_RGB161616:
2708                                 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0, 16);
2709                                 break;
2710                         case BC_YUV161616:
2711                                 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0x8000, 16);
2712                                 break;
2713                         case BC_RGBA16161616:
2714                                 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0, 16);
2715                                 break;
2716                         case BC_YUVA16161616:
2717                                 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0x8000, 16);
2718                                 break;
2719                 }
2720         } 
2721         else
2722         switch(input->get_color_model())
2723         {
2724                 case BC_RGB888:
2725                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 3, 0);
2726                         break;
2727                 case BC_YUV888:
2728                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2729                         break;
2730                 case BC_RGBA8888:
2731                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 4, 0);
2732                         break;
2733                 case BC_YUVA8888:
2734                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2735                         break;
2736                 case BC_RGB161616:
2737                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 3, 0);
2738                         break;
2739                 case BC_YUV161616:
2740                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2741                         break;
2742                 case BC_RGBA16161616:
2743                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 4, 0);
2744                         break;
2745                 case BC_YUVA16161616:
2746                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
2747                         break;
2748         }
2753 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2754  : LoadServer(cpus, cpus)
2756         this->overlay = overlay;
2759 BlendEngine::~BlendEngine()
2763 void BlendEngine::init_packages()
2765         for(int i = 0; i < total_packages; i++)
2766         {
2767                 BlendPackage *package = (BlendPackage*)packages[i];
2768                 package->out_row1 = (int)(input->get_h() / 
2769                         total_packages * 
2770                         i);
2771                 package->out_row2 = (int)((float)package->out_row1 +
2772                         input->get_h() / 
2773                         total_packages);
2775                 if(i >= total_packages - 1)
2776                         package->out_row2 = input->get_h();
2777         }
2780 LoadClient* BlendEngine::new_client()
2782         return new BlendUnit(this, overlay);
2785 LoadPackage* BlendEngine::new_package()
2787         return new BlendPackage;
2791 BlendPackage::BlendPackage()