r793: Small API addon, so plugins can 'see' camera and projector automation
[cinelerra_cv/mob.git] / cinelerra / overlayframe.C
blob2124a51d3b59eadfb31b7eb394db326e0192c27d
1 #include <math.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include <stdint.h>
5 #include <stdlib.h>
6 #include <unistd.h>
8 #include "clip.h"
9 #include "edl.inc"
10 #include "mutex.h"
11 #include "overlayframe.h"
12 #include "units.h"
13 #include "vframe.h"
15 // Easy abstraction of the float and int types.  Most of these are never used
16 // but GCC expects them.
// Absolute value helpers, overloaded on every temp_type the blend macros
// may be instantiated with.  Each overload returns the type of its argument
// so 64 bit magnitudes are not truncated and large unsigned values do not
// turn negative.
static int32_t my_abs(int32_t x)
{
	return abs(x);
}

// Unsigned values are already their own magnitude.
static uint32_t my_abs(uint32_t x)
{
	return x;
}

static int64_t my_abs(int64_t x)
{
	return llabs(x);
}

// Unsigned values are already their own magnitude.
static uint64_t my_abs(uint64_t x)
{
	return x;
}

static float my_abs(float x)
{
	return fabsf(x);
}
45 OverlayFrame::OverlayFrame(int cpus)
47         temp_frame = 0;
48         blend_engine = 0;
49         scale_engine = 0;
50         scaletranslate_engine = 0;
51         translate_engine = 0;
52         this->cpus = cpus;
55 OverlayFrame::~OverlayFrame()
57         if(temp_frame) delete temp_frame;
58         if(scale_engine) delete scale_engine;
59         if(translate_engine) delete translate_engine;
60         if(blend_engine) delete blend_engine;
61         if(scaletranslate_engine) delete scaletranslate_engine;
71 // Verification: 
73 // (255 * 255 + 0 * 0) / 255 = 255
74 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
76 // (65535 * 65535 + 0 * 0) / 65535 = 65535
77 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
80 // Branch prediction 4 U
// Blend one 3 component input pixel into the output pixel.
//
// max           - full scale value of one component
// temp_type     - arithmetic type used for the intermediate results
// type          - storage type of one component
// chroma_offset - zero level of components 1 and 2, or 0 when they are not
//                 offset
//
// The expansion site must provide: mode, opacity, transparency, input1,
// input2, input3 and output (indexable, components 0 - 2).
//
// Every mode except TRANSFER_REPLACE mixes its result with the existing
// output as (result * opacity + output * transparency) / max.
// An unrecognized mode leaves the output pixel unchanged.
#define BLEND_3(max, temp_type, type, chroma_offset) \
{ \
	temp_type r, g, b; \
 \
	switch(mode) \
	{ \
		case TRANSFER_DIVIDE: \
			r = output[0] ? (((temp_type)input1 * max) / output[0]) : max; \
			if(chroma_offset) \
			{ \
/* Offset components: keep whichever is further from the offset */ \
				g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
				b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
			} \
			else \
			{ \
				g = output[1] ? (temp_type)input2 * max / (temp_type)output[1] : max; \
				b = output[2] ? (temp_type)input3 * max / (temp_type)output[2] : max; \
			} \
			r = (r * opacity + (temp_type)output[0] * transparency) / max; \
			g = (g * opacity + (temp_type)output[1] * transparency) / max; \
			b = (b * opacity + (temp_type)output[2] * transparency) / max; \
			break; \
		case TRANSFER_MULTIPLY: \
			r = ((temp_type)input1 * output[0]) / max; \
			if(chroma_offset) \
			{ \
/* Offset components: keep whichever is further from the offset */ \
				g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
				b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
			} \
			else \
			{ \
				g = (temp_type)input2 * (temp_type)output[1] / max; \
				b = (temp_type)input3 * (temp_type)output[2] / max; \
			} \
			r = (r * opacity + (temp_type)output[0] * transparency) / max; \
			g = (g * opacity + (temp_type)output[1] * transparency) / max; \
			b = (b * opacity + (temp_type)output[2] * transparency) / max; \
			break; \
		case TRANSFER_SUBTRACT: \
			r = (temp_type)output[0] - (temp_type)input1; \
			g = ((temp_type)output[1] - (temp_type)chroma_offset) - \
				((temp_type)input2 - (temp_type)chroma_offset) + \
				(temp_type)chroma_offset; \
			b = ((temp_type)output[2] - (temp_type)chroma_offset) - \
				((temp_type)input3 - (temp_type)chroma_offset) + \
				(temp_type)chroma_offset; \
			r = (r * opacity + (temp_type)output[0] * transparency) / max; \
			g = (g * opacity + (temp_type)output[1] * transparency) / max; \
			b = (b * opacity + (temp_type)output[2] * transparency) / max; \
			break; \
		case TRANSFER_ADDITION: \
			r = (temp_type)input1 + output[0]; \
			g = ((temp_type)input2 - chroma_offset) + \
				((temp_type)output[1] - chroma_offset) + \
				(temp_type)chroma_offset; \
			b = ((temp_type)input3 - chroma_offset) + \
				((temp_type)output[2] - chroma_offset) + \
				(temp_type)chroma_offset; \
			r = (r * opacity + (temp_type)output[0] * transparency) / max; \
			g = (g * opacity + (temp_type)output[1] * transparency) / max; \
			b = (b * opacity + (temp_type)output[2] * transparency) / max; \
			break; \
		case TRANSFER_REPLACE: \
			r = input1; \
			g = input2; \
			b = input3; \
			break; \
		case TRANSFER_NORMAL: \
			r = ((temp_type)input1 * opacity + (temp_type)output[0] * transparency) / max; \
			g = ((temp_type)input2 * opacity + (temp_type)output[1] * transparency) / max; \
			b = ((temp_type)input3 * opacity + (temp_type)output[2] * transparency) / max; \
			break; \
		default: \
/* Unknown mode: never store uninitialized values */ \
			r = output[0]; \
			g = output[1]; \
			b = output[2]; \
			break; \
	} \
 \
/* Component types that are not 4 bytes wide are clipped to 0 - max. */ \
/* 4 byte component types are stored unclipped. */ \
	if(sizeof(type) != 4) \
	{ \
		output[0] = (type)CLIP(r, 0, max); \
		output[1] = (type)CLIP(g, 0, max); \
		output[2] = (type)CLIP(b, 0, max); \
	} \
	else \
	{ \
		output[0] = r; \
		output[1] = g; \
		output[2] = b; \
	} \
}
175 // Blending equations are drastically different for 3 and 4 components
// Blend one 4 component input pixel (last component is alpha) into the
// output pixel.
//
// max           - full scale value of one component
// temp_type     - arithmetic type used for the intermediate results
// type          - storage type of one component
// chroma_offset - zero level of components 1 and 2, or 0 when they are not
//                 offset
//
// The expansion site must provide: mode, opacity, input1, input2, input3,
// input4 and output (indexable, components 0 - 3).
//
// The per pixel opacity is opacity * input4, so it is scaled by max * max
// and every mix is divided by max twice.  Except for TRANSFER_REPLACE the
// resulting alpha is the larger of the input and output alpha.
// An unrecognized mode leaves the output pixel unchanged.
#define BLEND_4(max, temp_type, type, chroma_offset) \
{ \
	temp_type r, g, b, a; \
	temp_type pixel_opacity, pixel_transparency; \
	temp_type output1 = output[0]; \
	temp_type output2 = output[1]; \
	temp_type output3 = output[2]; \
	temp_type output4 = output[3]; \
 \
	pixel_opacity = opacity * input4; \
	pixel_transparency = (temp_type)max * max - pixel_opacity; \
 \
	switch(mode) \
	{ \
		case TRANSFER_DIVIDE: \
			r = output1 ? (((temp_type)input1 * max) / output1) : max; \
			if(chroma_offset) \
			{ \
/* Offset components: keep whichever is further from the offset */ \
				g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
				b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
			} \
			else \
			{ \
				g = output2 ? (temp_type)input2 * max / (temp_type)output2 : max; \
				b = output3 ? (temp_type)input3 * max / (temp_type)output3 : max; \
			} \
			r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
			g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
			b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
			a = input4 > output4 ? input4 : output4; \
			break; \
		case TRANSFER_MULTIPLY: \
			r = ((temp_type)input1 * output1) / max; \
			if(chroma_offset) \
			{ \
/* Offset components: keep whichever is further from the offset */ \
				g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
				b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
			} \
			else \
			{ \
				g = (temp_type)input2 * (temp_type)output2 / max; \
				b = (temp_type)input3 * (temp_type)output3 / max; \
			} \
			r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
			g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
			b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
			a = input4 > output4 ? input4 : output4; \
			break; \
		case TRANSFER_SUBTRACT: \
/* output - input for all three components, same as BLEND_3 */ \
			r = output1 - (temp_type)input1; \
			g = ((temp_type)output2 - chroma_offset) - \
				((temp_type)input2 - (temp_type)chroma_offset) + \
				(temp_type)chroma_offset; \
			b = ((temp_type)output3 - chroma_offset) - \
				((temp_type)input3 - (temp_type)chroma_offset) + \
				(temp_type)chroma_offset; \
			r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
			g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
			b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
			a = input4 > output4 ? input4 : output4; \
			break; \
		case TRANSFER_ADDITION: \
			r = (temp_type)input1 + output1; \
			g = ((temp_type)input2 - chroma_offset) + \
				((temp_type)output2 - chroma_offset) + \
				chroma_offset; \
			b = ((temp_type)input3 - chroma_offset) + \
				((temp_type)output3 - chroma_offset) + \
				chroma_offset; \
			r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
			g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
			b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
			a = input4 > output4 ? input4 : output4; \
			break; \
		case TRANSFER_REPLACE: \
			r = input1; \
			g = input2; \
			b = input3; \
			a = input4; \
			break; \
		case TRANSFER_NORMAL: \
			r = (input1 * pixel_opacity + \
				output1 * pixel_transparency) / max / max; \
			g = ((input2 - chroma_offset) * pixel_opacity + \
				(output2 - chroma_offset) * pixel_transparency) \
				/ max / max + \
				chroma_offset; \
			b = ((input3 - chroma_offset) * pixel_opacity + \
				(output3 - chroma_offset) * pixel_transparency) \
				/ max / max + \
				chroma_offset; \
			a = input4 > output4 ? input4 : output4; \
			break; \
		default: \
/* Unknown mode: never store uninitialized values */ \
			r = output1; \
			g = output2; \
			b = output3; \
			a = output4; \
			break; \
	} \
 \
/* Component types that are not 4 bytes wide get their color components */ \
/* clipped to 0 - max.  4 byte component types are stored unclipped. */ \
	if(sizeof(type) != 4) \
	{ \
		output[0] = (type)CLIP(r, 0, max); \
		output[1] = (type)CLIP(g, 0, max); \
		output[2] = (type)CLIP(b, 0, max); \
		output[3] = (type)a; \
	} \
	else \
	{ \
		output[0] = r; \
		output[1] = g; \
		output[2] = b; \
		output[3] = a; \
	} \
}
289 // Bicubic algorithm using multiprocessors
290 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
292 // Nearest neighbor algorithm using multiprocessors for blending
293 // input -> scale + translate -> blend -> output
296 int OverlayFrame::overlay(VFrame *output, 
297         VFrame *input, 
298         float in_x1, 
299         float in_y1, 
300         float in_x2, 
301         float in_y2, 
302         float out_x1, 
303         float out_y1, 
304         float out_x2, 
305         float out_y2, 
306         float alpha,       // 0 - 1
307         int mode,
308         int interpolation_type)
310         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
311         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
320         if(isnan(in_x1) ||
321                 isnan(in_y1) ||
322                 isnan(in_x2) ||
323                 isnan(in_y2) ||
324                 isnan(out_x1) ||
325                 isnan(out_y1) ||
326                 isnan(out_x2) ||
327                 isnan(out_y2)) return 1;
328 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f scale=%f %f\n", in_x1,
329 // in_y1,
330 // in_x2,
331 // in_y2,
332 // out_x1,
333 // out_y1,
334 // out_x2,
335 // out_y2,
336 // out_x2 - out_x1, 
337 // out_y2 - out_y1);
339 // Limit values
340         if(in_x1 < 0)
341         {
342                 out_x1 += -in_x1 * w_scale;
343                 in_x1 = 0;
344         }
345         else
346         if(in_x1 >= input->get_w())
347         {
348                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
349                 in_x1 = input->get_w();
350         }
352         if(in_y1 < 0)
353         {
354                 out_y1 += -in_y1 * h_scale;
355                 in_y1 = 0;
356         }
357         else
358         if(in_y1 >= input->get_h())
359         {
360                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
361                 in_y1 = input->get_h();
362         }
364         if(in_x2 < 0)
365         {
366                 out_x2 += -in_x2 * w_scale;
367                 in_x2 = 0;
368         }
369         else
370         if(in_x2 >= input->get_w())
371         {
372                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
373                 in_x2 = input->get_w();
374         }
376         if(in_y2 < 0)
377         {
378                 out_y2 += -in_y2 * h_scale;
379                 in_y2 = 0;
380         }
381         else
382         if(in_y2 >= input->get_h())
383         {
384                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
385                 in_y2 = input->get_h();
386         }
388         if(out_x1 < 0)
389         {
390                 in_x1 += -out_x1 / w_scale;
391                 out_x1 = 0;
392         }
393         else
394         if(out_x1 >= output->get_w())
395         {
396                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
397                 out_x1 = output->get_w();
398         }
400         if(out_y1 < 0)
401         {
402                 in_y1 += -out_y1 / h_scale;
403                 out_y1 = 0;
404         }
405         else
406         if(out_y1 >= output->get_h())
407         {
408                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
409                 out_y1 = output->get_h();
410         }
412         if(out_x2 < 0)
413         {
414                 in_x2 += -out_x2 / w_scale;
415                 out_x2 = 0;
416         }
417         else
418         if(out_x2 >= output->get_w())
419         {
420                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
421                 out_x2 = output->get_w();
422         }
424         if(out_y2 < 0)
425         {
426                 in_y2 += -out_y2 / h_scale;
427                 out_y2 = 0;
428         }
429         else
430         if(out_y2 >= output->get_h())
431         {
432                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
433                 out_y2 = output->get_h();
434         }
445         float in_w = in_x2 - in_x1;
446         float in_h = in_y2 - in_y1;
447         float out_w = out_x2 - out_x1;
448         float out_h = out_y2 - out_y1;
449 // Input for translation operation
450         VFrame *translation_input = input;
453         if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
456 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
457 //                      in_y1,
458 //                      in_x2,
459 //                      in_y2,
460 //                      out_x1,
461 //                      out_y1,
462 //                      out_x2,
463 //                      out_y2);
469 // ****************************************************************************
470 // Transfer to temp buffer by scaling nearest integer boundaries
471 // ****************************************************************************
472         if(interpolation_type != NEAREST_NEIGHBOR &&
473                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
474         {
475 // Create integer boundaries for interpolation
476                 int in_x1_int = (int)in_x1;
477                 int in_y1_int = (int)in_y1;
478                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
479                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
480                 int out_x1_int = (int)out_x1;
481                 int out_y1_int = (int)out_y1;
482                 int out_x2_int = MIN((int)ceil(out_x2), output->get_w());
483                 int out_y2_int = MIN((int)ceil(out_y2), output->get_h());
485 // Dimensions of temp frame.  Integer boundaries scaled.
486                 int temp_w = (out_x2_int - out_x1_int);
487                 int temp_h = (out_y2_int - out_y1_int);
488                 VFrame *scale_output;
492 #define NO_TRANSLATION1 \
493         (EQUIV(in_x1, 0) && \
494         EQUIV(in_y1, 0) && \
495         EQUIV(out_x1, 0) && \
496         EQUIV(out_y1, 0) && \
497         EQUIV(in_x2, in_x2_int) && \
498         EQUIV(in_y2, in_y2_int) && \
499         EQUIV(out_x2, temp_w) && \
500         EQUIV(out_y2, temp_h))
503 #define NO_BLEND \
504         (EQUIV(alpha, 1) && \
505         (mode == TRANSFER_REPLACE || \
506         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
512 // Prepare destination for operation
514 // No translation and no blending.  The blending operation is built into the
515 // translation unit but not the scaling unit.
516 // input -> output
517                 if(NO_TRANSLATION1 &&
518                         NO_BLEND)
519                 {
520 // printf("OverlayFrame::overlay input -> output\n");
522                         scale_output = output;
523                         translation_input = 0;
524                 }
525                 else
526 // If translation or blending
527 // input -> nearest integer boundary temp
528                 {
529                         if(temp_frame && 
530                                 (temp_frame->get_w() != temp_w ||
531                                         temp_frame->get_h() != temp_h))
532                         {
533                                 delete temp_frame;
534                                 temp_frame = 0;
535                         }
537                         if(!temp_frame)
538                         {
539                                 temp_frame = new VFrame(0,
540                                         temp_w,
541                                         temp_h,
542                                         input->get_color_model(),
543                                         -1);
544                         }
545 //printf("OverlayFrame::overlay input -> temp\n");
548                         temp_frame->clear_frame();
550 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
551 //      temp_w, temp_h);
552                         scale_output = temp_frame;
553                         translation_input = scale_output;
555 // Adjust input coordinates to reflect new scaled coordinates.
556                         in_x1 = 0;
557                         in_y1 = 0;
558                         in_x2 = temp_w;
559                         in_y2 = temp_h;
560                 }
564 //printf("Overlay 1\n");
566 // Scale input -> scale_output
567                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
568                 scale_engine->scale_output = scale_output;
569                 scale_engine->scale_input = input;
570                 scale_engine->w_scale = w_scale;
571                 scale_engine->h_scale = h_scale;
572                 scale_engine->in_x1_int = in_x1_int;
573                 scale_engine->in_y1_int = in_y1_int;
574                 scale_engine->out_w_int = temp_w;
575                 scale_engine->out_h_int = temp_h;
576                 scale_engine->interpolation_type = interpolation_type;
577 //printf("Overlay 2\n");
579 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
580                 scale_engine->process_packages();
581 //printf("OverlayFrame::overlay ScaleEngine 2\n");
585         }
587 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
588 //      in_x1, 
589 //      in_y1, 
590 //      in_x2, 
591 //      in_y2, 
592 //      out_x1, 
593 //      out_y1, 
594 //      out_x2, 
595 //      out_y2);
601 #define NO_TRANSLATION2 \
602         (EQUIV(in_x1, 0) && \
603         EQUIV(in_y1, 0) && \
604         EQUIV(in_x2, translation_input->get_w()) && \
605         EQUIV(in_y2, translation_input->get_h()) && \
606         EQUIV(out_x1, 0) && \
607         EQUIV(out_y1, 0) && \
608         EQUIV(out_x2, output->get_w()) && \
609         EQUIV(out_y2, output->get_h())) \
611 #define NO_SCALE \
612         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
613         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
615         
618 //printf("OverlayFrame::overlay 4 %d\n", mode);
623         if(translation_input)
624         {
625 // Direct copy
626                 if( NO_TRANSLATION2 &&
627                         NO_SCALE &&
628                         NO_BLEND)
629                 {
630 //printf("OverlayFrame::overlay direct copy\n");
631                         output->copy_from(translation_input);
632                 }
633                 else
634 // Blend only
635                 if( NO_TRANSLATION2 &&
636                         NO_SCALE)
637                 {
638                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
641                         blend_engine->output = output;
642                         blend_engine->input = translation_input;
643                         blend_engine->alpha = alpha;
644                         blend_engine->mode = mode;
646                         blend_engine->process_packages();
647                 }
648                 else
649 // Scale and translate using nearest neighbor
650 // Translation is exactly on integer boundaries
651                 if(interpolation_type == NEAREST_NEIGHBOR ||
652                         EQUIV(in_x1, (int)in_x1) &&
653                         EQUIV(in_y1, (int)in_y1) &&
654                         EQUIV(in_x2, (int)in_x2) &&
655                         EQUIV(in_y2, (int)in_y2) &&
657                         EQUIV(out_x1, (int)out_x1) &&
658                         EQUIV(out_y1, (int)out_y1) &&
659                         EQUIV(out_x2, (int)out_x2) &&
660                         EQUIV(out_y2, (int)out_y2))
661                 {
662 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
663                         if(!scaletranslate_engine) scaletranslate_engine = 
664                                 new ScaleTranslateEngine(this, cpus);
667                         scaletranslate_engine->output = output;
668                         scaletranslate_engine->input = translation_input;
669                         scaletranslate_engine->in_x1 = (int)in_x1;
670                         scaletranslate_engine->in_y1 = (int)in_y1;
671 // we need to do this mumbo-jumbo in order to get numerical stability
672 // other option would be to round all the coordinates
673                         scaletranslate_engine->in_x2 = (int)in_x1 + (int)(in_x2 - in_x1);
674                         scaletranslate_engine->in_y2 = (int)in_y1 + (int)(in_y2 - in_y1);
675                         scaletranslate_engine->out_x1 = (int)out_x1;
676                         scaletranslate_engine->out_y1 = (int)out_y1;
677                         scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
678                         scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
679                         scaletranslate_engine->alpha = alpha;
680                         scaletranslate_engine->mode = mode;
682                         scaletranslate_engine->process_packages();
683                 }
684                 else
685 // Fractional translation
686                 {
687 // Use fractional translation
688 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n", 
689 //      in_x1, 
690 //      in_y1, 
691 //      in_x2, 
692 //      in_y2, 
693 //      out_x1, 
694 //      out_y1, 
695 //      out_x2, 
696 //      out_y2);
698 //printf("Overlay 3\n");
699                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
700                         translate_engine->translate_output = output;
701                         translate_engine->translate_input = translation_input;
702                         translate_engine->translate_in_x1 = in_x1;
703                         translate_engine->translate_in_y1 = in_y1;
704                         translate_engine->translate_in_x2 = in_x2;
705                         translate_engine->translate_in_y2 = in_y2;
706                         translate_engine->translate_out_x1 = out_x1;
707                         translate_engine->translate_out_y1 = out_y1;
708                         translate_engine->translate_out_x2 = out_x2;
709                         translate_engine->translate_out_y2 = out_y2;
710                         translate_engine->translate_alpha = alpha;
711                         translate_engine->translate_mode = mode;
712 //printf("Overlay 4\n");
714 //printf("OverlayFrame::overlay 5 %d\n", mode);
715                         translate_engine->process_packages();
717                 }
718         }
719 //printf("OverlayFrame::overlay 2\n");
721         return 0;
730 ScalePackage::ScalePackage()
737 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
738  : LoadClient(server)
740         this->overlay = overlay;
741         this->engine = server;
744 ScaleUnit::~ScaleUnit()
750 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
751         float scale,
752         int in_pixel1, 
753         int out_total,
754         int in_total)
756         table = new bilinear_table_t[out_total];
757         bzero(table, sizeof(bilinear_table_t) * out_total);
758 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
759         for(int i = 0; i < out_total; i++)
760         {
761                 float out_start = i;
762                 float in_start = out_start * scale;
763                 float out_end = i + 1;
764                 float in_end = out_end * scale;
765                 bilinear_table_t *entry = table + i;
766 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
768 // Store input fraction.  Using scale to normalize these didn't work.
769                 entry->input_fraction1 = (floor(in_start + 1) - in_start) /* / scale */;
770                 entry->input_fraction2 = 1.0 /* / scale */;
771                 entry->input_fraction3 = (in_end - floor(in_end)) /* / scale */;
773                 if(in_end >= in_total - in_pixel1)
774                 {
775                         in_end = in_total - in_pixel1 - 1;
776                         
777                         int difference = (int)in_end - (int)in_start - 1;
778                         if(difference < 0) difference = 0;
779                         entry->input_fraction3 = 1.0 - 
780                                 entry->input_fraction1 - 
781                                 entry->input_fraction2 * difference;
782                 }
784 // Store input pixels
785                 entry->input_pixel1 = (int)in_start;
786                 entry->input_pixel2 = (int)in_end;
788 // Normalize for middle pixels
789                 if(entry->input_pixel2 > entry->input_pixel1 + 1)
790                 {
791                         float total = entry->input_fraction1 + 
792                                 entry->input_fraction2 * 
793                                 (entry->input_pixel2 - entry->input_pixel1 - 1) + 
794                                 entry->input_fraction3;
795                         entry->input_fraction1 /= total;
796                         entry->input_fraction2 /= total;
797                         entry->input_fraction3 /= total;
798                 }
799                 else
800                 {
801                         float total = entry->input_fraction1 +
802                                 entry->input_fraction3;
803                         entry->input_fraction1 /= total;
804                         entry->input_fraction3 /= total;
805                 }
807 // printf("ScaleUnit::tabulate_reduction 1 %d %d %d %f %f %f %f\n", 
808 // i,
809 // entry->input_pixel1, 
810 // entry->input_pixel2,
811 // entry->input_fraction1,
812 // entry->input_fraction2,
813 // entry->input_fraction3,
814 // entry->input_fraction1 + 
815 //      entry->input_fraction2 * 
816 //      (entry->input_pixel2 - entry->input_pixel1 - 1) + 
817 //      entry->input_fraction3);
820 // Sanity check
821                 if(entry->input_pixel1 > entry->input_pixel2)
822                 {
823                         entry->input_pixel1 = entry->input_pixel2;
824                         entry->input_fraction1 = 0;
825                 }
827 // Get total fraction of output pixel used
828 //              if(entry->input_pixel2 > entry->input_pixel1)
829                 entry->total_fraction = 
830                         entry->input_fraction1 +
831                         entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
832                         entry->input_fraction3;
833                 entry->input_pixel1 += in_pixel1;
834                 entry->input_pixel2 += in_pixel1;
835         }
838 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
839         float scale,
840         int in_pixel1, 
841         int out_total,
842         int in_total)
844         table = new bilinear_table_t[out_total];
845         bzero(table, sizeof(bilinear_table_t) * out_total);
847         for(int i = 0; i < out_total; i++)
848         {
849                 bilinear_table_t *entry = table + i;
850                 float in_pixel = i * scale;
851                 entry->input_pixel1 = (int)floor(in_pixel);
852                 entry->input_pixel2 = entry->input_pixel1 + 1;
854                 if(in_pixel <= in_total)
855                 {
856                         entry->input_fraction3 = in_pixel - entry->input_pixel1;
857                 }
858                 else
859                 {
860                         entry->input_fraction3 = 0;
861                         entry->input_pixel2 = 0;
862                 }
864                 if(in_pixel >= 0)
865                 {
866                         entry->input_fraction1 = entry->input_pixel2 - in_pixel;
867                 }
868                 else
869                 {
870                         entry->input_fraction1 = 0;
871                         entry->input_pixel1 = 0;
872                 }
874                 if(entry->input_pixel2 >= in_total - in_pixel1)
875                 {
876                         entry->input_pixel2 = entry->input_pixel1;
877                         entry->input_fraction3 = 1.0 - entry->input_fraction1;
878                 }
880                 entry->total_fraction = 
881                         entry->input_fraction1 + 
882                         entry->input_fraction3;
883                 entry->input_pixel1 += in_pixel1;
884                 entry->input_pixel2 += in_pixel1;
885 // 
886 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
887 // entry->input_pixel1,
888 // entry->input_pixel2,
889 // entry->input_fraction1,
890 // entry->input_fraction2,
891 // entry->input_fraction3);
892         }
895 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
897         printf("ScaleUnit::dump_bilinear\n");
898         for(int i = 0; i < total; i++)
899         {
900                 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n", 
901                         i,
902                         table[i].input_pixel1,
903                         table[i].input_pixel2,
904                         table[i].input_fraction1,
905                         table[i].input_fraction2,
906                         table[i].input_fraction3,
907                         table[i].total_fraction);
908         }
// Add one input row's share of the output pixel to the running sums.
// The expansion site must provide in_rows, x_entry, input_scale1,
// input_scale2, input_scale3 and the accumulators temp_f1 .. temp_f4.
// The fourth accumulator is only touched when components is 4.
#define PIXEL_REDUCE_MACRO(type, components, row) \
{ \
	type *first_pixel = &in_rows[row][x_entry->input_pixel1 * components]; \
	type *last_pixel = &in_rows[row][x_entry->input_pixel2 * components]; \
 \
/* First pixel of the span, weighted by input_scale1 */ \
	temp_f1 += input_scale1 * first_pixel[0]; \
	temp_f2 += input_scale1 * first_pixel[1]; \
	temp_f3 += input_scale1 * first_pixel[2]; \
	if(components == 4) temp_f4 += input_scale1 * first_pixel[3]; \
 \
/* Last pixel of the span, weighted by input_scale3. */ \
/* Added unconditionally, also when it is the same pixel as the first. */ \
	temp_f1 += input_scale3 * last_pixel[0]; \
	temp_f2 += input_scale3 * last_pixel[1]; \
	temp_f3 += input_scale3 * last_pixel[2]; \
	if(components == 4) temp_f4 += input_scale3 * last_pixel[3]; \
 \
/* Pixels strictly between the two, weighted by input_scale2 */ \
	for(type *middle_pixel = first_pixel + components; \
		middle_pixel < last_pixel; \
		middle_pixel += components) \
	{ \
		temp_f1 += input_scale2 * middle_pixel[0]; \
		temp_f2 += input_scale2 * middle_pixel[1]; \
		temp_f3 += input_scale2 * middle_pixel[2]; \
		if(components == 4) temp_f4 += input_scale2 * middle_pixel[3]; \
	} \
}
// Bilinear reduction and suboptimal enlargement.
// Very high quality.
//
// Expanded inside ScaleUnit::process_package and uses its locals pkg, input,
// output, scale_w, scale_h, in_x1_int, in_y1_int, out_w_int and out_h_int.
// max is the largest component value (1.0 disables the clamp), type is the
// component storage type and components is the count per pixel (3 or 4).
// Writes output rows pkg->out_row1 .. pkg->out_row2 - 1.
#define BILINEAR_REDUCE(max, type, components) \
{ \
	bilinear_table_t *x_table, *y_table; \
	int out_h = pkg->out_row2 - pkg->out_row1; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
 \
/* Each axis chooses its table builder from its own scale factor */ \
	if(scale_w < 1) \
		tabulate_reduction(x_table, \
			1.0 / scale_w, \
			in_x1_int, \
			out_w_int, \
			input->get_w()); \
	else \
		tabulate_enlarge(x_table, \
			1.0 / scale_w, \
			in_x1_int, \
			out_w_int, \
			input->get_w()); \
 \
	if(scale_h < 1) \
		tabulate_reduction(y_table, \
			1.0 / scale_h, \
			in_y1_int, \
			out_h_int, \
			input->get_h()); \
	else \
		tabulate_enlarge(y_table, \
			1.0 / scale_h, \
			in_y1_int, \
			out_h_int, \
			input->get_h()); \
 \
/* The y table covers every output row, so it is indexed by absolute row */ \
	for(int i = 0; i < out_h; i++) \
	{ \
		type *out_row = out_rows[i + pkg->out_row1]; \
		bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
 \
		for(int j = 0; j < out_w_int; j++) \
		{ \
			bilinear_table_t *x_entry = &x_table[j]; \
/* Load rounding factors.  Only the types that are not 4 bytes wide get .5 */ \
			float temp_f1; \
			float temp_f2; \
			float temp_f3; \
			float temp_f4; \
			if(sizeof(type) != 4) \
				temp_f1 = temp_f2 = temp_f3 = temp_f4 = .5; \
			else \
				temp_f1 = temp_f2 = temp_f3 = temp_f4 = 0; \
 \
/* First row */ \
			float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
			float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
			float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
 \
/* Last row */ \
			input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
			PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
 \
/* Middle rows.  The loop bounds alone decide whether there are any. */ \
/* This used to be skipped when the package held a single output row, */ \
/* which made the result depend on how rows were split into packages. */ \
			input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
			input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
			input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
			for(int k = y_entry->input_pixel1 + 1; \
				k < y_entry->input_pixel2; \
				k++) \
			{ \
				PIXEL_REDUCE_MACRO(type, components, k) \
			} \
 \
/* Clamp the upper end for every max except 1.0 */ \
			if(max != 1.0) \
			{ \
				if(temp_f1 > max) temp_f1 = max; \
				if(temp_f2 > max) temp_f2 = max; \
				if(temp_f3 > max) temp_f3 = max; \
				if(components == 4) if(temp_f4 > max) temp_f4 = max; \
			} \
 \
			out_row[j * components    ] = (type)temp_f1; \
			out_row[j * components + 1] = (type)temp_f2; \
			out_row[j * components + 2] = (type)temp_f3; \
			if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
		} \
	} \
 \
	delete [] x_table; \
	delete [] y_table; \
}
// Only 2 input pixels
//
// Expanded inside ScaleUnit::process_package and uses its locals pkg, input,
// output, scale_w, scale_h, in_x1_int, in_y1_int and out_w_int.
// Every output component is the weighted sum of the same component in four
// input pixels: two columns (x1, x2) on each of two rows (y1, y2).
// The max argument is accepted for symmetry with the other scalers but unused.
#define BILINEAR_ENLARGE(max, type, components) \
{ \
	float k_y = 1.0 / scale_h; \
	float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	int out_h = pkg->out_row2 - pkg->out_row1; \
	int in_h_int = input->get_h(); \
	int in_w_int = input->get_w(); \
	int *table_int_x1, *table_int_x2; \
	int *table_int_y1, *table_int_y2; \
	float *table_frac_x_f, *table_antifrac_x_f; \
	float *table_frac_y_f, *table_antifrac_y_f; \
 \
/* Column table spans the whole output width */ \
	tabulate_blinear_f(table_int_x1, \
		table_int_x2, \
		table_frac_x_f, \
		table_antifrac_x_f, \
		k_x, \
		0, \
		out_w_int, \
		in_x1_int, \
		in_w_int); \
/* Row table spans only this package's rows, so it is indexed from 0 */ \
	tabulate_blinear_f(table_int_y1, \
		table_int_y2, \
		table_frac_y_f, \
		table_antifrac_y_f, \
		k_y, \
		pkg->out_row1, \
		pkg->out_row2, \
		in_y1_int, \
		in_h_int); \
 \
	for(int i = 0; i < out_h; i++) \
	{ \
		type *y1_row = in_rows[table_int_y1[i]]; \
		type *y2_row = in_rows[table_int_y2[i]]; \
		type *out_pixel = out_rows[i + pkg->out_row1]; \
		float y2_weight = table_frac_y_f[i]; \
		float y1_weight = table_antifrac_y_f[i]; \
 \
		for(int j = 0; j < out_w_int; j++, out_pixel += components) \
		{ \
			type *y1x1 = y1_row + table_int_x1[j] * components; \
			type *y1x2 = y1_row + table_int_x2[j] * components; \
			type *y2x1 = y2_row + table_int_x1[j] * components; \
			type *y2x2 = y2_row + table_int_x2[j] * components; \
			float x2_weight = table_frac_x_f[j]; \
			float x1_weight = table_antifrac_x_f[j]; \
 \
/* Same expression for every component; inputs go through float first */ \
			for(int c = 0; c < components; c++) \
			{ \
				float value1 = y1x1[c]; \
				float value2 = y1x2[c]; \
				float value3 = y2x1[c]; \
				float value4 = y2x2[c]; \
				out_pixel[c] = \
					(type)(y1_weight * (x1_weight * value1 + \
						x2_weight * value2) + \
					y2_weight * (x1_weight * value3 + \
						x2_weight * value4)); \
			} \
		} \
	} \
 \
	delete [] table_int_x1; \
	delete [] table_int_x2; \
	delete [] table_int_y1; \
	delete [] table_int_y2; \
	delete [] table_frac_x_f; \
	delete [] table_antifrac_x_f; \
	delete [] table_frac_y_f; \
	delete [] table_antifrac_y_f; \
}
// Bicubic scaling with a 4x4 kernel of input pixels per output pixel.
//
// Expanded inside ScaleUnit::process_package and uses its locals pkg, input,
// output, scale_w, scale_h, in_x1_int, in_y1_int, out_w_int and out_h_int.
// Both coefficient tables hold 4 entries per output pixel; the row table is
// built for all out_h_int rows and therefore indexed by absolute output row.
// The max argument is accepted for symmetry with the other scalers but unused.
#define BICUBIC(max, type, components) \
{ \
	float k_y = 1.0 / scale_h; \
	float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	float *bspline_x_f, *bspline_y_f; \
	int *in_x_table, *in_y_table; \
	int in_h_int = input->get_h(); \
	int in_w_int = input->get_w(); \
 \
	tabulate_bcubic_f(bspline_x_f, \
		in_x_table, \
		k_x, \
		in_x1_int, \
		out_w_int, \
		in_w_int, \
		-1); \
 \
	tabulate_bcubic_f(bspline_y_f, \
		in_y_table, \
		k_y, \
		in_y1_int, \
		out_h_int, \
		in_h_int, \
		1); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type *out_pixel = out_rows[i]; \
		float *y_weights = bspline_y_f + i * 4; \
		int *y_coords = in_y_table + i * 4; \
 \
		for(int j = 0; j < out_w_int; j++, out_pixel += components) \
		{ \
			float *x_weights = bspline_x_f + j * 4; \
			int *x_coords = in_x_table + j * 4; \
			float sum_f[4] = { 0, 0, 0, 0 }; \
 \
/* Kernel: 4 rows by 4 columns, each weighted by row weight * column weight */ \
			for(int m = 0; m < 4; m++) \
			{ \
				float r1_f = y_weights[m]; \
				type *kernel_row = in_rows[y_coords[m]]; \
 \
				for(int n = 0; n < 4; n++) \
				{ \
					float r_square_f = r1_f * x_weights[n]; \
					type *kernel_pixel = kernel_row + x_coords[n] * components; \
 \
					for(int c = 0; c < components; c++) \
						sum_f[c] += r_square_f * kernel_pixel[c]; \
				} \
			} \
 \
			for(int c = 0; c < components; c++) \
				out_pixel[c] = (type)sum_f[c]; \
		} \
	} \
 \
	delete [] bspline_x_f; \
	delete [] bspline_y_f; \
	delete [] in_x_table; \
	delete [] in_y_table; \
}
// The pow function is not thread safe in Compaq C, so the cube is spelled
// out as a product.  The argument is evaluated three times.
#define CUBE(value) ((value) * (value) * (value))
1267 float ScaleUnit::cubic_bspline(float x)
1269         float a, b, c, d;
1271         if((x + 2.0F) <= 0.0F) 
1272         {
1273         a = 0.0F;
1274         }
1275         else 
1276         {
1277         a = CUBE(x + 2.0F);
1278         }
1281         if((x + 1.0F) <= 0.0F) 
1282         {
1283         b = 0.0F;
1284         }
1285         else 
1286         {
1287         b = CUBE(x + 1.0F);
1288         }    
1290         if(x <= 0) 
1291         {
1292         c = 0.0F;
1293         }
1294         else 
1295         {
1296         c = CUBE(x);
1297         }  
1299         if((x - 1.0F) <= 0.0F) 
1300         {
1301         d = 0.0F;
1302         }
1303         else 
1304         {
1305         d = CUBE(x - 1.0F);
1306         }
1309         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
1313 void ScaleUnit::tabulate_bcubic_f(float* &coef_table, 
1314         int* &coord_table,
1315         float scale,
1316         int start, 
1317         int pixels,
1318         int total_pixels,
1319         float coefficient)
1321         coef_table = new float[pixels * 4];
1322         coord_table = new int[pixels * 4];
1323         for(int i = 0, j = 0; i < pixels; i++)
1324         {
1325                 float f_x = (float)i * scale;
1326                 float a = f_x - floor(f_x);
1327                 
1328                 for(float m = -1; m < 3; m++)
1329                 {
1330                         coef_table[j] = cubic_bspline(coefficient * (m - a));
1331                         coord_table[j] = (int)(start + (int)f_x + m);
1332                         CLAMP(coord_table[j], 0, total_pixels - 1);
1333                         j++;
1334                 }
1335                 
1336         }
1339 void ScaleUnit::tabulate_bcubic_i(int* &coef_table, 
1340         int* &coord_table,
1341         float scale,
1342         int start, 
1343         int pixels,
1344         int total_pixels,
1345         float coefficient)
1347         coef_table = new int[pixels * 4];
1348         coord_table = new int[pixels * 4];
1349         for(int i = 0, j = 0; i < pixels; i++)
1350         {
1351                 float f_x = (float)i * scale;
1352                 float a = f_x - floor(f_x);
1353                 
1354                 for(float m = -1; m < 3; m++)
1355                 {
1356                         coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1357                         coord_table[j] = (int)(start + (int)f_x + m);
1358                         CLAMP(coord_table[j], 0, total_pixels - 1);
1359                         j++;
1360                 }
1361                 
1362         }
1365 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1366                 int* &table_int2,
1367                 float* &table_frac,
1368                 float* &table_antifrac,
1369                 float scale,
1370                 int pixel1,
1371                 int pixel2,
1372                 int start,
1373                 int total_pixels)
1375         table_int1 = new int[pixel2 - pixel1];
1376         table_int2 = new int[pixel2 - pixel1];
1377         table_frac = new float[pixel2 - pixel1];
1378         table_antifrac = new float[pixel2 - pixel1];
1380         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1381         {
1382                 float f_x = (float)i * scale;
1383                 int i_x = (int)floor(f_x);
1384                 float a = (f_x - floor(f_x));
1386                 table_int1[j] = i_x + start;
1387                 table_int2[j] = i_x + start + 1;
1388                 CLAMP(table_int1[j], 0, total_pixels - 1);
1389                 CLAMP(table_int2[j], 0, total_pixels - 1);
1390                 table_frac[j] = a;
1391                 table_antifrac[j] = 1.0F - a;
1392 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1393         }
1396 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1397                 int* &table_int2,
1398                 int* &table_frac,
1399                 int* &table_antifrac,
1400                 float scale,
1401                 int pixel1,
1402                 int pixel2,
1403                 int start,
1404                 int total_pixels)
1406         table_int1 = new int[pixel2 - pixel1];
1407         table_int2 = new int[pixel2 - pixel1];
1408         table_frac = new int[pixel2 - pixel1];
1409         table_antifrac = new int[pixel2 - pixel1];
1411         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1412         {
1413                 double f_x = (float)i * scale;
1414                 int i_x = (int)floor(f_x);
1415                 float a = (f_x - floor(f_x));
1417                 table_int1[j] = i_x + start;
1418                 table_int2[j] = i_x + start + 1;
1419                 CLAMP(table_int1[j], 0, total_pixels - 1);
1420                 CLAMP(table_int2[j], 0, total_pixels - 1);
1421                 table_frac[j] = (int)(a * 0xffff);
1422                 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1423 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1424         }
1427 void ScaleUnit::process_package(LoadPackage *package)
1429         ScalePackage *pkg = (ScalePackage*)package;
1431 //printf("ScaleUnit::process_package 1\n");
1432 // Arguments for macros
1433         VFrame *output = engine->scale_output;
1434         VFrame *input = engine->scale_input;
1435         float scale_w = engine->w_scale;
1436         float scale_h = engine->h_scale;
1437         int in_x1_int = engine->in_x1_int;
1438         int in_y1_int = engine->in_y1_int;
1439         int out_h_int = engine->out_h_int;
1440         int out_w_int = engine->out_w_int;
1441         int do_yuv = 
1442                 (input->get_color_model() == BC_YUV888 ||
1443                 input->get_color_model() == BC_YUVA8888 ||
1444                 input->get_color_model() == BC_YUV161616 ||
1445                 input->get_color_model() == BC_YUVA16161616);
1447 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
1448         if(engine->interpolation_type == CUBIC_CUBIC || 
1449                 (engine->interpolation_type == CUBIC_LINEAR 
1450                         && engine->w_scale > 1 && 
1451                         engine->h_scale > 1))
1452         {
1453                 switch(engine->scale_input->get_color_model())
1454                 {
1455                         case BC_RGB_FLOAT:
1456                                 BICUBIC(1.0, float, 3);
1457                                 break;
1459                         case BC_RGBA_FLOAT:
1460                                 BICUBIC(1.0, float, 4);
1461                                 break;
1463                         case BC_RGB888:
1464                         case BC_YUV888:
1465                                 BICUBIC(0xff, unsigned char, 3);
1466                                 break;
1468                         case BC_RGBA8888:
1469                         case BC_YUVA8888:
1470                                 BICUBIC(0xff, unsigned char, 4);
1471                                 break;
1473                         case BC_RGB161616:
1474                         case BC_YUV161616:
1475                                 BICUBIC(0xffff, uint16_t, 3);
1476                                 break;
1478                         case BC_RGBA16161616:
1479                         case BC_YUVA16161616:
1480                                 BICUBIC(0xffff, uint16_t, 4);
1481                                 break;
1482                 }
1483         }
1484         else
1485 // Perform bilinear scaling input -> scale_output
1486         if(engine->w_scale > 1 && 
1487                 engine->h_scale > 1)
1488         {
1489                 switch(engine->scale_input->get_color_model())
1490                 {
1491                         case BC_RGB_FLOAT:
1492                                 BILINEAR_ENLARGE(1.0, float, 3);
1493                                 break;
1495                         case BC_RGBA_FLOAT:
1496                                 BILINEAR_ENLARGE(1.0, float, 4);
1497                                 break;
1499                         case BC_RGB888:
1500                         case BC_YUV888:
1501                                 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1502                                 break;
1504                         case BC_RGBA8888:
1505                         case BC_YUVA8888:
1506                                 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1507                                 break;
1509                         case BC_RGB161616:
1510                         case BC_YUV161616:
1511                                 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1512                                 break;
1514                         case BC_RGBA16161616:
1515                         case BC_YUVA16161616:
1516                                 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1517                                 break;
1518                 }
1519         }
1520         else
1521 // Bilinear reduction
1522         {
1523                 switch(engine->scale_input->get_color_model())
1524                 {
1525                         case BC_RGB_FLOAT:
1526                                 BILINEAR_REDUCE(1.0, float, 3);
1527                                 break;
1528                         case BC_RGBA_FLOAT:
1529                                 BILINEAR_REDUCE(1.0, float, 4);
1530                                 break;
1531                         case BC_RGB888:
1532                         case BC_YUV888:
1533                                 BILINEAR_REDUCE(0xff, unsigned char, 3);
1534                                 break;
1536                         case BC_RGBA8888:
1537                         case BC_YUVA8888:
1538                                 BILINEAR_REDUCE(0xff, unsigned char, 4);
1539                                 break;
1541                         case BC_RGB161616:
1542                         case BC_YUV161616:
1543                                 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1544                                 break;
1546                         case BC_RGBA16161616:
1547                         case BC_YUVA16161616:
1548                                 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1549                                 break;
1550                 }
1551         }
1552 //printf("ScaleUnit::process_package 3\n");
1568 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1569  : LoadServer(cpus, cpus)
1571         this->overlay = overlay;
1574 ScaleEngine::~ScaleEngine()
1578 void ScaleEngine::init_packages()
1580         for(int i = 0; i < total_packages; i++)
1581         {
1582                 ScalePackage *package = (ScalePackage*)packages[i];
1583                 package->out_row1 = out_h_int / total_packages * i;
1584                 package->out_row2 = package->out_row1 + out_h_int / total_packages;
1586                 if(i >= total_packages - 1)
1587                         package->out_row2 = out_h_int;
1588         }
1591 LoadClient* ScaleEngine::new_client()
1593         return new ScaleUnit(this, overlay);
1596 LoadPackage* ScaleEngine::new_package()
1598         return new ScalePackage;
1613 TranslatePackage::TranslatePackage()
1619 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1620  : LoadClient(server)
1622         this->overlay = overlay;
1623         this->engine = server;
1626 TranslateUnit::~TranslateUnit()
1632 void TranslateUnit::translation_array_f(transfer_table_f* &table, 
1633         float out_x1, 
1634         float out_x2,
1635         float in_x1,
1636         float in_x2,
1637         int in_total, 
1638         int out_total, 
1639         int &out_x1_int,
1640         int &out_x2_int)
1642         int out_w_int;
1643         float offset = out_x1 - in_x1;
1644 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1646         out_x1_int = (int)out_x1;
1647         out_x2_int = MIN((int)ceil(out_x2), out_total);
1648         out_w_int = out_x2_int - out_x1_int;
1650         table = new transfer_table_f[out_w_int];
1651         bzero(table, sizeof(transfer_table_f) * out_w_int);
1654 // printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f scale=%f %f\n", 
1655 // in_x1, 
1656 // in_x2, 
1657 // out_x1, 
1658 // out_x2,
1659 // in_x2 - in_x1,
1660 // out_x2 - out_x1);
1661 // 
1663         float in_x = in_x1;
1664         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1665         {
1666                 transfer_table_f *entry = &table[out_x - out_x1_int];
1668                 entry->in_x1 = (int)in_x;
1669                 entry->in_x2 = (int)in_x + 1;
1671 // Get fraction of output pixel to fill
1672                 entry->output_fraction = 1;
1674                 if(out_x1 > out_x)
1675                 {
1676                         entry->output_fraction -= out_x1 - out_x;
1677                 }
1679                 if(out_x2 < out_x + 1)
1680                 {
1681                         entry->output_fraction = (out_x2 - out_x);
1682                 }
1684 // Advance in_x until out_x_fraction is filled
1685                 float out_x_fraction = entry->output_fraction;
1686                 float in_x_fraction = floor(in_x + 1) - in_x;
1688                 if(out_x_fraction <= in_x_fraction)
1689                 {
1690                         entry->in_fraction1 = out_x_fraction;
1691                         entry->in_fraction2 = 0.0;
1692                         in_x += out_x_fraction;
1693                 }
1694                 else
1695                 {
1696                         entry->in_fraction1 = in_x_fraction;
1697                         in_x += out_x_fraction;
1698                         entry->in_fraction2 = in_x - floor(in_x);
1699                 }
1701 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1702                 if(entry->in_x2 >= in_total)
1703                 {
1704                         entry->in_x2 = in_total - 1;
1705                         entry->in_fraction2 = 0.0;
1706                 }
1707                 
1708                 if(entry->in_x1 >= in_total)
1709                 {
1710                         entry->in_x1 = in_total - 1;
1711                         entry->in_fraction1 = 0.0;
1712                 }
1713 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n", 
1714 //      out_x, 
1715 //      entry->in_x1, 
1716 //      entry->in_x2, 
1717 //      entry->in_fraction1, 
1718 //      entry->in_fraction2, 
1719 //      entry->output_fraction);
1720         }
1724 void TranslateUnit::translation_array_i(transfer_table_i* &table, 
1725         float out_x1, 
1726         float out_x2,
1727         float in_x1,
1728         float in_x2,
1729         int in_total, 
1730         int out_total, 
1731         int &out_x1_int,
1732         int &out_x2_int)
1734         int out_w_int;
1735         float offset = out_x1 - in_x1;
1737         out_x1_int = (int)out_x1;
1738         out_x2_int = MIN((int)ceil(out_x2), out_total);
1739         out_w_int = out_x2_int - out_x1_int;
1741         table = new transfer_table_i[out_w_int];
1742         bzero(table, sizeof(transfer_table_i) * out_w_int);
1745 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1747         float in_x = in_x1;
1748         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1749         {
1750                 transfer_table_i *entry = &table[out_x - out_x1_int];
1752                 entry->in_x1 = (int)in_x;
1753                 entry->in_x2 = (int)in_x + 1;
1755 // Get fraction of output pixel to fill
1756                 entry->output_fraction = 0x10000;
1758                 if(out_x1 > out_x)
1759                 {
1760                         entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
1761                 }
1763                 if(out_x2 < out_x + 1)
1764                 {
1765                         entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1766                 }
1768 // Advance in_x until out_x_fraction is filled
1769                 int out_x_fraction = entry->output_fraction;
1770                 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
1772                 if(out_x_fraction <= in_x_fraction)
1773                 {
1774                         entry->in_fraction1 = out_x_fraction;
1775                         entry->in_fraction2 = 0;
1776                         in_x += (float)out_x_fraction / 0x10000;
1777                 }
1778                 else
1779                 {
1780                         entry->in_fraction1 = in_x_fraction;
1781                         in_x += (float)out_x_fraction / 0x10000;
1782                         entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1783                 }
1785 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1786                 if(entry->in_x2 >= in_total)
1787                 {
1788                         entry->in_x2 = in_total - 1;
1789                         entry->in_fraction2 = 0;
1790                 }
1792                 if(entry->in_x1 >= in_total)
1793                 {
1794                         entry->in_x1 = in_total - 1;
1795                         entry->in_fraction1 = 0;
1796                 }
1797 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n", 
1798 //      out_x, 
1799 //      entry->in_x1, 
1800 //      entry->in_x2, 
1801 //      entry->in_fraction1, 
1802 //      entry->in_fraction2, 
1803 //      entry->output_fraction);
1804         }
// TRANSLATE(max, temp_type, type, components, chroma_offset)
//
// Per pixel format body of TranslateUnit::process_package: copies the input
// frame to the output frame at a fractional offset and blends it with what
// is already there.
//   max            full scale channel value (0xff, 0xffff or 1.0 at the
//                  call sites).
//   temp_type      arithmetic type for intermediate values.
//   type           stored channel type.
//   components     channels per pixel, 3 or 4.
//   chroma_offset  non-zero for the YUV formats (0x80 / 0x8000 at the call
//                  sites), 0 for RGB.
// Names taken from the expansion site: input, output, alpha, mode, row1,
// row2, x_table_f, y_table_f, out_x1_int, out_x2_int, out_y1_int.
//
// For every output pixel the four input pixels named by the X and Y table
// entries are mixed with weights x_fraction * y_fraction into input1..input3
// (and input4 when components == 4).  When chroma_offset is non-zero, the
// weight not covered by the four fractions is filled with chroma_offset on
// channels 1 and 2.  The pixel then goes through BLEND_3 or BLEND_4 with an
// opacity of alpha * max scaled by the X and Y output_fraction.
// sizeof(type) != 4 picks out the integer formats, which get 0.5 added for
// rounding; the only 4 byte type used at the call sites is float.
1840 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
1841 { \
1843         type **in_rows = (type**)input->get_rows(); \
1844         type **out_rows = (type**)output->get_rows(); \
1847         temp_type master_opacity; \
1848         if(sizeof(type) != 4) \
1849                 master_opacity = (temp_type)(alpha * max + 0.5); \
1850         else \
1851                 master_opacity = (temp_type)(alpha * max); \
1852         temp_type master_transparency = max - master_opacity; \
1853         float round = 0.0; \
1854         if(sizeof(type) != 4) \
1855                 round = 0.5; \
1858         for(int i = row1; i < row2; i++) \
1859         { \
1860                 int in_y1; \
1861                 int in_y2; \
1862                 float y_fraction1_f; \
1863                 float y_fraction2_f; \
1864                 float y_output_fraction_f; \
1865                 in_y1 = y_table_f[i - out_y1_int].in_x1; \
1866                 in_y2 = y_table_f[i - out_y1_int].in_x2; \
1867                 y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1868                 y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1869                 y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1870                 type *in_row1 = in_rows[(in_y1)]; \
1871                 type *in_row2 = in_rows[(in_y2)]; \
1872                 type *out_row = out_rows[i]; \
1874                 for(int j = out_x1_int; j < out_x2_int; j++) \
1875                 { \
1876                         int in_x1; \
1877                         int in_x2; \
1878                         float x_fraction1_f; \
1879                         float x_fraction2_f; \
1880                         float x_output_fraction_f; \
1881                         in_x1 = x_table_f[j - out_x1_int].in_x1; \
1882                         in_x2 = x_table_f[j - out_x1_int].in_x2; \
1883                         x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1884                         x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1885                         x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
1886                         type *output = &out_row[j * components]; \
1887                         temp_type input1, input2, input3, input4; \
1889                         float fraction1 = x_fraction1_f * y_fraction1_f; \
1890                         float fraction2 = x_fraction2_f * y_fraction1_f; \
1891                         float fraction3 = x_fraction1_f * y_fraction2_f; \
1892                         float fraction4 = x_fraction2_f * y_fraction2_f; \
1894                         input1 = (type)(in_row1[in_x1 * components] * fraction1 +  \
1895                                 in_row1[in_x2 * components] * fraction2 +  \
1896                                 in_row2[in_x1 * components] * fraction3 +  \
1897                                 in_row2[in_x2 * components] * fraction4 + round); \
1899 /* Add chroma to fractional pixels */ \
1900                         if(chroma_offset) \
1901                         { \
1902                                 float extra_chroma = (1.0F - \
1903                                         fraction1 - \
1904                                         fraction2 - \
1905                                         fraction3 - \
1906                                         fraction4) * chroma_offset; \
1907                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1908                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1909                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1910                                         in_row2[in_x2 * components + 1] * fraction4 + \
1911                                         extra_chroma + round); \
1912                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1913                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1914                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1915                                         in_row2[in_x2 * components + 2] * fraction4 +  \
1916                                         extra_chroma + round); \
1917                         } \
1918                         else \
1919                         { \
1920                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1921                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1922                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1923                                         in_row2[in_x2 * components + 1] * fraction4 + round); \
1924                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1925                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1926                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1927                                         in_row2[in_x2 * components + 2] * fraction4 + round); \
1928                         } \
1930                         if(components == 4) \
1931                                 input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 +  \
1932                                         in_row1[in_x2 * components + 3] * fraction2 +  \
1933                                         in_row2[in_x1 * components + 3] * fraction3 +  \
1934                                         in_row2[in_x2 * components + 3] * fraction4 + round); \
1936                         temp_type opacity; \
1937                         if(sizeof(type) != 4) \
1938                                 opacity = (temp_type)(master_opacity *  \
1939                                         y_output_fraction_f *  \
1940                                         x_output_fraction_f + 0.5); \
1941                         else \
1942                                 opacity = (temp_type)(master_opacity *  \
1943                                         y_output_fraction_f *  \
1944                                         x_output_fraction_f); \
1945                         temp_type transparency = max - opacity; \
1947 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
1949                         if(components == 3) \
1950                         { \
1951                                 BLEND_3(max, temp_type, type, chroma_offset); \
1952                         } \
1953                         else \
1954                         { \
1955                                 BLEND_4(max, temp_type, type, chroma_offset); \
1956                         } \
1957                 } \
1958         } \
1961 void TranslateUnit::process_package(LoadPackage *package)
1963         TranslatePackage *pkg = (TranslatePackage*)package;
1964         int out_y1_int; 
1965         int out_y2_int; 
1966         int out_x1_int; 
1967         int out_x2_int; 
1970 // Variables for TRANSLATE
1971         VFrame *input = engine->translate_input;
1972         VFrame *output = engine->translate_output;
1973         float in_x1 = engine->translate_in_x1;
1974         float in_y1 = engine->translate_in_y1;
1975         float in_x2 = engine->translate_in_x2;
1976         float in_y2 = engine->translate_in_y2;
1977         float out_x1 = engine->translate_out_x1;
1978         float out_y1 = engine->translate_out_y1;
1979         float out_x2 = engine->translate_out_x2;
1980         float out_y2 = engine->translate_out_y2;
1981         float alpha = engine->translate_alpha;
1982         int row1 = pkg->out_row1;
1983         int row2 = pkg->out_row2;
1984         int mode = engine->translate_mode;
1985         int in_total_x = input->get_w();
1986         int in_total_y = input->get_h();
1987         int do_yuv = 
1988                 (engine->translate_input->get_color_model() == BC_YUV888 ||
1989                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
1990                 engine->translate_input->get_color_model() == BC_YUV161616 ||
1991                 engine->translate_input->get_color_model() == BC_YUVA16161616);
1993         transfer_table_f *x_table_f; 
1994         transfer_table_f *y_table_f; 
1995         transfer_table_i *x_table_i; 
1996         transfer_table_i *y_table_i; 
1998         translation_array_f(x_table_f,  
1999                 out_x1,  
2000                 out_x2, 
2001                 in_x1, 
2002                 in_x2, 
2003                 in_total_x,  
2004                 output->get_w(),  
2005                 out_x1_int, 
2006                 out_x2_int); 
2007         translation_array_f(y_table_f,  
2008                 out_y1,  
2009                 out_y2, 
2010                 in_y1, 
2011                 in_y2, 
2012                 in_total_y,  
2013                 output->get_h(),  
2014                 out_y1_int, 
2015                 out_y2_int); 
2016 //      printf("TranslateUnit::process_package 1 %d\n", mode);
2017 //      Timer a;
2018 //      a.update();
2020         switch(engine->translate_input->get_color_model())
2021         {
2022                 case BC_RGB888:
2023                         TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
2024                         break;
2026                 case BC_RGBA8888:
2027                         TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
2028                         break;
2030                 case BC_RGB_FLOAT:
2031                         TRANSLATE(1.0, float, float, 3, 0);
2032                         break;
2034                 case BC_RGBA_FLOAT:
2035                         TRANSLATE(1.0, float, float, 4, 0);
2036                         break;
2038                 case BC_RGB161616:
2039                         TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2040                         break;
2042                 case BC_RGBA16161616:
2043                         TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2044                         break;
2046                 case BC_YUV888:
2047                         TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
2048                         break;
2050                 case BC_YUVA8888:
2051                         TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
2052                         break;
2054                 case BC_YUV161616:
2055                         TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2056                         break;
2058                 case BC_YUVA16161616:
2059                         TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2060                         break;
2061         }
2062 //      printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2064         delete [] x_table_f; 
2065         delete [] y_table_f; 
2077 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2078  : LoadServer(cpus, cpus)
2080         this->overlay = overlay;
2083 TranslateEngine::~TranslateEngine()
2087 void TranslateEngine::init_packages()
2089         int out_y1_int = (int)translate_out_y1;
2090         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2091         int out_h = out_y2_int - out_y1_int;
2093         for(int i = 0; i < total_packages; i++)
2094         {
2095                 TranslatePackage *package = (TranslatePackage*)packages[i];
2096                 package->out_row1 = (int)(out_y1_int + out_h / 
2097                         total_packages * 
2098                         i);
2099                 package->out_row2 = (int)((float)package->out_row1 + 
2100                         out_h / 
2101                         total_packages);
2102                 if(i >= total_packages - 1)
2103                         package->out_row2 = out_y2_int;
2104         }
2107 LoadClient* TranslateEngine::new_client()
2109         return new TranslateUnit(this, overlay);
2112 LoadPackage* TranslateEngine::new_package()
2114         return new TranslatePackage;
// SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset)
//
// Per pixel format body of ScaleTranslateUnit::process_package.  Every
// output pixel takes exactly one input pixel (no interpolation) and passes
// it through BLEND_3 or BLEND_4 with a constant opacity of alpha * max.
//   max            full scale channel value.
//   temp_type      arithmetic type for intermediate values.
//   type           stored channel type.
//   components     channels per pixel, 3 or 4.
//   chroma_offset  forwarded unchanged to BLEND_3 / BLEND_4.
// Names taken from the expansion site: pkg, in_rows, out_rows, x_table,
// y_table, in_x1, in_x2, out_x1, out_y1, out_w, alpha, mode.
//
// Rows: y_table[i - out_y1] is the input row for output row i.
// Columns: when out_w differs from in_x2 - in_x1, x_table[j] is the input
// column relative to in_x1 for output column j; otherwise x_table is not
// touched and the input pointer simply advances with the output pointer.
// sizeof(type) != 4 picks out the integer formats, whose opacity gets 0.5
// added for rounding.
2124 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
2125 { \
2126         temp_type opacity; \
2127         if(sizeof(type) != 4) \
2128                 opacity = (temp_type)(alpha * max + 0.5); \
2129         else \
2130                 opacity = (temp_type)(alpha * max); \
2131         temp_type transparency = max - opacity; \
2133         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2134         { \
2135                 int in_y = y_table[i - out_y1]; \
2136                 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
2137                 type *output = (type*)out_rows[i] + out_x1 * components; \
2139 /* X direction is scaled and requires a table lookup */ \
2140                 if(out_w != in_x2 - in_x1) \
2141                 { \
2142                         for(int j = 0; j < out_w; j++) \
2143                         { \
2144                                 type *in_row_plus_x = in_row + x_table[j] * components; \
2145                                 temp_type input1, input2, input3, input4; \
2146          \
2147                                 input1 = in_row_plus_x[0]; \
2148                                 input2 = in_row_plus_x[1]; \
2149                                 input3 = in_row_plus_x[2]; \
2150                                 if(components == 4) \
2151                                         input4 = in_row_plus_x[3]; \
2152          \
2153                                 if(components == 3) \
2154                                 { \
2155                                         BLEND_3(max, temp_type, type, chroma_offset); \
2156                                 } \
2157                                 else \
2158                                 { \
2159                                         BLEND_4(max, temp_type, type, chroma_offset); \
2160                                 } \
2161                                 output += components; \
2162                         } \
2163                 } \
2164                 else \
2165 /* X direction is not scaled */ \
2166                 { \
2167                         for(int j = 0; j < out_w; j++) \
2168                         { \
2169                                 temp_type input1, input2, input3, input4; \
2170          \
2171                                 input1 = in_row[0]; \
2172                                 input2 = in_row[1]; \
2173                                 input3 = in_row[2]; \
2174                                 if(components == 4) \
2175                                         input4 = in_row[3]; \
2176          \
2177                                 if(components == 3) \
2178                                 { \
2179                                         BLEND_3(max, temp_type, type, chroma_offset); \
2180                                 } \
2181                                 else \
2182                                 { \
2183                                         BLEND_4(max, temp_type, type, chroma_offset); \
2184                                 } \
2185                                 in_row += components; \
2186                                 output += components; \
2187                         } \
2188                 } \
2189         } \
2194 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2195  : LoadClient(server)
2197         this->overlay = overlay;
2198         this->scale_translate = server;
2201 ScaleTranslateUnit::~ScaleTranslateUnit()
2205 void ScaleTranslateUnit::scale_array(int* &table, 
2206         int out_x1, 
2207         int out_x2,
2208         int in_x1,
2209         int in_x2,
2210         int is_x)
2212         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2214         table = new int[out_x2 - out_x1];
2215         
2216         if(!is_x)
2217         {
2218                 for(int i = 0; i < out_x2 - out_x1; i++)
2219                 {
2220                         table[i] = (int)((float)i / scale + in_x1);
2221                 }
2222         }
2223         else
2224         {       
2225                 for(int i = 0; i < out_x2 - out_x1; i++)
2226                 {
2227                         table[i] = (int)((float)i / scale);
2228                 }
2229         }
2233 void ScaleTranslateUnit::process_package(LoadPackage *package)
2235         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2237 // Args for NEAREST_NEIGHBOR_MACRO
2238         VFrame *output = scale_translate->output;
2239         VFrame *input = scale_translate->input;
2240         int in_x1 = scale_translate->in_x1;
2241         int in_y1 = scale_translate->in_y1;
2242         int in_x2 = scale_translate->in_x2;
2243         int in_y2 = scale_translate->in_y2;
2244         int out_x1 = scale_translate->out_x1;
2245         int out_y1 = scale_translate->out_y1;
2246         int out_x2 = scale_translate->out_x2;
2247         int out_y2 = scale_translate->out_y2;
2248         float alpha = scale_translate->alpha;
2249         int mode = scale_translate->mode;
2250         int out_w = out_x2 - out_x1;
2252         int *x_table;
2253         int *y_table;
2254         unsigned char **in_rows = input->get_rows();
2255         unsigned char **out_rows = output->get_rows();
2257 //      Timer a;
2258 //      a.update();
2259 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
2260         if(out_w != in_x2 - in_x1)
2261         {
2262                 scale_array(x_table, 
2263                         out_x1, 
2264                         out_x2,
2265                         in_x1,
2266                         in_x2,
2267                         1);
2268         }
2269         scale_array(y_table, 
2270                 out_y1, 
2271                 out_y2,
2272                 in_y1,
2273                 in_y2,
2274                 0);
2277         if (mode == TRANSFER_REPLACE && (out_w == in_x2 - in_x1)) 
2278         {
2279 // if we have transfer replace and x direction is not scaled, PARTY!
2280                 char bytes_per_pixel = input->calculate_bytes_per_pixel(input->get_color_model());
2281                 int line_len = out_w * bytes_per_pixel;
2282                 int in_start_byte = in_x1 * bytes_per_pixel;
2283                 int out_start_byte = out_x1 * bytes_per_pixel;
2284                 for(int i = pkg->out_row1; i < pkg->out_row2; i++) 
2285                 {
2286                         memcpy (out_rows[i] + out_start_byte, 
2287                                 in_rows[y_table[i - out_y1]] + in_start_byte , 
2288                                 line_len);
2289                 }
2291         } 
2292         else
2293         switch(input->get_color_model())
2294         {
2295                 case BC_RGB888:
2296                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2297                         break;
2299                 case BC_RGB_FLOAT:
2300                         SCALE_TRANSLATE(1.0, float, float, 3, 0);
2301                         break;
2303                 case BC_YUV888:
2304                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2305                         break;
2307                 case BC_RGBA8888:
2308                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2309                         break;
2311                 case BC_RGBA_FLOAT:
2312                         SCALE_TRANSLATE(1.0, float, float, 4, 0);
2313                         break;
2315                 case BC_YUVA8888:
2316                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2317                         break;
2320                 case BC_RGB161616:
2321                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2322                         break;
2324                 case BC_YUV161616:
2325                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2326                         break;
2328                 case BC_RGBA16161616:
2329                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2330                         break;
2332                 case BC_YUVA16161616:
2333                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2334                         break;
2335         }
2336         
2337 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2338         if(out_x2 - out_x1 != in_x2 - in_x1)
2339                 delete [] x_table;
2340         delete [] y_table;
2352 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2353  : LoadServer(cpus, cpus)
2355         this->overlay = overlay;
2358 ScaleTranslateEngine::~ScaleTranslateEngine()
2362 void ScaleTranslateEngine::init_packages()
2364         int out_h = out_y2 - out_y1;
2366         for(int i = 0; i < total_packages; i++)
2367         {
2368                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
2369                 package->out_row1 = (int)(out_y1 + out_h / 
2370                         total_packages * 
2371                         i);
2372                 package->out_row2 = (int)((float)package->out_row1 + 
2373                         out_h / 
2374                         total_packages);
2375                 if(i >= total_packages - 1)
2376                         package->out_row2 = out_y2;
2377         }
2380 LoadClient* ScaleTranslateEngine::new_client()
2382         return new ScaleTranslateUnit(this, overlay);
2385 LoadPackage* ScaleTranslateEngine::new_package()
2387         return new ScaleTranslatePackage;
2391 ScaleTranslatePackage::ScaleTranslatePackage()
// BLEND_ONLY(temp_type, type, max, components, chroma_offset)
//
// Blend the input frame onto the output frame pixel for pixel, with no
// scaling or translation, for the rows [pkg->out_row1, pkg->out_row2).
// Note the parameter order: temp_type and type come before max here, unlike
// TRANSLATE and SCALE_TRANSLATE.
//   temp_type      arithmetic type for intermediate values.
//   type           stored channel type.
//   max            full scale channel value.
//   components     channels per pixel, 3 or 4.
//   chroma_offset  forwarded unchanged to BLEND_3 / BLEND_4.
// Names taken from the expansion site: pkg, input, output, alpha, mode.
//
// The row width comes from input->get_w() and the same row index is used
// for input and output.  Each pixel is loaded into input1..input3 (and
// input4 when components == 4) and handed to BLEND_3 or BLEND_4 with a
// constant opacity of alpha * max.  sizeof(type) != 4 picks out the integer
// formats, whose opacity gets 0.5 added for rounding.  h is read but not
// referenced again in this macro's own text.
2422 #define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
2423 { \
2424         temp_type opacity; \
2425         if(sizeof(type) != 4) \
2426                 opacity = (temp_type)(alpha * max + 0.5); \
2427         else \
2428                 opacity = (temp_type)(alpha * max); \
2429         temp_type transparency = max - opacity; \
2431         type** output_rows = (type**)output->get_rows(); \
2432         type** input_rows = (type**)input->get_rows(); \
2433         int w = input->get_w(); \
2434         int h = input->get_h(); \
2436         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2437         { \
2438                 type* in_row = input_rows[i]; \
2439                 type* output = output_rows[i]; \
2441                 for(int j = 0; j < w; j++) \
2442                 { \
2443                         temp_type input1, input2, input3, input4; \
2444                         input1 = in_row[0]; \
2445                         input2 = in_row[1]; \
2446                         input3 = in_row[2]; \
2447                         if(components == 4) input4 = in_row[3]; \
2450                         if(components == 3) \
2451                         { \
2452                                 BLEND_3(max, temp_type, type, chroma_offset); \
2453                         } \
2454                         else \
2455                         { \
2456                                 BLEND_4(max, temp_type, type, chroma_offset); \
2457                         } \
2459                         in_row += components; \
2460                         output += components; \
2461                 } \
2462         } \
// BLEND_ONLY_TRANSFER_REPLACE(type, components)
//
// Replace mode without scaling or translation: the rows
// [pkg->out_row1, pkg->out_row2) of the input frame are copied over the
// same rows of the output frame with memcpy.
//   type        stored channel type; only its size matters here.
//   components  channels per pixel.
// Names taken from the expansion site: pkg, input, output.
// The byte count per row is input->get_w() * sizeof(type) * components.
#define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
{ \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
	int line_len = w * sizeof(type) * components; \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		memcpy(output_rows[i], input_rows[i], line_len); \
	} \
}
// BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset)
//
// Normal mode blend of a 4 channel frame without scaling or translation,
// for the rows [pkg->out_row1, pkg->out_row2).  components is always 4.
//   temp_type      arithmetic type for intermediate values.
//   type           stored channel type.
//   max            full scale channel value.
//   chroma_offset  subtracted from channels 1 and 2 before mixing and added
//                  back afterwards; 0 leaves them untouched.
// Names taken from the expansion site: pkg, input, output, alpha.
//
// The weight of each input pixel is alpha * max times the pixel's own alpha
// channel, so it is scaled by max * max; that is why the mix is divided by
// max twice.  The output alpha becomes the larger of the two alphas.
#define BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset) \
{ \
	temp_type opacity = (temp_type)(alpha * max + 0.5); \
	temp_type max_squared = ((temp_type)max) * max; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type* in_row = input_rows[i]; \
		type* output = output_rows[i]; \
 \
		for(int j = 0; j < w; j++) \
		{ \
			temp_type pixel_opacity, pixel_transparency; \
			pixel_opacity = opacity * in_row[3]; \
			pixel_transparency = (temp_type)max_squared - pixel_opacity; \
 \
			output[0] = ((temp_type)in_row[0] * pixel_opacity + \
				(temp_type)output[0] * pixel_transparency) / max / max; \
			output[1] = (((temp_type)in_row[1] - chroma_offset) * pixel_opacity + \
				((temp_type)output[1] - chroma_offset) * pixel_transparency) \
				/ max / max + \
				chroma_offset; \
			output[2] = (((temp_type)in_row[2] - chroma_offset) * pixel_opacity + \
				((temp_type)output[2] - chroma_offset) * pixel_transparency) \
				/ max / max + \
				chroma_offset; \
			output[3] = (type)(in_row[3] > output[3] ? in_row[3] : output[3]); \
 \
			in_row += 4; \
			output += 4; \
		} \
	} \
}
// BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset)
//
// Normal mode blend of a 3 channel frame without scaling or translation,
// for the rows [pkg->out_row1, pkg->out_row2).  components is always 3.
//   temp_type      arithmetic type for intermediate values.
//   type           stored channel type; its bit width sets the fixed point
//                  scale.
//   max            not used by this macro.
//   chroma_offset  not used by this macro.
// Names taken from the expansion site: pkg, input, output, alpha.
//
// Opacity and transparency add up to 1 << bits, so the weighted sum is
// brought back to channel range with a shift instead of a division.  All
// channels are treated alike, so the inner loop simply walks 3 * width
// values per row.
#define BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset) \
{ \
	const int bits = sizeof(type) * 8; \
	temp_type opacity = (temp_type)(alpha * ((temp_type)1 << bits) + 0.5); \
	temp_type transparency = ((temp_type)1 << bits) - opacity; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w() * 3; \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type* in_row = input_rows[i]; \
		type* output = output_rows[i]; \
 \
		for(int j = 0; j < w; j++) /* w = 3x width! */ \
		{ \
			*output = ((temp_type)*in_row * opacity + *output * transparency) >> bits; \
			in_row ++; \
			output ++; \
		} \
	} \
}
2554 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2555  : LoadClient(server)
2557         this->overlay = overlay;
2558         this->blend_engine = server;
2561 BlendUnit::~BlendUnit()
2565 void BlendUnit::process_package(LoadPackage *package)
2567         BlendPackage *pkg = (BlendPackage*)package;
2570         VFrame *output = blend_engine->output;
2571         VFrame *input = blend_engine->input;
2572         float alpha = blend_engine->alpha;
2573         int mode = blend_engine->mode;
2575         if (mode == TRANSFER_REPLACE) 
2576         {
2577                 switch(input->get_color_model())
2578                 {
2579                         case BC_RGB_FLOAT:
2580                                 BLEND_ONLY_TRANSFER_REPLACE(float, 3);
2581                                 break;
2582                         case BC_RGBA_FLOAT:
2583                                 BLEND_ONLY_TRANSFER_REPLACE(float, 4);
2584                                 break;
2585                         case BC_RGB888:
2586                         case BC_YUV888:
2587                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2588                                 break;
2589                         case BC_RGBA8888:
2590                         case BC_YUVA8888:
2591                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2592                                 break;
2593                         case BC_RGB161616:
2594                         case BC_YUV161616:
2595                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2596                                 break;
2597                         case BC_RGBA16161616:
2598                         case BC_YUVA16161616:
2599                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2600                                 break;
2601                 }
2602         } 
2603         else
2604         if (mode == TRANSFER_NORMAL) 
2605         {
2606                 switch(input->get_color_model())
2607                 {
2608                         case BC_RGB_FLOAT:
2609                         {
2610                                 float opacity = alpha;
2611                                 float transparency = 1.0 - alpha;
2613                                 float** output_rows = (float**)output->get_rows();
2614                                 float** input_rows = (float**)input->get_rows();
2615                                 int w = input->get_w() * 3;
2616                                 int h = input->get_h();
2618                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2619                                 {
2620                                         float* in_row = input_rows[i];
2621                                         float* output = output_rows[i];
2622 /* w = 3x width! */
2623                                         for(int j = 0; j < w; j++) 
2624                                         {
2625                                                 *output = *in_row * opacity + *output * transparency;
2626                                                 in_row++;
2627                                                 output++;
2628                                         }
2629                                 }
2630                                 break;
2631                         }
2632                         case BC_RGBA_FLOAT:
2633                         {
2634                                 float opacity = alpha;
2635                                 float transparency = 1.0 - alpha;
2636                         
2637                                 float** output_rows = (float**)output->get_rows();
2638                                 float** input_rows = (float**)input->get_rows();
2639                                 int w = input->get_w();
2640                                 int h = input->get_h();
2641                         
2642                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2643                                 {
2644                                         float* in_row = input_rows[i];
2645                                         float* output = output_rows[i];
2646                         
2647                                         for(int j = 0; j < w; j++)
2648                                         {
2649                                                 float pixel_opacity, pixel_transparency;
2650                                                 pixel_opacity = opacity * in_row[3];
2651                                                 pixel_transparency = 1.0 - pixel_opacity;
2652                                         
2653                                         
2654                                                 output[0] = in_row[0] * pixel_opacity +
2655                                                         output[0] * pixel_transparency;
2656                                                 output[1] = in_row[1] * pixel_opacity +
2657                                                         output[1] * pixel_transparency;
2658                                                 output[2] = in_row[2] * pixel_opacity +
2659                                                         output[2] * pixel_transparency;
2660                                                 output[3] = in_row[3] > output[3] ? in_row[3] : output[3];
2662                                                 in_row += 4;
2663                                                 output += 4;
2664                                         }
2665                                 }
2666                                 break;
2667                         }
2668                         case BC_RGB888:
2669                                 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0xff, 0);
2670                                 break;
2671                         case BC_YUV888:
2672                                 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2673                                 break;
2674                         case BC_RGBA8888:
2675                                 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0xff, 0);
2676                                 break;
2677                         case BC_YUVA8888:
2678                                 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2679                                 break;
2680                         case BC_RGB161616:
2681                                 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2682                                 break;
2683                         case BC_YUV161616:
2684                                 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2685                                 break;
2686                         case BC_RGBA16161616:
2687                                 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2688                                 break;
2689                         case BC_YUVA16161616:
2690                                 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2691                                 break;
2692                 }
2693         }
2694         else
2695         switch(input->get_color_model())
2696         {
2697                 case BC_RGB_FLOAT:
2698                         BLEND_ONLY(float, float, 1.0, 3, 0);
2699                         break;
2700                 case BC_RGBA_FLOAT:
2701                         BLEND_ONLY(float, float, 1.0, 4, 0);
2702                         break;
2703                 case BC_RGB888:
2704                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 3, 0);
2705                         break;
2706                 case BC_YUV888:
2707                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2708                         break;
2709                 case BC_RGBA8888:
2710                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 4, 0);
2711                         break;
2712                 case BC_YUVA8888:
2713                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2714                         break;
2715                 case BC_RGB161616:
2716                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 3, 0);
2717                         break;
2718                 case BC_YUV161616:
2719                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2720                         break;
2721                 case BC_RGBA16161616:
2722                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 4, 0);
2723                         break;
2724                 case BC_YUVA16161616:
2725                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
2726                         break;
2727         }
2732 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2733  : LoadServer(cpus, cpus)
2735         this->overlay = overlay;
2738 BlendEngine::~BlendEngine()
2742 void BlendEngine::init_packages()
2744         for(int i = 0; i < total_packages; i++)
2745         {
2746                 BlendPackage *package = (BlendPackage*)packages[i];
2747                 package->out_row1 = (int)(input->get_h() / 
2748                         total_packages * 
2749                         i);
2750                 package->out_row2 = (int)((float)package->out_row1 +
2751                         input->get_h() / 
2752                         total_packages);
2754                 if(i >= total_packages - 1)
2755                         package->out_row2 = input->get_h();
2756         }
2759 LoadClient* BlendEngine::new_client()
2761         return new BlendUnit(this, overlay);
2764 LoadPackage* BlendEngine::new_package()
2766         return new BlendPackage;
2770 BlendPackage::BlendPackage()