cinelerra/overlayframe.C

   1 #include <math.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <stdint.h>
   5 #include <stdlib.h>
   6 #include <unistd.h>
   7
   8 #include "clip.h"
   9 #include "edl.inc"
  10 #include "mutex.h"
  11 #include "overlayframe.h"
  12 #include "units.h"
  13 #include "vframe.h"
  14
  15 // Easy abstraction of the float and int types.  Most of these are never used
  16 // but GCC expects them.
  // Absolute value overloads for every arithmetic type the blend macros may use
  // as temp_type.  Each overload returns its own argument type so that 64 bit
  // and large unsigned magnitudes are not truncated to int.

  // Signed 32 bit magnitude.
  static int32_t my_abs(int32_t x)
  {
          return abs(x);
  }

  // Unsigned values are already their own magnitude.
  static uint32_t my_abs(uint32_t x)
  {
          return x;
  }

  // Signed 64 bit magnitude.
  static int64_t my_abs(int64_t x)
  {
          return llabs(x);
  }

  // Unsigned values are already their own magnitude.
  static uint64_t my_abs(uint64_t x)
  {
          return x;
  }

  // Floating point magnitude.
  static float my_abs(float x)
  {
          return fabsf(x);
  }
  41
  42
  43
  44
  45 OverlayFrame::OverlayFrame(int cpus)
  46 {
  47         temp_frame = 0;
  48         blend_engine = 0;
  49         scale_engine = 0;
  50         scaletranslate_engine = 0;
  51         translate_engine = 0;
  52         this->cpus = cpus;
  53 }
  54
  55 OverlayFrame::~OverlayFrame()
  56 {
  57         if(temp_frame) delete temp_frame;
  58         if(scale_engine) delete scale_engine;
  59         if(translate_engine) delete translate_engine;
  60         if(blend_engine) delete blend_engine;
  61         if(scaletranslate_engine) delete scaletranslate_engine;
  62 }
  63
  64
  65
  66
  67
  68
  69
  70
  71 // Verification:
  72
  73 // (255 * 255 + 0 * 0) / 255 = 255
  74 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
  75
  76 // (65535 * 65535 + 0 * 0) / 65535 = 65535
  77 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
  78
  79
  80 // Branch prediction 4 U
  81
  // BLEND_3(max, temp_type, type, chroma_offset)
  //
  // Blend one 3 component pixel into output[0..2] in place.  The expansion site
  // must provide mode (a TRANSFER_* constant), input1..input3 (source
  // components), output (destination pixel) and opacity / transparency, which
  // are weights on the 0..max scale.
  //   max           - full scale component value
  //   temp_type     - arithmetic type used for intermediate results
  //   type          - storage type of output[]; results are clipped to 0..max
  //                   unless sizeof(type) is 4
  //   chroma_offset - offset removed from the 2nd and 3rd components before
  //                   arithmetic; 0 selects the plain per component formulas
  // An unrecognised mode leaves the destination pixel unchanged.
  #define BLEND_3(max, temp_type, type, chroma_offset) \
  { \
          temp_type r, g, b; \
   \
          switch(mode) \
          { \
                  case TRANSFER_DIVIDE: \
                          r = output[0] ? (((temp_type)input1 * max) / output[0]) : max; \
                          if(chroma_offset) \
                          { \
  /* With a chroma offset, keep whichever chroma is further from neutral */ \
                                  g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
                                  b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
                          } \
                          else \
                          { \
                                  g = output[1] ? (temp_type)input2 * max / (temp_type)output[1] : max; \
                                  b = output[2] ? (temp_type)input3 * max / (temp_type)output[2] : max; \
                          } \
                          r = (r * opacity + (temp_type)output[0] * transparency) / max; \
                          g = (g * opacity + (temp_type)output[1] * transparency) / max; \
                          b = (b * opacity + (temp_type)output[2] * transparency) / max; \
                          break; \
                  case TRANSFER_MULTIPLY: \
                          r = ((temp_type)input1 * output[0]) / max; \
                          if(chroma_offset) \
                          { \
                                  g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
                                  b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
                          } \
                          else \
                          { \
                                  g = (temp_type)input2 * (temp_type)output[1] / max; \
                                  b = (temp_type)input3 * (temp_type)output[2] / max; \
                          } \
                          r = (r * opacity + (temp_type)output[0] * transparency) / max; \
                          g = (g * opacity + (temp_type)output[1] * transparency) / max; \
                          b = (b * opacity + (temp_type)output[2] * transparency) / max; \
                          break; \
                  case TRANSFER_SUBTRACT: \
                          r = (temp_type)output[0] - (temp_type)input1; \
                          g = ((temp_type)output[1] - (temp_type)chroma_offset) - \
                                  ((temp_type)input2 - (temp_type)chroma_offset) + \
                                  (temp_type)chroma_offset; \
                          b = ((temp_type)output[2] - (temp_type)chroma_offset) - \
                                  ((temp_type)input3 - (temp_type)chroma_offset) + \
                                  (temp_type)chroma_offset; \
                          r = (r * opacity + output[0] * transparency) / max; \
                          g = (g * opacity + output[1] * transparency) / max; \
                          b = (b * opacity + output[2] * transparency) / max; \
                          break; \
                  case TRANSFER_ADDITION: \
                          r = (temp_type)input1 + output[0]; \
                          g = ((temp_type)input2 - chroma_offset) + \
                                  ((temp_type)output[1] - chroma_offset) + \
                                  (temp_type)chroma_offset; \
                          b = ((temp_type)input3 - chroma_offset) + \
                                  ((temp_type)output[2] - chroma_offset) + \
                                  (temp_type)chroma_offset; \
                          r = (r * opacity + output[0] * transparency) / max; \
                          g = (g * opacity + output[1] * transparency) / max; \
                          b = (b * opacity + output[2] * transparency) / max; \
                          break; \
                  case TRANSFER_REPLACE: \
                          r = input1; \
                          g = input2; \
                          b = input3; \
                          break; \
                  case TRANSFER_NORMAL: \
                          r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
                          g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
                          b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
                          break; \
                  default: \
  /* Unrecognised mode: keep the destination instead of storing garbage */ \
                          r = output[0]; \
                          g = output[1]; \
                          b = output[2]; \
                          break; \
          } \
   \
  /* Types that are not 4 bytes wide are clipped; 4 byte types are stored as is */ \
          if(sizeof(type) != 4) \
          { \
                  output[0] = (type)CLIP(r, 0, max); \
                  output[1] = (type)CLIP(g, 0, max); \
                  output[2] = (type)CLIP(b, 0, max); \
          } \
          else \
          { \
                  output[0] = r; \
                  output[1] = g; \
                  output[2] = b; \
          } \
  }
 170
 171
 172
 173
 174
 175 // Blending equations are drastically different for 3 and 4 components
 // BLEND_4(max, temp_type, type, chroma_offset)
 //
 // Blend one 4 component pixel into output[0..3] in place.  The expansion site
 // must provide mode (a TRANSFER_* constant), input1..input4 (source
 // components, input4 being alpha), output (destination pixel) and opacity on
 // the 0..max scale.  The per pixel weight is opacity * input4, which is on
 // the 0..max*max scale, hence the double division by max.
 //   max           - full scale component value
 //   temp_type     - arithmetic type used for intermediate results
 //   type          - storage type of output[]; colour results are clipped to
 //                   0..max unless sizeof(type) is 4
 //   chroma_offset - offset removed from the 2nd and 3rd components before
 //                   arithmetic; 0 selects the plain per component formulas
 // Except for TRANSFER_REPLACE the resulting alpha is the larger of the input
 // and output alpha.  An unrecognised mode leaves the pixel unchanged.
 #define BLEND_4(max, temp_type, type, chroma_offset) \
 { \
         temp_type r, g, b, a; \
         temp_type pixel_opacity, pixel_transparency; \
         temp_type output1 = output[0]; \
         temp_type output2 = output[1]; \
         temp_type output3 = output[2]; \
         temp_type output4 = output[3]; \
  \
         pixel_opacity = opacity * input4; \
         pixel_transparency = (temp_type)max * max - pixel_opacity; \
  \
         switch(mode) \
         { \
                 case TRANSFER_DIVIDE: \
                         r = output1 ? (((temp_type)input1 * max) / output1) : max; \
                         if(chroma_offset) \
                         { \
 /* With a chroma offset, keep whichever chroma is further from neutral */ \
                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
                         } \
                         else \
                         { \
                                 g = output2 ? (temp_type)input2 * max / (temp_type)output2 : max; \
                                 b = output3 ? (temp_type)input3 * max / (temp_type)output3 : max; \
                         } \
                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
                         a = input4 > output4 ? input4 : output4; \
                         break; \
                 case TRANSFER_MULTIPLY: \
                         r = ((temp_type)input1 * output1) / max; \
                         if(chroma_offset) \
                         { \
                                 g = my_abs((temp_type)input2 - chroma_offset) > my_abs((temp_type)output2 - chroma_offset) ? input2 : output2; \
                                 b = my_abs((temp_type)input3 - chroma_offset) > my_abs((temp_type)output3 - chroma_offset) ? input3 : output3; \
                         } \
                         else \
                         { \
                                 g = (temp_type)input2 * (temp_type)output2 / max; \
                                 b = (temp_type)input3 * (temp_type)output3 / max; \
                         } \
                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
                         a = input4 > output4 ? input4 : output4; \
                         break; \
                 case TRANSFER_SUBTRACT: \
 /* Output minus input for every component, matching g, b and BLEND_3 */ \
                         r = output1 - (temp_type)input1; \
                         g = ((temp_type)output2 - chroma_offset) - \
                                 ((temp_type)input2 - (temp_type)chroma_offset) + \
                                 (temp_type)chroma_offset; \
                         b = ((temp_type)output3 - chroma_offset) - \
                                 ((temp_type)input3 - (temp_type)chroma_offset) + \
                                 (temp_type)chroma_offset; \
                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
                         a = input4 > output4 ? input4 : output4; \
                         break; \
                 case TRANSFER_ADDITION: \
                         r = (temp_type)input1 + output1; \
                         g = ((temp_type)input2 - chroma_offset) + \
                                 ((temp_type)output2 - chroma_offset) + \
                                 chroma_offset; \
                         b = ((temp_type)input3 - chroma_offset) + \
                                 ((temp_type)output3 - chroma_offset) + \
                                 chroma_offset; \
                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
                         a = input4 > output4 ? input4 : output4; \
                         break; \
                 case TRANSFER_REPLACE: \
                         r = input1; \
                         g = input2; \
                         b = input3; \
                         a = input4; \
                         break; \
                 case TRANSFER_NORMAL: \
                         r = (input1 * pixel_opacity + \
                                 output1 * pixel_transparency) / max / max; \
                         g = ((input2 - chroma_offset) * pixel_opacity + \
                                 (output2 - chroma_offset) * pixel_transparency) \
                                 / max / max + \
                                 chroma_offset; \
                         b = ((input3 - chroma_offset) * pixel_opacity + \
                                 (output3 - chroma_offset) * pixel_transparency) \
                                 / max / max + \
                                 chroma_offset; \
                         a = input4 > output4 ? input4 : output4; \
                         break; \
                 default: \
 /* Unrecognised mode: keep the destination instead of storing garbage */ \
                         r = output1; \
                         g = output2; \
                         b = output3; \
                         a = output4; \
                         break; \
         } \
  \
 /* Types that are not 4 bytes wide are clipped; 4 byte types are stored as is */ \
         if(sizeof(type) != 4) \
         { \
                 output[0] = (type)CLIP(r, 0, max); \
                 output[1] = (type)CLIP(g, 0, max); \
                 output[2] = (type)CLIP(b, 0, max); \
                 output[3] = (type)a; \
         } \
         else \
         { \
                 output[0] = r; \
                 output[1] = g; \
                 output[2] = b; \
                 output[3] = a; \
         } \
 }
 286
 287
 288
 289 // Bicubic algorithm using multiprocessors
 290 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
 291
 292 // Nearest neighbor algorithm using multiprocessors for blending
 293 // input -> scale + translate -> blend -> output
 294
 295
 296 int OverlayFrame::overlay(VFrame *output,
 297         VFrame *input,
 298         float in_x1,
 299         float in_y1,
 300         float in_x2,
 301         float in_y2,
 302         float out_x1,
 303         float out_y1,
 304         float out_x2,
 305         float out_y2,
 306         float alpha,       // 0 - 1
 307         int mode,
 308         int interpolation_type)
 309 {
 310         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
 311         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
 312
 313
 314
 315
 316
 317
 318
 319
 320         if(isnan(in_x1) ||
 321                 isnan(in_y1) ||
 322                 isnan(in_x2) ||
 323                 isnan(in_y2) ||
 324                 isnan(out_x1) ||
 325                 isnan(out_y1) ||
 326                 isnan(out_x2) ||
 327                 isnan(out_y2)) return 1;
 328 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f scale=%f %f\n", in_x1,
 329 // in_y1,
 330 // in_x2,
 331 // in_y2,
 332 // out_x1,
 333 // out_y1,
 334 // out_x2,
 335 // out_y2,
 336 // out_x2 - out_x1,
 337 // out_y2 - out_y1);
 338
 339 // Limit values
 340         if(in_x1 < 0)
 341         {
 342                 out_x1 += -in_x1 * w_scale;
 343                 in_x1 = 0;
 344         }
 345         else
 346         if(in_x1 >= input->get_w())
 347         {
 348                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
 349                 in_x1 = input->get_w();
 350         }
 351
 352         if(in_y1 < 0)
 353         {
 354                 out_y1 += -in_y1 * h_scale;
 355                 in_y1 = 0;
 356         }
 357         else
 358         if(in_y1 >= input->get_h())
 359         {
 360                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
 361                 in_y1 = input->get_h();
 362         }
 363
 364         if(in_x2 < 0)
 365         {
 366                 out_x2 += -in_x2 * w_scale;
 367                 in_x2 = 0;
 368         }
 369         else
 370         if(in_x2 >= input->get_w())
 371         {
 372                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
 373                 in_x2 = input->get_w();
 374         }
 375
 376         if(in_y2 < 0)
 377         {
 378                 out_y2 += -in_y2 * h_scale;
 379                 in_y2 = 0;
 380         }
 381         else
 382         if(in_y2 >= input->get_h())
 383         {
 384                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
 385                 in_y2 = input->get_h();
 386         }
 387
 388         if(out_x1 < 0)
 389         {
 390                 in_x1 += -out_x1 / w_scale;
 391                 out_x1 = 0;
 392         }
 393         else
 394         if(out_x1 >= output->get_w())
 395         {
 396                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
 397                 out_x1 = output->get_w();
 398         }
 399
 400         if(out_y1 < 0)
 401         {
 402                 in_y1 += -out_y1 / h_scale;
 403                 out_y1 = 0;
 404         }
 405         else
 406         if(out_y1 >= output->get_h())
 407         {
 408                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
 409                 out_y1 = output->get_h();
 410         }
 411
 412         if(out_x2 < 0)
 413         {
 414                 in_x2 += -out_x2 / w_scale;
 415                 out_x2 = 0;
 416         }
 417         else
 418         if(out_x2 >= output->get_w())
 419         {
 420                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
 421                 out_x2 = output->get_w();
 422         }
 423
 424         if(out_y2 < 0)
 425         {
 426                 in_y2 += -out_y2 / h_scale;
 427                 out_y2 = 0;
 428         }
 429         else
 430         if(out_y2 >= output->get_h())
 431         {
 432                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
 433                 out_y2 = output->get_h();
 434         }
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445         float in_w = in_x2 - in_x1;
 446         float in_h = in_y2 - in_y1;
 447         float out_w = out_x2 - out_x1;
 448         float out_h = out_y2 - out_y1;
 449 // Input for translation operation
 450         VFrame *translation_input = input;
 451
 452
 453         if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
 454
 455
 456 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
 457 //                      in_y1,
 458 //                      in_x2,
 459 //                      in_y2,
 460 //                      out_x1,
 461 //                      out_y1,
 462 //                      out_x2,
 463 //                      out_y2);
 464
 465
 466
 467
 468
 469 // ****************************************************************************
 470 // Transfer to temp buffer by scaling nearest integer boundaries
 471 // ****************************************************************************
 472         if(interpolation_type != NEAREST_NEIGHBOR &&
 473                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
 474         {
 475 // Create integer boundaries for interpolation
 476                 int in_x1_int = (int)in_x1;
 477                 int in_y1_int = (int)in_y1;
 478                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
 479                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
 480                 int out_x1_int = (int)out_x1;
 481                 int out_y1_int = (int)out_y1;
 482                 int out_x2_int = MIN((int)ceil(out_x2), output->get_w());
 483                 int out_y2_int = MIN((int)ceil(out_y2), output->get_h());
 484
 485 // Dimensions of temp frame.  Integer boundaries scaled.
 486                 int temp_w = (out_x2_int - out_x1_int);
 487                 int temp_h = (out_y2_int - out_y1_int);
 488                 VFrame *scale_output;
 489
 490
 491
 492 #define NO_TRANSLATION1 \
 493         (EQUIV(in_x1, 0) && \
 494         EQUIV(in_y1, 0) && \
 495         EQUIV(out_x1, 0) && \
 496         EQUIV(out_y1, 0) && \
 497         EQUIV(in_x2, in_x2_int) && \
 498         EQUIV(in_y2, in_y2_int) && \
 499         EQUIV(out_x2, temp_w) && \
 500         EQUIV(out_y2, temp_h))
 501
 502
 503 #define NO_BLEND \
 504         (EQUIV(alpha, 1) && \
 505         (mode == TRANSFER_REPLACE || \
 506         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
 507
 508
 509
 510
 511
 512 // Prepare destination for operation
 513
 514 // No translation and no blending.  The blending operation is built into the
 515 // translation unit but not the scaling unit.
 516 // input -> output
 517                 if(NO_TRANSLATION1 &&
 518                         NO_BLEND)
 519                 {
 520 // printf("OverlayFrame::overlay input -> output\n");
 521
 522                         scale_output = output;
 523                         translation_input = 0;
 524                 }
 525                 else
 526 // If translation or blending
 527 // input -> nearest integer boundary temp
 528                 {
 529                         if(temp_frame &&
 530                                 (temp_frame->get_w() != temp_w ||
 531                                         temp_frame->get_h() != temp_h))
 532                         {
 533                                 delete temp_frame;
 534                                 temp_frame = 0;
 535                         }
 536
 537                         if(!temp_frame)
 538                         {
 539                                 temp_frame = new VFrame(0,
 540                                         temp_w,
 541                                         temp_h,
 542                                         input->get_color_model(),
 543                                         -1);
 544                         }
 545 //printf("OverlayFrame::overlay input -> temp\n");
 546
 547
 548                         temp_frame->clear_frame();
 549
 550 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
 551 //      temp_w, temp_h);
 552                         scale_output = temp_frame;
 553                         translation_input = scale_output;
 554
 555 // Adjust input coordinates to reflect new scaled coordinates.
 556                         in_x1 = 0;
 557                         in_y1 = 0;
 558                         in_x2 = temp_w;
 559                         in_y2 = temp_h;
 560                 }
 561
 562
 563
 564 //printf("Overlay 1\n");
 565
 566 // Scale input -> scale_output
 567                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
 568                 scale_engine->scale_output = scale_output;
 569                 scale_engine->scale_input = input;
 570                 scale_engine->w_scale = w_scale;
 571                 scale_engine->h_scale = h_scale;
 572                 scale_engine->in_x1_int = in_x1_int;
 573                 scale_engine->in_y1_int = in_y1_int;
 574                 scale_engine->out_w_int = temp_w;
 575                 scale_engine->out_h_int = temp_h;
 576                 scale_engine->interpolation_type = interpolation_type;
 577 //printf("Overlay 2\n");
 578
 579 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
 580                 scale_engine->process_packages();
 581 //printf("OverlayFrame::overlay ScaleEngine 2\n");
 582
 583
 584
 585         }
 586
 587 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 588 //      in_x1,
 589 //      in_y1,
 590 //      in_x2,
 591 //      in_y2,
 592 //      out_x1,
 593 //      out_y1,
 594 //      out_x2,
 595 //      out_y2);
 596
 597
 598
 599
 600
 601 #define NO_TRANSLATION2 \
 602         (EQUIV(in_x1, 0) && \
 603         EQUIV(in_y1, 0) && \
 604         EQUIV(in_x2, translation_input->get_w()) && \
 605         EQUIV(in_y2, translation_input->get_h()) && \
 606         EQUIV(out_x1, 0) && \
 607         EQUIV(out_y1, 0) && \
 608         EQUIV(out_x2, output->get_w()) && \
 609         EQUIV(out_y2, output->get_h())) \
 610
 611 #define NO_SCALE \
 612         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
 613         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
 614
 615
 616
 617
 618 //printf("OverlayFrame::overlay 4 %d\n", mode);
 619
 620
 621
 622
 623         if(translation_input)
 624         {
 625 // Direct copy
 626                 if( NO_TRANSLATION2 &&
 627                         NO_SCALE &&
 628                         NO_BLEND)
 629                 {
 630 //printf("OverlayFrame::overlay direct copy\n");
 631                         output->copy_from(translation_input);
 632                 }
 633                 else
 634 // Blend only
 635                 if( NO_TRANSLATION2 &&
 636                         NO_SCALE)
 637                 {
 638                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
 639
 640
 641                         blend_engine->output = output;
 642                         blend_engine->input = translation_input;
 643                         blend_engine->alpha = alpha;
 644                         blend_engine->mode = mode;
 645
 646                         blend_engine->process_packages();
 647                 }
 648                 else
 649 // Scale and translate using nearest neighbor
 650 // Translation is exactly on integer boundaries
 651                 if(interpolation_type == NEAREST_NEIGHBOR ||
 652                         EQUIV(in_x1, (int)in_x1) &&
 653                         EQUIV(in_y1, (int)in_y1) &&
 654                         EQUIV(in_x2, (int)in_x2) &&
 655                         EQUIV(in_y2, (int)in_y2) &&
 656
 657                         EQUIV(out_x1, (int)out_x1) &&
 658                         EQUIV(out_y1, (int)out_y1) &&
 659                         EQUIV(out_x2, (int)out_x2) &&
 660                         EQUIV(out_y2, (int)out_y2))
 661                 {
 662 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
 663                         if(!scaletranslate_engine) scaletranslate_engine =
 664                                 new ScaleTranslateEngine(this, cpus);
 665
 666
 667                         scaletranslate_engine->output = output;
 668                         scaletranslate_engine->input = translation_input;
 669                         scaletranslate_engine->in_x1 = (int)in_x1;
 670                         scaletranslate_engine->in_y1 = (int)in_y1;
 671 // we need to do this mumbo-jumbo in order to get numerical stability
 672 // other option would be to round all the coordinates
 673                         scaletranslate_engine->in_x2 = (int)in_x1 + (int)(in_x2 - in_x1);
 674                         scaletranslate_engine->in_y2 = (int)in_y1 + (int)(in_y2 - in_y1);
 675                         scaletranslate_engine->out_x1 = (int)out_x1;
 676                         scaletranslate_engine->out_y1 = (int)out_y1;
 677                         scaletranslate_engine->out_x2 = (int)out_x1 + (int)(out_x2 - out_x1);
 678                         scaletranslate_engine->out_y2 = (int)out_y1 + (int)(out_y2 - out_y1);
 679                         scaletranslate_engine->alpha = alpha;
 680                         scaletranslate_engine->mode = mode;
 681
 682                         scaletranslate_engine->process_packages();
 683                 }
 684                 else
 685 // Fractional translation
 686                 {
 687 // Use fractional translation
 688 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 689 //      in_x1,
 690 //      in_y1,
 691 //      in_x2,
 692 //      in_y2,
 693 //      out_x1,
 694 //      out_y1,
 695 //      out_x2,
 696 //      out_y2);
 697
 698 //printf("Overlay 3\n");
 699                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
 700                         translate_engine->translate_output = output;
 701                         translate_engine->translate_input = translation_input;
 702                         translate_engine->translate_in_x1 = in_x1;
 703                         translate_engine->translate_in_y1 = in_y1;
 704                         translate_engine->translate_in_x2 = in_x2;
 705                         translate_engine->translate_in_y2 = in_y2;
 706                         translate_engine->translate_out_x1 = out_x1;
 707                         translate_engine->translate_out_y1 = out_y1;
 708                         translate_engine->translate_out_x2 = out_x2;
 709                         translate_engine->translate_out_y2 = out_y2;
 710                         translate_engine->translate_alpha = alpha;
 711                         translate_engine->translate_mode = mode;
 712 //printf("Overlay 4\n");
 713
 714 //printf("OverlayFrame::overlay 5 %d\n", mode);
 715                         translate_engine->process_packages();
 716
 717                 }
 718         }
 719 //printf("OverlayFrame::overlay 2\n");
 720
 721         return 0;
 722 }
 723
 724
 725
 726
 727
 728
 729
 730 ScalePackage::ScalePackage()
 731 {
 732 }
 733
 734
 735
 736
 737 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
 738  : LoadClient(server)
 739 {
 740         this->overlay = overlay;
 741         this->engine = server;
 742 }
 743
 744 ScaleUnit::~ScaleUnit()
 745 {
 746 }
 747
 748
 749
 750 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
 751         float scale,
 752         int in_pixel1,
 753         int out_total,
 754         int in_total)
 755 {
 756         table = new bilinear_table_t[out_total];
 757         bzero(table, sizeof(bilinear_table_t) * out_total);
 758 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
 759         for(int i = 0; i < out_total; i++)
 760         {
 761                 float out_start = i;
 762                 float in_start = out_start * scale;
 763                 float out_end = i + 1;
 764                 float in_end = out_end * scale;
 765                 bilinear_table_t *entry = table + i;
 766 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
 767
 768 // Store input fraction.  Using scale to normalize these didn't work.
 769                 entry->input_fraction1 = (floor(in_start + 1) - in_start) /* / scale */;
 770                 entry->input_fraction2 = 1.0 /* / scale */;
 771                 entry->input_fraction3 = (in_end - floor(in_end)) /* / scale */;
 772
 773                 if(in_end >= in_total - in_pixel1)
 774                 {
 775                         in_end = in_total - in_pixel1 - 1;
 776
 777                         int difference = (int)in_end - (int)in_start - 1;
 778                         if(difference < 0) difference = 0;
 779                         entry->input_fraction3 = 1.0 -
 780                                 entry->input_fraction1 -
 781                                 entry->input_fraction2 * difference;
 782                 }
 783
 784 // Store input pixels
 785                 entry->input_pixel1 = (int)in_start;
 786                 entry->input_pixel2 = (int)in_end;
 787
 788 // Normalize for middle pixels
 789                 if(entry->input_pixel2 > entry->input_pixel1 + 1)
 790                 {
 791                         float total = entry->input_fraction1 +
 792                                 entry->input_fraction2 *
 793                                 (entry->input_pixel2 - entry->input_pixel1 - 1) +
 794                                 entry->input_fraction3;
 795                         entry->input_fraction1 /= total;
 796                         entry->input_fraction2 /= total;
 797                         entry->input_fraction3 /= total;
 798                 }
 799                 else
 800                 {
 801                         float total = entry->input_fraction1 +
 802                                 entry->input_fraction3;
 803                         entry->input_fraction1 /= total;
 804                         entry->input_fraction3 /= total;
 805                 }
 806
 807 // printf("ScaleUnit::tabulate_reduction 1 %d %d %d %f %f %f %f\n",
 808 // i,
 809 // entry->input_pixel1,
 810 // entry->input_pixel2,
 811 // entry->input_fraction1,
 812 // entry->input_fraction2,
 813 // entry->input_fraction3,
 814 // entry->input_fraction1 +
 815 //      entry->input_fraction2 *
 816 //      (entry->input_pixel2 - entry->input_pixel1 - 1) +
 817 //      entry->input_fraction3);
 818
 819
 820 // Sanity check
 821                 if(entry->input_pixel1 > entry->input_pixel2)
 822                 {
 823                         entry->input_pixel1 = entry->input_pixel2;
 824                         entry->input_fraction1 = 0;
 825                 }
 826
 827 // Get total fraction of output pixel used
 828 //              if(entry->input_pixel2 > entry->input_pixel1)
 829                 entry->total_fraction =
 830                         entry->input_fraction1 +
 831                         entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
 832                         entry->input_fraction3;
 833                 entry->input_pixel1 += in_pixel1;
 834                 entry->input_pixel2 += in_pixel1;
 835         }
 836 }
 837
 838 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
 839         float scale,
 840         int in_pixel1,
 841         int out_total,
 842         int in_total)
 843 {
 844         table = new bilinear_table_t[out_total];
 845         bzero(table, sizeof(bilinear_table_t) * out_total);
 846
 847         for(int i = 0; i < out_total; i++)
 848         {
 849                 bilinear_table_t *entry = table + i;
 850                 float in_pixel = i * scale;
 851                 entry->input_pixel1 = (int)floor(in_pixel);
 852                 entry->input_pixel2 = entry->input_pixel1 + 1;
 853
 854                 if(in_pixel <= in_total)
 855                 {
 856                         entry->input_fraction3 = in_pixel - entry->input_pixel1;
 857                 }
 858                 else
 859                 {
 860                         entry->input_fraction3 = 0;
 861                         entry->input_pixel2 = 0;
 862                 }
 863
 864                 if(in_pixel >= 0)
 865                 {
 866                         entry->input_fraction1 = entry->input_pixel2 - in_pixel;
 867                 }
 868                 else
 869                 {
 870                         entry->input_fraction1 = 0;
 871                         entry->input_pixel1 = 0;
 872                 }
 873
 874                 if(entry->input_pixel2 >= in_total - in_pixel1)
 875                 {
 876                         entry->input_pixel2 = entry->input_pixel1;
 877                         entry->input_fraction3 = 1.0 - entry->input_fraction1;
 878                 }
 879
 880                 entry->total_fraction =
 881                         entry->input_fraction1 +
 882                         entry->input_fraction3;
 883                 entry->input_pixel1 += in_pixel1;
 884                 entry->input_pixel2 += in_pixel1;
 885 //
 886 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
 887 // entry->input_pixel1,
 888 // entry->input_pixel2,
 889 // entry->input_fraction1,
 890 // entry->input_fraction2,
 891 // entry->input_fraction3);
 892         }
 893 }
 894
 895 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
 896 {
 897         printf("ScaleUnit::dump_bilinear\n");
 898         for(int i = 0; i < total; i++)
 899         {
 900                 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n",
 901                         i,
 902                         table[i].input_pixel1,
 903                         table[i].input_pixel2,
 904                         table[i].input_fraction1,
 905                         table[i].input_fraction2,
 906                         table[i].input_fraction3,
 907                         table[i].total_fraction);
 908         }
 909 }
 910
 911 #define PIXEL_REDUCE_MACRO(type, components, row) \
 912 { \
 913         type *input_row = &in_rows[row][x_entry->input_pixel1 * components]; \
 914         type *input_end = &in_rows[row][x_entry->input_pixel2 * components]; \
 915  \
 916 /* Do first pixel */ \
 917         temp_f1 += input_scale1 * input_row[0]; \
 918         temp_f2 += input_scale1 * input_row[1]; \
 919         temp_f3 += input_scale1 * input_row[2]; \
 920         if(components == 4) temp_f4 += input_scale1 * input_row[3]; \
 921  \
 922 /* Do last pixel */ \
 923 /*      if(input_row < input_end) */\
 924         { \
 925                 temp_f1 += input_scale3 * input_end[0]; \
 926                 temp_f2 += input_scale3 * input_end[1]; \
 927                 temp_f3 += input_scale3 * input_end[2]; \
 928                 if(components == 4) temp_f4 += input_scale3 * input_end[3]; \
 929         } \
 930  \
 931 /* Do middle pixels */ \
 932         for(input_row += components; input_row < input_end; input_row += components) \
 933         { \
 934                 temp_f1 += input_scale2 * input_row[0]; \
 935                 temp_f2 += input_scale2 * input_row[1]; \
 936                 temp_f3 += input_scale2 * input_row[2]; \
 937                 if(components == 4) temp_f4 += input_scale2 * input_row[3]; \
 938         } \
 939 }
 940
 941 // Bilinear reduction and suboptimal enlargement.
 942 // Very high quality.
 943 #define BILINEAR_REDUCE(max, type, components) \
 944 { \
 945         bilinear_table_t *x_table, *y_table; \
 946         int out_h = pkg->out_row2 - pkg->out_row1; \
 947         type **in_rows = (type**)input->get_rows(); \
 948         type **out_rows = (type**)output->get_rows(); \
 949  \
 950         if(scale_w < 1) \
 951                 tabulate_reduction(x_table, \
 952                         1.0 / scale_w, \
 953                         in_x1_int, \
 954                         out_w_int, \
 955                         input->get_w()); \
 956         else \
 957                 tabulate_enlarge(x_table, \
 958                         1.0 / scale_w, \
 959                         in_x1_int, \
 960                         out_w_int, \
 961                         input->get_w()); \
 962  \
 963         if(scale_h < 1) \
 964                 tabulate_reduction(y_table, \
 965                         1.0 / scale_h, \
 966                         in_y1_int, \
 967                         out_h_int, \
 968                         input->get_h()); \
 969         else \
 970                 tabulate_enlarge(y_table, \
 971                         1.0 / scale_h, \
 972                         in_y1_int, \
 973                         out_h_int, \
 974                         input->get_h()); \
 975 /* dump_bilinear(y_table, out_h_int); */\
 976  \
 977         for(int i = 0; i < out_h; i++) \
 978         { \
 979                 type *out_row = out_rows[i + pkg->out_row1]; \
 980                 bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
 981 /* printf("BILINEAR_REDUCE 2 %d %d %d %f %f %f\n", */ \
 982 /* i, */ \
 983 /* y_entry->input_pixel1, */ \
 984 /* y_entry->input_pixel2, */ \
 985 /* y_entry->input_fraction1, */ \
 986 /* y_entry->input_fraction2, */ \
 987 /* y_entry->input_fraction3); */ \
 988  \
 989                 for(int j = 0; j < out_w_int; j++) \
 990                 { \
 991                         bilinear_table_t *x_entry = &x_table[j]; \
 992 /* Load rounding factors */ \
 993                         float temp_f1; \
 994                         float temp_f2; \
 995                         float temp_f3; \
 996                         float temp_f4; \
 997                         if(sizeof(type) != 4) \
 998                                 temp_f1 = temp_f2 = temp_f3 = temp_f4 = .5; \
 999                         else \
1000                                 temp_f1 = temp_f2 = temp_f3 = temp_f4 = 0; \
1001  \
1002 /* First row */ \
1003                         float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
1004                         float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
1005                         float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
1006                         PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
1007  \
1008 /* Last row */ \
1009                         if(out_h) \
1010                         { \
1011                                 input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
1012                                 input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
1013                                 input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
1014                                 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
1015  \
1016 /* Middle rows */ \
1017                                 if(out_h > 1) \
1018                                 { \
1019                                         input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
1020                                         input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
1021                                         input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
1022                                         for(int k = y_entry->input_pixel1 + 1; \
1023                                                 k < y_entry->input_pixel2; \
1024                                                 k++) \
1025                                         { \
1026                                                 PIXEL_REDUCE_MACRO(type, components, k) \
1027                                         } \
1028                                 } \
1029                         } \
1030  \
1031  \
1032                         if(max != 1.0) \
1033                         { \
1034                                 if(temp_f1 > max) temp_f1 = max; \
1035                                 if(temp_f2 > max) temp_f2 = max; \
1036                                 if(temp_f3 > max) temp_f3 = max; \
1037                                 if(components == 4) if(temp_f4 > max) temp_f4 = max; \
1038                         } \
1039  \
1040                         out_row[j * components    ] = (type)temp_f1; \
1041                         out_row[j * components + 1] = (type)temp_f2; \
1042                         out_row[j * components + 2] = (type)temp_f3; \
1043                         if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
1044                 } \
1045 /*printf("BILINEAR_REDUCE 3 %d\n", i);*/ \
1046         } \
1047  \
1048         delete [] x_table; \
1049         delete [] y_table; \
1050 }
1051
1052
1053
// Only 2 input pixels
//
// Bilinear enlargement: every output channel is a blend of the four input
// samples picked by the x and y lookup tables.
// Uses these names from the expansion site: pkg, input, output, scale_w,
// scale_h, in_x1_int, in_y1_int, out_w_int.
//   max        - accepted for symmetry with the other scalers; not used here
//   type       - channel storage type
//   components - channels per pixel
// The y tables are built for rows out_row1..out_row2 only, so they are
// indexed relative to the package.  The store truncates; no rounding is added.
#define BILINEAR_ENLARGE(max, type, components) \
{ \
	float k_y = 1.0 / scale_h; \
	float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	int out_h = pkg->out_row2 - pkg->out_row1; \
	int in_h_int = input->get_h(); \
	int in_w_int = input->get_w(); \
	int *table_int_x1, *table_int_y1; \
	int *table_int_x2, *table_int_y2; \
	float *table_frac_x_f, *table_antifrac_x_f, *table_frac_y_f, *table_antifrac_y_f; \
 \
	tabulate_blinear_f(table_int_x1, \
		table_int_x2, \
		table_frac_x_f, \
		table_antifrac_x_f, \
		k_x, \
		0, \
		out_w_int, \
		in_x1_int, \
		in_w_int); \
	tabulate_blinear_f(table_int_y1, \
		table_int_y2, \
		table_frac_y_f, \
		table_antifrac_y_f, \
		k_y, \
		pkg->out_row1, \
		pkg->out_row2, \
		in_y1_int, \
		in_h_int); \
 \
	for(int i = 0; i < out_h; i++) \
	{ \
		float a_f = table_frac_y_f[i]; \
		float anti_a_f = table_antifrac_y_f[i]; \
		type *in_row1 = in_rows[table_int_y1[i]]; \
		type *in_row2 = in_rows[table_int_y2[i]]; \
		type *out_row = out_rows[i + pkg->out_row1]; \
 \
		for(int j = 0; j < out_w_int; j++) \
		{ \
			float b_f = table_frac_x_f[j]; \
			float anti_b_f = table_antifrac_x_f[j]; \
			type *upper_left = in_row1 + table_int_x1[j] * components; \
			type *upper_right = in_row1 + table_int_x2[j] * components; \
			type *lower_left = in_row2 + table_int_x1[j] * components; \
			type *lower_right = in_row2 + table_int_x2[j] * components; \
			type *out_pixel = out_row + j * components; \
 \
/* Same blend for every channel: rows weighted by a, columns by b */ \
			for(int c = 0; c < components; c++) \
			{ \
				float sample1 = upper_left[c]; \
				float sample2 = upper_right[c]; \
				float sample3 = lower_left[c]; \
				float sample4 = lower_right[c]; \
				out_pixel[c] = \
					(type)(anti_a_f * (anti_b_f * sample1 + \
					b_f * sample2) + \
					a_f * (anti_b_f * sample3 + \
					b_f * sample4)); \
			} \
		} \
	} \
 \
	delete [] table_int_x1; \
	delete [] table_int_x2; \
	delete [] table_int_y1; \
	delete [] table_int_y2; \
	delete [] table_frac_x_f; \
	delete [] table_antifrac_x_f; \
	delete [] table_frac_y_f; \
	delete [] table_antifrac_y_f; \
}
1172
1173
// Bicubic scaling: every output channel is the weighted sum of a 4x4 block of
// input samples.  Weights and clamped input coordinates come from
// tabulate_bcubic_f, four consecutive table slots per output pixel.
// Uses these names from the expansion site: pkg, input, output, scale_w,
// scale_h, in_x1_int, in_y1_int, out_w_int, out_h_int.
//   max        - accepted for symmetry with the other scalers; not used here
//   type       - channel storage type
//   components - channels per pixel; the fourth channel is only handled when 4
// Both tables cover the whole output and are indexed by absolute row/column.
// The store truncates; no rounding or clamping is applied.
#define BICUBIC(max, type, components) \
{ \
	float k_y = 1.0 / scale_h; \
	float k_x = 1.0 / scale_w; \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
	float *bspline_x_f, *bspline_y_f; \
	int *in_x_table, *in_y_table; \
	int in_h_int = input->get_h(); \
	int in_w_int = input->get_w(); \
 \
	tabulate_bcubic_f(bspline_x_f, \
		in_x_table, \
		k_x, \
		in_x1_int, \
		out_w_int, \
		in_w_int, \
		-1); \
 \
	tabulate_bcubic_f(bspline_y_f, \
		in_y_table, \
		k_y, \
		in_y1_int, \
		out_h_int, \
		in_h_int, \
		1); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		for(int j = 0; j < out_w_int; j++) \
		{ \
			float output1_f = 0; \
			float output2_f = 0; \
			float output3_f = 0; \
			float output4_f = 0; \
 \
/* Kernel: rows in the outer loop, columns in the inner loop */ \
			for(int m = 0; m < 4; m++) \
			{ \
				float r1_f = bspline_y_f[i * 4 + m]; \
				type *in_row = in_rows[in_y_table[i * 4 + m]]; \
 \
				for(int n = 0; n < 4; n++) \
				{ \
					float r_square_f = r1_f * bspline_x_f[j * 4 + n]; \
					type *in_pixel = in_row + in_x_table[j * 4 + n] * components; \
					output1_f += r_square_f * in_pixel[0]; \
					output2_f += r_square_f * in_pixel[1]; \
					output3_f += r_square_f * in_pixel[2]; \
					if(components == 4) \
						output4_f += r_square_f * in_pixel[3]; \
				} \
			} \
 \
			out_rows[i][j * components] = (type)output1_f; \
			out_rows[i][j * components + 1] = (type)output2_f; \
			out_rows[i][j * components + 2] = (type)output3_f; \
			if(components == 4) \
				out_rows[i][j * components + 3] = (type)output4_f; \
		} \
	} \
 \
	delete [] bspline_x_f; \
	delete [] bspline_y_f; \
	delete [] in_x_table; \
	delete [] in_y_table; \
}
1260
1261
1262
1263
1264 // Pow function is not thread safe in Compaqt C
1265 #define CUBE(x) ((x) * (x) * (x))
1266
1267 float ScaleUnit::cubic_bspline(float x)
1268 {
1269         float a, b, c, d;
1270
1271         if((x + 2.0F) <= 0.0F)
1272         {
1273         a = 0.0F;
1274         }
1275         else
1276         {
1277         a = CUBE(x + 2.0F);
1278         }
1279
1280
1281         if((x + 1.0F) <= 0.0F)
1282         {
1283         b = 0.0F;
1284         }
1285         else
1286         {
1287         b = CUBE(x + 1.0F);
1288         }
1289
1290         if(x <= 0)
1291         {
1292         c = 0.0F;
1293         }
1294         else
1295         {
1296         c = CUBE(x);
1297         }
1298
1299         if((x - 1.0F) <= 0.0F)
1300         {
1301         d = 0.0F;
1302         }
1303         else
1304         {
1305         d = CUBE(x - 1.0F);
1306         }
1307
1308
1309         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
1310 }
1311
1312
1313 void ScaleUnit::tabulate_bcubic_f(float* &coef_table,
1314         int* &coord_table,
1315         float scale,
1316         int start,
1317         int pixels,
1318         int total_pixels,
1319         float coefficient)
1320 {
1321         coef_table = new float[pixels * 4];
1322         coord_table = new int[pixels * 4];
1323         for(int i = 0, j = 0; i < pixels; i++)
1324         {
1325                 float f_x = (float)i * scale;
1326                 float a = f_x - floor(f_x);
1327
1328                 for(float m = -1; m < 3; m++)
1329                 {
1330                         coef_table[j] = cubic_bspline(coefficient * (m - a));
1331                         coord_table[j] = (int)(start + (int)f_x + m);
1332                         CLAMP(coord_table[j], 0, total_pixels - 1);
1333                         j++;
1334                 }
1335
1336         }
1337 }
1338
1339 void ScaleUnit::tabulate_bcubic_i(int* &coef_table,
1340         int* &coord_table,
1341         float scale,
1342         int start,
1343         int pixels,
1344         int total_pixels,
1345         float coefficient)
1346 {
1347         coef_table = new int[pixels * 4];
1348         coord_table = new int[pixels * 4];
1349         for(int i = 0, j = 0; i < pixels; i++)
1350         {
1351                 float f_x = (float)i * scale;
1352                 float a = f_x - floor(f_x);
1353
1354                 for(float m = -1; m < 3; m++)
1355                 {
1356                         coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1357                         coord_table[j] = (int)(start + (int)f_x + m);
1358                         CLAMP(coord_table[j], 0, total_pixels - 1);
1359                         j++;
1360                 }
1361
1362         }
1363 }
1364
1365 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1366                 int* &table_int2,
1367                 float* &table_frac,
1368                 float* &table_antifrac,
1369                 float scale,
1370                 int pixel1,
1371                 int pixel2,
1372                 int start,
1373                 int total_pixels)
1374 {
1375         table_int1 = new int[pixel2 - pixel1];
1376         table_int2 = new int[pixel2 - pixel1];
1377         table_frac = new float[pixel2 - pixel1];
1378         table_antifrac = new float[pixel2 - pixel1];
1379
1380         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1381         {
1382                 float f_x = (float)i * scale;
1383                 int i_x = (int)floor(f_x);
1384                 float a = (f_x - floor(f_x));
1385
1386                 table_int1[j] = i_x + start;
1387                 table_int2[j] = i_x + start + 1;
1388                 CLAMP(table_int1[j], 0, total_pixels - 1);
1389                 CLAMP(table_int2[j], 0, total_pixels - 1);
1390                 table_frac[j] = a;
1391                 table_antifrac[j] = 1.0F - a;
1392 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1393         }
1394 }
1395
1396 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1397                 int* &table_int2,
1398                 int* &table_frac,
1399                 int* &table_antifrac,
1400                 float scale,
1401                 int pixel1,
1402                 int pixel2,
1403                 int start,
1404                 int total_pixels)
1405 {
1406         table_int1 = new int[pixel2 - pixel1];
1407         table_int2 = new int[pixel2 - pixel1];
1408         table_frac = new int[pixel2 - pixel1];
1409         table_antifrac = new int[pixel2 - pixel1];
1410
1411         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1412         {
1413                 double f_x = (float)i * scale;
1414                 int i_x = (int)floor(f_x);
1415                 float a = (f_x - floor(f_x));
1416
1417                 table_int1[j] = i_x + start;
1418                 table_int2[j] = i_x + start + 1;
1419                 CLAMP(table_int1[j], 0, total_pixels - 1);
1420                 CLAMP(table_int2[j], 0, total_pixels - 1);
1421                 table_frac[j] = (int)(a * 0xffff);
1422                 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1423 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1424         }
1425 }
1426
1427 void ScaleUnit::process_package(LoadPackage *package)
1428 {
1429         ScalePackage *pkg = (ScalePackage*)package;
1430
1431 //printf("ScaleUnit::process_package 1\n");
1432 // Arguments for macros
1433         VFrame *output = engine->scale_output;
1434         VFrame *input = engine->scale_input;
1435         float scale_w = engine->w_scale;
1436         float scale_h = engine->h_scale;
1437         int in_x1_int = engine->in_x1_int;
1438         int in_y1_int = engine->in_y1_int;
1439         int out_h_int = engine->out_h_int;
1440         int out_w_int = engine->out_w_int;
1441         int do_yuv =
1442                 (input->get_color_model() == BC_YUV888 ||
1443                 input->get_color_model() == BC_YUVA8888 ||
1444                 input->get_color_model() == BC_YUV161616 ||
1445                 input->get_color_model() == BC_YUVA16161616);
1446
1447 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
1448         if(engine->interpolation_type == CUBIC_CUBIC ||
1449                 (engine->interpolation_type == CUBIC_LINEAR
1450                         && engine->w_scale > 1 &&
1451                         engine->h_scale > 1))
1452         {
1453                 switch(engine->scale_input->get_color_model())
1454                 {
1455                         case BC_RGB_FLOAT:
1456                                 BICUBIC(1.0, float, 3);
1457                                 break;
1458
1459                         case BC_RGBA_FLOAT:
1460                                 BICUBIC(1.0, float, 4);
1461                                 break;
1462
1463                         case BC_RGB888:
1464                         case BC_YUV888:
1465                                 BICUBIC(0xff, unsigned char, 3);
1466                                 break;
1467
1468                         case BC_RGBA8888:
1469                         case BC_YUVA8888:
1470                                 BICUBIC(0xff, unsigned char, 4);
1471                                 break;
1472
1473                         case BC_RGB161616:
1474                         case BC_YUV161616:
1475                                 BICUBIC(0xffff, uint16_t, 3);
1476                                 break;
1477
1478                         case BC_RGBA16161616:
1479                         case BC_YUVA16161616:
1480                                 BICUBIC(0xffff, uint16_t, 4);
1481                                 break;
1482                 }
1483         }
1484         else
1485 // Perform bilinear scaling input -> scale_output
1486         if(engine->w_scale > 1 &&
1487                 engine->h_scale > 1)
1488         {
1489                 switch(engine->scale_input->get_color_model())
1490                 {
1491                         case BC_RGB_FLOAT:
1492                                 BILINEAR_ENLARGE(1.0, float, 3);
1493                                 break;
1494
1495                         case BC_RGBA_FLOAT:
1496                                 BILINEAR_ENLARGE(1.0, float, 4);
1497                                 break;
1498
1499                         case BC_RGB888:
1500                         case BC_YUV888:
1501                                 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1502                                 break;
1503
1504                         case BC_RGBA8888:
1505                         case BC_YUVA8888:
1506                                 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1507                                 break;
1508
1509                         case BC_RGB161616:
1510                         case BC_YUV161616:
1511                                 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1512                                 break;
1513
1514                         case BC_RGBA16161616:
1515                         case BC_YUVA16161616:
1516                                 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1517                                 break;
1518                 }
1519         }
1520         else
1521 // Bilinear reduction
1522         {
1523                 switch(engine->scale_input->get_color_model())
1524                 {
1525                         case BC_RGB_FLOAT:
1526                                 BILINEAR_REDUCE(1.0, float, 3);
1527                                 break;
1528                         case BC_RGBA_FLOAT:
1529                                 BILINEAR_REDUCE(1.0, float, 4);
1530                                 break;
1531                         case BC_RGB888:
1532                         case BC_YUV888:
1533                                 BILINEAR_REDUCE(0xff, unsigned char, 3);
1534                                 break;
1535
1536                         case BC_RGBA8888:
1537                         case BC_YUVA8888:
1538                                 BILINEAR_REDUCE(0xff, unsigned char, 4);
1539                                 break;
1540
1541                         case BC_RGB161616:
1542                         case BC_YUV161616:
1543                                 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1544                                 break;
1545
1546                         case BC_RGBA16161616:
1547                         case BC_YUVA16161616:
1548                                 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1549                                 break;
1550                 }
1551         }
1552 //printf("ScaleUnit::process_package 3\n");
1553
1554 }
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1569  : LoadServer(cpus, cpus)
1570 {
1571         this->overlay = overlay;
1572 }
1573
1574 ScaleEngine::~ScaleEngine()
1575 {
1576 }
1577
1578 void ScaleEngine::init_packages()
1579 {
1580         for(int i = 0; i < total_packages; i++)
1581         {
1582                 ScalePackage *package = (ScalePackage*)packages[i];
1583                 package->out_row1 = out_h_int / total_packages * i;
1584                 package->out_row2 = package->out_row1 + out_h_int / total_packages;
1585
1586                 if(i >= total_packages - 1)
1587                         package->out_row2 = out_h_int;
1588         }
1589 }
1590
1591 LoadClient* ScaleEngine::new_client()
1592 {
1593         return new ScaleUnit(this, overlay);
1594 }
1595
1596 LoadPackage* ScaleEngine::new_package()
1597 {
1598         return new ScalePackage;
1599 }
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613 TranslatePackage::TranslatePackage()
1614 {
1615 }
1616
1617
1618
1619 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1620  : LoadClient(server)
1621 {
1622         this->overlay = overlay;
1623         this->engine = server;
1624 }
1625
1626 TranslateUnit::~TranslateUnit()
1627 {
1628 }
1629
1630
1631
1632 void TranslateUnit::translation_array_f(transfer_table_f* &table,
1633         float out_x1,
1634         float out_x2,
1635         float in_x1,
1636         float in_x2,
1637         int in_total,
1638         int out_total,
1639         int &out_x1_int,
1640         int &out_x2_int)
1641 {
1642         int out_w_int;
1643         float offset = out_x1 - in_x1;
1644 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1645
1646         out_x1_int = (int)out_x1;
1647         out_x2_int = MIN((int)ceil(out_x2), out_total);
1648         out_w_int = out_x2_int - out_x1_int;
1649
1650         table = new transfer_table_f[out_w_int];
1651         bzero(table, sizeof(transfer_table_f) * out_w_int);
1652
1653
1654 // printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f scale=%f %f\n",
1655 // in_x1,
1656 // in_x2,
1657 // out_x1,
1658 // out_x2,
1659 // in_x2 - in_x1,
1660 // out_x2 - out_x1);
1661 //
1662
1663         float in_x = in_x1;
1664         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1665         {
1666                 transfer_table_f *entry = &table[out_x - out_x1_int];
1667
1668                 entry->in_x1 = (int)in_x;
1669                 entry->in_x2 = (int)in_x + 1;
1670
1671 // Get fraction of output pixel to fill
1672                 entry->output_fraction = 1;
1673
1674                 if(out_x1 > out_x)
1675                 {
1676                         entry->output_fraction -= out_x1 - out_x;
1677                 }
1678
1679                 if(out_x2 < out_x + 1)
1680                 {
1681                         entry->output_fraction = (out_x2 - out_x);
1682                 }
1683
1684 // Advance in_x until out_x_fraction is filled
1685                 float out_x_fraction = entry->output_fraction;
1686                 float in_x_fraction = floor(in_x + 1) - in_x;
1687
1688                 if(out_x_fraction <= in_x_fraction)
1689                 {
1690                         entry->in_fraction1 = out_x_fraction;
1691                         entry->in_fraction2 = 0.0;
1692                         in_x += out_x_fraction;
1693                 }
1694                 else
1695                 {
1696                         entry->in_fraction1 = in_x_fraction;
1697                         in_x += out_x_fraction;
1698                         entry->in_fraction2 = in_x - floor(in_x);
1699                 }
1700
1701 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1702                 if(entry->in_x2 >= in_total)
1703                 {
1704                         entry->in_x2 = in_total - 1;
1705                         entry->in_fraction2 = 0.0;
1706                 }
1707
1708                 if(entry->in_x1 >= in_total)
1709                 {
1710                         entry->in_x1 = in_total - 1;
1711                         entry->in_fraction1 = 0.0;
1712                 }
1713 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1714 //      out_x,
1715 //      entry->in_x1,
1716 //      entry->in_x2,
1717 //      entry->in_fraction1,
1718 //      entry->in_fraction2,
1719 //      entry->output_fraction);
1720         }
1721 }
1722
1723
1724 void TranslateUnit::translation_array_i(transfer_table_i* &table,
1725         float out_x1,
1726         float out_x2,
1727         float in_x1,
1728         float in_x2,
1729         int in_total,
1730         int out_total,
1731         int &out_x1_int,
1732         int &out_x2_int)
1733 {
1734         int out_w_int;
1735         float offset = out_x1 - in_x1;
1736
1737         out_x1_int = (int)out_x1;
1738         out_x2_int = MIN((int)ceil(out_x2), out_total);
1739         out_w_int = out_x2_int - out_x1_int;
1740
1741         table = new transfer_table_i[out_w_int];
1742         bzero(table, sizeof(transfer_table_i) * out_w_int);
1743
1744
1745 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1746
1747         float in_x = in_x1;
1748         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1749         {
1750                 transfer_table_i *entry = &table[out_x - out_x1_int];
1751
1752                 entry->in_x1 = (int)in_x;
1753                 entry->in_x2 = (int)in_x + 1;
1754
1755 // Get fraction of output pixel to fill
1756                 entry->output_fraction = 0x10000;
1757
1758                 if(out_x1 > out_x)
1759                 {
1760                         entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
1761                 }
1762
1763                 if(out_x2 < out_x + 1)
1764                 {
1765                         entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1766                 }
1767
1768 // Advance in_x until out_x_fraction is filled
1769                 int out_x_fraction = entry->output_fraction;
1770                 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
1771
1772                 if(out_x_fraction <= in_x_fraction)
1773                 {
1774                         entry->in_fraction1 = out_x_fraction;
1775                         entry->in_fraction2 = 0;
1776                         in_x += (float)out_x_fraction / 0x10000;
1777                 }
1778                 else
1779                 {
1780                         entry->in_fraction1 = in_x_fraction;
1781                         in_x += (float)out_x_fraction / 0x10000;
1782                         entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1783                 }
1784
1785 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1786                 if(entry->in_x2 >= in_total)
1787                 {
1788                         entry->in_x2 = in_total - 1;
1789                         entry->in_fraction2 = 0;
1790                 }
1791
1792                 if(entry->in_x1 >= in_total)
1793                 {
1794                         entry->in_x1 = in_total - 1;
1795                         entry->in_fraction1 = 0;
1796                 }
1797 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1798 //      out_x,
1799 //      entry->in_x1,
1800 //      entry->in_x2,
1801 //      entry->in_fraction1,
1802 //      entry->in_fraction2,
1803 //      entry->output_fraction);
1804         }
1805 }
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
// Translate the input frame by a fractional offset and blend it onto the
// output frame, for the band of output rows [row1, row2).
//
// max           - full scale component value (0xff, 0xffff or 1.0 in the
//                 invocations in this file).
// temp_type     - arithmetic type used for intermediate values.
// type          - component storage type of both frames.
// components    - components per pixel, 3 or 4.
// chroma_offset - 0 for the RGB models, 0x80 / 0x8000 for the YUV models.
//
// The expansion reads these names from the enclosing scope: input, output,
// alpha, row1, row2, x_table_f, y_table_f, out_x1_int, out_x2_int and
// out_y1_int, in addition to whatever BLEND_3 / BLEND_4 reference.
// Inside the pixel loop "output" is redeclared as a pointer to the
// destination pixel and hides the outer frame pointer.
1840 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
1841 { \
1842 /* Row pointer arrays of both frames, viewed as arrays of type */ \
1843         type **in_rows = (type**)input->get_rows(); \
1844         type **out_rows = (type**)output->get_rows(); \
1845 /* Frame wide opacity scaled to max.  The 0.5 rounding term is left out when */ \
1846 /* sizeof(type) is 4, which is only the case for the float invocations. */ \
1847         temp_type master_opacity; \
1848         if(sizeof(type) != 4) \
1849                 master_opacity = (temp_type)(alpha * max + 0.5); \
1850         else \
1851                 master_opacity = (temp_type)(alpha * max); \
1852         temp_type master_transparency = max - master_opacity; \
1853         float round = 0.0; \
1854         if(sizeof(type) != 4) \
1855                 round = 0.5; \
1856 /* Walk the output rows of this band; y_table_f is indexed from out_y1_int */ \
1857  \
1858         for(int i = row1; i < row2; i++) \
1859         { \
1860                 int in_y1; \
1861                 int in_y2; \
1862                 float y_fraction1_f; \
1863                 float y_fraction2_f; \
1864                 float y_output_fraction_f; \
1865                 in_y1 = y_table_f[i - out_y1_int].in_x1; \
1866                 in_y2 = y_table_f[i - out_y1_int].in_x2; \
1867                 y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1868                 y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1869                 y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1870                 type *in_row1 = in_rows[(in_y1)]; \
1871                 type *in_row2 = in_rows[(in_y2)]; \
1872                 type *out_row = out_rows[i]; \
1873 /* Walk the output columns; x_table_f is indexed from out_x1_int */ \
1874                 for(int j = out_x1_int; j < out_x2_int; j++) \
1875                 { \
1876                         int in_x1; \
1877                         int in_x2; \
1878                         float x_fraction1_f; \
1879                         float x_fraction2_f; \
1880                         float x_output_fraction_f; \
1881                         in_x1 = x_table_f[j - out_x1_int].in_x1; \
1882                         in_x2 = x_table_f[j - out_x1_int].in_x2; \
1883                         x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1884                         x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1885                         x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
1886                         type *output = &out_row[j * components]; \
1887                         temp_type input1, input2, input3, input4; \
1888 /* Weights of the 2x2 source neighbourhood: x weight times y weight */ \
1889                         float fraction1 = x_fraction1_f * y_fraction1_f; \
1890                         float fraction2 = x_fraction2_f * y_fraction1_f; \
1891                         float fraction3 = x_fraction1_f * y_fraction2_f; \
1892                         float fraction4 = x_fraction2_f * y_fraction2_f; \
1893 /* First component: weighted sum of the four source pixels */ \
1894                         input1 = (type)(in_row1[in_x1 * components] * fraction1 +  \
1895                                 in_row1[in_x2 * components] * fraction2 +  \
1896                                 in_row2[in_x1 * components] * fraction3 +  \
1897                                 in_row2[in_x2 * components] * fraction4 + round); \
1898  \
1899 /* Add chroma to fractional pixels: the share of the weights that is not covered is filled with chroma_offset */ \
1900                         if(chroma_offset) \
1901                         { \
1902                                 float extra_chroma = (1.0F - \
1903                                         fraction1 - \
1904                                         fraction2 - \
1905                                         fraction3 - \
1906                                         fraction4) * chroma_offset; \
1907                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1908                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1909                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1910                                         in_row2[in_x2 * components + 1] * fraction4 + \
1911                                         extra_chroma + round); \
1912                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1913                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1914                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1915                                         in_row2[in_x2 * components + 2] * fraction4 +  \
1916                                         extra_chroma + round); \
1917                         } \
1918                         else \
1919                         { \
1920                                 input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1921                                         in_row1[in_x2 * components + 1] * fraction2 +  \
1922                                         in_row2[in_x1 * components + 1] * fraction3 +  \
1923                                         in_row2[in_x2 * components + 1] * fraction4 + round); \
1924                                 input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1925                                         in_row1[in_x2 * components + 2] * fraction2 +  \
1926                                         in_row2[in_x1 * components + 2] * fraction3 +  \
1927                                         in_row2[in_x2 * components + 2] * fraction4 + round); \
1928                         } \
1929 /* Fourth component is only read for 4 component models */ \
1930                         if(components == 4) \
1931                                 input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 +  \
1932                                         in_row1[in_x2 * components + 3] * fraction2 +  \
1933                                         in_row2[in_x1 * components + 3] * fraction3 +  \
1934                                         in_row2[in_x2 * components + 3] * fraction4 + round); \
1935 /* Scale the frame opacity by the covered share of this output pixel */ \
1936                         temp_type opacity; \
1937                         if(sizeof(type) != 4) \
1938                                 opacity = (temp_type)(master_opacity *  \
1939                                         y_output_fraction_f *  \
1940                                         x_output_fraction_f + 0.5); \
1941                         else \
1942                                 opacity = (temp_type)(master_opacity *  \
1943                                         y_output_fraction_f *  \
1944                                         x_output_fraction_f); \
1945                         temp_type transparency = max - opacity; \
1946  \
1947 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
1948 /* The blend macros are expected to combine input1..input4 with the pixel at output */ \
1949                         if(components == 3) \
1950                         { \
1951                                 BLEND_3(max, temp_type, type, chroma_offset); \
1952                         } \
1953                         else \
1954                         { \
1955                                 BLEND_4(max, temp_type, type, chroma_offset); \
1956                         } \
1957                 } \
1958         } \
1959 }
1960
1961 void TranslateUnit::process_package(LoadPackage *package)
1962 {
1963         TranslatePackage *pkg = (TranslatePackage*)package;
1964         int out_y1_int;
1965         int out_y2_int;
1966         int out_x1_int;
1967         int out_x2_int;
1968
1969
1970 // Variables for TRANSLATE
1971         VFrame *input = engine->translate_input;
1972         VFrame *output = engine->translate_output;
1973         float in_x1 = engine->translate_in_x1;
1974         float in_y1 = engine->translate_in_y1;
1975         float in_x2 = engine->translate_in_x2;
1976         float in_y2 = engine->translate_in_y2;
1977         float out_x1 = engine->translate_out_x1;
1978         float out_y1 = engine->translate_out_y1;
1979         float out_x2 = engine->translate_out_x2;
1980         float out_y2 = engine->translate_out_y2;
1981         float alpha = engine->translate_alpha;
1982         int row1 = pkg->out_row1;
1983         int row2 = pkg->out_row2;
1984         int mode = engine->translate_mode;
1985         int in_total_x = input->get_w();
1986         int in_total_y = input->get_h();
1987         int do_yuv =
1988                 (engine->translate_input->get_color_model() == BC_YUV888 ||
1989                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
1990                 engine->translate_input->get_color_model() == BC_YUV161616 ||
1991                 engine->translate_input->get_color_model() == BC_YUVA16161616);
1992
1993         transfer_table_f *x_table_f;
1994         transfer_table_f *y_table_f;
1995         transfer_table_i *x_table_i;
1996         transfer_table_i *y_table_i;
1997
1998         translation_array_f(x_table_f,
1999                 out_x1,
2000                 out_x2,
2001                 in_x1,
2002                 in_x2,
2003                 in_total_x,
2004                 output->get_w(),
2005                 out_x1_int,
2006                 out_x2_int);
2007         translation_array_f(y_table_f,
2008                 out_y1,
2009                 out_y2,
2010                 in_y1,
2011                 in_y2,
2012                 in_total_y,
2013                 output->get_h(),
2014                 out_y1_int,
2015                 out_y2_int);
2016 //      printf("TranslateUnit::process_package 1 %d\n", mode);
2017 //      Timer a;
2018 //      a.update();
2019
2020         switch(engine->translate_input->get_color_model())
2021         {
2022                 case BC_RGB888:
2023                         TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
2024                         break;
2025
2026                 case BC_RGBA8888:
2027                         TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
2028                         break;
2029
2030                 case BC_RGB_FLOAT:
2031                         TRANSLATE(1.0, float, float, 3, 0);
2032                         break;
2033
2034                 case BC_RGBA_FLOAT:
2035                         TRANSLATE(1.0, float, float, 4, 0);
2036                         break;
2037
2038                 case BC_RGB161616:
2039                         TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2040                         break;
2041
2042                 case BC_RGBA16161616:
2043                         TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2044                         break;
2045
2046                 case BC_YUV888:
2047                         TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
2048                         break;
2049
2050                 case BC_YUVA8888:
2051                         TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
2052                         break;
2053
2054                 case BC_YUV161616:
2055                         TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2056                         break;
2057
2058                 case BC_YUVA16161616:
2059                         TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2060                         break;
2061         }
2062 //      printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2063
2064         delete [] x_table_f;
2065         delete [] y_table_f;
2066 }
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2078  : LoadServer(cpus, cpus)
2079 {
2080         this->overlay = overlay;
2081 }
2082
2083 TranslateEngine::~TranslateEngine()
2084 {
2085 }
2086
2087 void TranslateEngine::init_packages()
2088 {
2089         int out_y1_int = (int)translate_out_y1;
2090         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2091         int out_h = out_y2_int - out_y1_int;
2092
2093         for(int i = 0; i < total_packages; i++)
2094         {
2095                 TranslatePackage *package = (TranslatePackage*)packages[i];
2096                 package->out_row1 = (int)(out_y1_int + out_h /
2097                         total_packages *
2098                         i);
2099                 package->out_row2 = (int)((float)package->out_row1 +
2100                         out_h /
2101                         total_packages);
2102                 if(i >= total_packages - 1)
2103                         package->out_row2 = out_y2_int;
2104         }
2105 }
2106
2107 LoadClient* TranslateEngine::new_client()
2108 {
2109         return new TranslateUnit(this, overlay);
2110 }
2111
2112 LoadPackage* TranslateEngine::new_package()
2113 {
2114         return new TranslatePackage;
2115 }
2116
2117
2118
2119
2120
2121
2122
2123
// Scale and translate by whole pixels, then blend onto the output frame, for
// the band of output rows [pkg->out_row1, pkg->out_row2).  Every output pixel
// is taken from a single source pixel; no interpolation is done here.
//
// max           - full scale component value (0xff, 0xffff or 1.0 in the
//                 invocations in this file).
// temp_type     - arithmetic type used for intermediate values.
// type          - component storage type of both frames.
// components    - components per pixel, 3 or 4.
// chroma_offset - 0 for the RGB models, 0x80 / 0x8000 for the YUV models.
//
// The expansion reads these names from the enclosing scope: alpha, pkg,
// in_rows, out_rows, x_table, y_table, in_x1, in_x2, out_x1, out_y1 and
// out_w, in addition to whatever BLEND_3 / BLEND_4 reference.
2124 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
2125 { \
2126         temp_type opacity; \
2127         if(sizeof(type) != 4) \
2128                 opacity = (temp_type)(alpha * max + 0.5); \
2129         else \
2130                 opacity = (temp_type)(alpha * max); \
2131         temp_type transparency = max - opacity; \
2132 /* y_table is indexed from out_y1 and yields the source row for each output row */ \
2133         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2134         { \
2135                 int in_y = y_table[i - out_y1]; \
2136                 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
2137                 type *output = (type*)out_rows[i] + out_x1 * components; \
2138  \
2139 /* X direction is scaled and requires a table lookup */ \
2140                 if(out_w != in_x2 - in_x1) \
2141                 { \
2142                         for(int j = 0; j < out_w; j++) \
2143                         { \
2144                                 type *in_row_plus_x = in_row + x_table[j] * components; \
2145                                 temp_type input1, input2, input3, input4; \
2146 /* x_table[j] is a pixel offset relative to in_x1, which in_row already includes */ \
2147                                 input1 = in_row_plus_x[0]; \
2148                                 input2 = in_row_plus_x[1]; \
2149                                 input3 = in_row_plus_x[2]; \
2150                                 if(components == 4) \
2151                                         input4 = in_row_plus_x[3]; \
2152          \
2153                                 if(components == 3) \
2154                                 { \
2155                                         BLEND_3(max, temp_type, type, chroma_offset); \
2156                                 } \
2157                                 else \
2158                                 { \
2159                                         BLEND_4(max, temp_type, type, chroma_offset); \
2160                                 } \
2161                                 output += components; \
2162                         } \
2163                 } \
2164                 else \
2165 /* X direction is not scaled: source and destination advance together */ \
2166                 { \
2167                         for(int j = 0; j < out_w; j++) \
2168                         { \
2169                                 temp_type input1, input2, input3, input4; \
2170          \
2171                                 input1 = in_row[0]; \
2172                                 input2 = in_row[1]; \
2173                                 input3 = in_row[2]; \
2174                                 if(components == 4) \
2175                                         input4 = in_row[3]; \
2176          \
2177                                 if(components == 3) \
2178                                 { \
2179                                         BLEND_3(max, temp_type, type, chroma_offset); \
2180                                 } \
2181                                 else \
2182                                 { \
2183                                         BLEND_4(max, temp_type, type, chroma_offset); \
2184                                 } \
2185                                 in_row += components; \
2186                                 output += components; \
2187                         } \
2188                 } \
2189         } \
2190 }
2191
2192
2193
2194 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
2195  : LoadClient(server)
2196 {
2197         this->overlay = overlay;
2198         this->scale_translate = server;
2199 }
2200
2201 ScaleTranslateUnit::~ScaleTranslateUnit()
2202 {
2203 }
2204
2205 void ScaleTranslateUnit::scale_array(int* &table,
2206         int out_x1,
2207         int out_x2,
2208         int in_x1,
2209         int in_x2,
2210         int is_x)
2211 {
2212         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2213
2214         table = new int[out_x2 - out_x1];
2215
2216         if(!is_x)
2217         {
2218                 for(int i = 0; i < out_x2 - out_x1; i++)
2219                 {
2220                         table[i] = (int)((float)i / scale + in_x1);
2221                 }
2222         }
2223         else
2224         {
2225                 for(int i = 0; i < out_x2 - out_x1; i++)
2226                 {
2227                         table[i] = (int)((float)i / scale);
2228                 }
2229         }
2230 }
2231
2232
2233 void ScaleTranslateUnit::process_package(LoadPackage *package)
2234 {
2235         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2236
2237 // Args for NEAREST_NEIGHBOR_MACRO
2238         VFrame *output = scale_translate->output;
2239         VFrame *input = scale_translate->input;
2240         int in_x1 = scale_translate->in_x1;
2241         int in_y1 = scale_translate->in_y1;
2242         int in_x2 = scale_translate->in_x2;
2243         int in_y2 = scale_translate->in_y2;
2244         int out_x1 = scale_translate->out_x1;
2245         int out_y1 = scale_translate->out_y1;
2246         int out_x2 = scale_translate->out_x2;
2247         int out_y2 = scale_translate->out_y2;
2248         float alpha = scale_translate->alpha;
2249         int mode = scale_translate->mode;
2250         int out_w = out_x2 - out_x1;
2251
2252         int *x_table;
2253         int *y_table;
2254         unsigned char **in_rows = input->get_rows();
2255         unsigned char **out_rows = output->get_rows();
2256
2257 //      Timer a;
2258 //      a.update();
2259 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
2260         if(out_w != in_x2 - in_x1)
2261         {
2262                 scale_array(x_table,
2263                         out_x1,
2264                         out_x2,
2265                         in_x1,
2266                         in_x2,
2267                         1);
2268         }
2269         scale_array(y_table,
2270                 out_y1,
2271                 out_y2,
2272                 in_y1,
2273                 in_y2,
2274                 0);
2275
2276
2277         if (mode == TRANSFER_REPLACE && (out_w == in_x2 - in_x1))
2278         {
2279 // if we have transfer replace and x direction is not scaled, PARTY!
2280                 char bytes_per_pixel = input->calculate_bytes_per_pixel(input->get_color_model());
2281                 int line_len = out_w * bytes_per_pixel;
2282                 int in_start_byte = in_x1 * bytes_per_pixel;
2283                 int out_start_byte = out_x1 * bytes_per_pixel;
2284                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2285                 {
2286                         memcpy (out_rows[i] + out_start_byte,
2287                                 in_rows[y_table[i - out_y1]] + in_start_byte ,
2288                                 line_len);
2289                 }
2290
2291         }
2292         else
2293         switch(input->get_color_model())
2294         {
2295                 case BC_RGB888:
2296                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2297                         break;
2298
2299                 case BC_RGB_FLOAT:
2300                         SCALE_TRANSLATE(1.0, float, float, 3, 0);
2301                         break;
2302
2303                 case BC_YUV888:
2304                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2305                         break;
2306
2307                 case BC_RGBA8888:
2308                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2309                         break;
2310
2311                 case BC_RGBA_FLOAT:
2312                         SCALE_TRANSLATE(1.0, float, float, 4, 0);
2313                         break;
2314
2315                 case BC_YUVA8888:
2316                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2317                         break;
2318
2319
2320                 case BC_RGB161616:
2321                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2322                         break;
2323
2324                 case BC_YUV161616:
2325                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2326                         break;
2327
2328                 case BC_RGBA16161616:
2329                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2330                         break;
2331
2332                 case BC_YUVA16161616:
2333                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2334                         break;
2335         }
2336
2337 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2338         if(out_x2 - out_x1 != in_x2 - in_x1)
2339                 delete [] x_table;
2340         delete [] y_table;
2341
2342 };
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
2353  : LoadServer(cpus, cpus)
2354 {
2355         this->overlay = overlay;
2356 }
2357
2358 ScaleTranslateEngine::~ScaleTranslateEngine()
2359 {
2360 }
2361
2362 void ScaleTranslateEngine::init_packages()
2363 {
2364         int out_h = out_y2 - out_y1;
2365
2366         for(int i = 0; i < total_packages; i++)
2367         {
2368                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
2369                 package->out_row1 = (int)(out_y1 + out_h /
2370                         total_packages *
2371                         i);
2372                 package->out_row2 = (int)((float)package->out_row1 +
2373                         out_h /
2374                         total_packages);
2375                 if(i >= total_packages - 1)
2376                         package->out_row2 = out_y2;
2377         }
2378 }
2379
2380 LoadClient* ScaleTranslateEngine::new_client()
2381 {
2382         return new ScaleTranslateUnit(this, overlay);
2383 }
2384
2385 LoadPackage* ScaleTranslateEngine::new_package()
2386 {
2387         return new ScaleTranslatePackage;
2388 }
2389
2390
2391 ScaleTranslatePackage::ScaleTranslatePackage()
2392 {
2393 }
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
// Blend the input frame onto the output frame without any scaling or
// translation, for the band of rows [pkg->out_row1, pkg->out_row2).  The
// same row index is used in both frames and the width comes from the input.
//
// Note the parameter order: temp_type and type come before max here, unlike
// TRANSLATE and SCALE_TRANSLATE.
//
// temp_type     - arithmetic type used for intermediate values.
// type          - component storage type of both frames.
// max           - full scale component value.
// components    - components per pixel, 3 or 4.
// chroma_offset - passed through to BLEND_3 / BLEND_4.
//
// The expansion reads these names from the enclosing scope: alpha, input,
// output and pkg, in addition to whatever BLEND_3 / BLEND_4 reference.
// Inside the row loop "output" is redeclared as a pointer to the destination
// pixel and hides the outer frame pointer.
2422 #define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
2423 { \
2424         temp_type opacity; \
2425         if(sizeof(type) != 4) \
2426                 opacity = (temp_type)(alpha * max + 0.5); \
2427         else \
2428                 opacity = (temp_type)(alpha * max); \
2429         temp_type transparency = max - opacity; \
2430 /* h is assigned below but not referenced again in this macro */ \
2431         type** output_rows = (type**)output->get_rows(); \
2432         type** input_rows = (type**)input->get_rows(); \
2433         int w = input->get_w(); \
2434         int h = input->get_h(); \
2435  \
2436         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2437         { \
2438                 type* in_row = input_rows[i]; \
2439                 type* output = output_rows[i]; \
2440  \
2441                 for(int j = 0; j < w; j++) \
2442                 { \
2443                         temp_type input1, input2, input3, input4; \
2444                         input1 = in_row[0]; \
2445                         input2 = in_row[1]; \
2446                         input3 = in_row[2]; \
2447                         if(components == 4) input4 = in_row[3]; \
2448  \
2449 /* The blend macros are expected to combine input1..input4 with the pixel at output */ \
2450                         if(components == 3) \
2451                         { \
2452                                 BLEND_3(max, temp_type, type, chroma_offset); \
2453                         } \
2454                         else \
2455                         { \
2456                                 BLEND_4(max, temp_type, type, chroma_offset); \
2457                         } \
2458 /* Step both pointers to the next pixel */ \
2459                         in_row += components; \
2460                         output += components; \
2461                 } \
2462         } \
2463 }
2464
2465
// Copy rows [pkg->out_row1, pkg->out_row2) of the input frame over the same
// rows of the output frame, one memcpy per row.
//
// type        - component storage type of both frames.
// components  - components per pixel.  It is parenthesized in the expansion
//               so an expression argument yields the intended byte count.
//
// The expansion reads input, output and pkg from the enclosing scope.
#define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
{ \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
/* Bytes in one row: width * bytes per component * components per pixel */ \
	size_t line_len = (size_t)input->get_w() * sizeof(type) * (components); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		memcpy(output_rows[i], input_rows[i], line_len); \
	} \
}
2480
// TRANSFER_NORMAL for integer pixels with four components (components is
// always 4).
// Per pixel, the source weight is the rounded global opacity (alpha * max)
// multiplied by the source's fourth channel, on a scale of max * max; the
// destination weight is the remainder of that scale.
// Channels 1 and 2 are blended relative to chroma_offset, channel 0 is not.
// The fourth output channel becomes the larger of the input and output values.
// Taken from the expansion site by name: input, output, pkg, alpha.
#define BLEND_ONLY_4_NORMAL(temp_type, type, max, chroma_offset) \
{ \
        const temp_type global_opacity = (temp_type)(alpha * max + 0.5); \
        const temp_type full_scale = ((temp_type)max) * max; \
 \
        type** src_rows = (type**)input->get_rows(); \
        type** dst_rows = (type**)output->get_rows(); \
        const int width = input->get_w(); \
 \
        for(int row = pkg->out_row1; row < pkg->out_row2; row++) \
        { \
                type* src = src_rows[row]; \
                type* dst = dst_rows[row]; \
 \
                for(int col = 0; col < width; col++, src += 4, dst += 4) \
                { \
                        const temp_type src_weight = global_opacity * src[3]; \
                        const temp_type dst_weight = (temp_type)full_scale - src_weight; \
 \
                        dst[0] = ((temp_type)src[0] * src_weight + \
                                (temp_type)dst[0] * dst_weight) / max / max; \
                        dst[1] = (((temp_type)src[1] - chroma_offset) * src_weight + \
                                ((temp_type)dst[1] - chroma_offset) * dst_weight) \
                                / max / max + \
                                chroma_offset; \
                        dst[2] = (((temp_type)src[2] - chroma_offset) * src_weight + \
                                ((temp_type)dst[2] - chroma_offset) * dst_weight) \
                                / max / max + \
                                chroma_offset; \
/* Keep whichever fourth-channel value is larger. */ \
                        if(src[3] > dst[3]) dst[3] = src[3]; \
                } \
        } \
}
2523
2524
2525
// TRANSFER_NORMAL for integer pixels with three components (components is
// always 3).
// Every sample is blended the same way, so each row is treated as a flat run
// of get_w() * 3 samples. The weights are fixed point with sizeof(type) * 8
// fractional bits and the weighted sum is shifted back down by that amount.
// The max and chroma_offset arguments are accepted but not used.
// Taken from the expansion site by name: input, output, pkg, alpha.
#define BLEND_ONLY_3_NORMAL(temp_type, type, max, chroma_offset) \
{ \
        const int shift = sizeof(type) * 8; \
        const temp_type src_weight = (temp_type)(alpha * ((temp_type)1 << shift) + 0.5); \
        const temp_type dst_weight = ((temp_type)1 << shift) - src_weight; \
 \
        type** src_rows = (type**)input->get_rows(); \
        type** dst_rows = (type**)output->get_rows(); \
/* Number of samples per row, not pixels. */ \
        const int samples = input->get_w() * 3; \
 \
        for(int row = pkg->out_row1; row < pkg->out_row2; row++) \
        { \
                type* src = src_rows[row]; \
                type* dst = dst_rows[row]; \
 \
                for(int k = 0; k < samples; k++) \
                { \
                        dst[k] = ((temp_type)src[k] * src_weight + dst[k] * dst_weight) >> shift; \
                } \
        } \
}
2551
2552
2553
2554 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
2555  : LoadClient(server)
2556 {
2557         this->overlay = overlay;
2558         this->blend_engine = server;
2559 }
2560
2561 BlendUnit::~BlendUnit()
2562 {
2563 }
2564
2565 void BlendUnit::process_package(LoadPackage *package)
2566 {
2567         BlendPackage *pkg = (BlendPackage*)package;
2568
2569
2570         VFrame *output = blend_engine->output;
2571         VFrame *input = blend_engine->input;
2572         float alpha = blend_engine->alpha;
2573         int mode = blend_engine->mode;
2574
2575         if (mode == TRANSFER_REPLACE)
2576         {
2577                 switch(input->get_color_model())
2578                 {
2579                         case BC_RGB_FLOAT:
2580                                 BLEND_ONLY_TRANSFER_REPLACE(float, 3);
2581                                 break;
2582                         case BC_RGBA_FLOAT:
2583                                 BLEND_ONLY_TRANSFER_REPLACE(float, 4);
2584                                 break;
2585                         case BC_RGB888:
2586                         case BC_YUV888:
2587                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2588                                 break;
2589                         case BC_RGBA8888:
2590                         case BC_YUVA8888:
2591                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2592                                 break;
2593                         case BC_RGB161616:
2594                         case BC_YUV161616:
2595                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2596                                 break;
2597                         case BC_RGBA16161616:
2598                         case BC_YUVA16161616:
2599                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2600                                 break;
2601                 }
2602         }
2603         else
2604         if (mode == TRANSFER_NORMAL)
2605         {
2606                 switch(input->get_color_model())
2607                 {
2608                         case BC_RGB_FLOAT:
2609                         {
2610                                 float opacity = alpha;
2611                                 float transparency = 1.0 - alpha;
2612
2613                                 float** output_rows = (float**)output->get_rows();
2614                                 float** input_rows = (float**)input->get_rows();
2615                                 int w = input->get_w() * 3;
2616                                 int h = input->get_h();
2617
2618                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2619                                 {
2620                                         float* in_row = input_rows[i];
2621                                         float* output = output_rows[i];
2622 /* w = 3x width! */
2623                                         for(int j = 0; j < w; j++)
2624                                         {
2625                                                 *output = *in_row * opacity + *output * transparency;
2626                                                 in_row++;
2627                                                 output++;
2628                                         }
2629                                 }
2630                                 break;
2631                         }
2632                         case BC_RGBA_FLOAT:
2633                         {
2634                                 float opacity = alpha;
2635                                 float transparency = 1.0 - alpha;
2636
2637                                 float** output_rows = (float**)output->get_rows();
2638                                 float** input_rows = (float**)input->get_rows();
2639                                 int w = input->get_w();
2640                                 int h = input->get_h();
2641
2642                                 for(int i = pkg->out_row1; i < pkg->out_row2; i++)
2643                                 {
2644                                         float* in_row = input_rows[i];
2645                                         float* output = output_rows[i];
2646
2647                                         for(int j = 0; j < w; j++)
2648                                         {
2649                                                 float pixel_opacity, pixel_transparency;
2650                                                 pixel_opacity = opacity * in_row[3];
2651                                                 pixel_transparency = 1.0 - pixel_opacity;
2652
2653
2654                                                 output[0] = in_row[0] * pixel_opacity +
2655                                                         output[0] * pixel_transparency;
2656                                                 output[1] = in_row[1] * pixel_opacity +
2657                                                         output[1] * pixel_transparency;
2658                                                 output[2] = in_row[2] * pixel_opacity +
2659                                                         output[2] * pixel_transparency;
2660                                                 output[3] = in_row[3] > output[3] ? in_row[3] : output[3];
2661
2662                                                 in_row += 4;
2663                                                 output += 4;
2664                                         }
2665                                 }
2666                                 break;
2667                         }
2668                         case BC_RGB888:
2669                                 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0xff, 0);
2670                                 break;
2671                         case BC_YUV888:
2672                                 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2673                                 break;
2674                         case BC_RGBA8888:
2675                                 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0xff, 0);
2676                                 break;
2677                         case BC_YUVA8888:
2678                                 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0xff, 0x80);
2679                                 break;
2680                         case BC_RGB161616:
2681                                 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2682                                 break;
2683                         case BC_YUV161616:
2684                                 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2685                                 break;
2686                         case BC_RGBA16161616:
2687                                 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0xffff, 0);
2688                                 break;
2689                         case BC_YUVA16161616:
2690                                 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0xffff, 0x8000);
2691                                 break;
2692                 }
2693         }
2694         else
2695         switch(input->get_color_model())
2696         {
2697                 case BC_RGB_FLOAT:
2698                         BLEND_ONLY(float, float, 1.0, 3, 0);
2699                         break;
2700                 case BC_RGBA_FLOAT:
2701                         BLEND_ONLY(float, float, 1.0, 4, 0);
2702                         break;
2703                 case BC_RGB888:
2704                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 3, 0);
2705                         break;
2706                 case BC_YUV888:
2707                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2708                         break;
2709                 case BC_RGBA8888:
2710                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 4, 0);
2711                         break;
2712                 case BC_YUVA8888:
2713                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2714                         break;
2715                 case BC_RGB161616:
2716                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 3, 0);
2717                         break;
2718                 case BC_YUV161616:
2719                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2720                         break;
2721                 case BC_RGBA16161616:
2722                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 4, 0);
2723                         break;
2724                 case BC_YUVA16161616:
2725                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
2726                         break;
2727         }
2728 }
2729
2730
2731
2732 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
2733  : LoadServer(cpus, cpus)
2734 {
2735         this->overlay = overlay;
2736 }
2737
2738 BlendEngine::~BlendEngine()
2739 {
2740 }
2741
2742 void BlendEngine::init_packages()
2743 {
2744         for(int i = 0; i < total_packages; i++)
2745         {
2746                 BlendPackage *package = (BlendPackage*)packages[i];
2747                 package->out_row1 = (int)(input->get_h() /
2748                         total_packages *
2749                         i);
2750                 package->out_row2 = (int)((float)package->out_row1 +
2751                         input->get_h() /
2752                         total_packages);
2753
2754                 if(i >= total_packages - 1)
2755                         package->out_row2 = input->get_h();
2756         }
2757 }
2758
2759 LoadClient* BlendEngine::new_client()
2760 {
2761         return new BlendUnit(this, overlay);
2762 }
2763
2764 LoadPackage* BlendEngine::new_package()
2765 {
2766         return new BlendPackage;
2767 }
2768
2769
2770 BlendPackage::BlendPackage()
2771 {
2772 }
2773
2774