hvirtual/cinelerra/overlayframe.C

   1 #include <math.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <stdint.h>
   5 #include <stdlib.h>
   6
   7 #include "clip.h"
   8 #include "edl.inc"
   9 #include "mutex.h"
  10 #include "overlayframe.h"
  11 #include "vframe.h"
  12
  13 #if 1
  14         #define use_float 1
  15 #else
  16         #define use_float 0
  17 #endif
  18
  19
  20
  21
  22
  23 OverlayFrame::OverlayFrame(int cpus)
  24 {
  25         temp_frame = 0;
  26         blend_engine = 0;
  27         scale_engine = 0;
  28         scaletranslate_engine = 0;
  29         translate_engine = 0;
  30         this->cpus = cpus;
  31 }
  32
  33 OverlayFrame::~OverlayFrame()
  34 {
  35 //printf("OverlayFrame::~OverlayFrame 1\n");
  36         if(temp_frame) delete temp_frame;
  37         if(scale_engine) delete scale_engine;
  38         if(translate_engine) delete translate_engine;
  39         if(blend_engine) delete blend_engine;
  40         if(scaletranslate_engine) delete scaletranslate_engine;
  41 //printf("OverlayFrame::~OverlayFrame 2\n");
  42 }
  43
  44
  45
  46
  47
  48
  49
  50
  51 // Verification:
  52
  53 // (255 * 255 + 0 * 0) / 255 = 255
  54 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
  55
  56 // (65535 * 65535 + 0 * 0) / 65535 = 65535
  57 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
  58
  59
  60 // Branch prediction 4 U
  61
  62 #define BLEND_3(max, temp_type, type, chroma_offset) \
  63 { \
  64         temp_type r, g, b; \
  65  \
  66 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
  67         switch(mode) \
  68         { \
  69                 case TRANSFER_DIVIDE: \
  70                         r = output[0] ? (((temp_type)input1 * max) / output[0]) : max; \
  71                         if(chroma_offset) \
  72                         { \
  73                                 g = labs((int)input2 - chroma_offset) > labs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
  74                                 b = labs((int)input3 - chroma_offset) > labs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
  75                         } \
  76                         else \
  77                         { \
  78                                 g = output[1] ? (temp_type)input2 * max / (temp_type)output[1] : max; \
  79                                 b = output[2] ? (temp_type)input3 * max / (temp_type)output[2] : max; \
  80                         } \
  81                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
  82                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
  83                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
  84                         break; \
  85                 case TRANSFER_MULTIPLY: \
  86                         r = ((temp_type)input1 * output[0]) / max; \
  87                         if(chroma_offset) \
  88                         { \
  89                                 g = labs((temp_type)input2 - chroma_offset) > labs((temp_type)output[1] - chroma_offset) ? input2 : output[1]; \
  90                                 b = labs((temp_type)input3 - chroma_offset) > labs((temp_type)output[2] - chroma_offset) ? input3 : output[2]; \
  91                         } \
  92                         else \
  93                         { \
  94                                 g = (temp_type)input2 * (temp_type)output[1] / max; \
  95                                 b = (temp_type)input3 * (temp_type)output[2] / max; \
  96                         } \
  97                         r = (r * opacity + (temp_type)output[0] * transparency) / max; \
  98                         g = (g * opacity + (temp_type)output[1] * transparency) / max; \
  99                         b = (b * opacity + (temp_type)output[2] * transparency) / max; \
 100                         break; \
 101                 case TRANSFER_SUBTRACT: \
 102                         r = (temp_type)input1 - output[0]; \
 103                         g = (temp_type)input2 - ((temp_type)output[1] - chroma_offset); \
 104                         b = (temp_type)input3 - ((temp_type)output[2] - chroma_offset); \
 105                         r = (r * opacity + output[0] * transparency) / max; \
 106                         g = (g * opacity + output[1] * transparency) / max; \
 107                         b = (b * opacity + output[2] * transparency) / max; \
 108                         break; \
 109                 case TRANSFER_ADDITION: \
 110                         r = (temp_type)input1 + output[0]; \
 111                         g = (temp_type)input2 - chroma_offset + output[1]; \
 112                         b = (temp_type)input3 - chroma_offset + output[2]; \
 113                         r = (r * opacity + output[0] * transparency) / max; \
 114                         g = (g * opacity + output[1] * transparency) / max; \
 115                         b = (b * opacity + output[2] * transparency) / max; \
 116                         break; \
 117                 case TRANSFER_REPLACE: \
 118                         r = input1; \
 119                         g = input2; \
 120                         b = input3; \
 121                         break; \
 122                 case TRANSFER_NORMAL: \
 123                         r = ((temp_type)input1 * opacity + output[0] * transparency) / max; \
 124                         g = ((temp_type)input2 * opacity + output[1] * transparency) / max; \
 125                         b = ((temp_type)input3 * opacity + output[2] * transparency) / max; \
 126                         break; \
 127         } \
 128  \
 129         output[0] = (type)CLIP(r, 0, max); \
 130         output[1] = (type)CLIP(g, 0, max); \
 131         output[2] = (type)CLIP(b, 0, max); \
 132 }
 133
 134
 135
 136
 137
 138 // Blending equations are drastically different for 3 and 4 components
 139 #define BLEND_4(max, temp_type, type, chroma_offset) \
 140 { \
 141         temp_type r, g, b, a; \
 142         temp_type pixel_opacity, pixel_transparency; \
 143         temp_type output1 = output[0]; \
 144         temp_type output2 = output[1]; \
 145         temp_type output3 = output[2]; \
 146         temp_type output4 = output[3]; \
 147  \
 148         pixel_opacity = opacity * input4; \
 149         pixel_transparency = (temp_type)max * max - pixel_opacity; \
 150  \
 151         switch(mode) \
 152         { \
 153                 case TRANSFER_DIVIDE: \
 154                         r = output1 ? (((temp_type)input1 * max) / output1) : max; \
 155                         if(chroma_offset) \
 156                         { \
 157                                 g = labs((int)input2 - chroma_offset) > labs((int)output2 - chroma_offset) ? input2 : output2; \
 158                                 b = labs((int)input3 - chroma_offset) > labs((int)output3 - chroma_offset) ? input3 : output3; \
 159                         } \
 160                         else \
 161                         { \
 162                                 g = output2 ? (temp_type)input2 * max / (temp_type)output2 : max; \
 163                                 b = output3 ? (temp_type)input3 * max / (temp_type)output3 : max; \
 164                         } \
 165                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
 166                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
 167                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
 168                         a = input4 > output4 ? input4 : output4; \
 169                         break; \
 170                 case TRANSFER_MULTIPLY: \
 171                         r = ((temp_type)input1 * output1) / max; \
 172                         if(chroma_offset) \
 173                         { \
 174                                 g = labs((temp_type)input2 - chroma_offset) > labs((temp_type)output2 - chroma_offset) ? input2 : output2; \
 175                                 b = labs((temp_type)input3 - chroma_offset) > labs((temp_type)output3 - chroma_offset) ? input3 : output3; \
 176                         } \
 177                         else \
 178                         { \
 179                                 g = (temp_type)input2 * (temp_type)output2 / max; \
 180                                 b = (temp_type)input3 * (temp_type)output3 / max; \
 181                         } \
 182                         r = (r * pixel_opacity + (temp_type)output1 * pixel_transparency) / max / max; \
 183                         g = (g * pixel_opacity + (temp_type)output2 * pixel_transparency) / max / max; \
 184                         b = (b * pixel_opacity + (temp_type)output3 * pixel_transparency) / max / max; \
 185                         a = input4 > output4 ? input4 : output4; \
 186                         break; \
 187                 case TRANSFER_SUBTRACT: \
 188                         r = (temp_type)input1 - output1; \
 189                         g = (temp_type)input2 - ((temp_type)output2 - chroma_offset); \
 190                         b = (temp_type)input3 - ((temp_type)output3 - chroma_offset); \
 191                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
 192                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
 193                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
 194                         a = input4 > output4 ? input4 : output4; \
 195                         break; \
 196                 case TRANSFER_ADDITION: \
 197                         r = (temp_type)input1 + output1; \
 198                         g = (temp_type)input2 - chroma_offset + output2; \
 199                         b = (temp_type)input3 - chroma_offset + output3; \
 200                         r = (r * pixel_opacity + output1 * pixel_transparency) / max / max; \
 201                         g = (g * pixel_opacity + output2 * pixel_transparency) / max / max; \
 202                         b = (b * pixel_opacity + output3 * pixel_transparency) / max / max; \
 203                         a = input4 > output4 ? input4 : output4; \
 204                         break; \
 205                 case TRANSFER_REPLACE: \
 206                         r = input1; \
 207                         g = input2; \
 208                         b = input3; \
 209                         a = input4; \
 210                         break; \
 211                 case TRANSFER_NORMAL: \
 212                         r = (input1 * pixel_opacity + \
 213                                 output1 * pixel_transparency) / max / max; \
 214                         g = ((input2 - chroma_offset) * pixel_opacity + \
 215                                 (output2 - chroma_offset) * pixel_transparency) \
 216                                 / max / max + \
 217                                 chroma_offset; \
 218                         b = ((input3 - chroma_offset) * pixel_opacity + \
 219                                 (output3 - chroma_offset) * pixel_transparency) \
 220                                 / max / max + \
 221                                 chroma_offset; \
 222                         a = input4 > output4 ? input4 : output4; \
 223                         break; \
 224         } \
 225  \
 226         output[0] = (type)CLIP(r, 0, max); \
 227         output[1] = (type)CLIP(g, 0, max); \
 228         output[2] = (type)CLIP(b, 0, max); \
 229         output[3] = (type)a; \
 230 }
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240 // Bicubic algorithm using multiprocessors
 241 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
 242
 243 // Nearest neighbor algorithm using multiprocessors for blending
 244 // input -> scale + translate -> blend -> output
 245
 246
 247 int OverlayFrame::overlay(VFrame *output,
 248         VFrame *input,
 249         float in_x1,
 250         float in_y1,
 251         float in_x2,
 252         float in_y2,
 253         float out_x1,
 254         float out_y1,
 255         float out_x2,
 256         float out_y2,
 257         float alpha,       // 0 - 1
 258         int mode,
 259         int interpolation_type)
 260 {
 261         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
 262         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
 263
 264 // printf("OverlayFrame::overlay 1 %f %f %f %f -> %f %f %f %f\n", in_x1,
 265 //                      in_y1,
 266 //                      in_x2,
 267 //                      in_y2,
 268 //                      out_x1,
 269 //                      out_y1,
 270 //                      out_x2,
 271 //                      out_y2);
 272
 273 // Limit values
 274         if(in_x1 < 0)
 275         {
 276                 out_x1 += -in_x1 * w_scale;
 277                 in_x1 = 0;
 278         }
 279         else
 280         if(in_x1 >= input->get_w())
 281         {
 282                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
 283                 in_x1 = input->get_w();
 284         }
 285
 286         if(in_y1 < 0)
 287         {
 288                 out_y1 += -in_y1 * h_scale;
 289                 in_y1 = 0;
 290         }
 291         else
 292         if(in_y1 >= input->get_h())
 293         {
 294                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
 295                 in_y1 = input->get_h();
 296         }
 297
 298         if(in_x2 < 0)
 299         {
 300                 out_x2 += -in_x2 * w_scale;
 301                 in_x2 = 0;
 302         }
 303         else
 304         if(in_x2 >= input->get_w())
 305         {
 306                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
 307                 in_x2 = input->get_w();
 308         }
 309
 310         if(in_y2 < 0)
 311         {
 312                 out_y2 += -in_y2 * h_scale;
 313                 in_y2 = 0;
 314         }
 315         else
 316         if(in_y2 >= input->get_h())
 317         {
 318                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
 319                 in_y2 = input->get_h();
 320         }
 321
 322         if(out_x1 < 0)
 323         {
 324                 in_x1 += -out_x1 / w_scale;
 325                 out_x1 = 0;
 326         }
 327         else
 328         if(out_x1 >= output->get_w())
 329         {
 330                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
 331                 out_x1 = output->get_w();
 332         }
 333
 334         if(out_y1 < 0)
 335         {
 336                 in_y1 += -out_y1 / h_scale;
 337                 out_y1 = 0;
 338         }
 339         else
 340         if(out_y1 >= output->get_h())
 341         {
 342                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
 343                 out_y1 = output->get_h();
 344         }
 345
 346         if(out_x2 < 0)
 347         {
 348                 in_x2 += -out_x2 / w_scale;
 349                 out_x2 = 0;
 350         }
 351         else
 352         if(out_x2 >= output->get_w())
 353         {
 354                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
 355                 out_x2 = output->get_w();
 356         }
 357
 358         if(out_y2 < 0)
 359         {
 360                 in_y2 += -out_y2 / h_scale;
 361                 out_y2 = 0;
 362         }
 363         else
 364         if(out_y2 >= output->get_h())
 365         {
 366                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
 367                 out_y2 = output->get_h();
 368         }
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379         float in_w = in_x2 - in_x1;
 380         float in_h = in_y2 - in_y1;
 381         float out_w = out_x2 - out_x1;
 382         float out_h = out_y2 - out_y1;
 383 // Input for translation operation
 384         VFrame *translation_input = input;
 385
 386
 387         if(in_w <= 0 || in_h <= 0 || out_w <= 0 || out_h <= 0) return 0;
 388
 389
 390 // printf("OverlayFrame::overlay 2 %f %f %f %f -> %f %f %f %f\n", in_x1,
 391 //                      in_y1,
 392 //                      in_x2,
 393 //                      in_y2,
 394 //                      out_x1,
 395 //                      out_y1,
 396 //                      out_x2,
 397 //                      out_y2);
 398
 399
 400
 401
 402
 403 // ****************************************************************************
 404 // Transfer to temp buffer by scaling nearest integer boundaries
 405 // ****************************************************************************
 406         if(interpolation_type != NEAREST_NEIGHBOR &&
 407                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
 408         {
 409 // Create integer boundaries for interpolation
 410                 int in_x1_int = (int)in_x1;
 411                 int in_y1_int = (int)in_y1;
 412                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
 413                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
 414
 415 // Dimensions of temp frame.  Integer boundaries scaled.
 416                 int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int));
 417                 int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int));
 418                 VFrame *scale_output;
 419
 420
 421
 422 #define NO_TRANSLATION1 \
 423         (EQUIV(in_x1, 0) && \
 424         EQUIV(in_y1, 0) && \
 425         EQUIV(out_x1, 0) && \
 426         EQUIV(out_y1, 0) && \
 427         EQUIV(in_x2, in_x2_int) && \
 428         EQUIV(in_y2, in_y2_int) && \
 429         EQUIV(out_x2, temp_w) && \
 430         EQUIV(out_y2, temp_h))
 431
 432
 433 #define NO_BLEND \
 434         (EQUIV(alpha, 1) && \
 435         (mode == TRANSFER_REPLACE || \
 436         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
 437
 438
 439
 440
 441
 442 // Prepare destination for operation
 443
 444 // No translation and no blending.  The blending operation is built into the
 445 // translation unit but not the scaling unit.
 446 // input -> output
 447                 if(NO_TRANSLATION1 &&
 448                         NO_BLEND)
 449                 {
 450 // printf("OverlayFrame::overlay input -> output\n");
 451
 452                         scale_output = output;
 453                         translation_input = 0;
 454                 }
 455                 else
 456 // If translation or blending
 457 // input -> nearest integer boundary temp
 458                 {
 459                         if(temp_frame &&
 460                                 (temp_frame->get_w() != temp_w ||
 461                                         temp_frame->get_h() != temp_h))
 462                         {
 463                                 delete temp_frame;
 464                                 temp_frame = 0;
 465                         }
 466
 467                         if(!temp_frame)
 468                         {
 469                                 temp_frame = new VFrame(0,
 470                                         temp_w,
 471                                         temp_h,
 472                                         input->get_color_model(),
 473                                         -1);
 474                         }
 475 //printf("OverlayFrame::overlay input -> temp\n");
 476
 477
 478                         temp_frame->clear_frame();
 479
 480 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
 481 //      temp_w, temp_h);
 482                         scale_output = temp_frame;
 483                         translation_input = scale_output;
 484
 485 // Adjust input coordinates to reflect new scaled coordinates.
 486                         in_x1 = (in_x1 - in_x1_int) * w_scale;
 487                         in_y1 = (in_y1 - in_y1_int) * h_scale;
 488                         in_x2 = (in_x2 - in_x1_int) * w_scale;
 489                         in_y2 = (in_y2 - in_y1_int) * h_scale;
 490                 }
 491
 492
 493
 494 //printf("Overlay 1\n");
 495
 496 // Scale input -> scale_output
 497                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
 498                 scale_engine->scale_output = scale_output;
 499                 scale_engine->scale_input = input;
 500                 scale_engine->w_scale = w_scale;
 501                 scale_engine->h_scale = h_scale;
 502                 scale_engine->in_x1_int = in_x1_int;
 503                 scale_engine->in_y1_int = in_y1_int;
 504                 scale_engine->out_w_int = temp_w;
 505                 scale_engine->out_h_int = temp_h;
 506                 scale_engine->interpolation_type = interpolation_type;
 507 //printf("Overlay 2\n");
 508
 509 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
 510                 scale_engine->process_packages();
 511 //printf("OverlayFrame::overlay ScaleEngine 2\n");
 512
 513
 514
 515         }
 516
 517 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 518 //      in_x1,
 519 //      in_y1,
 520 //      in_x2,
 521 //      in_y2,
 522 //      out_x1,
 523 //      out_y1,
 524 //      out_x2,
 525 //      out_y2);
 526
 527
 528
 529
 530
 531 #define NO_TRANSLATION2 \
 532         (EQUIV(in_x1, 0) && \
 533         EQUIV(in_y1, 0) && \
 534         EQUIV(in_x2, translation_input->get_w()) && \
 535         EQUIV(in_y2, translation_input->get_h()) && \
 536         EQUIV(out_x1, 0) && \
 537         EQUIV(out_y1, 0) && \
 538         EQUIV(out_x2, output->get_w()) && \
 539         EQUIV(out_y2, output->get_h())) \
 540
 541 #define NO_SCALE \
 542         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
 543         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
 544
 545
 546
 547
 548 //printf("OverlayFrame::overlay 4 %d\n", mode);
 549
 550
 551
 552
 553         if(translation_input)
 554         {
 555 // Direct copy
 556                 if( NO_TRANSLATION2 &&
 557                         NO_SCALE &&
 558                         NO_BLEND)
 559                 {
 560 //printf("OverlayFrame::overlay direct copy\n");
 561                         output->copy_from(translation_input);
 562                 }
 563                 else
 564 // Blend only
 565                 if( NO_TRANSLATION2 &&
 566                         NO_SCALE)
 567                 {
 568                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
 569
 570
 571                         blend_engine->output = output;
 572                         blend_engine->input = translation_input;
 573                         blend_engine->alpha = alpha;
 574                         blend_engine->mode = mode;
 575
 576                         blend_engine->process_packages();
 577                 }
 578                 else
 579 // Scale and translate using nearest neighbor
 580 // Translation is exactly on integer boundaries
 581                 if(interpolation_type == NEAREST_NEIGHBOR ||
 582                         EQUIV(in_x1, (int)in_x1) &&
 583                         EQUIV(in_y1, (int)in_y1) &&
 584                         EQUIV(in_x2, (int)in_x2) &&
 585                         EQUIV(in_y2, (int)in_y2) &&
 586
 587                         EQUIV(out_x1, (int)out_x1) &&
 588                         EQUIV(out_y1, (int)out_y1) &&
 589                         EQUIV(out_x2, (int)out_x2) &&
 590                         EQUIV(out_y2, (int)out_y2))
 591                 {
 592 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
 593                         if(!scaletranslate_engine) scaletranslate_engine =
 594                                 new ScaleTranslateEngine(this, cpus);
 595
 596
 597                         scaletranslate_engine->output = output;
 598                         scaletranslate_engine->input = translation_input;
 599                         scaletranslate_engine->in_x1 = (int)in_x1;
 600                         scaletranslate_engine->in_y1 = (int)in_y1;
 601                         scaletranslate_engine->in_x2 = (int)in_x2;
 602                         scaletranslate_engine->in_y2 = (int)in_y2;
 603                         scaletranslate_engine->out_x1 = (int)out_x1;
 604                         scaletranslate_engine->out_y1 = (int)out_y1;
 605                         scaletranslate_engine->out_x2 = (int)out_x2;
 606                         scaletranslate_engine->out_y2 = (int)out_y2;
 607                         scaletranslate_engine->alpha = alpha;
 608                         scaletranslate_engine->mode = mode;
 609
 610                         scaletranslate_engine->process_packages();
 611                 }
 612                 else
 613 // Fractional translation
 614                 {
 615 // Use fractional translation
 616 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 617 //      in_x1,
 618 //      in_y1,
 619 //      in_x2,
 620 //      in_y2,
 621 //      out_x1,
 622 //      out_y1,
 623 //      out_x2,
 624 //      out_y2);
 625
 626 //printf("Overlay 3\n");
 627                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
 628                         translate_engine->translate_output = output;
 629                         translate_engine->translate_input = translation_input;
 630                         translate_engine->translate_in_x1 = in_x1;
 631                         translate_engine->translate_in_y1 = in_y1;
 632                         translate_engine->translate_in_x2 = in_x2;
 633                         translate_engine->translate_in_y2 = in_y2;
 634                         translate_engine->translate_out_x1 = out_x1;
 635                         translate_engine->translate_out_y1 = out_y1;
 636                         translate_engine->translate_out_x2 = out_x2;
 637                         translate_engine->translate_out_y2 = out_y2;
 638                         translate_engine->translate_alpha = alpha;
 639                         translate_engine->translate_mode = mode;
 640 //printf("Overlay 4\n");
 641
 642 //printf("OverlayFrame::overlay 5 %d\n", mode);
 643                         translate_engine->process_packages();
 644
 645                 }
 646         }
 647 //printf("OverlayFrame::overlay 2\n");
 648
 649         return 0;
 650 }
 651
 652
 653
 654
 655
 656
 657
 658 ScalePackage::ScalePackage()
 659 {
 660 }
 661
 662
 663
 664
 665 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
 666  : LoadClient(server)
 667 {
 668         this->overlay = overlay;
 669         this->engine = server;
 670 }
 671
 672 ScaleUnit::~ScaleUnit()
 673 {
 674 }
 675
 676
 677
 678 void ScaleUnit::tabulate_reduction(bilinear_table_t* &table,
 679         float scale,
 680         int in_pixel1,
 681         int out_total,
 682         int in_total)
 683 {
 684         table = new bilinear_table_t[out_total];
 685         bzero(table, sizeof(bilinear_table_t) * out_total);
 686 //printf("ScaleUnit::tabulate_reduction 1 %f %d %d %d\n", scale, in_pixel1, out_total, in_total);
 687         for(int i = 0; i < out_total; i++)
 688         {
 689                 float out_start = i;
 690                 float in_start = out_start * scale;
 691                 float out_end = i + 1;
 692                 float in_end = out_end * scale;
 693                 bilinear_table_t *entry = table + i;
 694 //printf("ScaleUnit::tabulate_reduction 1 %f %f %f %f\n", out_start, out_end, in_start, in_end);
 695
 696 // Store input fraction
 697                 entry->input_fraction1 = (floor(in_start + 1) - in_start) / scale;
 698                 entry->input_fraction2 = 1.0 / scale;
 699                 entry->input_fraction3 = (in_end - floor(in_end)) / scale;
 700
 701                 if(in_end >= in_total - in_pixel1)
 702                 {
 703                         in_end = in_total - in_pixel1 - 1;
 704
 705                         int difference = (int)in_end - (int)in_start - 1;
 706                         if(difference < 0) difference = 0;
 707                         entry->input_fraction3 = 1.0 -
 708                                 entry->input_fraction1 -
 709                                 entry->input_fraction2 * difference;
 710                 }
 711
 712 // Store input pixels
 713                 entry->input_pixel1 = (int)in_start;
 714                 entry->input_pixel2 = (int)in_end;
 715
 716 // printf("ScaleUnit::tabulate_reduction 1 %d %d %f %f  %f\n",
 717 // entry->input_pixel1,
 718 // entry->input_pixel2,
 719 // entry->input_fraction1,
 720 // entry->input_fraction2,
 721 // entry->input_fraction3);
 722
 723
 724 // Sanity check
 725                 if(entry->input_pixel1 > entry->input_pixel2)
 726                 {
 727                         entry->input_pixel1 = entry->input_pixel2;
 728                         entry->input_fraction1 = 0;
 729                 }
 730
 731 // Get total fraction of output pixel used
 732 //              if(entry->input_pixel2 > entry->input_pixel1)
 733                 entry->total_fraction =
 734                         entry->input_fraction1 +
 735                         entry->input_fraction2 * (entry->input_pixel2 - entry->input_pixel1 - 1) +
 736                         entry->input_fraction3;
 737                 entry->input_pixel1 += in_pixel1;
 738                 entry->input_pixel2 += in_pixel1;
 739         }
 740 }
 741
 742 void ScaleUnit::tabulate_enlarge(bilinear_table_t* &table,
 743         float scale,
 744         int in_pixel1,
 745         int out_total,
 746         int in_total)
 747 {
 748         table = new bilinear_table_t[out_total];
 749         bzero(table, sizeof(bilinear_table_t) * out_total);
 750
 751         for(int i = 0; i < out_total; i++)
 752         {
 753                 bilinear_table_t *entry = table + i;
 754                 float in_pixel = i * scale;
 755                 entry->input_pixel1 = (int)floor(in_pixel);
 756                 entry->input_pixel2 = entry->input_pixel1 + 1;
 757
 758                 if(in_pixel <= in_total)
 759                 {
 760                         entry->input_fraction3 = in_pixel - entry->input_pixel1;
 761                 }
 762                 else
 763                 {
 764                         entry->input_fraction3 = 0;
 765                         entry->input_pixel2 = 0;
 766                 }
 767
 768                 if(in_pixel >= 0)
 769                 {
 770                         entry->input_fraction1 = entry->input_pixel2 - in_pixel;
 771                 }
 772                 else
 773                 {
 774                         entry->input_fraction1 = 0;
 775                         entry->input_pixel1 = 0;
 776                 }
 777
 778                 if(entry->input_pixel2 >= in_total - in_pixel1)
 779                 {
 780                         entry->input_pixel2 = entry->input_pixel1;
 781                         entry->input_fraction3 = 1.0 - entry->input_fraction1;
 782                 }
 783
 784                 entry->total_fraction =
 785                         entry->input_fraction1 +
 786                         entry->input_fraction3;
 787                 entry->input_pixel1 += in_pixel1;
 788                 entry->input_pixel2 += in_pixel1;
 789 //
 790 // printf("ScaleUnit::tabulate_enlarge %d %d %f %f %f\n",
 791 // entry->input_pixel1,
 792 // entry->input_pixel2,
 793 // entry->input_fraction1,
 794 // entry->input_fraction2,
 795 // entry->input_fraction3);
 796         }
 797 }
 798
 799 void ScaleUnit::dump_bilinear(bilinear_table_t *table, int total)
 800 {
 801         printf("ScaleUnit::dump_bilinear\n");
 802         for(int i = 0; i < total; i++)
 803         {
 804                 printf("out=%d inpixel1=%d inpixel2=%d infrac1=%f infrac2=%f infrac3=%f total=%f\n",
 805                         i,
 806                         table[i].input_pixel1,
 807                         table[i].input_pixel2,
 808                         table[i].input_fraction1,
 809                         table[i].input_fraction2,
 810                         table[i].input_fraction3,
 811                         table[i].total_fraction);
 812         }
 813 }
 814
 815 #define PIXEL_REDUCE_MACRO(type, components, row) \
 816 { \
 817         type *input_row = &in_rows[row][x_entry->input_pixel1 * components]; \
 818         type *input_end = &in_rows[row][x_entry->input_pixel2 * components]; \
 819  \
 820 /* Do first pixel */ \
 821         temp_f1 += input_scale1 * input_row[0]; \
 822         temp_f2 += input_scale1 * input_row[1]; \
 823         temp_f3 += input_scale1 * input_row[2]; \
 824         if(components == 4) temp_f4 += input_scale1 * input_row[3]; \
 825  \
 826 /* Do last pixel */ \
 827 /*      if(input_row < input_end) */\
 828         { \
 829                 temp_f1 += input_scale3 * input_end[0]; \
 830                 temp_f2 += input_scale3 * input_end[1]; \
 831                 temp_f3 += input_scale3 * input_end[2]; \
 832                 if(components == 4) temp_f4 += input_scale3 * input_end[3]; \
 833         } \
 834  \
 835 /* Do middle pixels */ \
 836         for(input_row += components; input_row < input_end; input_row += components) \
 837         { \
 838                 temp_f1 += input_scale2 * input_row[0]; \
 839                 temp_f2 += input_scale2 * input_row[1]; \
 840                 temp_f3 += input_scale2 * input_row[2]; \
 841                 if(components == 4) temp_f4 += input_scale2 * input_row[3]; \
 842         } \
 843 }
 844
 845 // Bilinear reduction and suboptimal enlargement.
 846 // Very high quality.
 847 #define BILINEAR_REDUCE(max, type, components) \
 848 { \
 849         bilinear_table_t *x_table, *y_table; \
 850         int out_h = pkg->out_row2 - pkg->out_row1; \
 851         type **in_rows = (type**)input->get_rows(); \
 852         type **out_rows = (type**)output->get_rows(); \
 853  \
 854         if(scale_w < 1) \
 855                 tabulate_reduction(x_table, \
 856                         1.0 / scale_w, \
 857                         in_x1_int, \
 858                         out_w_int, \
 859                         input->get_w()); \
 860         else \
 861                 tabulate_enlarge(x_table, \
 862                         1.0 / scale_w, \
 863                         in_x1_int, \
 864                         out_w_int, \
 865                         input->get_w()); \
 866  \
 867         if(scale_h < 1) \
 868                 tabulate_reduction(y_table, \
 869                         1.0 / scale_h, \
 870                         in_y1_int, \
 871                         out_h_int, \
 872                         input->get_h()); \
 873         else \
 874                 tabulate_enlarge(y_table, \
 875                         1.0 / scale_h, \
 876                         in_y1_int, \
 877                         out_h_int, \
 878                         input->get_h()); \
 879 /* dump_bilinear(y_table, out_h_int); */\
 880  \
 881         for(int i = 0; i < out_h; i++) \
 882         { \
 883                 type *out_row = out_rows[i + pkg->out_row1]; \
 884                 bilinear_table_t *y_entry = &y_table[i + pkg->out_row1]; \
 885 /*printf("BILINEAR_REDUCE 2 %d %d %d\n", i, y_entry->input_pixel1, y_entry->input_pixel2); */\
 886  \
 887                 for(int j = 0; j < out_w_int; j++) \
 888                 { \
 889                         bilinear_table_t *x_entry = &x_table[j]; \
 890 /* Load rounding factors */ \
 891                         float temp_f1 = .5; \
 892                         float temp_f2 = .5; \
 893                         float temp_f3 = .5; \
 894                         float temp_f4 = .5; \
 895  \
 896 /* First row */ \
 897                         float input_scale1 = y_entry->input_fraction1 * x_entry->input_fraction1; \
 898                         float input_scale2 = y_entry->input_fraction1 * x_entry->input_fraction2; \
 899                         float input_scale3 = y_entry->input_fraction1 * x_entry->input_fraction3; \
 900                         PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel1) \
 901  \
 902 /* Last row */ \
 903                         if(out_h) \
 904                         { \
 905                                 input_scale1 = y_entry->input_fraction3 * x_entry->input_fraction1; \
 906                                 input_scale2 = y_entry->input_fraction3 * x_entry->input_fraction2; \
 907                                 input_scale3 = y_entry->input_fraction3 * x_entry->input_fraction3; \
 908                                 PIXEL_REDUCE_MACRO(type, components, y_entry->input_pixel2) \
 909  \
 910 /* Middle rows */ \
 911                                 if(out_h > 1) \
 912                                 { \
 913                                         input_scale1 = y_entry->input_fraction2 * x_entry->input_fraction1; \
 914                                         input_scale2 = y_entry->input_fraction2 * x_entry->input_fraction2; \
 915                                         input_scale3 = y_entry->input_fraction2 * x_entry->input_fraction3; \
 916                                         for(int k = y_entry->input_pixel1 + 1; \
 917                                                 k < y_entry->input_pixel2; \
 918                                                 k++) \
 919                                         { \
 920                                                 PIXEL_REDUCE_MACRO(type, components, k) \
 921                                         } \
 922                                 } \
 923                         } \
 924  \
 925                         if(temp_f1 > max) temp_f1 = max; \
 926                         if(temp_f2 > max) temp_f2 = max; \
 927                         if(temp_f3 > max) temp_f3 = max; \
 928                         if(components == 4) if(temp_f4 > max) temp_f4 = max; \
 929                         out_row[j * components    ] = (type)temp_f1; \
 930                         out_row[j * components + 1] = (type)temp_f2; \
 931                         out_row[j * components + 2] = (type)temp_f3; \
 932                         if(components == 4) out_row[j * components + 3] = (type)temp_f4; \
 933                 } \
 934 /*printf("BILINEAR_REDUCE 3 %d\n", i);*/ \
 935         } \
 936  \
 937         delete [] x_table; \
 938         delete [] y_table; \
 939 }
 940
 941
 942
 943 // Only 2 input pixels
 944 #define BILINEAR_ENLARGE(max, type, components) \
 945 { \
 946 /*printf("BILINEAR_ENLARGE 1\n");*/ \
 947         float k_y = 1.0 / scale_h; \
 948         float k_x = 1.0 / scale_w; \
 949         type **in_rows = (type**)input->get_rows(); \
 950         type **out_rows = (type**)output->get_rows(); \
 951         int out_h = pkg->out_row2 - pkg->out_row1; \
 952         int in_h_int = input->get_h(); \
 953         int in_w_int = input->get_w(); \
 954         int *table_int_x1, *table_int_y1; \
 955         int *table_int_x2, *table_int_y2; \
 956         float *table_frac_x_f, *table_antifrac_x_f, *table_frac_y_f, *table_antifrac_y_f; \
 957         int *table_frac_x_i, *table_antifrac_x_i, *table_frac_y_i, *table_antifrac_y_i; \
 958  \
 959         if(use_float) \
 960         { \
 961                 tabulate_blinear_f(table_int_x1,  \
 962                         table_int_x2,  \
 963                         table_frac_x_f,  \
 964                         table_antifrac_x_f,  \
 965                         k_x,  \
 966                         0,  \
 967                         out_w_int, \
 968                         in_x1_int,  \
 969                         in_w_int); \
 970                 tabulate_blinear_f(table_int_y1,  \
 971                         table_int_y2,  \
 972                         table_frac_y_f,  \
 973                         table_antifrac_y_f,  \
 974                         k_y,  \
 975                         pkg->out_row1,  \
 976                         pkg->out_row2,  \
 977                         in_y1_int, \
 978                         in_h_int); \
 979         } \
 980         else \
 981         { \
 982                 tabulate_blinear_i(table_int_x1,  \
 983                         table_int_x2,  \
 984                         table_frac_x_i,  \
 985                         table_antifrac_x_i,  \
 986                         k_x,  \
 987                         0,  \
 988                         out_w_int, \
 989                         in_x1_int,  \
 990                         in_w_int); \
 991                 tabulate_blinear_i(table_int_y1,  \
 992                         table_int_y2,  \
 993                         table_frac_y_i,  \
 994                         table_antifrac_y_i,  \
 995                         k_y,  \
 996                         pkg->out_row1,  \
 997                         pkg->out_row2,  \
 998                         in_y1_int, \
 999                         in_h_int); \
1000         } \
1001  \
1002         for(int i = 0; i < out_h; i++) \
1003         { \
1004                 int i_y1 = table_int_y1[i]; \
1005                 int i_y2 = table_int_y2[i]; \
1006                 float a_f; \
1007         float anti_a_f; \
1008                 uint64_t a_i; \
1009         uint64_t anti_a_i; \
1010                 if(use_float) \
1011                 { \
1012                         a_f = table_frac_y_f[i]; \
1013                 anti_a_f = table_antifrac_y_f[i]; \
1014                 } \
1015                 else \
1016                 { \
1017                         a_i = table_frac_y_i[i]; \
1018                 anti_a_i = table_antifrac_y_i[i]; \
1019                 } \
1020                 type *in_row1 = in_rows[i_y1]; \
1021                 type *in_row2 = in_rows[i_y2]; \
1022                 type *out_row = out_rows[i + pkg->out_row1]; \
1023  \
1024                 for(int j = 0; j < out_w_int; j++) \
1025                 { \
1026                         int i_x1 = table_int_x1[j]; \
1027                         int i_x2 = table_int_x2[j]; \
1028                         if(use_float) \
1029                         { \
1030                                 float output1r, output1g, output1b, output1a; \
1031                                 float output2r, output2g, output2b, output2a; \
1032                                 float output3r, output3g, output3b, output3a; \
1033                                 float output4r, output4g, output4b, output4a; \
1034                                 float b_f; \
1035                                 float anti_b_f; \
1036                                 b_f = table_frac_x_f[j]; \
1037                                 anti_b_f = table_antifrac_x_f[j]; \
1038  \
1039                         output1r = in_row1[i_x1 * components]; \
1040                         output1g = in_row1[i_x1 * components + 1]; \
1041                         output1b = in_row1[i_x1 * components + 2]; \
1042                         if(components == 4) output1a = in_row1[i_x1 * components + 3]; \
1043  \
1044                         output2r = in_row1[i_x2 * components]; \
1045                         output2g = in_row1[i_x2 * components + 1]; \
1046                         output2b = in_row1[i_x2 * components + 2]; \
1047                         if(components == 4) output2a = in_row1[i_x2 * components + 3]; \
1048  \
1049                         output3r = in_row2[i_x1 * components]; \
1050                         output3g = in_row2[i_x1 * components + 1]; \
1051                         output3b = in_row2[i_x1 * components + 2]; \
1052                         if(components == 4) output3a = in_row2[i_x1 * components + 3]; \
1053 \
1054                         output4r = in_row2[i_x2 * components]; \
1055                         output4g = in_row2[i_x2 * components + 1]; \
1056                         output4b = in_row2[i_x2 * components + 2]; \
1057                         if(components == 4) output4a = in_row2[i_x2 * components + 3]; \
1058  \
1059                                 out_row[j * components] =  \
1060                                         (type)(anti_a_f * (anti_b_f * output1r +  \
1061                                         b_f * output2r) +  \
1062                         a_f * (anti_b_f * output3r +  \
1063                                         b_f * output4r)); \
1064                                 out_row[j * components + 1] =   \
1065                                         (type)(anti_a_f * (anti_b_f * output1g +  \
1066                                         b_f * output2g) +  \
1067                         a_f * ((anti_b_f * output3g) +  \
1068                                         b_f * output4g)); \
1069                                 out_row[j * components + 2] =   \
1070                                         (type)(anti_a_f * ((anti_b_f * output1b) +  \
1071                                         (b_f * output2b)) +  \
1072                         a_f * ((anti_b_f * output3b) +  \
1073                                         b_f * output4b)); \
1074                                 if(components == 4) \
1075                                         out_row[j * components + 3] =   \
1076                                                 (type)(anti_a_f * ((anti_b_f * output1a) +  \
1077                                                 (b_f * output2a)) +  \
1078                                 a_f * ((anti_b_f * output3a) +  \
1079                                                 b_f * output4a)); \
1080                         } \
1081                         else \
1082                         { \
1083                                 uint64_t output1r, output1g, output1b, output1a; \
1084                                 uint64_t output2r, output2g, output2b, output2a; \
1085                                 uint64_t output3r, output3g, output3b, output3a; \
1086                                 uint64_t output4r, output4g, output4b, output4a; \
1087                                 uint64_t b_i; \
1088                                 uint64_t anti_b_i; \
1089                                 b_i = table_frac_x_i[j]; \
1090                                 anti_b_i = table_antifrac_x_i[j]; \
1091  \
1092                         output1r = in_row1[i_x1 * components]; \
1093                         output1g = in_row1[i_x1 * components + 1]; \
1094                         output1b = in_row1[i_x1 * components + 2]; \
1095                         if(components == 4) output1a = in_row1[i_x1 * components + 3]; \
1096  \
1097                         output2r = in_row1[i_x2 * components]; \
1098                         output2g = in_row1[i_x2 * components + 1]; \
1099                         output2b = in_row1[i_x2 * components + 2]; \
1100                         if(components == 4) output2a = in_row1[i_x2 * components + 3]; \
1101  \
1102                         output3r = in_row2[i_x1 * components]; \
1103                         output3g = in_row2[i_x1 * components + 1]; \
1104                         output3b = in_row2[i_x1 * components + 2]; \
1105                         if(components == 4) output3a = in_row2[i_x1 * components + 3]; \
1106 \
1107                         output4r = in_row2[i_x2 * components]; \
1108                         output4g = in_row2[i_x2 * components + 1]; \
1109                         output4b = in_row2[i_x2 * components + 2]; \
1110                         if(components == 4) output4a = in_row2[i_x2 * components + 3]; \
1111  \
1112                                 out_row[j * components] =  \
1113                                         (type)((anti_a_i * (anti_b_i * output1r +  \
1114                                         b_i * output2r) +  \
1115                         a_i * (anti_b_i * output3r +  \
1116                                         b_i * output4r)) / 0xffffffff); \
1117                                 out_row[j * components + 1] =   \
1118                                         (type)((anti_a_i * (anti_b_i * output1g +  \
1119                                         b_i * output2g) +  \
1120                         a_i * (anti_b_i * output3g +  \
1121                                         b_i * output4g)) / 0xffffffff); \
1122                                 out_row[j * components + 2] =   \
1123                                         (type)((anti_a_i * (anti_b_i * output1b +  \
1124                                         b_i * output2b) +  \
1125                         a_i * (anti_b_i * output3b +  \
1126                                         b_i * output4b)) / 0xffffffff); \
1127                                 if(components == 4) \
1128                                         out_row[j * components + 3] =   \
1129                                                 (type)((anti_a_i * (anti_b_i * output1a +  \
1130                                                 b_i * output2a) +  \
1131                                 a_i * (anti_b_i * output3a +  \
1132                                                 b_i * output4a)) / 0xffffffff); \
1133                         } \
1134                 } \
1135         } \
1136  \
1137  \
1138         delete [] table_int_x1; \
1139         delete [] table_int_x2; \
1140         delete [] table_int_y1; \
1141         delete [] table_int_y2; \
1142         if(use_float) \
1143         { \
1144                 delete [] table_frac_x_f; \
1145                 delete [] table_antifrac_x_f; \
1146                 delete [] table_frac_y_f; \
1147                 delete [] table_antifrac_y_f; \
1148         } \
1149         else \
1150         { \
1151                 delete [] table_frac_x_i; \
1152                 delete [] table_antifrac_x_i; \
1153                 delete [] table_frac_y_i; \
1154                 delete [] table_antifrac_y_i; \
1155         } \
1156  \
1157 /*printf("BILINEAR_ENLARGE 2\n");*/ \
1158 }
1159
1160
// Bicubic scaling with a 4x4 kernel.  For every output pixel the sixteen
// source pixels named by the coordinate tables are summed, each weighted by
// the product of its y and x table coefficients.  use_float selects float
// coefficients or fixed-point integer coefficients; the integer sums are
// divided by 0xffffffff before being stored.
// Both tables cover the full output size and are indexed with
// (output coordinate * 4 + tap).
// Names taken from the expanding scope: input, output, pkg, scale_w,
// scale_h, in_x1_int, in_y1_int, out_w_int, out_h_int and use_float.
// max is unused.
#define BICUBIC(max, type, components) \
{ \
        type **src_rows = (type**)input->get_rows(); \
        type **dst_rows = (type**)output->get_rows(); \
        const int src_w = input->get_w(); \
        const int src_h = input->get_h(); \
        const float step_x = 1.0 / scale_w; \
        const float step_y = 1.0 / scale_h; \
        float *x_weights_f, *y_weights_f; \
        int *x_weights_i, *y_weights_i; \
        int *x_coords, *y_coords; \
 \
        if(use_float) \
        { \
                tabulate_bcubic_f(x_weights_f, x_coords, \
                        step_x, in_x1_int, out_w_int, src_w, -1); \
                tabulate_bcubic_f(y_weights_f, y_coords, \
                        step_y, in_y1_int, out_h_int, src_h, 1); \
        } \
        else \
        { \
                tabulate_bcubic_i(x_weights_i, x_coords, \
                        step_x, in_x1_int, out_w_int, src_w, -1); \
                tabulate_bcubic_i(y_weights_i, y_coords, \
                        step_y, in_y1_int, out_h_int, src_h, 1); \
        } \
 \
        for(int out_y = pkg->out_row1; out_y < pkg->out_row2; out_y++) \
        { \
                type *dst = dst_rows[out_y]; \
 \
                for(int out_x = 0; out_x < out_w_int; out_x++, dst += components) \
                { \
                        if(use_float) \
                        { \
                                float sum_f[4] = { 0, 0, 0, 0 }; \
 \
/* Kernel: rows outermost, then columns, then channels */ \
                                for(int ky = 0; ky < 4; ky++) \
                                { \
                                        const float wy = y_weights_f[out_y * 4 + ky]; \
                                        type *src_row = src_rows[y_coords[out_y * 4 + ky]]; \
 \
                                        for(int kx = 0; kx < 4; kx++) \
                                        { \
                                                const float wx = x_weights_f[out_x * 4 + kx]; \
                                                const float weight = wy * wx; \
                                                type *src = src_row + x_coords[out_x * 4 + kx] * components; \
 \
                                                for(int c = 0; c < components; c++) \
                                                { \
                                                        sum_f[c] += weight * src[c]; \
                                                } \
                                        } \
                                } \
 \
                                for(int c = 0; c < components; c++) \
                                { \
                                        dst[c] = (type)sum_f[c]; \
                                } \
                        } \
                        else \
                        { \
                                uint64_t sum_i[4] = { 0, 0, 0, 0 }; \
 \
/* Kernel: rows outermost, then columns, then channels */ \
                                for(int ky = 0; ky < 4; ky++) \
                                { \
                                        const uint64_t wy = y_weights_i[out_y * 4 + ky]; \
                                        type *src_row = src_rows[y_coords[out_y * 4 + ky]]; \
 \
                                        for(int kx = 0; kx < 4; kx++) \
                                        { \
                                                const uint64_t wx = x_weights_i[out_x * 4 + kx]; \
                                                const uint64_t weight = wy * wx; \
                                                type *src = src_row + x_coords[out_x * 4 + kx] * components; \
 \
                                                for(int c = 0; c < components; c++) \
                                                { \
                                                        sum_i[c] += weight * src[c]; \
                                                } \
                                        } \
                                } \
 \
                                for(int c = 0; c < components; c++) \
                                { \
                                        dst[c] = (type)(sum_i[c] / 0xffffffff); \
                                } \
                        } \
                } \
        } \
 \
/* Release the tables; only the coefficient set that was tabulated exists */ \
        if(use_float) \
        { \
                delete [] x_weights_f; \
                delete [] y_weights_f; \
        } \
        else \
        { \
                delete [] x_weights_i; \
                delete [] y_weights_i; \
        } \
        delete [] x_coords; \
        delete [] y_coords; \
}
1316
1317
1318
1319
// The pow function is not thread safe in Compaq C, so cubes are formed by
// plain multiplication.  The argument is evaluated three times.
#define CUBE(v) ((v) * (v) * (v))
1322
1323 float ScaleUnit::cubic_bspline(float x)
1324 {
1325         float a, b, c, d;
1326
1327         if((x + 2.0F) <= 0.0F)
1328         {
1329         a = 0.0F;
1330         }
1331         else
1332         {
1333         a = CUBE(x + 2.0F);
1334         }
1335
1336
1337         if((x + 1.0F) <= 0.0F)
1338         {
1339         b = 0.0F;
1340         }
1341         else
1342         {
1343         b = CUBE(x + 1.0F);
1344         }
1345
1346         if(x <= 0)
1347         {
1348         c = 0.0F;
1349         }
1350         else
1351         {
1352         c = CUBE(x);
1353         }
1354
1355         if((x - 1.0F) <= 0.0F)
1356         {
1357         d = 0.0F;
1358         }
1359         else
1360         {
1361         d = CUBE(x - 1.0F);
1362         }
1363
1364
1365         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
1366 }
1367
1368
1369 void ScaleUnit::tabulate_bcubic_f(float* &coef_table,
1370         int* &coord_table,
1371         float scale,
1372         int start,
1373         int pixels,
1374         int total_pixels,
1375         float coefficient)
1376 {
1377         coef_table = new float[pixels * 4];
1378         coord_table = new int[pixels * 4];
1379         for(int i = 0, j = 0; i < pixels; i++)
1380         {
1381                 float f_x = (float)i * scale;
1382                 float a = f_x - floor(f_x);
1383
1384                 for(float m = -1; m < 3; m++)
1385                 {
1386                         coef_table[j] = cubic_bspline(coefficient * (m - a));
1387                         coord_table[j] = (int)(start + (int)f_x + m);
1388                         CLAMP(coord_table[j], 0, total_pixels - 1);
1389                         j++;
1390                 }
1391
1392         }
1393 }
1394
1395 void ScaleUnit::tabulate_bcubic_i(int* &coef_table,
1396         int* &coord_table,
1397         float scale,
1398         int start,
1399         int pixels,
1400         int total_pixels,
1401         float coefficient)
1402 {
1403         coef_table = new int[pixels * 4];
1404         coord_table = new int[pixels * 4];
1405         for(int i = 0, j = 0; i < pixels; i++)
1406         {
1407                 float f_x = (float)i * scale;
1408                 float a = f_x - floor(f_x);
1409
1410                 for(float m = -1; m < 3; m++)
1411                 {
1412                         coef_table[j] = (int)(cubic_bspline(coefficient * (m - a)) * 0x10000);
1413                         coord_table[j] = (int)(start + (int)f_x + m);
1414                         CLAMP(coord_table[j], 0, total_pixels - 1);
1415                         j++;
1416                 }
1417
1418         }
1419 }
1420
1421 void ScaleUnit::tabulate_blinear_f(int* &table_int1,
1422                 int* &table_int2,
1423                 float* &table_frac,
1424                 float* &table_antifrac,
1425                 float scale,
1426                 int pixel1,
1427                 int pixel2,
1428                 int start,
1429                 int total_pixels)
1430 {
1431         table_int1 = new int[pixel2 - pixel1];
1432         table_int2 = new int[pixel2 - pixel1];
1433         table_frac = new float[pixel2 - pixel1];
1434         table_antifrac = new float[pixel2 - pixel1];
1435
1436         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1437         {
1438                 float f_x = (float)i * scale;
1439                 int i_x = (int)floor(f_x);
1440                 float a = (f_x - floor(f_x));
1441
1442                 table_int1[j] = i_x + start;
1443                 table_int2[j] = i_x + start + 1;
1444                 CLAMP(table_int1[j], 0, total_pixels - 1);
1445                 CLAMP(table_int2[j], 0, total_pixels - 1);
1446                 table_frac[j] = a;
1447                 table_antifrac[j] = 1.0F - a;
1448 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1449         }
1450 }
1451
1452 void ScaleUnit::tabulate_blinear_i(int* &table_int1,
1453                 int* &table_int2,
1454                 int* &table_frac,
1455                 int* &table_antifrac,
1456                 float scale,
1457                 int pixel1,
1458                 int pixel2,
1459                 int start,
1460                 int total_pixels)
1461 {
1462         table_int1 = new int[pixel2 - pixel1];
1463         table_int2 = new int[pixel2 - pixel1];
1464         table_frac = new int[pixel2 - pixel1];
1465         table_antifrac = new int[pixel2 - pixel1];
1466
1467         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
1468         {
1469                 double f_x = (float)i * scale;
1470                 int i_x = (int)floor(f_x);
1471                 float a = (f_x - floor(f_x));
1472
1473                 table_int1[j] = i_x + start;
1474                 table_int2[j] = i_x + start + 1;
1475                 CLAMP(table_int1[j], 0, total_pixels - 1);
1476                 CLAMP(table_int2[j], 0, total_pixels - 1);
1477                 table_frac[j] = (int)(a * 0xffff);
1478                 table_antifrac[j] = (int)((1.0F - a) * 0x10000);
1479 //printf("ScaleUnit::tabulate_blinear %d %d %d\n", j, table_int1[j], table_int2[j]);
1480         }
1481 }
1482
1483 void ScaleUnit::process_package(LoadPackage *package)
1484 {
     // Scale one package of work.  The pixel loops themselves live in the
     // BICUBIC, BILINEAR_ENLARGE and BILINEAR_REDUCE macros, which are defined
     // outside this function; this function only sets up locals and selects
     // which macro to expand for the current interpolation type and color model.
1485         ScalePackage *pkg = (ScalePackage*)package;
1486
1487 //printf("ScaleUnit::process_package 1\n");
1488 // Arguments for macros
     // NOTE(review): most of these locals (and pkg) are never referenced again
     // in this function, so the macros presumably pick them up by name.  Check
     // the macro bodies before renaming or removing any of them.
1489         VFrame *output = engine->scale_output;
1490         VFrame *input = engine->scale_input;
1491         float scale_w = engine->w_scale;
1492         float scale_h = engine->h_scale;
1493         int in_x1_int = engine->in_x1_int;
1494         int in_y1_int = engine->in_y1_int;
1495         int out_h_int = engine->out_h_int;
1496         int out_w_int = engine->out_w_int;
     // Nonzero when the input frame uses one of the four YUV color models.
1497         int do_yuv =
1498                 (input->get_color_model() == BC_YUV888 ||
1499                 input->get_color_model() == BC_YUVA8888 ||
1500                 input->get_color_model() == BC_YUV161616 ||
1501                 input->get_color_model() == BC_YUVA16161616);
1502
1503 //printf("ScaleUnit::process_package 2 %f %f\n", engine->w_scale, engine->h_scale);
     // Bicubic is used for CUBIC_CUBIC always, and for CUBIC_LINEAR only when
     // both scale factors are greater than 1.
1504         if(engine->interpolation_type == CUBIC_CUBIC ||
1505                 (engine->interpolation_type == CUBIC_LINEAR
1506                         && engine->w_scale > 1 &&
1507                         engine->h_scale > 1))
1508         {
1509
     // The macro arguments pair each color model with 0xff or 0xffff, the
     // sample type and the number of components per pixel.  Color models that
     // are not listed fall through the switch and nothing is done.
1510                 switch(engine->scale_input->get_color_model())
1511                 {
1512                         case BC_RGB888:
1513                         case BC_YUV888:
1514                                 BICUBIC(0xff, unsigned char, 3);
1515                                 break;
1516
1517                         case BC_RGBA8888:
1518                         case BC_YUVA8888:
1519                                 BICUBIC(0xff, unsigned char, 4);
1520                                 break;
1521
1522                         case BC_RGB161616:
1523                         case BC_YUV161616:
1524                                 BICUBIC(0xffff, uint16_t, 3);
1525                                 break;
1526
1527                         case BC_RGBA16161616:
1528                         case BC_YUVA16161616:
1529                                 BICUBIC(0xffff, uint16_t, 4);
1530                                 break;
1531                 }
1532         }
1533         else
     // Not bicubic, but both scale factors are still greater than 1.
1534         if(engine->w_scale > 1 &&
1535                 engine->h_scale > 1)
1536 //if(0)
1537 // Perform bilinear scaling input -> scale_output
1538         {
1539                 switch(engine->scale_input->get_color_model())
1540                 {
1541                         case BC_RGB888:
1542                         case BC_YUV888:
1543                                 BILINEAR_ENLARGE(0xff, unsigned char, 3);
1544                                 break;
1545
1546                         case BC_RGBA8888:
1547                         case BC_YUVA8888:
1548                                 BILINEAR_ENLARGE(0xff, unsigned char, 4);
1549                                 break;
1550
1551                         case BC_RGB161616:
1552                         case BC_YUV161616:
1553                                 BILINEAR_ENLARGE(0xffff, uint16_t, 3);
1554                                 break;
1555
1556                         case BC_RGBA16161616:
1557                         case BC_YUVA16161616:
1558                                 BILINEAR_ENLARGE(0xffff, uint16_t, 4);
1559                                 break;
1560                 }
1561         }
1562         else
     // Everything else: at least one scale factor is not greater than 1.
1563         {
1564                 switch(engine->scale_input->get_color_model())
1565                 {
1566                         case BC_RGB888:
1567                         case BC_YUV888:
1568                                 BILINEAR_REDUCE(0xff, unsigned char, 3);
1569                                 break;
1570
1571                         case BC_RGBA8888:
1572                         case BC_YUVA8888:
1573                                 BILINEAR_REDUCE(0xff, unsigned char, 4);
1574                                 break;
1575
1576                         case BC_RGB161616:
1577                         case BC_YUV161616:
1578                                 BILINEAR_REDUCE(0xffff, uint16_t, 3);
1579                                 break;
1580
1581                         case BC_RGBA16161616:
1582                         case BC_YUVA16161616:
1583                                 BILINEAR_REDUCE(0xffff, uint16_t, 4);
1584                                 break;
1585                 }
1586         }
1587 //printf("ScaleUnit::process_package 3\n");
1588
1589 }
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
1604  : LoadServer(cpus, cpus)
1605 {
1606         this->overlay = overlay;
1607 }
1608
1609 ScaleEngine::~ScaleEngine()
1610 {
1611 }
1612
1613 void ScaleEngine::init_packages()
1614 {
1615         for(int i = 0; i < total_packages; i++)
1616         {
1617                 ScalePackage *package = (ScalePackage*)packages[i];
1618                 package->out_row1 = out_h_int / total_packages * i;
1619                 package->out_row2 = package->out_row1 + out_h_int / total_packages;
1620
1621                 if(i >= total_packages - 1)
1622                         package->out_row2 = out_h_int;
1623         }
1624 }
1625
1626 LoadClient* ScaleEngine::new_client()
1627 {
1628         return new ScaleUnit(this, overlay);
1629 }
1630
1631 LoadPackage* ScaleEngine::new_package()
1632 {
1633         return new ScalePackage;
1634 }
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648 TranslatePackage::TranslatePackage()
1649 {
1650 }
1651
1652
1653
1654 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1655  : LoadClient(server)
1656 {
1657         this->overlay = overlay;
1658         this->engine = server;
1659 }
1660
1661 TranslateUnit::~TranslateUnit()
1662 {
1663 }
1664
1665
1666
1667 void TranslateUnit::translation_array_f(transfer_table_f* &table,
1668         float out_x1,
1669         float out_x2,
1670         float in_x1,
1671         float in_x2,
1672         int in_total,
1673         int out_total,
1674         int &out_x1_int,
1675         int &out_x2_int)
1676 {
1677         int out_w_int;
1678         float offset = out_x1 - in_x1;
1679 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1680
1681         out_x1_int = (int)out_x1;
1682         out_x2_int = MIN((int)ceil(out_x2), out_total);
1683         out_w_int = out_x2_int - out_x1_int;
1684
1685         table = new transfer_table_f[out_w_int];
1686         bzero(table, sizeof(transfer_table_f) * out_w_int);
1687
1688
1689 //printf("OverlayFrame::translation_array_f 2 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1690
1691         float in_x = in_x1;
1692         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1693         {
1694                 transfer_table_f *entry = &table[out_x - out_x1_int];
1695
1696                 entry->in_x1 = (int)in_x;
1697                 entry->in_x2 = (int)in_x + 1;
1698
1699 // Get fraction of output pixel to fill
1700                 entry->output_fraction = 1;
1701
1702                 if(out_x1 > out_x)
1703                 {
1704                         entry->output_fraction -= out_x1 - out_x;
1705                 }
1706
1707                 if(out_x2 < out_x + 1)
1708                 {
1709                         entry->output_fraction = (out_x2 - out_x);
1710                 }
1711
1712 // Advance in_x until out_x_fraction is filled
1713                 float out_x_fraction = entry->output_fraction;
1714                 float in_x_fraction = floor(in_x + 1) - in_x;
1715
1716                 if(out_x_fraction <= in_x_fraction)
1717                 {
1718                         entry->in_fraction1 = out_x_fraction;
1719                         entry->in_fraction2 = 0.0;
1720                         in_x += out_x_fraction;
1721                 }
1722                 else
1723                 {
1724                         entry->in_fraction1 = in_x_fraction;
1725                         in_x += out_x_fraction;
1726                         entry->in_fraction2 = in_x - floor(in_x);
1727                 }
1728
1729 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1730                 if(entry->in_x2 >= in_total)
1731                 {
1732                         entry->in_x2 = in_total - 1;
1733                         entry->in_fraction2 = 0.0;
1734                 }
1735
1736                 if(entry->in_x1 >= in_total)
1737                 {
1738                         entry->in_x1 = in_total - 1;
1739                         entry->in_fraction1 = 0.0;
1740                 }
1741 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1742 //      out_x,
1743 //      entry->in_x1,
1744 //      entry->in_x2,
1745 //      entry->in_fraction1,
1746 //      entry->in_fraction2,
1747 //      entry->output_fraction);
1748         }
1749 }
1750
1751
1752 void TranslateUnit::translation_array_i(transfer_table_i* &table,
1753         float out_x1,
1754         float out_x2,
1755         float in_x1,
1756         float in_x2,
1757         int in_total,
1758         int out_total,
1759         int &out_x1_int,
1760         int &out_x2_int)
1761 {
1762         int out_w_int;
1763         float offset = out_x1 - in_x1;
1764
1765         out_x1_int = (int)out_x1;
1766         out_x2_int = MIN((int)ceil(out_x2), out_total);
1767         out_w_int = out_x2_int - out_x1_int;
1768
1769         table = new transfer_table_i[out_w_int];
1770         bzero(table, sizeof(transfer_table_i) * out_w_int);
1771
1772
1773 //printf("OverlayFrame::translation_array_f 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1774
1775         float in_x = in_x1;
1776         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1777         {
1778                 transfer_table_i *entry = &table[out_x - out_x1_int];
1779
1780                 entry->in_x1 = (int)in_x;
1781                 entry->in_x2 = (int)in_x + 1;
1782
1783 // Get fraction of output pixel to fill
1784                 entry->output_fraction = 0x10000;
1785
1786                 if(out_x1 > out_x)
1787                 {
1788                         entry->output_fraction -= (int)((out_x1 - out_x) * 0x10000);
1789                 }
1790
1791                 if(out_x2 < out_x + 1)
1792                 {
1793                         entry->output_fraction = (int)((out_x2 - out_x) * 0x10000);
1794                 }
1795
1796 // Advance in_x until out_x_fraction is filled
1797                 int out_x_fraction = entry->output_fraction;
1798                 int in_x_fraction = (int)((floor(in_x + 1) - in_x) * 0x10000);
1799
1800                 if(out_x_fraction <= in_x_fraction)
1801                 {
1802                         entry->in_fraction1 = out_x_fraction;
1803                         entry->in_fraction2 = 0;
1804                         in_x += (float)out_x_fraction / 0x10000;
1805                 }
1806                 else
1807                 {
1808                         entry->in_fraction1 = in_x_fraction;
1809                         in_x += (float)out_x_fraction / 0x10000;
1810                         entry->in_fraction2 = (int)((in_x - floor(in_x)) * 0x10000);
1811                 }
1812
1813 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1814                 if(entry->in_x2 >= in_total)
1815                 {
1816                         entry->in_x2 = in_total - 1;
1817                         entry->in_fraction2 = 0;
1818                 }
1819
1820                 if(entry->in_x1 >= in_total)
1821                 {
1822                         entry->in_x1 = in_total - 1;
1823                         entry->in_fraction1 = 0;
1824                 }
1825 // printf("OverlayFrame::translation_array_f 2 %d %d %d %f %f %f\n",
1826 //      out_x,
1827 //      entry->in_x1,
1828 //      entry->in_x2,
1829 //      entry->in_fraction1,
1830 //      entry->in_fraction2,
1831 //      entry->output_fraction);
1832         }
1833 }
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868 #define TRANSLATE(max, temp_type, type, components, chroma_offset) \
     /* Blend the input frame onto the output frame for rows row1 .. row2 - 1, */ \
     /* interpolating each output pixel from up to four input pixels. */ \
     /* max           0xff or 0xffff at the call sites; full opacity maps to it */ \
     /* temp_type     type used for opacity and handed on to BLEND_3 / BLEND_4 */ \
     /* type          sample type of the frame rows */ \
     /* components    samples per pixel, 3 or 4 */ \
     /* chroma_offset nonzero for YUV; added for the uncovered part of a pixel */ \
     /* Names taken from the expanding scope: input, output, alpha, row1, row2, */ \
     /* out_x1_int, out_x2_int, out_y1_int, x_table_f, y_table_f, x_table_i, */ \
     /* y_table_i, plus whatever BLEND_3 / BLEND_4 read (defined elsewhere). */ \
1869 { \
1870  \
1871         type **in_rows = (type**)input->get_rows(); \
1872         type **out_rows = (type**)output->get_rows(); \
1873  \
1874 /* printf("OverlayFrame::translate 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",  */ \
1875 /*      (in_x1),  in_y1,  in_x2,  in_y2,  out_x1,  out_y1, out_x2,  out_y2); */ \
1876  \
     /* Overall opacity scaled to the sample range, and its complement */ \
1877         temp_type master_opacity = (temp_type)(alpha * max + 0.5); \
1878         temp_type master_transparency = max - master_opacity; \
1879  \
1880 /* printf("TRANSLATE %d\n", mode); */ \
1881  \
1882         for(int i = row1; i < row2; i++) \
1883         { \
     /* Source rows and vertical weights for output row i.  Only the float or */ \
     /* the integer set of variables is assigned, depending on use_float. */ \
1884                 int in_y1; \
1885                 int in_y2; \
1886                 float y_fraction1_f; \
1887                 float y_fraction2_f; \
1888                 float y_output_fraction_f; \
1889                 uint64_t y_fraction1_i; \
1890                 uint64_t y_fraction2_i; \
1891                 uint64_t y_output_fraction_i; \
1892                 if(use_float) \
1893                 { \
1894                         in_y1 = y_table_f[i - out_y1_int].in_x1; \
1895                         in_y2 = y_table_f[i - out_y1_int].in_x2; \
1896                         y_fraction1_f = y_table_f[i - out_y1_int].in_fraction1; \
1897                         y_fraction2_f = y_table_f[i - out_y1_int].in_fraction2; \
1898                         y_output_fraction_f = y_table_f[i - out_y1_int].output_fraction; \
1899                 } \
1900                 else \
1901                 { \
1902                         in_y1 = y_table_i[i - out_y1_int].in_x1; \
1903                         in_y2 = y_table_i[i - out_y1_int].in_x2; \
1904                         y_fraction1_i = y_table_i[i - out_y1_int].in_fraction1; \
1905                         y_fraction2_i = y_table_i[i - out_y1_int].in_fraction2; \
1906                         y_output_fraction_i = y_table_i[i - out_y1_int].output_fraction; \
1907                 } \
1908                 type *in_row1 = in_rows[(in_y1)]; \
1909                 type *in_row2 = in_rows[(in_y2)]; \
1910                 type *out_row = out_rows[i]; \
1911  \
1912                 for(int j = out_x1_int; j < out_x2_int; j++) \
1913                 { \
     /* Source columns and horizontal weights for output column j */ \
1914                         int in_x1; \
1915                         int in_x2; \
1916                         float x_fraction1_f; \
1917                         float x_fraction2_f; \
1918                         float x_output_fraction_f; \
1919                         uint64_t x_fraction1_i; \
1920                         uint64_t x_fraction2_i; \
1921                         uint64_t x_output_fraction_i; \
1922                         if(use_float) \
1923                         { \
1924                                 in_x1 = x_table_f[j - out_x1_int].in_x1; \
1925                                 in_x2 = x_table_f[j - out_x1_int].in_x2; \
1926                                 x_fraction1_f = x_table_f[j - out_x1_int].in_fraction1; \
1927                                 x_fraction2_f = x_table_f[j - out_x1_int].in_fraction2; \
1928                                 x_output_fraction_f = x_table_f[j - out_x1_int].output_fraction; \
1929                         } \
1930                         else \
1931                         { \
1932                                 in_x1 = x_table_i[j - out_x1_int].in_x1; \
1933                                 in_x2 = x_table_i[j - out_x1_int].in_x2; \
1934                                 x_fraction1_i = x_table_i[j - out_x1_int].in_fraction1; \
1935                                 x_fraction2_i = x_table_i[j - out_x1_int].in_fraction2; \
1936                                 x_output_fraction_i = x_table_i[j - out_x1_int].output_fraction; \
1937                         } \
     /* This output shadows the outer output frame and points at the */ \
     /* destination pixel; input4 stays unassigned when components is 3. */ \
     /* NOTE(review): presumably output and input1..input4 are what BLEND_3 and */ \
     /* BLEND_4 consume; confirm against their definitions. */ \
1938                         type *output = &out_row[j * components]; \
1939                         type input1, input2, input3, input4; \
1940  \
1941                         if(use_float) \
1942                         { \
     /* Weight of each of the four source pixels: horizontal times vertical */ \
1943                                 float fraction1 = x_fraction1_f * y_fraction1_f; \
1944                                 float fraction2 = x_fraction2_f * y_fraction1_f; \
1945                                 float fraction3 = x_fraction1_f * y_fraction2_f; \
1946                                 float fraction4 = x_fraction2_f * y_fraction2_f; \
1947          \
1948                                 input1 = (type)(in_row1[in_x1 * components] * fraction1 +  \
1949                                         in_row1[in_x2 * components] * fraction2 +  \
1950                                         in_row2[in_x1 * components] * fraction3 +  \
1951                                         in_row2[in_x2 * components] * fraction4 + 0.5); \
1952          \
1953 /* Add chroma to fractional pixels */ \
1954                                 if(chroma_offset) \
1955                                 { \
1956                                         float extra_chroma = (1.0F - \
1957                                                 fraction1 - \
1958                                                 fraction2 - \
1959                                                 fraction3 - \
1960                                                 fraction4) * chroma_offset; \
1961                                         input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1962                                                 in_row1[in_x2 * components + 1] * fraction2 +  \
1963                                                 in_row2[in_x1 * components + 1] * fraction3 +  \
1964                                                 in_row2[in_x2 * components + 1] * fraction4 + \
1965                                                 extra_chroma + 0.5); \
1966                                         input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1967                                                 in_row1[in_x2 * components + 2] * fraction2 +  \
1968                                                 in_row2[in_x1 * components + 2] * fraction3 +  \
1969                                                 in_row2[in_x2 * components + 2] * fraction4 +  \
1970                                                 extra_chroma + 0.5); \
1971                                 } \
1972                                 else \
1973                                 { \
1974                                         input2 = (type)(in_row1[in_x1 * components + 1] * fraction1 +  \
1975                                                 in_row1[in_x2 * components + 1] * fraction2 +  \
1976                                                 in_row2[in_x1 * components + 1] * fraction3 +  \
1977                                                 in_row2[in_x2 * components + 1] * fraction4 + 0.5); \
1978                                         input3 = (type)(in_row1[in_x1 * components + 2] * fraction1 +  \
1979                                                 in_row1[in_x2 * components + 2] * fraction2 +  \
1980                                                 in_row2[in_x1 * components + 2] * fraction3 +  \
1981                                                 in_row2[in_x2 * components + 2] * fraction4 + 0.5); \
1982                                 } \
1983          \
1984                                 if(components == 4) \
1985                                         input4 = (type)(in_row1[in_x1 * components + 3] * fraction1 +  \
1986                                                 in_row1[in_x2 * components + 3] * fraction2 +  \
1987                                                 in_row2[in_x1 * components + 3] * fraction3 +  \
1988                                                 in_row2[in_x2 * components + 3] * fraction4 + 0.5); \
1989                         } \
1990                         else \
1991                         { \
     /* Integer variant of the same interpolation.  Each weight is the product */ \
     /* of two table fractions and the sums are divided by 0xffffffff. */ \
1992                                 uint64_t fraction1 = x_fraction1_i * y_fraction1_i; \
1993                                 uint64_t fraction2 = x_fraction2_i * y_fraction1_i; \
1994                                 uint64_t fraction3 = x_fraction1_i * y_fraction2_i; \
1995                                 uint64_t fraction4 = x_fraction2_i * y_fraction2_i; \
1996          \
1997                                 input1 = (type)((in_row1[in_x1 * components] * fraction1 +  \
1998                                         in_row1[in_x2 * components] * fraction2 +  \
1999                                         in_row2[in_x1 * components] * fraction3 +  \
2000                                         in_row2[in_x2 * components] * fraction4) / 0xffffffff); \
2001          \
2002 /* Add chroma to fractional pixels */ \
2003                                 if(chroma_offset) \
2004                                 { \
2005                                         uint64_t extra_chroma = (0xffffffff - \
2006                                                 fraction1 - \
2007                                                 fraction2 - \
2008                                                 fraction3 - \
2009                                                 fraction4) * \
2010                                                 chroma_offset; \
2011                                         input2 = (type)((in_row1[in_x1 * components + 1] * fraction1 +  \
2012                                                 in_row1[in_x2 * components + 1] * fraction2 +  \
2013                                                 in_row2[in_x1 * components + 1] * fraction3 +  \
2014                                                 in_row2[in_x2 * components + 1] * fraction4 + \
2015                                                 extra_chroma) / 0xffffffff); \
2016                                         input3 = (type)((in_row1[in_x1 * components + 2] * fraction1 +  \
2017                                                 in_row1[in_x2 * components + 2] * fraction2 +  \
2018                                                 in_row2[in_x1 * components + 2] * fraction3 +  \
2019                                                 in_row2[in_x2 * components + 2] * fraction4 +  \
2020                                                 extra_chroma) / 0xffffffff); \
2021                                 } \
2022                                 else \
2023                                 { \
2024                                         input2 = (type)((in_row1[in_x1 * components + 1] * fraction1 +  \
2025                                                 in_row1[in_x2 * components + 1] * fraction2 +  \
2026                                                 in_row2[in_x1 * components + 1] * fraction3 +  \
2027                                                 in_row2[in_x2 * components + 1] * fraction4) / 0xffffffff); \
2028                                         input3 = (type)((in_row1[in_x1 * components + 2] * fraction1 +  \
2029                                                 in_row1[in_x2 * components + 2] * fraction2 +  \
2030                                                 in_row2[in_x1 * components + 2] * fraction3 +  \
2031                                                 in_row2[in_x2 * components + 2] * fraction4) / 0xffffffff); \
2032                                 } \
2033          \
2034                                 if(components == 4) \
2035                                         input4 = (type)((in_row1[in_x1 * components + 3] * fraction1 +  \
2036                                                 in_row1[in_x2 * components + 3] * fraction2 +  \
2037                                                 in_row2[in_x1 * components + 3] * fraction3 +  \
2038                                                 in_row2[in_x2 * components + 3] * fraction4) / 0xffffffff); \
2039                         } \
2040  \
     /* Per pixel opacity: the master opacity multiplied by the covered */ \
     /* fraction of the output pixel in both directions */ \
2041                         temp_type opacity; \
2042                         if(use_float) \
2043                                 opacity = (temp_type)(master_opacity *  \
2044                                         y_output_fraction_f *  \
2045                                         x_output_fraction_f + 0.5); \
2046                         else \
2047                                 opacity = (temp_type)((int64_t)master_opacity *  \
2048                                         y_output_fraction_i *  \
2049                                         x_output_fraction_i / \
2050                                         0xffffffff); \
2051                         temp_type transparency = max - opacity; \
2052  \
2053 /* printf("TRANSLATE 2 %x %d %d\n", opacity, j, i); */ \
2054  \
2055                         if(components == 3) \
2056                         { \
2057                                 BLEND_3(max, temp_type, type, chroma_offset); \
2058                         } \
2059                         else \
2060                         { \
2061                                 BLEND_4(max, temp_type, type, chroma_offset); \
2062                         } \
2063                 } \
2064         } \
2065 }
2066
2067 void TranslateUnit::process_package(LoadPackage *package)
2068 {
     // Translate and blend the rows [pkg->out_row1, pkg->out_row2) of the
     // output.  The lookup tables for both axes are built at the start of every
     // call and deleted again at the end.
2069         TranslatePackage *pkg = (TranslatePackage*)package;
     // Integer output bounds, filled in by translation_array_f / _i below.
2070         int out_y1_int;
2071         int out_y2_int;
2072         int out_x1_int;
2073         int out_x2_int;
2074
2075
2076 // Variables for TRANSLATE
     // NOTE(review): the TRANSLATE macro reads input, output, alpha, row1 and
     // row2 by name, and BLEND_3 switches on mode, so these locals must keep
     // their names.
2077         VFrame *input = engine->translate_input;
2078         VFrame *output = engine->translate_output;
2079         float in_x1 = engine->translate_in_x1;
2080         float in_y1 = engine->translate_in_y1;
2081         float in_x2 = engine->translate_in_x2;
2082         float in_y2 = engine->translate_in_y2;
2083         float out_x1 = engine->translate_out_x1;
2084         float out_y1 = engine->translate_out_y1;
2085         float out_x2 = engine->translate_out_x2;
2086         float out_y2 = engine->translate_out_y2;
2087         float alpha = engine->translate_alpha;
2088         int row1 = pkg->out_row1;
2089         int row2 = pkg->out_row2;
2090         int mode = engine->translate_mode;
2091         int in_total_x = input->get_w();
2092         int in_total_y = input->get_h();
     // Nonzero when the input frame uses one of the four YUV color models.
2093         int do_yuv =
2094                 (engine->translate_input->get_color_model() == BC_YUV888 ||
2095                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
2096                 engine->translate_input->get_color_model() == BC_YUV161616 ||
2097                 engine->translate_input->get_color_model() == BC_YUVA16161616);
2098
     // Only one pair of tables is allocated, selected by use_float (a #define
     // near the top of this file); the other pair stays uninitialized.
2099         transfer_table_f *x_table_f;
2100         transfer_table_f *y_table_f;
2101         transfer_table_i *x_table_i;
2102         transfer_table_i *y_table_i;
2103
2104         if(use_float)
2105         {
2106                 translation_array_f(x_table_f,
2107                         out_x1,
2108                         out_x2,
2109                         in_x1,
2110                         in_x2,
2111                         in_total_x,
2112                         output->get_w(),
2113                         out_x1_int,
2114                         out_x2_int);
2115                 translation_array_f(y_table_f,
2116                         out_y1,
2117                         out_y2,
2118                         in_y1,
2119                         in_y2,
2120                         in_total_y,
2121                         output->get_h(),
2122                         out_y1_int,
2123                         out_y2_int);
2124         }
2125         else
2126         {
2127                 translation_array_i(x_table_i,
2128                         out_x1,
2129                         out_x2,
2130                         in_x1,
2131                         in_x2,
2132                         in_total_x,
2133                         output->get_w(),
2134                         out_x1_int,
2135                         out_x2_int);
2136                 translation_array_i(y_table_i,
2137                         out_y1,
2138                         out_y2,
2139                         in_y1,
2140                         in_y2,
2141                         in_total_y,
2142                         output->get_h(),
2143                         out_y1_int,
2144                         out_y2_int);
2145         }
2146 //      printf("TranslateUnit::process_package 1 %d\n", mode);
2147 //      Timer a;
2148 //      a.update();
2149
     // TRANSLATE arguments: max, temp_type, type, components, chroma_offset.
     // The YUV models pass a signed temp_type and a nonzero chroma_offset
     // (0x80 or 0x8000).  Color models that are not listed are ignored.
2150         switch(engine->translate_input->get_color_model())
2151         {
2152                 case BC_RGB888:
2153                         TRANSLATE(0xff, uint32_t, unsigned char, 3, 0);
2154                         break;
2155
2156                 case BC_RGBA8888:
2157                         TRANSLATE(0xff, uint32_t, unsigned char, 4, 0);
2158                         break;
2159
2160                 case BC_RGB161616:
2161                         TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2162                         break;
2163
2164                 case BC_RGBA16161616:
2165                         TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2166                         break;
2167
2168                 case BC_YUV888:
2169                         TRANSLATE(0xff, int32_t, unsigned char, 3, 0x80);
2170                         break;
2171
2172                 case BC_YUVA8888:
2173                         TRANSLATE(0xff, int32_t, unsigned char, 4, 0x80);
2174                         break;
2175
2176                 case BC_YUV161616:
2177                         TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2178                         break;
2179
2180                 case BC_YUVA16161616:
2181                         TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2182                         break;
2183         }
2184 //      printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2185
     // Release the tables that were allocated above.
2186         if(use_float)
2187         {
2188                 delete [] x_table_f;
2189                 delete [] y_table_f;
2190         }
2191         else
2192         {
2193                 delete [] x_table_i;
2194                 delete [] y_table_i;
2195         }
2196 }
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
2208  : LoadServer(cpus, cpus)
2209 {
2210         this->overlay = overlay;
2211 }
2212
2213 TranslateEngine::~TranslateEngine()
2214 {
2215 }
2216
2217 void TranslateEngine::init_packages()
2218 {
2219         int out_y1_int = (int)translate_out_y1;
2220         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
2221         int out_h = out_y2_int - out_y1_int;
2222
2223         for(int i = 0; i < total_packages; i++)
2224         {
2225                 TranslatePackage *package = (TranslatePackage*)packages[i];
2226                 package->out_row1 = (int)(out_y1_int + out_h /
2227                         total_packages *
2228                         i);
2229                 package->out_row2 = (int)((float)package->out_row1 +
2230                         out_h /
2231                         total_packages);
2232                 if(i >= total_packages - 1)
2233                         package->out_row2 = out_y2_int;
2234         }
2235 }
2236
2237 LoadClient* TranslateEngine::new_client()
2238 {
2239         return new TranslateUnit(this, overlay);
2240 }
2241
2242 LoadPackage* TranslateEngine::new_package()
2243 {
2244         return new TranslatePackage;
2245 }
2246
2247
2248
2249
2250
2251
2252
2253
2254 #define SCALE_TRANSLATE(max, temp_type, type, components, chroma_offset) \
     /* Blend rows pkg->out_row1 .. pkg->out_row2 - 1 of the output, taking one */ \
     /* whole source pixel per output pixel (no interpolation in this macro). */ \
     /* The source row comes from y_table and, when the widths differ, the */ \
     /* source column comes from x_table. */ \
     /* Names taken from the expanding scope: alpha, pkg, in_rows, out_rows, */ \
     /* in_x1, in_x2, out_x1, out_x2, out_y1, x_table, y_table, plus whatever */ \
     /* BLEND_3 / BLEND_4 read (defined elsewhere). */ \
2255 { \
     /* One opacity for the whole package; there is no per pixel coverage here */ \
2256         temp_type opacity = (temp_type)(alpha * max + 0.5); \
2257         temp_type transparency = max - opacity; \
2258         int out_w = out_x2 - out_x1; \
2259  \
2260         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
2261         { \
2262                 int in_y = y_table[i - out_y1]; \
2263                 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
2264                 type *output = (type*)out_rows[i] + out_x1 * components; \
2265  \
2266 /* X direction is scaled and requires a table lookup */ \
2267                 if(out_w != in_x2 - in_x1) \
2268                 { \
2269                         for(int j = 0; j < out_w; j++) \
2270                         { \
     /* x_table[j] is an offset in pixels from in_x1 */ \
2271                                 type *in_row_plus_x = in_row + x_table[j] * components; \
2272                                 temp_type input1, input2, input3, input4; \
2273          \
2274                                 input1 = in_row_plus_x[0]; \
2275                                 input2 = in_row_plus_x[1]; \
2276                                 input3 = in_row_plus_x[2]; \
2277                                 if(components == 4) \
2278                                         input4 = in_row_plus_x[3]; \
2279          \
2280                                 if(components == 3) \
2281                                 { \
2282                                         BLEND_3(max, temp_type, type, chroma_offset); \
2283                                 } \
2284                                 else \
2285                                 { \
2286                                         BLEND_4(max, temp_type, type, chroma_offset); \
2287                                 } \
2288                                 output += components; \
2289                         } \
2290                 } \
2291                 else \
2292 /* X direction is not scaled */ \
2293                 { \
2294                         for(int j = 0; j < out_w; j++) \
2295                         { \
2296                                 temp_type input1, input2, input3, input4; \
2297          \
2298                                 input1 = in_row[0]; \
2299                                 input2 = in_row[1]; \
2300                                 input3 = in_row[2]; \
2301                                 if(components == 4) \
2302                                         input4 = in_row[3]; \
2303          \
2304                                 if(components == 3) \
2305                                 { \
2306                                         BLEND_3(max, temp_type, type, chroma_offset); \
2307                                 } \
2308                                 else \
2309                                 { \
2310                                         BLEND_4(max, temp_type, type, chroma_offset); \
2311                                 } \
     /* Source and destination advance together, one pixel at a time */ \
2312                                 in_row += components; \
2313                                 output += components; \
2314                         } \
2315                 } \
2316         } \
2317 }
2318
2319
2320
// Passes the engine to the LoadClient base and keeps pointers to both the
// engine (as scale_translate) and the OverlayFrame.
ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
 : LoadClient(server)
{
        this->overlay = overlay;
        this->scale_translate = server;
}
2327
// Empty: this destructor performs no cleanup of its own.
ScaleTranslateUnit::~ScaleTranslateUnit()
{
}
2331
2332 void ScaleTranslateUnit::scale_array(int* &table,
2333         int out_x1,
2334         int out_x2,
2335         int in_x1,
2336         int in_x2,
2337         int is_x)
2338 {
2339         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
2340
2341         table = new int[out_x2 - out_x1];
2342
2343         if(!is_x)
2344         {
2345                 for(int i = 0; i < out_x2 - out_x1; i++)
2346                 {
2347                         table[i] = (int)((float)i / scale + in_x1);
2348                 }
2349         }
2350         else
2351         {
2352                 for(int i = 0; i < out_x2 - out_x1; i++)
2353                 {
2354                         table[i] = (int)((float)i / scale);
2355                 }
2356         }
2357 }
2358
2359
2360 void ScaleTranslateUnit::process_package(LoadPackage *package)
2361 {
2362         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
2363
2364 // Args for NEAREST_NEIGHBOR_MACRO
2365         VFrame *output = scale_translate->output;
2366         VFrame *input = scale_translate->input;
2367         int in_x1 = scale_translate->in_x1;
2368         int in_y1 = scale_translate->in_y1;
2369         int in_x2 = scale_translate->in_x2;
2370         int in_y2 = scale_translate->in_y2;
2371         int out_x1 = scale_translate->out_x1;
2372         int out_y1 = scale_translate->out_y1;
2373         int out_x2 = scale_translate->out_x2;
2374         int out_y2 = scale_translate->out_y2;
2375         float alpha = scale_translate->alpha;
2376         int mode = scale_translate->mode;
2377
2378         int *x_table;
2379         int *y_table;
2380         unsigned char **in_rows = input->get_rows();
2381         unsigned char **out_rows = output->get_rows();
2382
2383 //      Timer a;
2384 //      a.update();
2385 //printf("ScaleTranslateUnit::process_package 1 %d\n", mode);
2386         if(out_x2 - out_x1 != in_x2 - in_x1)
2387         {
2388                 scale_array(x_table,
2389                         out_x1,
2390                         out_x2,
2391                         in_x1,
2392                         in_x2,
2393                         1);
2394         }
2395         scale_array(y_table,
2396                 out_y1,
2397                 out_y2,
2398                 in_y1,
2399                 in_y2,
2400                 0);
2401
2402
2403         switch(input->get_color_model())
2404         {
2405                 case BC_RGB888:
2406                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 3, 0);
2407                         break;
2408
2409                 case BC_YUV888:
2410                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 3, 0x80);
2411                         break;
2412
2413                 case BC_RGBA8888:
2414                         SCALE_TRANSLATE(0xff, uint32_t, uint8_t, 4, 0);
2415                         break;
2416
2417                 case BC_YUVA8888:
2418                         SCALE_TRANSLATE(0xff, int32_t, uint8_t, 4, 0x80);
2419                         break;
2420
2421
2422                 case BC_RGB161616:
2423                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 3, 0);
2424                         break;
2425
2426                 case BC_YUV161616:
2427                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 3, 0x8000);
2428                         break;
2429
2430                 case BC_RGBA16161616:
2431                         SCALE_TRANSLATE(0xffff, uint64_t, uint16_t, 4, 0);
2432                         break;
2433
2434                 case BC_YUVA16161616:
2435                         SCALE_TRANSLATE(0xffff, int64_t, uint16_t, 4, 0x8000);
2436                         break;
2437         }
2438
2439 //printf("blend mode %i, took %li ms\n", mode, a.get_difference());
2440         if(out_x2 - out_x1 != in_x2 - in_x1)
2441                 delete [] x_table;
2442         delete [] y_table;
2443
2444 };
2445
2446
2447
2448
2449
2450
2451
2452
2453
// Passes cpus to the LoadServer base for both of its constructor arguments
// and keeps the OverlayFrame pointer.
ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
 : LoadServer(cpus, cpus)
{
        this->overlay = overlay;
}
2459
// Empty: this destructor performs no cleanup of its own.
ScaleTranslateEngine::~ScaleTranslateEngine()
{
}
2463
2464 void ScaleTranslateEngine::init_packages()
2465 {
2466         int out_h = out_y2 - out_y1;
2467
2468         for(int i = 0; i < total_packages; i++)
2469         {
2470                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
2471                 package->out_row1 = (int)(out_y1 + out_h /
2472                         total_packages *
2473                         i);
2474                 package->out_row2 = (int)((float)package->out_row1 +
2475                         out_h /
2476                         total_packages);
2477                 if(i >= total_packages - 1)
2478                         package->out_row2 = out_y2;
2479         }
2480 }
2481
2482 LoadClient* ScaleTranslateEngine::new_client()
2483 {
2484         return new ScaleTranslateUnit(this, overlay);
2485 }
2486
2487 LoadPackage* ScaleTranslateEngine::new_package()
2488 {
2489         return new ScaleTranslatePackage;
2490 }
2491
2492
// Empty constructor body; out_row1 and out_row2 are assigned later by
// ScaleTranslateEngine::init_packages.
ScaleTranslatePackage::ScaleTranslatePackage()
{
}
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
// BLEND_ONLY(temp_type, type, max, components, chroma_offset)
//
// Blends every pixel of rows pkg->out_row1 .. pkg->out_row2 - 1 of `input`
// onto the same rows of `output`, with no scaling or translation.  The
// per-pixel work is done by BLEND_3 (components == 3) or BLEND_4 (otherwise).
//
//   temp_type      type used for opacity, transparency and input1..input4
//   type           channel storage type of both frames
//   max            full-scale channel value; opacity = alpha * max, rounded
//   components     channels per pixel
//   chroma_offset  forwarded unchanged to BLEND_3 / BLEND_4
//
// Names expected in the expanding scope: output, input, pkg and alpha, plus
// whatever BLEND_3 / BLEND_4 reference themselves (BLEND_3 switches on mode).
#define BLEND_ONLY(temp_type, type, max, components, chroma_offset) \
{ \
        temp_type opacity = (temp_type)(alpha * max + 0.5); \
        temp_type transparency = max - opacity; \
 \
        type** output_rows = (type**)output->get_rows(); \
        type** input_rows = (type**)input->get_rows(); \
/* the row width comes from the input frame */ \
        int w = input->get_w(); \
/* h is not referenced anywhere in this macro's own text */ \
        int h = input->get_h(); \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                type* in_row = input_rows[i]; \
/* from here on `output` names the current destination pixel, not the frame */ \
                type* output = output_rows[i]; \
 \
                for(int j = 0; j < w; j++) \
                { \
                        temp_type input1, input2, input3, input4; \
                        input1 = in_row[0]; \
                        input2 = in_row[1]; \
                        input3 = in_row[2]; \
/* input4 is only assigned for 4 component pixels */ \
                        if(components == 4) input4 = in_row[3]; \
 \
 \
                        if(components == 3) \
                        { \
                                BLEND_3(max, temp_type, type, chroma_offset); \
                        } \
                        else \
                        { \
                                BLEND_4(max, temp_type, type, chroma_offset); \
                        } \
 \
                        in_row += components; \
                        output += components; \
                } \
        } \
}
2562
2563
// BLEND_ONLY_TRANSFER_REPLACE(type, components)
//
// Copies rows pkg->out_row1 .. pkg->out_row2 - 1 of `input` over the same
// rows of `output` with memcpy; alpha is not consulted.  Each row copy is
// input->get_w() * sizeof(type) * components bytes long.
// Names expected in the expanding scope: output, input and pkg.
#define BLEND_ONLY_TRANSFER_REPLACE(type, components) \
{ \
        type** dst_rows = (type**)output->get_rows(); \
        type** src_rows = (type**)input->get_rows(); \
        int row_bytes = input->get_w() * sizeof(type) * components; \
 \
        int row = pkg->out_row1; \
        while(row < pkg->out_row2) \
        { \
                memcpy(dst_rows[row], src_rows[row], row_bytes); \
                row++; \
        } \
}
2578
// BLEND_ONLY_4_NORMAL(temp_type, type, chroma_offset, maxbits)
//
// Shift-based blend for 4 component pixels (components is always 4).
// For rows pkg->out_row1 .. pkg->out_row2 - 1, each source pixel is weighted
// by opacity * its own alpha channel, the destination by the complement out
// of 1 << (2 * maxbits), and the sum is shifted back down by 2 * maxbits.
// chroma_offset is subtracted from the second and third channels before
// weighting and added back afterwards.  The destination alpha channel becomes
// the larger of the source and destination alpha.
// Names expected in the expanding scope: output, input, pkg and alpha.
#define BLEND_ONLY_4_NORMAL(temp_type, type, chroma_offset, maxbits) \
{ \
        temp_type opacity = (temp_type)(alpha * (((temp_type) 1) << maxbits) + 0.5); \
        temp_type full_scale = ((temp_type) 1) << (maxbits * 2) ; \
 \
        type** dst_rows = (type**)output->get_rows(); \
        type** src_rows = (type**)input->get_rows(); \
        int width = input->get_w(); \
 \
        for(int y = pkg->out_row1; y < pkg->out_row2; y++) \
        { \
                type* src = src_rows[y]; \
                type* dst = dst_rows[y]; \
 \
                for(int x = 0; x < width; x++, src += 4, dst += 4) \
                { \
/* Per pixel weights, scaled to 1 << (2 * maxbits) */ \
                        temp_type src_weight = opacity * src[3]; \
                        temp_type dst_weight = (temp_type)full_scale - src_weight; \
 \
                        dst[0] = (type)(((temp_type)src[0] * src_weight + \
                                (temp_type)dst[0] * dst_weight) >> (maxbits * 2)); \
 \
                        dst[1] = (type)(((((temp_type)src[1] - chroma_offset) * src_weight + \
                                ((temp_type)dst[1] - chroma_offset) * dst_weight) \
                                >> (maxbits * 2)) + \
                                chroma_offset); \
 \
                        dst[2] = (type)(((((temp_type)src[2] - chroma_offset) * src_weight + \
                                ((temp_type)dst[2] - chroma_offset) * dst_weight) \
                                >> (maxbits * 2)) + \
                                chroma_offset); \
 \
/* Alpha channel keeps the maximum of the two */ \
                        if(src[3] > dst[3]) \
                                dst[3] = src[3]; \
                } \
        } \
}
2619
// BLEND_ONLY_3_NORMAL(temp_type, type, chroma_offset, maxbits)
//
// Shift-based blend for 3 component pixels (components is always 3).
// For rows pkg->out_row1 .. pkg->out_row2 - 1 every channel becomes
//     (in * opacity + out * transparency) >> maxbits
// where opacity = alpha * (1 << maxbits), rounded, and
// transparency = (1 << maxbits) - opacity.
//
// chroma_offset is accepted so the call sites match BLEND_ONLY_4_NORMAL but
// is not used: opacity + transparency is exactly 1 << maxbits, so subtracting
// the offset before weighting and adding it back afterwards cancels out.
//
// Names expected in the expanding scope: output, input, pkg and alpha.
#define BLEND_ONLY_3_NORMAL(temp_type, type, chroma_offset, maxbits) \
{ \
        temp_type opacity = (temp_type)(alpha * (((temp_type) 1) << maxbits) + 0.5); \
        temp_type transparency = (((temp_type) 1) << maxbits) - opacity; \
 \
        type** output_rows = (type**)output->get_rows(); \
        type** input_rows = (type**)input->get_rows(); \
/* All channels are treated alike, so walk the row as width * 3 samples */ \
        int w = input->get_w() * 3; \
 \
        for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
        { \
                type* in_row = input_rows[i]; \
                type* output = output_rows[i]; \
 \
                for(int j = 0; j < w; j++) /* w = 3x width! */ \
                { \
/* Shift in temp_type first, then narrow.  Casting the sum to `type` before */ \
/* the shift would discard the high bits and leave 0 after shifting. */ \
                        *output = (type)(((temp_type)*in_row * opacity + \
                                (temp_type)*output * transparency) >> maxbits); \
                        in_row ++; \
                        output ++; \
                } \
        } \
}
2644
2645
// Passes the engine to the LoadClient base and keeps pointers to both the
// engine (as blend_engine) and the OverlayFrame.
BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
 : LoadClient(server)
{
        this->overlay = overlay;
        this->blend_engine = server;
}
2652
// Empty: this destructor performs no cleanup of its own.
BlendUnit::~BlendUnit()
{
}
2656
2657 void BlendUnit::process_package(LoadPackage *package)
2658 {
2659         BlendPackage *pkg = (BlendPackage*)package;
2660
2661
2662         VFrame *output = blend_engine->output;
2663         VFrame *input = blend_engine->input;
2664         float alpha = blend_engine->alpha;
2665         if (alpha > 1.0) alpha = 1.0;
2666         int mode = blend_engine->mode;
2667
2668         if (mode == TRANSFER_REPLACE)
2669         {
2670                 switch(input->get_color_model())
2671                 {
2672                         case BC_RGB888:
2673                         case BC_YUV888:
2674                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 3);
2675                                 break;
2676                         case BC_RGBA8888:
2677                         case BC_YUVA8888:
2678                                 BLEND_ONLY_TRANSFER_REPLACE(unsigned char, 4);
2679                                 break;
2680                         case BC_RGB161616:
2681                         case BC_YUV161616:
2682                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 3);
2683                                 break;
2684                         case BC_RGBA16161616:
2685                         case BC_YUVA16161616:
2686                                 BLEND_ONLY_TRANSFER_REPLACE(uint16_t, 4);
2687                                 break;
2688                 }
2689         }
2690         else
2691         if (mode == TRANSFER_NORMAL)
2692         {
2693                 switch(input->get_color_model())
2694                 {
2695                         case BC_RGB888:
2696                                 BLEND_ONLY_3_NORMAL(uint32_t, unsigned char, 0, 8);
2697                                 break;
2698                         case BC_YUV888:
2699                                 BLEND_ONLY_3_NORMAL(int32_t, unsigned char, 0x80, 8);
2700                                 break;
2701                         case BC_RGBA8888:
2702                                 BLEND_ONLY_4_NORMAL(uint32_t, unsigned char, 0, 8);
2703                                 break;
2704                         case BC_YUVA8888:
2705                                 BLEND_ONLY_4_NORMAL(int32_t, unsigned char, 0x80, 8);
2706                                 break;
2707                         case BC_RGB161616:
2708                                 BLEND_ONLY_3_NORMAL(uint64_t, uint16_t, 0, 16);
2709                                 break;
2710                         case BC_YUV161616:
2711                                 BLEND_ONLY_3_NORMAL(int64_t, uint16_t, 0x8000, 16);
2712                                 break;
2713                         case BC_RGBA16161616:
2714                                 BLEND_ONLY_4_NORMAL(uint64_t, uint16_t, 0, 16);
2715                                 break;
2716                         case BC_YUVA16161616:
2717                                 BLEND_ONLY_4_NORMAL(int64_t, uint16_t, 0x8000, 16);
2718                                 break;
2719                 }
2720         }
2721         else
2722         switch(input->get_color_model())
2723         {
2724                 case BC_RGB888:
2725                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 3, 0);
2726                         break;
2727                 case BC_YUV888:
2728                         BLEND_ONLY(int32_t, unsigned char, 0xff, 3, 0x80);
2729                         break;
2730                 case BC_RGBA8888:
2731                         BLEND_ONLY(uint32_t, unsigned char, 0xff, 4, 0);
2732                         break;
2733                 case BC_YUVA8888:
2734                         BLEND_ONLY(int32_t, unsigned char, 0xff, 4, 0x80);
2735                         break;
2736                 case BC_RGB161616:
2737                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 3, 0);
2738                         break;
2739                 case BC_YUV161616:
2740                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 3, 0x8000);
2741                         break;
2742                 case BC_RGBA16161616:
2743                         BLEND_ONLY(uint64_t, uint16_t, 0xffff, 4, 0);
2744                         break;
2745                 case BC_YUVA16161616:
2746                         BLEND_ONLY(int64_t, uint16_t, 0xffff, 4, 0x8000);
2747                         break;
2748         }
2749 }
2750
2751
2752
// Passes cpus to the LoadServer base for both of its constructor arguments
// and keeps the OverlayFrame pointer.
BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
 : LoadServer(cpus, cpus)
{
        this->overlay = overlay;
}
2758
// Empty: this destructor performs no cleanup of its own.
BlendEngine::~BlendEngine()
{
}
2762
2763 void BlendEngine::init_packages()
2764 {
2765         for(int i = 0; i < total_packages; i++)
2766         {
2767                 BlendPackage *package = (BlendPackage*)packages[i];
2768                 package->out_row1 = (int)(input->get_h() /
2769                         total_packages *
2770                         i);
2771                 package->out_row2 = (int)((float)package->out_row1 +
2772                         input->get_h() /
2773                         total_packages);
2774
2775                 if(i >= total_packages - 1)
2776                         package->out_row2 = input->get_h();
2777         }
2778 }
2779
2780 LoadClient* BlendEngine::new_client()
2781 {
2782         return new BlendUnit(this, overlay);
2783 }
2784
2785 LoadPackage* BlendEngine::new_package()
2786 {
2787         return new BlendPackage;
2788 }
2789
2790
// Empty constructor body; out_row1 and out_row2 are assigned later by
// BlendEngine::init_packages.
BlendPackage::BlendPackage()
{
}
2794
2795