hvirtual/cinelerra/overlayframe.C.int

   1 #include <math.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <stdint.h>
   5
   6 #include "clip.h"
   7 #include "edl.inc"
   8 #include "mutex.h"
   9 #include "overlayframe.h"
  10 #include "vframe.h"
  11
  12 OverlayFrame::OverlayFrame(int cpus) // Start with nothing allocated; overlay() builds what it needs
  13 {
  14         this->cpus = cpus;              // passed to every engine that overlay() constructs
  15         temp_frame = 0;                 // scratch frame, allocated on demand by overlay()
  16         translate_engine = 0;           // engines all start out unallocated
  17         scaletranslate_engine = 0;
  18         scale_engine = 0;
  19         blend_engine = 0;
  20 }
  21
  22 OverlayFrame::~OverlayFrame()
  23 {
  24 // Release the scratch frame and whichever engines overlay() created.
  25         delete temp_frame;
  26         delete scale_engine;
  27         delete translate_engine;
  28         delete blend_engine;
  29         delete scaletranslate_engine;
  30 // Deleting a null pointer does nothing, so members that were never allocated need no check.
  31 }
  32
  33
  34
  35
  36
  37
  38
  39
  40 // Verification:
  41
  42 // (255 * 255 + 0 * 0) / 255 = 255
  43 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
  44
  45 // (65535 * 65535 + 0 * 0) / 65535 = 65535
  46 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
  47
  48
  49 // Branch prediction 4 U
  50 // BLEND_3: merge one 3-component source pixel into output[0..2] according to the transfer mode.
  51 #define BLEND_3(max, type) \
  52 { /* Reads input1..input3, opacity, transparency, mode and output from the expanding scope. */ \
  53         int64_t r, g, b; \
  54 /* Inputs are widened to int64_t before multiplying; every weighted sum is divided by max. */ \
  55 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
  56         switch(mode) \
  57         { \
  58                 case TRANSFER_DIVIDE: /* input / output (max where output is 0), then mixed with output */ \
  59                         r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \
  60                         g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \
  61                         b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \
  62                         r = (r * opacity + output[0] * transparency) / max; \
  63                         g = (g * opacity + output[1] * transparency) / max; \
  64                         b = (b * opacity + output[2] * transparency) / max; \
  65                         break; \
  66                 case TRANSFER_MULTIPLY: /* input * output / max, then mixed with output */ \
  67                         r = ((int64_t)input1 * output[0]) / max; \
  68                         g = ((int64_t)input2 * output[1]) / max; \
  69                         b = ((int64_t)input3 * output[2]) / max; \
  70                         r = (r * opacity + output[0] * transparency) / max; \
  71                         g = (g * opacity + output[1] * transparency) / max; \
  72                         b = (b * opacity + output[2] * transparency) / max; \
  73                         break; \
  74                 case TRANSFER_SUBTRACT: /* (input - output) mixed with output; may go negative until clamped */ \
  75                         r = (((int64_t)input1 - output[0]) * opacity + output[0] * transparency) / max; \
  76                         g = (((int64_t)input2 - output[1]) * opacity + output[1] * transparency) / max; \
  77                         b = (((int64_t)input3 - output[2]) * opacity + output[2] * transparency) / max; \
  78                         break; \
  79                 case TRANSFER_ADDITION: /* (input + output) mixed with output; may exceed max until clamped */ \
  80                         r = (((int64_t)input1 + output[0]) * opacity + output[0] * transparency) / max; \
  81                         g = (((int64_t)input2 + output[1]) * opacity + output[1] * transparency) / max; \
  82                         b = (((int64_t)input3 + output[2]) * opacity + output[2] * transparency) / max; \
  83                         break; \
  84                 case TRANSFER_REPLACE: /* input copied as is; opacity is ignored */ \
  85                         r = input1; \
  86                         g = input2; \
  87                         b = input3; \
  88                         break; \
  89                 case TRANSFER_NORMAL: /* plain weighted mix of input and output */ \
  90                         r = ((int64_t)input1 * opacity + output[0] * transparency) / max; \
  91                         g = ((int64_t)input2 * opacity + output[1] * transparency) / max; \
  92                         b = ((int64_t)input3 * opacity + output[2] * transparency) / max; \
  93                         break; \
  94         } \
  95 /* NOTE(review): no default case, so r, g, b are read unset if mode matches none of the cases. Stores are clamped to [0, max]. */ \
  96         output[0] = (type)CLIP(r, 0, max); \
  97         output[1] = (type)CLIP(g, 0, max); \
  98         output[2] = (type)CLIP(b, 0, max); \
  99 }
 100
 101
 102
 103
 104
 105 // Blending equations are drastically different for 3 and 4 components
 106 #define BLEND_4(max, type) \
 107 { /* Reads input1..input4, opacity, mode and output[0..3] from the expanding scope. */ \
 108         int64_t r, g, b, a; \
 109         int64_t pixel_opacity, pixel_transparency; \
 110 /* Source weight is opacity scaled by the source alpha; destination weight is the remainder scaled by the destination alpha. */ \
 111         pixel_opacity = opacity * input4 / max; \
 112         pixel_transparency = (max - pixel_opacity) * output[3] / max; \
 113 /* Every mode except TRANSFER_REPLACE keeps the larger of the two alpha values as the result alpha. */ \
 114         switch(mode) \
 115         { \
 116                 case TRANSFER_DIVIDE: /* input / output (max where output is 0), then mixed with output */ \
 117                         r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \
 118                         g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \
 119                         b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \
 120                         r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \
 121                         g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \
 122                         b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \
 123                         a = input4 > output[3] ? input4 : output[3]; \
 124                         break; \
 125                 case TRANSFER_MULTIPLY: /* input * output / max, then mixed with output */ \
 126                         r = ((int64_t)input1 * output[0]) / max; \
 127                         g = ((int64_t)input2 * output[1]) / max; \
 128                         b = ((int64_t)input3 * output[2]) / max; \
 129                         r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \
 130                         g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \
 131                         b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \
 132                         a = input4 > output[3] ? input4 : output[3]; \
 133                         break; \
 134                 case TRANSFER_SUBTRACT: /* (input - output) mixed with output */ \
 135                         r = (((int64_t)input1 - output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \
 136                         g = (((int64_t)input2 - output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \
 137                         b = (((int64_t)input3 - output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \
 138                         a = input4 > output[3] ? input4 : output[3]; \
 139                         break; \
 140                 case TRANSFER_ADDITION: /* (input + output) mixed with output */ \
 141                         r = (((int64_t)input1 + output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \
 142                         g = (((int64_t)input2 + output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \
 143                         b = (((int64_t)input3 + output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \
 144                         a = input4 > output[3] ? input4 : output[3]; \
 145                         break; \
 146                 case TRANSFER_REPLACE: /* all four input components copied as is */ \
 147                         r = input1; \
 148                         g = input2; \
 149                         b = input3; \
 150                         a = input4; \
 151                         break; \
 152                 case TRANSFER_NORMAL: /* plain weighted mix of input and output */ \
 153                         r = ((int64_t)input1 * pixel_opacity + output[0] * pixel_transparency) / max; \
 154                         g = ((int64_t)input2 * pixel_opacity + output[1] * pixel_transparency) / max; \
 155                         b = ((int64_t)input3 * pixel_opacity + output[2] * pixel_transparency) / max; \
 156                         a = input4 > output[3] ? input4 : output[3]; \
 157                         break; \
 158         } \
 159 /* NOTE(review): no default case, so r, g, b, a are read unset for an unmatched mode. Color is clamped to [0, max]; alpha is stored unclamped. */ \
 160         output[0] = (type)CLIP(r, 0, max); \
 161         output[1] = (type)CLIP(g, 0, max); \
 162         output[2] = (type)CLIP(b, 0, max); \
 163         output[3] = (type)a; \
 164 }
 165
 166
 167
 168
 169
 170
 171
 172
 173 // Bicubic algorithm using multiprocessors
 174 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
 175
 176 // Nearest neighbor algorithm using multiprocessors for blending
 177 // input -> scale + translate -> blend -> output
 178
 179
 180 int OverlayFrame::overlay(VFrame *output,
 181         VFrame *input,
 182         float in_x1,
 183         float in_y1,
 184         float in_x2,
 185         float in_y2,
 186         float out_x1,
 187         float out_y1,
 188         float out_x2,
 189         float out_y2,
 190         float alpha,       // 0 - 1
 191         int mode,
 192         int interpolation_type)
 193 {
 194         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
 195         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
 196
 197 //printf("OverlayFrame::overlay 1 %d %f\n", mode, alpha);
 198 // Limit values
 199         if(in_x1 < 0)
 200         {
 201                 out_x1 += -in_x1 * w_scale;
 202                 in_x1 = 0;
 203         }
 204         else
 205         if(in_x1 >= input->get_w())
 206         {
 207                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
 208                 in_x1 = input->get_w();
 209         }
 210
 211         if(in_y1 < 0)
 212         {
 213                 out_y1 += -in_y1 * h_scale;
 214                 in_y1 = 0;
 215         }
 216         else
 217         if(in_y1 >= input->get_h())
 218         {
 219                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
 220                 in_y1 = input->get_h();
 221         }
 222
 223         if(in_x2 < 0)
 224         {
 225                 out_x2 += -in_x2 * w_scale;
 226                 in_x2 = 0;
 227         }
 228         else
 229         if(in_x2 >= input->get_w())
 230         {
 231                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
 232                 in_x2 = input->get_w();
 233         }
 234
 235         if(in_y2 < 0)
 236         {
 237                 out_y2 += -in_y2 * h_scale;
 238                 in_y2 = 0;
 239         }
 240         else
 241         if(in_y2 >= input->get_h())
 242         {
 243                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
 244                 in_y2 = input->get_h();
 245         }
 246
 247         if(out_x1 < 0)
 248         {
 249                 in_x1 += -out_x1 / w_scale;
 250                 out_x1 = 0;
 251         }
 252         else
 253         if(out_x1 >= output->get_w())
 254         {
 255                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
 256                 out_x1 = output->get_w();
 257         }
 258
 259         if(out_y1 < 0)
 260         {
 261                 in_y1 += -out_y1 / h_scale;
 262                 out_y1 = 0;
 263         }
 264         else
 265         if(out_y1 >= output->get_h())
 266         {
 267                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
 268                 out_y1 = output->get_h();
 269         }
 270
 271         if(out_x2 < 0)
 272         {
 273                 in_x2 += -out_x2 / w_scale;
 274                 out_x2 = 0;
 275         }
 276         else
 277         if(out_x2 >= output->get_w())
 278         {
 279                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
 280                 out_x2 = output->get_w();
 281         }
 282
 283         if(out_y2 < 0)
 284         {
 285                 in_y2 += -out_y2 / h_scale;
 286                 out_y2 = 0;
 287         }
 288         else
 289         if(out_y2 >= output->get_h())
 290         {
 291                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
 292                 out_y2 = output->get_h();
 293         }
 294
 295
 296
 297
 298
 299         float in_w = in_x2 - in_x1;
 300         float in_h = in_y2 - in_y1;
 301         float out_w = out_x2 - out_x1;
 302         float out_h = out_y2 - out_y1;
 303 // Input for translation operation
 304         VFrame *translation_input = input;
 305
 306
 307
 308 // printf("OverlayFrame::overlay %f %f %f %f -> %f %f %f %f\n", in_x1,
 309 //                      in_y1,
 310 //                      in_x2,
 311 //                      in_y2,
 312 //                      out_x1,
 313 //                      out_y1,
 314 //                      out_x2,
 315 //                      out_y2);
 316
 317
 318
 319
 320
 321 // ****************************************************************************
 322 // Transfer to temp buffer by scaling nearest integer boundaries
 323 // ****************************************************************************
 324         if(interpolation_type != NEAREST_NEIGHBOR &&
 325                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
 326         {
 327 // Create integer boundaries for interpolation
 328                 int in_x1_int = (int)in_x1;
 329                 int in_y1_int = (int)in_y1;
 330                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
 331                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
 332
 333 // Dimensions of temp frame.  Integer boundaries scaled.
 334                 int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int));
 335                 int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int));
 336                 VFrame *scale_output;
 337
 338
 339
 340 #define NO_TRANSLATION1 \
 341         (EQUIV(in_x1, 0) && \
 342         EQUIV(in_y1, 0) && \
 343         EQUIV(out_x1, 0) && \
 344         EQUIV(out_y1, 0) && \
 345         EQUIV(in_x2, in_x2_int) && \
 346         EQUIV(in_y2, in_y2_int) && \
 347         EQUIV(out_x2, temp_w) && \
 348         EQUIV(out_y2, temp_h))
 349
 350
 351 #define NO_BLEND \
 352         (EQUIV(alpha, 1) && \
 353         (mode == TRANSFER_REPLACE || \
 354         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
 355
 356
 357
 358
 359
 360 // Prepare destination for operation
 361
 362 // No translation and no blending.  The blending operation is built into the
 363 // translation unit but not the scaling unit.
 364 // input -> output
 365                 if(NO_TRANSLATION1 &&
 366                         NO_BLEND)
 367                 {
 368 // printf("OverlayFrame::overlay input -> output\n");
 369
 370                         scale_output = output;
 371                         translation_input = 0;
 372                 }
 373                 else
 374 // If translation or blending
 375 // input -> nearest integer boundary temp
 376                 {
 377                         if(temp_frame &&
 378                                 (temp_frame->get_w() != temp_w ||
 379                                         temp_frame->get_h() != temp_h))
 380                         {
 381                                 delete temp_frame;
 382                                 temp_frame = 0;
 383                         }
 384
 385                         if(!temp_frame)
 386                         {
 387                                 temp_frame = new VFrame(0,
 388                                         temp_w,
 389                                         temp_h,
 390                                         input->get_color_model(),
 391                                         -1);
 392                         }
 393 //printf("OverlayFrame::overlay input -> temp\n");
 394
 395
 396                         temp_frame->clear_frame();
 397
 398 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
 399 //      temp_w, temp_h);
 400                         scale_output = temp_frame;
 401                         translation_input = scale_output;
 402
 403 // Adjust input coordinates to reflect new scaled coordinates.
 404                         in_x1 = (in_x1 - in_x1_int) * w_scale;
 405                         in_y1 = (in_y1 - in_y1_int) * h_scale;
 406                         in_x2 = (in_x2 - in_x1_int) * w_scale;
 407                         in_y2 = (in_y2 - in_y1_int) * h_scale;
 408                 }
 409
 410
 411
 412
 413 // Scale input -> scale_output
 414                 this->scale_output = scale_output;
 415                 this->scale_input = input;
 416                 this->w_scale = w_scale;
 417                 this->h_scale = h_scale;
 418                 this->in_x1_int = in_x1_int;
 419                 this->in_y1_int = in_y1_int;
 420                 this->out_w_int = temp_w;
 421                 this->out_h_int = temp_h;
 422                 this->interpolation_type = interpolation_type;
 423
 424 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
 425                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
 426                 scale_engine->process_packages();
 427 //printf("OverlayFrame::overlay ScaleEngine 2\n");
 428
 429
 430
 431         }
 432
 433 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 434 //      in_x1,
 435 //      in_y1,
 436 //      in_x2,
 437 //      in_y2,
 438 //      out_x1,
 439 //      out_y1,
 440 //      out_x2,
 441 //      out_y2);
 442
 443
 444
 445
 446
 447 #define NO_TRANSLATION2 \
 448         (EQUIV(in_x1, 0) && \
 449         EQUIV(in_y1, 0) && \
 450         EQUIV(in_x2, translation_input->get_w()) && \
 451         EQUIV(in_y2, translation_input->get_h()) && \
 452         EQUIV(out_x1, 0) && \
 453         EQUIV(out_y1, 0) && \
 454         EQUIV(out_x2, output->get_w()) && \
 455         EQUIV(out_y2, output->get_h())) \
 456
 457 #define NO_SCALE \
 458         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
 459         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
 460
 461
 462
 463
 464 //printf("OverlayFrame::overlay 4 %d\n", mode);
 465
 466
 467
 468
 469         if(translation_input)
 470         {
 471 // Direct copy
 472                 if( NO_TRANSLATION2 &&
 473                         NO_SCALE &&
 474                         NO_BLEND)
 475                 {
 476 //printf("OverlayFrame::overlay direct copy\n");
 477                         output->copy_from(translation_input);
 478                 }
 479                 else
 480 // Blend only
 481                 if( NO_TRANSLATION2 &&
 482                         NO_SCALE)
 483                 {
 484                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
 485
 486
 487                         blend_engine->output = output;
 488                         blend_engine->input = translation_input;
 489                         blend_engine->alpha = alpha;
 490                         blend_engine->mode = mode;
 491
 492                         blend_engine->process_packages();
 493                 }
 494                 else
 495 // Scale and translate using nearest neighbor
 496 // Translation is exactly on integer boundaries
 497                 if(interpolation_type == NEAREST_NEIGHBOR ||
 498                         EQUIV(in_x1, (int)in_x1) &&
 499                         EQUIV(in_y1, (int)in_y1) &&
 500                         EQUIV(in_x2, (int)in_x2) &&
 501                         EQUIV(in_y2, (int)in_y2) &&
 502
 503                         EQUIV(out_x1, (int)out_x1) &&
 504                         EQUIV(out_y1, (int)out_y1) &&
 505                         EQUIV(out_x2, (int)out_x2) &&
 506                         EQUIV(out_y2, (int)out_y2))
 507                 {
 508 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
 509                         if(!scaletranslate_engine) scaletranslate_engine = new ScaleTranslateEngine(this, cpus);
 510
 511
 512                         scaletranslate_engine->output = output;
 513                         scaletranslate_engine->input = translation_input;
 514                         scaletranslate_engine->in_x1 = (int)in_x1;
 515                         scaletranslate_engine->in_y1 = (int)in_y1;
 516                         scaletranslate_engine->in_x2 = (int)in_x2;
 517                         scaletranslate_engine->in_y2 = (int)in_y2;
 518                         scaletranslate_engine->out_x1 = (int)out_x1;
 519                         scaletranslate_engine->out_y1 = (int)out_y1;
 520                         scaletranslate_engine->out_x2 = (int)out_x2;
 521                         scaletranslate_engine->out_y2 = (int)out_y2;
 522                         scaletranslate_engine->alpha = alpha;
 523                         scaletranslate_engine->mode = mode;
 524
 525                         scaletranslate_engine->process_packages();
 526                 }
 527                 else
 528 // Fractional translation
 529                 {
 530 // Use fractional translation
 531 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 532 //      in_x1,
 533 //      in_y1,
 534 //      in_x2,
 535 //      in_y2,
 536 //      out_x1,
 537 //      out_y1,
 538 //      out_x2,
 539 //      out_y2);
 540                         this->translate_output = output;
 541                         this->translate_input = translation_input;
 542                         this->translate_in_x1 = in_x1;
 543                         this->translate_in_y1 = in_y1;
 544                         this->translate_in_x2 = in_x2;
 545                         this->translate_in_y2 = in_y2;
 546                         this->translate_out_x1 = out_x1;
 547                         this->translate_out_y1 = out_y1;
 548                         this->translate_out_x2 = out_x2;
 549                         this->translate_out_y2 = out_y2;
 550                         this->translate_alpha = alpha;
 551                         this->translate_mode = mode;
 552
 553 //printf("OverlayFrame::overlay 5 %d\n", mode);
 554                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
 555                         translate_engine->process_packages();
 556
 557                 }
 558         }
 559 //printf("OverlayFrame::overlay 2\n");
 560
 561         return 0;
 562 }
 563
 564
 565
 566
 567
 568
 569
 570 ScalePackage::ScalePackage() // Empty constructor: no members are initialized here.
 571 {
 572 }
 573
 574
 575
 576
 577 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay) // server goes to the LoadClient base
 578  : LoadClient(server)
 579 {
 580         this->overlay = overlay; // keep the OverlayFrame pointer passed in by the caller
 581 }
 582
 583 ScaleUnit::~ScaleUnit() // Empty destructor: nothing is released here.
 584 {
 585 }
 586
 587
 588
 589 #define BILINEAR(max, type, components) \
 590 { \
 591         float k_y = 1.0 / scale_h; \
 592         float k_x = 1.0 / scale_w; \
 593         type **in_rows = (type**)input->get_rows(); \
 594         type **out_rows = (type**)output->get_rows(); \
 595         type zero_r, zero_g, zero_b, zero_a; \
 596         int in_h_int = input->get_h(); \
 597         int in_w_int = input->get_w(); \
 598         int *table_int_x, *table_int_y; \
 599         int *table_frac_x, *table_frac_y; \
 600  \
 601         zero_r = 0; \
 602         zero_g = ((max + 1) >> 1) * (do_yuv); \
 603         zero_b = ((max + 1) >> 1) * (do_yuv); \
 604         if(components == 4) zero_a = 0; \
 605  \
 606         tabulate_blinear(table_int_x, table_frac_x, k_x, 0, out_w_int); \
 607         tabulate_blinear(table_int_y, table_frac_y, k_y, pkg->out_row1, pkg->out_row2); \
 608  \
 609         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
 610         { \
 611                 int i_y = table_int_y[i - pkg->out_row1]; \
 612                 uint64_t a = table_frac_y[i - pkg->out_row1]; \
 613         uint64_t anti_a = 0xffff - a; \
 614                 type *in_row1 = in_rows[i_y + in_y1_int]; \
 615                 type *in_row2 = (i_y + in_y1_int < in_h_int - 1) ?  \
 616                         in_rows[i_y + in_y1_int + 1] : \
 617                         0; \
 618                 type *out_row = out_rows[i]; \
 619  \
 620                 for(int j = 0; j < out_w_int; j++) \
 621                 { \
 622                         int i_x = table_int_x[j]; \
 623                         uint64_t b = table_frac_x[j]; \
 624                         uint64_t anti_b = 0xffff - b; \
 625                         int x = i_x + in_x1_int; \
 626                         uint64_t output1r, output1g, output1b, output1a; \
 627                         uint64_t output2r, output2g, output2b, output2a; \
 628                         uint64_t output3r, output3g, output3b, output3a; \
 629                         uint64_t output4r, output4g, output4b, output4a; \
 630  \
 631                         output1r = in_row1[x * components]; \
 632                         output1g = in_row1[x * components + 1]; \
 633                         output1b = in_row1[x * components + 2]; \
 634                         if(components == 4) output1a = in_row1[x * components + 3]; \
 635  \
 636                         if(x < in_w_int - 1) \
 637                         { \
 638                                 output2r = in_row1[x * components + components]; \
 639                                 output2g = in_row1[x * components + components + 1]; \
 640                                 output2b = in_row1[x * components + components + 2]; \
 641                                 if(components == 4) output2a = in_row1[x * components + components + 3]; \
 642  \
 643                                 if(in_row2) \
 644                                 { \
 645                                         output4r = in_row2[x * components + components]; \
 646                                         output4g = in_row2[x * components + components + 1]; \
 647                                         output4b = in_row2[x * components + components + 2]; \
 648                                         if(components == 4) output4a = in_row2[x * components + components + 3]; \
 649                                 } \
 650                                 else \
 651                                 { \
 652                                         output4r = zero_r; \
 653                                         output4g = zero_g; \
 654                                         output4b = zero_b; \
 655                                         if(components == 4) output4a = zero_a; \
 656                                 } \
 657                         } \
 658                         else \
 659                         { \
 660                                 output2r = zero_r; \
 661                                 output2g = zero_g; \
 662                                 output2b = zero_b; \
 663                                 if(components == 4) output2a = zero_a; \
 664                                 output4r = zero_r; \
 665                                 output4g = zero_g; \
 666                                 output4b = zero_b; \
 667                                 if(components == 4) output4a = zero_a; \
 668                         } \
 669  \
 670                         if(in_row2) \
 671                         { \
 672                                 output3r = in_row2[x * components]; \
 673                                 output3g = in_row2[x * components + 1]; \
 674                                 output3b = in_row2[x * components + 2]; \
 675                                 if(components == 4) output3a = in_row2[x * components + 3]; \
 676                         } \
 677                         else \
 678                         { \
 679                                 output3r = zero_r; \
 680                                 output3g = zero_g; \
 681                                 output3b = zero_b; \
 682                                 if(components == 4) output3a = zero_a; \
 683                         } \
 684  \
 685                         out_row[j * components] =  \
 686                                 (type)(((anti_a) * (((anti_b) * output1r) +  \
 687                                 (b * output2r)) +  \
 688                 a * (((anti_b) * output3r) +  \
 689                                 (b * output4r))) / 0xffffffff); \
 690                         out_row[j * components + 1] =   \
 691                                 (type)(((anti_a) * (((anti_b) * output1g) +  \
 692                                 (b * output2g)) +  \
 693                 a * (((anti_b) * output3g) +  \
 694                                 (b * output4g))) / 0xffffffff); \
 695                         out_row[j * components + 2] =   \
 696                                 (type)(((anti_a) * (((anti_b) * output1b) +  \
 697                                 (b * output2b)) +  \
 698                 a * (((anti_b) * output3b) +  \
 699                                 (b * output4b))) / 0xffffffff); \
 700                         if(components == 4) \
 701                                 out_row[j * components + 3] =   \
 702                                         (type)(((anti_a) * (((anti_b) * output1a) +  \
 703                                         (b * output2a)) +  \
 704                         a * (((anti_b) * output3a) +  \
 705                                         (b * output4a))) / 0xffffffff); \
 706                 } \
 707         } \
 708  \
 709  \
 710         delete [] table_int_x; \
 711         delete [] table_frac_x; \
 712         delete [] table_int_y; \
 713         delete [] table_frac_y; \
 714  \
 715 }
 716
 717
 718 #define BICUBIC(max, type, components) \
 719 { /* 4 x 4 kernel scale of output rows pkg->out_row1 .. pkg->out_row2 - 1. Uses scale_w, scale_h, input, output, out_w_int, out_h_int, in_x1_int, in_y1_int, pkg and do_yuv from the expanding scope. */ \
 720         float k_y = 1.0 / scale_h; \
 721         float k_x = 1.0 / scale_w; \
 722         type **in_rows = (type**)input->get_rows(); \
 723         type **out_rows = (type**)output->get_rows(); \
 724         int *bspline_x, *bspline_y; \
 725         int in_h_int = input->get_h(); \
 726         int in_w_int = input->get_w(); \
 727         type zero_r, zero_g, zero_b, zero_a; \
 728 /* zero_* is the pixel used for kernel taps that fall outside the input; components 2 and 3 get mid-scale when do_yuv is set. */ \
 729         zero_r = 0; \
 730         zero_b = ((max + 1) >> 1) * (do_yuv); \
 731         zero_g = ((max + 1) >> 1) * (do_yuv); \
 732         if(components == 4) \
 733                 zero_a = 0; \
 734 /* Weight tables hold four entries per output column / row (indexed from j * 4 and i * 4 below); both are released with delete [] at the end. */ \
 735         tabulate_bspline(bspline_x,  \
 736                 k_x, \
 737                 out_w_int, \
 738                 -1); \
 739 /* The row table is built for all out_h_int rows and indexed by the absolute row i, not relative to pkg->out_row1. */ \
 740         tabulate_bspline(bspline_y,  \
 741                 k_y, \
 742                 out_h_int, \
 743                 1); \
 744  \
 745         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
 746         { \
 747                 int i_y = (int)(k_y * i); \
 748  \
 749  \
 750                 for(int j = 0; j < out_w_int; j++) \
 751                 { \
 752                         int i_x = (int)(k_x * j); \
 753                         uint64_t output1, output2, output3, output4; \
 754                         output1 = 0; \
 755                         output2 = 0; \
 756                         output3 = 0; \
 757                         if(components == 4) \
 758                                 output4 = 0; \
 759                         int table_y = i * 4; \
 760 /* Taps run from offset -1 to +2 around (i_x, i_y) in both directions. */ \
 761 /* Kernel */ \
 762                         for(int m = -1; m < 3; m++) \
 763                         { \
 764                                 uint64_t r1 = bspline_y[table_y++]; \
 765                                 int y = in_y1_int + i_y + m; \
 766                                 int table_x = j * 4; \
 767  \
 768                                 for(int n = -1; n < 3; n++) \
 769                                 { \
 770                                         uint64_t r2 = bspline_x[table_x++]; \
 771                                         int x = in_x1_int + i_x + n; \
 772                                         uint64_t r_square = r1 * r2; \
 773  \
 774 /* Inside boundary. */ \
 775                                         if(x >= 0 && \
 776                                                 x < in_w_int && \
 777                                                 y >= 0 && \
 778                                                 y < in_h_int) \
 779                                         { \
 780                                                 output1 += r_square * in_rows[y][x * components]; \
 781                                                 output2 += r_square * in_rows[y][x * components + 1]; \
 782                                                 output3 += r_square * in_rows[y][x * components + 2]; \
 783                                                 if(components == 4) \
 784                                                         output4 += r_square * in_rows[y][x * components + 3]; \
 785                                         } \
 786                                         else \
 787                                         { \
 788                                                 output1 += r_square * zero_r; \
 789                                                 output2 += r_square * zero_g; \
 790                                                 output3 += r_square * zero_b; \
 791                                                 if(components == 4) \
 792                                                         output4 += r_square * zero_a; \
 793                                         } \
 794                                 } \
 795                         } \
 796 /* NOTE(review): the sums are divided by 0xffffffff and stored without clamping; tabulate_bspline is not visible here, so the scale of its weights needs confirming. */ \
 797  \
 798                         out_rows[i][j * components] = (type)(output1 / 0xffffffff); \
 799                         out_rows[i][j * components + 1] = (type)(output2 / 0xffffffff); \
 800                         out_rows[i][j * components + 2] = (type)(output3 / 0xffffffff); \
 801                         if(components == 4) \
 802                                 out_rows[i][j * components + 3] = (type)(output4 / 0xffffffff); \
 803  \
 804                 } \
 805         } \
 806  \
 807         delete [] bspline_x; \
 808         delete [] bspline_y; \
 809 }
 810
 811
 812
 813
 814 // Pow function is not thread safe in Compaqt C
 815 #define CUBE(x) ((x) * (x) * (x))
 816
 817 int ScaleUnit::cubic_bspline(float x)
 818 {
 819         float a, b, c, d;
 820
 821         if((x + 2.0F) <= 0.0F)
 822         {
 823         a = 0.0F;
 824         }
 825         else
 826         {
 827         a = CUBE(x + 2.0F);
 828         }
 829
 830
 831         if((x + 1.0F) <= 0.0F)
 832         {
 833         b = 0.0F;
 834         }
 835         else
 836         {
 837         b = CUBE(x + 1.0F);
 838         }
 839
 840         if(x <= 0)
 841         {
 842         c = 0.0F;
 843         }
 844         else
 845         {
 846         c = CUBE(x);
 847         }
 848
 849         if((x - 1.0F) <= 0.0F)
 850         {
 851         d = 0.0F;
 852         }
 853         else
 854         {
 855         d = CUBE(x - 1.0F);
 856         }
 857
 858
 859         return (int)((a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0 * 0x10000);
 860 }
 861
 862
 863 void ScaleUnit::tabulate_bspline(int* &table,
 864         float scale,
 865         int pixels,
 866         float coefficient)
 867 {
 868         table = new int[pixels * 4];
 869         for(int i = 0, j = 0; i < pixels; i++)
 870         {
 871                 float f_x = (float)i * scale;
 872                 float a = f_x - floor(f_x);
 873
 874                 for(float m = -1; m < 3; m++)
 875                 {
 876                         table[j++] = cubic_bspline(coefficient * (m - a));
 877                 }
 878
 879         }
 880 }
 881
 882 void ScaleUnit::tabulate_blinear(int* &table_int,
 883                 int* &table_frac,
 884                 float scale,
 885                 int pixel1,
 886                 int pixel2)
 887 {
 888         table_int = new int[pixel2 - pixel1];
 889         table_frac = new int[pixel2 - pixel1];
 890
 891         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
 892         {
 893                 float f_x = (float)i * scale;
 894                 int i_x = (int)floor(f_x);
 895                 int a = (int)((f_x - floor(f_x)) * 0xffff);
 896
 897                 table_int[j] = i_x;
 898                 table_frac[j] = a;
 899         }
 900 }
 901
 902 void ScaleUnit::process_package(LoadPackage *package)
 903 {
 904         ScalePackage *pkg = (ScalePackage*)package;
 905
 906 // Arguments for macros
 907         VFrame *output = overlay->scale_output;
 908         VFrame *input = overlay->scale_input;
 909         float scale_w = overlay->w_scale;
 910         float scale_h = overlay->h_scale;
 911         int in_x1_int = overlay->in_x1_int;
 912         int in_y1_int = overlay->in_y1_int;
 913         int out_h_int = overlay->out_h_int;
 914         int out_w_int = overlay->out_w_int;
 915         int do_yuv =
 916                 (overlay->scale_input->get_color_model() == BC_YUV888 ||
 917                 overlay->scale_input->get_color_model() == BC_YUVA8888 ||
 918                 overlay->scale_input->get_color_model() == BC_YUV161616 ||
 919                 overlay->scale_input->get_color_model() == BC_YUVA16161616);
 920
 921         if(overlay->interpolation_type == CUBIC_CUBIC ||
 922                 (overlay->interpolation_type == CUBIC_LINEAR
 923                         && overlay->w_scale > 1 &&
 924                         overlay->h_scale > 1))
 925         {
 926
 927                 switch(overlay->scale_input->get_color_model())
 928                 {
 929                         case BC_RGB888:
 930                         case BC_YUV888:
 931                                 BICUBIC(0xff, unsigned char, 3);
 932                                 break;
 933
 934                         case BC_RGBA8888:
 935                         case BC_YUVA8888:
 936                                 BICUBIC(0xff, unsigned char, 4);
 937                                 break;
 938
 939                         case BC_RGB161616:
 940                         case BC_YUV161616:
 941                                 BICUBIC(0xffff, uint16_t, 3);
 942                                 break;
 943
 944                         case BC_RGBA16161616:
 945                         case BC_YUVA16161616:
 946                                 BICUBIC(0xffff, uint16_t, 4);
 947                                 break;
 948                 }
 949         }
 950         else
 951 // Perform bilinear scaling input -> scale_output
 952         {
 953                 switch(overlay->scale_input->get_color_model())
 954                 {
 955                         case BC_RGB888:
 956                         case BC_YUV888:
 957                                 BILINEAR(0xff, unsigned char, 3);
 958                                 break;
 959
 960                         case BC_RGBA8888:
 961                         case BC_YUVA8888:
 962                                 BILINEAR(0xff, unsigned char, 4);
 963                                 break;
 964
 965                         case BC_RGB161616:
 966                         case BC_YUV161616:
 967                                 BILINEAR(0xffff, uint16_t, 3);
 968                                 break;
 969
 970                         case BC_RGBA16161616:
 971                         case BC_YUVA16161616:
 972                                 BILINEAR(0xffff, uint16_t, 4);
 973                                 break;
 974                 }
 975         }
 976
 977 }
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus)
 992  : LoadServer(cpus, cpus)
 993 {
 994         this->overlay = overlay;
 995 }
 996
 997 ScaleEngine::~ScaleEngine()
 998 {
 999 }
1000
1001 void ScaleEngine::init_packages()
1002 {
1003         for(int i = 0; i < total_packages; i++)
1004         {
1005                 ScalePackage *package = (ScalePackage*)packages[i];
1006                 package->out_row1 = overlay->out_h_int / total_packages * i;
1007                 package->out_row2 = package->out_row1 + overlay->out_h_int / total_packages;
1008
1009                 if(i >= total_packages - 1)
1010                         package->out_row2 = overlay->out_h_int;
1011         }
1012 }
1013
1014 LoadClient* ScaleEngine::new_client()
1015 {
1016         return new ScaleUnit(this, overlay);
1017 }
1018
1019 LoadPackage* ScaleEngine::new_package()
1020 {
1021         return new ScalePackage;
1022 }
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036 TranslatePackage::TranslatePackage()
1037 {
1038 }
1039
1040
1041
1042 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay)
1043  : LoadClient(server)
1044 {
1045         this->overlay = overlay;
1046 }
1047
1048 TranslateUnit::~TranslateUnit()
1049 {
1050 }
1051
1052
1053
1054 void TranslateUnit::translation_array(transfer_table* &table,
1055         float out_x1,
1056         float out_x2,
1057         float in_x1,
1058         float in_x2,
1059         int in_total,
1060         int out_total,
1061         int &out_x1_int,
1062         int &out_x2_int)
1063 {
1064         int out_w_int;
1065         float offset = out_x1 - in_x1;
1066
1067         out_x1_int = (int)out_x1;
1068         out_x2_int = MIN((int)ceil(out_x2), out_total);
1069         out_w_int = out_x2_int - out_x1_int;
1070
1071         table = new transfer_table[out_w_int];
1072         bzero(table, sizeof(transfer_table) * out_w_int);
1073
1074
1075 //printf("OverlayFrame::translation_array 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1076
1077         float in_x = in_x1;
1078         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1079         {
1080                 transfer_table *entry = &table[out_x - out_x1_int];
1081
1082                 entry->in_x1 = (int)in_x;
1083                 entry->in_x2 = (int)in_x + 1;
1084
1085 // Get fraction of output pixel to fill
1086                 entry->output_fraction = 1;
1087
1088                 if(out_x1 > out_x)
1089                 {
1090                         entry->output_fraction -= out_x1 - out_x;
1091                 }
1092
1093                 if(out_x2 < out_x + 1)
1094                 {
1095                         entry->output_fraction = (out_x2 - out_x);
1096                 }
1097
1098 // Advance in_x until out_x_fraction is filled
1099                 float out_x_fraction = entry->output_fraction;
1100                 float in_x_fraction = floor(in_x + 1) - in_x;
1101
1102                 if(out_x_fraction <= in_x_fraction)
1103                 {
1104                         entry->in_fraction1 = out_x_fraction;
1105                         entry->in_fraction2 = 0.0;
1106                         in_x += out_x_fraction;
1107                 }
1108                 else
1109                 {
1110                         entry->in_fraction1 = in_x_fraction;
1111                         in_x += out_x_fraction;
1112                         entry->in_fraction2 = in_x - floor(in_x);
1113                 }
1114
1115 // Clip in_x
1116                 if(entry->in_x2 >= in_total)
1117                 {
1118                         entry->in_x2 = in_total - 1;
1119                         entry->in_fraction2 = 0.0;
1120                 }
1121
1122                 if(entry->in_x1 >= in_total)
1123                 {
1124                         entry->in_x1 = in_total - 1;
1125                         entry->in_fraction1 = 0.0;
1126                 }
1127 // printf("OverlayFrame::translation_array 2 %d %d %d %f %f %f\n",
1128 //      out_x,
1129 //      entry->in_x1,
1130 //      entry->in_x2,
1131 //      entry->in_fraction1,
1132 //      entry->in_fraction2,
1133 //      entry->output_fraction);
1134         }
1135 }
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
// One channel of the current output pixel in TRANSLATE: the four
// neighboring input samples weighted by the x and y fractions of the
// current table entries, plus 0.5, truncated to int.
// Relies on in_row1, in_row2, in_x1, in_x2 and the *_fraction* locals of
// TRANSLATE being in scope.
#define TRANSLATE_CHANNEL(components, channel) \
	((int)(in_row1[in_x1 * (components) + (channel)] * x_fraction1 * y_fraction1 +  \
		in_row1[in_x2 * (components) + (channel)] * x_fraction2 * y_fraction1 +  \
		in_row2[in_x1 * (components) + (channel)] * x_fraction1 * y_fraction2 +  \
		in_row2[in_x2 * (components) + (channel)] * x_fraction2 * y_fraction2 + 0.5))

// Translate rows row1..row2 of input onto output using the x_table and
// y_table transfer tables, then blend with BLEND_3 or BLEND_4.
// Expects input, output, alpha, mode, row1, row2, x_table, y_table,
// out_x1_int, out_x2_int and out_y1_int in the enclosing scope.
#define TRANSLATE(max, type, components) \
{ \
	type **in_rows = (type**)input->get_rows(); \
	type **out_rows = (type**)output->get_rows(); \
 \
/* printf("OverlayFrame::translate 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",  */ \
/*	(in_x1),  in_y1,  in_x2,  in_y2,  out_x1,  out_y1, out_x2,  out_y2); */ \
 \
/* alpha scaled to the channel range */ \
	unsigned int master_opacity = (int)(alpha * max + 0.5); \
	unsigned int master_transparency = max - master_opacity; \
 \
/* printf("TRANSLATE %d\n", mode); */ \
 \
	for(int i = row1; i < row2; i++) \
	{ \
/* Table entry for this output row */ \
		int y_slot = i - out_y1_int; \
		int in_y1 = y_table[y_slot].in_x1; \
		int in_y2 = y_table[y_slot].in_x2; \
		float y_fraction1 = y_table[y_slot].in_fraction1; \
		float y_fraction2 = y_table[y_slot].in_fraction2; \
		float y_output_fraction = y_table[y_slot].output_fraction; \
		type *in_row1 = in_rows[(in_y1)]; \
		type *in_row2 = in_rows[(in_y2)]; \
		type *out_row = out_rows[i]; \
 \
		for(int j = out_x1_int; j < out_x2_int; j++) \
		{ \
/* Table entry for this output column */ \
			int x_slot = j - out_x1_int; \
			int in_x1 = x_table[x_slot].in_x1; \
			int in_x2 = x_table[x_slot].in_x2; \
			float x_fraction1 = x_table[x_slot].in_fraction1; \
			float x_fraction2 = x_table[x_slot].in_fraction2; \
			float x_output_fraction = x_table[x_slot].output_fraction; \
			type *output = &out_row[j * components]; \
			int input1, input2, input3, input4; \
 \
			input1 = TRANSLATE_CHANNEL(components, 0); \
			input2 = TRANSLATE_CHANNEL(components, 1); \
			input3 = TRANSLATE_CHANNEL(components, 2); \
			if(components == 4) \
				input4 = TRANSLATE_CHANNEL(components, 3); \
 \
/* Partially covered output pixels get proportionally less opacity */ \
			unsigned int opacity = (int)(master_opacity *  \
				y_output_fraction *  \
				x_output_fraction + 0.5); \
			unsigned int transparency = max - opacity; \
 \
/* if(opacity != max) printf("TRANSLATE %x %d %d\n", opacity, j, i); */ \
 \
			if(components == 3) \
			{ \
				BLEND_3(max, type); \
			} \
			else \
			{ \
				BLEND_4(max, type); \
			} \
		} \
	} \
}
1241
1242 void TranslateUnit::process_package(LoadPackage *package)
1243 {
1244         TranslatePackage *pkg = (TranslatePackage*)package;
1245         int out_y1_int;
1246         int out_y2_int;
1247         int out_x1_int;
1248         int out_x2_int;
1249
1250
1251 // Variables for TRANSLATE
1252         VFrame *input = overlay->translate_input;
1253         VFrame *output = overlay->translate_output;
1254         float in_x1 = overlay->translate_in_x1;
1255         float in_y1 = overlay->translate_in_y1;
1256         float in_x2 = overlay->translate_in_x2;
1257         float in_y2 = overlay->translate_in_y2;
1258         float out_x1 = overlay->translate_out_x1;
1259         float out_y1 = overlay->translate_out_y1;
1260         float out_x2 = overlay->translate_out_x2;
1261         float out_y2 = overlay->translate_out_y2;
1262         float alpha = overlay->translate_alpha;
1263         int row1 = pkg->out_row1;
1264         int row2 = pkg->out_row2;
1265         int mode = overlay->translate_mode;
1266
1267         transfer_table *x_table;
1268         transfer_table *y_table;
1269
1270         translation_array(x_table,
1271                 out_x1,
1272                 out_x2,
1273                 in_x1,
1274                 in_x2,
1275                 input->get_w(),
1276                 output->get_w(),
1277                 out_x1_int,
1278                 out_x2_int);
1279         translation_array(y_table,
1280                 out_y1,
1281                 out_y2,
1282                 in_y1,
1283                 in_y2,
1284                 input->get_h(),
1285                 output->get_h(),
1286                 out_y1_int,
1287                 out_y2_int);
1288
1289         switch(overlay->translate_input->get_color_model())
1290         {
1291                 case BC_RGB888:
1292                 case BC_YUV888:
1293                         TRANSLATE(0xff, unsigned char, 3);
1294                         break;
1295
1296                 case BC_RGBA8888:
1297                 case BC_YUVA8888:
1298                         TRANSLATE(0xff, unsigned char, 4);
1299                         break;
1300
1301                 case BC_RGB161616:
1302                 case BC_YUV161616:
1303                         TRANSLATE(0xffff, uint16_t, 3);
1304                         break;
1305
1306                 case BC_RGBA16161616:
1307                 case BC_YUVA16161616:
1308                         TRANSLATE(0xffff, uint16_t, 4);
1309                         break;
1310         }
1311
1312         delete [] x_table;
1313         delete [] y_table;
1314 }
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus)
1326  : LoadServer(cpus, cpus)
1327 {
1328         this->overlay = overlay;
1329 }
1330
1331 TranslateEngine::~TranslateEngine()
1332 {
1333 }
1334
1335 void TranslateEngine::init_packages()
1336 {
1337         int out_y1_int = (int)overlay->translate_out_y1;
1338         int out_y2_int = MIN((int)ceil(overlay->translate_out_y2), overlay->translate_output->get_h());
1339         int out_h = out_y2_int - out_y1_int;
1340
1341         for(int i = 0; i < total_packages; i++)
1342         {
1343                 TranslatePackage *package = (TranslatePackage*)packages[i];
1344                 package->out_row1 = (int)(out_y1_int + out_h /
1345                         total_packages *
1346                         i);
1347                 package->out_row2 = (int)((float)package->out_row1 +
1348                         out_h /
1349                         total_packages);
1350                 if(i >= total_packages - 1)
1351                         package->out_row2 = out_y2_int;
1352         }
1353 }
1354
1355 LoadClient* TranslateEngine::new_client()
1356 {
1357         return new TranslateUnit(this, overlay);
1358 }
1359
1360 LoadPackage* TranslateEngine::new_package()
1361 {
1362         return new TranslatePackage;
1363 }
1364
1365
1366
1367
1368
1369
1370
1371
// Nearest neighbor scale + translate of the rows of one package, blended
// with BLEND_3 or BLEND_4.
// Expects pkg, in_rows, out_rows, x_table, y_table, in_x1, in_x2, out_x1,
// out_x2, out_y1, alpha and mode in the enclosing scope.
#define SCALE_TRANSLATE(max, type, components) \
{ \
	int64_t opacity = (int)(alpha * max + 0.5); \
	int64_t transparency = max - opacity; \
	int out_w = out_x2 - out_x1; \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
/* Both row pointers start at the left edge of their rectangle */ \
		type *in_row = (type*)in_rows[y_table[i - out_y1]] + in_x1 * components; \
		type *out_row = (type*)out_rows[i] + out_x1 * components; \
 \
		if(out_w == in_x2 - in_x1) \
		{ \
/* X direction is not scaled: columns map one to one */ \
			for(int j = 0; j < out_w; j++) \
			{ \
				type *pixel = in_row + j * components; \
				type *output = out_row + j * components; \
				int input1, input2, input3, input4; \
	 \
				input1 = pixel[0]; \
				input2 = pixel[1]; \
				input3 = pixel[2]; \
				if(components == 4) \
					input4 = pixel[3]; \
	 \
				if(components == 3) \
				{ \
					BLEND_3(max, type); \
				} \
				else \
				{ \
					BLEND_4(max, type); \
				} \
			} \
		} \
		else \
		{ \
/* X direction is scaled and requires a table lookup */ \
			for(int j = 0; j < out_w; j++) \
			{ \
				type *pixel = in_row + x_table[j] * components; \
				type *output = out_row + j * components; \
				int input1, input2, input3, input4; \
	 \
				input1 = pixel[0]; \
				input2 = pixel[1]; \
				input3 = pixel[2]; \
				if(components == 4) \
					input4 = pixel[3]; \
	 \
				if(components == 3) \
				{ \
					BLEND_3(max, type); \
				} \
				else \
				{ \
					BLEND_4(max, type); \
				} \
			} \
		} \
	} \
}
1435
1436
1437
1438 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay)
1439  : LoadClient(server)
1440 {
1441         this->overlay = overlay;
1442         this->scale_translate = server;
1443 }
1444
1445 ScaleTranslateUnit::~ScaleTranslateUnit()
1446 {
1447 }
1448
1449 void ScaleTranslateUnit::scale_array(int* &table,
1450         int out_x1,
1451         int out_x2,
1452         int in_x1,
1453         int in_x2,
1454         int is_x)
1455 {
1456         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
1457
1458         table = new int[out_x2 - out_x1];
1459
1460         if(!is_x)
1461         {
1462                 for(int i = 0; i < out_x2 - out_x1; i++)
1463                 {
1464                         table[i] = (int)((float)i / scale + in_x1);
1465                 }
1466         }
1467         else
1468         {
1469                 for(int i = 0; i < out_x2 - out_x1; i++)
1470                 {
1471                         table[i] = (int)((float)i / scale);
1472                 }
1473         }
1474 }
1475
1476
1477 void ScaleTranslateUnit::process_package(LoadPackage *package)
1478 {
1479         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
1480
1481 // Args for NEAREST_NEIGHBOR_MACRO
1482         VFrame *output = scale_translate->output;
1483         VFrame *input = scale_translate->input;
1484         int in_x1 = scale_translate->in_x1;
1485         int in_y1 = scale_translate->in_y1;
1486         int in_x2 = scale_translate->in_x2;
1487         int in_y2 = scale_translate->in_y2;
1488         int out_x1 = scale_translate->out_x1;
1489         int out_y1 = scale_translate->out_y1;
1490         int out_x2 = scale_translate->out_x2;
1491         int out_y2 = scale_translate->out_y2;
1492         float alpha = scale_translate->alpha;
1493         int mode = scale_translate->mode;
1494
1495         int *x_table;
1496         int *y_table;
1497         unsigned char **in_rows = input->get_rows();
1498         unsigned char **out_rows = output->get_rows();
1499
1500         scale_array(x_table,
1501                 out_x1,
1502                 out_x2,
1503                 in_x1,
1504                 in_x2,
1505                 1);
1506         scale_array(y_table,
1507                 out_y1,
1508                 out_y2,
1509                 in_y1,
1510                 in_y2,
1511                 0);
1512
1513
1514         switch(input->get_color_model())
1515         {
1516                 case BC_RGB888:
1517                 case BC_YUV888:
1518                         SCALE_TRANSLATE(0xff, uint8_t, 3);
1519                         break;
1520
1521                 case BC_RGBA8888:
1522                 case BC_YUVA8888:
1523                         SCALE_TRANSLATE(0xff, uint8_t, 4);
1524                         break;
1525
1526
1527                 case BC_RGB161616:
1528                 case BC_YUV161616:
1529                         SCALE_TRANSLATE(0xffff, uint16_t, 3);
1530                         break;
1531
1532                 case BC_RGBA16161616:
1533                 case BC_YUVA16161616:
1534                         SCALE_TRANSLATE(0xffff, uint16_t, 4);
1535                         break;
1536         }
1537
1538         delete [] x_table;
1539         delete [] y_table;
1540
1541 };
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus)
1552  : LoadServer(cpus, cpus)
1553 {
1554         this->overlay = overlay;
1555 }
1556
1557 ScaleTranslateEngine::~ScaleTranslateEngine()
1558 {
1559 }
1560
1561 void ScaleTranslateEngine::init_packages()
1562 {
1563         int out_h = out_y2 - out_y1;
1564
1565         for(int i = 0; i < total_packages; i++)
1566         {
1567                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
1568                 package->out_row1 = (int)(out_y1 + out_h /
1569                         total_packages *
1570                         i);
1571                 package->out_row2 = (int)((float)package->out_row1 +
1572                         out_h /
1573                         total_packages);
1574                 if(i >= total_packages - 1)
1575                         package->out_row2 = out_y2;
1576         }
1577 }
1578
1579 LoadClient* ScaleTranslateEngine::new_client()
1580 {
1581         return new ScaleTranslateUnit(this, overlay);
1582 }
1583
1584 LoadPackage* ScaleTranslateEngine::new_package()
1585 {
1586         return new ScaleTranslatePackage;
1587 }
1588
1589
1590 ScaleTranslatePackage::ScaleTranslatePackage()
1591 {
1592 }
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
// Blend rows pkg->out_row1 .. pkg->out_row2 of input onto output without
// any scaling or translation, using BLEND_3 or BLEND_4 per pixel.
// Expects input, output (both frames), alpha, mode and pkg in the enclosing
// scope.  The row width is taken from input.
//
// The frame pointer `input` is left alone.  It used to be advanced by
// `components` after every pixel, which walked it far outside the frame
// object even though it was never read again.
#define BLEND_ONLY(type, max, components) \
{ \
	int64_t opacity = (int)(alpha * max + 0.5); \
	int64_t transparency = max - opacity; \
 \
	type** output_rows = (type**)output->get_rows(); \
	type** input_rows = (type**)input->get_rows(); \
	int w = input->get_w(); \
 \
	for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
	{ \
		type* in_row = input_rows[i]; \
/* Shadows the frame: BLEND_3 / BLEND_4 write through this pixel pointer */ \
		type* output = output_rows[i]; \
 \
		for(int j = 0; j < w; j++) \
		{ \
			int input1, input2, input3, input4; \
			input1 = in_row[j * components]; \
			input2 = in_row[j * components + 1]; \
			input3 = in_row[j * components + 2]; \
			if(components == 4) input4 = in_row[j * components + 3]; \
 \
 \
			if(components == 3) \
			{ \
				BLEND_3(max, type); \
			} \
			else \
			{ \
				BLEND_4(max, type); \
			} \
 \
/* The input side is indexed by j, only the output pointer steps */ \
			output += components; \
		} \
	} \
}
1659
1660
1661
1662
1663 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay)
1664  : LoadClient(server)
1665 {
1666         this->overlay = overlay;
1667         this->blend_engine = server;
1668 }
1669
1670 BlendUnit::~BlendUnit()
1671 {
1672 }
1673
1674 void BlendUnit::process_package(LoadPackage *package)
1675 {
1676         BlendPackage *pkg = (BlendPackage*)package;
1677
1678
1679         VFrame *output = blend_engine->output;
1680         VFrame *input = blend_engine->input;
1681         float alpha = blend_engine->alpha;
1682         int mode = blend_engine->mode;
1683
1684         switch(input->get_color_model())
1685         {
1686                 case BC_RGB888:
1687                 case BC_YUV888:
1688                         BLEND_ONLY(unsigned char, 0xff, 3);
1689                         break;
1690                 case BC_RGBA8888:
1691                 case BC_YUVA8888:
1692                         BLEND_ONLY(unsigned char, 0xff, 4);
1693                         break;
1694                 case BC_RGB161616:
1695                 case BC_YUV161616:
1696                         BLEND_ONLY(uint16_t, 0xffff, 3);
1697                         break;
1698                 case BC_RGBA16161616:
1699                 case BC_YUVA16161616:
1700                         BLEND_ONLY(uint16_t, 0xffff, 4);
1701                         break;
1702         }
1703 }
1704
1705
1706
1707 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus)
1708  : LoadServer(cpus, cpus)
1709 {
1710         this->overlay = overlay;
1711 }
1712
1713 BlendEngine::~BlendEngine()
1714 {
1715 }
1716
1717 void BlendEngine::init_packages()
1718 {
1719         for(int i = 0; i < total_packages; i++)
1720         {
1721                 BlendPackage *package = (BlendPackage*)packages[i];
1722                 package->out_row1 = (int)(input->get_h() /
1723                         total_packages *
1724                         i);
1725                 package->out_row2 = (int)((float)package->out_row1 +
1726                         input->get_h() /
1727                         total_packages);
1728
1729                 if(i >= total_packages - 1)
1730                         package->out_row2 = input->get_h();
1731         }
1732 }
1733
1734 LoadClient* BlendEngine::new_client()
1735 {
1736         return new BlendUnit(this, overlay);
1737 }
1738
1739 LoadPackage* BlendEngine::new_package()
1740 {
1741         return new BlendPackage;
1742 }
1743
1744
1745 BlendPackage::BlendPackage()
1746 {
1747 }
1748
1749