hvirtual/cinelerra/overlayframe.C.floattable

   1 #include <math.h>
   2 #include <stdio.h>
   3 #include <string.h>
   4 #include <stdint.h>
   5
   6 #include "clip.h"
   7 #include "edl.inc"
   8 #include "mutex.h"
   9 #include "overlayframe.h"
  10 #include "vframe.h"
  11
  12 OverlayFrame::OverlayFrame(int cpus)
  13 {
  14         temp_frame = 0;
  15         blend_engine = 0;
  16         scale_engine = 0;
  17         scaletranslate_engine = 0;
  18         translate_engine = 0;
  19         this->cpus = cpus;
  20 }
  21
  22 OverlayFrame::~OverlayFrame()
  23 {
  24 //printf("OverlayFrame::~OverlayFrame 1\n");
  25         if(temp_frame) delete temp_frame;
  26         if(scale_engine) delete scale_engine;
  27         if(translate_engine) delete translate_engine;
  28         if(blend_engine) delete blend_engine;
  29         if(scaletranslate_engine) delete scaletranslate_engine;
  30 //printf("OverlayFrame::~OverlayFrame 2\n");
  31 }
  32
  33
  34
  35
  36
  37
  38
  39
  40 // Verification:
  41
  42 // (255 * 255 + 0 * 0) / 255 = 255
  43 // (255 * 127 + 255 * (255 - 127)) / 255 = 255
  44
  45 // (65535 * 65535 + 0 * 0) / 65535 = 65535
  46 // (65535 * 32767 + 65535 * (65535 - 32767)) / 65535 = 65535
  47
  48
  49 // Branch prediction 4 U
  50
  51 #define BLEND_3(max, type) \
  52 { \
  53         int64_t r, g, b; \
  54  \
  55 /* if(mode != TRANSFER_NORMAL) printf("BLEND mode = %d\n", mode); */ \
  56         switch(mode) \
  57         { \
  58                 case TRANSFER_DIVIDE: \
  59                         r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \
  60                         g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \
  61                         b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \
  62                         r = (r * opacity + output[0] * transparency) / max; \
  63                         g = (g * opacity + output[1] * transparency) / max; \
  64                         b = (b * opacity + output[2] * transparency) / max; \
  65                         break; \
  66                 case TRANSFER_MULTIPLY: \
  67                         r = ((int64_t)input1 * output[0]) / max; \
  68                         g = ((int64_t)input2 * output[1]) / max; \
  69                         b = ((int64_t)input3 * output[2]) / max; \
  70                         r = (r * opacity + output[0] * transparency) / max; \
  71                         g = (g * opacity + output[1] * transparency) / max; \
  72                         b = (b * opacity + output[2] * transparency) / max; \
  73                         break; \
  74                 case TRANSFER_SUBTRACT: \
  75                         r = (((int64_t)input1 - output[0]) * opacity + output[0] * transparency) / max; \
  76                         g = (((int64_t)input2 - output[1]) * opacity + output[1] * transparency) / max; \
  77                         b = (((int64_t)input3 - output[2]) * opacity + output[2] * transparency) / max; \
  78                         break; \
  79                 case TRANSFER_ADDITION: \
  80                         r = (((int64_t)input1 + output[0]) * opacity + output[0] * transparency) / max; \
  81                         g = (((int64_t)input2 + output[1]) * opacity + output[1] * transparency) / max; \
  82                         b = (((int64_t)input3 + output[2]) * opacity + output[2] * transparency) / max; \
  83                         break; \
  84                 case TRANSFER_REPLACE: \
  85                         r = input1; \
  86                         g = input2; \
  87                         b = input3; \
  88                         break; \
  89                 case TRANSFER_NORMAL: \
  90                         r = ((int64_t)input1 * opacity + output[0] * transparency) / max; \
  91                         g = ((int64_t)input2 * opacity + output[1] * transparency) / max; \
  92                         b = ((int64_t)input3 * opacity + output[2] * transparency) / max; \
  93                         break; \
  94         } \
  95  \
  96         output[0] = (type)CLIP(r, 0, max); \
  97         output[1] = (type)CLIP(g, 0, max); \
  98         output[2] = (type)CLIP(b, 0, max); \
  99 }
 100
 101
 102
 103
 104
 105 // Blending equations are drastically different for 3 and 4 components
 106 #define BLEND_4(max, type) \
 107 { \
 108         int64_t r, g, b, a; \
 109         int64_t pixel_opacity, pixel_transparency; \
 110  \
 111         pixel_opacity = opacity * input4 / max; \
 112         pixel_transparency = (max - pixel_opacity) * output[3] / max; \
 113  \
 114         switch(mode) \
 115         { \
 116                 case TRANSFER_DIVIDE: \
 117                         r = output[0] ? (((int64_t)input1 * max) / output[0]) : max; \
 118                         g = output[1] ? (((int64_t)input2 * max) / output[1]) : max; \
 119                         b = output[2] ? (((int64_t)input3 * max) / output[2]) : max; \
 120                         r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \
 121                         g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \
 122                         b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \
 123                         a = input4 > output[3] ? input4 : output[3]; \
 124                         break; \
 125                 case TRANSFER_MULTIPLY: \
 126                         r = ((int64_t)input1 * output[0]) / max; \
 127                         g = ((int64_t)input2 * output[1]) / max; \
 128                         b = ((int64_t)input3 * output[2]) / max; \
 129                         r = (r * pixel_opacity + output[0] * pixel_transparency) / max; \
 130                         g = (g * pixel_opacity + output[1] * pixel_transparency) / max; \
 131                         b = (b * pixel_opacity + output[2] * pixel_transparency) / max; \
 132                         a = input4 > output[3] ? input4 : output[3]; \
 133                         break; \
 134                 case TRANSFER_SUBTRACT: \
 135                         r = (((int64_t)input1 - output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \
 136                         g = (((int64_t)input2 - output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \
 137                         b = (((int64_t)input3 - output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \
 138                         a = input4 > output[3] ? input4 : output[3]; \
 139                         break; \
 140                 case TRANSFER_ADDITION: \
 141                         r = (((int64_t)input1 + output[0]) * pixel_opacity + output[0] * pixel_transparency) / max; \
 142                         g = (((int64_t)input2 + output[1]) * pixel_opacity + output[1] * pixel_transparency) / max; \
 143                         b = (((int64_t)input3 + output[2]) * pixel_opacity + output[2] * pixel_transparency) / max; \
 144                         a = input4 > output[3] ? input4 : output[3]; \
 145                         break; \
 146                 case TRANSFER_REPLACE: \
 147                         r = input1; \
 148                         g = input2; \
 149                         b = input3; \
 150                         a = input4; \
 151                         break; \
 152                 case TRANSFER_NORMAL: \
 153                         r = ((int64_t)input1 * pixel_opacity + output[0] * pixel_transparency) / max; \
 154                         g = ((int64_t)input2 * pixel_opacity + output[1] * pixel_transparency) / max; \
 155                         b = ((int64_t)input3 * pixel_opacity + output[2] * pixel_transparency) / max; \
 156                         a = input4 > output[3] ? input4 : output[3]; \
 157                         break; \
 158         } \
 159  \
 160         output[0] = (type)CLIP(r, 0, max); \
 161         output[1] = (type)CLIP(g, 0, max); \
 162         output[2] = (type)CLIP(b, 0, max); \
 163         output[3] = (type)a; \
 164 }
 165
 166
 167
 168
 169
 170
 171
 172
 173 // Bicubic algorithm using multiprocessors
 174 // input -> scale nearest integer boundaries -> temp -> translation -> blend -> output
 175
 176 // Nearest neighbor algorithm using multiprocessors for blending
 177 // input -> scale + translate -> blend -> output
 178
 179
 180 int OverlayFrame::overlay(VFrame *output,
 181         VFrame *input,
 182         float in_x1,
 183         float in_y1,
 184         float in_x2,
 185         float in_y2,
 186         float out_x1,
 187         float out_y1,
 188         float out_x2,
 189         float out_y2,
 190         float alpha,       // 0 - 1
 191         int mode,
 192         int interpolation_type)
 193 {
 194         float w_scale = (out_x2 - out_x1) / (in_x2 - in_x1);
 195         float h_scale = (out_y2 - out_y1) / (in_y2 - in_y1);
 196
 197 //printf("OverlayFrame::overlay 1 %d %f\n", mode, alpha);
 198 // Limit values
 199         if(in_x1 < 0)
 200         {
 201                 out_x1 += -in_x1 * w_scale;
 202                 in_x1 = 0;
 203         }
 204         else
 205         if(in_x1 >= input->get_w())
 206         {
 207                 out_x1 -= (in_x1 - input->get_w()) * w_scale;
 208                 in_x1 = input->get_w();
 209         }
 210
 211         if(in_y1 < 0)
 212         {
 213                 out_y1 += -in_y1 * h_scale;
 214                 in_y1 = 0;
 215         }
 216         else
 217         if(in_y1 >= input->get_h())
 218         {
 219                 out_y1 -= (in_y1 - input->get_h()) * h_scale;
 220                 in_y1 = input->get_h();
 221         }
 222
 223         if(in_x2 < 0)
 224         {
 225                 out_x2 += -in_x2 * w_scale;
 226                 in_x2 = 0;
 227         }
 228         else
 229         if(in_x2 >= input->get_w())
 230         {
 231                 out_x2 -= (in_x2 - input->get_w()) * w_scale;
 232                 in_x2 = input->get_w();
 233         }
 234
 235         if(in_y2 < 0)
 236         {
 237                 out_y2 += -in_y2 * h_scale;
 238                 in_y2 = 0;
 239         }
 240         else
 241         if(in_y2 >= input->get_h())
 242         {
 243                 out_y2 -= (in_y2 - input->get_h()) * h_scale;
 244                 in_y2 = input->get_h();
 245         }
 246
 247         if(out_x1 < 0)
 248         {
 249                 in_x1 += -out_x1 / w_scale;
 250                 out_x1 = 0;
 251         }
 252         else
 253         if(out_x1 >= output->get_w())
 254         {
 255                 in_x1 -= (out_x1 - output->get_w()) / w_scale;
 256                 out_x1 = output->get_w();
 257         }
 258
 259         if(out_y1 < 0)
 260         {
 261                 in_y1 += -out_y1 / h_scale;
 262                 out_y1 = 0;
 263         }
 264         else
 265         if(out_y1 >= output->get_h())
 266         {
 267                 in_y1 -= (out_y1 - output->get_h()) / h_scale;
 268                 out_y1 = output->get_h();
 269         }
 270
 271         if(out_x2 < 0)
 272         {
 273                 in_x2 += -out_x2 / w_scale;
 274                 out_x2 = 0;
 275         }
 276         else
 277         if(out_x2 >= output->get_w())
 278         {
 279                 in_x2 -= (out_x2 - output->get_w()) / w_scale;
 280                 out_x2 = output->get_w();
 281         }
 282
 283         if(out_y2 < 0)
 284         {
 285                 in_y2 += -out_y2 / h_scale;
 286                 out_y2 = 0;
 287         }
 288         else
 289         if(out_y2 >= output->get_h())
 290         {
 291                 in_y2 -= (out_y2 - output->get_h()) / h_scale;
 292                 out_y2 = output->get_h();
 293         }
 294
 295
 296
 297
 298
 299         float in_w = in_x2 - in_x1;
 300         float in_h = in_y2 - in_y1;
 301         float out_w = out_x2 - out_x1;
 302         float out_h = out_y2 - out_y1;
 303 // Input for translation operation
 304         VFrame *translation_input = input;
 305
 306
 307
 308 // printf("OverlayFrame::overlay %f %f %f %f -> %f %f %f %f\n", in_x1,
 309 //                      in_y1,
 310 //                      in_x2,
 311 //                      in_y2,
 312 //                      out_x1,
 313 //                      out_y1,
 314 //                      out_x2,
 315 //                      out_y2);
 316
 317
 318
 319
 320
 321 // ****************************************************************************
 322 // Transfer to temp buffer by scaling nearest integer boundaries
 323 // ****************************************************************************
 324         if(interpolation_type != NEAREST_NEIGHBOR &&
 325                 interpolation_type != LINEAR_LINEAR &&
 326                 (!EQUIV(w_scale, 1) || !EQUIV(h_scale, 1)))
 327         {
 328 // Create integer boundaries for interpolation
 329                 int in_x1_int = (int)in_x1;
 330                 int in_y1_int = (int)in_y1;
 331                 int in_x2_int = MIN((int)ceil(in_x2), input->get_w());
 332                 int in_y2_int = MIN((int)ceil(in_y2), input->get_h());
 333
 334 // Dimensions of temp frame.  Integer boundaries scaled.
 335                 int temp_w = (int)ceil(w_scale * (in_x2_int - in_x1_int));
 336                 int temp_h = (int)ceil(h_scale * (in_y2_int - in_y1_int));
 337                 VFrame *scale_output;
 338
 339
 340
 341 #define NO_TRANSLATION1 \
 342         (EQUIV(in_x1, 0) && \
 343         EQUIV(in_y1, 0) && \
 344         EQUIV(out_x1, 0) && \
 345         EQUIV(out_y1, 0) && \
 346         EQUIV(in_x2, in_x2_int) && \
 347         EQUIV(in_y2, in_y2_int) && \
 348         EQUIV(out_x2, temp_w) && \
 349         EQUIV(out_y2, temp_h))
 350
 351
 352 #define NO_BLEND \
 353         (EQUIV(alpha, 1) && \
 354         (mode == TRANSFER_REPLACE || \
 355         (mode == TRANSFER_NORMAL && cmodel_components(input->get_color_model()) == 3)))
 356
 357
 358
 359
 360
 361 // Prepare destination for operation
 362
 363 // No translation and no blending.  The blending operation is built into the
 364 // translation unit but not the scaling unit.
 365 // input -> output
 366                 if(NO_TRANSLATION1 &&
 367                         NO_BLEND)
 368                 {
 369 // printf("OverlayFrame::overlay input -> output\n");
 370
 371                         scale_output = output;
 372                         translation_input = 0;
 373                 }
 374                 else
 375 // If translation or blending
 376 // input -> nearest integer boundary temp
 377                 {
 378                         if(temp_frame &&
 379                                 (temp_frame->get_w() != temp_w ||
 380                                         temp_frame->get_h() != temp_h))
 381                         {
 382                                 delete temp_frame;
 383                                 temp_frame = 0;
 384                         }
 385
 386                         if(!temp_frame)
 387                         {
 388                                 temp_frame = new VFrame(0,
 389                                         temp_w,
 390                                         temp_h,
 391                                         input->get_color_model(),
 392                                         -1);
 393                         }
 394 //printf("OverlayFrame::overlay input -> temp\n");
 395
 396
 397                         temp_frame->clear_frame();
 398
 399 // printf("OverlayFrame::overlay 4 temp_w=%d temp_h=%d\n",
 400 //      temp_w, temp_h);
 401                         scale_output = temp_frame;
 402                         translation_input = scale_output;
 403
 404 // Adjust input coordinates to reflect new scaled coordinates.
 405                         in_x1 = (in_x1 - in_x1_int) * w_scale;
 406                         in_y1 = (in_y1 - in_y1_int) * h_scale;
 407                         in_x2 = (in_x2 - in_x1_int) * w_scale;
 408                         in_y2 = (in_y2 - in_y1_int) * h_scale;
 409                 }
 410
 411
 412
 413 //printf("Overlay 1\n");
 414
 415 // Scale input -> scale_output
 416                 if(!scale_engine) scale_engine = new ScaleEngine(this, cpus);
 417                 scale_engine->scale_output = scale_output;
 418                 scale_engine->scale_input = input;
 419                 scale_engine->w_scale = w_scale;
 420                 scale_engine->h_scale = h_scale;
 421                 scale_engine->in_x1_int = in_x1_int;
 422                 scale_engine->in_y1_int = in_y1_int;
 423                 scale_engine->out_w_int = temp_w;
 424                 scale_engine->out_h_int = temp_h;
 425                 scale_engine->interpolation_type = interpolation_type;
 426 //printf("Overlay 2\n");
 427
 428 //printf("OverlayFrame::overlay ScaleEngine 1 %d\n", out_h_int);
 429                 scale_engine->process_packages();
 430 //printf("OverlayFrame::overlay ScaleEngine 2\n");
 431
 432
 433
 434         }
 435
 436 // printf("OverlayFrame::overlay 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 437 //      in_x1,
 438 //      in_y1,
 439 //      in_x2,
 440 //      in_y2,
 441 //      out_x1,
 442 //      out_y1,
 443 //      out_x2,
 444 //      out_y2);
 445
 446
 447
 448
 449
 450 #define NO_TRANSLATION2 \
 451         (EQUIV(in_x1, 0) && \
 452         EQUIV(in_y1, 0) && \
 453         EQUIV(in_x2, translation_input->get_w()) && \
 454         EQUIV(in_y2, translation_input->get_h()) && \
 455         EQUIV(out_x1, 0) && \
 456         EQUIV(out_y1, 0) && \
 457         EQUIV(out_x2, output->get_w()) && \
 458         EQUIV(out_y2, output->get_h())) \
 459
 460 #define NO_SCALE \
 461         (EQUIV(out_x2 - out_x1, in_x2 - in_x1) && \
 462         EQUIV(out_y2 - out_y1, in_y2 - in_y1))
 463
 464
 465
 466
 467 //printf("OverlayFrame::overlay 4 %d\n", mode);
 468
 469
 470
 471
 472         if(translation_input)
 473         {
 474 // Direct copy
 475                 if( NO_TRANSLATION2 &&
 476                         NO_SCALE &&
 477                         NO_BLEND)
 478                 {
 479 //printf("OverlayFrame::overlay direct copy\n");
 480                         output->copy_from(translation_input);
 481                 }
 482                 else
 483 // Blend only
 484                 if( NO_TRANSLATION2 &&
 485                         NO_SCALE)
 486                 {
 487                         if(!blend_engine) blend_engine = new BlendEngine(this, cpus);
 488
 489
 490                         blend_engine->output = output;
 491                         blend_engine->input = translation_input;
 492                         blend_engine->alpha = alpha;
 493                         blend_engine->mode = mode;
 494
 495                         blend_engine->process_packages();
 496                 }
 497                 else
 498 // Scale and translate using nearest neighbor
 499 // Translation is exactly on integer boundaries
 500                 if(interpolation_type == NEAREST_NEIGHBOR ||
 501                         EQUIV(in_x1, (int)in_x1) &&
 502                         EQUIV(in_y1, (int)in_y1) &&
 503                         EQUIV(in_x2, (int)in_x2) &&
 504                         EQUIV(in_y2, (int)in_y2) &&
 505
 506                         EQUIV(out_x1, (int)out_x1) &&
 507                         EQUIV(out_y1, (int)out_y1) &&
 508                         EQUIV(out_x2, (int)out_x2) &&
 509                         EQUIV(out_y2, (int)out_y2))
 510                 {
 511 //printf("OverlayFrame::overlay NEAREST_NEIGHBOR 1\n");
 512                         if(!scaletranslate_engine) scaletranslate_engine = new ScaleTranslateEngine(this, cpus);
 513
 514
 515                         scaletranslate_engine->output = output;
 516                         scaletranslate_engine->input = translation_input;
 517                         scaletranslate_engine->in_x1 = (int)in_x1;
 518                         scaletranslate_engine->in_y1 = (int)in_y1;
 519                         scaletranslate_engine->in_x2 = (int)in_x2;
 520                         scaletranslate_engine->in_y2 = (int)in_y2;
 521                         scaletranslate_engine->out_x1 = (int)out_x1;
 522                         scaletranslate_engine->out_y1 = (int)out_y1;
 523                         scaletranslate_engine->out_x2 = (int)out_x2;
 524                         scaletranslate_engine->out_y2 = (int)out_y2;
 525                         scaletranslate_engine->alpha = alpha;
 526                         scaletranslate_engine->mode = mode;
 527
 528                         scaletranslate_engine->process_packages();
 529                 }
 530                 else
 531 // Fractional translation
 532                 {
 533 // Use fractional translation
 534 // printf("OverlayFrame::overlay temp -> output  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",
 535 //      in_x1,
 536 //      in_y1,
 537 //      in_x2,
 538 //      in_y2,
 539 //      out_x1,
 540 //      out_y1,
 541 //      out_x2,
 542 //      out_y2);
 543
 544 //printf("Overlay 3\n");
 545                         if(!translate_engine) translate_engine = new TranslateEngine(this, cpus);
 546                         translate_engine->translate_output = output;
 547                         translate_engine->translate_input = translation_input;
 548                         translate_engine->translate_in_x1 = in_x1;
 549                         translate_engine->translate_in_y1 = in_y1;
 550                         translate_engine->translate_in_x2 = in_x2;
 551                         translate_engine->translate_in_y2 = in_y2;
 552                         translate_engine->translate_out_x1 = out_x1;
 553                         translate_engine->translate_out_y1 = out_y1;
 554                         translate_engine->translate_out_x2 = out_x2;
 555                         translate_engine->translate_out_y2 = out_y2;
 556                         translate_engine->translate_alpha = alpha;
 557                         translate_engine->translate_mode = mode;
 558 //printf("Overlay 4\n");
 559
 560 //printf("OverlayFrame::overlay 5 %d\n", mode);
 561                         translate_engine->process_packages();
 562
 563                 }
 564         }
 565 //printf("OverlayFrame::overlay 2\n");
 566
 567         return 0;
 568 }
 569
 570
 571
 572
 573
 574
 575
 576 ScalePackage::ScalePackage()
 577 {
 578 }
 579
 580
 581
 582
 583 ScaleUnit::ScaleUnit(ScaleEngine *server, OverlayFrame *overlay)
 584  : LoadClient(server)
 585 {
 586         this->overlay = overlay;
 587         this->engine = server;
 588 }
 589
 590 ScaleUnit::~ScaleUnit()
 591 {
 592 }
 593
 594
 595
 596 #define BILINEAR(max, type, components) \
 597 { \
 598         float k_y = 1.0 / scale_h; \
 599         float k_x = 1.0 / scale_w; \
 600         type **in_rows = (type**)input->get_rows(); \
 601         type **out_rows = (type**)output->get_rows(); \
 602         type zero_r, zero_g, zero_b, zero_a; \
 603         int in_h_int = input->get_h(); \
 604         int in_w_int = input->get_w(); \
 605         int *table_int_x, *table_int_y; \
 606         float *table_frac_x, *table_antifrac_x, *table_frac_y, *table_antifrac_y; \
 607  \
 608         zero_r = 0; \
 609         zero_g = ((max + 1) >> 1) * (do_yuv); \
 610         zero_b = ((max + 1) >> 1) * (do_yuv); \
 611         if(components == 4) zero_a = 0; \
 612  \
 613         tabulate_blinear(table_int_x, table_frac_x, table_antifrac_x, k_x, 0, out_w_int, in_w_int); \
 614         tabulate_blinear(table_int_y, table_frac_y, table_antifrac_y, k_y, pkg->out_row1, pkg->out_row2, in_h_int); \
 615  \
 616         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
 617         { \
 618                 int i_y = table_int_y[i - pkg->out_row1]; \
 619                 float a = table_frac_y[i - pkg->out_row1]; \
 620         float anti_a = table_antifrac_y[i - pkg->out_row1]; \
 621                 type *in_row1 = in_rows[i_y + in_y1_int]; \
 622                 type *in_row2 = (i_y + in_y1_int < in_h_int - 1) ?  \
 623                         in_rows[i_y + in_y1_int + 1] : \
 624                         0; \
 625                 type *out_row = out_rows[i]; \
 626  \
 627                 for(int j = 0; j < out_w_int; j++) \
 628                 { \
 629                         int i_x = table_int_x[j]; \
 630                         float b = table_frac_x[j]; \
 631                         float anti_b = table_antifrac_x[j]; \
 632                         int x = i_x + in_x1_int; \
 633                         float output1r, output1g, output1b, output1a; \
 634                         float output2r, output2g, output2b, output2a; \
 635                         float output3r, output3g, output3b, output3a; \
 636                         float output4r, output4g, output4b, output4a; \
 637  \
 638                         output1r = in_row1[x * components]; \
 639                         output1g = in_row1[x * components + 1]; \
 640                         output1b = in_row1[x * components + 2]; \
 641                         if(components == 4) output1a = in_row1[x * components + 3]; \
 642  \
 643                         if(x < in_w_int - 1) \
 644                         { \
 645                                 output2r = in_row1[x * components + components]; \
 646                                 output2g = in_row1[x * components + components + 1]; \
 647                                 output2b = in_row1[x * components + components + 2]; \
 648                                 if(components == 4) output2a = in_row1[x * components + components + 3]; \
 649  \
 650                                 if(in_row2) \
 651                                 { \
 652                                         output4r = in_row2[x * components + components]; \
 653                                         output4g = in_row2[x * components + components + 1]; \
 654                                         output4b = in_row2[x * components + components + 2]; \
 655                                         if(components == 4) output4a = in_row2[x * components + components + 3]; \
 656                                 } \
 657                                 else \
 658                                 { \
 659                                         output4r = zero_r; \
 660                                         output4g = zero_g; \
 661                                         output4b = zero_b; \
 662                                         if(components == 4) output4a = zero_a; \
 663                                 } \
 664                         } \
 665                         else \
 666                         { \
 667                                 output2r = zero_r; \
 668                                 output2g = zero_g; \
 669                                 output2b = zero_b; \
 670                                 if(components == 4) output2a = zero_a; \
 671                                 output4r = zero_r; \
 672                                 output4g = zero_g; \
 673                                 output4b = zero_b; \
 674                                 if(components == 4) output4a = zero_a; \
 675                         } \
 676  \
 677                         if(in_row2) \
 678                         { \
 679                                 output3r = in_row2[x * components]; \
 680                                 output3g = in_row2[x * components + 1]; \
 681                                 output3b = in_row2[x * components + 2]; \
 682                                 if(components == 4) output3a = in_row2[x * components + 3]; \
 683                         } \
 684                         else \
 685                         { \
 686                                 output3r = zero_r; \
 687                                 output3g = zero_g; \
 688                                 output3b = zero_b; \
 689                                 if(components == 4) output3a = zero_a; \
 690                         } \
 691  \
 692                         out_row[j * components] =  \
 693                                 (type)((anti_a) * (((anti_b) * output1r) +  \
 694                                 (b * output2r)) +  \
 695                 a * (((anti_b) * output3r) +  \
 696                                 (b * output4r))); \
 697                         out_row[j * components + 1] =   \
 698                                 (type)((anti_a) * (((anti_b) * output1g) +  \
 699                                 (b * output2g)) +  \
 700                 a * (((anti_b) * output3g) +  \
 701                                 (b * output4g))); \
 702                         out_row[j * components + 2] =   \
 703                                 (type)((anti_a) * (((anti_b) * output1b) +  \
 704                                 (b * output2b)) +  \
 705                 a * (((anti_b) * output3b) +  \
 706                                 (b * output4b))); \
 707                         if(components == 4) \
 708                                 out_row[j * components + 3] =   \
 709                                         (type)((anti_a) * (((anti_b) * output1a) +  \
 710                                         (b * output2a)) +  \
 711                         a * (((anti_b) * output3a) +  \
 712                                         (b * output4a))); \
 713                 } \
 714         } \
 715  \
 716  \
 717         delete [] table_int_x; \
 718         delete [] table_frac_x; \
 719         delete [] table_antifrac_x; \
 720         delete [] table_int_y; \
 721         delete [] table_frac_y; \
 722         delete [] table_antifrac_y; \
 723  \
 724 }
 725
 726
 727 #define BICUBIC(max, type, components) \
 728 { \
 729         float k_y = 1.0 / scale_h; \
 730         float k_x = 1.0 / scale_w; \
 731         type **in_rows = (type**)input->get_rows(); \
 732         type **out_rows = (type**)output->get_rows(); \
 733         float *bspline_x, *bspline_y; \
 734         int in_h_int = input->get_h(); \
 735         int in_w_int = input->get_w(); \
 736         type zero_r, zero_g, zero_b, zero_a; \
 737 /* printf("BICUBIC\n"); */ \
 738  \
 739         zero_r = 0; \
 740         zero_b = ((max + 1) >> 1) * (do_yuv); \
 741         zero_g = ((max + 1) >> 1) * (do_yuv); \
 742         if(components == 4) \
 743                 zero_a = 0; \
 744  \
 745         tabulate_bspline(bspline_x,  \
 746                 k_x, \
 747                 out_w_int, \
 748                 -1); \
 749  \
 750         tabulate_bspline(bspline_y,  \
 751                 k_y, \
 752                 out_h_int, \
 753                 1); \
 754  \
 755         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
 756         { \
 757                 int i_y = (int)(k_y * i); \
 758  \
 759  \
 760                 for(int j = 0; j < out_w_int; j++) \
 761                 { \
 762                         int i_x = (int)(k_x * j); \
 763                         float output1, output2, output3, output4; \
 764                         output1 = 0; \
 765                         output2 = 0; \
 766                         output3 = 0; \
 767                         if(components == 4) \
 768                                 output4 = 0; \
 769                         int table_y = i * 4; \
 770  \
 771 /* Kernel */ \
 772                         for(int m = -1; m < 3; m++) \
 773                         { \
 774                                 float r1 = bspline_y[table_y++]; \
 775                                 int y = in_y1_int + i_y + m; \
 776                                 int table_x = j * 4; \
 777  \
 778                                 CLAMP(y, 0, in_h_int - 1); \
 779  \
 780                                 for(int n = -1; n < 3; n++) \
 781                                 { \
 782                                         float r2 = bspline_x[table_x++]; \
 783                                         int x = in_x1_int + i_x + n; \
 784                                         float r_square = r1 * r2; \
 785  \
 786                                         CLAMP(x, 0, in_w_int - 1); \
 787  \
 788                                         output1 += r_square * in_rows[y][x * components]; \
 789                                         output2 += r_square * in_rows[y][x * components + 1]; \
 790                                         output3 += r_square * in_rows[y][x * components + 2]; \
 791                                         if(components == 4) \
 792                                                 output4 += r_square * in_rows[y][x * components + 3]; \
 793                                 } \
 794                         } \
 795  \
 796  \
 797                         out_rows[i][j * components] = (type)output1; \
 798                         out_rows[i][j * components + 1] = (type)output2; \
 799                         out_rows[i][j * components + 2] = (type)output3; \
 800                         if(components == 4) \
 801                                 out_rows[i][j * components + 3] = (type)output4; \
 802  \
 803                 } \
 804         } \
 805  \
 806         delete [] bspline_x; \
 807         delete [] bspline_y; \
 808 }
 809
 810
 811
 812 // pow() is not thread safe in Compaq C, so cube by plain multiplication.
 813 // The argument is expanded three times: pass side-effect-free expressions.
 814 #define CUBE(x) ((x) * (x) * (x))
 815
 816 float ScaleUnit::cubic_bspline(float x)
 817 {
 818         float a, b, c, d;
 819
 820         if((x + 2.0F) <= 0.0F)
 821         {
 822         a = 0.0F;
 823         }
 824         else
 825         {
 826         a = CUBE(x + 2.0F);
 827         }
 828
 829
 830         if((x + 1.0F) <= 0.0F)
 831         {
 832         b = 0.0F;
 833         }
 834         else
 835         {
 836         b = CUBE(x + 1.0F);
 837         }
 838
 839         if(x <= 0)
 840         {
 841         c = 0.0F;
 842         }
 843         else
 844         {
 845         c = CUBE(x);
 846         }
 847
 848         if((x - 1.0F) <= 0.0F)
 849         {
 850         d = 0.0F;
 851         }
 852         else
 853         {
 854         d = CUBE(x - 1.0F);
 855         }
 856
 857
 858         return (a - (4.0F * b) + (6.0F * c) - (4.0F * d)) / 6.0;
 859 }
 860
 861
 862 void ScaleUnit::tabulate_bspline(float* &table,
 863         float scale,
 864         int pixels,
 865         float coefficient)
 866 {
 867         table = new float[pixels * 4];
 868         for(int i = 0, j = 0; i < pixels; i++)
 869         {
 870                 float f_x = (float)i * scale;
 871                 float a = f_x - floor(f_x);
 872
 873                 for(float m = -1; m < 3; m++)
 874                 {
 875                         table[j++] = cubic_bspline(coefficient * (m - a));
 876                 }
 877
 878         }
 879 }
 880
 881 void ScaleUnit::tabulate_blinear(int* &table_int,
 882                 float* &table_frac,
 883                 float* &table_antifrac,
 884                 float scale,
 885                 int pixel1,
 886                 int pixel2,
 887                 total_pixels)
 888 {
 889         table_int = new int[pixel2 - pixel1];
 890         table_frac = new float[pixel2 - pixel1];
 891         table_antifrac = new float[pixel2 - pixel1];
 892
 893         for(int i = pixel1, j = 0; i < pixel2; i++, j++)
 894         {
 895                 float f_x = (float)i * scale;
 896                 int i_x = (int)floor(f_x);
 897                 float a = (f_x - floor(f_x));
 898
 899                 table_int[j] = CLAMP(i_x, 0, total_pixels - 1);
 900                 table_frac[j] = a;
 901                 table_antifrac[j] = 1.0F - a;
 902         }
 903 }
 904 // Run BICUBIC or BILINEAR for this package, chosen by interpolation type and color model.
 905 void ScaleUnit::process_package(LoadPackage *package)
 906 {
 907         ScalePackage *pkg = (ScalePackage*)package;
 908 // BICUBIC reads pkg and the locals below by name, so their names must not change.
 909 //printf("ScaleUnit::process_package 1\n");
 910 // Arguments for macros
 911         VFrame *output = engine->scale_output;
 912         VFrame *input = engine->scale_input;
 913         float scale_w = engine->w_scale;
 914         float scale_h = engine->h_scale;
 915         int in_x1_int = engine->in_x1_int;
 916         int in_y1_int = engine->in_y1_int;
 917         int out_h_int = engine->out_h_int;
 918         int out_w_int = engine->out_w_int;
 919         int do_yuv =
 920                 (input->get_color_model() == BC_YUV888 ||
 921                 input->get_color_model() == BC_YUVA8888 ||
 922                 input->get_color_model() == BC_YUV161616 ||
 923                 input->get_color_model() == BC_YUVA16161616);
 924 // do_yuv is 1 for the four YUV color models, 0 otherwise.
 925 //printf("ScaleUnit::process_package 2\n");
 926         if(engine->interpolation_type == CUBIC_CUBIC ||
 927                 (engine->interpolation_type == CUBIC_LINEAR
 928                         && engine->w_scale > 1 &&
 929                         engine->h_scale > 1))
 930         {
 931 // Bicubic: always for CUBIC_CUBIC, for CUBIC_LINEAR only when both scales exceed 1.
 932                 switch(engine->scale_input->get_color_model())
 933                 {
 934                         case BC_RGB888:
 935                         case BC_YUV888:
 936                                 BICUBIC(0xff, unsigned char, 3);
 937                                 break;
 938
 939                         case BC_RGBA8888:
 940                         case BC_YUVA8888:
 941                                 BICUBIC(0xff, unsigned char, 4);
 942                                 break;
 943
 944                         case BC_RGB161616:
 945                         case BC_YUV161616:
 946                                 BICUBIC(0xffff, uint16_t, 3);
 947                                 break;
 948
 949                         case BC_RGBA16161616:
 950                         case BC_YUVA16161616:
 951                                 BICUBIC(0xffff, uint16_t, 4);
 952                                 break;
 953                 }
 954         }
 955         else
 956 // Perform bilinear scaling input -> scale_output
 957         {
 958                 switch(engine->scale_input->get_color_model())
 959                 {
 960                         case BC_RGB888:
 961                         case BC_YUV888:
 962                                 BILINEAR(0xff, unsigned char, 3);
 963                                 break;
 964
 965                         case BC_RGBA8888:
 966                         case BC_YUVA8888:
 967                                 BILINEAR(0xff, unsigned char, 4);
 968                                 break;
 969
 970                         case BC_RGB161616:
 971                         case BC_YUV161616:
 972                                 BILINEAR(0xffff, uint16_t, 3);
 973                                 break;
 974
 975                         case BC_RGBA16161616:
 976                         case BC_YUVA16161616:
 977                                 BILINEAR(0xffff, uint16_t, 4);
 978                                 break;
 979                 }
 980         }
 981 //printf("ScaleUnit::process_package 3\n");
 982 // Color models not listed in the switches are left unprocessed.
 983 }
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997 ScaleEngine::ScaleEngine(OverlayFrame *overlay, int cpus) : LoadServer(cpus, cpus)
 998 {
 999 // Kept so new_client() can pass it on to each ScaleUnit.
1000         this->overlay = overlay;
1001 }
1002
1003 // Empty destructor body.
1004 ScaleEngine::~ScaleEngine() { }
1005
1006
1007 void ScaleEngine::init_packages()
1008 {
1009         for(int i = 0; i < total_packages; i++)
1010         {
1011                 ScalePackage *package = (ScalePackage*)packages[i];
1012                 package->out_row1 = out_h_int / total_packages * i;
1013                 package->out_row2 = package->out_row1 + out_h_int / total_packages;
1014
1015                 if(i >= total_packages - 1)
1016                         package->out_row2 = out_h_int;
1017         }
1018 }
1019 // Create a ScaleUnit bound to this engine and its OverlayFrame.
1020 LoadClient* ScaleEngine::new_client()
1021 {
1022         return new ScaleUnit(this, this->overlay);
1023 }
1024 // Allocate a new ScalePackage.
1025 LoadPackage* ScaleEngine::new_package()
1026 {
1027         ScalePackage *package = new ScalePackage;
1028         return package;
1029 }
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042 // Empty constructor body.
1043 TranslatePackage::TranslatePackage() { }
1044
1045
1046
1047
1048 TranslateUnit::TranslateUnit(TranslateEngine *server, OverlayFrame *overlay) : LoadClient(server)
1049 {
1050 // process_package() reads its parameters through engine.
1051         engine = server;
1052         this->overlay = overlay;
1053 }
1054
1055 // Empty destructor body.
1056 TranslateUnit::~TranslateUnit() { }
1057
1058
1059 // Build one transfer_table entry per output pixel in [out_x1_int, out_x2_int).
1060 // Allocates table with new[] and writes the integer output bounds to out_x1_int/out_x2_int.
1061 void TranslateUnit::translation_array(transfer_table* &table,
1062         float out_x1,
1063         float out_x2,
1064         float in_x1,
1065         float in_x2,
1066         int in_total,
1067         int out_total,
1068         int &out_x1_int,
1069         int &out_x2_int)
1070 {
1071         int out_w_int;
1072         float offset = out_x1 - in_x1;
1073 // offset is computed but not used below; in_x2 is likewise unused.
1074         out_x1_int = (int)out_x1;
1075         out_x2_int = MIN((int)ceil(out_x2), out_total);
1076         out_w_int = out_x2_int - out_x1_int;
1077 // One zero-filled entry per covered output pixel.
1078         table = new transfer_table[out_w_int];
1079         bzero(table, sizeof(transfer_table) * out_w_int);
1080
1081
1082 //printf("OverlayFrame::translation_array 1 %f %f -> %f %f\n", in_x1, in_x2, out_x1, out_x2);
1083 // in_x advances through the input by each output pixel's fraction.
1084         float in_x = in_x1;
1085         for(int out_x = out_x1_int; out_x < out_x2_int; out_x++)
1086         {
1087                 transfer_table *entry = &table[out_x - out_x1_int];
1088 // Source pixels: the one containing in_x and the next one.
1089                 entry->in_x1 = (int)in_x;
1090                 entry->in_x2 = (int)in_x + 1;
1091
1092 // Get fraction of output pixel to fill
1093                 entry->output_fraction = 1;
1094 // Trim the first pixel when out_x1 starts inside it.
1095                 if(out_x1 > out_x)
1096                 {
1097                         entry->output_fraction -= out_x1 - out_x;
1098                 }
1099 // When out_x2 ends inside this pixel the fraction is overwritten, not reduced.
1100                 if(out_x2 < out_x + 1)
1101                 {
1102                         entry->output_fraction = (out_x2 - out_x);
1103                 }
1104
1105 // Advance in_x until out_x_fraction is filled
1106                 float out_x_fraction = entry->output_fraction;
1107                 float in_x_fraction = floor(in_x + 1) - in_x;
1108 // in_x_fraction is the distance from in_x to the next integer boundary.
1109                 if(out_x_fraction <= in_x_fraction)
1110                 {
1111                         entry->in_fraction1 = out_x_fraction;
1112                         entry->in_fraction2 = 0.0;
1113                         in_x += out_x_fraction;
1114                 }
1115                 else
1116                 {
1117                         entry->in_fraction1 = in_x_fraction;
1118                         in_x += out_x_fraction;
1119                         entry->in_fraction2 = in_x - floor(in_x);
1120                 }
1121
1122 // Clip in_x and zero out fraction.  This doesn't work for YUV.
1123                 if(entry->in_x2 >= in_total)
1124                 {
1125                         entry->in_x2 = in_total - 1;
1126                         entry->in_fraction2 = 0.0;
1127                 }
1128
1129                 if(entry->in_x1 >= in_total)
1130                 {
1131                         entry->in_x1 = in_total - 1;
1132                         entry->in_fraction1 = 0.0;
1133                 }
1134 // printf("OverlayFrame::translation_array 2 %d %d %d %f %f %f\n",
1135 //      out_x,
1136 //      entry->in_x1,
1137 //      entry->in_x2,
1138 //      entry->in_fraction1,
1139 //      entry->in_fraction2,
1140 //      entry->output_fraction);
1141         }
1142 }
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172 // TRANSLATE(max, type, components): blend input onto output using the fractional
1173 // weights in x_table/y_table. Each output pixel mixes up to four input pixels,
1174 // then BLEND_3 or BLEND_4 combines the result with the destination pixel.
1175 // Expects row1, row2, x_table, y_table, out_x1_int, out_x2_int, out_y1_int, alpha,
1176 // mode, do_yuv, input and output to be in scope at the expansion site.
1177 #define TRANSLATE(max, type, components) \
1178 { \
1179  \
1180         type **in_rows = (type**)input->get_rows(); \
1181         type **out_rows = (type**)output->get_rows(); \
1182  \
1183 /* printf("OverlayFrame::translate 1  %.2f %.2f %.2f %.2f -> %.2f %.2f %.2f %.2f\n",  */ \
1184 /*      (in_x1),  in_y1,  in_x2,  in_y2,  out_x1,  out_y1, out_x2,  out_y2); */ \
1185 /* alpha scaled to 0..max, with 0.5 added before truncation */ \
1186         unsigned int master_opacity = (int)(alpha * max + 0.5); \
1187         unsigned int master_transparency = max - master_opacity; \
1188         type zero_r, zero_g, zero_b, zero_a; \
1189         zero_r = 0; \
1190         zero_b = ((max + 1) >> 1) * (do_yuv); \
1191         zero_g = ((max + 1) >> 1) * (do_yuv); \
1192         if(components == 4) \
1193                 zero_a = 0; \
1194  \
1195 /* printf("TRANSLATE %d\n", mode); */ \
1196  \
1197         for(int i = row1; i < row2; i++) \
1198         { \
1199                 int in_y1 = y_table[i - out_y1_int].in_x1; \
1200                 int in_y2 = y_table[i - out_y1_int].in_x2; \
1201                 float y_fraction1 = y_table[i - out_y1_int].in_fraction1; \
1202                 float y_fraction2 = y_table[i - out_y1_int].in_fraction2; \
1203                 float y_output_fraction = y_table[i - out_y1_int].output_fraction; \
1204                 type *in_row1 = in_rows[(in_y1)]; \
1205                 type *in_row2 = in_rows[(in_y2)]; \
1206                 type *out_row = out_rows[i]; \
1207 /* Walk the output columns covered by x_table */ \
1208                 for(int j = out_x1_int; j < out_x2_int; j++) \
1209                 { \
1210                         int in_x1 = x_table[j - out_x1_int].in_x1; \
1211                         int in_x2 = x_table[j - out_x1_int].in_x2; \
1212                         float x_fraction1 = x_table[j - out_x1_int].in_fraction1; \
1213                         float x_fraction2 = x_table[j - out_x1_int].in_fraction2; \
1214                         float x_output_fraction = x_table[j - out_x1_int].output_fraction; \
1215                         type *output = &out_row[j * components]; \
1216                         int input1, input2, input3, input4; \
1217 /* Weighted sum of the 2x2 input pixels per channel, plus 0.5 before truncation */ \
1218                         input1 = (int)(in_row1[in_x1 * components] * x_fraction1 * y_fraction1 +  \
1219                                 in_row1[in_x2 * components] * x_fraction2 * y_fraction1 +  \
1220                                 in_row2[in_x1 * components] * x_fraction1 * y_fraction2 +  \
1221                                 in_row2[in_x2 * components] * x_fraction2 * y_fraction2 + 0.5); \
1222                         input2 = (int)(in_row1[in_x1 * components + 1] * x_fraction1 * y_fraction1 +  \
1223                                 in_row1[in_x2 * components + 1] * x_fraction2 * y_fraction1 +  \
1224                                 in_row2[in_x1 * components + 1] * x_fraction1 * y_fraction2 +  \
1225                                 in_row2[in_x2 * components + 1] * x_fraction2 * y_fraction2 + 0.5); \
1226                         input3 = (int)(in_row1[in_x1 * components + 2] * x_fraction1 * y_fraction1 +  \
1227                                 in_row1[in_x2 * components + 2] * x_fraction2 * y_fraction1 +  \
1228                                 in_row2[in_x1 * components + 2] * x_fraction1 * y_fraction2 +  \
1229                                 in_row2[in_x2 * components + 2] * x_fraction2 * y_fraction2 + 0.5); \
1230                         if(components == 4) \
1231                                 input4 = (int)(in_row1[in_x1 * components + 3] * x_fraction1 * y_fraction1 +  \
1232                                         in_row1[in_x2 * components + 3] * x_fraction2 * y_fraction1 +  \
1233                                         in_row2[in_x1 * components + 3] * x_fraction1 * y_fraction2 +  \
1234                                         in_row2[in_x2 * components + 3] * x_fraction2 * y_fraction2 + 0.5); \
1235 /* Per-pixel opacity is the master opacity scaled by both output fractions */ \
1236                         unsigned int opacity = (int)(master_opacity *  \
1237                                 y_output_fraction *  \
1238                                 x_output_fraction + 0.5); \
1239                         unsigned int transparency = max - opacity; \
1240  \
1241 /* if(opacity != max) printf("TRANSLATE %x %d %d\n", opacity, j, i); */ \
1242  \
1243                         if(components == 3) \
1244                         { \
1245                                 BLEND_3(max, type); \
1246                         } \
1247                         else \
1248                         { \
1249                                 BLEND_4(max, type); \
1250                         } \
1251                 } \
1252         } \
1253 }
1254 // Blend rows [pkg->out_row1, pkg->out_row2) of the input onto the output via TRANSLATE.
1255 void TranslateUnit::process_package(LoadPackage *package)
1256 {
1257         TranslatePackage *pkg = (TranslatePackage*)package;
1258         int out_y1_int;
1259         int out_y2_int;
1260         int out_x1_int;
1261         int out_x2_int;
1262 // The four integer bounds above are filled in by translation_array().
1263
1264 // Variables for TRANSLATE
1265         VFrame *input = engine->translate_input;
1266         VFrame *output = engine->translate_output;
1267         float in_x1 = engine->translate_in_x1;
1268         float in_y1 = engine->translate_in_y1;
1269         float in_x2 = engine->translate_in_x2;
1270         float in_y2 = engine->translate_in_y2;
1271         float out_x1 = engine->translate_out_x1;
1272         float out_y1 = engine->translate_out_y1;
1273         float out_x2 = engine->translate_out_x2;
1274         float out_y2 = engine->translate_out_y2;
1275         float alpha = engine->translate_alpha;
1276         int row1 = pkg->out_row1;
1277         int row2 = pkg->out_row2;
1278         int mode = engine->translate_mode;
1279         int in_total_x = input->get_w();
1280         int in_total_y = input->get_h();
1281         int do_yuv =
1282                 (engine->translate_input->get_color_model() == BC_YUV888 ||
1283                 engine->translate_input->get_color_model() == BC_YUVA8888 ||
1284                 engine->translate_input->get_color_model() == BC_YUV161616 ||
1285                 engine->translate_input->get_color_model() == BC_YUVA16161616);
1286 // Both tables are rebuilt for every package and freed before returning.
1287         transfer_table *x_table;
1288         transfer_table *y_table;
1289 // The x table uses the frame widths, the y table the frame heights.
1290         translation_array(x_table,
1291                 out_x1,
1292                 out_x2,
1293                 in_x1,
1294                 in_x2,
1295                 in_total_x,
1296                 output->get_w(),
1297                 out_x1_int,
1298                 out_x2_int);
1299         translation_array(y_table,
1300                 out_y1,
1301                 out_y2,
1302                 in_y1,
1303                 in_y2,
1304                 in_total_y,
1305                 output->get_h(),
1306                 out_y1_int,
1307                 out_y2_int);
1308 // Expand TRANSLATE for the component type and count of the input's color model.
1309         switch(engine->translate_input->get_color_model())
1310         {
1311                 case BC_RGB888:
1312                 case BC_YUV888:
1313                         TRANSLATE(0xff, unsigned char, 3);
1314                         break;
1315
1316                 case BC_RGBA8888:
1317                 case BC_YUVA8888:
1318                         TRANSLATE(0xff, unsigned char, 4);
1319                         break;
1320
1321                 case BC_RGB161616:
1322                 case BC_YUV161616:
1323                         TRANSLATE(0xffff, uint16_t, 3);
1324                         break;
1325
1326                 case BC_RGBA16161616:
1327                 case BC_YUVA16161616:
1328                         TRANSLATE(0xffff, uint16_t, 4);
1329                         break;
1330         }
1331 // Color models not listed above skip the blend; the tables are still freed.
1332         delete [] x_table;
1333         delete [] y_table;
1334 }
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345 TranslateEngine::TranslateEngine(OverlayFrame *overlay, int cpus) : LoadServer(cpus, cpus)
1346 {
1347 // Kept so new_client() can pass it on to each TranslateUnit.
1348         this->overlay = overlay;
1349 }
1350
1351 // Empty destructor body.
1352 TranslateEngine::~TranslateEngine() { }
1353
1354
1355 void TranslateEngine::init_packages()
1356 {
1357         int out_y1_int = (int)translate_out_y1;
1358         int out_y2_int = MIN((int)ceil(translate_out_y2), translate_output->get_h());
1359         int out_h = out_y2_int - out_y1_int;
1360
1361         for(int i = 0; i < total_packages; i++)
1362         {
1363                 TranslatePackage *package = (TranslatePackage*)packages[i];
1364                 package->out_row1 = (int)(out_y1_int + out_h /
1365                         total_packages *
1366                         i);
1367                 package->out_row2 = (int)((float)package->out_row1 +
1368                         out_h /
1369                         total_packages);
1370                 if(i >= total_packages - 1)
1371                         package->out_row2 = out_y2_int;
1372         }
1373 }
1374 // Create a TranslateUnit bound to this engine and its OverlayFrame.
1375 LoadClient* TranslateEngine::new_client()
1376 {
1377         return new TranslateUnit(this, this->overlay);
1378 }
1379 // Allocate a new TranslatePackage.
1380 LoadPackage* TranslateEngine::new_package()
1381 {
1382         TranslatePackage *package = new TranslatePackage;
1383         return package;
1384 }
1385
1386
1387
1388
1389 // SCALE_TRANSLATE(max, type, components): blend with table-driven source lookup.
1390 // Source rows come from y_table; source columns come from x_table only when the
1391 // input and output widths differ. Expects pkg, alpha, mode and the tables in scope.
1392 #define SCALE_TRANSLATE(max, type, components) \
1393 { \
1394         int64_t opacity = (int)(alpha * max + 0.5); \
1395         int64_t transparency = max - opacity; \
1396         int out_w = out_x2 - out_x1; \
1397  \
1398         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
1399         { \
1400                 int in_y = y_table[i - out_y1]; \
1401                 type *in_row = (type*)in_rows[in_y] + in_x1 * components; \
1402                 type *out_row = (type*)out_rows[i] + out_x1 * components; \
1403 /* Both row pointers now address the first column of their rectangle */ \
1404 /* X direction is scaled and requires a table lookup */ \
1405                 if(out_w != in_x2 - in_x1) \
1406                 { \
1407                         for(int j = 0; j < out_w; j++) \
1408                         { \
1409                                 int in_x = x_table[j]; \
1410                                 int input1, input2, input3, input4; \
1411                                 type *output = out_row + j * components; \
1412 /* in_x is relative to in_row, which already includes in_x1 */ \
1413                                 input1 = in_row[in_x * components]; \
1414                                 input2 = in_row[in_x * components + 1]; \
1415                                 input3 = in_row[in_x * components + 2]; \
1416                                 if(components == 4) \
1417                                         input4 = in_row[in_x * components + 3]; \
1418          \
1419                                 if(components == 3) \
1420                                 { \
1421                                         BLEND_3(max, type); \
1422                                 } \
1423                                 else \
1424                                 { \
1425                                         BLEND_4(max, type); \
1426                                 } \
1427                         } \
1428                 } \
1429                 else \
1430 /* X direction is not scaled */ \
1431                 { \
1432                         for(int j = 0; j < out_w; j++) \
1433                         { \
1434                                 int input1, input2, input3, input4; \
1435                                 type *output = out_row + j * components; \
1436          \
1437                                 input1 = in_row[j * components]; \
1438                                 input2 = in_row[j * components + 1]; \
1439                                 input3 = in_row[j * components + 2]; \
1440                                 if(components == 4) \
1441                                         input4 = in_row[j * components + 3]; \
1442          \
1443                                 if(components == 3) \
1444                                 { \
1445                                         BLEND_3(max, type); \
1446                                 } \
1447                                 else \
1448                                 { \
1449                                         BLEND_4(max, type); \
1450                                 } \
1451                         } \
1452                 } \
1453         } \
1454 }
1455
1456
1457
1458 ScaleTranslateUnit::ScaleTranslateUnit(ScaleTranslateEngine *server, OverlayFrame *overlay) : LoadClient(server)
1459 {
1460 // process_package() reads its parameters through scale_translate.
1461         scale_translate = server;
1462         this->overlay = overlay;
1463 }
1464
1465 // Empty destructor body.
1466 ScaleTranslateUnit::~ScaleTranslateUnit() { }
1467
1468
1469 void ScaleTranslateUnit::scale_array(int* &table,
1470         int out_x1,
1471         int out_x2,
1472         int in_x1,
1473         int in_x2,
1474         int is_x)
1475 {
1476         float scale = (float)(out_x2 - out_x1) / (in_x2 - in_x1);
1477
1478         table = new int[out_x2 - out_x1];
1479
1480         if(!is_x)
1481         {
1482                 for(int i = 0; i < out_x2 - out_x1; i++)
1483                 {
1484                         table[i] = (int)((float)i / scale + in_x1);
1485                 }
1486         }
1487         else
1488         {
1489                 for(int i = 0; i < out_x2 - out_x1; i++)
1490                 {
1491                         table[i] = (int)((float)i / scale);
1492                 }
1493         }
1494 }
1495
1496
1497 void ScaleTranslateUnit::process_package(LoadPackage *package)
1498 {
1499         ScaleTranslatePackage *pkg = (ScaleTranslatePackage*)package;
1500
1501 // Args for NEAREST_NEIGHBOR_MACRO
1502         VFrame *output = scale_translate->output;
1503         VFrame *input = scale_translate->input;
1504         int in_x1 = scale_translate->in_x1;
1505         int in_y1 = scale_translate->in_y1;
1506         int in_x2 = scale_translate->in_x2;
1507         int in_y2 = scale_translate->in_y2;
1508         int out_x1 = scale_translate->out_x1;
1509         int out_y1 = scale_translate->out_y1;
1510         int out_x2 = scale_translate->out_x2;
1511         int out_y2 = scale_translate->out_y2;
1512         float alpha = scale_translate->alpha;
1513         int mode = scale_translate->mode;
1514
1515         int *x_table;
1516         int *y_table;
1517         unsigned char **in_rows = input->get_rows();
1518         unsigned char **out_rows = output->get_rows();
1519
1520         scale_array(x_table,
1521                 out_x1,
1522                 out_x2,
1523                 in_x1,
1524                 in_x2,
1525                 1);
1526         scale_array(y_table,
1527                 out_y1,
1528                 out_y2,
1529                 in_y1,
1530                 in_y2,
1531                 0);
1532
1533
1534         switch(input->get_color_model())
1535         {
1536                 case BC_RGB888:
1537                 case BC_YUV888:
1538                         SCALE_TRANSLATE(0xff, uint8_t, 3);
1539                         break;
1540
1541                 case BC_RGBA8888:
1542                 case BC_YUVA8888:
1543                         SCALE_TRANSLATE(0xff, uint8_t, 4);
1544                         break;
1545
1546
1547                 case BC_RGB161616:
1548                 case BC_YUV161616:
1549                         SCALE_TRANSLATE(0xffff, uint16_t, 3);
1550                         break;
1551
1552                 case BC_RGBA16161616:
1553                 case BC_YUVA16161616:
1554                         SCALE_TRANSLATE(0xffff, uint16_t, 4);
1555                         break;
1556         }
1557
1558         delete [] x_table;
1559         delete [] y_table;
1560
1561 };
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571 ScaleTranslateEngine::ScaleTranslateEngine(OverlayFrame *overlay, int cpus) : LoadServer(cpus, cpus)
1572 {
1573 // Kept so new_client() can pass it on to each ScaleTranslateUnit.
1574         this->overlay = overlay;
1575 }
1576
1577 // Empty destructor body.
1578 ScaleTranslateEngine::~ScaleTranslateEngine() { }
1579
1580
1581 void ScaleTranslateEngine::init_packages()
1582 {
1583         int out_h = out_y2 - out_y1;
1584
1585         for(int i = 0; i < total_packages; i++)
1586         {
1587                 ScaleTranslatePackage *package = (ScaleTranslatePackage*)packages[i];
1588                 package->out_row1 = (int)(out_y1 + out_h /
1589                         total_packages *
1590                         i);
1591                 package->out_row2 = (int)((float)package->out_row1 +
1592                         out_h /
1593                         total_packages);
1594                 if(i >= total_packages - 1)
1595                         package->out_row2 = out_y2;
1596         }
1597 }
1598 // Create a ScaleTranslateUnit bound to this engine and its OverlayFrame.
1599 LoadClient* ScaleTranslateEngine::new_client()
1600 {
1601         return new ScaleTranslateUnit(this, this->overlay);
1602 }
1603 // Allocate a new ScaleTranslatePackage.
1604 LoadPackage* ScaleTranslateEngine::new_package()
1605 {
1606         ScaleTranslatePackage *package = new ScaleTranslatePackage;
1607         return package;
1608 }
1609
1610 // Empty constructor body.
1611 ScaleTranslatePackage::ScaleTranslatePackage() { }
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641 #define BLEND_ONLY(type, max, components) \
1642 { \
1643         int64_t opacity = (int)(alpha * max + 0.5); \
1644         int64_t transparency = max - opacity; \
1645  \
1646         type** output_rows = (type**)output->get_rows(); \
1647         type** input_rows = (type**)input->get_rows(); \
1648         int w = input->get_w(); \
1649         int h = input->get_h(); \
1650  \
1651         for(int i = pkg->out_row1; i < pkg->out_row2; i++) \
1652         { \
1653                 type* in_row = input_rows[i]; \
1654                 type* output = output_rows[i]; \
1655  \
1656                 for(int j = 0; j < w; j++) \
1657                 { \
1658                         int input1, input2, input3, input4; \
1659                         input1 = in_row[j * components]; \
1660                         input2 = in_row[j * components + 1]; \
1661                         input3 = in_row[j * components + 2]; \
1662                         if(components == 4) input4 = in_row[j * components + 3]; \
1663  \
1664  \
1665                         if(components == 3) \
1666                         { \
1667                                 BLEND_3(max, type); \
1668                         } \
1669                         else \
1670                         { \
1671                                 BLEND_4(max, type); \
1672                         } \
1673  \
1674                         input += components; \
1675                         output += components; \
1676                 } \
1677         } \
1678 }
1679
1680
1681
1682
1683 BlendUnit::BlendUnit(BlendEngine *server, OverlayFrame *overlay) : LoadClient(server)
1684 {
1685 // process_package() reads its parameters through blend_engine.
1686         blend_engine = server;
1687         this->overlay = overlay;
1688 }
1689
1690 // Empty destructor body.
1691 BlendUnit::~BlendUnit() { }
1692
1693 // Blend rows [pkg->out_row1, pkg->out_row2) of input onto output via BLEND_ONLY.
1694 void BlendUnit::process_package(LoadPackage *package)
1695 {
1696         BlendPackage *pkg = (BlendPackage*)package;
1697
1698 // BLEND_ONLY and the blend macros it expands read pkg and these locals by name.
1699         VFrame *output = blend_engine->output;
1700         VFrame *input = blend_engine->input;
1701         float alpha = blend_engine->alpha;
1702         int mode = blend_engine->mode;
1703 // Macro arguments are component type, component maximum, components per pixel.
1704         switch(input->get_color_model())
1705         {
1706                 case BC_RGB888:
1707                 case BC_YUV888:
1708                         BLEND_ONLY(unsigned char, 0xff, 3);
1709                         break;
1710                 case BC_RGBA8888:
1711                 case BC_YUVA8888:
1712                         BLEND_ONLY(unsigned char, 0xff, 4);
1713                         break;
1714                 case BC_RGB161616:
1715                 case BC_YUV161616:
1716                         BLEND_ONLY(uint16_t, 0xffff, 3);
1717                         break;
1718                 case BC_RGBA16161616:
1719                 case BC_YUVA16161616:
1720                         BLEND_ONLY(uint16_t, 0xffff, 4);
1721                         break;
1722         }
1723 }
1724
1725
1726
1727 BlendEngine::BlendEngine(OverlayFrame *overlay, int cpus) : LoadServer(cpus, cpus)
1728 {
1729 // Kept so new_client() can pass it on to each BlendUnit.
1730         this->overlay = overlay;
1731 }
1732
1733 // Empty destructor body.
1734 BlendEngine::~BlendEngine() { }
1735
1736
1737 void BlendEngine::init_packages()
1738 {
1739         for(int i = 0; i < total_packages; i++)
1740         {
1741                 BlendPackage *package = (BlendPackage*)packages[i];
1742                 package->out_row1 = (int)(input->get_h() /
1743                         total_packages *
1744                         i);
1745                 package->out_row2 = (int)((float)package->out_row1 +
1746                         input->get_h() /
1747                         total_packages);
1748
1749                 if(i >= total_packages - 1)
1750                         package->out_row2 = input->get_h();
1751         }
1752 }
1753 // Create a BlendUnit bound to this engine and its OverlayFrame.
1754 LoadClient* BlendEngine::new_client()
1755 {
1756         return new BlendUnit(this, this->overlay);
1757 }
1758 // Allocate a new BlendPackage.
1759 LoadPackage* BlendEngine::new_package()
1760 {
1761         BlendPackage *package = new BlendPackage;
1762         return package;
1763 }
1764
1765 // Empty constructor body.
1766 BlendPackage::BlendPackage() { }
1767
1768
1769