r793: Small API addon, so plugins can 'see' camera and projector automation
[cinelerra_cv/mob.git] / cinelerra / maskengine.C
blobdad6ebf2de6621164200f1a060ae2e08abde5ee3
1 #include "clip.h"
2 #include "condition.h"
3 #include "maskauto.h"
4 #include "maskautos.h"
5 #include "maskengine.h"
6 #include "mutex.h"
7 #include "vframe.h"
9 #include <math.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <limits.h>
14 #include "feather.h"
// Returns the time elapsed since *start_time, in microseconds.
// The current time is taken from gettimeofday(); start_time is only read.
int64_t get_difference(struct timeval *start_time)
{
	struct timeval now;
	gettimeofday(&now, 0);

	// Subtract the two fields independently ...
	struct timeval elapsed;
	elapsed.tv_sec = now.tv_sec - start_time->tv_sec;
	elapsed.tv_usec = now.tv_usec - start_time->tv_usec;

	// ... and borrow one second when the microsecond part went negative.
	if(elapsed.tv_usec < 0)
	{
		elapsed.tv_sec -= 1;
		elapsed.tv_usec += 1000000;
	}

	const int64_t whole_seconds = (int64_t)elapsed.tv_sec;
	return whole_seconds * 1000000 + (int64_t)elapsed.tv_usec;
}
38 MaskPackage::MaskPackage()
42 MaskPackage::~MaskPackage()
50 MaskUnit::MaskUnit(MaskEngine *engine)
51  : LoadClient(engine)
53         this->engine = engine;
54         row_spans_h = 0;
55         row_spans = 0;
59 MaskUnit::~MaskUnit()
61         if (row_spans)
62         {
63                 for (int i = 0; i < row_spans_h; i++) 
64                         free(row_spans[i]);
65                 delete [] row_spans;
66         }
// Square of the argument. Only defined when no earlier header supplied SQR.
// The argument is expanded twice, so it must not have side effects.
#if !defined(SQR)
#define SQR(value) ((value) * (value))
#endif
75 inline void MaskUnit::draw_line_clamped(
76         int draw_x1, 
77         int draw_y1, 
78         int draw_x2, 
79         int draw_y2,
80         int w,
81         int h,
82         int hoffset)
84 //printf("MaskUnit::draw_line_clamped 1 %d %d %d %d\n", x1, y1, x2, y2);
85         if (draw_y1 == draw_y2) return; 
87         if(draw_y2 < draw_y1)
88         { /* change the order */
89                 int tmp;
90                 tmp = draw_x1;
91                 draw_x1 = draw_x2;
92                 draw_x2 = tmp;
93                 tmp = draw_y1;
94                 draw_y1 = draw_y2;
95                 draw_y2 = tmp;
96         }
98         float slope = ((float)draw_x2 - draw_x1) / ((float)draw_y2 - draw_y1); 
99         w--;
100         for(int y_i = draw_y1; y_i < draw_y2; y_i++) 
101         { 
102                 if (y_i >= h) 
103                         return; // since y gets larger, there is no point in continuing
104                 else if(y_i >= 0) 
105                 { 
106                         int x = (int)(slope * (y_i - draw_y1) + draw_x1); 
107                         int x_i = CLIP(x, 0, w); 
109                         /* now insert into span in order */
110                         short *span = row_spans[y_i + hoffset]; 
111                         if (span[0] >= span[1]) { /* do the reallocation */
112                                 span[1] *= 2;
113                                 span = row_spans[y_i + hoffset] = (short *) realloc (span, span[1] * sizeof(short)); /* be careful! row_spans has to be updated! */
114                         };
116                         short index = 2;
117                         while (index < span[0]  && span[index] < x_i)
118                                 index++;
119                         for (int j = span[0]; j > index; j--) {       // move forward
120                                 span[j] = span[j-1];
121                         }
122                         span[index] = x_i;
123                         span[0] ++;
124                 } 
125         } 
128 template<class T>
129 void MaskUnit::blur_strip(float *val_p, 
130         float *val_m, 
131         float *dst, 
132         float *src, 
133         int size,
134         T max)
136         float *sp_p = src;
137         float *sp_m = src + size - 1;
138         float *vp = val_p;
139         float *vm = val_m + size - 1;
140         float initial_p = sp_p[0];
141         float initial_m = sp_m[0];
143 //printf("MaskUnit::blur_strip %d\n", size);
144         for(int k = 0; k < size; k++)
145         {
146                 int terms = (k < 4) ? k : 4;
147                 int l;
148                 for(l = 0; l <= terms; l++)
149                 {
150                         *vp += n_p[l] * sp_p[-l] - d_p[l] * vp[-l];
151                         *vm += n_m[l] * sp_m[l] - d_m[l] * vm[l];
152                 }
154                 for( ; l <= 4; l++)
155                 {
156                         *vp += (n_p[l] - bd_p[l]) * initial_p;
157                         *vm += (n_m[l] - bd_m[l]) * initial_m;
158                 }
159                 sp_p++;
160                 sp_m--;
161                 vp++;
162                 vm--;
163         }
165         for(int i = 0; i < size; i++)
166         {
167                 float sum = val_p[i] + val_m[i];
168                 CLAMP(sum, 0, max);
169                 dst[i] = sum;
170         }
175 int MaskUnit::do_feather_2(VFrame *output,
176         VFrame *input, 
177         float feather, 
178         int start_out, 
179         int end_out)
181         
182         int fint = (int)feather;
183         DO_FEATHER_N(unsigned char, uint32_t, 0xffff, fint);
188 void MaskUnit::do_feather(VFrame *output,
189         VFrame *input, 
190         float feather, 
191         int start_out, 
192         int end_out)
194 //printf("MaskUnit::do_feather %f\n", feather);
195 // Get constants
196         double constants[8];
197         double div;
198         double std_dev = sqrt(-(double)(feather * feather) / (2 * log(1.0 / 255.0)));
199         div = sqrt(2 * M_PI) * std_dev;
200         constants[0] = -1.783 / std_dev;
201         constants[1] = -1.723 / std_dev;
202         constants[2] = 0.6318 / std_dev;
203         constants[3] = 1.997  / std_dev;
204         constants[4] = 1.6803 / div;
205         constants[5] = 3.735 / div;
206         constants[6] = -0.6803 / div;
207         constants[7] = -0.2598 / div;
209         n_p[0] = constants[4] + constants[6];
210         n_p[1] = exp(constants[1]) *
211                                 (constants[7] * sin(constants[3]) -
212                                 (constants[6] + 2 * constants[4]) * cos(constants[3])) +
213                                 exp(constants[0]) *
214                                 (constants[5] * sin(constants[2]) -
215                                 (2 * constants[6] + constants[4]) * cos(constants[2]));
217         n_p[2] = 2 * exp(constants[0] + constants[1]) *
218                                 ((constants[4] + constants[6]) * cos(constants[3]) * 
219                                 cos(constants[2]) - constants[5] * 
220                                 cos(constants[3]) * sin(constants[2]) -
221                                 constants[7] * cos(constants[2]) * sin(constants[3])) +
222                                 constants[6] * exp(2 * constants[0]) +
223                                 constants[4] * exp(2 * constants[1]);
225         n_p[3] = exp(constants[1] + 2 * constants[0]) *
226                                 (constants[7] * sin(constants[3]) - 
227                                 constants[6] * cos(constants[3])) +
228                                 exp(constants[0] + 2 * constants[1]) *
229                                 (constants[5] * sin(constants[2]) - constants[4] * 
230                                 cos(constants[2]));
231         n_p[4] = 0.0;
233         d_p[0] = 0.0;
234         d_p[1] = -2 * exp(constants[1]) * cos(constants[3]) -
235                                 2 * exp(constants[0]) * cos(constants[2]);
237         d_p[2] = 4 * cos(constants[3]) * cos(constants[2]) * 
238                                 exp(constants[0] + constants[1]) +
239                                 exp(2 * constants[1]) + exp (2 * constants[0]);
241         d_p[3] = -2 * cos(constants[2]) * exp(constants[0] + 2 * constants[1]) -
242                                 2 * cos(constants[3]) * exp(constants[1] + 2 * constants[0]);
244         d_p[4] = exp(2 * constants[0] + 2 * constants[1]);
246         for(int i = 0; i < 5; i++) d_m[i] = d_p[i];
248         n_m[0] = 0.0;
249         for(int i = 1; i <= 4; i++)
250                 n_m[i] = n_p[i] - d_p[i] * n_p[0];
252         double sum_n_p, sum_n_m, sum_d;
253         double a, b;
255         sum_n_p = 0.0;
256         sum_n_m = 0.0;
257         sum_d = 0.0;
258         for(int i = 0; i < 5; i++)
259         {
260                 sum_n_p += n_p[i];
261                 sum_n_m += n_m[i];
262                 sum_d += d_p[i];
263         }
265         a = sum_n_p / (1 + sum_d);
266         b = sum_n_m / (1 + sum_d);
268         for(int i = 0; i < 5; i++)
269         {
270                 bd_p[i] = d_p[i] * a;
271                 bd_m[i] = d_m[i] * b;
272         }
295 #define DO_FEATHER(type, max) \
296 { \
297         int frame_w = input->get_w(); \
298         int frame_h = input->get_h(); \
299         int size = MAX(frame_w, frame_h); \
300         float *src = new float[size]; \
301         float *dst = new float[size]; \
302         float *val_p = new float[size]; \
303         float *val_m = new float[size]; \
304         int start_in = start_out - (int)feather; \
305         int end_in = end_out + (int)feather; \
306         if(start_in < 0) start_in = 0; \
307         if(end_in > frame_h) end_in = frame_h; \
308         int strip_size = end_in - start_in; \
309         type **in_rows = (type**)input->get_rows(); \
310         type **out_rows = (type**)output->get_rows(); \
311         int j; \
313 /* printf("DO_FEATHER 1\n"); */ \
314         for(j = 0; j < frame_w; j++) \
315         { \
316 /* printf("DO_FEATHER 1.1 %d\n", j); */ \
317                 bzero(val_p, sizeof(float) * (end_in - start_in)); \
318                 bzero(val_m, sizeof(float) * (end_in - start_in)); \
319                 for(int l = 0, k = start_in; k < end_in; l++, k++) \
320                 { \
321                         src[l] = (float)in_rows[k][j]; \
322                 } \
324                 blur_strip(val_p, val_m, dst, src, strip_size, max); \
326                 for(int l = start_out - start_in, k = start_out; k < end_out; l++, k++) \
327                 { \
328                         out_rows[k][j] = (type)dst[l]; \
329                 } \
330         } \
332         for(j = start_out; j < end_out; j++) \
333         { \
334 /* printf("DO_FEATHER 2 %d\n", j); */ \
335                 bzero(val_p, sizeof(float) * frame_w); \
336                 bzero(val_m, sizeof(float) * frame_w); \
337                 for(int k = 0; k < frame_w; k++) \
338                 { \
339                         src[k] = (float)out_rows[j][k]; \
340                 } \
342                 blur_strip(val_p, val_m, dst, src, frame_w, max); \
344                 for(int k = 0; k < frame_w; k++) \
345                 { \
346                         out_rows[j][k] = (type)dst[k]; \
347                 } \
348         } \
350 /* printf("DO_FEATHER 3\n"); */ \
352         delete [] src; \
353         delete [] dst; \
354         delete [] val_p; \
355         delete [] val_m; \
356 /* printf("DO_FEATHER 4\n"); */ \
366 //printf("do_feather %d\n", frame->get_color_model());
367         switch(input->get_color_model())
368         {
369                 case BC_A8:
370                         DO_FEATHER(unsigned char, 0xff);
371                         break;
372                 
373                 case BC_A16:
374                         DO_FEATHER(uint16_t, 0xffff);
375                         break;
377                 case BC_A_FLOAT:
378                         DO_FEATHER(float, 1.0f);
379                         break;
380         }
387 void MaskUnit::process_package(LoadPackage *package)
389         MaskPackage *ptr = (MaskPackage*)package;
390         
391         int start_row = SHRT_MIN;         // part for which mask exists
392         int end_row;
393         if(engine->recalculate)
394         {
395                 VFrame *mask;
396 //printf("MaskUnit::process_package 1 %d\n", get_package_number());
397                 if(engine->feather > 0) 
398                         mask = engine->temp_mask;
399                 else
400                         mask = engine->mask;
402                 int mask_w = mask->get_w();
403                 int mask_h = mask->get_h();
404                 int mask_color_model = mask->get_color_model();
405                 int oversampled_package_w = mask_w * OVERSAMPLE;
406                 int oversampled_package_h = (ptr->row2 - ptr->row1) * OVERSAMPLE;
407                 int local_first_nonempty_rowspan = SHRT_MIN;
408                 int local_last_nonempty_rowspan = SHRT_MIN;
410                 if (!row_spans || row_spans_h != mask_h * OVERSAMPLE) {
411                         int i;  
412                         if (row_spans) {   /* size change */
413                                 for (i = 0; i < row_spans_h; i++) 
414                                         free(row_spans[i]);
415                                 delete [] row_spans;
416                         }
417                         row_spans_h = mask_h * OVERSAMPLE;
418                         row_spans = new short *[mask_h * OVERSAMPLE]; 
419                         for (i= 0; i<mask_h * OVERSAMPLE; i++) {
420                                 /* we use malloc so we can use realloc */
421                                 row_spans[i] = (short *)malloc(sizeof(short) * NUM_SPANS);
422                                 /* [0] is initialized later */
423                                 row_spans[i][1] = NUM_SPANS;
424                         }
425                 }
426                 
427 //printf("MaskUnit::process_package 1 %d\n", engine->point_sets.total);
428                 
430 // Draw bezier curves onto span buffer
431 //struct timeval start_time;
432 //gettimeofday(&start_time, 0);
434                 for(int k = 0; k < engine->point_sets.total; k++)
435                 {               
436                         int old_x, old_y;
437                         old_x = SHRT_MIN; // sentinel
438                         ArrayList<MaskPoint*> *points = engine->point_sets.values[k];
440                         if(points->total < 2) continue;
441 //printf("MaskUnit::process_package 2 %d %d\n", k, points->total);
442                         for (int i = ptr->row1 * OVERSAMPLE; i < ptr->row2 * OVERSAMPLE; i++) 
443                                 row_spans[i][0] = 2; /* initialize to zero */ 
444                         (ptr->row1*OVERSAMPLE, ptr->row2*OVERSAMPLE); // init just my rows
445                         for(int i = 0; i < points->total; i++)
446                         {
447                                 MaskPoint *point1 = points->values[i];
448                                 MaskPoint *point2 = (i >= points->total - 1) ? 
449                                         points->values[0] : 
450                                         points->values[i + 1];
452                                 float x0 = point1->x;
453                                 float y0 = point1->y;
454                                 float x1 = point1->x + point1->control_x2;
455                                 float y1 = point1->y + point1->control_y2;
456                                 float x2 = point2->x + point2->control_x1;
457                                 float y2 = point2->y + point2->control_y1;
458                                 float x3 = point2->x;
459                                 float y3 = point2->y;
461                                 // possible optimization here... since these coordinates are bounding box for curve
462                                 // we can continue with next curve if they are out of our range
464                                 // forward differencing bezier curves implementation taken from GPL code at
465                                 // http://cvs.sourceforge.net/viewcvs.py/guliverkli/guliverkli/src/subtitles/Rasterizer.cpp?rev=1.3
469                                 float cx3, cx2, cx1, cx0, cy3, cy2, cy1, cy0;
472                                 // [-1 +3 -3 +1]
473                                 // [+3 -6 +3  0]
474                                 // [-3 +3  0  0]
475                                 // [+1  0  0  0]
477                                 cx3 = (-  x0 + 3*x1 - 3*x2 + x3) * OVERSAMPLE;
478                                 cx2 = ( 3*x0 - 6*x1 + 3*x2) * OVERSAMPLE;
479                                 cx1 = (-3*x0 + 3*x1) * OVERSAMPLE;
480                                 cx0 = (   x0) * OVERSAMPLE;
482                                 cy3 = (-  y0 + 3*y1 - 3*y2 + y3) * OVERSAMPLE;
483                                 cy2 = ( 3*y0 - 6*y1 + 3*y2) * OVERSAMPLE;
484                                 cy1 = (-3*y0 + 3*y1) * OVERSAMPLE;
485                                 cy0 = (   y0 - ptr->row1) * OVERSAMPLE;
487                                 float maxaccel1 = fabs(2*cy2) + fabs(6*cy3);
488                                 float maxaccel2 = fabs(2*cx2) + fabs(6*cx3);
490                                 float maxaccel = maxaccel1 > maxaccel2 ? maxaccel1 : maxaccel2;
491                                 float h = 1.0;
493                                 if(maxaccel > 8.0 * OVERSAMPLE) h = sqrt((8.0 * OVERSAMPLE) / maxaccel);
495                                 for(float t = 0.0; t < 1.0; t += h)
496                                 {
497                                         int x = (int) (cx0 + t*(cx1 + t*(cx2 + t*cx3)));
498                                         int y = (int) (cy0 + t*(cy1 + t*(cy2 + t*cy3)));
500                                         if (old_x != SHRT_MIN) 
501                                                 draw_line_clamped(old_x, old_y, x, y, oversampled_package_w, oversampled_package_h, ptr->row1 * OVERSAMPLE);
502                                         old_x = x;
503                                         old_y = y;
504                                 }
506                                 int x = (int)(x3 * OVERSAMPLE);
507                                 int y = (int)((y3 - ptr->row1) * OVERSAMPLE);
508                                 draw_line_clamped(old_x, old_y, x, y, oversampled_package_w, oversampled_package_h, ptr->row1 * OVERSAMPLE);
509                                 old_x = (int)x;
510                                 old_y = (int)y;
511                 
512                         }
513 //printf("MaskUnit::process_package 1\n");
515                         // Now we have ordered spans ready!
516                         //printf("Segment : %i , row1: %i\n", oversampled_package_h, ptr->row1);
517                         uint16_t value;
518                         if (mask_color_model == BC_A8)
519                                 value = (int)((float)engine->value / 100 * 0xff);
520                         else
521                                 value = (int)((float)engine->value / 100 * 0xffff);     // also for BC_A_FLOAT
523                         /* Scaneline sampling, inspired by Graphics gems I, page 81 */
524                         for (int i = ptr->row1; i < ptr->row2; i++) 
525                         {
526                                 short min_x = SHRT_MAX;
527                                 short max_x = SHRT_MIN;
528                                 int j;                          /* universal counter for 0..OVERSAMPLE-1 */
529                                 short *span;                    /* current span - set inside loops with j */
530                                 short span_p[OVERSAMPLE];       /* pointers to current positions in spans */
531                                 #define P (span_p[j])           /* current span pointer */
532                                 #define MAXP (span[0])          /* current span length */
533                                 int num_empty_spans = 0;
534                                 /* get the initial span pointers ready */
535                                 for (j = 0; j < OVERSAMPLE; j++)
536                                 {       
537                                         span = row_spans[j + i * OVERSAMPLE];
538                                         P = 2;              /* starting pointers to spans */
539                                                 /* hypotetical hypotetical fix goes here: take care that there is maximum one empty span for every subpixel */ 
540                                         if (MAXP != 2) {                                        /* if span is not empty */
541                                                 if (span[2] < min_x) min_x = span[2];           /* take start of the first span */
542                                                 if (span[MAXP-1] > max_x) max_x = span[MAXP-1]; /* and end of last */
543                                         } else              
544                                         {       /* span is empty */
545                                                 num_empty_spans ++;     
546                                         }       
547                                 }
548                                 if (num_empty_spans == OVERSAMPLE)
549                                         continue; /* no work for us here */
550                                 else 
551                                 {       /* if we have engaged first nonempty rowspan... remember it to speed up mask applying */
552                                         if (local_first_nonempty_rowspan < 0 || i < local_first_nonempty_rowspan) 
553                                                 local_first_nonempty_rowspan = i;  
554                                         if (i > local_last_nonempty_rowspan) local_last_nonempty_rowspan = i;
555                                 }
556                                 /* we have some pixels to fill, do coverage calculation for span */
558                                 void *output_row = (unsigned char*)mask->get_rows()[i];
559                                 min_x = min_x / OVERSAMPLE;
560                                 max_x = (max_x + OVERSAMPLE - 1) / OVERSAMPLE;
561                                 
562                                 /* printf("row %i, pixel range: %i %i, spans0: %i\n", i, min_x, max_x, row_spans[i*OVERSAMPLE][0]-2); */
564                                 /* this is not a full loop, since we jump trough h if possible */
565                                 for (int h = min_x; h <= max_x; h++) 
566                                 {
567                                         short pixelleft = h * OVERSAMPLE;  /* leftmost subpixel of pixel*/
568                                         short pixelright = pixelleft + OVERSAMPLE - 1; /* rightmost subpixel of pixel */
569                                         uint32_t coverage = 0;
570                                         int num_left = 0;               /* number of spans that have start left of the next pixel */
571                                         short right_end = SHRT_MAX;     /* leftmost end of any span - right end of a full scanline */
572                                         short right_start = SHRT_MAX;   /* leftmost start of any span - left end of empty scanline */
574                                         for (j=0; j< OVERSAMPLE; j++) 
575                                         {       
576                                                 char chg = 1;
577                                                 span = row_spans[j + i * OVERSAMPLE];
578                                                 while (P < MAXP && chg)
579                                                 {
580                                                 //      printf("Sp: %i %i\n", span[P], span[P+1]);
581                                                         if (span[P] == span[P+1])           /* ignore empty spans */
582                                                         {
583                                                                 P +=2;
584                                                                 continue;
585                                                         }
586                                                         if (span[P] <= pixelright)          /* if span start is before the end of pixel */
587                                                                 coverage += MIN(span[P+1], pixelright)  /* 'clip' the span to pixel */
588                                                                           - MAX(span[P], pixelleft) + 1;
589                                                         if (span[P+1] <= pixelright) 
590                                                                 P += 2;
591                                                         else 
592                                                                 chg = 0;
593                                                 } 
594                                                 if (P == MAXP) 
595                                                         num_left = -OVERSAMPLE; /* just take care that num_left cannot equal OVERSAMPLE or zero again */
596                                                 else    
597                                                 { 
598                                                         if (span[P] <= pixelright)  /* if span starts before subpixel in the pixel on the right */
599                                                         {    /* useful for determining filled space till next non-fully-filled pixel */
600                                                                 num_left ++;                                            
601                                                                 if (span[P+1] < right_end) right_end = span[P+1]; 
602                                                         } else 
603                                                         {    /* useful for determining empty space till next non-empty pixel */
604                                                                 if (span[P] < right_start) right_start = span[P]; 
605                                                         }
606                                                 }
607                                         }
608                                         // calculate coverage
609                                         coverage *= value;
610                                         coverage /= OVERSAMPLE * OVERSAMPLE;
612                                         // when we have multiple masks the highest coverage wins
613                                         switch (mask_color_model)
614                                         {
615                                         case BC_A8:
616                                                 if (((unsigned char *) output_row)[h] < coverage)
617                                                         ((unsigned char*)output_row)[h] = coverage;
618                                                 break;
619                                         case BC_A16:
620                                                 if (((uint16_t *) output_row)[h] < coverage)
621                                                         ((uint16_t *) output_row)[h] = coverage;
622                                                 break;
623                                         case BC_A_FLOAT:
624                                                 if (((float *) output_row)[h] < coverage/float(0xffff))
625                                                         ((float *) output_row)[h] = coverage/float(0xffff);
626                                                 break;
627                                         }
628                                         /* possible optimization: do joining of multiple masks by span logics, not by bitmap logics*/
629                                         
630                                         if (num_left == OVERSAMPLE) 
631                                         {
632                                                 /* all current spans start more left than next pixel */
633                                                 /* this means we can probably (if lucky) draw a longer horizontal line */
634                                                 right_end = (right_end / OVERSAMPLE) - 1; /* last fully covered pixel */
635                                                 if (right_end > h)
636                                                 {
637                                                         if (mask_color_model == BC_A8) 
638                                                                 memset((char *)output_row + h + 1, value, right_end - h);
639                                                         else {
640                                                                 /* we are fucked, since there is no 16bit memset */
641                                                                 if (mask_color_model == BC_A16) {
642                                                                         for (int z = h +1; z <= right_end; z++)
643                                                                                 ((uint16_t *) output_row)[z] =  value;
644                                                                 } else {
645                                                                         for (int z = h +1; z <= right_end; z++)
646                                                                                 ((float *) output_row)[z] =  value/float(0xffff);
647                                                                 }
648                                                         }
649                                                         h = right_end;  
650                                                 }
651                                         } else 
652                                         if (num_left == 0) 
653                                         {
654                                                 /* all current spans start right of next pixel */ 
655                                                 /* this means we can probably (if lucky) skip some pixels */
656                                                 right_start = (right_start / OVERSAMPLE) - 1; /* last fully empty pixel */
657                                                 if (right_start > h)
658                                                 {
659                                                         h = right_start;
660                                                 }
661                                         }
662                                 }
663                                 
664                         }                                       
665                         
666                 }
667                 engine->protect_data.lock();
668                 if (local_first_nonempty_rowspan < engine->first_nonempty_rowspan)
669                         engine->first_nonempty_rowspan = local_first_nonempty_rowspan;
670                 if (local_last_nonempty_rowspan > engine->last_nonempty_rowspan)
671                         engine->last_nonempty_rowspan = local_last_nonempty_rowspan;
672                 engine->protect_data.unlock();
673         
675 //              int64_t dif= get_difference(&start_time);
676 //              printf("diff: %lli\n", dif);
677         }       /* END OF RECALCULATION! */
680         /* possible optimization: this could be useful for do_feather also */
682         // Feather polygon
683         if(engine->recalculate && engine->feather > 0) 
684         {       
685                 /* first take care that all packages are already drawn onto mask */
686                 pthread_mutex_lock(&engine->stage1_finished_mutex);
687                 engine->stage1_finished_count ++;
688                 if (engine->stage1_finished_count == engine->get_total_packages())
689                 {
690                         // let others pass
691                         pthread_cond_broadcast(&engine->stage1_finished_cond);
692                 }
693                 else
694                 {
695                         // wait until all are finished
696                         while (engine->stage1_finished_count < engine->get_total_packages())
697                                 pthread_cond_wait(&engine->stage1_finished_cond, &engine->stage1_finished_mutex);
698                 }
699                 pthread_mutex_unlock(&engine->stage1_finished_mutex);
700                 
701                 /* now do the feather */
702 //printf("MaskUnit::process_package 3 %f\n", engine->feather);
704         struct timeval start_time;
705         gettimeofday(&start_time, 0);
707         /* 
708         {
709         // EXPERIMENTAL CODE to find out how values between old and new do_feather map
710         // create a testcase and find out the closest match between do_feather_2 at 3 and do_feather
711         //                      2       3       4       5       6       7       8       10      13      15
712         // do_feather_2         3       5       7       9       11      13      15      19      25      29
713         // do_feather_1         2.683   3.401   4.139   4.768   5.315   5.819   6.271   7.093   8.170   8.844           
714         // diff                         0.718   0.738   0.629   0.547   0.504   0.452
715         // {(2,2.683),(3,3.401),(4,4.139),(5,4.768),(6,5.315),(7,5.819),(8,6.271),(10,7.093),(13,8.170),(15,8.844)}
716         // use http://mss.math.vanderbilt.edu/cgi-bin/MSSAgent/~pscrooke/MSS/fitpoly.def
717         // for calculating the coefficients
719                 VFrame *df2 = new VFrame (*engine->mask);
720                 VFrame *one_sample = new VFrame(*engine->mask);
721                 do_feather_2(df2, 
722                         engine->temp_mask, 
723                         25, 
724                         ptr->row1, 
725                         ptr->row2);
726                 float ftmp;
727                 for (ftmp = 8.15; ftmp <8.18; ftmp += 0.001) 
728                 {
729                         do_feather(one_sample, 
730                         engine->temp_mask, 
731                         ftmp, 
732                         ptr->row1, 
733                         ptr->row2);
734                         double squarediff = 0;
735                         for (int i=0; i< engine->mask->get_h(); i++)
736                                 for (int j = 0; j< engine->mask->get_w(); j++)
737                                 {
738                                         double v1= ((unsigned char *)one_sample->get_rows()[i])[j];
739                                         double v2= ((unsigned char *)df2->get_rows()[i])[j];
740                                         squarediff += (v1-v2)*(v1-v2);
741                                 }
742                         squarediff = sqrt(squarediff);
743                         printf("for value 3: ftmp: %2.3f, squarediff: %f\n", ftmp, squarediff);
744                 }
745         }
746         */      
747         
748                 int done = 0;
749                 done = do_feather_2(engine->mask,        // try if we have super fast implementation ready
750                                 engine->temp_mask,
751                                 engine->feather * 2 - 1, 
752                                 ptr->row1, 
753                                 ptr->row2);
754                 if (done) {
755                         engine->realfeather = engine->feather;
756                 }
757                 if (!done)
758                 {
759                 //      printf("not done\n");
760                         float feather = engine->feather;
761                         engine->realfeather = 0.878441 + 0.988534*feather - 0.0490204 *feather*feather  + 0.0012359 *feather*feather*feather;
762                         do_feather(engine->mask, 
763                                 engine->temp_mask, 
764                                 engine->realfeather, 
765                                 ptr->row1, 
766                                 ptr->row2); 
767                 }
768                 int64_t dif= get_difference(&start_time);
769                 printf("diff: %lli\n", dif);
770         } else
771         if (engine->feather <= 0) {
772                 engine->realfeather = 0;
773         }
774         start_row = MAX (ptr->row1, engine->first_nonempty_rowspan - (int)ceil(engine->realfeather)); 
775         end_row = MIN (ptr->row2, engine->last_nonempty_rowspan + 1 + (int)ceil(engine->realfeather));
779 // Apply mask
782 /* use the info about first and last column that are coloured from rowspan!  */
783 /* possible optimisation: also remember total spans */
784 /* possible optimisation: use a lookup table for X * (max - *mask_row) / max, where max is known and mask_row and X are variables */
/*
 * Attenuate the output frame by the mask for rows [start_row, end_row).
 *
 * Every sample is scaled by (max - mask) / max:
 *   - 4 components: only the 4th component (index 3) is scaled.
 *   - 3 components: components 0..2 are scaled; when do_yuv is set,
 *     components 1 and 2 additionally receive chroma_offset * mask / max.
 *
 * Reads `engine`, `start_row`, `end_row` and `mask_w` from the enclosing
 * scope. The mask row holds one sample of `type` per pixel.
 */
#define APPLY_MASK_SUBTRACT_ALPHA(type, max, components, do_yuv) \
{ \
	const type chroma_offset = (max + 1) / 2; \
	for(int row = start_row; row < end_row; row++) \
	{ \
		type *out_px = (type*)engine->output->get_rows()[row]; \
		const type *mask_px = (type*)engine->mask->get_rows()[row]; \
 \
		for(int col = 0; col < mask_w; col++, out_px += components, mask_px++) \
		{ \
			if(components != 4) \
			{ \
				for(int c = 0; c < 3; c++) \
					out_px[c] = out_px[c] * (max - *mask_px) / max; \
 \
				if(do_yuv) \
				{ \
					out_px[1] += chroma_offset * *mask_px / max; \
					out_px[2] += chroma_offset * *mask_px / max; \
				} \
			} \
			else \
			{ \
				out_px[3] = out_px[3] * (max - *mask_px) / max; \
			} \
		} \
	} \
}
/*
 * Multiply the output frame by the mask for rows [ptr->row1, ptr->row2).
 *
 * Every sample is scaled by mask / max:
 *   - 4 components: only the 4th component (index 3) is scaled.
 *   - 3 components: components 0..2 are scaled; when do_yuv is set,
 *     components 1 and 2 additionally receive
 *     chroma_offset * (max - mask) / max.
 *
 * Reads `engine`, `ptr` and `mask_w` from the enclosing scope. The mask row
 * holds one sample of `type` per pixel.
 *
 * Fix: component 0 of a 3-component pixel used to be computed from
 * output_row[3], i.e. from the first component of the NEXT pixel (and from
 * beyond the row for the last pixel). It now scales output_row[0].
 *
 * NOTE(review): for uint16_t the product of two samples is evaluated in int
 * after integer promotion and can exceed INT_MAX where int is 32 bits -
 * confirm whether a wider intermediate is wanted.
 */
#define APPLY_MASK_MULTIPLY_ALPHA(type, max, components, do_yuv) \
{ \
	type chroma_offset = (max + 1) / 2; \
	for(int i = ptr->row1; i < ptr->row2; i++) \
	{ \
		type *output_row = (type*)engine->output->get_rows()[i]; \
		type *mask_row = (type*)engine->mask->get_rows()[i]; \
 \
		/* with an alpha channel, point straight at it */ \
		if(components == 4) output_row += 3; \
		for(int j = mask_w; j != 0; j--) \
		{ \
			if(components == 4) \
			{ \
				*output_row = *output_row * *mask_row / max; \
			} \
			else \
			{ \
				output_row[0] = output_row[0] * *mask_row / max; \
				output_row[1] = output_row[1] * *mask_row / max; \
				output_row[2] = output_row[2] * *mask_row / max; \
 \
				if(do_yuv) \
				{ \
					output_row[1] += chroma_offset * (max - *mask_row) / max; \
					output_row[2] += chroma_offset * (max - *mask_row) / max; \
				} \
			} \
			output_row += components; \
			mask_row += 1; \
		} \
	} \
}
854 //struct timeval start_time;
855 //gettimeofday(&start_time, 0);
857 //printf("MaskUnit::process_package 1 %d\n", engine->mode);
858         int mask_w = engine->mask->get_w();
859         switch(engine->mode)
860         {
861                 case MASK_MULTIPLY_ALPHA:
862                         switch(engine->output->get_color_model())
863                         {
864                                 case BC_RGB888:
865                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 3, 0);
866                                         break;
867                                 case BC_YUV888:
868                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 3, 1);
869                                         break;
870                                 case BC_YUVA8888:
871                                 case BC_RGBA8888:
872                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 4, 0);
873                                         break;
874                                 case BC_RGB161616:
875                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 3, 0);
876                                         break;
877                                 case BC_YUV161616:
878                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 3, 1);
879                                         break;
880                                 case BC_YUVA16161616:
881                                 case BC_RGBA16161616:
882                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 4, 0);
883                                         break;
884                                 case BC_RGB_FLOAT:
885                                         APPLY_MASK_MULTIPLY_ALPHA(float, 1.0f, 3, 0);
886                                         break;
887                                 case BC_RGBA_FLOAT:
888                                         APPLY_MASK_MULTIPLY_ALPHA(float, 1.0f, 4, 0);
889                                         break;
890                         }
891                         break;
893                 case MASK_SUBTRACT_ALPHA:
894                         switch(engine->output->get_color_model())
895                         {
896                                 case BC_RGB888:
897                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 3, 0);
898                                         break;
899                                 case BC_YUV888:
900                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 3, 1);
901                                         break;
902                                 case BC_YUVA8888:
903                                 case BC_RGBA8888:
904                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 4, 0);
905                                         break;
906                                 case BC_RGB161616:
907                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 3, 0);
908                                         break;
909                                 case BC_YUV161616:
910                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 3, 1);
911                                         break;
912                                 case BC_YUVA16161616:
913                                 case BC_RGBA16161616:
914                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 4, 0);
915                                         break;
916                                 case BC_RGB_FLOAT:
917                                         APPLY_MASK_SUBTRACT_ALPHA(float, 1.0f, 3, 0);
918                                         break;
919                                 case BC_RGBA_FLOAT:
920                                         APPLY_MASK_SUBTRACT_ALPHA(float, 1.0f, 4, 0);
921                                         break;
922                         }
923                         break;
924         }
925 //      int64_t dif= get_difference(&start_time);
926 //      printf("diff: %lli\n", dif);
927 //printf("diff2: %lli\n", get_difference(&start_time));
928 //printf("MaskUnit::process_package 4 %d\n", get_package_number());
935 MaskEngine::MaskEngine(int cpus)
936  : LoadServer(cpus, cpus )      /* these two HAVE to be the same, since packages communicate  */
937 // : LoadServer(1, 2)
939         mask = 0;
940         pthread_mutex_init(&stage1_finished_mutex, NULL);
941         pthread_cond_init(&stage1_finished_cond, NULL);
944 MaskEngine::~MaskEngine()
946         pthread_cond_destroy(&stage1_finished_cond);
947         pthread_mutex_destroy(&stage1_finished_mutex);
948         if(mask) 
949         {
950                 delete mask;
951                 delete temp_mask;
952         }
953         point_sets.remove_all_objects();
956 int MaskEngine::points_equivalent(ArrayList<MaskPoint*> *new_points, 
957         ArrayList<MaskPoint*> *points)
959 //printf("MaskEngine::points_equivalent %d %d\n", new_points->total, points->total);
960         if(new_points->total != points->total) return 0;
961         
962         for(int i = 0; i < new_points->total; i++)
963         {
964                 if(!(*new_points->values[i] == *points->values[i])) return 0;
965         }
966         
967         return 1;
970 void MaskEngine::do_mask(VFrame *output, 
971         int64_t start_position,
972         double frame_rate,
973         double project_frame_rate,
974         MaskAutos *keyframe_set, 
975         int direction,
976         int before_plugins)
978         int64_t start_position_project = (int64_t)(start_position *
979                 project_frame_rate / 
980                 frame_rate);
981         Auto *current = 0;
982         MaskAuto *default_auto = (MaskAuto*)keyframe_set->default_auto;
983         MaskAuto *keyframe = (MaskAuto*)keyframe_set->get_prev_auto(start_position_project, 
984                 direction,
985                 current);
986         
987         if (keyframe->apply_before_plugins != before_plugins)
988                 return;
991         int total_points = 0;
992         for(int i = 0; i < keyframe->masks.total; i++)
993         {
994                 SubMask *mask = keyframe->get_submask(i);
995                 int submask_points = mask->points.total;
996                 if(submask_points > 1) total_points += submask_points;
997         }
999 //printf("MaskEngine::do_mask 1 %d %d\n", total_points, keyframe->value);
1000 // Ignore certain masks
1001         if(total_points < 2 || 
1002                 (keyframe->value == 0 && default_auto->mode == MASK_SUBTRACT_ALPHA))
1003         {
1004                 return;
1005         }
1007 // Fake certain masks
1008         if(keyframe->value == 0 && default_auto->mode == MASK_MULTIPLY_ALPHA)
1009         {
1010                 output->clear_frame();
1011                 return;
1012         }
1014 //printf("MaskEngine::do_mask 1\n");
1016         int new_color_model = 0;
1017         recalculate = 0;
1018         switch(output->get_color_model())
1019         {
1020                 case BC_RGB888:
1021                 case BC_RGBA8888:
1022                 case BC_YUV888:
1023                 case BC_YUVA8888:
1024                         new_color_model = BC_A8;
1025                         break;
1027                 case BC_RGB161616:
1028                 case BC_RGBA16161616:
1029                 case BC_YUV161616:
1030                 case BC_YUVA16161616:
1031                         new_color_model = BC_A16;
1032                         break;
1034                 case BC_RGB_FLOAT:
1035                 case BC_RGBA_FLOAT:
1036                         new_color_model = BC_A_FLOAT;
1037                         break;
1038         }
1040 // Determine if recalculation is needed
1042         if(mask && 
1043                 (mask->get_w() != output->get_w() ||
1044                 mask->get_h() != output->get_h() ||
1045                 mask->get_color_model() != new_color_model))
1046         {
1047                 delete mask;
1048                 delete temp_mask;
1049                 mask = 0;
1050                 recalculate = 1;
1051         }
1053         if(!recalculate)
1054         {
1055                 if(point_sets.total != keyframe_set->total_submasks(start_position_project, 
1056                         direction))
1057                         recalculate = 1;
1058         }
1060         if(!recalculate)
1061         {
1062                 for(int i = 0; 
1063                         i < keyframe_set->total_submasks(start_position_project, 
1064                                 direction) && !recalculate; 
1065                         i++)
1066                 {
1067                         ArrayList<MaskPoint*> *new_points = new ArrayList<MaskPoint*>;
1068                         keyframe_set->get_points(new_points, 
1069                                 i, 
1070                                 start_position_project, 
1071                                 direction);
1072                         if(!points_equivalent(new_points, point_sets.values[i])) recalculate = 1;
1073                         new_points->remove_all_objects();
1074                 }
1075         }
1077         if(recalculate ||
1078                 !EQUIV(keyframe->feather, feather) ||
1079                 !EQUIV(keyframe->value, value))
1080         {
1081                 recalculate = 1;
1082                 if(!mask) 
1083                 {
1084                         mask = new VFrame(0, 
1085                                         output->get_w(), 
1086                                         output->get_h(),
1087                                         new_color_model);
1088                         temp_mask = new VFrame(0, 
1089                                         output->get_w(), 
1090                                         output->get_h(),
1091                                         new_color_model);
1092                 }
1093                 if(keyframe->feather > 0)
1094                         temp_mask->clear_frame();
1095                 else
1096                         mask->clear_frame();
1097                 point_sets.remove_all_objects();
1099                 for(int i = 0; 
1100                         i < keyframe_set->total_submasks(start_position_project, 
1101                                 direction); 
1102                         i++)
1103                 {
1104                         ArrayList<MaskPoint*> *new_points = new ArrayList<MaskPoint*>;
1105                         keyframe_set->get_points(new_points, 
1106                                 i, 
1107                                 start_position_project, 
1108                                 direction);
1109                         point_sets.append(new_points);
1110                 }
1111         }
1115         this->output = output;
1116         this->mode = default_auto->mode;
1117         this->feather = keyframe->feather;
1118         this->value = keyframe->value;
1121 // Run units
1122         process_packages();
1125 //printf("MaskEngine::do_mask 6\n");
1128 void MaskEngine::init_packages()
1130 //printf("MaskEngine::init_packages 1\n");
1131         int division = (int)((float)output->get_h() / (get_total_packages()) + 0.5);
1132         if(division < 1) division = 1;
1134         stage1_finished_count = 0;
1135         if (recalculate) {
1136                 last_nonempty_rowspan = SHRT_MIN;
1137                 first_nonempty_rowspan = SHRT_MAX;
1138         }
1139 // Always a multiple of 2 packages exist
1140         for(int i = 0; i < get_total_packages(); i++)
1141         {
1142                 MaskPackage *pkg = (MaskPackage*)packages[i];
1143                 pkg->row1 = division * i;
1144                 pkg->row2 = MIN (division * i + division, output->get_h());
1145                 
1146                 if(i == get_total_packages() - 1)  // last package
1147                 {
1148                         pkg->row2 = pkg->row2 = output->get_h();
1149                 }
1151         }
1152 //printf("MaskEngine::init_packages 2\n");
1155 LoadClient* MaskEngine::new_client()
1157         return new MaskUnit(this);
1160 LoadPackage* MaskEngine::new_package()
1162         return new MaskPackage;