r602: Fix baver's code... don't insert timecode when show_tc is not set
[cinelerra_cv/mob.git] / cinelerra / maskengine.C
blob4efc9708ad1d85a5a76464ae51e7292f96ce6f0f
1 #include "clip.h"
2 #include "condition.h"
3 #include "maskauto.h"
4 #include "maskautos.h"
5 #include "maskengine.h"
6 #include "mutex.h"
7 #include "vframe.h"
9 #include <math.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <limits.h>
14 #include "feather.h"
/*
 * Microseconds elapsed between the instant stored in *start_time and the
 * moment of the call, measured with gettimeofday().
 */
int64_t get_difference(struct timeval *start_time)
{
	struct timeval now;

	gettimeofday(&now, 0);

	int64_t whole_seconds = (int64_t)now.tv_sec - (int64_t)start_time->tv_sec;
	int64_t micros = (int64_t)now.tv_usec - (int64_t)start_time->tv_usec;

	// Borrow one second when the microsecond part went negative.
	if(micros < 0)
	{
		micros += 1000000;
		whole_seconds--;
	}

	return whole_seconds * 1000000 + micros;
}
// MaskPackage constructor.
// NOTE(review): no initializer list or body is visible in this listing - confirm it is empty.
38 MaskPackage::MaskPackage()
// MaskPackage destructor.
// NOTE(review): no body is visible in this listing - confirm it is empty.
42 MaskPackage::~MaskPackage()
50 MaskUnit::MaskUnit(MaskEngine *engine)
51  : LoadClient(engine)
53         this->engine = engine;
54         row_spans_h = 0;
55         row_spans = 0;
59 MaskUnit::~MaskUnit()
61         if (row_spans)
62         {
63                 for (int i = 0; i < row_spans_h; i++) 
64                         free(row_spans[i]);
65                 delete [] row_spans;
66         }
/* Square of x.  The argument is expanded twice, so keep side effects out of it. */
#if !defined(SQR)
#define SQR(x) ((x) * (x))
#endif
75 inline void MaskUnit::draw_line_clamped(
76         int draw_x1, 
77         int draw_y1, 
78         int draw_x2, 
79         int draw_y2,
80         int w,
81         int h,
82         int hoffset)
84 //printf("MaskUnit::draw_line_clamped 1 %d %d %d %d\n", x1, y1, x2, y2);
85         if (draw_y1 == draw_y2) return; 
87         if(draw_y2 < draw_y1)
88         { /* change the order */
89                 int tmp;
90                 tmp = draw_x1;
91                 draw_x1 = draw_x2;
92                 draw_x2 = tmp;
93                 tmp = draw_y1;
94                 draw_y1 = draw_y2;
95                 draw_y2 = tmp;
96         }
98         float slope = ((float)draw_x2 - draw_x1) / ((float)draw_y2 - draw_y1); 
99         w--;
100         for(int y_i = draw_y1; y_i < draw_y2; y_i++) 
101         { 
102                 if (y_i >= h) 
103                         return; // since y gets larger, there is no point in continuing
104                 else if(y_i >= 0) 
105                 { 
106                         int x = (int)(slope * (y_i - draw_y1) + draw_x1); 
107                         int x_i = CLIP(x, 0, w); 
109                         /* now insert into span in order */
110                         short *span = row_spans[y_i + hoffset]; 
111                         if (span[0] >= span[1]) { /* do the reallocation */
112                                 span[1] *= 2;
113                                 span = row_spans[y_i + hoffset] = (short *) realloc (span, span[1] * sizeof(short)); /* be careful! row_spans has to be updated! */
114                         };
116                         short index = 2;
117                         while (index < span[0]  && span[index] < x_i)
118                                 index++;
119                         for (int j = span[0]; j > index; j--) {       // move forward
120                                 span[j] = span[j-1];
121                         }
122                         span[index] = x_i;
123                         span[0] ++;
124                 } 
125         } 
128 void MaskUnit::blur_strip(float *val_p, 
129         float *val_m, 
130         float *dst, 
131         float *src, 
132         int size,
133         int max)
135         float *sp_p = src;
136         float *sp_m = src + size - 1;
137         float *vp = val_p;
138         float *vm = val_m + size - 1;
139         float initial_p = sp_p[0];
140         float initial_m = sp_m[0];
142 //printf("MaskUnit::blur_strip %d\n", size);
143         for(int k = 0; k < size; k++)
144         {
145                 int terms = (k < 4) ? k : 4;
146                 int l;
147                 for(l = 0; l <= terms; l++)
148                 {
149                         *vp += n_p[l] * sp_p[-l] - d_p[l] * vp[-l];
150                         *vm += n_m[l] * sp_m[l] - d_m[l] * vm[l];
151                 }
153                 for( ; l <= 4; l++)
154                 {
155                         *vp += (n_p[l] - bd_p[l]) * initial_p;
156                         *vm += (n_m[l] - bd_m[l]) * initial_m;
157                 }
158                 sp_p++;
159                 sp_m--;
160                 vp++;
161                 vm--;
162         }
164         for(int i = 0; i < size; i++)
165         {
166                 float sum = val_p[i] + val_m[i];
167                 CLAMP(sum, 0, max);
168                 dst[i] = sum;
169         }
// Alternative feather implementation: the whole body is an expansion of the
// DO_FEATHER_N macro, instantiated for unsigned char pixels with uint32_t as
// the second type argument, 0xffff as the max argument and the feather amount
// truncated to an int.
// NOTE(review): DO_FEATHER_N is not defined in this file (presumably it comes
// from feather.h).  No return statement is visible here, so the int result
// presumably comes from the macro expansion - confirm.
174 int MaskUnit::do_feather_2(VFrame *output,
175         VFrame *input, 
176         float feather, 
177         int start_out, 
178         int end_out)
180         
// The cast drops the fractional part of feather.
181         int fint = (int)feather;
182         DO_FEATHER_N(unsigned char, uint32_t, 0xffff, fint);
// Blur rows [start_out, end_out) of the mask with the recursive filter
// implemented by blur_strip(), reading from input and writing to output.
//
// Step 1 derives the coefficient tables n_p, n_m, d_p, d_m, bd_p and bd_m
// from feather.
// Step 2 (the DO_FEATHER macro) filters every column, then every output row.
//
// Only the BC_A8 and BC_A16 color models are handled; for any other model the
// switch at the end does nothing.
// NOTE(review): feather == 0 makes std_dev 0, so the divisions below divide
// by zero - callers are expected to pass feather > 0.
// NOTE(review): the function's braces and the closing line of the DO_FEATHER
// macro are not visible in this listing.
187 void MaskUnit::do_feather(VFrame *output,
188         VFrame *input, 
189         float feather, 
190         int start_out, 
191         int end_out)
193 //printf("MaskUnit::do_feather %f\n", feather);
194 // Get constants
195         double constants[8];
196         double div;
// std_dev is chosen so that exp(-feather^2 / (2 * std_dev^2)) == 1/255.
197         double std_dev = sqrt(-(double)(feather * feather) / (2 * log(1.0 / 255.0)));
198         div = sqrt(2 * M_PI) * std_dev;
199         constants[0] = -1.783 / std_dev;
200         constants[1] = -1.723 / std_dev;
201         constants[2] = 0.6318 / std_dev;
202         constants[3] = 1.997  / std_dev;
203         constants[4] = 1.6803 / div;
204         constants[5] = 3.735 / div;
205         constants[6] = -0.6803 / div;
206         constants[7] = -0.2598 / div;
// n_p[]: weights blur_strip() applies to input samples in its forward pass.
208         n_p[0] = constants[4] + constants[6];
209         n_p[1] = exp(constants[1]) *
210                                 (constants[7] * sin(constants[3]) -
211                                 (constants[6] + 2 * constants[4]) * cos(constants[3])) +
212                                 exp(constants[0]) *
213                                 (constants[5] * sin(constants[2]) -
214                                 (2 * constants[6] + constants[4]) * cos(constants[2]));
216         n_p[2] = 2 * exp(constants[0] + constants[1]) *
217                                 ((constants[4] + constants[6]) * cos(constants[3]) * 
218                                 cos(constants[2]) - constants[5] * 
219                                 cos(constants[3]) * sin(constants[2]) -
220                                 constants[7] * cos(constants[2]) * sin(constants[3])) +
221                                 constants[6] * exp(2 * constants[0]) +
222                                 constants[4] * exp(2 * constants[1]);
224         n_p[3] = exp(constants[1] + 2 * constants[0]) *
225                                 (constants[7] * sin(constants[3]) - 
226                                 constants[6] * cos(constants[3])) +
227                                 exp(constants[0] + 2 * constants[1]) *
228                                 (constants[5] * sin(constants[2]) - constants[4] * 
229                                 cos(constants[2]));
230         n_p[4] = 0.0;
// d_p[]: weights blur_strip() applies to already computed output samples in
// its forward pass.
232         d_p[0] = 0.0;
233         d_p[1] = -2 * exp(constants[1]) * cos(constants[3]) -
234                                 2 * exp(constants[0]) * cos(constants[2]);
236         d_p[2] = 4 * cos(constants[3]) * cos(constants[2]) * 
237                                 exp(constants[0] + constants[1]) +
238                                 exp(2 * constants[1]) + exp (2 * constants[0]);
240         d_p[3] = -2 * cos(constants[2]) * exp(constants[0] + 2 * constants[1]) -
241                                 2 * cos(constants[3]) * exp(constants[1] + 2 * constants[0]);
243         d_p[4] = exp(2 * constants[0] + 2 * constants[1]);
// The backward pass uses the same output weights as the forward pass.
245         for(int i = 0; i < 5; i++) d_m[i] = d_p[i];
// Backward-pass input weights, derived from the forward-pass tables.
247         n_m[0] = 0.0;
248         for(int i = 1; i <= 4; i++)
249                 n_m[i] = n_p[i] - d_p[i] * n_p[0];
// a and b scale the output weights into the boundary tables bd_p / bd_m,
// which blur_strip() uses for filter taps that would reach past the edge of
// a strip.
251         double sum_n_p, sum_n_m, sum_d;
252         double a, b;
254         sum_n_p = 0.0;
255         sum_n_m = 0.0;
256         sum_d = 0.0;
257         for(int i = 0; i < 5; i++)
258         {
259                 sum_n_p += n_p[i];
260                 sum_n_m += n_m[i];
261                 sum_d += d_p[i];
262         }
264         a = sum_n_p / (1 + sum_d);
265         b = sum_n_m / (1 + sum_d);
267         for(int i = 0; i < 5; i++)
268         {
269                 bd_p[i] = d_p[i] * a;
270                 bd_m[i] = d_m[i] * b;
271         }
// DO_FEATHER(type, max): two-pass blur for pixels of the given type.
//   Pass 1, per column j: copy rows [start_in, end_in) of input into src
//   (the output range widened by (int)feather on both sides and clipped to
//   the frame), run blur_strip(), and store rows [start_out, end_out) of the
//   result into output.
//   Pass 2, per output row j: run blur_strip() along the row of output, in
//   place.
// val_p / val_m are zeroed before every blur_strip() call because that
// function accumulates into them.  The four scratch arrays hold
// MAX(frame_w, frame_h) floats each and are freed at the end of the macro.
294 #define DO_FEATHER(type, max) \
295 { \
296         int frame_w = input->get_w(); \
297         int frame_h = input->get_h(); \
298         int size = MAX(frame_w, frame_h); \
299         float *src = new float[size]; \
300         float *dst = new float[size]; \
301         float *val_p = new float[size]; \
302         float *val_m = new float[size]; \
303         int start_in = start_out - (int)feather; \
304         int end_in = end_out + (int)feather; \
305         if(start_in < 0) start_in = 0; \
306         if(end_in > frame_h) end_in = frame_h; \
307         int strip_size = end_in - start_in; \
308         type **in_rows = (type**)input->get_rows(); \
309         type **out_rows = (type**)output->get_rows(); \
310         int j; \
312 /* printf("DO_FEATHER 1\n"); */ \
313         for(j = 0; j < frame_w; j++) \
314         { \
315 /* printf("DO_FEATHER 1.1 %d\n", j); */ \
316                 bzero(val_p, sizeof(float) * (end_in - start_in)); \
317                 bzero(val_m, sizeof(float) * (end_in - start_in)); \
318                 for(int l = 0, k = start_in; k < end_in; l++, k++) \
319                 { \
320                         src[l] = (float)in_rows[k][j]; \
321                 } \
323                 blur_strip(val_p, val_m, dst, src, strip_size, max); \
325                 for(int l = start_out - start_in, k = start_out; k < end_out; l++, k++) \
326                 { \
327                         out_rows[k][j] = (type)dst[l]; \
328                 } \
329         } \
331         for(j = start_out; j < end_out; j++) \
332         { \
333 /* printf("DO_FEATHER 2 %d\n", j); */ \
334                 bzero(val_p, sizeof(float) * frame_w); \
335                 bzero(val_m, sizeof(float) * frame_w); \
336                 for(int k = 0; k < frame_w; k++) \
337                 { \
338                         src[k] = (float)out_rows[j][k]; \
339                 } \
341                 blur_strip(val_p, val_m, dst, src, frame_w, max); \
343                 for(int k = 0; k < frame_w; k++) \
344                 { \
345                         out_rows[j][k] = (type)dst[k]; \
346                 } \
347         } \
349 /* printf("DO_FEATHER 3\n"); */ \
351         delete [] src; \
352         delete [] dst; \
353         delete [] val_p; \
354         delete [] val_m; \
355 /* printf("DO_FEATHER 4\n"); */ \
365 //printf("do_feather %d\n", frame->get_color_model());
// Pick the pixel type and maximum value from the input's color model.
366         switch(input->get_color_model())
367         {
368                 case BC_A8:
369                         DO_FEATHER(unsigned char, 0xff);
370                         break;
371                 
372                 case BC_A16:
373                         DO_FEATHER(uint16_t, 0xffff);
374                         break;
375         }
382 void MaskUnit::process_package(LoadPackage *package)
384         MaskPackage *ptr = (MaskPackage*)package;
385         
386         int start_row = SHRT_MIN;         // part for which mask exists
387         int end_row;
388         if(engine->recalculate)
389         {
390                 VFrame *mask;
391 //printf("MaskUnit::process_package 1 %d\n", get_package_number());
392                 if(engine->feather > 0) 
393                         mask = engine->temp_mask;
394                 else
395                         mask = engine->mask;
397                 int mask_w = mask->get_w();
398                 int mask_h = mask->get_h();
399                 int mask_color_model = mask->get_color_model();
400                 int oversampled_package_w = mask_w * OVERSAMPLE;
401                 int oversampled_package_h = (ptr->row2 - ptr->row1) * OVERSAMPLE;
402                 int local_first_nonempty_rowspan = SHRT_MIN;
403                 int local_last_nonempty_rowspan = SHRT_MIN;
405                 if (!row_spans || row_spans_h != mask_h * OVERSAMPLE) {
406                         int i;  
407                         if (row_spans) {   /* size change */
408                                 for (i = 0; i < row_spans_h; i++) 
409                                         free(row_spans[i]);
410                                 delete [] row_spans;
411                         }
412                         row_spans_h = mask_h * OVERSAMPLE;
413                         row_spans = new short *[mask_h * OVERSAMPLE]; 
414                         for (i= 0; i<mask_h * OVERSAMPLE; i++) {
415                                 /* we use malloc so we can use realloc */
416                                 row_spans[i] = (short *)malloc(sizeof(short) * NUM_SPANS);
417                                 /* [0] is initialized later */
418                                 row_spans[i][1] = NUM_SPANS;
419                         }
420                 }
421                 
422 //printf("MaskUnit::process_package 1 %d\n", engine->point_sets.total);
423                 
425 // Draw bezier curves onto span buffer
426 //struct timeval start_time;
427 //gettimeofday(&start_time, 0);
429                 for(int k = 0; k < engine->point_sets.total; k++)
430                 {               
431                         int old_x, old_y;
432                         old_x = SHRT_MIN; // sentinel
433                         ArrayList<MaskPoint*> *points = engine->point_sets.values[k];
435                         if(points->total < 2) continue;
436 //printf("MaskUnit::process_package 2 %d %d\n", k, points->total);
437                         for (int i = ptr->row1 * OVERSAMPLE; i < ptr->row2 * OVERSAMPLE; i++) 
438                                 row_spans[i][0] = 2; /* initialize to zero */ 
439                         (ptr->row1*OVERSAMPLE, ptr->row2*OVERSAMPLE); // init just my rows
440                         for(int i = 0; i < points->total; i++)
441                         {
442                                 MaskPoint *point1 = points->values[i];
443                                 MaskPoint *point2 = (i >= points->total - 1) ? 
444                                         points->values[0] : 
445                                         points->values[i + 1];
447                                 float x0 = point1->x;
448                                 float y0 = point1->y;
449                                 float x1 = point1->x + point1->control_x2;
450                                 float y1 = point1->y + point1->control_y2;
451                                 float x2 = point2->x + point2->control_x1;
452                                 float y2 = point2->y + point2->control_y1;
453                                 float x3 = point2->x;
454                                 float y3 = point2->y;
456                                 // possible optimization here... since these coordinates are bounding box for curve
457                                 // we can continue with next curve if they are out of our range
459                                 // forward differencing bezier curves implementation taken from GPL code at
460                                 // http://cvs.sourceforge.net/viewcvs.py/guliverkli/guliverkli/src/subtitles/Rasterizer.cpp?rev=1.3
464                                 float cx3, cx2, cx1, cx0, cy3, cy2, cy1, cy0;
467                                 // [-1 +3 -3 +1]
468                                 // [+3 -6 +3  0]
469                                 // [-3 +3  0  0]
470                                 // [+1  0  0  0]
472                                 cx3 = (-  x0 + 3*x1 - 3*x2 + x3) * OVERSAMPLE;
473                                 cx2 = ( 3*x0 - 6*x1 + 3*x2) * OVERSAMPLE;
474                                 cx1 = (-3*x0 + 3*x1) * OVERSAMPLE;
475                                 cx0 = (   x0) * OVERSAMPLE;
477                                 cy3 = (-  y0 + 3*y1 - 3*y2 + y3) * OVERSAMPLE;
478                                 cy2 = ( 3*y0 - 6*y1 + 3*y2) * OVERSAMPLE;
479                                 cy1 = (-3*y0 + 3*y1) * OVERSAMPLE;
480                                 cy0 = (   y0 - ptr->row1) * OVERSAMPLE;
482                                 float maxaccel1 = fabs(2*cy2) + fabs(6*cy3);
483                                 float maxaccel2 = fabs(2*cx2) + fabs(6*cx3);
485                                 float maxaccel = maxaccel1 > maxaccel2 ? maxaccel1 : maxaccel2;
486                                 float h = 1.0;
488                                 if(maxaccel > 8.0 * OVERSAMPLE) h = sqrt((8.0 * OVERSAMPLE) / maxaccel);
490                                 for(float t = 0.0; t < 1.0; t += h)
491                                 {
492                                         int x = (int) (cx0 + t*(cx1 + t*(cx2 + t*cx3)));
493                                         int y = (int) (cy0 + t*(cy1 + t*(cy2 + t*cy3)));
495                                         if (old_x != SHRT_MIN) 
496                                                 draw_line_clamped(old_x, old_y, x, y, oversampled_package_w, oversampled_package_h, ptr->row1 * OVERSAMPLE);
497                                         old_x = x;
498                                         old_y = y;
499                                 }
501                                 int x = (int)(x3 * OVERSAMPLE);
502                                 int y = (int)((y3 - ptr->row1) * OVERSAMPLE);
503                                 draw_line_clamped(old_x, old_y, x, y, oversampled_package_w, oversampled_package_h, ptr->row1 * OVERSAMPLE);
504                                 old_x = (int)x;
505                                 old_y = (int)y;
506                 
507                         }
508 //printf("MaskUnit::process_package 1\n");
510                         // Now we have ordered spans ready!
511                         //printf("Segment : %i , row1: %i\n", oversampled_package_h, ptr->row1);
512                         uint16_t value;
513                         if (mask_color_model == BC_A8)
514                                 value = (int)((float)engine->value / 100 * 0xff);
515                         else
516                                 value = (int)((float)engine->value / 100 * 0xffff);
518                         /* Scaneline sampling, inspired by Graphics gems I, page 81 */
519                         for (int i = ptr->row1; i < ptr->row2; i++) 
520                         {
521                                 short min_x = SHRT_MAX;
522                                 short max_x = SHRT_MIN;
523                                 int j;                          /* universal counter for 0..OVERSAMPLE-1 */
524                                 short *span;                    /* current span - set inside loops with j */
525                                 short span_p[OVERSAMPLE];       /* pointers to current positions in spans */
526                                 #define P (span_p[j])           /* current span pointer */
527                                 #define MAXP (span[0])          /* current span length */
528                                 int num_empty_spans = 0;
529                                 /* get the initial span pointers ready */
530                                 for (j = 0; j < OVERSAMPLE; j++)
531                                 {       
532                                         span = row_spans[j + i * OVERSAMPLE];
533                                         P = 2;              /* starting pointers to spans */
534                                                 /* hypotetical hypotetical fix goes here: take care that there is maximum one empty span for every subpixel */ 
535                                         if (MAXP != 2) {                                        /* if span is not empty */
536                                                 if (span[2] < min_x) min_x = span[2];           /* take start of the first span */
537                                                 if (span[MAXP-1] > max_x) max_x = span[MAXP-1]; /* and end of last */
538                                         } else              
539                                         {       /* span is empty */
540                                                 num_empty_spans ++;     
541                                         }       
542                                 }
543                                 if (num_empty_spans == OVERSAMPLE)
544                                         continue; /* no work for us here */
545                                 else 
546                                 {       /* if we have engaged first nonempty rowspan... remember it to speed up mask applying */
547                                         if (local_first_nonempty_rowspan < 0 || i < local_first_nonempty_rowspan) 
548                                                 local_first_nonempty_rowspan = i;  
549                                         if (i > local_last_nonempty_rowspan) local_last_nonempty_rowspan = i;
550                                 }
551                                 /* we have some pixels to fill, do coverage calculation for span */
553                                 void *output_row = (unsigned char*)mask->get_rows()[i];
554                                 min_x = min_x / OVERSAMPLE;
555                                 max_x = (max_x + OVERSAMPLE - 1) / OVERSAMPLE;
556                                 
557                                 /* printf("row %i, pixel range: %i %i, spans0: %i\n", i, min_x, max_x, row_spans[i*OVERSAMPLE][0]-2); */
559                                 /* this is not a full loop, since we jump trough h if possible */
560                                 for (int h = min_x; h <= max_x; h++) 
561                                 {
562                                         short pixelleft = h * OVERSAMPLE;  /* leftmost subpixel of pixel*/
563                                         short pixelright = pixelleft + OVERSAMPLE - 1; /* rightmost subpixel of pixel */
564                                         uint32_t coverage = 0;
565                                         int num_left = 0;               /* number of spans that have start left of the next pixel */
566                                         short right_end = SHRT_MAX;     /* leftmost end of any span - right end of a full scanline */
567                                         short right_start = SHRT_MAX;   /* leftmost start of any span - left end of empty scanline */
569                                         for (j=0; j< OVERSAMPLE; j++) 
570                                         {       
571                                                 char chg = 1;
572                                                 span = row_spans[j + i * OVERSAMPLE];
573                                                 while (P < MAXP && chg)
574                                                 {
575                                                 //      printf("Sp: %i %i\n", span[P], span[P+1]);
576                                                         if (span[P] <= pixelright)          /* if span start is before the end of pixel */
577                                                                 coverage += MIN(span[P+1], pixelright)  /* 'clip' the span to pixel */
578                                                                           - MAX(span[P], pixelleft) + 1;
579                                                         if (span[P+1] <= pixelright) 
580                                                                 P += 2;
581                                                         else 
582                                                                 chg = 0;
583                                                 } 
584                                                 if (P == MAXP) 
585                                                         num_left = -OVERSAMPLE; /* just take care that num_left cannot equal OVERSAMPLE or zero again */
586                                                 else    
587                                                 { 
588                                                         if (span[P] <= pixelright)  /* if span starts before subpixel in the pixel on the right */
589                                                         {    /* useful for determining filled space till next non-fully-filled pixel */
590                                                                 num_left ++;                                            
591                                                                 if (span[P+1] < right_end) right_end = span[P+1]; 
592                                                         } else 
593                                                         {    /* useful for determining empty space till next non-empty pixel */
594                                                                 if (span[P] < right_start) right_start = span[P]; 
595                                                         }
596                                                 }
597                                         }
598                                         // calculate coverage
599                                         coverage *= value;
600                                         if(OVERSAMPLE == 8) coverage >>= 6; \
601                                         else \
602                                         if(OVERSAMPLE == 4) coverage >>= 2; \
603                                         else \
604                                         if(OVERSAMPLE == 2) coverage >>= 1; \
605                                         else coverage /= OVERSAMPLE * OVERSAMPLE; \
607                                         
608                                         if (mask_color_model == BC_A8) 
609                                         {
610                                                 if (((unsigned char *) output_row)[h] < coverage) /* when we have multiple masks... we don't want aliasing inside areas */
611                                                         ((unsigned char*)output_row)[h] = coverage;
612                                         } else
613                                         {
614                                                 if (((uint16_t *) output_row)[h] < coverage) /* when we have multiple masks... we don't want aliasing inside areas */
615                                                         ((uint16_t *) output_row)[h] = coverage;
616                                         }
617                                         /* possible optimization: do joining of multiple masks by span logics, not by bitmap logics*/
618                                         
619                                         if (num_left == OVERSAMPLE) 
620                                         {
621                                                 /* all current spans start more left than next pixel */
622                                                 /* this means we can probably (if lucky) draw a longer horizontal line */
623                                                 right_end = (right_end / OVERSAMPLE) - 1; /* last fully covered pixel */
624                                                 if (right_end > h)
625                                                 {
626                                                         if (mask_color_model == BC_A8) 
627                                                                 memset((char *)output_row + h + 1, value, right_end - h);
628                                                         else {
629                                                                 /* we are fucked, since there is no 16bit memset */
630                                                                 for (int z = h +1; z <= right_end; z++)
631                                                                         ((uint16_t *) output_row)[z] =  value;
632                 
633                                                         }
634                                                         h = right_end;  
635                                                 }
636                                         } else 
637                                         if (num_left == 0) 
638                                         {
639                                                 /* all current spans start right of next pixel */ 
640                                                 /* this means we can probably (if lucky) skip some pixels */
641                                                 right_start = (right_start / OVERSAMPLE) - 1; /* last fully empty pixel */
642                                                 if (right_start > h)
643                                                 {
644                                                         h = right_start;
645                                                 }
646                                         }
647                                 }
648                                 
649                         }                                       
650                         
651                 }
652                 engine->protect_data.lock();
653                 if (local_first_nonempty_rowspan < engine->first_nonempty_rowspan)
654                         engine->first_nonempty_rowspan = local_first_nonempty_rowspan;
655                 if (local_last_nonempty_rowspan > engine->last_nonempty_rowspan)
656                         engine->last_nonempty_rowspan = local_last_nonempty_rowspan;
657                 engine->protect_data.unlock();
658         
660 //              int64_t dif= get_difference(&start_time);
661 //              printf("diff: %lli\n", dif);
662         }       /* END OF RECALCULATION! */
665         /* possible optimization: this could be useful for do_feather also */
667         // Feather polygon
668         if(engine->recalculate && engine->feather > 0) 
669         {       
670                 /* first take care that all packages are already drawn onto mask */
671                 pthread_mutex_lock(&engine->stage1_finished_mutex);
672                 engine->stage1_finished_count ++;
673                 if (engine->stage1_finished_count == engine->get_total_packages())
674                 {
675                         // let others pass
676                         pthread_cond_broadcast(&engine->stage1_finished_cond);
677                 }
678                 else
679                 {
680                         // wait until all are finished
681                         while (engine->stage1_finished_count < engine->get_total_packages())
682                                 pthread_cond_wait(&engine->stage1_finished_cond, &engine->stage1_finished_mutex);
683                 }
684                 pthread_mutex_unlock(&engine->stage1_finished_mutex);
685                 
686                 /* now do the feather */
687 //printf("MaskUnit::process_package 3 %f\n", engine->feather);
689         struct timeval start_time;
690         gettimeofday(&start_time, 0);
692         /* 
693         {
694         // EXPERIMENTAL CODE to find out how values between old and new do_feather map
695         // create a testcase and find out the closest match between do_feather_2 at 3 and do_feather
696         //                      2       3       4       5       6       7       8       10      13      15
697         // do_feather_2         3       5       7       9       11      13      15      19      25      29
698         // do_feather_1         2.683   3.401   4.139   4.768   5.315   5.819   6.271   7.093   8.170   8.844           
699         // diff                         0.718   0.738   0.629   0.547   0.504   0.452
700         // {(2,2.683),(3,3.401),(4,4.139),(5,4.768),(6,5.315),(7,5.819),(8,6.271),(10,7.093),(13,8.170),(15,8.844)}
701         // use http://mss.math.vanderbilt.edu/cgi-bin/MSSAgent/~pscrooke/MSS/fitpoly.def
702         // for calculating the coefficients
704                 VFrame *df2 = new VFrame (*engine->mask);
705                 VFrame *one_sample = new VFrame(*engine->mask);
706                 do_feather_2(df2, 
707                         engine->temp_mask, 
708                         25, 
709                         ptr->row1, 
710                         ptr->row2);
711                 float ftmp;
712                 for (ftmp = 8.15; ftmp <8.18; ftmp += 0.001) 
713                 {
714                         do_feather(one_sample, 
715                         engine->temp_mask, 
716                         ftmp, 
717                         ptr->row1, 
718                         ptr->row2);
719                         double squarediff = 0;
720                         for (int i=0; i< engine->mask->get_h(); i++)
721                                 for (int j = 0; j< engine->mask->get_w(); j++)
722                                 {
723                                         double v1= ((unsigned char *)one_sample->get_rows()[i])[j];
724                                         double v2= ((unsigned char *)df2->get_rows()[i])[j];
725                                         squarediff += (v1-v2)*(v1-v2);
726                                 }
727                         squarediff = sqrt(squarediff);
728                         printf("for value 3: ftmp: %2.3f, squarediff: %f\n", ftmp, squarediff);
729                 }
730         }
731         */      
732         
733                 int done = 0;
734                 done = do_feather_2(engine->mask,        // try if we have super fast implementation ready
735                                 engine->temp_mask,
736                                 engine->feather * 2 - 1, 
737                                 ptr->row1, 
738                                 ptr->row2);
739                 if (done) {
740                         engine->realfeather = engine->feather;
741                 }
742                 if (!done)
743                 {
744                 //      printf("not done\n");
745                         float feather = engine->feather;
746                         engine->realfeather = 0.878441 + 0.988534*feather - 0.0490204 *feather*feather  + 0.0012359 *feather*feather*feather;
747                         do_feather(engine->mask, 
748                                 engine->temp_mask, 
749                                 engine->realfeather, 
750                                 ptr->row1, 
751                                 ptr->row2); 
752                 }
753                 int64_t dif= get_difference(&start_time);
754                 printf("diff: %lli\n", dif);
755         } else
756         if (engine->feather <= 0) {
757                 engine->realfeather = 0;
758         }
759         start_row = MAX (ptr->row1, engine->first_nonempty_rowspan - (int)ceil(engine->realfeather)); 
760         end_row = MIN (ptr->row2, engine->last_nonempty_rowspan + 1 + (int)ceil(engine->realfeather));
// Apply mask
/* use the info about the first and last coloured columns from the rowspan! */
/* possible optimisation: also remember total spans */
/* possible optimisation: a lookup table for X * (max - *mask_row) / max, where max is known and mask_row and X are variables */
/*
 * Subtract the mask from the output frame for rows [start_row, end_row).
 *
 * type        - sample type of both the output and the mask rows
 * max         - maximum sample value for that type (0xff or 0xffff)
 * components  - samples per output pixel; 4 means only the alpha sample
 *               (index 3) is scaled, otherwise samples 0..2 are scaled
 * do_yuv      - when nonzero, samples 1 and 2 are pulled toward the
 *               chroma midpoint (max + 1) / 2 in proportion to the mask
 *
 * Expects engine, start_row, end_row and mask_w to be in scope at the
 * expansion site.
 *
 * The products are computed in int64_t: with 16 bit samples the operands
 * are promoted to int and 0xffff * 0xffff does not fit in a signed 32 bit
 * int.
 */
#define APPLY_MASK_SUBTRACT_ALPHA(type, max, components, do_yuv) \
{ \
	int chroma_offset = (max + 1) / 2; \
	for(int i = start_row; i < end_row; i++) \
	{ \
		type *output_row = (type*)engine->output->get_rows()[i]; \
		type *mask_row = (type*)engine->mask->get_rows()[i]; \
 \
		for(int j = 0; j < mask_w; j++) \
		{ \
			if(components == 4) \
			{ \
				output_row[3] = (type)((int64_t)output_row[3] * (max - *mask_row) / max); \
			} \
			else \
			{ \
				output_row[0] = (type)((int64_t)output_row[0] * (max - *mask_row) / max); \
				output_row[1] = (type)((int64_t)output_row[1] * (max - *mask_row) / max); \
				output_row[2] = (type)((int64_t)output_row[2] * (max - *mask_row) / max); \
 \
				if(do_yuv) \
				{ \
					output_row[1] += chroma_offset * *mask_row / max; \
					output_row[2] += chroma_offset * *mask_row / max; \
				} \
			} \
			output_row += components; \
			mask_row += 1; \
		} \
	} \
}
/*
 * Multiply the output frame by the mask for rows [ptr->row1, ptr->row2).
 *
 * type        - sample type of both the output and the mask rows
 * max         - maximum sample value for that type (0xff or 0xffff)
 * components  - samples per output pixel; 4 means only the alpha sample
 *               is scaled (the row pointer is advanced onto it up front),
 *               otherwise samples 0..2 are scaled
 * do_yuv      - when nonzero, samples 1 and 2 are pulled toward the
 *               chroma midpoint (max + 1) / 2 in proportion to the
 *               inverted mask
 *
 * Expects engine, ptr and mask_w to be in scope at the expansion site.
 *
 * Fixes relative to the previous version:
 *  - the first sample of a 3 component pixel is scaled from itself, not
 *    from output_row[3] (the first sample of the following pixel)
 *  - products are computed in int64_t because 0xffff * 0xffff does not fit
 *    in the signed 32 bit int the 16 bit operands are promoted to
 */
#define APPLY_MASK_MULTIPLY_ALPHA(type, max, components, do_yuv) \
{ \
	int chroma_offset = (max + 1) / 2; \
	for(int i = ptr->row1; i < ptr->row2; i++) \
	{ \
		type *output_row = (type*)engine->output->get_rows()[i]; \
		type *mask_row = (type*)engine->mask->get_rows()[i]; \
 \
		if(components == 4) output_row += 3; \
		for(int j = mask_w; j != 0; j--) \
		{ \
			if(components == 4) \
			{ \
				*output_row = (type)((int64_t)*output_row * *mask_row / max); \
			} \
			else \
			{ \
				output_row[0] = (type)((int64_t)output_row[0] * *mask_row / max); \
				output_row[1] = (type)((int64_t)output_row[1] * *mask_row / max); \
				output_row[2] = (type)((int64_t)output_row[2] * *mask_row / max); \
 \
				if(do_yuv) \
				{ \
					output_row[1] += chroma_offset * (max - *mask_row) / max; \
					output_row[2] += chroma_offset * (max - *mask_row) / max; \
				} \
			} \
			output_row += components; \
			mask_row += 1; \
		} \
	} \
}
839 //struct timeval start_time;
840 //gettimeofday(&start_time, 0);
842 //printf("MaskUnit::process_package 1 %d\n", engine->mode);
843         int mask_w = engine->mask->get_w();
844         switch(engine->mode)
845         {
846                 case MASK_MULTIPLY_ALPHA:
847                         switch(engine->output->get_color_model())
848                         {
849                                 case BC_RGB888:
850                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 3, 0);
851                                         break;
852                                 case BC_YUV888:
853                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 3, 1);
854                                         break;
855                                 case BC_YUVA8888:
856                                 case BC_RGBA8888:
857                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 4, 0);
858                                         break;
859                                 case BC_RGB161616:
860                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 3, 0);
861                                         break;
862                                 case BC_YUV161616:
863                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 3, 1);
864                                         break;
865                                 case BC_YUVA16161616:
866                                 case BC_RGBA16161616:
867                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 4, 0);
868                                         break;
869                         }
870                         break;
872                 case MASK_SUBTRACT_ALPHA:
873                         switch(engine->output->get_color_model())
874                         {
875                                 case BC_RGB888:
876                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 3, 0);
877                                         break;
878                                 case BC_YUV888:
879                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 3, 1);
880                                         break;
881                                 case BC_YUVA8888:
882                                 case BC_RGBA8888:
883                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 4, 0);
884                                         break;
885                                 case BC_RGB161616:
886                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 3, 0);
887                                         break;
888                                 case BC_YUV161616:
889                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 3, 1);
890                                         break;
891                                 case BC_YUVA16161616:
892                                 case BC_RGBA16161616:
893                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 4, 0);
894                                         break;
895                         }
896                         break;
897         }
898 //      int64_t dif= get_difference(&start_time);
899 //      printf("diff: %lli\n", dif);
900 //printf("diff2: %lli\n", get_difference(&start_time));
901 //printf("MaskUnit::process_package 4 %d\n", get_package_number());
908 MaskEngine::MaskEngine(int cpus)
909  : LoadServer(cpus, cpus )      /* these two HAVE to be the same, since packages communicate  */
910 // : LoadServer(1, 2)
912         mask = 0;
913         pthread_mutex_init(&stage1_finished_mutex, NULL);
914         pthread_cond_init(&stage1_finished_cond, NULL);
917 MaskEngine::~MaskEngine()
919         pthread_cond_destroy(&stage1_finished_cond);
920         pthread_mutex_destroy(&stage1_finished_mutex);
921         if(mask) 
922         {
923                 delete mask;
924                 delete temp_mask;
925         }
926         point_sets.remove_all_objects();
929 int MaskEngine::points_equivalent(ArrayList<MaskPoint*> *new_points, 
930         ArrayList<MaskPoint*> *points)
932 //printf("MaskEngine::points_equivalent %d %d\n", new_points->total, points->total);
933         if(new_points->total != points->total) return 0;
934         
935         for(int i = 0; i < new_points->total; i++)
936         {
937                 if(!(*new_points->values[i] == *points->values[i])) return 0;
938         }
939         
940         return 1;
943 void MaskEngine::do_mask(VFrame *output, 
944         int64_t start_position,
945         double frame_rate,
946         double project_frame_rate,
947         MaskAutos *keyframe_set, 
948         int direction)
950         int64_t start_position_project = (int64_t)(start_position *
951                 project_frame_rate / 
952                 frame_rate);
953         Auto *current = 0;
954         MaskAuto *default_auto = (MaskAuto*)keyframe_set->default_auto;
955         MaskAuto *keyframe = (MaskAuto*)keyframe_set->get_prev_auto(start_position_project, 
956                 direction,
957                 current);
960         int total_points = 0;
961         for(int i = 0; i < keyframe->masks.total; i++)
962         {
963                 SubMask *mask = keyframe->get_submask(i);
964                 int submask_points = mask->points.total;
965                 if(submask_points > 1) total_points += submask_points;
966         }
968 //printf("MaskEngine::do_mask 1 %d %d\n", total_points, keyframe->value);
969 // Ignore certain masks
970         if(total_points < 2 || 
971                 (keyframe->value == 0 && default_auto->mode == MASK_SUBTRACT_ALPHA))
972         {
973                 return;
974         }
976 // Fake certain masks
977         if(keyframe->value == 0 && default_auto->mode == MASK_MULTIPLY_ALPHA)
978         {
979                 output->clear_frame();
980                 return;
981         }
983 //printf("MaskEngine::do_mask 1\n");
985         int new_color_model = 0;
986         recalculate = 0;
987         switch(output->get_color_model())
988         {
989                 case BC_RGB888:
990                 case BC_RGBA8888:
991                 case BC_YUV888:
992                 case BC_YUVA8888:
993                         new_color_model = BC_A8;
994                         break;
996                 case BC_RGB161616:
997                 case BC_RGBA16161616:
998                 case BC_YUV161616:
999                 case BC_YUVA16161616:
1000                         new_color_model = BC_A16;
1001                         break;
1002         }
1004 // Determine if recalculation is needed
1006         if(mask && 
1007                 (mask->get_w() != output->get_w() ||
1008                 mask->get_h() != output->get_h() ||
1009                 mask->get_color_model() != new_color_model))
1010         {
1011                 delete mask;
1012                 delete temp_mask;
1013                 mask = 0;
1014                 recalculate = 1;
1015         }
1017         if(!recalculate)
1018         {
1019                 if(point_sets.total != keyframe_set->total_submasks(start_position_project, 
1020                         direction))
1021                         recalculate = 1;
1022         }
1024         if(!recalculate)
1025         {
1026                 for(int i = 0; 
1027                         i < keyframe_set->total_submasks(start_position_project, 
1028                                 direction) && !recalculate; 
1029                         i++)
1030                 {
1031                         ArrayList<MaskPoint*> *new_points = new ArrayList<MaskPoint*>;
1032                         keyframe_set->get_points(new_points, 
1033                                 i, 
1034                                 start_position_project, 
1035                                 direction);
1036                         if(!points_equivalent(new_points, point_sets.values[i])) recalculate = 1;
1037                         new_points->remove_all_objects();
1038                 }
1039         }
1041         if(recalculate ||
1042                 !EQUIV(keyframe->feather, feather) ||
1043                 !EQUIV(keyframe->value, value))
1044         {
1045                 recalculate = 1;
1046                 if(!mask) 
1047                 {
1048                         mask = new VFrame(0, 
1049                                         output->get_w(), 
1050                                         output->get_h(),
1051                                         new_color_model);
1052                         temp_mask = new VFrame(0, 
1053                                         output->get_w(), 
1054                                         output->get_h(),
1055                                         new_color_model);
1056                 }
1057                 if(keyframe->feather > 0)
1058                         temp_mask->clear_frame();
1059                 else
1060                         mask->clear_frame();
1061                 point_sets.remove_all_objects();
1063                 for(int i = 0; 
1064                         i < keyframe_set->total_submasks(start_position_project, 
1065                                 direction); 
1066                         i++)
1067                 {
1068                         ArrayList<MaskPoint*> *new_points = new ArrayList<MaskPoint*>;
1069                         keyframe_set->get_points(new_points, 
1070                                 i, 
1071                                 start_position_project, 
1072                                 direction);
1073                         point_sets.append(new_points);
1074                 }
1075         }
1079         this->output = output;
1080         this->mode = default_auto->mode;
1081         this->feather = keyframe->feather;
1082         this->value = keyframe->value;
1085 // Run units
1086         process_packages();
1089 //printf("MaskEngine::do_mask 6\n");
1092 void MaskEngine::init_packages()
1094 //printf("MaskEngine::init_packages 1\n");
1095         int division = (int)((float)output->get_h() / (get_total_packages()) + 0.5);
1096         if(division < 1) division = 1;
1098         stage1_finished_count = 0;
1099         if (recalculate) {
1100                 last_nonempty_rowspan = SHRT_MIN;
1101                 first_nonempty_rowspan = SHRT_MAX;
1102         }
1103 // Always a multiple of 2 packages exist
1104         for(int i = 0; i < get_total_packages(); i++)
1105         {
1106                 MaskPackage *pkg = (MaskPackage*)packages[i];
1107                 pkg->row1 = division * i;
1108                 pkg->row2 = MIN (division * i + division, output->get_h());
1109                 
1110                 if(i == get_total_packages() - 1)  // last package
1111                 {
1112                         pkg->row2 = pkg->row2 = output->get_h();
1113                 }
1115         }
1116 //printf("MaskEngine::init_packages 2\n");
1119 LoadClient* MaskEngine::new_client()
1121         return new MaskUnit(this);
1124 LoadPackage* MaskEngine::new_package()
1126         return new MaskPackage;