r726: Implementing ability to add textual info to the labels
[cinelerra_cv/mob.git] / cinelerra / maskengine.C
blob451ee41b3eb556220c0ada83d079c438b01625da
1 #include "clip.h"
2 #include "condition.h"
3 #include "maskauto.h"
4 #include "maskautos.h"
5 #include "maskengine.h"
6 #include "mutex.h"
7 #include "vframe.h"
9 #include <math.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <limits.h>
14 #include "feather.h"
// Returns the wall-clock time elapsed since *start_time, in microseconds.
// start_time is only read; the current time is taken with gettimeofday().
int64_t get_difference(struct timeval *start_time)
{
	struct timeval now;
	gettimeofday(&now, 0);

	// Work in 64 bit microseconds directly; a negative microsecond delta
	// is absorbed by the seconds term, so no explicit borrow is needed.
	int64_t delta_sec = (int64_t)now.tv_sec - (int64_t)start_time->tv_sec;
	int64_t delta_usec = (int64_t)now.tv_usec - (int64_t)start_time->tv_usec;

	return delta_sec * 1000000 + delta_usec;
}
38 MaskPackage::MaskPackage()
42 MaskPackage::~MaskPackage()
50 MaskUnit::MaskUnit(MaskEngine *engine)
51  : LoadClient(engine)
53         this->engine = engine;
54         row_spans_h = 0;
55         row_spans = 0;
59 MaskUnit::~MaskUnit()
61         if (row_spans)
62         {
63                 for (int i = 0; i < row_spans_h; i++) 
64                         free(row_spans[i]);
65                 delete [] row_spans;
66         }
69 #ifndef SQR
70 #define SQR(x) ((x) * (x))
71 #endif
75 inline void MaskUnit::draw_line_clamped(
76         int draw_x1, 
77         int draw_y1, 
78         int draw_x2, 
79         int draw_y2,
80         int w,
81         int h,
82         int hoffset)
84 //printf("MaskUnit::draw_line_clamped 1 %d %d %d %d\n", x1, y1, x2, y2);
85         if (draw_y1 == draw_y2) return; 
87         if(draw_y2 < draw_y1)
88         { /* change the order */
89                 int tmp;
90                 tmp = draw_x1;
91                 draw_x1 = draw_x2;
92                 draw_x2 = tmp;
93                 tmp = draw_y1;
94                 draw_y1 = draw_y2;
95                 draw_y2 = tmp;
96         }
98         float slope = ((float)draw_x2 - draw_x1) / ((float)draw_y2 - draw_y1); 
99         w--;
100         for(int y_i = draw_y1; y_i < draw_y2; y_i++) 
101         { 
102                 if (y_i >= h) 
103                         return; // since y gets larger, there is no point in continuing
104                 else if(y_i >= 0) 
105                 { 
106                         int x = (int)(slope * (y_i - draw_y1) + draw_x1); 
107                         int x_i = CLIP(x, 0, w); 
109                         /* now insert into span in order */
110                         short *span = row_spans[y_i + hoffset]; 
111                         if (span[0] >= span[1]) { /* do the reallocation */
112                                 span[1] *= 2;
113                                 span = row_spans[y_i + hoffset] = (short *) realloc (span, span[1] * sizeof(short)); /* be careful! row_spans has to be updated! */
114                         };
116                         short index = 2;
117                         while (index < span[0]  && span[index] < x_i)
118                                 index++;
119                         for (int j = span[0]; j > index; j--) {       // move forward
120                                 span[j] = span[j-1];
121                         }
122                         span[index] = x_i;
123                         span[0] ++;
124                 } 
125         } 
128 void MaskUnit::blur_strip(float *val_p, 
129         float *val_m, 
130         float *dst, 
131         float *src, 
132         int size,
133         int max)
135         float *sp_p = src;
136         float *sp_m = src + size - 1;
137         float *vp = val_p;
138         float *vm = val_m + size - 1;
139         float initial_p = sp_p[0];
140         float initial_m = sp_m[0];
142 //printf("MaskUnit::blur_strip %d\n", size);
143         for(int k = 0; k < size; k++)
144         {
145                 int terms = (k < 4) ? k : 4;
146                 int l;
147                 for(l = 0; l <= terms; l++)
148                 {
149                         *vp += n_p[l] * sp_p[-l] - d_p[l] * vp[-l];
150                         *vm += n_m[l] * sp_m[l] - d_m[l] * vm[l];
151                 }
153                 for( ; l <= 4; l++)
154                 {
155                         *vp += (n_p[l] - bd_p[l]) * initial_p;
156                         *vm += (n_m[l] - bd_m[l]) * initial_m;
157                 }
158                 sp_p++;
159                 sp_m--;
160                 vp++;
161                 vm--;
162         }
164         for(int i = 0; i < size; i++)
165         {
166                 float sum = val_p[i] + val_m[i];
167                 CLAMP(sum, 0, max);
168                 dst[i] = sum;
169         }
174 int MaskUnit::do_feather_2(VFrame *output,
175         VFrame *input, 
176         float feather, 
177         int start_out, 
178         int end_out)
180         
181         int fint = (int)feather;
182         DO_FEATHER_N(unsigned char, uint32_t, 0xffff, fint);
187 void MaskUnit::do_feather(VFrame *output,
188         VFrame *input, 
189         float feather, 
190         int start_out, 
191         int end_out)
193 //printf("MaskUnit::do_feather %f\n", feather);
194 // Get constants
195         double constants[8];
196         double div;
197         double std_dev = sqrt(-(double)(feather * feather) / (2 * log(1.0 / 255.0)));
198         div = sqrt(2 * M_PI) * std_dev;
199         constants[0] = -1.783 / std_dev;
200         constants[1] = -1.723 / std_dev;
201         constants[2] = 0.6318 / std_dev;
202         constants[3] = 1.997  / std_dev;
203         constants[4] = 1.6803 / div;
204         constants[5] = 3.735 / div;
205         constants[6] = -0.6803 / div;
206         constants[7] = -0.2598 / div;
208         n_p[0] = constants[4] + constants[6];
209         n_p[1] = exp(constants[1]) *
210                                 (constants[7] * sin(constants[3]) -
211                                 (constants[6] + 2 * constants[4]) * cos(constants[3])) +
212                                 exp(constants[0]) *
213                                 (constants[5] * sin(constants[2]) -
214                                 (2 * constants[6] + constants[4]) * cos(constants[2]));
216         n_p[2] = 2 * exp(constants[0] + constants[1]) *
217                                 ((constants[4] + constants[6]) * cos(constants[3]) * 
218                                 cos(constants[2]) - constants[5] * 
219                                 cos(constants[3]) * sin(constants[2]) -
220                                 constants[7] * cos(constants[2]) * sin(constants[3])) +
221                                 constants[6] * exp(2 * constants[0]) +
222                                 constants[4] * exp(2 * constants[1]);
224         n_p[3] = exp(constants[1] + 2 * constants[0]) *
225                                 (constants[7] * sin(constants[3]) - 
226                                 constants[6] * cos(constants[3])) +
227                                 exp(constants[0] + 2 * constants[1]) *
228                                 (constants[5] * sin(constants[2]) - constants[4] * 
229                                 cos(constants[2]));
230         n_p[4] = 0.0;
232         d_p[0] = 0.0;
233         d_p[1] = -2 * exp(constants[1]) * cos(constants[3]) -
234                                 2 * exp(constants[0]) * cos(constants[2]);
236         d_p[2] = 4 * cos(constants[3]) * cos(constants[2]) * 
237                                 exp(constants[0] + constants[1]) +
238                                 exp(2 * constants[1]) + exp (2 * constants[0]);
240         d_p[3] = -2 * cos(constants[2]) * exp(constants[0] + 2 * constants[1]) -
241                                 2 * cos(constants[3]) * exp(constants[1] + 2 * constants[0]);
243         d_p[4] = exp(2 * constants[0] + 2 * constants[1]);
245         for(int i = 0; i < 5; i++) d_m[i] = d_p[i];
247         n_m[0] = 0.0;
248         for(int i = 1; i <= 4; i++)
249                 n_m[i] = n_p[i] - d_p[i] * n_p[0];
251         double sum_n_p, sum_n_m, sum_d;
252         double a, b;
254         sum_n_p = 0.0;
255         sum_n_m = 0.0;
256         sum_d = 0.0;
257         for(int i = 0; i < 5; i++)
258         {
259                 sum_n_p += n_p[i];
260                 sum_n_m += n_m[i];
261                 sum_d += d_p[i];
262         }
264         a = sum_n_p / (1 + sum_d);
265         b = sum_n_m / (1 + sum_d);
267         for(int i = 0; i < 5; i++)
268         {
269                 bd_p[i] = d_p[i] * a;
270                 bd_m[i] = d_m[i] * b;
271         }
294 #define DO_FEATHER(type, max) \
295 { \
296         int frame_w = input->get_w(); \
297         int frame_h = input->get_h(); \
298         int size = MAX(frame_w, frame_h); \
299         float *src = new float[size]; \
300         float *dst = new float[size]; \
301         float *val_p = new float[size]; \
302         float *val_m = new float[size]; \
303         int start_in = start_out - (int)feather; \
304         int end_in = end_out + (int)feather; \
305         if(start_in < 0) start_in = 0; \
306         if(end_in > frame_h) end_in = frame_h; \
307         int strip_size = end_in - start_in; \
308         type **in_rows = (type**)input->get_rows(); \
309         type **out_rows = (type**)output->get_rows(); \
310         int j; \
312 /* printf("DO_FEATHER 1\n"); */ \
313         for(j = 0; j < frame_w; j++) \
314         { \
315 /* printf("DO_FEATHER 1.1 %d\n", j); */ \
316                 bzero(val_p, sizeof(float) * (end_in - start_in)); \
317                 bzero(val_m, sizeof(float) * (end_in - start_in)); \
318                 for(int l = 0, k = start_in; k < end_in; l++, k++) \
319                 { \
320                         src[l] = (float)in_rows[k][j]; \
321                 } \
323                 blur_strip(val_p, val_m, dst, src, strip_size, max); \
325                 for(int l = start_out - start_in, k = start_out; k < end_out; l++, k++) \
326                 { \
327                         out_rows[k][j] = (type)dst[l]; \
328                 } \
329         } \
331         for(j = start_out; j < end_out; j++) \
332         { \
333 /* printf("DO_FEATHER 2 %d\n", j); */ \
334                 bzero(val_p, sizeof(float) * frame_w); \
335                 bzero(val_m, sizeof(float) * frame_w); \
336                 for(int k = 0; k < frame_w; k++) \
337                 { \
338                         src[k] = (float)out_rows[j][k]; \
339                 } \
341                 blur_strip(val_p, val_m, dst, src, frame_w, max); \
343                 for(int k = 0; k < frame_w; k++) \
344                 { \
345                         out_rows[j][k] = (type)dst[k]; \
346                 } \
347         } \
349 /* printf("DO_FEATHER 3\n"); */ \
351         delete [] src; \
352         delete [] dst; \
353         delete [] val_p; \
354         delete [] val_m; \
355 /* printf("DO_FEATHER 4\n"); */ \
365 //printf("do_feather %d\n", frame->get_color_model());
366         switch(input->get_color_model())
367         {
368                 case BC_A8:
369                         DO_FEATHER(unsigned char, 0xff);
370                         break;
371                 
372                 case BC_A16:
373                         DO_FEATHER(uint16_t, 0xffff);
374                         break;
375         }
382 void MaskUnit::process_package(LoadPackage *package)
384         MaskPackage *ptr = (MaskPackage*)package;
385         
386         int start_row = SHRT_MIN;         // part for which mask exists
387         int end_row;
388         if(engine->recalculate)
389         {
390                 VFrame *mask;
391 //printf("MaskUnit::process_package 1 %d\n", get_package_number());
392                 if(engine->feather > 0) 
393                         mask = engine->temp_mask;
394                 else
395                         mask = engine->mask;
397                 int mask_w = mask->get_w();
398                 int mask_h = mask->get_h();
399                 int mask_color_model = mask->get_color_model();
400                 int oversampled_package_w = mask_w * OVERSAMPLE;
401                 int oversampled_package_h = (ptr->row2 - ptr->row1) * OVERSAMPLE;
402                 int local_first_nonempty_rowspan = SHRT_MIN;
403                 int local_last_nonempty_rowspan = SHRT_MIN;
405                 if (!row_spans || row_spans_h != mask_h * OVERSAMPLE) {
406                         int i;  
407                         if (row_spans) {   /* size change */
408                                 for (i = 0; i < row_spans_h; i++) 
409                                         free(row_spans[i]);
410                                 delete [] row_spans;
411                         }
412                         row_spans_h = mask_h * OVERSAMPLE;
413                         row_spans = new short *[mask_h * OVERSAMPLE]; 
414                         for (i= 0; i<mask_h * OVERSAMPLE; i++) {
415                                 /* we use malloc so we can use realloc */
416                                 row_spans[i] = (short *)malloc(sizeof(short) * NUM_SPANS);
417                                 /* [0] is initialized later */
418                                 row_spans[i][1] = NUM_SPANS;
419                         }
420                 }
421                 
422 //printf("MaskUnit::process_package 1 %d\n", engine->point_sets.total);
423                 
425 // Draw bezier curves onto span buffer
426 //struct timeval start_time;
427 //gettimeofday(&start_time, 0);
429                 for(int k = 0; k < engine->point_sets.total; k++)
430                 {               
431                         int old_x, old_y;
432                         old_x = SHRT_MIN; // sentinel
433                         ArrayList<MaskPoint*> *points = engine->point_sets.values[k];
435                         if(points->total < 2) continue;
436 //printf("MaskUnit::process_package 2 %d %d\n", k, points->total);
437                         for (int i = ptr->row1 * OVERSAMPLE; i < ptr->row2 * OVERSAMPLE; i++) 
438                                 row_spans[i][0] = 2; /* initialize to zero */ 
439                         (ptr->row1*OVERSAMPLE, ptr->row2*OVERSAMPLE); // init just my rows
440                         for(int i = 0; i < points->total; i++)
441                         {
442                                 MaskPoint *point1 = points->values[i];
443                                 MaskPoint *point2 = (i >= points->total - 1) ? 
444                                         points->values[0] : 
445                                         points->values[i + 1];
447                                 float x0 = point1->x;
448                                 float y0 = point1->y;
449                                 float x1 = point1->x + point1->control_x2;
450                                 float y1 = point1->y + point1->control_y2;
451                                 float x2 = point2->x + point2->control_x1;
452                                 float y2 = point2->y + point2->control_y1;
453                                 float x3 = point2->x;
454                                 float y3 = point2->y;
456                                 // possible optimization here... since these coordinates are bounding box for curve
457                                 // we can continue with next curve if they are out of our range
459                                 // forward differencing bezier curves implementation taken from GPL code at
460                                 // http://cvs.sourceforge.net/viewcvs.py/guliverkli/guliverkli/src/subtitles/Rasterizer.cpp?rev=1.3
464                                 float cx3, cx2, cx1, cx0, cy3, cy2, cy1, cy0;
467                                 // [-1 +3 -3 +1]
468                                 // [+3 -6 +3  0]
469                                 // [-3 +3  0  0]
470                                 // [+1  0  0  0]
472                                 cx3 = (-  x0 + 3*x1 - 3*x2 + x3) * OVERSAMPLE;
473                                 cx2 = ( 3*x0 - 6*x1 + 3*x2) * OVERSAMPLE;
474                                 cx1 = (-3*x0 + 3*x1) * OVERSAMPLE;
475                                 cx0 = (   x0) * OVERSAMPLE;
477                                 cy3 = (-  y0 + 3*y1 - 3*y2 + y3) * OVERSAMPLE;
478                                 cy2 = ( 3*y0 - 6*y1 + 3*y2) * OVERSAMPLE;
479                                 cy1 = (-3*y0 + 3*y1) * OVERSAMPLE;
480                                 cy0 = (   y0 - ptr->row1) * OVERSAMPLE;
482                                 float maxaccel1 = fabs(2*cy2) + fabs(6*cy3);
483                                 float maxaccel2 = fabs(2*cx2) + fabs(6*cx3);
485                                 float maxaccel = maxaccel1 > maxaccel2 ? maxaccel1 : maxaccel2;
486                                 float h = 1.0;
488                                 if(maxaccel > 8.0 * OVERSAMPLE) h = sqrt((8.0 * OVERSAMPLE) / maxaccel);
490                                 for(float t = 0.0; t < 1.0; t += h)
491                                 {
492                                         int x = (int) (cx0 + t*(cx1 + t*(cx2 + t*cx3)));
493                                         int y = (int) (cy0 + t*(cy1 + t*(cy2 + t*cy3)));
495                                         if (old_x != SHRT_MIN) 
496                                                 draw_line_clamped(old_x, old_y, x, y, oversampled_package_w, oversampled_package_h, ptr->row1 * OVERSAMPLE);
497                                         old_x = x;
498                                         old_y = y;
499                                 }
501                                 int x = (int)(x3 * OVERSAMPLE);
502                                 int y = (int)((y3 - ptr->row1) * OVERSAMPLE);
503                                 draw_line_clamped(old_x, old_y, x, y, oversampled_package_w, oversampled_package_h, ptr->row1 * OVERSAMPLE);
504                                 old_x = (int)x;
505                                 old_y = (int)y;
506                 
507                         }
508 //printf("MaskUnit::process_package 1\n");
510                         // Now we have ordered spans ready!
511                         //printf("Segment : %i , row1: %i\n", oversampled_package_h, ptr->row1);
512                         uint16_t value;
513                         if (mask_color_model == BC_A8)
514                                 value = (int)((float)engine->value / 100 * 0xff);
515                         else
516                                 value = (int)((float)engine->value / 100 * 0xffff);
518                         /* Scaneline sampling, inspired by Graphics gems I, page 81 */
519                         for (int i = ptr->row1; i < ptr->row2; i++) 
520                         {
521                                 short min_x = SHRT_MAX;
522                                 short max_x = SHRT_MIN;
523                                 int j;                          /* universal counter for 0..OVERSAMPLE-1 */
524                                 short *span;                    /* current span - set inside loops with j */
525                                 short span_p[OVERSAMPLE];       /* pointers to current positions in spans */
526                                 #define P (span_p[j])           /* current span pointer */
527                                 #define MAXP (span[0])          /* current span length */
528                                 int num_empty_spans = 0;
529                                 /* get the initial span pointers ready */
530                                 for (j = 0; j < OVERSAMPLE; j++)
531                                 {       
532                                         span = row_spans[j + i * OVERSAMPLE];
533                                         P = 2;              /* starting pointers to spans */
534                                                 /* hypotetical hypotetical fix goes here: take care that there is maximum one empty span for every subpixel */ 
535                                         if (MAXP != 2) {                                        /* if span is not empty */
536                                                 if (span[2] < min_x) min_x = span[2];           /* take start of the first span */
537                                                 if (span[MAXP-1] > max_x) max_x = span[MAXP-1]; /* and end of last */
538                                         } else              
539                                         {       /* span is empty */
540                                                 num_empty_spans ++;     
541                                         }       
542                                 }
543                                 if (num_empty_spans == OVERSAMPLE)
544                                         continue; /* no work for us here */
545                                 else 
546                                 {       /* if we have engaged first nonempty rowspan... remember it to speed up mask applying */
547                                         if (local_first_nonempty_rowspan < 0 || i < local_first_nonempty_rowspan) 
548                                                 local_first_nonempty_rowspan = i;  
549                                         if (i > local_last_nonempty_rowspan) local_last_nonempty_rowspan = i;
550                                 }
551                                 /* we have some pixels to fill, do coverage calculation for span */
553                                 void *output_row = (unsigned char*)mask->get_rows()[i];
554                                 min_x = min_x / OVERSAMPLE;
555                                 max_x = (max_x + OVERSAMPLE - 1) / OVERSAMPLE;
556                                 
557                                 /* printf("row %i, pixel range: %i %i, spans0: %i\n", i, min_x, max_x, row_spans[i*OVERSAMPLE][0]-2); */
559                                 /* this is not a full loop, since we jump trough h if possible */
560                                 for (int h = min_x; h <= max_x; h++) 
561                                 {
562                                         short pixelleft = h * OVERSAMPLE;  /* leftmost subpixel of pixel*/
563                                         short pixelright = pixelleft + OVERSAMPLE - 1; /* rightmost subpixel of pixel */
564                                         uint32_t coverage = 0;
565                                         int num_left = 0;               /* number of spans that have start left of the next pixel */
566                                         short right_end = SHRT_MAX;     /* leftmost end of any span - right end of a full scanline */
567                                         short right_start = SHRT_MAX;   /* leftmost start of any span - left end of empty scanline */
569                                         for (j=0; j< OVERSAMPLE; j++) 
570                                         {       
571                                                 char chg = 1;
572                                                 span = row_spans[j + i * OVERSAMPLE];
573                                                 while (P < MAXP && chg)
574                                                 {
575                                                 //      printf("Sp: %i %i\n", span[P], span[P+1]);
576                                                         if (span[P] == span[P+1])           /* ignore empty spans */
577                                                         {
578                                                                 P +=2;
579                                                                 continue;
580                                                         }
581                                                         if (span[P] <= pixelright)          /* if span start is before the end of pixel */
582                                                                 coverage += MIN(span[P+1], pixelright)  /* 'clip' the span to pixel */
583                                                                           - MAX(span[P], pixelleft) + 1;
584                                                         if (span[P+1] <= pixelright) 
585                                                                 P += 2;
586                                                         else 
587                                                                 chg = 0;
588                                                 } 
589                                                 if (P == MAXP) 
590                                                         num_left = -OVERSAMPLE; /* just take care that num_left cannot equal OVERSAMPLE or zero again */
591                                                 else    
592                                                 { 
593                                                         if (span[P] <= pixelright)  /* if span starts before subpixel in the pixel on the right */
594                                                         {    /* useful for determining filled space till next non-fully-filled pixel */
595                                                                 num_left ++;                                            
596                                                                 if (span[P+1] < right_end) right_end = span[P+1]; 
597                                                         } else 
598                                                         {    /* useful for determining empty space till next non-empty pixel */
599                                                                 if (span[P] < right_start) right_start = span[P]; 
600                                                         }
601                                                 }
602                                         }
603                                         // calculate coverage
604                                         coverage *= value;
605                                         if(OVERSAMPLE == 8) coverage >>= 6; \
606                                         else \
607                                         if(OVERSAMPLE == 4) coverage >>= 2; \
608                                         else \
609                                         if(OVERSAMPLE == 2) coverage >>= 1; \
610                                         else coverage /= OVERSAMPLE * OVERSAMPLE; \
612                                         
613                                         if (mask_color_model == BC_A8) 
614                                         {
615                                                 if (((unsigned char *) output_row)[h] < coverage) /* when we have multiple masks... we don't want aliasing inside areas */
616                                                         ((unsigned char*)output_row)[h] = coverage;
617                                         } else
618                                         {
619                                                 if (((uint16_t *) output_row)[h] < coverage) /* when we have multiple masks... we don't want aliasing inside areas */
620                                                         ((uint16_t *) output_row)[h] = coverage;
621                                         }
622                                         /* possible optimization: do joining of multiple masks by span logics, not by bitmap logics*/
623                                         
624                                         if (num_left == OVERSAMPLE) 
625                                         {
626                                                 /* all current spans start more left than next pixel */
627                                                 /* this means we can probably (if lucky) draw a longer horizontal line */
628                                                 right_end = (right_end / OVERSAMPLE) - 1; /* last fully covered pixel */
629                                                 if (right_end > h)
630                                                 {
631                                                         if (mask_color_model == BC_A8) 
632                                                                 memset((char *)output_row + h + 1, value, right_end - h);
633                                                         else {
634                                                                 /* we are fucked, since there is no 16bit memset */
635                                                                 for (int z = h +1; z <= right_end; z++)
636                                                                         ((uint16_t *) output_row)[z] =  value;
637                 
638                                                         }
639                                                         h = right_end;  
640                                                 }
641                                         } else 
642                                         if (num_left == 0) 
643                                         {
644                                                 /* all current spans start right of next pixel */ 
645                                                 /* this means we can probably (if lucky) skip some pixels */
646                                                 right_start = (right_start / OVERSAMPLE) - 1; /* last fully empty pixel */
647                                                 if (right_start > h)
648                                                 {
649                                                         h = right_start;
650                                                 }
651                                         }
652                                 }
653                                 
654                         }                                       
655                         
656                 }
657                 engine->protect_data.lock();
658                 if (local_first_nonempty_rowspan < engine->first_nonempty_rowspan)
659                         engine->first_nonempty_rowspan = local_first_nonempty_rowspan;
660                 if (local_last_nonempty_rowspan > engine->last_nonempty_rowspan)
661                         engine->last_nonempty_rowspan = local_last_nonempty_rowspan;
662                 engine->protect_data.unlock();
663         
665 //              int64_t dif= get_difference(&start_time);
666 //              printf("diff: %lli\n", dif);
667         }       /* END OF RECALCULATION! */
670         /* possible optimization: this could be useful for do_feather also */
672         // Feather polygon
673         if(engine->recalculate && engine->feather > 0) 
674         {       
675                 /* first take care that all packages are already drawn onto mask */
676                 pthread_mutex_lock(&engine->stage1_finished_mutex);
677                 engine->stage1_finished_count ++;
678                 if (engine->stage1_finished_count == engine->get_total_packages())
679                 {
680                         // let others pass
681                         pthread_cond_broadcast(&engine->stage1_finished_cond);
682                 }
683                 else
684                 {
685                         // wait until all are finished
686                         while (engine->stage1_finished_count < engine->get_total_packages())
687                                 pthread_cond_wait(&engine->stage1_finished_cond, &engine->stage1_finished_mutex);
688                 }
689                 pthread_mutex_unlock(&engine->stage1_finished_mutex);
690                 
691                 /* now do the feather */
692 //printf("MaskUnit::process_package 3 %f\n", engine->feather);
694         struct timeval start_time;
695         gettimeofday(&start_time, 0);
697         /* 
698         {
699         // EXPERIMENTAL CODE to find out how values between old and new do_feather map
700         // create a testcase and find out the closest match between do_feather_2 at 3 and do_feather
701         //                      2       3       4       5       6       7       8       10      13      15
702         // do_feather_2         3       5       7       9       11      13      15      19      25      29
703         // do_feather_1         2.683   3.401   4.139   4.768   5.315   5.819   6.271   7.093   8.170   8.844           
704         // diff                         0.718   0.738   0.629   0.547   0.504   0.452
705         // {(2,2.683),(3,3.401),(4,4.139),(5,4.768),(6,5.315),(7,5.819),(8,6.271),(10,7.093),(13,8.170),(15,8.844)}
706         // use http://mss.math.vanderbilt.edu/cgi-bin/MSSAgent/~pscrooke/MSS/fitpoly.def
707         // for calculating the coefficients
709                 VFrame *df2 = new VFrame (*engine->mask);
710                 VFrame *one_sample = new VFrame(*engine->mask);
711                 do_feather_2(df2, 
712                         engine->temp_mask, 
713                         25, 
714                         ptr->row1, 
715                         ptr->row2);
716                 float ftmp;
717                 for (ftmp = 8.15; ftmp <8.18; ftmp += 0.001) 
718                 {
719                         do_feather(one_sample, 
720                         engine->temp_mask, 
721                         ftmp, 
722                         ptr->row1, 
723                         ptr->row2);
724                         double squarediff = 0;
725                         for (int i=0; i< engine->mask->get_h(); i++)
726                                 for (int j = 0; j< engine->mask->get_w(); j++)
727                                 {
728                                         double v1= ((unsigned char *)one_sample->get_rows()[i])[j];
729                                         double v2= ((unsigned char *)df2->get_rows()[i])[j];
730                                         squarediff += (v1-v2)*(v1-v2);
731                                 }
732                         squarediff = sqrt(squarediff);
733                         printf("for value 3: ftmp: %2.3f, squarediff: %f\n", ftmp, squarediff);
734                 }
735         }
736         */      
737         
738                 int done = 0;
739                 done = do_feather_2(engine->mask,        // try if we have super fast implementation ready
740                                 engine->temp_mask,
741                                 engine->feather * 2 - 1, 
742                                 ptr->row1, 
743                                 ptr->row2);
744                 if (done) {
745                         engine->realfeather = engine->feather;
746                 }
747                 if (!done)
748                 {
749                 //      printf("not done\n");
750                         float feather = engine->feather;
751                         engine->realfeather = 0.878441 + 0.988534*feather - 0.0490204 *feather*feather  + 0.0012359 *feather*feather*feather;
752                         do_feather(engine->mask, 
753                                 engine->temp_mask, 
754                                 engine->realfeather, 
755                                 ptr->row1, 
756                                 ptr->row2); 
757                 }
758                 int64_t dif= get_difference(&start_time);
759                 printf("diff: %lli\n", dif);
760         } else
761         if (engine->feather <= 0) {
762                 engine->realfeather = 0;
763         }
764         start_row = MAX (ptr->row1, engine->first_nonempty_rowspan - (int)ceil(engine->realfeather)); 
765         end_row = MIN (ptr->row2, engine->last_nonempty_rowspan + 1 + (int)ceil(engine->realfeather));
769 // Apply mask
772 /* use the info about first and last column that are coloured from rowspan!  */
773 /* possible optimisation: also remember total spans */
774 /* possible optimisation: lookup for  X * (max - *mask_row) / max, where max is known mask_row and X are variabiles */
/*
 * Subtract mode: scale the output by (max - mask) / max.
 *
 * Expands in a scope that provides engine, start_row, end_row and mask_w.
 * Only rows start_row .. end_row - 1 are visited.
 * With 4 components only sample 3 of each pixel is scaled.  With 3
 * components all three samples are scaled and, when do_yuv is set, samples 1
 * and 2 additionally receive chroma_offset * mask / max.
 * Products are formed in uint32_t: with 16 bit samples 0xffff * 0xffff does
 * not fit into a signed int.
 */
#define APPLY_MASK_SUBTRACT_ALPHA(type, max, components, do_yuv) \
{ \
        int chroma_offset = (max + 1) / 2; \
        for(int i = start_row; i < end_row; i++) \
        { \
                type *output_row = (type*)engine->output->get_rows()[i]; \
                type *mask_row = (type*)engine->mask->get_rows()[i]; \
 \
                for(int j = 0; j < mask_w; j++) \
                { \
                        uint32_t keep = (uint32_t)(max - *mask_row); \
                        if(components == 4) \
                        { \
                                output_row[3] = (type)((uint32_t)output_row[3] * keep / max); \
                        } \
                        else \
                        { \
                                output_row[0] = (type)((uint32_t)output_row[0] * keep / max); \
                                output_row[1] = (type)((uint32_t)output_row[1] * keep / max); \
                                output_row[2] = (type)((uint32_t)output_row[2] * keep / max); \
 \
                                if(do_yuv) \
                                { \
                                        output_row[1] += chroma_offset * *mask_row / max; \
                                        output_row[2] += chroma_offset * *mask_row / max; \
                                } \
                        } \
                        output_row += components; \
                        mask_row += 1; \
                } \
        } \
}
/*
 * Multiply mode: scale the output by mask / max.
 *
 * Expands in a scope that provides engine, ptr and mask_w.  Every row of the
 * package (ptr->row1 .. ptr->row2 - 1) is visited.
 * With 4 components only sample 3 of each pixel is scaled.  With 3
 * components all three samples are scaled and, when do_yuv is set, samples 1
 * and 2 additionally receive chroma_offset * (max - mask) / max.
 * Sample 0 is scaled from its own value; it used to be read from index 3,
 * i.e. from the following pixel.
 * Products are formed in uint32_t: with 16 bit samples 0xffff * 0xffff does
 * not fit into a signed int.
 */
#define APPLY_MASK_MULTIPLY_ALPHA(type, max, components, do_yuv) \
{ \
        int chroma_offset = (max + 1) / 2; \
        for(int i = ptr->row1; i < ptr->row2; i++) \
        { \
                type *output_row = (type*)engine->output->get_rows()[i]; \
                type *mask_row = (type*)engine->mask->get_rows()[i]; \
 \
                for(int j = 0; j < mask_w; j++) \
                { \
                        uint32_t opacity = (uint32_t)*mask_row; \
                        if(components == 4) \
                        { \
                                output_row[3] = (type)((uint32_t)output_row[3] * opacity / max); \
                        } \
                        else \
                        { \
                                output_row[0] = (type)((uint32_t)output_row[0] * opacity / max); \
                                output_row[1] = (type)((uint32_t)output_row[1] * opacity / max); \
                                output_row[2] = (type)((uint32_t)output_row[2] * opacity / max); \
 \
                                if(do_yuv) \
                                { \
                                        output_row[1] += chroma_offset * (max - *mask_row) / max; \
                                        output_row[2] += chroma_offset * (max - *mask_row) / max; \
                                } \
                        } \
                        output_row += components; \
                        mask_row += 1; \
                } \
        } \
}
844 //struct timeval start_time;
845 //gettimeofday(&start_time, 0);
847 //printf("MaskUnit::process_package 1 %d\n", engine->mode);
848         int mask_w = engine->mask->get_w();
849         switch(engine->mode)
850         {
851                 case MASK_MULTIPLY_ALPHA:
852                         switch(engine->output->get_color_model())
853                         {
854                                 case BC_RGB888:
855                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 3, 0);
856                                         break;
857                                 case BC_YUV888:
858                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 3, 1);
859                                         break;
860                                 case BC_YUVA8888:
861                                 case BC_RGBA8888:
862                                         APPLY_MASK_MULTIPLY_ALPHA(unsigned char, 0xff, 4, 0);
863                                         break;
864                                 case BC_RGB161616:
865                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 3, 0);
866                                         break;
867                                 case BC_YUV161616:
868                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 3, 1);
869                                         break;
870                                 case BC_YUVA16161616:
871                                 case BC_RGBA16161616:
872                                         APPLY_MASK_MULTIPLY_ALPHA(uint16_t, 0xffff, 4, 0);
873                                         break;
874                         }
875                         break;
877                 case MASK_SUBTRACT_ALPHA:
878                         switch(engine->output->get_color_model())
879                         {
880                                 case BC_RGB888:
881                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 3, 0);
882                                         break;
883                                 case BC_YUV888:
884                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 3, 1);
885                                         break;
886                                 case BC_YUVA8888:
887                                 case BC_RGBA8888:
888                                         APPLY_MASK_SUBTRACT_ALPHA(unsigned char, 0xff, 4, 0);
889                                         break;
890                                 case BC_RGB161616:
891                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 3, 0);
892                                         break;
893                                 case BC_YUV161616:
894                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 3, 1);
895                                         break;
896                                 case BC_YUVA16161616:
897                                 case BC_RGBA16161616:
898                                         APPLY_MASK_SUBTRACT_ALPHA(uint16_t, 0xffff, 4, 0);
899                                         break;
900                         }
901                         break;
902         }
903 //      int64_t dif= get_difference(&start_time);
904 //      printf("diff: %lli\n", dif);
905 //printf("diff2: %lli\n", get_difference(&start_time));
906 //printf("MaskUnit::process_package 4 %d\n", get_package_number());
913 MaskEngine::MaskEngine(int cpus)
914  : LoadServer(cpus, cpus )      /* these two HAVE to be the same, since packages communicate  */
915 // : LoadServer(1, 2)
917         mask = 0;
918         pthread_mutex_init(&stage1_finished_mutex, NULL);
919         pthread_cond_init(&stage1_finished_cond, NULL);
922 MaskEngine::~MaskEngine()
924         pthread_cond_destroy(&stage1_finished_cond);
925         pthread_mutex_destroy(&stage1_finished_mutex);
926         if(mask) 
927         {
928                 delete mask;
929                 delete temp_mask;
930         }
931         point_sets.remove_all_objects();
934 int MaskEngine::points_equivalent(ArrayList<MaskPoint*> *new_points, 
935         ArrayList<MaskPoint*> *points)
937 //printf("MaskEngine::points_equivalent %d %d\n", new_points->total, points->total);
938         if(new_points->total != points->total) return 0;
939         
940         for(int i = 0; i < new_points->total; i++)
941         {
942                 if(!(*new_points->values[i] == *points->values[i])) return 0;
943         }
944         
945         return 1;
948 void MaskEngine::do_mask(VFrame *output, 
949         int64_t start_position,
950         double frame_rate,
951         double project_frame_rate,
952         MaskAutos *keyframe_set, 
953         int direction)
955         int64_t start_position_project = (int64_t)(start_position *
956                 project_frame_rate / 
957                 frame_rate);
958         Auto *current = 0;
959         MaskAuto *default_auto = (MaskAuto*)keyframe_set->default_auto;
960         MaskAuto *keyframe = (MaskAuto*)keyframe_set->get_prev_auto(start_position_project, 
961                 direction,
962                 current);
965         int total_points = 0;
966         for(int i = 0; i < keyframe->masks.total; i++)
967         {
968                 SubMask *mask = keyframe->get_submask(i);
969                 int submask_points = mask->points.total;
970                 if(submask_points > 1) total_points += submask_points;
971         }
973 //printf("MaskEngine::do_mask 1 %d %d\n", total_points, keyframe->value);
974 // Ignore certain masks
975         if(total_points < 2 || 
976                 (keyframe->value == 0 && default_auto->mode == MASK_SUBTRACT_ALPHA))
977         {
978                 return;
979         }
981 // Fake certain masks
982         if(keyframe->value == 0 && default_auto->mode == MASK_MULTIPLY_ALPHA)
983         {
984                 output->clear_frame();
985                 return;
986         }
988 //printf("MaskEngine::do_mask 1\n");
990         int new_color_model = 0;
991         recalculate = 0;
992         switch(output->get_color_model())
993         {
994                 case BC_RGB888:
995                 case BC_RGBA8888:
996                 case BC_YUV888:
997                 case BC_YUVA8888:
998                         new_color_model = BC_A8;
999                         break;
1001                 case BC_RGB161616:
1002                 case BC_RGBA16161616:
1003                 case BC_YUV161616:
1004                 case BC_YUVA16161616:
1005                         new_color_model = BC_A16;
1006                         break;
1007         }
1009 // Determine if recalculation is needed
1011         if(mask && 
1012                 (mask->get_w() != output->get_w() ||
1013                 mask->get_h() != output->get_h() ||
1014                 mask->get_color_model() != new_color_model))
1015         {
1016                 delete mask;
1017                 delete temp_mask;
1018                 mask = 0;
1019                 recalculate = 1;
1020         }
1022         if(!recalculate)
1023         {
1024                 if(point_sets.total != keyframe_set->total_submasks(start_position_project, 
1025                         direction))
1026                         recalculate = 1;
1027         }
1029         if(!recalculate)
1030         {
1031                 for(int i = 0; 
1032                         i < keyframe_set->total_submasks(start_position_project, 
1033                                 direction) && !recalculate; 
1034                         i++)
1035                 {
1036                         ArrayList<MaskPoint*> *new_points = new ArrayList<MaskPoint*>;
1037                         keyframe_set->get_points(new_points, 
1038                                 i, 
1039                                 start_position_project, 
1040                                 direction);
1041                         if(!points_equivalent(new_points, point_sets.values[i])) recalculate = 1;
1042                         new_points->remove_all_objects();
1043                 }
1044         }
1046         if(recalculate ||
1047                 !EQUIV(keyframe->feather, feather) ||
1048                 !EQUIV(keyframe->value, value))
1049         {
1050                 recalculate = 1;
1051                 if(!mask) 
1052                 {
1053                         mask = new VFrame(0, 
1054                                         output->get_w(), 
1055                                         output->get_h(),
1056                                         new_color_model);
1057                         temp_mask = new VFrame(0, 
1058                                         output->get_w(), 
1059                                         output->get_h(),
1060                                         new_color_model);
1061                 }
1062                 if(keyframe->feather > 0)
1063                         temp_mask->clear_frame();
1064                 else
1065                         mask->clear_frame();
1066                 point_sets.remove_all_objects();
1068                 for(int i = 0; 
1069                         i < keyframe_set->total_submasks(start_position_project, 
1070                                 direction); 
1071                         i++)
1072                 {
1073                         ArrayList<MaskPoint*> *new_points = new ArrayList<MaskPoint*>;
1074                         keyframe_set->get_points(new_points, 
1075                                 i, 
1076                                 start_position_project, 
1077                                 direction);
1078                         point_sets.append(new_points);
1079                 }
1080         }
1084         this->output = output;
1085         this->mode = default_auto->mode;
1086         this->feather = keyframe->feather;
1087         this->value = keyframe->value;
1090 // Run units
1091         process_packages();
1094 //printf("MaskEngine::do_mask 6\n");
1097 void MaskEngine::init_packages()
1099 //printf("MaskEngine::init_packages 1\n");
1100         int division = (int)((float)output->get_h() / (get_total_packages()) + 0.5);
1101         if(division < 1) division = 1;
1103         stage1_finished_count = 0;
1104         if (recalculate) {
1105                 last_nonempty_rowspan = SHRT_MIN;
1106                 first_nonempty_rowspan = SHRT_MAX;
1107         }
1108 // Always a multiple of 2 packages exist
1109         for(int i = 0; i < get_total_packages(); i++)
1110         {
1111                 MaskPackage *pkg = (MaskPackage*)packages[i];
1112                 pkg->row1 = division * i;
1113                 pkg->row2 = MIN (division * i + division, output->get_h());
1114                 
1115                 if(i == get_total_packages() - 1)  // last package
1116                 {
1117                         pkg->row2 = pkg->row2 = output->get_h();
1118                 }
1120         }
1121 //printf("MaskEngine::init_packages 2\n");
1124 LoadClient* MaskEngine::new_client()
1126         return new MaskUnit(this);
1129 LoadPackage* MaskEngine::new_package()
1131         return new MaskPackage;