libavfilter/silenceremove_template.c

   1 /*
   2  * This file is part of FFmpeg.
   3  *
   4  * FFmpeg is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU Lesser General Public
   6  * License as published by the Free Software Foundation; either
   7  * version 2.1 of the License, or (at your option) any later version.
   8  *
   9  * FFmpeg is distributed in the hope that it will be useful,
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * Lesser General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Lesser General Public
  15  * License along with FFmpeg; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17  */
  18
  19 #undef ftype
  20 #undef FABS
  21 #undef FMAX
  22 #undef SAMPLE_FORMAT
  23 #undef SQRT
  24 #undef ZERO
  25 #undef ONE
  26 #undef TMIN
  27 #if DEPTH == 32
  28 #define SAMPLE_FORMAT flt
  29 #define SQRT sqrtf
  30 #define FMAX fmaxf
  31 #define FABS fabsf
  32 #define ftype float
  33 #define ZERO 0.f
  34 #define ONE 1.f
  35 #define TMIN -FLT_MAX
  36 #else
  37 #define SAMPLE_FORMAT dbl
  38 #define SQRT sqrt
  39 #define FMAX fmax
  40 #define FABS fabs
  41 #define ftype double
  42 #define ZERO 0.0
  43 #define ONE 1.0
  44 #define TMIN -DBL_MAX
  45 #endif
  46
  47 #define fn3(a,b)   a##_##b
  48 #define fn2(a,b)   fn3(a,b)
  49 #define fn(a)      fn2(a, SAMPLE_FORMAT)
  50
  51 static void fn(flush)(ftype *dst, const ftype *src, int src_pos,
  52                       int nb_channels, int count, int src_nb_samples,
  53                       int *out_nb_samples)
  54 {
  55     int oidx, out_count = count;
  56     int sidx = src_pos;
  57
  58     if (count <= 0)
  59         return;
  60
  61     oidx = *out_nb_samples + out_count - 1;
  62     *out_nb_samples += out_count;
  63     while (out_count-- > 0) {
  64         const int spos = sidx * nb_channels;
  65         const int opos = oidx * nb_channels;
  66
  67         for (int ch = 0; ch < nb_channels; ch++)
  68             dst[opos + ch] = src[spos + ch];
  69
  70         oidx--;
  71         sidx--;
  72         if (sidx < 0)
  73             sidx = src_nb_samples - 1;
  74     }
  75 }
  76
  77 static void fn(queue_sample)(AVFilterContext *ctx,
  78                              const ftype *src,
  79                              ftype *queue,
  80                              int *queue_pos,
  81                              int *queue_size,
  82                              int *window_pos,
  83                              int *window_size,
  84                              const int nb_channels,
  85                              const int nb_samples,
  86                              const int window_nb_samples)
  87 {
  88     const int pos = *queue_pos * nb_channels;
  89
  90     for (int ch = 0; ch < nb_channels; ch++)
  91         queue[pos + ch] = src[ch];
  92
  93     (*queue_pos)++;
  94     if (*queue_pos >= nb_samples)
  95         *queue_pos = 0;
  96
  97     if (*queue_size < nb_samples)
  98         (*queue_size)++;
  99
 100     if (*window_size < window_nb_samples)
 101         (*window_size)++;
 102
 103     (*window_pos)++;
 104     if (*window_pos >= window_nb_samples)
 105         *window_pos = 0;
 106 }
 107
 108 static ftype fn(compute_avg)(ftype *cache, ftype x, ftype px,
 109                              int window_size, int *unused, int *unused2)
 110 {
 111     ftype r;
 112
 113     cache[0] += FABS(x);
 114     cache[0] -= FABS(px);
 115     cache[0] = r = FMAX(cache[0], ZERO);
 116
 117     return r / window_size;
 118 }
 119
 120 #define PEAKS(empty_value,op,sample, psample)\
 121     if (!empty && psample == ss[front]) {    \
 122         ss[front] = empty_value;             \
 123         if (back != front) {                 \
 124             front--;                         \
 125             if (front < 0)                   \
 126                 front = n - 1;               \
 127         }                                    \
 128         empty = front == back;               \
 129     }                                        \
 130                                              \
 131     if (!empty && sample op ss[front]) {     \
 132         while (1) {                          \
 133             ss[front] = empty_value;         \
 134             if (back == front) {             \
 135                 empty = 1;                   \
 136                 break;                       \
 137             }                                \
 138             front--;                         \
 139             if (front < 0)                   \
 140                 front = n - 1;               \
 141         }                                    \
 142     }                                        \
 143                                              \
 144     while (!empty && sample op ss[back]) {   \
 145         ss[back] = empty_value;              \
 146         if (back == front) {                 \
 147             empty = 1;                       \
 148             break;                           \
 149         }                                    \
 150         back++;                              \
 151         if (back >= n)                       \
 152             back = 0;                        \
 153     }                                        \
 154                                              \
 155     if (!empty) {                            \
 156         back--;                              \
 157         if (back < 0)                        \
 158             back = n - 1;                    \
 159     }
 160
 161 static ftype fn(compute_median)(ftype *ss, ftype x, ftype px,
 162                                 int n, int *ffront, int *bback)
 163 {
 164     ftype r, ax = FABS(x);
 165     int front = *ffront;
 166     int back = *bback;
 167     int empty = front == back && ss[front] == -ONE;
 168     int idx;
 169
 170     PEAKS(-ONE, >, ax, FABS(px))
 171
 172     ss[back] = ax;
 173     idx = (back <= front) ? back + (front - back + 1) / 2 : back + (n + front - back + 1) / 2;
 174     if (idx >= n)
 175         idx -= n;
 176     av_assert2(idx >= 0 && idx < n);
 177     r = ss[idx];
 178
 179     *ffront = front;
 180     *bback = back;
 181
 182     return r;
 183 }
 184
 185 static ftype fn(compute_peak)(ftype *ss, ftype x, ftype px,
 186                               int n, int *ffront, int *bback)
 187 {
 188     ftype r, ax = FABS(x);
 189     int front = *ffront;
 190     int back = *bback;
 191     int empty = front == back && ss[front] == ZERO;
 192
 193     PEAKS(ZERO, >=, ax, FABS(px))
 194
 195     ss[back] = ax;
 196     r = ss[front];
 197
 198     *ffront = front;
 199     *bback = back;
 200
 201     return r;
 202 }
 203
 204 static ftype fn(compute_ptp)(ftype *ss, ftype x, ftype px,
 205                              int n, int *ffront, int *bback)
 206 {
 207     int front = *ffront;
 208     int back = *bback;
 209     int empty = front == back && ss[front] == TMIN;
 210     ftype r, max, min;
 211
 212     PEAKS(TMIN, >=, x, px)
 213
 214     ss[back] = x;
 215     max = ss[front];
 216     min = x;
 217     r = FABS(min) + FABS(max - min);
 218
 219     *ffront = front;
 220     *bback = back;
 221
 222     return r;
 223 }
 224
 225 static ftype fn(compute_rms)(ftype *cache, ftype x, ftype px,
 226                              int window_size, int *unused, int *unused2)
 227 {
 228     ftype r;
 229
 230     cache[0] += x * x;
 231     cache[0] -= px * px;
 232     cache[0] = r = FMAX(cache[0], ZERO);
 233
 234     return SQRT(r / window_size);
 235 }
 236
 237 static ftype fn(compute_dev)(ftype *ss, ftype x, ftype px,
 238                              int n, int *unused, int *unused2)
 239 {
 240     ftype r;
 241
 242     ss[0] += x;
 243     ss[0] -= px;
 244
 245     ss[1] += x * x;
 246     ss[1] -= px * px;
 247     ss[1] = FMAX(ss[1], ZERO);
 248
 249     r = FMAX(ss[1] - ss[0] * ss[0] / n, ZERO) / n;
 250
 251     return SQRT(r);
 252 }
 253
 254 static void fn(filter_start)(AVFilterContext *ctx,
 255                              const ftype *src, ftype *dst,
 256                              int *nb_out_samples,
 257                              const int nb_channels)
 258 {
 259     SilenceRemoveContext *s = ctx->priv;
 260     const int start_periods = s->start_periods;
 261     int out_nb_samples = *nb_out_samples;
 262     const int start_window_nb_samples = s->start_window->nb_samples;
 263     const int start_nb_samples = s->start_queuef->nb_samples;
 264     const int start_wpos = s->start_window_pos * nb_channels;
 265     const int start_pos = s->start_queue_pos * nb_channels;
 266     ftype *startw = (ftype *)s->start_window->data[0];
 267     ftype *start = (ftype *)s->start_queuef->data[0];
 268     const ftype start_threshold = s->start_threshold;
 269     const int start_mode = s->start_mode;
 270     int start_thres = (start_mode == T_ANY) ? 0 : 1;
 271     const int start_duration = s->start_duration;
 272     ftype *start_cache = (ftype *)s->start_cache;
 273     const int start_silence = s->start_silence;
 274     int window_size = start_window_nb_samples;
 275     const int cache_size = s->cache_size;
 276     int *front = s->start_front;
 277     int *back = s->start_back;
 278
 279     fn(queue_sample)(ctx, src, start,
 280                      &s->start_queue_pos,
 281                      &s->start_queue_size,
 282                      &s->start_window_pos,
 283                      &s->start_window_size,
 284                      nb_channels,
 285                      start_nb_samples,
 286                      start_window_nb_samples);
 287
 288     if (s->start_found_periods < 0)
 289         goto skip;
 290
 291     if (s->detection != D_PEAK && s->detection != D_MEDIAN &&
 292         s->detection != D_PTP)
 293         window_size = s->start_window_size;
 294
 295     for (int ch = 0; ch < nb_channels; ch++) {
 296         ftype start_sample = start[start_pos + ch];
 297         ftype start_ow = startw[start_wpos + ch];
 298         ftype tstart;
 299
 300         tstart = fn(s->compute)(start_cache + ch * cache_size,
 301                                 start_sample,
 302                                 start_ow,
 303                                 window_size,
 304                                 front + ch,
 305                                 back + ch);
 306
 307         startw[start_wpos + ch] = start_sample;
 308
 309         if (start_mode == T_ANY) {
 310             start_thres |= tstart > start_threshold;
 311         } else {
 312             start_thres &= tstart > start_threshold;
 313         }
 314     }
 315
 316     if (s->start_found_periods >= 0) {
 317         if (start_silence > 0) {
 318             s->start_silence_count++;
 319             if (s->start_silence_count > start_silence)
 320                 s->start_silence_count = start_silence;
 321         }
 322
 323         s->start_sample_count += start_thres;
 324     }
 325
 326     if (s->start_sample_count > start_duration) {
 327         s->start_found_periods++;
 328         if (s->start_found_periods >= start_periods) {
 329             if (!ctx->is_disabled)
 330                 fn(flush)(dst, start, s->start_queue_pos, nb_channels,
 331                           s->start_silence_count, start_nb_samples,
 332                           &out_nb_samples);
 333             s->start_silence_count = 0;
 334             s->start_found_periods = -1;
 335         }
 336
 337         s->start_sample_count = 0;
 338     }
 339
 340 skip:
 341     if (s->start_found_periods < 0 || ctx->is_disabled) {
 342         const int dst_pos = out_nb_samples * nb_channels;
 343         for (int ch = 0; ch < nb_channels; ch++)
 344             dst[dst_pos + ch] = start[start_pos + ch];
 345         out_nb_samples++;
 346     }
 347
 348     *nb_out_samples = out_nb_samples;
 349 }
 350
 351 static void fn(filter_stop)(AVFilterContext *ctx,
 352                             const ftype *src, ftype *dst,
 353                             int *nb_out_samples,
 354                             const int nb_channels)
 355 {
 356     SilenceRemoveContext *s = ctx->priv;
 357     const int stop_periods = s->stop_periods;
 358     int out_nb_samples = *nb_out_samples;
 359     const int stop_window_nb_samples = s->stop_window->nb_samples;
 360     const int stop_nb_samples = s->stop_queuef->nb_samples;
 361     const int stop_wpos = s->stop_window_pos * nb_channels;
 362     const int stop_pos = s->stop_queue_pos * nb_channels;
 363     ftype *stopw = (ftype *)s->stop_window->data[0];
 364     const ftype stop_threshold = s->stop_threshold;
 365     ftype *stop = (ftype *)s->stop_queuef->data[0];
 366     const int stop_mode = s->stop_mode;
 367     int stop_thres = (stop_mode == T_ANY) ? 0 : 1;
 368     const int stop_duration = s->stop_duration;
 369     ftype *stop_cache = (ftype *)s->stop_cache;
 370     const int stop_silence = s->stop_silence;
 371     int window_size = stop_window_nb_samples;
 372     const int cache_size = s->cache_size;
 373     const int restart = s->restart;
 374     int *front = s->stop_front;
 375     int *back = s->stop_back;
 376
 377     fn(queue_sample)(ctx, src, stop,
 378                      &s->stop_queue_pos,
 379                      &s->stop_queue_size,
 380                      &s->stop_window_pos,
 381                      &s->stop_window_size,
 382                      nb_channels,
 383                      stop_nb_samples,
 384                      stop_window_nb_samples);
 385
 386     if (s->detection != D_PEAK && s->detection != D_MEDIAN &&
 387         s->detection != D_PTP)
 388         window_size = s->stop_window_size;
 389
 390     for (int ch = 0; ch < nb_channels; ch++) {
 391         ftype stop_sample = stop[stop_pos + ch];
 392         ftype stop_ow = stopw[stop_wpos + ch];
 393         ftype tstop;
 394
 395         tstop = fn(s->compute)(stop_cache + ch * cache_size,
 396                                stop_sample,
 397                                stop_ow,
 398                                window_size,
 399                                front + ch,
 400                                back + ch);
 401
 402         stopw[stop_wpos + ch] = stop_sample;
 403
 404         if (stop_mode == T_ANY) {
 405             stop_thres |= tstop <= stop_threshold;
 406         } else {
 407             stop_thres &= tstop <= stop_threshold;
 408         }
 409     }
 410
 411     s->found_nonsilence = FFMAX(s->found_nonsilence, !stop_thres);
 412     if (restart && !stop_thres)
 413         s->stop_found_periods = 0;
 414
 415     if (s->stop_found_periods >= 0 || ctx->is_disabled) {
 416         if (s->found_nonsilence) {
 417             s->stop_sample_count += stop_thres;
 418             s->stop_sample_count *= stop_thres;
 419         }
 420     } else if (s->stop_silence_count > 0) {
 421         const int dst_pos = out_nb_samples * nb_channels;
 422         for (int ch = 0; ch < nb_channels; ch++)
 423             dst[dst_pos + ch] = stop[stop_pos + ch];
 424         s->stop_silence_count--;
 425         out_nb_samples++;
 426     }
 427
 428     if (s->stop_sample_count > stop_duration) {
 429         s->stop_found_periods++;
 430         if (s->stop_found_periods >= stop_periods) {
 431             s->stop_found_periods = -1;
 432             s->stop_silence_count = stop_silence;
 433         }
 434
 435         s->stop_sample_count = 0;
 436     }
 437
 438     if (s->stop_found_periods >= 0 || ctx->is_disabled) {
 439         const int dst_pos = out_nb_samples * nb_channels;
 440         for (int ch = 0; ch < nb_channels; ch++)
 441             dst[dst_pos + ch] = stop[stop_pos + ch];
 442         out_nb_samples++;
 443     }
 444
 445     *nb_out_samples = out_nb_samples;
 446 }