/*
 * Copyright (c) 2014 - 2021 Jason Jang
 * Copyright (c) 2021 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with FFmpeg; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavutil/tx.h"

#include "avfilter.h"
#include "audio.h"
#include "filters.h"
typedef struct AudioPsyClipContext {
    const AVClass *class;

    double level_in;
    double level_out;
    double clip_level;
    double adaptive;
    int auto_level;
    int diff_only;
    int iterations;
    char *protections_str;

    int num_psy_bins;
    int fft_size;
    int overlap;
    int channels;

    int spread_table_rows;
    int *spread_table_index;
    int (*spread_table_range)[2];
    float *window, *inv_window, *spread_table, *margin_curve;

    AVFrame *in;
    AVFrame *in_buffer;
    AVFrame *in_frame;
    AVFrame *out_dist_frame;
    AVFrame *windowed_frame;
    AVFrame *clipping_delta;
    AVFrame *spectrum_buf;
    AVFrame *mask_curve;

    AVTXContext **tx_ctx;
    av_tx_fn tx_fn;
    AVTXContext **itx_ctx;
    av_tx_fn itx_fn;
} AudioPsyClipContext;
#define OFFSET(x) offsetof(AudioPsyClipContext, x)
#define FLAGS AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_RUNTIME_PARAM
static const AVOption apsyclip_options[] = {
    { "level_in",   "set input level",         OFFSET(level_in),   AV_OPT_TYPE_DOUBLE, {.dbl=1},   .015625, 64, FLAGS },
    { "level_out",  "set output level",        OFFSET(level_out),  AV_OPT_TYPE_DOUBLE, {.dbl=1},   .015625, 64, FLAGS },
    { "clip",       "set clip level",          OFFSET(clip_level), AV_OPT_TYPE_DOUBLE, {.dbl=1},   .015625,  1, FLAGS },
    { "diff",       "enable difference",       OFFSET(diff_only),  AV_OPT_TYPE_BOOL,   {.i64=0},         0,  1, FLAGS },
    { "adaptive",   "set adaptive distortion", OFFSET(adaptive),   AV_OPT_TYPE_DOUBLE, {.dbl=0.5},       0,  1, FLAGS },
    { "iterations", "set iterations",          OFFSET(iterations), AV_OPT_TYPE_INT,    {.i64=10},        1, 20, FLAGS },
    { "level",      "set auto level",          OFFSET(auto_level), AV_OPT_TYPE_BOOL,   {.i64=0},         0,  1, FLAGS },
    { NULL }
};

AVFILTER_DEFINE_CLASS(apsyclip);
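
// Periodic Hann window, w[i] = 0.5 * (1 - cos(2*pi*i/size)), plus its reciprocal.
// inv_window is used later to estimate the un-windowed sample peak from windowed
// data; it is forced to 0 where the window falls below 0.1, so the near-zero
// window values at the frame edges do not blow up the 1/w division.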
static void generate_hann_window(float *window, float *inv_window, int size)
{
    for (int i = 0; i < size; i++) {
        float value = 0.5f * (1.f - cosf(2.f * M_PI * i / size));

        window[i] = value;
        // 1/window to calculate unwindowed peak.
        inv_window[i] = value > 0.1f ? 1.f / value : 0.f;
    }
}
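
// Build the per-bin margin curve from {frequency in Hz, margin in dB} control
// points by linear interpolation over FFT bins. Bin j corresponds to the
// frequency j * sample_rate / fft_size; for example, at 44100 Hz with a
// 256-point FFT each bin covers roughly 172 Hz. The dB values are finally
// converted to linear amplitude with 10^(dB/20).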
static void set_margin_curve(AudioPsyClipContext *s,
                             const int (*points)[2], int num_points, int sample_rate)
{
    int j = 0;

    s->margin_curve[0] = points[0][1];

    for (int i = 0; i < num_points - 1; i++) {
        while (j < s->fft_size / 2 + 1 && j * sample_rate / s->fft_size < points[i + 1][0]) {
            // linearly interpolate between points
            int binHz = j * sample_rate / s->fft_size;
            s->margin_curve[j] = points[i][1] + (binHz - points[i][0]) * (points[i + 1][1] - points[i][1]) / (points[i + 1][0] - points[i][0]);
            j++;
        }
    }

    // handle bins after the last point
    while (j < s->fft_size / 2 + 1) {
        s->margin_curve[j] = points[num_points - 1][1];
        j++;
    }

    // convert margin curve to linear amplitude scale
    for (j = 0; j < s->fft_size / 2 + 1; j++)
        s->margin_curve[j] = powf(10.f, s->margin_curve[j] / 20.f);
}
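
// The spreading (masking) function is a tent shape in log-log coordinates:
// for a masker at "bin", the contribution at bin j decays as
// (f_j / f_bin)^-40 towards higher frequencies and (f_bin / f_j)^-80 towards
// lower frequencies, since expf(-rel_idx_log * k) equals powf(ratio, -k).
// Each stored row is normalized to sum to 1, and only roughly +/- one third
// of an octave around the masker is kept (start_bin = 3/4 * bin,
// end_bin ~= 4/3 * bin), which keeps calculate_mask_curve() cheap.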
static void generate_spread_table(AudioPsyClipContext *s)
{
    // Calculate tent-shape function in log-log scale.

    // As an optimization, only consider bins close to "bin".
    // This reduces the number of multiplications needed in calculate_mask_curve;
    // the masking contribution at faraway bins is negligible.

    // Another optimization to save memory and speed up the calculation of the
    // spread table is to calculate and store only 2 spread functions per
    // octave, and reuse the same spread function for multiple bins.
    int table_index = 0;
    int bin = 0;
    int increment = 1;

    while (bin < s->num_psy_bins) {
        float sum = 0;
        int base_idx = table_index * s->num_psy_bins;
        int start_bin = bin * 3 / 4;
        int end_bin = FFMIN(s->num_psy_bins, ((bin + 1) * 4 + 2) / 3);
        int next_bin;

        for (int j = start_bin; j < end_bin; j++) {
            // add 0.5 so i=0 doesn't get log(0)
            float rel_idx_log = FFABS(logf((j + 0.5f) / (bin + 0.5f)));
            float value;

            if (j >= bin) {
                // mask up
                value = expf(-rel_idx_log * 40.f);
            } else {
                // mask down
                value = expf(-rel_idx_log * 80.f);
            }
            // the spreading function is centred in the row
            sum += value;
            s->spread_table[base_idx + s->num_psy_bins / 2 + j - bin] = value;
        }

        // now normalize it
        for (int j = start_bin; j < end_bin; j++) {
            s->spread_table[base_idx + s->num_psy_bins / 2 + j - bin] /= sum;
        }

        s->spread_table_range[table_index][0] = start_bin - bin;
        s->spread_table_range[table_index][1] = end_bin - bin;

        if (bin <= 1) {
            next_bin = bin + 1;
        } else {
            if ((bin & (bin - 1)) == 0) {
                // power of 2: double the increment every octave
                increment = bin / 2;
            }
            next_bin = bin + increment;
        }

        // set bins between "bin" and "next_bin" to use this table_index
        for (int i = bin; i < next_bin; i++)
            s->spread_table_index[i] = table_index;

        bin = next_bin;
        table_index++;
    }
}
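
// FFT size is chosen from the sample rate (256 up to 50 kHz, 512 up to
// 100 kHz, 1024 above that) with a hop of fft_size / 4, i.e. 75% overlap.
// num_psy_bins works out to 128 in every case, so psychoacoustic masking is
// only evaluated over roughly the audible band regardless of sample rate;
// bins above it take the cheap copy path in calculate_mask_curve().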
static int config_input(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    AudioPsyClipContext *s = ctx->priv;
    static const int points[][2] = { {0,14}, {125,14}, {250,16}, {500,18}, {1000,20}, {2000,20}, {4000,20}, {8000,17}, {16000,14}, {20000,-10} };
    static const int num_points = 10;
    float scale = 1.f;
    int ret;

    s->fft_size = inlink->sample_rate > 100000 ? 1024 : inlink->sample_rate > 50000 ? 512 : 256;
    s->overlap = s->fft_size / 4;

    // The psy masking calculation is O(n^2),
    // so skip it for frequencies not covered by base sampling rates (i.e. 44k)
    if (inlink->sample_rate <= 50000) {
        s->num_psy_bins = s->fft_size / 2;
    } else if (inlink->sample_rate <= 100000) {
        s->num_psy_bins = s->fft_size / 4;
    } else {
        s->num_psy_bins = s->fft_size / 8;
    }

    s->window = av_calloc(s->fft_size, sizeof(*s->window));
    s->inv_window = av_calloc(s->fft_size, sizeof(*s->inv_window));
    if (!s->window || !s->inv_window)
        return AVERROR(ENOMEM);

    s->in_buffer      = ff_get_audio_buffer(inlink, s->fft_size * 2);
    s->in_frame       = ff_get_audio_buffer(inlink, s->fft_size * 2);
    s->out_dist_frame = ff_get_audio_buffer(inlink, s->fft_size * 2);
    s->windowed_frame = ff_get_audio_buffer(inlink, s->fft_size * 2);
    s->clipping_delta = ff_get_audio_buffer(inlink, s->fft_size * 2);
    s->spectrum_buf   = ff_get_audio_buffer(inlink, s->fft_size * 2);
    s->mask_curve     = ff_get_audio_buffer(inlink, s->fft_size / 2 + 1);
    if (!s->in_buffer || !s->in_frame ||
        !s->out_dist_frame || !s->windowed_frame ||
        !s->clipping_delta || !s->spectrum_buf || !s->mask_curve)
        return AVERROR(ENOMEM);

    generate_hann_window(s->window, s->inv_window, s->fft_size);

    s->margin_curve = av_calloc(s->fft_size / 2 + 1, sizeof(*s->margin_curve));
    if (!s->margin_curve)
        return AVERROR(ENOMEM);

    s->spread_table_rows = av_log2(s->num_psy_bins) * 2;
    s->spread_table = av_calloc(s->spread_table_rows * s->num_psy_bins, sizeof(*s->spread_table));
    if (!s->spread_table)
        return AVERROR(ENOMEM);

    s->spread_table_range = av_calloc(s->spread_table_rows * 2, sizeof(*s->spread_table_range));
    if (!s->spread_table_range)
        return AVERROR(ENOMEM);

    s->spread_table_index = av_calloc(s->num_psy_bins, sizeof(*s->spread_table_index));
    if (!s->spread_table_index)
        return AVERROR(ENOMEM);

    set_margin_curve(s, points, num_points, inlink->sample_rate);

    generate_spread_table(s);

    s->channels = inlink->ch_layout.nb_channels;

    s->tx_ctx = av_calloc(s->channels, sizeof(*s->tx_ctx));
    s->itx_ctx = av_calloc(s->channels, sizeof(*s->itx_ctx));
    if (!s->tx_ctx || !s->itx_ctx)
        return AVERROR(ENOMEM);

    for (int ch = 0; ch < s->channels; ch++) {
        ret = av_tx_init(&s->tx_ctx[ch], &s->tx_fn, AV_TX_FLOAT_FFT, 0, s->fft_size, &scale, 0);
        if (ret < 0)
            return ret;

        ret = av_tx_init(&s->itx_ctx[ch], &s->itx_fn, AV_TX_FLOAT_FFT, 1, s->fft_size, &scale, 0);
        if (ret < 0)
            return ret;
    }

    return 0;
}
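
// Multiply a frame by the analysis window. With add_to_out_frame set, the
// windowed result is accumulated into out_frame instead of overwriting it,
// which is what the overlap-add synthesis in feed() relies on.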
static void apply_window(AudioPsyClipContext *s,
                         const float *in_frame, float *out_frame, const int add_to_out_frame)
{
    const float *window = s->window;

    for (int i = 0; i < s->fft_size; i++) {
        if (add_to_out_frame) {
            out_frame[i] += in_frame[i] * window[i];
        } else {
            out_frame[i] = in_frame[i] * window[i];
        }
    }
}
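
// The spectrum buffer holds an interleaved complex FFT of size fft_size:
// spectrum[0] is the (real) DC term, spectrum[fft_size] the Nyquist term, and
// bin i lives at spectrum[2*i] / spectrum[2*i + 1]. The mask curve is the
// magnitude spectrum convolved with the spreading function for the first
// num_psy_bins bins, copied directly for the ultrasonic bins, and finally
// divided by the margin curve, turning it into the ceiling that the clipping
// distortion is allowed to reach in limit_clip_spectrum().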
static void calculate_mask_curve(AudioPsyClipContext *s,
                                 const float *spectrum, float *mask_curve)
{
    for (int i = 0; i < s->fft_size / 2 + 1; i++)
        mask_curve[i] = 0;

    for (int i = 0; i < s->num_psy_bins; i++) {
        int base_idx, start_bin, end_bin, table_idx;
        int range[2];
        float magnitude;

        if (i == 0) {
            magnitude = FFABS(spectrum[0]);
        } else if (i == s->fft_size / 2) {
            magnitude = FFABS(spectrum[s->fft_size]);
        } else {
            // Because the input signal is real, the + and - frequencies are redundant.
            // Multiply the magnitude by 2 to simulate adding up the + and - frequencies.
            magnitude = hypotf(spectrum[2 * i], spectrum[2 * i + 1]) * 2;
        }

        table_idx = s->spread_table_index[i];
        range[0] = s->spread_table_range[table_idx][0];
        range[1] = s->spread_table_range[table_idx][1];
        base_idx = table_idx * s->num_psy_bins;
        start_bin = FFMAX(0, i + range[0]);
        end_bin = FFMIN(s->num_psy_bins, i + range[1]);

        for (int j = start_bin; j < end_bin; j++)
            mask_curve[j] += s->spread_table[base_idx + s->num_psy_bins / 2 + j - i] * magnitude;
    }

    // for ultrasonic frequencies, skip the O(n^2) spread calculation and just copy the magnitude
    for (int i = s->num_psy_bins; i < s->fft_size / 2 + 1; i++) {
        float magnitude;

        if (i == s->fft_size / 2) {
            magnitude = FFABS(spectrum[s->fft_size]);
        } else {
            // Because the input signal is real, the + and - frequencies are redundant.
            // Multiply the magnitude by 2 to simulate adding up the + and - frequencies.
            magnitude = hypotf(spectrum[2 * i], spectrum[2 * i + 1]) * 2;
        }

        mask_curve[i] = magnitude;
    }

    for (int i = 0; i < s->fft_size / 2 + 1; i++)
        mask_curve[i] = mask_curve[i] / s->margin_curve[i];
}
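
// Hard-clip in the windowed time domain: the threshold for sample i is
// clip_level scaled by window[i], and whatever is shaved off is accumulated
// into clipping_delta (optionally boosted) rather than applied directly, so
// it can be spectrally shaped afterwards.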
static void clip_to_window(AudioPsyClipContext *s,
                           const float *windowed_frame, float *clipping_delta, float delta_boost)
{
    const float *window = s->window;

    for (int i = 0; i < s->fft_size; i++) {
        const float limit = s->clip_level * window[i];
        const float effective_value = windowed_frame[i] + clipping_delta[i];

        if (effective_value > limit) {
            clipping_delta[i] += (limit - effective_value) * delta_boost;
        } else if (effective_value < -limit) {
            clipping_delta[i] += (-limit - effective_value) * delta_boost;
        }
    }
}
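
// Limit each bin of the clipping-delta spectrum so its magnitude stays below
// the mask curve. For the interior bins both the positive-frequency bin and
// its mirrored conjugate at fft_size * 2 - i * 2 are scaled, keeping the
// spectrum consistent with a real signal.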
static void limit_clip_spectrum(AudioPsyClipContext *s,
                                float *clip_spectrum, const float *mask_curve)
{
    // bin 0
    float relative_distortion_level = FFABS(clip_spectrum[0]) / mask_curve[0];

    if (relative_distortion_level > 1.f)
        clip_spectrum[0] /= relative_distortion_level;

    for (int i = 1; i < s->fft_size / 2; i++) {
        float real = clip_spectrum[i * 2];
        float imag = clip_spectrum[i * 2 + 1];
        // Because the input signal is real, the + and - frequencies are redundant.
        // Multiply the magnitude by 2 to simulate adding up the + and - frequencies.
        relative_distortion_level = hypotf(real, imag) * 2 / mask_curve[i];
        if (relative_distortion_level > 1.0) {
            clip_spectrum[i * 2] /= relative_distortion_level;
            clip_spectrum[i * 2 + 1] /= relative_distortion_level;
            clip_spectrum[s->fft_size * 2 - i * 2] /= relative_distortion_level;
            clip_spectrum[s->fft_size * 2 - i * 2 + 1] /= relative_distortion_level;
        }
    }

    // bin fft_size / 2 (Nyquist)
    relative_distortion_level = FFABS(clip_spectrum[s->fft_size]) / mask_curve[s->fft_size / 2];
    if (relative_distortion_level > 1.f)
        clip_spectrum[s->fft_size] /= relative_distortion_level;
}
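
// Helpers to convert between a packed real buffer and the interleaved
// complex layout expected by the FFT: r2c() spreads size real samples into
// size (re, im) pairs in place (walking backwards so nothing is overwritten)
// with zero imaginary parts, and c2r() packs the real parts back down and
// zeroes the upper half of the buffer.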
static void r2c(float *buffer, int size)
{
    for (int i = size - 1; i >= 0; i--)
        buffer[2 * i] = buffer[i];

    for (int i = size - 1; i >= 0; i--)
        buffer[2 * i + 1] = 0.f;
}

static void c2r(float *buffer, int size)
{
    for (int i = 0; i < size; i++)
        buffer[i] = buffer[2 * i];

    for (int i = 0; i < size; i++)
        buffer[i + size] = 0.f;
}
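
// Per-channel core: shift "overlap" new samples into the analysis frame,
// window + FFT it, derive the mask curve, then iterate clip_to_window() /
// limit_clip_spectrum() so the distortion stays under the mask while the
// peak approaches clip_level. The result is overlap-added; with a hop of
// fft_size / 4 the squared Hann window sums to 3/2 across the four
// overlapping frames, hence the division by 1.5f at the output stage.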
static void feed(AVFilterContext *ctx, int ch,
                 const float *in_samples, float *out_samples, int diff_only,
                 float *in_frame, float *out_dist_frame,
                 float *windowed_frame, float *clipping_delta,
                 float *spectrum_buf, float *mask_curve)
{
    AudioPsyClipContext *s = ctx->priv;
    const float clip_level_inv = 1.f / s->clip_level;
    const float level_out = s->level_out;
    float orig_peak = 0;
    float peak;

    // shift in/out buffers
    for (int i = 0; i < s->fft_size - s->overlap; i++) {
        in_frame[i] = in_frame[i + s->overlap];
        out_dist_frame[i] = out_dist_frame[i + s->overlap];
    }

    for (int i = 0; i < s->overlap; i++) {
        in_frame[i + s->fft_size - s->overlap] = in_samples[i];
        out_dist_frame[i + s->fft_size - s->overlap] = 0.f;
    }

    apply_window(s, in_frame, windowed_frame, 0);
    r2c(windowed_frame, s->fft_size);
    s->tx_fn(s->tx_ctx[ch], spectrum_buf, windowed_frame, sizeof(AVComplexFloat));
    c2r(windowed_frame, s->fft_size);
    calculate_mask_curve(s, spectrum_buf, mask_curve);

    // It would be easier to calculate the peak from the unwindowed input.
    // This is just for consistency with the clipped peak calculation,
    // because the inv_window zeros out samples on the edge of the window.
    for (int i = 0; i < s->fft_size; i++)
        orig_peak = FFMAX(orig_peak, FFABS(windowed_frame[i] * s->inv_window[i]));
    orig_peak *= clip_level_inv;
    peak = orig_peak;

    // clear clipping_delta
    for (int i = 0; i < s->fft_size * 2; i++)
        clipping_delta[i] = 0.f;

    // repeat clipping-filtering process a few times to control both the peaks and the spectrum
    for (int i = 0; i < s->iterations; i++) {
        float mask_curve_shift = 1.122f; // 1.122 is 1dB
        // The last 1/3 of rounds have boosted delta to help reach the peak target faster
        float delta_boost = 1.f;
        if (i >= s->iterations - s->iterations / 3) {
            // boosting the delta when large peaks are still present is dangerous
            if (peak < 2.f)
                delta_boost = 2.f;
        }

        clip_to_window(s, windowed_frame, clipping_delta, delta_boost);

        r2c(clipping_delta, s->fft_size);
        s->tx_fn(s->tx_ctx[ch], spectrum_buf, clipping_delta, sizeof(AVComplexFloat));

        limit_clip_spectrum(s, spectrum_buf, mask_curve);

        s->itx_fn(s->itx_ctx[ch], clipping_delta, spectrum_buf, sizeof(AVComplexFloat));
        c2r(clipping_delta, s->fft_size);

        for (int i = 0; i < s->fft_size; i++)
            clipping_delta[i] /= s->fft_size;

        peak = 0;
        for (int i = 0; i < s->fft_size; i++)
            peak = FFMAX(peak, FFABS((windowed_frame[i] + clipping_delta[i]) * s->inv_window[i]));
        peak *= clip_level_inv;

        // Automatically adjust mask_curve as necessary to reach peak target
        if (orig_peak > 1.f && peak > 1.f) {
            float diff_achieved = orig_peak - peak;
            if (i + 1 < s->iterations - s->iterations / 3 && diff_achieved > 0) {
                float diff_needed = orig_peak - 1.f;
                float diff_ratio = diff_needed / diff_achieved;
                // If a good amount of peak reduction was already achieved,
                // don't shift the mask_curve by the full peak value.
                // On the other hand, if only a little peak reduction was achieved,
                // don't shift the mask_curve by the enormous diff_ratio.
                diff_ratio = FFMIN(diff_ratio, peak);
                mask_curve_shift = FFMAX(mask_curve_shift, diff_ratio);
            } else {
                // If the peak got higher than the input or we are in the last 1/3 rounds,
                // go back to the heavy-handed peak heuristic.
                mask_curve_shift = FFMAX(mask_curve_shift, peak);
            }
        }

        mask_curve_shift = 1.f + (mask_curve_shift - 1.f) * s->adaptive;

        // Be less strict in the next iteration.
        // This helps with peak control.
        for (int i = 0; i < s->fft_size / 2 + 1; i++)
            mask_curve[i] *= mask_curve_shift;
    }

    // do overlap & add
    apply_window(s, clipping_delta, out_dist_frame, 1);

    for (int i = 0; i < s->overlap; i++) {
        // 4 times overlap with the squared Hann window results in a 1.5x increase in amplitude
        if (!ctx->is_disabled) {
            out_samples[i] = out_dist_frame[i] / 1.5f;
            if (!diff_only)
                out_samples[i] += in_frame[i];
            if (s->auto_level)
                out_samples[i] *= clip_level_inv;
            out_samples[i] *= level_out;
        } else {
            out_samples[i] = in_frame[i];
        }
    }
}
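
// Per-channel wrapper: scale one hop of input by level_in into the
// per-channel staging buffer and hand all per-channel state frames to feed().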
static int psy_channel(AVFilterContext *ctx, AVFrame *in, AVFrame *out, int ch)
{
    AudioPsyClipContext *s = ctx->priv;
    const float *src = (const float *)in->extended_data[ch];
    float *in_buffer = (float *)s->in_buffer->extended_data[ch];
    float *dst = (float *)out->extended_data[ch];

    for (int n = 0; n < s->overlap; n++)
        in_buffer[n] = src[n] * s->level_in;

    feed(ctx, ch, in_buffer, dst, s->diff_only,
         (float *)(s->in_frame->extended_data[ch]),
         (float *)(s->out_dist_frame->extended_data[ch]),
         (float *)(s->windowed_frame->extended_data[ch]),
         (float *)(s->clipping_delta->extended_data[ch]),
         (float *)(s->spectrum_buf->extended_data[ch]),
         (float *)(s->mask_curve->extended_data[ch]));

    return 0;
}
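
// Slice-threading entry point: channels are split evenly across jobs, so each
// channel is processed by exactly one thread using its own per-channel buffers.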
static int psy_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
    AudioPsyClipContext *s = ctx->priv;
    AVFrame *out = arg;
    const int start = (out->ch_layout.nb_channels * jobnr) / nb_jobs;
    const int end = (out->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;

    for (int ch = start; ch < end; ch++)
        psy_channel(ctx, s->in, out, ch);

    return 0;
}
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
    AVFilterContext *ctx = inlink->dst;
    AVFilterLink *outlink = ctx->outputs[0];
    AudioPsyClipContext *s = ctx->priv;
    AVFrame *out;
    int ret;

    out = ff_get_audio_buffer(outlink, s->overlap);
    if (!out) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    s->in = in;
    av_frame_copy_props(out, in);
    ff_filter_execute(ctx, psy_channels, out, NULL,
                      FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));

    out->nb_samples = in->nb_samples;
    ret = ff_filter_frame(outlink, out);
fail:
    av_frame_free(&in);
    s->in = NULL;

    return ret < 0 ? ret : 0;
}
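
// Consume the input in fixed hops of "overlap" samples; anything smaller is
// left queued until either more input arrives or EOF status is forwarded.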
static int activate(AVFilterContext *ctx)
{
    AVFilterLink *inlink = ctx->inputs[0];
    AVFilterLink *outlink = ctx->outputs[0];
    AudioPsyClipContext *s = ctx->priv;
    AVFrame *in = NULL;
    int ret, status;
    int64_t pts;

    FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);

    ret = ff_inlink_consume_samples(inlink, s->overlap, s->overlap, &in);
    if (ret < 0)
        return ret;
    if (ret > 0) {
        return filter_frame(inlink, in);
    } else if (ff_inlink_acknowledge_status(inlink, &status, &pts)) {
        ff_outlink_set_status(outlink, status, pts);
        return 0;
    } else {
        if (ff_inlink_queued_samples(inlink) >= s->overlap) {
            ff_filter_set_ready(ctx, 10);
        } else if (ff_outlink_frame_wanted(outlink)) {
            ff_inlink_request_frame(inlink);
        }
        return 0;
    }
}
static av_cold void uninit(AVFilterContext *ctx)
{
    AudioPsyClipContext *s = ctx->priv;

    av_freep(&s->window);
    av_freep(&s->inv_window);
    av_freep(&s->spread_table);
    av_freep(&s->spread_table_range);
    av_freep(&s->spread_table_index);
    av_freep(&s->margin_curve);

    av_frame_free(&s->in_buffer);
    av_frame_free(&s->in_frame);
    av_frame_free(&s->out_dist_frame);
    av_frame_free(&s->windowed_frame);
    av_frame_free(&s->clipping_delta);
    av_frame_free(&s->spectrum_buf);
    av_frame_free(&s->mask_curve);

    for (int ch = 0; ch < s->channels; ch++) {
        if (s->tx_ctx)
            av_tx_uninit(&s->tx_ctx[ch]);
        if (s->itx_ctx)
            av_tx_uninit(&s->itx_ctx[ch]);
    }

    av_freep(&s->tx_ctx);
    av_freep(&s->itx_ctx);
}
static const AVFilterPad inputs[] = {
    {
        .name         = "default",
        .type         = AVMEDIA_TYPE_AUDIO,
        .config_props = config_input,
    },
};
const FFFilter ff_af_apsyclip = {
    .p.name          = "apsyclip",
    .p.description   = NULL_IF_CONFIG_SMALL("Audio Psychoacoustic Clipper."),
    .p.priv_class    = &apsyclip_class,
    .p.flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
                       AVFILTER_FLAG_SLICE_THREADS,
    .priv_size       = sizeof(AudioPsyClipContext),
    .uninit          = uninit,
    FILTER_INPUTS(inputs),
    FILTER_OUTPUTS(ff_audio_default_filterpad),
    FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP),
    .activate        = activate,
    .process_command = ff_filter_process_command,
};