libavfilter/af_afftdn.c

   1 /*
   2  * Copyright (c) 2018 The FFmpeg Project
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include <float.h>
  22
  23 #include "libavutil/avassert.h"
  24 #include "libavutil/avstring.h"
  25 #include "libavutil/channel_layout.h"
  26 #include "libavutil/mem.h"
  27 #include "libavutil/opt.h"
  28 #include "libavutil/tx.h"
  29 #include "avfilter.h"
  30 #include "audio.h"
  31 #include "filters.h"
  32
  33 #define C       (M_LN10 * 0.1)
  34 #define SOLVE_SIZE (5)
  35 #define NB_PROFILE_BANDS (15)
  36
  37 enum SampleNoiseModes {
  38     SAMPLE_NONE,
  39     SAMPLE_START,
  40     SAMPLE_STOP,
  41     NB_SAMPLEMODES
  42 };
  43
  44 enum OutModes {
  45     IN_MODE,
  46     OUT_MODE,
  47     NOISE_MODE,
  48     NB_MODES
  49 };
  50
  51 enum NoiseLinkType {
  52     NONE_LINK,
  53     MIN_LINK,
  54     MAX_LINK,
  55     AVERAGE_LINK,
  56     NB_LINK
  57 };
  58
  59 enum NoiseType {
  60     WHITE_NOISE,
  61     VINYL_NOISE,
  62     SHELLAC_NOISE,
  63     CUSTOM_NOISE,
  64     NB_NOISE
  65 };
  66
  67 typedef struct DeNoiseChannel {
  68     double      band_noise[NB_PROFILE_BANDS];
  69     double      noise_band_auto_var[NB_PROFILE_BANDS];
  70     double      noise_band_sample[NB_PROFILE_BANDS];
  71     double     *amt;
  72     double     *band_amt;
  73     double     *band_excit;
  74     double     *gain;
  75     double     *smoothed_gain;
  76     double     *prior;
  77     double     *prior_band_excit;
  78     double     *clean_data;
  79     double     *noisy_data;
  80     double     *out_samples;
  81     double     *spread_function;
  82     double     *abs_var;
  83     double     *rel_var;
  84     double     *min_abs_var;
  85     void       *fft_in;
  86     void       *fft_out;
  87     AVTXContext *fft, *ifft;
  88     av_tx_fn   tx_fn, itx_fn;
  89
  90     double      noise_band_norm[NB_PROFILE_BANDS];
  91     double      noise_band_avr[NB_PROFILE_BANDS];
  92     double      noise_band_avi[NB_PROFILE_BANDS];
  93     double      noise_band_var[NB_PROFILE_BANDS];
  94
  95     double      noise_reduction;
  96     double      last_noise_reduction;
  97     double      noise_floor;
  98     double      last_noise_floor;
  99     double      residual_floor;
 100     double      last_residual_floor;
 101     double      max_gain;
 102     double      max_var;
 103     double      gain_scale;
 104 } DeNoiseChannel;
 105
 106 typedef struct AudioFFTDeNoiseContext {
 107     const AVClass *class;
 108
 109     int     format;
 110     size_t  sample_size;
 111     size_t  complex_sample_size;
 112
 113     float   noise_reduction;
 114     float   noise_floor;
 115     int     noise_type;
 116     char   *band_noise_str;
 117     float   residual_floor;
 118     int     track_noise;
 119     int     track_residual;
 120     int     output_mode;
 121     int     noise_floor_link;
 122     float   ratio;
 123     int     gain_smooth;
 124     float   band_multiplier;
 125     float   floor_offset;
 126
 127     int     channels;
 128     int     sample_noise;
 129     int     sample_noise_blocks;
 130     int     sample_noise_mode;
 131     float   sample_rate;
 132     int     buffer_length;
 133     int     fft_length;
 134     int     fft_length2;
 135     int     bin_count;
 136     int     window_length;
 137     int     sample_advance;
 138     int     number_of_bands;
 139
 140     int     band_centre[NB_PROFILE_BANDS];
 141
 142     int    *bin2band;
 143     double *window;
 144     double *band_alpha;
 145     double *band_beta;
 146
 147     DeNoiseChannel *dnch;
 148
 149     AVFrame *winframe;
 150
 151     double  window_weight;
 152     double  floor;
 153     double  sample_floor;
 154
 155     int     noise_band_edge[NB_PROFILE_BANDS + 2];
 156     int     noise_band_count;
 157     double  matrix_a[SOLVE_SIZE * SOLVE_SIZE];
 158     double  vector_b[SOLVE_SIZE];
 159     double  matrix_b[SOLVE_SIZE * NB_PROFILE_BANDS];
 160     double  matrix_c[SOLVE_SIZE * NB_PROFILE_BANDS];
 161 } AudioFFTDeNoiseContext;
 162
 163 #define OFFSET(x) offsetof(AudioFFTDeNoiseContext, x)
 164 #define AF  AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
 165 #define AFR AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
 166
 167 static const AVOption afftdn_options[] = {
 168     { "noise_reduction", "set the noise reduction",OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,{.dbl = 12},   .01, 97, AFR },
 169     { "nr", "set the noise reduction",    OFFSET(noise_reduction), AV_OPT_TYPE_FLOAT,  {.dbl = 12},          .01, 97, AFR },
 170     { "noise_floor", "set the noise floor",OFFSET(noise_floor),    AV_OPT_TYPE_FLOAT,  {.dbl =-50},          -80,-20, AFR },
 171     { "nf", "set the noise floor",        OFFSET(noise_floor),     AV_OPT_TYPE_FLOAT,  {.dbl =-50},          -80,-20, AFR },
 172     { "noise_type", "set the noise type", OFFSET(noise_type),      AV_OPT_TYPE_INT,    {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
 173     { "nt", "set the noise type",         OFFSET(noise_type),      AV_OPT_TYPE_INT,    {.i64 = WHITE_NOISE}, WHITE_NOISE, NB_NOISE-1, AF, .unit = "type" },
 174     {  "white", "white noise",            0,                       AV_OPT_TYPE_CONST,  {.i64 = WHITE_NOISE},   0,  0, AF, .unit = "type" },
 175     {  "w", "white noise",                0,                       AV_OPT_TYPE_CONST,  {.i64 = WHITE_NOISE},   0,  0, AF, .unit = "type" },
 176     {  "vinyl", "vinyl noise",            0,                       AV_OPT_TYPE_CONST,  {.i64 = VINYL_NOISE},   0,  0, AF, .unit = "type" },
 177     {  "v", "vinyl noise",                0,                       AV_OPT_TYPE_CONST,  {.i64 = VINYL_NOISE},   0,  0, AF, .unit = "type" },
 178     {  "shellac", "shellac noise",        0,                       AV_OPT_TYPE_CONST,  {.i64 = SHELLAC_NOISE}, 0,  0, AF, .unit = "type" },
 179     {  "s", "shellac noise",              0,                       AV_OPT_TYPE_CONST,  {.i64 = SHELLAC_NOISE}, 0,  0, AF, .unit = "type" },
 180     {  "custom", "custom noise",          0,                       AV_OPT_TYPE_CONST,  {.i64 = CUSTOM_NOISE},  0,  0, AF, .unit = "type" },
 181     {  "c", "custom noise",               0,                       AV_OPT_TYPE_CONST,  {.i64 = CUSTOM_NOISE},  0,  0, AF, .unit = "type" },
 182     { "band_noise", "set the custom bands noise", OFFSET(band_noise_str),  AV_OPT_TYPE_STRING, {.str = 0},     0,  0, AF },
 183     { "bn", "set the custom bands noise", OFFSET(band_noise_str),  AV_OPT_TYPE_STRING, {.str = 0},             0,  0, AF },
 184     { "residual_floor", "set the residual floor",OFFSET(residual_floor),  AV_OPT_TYPE_FLOAT, {.dbl =-38},    -80,-20, AFR },
 185     { "rf", "set the residual floor",     OFFSET(residual_floor),  AV_OPT_TYPE_FLOAT,  {.dbl =-38},          -80,-20, AFR },
 186     { "track_noise", "track noise",       OFFSET(track_noise),     AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
 187     { "tn", "track noise",                OFFSET(track_noise),     AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
 188     { "track_residual", "track residual", OFFSET(track_residual),  AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
 189     { "tr", "track residual",             OFFSET(track_residual),  AV_OPT_TYPE_BOOL,   {.i64 =  0},            0,  1, AFR },
 190     { "output_mode", "set output mode",   OFFSET(output_mode),     AV_OPT_TYPE_INT,    {.i64 = OUT_MODE},      0,  NB_MODES-1, AFR, .unit = "mode" },
 191     { "om", "set output mode",            OFFSET(output_mode),     AV_OPT_TYPE_INT,    {.i64 = OUT_MODE},      0,  NB_MODES-1, AFR, .unit = "mode" },
 192     {  "input", "input",                  0,                       AV_OPT_TYPE_CONST,  {.i64 = IN_MODE},       0,  0, AFR, .unit = "mode" },
 193     {  "i", "input",                      0,                       AV_OPT_TYPE_CONST,  {.i64 = IN_MODE},       0,  0, AFR, .unit = "mode" },
 194     {  "output", "output",                0,                       AV_OPT_TYPE_CONST,  {.i64 = OUT_MODE},      0,  0, AFR, .unit = "mode" },
 195     {  "o", "output",                     0,                       AV_OPT_TYPE_CONST,  {.i64 = OUT_MODE},      0,  0, AFR, .unit = "mode" },
 196     {  "noise", "noise",                  0,                       AV_OPT_TYPE_CONST,  {.i64 = NOISE_MODE},    0,  0, AFR, .unit = "mode" },
 197     {  "n", "noise",                      0,                       AV_OPT_TYPE_CONST,  {.i64 = NOISE_MODE},    0,  0, AFR, .unit = "mode" },
 198     { "adaptivity", "set adaptivity factor",OFFSET(ratio),         AV_OPT_TYPE_FLOAT,  {.dbl = 0.5},           0,  1, AFR },
 199     { "ad",         "set adaptivity factor",OFFSET(ratio),         AV_OPT_TYPE_FLOAT,  {.dbl = 0.5},           0,  1, AFR },
 200     { "floor_offset", "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0},  -2,  2, AFR },
 201     { "fo",           "set noise floor offset factor",OFFSET(floor_offset), AV_OPT_TYPE_FLOAT, {.dbl = 1.0},  -2,  2, AFR },
 202     { "noise_link", "set the noise floor link",OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK},     0,  NB_LINK-1, AFR, .unit = "link" },
 203     { "nl", "set the noise floor link",        OFFSET(noise_floor_link),AV_OPT_TYPE_INT,{.i64 = MIN_LINK},     0,  NB_LINK-1, AFR, .unit = "link" },
 204     {  "none",    "none",                 0,                       AV_OPT_TYPE_CONST,  {.i64 = NONE_LINK},     0,  0, AFR, .unit = "link" },
 205     {  "min",     "min",                  0,                       AV_OPT_TYPE_CONST,  {.i64 = MIN_LINK},      0,  0, AFR, .unit = "link" },
 206     {  "max",     "max",                  0,                       AV_OPT_TYPE_CONST,  {.i64 = MAX_LINK},      0,  0, AFR, .unit = "link" },
 207     {  "average", "average",              0,                       AV_OPT_TYPE_CONST,  {.i64 = AVERAGE_LINK},  0,  0, AFR, .unit = "link" },
 208     { "band_multiplier", "set band multiplier",OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25},       0.2,5, AF  },
 209     { "bm",       "set band multiplier",       OFFSET(band_multiplier), AV_OPT_TYPE_FLOAT,{.dbl = 1.25},       0.2,5, AF  },
 210     { "sample_noise", "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE},  0,  NB_SAMPLEMODES-1, AFR, .unit = "sample" },
 211     { "sn",           "set sample noise mode",OFFSET(sample_noise_mode),AV_OPT_TYPE_INT,{.i64 = SAMPLE_NONE},  0,  NB_SAMPLEMODES-1, AFR, .unit = "sample" },
 212     {  "none",    "none",                 0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_NONE},   0,  0, AFR, .unit = "sample" },
 213     {  "start",   "start",                0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_START},  0,  0, AFR, .unit = "sample" },
 214     {  "begin",   "start",                0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_START},  0,  0, AFR, .unit = "sample" },
 215     {  "stop",    "stop",                 0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_STOP},   0,  0, AFR, .unit = "sample" },
 216     {  "end",     "stop",                 0,                       AV_OPT_TYPE_CONST,  {.i64 = SAMPLE_STOP},   0,  0, AFR, .unit = "sample" },
 217     { "gain_smooth", "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT,    {.i64 = 0},             0, 50, AFR },
 218     { "gs",          "set gain smooth radius",OFFSET(gain_smooth), AV_OPT_TYPE_INT,    {.i64 = 0},             0, 50, AFR },
 219     { NULL }
 220 };
 221
 222 AVFILTER_DEFINE_CLASS(afftdn);
 223
 224 static double get_band_noise(AudioFFTDeNoiseContext *s,
 225                              int band, double a,
 226                              double b, double c)
 227 {
 228     double d1, d2, d3;
 229
 230     d1 = a / s->band_centre[band];
 231     d1 = 10.0 * log(1.0 + d1 * d1) / M_LN10;
 232     d2 = b / s->band_centre[band];
 233     d2 = 10.0 * log(1.0 + d2 * d2) / M_LN10;
 234     d3 = s->band_centre[band] / c;
 235     d3 = 10.0 * log(1.0 + d3 * d3) / M_LN10;
 236
 237     return -d1 + d2 - d3;
 238 }
 239
 240 static void factor(double *array, int size)
 241 {
 242     for (int i = 0; i < size - 1; i++) {
 243         for (int j = i + 1; j < size; j++) {
 244             double d = array[j + i * size] / array[i + i * size];
 245
 246             array[j + i * size] = d;
 247             for (int k = i + 1; k < size; k++) {
 248                 array[j + k * size] -= d * array[i + k * size];
 249             }
 250         }
 251     }
 252 }
 253
 254 static void solve(double *matrix, double *vector, int size)
 255 {
 256     for (int i = 0; i < size - 1; i++) {
 257         for (int j = i + 1; j < size; j++) {
 258             double d = matrix[j + i * size];
 259             vector[j] -= d * vector[i];
 260         }
 261     }
 262
 263     vector[size - 1] /= matrix[size * size - 1];
 264
 265     for (int i = size - 2; i >= 0; i--) {
 266         double d = vector[i];
 267         for (int j = i + 1; j < size; j++)
 268             d -= matrix[i + j * size] * vector[j];
 269         vector[i] = d / matrix[i + i * size];
 270     }
 271 }
 272
 273 static double process_get_band_noise(AudioFFTDeNoiseContext *s,
 274                                      DeNoiseChannel *dnch,
 275                                      int band)
 276 {
 277     double product, sum, f;
 278     int i = 0;
 279
 280     if (band < NB_PROFILE_BANDS)
 281         return dnch->band_noise[band];
 282
 283     for (int j = 0; j < SOLVE_SIZE; j++) {
 284         sum = 0.0;
 285         for (int k = 0; k < NB_PROFILE_BANDS; k++)
 286             sum += s->matrix_b[i++] * dnch->band_noise[k];
 287         s->vector_b[j] = sum;
 288     }
 289
 290     solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
 291     f = (0.5 * s->sample_rate) / s->band_centre[NB_PROFILE_BANDS-1];
 292     f = 15.0 + log(f / 1.5) / log(1.5);
 293     sum = 0.0;
 294     product = 1.0;
 295     for (int j = 0; j < SOLVE_SIZE; j++) {
 296         sum += product * s->vector_b[j];
 297         product *= f;
 298     }
 299
 300     return sum;
 301 }
 302
 303 static double limit_gain(double a, double b)
 304 {
 305     if (a > 1.0)
 306         return (b * a - 1.0) / (b + a - 2.0);
 307     if (a < 1.0)
 308         return (b * a - 2.0 * a + 1.0) / (b - a);
 309     return 1.0;
 310 }
 311
 312 static void spectral_flatness(AudioFFTDeNoiseContext *s, const double *const spectral,
 313                               double floor, int len, double *rnum, double *rden)
 314 {
 315     double num = 0., den = 0.;
 316     int size = 0;
 317
 318     for (int n = 0; n < len; n++) {
 319         const double v = spectral[n];
 320         if (v > floor) {
 321             num += log(v);
 322             den += v;
 323             size++;
 324         }
 325     }
 326
 327     size = FFMAX(size, 1);
 328
 329     num /= size;
 330     den /= size;
 331
 332     num = exp(num);
 333
 334     *rnum = num;
 335     *rden = den;
 336 }
 337
 338 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var);
 339
 340 static double floor_offset(const double *S, int size, double mean)
 341 {
 342     double offset = 0.0;
 343
 344     for (int n = 0; n < size; n++) {
 345         const double p = S[n] - mean;
 346
 347         offset = fmax(offset, fabs(p));
 348     }
 349
 350     return offset / mean;
 351 }
 352
 353 static void process_frame(AVFilterContext *ctx,
 354                           AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch,
 355                           double *prior, double *prior_band_excit, int track_noise)
 356 {
 357     AVFilterLink *outlink = ctx->outputs[0];
 358     FilterLink      *outl = ff_filter_link(outlink);
 359     const double *abs_var = dnch->abs_var;
 360     const double ratio = outl->frame_count_out ? s->ratio : 1.0;
 361     const double rratio = 1. - ratio;
 362     const int *bin2band = s->bin2band;
 363     double *noisy_data = dnch->noisy_data;
 364     double *band_excit = dnch->band_excit;
 365     double *band_amt = dnch->band_amt;
 366     double *smoothed_gain = dnch->smoothed_gain;
 367     AVComplexDouble *fft_data_dbl = dnch->fft_out;
 368     AVComplexFloat *fft_data_flt = dnch->fft_out;
 369     double *gain = dnch->gain;
 370
 371     for (int i = 0; i < s->bin_count; i++) {
 372         double sqr_new_gain, new_gain, power, mag, mag_abs_var, new_mag_abs_var;
 373
 374         switch (s->format) {
 375         case AV_SAMPLE_FMT_FLTP:
 376             noisy_data[i] = mag = hypot(fft_data_flt[i].re, fft_data_flt[i].im);
 377             break;
 378         case AV_SAMPLE_FMT_DBLP:
 379             noisy_data[i] = mag = hypot(fft_data_dbl[i].re, fft_data_dbl[i].im);
 380             break;
 381         default:
 382             av_assert0(0);
 383         }
 384
 385         power = mag * mag;
 386         mag_abs_var = power / abs_var[i];
 387         new_mag_abs_var = ratio * prior[i] + rratio * fmax(mag_abs_var - 1.0, 0.0);
 388         new_gain = new_mag_abs_var / (1.0 + new_mag_abs_var);
 389         sqr_new_gain = new_gain * new_gain;
 390         prior[i] = mag_abs_var * sqr_new_gain;
 391         dnch->clean_data[i] = power * sqr_new_gain;
 392         gain[i] = new_gain;
 393     }
 394
 395     if (track_noise) {
 396         double flatness, num, den;
 397
 398         spectral_flatness(s, noisy_data, s->floor, s->bin_count, &num, &den);
 399
 400         flatness = num / den;
 401         if (flatness > 0.8) {
 402             const double offset = s->floor_offset * floor_offset(noisy_data, s->bin_count, den);
 403             const double new_floor = av_clipd(10.0 * log10(den) - 100.0 + offset, -90., -20.);
 404
 405             dnch->noise_floor = 0.1 * new_floor + dnch->noise_floor * 0.9;
 406             set_parameters(s, dnch, 1, 1);
 407         }
 408     }
 409
 410     for (int i = 0; i < s->number_of_bands; i++) {
 411         band_excit[i] = 0.0;
 412         band_amt[i] = 0.0;
 413     }
 414
 415     for (int i = 0; i < s->bin_count; i++)
 416         band_excit[bin2band[i]] += dnch->clean_data[i];
 417
 418     for (int i = 0; i < s->number_of_bands; i++) {
 419         band_excit[i] = fmax(band_excit[i],
 420                              s->band_alpha[i] * band_excit[i] +
 421                              s->band_beta[i] * prior_band_excit[i]);
 422         prior_band_excit[i] = band_excit[i];
 423     }
 424
 425     for (int j = 0, i = 0; j < s->number_of_bands; j++) {
 426         for (int k = 0; k < s->number_of_bands; k++) {
 427             band_amt[j] += dnch->spread_function[i++] * band_excit[k];
 428         }
 429     }
 430
 431     for (int i = 0; i < s->bin_count; i++)
 432         dnch->amt[i] = band_amt[bin2band[i]];
 433
 434     for (int i = 0; i < s->bin_count; i++) {
 435         if (dnch->amt[i] > abs_var[i]) {
 436             gain[i] = 1.0;
 437         } else if (dnch->amt[i] > dnch->min_abs_var[i]) {
 438             const double limit = sqrt(abs_var[i] / dnch->amt[i]);
 439
 440             gain[i] = limit_gain(gain[i], limit);
 441         } else {
 442             gain[i] = limit_gain(gain[i], dnch->max_gain);
 443         }
 444     }
 445
 446     memcpy(smoothed_gain, gain, s->bin_count * sizeof(*smoothed_gain));
 447     if (s->gain_smooth > 0) {
 448         const int r = s->gain_smooth;
 449
 450         for (int i = r; i < s->bin_count - r; i++) {
 451             const double gc = gain[i];
 452             double num = 0., den = 0.;
 453
 454             for (int j = -r; j <= r; j++) {
 455                 const double g = gain[i + j];
 456                 const double d = 1. - fabs(g - gc);
 457
 458                 num += g * d;
 459                 den += d;
 460             }
 461
 462             smoothed_gain[i] = num / den;
 463         }
 464     }
 465
 466     switch (s->format) {
 467     case AV_SAMPLE_FMT_FLTP:
 468         for (int i = 0; i < s->bin_count; i++) {
 469             const float new_gain = smoothed_gain[i];
 470
 471             fft_data_flt[i].re *= new_gain;
 472             fft_data_flt[i].im *= new_gain;
 473         }
 474         break;
 475     case AV_SAMPLE_FMT_DBLP:
 476         for (int i = 0; i < s->bin_count; i++) {
 477             const double new_gain = smoothed_gain[i];
 478
 479             fft_data_dbl[i].re *= new_gain;
 480             fft_data_dbl[i].im *= new_gain;
 481         }
 482         break;
 483     }
 484 }
 485
 486 static double freq2bark(double x)
 487 {
 488     double d = x / 7500.0;
 489
 490     return 13.0 * atan(7.6E-4 * x) + 3.5 * atan(d * d);
 491 }
 492
 493 static int get_band_centre(AudioFFTDeNoiseContext *s, int band)
 494 {
 495     if (band == -1)
 496         return lrint(s->band_centre[0] / 1.5);
 497
 498     return s->band_centre[band];
 499 }
 500
 501 static int get_band_edge(AudioFFTDeNoiseContext *s, int band)
 502 {
 503     int i;
 504
 505     if (band == NB_PROFILE_BANDS) {
 506         i = lrint(s->band_centre[NB_PROFILE_BANDS - 1] * 1.224745);
 507     } else {
 508         i = lrint(s->band_centre[band] / 1.224745);
 509     }
 510
 511     return FFMIN(i, s->sample_rate / 2);
 512 }
 513
 514 static void set_band_parameters(AudioFFTDeNoiseContext *s,
 515                                 DeNoiseChannel *dnch)
 516 {
 517     double band_noise, d2, d3, d4, d5;
 518     int i = 0, j = 0, k = 0;
 519
 520     d5 = 0.0;
 521     band_noise = process_get_band_noise(s, dnch, 0);
 522     for (int m = j; m < s->bin_count; m++) {
 523         if (m == j) {
 524             i = j;
 525             d5 = band_noise;
 526             if (k >= NB_PROFILE_BANDS) {
 527                 j = s->bin_count;
 528             } else {
 529                 j = s->fft_length * get_band_centre(s, k) / s->sample_rate;
 530             }
 531             d2 = j - i;
 532             band_noise = process_get_band_noise(s, dnch, k);
 533             k++;
 534         }
 535         d3 = (j - m) / d2;
 536         d4 = (m - i) / d2;
 537         dnch->rel_var[m] = exp((d5 * d3 + band_noise * d4) * C);
 538     }
 539
 540     for (i = 0; i < NB_PROFILE_BANDS; i++)
 541         dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
 542 }
 543
 544 static void read_custom_noise(AudioFFTDeNoiseContext *s, int ch)
 545 {
 546     DeNoiseChannel *dnch = &s->dnch[ch];
 547     char *custom_noise_str, *p, *arg, *saveptr = NULL;
 548     double band_noise[NB_PROFILE_BANDS] = { 0.f };
 549     int ret;
 550
 551     if (!s->band_noise_str)
 552         return;
 553
 554     custom_noise_str = p = av_strdup(s->band_noise_str);
 555     if (!p)
 556         return;
 557
 558     for (int i = 0; i < NB_PROFILE_BANDS; i++) {
 559         float noise;
 560
 561         if (!(arg = av_strtok(p, "| ", &saveptr)))
 562             break;
 563
 564         p = NULL;
 565
 566         ret = av_sscanf(arg, "%f", &noise);
 567         if (ret != 1) {
 568             av_log(s, AV_LOG_ERROR, "Custom band noise must be float.\n");
 569             break;
 570         }
 571
 572         band_noise[i] = av_clipd(noise, -24., 24.);
 573     }
 574
 575     av_free(custom_noise_str);
 576     memcpy(dnch->band_noise, band_noise, sizeof(band_noise));
 577 }
 578
 579 static void set_parameters(AudioFFTDeNoiseContext *s, DeNoiseChannel *dnch, int update_var, int update_auto_var)
 580 {
 581     if (dnch->last_noise_floor != dnch->noise_floor)
 582         dnch->last_noise_floor = dnch->noise_floor;
 583
 584     if (s->track_residual)
 585         dnch->last_noise_floor = fmax(dnch->last_noise_floor, dnch->residual_floor);
 586
 587     dnch->max_var = s->floor * exp((100.0 + dnch->last_noise_floor) * C);
 588     if (update_auto_var) {
 589         for (int i = 0; i < NB_PROFILE_BANDS; i++)
 590             dnch->noise_band_auto_var[i] = dnch->max_var * exp((process_get_band_noise(s, dnch, i) - 2.0) * C);
 591     }
 592
 593     if (s->track_residual) {
 594         if (update_var || dnch->last_residual_floor != dnch->residual_floor) {
 595             update_var = 1;
 596             dnch->last_residual_floor = dnch->residual_floor;
 597             dnch->last_noise_reduction = fmax(dnch->last_noise_floor - dnch->last_residual_floor + 100., 0);
 598             dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
 599         }
 600     } else if (update_var || dnch->noise_reduction != dnch->last_noise_reduction) {
 601         update_var = 1;
 602         dnch->last_noise_reduction = dnch->noise_reduction;
 603         dnch->last_residual_floor = av_clipd(dnch->last_noise_floor - dnch->last_noise_reduction, -80, -20);
 604         dnch->max_gain = exp(dnch->last_noise_reduction * (0.5 * C));
 605     }
 606
 607     dnch->gain_scale = 1.0 / (dnch->max_gain * dnch->max_gain);
 608
 609     if (update_var) {
 610         set_band_parameters(s, dnch);
 611
 612         for (int i = 0; i < s->bin_count; i++) {
 613             dnch->abs_var[i] = fmax(dnch->max_var * dnch->rel_var[i], 1.0);
 614             dnch->min_abs_var[i] = dnch->gain_scale * dnch->abs_var[i];
 615         }
 616     }
 617 }
 618
 619 static void reduce_mean(double *band_noise)
 620 {
 621     double mean = 0.f;
 622
 623     for (int i = 0; i < NB_PROFILE_BANDS; i++)
 624         mean += band_noise[i];
 625     mean /= NB_PROFILE_BANDS;
 626
 627     for (int i = 0; i < NB_PROFILE_BANDS; i++)
 628         band_noise[i] -= mean;
 629 }
 630
 631 static int config_input(AVFilterLink *inlink)
 632 {
 633     AVFilterContext *ctx = inlink->dst;
 634     AudioFFTDeNoiseContext *s = ctx->priv;
 635     double wscale, sar, sum, sdiv;
 636     int i, j, k, m, n, ret, tx_type;
 637     double dscale = 1.;
 638     float fscale = 1.f;
 639     void *scale;
 640
 641     s->format = inlink->format;
 642
 643     switch (s->format) {
 644     case AV_SAMPLE_FMT_FLTP:
 645         s->sample_size = sizeof(float);
 646         s->complex_sample_size = sizeof(AVComplexFloat);
 647         tx_type = AV_TX_FLOAT_RDFT;
 648         scale = &fscale;
 649         break;
 650     case AV_SAMPLE_FMT_DBLP:
 651         s->sample_size = sizeof(double);
 652         s->complex_sample_size = sizeof(AVComplexDouble);
 653         tx_type = AV_TX_DOUBLE_RDFT;
 654         scale = &dscale;
 655         break;
 656     }
 657
 658     s->dnch = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->dnch));
 659     if (!s->dnch)
 660         return AVERROR(ENOMEM);
 661
 662     s->channels = inlink->ch_layout.nb_channels;
 663     s->sample_rate = inlink->sample_rate;
 664     s->sample_advance = s->sample_rate / 80;
 665     s->window_length = 3 * s->sample_advance;
 666     s->fft_length2 = 1 << (32 - ff_clz(s->window_length));
 667     s->fft_length = s->fft_length2;
 668     s->buffer_length = s->fft_length * 2;
 669     s->bin_count = s->fft_length2 / 2 + 1;
 670
 671     s->band_centre[0] = 80;
 672     for (i = 1; i < NB_PROFILE_BANDS; i++) {
 673         s->band_centre[i] = lrint(1.5 * s->band_centre[i - 1] + 5.0);
 674         if (s->band_centre[i] < 1000) {
 675             s->band_centre[i] = 10 * (s->band_centre[i] / 10);
 676         } else if (s->band_centre[i] < 5000) {
 677             s->band_centre[i] = 50 * ((s->band_centre[i] + 20) / 50);
 678         } else if (s->band_centre[i] < 15000) {
 679             s->band_centre[i] = 100 * ((s->band_centre[i] + 45) / 100);
 680         } else {
 681             s->band_centre[i] = 1000 * ((s->band_centre[i] + 495) / 1000);
 682         }
 683     }
 684
 685     for (j = 0; j < SOLVE_SIZE; j++) {
 686         for (k = 0; k < SOLVE_SIZE; k++) {
 687             s->matrix_a[j + k * SOLVE_SIZE] = 0.0;
 688             for (m = 0; m < NB_PROFILE_BANDS; m++)
 689                 s->matrix_a[j + k * SOLVE_SIZE] += pow(m, j + k);
 690         }
 691     }
 692
 693     factor(s->matrix_a, SOLVE_SIZE);
 694
 695     i = 0;
 696     for (j = 0; j < SOLVE_SIZE; j++)
 697         for (k = 0; k < NB_PROFILE_BANDS; k++)
 698             s->matrix_b[i++] = pow(k, j);
 699
 700     i = 0;
 701     for (j = 0; j < NB_PROFILE_BANDS; j++)
 702         for (k = 0; k < SOLVE_SIZE; k++)
 703             s->matrix_c[i++] = pow(j, k);
 704
 705     s->window = av_calloc(s->window_length, sizeof(*s->window));
 706     s->bin2band = av_calloc(s->bin_count, sizeof(*s->bin2band));
 707     if (!s->window || !s->bin2band)
 708         return AVERROR(ENOMEM);
 709
 710     sdiv = s->band_multiplier;
 711     for (i = 0; i < s->bin_count; i++)
 712         s->bin2band[i] = lrint(sdiv * freq2bark((0.5 * i * s->sample_rate) / s->fft_length2));
 713
 714     s->number_of_bands = s->bin2band[s->bin_count - 1] + 1;
 715
 716     s->band_alpha = av_calloc(s->number_of_bands, sizeof(*s->band_alpha));
 717     s->band_beta = av_calloc(s->number_of_bands, sizeof(*s->band_beta));
 718     if (!s->band_alpha || !s->band_beta)
 719         return AVERROR(ENOMEM);
 720
 721     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
 722         DeNoiseChannel *dnch = &s->dnch[ch];
 723
 724         switch (s->noise_type) {
 725         case WHITE_NOISE:
 726             for (i = 0; i < NB_PROFILE_BANDS; i++)
 727                 dnch->band_noise[i] = 0.;
 728             break;
 729         case VINYL_NOISE:
 730             for (i = 0; i < NB_PROFILE_BANDS; i++)
 731                 dnch->band_noise[i] = get_band_noise(s, i, 50.0, 500.5, 2125.0);
 732             break;
 733         case SHELLAC_NOISE:
 734             for (i = 0; i < NB_PROFILE_BANDS; i++)
 735                 dnch->band_noise[i] = get_band_noise(s, i, 1.0, 500.0, 1.0E10);
 736             break;
 737         case CUSTOM_NOISE:
 738             read_custom_noise(s, ch);
 739             break;
 740         default:
 741             return AVERROR_BUG;
 742         }
 743
 744         reduce_mean(dnch->band_noise);
 745
 746         dnch->amt = av_calloc(s->bin_count, sizeof(*dnch->amt));
 747         dnch->band_amt = av_calloc(s->number_of_bands, sizeof(*dnch->band_amt));
 748         dnch->band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->band_excit));
 749         dnch->gain = av_calloc(s->bin_count, sizeof(*dnch->gain));
 750         dnch->smoothed_gain = av_calloc(s->bin_count, sizeof(*dnch->smoothed_gain));
 751         dnch->prior = av_calloc(s->bin_count, sizeof(*dnch->prior));
 752         dnch->prior_band_excit = av_calloc(s->number_of_bands, sizeof(*dnch->prior_band_excit));
 753         dnch->clean_data = av_calloc(s->bin_count, sizeof(*dnch->clean_data));
 754         dnch->noisy_data = av_calloc(s->bin_count, sizeof(*dnch->noisy_data));
 755         dnch->out_samples = av_calloc(s->buffer_length, sizeof(*dnch->out_samples));
 756         dnch->abs_var = av_calloc(s->bin_count, sizeof(*dnch->abs_var));
 757         dnch->rel_var = av_calloc(s->bin_count, sizeof(*dnch->rel_var));
 758         dnch->min_abs_var = av_calloc(s->bin_count, sizeof(*dnch->min_abs_var));
 759         dnch->fft_in = av_calloc(s->fft_length2, s->sample_size);
 760         dnch->fft_out = av_calloc(s->fft_length2 + 1, s->complex_sample_size);
 761         ret = av_tx_init(&dnch->fft, &dnch->tx_fn, tx_type, 0, s->fft_length2, scale, 0);
 762         if (ret < 0)
 763             return ret;
 764         ret = av_tx_init(&dnch->ifft, &dnch->itx_fn, tx_type, 1, s->fft_length2, scale, 0);
 765         if (ret < 0)
 766             return ret;
 767         dnch->spread_function = av_calloc(s->number_of_bands * s->number_of_bands,
 768                                           sizeof(*dnch->spread_function));
 769
 770         if (!dnch->amt ||
 771             !dnch->band_amt ||
 772             !dnch->band_excit ||
 773             !dnch->gain ||
 774             !dnch->smoothed_gain ||
 775             !dnch->prior ||
 776             !dnch->prior_band_excit ||
 777             !dnch->clean_data ||
 778             !dnch->noisy_data ||
 779             !dnch->out_samples ||
 780             !dnch->fft_in ||
 781             !dnch->fft_out ||
 782             !dnch->abs_var ||
 783             !dnch->rel_var ||
 784             !dnch->min_abs_var ||
 785             !dnch->spread_function ||
 786             !dnch->fft ||
 787             !dnch->ifft)
 788             return AVERROR(ENOMEM);
 789     }
 790
 791     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
 792         DeNoiseChannel *dnch = &s->dnch[ch];
 793         double *prior_band_excit = dnch->prior_band_excit;
 794         double min, max;
 795         double p1, p2;
 796
 797         p1 = pow(0.1, 2.5 / sdiv);
 798         p2 = pow(0.1, 1.0 / sdiv);
 799         j = 0;
 800         for (m = 0; m < s->number_of_bands; m++) {
 801             for (n = 0; n < s->number_of_bands; n++) {
 802                 if (n < m) {
 803                     dnch->spread_function[j++] = pow(p2, m - n);
 804                 } else if (n > m) {
 805                     dnch->spread_function[j++] = pow(p1, n - m);
 806                 } else {
 807                     dnch->spread_function[j++] = 1.0;
 808                 }
 809             }
 810         }
 811
 812         for (m = 0; m < s->number_of_bands; m++) {
 813             dnch->band_excit[m] = 0.0;
 814             prior_band_excit[m] = 0.0;
 815         }
 816
 817         for (m = 0; m < s->bin_count; m++)
 818             dnch->band_excit[s->bin2band[m]] += 1.0;
 819
 820         j = 0;
 821         for (m = 0; m < s->number_of_bands; m++) {
 822             for (n = 0; n < s->number_of_bands; n++)
 823                 prior_band_excit[m] += dnch->spread_function[j++] * dnch->band_excit[n];
 824         }
 825
 826         min = pow(0.1, 2.5);
 827         max = pow(0.1, 1.0);
 828         for (int i = 0; i < s->number_of_bands; i++) {
 829             if (i < lrint(12.0 * sdiv)) {
 830                 dnch->band_excit[i] = pow(0.1, 1.45 + 0.1 * i / sdiv);
 831             } else {
 832                 dnch->band_excit[i] = pow(0.1, 2.5 - 0.2 * (i / sdiv - 14.0));
 833             }
 834             dnch->band_excit[i] = av_clipd(dnch->band_excit[i], min, max);
 835         }
 836
 837         for (int i = 0; i < s->buffer_length; i++)
 838             dnch->out_samples[i] = 0;
 839
 840         j = 0;
 841         for (int i = 0; i < s->number_of_bands; i++)
 842             for (int k = 0; k < s->number_of_bands; k++)
 843                 dnch->spread_function[j++] *= dnch->band_excit[i] / prior_band_excit[i];
 844     }
 845
 846     j = 0;
 847     sar = s->sample_advance / s->sample_rate;
 848     for (int i = 0; i < s->bin_count; i++) {
 849         if ((i == s->fft_length2) || (s->bin2band[i] > j)) {
 850             double d6 = (i - 1) * s->sample_rate / s->fft_length;
 851             double d7 = fmin(0.008 + 2.2 / d6, 0.03);
 852             s->band_alpha[j] = exp(-sar / d7);
 853             s->band_beta[j] = 1.0 - s->band_alpha[j];
 854             j = s->bin2band[i];
 855         }
 856     }
 857
 858     s->winframe = ff_get_audio_buffer(inlink, s->window_length);
 859     if (!s->winframe)
 860         return AVERROR(ENOMEM);
 861
 862     wscale = sqrt(8.0 / (9.0 * s->fft_length));
 863     sum = 0.0;
 864     for (int i = 0; i < s->window_length; i++) {
 865         double d10 = sin(i * M_PI / s->window_length);
 866         d10 *= wscale * d10;
 867         s->window[i] = d10;
 868         sum += d10 * d10;
 869     }
 870
 871     s->window_weight = 0.5 * sum;
 872     s->floor = (1LL << 48) * exp(-23.025558369790467) * s->window_weight;
 873     s->sample_floor = s->floor * exp(4.144600506562284);
 874
 875     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
 876         DeNoiseChannel *dnch = &s->dnch[ch];
 877
 878         dnch->noise_reduction = s->noise_reduction;
 879         dnch->noise_floor     = s->noise_floor;
 880         dnch->residual_floor  = s->residual_floor;
 881
 882         set_parameters(s, dnch, 1, 1);
 883     }
 884
 885     s->noise_band_edge[0] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, 0) / s->sample_rate);
 886     i = 0;
 887     for (int j = 1; j < NB_PROFILE_BANDS + 1; j++) {
 888         s->noise_band_edge[j] = FFMIN(s->fft_length2, s->fft_length * get_band_edge(s, j) / s->sample_rate);
 889         if (s->noise_band_edge[j] > lrint(1.1 * s->noise_band_edge[j - 1]))
 890             i++;
 891         s->noise_band_edge[NB_PROFILE_BANDS + 1] = i;
 892     }
 893     s->noise_band_count = s->noise_band_edge[NB_PROFILE_BANDS + 1];
 894
 895     return 0;
 896 }
 897
 898 static void init_sample_noise(DeNoiseChannel *dnch)
 899 {
 900     for (int i = 0; i < NB_PROFILE_BANDS; i++) {
 901         dnch->noise_band_norm[i] = 0.0;
 902         dnch->noise_band_avr[i] = 0.0;
 903         dnch->noise_band_avi[i] = 0.0;
 904         dnch->noise_band_var[i] = 0.0;
 905     }
 906 }
 907
 908 static void sample_noise_block(AudioFFTDeNoiseContext *s,
 909                                DeNoiseChannel *dnch,
 910                                AVFrame *in, int ch)
 911 {
 912     double *src_dbl = (double *)in->extended_data[ch];
 913     float *src_flt = (float *)in->extended_data[ch];
 914     double mag2, var = 0.0, avr = 0.0, avi = 0.0;
 915     AVComplexDouble *fft_out_dbl = dnch->fft_out;
 916     AVComplexFloat *fft_out_flt = dnch->fft_out;
 917     double *fft_in_dbl = dnch->fft_in;
 918     float *fft_in_flt = dnch->fft_in;
 919     int edge, j, k, n, edgemax;
 920
 921     switch (s->format) {
 922     case AV_SAMPLE_FMT_FLTP:
 923         for (int i = 0; i < s->window_length; i++)
 924             fft_in_flt[i] = s->window[i] * src_flt[i] * (1LL << 23);
 925
 926         for (int i = s->window_length; i < s->fft_length2; i++)
 927             fft_in_flt[i] = 0.f;
 928         break;
 929     case AV_SAMPLE_FMT_DBLP:
 930         for (int i = 0; i < s->window_length; i++)
 931             fft_in_dbl[i] = s->window[i] * src_dbl[i] * (1LL << 23);
 932
 933         for (int i = s->window_length; i < s->fft_length2; i++)
 934             fft_in_dbl[i] = 0.;
 935         break;
 936     }
 937
 938     dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);
 939
 940     edge = s->noise_band_edge[0];
 941     j = edge;
 942     k = 0;
 943     n = j;
 944     edgemax = fmin(s->fft_length2, s->noise_band_edge[NB_PROFILE_BANDS]);
 945     for (int i = j; i <= edgemax; i++) {
 946         if ((i == j) && (i < edgemax)) {
 947             if (j > edge) {
 948                 dnch->noise_band_norm[k - 1] += j - edge;
 949                 dnch->noise_band_avr[k - 1] += avr;
 950                 dnch->noise_band_avi[k - 1] += avi;
 951                 dnch->noise_band_var[k - 1] += var;
 952             }
 953             k++;
 954             edge = j;
 955             j = s->noise_band_edge[k];
 956             if (k == NB_PROFILE_BANDS) {
 957                 j++;
 958             }
 959             var = 0.0;
 960             avr = 0.0;
 961             avi = 0.0;
 962         }
 963
 964         switch (s->format) {
 965         case AV_SAMPLE_FMT_FLTP:
 966             avr += fft_out_flt[n].re;
 967             avi += fft_out_flt[n].im;
 968             mag2 = fft_out_flt[n].re * fft_out_flt[n].re +
 969                    fft_out_flt[n].im * fft_out_flt[n].im;
 970             break;
 971         case AV_SAMPLE_FMT_DBLP:
 972             avr += fft_out_dbl[n].re;
 973             avi += fft_out_dbl[n].im;
 974             mag2 = fft_out_dbl[n].re * fft_out_dbl[n].re +
 975                    fft_out_dbl[n].im * fft_out_dbl[n].im;
 976             break;
 977         default:
 978             av_assert2(0);
 979         }
 980
 981         mag2 = fmax(mag2, s->sample_floor);
 982
 983         var += mag2;
 984         n++;
 985     }
 986
 987     dnch->noise_band_norm[k - 1] += j - edge;
 988     dnch->noise_band_avr[k - 1] += avr;
 989     dnch->noise_band_avi[k - 1] += avi;
 990     dnch->noise_band_var[k - 1] += var;
 991 }
 992
 993 static void finish_sample_noise(AudioFFTDeNoiseContext *s,
 994                                 DeNoiseChannel *dnch,
 995                                 double *sample_noise)
 996 {
 997     for (int i = 0; i < s->noise_band_count; i++) {
 998         dnch->noise_band_avr[i] /= dnch->noise_band_norm[i];
 999         dnch->noise_band_avi[i] /= dnch->noise_band_norm[i];
1000         dnch->noise_band_var[i] /= dnch->noise_band_norm[i];
1001         dnch->noise_band_var[i] -= dnch->noise_band_avr[i] * dnch->noise_band_avr[i] +
1002                                    dnch->noise_band_avi[i] * dnch->noise_band_avi[i];
1003         dnch->noise_band_auto_var[i] = dnch->noise_band_var[i];
1004         sample_noise[i] = 10.0 * log10(dnch->noise_band_var[i] / s->floor) - 100.0;
1005     }
1006     if (s->noise_band_count < NB_PROFILE_BANDS) {
1007         for (int i = s->noise_band_count; i < NB_PROFILE_BANDS; i++)
1008             sample_noise[i] = sample_noise[i - 1];
1009     }
1010 }
1011
1012 static void set_noise_profile(AudioFFTDeNoiseContext *s,
1013                               DeNoiseChannel *dnch,
1014                               double *sample_noise)
1015 {
1016     double new_band_noise[NB_PROFILE_BANDS];
1017     double temp[NB_PROFILE_BANDS];
1018     double sum = 0.0;
1019
1020     for (int m = 0; m < NB_PROFILE_BANDS; m++)
1021         temp[m] = sample_noise[m];
1022
1023     for (int m = 0, i = 0; m < SOLVE_SIZE; m++) {
1024         sum = 0.0;
1025         for (int n = 0; n < NB_PROFILE_BANDS; n++)
1026             sum += s->matrix_b[i++] * temp[n];
1027         s->vector_b[m] = sum;
1028     }
1029     solve(s->matrix_a, s->vector_b, SOLVE_SIZE);
1030     for (int m = 0, i = 0; m < NB_PROFILE_BANDS; m++) {
1031         sum = 0.0;
1032         for (int n = 0; n < SOLVE_SIZE; n++)
1033             sum += s->matrix_c[i++] * s->vector_b[n];
1034         temp[m] = sum;
1035     }
1036
1037     reduce_mean(temp);
1038
1039     av_log(s, AV_LOG_INFO, "bn=");
1040     for (int m = 0; m < NB_PROFILE_BANDS; m++) {
1041         new_band_noise[m] = temp[m];
1042         new_band_noise[m] = av_clipd(new_band_noise[m], -24.0, 24.0);
1043         av_log(s, AV_LOG_INFO, "%f ", new_band_noise[m]);
1044     }
1045     av_log(s, AV_LOG_INFO, "\n");
1046     memcpy(dnch->band_noise, new_band_noise, sizeof(new_band_noise));
1047 }
1048
1049 static int filter_channel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
1050 {
1051     AudioFFTDeNoiseContext *s = ctx->priv;
1052     AVFrame *in = arg;
1053     const int start = (in->ch_layout.nb_channels * jobnr) / nb_jobs;
1054     const int end = (in->ch_layout.nb_channels * (jobnr+1)) / nb_jobs;
1055     const int window_length = s->window_length;
1056     const double *window = s->window;
1057
1058     for (int ch = start; ch < end; ch++) {
1059         DeNoiseChannel *dnch = &s->dnch[ch];
1060         const double *src_dbl = (const double *)in->extended_data[ch];
1061         const float *src_flt = (const float *)in->extended_data[ch];
1062         double *dst = dnch->out_samples;
1063         double *fft_in_dbl = dnch->fft_in;
1064         float *fft_in_flt = dnch->fft_in;
1065
1066         switch (s->format) {
1067         case AV_SAMPLE_FMT_FLTP:
1068             for (int m = 0; m < window_length; m++)
1069                 fft_in_flt[m] = window[m] * src_flt[m] * (1LL << 23);
1070
1071             for (int m = window_length; m < s->fft_length2; m++)
1072                 fft_in_flt[m] = 0.f;
1073             break;
1074         case AV_SAMPLE_FMT_DBLP:
1075             for (int m = 0; m < window_length; m++)
1076                 fft_in_dbl[m] = window[m] * src_dbl[m] * (1LL << 23);
1077
1078             for (int m = window_length; m < s->fft_length2; m++)
1079                 fft_in_dbl[m] = 0.;
1080             break;
1081         }
1082
1083         dnch->tx_fn(dnch->fft, dnch->fft_out, dnch->fft_in, s->sample_size);
1084
1085         process_frame(ctx, s, dnch,
1086                       dnch->prior,
1087                       dnch->prior_band_excit,
1088                       s->track_noise);
1089
1090         dnch->itx_fn(dnch->ifft, dnch->fft_in, dnch->fft_out, s->complex_sample_size);
1091
1092         switch (s->format) {
1093         case AV_SAMPLE_FMT_FLTP:
1094             for (int m = 0; m < window_length; m++)
1095                 dst[m] += s->window[m] * fft_in_flt[m] / (1LL << 23);
1096             break;
1097         case AV_SAMPLE_FMT_DBLP:
1098             for (int m = 0; m < window_length; m++)
1099                 dst[m] += s->window[m] * fft_in_dbl[m] / (1LL << 23);
1100             break;
1101         }
1102     }
1103
1104     return 0;
1105 }
1106
1107 static int output_frame(AVFilterLink *inlink, AVFrame *in)
1108 {
1109     AVFilterContext *ctx = inlink->dst;
1110     AVFilterLink *outlink = ctx->outputs[0];
1111     AudioFFTDeNoiseContext *s = ctx->priv;
1112     const int output_mode = ctx->is_disabled ? IN_MODE : s->output_mode;
1113     const int offset = s->window_length - s->sample_advance;
1114     AVFrame *out;
1115
1116     for (int ch = 0; ch < s->channels; ch++) {
1117         uint8_t *src = (uint8_t *)s->winframe->extended_data[ch];
1118
1119         memmove(src, src + s->sample_advance * s->sample_size,
1120                 offset * s->sample_size);
1121         memcpy(src + offset * s->sample_size, in->extended_data[ch],
1122                in->nb_samples * s->sample_size);
1123         memset(src + s->sample_size * (offset + in->nb_samples), 0,
1124                (s->sample_advance - in->nb_samples) * s->sample_size);
1125     }
1126
1127     if (s->track_noise) {
1128         double average = 0.0, min = DBL_MAX, max = -DBL_MAX;
1129
1130         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1131             DeNoiseChannel *dnch = &s->dnch[ch];
1132
1133             average += dnch->noise_floor;
1134             max = fmax(max, dnch->noise_floor);
1135             min = fmin(min, dnch->noise_floor);
1136         }
1137
1138         average /= inlink->ch_layout.nb_channels;
1139
1140         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1141             DeNoiseChannel *dnch = &s->dnch[ch];
1142
1143             switch (s->noise_floor_link) {
1144             case MIN_LINK:     dnch->noise_floor = min;     break;
1145             case MAX_LINK:     dnch->noise_floor = max;     break;
1146             case AVERAGE_LINK: dnch->noise_floor = average; break;
1147             case NONE_LINK:
1148             default:
1149                 break;
1150             }
1151
1152             if (dnch->noise_floor != dnch->last_noise_floor)
1153                 set_parameters(s, dnch, 1, 0);
1154         }
1155     }
1156
1157     if (s->sample_noise_mode == SAMPLE_START) {
1158         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1159             DeNoiseChannel *dnch = &s->dnch[ch];
1160
1161             init_sample_noise(dnch);
1162         }
1163         s->sample_noise_mode = SAMPLE_NONE;
1164         s->sample_noise = 1;
1165         s->sample_noise_blocks = 0;
1166     }
1167
1168     if (s->sample_noise) {
1169         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1170             DeNoiseChannel *dnch = &s->dnch[ch];
1171
1172             sample_noise_block(s, dnch, s->winframe, ch);
1173         }
1174         s->sample_noise_blocks++;
1175     }
1176
1177     if (s->sample_noise_mode == SAMPLE_STOP) {
1178         for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1179             DeNoiseChannel *dnch = &s->dnch[ch];
1180             double sample_noise[NB_PROFILE_BANDS];
1181
1182             if (s->sample_noise_blocks <= 0)
1183                 break;
1184             finish_sample_noise(s, dnch, sample_noise);
1185             set_noise_profile(s, dnch, sample_noise);
1186             set_parameters(s, dnch, 1, 1);
1187         }
1188         s->sample_noise = 0;
1189         s->sample_noise_blocks = 0;
1190         s->sample_noise_mode = SAMPLE_NONE;
1191     }
1192
1193     ff_filter_execute(ctx, filter_channel, s->winframe, NULL,
1194                       FFMIN(outlink->ch_layout.nb_channels, ff_filter_get_nb_threads(ctx)));
1195
1196     if (av_frame_is_writable(in)) {
1197         out = in;
1198     } else {
1199         out = ff_get_audio_buffer(outlink, in->nb_samples);
1200         if (!out) {
1201             av_frame_free(&in);
1202             return AVERROR(ENOMEM);
1203         }
1204
1205         av_frame_copy_props(out, in);
1206     }
1207
1208     for (int ch = 0; ch < inlink->ch_layout.nb_channels; ch++) {
1209         DeNoiseChannel *dnch = &s->dnch[ch];
1210         double *src = dnch->out_samples;
1211         const double *orig_dbl = (const double *)s->winframe->extended_data[ch];
1212         const float *orig_flt = (const float *)s->winframe->extended_data[ch];
1213         double *dst_dbl = (double *)out->extended_data[ch];
1214         float *dst_flt = (float *)out->extended_data[ch];
1215
1216         switch (output_mode) {
1217         case IN_MODE:
1218             switch (s->format) {
1219             case AV_SAMPLE_FMT_FLTP:
1220                 for (int m = 0; m < out->nb_samples; m++)
1221                     dst_flt[m] = orig_flt[m];
1222                 break;
1223             case AV_SAMPLE_FMT_DBLP:
1224                 for (int m = 0; m < out->nb_samples; m++)
1225                     dst_dbl[m] = orig_dbl[m];
1226                 break;
1227             }
1228             break;
1229         case OUT_MODE:
1230             switch (s->format) {
1231             case AV_SAMPLE_FMT_FLTP:
1232                 for (int m = 0; m < out->nb_samples; m++)
1233                     dst_flt[m] = src[m];
1234                 break;
1235             case AV_SAMPLE_FMT_DBLP:
1236                 for (int m = 0; m < out->nb_samples; m++)
1237                     dst_dbl[m] = src[m];
1238                 break;
1239             }
1240             break;
1241         case NOISE_MODE:
1242             switch (s->format) {
1243             case AV_SAMPLE_FMT_FLTP:
1244                 for (int m = 0; m < out->nb_samples; m++)
1245                     dst_flt[m] = orig_flt[m] - src[m];
1246                 break;
1247             case AV_SAMPLE_FMT_DBLP:
1248                 for (int m = 0; m < out->nb_samples; m++)
1249                     dst_dbl[m] = orig_dbl[m] - src[m];
1250                 break;
1251             }
1252             break;
1253         default:
1254             if (in != out)
1255                 av_frame_free(&in);
1256             av_frame_free(&out);
1257             return AVERROR_BUG;
1258         }
1259
1260         memmove(src, src + s->sample_advance, (s->window_length - s->sample_advance) * sizeof(*src));
1261         memset(src + (s->window_length - s->sample_advance), 0, s->sample_advance * sizeof(*src));
1262     }
1263
1264     if (out != in)
1265         av_frame_free(&in);
1266     return ff_filter_frame(outlink, out);
1267 }
1268
1269 static int activate(AVFilterContext *ctx)
1270 {
1271     AVFilterLink *inlink = ctx->inputs[0];
1272     AVFilterLink *outlink = ctx->outputs[0];
1273     AudioFFTDeNoiseContext *s = ctx->priv;
1274     AVFrame *in = NULL;
1275     int ret;
1276
1277     FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
1278
1279     ret = ff_inlink_consume_samples(inlink, s->sample_advance, s->sample_advance, &in);
1280     if (ret < 0)
1281         return ret;
1282     if (ret > 0)
1283         return output_frame(inlink, in);
1284
1285     if (ff_inlink_queued_samples(inlink) >= s->sample_advance) {
1286         ff_filter_set_ready(ctx, 10);
1287         return 0;
1288     }
1289
1290     FF_FILTER_FORWARD_STATUS(inlink, outlink);
1291     FF_FILTER_FORWARD_WANTED(outlink, inlink);
1292
1293     return FFERROR_NOT_READY;
1294 }
1295
1296 static av_cold void uninit(AVFilterContext *ctx)
1297 {
1298     AudioFFTDeNoiseContext *s = ctx->priv;
1299
1300     av_freep(&s->window);
1301     av_freep(&s->bin2band);
1302     av_freep(&s->band_alpha);
1303     av_freep(&s->band_beta);
1304     av_frame_free(&s->winframe);
1305
1306     if (s->dnch) {
1307         for (int ch = 0; ch < s->channels; ch++) {
1308             DeNoiseChannel *dnch = &s->dnch[ch];
1309             av_freep(&dnch->amt);
1310             av_freep(&dnch->band_amt);
1311             av_freep(&dnch->band_excit);
1312             av_freep(&dnch->gain);
1313             av_freep(&dnch->smoothed_gain);
1314             av_freep(&dnch->prior);
1315             av_freep(&dnch->prior_band_excit);
1316             av_freep(&dnch->clean_data);
1317             av_freep(&dnch->noisy_data);
1318             av_freep(&dnch->out_samples);
1319             av_freep(&dnch->spread_function);
1320             av_freep(&dnch->abs_var);
1321             av_freep(&dnch->rel_var);
1322             av_freep(&dnch->min_abs_var);
1323             av_freep(&dnch->fft_in);
1324             av_freep(&dnch->fft_out);
1325             av_tx_uninit(&dnch->fft);
1326             av_tx_uninit(&dnch->ifft);
1327         }
1328         av_freep(&s->dnch);
1329     }
1330 }
1331
1332 static int process_command(AVFilterContext *ctx, const char *cmd, const char *args,
1333                            char *res, int res_len, int flags)
1334 {
1335     AudioFFTDeNoiseContext *s = ctx->priv;
1336     int ret = 0;
1337
1338     ret = ff_filter_process_command(ctx, cmd, args, res, res_len, flags);
1339     if (ret < 0)
1340         return ret;
1341
1342     if (!strcmp(cmd, "sample_noise") || !strcmp(cmd, "sn"))
1343         return 0;
1344
1345     for (int ch = 0; ch < s->channels; ch++) {
1346         DeNoiseChannel *dnch = &s->dnch[ch];
1347
1348         dnch->noise_reduction = s->noise_reduction;
1349         dnch->noise_floor     = s->noise_floor;
1350         dnch->residual_floor  = s->residual_floor;
1351
1352         set_parameters(s, dnch, 1, 1);
1353     }
1354
1355     return 0;
1356 }
1357
1358 static const AVFilterPad inputs[] = {
1359     {
1360         .name         = "default",
1361         .type         = AVMEDIA_TYPE_AUDIO,
1362         .config_props = config_input,
1363     },
1364 };
1365
1366 const FFFilter ff_af_afftdn = {
1367     .p.name          = "afftdn",
1368     .p.description   = NULL_IF_CONFIG_SMALL("Denoise audio samples using FFT."),
1369     .p.priv_class    = &afftdn_class,
1370     .p.flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
1371                        AVFILTER_FLAG_SLICE_THREADS,
1372     .priv_size       = sizeof(AudioFFTDeNoiseContext),
1373     .activate        = activate,
1374     .uninit          = uninit,
1375     FILTER_INPUTS(inputs),
1376     FILTER_OUTPUTS(ff_audio_default_filterpad),
1377     FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_DBLP),
1378     .process_command = process_command,
1379 };