2 * Copyright (c) 2021 Paul B Mahol
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/mem.h"
25 #include "libavutil/opt.h"
26 #include "libavutil/tx.h"
30 #include "window_func.h"
32 #define MEASURE_ALL UINT_MAX
33 #define MEASURE_NONE 0
34 #define MEASURE_MEAN (1 << 0)
35 #define MEASURE_VARIANCE (1 << 1)
36 #define MEASURE_CENTROID (1 << 2)
37 #define MEASURE_SPREAD (1 << 3)
38 #define MEASURE_SKEWNESS (1 << 4)
39 #define MEASURE_KURTOSIS (1 << 5)
40 #define MEASURE_ENTROPY (1 << 6)
41 #define MEASURE_FLATNESS (1 << 7)
42 #define MEASURE_CREST (1 << 8)
43 #define MEASURE_FLUX (1 << 9)
44 #define MEASURE_SLOPE (1 << 10)
45 #define MEASURE_DECREASE (1 << 11)
46 #define MEASURE_ROLLOFF (1 << 12)
48 typedef struct ChannelSpectralStats
{
62 } ChannelSpectralStats
;
64 typedef struct AudioSpectralStatsContext
{
72 ChannelSpectralStats
*stats
;
73 float *window_func_lut
;
76 AVComplexFloat
**fft_in
;
77 AVComplexFloat
**fft_out
;
78 float **prev_magnitude
;
81 } AudioSpectralStatsContext
;
83 #define OFFSET(x) offsetof(AudioSpectralStatsContext, x)
84 #define A AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
86 static const AVOption aspectralstats_options
[] = {
87 { "win_size", "set the window size", OFFSET(win_size
), AV_OPT_TYPE_INT
, {.i64
=2048}, 32, 65536, A
},
88 WIN_FUNC_OPTION("win_func", OFFSET(win_func
), A
, WFUNC_HANNING
),
89 { "overlap", "set window overlap", OFFSET(overlap
), AV_OPT_TYPE_FLOAT
, {.dbl
=0.5}, 0, 1, A
},
90 { "measure", "select the parameters which are measured", OFFSET(measure
), AV_OPT_TYPE_FLAGS
, {.i64
=MEASURE_ALL
}, 0, UINT_MAX
, A
, .unit
= "measure" },
91 { "none", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_NONE
}, 0, 0, A
, .unit
= "measure" },
92 { "all", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_ALL
}, 0, 0, A
, .unit
= "measure" },
93 { "mean", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_MEAN
}, 0, 0, A
, .unit
= "measure" },
94 { "variance", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_VARIANCE
}, 0, 0, A
, .unit
= "measure" },
95 { "centroid", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_CENTROID
}, 0, 0, A
, .unit
= "measure" },
96 { "spread", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_SPREAD
}, 0, 0, A
, .unit
= "measure" },
97 { "skewness", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_SKEWNESS
}, 0, 0, A
, .unit
= "measure" },
98 { "kurtosis", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_KURTOSIS
}, 0, 0, A
, .unit
= "measure" },
99 { "entropy", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_ENTROPY
}, 0, 0, A
, .unit
= "measure" },
100 { "flatness", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_FLATNESS
}, 0, 0, A
, .unit
= "measure" },
101 { "crest", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_CREST
}, 0, 0, A
, .unit
= "measure" },
102 { "flux", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_FLUX
}, 0, 0, A
, .unit
= "measure" },
103 { "slope", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_SLOPE
}, 0, 0, A
, .unit
= "measure" },
104 { "decrease", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_DECREASE
}, 0, 0, A
, .unit
= "measure" },
105 { "rolloff", "", 0, AV_OPT_TYPE_CONST
, {.i64
=MEASURE_ROLLOFF
}, 0, 0, A
, .unit
= "measure" },
109 AVFILTER_DEFINE_CLASS(aspectralstats
);
111 static int config_output(AVFilterLink
*outlink
)
113 AudioSpectralStatsContext
*s
= outlink
->src
->priv
;
114 float overlap
, scale
= 1.f
;
117 s
->nb_channels
= outlink
->ch_layout
.nb_channels
;
118 s
->window_func_lut
= av_realloc_f(s
->window_func_lut
, s
->win_size
,
119 sizeof(*s
->window_func_lut
));
120 if (!s
->window_func_lut
)
121 return AVERROR(ENOMEM
);
122 generate_window_func(s
->window_func_lut
, s
->win_size
, s
->win_func
, &overlap
);
123 if (s
->overlap
== 1.f
)
124 s
->overlap
= overlap
;
126 s
->hop_size
= s
->win_size
* (1.f
- s
->overlap
);
127 if (s
->hop_size
<= 0)
128 return AVERROR(EINVAL
);
130 s
->stats
= av_calloc(s
->nb_channels
, sizeof(*s
->stats
));
132 return AVERROR(ENOMEM
);
134 s
->fft
= av_calloc(s
->nb_channels
, sizeof(*s
->fft
));
136 return AVERROR(ENOMEM
);
138 s
->magnitude
= av_calloc(s
->nb_channels
, sizeof(*s
->magnitude
));
140 return AVERROR(ENOMEM
);
142 s
->prev_magnitude
= av_calloc(s
->nb_channels
, sizeof(*s
->prev_magnitude
));
143 if (!s
->prev_magnitude
)
144 return AVERROR(ENOMEM
);
146 s
->fft_in
= av_calloc(s
->nb_channels
, sizeof(*s
->fft_in
));
148 return AVERROR(ENOMEM
);
150 s
->fft_out
= av_calloc(s
->nb_channels
, sizeof(*s
->fft_out
));
152 return AVERROR(ENOMEM
);
154 for (int ch
= 0; ch
< s
->nb_channels
; ch
++) {
155 ret
= av_tx_init(&s
->fft
[ch
], &s
->tx_fn
, AV_TX_FLOAT_FFT
, 0, s
->win_size
, &scale
, 0);
159 s
->fft_in
[ch
] = av_calloc(s
->win_size
, sizeof(**s
->fft_in
));
161 return AVERROR(ENOMEM
);
163 s
->fft_out
[ch
] = av_calloc(s
->win_size
, sizeof(**s
->fft_out
));
165 return AVERROR(ENOMEM
);
167 s
->magnitude
[ch
] = av_calloc(s
->win_size
, sizeof(**s
->magnitude
));
168 if (!s
->magnitude
[ch
])
169 return AVERROR(ENOMEM
);
171 s
->prev_magnitude
[ch
] = av_calloc(s
->win_size
, sizeof(**s
->prev_magnitude
));
172 if (!s
->prev_magnitude
[ch
])
173 return AVERROR(ENOMEM
);
176 s
->window
= ff_get_audio_buffer(outlink
, s
->win_size
);
178 return AVERROR(ENOMEM
);
183 static void set_meta(AVDictionary
**metadata
, int chan
, const char *key
,
184 const char *fmt
, float val
)
189 snprintf(value
, sizeof(value
), fmt
, val
);
191 snprintf(key2
, sizeof(key2
), "lavfi.aspectralstats.%d.%s", chan
, key
);
193 snprintf(key2
, sizeof(key2
), "lavfi.aspectralstats.%s", key
);
194 av_dict_set(metadata
, key2
, value
, 0);
197 static void set_metadata(AudioSpectralStatsContext
*s
, AVDictionary
**metadata
)
199 for (int ch
= 0; ch
< s
->nb_channels
; ch
++) {
200 ChannelSpectralStats
*stats
= &s
->stats
[ch
];
202 if (s
->measure
& MEASURE_MEAN
)
203 set_meta(metadata
, ch
+ 1, "mean", "%g", stats
->mean
);
204 if (s
->measure
& MEASURE_VARIANCE
)
205 set_meta(metadata
, ch
+ 1, "variance", "%g", stats
->variance
);
206 if (s
->measure
& MEASURE_CENTROID
)
207 set_meta(metadata
, ch
+ 1, "centroid", "%g", stats
->centroid
);
208 if (s
->measure
& MEASURE_SPREAD
)
209 set_meta(metadata
, ch
+ 1, "spread", "%g", stats
->spread
);
210 if (s
->measure
& MEASURE_SKEWNESS
)
211 set_meta(metadata
, ch
+ 1, "skewness", "%g", stats
->skewness
);
212 if (s
->measure
& MEASURE_KURTOSIS
)
213 set_meta(metadata
, ch
+ 1, "kurtosis", "%g", stats
->kurtosis
);
214 if (s
->measure
& MEASURE_ENTROPY
)
215 set_meta(metadata
, ch
+ 1, "entropy", "%g", stats
->entropy
);
216 if (s
->measure
& MEASURE_FLATNESS
)
217 set_meta(metadata
, ch
+ 1, "flatness", "%g", stats
->flatness
);
218 if (s
->measure
& MEASURE_CREST
)
219 set_meta(metadata
, ch
+ 1, "crest", "%g", stats
->crest
);
220 if (s
->measure
& MEASURE_FLUX
)
221 set_meta(metadata
, ch
+ 1, "flux", "%g", stats
->flux
);
222 if (s
->measure
& MEASURE_SLOPE
)
223 set_meta(metadata
, ch
+ 1, "slope", "%g", stats
->slope
);
224 if (s
->measure
& MEASURE_DECREASE
)
225 set_meta(metadata
, ch
+ 1, "decrease", "%g", stats
->decrease
);
226 if (s
->measure
& MEASURE_ROLLOFF
)
227 set_meta(metadata
, ch
+ 1, "rolloff", "%g", stats
->rolloff
);
231 static float spectral_mean(const float *const spectral
, int size
, int max_freq
)
235 for (int n
= 0; n
< size
; n
++)
241 static float sqrf(float a
)
246 static float spectral_variance(const float *const spectral
, int size
, int max_freq
, float mean
)
250 for (int n
= 0; n
< size
; n
++)
251 sum
+= sqrf(spectral
[n
] - mean
);
256 static float spectral_centroid(const float *const spectral
, int size
, int max_freq
)
258 const float scale
= max_freq
/ (float)size
;
259 float num
= 0.f
, den
= 0.f
;
261 for (int n
= 0; n
< size
; n
++) {
262 num
+= spectral
[n
] * n
* scale
;
266 if (den
<= FLT_EPSILON
)
271 static float spectral_spread(const float *const spectral
, int size
, int max_freq
, float centroid
)
273 const float scale
= max_freq
/ (float)size
;
274 float num
= 0.f
, den
= 0.f
;
276 for (int n
= 0; n
< size
; n
++) {
277 num
+= spectral
[n
] * sqrf(n
* scale
- centroid
);
281 if (den
<= FLT_EPSILON
)
283 return sqrtf(num
/ den
);
286 static float cbrf(float a
)
291 static float spectral_skewness(const float *const spectral
, int size
, int max_freq
, float centroid
, float spread
)
293 const float scale
= max_freq
/ (float)size
;
294 float num
= 0.f
, den
= 0.f
;
296 for (int n
= 0; n
< size
; n
++) {
297 num
+= spectral
[n
] * cbrf(n
* scale
- centroid
);
302 if (den
<= FLT_EPSILON
)
307 static float spectral_kurtosis(const float *const spectral
, int size
, int max_freq
, float centroid
, float spread
)
309 const float scale
= max_freq
/ (float)size
;
310 float num
= 0.f
, den
= 0.f
;
312 for (int n
= 0; n
< size
; n
++) {
313 num
+= spectral
[n
] * sqrf(sqrf(n
* scale
- centroid
));
317 den
*= sqrf(sqrf(spread
));
318 if (den
<= FLT_EPSILON
)
323 static float spectral_entropy(const float *const spectral
, int size
, int max_freq
)
325 float num
= 0.f
, den
= 0.f
;
327 for (int n
= 0; n
< size
; n
++) {
328 num
+= spectral
[n
] * logf(spectral
[n
] + FLT_EPSILON
);
332 if (den
<= FLT_EPSILON
)
337 static float spectral_flatness(const float *const spectral
, int size
, int max_freq
)
339 float num
= 0.f
, den
= 0.f
;
341 for (int n
= 0; n
< size
; n
++) {
342 float v
= FLT_EPSILON
+ spectral
[n
];
350 if (den
<= FLT_EPSILON
)
355 static float spectral_crest(const float *const spectral
, int size
, int max_freq
)
357 float max
= 0.f
, mean
= 0.f
;
359 for (int n
= 0; n
< size
; n
++) {
360 max
= fmaxf(max
, spectral
[n
]);
365 if (mean
<= FLT_EPSILON
)
370 static float spectral_flux(const float *const spectral
, const float *const prev_spectral
,
371 int size
, int max_freq
)
375 for (int n
= 0; n
< size
; n
++)
376 sum
+= sqrf(spectral
[n
] - prev_spectral
[n
]);
381 static float spectral_slope(const float *const spectral
, int size
, int max_freq
)
383 const float mean_freq
= size
* 0.5f
;
384 float mean_spectral
= 0.f
, num
= 0.f
, den
= 0.f
;
386 for (int n
= 0; n
< size
; n
++)
387 mean_spectral
+= spectral
[n
];
388 mean_spectral
/= size
;
390 for (int n
= 0; n
< size
; n
++) {
391 num
+= ((n
- mean_freq
) / mean_freq
) * (spectral
[n
] - mean_spectral
);
392 den
+= sqrf((n
- mean_freq
) / mean_freq
);
395 if (fabsf(den
) <= FLT_EPSILON
)
400 static float spectral_decrease(const float *const spectral
, int size
, int max_freq
)
402 float num
= 0.f
, den
= 0.f
;
404 for (int n
= 1; n
< size
; n
++) {
405 num
+= (spectral
[n
] - spectral
[0]) / n
;
409 if (den
<= FLT_EPSILON
)
414 static float spectral_rolloff(const float *const spectral
, int size
, int max_freq
)
416 const float scale
= max_freq
/ (float)size
;
417 float norm
= 0.f
, sum
= 0.f
;
420 for (int n
= 0; n
< size
; n
++)
424 for (int n
= 0; n
< size
; n
++) {
435 static int filter_channel(AVFilterContext
*ctx
, void *arg
, int jobnr
, int nb_jobs
)
437 AudioSpectralStatsContext
*s
= ctx
->priv
;
438 const float *window_func_lut
= s
->window_func_lut
;
440 const int channels
= s
->nb_channels
;
441 const int start
= (channels
* jobnr
) / nb_jobs
;
442 const int end
= (channels
* (jobnr
+1)) / nb_jobs
;
443 const int offset
= s
->win_size
- s
->hop_size
;
445 for (int ch
= start
; ch
< end
; ch
++) {
446 float *window
= (float *)s
->window
->extended_data
[ch
];
447 ChannelSpectralStats
*stats
= &s
->stats
[ch
];
448 AVComplexFloat
*fft_out
= s
->fft_out
[ch
];
449 AVComplexFloat
*fft_in
= s
->fft_in
[ch
];
450 float *magnitude
= s
->magnitude
[ch
];
451 float *prev_magnitude
= s
->prev_magnitude
[ch
];
452 const float scale
= 1.f
/ s
->win_size
;
454 memmove(window
, &window
[s
->hop_size
], offset
* sizeof(float));
455 memcpy(&window
[offset
], in
->extended_data
[ch
], in
->nb_samples
* sizeof(float));
456 memset(&window
[offset
+ in
->nb_samples
], 0, (s
->hop_size
- in
->nb_samples
) * sizeof(float));
458 for (int n
= 0; n
< s
->win_size
; n
++) {
459 fft_in
[n
].re
= window
[n
] * window_func_lut
[n
];
463 s
->tx_fn(s
->fft
[ch
], fft_out
, fft_in
, sizeof(*fft_in
));
465 for (int n
= 0; n
< s
->win_size
/ 2; n
++) {
466 fft_out
[n
].re
*= scale
;
467 fft_out
[n
].im
*= scale
;
470 for (int n
= 0; n
< s
->win_size
/ 2; n
++)
471 magnitude
[n
] = hypotf(fft_out
[n
].re
, fft_out
[n
].im
);
473 if (s
->measure
& (MEASURE_MEAN
| MEASURE_VARIANCE
))
474 stats
->mean
= spectral_mean(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2);
475 if (s
->measure
& MEASURE_VARIANCE
)
476 stats
->variance
= spectral_variance(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2, stats
->mean
);
477 if (s
->measure
& (MEASURE_SPREAD
| MEASURE_KURTOSIS
| MEASURE_SKEWNESS
| MEASURE_CENTROID
))
478 stats
->centroid
= spectral_centroid(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2);
479 if (s
->measure
& (MEASURE_SPREAD
| MEASURE_KURTOSIS
| MEASURE_SKEWNESS
))
480 stats
->spread
= spectral_spread(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2, stats
->centroid
);
481 if (s
->measure
& MEASURE_SKEWNESS
)
482 stats
->skewness
= spectral_skewness(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2, stats
->centroid
, stats
->spread
);
483 if (s
->measure
& MEASURE_KURTOSIS
)
484 stats
->kurtosis
= spectral_kurtosis(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2, stats
->centroid
, stats
->spread
);
485 if (s
->measure
& MEASURE_ENTROPY
)
486 stats
->entropy
= spectral_entropy(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2);
487 if (s
->measure
& MEASURE_FLATNESS
)
488 stats
->flatness
= spectral_flatness(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2);
489 if (s
->measure
& MEASURE_CREST
)
490 stats
->crest
= spectral_crest(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2);
491 if (s
->measure
& MEASURE_FLUX
)
492 stats
->flux
= spectral_flux(magnitude
, prev_magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2);
493 if (s
->measure
& MEASURE_SLOPE
)
494 stats
->slope
= spectral_slope(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2);
495 if (s
->measure
& MEASURE_DECREASE
)
496 stats
->decrease
= spectral_decrease(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2);
497 if (s
->measure
& MEASURE_ROLLOFF
)
498 stats
->rolloff
= spectral_rolloff(magnitude
, s
->win_size
/ 2, in
->sample_rate
/ 2);
500 memcpy(prev_magnitude
, magnitude
, s
->win_size
* sizeof(float));
506 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*in
)
508 AVFilterContext
*ctx
= inlink
->dst
;
509 AVFilterLink
*outlink
= ctx
->outputs
[0];
510 AudioSpectralStatsContext
*s
= ctx
->priv
;
511 AVDictionary
**metadata
;
515 if (av_frame_is_writable(in
)) {
518 out
= ff_get_audio_buffer(outlink
, in
->nb_samples
);
521 return AVERROR(ENOMEM
);
523 ret
= av_frame_copy_props(out
, in
);
526 ret
= av_frame_copy(out
, in
);
531 metadata
= &out
->metadata
;
532 ff_filter_execute(ctx
, filter_channel
, in
, NULL
,
533 FFMIN(inlink
->ch_layout
.nb_channels
, ff_filter_get_nb_threads(ctx
)));
535 set_metadata(s
, metadata
);
539 return ff_filter_frame(outlink
, out
);
546 static int activate(AVFilterContext
*ctx
)
548 AudioSpectralStatsContext
*s
= ctx
->priv
;
549 AVFilterLink
*outlink
= ctx
->outputs
[0];
550 AVFilterLink
*inlink
= ctx
->inputs
[0];
554 FF_FILTER_FORWARD_STATUS_BACK(outlink
, inlink
);
556 ret
= ff_inlink_consume_samples(inlink
, s
->hop_size
, s
->hop_size
, &in
);
560 ret
= filter_frame(inlink
, in
);
564 if (ff_inlink_queued_samples(inlink
) >= s
->hop_size
) {
565 ff_filter_set_ready(ctx
, 10);
569 FF_FILTER_FORWARD_STATUS(inlink
, outlink
);
570 FF_FILTER_FORWARD_WANTED(outlink
, inlink
);
572 return FFERROR_NOT_READY
;
575 static av_cold
void uninit(AVFilterContext
*ctx
)
577 AudioSpectralStatsContext
*s
= ctx
->priv
;
579 for (int ch
= 0; ch
< s
->nb_channels
; ch
++) {
581 av_tx_uninit(&s
->fft
[ch
]);
583 av_freep(&s
->fft_in
[ch
]);
585 av_freep(&s
->fft_out
[ch
]);
587 av_freep(&s
->magnitude
[ch
]);
588 if (s
->prev_magnitude
)
589 av_freep(&s
->prev_magnitude
[ch
]);
593 av_freep(&s
->magnitude
);
594 av_freep(&s
->prev_magnitude
);
595 av_freep(&s
->fft_in
);
596 av_freep(&s
->fft_out
);
599 av_freep(&s
->window_func_lut
);
600 av_frame_free(&s
->window
);
603 static const AVFilterPad aspectralstats_outputs
[] = {
606 .type
= AVMEDIA_TYPE_AUDIO
,
607 .config_props
= config_output
,
611 const FFFilter ff_af_aspectralstats
= {
612 .p
.name
= "aspectralstats",
613 .p
.description
= NULL_IF_CONFIG_SMALL("Show frequency domain statistics about audio frames."),
614 .p
.priv_class
= &aspectralstats_class
,
615 .p
.flags
= AVFILTER_FLAG_SLICE_THREADS
,
616 .priv_size
= sizeof(AudioSpectralStatsContext
),
618 .activate
= activate
,
619 FILTER_INPUTS(ff_audio_default_filterpad
),
620 FILTER_OUTPUTS(aspectralstats_outputs
),
621 FILTER_SINGLE_SAMPLEFMT(AV_SAMPLE_FMT_FLTP
),