/*
 * Copyright (c) 2016 Kyle Swanson <k@ylo.ph>.
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 /* http://k.ylo.ph/2016/04/04/loudnorm.html */
23 #include "libavutil/mem.h"
24 #include "libavutil/opt.h"
54 typedef struct LoudNormContext
{
62 double measured_thresh
;
66 enum PrintFormat print_format
;
78 double gain_reduction
[2];
81 int limiter_buf_index
;
83 enum LimiterState limiter_state
;
91 enum FrameType frame_type
;
96 FFEBUR128State
*r128_in
;
97 FFEBUR128State
*r128_out
;
100 #define OFFSET(x) offsetof(LoudNormContext, x)
101 #define FLAGS AV_OPT_FLAG_AUDIO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
103 static const AVOption loudnorm_options
[] = {
104 { "I", "set integrated loudness target", OFFSET(target_i
), AV_OPT_TYPE_DOUBLE
, {.dbl
= -24.}, -70., -5., FLAGS
},
105 { "i", "set integrated loudness target", OFFSET(target_i
), AV_OPT_TYPE_DOUBLE
, {.dbl
= -24.}, -70., -5., FLAGS
},
106 { "LRA", "set loudness range target", OFFSET(target_lra
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 7.}, 1., 50., FLAGS
},
107 { "lra", "set loudness range target", OFFSET(target_lra
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 7.}, 1., 50., FLAGS
},
108 { "TP", "set maximum true peak", OFFSET(target_tp
), AV_OPT_TYPE_DOUBLE
, {.dbl
= -2.}, -9., 0., FLAGS
},
109 { "tp", "set maximum true peak", OFFSET(target_tp
), AV_OPT_TYPE_DOUBLE
, {.dbl
= -2.}, -9., 0., FLAGS
},
110 { "measured_I", "measured IL of input file", OFFSET(measured_i
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 0.}, -99., 0., FLAGS
},
111 { "measured_i", "measured IL of input file", OFFSET(measured_i
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 0.}, -99., 0., FLAGS
},
112 { "measured_LRA", "measured LRA of input file", OFFSET(measured_lra
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 0.}, 0., 99., FLAGS
},
113 { "measured_lra", "measured LRA of input file", OFFSET(measured_lra
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 0.}, 0., 99., FLAGS
},
114 { "measured_TP", "measured true peak of input file", OFFSET(measured_tp
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 99.}, -99., 99., FLAGS
},
115 { "measured_tp", "measured true peak of input file", OFFSET(measured_tp
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 99.}, -99., 99., FLAGS
},
116 { "measured_thresh", "measured threshold of input file", OFFSET(measured_thresh
), AV_OPT_TYPE_DOUBLE
, {.dbl
= -70.}, -99., 0., FLAGS
},
117 { "offset", "set offset gain", OFFSET(offset
), AV_OPT_TYPE_DOUBLE
, {.dbl
= 0.}, -99., 99., FLAGS
},
118 { "linear", "normalize linearly if possible", OFFSET(linear
), AV_OPT_TYPE_BOOL
, {.i64
= 1}, 0, 1, FLAGS
},
119 { "dual_mono", "treat mono input as dual-mono", OFFSET(dual_mono
), AV_OPT_TYPE_BOOL
, {.i64
= 0}, 0, 1, FLAGS
},
120 { "print_format", "set print format for stats", OFFSET(print_format
), AV_OPT_TYPE_INT
, {.i64
= NONE
}, NONE
, PF_NB
-1, FLAGS
, .unit
= "print_format" },
121 { "none", 0, 0, AV_OPT_TYPE_CONST
, {.i64
= NONE
}, 0, 0, FLAGS
, .unit
= "print_format" },
122 { "json", 0, 0, AV_OPT_TYPE_CONST
, {.i64
= JSON
}, 0, 0, FLAGS
, .unit
= "print_format" },
123 { "summary", 0, 0, AV_OPT_TYPE_CONST
, {.i64
= SUMMARY
}, 0, 0, FLAGS
, .unit
= "print_format" },
127 AVFILTER_DEFINE_CLASS(loudnorm
);
129 static inline int frame_size(int sample_rate
, int frame_len_msec
)
131 const int frame_size
= round((double)sample_rate
* (frame_len_msec
/ 1000.0));
132 return frame_size
+ (frame_size
% 2);
135 static void init_gaussian_filter(LoudNormContext
*s
)
137 double total_weight
= 0.0;
138 const double sigma
= 3.5;
142 const int offset
= 21 / 2;
143 const double c1
= 1.0 / (sigma
* sqrt(2.0 * M_PI
));
144 const double c2
= 2.0 * pow(sigma
, 2.0);
146 for (i
= 0; i
< 21; i
++) {
147 const int x
= i
- offset
;
148 s
->weights
[i
] = c1
* exp(-(pow(x
, 2.0) / c2
));
149 total_weight
+= s
->weights
[i
];
152 adjust
= 1.0 / total_weight
;
153 for (i
= 0; i
< 21; i
++)
154 s
->weights
[i
] *= adjust
;
157 static double gaussian_filter(LoudNormContext
*s
, int index
)
162 index
= index
- 10 > 0 ? index
- 10 : index
+ 20;
163 for (i
= 0; i
< 21; i
++)
164 result
+= s
->delta
[((index
+ i
) < 30) ? (index
+ i
) : (index
+ i
- 30)] * s
->weights
[i
];
169 static void detect_peak(LoudNormContext
*s
, int offset
, int nb_samples
, int channels
, int *peak_delta
, double *peak_value
)
176 buf
= s
->limiter_buf
;
177 ceiling
= s
->target_tp
;
179 index
= s
->limiter_buf_index
+ (offset
* channels
) + (1920 * channels
);
180 if (index
>= s
->limiter_buf_size
)
181 index
-= s
->limiter_buf_size
;
183 if (s
->frame_type
== FIRST_FRAME
) {
184 for (c
= 0; c
< channels
; c
++)
185 s
->prev_smp
[c
] = fabs(buf
[index
+ c
- channels
]);
188 for (n
= 0; n
< nb_samples
; n
++) {
189 for (c
= 0; c
< channels
; c
++) {
190 double this, next
, max_peak
;
192 this = fabs(buf
[(index
+ c
) < s
->limiter_buf_size
? (index
+ c
) : (index
+ c
- s
->limiter_buf_size
)]);
193 next
= fabs(buf
[(index
+ c
+ channels
) < s
->limiter_buf_size
? (index
+ c
+ channels
) : (index
+ c
+ channels
- s
->limiter_buf_size
)]);
195 if ((s
->prev_smp
[c
] <= this) && (next
<= this) && (this > ceiling
) && (n
> 0)) {
199 for (i
= 2; i
< 12; i
++) {
200 next
= fabs(buf
[(index
+ c
+ (i
* channels
)) < s
->limiter_buf_size
? (index
+ c
+ (i
* channels
)) : (index
+ c
+ (i
* channels
) - s
->limiter_buf_size
)]);
210 for (c
= 0; c
< channels
; c
++) {
211 if (c
== 0 || fabs(buf
[index
+ c
]) > max_peak
)
212 max_peak
= fabs(buf
[index
+ c
]);
214 s
->prev_smp
[c
] = fabs(buf
[(index
+ c
) < s
->limiter_buf_size
? (index
+ c
) : (index
+ c
- s
->limiter_buf_size
)]);
218 s
->peak_index
= index
;
219 *peak_value
= max_peak
;
223 s
->prev_smp
[c
] = this;
227 if (index
>= s
->limiter_buf_size
)
228 index
-= s
->limiter_buf_size
;
232 static void true_peak_limiter(LoudNormContext
*s
, double *out
, int nb_samples
, int channels
)
234 int n
, c
, index
, peak_delta
, smp_cnt
;
235 double ceiling
, peak_value
;
238 buf
= s
->limiter_buf
;
239 ceiling
= s
->target_tp
;
240 index
= s
->limiter_buf_index
;
243 if (s
->frame_type
== FIRST_FRAME
) {
247 for (n
= 0; n
< 1920; n
++) {
248 for (c
= 0; c
< channels
; c
++) {
249 max
= fabs(buf
[c
]) > max
? fabs(buf
[c
]) : max
;
255 s
->gain_reduction
[1] = ceiling
/ max
;
256 s
->limiter_state
= SUSTAIN
;
257 buf
= s
->limiter_buf
;
259 for (n
= 0; n
< 1920; n
++) {
260 for (c
= 0; c
< channels
; c
++) {
262 env
= s
->gain_reduction
[1];
269 buf
= s
->limiter_buf
;
274 switch(s
->limiter_state
) {
276 detect_peak(s
, smp_cnt
, nb_samples
- smp_cnt
, channels
, &peak_delta
, &peak_value
);
277 if (peak_delta
!= -1) {
279 smp_cnt
+= (peak_delta
- s
->attack_length
);
280 s
->gain_reduction
[0] = 1.;
281 s
->gain_reduction
[1] = ceiling
/ peak_value
;
282 s
->limiter_state
= ATTACK
;
284 s
->env_index
= s
->peak_index
- (s
->attack_length
* channels
);
285 if (s
->env_index
< 0)
286 s
->env_index
+= s
->limiter_buf_size
;
288 s
->env_index
+= (s
->env_cnt
* channels
);
289 if (s
->env_index
> s
->limiter_buf_size
)
290 s
->env_index
-= s
->limiter_buf_size
;
293 smp_cnt
= nb_samples
;
298 for (; s
->env_cnt
< s
->attack_length
; s
->env_cnt
++) {
299 for (c
= 0; c
< channels
; c
++) {
301 env
= s
->gain_reduction
[0] - ((double) s
->env_cnt
/ (s
->attack_length
- 1) * (s
->gain_reduction
[0] - s
->gain_reduction
[1]));
302 buf
[s
->env_index
+ c
] *= env
;
305 s
->env_index
+= channels
;
306 if (s
->env_index
>= s
->limiter_buf_size
)
307 s
->env_index
-= s
->limiter_buf_size
;
310 if (smp_cnt
>= nb_samples
) {
316 if (smp_cnt
< nb_samples
) {
318 s
->attack_length
= 1920;
319 s
->limiter_state
= SUSTAIN
;
324 detect_peak(s
, smp_cnt
, nb_samples
, channels
, &peak_delta
, &peak_value
);
325 if (peak_delta
== -1) {
326 s
->limiter_state
= RELEASE
;
327 s
->gain_reduction
[0] = s
->gain_reduction
[1];
328 s
->gain_reduction
[1] = 1.;
332 double gain_reduction
;
333 gain_reduction
= ceiling
/ peak_value
;
335 if (gain_reduction
< s
->gain_reduction
[1]) {
336 s
->limiter_state
= ATTACK
;
338 s
->attack_length
= peak_delta
;
339 if (s
->attack_length
<= 1)
340 s
->attack_length
= 2;
342 s
->gain_reduction
[0] = s
->gain_reduction
[1];
343 s
->gain_reduction
[1] = gain_reduction
;
348 for (s
->env_cnt
= 0; s
->env_cnt
< peak_delta
; s
->env_cnt
++) {
349 for (c
= 0; c
< channels
; c
++) {
351 env
= s
->gain_reduction
[1];
352 buf
[s
->env_index
+ c
] *= env
;
355 s
->env_index
+= channels
;
356 if (s
->env_index
>= s
->limiter_buf_size
)
357 s
->env_index
-= s
->limiter_buf_size
;
360 if (smp_cnt
>= nb_samples
) {
369 for (; s
->env_cnt
< s
->release_length
; s
->env_cnt
++) {
370 for (c
= 0; c
< channels
; c
++) {
372 env
= s
->gain_reduction
[0] + (((double) s
->env_cnt
/ (s
->release_length
- 1)) * (s
->gain_reduction
[1] - s
->gain_reduction
[0]));
373 buf
[s
->env_index
+ c
] *= env
;
376 s
->env_index
+= channels
;
377 if (s
->env_index
>= s
->limiter_buf_size
)
378 s
->env_index
-= s
->limiter_buf_size
;
381 if (smp_cnt
>= nb_samples
) {
387 if (smp_cnt
< nb_samples
) {
389 s
->limiter_state
= OUT
;
395 } while (smp_cnt
< nb_samples
);
397 for (n
= 0; n
< nb_samples
; n
++) {
398 for (c
= 0; c
< channels
; c
++) {
399 out
[c
] = buf
[index
+ c
];
400 if (fabs(out
[c
]) > ceiling
) {
401 out
[c
] = ceiling
* (out
[c
] < 0 ? -1 : 1);
406 if (index
>= s
->limiter_buf_size
)
407 index
-= s
->limiter_buf_size
;
411 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*in
)
413 AVFilterContext
*ctx
= inlink
->dst
;
414 LoudNormContext
*s
= ctx
->priv
;
415 AVFilterLink
*outlink
= ctx
->outputs
[0];
421 int i
, n
, c
, subframe_length
, src_index
;
422 double gain
, gain_next
, env_global
, env_shortterm
,
423 global
, shortterm
, lra
, relative_threshold
;
425 if (av_frame_is_writable(in
)) {
428 out
= ff_get_audio_buffer(outlink
, in
->nb_samples
);
431 return AVERROR(ENOMEM
);
433 av_frame_copy_props(out
, in
);
436 out
->pts
= s
->pts
[0];
437 memmove(s
->pts
, &s
->pts
[1], (FF_ARRAY_ELEMS(s
->pts
) - 1) * sizeof(s
->pts
[0]));
439 src
= (const double *)in
->data
[0];
440 dst
= (double *)out
->data
[0];
442 limiter_buf
= s
->limiter_buf
;
444 ff_ebur128_add_frames_double(s
->r128_in
, src
, in
->nb_samples
);
446 if (s
->frame_type
== FIRST_FRAME
&& in
->nb_samples
< frame_size(inlink
->sample_rate
, 3000)) {
447 double offset
, offset_tp
, true_peak
;
449 ff_ebur128_loudness_global(s
->r128_in
, &global
);
450 for (c
= 0; c
< inlink
->ch_layout
.nb_channels
; c
++) {
452 ff_ebur128_sample_peak(s
->r128_in
, c
, &tmp
);
453 if (c
== 0 || tmp
> true_peak
)
457 offset
= pow(10., (s
->target_i
- global
) / 20.);
458 offset_tp
= true_peak
* offset
;
459 s
->offset
= offset_tp
< s
->target_tp
? offset
: s
->target_tp
/ true_peak
;
460 s
->frame_type
= LINEAR_MODE
;
463 switch (s
->frame_type
) {
465 for (n
= 0; n
< in
->nb_samples
; n
++) {
466 for (c
= 0; c
< inlink
->ch_layout
.nb_channels
; c
++) {
467 buf
[s
->buf_index
+ c
] = src
[c
];
469 src
+= inlink
->ch_layout
.nb_channels
;
470 s
->buf_index
+= inlink
->ch_layout
.nb_channels
;
473 ff_ebur128_loudness_shortterm(s
->r128_in
, &shortterm
);
475 if (shortterm
< s
->measured_thresh
) {
476 s
->above_threshold
= 0;
477 env_shortterm
= shortterm
<= -70. ? 0. : s
->target_i
- s
->measured_i
;
479 s
->above_threshold
= 1;
480 env_shortterm
= shortterm
<= -70. ? 0. : s
->target_i
- shortterm
;
483 for (n
= 0; n
< 30; n
++)
484 s
->delta
[n
] = pow(10., env_shortterm
/ 20.);
485 s
->prev_delta
= s
->delta
[s
->index
];
488 s
->limiter_buf_index
= 0;
490 for (n
= 0; n
< (s
->limiter_buf_size
/ inlink
->ch_layout
.nb_channels
); n
++) {
491 for (c
= 0; c
< inlink
->ch_layout
.nb_channels
; c
++) {
492 limiter_buf
[s
->limiter_buf_index
+ c
] = buf
[s
->buf_index
+ c
] * s
->delta
[s
->index
] * s
->offset
;
494 s
->limiter_buf_index
+= inlink
->ch_layout
.nb_channels
;
495 if (s
->limiter_buf_index
>= s
->limiter_buf_size
)
496 s
->limiter_buf_index
-= s
->limiter_buf_size
;
498 s
->buf_index
+= inlink
->ch_layout
.nb_channels
;
501 subframe_length
= frame_size(inlink
->sample_rate
, 100);
502 true_peak_limiter(s
, dst
, subframe_length
, inlink
->ch_layout
.nb_channels
);
503 ff_ebur128_add_frames_double(s
->r128_out
, dst
, subframe_length
);
505 out
->nb_samples
= subframe_length
;
507 s
->frame_type
= INNER_FRAME
;
511 gain
= gaussian_filter(s
, s
->index
+ 10 < 30 ? s
->index
+ 10 : s
->index
+ 10 - 30);
512 gain_next
= gaussian_filter(s
, s
->index
+ 11 < 30 ? s
->index
+ 11 : s
->index
+ 11 - 30);
514 for (n
= 0; n
< in
->nb_samples
; n
++) {
515 for (c
= 0; c
< inlink
->ch_layout
.nb_channels
; c
++) {
516 buf
[s
->prev_buf_index
+ c
] = src
[c
];
517 limiter_buf
[s
->limiter_buf_index
+ c
] = buf
[s
->buf_index
+ c
] * (gain
+ (((double) n
/ in
->nb_samples
) * (gain_next
- gain
))) * s
->offset
;
519 src
+= inlink
->ch_layout
.nb_channels
;
521 s
->limiter_buf_index
+= inlink
->ch_layout
.nb_channels
;
522 if (s
->limiter_buf_index
>= s
->limiter_buf_size
)
523 s
->limiter_buf_index
-= s
->limiter_buf_size
;
525 s
->prev_buf_index
+= inlink
->ch_layout
.nb_channels
;
526 if (s
->prev_buf_index
>= s
->buf_size
)
527 s
->prev_buf_index
-= s
->buf_size
;
529 s
->buf_index
+= inlink
->ch_layout
.nb_channels
;
530 if (s
->buf_index
>= s
->buf_size
)
531 s
->buf_index
-= s
->buf_size
;
534 subframe_length
= (frame_size(inlink
->sample_rate
, 100) - in
->nb_samples
) * inlink
->ch_layout
.nb_channels
;
535 s
->limiter_buf_index
= s
->limiter_buf_index
+ subframe_length
< s
->limiter_buf_size
? s
->limiter_buf_index
+ subframe_length
: s
->limiter_buf_index
+ subframe_length
- s
->limiter_buf_size
;
537 true_peak_limiter(s
, dst
, in
->nb_samples
, inlink
->ch_layout
.nb_channels
);
538 ff_ebur128_add_frames_double(s
->r128_out
, dst
, in
->nb_samples
);
540 ff_ebur128_loudness_range(s
->r128_in
, &lra
);
541 ff_ebur128_loudness_global(s
->r128_in
, &global
);
542 ff_ebur128_loudness_shortterm(s
->r128_in
, &shortterm
);
543 ff_ebur128_relative_threshold(s
->r128_in
, &relative_threshold
);
545 if (s
->above_threshold
== 0) {
546 double shortterm_out
;
548 if (shortterm
> s
->measured_thresh
)
549 s
->prev_delta
*= 1.0058;
551 ff_ebur128_loudness_shortterm(s
->r128_out
, &shortterm_out
);
552 if (shortterm_out
>= s
->target_i
)
553 s
->above_threshold
= 1;
556 if (shortterm
< relative_threshold
|| shortterm
<= -70. || s
->above_threshold
== 0) {
557 s
->delta
[s
->index
] = s
->prev_delta
;
559 env_global
= fabs(shortterm
- global
) < (s
->target_lra
/ 2.) ? shortterm
- global
: (s
->target_lra
/ 2.) * ((shortterm
- global
) < 0 ? -1 : 1);
560 env_shortterm
= s
->target_i
- shortterm
;
561 s
->delta
[s
->index
] = pow(10., (env_global
+ env_shortterm
) / 20.);
564 s
->prev_delta
= s
->delta
[s
->index
];
568 s
->prev_nb_samples
= in
->nb_samples
;
572 gain
= gaussian_filter(s
, s
->index
+ 10 < 30 ? s
->index
+ 10 : s
->index
+ 10 - 30);
573 s
->limiter_buf_index
= 0;
576 for (n
= 0; n
< s
->limiter_buf_size
/ inlink
->ch_layout
.nb_channels
; n
++) {
577 for (c
= 0; c
< inlink
->ch_layout
.nb_channels
; c
++) {
578 s
->limiter_buf
[s
->limiter_buf_index
+ c
] = src
[src_index
+ c
] * gain
* s
->offset
;
580 src_index
+= inlink
->ch_layout
.nb_channels
;
582 s
->limiter_buf_index
+= inlink
->ch_layout
.nb_channels
;
583 if (s
->limiter_buf_index
>= s
->limiter_buf_size
)
584 s
->limiter_buf_index
-= s
->limiter_buf_size
;
587 subframe_length
= frame_size(inlink
->sample_rate
, 100);
588 for (i
= 0; i
< in
->nb_samples
/ subframe_length
; i
++) {
589 true_peak_limiter(s
, dst
, subframe_length
, inlink
->ch_layout
.nb_channels
);
591 for (n
= 0; n
< subframe_length
; n
++) {
592 for (c
= 0; c
< inlink
->ch_layout
.nb_channels
; c
++) {
593 if (src_index
< (in
->nb_samples
* inlink
->ch_layout
.nb_channels
)) {
594 limiter_buf
[s
->limiter_buf_index
+ c
] = src
[src_index
+ c
] * gain
* s
->offset
;
596 limiter_buf
[s
->limiter_buf_index
+ c
] = 0.;
600 if (src_index
< (in
->nb_samples
* inlink
->ch_layout
.nb_channels
))
601 src_index
+= inlink
->ch_layout
.nb_channels
;
603 s
->limiter_buf_index
+= inlink
->ch_layout
.nb_channels
;
604 if (s
->limiter_buf_index
>= s
->limiter_buf_size
)
605 s
->limiter_buf_index
-= s
->limiter_buf_size
;
608 dst
+= (subframe_length
* inlink
->ch_layout
.nb_channels
);
611 dst
= (double *)out
->data
[0];
612 ff_ebur128_add_frames_double(s
->r128_out
, dst
, in
->nb_samples
);
616 for (n
= 0; n
< in
->nb_samples
; n
++) {
617 for (c
= 0; c
< inlink
->ch_layout
.nb_channels
; c
++) {
618 dst
[c
] = src
[c
] * s
->offset
;
620 src
+= inlink
->ch_layout
.nb_channels
;
621 dst
+= inlink
->ch_layout
.nb_channels
;
624 dst
= (double *)out
->data
[0];
625 ff_ebur128_add_frames_double(s
->r128_out
, dst
, in
->nb_samples
);
631 return ff_filter_frame(outlink
, out
);
634 static int flush_frame(AVFilterLink
*outlink
)
636 AVFilterContext
*ctx
= outlink
->src
;
637 AVFilterLink
*inlink
= ctx
->inputs
[0];
638 LoudNormContext
*s
= ctx
->priv
;
641 if (s
->frame_type
== INNER_FRAME
) {
644 int nb_samples
, n
, c
, offset
;
647 nb_samples
= (s
->buf_size
/ inlink
->ch_layout
.nb_channels
) - s
->prev_nb_samples
;
648 nb_samples
-= (frame_size(inlink
->sample_rate
, 100) - s
->prev_nb_samples
);
650 frame
= ff_get_audio_buffer(outlink
, nb_samples
);
652 return AVERROR(ENOMEM
);
653 frame
->nb_samples
= nb_samples
;
656 src
= (double *)frame
->data
[0];
658 offset
= ((s
->limiter_buf_size
/ inlink
->ch_layout
.nb_channels
) - s
->prev_nb_samples
) * inlink
->ch_layout
.nb_channels
;
659 offset
-= (frame_size(inlink
->sample_rate
, 100) - s
->prev_nb_samples
) * inlink
->ch_layout
.nb_channels
;
660 s
->buf_index
= s
->buf_index
- offset
< 0 ? s
->buf_index
- offset
+ s
->buf_size
: s
->buf_index
- offset
;
662 for (n
= 0; n
< nb_samples
; n
++) {
663 for (c
= 0; c
< inlink
->ch_layout
.nb_channels
; c
++) {
664 src
[c
] = buf
[s
->buf_index
+ c
];
666 src
+= inlink
->ch_layout
.nb_channels
;
667 s
->buf_index
+= inlink
->ch_layout
.nb_channels
;
668 if (s
->buf_index
>= s
->buf_size
)
669 s
->buf_index
-= s
->buf_size
;
672 s
->frame_type
= FINAL_FRAME
;
673 ret
= filter_frame(inlink
, frame
);
678 static int activate(AVFilterContext
*ctx
)
680 AVFilterLink
*inlink
= ctx
->inputs
[0];
681 AVFilterLink
*outlink
= ctx
->outputs
[0];
682 LoudNormContext
*s
= ctx
->priv
;
687 FF_FILTER_FORWARD_STATUS_BACK(outlink
, inlink
);
689 if (s
->frame_type
!= LINEAR_MODE
) {
692 if (s
->frame_type
== FIRST_FRAME
) {
693 nb_samples
= frame_size(inlink
->sample_rate
, 3000);
695 nb_samples
= frame_size(inlink
->sample_rate
, 100);
698 ret
= ff_inlink_consume_samples(inlink
, nb_samples
, nb_samples
, &in
);
700 ret
= ff_inlink_consume_frame(inlink
, &in
);
706 if (s
->frame_type
== FIRST_FRAME
) {
707 const int nb_samples
= frame_size(inlink
->sample_rate
, 100);
709 for (int i
= 0; i
< FF_ARRAY_ELEMS(s
->pts
); i
++)
710 s
->pts
[i
] = in
->pts
+ i
* nb_samples
;
711 } else if (s
->frame_type
== LINEAR_MODE
) {
714 s
->pts
[FF_ARRAY_ELEMS(s
->pts
) - 1] = in
->pts
;
716 ret
= filter_frame(inlink
, in
);
721 if (ff_inlink_acknowledge_status(inlink
, &status
, &pts
)) {
722 ff_outlink_set_status(outlink
, status
, pts
);
723 return flush_frame(outlink
);
726 FF_FILTER_FORWARD_WANTED(outlink
, inlink
);
728 return FFERROR_NOT_READY
;
/*
 * Format negotiation callback.
 *
 * Registers the sample formats listed in sample_fmts through
 * ff_set_common_formats_from_list2(); when the filter is not in LINEAR_MODE
 * it also registers the sample-rate list input_srate (192000 only).
 *
 * NOTE(review): this listing omits several lines of the function (the
 * contents of sample_fmts, the handling of ret, and whatever is returned in
 * LINEAR_MODE) - confirm against the complete file.
 */
731 static int query_formats(const AVFilterContext
*ctx
,
732 AVFilterFormatsConfig
**cfg_in
,
733 AVFilterFormatsConfig
**cfg_out
)
735 LoudNormContext
*s
= ctx
->priv
;
/* Sample-rate list terminated by -1; 192000 is the only rate offered. */
736 static const int input_srate
[] = {192000, -1};
737 static const enum AVSampleFormat sample_fmts
[] = {
743 ret
= ff_set_common_formats_from_list2(ctx
, cfg_in
, cfg_out
, sample_fmts
);
/* The sample-rate restriction is applied only outside LINEAR_MODE. */
747 if (s
->frame_type
!= LINEAR_MODE
) {
748 return ff_set_common_samplerates_from_list2(ctx
, cfg_in
, cfg_out
, input_srate
);
/*
 * Input link configuration.
 *
 * Creates the two EBU R128 states (r128_in, r128_out), allocates the working
 * buffers from the link's sample rate and channel count, builds the Gaussian
 * kernel and initializes the limiter fields.
 *
 * NOTE(review): the conditions guarding each "return AVERROR(ENOMEM)" and the
 * final return are not visible in this listing; presumably each return
 * follows a failed allocation/initialization just above it - confirm against
 * the complete file.
 */
753 static int config_input(AVFilterLink
*inlink
)
755 AVFilterContext
*ctx
= inlink
->dst
;
756 LoudNormContext
*s
= ctx
->priv
;
/* Both states are created with the same channel count, rate and mode flags. */
758 s
->r128_in
= ff_ebur128_init(inlink
->ch_layout
.nb_channels
, inlink
->sample_rate
, 0, FF_EBUR128_MODE_I
| FF_EBUR128_MODE_S
| FF_EBUR128_MODE_LRA
| FF_EBUR128_MODE_SAMPLE_PEAK
);
760 return AVERROR(ENOMEM
);
762 s
->r128_out
= ff_ebur128_init(inlink
->ch_layout
.nb_channels
, inlink
->sample_rate
, 0, FF_EBUR128_MODE_I
| FF_EBUR128_MODE_S
| FF_EBUR128_MODE_LRA
| FF_EBUR128_MODE_SAMPLE_PEAK
);
764 return AVERROR(ENOMEM
);
/* With one input channel and dual_mono set, channel 0 is marked dual-mono in both states. */
766 if (inlink
->ch_layout
.nb_channels
== 1 && s
->dual_mono
) {
767 ff_ebur128_set_channel(s
->r128_in
, 0, FF_EBUR128_DUAL_MONO
);
768 ff_ebur128_set_channel(s
->r128_out
, 0, FF_EBUR128_DUAL_MONO
);
/* buf holds frame_size(rate, 3000) samples per channel, interleaved. */
771 s
->buf_size
= frame_size(inlink
->sample_rate
, 3000) * inlink
->ch_layout
.nb_channels
;
772 s
->buf
= av_malloc_array(s
->buf_size
, sizeof(*s
->buf
));
774 return AVERROR(ENOMEM
);
/*
 * limiter_buf_size is frame_size(rate, 210) samples per channel.
 * NOTE(review): the allocation below requests s->buf_size elements, not
 * s->limiter_buf_size - it is larger than the logical size; confirm whether
 * the over-allocation is intentional.
 */
776 s
->limiter_buf_size
= frame_size(inlink
->sample_rate
, 210) * inlink
->ch_layout
.nb_channels
;
777 s
->limiter_buf
= av_malloc_array(s
->buf_size
, sizeof(*s
->limiter_buf
));
779 return AVERROR(ENOMEM
);
/* One prev_smp entry per channel. */
781 s
->prev_smp
= av_malloc_array(inlink
->ch_layout
.nb_channels
, sizeof(*s
->prev_smp
));
783 return AVERROR(ENOMEM
);
785 init_gaussian_filter(s
);
789 s
->limiter_buf_index
= 0;
790 s
->channels
= inlink
->ch_layout
.nb_channels
;
792 s
->limiter_state
= OUT
;
/*
 * offset and target_tp are replaced in place by 10^(value / 20);
 * presumably a dB-to-linear-gain conversion - confirm units.
 */
793 s
->offset
= pow(10., s
->offset
/ 20.);
794 s
->target_tp
= pow(10., s
->target_tp
/ 20.);
/* Attack and release lengths are the sample counts of 10 ms and 100 ms. */
795 s
->attack_length
= frame_size(inlink
->sample_rate
, 10);
796 s
->release_length
= frame_size(inlink
->sample_rate
, 100);
/*
 * Filter initialization.
 *
 * Sets frame_type to FIRST_FRAME, then switches to LINEAR_MODE when all four
 * measured_* values differ from the sentinels 99 / -70 / 0 / 0 (the defaults
 * of the measured_tp, measured_thresh, measured_lra and measured_i options)
 * and both offset_tp <= target_tp and measured_lra <= target_lra hold.
 *
 * NOTE(review): this listing omits lines around the visible statements (any
 * enclosing condition before the offset computation, and the return), so the
 * exact nesting cannot be confirmed from here.
 */
801 static av_cold
int init(AVFilterContext
*ctx
)
803 LoudNormContext
*s
= ctx
->priv
;
804 s
->frame_type
= FIRST_FRAME
;
807 double offset
, offset_tp
;
/* Gain needed to reach the target, and the measured peak after applying it. */
808 offset
= s
->target_i
- s
->measured_i
;
809 offset_tp
= s
->measured_tp
+ offset
;
/* Each comparison tests a measured_* field against its option default. */
811 if (s
->measured_tp
!= 99 && s
->measured_thresh
!= -70 && s
->measured_lra
!= 0 && s
->measured_i
!= 0) {
812 if ((offset_tp
<= s
->target_tp
) && (s
->measured_lra
<= s
->target_lra
)) {
813 s
->frame_type
= LINEAR_MODE
;
822 static av_cold
void uninit(AVFilterContext
*ctx
)
824 LoudNormContext
*s
= ctx
->priv
;
825 double i_in
, i_out
, lra_in
, lra_out
, thresh_in
, thresh_out
, tp_in
, tp_out
;
828 if (!s
->r128_in
|| !s
->r128_out
)
831 ff_ebur128_loudness_range(s
->r128_in
, &lra_in
);
832 ff_ebur128_loudness_global(s
->r128_in
, &i_in
);
833 ff_ebur128_relative_threshold(s
->r128_in
, &thresh_in
);
834 for (c
= 0; c
< s
->channels
; c
++) {
836 ff_ebur128_sample_peak(s
->r128_in
, c
, &tmp
);
837 if ((c
== 0) || (tmp
> tp_in
))
841 ff_ebur128_loudness_range(s
->r128_out
, &lra_out
);
842 ff_ebur128_loudness_global(s
->r128_out
, &i_out
);
843 ff_ebur128_relative_threshold(s
->r128_out
, &thresh_out
);
844 for (c
= 0; c
< s
->channels
; c
++) {
846 ff_ebur128_sample_peak(s
->r128_out
, c
, &tmp
);
847 if ((c
== 0) || (tmp
> tp_out
))
851 switch(s
->print_format
) {
856 av_log(ctx
, AV_LOG_INFO
,
858 "\t\"input_i\" : \"%.2f\",\n"
859 "\t\"input_tp\" : \"%.2f\",\n"
860 "\t\"input_lra\" : \"%.2f\",\n"
861 "\t\"input_thresh\" : \"%.2f\",\n"
862 "\t\"output_i\" : \"%.2f\",\n"
863 "\t\"output_tp\" : \"%+.2f\",\n"
864 "\t\"output_lra\" : \"%.2f\",\n"
865 "\t\"output_thresh\" : \"%.2f\",\n"
866 "\t\"normalization_type\" : \"%s\",\n"
867 "\t\"target_offset\" : \"%.2f\"\n"
877 s
->frame_type
== LINEAR_MODE
? "linear" : "dynamic",
883 av_log(ctx
, AV_LOG_INFO
,
885 "Input Integrated: %+6.1f LUFS\n"
886 "Input True Peak: %+6.1f dBTP\n"
887 "Input LRA: %6.1f LU\n"
888 "Input Threshold: %+6.1f LUFS\n"
890 "Output Integrated: %+6.1f LUFS\n"
891 "Output True Peak: %+6.1f dBTP\n"
892 "Output LRA: %6.1f LU\n"
893 "Output Threshold: %+6.1f LUFS\n"
895 "Normalization Type: %s\n"
896 "Target Offset: %+6.1f LU\n",
905 s
->frame_type
== LINEAR_MODE
? "Linear" : "Dynamic",
913 ff_ebur128_destroy(&s
->r128_in
);
915 ff_ebur128_destroy(&s
->r128_out
);
916 av_freep(&s
->limiter_buf
);
917 av_freep(&s
->prev_smp
);
921 static const AVFilterPad avfilter_af_loudnorm_inputs
[] = {
924 .type
= AVMEDIA_TYPE_AUDIO
,
925 .config_props
= config_input
,
929 const FFFilter ff_af_loudnorm
= {
930 .p
.name
= "loudnorm",
931 .p
.description
= NULL_IF_CONFIG_SMALL("EBU R128 loudness normalization"),
932 .p
.priv_class
= &loudnorm_class
,
933 .priv_size
= sizeof(LoudNormContext
),
935 .activate
= activate
,
937 FILTER_INPUTS(avfilter_af_loudnorm_inputs
),
938 FILTER_OUTPUTS(ff_audio_default_filterpad
),
939 FILTER_QUERY_FUNC2(query_formats
),