/*
 * Copyright (c) 2012 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Audio silence detector
 */

26 #include <float.h> /* DBL_MAX */
28 #include "libavutil/mem.h"
29 #include "libavutil/opt.h"
30 #include "libavutil/timestamp.h"
35 typedef struct SilenceDetectContext
{
37 double noise
; ///< noise amplitude ratio
38 int64_t duration
; ///< minimum duration of silence until notification
39 int mono
; ///< mono mode : check each channel separately (default = check when ALL channels are silent)
40 int channels
; ///< number of channels
41 int independent_channels
; ///< number of entries in following arrays (always 1 in mono mode)
42 int64_t *nb_null_samples
; ///< (array) current number of continuous zero samples
43 int64_t *start
; ///< (array) if silence is detected, this value contains the time of the first zero sample (default/unset = INT64_MIN)
44 int64_t frame_end
; ///< pts of the end of the current frame (used to compute duration of silence at EOS)
45 int last_sample_rate
; ///< last sample rate to check for sample rate changes
46 AVRational time_base
; ///< time_base
48 void (*silencedetect
)(struct SilenceDetectContext
*s
, AVFrame
*insamples
,
49 int nb_samples
, int64_t nb_samples_notify
,
50 AVRational time_base
);
51 } SilenceDetectContext
;
53 #define MAX_DURATION (24*3600*1000000LL)
54 #define OFFSET(x) offsetof(SilenceDetectContext, x)
55 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_AUDIO_PARAM
56 static const AVOption silencedetect_options
[] = {
57 { "n", "set noise tolerance", OFFSET(noise
), AV_OPT_TYPE_DOUBLE
, {.dbl
=0.001}, 0, DBL_MAX
, FLAGS
},
58 { "noise", "set noise tolerance", OFFSET(noise
), AV_OPT_TYPE_DOUBLE
, {.dbl
=0.001}, 0, DBL_MAX
, FLAGS
},
59 { "d", "set minimum duration in seconds", OFFSET(duration
), AV_OPT_TYPE_DURATION
, {.i64
=2000000}, 0, MAX_DURATION
,FLAGS
},
60 { "duration", "set minimum duration in seconds", OFFSET(duration
), AV_OPT_TYPE_DURATION
, {.i64
=2000000}, 0, MAX_DURATION
,FLAGS
},
61 { "mono", "check each channel separately", OFFSET(mono
), AV_OPT_TYPE_BOOL
, {.i64
=0}, 0, 1, FLAGS
},
62 { "m", "check each channel separately", OFFSET(mono
), AV_OPT_TYPE_BOOL
, {.i64
=0}, 0, 1, FLAGS
},
66 AVFILTER_DEFINE_CLASS(silencedetect
);
68 static void set_meta(AVFrame
*insamples
, int channel
, const char *key
, char *value
)
73 snprintf(key2
, sizeof(key2
), "lavfi.%s.%d", key
, channel
);
75 snprintf(key2
, sizeof(key2
), "lavfi.%s", key
);
76 av_dict_set(&insamples
->metadata
, key2
, value
, 0);
78 static av_always_inline
void update(SilenceDetectContext
*s
, AVFrame
*insamples
,
79 int is_silence
, int current_sample
, int64_t nb_samples_notify
,
82 int channel
= current_sample
% s
->independent_channels
;
84 if (s
->start
[channel
] == INT64_MIN
) {
85 s
->nb_null_samples
[channel
]++;
86 if (s
->nb_null_samples
[channel
] >= nb_samples_notify
) {
87 s
->start
[channel
] = insamples
->pts
+ av_rescale_q(current_sample
/ s
->channels
+ 1 - nb_samples_notify
* s
->independent_channels
/ s
->channels
,
88 (AVRational
){ 1, s
->last_sample_rate
}, time_base
);
89 set_meta(insamples
, s
->mono
? channel
+ 1 : 0, "silence_start",
90 av_ts2timestr(s
->start
[channel
], &time_base
));
92 av_log(s
, AV_LOG_INFO
, "channel: %d | ", channel
);
93 av_log(s
, AV_LOG_INFO
, "silence_start: %s\n",
94 av_ts2timestr(s
->start
[channel
], &time_base
));
98 if (s
->start
[channel
] > INT64_MIN
) {
99 int64_t end_pts
= insamples
? insamples
->pts
+ av_rescale_q(current_sample
/ s
->channels
,
100 (AVRational
){ 1, s
->last_sample_rate
}, time_base
)
102 int64_t duration_ts
= end_pts
- s
->start
[channel
];
104 set_meta(insamples
, s
->mono
? channel
+ 1 : 0, "silence_end",
105 av_ts2timestr(end_pts
, &time_base
));
106 set_meta(insamples
, s
->mono
? channel
+ 1 : 0, "silence_duration",
107 av_ts2timestr(duration_ts
, &time_base
));
110 av_log(s
, AV_LOG_INFO
, "channel: %d | ", channel
);
111 av_log(s
, AV_LOG_INFO
, "silence_end: %s | silence_duration: %s\n",
112 av_ts2timestr(end_pts
, &time_base
),
113 av_ts2timestr(duration_ts
, &time_base
));
115 s
->nb_null_samples
[channel
] = 0;
116 s
->start
[channel
] = INT64_MIN
;
/*
 * Define silencedetect_<name>() for a sample format whose samples are all
 * read from data[0]: walk nb_samples consecutive values of the given type
 * and report each one to update(), using the running index as the
 * interleaved sample index.
 */
#define SILENCE_DETECT(name, type)                                               \
static void silencedetect_##name(SilenceDetectContext *s, AVFrame *insamples,    \
                                 int nb_samples, int64_t nb_samples_notify,      \
                                 AVRational time_base)                           \
{                                                                                \
    const type *p = (const type *)insamples->data[0];                            \
    const type noise = s->noise;                                                 \
                                                                                 \
    for (int i = 0; i < nb_samples; i++, p++)                                    \
        update(s, insamples, *p < noise && *p > -noise, i,                       \
               nb_samples_notify, time_base);                                    \
}

/*
 * Define silencedetect_<name>() for a planar sample format: each channel is
 * read from its own extended_data[] plane. nb_samples arrives as the total
 * across all channels and is divided back to a per-channel count. Samples
 * are visited frame by frame so update() sees the same index sequence
 * (channels * i + ch) as the data[0]-based variant.
 */
#define SILENCE_DETECT_PLANAR(name, type)                                        \
static void silencedetect_##name(SilenceDetectContext *s, AVFrame *insamples,    \
                                 int nb_samples, int64_t nb_samples_notify,      \
                                 AVRational time_base)                           \
{                                                                                \
    const int channels = insamples->ch_layout.nb_channels;                       \
    const type noise = s->noise;                                                 \
                                                                                 \
    nb_samples /= channels;                                                      \
    for (int i = 0; i < nb_samples; i++) {                                       \
        for (int ch = 0; ch < insamples->ch_layout.nb_channels; ch++) {          \
            const type *p = (const type *)insamples->extended_data[ch];          \
            update(s, insamples, p[i] < noise && p[i] > -noise,                  \
                   channels * i + ch,                                            \
                   nb_samples_notify, time_base);                                \
        }                                                                        \
    }                                                                            \
}

153 SILENCE_DETECT(dbl
, double)
154 SILENCE_DETECT(flt
, float)
155 SILENCE_DETECT(s32
, int32_t)
156 SILENCE_DETECT(s16
, int16_t)
158 SILENCE_DETECT_PLANAR(dblp
, double)
159 SILENCE_DETECT_PLANAR(fltp
, float)
160 SILENCE_DETECT_PLANAR(s32p
, int32_t)
161 SILENCE_DETECT_PLANAR(s16p
, int16_t)
163 static int config_input(AVFilterLink
*inlink
)
165 AVFilterContext
*ctx
= inlink
->dst
;
166 SilenceDetectContext
*s
= ctx
->priv
;
169 s
->channels
= inlink
->ch_layout
.nb_channels
;
170 s
->duration
= av_rescale(s
->duration
, inlink
->sample_rate
, AV_TIME_BASE
);
171 s
->independent_channels
= s
->mono
? s
->channels
: 1;
172 s
->nb_null_samples
= av_calloc(s
->independent_channels
,
173 sizeof(*s
->nb_null_samples
));
174 if (!s
->nb_null_samples
)
175 return AVERROR(ENOMEM
);
176 s
->start
= av_malloc_array(sizeof(*s
->start
), s
->independent_channels
);
178 return AVERROR(ENOMEM
);
179 for (c
= 0; c
< s
->independent_channels
; c
++)
180 s
->start
[c
] = INT64_MIN
;
182 switch (inlink
->format
) {
183 case AV_SAMPLE_FMT_DBL
: s
->silencedetect
= silencedetect_dbl
; break;
184 case AV_SAMPLE_FMT_FLT
: s
->silencedetect
= silencedetect_flt
; break;
185 case AV_SAMPLE_FMT_S32
:
186 s
->noise
*= INT32_MAX
;
187 s
->silencedetect
= silencedetect_s32
;
189 case AV_SAMPLE_FMT_S16
:
190 s
->noise
*= INT16_MAX
;
191 s
->silencedetect
= silencedetect_s16
;
193 case AV_SAMPLE_FMT_DBLP
: s
->silencedetect
= silencedetect_dblp
; break;
194 case AV_SAMPLE_FMT_FLTP
: s
->silencedetect
= silencedetect_fltp
; break;
195 case AV_SAMPLE_FMT_S32P
:
196 s
->noise
*= INT32_MAX
;
197 s
->silencedetect
= silencedetect_s32p
;
199 case AV_SAMPLE_FMT_S16P
:
200 s
->noise
*= INT16_MAX
;
201 s
->silencedetect
= silencedetect_s16p
;
210 static int filter_frame(AVFilterLink
*inlink
, AVFrame
*insamples
)
212 SilenceDetectContext
*s
= inlink
->dst
->priv
;
213 const int nb_channels
= inlink
->ch_layout
.nb_channels
;
214 const int srate
= inlink
->sample_rate
;
215 const int nb_samples
= insamples
->nb_samples
* nb_channels
;
216 const int64_t nb_samples_notify
= s
->duration
* (s
->mono
? 1 : nb_channels
);
219 // scale number of null samples to the new sample rate
220 if (s
->last_sample_rate
&& s
->last_sample_rate
!= srate
)
221 for (c
= 0; c
< s
->independent_channels
; c
++) {
222 s
->nb_null_samples
[c
] = srate
* s
->nb_null_samples
[c
] / s
->last_sample_rate
;
224 s
->last_sample_rate
= srate
;
225 s
->time_base
= inlink
->time_base
;
226 s
->frame_end
= insamples
->pts
+ av_rescale_q(insamples
->nb_samples
,
227 (AVRational
){ 1, s
->last_sample_rate
}, inlink
->time_base
);
229 s
->silencedetect(s
, insamples
, nb_samples
, nb_samples_notify
,
232 return ff_filter_frame(inlink
->dst
->outputs
[0], insamples
);
235 static av_cold
void uninit(AVFilterContext
*ctx
)
237 SilenceDetectContext
*s
= ctx
->priv
;
240 for (c
= 0; c
< s
->independent_channels
; c
++)
241 if (s
->start
[c
] > INT64_MIN
)
242 update(s
, NULL
, 0, c
, 0, s
->time_base
);
243 av_freep(&s
->nb_null_samples
);
247 static const AVFilterPad silencedetect_inputs
[] = {
250 .type
= AVMEDIA_TYPE_AUDIO
,
251 .config_props
= config_input
,
252 .filter_frame
= filter_frame
,
256 const FFFilter ff_af_silencedetect
= {
257 .p
.name
= "silencedetect",
258 .p
.description
= NULL_IF_CONFIG_SMALL("Detect silence."),
259 .p
.priv_class
= &silencedetect_class
,
260 .p
.flags
= AVFILTER_FLAG_METADATA_ONLY
,
261 .priv_size
= sizeof(SilenceDetectContext
),
263 FILTER_INPUTS(silencedetect_inputs
),
264 FILTER_OUTPUTS(ff_audio_default_filterpad
),
265 FILTER_SAMPLEFMTS(AV_SAMPLE_FMT_DBL
, AV_SAMPLE_FMT_DBLP
,
266 AV_SAMPLE_FMT_FLT
, AV_SAMPLE_FMT_FLTP
,
267 AV_SAMPLE_FMT_S32
, AV_SAMPLE_FMT_S32P
,
268 AV_SAMPLE_FMT_S16
, AV_SAMPLE_FMT_S16P
),