1 /* Silence effect for SoX
2 * by Heikki Leinonen (heilei@iki.fi) 25.03.2001
3 * Major Modifications by Chris Bagwell 06.08.2001
4 * Minor addition by Donnie Smith 13.08.2003
6 * This effect can delete samples from the start of a sound file
7 * until it sees a specified count of samples exceed a given threshold
8 * (any of the channels).
9 * This effect can also delete samples from the end of a sound file
10 * when it sees a specified count of samples below a given threshold
12 * It may also be used to delete samples anywhere in a sound file.
13 * Thresholds can be given as either a percentage or in decibels.
20 /* Private data for silence effect. */
/* States of the silence state machine; the current one is kept in the
 * effect's private data (silence->mode).
 */
#define SILENCE_TRIM        0  /* discard leading input until it rises above the start threshold */
#define SILENCE_TRIM_FLUSH  1  /* write out the start hold-off buffer, then switch to copying */
#define SILENCE_COPY        2  /* copy input to output, optionally watching for silence */
#define SILENCE_COPY_FLUSH  3  /* write out the stop hold-off buffer, then return to copying */
#define SILENCE_STOP        4  /* finished: no more processing */
31 char *start_duration_str
;
32 size_t start_duration
;
33 double start_threshold
;
34 char start_unit
; /* "d" for decibels or "%" for percent. */
37 sox_sample_t
*start_holdoff
;
38 size_t start_holdoff_offset
;
39 size_t start_holdoff_end
;
40 int start_found_periods
;
44 char *stop_duration_str
;
46 double stop_threshold
;
49 sox_sample_t
*stop_holdoff
;
50 size_t stop_holdoff_offset
;
51 size_t stop_holdoff_end
;
52 int stop_found_periods
;
55 double *window_current
;
66 static void clear_rms(sox_effect_t
* effp
)
69 priv_t
* silence
= (priv_t
*) effp
->priv
;
71 memset(silence
->window
, 0,
72 silence
->window_size
* sizeof(double));
74 silence
->window_current
= silence
->window
;
75 silence
->window_end
= silence
->window
+ silence
->window_size
;
79 static int sox_silence_getopts(sox_effect_t
* effp
, int argc
, char **argv
)
81 priv_t
* silence
= (priv_t
*) effp
->priv
;
87 /* check for option switches */
88 silence
->leave_silence
= sox_false
;
91 if (!strcmp("-l", *argv
)) {
93 silence
->leave_silence
= sox_true
;
98 return lsx_usage(effp
);
100 /* Parse data related to trimming front side */
101 silence
->start
= sox_false
;
102 if (sscanf(argv
[0], "%d", &silence
->start_periods
) != 1)
103 return lsx_usage(effp
);
104 if (silence
->start_periods
< 0)
106 lsx_fail("Periods must not be negative");
112 if (silence
->start_periods
> 0)
114 silence
->start
= sox_true
;
116 return lsx_usage(effp
);
118 /* We do not know the sample rate so we can not fully
119 * parse the duration info yet. So save argument off
120 * for future processing.
122 silence
->start_duration_str
= lsx_strdup(argv
[0]);
123 /* Perform a fake parse to do error checking */
124 n
= lsx_parsesamples(0.,silence
->start_duration_str
,&temp
,'s');
126 return lsx_usage(effp
);
127 silence
->start_duration
= temp
;
129 parse_count
= sscanf(argv
[1], "%lf%c", &silence
->start_threshold
,
130 &silence
->start_unit
);
132 return lsx_usage(effp
);
133 else if (parse_count
< 2)
134 silence
->start_unit
= '%';
140 silence
->stop
= sox_false
;
141 /* Parse data needed for trimming of backside */
145 return lsx_usage(effp
);
146 if (sscanf(argv
[0], "%d", &silence
->stop_periods
) != 1)
147 return lsx_usage(effp
);
148 if (silence
->stop_periods
< 0)
150 silence
->stop_periods
= -silence
->stop_periods
;
151 silence
->restart
= 1;
154 silence
->restart
= 0;
155 silence
->stop
= sox_true
;
159 /* We do not know the sample rate so we can not fully
160 * parse the duration info yet. So save argument off
161 * for future processing.
163 silence
->stop_duration_str
= lsx_strdup(argv
[0]);
164 /* Perform a fake parse to do error checking */
165 n
= lsx_parsesamples(0.,silence
->stop_duration_str
,&temp
,'s');
167 return lsx_usage(effp
);
168 silence
->stop_duration
= temp
;
170 parse_count
= sscanf(argv
[1], "%lf%c", &silence
->stop_threshold
,
171 &silence
->stop_unit
);
173 return lsx_usage(effp
);
174 else if (parse_count
< 2)
175 silence
->stop_unit
= '%';
184 if ((silence
->start_unit
!= '%') && (silence
->start_unit
!= 'd'))
186 lsx_fail("Invalid unit specified");
187 return lsx_usage(effp
);
189 if ((silence
->start_unit
== '%') && ((silence
->start_threshold
< 0.0)
190 || (silence
->start_threshold
> 100.0)))
192 lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
195 if ((silence
->start_unit
== 'd') && (silence
->start_threshold
>= 0.0))
197 lsx_fail("silence threshold should be less than 0.0 dB");
204 if ((silence
->stop_unit
!= '%') && (silence
->stop_unit
!= 'd'))
206 lsx_fail("Invalid unit specified");
209 if ((silence
->stop_unit
== '%') && ((silence
->stop_threshold
< 0.0) ||
210 (silence
->stop_threshold
> 100.0)))
212 lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
215 if ((silence
->stop_unit
== 'd') && (silence
->stop_threshold
>= 0.0))
217 lsx_fail("silence threshold should be less than 0.0 dB");
224 static int sox_silence_start(sox_effect_t
* effp
)
226 priv_t
*silence
= (priv_t
*)effp
->priv
;
229 /* When you want to remove silence, small window sizes are
230 * better or else RMS will look like non-silence at
231 * abrupt changes from loud to silence.
233 silence
->window_size
= (effp
->in_signal
.rate
/ 50) *
234 effp
->in_signal
.channels
;
235 silence
->window
= lsx_malloc(silence
->window_size
* sizeof(double));
239 /* Now that we know sample rate, reparse duration. */
242 if (lsx_parsesamples(effp
->in_signal
.rate
, silence
->start_duration_str
,
244 return lsx_usage(effp
);
245 silence
->start_duration
= temp
* effp
->in_signal
.channels
;
249 if (lsx_parsesamples(effp
->in_signal
.rate
,silence
->stop_duration_str
,
251 return lsx_usage(effp
);
252 silence
->stop_duration
= temp
* effp
->in_signal
.channels
;
256 silence
->mode
= SILENCE_TRIM
;
258 silence
->mode
= SILENCE_COPY
;
260 silence
->start_holdoff
= lsx_malloc(sizeof(sox_sample_t
)*silence
->start_duration
);
261 silence
->start_holdoff_offset
= 0;
262 silence
->start_holdoff_end
= 0;
263 silence
->start_found_periods
= 0;
265 silence
->stop_holdoff
= lsx_malloc(sizeof(sox_sample_t
)*silence
->stop_duration
);
266 silence
->stop_holdoff_offset
= 0;
267 silence
->stop_holdoff_end
= 0;
268 silence
->stop_found_periods
= 0;
270 effp
->out_signal
.length
= SOX_UNKNOWN_LEN
; /* depends on input data */
275 static sox_bool
aboveThreshold(sox_effect_t
const * effp
,
276 sox_sample_t value
/* >= 0 */, double threshold
, int unit
)
278 /* When scaling low bit data, noise values got scaled way up */
279 /* Only consider the original bits when looking for silence */
280 sox_sample_t masked_value
= value
& (-1 << (32 - effp
->in_signal
.precision
));
282 double scaled_value
= (double)masked_value
/ SOX_SAMPLE_MAX
;
286 else if (unit
== 'd')
287 scaled_value
= linear_to_dB(scaled_value
);
289 return scaled_value
> threshold
;
292 static sox_sample_t
compute_rms(sox_effect_t
* effp
, sox_sample_t sample
)
294 priv_t
* silence
= (priv_t
*) effp
->priv
;
298 new_sum
= silence
->rms_sum
;
299 new_sum
-= *silence
->window_current
;
300 new_sum
+= ((double)sample
* (double)sample
);
302 rms
= sqrt(new_sum
/ silence
->window_size
);
307 static void update_rms(sox_effect_t
* effp
, sox_sample_t sample
)
309 priv_t
* silence
= (priv_t
*) effp
->priv
;
311 silence
->rms_sum
-= *silence
->window_current
;
312 *silence
->window_current
= ((double)sample
* (double)sample
);
313 silence
->rms_sum
+= *silence
->window_current
;
315 silence
->window_current
++;
316 if (silence
->window_current
>= silence
->window_end
)
317 silence
->window_current
= silence
->window
;
320 /* Process signed long samples from ibuf to obuf. */
321 /* Return number of samples processed in isamp and osamp. */
322 static int sox_silence_flow(sox_effect_t
* effp
, const sox_sample_t
*ibuf
, sox_sample_t
*obuf
,
323 size_t *isamp
, size_t *osamp
)
325 priv_t
* silence
= (priv_t
*) effp
->priv
;
328 size_t nrOfTicks
, /* sometimes wide, sometimes non-wide samples */
329 nrOfInSamplesRead
, nrOfOutSamplesWritten
; /* non-wide samples */
331 nrOfInSamplesRead
= 0;
332 nrOfOutSamplesWritten
= 0;
334 switch (silence
->mode
)
337 /* Reads and discards all input data until it detects a
338 * sample that is above the specified threshold. Turns on
339 * copy mode when detected.
340 * Need to make sure and copy input in groups of "channels" to
341 * prevent getting buffers out of sync.
342 * nrOfTicks counts wide samples here.
345 nrOfTicks
= min((*isamp
-nrOfInSamplesRead
),
346 (*osamp
-nrOfOutSamplesWritten
)) /
347 effp
->in_signal
.channels
;
348 for(i
= 0; i
< nrOfTicks
; i
++)
351 for (j
= 0; j
< effp
->in_signal
.channels
; j
++)
353 threshold
|= aboveThreshold(effp
,
354 compute_rms(effp
, ibuf
[j
]),
355 silence
->start_threshold
,
356 silence
->start_unit
);
361 /* Add to holdoff buffer */
362 for (j
= 0; j
< effp
->in_signal
.channels
; j
++)
364 update_rms(effp
, *ibuf
);
365 silence
->start_holdoff
[
366 silence
->start_holdoff_end
++] = *ibuf
++;
370 if (silence
->start_holdoff_end
>=
371 silence
->start_duration
)
373 if (++silence
->start_found_periods
>=
374 silence
->start_periods
)
376 silence
->mode
= SILENCE_TRIM_FLUSH
;
377 goto silence_trim_flush
;
379 /* Trash holdoff buffer since it's not
380 * needed. Start looking again.
382 silence
->start_holdoff_offset
= 0;
383 silence
->start_holdoff_end
= 0;
386 else /* !above Threshold */
388 silence
->start_holdoff_end
= 0;
389 for (j
= 0; j
< effp
->in_signal
.channels
; j
++)
391 update_rms(effp
, ibuf
[j
]);
393 ibuf
+= effp
->in_signal
.channels
;
394 nrOfInSamplesRead
+= effp
->in_signal
.channels
;
396 } /* for nrOfTicks */
399 case SILENCE_TRIM_FLUSH
:
400 /* nrOfTicks counts non-wide samples here. */
402 nrOfTicks
= min((silence
->start_holdoff_end
-
403 silence
->start_holdoff_offset
),
404 (*osamp
-nrOfOutSamplesWritten
));
405 nrOfTicks
-= nrOfTicks
% effp
->in_signal
.channels
;
406 for(i
= 0; i
< nrOfTicks
; i
++)
408 *obuf
++ = silence
->start_holdoff
[silence
->start_holdoff_offset
++];
409 nrOfOutSamplesWritten
++;
412 /* If fully drained holdoff then switch to copy mode */
413 if (silence
->start_holdoff_offset
== silence
->start_holdoff_end
)
415 silence
->start_holdoff_offset
= 0;
416 silence
->start_holdoff_end
= 0;
417 silence
->mode
= SILENCE_COPY
;
423 /* Attempts to copy samples into output buffer.
426 * If not looking for silence to terminate copy then
427 * blindly copy data into output buffer.
432 * If previous silence was detected then see if input sample is
433 * above threshold. If found then flush out hold off buffer
434 * and copy over to output buffer.
437 * If no previous silence was detected then see if input sample
438 * is above threshold. If found then copy directly
442 * If not above threshold then silence is detected so
443 * store in hold off buffer and do not write to output
444 * buffer. Even though it wasn't put in output
445 * buffer, inform user that input was consumed.
447 * If hold off buffer is full after this then stop
448 * copying data and discard data in hold off buffer.
450 * Special leave_silence logic:
452 * During this mode, go ahead and copy input
453 * samples to output buffer instead of holdoff buffer
454 * Then also short circuit any flushes that would occur
455 * when non-silence is detected since samples were already
456 * copied. This has the effect of always leaving
457 * holdoff[] amount of silence but deleting any
458 * beyond that amount.
460 * nrOfTicks counts wide samples here.
463 nrOfTicks
= min((*isamp
-nrOfInSamplesRead
),
464 (*osamp
-nrOfOutSamplesWritten
)) /
465 effp
->in_signal
.channels
;
469 for(i
= 0; i
< nrOfTicks
; i
++)
472 for (j
= 0; j
< effp
->in_signal
.channels
; j
++)
474 threshold
&= aboveThreshold(effp
,
475 compute_rms(effp
, ibuf
[j
]),
476 silence
->stop_threshold
,
481 * If above threshold, check to see if we were holding
482 * off previously. If so then flush this buffer.
483 * We haven't incremented any pointers yet so nothing
486 * If user wants to leave_silence, then we
487 * were already copying the data and so no
488 * need to flush the old data. Just resume
489 * copying as if we were not holding off.
491 if (threshold
&& silence
->stop_holdoff_end
492 && !silence
->leave_silence
)
494 silence
->mode
= SILENCE_COPY_FLUSH
;
495 goto silence_copy_flush
;
500 /* Not holding off so copy into output buffer */
501 for (j
= 0; j
< effp
->in_signal
.channels
; j
++)
503 update_rms(effp
, *ibuf
);
506 nrOfOutSamplesWritten
++;
512 /* Add to holdoff buffer */
513 for (j
= 0; j
< effp
->in_signal
.channels
; j
++)
515 update_rms(effp
, *ibuf
);
516 if (silence
->leave_silence
) {
518 nrOfOutSamplesWritten
++;
520 silence
->stop_holdoff
[
521 silence
->stop_holdoff_end
++] = *ibuf
++;
525 /* Check if holdoff buffer is greater than duration
527 if (silence
->stop_holdoff_end
>=
528 silence
->stop_duration
)
530 /* Increment found counter and see if this
531 * is the last period. If so then exit.
533 if (++silence
->stop_found_periods
>=
534 silence
->stop_periods
)
536 silence
->stop_holdoff_offset
= 0;
537 silence
->stop_holdoff_end
= 0;
538 if (!silence
->restart
)
540 *isamp
= nrOfInSamplesRead
;
541 *osamp
= nrOfOutSamplesWritten
;
542 silence
->mode
= SILENCE_STOP
;
543 /* Return SOX_EOF since no more processing */
548 silence
->stop_found_periods
= 0;
549 silence
->start_found_periods
= 0;
550 silence
->start_holdoff_offset
= 0;
551 silence
->start_holdoff_end
= 0;
553 silence
->mode
= SILENCE_TRIM
;
560 /* Flush this buffer and start
563 silence
->mode
= SILENCE_COPY_FLUSH
;
564 goto silence_copy_flush
;
567 } /* Filled holdoff buffer */
568 } /* Detected silence */
569 } /* For # of samples */
570 } /* Trimming off backend */
571 else /* !(silence->stop) */
574 memcpy(obuf
, ibuf
, sizeof(sox_sample_t
)*nrOfTicks
*
575 effp
->in_signal
.channels
);
576 nrOfInSamplesRead
+= (nrOfTicks
*effp
->in_signal
.channels
);
577 nrOfOutSamplesWritten
+= (nrOfTicks
*effp
->in_signal
.channels
);
581 case SILENCE_COPY_FLUSH
:
582 /* nrOfTicks counts non-wide samples here. */
584 nrOfTicks
= min((silence
->stop_holdoff_end
-
585 silence
->stop_holdoff_offset
),
586 (*osamp
-nrOfOutSamplesWritten
));
587 nrOfTicks
-= nrOfTicks
% effp
->in_signal
.channels
;
589 for(i
= 0; i
< nrOfTicks
; i
++)
591 *obuf
++ = silence
->stop_holdoff
[silence
->stop_holdoff_offset
++];
592 nrOfOutSamplesWritten
++;
595 /* If fully drained holdoff then return to copy mode */
596 if (silence
->stop_holdoff_offset
== silence
->stop_holdoff_end
)
598 silence
->stop_holdoff_offset
= 0;
599 silence
->stop_holdoff_end
= 0;
600 silence
->mode
= SILENCE_COPY
;
606 /* This code can't be reached. */
607 nrOfInSamplesRead
= *isamp
;
611 *isamp
= nrOfInSamplesRead
;
612 *osamp
= nrOfOutSamplesWritten
;
614 return (SOX_SUCCESS
);
617 static int sox_silence_drain(sox_effect_t
* effp
, sox_sample_t
*obuf
, size_t *osamp
)
619 priv_t
* silence
= (priv_t
*) effp
->priv
;
621 size_t nrOfTicks
, nrOfOutSamplesWritten
= 0; /* non-wide samples */
623 /* Only if in flush mode will there be possible samples to write
624 * out during drain() call.
626 if (silence
->mode
== SILENCE_COPY_FLUSH
||
627 silence
->mode
== SILENCE_COPY
)
629 nrOfTicks
= min((silence
->stop_holdoff_end
-
630 silence
->stop_holdoff_offset
), *osamp
);
631 nrOfTicks
-= nrOfTicks
% effp
->in_signal
.channels
;
632 for(i
= 0; i
< nrOfTicks
; i
++)
634 *obuf
++ = silence
->stop_holdoff
[silence
->stop_holdoff_offset
++];
635 nrOfOutSamplesWritten
++;
638 /* If fully drained holdoff then stop */
639 if (silence
->stop_holdoff_offset
== silence
->stop_holdoff_end
)
641 silence
->stop_holdoff_offset
= 0;
642 silence
->stop_holdoff_end
= 0;
643 silence
->mode
= SILENCE_STOP
;
647 *osamp
= nrOfOutSamplesWritten
;
648 if (silence
->mode
== SILENCE_STOP
|| *osamp
== 0)
654 static int sox_silence_stop(sox_effect_t
* effp
)
656 priv_t
* silence
= (priv_t
*) effp
->priv
;
658 free(silence
->window
);
659 free(silence
->start_holdoff
);
660 free(silence
->stop_holdoff
);
665 static int lsx_kill(sox_effect_t
* effp
)
667 priv_t
* silence
= (priv_t
*) effp
->priv
;
669 free(silence
->start_duration_str
);
670 free(silence
->stop_duration_str
);
675 static sox_effect_handler_t sox_silence_effect
= {
677 "[ -l ] above_periods [ duration threshold[d|%] ] [ below_periods duration threshold[d|%] ]",
678 SOX_EFF_MCHAN
| SOX_EFF_MODIFY
| SOX_EFF_LENGTH
,
684 lsx_kill
, sizeof(priv_t
)
687 const sox_effect_handler_t
*lsx_silence_effect_fn(void)
689 return &sox_silence_effect
;