id3: fix COMM frame handling
[sox.git] / src / silence.c
blob1db3aec1cb1a36f9cb55f779c4dd01889693ee1b
1 /* Silence effect for SoX
2 * by Heikki Leinonen (heilei@iki.fi) 25.03.2001
3 * Major Modifications by Chris Bagwell 06.08.2001
4 * Minor addition by Donnie Smith 13.08.2003
6 * This effect can delete samples from the start of a sound file
7 * until it sees a specified count of samples exceed a given threshold
8 * (any of the channels).
9 * This effect can also delete samples from the end of a sound file
10 * when it sees a specified count of samples below a given threshold
11 * (all channels).
12 * It may also be used to delete samples anywhere in a sound file.
13 * Thresholds can be given as either a percentage or in decibels.
16 #include "sox_i.h"
18 #include <string.h>
20 /* Private data for silence effect. */
/* States of the silence effect's state machine (stored in priv_t.mode).
 * The numeric values are unchanged from the former #define constants.
 */
enum {
    SILENCE_TRIM = 0,        /* discarding input until enough sound is seen */
    SILENCE_TRIM_FLUSH = 1,  /* writing out the start hold-off buffer */
    SILENCE_COPY = 2,        /* copying input to output */
    SILENCE_COPY_FLUSH = 3,  /* writing out the stop hold-off buffer */
    SILENCE_STOP = 4         /* finished; nothing more will be produced */
};
/* Per-instance state of the silence effect: the parsed command-line
 * options, the two hold-off buffers, the sliding RMS window and the
 * current state-machine mode.
 */
28 typedef struct {
/* ---- Trimming at the start ---- */
29 char start; /* true when above_periods > 0, i.e. start trimming is enabled */
30 int start_periods; /* number of sound periods to find before copying begins */
31 char *start_duration_str; /* duration argument as given; re-parsed once the sample rate is known */
32 size_t start_duration; /* period length; after start() it is samples times channels */
33 double start_threshold; /* level the RMS value must exceed to count as sound */
34 char start_unit; /* "d" for decibels or "%" for percent. */
35 int restart; /* set when below_periods was negative: return to trimming after a stop */
/* Samples held back while a start period is being confirmed. */
37 sox_sample_t *start_holdoff;
38 size_t start_holdoff_offset; /* index of the next held sample to flush */
39 size_t start_holdoff_end; /* number of samples currently held */
40 int start_found_periods; /* start periods found so far */
/* ---- Trimming at the end (same layout as the start fields) ---- */
42 char stop; /* true when the below_periods group was given */
43 int stop_periods; /* number of silent periods that end copying (stored non-negative) */
44 char *stop_duration_str; /* duration argument as given; re-parsed once the sample rate is known */
45 size_t stop_duration; /* period length; after start() it is samples times channels */
46 double stop_threshold; /* level the RMS value must exceed to count as sound */
47 char stop_unit; /* 'd' or '%', as for start_unit */
/* Samples held back while a stop period is being confirmed. */
49 sox_sample_t *stop_holdoff;
50 size_t stop_holdoff_offset; /* index of the next held sample to flush */
51 size_t stop_holdoff_end; /* number of samples currently held */
52 int stop_found_periods; /* stop periods found so far */
/* ---- Sliding RMS window ---- */
54 double *window; /* circular buffer of squared sample values */
55 double *window_current; /* slot that will be overwritten next */
56 double *window_end; /* one past the last slot */
57 size_t window_size; /* number of slots in window */
58 double rms_sum; /* running sum of the values stored in window */
60 char leave_silence; /* set by the -l option */
62 /* State Machine */
63 char mode; /* one of the SILENCE_* constants */
64 } priv_t;
66 static void clear_rms(sox_effect_t * effp)
69 priv_t * silence = (priv_t *) effp->priv;
71 memset(silence->window, 0,
72 silence->window_size * sizeof(double));
74 silence->window_current = silence->window;
75 silence->window_end = silence->window + silence->window_size;
76 silence->rms_sum = 0;
79 static int sox_silence_getopts(sox_effect_t * effp, int argc, char **argv)
81 priv_t * silence = (priv_t *) effp->priv;
82 int parse_count;
83 uint64_t temp;
84 const char *n;
85 --argc, ++argv;
87 /* check for option switches */
88 silence->leave_silence = sox_false;
89 if (argc > 0)
91 if (!strcmp("-l", *argv)) {
92 argc--; argv++;
93 silence->leave_silence = sox_true;
97 if (argc < 1)
98 return lsx_usage(effp);
100 /* Parse data related to trimming front side */
101 silence->start = sox_false;
102 if (sscanf(argv[0], "%d", &silence->start_periods) != 1)
103 return lsx_usage(effp);
104 if (silence->start_periods < 0)
106 lsx_fail("Periods must not be negative");
107 return(SOX_EOF);
109 argv++;
110 argc--;
112 if (silence->start_periods > 0)
114 silence->start = sox_true;
115 if (argc < 2)
116 return lsx_usage(effp);
118 /* We do not know the sample rate so we can not fully
119 * parse the duration info yet. So save argument off
120 * for future processing.
122 silence->start_duration_str = lsx_strdup(argv[0]);
123 /* Perform a fake parse to do error checking */
124 n = lsx_parsesamples(0.,silence->start_duration_str,&temp,'s');
125 if (!n || *n)
126 return lsx_usage(effp);
127 silence->start_duration = temp;
129 parse_count = sscanf(argv[1], "%lf%c", &silence->start_threshold,
130 &silence->start_unit);
131 if (parse_count < 1)
132 return lsx_usage(effp);
133 else if (parse_count < 2)
134 silence->start_unit = '%';
136 argv++; argv++;
137 argc--; argc--;
140 silence->stop = sox_false;
141 /* Parse data needed for trimming of backside */
142 if (argc > 0)
144 if (argc < 3)
145 return lsx_usage(effp);
146 if (sscanf(argv[0], "%d", &silence->stop_periods) != 1)
147 return lsx_usage(effp);
148 if (silence->stop_periods < 0)
150 silence->stop_periods = -silence->stop_periods;
151 silence->restart = 1;
153 else
154 silence->restart = 0;
155 silence->stop = sox_true;
156 argv++;
157 argc--;
159 /* We do not know the sample rate so we can not fully
160 * parse the duration info yet. So save argument off
161 * for future processing.
163 silence->stop_duration_str = lsx_strdup(argv[0]);
164 /* Perform a fake parse to do error checking */
165 n = lsx_parsesamples(0.,silence->stop_duration_str,&temp,'s');
166 if (!n || *n)
167 return lsx_usage(effp);
168 silence->stop_duration = temp;
170 parse_count = sscanf(argv[1], "%lf%c", &silence->stop_threshold,
171 &silence->stop_unit);
172 if (parse_count < 1)
173 return lsx_usage(effp);
174 else if (parse_count < 2)
175 silence->stop_unit = '%';
177 argv++; argv++;
178 argc--; argc--;
181 /* Error checking */
182 if (silence->start)
184 if ((silence->start_unit != '%') && (silence->start_unit != 'd'))
186 lsx_fail("Invalid unit specified");
187 return lsx_usage(effp);
189 if ((silence->start_unit == '%') && ((silence->start_threshold < 0.0)
190 || (silence->start_threshold > 100.0)))
192 lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
193 return (SOX_EOF);
195 if ((silence->start_unit == 'd') && (silence->start_threshold >= 0.0))
197 lsx_fail("silence threshold should be less than 0.0 dB");
198 return(SOX_EOF);
202 if (silence->stop)
204 if ((silence->stop_unit != '%') && (silence->stop_unit != 'd'))
206 lsx_fail("Invalid unit specified");
207 return(SOX_EOF);
209 if ((silence->stop_unit == '%') && ((silence->stop_threshold < 0.0) ||
210 (silence->stop_threshold > 100.0)))
212 lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
213 return (SOX_EOF);
215 if ((silence->stop_unit == 'd') && (silence->stop_threshold >= 0.0))
217 lsx_fail("silence threshold should be less than 0.0 dB");
218 return(SOX_EOF);
221 return(SOX_SUCCESS);
224 static int sox_silence_start(sox_effect_t * effp)
226 priv_t *silence = (priv_t *)effp->priv;
227 uint64_t temp;
229 /* When you want to remove silence, small window sizes are
230 * better or else RMS will look like non-silence at
231 * aburpt changes from load to silence.
233 silence->window_size = (effp->in_signal.rate / 50) *
234 effp->in_signal.channels;
235 silence->window = lsx_malloc(silence->window_size * sizeof(double));
237 clear_rms(effp);
239 /* Now that we know sample rate, reparse duration. */
240 if (silence->start)
242 if (lsx_parsesamples(effp->in_signal.rate, silence->start_duration_str,
243 &temp, 's') == NULL)
244 return lsx_usage(effp);
245 silence->start_duration = temp * effp->in_signal.channels;
247 if (silence->stop)
249 if (lsx_parsesamples(effp->in_signal.rate,silence->stop_duration_str,
250 &temp,'s') == NULL)
251 return lsx_usage(effp);
252 silence->stop_duration = temp * effp->in_signal.channels;
255 if (silence->start)
256 silence->mode = SILENCE_TRIM;
257 else
258 silence->mode = SILENCE_COPY;
260 silence->start_holdoff = lsx_malloc(sizeof(sox_sample_t)*silence->start_duration);
261 silence->start_holdoff_offset = 0;
262 silence->start_holdoff_end = 0;
263 silence->start_found_periods = 0;
265 silence->stop_holdoff = lsx_malloc(sizeof(sox_sample_t)*silence->stop_duration);
266 silence->stop_holdoff_offset = 0;
267 silence->stop_holdoff_end = 0;
268 silence->stop_found_periods = 0;
270 effp->out_signal.length = SOX_UNKNOWN_LEN; /* depends on input data */
272 return(SOX_SUCCESS);
275 static sox_bool aboveThreshold(sox_effect_t const * effp,
276 sox_sample_t value /* >= 0 */, double threshold, int unit)
278 /* When scaling low bit data, noise values got scaled way up */
279 /* Only consider the original bits when looking for silence */
280 sox_sample_t masked_value = value & (-1 << (32 - effp->in_signal.precision));
282 double scaled_value = (double)masked_value / SOX_SAMPLE_MAX;
284 if (unit == '%')
285 scaled_value *= 100;
286 else if (unit == 'd')
287 scaled_value = linear_to_dB(scaled_value);
289 return scaled_value > threshold;
292 static sox_sample_t compute_rms(sox_effect_t * effp, sox_sample_t sample)
294 priv_t * silence = (priv_t *) effp->priv;
295 double new_sum;
296 sox_sample_t rms;
298 new_sum = silence->rms_sum;
299 new_sum -= *silence->window_current;
300 new_sum += ((double)sample * (double)sample);
302 rms = sqrt(new_sum / silence->window_size);
304 return (rms);
307 static void update_rms(sox_effect_t * effp, sox_sample_t sample)
309 priv_t * silence = (priv_t *) effp->priv;
311 silence->rms_sum -= *silence->window_current;
312 *silence->window_current = ((double)sample * (double)sample);
313 silence->rms_sum += *silence->window_current;
315 silence->window_current++;
316 if (silence->window_current >= silence->window_end)
317 silence->window_current = silence->window;
320 /* Process signed long samples from ibuf to obuf. */
321 /* Return number of samples processed in isamp and osamp. */
/* Flow callback: runs the trim/copy state machine over one buffer.
 *
 * effp  - effect instance; its priv_t carries the state between calls.
 * ibuf  - input samples; on entry *isamp is how many are available.
 * obuf  - output buffer; on entry *osamp is how many samples it can take.
 * isamp - on return, the number of input samples consumed.
 * osamp - on return, the number of output samples written.
 *
 * Returns SOX_SUCCESS, or SOX_EOF once the last stop period has been found
 * and restart is not set.  The counts are in single-channel samples; the
 * detection loops advance by one sample per channel (a "wide" sample).
 */
322 static int sox_silence_flow(sox_effect_t * effp, const sox_sample_t *ibuf, sox_sample_t *obuf,
323 size_t *isamp, size_t *osamp)
325 priv_t * silence = (priv_t *) effp->priv;
326 int threshold;
327 size_t i, j;
328 size_t nrOfTicks, /* sometimes wide, sometimes non-wide samples */
329 nrOfInSamplesRead, nrOfOutSamplesWritten; /* non-wide samples */
331 nrOfInSamplesRead = 0;
332 nrOfOutSamplesWritten = 0;
/* Resume in whichever state the previous call left behind.  Each case also
 * carries a goto label so that a state change can continue in this call. */
334 switch (silence->mode)
336 case SILENCE_TRIM:
337 /* Reads and discards all input data until it detects a
338 * sample that is above the specified threshold. Turns on
339 * copy mode when detected.
340 * Need to make sure and copy input in groups of "channels" to
341 * prevent getting buffers out of sync.
342 * nrOfTicks counts wide samples here.
344 silence_trim:
345 nrOfTicks = min((*isamp-nrOfInSamplesRead),
346 (*osamp-nrOfOutSamplesWritten)) /
347 effp->in_signal.channels;
348 for(i = 0; i < nrOfTicks; i++)
350 threshold = 0;
351 for (j = 0; j < effp->in_signal.channels; j++)
353 threshold |= aboveThreshold(effp,
354 compute_rms(effp, ibuf[j]),
355 silence->start_threshold,
356 silence->start_unit);
359 if (threshold)
361 /* Add to holdoff buffer */
362 for (j = 0; j < effp->in_signal.channels; j++)
364 update_rms(effp, *ibuf);
365 silence->start_holdoff[
366 silence->start_holdoff_end++] = *ibuf++;
367 nrOfInSamplesRead++;
/* Enough consecutive loud samples are held to make up one start period. */
370 if (silence->start_holdoff_end >=
371 silence->start_duration)
373 if (++silence->start_found_periods >=
374 silence->start_periods)
376 silence->mode = SILENCE_TRIM_FLUSH;
377 goto silence_trim_flush;
379 /* Trash holdoff buffer since its not
380 * needed. Start looking again.
382 silence->start_holdoff_offset = 0;
383 silence->start_holdoff_end = 0;
386 else /* !above Threshold */
/* Quiet tick: forget whatever was held, but keep feeding the RMS window. */
388 silence->start_holdoff_end = 0;
389 for (j = 0; j < effp->in_signal.channels; j++)
391 update_rms(effp, ibuf[j]);
393 ibuf += effp->in_signal.channels;
394 nrOfInSamplesRead += effp->in_signal.channels;
396 } /* for nrOfTicks */
397 break;
399 case SILENCE_TRIM_FLUSH:
400 /* nrOfTicks counts non-wide samples here. */
401 silence_trim_flush:
/* Flush what is held, limited by the room left in obuf. */
402 nrOfTicks = min((silence->start_holdoff_end -
403 silence->start_holdoff_offset),
404 (*osamp-nrOfOutSamplesWritten));
/* Keep the channels together: flush whole wide samples only. */
405 nrOfTicks -= nrOfTicks % effp->in_signal.channels;
406 for(i = 0; i < nrOfTicks; i++)
408 *obuf++ = silence->start_holdoff[silence->start_holdoff_offset++];
409 nrOfOutSamplesWritten++;
412 /* If fully drained holdoff then switch to copy mode */
413 if (silence->start_holdoff_offset == silence->start_holdoff_end)
415 silence->start_holdoff_offset = 0;
416 silence->start_holdoff_end = 0;
417 silence->mode = SILENCE_COPY;
418 goto silence_copy;
420 break;
422 case SILENCE_COPY:
423 /* Attempts to copy samples into output buffer.
425 * Case B:
426 * If not looking for silence to terminate copy then
427 * blindly copy data into output buffer.
429 * Case A:
431 * Case 1a:
432 * If previous silence was detect then see if input sample is
433 * above threshold. If found then flush out hold off buffer
434 * and copy over to output buffer.
436 * Case 1b:
437 * If no previous silence detect then see if input sample
438 * is above threshold. If found then copy directly
439 * to output buffer.
441 * Case 2:
442 * If not above threshold then silence is detect so
443 * store in hold off buffer and do not write to output
444 * buffer. Even though it wasn't put in output
445 * buffer, inform user that input was consumed.
447 * If hold off buffer is full after this then stop
448 * copying data and discard data in hold off buffer.
450 * Special leave_silence logic:
452 * During this mode, go ahead and copy input
453 * samples to output buffer instead of holdoff buffer
454 * Then also short ciruit any flushes that would occur
455 * when non-silence is detect since samples were already
456 * copied. This has the effect of always leaving
457 * holdoff[] amount of silence but deleting any
458 * beyond that amount.
460 * nrOfTicks counts wide samples here.
462 silence_copy:
463 nrOfTicks = min((*isamp-nrOfInSamplesRead),
464 (*osamp-nrOfOutSamplesWritten)) /
465 effp->in_signal.channels;
466 if (silence->stop)
468 /* Case A */
469 for(i = 0; i < nrOfTicks; i++)
/* Here a tick counts as sound only if every channel is above the stop
 * threshold; the results are AND-ed together. */
471 threshold = 1;
472 for (j = 0; j < effp->in_signal.channels; j++)
474 threshold &= aboveThreshold(effp,
475 compute_rms(effp, ibuf[j]),
476 silence->stop_threshold,
477 silence->stop_unit);
480 /* Case 1a
481 * If above threshold, check to see if we where holding
482 * off previously. If so then flush this buffer.
483 * We haven't incremented any pointers yet so nothing
484 * is lost.
486 * If user wants to leave_silence, then we
487 * were already copying the data and so no
488 * need to flush the old data. Just resume
489 * copying as if we were not holding off.
491 if (threshold && silence->stop_holdoff_end
492 && !silence->leave_silence)
494 silence->mode = SILENCE_COPY_FLUSH;
495 goto silence_copy_flush;
497 /* Case 1b */
498 else if (threshold)
500 /* Not holding off so copy into output buffer */
501 for (j = 0; j < effp->in_signal.channels; j++)
503 update_rms(effp, *ibuf);
504 *obuf++ = *ibuf++;
505 nrOfInSamplesRead++;
506 nrOfOutSamplesWritten++;
509 /* Case 2 */
510 else if (!threshold)
512 /* Add to holdoff buffer */
513 for (j = 0; j < effp->in_signal.channels; j++)
515 update_rms(effp, *ibuf);
/* With -l the quiet sample is also written straight to the output. */
516 if (silence->leave_silence) {
517 *obuf++ = *ibuf;
518 nrOfOutSamplesWritten++;
520 silence->stop_holdoff[
521 silence->stop_holdoff_end++] = *ibuf++;
522 nrOfInSamplesRead++;
525 /* Check if holdoff buffer is greater than duration
527 if (silence->stop_holdoff_end >=
528 silence->stop_duration)
530 /* Increment found counter and see if this
531 * is the last period. If so then exit.
533 if (++silence->stop_found_periods >=
534 silence->stop_periods)
536 silence->stop_holdoff_offset = 0;
537 silence->stop_holdoff_end = 0;
538 if (!silence->restart)
540 *isamp = nrOfInSamplesRead;
541 *osamp = nrOfOutSamplesWritten;
542 silence->mode = SILENCE_STOP;
543 /* Return SOX_EOF since no more processing */
544 return (SOX_EOF);
546 else
548 silence->stop_found_periods = 0;
549 silence->start_found_periods = 0;
550 silence->start_holdoff_offset = 0;
551 silence->start_holdoff_end = 0;
552 clear_rms(effp);
553 silence->mode = SILENCE_TRIM;
555 goto silence_trim;
558 else
560 /* Flush this buffer and start
561 * looking again.
563 silence->mode = SILENCE_COPY_FLUSH;
564 goto silence_copy_flush;
566 break;
567 } /* Filled holdoff buffer */
568 } /* Detected silence */
569 } /* For # of samples */
570 } /* Trimming off backend */
571 else /* !(silence->stop) */
573 /* Case B */
/* No stop condition configured: pass whole wide samples through as is. */
574 memcpy(obuf, ibuf, sizeof(sox_sample_t)*nrOfTicks*
575 effp->in_signal.channels);
576 nrOfInSamplesRead += (nrOfTicks*effp->in_signal.channels);
577 nrOfOutSamplesWritten += (nrOfTicks*effp->in_signal.channels);
579 break;
581 case SILENCE_COPY_FLUSH:
582 /* nrOfTicks counts non-wide samples here. */
583 silence_copy_flush:
/* Flush what is held, limited by the room left in obuf. */
584 nrOfTicks = min((silence->stop_holdoff_end -
585 silence->stop_holdoff_offset),
586 (*osamp-nrOfOutSamplesWritten));
/* Keep the channels together: flush whole wide samples only. */
587 nrOfTicks -= nrOfTicks % effp->in_signal.channels;
589 for(i = 0; i < nrOfTicks; i++)
591 *obuf++ = silence->stop_holdoff[silence->stop_holdoff_offset++];
592 nrOfOutSamplesWritten++;
595 /* If fully drained holdoff then return to copy mode */
596 if (silence->stop_holdoff_offset == silence->stop_holdoff_end)
598 silence->stop_holdoff_offset = 0;
599 silence->stop_holdoff_end = 0;
600 silence->mode = SILENCE_COPY;
601 goto silence_copy;
603 break;
605 case SILENCE_STOP:
606 /* This code can't be reached. */
607 nrOfInSamplesRead = *isamp;
608 break;
/* Report how much input was consumed and how much output was produced. */
611 *isamp = nrOfInSamplesRead;
612 *osamp = nrOfOutSamplesWritten;
614 return (SOX_SUCCESS);
617 static int sox_silence_drain(sox_effect_t * effp, sox_sample_t *obuf, size_t *osamp)
619 priv_t * silence = (priv_t *) effp->priv;
620 size_t i;
621 size_t nrOfTicks, nrOfOutSamplesWritten = 0; /* non-wide samples */
623 /* Only if in flush mode will there be possible samples to write
624 * out during drain() call.
626 if (silence->mode == SILENCE_COPY_FLUSH ||
627 silence->mode == SILENCE_COPY)
629 nrOfTicks = min((silence->stop_holdoff_end -
630 silence->stop_holdoff_offset), *osamp);
631 nrOfTicks -= nrOfTicks % effp->in_signal.channels;
632 for(i = 0; i < nrOfTicks; i++)
634 *obuf++ = silence->stop_holdoff[silence->stop_holdoff_offset++];
635 nrOfOutSamplesWritten++;
638 /* If fully drained holdoff then stop */
639 if (silence->stop_holdoff_offset == silence->stop_holdoff_end)
641 silence->stop_holdoff_offset = 0;
642 silence->stop_holdoff_end = 0;
643 silence->mode = SILENCE_STOP;
647 *osamp = nrOfOutSamplesWritten;
648 if (silence->mode == SILENCE_STOP || *osamp == 0)
649 return SOX_EOF;
650 else
651 return SOX_SUCCESS;
654 static int sox_silence_stop(sox_effect_t * effp)
656 priv_t * silence = (priv_t *) effp->priv;
658 free(silence->window);
659 free(silence->start_holdoff);
660 free(silence->stop_holdoff);
662 return(SOX_SUCCESS);
665 static int lsx_kill(sox_effect_t * effp)
667 priv_t * silence = (priv_t *) effp->priv;
669 free(silence->start_duration_str);
670 free(silence->stop_duration_str);
672 return SOX_SUCCESS;
675 static sox_effect_handler_t sox_silence_effect = {
676 "silence",
677 "[ -l ] above_periods [ duration threshold[d|%] ] [ below_periods duration threshold[d|%] ]",
678 SOX_EFF_MCHAN | SOX_EFF_MODIFY | SOX_EFF_LENGTH,
679 sox_silence_getopts,
680 sox_silence_start,
681 sox_silence_flow,
682 sox_silence_drain,
683 sox_silence_stop,
684 lsx_kill, sizeof(priv_t)
687 const sox_effect_handler_t *lsx_silence_effect_fn(void)
689 return &sox_silence_effect;