README.osx wasn't easily readable in Finder. Revert back to
[sox.git] / src / tempo.c
blob440de1481e8604fd37abea7dc6d688cafe21cde5
1 /* libSoX effect: change tempo (and duration) or pitch (maintain duration)
2 * Copyright (c) 2007,8 robs@users.sourceforge.net
3 * Based on ideas from Olli Parviainen's SoundTouch Library.
5 * This library is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation; either version 2.1 of the License, or (at
8 * your option) any later version.
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
13 * General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this library; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 #include "sox_i.h"
21 #include "fifo.h"
22 #include "sgetopt.h"
23 #include <math.h>
25 typedef struct {
26 /* Configuration parameters: */
27 size_t channels;
28 sox_bool quick_search; /* Whether to quick search or linear search */
29 double factor; /* 1 for no change, < 1 for slower, > 1 for faster. */
30 size_t search; /* Wide samples to search for best overlap position */
31 size_t segment; /* Processing segment length in wide samples */
32 size_t overlap; /* In wide samples */
34 size_t process_size; /* # input wide samples needed to process 1 segment */
36 /* Buffers: */
37 fifo_t input_fifo;
38 float * overlap_buf;
39 fifo_t output_fifo;
41 /* Counters: */
42 size_t samples_in;
43 size_t samples_out;
44 size_t segments_total;
45 size_t skip_total;
46 } tempo_t;
48 /* Waveform Similarity by least squares; works across multi-channels */
49 static float difference(const float * a, const float * b, size_t length)
51 float diff = 0;
52 size_t i = 0;
54 #define _ diff += sqr(a[i] - b[i]), ++i; /* Loop optimisation */
55 do {_ _ _ _ _ _ _ _} while (i < length); /* N.B. length ≡ 0 (mod 8) */
56 #undef _
57 return diff;
60 /* Find where the two segments are most alike over the overlap period. */
61 static size_t tempo_best_overlap_position(tempo_t * t, float const * new_win)
63 float * f = t->overlap_buf;
64 size_t j, best_pos, prev_best_pos = (t->search + 1) >> 1, step = 64;
65 size_t i = best_pos = t->quick_search? prev_best_pos : 0;
66 float diff, least_diff = difference(new_win + t->channels * i, f, t->channels * t->overlap);
67 int k = 0;
69 if (t->quick_search) do { /* hierarchical search */
70 for (k = -1; k <= 1; k += 2) for (j = 1; j < 4 || step == 64; ++j) {
71 i = prev_best_pos + k * j * step;
72 if ((int)i < 0 || i >= t->search)
73 break;
74 diff = difference(new_win + t->channels * i, f, t->channels * t->overlap);
75 if (diff < least_diff)
76 least_diff = diff, best_pos = i;
78 prev_best_pos = best_pos;
79 } while (step >>= 2);
80 else for (i = 1; i < t->search; i++) { /* linear search */
81 diff = difference(new_win + t->channels * i, f, t->channels * t->overlap);
82 if (diff < least_diff)
83 least_diff = diff, best_pos = i;
85 return best_pos;
88 static void tempo_overlap(
89 tempo_t * t, const float * in1, const float * in2, float * output)
91 size_t i, j, k = 0;
92 float fade_step = 1.0f / (float) t->overlap;
94 for (i = 0; i < t->overlap; ++i) {
95 float fade_in = fade_step * (float) i;
96 float fade_out = 1.0f - fade_in;
97 for (j = 0; j < t->channels; ++j, ++k)
98 output[k] = in1[k] * fade_out + in2[k] * fade_in;
102 static void tempo_process(tempo_t * t)
104 while (fifo_occupancy(&t->input_fifo) >= t->process_size) {
105 size_t skip, offset;
107 /* Copy or overlap the first bit to the output */
108 if (!t->segments_total) {
109 offset = t->search / 2;
110 fifo_write(&t->output_fifo, t->overlap, (float *) fifo_read_ptr(&t->input_fifo) + t->channels * offset);
111 } else {
112 offset = tempo_best_overlap_position(t, fifo_read_ptr(&t->input_fifo));
113 tempo_overlap(t, t->overlap_buf,
114 (float *) fifo_read_ptr(&t->input_fifo) + t->channels * offset,
115 fifo_write(&t->output_fifo, t->overlap, NULL));
117 /* Copy the middle bit to the output */
118 fifo_write(&t->output_fifo, t->segment - 2 * t->overlap,
119 (float *) fifo_read_ptr(&t->input_fifo) +
120 t->channels * (offset + t->overlap));
122 /* Copy the end bit to overlap_buf ready to be mixed with
123 * the beginning of the next segment. */
124 memcpy(t->overlap_buf,
125 (float *) fifo_read_ptr(&t->input_fifo) +
126 t->channels * (offset + t->segment - t->overlap),
127 t->channels * t->overlap * sizeof(*(t->overlap_buf)));
129 /* Advance through the input stream */
130 skip = t->factor * (++t->segments_total * (t->segment - t->overlap)) + 0.5;
131 t->skip_total += skip -= t->skip_total;
132 fifo_read(&t->input_fifo, skip, NULL);
136 static float * tempo_input(tempo_t * t, float const * samples, size_t n)
138 t->samples_in += n;
139 return fifo_write(&t->input_fifo, n, samples);
142 static float const * tempo_output(tempo_t * t, float * samples, size_t * n)
144 t->samples_out += *n = min(*n, fifo_occupancy(&t->output_fifo));
145 return fifo_read(&t->output_fifo, *n, samples);
148 /* Flush samples remaining in overlap_buf & input_fifo to the output. */
149 static void tempo_flush(tempo_t * t)
151 size_t samples_out = t->samples_in / t->factor + .5;
152 size_t remaining = samples_out - t->samples_out;
153 float * buff = lsx_calloc(128 * t->channels, sizeof(*buff));
155 if ((int)remaining > 0) {
156 while (fifo_occupancy(&t->output_fifo) < remaining) {
157 tempo_input(t, buff, (size_t) 128);
158 tempo_process(t);
160 fifo_trim_to(&t->output_fifo, remaining);
161 t->samples_in = 0;
163 free(buff);
166 static void tempo_setup(tempo_t * t,
167 double sample_rate, sox_bool quick_search, double factor,
168 double segment_ms, double search_ms, double overlap_ms)
170 size_t max_skip;
171 t->quick_search = quick_search;
172 t->factor = factor;
173 t->segment = sample_rate * segment_ms / 1000 + .5;
174 t->search = sample_rate * search_ms / 1000 + .5;
175 t->overlap = max(sample_rate * overlap_ms / 1000 + 4.5, 16);
176 t->overlap &= ~7; /* Make divisible by 8 for loop optimisation */
177 if (t->overlap * 2 > t->segment)
178 t->overlap -= 8;
179 t->overlap_buf = lsx_malloc(t->overlap * t->channels * sizeof(*t->overlap_buf));
180 max_skip = ceil(factor * (t->segment - t->overlap));
181 t->process_size = max(max_skip + t->overlap, t->segment) + t->search;
182 memset(fifo_reserve(&t->input_fifo, t->search / 2), 0, (t->search / 2) * t->channels * sizeof(float));
185 static void tempo_delete(tempo_t * t)
187 free(t->overlap_buf);
188 fifo_delete(&t->output_fifo);
189 fifo_delete(&t->input_fifo);
190 free(t);
193 static tempo_t * tempo_create(size_t channels)
195 tempo_t * t = lsx_calloc(1, sizeof(*t));
196 t->channels = channels;
197 fifo_create(&t->input_fifo, t->channels * sizeof(float));
198 fifo_create(&t->output_fifo, t->channels * sizeof(float));
199 return t;
202 /*------------------------------- SoX Wrapper --------------------------------*/
204 typedef struct {
205 tempo_t * tempo;
206 sox_bool quick_search;
207 double factor, segment_ms, search_ms, overlap_ms;
208 } priv_t;
210 static int getopts(sox_effect_t * effp, int argc, char **argv)
212 priv_t * p = (priv_t *)effp->priv;
213 enum {Default, Music, Speech, Linear} profile = Default;
214 static const double segments_ms [] = { 82,82, 35 , 20};
215 static const double segments_pow[] = { 0, 1, .33 , 1};
216 static const double overlaps_div[] = {6.833, 7, 2.5 , 2};
217 static const double searches_div[] = {5.587, 6, 2.14, 2};
218 int c;
220 p->segment_ms = p->search_ms = p->overlap_ms = HUGE_VAL;
221 while ((c = lsx_getopt(argc, argv, "+qmls")) != -1) switch (c) {
222 case 'q': p->quick_search = sox_true; break;
223 case 'm': profile = Music; break;
224 case 's': profile = Speech; break;
225 case 'l': profile = Linear; p->search_ms = 0; break;
226 default: lsx_fail("unknown option `-%c'", optopt); return lsx_usage(effp);
228 argc -= lsx_optind, argv += lsx_optind;
229 do { /* break-able block */
230 NUMERIC_PARAMETER(factor ,0.1 , 100 )
231 NUMERIC_PARAMETER(segment_ms , 10 , 120)
232 NUMERIC_PARAMETER(search_ms , 0 , 30 )
233 NUMERIC_PARAMETER(overlap_ms , 0 , 30 )
234 } while (0);
236 if (p->segment_ms == HUGE_VAL)
237 p->segment_ms = max(10, segments_ms[profile] / max(pow(p->factor, segments_pow[profile]), 1));
238 if (p->overlap_ms == HUGE_VAL)
239 p->overlap_ms = p->segment_ms / overlaps_div[profile];
240 if (p->search_ms == HUGE_VAL)
241 p->search_ms = p->segment_ms / searches_div[profile];
243 p->overlap_ms = min(p->overlap_ms, p->segment_ms / 2);
244 lsx_report("quick_search=%u factor=%g segment=%g search=%g overlap=%g",
245 p->quick_search, p->factor, p->segment_ms, p->search_ms, p->overlap_ms);
246 return argc? lsx_usage(effp) : SOX_SUCCESS;
249 static int start(sox_effect_t * effp)
251 priv_t * p = (priv_t *)effp->priv;
252 if (p->factor == 1)
253 return SOX_EFF_NULL;
255 p->tempo = tempo_create((size_t)effp->in_signal.channels);
256 tempo_setup(p->tempo, effp->in_signal.rate, p->quick_search, p->factor,
257 p->segment_ms, p->search_ms, p->overlap_ms);
258 return SOX_SUCCESS;
261 static int flow(sox_effect_t * effp, const sox_sample_t * ibuf,
262 sox_sample_t * obuf, size_t * isamp, size_t * osamp)
264 priv_t * p = (priv_t *)effp->priv;
265 size_t i, odone = *osamp /= effp->in_signal.channels;
266 float const * s = tempo_output(p->tempo, NULL, &odone);
267 SOX_SAMPLE_LOCALS;
269 for (i = 0; i < odone * effp->in_signal.channels; ++i)
270 *obuf++ = SOX_FLOAT_32BIT_TO_SAMPLE(*s++, effp->clips);
272 if (*isamp && odone < *osamp) {
273 float * t = tempo_input(p->tempo, NULL, *isamp / effp->in_signal.channels);
274 for (i = *isamp; i; --i)
275 *t++ = SOX_SAMPLE_TO_FLOAT_32BIT(*ibuf++, effp->clips);
276 tempo_process(p->tempo);
278 else *isamp = 0;
280 *osamp = odone * effp->in_signal.channels;
281 return SOX_SUCCESS;
284 static int drain(sox_effect_t * effp, sox_sample_t * obuf, size_t * osamp)
286 priv_t * p = (priv_t *)effp->priv;
287 static size_t isamp = 0;
288 tempo_flush(p->tempo);
289 return flow(effp, 0, obuf, &isamp, osamp);
292 static int stop(sox_effect_t * effp)
294 priv_t * p = (priv_t *)effp->priv;
295 tempo_delete(p->tempo);
296 return SOX_SUCCESS;
299 sox_effect_handler_t const * lsx_tempo_effect_fn(void)
301 static sox_effect_handler_t handler = {
302 "tempo", "[-q] [-m | -s | -l] factor [segment-ms [search-ms [overlap-ms]]]",
303 SOX_EFF_MCHAN | SOX_EFF_LENGTH,
304 getopts, start, flow, drain, stop, NULL, sizeof(priv_t)
306 return &handler;
309 /*---------------------------------- pitch -----------------------------------*/
311 static int pitch_getopts(sox_effect_t * effp, int argc, char **argv)
313 double d;
314 char dummy, arg[100], **argv2 = lsx_malloc(argc * sizeof(*argv2));
315 int result, pos = (argc > 1 && !strcmp(argv[1], "-q"))? 2 : 1;
317 if (argc <= pos || sscanf(argv[pos], "%lf %c", &d, &dummy) != 1)
318 return lsx_usage(effp);
320 d = pow(2., d / 1200); /* cents --> factor */
321 sprintf(arg, "%g", 1 / d);
322 memcpy(argv2, argv, argc * sizeof(*argv2));
323 argv2[pos] = arg;
324 result = getopts(effp, argc, argv2);
325 free(argv2);
326 return result;
329 static int pitch_start(sox_effect_t * effp)
331 priv_t * p = (priv_t *) effp->priv;
332 int result = start(effp);
334 effp->out_signal.rate = effp->in_signal.rate / p->factor;
335 return result;
338 sox_effect_handler_t const * lsx_pitch_effect_fn(void)
340 static sox_effect_handler_t handler;
341 handler = *lsx_tempo_effect_fn();
342 handler.name = "pitch";
343 handler.usage = "[-q] shift-in-cents [segment-ms [search-ms [overlap-ms]]]",
344 handler.getopts = pitch_getopts;
345 handler.start = pitch_start;
346 handler.flags &= ~SOX_EFF_LENGTH;
347 return &handler;
350 /*------------------------- key (old name for pitch) -------------------------*/
352 sox_effect_handler_t const * lsx_key_effect_fn(void)
354 static sox_effect_handler_t handler;
355 handler = *lsx_pitch_effect_fn();
356 handler.name = "key";
357 handler.flags |= SOX_EFF_DEPRECATED;
358 return &handler;