1 /* libSoX effect: change tempo (and duration) or pitch (maintain duration)
2 * Copyright (c) 2007,8 robs@users.sourceforge.net
3 * Based on ideas from Olli Parviainen's SoundTouch Library.
5 * This library is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation; either version 2.1 of the License, or (at
8 * your option) any later version.
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
13 * General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this library; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 /* Configuration parameters: */
28 sox_bool quick_search
; /* Whether to quick search or linear search */
29 double factor
; /* 1 for no change, < 1 for slower, > 1 for faster. */
30 size_t search
; /* Wide samples to search for best overlap position */
31 size_t segment
; /* Processing segment length in wide samples */
32 size_t overlap
; /* In wide samples */
34 size_t process_size
; /* # input wide samples needed to process 1 segment */
44 size_t segments_total
;
48 /* Waveform Similarity by least squares; works across multi-channels */
49 static float difference(const float * a
, const float * b
, size_t length
)
54 #define _ diff += sqr(a[i] - b[i]), ++i; /* Loop optimisation */
55 do {_ _ _ _ _ _ _ _
} while (i
< length
); /* N.B. length ≡ 0 (mod 8) */
60 /* Find where the two segments are most alike over the overlap period. */
61 static size_t tempo_best_overlap_position(tempo_t
* t
, float const * new_win
)
63 float * f
= t
->overlap_buf
;
64 size_t j
, best_pos
, prev_best_pos
= (t
->search
+ 1) >> 1, step
= 64;
65 size_t i
= best_pos
= t
->quick_search
? prev_best_pos
: 0;
66 float diff
, least_diff
= difference(new_win
+ t
->channels
* i
, f
, t
->channels
* t
->overlap
);
69 if (t
->quick_search
) do { /* hierarchical search */
70 for (k
= -1; k
<= 1; k
+= 2) for (j
= 1; j
< 4 || step
== 64; ++j
) {
71 i
= prev_best_pos
+ k
* j
* step
;
72 if ((int)i
< 0 || i
>= t
->search
)
74 diff
= difference(new_win
+ t
->channels
* i
, f
, t
->channels
* t
->overlap
);
75 if (diff
< least_diff
)
76 least_diff
= diff
, best_pos
= i
;
78 prev_best_pos
= best_pos
;
80 else for (i
= 1; i
< t
->search
; i
++) { /* linear search */
81 diff
= difference(new_win
+ t
->channels
* i
, f
, t
->channels
* t
->overlap
);
82 if (diff
< least_diff
)
83 least_diff
= diff
, best_pos
= i
;
88 static void tempo_overlap(
89 tempo_t
* t
, const float * in1
, const float * in2
, float * output
)
92 float fade_step
= 1.0f
/ (float) t
->overlap
;
94 for (i
= 0; i
< t
->overlap
; ++i
) {
95 float fade_in
= fade_step
* (float) i
;
96 float fade_out
= 1.0f
- fade_in
;
97 for (j
= 0; j
< t
->channels
; ++j
, ++k
)
98 output
[k
] = in1
[k
] * fade_out
+ in2
[k
] * fade_in
;
102 static void tempo_process(tempo_t
* t
)
104 while (fifo_occupancy(&t
->input_fifo
) >= t
->process_size
) {
107 /* Copy or overlap the first bit to the output */
108 if (!t
->segments_total
) {
109 offset
= t
->search
/ 2;
110 fifo_write(&t
->output_fifo
, t
->overlap
, (float *) fifo_read_ptr(&t
->input_fifo
) + t
->channels
* offset
);
112 offset
= tempo_best_overlap_position(t
, fifo_read_ptr(&t
->input_fifo
));
113 tempo_overlap(t
, t
->overlap_buf
,
114 (float *) fifo_read_ptr(&t
->input_fifo
) + t
->channels
* offset
,
115 fifo_write(&t
->output_fifo
, t
->overlap
, NULL
));
117 /* Copy the middle bit to the output */
118 fifo_write(&t
->output_fifo
, t
->segment
- 2 * t
->overlap
,
119 (float *) fifo_read_ptr(&t
->input_fifo
) +
120 t
->channels
* (offset
+ t
->overlap
));
122 /* Copy the end bit to overlap_buf ready to be mixed with
123 * the beginning of the next segment. */
124 memcpy(t
->overlap_buf
,
125 (float *) fifo_read_ptr(&t
->input_fifo
) +
126 t
->channels
* (offset
+ t
->segment
- t
->overlap
),
127 t
->channels
* t
->overlap
* sizeof(*(t
->overlap_buf
)));
129 /* Advance through the input stream */
130 skip
= t
->factor
* (++t
->segments_total
* (t
->segment
- t
->overlap
)) + 0.5;
131 t
->skip_total
+= skip
-= t
->skip_total
;
132 fifo_read(&t
->input_fifo
, skip
, NULL
);
136 static float * tempo_input(tempo_t
* t
, float const * samples
, size_t n
)
139 return fifo_write(&t
->input_fifo
, n
, samples
);
142 static float const * tempo_output(tempo_t
* t
, float * samples
, size_t * n
)
144 t
->samples_out
+= *n
= min(*n
, fifo_occupancy(&t
->output_fifo
));
145 return fifo_read(&t
->output_fifo
, *n
, samples
);
148 /* Flush samples remaining in overlap_buf & input_fifo to the output. */
149 static void tempo_flush(tempo_t
* t
)
151 size_t samples_out
= t
->samples_in
/ t
->factor
+ .5;
152 size_t remaining
= samples_out
- t
->samples_out
;
153 float * buff
= lsx_calloc(128 * t
->channels
, sizeof(*buff
));
155 if ((int)remaining
> 0) {
156 while (fifo_occupancy(&t
->output_fifo
) < remaining
) {
157 tempo_input(t
, buff
, (size_t) 128);
160 fifo_trim_to(&t
->output_fifo
, remaining
);
166 static void tempo_setup(tempo_t
* t
,
167 double sample_rate
, sox_bool quick_search
, double factor
,
168 double segment_ms
, double search_ms
, double overlap_ms
)
171 t
->quick_search
= quick_search
;
173 t
->segment
= sample_rate
* segment_ms
/ 1000 + .5;
174 t
->search
= sample_rate
* search_ms
/ 1000 + .5;
175 t
->overlap
= max(sample_rate
* overlap_ms
/ 1000 + 4.5, 16);
176 t
->overlap
&= ~7; /* Make divisible by 8 for loop optimisation */
177 if (t
->overlap
* 2 > t
->segment
)
179 t
->overlap_buf
= lsx_malloc(t
->overlap
* t
->channels
* sizeof(*t
->overlap_buf
));
180 max_skip
= ceil(factor
* (t
->segment
- t
->overlap
));
181 t
->process_size
= max(max_skip
+ t
->overlap
, t
->segment
) + t
->search
;
182 memset(fifo_reserve(&t
->input_fifo
, t
->search
/ 2), 0, (t
->search
/ 2) * t
->channels
* sizeof(float));
185 static void tempo_delete(tempo_t
* t
)
187 free(t
->overlap_buf
);
188 fifo_delete(&t
->output_fifo
);
189 fifo_delete(&t
->input_fifo
);
193 static tempo_t
* tempo_create(size_t channels
)
195 tempo_t
* t
= lsx_calloc(1, sizeof(*t
));
196 t
->channels
= channels
;
197 fifo_create(&t
->input_fifo
, t
->channels
* sizeof(float));
198 fifo_create(&t
->output_fifo
, t
->channels
* sizeof(float));
202 /*------------------------------- SoX Wrapper --------------------------------*/
206 sox_bool quick_search
;
207 double factor
, segment_ms
, search_ms
, overlap_ms
;
210 static int getopts(sox_effect_t
* effp
, int argc
, char **argv
)
212 priv_t
* p
= (priv_t
*)effp
->priv
;
213 enum {Default
, Music
, Speech
, Linear
} profile
= Default
;
214 static const double segments_ms
[] = { 82,82, 35 , 20};
215 static const double segments_pow
[] = { 0, 1, .33 , 1};
216 static const double overlaps_div
[] = {6.833, 7, 2.5 , 2};
217 static const double searches_div
[] = {5.587, 6, 2.14, 2};
220 p
->segment_ms
= p
->search_ms
= p
->overlap_ms
= HUGE_VAL
;
221 while ((c
= lsx_getopt(argc
, argv
, "+qmls")) != -1) switch (c
) {
222 case 'q': p
->quick_search
= sox_true
; break;
223 case 'm': profile
= Music
; break;
224 case 's': profile
= Speech
; break;
225 case 'l': profile
= Linear
; p
->search_ms
= 0; break;
226 default: lsx_fail("unknown option `-%c'", optopt
); return lsx_usage(effp
);
228 argc
-= lsx_optind
, argv
+= lsx_optind
;
229 do { /* break-able block */
230 NUMERIC_PARAMETER(factor
,0.1 , 100 )
231 NUMERIC_PARAMETER(segment_ms
, 10 , 120)
232 NUMERIC_PARAMETER(search_ms
, 0 , 30 )
233 NUMERIC_PARAMETER(overlap_ms
, 0 , 30 )
236 if (p
->segment_ms
== HUGE_VAL
)
237 p
->segment_ms
= max(10, segments_ms
[profile
] / max(pow(p
->factor
, segments_pow
[profile
]), 1));
238 if (p
->overlap_ms
== HUGE_VAL
)
239 p
->overlap_ms
= p
->segment_ms
/ overlaps_div
[profile
];
240 if (p
->search_ms
== HUGE_VAL
)
241 p
->search_ms
= p
->segment_ms
/ searches_div
[profile
];
243 p
->overlap_ms
= min(p
->overlap_ms
, p
->segment_ms
/ 2);
244 lsx_report("quick_search=%u factor=%g segment=%g search=%g overlap=%g",
245 p
->quick_search
, p
->factor
, p
->segment_ms
, p
->search_ms
, p
->overlap_ms
);
246 return argc
? lsx_usage(effp
) : SOX_SUCCESS
;
249 static int start(sox_effect_t
* effp
)
251 priv_t
* p
= (priv_t
*)effp
->priv
;
255 p
->tempo
= tempo_create((size_t)effp
->in_signal
.channels
);
256 tempo_setup(p
->tempo
, effp
->in_signal
.rate
, p
->quick_search
, p
->factor
,
257 p
->segment_ms
, p
->search_ms
, p
->overlap_ms
);
261 static int flow(sox_effect_t
* effp
, const sox_sample_t
* ibuf
,
262 sox_sample_t
* obuf
, size_t * isamp
, size_t * osamp
)
264 priv_t
* p
= (priv_t
*)effp
->priv
;
265 size_t i
, odone
= *osamp
/= effp
->in_signal
.channels
;
266 float const * s
= tempo_output(p
->tempo
, NULL
, &odone
);
269 for (i
= 0; i
< odone
* effp
->in_signal
.channels
; ++i
)
270 *obuf
++ = SOX_FLOAT_32BIT_TO_SAMPLE(*s
++, effp
->clips
);
272 if (*isamp
&& odone
< *osamp
) {
273 float * t
= tempo_input(p
->tempo
, NULL
, *isamp
/ effp
->in_signal
.channels
);
274 for (i
= *isamp
; i
; --i
)
275 *t
++ = SOX_SAMPLE_TO_FLOAT_32BIT(*ibuf
++, effp
->clips
);
276 tempo_process(p
->tempo
);
280 *osamp
= odone
* effp
->in_signal
.channels
;
284 static int drain(sox_effect_t
* effp
, sox_sample_t
* obuf
, size_t * osamp
)
286 priv_t
* p
= (priv_t
*)effp
->priv
;
287 static size_t isamp
= 0;
288 tempo_flush(p
->tempo
);
289 return flow(effp
, 0, obuf
, &isamp
, osamp
);
292 static int stop(sox_effect_t
* effp
)
294 priv_t
* p
= (priv_t
*)effp
->priv
;
295 tempo_delete(p
->tempo
);
299 sox_effect_handler_t
const * lsx_tempo_effect_fn(void)
301 static sox_effect_handler_t handler
= {
302 "tempo", "[-q] [-m | -s | -l] factor [segment-ms [search-ms [overlap-ms]]]",
303 SOX_EFF_MCHAN
| SOX_EFF_LENGTH
,
304 getopts
, start
, flow
, drain
, stop
, NULL
, sizeof(priv_t
)
309 /*---------------------------------- pitch -----------------------------------*/
311 static int pitch_getopts(sox_effect_t
* effp
, int argc
, char **argv
)
314 char dummy
, arg
[100], **argv2
= lsx_malloc(argc
* sizeof(*argv2
));
315 int result
, pos
= (argc
> 1 && !strcmp(argv
[1], "-q"))? 2 : 1;
317 if (argc
<= pos
|| sscanf(argv
[pos
], "%lf %c", &d
, &dummy
) != 1)
318 return lsx_usage(effp
);
320 d
= pow(2., d
/ 1200); /* cents --> factor */
321 sprintf(arg
, "%g", 1 / d
);
322 memcpy(argv2
, argv
, argc
* sizeof(*argv2
));
324 result
= getopts(effp
, argc
, argv2
);
329 static int pitch_start(sox_effect_t
* effp
)
331 priv_t
* p
= (priv_t
*) effp
->priv
;
332 int result
= start(effp
);
334 effp
->out_signal
.rate
= effp
->in_signal
.rate
/ p
->factor
;
338 sox_effect_handler_t
const * lsx_pitch_effect_fn(void)
340 static sox_effect_handler_t handler
;
341 handler
= *lsx_tempo_effect_fn();
342 handler
.name
= "pitch";
343 handler
.usage
= "[-q] shift-in-cents [segment-ms [search-ms [overlap-ms]]]",
344 handler
.getopts
= pitch_getopts
;
345 handler
.start
= pitch_start
;
346 handler
.flags
&= ~SOX_EFF_LENGTH
;
350 /*------------------------- key (old name for pitch) -------------------------*/
352 sox_effect_handler_t
const * lsx_key_effect_fn(void)
354 static sox_effect_handler_t handler
;
355 handler
= *lsx_pitch_effect_fn();
356 handler
.name
= "key";
357 handler
.flags
|= SOX_EFF_DEPRECATED
;