1 /* libSoX effect: change tempo (and duration) or pitch (maintain duration)
2 * Copyright (c) 2007,8 robs@users.sourceforge.net
3 * Based on ideas from Olli Parviainen's SoundTouch Library.
5 * This library is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation; either version 2.1 of the License, or (at
8 * your option) any later version.
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
13 * General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this library; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 /* Configuration parameters: */
27 sox_bool quick_search
; /* Whether to quick search or linear search */
28 double factor
; /* 1 for no change, < 1 for slower, > 1 for faster. */
29 size_t search
; /* Wide samples to search for best overlap position */
30 size_t segment
; /* Processing segment length in wide samples */
31 size_t overlap
; /* In wide samples */
33 size_t process_size
; /* # input wide samples needed to process 1 segment */
43 uint64_t segments_total
;
47 /* Waveform Similarity by least squares; works across multi-channels */
48 static float difference(const float * a
, const float * b
, size_t length
)
53 #define _ diff += sqr(a[i] - b[i]), ++i; /* Loop optimisation */
54 do {_ _ _ _ _ _ _ _
} while (i
< length
); /* N.B. length ≡ 0 (mod 8) */
59 /* Find where the two segments are most alike over the overlap period. */
60 static size_t tempo_best_overlap_position(tempo_t
* t
, float const * new_win
)
62 float * f
= t
->overlap_buf
;
63 size_t j
, best_pos
, prev_best_pos
= (t
->search
+ 1) >> 1, step
= 64;
64 size_t i
= best_pos
= t
->quick_search
? prev_best_pos
: 0;
65 float diff
, least_diff
= difference(new_win
+ t
->channels
* i
, f
, t
->channels
* t
->overlap
);
68 if (t
->quick_search
) do { /* hierarchical search */
69 for (k
= -1; k
<= 1; k
+= 2) for (j
= 1; j
< 4 || step
== 64; ++j
) {
70 i
= prev_best_pos
+ k
* j
* step
;
71 if ((int)i
< 0 || i
>= t
->search
)
73 diff
= difference(new_win
+ t
->channels
* i
, f
, t
->channels
* t
->overlap
);
74 if (diff
< least_diff
)
75 least_diff
= diff
, best_pos
= i
;
77 prev_best_pos
= best_pos
;
79 else for (i
= 1; i
< t
->search
; i
++) { /* linear search */
80 diff
= difference(new_win
+ t
->channels
* i
, f
, t
->channels
* t
->overlap
);
81 if (diff
< least_diff
)
82 least_diff
= diff
, best_pos
= i
;
87 static void tempo_overlap(
88 tempo_t
* t
, const float * in1
, const float * in2
, float * output
)
91 float fade_step
= 1.0f
/ (float) t
->overlap
;
93 for (i
= 0; i
< t
->overlap
; ++i
) {
94 float fade_in
= fade_step
* (float) i
;
95 float fade_out
= 1.0f
- fade_in
;
96 for (j
= 0; j
< t
->channels
; ++j
, ++k
)
97 output
[k
] = in1
[k
] * fade_out
+ in2
[k
] * fade_in
;
101 static void tempo_process(tempo_t
* t
)
103 while (fifo_occupancy(&t
->input_fifo
) >= t
->process_size
) {
106 /* Copy or overlap the first bit to the output */
107 if (!t
->segments_total
) {
108 offset
= t
->search
/ 2;
109 fifo_write(&t
->output_fifo
, t
->overlap
, (float *) fifo_read_ptr(&t
->input_fifo
) + t
->channels
* offset
);
111 offset
= tempo_best_overlap_position(t
, fifo_read_ptr(&t
->input_fifo
));
112 tempo_overlap(t
, t
->overlap_buf
,
113 (float *) fifo_read_ptr(&t
->input_fifo
) + t
->channels
* offset
,
114 fifo_write(&t
->output_fifo
, t
->overlap
, NULL
));
116 /* Copy the middle bit to the output */
117 fifo_write(&t
->output_fifo
, t
->segment
- 2 * t
->overlap
,
118 (float *) fifo_read_ptr(&t
->input_fifo
) +
119 t
->channels
* (offset
+ t
->overlap
));
121 /* Copy the end bit to overlap_buf ready to be mixed with
122 * the beginning of the next segment. */
123 memcpy(t
->overlap_buf
,
124 (float *) fifo_read_ptr(&t
->input_fifo
) +
125 t
->channels
* (offset
+ t
->segment
- t
->overlap
),
126 t
->channels
* t
->overlap
* sizeof(*(t
->overlap_buf
)));
128 /* Advance through the input stream */
129 skip
= t
->factor
* (++t
->segments_total
* (t
->segment
- t
->overlap
)) + 0.5;
130 t
->skip_total
+= skip
-= t
->skip_total
;
131 fifo_read(&t
->input_fifo
, skip
, NULL
);
135 static float * tempo_input(tempo_t
* t
, float const * samples
, size_t n
)
138 return fifo_write(&t
->input_fifo
, n
, samples
);
141 static float const * tempo_output(tempo_t
* t
, float * samples
, size_t * n
)
143 t
->samples_out
+= *n
= min(*n
, fifo_occupancy(&t
->output_fifo
));
144 return fifo_read(&t
->output_fifo
, *n
, samples
);
147 /* Flush samples remaining in overlap_buf & input_fifo to the output. */
148 static void tempo_flush(tempo_t
* t
)
150 uint64_t samples_out
= t
->samples_in
/ t
->factor
+ .5;
151 size_t remaining
= samples_out
> t
->samples_out
?
152 (size_t)(samples_out
- t
->samples_out
) : 0;
153 float * buff
= lsx_calloc(128 * t
->channels
, sizeof(*buff
));
156 while (fifo_occupancy(&t
->output_fifo
) < remaining
) {
157 tempo_input(t
, buff
, (size_t) 128);
160 fifo_trim_to(&t
->output_fifo
, remaining
);
166 static void tempo_setup(tempo_t
* t
,
167 double sample_rate
, sox_bool quick_search
, double factor
,
168 double segment_ms
, double search_ms
, double overlap_ms
)
171 t
->quick_search
= quick_search
;
173 t
->segment
= sample_rate
* segment_ms
/ 1000 + .5;
174 t
->search
= sample_rate
* search_ms
/ 1000 + .5;
175 t
->overlap
= max(sample_rate
* overlap_ms
/ 1000 + 4.5, 16);
176 t
->overlap
&= ~7; /* Make divisible by 8 for loop optimisation */
177 if (t
->overlap
* 2 > t
->segment
)
179 t
->overlap_buf
= lsx_malloc(t
->overlap
* t
->channels
* sizeof(*t
->overlap_buf
));
180 max_skip
= ceil(factor
* (t
->segment
- t
->overlap
));
181 t
->process_size
= max(max_skip
+ t
->overlap
, t
->segment
) + t
->search
;
182 memset(fifo_reserve(&t
->input_fifo
, t
->search
/ 2), 0, (t
->search
/ 2) * t
->channels
* sizeof(float));
185 static void tempo_delete(tempo_t
* t
)
187 free(t
->overlap_buf
);
188 fifo_delete(&t
->output_fifo
);
189 fifo_delete(&t
->input_fifo
);
193 static tempo_t
* tempo_create(size_t channels
)
195 tempo_t
* t
= lsx_calloc(1, sizeof(*t
));
196 t
->channels
= channels
;
197 fifo_create(&t
->input_fifo
, t
->channels
* sizeof(float));
198 fifo_create(&t
->output_fifo
, t
->channels
* sizeof(float));
202 /*------------------------------- SoX Wrapper --------------------------------*/
206 sox_bool quick_search
;
207 double factor
, segment_ms
, search_ms
, overlap_ms
;
210 static int getopts(sox_effect_t
* effp
, int argc
, char **argv
)
212 priv_t
* p
= (priv_t
*)effp
->priv
;
213 enum {Default
, Music
, Speech
, Linear
} profile
= Default
;
214 static const double segments_ms
[] = { 82,82, 35 , 20};
215 static const double segments_pow
[] = { 0, 1, .33 , 1};
216 static const double overlaps_div
[] = {6.833, 7, 2.5 , 2};
217 static const double searches_div
[] = {5.587, 6, 2.14, 2};
219 lsx_getopt_t optstate
;
220 lsx_getopt_init(argc
, argv
, "+qmls", NULL
, lsx_getopt_flag_none
, 1, &optstate
);
222 p
->segment_ms
= p
->search_ms
= p
->overlap_ms
= HUGE_VAL
;
223 while ((c
= lsx_getopt(&optstate
)) != -1) switch (c
) {
224 case 'q': p
->quick_search
= sox_true
; break;
225 case 'm': profile
= Music
; break;
226 case 's': profile
= Speech
; break;
227 case 'l': profile
= Linear
; p
->search_ms
= 0; break;
228 default: lsx_fail("unknown option `-%c'", optstate
.opt
); return lsx_usage(effp
);
230 argc
-= optstate
.ind
, argv
+= optstate
.ind
;
231 do { /* break-able block */
232 NUMERIC_PARAMETER(factor
,0.1 , 100 )
233 NUMERIC_PARAMETER(segment_ms
, 10 , 120)
234 NUMERIC_PARAMETER(search_ms
, 0 , 30 )
235 NUMERIC_PARAMETER(overlap_ms
, 0 , 30 )
238 if (p
->segment_ms
== HUGE_VAL
)
239 p
->segment_ms
= max(10, segments_ms
[profile
] / max(pow(p
->factor
, segments_pow
[profile
]), 1));
240 if (p
->overlap_ms
== HUGE_VAL
)
241 p
->overlap_ms
= p
->segment_ms
/ overlaps_div
[profile
];
242 if (p
->search_ms
== HUGE_VAL
)
243 p
->search_ms
= p
->segment_ms
/ searches_div
[profile
];
245 p
->overlap_ms
= min(p
->overlap_ms
, p
->segment_ms
/ 2);
246 lsx_report("quick_search=%u factor=%g segment=%g search=%g overlap=%g",
247 p
->quick_search
, p
->factor
, p
->segment_ms
, p
->search_ms
, p
->overlap_ms
);
248 return argc
? lsx_usage(effp
) : SOX_SUCCESS
;
251 static int start(sox_effect_t
* effp
)
253 priv_t
* p
= (priv_t
*)effp
->priv
;
258 p
->tempo
= tempo_create((size_t)effp
->in_signal
.channels
);
259 tempo_setup(p
->tempo
, effp
->in_signal
.rate
, p
->quick_search
, p
->factor
,
260 p
->segment_ms
, p
->search_ms
, p
->overlap_ms
);
262 effp
->out_signal
.length
= SOX_UNKNOWN_LEN
;
263 if (effp
->in_signal
.length
!= SOX_UNKNOWN_LEN
) {
264 uint64_t in_length
= effp
->in_signal
.length
/ effp
->in_signal
.channels
;
265 uint64_t out_length
= in_length
/ p
->factor
+ .5;
266 effp
->out_signal
.length
= out_length
* effp
->in_signal
.channels
;
272 static int flow(sox_effect_t
* effp
, const sox_sample_t
* ibuf
,
273 sox_sample_t
* obuf
, size_t * isamp
, size_t * osamp
)
275 priv_t
* p
= (priv_t
*)effp
->priv
;
276 size_t i
, odone
= *osamp
/= effp
->in_signal
.channels
;
277 float const * s
= tempo_output(p
->tempo
, NULL
, &odone
);
280 for (i
= 0; i
< odone
* effp
->in_signal
.channels
; ++i
)
281 *obuf
++ = SOX_FLOAT_32BIT_TO_SAMPLE(*s
++, effp
->clips
);
283 if (*isamp
&& odone
< *osamp
) {
284 float * t
= tempo_input(p
->tempo
, NULL
, *isamp
/ effp
->in_signal
.channels
);
285 for (i
= *isamp
; i
; --i
)
286 *t
++ = SOX_SAMPLE_TO_FLOAT_32BIT(*ibuf
++, effp
->clips
);
287 tempo_process(p
->tempo
);
291 *osamp
= odone
* effp
->in_signal
.channels
;
295 static int drain(sox_effect_t
* effp
, sox_sample_t
* obuf
, size_t * osamp
)
297 priv_t
* p
= (priv_t
*)effp
->priv
;
298 static size_t isamp
= 0;
299 tempo_flush(p
->tempo
);
300 return flow(effp
, 0, obuf
, &isamp
, osamp
);
303 static int stop(sox_effect_t
* effp
)
305 priv_t
* p
= (priv_t
*)effp
->priv
;
306 tempo_delete(p
->tempo
);
310 sox_effect_handler_t
const * lsx_tempo_effect_fn(void)
312 static sox_effect_handler_t handler
= {
313 "tempo", "[-q] [-m | -s | -l] factor [segment-ms [search-ms [overlap-ms]]]",
314 SOX_EFF_MCHAN
| SOX_EFF_LENGTH
,
315 getopts
, start
, flow
, drain
, stop
, NULL
, sizeof(priv_t
)
320 /*---------------------------------- pitch -----------------------------------*/
322 static int pitch_getopts(sox_effect_t
* effp
, int argc
, char **argv
)
325 char dummy
, arg
[100], **argv2
= lsx_malloc(argc
* sizeof(*argv2
));
326 int result
, pos
= (argc
> 1 && !strcmp(argv
[1], "-q"))? 2 : 1;
328 if (argc
<= pos
|| sscanf(argv
[pos
], "%lf %c", &d
, &dummy
) != 1)
329 return lsx_usage(effp
);
331 d
= pow(2., d
/ 1200); /* cents --> factor */
332 sprintf(arg
, "%g", 1 / d
);
333 memcpy(argv2
, argv
, argc
* sizeof(*argv2
));
335 result
= getopts(effp
, argc
, argv2
);
340 static int pitch_start(sox_effect_t
* effp
)
342 priv_t
* p
= (priv_t
*) effp
->priv
;
343 int result
= start(effp
);
345 effp
->out_signal
.rate
= effp
->in_signal
.rate
/ p
->factor
;
349 sox_effect_handler_t
const * lsx_pitch_effect_fn(void)
351 static sox_effect_handler_t handler
;
352 handler
= *lsx_tempo_effect_fn();
353 handler
.name
= "pitch";
354 handler
.usage
= "[-q] shift-in-cents [segment-ms [search-ms [overlap-ms]]]",
355 handler
.getopts
= pitch_getopts
;
356 handler
.start
= pitch_start
;
357 handler
.flags
&= ~SOX_EFF_LENGTH
;
358 handler
.flags
|= SOX_EFF_RATE
;