mp3: fix error handling in rewrite_tags()
[sox.git] / src / spectrogram.c
blob3dcda69c190e4d76f43e02cca69cf0358e19e9b7
1 /* libSoX effect: Spectrogram (c) 2008-9 robs@users.sourceforge.net
3 * This library is free software; you can redistribute it and/or modify it
4 * under the terms of the GNU Lesser General Public License as published by
5 * the Free Software Foundation; either version 2.1 of the License, or (at
6 * your option) any later version.
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
11 * General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public License
14 * along with this library; if not, write to the Free Software Foundation,
15 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #ifdef NDEBUG /* Enable assert always. */
19 #undef NDEBUG /* Must undef above assert.h or other that might include it. */
20 #endif
22 #include "sox_i.h"
23 #include "fft4g.h"
24 #include <assert.h>
25 #include <math.h>
26 #ifdef HAVE_LIBPNG_PNG_H
27 #include <libpng/png.h>
28 #else
29 #include <png.h>
30 #endif
31 #include <zlib.h>
33 /* For SET_BINARY_MODE: */
34 #include <fcntl.h>
35 #ifdef HAVE_IO_H
36 #include <io.h>
37 #endif
39 #define is_p2(x) !(x & (x - 1))
41 #define MAX_X_SIZE 200000
43 #if SSIZE_MAX < UINT32_MAX
44 #define MAX_Y_SIZE 16384 /* avoid multiplication overflow on 32-bit systems */
45 #else
46 #define MAX_Y_SIZE 200000
47 #endif
49 typedef enum {
50 Window_Hann,
51 Window_Hamming,
52 Window_Bartlett,
53 Window_Rectangular,
54 Window_Kaiser,
55 Window_Dolph
56 } win_type_t;
58 static const lsx_enum_item window_options[] = {
59 LSX_ENUM_ITEM(Window_,Hann)
60 LSX_ENUM_ITEM(Window_,Hamming)
61 LSX_ENUM_ITEM(Window_,Bartlett)
62 LSX_ENUM_ITEM(Window_,Rectangular)
63 LSX_ENUM_ITEM(Window_,Kaiser)
64 LSX_ENUM_ITEM(Window_,Dolph)
65 {0, 0}
68 typedef struct {
69 /* Parameters */
70 double pixels_per_sec;
71 double window_adjust;
72 int x_size0;
73 int y_size;
74 int Y_size;
75 int dB_range;
76 int gain;
77 int spectrum_points;
78 int perm;
79 sox_bool monochrome;
80 sox_bool light_background;
81 sox_bool high_colour;
82 sox_bool slack_overlap;
83 sox_bool no_axes;
84 sox_bool normalize;
85 sox_bool raw;
86 sox_bool alt_palette;
87 sox_bool truncate;
88 win_type_t win_type;
89 const char *out_name;
90 const char *title;
91 const char *comment;
92 const char *duration_str;
93 const char *start_time_str;
94 sox_bool using_stdout; /* output image to stdout */
96 /* Shared work area */
97 double *shared;
98 double **shared_ptr;
100 /* Per-channel work area */
101 int WORK; /* Start of work area is marked by this dummy variable. */
102 uint64_t skip;
103 int dft_size;
104 int step_size;
105 int block_steps;
106 int block_num;
107 int rows;
108 int cols;
109 int read;
110 int x_size;
111 int end;
112 int end_min;
113 int last_end;
114 sox_bool truncated;
115 double *buf; /* [dft_size] */
116 double *dft_buf; /* [dft_size] */
117 double *window; /* [dft_size + 1] */
118 double block_norm;
119 double max;
120 double *magnitudes; /* [dft_size / 2 + 1] */
121 float *dBfs;
122 } priv_t;
124 #define secs(cols) \
125 ((double)(cols) * p->step_size * p->block_steps / effp->in_signal.rate)
127 static const unsigned char alt_palette[] = {
128 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x01, 0x05,
129 0x00, 0x01, 0x08, 0x00, 0x01, 0x0a, 0x00, 0x01, 0x0b,
130 0x00, 0x01, 0x0e, 0x01, 0x02, 0x10, 0x01, 0x02, 0x12,
131 0x01, 0x02, 0x15, 0x01, 0x02, 0x16, 0x01, 0x02, 0x18,
132 0x01, 0x03, 0x1b, 0x01, 0x03, 0x1d, 0x01, 0x03, 0x1f,
133 0x01, 0x03, 0x20, 0x01, 0x03, 0x22, 0x01, 0x03, 0x24,
134 0x01, 0x03, 0x25, 0x01, 0x03, 0x27, 0x01, 0x03, 0x28,
135 0x01, 0x03, 0x2a, 0x01, 0x03, 0x2c, 0x01, 0x03, 0x2e,
136 0x01, 0x03, 0x2f, 0x01, 0x03, 0x30, 0x01, 0x03, 0x32,
137 0x01, 0x03, 0x34, 0x02, 0x03, 0x36, 0x04, 0x03, 0x38,
138 0x05, 0x03, 0x39, 0x07, 0x03, 0x3b, 0x09, 0x03, 0x3d,
139 0x0b, 0x03, 0x3f, 0x0e, 0x03, 0x41, 0x0f, 0x02, 0x42,
140 0x11, 0x02, 0x44, 0x13, 0x02, 0x46, 0x15, 0x02, 0x48,
141 0x17, 0x02, 0x4a, 0x18, 0x02, 0x4b, 0x1a, 0x02, 0x4d,
142 0x1d, 0x02, 0x4f, 0x20, 0x02, 0x51, 0x24, 0x02, 0x53,
143 0x28, 0x02, 0x55, 0x2b, 0x02, 0x57, 0x30, 0x02, 0x5a,
144 0x33, 0x02, 0x5c, 0x37, 0x02, 0x5f, 0x3b, 0x02, 0x61,
145 0x3e, 0x02, 0x63, 0x42, 0x02, 0x65, 0x45, 0x02, 0x68,
146 0x49, 0x02, 0x6a, 0x4d, 0x02, 0x6c, 0x51, 0x02, 0x6e,
147 0x55, 0x02, 0x70, 0x5a, 0x02, 0x72, 0x5f, 0x02, 0x74,
148 0x63, 0x02, 0x75, 0x68, 0x02, 0x76, 0x6c, 0x02, 0x78,
149 0x70, 0x03, 0x7a, 0x75, 0x03, 0x7c, 0x7a, 0x03, 0x7d,
150 0x7e, 0x03, 0x7e, 0x83, 0x03, 0x80, 0x87, 0x03, 0x82,
151 0x8c, 0x03, 0x84, 0x90, 0x03, 0x85, 0x93, 0x03, 0x83,
152 0x96, 0x03, 0x80, 0x98, 0x03, 0x7e, 0x9b, 0x03, 0x7c,
153 0x9e, 0x03, 0x7a, 0xa0, 0x03, 0x78, 0xa3, 0x03, 0x75,
154 0xa6, 0x03, 0x73, 0xa9, 0x03, 0x71, 0xab, 0x03, 0x6f,
155 0xae, 0x03, 0x6d, 0xb1, 0x03, 0x6a, 0xb3, 0x03, 0x68,
156 0xb6, 0x03, 0x66, 0xba, 0x03, 0x62, 0xbc, 0x03, 0x5e,
157 0xc0, 0x03, 0x5a, 0xc3, 0x03, 0x56, 0xc7, 0x03, 0x52,
158 0xca, 0x03, 0x4e, 0xcd, 0x03, 0x4a, 0xd1, 0x03, 0x46,
159 0xd4, 0x03, 0x43, 0xd7, 0x03, 0x3e, 0xdb, 0x03, 0x3a,
160 0xde, 0x03, 0x36, 0xe2, 0x03, 0x32, 0xe4, 0x03, 0x2f,
161 0xe6, 0x07, 0x2d, 0xe8, 0x0d, 0x2c, 0xea, 0x11, 0x2b,
162 0xec, 0x17, 0x2a, 0xed, 0x1b, 0x29, 0xee, 0x20, 0x28,
163 0xf0, 0x26, 0x27, 0xf2, 0x2a, 0x26, 0xf4, 0x2f, 0x24,
164 0xf5, 0x34, 0x23, 0xf6, 0x39, 0x23, 0xf8, 0x3e, 0x21,
165 0xfa, 0x43, 0x20, 0xfc, 0x49, 0x20, 0xfc, 0x4f, 0x22,
166 0xfc, 0x56, 0x26, 0xfc, 0x5d, 0x2a, 0xfc, 0x64, 0x2c,
167 0xfc, 0x6b, 0x30, 0xfc, 0x72, 0x33, 0xfc, 0x7a, 0x37,
168 0xfd, 0x81, 0x3b, 0xfd, 0x88, 0x3e, 0xfd, 0x8f, 0x42,
169 0xfd, 0x96, 0x45, 0xfd, 0x9e, 0x49, 0xfd, 0xa5, 0x4d,
170 0xfd, 0xac, 0x50, 0xfd, 0xb1, 0x54, 0xfd, 0xb7, 0x58,
171 0xfd, 0xbc, 0x5c, 0xfd, 0xc1, 0x61, 0xfd, 0xc6, 0x65,
172 0xfd, 0xcb, 0x69, 0xfd, 0xd0, 0x6d, 0xfe, 0xd5, 0x71,
173 0xfe, 0xda, 0x76, 0xfe, 0xdf, 0x7a, 0xfe, 0xe4, 0x7e,
174 0xfe, 0xe9, 0x82, 0xfe, 0xee, 0x86, 0xfe, 0xf3, 0x8b,
175 0xfd, 0xf5, 0x8f, 0xfc, 0xf6, 0x93, 0xfb, 0xf7, 0x98,
176 0xfa, 0xf7, 0x9c, 0xf9, 0xf8, 0xa1, 0xf8, 0xf9, 0xa5,
177 0xf7, 0xf9, 0xaa, 0xf6, 0xfa, 0xae, 0xf5, 0xfa, 0xb3,
178 0xf4, 0xfb, 0xb7, 0xf3, 0xfc, 0xbc, 0xf1, 0xfd, 0xc0,
179 0xf0, 0xfd, 0xc5, 0xf0, 0xfe, 0xc9, 0xef, 0xfe, 0xcc,
180 0xef, 0xfe, 0xcf, 0xf0, 0xfe, 0xd1, 0xf0, 0xfe, 0xd4,
181 0xf0, 0xfe, 0xd6, 0xf0, 0xfe, 0xd8, 0xf0, 0xfe, 0xda,
182 0xf1, 0xff, 0xdd, 0xf1, 0xff, 0xdf, 0xf1, 0xff, 0xe1,
183 0xf1, 0xff, 0xe4, 0xf1, 0xff, 0xe6, 0xf2, 0xff, 0xe8,
185 #define alt_palette_len (array_length(alt_palette) / 3)
187 static int getopts(sox_effect_t *effp, int argc, char **argv)
189 priv_t *p = effp->priv;
190 uint64_t dummy;
191 const char *next;
192 int c;
193 lsx_getopt_t optstate;
195 lsx_getopt_init(argc, argv, "+S:d:x:X:y:Y:z:Z:q:p:W:w:st:c:AarmnlhTo:",
196 NULL, lsx_getopt_flag_none, 1, &optstate);
198 p->dB_range = 120;
199 p->spectrum_points = 249;
200 p->perm = 1;
201 p->out_name = "spectrogram.png";
202 p->comment = "Created by SoX";
204 while ((c = lsx_getopt(&optstate)) != -1) {
205 switch (c) {
206 GETOPT_NUMERIC(optstate, 'x', x_size0, 100, MAX_X_SIZE)
207 GETOPT_NUMERIC(optstate, 'X', pixels_per_sec, 1, 5000)
208 GETOPT_NUMERIC(optstate, 'y', y_size, 64, MAX_Y_SIZE)
209 GETOPT_NUMERIC(optstate, 'Y', Y_size, 130, MAX_Y_SIZE)
210 GETOPT_NUMERIC(optstate, 'z', dB_range, 20, 180)
211 GETOPT_NUMERIC(optstate, 'Z', gain, -100, 100)
212 GETOPT_NUMERIC(optstate, 'q', spectrum_points, 0, p->spectrum_points)
213 GETOPT_NUMERIC(optstate, 'p', perm, 1, 6)
214 GETOPT_NUMERIC(optstate, 'W', window_adjust, -10, 10)
215 case 'w': p->win_type = lsx_enum_option(c, optstate.arg, window_options);
216 break;
217 case 's': p->slack_overlap = sox_true; break;
218 case 'A': p->alt_palette = sox_true; break;
219 case 'a': p->no_axes = sox_true; break;
220 case 'r': p->raw = sox_true; break;
221 case 'm': p->monochrome = sox_true; break;
222 case 'n': p->normalize = sox_true; break;
223 case 'l': p->light_background = sox_true; break;
224 case 'h': p->high_colour = sox_true; break;
225 case 'T': p->truncate = sox_true; break;
226 case 't': p->title = optstate.arg; break;
227 case 'c': p->comment = optstate.arg; break;
228 case 'o': p->out_name = optstate.arg; break;
229 case 'S':
230 next = lsx_parseposition(0, optstate.arg, NULL, 0, 0, '=');
231 if (next && !*next) {
232 p->start_time_str = lsx_strdup(optstate.arg);
233 break;
235 return lsx_usage(effp);
236 case 'd':
237 next = lsx_parsesamples(1e5, optstate.arg, &dummy, 't');
238 if (next && !*next) {
239 p->duration_str = lsx_strdup(optstate.arg);
240 break;
242 return lsx_usage(effp);
243 default:
244 lsx_fail("invalid option `-%c'", optstate.opt);
245 return lsx_usage(effp);
249 if (!!p->x_size0 + !!p->pixels_per_sec + !!p->duration_str > 2) {
250 lsx_fail("only two of -x, -X, -d may be given");
251 return SOX_EOF;
254 if (p->y_size && p->Y_size) {
255 lsx_fail("only one of -y, -Y may be given");
256 return SOX_EOF;
259 p->gain = -p->gain;
260 --p->perm;
261 p->spectrum_points += 2;
262 if (p->alt_palette)
263 p->spectrum_points = min(p->spectrum_points, alt_palette_len);
264 p->shared_ptr = &p->shared;
266 if (!strcmp(p->out_name, "-")) {
267 if (effp->global_info->global_info->stdout_in_use_by) {
268 lsx_fail("stdout already in use by `%s'",
269 effp->global_info->global_info->stdout_in_use_by);
270 return SOX_EOF;
272 effp->global_info->global_info->stdout_in_use_by = effp->handler.name;
273 p->using_stdout = sox_true;
276 return optstate.ind != argc || p->win_type == INT_MAX ?
277 lsx_usage(effp) : SOX_SUCCESS;
280 static double make_window(priv_t *p, int end)
282 double sum = 0;
283 double *w = end < 0 ? p->window : p->window + end;
284 double beta;
285 int n = 1 + p->dft_size - abs(end);
286 int i;
288 if (end)
289 memset(p->window, 0, sizeof(*p->window) * (p->dft_size + 1));
291 for (i = 0; i < n; ++i)
292 w[i] = 1;
294 switch (p->win_type) {
295 case Window_Hann: lsx_apply_hann(w, n); break;
296 case Window_Hamming: lsx_apply_hamming(w, n); break;
297 case Window_Bartlett: lsx_apply_bartlett(w, n); break;
298 case Window_Rectangular: break;
299 case Window_Kaiser:
300 beta = lsx_kaiser_beta((p->dB_range + p->gain) *
301 (1.1 + p->window_adjust / 50), .1);
302 lsx_apply_kaiser(w, n, beta);
303 break;
304 default:
305 lsx_apply_dolph(w, n, (p->dB_range + p->gain) *
306 (1.005 + p->window_adjust / 50) + 6);
309 for (i = 0; i < p->dft_size; ++i)
310 sum += p->window[i];
312 /* empirical small window adjustment */
313 for (--n, i = 0; i < p->dft_size; ++i)
314 p->window[i] *= 2 / sum * sqr((double)n / p->dft_size);
316 return sum;
319 static double *rdft_init(size_t n)
321 double *q = lsx_malloc(2 * (n / 2 + 1) * n * sizeof(*q));
322 double *p = q;
323 int i, j;
325 for (j = 0; j <= n / 2; ++j) {
326 for (i = 0; i < n; ++i) {
327 *p++ = cos(2 * M_PI * j * i / n);
328 *p++ = sin(2 * M_PI * j * i / n);
332 return q;
335 #define _ re += in[i] * *q++, im += in[i++] * *q++,
336 static void rdft_p(const double *q, const double *in, double *out, int n)
338 int i, j;
340 for (j = 0; j <= n / 2; ++j) {
341 double re = 0, im = 0;
343 for (i = 0; i < (n & ~7);)
344 _ _ _ _ _ _ _ _ (void)0;
346 while (i < n)
347 _ (void)0;
349 *out++ += re * re + im * im;
353 static int start(sox_effect_t *effp)
355 priv_t *p = effp->priv;
356 double actual;
357 double duration = 0.0;
358 double start_time = 0.0;
359 double pixels_per_sec = p->pixels_per_sec;
360 uint64_t d;
362 memset(&p->WORK, 0, sizeof(*p) - field_offset(priv_t, WORK));
364 if (p->duration_str) {
365 lsx_parsesamples(effp->in_signal.rate, p->duration_str, &d, 't');
366 duration = d / effp->in_signal.rate;
369 if (p->start_time_str) {
370 uint64_t in_length = effp->in_signal.length != SOX_UNKNOWN_LEN ?
371 effp->in_signal.length / effp->in_signal.channels : SOX_UNKNOWN_LEN;
373 if (!lsx_parseposition(effp->in_signal.rate, p->start_time_str, &d,
374 0, in_length, '=') || d == SOX_UNKNOWN_LEN) {
375 lsx_fail("-S option: audio length is unknown");
376 return SOX_EOF;
379 start_time = d / effp->in_signal.rate;
380 p->skip = d;
383 p->x_size = p->x_size0;
385 while (sox_true) {
386 if (!pixels_per_sec && p->x_size && duration)
387 pixels_per_sec = min(5000, p->x_size / duration);
388 else if (!p->x_size && pixels_per_sec && duration)
389 p->x_size = min(MAX_X_SIZE, (int)(pixels_per_sec * duration + .5));
391 if (!duration && effp->in_signal.length != SOX_UNKNOWN_LEN) {
392 duration = effp->in_signal.length /
393 (effp->in_signal.rate * effp->in_signal.channels);
394 duration -= start_time;
395 if (duration <= 0)
396 duration = 1;
397 continue;
398 } else if (!p->x_size) {
399 p->x_size = 800;
400 continue;
401 } else if (!pixels_per_sec) {
402 pixels_per_sec = 100;
403 continue;
405 break;
408 if (p->y_size) {
409 p->dft_size = 2 * (p->y_size - 1);
410 if (!is_p2(p->dft_size) && !effp->flow)
411 p->shared = rdft_init(p->dft_size);
412 } else {
413 int y = max(32, (p->Y_size? p->Y_size : 550) / effp->in_signal.channels - 2);
414 for (p->dft_size = 128; p->dft_size <= y; p->dft_size <<= 1);
417 /* Now that dft_size is set, allocate variable-sized elements of priv_t */
418 p->buf = lsx_calloc(p->dft_size, sizeof(*p->buf));
419 p->dft_buf = lsx_calloc(p->dft_size, sizeof(*p->dft_buf));
420 p->window = lsx_calloc(p->dft_size + 1, sizeof(*p->window));
421 p->magnitudes = lsx_calloc(p->dft_size / 2 + 1, sizeof(*p->magnitudes));
423 if (is_p2(p->dft_size) && !effp->flow)
424 lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
426 lsx_debug("duration=%g x_size=%i pixels_per_sec=%g dft_size=%i",
427 duration, p->x_size, pixels_per_sec, p->dft_size);
429 p->end = p->dft_size;
430 p->rows = (p->dft_size >> 1) + 1;
431 actual = make_window(p, p->last_end = 0);
432 lsx_debug("window_density=%g", actual / p->dft_size);
433 p->step_size = (p->slack_overlap ? sqrt(actual * p->dft_size) : actual) + 0.5;
434 p->block_steps = effp->in_signal.rate / pixels_per_sec;
435 p->step_size =
436 p->block_steps / ceil((double)p->block_steps / p->step_size) + 0.5;
437 p->block_steps = floor((double)p->block_steps / p->step_size + 0.5);
438 p->block_norm = 1.0 / p->block_steps;
439 actual = effp->in_signal.rate / p->step_size / p->block_steps;
440 if (!effp->flow && actual != pixels_per_sec)
441 lsx_report("actual pixels/s = %g", actual);
442 lsx_debug("step_size=%i block_steps=%i", p->step_size, p->block_steps);
443 p->max = -p->dB_range;
444 p->read = (p->step_size - p->dft_size) / 2;
446 return SOX_SUCCESS;
449 static int do_column(sox_effect_t *effp)
451 priv_t *p = effp->priv;
452 int i;
454 if (p->cols == p->x_size) {
455 p->truncated = sox_true;
456 if (!effp->flow)
457 lsx_report("PNG truncated at %g seconds", secs(p->cols));
458 return p->truncate ? SOX_EOF : SOX_SUCCESS;
461 ++p->cols;
462 p->dBfs = lsx_realloc(p->dBfs, p->cols * p->rows * sizeof(*p->dBfs));
464 /* FIXME: allocate in larger steps (for several columns) */
465 for (i = 0; i < p->rows; ++i) {
466 double dBfs = 10 * log10(p->magnitudes[i] * p->block_norm);
467 p->dBfs[(p->cols - 1) * p->rows + i] = dBfs + p->gain;
468 p->max = max(dBfs, p->max);
471 memset(p->magnitudes, 0, p->rows * sizeof(*p->magnitudes));
472 p->block_num = 0;
474 return SOX_SUCCESS;
477 static int flow(sox_effect_t *effp,
478 const sox_sample_t *ibuf, sox_sample_t *obuf,
479 size_t *isamp, size_t *osamp)
481 priv_t *p = effp->priv;
482 size_t len = *isamp = *osamp = min(*isamp, *osamp);
483 int i;
485 memcpy(obuf, ibuf, len * sizeof(*obuf)); /* Pass on audio unaffected */
487 if (p->skip) {
488 if (p->skip >= len) {
489 p->skip -= len;
490 return SOX_SUCCESS;
492 ibuf += p->skip;
493 len -= p->skip;
494 p->skip = 0;
497 while (!p->truncated) {
498 if (p->read == p->step_size) {
499 memmove(p->buf, p->buf + p->step_size,
500 (p->dft_size - p->step_size) * sizeof(*p->buf));
501 p->read = 0;
504 for (; len && p->read < p->step_size; --len, ++p->read, --p->end)
505 p->buf[p->dft_size - p->step_size + p->read] =
506 SOX_SAMPLE_TO_FLOAT_64BIT(*ibuf++,);
508 if (p->read != p->step_size)
509 break;
511 if ((p->end = max(p->end, p->end_min)) != p->last_end)
512 make_window(p, p->last_end = p->end);
514 for (i = 0; i < p->dft_size; ++i)
515 p->dft_buf[i] = p->buf[i] * p->window[i];
517 if (is_p2(p->dft_size)) {
518 lsx_safe_rdft(p->dft_size, 1, p->dft_buf);
519 p->magnitudes[0] += sqr(p->dft_buf[0]);
521 for (i = 1; i < p->dft_size >> 1; ++i)
522 p->magnitudes[i] += sqr(p->dft_buf[2*i]) + sqr(p->dft_buf[2*i+1]);
524 p->magnitudes[p->dft_size >> 1] += sqr(p->dft_buf[1]);
526 else
527 rdft_p(*p->shared_ptr, p->dft_buf, p->magnitudes, p->dft_size);
529 if (++p->block_num == p->block_steps && do_column(effp) == SOX_EOF)
530 return SOX_EOF;
533 return SOX_SUCCESS;
536 static int drain(sox_effect_t *effp, sox_sample_t *obuf_, size_t *osamp)
538 priv_t *p = effp->priv;
540 if (!p->truncated) {
541 sox_sample_t * ibuf = lsx_calloc(p->dft_size, sizeof(*ibuf));
542 sox_sample_t * obuf = lsx_calloc(p->dft_size, sizeof(*obuf));
543 size_t isamp = (p->dft_size - p->step_size) / 2;
544 int left_over = (isamp + p->read) % p->step_size;
546 if (left_over >= p->step_size >> 1)
547 isamp += p->step_size - left_over;
549 lsx_debug("cols=%i left=%i end=%i", p->cols, p->read, p->end);
551 p->end = 0, p->end_min = -p->dft_size;
553 if (flow(effp, ibuf, obuf, &isamp, &isamp) == SOX_SUCCESS && p->block_num) {
554 p->block_norm *= (double)p->block_steps / p->block_num;
555 do_column(effp);
558 lsx_debug("flushed cols=%i left=%i end=%i", p->cols, p->read, p->end);
559 free(obuf);
560 free(ibuf);
563 *osamp = 0;
565 return SOX_SUCCESS;
568 enum {
569 Background,
570 Text,
571 Labels,
572 Grid,
573 fixed_palette
576 static unsigned colour(const priv_t *p, double x)
578 unsigned c = x < -p->dB_range ? 0 : x >= 0 ? p->spectrum_points - 1 :
579 1 + (1 + x / p->dB_range) * (p->spectrum_points - 2);
580 return fixed_palette + c;
583 static void make_palette(const priv_t *p, png_color *palette)
585 static const unsigned char black[] = { 0x00, 0x00, 0x00 };
586 static const unsigned char dgrey[] = { 0x3f, 0x3f, 0x3f };
587 static const unsigned char mgrey[] = { 0x7f, 0x7f, 0x7f };
588 static const unsigned char lgrey[] = { 0xbf, 0xbf, 0xbf };
589 static const unsigned char white[] = { 0xff, 0xff, 0xff };
590 static const unsigned char lbgnd[] = { 0xdd, 0xd8, 0xd0 };
591 static const unsigned char mbgnd[] = { 0xdf, 0xdf, 0xdf };
592 int i;
594 if (p->light_background) {
595 memcpy(palette++, p->monochrome ? mbgnd : lbgnd, 3);
596 memcpy(palette++, black, 3);
597 memcpy(palette++, dgrey, 3);
598 memcpy(palette++, dgrey, 3);
599 } else {
600 memcpy(palette++, black, 3);
601 memcpy(palette++, white, 3);
602 memcpy(palette++, lgrey, 3);
603 memcpy(palette++, mgrey, 3);
606 for (i = 0; i < p->spectrum_points; ++i) {
607 double c[3];
608 double x = (double)i / (p->spectrum_points - 1);
609 int at = p->light_background ? p->spectrum_points - 1 - i : i;
611 if (p->monochrome) {
612 c[2] = c[1] = c[0] = x;
613 if (p->high_colour) {
614 c[(1 + p->perm) % 3] = x < .4? 0 : 5 / 3. * (x - .4);
615 if (p->perm < 3)
616 c[(2 + p->perm) % 3] = x < .4? 0 : 5 / 3. * (x - .4);
618 palette[at].red = .5 + 255 * c[0];
619 palette[at].green= .5 + 255 * c[1];
620 palette[at].blue = .5 + 255 * c[2];
621 continue;
624 if (p->high_colour) {
625 static const int states[3][7] = {
626 { 4, 5, 0, 0, 2, 1, 1 },
627 { 0, 0, 2, 1, 1, 3, 2 },
628 { 4, 1, 1, 3, 0, 0, 2 },
630 int j, phase_num = min(7 * x, 6);
632 for (j = 0; j < 3; ++j) {
633 switch (states[j][phase_num]) {
634 case 0: c[j] = 0; break;
635 case 1: c[j] = 1; break;
636 case 2: c[j] = sin((7 * x - phase_num) * M_PI / 2); break;
637 case 3: c[j] = cos((7 * x - phase_num) * M_PI / 2); break;
638 case 4: c[j] = 7 * x - phase_num; break;
639 case 5: c[j] = 1 - (7 * x - phase_num); break;
642 } else if (p->alt_palette) {
643 int n = (double)i / (p->spectrum_points - 1) * (alt_palette_len - 1) + .5;
644 c[0] = alt_palette[3 * n + 0] / 255.;
645 c[1] = alt_palette[3 * n + 1] / 255.;
646 c[2] = alt_palette[3 * n + 2] / 255.;
647 } else {
648 if (x < .13) c[0] = 0;
649 else if (x < .73) c[0] = 1 * sin((x - .13) / .60 * M_PI / 2);
650 else c[0] = 1;
651 if (x < .60) c[1] = 0;
652 else if (x < .91) c[1] = 1 * sin((x - .60) / .31 * M_PI / 2);
653 else c[1] = 1;
654 if (x < .60) c[2] = .5 * sin((x - .00) / .60 * M_PI);
655 else if (x < .78) c[2] = 0;
656 else c[2] = (x - .78) / .22;
659 palette[at].red = .5 + 255 * c[p->perm % 3];
660 palette[at].green= .5 + 255 * c[(1 + p->perm + (p->perm % 2)) % 3];
661 palette[at].blue = .5 + 255 * c[(2 + p->perm - (p->perm % 2)) % 3];
665 static const Bytef fixed[] = {
666 0x78, 0xda, 0x65, 0x54, 0xa1, 0xb6, 0xa5, 0x30, 0x0c, 0x44, 0x56, 0x56,
667 0x3e, 0x59, 0xf9, 0x24, 0x72, 0x65, 0x25, 0x32, 0x9f, 0x80, 0x7c, 0x32,
668 0x12, 0x59, 0x59, 0x89, 0x44, 0x22, 0x2b, 0xdf, 0x27, 0x3c, 0x79, 0xe5,
669 0xca, 0xfd, 0x0c, 0x64, 0xe5, 0x66, 0x92, 0x94, 0xcb, 0x9e, 0x9d, 0x7b,
670 0xb8, 0xe4, 0x4c, 0xa7, 0x61, 0x9a, 0x04, 0xa6, 0xe9, 0x81, 0x64, 0x98,
671 0x92, 0xc4, 0x44, 0xf4, 0x5e, 0x20, 0xea, 0xf2, 0x53, 0x22, 0x6d, 0xe7,
672 0xc9, 0x9f, 0x9f, 0x17, 0x34, 0x4b, 0xa3, 0x98, 0x32, 0xb5, 0x1d, 0x0b,
673 0xf9, 0x3c, 0xf3, 0x79, 0xec, 0x5f, 0x96, 0x67, 0xec, 0x8c, 0x29, 0x65,
674 0x20, 0xa5, 0x38, 0xe1, 0x0f, 0x10, 0x4a, 0x34, 0x8d, 0x5b, 0x7a, 0x3c,
675 0xb9, 0xbf, 0xf7, 0x00, 0x33, 0x34, 0x40, 0x7f, 0xd8, 0x63, 0x97, 0x84,
676 0x20, 0x49, 0x72, 0x2e, 0x05, 0x24, 0x55, 0x80, 0xb0, 0x94, 0xd6, 0x53,
677 0x0f, 0x80, 0x3d, 0x5c, 0x6b, 0x10, 0x51, 0x41, 0xdc, 0x25, 0xe2, 0x10,
678 0x2a, 0xc3, 0x50, 0x9c, 0x89, 0xf6, 0x1e, 0x23, 0xf8, 0x52, 0xbe, 0x5f,
679 0xce, 0x73, 0x2d, 0xe5, 0x92, 0x44, 0x6c, 0x7a, 0xf3, 0x6d, 0x79, 0x2a,
680 0x3b, 0x8f, 0xfb, 0xe6, 0x7a, 0xb7, 0xe3, 0x9e, 0xf4, 0xa6, 0x9e, 0xf5,
681 0xa1, 0x39, 0xc5, 0x70, 0xdb, 0xb7, 0x13, 0x28, 0x87, 0xb5, 0xdb, 0x9b,
682 0xd5, 0x59, 0xe2, 0xa3, 0xb5, 0xef, 0xb2, 0x8d, 0xb3, 0x74, 0xb9, 0x24,
683 0xbe, 0x96, 0x65, 0x61, 0xb9, 0x2e, 0xf7, 0x26, 0xd0, 0xe7, 0x82, 0x5f,
684 0x9c, 0x17, 0xff, 0xe5, 0x92, 0xab, 0x3f, 0xe2, 0x32, 0xf4, 0x87, 0x79,
685 0xae, 0x9e, 0x12, 0x39, 0xd9, 0x1b, 0x0c, 0xfe, 0x97, 0xb6, 0x22, 0xee,
686 0xab, 0x6a, 0xf6, 0xf3, 0xe7, 0xdc, 0x55, 0x53, 0x1c, 0x5d, 0xf9, 0x3f,
687 0xad, 0xf9, 0xde, 0xfa, 0x7a, 0xb5, 0x76, 0x1c, 0x96, 0xa7, 0x1a, 0xd4,
688 0x8f, 0xdc, 0xdf, 0xcf, 0x55, 0x34, 0x0e, 0xce, 0x7b, 0x4e, 0xf8, 0x19,
689 0xf5, 0xef, 0x69, 0x4c, 0x99, 0x79, 0x1b, 0x79, 0xb4, 0x89, 0x44, 0x37,
690 0xdf, 0x04, 0xa4, 0xb1, 0x90, 0x44, 0xe6, 0x01, 0xb1, 0xef, 0xed, 0x3e,
691 0x03, 0xe2, 0x93, 0xf3, 0x00, 0xc3, 0xbf, 0x0c, 0x5b, 0x8c, 0x41, 0x2c,
692 0x70, 0x1c, 0x60, 0xad, 0xed, 0xf4, 0x1f, 0xfa, 0x94, 0xe2, 0xbf, 0x0c,
693 0x87, 0xad, 0x1e, 0x5f, 0x56, 0x07, 0x1c, 0xa1, 0x5e, 0xce, 0x57, 0xaf,
694 0x7f, 0x08, 0xa2, 0xc0, 0x1c, 0x0c, 0xbe, 0x1b, 0x3f, 0x2f, 0x39, 0x5f,
695 0xd9, 0x66, 0xe6, 0x1e, 0x15, 0x59, 0x29, 0x98, 0x31, 0xaf, 0xa1, 0x34,
696 0x7c, 0x1d, 0xf5, 0x9f, 0xe2, 0x34, 0x6b, 0x03, 0xa4, 0x03, 0xa2, 0xb1,
697 0x06, 0x08, 0xbd, 0x3e, 0x7a, 0x24, 0xf8, 0x8d, 0x3a, 0xb8, 0xf3, 0x77,
698 0x1e, 0x2f, 0xb5, 0x6b, 0x46, 0x0b, 0x10, 0x6f, 0x36, 0xa2, 0xc1, 0xf4,
699 0xde, 0x17, 0xd5, 0xaf, 0xd1, 0xf4, 0x66, 0x73, 0x99, 0x8d, 0x47, 0x1a,
700 0x3d, 0xaf, 0xc5, 0x44, 0x69, 0xc4, 0x5e, 0x7f, 0xc4, 0x52, 0xf4, 0x51,
701 0x3d, 0x95, 0xfb, 0x1b, 0xd0, 0xfd, 0xfd, 0xfa, 0x80, 0xdf, 0x1f, 0xfc,
702 0x7d, 0xdc, 0xdf, 0x10, 0xf4, 0xc8, 0x28, 0x5d, 0xc4, 0xb7, 0x62, 0x7f,
703 0xd6, 0x59, 0x72, 0x6a, 0xca, 0xbf, 0xfb, 0x9b, 0x1f, 0xe0,
706 static unsigned char *font;
708 #define font_x 5
709 #define font_y 12
710 #define font_X (font_x + 1)
712 #define pixel(x,y) pixels[(y) * cols + (x)]
713 #define print_at(x,y,c,t) print_at_(pixels,cols,x,y,c,t,0)
714 #define print_up(x,y,c,t) print_at_(pixels,cols,x,y,c,t,1)
716 static void print_at_(png_byte *pixels, int cols, int x, int y, int c,
717 const char *text, int orientation)
719 for (; *text; ++text) {
720 int pos = ((*text < ' ' || *text > '~'? '~' + 1 : *text) - ' ') * font_y;
721 int i, j;
723 for (i = 0; i < font_y; ++i) {
724 unsigned line = font[pos++];
725 for (j = 0; j < font_x; ++j, line <<= 1) {
726 if (line & 0x80) {
727 switch (orientation) {
728 case 0: pixel(x + j, y - i) = c; break;
729 case 1: pixel(x + i, y + j) = c; break;
735 switch (orientation) {
736 case 0: x += font_X; break;
737 case 1: y += font_X; break;
742 static int axis(double to, int max_steps, double *limit, char **prefix)
744 double scale = 1, step = max(1, 10 * to);
745 int i, prefix_num = 0;
747 if (max_steps) {
748 double try;
749 double log_10 = HUGE_VAL;
750 double min_step = (to *= 10) / max_steps;
752 for (i = 5; i; i >>= 1) {
753 if ((try = ceil(log10(min_step * i))) <= log_10) {
754 step = pow(10., log_10 = try) / i;
755 log_10 -= i > 1;
759 prefix_num = floor(log_10 / 3);
760 scale = pow(10., -3. * prefix_num);
763 *prefix = "pnum-kMGTPE" + prefix_num + (prefix_num? 4 : 11);
764 *limit = to * scale;
766 return step * scale + .5;
769 #define below 48
770 #define left 58
771 #define between 37
772 #define spectrum_width 14
773 #define right 35
775 static int stop(sox_effect_t *effp) /* only called, by end(), on flow 0 */
777 priv_t *p = effp->priv;
778 FILE *file;
779 uLong font_len = 96 * font_y;
780 int chans = effp->in_signal.channels;
781 int c_rows = p->rows * chans + chans - 1;
782 int rows = p->raw? c_rows : below + c_rows + 30 + 20 * !!p->title;
783 int cols = p->raw? p->cols : left + p->cols + between + spectrum_width + right;
784 png_byte *pixels = lsx_malloc(cols * rows * sizeof(*pixels));
785 png_bytepp png_rows = lsx_malloc(rows * sizeof(*png_rows));
786 png_structp png = png_create_write_struct(PNG_LIBPNG_VER_STRING, 0, 0,0);
787 png_infop png_info = png_create_info_struct(png);
788 png_color palette[256];
789 int i, j, k;
790 int base;
791 int step;
792 int tick_len = 3 - p->no_axes;
793 char text[200];
794 char *prefix;
795 double limit;
796 float autogain = 0.0; /* Is changed if the -n flag was supplied */
798 free(p->shared);
800 if (p->using_stdout) {
801 SET_BINARY_MODE(stdout);
802 file = stdout;
803 } else {
804 file = fopen(p->out_name, "wb");
805 if (!file) {
806 lsx_fail("failed to create `%s': %s", p->out_name, strerror(errno));
807 goto error;
811 lsx_debug("signal-max=%g", p->max);
813 font = lsx_malloc(font_len);
814 assert(uncompress(font, &font_len, fixed, sizeof(fixed)) == Z_OK);
816 make_palette(p, palette);
817 memset(pixels, Background, cols * rows * sizeof(*pixels));
819 png_init_io(png, file);
820 png_set_PLTE(png, png_info, palette, fixed_palette + p->spectrum_points);
821 png_set_IHDR(png, png_info, cols, rows, 8,
822 PNG_COLOR_TYPE_PALETTE, PNG_INTERLACE_NONE,
823 PNG_COMPRESSION_TYPE_DEFAULT, PNG_FILTER_TYPE_DEFAULT);
825 for (j = 0; j < rows; ++j) /* Put (0,0) at bottom-left of PNG */
826 png_rows[rows - 1 - j] = pixels + j * cols;
828 /* Spectrogram */
830 if (p->normalize)
831 /* values are already in dB, so we subtract the maximum value
832 * (which will normally be negative) to raise the maximum to 0.0.
834 autogain = -p->max;
836 for (k = 0; k < chans; ++k) {
837 priv_t *q = (effp - effp->flow + k)->priv;
839 if (p->normalize) {
840 float *fp = q->dBfs;
841 for (i = p->rows * p->cols; i > 0; i--)
842 *fp++ += autogain;
845 base = !p->raw * below + (chans - 1 - k) * (p->rows + 1);
847 for (j = 0; j < p->rows; ++j) {
848 for (i = 0; i < p->cols; ++i)
849 pixel(!p->raw * left + i, base + j) = colour(p, q->dBfs[i*p->rows + j]);
851 if (!p->raw && !p->no_axes) /* Y-axis lines */
852 pixel(left - 1, base + j) = pixel(left + p->cols, base + j) = Grid;
855 if (!p->raw && !p->no_axes) /* X-axis lines */
856 for (i = -1; i <= p->cols; ++i)
857 pixel(left + i, base - 1) = pixel(left + i, base + p->rows) = Grid;
860 if (!p->raw) {
861 if (p->title && (i = strlen(p->title) * font_X) < cols + 1) /* Title */
862 print_at((cols - i) / 2, rows - font_y, Text, p->title);
864 if (strlen(p->comment) * font_X < cols + 1) /* Footer comment */
865 print_at(1, font_y, Text, p->comment);
867 /* X-axis */
868 step = axis(secs(p->cols), p->cols / (font_X * 9 / 2), &limit, &prefix);
869 sprintf(text, "Time (%.1ss)", prefix); /* Axis label */
870 print_at(left + (p->cols - font_X * strlen(text)) / 2, 24, Text, text);
872 for (i = 0; i <= limit; i += step) {
873 int x = limit ? (double)i / limit * p->cols + .5 : 0;
874 int y;
876 for (y = 0; y < tick_len; ++y) /* Ticks */
877 pixel(left-1+x, below-1-y) = pixel(left-1+x, below+c_rows+y) = Grid;
879 if (step == 5 && (i%10))
880 continue;
882 sprintf(text, "%g", .1 * i); /* Tick labels */
883 x = left + x - 3 * strlen(text);
884 print_at(x, below - 6, Labels, text);
885 print_at(x, below + c_rows + 14, Labels, text);
888 /* Y-axis */
889 step = axis(effp->in_signal.rate / 2,
890 (p->rows - 1) / ((font_y * 3 + 1) >> 1), &limit, &prefix);
891 sprintf(text, "Frequency (%.1sHz)", prefix); /* Axis label */
892 print_up(10, below + (c_rows - font_X * strlen(text)) / 2, Text, text);
894 for (k = 0; k < chans; ++k) {
895 base = below + k * (p->rows + 1);
897 for (i = 0; i <= limit; i += step) {
898 int y = limit ? (double)i / limit * (p->rows - 1) + .5 : 0;
899 int x;
901 for (x = 0; x < tick_len; ++x) /* Ticks */
902 pixel(left-1-x, base+y) = pixel(left+p->cols+x, base+y) = Grid;
904 if ((step == 5 && (i%10)) || (!i && k && chans > 1))
905 continue;
907 sprintf(text, i?"%5g":" DC", .1 * i); /* Tick labels */
908 print_at(left - 4 - font_X * 5, base + y + 5, Labels, text);
909 sprintf(text, i?"%g":"DC", .1 * i);
910 print_at(left + p->cols + 6, base + y + 5, Labels, text);
914 /* Z-axis */
915 k = min(400, c_rows);
916 base = below + (c_rows - k) / 2;
917 print_at(cols - right - 2 - font_X, base - 13, Text, "dBFS");/* Axis label */
918 for (j = 0; j < k; ++j) { /* Spectrum */
919 png_byte b = colour(p, p->dB_range * (j / (k - 1.) - 1));
920 for (i = 0; i < spectrum_width; ++i)
921 pixel(cols - right - 1 - i, base + j) = b;
924 step = 10 * ceil(p->dB_range / 10. * (font_y + 2) / (k - 1));
926 for (i = 0; i <= p->dB_range; i += step) { /* (Tick) labels */
927 int y = (double)i / p->dB_range * (k - 1) + .5;
928 sprintf(text, "%+i", i - p->gain - p->dB_range - (int)(autogain+0.5));
929 print_at(cols - right + 1, base + y + 5, Labels, text);
933 free(font);
935 png_set_rows(png, png_info, png_rows);
936 png_write_png(png, png_info, PNG_TRANSFORM_IDENTITY, NULL);
938 if (!p->using_stdout)
939 fclose(file);
941 error:
942 png_destroy_write_struct(&png, &png_info);
943 free(png_rows);
944 free(pixels);
945 free(p->dBfs);
946 free(p->buf);
947 free(p->dft_buf);
948 free(p->window);
949 free(p->magnitudes);
951 return SOX_SUCCESS;
954 static int end(sox_effect_t *effp)
956 priv_t *p = effp->priv;
958 if (effp->flow == 0)
959 return stop(effp);
961 free(p->dBfs);
963 return SOX_SUCCESS;
966 const sox_effect_handler_t *lsx_spectrogram_effect_fn(void)
968 static sox_effect_handler_t handler = {
969 "spectrogram",
971 SOX_EFF_MODIFY,
972 getopts,
973 start,
974 flow,
975 drain,
976 end,
978 sizeof(priv_t)
980 static const char *lines[] = {
981 "[options]",
982 "\t-x num\tX-axis size in pixels; default derived or 800",
983 "\t-X num\tX-axis pixels/second; default derived or 100",
984 "\t-y num\tY-axis size in pixels (per channel); slow if not 1 + 2^n",
985 "\t-Y num\tY-height total (i.e. not per channel); default 550",
986 "\t-z num\tZ-axis range in dB; default 120",
987 "\t-Z num\tZ-axis maximum in dBFS; default 0",
988 "\t-n\tSet Z-axis maximum to the brightest pixel",
989 "\t-q num\tZ-axis quantisation (0 - 249); default 249",
990 "\t-w name\tWindow: Hann(default)/Hamming/Bartlett/Rectangular/Kaiser/Dolph",
991 "\t-W num\tWindow adjust parameter (-10 - 10); applies only to Kaiser/Dolph",
992 "\t-s\tSlack overlap of windows",
993 "\t-a\tSuppress axis lines",
994 "\t-r\tRaw spectrogram; no axes or legends",
995 "\t-l\tLight background",
996 "\t-m\tMonochrome",
997 "\t-h\tHigh colour",
998 "\t-p num\tPermute colours (1 - 6); default 1",
999 "\t-A\tAlternative, inferior, fixed colour-set (for compatibility only)",
1000 "\t-t text\tTitle text",
1001 "\t-c text\tComment text",
1002 "\t-o text\tOutput file name; default `spectrogram.png'",
1003 "\t-d time\tAudio duration to fit to X-axis; e.g. 1:00, 48",
1004 "\t-S position\tStart the spectrogram at the given input position",
1006 static char *usage;
1007 handler.usage = lsx_usage_lines(&usage, lines, array_length(lines));
1008 return &handler;