id3: fix COMM frame handling
[sox.git] / src / wav.c
blob3f6beb4517198a4a8499eb14d74976007ea41f73
1 /* libSoX microsoft's WAVE sound format handler
3 * Copyright 1998-2006 Chris Bagwell and SoX Contributors
4 * Copyright 1997 Graeme W. Gill, 93/5/17
5 * Copyright 1992 Rick Richardson
6 * Copyright 1991 Lance Norskog And Sundry Contributors
8 * Info for format tags can be found at:
9 * http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
13 #include "sox_i.h"
15 #include <string.h>
16 #include <stdlib.h>
17 #include <stdio.h>
19 #include "ima_rw.h"
20 #include "adpcm.h"
22 #ifdef HAVE_LIBGSM
23 #ifdef HAVE_GSM_GSM_H
24 #include <gsm/gsm.h>
25 #else
26 #include <gsm.h>
27 #endif
28 #endif
30 /* Magic length sometimes used to indicate unknown or too large size.
31 * When detected on inputs, disable any length logic.
/* startread() compares the data chunk length against this value and, on a
 * match, stops trusting the length stored in the header. */
33 #define MS_UNSPEC 0x7ffff000
/* wFormatTag codes as stored in the fmt chunk. Each one is given a
 * display name (and, where decoding is implemented, an encoding and a
 * fmt-chunk parser) in the wave_formats[] table. */
35 #define WAVE_FORMAT_UNKNOWN 0x0000
36 #define WAVE_FORMAT_PCM 0x0001
37 #define WAVE_FORMAT_ADPCM 0x0002
38 #define WAVE_FORMAT_IEEE_FLOAT 0x0003
39 #define WAVE_FORMAT_IBM_CVSD 0x0005
40 #define WAVE_FORMAT_ALAW 0x0006
41 #define WAVE_FORMAT_MULAW 0x0007
42 #define WAVE_FORMAT_OKI_ADPCM 0x0010
43 #define WAVE_FORMAT_IMA_ADPCM 0x0011
44 #define WAVE_FORMAT_MEDIASPACE_ADPCM 0x0012
45 #define WAVE_FORMAT_SIERRA_ADPCM 0x0013
46 #define WAVE_FORMAT_G723_ADPCM 0x0014
47 #define WAVE_FORMAT_DIGISTD 0x0015
48 #define WAVE_FORMAT_DIGIFIX 0x0016
49 #define WAVE_FORMAT_YAMAHA_ADPCM 0x0020
50 #define WAVE_FORMAT_SONARC 0x0021
51 #define WAVE_FORMAT_TRUESPEECH 0x0022
52 #define WAVE_FORMAT_ECHOSC1 0x0023
53 #define WAVE_FORMAT_AUDIOFILE_AF36 0x0024
54 #define WAVE_FORMAT_APTX 0x0025
55 #define WAVE_FORMAT_AUDIOFILE_AF10 0x0026
56 #define WAVE_FORMAT_DOLBY_AC2 0x0030
57 #define WAVE_FORMAT_GSM610 0x0031
58 #define WAVE_FORMAT_ADPCME 0x0033
59 #define WAVE_FORMAT_CONTROL_RES_VQLPC 0x0034
60 #define WAVE_FORMAT_DIGIREAL 0x0035
61 #define WAVE_FORMAT_DIGIADPCM 0x0036
62 #define WAVE_FORMAT_CONTROL_RES_CR10 0x0037
63 #define WAVE_FORMAT_ROCKWELL_ADPCM 0x003b
64 #define WAVE_FORMAT_ROCKWELL_DIGITALK 0x003c
65 #define WAVE_FORMAT_G721_ADPCM 0x0040
66 #define WAVE_FORMAT_G728_CELP 0x0041
67 #define WAVE_FORMAT_MPEG 0x0050
68 #define WAVE_FORMAT_MPEGLAYER3 0x0055
69 #define WAVE_FORMAT_G726_ADPCM 0x0064
70 #define WAVE_FORMAT_G722_ADPCM 0x0065
71 #define WAVE_FORMAT_CREATIVE_ADPCM 0x0200
72 #define WAVE_FORMAT_CREATIVE_FSP8 0x0202
73 #define WAVE_FORMAT_CREATIVE_FSP10 0x0203
74 #define WAVE_FORMAT_FM_TOWNS_SND 0x0300
75 #define WAVE_FORMAT_OLIGSM 0x1000
76 #define WAVE_FORMAT_OLIADPCM 0x1001
77 #define WAVE_FORMAT_OLISBC 0x1003
78 #define WAVE_FORMAT_OLIOPR 0x1004
/* Wrapper tag: wav_read_fmt() replaces it with the sub-format tag read
 * from the extended part of the fmt chunk. */
79 #define WAVE_FORMAT_EXTENSIBLE 0xfffe
81 /* To allow padding to samplesPerBlock. Works, but currently never true. */
/* When non-zero, xxxAdpcmWriteBlock() adds a full samplesPerBlock to the
 * sample count for every block written instead of only the samples that
 * were actually supplied. */
82 static const size_t pad_nsamps = sox_false;
84 /* Private data for .wav file */
/* Per-stream state of the WAV handler, reached through ft->priv. */
85 typedef struct {
86 /* samples/channel reading: starts at total count and is decremented */
87 /* writing: starts at 0 and counts samples written */
88 uint64_t numSamples;
89 size_t dataLength; /* needed for ADPCM writing */
90 unsigned short formatTag; /* What type of encoding file is using */
91 unsigned short samplesPerBlock;
92 unsigned short blockAlign;
93 uint16_t bitsPerSample; /* bits per sample */
94 size_t dataStart; /* needed for seeking */
95 int ignoreSize; /* ignoreSize allows us to process 32-bit WAV files that are
96 * greater than 2 GB and can't be represented by the
97 * 32-bit size field. */
98 /* FIXME: Have some front-end code which sets this flag. */
100 /* following used by *ADPCM wav files */
101 unsigned short nCoefs; /* ADPCM: number of coef sets */
102 short *lsx_ms_adpcm_i_coefs; /* ADPCM: coef sets */
103 void *ms_adpcm_data; /* Private data of adpcm decoder */
104 unsigned char *packet; /* Temporary buffer for packets */
105 short *samples; /* interleaved samples buffer */
106 short *samplePtr; /* Pointer to current sample */
107 short *sampleTop; /* End of samples-buffer */
108 unsigned short blockSamplesRemaining;/* Samples remaining per channel */
109 int state[16]; /* step-size info for *ADPCM writes */
111 #ifdef HAVE_LIBGSM
112 /* following used by GSM 6.10 wav */
113 gsm gsmhandle;
/* Holds 2 * 160 samples: the two GSM frames packed into one 65-byte block */
114 gsm_signal *gsmsample;
/* Next position in gsmsample to read from or write to */
115 int gsmindex;
116 size_t gsmbytecount; /* counts bytes written to data block */
117 #endif
118 } priv_t;
/* One row of the wave_formats[] lookup table. */
120 struct wave_format {
/* wFormatTag value this row describes */
121 uint16_t tag;
/* Display name; wav_find_format() stops at the first row whose name is NULL */
122 const char *name;
/* Encoding that wav_read_fmt() stores in ft->encoding.encoding */
123 sox_encoding_t encoding;
/* Optional parser for the format-specific rest of the fmt chunk; called
 * with the number of fmt bytes still unread, non-zero return means failure */
124 int (*read_fmt)(sox_format_t *ft, uint32_t len);
/* Forward declarations for functions defined further down in this file */
127 static const char *wav_format_str(unsigned tag);
129 static int wavwritehdr(sox_format_t *, int);
131 /****************************************************************************/
132 /* IMA ADPCM Support Functions Section */
133 /****************************************************************************/
135 static int wav_ima_adpcm_fmt(sox_format_t *ft, uint32_t len)
137 priv_t *wav = ft->priv;
138 size_t bytesPerBlock;
139 int err;
141 if (wav->bitsPerSample != 4) {
142 lsx_fail_errno(ft, SOX_EOF,
143 "Can only handle 4-bit IMA ADPCM in wav files");
144 return SOX_EOF;
147 err = lsx_read_fields(ft, &len, "h", &wav->samplesPerBlock);
148 if (err)
149 return SOX_EOF;
151 bytesPerBlock = lsx_ima_bytes_per_block(ft->signal.channels,
152 wav->samplesPerBlock);
154 if (bytesPerBlock != wav->blockAlign || wav->samplesPerBlock % 8 != 1) {
155 lsx_fail_errno(ft, SOX_EOF,
156 "format[%s]: samplesPerBlock(%d) != blockAlign(%d)",
157 wav_format_str(wav->formatTag),
158 wav->samplesPerBlock, wav->blockAlign);
159 return SOX_EOF;
162 wav->packet = lsx_malloc(wav->blockAlign);
163 wav->samples =
164 lsx_malloc(ft->signal.channels * wav->samplesPerBlock * sizeof(short));
166 return SOX_SUCCESS;
171 * ImaAdpcmReadBlock - Grab and decode complete block of samples
174 static unsigned short ImaAdpcmReadBlock(sox_format_t * ft)
176 priv_t * wav = (priv_t *) ft->priv;
177 size_t bytesRead;
178 int samplesThisBlock;
180 /* Pull in the packet and check the header */
181 bytesRead = lsx_readbuf(ft, wav->packet, (size_t)wav->blockAlign);
182 samplesThisBlock = wav->samplesPerBlock;
183 if (bytesRead < wav->blockAlign)
185 /* If it looks like a valid header is around then try and */
186 /* work with partial blocks. Specs say it should be null */
187 /* padded but I guess this is better than trailing quiet. */
188 samplesThisBlock = lsx_ima_samples_in((size_t)0, (size_t)ft->signal.channels, bytesRead, (size_t) 0);
189 if (samplesThisBlock == 0 || samplesThisBlock > wav->samplesPerBlock)
191 lsx_warn("Premature EOF on .wav input file");
192 return 0;
196 wav->samplePtr = wav->samples;
198 /* For a full block, the following should be true: */
199 /* wav->samplesPerBlock = blockAlign - 8byte header + 1 sample in header */
200 lsx_ima_block_expand_i(ft->signal.channels, wav->packet, wav->samples, samplesThisBlock);
201 return samplesThisBlock;
205 /****************************************************************************/
206 /* MS ADPCM Support Functions Section */
207 /****************************************************************************/
209 static int wav_ms_adpcm_fmt(sox_format_t *ft, uint32_t len)
211 priv_t *wav = ft->priv;
212 size_t bytesPerBlock;
213 int i, errct = 0;
214 int err;
216 if (wav->bitsPerSample != 4) {
217 lsx_fail_errno(ft, SOX_EOF,
218 "Can only handle 4-bit MS ADPCM in wav files");
219 return SOX_EOF;
222 err = lsx_read_fields(ft, &len, "hh", &wav->samplesPerBlock, &wav->nCoefs);
223 if (err)
224 return SOX_EOF;
226 bytesPerBlock = lsx_ms_adpcm_bytes_per_block(ft->signal.channels,
227 wav->samplesPerBlock);
229 if (bytesPerBlock != wav->blockAlign) {
230 lsx_fail_errno(ft, SOX_EOF,
231 "format[%s]: samplesPerBlock(%d) != blockAlign(%d)",
232 wav_format_str(wav->formatTag),
233 wav->samplesPerBlock, wav->blockAlign);
234 return SOX_EOF;
237 if (wav->nCoefs < 7 || wav->nCoefs > 0x100) {
238 lsx_fail_errno(ft, SOX_EOF,
239 "ADPCM file nCoefs (%.4hx) makes no sense",
240 wav->nCoefs);
241 return SOX_EOF;
244 if (len < 4 * wav->nCoefs) {
245 lsx_fail_errno(ft, SOX_EOF, "wave header error: cbSize too small");
246 return SOX_EOF;
249 wav->packet = lsx_malloc(wav->blockAlign);
250 wav->samples =
251 lsx_malloc(ft->signal.channels * wav->samplesPerBlock * sizeof(short));
253 /* nCoefs, lsx_ms_adpcm_i_coefs used by adpcm.c */
254 wav->lsx_ms_adpcm_i_coefs = lsx_malloc(wav->nCoefs * 2 * sizeof(short));
255 wav->ms_adpcm_data = lsx_ms_adpcm_alloc(ft->signal.channels);
257 err = lsx_read_fields(ft, &len, "*h",
258 2 * wav->nCoefs, wav->lsx_ms_adpcm_i_coefs);
259 if (err)
260 return SOX_EOF;
262 for (i = 0; i < 14; i++)
263 errct += wav->lsx_ms_adpcm_i_coefs[i] != lsx_ms_adpcm_i_coef[i/2][i%2];
265 if (errct)
266 lsx_warn("base lsx_ms_adpcm_i_coefs differ in %d/14 positions", errct);
268 return SOX_SUCCESS;
273 * AdpcmReadBlock - Grab and decode complete block of samples
276 static unsigned short AdpcmReadBlock(sox_format_t * ft)
278 priv_t * wav = (priv_t *) ft->priv;
279 size_t bytesRead;
280 int samplesThisBlock;
281 const char *errmsg;
283 /* Pull in the packet and check the header */
284 bytesRead = lsx_readbuf(ft, wav->packet, (size_t) wav->blockAlign);
285 samplesThisBlock = wav->samplesPerBlock;
286 if (bytesRead < wav->blockAlign)
288 /* If it looks like a valid header is around then try and */
289 /* work with partial blocks. Specs say it should be null */
290 /* padded but I guess this is better than trailing quiet. */
291 samplesThisBlock = lsx_ms_adpcm_samples_in((size_t)0, (size_t)ft->signal.channels, bytesRead, (size_t)0);
292 if (samplesThisBlock == 0 || samplesThisBlock > wav->samplesPerBlock)
294 lsx_warn("Premature EOF on .wav input file");
295 return 0;
299 errmsg = lsx_ms_adpcm_block_expand_i(wav->ms_adpcm_data, ft->signal.channels, wav->nCoefs, wav->lsx_ms_adpcm_i_coefs, wav->packet, wav->samples, samplesThisBlock);
301 if (errmsg)
302 lsx_warn("%s", errmsg);
304 return samplesThisBlock;
307 /****************************************************************************/
308 /* Common ADPCM Write Function */
309 /****************************************************************************/
311 static int xxxAdpcmWriteBlock(sox_format_t * ft)
313 priv_t * wav = (priv_t *) ft->priv;
314 size_t chans, ct;
315 short *p;
317 chans = ft->signal.channels;
318 p = wav->samplePtr;
319 ct = p - wav->samples;
320 if (ct>=chans) {
321 /* zero-fill samples if needed to complete block */
322 for (p = wav->samplePtr; p < wav->sampleTop; p++) *p=0;
323 /* compress the samples to wav->packet */
324 if (wav->formatTag == WAVE_FORMAT_ADPCM) {
325 lsx_ms_adpcm_block_mash_i((unsigned) chans, wav->samples, wav->samplesPerBlock, wav->state, wav->packet, wav->blockAlign);
326 }else{ /* WAVE_FORMAT_IMA_ADPCM */
327 lsx_ima_block_mash_i((unsigned) chans, wav->samples, wav->samplesPerBlock, wav->state, wav->packet, 9);
329 /* write the compressed packet */
330 if (lsx_writebuf(ft, wav->packet, (size_t) wav->blockAlign) != wav->blockAlign)
332 lsx_fail_errno(ft,SOX_EOF,"write error");
333 return (SOX_EOF);
335 /* update lengths and samplePtr */
336 wav->dataLength += wav->blockAlign;
337 if (pad_nsamps)
338 wav->numSamples += wav->samplesPerBlock;
339 else
340 wav->numSamples += ct/chans;
341 wav->samplePtr = wav->samples;
343 return (SOX_SUCCESS);
346 #ifdef HAVE_LIBGSM
347 /****************************************************************************/
348 /* WAV GSM6.10 support functions */
349 /****************************************************************************/
351 static int wav_gsm_fmt(sox_format_t *ft, uint32_t len)
353 priv_t *wav = ft->priv;
354 int err;
356 err = lsx_read_fields(ft, &len, "h", &wav->samplesPerBlock);
357 if (err)
358 return SOX_EOF;
360 if (wav->blockAlign != 65) {
361 lsx_fail_errno(ft, SOX_EOF, "format[%s]: expects blockAlign(%d) = %d",
362 wav_format_str(wav->formatTag), wav->blockAlign, 65);
363 return SOX_EOF;
366 if (wav->samplesPerBlock != 320) {
367 lsx_fail_errno(ft, SOX_EOF,
368 "format[%s]: expects samplesPerBlock(%d) = %d",
369 wav_format_str(wav->formatTag),
370 wav->samplesPerBlock, 320);
371 return SOX_EOF;
374 return SOX_SUCCESS;
377 /* create the gsm object, malloc buffer for 160*2 samples */
378 static int wavgsminit(sox_format_t * ft)
380 int valueP=1;
381 priv_t * wav = (priv_t *) ft->priv;
382 wav->gsmbytecount=0;
383 wav->gsmhandle=gsm_create();
384 if (!wav->gsmhandle)
386 lsx_fail_errno(ft,SOX_EOF,"cannot create GSM object");
387 return (SOX_EOF);
390 if(gsm_option(wav->gsmhandle,GSM_OPT_WAV49,&valueP) == -1){
391 lsx_fail_errno(ft,SOX_EOF,"error setting gsm_option for WAV49 format. Recompile gsm library with -DWAV49 option and relink sox");
392 return (SOX_EOF);
395 wav->gsmsample=lsx_malloc(sizeof(gsm_signal)*160*2);
396 wav->gsmindex=0;
397 return (SOX_SUCCESS);
400 /*destroy the gsm object and free the buffer */
401 static void wavgsmdestroy(sox_format_t * ft)
403 priv_t * wav = (priv_t *) ft->priv;
404 gsm_destroy(wav->gsmhandle);
405 free(wav->gsmsample);
408 static size_t wavgsmread(sox_format_t * ft, sox_sample_t *buf, size_t len)
410 priv_t * wav = (priv_t *) ft->priv;
411 size_t done=0;
412 int bytes;
413 gsm_byte frame[65];
415 ft->sox_errno = SOX_SUCCESS;
417 /* copy out any samples left from the last call */
418 while(wav->gsmindex && (wav->gsmindex<160*2) && (done < len))
419 buf[done++]=SOX_SIGNED_16BIT_TO_SAMPLE(wav->gsmsample[wav->gsmindex++],);
421 /* read and decode loop, possibly leaving some samples in wav->gsmsample */
422 while (done < len) {
423 wav->gsmindex=0;
424 bytes = lsx_readbuf(ft, frame, (size_t)65);
425 if (bytes <=0)
426 return done;
427 if (bytes<65) {
428 lsx_warn("invalid wav gsm frame size: %d bytes",bytes);
429 return done;
431 /* decode the long 33 byte half */
432 if(gsm_decode(wav->gsmhandle,frame, wav->gsmsample)<0)
434 lsx_fail_errno(ft,SOX_EOF,"error during gsm decode");
435 return 0;
437 /* decode the short 32 byte half */
438 if(gsm_decode(wav->gsmhandle,frame+33, wav->gsmsample+160)<0)
440 lsx_fail_errno(ft,SOX_EOF,"error during gsm decode");
441 return 0;
444 while ((wav->gsmindex <160*2) && (done < len)){
445 buf[done++]=SOX_SIGNED_16BIT_TO_SAMPLE(wav->gsmsample[(wav->gsmindex)++],);
449 return done;
452 static int wavgsmflush(sox_format_t * ft)
454 gsm_byte frame[65];
455 priv_t * wav = (priv_t *) ft->priv;
457 /* zero fill as needed */
458 while(wav->gsmindex<160*2)
459 wav->gsmsample[wav->gsmindex++]=0;
461 /*encode the even half short (32 byte) frame */
462 gsm_encode(wav->gsmhandle, wav->gsmsample, frame);
463 /*encode the odd half long (33 byte) frame */
464 gsm_encode(wav->gsmhandle, wav->gsmsample+160, frame+32);
465 if (lsx_writebuf(ft, frame, (size_t) 65) != 65)
467 lsx_fail_errno(ft,SOX_EOF,"write error");
468 return (SOX_EOF);
470 wav->gsmbytecount += 65;
472 wav->gsmindex = 0;
473 return (SOX_SUCCESS);
476 static size_t wavgsmwrite(sox_format_t * ft, const sox_sample_t *buf, size_t len)
478 priv_t * wav = (priv_t *) ft->priv;
479 size_t done = 0;
480 int rc;
482 ft->sox_errno = SOX_SUCCESS;
484 while (done < len) {
485 SOX_SAMPLE_LOCALS;
486 while ((wav->gsmindex < 160*2) && (done < len))
487 wav->gsmsample[(wav->gsmindex)++] =
488 SOX_SAMPLE_TO_SIGNED_16BIT(buf[done++], ft->clips);
490 if (wav->gsmindex < 160*2)
491 break;
493 rc = wavgsmflush(ft);
494 if (rc)
495 return 0;
497 return done;
501 static void wavgsmstopwrite(sox_format_t * ft)
503 priv_t * wav = (priv_t *) ft->priv;
505 ft->sox_errno = SOX_SUCCESS;
507 if (wav->gsmindex)
508 wavgsmflush(ft);
510 /* Add a pad byte if amount of written bytes is not even. */
511 if (wav->gsmbytecount && wav->gsmbytecount % 2){
512 if(lsx_writeb(ft, 0))
513 lsx_fail_errno(ft,SOX_EOF,"write error");
514 else
515 wav->gsmbytecount += 1;
518 wavgsmdestroy(ft);
521 #endif /* HAVE_LIBGSM */
523 /****************************************************************************/
524 /* General Sox WAV file code */
525 /****************************************************************************/
527 static int wav_pcm_fmt(sox_format_t *ft, uint32_t len)
529 priv_t *wav = ft->priv;
530 int bps = (wav->bitsPerSample + 7) / 8;
532 if (bps == 1) {
533 ft->encoding.encoding = SOX_ENCODING_UNSIGNED;
534 } else if (bps <= 4) {
535 ft->encoding.encoding = SOX_ENCODING_SIGN2;
536 } else {
537 lsx_fail_errno(ft, SOX_EFMT, "%d bytes per sample not suppored", bps);
538 return SOX_EOF;
541 return SOX_SUCCESS;
/*
 * Table of known WAVE format tags.
 *
 * Every row gives the tag and its display name.  Rows for formats that can
 * be decoded also give the SoX encoding and, where the fmt chunk has a
 * format-specific tail, the function that parses it.  wav_read_fmt()
 * rejects a tag whose row has neither an encoding nor a parser.
 * wav_find_format() scans the rows until it meets one whose name is NULL.
 */
544 static const struct wave_format wave_formats[] = {
545 { WAVE_FORMAT_UNKNOWN, "Unknown Wave Type" },
/* encoding is chosen by wav_pcm_fmt() from the sample width */
546 { WAVE_FORMAT_PCM, "PCM",
547 SOX_ENCODING_UNKNOWN,
548 wav_pcm_fmt,
550 { WAVE_FORMAT_ADPCM, "Microsoft ADPCM",
551 SOX_ENCODING_MS_ADPCM,
552 wav_ms_adpcm_fmt,
554 { WAVE_FORMAT_IEEE_FLOAT, "IEEE Float",
555 SOX_ENCODING_FLOAT },
556 { WAVE_FORMAT_IBM_CVSD, "Digispeech CVSD" },
557 { WAVE_FORMAT_ALAW, "CCITT A-law",
558 SOX_ENCODING_ALAW },
559 { WAVE_FORMAT_MULAW, "CCITT u-law",
560 SOX_ENCODING_ULAW },
561 { WAVE_FORMAT_OKI_ADPCM, "OKI ADPCM" },
562 { WAVE_FORMAT_IMA_ADPCM, "IMA ADPCM",
563 SOX_ENCODING_IMA_ADPCM,
564 wav_ima_adpcm_fmt,
566 { WAVE_FORMAT_MEDIASPACE_ADPCM, "MediaSpace ADPCM" },
567 { WAVE_FORMAT_SIERRA_ADPCM, "Sierra ADPCM" },
568 { WAVE_FORMAT_G723_ADPCM, "G.723 ADPCM" },
569 { WAVE_FORMAT_DIGISTD, "DIGISTD" },
570 { WAVE_FORMAT_DIGIFIX, "DigiFix" },
571 { WAVE_FORMAT_YAMAHA_ADPCM, "Yamaha ADPCM" },
572 { WAVE_FORMAT_SONARC, "Sonarc" },
573 { WAVE_FORMAT_TRUESPEECH, "Truespeech" },
574 { WAVE_FORMAT_ECHOSC1, "ECHO SC-1", },
575 { WAVE_FORMAT_AUDIOFILE_AF36, "Audio File AF36" },
576 { WAVE_FORMAT_APTX, "aptX" },
577 { WAVE_FORMAT_AUDIOFILE_AF10, "Audio File AF10" },
578 { WAVE_FORMAT_DOLBY_AC2, "Dolby AC-2" },
/* decodable only when built with libgsm; otherwise the row is name-only */
579 { WAVE_FORMAT_GSM610, "GSM 6.10",
580 #ifdef HAVE_LIBGSM
581 SOX_ENCODING_GSM,
582 wav_gsm_fmt,
583 #endif
585 { WAVE_FORMAT_ADPCME, "Antex ADPCME" },
586 { WAVE_FORMAT_CONTROL_RES_VQLPC, "Control Resources VQLPC" },
587 { WAVE_FORMAT_DIGIREAL, "DSP Solutions REAL" },
588 { WAVE_FORMAT_DIGIADPCM, "DSP Solutions ADPCM" },
589 { WAVE_FORMAT_CONTROL_RES_CR10, "Control Resources CR10" },
590 { WAVE_FORMAT_ROCKWELL_ADPCM, "Rockwell ADPCM" },
591 { WAVE_FORMAT_ROCKWELL_DIGITALK, "Rockwell DIGITALK" },
592 { WAVE_FORMAT_G721_ADPCM, "G.721 ADPCM" },
593 { WAVE_FORMAT_G728_CELP, "G.728 CELP" },
594 { WAVE_FORMAT_MPEG, "MPEG-1 Audio" },
595 { WAVE_FORMAT_MPEGLAYER3, "MPEG-1 Layer 3" },
596 { WAVE_FORMAT_G726_ADPCM, "G.726 ADPCM" },
597 { WAVE_FORMAT_G722_ADPCM, "G.722 ADPCM" },
598 { WAVE_FORMAT_CREATIVE_ADPCM, "Creative Labs ADPCM" },
599 { WAVE_FORMAT_CREATIVE_FSP8, "Creative Labs FastSpeech 8" },
600 { WAVE_FORMAT_CREATIVE_FSP10, "Creative Labs FastSpeech 10" },
601 { WAVE_FORMAT_FM_TOWNS_SND, "Fujitsu FM Towns SND" },
602 { WAVE_FORMAT_OLIGSM, "Olivetti GSM" },
603 { WAVE_FORMAT_OLIADPCM, "Olivetti ADPCM" },
604 { WAVE_FORMAT_OLISBC, "Olivetti CELP" },
605 { WAVE_FORMAT_OLIOPR, "Olivetti OPR" },
609 static const struct wave_format *wav_find_format(unsigned tag)
611 const struct wave_format *f;
613 for (f = wave_formats; f->name; f++)
614 if (f->tag == tag)
615 return f;
617 return NULL;
620 static int wavfail(sox_format_t *ft, int tag, const char *name)
622 if (name)
623 lsx_fail_errno(ft, SOX_EHDR, "WAVE format '%s' (%04x) not supported",
624 name, tag);
625 else
626 lsx_fail_errno(ft, SOX_EHDR, "Unknown WAVE format %04x", tag);
628 return SOX_EOF;
631 static int wav_read_fmt(sox_format_t *ft, uint32_t len)
633 priv_t *wav = ft->priv;
634 uint16_t wChannels; /* number of channels */
635 uint32_t dwSamplesPerSecond; /* samples per second per channel */
636 uint32_t dwAvgBytesPerSec; /* estimate of bytes per second needed */
637 uint16_t wExtSize = 0; /* extended field for non-PCM */
638 const struct wave_format *fmt;
639 sox_encoding_t user_enc = ft->encoding.encoding;
640 int err;
642 if (len < 16) {
643 lsx_fail_errno(ft, SOX_EHDR, "WAVE file fmt chunk is too short");
644 return SOX_EOF;
647 err = lsx_read_fields(ft, &len, "hhiihh",
648 &wav->formatTag,
649 &wChannels,
650 &dwSamplesPerSecond,
651 &dwAvgBytesPerSec,
652 &wav->blockAlign,
653 &wav->bitsPerSample);
654 if (err)
655 return SOX_EOF;
657 /* non-PCM formats except alaw and mulaw formats have extended fmt chunk.
658 * Check for those cases.
660 if (wav->formatTag != WAVE_FORMAT_PCM &&
661 wav->formatTag != WAVE_FORMAT_ALAW &&
662 wav->formatTag != WAVE_FORMAT_MULAW &&
663 len < 2)
664 lsx_warn("WAVE file missing extended part of fmt chunk");
666 if (len >= 2) {
667 err = lsx_read_fields(ft, &len, "h", &wExtSize);
668 if (err)
669 return SOX_EOF;
672 if (wExtSize != len) {
673 lsx_fail_errno(ft, SOX_EOF,
674 "WAVE header error: cbSize inconsistent with fmt size");
675 return SOX_EOF;
678 if (wav->formatTag == WAVE_FORMAT_EXTENSIBLE) {
679 uint16_t numberOfValidBits;
680 uint32_t speakerPositionMask;
681 uint16_t subFormatTag;
683 if (len < 22) {
684 lsx_fail_errno(ft, SOX_EHDR, "WAVE file fmt chunk is too short");
685 return SOX_EOF;
688 err = lsx_read_fields(ft, &len, "hih14x",
689 &numberOfValidBits,
690 &speakerPositionMask,
691 &subFormatTag);
692 if (err)
693 return SOX_EOF;
695 if (numberOfValidBits > wav->bitsPerSample) {
696 lsx_fail_errno(ft, SOX_EHDR,
697 "wValidBitsPerSample > wBitsPerSample");
698 return SOX_EOF;
701 wav->formatTag = subFormatTag;
702 lsx_report("EXTENSIBLE");
705 /* User options take precedence */
706 if (ft->signal.channels == 0 || ft->signal.channels == wChannels)
707 ft->signal.channels = wChannels;
708 else
709 lsx_report("User options overriding channels read in .wav header");
711 if (ft->signal.channels == 0) {
712 lsx_fail_errno(ft, SOX_EHDR, "Channel count is zero");
713 return SOX_EOF;
716 if (ft->signal.rate == 0 || ft->signal.rate == dwSamplesPerSecond)
717 ft->signal.rate = dwSamplesPerSecond;
718 else
719 lsx_report("User options overriding rate read in .wav header");
721 fmt = wav_find_format(wav->formatTag);
722 if (!fmt)
723 return wavfail(ft, wav->formatTag, NULL);
725 /* format handler might override */
726 ft->encoding.encoding = fmt->encoding;
728 if (fmt->read_fmt) {
729 if (fmt->read_fmt(ft, len))
730 return SOX_EOF;
731 } else if (!fmt->encoding) {
732 return wavfail(ft, wav->formatTag, fmt->name);
735 /* User options take precedence */
736 if (!ft->encoding.bits_per_sample ||
737 ft->encoding.bits_per_sample == wav->bitsPerSample)
738 ft->encoding.bits_per_sample = wav->bitsPerSample;
739 else
740 lsx_warn("User options overriding size read in .wav header");
742 if (user_enc && user_enc != ft->encoding.encoding) {
743 lsx_report("User options overriding encoding read in .wav header");
744 ft->encoding.encoding = user_enc;
747 return 0;
750 static sox_bool valid_chunk_id(const char p[4])
752 int i;
754 for (i = 0; i < 4; i++)
755 if (p[i] < 0x20 || p[i] > 0x7f)
756 return sox_false;
758 return sox_true;
761 static int read_chunk_header(sox_format_t *ft, char tag[4], uint32_t *len)
763 int r;
765 r = lsx_readbuf(ft, tag, 4);
766 if (r < 4)
767 return SOX_EOF;
769 return lsx_readdw(ft, len);
773 * Do anything required before you start reading samples.
774 * Read file header.
775 * Find out sampling rate,
776 * size and encoding of samples,
777 * mono/stereo/quad.
779 static int startread(sox_format_t *ft)
781 priv_t *wav = ft->priv;
782 char magic[5] = { 0 };
783 uint32_t clen;
784 int err;
786 sox_bool isRF64 = sox_false;
787 uint64_t ds64_riff_size;
788 uint64_t ds64_data_size;
789 uint64_t ds64_sample_count;
791 /* wave file characteristics */
792 uint64_t qwRiffLength;
793 uint64_t qwDataLength = 0;
794 sox_bool have_fmt = sox_false;
796 ft->sox_errno = SOX_SUCCESS;
797 wav->ignoreSize = ft->signal.length == SOX_IGNORE_LENGTH;
798 ft->encoding.reverse_bytes = MACHINE_IS_BIGENDIAN;
800 if (read_chunk_header(ft, magic, &clen))
801 return SOX_EOF;
803 if (!memcmp(magic, "RIFX", 4)) {
804 lsx_debug("Found RIFX header");
805 ft->encoding.reverse_bytes = MACHINE_IS_LITTLEENDIAN;
806 } else if (!memcmp(magic, "RF64", 4)) {
807 lsx_debug("Found RF64 header");
808 isRF64 = sox_true;
809 } else if (memcmp(magic, "RIFF", 4)) {
810 lsx_fail_errno(ft, SOX_EHDR, "WAVE: RIFF header not found");
811 return SOX_EOF;
814 qwRiffLength = clen;
816 if (lsx_readbuf(ft, magic, 4) < 4 || memcmp(magic, "WAVE", 4)) {
817 lsx_fail_errno(ft, SOX_EHDR, "WAVE header not found");
818 return SOX_EOF;
821 while (!read_chunk_header(ft, magic, &clen)) {
822 uint32_t len = clen;
823 off_t cstart = lsx_tell(ft);
824 off_t pos;
826 if (!valid_chunk_id(magic)) {
827 lsx_fail_errno(ft, SOX_EHDR, "invalid chunk ID found");
828 return SOX_EOF;
831 lsx_debug("Found chunk '%s', size %u", magic, clen);
833 if (!memcmp(magic, "ds64", 4)) {
834 if (!isRF64)
835 lsx_warn("ds64 chunk in non-RF64 file");
837 if (clen < 28) {
838 lsx_fail_errno(ft, SOX_EHDR, "ds64 chunk too small");
839 return SOX_EOF;
842 if (clen == 32) {
843 lsx_warn("ds64 chunk size invalid, attempting workaround");
844 clen = 28;
847 err = lsx_read_fields(ft, &len, "qqq",
848 &ds64_riff_size,
849 &ds64_data_size,
850 &ds64_sample_count);
851 if (err)
852 return SOX_EOF;
854 goto next;
857 if (!memcmp(magic, "fmt ", 4)) {
858 err = wav_read_fmt(ft, clen);
859 if (err)
860 return err;
862 have_fmt = sox_true;
864 goto next;
867 if (!memcmp(magic, "fact", 4)) {
868 uint32_t val;
870 err = lsx_read_fields(ft, &len, "i", &val);
871 if (err)
872 return SOX_EOF;
874 wav->numSamples = val;
876 goto next;
879 if (!memcmp(magic, "data", 4)) {
880 if (isRF64 && clen == UINT32_MAX)
881 clen = ds64_data_size;
883 qwDataLength = clen;
884 wav->dataStart = lsx_tell(ft);
886 if (qwDataLength == UINT32_MAX || qwDataLength == MS_UNSPEC)
887 break;
889 if (!ft->seekable)
890 break;
892 goto next;
895 next:
896 pos = lsx_tell(ft);
897 clen += clen & 1;
899 if (pos > cstart + clen) {
900 lsx_fail_errno(ft, SOX_EHDR, "malformed chunk %s", magic);
901 return SOX_EOF;
904 err = lsx_seeki(ft, cstart + clen - pos, SEEK_CUR);
905 if (err)
906 return SOX_EOF;
909 if (isRF64) {
910 if (wav->numSamples == UINT32_MAX)
911 wav->numSamples = ds64_sample_count;
913 if (qwRiffLength == UINT32_MAX)
914 qwRiffLength = ds64_riff_size;
917 if (!have_fmt) {
918 lsx_fail_errno(ft, SOX_EOF, "fmt chunk not found");
919 return SOX_EOF;
922 if (!wav->dataStart) {
923 lsx_fail_errno(ft, SOX_EOF, "data chunk not found");
924 return SOX_EOF;
927 if (ft->seekable)
928 lsx_seeki(ft, wav->dataStart, SEEK_SET);
930 /* some files wrongly report total samples across all channels */
931 if (wav->numSamples * wav->blockAlign == qwDataLength * ft->signal.channels)
932 wav->numSamples /= ft->signal.channels;
934 if ((qwDataLength == UINT32_MAX && !wav->numSamples) ||
935 qwDataLength == MS_UNSPEC) {
936 lsx_warn("WAV data length is magic value or UINT32_MAX, ignoring");
937 wav->ignoreSize = 1;
940 switch (wav->formatTag) {
941 case WAVE_FORMAT_ADPCM:
942 wav->numSamples =
943 lsx_ms_adpcm_samples_in(qwDataLength, ft->signal.channels,
944 wav->blockAlign, wav->samplesPerBlock);
945 wav->blockSamplesRemaining = 0; /* Samples left in buffer */
946 break;
948 case WAVE_FORMAT_IMA_ADPCM:
949 /* Compute easiest part of number of samples. For every block, there
950 are samplesPerBlock samples to read. */
951 wav->numSamples =
952 lsx_ima_samples_in(qwDataLength, ft->signal.channels,
953 wav->blockAlign, wav->samplesPerBlock);
954 wav->blockSamplesRemaining = 0; /* Samples left in buffer */
955 lsx_ima_init_table();
956 break;
958 #ifdef HAVE_LIBGSM
959 case WAVE_FORMAT_GSM610:
960 wav->numSamples = qwDataLength / wav->blockAlign * wav->samplesPerBlock;
961 wavgsminit(ft);
962 break;
963 #endif
966 if (!wav->numSamples)
967 wav->numSamples = div_bits(qwDataLength, ft->encoding.bits_per_sample)
968 / ft->signal.channels;
970 if (wav->ignoreSize)
971 ft->signal.length = SOX_UNSPEC;
972 else
973 ft->signal.length = wav->numSamples * ft->signal.channels;
975 return lsx_rawstartread(ft);
980 * Read up to len samples from file.
981 * Convert to signed longs.
982 * Place in buf[].
983 * Return number of samples read.
/*
 * Read up to len samples into buf, dispatching on the stream encoding:
 * ADPCM data is decoded block by block, GSM goes through wavgsmread(),
 * everything else through lsx_rawread().  Unless the header length is
 * being ignored, the request is capped at the samples still expected.
 * Returns the number of samples stored in buf.
 */
986 static size_t read_samples(sox_format_t *ft, sox_sample_t *buf, size_t len)
988 priv_t *wav = ft->priv;
989 size_t done;
991 ft->sox_errno = SOX_SUCCESS;
/* numSamples is per channel, len counts samples of all channels */
993 if (!wav->ignoreSize)
994 len = min(len, wav->numSamples * ft->signal.channels);
996 /* If file is in ADPCM encoding then read in multiple blocks else */
997 /* read as much as possible and return quickly. */
998 switch (ft->encoding.encoding) {
999 case SOX_ENCODING_IMA_ADPCM:
1000 case SOX_ENCODING_MS_ADPCM:
1001 done = 0;
1002 while (done < len) { /* Still want data? */
1003 short *p, *top;
1004 size_t ct;
1006 /* See if need to read more from disk */
1007 if (wav->blockSamplesRemaining == 0) {
1008 if (wav->formatTag == WAVE_FORMAT_IMA_ADPCM)
1009 wav->blockSamplesRemaining = ImaAdpcmReadBlock(ft);
1010 else
1011 wav->blockSamplesRemaining = AdpcmReadBlock(ft);
/* a block reader returning 0 means no further block could be decoded */
1013 if (wav->blockSamplesRemaining == 0) {
1014 /* Don't try to read any more samples */
1015 wav->numSamples = 0;
1016 return done;
1018 wav->samplePtr = wav->samples;
1021 /* Copy interleaved data into buf, converting to sox_sample_t */
/* take no more than what is left of the current decoded block */
1022 ct = len - done;
1023 if (ct > wav->blockSamplesRemaining * ft->signal.channels)
1024 ct = wav->blockSamplesRemaining * ft->signal.channels;
1026 done += ct;
1027 wav->blockSamplesRemaining -= ct / ft->signal.channels;
1028 p = wav->samplePtr;
1029 top = p + ct;
1031 /* Output is already signed */
1032 while (p < top)
1033 *buf++ = SOX_SIGNED_16BIT_TO_SAMPLE(*p++,);
/* remember where the next call continues inside the block */
1035 wav->samplePtr = p;
1038 /* "done" for ADPCM equals total data processed and not
1039 * total samples processed. The only way to take care of that
1040 * is to return here and not fall thru.
1042 wav->numSamples -= done / ft->signal.channels;
1044 return done;
1046 #ifdef HAVE_LIBGSM
1047 case SOX_ENCODING_GSM:
1048 done = wavgsmread(ft, buf, len);
1049 break;
1050 #endif
1052 default: /* assume PCM or float encoding */
1053 done = lsx_rawread(ft, buf, len);
1054 break;
/* nothing was read although the header promised more samples */
1057 if (done == 0 && wav->numSamples && !wav->ignoreSize)
1058 lsx_warn("Premature EOF on .wav input file");
1060 /* Only return buffers that contain a totally playable
1061 * amount of audio.
1063 done -= done % ft->signal.channels;
/* update the remaining count, clamping at zero instead of wrapping */
1065 if (done / ft->signal.channels > wav->numSamples)
1066 wav->numSamples = 0;
1067 else
1068 wav->numSamples -= done / ft->signal.channels;
1070 return done;
1074 * Do anything required when you stop reading samples.
1075 * Don't close input file!
1077 static int stopread(sox_format_t * ft)
1079 priv_t * wav = (priv_t *) ft->priv;
1081 ft->sox_errno = SOX_SUCCESS;
1083 free(wav->packet);
1084 free(wav->samples);
1085 free(wav->lsx_ms_adpcm_i_coefs);
1086 free(wav->ms_adpcm_data);
1088 switch (ft->encoding.encoding)
1090 #ifdef HAVE_LIBGSM
1091 case SOX_ENCODING_GSM:
1092 wavgsmdestroy(ft);
1093 break;
1094 #endif
1095 case SOX_ENCODING_IMA_ADPCM:
1096 case SOX_ENCODING_MS_ADPCM:
1097 break;
1098 default:
1099 break;
1101 return SOX_SUCCESS;
1104 static int startwrite(sox_format_t * ft)
1106 priv_t * wav = (priv_t *) ft->priv;
1107 int rc;
1109 ft->sox_errno = SOX_SUCCESS;
1111 if (ft->encoding.encoding != SOX_ENCODING_MS_ADPCM &&
1112 ft->encoding.encoding != SOX_ENCODING_IMA_ADPCM &&
1113 ft->encoding.encoding != SOX_ENCODING_GSM)
1115 rc = lsx_rawstartwrite(ft);
1116 if (rc)
1117 return rc;
1120 wav->numSamples = 0;
1121 wav->dataLength = 0;
1122 if (!ft->signal.length && !ft->seekable)
1123 lsx_warn("Length in output .wav header will be wrong since can't seek to fix it");
1125 rc = wavwritehdr(ft, 0); /* also calculates various wav->* info */
1126 if (rc != 0)
1127 return rc;
1129 wav->packet = NULL;
1130 wav->samples = NULL;
1131 wav->lsx_ms_adpcm_i_coefs = NULL;
1132 switch (wav->formatTag)
1134 size_t ch, sbsize;
1136 case WAVE_FORMAT_IMA_ADPCM:
1137 lsx_ima_init_table();
1138 /* intentional case fallthru! */
1139 case WAVE_FORMAT_ADPCM:
1140 /* #channels already range-checked for overflow in wavwritehdr() */
1141 for (ch=0; ch<ft->signal.channels; ch++)
1142 wav->state[ch] = 0;
1143 sbsize = ft->signal.channels * wav->samplesPerBlock;
1144 wav->packet = lsx_malloc((size_t)wav->blockAlign);
1145 wav->samples = lsx_malloc(sbsize*sizeof(short));
1146 wav->sampleTop = wav->samples + sbsize;
1147 wav->samplePtr = wav->samples;
1148 break;
1150 #ifdef HAVE_LIBGSM
1151 case WAVE_FORMAT_GSM610:
1152 return wavgsminit(ft);
1153 #endif
1155 default:
1156 break;
1158 return SOX_SUCCESS;
1161 /* wavwritehdr: write .wav headers as follows:
1163 bytes variable description
1164 0 - 3 'RIFF'/'RIFX' Little/Big-endian
1165 4 - 7 wRiffLength length of file minus the 8 byte riff header
1166 8 - 11 'WAVE'
1167 12 - 15 'fmt '
1168 16 - 19 wFmtSize length of format chunk minus 8 byte header
1169 20 - 21 wFormatTag identifies PCM, ULAW etc
1170 22 - 23 wChannels
1171 24 - 27 dwSamplesPerSecond samples per second per channel
1172 28 - 31 dwAvgBytesPerSec non-trivial for compressed formats
1173 32 - 33 wBlockAlign basic block size
1174 34 - 35 wBitsPerSample non-trivial for compressed formats
1176 PCM formats then go straight to the data chunk:
1177 36 - 39 'data'
1178 40 - 43 dwDataLength length of data chunk minus 8 byte header
1179 44 - (dwDataLength + 43) the data
1180 (+ a padding byte if dwDataLength is odd)
1182 non-PCM formats must write an extended format chunk and a fact chunk:
1184 ULAW, ALAW formats:
1185 36 - 37 wExtSize = 0 the length of the format extension
1186 38 - 41 'fact'
1187 42 - 45 dwFactSize = 4 length of the fact chunk minus 8 byte header
1188 46 - 49 dwSamplesWritten actual number of samples written out
1189 50 - 53 'data'
1190 54 - 57 dwDataLength length of data chunk minus 8 byte header
1191 58 - (dwDataLength + 57) the data
1192 (+ a padding byte if dwDataLength is odd)
1195 GSM6.10 format:
1196 36 - 37 wExtSize = 2 the length in bytes of the format-dependent extension
1197 38 - 39 320 number of samples per block
1198 40 - 43 'fact'
1199 44 - 47 dwFactSize = 4 length of the fact chunk minus 8 byte header
1200 48 - 51 dwSamplesWritten actual number of samples written out
1201 52 - 55 'data'
1202 56 - 59 dwDataLength length of data chunk minus 8 byte header
1203 60 - (dwDataLength + 59) the data (including a padding byte, if necessary,
1204 so dwDataLength is always even)
1207 note that header contains (up to) 3 separate ways of describing the
1208 length of the file, all derived here from the number of (input)
1209 samples wav->numSamples in a way that is non-trivial for the blocked
1210 and padded compressed formats:
1212 wRiffLength - (riff header) the length of the file, minus 8
1213 dwSamplesWritten - (fact header) the number of samples written (after padding
1214 to a complete block eg for GSM)
1215 dwDataLength - (data chunk header) the number of (valid) data bytes written
1219 static int wavwritehdr(sox_format_t * ft, int second_header)
1221 priv_t * wav = (priv_t *) ft->priv;
1223 /* variables written to wav file header */
1224 /* RIFF header */
1225 uint64_t wRiffLength ; /* length of file after 8 byte riff header */
1226 /* fmt chunk */
1227 uint16_t wFmtSize = 16; /* size field of the fmt chunk */
1228 uint16_t wFormatTag = 0; /* data format */
1229 uint16_t wChannels; /* number of channels */
1230 uint32_t dwSamplesPerSecond; /* samples per second per channel*/
1231 uint32_t dwAvgBytesPerSec=0; /* estimate of bytes per second needed */
1232 uint32_t wBlockAlign=0; /* byte alignment of a basic sample block */
1233 uint16_t wBitsPerSample=0; /* bits per sample */
1234 /* fmt chunk extension (not PCM) */
1235 uint16_t wExtSize=0; /* extra bytes in the format extension */
1236 uint16_t wSamplesPerBlock; /* samples per channel per block */
1237 /* wSamplesPerBlock and other things may go into format extension */
1239 /* fact chunk (not PCM) */
1240 uint32_t dwFactSize=4; /* length of the fact chunk */
1241 uint64_t dwSamplesWritten=0; /* windows doesnt seem to use this*/
1243 /* data chunk */
1244 uint64_t dwDataLength; /* length of sound data in bytes */
1245 /* end of variables written to header */
1247 /* internal variables, intermediate values etc */
1248 int bytespersample; /* (uncompressed) bytes per sample (per channel) */
1249 uint64_t blocksWritten = 0;
1250 sox_bool isExtensible = sox_false; /* WAVE_FORMAT_EXTENSIBLE? */
1252 if (ft->signal.channels > UINT16_MAX) {
1253 lsx_fail_errno(ft, SOX_EOF, "Too many channels (%u)",
1254 ft->signal.channels);
1255 return SOX_EOF;
1258 dwSamplesPerSecond = ft->signal.rate;
1259 wChannels = ft->signal.channels;
1260 wBitsPerSample = ft->encoding.bits_per_sample;
1261 wSamplesPerBlock = 1; /* common default for PCM data */
1263 switch (ft->encoding.encoding)
1265 case SOX_ENCODING_UNSIGNED:
1266 case SOX_ENCODING_SIGN2:
1267 wFormatTag = WAVE_FORMAT_PCM;
1268 bytespersample = (wBitsPerSample + 7)/8;
1269 wBlockAlign = wChannels * bytespersample;
1270 break;
1271 case SOX_ENCODING_FLOAT:
1272 wFormatTag = WAVE_FORMAT_IEEE_FLOAT;
1273 bytespersample = (wBitsPerSample + 7)/8;
1274 wBlockAlign = wChannels * bytespersample;
1275 break;
1276 case SOX_ENCODING_ALAW:
1277 wFormatTag = WAVE_FORMAT_ALAW;
1278 wBlockAlign = wChannels;
1279 break;
1280 case SOX_ENCODING_ULAW:
1281 wFormatTag = WAVE_FORMAT_MULAW;
1282 wBlockAlign = wChannels;
1283 break;
1284 case SOX_ENCODING_IMA_ADPCM:
1285 if (wChannels>16)
1287 lsx_fail_errno(ft,SOX_EOF,"Channels(%d) must be <= 16",wChannels);
1288 return SOX_EOF;
1290 wFormatTag = WAVE_FORMAT_IMA_ADPCM;
1291 wBlockAlign = wChannels * 256; /* reasonable default */
1292 wBitsPerSample = 4;
1293 wExtSize = 2;
1294 wSamplesPerBlock = lsx_ima_samples_in((size_t) 0, (size_t) wChannels, (size_t) wBlockAlign, (size_t) 0);
1295 break;
1296 case SOX_ENCODING_MS_ADPCM:
1297 if (wChannels>16)
1299 lsx_fail_errno(ft,SOX_EOF,"Channels(%d) must be <= 16",wChannels);
1300 return SOX_EOF;
1302 wFormatTag = WAVE_FORMAT_ADPCM;
1303 wBlockAlign = ft->signal.rate / 11008;
1304 wBlockAlign = max(wBlockAlign, 1) * wChannels * 256;
1305 wBitsPerSample = 4;
1306 wExtSize = 4+4*7; /* Ext fmt data length */
1307 wSamplesPerBlock = lsx_ms_adpcm_samples_in((size_t) 0, (size_t) wChannels, (size_t) wBlockAlign, (size_t) 0);
1308 break;
1309 #ifdef HAVE_LIBGSM
1310 case SOX_ENCODING_GSM:
1311 if (wChannels!=1)
1313 lsx_report("Overriding GSM audio from %d channel to 1",wChannels);
1314 if (!second_header)
1315 ft->signal.length /= max(1, ft->signal.channels);
1316 wChannels = ft->signal.channels = 1;
1318 wFormatTag = WAVE_FORMAT_GSM610;
1319 /* dwAvgBytesPerSec = 1625*(dwSamplesPerSecond/8000.)+0.5; */
1320 wBlockAlign=65;
1321 wBitsPerSample=0; /* not representable as int */
1322 wExtSize=2; /* length of format extension */
1323 wSamplesPerBlock = 320;
1324 break;
1325 #endif
1326 default:
1327 break;
1330 if (wBlockAlign > UINT16_MAX) {
1331 lsx_fail_errno(ft, SOX_EOF, "Too many channels (%u)",
1332 ft->signal.channels);
1333 return SOX_EOF;
1336 wav->formatTag = wFormatTag;
1337 wav->blockAlign = wBlockAlign;
1338 wav->samplesPerBlock = wSamplesPerBlock;
1340 /* When creating header, use length hint given by input file. If no
1341 * hint then write default value. Also, use default value even
1342 * on header update if more then 32-bit length needs to be written.
1345 dwSamplesWritten =
1346 second_header ? wav->numSamples : ft->signal.length / wChannels;
1347 blocksWritten =
1348 (dwSamplesWritten + wSamplesPerBlock - 1) / wSamplesPerBlock;
1349 dwDataLength = blocksWritten * wBlockAlign;
1351 if (wFormatTag == WAVE_FORMAT_GSM610)
1352 dwDataLength = (dwDataLength+1) & ~1u; /* round up to even */
1354 if (wFormatTag == WAVE_FORMAT_PCM && (wBitsPerSample > 16 || wChannels > 2)
1355 && strcmp(ft->filetype, "wavpcm")) {
1356 isExtensible = sox_true;
1357 wFmtSize += 2 + 22;
1359 else if (wFormatTag != WAVE_FORMAT_PCM)
1360 wFmtSize += 2+wExtSize; /* plus ExtData */
1362 wRiffLength = 4 + (8+wFmtSize) + (8+dwDataLength+dwDataLength%2);
1363 if (isExtensible || wFormatTag != WAVE_FORMAT_PCM) /* PCM omits the "fact" chunk */
1364 wRiffLength += (8+dwFactSize);
1366 if (dwSamplesWritten > UINT32_MAX)
1367 dwSamplesWritten = UINT32_MAX;
1369 if (dwDataLength > UINT32_MAX)
1370 dwDataLength = UINT32_MAX;
1372 if (!second_header && !ft->signal.length)
1373 dwDataLength = UINT32_MAX;
1375 if (wRiffLength > UINT32_MAX)
1376 wRiffLength = UINT32_MAX;
1378 /* dwAvgBytesPerSec <-- this is BEFORE compression, isn't it? guess not. */
1379 dwAvgBytesPerSec = (double)wBlockAlign*ft->signal.rate / (double)wSamplesPerBlock + 0.5;
1381 /* figured out header info, so write it */
1383 /* If user specified opposite swap than we think, assume they are
1384 * asking to write a RIFX file.
1386 if (ft->encoding.reverse_bytes == MACHINE_IS_LITTLEENDIAN)
1388 if (!second_header)
1389 lsx_report("Requested to swap bytes so writing RIFX header");
1390 lsx_writes(ft, "RIFX");
1392 else
1393 lsx_writes(ft, "RIFF");
1394 lsx_writedw(ft, wRiffLength);
1395 lsx_writes(ft, "WAVE");
1396 lsx_writes(ft, "fmt ");
1397 lsx_writedw(ft, wFmtSize);
1398 lsx_writew(ft, isExtensible ? WAVE_FORMAT_EXTENSIBLE : wFormatTag);
1399 lsx_writew(ft, wChannels);
1400 lsx_writedw(ft, dwSamplesPerSecond);
1401 lsx_writedw(ft, dwAvgBytesPerSec);
1402 lsx_writew(ft, wBlockAlign);
1403 lsx_writew(ft, wBitsPerSample); /* end info common to all fmts */
1405 if (isExtensible) {
1406 uint32_t dwChannelMask=0; /* unassigned speaker mapping by default */
1407 static unsigned char const guids[][14] = {
1408 "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71", /* wav */
1409 "\x00\x00\x21\x07\xd3\x11\x86\x44\xc8\xc1\xca\x00\x00\x00"}; /* amb */
1411 /* if not amb, assume most likely channel masks from number of channels; not
1412 * ideal solution, but will make files playable in many/most situations
1414 if (strcmp(ft->filetype, "amb")) {
1415 if (wChannels == 1) dwChannelMask = 0x4; /* 1 channel (mono) = FC */
1416 else if (wChannels == 2) dwChannelMask = 0x3; /* 2 channels (stereo) = FL, FR */
1417 else if (wChannels == 4) dwChannelMask = 0x33; /* 4 channels (quad) = FL, FR, BL, BR */
1418 else if (wChannels == 6) dwChannelMask = 0x3F; /* 6 channels (5.1) = FL, FR, FC, LF, BL, BR */
1419 else if (wChannels == 8) dwChannelMask = 0x63F; /* 8 channels (7.1) = FL, FR, FC, LF, BL, BR, SL, SR */
1422 lsx_writew(ft, 22);
1423 lsx_writew(ft, wBitsPerSample); /* No padding in container */
1424 lsx_writedw(ft, dwChannelMask); /* Speaker mapping is something reasonable */
1425 lsx_writew(ft, wFormatTag);
1426 lsx_writebuf(ft, guids[!strcmp(ft->filetype, "amb")], (size_t)14);
1428 else
1429 /* if not PCM, we need to write out wExtSize even if wExtSize=0 */
1430 if (wFormatTag != WAVE_FORMAT_PCM)
1431 lsx_writew(ft,wExtSize);
1433 switch (wFormatTag)
1435 int i;
1436 case WAVE_FORMAT_IMA_ADPCM:
1437 lsx_writew(ft, wSamplesPerBlock);
1438 break;
1439 case WAVE_FORMAT_ADPCM:
1440 lsx_writew(ft, wSamplesPerBlock);
1441 lsx_writew(ft, 7); /* nCoefs */
1442 for (i=0; i<7; i++) {
1443 lsx_writew(ft, (uint16_t)(lsx_ms_adpcm_i_coef[i][0]));
1444 lsx_writew(ft, (uint16_t)(lsx_ms_adpcm_i_coef[i][1]));
1446 break;
1447 case WAVE_FORMAT_GSM610:
1448 lsx_writew(ft, wSamplesPerBlock);
1449 break;
1450 default:
1451 break;
1454 /* if not PCM, write the 'fact' chunk */
1455 if (isExtensible || wFormatTag != WAVE_FORMAT_PCM){
1456 lsx_writes(ft, "fact");
1457 lsx_writedw(ft,dwFactSize);
1458 lsx_writedw(ft,dwSamplesWritten);
1461 lsx_writes(ft, "data");
1462 lsx_writedw(ft, dwDataLength); /* data chunk size */
1464 if (!second_header) {
1465 lsx_debug("Writing Wave file: %s format, %d channel%s, %d samp/sec",
1466 wav_format_str(wFormatTag), wChannels,
1467 wChannels == 1 ? "" : "s", dwSamplesPerSecond);
1468 lsx_debug(" %d byte/sec, %d block align, %d bits/samp",
1469 dwAvgBytesPerSec, wBlockAlign, wBitsPerSample);
1470 } else {
1471 if (wRiffLength == UINT32_MAX || dwDataLength == UINT32_MAX ||
1472 dwSamplesWritten == UINT32_MAX)
1473 lsx_warn("File too large, writing truncated values in header");
1475 lsx_debug("Finished writing Wave file, %"PRIu64" data bytes %"PRIu64" samples",
1476 dwDataLength, wav->numSamples);
1477 #ifdef HAVE_LIBGSM
1478 if (wFormatTag == WAVE_FORMAT_GSM610){
1479 lsx_debug("GSM6.10 format: %"PRIu64" blocks %"PRIu64" padded samples %"PRIu64" padded data bytes",
1480 blocksWritten, dwSamplesWritten, dwDataLength);
1481 if (wav->gsmbytecount != dwDataLength)
1482 lsx_warn("help ! internal inconsistency - data_written %"PRIu64" gsmbytecount %zu",
1483 dwDataLength, wav->gsmbytecount);
1486 #endif
1488 return SOX_SUCCESS;
1491 static size_t write_samples(sox_format_t * ft, const sox_sample_t *buf, size_t len)
1493 priv_t * wav = (priv_t *) ft->priv;
1494 ptrdiff_t total_len = len;
1496 ft->sox_errno = SOX_SUCCESS;
1498 switch (wav->formatTag)
1500 case WAVE_FORMAT_IMA_ADPCM:
1501 case WAVE_FORMAT_ADPCM:
1502 while (len>0) {
1503 short *p = wav->samplePtr;
1504 short *top = wav->sampleTop;
1506 if (top>p+len) top = p+len;
1507 len -= top-p; /* update residual len */
1508 while (p < top)
1509 *p++ = (*buf++) >> 16;
1511 wav->samplePtr = p;
1512 if (p == wav->sampleTop)
1513 xxxAdpcmWriteBlock(ft);
1516 return total_len - len;
1517 break;
1519 #ifdef HAVE_LIBGSM
1520 case WAVE_FORMAT_GSM610:
1521 len = wavgsmwrite(ft, buf, len);
1522 wav->numSamples += (len/ft->signal.channels);
1523 return len;
1524 break;
1525 #endif
1527 default:
1528 len = lsx_rawwrite(ft, buf, len);
1529 wav->numSamples += (len/ft->signal.channels);
1530 return len;
1534 static int stopwrite(sox_format_t * ft)
1536 priv_t * wav = (priv_t *) ft->priv;
1538 ft->sox_errno = SOX_SUCCESS;
1541 /* Call this to flush out any remaining data. */
1542 switch (wav->formatTag)
1544 case WAVE_FORMAT_IMA_ADPCM:
1545 case WAVE_FORMAT_ADPCM:
1546 xxxAdpcmWriteBlock(ft);
1547 break;
1548 #ifdef HAVE_LIBGSM
1549 case WAVE_FORMAT_GSM610:
1550 wavgsmstopwrite(ft);
1551 break;
1552 #endif
1555 /* Add a pad byte if the number of data bytes is odd.
1556 See wavwritehdr() above for the calculation. */
1557 if (wav->formatTag != WAVE_FORMAT_GSM610)
1558 lsx_padbytes(ft, (size_t)((wav->numSamples + wav->samplesPerBlock - 1)/wav->samplesPerBlock*wav->blockAlign) % 2);
1560 free(wav->packet);
1561 free(wav->samples);
1562 free(wav->lsx_ms_adpcm_i_coefs);
1564 /* All samples are already written out. */
1565 /* If file header needs fixing up, for example it needs the */
1566 /* the number of samples in a field, seek back and write them here. */
1567 if (ft->signal.length && wav->numSamples <= 0xffffffff &&
1568 wav->numSamples == ft->signal.length)
1569 return SOX_SUCCESS;
1570 if (!ft->seekable)
1571 return SOX_EOF;
1573 if (lsx_seeki(ft, (off_t)0, SEEK_SET) != 0)
1575 lsx_fail_errno(ft,SOX_EOF,"Can't rewind output file to rewrite .wav header.");
1576 return SOX_EOF;
1579 return (wavwritehdr(ft, 1));
1583 * Return a string corresponding to the wave format type.
1585 static const char *wav_format_str(unsigned tag)
1587 const struct wave_format *f = wav_find_format(tag);
1588 return f ? f->name : "unknown";
1591 static int seek(sox_format_t * ft, uint64_t offset)
1593 priv_t * wav = (priv_t *) ft->priv;
1595 if (ft->encoding.bits_per_sample & 7)
1596 lsx_fail_errno(ft, SOX_ENOTSUP, "seeking not supported with this encoding");
1597 else if (wav->formatTag == WAVE_FORMAT_GSM610) {
1598 int alignment;
1599 size_t gsmoff;
1601 /* rounding bytes to blockAlign so that we
1602 * don't have to decode partial block. */
1603 gsmoff = offset * wav->blockAlign / wav->samplesPerBlock +
1604 wav->blockAlign * ft->signal.channels / 2;
1605 gsmoff -= gsmoff % (wav->blockAlign * ft->signal.channels);
1607 ft->sox_errno = lsx_seeki(ft, (off_t)(gsmoff + wav->dataStart), SEEK_SET);
1608 if (ft->sox_errno == SOX_SUCCESS) {
1609 /* offset is in samples */
1610 uint64_t new_offset = offset;
1611 alignment = offset % wav->samplesPerBlock;
1612 if (alignment != 0)
1613 new_offset += (wav->samplesPerBlock - alignment);
1614 wav->numSamples = ft->signal.length - (new_offset / ft->signal.channels);
1616 } else {
1617 double wide_sample = offset - (offset % ft->signal.channels);
1618 double to_d = wide_sample * ft->encoding.bits_per_sample / 8;
1619 off_t to = to_d;
1620 ft->sox_errno = (to != to_d)? SOX_EOF : lsx_seeki(ft, (off_t)wav->dataStart + (off_t)to, SEEK_SET);
1621 if (ft->sox_errno == SOX_SUCCESS)
1622 wav->numSamples -= (size_t)wide_sample / ft->signal.channels;
1625 return ft->sox_errno;
1628 LSX_FORMAT_HANDLER(wav)
1630 static char const * const names[] = {"wav", "wavpcm", "amb", NULL};
1631 static unsigned const write_encodings[] = {
1632 SOX_ENCODING_SIGN2, 16, 24, 32, 0,
1633 SOX_ENCODING_UNSIGNED, 8, 0,
1634 SOX_ENCODING_ULAW, 8, 0,
1635 SOX_ENCODING_ALAW, 8, 0,
1636 #ifdef HAVE_LIBGSM
1637 SOX_ENCODING_GSM, 0,
1638 #endif
1639 SOX_ENCODING_MS_ADPCM, 4, 0,
1640 SOX_ENCODING_IMA_ADPCM, 4, 0,
1641 SOX_ENCODING_FLOAT, 32, 64, 0,
1643 static sox_format_handler_t const handler = {SOX_LIB_VERSION_CODE,
1644 "Microsoft audio format", names, SOX_FILE_LIT_END,
1645 startread, read_samples, stopread,
1646 startwrite, write_samples, stopwrite,
1647 seek, write_encodings, NULL, sizeof(priv_t)
1649 return &handler;