2 * ALAC (Apple Lossless Audio Codec) decoder
3 * Copyright (c) 2005 David Hammerton
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * ALAC (Apple Lossless Audio Codec) decoder
25 * @author 2005 David Hammerton
26 * @see http://crazney.net/programs/itunes/alac.html
28 * Note: This decoder expects a 36-byte QuickTime atom to be
29 * passed through the extradata[_size] fields. This atom is tacked onto
30 * the end of an 'alac' stsd atom and has the following format:
33 * 32 bits tag ("alac")
34 * 32 bits tag version (0)
35 * 32 bits samples per frame (used when not set explicitly in the frames)
36 * 8 bits compatible version (0)
38 * 8 bits history mult (40)
39 * 8 bits initial history (14)
40 * 8 bits rice param limit (10)
42 * 16 bits maxRun (255)
43 * 32 bits max coded frame size (0 means unknown)
44 * 32 bits average bitrate (0 means unknown)
50 #include "libavutil/channel_layout.h"
53 #include "bitstream.h"
54 #include "bytestream.h"
58 #include "alac_data.h"
60 #define ALAC_EXTRADATA_SIZE 36
/**
 * Decoder state shared by the ALAC decoding routines in this file.
 *
 * Holds the owning codec context, three sets of per-channel work buffers
 * (two slots each: prediction residuals, output samples and extra
 * low-order bits) and stream parameters used while decoding.
 *
 * NOTE(review): other members referenced elsewhere in the file (bc,
 * channels, sample_size, rice_limit) do not appear in this excerpt.
 */
62 typedef struct ALACContext
{
63 AVCodecContext
*avctx
;
67 int32_t *predict_error_buffer
[2];
68 int32_t *output_samples_buffer
[2];
69 int32_t *extra_bits_buffer
[2];
71 uint32_t max_samples_per_frame
;
73 uint8_t rice_history_mult
;
74 uint8_t rice_initial_history
;
77 int extra_bits
; /**< number of extra bits beyond 16-bit */
78 int nb_samples
; /**< number of samples in the current frame */
/**
 * Read one variable-length coded value from the bitstream.
 *
 * A prefix is read with get_unary_0_9(). When it is greater than 8 the
 * value is replaced by a plain bps-bit field. In the other visible path
 * k bits are peeked and then either k or k - 1 bits are consumed.
 *
 * NOTE(review): the branch headers and the statements that combine the
 * prefix with the peeked bits are not visible in this excerpt.
 *
 * @param bc  bitstream reader to consume from
 * @param k   number of bits peeked after the prefix
 * @param bps width in bits of the escape-coded value
 */
81 static inline unsigned int decode_scalar(BitstreamContext
*bc
, int k
, int bps
)
83 unsigned int x
= get_unary_0_9(bc
);
85 if (x
> 8) { /* RICE THRESHOLD */
86 /* use alternative encoding */
87 x
= bitstream_read(bc
, bps
);
89 int extrabits
= bitstream_peek(bc
, k
);
91 /* multiply x by 2^k - 1, as part of their strange algorithm */
96 bitstream_skip(bc
, k
);
98 bitstream_skip(bc
, k
- 1);
/**
 * Decode nb_samples entropy-coded values into output_buffer.
 *
 * For every sample a parameter k is derived from the running history
 * (capped at alac->rice_limit), a value is read with decode_scalar() and
 * mapped to a signed number, and the history is updated using
 * rice_history_mult. When the history falls below 128 and samples remain,
 * a block size is decoded and that many following samples are set to
 * zero; a block that would reach the end of the frame is logged as an
 * error and clamped to nb_samples - i - 1.
 *
 * NOTE(review): parts of the body are missing from this excerpt (for
 * example how sign_modifier and the block size advance the loop), so
 * this summary covers only the visible statements.
 *
 * @param alac              decoder context (bitstream reader, rice limits)
 * @param output_buffer     destination for the decoded values
 * @param nb_samples        number of values to produce
 * @param bps               bit width passed to decode_scalar() for samples
 * @param rice_history_mult multiplier used in the history update
 */
103 static void rice_decompress(ALACContext
*alac
, int32_t *output_buffer
,
104 int nb_samples
, int bps
, int rice_history_mult
)
107 unsigned int history
= alac
->rice_initial_history
;
108 int sign_modifier
= 0;
110 for (i
= 0; i
< nb_samples
; i
++) {
114 /* calculate rice param and decode next value */
115 k
= av_log2((history
>> 9) + 3);
116 k
= FFMIN(k
, alac
->rice_limit
);
117 x
= decode_scalar(&alac
->bc
, k
, bps
);
120 output_buffer
[i
] = (x
>> 1) ^ -(x
& 1);
122 /* update the history */
126 history
+= x
* rice_history_mult
-
127 ((history
* rice_history_mult
) >> 9);
129 /* special case: there may be compressed blocks of 0 */
130 if ((history
< 128) && (i
+ 1 < nb_samples
)) {
133 /* calculate rice param and decode block size */
134 k
= 7 - av_log2(history
) + ((history
+ 16) >> 6);
135 k
= FFMIN(k
, alac
->rice_limit
);
136 block_size
= decode_scalar(&alac
->bc
, k
, 16);
138 if (block_size
> 0) {
139 if (block_size
>= nb_samples
- i
) {
140 av_log(alac
->avctx
, AV_LOG_ERROR
,
141 "invalid zero block size of %d %d %d\n", block_size
,
143 block_size
= nb_samples
- i
- 1;
145 memset(&output_buffer
[i
+ 1], 0,
146 block_size
* sizeof(*output_buffer
));
149 if (block_size
<= 0xffff)
/**
 * Return the sign of v as an integer.
 *
 * @param v value to inspect
 * @return 1 when v is positive, -1 when it is negative, 0 when it is zero
 */
static inline int sign_only(int v)
{
    if (v > 0)
        return 1;
    if (v < 0)
        return -1;
    return 0;
}
/**
 * Rebuild samples in buffer_out from the residuals in error_buffer using
 * an adaptive linear predictor.
 *
 * The first sample is copied unchanged. One visible path copies the
 * remaining residuals straight through with memcpy (its guarding
 * condition is not visible here). With lpc_order == 31 every sample is
 * the previous output plus the residual, passed through sign_extend().
 * Otherwise the first lpc_order samples after the initial one are
 * warm-up samples built the same way with bps bits, and each later
 * sample is predicted from lpc_coefs, rounded and shifted right by
 * lpc_quant, added to the residual and sign-extended to bps bits. After
 * each predicted sample the coefficients are adjusted by one step in a
 * direction taken from the sign of the residual.
 *
 * NOTE(review): the rounding term 1 << (lpc_quant - 1) has a negative
 * shift count when lpc_quant is 0; no guard is visible in this excerpt.
 * NOTE(review): several statements (local declarations, the setup of d,
 * val and pred inside the loops) are missing from this excerpt.
 *
 * @param error_buffer residuals to decode
 * @param buffer_out   destination for the reconstructed samples
 * @param nb_samples   number of samples to process
 * @param bps          bit width used when sign-extending results
 * @param lpc_coefs    predictor coefficients, modified in place
 * @param lpc_order    number of coefficients; 31 selects the simple path
 * @param lpc_quant    right shift applied to the prediction sum
 */
161 static void lpc_prediction(int32_t *error_buffer
, int32_t *buffer_out
,
162 int nb_samples
, int bps
, int16_t *lpc_coefs
,
163 int lpc_order
, int lpc_quant
)
166 int32_t *pred
= buffer_out
;
168 /* first sample always copies */
169 *buffer_out
= *error_buffer
;
175 memcpy(&buffer_out
[1], &error_buffer
[1],
176 (nb_samples
- 1) * sizeof(*buffer_out
));
180 if (lpc_order
== 31) {
181 /* simple 1st-order prediction */
182 for (i
= 1; i
< nb_samples
; i
++) {
183 buffer_out
[i
] = sign_extend(buffer_out
[i
- 1] + error_buffer
[i
],
189 /* read warm-up samples */
190 for (i
= 1; i
<= lpc_order
; i
++)
191 buffer_out
[i
] = sign_extend(buffer_out
[i
- 1] + error_buffer
[i
], bps
);
193 /* NOTE: 4 and 8 are very common cases that could be optimized. */
195 for (; i
< nb_samples
; i
++) {
198 int error_val
= error_buffer
[i
];
203 for (j
= 0; j
< lpc_order
; j
++)
204 val
+= (pred
[j
] - d
) * lpc_coefs
[j
];
205 val
= (val
+ (1 << (lpc_quant
- 1))) >> lpc_quant
;
206 val
+= d
+ error_val
;
207 buffer_out
[i
] = sign_extend(val
, bps
);
209 /* adapt LPC coefficients */
210 error_sign
= sign_only(error_val
);
212 for (j
= 0; j
< lpc_order
&& error_val
* error_sign
> 0; j
++) {
215 sign
= sign_only(val
) * error_sign
;
216 lpc_coefs
[j
] -= sign
;
218 error_val
-= (val
>> lpc_quant
) * (j
+ 1);
/**
 * Stereo decorrelation step applied to a pair of channel buffers.
 *
 * Loops over nb_samples samples; the one visible statement subtracts
 * (b * decorr_left_weight) >> decorr_shift from a.
 *
 * NOTE(review): how a and b are loaded from and written back to
 * buffer[] is not visible in this excerpt.
 *
 * @param buffer             the two channel buffers
 * @param nb_samples         number of samples per channel
 * @param decorr_shift       right shift applied to the weighted term
 * @param decorr_left_weight weight multiplied with b
 */
224 static void decorrelate_stereo(int32_t *buffer
[2], int nb_samples
,
225 int decorr_shift
, int decorr_left_weight
)
229 for (i
= 0; i
< nb_samples
; i
++) {
235 a
-= (b
* decorr_left_weight
) >> decorr_shift
;
/**
 * Re-attach the separately stored low-order bits to each sample.
 *
 * Every sample is shifted left by extra_bits and the matching entry of
 * extra_bits_buffer is OR-ed into the result.
 *
 * @param buffer            per-channel samples, updated in place
 * @param extra_bits_buffer per-channel low-order bits to merge in
 * @param extra_bits        number of bits each sample is shifted by
 * @param channels          number of channels to process
 * @param nb_samples        number of samples per channel
 */
static void append_extra_bits(int32_t *buffer[2], int32_t *extra_bits_buffer[2],
                              int extra_bits, int channels, int nb_samples)
{
    int i, ch;

    for (ch = 0; ch < channels; ch++) {
        for (i = 0; i < nb_samples; i++) {
            /* Shift as unsigned: left-shifting a negative signed value is
             * undefined behaviour in C, and samples may be negative. */
            uint32_t shifted = (uint32_t)buffer[ch][i] << extra_bits;

            buffer[ch][i] = (int32_t)(shifted |
                                      (uint32_t)extra_bits_buffer[ch][i]);
        }
    }
}
/**
 * Decode one syntax element of an ALAC packet into the output frame.
 *
 * Visible steps:
 *  - skip the 4-bit instance tag and 12 unused header bits, read the
 *    has_size flag and the extra-bits count (a 2-bit field times 8);
 *  - derive bps as sample_size - extra_bits + channels - 1;
 *  - take the sample count from a 32-bit field or from
 *    max_samples_per_frame, and reject 0 or a count above that maximum;
 *  - when alac->nb_samples is still 0, request the output frame with
 *    ff_get_buffer(); otherwise the sample count must equal
 *    alac->nb_samples;
 *  - for sample sizes above 16, point output_samples_buffer at the
 *    frame's extended_data planes starting at ch_index;
 *  - compressed data: refuse a rice limit of 0, read the decorrelation
 *    parameters and, per channel, the prediction type, LPC quantiser,
 *    rice history multiplier, LPC order and coefficients, read the
 *    extra bits when present, then run rice_decompress() and
 *    lpc_prediction() for each channel;
 *  - uncompressed data: read sample_size-bit signed samples, channel by
 *    channel within each sample index, and clear extra_bits and
 *    decorr_left_weight;
 *  - apply decorrelate_stereo() for two channels with a non-zero weight
 *    and append_extra_bits() when extra bits are present;
 *  - depending on sample_size, copy the result into 16-bit output planes
 *    or shift the samples left by 8.
 *
 * NOTE(review): this excerpt omits several lines of the function
 * (including the end of the parameter list, where `channels` is
 * presumably declared, and most branch headers and case labels), so the
 * conditions selecting between these paths are not all visible.
 *
 * @param avctx    codec context; its priv_data is the ALACContext
 * @param frame    output frame that receives the decoded samples
 * @param ch_index index of the first output plane used by this element
 * @return a negative AVERROR code on the visible failure paths
 */
253 static int decode_element(AVCodecContext
*avctx
, AVFrame
*frame
, int ch_index
,
256 ALACContext
*alac
= avctx
->priv_data
;
257 int has_size
, bps
, is_compressed
, decorr_shift
, decorr_left_weight
, ret
;
258 uint32_t output_samples
;
261 bitstream_skip(&alac
->bc
, 4); /* element instance tag */
262 bitstream_skip(&alac
->bc
, 12); /* unused header bits */
264 /* the number of output samples is stored in the frame */
265 has_size
= bitstream_read_bit(&alac
->bc
);
267 alac
->extra_bits
= bitstream_read(&alac
->bc
, 2) << 3;
268 bps
= alac
->sample_size
- alac
->extra_bits
+ channels
- 1;
270 avpriv_report_missing_feature(avctx
, "bps %d", bps
);
271 return AVERROR_PATCHWELCOME
;
274 /* whether the frame is compressed */
275 is_compressed
= !bitstream_read_bit(&alac
->bc
);
278 output_samples
= bitstream_read(&alac
->bc
, 32);
280 output_samples
= alac
->max_samples_per_frame
;
281 if (!output_samples
|| output_samples
> alac
->max_samples_per_frame
) {
282 av_log(avctx
, AV_LOG_ERROR
, "invalid samples per frame: %"PRIu32
"\n",
284 return AVERROR_INVALIDDATA
;
286 if (!alac
->nb_samples
) {
287 /* get output buffer */
288 frame
->nb_samples
= output_samples
;
289 if ((ret
= ff_get_buffer(avctx
, frame
, 0)) < 0) {
290 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed\n");
293 } else if (output_samples
!= alac
->nb_samples
) {
294 av_log(avctx
, AV_LOG_ERROR
, "sample count mismatch: %"PRIu32
" != %d\n",
295 output_samples
, alac
->nb_samples
);
296 return AVERROR_INVALIDDATA
;
298 alac
->nb_samples
= output_samples
;
299 if (alac
->sample_size
> 16) {
300 for (ch
= 0; ch
< channels
; ch
++)
301 alac
->output_samples_buffer
[ch
] = (int32_t *)frame
->extended_data
[ch_index
+ ch
];
305 int16_t lpc_coefs
[2][32];
307 int prediction_type
[2];
309 int rice_history_mult
[2];
311 if (!alac
->rice_limit
) {
312 avpriv_request_sample(alac
->avctx
,
313 "Compression with rice limit 0");
314 return AVERROR(ENOSYS
);
317 decorr_shift
= bitstream_read(&alac
->bc
, 8);
318 decorr_left_weight
= bitstream_read(&alac
->bc
, 8);
320 for (ch
= 0; ch
< channels
; ch
++) {
321 prediction_type
[ch
] = bitstream_read(&alac
->bc
, 4);
322 lpc_quant
[ch
] = bitstream_read(&alac
->bc
, 4);
323 rice_history_mult
[ch
] = bitstream_read(&alac
->bc
, 3);
324 lpc_order
[ch
] = bitstream_read(&alac
->bc
, 5);
326 if (lpc_order
[ch
] >= alac
->max_samples_per_frame
)
327 return AVERROR_INVALIDDATA
;
329 /* read the predictor table */
330 for (i
= lpc_order
[ch
] - 1; i
>= 0; i
--)
331 lpc_coefs
[ch
][i
] = bitstream_read_signed(&alac
->bc
, 16);
334 if (alac
->extra_bits
) {
335 for (i
= 0; i
< alac
->nb_samples
; i
++) {
336 for (ch
= 0; ch
< channels
; ch
++)
337 alac
->extra_bits_buffer
[ch
][i
] = bitstream_read(&alac
->bc
, alac
->extra_bits
);
340 for (ch
= 0; ch
< channels
; ch
++) {
341 rice_decompress(alac
, alac
->predict_error_buffer
[ch
],
342 alac
->nb_samples
, bps
,
343 rice_history_mult
[ch
] * alac
->rice_history_mult
/ 4);
345 /* adaptive FIR filter */
346 if (prediction_type
[ch
] == 15) {
347 /* Prediction type 15 runs the adaptive FIR twice.
348 * The first pass uses the special-case coef_num = 31, while
349 * the second pass uses the coefs from the bitstream.
351 * However, this prediction type is not currently used by the
354 lpc_prediction(alac
->predict_error_buffer
[ch
],
355 alac
->predict_error_buffer
[ch
],
356 alac
->nb_samples
, bps
, NULL
, 31, 0);
357 } else if (prediction_type
[ch
] > 0) {
358 av_log(avctx
, AV_LOG_WARNING
, "unknown prediction type: %i\n",
359 prediction_type
[ch
]);
361 lpc_prediction(alac
->predict_error_buffer
[ch
],
362 alac
->output_samples_buffer
[ch
], alac
->nb_samples
,
363 bps
, lpc_coefs
[ch
], lpc_order
[ch
], lpc_quant
[ch
]);
366 /* not compressed, easy case */
367 for (i
= 0; i
< alac
->nb_samples
; i
++) {
368 for (ch
= 0; ch
< channels
; ch
++) {
369 alac
->output_samples_buffer
[ch
][i
] =
370 bitstream_read_signed(&alac
->bc
, alac
->sample_size
);
373 alac
->extra_bits
= 0;
375 decorr_left_weight
= 0;
378 if (channels
== 2 && decorr_left_weight
) {
379 decorrelate_stereo(alac
->output_samples_buffer
, alac
->nb_samples
,
380 decorr_shift
, decorr_left_weight
);
383 if (alac
->extra_bits
) {
384 append_extra_bits(alac
->output_samples_buffer
, alac
->extra_bits_buffer
,
385 alac
->extra_bits
, channels
, alac
->nb_samples
);
388 switch(alac
->sample_size
) {
390 for (ch
= 0; ch
< channels
; ch
++) {
391 int16_t *outbuffer
= (int16_t *)frame
->extended_data
[ch_index
+ ch
];
392 for (i
= 0; i
< alac
->nb_samples
; i
++)
393 *outbuffer
++ = alac
->output_samples_buffer
[ch
][i
];
397 for (ch
= 0; ch
< channels
; ch
++) {
398 for (i
= 0; i
< alac
->nb_samples
; i
++)
399 alac
->output_samples_buffer
[ch
][i
] <<= 8;
/**
 * Decode one ALAC packet into the supplied frame.
 *
 * Initialises the bitstream reader on the packet data, resets
 * alac->nb_samples to 0 and then, while at least 3 bits remain, reads a
 * 3-bit element type. Element types above TYPE_CPE other than TYPE_LFE
 * are reported as unsupported. TYPE_CPE elements carry two channels, all
 * others one; the element is rejected when it would exceed
 * alac->channels, either by running count or through the
 * ff_alac_channel_layout_offsets table. Accepted elements are decoded
 * with decode_element().
 *
 * After the loop the visible code reports a missing end tag, a packet
 * without decodable data (nb_samples still 0), and logs the number of
 * leftover bits when more than 8 remain.
 *
 * NOTE(review): the handling of TYPE_END, the update of ch, the final
 * return value and the conditions around some error paths are not
 * visible in this excerpt.
 *
 * @param avctx         codec context; its priv_data is the ALACContext
 * @param data          output AVFrame
 * @param got_frame_ptr output flag (its assignment is not visible here)
 * @param avpkt         input packet
 */
407 static int alac_decode_frame(AVCodecContext
*avctx
, void *data
,
408 int *got_frame_ptr
, AVPacket
*avpkt
)
410 ALACContext
*alac
= avctx
->priv_data
;
411 AVFrame
*frame
= data
;
412 enum AlacRawDataBlockType element
;
414 int ch
, ret
, got_end
;
416 bitstream_init8(&alac
->bc
, avpkt
->data
, avpkt
->size
);
419 alac
->nb_samples
= 0;
421 while (bitstream_bits_left(&alac
->bc
) >= 3) {
422 element
= bitstream_read(&alac
->bc
, 3);
423 if (element
== TYPE_END
) {
427 if (element
> TYPE_CPE
&& element
!= TYPE_LFE
) {
428 avpriv_report_missing_feature(avctx
, "Syntax element %d", element
);
429 return AVERROR_PATCHWELCOME
;
432 channels
= (element
== TYPE_CPE
) ? 2 : 1;
433 if (ch
+ channels
> alac
->channels
||
434 ff_alac_channel_layout_offsets
[alac
->channels
- 1][ch
] + channels
> alac
->channels
) {
435 av_log(avctx
, AV_LOG_ERROR
, "invalid element channel count\n");
436 return AVERROR_INVALIDDATA
;
439 ret
= decode_element(avctx
, frame
,
440 ff_alac_channel_layout_offsets
[alac
->channels
- 1][ch
],
442 if (ret
< 0 && bitstream_bits_left(&alac
->bc
))
448 av_log(avctx
, AV_LOG_ERROR
, "no end tag found. incomplete packet.\n");
449 return AVERROR_INVALIDDATA
;
451 if (!alac
->nb_samples
) {
452 av_log(avctx
, AV_LOG_ERROR
, "No decodable data in the packet\n");
453 return AVERROR_INVALIDDATA
;
456 if (avpkt
->size
* 8 - bitstream_tell(&alac
->bc
) > 8) {
457 av_log(avctx
, AV_LOG_ERROR
, "Error : %d bits left\n",
458 avpkt
->size
* 8 - bitstream_tell(&alac
->bc
));
/**
 * Release the per-channel work buffers of the decoder.
 *
 * Frees predict_error_buffer and extra_bits_buffer for the first
 * min(alac->channels, 2) channels. output_samples_buffer is freed only
 * when sample_size is 16; decode_element() points it at the frame's own
 * planes for sample sizes above 16.
 *
 * NOTE(review): the return statement is not visible in this excerpt.
 *
 * @param avctx codec context; its priv_data is the ALACContext
 */
466 static av_cold
int alac_decode_close(AVCodecContext
*avctx
)
468 ALACContext
*alac
= avctx
->priv_data
;
471 for (ch
= 0; ch
< FFMIN(alac
->channels
, 2); ch
++) {
472 av_freep(&alac
->predict_error_buffer
[ch
]);
473 if (alac
->sample_size
== 16)
474 av_freep(&alac
->output_samples_buffer
[ch
]);
475 av_freep(&alac
->extra_bits_buffer
[ch
]);
/**
 * Allocate the per-channel work buffers of the decoder.
 *
 * Each buffer holds max_samples_per_frame 32-bit samples. For the first
 * min(alac->channels, 2) channels this allocates predict_error_buffer
 * and extra_bits_buffer, plus output_samples_buffer when sample_size is
 * 16. buf_size is an int; alac_set_info() rejects a
 * max_samples_per_frame above INT_MAX / sizeof(int32_t).
 *
 * The visible failure path calls alac_decode_close() and returns
 * AVERROR(ENOMEM).
 *
 * NOTE(review): the buf_alloc_fail label and the success return are not
 * visible in this excerpt.
 *
 * @param alac decoder context with channels, sample_size and
 *             max_samples_per_frame already set
 */
481 static int allocate_buffers(ALACContext
*alac
)
484 int buf_size
= alac
->max_samples_per_frame
* sizeof(int32_t);
486 for (ch
= 0; ch
< FFMIN(alac
->channels
, 2); ch
++) {
487 FF_ALLOC_OR_GOTO(alac
->avctx
, alac
->predict_error_buffer
[ch
],
488 buf_size
, buf_alloc_fail
);
490 if (alac
->sample_size
== 16) {
491 FF_ALLOC_OR_GOTO(alac
->avctx
, alac
->output_samples_buffer
[ch
],
492 buf_size
, buf_alloc_fail
);
495 FF_ALLOC_OR_GOTO(alac
->avctx
, alac
->extra_bits_buffer
[ch
],
496 buf_size
, buf_alloc_fail
);
500 alac_decode_close(alac
->avctx
);
501 return AVERROR(ENOMEM
);
/**
 * Parse the stream parameters from the codec extradata into alac.
 *
 * Skips the first 12 bytes (size, tag, version), reads the big-endian
 * 32-bit max_samples_per_frame and rejects 0 or a value above
 * INT_MAX / sizeof(int32_t). It then skips the compatible-version byte
 * and reads one byte each for sample_size, rice_history_mult,
 * rice_initial_history, rice_limit and channels. The remaining fields
 * (maxRun, max coded frame size, average bitrate, samplerate) are read
 * and discarded.
 *
 * NOTE(review): all reads use the "u"-suffixed bytestream2 calls, which
 * presumably do no bounds checking; alac_decode_init() checks that the
 * extradata holds at least ALAC_EXTRADATA_SIZE bytes before calling
 * this function. The declaration of gb and the success return are not
 * visible in this excerpt.
 *
 * @param alac decoder context; alac->avctx supplies the extradata
 * @return AVERROR_INVALIDDATA when max_samples_per_frame is invalid
 */
504 static int alac_set_info(ALACContext
*alac
)
508 bytestream2_init(&gb
, alac
->avctx
->extradata
,
509 alac
->avctx
->extradata_size
);
511 bytestream2_skipu(&gb
, 12); // size:4, alac:4, version:4
513 alac
->max_samples_per_frame
= bytestream2_get_be32u(&gb
);
514 if (!alac
->max_samples_per_frame
||
515 alac
->max_samples_per_frame
> INT_MAX
/ sizeof(int32_t)) {
516 av_log(alac
->avctx
, AV_LOG_ERROR
,
517 "max samples per frame invalid: %"PRIu32
"\n",
518 alac
->max_samples_per_frame
);
519 return AVERROR_INVALIDDATA
;
521 bytestream2_skipu(&gb
, 1); // compatible version
522 alac
->sample_size
= bytestream2_get_byteu(&gb
);
523 alac
->rice_history_mult
= bytestream2_get_byteu(&gb
);
524 alac
->rice_initial_history
= bytestream2_get_byteu(&gb
);
525 alac
->rice_limit
= bytestream2_get_byteu(&gb
);
526 alac
->channels
= bytestream2_get_byteu(&gb
);
527 bytestream2_get_be16u(&gb
); // maxRun
528 bytestream2_get_be32u(&gb
); // max coded frame size
529 bytestream2_get_be32u(&gb
); // average bitrate
530 bytestream2_get_be32u(&gb
); // samplerate
/**
 * Initialise the decoder from the codec extradata.
 *
 * Visible steps:
 *  - fail with AVERROR_INVALIDDATA when the extradata is smaller than
 *    ALAC_EXTRADATA_SIZE;
 *  - parse the extradata with alac_set_info();
 *  - choose the sample format from sample_size (16 selects S16P, 32
 *    selects S32P; unlisted depths request a sample and return
 *    AVERROR_PATCHWELCOME) and store sample_size in bits_per_raw_sample;
 *  - when the parsed channel count is below 1 (with a warning) or above
 *    ALAC_MAX_CHANNELS, fall back to avctx->channels;
 *  - return AVERROR_PATCHWELCOME when avctx->channels still exceeds
 *    ALAC_MAX_CHANNELS;
 *  - set the channel layout from ff_alac_channel_layouts and allocate
 *    the work buffers with allocate_buffers().
 *
 * NOTE(review): several lines are missing from this excerpt, including
 * the assignment of alac->avctx, some case labels, the exact branching
 * around the channel-count fallback and the return statements after the
 * logged errors.
 *
 * @param avctx codec context; its priv_data is the ALACContext
 */
535 static av_cold
int alac_decode_init(AVCodecContext
* avctx
)
538 ALACContext
*alac
= avctx
->priv_data
;
541 /* initialize from the extradata */
542 if (alac
->avctx
->extradata_size
< ALAC_EXTRADATA_SIZE
) {
543 av_log(avctx
, AV_LOG_ERROR
, "alac: extradata is too small\n");
544 return AVERROR_INVALIDDATA
;
546 if (alac_set_info(alac
)) {
547 av_log(avctx
, AV_LOG_ERROR
, "alac: set_info failed\n");
551 switch (alac
->sample_size
) {
552 case 16: avctx
->sample_fmt
= AV_SAMPLE_FMT_S16P
;
555 case 32: avctx
->sample_fmt
= AV_SAMPLE_FMT_S32P
;
557 default: avpriv_request_sample(avctx
, "Sample depth %d", alac
->sample_size
);
558 return AVERROR_PATCHWELCOME
;
560 avctx
->bits_per_raw_sample
= alac
->sample_size
;
562 if (alac
->channels
< 1) {
563 av_log(avctx
, AV_LOG_WARNING
, "Invalid channel count\n");
564 alac
->channels
= avctx
->channels
;
566 if (alac
->channels
> ALAC_MAX_CHANNELS
)
567 alac
->channels
= avctx
->channels
;
569 avctx
->channels
= alac
->channels
;
571 if (avctx
->channels
> ALAC_MAX_CHANNELS
) {
572 avpriv_report_missing_feature(avctx
, "Channel count %d",
574 return AVERROR_PATCHWELCOME
;
576 avctx
->channel_layout
= ff_alac_channel_layouts
[alac
->channels
- 1];
578 if ((ret
= allocate_buffers(alac
)) < 0) {
579 av_log(avctx
, AV_LOG_ERROR
, "Error allocating buffers\n");
586 AVCodec ff_alac_decoder
= {
588 .long_name
= NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),
589 .type
= AVMEDIA_TYPE_AUDIO
,
590 .id
= AV_CODEC_ID_ALAC
,
591 .priv_data_size
= sizeof(ALACContext
),
592 .init
= alac_decode_init
,
593 .close
= alac_decode_close
,
594 .decode
= alac_decode_frame
,
595 .capabilities
= AV_CODEC_CAP_DR1
,