3 * Copyright (c) 2008 Jaikrishnan Menon <realityman@gmx.net>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/* Value assigned to avctx->frame_size at init; also the per-channel length of
 * the sample and predictor buffers in AlacEncodeContext. */
29 #define DEFAULT_FRAME_SIZE 4096
/* Value assigned to avctx->bits_per_coded_sample at init. */
30 #define DEFAULT_SAMPLE_SIZE 16
/* Number of per-channel sample buffers and LPC contexts in AlacEncodeContext. */
31 #define MAX_CHANNELS 8
/* Size reported as avctx->extradata_size and written into the first 4 bytes of
 * the extradata buffer. */
32 #define ALAC_EXTRADATA_SIZE 36
/* Header/footer allowances; alac_encode_init() adds both to the raw sample bit
 * count before shifting right by 3 to get max_coded_frame_size. */
33 #define ALAC_FRAME_HEADER_SIZE 55
34 #define ALAC_FRAME_FOOTER_SIZE 3
/* Written as a 9-bit value by encode_scalar() ahead of a directly stored sample. */
36 #define ALAC_ESCAPE_CODE 0x1FF
/* Upper bound accepted for user-supplied prediction orders; also sizes
 * LPCContext.lpc_coeff. */
37 #define ALAC_MAX_LPC_ORDER 30
/* Prediction orders used when the AVCodecContext values are negative. */
38 #define DEFAULT_MAX_PRED_ORDER 6
39 #define DEFAULT_MIN_PRED_ORDER 4
/* Both are passed through to ff_lpc_calc_coefs() by calc_predictor_params(). */
40 #define ALAC_MAX_LPC_PRECISION 9
41 #define ALAC_MAX_LPC_SHIFT 9
/* Stereo modes: used as case labels in alac_stereo_decorrelation(), which
 * switches on the value returned by estimate_stereo_mode(). */
43 #define ALAC_CHMODE_LEFT_RIGHT 0
44 #define ALAC_CHMODE_LEFT_SIDE 1
45 #define ALAC_CHMODE_RIGHT_SIDE 2
46 #define ALAC_CHMODE_MID_SIDE 3
/*
 * Rice coding parameters, reached through AlacEncodeContext as s->rc.
 * NOTE(review): the member declarations are not part of this excerpt; code in
 * this file accesses history_mult, initial_history, k_modifier and
 * rice_modifier.
 */
48 typedef struct RiceContext
{
/*
 * Linear prediction parameters for one channel; AlacEncodeContext keeps one
 * instance per channel in lpc[].
 * NOTE(review): lpc_order and lpc_quant are accessed elsewhere in this file but
 * are declared on lines not shown in this excerpt.
 */
55 typedef struct LPCContext
{
/* Predictor coefficients: copied in by calc_predictor_params() and written to
 * the bitstream as signed 16-bit values by write_compressed_frame(). */
57 int lpc_coeff
[ALAC_MAX_LPC_ORDER
+1];
/*
 * Private encoder state, obtained from avctx->priv_data.
 * NOTE(review): the rc, pbctx and dspctx members are used throughout this file
 * but are declared on lines not shown in this excerpt.
 */
61 typedef struct AlacEncodeContext
{
/* 0 selects verbatim frames in alac_encode_frame(); init clips it to 0..1. */
62 int compression_level
;
/* Order range handed to ff_lpc_calc_coefs(); validated in alac_encode_init(). */
63 int min_prediction_order
;
64 int max_prediction_order
;
/* Threshold compared against the encoded byte count in alac_encode_frame();
 * also stored in the extradata at offset 24. */
65 int max_coded_frame_size
;
/* Bit width used for escaped samples and for sign-extending residuals;
 * set to bits_per_coded_sample + channels - 1 at init. */
66 int write_sample_size
;
/* Deinterleaved input, one row per channel (filled by init_sample_buffers()). */
67 int32_t sample_buf
[MAX_CHANNELS
][DEFAULT_FRAME_SIZE
];
/* Residual of the channel currently being coded: written by
 * alac_linear_predictor(), read by alac_entropy_coder(). */
68 int32_t predictor_buf
[DEFAULT_FRAME_SIZE
];
/* Stereo decorrelation parameters; each is written as an 8-bit field by
 * write_compressed_frame(). */
69 int interlacing_shift
;
70 int interlacing_leftweight
;
/* Per-channel predictor parameters filled by calc_predictor_params(). */
73 LPCContext lpc
[MAX_CHANNELS
];
75 AVCodecContext
*avctx
;
79 static void init_sample_buffers(AlacEncodeContext
*s
, int16_t *input_samples
)
83 for(ch
=0;ch
<s
->avctx
->channels
;ch
++) {
84 int16_t *sptr
= input_samples
+ ch
;
85 for(i
=0;i
<s
->avctx
->frame_size
;i
++) {
86 s
->sample_buf
[ch
][i
] = *sptr
;
87 sptr
+= s
->avctx
->channels
;
/*
 * Write one value x to the bitstream in s->pbctx, using parameter k after
 * clamping it to s->rc.k_modifier.
 *
 * Visible output forms:
 *  - escape: the 9-bit ALAC_ESCAPE_CODE followed by x in write_sample_size bits;
 *  - prefix: q one-bits followed by a single zero bit;
 *  - suffix: r+1 in k bits, or a zero in k-1 bits.
 *
 * NOTE(review): the lines that compute q and r and that select between these
 * forms are not part of this excerpt.
 */
92 static void encode_scalar(AlacEncodeContext
*s
, int x
, int k
, int write_sample_size
)
96 k
= FFMIN(k
, s
->rc
.k_modifier
);
102 // write escape code and sample value directly
103 put_bits(&s
->pbctx
, 9, ALAC_ESCAPE_CODE
);
104 put_bits(&s
->pbctx
, write_sample_size
, x
);
/* (1<<q) - 1 in q bits is a run of q one-bits; the next call terminates it with a zero bit */
107 put_bits(&s
->pbctx
, q
, (1<<q
) - 1);
108 put_bits(&s
->pbctx
, 1, 0);
112 put_bits(&s
->pbctx
, k
, r
+1);
114 put_bits(&s
->pbctx
, k
-1, 0);
119 static void write_frame_header(AlacEncodeContext
*s
, int is_verbatim
)
121 put_bits(&s
->pbctx
, 3, s
->avctx
->channels
-1); // No. of channels -1
122 put_bits(&s
->pbctx
, 16, 0); // Seems to be zero
123 put_bits(&s
->pbctx
, 1, 1); // Sample count is in the header
124 put_bits(&s
->pbctx
, 2, 0); // FIXME: Wasted bytes field
125 put_bits(&s
->pbctx
, 1, is_verbatim
); // Audio block is verbatim
126 put_bits(&s
->pbctx
, 32, s
->avctx
->frame_size
); // No. of samples in the frame
129 static void calc_predictor_params(AlacEncodeContext
*s
, int ch
)
131 int32_t coefs
[MAX_LPC_ORDER
][MAX_LPC_ORDER
];
132 int shift
[MAX_LPC_ORDER
];
135 opt_order
= ff_lpc_calc_coefs(&s
->dspctx
, s
->sample_buf
[ch
], s
->avctx
->frame_size
, s
->min_prediction_order
, s
->max_prediction_order
,
136 ALAC_MAX_LPC_PRECISION
, coefs
, shift
, 1, ORDER_METHOD_EST
, ALAC_MAX_LPC_SHIFT
, 1);
138 s
->lpc
[ch
].lpc_order
= opt_order
;
139 s
->lpc
[ch
].lpc_quant
= shift
[opt_order
-1];
140 memcpy(s
->lpc
[ch
].lpc_coeff
, coefs
[opt_order
-1], opt_order
*sizeof(int));
/*
 * Score the stereo modes for a pair of channels and pick the one with the
 * lowest score.
 *
 * lt and rt are second-order differences of the left and right channel at
 * index i. sum[2] accumulates |(lt + rt) >> 1| and sum[3] accumulates
 * |lt - rt|; the four scores are pairwise sums of sum[0..3].
 *
 * NOTE(review): the local declarations, both loop headers, the accumulation
 * into sum[0] and sum[1], and the return statement are not part of this
 * excerpt. Because index i-2 is read, the sample loop presumably starts at
 * i = 2 - confirm. The returned index is compared against the ALAC_CHMODE_*
 * constants by alac_stereo_decorrelation().
 */
143 static int estimate_stereo_mode(int32_t *left_ch
, int32_t *right_ch
, int n
)
150 /* calculate sum of 2nd order residual for each channel */
151 sum
[0] = sum
[1] = sum
[2] = sum
[3] = 0;
153 lt
= left_ch
[i
] - 2*left_ch
[i
-1] + left_ch
[i
-2];
154 rt
= right_ch
[i
] - 2*right_ch
[i
-1] + right_ch
[i
-2];
155 sum
[2] += FFABS((lt
+ rt
) >> 1);
156 sum
[3] += FFABS(lt
- rt
);
161 /* calculate score for each mode */
162 score
[0] = sum
[0] + sum
[1];
163 score
[1] = sum
[0] + sum
[3];
164 score
[2] = sum
[1] + sum
[3];
165 score
[3] = sum
[2] + sum
[3];
167 /* return mode with lowest score */
170 if(score
[i
] < score
[best
]) {
/*
 * Choose a stereo mode for channels 0 and 1 with estimate_stereo_mode(),
 * transform s->sample_buf[0] / s->sample_buf[1] in place accordingly, and
 * record the matching interlacing_leftweight / interlacing_shift values
 * (later written to the bitstream by write_compressed_frame()).
 *
 * Visible parameter pairs (leftweight, shift):
 *   ALAC_CHMODE_LEFT_RIGHT -> (0, 0)   samples untouched in the visible lines
 *   ALAC_CHMODE_LEFT_SIDE  -> (1, 0)   right = left - right
 *   ALAC_CHMODE_RIGHT_SIDE -> (1, 31)
 *   remaining case         -> (1, 1)
 *
 * NOTE(review): the switch statement, the per-sample loop headers, the
 * assignments to tmp, the break statements and the label of the last case
 * are not part of this excerpt.
 */
177 static void alac_stereo_decorrelation(AlacEncodeContext
*s
)
179 int32_t *left
= s
->sample_buf
[0], *right
= s
->sample_buf
[1];
180 int i
, mode
, n
= s
->avctx
->frame_size
;
183 mode
= estimate_stereo_mode(left
, right
, n
);
187 case ALAC_CHMODE_LEFT_RIGHT
:
188 s
->interlacing_leftweight
= 0;
189 s
->interlacing_shift
= 0;
192 case ALAC_CHMODE_LEFT_SIDE
:
194 right
[i
] = left
[i
] - right
[i
];
196 s
->interlacing_leftweight
= 1;
197 s
->interlacing_shift
= 0;
200 case ALAC_CHMODE_RIGHT_SIDE
:
203 right
[i
] = left
[i
] - right
[i
];
/* right[i] >> 31 on an int32_t is a sign-derived correction term added to tmp */
204 left
[i
] = tmp
+ (right
[i
] >> 31);
206 s
->interlacing_leftweight
= 1;
207 s
->interlacing_shift
= 31;
213 left
[i
] = (tmp
+ right
[i
]) >> 1;
214 right
[i
] = tmp
- right
[i
];
216 s
->interlacing_leftweight
= 1;
217 s
->interlacing_shift
= 1;
/*
 * Compute the prediction residual of channel ch from s->sample_buf[ch] into
 * s->predictor_buf.
 *
 * - lpc_order == 31: residual[0] is the first sample and every later entry is
 *   the difference between consecutive samples.
 * - lpc_order > 0: the first lpc_order+1 entries are warm-up values (first
 *   sample, then consecutive differences); the remaining entries come from
 *   the general predictor, whose coefficients are adjusted after each sample
 *   based on the sign of the residual.
 *
 * The LPCContext is copied by value, so the coefficient updates below modify
 * only the local copy and not s->lpc[ch].
 *
 * NOTE(review): several lines of this function are not part of this excerpt
 * (local declarations, the coefficient multiply inside the sum, the advance of
 * the samples pointer, and parts of the adaptation loop). The relative
 * indexing via samples[0] and samples[lpc_order+1] suggests the pointer is
 * advanced once per output sample - confirm against the full source.
 */
222 static void alac_linear_predictor(AlacEncodeContext
*s
, int ch
)
225 LPCContext lpc
= s
->lpc
[ch
];
227 if(lpc
.lpc_order
== 31) {
228 s
->predictor_buf
[0] = s
->sample_buf
[ch
][0];
230 for(i
=1; i
<s
->avctx
->frame_size
; i
++)
231 s
->predictor_buf
[i
] = s
->sample_buf
[ch
][i
] - s
->sample_buf
[ch
][i
-1];
236 // generalised linear predictor
238 if(lpc
.lpc_order
> 0) {
239 int32_t *samples
= s
->sample_buf
[ch
];
240 int32_t *residual
= s
->predictor_buf
;
242 // generate warm-up samples
243 residual
[0] = samples
[0];
/* NOTE(review): this loop touches indices up to lpc_order without a visible
 * check that frame_size > lpc_order - confirm for short frames. */
244 for(i
=1;i
<=lpc
.lpc_order
;i
++)
245 residual
[i
] = samples
[i
] - samples
[i
-1];
247 // perform lpc on remaining samples
248 for(i
= lpc
.lpc_order
+ 1; i
< s
->avctx
->frame_size
; i
++) {
/* sum starts at half of 1 << lpc_quant so the later >> lpc_quant rounds.
 * NOTE(review): the shift count is negative if lpc_quant is 0 - confirm that
 * ff_lpc_calc_coefs() never yields a zero shift here. */
249 int sum
= 1 << (lpc
.lpc_quant
- 1), res_val
, j
;
251 for (j
= 0; j
< lpc
.lpc_order
; j
++) {
252 sum
+= (samples
[lpc
.lpc_order
-j
] - samples
[0]) *
256 sum
>>= lpc
.lpc_quant
;
258 residual
[i
] = sign_extend(samples
[lpc
.lpc_order
+1] - sum
,
259 s
->write_sample_size
);
260 res_val
= residual
[i
];
263 int index
= lpc
.lpc_order
- 1;
264 int neg
= (res_val
< 0);
/* keep adapting while res_val still has the sign it started with */
266 while(index
>= 0 && (neg
? (res_val
< 0):(res_val
> 0))) {
267 int val
= samples
[0] - samples
[lpc
.lpc_order
- index
];
268 int sign
= (val
? FFSIGN(val
) : 0);
273 lpc
.lpc_coeff
[index
] -= sign
;
275 res_val
-= ((val
>> lpc
.lpc_quant
) *
276 (lpc
.lpc_order
- index
));
/*
 * Entropy-code the residuals in s->predictor_buf for the current frame.
 *
 * An adaptive `history` value, seeded from s->rc.initial_history, selects the
 * parameter k for each encode_scalar() call and is updated after each value
 * using s->rc.history_mult. When history drops below 128 and samples remain, a
 * run of zero residuals is counted and its length is written with
 * encode_scalar() using a 16-bit escape width.
 *
 * NOTE(review): the lines that derive x from the current sample, advance
 * samples / i, count block_size and reset history are not part of this
 * excerpt.
 */
285 static void alac_entropy_coder(AlacEncodeContext
*s
)
287 unsigned int history
= s
->rc
.initial_history
;
288 int sign_modifier
= 0, i
, k
;
289 int32_t *samples
= s
->predictor_buf
;
/* no increment clause: i is advanced inside the body (on lines not shown) */
291 for(i
=0;i
< s
->avctx
->frame_size
;) {
294 k
= av_log2((history
>> 9) + 3);
302 encode_scalar(s
, x
- sign_modifier
, k
, s
->write_sample_size
);
304 history
+= x
* s
->rc
.history_mult
305 - ((history
* s
->rc
.history_mult
) >> 9);
311 if((history
< 128) && (i
< s
->avctx
->frame_size
)) {
312 unsigned int block_size
= 0;
314 k
= 7 - av_log2(history
) + ((history
+ 16) >> 6);
/* NOTE(review): *samples is read before the i < frame_size test; if samples
 * has advanced to the end of predictor_buf this reads one element past the
 * frame - consider testing the bound first. */
316 while((*samples
== 0) && (i
< s
->avctx
->frame_size
)) {
321 encode_scalar(s
, block_size
, k
, 16);
/* 1 when the run length fit in 16 bits; subtracted from the next coded value */
323 sign_modifier
= (block_size
<= 0xFFFF);
331 static void write_compressed_frame(AlacEncodeContext
*s
)
335 if(s
->avctx
->channels
== 2)
336 alac_stereo_decorrelation(s
);
337 put_bits(&s
->pbctx
, 8, s
->interlacing_shift
);
338 put_bits(&s
->pbctx
, 8, s
->interlacing_leftweight
);
340 for(i
=0;i
<s
->avctx
->channels
;i
++) {
342 calc_predictor_params(s
, i
);
344 put_bits(&s
->pbctx
, 4, 0); // prediction type : currently only type 0 has been RE'd
345 put_bits(&s
->pbctx
, 4, s
->lpc
[i
].lpc_quant
);
347 put_bits(&s
->pbctx
, 3, s
->rc
.rice_modifier
);
348 put_bits(&s
->pbctx
, 5, s
->lpc
[i
].lpc_order
);
349 // predictor coeff. table
350 for(j
=0;j
<s
->lpc
[i
].lpc_order
;j
++) {
351 put_sbits(&s
->pbctx
, 16, s
->lpc
[i
].lpc_coeff
[j
]);
355 // apply lpc and entropy coding to audio samples
357 for(i
=0;i
<s
->avctx
->channels
;i
++) {
358 alac_linear_predictor(s
, i
);
359 alac_entropy_coder(s
);
/*
 * Encoder init callback: validates the input format and prediction orders,
 * sets default Rice parameters, derives the frame-size limits and builds the
 * ALAC extradata block.
 *
 * Side effects on avctx: frame_size, bits_per_coded_sample, extradata,
 * extradata_size and coded_frame are all assigned here.
 *
 * NOTE(review): the statements that follow each error log (and the final
 * return) are not part of this excerpt. Three points to confirm against the
 * full source:
 *  - alac_extradata is allocated in its declaration, before any validation;
 *    if the error paths return early the buffer must be freed there.
 *  - no NULL check on alac_extradata is visible before it is written.
 *  - the result of avcodec_alloc_frame() is dereferenced on the next source
 *    line without a NULL check.
 */
363 static av_cold
int alac_encode_init(AVCodecContext
*avctx
)
365 AlacEncodeContext
*s
= avctx
->priv_data
;
366 uint8_t *alac_extradata
= av_mallocz(ALAC_EXTRADATA_SIZE
+1);
368 avctx
->frame_size
= DEFAULT_FRAME_SIZE
;
369 avctx
->bits_per_coded_sample
= DEFAULT_SAMPLE_SIZE
;
371 if(avctx
->sample_fmt
!= SAMPLE_FMT_S16
) {
372 av_log(avctx
, AV_LOG_ERROR
, "only pcm_s16 input samples are supported\n");
376 // Set default compression level
377 if(avctx
->compression_level
== FF_COMPRESSION_DEFAULT
)
378 s
->compression_level
= 1;
380 s
->compression_level
= av_clip(avctx
->compression_level
, 0, 1);
382 // Initialize default Rice parameters
383 s
->rc
.history_mult
= 40;
384 s
->rc
.initial_history
= 10;
385 s
->rc
.k_modifier
= 14;
386 s
->rc
.rice_modifier
= 4;
/* header + footer + raw sample bits, then >>3 to convert the total to bytes */
388 s
->max_coded_frame_size
= (ALAC_FRAME_HEADER_SIZE
+ ALAC_FRAME_FOOTER_SIZE
+
389 avctx
->frame_size
*avctx
->channels
*avctx
->bits_per_coded_sample
)>>3;
391 s
->write_sample_size
= avctx
->bits_per_coded_sample
+ avctx
->channels
- 1; // FIXME: consider wasted_bytes
/* extradata layout: values are stored big-endian at fixed byte offsets */
393 AV_WB32(alac_extradata
, ALAC_EXTRADATA_SIZE
);
394 AV_WB32(alac_extradata
+4, MKBETAG('a','l','a','c'));
395 AV_WB32(alac_extradata
+12, avctx
->frame_size
);
396 AV_WB8 (alac_extradata
+17, avctx
->bits_per_coded_sample
);
397 AV_WB8 (alac_extradata
+21, avctx
->channels
);
398 AV_WB32(alac_extradata
+24, s
->max_coded_frame_size
);
399 AV_WB32(alac_extradata
+28, avctx
->sample_rate
*avctx
->channels
*avctx
->bits_per_coded_sample
); // average bitrate
400 AV_WB32(alac_extradata
+32, avctx
->sample_rate
);
402 // Set relevant extradata fields
/* Rice parameters are stored only when compression is enabled; the buffer
 * came from av_mallocz(), so bytes 18..20 stay zero otherwise */
403 if(s
->compression_level
> 0) {
404 AV_WB8(alac_extradata
+18, s
->rc
.history_mult
);
405 AV_WB8(alac_extradata
+19, s
->rc
.initial_history
);
406 AV_WB8(alac_extradata
+20, s
->rc
.k_modifier
);
/* a negative avctx value keeps the default; otherwise it must lie within
 * MIN_LPC_ORDER..ALAC_MAX_LPC_ORDER */
409 s
->min_prediction_order
= DEFAULT_MIN_PRED_ORDER
;
410 if(avctx
->min_prediction_order
>= 0) {
411 if(avctx
->min_prediction_order
< MIN_LPC_ORDER
||
412 avctx
->min_prediction_order
> ALAC_MAX_LPC_ORDER
) {
413 av_log(avctx
, AV_LOG_ERROR
, "invalid min prediction order: %d\n", avctx
->min_prediction_order
);
417 s
->min_prediction_order
= avctx
->min_prediction_order
;
420 s
->max_prediction_order
= DEFAULT_MAX_PRED_ORDER
;
421 if(avctx
->max_prediction_order
>= 0) {
422 if(avctx
->max_prediction_order
< MIN_LPC_ORDER
||
423 avctx
->max_prediction_order
> ALAC_MAX_LPC_ORDER
) {
424 av_log(avctx
, AV_LOG_ERROR
, "invalid max prediction order: %d\n", avctx
->max_prediction_order
);
428 s
->max_prediction_order
= avctx
->max_prediction_order
;
431 if(s
->max_prediction_order
< s
->min_prediction_order
) {
432 av_log(avctx
, AV_LOG_ERROR
, "invalid prediction orders: min=%d max=%d\n",
433 s
->min_prediction_order
, s
->max_prediction_order
);
/* the buffer is handed to avctx here; alac_encode_close() frees avctx->extradata */
437 avctx
->extradata
= alac_extradata
;
438 avctx
->extradata_size
= ALAC_EXTRADATA_SIZE
;
440 avctx
->coded_frame
= avcodec_alloc_frame();
441 avctx
->coded_frame
->key_frame
= 1;
444 dsputil_init(&s
->dspctx
, avctx
);
/*
 * Encode one frame of interleaved 16-bit samples from `data` into `frame`.
 *
 * Preconditions checked (each logs an error): avctx->frame_size must not
 * exceed DEFAULT_FRAME_SIZE, and buf_size must be at least twice
 * s->max_coded_frame_size.
 *
 * With compression_level 0 (or verbatim_flag set) the frame is written
 * verbatim: header with the verbatim bit set, then every sample as a signed
 * 16-bit value. Otherwise the samples are deinterleaved and a compressed frame
 * is written. The resulting size in bytes is the bit count shifted right by 3;
 * if it exceeds max_coded_frame_size the verbatim path is meant to be used
 * instead, and a second overflow is reported as an encoding error.
 *
 * NOTE(review): the return statements, the code that sets verbatim_flag and
 * retries, and any flushing of the bit writer are not part of this excerpt.
 */
449 static int alac_encode_frame(AVCodecContext
*avctx
, uint8_t *frame
,
450 int buf_size
, void *data
)
452 AlacEncodeContext
*s
= avctx
->priv_data
;
453 PutBitContext
*pb
= &s
->pbctx
;
454 int i
, out_bytes
, verbatim_flag
= 0;
456 if(avctx
->frame_size
> DEFAULT_FRAME_SIZE
) {
457 av_log(avctx
, AV_LOG_ERROR
, "input frame size exceeded\n");
461 if(buf_size
< 2*s
->max_coded_frame_size
) {
462 av_log(avctx
, AV_LOG_ERROR
, "buffer size is too small\n");
467 init_put_bits(pb
, frame
, buf_size
);
469 if((s
->compression_level
== 0) || verbatim_flag
) {
471 int16_t *samples
= data
;
472 write_frame_header(s
, 1);
473 for(i
=0; i
<avctx
->frame_size
*avctx
->channels
; i
++) {
474 put_sbits(pb
, 16, *samples
++);
477 init_sample_buffers(s
, data
);
478 write_frame_header(s
, 0);
479 write_compressed_frame(s
);
/* bit count -> whole bytes */
484 out_bytes
= put_bits_count(pb
) >> 3;
486 if(out_bytes
> s
->max_coded_frame_size
) {
487 /* frame too large. use verbatim mode */
488 if(verbatim_flag
|| (s
->compression_level
== 0)) {
489 /* still too large. must be an error. */
490 av_log(avctx
, AV_LOG_ERROR
, "error encoding frame\n");
/*
 * Encoder close callback: releases what alac_encode_init() attached to avctx.
 * Frees avctx->extradata and avctx->coded_frame through av_freep() and resets
 * extradata_size to 0.
 * NOTE(review): the return statement is not part of this excerpt.
 */
500 static av_cold
int alac_encode_close(AVCodecContext
*avctx
)
502 av_freep(&avctx
->extradata
);
503 avctx
->extradata_size
= 0;
504 av_freep(&avctx
->coded_frame
);
508 AVCodec alac_encoder
= {
512 sizeof(AlacEncodeContext
),
516 .capabilities
= CODEC_CAP_SMALL_LAST_FRAME
,
517 .long_name
= NULL_IF_CONFIG_SMALL("ALAC (Apple Lossless Audio Codec)"),