avformat/mpeg: demux ivtv captions
[ffmpeg.git] / libavcodec / flacenc.c
blob3a9578f5cd61e14d8f77bf7aa18729497d202184
1 /*
2 * FLAC audio encoder
3 * Copyright (c) 2006 Justin Ruggles <justin.ruggles@gmail.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/avassert.h"
23 #include "libavutil/channel_layout.h"
24 #include "libavutil/crc.h"
25 #include "libavutil/intmath.h"
26 #include "libavutil/md5.h"
27 #include "libavutil/mem.h"
28 #include "libavutil/opt.h"
30 #include "avcodec.h"
31 #include "bswapdsp.h"
32 #include "codec_internal.h"
33 #include "encode.h"
34 #include "put_bits.h"
35 #include "lpc.h"
36 #include "flac.h"
37 #include "flacdata.h"
38 #include "flacencdsp.h"
/* Subframe base types; for FIXED and LPC the encoder ORs order information
 * into the value to form FlacSubframe.type_code. */
#define FLAC_SUBFRAME_CONSTANT  0
#define FLAC_SUBFRAME_VERBATIM  1
#define FLAC_SUBFRAME_FIXED     8
#define FLAC_SUBFRAME_LPC      32

/* Upper bounds used by the predictor and Rice-partition searches. */
#define MAX_FIXED_ORDER     4
#define MAX_PARTITION_ORDER 8
#define MAX_PARTITIONS     (1 << MAX_PARTITION_ORDER)
#define MAX_LPC_PRECISION  15
#define MIN_LPC_SHIFT       0
#define MAX_LPC_SHIFT      15
/* Residual coding method. The numeric value is added to the bit count once
 * per partition in subframe_count_exact(), and calc_rice_params() derives the
 * largest usable Rice parameter from it as (1 << mode) - 2. */
enum CodingMode {
    CODING_MODE_RICE  = 4,
    CODING_MODE_RICE2 = 5,
};
57 typedef struct CompressionOptions {
58 int compression_level;
59 int block_time_ms;
60 enum FFLPCType lpc_type;
61 int lpc_passes;
62 int lpc_coeff_precision;
63 int min_prediction_order;
64 int max_prediction_order;
65 int prediction_order_method;
66 int min_partition_order;
67 int max_partition_order;
68 int ch_mode;
69 int exact_rice_parameters;
70 int multi_dim_quant;
71 } CompressionOptions;
73 typedef struct RiceContext {
74 enum CodingMode coding_mode;
75 int porder;
76 int params[MAX_PARTITIONS];
77 } RiceContext;
79 typedef struct FlacSubframe {
80 int type;
81 int type_code;
82 int obits;
83 int wasted;
84 int order;
85 int32_t coefs[MAX_LPC_ORDER];
86 int shift;
88 RiceContext rc;
89 uint32_t rc_udata[FLAC_MAX_BLOCKSIZE];
90 uint64_t rc_sums[32][MAX_PARTITIONS];
92 int32_t samples[FLAC_MAX_BLOCKSIZE];
93 int32_t residual[FLAC_MAX_BLOCKSIZE+11];
94 } FlacSubframe;
96 typedef struct FlacFrame {
97 FlacSubframe subframes[FLAC_MAX_CHANNELS];
98 int64_t samples_33bps[FLAC_MAX_BLOCKSIZE];
99 int blocksize;
100 int bs_code[2];
101 uint8_t crc8;
102 int ch_mode;
103 int verbatim_only;
104 } FlacFrame;
106 typedef struct FlacEncodeContext {
107 AVClass *class;
108 PutBitContext pb;
109 int channels;
110 int samplerate;
111 int sr_code[2];
112 int bps_code;
113 int max_blocksize;
114 int min_framesize;
115 int max_framesize;
116 int max_encoded_framesize;
117 uint32_t frame_count;
118 uint64_t sample_count;
119 uint8_t md5sum[16];
120 FlacFrame frame;
121 CompressionOptions options;
122 AVCodecContext *avctx;
123 LPCContext lpc_ctx;
124 struct AVMD5 *md5ctx;
125 uint8_t *md5_buffer;
126 unsigned int md5_buffer_size;
127 BswapDSPContext bdsp;
128 FLACEncDSPContext flac_dsp;
130 int flushed;
131 int64_t next_pts;
132 } FlacEncodeContext;
136 * Write streaminfo metadata block to byte array.
138 static void write_streaminfo(FlacEncodeContext *s, uint8_t *header)
140 PutBitContext pb;
142 memset(header, 0, FLAC_STREAMINFO_SIZE);
143 init_put_bits(&pb, header, FLAC_STREAMINFO_SIZE);
145 /* streaminfo metadata block */
146 put_bits(&pb, 16, s->max_blocksize);
147 put_bits(&pb, 16, s->max_blocksize);
148 put_bits(&pb, 24, s->min_framesize);
149 put_bits(&pb, 24, s->max_framesize);
150 put_bits(&pb, 20, s->samplerate);
151 put_bits(&pb, 3, s->channels-1);
152 put_bits(&pb, 5, s->avctx->bits_per_raw_sample - 1);
153 /* write 36-bit sample count in 2 put_bits() calls */
154 put_bits(&pb, 24, (s->sample_count & 0xFFFFFF000LL) >> 12);
155 put_bits(&pb, 12, s->sample_count & 0x000000FFFLL);
156 flush_put_bits(&pb);
157 memcpy(&header[18], s->md5sum, 16);
/**
 * Calculate an estimate for the maximum frame size based on verbatim mode.
 *
 * Technically, there is no limit to FLAC frame size, but an encoder should
 * not write a frame that is larger than if verbatim encoding mode were to
 * be used.
 *
 * @param blocksize block size, in samples
 * @param ch        number of channels
 * @param bps       bits-per-sample
 * @return estimated size in bytes
 */
static int flac_get_max_frame_size(int blocksize, int ch, int bps)
{
    const int frame_header     = 16;
    const int frame_footer     = 2;
    const int subframe_headers = ch * ((7+bps+7)/8);
    int payload_bits;

    if (ch == 2) {
        /* for stereo, need to account for using decorrelation */
        payload_bits = (2*bps+1) * blocksize;
    } else {
        payload_bits = ch*bps * blocksize;
    }

    return frame_header + subframe_headers + (payload_bits + 7) / 8 + frame_footer;
}
190 * Set blocksize based on samplerate.
191 * Choose the closest predefined blocksize >= BLOCK_TIME_MS milliseconds.
193 static int select_blocksize(int samplerate, int block_time_ms)
195 int i;
196 int target;
197 int blocksize;
199 av_assert0(samplerate > 0);
200 blocksize = ff_flac_blocksize_table[1];
201 target = (samplerate * block_time_ms) / 1000;
202 for (i = 0; i < 16; i++) {
203 if (target >= ff_flac_blocksize_table[i] &&
204 ff_flac_blocksize_table[i] > blocksize) {
205 blocksize = ff_flac_blocksize_table[i];
208 return blocksize;
212 static av_cold void dprint_compression_options(FlacEncodeContext *s)
214 AVCodecContext *avctx = s->avctx;
215 CompressionOptions *opt = &s->options;
217 av_log(avctx, AV_LOG_DEBUG, " compression: %d\n", opt->compression_level);
219 switch (opt->lpc_type) {
220 case FF_LPC_TYPE_NONE:
221 av_log(avctx, AV_LOG_DEBUG, " lpc type: None\n");
222 break;
223 case FF_LPC_TYPE_FIXED:
224 av_log(avctx, AV_LOG_DEBUG, " lpc type: Fixed pre-defined coefficients\n");
225 break;
226 case FF_LPC_TYPE_LEVINSON:
227 av_log(avctx, AV_LOG_DEBUG, " lpc type: Levinson-Durbin recursion with Welch window\n");
228 break;
229 case FF_LPC_TYPE_CHOLESKY:
230 av_log(avctx, AV_LOG_DEBUG, " lpc type: Cholesky factorization, %d pass%s\n",
231 opt->lpc_passes, opt->lpc_passes == 1 ? "" : "es");
232 break;
235 av_log(avctx, AV_LOG_DEBUG, " prediction order: %d, %d\n",
236 opt->min_prediction_order, opt->max_prediction_order);
238 switch (opt->prediction_order_method) {
239 case ORDER_METHOD_EST:
240 av_log(avctx, AV_LOG_DEBUG, " order method: %s\n", "estimate");
241 break;
242 case ORDER_METHOD_2LEVEL:
243 av_log(avctx, AV_LOG_DEBUG, " order method: %s\n", "2-level");
244 break;
245 case ORDER_METHOD_4LEVEL:
246 av_log(avctx, AV_LOG_DEBUG, " order method: %s\n", "4-level");
247 break;
248 case ORDER_METHOD_8LEVEL:
249 av_log(avctx, AV_LOG_DEBUG, " order method: %s\n", "8-level");
250 break;
251 case ORDER_METHOD_SEARCH:
252 av_log(avctx, AV_LOG_DEBUG, " order method: %s\n", "full search");
253 break;
254 case ORDER_METHOD_LOG:
255 av_log(avctx, AV_LOG_DEBUG, " order method: %s\n", "log search");
256 break;
260 av_log(avctx, AV_LOG_DEBUG, " partition order: %d, %d\n",
261 opt->min_partition_order, opt->max_partition_order);
263 av_log(avctx, AV_LOG_DEBUG, " block size: %d\n", avctx->frame_size);
265 av_log(avctx, AV_LOG_DEBUG, " lpc precision: %d\n",
266 opt->lpc_coeff_precision);
270 static av_cold int flac_encode_init(AVCodecContext *avctx)
272 int freq = avctx->sample_rate;
273 int channels = avctx->ch_layout.nb_channels;
274 FlacEncodeContext *s = avctx->priv_data;
275 int i, level, ret;
276 uint8_t *streaminfo;
278 s->avctx = avctx;
280 switch (avctx->sample_fmt) {
281 case AV_SAMPLE_FMT_S16:
282 avctx->bits_per_raw_sample = 16;
283 s->bps_code = 4;
284 break;
285 case AV_SAMPLE_FMT_S32:
286 if (avctx->bits_per_raw_sample <= 24) {
287 if (avctx->bits_per_raw_sample < 24)
288 av_log(avctx, AV_LOG_WARNING, "encoding as 24 bits-per-sample\n");
289 avctx->bits_per_raw_sample = 24;
290 s->bps_code = 6;
291 } else if (avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
292 av_log(avctx, AV_LOG_WARNING,
293 "encoding as 24 bits-per-sample, more is considered "
294 "experimental. Add -strict experimental if you want "
295 "to encode more than 24 bits-per-sample\n");
296 avctx->bits_per_raw_sample = 24;
297 s->bps_code = 6;
298 } else {
299 avctx->bits_per_raw_sample = 32;
300 s->bps_code = 7;
302 break;
305 if (channels < 1 || channels > FLAC_MAX_CHANNELS) {
306 av_log(avctx, AV_LOG_ERROR, "%d channels not supported (max %d)\n",
307 channels, FLAC_MAX_CHANNELS);
308 return AVERROR(EINVAL);
310 s->channels = channels;
312 /* find samplerate in table */
313 if (freq < 1)
314 return AVERROR(EINVAL);
315 for (i = 1; i < 12; i++) {
316 if (freq == ff_flac_sample_rate_table[i]) {
317 s->samplerate = ff_flac_sample_rate_table[i];
318 s->sr_code[0] = i;
319 s->sr_code[1] = 0;
320 break;
323 /* if not in table, samplerate is non-standard */
324 if (i == 12) {
325 if (freq % 1000 == 0 && freq < 255000) {
326 s->sr_code[0] = 12;
327 s->sr_code[1] = freq / 1000;
328 } else if (freq % 10 == 0 && freq < 655350) {
329 s->sr_code[0] = 14;
330 s->sr_code[1] = freq / 10;
331 } else if (freq < 65535) {
332 s->sr_code[0] = 13;
333 s->sr_code[1] = freq;
334 } else if (freq < 1048576) {
335 s->sr_code[0] = 0;
336 s->sr_code[1] = 0;
337 } else {
338 av_log(avctx, AV_LOG_ERROR, "%d Hz not supported\n", freq);
339 return AVERROR(EINVAL);
341 s->samplerate = freq;
344 /* set compression option defaults based on avctx->compression_level */
345 if (avctx->compression_level < 0)
346 s->options.compression_level = 5;
347 else
348 s->options.compression_level = avctx->compression_level;
350 level = s->options.compression_level;
351 if (level > 12) {
352 av_log(avctx, AV_LOG_ERROR, "invalid compression level: %d\n",
353 s->options.compression_level);
354 return AVERROR(EINVAL);
357 s->options.block_time_ms = ((int[]){ 27, 27, 27,105,105,105,105,105,105,105,105,105,105})[level];
359 if (s->options.lpc_type == FF_LPC_TYPE_DEFAULT)
360 s->options.lpc_type = ((int[]){ FF_LPC_TYPE_FIXED, FF_LPC_TYPE_FIXED, FF_LPC_TYPE_FIXED,
361 FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
362 FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
363 FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON, FF_LPC_TYPE_LEVINSON,
364 FF_LPC_TYPE_LEVINSON})[level];
366 if (s->options.min_prediction_order < 0)
367 s->options.min_prediction_order = ((int[]){ 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1})[level];
368 if (s->options.max_prediction_order < 0)
369 s->options.max_prediction_order = ((int[]){ 3, 4, 4, 6, 8, 8, 8, 8, 12, 12, 12, 32, 32})[level];
371 if (s->options.prediction_order_method < 0)
372 s->options.prediction_order_method = ((int[]){ ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST,
373 ORDER_METHOD_EST, ORDER_METHOD_EST, ORDER_METHOD_EST,
374 ORDER_METHOD_4LEVEL, ORDER_METHOD_LOG, ORDER_METHOD_4LEVEL,
375 ORDER_METHOD_LOG, ORDER_METHOD_SEARCH, ORDER_METHOD_LOG,
376 ORDER_METHOD_SEARCH})[level];
378 if (s->options.min_partition_order > s->options.max_partition_order) {
379 av_log(avctx, AV_LOG_ERROR, "invalid partition orders: min=%d max=%d\n",
380 s->options.min_partition_order, s->options.max_partition_order);
381 return AVERROR(EINVAL);
383 if (s->options.min_partition_order < 0)
384 s->options.min_partition_order = ((int[]){ 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})[level];
385 if (s->options.max_partition_order < 0)
386 s->options.max_partition_order = ((int[]){ 2, 2, 3, 3, 3, 8, 8, 8, 8, 8, 8, 8, 8})[level];
388 if (s->options.lpc_type == FF_LPC_TYPE_NONE) {
389 s->options.min_prediction_order = 0;
390 s->options.max_prediction_order = 0;
391 } else if (s->options.lpc_type == FF_LPC_TYPE_FIXED) {
392 if (s->options.min_prediction_order > MAX_FIXED_ORDER) {
393 av_log(avctx, AV_LOG_WARNING,
394 "invalid min prediction order %d, clamped to %d\n",
395 s->options.min_prediction_order, MAX_FIXED_ORDER);
396 s->options.min_prediction_order = MAX_FIXED_ORDER;
398 if (s->options.max_prediction_order > MAX_FIXED_ORDER) {
399 av_log(avctx, AV_LOG_WARNING,
400 "invalid max prediction order %d, clamped to %d\n",
401 s->options.max_prediction_order, MAX_FIXED_ORDER);
402 s->options.max_prediction_order = MAX_FIXED_ORDER;
406 if (s->options.max_prediction_order < s->options.min_prediction_order) {
407 av_log(avctx, AV_LOG_ERROR, "invalid prediction orders: min=%d max=%d\n",
408 s->options.min_prediction_order, s->options.max_prediction_order);
409 return AVERROR(EINVAL);
412 if (avctx->frame_size > 0) {
413 if (avctx->frame_size < FLAC_MIN_BLOCKSIZE ||
414 avctx->frame_size > FLAC_MAX_BLOCKSIZE) {
415 av_log(avctx, AV_LOG_ERROR, "invalid block size: %d\n",
416 avctx->frame_size);
417 return AVERROR(EINVAL);
419 } else {
420 s->avctx->frame_size = select_blocksize(s->samplerate, s->options.block_time_ms);
422 s->max_blocksize = s->avctx->frame_size;
424 /* set maximum encoded frame size in verbatim mode */
425 s->max_framesize = flac_get_max_frame_size(s->avctx->frame_size,
426 s->channels,
427 s->avctx->bits_per_raw_sample);
429 /* initialize MD5 context */
430 s->md5ctx = av_md5_alloc();
431 if (!s->md5ctx)
432 return AVERROR(ENOMEM);
433 av_md5_init(s->md5ctx);
435 streaminfo = av_malloc(FLAC_STREAMINFO_SIZE);
436 if (!streaminfo)
437 return AVERROR(ENOMEM);
438 write_streaminfo(s, streaminfo);
439 avctx->extradata = streaminfo;
440 avctx->extradata_size = FLAC_STREAMINFO_SIZE;
442 s->frame_count = 0;
443 s->min_framesize = s->max_framesize;
445 if ((channels == 3 &&
446 av_channel_layout_compare(&avctx->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_SURROUND)) ||
447 (channels == 4 &&
448 av_channel_layout_compare(&avctx->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_2_2) &&
449 av_channel_layout_compare(&avctx->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_QUAD)) ||
450 (channels == 5 &&
451 av_channel_layout_compare(&avctx->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT0) &&
452 av_channel_layout_compare(&avctx->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT0_BACK)) ||
453 (channels == 6 &&
454 av_channel_layout_compare(&avctx->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT1) &&
455 av_channel_layout_compare(&avctx->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_5POINT1_BACK))) {
456 if (avctx->ch_layout.order != AV_CHANNEL_ORDER_UNSPEC) {
457 av_log(avctx, AV_LOG_ERROR, "Channel layout not supported by Flac, "
458 "output stream will have incorrect "
459 "channel layout.\n");
460 } else {
461 av_log(avctx, AV_LOG_WARNING, "No channel layout specified. The encoder "
462 "will use Flac channel layout for "
463 "%d channels.\n", channels);
467 ret = ff_lpc_init(&s->lpc_ctx, avctx->frame_size,
468 s->options.max_prediction_order, FF_LPC_TYPE_LEVINSON);
470 ff_bswapdsp_init(&s->bdsp);
471 ff_flacencdsp_init(&s->flac_dsp);
473 dprint_compression_options(s);
475 return ret;
479 static void init_frame(FlacEncodeContext *s, int nb_samples)
481 int i, ch;
482 FlacFrame *frame;
484 frame = &s->frame;
486 for (i = 0; i < 16; i++) {
487 if (nb_samples == ff_flac_blocksize_table[i]) {
488 frame->blocksize = ff_flac_blocksize_table[i];
489 frame->bs_code[0] = i;
490 frame->bs_code[1] = 0;
491 break;
494 if (i == 16) {
495 frame->blocksize = nb_samples;
496 if (frame->blocksize <= 256) {
497 frame->bs_code[0] = 6;
498 frame->bs_code[1] = frame->blocksize-1;
499 } else {
500 frame->bs_code[0] = 7;
501 frame->bs_code[1] = frame->blocksize-1;
505 for (ch = 0; ch < s->channels; ch++) {
506 FlacSubframe *sub = &frame->subframes[ch];
508 sub->wasted = 0;
509 sub->obits = s->avctx->bits_per_raw_sample;
511 if (sub->obits > 16)
512 sub->rc.coding_mode = CODING_MODE_RICE2;
513 else
514 sub->rc.coding_mode = CODING_MODE_RICE;
517 frame->verbatim_only = 0;
522 * Copy channel-interleaved input samples into separate subframes.
524 static void copy_samples(FlacEncodeContext *s, const void *samples)
526 int i, j, ch;
527 FlacFrame *frame;
529 #define COPY_SAMPLES(bits, shift0) do { \
530 const int ## bits ## _t *samples0 = samples; \
531 const int shift = shift0; \
532 frame = &s->frame; \
533 for (i = 0, j = 0; i < frame->blocksize; i++) \
534 for (ch = 0; ch < s->channels; ch++, j++) \
535 frame->subframes[ch].samples[i] = samples0[j] >> shift; \
536 } while (0)
538 if (s->avctx->sample_fmt == AV_SAMPLE_FMT_S16)
539 COPY_SAMPLES(16, 0);
540 else
541 COPY_SAMPLES(32, 32 - s->avctx->bits_per_raw_sample);
/**
 * Count the bits needed to Rice-code n residuals with parameter k.
 *
 * Each residual is mapped to an unsigned value (sign folded into the lowest
 * bit) and costs (value >> k) + 1 + k bits.
 *
 * @param res residuals
 * @param n   number of residuals
 * @param k   Rice parameter
 * @return total number of bits
 */
static uint64_t rice_count_exact(const int32_t *res, int n, int k)
{
    uint64_t total = 0;

    for (int idx = 0; idx < n; idx++) {
        const int32_t r       = res[idx];
        const unsigned folded = ((unsigned)r << 1) ^ (r >> 31);

        total += (folded >> k) + 1 + k;
    }
    return total;
}
558 static uint64_t subframe_count_exact(FlacEncodeContext *s, FlacSubframe *sub,
559 int pred_order)
561 int p, porder, psize;
562 int i, part_end;
563 uint64_t count = 0;
565 /* subframe header */
566 count += 8;
568 if (sub->wasted)
569 count += sub->wasted;
571 /* subframe */
572 if (sub->type == FLAC_SUBFRAME_CONSTANT) {
573 count += sub->obits;
574 } else if (sub->type == FLAC_SUBFRAME_VERBATIM) {
575 count += s->frame.blocksize * sub->obits;
576 } else {
577 /* warm-up samples */
578 count += pred_order * sub->obits;
580 /* LPC coefficients */
581 if (sub->type == FLAC_SUBFRAME_LPC)
582 count += 4 + 5 + pred_order * s->options.lpc_coeff_precision;
584 /* rice-encoded block */
585 count += 2;
587 /* partition order */
588 porder = sub->rc.porder;
589 psize = s->frame.blocksize >> porder;
590 count += 4;
592 /* residual */
593 i = pred_order;
594 part_end = psize;
595 for (p = 0; p < 1 << porder; p++) {
596 int k = sub->rc.params[p];
597 count += sub->rc.coding_mode;
598 count += rice_count_exact(&sub->residual[i], part_end - i, k);
599 i = part_end;
600 part_end = FFMIN(s->frame.blocksize, part_end + psize);
604 return count;
/* Estimated bit count for Rice-coding n values whose unsigned sum is `sum`
 * with parameter k. Every argument is parenthesized so that expressions
 * containing low-precedence operators expand correctly. */
#define rice_encode_count(sum, n, k) (((n)*((k)+1))+(((sum)-((n)>>1))>>(k)))
/**
 * Solve for d/dk(rice_encode_count) = n-((sum-(n>>1))>>(k+1)) = 0.
 *
 * @param sum       sum of the unsigned-mapped residuals of the partition
 * @param n         number of residuals in the partition
 * @param max_param largest parameter that may be returned
 * @return estimated Rice parameter; 0 when sum does not exceed n / 2
 */
static int find_optimal_param(uint64_t sum, int n, int max_param)
{
    uint64_t excess;
    int k;

    /* also keeps an empty partition (n == 0) away from the division below */
    if (sum <= n >> 1)
        return 0;

    excess = sum - (n >> 1);
    k      = av_log2(av_clipl_int32(excess / n));
    return FFMIN(k, max_param);
}
625 static int find_optimal_param_exact(uint64_t sums[32][MAX_PARTITIONS], int i, int max_param)
627 int bestk = 0;
628 int64_t bestbits = INT64_MAX;
629 int k;
631 for (k = 0; k <= max_param; k++) {
632 int64_t bits = sums[k][i];
633 if (bits < bestbits) {
634 bestbits = bits;
635 bestk = k;
639 return bestk;
642 static uint64_t calc_optimal_rice_params(RiceContext *rc, int porder,
643 uint64_t sums[32][MAX_PARTITIONS],
644 int n, int pred_order, int max_param, int exact)
646 int i;
647 int k, cnt, part;
648 uint64_t all_bits;
650 part = (1 << porder);
651 all_bits = 4 * part;
653 cnt = (n >> porder) - pred_order;
654 for (i = 0; i < part; i++) {
655 if (exact) {
656 k = find_optimal_param_exact(sums, i, max_param);
657 all_bits += sums[k][i];
658 } else {
659 k = find_optimal_param(sums[0][i], cnt, max_param);
660 all_bits += rice_encode_count(sums[0][i], cnt, k);
662 rc->params[i] = k;
663 cnt = n >> porder;
666 rc->porder = porder;
668 return all_bits;
672 static void calc_sum_top(int pmax, int kmax, const uint32_t *data, int n, int pred_order,
673 uint64_t sums[32][MAX_PARTITIONS])
675 int i, k;
676 int parts;
677 const uint32_t *res, *res_end;
679 /* sums for highest level */
680 parts = (1 << pmax);
682 for (k = 0; k <= kmax; k++) {
683 res = &data[pred_order];
684 res_end = &data[n >> pmax];
685 for (i = 0; i < parts; i++) {
686 if (kmax) {
687 uint64_t sum = (1LL + k) * (res_end - res);
688 while (res < res_end)
689 sum += *(res++) >> k;
690 sums[k][i] = sum;
691 } else {
692 uint64_t sum = 0;
693 while (res < res_end)
694 sum += *(res++);
695 sums[k][i] = sum;
697 res_end += n >> pmax;
702 static void calc_sum_next(int level, uint64_t sums[32][MAX_PARTITIONS], int kmax)
704 int i, k;
705 int parts = (1 << level);
706 for (i = 0; i < parts; i++) {
707 for (k=0; k<=kmax; k++)
708 sums[k][i] = sums[k][2*i] + sums[k][2*i+1];
712 static uint64_t calc_rice_params(RiceContext *rc,
713 uint32_t udata[FLAC_MAX_BLOCKSIZE],
714 uint64_t sums[32][MAX_PARTITIONS],
715 int pmin, int pmax,
716 const int32_t *data, int n, int pred_order, int exact)
718 int i;
719 uint64_t bits[MAX_PARTITION_ORDER+1];
720 int opt_porder;
721 RiceContext tmp_rc;
722 int kmax = (1 << rc->coding_mode) - 2;
724 av_assert1(pmin >= 0 && pmin <= MAX_PARTITION_ORDER);
725 av_assert1(pmax >= 0 && pmax <= MAX_PARTITION_ORDER);
726 av_assert1(pmin <= pmax);
728 tmp_rc.coding_mode = rc->coding_mode;
730 for (i = pred_order; i < n; i++)
731 udata[i] = ((unsigned)(data[i]) << 1) ^ (data[i] >> 31);
733 calc_sum_top(pmax, exact ? kmax : 0, udata, n, pred_order, sums);
735 opt_porder = pmin;
736 bits[pmin] = UINT32_MAX;
737 for (i = pmax; ; ) {
738 bits[i] = calc_optimal_rice_params(&tmp_rc, i, sums, n, pred_order, kmax, exact);
739 if (bits[i] < bits[opt_porder] || pmax == pmin) {
740 opt_porder = i;
741 *rc = tmp_rc;
743 if (i == pmin)
744 break;
745 calc_sum_next(--i, sums, exact ? kmax : 0);
748 return bits[opt_porder];
/**
 * Limit a partition order to what the block size and prediction order allow.
 *
 * @param max_porder requested partition order
 * @param n          block size
 * @param order      prediction order
 * @return max_porder capped by av_log2(n ^ (n - 1)) and, for a non-zero
 *         order, by av_log2(n / order)
 */
static int get_max_p_order(int max_porder, int n, int order)
{
    const int by_blocksize = av_log2(n^(n-1));
    int porder = FFMIN(max_porder, by_blocksize);

    if (order > 0) {
        const int by_order = av_log2(n/order);
        porder = FFMIN(porder, by_order);
    }
    return porder;
}
761 static uint64_t find_subframe_rice_params(FlacEncodeContext *s,
762 FlacSubframe *sub, int pred_order)
764 int pmin = get_max_p_order(s->options.min_partition_order,
765 s->frame.blocksize, pred_order);
766 int pmax = get_max_p_order(s->options.max_partition_order,
767 s->frame.blocksize, pred_order);
769 uint64_t bits = 8 + pred_order * sub->obits + 2 + sub->rc.coding_mode;
770 if (sub->type == FLAC_SUBFRAME_LPC)
771 bits += 4 + 5 + pred_order * s->options.lpc_coeff_precision;
772 bits += calc_rice_params(&sub->rc, sub->rc_udata, sub->rc_sums, pmin, pmax, sub->residual,
773 s->frame.blocksize, pred_order, s->options.exact_rice_parameters);
774 return bits;
/**
 * Compute the residual of a fixed predictor of the given order.
 *
 * The first `order` entries of res are the warm-up samples; each later entry
 * is the order-th finite difference of smp at that position. Orders 2 and up
 * process two samples per iteration while carrying the lower-order
 * differences along; a scalar tail step handles the last sample when an odd
 * number remains, so nothing at index n or beyond is read or written.
 *
 * @param res   output, n entries
 * @param smp   input samples, n entries
 * @param n     number of samples
 * @param order predictor order; values above 3 are handled as order 4
 */
static void encode_residual_fixed(int32_t *res, const int32_t *smp, int n,
                                  int order)
{
    int i;

    for (i = 0; i < order; i++)
        res[i] = smp[i];

    if (order == 0) {
        for (i = order; i < n; i++)
            res[i] = smp[i];
    } else if (order == 1) {
        for (i = order; i < n; i++)
            res[i] = smp[i] - smp[i-1];
    } else if (order == 2) {
        int a = smp[order-1] - smp[order-2];
        for (i = order; i + 1 < n; i += 2) {
            int b    = smp[i  ] - smp[i-1];
            res[i]   = b - a;
            a        = smp[i+1] - smp[i  ];
            res[i+1] = a - b;
        }
        if (i < n) {
            int b  = smp[i] - smp[i-1];
            res[i] = b - a;
        }
    } else if (order == 3) {
        int a = smp[order-1] -   smp[order-2];
        int c = smp[order-1] - 2*smp[order-2] + smp[order-3];
        for (i = order; i + 1 < n; i += 2) {
            int b    = smp[i  ] - smp[i-1];
            int d    = b - a;
            res[i]   = d - c;
            a        = smp[i+1] - smp[i  ];
            c        = a - b;
            res[i+1] = c - d;
        }
        if (i < n) {
            int b  = smp[i] - smp[i-1];
            int d  = b - a;
            res[i] = d - c;
        }
    } else {
        int a = smp[order-1] -   smp[order-2];
        int c = smp[order-1] - 2*smp[order-2] +   smp[order-3];
        int e = smp[order-1] - 3*smp[order-2] + 3*smp[order-3] - smp[order-4];
        for (i = order; i + 1 < n; i += 2) {
            int b    = smp[i  ] - smp[i-1];
            int d    = b - a;
            int f    = d - c;
            res[i  ] = f - e;
            a        = smp[i+1] - smp[i  ];
            c        = a - b;
            e        = c - d;
            res[i+1] = e - f;
        }
        if (i < n) {
            int b  = smp[i] - smp[i-1];
            int d  = b - a;
            int f  = d - c;
            res[i] = f - e;
        }
    }
}
/* The functions built from the two *_WITH_RESIDUAL_LIMIT macros check for
 * every residual whether it can be contained in <INT32_MIN,INT32_MAX]. In
 * case it doesn't, they return 1 and the caller has to try something else;
 * otherwise they return 0. Each function is duplicated, once for int32_t
 * input, once for int64_t input.
 *
 * The macro below is a complete function body operating on the parameters
 * res, smp, n and order. Order 0 uses the same range check as the other
 * orders, so that 64-bit input outside the 32-bit range is rejected instead
 * of being truncated; for 32-bit input this only rejects INT32_MIN. */
#define ENCODE_RESIDUAL_FIXED_WITH_RESIDUAL_LIMIT()                         \
{                                                                           \
    for (int i = 0; i < order; i++)                                         \
        res[i] = smp[i];                                                    \
    if (order == 0) {                                                       \
        for (int i = order; i < n; i++) {                                   \
            int64_t res64 = smp[i];                                         \
            if (res64 <= INT32_MIN || res64 > INT32_MAX)                    \
                return 1;                                                   \
            res[i] = res64;                                                 \
        }                                                                   \
    } else if (order == 1) {                                                \
        for (int i = order; i < n; i++) {                                   \
            int64_t res64 = (int64_t)smp[i] - smp[i-1];                     \
            if (res64 <= INT32_MIN || res64 > INT32_MAX)                    \
                return 1;                                                   \
            res[i] = res64;                                                 \
        }                                                                   \
    } else if (order == 2) {                                                \
        for (int i = order; i < n; i++) {                                   \
            int64_t res64 = (int64_t)smp[i] - 2*(int64_t)smp[i-1] + smp[i-2]; \
            if (res64 <= INT32_MIN || res64 > INT32_MAX)                    \
                return 1;                                                   \
            res[i] = res64;                                                 \
        }                                                                   \
    } else if (order == 3) {                                                \
        for (int i = order; i < n; i++) {                                   \
            int64_t res64 = (int64_t)smp[i] - 3*(int64_t)smp[i-1] + 3*(int64_t)smp[i-2] - smp[i-3]; \
            if (res64 <= INT32_MIN || res64 > INT32_MAX)                    \
                return 1;                                                   \
            res[i] = res64;                                                 \
        }                                                                   \
    } else {                                                                \
        for (int i = order; i < n; i++) {                                   \
            int64_t res64 = (int64_t)smp[i] - 4*(int64_t)smp[i-1] + 6*(int64_t)smp[i-2] - 4*(int64_t)smp[i-3] + smp[i-4]; \
            if (res64 <= INT32_MIN || res64 > INT32_MAX)                    \
                return 1;                                                   \
            res[i] = res64;                                                 \
        }                                                                   \
    }                                                                       \
    return 0;                                                               \
}

/* Fixed-predictor residual with range check, 32-bit samples. */
static int encode_residual_fixed_with_residual_limit(int32_t *res, const int32_t *smp,
                                                     int n, int order)
{
    ENCODE_RESIDUAL_FIXED_WITH_RESIDUAL_LIMIT();
}

/* Fixed-predictor residual with range check, 64-bit samples. */
static int encode_residual_fixed_with_residual_limit_33bps(int32_t *res, const int64_t *smp,
                                                           int n, int order)
{
    ENCODE_RESIDUAL_FIXED_WITH_RESIDUAL_LIMIT();
}
/* Function body computing LPC residuals with a range check. It operates on
 * the parameters res, smp, len, order, coefs and shift of the enclosing
 * function: the first `order` samples are copied as warm-up, every later
 * residual is the sample minus the shifted 64-bit prediction. Returns 1 as
 * soon as a residual does not fit <INT32_MIN,INT32_MAX], 0 otherwise. */
#define LPC_ENCODE_WITH_RESIDUAL_LIMIT()                                    \
{                                                                           \
    for (int w = 0; w < order; w++)                                         \
        res[w] = smp[w];                                                    \
    for (int pos = order; pos < len; pos++) {                               \
        int64_t pred = 0;                                                   \
        int64_t diff;                                                       \
        for (int tap = 0; tap < order; tap++)                               \
            pred += (int64_t)coefs[tap]*smp[(pos-1)-tap];                   \
        pred >>= shift;                                                     \
        diff = smp[pos] - pred;                                             \
        if (diff <= INT32_MIN || diff > INT32_MAX)                          \
            return 1;                                                       \
        res[pos] = diff;                                                    \
    }                                                                       \
    return 0;                                                               \
}

/* LPC residual with range check, 32-bit samples. */
static int lpc_encode_with_residual_limit(int32_t *res, const int32_t *smp, int len,
                                          int order, int32_t *coefs, int shift)
{
    LPC_ENCODE_WITH_RESIDUAL_LIMIT();
}

/* LPC residual with range check, 64-bit samples. */
static int lpc_encode_with_residual_limit_33bps(int32_t *res, const int64_t *smp, int len,
                                                int order, int32_t *coefs, int shift)
{
    LPC_ENCODE_WITH_RESIDUAL_LIMIT();
}
918 static int lpc_encode_choose_datapath(FlacEncodeContext *s, int32_t bps,
919 int32_t *res, const int32_t *smp,
920 const int64_t *smp_33bps, int len,
921 int order, int32_t *coefs, int shift)
923 uint64_t max_residual_value = 0;
924 int64_t max_sample_value = ((int64_t)(1) << (bps-1));
925 /* This calculates the max size of any residual with the current
926 * predictor, so we know whether we need to check the residual */
927 for (int i = 0; i < order; i++)
928 max_residual_value += FFABS(max_sample_value * coefs[i]);
929 max_residual_value >>= shift;
930 max_residual_value += max_sample_value;
931 if (bps > 32) {
932 if (lpc_encode_with_residual_limit_33bps(res, smp_33bps, len, order, coefs, shift))
933 return 1;
934 } else if (max_residual_value > INT32_MAX) {
935 if (lpc_encode_with_residual_limit(res, smp, len, order, coefs, shift))
936 return 1;
937 } else if (bps + s->options.lpc_coeff_precision + av_log2(order) <= 32) {
938 s->flac_dsp.lpc16_encode(res, smp, len, order, coefs, shift);
939 } else {
940 s->flac_dsp.lpc32_encode(res, smp, len, order, coefs, shift);
942 return 0;
/* Make the enclosing function return a VERBATIM subframe. Relies on the
 * caller's locals s, sub, res, smp and n. The samples are copied into the
 * residual buffer only when they are at most 32 bits wide. */
#define DEFAULT_TO_VERBATIM()                                       \
{                                                                   \
    sub->type = sub->type_code = FLAC_SUBFRAME_VERBATIM;            \
    if (sub->obits <= 32)                                           \
        memcpy(res, smp, n * sizeof(int32_t));                      \
    return subframe_count_exact(s, sub, 0);                         \
}
953 static int encode_residual_ch(FlacEncodeContext *s, int ch)
955 int i, n;
956 int min_order, max_order, opt_order, omethod;
957 FlacFrame *frame;
958 FlacSubframe *sub;
959 int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
960 int shift[MAX_LPC_ORDER];
961 int32_t *res, *smp;
962 int64_t *smp_33bps;
964 frame = &s->frame;
965 sub = &frame->subframes[ch];
966 res = sub->residual;
967 smp = sub->samples;
968 smp_33bps = frame->samples_33bps;
969 n = frame->blocksize;
971 /* CONSTANT */
972 if (sub->obits > 32) {
973 for (i = 1; i < n; i++)
974 if(smp_33bps[i] != smp_33bps[0])
975 break;
976 if (i == n) {
977 sub->type = sub->type_code = FLAC_SUBFRAME_CONSTANT;
978 return subframe_count_exact(s, sub, 0);
980 } else {
981 for (i = 1; i < n; i++)
982 if(smp[i] != smp[0])
983 break;
984 if (i == n) {
985 sub->type = sub->type_code = FLAC_SUBFRAME_CONSTANT;
986 res[0] = smp[0];
987 return subframe_count_exact(s, sub, 0);
991 /* VERBATIM */
992 if (frame->verbatim_only || n < 5) {
993 DEFAULT_TO_VERBATIM();
996 min_order = s->options.min_prediction_order;
997 max_order = s->options.max_prediction_order;
998 omethod = s->options.prediction_order_method;
1000 /* FIXED */
1001 sub->type = FLAC_SUBFRAME_FIXED;
1002 if (s->options.lpc_type == FF_LPC_TYPE_NONE ||
1003 s->options.lpc_type == FF_LPC_TYPE_FIXED || n <= max_order) {
1004 uint64_t bits[MAX_FIXED_ORDER+1];
1005 if (max_order > MAX_FIXED_ORDER)
1006 max_order = MAX_FIXED_ORDER;
1007 opt_order = 0;
1008 bits[0] = UINT32_MAX;
1009 for (i = min_order; i <= max_order; i++) {
1010 if (sub->obits == 33) {
1011 if (encode_residual_fixed_with_residual_limit_33bps(res, smp_33bps, n, i))
1012 continue;
1013 } else if (sub->obits + i >= 32) {
1014 if (encode_residual_fixed_with_residual_limit(res, smp, n, i))
1015 continue;
1016 } else
1017 encode_residual_fixed(res, smp, n, i);
1018 bits[i] = find_subframe_rice_params(s, sub, i);
1019 if (bits[i] < bits[opt_order])
1020 opt_order = i;
1022 if (opt_order == 0 && bits[0] == UINT32_MAX) {
1023 /* No predictor found with residuals within <INT32_MIN,INT32_MAX],
1024 * so encode a verbatim subframe instead */
1025 DEFAULT_TO_VERBATIM();
1027 sub->order = opt_order;
1028 sub->type_code = sub->type | sub->order;
1029 if (sub->order != max_order) {
1030 if (sub->obits == 33)
1031 encode_residual_fixed_with_residual_limit_33bps(res, smp_33bps, n, sub->order);
1032 else if (sub->obits + i >= 32)
1033 encode_residual_fixed_with_residual_limit(res, smp, n, sub->order);
1034 else
1035 encode_residual_fixed(res, smp, n, sub->order);
1036 find_subframe_rice_params(s, sub, sub->order);
1038 return subframe_count_exact(s, sub, sub->order);
1041 /* LPC */
1042 sub->type = FLAC_SUBFRAME_LPC;
1043 if (sub->obits == 33)
1044 /* As ff_lpc_calc_coefs is shared with other codecs and the LSB
1045 * probably isn't predictable anyway, throw away LSB for analysis
1046 * so it fits 32 bit int and existing function can be used
1047 * unmodified */
1048 for (i = 0; i < n; i++)
1049 smp[i] = smp_33bps[i] >> 1;
1051 opt_order = ff_lpc_calc_coefs(&s->lpc_ctx, smp, n, min_order, max_order,
1052 s->options.lpc_coeff_precision, coefs, shift, s->options.lpc_type,
1053 s->options.lpc_passes, omethod,
1054 MIN_LPC_SHIFT, MAX_LPC_SHIFT, 0);
1056 if (omethod == ORDER_METHOD_2LEVEL ||
1057 omethod == ORDER_METHOD_4LEVEL ||
1058 omethod == ORDER_METHOD_8LEVEL) {
1059 int levels = 1 << omethod;
1060 uint64_t bits[1 << ORDER_METHOD_8LEVEL];
1061 int order = -1;
1062 int opt_index = levels-1;
1063 opt_order = max_order-1;
1064 bits[opt_index] = UINT32_MAX;
1065 for (i = levels-1; i >= 0; i--) {
1066 int last_order = order;
1067 order = min_order + (((max_order-min_order+1) * (i+1)) / levels)-1;
1068 order = av_clip(order, min_order - 1, max_order - 1);
1069 if (order == last_order)
1070 continue;
1071 if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, order+1, coefs[order], shift[order]))
1072 continue;
1073 bits[i] = find_subframe_rice_params(s, sub, order+1);
1074 if (bits[i] < bits[opt_index]) {
1075 opt_index = i;
1076 opt_order = order;
1079 opt_order++;
1080 } else if (omethod == ORDER_METHOD_SEARCH) {
1081 // brute-force optimal order search
1082 uint64_t bits[MAX_LPC_ORDER];
1083 opt_order = 0;
1084 bits[0] = UINT32_MAX;
1085 for (i = min_order-1; i < max_order; i++) {
1086 if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, i+1, coefs[i], shift[i]))
1087 continue;
1088 bits[i] = find_subframe_rice_params(s, sub, i+1);
1089 if (bits[i] < bits[opt_order])
1090 opt_order = i;
1092 opt_order++;
1093 } else if (omethod == ORDER_METHOD_LOG) {
1094 uint64_t bits[MAX_LPC_ORDER];
1095 int step;
1097 opt_order = min_order - 1 + (max_order-min_order)/3;
1098 memset(bits, -1, sizeof(bits));
1100 for (step = 16; step; step >>= 1) {
1101 int last = opt_order;
1102 for (i = last-step; i <= last+step; i += step) {
1103 if (i < min_order-1 || i >= max_order || bits[i] < UINT32_MAX)
1104 continue;
1105 if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, i+1, coefs[i], shift[i]))
1106 continue;
1107 bits[i] = find_subframe_rice_params(s, sub, i+1);
1108 if (bits[i] < bits[opt_order])
1109 opt_order = i;
1112 opt_order++;
1115 if (s->options.multi_dim_quant) {
1116 int allsteps = 1;
1117 int i, step, improved;
1118 int64_t best_score = INT64_MAX;
1119 int32_t qmax;
1121 qmax = (1 << (s->options.lpc_coeff_precision - 1)) - 1;
1123 for (i=0; i<opt_order; i++)
1124 allsteps *= 3;
1126 do {
1127 improved = 0;
1128 for (step = 0; step < allsteps; step++) {
1129 int tmp = step;
1130 int32_t lpc_try[MAX_LPC_ORDER];
1131 int64_t score = 0;
1132 int diffsum = 0;
1134 for (i=0; i<opt_order; i++) {
1135 int diff = ((tmp + 1) % 3) - 1;
1136 lpc_try[i] = av_clip(coefs[opt_order - 1][i] + diff, -qmax, qmax);
1137 tmp /= 3;
1138 diffsum += !!diff;
1140 if (diffsum >8)
1141 continue;
1143 if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, opt_order, lpc_try, shift[opt_order-1]))
1144 continue;
1145 score = find_subframe_rice_params(s, sub, opt_order);
1146 if (score < best_score) {
1147 best_score = score;
1148 memcpy(coefs[opt_order-1], lpc_try, sizeof(*coefs));
1149 improved=1;
1152 } while(improved);
1155 sub->order = opt_order;
1156 sub->type_code = sub->type | (sub->order-1);
1157 sub->shift = shift[sub->order-1];
1158 for (i = 0; i < sub->order; i++)
1159 sub->coefs[i] = coefs[sub->order-1][i];
1161 if(lpc_encode_choose_datapath(s, sub->obits, res, smp, smp_33bps, n, sub->order, sub->coefs, sub->shift)) {
1162 /* No predictor found with residuals within <INT32_MIN,INT32_MAX],
1163 * so encode a verbatim subframe instead */
1164 DEFAULT_TO_VERBATIM();
1167 find_subframe_rice_params(s, sub, sub->order);
1169 return subframe_count_exact(s, sub, sub->order);
1173 static int count_frame_header(FlacEncodeContext *s)
1175 uint8_t av_unused tmp;
1176 int count;
1179 <14> Sync code
1180 <1> Reserved
1181 <1> Blocking strategy
1182 <4> Block size in inter-channel samples
1183 <4> Sample rate
1184 <4> Channel assignment
1185 <3> Sample size in bits
1186 <1> Reserved
1188 count = 32;
1190 /* coded frame number */
1191 PUT_UTF8(s->frame_count, tmp, count += 8;)
1193 /* explicit block size */
1194 if (s->frame.bs_code[0] == 6)
1195 count += 8;
1196 else if (s->frame.bs_code[0] == 7)
1197 count += 16;
1199 /* explicit sample rate */
1200 count += ((s->sr_code[0] == 12) + (s->sr_code[0] > 12) * 2) * 8;
1202 /* frame header CRC-8 */
1203 count += 8;
1205 return count;
1209 static int encode_frame(FlacEncodeContext *s)
1211 int ch;
1212 uint64_t count;
1214 count = count_frame_header(s);
1216 for (ch = 0; ch < s->channels; ch++)
1217 count += encode_residual_ch(s, ch);
1219 count += (8 - (count & 7)) & 7; // byte alignment
1220 count += 16; // CRC-16
1222 count >>= 3;
1223 if (count > INT_MAX)
1224 return AVERROR_BUG;
1225 return count;
1229 static void remove_wasted_bits(FlacEncodeContext *s)
1231 int ch, i, wasted_bits;
1233 for (ch = 0; ch < s->channels; ch++) {
1234 FlacSubframe *sub = &s->frame.subframes[ch];
1236 if (sub->obits > 32) {
1237 int64_t v = 0;
1238 for (i = 0; i < s->frame.blocksize; i++) {
1239 v |= s->frame.samples_33bps[i];
1240 if (v & 1)
1241 break;
1244 if (!v || (v & 1))
1245 return;
1247 v = ff_ctzll(v);
1249 /* If any wasted bits are found, samples are moved
1250 * from frame.samples_33bps to frame.subframes[ch] */
1251 for (i = 0; i < s->frame.blocksize; i++)
1252 sub->samples[i] = s->frame.samples_33bps[i] >> v;
1253 wasted_bits = v;
1254 } else {
1255 int32_t v = 0;
1256 for (i = 0; i < s->frame.blocksize; i++) {
1257 v |= sub->samples[i];
1258 if (v & 1)
1259 break;
1262 if (!v || (v & 1))
1263 return;
1265 v = ff_ctz(v);
1267 for (i = 0; i < s->frame.blocksize; i++)
1268 sub->samples[i] >>= v;
1269 wasted_bits = v;
1272 sub->wasted = wasted_bits;
1273 sub->obits -= wasted_bits;
1275 /* for 24-bit, check if removing wasted bits makes the range better
1276 * suited for using RICE instead of RICE2 for entropy coding */
1277 if (sub->obits <= 17)
1278 sub->rc.coding_mode = CODING_MODE_RICE;
/**
 * Estimate which stereo representation is cheapest to encode.
 *
 * The absolute 2nd-order residual is summed for the left, right, mid and
 * side signals, each sum is turned into a Rice-coded bit estimate, and the
 * four channel pairings are scored.
 *
 * @param left_ch        left channel samples
 * @param right_ch       right channel samples
 * @param n              number of samples per channel
 * @param max_rice_param upper bound passed to find_optimal_param()
 * @param bps            bits per sample; below 30 the residuals are
 *                       computed in 32-bit arithmetic, otherwise in 64-bit
 * @return index of the lowest score: 0 = left+right, 1 = left+side,
 *         2 = right+side, 3 = mid+side (first one wins on ties)
 */
static int estimate_stereo_mode(const int32_t *left_ch, const int32_t *right_ch, int n,
                                int max_rice_param, int bps)
{
    enum { ACC_LEFT, ACC_RIGHT, ACC_MID, ACC_SIDE, ACC_NB };
    uint64_t acc[ACC_NB] = { 0 };
    uint64_t cost[4];
    int winner = 0;

    /* calculate sum of 2nd order residual for each channel */
    if (bps < 30) {
        for (int i = 2; i < n; i++) {
            const int32_t l = left_ch[i]  - 2*left_ch[i-1]  + left_ch[i-2];
            const int32_t r = right_ch[i] - 2*right_ch[i-1] + right_ch[i-2];

            acc[ACC_LEFT]  += FFABS(l);
            acc[ACC_RIGHT] += FFABS(r);
            acc[ACC_MID]   += FFABS((l + r) >> 1);
            acc[ACC_SIDE]  += FFABS(l - r);
        }
    } else {
        for (int i = 2; i < n; i++) {
            const int64_t l = (int64_t)left_ch[i]  - 2*(int64_t)left_ch[i-1]  + left_ch[i-2];
            const int64_t r = (int64_t)right_ch[i] - 2*(int64_t)right_ch[i-1] + right_ch[i-2];

            acc[ACC_LEFT]  += FFABS(l);
            acc[ACC_RIGHT] += FFABS(r);
            acc[ACC_MID]   += FFABS((l + r) >> 1);
            acc[ACC_SIDE]  += FFABS(l - r);
        }
    }

    /* estimate bit counts: each sum is replaced by its estimated size */
    for (int i = 0; i < ACC_NB; i++) {
        const int k = find_optimal_param(2 * acc[i], n, max_rice_param);
        acc[i] = rice_encode_count(2 * acc[i], n, k);
    }

    /* calculate score for each mode */
    cost[0] = acc[ACC_LEFT]  + acc[ACC_RIGHT];
    cost[1] = acc[ACC_LEFT]  + acc[ACC_SIDE];
    cost[2] = acc[ACC_RIGHT] + acc[ACC_SIDE];
    cost[3] = acc[ACC_MID]   + acc[ACC_SIDE];

    /* return mode with lowest score */
    for (int i = 1; i < 4; i++) {
        if (cost[i] < cost[winner])
            winner = i;
    }

    return winner;
}
1337 * Perform stereo channel decorrelation.
1339 static void channel_decorrelation(FlacEncodeContext *s)
1341 FlacFrame *frame;
1342 int32_t *left, *right;
1343 int64_t *side_33bps;
1344 int n;
1346 frame = &s->frame;
1347 n = frame->blocksize;
1348 left = frame->subframes[0].samples;
1349 right = frame->subframes[1].samples;
1350 side_33bps = frame->samples_33bps;
1352 if (s->channels != 2) {
1353 frame->ch_mode = FLAC_CHMODE_INDEPENDENT;
1354 return;
1357 if (s->options.ch_mode < 0) {
1358 int max_rice_param = (1 << frame->subframes[0].rc.coding_mode) - 2;
1359 frame->ch_mode = estimate_stereo_mode(left, right, n, max_rice_param, s->avctx->bits_per_raw_sample);
1360 } else
1361 frame->ch_mode = s->options.ch_mode;
1363 /* perform decorrelation and adjust bits-per-sample */
1364 if (frame->ch_mode == FLAC_CHMODE_INDEPENDENT)
1365 return;
1366 if(s->avctx->bits_per_raw_sample == 32) {
1367 if (frame->ch_mode == FLAC_CHMODE_MID_SIDE) {
1368 int64_t tmp;
1369 for (int i = 0; i < n; i++) {
1370 tmp = left[i];
1371 left[i] = (tmp + right[i]) >> 1;
1372 side_33bps[i] = tmp - right[i];
1374 frame->subframes[1].obits++;
1375 } else if (frame->ch_mode == FLAC_CHMODE_LEFT_SIDE) {
1376 for (int i = 0; i < n; i++)
1377 side_33bps[i] = (int64_t)left[i] - right[i];
1378 frame->subframes[1].obits++;
1379 } else {
1380 for (int i = 0; i < n; i++)
1381 side_33bps[i] = (int64_t)left[i] - right[i];
1382 frame->subframes[0].obits++;
1384 } else {
1385 if (frame->ch_mode == FLAC_CHMODE_MID_SIDE) {
1386 int32_t tmp;
1387 for (int i = 0; i < n; i++) {
1388 tmp = left[i];
1389 left[i] = (tmp + right[i]) >> 1;
1390 right[i] = tmp - right[i];
1392 frame->subframes[1].obits++;
1393 } else if (frame->ch_mode == FLAC_CHMODE_LEFT_SIDE) {
1394 for (int i = 0; i < n; i++)
1395 right[i] = left[i] - right[i];
1396 frame->subframes[1].obits++;
1397 } else {
1398 for (int i = 0; i < n; i++)
1399 left[i] -= right[i];
1400 frame->subframes[0].obits++;
1406 static void write_utf8(PutBitContext *pb, uint32_t val)
1408 uint8_t tmp;
1409 PUT_UTF8(val, tmp, put_bits(pb, 8, tmp);)
1413 static void write_frame_header(FlacEncodeContext *s)
1415 FlacFrame *frame;
1416 int crc;
1418 frame = &s->frame;
1420 put_bits(&s->pb, 16, 0xFFF8);
1421 put_bits(&s->pb, 4, frame->bs_code[0]);
1422 put_bits(&s->pb, 4, s->sr_code[0]);
1424 if (frame->ch_mode == FLAC_CHMODE_INDEPENDENT)
1425 put_bits(&s->pb, 4, s->channels-1);
1426 else
1427 put_bits(&s->pb, 4, frame->ch_mode + FLAC_MAX_CHANNELS - 1);
1429 put_bits(&s->pb, 3, s->bps_code);
1430 put_bits(&s->pb, 1, 0);
1431 write_utf8(&s->pb, s->frame_count);
1433 if (frame->bs_code[0] == 6)
1434 put_bits(&s->pb, 8, frame->bs_code[1]);
1435 else if (frame->bs_code[0] == 7)
1436 put_bits(&s->pb, 16, frame->bs_code[1]);
1438 if (s->sr_code[0] == 12)
1439 put_bits(&s->pb, 8, s->sr_code[1]);
1440 else if (s->sr_code[0] > 12)
1441 put_bits(&s->pb, 16, s->sr_code[1]);
1443 flush_put_bits(&s->pb);
1444 crc = av_crc(av_crc_get_table(AV_CRC_8_ATM), 0, s->pb.buf,
1445 put_bytes_output(&s->pb));
1446 put_bits(&s->pb, 8, crc);
1450 static inline void set_sr_golomb_flac(PutBitContext *pb, int i, int k)
1452 unsigned v, e;
1454 v = ((unsigned)(i) << 1) ^ (i >> 31);
1456 e = (v >> k) + 1;
1457 while (e > 31) {
1458 put_bits(pb, 31, 0);
1459 e -= 31;
1461 put_bits(pb, e, 1);
1462 if (k) {
1463 unsigned mask = UINT32_MAX >> (32-k);
1464 put_bits(pb, k, v & mask);
1469 static void write_subframes(FlacEncodeContext *s)
1471 int ch;
1473 for (ch = 0; ch < s->channels; ch++) {
1474 FlacSubframe *sub = &s->frame.subframes[ch];
1475 int p, porder, psize;
1476 int32_t *part_end;
1477 int32_t *res = sub->residual;
1478 int32_t *frame_end = &sub->residual[s->frame.blocksize];
1480 /* subframe header */
1481 put_bits(&s->pb, 1, 0);
1482 put_bits(&s->pb, 6, sub->type_code);
1483 put_bits(&s->pb, 1, !!sub->wasted);
1484 if (sub->wasted)
1485 put_bits(&s->pb, sub->wasted, 1);
1487 /* subframe */
1488 if (sub->type == FLAC_SUBFRAME_CONSTANT) {
1489 if(sub->obits == 33)
1490 put_sbits63(&s->pb, 33, s->frame.samples_33bps[0]);
1491 else if(sub->obits == 32)
1492 put_bits32(&s->pb, res[0]);
1493 else
1494 put_sbits(&s->pb, sub->obits, res[0]);
1495 } else if (sub->type == FLAC_SUBFRAME_VERBATIM) {
1496 if (sub->obits == 33) {
1497 int64_t *res64 = s->frame.samples_33bps;
1498 int64_t *frame_end64 = &s->frame.samples_33bps[s->frame.blocksize];
1499 while (res64 < frame_end64)
1500 put_sbits63(&s->pb, 33, (*res64++));
1501 } else if (sub->obits == 32) {
1502 while (res < frame_end)
1503 put_bits32(&s->pb, *res++);
1504 } else {
1505 while (res < frame_end)
1506 put_sbits(&s->pb, sub->obits, *res++);
1508 } else {
1509 /* warm-up samples */
1510 if (sub->obits == 33) {
1511 for (int i = 0; i < sub->order; i++)
1512 put_sbits63(&s->pb, 33, s->frame.samples_33bps[i]);
1513 res += sub->order;
1514 } else if (sub->obits == 32) {
1515 for (int i = 0; i < sub->order; i++)
1516 put_bits32(&s->pb, *res++);
1517 } else {
1518 for (int i = 0; i < sub->order; i++)
1519 put_sbits(&s->pb, sub->obits, *res++);
1522 /* LPC coefficients */
1523 if (sub->type == FLAC_SUBFRAME_LPC) {
1524 int cbits = s->options.lpc_coeff_precision;
1525 put_bits( &s->pb, 4, cbits-1);
1526 put_sbits(&s->pb, 5, sub->shift);
1527 for (int i = 0; i < sub->order; i++)
1528 put_sbits(&s->pb, cbits, sub->coefs[i]);
1531 /* rice-encoded block */
1532 put_bits(&s->pb, 2, sub->rc.coding_mode - 4);
1534 /* partition order */
1535 porder = sub->rc.porder;
1536 psize = s->frame.blocksize >> porder;
1537 put_bits(&s->pb, 4, porder);
1539 /* residual */
1540 part_end = &sub->residual[psize];
1541 for (p = 0; p < 1 << porder; p++) {
1542 int k = sub->rc.params[p];
1543 put_bits(&s->pb, sub->rc.coding_mode, k);
1544 while (res < part_end)
1545 set_sr_golomb_flac(&s->pb, *res++, k);
1546 part_end = FFMIN(frame_end, part_end + psize);
1553 static void write_frame_footer(FlacEncodeContext *s)
1555 int crc;
1556 flush_put_bits(&s->pb);
1557 crc = av_bswap16(av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, s->pb.buf,
1558 put_bytes_output(&s->pb)));
1559 put_bits(&s->pb, 16, crc);
1560 flush_put_bits(&s->pb);
1564 static int write_frame(FlacEncodeContext *s, AVPacket *avpkt)
1566 init_put_bits(&s->pb, avpkt->data, avpkt->size);
1567 write_frame_header(s);
1568 write_subframes(s);
1569 write_frame_footer(s);
1570 return put_bytes_output(&s->pb);
1574 static int update_md5_sum(FlacEncodeContext *s, const void *samples)
1576 const uint8_t *buf;
1577 int buf_size = s->frame.blocksize * s->channels *
1578 ((s->avctx->bits_per_raw_sample + 7) / 8);
1580 if (s->avctx->bits_per_raw_sample > 16 || HAVE_BIGENDIAN) {
1581 av_fast_malloc(&s->md5_buffer, &s->md5_buffer_size, buf_size);
1582 if (!s->md5_buffer)
1583 return AVERROR(ENOMEM);
1586 if (s->avctx->bits_per_raw_sample <= 16) {
1587 buf = (const uint8_t *)samples;
1588 #if HAVE_BIGENDIAN
1589 s->bdsp.bswap16_buf((uint16_t *) s->md5_buffer,
1590 (const uint16_t *) samples, buf_size / 2);
1591 buf = s->md5_buffer;
1592 #endif
1593 } else if (s->avctx->bits_per_raw_sample <= 24) {
1594 int i;
1595 const int32_t *samples0 = samples;
1596 uint8_t *tmp = s->md5_buffer;
1598 for (i = 0; i < s->frame.blocksize * s->channels; i++) {
1599 int32_t v = samples0[i] >> 8;
1600 AV_WL24(tmp + 3*i, v);
1602 buf = s->md5_buffer;
1603 } else {
1604 /* s->avctx->bits_per_raw_sample <= 32 */
1605 int i;
1606 const int32_t *samples0 = samples;
1607 uint8_t *tmp = s->md5_buffer;
1609 for (i = 0; i < s->frame.blocksize * s->channels; i++)
1610 AV_WL32(tmp + 4*i, samples0[i]);
1611 buf = s->md5_buffer;
1613 av_md5_update(s->md5ctx, buf, buf_size);
1615 return 0;
1619 static int flac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
1620 const AVFrame *frame, int *got_packet_ptr)
1622 FlacEncodeContext *s;
1623 int frame_bytes, out_bytes, ret;
1625 s = avctx->priv_data;
1627 /* when the last block is reached, update the header in extradata */
1628 if (!frame) {
1629 s->max_framesize = s->max_encoded_framesize;
1630 av_md5_final(s->md5ctx, s->md5sum);
1631 write_streaminfo(s, avctx->extradata);
1633 if (!s->flushed) {
1634 uint8_t *side_data = av_packet_new_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
1635 avctx->extradata_size);
1636 if (!side_data)
1637 return AVERROR(ENOMEM);
1638 memcpy(side_data, avctx->extradata, avctx->extradata_size);
1640 avpkt->pts = s->next_pts;
1642 *got_packet_ptr = 1;
1643 s->flushed = 1;
1646 return 0;
1649 /* change max_framesize for small final frame */
1650 if (frame->nb_samples < s->frame.blocksize) {
1651 s->max_framesize = flac_get_max_frame_size(frame->nb_samples,
1652 s->channels,
1653 avctx->bits_per_raw_sample);
1656 init_frame(s, frame->nb_samples);
1658 copy_samples(s, frame->data[0]);
1660 channel_decorrelation(s);
1662 remove_wasted_bits(s);
1664 frame_bytes = encode_frame(s);
1666 /* Fall back on verbatim mode if the compressed frame is larger than it
1667 would be if encoded uncompressed. */
1668 if (frame_bytes < 0 || frame_bytes > s->max_framesize) {
1669 s->frame.verbatim_only = 1;
1670 frame_bytes = encode_frame(s);
1671 if (frame_bytes < 0) {
1672 av_log(avctx, AV_LOG_ERROR, "Bad frame count\n");
1673 return frame_bytes;
1677 if ((ret = ff_get_encode_buffer(avctx, avpkt, frame_bytes, 0)) < 0)
1678 return ret;
1680 out_bytes = write_frame(s, avpkt);
1682 s->frame_count++;
1683 s->sample_count += frame->nb_samples;
1684 if ((ret = update_md5_sum(s, frame->data[0])) < 0) {
1685 av_log(avctx, AV_LOG_ERROR, "Error updating MD5 checksum\n");
1686 return ret;
1688 if (out_bytes > s->max_encoded_framesize)
1689 s->max_encoded_framesize = out_bytes;
1690 if (out_bytes < s->min_framesize)
1691 s->min_framesize = out_bytes;
1693 s->next_pts = frame->pts + ff_samples_to_time_base(avctx, frame->nb_samples);
1695 av_shrink_packet(avpkt, out_bytes);
1697 *got_packet_ptr = 1;
1698 return 0;
1702 static av_cold int flac_encode_close(AVCodecContext *avctx)
1704 FlacEncodeContext *s = avctx->priv_data;
1706 av_freep(&s->md5ctx);
1707 av_freep(&s->md5_buffer);
1708 ff_lpc_end(&s->lpc_ctx);
1709 return 0;
1712 #define FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
1713 static const AVOption options[] = {
1714 { "lpc_coeff_precision", "LPC coefficient precision", offsetof(FlacEncodeContext, options.lpc_coeff_precision), AV_OPT_TYPE_INT, {.i64 = 15 }, 0, MAX_LPC_PRECISION, FLAGS },
1715 { "lpc_type", "LPC algorithm", offsetof(FlacEncodeContext, options.lpc_type), AV_OPT_TYPE_INT, {.i64 = FF_LPC_TYPE_DEFAULT }, FF_LPC_TYPE_DEFAULT, FF_LPC_TYPE_NB-1, FLAGS, .unit = "lpc_type" },
1716 { "none", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LPC_TYPE_NONE }, INT_MIN, INT_MAX, FLAGS, .unit = "lpc_type" },
1717 { "fixed", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LPC_TYPE_FIXED }, INT_MIN, INT_MAX, FLAGS, .unit = "lpc_type" },
1718 { "levinson", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LPC_TYPE_LEVINSON }, INT_MIN, INT_MAX, FLAGS, .unit = "lpc_type" },
1719 { "cholesky", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_LPC_TYPE_CHOLESKY }, INT_MIN, INT_MAX, FLAGS, .unit = "lpc_type" },
1720 { "lpc_passes", "Number of passes to use for Cholesky factorization during LPC analysis", offsetof(FlacEncodeContext, options.lpc_passes), AV_OPT_TYPE_INT, {.i64 = 2 }, 1, INT_MAX, FLAGS },
1721 { "min_partition_order", NULL, offsetof(FlacEncodeContext, options.min_partition_order), AV_OPT_TYPE_INT, {.i64 = -1 }, -1, MAX_PARTITION_ORDER, FLAGS },
1722 { "max_partition_order", NULL, offsetof(FlacEncodeContext, options.max_partition_order), AV_OPT_TYPE_INT, {.i64 = -1 }, -1, MAX_PARTITION_ORDER, FLAGS },
1723 { "prediction_order_method", "Search method for selecting prediction order", offsetof(FlacEncodeContext, options.prediction_order_method), AV_OPT_TYPE_INT, {.i64 = -1 }, -1, ORDER_METHOD_LOG, FLAGS, .unit = "predm" },
1724 { "estimation", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = ORDER_METHOD_EST }, INT_MIN, INT_MAX, FLAGS, .unit = "predm" },
1725 { "2level", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = ORDER_METHOD_2LEVEL }, INT_MIN, INT_MAX, FLAGS, .unit = "predm" },
1726 { "4level", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = ORDER_METHOD_4LEVEL }, INT_MIN, INT_MAX, FLAGS, .unit = "predm" },
1727 { "8level", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = ORDER_METHOD_8LEVEL }, INT_MIN, INT_MAX, FLAGS, .unit = "predm" },
1728 { "search", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = ORDER_METHOD_SEARCH }, INT_MIN, INT_MAX, FLAGS, .unit = "predm" },
1729 { "log", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = ORDER_METHOD_LOG }, INT_MIN, INT_MAX, FLAGS, .unit = "predm" },
1730 { "ch_mode", "Stereo decorrelation mode", offsetof(FlacEncodeContext, options.ch_mode), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, FLAC_CHMODE_MID_SIDE, FLAGS, .unit = "ch_mode" },
1731 { "auto", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 }, INT_MIN, INT_MAX, FLAGS, .unit = "ch_mode" },
1732 { "indep", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FLAC_CHMODE_INDEPENDENT }, INT_MIN, INT_MAX, FLAGS, .unit = "ch_mode" },
1733 { "left_side", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FLAC_CHMODE_LEFT_SIDE }, INT_MIN, INT_MAX, FLAGS, .unit = "ch_mode" },
1734 { "right_side", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FLAC_CHMODE_RIGHT_SIDE }, INT_MIN, INT_MAX, FLAGS, .unit = "ch_mode" },
1735 { "mid_side", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = FLAC_CHMODE_MID_SIDE }, INT_MIN, INT_MAX, FLAGS, .unit = "ch_mode" },
1736 { "exact_rice_parameters", "Calculate rice parameters exactly", offsetof(FlacEncodeContext, options.exact_rice_parameters), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
1737 { "multi_dim_quant", "Multi-dimensional quantization", offsetof(FlacEncodeContext, options.multi_dim_quant), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
1738 { "min_prediction_order", NULL, offsetof(FlacEncodeContext, options.min_prediction_order), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, MAX_LPC_ORDER, FLAGS },
1739 { "max_prediction_order", NULL, offsetof(FlacEncodeContext, options.max_prediction_order), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, MAX_LPC_ORDER, FLAGS },
1741 { NULL },
1744 static const AVClass flac_encoder_class = {
1745 .class_name = "FLAC encoder",
1746 .item_name = av_default_item_name,
1747 .option = options,
1748 .version = LIBAVUTIL_VERSION_INT,
1751 const FFCodec ff_flac_encoder = {
1752 .p.name = "flac",
1753 CODEC_LONG_NAME("FLAC (Free Lossless Audio Codec)"),
1754 .p.type = AVMEDIA_TYPE_AUDIO,
1755 .p.id = AV_CODEC_ID_FLAC,
1756 .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
1757 AV_CODEC_CAP_SMALL_LAST_FRAME |
1758 AV_CODEC_CAP_ENCODER_REORDERED_OPAQUE,
1759 .priv_data_size = sizeof(FlacEncodeContext),
1760 .init = flac_encode_init,
1761 FF_CODEC_ENCODE_CB(flac_encode_frame),
1762 .close = flac_encode_close,
1763 .p.sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
1764 AV_SAMPLE_FMT_S32,
1765 AV_SAMPLE_FMT_NONE },
1766 .p.priv_class = &flac_encoder_class,
1767 .caps_internal = FF_CODEC_CAP_INIT_CLEANUP | FF_CODEC_CAP_EOF_FLUSH,