 * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
 *
 * VC-3 encoder funded by the British Broadcasting Corporation
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#define RC_VARIANCE 1 // use variance or ssd for fast rc

#include "mpegvideo.h"
#include "dnxhddata.h"

int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
typedef struct DNXHDEncContext {
    MpegEncContext m; ///< used for quantization dsp functions

    const CIDEntry *cid_table;
    uint8_t *msip; ///< Macroblock Scan Indices Payload

    struct DNXHDEncContext *thread[MAX_THREADS];

    unsigned dct_y_offset;
    unsigned dct_uv_offset;

    DECLARE_ALIGNED_16(DCTELEM, blocks[8][64]);

    int      (*qmatrix_c)     [64];
    int      (*qmatrix_l)     [64];
    uint16_t (*qmatrix_l16)[2][64];
    uint16_t (*qmatrix_c16)[2][64];

    uint16_t *table_vlc_codes;
    uint8_t  *table_vlc_bits;
    uint16_t *table_run_codes;
    uint8_t  *table_run_bits;

    RCEntry  (*mb_rc)[8160];
#define LAMBDA_FRAC_BITS 10
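/*
 * dnxhd_init_vlc builds the lookup tables used to write AC coefficients.
 * Codes are indexed by a packed key built from the CID tables: bits 0-6
 * carry the coefficient level, bit 7 the "run follows" flag and bit 8 the
 * index (escape) flag, hence the 449-entry allocation. The entry with
 * level 64 and the index flag set is remapped to key 256 (0 + (1<<8)),
 * matching how dnxhd_encode_block keys escaped levels as 256|(level&63).
 */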
static int dnxhd_init_vlc(DNXHDEncContext *ctx)
    CHECKED_ALLOCZ(ctx->table_vlc_codes, 449*2);
    CHECKED_ALLOCZ(ctx->table_vlc_bits,    449);
    CHECKED_ALLOCZ(ctx->table_run_codes,  63*2);
    CHECKED_ALLOCZ(ctx->table_run_bits,     63);

    for (i = 0; i < 257; i++) {
        int level = ctx->cid_table->ac_level[i] +
                    (ctx->cid_table->ac_run_flag[i] << 7) + (ctx->cid_table->ac_index_flag[i] << 8);
        if (ctx->cid_table->ac_level[i] == 64 && ctx->cid_table->ac_index_flag[i])
            level -= 64; // use 0+(1<<8) level
        ctx->table_vlc_codes[level] = ctx->cid_table->ac_codes[i];
        ctx->table_vlc_bits[level]  = ctx->cid_table->ac_bits[i];

    for (i = 0; i < 62; i++) {
        int run = ctx->cid_table->run[i];
        ctx->table_run_codes[run] = ctx->cid_table->run_codes[i];
        ctx->table_run_bits[run]  = ctx->cid_table->run_bits[i];
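/*
 * dnxhd_init_qmat builds one forward quantization matrix per qscale, for
 * luma and for chroma, from the CID weight tables. The weights are stored
 * in zigzag order, so they are routed through ff_zigzag_direct and the DSP
 * context's idct_permutation before ff_convert_matrix expands them for
 * every qscale up to qmax; the final loop rescales all entries by 4.
 */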
static int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias)
    // init first elem to 1 to avoid div by 0 in convert_matrix
    uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t*

    CHECKED_ALLOCZ(ctx->qmatrix_l,   (ctx->m.avctx->qmax+1) * 64 *     sizeof(int));
    CHECKED_ALLOCZ(ctx->qmatrix_c,   (ctx->m.avctx->qmax+1) * 64 *     sizeof(int));
    CHECKED_ALLOCZ(ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));
    CHECKED_ALLOCZ(ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t));

    for (i = 1; i < 64; i++) {
        int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
        weight_matrix[j] = ctx->cid_table->luma_weight[i];

    ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix,
                      ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);

    for (i = 1; i < 64; i++) {
        int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]];
        weight_matrix[j] = ctx->cid_table->chroma_weight[i];

    ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix,
                      ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1);

    for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) {
        for (i = 0; i < 64; i++) {
            ctx->qmatrix_l  [qscale]   [i] <<= 2; ctx->qmatrix_c  [qscale]   [i] <<= 2;
            ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2;
            ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2;
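/*
 * Rate-control setup: mb_rc stores bit cost and SSD per (qscale, macroblock)
 * pair, and mb_cmp is only needed for the fast (non-RD) mode. frame_bits is
 * the per-coding-unit budget in bits: coding_unit_size minus the 640-byte
 * header and the 4-byte end-of-frame marker. lambda starts at qscale 2,
 * expressed in LAMBDA_FRAC_BITS fixed point.
 */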
static int dnxhd_init_rc(DNXHDEncContext *ctx)
    CHECKED_ALLOCZ(ctx->mb_rc, 8160*ctx->m.avctx->qmax*sizeof(RCEntry));
    if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD)
        CHECKED_ALLOCZ(ctx->mb_cmp, ctx->m.mb_num*sizeof(RCCMPEntry));

    ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4) * 8;

    ctx->lambda = 2<<LAMBDA_FRAC_BITS; // qscale 2
static int dnxhd_encode_init(AVCodecContext *avctx)
    DNXHDEncContext *ctx = avctx->priv_data;

    ctx->cid = ff_dnxhd_find_cid(avctx);
    if (!ctx->cid || avctx->pix_fmt != PIX_FMT_YUV422P) {
        av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD\n");
        return -1;
    }
    av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid);

    index = ff_dnxhd_get_cid_table(ctx->cid);
    ctx->cid_table = &ff_dnxhd_cid_table[index];

    ctx->m.avctx = avctx;

    dsputil_init(&ctx->m.dsp, avctx);
    ff_dct_common_init(&ctx->m);
    if (!ctx->m.dct_quantize)
        ctx->m.dct_quantize = dct_quantize_c;

    ctx->m.mb_height = (avctx->height + 15) / 16;
    ctx->m.mb_width  = (avctx->width  + 15) / 16;

    if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) {
        ctx->m.mb_height /= 2;

    ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width;

    if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
        ctx->m.intra_quant_bias = avctx->intra_quant_bias;
    if (dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0) < 0) // XXX tune lbias/cbias
        return -1;

    if (dnxhd_init_vlc(ctx) < 0)
        return -1;
    if (dnxhd_init_rc(ctx) < 0)
        return -1;

    CHECKED_ALLOCZ(ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t));
    CHECKED_ALLOCZ(ctx->mb_bits,    ctx->m.mb_num   *sizeof(uint16_t));
    CHECKED_ALLOCZ(ctx->mb_qscale,  ctx->m.mb_num   *sizeof(uint8_t));

    ctx->frame.key_frame = 1;
    ctx->frame.pict_type = FF_I_TYPE;
    ctx->m.avctx->coded_frame = &ctx->frame;

    if (avctx->thread_count > MAX_THREADS || (avctx->thread_count > ctx->m.mb_height)) {
        av_log(avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    ctx->thread[0] = ctx;
    for (i = 1; i < avctx->thread_count; i++) {
        ctx->thread[i] = av_malloc(sizeof(DNXHDEncContext));
        memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext));

    for (i = 0; i < avctx->thread_count; i++) {
        ctx->thread[i]->m.start_mb_y = (ctx->m.mb_height*(i  ) + avctx->thread_count/2) / avctx->thread_count;
        ctx->thread[i]->m.end_mb_y   = (ctx->m.mb_height*(i+1) + avctx->thread_count/2) / avctx->thread_count;

 fail: //for CHECKED_ALLOCZ
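/*
 * dnxhd_write_header fills the header region that precedes the coded
 * macroblock data (the slices start at byte offset 640): the 5-byte header
 * prefix, the interlace/field byte, the ALPF/SPL/NAL picture size fields,
 * the compression ID (CID) and the slice count. It also points ctx->msip at
 * offset 0x170, where the per-slice byte offsets (Macroblock Scan Indices
 * Payload) are written later by dnxhd_encode_picture.
 */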
static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf)
    DNXHDEncContext *ctx = avctx->priv_data;
    const uint8_t header_prefix[5] = { 0x00,0x00,0x02,0x80,0x01 };

    memcpy(buf, header_prefix, 5);
    buf[5] = ctx->interlaced ? ctx->cur_field+2 : 0x01;
    buf[6] = 0x80; // crc flag off
    buf[7] = 0xa0; // reserved
    AV_WB16(buf + 0x18, avctx->height); // ALPF
    AV_WB16(buf + 0x1a, avctx->width);  // SPL
    AV_WB16(buf + 0x1d, avctx->height); // NAL

    buf[0x21] = 0x38; // FIXME 8 bit per comp
    buf[0x22] = 0x88 + (ctx->frame.interlaced_frame<<2);
    AV_WB32(buf + 0x28, ctx->cid); // CID
    buf[0x2c] = ctx->interlaced ? 0 : 0x80;

    buf[0x5f] = 0x01; // UDL

    buf[0x167] = 0x02; // reserved
    AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS
    buf[0x16d] = ctx->m.mb_height; // Ns
    buf[0x16f] = 0x10; // reserved

    ctx->msip = buf + 0x170;
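/*
 * DC coefficients are coded as a size category followed by the raw bits of
 * the difference to the previous DC value: nbits = av_log2_16bit(2*|diff|)
 * selects the category, whose prefix length comes from cid_table->dc_bits,
 * and the low nbits bits of diff are appended after the prefix.
 */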
static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff)
        nbits = av_log2_16bit(-2*diff);
        nbits = av_log2_16bit( 2*diff);
    put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits,
             (ctx->cid_table->dc_codes[nbits]<<nbits) + (diff & ((1 << nbits) - 1)));
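/*
 * AC coefficients are written in zigzag scan order. Each nonzero level
 * emits its VLC (keyed as in dnxhd_init_vlc) with the sign appended as one
 * extra bit; levels above 63 additionally emit a 4-bit offset ((level-1)>>6)
 * and are keyed through the 256|(level&63) escape. A nonzero run since the
 * last coded coefficient emits a run VLC, and the block ends with the EOB
 * code (table entry 0).
 */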
static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, DCTELEM *block, int last_index, int n)
    int last_non_zero = 0;

    dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]);
    ctx->m.last_dc[n] = block[0];

    for (i = 1; i <= last_index; i++) {
        j = ctx->m.intra_scantable.permutated[i];
            int run_level = i - last_non_zero - 1;
            MASK_ABS(sign, slevel);
                offset = (slevel-1) >> 6;
                slevel = 256 | (slevel & 63); // level 64 is treated as 0
            put_bits(&ctx->m.pb, ctx->table_vlc_bits[slevel]+1, (ctx->table_vlc_codes[slevel]<<1)|(sign&1));
                put_bits(&ctx->m.pb, 4, offset);
                put_bits(&ctx->m.pb, ctx->table_run_bits[run_level], ctx->table_run_codes[run_level]);

    put_bits(&ctx->m.pb, ctx->table_vlc_bits[0], ctx->table_vlc_codes[0]); // EOB
static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, DCTELEM *block, int n, int qscale, int last_index)
    const uint8_t *weight_matrix;

    weight_matrix = (n&2) ? ctx->cid_table->chroma_weight : ctx->cid_table->luma_weight;

    for (i = 1; i <= last_index; i++) {
        int j = ctx->m.intra_scantable.permutated[i];
                level = (1-2*level) * qscale * weight_matrix[i];
                if (weight_matrix[i] != 32)

                level = (2*level+1) * qscale * weight_matrix[i];
                if (weight_matrix[i] != 32)
static av_always_inline int dnxhd_ssd_block(DCTELEM *qblock, DCTELEM *block)
    for (i = 0; i < 64; i++)
        score += (block[i]-qblock[i])*(block[i]-qblock[i]);
static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *block, int last_index)
    int last_non_zero = 0;

    for (i = 1; i <= last_index; i++) {
        j = ctx->m.intra_scantable.permutated[i];
            int run_level = i - last_non_zero - 1;
            level = FFABS(level);
                level = 256 | (level & 63); // level 64 is treated as 0
            level |= (!!run_level)<<7;
            bits += ctx->table_vlc_bits[level]+1 + ctx->table_run_bits[run_level];
void dnxhd_get_pixels_4x8(DCTELEM
*restrict block
, const uint8_t *pixels
, int line_size
)
383 for (i
= 0; i
< 4; i
++) {
384 block
[0] = pixels
[0];
385 block
[1] = pixels
[1];
386 block
[2] = pixels
[2];
387 block
[3] = pixels
[3];
388 block
[4] = pixels
[4];
389 block
[5] = pixels
[5];
390 block
[6] = pixels
[6];
391 block
[7] = pixels
[7];
395 memcpy(block
, block
- 8, sizeof(*block
)*8);
396 memcpy(block
+ 8, block
-16, sizeof(*block
)*8);
397 memcpy(block
+16, block
-24, sizeof(*block
)*8);
398 memcpy(block
+24, block
-32, sizeof(*block
)*8);
static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
    const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize)   + (mb_x << 4);
    const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
    const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << 3);
    DSPContext *dsp = &ctx->m.dsp;

    dsp->get_pixels(ctx->blocks[0], ptr_y,     ctx->m.linesize);
    dsp->get_pixels(ctx->blocks[1], ptr_y + 8, ctx->m.linesize);
    dsp->get_pixels(ctx->blocks[2], ptr_u,     ctx->m.uvlinesize);
    dsp->get_pixels(ctx->blocks[3], ptr_v,     ctx->m.uvlinesize);

    if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
        if (ctx->interlaced) {
            dnxhd_get_pixels_4x8(ctx->blocks[4], ptr_y + ctx->dct_y_offset,     ctx->m.linesize);
            dnxhd_get_pixels_4x8(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
            dnxhd_get_pixels_4x8(ctx->blocks[6], ptr_u + ctx->dct_uv_offset,    ctx->m.uvlinesize);
            dnxhd_get_pixels_4x8(ctx->blocks[7], ptr_v + ctx->dct_uv_offset,    ctx->m.uvlinesize);
        } else
            memset(ctx->blocks[4], 0, 4*64*sizeof(DCTELEM));
    } else {
        dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset,     ctx->m.linesize);
        dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
        dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset,    ctx->m.uvlinesize);
        dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset,    ctx->m.uvlinesize);
static av_always_inline int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i)
        ctx->m.q_intra_matrix16 = ctx->qmatrix_c16;
        ctx->m.q_intra_matrix   = ctx->qmatrix_c;

        ctx->m.q_intra_matrix16 = ctx->qmatrix_l16;
        ctx->m.q_intra_matrix   = ctx->qmatrix_l;
*avctx
, void *arg
)
444 DNXHDEncContext
*ctx
= arg
;
446 int qscale
= ctx
->thread
[0]->qscale
;
448 for (mb_y
= ctx
->m
.start_mb_y
; mb_y
< ctx
->m
.end_mb_y
; mb_y
++) {
451 ctx
->m
.last_dc
[2] = 1024;
453 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
454 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
460 dnxhd_get_blocks(ctx
, mb_x
, mb_y
);
462 for (i
= 0; i
< 8; i
++) {
463 DECLARE_ALIGNED_16(DCTELEM
, block
[64]);
464 DCTELEM
*src_block
= ctx
->blocks
[i
];
465 int overflow
, nbits
, diff
, last_index
;
466 int n
= dnxhd_switch_matrix(ctx
, i
);
468 memcpy(block
, src_block
, sizeof(block
));
469 last_index
= ctx
->m
.dct_quantize((MpegEncContext
*)ctx
, block
, i
, qscale
, &overflow
);
470 ac_bits
+= dnxhd_calc_ac_bits(ctx
, block
, last_index
);
472 diff
= block
[0] - ctx
->m
.last_dc
[n
];
473 if (diff
< 0) nbits
= av_log2_16bit(-2*diff
);
474 else nbits
= av_log2_16bit( 2*diff
);
475 dc_bits
+= ctx
->cid_table
->dc_bits
[nbits
] + nbits
;
477 ctx
->m
.last_dc
[n
] = block
[0];
479 if (avctx
->mb_decision
== FF_MB_DECISION_RD
|| !RC_VARIANCE
) {
480 dnxhd_unquantize_c(ctx
, block
, i
, qscale
, last_index
);
481 ctx
->m
.dsp
.idct(block
);
482 ssd
+= dnxhd_ssd_block(block
, src_block
);
485 ctx
->mb_rc
[qscale
][mb
].ssd
= ssd
;
486 ctx
->mb_rc
[qscale
][mb
].bits
= ac_bits
+dc_bits
+12+8*ctx
->table_vlc_bits
[0];
492 static int dnxhd_encode_thread(AVCodecContext
*avctx
, void *arg
)
494 DNXHDEncContext
*ctx
= arg
;
497 for (mb_y
= ctx
->m
.start_mb_y
; mb_y
< ctx
->m
.end_mb_y
; mb_y
++) {
500 ctx
->m
.last_dc
[2] = 1024;
501 for (mb_x
= 0; mb_x
< ctx
->m
.mb_width
; mb_x
++) {
502 unsigned mb
= mb_y
* ctx
->m
.mb_width
+ mb_x
;
503 int qscale
= ctx
->mb_qscale
[mb
];
506 put_bits(&ctx
->m
.pb
, 12, qscale
<<1);
508 dnxhd_get_blocks(ctx
, mb_x
, mb_y
);
510 for (i
= 0; i
< 8; i
++) {
511 DCTELEM
*block
= ctx
->blocks
[i
];
512 int last_index
, overflow
;
513 int n
= dnxhd_switch_matrix(ctx
, i
);
514 last_index
= ctx
->m
.dct_quantize((MpegEncContext
*)ctx
, block
, i
, qscale
, &overflow
);
515 dnxhd_encode_block(ctx
, block
, last_index
, n
);
518 if (put_bits_count(&ctx
->m
.pb
)&31)
519 put_bits(&ctx
->m
.pb
, 32-(put_bits_count(&ctx
->m
.pb
)&31), 0);
521 flush_put_bits(&ctx
->m
.pb
);
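/*
 * Once per-macroblock bit counts are known, each slice (one macroblock row)
 * gets its size in bits rounded up to a 32-bit boundary and converted to
 * bytes; each thread's PutBitContext is then pointed at its own region of
 * the output buffer, right after the 640-byte header, so the slices can be
 * bitstream-encoded in parallel without overlapping.
 */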
static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx, uint8_t *buf)
    for (i = 0; i < ctx->m.avctx->thread_count; i++) {
        for (mb_y = ctx->thread[i]->m.start_mb_y; mb_y < ctx->thread[i]->m.end_mb_y; mb_y++) {
            ctx->slice_size[mb_y] = 0;
            for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
                unsigned mb = mb_y * ctx->m.mb_width + mb_x;
                ctx->slice_size[mb_y] += ctx->mb_bits[mb];

            ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31;
            ctx->slice_size[mb_y] >>= 3;
            thread_size += ctx->slice_size[mb_y];

        init_put_bits(&ctx->thread[i]->m.pb, buf + 640 + offset, thread_size);
        offset += thread_size;
static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg)
    DNXHDEncContext *ctx = arg;

    for (mb_y = ctx->m.start_mb_y; mb_y < ctx->m.end_mb_y; mb_y++) {
        for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) {
            unsigned mb  = mb_y * ctx->m.mb_width + mb_x;
            uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize) + (mb_x<<4);
            int sum  = ctx->m.dsp.pix_sum(pix, ctx->m.linesize);
            int varc = (ctx->m.dsp.pix_norm1(pix, ctx->m.linesize) - (((unsigned)(sum*sum))>>8)+128)>>8;
            ctx->mb_cmp[mb].value = varc;
            ctx->mb_cmp[mb].mb    = mb;
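/*
 * Rate-distortion mode: bit cost and SSD are precomputed for every qscale,
 * then lambda is searched iteratively. For a candidate lambda, each
 * macroblock picks the qscale minimizing bits*lambda + (ssd<<LAMBDA_FRAC_BITS);
 * if the resulting frame is too large lambda is raised, otherwise lowered,
 * narrowing the interval between last_higher and last_lower until the frame
 * fits ctx->frame_bits.
 */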
static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx)
    int lambda, up_step, down_step;
    int last_lower = INT_MAX, last_higher = 0;

    for (q = 1; q < avctx->qmax; q++) {
        avctx->execute(avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count);

    up_step = down_step = 2<<LAMBDA_FRAC_BITS;
    lambda = ctx->lambda;

        if (lambda == last_higher) {
            end = 1; // need to set final qscales/bits

        for (y = 0; y < ctx->m.mb_height; y++) {
            for (x = 0; x < ctx->m.mb_width; x++) {
                unsigned min = UINT_MAX;
                int mb = y*ctx->m.mb_width+x;
                for (q = 1; q < avctx->qmax; q++) {
                    unsigned score = ctx->mb_rc[q][mb].bits*lambda+(ctx->mb_rc[q][mb].ssd<<LAMBDA_FRAC_BITS);

                bits += ctx->mb_rc[qscale][mb].bits;
                ctx->mb_qscale[mb] = qscale;
                ctx->mb_bits[mb]   = ctx->mb_rc[qscale][mb].bits;

            bits = (bits+31)&~31; // padding
            if (bits > ctx->frame_bits)

        //dprintf(ctx->m.avctx, "lambda %d, up %u, down %u, bits %d, frame %d\n",
        //        lambda, last_higher, last_lower, bits, ctx->frame_bits);
            if (bits > ctx->frame_bits)

        if (bits < ctx->frame_bits) {
            last_lower = FFMIN(lambda, last_lower);
            if (last_higher != 0)
                lambda = (lambda+last_higher)>>1;
            down_step *= 5; // XXX tune ?
            up_step = 1<<LAMBDA_FRAC_BITS;
            lambda = FFMAX(1, lambda);
            if (lambda == last_lower)

            last_higher = FFMAX(lambda, last_higher);
            if (last_lower != INT_MAX)
                lambda = (lambda+last_lower)>>1;
            down_step = 1<<LAMBDA_FRAC_BITS;

    //dprintf(ctx->m.avctx, "out lambda %d\n", lambda);
    ctx->lambda = lambda;
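/*
 * Fast-mode helper: pick a single frame-wide qscale by bisection between
 * last_higher (a qscale known to overflow the budget) and last_lower (one
 * known to fit), re-running dnxhd_calc_bits_thread at each candidate until
 * adjacent qscales bracket ctx->frame_bits.
 */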
static int dnxhd_find_qscale(DNXHDEncContext *ctx)
    int last_lower = INT_MAX;

    qscale = ctx->qscale;

        ctx->qscale = qscale;
        // XXX avoid recalculating bits
        ctx->m.avctx->execute(ctx->m.avctx, dnxhd_calc_bits_thread, (void**)&ctx->thread[0], NULL, ctx->m.avctx->thread_count);
        for (y = 0; y < ctx->m.mb_height; y++) {
            for (x = 0; x < ctx->m.mb_width; x++)
                bits += ctx->mb_rc[qscale][y*ctx->m.mb_width+x].bits;
            bits = (bits+31)&~31; // padding
            if (bits > ctx->frame_bits)

        //dprintf(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n",
        //        ctx->m.avctx->frame_number, qscale, bits, ctx->frame_bits, last_higher, last_lower);
        if (bits < ctx->frame_bits) {
            if (last_higher == qscale - 1) {
                qscale = last_higher;

            last_lower = FFMIN(qscale, last_lower);
            if (last_higher != 0)
                qscale = (qscale+last_higher)>>1;
                qscale -= down_step++;

            if (last_lower == qscale + 1)

            last_higher = FFMAX(qscale, last_higher);
            if (last_lower != INT_MAX)
                qscale = (qscale+last_lower)>>1;

            if (qscale >= ctx->m.avctx->qmax)

    //dprintf(ctx->m.avctx, "out qscale %d\n", qscale);
    ctx->qscale = qscale;
static int dnxhd_rc_cmp(const void *a, const void *b)
    return ((const RCCMPEntry *)b)->value - ((const RCCMPEntry *)a)->value;
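/*
 * Fast (non-RD) rate control: start from the frame-wide qscale chosen by
 * dnxhd_find_qscale, then, while the total bit count (max_bits) still
 * exceeds ctx->frame_bits, raise individual macroblocks to qscale+1.
 * Candidates are ranked through mb_cmp.value (depending on RC_VARIANCE,
 * either the macroblock variance from dnxhd_mb_var_thread or an
 * SSD-per-saved-bit ratio) and sorted in descending order by dnxhd_rc_cmp.
 */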
static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx)
    if ((ret = dnxhd_find_qscale(ctx)) < 0)
        return -1;
    for (y = 0; y < ctx->m.mb_height; y++) {
        for (x = 0; x < ctx->m.mb_width; x++) {
            int mb = y*ctx->m.mb_width+x;
            ctx->mb_qscale[mb] = ctx->qscale;
            ctx->mb_bits[mb]   = ctx->mb_rc[ctx->qscale][mb].bits;
            max_bits += ctx->mb_rc[ctx->qscale][mb].bits;
                delta_bits = ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits;
                ctx->mb_cmp[mb].mb    = mb;
                ctx->mb_cmp[mb].value = delta_bits ?
                    ((ctx->mb_rc[ctx->qscale][mb].ssd - ctx->mb_rc[ctx->qscale+1][mb].ssd)*100) / delta_bits
                    : INT_MIN; // avoid increasing qscale

    max_bits += 31; // worst padding

        avctx->execute(avctx, dnxhd_mb_var_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count);
        qsort(ctx->mb_cmp, ctx->m.mb_num, sizeof(RCCMPEntry), dnxhd_rc_cmp);
        for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) {
            int mb = ctx->mb_cmp[x].mb;
            max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits;
            ctx->mb_qscale[mb] = ctx->qscale+1;
            ctx->mb_bits[mb]   = ctx->mb_rc[ctx->qscale+1][mb].bits;
static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame)
    for (i = 0; i < 3; i++) {
        ctx->frame.data[i]     = frame->data[i];
        ctx->frame.linesize[i] = frame->linesize[i];

    for (i = 0; i < ctx->m.avctx->thread_count; i++) {
        ctx->thread[i]->m.linesize    = ctx->frame.linesize[0]<<ctx->interlaced;
        ctx->thread[i]->m.uvlinesize  = ctx->frame.linesize[1]<<ctx->interlaced;
        ctx->thread[i]->dct_y_offset  = ctx->m.linesize  *8;
        ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8;

    ctx->frame.interlaced_frame = frame->interlaced_frame;
    ctx->cur_field = frame->interlaced_frame && !frame->top_field_first;
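/*
 * Per-frame entry point: check the output buffer, load the source pointers,
 * write the header, run RD or fast rate control to fix per-MB qscales, lay
 * out the slices, store the slice offsets in the MSIP table, encode all
 * slices in parallel, and terminate the coding unit with the 0x600DC0DE
 * marker. For interlaced input the second field is encoded as a second
 * coding unit immediately after the first.
 */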
static int dnxhd_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, const void *data)
    DNXHDEncContext *ctx = avctx->priv_data;

    if (buf_size < ctx->cid_table->frame_size) {
        av_log(avctx, AV_LOG_ERROR, "output buffer is too small to compress picture\n");
        return -1;
    }

    dnxhd_load_picture(ctx, data);

 encode_coding_unit:
    for (i = 0; i < 3; i++) {
        ctx->src[i] = ctx->frame.data[i];
        if (ctx->interlaced && ctx->cur_field)
            ctx->src[i] += ctx->frame.linesize[i];

    dnxhd_write_header(avctx, buf);

    if (avctx->mb_decision == FF_MB_DECISION_RD)
        ret = dnxhd_encode_rdo(avctx, ctx);
    else
        ret = dnxhd_encode_fast(avctx, ctx);
    if (ret < 0) {
        av_log(avctx, AV_LOG_ERROR, "picture could not fit ratecontrol constraints\n");
        return -1;
    }

    dnxhd_setup_threads_slices(ctx, buf);

    for (i = 0; i < ctx->m.mb_height; i++) {
        AV_WB32(ctx->msip + i * 4, offset);
        offset += ctx->slice_size[i];
        assert(!(ctx->slice_size[i] & 3));

    avctx->execute(avctx, dnxhd_encode_thread, (void**)&ctx->thread[0], NULL, avctx->thread_count);

    AV_WB32(buf + ctx->cid_table->coding_unit_size - 4, 0x600DC0DE); // EOF

    if (ctx->interlaced && first_field) {
        buf      += ctx->cid_table->coding_unit_size;
        buf_size -= ctx->cid_table->coding_unit_size;
        goto encode_coding_unit;

    ctx->frame.quality = ctx->qscale*FF_QP2LAMBDA;

    return ctx->cid_table->frame_size;
static int dnxhd_encode_end(AVCodecContext *avctx)
    DNXHDEncContext *ctx = avctx->priv_data;

    av_freep(&ctx->table_vlc_codes);
    av_freep(&ctx->table_vlc_bits);
    av_freep(&ctx->table_run_codes);
    av_freep(&ctx->table_run_bits);

    av_freep(&ctx->mb_bits);
    av_freep(&ctx->mb_qscale);
    av_freep(&ctx->mb_rc);
    av_freep(&ctx->mb_cmp);
    av_freep(&ctx->slice_size);

    av_freep(&ctx->qmatrix_c);
    av_freep(&ctx->qmatrix_l);
    av_freep(&ctx->qmatrix_c16);
    av_freep(&ctx->qmatrix_l16);

    for (i = 1; i < avctx->thread_count; i++)
        av_freep(&ctx->thread[i]);
AVCodec dnxhd_encoder = {
    sizeof(DNXHDEncContext),
    dnxhd_encode_picture,
    .pix_fmts  = (enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_NONE},
    .long_name = "VC3/DNxHD",