2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * H.264 / AVC / MPEG4 part10 codec.
24 * @author Michael Niedermayer <michaelni@gmx.at>
30 #include "mpegvideo.h"
39 #define interlaced_dct interlaced_dct_is_a_bad_name
40 #define mb_intra mb_intra_isnt_initalized_see_mb_type
42 #define LUMA_DC_BLOCK_INDEX 25
43 #define CHROMA_DC_BLOCK_INDEX 26
45 #define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
46 #define COEFF_TOKEN_VLC_BITS 8
47 #define TOTAL_ZEROS_VLC_BITS 9
48 #define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
49 #define RUN_VLC_BITS 3
50 #define RUN7_VLC_BITS 6
52 #define MAX_SPS_COUNT 32
53 #define MAX_PPS_COUNT 256
55 #define MAX_MMCO_COUNT 66
58 * Sequence parameter set
64 int transform_bypass
; ///< qpprime_y_zero_transform_bypass_flag
65 int log2_max_frame_num
; ///< log2_max_frame_num_minus4 + 4
66 int poc_type
; ///< pic_order_cnt_type
67 int log2_max_poc_lsb
; ///< log2_max_pic_order_cnt_lsb_minus4
68 int delta_pic_order_always_zero_flag
;
69 int offset_for_non_ref_pic
;
70 int offset_for_top_to_bottom_field
;
71 int poc_cycle_length
; ///< num_ref_frames_in_pic_order_cnt_cycle
72 int ref_frame_count
; ///< num_ref_frames
73 int gaps_in_frame_num_allowed_flag
;
74 int mb_width
; ///< frame_width_in_mbs_minus1 + 1
75 int mb_height
; ///< frame_height_in_mbs_minus1 + 1
76 int frame_mbs_only_flag
;
77 int mb_aff
; ///<mb_adaptive_frame_field_flag
78 int direct_8x8_inference_flag
;
79 int crop
; ///< frame_cropping_flag
80 int crop_left
; ///< frame_cropping_rect_left_offset
81 int crop_right
; ///< frame_cropping_rect_right_offset
82 int crop_top
; ///< frame_cropping_rect_top_offset
83 int crop_bottom
; ///< frame_cropping_rect_bottom_offset
84 int vui_parameters_present_flag
;
86 int timing_info_present_flag
;
87 uint32_t num_units_in_tick
;
89 int fixed_frame_rate_flag
;
90 short offset_for_ref_frame
[256]; //FIXME dyn aloc?
91 int bitstream_restriction_flag
;
92 int num_reorder_frames
;
96 * Picture parameter set
100 int cabac
; ///< entropy_coding_mode_flag
101 int pic_order_present
; ///< pic_order_present_flag
102 int slice_group_count
; ///< num_slice_groups_minus1 + 1
103 int mb_slice_group_map_type
;
104 int ref_count
[2]; ///< num_ref_idx_l0/1_active_minus1 + 1
105 int weighted_pred
; ///< weighted_pred_flag
106 int weighted_bipred_idc
;
107 int init_qp
; ///< pic_init_qp_minus26 + 26
108 int init_qs
; ///< pic_init_qs_minus26 + 26
109 int chroma_qp_index_offset
;
110 int deblocking_filter_parameters_present
; ///< deblocking_filter_parameters_present_flag
111 int constrained_intra_pred
; ///< constrained_intra_pred_flag
112 int redundant_pic_cnt_present
; ///< redundant_pic_cnt_present_flag
113 int transform_8x8_mode
; ///< transform_8x8_mode_flag
117 * Memory management control operation opcode.
119 typedef enum MMCOOpcode
{
130 * Memory management control operation.
141 typedef struct H264Context
{
149 #define NAL_IDR_SLICE 5
153 #define NAL_PICTURE_DELIMITER 9
154 #define NAL_FILTER_DATA 10
155 uint8_t *rbsp_buffer
;
156 int rbsp_buffer_size
;
159 * Used to parse AVC variant of h264
161 int is_avc
; ///< this flag is != 0 if codec is avc1
162 int got_avcC
; ///< flag used to parse avcC data only once
163 int nal_length_size
; ///< Number of bytes used for nal length (1, 2 or 4)
167 int prev_mb_skipped
; //FIXME remove (IMHO not used)
170 int chroma_pred_mode
;
171 int intra16x16_pred_mode
;
176 int8_t intra4x4_pred_mode_cache
[5*8];
177 int8_t (*intra4x4_pred_mode
)[8];
178 void (*pred4x4
[9+3])(uint8_t *src
, uint8_t *topright
, int stride
);//FIXME move to dsp?
179 void (*pred8x8l
[9+3])(uint8_t *src
, int topleft
, int topright
, int stride
);
180 void (*pred8x8
[4+3])(uint8_t *src
, int stride
);
181 void (*pred16x16
[4+3])(uint8_t *src
, int stride
);
182 unsigned int topleft_samples_available
;
183 unsigned int top_samples_available
;
184 unsigned int topright_samples_available
;
185 unsigned int left_samples_available
;
186 uint8_t (*top_borders
[2])[16+2*8];
187 uint8_t left_border
[2*(17+2*9)];
190 * non zero coeff count cache.
191 * is 64 if not available.
193 uint8_t non_zero_count_cache
[6*8] __align8
;
194 uint8_t (*non_zero_count
)[16];
197 * Motion vector cache.
199 int16_t mv_cache
[2][5*8][2] __align8
;
200 int8_t ref_cache
[2][5*8] __align8
;
201 #define LIST_NOT_USED -1 //FIXME rename?
202 #define PART_NOT_AVAILABLE -2
205 * is 1 if the specific list MV&references are set to 0,0,-2.
207 int mv_cache_clean
[2];
210 * number of neighbors (top and/or left) that used 8x8 dct
212 int neighbor_transform_size
;
215 * block_offset[ 0..23] for frame macroblocks
216 * block_offset[24..47] for field macroblocks
218 int block_offset
[2*(16+8)];
220 uint32_t *mb2b_xy
; //FIXME are these 4 a good idea?
222 int b_stride
; //FIXME use s->b4_stride
228 int unknown_svq3_flag
;
229 int next_slice_index
;
231 SPS sps_buffer
[MAX_SPS_COUNT
];
232 SPS sps
; ///< current sps
234 PPS pps_buffer
[MAX_PPS_COUNT
];
238 PPS pps
; //FIXME move to Picture perhaps? (->no) do we need that?
240 uint16_t (*dequant4_coeff
)[16]; // FIXME quant matrices should be per SPS or PPS
241 uint16_t (*dequant8_coeff
)[64];
244 uint8_t *slice_table_base
;
245 uint8_t *slice_table
; ///< slice_table_base + mb_stride + 1
247 int slice_type_fixed
;
249 //interlacing specific flags
251 int mb_field_decoding_flag
;
258 int delta_poc_bottom
;
261 int prev_poc_msb
; ///< poc_msb of the last reference pic for POC type 0
262 int prev_poc_lsb
; ///< poc_lsb of the last reference pic for POC type 0
263 int frame_num_offset
; ///< for POC type 2
264 int prev_frame_num_offset
; ///< for POC type 2
265 int prev_frame_num
; ///< frame_num of the last pic for POC type 1/2
268 * frame_num for frames or 2*frame_num for field pics.
273 * max_frame_num or 2*max_frame_num for field pics.
277 //Weighted pred stuff
279 int use_weight_chroma
;
280 int luma_log2_weight_denom
;
281 int chroma_log2_weight_denom
;
282 int luma_weight
[2][16];
283 int luma_offset
[2][16];
284 int chroma_weight
[2][16][2];
285 int chroma_offset
[2][16][2];
286 int implicit_weight
[16][16];
289 int deblocking_filter
; ///< disable_deblocking_filter_idc with 1<->0
290 int slice_alpha_c0_offset
;
291 int slice_beta_offset
;
293 int redundant_pic_count
;
295 int direct_spatial_mv_pred
;
296 int dist_scale_factor
[16];
297 int map_col_to_list0
[2][16];
300 * num_ref_idx_l0/1_active_minus1 + 1
302 int ref_count
[2];// FIXME split for AFF
303 Picture
*short_ref
[32];
304 Picture
*long_ref
[32];
305 Picture default_ref_list
[2][32];
306 Picture ref_list
[2][32]; //FIXME size?
307 Picture field_ref_list
[2][32]; //FIXME size?
308 Picture
*delayed_pic
[16]; //FIXME size?
309 Picture
*delayed_output_pic
;
312 * memory management control operations buffer.
314 MMCO mmco
[MAX_MMCO_COUNT
];
317 int long_ref_count
; ///< number of actual long term references
318 int short_ref_count
; ///< number of actual short term references
321 GetBitContext intra_gb
;
322 GetBitContext inter_gb
;
323 GetBitContext
*intra_gb_ptr
;
324 GetBitContext
*inter_gb_ptr
;
326 DCTELEM mb
[16*24] __align8
;
332 uint8_t cabac_state
[460];
335 /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
339 /* chroma_pred_mode for i4x4 or i16x16, else 0 */
340 uint8_t *chroma_pred_mode_table
;
341 int last_qscale_diff
;
342 int16_t (*mvd_table
[2])[2];
343 int16_t mvd_cache
[2][5*8][2] __align8
;
344 uint8_t *direct_table
;
345 uint8_t direct_cache
[5*8];
347 uint8_t zigzag_scan
[16];
348 uint8_t field_scan
[16];
349 const uint8_t *zigzag_scan_q0
;
350 const uint8_t *field_scan_q0
;
353 static VLC coeff_token_vlc
[4];
354 static VLC chroma_dc_coeff_token_vlc
;
356 static VLC total_zeros_vlc
[15];
357 static VLC chroma_dc_total_zeros_vlc
[3];
359 static VLC run_vlc
[6];
362 static void svq3_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
);
363 static void svq3_add_idct_c(uint8_t *dst
, DCTELEM
*block
, int stride
, int qp
, int dc
);
364 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
);
/**
 * Packs the low 16 bits of two integers into one 32 bit word.
 * Without WORDS_BIGENDIAN, a ends up in bits 0..15 and b in bits 16..31;
 * with WORDS_BIGENDIAN the two halves are swapped.
 * @param a first value, only its low 16 bits are kept
 * @param b second value, only its low 16 bits are kept
 * @return the combined 32 bit word
 */
static inline uint32_t pack16to32(int a, int b){
    /* work on unsigned values: left shifting a negative (or too large)
       signed int is undefined behaviour, unsigned shifts simply wrap */
    const uint32_t ua= (uint32_t)a;
    const uint32_t ub= (uint32_t)b;
#ifdef WORDS_BIGENDIAN
    return (ub&0xFFFF) + (ua<<16);
#else
    return (ua&0xFFFF) + (ub<<16);
#endif
}
376 * @param h height of the rectangle, should be a constant
377 * @param w width of the rectangle, should be a constant
378 * @param size the size of val (1 or 4), should be a constant
380 static inline void fill_rectangle(void *vp
, int w
, int h
, int stride
, uint32_t val
, int size
){ //FIXME ensure this IS inlined
381 uint8_t *p
= (uint8_t*)vp
;
382 assert(size
==1 || size
==4);
387 assert((((int)vp
)&(FFMIN(w
, STRIDE_ALIGN
)-1)) == 0);
388 assert((stride
&(w
-1))==0);
389 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
392 *(uint16_t*)(p
+ stride
)= size
==4 ? val
: val
*0x0101;
393 }else if(w
==2 && h
==4){
394 *(uint16_t*)(p
+ 0*stride
)=
395 *(uint16_t*)(p
+ 1*stride
)=
396 *(uint16_t*)(p
+ 2*stride
)=
397 *(uint16_t*)(p
+ 3*stride
)= size
==4 ? val
: val
*0x0101;
398 }else if(w
==4 && h
==1){
399 *(uint32_t*)(p
+ 0*stride
)= size
==4 ? val
: val
*0x01010101;
400 }else if(w
==4 && h
==2){
401 *(uint32_t*)(p
+ 0*stride
)=
402 *(uint32_t*)(p
+ 1*stride
)= size
==4 ? val
: val
*0x01010101;
403 }else if(w
==4 && h
==4){
404 *(uint32_t*)(p
+ 0*stride
)=
405 *(uint32_t*)(p
+ 1*stride
)=
406 *(uint32_t*)(p
+ 2*stride
)=
407 *(uint32_t*)(p
+ 3*stride
)= size
==4 ? val
: val
*0x01010101;
408 }else if(w
==8 && h
==1){
410 *(uint32_t*)(p
+ 4)= size
==4 ? val
: val
*0x01010101;
411 }else if(w
==8 && h
==2){
412 *(uint32_t*)(p
+ 0 + 0*stride
)=
413 *(uint32_t*)(p
+ 4 + 0*stride
)=
414 *(uint32_t*)(p
+ 0 + 1*stride
)=
415 *(uint32_t*)(p
+ 4 + 1*stride
)= size
==4 ? val
: val
*0x01010101;
416 }else if(w
==8 && h
==4){
417 *(uint64_t*)(p
+ 0*stride
)=
418 *(uint64_t*)(p
+ 1*stride
)=
419 *(uint64_t*)(p
+ 2*stride
)=
420 *(uint64_t*)(p
+ 3*stride
)= size
==4 ? val
*0x0100000001ULL
: val
*0x0101010101010101ULL
;
421 }else if(w
==16 && h
==2){
422 *(uint64_t*)(p
+ 0+0*stride
)=
423 *(uint64_t*)(p
+ 8+0*stride
)=
424 *(uint64_t*)(p
+ 0+1*stride
)=
425 *(uint64_t*)(p
+ 8+1*stride
)= size
==4 ? val
*0x0100000001ULL
: val
*0x0101010101010101ULL
;
426 }else if(w
==16 && h
==4){
427 *(uint64_t*)(p
+ 0+0*stride
)=
428 *(uint64_t*)(p
+ 8+0*stride
)=
429 *(uint64_t*)(p
+ 0+1*stride
)=
430 *(uint64_t*)(p
+ 8+1*stride
)=
431 *(uint64_t*)(p
+ 0+2*stride
)=
432 *(uint64_t*)(p
+ 8+2*stride
)=
433 *(uint64_t*)(p
+ 0+3*stride
)=
434 *(uint64_t*)(p
+ 8+3*stride
)= size
==4 ? val
*0x0100000001ULL
: val
*0x0101010101010101ULL
;
439 static inline void fill_caches(H264Context
*h
, int mb_type
, int for_deblock
){
440 MpegEncContext
* const s
= &h
->s
;
441 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
442 int topleft_xy
, top_xy
, topright_xy
, left_xy
[2];
443 int topleft_type
, top_type
, topright_type
, left_type
[2];
447 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
448 // the actual condition is whether we're on the edge of a slice,
449 // and even then the intra and nnz parts are unnecessary.
450 if(for_deblock
&& h
->slice_num
== 1)
453 //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
455 top_xy
= mb_xy
- s
->mb_stride
;
456 topleft_xy
= top_xy
- 1;
457 topright_xy
= top_xy
+ 1;
458 left_xy
[1] = left_xy
[0] = mb_xy
-1;
468 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
469 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
470 const int topleft_pair_xy
= top_pair_xy
- 1;
471 const int topright_pair_xy
= top_pair_xy
+ 1;
472 const int topleft_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topleft_pair_xy
]);
473 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
474 const int topright_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[topright_pair_xy
]);
475 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
476 const int curr_mb_frame_flag
= !IS_INTERLACED(mb_type
);
477 const int bottom
= (s
->mb_y
& 1);
478 tprintf("fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag
, left_mb_frame_flag
, topleft_mb_frame_flag
, top_mb_frame_flag
, topright_mb_frame_flag
);
480 ? !curr_mb_frame_flag
// bottom macroblock
481 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
483 top_xy
-= s
->mb_stride
;
486 ? !curr_mb_frame_flag
// bottom macroblock
487 : (!curr_mb_frame_flag
&& !topleft_mb_frame_flag
) // top macroblock
489 topleft_xy
-= s
->mb_stride
;
492 ? !curr_mb_frame_flag
// bottom macroblock
493 : (!curr_mb_frame_flag
&& !topright_mb_frame_flag
) // top macroblock
495 topright_xy
-= s
->mb_stride
;
497 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
498 left_xy
[1] = left_xy
[0] = pair_xy
- 1;
499 if (curr_mb_frame_flag
) {
520 left_xy
[1] += s
->mb_stride
;
533 h
->top_mb_xy
= top_xy
;
534 h
->left_mb_xy
[0] = left_xy
[0];
535 h
->left_mb_xy
[1] = left_xy
[1];
537 topleft_type
= h
->slice_table
[topleft_xy
] < 255 ? s
->current_picture
.mb_type
[topleft_xy
] : 0;
538 top_type
= h
->slice_table
[top_xy
] < 255 ? s
->current_picture
.mb_type
[top_xy
] : 0;
539 topright_type
= h
->slice_table
[topright_xy
] < 255 ? s
->current_picture
.mb_type
[topright_xy
]: 0;
540 left_type
[0] = h
->slice_table
[left_xy
[0] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
541 left_type
[1] = h
->slice_table
[left_xy
[1] ] < 255 ? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
543 topleft_type
= h
->slice_table
[topleft_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topleft_xy
] : 0;
544 top_type
= h
->slice_table
[top_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[top_xy
] : 0;
545 topright_type
= h
->slice_table
[topright_xy
] == h
->slice_num
? s
->current_picture
.mb_type
[topright_xy
]: 0;
546 left_type
[0] = h
->slice_table
[left_xy
[0] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[0]] : 0;
547 left_type
[1] = h
->slice_table
[left_xy
[1] ] == h
->slice_num
? s
->current_picture
.mb_type
[left_xy
[1]] : 0;
550 if(IS_INTRA(mb_type
)){
551 h
->topleft_samples_available
=
552 h
->top_samples_available
=
553 h
->left_samples_available
= 0xFFFF;
554 h
->topright_samples_available
= 0xEEEA;
556 if(!IS_INTRA(top_type
) && (top_type
==0 || h
->pps
.constrained_intra_pred
)){
557 h
->topleft_samples_available
= 0xB3FF;
558 h
->top_samples_available
= 0x33FF;
559 h
->topright_samples_available
= 0x26EA;
562 if(!IS_INTRA(left_type
[i
]) && (left_type
[i
]==0 || h
->pps
.constrained_intra_pred
)){
563 h
->topleft_samples_available
&= 0xDF5F;
564 h
->left_samples_available
&= 0x5F5F;
568 if(!IS_INTRA(topleft_type
) && (topleft_type
==0 || h
->pps
.constrained_intra_pred
))
569 h
->topleft_samples_available
&= 0x7FFF;
571 if(!IS_INTRA(topright_type
) && (topright_type
==0 || h
->pps
.constrained_intra_pred
))
572 h
->topright_samples_available
&= 0xFBFF;
574 if(IS_INTRA4x4(mb_type
)){
575 if(IS_INTRA4x4(top_type
)){
576 h
->intra4x4_pred_mode_cache
[4+8*0]= h
->intra4x4_pred_mode
[top_xy
][4];
577 h
->intra4x4_pred_mode_cache
[5+8*0]= h
->intra4x4_pred_mode
[top_xy
][5];
578 h
->intra4x4_pred_mode_cache
[6+8*0]= h
->intra4x4_pred_mode
[top_xy
][6];
579 h
->intra4x4_pred_mode_cache
[7+8*0]= h
->intra4x4_pred_mode
[top_xy
][3];
582 if(!top_type
|| (IS_INTER(top_type
) && h
->pps
.constrained_intra_pred
))
587 h
->intra4x4_pred_mode_cache
[4+8*0]=
588 h
->intra4x4_pred_mode_cache
[5+8*0]=
589 h
->intra4x4_pred_mode_cache
[6+8*0]=
590 h
->intra4x4_pred_mode_cache
[7+8*0]= pred
;
593 if(IS_INTRA4x4(left_type
[i
])){
594 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[0+2*i
]];
595 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= h
->intra4x4_pred_mode
[left_xy
[i
]][left_block
[1+2*i
]];
598 if(!left_type
[i
] || (IS_INTER(left_type
[i
]) && h
->pps
.constrained_intra_pred
))
603 h
->intra4x4_pred_mode_cache
[3+8*1 + 2*8*i
]=
604 h
->intra4x4_pred_mode_cache
[3+8*2 + 2*8*i
]= pred
;
619 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
621 h
->non_zero_count_cache
[4+8*0]= h
->non_zero_count
[top_xy
][4];
622 h
->non_zero_count_cache
[5+8*0]= h
->non_zero_count
[top_xy
][5];
623 h
->non_zero_count_cache
[6+8*0]= h
->non_zero_count
[top_xy
][6];
624 h
->non_zero_count_cache
[7+8*0]= h
->non_zero_count
[top_xy
][3];
626 h
->non_zero_count_cache
[1+8*0]= h
->non_zero_count
[top_xy
][9];
627 h
->non_zero_count_cache
[2+8*0]= h
->non_zero_count
[top_xy
][8];
629 h
->non_zero_count_cache
[1+8*3]= h
->non_zero_count
[top_xy
][12];
630 h
->non_zero_count_cache
[2+8*3]= h
->non_zero_count
[top_xy
][11];
633 h
->non_zero_count_cache
[4+8*0]=
634 h
->non_zero_count_cache
[5+8*0]=
635 h
->non_zero_count_cache
[6+8*0]=
636 h
->non_zero_count_cache
[7+8*0]=
638 h
->non_zero_count_cache
[1+8*0]=
639 h
->non_zero_count_cache
[2+8*0]=
641 h
->non_zero_count_cache
[1+8*3]=
642 h
->non_zero_count_cache
[2+8*3]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
646 for (i
=0; i
<2; i
++) {
648 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[0+2*i
]];
649 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[1+2*i
]];
650 h
->non_zero_count_cache
[0+8*1 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[4+2*i
]];
651 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->non_zero_count
[left_xy
[i
]][left_block
[5+2*i
]];
653 h
->non_zero_count_cache
[3+8*1 + 2*8*i
]=
654 h
->non_zero_count_cache
[3+8*2 + 2*8*i
]=
655 h
->non_zero_count_cache
[0+8*1 + 8*i
]=
656 h
->non_zero_count_cache
[0+8*4 + 8*i
]= h
->pps
.cabac
&& !IS_INTRA(mb_type
) ? 0 : 64;
663 h
->top_cbp
= h
->cbp_table
[top_xy
];
664 } else if(IS_INTRA(mb_type
)) {
671 h
->left_cbp
= h
->cbp_table
[left_xy
[0]] & 0x1f0;
672 } else if(IS_INTRA(mb_type
)) {
678 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[0]]>>((left_block
[0]&(~1))+1))&0x1) << 1;
681 h
->left_cbp
|= ((h
->cbp_table
[left_xy
[1]]>>((left_block
[2]&(~1))+1))&0x1) << 3;
686 //FIXME direct mb can skip much of this
687 if(IS_INTER(mb_type
) || IS_DIRECT(mb_type
)){
689 for(list
=0; list
<1+(h
->slice_type
==B_TYPE
); list
++){
690 if(!USES_LIST(mb_type
, list
) && !IS_DIRECT(mb_type
) && !h
->deblocking_filter
){
691 /*if(!h->mv_cache_clean[list]){
692 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
693 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
694 h->mv_cache_clean[list]= 1;
698 h
->mv_cache_clean
[list
]= 0;
700 if(IS_INTER(top_type
)){
701 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
702 const int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
703 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0];
704 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 1];
705 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2];
706 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 3];
707 h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]=
708 h
->ref_cache
[list
][scan8
[0] + 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 0];
709 h
->ref_cache
[list
][scan8
[0] + 2 - 1*8]=
710 h
->ref_cache
[list
][scan8
[0] + 3 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ 1];
712 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 0 - 1*8]=
713 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 1 - 1*8]=
714 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 2 - 1*8]=
715 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
716 *(uint32_t*)&h
->ref_cache
[list
][scan8
[0] + 0 - 1*8]= ((top_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
)&0xFF)*0x01010101;
719 //FIXME unify cleanup or sth
720 if(IS_INTER(left_type
[0])){
721 const int b_xy
= h
->mb2b_xy
[left_xy
[0]] + 3;
722 const int b8_xy
= h
->mb2b8_xy
[left_xy
[0]] + 1;
723 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 0*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[0]];
724 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[1]];
725 h
->ref_cache
[list
][scan8
[0] - 1 + 0*8]=
726 h
->ref_cache
[list
][scan8
[0] - 1 + 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[0]>>1)];
728 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 0*8]=
729 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 1*8]= 0;
730 h
->ref_cache
[list
][scan8
[0] - 1 + 0*8]=
731 h
->ref_cache
[list
][scan8
[0] - 1 + 1*8]= left_type
[0] ? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
734 if(IS_INTER(left_type
[1])){
735 const int b_xy
= h
->mb2b_xy
[left_xy
[1]] + 3;
736 const int b8_xy
= h
->mb2b8_xy
[left_xy
[1]] + 1;
737 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 2*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[2]];
738 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 3*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
+ h
->b_stride
*left_block
[3]];
739 h
->ref_cache
[list
][scan8
[0] - 1 + 2*8]=
740 h
->ref_cache
[list
][scan8
[0] - 1 + 3*8]= s
->current_picture
.ref_index
[list
][b8_xy
+ h
->b8_stride
*(left_block
[2]>>1)];
742 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 2*8]=
743 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 + 3*8]= 0;
744 h
->ref_cache
[list
][scan8
[0] - 1 + 2*8]=
745 h
->ref_cache
[list
][scan8
[0] - 1 + 3*8]= left_type
[0] ? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
746 assert((!left_type
[0]) == (!left_type
[1]));
749 if(for_deblock
|| (IS_DIRECT(mb_type
) && !h
->direct_spatial_mv_pred
))
752 if(IS_INTER(topleft_type
)){
753 const int b_xy
= h
->mb2b_xy
[topleft_xy
] + 3 + 3*h
->b_stride
;
754 const int b8_xy
= h
->mb2b8_xy
[topleft_xy
] + 1 + h
->b8_stride
;
755 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
756 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
758 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] - 1 - 1*8]= 0;
759 h
->ref_cache
[list
][scan8
[0] - 1 - 1*8]= topleft_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
762 if(IS_INTER(topright_type
)){
763 const int b_xy
= h
->mb2b_xy
[topright_xy
] + 3*h
->b_stride
;
764 const int b8_xy
= h
->mb2b8_xy
[topright_xy
] + h
->b8_stride
;
765 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= *(uint32_t*)s
->current_picture
.motion_val
[list
][b_xy
];
766 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= s
->current_picture
.ref_index
[list
][b8_xy
];
768 *(uint32_t*)h
->mv_cache
[list
][scan8
[0] + 4 - 1*8]= 0;
769 h
->ref_cache
[list
][scan8
[0] + 4 - 1*8]= topright_type
? LIST_NOT_USED
: PART_NOT_AVAILABLE
;
773 h
->ref_cache
[list
][scan8
[5 ]+1] =
774 h
->ref_cache
[list
][scan8
[7 ]+1] =
775 h
->ref_cache
[list
][scan8
[13]+1] = //FIXME remove past 3 (init somewhere else)
776 h
->ref_cache
[list
][scan8
[4 ]] =
777 h
->ref_cache
[list
][scan8
[12]] = PART_NOT_AVAILABLE
;
778 *(uint32_t*)h
->mv_cache
[list
][scan8
[5 ]+1]=
779 *(uint32_t*)h
->mv_cache
[list
][scan8
[7 ]+1]=
780 *(uint32_t*)h
->mv_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
781 *(uint32_t*)h
->mv_cache
[list
][scan8
[4 ]]=
782 *(uint32_t*)h
->mv_cache
[list
][scan8
[12]]= 0;
785 /* XXX beurk, Load mvd */
786 if(IS_INTER(topleft_type
)){
787 const int b_xy
= h
->mb2b_xy
[topleft_xy
] + 3 + 3*h
->b_stride
;
788 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
];
790 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 - 1*8]= 0;
793 if(IS_INTER(top_type
)){
794 const int b_xy
= h
->mb2b_xy
[top_xy
] + 3*h
->b_stride
;
795 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 0];
796 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 1];
797 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 2];
798 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ 3];
800 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 0 - 1*8]=
801 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 1 - 1*8]=
802 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 2 - 1*8]=
803 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] + 3 - 1*8]= 0;
805 if(IS_INTER(left_type
[0])){
806 const int b_xy
= h
->mb2b_xy
[left_xy
[0]] + 3;
807 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[0]];
808 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[1]];
810 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 0*8]=
811 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 1*8]= 0;
813 if(IS_INTER(left_type
[1])){
814 const int b_xy
= h
->mb2b_xy
[left_xy
[1]] + 3;
815 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[2]];
816 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= *(uint32_t*)h
->mvd_table
[list
][b_xy
+ h
->b_stride
*left_block
[3]];
818 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 2*8]=
819 *(uint32_t*)h
->mvd_cache
[list
][scan8
[0] - 1 + 3*8]= 0;
821 *(uint32_t*)h
->mvd_cache
[list
][scan8
[5 ]+1]=
822 *(uint32_t*)h
->mvd_cache
[list
][scan8
[7 ]+1]=
823 *(uint32_t*)h
->mvd_cache
[list
][scan8
[13]+1]= //FIXME remove past 3 (init somewhere else)
824 *(uint32_t*)h
->mvd_cache
[list
][scan8
[4 ]]=
825 *(uint32_t*)h
->mvd_cache
[list
][scan8
[12]]= 0;
827 if(h
->slice_type
== B_TYPE
){
828 fill_rectangle(&h
->direct_cache
[scan8
[0]], 4, 4, 8, 0, 1);
830 if(IS_DIRECT(top_type
)){
831 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0x01010101;
832 }else if(IS_8X8(top_type
)){
833 int b8_xy
= h
->mb2b8_xy
[top_xy
] + h
->b8_stride
;
834 h
->direct_cache
[scan8
[0] + 0 - 1*8]= h
->direct_table
[b8_xy
];
835 h
->direct_cache
[scan8
[0] + 2 - 1*8]= h
->direct_table
[b8_xy
+ 1];
837 *(uint32_t*)&h
->direct_cache
[scan8
[0] - 1*8]= 0;
841 if(IS_DIRECT(left_type
[0])){
842 h
->direct_cache
[scan8
[0] - 1 + 0*8]=
843 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 1;
844 }else if(IS_8X8(left_type
[0])){
845 int b8_xy
= h
->mb2b8_xy
[left_xy
[0]] + 1;
846 h
->direct_cache
[scan8
[0] - 1 + 0*8]= h
->direct_table
[b8_xy
];
847 h
->direct_cache
[scan8
[0] - 1 + 2*8]= h
->direct_table
[b8_xy
+ h
->b8_stride
];
849 h
->direct_cache
[scan8
[0] - 1 + 0*8]=
850 h
->direct_cache
[scan8
[0] - 1 + 2*8]= 0;
858 h
->neighbor_transform_size
= !!IS_8x8DCT(top_type
) + !!IS_8x8DCT(left_type
[0]);
861 static inline void write_back_intra_pred_mode(H264Context
*h
){
862 MpegEncContext
* const s
= &h
->s
;
863 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
865 h
->intra4x4_pred_mode
[mb_xy
][0]= h
->intra4x4_pred_mode_cache
[7+8*1];
866 h
->intra4x4_pred_mode
[mb_xy
][1]= h
->intra4x4_pred_mode_cache
[7+8*2];
867 h
->intra4x4_pred_mode
[mb_xy
][2]= h
->intra4x4_pred_mode_cache
[7+8*3];
868 h
->intra4x4_pred_mode
[mb_xy
][3]= h
->intra4x4_pred_mode_cache
[7+8*4];
869 h
->intra4x4_pred_mode
[mb_xy
][4]= h
->intra4x4_pred_mode_cache
[4+8*4];
870 h
->intra4x4_pred_mode
[mb_xy
][5]= h
->intra4x4_pred_mode_cache
[5+8*4];
871 h
->intra4x4_pred_mode
[mb_xy
][6]= h
->intra4x4_pred_mode_cache
[6+8*4];
875 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
877 static inline int check_intra4x4_pred_mode(H264Context
*h
){
878 MpegEncContext
* const s
= &h
->s
;
879 static const int8_t top
[12]= {-1, 0,LEFT_DC_PRED
,-1,-1,-1,-1,-1, 0};
880 static const int8_t left
[12]= { 0,-1, TOP_DC_PRED
, 0,-1,-1,-1, 0,-1,DC_128_PRED
};
883 if(!(h
->top_samples_available
&0x8000)){
885 int status
= top
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + i
] ];
887 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
890 h
->intra4x4_pred_mode_cache
[scan8
[0] + i
]= status
;
895 if(!(h
->left_samples_available
&0x8000)){
897 int status
= left
[ h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
] ];
899 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status
, s
->mb_x
, s
->mb_y
);
902 h
->intra4x4_pred_mode_cache
[scan8
[0] + 8*i
]= status
;
908 } //FIXME cleanup like next
911 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
913 static inline int check_intra_pred_mode(H264Context
*h
, int mode
){
914 MpegEncContext
* const s
= &h
->s
;
915 static const int8_t top
[7]= {LEFT_DC_PRED8x8
, 1,-1,-1};
916 static const int8_t left
[7]= { TOP_DC_PRED8x8
,-1, 2,-1,DC_128_PRED8x8
};
918 if(mode
< 0 || mode
> 6) {
919 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "out of range intra chroma pred mode at %d %d\n", s
->mb_x
, s
->mb_y
);
923 if(!(h
->top_samples_available
&0x8000)){
926 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "top block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
931 if(!(h
->left_samples_available
&0x8000)){
934 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "left block unavailable for requested intra mode at %d %d\n", s
->mb_x
, s
->mb_y
);
943 * gets the predicted intra4x4 prediction mode.
945 static inline int pred_intra_mode(H264Context
*h
, int n
){
946 const int index8
= scan8
[n
];
947 const int left
= h
->intra4x4_pred_mode_cache
[index8
- 1];
948 const int top
= h
->intra4x4_pred_mode_cache
[index8
- 8];
949 const int min
= FFMIN(left
, top
);
951 tprintf("mode:%d %d min:%d\n", left
,top
, min
);
953 if(min
<0) return DC_PRED
;
957 static inline void write_back_non_zero_count(H264Context
*h
){
958 MpegEncContext
* const s
= &h
->s
;
959 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
961 h
->non_zero_count
[mb_xy
][0]= h
->non_zero_count_cache
[7+8*1];
962 h
->non_zero_count
[mb_xy
][1]= h
->non_zero_count_cache
[7+8*2];
963 h
->non_zero_count
[mb_xy
][2]= h
->non_zero_count_cache
[7+8*3];
964 h
->non_zero_count
[mb_xy
][3]= h
->non_zero_count_cache
[7+8*4];
965 h
->non_zero_count
[mb_xy
][4]= h
->non_zero_count_cache
[4+8*4];
966 h
->non_zero_count
[mb_xy
][5]= h
->non_zero_count_cache
[5+8*4];
967 h
->non_zero_count
[mb_xy
][6]= h
->non_zero_count_cache
[6+8*4];
969 h
->non_zero_count
[mb_xy
][9]= h
->non_zero_count_cache
[1+8*2];
970 h
->non_zero_count
[mb_xy
][8]= h
->non_zero_count_cache
[2+8*2];
971 h
->non_zero_count
[mb_xy
][7]= h
->non_zero_count_cache
[2+8*1];
973 h
->non_zero_count
[mb_xy
][12]=h
->non_zero_count_cache
[1+8*5];
974 h
->non_zero_count
[mb_xy
][11]=h
->non_zero_count_cache
[2+8*5];
975 h
->non_zero_count
[mb_xy
][10]=h
->non_zero_count_cache
[2+8*4];
979 * gets the predicted number of non zero coefficients.
980 * @param n block index
982 static inline int pred_non_zero_count(H264Context
*h
, int n
){
983 const int index8
= scan8
[n
];
984 const int left
= h
->non_zero_count_cache
[index8
- 1];
985 const int top
= h
->non_zero_count_cache
[index8
- 8];
988 if(i
<64) i
= (i
+1)>>1;
990 tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left
, top
, n
, scan8
[n
], i
&31);
995 static inline int fetch_diagonal_mv(H264Context
*h
, const int16_t **C
, int i
, int list
, int part_width
){
996 const int topright_ref
= h
->ref_cache
[list
][ i
- 8 + part_width
];
998 if(topright_ref
!= PART_NOT_AVAILABLE
){
999 *C
= h
->mv_cache
[list
][ i
- 8 + part_width
];
1000 return topright_ref
;
1002 tprintf("topright MV not available\n");
1004 *C
= h
->mv_cache
[list
][ i
- 8 - 1 ];
1005 return h
->ref_cache
[list
][ i
- 8 - 1 ];
1010 * gets the predicted MV.
1011 * @param n the block index
1012 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
1013 * @param mx the x component of the predicted motion vector
1014 * @param my the y component of the predicted motion vector
1016 static inline void pred_motion(H264Context
* const h
, int n
, int part_width
, int list
, int ref
, int * const mx
, int * const my
){
1017 const int index8
= scan8
[n
];
1018 const int top_ref
= h
->ref_cache
[list
][ index8
- 8 ];
1019 const int left_ref
= h
->ref_cache
[list
][ index8
- 1 ];
1020 const int16_t * const A
= h
->mv_cache
[list
][ index8
- 1 ];
1021 const int16_t * const B
= h
->mv_cache
[list
][ index8
- 8 ];
1023 int diagonal_ref
, match_count
;
1025 assert(part_width
==1 || part_width
==2 || part_width
==4);
1035 diagonal_ref
= fetch_diagonal_mv(h
, &C
, index8
, list
, part_width
);
1036 match_count
= (diagonal_ref
==ref
) + (top_ref
==ref
) + (left_ref
==ref
);
1037 tprintf("pred_motion match_count=%d\n", match_count
);
1038 if(match_count
> 1){ //most common
1039 *mx
= mid_pred(A
[0], B
[0], C
[0]);
1040 *my
= mid_pred(A
[1], B
[1], C
[1]);
1041 }else if(match_count
==1){
1045 }else if(top_ref
==ref
){
1053 if(top_ref
== PART_NOT_AVAILABLE
&& diagonal_ref
== PART_NOT_AVAILABLE
&& left_ref
!= PART_NOT_AVAILABLE
){
1057 *mx
= mid_pred(A
[0], B
[0], C
[0]);
1058 *my
= mid_pred(A
[1], B
[1], C
[1]);
1062 tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], diagonal_ref
, C
[0], C
[1], left_ref
, A
[0], A
[1], ref
, *mx
, *my
, h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1066 * gets the directionally predicted 16x8 MV.
1067 * @param n the block index
1068 * @param mx the x component of the predicted motion vector
1069 * @param my the y component of the predicted motion vector
1071 static inline void pred_16x8_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
1073 const int top_ref
= h
->ref_cache
[list
][ scan8
[0] - 8 ];
1074 const int16_t * const B
= h
->mv_cache
[list
][ scan8
[0] - 8 ];
1076 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref
, B
[0], B
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1084 const int left_ref
= h
->ref_cache
[list
][ scan8
[8] - 1 ];
1085 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[8] - 1 ];
1087 tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1089 if(left_ref
== ref
){
1097 pred_motion(h
, n
, 4, list
, ref
, mx
, my
);
1101 * gets the directionally predicted 8x16 MV.
1102 * @param n the block index
1103 * @param mx the x component of the predicted motion vector
1104 * @param my the y component of the predicted motion vector
1106 static inline void pred_8x16_motion(H264Context
* const h
, int n
, int list
, int ref
, int * const mx
, int * const my
){
1108 const int left_ref
= h
->ref_cache
[list
][ scan8
[0] - 1 ];
1109 const int16_t * const A
= h
->mv_cache
[list
][ scan8
[0] - 1 ];
1111 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref
, A
[0], A
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1113 if(left_ref
== ref
){
1122 diagonal_ref
= fetch_diagonal_mv(h
, &C
, scan8
[4], list
, 2);
1124 tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref
, C
[0], C
[1], h
->s
.mb_x
, h
->s
.mb_y
, n
, list
);
1126 if(diagonal_ref
== ref
){
1134 pred_motion(h
, n
, 2, list
, ref
, mx
, my
);
1137 static inline void pred_pskip_motion(H264Context
* const h
, int * const mx
, int * const my
){
1138 const int top_ref
= h
->ref_cache
[0][ scan8
[0] - 8 ];
1139 const int left_ref
= h
->ref_cache
[0][ scan8
[0] - 1 ];
1141 tprintf("pred_pskip: (%d) (%d) at %2d %2d\n", top_ref
, left_ref
, h
->s
.mb_x
, h
->s
.mb_y
);
1143 if(top_ref
== PART_NOT_AVAILABLE
|| left_ref
== PART_NOT_AVAILABLE
1144 || (top_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 8 ] == 0)
1145 || (left_ref
== 0 && *(uint32_t*)h
->mv_cache
[0][ scan8
[0] - 1 ] == 0)){
1151 pred_motion(h
, 0, 4, 0, 0, mx
, my
);
1156 static inline void direct_dist_scale_factor(H264Context
* const h
){
1157 const int poc
= h
->s
.current_picture_ptr
->poc
;
1158 const int poc1
= h
->ref_list
[1][0].poc
;
1160 for(i
=0; i
<h
->ref_count
[0]; i
++){
1161 int poc0
= h
->ref_list
[0][i
].poc
;
1162 int td
= clip(poc1
- poc0
, -128, 127);
1163 if(td
== 0 /* FIXME || pic0 is a long-term ref */){
1164 h
->dist_scale_factor
[i
] = 256;
1166 int tb
= clip(poc
- poc0
, -128, 127);
1167 int tx
= (16384 + (ABS(td
) >> 1)) / td
;
1168 h
->dist_scale_factor
[i
] = clip((tb
*tx
+ 32) >> 6, -1024, 1023);
1172 static inline void direct_ref_list_init(H264Context
* const h
){
1173 MpegEncContext
* const s
= &h
->s
;
1174 Picture
* const ref1
= &h
->ref_list
[1][0];
1175 Picture
* const cur
= s
->current_picture_ptr
;
1177 if(cur
->pict_type
== I_TYPE
)
1178 cur
->ref_count
[0] = 0;
1179 if(cur
->pict_type
!= B_TYPE
)
1180 cur
->ref_count
[1] = 0;
1181 for(list
=0; list
<2; list
++){
1182 cur
->ref_count
[list
] = h
->ref_count
[list
];
1183 for(j
=0; j
<h
->ref_count
[list
]; j
++)
1184 cur
->ref_poc
[list
][j
] = h
->ref_list
[list
][j
].poc
;
1186 if(cur
->pict_type
!= B_TYPE
|| h
->direct_spatial_mv_pred
)
1188 for(list
=0; list
<2; list
++){
1189 for(i
=0; i
<ref1
->ref_count
[list
]; i
++){
1190 const int poc
= ref1
->ref_poc
[list
][i
];
1191 h
->map_col_to_list0
[list
][i
] = PART_NOT_AVAILABLE
;
1192 for(j
=0; j
<h
->ref_count
[list
]; j
++)
1193 if(h
->ref_list
[list
][j
].poc
== poc
){
1194 h
->map_col_to_list0
[list
][i
] = j
;
1201 static inline void pred_direct_motion(H264Context
* const h
, int *mb_type
){
1202 MpegEncContext
* const s
= &h
->s
;
1203 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
1204 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
1205 const int b4_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
1206 const int mb_type_col
= h
->ref_list
[1][0].mb_type
[mb_xy
];
1207 const int16_t (*l1mv0
)[2] = (const int16_t (*)[2]) &h
->ref_list
[1][0].motion_val
[0][b4_xy
];
1208 const int16_t (*l1mv1
)[2] = (const int16_t (*)[2]) &h
->ref_list
[1][0].motion_val
[1][b4_xy
];
1209 const int8_t *l1ref0
= &h
->ref_list
[1][0].ref_index
[0][b8_xy
];
1210 const int8_t *l1ref1
= &h
->ref_list
[1][0].ref_index
[1][b8_xy
];
1211 const int is_b8x8
= IS_8X8(*mb_type
);
1215 if(IS_8X8(mb_type_col
) && !h
->sps
.direct_8x8_inference_flag
){
1216 /* FIXME save sub mb types from previous frames (or derive from MVs)
1217 * so we know exactly what block size to use */
1218 sub_mb_type
= MB_TYPE_8x8
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_4x4 */
1219 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1220 }else if(!is_b8x8
&& (IS_16X16(mb_type_col
) || IS_INTRA(mb_type_col
))){
1221 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1222 *mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_16x16 */
1224 sub_mb_type
= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
; /* B_SUB_8x8 */
1225 *mb_type
= MB_TYPE_8x8
|MB_TYPE_L0L1
;
1228 *mb_type
|= MB_TYPE_DIRECT2
;
1230 tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type
, sub_mb_type
, is_b8x8
, mb_type_col
);
1232 if(h
->direct_spatial_mv_pred
){
1237 /* ref = min(neighbors) */
1238 for(list
=0; list
<2; list
++){
1239 int refa
= h
->ref_cache
[list
][scan8
[0] - 1];
1240 int refb
= h
->ref_cache
[list
][scan8
[0] - 8];
1241 int refc
= h
->ref_cache
[list
][scan8
[0] - 8 + 4];
1243 refc
= h
->ref_cache
[list
][scan8
[0] - 8 - 1];
1245 if(ref
[list
] < 0 || (refb
< ref
[list
] && refb
>= 0))
1247 if(ref
[list
] < 0 || (refc
< ref
[list
] && refc
>= 0))
1253 if(ref
[0] < 0 && ref
[1] < 0){
1254 ref
[0] = ref
[1] = 0;
1255 mv
[0][0] = mv
[0][1] =
1256 mv
[1][0] = mv
[1][1] = 0;
1258 for(list
=0; list
<2; list
++){
1260 pred_motion(h
, 0, 4, list
, ref
[list
], &mv
[list
][0], &mv
[list
][1]);
1262 mv
[list
][0] = mv
[list
][1] = 0;
1267 *mb_type
&= ~MB_TYPE_P0L1
;
1268 sub_mb_type
&= ~MB_TYPE_P0L1
;
1269 }else if(ref
[0] < 0){
1270 *mb_type
&= ~MB_TYPE_P0L0
;
1271 sub_mb_type
&= ~MB_TYPE_P0L0
;
1274 if(IS_16X16(*mb_type
)){
1275 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, ref
[0], 1);
1276 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, ref
[1], 1);
1277 if(!IS_INTRA(mb_type_col
)
1278 && ( l1ref0
[0] == 0 && ABS(l1mv0
[0][0]) <= 1 && ABS(l1mv0
[0][1]) <= 1
1279 || l1ref0
[0] < 0 && l1ref1
[0] == 0 && ABS(l1mv1
[0][0]) <= 1 && ABS(l1mv1
[0][1]) <= 1)){
1281 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1283 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
1285 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1287 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
1289 fill_rectangle(&h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1290 fill_rectangle(&h
->mv_cache
[1][scan8
[0]], 4, 4, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1293 for(i8
=0; i8
<4; i8
++){
1294 const int x8
= i8
&1;
1295 const int y8
= i8
>>1;
1297 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1299 h
->sub_mb_type
[i8
] = sub_mb_type
;
1301 fill_rectangle(&h
->mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[0][0],mv
[0][1]), 4);
1302 fill_rectangle(&h
->mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, pack16to32(mv
[1][0],mv
[1][1]), 4);
1303 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref
[0], 1);
1304 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, ref
[1], 1);
1307 if(!IS_INTRA(mb_type_col
) && ( l1ref0
[x8
+ y8
*h
->b8_stride
] == 0
1308 || l1ref0
[x8
+ y8
*h
->b8_stride
] < 0 && l1ref1
[x8
+ y8
*h
->b8_stride
] == 0)){
1309 const int16_t (*l1mv
)[2]= l1ref0
[x8
+ y8
*h
->b8_stride
] == 0 ? l1mv0
: l1mv1
;
1310 for(i4
=0; i4
<4; i4
++){
1311 const int16_t *mv_col
= l1mv
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*h
->b_stride
];
1312 if(ABS(mv_col
[0]) <= 1 && ABS(mv_col
[1]) <= 1){
1314 *(uint32_t*)h
->mv_cache
[0][scan8
[i8
*4+i4
]] = 0;
1316 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] = 0;
1322 }else{ /* direct temporal mv pred */
1323 if(IS_16X16(*mb_type
)){
1324 fill_rectangle(&h
->ref_cache
[1][scan8
[0]], 4, 4, 8, 0, 1);
1325 if(IS_INTRA(mb_type_col
)){
1326 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, 0, 1);
1327 fill_rectangle(&h
-> mv_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
1328 fill_rectangle(&h
-> mv_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
1330 const int ref0
= l1ref0
[0] >= 0 ? h
->map_col_to_list0
[0][l1ref0
[0]]
1331 : h
->map_col_to_list0
[1][l1ref1
[0]];
1332 const int dist_scale_factor
= h
->dist_scale_factor
[ref0
];
1333 const int16_t *mv_col
= l1mv0
[0];
1335 mv_l0
[0] = (dist_scale_factor
* mv_col
[0] + 128) >> 8;
1336 mv_l0
[1] = (dist_scale_factor
* mv_col
[1] + 128) >> 8;
1337 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, ref0
, 1);
1338 fill_rectangle(&h
-> mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mv_l0
[0],mv_l0
[1]), 4);
1339 fill_rectangle(&h
-> mv_cache
[1][scan8
[0]], 4, 4, 8, pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]), 4);
1342 for(i8
=0; i8
<4; i8
++){
1343 const int x8
= i8
&1;
1344 const int y8
= i8
>>1;
1345 int ref0
, dist_scale_factor
;
1347 if(is_b8x8
&& !IS_DIRECT(h
->sub_mb_type
[i8
]))
1349 h
->sub_mb_type
[i8
] = sub_mb_type
;
1350 if(IS_INTRA(mb_type_col
)){
1351 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1352 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1353 fill_rectangle(&h
-> mv_cache
[0][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1354 fill_rectangle(&h
-> mv_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 4);
1358 ref0
= l1ref0
[x8
+ y8
*h
->b8_stride
];
1360 ref0
= h
->map_col_to_list0
[0][ref0
];
1362 ref0
= h
->map_col_to_list0
[1][l1ref1
[x8
+ y8
*h
->b8_stride
]];
1363 dist_scale_factor
= h
->dist_scale_factor
[ref0
];
1365 fill_rectangle(&h
->ref_cache
[0][scan8
[i8
*4]], 2, 2, 8, ref0
, 1);
1366 fill_rectangle(&h
->ref_cache
[1][scan8
[i8
*4]], 2, 2, 8, 0, 1);
1367 for(i4
=0; i4
<4; i4
++){
1368 const int16_t *mv_col
= l1mv0
[x8
*2 + (i4
&1) + (y8
*2 + (i4
>>1))*h
->b_stride
];
1369 int16_t *mv_l0
= h
->mv_cache
[0][scan8
[i8
*4+i4
]];
1370 mv_l0
[0] = (dist_scale_factor
* mv_col
[0] + 128) >> 8;
1371 mv_l0
[1] = (dist_scale_factor
* mv_col
[1] + 128) >> 8;
1372 *(uint32_t*)h
->mv_cache
[1][scan8
[i8
*4+i4
]] =
1373 pack16to32(mv_l0
[0]-mv_col
[0],mv_l0
[1]-mv_col
[1]);
1380 static inline void write_back_motion(H264Context
*h
, int mb_type
){
1381 MpegEncContext
* const s
= &h
->s
;
1382 const int b_xy
= 4*s
->mb_x
+ 4*s
->mb_y
*h
->b_stride
;
1383 const int b8_xy
= 2*s
->mb_x
+ 2*s
->mb_y
*h
->b8_stride
;
1386 for(list
=0; list
<2; list
++){
1388 if(!USES_LIST(mb_type
, list
)){
1389 if(1){ //FIXME skip or never read if mb_type doesn't use it
1391 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0 + y
*h
->b_stride
]=
1392 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2 + y
*h
->b_stride
]= 0;
1394 if( h
->pps
.cabac
) {
1395 /* FIXME needed ? */
1397 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 0 + y
*h
->b_stride
]=
1398 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 2 + y
*h
->b_stride
]= 0;
1402 s
->current_picture
.ref_index
[list
][b8_xy
+ 0 + y
*h
->b8_stride
]=
1403 s
->current_picture
.ref_index
[list
][b8_xy
+ 1 + y
*h
->b8_stride
]= LIST_NOT_USED
;
1410 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+0 + 8*y
];
1411 *(uint64_t*)s
->current_picture
.motion_val
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mv_cache
[list
][scan8
[0]+2 + 8*y
];
1413 if( h
->pps
.cabac
) {
1415 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 0 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+0 + 8*y
];
1416 *(uint64_t*)h
->mvd_table
[list
][b_xy
+ 2 + y
*h
->b_stride
]= *(uint64_t*)h
->mvd_cache
[list
][scan8
[0]+2 + 8*y
];
1420 s
->current_picture
.ref_index
[list
][b8_xy
+ 0 + y
*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[0]+0 + 16*y
];
1421 s
->current_picture
.ref_index
[list
][b8_xy
+ 1 + y
*h
->b8_stride
]= h
->ref_cache
[list
][scan8
[0]+2 + 16*y
];
1425 if(h
->slice_type
== B_TYPE
&& h
->pps
.cabac
){
1426 if(IS_8X8(mb_type
)){
1427 h
->direct_table
[b8_xy
+1+0*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[1]) ? 1 : 0;
1428 h
->direct_table
[b8_xy
+0+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[2]) ? 1 : 0;
1429 h
->direct_table
[b8_xy
+1+1*h
->b8_stride
] = IS_DIRECT(h
->sub_mb_type
[3]) ? 1 : 0;
1435 * Decodes a network abstraction layer unit.
1436 * @param consumed is the number of bytes used as input
1437 * @param length is the length of the array
1438 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1439 * @returns decoded bytes, might be src+1 if no escapes
1441 static uint8_t *decode_nal(H264Context
*h
, uint8_t *src
, int *dst_length
, int *consumed
, int length
){
1445 // src[0]&0x80; //forbidden bit
1446 h
->nal_ref_idc
= src
[0]>>5;
1447 h
->nal_unit_type
= src
[0]&0x1F;
1451 for(i
=0; i
<length
; i
++)
1452 printf("%2X ", src
[i
]);
1454 for(i
=0; i
+1<length
; i
+=2){
1455 if(src
[i
]) continue;
1456 if(i
>0 && src
[i
-1]==0) i
--;
1457 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
1459 /* startcode, so we must be past the end */
1466 if(i
>=length
-1){ //no escaped 0
1467 *dst_length
= length
;
1468 *consumed
= length
+1; //+1 for the header
1472 h
->rbsp_buffer
= av_fast_realloc(h
->rbsp_buffer
, &h
->rbsp_buffer_size
, length
);
1473 dst
= h
->rbsp_buffer
;
1475 //printf("decoding esc\n");
1478 //remove escapes (very rare 1:2^22)
1479 if(si
+2<length
&& src
[si
]==0 && src
[si
+1]==0 && src
[si
+2]<=3){
1480 if(src
[si
+2]==3){ //escape
1485 }else //next start code
1489 dst
[di
++]= src
[si
++];
1493 *consumed
= si
+ 1;//+1 for the header
1494 //FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1500 * @param src the data which should be escaped
1501 * @param dst the target buffer, dst+1 == src is allowed as a special case
1502 * @param length the length of the src data
1503 * @param dst_length the length of the dst array
1504 * @returns length of escaped data in bytes or -1 if an error occured
1506 static int encode_nal(H264Context
*h
, uint8_t *dst
, uint8_t *src
, int length
, int dst_length
){
1507 int i
, escape_count
, si
, di
;
1511 assert(dst_length
>0);
1513 dst
[0]= (h
->nal_ref_idc
<<5) + h
->nal_unit_type
;
1515 if(length
==0) return 1;
1518 for(i
=0; i
<length
; i
+=2){
1519 if(src
[i
]) continue;
1520 if(i
>0 && src
[i
-1]==0)
1522 if(i
+2<length
&& src
[i
+1]==0 && src
[i
+2]<=3){
1528 if(escape_count
==0){
1530 memcpy(dst
+1, src
, length
);
1534 if(length
+ escape_count
+ 1> dst_length
)
1537 //this should be damn rare (hopefully)
1539 h
->rbsp_buffer
= av_fast_realloc(h
->rbsp_buffer
, &h
->rbsp_buffer_size
, length
+ escape_count
);
1540 temp
= h
->rbsp_buffer
;
1541 //printf("encoding esc\n");
1546 if(si
+2<length
&& src
[si
]==0 && src
[si
+1]==0 && src
[si
+2]<=3){
1547 temp
[di
++]= 0; si
++;
1548 temp
[di
++]= 0; si
++;
1550 temp
[di
++]= src
[si
++];
1553 temp
[di
++]= src
[si
++];
1555 memcpy(dst
+1, temp
, length
+escape_count
);
1557 assert(di
== length
+escape_count
);
1563 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1565 static void encode_rbsp_trailing(PutBitContext
*pb
){
1568 length
= (-put_bits_count(pb
))&7;
1569 if(length
) put_bits(pb
, length
, 0);
/**
 * identifies the exact end of the bitstream
 * @param src points to the byte that holds the trailing bits
 * @return the length of the trailing, or 0 if damaged
 *         (1..8: position of the lowest set bit of *src, counted from 1)
 */
static int decode_rbsp_trailing(uint8_t *src){
    int v= *src;
    int r;

    tprintf("rbsp trailing %X\n", v);

    for(r=1; r<9; r++){
        if(v&1) return r;
        v>>=1;
    }
    return 0;
}
1591 * idct tranforms the 16 dc values and dequantize them.
1592 * @param qp quantization parameter
1594 static void h264_luma_dc_dequant_idct_c(DCTELEM
*block
, int qp
){
1595 const int qmul
= dequant_coeff
[qp
][0];
1598 int temp
[16]; //FIXME check if this is a good idea
1599 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1600 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1602 //memset(block, 64, 2*256);
1605 const int offset
= y_offset
[i
];
1606 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1607 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1608 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1609 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1618 const int offset
= x_offset
[i
];
1619 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1620 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1621 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1622 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1624 block
[stride
*0 +offset
]= ((z0
+ z3
)*qmul
+ 2)>>2; //FIXME think about merging this into decode_resdual
1625 block
[stride
*2 +offset
]= ((z1
+ z2
)*qmul
+ 2)>>2;
1626 block
[stride
*8 +offset
]= ((z1
- z2
)*qmul
+ 2)>>2;
1627 block
[stride
*10+offset
]= ((z0
- z3
)*qmul
+ 2)>>2;
1633 * dct tranforms the 16 dc values.
1634 * @param qp quantization parameter ??? FIXME
1636 static void h264_luma_dc_dct_c(DCTELEM
*block
/*, int qp*/){
1637 // const int qmul= dequant_coeff[qp][0];
1639 int temp
[16]; //FIXME check if this is a good idea
1640 static const int x_offset
[4]={0, 1*stride
, 4* stride
, 5*stride
};
1641 static const int y_offset
[4]={0, 2*stride
, 8* stride
, 10*stride
};
1644 const int offset
= y_offset
[i
];
1645 const int z0
= block
[offset
+stride
*0] + block
[offset
+stride
*4];
1646 const int z1
= block
[offset
+stride
*0] - block
[offset
+stride
*4];
1647 const int z2
= block
[offset
+stride
*1] - block
[offset
+stride
*5];
1648 const int z3
= block
[offset
+stride
*1] + block
[offset
+stride
*5];
1657 const int offset
= x_offset
[i
];
1658 const int z0
= temp
[4*0+i
] + temp
[4*2+i
];
1659 const int z1
= temp
[4*0+i
] - temp
[4*2+i
];
1660 const int z2
= temp
[4*1+i
] - temp
[4*3+i
];
1661 const int z3
= temp
[4*1+i
] + temp
[4*3+i
];
1663 block
[stride
*0 +offset
]= (z0
+ z3
)>>1;
1664 block
[stride
*2 +offset
]= (z1
+ z2
)>>1;
1665 block
[stride
*8 +offset
]= (z1
- z2
)>>1;
1666 block
[stride
*10+offset
]= (z0
- z3
)>>1;
1674 static void chroma_dc_dequant_idct_c(DCTELEM
*block
, int qp
){
1675 const int qmul
= dequant_coeff
[qp
][0];
1676 const int stride
= 16*2;
1677 const int xStride
= 16;
1680 a
= block
[stride
*0 + xStride
*0];
1681 b
= block
[stride
*0 + xStride
*1];
1682 c
= block
[stride
*1 + xStride
*0];
1683 d
= block
[stride
*1 + xStride
*1];
1690 block
[stride
*0 + xStride
*0]= ((a
+c
)*qmul
+ 0)>>1;
1691 block
[stride
*0 + xStride
*1]= ((e
+b
)*qmul
+ 0)>>1;
1692 block
[stride
*1 + xStride
*0]= ((a
-c
)*qmul
+ 0)>>1;
1693 block
[stride
*1 + xStride
*1]= ((e
-b
)*qmul
+ 0)>>1;
1697 static void chroma_dc_dct_c(DCTELEM
*block
){
1698 const int stride
= 16*2;
1699 const int xStride
= 16;
1702 a
= block
[stride
*0 + xStride
*0];
1703 b
= block
[stride
*0 + xStride
*1];
1704 c
= block
[stride
*1 + xStride
*0];
1705 d
= block
[stride
*1 + xStride
*1];
1712 block
[stride
*0 + xStride
*0]= (a
+c
);
1713 block
[stride
*0 + xStride
*1]= (e
+b
);
1714 block
[stride
*1 + xStride
*0]= (a
-c
);
1715 block
[stride
*1 + xStride
*1]= (e
-b
);
1720 * gets the chroma qp.
1722 static inline int get_chroma_qp(int chroma_qp_index_offset
, int qscale
){
1724 return chroma_qp
[clip(qscale
+ chroma_qp_index_offset
, 0, 51)];
1729 static void h264_diff_dct_c(DCTELEM
*block
, uint8_t *src1
, uint8_t *src2
, int stride
){
1731 //FIXME try int temp instead of block
1734 const int d0
= src1
[0 + i
*stride
] - src2
[0 + i
*stride
];
1735 const int d1
= src1
[1 + i
*stride
] - src2
[1 + i
*stride
];
1736 const int d2
= src1
[2 + i
*stride
] - src2
[2 + i
*stride
];
1737 const int d3
= src1
[3 + i
*stride
] - src2
[3 + i
*stride
];
1738 const int z0
= d0
+ d3
;
1739 const int z3
= d0
- d3
;
1740 const int z1
= d1
+ d2
;
1741 const int z2
= d1
- d2
;
1743 block
[0 + 4*i
]= z0
+ z1
;
1744 block
[1 + 4*i
]= 2*z3
+ z2
;
1745 block
[2 + 4*i
]= z0
- z1
;
1746 block
[3 + 4*i
]= z3
- 2*z2
;
1750 const int z0
= block
[0*4 + i
] + block
[3*4 + i
];
1751 const int z3
= block
[0*4 + i
] - block
[3*4 + i
];
1752 const int z1
= block
[1*4 + i
] + block
[2*4 + i
];
1753 const int z2
= block
[1*4 + i
] - block
[2*4 + i
];
1755 block
[0*4 + i
]= z0
+ z1
;
1756 block
[1*4 + i
]= 2*z3
+ z2
;
1757 block
[2*4 + i
]= z0
- z1
;
1758 block
[3*4 + i
]= z3
- 2*z2
;
1763 //FIXME need to check that this doesnt overflow signed 32 bit for low qp, i am not sure, it's very close
1764 //FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
1765 static inline int quantize_c(DCTELEM
*block
, uint8_t *scantable
, int qscale
, int intra
, int seperate_dc
){
1767 const int * const quant_table
= quant_coeff
[qscale
];
1768 const int bias
= intra
? (1<<QUANT_SHIFT
)/3 : (1<<QUANT_SHIFT
)/6;
1769 const unsigned int threshold1
= (1<<QUANT_SHIFT
) - bias
- 1;
1770 const unsigned int threshold2
= (threshold1
<<1);
1776 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
-2))/3 : (1<<(QUANT_SHIFT
-2))/6;
1777 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
-2)) - dc_bias
- 1;
1778 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1780 int level
= block
[0]*quant_coeff
[qscale
+18][0];
1781 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1783 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
-2);
1786 level
= (dc_bias
- level
)>>(QUANT_SHIFT
-2);
1789 // last_non_zero = i;
1794 const int dc_bias
= intra
? (1<<(QUANT_SHIFT
+1))/3 : (1<<(QUANT_SHIFT
+1))/6;
1795 const unsigned int dc_threshold1
= (1<<(QUANT_SHIFT
+1)) - dc_bias
- 1;
1796 const unsigned int dc_threshold2
= (dc_threshold1
<<1);
1798 int level
= block
[0]*quant_table
[0];
1799 if(((unsigned)(level
+dc_threshold1
))>dc_threshold2
){
1801 level
= (dc_bias
+ level
)>>(QUANT_SHIFT
+1);
1804 level
= (dc_bias
- level
)>>(QUANT_SHIFT
+1);
1807 // last_non_zero = i;
1820 const int j
= scantable
[i
];
1821 int level
= block
[j
]*quant_table
[j
];
1823 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1824 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1825 if(((unsigned)(level
+threshold1
))>threshold2
){
1827 level
= (bias
+ level
)>>QUANT_SHIFT
;
1830 level
= (bias
- level
)>>QUANT_SHIFT
;
1839 return last_non_zero
;
/**
 * 4x4 vertical prediction: copies the 4 pixels above the block into each of
 * its 4 rows.
 * @param src      top-left pixel of the block; rows are accessed as 32 bit
 *                 words, so src and stride have to keep them suitably aligned
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    ((uint32_t*)(src+0*stride))[0]= a;
    ((uint32_t*)(src+1*stride))[0]= a;
    ((uint32_t*)(src+2*stride))[0]= a;
    ((uint32_t*)(src+3*stride))[0]= a;
}
/**
 * 4x4 horizontal prediction: fills every row of the block with the pixel to
 * its left.
 * @param src      top-left pixel of the block; rows are written as 32 bit
 *                 words, so src and stride have to keep them suitably aligned
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    /* the unsigned constant keeps the byte replication out of signed int
     * arithmetic, which would overflow for pixel values >= 128 */
    ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101U;
    ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101U;
    ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101U;
    ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101U;
}
/**
 * 4x4 dc prediction: fills the block with the rounded mean of the 4 pixels
 * above and the 4 pixels left of it.
 * @param src      top-left pixel of the block; rows are written as 32 bit
 *                 words, so src and stride have to keep them suitably aligned
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    /* replicate in unsigned arithmetic: dc * 0x01010101 overflows int for dc >= 128 */
    const uint32_t fill= (uint32_t)dc * 0x01010101U;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= fill;
}
/**
 * 4x4 dc prediction from the left edge only: fills the block with the rounded
 * mean of the 4 pixels left of it.
 * @param src      top-left pixel of the block; rows are written as 32 bit
 *                 words, so src and stride have to keep them suitably aligned
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    /* replicate in unsigned arithmetic: dc * 0x01010101 overflows int for dc >= 128 */
    const uint32_t fill= (uint32_t)dc * 0x01010101U;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= fill;
}
/**
 * 4x4 dc prediction from the top edge only: fills the block with the rounded
 * mean of the 4 pixels above it.
 * @param src      top-left pixel of the block; rows are written as 32 bit
 *                 words, so src and stride have to keep them suitably aligned
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    /* replicate in unsigned arithmetic: dc * 0x01010101 overflows int for dc >= 128 */
    const uint32_t fill= (uint32_t)dc * 0x01010101U;

    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= fill;
}
/**
 * 4x4 dc prediction without any neighbour: fills the block with the value 128.
 * @param src      top-left pixel of the block; rows are written as 32 bit
 *                 words, so src and stride have to keep them suitably aligned
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    ((uint32_t*)(src+0*stride))[0]=
    ((uint32_t*)(src+1*stride))[0]=
    ((uint32_t*)(src+2*stride))[0]=
    ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
}
/*
 * Edge loaders for the 4x4 intra predictors. Each macro declares four const
 * ints and expects `src`, `stride` and (for the top right edge) `topright`
 * to be in scope:
 *   t0..t3  pixels above the block
 *   t4..t7  pixels above and to the right of the block
 *   l0..l3  pixels left of the block
 * The last line of each macro carries no line continuation, so the macro
 * never depends on what follows it in the file.
 */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];

#define LOAD_LEFT_EDGE\
    const int l0= src[-1+0*stride];\
    const int l1= src[-1+1*stride];\
    const int l2= src[-1+2*stride];\
    const int l3= src[-1+3*stride];

#define LOAD_TOP_EDGE\
    const int t0= src[ 0-1*stride];\
    const int t1= src[ 1-1*stride];\
    const int t2= src[ 2-1*stride];\
    const int t3= src[ 3-1*stride];

/**
 * 4x4 diagonal down-right prediction.
 *
 * Every pixel on one diagonal (constant x - y) gets the same value, a 1-2-1
 * filtered sample of the edge running from the bottom of the left column
 * over the top-left corner to the end of the top row.
 *
 * @param src      top-left pixel of the block
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    /* all edge pixels are read before the block is written */
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
    src[0+2*stride]=
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
    src[0+1*stride]=
    src[1+2*stride]=
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
    src[0+0*stride]=
    src[1+1*stride]=
    src[2+2*stride]=
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+0*stride]=
    src[2+1*stride]=
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+0*stride]=
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
}
/**
 * 4x4 diagonal down-left prediction.
 *
 * Every pixel on one anti-diagonal (constant x + y) gets the same value, a
 * 1-2-1 filtered sample of the 8 pixels above and above-right of the block;
 * the last pixel uses the final edge pixel with weight 3.
 *
 * @param src      top-left pixel of the block
 * @param topright the 4 pixels above and to the right of the block
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    /* all edge pixels are read before the block is written */
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int t4= topright[0];
    const int t5= topright[1];
    const int t6= topright[2];
    const int t7= topright[3];

    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
    src[1+0*stride]=
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
    src[2+0*stride]=
    src[1+1*stride]=
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
    src[3+0*stride]=
    src[2+1*stride]=
    src[1+2*stride]=
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
    src[3+1*stride]=
    src[2+2*stride]=
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
    src[3+2*stride]=
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
}
/**
 * 4x4 vertical-right prediction.
 *
 * Rows 0 and 2 use 2-tap averages of the top edge (including the top-left
 * corner), rows 1 and 3 use 1-2-1 filtered samples; the two lowest pixels of
 * column 0 are filtered from the left edge.
 *
 * @param src      top-left pixel of the block
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    /* all edge pixels are read before the block is written; l3 is not needed */
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];

    src[0+0*stride]=
    src[1+2*stride]=(lt + t0 + 1)>>1;
    src[1+0*stride]=
    src[2+2*stride]=(t0 + t1 + 1)>>1;
    src[2+0*stride]=
    src[3+2*stride]=(t1 + t2 + 1)>>1;
    src[3+0*stride]=(t2 + t3 + 1)>>1;
    src[0+1*stride]=
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+1*stride]=
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+1*stride]=
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
}
/**
 * 4x4 vertical-left prediction.
 *
 * Rows 0 and 2 use 2-tap averages, rows 1 and 3 use 1-2-1 filtered samples of
 * the pixels above and above-right of the block; every second row is shifted
 * by one edge pixel.
 *
 * @param src      top-left pixel of the block
 * @param topright the 4 pixels above and to the right of the block
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    /* all edge pixels are read before the block is written; topright[3] is not needed */
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int t4= topright[0];
    const int t5= topright[1];
    const int t6= topright[2];

    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[1+0*stride]=
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[2+0*stride]=
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[3+0*stride]=
    src[2+2*stride]=(t3 + t4 + 1)>>1;
    src[3+2*stride]=(t4 + t5 + 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[1+1*stride]=
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[2+1*stride]=
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[3+1*stride]=
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
}
/**
 * 4x4 horizontal-up prediction.
 *
 * The block is built from the 4 pixels left of it only: 2-tap averages and
 * 1-2-1 filtered samples walking down the left edge, and the plain last left
 * pixel for the six positions in the bottom-right part of the block.
 *
 * @param src      top-left pixel of the block
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    /* all edge pixels are read before the block is written */
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    src[0+0*stride]=(l0 + l1 + 1)>>1;
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[2+0*stride]=
    src[0+1*stride]=(l1 + l2 + 1)>>1;
    src[3+0*stride]=
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
    src[2+1*stride]=
    src[0+2*stride]=(l2 + l3 + 1)>>1;
    src[3+1*stride]=
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
    src[3+2*stride]=
    src[1+3*stride]=
    src[0+3*stride]=
    src[2+2*stride]=
    src[2+3*stride]=
    src[3+3*stride]=l3;
}
/**
 * 4x4 horizontal-down prediction.
 *
 * Columns 0 and 2 use 2-tap averages of the left edge (including the top-left
 * corner), columns 1 and 3 use 1-2-1 filtered samples; the two rightmost
 * pixels of row 0 are filtered from the top edge.
 *
 * @param src      top-left pixel of the block
 * @param topright unused
 * @param stride   distance in bytes between two rows
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    /* all edge pixels are read before the block is written; t3 is not needed */
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    src[0+0*stride]=
    src[2+1*stride]=(lt + l0 + 1)>>1;
    src[1+0*stride]=
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[0+1*stride]=
    src[2+2*stride]=(l0 + l1 + 1)>>1;
    src[1+1*stride]=
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+2*stride]=
    src[2+3*stride]=(l1 + l2 + 1)>>1;
    src[1+2*stride]=
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+3*stride]=(l2 + l3 + 1)>>1;
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}
/**
 * 16x16 vertical prediction: copies the 16 pixels above the block into each
 * of its 16 rows.
 * @param src    top-left pixel of the block; rows are accessed as 32 bit
 *               words, so src and stride have to keep them suitably aligned
 * @param stride distance in bytes between two rows
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    int i;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
    const uint32_t c= ((uint32_t*)(src-stride))[2];
    const uint32_t d= ((uint32_t*)(src-stride))[3];

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
        ((uint32_t*)(src+i*stride))[2]= c;
        ((uint32_t*)(src+i*stride))[3]= d;
    }
}
/**
 * 16x16 horizontal prediction: fills every row of the block with the pixel to
 * its left.
 * @param src    top-left pixel of the block; rows are written as 32 bit
 *               words, so src and stride have to keep them suitably aligned
 * @param stride distance in bytes between two rows
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        /* the unsigned constant keeps the byte replication out of signed int
         * arithmetic, which would overflow for pixel values >= 128 */
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101U;
    }
}
/**
 * 16x16 dc prediction: fills the block with the rounded mean of the 16 pixels
 * left of it and the 16 pixels above it.
 * @param src    top-left pixel of the block; rows are written as 32 bit
 *               words, so src and stride have to keep them suitably aligned
 * @param stride distance in bytes between two rows
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, dc=0;
    uint32_t fill;

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    /* replicate in unsigned arithmetic: 0x01010101 * dc overflows int for dc >= 128 */
    fill= 0x01010101U*(uint32_t)((dc + 16)>>5);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= fill;
    }
}
/**
 * 16x16 DC intra prediction from the left neighbours only: fills the block
 * with the rounded mean of the 16 left-column pixels, (sum + 8) >> 4.
 *
 * @param src    top-left pixel of the 16x16 block; src[-1 + y*stride] must be
 *               readable for y = 0..15
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0; i<16; i++)
        dc += src[-1+i*stride];

    dc = (dc + 8)>>4;

    /* Byte fill replaces `0x01010101*dc` (signed overflow for dc >= 128)
     * and the uint32_t* punned stores. */
    for(i=0; i<16; i++)
        memset(src + i*stride, dc, 16);
}
/**
 * 16x16 DC intra prediction from the top neighbours only: fills the block
 * with the rounded mean of the 16 pixels above it, (sum + 8) >> 4.
 *
 * @param src    top-left pixel of the 16x16 block; the row at src - stride
 *               must be readable
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, dc=0;

    for(i=0; i<16; i++)
        dc += src[i-stride];

    dc = (dc + 8)>>4;

    /* Byte fill replaces `0x01010101*dc` (signed overflow for dc >= 128)
     * and the uint32_t* punned stores. */
    for(i=0; i<16; i++)
        memset(src + i*stride, dc, 16);
}
/**
 * 16x16 DC intra prediction without neighbours: fills the block with the
 * constant 128.
 *
 * @param src    top-left pixel of the 16x16 block
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int i;

    /* memset replaces stores through uint32_t* casts, which type-punned
     * uint8_t storage and assumed 4-byte aligned rows. */
    for(i=0; i<16; i++)
        memset(src + i*stride, 128, 16);
}
/**
 * 16x16 plane intra prediction, with an svq3 parameter for an alternative
 * gradient scaling.
 * H and V are weighted sums (weights 1..8) of differences between neighbour
 * pixels mirrored around the centre of the row above (src0) and of the
 * column left of the block (src1/src2).
 * Two scalings of H and V are visible: ( 5*(x/4) ) / 16 followed by a swap
 * of H and V, and ( 5*x+32 ) >> 6.
 * NOTE(review): the branch choosing between them is not in the lines shown
 * here -- presumably the first one is the svq3 path; confirm.
 * Output pixels are (b + k*H) >> 5 clipped through the cm lookup table
 * (cropTbl + MAX_NEG_CROP).
 * NOTE(review): the declarations of i, j, k, a, b and the per-row /
 * per-group updates of a, b and src are not in the lines shown here.
 */
2140 static inline void pred16x16_plane_compat_c(uint8_t *src
, int stride
, const int svq3
){
2143 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
2144 const uint8_t * const src0
= src
+7-stride
;
2145 const uint8_t *src1
= src
+8*stride
-1;
2146 const uint8_t *src2
= src1
-2*stride
; // == src+6*stride-1;
2147 int H
= src0
[1] - src0
[-1];
2148 int V
= src1
[0] - src2
[ 0];
/* accumulate the remaining weighted differences, walking outwards */
2149 for(k
=2; k
<=8; ++k
) {
2150 src1
+= stride
; src2
-= stride
;
2151 H
+= k
*(src0
[k
] - src0
[-k
]);
2152 V
+= k
*(src1
[0] - src2
[ 0]);
2155 H
= ( 5*(H
/4) ) / 16;
2156 V
= ( 5*(V
/4) ) / 16;
2158 /* required for 100% accuracy */
2159 i
= H
; H
= V
; V
= i
;
2161 H
= ( 5*H
+32 ) >> 6;
2162 V
= ( 5*V
+32 ) >> 6;
/* after the loop above src1 is the bottom-left and src2 the top-left
 * neighbour, so src2[16] is the pixel above-right of the last column */
2165 a
= 16*(src1
[0] + src2
[16] + 1) - 7*(V
+H
);
2166 for(j
=16; j
>0; --j
) {
2169 for(i
=-16; i
<0; i
+=4) {
2170 src
[16+i
] = cm
[ (b
) >> 5 ];
2171 src
[17+i
] = cm
[ (b
+ H
) >> 5 ];
2172 src
[18+i
] = cm
[ (b
+2*H
) >> 5 ];
2173 src
[19+i
] = cm
[ (b
+3*H
) >> 5 ];
/**
 * 16x16 plane intra prediction entry point: forwards to
 * pred16x16_plane_compat_c() with its svq3 argument set to 0.
 */
static void pred16x16_plane_c(uint8_t *src, int stride){
    const int svq3_mode = 0;

    pred16x16_plane_compat_c(src, stride, svq3_mode);
}
/**
 * 8x8 vertical intra prediction: copies the 8 pixels of the row directly
 * above the block into each of the block's 8 rows.
 *
 * @param src    top-left pixel of the 8x8 block; the row at src - stride
 *               must be readable
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    uint8_t top[8];
    int i;

    /* Snapshot the top row first; memcpy replaces the uint32_t* punned
     * loads/stores (aliasing and alignment hazard on uint8_t storage). */
    memcpy(top, src - stride, sizeof(top));

    for(i=0; i<8; i++)
        memcpy(src + i*stride, top, sizeof(top));
}
/**
 * 8x8 horizontal intra prediction: fills every row of the block with the
 * pixel immediately to its left.
 *
 * @param src    top-left pixel of the 8x8 block; src[-1 + y*stride] must be
 *               readable for y = 0..7
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        uint8_t *row = src + i*stride;
        /* memset replaces `pixel*0x01010101` (signed overflow for
         * pixel >= 128) and the uint32_t* punned stores. */
        memset(row, row[-1], 8);
    }
}
/**
 * 8x8 DC intra prediction without neighbours: fills the block with the
 * constant 128.
 *
 * @param src    top-left pixel of the 8x8 block
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    int i;

    /* memset replaces stores through uint32_t* casts (aliasing and
     * alignment hazard on uint8_t storage). */
    for(i=0; i<8; i++)
        memset(src + i*stride, 128, 8);
}
/**
 * 8x8 DC intra prediction from the left neighbours only.
 * The upper four rows are filled with the rounded mean of left pixels 0..3,
 * the lower four rows with the rounded mean of left pixels 4..7.
 *
 * @param src    top-left pixel of the 8x8 block; src[-1 + y*stride] must be
 *               readable for y = 0..7
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int dc0=0, dc2=0;

    for(i=0; i<4; i++){
        dc0 += src[-1+i*stride];
        dc2 += src[-1+(i+4)*stride];
    }
    dc0 = (dc0 + 2)>>2;
    dc2 = (dc2 + 2)>>2;

    /* Byte fills replace `0x01010101*dc` (signed overflow for dc >= 128)
     * and the uint32_t* punned stores. */
    for(i=0; i<4; i++)
        memset(src + i*stride, dc0, 8);
    for(i=4; i<8; i++)
        memset(src + i*stride, dc2, 8);
}
/**
 * 8x8 DC intra prediction from the top neighbours only.
 * The left four columns are filled with the rounded mean of top pixels 0..3,
 * the right four columns with the rounded mean of top pixels 4..7.
 *
 * @param src    top-left pixel of the 8x8 block; the row at src - stride
 *               must be readable
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int dc0=0, dc1=0;

    for(i=0; i<4; i++){
        dc0 += src[i-stride];
        dc1 += src[4+i-stride];
    }
    dc0 = (dc0 + 2)>>2;
    dc1 = (dc1 + 2)>>2;

    /* Byte fills replace `0x01010101*dc` (signed overflow for dc >= 128)
     * and the uint32_t* punned stores. */
    for(i=0; i<8; i++){
        memset(src + i*stride,     dc0, 4);
        memset(src + i*stride + 4, dc1, 4);
    }
}
/**
 * 8x8 DC intra prediction using both left and top neighbours, computed per
 * 4x4 quadrant:
 *  - top-left:     mean of left 0..3 and top 0..3, (sum + 4) >> 3
 *  - top-right:    mean of top 4..7,               (sum + 2) >> 2
 *  - bottom-left:  mean of left 4..7,              (sum + 2) >> 2
 *  - bottom-right: mean of top 4..7 and left 4..7, (sum + 4) >> 3
 *
 * @param src    top-left pixel of the 8x8 block; the column at src - 1 and
 *               the row at src - stride must be readable
 * @param stride byte distance between vertically adjacent pixels
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1, dc2, dc3;

    dc0=dc1=dc2=0;
    for(i=0; i<4; i++){
        dc0 += src[-1+i*stride] + src[i-stride];
        dc1 += src[4+i-stride];
        dc2 += src[-1+(i+4)*stride];
    }
    /* dc3 must be derived before dc1/dc2 are reduced to means */
    dc3 = (dc1 + dc2 + 4)>>3;
    dc0 = (dc0 + 4)>>3;
    dc1 = (dc1 + 2)>>2;
    dc2 = (dc2 + 2)>>2;

    /* Byte fills replace `0x01010101*dc` (signed overflow for dc >= 128)
     * and the uint32_t* punned stores. */
    for(i=0; i<4; i++){
        memset(src + i*stride,     dc0, 4);
        memset(src + i*stride + 4, dc1, 4);
    }
    for(i=4; i<8; i++){
        memset(src + i*stride,     dc2, 4);
        memset(src + i*stride + 4, dc3, 4);
    }
}
/**
 * 8x8 plane intra prediction.
 * H and V are weighted sums (weights 1..4) of differences between neighbour
 * pixels mirrored around the centre of the row above (src0) and of the
 * column left of the block (src1/src2), then scaled as ( 17*x+16 ) >> 5.
 * Output pixels are (b + k*H) >> 5 for k = 0..7, clipped through the cm
 * lookup table (cropTbl + MAX_NEG_CROP).
 * NOTE(review): the declarations of j, k, a, b and the per-row updates of
 * a, b and src are not in the lines shown here.
 */
2283 static void pred8x8_plane_c(uint8_t *src
, int stride
){
2286 uint8_t *cm
= cropTbl
+ MAX_NEG_CROP
;
2287 const uint8_t * const src0
= src
+3-stride
;
2288 const uint8_t *src1
= src
+4*stride
-1;
2289 const uint8_t *src2
= src1
-2*stride
; // == src+2*stride-1;
2290 int H
= src0
[1] - src0
[-1];
2291 int V
= src1
[0] - src2
[ 0];
/* accumulate the remaining weighted differences, walking outwards */
2292 for(k
=2; k
<=4; ++k
) {
2293 src1
+= stride
; src2
-= stride
;
2294 H
+= k
*(src0
[k
] - src0
[-k
]);
2295 V
+= k
*(src1
[0] - src2
[ 0]);
2297 H
= ( 17*H
+16 ) >> 5;
2298 V
= ( 17*V
+16 ) >> 5;
/* after the loop above src1 is the bottom-left and src2 the top-left
 * neighbour, so src2[8] is the pixel above-right of the last column */
2300 a
= 16*(src1
[0] + src2
[8]+1) - 3*(V
+H
);
2301 for(j
=8; j
>0; --j
) {
2304 src
[0] = cm
[ (b
) >> 5 ];
2305 src
[1] = cm
[ (b
+ H
) >> 5 ];
2306 src
[2] = cm
[ (b
+2*H
) >> 5 ];
2307 src
[3] = cm
[ (b
+3*H
) >> 5 ];
2308 src
[4] = cm
[ (b
+4*H
) >> 5 ];
2309 src
[5] = cm
[ (b
+5*H
) >> 5 ];
2310 src
[6] = cm
[ (b
+6*H
) >> 5 ];
2311 src
[7] = cm
[ (b
+7*H
) >> 5 ];
/*
 * Helper macros for the 8x8 luma ("pred8x8l") predictors. They expect
 * src, stride, has_topleft and has_topright to be in scope at the use site.
 *
 * SRC(x,y)                  pixel at column x, row y relative to src;
 *                           x = -1 / y = -1 address the left / top neighbours.
 * PREDICT_8x8_LOAD_LEFT     declares l0..l7: left neighbours smoothed with a
 *                           (1,2,1)/4 filter; l0 reuses SRC(-1,0) in place of
 *                           the corner when has_topleft is 0, l7 weights the
 *                           last pixel by 3.
 * PREDICT_8x8_LOAD_TOP      declares t0..t7 the same way for the top row;
 *                           t7 reads SRC(8,-1) only when has_topright is set.
 * PREDICT_8x8_LOAD_TOPRIGHT declares t8..t15: filtered top-right pixels, or
 *                           all equal to SRC(7,-1) when has_topright is 0.
 * PREDICT_8x8_LOAD_TOPLEFT  declares lt: the filtered corner pixel.
 * PREDICT_8x8_DC(v)         stores v into both 32-bit words of a row inside
 *                           a loop over y = 0..7.
 * NOTE(review): the #define heads of the per-sample helpers (the l##y / t##x
 * lines, used as PL(), PT() and PTR()) and the tail of PREDICT_8x8_DC are
 * not in the lines shown here.
 */
2316 #define SRC(x,y) src[(x)+(y)*stride]
2318 const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
2319 #define PREDICT_8x8_LOAD_LEFT \
2320 const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
2321 + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
2322 PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
2323 const int l7 = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
2326 const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2327 #define PREDICT_8x8_LOAD_TOP \
2328 const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
2329 + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
2330 PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
2331 const int t7 = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
2332 + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
2335 t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
2336 #define PREDICT_8x8_LOAD_TOPRIGHT \
2337 int t8, t9, t10, t11, t12, t13, t14, t15; \
2338 if(has_topright) { \
2339 PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
2340 t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
2341 } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
2343 #define PREDICT_8x8_LOAD_TOPLEFT \
2344 const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
2346 #define PREDICT_8x8_DC(v) \
2348 for( y = 0; y < 8; y++ ) { \
2349 ((uint32_t*)src)[0] = \
2350 ((uint32_t*)src)[1] = v; \
/**
 * 8x8 luma DC prediction without neighbours: every pixel becomes 128.
 * has_topleft and has_topright are not used by this predictor.
 */
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
{
    /* 128 replicated into all four bytes of a 32-bit word (0x80808080) */
    PREDICT_8x8_DC(0x01010101U*128U);
}
2358 static void pred8x8l_left_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2360 PREDICT_8x8_LOAD_LEFT
;
2361 const uint32_t dc
= ((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
+4) >> 3) * 0x01010101;
2364 static void pred8x8l_top_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2366 PREDICT_8x8_LOAD_TOP
;
2367 const uint32_t dc
= ((t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+4) >> 3) * 0x01010101;
2370 static void pred8x8l_dc_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2372 PREDICT_8x8_LOAD_LEFT
;
2373 PREDICT_8x8_LOAD_TOP
;
2374 const uint32_t dc
= ((l0
+l1
+l2
+l3
+l4
+l5
+l6
+l7
2375 +t0
+t1
+t2
+t3
+t4
+t5
+t6
+t7
+8) >> 4) * 0x01010101;
2378 static void pred8x8l_horizontal_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2380 PREDICT_8x8_LOAD_LEFT
;
2381 #define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
2382 ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
2383 ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
2386 static void pred8x8l_vertical_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2389 PREDICT_8x8_LOAD_TOP
;
2398 for( y
= 1; y
< 8; y
++ )
2399 *(uint64_t*)(src
+y
*stride
) = *(uint64_t*)src
;
2401 static void pred8x8l_down_left_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2403 PREDICT_8x8_LOAD_TOP
;
2404 PREDICT_8x8_LOAD_TOPRIGHT
;
2405 SRC(0,0)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
2406 SRC(0,1)=SRC(1,0)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
2407 SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
2408 SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
2409 SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
2410 SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
2411 SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6
+ 2*t7
+ t8
+ 2) >> 2;
2412 SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7
+ 2*t8
+ t9
+ 2) >> 2;
2413 SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8
+ 2*t9
+ t10
+ 2) >> 2;
2414 SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9
+ 2*t10
+ t11
+ 2) >> 2;
2415 SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10
+ 2*t11
+ t12
+ 2) >> 2;
2416 SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11
+ 2*t12
+ t13
+ 2) >> 2;
2417 SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12
+ 2*t13
+ t14
+ 2) >> 2;
2418 SRC(6,7)=SRC(7,6)= (t13
+ 2*t14
+ t15
+ 2) >> 2;
2419 SRC(7,7)= (t14
+ 3*t15
+ 2) >> 2;
2421 static void pred8x8l_down_right_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2423 PREDICT_8x8_LOAD_TOP
;
2424 PREDICT_8x8_LOAD_LEFT
;
2425 PREDICT_8x8_LOAD_TOPLEFT
;
2426 SRC(0,7)= (l7
+ 2*l6
+ l5
+ 2) >> 2;
2427 SRC(0,6)=SRC(1,7)= (l6
+ 2*l5
+ l4
+ 2) >> 2;
2428 SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5
+ 2*l4
+ l3
+ 2) >> 2;
2429 SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4
+ 2*l3
+ l2
+ 2) >> 2;
2430 SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3
+ 2*l2
+ l1
+ 2) >> 2;
2431 SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2
+ 2*l1
+ l0
+ 2) >> 2;
2432 SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1
+ 2*l0
+ lt
+ 2) >> 2;
2433 SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
2434 SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt
+ 2*t0
+ t1
+ 2) >> 2;
2435 SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
2436 SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
2437 SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
2438 SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
2439 SRC(6,0)=SRC(7,1)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
2440 SRC(7,0)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
2443 static void pred8x8l_vertical_right_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2445 PREDICT_8x8_LOAD_TOP
;
2446 PREDICT_8x8_LOAD_LEFT
;
2447 PREDICT_8x8_LOAD_TOPLEFT
;
2448 SRC(0,6)= (l5
+ 2*l4
+ l3
+ 2) >> 2;
2449 SRC(0,7)= (l6
+ 2*l5
+ l4
+ 2) >> 2;
2450 SRC(0,4)=SRC(1,6)= (l3
+ 2*l2
+ l1
+ 2) >> 2;
2451 SRC(0,5)=SRC(1,7)= (l4
+ 2*l3
+ l2
+ 2) >> 2;
2452 SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1
+ 2*l0
+ lt
+ 2) >> 2;
2453 SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2
+ 2*l1
+ l0
+ 2) >> 2;
2454 SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
2455 SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt
+ t0
+ 1) >> 1;
2456 SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt
+ 2*t0
+ t1
+ 2) >> 2;
2457 SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0
+ t1
+ 1) >> 1;
2458 SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
2459 SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1
+ t2
+ 1) >> 1;
2460 SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
2461 SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2
+ t3
+ 1) >> 1;
2462 SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
2463 SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3
+ t4
+ 1) >> 1;
2464 SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
2465 SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4
+ t5
+ 1) >> 1;
2466 SRC(6,1)=SRC(7,3)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
2467 SRC(6,0)=SRC(7,2)= (t5
+ t6
+ 1) >> 1;
2468 SRC(7,1)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
2469 SRC(7,0)= (t6
+ t7
+ 1) >> 1;
2471 static void pred8x8l_horizontal_down_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2473 PREDICT_8x8_LOAD_TOP
;
2474 PREDICT_8x8_LOAD_LEFT
;
2475 PREDICT_8x8_LOAD_TOPLEFT
;
2476 SRC(0,7)= (l6
+ l7
+ 1) >> 1;
2477 SRC(1,7)= (l5
+ 2*l6
+ l7
+ 2) >> 2;
2478 SRC(0,6)=SRC(2,7)= (l5
+ l6
+ 1) >> 1;
2479 SRC(1,6)=SRC(3,7)= (l4
+ 2*l5
+ l6
+ 2) >> 2;
2480 SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4
+ l5
+ 1) >> 1;
2481 SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3
+ 2*l4
+ l5
+ 2) >> 2;
2482 SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3
+ l4
+ 1) >> 1;
2483 SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2
+ 2*l3
+ l4
+ 2) >> 2;
2484 SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2
+ l3
+ 1) >> 1;
2485 SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1
+ 2*l2
+ l3
+ 2) >> 2;
2486 SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1
+ l2
+ 1) >> 1;
2487 SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0
+ 2*l1
+ l2
+ 2) >> 2;
2488 SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0
+ l1
+ 1) >> 1;
2489 SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt
+ 2*l0
+ l1
+ 2) >> 2;
2490 SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt
+ l0
+ 1) >> 1;
2491 SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0
+ 2*lt
+ t0
+ 2) >> 2;
2492 SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1
+ 2*t0
+ lt
+ 2) >> 2;
2493 SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2
+ 2*t1
+ t0
+ 2) >> 2;
2494 SRC(4,0)=SRC(6,1)= (t3
+ 2*t2
+ t1
+ 2) >> 2;
2495 SRC(5,0)=SRC(7,1)= (t4
+ 2*t3
+ t2
+ 2) >> 2;
2496 SRC(6,0)= (t5
+ 2*t4
+ t3
+ 2) >> 2;
2497 SRC(7,0)= (t6
+ 2*t5
+ t4
+ 2) >> 2;
2499 static void pred8x8l_vertical_left_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2501 PREDICT_8x8_LOAD_TOP
;
2502 PREDICT_8x8_LOAD_TOPRIGHT
;
2503 SRC(0,0)= (t0
+ t1
+ 1) >> 1;
2504 SRC(0,1)= (t0
+ 2*t1
+ t2
+ 2) >> 2;
2505 SRC(0,2)=SRC(1,0)= (t1
+ t2
+ 1) >> 1;
2506 SRC(0,3)=SRC(1,1)= (t1
+ 2*t2
+ t3
+ 2) >> 2;
2507 SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2
+ t3
+ 1) >> 1;
2508 SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2
+ 2*t3
+ t4
+ 2) >> 2;
2509 SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3
+ t4
+ 1) >> 1;
2510 SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3
+ 2*t4
+ t5
+ 2) >> 2;
2511 SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4
+ t5
+ 1) >> 1;
2512 SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4
+ 2*t5
+ t6
+ 2) >> 2;
2513 SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5
+ t6
+ 1) >> 1;
2514 SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5
+ 2*t6
+ t7
+ 2) >> 2;
2515 SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6
+ t7
+ 1) >> 1;
2516 SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6
+ 2*t7
+ t8
+ 2) >> 2;
2517 SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7
+ t8
+ 1) >> 1;
2518 SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7
+ 2*t8
+ t9
+ 2) >> 2;
2519 SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8
+ t9
+ 1) >> 1;
2520 SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8
+ 2*t9
+ t10
+ 2) >> 2;
2521 SRC(6,6)=SRC(7,4)= (t9
+ t10
+ 1) >> 1;
2522 SRC(6,7)=SRC(7,5)= (t9
+ 2*t10
+ t11
+ 2) >> 2;
2523 SRC(7,6)= (t10
+ t11
+ 1) >> 1;
2524 SRC(7,7)= (t10
+ 2*t11
+ t12
+ 2) >> 2;
2526 static void pred8x8l_horizontal_up_c(uint8_t *src
, int has_topleft
, int has_topright
, int stride
)
2528 PREDICT_8x8_LOAD_LEFT
;
2529 SRC(0,0)= (l0
+ l1
+ 1) >> 1;
2530 SRC(1,0)= (l0
+ 2*l1
+ l2
+ 2) >> 2;
2531 SRC(0,1)=SRC(2,0)= (l1
+ l2
+ 1) >> 1;
2532 SRC(1,1)=SRC(3,0)= (l1
+ 2*l2
+ l3
+ 2) >> 2;
2533 SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2
+ l3
+ 1) >> 1;
2534 SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2
+ 2*l3
+ l4
+ 2) >> 2;
2535 SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3
+ l4
+ 1) >> 1;
2536 SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3
+ 2*l4
+ l5
+ 2) >> 2;
2537 SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4
+ l5
+ 1) >> 1;
2538 SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4
+ 2*l5
+ l6
+ 2) >> 2;
2539 SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5
+ l6
+ 1) >> 1;
2540 SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5
+ 2*l6
+ l7
+ 2) >> 2;
2541 SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6
+ l7
+ 1) >> 1;
2542 SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6
+ 3*l7
+ 2) >> 2;
2543 SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
2544 SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
2545 SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
2546 SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7
;
/* Retire the 8x8 luma prediction helper macros once the predictors that
 * use them have been defined. */
2548 #undef PREDICT_8x8_LOAD_LEFT
2549 #undef PREDICT_8x8_LOAD_TOP
2550 #undef PREDICT_8x8_LOAD_TOPLEFT
2551 #undef PREDICT_8x8_LOAD_TOPRIGHT
2552 #undef PREDICT_8x8_DC
/**
 * Motion-compensates one partition from a single reference picture.
 * The motion vector of block n for the given list is read from h->mv_cache
 * and offset by the partition position (src_x_offset/src_y_offset times 8).
 * Luma is produced by qpix_op[(mx&3) + ((my&3)<<2)] from the position
 * (mx>>2, my>>2); Cb and Cr by chroma_op from (mx>>3, my>>3) with the
 * fraction (mx&7, my&7).
 * If the luma source area reaches outside the picture by more than the
 * available margin, it is rebuilt in s->edge_emu_buffer through
 * ff_emulated_edge_mc() first.
 * Returns after luma when CODEC_FLAG_GRAY is set.
 * NOTE(review): the conditions guarding the second luma call (which uses
 * delta) and the two chroma edge emulations are not in the lines shown
 * here -- presumably they depend on `square` and on whether luma needed
 * emulation; confirm.
 */
2558 static inline void mc_dir_part(H264Context
*h
, Picture
*pic
, int n
, int square
, int chroma_height
, int delta
, int list
,
2559 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
2560 int src_x_offset
, int src_y_offset
,
2561 qpel_mc_func
*qpix_op
, h264_chroma_mc_func chroma_op
){
2562 MpegEncContext
* const s
= &h
->s
;
2563 const int mx
= h
->mv_cache
[list
][ scan8
[n
] ][0] + src_x_offset
*8;
2564 const int my
= h
->mv_cache
[list
][ scan8
[n
] ][1] + src_y_offset
*8;
2565 const int luma_xy
= (mx
&3) + ((my
&3)<<2);
2566 uint8_t * src_y
= pic
->data
[0] + (mx
>>2) + (my
>>2)*s
->linesize
;
2567 uint8_t * src_cb
= pic
->data
[1] + (mx
>>3) + (my
>>3)*s
->uvlinesize
;
2568 uint8_t * src_cr
= pic
->data
[2] + (mx
>>3) + (my
>>3)*s
->uvlinesize
;
/* margin by which a block may leave the picture without emulation:
 * 0 with CODEC_FLAG_EMU_EDGE, otherwise 16 */
2569 int extra_width
= (s
->flags
&CODEC_FLAG_EMU_EDGE
) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
2570 int extra_height
= extra_width
;
2572 const int full_mx
= mx
>>2;
2573 const int full_my
= my
>>2;
2575 assert(pic
->data
[0]);
/* a fractional vector component shrinks the usable margin by 3 */
2577 if(mx
&7) extra_width
-= 3;
2578 if(my
&7) extra_height
-= 3;
2580 if( full_mx
< 0-extra_width
2581 || full_my
< 0-extra_height
2582 || full_mx
+ 16/*FIXME*/ > s
->width
+ extra_width
2583 || full_my
+ 16/*FIXME*/ > s
->height
+ extra_height
){
/* build a (16+5)x(16+5) source window starting 2 pixels up and left */
2584 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_y
- 2 - 2*s
->linesize
, s
->linesize
, 16+5, 16+5/*FIXME*/, full_mx
-2, full_my
-2, s
->width
, s
->height
);
2585 src_y
= s
->edge_emu_buffer
+ 2 + 2*s
->linesize
;
2589 qpix_op
[luma_xy
](dest_y
, src_y
, s
->linesize
); //FIXME try variable height perhaps?
2591 qpix_op
[luma_xy
](dest_y
+ delta
, src_y
+ delta
, s
->linesize
);
2594 if(s
->flags
&CODEC_FLAG_GRAY
) return;
2597 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cb
, s
->uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), s
->width
>>1, s
->height
>>1);
2598 src_cb
= s
->edge_emu_buffer
;
2600 chroma_op(dest_cb
, src_cb
, s
->uvlinesize
, chroma_height
, mx
&7, my
&7);
2603 ff_emulated_edge_mc(s
->edge_emu_buffer
, src_cr
, s
->uvlinesize
, 9, 9/*FIXME*/, (mx
>>3), (my
>>3), s
->width
>>1, s
->height
>>1);
2604 src_cr
= s
->edge_emu_buffer
;
2606 chroma_op(dest_cr
, src_cr
, s
->uvlinesize
, chroma_height
, mx
&7, my
&7);
/**
 * Unweighted motion compensation of one partition.
 * Advances the destination pointers to the partition (x_offset/y_offset
 * are counted in chroma pixels: luma uses twice the offset), converts the
 * offsets to picture coordinates by adding 8*mb_x / 8*mb_y, and calls
 * mc_dir_part() with the list 0 and the list 1 reference of block n.
 * The operators start out as qpix_put/chroma_put.
 * NOTE(review): the conditions guarding the two mc_dir_part() calls
 * (presumably list0 / list1) are not in the lines shown here; of the switch
 * to the averaging operators only `chroma_op = chroma_avg` is visible.
 */
2609 static inline void mc_part_std(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
2610 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
2611 int x_offset
, int y_offset
,
2612 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
2613 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
2614 int list0
, int list1
){
2615 MpegEncContext
* const s
= &h
->s
;
2616 qpel_mc_func
*qpix_op
= qpix_put
;
2617 h264_chroma_mc_func chroma_op
= chroma_put
;
2619 dest_y
+= 2*x_offset
+ 2*y_offset
*s
-> linesize
;
2620 dest_cb
+= x_offset
+ y_offset
*s
->uvlinesize
;
2621 dest_cr
+= x_offset
+ y_offset
*s
->uvlinesize
;
2622 x_offset
+= 8*s
->mb_x
;
2623 y_offset
+= 8*s
->mb_y
;
2626 Picture
*ref
= &h
->ref_list
[0][ h
->ref_cache
[0][ scan8
[n
] ] ];
2627 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 0,
2628 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
2629 qpix_op
, chroma_op
);
2632 chroma_op
= chroma_avg
;
2636 Picture
*ref
= &h
->ref_list
[1][ h
->ref_cache
[1][ scan8
[n
] ] ];
2637 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, 1,
2638 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
2639 qpix_op
, chroma_op
);
/**
 * Weighted motion compensation of one partition.
 * Destination pointers and offsets are prepared as in mc_part_std().
 * Two paths are visible:
 *  - both references: list 0 is predicted into the destination and list 1
 *    into temporaries carved out of s->obmc_scratchpad, then the two are
 *    blended with luma_weight_avg / chroma_weight_avg. With
 *    h->use_weight == 2 the weights come from h->implicit_weight
 *    (weight1 = 64 - weight0, log2 denominator 5, offsets 0); the other
 *    blend uses the luma/chroma weight and offset tables of the context.
 *  - one reference: the partition is predicted with qpix_put/chroma_put and
 *    then scaled in place by luma_weight_op and, if h->use_weight_chroma,
 *    chroma_weight_op.
 * NOTE(review): the branch conditions separating these paths (presumably
 * list0 && list1) are not in the lines shown here; confirm.
 */
2643 static inline void mc_part_weighted(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
2644 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
2645 int x_offset
, int y_offset
,
2646 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
2647 h264_weight_func luma_weight_op
, h264_weight_func chroma_weight_op
,
2648 h264_biweight_func luma_weight_avg
, h264_biweight_func chroma_weight_avg
,
2649 int list0
, int list1
){
2650 MpegEncContext
* const s
= &h
->s
;
2652 dest_y
+= 2*x_offset
+ 2*y_offset
*s
-> linesize
;
2653 dest_cb
+= x_offset
+ y_offset
*s
->uvlinesize
;
2654 dest_cr
+= x_offset
+ y_offset
*s
->uvlinesize
;
2655 x_offset
+= 8*s
->mb_x
;
2656 y_offset
+= 8*s
->mb_y
;
2659 /* don't optimize for luma-only case, since B-frames usually
2660 * use implicit weights => chroma too. */
2661 uint8_t *tmp_cb
= s
->obmc_scratchpad
;
2662 uint8_t *tmp_cr
= tmp_cb
+ 8*s
->uvlinesize
;
2663 uint8_t *tmp_y
= tmp_cr
+ 8*s
->uvlinesize
;
2664 int refn0
= h
->ref_cache
[0][ scan8
[n
] ];
2665 int refn1
= h
->ref_cache
[1][ scan8
[n
] ];
2667 mc_dir_part(h
, &h
->ref_list
[0][refn0
], n
, square
, chroma_height
, delta
, 0,
2668 dest_y
, dest_cb
, dest_cr
,
2669 x_offset
, y_offset
, qpix_put
, chroma_put
);
2670 mc_dir_part(h
, &h
->ref_list
[1][refn1
], n
, square
, chroma_height
, delta
, 1,
2671 tmp_y
, tmp_cb
, tmp_cr
,
2672 x_offset
, y_offset
, qpix_put
, chroma_put
);
2674 if(h
->use_weight
== 2){
2675 int weight0
= h
->implicit_weight
[refn0
][refn1
];
2676 int weight1
= 64 - weight0
;
2677 luma_weight_avg( dest_y
, tmp_y
, s
-> linesize
, 5, weight0
, weight1
, 0, 0);
2678 chroma_weight_avg(dest_cb
, tmp_cb
, s
->uvlinesize
, 5, weight0
, weight1
, 0, 0);
2679 chroma_weight_avg(dest_cr
, tmp_cr
, s
->uvlinesize
, 5, weight0
, weight1
, 0, 0);
2681 luma_weight_avg(dest_y
, tmp_y
, s
->linesize
, h
->luma_log2_weight_denom
,
2682 h
->luma_weight
[0][refn0
], h
->luma_weight
[1][refn1
],
2683 h
->luma_offset
[0][refn0
], h
->luma_offset
[1][refn1
]);
2684 chroma_weight_avg(dest_cb
, tmp_cb
, s
->uvlinesize
, h
->chroma_log2_weight_denom
,
2685 h
->chroma_weight
[0][refn0
][0], h
->chroma_weight
[1][refn1
][0],
2686 h
->chroma_offset
[0][refn0
][0], h
->chroma_offset
[1][refn1
][0]);
2687 chroma_weight_avg(dest_cr
, tmp_cr
, s
->uvlinesize
, h
->chroma_log2_weight_denom
,
2688 h
->chroma_weight
[0][refn0
][1], h
->chroma_weight
[1][refn1
][1],
2689 h
->chroma_offset
[0][refn0
][1], h
->chroma_offset
[1][refn1
][1]);
/* single-reference path: list 1 if list1 is set, otherwise list 0 */
2692 int list
= list1
? 1 : 0;
2693 int refn
= h
->ref_cache
[list
][ scan8
[n
] ];
2694 Picture
*ref
= &h
->ref_list
[list
][refn
];
2695 mc_dir_part(h
, ref
, n
, square
, chroma_height
, delta
, list
,
2696 dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
2697 qpix_put
, chroma_put
);
2699 luma_weight_op(dest_y
, s
->linesize
, h
->luma_log2_weight_denom
,
2700 h
->luma_weight
[list
][refn
], h
->luma_offset
[list
][refn
]);
2701 if(h
->use_weight_chroma
){
2702 chroma_weight_op(dest_cb
, s
->uvlinesize
, h
->chroma_log2_weight_denom
,
2703 h
->chroma_weight
[list
][refn
][0], h
->chroma_offset
[list
][refn
][0]);
2704 chroma_weight_op(dest_cr
, s
->uvlinesize
, h
->chroma_log2_weight_denom
,
2705 h
->chroma_weight
[list
][refn
][1], h
->chroma_offset
[list
][refn
][1]);
2710 static inline void mc_part(H264Context
*h
, int n
, int square
, int chroma_height
, int delta
,
2711 uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
2712 int x_offset
, int y_offset
,
2713 qpel_mc_func
*qpix_put
, h264_chroma_mc_func chroma_put
,
2714 qpel_mc_func
*qpix_avg
, h264_chroma_mc_func chroma_avg
,
2715 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
,
2716 int list0
, int list1
){
2717 if((h
->use_weight
==2 && list0
&& list1
2718 && (h
->implicit_weight
[ h
->ref_cache
[0][scan8
[n
]] ][ h
->ref_cache
[1][scan8
[n
]] ] != 32))
2719 || h
->use_weight
==1)
2720 mc_part_weighted(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
2721 x_offset
, y_offset
, qpix_put
, chroma_put
,
2722 weight_op
[0], weight_op
[3], weight_avg
[0], weight_avg
[3], list0
, list1
);
2724 mc_part_std(h
, n
, square
, chroma_height
, delta
, dest_y
, dest_cb
, dest_cr
,
2725 x_offset
, y_offset
, qpix_put
, chroma_put
, qpix_avg
, chroma_avg
, list0
, list1
);
/**
 * Inter prediction of the current macroblock.
 * Looks up the macroblock type at (s->mb_x, s->mb_y) and issues one
 * mc_part() call per partition: one for 16x16, two for 16x8 and 8x16, and
 * for 8x8 one to four per sub-macroblock depending on h->sub_mb_type[i]
 * (8x8, 8x4, 4x8 or 4x4).
 * Each call passes the block index, whether the partition is square, the
 * chroma height, the distance to the second luma half (delta), the
 * partition offset, the put/avg function tables for that partition size,
 * the matching weight function entries and the per-list usage flags from
 * IS_DIR().
 * NOTE(review): the loops over i and j and the definition of n are not in
 * the lines shown here.
 */
2728 static void hl_motion(H264Context
*h
, uint8_t *dest_y
, uint8_t *dest_cb
, uint8_t *dest_cr
,
2729 qpel_mc_func (*qpix_put
)[16], h264_chroma_mc_func (*chroma_put
),
2730 qpel_mc_func (*qpix_avg
)[16], h264_chroma_mc_func (*chroma_avg
),
2731 h264_weight_func
*weight_op
, h264_biweight_func
*weight_avg
){
2732 MpegEncContext
* const s
= &h
->s
;
2733 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
2734 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
2736 assert(IS_INTER(mb_type
));
2738 if(IS_16X16(mb_type
)){
2739 mc_part(h
, 0, 1, 8, 0, dest_y
, dest_cb
, dest_cr
, 0, 0,
2740 qpix_put
[0], chroma_put
[0], qpix_avg
[0], chroma_avg
[0],
2741 &weight_op
[0], &weight_avg
[0],
2742 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
2743 }else if(IS_16X8(mb_type
)){
2744 mc_part(h
, 0, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 0,
2745 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
2746 &weight_op
[1], &weight_avg
[1],
2747 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
2748 mc_part(h
, 8, 0, 4, 8, dest_y
, dest_cb
, dest_cr
, 0, 4,
2749 qpix_put
[1], chroma_put
[0], qpix_avg
[1], chroma_avg
[0],
2750 &weight_op
[1], &weight_avg
[1],
2751 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
2752 }else if(IS_8X16(mb_type
)){
2753 mc_part(h
, 0, 0, 8, 8*s
->linesize
, dest_y
, dest_cb
, dest_cr
, 0, 0,
2754 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
2755 &weight_op
[2], &weight_avg
[2],
2756 IS_DIR(mb_type
, 0, 0), IS_DIR(mb_type
, 0, 1));
2757 mc_part(h
, 4, 0, 8, 8*s
->linesize
, dest_y
, dest_cb
, dest_cr
, 4, 0,
2758 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
2759 &weight_op
[2], &weight_avg
[2],
2760 IS_DIR(mb_type
, 1, 0), IS_DIR(mb_type
, 1, 1));
2764 assert(IS_8X8(mb_type
));
2767 const int sub_mb_type
= h
->sub_mb_type
[i
];
/* offset of sub-macroblock i: 0 or 4 in each direction */
2769 int x_offset
= (i
&1)<<2;
2770 int y_offset
= (i
&2)<<1;
2772 if(IS_SUB_8X8(sub_mb_type
)){
2773 mc_part(h
, n
, 1, 4, 0, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
2774 qpix_put
[1], chroma_put
[1], qpix_avg
[1], chroma_avg
[1],
2775 &weight_op
[3], &weight_avg
[3],
2776 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
2777 }else if(IS_SUB_8X4(sub_mb_type
)){
2778 mc_part(h
, n
, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
2779 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
2780 &weight_op
[4], &weight_avg
[4],
2781 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
2782 mc_part(h
, n
+2, 0, 2, 4, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
+2,
2783 qpix_put
[2], chroma_put
[1], qpix_avg
[2], chroma_avg
[1],
2784 &weight_op
[4], &weight_avg
[4],
2785 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
2786 }else if(IS_SUB_4X8(sub_mb_type
)){
2787 mc_part(h
, n
, 0, 4, 4*s
->linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
, y_offset
,
2788 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
2789 &weight_op
[5], &weight_avg
[5],
2790 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
2791 mc_part(h
, n
+1, 0, 4, 4*s
->linesize
, dest_y
, dest_cb
, dest_cr
, x_offset
+2, y_offset
,
2792 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
2793 &weight_op
[5], &weight_avg
[5],
2794 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
2797 assert(IS_SUB_4X4(sub_mb_type
));
/* offset of 4x4 block j inside the sub-macroblock: 0 or 2 each way */
2799 int sub_x_offset
= x_offset
+ 2*(j
&1);
2800 int sub_y_offset
= y_offset
+ (j
&2);
2801 mc_part(h
, n
+j
, 1, 2, 0, dest_y
, dest_cb
, dest_cr
, sub_x_offset
, sub_y_offset
,
2802 qpix_put
[2], chroma_put
[2], qpix_avg
[2], chroma_avg
[2],
2803 &weight_op
[6], &weight_avg
[6],
2804 IS_DIR(sub_mb_type
, 0, 0), IS_DIR(sub_mb_type
, 0, 1));
/**
 * Builds the VLC decoding tables (coeff_token, total_zeros and run, plus
 * the chroma DC variants) through init_vlc() from the static length/bits
 * tables.
 * h is not referenced in the lines shown here.
 * NOTE(review): `done` is a function-local static flag -- presumably it
 * makes the initialisation run only once; the guard itself and most loop
 * headers are not in the lines shown here.
 */
2811 static void decode_init_vlc(H264Context
*h
){
2812 static int done
= 0;
2818 init_vlc(&chroma_dc_coeff_token_vlc
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 4*5,
2819 &chroma_dc_coeff_token_len
[0], 1, 1,
2820 &chroma_dc_coeff_token_bits
[0], 1, 1, 1);
2823 init_vlc(&coeff_token_vlc
[i
], COEFF_TOKEN_VLC_BITS
, 4*17,
2824 &coeff_token_len
[i
][0], 1, 1,
2825 &coeff_token_bits
[i
][0], 1, 1, 1);
2829 init_vlc(&chroma_dc_total_zeros_vlc
[i
], CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 4,
2830 &chroma_dc_total_zeros_len
[i
][0], 1, 1,
2831 &chroma_dc_total_zeros_bits
[i
][0], 1, 1, 1);
2833 for(i
=0; i
<15; i
++){
2834 init_vlc(&total_zeros_vlc
[i
], TOTAL_ZEROS_VLC_BITS
, 16,
2835 &total_zeros_len
[i
][0], 1, 1,
2836 &total_zeros_bits
[i
][0], 1, 1, 1);
2840 init_vlc(&run_vlc
[i
], RUN_VLC_BITS
, 7,
2841 &run_len
[i
][0], 1, 1,
2842 &run_bits
[i
][0], 1, 1, 1);
/* run7_vlc is built from row 6 of the run tables with 16 codes */
2844 init_vlc(&run7_vlc
, RUN7_VLC_BITS
, 16,
2845 &run_len
[6][0], 1, 1,
2846 &run_bits
[6][0], 1, 1, 1);
2851 * Sets the intra prediction function pointers.
2853 static void init_pred_ptrs(H264Context
*h
){
2854 // MpegEncContext * const s = &h->s;
2856 h
->pred4x4
[VERT_PRED
]= pred4x4_vertical_c
;
2857 h
->pred4x4
[HOR_PRED
]= pred4x4_horizontal_c
;
2858 h
->pred4x4
[DC_PRED
]= pred4x4_dc_c
;
2859 h
->pred4x4
[DIAG_DOWN_LEFT_PRED
]= pred4x4_down_left_c
;
2860 h
->pred4x4
[DIAG_DOWN_RIGHT_PRED
]= pred4x4_down_right_c
;
2861 h
->pred4x4
[VERT_RIGHT_PRED
]= pred4x4_vertical_right_c
;
2862 h
->pred4x4
[HOR_DOWN_PRED
]= pred4x4_horizontal_down_c
;
2863 h
->pred4x4
[VERT_LEFT_PRED
]= pred4x4_vertical_left_c
;
2864 h
->pred4x4
[HOR_UP_PRED
]= pred4x4_horizontal_up_c
;
2865 h
->pred4x4
[LEFT_DC_PRED
]= pred4x4_left_dc_c
;
2866 h
->pred4x4
[TOP_DC_PRED
]= pred4x4_top_dc_c
;
2867 h
->pred4x4
[DC_128_PRED
]= pred4x4_128_dc_c
;
2869 h
->pred8x8l
[VERT_PRED
]= pred8x8l_vertical_c
;
2870 h
->pred8x8l
[HOR_PRED
]= pred8x8l_horizontal_c
;
2871 h
->pred8x8l
[DC_PRED
]= pred8x8l_dc_c
;
2872 h
->pred8x8l
[DIAG_DOWN_LEFT_PRED
]= pred8x8l_down_left_c
;
2873 h
->pred8x8l
[DIAG_DOWN_RIGHT_PRED
]= pred8x8l_down_right_c
;
2874 h
->pred8x8l
[VERT_RIGHT_PRED
]= pred8x8l_vertical_right_c
;
2875 h
->pred8x8l
[HOR_DOWN_PRED
]= pred8x8l_horizontal_down_c
;
2876 h
->pred8x8l
[VERT_LEFT_PRED
]= pred8x8l_vertical_left_c
;
2877 h
->pred8x8l
[HOR_UP_PRED
]= pred8x8l_horizontal_up_c
;
2878 h
->pred8x8l
[LEFT_DC_PRED
]= pred8x8l_left_dc_c
;
2879 h
->pred8x8l
[TOP_DC_PRED
]= pred8x8l_top_dc_c
;
2880 h
->pred8x8l
[DC_128_PRED
]= pred8x8l_128_dc_c
;
2882 h
->pred8x8
[DC_PRED8x8
]= pred8x8_dc_c
;
2883 h
->pred8x8
[VERT_PRED8x8
]= pred8x8_vertical_c
;
2884 h
->pred8x8
[HOR_PRED8x8
]= pred8x8_horizontal_c
;
2885 h
->pred8x8
[PLANE_PRED8x8
]= pred8x8_plane_c
;
2886 h
->pred8x8
[LEFT_DC_PRED8x8
]= pred8x8_left_dc_c
;
2887 h
->pred8x8
[TOP_DC_PRED8x8
]= pred8x8_top_dc_c
;
2888 h
->pred8x8
[DC_128_PRED8x8
]= pred8x8_128_dc_c
;
2890 h
->pred16x16
[DC_PRED8x8
]= pred16x16_dc_c
;
2891 h
->pred16x16
[VERT_PRED8x8
]= pred16x16_vertical_c
;
2892 h
->pred16x16
[HOR_PRED8x8
]= pred16x16_horizontal_c
;
2893 h
->pred16x16
[PLANE_PRED8x8
]= pred16x16_plane_c
;
2894 h
->pred16x16
[LEFT_DC_PRED8x8
]= pred16x16_left_dc_c
;
2895 h
->pred16x16
[TOP_DC_PRED8x8
]= pred16x16_top_dc_c
;
2896 h
->pred16x16
[DC_128_PRED8x8
]= pred16x16_128_dc_c
;
2899 static void free_tables(H264Context
*h
){
2900 av_freep(&h
->intra4x4_pred_mode
);
2901 av_freep(&h
->chroma_pred_mode_table
);
2902 av_freep(&h
->cbp_table
);
2903 av_freep(&h
->mvd_table
[0]);
2904 av_freep(&h
->mvd_table
[1]);
2905 av_freep(&h
->direct_table
);
2906 av_freep(&h
->non_zero_count
);
2907 av_freep(&h
->slice_table_base
);
2908 av_freep(&h
->top_borders
[1]);
2909 av_freep(&h
->top_borders
[0]);
2910 h
->slice_table
= NULL
;
2912 av_freep(&h
->mb2b_xy
);
2913 av_freep(&h
->mb2b8_xy
);
2915 av_freep(&h
->dequant4_coeff
);
2916 av_freep(&h
->dequant8_coeff
);
2918 av_freep(&h
->s
.obmc_scratchpad
);
2923 * needs width/height
/**
 * Allocates the per-macroblock tables of the H.264 context and fills the
 * macroblock -> block index maps and the dequantisation tables.
 * Most table sizes derive from big_mb_num = mb_stride * (mb_height+1).
 * chroma_pred_mode_table, mvd_table[] and direct_table are allocated only
 * when h->pps.cabac is set.
 * NOTE(review): CHECKED_ALLOCZ presumably leaves through a failure path on
 * allocation error; neither that path nor the return statements are in the
 * lines shown here. The declarations of x, y, q and idx and the loop
 * headers over x in the dequant section are not shown either.
 */
2925 static int alloc_tables(H264Context
*h
){
2926 MpegEncContext
* const s
= &h
->s
;
2927 const int big_mb_num
= s
->mb_stride
* (s
->mb_height
+1);
2930 CHECKED_ALLOCZ(h
->intra4x4_pred_mode
, big_mb_num
* 8 * sizeof(uint8_t))
2932 CHECKED_ALLOCZ(h
->non_zero_count
, big_mb_num
* 16 * sizeof(uint8_t))
2933 CHECKED_ALLOCZ(h
->slice_table_base
, big_mb_num
* sizeof(uint8_t))
2934 CHECKED_ALLOCZ(h
->top_borders
[0] , s
->mb_width
* (16+8+8) * sizeof(uint8_t))
2935 CHECKED_ALLOCZ(h
->top_borders
[1] , s
->mb_width
* (16+8+8) * sizeof(uint8_t))
2936 CHECKED_ALLOCZ(h
->cbp_table
, big_mb_num
* sizeof(uint16_t))
2938 if( h
->pps
.cabac
) {
2939 CHECKED_ALLOCZ(h
->chroma_pred_mode_table
, big_mb_num
* sizeof(uint8_t))
2940 CHECKED_ALLOCZ(h
->mvd_table
[0], 32*big_mb_num
* sizeof(uint16_t));
2941 CHECKED_ALLOCZ(h
->mvd_table
[1], 32*big_mb_num
* sizeof(uint16_t));
2942 CHECKED_ALLOCZ(h
->direct_table
, 32*big_mb_num
* sizeof(uint8_t));
/* every slice table byte is set to 0xFF; slice_table then points one
 * macroblock row plus one entry into the base allocation */
2945 memset(h
->slice_table_base
, -1, big_mb_num
* sizeof(uint8_t));
2946 h
->slice_table
= h
->slice_table_base
+ s
->mb_stride
+ 1;
2948 CHECKED_ALLOCZ(h
->mb2b_xy
, big_mb_num
* sizeof(uint32_t));
2949 CHECKED_ALLOCZ(h
->mb2b8_xy
, big_mb_num
* sizeof(uint32_t));
/* map each macroblock index to the index of its first 4x4 / 8x8 block */
2950 for(y
=0; y
<s
->mb_height
; y
++){
2951 for(x
=0; x
<s
->mb_width
; x
++){
2952 const int mb_xy
= x
+ y
*s
->mb_stride
;
2953 const int b_xy
= 4*x
+ 4*y
*h
->b_stride
;
2954 const int b8_xy
= 2*x
+ 2*y
*h
->b8_stride
;
2956 h
->mb2b_xy
[mb_xy
]= b_xy
;
2957 h
->mb2b8_xy
[mb_xy
]= b8_xy
;
2961 CHECKED_ALLOCZ(h
->dequant4_coeff
, 52*16 * sizeof(uint16_t));
2962 CHECKED_ALLOCZ(h
->dequant8_coeff
, 52*64 * sizeof(uint16_t));
/* the 4x4 table is copied verbatim; the 8x8 table is derived per q below */
2963 memcpy(h
->dequant4_coeff
, dequant_coeff
, 52*16 * sizeof(uint16_t));
2964 for(q
=0; q
<52; q
++){
2965 int shift
= div6
[q
];
2967 if(shift
>= 2) // qp<12 are shifted during dequant
2970 h
->dequant8_coeff
[q
][x
] = dequant8_coeff_init
[idx
][
2971 dequant8_coeff_init_scan
[((x
>>1)&12) | (x
&3)] ] << shift
;
/* with transform bypass the q = 0 rows are overwritten with constants */
2973 if(h
->sps
.transform_bypass
){
2975 h
->dequant4_coeff
[0][x
] = 1;
2977 h
->dequant8_coeff
[0][x
] = 1<<2;
2980 s
->obmc_scratchpad
= NULL
;
2988 static void common_init(H264Context
*h
){
2989 MpegEncContext
* const s
= &h
->s
;
2991 s
->width
= s
->avctx
->width
;
2992 s
->height
= s
->avctx
->height
;
2993 s
->codec_id
= s
->avctx
->codec
->id
;
2997 s
->unrestricted_mv
=1;
2998 s
->decode
=1; //FIXME
3001 static int decode_init(AVCodecContext
*avctx
){
3002 H264Context
*h
= avctx
->priv_data
;
3003 MpegEncContext
* const s
= &h
->s
;
3005 MPV_decode_defaults(s
);
3010 s
->out_format
= FMT_H264
;
3011 s
->workaround_bugs
= avctx
->workaround_bugs
;
3014 // s->decode_mb= ff_h263_decode_mb;
3016 avctx
->pix_fmt
= PIX_FMT_YUV420P
;
3020 if(avctx
->extradata_size
> 0 && avctx
->extradata
&&
3021 *(char *)avctx
->extradata
== 1){
3031 static void frame_start(H264Context
*h
){
3032 MpegEncContext
* const s
= &h
->s
;
3035 MPV_frame_start(s
, s
->avctx
);
3036 ff_er_frame_start(s
);
3038 assert(s
->linesize
&& s
->uvlinesize
);
3040 for(i
=0; i
<16; i
++){
3041 h
->block_offset
[i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
3042 h
->block_offset
[24+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->linesize
*((scan8
[i
] - scan8
[0])>>3);
3045 h
->block_offset
[16+i
]=
3046 h
->block_offset
[20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 4*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
3047 h
->block_offset
[24+16+i
]=
3048 h
->block_offset
[24+20+i
]= 4*((scan8
[i
] - scan8
[0])&7) + 8*s
->uvlinesize
*((scan8
[i
] - scan8
[0])>>3);
3051 /* can't be in alloc_tables because linesize isn't known there.
3052 * FIXME: redo bipred weight to not require extra buffer? */
3053 if(!s
->obmc_scratchpad
)
3054 s
->obmc_scratchpad
= av_malloc(16*s
->linesize
+ 2*8*s
->uvlinesize
);
3056 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
3059 static inline void backup_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
){
3060 MpegEncContext
* const s
= &h
->s
;
3064 src_cb
-= uvlinesize
;
3065 src_cr
-= uvlinesize
;
3067 // There are two lines saved, the line above the the top macroblock of a pair,
3068 // and the line above the bottom macroblock
3069 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
3070 for(i
=1; i
<17; i
++){
3071 h
->left_border
[i
]= src_y
[15+i
* linesize
];
3074 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 16*linesize
);
3075 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+16*linesize
);
3077 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3078 h
->left_border
[17 ]= h
->top_borders
[0][s
->mb_x
][16+7];
3079 h
->left_border
[17+9]= h
->top_borders
[0][s
->mb_x
][24+7];
3081 h
->left_border
[i
+17 ]= src_cb
[7+i
*uvlinesize
];
3082 h
->left_border
[i
+17+9]= src_cr
[7+i
*uvlinesize
];
3084 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+8*uvlinesize
);
3085 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+8*uvlinesize
);
3089 static inline void xchg_mb_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
){
3090 MpegEncContext
* const s
= &h
->s
;
3093 int deblock_left
= (s
->mb_x
> 0);
3094 int deblock_top
= (s
->mb_y
> 0);
3096 src_y
-= linesize
+ 1;
3097 src_cb
-= uvlinesize
+ 1;
3098 src_cr
-= uvlinesize
+ 1;
3100 #define XCHG(a,b,t,xchg)\
3107 for(i
= !deblock_top
; i
<17; i
++){
3108 XCHG(h
->left_border
[i
], src_y
[i
* linesize
], temp8
, xchg
);
3113 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
3114 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
3115 if(s
->mb_x
+1 < s
->mb_width
){
3116 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
+1]), *(uint64_t*)(src_y
+17), temp64
, 1);
3120 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3122 for(i
= !deblock_top
; i
<9; i
++){
3123 XCHG(h
->left_border
[i
+17 ], src_cb
[i
*uvlinesize
], temp8
, xchg
);
3124 XCHG(h
->left_border
[i
+17+9], src_cr
[i
*uvlinesize
], temp8
, xchg
);
3128 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
3129 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
3134 static inline void backup_pair_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
){
3135 MpegEncContext
* const s
= &h
->s
;
3138 src_y
-= 2 * linesize
;
3139 src_cb
-= 2 * uvlinesize
;
3140 src_cr
-= 2 * uvlinesize
;
3142 // There are two lines saved, the line above the the top macroblock of a pair,
3143 // and the line above the bottom macroblock
3144 h
->left_border
[0]= h
->top_borders
[0][s
->mb_x
][15];
3145 h
->left_border
[1]= h
->top_borders
[1][s
->mb_x
][15];
3146 for(i
=2; i
<34; i
++){
3147 h
->left_border
[i
]= src_y
[15+i
* linesize
];
3150 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 32*linesize
);
3151 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+32*linesize
);
3152 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+0)= *(uint64_t*)(src_y
+ 33*linesize
);
3153 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+8)= *(uint64_t*)(src_y
+8+33*linesize
);
3155 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3156 h
->left_border
[34 ]= h
->top_borders
[0][s
->mb_x
][16+7];
3157 h
->left_border
[34+ 1]= h
->top_borders
[1][s
->mb_x
][16+7];
3158 h
->left_border
[34+18 ]= h
->top_borders
[0][s
->mb_x
][24+7];
3159 h
->left_border
[34+18+1]= h
->top_borders
[1][s
->mb_x
][24+7];
3160 for(i
=2; i
<18; i
++){
3161 h
->left_border
[i
+34 ]= src_cb
[7+i
*uvlinesize
];
3162 h
->left_border
[i
+34+18]= src_cr
[7+i
*uvlinesize
];
3164 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+16*uvlinesize
);
3165 *(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+16*uvlinesize
);
3166 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+16)= *(uint64_t*)(src_cb
+17*uvlinesize
);
3167 *(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+24)= *(uint64_t*)(src_cr
+17*uvlinesize
);
3171 static inline void xchg_pair_border(H264Context
*h
, uint8_t *src_y
, uint8_t *src_cb
, uint8_t *src_cr
, int linesize
, int uvlinesize
, int xchg
){
3172 MpegEncContext
* const s
= &h
->s
;
3175 int deblock_left
= (s
->mb_x
> 0);
3176 int deblock_top
= (s
->mb_y
> 0);
3178 tprintf("xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y
, src_cb
, src_cr
, linesize
, uvlinesize
);
3180 src_y
-= 2 * linesize
+ 1;
3181 src_cb
-= 2 * uvlinesize
+ 1;
3182 src_cr
-= 2 * uvlinesize
+ 1;
3184 #define XCHG(a,b,t,xchg)\
3191 for(i
= (!deblock_top
)<<1; i
<34; i
++){
3192 XCHG(h
->left_border
[i
], src_y
[i
* linesize
], temp8
, xchg
);
3197 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+0), *(uint64_t*)(src_y
+1), temp64
, xchg
);
3198 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+8), *(uint64_t*)(src_y
+9), temp64
, 1);
3199 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+0), *(uint64_t*)(src_y
+1 +linesize
), temp64
, xchg
);
3200 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+8), *(uint64_t*)(src_y
+9 +linesize
), temp64
, 1);
3203 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3205 for(i
= (!deblock_top
) << 1; i
<18; i
++){
3206 XCHG(h
->left_border
[i
+34 ], src_cb
[i
*uvlinesize
], temp8
, xchg
);
3207 XCHG(h
->left_border
[i
+34+18], src_cr
[i
*uvlinesize
], temp8
, xchg
);
3211 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1), temp64
, 1);
3212 XCHG(*(uint64_t*)(h
->top_borders
[0][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1), temp64
, 1);
3213 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+16), *(uint64_t*)(src_cb
+1 +uvlinesize
), temp64
, 1);
3214 XCHG(*(uint64_t*)(h
->top_borders
[1][s
->mb_x
]+24), *(uint64_t*)(src_cr
+1 +uvlinesize
), temp64
, 1);
/*
 * hl_decode_mb: reconstructs the macroblock at (s->mb_x, s->mb_y) into
 * s->current_picture.
 * Visible steps, in order: compute the destination pointers, select the
 * residual add function, then either copy intra PCM samples, run intra
 * prediction (with the saved borders swapped in while deblocking is on), or
 * call hl_motion() for H.264 inter macroblocks; add the luma and chroma
 * residuals; finally back up the borders and call filter_mb() when
 * h->deblocking_filter is set.
 * NOTE(review): some branch and brace lines of this function are not visible
 * in this listing, so the exact nesting should be confirmed against the
 * complete source.
 */
3219 static void hl_decode_mb(H264Context
*h
){
3220 MpegEncContext
* const s
= &h
->s
;
3221 const int mb_x
= s
->mb_x
;
3222 const int mb_y
= s
->mb_y
;
3223 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
3224 const int mb_type
= s
->current_picture
.mb_type
[mb_xy
];
3225 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
3226 int linesize
, uvlinesize
/*dct_offset*/;
3228 int *block_offset
= &h
->block_offset
[0];
3229 const unsigned int bottom
= mb_y
& 1;
3230 const int transform_bypass
= (s
->qscale
== 0 && h
->sps
.transform_bypass
);
3231 void (*idct_add
)(uint8_t *dst
, DCTELEM
*block
, int stride
);
/* top-left sample of this macroblock in each plane: 16x16 luma, 8x8 chroma */
3236 dest_y
= s
->current_picture
.data
[0] + (mb_y
* 16* s
->linesize
) + mb_x
* 16;
3237 dest_cb
= s
->current_picture
.data
[1] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
3238 dest_cr
= s
->current_picture
.data
[2] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
/* field macroblocks use doubled line sizes and the second half of block_offset */
3240 if (h
->mb_field_decoding_flag
) {
3241 linesize
= s
->linesize
* 2;
3242 uvlinesize
= s
->uvlinesize
* 2;
3243 block_offset
= &h
->block_offset
[24];
3244 if(mb_y
&1){ //FIXME move out of this func?
3245 dest_y
-= s
->linesize
*15;
3246 dest_cb
-= s
->uvlinesize
*7;
3247 dest_cr
-= s
->uvlinesize
*7;
3250 linesize
= s
->linesize
;
3251 uvlinesize
= s
->uvlinesize
;
3252 // dct_offset = s->linesize * 16;
/* residual add function: plain pixel add for transform bypass, otherwise the 4x4 or 8x8 IDCT */
3255 idct_add
= transform_bypass
3256 ? IS_8x8DCT(mb_type
) ? s
->dsp
.add_pixels8
: s
->dsp
.add_pixels4
3257 : IS_8x8DCT(mb_type
) ? s
->dsp
.h264_idct8_add
: s
->dsp
.h264_idct_add
;
3259 if (IS_INTRA_PCM(mb_type
)) {
3262 // The pixels are stored in h->mb array in the same order as levels,
3263 // copy them in output in the correct order.
3264 for(i
=0; i
<16; i
++) {
3265 for (y
=0; y
<4; y
++) {
3266 for (x
=0; x
<4; x
++) {
3267 *(dest_y
+ block_offset
[i
] + y
*linesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
3271 for(i
=16; i
<16+4; i
++) {
3272 for (y
=0; y
<4; y
++) {
3273 for (x
=0; x
<4; x
++) {
3274 *(dest_cb
+ block_offset
[i
] + y
*uvlinesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
3278 for(i
=20; i
<20+4; i
++) {
3279 for (y
=0; y
<4; y
++) {
3280 for (x
=0; x
<4; x
++) {
3281 *(dest_cr
+ block_offset
[i
] + y
*uvlinesize
+ x
) = h
->mb
[i
*16+y
*4+x
];
/* intra macroblocks: with deblocking on, the saved borders are swapped in before prediction */
3286 if(IS_INTRA(mb_type
)){
3287 if(h
->deblocking_filter
) {
3288 if (h
->mb_aff_frame
) {
3290 xchg_pair_border(h
, dest_y
, dest_cb
, dest_cr
, s
->linesize
, s
->uvlinesize
, 1);
3292 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 1);
3296 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3297 h
->pred8x8
[ h
->chroma_pred_mode
](dest_cb
, uvlinesize
);
3298 h
->pred8x8
[ h
->chroma_pred_mode
](dest_cr
, uvlinesize
);
3301 if(IS_INTRA4x4(mb_type
)){
3303 if(IS_8x8DCT(mb_type
)){
3304 for(i
=0; i
<16; i
+=4){
3305 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
3306 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
3307 h
->pred8x8l
[ dir
](ptr
, (h
->topleft_samples_available
<<i
)&0x8000,
3308 (h
->topright_samples_available
<<(i
+1))&0x8000, linesize
);
3309 if(h
->non_zero_count_cache
[ scan8
[i
] ])
3310 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
3313 for(i
=0; i
<16; i
++){
3314 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
3316 const int dir
= h
->intra4x4_pred_mode_cache
[ scan8
[i
] ];
/* without top-right samples, the sample at ptr[3 - linesize] is replicated into tr */
3319 if(dir
== DIAG_DOWN_LEFT_PRED
|| dir
== VERT_LEFT_PRED
){
3320 const int topright_avail
= (h
->topright_samples_available
<<i
)&0x8000;
3321 assert(mb_y
|| linesize
<= block_offset
[i
]);
3322 if(!topright_avail
){
3323 tr
= ptr
[3 - linesize
]*0x01010101;
3324 topright
= (uint8_t*) &tr
;
3326 topright
= ptr
+ 4 - linesize
;
3330 h
->pred4x4
[ dir
](ptr
, topright
, linesize
);
3331 if(h
->non_zero_count_cache
[ scan8
[i
] ]){
3332 if(s
->codec_id
== CODEC_ID_H264
)
3333 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
3335 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, 0);
3340 h
->pred16x16
[ h
->intra16x16_pred_mode
](dest_y
, linesize
);
3341 if(s
->codec_id
== CODEC_ID_H264
){
3342 if(!transform_bypass
)
3343 h264_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
);
3345 svq3_luma_dc_dequant_idct_c(h
->mb
, s
->qscale
);
/* second border exchange after intra prediction, this time with xchg == 0 */
3347 if(h
->deblocking_filter
) {
3348 if (h
->mb_aff_frame
) {
3350 uint8_t *pair_dest_y
= s
->current_picture
.data
[0] + ((mb_y
-1) * 16* s
->linesize
) + mb_x
* 16;
3351 uint8_t *pair_dest_cb
= s
->current_picture
.data
[1] + ((mb_y
-1) * 8 * s
->uvlinesize
) + mb_x
* 8;
3352 uint8_t *pair_dest_cr
= s
->current_picture
.data
[2] + ((mb_y
-1) * 8 * s
->uvlinesize
) + mb_x
* 8;
3354 xchg_pair_border(h
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, s
->linesize
, s
->uvlinesize
, 0);
3358 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
, 0);
/* H.264 inter macroblocks: motion compensation through hl_motion() */
3361 }else if(s
->codec_id
== CODEC_ID_H264
){
3362 hl_motion(h
, dest_y
, dest_cb
, dest_cr
,
3363 s
->dsp
.put_h264_qpel_pixels_tab
, s
->dsp
.put_h264_chroma_pixels_tab
,
3364 s
->dsp
.avg_h264_qpel_pixels_tab
, s
->dsp
.avg_h264_chroma_pixels_tab
,
3365 s
->dsp
.weight_h264_pixels_tab
, s
->dsp
.biweight_h264_pixels_tab
);
/* luma residual for everything except intra 4x4, whose residual was added block by block above */
3369 if(!IS_INTRA4x4(mb_type
)){
3370 if(s
->codec_id
== CODEC_ID_H264
){
3371 const int di
= IS_8x8DCT(mb_type
) ? 4 : 1;
3372 for(i
=0; i
<16; i
+=di
){
3373 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){ //FIXME benchmark weird rule, & below
3374 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
3375 idct_add(ptr
, h
->mb
+ i
*16, linesize
);
3379 for(i
=0; i
<16; i
++){
3380 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){ //FIXME benchmark weird rule, & below
3381 uint8_t * const ptr
= dest_y
+ block_offset
[i
];
3382 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, linesize
, s
->qscale
, IS_INTRA(mb_type
) ? 1 : 0);
/* chroma residual: DC dequant unless transform bypass, then blocks 16..19 (cb) and 20..23 (cr) */
3388 if(!(s
->flags
&CODEC_FLAG_GRAY
)){
3389 idct_add
= transform_bypass
? s
->dsp
.add_pixels4
: s
->dsp
.h264_idct_add
;
3390 if(!transform_bypass
){
3391 chroma_dc_dequant_idct_c(h
->mb
+ 16*16, h
->chroma_qp
);
3392 chroma_dc_dequant_idct_c(h
->mb
+ 16*16+4*16, h
->chroma_qp
);
3394 if(s
->codec_id
== CODEC_ID_H264
){
3395 for(i
=16; i
<16+4; i
++){
3396 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
3397 uint8_t * const ptr
= dest_cb
+ block_offset
[i
];
3398 idct_add(ptr
, h
->mb
+ i
*16, uvlinesize
);
3401 for(i
=20; i
<20+4; i
++){
3402 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
3403 uint8_t * const ptr
= dest_cr
+ block_offset
[i
];
3404 idct_add(ptr
, h
->mb
+ i
*16, uvlinesize
);
3408 for(i
=16; i
<16+4; i
++){
3409 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
3410 uint8_t * const ptr
= dest_cb
+ block_offset
[i
];
3411 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, uvlinesize
, chroma_qp
[s
->qscale
+ 12] - 12, 2);
3414 for(i
=20; i
<20+4; i
++){
3415 if(h
->non_zero_count_cache
[ scan8
[i
] ] || h
->mb
[i
*16]){
3416 uint8_t * const ptr
= dest_cr
+ block_offset
[i
];
3417 svq3_add_idct_c(ptr
, h
->mb
+ i
*16, uvlinesize
, chroma_qp
[s
->qscale
+ 12] - 12, 2);
/* deblocking: with mb_aff_frame set, nothing is filtered until the bottom macroblock of a pair */
3423 if(h
->deblocking_filter
) {
3424 if (h
->mb_aff_frame
) {
3425 const int mb_y
= s
->mb_y
- 1;
3426 uint8_t *pair_dest_y
, *pair_dest_cb
, *pair_dest_cr
;
3427 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
3428 const int mb_type_top
= s
->current_picture
.mb_type
[mb_xy
];
3429 const int mb_type_bottom
= s
->current_picture
.mb_type
[mb_xy
+s
->mb_stride
];
3430 uint8_t tmp
= s
->current_picture
.data
[1][384];
3431 if (!bottom
) return;
3432 pair_dest_y
= s
->current_picture
.data
[0] + (mb_y
* 16* s
->linesize
) + mb_x
* 16;
3433 pair_dest_cb
= s
->current_picture
.data
[1] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
3434 pair_dest_cr
= s
->current_picture
.data
[2] + (mb_y
* 8 * s
->uvlinesize
) + mb_x
* 8;
3436 backup_pair_border(h
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, s
->linesize
, s
->uvlinesize
);
3437 // TODO deblock a pair
3440 tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x
, mb_y
, pair_dest_y
, dest_y
);
3441 fill_caches(h
, mb_type_top
, 1); //FIXME don't fill stuff which isn't used by filter_mb
3442 filter_mb(h
, mb_x
, mb_y
, pair_dest_y
, pair_dest_cb
, pair_dest_cr
, linesize
, uvlinesize
);
3443 if (tmp
!= s
->current_picture
.data
[1][384]) {
3444 tprintf("modified pixel 8,1 (1)\n");
3448 tprintf("call mbaff filter_mb\n");
3449 fill_caches(h
, mb_type_bottom
, 1); //FIXME don't fill stuff which isn't used by filter_mb
3450 filter_mb(h
, mb_x
, mb_y
+1, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
3451 if (tmp
!= s
->current_picture
.data
[1][384]) {
3452 tprintf("modified pixel 8,1 (2)\n");
3455 tprintf("call filter_mb\n");
3456 backup_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
3457 fill_caches(h
, mb_type
, 1); //FIXME don't fill stuff which isn't used by filter_mb
3458 filter_mb(h
, mb_x
, mb_y
, dest_y
, dest_cb
, dest_cr
, linesize
, uvlinesize
);
3464 * fills the default_ref_list.
3466 static int fill_default_ref_list(H264Context
*h
){
3467 MpegEncContext
* const s
= &h
->s
;
3469 int smallest_poc_greater_than_current
= -1;
3470 Picture sorted_short_ref
[32];
3472 if(h
->slice_type
==B_TYPE
){
3476 /* sort frame according to poc in B slice */
3477 for(out_i
=0; out_i
<h
->short_ref_count
; out_i
++){
3479 int best_poc
=INT_MAX
;
3481 for(i
=0; i
<h
->short_ref_count
; i
++){
3482 const int poc
= h
->short_ref
[i
]->poc
;
3483 if(poc
> limit
&& poc
< best_poc
){
3489 assert(best_i
!= INT_MIN
);
3492 sorted_short_ref
[out_i
]= *h
->short_ref
[best_i
];
3493 tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i
, out_i
, sorted_short_ref
[out_i
].poc
, sorted_short_ref
[out_i
].frame_num
);
3494 if (-1 == smallest_poc_greater_than_current
) {
3495 if (h
->short_ref
[best_i
]->poc
>= s
->current_picture_ptr
->poc
) {
3496 smallest_poc_greater_than_current
= out_i
;
3502 if(s
->picture_structure
== PICT_FRAME
){
3503 if(h
->slice_type
==B_TYPE
){
3505 tprintf("current poc: %d, smallest_poc_greater_than_current: %d\n", s
->current_picture_ptr
->poc
, smallest_poc_greater_than_current
);
3507 // find the largest poc
3508 for(list
=0; list
<2; list
++){
3511 int step
= list
? -1 : 1;
3513 for(i
=0; i
<h
->short_ref_count
&& index
< h
->ref_count
[list
]; i
++, j
+=step
) {
3514 while(j
<0 || j
>= h
->short_ref_count
){
3515 if(j
!= -99 && step
== (list
? -1 : 1))
3518 j
= smallest_poc_greater_than_current
+ (step
>>1);
3520 if(sorted_short_ref
[j
].reference
!= 3) continue;
3521 h
->default_ref_list
[list
][index
]= sorted_short_ref
[j
];
3522 h
->default_ref_list
[list
][index
++].pic_id
= sorted_short_ref
[j
].frame_num
;
3525 for(i
= 0; i
< 16 && index
< h
->ref_count
[ list
]; i
++){
3526 if(h
->long_ref
[i
] == NULL
) continue;
3527 if(h
->long_ref
[i
]->reference
!= 3) continue;
3529 h
->default_ref_list
[ list
][index
]= *h
->long_ref
[i
];
3530 h
->default_ref_list
[ list
][index
++].pic_id
= i
;;
3533 if(list
&& (smallest_poc_greater_than_current
<=0 || smallest_poc_greater_than_current
>=h
->short_ref_count
) && (1 < index
)){
3534 // swap the two first elements of L1 when
3535 // L0 and L1 are identical
3536 Picture temp
= h
->default_ref_list
[1][0];
3537 h
->default_ref_list
[1][0] = h
->default_ref_list
[1][1];
3538 h
->default_ref_list
[1][1] = temp
;
3541 if(index
< h
->ref_count
[ list
])
3542 memset(&h
->default_ref_list
[list
][index
], 0, sizeof(Picture
)*(h
->ref_count
[ list
] - index
));
3546 for(i
=0; i
<h
->short_ref_count
; i
++){
3547 if(h
->short_ref
[i
]->reference
!= 3) continue; //FIXME refernce field shit
3548 h
->default_ref_list
[0][index
]= *h
->short_ref
[i
];
3549 h
->default_ref_list
[0][index
++].pic_id
= h
->short_ref
[i
]->frame_num
;
3551 for(i
= 0; i
< 16; i
++){
3552 if(h
->long_ref
[i
] == NULL
) continue;
3553 if(h
->long_ref
[i
]->reference
!= 3) continue;
3554 h
->default_ref_list
[0][index
]= *h
->long_ref
[i
];
3555 h
->default_ref_list
[0][index
++].pic_id
= i
;;
3557 if(index
< h
->ref_count
[0])
3558 memset(&h
->default_ref_list
[0][index
], 0, sizeof(Picture
)*(h
->ref_count
[0] - index
));
3561 if(h
->slice_type
==B_TYPE
){
3563 //FIXME second field balh
3567 for (i
=0; i
<h
->ref_count
[0]; i
++) {
3568 tprintf("List0: %s fn:%d 0x%p\n", (h
->default_ref_list
[0][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[0][i
].pic_id
, h
->default_ref_list
[0][i
].data
[0]);
3570 if(h
->slice_type
==B_TYPE
){
3571 for (i
=0; i
<h
->ref_count
[1]; i
++) {
3572 tprintf("List1: %s fn:%d 0x%p\n", (h
->default_ref_list
[1][i
].long_ref
? "LT" : "ST"), h
->default_ref_list
[1][i
].pic_id
, h
->default_ref_list
[0][i
].data
[0]);
3579 static void print_short_term(H264Context
*h
);
3580 static void print_long_term(H264Context
*h
);
3582 static int decode_ref_pic_list_reordering(H264Context
*h
){
3583 MpegEncContext
* const s
= &h
->s
;
3586 print_short_term(h
);
3588 if(h
->slice_type
==I_TYPE
|| h
->slice_type
==SI_TYPE
) return 0; //FIXME move before func
3590 for(list
=0; list
<2; list
++){
3591 memcpy(h
->ref_list
[list
], h
->default_ref_list
[list
], sizeof(Picture
)*h
->ref_count
[list
]);
3593 if(get_bits1(&s
->gb
)){
3594 int pred
= h
->curr_pic_num
;
3596 for(index
=0; ; index
++){
3597 int reordering_of_pic_nums_idc
= get_ue_golomb(&s
->gb
);
3600 Picture
*ref
= NULL
;
3602 if(reordering_of_pic_nums_idc
==3)
3605 if(index
>= h
->ref_count
[list
]){
3606 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference count overflow\n");
3610 if(reordering_of_pic_nums_idc
<3){
3611 if(reordering_of_pic_nums_idc
<2){
3612 const int abs_diff_pic_num
= get_ue_golomb(&s
->gb
) + 1;
3614 if(abs_diff_pic_num
>= h
->max_pic_num
){
3615 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "abs_diff_pic_num overflow\n");
3619 if(reordering_of_pic_nums_idc
== 0) pred
-= abs_diff_pic_num
;
3620 else pred
+= abs_diff_pic_num
;
3621 pred
&= h
->max_pic_num
- 1;
3623 for(i
= h
->short_ref_count
-1; i
>=0; i
--){
3624 ref
= h
->short_ref
[i
];
3625 assert(ref
->reference
== 3);
3626 assert(!ref
->long_ref
);
3627 if(ref
->data
[0] != NULL
&& ref
->frame_num
== pred
&& ref
->long_ref
== 0) // ignore non existing pictures by testing data[0] pointer
3631 ref
->pic_id
= ref
->frame_num
;
3633 pic_id
= get_ue_golomb(&s
->gb
); //long_term_pic_idx
3634 ref
= h
->long_ref
[pic_id
];
3635 ref
->pic_id
= pic_id
;
3636 assert(ref
->reference
== 3);
3637 assert(ref
->long_ref
);
3642 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference picture missing during reorder\n");
3643 memset(&h
->ref_list
[list
][index
], 0, sizeof(Picture
)); //FIXME
3645 for(i
=index
; i
+1<h
->ref_count
[list
]; i
++){
3646 if(ref
->long_ref
== h
->ref_list
[list
][i
].long_ref
&& ref
->pic_id
== h
->ref_list
[list
][i
].pic_id
)
3649 for(; i
> index
; i
--){
3650 h
->ref_list
[list
][i
]= h
->ref_list
[list
][i
-1];
3652 h
->ref_list
[list
][index
]= *ref
;
3655 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal reordering_of_pic_nums_idc\n");
3661 if(h
->slice_type
!=B_TYPE
) break;
3663 for(list
=0; list
<2; list
++){
3664 for(index
= 0; index
< h
->ref_count
[list
]; index
++){
3665 if(!h
->ref_list
[list
][index
].data
[0])
3666 h
->ref_list
[list
][index
]= s
->current_picture
;
3668 if(h
->slice_type
!=B_TYPE
) break;
3671 if(h
->slice_type
==B_TYPE
&& !h
->direct_spatial_mv_pred
)
3672 direct_dist_scale_factor(h
);
3673 direct_ref_list_init(h
);
3677 static int pred_weight_table(H264Context
*h
){
3678 MpegEncContext
* const s
= &h
->s
;
3680 int luma_def
, chroma_def
;
3683 h
->use_weight_chroma
= 0;
3684 h
->luma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3685 h
->chroma_log2_weight_denom
= get_ue_golomb(&s
->gb
);
3686 luma_def
= 1<<h
->luma_log2_weight_denom
;
3687 chroma_def
= 1<<h
->chroma_log2_weight_denom
;
3689 for(list
=0; list
<2; list
++){
3690 for(i
=0; i
<h
->ref_count
[list
]; i
++){
3691 int luma_weight_flag
, chroma_weight_flag
;
3693 luma_weight_flag
= get_bits1(&s
->gb
);
3694 if(luma_weight_flag
){
3695 h
->luma_weight
[list
][i
]= get_se_golomb(&s
->gb
);
3696 h
->luma_offset
[list
][i
]= get_se_golomb(&s
->gb
);
3697 if( h
->luma_weight
[list
][i
] != luma_def
3698 || h
->luma_offset
[list
][i
] != 0)
3701 h
->luma_weight
[list
][i
]= luma_def
;
3702 h
->luma_offset
[list
][i
]= 0;
3705 chroma_weight_flag
= get_bits1(&s
->gb
);
3706 if(chroma_weight_flag
){
3709 h
->chroma_weight
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3710 h
->chroma_offset
[list
][i
][j
]= get_se_golomb(&s
->gb
);
3711 if( h
->chroma_weight
[list
][i
][j
] != chroma_def
3712 || h
->chroma_offset
[list
][i
][j
] != 0)
3713 h
->use_weight_chroma
= 1;
3718 h
->chroma_weight
[list
][i
][j
]= chroma_def
;
3719 h
->chroma_offset
[list
][i
][j
]= 0;
3723 if(h
->slice_type
!= B_TYPE
) break;
3725 h
->use_weight
= h
->use_weight
|| h
->use_weight_chroma
;
3729 static void implicit_weight_table(H264Context
*h
){
3730 MpegEncContext
* const s
= &h
->s
;
3732 int cur_poc
= s
->current_picture_ptr
->poc
;
3734 if( h
->ref_count
[0] == 1 && h
->ref_count
[1] == 1
3735 && h
->ref_list
[0][0].poc
+ h
->ref_list
[1][0].poc
== 2*cur_poc
){
3737 h
->use_weight_chroma
= 0;
3742 h
->use_weight_chroma
= 2;
3743 h
->luma_log2_weight_denom
= 5;
3744 h
->chroma_log2_weight_denom
= 5;
3747 for(ref0
=0; ref0
< h
->ref_count
[0]; ref0
++){
3748 int poc0
= h
->ref_list
[0][ref0
].poc
;
3749 for(ref1
=0; ref1
< h
->ref_count
[1]; ref1
++){
3750 int poc1
= h
->ref_list
[1][ref1
].poc
;
3751 int td
= clip(poc1
- poc0
, -128, 127);
3753 int tb
= clip(cur_poc
- poc0
, -128, 127);
3754 int tx
= (16384 + (ABS(td
) >> 1)) / td
;
3755 int dist_scale_factor
= clip((tb
*tx
+ 32) >> 6, -1024, 1023) >> 2;
3756 if(dist_scale_factor
< -64 || dist_scale_factor
> 128)
3757 h
->implicit_weight
[ref0
][ref1
] = 32;
3759 h
->implicit_weight
[ref0
][ref1
] = 64 - dist_scale_factor
;
3761 h
->implicit_weight
[ref0
][ref1
] = 32;
3766 static inline void unreference_pic(H264Context
*h
, Picture
*pic
){
3769 if(pic
== h
->delayed_output_pic
)
3772 for(i
= 0; h
->delayed_pic
[i
]; i
++)
3773 if(pic
== h
->delayed_pic
[i
]){
3781 * instantaneous decoder refresh.
3783 static void idr(H264Context
*h
){
3786 for(i
=0; i
<16; i
++){
3787 if (h
->long_ref
[i
] != NULL
) {
3788 unreference_pic(h
, h
->long_ref
[i
]);
3789 h
->long_ref
[i
]= NULL
;
3792 h
->long_ref_count
=0;
3794 for(i
=0; i
<h
->short_ref_count
; i
++){
3795 unreference_pic(h
, h
->short_ref
[i
]);
3796 h
->short_ref
[i
]= NULL
;
3798 h
->short_ref_count
=0;
3801 /* forget old pics after a seek */
3802 static void flush_dpb(AVCodecContext
*avctx
){
3803 H264Context
*h
= avctx
->priv_data
;
3806 h
->delayed_pic
[i
]= NULL
;
3807 h
->delayed_output_pic
= NULL
;
3809 if(h
->s
.current_picture_ptr
)
3810 h
->s
.current_picture_ptr
->reference
= 0;
3815 * @return the removed picture or NULL if an error occurs
3817 static Picture
* remove_short(H264Context
*h
, int frame_num
){
3818 MpegEncContext
* const s
= &h
->s
;
3821 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3822 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "remove short %d count %d\n", frame_num
, h
->short_ref_count
);
3824 for(i
=0; i
<h
->short_ref_count
; i
++){
3825 Picture
*pic
= h
->short_ref
[i
];
3826 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3827 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d %d %p\n", i
, pic
->frame_num
, pic
);
3828 if(pic
->frame_num
== frame_num
){
3829 h
->short_ref
[i
]= NULL
;
3830 memmove(&h
->short_ref
[i
], &h
->short_ref
[i
+1], (h
->short_ref_count
- i
- 1)*sizeof(Picture
*));
3831 h
->short_ref_count
--;
3840 * @return the removed picture or NULL if an error occurs
3842 static Picture
* remove_long(H264Context
*h
, int i
){
3845 pic
= h
->long_ref
[i
];
3846 h
->long_ref
[i
]= NULL
;
3847 if(pic
) h
->long_ref_count
--;
3853 * print short term list
3855 static void print_short_term(H264Context
*h
) {
3857 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3858 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "short term list:\n");
3859 for(i
=0; i
<h
->short_ref_count
; i
++){
3860 Picture
*pic
= h
->short_ref
[i
];
3861 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3867 * print long term list
3869 static void print_long_term(H264Context
*h
) {
3871 if(h
->s
.avctx
->debug
&FF_DEBUG_MMCO
) {
3872 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "long term list:\n");
3873 for(i
= 0; i
< 16; i
++){
3874 Picture
*pic
= h
->long_ref
[i
];
3876 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "%d fn:%d poc:%d %p\n", i
, pic
->frame_num
, pic
->poc
, pic
->data
[0]);
3883 * Executes the reference picture marking (memory management control operations).
3885 static int execute_ref_pic_marking(H264Context
*h
, MMCO
*mmco
, int mmco_count
){
3886 MpegEncContext
* const s
= &h
->s
;
3888 int current_is_long
=0;
3891 if((s
->avctx
->debug
&FF_DEBUG_MMCO
) && mmco_count
==0)
3892 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "no mmco here\n");
3894 for(i
=0; i
<mmco_count
; i
++){
3895 if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3896 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco:%d %d %d\n", h
->mmco
[i
].opcode
, h
->mmco
[i
].short_frame_num
, h
->mmco
[i
].long_index
);
3898 switch(mmco
[i
].opcode
){
3899 case MMCO_SHORT2UNUSED
:
3900 pic
= remove_short(h
, mmco
[i
].short_frame_num
);
3902 unreference_pic(h
, pic
);
3903 else if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3904 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: remove_short() failure\n");
3906 case MMCO_SHORT2LONG
:
3907 pic
= remove_long(h
, mmco
[i
].long_index
);
3908 if(pic
) unreference_pic(h
, pic
);
3910 h
->long_ref
[ mmco
[i
].long_index
]= remove_short(h
, mmco
[i
].short_frame_num
);
3911 h
->long_ref
[ mmco
[i
].long_index
]->long_ref
=1;
3912 h
->long_ref_count
++;
3914 case MMCO_LONG2UNUSED
:
3915 pic
= remove_long(h
, mmco
[i
].long_index
);
3917 unreference_pic(h
, pic
);
3918 else if(s
->avctx
->debug
&FF_DEBUG_MMCO
)
3919 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "mmco: remove_long() failure\n");
3922 pic
= remove_long(h
, mmco
[i
].long_index
);
3923 if(pic
) unreference_pic(h
, pic
);
3925 h
->long_ref
[ mmco
[i
].long_index
]= s
->current_picture_ptr
;
3926 h
->long_ref
[ mmco
[i
].long_index
]->long_ref
=1;
3927 h
->long_ref_count
++;
3931 case MMCO_SET_MAX_LONG
:
3932 assert(mmco
[i
].long_index
<= 16);
3933 // just remove the long term which index is greater than new max
3934 for(j
= mmco
[i
].long_index
; j
<16; j
++){
3935 pic
= remove_long(h
, j
);
3936 if (pic
) unreference_pic(h
, pic
);
3940 while(h
->short_ref_count
){
3941 pic
= remove_short(h
, h
->short_ref
[0]->frame_num
);
3942 unreference_pic(h
, pic
);
3944 for(j
= 0; j
< 16; j
++) {
3945 pic
= remove_long(h
, j
);
3946 if(pic
) unreference_pic(h
, pic
);
3953 if(!current_is_long
){
3954 pic
= remove_short(h
, s
->current_picture_ptr
->frame_num
);
3956 unreference_pic(h
, pic
);
3957 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal short term buffer state detected\n");
3960 if(h
->short_ref_count
)
3961 memmove(&h
->short_ref
[1], &h
->short_ref
[0], h
->short_ref_count
*sizeof(Picture
*));
3963 h
->short_ref
[0]= s
->current_picture_ptr
;
3964 h
->short_ref
[0]->long_ref
=0;
3965 h
->short_ref_count
++;
3968 print_short_term(h
);
3973 static int decode_ref_pic_marking(H264Context
*h
){
3974 MpegEncContext
* const s
= &h
->s
;
3977 if(h
->nal_unit_type
== NAL_IDR_SLICE
){ //FIXME fields
3978 s
->broken_link
= get_bits1(&s
->gb
) -1;
3979 h
->mmco
[0].long_index
= get_bits1(&s
->gb
) - 1; // current_long_term_idx
3980 if(h
->mmco
[0].long_index
== -1)
3983 h
->mmco
[0].opcode
= MMCO_LONG
;
3987 if(get_bits1(&s
->gb
)){ // adaptive_ref_pic_marking_mode_flag
3988 for(i
= 0; i
<MAX_MMCO_COUNT
; i
++) {
3989 MMCOOpcode opcode
= get_ue_golomb(&s
->gb
);;
3991 h
->mmco
[i
].opcode
= opcode
;
3992 if(opcode
==MMCO_SHORT2UNUSED
|| opcode
==MMCO_SHORT2LONG
){
3993 h
->mmco
[i
].short_frame_num
= (h
->frame_num
- get_ue_golomb(&s
->gb
) - 1) & ((1<<h
->sps
.log2_max_frame_num
)-1); //FIXME fields
3994 /* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
3995 fprintf(stderr, "illegal short ref in memory management control operation %d\n", mmco);
3999 if(opcode
==MMCO_SHORT2LONG
|| opcode
==MMCO_LONG2UNUSED
|| opcode
==MMCO_LONG
|| opcode
==MMCO_SET_MAX_LONG
){
4000 h
->mmco
[i
].long_index
= get_ue_golomb(&s
->gb
);
4001 if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ h
->mmco
[i
].long_index
>= 16){
4002 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal long ref in memory management control operation %d\n", opcode
);
4007 if(opcode
> MMCO_LONG
){
4008 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal memory management control operation %d\n", opcode
);
4011 if(opcode
== MMCO_END
)
4016 assert(h
->long_ref_count
+ h
->short_ref_count
<= h
->sps
.ref_frame_count
);
4018 if(h
->long_ref_count
+ h
->short_ref_count
== h
->sps
.ref_frame_count
){ //FIXME fields
4019 h
->mmco
[0].opcode
= MMCO_SHORT2UNUSED
;
4020 h
->mmco
[0].short_frame_num
= h
->short_ref
[ h
->short_ref_count
- 1 ]->frame_num
;
4030 static int init_poc(H264Context
*h
){
4031 MpegEncContext
* const s
= &h
->s
;
4032 const int max_frame_num
= 1<<h
->sps
.log2_max_frame_num
;
4035 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
4036 h
->frame_num_offset
= 0;
4038 if(h
->frame_num
< h
->prev_frame_num
)
4039 h
->frame_num_offset
= h
->prev_frame_num_offset
+ max_frame_num
;
4041 h
->frame_num_offset
= h
->prev_frame_num_offset
;
4044 if(h
->sps
.poc_type
==0){
4045 const int max_poc_lsb
= 1<<h
->sps
.log2_max_poc_lsb
;
4047 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
4052 if (h
->poc_lsb
< h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
>= max_poc_lsb
/2)
4053 h
->poc_msb
= h
->prev_poc_msb
+ max_poc_lsb
;
4054 else if(h
->poc_lsb
> h
->prev_poc_lsb
&& h
->prev_poc_lsb
- h
->poc_lsb
< -max_poc_lsb
/2)
4055 h
->poc_msb
= h
->prev_poc_msb
- max_poc_lsb
;
4057 h
->poc_msb
= h
->prev_poc_msb
;
4058 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
4060 field_poc
[1] = h
->poc_msb
+ h
->poc_lsb
;
4061 if(s
->picture_structure
== PICT_FRAME
)
4062 field_poc
[1] += h
->delta_poc_bottom
;
4063 }else if(h
->sps
.poc_type
==1){
4064 int abs_frame_num
, expected_delta_per_poc_cycle
, expectedpoc
;
4067 if(h
->sps
.poc_cycle_length
!= 0)
4068 abs_frame_num
= h
->frame_num_offset
+ h
->frame_num
;
4072 if(h
->nal_ref_idc
==0 && abs_frame_num
> 0)
4075 expected_delta_per_poc_cycle
= 0;
4076 for(i
=0; i
< h
->sps
.poc_cycle_length
; i
++)
4077 expected_delta_per_poc_cycle
+= h
->sps
.offset_for_ref_frame
[ i
]; //FIXME integrate during sps parse
4079 if(abs_frame_num
> 0){
4080 int poc_cycle_cnt
= (abs_frame_num
- 1) / h
->sps
.poc_cycle_length
;
4081 int frame_num_in_poc_cycle
= (abs_frame_num
- 1) % h
->sps
.poc_cycle_length
;
4083 expectedpoc
= poc_cycle_cnt
* expected_delta_per_poc_cycle
;
4084 for(i
= 0; i
<= frame_num_in_poc_cycle
; i
++)
4085 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_ref_frame
[ i
];
4089 if(h
->nal_ref_idc
== 0)
4090 expectedpoc
= expectedpoc
+ h
->sps
.offset_for_non_ref_pic
;
4092 field_poc
[0] = expectedpoc
+ h
->delta_poc
[0];
4093 field_poc
[1] = field_poc
[0] + h
->sps
.offset_for_top_to_bottom_field
;
4095 if(s
->picture_structure
== PICT_FRAME
)
4096 field_poc
[1] += h
->delta_poc
[1];
4099 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
4102 if(h
->nal_ref_idc
) poc
= 2*(h
->frame_num_offset
+ h
->frame_num
);
4103 else poc
= 2*(h
->frame_num_offset
+ h
->frame_num
) - 1;
4109 if(s
->picture_structure
!= PICT_BOTTOM_FIELD
)
4110 s
->current_picture_ptr
->field_poc
[0]= field_poc
[0];
4111 if(s
->picture_structure
!= PICT_TOP_FIELD
)
4112 s
->current_picture_ptr
->field_poc
[1]= field_poc
[1];
4113 if(s
->picture_structure
== PICT_FRAME
) // FIXME field pix?
4114 s
->current_picture_ptr
->poc
= FFMIN(field_poc
[0], field_poc
[1]);
4120 * decodes a slice header.
4121 * this will allso call MPV_common_init() and frame_start() as needed
4123 static int decode_slice_header(H264Context
*h
){
4124 MpegEncContext
* const s
= &h
->s
;
4125 int first_mb_in_slice
, pps_id
;
4126 int num_ref_idx_active_override_flag
;
4127 static const uint8_t slice_type_map
[5]= {P_TYPE
, B_TYPE
, I_TYPE
, SP_TYPE
, SI_TYPE
};
4129 int default_ref_list_done
= 0;
4131 s
->current_picture
.reference
= h
->nal_ref_idc
!= 0;
4132 s
->dropable
= h
->nal_ref_idc
== 0;
4134 first_mb_in_slice
= get_ue_golomb(&s
->gb
);
4136 slice_type
= get_ue_golomb(&s
->gb
);
4138 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "slice type too large (%d) at %d %d\n", h
->slice_type
, s
->mb_x
, s
->mb_y
);
4143 h
->slice_type_fixed
=1;
4145 h
->slice_type_fixed
=0;
4147 slice_type
= slice_type_map
[ slice_type
];
4148 if (slice_type
== I_TYPE
4149 || (h
->slice_num
!= 0 && slice_type
== h
->slice_type
) ) {
4150 default_ref_list_done
= 1;
4152 h
->slice_type
= slice_type
;
4154 s
->pict_type
= h
->slice_type
; // to make a few old func happy, it's wrong though
4156 pps_id
= get_ue_golomb(&s
->gb
);
4158 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "pps_id out of range\n");
4161 h
->pps
= h
->pps_buffer
[pps_id
];
4162 if(h
->pps
.slice_group_count
== 0){
4163 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non existing PPS referenced\n");
4167 h
->sps
= h
->sps_buffer
[ h
->pps
.sps_id
];
4168 if(h
->sps
.log2_max_frame_num
== 0){
4169 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "non existing SPS referenced\n");
4173 s
->mb_width
= h
->sps
.mb_width
;
4174 s
->mb_height
= h
->sps
.mb_height
* (2 - h
->sps
.frame_mbs_only_flag
);
4176 h
->b_stride
= s
->mb_width
*4 + 1;
4177 h
->b8_stride
= s
->mb_width
*2 + 1;
4179 s
->width
= 16*s
->mb_width
- 2*(h
->sps
.crop_left
+ h
->sps
.crop_right
);
4180 if(h
->sps
.frame_mbs_only_flag
)
4181 s
->height
= 16*s
->mb_height
- 2*(h
->sps
.crop_top
+ h
->sps
.crop_bottom
);
4183 s
->height
= 16*s
->mb_height
- 4*(h
->sps
.crop_top
+ h
->sps
.crop_bottom
); //FIXME recheck
4185 if (s
->context_initialized
4186 && ( s
->width
!= s
->avctx
->width
|| s
->height
!= s
->avctx
->height
)) {
4190 if (!s
->context_initialized
) {
4191 if (MPV_common_init(s
) < 0)
4194 if(s
->dsp
.h264_idct_add
== ff_h264_idct_add_c
){ //FIXME little ugly
4195 memcpy(h
->zigzag_scan
, zigzag_scan
, 16*sizeof(uint8_t));
4196 memcpy(h
-> field_scan
, field_scan
, 16*sizeof(uint8_t));
4199 for(i
=0; i
<16; i
++){
4200 #define T(x) (x>>2) | ((x<<2) & 0xF)
4201 h
->zigzag_scan
[i
] = T(zigzag_scan
[i
]);
4202 h
-> field_scan
[i
] = T( field_scan
[i
]);
4205 if(h
->sps
.transform_bypass
){ //FIXME same ugly
4206 h
->zigzag_scan_q0
= zigzag_scan
;
4207 h
->field_scan_q0
= field_scan
;
4209 h
->zigzag_scan_q0
= h
->zigzag_scan
;
4210 h
->field_scan_q0
= h
->field_scan
;
4215 s
->avctx
->width
= s
->width
;
4216 s
->avctx
->height
= s
->height
;
4217 s
->avctx
->sample_aspect_ratio
= h
->sps
.sar
;
4218 if(!s
->avctx
->sample_aspect_ratio
.den
)
4219 s
->avctx
->sample_aspect_ratio
.den
= 1;
4221 if(h
->sps
.timing_info_present_flag
){
4222 s
->avctx
->time_base
= (AVRational
){h
->sps
.num_units_in_tick
, h
->sps
.time_scale
};
4226 if(h
->slice_num
== 0){
4230 s
->current_picture_ptr
->frame_num
= //FIXME frame_num cleanup
4231 h
->frame_num
= get_bits(&s
->gb
, h
->sps
.log2_max_frame_num
);
4233 h
->mb_aff_frame
= 0;
4234 if(h
->sps
.frame_mbs_only_flag
){
4235 s
->picture_structure
= PICT_FRAME
;
4237 if(get_bits1(&s
->gb
)) { //field_pic_flag
4238 s
->picture_structure
= PICT_TOP_FIELD
+ get_bits1(&s
->gb
); //bottom_field_flag
4240 s
->picture_structure
= PICT_FRAME
;
4241 first_mb_in_slice
<<= h
->sps
.mb_aff
;
4242 h
->mb_aff_frame
= h
->sps
.mb_aff
;
4246 s
->resync_mb_x
= s
->mb_x
= first_mb_in_slice
% s
->mb_width
;
4247 s
->resync_mb_y
= s
->mb_y
= first_mb_in_slice
/ s
->mb_width
;
4248 if(s
->mb_y
>= s
->mb_height
){
4252 if(s
->picture_structure
==PICT_FRAME
){
4253 h
->curr_pic_num
= h
->frame_num
;
4254 h
->max_pic_num
= 1<< h
->sps
.log2_max_frame_num
;
4256 h
->curr_pic_num
= 2*h
->frame_num
;
4257 h
->max_pic_num
= 1<<(h
->sps
.log2_max_frame_num
+ 1);
4260 if(h
->nal_unit_type
== NAL_IDR_SLICE
){
4261 get_ue_golomb(&s
->gb
); /* idr_pic_id */
4264 if(h
->sps
.poc_type
==0){
4265 h
->poc_lsb
= get_bits(&s
->gb
, h
->sps
.log2_max_poc_lsb
);
4267 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
){
4268 h
->delta_poc_bottom
= get_se_golomb(&s
->gb
);
4272 if(h
->sps
.poc_type
==1 && !h
->sps
.delta_pic_order_always_zero_flag
){
4273 h
->delta_poc
[0]= get_se_golomb(&s
->gb
);
4275 if(h
->pps
.pic_order_present
==1 && s
->picture_structure
==PICT_FRAME
)
4276 h
->delta_poc
[1]= get_se_golomb(&s
->gb
);
4281 if(h
->pps
.redundant_pic_cnt_present
){
4282 h
->redundant_pic_count
= get_ue_golomb(&s
->gb
);
4285 //set defaults, might be overriden a few line later
4286 h
->ref_count
[0]= h
->pps
.ref_count
[0];
4287 h
->ref_count
[1]= h
->pps
.ref_count
[1];
4289 if(h
->slice_type
== P_TYPE
|| h
->slice_type
== SP_TYPE
|| h
->slice_type
== B_TYPE
){
4290 if(h
->slice_type
== B_TYPE
){
4291 h
->direct_spatial_mv_pred
= get_bits1(&s
->gb
);
4293 num_ref_idx_active_override_flag
= get_bits1(&s
->gb
);
4295 if(num_ref_idx_active_override_flag
){
4296 h
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
4297 if(h
->slice_type
==B_TYPE
)
4298 h
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
4300 if(h
->ref_count
[0] > 32 || h
->ref_count
[1] > 32){
4301 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow\n");
4307 if(!default_ref_list_done
){
4308 fill_default_ref_list(h
);
4311 if(decode_ref_pic_list_reordering(h
) < 0)
4314 if( (h
->pps
.weighted_pred
&& (h
->slice_type
== P_TYPE
|| h
->slice_type
== SP_TYPE
))
4315 || (h
->pps
.weighted_bipred_idc
==1 && h
->slice_type
==B_TYPE
) )
4316 pred_weight_table(h
);
4317 else if(h
->pps
.weighted_bipred_idc
==2 && h
->slice_type
==B_TYPE
)
4318 implicit_weight_table(h
);
4322 if(s
->current_picture
.reference
)
4323 decode_ref_pic_marking(h
);
4325 if( h
->slice_type
!= I_TYPE
&& h
->slice_type
!= SI_TYPE
&& h
->pps
.cabac
)
4326 h
->cabac_init_idc
= get_ue_golomb(&s
->gb
);
4328 h
->last_qscale_diff
= 0;
4329 s
->qscale
= h
->pps
.init_qp
+ get_se_golomb(&s
->gb
);
4330 if(s
->qscale
<0 || s
->qscale
>51){
4331 av_log(s
->avctx
, AV_LOG_ERROR
, "QP %d out of range\n", s
->qscale
);
4334 h
->chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, s
->qscale
);
4335 //FIXME qscale / qp ... stuff
4336 if(h
->slice_type
== SP_TYPE
){
4337 get_bits1(&s
->gb
); /* sp_for_switch_flag */
4339 if(h
->slice_type
==SP_TYPE
|| h
->slice_type
== SI_TYPE
){
4340 get_se_golomb(&s
->gb
); /* slice_qs_delta */
4343 h
->deblocking_filter
= 1;
4344 h
->slice_alpha_c0_offset
= 0;
4345 h
->slice_beta_offset
= 0;
4346 if( h
->pps
.deblocking_filter_parameters_present
) {
4347 h
->deblocking_filter
= get_ue_golomb(&s
->gb
);
4348 if(h
->deblocking_filter
< 2)
4349 h
->deblocking_filter
^= 1; // 1<->0
4351 if( h
->deblocking_filter
) {
4352 h
->slice_alpha_c0_offset
= get_se_golomb(&s
->gb
) << 1;
4353 h
->slice_beta_offset
= get_se_golomb(&s
->gb
) << 1;
4356 if( s
->avctx
->skip_loop_filter
>= AVDISCARD_ALL
4357 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONKEY
&& h
->slice_type
!= I_TYPE
)
4358 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_BIDIR
&& h
->slice_type
== B_TYPE
)
4359 ||(s
->avctx
->skip_loop_filter
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
4360 h
->deblocking_filter
= 0;
4363 if( h
->pps
.num_slice_groups
> 1 && h
->pps
.mb_slice_group_map_type
>= 3 && h
->pps
.mb_slice_group_map_type
<= 5)
4364 slice_group_change_cycle
= get_bits(&s
->gb
, ?);
4369 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
4370 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
4372 (s
->picture_structure
==PICT_FRAME
? "F" : s
->picture_structure
==PICT_TOP_FIELD
? "T" : "B"),
4374 av_get_pict_type_char(h
->slice_type
),
4375 pps_id
, h
->frame_num
,
4376 s
->current_picture_ptr
->field_poc
[0], s
->current_picture_ptr
->field_poc
[1],
4377 h
->ref_count
[0], h
->ref_count
[1],
4379 h
->deblocking_filter
, h
->slice_alpha_c0_offset
/2, h
->slice_beta_offset
/2,
4381 h
->use_weight
==1 && h
->use_weight_chroma
? "c" : ""
4391 static inline int get_level_prefix(GetBitContext
*gb
){
4395 OPEN_READER(re
, gb
);
4396 UPDATE_CACHE(re
, gb
);
4397 buf
=GET_CACHE(re
, gb
);
4399 log
= 32 - av_log2(buf
);
4401 print_bin(buf
>>(32-log
), log
);
4402 av_log(NULL
, AV_LOG_DEBUG
, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf
>>(32-log
), log
, log
-1, get_bits_count(gb
), __FILE__
);
4405 LAST_SKIP_BITS(re
, gb
, log
);
4406 CLOSE_READER(re
, gb
);
4411 static inline int get_dct8x8_allowed(H264Context
*h
){
4414 if(!IS_SUB_8X8(h
->sub_mb_type
[i
])
4415 || (!h
->sps
.direct_8x8_inference_flag
&& IS_DIRECT(h
->sub_mb_type
[i
])))
4422 * decodes a residual block.
4423 * @param n block index
4424 * @param scantable scantable
4425 * @param max_coeff number of coefficients in the block
4426 * @return <0 if an error occured
4428 static int decode_residual(H264Context
*h
, GetBitContext
*gb
, DCTELEM
*block
, int n
, const uint8_t *scantable
, const uint16_t *qmul
, int max_coeff
){
4429 MpegEncContext
* const s
= &h
->s
;
4430 static const int coeff_token_table_index
[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4431 int level
[16], run
[16];
4432 int suffix_length
, zeros_left
, coeff_num
, coeff_token
, total_coeff
, i
, trailing_ones
;
4434 //FIXME put trailing_onex into the context
4436 if(n
== CHROMA_DC_BLOCK_INDEX
){
4437 coeff_token
= get_vlc2(gb
, chroma_dc_coeff_token_vlc
.table
, CHROMA_DC_COEFF_TOKEN_VLC_BITS
, 1);
4438 total_coeff
= coeff_token
>>2;
4440 if(n
== LUMA_DC_BLOCK_INDEX
){
4441 total_coeff
= pred_non_zero_count(h
, 0);
4442 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4443 total_coeff
= coeff_token
>>2;
4445 total_coeff
= pred_non_zero_count(h
, n
);
4446 coeff_token
= get_vlc2(gb
, coeff_token_vlc
[ coeff_token_table_index
[total_coeff
] ].table
, COEFF_TOKEN_VLC_BITS
, 2);
4447 total_coeff
= coeff_token
>>2;
4448 h
->non_zero_count_cache
[ scan8
[n
] ]= total_coeff
;
4452 //FIXME set last_non_zero?
4457 trailing_ones
= coeff_token
&3;
4458 tprintf("trailing:%d, total:%d\n", trailing_ones
, total_coeff
);
4459 assert(total_coeff
<=16);
4461 for(i
=0; i
<trailing_ones
; i
++){
4462 level
[i
]= 1 - 2*get_bits1(gb
);
4465 suffix_length
= total_coeff
> 10 && trailing_ones
< 3;
4467 for(; i
<total_coeff
; i
++){
4468 const int prefix
= get_level_prefix(gb
);
4469 int level_code
, mask
;
4471 if(prefix
<14){ //FIXME try to build a large unified VLC table for all this
4473 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4475 level_code
= (prefix
<<suffix_length
); //part
4476 }else if(prefix
==14){
4478 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, suffix_length
); //part
4480 level_code
= prefix
+ get_bits(gb
, 4); //part
4481 }else if(prefix
==15){
4482 level_code
= (prefix
<<suffix_length
) + get_bits(gb
, 12); //part
4483 if(suffix_length
==0) level_code
+=15; //FIXME doesn't make (much)sense
4485 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "prefix too large at %d %d\n", s
->mb_x
, s
->mb_y
);
4489 if(i
==trailing_ones
&& i
<3) level_code
+= 2; //FIXME split first iteration
4491 mask
= -(level_code
&1);
4492 level
[i
]= (((2+level_code
)>>1) ^ mask
) - mask
;
4494 if(suffix_length
==0) suffix_length
=1; //FIXME split first iteration
4497 if(ABS(level
[i
]) > (3<<(suffix_length
-1)) && suffix_length
<6) suffix_length
++;
4499 if((2+level_code
)>>1) > (3<<(suffix_length
-1)) && suffix_length
<6) suffix_length
++;
4500 /* ? == prefix > 2 or sth */
4502 tprintf("level: %d suffix_length:%d\n", level
[i
], suffix_length
);
4505 if(total_coeff
== max_coeff
)
4508 if(n
== CHROMA_DC_BLOCK_INDEX
)
4509 zeros_left
= get_vlc2(gb
, chroma_dc_total_zeros_vlc
[ total_coeff
-1 ].table
, CHROMA_DC_TOTAL_ZEROS_VLC_BITS
, 1);
4511 zeros_left
= get_vlc2(gb
, total_zeros_vlc
[ total_coeff
-1 ].table
, TOTAL_ZEROS_VLC_BITS
, 1);
4514 for(i
=0; i
<total_coeff
-1; i
++){
4517 else if(zeros_left
< 7){
4518 run
[i
]= get_vlc2(gb
, run_vlc
[zeros_left
-1].table
, RUN_VLC_BITS
, 1);
4520 run
[i
]= get_vlc2(gb
, run7_vlc
.table
, RUN7_VLC_BITS
, 2);
4522 zeros_left
-= run
[i
];
4526 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "negative number of zero coeffs at %d %d\n", s
->mb_x
, s
->mb_y
);
4530 for(; i
<total_coeff
-1; i
++){
4538 for(i
=total_coeff
-1; i
>=0; i
--){ //FIXME merge into rundecode?
4541 coeff_num
+= run
[i
] + 1; //FIXME add 1 earlier ?
4542 j
= scantable
[ coeff_num
];
4547 for(i
=total_coeff
-1; i
>=0; i
--){ //FIXME merge into rundecode?
4550 coeff_num
+= run
[i
] + 1; //FIXME add 1 earlier ?
4551 j
= scantable
[ coeff_num
];
4553 block
[j
]= level
[i
] * qmul
[j
];
4554 // printf("%d %d ", block[j], qmul[j]);
4561 * decodes a P_SKIP or B_SKIP macroblock
4563 static void decode_mb_skip(H264Context
*h
){
4564 MpegEncContext
* const s
= &h
->s
;
4565 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
4568 memset(h
->non_zero_count
[mb_xy
], 0, 16);
4569 memset(h
->non_zero_count_cache
+ 8, 0, 8*5); //FIXME ugly, remove pfui
4571 if(h
->mb_aff_frame
&& s
->mb_skip_run
==0 && (s
->mb_y
&1)==0){
4572 h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4574 if(h
->mb_field_decoding_flag
)
4575 mb_type
|= MB_TYPE_INTERLACED
;
4577 if( h
->slice_type
== B_TYPE
)
4579 // just for fill_caches. pred_direct_motion will set the real mb_type
4580 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P0L1
|MB_TYPE_DIRECT2
|MB_TYPE_SKIP
;
4582 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4583 pred_direct_motion(h
, &mb_type
);
4585 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
4586 fill_rectangle(h
->mvd_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
4592 mb_type
|= MB_TYPE_16x16
|MB_TYPE_P0L0
|MB_TYPE_P1L0
|MB_TYPE_SKIP
;
4594 fill_caches(h
, mb_type
, 0); //FIXME check what is needed and what not ...
4595 pred_pskip_motion(h
, &mx
, &my
);
4596 fill_rectangle(&h
->ref_cache
[0][scan8
[0]], 4, 4, 8, 0, 1);
4597 fill_rectangle( h
->mv_cache
[0][scan8
[0]], 4, 4, 8, pack16to32(mx
,my
), 4);
4599 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
4602 write_back_motion(h
, mb_type
);
4603 s
->current_picture
.mb_type
[mb_xy
]= mb_type
|MB_TYPE_SKIP
;
4604 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
4605 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4606 h
->prev_mb_skipped
= 1;
4610 * decodes a macroblock
4611 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4613 static int decode_mb_cavlc(H264Context
*h
){
4614 MpegEncContext
* const s
= &h
->s
;
4615 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
4616 int mb_type
, partition_count
, cbp
;
4617 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
4619 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handlong?
4621 tprintf("pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
4622 cbp
= 0; /* avoid warning. FIXME: find a solution without slowing
4624 if(h
->slice_type
!= I_TYPE
&& h
->slice_type
!= SI_TYPE
){
4625 if(s
->mb_skip_run
==-1)
4626 s
->mb_skip_run
= get_ue_golomb(&s
->gb
);
4628 if (s
->mb_skip_run
--) {
4633 if(h
->mb_aff_frame
){
4634 if ( ((s
->mb_y
&1) == 0) || h
->prev_mb_skipped
)
4635 h
->mb_field_decoding_flag
= get_bits1(&s
->gb
);
4637 h
->mb_field_decoding_flag
= (s
->picture_structure
!=PICT_FRAME
);
4639 h
->prev_mb_skipped
= 0;
4641 mb_type
= get_ue_golomb(&s
->gb
);
4642 if(h
->slice_type
== B_TYPE
){
4644 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
4645 mb_type
= b_mb_type_info
[mb_type
].type
;
4648 goto decode_intra_mb
;
4650 }else if(h
->slice_type
== P_TYPE
/*|| h->slice_type == SP_TYPE */){
4652 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
4653 mb_type
= p_mb_type_info
[mb_type
].type
;
4656 goto decode_intra_mb
;
4659 assert(h
->slice_type
== I_TYPE
);
4662 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "mb_type %d in %c slice to large at %d %d\n", mb_type
, av_get_pict_type_char(h
->slice_type
), s
->mb_x
, s
->mb_y
);
4666 cbp
= i_mb_type_info
[mb_type
].cbp
;
4667 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
4668 mb_type
= i_mb_type_info
[mb_type
].type
;
4671 if(h
->mb_field_decoding_flag
)
4672 mb_type
|= MB_TYPE_INTERLACED
;
4674 h
->slice_table
[ mb_xy
]= h
->slice_num
;
4676 if(IS_INTRA_PCM(mb_type
)){
4679 // we assume these blocks are very rare so we dont optimize it
4680 align_get_bits(&s
->gb
);
4682 // The pixels are stored in the same order as levels in h->mb array.
4683 for(y
=0; y
<16; y
++){
4684 const int index
= 4*(y
&3) + 32*((y
>>2)&1) + 128*(y
>>3);
4685 for(x
=0; x
<16; x
++){
4686 tprintf("LUMA ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
4687 h
->mb
[index
+ (x
&3) + 16*((x
>>2)&1) + 64*(x
>>3)]= get_bits(&s
->gb
, 8);
4691 const int index
= 256 + 4*(y
&3) + 32*(y
>>2);
4693 tprintf("CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
4694 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= get_bits(&s
->gb
, 8);
4698 const int index
= 256 + 64 + 4*(y
&3) + 32*(y
>>2);
4700 tprintf("CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s
->gb
, 8));
4701 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= get_bits(&s
->gb
, 8);
4705 // In deblocking, the quantizer is 0
4706 s
->current_picture
.qscale_table
[mb_xy
]= 0;
4707 h
->chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, 0);
4708 // All coeffs are present
4709 memset(h
->non_zero_count
[mb_xy
], 16, 16);
4711 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4715 fill_caches(h
, mb_type
, 0);
4718 if(IS_INTRA(mb_type
)){
4719 // init_top_left_availability(h);
4720 if(IS_INTRA4x4(mb_type
)){
4723 if(dct8x8_allowed
&& get_bits1(&s
->gb
)){
4724 mb_type
|= MB_TYPE_8x8DCT
;
4728 // fill_intra4x4_pred_table(h);
4729 for(i
=0; i
<16; i
+=di
){
4730 const int mode_coded
= !get_bits1(&s
->gb
);
4731 const int predicted_mode
= pred_intra_mode(h
, i
);
4735 const int rem_mode
= get_bits(&s
->gb
, 3);
4736 if(rem_mode
<predicted_mode
)
4741 mode
= predicted_mode
;
4745 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
4747 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = mode
;
4749 write_back_intra_pred_mode(h
);
4750 if( check_intra4x4_pred_mode(h
) < 0)
4753 h
->intra16x16_pred_mode
= check_intra_pred_mode(h
, h
->intra16x16_pred_mode
);
4754 if(h
->intra16x16_pred_mode
< 0)
4757 h
->chroma_pred_mode
= get_ue_golomb(&s
->gb
);
4759 h
->chroma_pred_mode
= check_intra_pred_mode(h
, h
->chroma_pred_mode
);
4760 if(h
->chroma_pred_mode
< 0)
4762 }else if(partition_count
==4){
4763 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
4765 if(h
->slice_type
== B_TYPE
){
4767 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4768 if(h
->sub_mb_type
[i
] >=13){
4769 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "B sub_mb_type %d out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4772 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4773 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4775 if( IS_DIRECT(h
->sub_mb_type
[0]) || IS_DIRECT(h
->sub_mb_type
[1])
4776 || IS_DIRECT(h
->sub_mb_type
[2]) || IS_DIRECT(h
->sub_mb_type
[3]))
4777 pred_direct_motion(h
, &mb_type
);
4779 assert(h
->slice_type
== P_TYPE
|| h
->slice_type
== SP_TYPE
); //FIXME SP correct ?
4781 h
->sub_mb_type
[i
]= get_ue_golomb(&s
->gb
);
4782 if(h
->sub_mb_type
[i
] >=4){
4783 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "P sub_mb_type %d out of range at %d %d\n", h
->sub_mb_type
[i
], s
->mb_x
, s
->mb_y
);
4786 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
4787 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
4791 for(list
=0; list
<2; list
++){
4792 int ref_count
= IS_REF0(mb_type
) ? 1 : h
->ref_count
[list
];
4793 if(ref_count
== 0) continue;
4794 if (h
->mb_aff_frame
&& h
->mb_field_decoding_flag
) {
4798 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
4799 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4800 ref
[list
][i
] = get_te0_golomb(&s
->gb
, ref_count
); //FIXME init to 0 before and skip?
4809 dct8x8_allowed
= get_dct8x8_allowed(h
);
4811 for(list
=0; list
<2; list
++){
4812 const int ref_count
= IS_REF0(mb_type
) ? 1 : h
->ref_count
[list
];
4813 if(ref_count
== 0) continue;
4816 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
4817 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
4818 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
4820 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
4821 const int sub_mb_type
= h
->sub_mb_type
[i
];
4822 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
4823 for(j
=0; j
<sub_partition_count
[i
]; j
++){
4825 const int index
= 4*i
+ block_width
*j
;
4826 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
4827 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mx
, &my
);
4828 mx
+= get_se_golomb(&s
->gb
);
4829 my
+= get_se_golomb(&s
->gb
);
4830 tprintf("final mv:%d %d\n", mx
, my
);
4832 if(IS_SUB_8X8(sub_mb_type
)){
4833 mv_cache
[ 0 ][0]= mv_cache
[ 1 ][0]=
4834 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
4835 mv_cache
[ 0 ][1]= mv_cache
[ 1 ][1]=
4836 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
4837 }else if(IS_SUB_8X4(sub_mb_type
)){
4838 mv_cache
[ 0 ][0]= mv_cache
[ 1 ][0]= mx
;
4839 mv_cache
[ 0 ][1]= mv_cache
[ 1 ][1]= my
;
4840 }else if(IS_SUB_4X8(sub_mb_type
)){
4841 mv_cache
[ 0 ][0]= mv_cache
[ 8 ][0]= mx
;
4842 mv_cache
[ 0 ][1]= mv_cache
[ 8 ][1]= my
;
4844 assert(IS_SUB_4X4(sub_mb_type
));
4845 mv_cache
[ 0 ][0]= mx
;
4846 mv_cache
[ 0 ][1]= my
;
4850 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
4856 }else if(IS_DIRECT(mb_type
)){
4857 pred_direct_motion(h
, &mb_type
);
4858 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
4860 int list
, mx
, my
, i
;
4861 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4862 if(IS_16X16(mb_type
)){
4863 for(list
=0; list
<2; list
++){
4864 if(h
->ref_count
[list
]>0){
4865 if(IS_DIR(mb_type
, 0, list
)){
4866 const int val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4867 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, val
, 1);
4869 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, (LIST_NOT_USED
&0xFF), 1);
4872 for(list
=0; list
<2; list
++){
4873 if(IS_DIR(mb_type
, 0, list
)){
4874 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mx
, &my
);
4875 mx
+= get_se_golomb(&s
->gb
);
4876 my
+= get_se_golomb(&s
->gb
);
4877 tprintf("final mv:%d %d\n", mx
, my
);
4879 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
4881 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, 0, 4);
4884 else if(IS_16X8(mb_type
)){
4885 for(list
=0; list
<2; list
++){
4886 if(h
->ref_count
[list
]>0){
4888 if(IS_DIR(mb_type
, i
, list
)){
4889 const int val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4890 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, val
, 1);
4892 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, (LIST_NOT_USED
&0xFF), 1);
4896 for(list
=0; list
<2; list
++){
4898 if(IS_DIR(mb_type
, i
, list
)){
4899 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mx
, &my
);
4900 mx
+= get_se_golomb(&s
->gb
);
4901 my
+= get_se_golomb(&s
->gb
);
4902 tprintf("final mv:%d %d\n", mx
, my
);
4904 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
,my
), 4);
4906 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
4910 assert(IS_8X16(mb_type
));
4911 for(list
=0; list
<2; list
++){
4912 if(h
->ref_count
[list
]>0){
4914 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
4915 const int val
= get_te0_golomb(&s
->gb
, h
->ref_count
[list
]);
4916 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, val
, 1);
4918 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, (LIST_NOT_USED
&0xFF), 1);
4922 for(list
=0; list
<2; list
++){
4924 if(IS_DIR(mb_type
, i
, list
)){
4925 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mx
, &my
);
4926 mx
+= get_se_golomb(&s
->gb
);
4927 my
+= get_se_golomb(&s
->gb
);
4928 tprintf("final mv:%d %d\n", mx
, my
);
4930 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
,my
), 4);
4932 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
4938 if(IS_INTER(mb_type
))
4939 write_back_motion(h
, mb_type
);
4941 if(!IS_INTRA16x16(mb_type
)){
4942 cbp
= get_ue_golomb(&s
->gb
);
4944 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "cbp too large (%d) at %d %d\n", cbp
, s
->mb_x
, s
->mb_y
);
4948 if(IS_INTRA4x4(mb_type
))
4949 cbp
= golomb_to_intra4x4_cbp
[cbp
];
4951 cbp
= golomb_to_inter_cbp
[cbp
];
4954 if(dct8x8_allowed
&& (cbp
&15) && !IS_INTRA(mb_type
)){
4955 if(get_bits1(&s
->gb
))
4956 mb_type
|= MB_TYPE_8x8DCT
;
4958 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
4960 if(cbp
|| IS_INTRA16x16(mb_type
)){
4961 int i8x8
, i4x4
, chroma_idx
;
4962 int chroma_qp
, dquant
;
4963 GetBitContext
*gb
= IS_INTRA(mb_type
) ? h
->intra_gb_ptr
: h
->inter_gb_ptr
;
4964 const uint8_t *scan
, *dc_scan
;
4966 // fill_non_zero_count_cache(h);
4968 if(IS_INTERLACED(mb_type
)){
4969 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
4970 dc_scan
= luma_dc_field_scan
;
4972 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
4973 dc_scan
= luma_dc_zigzag_scan
;
4976 dquant
= get_se_golomb(&s
->gb
);
4978 if( dquant
> 25 || dquant
< -26 ){
4979 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "dquant out of range (%d) at %d %d\n", dquant
, s
->mb_x
, s
->mb_y
);
4983 s
->qscale
+= dquant
;
4984 if(((unsigned)s
->qscale
) > 51){
4985 if(s
->qscale
<0) s
->qscale
+= 52;
4986 else s
->qscale
-= 52;
4989 h
->chroma_qp
= chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, s
->qscale
);
4990 if(IS_INTRA16x16(mb_type
)){
4991 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
, LUMA_DC_BLOCK_INDEX
, dc_scan
, h
->dequant4_coeff
[s
->qscale
], 16) < 0){
4992 return -1; //FIXME continue if partitioned and other return -1 too
4995 assert((cbp
&15) == 0 || (cbp
&15) == 15);
4998 for(i8x8
=0; i8x8
<4; i8x8
++){
4999 for(i4x4
=0; i4x4
<4; i4x4
++){
5000 const int index
= i4x4
+ 4*i8x8
;
5001 if( decode_residual(h
, h
->intra_gb_ptr
, h
->mb
+ 16*index
, index
, scan
+ 1, h
->dequant4_coeff
[s
->qscale
], 15) < 0 ){
5007 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
5010 for(i8x8
=0; i8x8
<4; i8x8
++){
5011 if(cbp
& (1<<i8x8
)){
5012 if(IS_8x8DCT(mb_type
)){
5013 DCTELEM
*buf
= &h
->mb
[64*i8x8
];
5015 for(i4x4
=0; i4x4
<4; i4x4
++){
5016 if( decode_residual(h
, gb
, buf
, i4x4
+4*i8x8
, zigzag_scan8x8_cavlc
+16*i4x4
,
5017 h
->dequant8_coeff
[s
->qscale
], 16) <0 )
5023 buf
[i
] = (buf
[i
] + 2) >> 2;
5025 nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
5026 nnz
[0] |= nnz
[1] | nnz
[8] | nnz
[9];
5028 for(i4x4
=0; i4x4
<4; i4x4
++){
5029 const int index
= i4x4
+ 4*i8x8
;
5031 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
, h
->dequant4_coeff
[s
->qscale
], 16) <0 ){
5037 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
5038 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
5044 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++)
5045 if( decode_residual(h
, gb
, h
->mb
+ 256 + 16*4*chroma_idx
, CHROMA_DC_BLOCK_INDEX
, chroma_dc_scan
, h
->dequant4_coeff
[chroma_qp
], 4) < 0){
5051 for(chroma_idx
=0; chroma_idx
<2; chroma_idx
++){
5052 for(i4x4
=0; i4x4
<4; i4x4
++){
5053 const int index
= 16 + 4*chroma_idx
+ i4x4
;
5054 if( decode_residual(h
, gb
, h
->mb
+ 16*index
, index
, scan
+ 1, h
->dequant4_coeff
[chroma_qp
], 15) < 0){
5060 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5061 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5062 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5065 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
5066 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
5067 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
5068 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
5070 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
5071 write_back_non_zero_count(h
);
/**
 * Reads one context-coded CABAC bin using cabac_state[70 + ctx] and returns it
 * (used by the caller as the field decoding flag of a macroblock pair).
 *
 * mb_y is rounded down to an even row, so mba_xy addresses the macroblock to
 * the left on that even row and mbb_xy the macroblock two rows above it.
 * NOTE(review): the statements guarded by the two neighbour tests are not
 * present in this listing; presumably each one bumps ctx -- confirm.
 */
5076 static int decode_cabac_field_decoding_flag(H264Context
*h
) {
5077 MpegEncContext
* const s
= &h
->s
;
5078 const int mb_x
= s
->mb_x
;
5079 const int mb_y
= s
->mb_y
& ~1;
5080 const int mba_xy
= mb_x
- 1 + mb_y
*s
->mb_stride
;
5081 const int mbb_xy
= mb_x
+ (mb_y
-2)*s
->mb_stride
;
5083 unsigned int ctx
= 0;
/* left neighbour: only counts when it is in the current slice and interlaced */
5085 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mba_xy
] ) ) {
/* neighbour above: same two requirements */
5088 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& IS_INTERLACED( s
->current_picture
.mb_type
[mbb_xy
] ) ) {
5092 return get_cabac( &h
->cabac
, &h
->cabac_state
[70 + ctx
] );
/**
 * Decodes an intra macroblock type index from CABAC bins.
 *
 * @param ctx_base    index of the first context in h->cabac_state to use
 * @param intra_slice used as an offset when selecting the later contexts
 *                    (state[2+intra_slice], state[3+intra_slice],
 *                    state[3+2*intra_slice])
 * @return 0 for I4x4, 25 for PCM; for I16x16 the index starts at 1 and is
 *         increased by 12 when cbp_luma != 0 and by 4 or 8 for cbp_chroma 1/2.
 *
 * NOTE(review): the declarations of ctx/mb_type, the branch that chooses
 * between state[ctx] and state[0], the effect of the last two bins and the
 * final return are not visible in this listing.
 */
5095 static int decode_cabac_intra_mb_type(H264Context
*h
, int ctx_base
, int intra_slice
) {
5096 uint8_t *state
= &h
->cabac_state
[ctx_base
];
5100 MpegEncContext
* const s
= &h
->s
;
5101 const int mba_xy
= h
->left_mb_xy
[0];
5102 const int mbb_xy
= h
->top_mb_xy
;
/* neighbour tests: left/top macroblock in this slice and not coded as I4x4 */
5104 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mba_xy
] ) )
5106 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_INTRA4x4( s
->current_picture
.mb_type
[mbb_xy
] ) )
5108 if( get_cabac( &h
->cabac
, &state
[ctx
] ) == 0 )
5109 return 0; /* I4x4 */
5112 if( get_cabac( &h
->cabac
, &state
[0] ) == 0 )
5113 return 0; /* I4x4 */
/* the terminate bin distinguishes PCM from I16x16 */
5116 if( get_cabac_terminate( &h
->cabac
) )
5117 return 25; /* PCM */
5119 mb_type
= 1; /* I16x16 */
5120 if( get_cabac( &h
->cabac
, &state
[1] ) )
5121 mb_type
+= 12; /* cbp_luma != 0 */
5123 if( get_cabac( &h
->cabac
, &state
[2] ) ) {
5124 if( get_cabac( &h
->cabac
, &state
[2+intra_slice
] ) )
5125 mb_type
+= 4 * 2; /* cbp_chroma == 2 */
5127 mb_type
+= 4 * 1; /* cbp_chroma == 1 */
/* two further bins; what they add to mb_type is not shown here */
5129 if( get_cabac( &h
->cabac
, &state
[3+intra_slice
] ) )
5131 if( get_cabac( &h
->cabac
, &state
[3+2*intra_slice
] ) )
/**
 * Decodes the macroblock type index for the current slice type.
 *
 * - I slices: delegates to decode_cabac_intra_mb_type(h, 3, 1).
 * - P slices: bins on contexts 14..17 select 0 (16x16), 1 (16x8), 2 (8x16)
 *   or 3 (8x8); the intra case returns decode_cabac_intra_mb_type(h, 17, 0) + 5.
 * - B slices: first bin on context 27+ctx, then contexts 27+3, 27+4, 27+5;
 *   the intra case returns decode_cabac_intra_mb_type(h, 32, 0) + 23.
 *
 * NOTE(review): the `else` lines, the ctx/bits declarations and increments,
 * the test in front of `return bits + 3` and the return for other slice
 * types are not visible in this listing.
 */
5136 static int decode_cabac_mb_type( H264Context
*h
) {
5137 MpegEncContext
* const s
= &h
->s
;
5139 if( h
->slice_type
== I_TYPE
) {
5140 return decode_cabac_intra_mb_type(h
, 3, 1);
5141 } else if( h
->slice_type
== P_TYPE
) {
5142 if( get_cabac( &h
->cabac
, &h
->cabac_state
[14] ) == 0 ) {
5144 if( get_cabac( &h
->cabac
, &h
->cabac_state
[15] ) == 0 ) {
5145 if( get_cabac( &h
->cabac
, &h
->cabac_state
[16] ) == 0 )
5146 return 0; /* P_L0_D16x16; */
5148 return 3; /* P_8x8; */
5150 if( get_cabac( &h
->cabac
, &h
->cabac_state
[17] ) == 0 )
5151 return 2; /* P_L0_D8x16; */
5153 return 1; /* P_L0_D16x8; */
/* intra macroblock inside a P slice: intra indices are offset by 5 */
5156 return decode_cabac_intra_mb_type(h
, 17, 0) + 5;
5158 } else if( h
->slice_type
== B_TYPE
) {
5159 const int mba_xy
= h
->left_mb_xy
[0];
5160 const int mbb_xy
= h
->top_mb_xy
;
/* neighbour tests: in this slice and neither skipped nor direct */
5164 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mba_xy
] )
5165 && !IS_DIRECT( s
->current_picture
.mb_type
[mba_xy
] ) )
5167 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mbb_xy
] )
5168 && !IS_DIRECT( s
->current_picture
.mb_type
[mbb_xy
] ) )
5171 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[27+ctx
] ) )
5172 return 0; /* B_Direct_16x16 */
5174 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[27+3] ) ) {
5175 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] ); /* B_L[01]_16x16 */
/* assemble a 4-bit code, most significant bit first */
5178 bits
= get_cabac( &h
->cabac
, &h
->cabac_state
[27+4] ) << 3;
5179 bits
|= get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] ) << 2;
5180 bits
|= get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] ) << 1;
5181 bits
|= get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] );
5183 return bits
+ 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5184 else if( bits
== 13 ) {
/* intra macroblock inside a B slice: intra indices are offset by 23 */
5185 return decode_cabac_intra_mb_type(h
, 32, 0) + 23;
5186 } else if( bits
== 14 )
5187 return 11; /* B_L1_L0_8x16 */
5188 else if( bits
== 15 )
5189 return 22; /* B_8x8 */
/* remaining codes take one more bin, giving a 5-bit value */
5191 bits
= ( bits
<<1 ) | get_cabac( &h
->cabac
, &h
->cabac_state
[27+5] );
5192 return bits
- 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5194 /* TODO SI/SP frames? */
/**
 * Reads the skip bin of the current macroblock.
 *
 * The context is cabac_state[11 + ctx] for P and SP slices and
 * cabac_state[24 + ctx] on the other visible return path. The two neighbour
 * tests look at the macroblock to the left (mb_xy - 1) and the one above
 * (mb_xy - mb_stride): same slice and not skipped.
 * NOTE(review): the declaration of ctx and the statements guarded by the
 * neighbour tests are not visible in this listing.
 */
5199 static int decode_cabac_mb_skip( H264Context
*h
) {
5200 MpegEncContext
* const s
= &h
->s
;
5201 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5202 const int mba_xy
= mb_xy
- 1;
5203 const int mbb_xy
= mb_xy
- s
->mb_stride
;
5206 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mba_xy
] ))
5208 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& !IS_SKIP( s
->current_picture
.mb_type
[mbb_xy
] ))
/* P/SP slices and the remaining slice types use different context ranges */
5211 if( h
->slice_type
== P_TYPE
|| h
->slice_type
== SP_TYPE
)
5212 return get_cabac( &h
->cabac
, &h
->cabac_state
[11+ctx
] );
5214 return get_cabac( &h
->cabac
, &h
->cabac_state
[24+ctx
] );
/**
 * Decodes an intra 4x4 prediction mode given the predicted mode.
 *
 * One bin is read on context 68, then up to three bins on context 69; the
 * resulting mode is finally compared against pred_mode.
 * NOTE(review): the statements executed for each bin, the adjustment made
 * when mode >= pred_mode and the return statements are not visible in this
 * listing.
 *
 * @param pred_mode predicted mode for the block, supplied by the caller
 */
5217 static int decode_cabac_mb_intra4x4_pred_mode( H264Context
*h
, int pred_mode
) {
5220 if( get_cabac( &h
->cabac
, &h
->cabac_state
[68] ) )
5223 if( get_cabac( &h
->cabac
, &h
->cabac_state
[69] ) )
5225 if( get_cabac( &h
->cabac
, &h
->cabac_state
[69] ) )
5227 if( get_cabac( &h
->cabac
, &h
->cabac_state
[69] ) )
5229 if( mode
>= pred_mode
)
/**
 * Decodes the chroma intra prediction mode of the current macroblock.
 *
 * The first bin uses context 64 + ctx; the neighbour tests check that the
 * left/top macroblock is in the current slice and has a non-zero entry in
 * chroma_pred_mode_table. Up to two further bins use context 64 + 3.
 * NOTE(review): the ctx declaration/updates and the return values are not
 * visible in this listing.
 */
5235 static int decode_cabac_mb_chroma_pre_mode( H264Context
*h
) {
5236 const int mba_xy
= h
->left_mb_xy
[0];
5237 const int mbb_xy
= h
->top_mb_xy
;
5241 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5242 if( h
->slice_table
[mba_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mba_xy
] != 0 )
5245 if( h
->slice_table
[mbb_xy
] == h
->slice_num
&& h
->chroma_pred_mode_table
[mbb_xy
] != 0 )
5248 if( get_cabac( &h
->cabac
, &h
->cabac_state
[64+ctx
] ) == 0 )
5251 if( get_cabac( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
5253 if( get_cabac( &h
->cabac
, &h
->cabac_state
[64+3] ) == 0 )
/* Column (0..3) of each of the 16 blocks, indexed by block number. */
5259 static const uint8_t block_idx_x
[16] = {
5260 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
/* Row (0..3) of each of the 16 blocks, indexed by block number. */
5262 static const uint8_t block_idx_y
[16] = {
5263 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
/* Table indexed as [x][y] by its users below; the initializer rows are not
 * present in this listing. Presumably the inverse of the two tables above. */
5265 static const uint8_t block_idx_xy
[4][4] = {
/**
 * Decodes the luma coded block pattern, one bin per 8x8 block (i8x8 = 0..3).
 *
 * For each 8x8 block the position (x, y) of its first 4x4 block is looked up,
 * the cbp of the left (cbp_a) and top (cbp_b) neighbours is selected, and a
 * bin is read on context 73 + ctx. A neighbour outside the macroblock is
 * taken from h->left_cbp / the top macroblock when that macroblock exists
 * and belongs to the current slice.
 * NOTE(review): the local declarations, the first branch of each if/else
 * chain, the ctx updates and the accumulation/return of the result are not
 * visible in this listing.
 */
5272 static int decode_cabac_mb_cbp_luma( H264Context
*h
) {
5273 MpegEncContext
* const s
= &h
->s
;
5278 for( i8x8
= 0; i8x8
< 4; i8x8
++ ) {
5284 x
= block_idx_x
[4*i8x8
];
5285 y
= block_idx_y
[4*i8x8
];
5289 else if( s
->mb_x
> 0 && (h
->slice_table
[h
->left_mb_xy
[0]] == h
->slice_num
)) {
5290 cbp_a
= h
->left_cbp
;
5291 tprintf("cbp_a = left_cbp = %x\n", cbp_a
);
5296 else if( s
->mb_y
> 0 && (h
->slice_table
[h
->top_mb_xy
] == h
->slice_num
)) {
5298 tprintf("cbp_b = top_cbp = %x\n", cbp_b
);
5301 /* No need to test for skip as we put 0 for skip block */
5302 /* No need to test for IPCM as we put 1 for IPCM block */
/* (x-1)&3 and (y-1)&3 wrap around to the last column/row of the neighbour */
5304 int i8x8a
= block_idx_xy
[(x
-1)&0x03][y
]/4;
5305 if( ((cbp_a
>> i8x8a
)&0x01) == 0 )
5310 int i8x8b
= block_idx_xy
[x
][(y
-1)&0x03]/4;
5311 if( ((cbp_b
>> i8x8b
)&0x01) == 0 )
5315 if( get_cabac( &h
->cabac
, &h
->cabac_state
[73 + ctx
] ) ) {
/**
 * Decodes the chroma coded block pattern.
 *
 * cbp_a / cbp_b are bits 4-5 of the left / top macroblock's cbp. The first
 * bin uses context 77 + ctx with ctx built from "neighbour > 0"; the second
 * uses ctx built from "neighbour == 2", and the function then returns
 * 1 + that bin.
 * NOTE(review): the declarations, the ctx (re)initialisations and the
 * statement executed when the first bin is 0 are not visible in this listing.
 */
5321 static int decode_cabac_mb_cbp_chroma( H264Context
*h
) {
5325 cbp_a
= (h
->left_cbp
>>4)&0x03;
5326 cbp_b
= (h
-> top_cbp
>>4)&0x03;
5329 if( cbp_a
> 0 ) ctx
++;
5330 if( cbp_b
> 0 ) ctx
+= 2;
5331 if( get_cabac( &h
->cabac
, &h
->cabac_state
[77 + ctx
] ) == 0 )
5335 if( cbp_a
== 2 ) ctx
++;
5336 if( cbp_b
== 2 ) ctx
+= 2;
5337 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[77 + ctx
] );
/**
 * Decodes the macroblock quantiser delta.
 *
 * mbn_xy addresses the previous macroblock: the one to the left, or the last
 * macroblock of the row above on the other visible path. The neighbour test
 * requires a non-zero previous delta (h->last_qscale_diff) and that the
 * previous macroblock is intra 16x16 or has any of the low six cbp bits set.
 * Bins are then read on context 60 + ctx until a zero bin is seen.
 * NOTE(review): the declarations, the condition choosing between the two
 * mbn_xy forms, the loop body and the non-negated return are not visible in
 * this listing. No upper bound on the number of bins is visible either --
 * confirm how a damaged stream is handled.
 */
5339 static int decode_cabac_mb_dqp( H264Context
*h
) {
5340 MpegEncContext
* const s
= &h
->s
;
5346 mbn_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
- 1;
5348 mbn_xy
= s
->mb_width
- 1 + (s
->mb_y
-1)*s
->mb_stride
;
5350 if( h
->last_qscale_diff
!= 0 && ( IS_INTRA16x16(s
->current_picture
.mb_type
[mbn_xy
] ) || (h
->cbp_table
[mbn_xy
]&0x3f) ) )
5353 while( get_cabac( &h
->cabac
, &h
->cabac_state
[60 + ctx
] ) ) {
5364 return -(val
+ 1)/2;
/**
 * Decodes a P sub-macroblock type from up to three bins on contexts 21, 22
 * and 23. Note that the second test is negated.
 * NOTE(review): the value returned for each outcome is not visible in this
 * listing.
 */
5366 static int decode_cabac_p_mb_sub_type( H264Context
*h
) {
5367 if( get_cabac( &h
->cabac
, &h
->cabac_state
[21] ) )
5369 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[22] ) )
5371 if( get_cabac( &h
->cabac
, &h
->cabac_state
[23] ) )
/**
 * Decodes a B sub-macroblock type index using contexts 36..39.
 *
 * Visible results: 0 (B_Direct_8x8) when the first bin is 0; 1 or 2
 * (B_L0_8x8 / B_L1_8x8) when the second bin is 0; 11 or 12 (B_L1_4x4 /
 * B_Bi_4x4) when the bins on contexts 38 and 39 are both 1. Otherwise two
 * more bins on context 39 are added to `type` with weights 2 and 1.
 * NOTE(review): the declaration and starting value(s) of `type` and the final
 * return are not visible in this listing.
 */
5375 static int decode_cabac_b_mb_sub_type( H264Context
*h
) {
5377 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[36] ) )
5378 return 0; /* B_Direct_8x8 */
5379 if( !get_cabac( &h
->cabac
, &h
->cabac_state
[37] ) )
5380 return 1 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L0_8x8, B_L1_8x8 */
5382 if( get_cabac( &h
->cabac
, &h
->cabac_state
[38] ) ) {
5383 if( get_cabac( &h
->cabac
, &h
->cabac_state
[39] ) )
5384 return 11 + get_cabac( &h
->cabac
, &h
->cabac_state
[39] ); /* B_L1_4x4, B_Bi_4x4 */
5387 type
+= 2*get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5388 type
+= get_cabac( &h
->cabac
, &h
->cabac_state
[39] );
5392 static inline int decode_cabac_mb_transform_size( H264Context
*h
) {
5393 return get_cabac( &h
->cabac
, &h
->cabac_state
[399 + h
->neighbor_transform_size
] );
/**
 * Decodes a reference index for block n of the given list.
 *
 * refa / refb are the cached reference indices of the left (scan8[n] - 1)
 * and top (scan8[n] - 8) neighbours. In B slices a neighbour only counts
 * when its index is > 0 and its direct_cache entry is clear. Bins are then
 * read on context 54 + ctx until a zero bin is seen.
 * NOTE(review): the declarations, the ctx updates, the non-B branch, the loop
 * body and the return are not visible in this listing. No upper bound on the
 * number of bins is visible either -- confirm how a damaged stream is handled.
 *
 * @param list reference list (first index of ref_cache)
 * @param n    block index into scan8
 */
5396 static int decode_cabac_mb_ref( H264Context
*h
, int list
, int n
) {
5397 int refa
= h
->ref_cache
[list
][scan8
[n
] - 1];
5398 int refb
= h
->ref_cache
[list
][scan8
[n
] - 8];
5402 if( h
->slice_type
== B_TYPE
) {
5403 if( refa
> 0 && !h
->direct_cache
[scan8
[n
] - 1] )
5405 if( refb
> 0 && !h
->direct_cache
[scan8
[n
] - 8] )
5414 while( get_cabac( &h
->cabac
, &h
->cabac_state
[54+ctx
] ) ) {
/**
 * Decodes one component of a motion vector difference.
 *
 * amvd is the sum of the absolute values of that component in the left and
 * top neighbours' cached differences; it selects the context of the first
 * bin (one visible threshold is amvd > 32). Contexts start at 40 for l == 0
 * and at 47 otherwise. After the first bin a context-coded prefix is read
 * while mvd < 9, followed by bypass-coded bins, and a final bypass bin
 * selects the negative value.
 * NOTE(review): the ctx selection, the loop bodies and the positive return
 * are not visible in this listing.
 *
 * @param list reference list (first index of mvd_cache)
 * @param n    block index into scan8
 * @param l    component selector (last index of mvd_cache)
 */
5424 static int decode_cabac_mb_mvd( H264Context
*h
, int list
, int n
, int l
) {
5425 int amvd
= abs( h
->mvd_cache
[list
][scan8
[n
] - 1][l
] ) +
5426 abs( h
->mvd_cache
[list
][scan8
[n
] - 8][l
] );
5427 int ctxbase
= (l
== 0) ? 40 : 47;
5432 else if( amvd
> 32 )
5437 if(!get_cabac(&h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
]))
/* context-coded prefix, limited by the mvd < 9 test */
5442 while( mvd
< 9 && get_cabac( &h
->cabac
, &h
->cabac_state
[ctxbase
+ctx
] ) ) {
/* bypass-coded part */
5450 while( get_cabac_bypass( &h
->cabac
) ) {
5455 if( get_cabac_bypass( &h
->cabac
) )
/* sign bin */
5459 if( get_cabac_bypass( &h
->cabac
) ) return -mvd
;
/**
 * Computes the context offset used for the coded block flag of a residual
 * block. nza / nzb describe the left / top neighbour and depend on cat:
 *  - first branch (condition not shown here): bit 0x100 of left_cbp / top_cbp
 *  - cat 1 or 2: non_zero_count_cache entries next to scan8[idx]
 *  - cat 3:      bit (6 + idx) of left_cbp / top_cbp
 *  - last branch: non_zero_count_cache entries next to scan8[16 + idx]
 * The result is ctx + 4 * cat.
 * NOTE(review): the derivation of ctx from nza / nzb is not visible in this
 * listing.
 *
 * @param cat block category
 * @param idx block index within the category
 */
5463 static int inline get_cabac_cbf_ctx( H264Context
*h
, int cat
, int idx
) {
5468 nza
= h
->left_cbp
&0x100;
5469 nzb
= h
-> top_cbp
&0x100;
5470 } else if( cat
== 1 || cat
== 2 ) {
5471 nza
= h
->non_zero_count_cache
[scan8
[idx
] - 1];
5472 nzb
= h
->non_zero_count_cache
[scan8
[idx
] - 8];
5473 } else if( cat
== 3 ) {
5474 nza
= (h
->left_cbp
>>(6+idx
))&0x01;
5475 nzb
= (h
-> top_cbp
>>(6+idx
))&0x01;
5478 nza
= h
->non_zero_count_cache
[scan8
[16+idx
] - 1];
5479 nzb
= h
->non_zero_count_cache
[scan8
[16+idx
] - 8];
5488 return ctx
+ 4 * cat
;
/**
 * Decodes the coefficients of one residual block from the CABAC stream.
 *
 * Visible stages:
 *  1. coded block flag, read on context 85 + get_cabac_cbf_ctx(h, cat, n);
 *     when it is 0 the block's non_zero_count_cache entry is cleared
 *  2. significance map: for each position before the last one a
 *     "significant" bin and, if set, a "last" bin; positions are collected
 *     in index[] and counted in coeff_count
 *  3. bookkeeping: cbp_table bits (0x100, 0x40 << n) or non_zero_count_cache
 *     entries are updated depending on cat
 *  4. levels, decoded from the last collected coefficient to the first:
 *     magnitude 1 or a context-coded prefix (up to 15) plus bypass-coded
 *     bits, then a bypass-coded sign
 * For cat 0 and 3 the level is stored as is; for the other categories it is
 * multiplied by qmul[j].
 * NOTE(review): several declarations, branch conditions (including the one
 * selecting the 8x8 offset tables) and the return statements are not visible
 * in this listing.
 *
 * @param block     destination coefficients, written at scantable positions
 * @param cat       block category (see the table further down)
 * @param n         block index, meaning depends on cat
 * @param scantable maps coefficient order to positions in block
 * @param qmul      per-position multipliers for the non-DC categories
 * @param max_coeff number of coefficients in the block
 */
5491 static int inline decode_cabac_residual( H264Context
*h
, DCTELEM
*block
, int cat
, int n
, const uint8_t *scantable
, const uint16_t *qmul
, int max_coeff
) {
5492 const int mb_xy
= h
->s
.mb_x
+ h
->s
.mb_y
*h
->s
.mb_stride
;
/* context bases: first index is mb_field_decoding_flag, second is cat */
5493 static const int significant_coeff_flag_field_offset
[2] = { 105, 277 };
5494 static const int last_significant_coeff_flag_field_offset
[2] = { 166, 338 };
5495 static const int significant_coeff_flag_offset
[6] = { 0, 15, 29, 44, 47, 297 };
5496 static const int last_significant_coeff_flag_offset
[6] = { 0, 15, 29, 44, 47, 251 };
5497 static const int coeff_abs_level_m1_offset
[6] = { 227+0, 227+10, 227+20, 227+30, 227+39, 426 };
/* identity map: coefficient position == context offset */
5498 static const int identity
[15] = {
5499 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
/* per-position context offsets used with the 8x8 tables */
5501 static const int significant_coeff_flag_offset_8x8
[63] = {
5502 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5503 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5504 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5505 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12
5507 static const int last_coeff_flag_offset_8x8
[63] = {
5508 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5509 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5510 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5511 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
5517 int coeff_count
= 0;
5520 int abslevelgt1
= 0;
5522 const int* significant_coeff_ctx_offset
;
5523 const int* last_coeff_ctx_offset
;
5524 const int significant_coeff_ctx_base
= significant_coeff_flag_offset
[cat
]
5525 + significant_coeff_flag_field_offset
[h
->mb_field_decoding_flag
];
5526 const int last_coeff_ctx_base
= last_significant_coeff_flag_offset
[cat
]
5527 + last_significant_coeff_flag_field_offset
[h
->mb_field_decoding_flag
];
5529 /* cat: 0-> DC 16x16 n = 0
5530 * 1-> AC 16x16 n = luma4x4idx
5531 * 2-> Luma4x4 n = luma4x4idx
5532 * 3-> DC Chroma n = iCbCr
5533 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5534 * 5-> Luma8x8 n = 4 * luma8x8idx
5537 /* read coded block flag */
5539 significant_coeff_ctx_offset
= significant_coeff_flag_offset_8x8
;
5540 last_coeff_ctx_offset
= last_coeff_flag_offset_8x8
;
5542 if( get_cabac( &h
->cabac
, &h
->cabac_state
[85 + get_cabac_cbf_ctx( h
, cat
, n
) ] ) == 0 ) {
/* block not coded: clear its non-zero count */
5543 if( cat
== 1 || cat
== 2 )
5544 h
->non_zero_count_cache
[scan8
[n
]] = 0;
5546 h
->non_zero_count_cache
[scan8
[16+n
]] = 0;
5551 significant_coeff_ctx_offset
=
5552 last_coeff_ctx_offset
= identity
;
/* significance map over all positions except the last one */
5555 for(last
= 0; last
< max_coeff
- 1; last
++) {
5556 int sig_ctx
= significant_coeff_ctx_base
+ significant_coeff_ctx_offset
[last
];
5557 if( get_cabac( &h
->cabac
, &h
->cabac_state
[sig_ctx
] )) {
5558 int last_ctx
= last_coeff_ctx_base
+ last_coeff_ctx_offset
[last
];
5559 index
[coeff_count
++] = last
;
5560 if( get_cabac( &h
->cabac
, &h
->cabac_state
[last_ctx
] ) ) {
/* loop ran to the end: the final position is recorded without reading a bin */
5566 if( last
== max_coeff
-1 ) {
5567 index
[coeff_count
++] = last
;
5569 assert(coeff_count
> 0);
/* record that the block has coefficients (cbp bit or non-zero count) */
5572 h
->cbp_table
[mb_xy
] |= 0x100;
5573 else if( cat
== 1 || cat
== 2 )
5574 h
->non_zero_count_cache
[scan8
[n
]] = coeff_count
;
5576 h
->cbp_table
[mb_xy
] |= 0x40 << n
;
5578 h
->non_zero_count_cache
[scan8
[16+n
]] = coeff_count
;
5581 fill_rectangle(&h
->non_zero_count_cache
[scan8
[n
]], 2, 2, 8, 1, 1);
/* levels are decoded in reverse order of the collected positions */
5584 for( i
= coeff_count
- 1; i
>= 0; i
-- ) {
5585 int ctx
= (abslevelgt1
!= 0 ? 0 : FFMIN( 4, abslevel1
)) + coeff_abs_level_m1_offset
[cat
];
5586 int j
= scantable
[index
[i
]];
/* first bin 0: magnitude is 1, only the sign follows */
5588 if( get_cabac( &h
->cabac
, &h
->cabac_state
[ctx
] ) == 0 ) {
5589 if( cat
== 0 || cat
== 3 ) {
5590 if( get_cabac_bypass( &h
->cabac
) ) block
[j
] = -1;
5593 if( get_cabac_bypass( &h
->cabac
) ) block
[j
] = -qmul
[j
];
5594 else block
[j
] = qmul
[j
];
/* larger magnitude: context-coded prefix, then bypass-coded bits */
5600 ctx
= 5 + FFMIN( 4, abslevelgt1
) + coeff_abs_level_m1_offset
[cat
];
5601 while( coeff_abs
< 15 && get_cabac( &h
->cabac
, &h
->cabac_state
[ctx
] ) ) {
5605 if( coeff_abs
>= 15 ) {
5607 while( get_cabac_bypass( &h
->cabac
) ) {
5608 coeff_abs
+= 1 << j
;
5613 if( get_cabac_bypass( &h
->cabac
) )
5614 coeff_abs
+= 1 << j
;
/* sign bin; cat 0 and 3 are stored without the qmul factor */
5618 if( cat
== 0 || cat
== 3 ) {
5619 if( get_cabac_bypass( &h
->cabac
) ) block
[j
] = -coeff_abs
;
5620 else block
[j
] = coeff_abs
;
5622 if( get_cabac_bypass( &h
->cabac
) ) block
[j
] = -coeff_abs
* qmul
[j
];
5623 else block
[j
] = coeff_abs
* qmul
[j
];
/**
 * Stores the indices of the top and left neighbour macroblocks of the
 * current macroblock in h->top_mb_xy and h->left_mb_xy[0].
 *
 * By default these are mb_xy - mb_stride and mb_xy - 1. When
 * h->mb_aff_frame is set, the frame/field flags of the current, top and left
 * macroblock pairs are compared: the top index may be moved up by one more
 * row, and the left index is set to pair_xy - 1 when the left pair's
 * frame/field mode differs from the current one.
 * NOTE(review): the head of the condition that guards the top adjustment is
 * not visible in this listing.
 */
5632 void inline compute_mb_neighboors(H264Context
*h
)
5634 MpegEncContext
* const s
= &h
->s
;
5635 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5636 h
->top_mb_xy
= mb_xy
- s
->mb_stride
;
5637 h
->left_mb_xy
[0] = mb_xy
- 1;
5638 if(h
->mb_aff_frame
){
/* pair_xy: index of the macroblock on the even row of the current pair */
5639 const int pair_xy
= s
->mb_x
+ (s
->mb_y
& ~1)*s
->mb_stride
;
5640 const int top_pair_xy
= pair_xy
- s
->mb_stride
;
5641 const int top_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[top_pair_xy
]);
5642 const int left_mb_frame_flag
= !IS_INTERLACED(s
->current_picture
.mb_type
[pair_xy
-1]);
5643 const int curr_mb_frame_flag
= !h
->mb_field_decoding_flag
;
5644 const int bottom
= (s
->mb_y
& 1);
5646 ? !curr_mb_frame_flag
// bottom macroblock
5647 : (!curr_mb_frame_flag
&& !top_mb_frame_flag
) // top macroblock
5649 h
->top_mb_xy
-= s
->mb_stride
;
5651 if (left_mb_frame_flag
!= curr_mb_frame_flag
) {
5652 h
->left_mb_xy
[0] = pair_xy
- 1;
/**
 * Decodes a macroblock (CABAC entropy coding).
 * @returns 0 if ok, -1 if an error is noticed
 */
/*
 * Overview of the visible stages of decode_mb_cabac():
 *  1. clear the coefficient buffer and, outside I/SI slices, read the skip bin
 *  2. determine mb_field_decoding_flag and the neighbour indices
 *  3. decode the macroblock type and map it through the per-slice-type tables
 *  4. PCM macroblocks: copy raw samples from the byte stream and restart CABAC
 *  5. intra prediction modes, or reference indices and motion vectors for the
 *     8x8, direct, 16x16, 16x8 and 8x16 partitionings
 *  6. coded block pattern, optional 8x8 transform flag, quantiser delta
 *  7. residual coefficients for luma and chroma
 *  8. write back qscale and the non-zero counts
 * The error paths visible here return -1.
 * NOTE(review): many lines of the function (declarations, else branches,
 * loop heads, returns) are not present in this listing.
 */
5662 static int decode_mb_cabac(H264Context
*h
) {
5663 MpegEncContext
* const s
= &h
->s
;
5664 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
5665 int mb_type
, partition_count
, cbp
= 0;
5666 int dct8x8_allowed
= h
->pps
.transform_8x8_mode
;
5668 s
->dsp
.clear_blocks(h
->mb
); //FIXME avoid if already clear (move after skip handling?)
5670 tprintf("pic:%d mb:%d/%d\n", h
->frame_num
, s
->mb_x
, s
->mb_y
);
5671 if( h
->slice_type
!= I_TYPE
&& h
->slice_type
!= SI_TYPE
) {
5672 /* read skip flags */
5673 if( decode_cabac_mb_skip( h
) ) {
/* skipped macroblock: reset the per-macroblock tables read by later context derivation */
5676 h
->cbp_table
[mb_xy
] = 0;
5677 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5678 h
->last_qscale_diff
= 0;
/* MBAFF: the flag is decoded for the top macroblock of a pair, or for the bottom one when the previous macroblock was skipped */
5684 if(h
->mb_aff_frame
){
5685 if ( ((s
->mb_y
&1) == 0) || h
->prev_mb_skipped
)
5686 h
->mb_field_decoding_flag
= decode_cabac_field_decoding_flag(h
);
5688 h
->mb_field_decoding_flag
= (s
->picture_structure
!=PICT_FRAME
);
5690 h
->prev_mb_skipped
= 0;
5692 compute_mb_neighboors(h
);
5693 if( ( mb_type
= decode_cabac_mb_type( h
) ) < 0 ) {
5694 av_log( h
->s
.avctx
, AV_LOG_ERROR
, "decode_cabac_mb_type failed\n" );
/* translate the decoded index through the table of the current slice type */
5698 if( h
->slice_type
== B_TYPE
) {
5700 partition_count
= b_mb_type_info
[mb_type
].partition_count
;
5701 mb_type
= b_mb_type_info
[mb_type
].type
;
5704 goto decode_intra_mb
;
5706 } else if( h
->slice_type
== P_TYPE
) {
5708 partition_count
= p_mb_type_info
[mb_type
].partition_count
;
5709 mb_type
= p_mb_type_info
[mb_type
].type
;
5712 goto decode_intra_mb
;
5715 assert(h
->slice_type
== I_TYPE
);
5717 partition_count
= 0;
5718 cbp
= i_mb_type_info
[mb_type
].cbp
;
5719 h
->intra16x16_pred_mode
= i_mb_type_info
[mb_type
].pred_mode
;
5720 mb_type
= i_mb_type_info
[mb_type
].type
;
5722 if(h
->mb_field_decoding_flag
)
5723 mb_type
|= MB_TYPE_INTERLACED
;
5725 h
->slice_table
[ mb_xy
]= h
->slice_num
;
5727 if(IS_INTRA_PCM(mb_type
)) {
5731 // We assume these blocks are very rare so we don't optimize it.
5732 // FIXME The two following lines get the bitstream position in the cabac
5733 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5734 ptr
= h
->cabac
.bytestream
;
5735 if (h
->cabac
.low
&0x1) ptr
-=CABAC_BITS
/8;
/* NOTE(review): no check against h->cabac.bytestream_end is visible before the raw sample reads below -- confirm */
5737 // The pixels are stored in the same order as levels in h->mb array.
5738 for(y
=0; y
<16; y
++){
5739 const int index
= 4*(y
&3) + 32*((y
>>2)&1) + 128*(y
>>3);
5740 for(x
=0; x
<16; x
++){
5741 tprintf("LUMA ICPM LEVEL (%3d)\n", *ptr
);
5742 h
->mb
[index
+ (x
&3) + 16*((x
>>2)&1) + 64*(x
>>3)]= *ptr
++;
5746 const int index
= 256 + 4*(y
&3) + 32*(y
>>2);
5748 tprintf("CHROMA U ICPM LEVEL (%3d)\n", *ptr
);
5749 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= *ptr
++;
5753 const int index
= 256 + 64 + 4*(y
&3) + 32*(y
>>2);
5755 tprintf("CHROMA V ICPM LEVEL (%3d)\n", *ptr
);
5756 h
->mb
[index
+ (x
&3) + 16*(x
>>2)]= *ptr
++;
/* restart the arithmetic decoder right after the raw samples */
5760 ff_init_cabac_decoder(&h
->cabac
, ptr
, h
->cabac
.bytestream_end
- ptr
);
5762 // All blocks are present
5763 h
->cbp_table
[mb_xy
] = 0x1ef;
5764 h
->chroma_pred_mode_table
[mb_xy
] = 0;
5765 // In deblocking, the quantizer is 0
5766 s
->current_picture
.qscale_table
[mb_xy
]= 0;
5767 h
->chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, 0);
5768 // All coeffs are present
5769 memset(h
->non_zero_count
[mb_xy
], 16, 16);
5770 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
5774 fill_caches(h
, mb_type
, 0);
/* ---- prediction information ---- */
5776 if( IS_INTRA( mb_type
) ) {
5778 if( IS_INTRA4x4( mb_type
) ) {
5779 if( dct8x8_allowed
&& decode_cabac_mb_transform_size( h
) ) {
/* 8x8 transform: one prediction mode per 8x8 block, replicated over its four 4x4 cache entries */
5780 mb_type
|= MB_TYPE_8x8DCT
;
5781 for( i
= 0; i
< 16; i
+=4 ) {
5782 int pred
= pred_intra_mode( h
, i
);
5783 int mode
= decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5784 fill_rectangle( &h
->intra4x4_pred_mode_cache
[ scan8
[i
] ], 2, 2, 8, mode
, 1 );
5787 for( i
= 0; i
< 16; i
++ ) {
5788 int pred
= pred_intra_mode( h
, i
);
5789 h
->intra4x4_pred_mode_cache
[ scan8
[i
] ] = decode_cabac_mb_intra4x4_pred_mode( h
, pred
);
5791 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5794 write_back_intra_pred_mode(h
);
5795 if( check_intra4x4_pred_mode(h
) < 0 ) return -1;
5797 h
->intra16x16_pred_mode
= check_intra_pred_mode( h
, h
->intra16x16_pred_mode
);
5798 if( h
->intra16x16_pred_mode
< 0 ) return -1;
5800 h
->chroma_pred_mode_table
[mb_xy
] =
5801 h
->chroma_pred_mode
= decode_cabac_mb_chroma_pre_mode( h
);
5803 h
->chroma_pred_mode
= check_intra_pred_mode( h
, h
->chroma_pred_mode
);
5804 if( h
->chroma_pred_mode
< 0 ) return -1;
/* four 8x8 partitions, each with its own sub-macroblock type */
5805 } else if( partition_count
== 4 ) {
5806 int i
, j
, sub_partition_count
[4], list
, ref
[2][4];
5808 if( h
->slice_type
== B_TYPE
) {
5809 for( i
= 0; i
< 4; i
++ ) {
5810 h
->sub_mb_type
[i
] = decode_cabac_b_mb_sub_type( h
);
5811 sub_partition_count
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5812 h
->sub_mb_type
[i
]= b_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
5814 if( IS_DIRECT(h
->sub_mb_type
[0]) || IS_DIRECT(h
->sub_mb_type
[1])
5815 || IS_DIRECT(h
->sub_mb_type
[2]) || IS_DIRECT(h
->sub_mb_type
[3])) {
5816 pred_direct_motion(h
, &mb_type
);
5817 if( h
->ref_count
[0] > 1 || h
->ref_count
[1] > 1 ) {
5818 for( i
= 0; i
< 4; i
++ )
5819 if( IS_DIRECT(h
->sub_mb_type
[i
]) )
5820 fill_rectangle( &h
->direct_cache
[scan8
[4*i
]], 2, 2, 8, 1, 1 );
5824 for( i
= 0; i
< 4; i
++ ) {
5825 h
->sub_mb_type
[i
] = decode_cabac_p_mb_sub_type( h
);
5826 sub_partition_count
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].partition_count
;
5827 h
->sub_mb_type
[i
]= p_sub_mb_type_info
[ h
->sub_mb_type
[i
] ].type
;
/* reference indices for every non-direct 8x8 block, per list */
5831 for( list
= 0; list
< 2; list
++ ) {
5832 if( h
->ref_count
[list
] > 0 ) {
5833 for( i
= 0; i
< 4; i
++ ) {
5834 if(IS_DIRECT(h
->sub_mb_type
[i
])) continue;
5835 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
)){
5836 if( h
->ref_count
[list
] > 1 )
5837 ref
[list
][i
] = decode_cabac_mb_ref( h
, list
, 4*i
);
5843 h
->ref_cache
[list
][ scan8
[4*i
]+1 ]=
5844 h
->ref_cache
[list
][ scan8
[4*i
]+8 ]=h
->ref_cache
[list
][ scan8
[4*i
]+9 ]= ref
[list
][i
];
5850 dct8x8_allowed
= get_dct8x8_allowed(h
);
/* motion vectors of the sub-partitions, per list */
5852 for(list
=0; list
<2; list
++){
5854 if(IS_DIRECT(h
->sub_mb_type
[i
])){
5855 fill_rectangle(h
->mvd_cache
[list
][scan8
[4*i
]], 2, 2, 8, 0, 4);
5858 h
->ref_cache
[list
][ scan8
[4*i
] ]=h
->ref_cache
[list
][ scan8
[4*i
]+1 ];
5860 if(IS_DIR(h
->sub_mb_type
[i
], 0, list
) && !IS_DIRECT(h
->sub_mb_type
[i
])){
5861 const int sub_mb_type
= h
->sub_mb_type
[i
];
5862 const int block_width
= (sub_mb_type
& (MB_TYPE_16x16
|MB_TYPE_16x8
)) ? 2 : 1;
5863 for(j
=0; j
<sub_partition_count
[i
]; j
++){
5866 const int index
= 4*i
+ block_width
*j
;
5867 int16_t (* mv_cache
)[2]= &h
->mv_cache
[list
][ scan8
[index
] ];
5868 int16_t (* mvd_cache
)[2]= &h
->mvd_cache
[list
][ scan8
[index
] ];
5869 pred_motion(h
, index
, block_width
, list
, h
->ref_cache
[list
][ scan8
[index
] ], &mpx
, &mpy
);
/* vector = prediction + decoded difference */
5871 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, index
, 0 );
5872 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, index
, 1 );
5873 tprintf("final mv:%d %d\n", mx
, my
);
/* replicate the vector and its difference over the cache entries covered by the sub-partition */
5875 if(IS_SUB_8X8(sub_mb_type
)){
5876 mv_cache
[ 0 ][0]= mv_cache
[ 1 ][0]=
5877 mv_cache
[ 8 ][0]= mv_cache
[ 9 ][0]= mx
;
5878 mv_cache
[ 0 ][1]= mv_cache
[ 1 ][1]=
5879 mv_cache
[ 8 ][1]= mv_cache
[ 9 ][1]= my
;
5881 mvd_cache
[ 0 ][0]= mvd_cache
[ 1 ][0]=
5882 mvd_cache
[ 8 ][0]= mvd_cache
[ 9 ][0]= mx
- mpx
;
5883 mvd_cache
[ 0 ][1]= mvd_cache
[ 1 ][1]=
5884 mvd_cache
[ 8 ][1]= mvd_cache
[ 9 ][1]= my
- mpy
;
5885 }else if(IS_SUB_8X4(sub_mb_type
)){
5886 mv_cache
[ 0 ][0]= mv_cache
[ 1 ][0]= mx
;
5887 mv_cache
[ 0 ][1]= mv_cache
[ 1 ][1]= my
;
5889 mvd_cache
[ 0 ][0]= mvd_cache
[ 1 ][0]= mx
- mpx
;
5890 mvd_cache
[ 0 ][1]= mvd_cache
[ 1 ][1]= my
- mpy
;
5891 }else if(IS_SUB_4X8(sub_mb_type
)){
5892 mv_cache
[ 0 ][0]= mv_cache
[ 8 ][0]= mx
;
5893 mv_cache
[ 0 ][1]= mv_cache
[ 8 ][1]= my
;
5895 mvd_cache
[ 0 ][0]= mvd_cache
[ 8 ][0]= mx
- mpx
;
5896 mvd_cache
[ 0 ][1]= mvd_cache
[ 8 ][1]= my
- mpy
;
5898 assert(IS_SUB_4X4(sub_mb_type
));
5899 mv_cache
[ 0 ][0]= mx
;
5900 mv_cache
[ 0 ][1]= my
;
5902 mvd_cache
[ 0 ][0]= mx
- mpx
;
5903 mvd_cache
[ 0 ][1]= my
- mpy
;
/* list not used by this 8x8 block: zero both caches, one 32-bit store per (x,y) pair */
5907 uint32_t *p
= (uint32_t *)&h
->mv_cache
[list
][ scan8
[4*i
] ][0];
5908 uint32_t *pd
= (uint32_t *)&h
->mvd_cache
[list
][ scan8
[4*i
] ][0];
5909 p
[0] = p
[1] = p
[8] = p
[9] = 0;
5910 pd
[0]= pd
[1]= pd
[8]= pd
[9]= 0;
/* direct macroblock: vectors are predicted, differences are zero */
5914 } else if( IS_DIRECT(mb_type
) ) {
5915 pred_direct_motion(h
, &mb_type
);
5916 fill_rectangle(h
->mvd_cache
[0][scan8
[0]], 4, 4, 8, 0, 4);
5917 fill_rectangle(h
->mvd_cache
[1][scan8
[0]], 4, 4, 8, 0, 4);
5918 dct8x8_allowed
&= h
->sps
.direct_8x8_inference_flag
;
5920 int list
, mx
, my
, i
, mpx
, mpy
;
/* 16x16: one reference index and one vector per list */
5921 if(IS_16X16(mb_type
)){
5922 for(list
=0; list
<2; list
++){
5923 if(IS_DIR(mb_type
, 0, list
)){
5924 if(h
->ref_count
[list
] > 0 ){
5925 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 0 ) : 0;
5926 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, ref
, 1);
5929 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED
, 1);
5931 for(list
=0; list
<2; list
++){
5932 if(IS_DIR(mb_type
, 0, list
)){
5933 pred_motion(h
, 0, 4, list
, h
->ref_cache
[list
][ scan8
[0] ], &mpx
, &mpy
);
5935 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 0, 0 );
5936 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 0, 1 );
5937 tprintf("final mv:%d %d\n", mx
, my
);
5939 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5940 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, pack16to32(mx
,my
), 4);
5942 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] ], 4, 4, 8, 0, 4);
/* 16x8: partition i covers cache rows starting at scan8[0] + 16*i */
5945 else if(IS_16X8(mb_type
)){
5946 for(list
=0; list
<2; list
++){
5947 if(h
->ref_count
[list
]>0){
5949 if(IS_DIR(mb_type
, i
, list
)){
5950 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 8*i
) : 0;
5951 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, ref
, 1);
5953 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, (LIST_NOT_USED
&0xFF), 1);
5957 for(list
=0; list
<2; list
++){
5959 if(IS_DIR(mb_type
, i
, list
)){
5960 pred_16x8_motion(h
, 8*i
, list
, h
->ref_cache
[list
][scan8
[0] + 16*i
], &mpx
, &mpy
);
5961 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 0 );
5962 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 8*i
, 1 );
5963 tprintf("final mv:%d %d\n", mx
, my
);
5965 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5966 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, pack16to32(mx
,my
), 4);
5968 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
5969 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 16*i
], 4, 2, 8, 0, 4);
/* 8x16: partition i covers cache columns starting at scan8[0] + 2*i */
5974 assert(IS_8X16(mb_type
));
5975 for(list
=0; list
<2; list
++){
5976 if(h
->ref_count
[list
]>0){
5978 if(IS_DIR(mb_type
, i
, list
)){ //FIXME optimize
5979 const int ref
= h
->ref_count
[list
] > 1 ? decode_cabac_mb_ref( h
, list
, 4*i
) : 0;
5980 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, ref
, 1);
5982 fill_rectangle(&h
->ref_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, (LIST_NOT_USED
&0xFF), 1);
5986 for(list
=0; list
<2; list
++){
5988 if(IS_DIR(mb_type
, i
, list
)){
5989 pred_8x16_motion(h
, i
*4, list
, h
->ref_cache
[list
][ scan8
[0] + 2*i
], &mpx
, &mpy
);
5990 mx
= mpx
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 0 );
5991 my
= mpy
+ decode_cabac_mb_mvd( h
, list
, 4*i
, 1 );
5993 tprintf("final mv:%d %d\n", mx
, my
);
5994 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
-mpx
,my
-mpy
), 4);
5995 fill_rectangle(h
->mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, pack16to32(mx
,my
), 4);
5997 fill_rectangle(h
->mvd_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
5998 fill_rectangle(h
-> mv_cache
[list
][ scan8
[0] + 2*i
], 2, 4, 8, 0, 4);
6005 if( IS_INTER( mb_type
) ) {
6006 h
->chroma_pred_mode_table
[mb_xy
] = 0;
6007 write_back_motion( h
, mb_type
);
/* ---- coded block pattern; intra 16x16 took its cbp from the type table above ---- */
6010 if( !IS_INTRA16x16( mb_type
) ) {
6011 cbp
= decode_cabac_mb_cbp_luma( h
);
6012 cbp
|= decode_cabac_mb_cbp_chroma( h
) << 4;
6015 h
->cbp_table
[mb_xy
] = cbp
;
/* the 8x8 transform flag of non-intra macroblocks is only read when some luma block is coded */
6017 if( dct8x8_allowed
&& (cbp
&15) && !IS_INTRA( mb_type
) ) {
6018 if( decode_cabac_mb_transform_size( h
) )
6019 mb_type
|= MB_TYPE_8x8DCT
;
6021 s
->current_picture
.mb_type
[mb_xy
]= mb_type
;
/* ---- residual data ---- */
6023 if( cbp
|| IS_INTRA16x16( mb_type
) ) {
6024 const uint8_t *scan
, *dc_scan
;
6027 if(IS_INTERLACED(mb_type
)){
6028 scan
= s
->qscale
? h
->field_scan
: h
->field_scan_q0
;
6029 dc_scan
= luma_dc_field_scan
;
6031 scan
= s
->qscale
? h
->zigzag_scan
: h
->zigzag_scan_q0
;
6032 dc_scan
= luma_dc_zigzag_scan
;
6035 h
->last_qscale_diff
= dqp
= decode_cabac_mb_dqp( h
);
/* wrap qscale back into 0..51; the unsigned compare catches negative values too */
6037 if(((unsigned)s
->qscale
) > 51){
6038 if(s
->qscale
<0) s
->qscale
+= 52;
6039 else s
->qscale
-= 52;
6041 h
->chroma_qp
= get_chroma_qp(h
->pps
.chroma_qp_index_offset
, s
->qscale
);
6043 if( IS_INTRA16x16( mb_type
) ) {
6045 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6046 if( decode_cabac_residual( h
, h
->mb
, 0, 0, dc_scan
, h
->dequant4_coeff
[s
->qscale
], 16) < 0)
6049 for( i
= 0; i
< 16; i
++ ) {
6050 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6051 if( decode_cabac_residual(h
, h
->mb
+ 16*i
, 1, i
, scan
+ 1, h
->dequant4_coeff
[s
->qscale
], 15) < 0 )
6055 fill_rectangle(&h
->non_zero_count_cache
[scan8
[0]], 4, 4, 8, 0, 1);
/* luma, one 8x8 block at a time, only where the cbp bit is set */
6059 for( i8x8
= 0; i8x8
< 4; i8x8
++ ) {
6060 if( cbp
& (1<<i8x8
) ) {
6061 if( IS_8x8DCT(mb_type
) ) {
6062 if( decode_cabac_residual(h
, h
->mb
+ 64*i8x8
, 5, 4*i8x8
,
6063 zigzag_scan8x8
, h
->dequant8_coeff
[s
->qscale
], 64) < 0 )
/* rounded divide by 4 of the 8x8 coefficients */
6068 h
->mb
[64*i8x8
+i
] = (h
->mb
[64*i8x8
+i
] + 2) >> 2;
6071 for( i4x4
= 0; i4x4
< 4; i4x4
++ ) {
6072 const int index
= 4*i8x8
+ i4x4
;
6073 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6074 if( decode_cabac_residual(h
, h
->mb
+ 16*index
, 2, index
, scan
, h
->dequant4_coeff
[s
->qscale
], 16) < 0 )
6078 uint8_t * const nnz
= &h
->non_zero_count_cache
[ scan8
[4*i8x8
] ];
6079 nnz
[0] = nnz
[1] = nnz
[8] = nnz
[9] = 0;
/* chroma DC, one block per plane */
6086 for( c
= 0; c
< 2; c
++ ) {
6087 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6088 if( decode_cabac_residual(h
, h
->mb
+ 256 + 16*4*c
, 3, c
, chroma_dc_scan
, h
->dequant4_coeff
[h
->chroma_qp
], 4) < 0)
/* chroma AC, four blocks per plane */
6095 for( c
= 0; c
< 2; c
++ ) {
6096 for( i
= 0; i
< 4; i
++ ) {
6097 const int index
= 16 + 4 * c
+ i
;
6098 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6099 if( decode_cabac_residual(h
, h
->mb
+ 16*index
, 4, index
- 16, scan
+ 1, h
->dequant4_coeff
[h
->chroma_qp
], 15) < 0)
6104 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
6105 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
6106 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
/* no residual at all: clear every luma and chroma non-zero count */
6109 uint8_t * const nnz
= &h
->non_zero_count_cache
[0];
6110 fill_rectangle(&nnz
[scan8
[0]], 4, 4, 8, 0, 1);
6111 nnz
[ scan8
[16]+0 ] = nnz
[ scan8
[16]+1 ] =nnz
[ scan8
[16]+8 ] =nnz
[ scan8
[16]+9 ] =
6112 nnz
[ scan8
[20]+0 ] = nnz
[ scan8
[20]+1 ] =nnz
[ scan8
[20]+8 ] =nnz
[ scan8
[20]+9 ] = 0;
6115 s
->current_picture
.qscale_table
[mb_xy
]= s
->qscale
;
6116 write_back_non_zero_count(h
);
/**
 * Deblocks one vertical luma edge of a macroblock, 16 pixels long.
 *
 * alpha and beta are looked up from qp plus the slice offsets, clipped to
 * 0..51. On the first visible path a clipping value is derived per bS entry
 * (-1 when bS[i] is 0) and the DSP routine h264_h_loop_filter_luma is
 * called. On the other path the edge is filtered in C: a pixel pair is
 * touched only when |p0-q0| < alpha, |p1-p0| < beta and |q1-q0| < beta, and
 * the longer filters are used when |p0-q0| < (alpha >> 2) + 2.
 * NOTE(review): the condition choosing between the two paths, the per-row
 * pointer advance and several else lines are not visible in this listing.
 *
 * @param pix    first pixel to the right of the edge (pix[-1] is to the left)
 * @param stride line size, passed through to the DSP routine
 * @param bS     boundary strengths, one per 4-pixel segment
 * @param qp     quantiser used for the alpha/beta lookup
 */
6122 static void filter_mb_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[4], int qp
) {
6124 const int index_a
= clip( qp
+ h
->slice_alpha_c0_offset
, 0, 51 );
6125 const int alpha
= alpha_table
[index_a
];
6126 const int beta
= beta_table
[clip( qp
+ h
->slice_beta_offset
, 0, 51 )];
6131 tc
[i
] = bS
[i
] ? tc0_table
[index_a
][bS
[i
] - 1] : -1;
6132 h
->s
.dsp
.h264_h_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6134 /* 16px edge length, because bS=4 is triggered by being at
6135 * the edge of an intra MB, so all 4 bS are the same */
6136 for( d
= 0; d
< 16; d
++ ) {
6137 const int p0
= pix
[-1];
6138 const int p1
= pix
[-2];
6139 const int p2
= pix
[-3];
6141 const int q0
= pix
[0];
6142 const int q1
= pix
[1];
6143 const int q2
= pix
[2];
6145 if( ABS( p0
- q0
) < alpha
&&
6146 ABS( p1
- p0
) < beta
&&
6147 ABS( q1
- q0
) < beta
) {
6149 if(ABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
/* left side of the edge: three-pixel filter when p2 is close to p0 */
6150 if( ABS( p2
- p0
) < beta
)
6152 const int p3
= pix
[-4];
6154 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6155 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6156 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6159 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
/* right side of the edge, mirrored */
6161 if( ABS( q2
- q0
) < beta
)
6163 const int q3
= pix
[3];
6165 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6166 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6167 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6170 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
/* only the two pixels next to the edge are changed on this path */
6174 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6175 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6177 tprintf("filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2], pix
[-1], pix
[0], pix
[1]);
/**
 * Chroma counterpart of filter_mb_edgev: filters one chroma edge through
 * the dsp function table.
 *
 * alpha/beta are derived exactly as for luma (qp plus slice offsets,
 * clipped to 0..51). Two dsp calls are visible:
 *  - h264_h_loop_filter_chroma() with tc[i] = tc0_table[index_a][bS[i]-1] + 1,
 *    or 0 where bS[i] is 0;
 *  - h264_h_loop_filter_chroma_intra(), which takes no tc array.
 *
 * NOTE(review): the declarations, the loop header filling tc[] and the
 * condition choosing between the two calls are not shown in this listing.
 *
 * @param h      decoder context
 * @param pix    first sample on the q side of the edge
 * @param stride passed through to the dsp routine
 * @param bS     boundary strengths, one per edge segment
 * @param qp     chroma quantiser used for the table lookups
 */
6183 static void filter_mb_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[4], int qp
) {
6185 const int index_a
= clip( qp
+ h
->slice_alpha_c0_offset
, 0, 51 );
6186 const int alpha
= alpha_table
[index_a
];
6187 const int beta
= beta_table
[clip( qp
+ h
->slice_beta_offset
, 0, 51 )];
6192 tc
[i
] = bS
[i
] ? tc0_table
[index_a
][bS
[i
] - 1] + 1 : 0;
6193 h
->s
.dsp
.h264_h_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6195 h
->s
.dsp
.h264_h_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
/**
 * Luma filter for the first vertical edge when per-row boundary strengths
 * and two quantisers are needed (8 bS values, 2 qp values).
 *
 * The loop walks 16 rows, advancing pix by stride each time. Per row:
 *  - bS_index starts as i >> 1 and is modified when
 *    h->mb_field_decoding_flag is set (only the "|= (i & 1)" part of that
 *    adjustment is visible here);
 *  - bS[bS_index] is tested against 0;
 *  - qp_index is (i & 1) for field macroblocks, otherwise (i >> 3), and
 *    selects which qp[] entry feeds the alpha/beta/tc0 lookups;
 *  - bS < 4 uses the clipped-delta filter (tc0 from tc0_table), otherwise
 *    the strong filter that rewrites up to three samples per side.
 *
 * NOTE(review): several lines are missing from this listing (local
 * declarations including tc, part of the bS_index computation, the action
 * taken when bS is 0, closing braces). Verify against the full source.
 *
 * @param h      decoder context
 * @param pix    first sample on the q side of the edge, top row
 * @param stride distance in bytes between consecutive rows
 * @param bS     eight boundary strengths
 * @param qp     two luma quantisers, selected per row via qp_index
 */
6199 static void filter_mb_mbaff_edgev( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[8], int qp
[2] ) {
6201 for( i
= 0; i
< 16; i
++, pix
+= stride
) {
6207 int bS_index
= (i
>> 1);
6208 if (h
->mb_field_decoding_flag
) {
6210 bS_index
|= (i
& 1);
6213 if( bS
[bS_index
] == 0 ) {
6217 qp_index
= h
->mb_field_decoding_flag
? (i
& 1) : (i
>> 3);
6218 index_a
= clip( qp
[qp_index
] + h
->slice_alpha_c0_offset
, 0, 51 );
6219 alpha
= alpha_table
[index_a
];
6220 beta
= beta_table
[clip( qp
[qp_index
] + h
->slice_beta_offset
, 0, 51 )];
6223 if( bS
[bS_index
] < 4 ) {
6224 const int tc0
= tc0_table
[index_a
][bS
[bS_index
] - 1];
6225 /* 4px edge length */
6226 const int p0
= pix
[-1];
6227 const int p1
= pix
[-2];
6228 const int p2
= pix
[-3];
6229 const int q0
= pix
[0];
6230 const int q1
= pix
[1];
6231 const int q2
= pix
[2];
6233 if( ABS( p0
- q0
) < alpha
&&
6234 ABS( p1
- p0
) < beta
&&
6235 ABS( q1
- q0
) < beta
) {
6239 if( ABS( p2
- p0
) < beta
) {
6240 pix
[-2] = p1
+ clip( ( p2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( p1
<< 1 ) ) >> 1, -tc0
, tc0
);
6243 if( ABS( q2
- q0
) < beta
) {
6244 pix
[1] = q1
+ clip( ( q2
+ ( ( p0
+ q0
+ 1 ) >> 1 ) - ( q1
<< 1 ) ) >> 1, -tc0
, tc0
);
6248 i_delta
= clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6249 pix
[-1] = clip_uint8( p0
+ i_delta
); /* p0' */
6250 pix
[0] = clip_uint8( q0
- i_delta
); /* q0' */
6251 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6254 /* 4px edge length */
6255 const int p0
= pix
[-1];
6256 const int p1
= pix
[-2];
6257 const int p2
= pix
[-3];
6259 const int q0
= pix
[0];
6260 const int q1
= pix
[1];
6261 const int q2
= pix
[2];
6263 if( ABS( p0
- q0
) < alpha
&&
6264 ABS( p1
- p0
) < beta
&&
6265 ABS( q1
- q0
) < beta
) {
6267 if(ABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6268 if( ABS( p2
- p0
) < beta
)
6270 const int p3
= pix
[-4];
6272 pix
[-1] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6273 pix
[-2] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6274 pix
[-3] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6277 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6279 if( ABS( q2
- q0
) < beta
)
6281 const int q3
= pix
[3];
6283 pix
[0] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6284 pix
[1] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6285 pix
[2] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6288 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6292 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6293 pix
[ 0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6295 tprintf("filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, p2
, p1
, p0
, q0
, q1
, q2
, pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
/**
 * Chroma counterpart of filter_mb_mbaff_edgev: filters the first vertical
 * chroma edge row by row (8 rows, pix advanced by stride each iteration).
 *
 * Per row the code tests bS[bS_index] against 0, picks a quantiser via
 * qp_index, derives alpha/beta, and then either
 *  - bS < 4: moves p0/q0 by a delta clipped to +-tc, where
 *    tc = tc0_table[index_a][bS - 1] + 1, or
 *  - otherwise: replaces p0/q0 with (2*p1 + p0 + q1 + 2) >> 2 and
 *    (2*q1 + q0 + p1 + 2) >> 2.
 * Only pix[-1] and pix[0] are written.
 *
 * NOTE(review): qp_index uses (i >> 3) in the non-field case, but the loop
 * bound is i < 8, so that expression is always 0 here and qp[1] is only
 * reachable for field macroblocks - confirm this is intended.
 * NOTE(review): bS is declared as [4] while filter_mb passes the same bS
 * array it prints 8 entries of; the definition of bS_index is not shown
 * in this listing, so the index range actually used needs confirming.
 *
 * @param h      decoder context
 * @param pix    first sample on the q side of the edge, top row
 * @param stride distance in bytes between consecutive rows
 * @param bS     boundary strengths (shared with the luma call)
 * @param qp     two chroma quantisers, selected per row via qp_index
 */
6300 static void filter_mb_mbaff_edgecv( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[4], int qp
[2] ) {
6302 for( i
= 0; i
< 8; i
++, pix
+= stride
) {
6310 if( bS
[bS_index
] == 0 ) {
6314 qp_index
= h
->mb_field_decoding_flag
? (i
& 1) : (i
>> 3);
6315 index_a
= clip( qp
[qp_index
] + h
->slice_alpha_c0_offset
, 0, 51 );
6316 alpha
= alpha_table
[index_a
];
6317 beta
= beta_table
[clip( qp
[qp_index
] + h
->slice_beta_offset
, 0, 51 )];
6318 if( bS
[bS_index
] < 4 ) {
6319 const int tc
= tc0_table
[index_a
][bS
[bS_index
] - 1] + 1;
6320 /* 2px edge length (because we use same bS than the one for luma) */
6321 const int p0
= pix
[-1];
6322 const int p1
= pix
[-2];
6323 const int q0
= pix
[0];
6324 const int q1
= pix
[1];
6326 if( ABS( p0
- q0
) < alpha
&&
6327 ABS( p1
- p0
) < beta
&&
6328 ABS( q1
- q0
) < beta
) {
6329 const int i_delta
= clip( (((q0
- p0
) << 2) + (p1
- q1
) + 4) >> 3, -tc
, tc
);
6331 pix
[-1] = clip_uint8( p0
+ i_delta
); /* p0' */
6332 pix
[0] = clip_uint8( q0
- i_delta
); /* q0' */
6333 tprintf("filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, qp
[qp_index
], index_a
, alpha
, beta
, tc
, bS
[bS_index
], pix
[-3], p1
, p0
, q0
, q1
, pix
[2], p1
, pix
[-1], pix
[0], q1
);
6336 const int p0
= pix
[-1];
6337 const int p1
= pix
[-2];
6338 const int q0
= pix
[0];
6339 const int q1
= pix
[1];
6341 if( ABS( p0
- q0
) < alpha
&&
6342 ABS( p1
- p0
) < beta
&&
6343 ABS( q1
- q0
) < beta
) {
6345 pix
[-1] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2; /* p0' */
6346 pix
[0] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2; /* q0' */
6347 tprintf("filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i
, pix
[-3], p1
, p0
, q0
, q1
, pix
[2], pix
[-3], pix
[-2], pix
[-1], pix
[0], pix
[1], pix
[2]);
/**
 * Deblocks one luma edge whose neighbouring samples lie in the same
 * column: p0..p3 are read at pix[-1..-4 * pix_next] and q0..q3 at
 * pix[0..3 * pix_next], with pix_next equal to stride.
 *
 * Structure mirrors filter_mb_edgev: alpha/beta come from the tables using
 * qp plus the slice offsets clipped to 0..51, and two paths are visible:
 *  - dsp.h264_v_loop_filter_luma() with tc[i] taken from tc0_table
 *    (-1 where bS[i] is 0);
 *  - a 16-iteration C loop applying the strong filter, writing up to three
 *    samples on each side of the edge.
 *
 * NOTE(review): declarations, the condition selecting the path, closing
 * braces and any per-iteration pix advance are not shown in this listing.
 *
 * @param h      decoder context
 * @param pix    first sample on the q side of the edge
 * @param stride distance in bytes between vertically adjacent samples
 * @param bS     boundary strengths, one per edge segment
 * @param qp     quantiser used for the alpha/beta/tc0 lookups
 */
6353 static void filter_mb_edgeh( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[4], int qp
) {
6355 const int index_a
= clip( qp
+ h
->slice_alpha_c0_offset
, 0, 51 );
6356 const int alpha
= alpha_table
[index_a
];
6357 const int beta
= beta_table
[clip( qp
+ h
->slice_beta_offset
, 0, 51 )];
6358 const int pix_next
= stride
;
6363 tc
[i
] = bS
[i
] ? tc0_table
[index_a
][bS
[i
] - 1] : -1;
6364 h
->s
.dsp
.h264_v_loop_filter_luma(pix
, stride
, alpha
, beta
, tc
);
6366 /* 16px edge length, see filter_mb_edgev */
6367 for( d
= 0; d
< 16; d
++ ) {
6368 const int p0
= pix
[-1*pix_next
];
6369 const int p1
= pix
[-2*pix_next
];
6370 const int p2
= pix
[-3*pix_next
];
6371 const int q0
= pix
[0];
6372 const int q1
= pix
[1*pix_next
];
6373 const int q2
= pix
[2*pix_next
];
6375 if( ABS( p0
- q0
) < alpha
&&
6376 ABS( p1
- p0
) < beta
&&
6377 ABS( q1
- q0
) < beta
) {
6379 const int p3
= pix
[-4*pix_next
];
6380 const int q3
= pix
[ 3*pix_next
];
6382 if(ABS( p0
- q0
) < (( alpha
>> 2 ) + 2 )){
6383 if( ABS( p2
- p0
) < beta
) {
6385 pix
[-1*pix_next
] = ( p2
+ 2*p1
+ 2*p0
+ 2*q0
+ q1
+ 4 ) >> 3;
6386 pix
[-2*pix_next
] = ( p2
+ p1
+ p0
+ q0
+ 2 ) >> 2;
6387 pix
[-3*pix_next
] = ( 2*p3
+ 3*p2
+ p1
+ p0
+ q0
+ 4 ) >> 3;
6390 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6392 if( ABS( q2
- q0
) < beta
) {
6394 pix
[0*pix_next
] = ( p1
+ 2*p0
+ 2*q0
+ 2*q1
+ q2
+ 4 ) >> 3;
6395 pix
[1*pix_next
] = ( p0
+ q0
+ q1
+ q2
+ 2 ) >> 2;
6396 pix
[2*pix_next
] = ( 2*q3
+ 3*q2
+ q1
+ q0
+ p0
+ 4 ) >> 3;
6399 pix
[0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6403 pix
[-1*pix_next
] = ( 2*p1
+ p0
+ q1
+ 2 ) >> 2;
6404 pix
[ 0*pix_next
] = ( 2*q1
+ q0
+ p1
+ 2 ) >> 2;
6406 tprintf("filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i
, d
, qp
, index_a
, alpha
, beta
, bS
[i
], p2
, p1
, p0
, q0
, q1
, q2
, pix
[-2*pix_next
], pix
[-pix_next
], pix
[0], pix
[pix_next
]);
/**
 * Chroma counterpart of filter_mb_edgeh: filters one chroma edge through
 * the dsp function table.
 *
 * alpha/beta are derived from qp plus the slice offsets (clipped to
 * 0..51). Two dsp calls are visible:
 *  - h264_v_loop_filter_chroma() with tc[i] = tc0_table[index_a][bS[i]-1] + 1,
 *    or 0 where bS[i] is 0;
 *  - h264_v_loop_filter_chroma_intra(), which takes no tc array.
 *
 * NOTE(review): the declarations, the loop header filling tc[] and the
 * condition choosing between the two calls are not shown in this listing.
 *
 * @param h      decoder context
 * @param pix    first sample on the q side of the edge
 * @param stride passed through to the dsp routine
 * @param bS     boundary strengths, one per edge segment
 * @param qp     chroma quantiser used for the table lookups
 */
6413 static void filter_mb_edgech( H264Context
*h
, uint8_t *pix
, int stride
, int bS
[4], int qp
) {
6415 const int index_a
= clip( qp
+ h
->slice_alpha_c0_offset
, 0, 51 );
6416 const int alpha
= alpha_table
[index_a
];
6417 const int beta
= beta_table
[clip( qp
+ h
->slice_beta_offset
, 0, 51 )];
6422 tc
[i
] = bS
[i
] ? tc0_table
[index_a
][bS
[i
] - 1] + 1 : 0;
6423 h
->s
.dsp
.h264_v_loop_filter_chroma(pix
, stride
, alpha
, beta
, tc
);
6425 h
->s
.dsp
.h264_v_loop_filter_chroma_intra(pix
, stride
, alpha
, beta
);
/**
 * Deblocking driver for one macroblock: works out boundary strengths (bS)
 * and quantisers for each edge and hands them to the edge filters
 * (filter_mb_edgev/edgecv/edgeh/edgech and the mbaff variants).
 *
 * What the visible lines establish:
 *  - mb_xy = mb_x + mb_y * s->mb_stride identifies the macroblock.
 *  - A special first-vertical-edge path runs when the left neighbour exists
 *    (slice_table != 255), differs in interlaced type, and (for
 *    deblocking_filter == 2) is in the same slice. It computes 8 bS values
 *    and two luma / two chroma quantisers, then calls the mbaff filters and
 *    sets first_vertical_edge_done.
 *  - The main loop runs dir = 0 (vertical edges) then dir = 1 (horizontal
 *    edges), and edge = start..3, where start is 1 when the neighbouring
 *    macroblock is outside the picture (slice_table == 255).
 *  - bS is derived from: intra status of either macroblock, non-zero
 *    coefficient counts (non_zero_count_cache), and reference / motion
 *    vector differences (ref_cache via ref2frm, mv_cache with a threshold
 *    of 4).
 *  - Luma qp is the rounded average of qscale_table for the current and
 *    neighbouring macroblock; chroma qp averages h->chroma_qp with
 *    get_chroma_qp() of the neighbour.
 *  - Chroma edges are filtered only when (edge & 1) == 0.
 *  - A frame macroblock above an interlaced neighbour (mb_aff_frame,
 *    dir == 1, edge == 0, even mb_y) filters the top edge twice with
 *    doubled line sizes, once per field.
 *
 * NOTE(review): this listing drops many lines (declarations, the actual
 * bS values assigned in most branches, continue/else statements, closing
 * braces) and at least one in-body comment has lost its terminator, so
 * treat the text below as an index into the real source, not as the
 * complete function.
 *
 * @param h          decoder context
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param img_y      luma samples of this macroblock
 * @param img_cb     Cb samples of this macroblock
 * @param img_cr     Cr samples of this macroblock
 * @param linesize   luma line size
 * @param uvlinesize chroma line size
 */
6429 static void filter_mb( H264Context
*h
, int mb_x
, int mb_y
, uint8_t *img_y
, uint8_t *img_cb
, uint8_t *img_cr
, unsigned int linesize
, unsigned int uvlinesize
) {
6430 MpegEncContext
* const s
= &h
->s
;
6431 const int mb_xy
= mb_x
+ mb_y
*s
->mb_stride
;
6432 int first_vertical_edge_done
= 0;
6434 /* FIXME: A given frame may occupy more than one position in
6435 * the reference list. So ref2frm should be populated with
6436 * frame numbers, not indices. */
6437 static const int ref2frm
[18] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
6440 // left mb is in picture
6441 && h
->slice_table
[mb_xy
-1] != 255
6442 // and current and left pair do not have the same interlaced type
6443 && (IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
]) != IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
-1]))
6444 // and left mb is in the same slice if deblocking_filter == 2
6445 && (h
->deblocking_filter
!=2 || h
->slice_table
[mb_xy
-1] == h
->slice_table
[mb_xy
])) {
6446 /* First vertical edge is different in MBAFF frames
6447 * There are 8 different bS to compute and 2 different Qp
6454 first_vertical_edge_done
= 1;
6455 for( i
= 0; i
< 8; i
++ ) {
6457 int b_idx
= 8 + 4 + 8*y
;
6458 int bn_idx
= b_idx
- 1;
6460 int mbn_xy
= h
->mb_field_decoding_flag
? h
->left_mb_xy
[i
>>2] : h
->left_mb_xy
[i
&1];
6462 if( IS_INTRA( s
->current_picture
.mb_type
[mb_xy
] ) ||
6463 IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) ) {
6465 } else if( h
->non_zero_count_cache
[b_idx
] != 0 ||
6466 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6467 h
->non_zero_count_cache
[bn_idx
] != 0 ) {
6472 for( l
= 0; l
< 1 + (h
->slice_type
== B_TYPE
); l
++ ) {
6473 if( ref2frm
[h
->ref_cache
[l
][b_idx
]+2] != ref2frm
[h
->ref_cache
[l
][bn_idx
]+2] ||
6474 ABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6475 ABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= 4 ) {
6482 if(bS
[0]+bS
[1]+bS
[2]+bS
[3] != 0) {
6483 // Do not use s->qscale as luma quantizer because it has not the same
6484 // value in IPCM macroblocks.
6485 qp
[0] = ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[h
->left_mb_xy
[0]] + 1 ) >> 1;
6486 chroma_qp
[0] = ( get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mb_xy
] ) +
6487 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[h
->left_mb_xy
[0]] ) + 1 ) >> 1;
6488 qp
[1] = ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[h
->left_mb_xy
[1]] + 1 ) >> 1;
6489 chroma_qp
[1] = ( get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mb_xy
] ) +
6490 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[h
->left_mb_xy
[1]] ) + 1 ) >> 1;
6493 tprintf("filter mb:%d/%d MBAFF, QPy:%d/%d, QPc:%d/%d ls:%d uvls:%d", mb_x
, mb_y
, qp
[0], qp
[1], chroma_qp
[0], chroma_qp
[1], linesize
, uvlinesize
);
6494 { int i
; for (i
= 0; i
< 8; i
++) tprintf(" bS[%d]:%d", i
, bS
[i
]); tprintf("\n"); }
6495 filter_mb_mbaff_edgev ( h
, &img_y
[0], linesize
, bS
, qp
);
6496 filter_mb_mbaff_edgecv( h
, &img_cb
[0], uvlinesize
, bS
, chroma_qp
);
6497 filter_mb_mbaff_edgecv( h
, &img_cr
[0], uvlinesize
, bS
, chroma_qp
);
6500 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6501 for( dir
= 0; dir
< 2; dir
++ )
6504 const int mbm_xy
= dir
== 0 ? mb_xy
-1 : h
->top_mb_xy
;
6505 int start
= h
->slice_table
[mbm_xy
] == 255 ? 1 : 0;
6507 if (first_vertical_edge_done
) {
6509 first_vertical_edge_done
= 0;
6512 if (h
->deblocking_filter
==2 && h
->slice_table
[mbm_xy
] != h
->slice_table
[mb_xy
])
6516 for( edge
= start
; edge
< 4; edge
++ ) {
6517 /* mbn_xy: neighbor macroblock */
6518 int mbn_xy
= edge
> 0 ? mb_xy
: mbm_xy
;
6522 if( (edge
&1) && IS_8x8DCT(s
->current_picture
.mb_type
[mb_xy
]) )
6525 if (h
->mb_aff_frame
&& (dir
== 1) && (edge
== 0) && ((mb_y
& 1) == 0)
6526 && !IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
])
6527 && IS_INTERLACED(s
->current_picture
.mb_type
[mbn_xy
])
6529 // This is a special case in the norm where the filtering must
6530 // be done twice (one each of the field) even if we are in a
6531 // frame macroblock.
6533 unsigned int tmp_linesize
= 2 * linesize
;
6534 unsigned int tmp_uvlinesize
= 2 * uvlinesize
;
6535 int mbn_xy
= mb_xy
- 2 * s
->mb_stride
;
6539 if( IS_INTRA( s
->current_picture
.mb_type
[mb_xy
] ) ||
6540 IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) ) {
6541 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 3;
6547 // Do not use s->qscale as luma quantizer because it has not the same
6548 // value in IPCM macroblocks.
6549 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6550 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, tmp_linesize
, tmp_uvlinesize
);
6551 { int i
; for (i
= 0; i
< 4; i
++) tprintf(" bS[%d]:%d", i
, bS
[i
]); tprintf("\n"); }
6552 filter_mb_edgeh( h
, &img_y
[0], tmp_linesize
, bS
, qp
);
6553 chroma_qp
= ( h
->chroma_qp
+
6554 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1;
6555 filter_mb_edgech( h
, &img_cb
[0], tmp_uvlinesize
, bS
, chroma_qp
);
6556 filter_mb_edgech( h
, &img_cr
[0], tmp_uvlinesize
, bS
, chroma_qp
);
6559 mbn_xy
+= s
->mb_stride
;
6560 if( IS_INTRA( s
->current_picture
.mb_type
[mb_xy
] ) ||
6561 IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) ) {
6562 bS
[0] = bS
[1] = bS
[2] = bS
[3] = 3;
6568 // Do not use s->qscale as luma quantizer because it has not the same
6569 // value in IPCM macroblocks.
6570 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6571 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, tmp_linesize
, tmp_uvlinesize
);
6572 { int i
; for (i
= 0; i
< 4; i
++) tprintf(" bS[%d]:%d", i
, bS
[i
]); tprintf("\n"); }
6573 filter_mb_edgeh( h
, &img_y
[linesize
], tmp_linesize
, bS
, qp
);
6574 chroma_qp
= ( h
->chroma_qp
+
6575 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1;
6576 filter_mb_edgech( h
, &img_cb
[uvlinesize
], tmp_uvlinesize
, bS
, chroma_qp
);
6577 filter_mb_edgech( h
, &img_cr
[uvlinesize
], tmp_uvlinesize
, bS
, chroma_qp
);
6580 if( IS_INTRA( s
->current_picture
.mb_type
[mb_xy
] ) ||
6581 IS_INTRA( s
->current_picture
.mb_type
[mbn_xy
] ) ) {
6584 if ( (!IS_INTERLACED(s
->current_picture
.mb_type
[mb_xy
]) && !IS_INTERLACED(s
->current_picture
.mb_type
[mbm_xy
]))
6585 || ((h
->mb_aff_frame
|| (s
->picture_structure
!= PICT_FRAME
)) && (dir
== 0))
6594 bS
[0] = bS
[1] = bS
[2] = bS
[3] = value
;
6597 for( i
= 0; i
< 4; i
++ ) {
6598 int x
= dir
== 0 ? edge
: i
;
6599 int y
= dir
== 0 ? i
: edge
;
6600 int b_idx
= 8 + 4 + x
+ 8*y
;
6601 int bn_idx
= b_idx
- (dir
? 8:1);
6603 if( h
->non_zero_count_cache
[b_idx
] != 0 ||
6604 h
->non_zero_count_cache
[bn_idx
] != 0 ) {
6611 for( l
= 0; l
< 1 + (h
->slice_type
== B_TYPE
); l
++ ) {
6612 if( ref2frm
[h
->ref_cache
[l
][b_idx
]+2] != ref2frm
[h
->ref_cache
[l
][bn_idx
]+2] ||
6613 ABS( h
->mv_cache
[l
][b_idx
][0] - h
->mv_cache
[l
][bn_idx
][0] ) >= 4 ||
6614 ABS( h
->mv_cache
[l
][b_idx
][1] - h
->mv_cache
[l
][bn_idx
][1] ) >= 4 ) {
6622 if(bS
[0]+bS
[1]+bS
[2]+bS
[3] == 0)
6627 // Do not use s->qscale as luma quantizer because it has not the same
6628 // value in IPCM macroblocks.
6629 qp
= ( s
->current_picture
.qscale_table
[mb_xy
] + s
->current_picture
.qscale_table
[mbn_xy
] + 1 ) >> 1;
6630 //tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6631 tprintf("filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x
, mb_y
, dir
, edge
, qp
, linesize
, uvlinesize
);
6632 { int i
; for (i
= 0; i
< 4; i
++) tprintf(" bS[%d]:%d", i
, bS
[i
]); tprintf("\n"); }
6634 filter_mb_edgev( h
, &img_y
[4*edge
], linesize
, bS
, qp
);
6635 if( (edge
&1) == 0 ) {
6636 int chroma_qp
= ( h
->chroma_qp
+
6637 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1;
6638 filter_mb_edgecv( h
, &img_cb
[2*edge
], uvlinesize
, bS
, chroma_qp
);
6639 filter_mb_edgecv( h
, &img_cr
[2*edge
], uvlinesize
, bS
, chroma_qp
);
6642 filter_mb_edgeh( h
, &img_y
[4*edge
*linesize
], linesize
, bS
, qp
);
6643 if( (edge
&1) == 0 ) {
6644 int chroma_qp
= ( h
->chroma_qp
+
6645 get_chroma_qp( h
->pps
.chroma_qp_index_offset
, s
->current_picture
.qscale_table
[mbn_xy
] ) + 1 ) >> 1;
6646 filter_mb_edgech( h
, &img_cb
[2*edge
*uvlinesize
], uvlinesize
, bS
, chroma_qp
);
6647 filter_mb_edgech( h
, &img_cr
[2*edge
*uvlinesize
], uvlinesize
, bS
, chroma_qp
);
/**
 * Decodes the macroblocks of one slice and reports the decoded range (or
 * the failure position) to the error-resilience layer via
 * ff_er_add_slice().
 *
 * part_mask limits the status flags passed to ff_er_add_slice():
 * (AC_END | AC_ERROR) when s->partitioned_frame is set, 0x7F otherwise.
 *
 * Visible structure:
 *  - CABAC path (h->pps.cabac): aligns the bit reader, initialises the
 *    CABAC states and decoder on the remaining bytes, computes the initial
 *    state of 460 contexts from cabac_context_init_I / _PB and s->qscale,
 *    then repeatedly calls decode_mb_cabac() followed by hl_decode_mb().
 *    Decoding errors, or the CABAC bytestream pointer running more than one
 *    byte past its end, are logged and reported. The slice ends on
 *    get_cabac_terminate() or when mb_y reaches mb_height.
 *  - CAVLC path: same loop built on decode_mb_cavlc(); the end of the
 *    slice is detected from get_bits_count() versus gb.size_in_bits and
 *    s->mb_skip_run.
 *  - A third section calls decode_mb() and writes to stderr.
 *    NOTE(review): it contains "s->?gb" and "s->gb?.size_in_bits", which
 *    are not valid C, so it is presumably excluded from compilation; the
 *    guard itself is not shown in this listing - confirm.
 *
 * NOTE(review): the listing omits many lines (declarations, loop headers,
 * return statements, closing braces); the return values on each path must
 * be checked in the full source. The final visible statement is
 * "return -1; //not reached".
 *
 * @param h decoder context
 */
6654 static int decode_slice(H264Context
*h
){
6655 MpegEncContext
* const s
= &h
->s
;
6656 const int part_mask
= s
->partitioned_frame
? (AC_END
|AC_ERROR
) : 0x7F;
6660 if( h
->pps
.cabac
) {
6664 align_get_bits( &s
->gb
);
6667 ff_init_cabac_states( &h
->cabac
, ff_h264_lps_range
, ff_h264_mps_state
, ff_h264_lps_state
, 64 );
6668 ff_init_cabac_decoder( &h
->cabac
,
6669 s
->gb
.buffer
+ get_bits_count(&s
->gb
)/8,
6670 ( s
->gb
.size_in_bits
- get_bits_count(&s
->gb
) + 7)/8);
6671 /* calculate pre-state */
6672 for( i
= 0; i
< 460; i
++ ) {
6674 if( h
->slice_type
== I_TYPE
)
6675 pre
= clip( ((cabac_context_init_I
[i
][0] * s
->qscale
) >>4 ) + cabac_context_init_I
[i
][1], 1, 126 );
6677 pre
= clip( ((cabac_context_init_PB
[h
->cabac_init_idc
][i
][0] * s
->qscale
) >>4 ) + cabac_context_init_PB
[h
->cabac_init_idc
][i
][1], 1, 126 );
6680 h
->cabac_state
[i
] = 2 * ( 63 - pre
) + 0;
6682 h
->cabac_state
[i
] = 2 * ( pre
- 64 ) + 1;
6686 int ret
= decode_mb_cabac(h
);
6689 if(ret
>=0) hl_decode_mb(h
);
6691 /* XXX: useless as decode_mb_cabac it doesn't support that ... */
6692 if( ret
>= 0 && h
->mb_aff_frame
) { //FIXME optimal? or let mb_decode decode 16x32 ?
6695 if(ret
>=0) ret
= decode_mb_cabac(h
);
6700 eos
= get_cabac_terminate( &h
->cabac
);
6702 if( ret
< 0 || h
->cabac
.bytestream
> h
->cabac
.bytestream_end
+ 1) {
6703 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6704 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6708 if( ++s
->mb_x
>= s
->mb_width
) {
6710 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6712 if(h
->mb_aff_frame
) {
6717 if( eos
|| s
->mb_y
>= s
->mb_height
) {
6718 tprintf("slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6719 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6726 int ret
= decode_mb_cavlc(h
);
6728 if(ret
>=0) hl_decode_mb(h
);
6730 if(ret
>=0 && h
->mb_aff_frame
){ //FIXME optimal? or let mb_decode decode 16x32 ?
6732 ret
= decode_mb_cavlc(h
);
6734 if(ret
>=0) hl_decode_mb(h
);
6739 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6740 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6745 if(++s
->mb_x
>= s
->mb_width
){
6747 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6749 if(h
->mb_aff_frame
) {
6752 if(s
->mb_y
>= s
->mb_height
){
6753 tprintf("slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6755 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
) {
6756 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6760 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6767 if(get_bits_count(&s
->gb
) >= s
->gb
.size_in_bits
&& s
->mb_skip_run
<=0){
6768 tprintf("slice end %d %d\n", get_bits_count(&s
->gb
), s
->gb
.size_in_bits
);
6769 if(get_bits_count(&s
->gb
) == s
->gb
.size_in_bits
){
6770 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6774 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6783 for(;s
->mb_y
< s
->mb_height
; s
->mb_y
++){
6784 for(;s
->mb_x
< s
->mb_width
; s
->mb_x
++){
6785 int ret
= decode_mb(h
);
6790 fprintf(stderr
, "error while decoding MB %d %d\n", s
->mb_x
, s
->mb_y
);
6791 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6796 if(++s
->mb_x
>= s
->mb_width
){
6798 if(++s
->mb_y
>= s
->mb_height
){
6799 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6800 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6804 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6811 if(get_bits_count(s
->?gb
) >= s
->gb
?.size_in_bits
){
6812 if(get_bits_count(s
->gb
) == s
->gb
.size_in_bits
){
6813 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
-1, s
->mb_y
, (AC_END
|DC_END
|MV_END
)&part_mask
);
6817 ff_er_add_slice(s
, s
->resync_mb_x
, s
->resync_mb_y
, s
->mb_x
, s
->mb_y
, (AC_ERROR
|DC_ERROR
|MV_ERROR
)&part_mask
);
6824 ff_draw_horiz_band(s
, 16*s
->mb_y
, 16);
6827 return -1; //not reached
6830 static inline void decode_hrd_parameters(H264Context
*h
, SPS
*sps
){
6831 MpegEncContext
* const s
= &h
->s
;
6833 cpb_count
= get_ue_golomb(&s
->gb
) + 1;
6834 get_bits(&s
->gb
, 4); /* bit_rate_scale */
6835 get_bits(&s
->gb
, 4); /* cpb_size_scale */
6836 for(i
=0; i
<cpb_count
; i
++){
6837 get_ue_golomb(&s
->gb
); /* bit_rate_value_minus1 */
6838 get_ue_golomb(&s
->gb
); /* cpb_size_value_minus1 */
6839 get_bits1(&s
->gb
); /* cbr_flag */
6841 get_bits(&s
->gb
, 5); /* initial_cpb_removal_delay_length_minus1 */
6842 get_bits(&s
->gb
, 5); /* cpb_removal_delay_length_minus1 */
6843 get_bits(&s
->gb
, 5); /* dpb_output_delay_length_minus1 */
6844 get_bits(&s
->gb
, 5); /* time_offset_length */
/**
 * Parses the VUI (video usability information) part of a sequence
 * parameter set from h->s.gb.
 *
 * Values stored into sps by the visible lines:
 *  - sar: read as two 16-bit fields when aspect_ratio_idc is EXTENDED_SAR,
 *    taken from pixel_aspect[] when aspect_ratio_idc < 16; other values
 *    log "illegal aspect ratio";
 *  - timing_info_present_flag, and when set num_units_in_tick, time_scale
 *    (32 bits each) and fixed_frame_rate_flag;
 *  - bitstream_restriction_flag, and when set num_reorder_frames.
 * Overscan, video signal type, colour description, chroma location, HRD
 * parameters (via decode_hrd_parameters), low_delay_hrd_flag,
 * pic_struct_present_flag and the remaining bitstream-restriction fields
 * are read and discarded.
 *
 * NOTE(review): this listing omits lines, including whatever follows the
 * "illegal aspect ratio" log, the handling when no aspect ratio info is
 * present, and the return statements - check the full source for the
 * actual return values.
 *
 * @param h   decoder context providing the bit reader
 * @param sps sequence parameter set to fill in
 */
6847 static inline int decode_vui_parameters(H264Context
*h
, SPS
*sps
){
6848 MpegEncContext
* const s
= &h
->s
;
6849 int aspect_ratio_info_present_flag
, aspect_ratio_idc
;
6850 int nal_hrd_parameters_present_flag
, vcl_hrd_parameters_present_flag
;
6852 aspect_ratio_info_present_flag
= get_bits1(&s
->gb
);
6854 if( aspect_ratio_info_present_flag
) {
6855 aspect_ratio_idc
= get_bits(&s
->gb
, 8);
6856 if( aspect_ratio_idc
== EXTENDED_SAR
) {
6857 sps
->sar
.num
= get_bits(&s
->gb
, 16);
6858 sps
->sar
.den
= get_bits(&s
->gb
, 16);
6859 }else if(aspect_ratio_idc
< 16){
6860 sps
->sar
= pixel_aspect
[aspect_ratio_idc
];
6862 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal aspect ratio\n");
6869 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
6871 if(get_bits1(&s
->gb
)){ /* overscan_info_present_flag */
6872 get_bits1(&s
->gb
); /* overscan_appropriate_flag */
6875 if(get_bits1(&s
->gb
)){ /* video_signal_type_present_flag */
6876 get_bits(&s
->gb
, 3); /* video_format */
6877 get_bits1(&s
->gb
); /* video_full_range_flag */
6878 if(get_bits1(&s
->gb
)){ /* colour_description_present_flag */
6879 get_bits(&s
->gb
, 8); /* colour_primaries */
6880 get_bits(&s
->gb
, 8); /* transfer_characteristics */
6881 get_bits(&s
->gb
, 8); /* matrix_coefficients */
6885 if(get_bits1(&s
->gb
)){ /* chroma_location_info_present_flag */
6886 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_top_field */
6887 get_ue_golomb(&s
->gb
); /* chroma_sample_location_type_bottom_field */
6890 sps
->timing_info_present_flag
= get_bits1(&s
->gb
);
6891 if(sps
->timing_info_present_flag
){
6892 sps
->num_units_in_tick
= get_bits_long(&s
->gb
, 32);
6893 sps
->time_scale
= get_bits_long(&s
->gb
, 32);
6894 sps
->fixed_frame_rate_flag
= get_bits1(&s
->gb
);
6897 nal_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
6898 if(nal_hrd_parameters_present_flag
)
6899 decode_hrd_parameters(h
, sps
);
6900 vcl_hrd_parameters_present_flag
= get_bits1(&s
->gb
);
6901 if(vcl_hrd_parameters_present_flag
)
6902 decode_hrd_parameters(h
, sps
);
6903 if(nal_hrd_parameters_present_flag
|| vcl_hrd_parameters_present_flag
)
6904 get_bits1(&s
->gb
); /* low_delay_hrd_flag */
6905 get_bits1(&s
->gb
); /* pic_struct_present_flag */
6907 sps
->bitstream_restriction_flag
= get_bits1(&s
->gb
);
6908 if(sps
->bitstream_restriction_flag
){
6909 get_bits1(&s
->gb
); /* motion_vectors_over_pic_boundaries_flag */
6910 get_ue_golomb(&s
->gb
); /* max_bytes_per_pic_denom */
6911 get_ue_golomb(&s
->gb
); /* max_bits_per_mb_denom */
6912 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_horizontal */
6913 get_ue_golomb(&s
->gb
); /* log2_max_mv_length_vertical */
6914 sps
->num_reorder_frames
= get_ue_golomb(&s
->gb
);
6915 get_ue_golomb(&s
->gb
); /* max_dec_frame_buffering */
/**
 * Parses a sequence parameter set (SPS) from h->s.gb into
 * h->sps_buffer[sps_id].
 *
 * Order of parsing as shown below: profile_idc (8 bits), four constraint
 * flags, 4 reserved bits, level_idc (8 bits), sps_id; for
 * profile_idc >= 100 the high-profile fields (a custom scaling matrix is
 * logged as not implemented); log2_max_frame_num (coded value + 4);
 * poc_type with its type-specific fields (poc_type > 2 is logged as
 * illegal); ref_frame_count (values above MAX_PICTURE_COUNT - 2 are logged
 * as too many); mb_width / mb_height (coded value + 1, then validated with
 * avcodec_check_dimensions); frame_mbs_only_flag and mb_aff;
 * direct_8x8_inference_flag; cropping; and, when flagged, the VUI via
 * decode_vui_parameters(). With FF_DEBUG_PICT_INFO set a summary line is
 * logged.
 *
 * NOTE(review): sps_id comes straight from the bitstream and indexes
 * h->sps_buffer; no range check against MAX_SPS_COUNT is visible in this
 * listing - confirm one exists in the full source.
 * NOTE(review): poc_cycle_length bounds the loop writing
 * sps->offset_for_ref_frame[i]; no upper limit on it is visible here -
 * confirm against the array size.
 * NOTE(review): the statements following each error log (and all return
 * statements) are among the lines missing from this listing.
 *
 * @param h decoder context providing the bit reader and the SPS table
 */
6921 static inline int decode_seq_parameter_set(H264Context
*h
){
6922 MpegEncContext
* const s
= &h
->s
;
6923 int profile_idc
, level_idc
;
6927 profile_idc
= get_bits(&s
->gb
, 8);
6928 get_bits1(&s
->gb
); //constraint_set0_flag
6929 get_bits1(&s
->gb
); //constraint_set1_flag
6930 get_bits1(&s
->gb
); //constraint_set2_flag
6931 get_bits1(&s
->gb
); //constraint_set3_flag
6932 get_bits(&s
->gb
, 4); // reserved
6933 level_idc
= get_bits(&s
->gb
, 8);
6934 sps_id
= get_ue_golomb(&s
->gb
);
6936 sps
= &h
->sps_buffer
[ sps_id
];
6937 sps
->profile_idc
= profile_idc
;
6938 sps
->level_idc
= level_idc
;
6940 if(sps
->profile_idc
>= 100){ //high profile
6941 if(get_ue_golomb(&s
->gb
) == 3) //chroma_format_idc
6942 get_bits1(&s
->gb
); //residual_color_transform_flag
6943 get_ue_golomb(&s
->gb
); //bit_depth_luma_minus8
6944 get_ue_golomb(&s
->gb
); //bit_depth_chroma_minus8
6945 sps
->transform_bypass
= get_bits1(&s
->gb
);
6946 if(get_bits1(&s
->gb
)){ //seq_scaling_matrix_present_flag
6947 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "custom scaling matrix not implemented\n");
6952 sps
->log2_max_frame_num
= get_ue_golomb(&s
->gb
) + 4;
6953 sps
->poc_type
= get_ue_golomb(&s
->gb
);
6955 if(sps
->poc_type
== 0){ //FIXME #define
6956 sps
->log2_max_poc_lsb
= get_ue_golomb(&s
->gb
) + 4;
6957 } else if(sps
->poc_type
== 1){//FIXME #define
6958 sps
->delta_pic_order_always_zero_flag
= get_bits1(&s
->gb
);
6959 sps
->offset_for_non_ref_pic
= get_se_golomb(&s
->gb
);
6960 sps
->offset_for_top_to_bottom_field
= get_se_golomb(&s
->gb
);
6961 sps
->poc_cycle_length
= get_ue_golomb(&s
->gb
);
6963 for(i
=0; i
<sps
->poc_cycle_length
; i
++)
6964 sps
->offset_for_ref_frame
[i
]= get_se_golomb(&s
->gb
);
6966 if(sps
->poc_type
> 2){
6967 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "illegal POC type %d\n", sps
->poc_type
);
6971 sps
->ref_frame_count
= get_ue_golomb(&s
->gb
);
6972 if(sps
->ref_frame_count
> MAX_PICTURE_COUNT
-2){
6973 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "too many reference frames\n");
6975 sps
->gaps_in_frame_num_allowed_flag
= get_bits1(&s
->gb
);
6976 sps
->mb_width
= get_ue_golomb(&s
->gb
) + 1;
6977 sps
->mb_height
= get_ue_golomb(&s
->gb
) + 1;
6978 if((unsigned)sps
->mb_width
>= INT_MAX
/16 || (unsigned)sps
->mb_height
>= INT_MAX
/16 ||
6979 avcodec_check_dimensions(NULL
, 16*sps
->mb_width
, 16*sps
->mb_height
))
6982 sps
->frame_mbs_only_flag
= get_bits1(&s
->gb
);
6983 if(!sps
->frame_mbs_only_flag
)
6984 sps
->mb_aff
= get_bits1(&s
->gb
);
6988 sps
->direct_8x8_inference_flag
= get_bits1(&s
->gb
);
6990 sps
->crop
= get_bits1(&s
->gb
);
6992 sps
->crop_left
= get_ue_golomb(&s
->gb
);
6993 sps
->crop_right
= get_ue_golomb(&s
->gb
);
6994 sps
->crop_top
= get_ue_golomb(&s
->gb
);
6995 sps
->crop_bottom
= get_ue_golomb(&s
->gb
);
6996 if(sps
->crop_left
|| sps
->crop_top
){
6997 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "insane cropping not completely supported, this could look slightly wrong ...\n");
7003 sps
->crop_bottom
= 0;
7006 sps
->vui_parameters_present_flag
= get_bits1(&s
->gb
);
7007 if( sps
->vui_parameters_present_flag
)
7008 decode_vui_parameters(h
, sps
);
7010 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7011 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7012 sps_id
, sps
->profile_idc
, sps
->level_idc
,
7014 sps
->ref_frame_count
,
7015 sps
->mb_width
, sps
->mb_height
,
7016 sps
->frame_mbs_only_flag
? "FRM" : (sps
->mb_aff
? "MB-AFF" : "PIC-AFF"),
7017 sps
->direct_8x8_inference_flag
? "8B8" : "",
7018 sps
->crop_left
, sps
->crop_right
,
7019 sps
->crop_top
, sps
->crop_bottom
,
7020 sps
->vui_parameters_present_flag
? "VUI" : ""
/**
 * Parses a picture parameter set (PPS) from h->s.gb into
 * h->pps_buffer[pps_id].
 *
 * Fields stored, in parsing order: sps_id, cabac, pic_order_present,
 * slice_group_count (coded value + 1; when > 1 the map type is read and
 * "FMO not supported" is logged), ref_count[0] / ref_count[1] (coded value
 * + 1; values above 32 log "reference overflow (pps)"), weighted_pred,
 * weighted_bipred_idc, init_qp and init_qs (coded value + 26),
 * chroma_qp_index_offset, deblocking_filter_parameters_present,
 * constrained_intra_pred, redundant_pic_cnt_present. If bits remain before
 * bit_length, transform_8x8_mode is read, a custom scaling matrix is logged
 * as not implemented, and second_chroma_qp_index_offset is read and
 * discarded. With FF_DEBUG_PICT_INFO set a summary line is logged.
 *
 * The lines beginning with '|' inside the slice-group switch are fragments
 * of a syntax table, not statements.
 *
 * NOTE(review): pps_id comes straight from the bitstream and indexes
 * h->pps_buffer; no range check against MAX_PPS_COUNT is visible in this
 * listing - confirm one exists in the full source.
 * NOTE(review): the statements following each error log and the return
 * statements are among the lines missing from this listing.
 *
 * @param h          decoder context providing the bit reader and PPS table
 * @param bit_length number of bits in this parameter set, used to detect
 *                   the optional trailing fields
 */
7026 static inline int decode_picture_parameter_set(H264Context
*h
, int bit_length
){
7027 MpegEncContext
* const s
= &h
->s
;
7028 int pps_id
= get_ue_golomb(&s
->gb
);
7029 PPS
*pps
= &h
->pps_buffer
[pps_id
];
7031 pps
->sps_id
= get_ue_golomb(&s
->gb
);
7032 pps
->cabac
= get_bits1(&s
->gb
);
7033 pps
->pic_order_present
= get_bits1(&s
->gb
);
7034 pps
->slice_group_count
= get_ue_golomb(&s
->gb
) + 1;
7035 if(pps
->slice_group_count
> 1 ){
7036 pps
->mb_slice_group_map_type
= get_ue_golomb(&s
->gb
);
7037 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "FMO not supported\n");
7038 switch(pps
->mb_slice_group_map_type
){
7041 | for( i
= 0; i
<= num_slice_groups_minus1
; i
++ ) | | |
7042 | run_length
[ i
] |1 |ue(v
) |
7047 | for( i
= 0; i
< num_slice_groups_minus1
; i
++ ) | | |
7049 | top_left_mb
[ i
] |1 |ue(v
) |
7050 | bottom_right_mb
[ i
] |1 |ue(v
) |
7058 | slice_group_change_direction_flag
|1 |u(1) |
7059 | slice_group_change_rate_minus1
|1 |ue(v
) |
7064 | slice_group_id_cnt_minus1
|1 |ue(v
) |
7065 | for( i
= 0; i
<= slice_group_id_cnt_minus1
; i
++ | | |
7067 | slice_group_id
[ i
] |1 |u(v
) |
7072 pps
->ref_count
[0]= get_ue_golomb(&s
->gb
) + 1;
7073 pps
->ref_count
[1]= get_ue_golomb(&s
->gb
) + 1;
7074 if(pps
->ref_count
[0] > 32 || pps
->ref_count
[1] > 32){
7075 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "reference overflow (pps)\n");
7079 pps
->weighted_pred
= get_bits1(&s
->gb
);
7080 pps
->weighted_bipred_idc
= get_bits(&s
->gb
, 2);
7081 pps
->init_qp
= get_se_golomb(&s
->gb
) + 26;
7082 pps
->init_qs
= get_se_golomb(&s
->gb
) + 26;
7083 pps
->chroma_qp_index_offset
= get_se_golomb(&s
->gb
);
7084 pps
->deblocking_filter_parameters_present
= get_bits1(&s
->gb
);
7085 pps
->constrained_intra_pred
= get_bits1(&s
->gb
);
7086 pps
->redundant_pic_cnt_present
= get_bits1(&s
->gb
);
7088 if(get_bits_count(&s
->gb
) < bit_length
){
7089 pps
->transform_8x8_mode
= get_bits1(&s
->gb
);
7090 if(get_bits1(&s
->gb
)){ //pic_scaling_matrix_present_flag
7091 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "custom scaling matrix not implemented\n");
7094 get_se_golomb(&s
->gb
); //second_chroma_qp_index_offset
7097 if(s
->avctx
->debug
&FF_DEBUG_PICT_INFO
){
7098 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
7099 pps_id
, pps
->sps_id
,
7100 pps
->cabac
? "CABAC" : "CAVLC",
7101 pps
->slice_group_count
,
7102 pps
->ref_count
[0], pps
->ref_count
[1],
7103 pps
->weighted_pred
? "weighted" : "",
7104 pps
->init_qp
, pps
->init_qs
, pps
->chroma_qp_index_offset
,
7105 pps
->deblocking_filter_parameters_present
? "LPAR" : "",
7106 pps
->constrained_intra_pred
? "CONSTR" : "",
7107 pps
->redundant_pic_cnt_present
? "REDU" : "",
7108 pps
->transform_8x8_mode
? "8x8DCT" : ""
7116 * finds the end of the current frame in the bitstream.
7117 * @return the position of the first byte of the next frame, or -1
7119 static int find_frame_end(H264Context
*h
, const uint8_t *buf
, int buf_size
){
7122 ParseContext
*pc
= &(h
->s
.parse_context
);
7123 //printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
7124 // mb_addr= pc->mb_addr - 1;
7126 for(i
=0; i
<=buf_size
; i
++){
7127 if((state
&0xFFFFFF1F) == 0x101 || (state
&0xFFFFFF1F) == 0x102 || (state
&0xFFFFFF1F) == 0x105){
7128 tprintf("find_frame_end new startcode = %08x, frame_start_found = %d, pos = %d\n", state
, pc
->frame_start_found
, i
);
7129 if(pc
->frame_start_found
){
7130 // If there isn't one more byte in the buffer,
7131 // the test on first_mb_in_slice cannot be done yet;
7132 // do it at the next call.
7133 if (i
>= buf_size
) break;
7134 if (buf
[i
] & 0x80) {
7135 // first_mb_in_slice is 0, probably the first nal of a new
7137 tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state
, i
);
7139 pc
->frame_start_found
= 0;
7143 pc
->frame_start_found
= 1;
7145 if((state
&0xFFFFFF1F) == 0x107 || (state
&0xFFFFFF1F) == 0x108 || (state
&0xFFFFFF1F) == 0x109){
7146 if(pc
->frame_start_found
){
7148 pc
->frame_start_found
= 0;
7153 state
= (state
<<8) | buf
[i
];
7157 return END_NOT_FOUND
;
7160 static int h264_parse(AVCodecParserContext
*s
,
7161 AVCodecContext
*avctx
,
7162 uint8_t **poutbuf
, int *poutbuf_size
,
7163 const uint8_t *buf
, int buf_size
)
7165 H264Context
*h
= s
->priv_data
;
7166 ParseContext
*pc
= &h
->s
.parse_context
;
7169 next
= find_frame_end(h
, buf
, buf_size
);
7171 if (ff_combine_frame(pc
, next
, (uint8_t **)&buf
, &buf_size
) < 0) {
7177 *poutbuf
= (uint8_t *)buf
;
7178 *poutbuf_size
= buf_size
;
7182 static int h264_split(AVCodecContext
*avctx
,
7183 const uint8_t *buf
, int buf_size
)
7186 uint32_t state
= -1;
7189 for(i
=0; i
<=buf_size
; i
++){
7190 if((state
&0xFFFFFF1F) == 0x107)
7192 /* if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
7194 if((state
&0xFFFFFF00) == 0x100 && (state
&0xFFFFFF1F) != 0x107 && (state
&0xFFFFFF1F) != 0x108 && (state
&0xFFFFFF1F) != 0x109){
7196 while(i
>4 && buf
[i
-5]==0) i
--;
7201 state
= (state
<<8) | buf
[i
];
7207 static int decode_nal_units(H264Context
*h
, uint8_t *buf
, int buf_size
){
7208 MpegEncContext
* const s
= &h
->s
;
7209 AVCodecContext
* const avctx
= s
->avctx
;
7213 for(i
=0; i
<32; i
++){
7214 printf("%X ", buf
[i
]);
7218 s
->current_picture_ptr
= NULL
;
7227 if(buf_index
>= buf_size
) break;
7229 for(i
= 0; i
< h
->nal_length_size
; i
++)
7230 nalsize
= (nalsize
<< 8) | buf
[buf_index
++];
7232 // start code prefix search
7233 for(; buf_index
+ 3 < buf_size
; buf_index
++){
7234 // this should always succeed in the first iteration
7235 if(buf
[buf_index
] == 0 && buf
[buf_index
+1] == 0 && buf
[buf_index
+2] == 1)
7239 if(buf_index
+3 >= buf_size
) break;
7244 ptr
= decode_nal(h
, buf
+ buf_index
, &dst_length
, &consumed
, h
->is_avc
? nalsize
: buf_size
- buf_index
);
7245 if(ptr
[dst_length
- 1] == 0) dst_length
--;
7246 bit_length
= 8*dst_length
- decode_rbsp_trailing(ptr
+ dst_length
- 1);
7248 if(s
->avctx
->debug
&FF_DEBUG_STARTCODE
){
7249 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "NAL %d at %d/%d length %d\n", h
->nal_unit_type
, buf_index
, buf_size
, dst_length
);
7252 if (h
->is_avc
&& (nalsize
!= consumed
))
7253 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "AVC: Consumed only %d bytes instead of %d\n", consumed
, nalsize
);
7255 buf_index
+= consumed
;
7257 if( (s
->hurry_up
== 1 && h
->nal_ref_idc
== 0)
7258 ||(avctx
->skip_frame
>= AVDISCARD_NONREF
&& h
->nal_ref_idc
== 0))
7261 switch(h
->nal_unit_type
){
7263 idr(h
); //FIXME ensure we don't lose some frames if there is reordering
7265 init_get_bits(&s
->gb
, ptr
, bit_length
);
7267 h
->inter_gb_ptr
= &s
->gb
;
7268 s
->data_partitioning
= 0;
7270 if(decode_slice_header(h
) < 0){
7271 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "decode_slice_header error\n");
7274 if(h
->redundant_pic_count
==0 && s
->hurry_up
< 5
7275 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| h
->nal_ref_idc
)
7276 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| h
->slice_type
!=B_TYPE
)
7277 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| h
->slice_type
==I_TYPE
)
7278 && avctx
->skip_frame
< AVDISCARD_ALL
)
7282 init_get_bits(&s
->gb
, ptr
, bit_length
);
7284 h
->inter_gb_ptr
= NULL
;
7285 s
->data_partitioning
= 1;
7287 if(decode_slice_header(h
) < 0){
7288 av_log(h
->s
.avctx
, AV_LOG_ERROR
, "decode_slice_header error\n");
7292 init_get_bits(&h
->intra_gb
, ptr
, bit_length
);
7293 h
->intra_gb_ptr
= &h
->intra_gb
;
7296 init_get_bits(&h
->inter_gb
, ptr
, bit_length
);
7297 h
->inter_gb_ptr
= &h
->inter_gb
;
7299 if(h
->redundant_pic_count
==0 && h
->intra_gb_ptr
&& s
->data_partitioning
7301 && (avctx
->skip_frame
< AVDISCARD_NONREF
|| h
->nal_ref_idc
)
7302 && (avctx
->skip_frame
< AVDISCARD_BIDIR
|| h
->slice_type
!=B_TYPE
)
7303 && (avctx
->skip_frame
< AVDISCARD_NONKEY
|| h
->slice_type
==I_TYPE
)
7304 && avctx
->skip_frame
< AVDISCARD_ALL
)
7310 init_get_bits(&s
->gb
, ptr
, bit_length
);
7311 decode_seq_parameter_set(h
);
7313 if(s
->flags
& CODEC_FLAG_LOW_DELAY
)
7316 if(avctx
->has_b_frames
< 2)
7317 avctx
->has_b_frames
= !s
->low_delay
;
7320 init_get_bits(&s
->gb
, ptr
, bit_length
);
7322 decode_picture_parameter_set(h
, bit_length
);
7325 case NAL_PICTURE_DELIMITER
:
7327 case NAL_FILTER_DATA
:
7330 av_log(avctx
, AV_LOG_ERROR
, "Unknown NAL code: %d\n", h
->nal_unit_type
);
7334 if(!s
->current_picture_ptr
) return buf_index
; //no frame
7336 s
->current_picture_ptr
->pict_type
= s
->pict_type
;
7337 s
->current_picture_ptr
->key_frame
= s
->pict_type
== I_TYPE
&& h
->nal_unit_type
== NAL_IDR_SLICE
;
7339 h
->prev_frame_num_offset
= h
->frame_num_offset
;
7340 h
->prev_frame_num
= h
->frame_num
;
7341 if(s
->current_picture_ptr
->reference
){
7342 h
->prev_poc_msb
= h
->poc_msb
;
7343 h
->prev_poc_lsb
= h
->poc_lsb
;
7345 if(s
->current_picture_ptr
->reference
)
7346 execute_ref_pic_marking(h
, h
->mmco
, h
->mmco_index
);
7356 * returns the number of bytes consumed for building the current frame
7358 static int get_consumed_bytes(MpegEncContext
*s
, int pos
, int buf_size
){
7359 if(s
->flags
&CODEC_FLAG_TRUNCATED
){
7360 pos
-= s
->parse_context
.last_index
;
7361 if(pos
<0) pos
=0; // FIXME remove (unneeded?)
7365 if(pos
==0) pos
=1; //avoid infinite loops (I doubt that's needed but ...)
7366 if(pos
+10>buf_size
) pos
=buf_size
; // oops ;)
7372 static int decode_frame(AVCodecContext
*avctx
,
7373 void *data
, int *data_size
,
7374 uint8_t *buf
, int buf_size
)
7376 H264Context
*h
= avctx
->priv_data
;
7377 MpegEncContext
*s
= &h
->s
;
7378 AVFrame
*pict
= data
;
7381 s
->flags
= avctx
->flags
;
7382 s
->flags2
= avctx
->flags2
;
7384 /* no supplementary picture */
7385 if (buf_size
== 0) {
7389 if(s
->flags
&CODEC_FLAG_TRUNCATED
){
7390 int next
= find_frame_end(h
, buf
, buf_size
);
7392 if( ff_combine_frame(&s
->parse_context
, next
, &buf
, &buf_size
) < 0 )
7394 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7397 if(h
->is_avc
&& !h
->got_avcC
) {
7398 int i
, cnt
, nalsize
;
7399 unsigned char *p
= avctx
->extradata
;
7400 if(avctx
->extradata_size
< 7) {
7401 av_log(avctx
, AV_LOG_ERROR
, "avcC too short\n");
7405 av_log(avctx
, AV_LOG_ERROR
, "Unknown avcC version %d\n", *p
);
7408 /* sps and pps in the avcC always have length coded with 2 bytes,
7409 so put a fake nal_length_size = 2 while parsing them */
7410 h
->nal_length_size
= 2;
7411 // Decode sps from avcC
7412 cnt
= *(p
+5) & 0x1f; // Number of sps
7414 for (i
= 0; i
< cnt
; i
++) {
7415 nalsize
= BE_16(p
) + 2;
7416 if(decode_nal_units(h
, p
, nalsize
) != nalsize
) {
7417 av_log(avctx
, AV_LOG_ERROR
, "Decoding sps %d from avcC failed\n", i
);
7422 // Decode pps from avcC
7423 cnt
= *(p
++); // Number of pps
7424 for (i
= 0; i
< cnt
; i
++) {
7425 nalsize
= BE_16(p
) + 2;
7426 if(decode_nal_units(h
, p
, nalsize
) != nalsize
) {
7427 av_log(avctx
, AV_LOG_ERROR
, "Decoding pps %d from avcC failed\n", i
);
7432 // Now store the right nal length size, which will be used to parse all other nals
7433 h
->nal_length_size
= ((*(((char*)(avctx
->extradata
))+4))&0x03)+1;
7434 // Do not reparse avcC
7438 if(!h
->is_avc
&& s
->avctx
->extradata_size
&& s
->picture_number
==0){
7439 if(decode_nal_units(h
, s
->avctx
->extradata
, s
->avctx
->extradata_size
) < 0)
7443 buf_index
=decode_nal_units(h
, buf
, buf_size
);
7447 //FIXME do something with unavailable reference frames
7449 // if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
7450 if(!s
->current_picture_ptr
){
7451 av_log(h
->s
.avctx
, AV_LOG_DEBUG
, "error, NO frame\n");
7456 Picture
*out
= s
->current_picture_ptr
;
7457 #if 0 //decode order
7458 *data_size
= sizeof(AVFrame
);
7460 /* Sort B-frames into display order */
7461 Picture
*cur
= s
->current_picture_ptr
;
7462 Picture
*prev
= h
->delayed_output_pic
;
7467 int dropped_frame
= 0;
7470 if(h
->sps
.bitstream_restriction_flag
7471 && s
->avctx
->has_b_frames
< h
->sps
.num_reorder_frames
){
7472 s
->avctx
->has_b_frames
= h
->sps
.num_reorder_frames
;
7476 while(h
->delayed_pic
[pics
]) pics
++;
7477 h
->delayed_pic
[pics
++] = cur
;
7478 if(cur
->reference
== 0)
7481 for(i
=0; h
->delayed_pic
[i
]; i
++)
7482 if(h
->delayed_pic
[i
]->key_frame
|| h
->delayed_pic
[i
]->poc
==0)
7485 out
= h
->delayed_pic
[0];
7486 for(i
=1; h
->delayed_pic
[i
] && !h
->delayed_pic
[i
]->key_frame
; i
++)
7487 if(h
->delayed_pic
[i
]->poc
< out
->poc
){
7488 out
= h
->delayed_pic
[i
];
7492 out_of_order
= !cross_idr
&& prev
&& out
->poc
< prev
->poc
;
7493 if(prev
&& pics
<= s
->avctx
->has_b_frames
)
7495 else if((out_of_order
&& pics
-1 == s
->avctx
->has_b_frames
)
7497 ((!cross_idr
&& prev
&& out
->poc
> prev
->poc
+ 2)
7498 || cur
->pict_type
== B_TYPE
)))
7501 s
->avctx
->has_b_frames
++;
7504 else if(out_of_order
)
7507 if(out_of_order
|| pics
> s
->avctx
->has_b_frames
){
7508 dropped_frame
= (out
!= h
->delayed_pic
[out_idx
]);
7509 for(i
=out_idx
; h
->delayed_pic
[i
]; i
++)
7510 h
->delayed_pic
[i
] = h
->delayed_pic
[i
+1];
7513 if(prev
== out
&& !dropped_frame
)
7516 *data_size
= sizeof(AVFrame
);
7517 if(prev
&& prev
!= out
&& prev
->reference
== 1)
7518 prev
->reference
= 0;
7519 h
->delayed_output_pic
= out
;
7522 *pict
= *(AVFrame
*)out
;
7525 assert(pict
->data
[0]);
7526 ff_print_debug_info(s
, pict
);
7527 //printf("out %d\n", (int)pict->data[0]);
7530 /* Return the Picture timestamp as the frame number */
7531 /* we subtract 1 because it is added in utils.c */
7532 avctx
->frame_number
= s
->picture_number
- 1;
7534 return get_consumed_bytes(s
, buf_index
, buf_size
);
7537 static inline void fill_mb_avail(H264Context
*h
){
7538 MpegEncContext
* const s
= &h
->s
;
7539 const int mb_xy
= s
->mb_x
+ s
->mb_y
*s
->mb_stride
;
7542 h
->mb_avail
[0]= s
->mb_x
&& h
->slice_table
[mb_xy
- s
->mb_stride
- 1] == h
->slice_num
;
7543 h
->mb_avail
[1]= h
->slice_table
[mb_xy
- s
->mb_stride
] == h
->slice_num
;
7544 h
->mb_avail
[2]= s
->mb_x
+1 < s
->mb_width
&& h
->slice_table
[mb_xy
- s
->mb_stride
+ 1] == h
->slice_num
;
7550 h
->mb_avail
[3]= s
->mb_x
&& h
->slice_table
[mb_xy
- 1] == h
->slice_num
;
7551 h
->mb_avail
[4]= 1; //FIXME move out
7552 h
->mb_avail
[5]= 0; //FIXME move out
7558 #define SIZE (COUNT*40)
7564 // int int_temp[10000];
7566 AVCodecContext avctx
;
7568 dsputil_init(&dsp
, &avctx
);
7570 init_put_bits(&pb
, temp
, SIZE
);
7571 printf("testing unsigned exp golomb\n");
7572 for(i
=0; i
<COUNT
; i
++){
7574 set_ue_golomb(&pb
, i
);
7575 STOP_TIMER("set_ue_golomb");
7577 flush_put_bits(&pb
);
7579 init_get_bits(&gb
, temp
, 8*SIZE
);
7580 for(i
=0; i
<COUNT
; i
++){
7583 s
= show_bits(&gb
, 24);
7586 j
= get_ue_golomb(&gb
);
7588 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7591 STOP_TIMER("get_ue_golomb");
7595 init_put_bits(&pb
, temp
, SIZE
);
7596 printf("testing signed exp golomb\n");
7597 for(i
=0; i
<COUNT
; i
++){
7599 set_se_golomb(&pb
, i
- COUNT
/2);
7600 STOP_TIMER("set_se_golomb");
7602 flush_put_bits(&pb
);
7604 init_get_bits(&gb
, temp
, 8*SIZE
);
7605 for(i
=0; i
<COUNT
; i
++){
7608 s
= show_bits(&gb
, 24);
7611 j
= get_se_golomb(&gb
);
7612 if(j
!= i
- COUNT
/2){
7613 printf("missmatch! at %d (%d should be %d) bits:%6X\n", i
, j
, i
, s
);
7616 STOP_TIMER("get_se_golomb");
7619 printf("testing 4x4 (I)DCT\n");
7622 uint8_t src
[16], ref
[16];
7623 uint64_t error
= 0, max_error
=0;
7625 for(i
=0; i
<COUNT
; i
++){
7627 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
7628 for(j
=0; j
<16; j
++){
7629 ref
[j
]= random()%255;
7630 src
[j
]= random()%255;
7633 h264_diff_dct_c(block
, src
, ref
, 4);
7636 for(j
=0; j
<16; j
++){
7637 // printf("%d ", block[j]);
7638 block
[j
]= block
[j
]*4;
7639 if(j
&1) block
[j
]= (block
[j
]*4 + 2)/5;
7640 if(j
&4) block
[j
]= (block
[j
]*4 + 2)/5;
7644 s
->dsp
.h264_idct_add(ref
, block
, 4);
7645 /* for(j=0; j<16; j++){
7646 printf("%d ", ref[j]);
7650 for(j
=0; j
<16; j
++){
7651 int diff
= ABS(src
[j
] - ref
[j
]);
7654 max_error
= FFMAX(max_error
, diff
);
7657 printf("error=%f max_error=%d\n", ((float)error
)/COUNT
/16, (int)max_error
);
7659 printf("testing quantizer\n");
7660 for(qp
=0; qp
<52; qp
++){
7662 src1_block
[i
]= src2_block
[i
]= random()%255;
7666 printf("Testing NAL layer\n");
7668 uint8_t bitstream
[COUNT
];
7669 uint8_t nal
[COUNT
*2];
7671 memset(&h
, 0, sizeof(H264Context
));
7673 for(i
=0; i
<COUNT
; i
++){
7681 for(j
=0; j
<COUNT
; j
++){
7682 bitstream
[j
]= (random() % 255) + 1;
7685 for(j
=0; j
<zeros
; j
++){
7686 int pos
= random() % COUNT
;
7687 while(bitstream
[pos
] == 0){
7696 nal_length
= encode_nal(&h
, nal
, bitstream
, COUNT
, COUNT
*2);
7698 printf("encoding failed\n");
7702 out
= decode_nal(&h
, nal
, &out_length
, &consumed
, nal_length
);
7706 if(out_length
!= COUNT
){
7707 printf("incorrect length %d %d\n", out_length
, COUNT
);
7711 if(consumed
!= nal_length
){
7712 printf("incorrect consumed length %d %d\n", nal_length
, consumed
);
7716 if(memcmp(bitstream
, out
, COUNT
)){
7717 printf("missmatch\n");
7722 printf("Testing RBSP\n");
7730 static int decode_end(AVCodecContext
*avctx
)
7732 H264Context
*h
= avctx
->priv_data
;
7733 MpegEncContext
*s
= &h
->s
;
7735 free_tables(h
); //FIXME cleanup init stuff perhaps
7738 // memset(h, 0, sizeof(H264Context));
7744 AVCodec h264_decoder
= {
7748 sizeof(H264Context
),
7753 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1
| CODEC_CAP_TRUNCATED
| CODEC_CAP_DELAY
,
7757 AVCodecParser h264_parser
= {
7759 sizeof(H264Context
),